1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071 |
- import requests
- from bs4 import BeautifulSoup
- import csv
def get_normalise_str(string):
    """Split ``string`` on single space characters.

    The separator is exactly one space, so consecutive spaces produce
    empty strings in the result and an empty input yields ``['']``.

    Args:
        string: Text to split.

    Returns:
        The list of pieces, in their original order.
    """
    return string.split(' ')
def write_csv(data):
    """Append one record to ``cmc.csv`` as a single CSV row.

    The file is opened in append mode on every call, so earlier rows are
    kept and the file is created if it does not exist yet.

    Args:
        data: Mapping that provides the keys ``num``, ``tik``, ``name``,
            ``url``, ``cap``, ``price``, ``vol`` and ``chg``; their values
            are written in that column order.

    Raises:
        KeyError: If ``data`` lacks one of the keys listed above.
    """
    columns = ('num', 'tik', 'name', 'url', 'cap', 'price', 'vol', 'chg')
    with open('cmc.csv', 'a', newline='', encoding='utf-8') as out:
        # The row is built after the file is opened, matching the order of
        # side effects when a key is missing.
        csv.writer(out).writerow([data[column] for column in columns])
def get_html(url, timeout=30):
    """Download ``url`` and return the response body as text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` may block indefinitely on an
            unresponsive host.

    Returns:
        The decoded response body (``Response.text``).

    Raises:
        requests.exceptions.RequestException: On connection failure, on
            timeout, or when the server answers with a 4xx/5xx status.
    """
    res = requests.get(url, timeout=timeout)
    # Fail loudly on an HTTP error instead of handing an error page to the
    # parser as if it were the expected document.
    res.raise_for_status()
    return res.text
def _first_text(cell, tag):
    """Return the text of the first ``tag`` element inside ``cell``, or None."""
    found = cell.find(tag)
    return None if found is None else found.text


def _parse_row(tr):
    """Build one record dict from a table row, or None if the row is malformed."""
    tds = tr.find_all('td')
    if len(tds) < 7:
        return None

    # The name cell holds a <div> wrapping the <a> that carries both the
    # coin name and the relative link; look it up once.
    name_div = tds[1].find('div')
    anchor = name_div.find('a') if name_div is not None else None
    if anchor is None:
        return None
    link = anchor.get('href')

    # The ticker is taken as the second space-separated piece of the cell text.
    ticker_parts = get_normalise_str(_first_text(tds[5], 'div') or '')
    if link is None or len(ticker_parts) < 2:
        return None

    data = {
        'num': _first_text(tds[0], 'div'),
        'tik': ticker_parts[1],
        'name': anchor.text,
        'url': 'https://coinmarketcap.com' + link,
        'cap': _first_text(tds[2], 'p'),
        'price': _first_text(tds[3], 'a'),
        'vol': _first_text(tds[4], 'a'),
        'chg': _first_text(tds[6], 'div'),
    }
    if any(value is None for value in data.values()):
        return None
    return data


def get_page_data(html):
    """Extract every table row from ``html`` and append it to the CSV file.

    Rows are the ``<tr>`` elements with class ``cmc-table-row``. Each row
    that has the expected cells is turned into a dict and passed to
    ``write_csv``. A row that lacks an expected cell or tag is skipped, so
    one malformed row no longer aborts the run with ``AttributeError`` or
    ``IndexError`` after earlier rows were already written.

    Args:
        html: Markup of the page, as a string.

    Returns:
        None. The extracted records are written through ``write_csv``.
    """
    soup = BeautifulSoup(html, 'lxml')  # arguments: markup and parser name
    for tr in soup.find_all('tr', class_='cmc-table-row'):
        data = _parse_row(tr)
        if data is None:
            continue
        write_csv(data)
def main():
    """Fetch the CoinMarketCap front page and pass its HTML to the parser."""
    get_page_data(get_html('https://coinmarketcap.com/'))
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|