123456789101112131415161718192021222324252627282930313233343536373839404142 |
- import requests
- import csv
- import os
def get_html(url, timeout=30):
    """Download *url* and return the response body as text.

    Args:
        url: Address to fetch with an HTTP GET request.
        timeout: Seconds to wait for the server before giving up. Requests
            applies no timeout unless one is passed, so without it a stalled
            connection would block the caller forever.

    Returns:
        The decoded response body (``Response.text``).

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeout.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on error statuses rather than returning an error page
    # that the caller would go on to parse as data.
    response.raise_for_status()
    return response.text
def write_csv(data):
    """Append one record to ``websites.csv`` located next to this script.

    Args:
        data: Mapping with the keys ``name``, ``url``, ``descr``,
            ``traffic``, ``percent`` and ``status``; the values are written
            in that column order. No header row is written.
    """
    columns = ['name', 'url', 'descr', 'traffic', 'percent', 'status']
    script_dir = os.path.dirname(os.path.abspath(__file__))
    target = os.path.join(script_dir, "websites.csv")
    # Append mode: every call adds exactly one line to the existing file.
    with open(target, "a", newline='', encoding='utf-8') as out:
        csv.DictWriter(out, fieldnames=columns).writerow(data)
# Keys given to the tab-separated fields of a rating row, in field order.
_RATING_FIELDS = ('name', 'url', 'descr', 'traffic', 'percent', 'status')


def _parse_rating_rows(text):
    """Yield one dict per data row of a tab-separated rating page.

    The first line of *text* is skipped (presumably the column header).
    Blank rows are ignored, rows with fewer fields than ``_RATING_FIELDS``
    are padded with empty strings, and surplus fields are dropped.
    """
    lines = text.strip('\r\n').split('\n')[1:]
    for line in lines:
        if not line.strip():
            continue
        # Trim only spaces and a stray carriage return. Stripping tabs here
        # would discard empty leading/trailing fields and shift or truncate
        # the columns.
        columns = line.strip(' \r').split('\t')
        columns.extend([''] * (len(_RATING_FIELDS) - len(columns)))
        yield dict(zip(_RATING_FIELDS, columns))


def main(first_page=0, last_page=1000):
    """Download rating pages and append every row to the CSV file.

    The original author noted that the rating has 6288 pages in total; by
    default only pages 0..1000 are fetched.

    Args:
        first_page: Number of the first page to request.
        last_page: Number of the last page to request (inclusive).
    """
    # range() excludes its end value, hence the + 1.
    for page in range(first_page, last_page + 1):
        url = 'https://www.liveinternet.ru/rating/ru//today.tsv?page={}'.format(
            page)
        for record in _parse_rating_rows(get_html(url)):
            write_csv(record)
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
|