# liveinternet.py
  1. import requests
  2. import csv
  3. import os
  4. def get_html(url):
  5. result = requests.get(url)
  6. return result.text
  7. def write_csv(data):
  8. path_f = os.path.dirname(os.path.abspath(__file__))
  9. with open(os.path.join(path_f, "websites.csv"), "a", newline='', encoding='utf-8') as file_:
  10. order = ['name', 'url', 'descr', 'traffic', 'percent', 'status']
  11. writer_f = csv.DictWriter(file_, fieldnames=order)
  12. writer_f.writerow(data)
  13. def main(): # всего страниц 6288
  14. for i in range(0, 1001): # берём на 1 больше
  15. url = 'https://www.liveinternet.ru/rating/ru//today.tsv?page={}'.format(
  16. str(i))
  17. response = get_html(url)
  18. datas = response.strip().split('\n')[1:] # сплитуем по переносу строки
  19. for row in datas:
  20. columns = row.strip().split('\t') # сплитуем по табуляции
  21. dictn = {
  22. 'name': columns[0],
  23. 'url': columns[1],
  24. 'descr': columns[2],
  25. 'traffic': columns[3],
  26. 'percent': columns[4],
  27. 'status': columns[5]
  28. }
  29. write_csv(dictn)
  30. # print(dic)
  31. if __name__ == '__main__':
  32. main()