gaze筛选.py 2.0 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. # File : gaze筛选.py
  4. # Author: DaShenHan&道长-----先苦后甜,任凭晚风拂柳颜------
  5. # Date : 2022/10/20
  6. import re
  7. import requests
  8. from utils.htmlParser import jsoup
  9. headers = {'user-agent':'Mozilla/5.0 (Linux; Android 11; M2007J3SC Build/RKQ1.200826.002; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/77.0.3865.120 MQQBrowser/6.2 TBS/045714 Mobile Safari/537.36'}
  10. def getFilters(url):
  11. # cate_id = str(re.search('.*/(\d+)', url).groups()[0])
  12. # print(cate_id)
  13. jsp = jsoup(url)
  14. pdfh = jsp.pdfh
  15. pdfa = jsp.pdfa
  16. print(jsp)
  17. r = requests.get(url,headers=headers)
  18. r.encoding = r.apparent_encoding
  19. html = r.text
  20. cls_list = pdfa(html,'.mform&&div')
  21. print(len(cls_list))
  22. print(cls_list)
  23. # ft_dict = {cate_id:[]}
  24. ft_dict = {}
  25. def getCate(cls):
  26. key = cls
  27. name = pdfh(html, f'.{cls}&&div:eq(0)&&a&&Text').replace('全部', '')
  28. values = pdfa(html, f'.{cls}&&div')
  29. # vl = [{"n":pdfh(i,'a&&Text'),"v":pdfh(i,'a&&href')} for i in values]
  30. # vl = [{"n":pdfh(i,'a&&Text'),"v":re.search('(.*?)-(.*)',pdfh(i,'a&&data-filter'),re.M|re.I|re.S).groups()[1].replace('.html','').replace('-','')} for i in values]
  31. vl = [{"n": pdfh(i, 'a&&Text'), "v": pdfh(i, 'a&&data-filter')} for i in values]
  32. return {
  33. 'key': key,
  34. 'name': name,
  35. 'value': vl
  36. }
  37. for cls in cls_list:
  38. cate_id = pdfh(cls,'a&&data-filter')
  39. # key = pdfh(html,'.mcountry&&div:eq(0)&&a&&data-filter')
  40. # key = 'mcountry'
  41. # name = pdfh(html,'.mcountry&&div:eq(0)&&a&&Text').replace('全部','')
  42. # values = pdfa(html,'.mcountry&&div')
  43. # vl = [{"n":pdfh(i,'a&&Text'),"v":pdfh(i,'a&&data-filter')} for i in values]
  44. ft_dict[cate_id] = []
  45. for c in ['mcountry','mtag','sort','album']:
  46. d = getCate(c)
  47. ft_dict[cate_id].append(d)
  48. print(ft_dict)
  49. # return ft_dict
  50. if __name__ == '__main__':
  51. getFilters('https://gaze.run/filter')