采集转换器.py 2.2 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. # File : 采集转换器.py
  4. # Author: DaShenHan&道长-----先苦后甜,任凭晚风拂柳颜------
  5. # Date : 2024/7/4
  6. import json
  7. import requests
  8. from urllib.parse import urlsplit
  9. from collections import OrderedDict
  10. def get_host(url):
  11. """
  12. 获取主页地址
  13. @param url:
  14. @return:
  15. """
  16. ret = urlsplit(url)
  17. return f'{ret.scheme}://{ret.netloc}'
  18. def get_sid(url):
  19. """
  20. 获取id主页
  21. @param url:
  22. @return:
  23. """
  24. ret = urlsplit(url)
  25. return ret.netloc
  26. def get_api(url):
  27. """
  28. 获取接口api
  29. @param url:
  30. @return:
  31. """
  32. ret = urlsplit(url)
  33. return ret.path.rstrip('/') + '/'
  34. def delete_same(data, key='url'):
  35. """
  36. 字典列表去重,按字典的某个key
  37. @param data:
  38. @param key:
  39. @return:
  40. """
  41. unique_data = list(OrderedDict((d[key], d) for d in data).values())
  42. if key == 'sid':
  43. for site in unique_data:
  44. del site['sid']
  45. return unique_data
  46. def main(zy_url="https://cdn.jsdelivr.net/gh/waifu-project/v1@latest/zy.json"):
  47. r = requests.get(zy_url)
  48. ret = r.json()
  49. sites = ret['sites']['data']
  50. sites = [site for site in sites if site.get('type') and site['type'] == 1]
  51. print(f'共计发现type1的站点:{len(sites)}条记录')
  52. covert_sites = []
  53. for site in sites:
  54. if site.get("name") and site.get("api"):
  55. surl = site['api']
  56. host = get_host(surl)
  57. api = get_api(surl)
  58. sid = get_sid(surl)
  59. cvalue = {
  60. "sid": sid,
  61. "name": site["name"],
  62. "url": host,
  63. "parse_url": "",
  64. "cate_exclude": ""
  65. }
  66. if api != '/api.php/provide/vod/':
  67. cvalue["api"] = api
  68. covert_sites.append(cvalue)
  69. print(f'转换完成采集之王的站点:{len(covert_sites)}条记录')
  70. covert_sites = delete_same(covert_sites, 'sid')
  71. print(f'去重后的采集之王的站点:{len(covert_sites)}条记录')
  72. with open('采集[zy].json', mode='w+', encoding='utf-8') as f:
  73. f.write(json.dumps(covert_sites, ensure_ascii=False, indent=4))
  74. if __name__ == '__main__':
  75. main()