#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# zbweb/dr_py
# Mirrored from https://github.com/gaotianliuyun/dr_py
# File  : gaze筛选.py
# Author: DaShenHan&道长-----先苦后甜，任凭晚风拂柳颜------
# Date  : 2022/10/20

import re

import requests
from utils.htmlParser import jsoup

# Request headers sent with the page download; only a User-Agent is set.
# The value is assembled from adjacent string literals purely to keep the
# lines short; the resulting string is a single header value.
headers = {
    'user-agent': (
        'Mozilla/5.0 (Linux; Android 11; M2007J3SC Build/RKQ1.200826.002; wv) '
        'AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 '
        'Chrome/77.0.3865.120 MQQBrowser/6.2 TBS/045714 Mobile Safari/537.36'
    ),
}

def getFilters(url):
    """Scrape the filter groups offered on a filter page.

    The page at *url* is downloaded once. Every ``div`` matched by the
    ``.mform&&div`` selector contributes one category id (the
    ``data-filter`` attribute of its anchor). Each category id is mapped to
    the same four filter groups, read from the elements with the CSS classes
    ``mcountry``, ``mtag``, ``sort`` and ``album``.

    NOTE(review): the selector syntax (``&&``, ``:eq(0)``, ``Text``) and the
    return types of ``pdfh``/``pdfa`` are defined by
    ``utils.htmlParser.jsoup``. This code assumes ``pdfa`` returns a list of
    HTML fragments and ``pdfh`` returns a string -- confirm against that
    module.

    Args:
        url: Address of the filter page to download.

    Returns:
        dict: ``{category_id: [group, ...]}`` where every group has the form
        ``{'key': css_class, 'name': label, 'value': [{'n': text, 'v':
        data_filter}, ...]}``. The dict is empty when no category node is
        found. The same dict is also printed to stdout.

    Raises:
        requests.exceptions.RequestException: If the download fails or the
            server does not respond within the timeout.
    """
    jsp = jsoup(url)
    pdfh = jsp.pdfh
    pdfa = jsp.pdfa
    # The prints in this function are debug output kept so the script's
    # console output stays the same.
    print(jsp)

    # Without a timeout requests waits indefinitely on an unresponsive server.
    r = requests.get(url, headers=headers, timeout=15)
    r.encoding = r.apparent_encoding
    html = r.text

    cls_list = pdfa(html, '.mform&&div')
    print(len(cls_list))
    print(cls_list)

    ft_dict = {}
    if cls_list:
        # The four groups are read from the whole page and do not depend on
        # the category node, so scrape them once instead of once per category.
        scraped = []
        for css_class in ('mcountry', 'mtag', 'sort', 'album'):
            # The first entry's text doubles as the group label once the
            # "全部" prefix/suffix is stripped.
            label = pdfh(html, f'.{css_class}&&div:eq(0)&&a&&Text').replace('全部', '')
            options = [
                (pdfh(node, 'a&&Text'), pdfh(node, 'a&&data-filter'))
                for node in pdfa(html, f'.{css_class}&&div')
            ]
            scraped.append((css_class, label, options))

        for cls in cls_list:
            cate_id = pdfh(cls, 'a&&data-filter')
            # Build fresh dicts and lists per category so that mutating one
            # category's filters never affects another's.
            ft_dict[cate_id] = [
                {
                    'key': key,
                    'name': name,
                    'value': [{"n": text, "v": data_filter} for text, data_filter in options],
                }
                for key, name, options in scraped
            ]

    print(ft_dict)
    return ft_dict

if __name__ == '__main__':
    # Script entry point: run the scraper once against the gaze filter page.
    filter_page = 'https://gaze.run/filter'
    getFilters(filter_page)