1234567891011121314151617181920212223242526272829303132333435363738 |
- from bs4 import BeautifulSoup
- import requests
- import time
def get_html(url):
    """Fetch *url* with an HTTP GET request and return the response.

    Args:
        url: Address to request.

    Returns:
        The response object produced by ``requests.get``. Returning it lets
        callers hand the result to ``get_page_data`` (previously the response
        was discarded and the function returned ``None``).
    """
    response = requests.get(url)
    # Debug aid kept from the original: lists the attributes available on
    # the response object.
    print(dir(response))
    return response
def get_page_data(response):
    """Print the stripped text of every matching video heading in *response*.

    The markup is taken from ``response.text`` when the ``Content-Type``
    header mentions ``html``; otherwise the body is decoded as JSON and the
    markup is read from its ``'content_html'`` key.

    Args:
        response: Object exposing ``headers`` (a mapping), ``text`` and
            ``json()``, such as the value returned by ``requests.get``.

    Raises:
        KeyError: If the JSON payload has no ``'content_html'`` key.
    """
    # .get() with a default: a response without a Content-Type header is
    # treated like any other non-HTML response instead of raising KeyError.
    content_type = response.headers.get('Content-Type', '')
    if 'html' in content_type:
        html = response.text
    else:
        html = response.json()['content_html']

    soup = BeautifulSoup(html, 'lxml')
    # Only the heading text is printed. The original also looked up the
    # anchor's href into an unused local, which crashed on headings that
    # contain no <a> element; that lookup has been removed.
    for heading in soup.find_all('h3', class_="ytd-grid-video-renderer"):
        print(heading.text.strip())
def main():
    """Request the hard-coded YouTube continuation URL via ``get_html``."""
    # Alternative target (the channel's regular videos page):
    #   https://www.youtube.com/channel/UCYUk-TX7vQprXDtem2v6NtA/videos
    continuation_url = 'https://www.youtube.com/browse_ajax?ctoken=4qmFsgJwEhhVQ1lVay1UWDd2UXByWER0ZW0ydjZOdEEaVEVnWjJhV1JsYjNNWUF5QUFNQUU0QWVvRExFTm9PRWxuVEdwMk0wMTVlakZqVWxoRmFFMUxRa1ZPVFZWVlNWTkRkMmxIY2t4UFNEQjBkalUxY0ZWQw%253D%253D&continuation=4qmFsgJwEhhVQ1lVay1UWDd2UXByWER0ZW0ydjZOdEEaVEVnWjJhV1JsYjNNWUF5QUFNQUU0QWVvRExFTm9PRWxuVEdwMk0wMTVlakZqVWxoRmFFMUxRa1ZPVFZWVlNWTkRkMmxIY2t4UFNEQjBkalUxY0ZWQw%253D%253D&itct=CAIQybcCIhMI4qHA2Imf6wIVTnebCh3Bxwr-'
    get_html(continuation_url)
    # Currently disabled follow-up steps: parsing the page with
    # get_page_data, dumping the raw response text, and writing the JSON
    # payload to a file.
if __name__ == '__main__':
    # Script entry point: run main() and report how long it took.
    # perf_counter() is monotonic, so the measured duration cannot be skewed
    # by system clock adjustments the way a time.time() difference can.
    start_time = time.perf_counter()
    main()
    print("--- %s seconds ---" % (time.perf_counter() - start_time))
|