main.py 1.3 KB

1234567891011121314151617181920212223242526272829303132333435363738
  1. from bs4 import BeautifulSoup
  2. import requests
  3. import time
  4. def get_html(url):
  5. response = requests.get(url)
  6. print(dir(response))
  7. def get_page_data(response):
  8. if 'html' in response.headers['Content-Type']:
  9. html = response.text
  10. else:
  11. html = response.json()['content_html']
  12. soup = BeautifulSoup(html, 'lxml')
  13. items = soup.find_all('h3', class_="ytd-grid-video-renderer")
  14. for i in items:
  15. name = i.text.strip()
  16. url = i.find('a').get('href')
  17. print(name)
  18. def main():
  19. # url = 'https://www.youtube.com/channel/UCYUk-TX7vQprXDtem2v6NtA/videos'
  20. url = 'https://www.youtube.com/browse_ajax?ctoken=4qmFsgJwEhhVQ1lVay1UWDd2UXByWER0ZW0ydjZOdEEaVEVnWjJhV1JsYjNNWUF5QUFNQUU0QWVvRExFTm9PRWxuVEdwMk0wMTVlakZqVWxoRmFFMUxRa1ZPVFZWVlNWTkRkMmxIY2t4UFNEQjBkalUxY0ZWQw%253D%253D&continuation=4qmFsgJwEhhVQ1lVay1UWDd2UXByWER0ZW0ydjZOdEEaVEVnWjJhV1JsYjNNWUF5QUFNQUU0QWVvRExFTm9PRWxuVEdwMk0wMTVlakZqVWxoRmFFMUxRa1ZPVFZWVlNWTkRkMmxIY2t4UFNEQjBkalUxY0ZWQw%253D%253D&itct=CAIQybcCIhMI4qHA2Imf6wIVTnebCh3Bxwr-'
  21. get_html(url)
  22. # get_page_data(response)
  23. # print(response.text)
  24. # write_file(response.json())
  25. if __name__ == '__main__':
  26. start_time = time.time()
  27. main()
  28. print("--- %s seconds ---" % (time.time() - start_time))