# scrape.py (835 B)

import csv

import requests
from bs4 import BeautifulSoup
  4. source = requests.get('http://coreyms.com').text
  5. soup = BeautifulSoup(source, 'lxml')
  6. csv_file = open('cms_scrape.csv', 'w')
  7. csv_writer = csv.writer(csv_file)
  8. csv_writer.writerow(['headline', 'summary', 'video_link'])
  9. for article in soup.find_all('article'):
  10. headline = article.h2.a.text
  11. print(headline)
  12. summary = article.find('div', class_='entry-content').p.text
  13. print(summary)
  14. try:
  15. vid_src = article.find('iframe', class_='youtube-player')['src']
  16. vid_id = vid_src.split('/')[4]
  17. vid_id = vid_id.split('?')[0]
  18. yt_link = f'https://youtube.com/watch?v={vid_id}'
  19. except Exception as e:
  20. yt_link = None
  21. print(yt_link)
  22. print()
  23. csv_writer.writerow([headline, summary, yt_link])
  24. csv_file.close()