utils.py

import re

import requests
from bs4 import BeautifulSoup

headers = {
    "Accept": "*/*",
    "Connection": "keep-alive",
    "Accept-Language": "zh-CN,zh;q=0.8",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36",
}

session = requests.Session()


def get_source_requests(url, data=None, proxy=None, timeout=30):
    """
    Get the page source by requests (POST when ``data`` is given, GET otherwise),
    with HTML comments stripped from the returned markup.
    """
    proxies = {"http": proxy}
    if data:
        response = session.post(
            url, headers=headers, data=data, proxies=proxies, timeout=timeout
        )
    else:
        response = session.get(url, headers=headers, proxies=proxies, timeout=timeout)
    source = re.sub(
        r"<!--.*?-->",
        "",
        response.text,
        flags=re.DOTALL,
    )
    return source


def get_soup_requests(url, data=None, proxy=None, timeout=30):
    """
    Get a BeautifulSoup object for the page source fetched by requests.
    """
    source = get_source_requests(url, data, proxy, timeout)
    soup = BeautifulSoup(source, "html.parser")
    return soup


def close_session():
    """
    Close the shared requests session.
    """
    session.close()
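

# Illustrative usage sketch, not part of the original module: fetch a page,
# parse it with BeautifulSoup via the helpers above, and release the shared
# session afterwards. "https://example.com" is a placeholder URL chosen only
# for demonstration.
if __name__ == "__main__":
    demo_soup = get_soup_requests("https://example.com")
    title = demo_soup.title.get_text(strip=True) if demo_soup.title else "(no title)"
    print(title)
    close_session()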