# asyn_FilmDB.py — asynchronously scrapes a film catalogue site and stores
# the collected titles and links in a local SQLite database.
  1. import requests
  2. from bs4 import BeautifulSoup
  3. import sqlite3
  4. from progress.bar import IncrementalBar
  5. from tqdm import tqdm
  6. import asyncio
  7. import aiohttp
  8. Database = sqlite3.connect("Film_list.db")
  9. cur = Database.cursor()
  10. cur.execute("""CREATE TABLE IF NOT EXISTS Film_list (
  11. ID INTEGER PRIMARY KEY,
  12. NAME TEXT,
  13. YEAR TEXT,
  14. LINK TEXT)""")
  15. Database.commit()
  16. data = []
  17. async def get_page_data(url, number, session, bar):
  18. r = url + f"page/{number}/"
  19. async with session.get(url = r) as response:
  20. response_text = await response.text()
  21. soup = BeautifulSoup(response_text, "lxml")
  22. for i, j, k in zip(soup.find_all("div", class_="th-title"), soup.find_all("div", class_="th-year"),
  23. soup.find_all("div", class_="th-item")):
  24. name_year = i.text + ' (' + j.text + ')'
  25. link = k.find("a", class_="th-in with-mask").get('href')
  26. data.append([name_year, link])
  27. bar.next()
  28. async def get_page_info(url):
  29. async with aiohttp.ClientSession() as session:
  30. response = await session.get(url=url, timeout = 3000)
  31. soup = BeautifulSoup(await response.text(), "lxml")
  32. number_of_films = int(soup.find("div", {"class": "navigation"}).text.split()[-1])
  33. tasks = []
  34. bar = IncrementalBar('Download links ', max = number_of_films)
  35. for page in range(1, number_of_films+1):
  36. task = asyncio.create_task(get_page_data(url, page, session, bar))
  37. tasks.append(task)
  38. bar.finish()
  39. await asyncio.gather(*tasks)
  40. def main():
  41. url = f"https://hd-4.lordfilm-s.co/"
  42. asyncio.run(get_page_info(url))
  43. for name_year, link in tqdm(data):
  44. cur.execute("""INSERT INTO Film_list (NAME, LINK) VALUES(?, ?);""", (name_year, link))
  45. Database.commit()
  46. if __name__=="__main__":
  47. main()