1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465 |
"""Scrape film titles, years and links from a catalogue site into SQLite."""

import asyncio
import sqlite3

import aiohttp
import requests
from bs4 import BeautifulSoup
from progress.bar import IncrementalBar
from tqdm import tqdm

# Module-level database handle, shared by main(); the table is created
# up-front so inserts can run unconditionally later.
Database = sqlite3.connect("Film_list.db")
cur = Database.cursor()
cur.execute("""CREATE TABLE IF NOT EXISTS Film_list (
ID INTEGER PRIMARY KEY,
NAME TEXT,
YEAR TEXT,
LINK TEXT)""")
Database.commit()

# Accumulates [name_year, link] pairs appended by the async page scrapers.
data = []
async def get_page_data(url, number, session, bar):
    """Fetch one catalogue page and append its films to the global ``data``.

    Args:
        url: Base site URL; assumed to end with ``/`` — TODO confirm at caller.
        number: 1-based page number to fetch.
        session: Shared ``aiohttp.ClientSession``.
        bar: Progress bar whose ``max`` is the total page count; advanced
            exactly once per page fetched.
    """
    page_url = url + f"page/{number}/"
    async with session.get(url=page_url) as response:
        response_text = await response.text()
    soup = BeautifulSoup(response_text, "lxml")
    for title_div, year_div, item_div in zip(
            soup.find_all("div", class_="th-title"),
            soup.find_all("div", class_="th-year"),
            soup.find_all("div", class_="th-item")):
        name_year = title_div.text + ' (' + year_div.text + ')'
        link = item_div.find("a", class_="th-in with-mask").get('href')
        data.append([name_year, link])
    # Bug fix: tick once per *page*, not once per film — the bar's max is
    # the page count, so per-film ticks would overrun it.
    bar.next()
async def get_page_info(url):
    """Discover the catalogue's page count, then scrape every page concurrently.

    Args:
        url: Base site URL of the film catalogue.
    """
    # aiohttp expects a ClientTimeout object; passing a bare number to
    # ``timeout=`` is deprecated.
    timeout = aiohttp.ClientTimeout(total=3000)
    async with aiohttp.ClientSession() as session:
        # ``async with`` releases the response once the body is read.
        async with session.get(url=url, timeout=timeout) as response:
            soup = BeautifulSoup(await response.text(), "lxml")
        # The last whitespace-separated token of the pagination block is the
        # total number of pages (not films — renamed accordingly).
        number_of_pages = int(
            soup.find("div", {"class": "navigation"}).text.split()[-1])
        bar = IncrementalBar('Download links ', max=number_of_pages)
        tasks = [
            asyncio.create_task(get_page_data(url, page, session, bar))
            for page in range(1, number_of_pages + 1)
        ]
        # Bug fix: wait for every download to complete *before* finishing the
        # bar — the original called bar.finish() first, closing the bar while
        # the tasks were still running.
        await asyncio.gather(*tasks)
        bar.finish()
def main():
    """Scrape the whole catalogue and persist the results into Film_list.db."""
    url = "https://hd-4.lordfilm-s.co/"  # plain string: no placeholders, f prefix dropped
    asyncio.run(get_page_info(url))
    # Insert all rows in a single transaction instead of committing per row —
    # the original committed inside the loop, one fsync per film.
    cur.executemany(
        """INSERT INTO Film_list (NAME, LINK) VALUES(?, ?);""",
        tqdm(data),
    )
    Database.commit()


if __name__ == "__main__":
    main()
|