- from bs4 import BeautifulSoup
- import time
- import requests
- from random import choice
def get_proxy():
    """Scrape https://free-proxy-list.net/ and return one random proxy.

    Returns:
        dict: ``{"schema": "http" | "https", "address": "ip:port"}``.

    Raises:
        requests.RequestException: on network failure or timeout.
        ValueError: if no proxy rows could be parsed from the page.
    """
    # Timeout so a dead endpoint cannot hang the whole script.
    html = requests.get('https://free-proxy-list.net/', timeout=10).text
    soup = BeautifulSoup(html, 'lxml')
    proxies = []
    for tr in soup.find('tbody').find_all('tr'):
        tds = tr.find_all('td')
        # Columns: 0 = IP, 1 = port, 6 = "Https" yes/no flag.
        schema = 'https' if 'yes' in tds[6].text.strip() else 'http'
        address = tds[0].text.strip() + ':' + tds[1].text.strip()
        proxies.append({"schema": schema, "address": address})
    if not proxies:
        # choice([]) would raise a cryptic IndexError; fail loudly instead.
        raise ValueError('no proxies found on the page')
    return choice(proxies)
def time_watcher(start=None):
    """Print and return the elapsed time in seconds since *start*.

    Args:
        start: Reference timestamp from ``time.time()``. Defaults to the
            module-level ``start_time`` set in the ``__main__`` guard
            (keeps the original zero-argument call working).

    Returns:
        float: elapsed seconds.
    """
    elapsed = time.time() - (start_time if start is None else start)
    print('--- %s seconds ---' % elapsed)
    return elapsed
def get_html(url):
    """Fetch *url* through a randomly chosen scraped proxy.

    Args:
        url: Endpoint expected to return JSON containing an ``'origin'``
            key (e.g. http://httpbin.org/ip).

    Returns:
        str: the ``'origin'`` field of the JSON response.

    Raises:
        requests.RequestException: on network/proxy failure, timeout,
            or a non-2xx HTTP status.
    """
    p = get_proxy()  # {"schema": '', "address": ""}
    # BUG FIX: the proxies dict is consulted per URL scheme, so keying it
    # only by the proxy's own schema (e.g. {'https': ...}) silently
    # bypassed the proxy for plain-http targets. Route both schemes.
    proxy = {"http": p['address'], "https": p['address']}
    response = requests.get(url, proxies=proxy, timeout=5)
    response.raise_for_status()
    return response.json()['origin']
def main():
    """Report our externally visible IP, fetched through a random proxy."""
    target = 'http://httpbin.org/ip'
    print(get_html(target))
if __name__ == '__main__':
    # Record script start so time_watcher() can report the total runtime.
    start_time = time.time()
    main()
    time_watcher()
|