#!/usr/bin/python3
"""Rip a paginated Atom feed to disk.

Downloads an Atom feed page, saves it under a filesystem-safe name derived
from its URL (URL-safe base64), then follows the feed's rel="next" link and
repeats until no further "next" link exists. Pages already on disk are not
re-downloaded, so interrupted runs can resume.
"""
import logging
import os
import time
from argparse import ArgumentParser
from base64 import urlsafe_b64encode

import requests

import atomparser


def main():
    parser = ArgumentParser(description='Rip a paginated Atom feed to disk.')
    parser.add_argument('-u', '--url', required=True,
                        help='URL to Atom feed to rip')
    # type=float: without it, a CLI-supplied value arrives as a string and
    # time.sleep() raises TypeError (the int default only masked the bug).
    parser.add_argument('-i', '--interval', type=float, default=5,
                        help='seconds to wait between each fetch')
    parser.add_argument('-p', '--path', default='data',
                        help='directory to put stuff in (will be created)')
    parser.add_argument('-d', '--debug', action='store_true',
                        help='print debug logging output to console')
    args = parser.parse_args()

    if args.debug:
        logging.basicConfig(level=logging.DEBUG)
    log = logging.getLogger()

    # makedirs(exist_ok=True) handles nested paths and is race-free, unlike
    # the isdir()-then-mkdir() check it replaces.
    os.makedirs(args.path, exist_ok=True)

    url = args.url
    while True:
        # Encode the URL itself as the filename so each page maps to a
        # unique, filesystem-safe name that a re-run can recognise.
        filename = urlsafe_b64encode(url.encode()).decode('utf-8')
        filepath = os.path.join(args.path, filename)
        retrieved = False
        if not os.path.isfile(filepath):
            print('downloading to {}'.format(filepath))
            log.debug('downloading to %s', filepath)
            with open(filepath, 'wb') as xml:
                xml.write(requests.get(url).content)
            retrieved = True
        feed = atomparser.AtomFeed(filepath)
        try:
            # feed.next is None on the last page -> .get() raises
            # AttributeError, which ends the crawl (EAFP).
            url = feed.next.get('href')
        except AttributeError:
            log.debug('no more "next" in feed XML')
            break
        if not url:
            log.debug('url empty despite no exception thrown')
            break
        if retrieved:
            # Throttle only after an actual network fetch; pages that were
            # already cached on disk are processed without delay.
            log.debug('sleeping %ss before next iteration', args.interval)
            time.sleep(args.interval)


if __name__ == '__main__':
    main()