- """
- dl.py - Mythical downloader
- Copyright (C) 2018 Alyssa Rosenzweig <alyssa@rosenzweig.io>
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU Affero General Public License for more details.
- You should have received a copy of the GNU Affero General Public License
- along with this program. If not, see <https://www.gnu.org/licenses/>.
- """

import subprocess
import urllib.parse
import sys

# Don't actually download, just classify
dry_run = True

def read_all(file):
    with open(file, "r") as f:
        return f.read()

# Known sites. Format: hash map, where the key is the domain and the value is
# a list of adaptors. An adaptor is itself a list, where the first element is
# the adaptor's name and the remaining elements, if any, are parameters to
# that adaptor.
# TODO: Where to source these site lists?

SITES = {
    "discordapp.com": [["libpurple", "purple-discord"]],
    "discord.gg": [["libpurple", "purple-discord"]]
}

def domains_from_file(file, adaptor):
    # All of these domains are for the site in question
    sites = read_all(file).split("\n")

    for site in sites:
        # Skip blank lines so we don't map "" to an adaptor
        if site:
            SITES[site] = [[adaptor]]
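
# Each sites file is assumed to be a plain text list of domains, one per line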
- domains_from_file("fanficfare_sites.txt", "fanficfare")
- domains_from_file("ytdl_sites.txt", "youtube-dl")
- domains_from_file("drm_sites.txt", "drm")

# Functions to download a given URL with a given adaptor

def ytdl(url):
    # TODO
    subprocess.run(["mpv", "--vo=x11", url])

def fanficfare(url):
    # TODO
    subprocess.run(["fanficfare", url])

def libpurple(url, protocol):
    # TODO -- the protocol parameter (e.g. "purple-discord") comes from SITES
    print("libpurple'ing " + url + " with " + protocol)

def drm(url):
    print("This site requires the use of Digital Restrictions Management.")
    print("To learn more, see https://defectivebydesign.org/")
    sys.exit(1)

ADAPTORS = {
    "youtube-dl": ytdl,
    "fanficfare": fanficfare,
    "libpurple": libpurple,
    "drm": drm
}

# Fast track certain extensions
AV = ["ogg", "ogv", "webm", "mp3", "mp4", "mkv"]

# If it's not a known site, we can just test individual downloaders via
# subprocess

def test_url_sub(args):
    proc = subprocess.run(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    return proc.returncode == 0
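
# e.g. test_url_sub(["youtube-dl", "--simulate", url]) returns True iff
# youtube-dl recognises the URL, without downloading anything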

# Parse a URL, defaulting to https:// when no scheme is given

def url_parts(url):
    return urllib.parse.urlparse(url if "://" in url else "https://" + url)
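
# e.g. url_parts("discord.gg/abc") is parsed as "https://discord.gg/abc"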

def domain_from_url(parts):
    domain = parts.netloc

    # Remove the common "www." subdomain; other subdomains are kept
    if domain.startswith("www."):
        domain = domain[len("www."):]

    return domain
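
# e.g. domain_from_url(url_parts("https://www.discordapp.com/channels/x"))
# returns "discordapp.com"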

# Find adaptors for a URL, first by consulting our dictionary, then by
# bruteforce

def find_adaptors(parts, allow_bruteforce=True):
    domain = domain_from_url(parts)
    ext = parts.path.split(".")[-1]

    # This "crude" technique merely checks the hash map
    if domain in SITES:
        return SITES[domain]

    # This runs the full regex stack, but it is slow and sometimes I/O bound
    if allow_bruteforce or ext in AV:
        # Reconstruct the full URL, since only the parsed parts are in scope
        url = urllib.parse.urlunparse(parts)

        if test_url_sub(["youtube-dl", "--simulate", url]):
            return [["youtube-dl"]]

    return []
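
# e.g. find_adaptors(url_parts("discord.gg/abc")) returns
# [["libpurple", "purple-discord"]] via the SITES table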

# Actualise download

def download_with_adaptor(url, adaptor):
    # .get() so an unknown adaptor name yields None rather than a KeyError
    fn = ADAPTORS.get(adaptor[0])
    args = [url] + adaptor[1:]

    if fn is not None:
        fn(*args)
    else:
        print("Unknown adaptor " + adaptor[0])

# Simple test from the command line
url = sys.argv[1]
parts = url_parts(url)

adaptors = find_adaptors(parts, allow_bruteforce=True)
print(adaptors)

if len(adaptors) > 1:
    print("Warning: ambiguous, using the first adaptor")
elif len(adaptors) == 0:
    print("No adaptors found")
    sys.exit(1)

if not dry_run:
    download_with_adaptor(url, adaptors[0])
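
# Usage: python3 dl.py URL
# With dry_run = True this only prints the matched adaptors; flip it to
# False to actually dispatch the download.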
|