manu

Burning series downloader

The downloader no longer works because of changes to the site. :(

import platform
import time
import json
import os

import bs4
import click
import requests
import youtube_dl

from urllib.parse import urljoin

# Credentials read from the environment; each is None when its variable is
# unset. API_KEY is sent as the anti-captcha "clientKey" and SITE_KEY as the
# "websiteKey" of the captcha task.
API_KEY = os.environ.get('API_KEY')
SITE_KEY = os.environ.get('SITE_KEY')

@click.command()
@click.option('-f', '--file', type=click.File('a'), help='file to dump links into')
@click.option('-y', is_flag=True, help='skip confirmation prompt')
def main(file, y):
    """Burning series command line downloader"""
    # NOTE: the docstring above doubles as the click help text, so it is kept
    # short; the details live in these comments.
    #
    # file -- optional file opened in append mode; when given, resolved links
    #         are written to it (one per line) instead of being downloaded.
    # y    -- when set, the confirmation prompt is skipped.

    if not API_KEY or not SITE_KEY:
        click.echo('No API_KEY or SITE_KEY found')
        # quit() is an interactive-shell helper from the site module and would
        # exit with status 0; signal the failure with a non-zero status.
        raise SystemExit(1)

    series = get_series()
    seasons = get_seasons(series)
    episodes = get_episodes(seasons)

    if not y:
        confirm = 'Do you really want to download {0} season(s) with {1} episode(s)'
        click.confirm(confirm.format(len(seasons), sum(len(e) for e in episodes)), abort=True)

    for season in episodes:
        with click.progressbar(season, label='Decoding captchas') as bar:
            for episode in bar:
                link = decaptcha(episode)

                if link is None:
                    # decaptcha gives up without a result when no captcha was
                    # solved in time; skip instead of crashing on None.
                    click.echo('\nCould not resolve a link, skipping episode', err=True)
                    continue

                if file:
                    file.write(link + '\n')
                else:
                    click.echo('\n')
                    with youtube_dl.YoutubeDL() as ydl:
                        ydl.download([link])

    # keep the console window open on Windows until a key is pressed
    if 'Windows' in platform.system():
        click.pause()

def get_series():
    """Ask the user for a series and return its URL.

    The user enters a search term, which is matched case-insensitively as a
    substring against every series name returned by ``get_all_series``. The
    prompt repeats until at least one series matches. All matches are listed
    with a 1-based number; if there is more than one, the user picks a number.

    Returns:
        The URL of the selected series.

    Raises:
        click.ClickException: if the series catalogue is empty, because no
            search term could ever match.
    """
    all_series = get_all_series()

    if not all_series:
        # Without this guard the search prompt below would loop forever.
        raise click.ClickException('No series found')

    while True:
        query = click.prompt('Enter series').lower()
        wanted = {name: url for name, url in all_series.items() if query in name.lower()}

        if wanted:
            break

    # select by number
    names = list(wanted)

    for number, name in enumerate(names, start=1):
        click.echo('[{0}]: {1}'.format(number, name))

    if len(names) == 1:
        return wanted[names[0]]

    # IntRange makes click re-prompt on 0, negative or too-large numbers,
    # which would otherwise pick the wrong entry or raise IndexError.
    choice = click.prompt('Select series', type=click.IntRange(1, len(names)))

    return wanted[names[choice - 1]]

def get_seasons(series):
    """Ask which seasons of a series to process and return their URLs.

    ``series`` is the series URL handed to ``get_all_seasons``. If more than
    one season exists the user is prompted; the answer is a whitespace
    separated list of season numbers and/or inclusive ranges, e.g. ``1 2 3``
    or ``1-3``. A single existing season is selected without asking.

    Returns:
        A list of absolute season URLs in the order the seasons were first
        selected (duplicates collapsed). Empty if the series has no seasons.

    Raises:
        click.UsageError: if a token is not a number/range or names a season
            that does not exist.
    """
    all_seasons = get_all_seasons(series)

    if not all_seasons:
        return []

    numbers = list(all_seasons)

    if len(numbers) > 1:
        sel = click.prompt('Select season(s)', default='{0}-{1}'.format(numbers[0], numbers[-1]))
    else:
        # Nothing to choose from; without this branch ``sel`` would be unbound.
        sel = str(numbers[0])

    seasons = {}

    # e.g. 1 2 3 or 1-3
    for token in sel.split():
        # only the part after the last slash is interpreted
        token = token.split('/')[-1]

        try:
            if '-' in token:
                start, stop = (int(part) for part in token.split('-'))
                selected = range(start, stop + 1)
            else:
                selected = [int(token)]
        except ValueError:
            raise click.UsageError('Invalid season selection: {0}'.format(token)) from None

        for number in selected:
            if number not in all_seasons:
                raise click.UsageError('Season {0} is not available'.format(number))

            seasons[number] = urljoin('https://bs.to/', all_seasons[number])

    return list(seasons.values())

def get_episodes(seasons):
    """Collect the episodes of every given season.

    Calls ``get_all_episodes`` once per entry of ``seasons`` and returns the
    results as a list, in the same order as ``seasons``.
    """
    collected = []

    for season_url in seasons:
        collected.append(get_all_episodes(season_url))

    return collected

def get_all_series():
    """Return a dict mapping each listed series name to its absolute URL.

    Names are the link texts selected by ``.genre li a`` on the series
    overview page; hrefs are resolved against ``https://bs.to/``.
    """
    catalogue = {}

    for anchor in bsto('https://bs.to/andere-serien', '.genre li a'):
        catalogue[anchor.text] = urljoin('https://bs.to/', anchor.get('href'))

    return catalogue

def get_all_seasons(series):
    """Return a dict mapping season number to the season link's href.

    The season number is the last ``/``-separated segment of each href
    selected by ``#seasons li a`` on the page at ``series``, parsed as int.
    The href is stored exactly as found on the page.
    """
    by_number = {}

    for anchor in bsto(series, '#seasons li a'):
        href = anchor.get('href')
        number = int(href.rsplit('/', 1)[-1])
        by_number[number] = href

    return by_number

def get_all_episodes(season):
    """Collect the hoster out-links of every episode of a season.

    For each ``.episodes tr`` row on the page at ``season``, every link in
    the row's last cell is followed and the first ``https://bs.to/out/...``
    link found on that page is recorded.

    Returns:
        A list with one entry per episode; each entry is a list of out-link
        URLs (one per hoster). Episodes without any out-link are left out.
    """
    rows = bsto(season, '.episodes tr')
    episodes = []

    with click.progressbar(rows, label='Collecting episodes') as bar:
        for row in bar:
            cells = row.select('td')

            if not cells:
                # rows without <td> cells carry no hoster links
                continue

            hosts = []

            for anchor in cells[-1].select('a'):
                links = bsto(urljoin('https://bs.to/', anchor.get('href')), 'a[href^="https://bs.to/out/"]')

                # a hoster page without an out-link is skipped, not fatal
                if links:
                    hosts.append(links[0].get('href'))

            if hosts:
                episodes.append(hosts)

    return episodes

def decaptcha(urls):
    """Solve the captcha protecting one of ``urls`` and return the target URL.

    The URLs are tried in order. For each one a captcha task is created at
    anti-captcha.com, then its result is polled: after an initial 10 second
    wait, up to 60 times with a one second pause between polls. Once a
    solution is ready the URL is requested with the solution as ``token``
    query parameter.

    Returns:
        The final URL of that request (``Response.url``) for the first URL
        whose captcha was solved, or ``None`` if none was solved in time.

    Raises:
        SystemExit: with status 1, after printing the error details, when the
            anti-captcha API reports an error.
    """

    def create_task(url):
        """Create a captcha task for ``url``; return the decoded reply."""
        return requests.post("https://api.anti-captcha.com/createTask", data=json.dumps({
            "clientKey": API_KEY,
            "task": {
                "type": "NoCaptchaTaskProxyless",
                "websiteKey": SITE_KEY,
                "websiteURL": url
            }
        })).json()

    def get_task_result(task_id):
        """Fetch the current state of the task; return the decoded reply."""
        return requests.post("https://api.anti-captcha.com/getTaskResult", data=json.dumps({
            "clientKey": API_KEY,
            "taskId": task_id
        })).json()

    def goto(url, result):
        """Request ``url`` with the solved captcha token attached."""
        return requests.get(url, params={
            'token': result['solution']['gRecaptchaResponse']
        })

    def fail(reply):
        """Print the API error contained in ``reply`` and exit."""
        click.echo('Error #{0}: {1}'.format(reply.get('errorId'), reply.get('errorCode')))
        click.echo('{0}'.format(reply.get('errorDescription')))
        raise SystemExit(1)

    for url in urls:
        task = create_task(url)

        # a rejected task carries no taskId, only the error fields
        if 'taskId' not in task:
            fail(task)

        # average time to solve captcha
        time.sleep(10)

        # try up to 60 more secs
        for _ in range(60):
            result = get_task_result(task['taskId'])

            if result.get('status') == 'ready':
                return goto(url, result).url

            if result.get('errorId') or 'errorCode' in result:
                fail(result)

            # still processing (or an unexpected reply): wait before re-polling
            time.sleep(1)

    return None

def bsto(url, sel):
    """Fetch ``url`` and return the elements matching CSS selector ``sel``.

    The response body is parsed with BeautifulSoup's ``html.parser`` and the
    result of ``select(sel)`` is returned.
    """
    markup = requests.get(url).text

    return bs4.BeautifulSoup(markup, "html.parser").select(sel)