Integrate Quasarr CAPTCHA resolution method to Web Interface

rix1337 · Dec 21, 2024 · 79ae1ea · 79ae1ea
1 parent 784921d
commit 79ae1ea
Show file tree

Hide file tree

Showing 10 changed files with 429 additions and 46 deletions.
diff --git a/README.md b/README.md
@@ -34,7 +34,7 @@ FeedCrawler automatisiert bequem das Hinzufügen von Links für den JDownloader.
 ### Voraussetzungen
 
 * [Python 3.8](https://www.python.org/downloads/) oder neuer (nur
-  4 [externe Abhängigkeiten](https://github.com/rix1337/FeedCrawler/blob/main/requirements.txt)!)
+  5 [externe Abhängigkeiten](https://github.com/rix1337/FeedCrawler/blob/main/requirements.txt)!)
 * [JDownloader 2](http://www.jdownloader.org/jdownloader2) mit
   aktivem [My JDownloader-Konto](https://my.jdownloader.org)
 * _optional: [FlareSolverr 3](https://github.com/FlareSolverr/FlareSolverr) um Cloudflare-Blockaden zu umgehen_

diff --git a/feedcrawler/external_tools/captcha.py b/feedcrawler/external_tools/captcha.py
@@ -0,0 +1,284 @@
+# -*- coding: utf-8 -*-
+# FeedCrawler
+# Project by https://github.com/rix1337
+
+import base64
+import json
+import random
+import re
+import xml.dom.minidom
+from urllib.parse import urlparse
+
+import dukpy
+import requests # todo replace with internal tool
+from Cryptodome.Cipher import AES
+from bs4 import BeautifulSoup
+
+
+class CNL:
+    def __init__(self, crypted_data):
+        self.crypted_data = crypted_data
+
+    def jk_eval(self, f_def):
+        js_code = f"""
+        {f_def}
+        f();
+        """
+
+        result = dukpy.evaljs(js_code).strip()
+
+        return result
+
+    def aes_decrypt(self, data, key):
+        try:
+            encrypted_data = base64.b64decode(data)
+        except Exception as e:
+            raise ValueError("Failed to decode base64 data") from e
+
+        try:
+            key_bytes = bytes.fromhex(key)
+        except Exception as e:
+            raise ValueError("Failed to convert key to bytes") from e
+
+        iv = key_bytes
+        cipher = AES.new(key_bytes, AES.MODE_CBC, iv)
+
+        try:
+            decrypted_data = cipher.decrypt(encrypted_data)
+        except ValueError as e:
+            raise ValueError("Decryption failed") from e
+
+        try:
+            return decrypted_data.decode('utf-8').replace('\x00', '').replace('\x08', '')
+        except UnicodeDecodeError as e:
+            raise ValueError("Failed to decode decrypted data") from e
+
+    def decrypt(self):
+        crypted = self.crypted_data[2]
+        jk = "function f(){ return \'" + self.crypted_data[1] + "';}"
+        key = self.jk_eval(jk)
+        uncrypted = self.aes_decrypt(crypted, key)
+        urls = [result for result in uncrypted.split("\r\n") if len(result) > 0]
+
+        return urls
+
+
+class DLC:
+    def __init__(self, dlc_file):
+        global user_agent
+        self.data = dlc_file
+        self.KEY = b"cb99b5cbc24db398"
+        self.IV = b"9bc24cb995cb8db3"
+        self.API_URL = "http://service.jdownloader.org/dlcrypt/service.php?srcType=dlc&destType=pylo&data="
+
+    def parse_packages(self, start_node):
+        return [
+            (
+                base64.b64decode(node.getAttribute("name")).decode("utf-8"),
+                self.parse_links(node)
+            )
+            for node in start_node.getElementsByTagName("package")
+        ]
+
+    def parse_links(self, start_node):
+        return [
+            base64.b64decode(node.getElementsByTagName("url")[0].firstChild.data).decode("utf-8")
+            for node in start_node.getElementsByTagName("file")
+        ]
+
+    def decrypt(self):
+        if not isinstance(self.data, bytes):
+            raise TypeError("data must be bytes.")
+
+        all_urls = []
+
+        try:
+            data = self.data.strip()
+
+            data += b"=" * (-len(data) % 4)
+
+            dlc_key = data[-88:].decode("utf-8")
+            dlc_data = base64.b64decode(data[:-88])
+            dlc_content = requests.get(self.API_URL + dlc_key).content.decode("utf-8")
+
+            rc = base64.b64decode(re.search(r"<rc>(.+)</rc>", dlc_content, re.S).group(1))[:16]
+
+            cipher = AES.new(self.KEY, AES.MODE_CBC, self.IV)
+            key = iv = cipher.decrypt(rc)
+
+            cipher = AES.new(key, AES.MODE_CBC, iv)
+            xml_data = base64.b64decode(cipher.decrypt(dlc_data)).decode("utf-8")
+
+            root = xml.dom.minidom.parseString(xml_data).documentElement
+            content_node = root.getElementsByTagName("content")[0]
+
+            packages = self.parse_packages(content_node)
+
+            for package in packages:
+                urls = package[1]
+                all_urls.extend(urls)
+
+        except Exception as e:
+            print("DLC Error: " + str(e))
+            return None
+
+        return all_urls
+
+
+def get_filecrypt_links(shared_state, token, title, url, password=None):
+    print("Attempting to decrypt Filecrypt link: " + url)
+    session = requests.Session()
+
+    password_field = None
+    if password:
+        try:
+            output = requests.get(url, headers={'User-Agent': shared_state.values["user_agent"]})
+            soup = BeautifulSoup(output.text, 'html.parser')
+            input_element = soup.find('input', placeholder=lambda value: value and 'password' in value.lower())
+            password_field = input_element['name']
+            print("Password field name identified: " + password_field)
+            url = output.url
+        except:
+            print("No password field found. Skipping password entry!")
+
+    if password and password_field:
+        print("Using Password: " + password)
+        output = session.post(url, data=password_field + "=" + password,
+                              headers={'User-Agent': shared_state.values["user_agent"],
+                                       'Content-Type': 'application/x-www-form-urlencoded'})
+    else:
+        output = session.get(url, headers={'User-Agent': shared_state.values["user_agent"]})
+
+    url = output.url
+    soup = BeautifulSoup(output.text, 'html.parser')
+    if bool(soup.findAll("input", {"id": "p4assw0rt"})):
+        print(f"Password was wrong or missing. Could not get links for {title}")
+        return False
+
+    no_captcha_present = bool(soup.find("form", {"class": "cnlform"}))
+    if no_captcha_present:
+        print("No CAPTCHA present. Skipping token!")
+    else:
+        circle_captcha = bool(soup.findAll("div", {"class": "circle_captcha"}))
+        i = 0
+        while circle_captcha and i < 3:
+            print("Sending Fake solution to skip Circle-CAPTCHA...")
+            random_x = str(random.randint(100, 200))
+            random_y = str(random.randint(100, 200))
+            output = session.post(url, data="buttonx.x=" + random_x + "&buttonx.y=" + random_y,
+                                  headers={'User-Agent': shared_state.values["user_agent"],
+                                           'Content-Type': 'application/x-www-form-urlencoded'})
+            url = output.url
+            soup = BeautifulSoup(output.text, 'html.parser')
+            circle_captcha = bool(soup.findAll("div", {"class": "circle_captcha"}))
+
+        output = session.post(url, data="cap_token=" + token, headers={'User-Agent': shared_state.values["user_agent"],
+                                                                       'Content-Type': 'application/x-www-form-urlencoded'})
+    url = output.url
+    soup = BeautifulSoup(output.text, 'html.parser')
+
+    solved = bool(soup.findAll("div", {"class": "container"}))
+    if not solved:
+        print(f"Filecrypt did did not accept the token! Could not get links for {title}")
+        return False
+    else:
+        season_number = ""
+        episode_number = ""
+        episode_in_title = re.findall(r'.*\.s(\d{1,3})e(\d{1,3})\..*', title, re.IGNORECASE)
+        season_in_title = re.findall(r'.*\.s(\d{1,3})\..*', title, re.IGNORECASE)
+        if episode_in_title:
+            try:
+                season_number = str(int(episode_in_title[0][0]))
+                episode_number = str(int(episode_in_title[0][1]))
+            except:
+                pass
+        elif season_in_title:
+            try:
+                season_number = str(int(season_in_title[0]))
+            except:
+                pass
+
+        season = ""
+        episode = ""
+        tv_show_selector = soup.find("div", {"class": "dlpart"})
+        if tv_show_selector:
+
+            season = "season="
+            episode = "episode="
+
+            season_selection = soup.find("div", {"id": "selbox_season"})
+            try:
+                if season_selection:
+                    season += str(season_number)
+            except:
+                pass
+
+            episode_selection = soup.find("div", {"id": "selbox_episode"})
+            try:
+                if episode_selection:
+                    episode += str(episode_number)
+            except:
+                pass
+
+        links = []
+
+        mirrors = []
+        mirrors_available = soup.select("a[href*=mirror]")
+        if mirrors_available:
+            for mirror in mirrors_available:
+                try:
+                    mirror_query = mirror.get("href").split("?")[1]
+                    base_url = url.split("?")[0] if "mirror" in url else url
+                    mirrors.append(f"{base_url}?{mirror_query}")
+                except IndexError:
+                    continue
+        else:
+            mirrors = [url]
+
+        for mirror in mirrors:
+            if not len(mirrors) == 1:
+                output = session.get(mirror, headers={'User-Agent': shared_state.values["user_agent"]})
+                url = output.url
+                soup = BeautifulSoup(output.text, 'html.parser')
+
+            try:
+                crypted_payload = soup.find("form", {"class": "cnlform"}).get('onsubmit')
+                crypted_data = re.findall(r"'(.*?)'", crypted_payload)
+                if not title:
+                    title = crypted_data[3]
+                crypted_data = [
+                    crypted_data[0],
+                    crypted_data[1],
+                    crypted_data[2],
+                    title
+                ]
+                if episode and season:
+                    domain = urlparse(url).netloc
+                    filtered_cnl_secret = soup.find("input", {"name": "hidden_cnl_id"}).attrs["value"]
+                    filtered_cnl_link = f"https://{domain}/_CNL/{filtered_cnl_secret}.html?{season}&{episode}"
+                    filtered_cnl_result = session.post(filtered_cnl_link,
+                                                       headers={'User-Agent': shared_state.values["user_agent"]})
+                    if filtered_cnl_result.status_code == 200:
+                        filtered_cnl_data = json.loads(filtered_cnl_result.text)
+                        if filtered_cnl_data["success"]:
+                            crypted_data = [
+                                crypted_data[0],
+                                filtered_cnl_data["data"][0],
+                                filtered_cnl_data["data"][1],
+                                title
+                            ]
+                links.extend(CNL(crypted_data).decrypt())
+            except:
+                print("Click'n'Load not found! Falling back to DLC...")
+                crypted_payload = soup.find("button", {"class": "dlcdownload"}).get("onclick")
+                crypted_data = re.findall(r"'(.*?)'", crypted_payload)
+                dlc_secret = crypted_data[0]
+                domain = urlparse(url).netloc
+                if episode and season:
+                    dlc_link = f"https://{domain}/DLC/{dlc_secret}.dlc?{episode}&{season}"
+                else:
+                    dlc_link = f"https://{domain}/DLC/{dlc_secret}.dlc"
+                dlc_file = session.get(dlc_link, headers={'User-Agent': shared_state.values["user_agent"]}).content
+                links.extend(DLC(dlc_file).decrypt())
+
+    return links
diff --git a/feedcrawler/web_interface/serve/api.py b/feedcrawler/web_interface/serve/api.py
@@ -22,7 +22,7 @@
 
 from Cryptodome.Protocol.KDF import scrypt
 from Cryptodome.Random import get_random_bytes
-from bottle import Bottle, request, HTTPError, static_file, redirect, abort
+from bottle import Bottle, request, response, HTTPError, static_file, redirect, abort
 from bs4 import BeautifulSoup
 
 import feedcrawler.external_sites
@@ -159,7 +159,8 @@ def redirect_without_trailing_slash():
             "/sponsors_helper/api/to_decrypt/",
             "/sponsors_helper/api/to_decrypt_disable/",
             "/sponsors_helper/replace_decrypt/",
-            "/sponsors_helper/to_download/"
+            "/sponsors_helper/to_download/",
+            "/captcha/"
         ]
         if not request.path.endswith('/') and not any(s in request.path for s in no_trailing_slash):
             raise redirect(request.url + '/')
@@ -1623,4 +1624,63 @@ def to_download():
             abort(400, f"Download attempt failed for payload: {payload}")
         return abort(400, "Request failed for unknown reason")
 
+    obfuscated_captcha_url = decode_base64('aHR0cHM6Ly92Mi5jdXRjYXB0Y2hhLm5ldA==')
+    import requests
+
+    @app.post(prefix + '/captcha/<captcha_id>.html')
+    def proxy(captcha_id):
+        target_url = f"{obfuscated_captcha_url}/captcha/{captcha_id}.html"
+
+        headers = {key: value for key, value in request.headers.items() if key != 'Host'}
+        data = request.body.read()
+        resp = requests.post(target_url, headers=headers, data=data)
+
+        response.content_type = resp.headers.get('Content-Type')
+
+        content = resp.text
+        content = re.sub(r'<script src="/(.*?)"></script>',
+                         f'<script src="{obfuscated_captcha_url}/\\1"></script>', content)
+        response.content_type = 'text/html'
+        return content
+
+    @app.post(prefix + '/captcha/<captcha_id>.json')
+    def specific_proxy(captcha_id):
+        target_url = f"{obfuscated_captcha_url}/captcha/{captcha_id}.json"
+
+        headers = {key: value for key, value in request.headers.items() if key != 'Host'}
+        data = request.body.read()
+        resp = requests.post(target_url, headers=headers, data=data)
+
+        response.content_type = resp.headers.get('Content-Type')
+        return resp.content
+
+    @app.get(prefix + '/captcha/<captcha_id>/<uuid>/<filename>')
+    def captcha_proxy(captcha_id, uuid, filename):
+        new_url = f"{obfuscated_captcha_url}/captcha/{captcha_id}/{uuid}/{filename}"
+
+        try:
+            external_response = requests.get(new_url, stream=True)
+            external_response.raise_for_status()
+            response.content_type = 'image/png'
+            response.headers['Content-Disposition'] = f'inline; filename="{filename}"'
+            return external_response.iter_content(chunk_size=8192)
+
+        except requests.RequestException as e:
+            response.status = 502
+            return f"Error fetching resource: {e}"
+
+    @app.post(prefix + '/captcha/<captcha_id>/check')
+    def captcha_check_proxy(captcha_id):
+        new_url = f"{obfuscated_captcha_url}/captcha/{captcha_id}/check"
+        headers = {key: value for key, value in request.headers.items()}
+
+        data = request.body.read()
+        resp = requests.post(new_url, headers=headers, data=data)
+
+        response.status = resp.status_code
+        for header in resp.headers:
+            if header.lower() not in ['content-encoding', 'transfer-encoding', 'content-length', 'connection']:
+                response.set_header(header, resp.headers[header])
+        return resp.content
+
     Server(app, listen='0.0.0.0', port=shared_state.values["port"]).serve_forever()