misc/collect.py: better error messages
[web/firmware-selector-openwrt-org.git] / misc / collect.py
index b6af771540712fc3c57433712ea15cf4cf46ee2f..26b9876b8ee8abb805f0ca02b4cc331f085ea403 100755 (executable)
 #!/usr/bin/env python3
+"""
+Tool to create overview.json files and update the config.js.
+"""
 
 from pathlib import Path
+import urllib.request
+import tempfile
 import argparse
 import json
+import glob
 import sys
 import os
+import re
 
-parser = argparse.ArgumentParser()
-parser.add_argument("input_path", nargs="+",
-  help="Input folder that is traversed for OpenWrt JSON device files.")
-parser.add_argument("--download-url", action="store", default="",
-  help="Link to get the image from. May contain {target}, {version} and {commit}")
-parser.add_argument("--formatted", action="store_true",
-  help="Output formatted JSON data.")
-parser.add_argument("--change-prefix",
-  help="Change the openwrt- file name prefix.")
+SUPPORTED_METADATA_VERSION = 1
 
-args = parser.parse_args()
+assert sys.version_info >= (3, 5), "Python version too old. Python >=3.5.0 needed."
+
+
+# accepts {<file-path>: <file-content>}
+def merge_profiles(profiles, download_url):
+    # json output data
+    output = {}
+
+    def get_title(title):
+        if "title" in title:
+            return title["title"]
+        else:
+            return "{} {} {}".format(
+                title.get("vendor", ""), title["model"], title.get("variant", "")
+            ).strip()
+
+    def add_profile(path, id, target, profile, code=None):
+        images = []
+        for image in profile["images"]:
+            images.append({"name": image["name"], "type": image["type"]})
+
+        if target is None:
+            target = profile["target"]
+
+        for entry in profile["titles"]:
+            title = get_title(entry)
+
+            if len(title) == 0:
+                sys.stderr.write(
+                    "Empty title. Skip title for {} in {}\n".format(id, path)
+                )
+                continue
+
+            output["models"][title] = {"id": id, "target": target, "images": images}
+
+            if code is not None:
+                output["models"][title]["code"] = code
+
+    for path, content in profiles.items():
+        obj = json.loads(content)
+
+        if obj["metadata_version"] != SUPPORTED_METADATA_VERSION:
+            sys.stderr.write(
+                "{} has unsupported metadata version: {} => skip\n".format(
+                    path, obj["metadata_version"]
+                )
+            )
+            continue
+
+        code = obj.get("version_code", obj.get("version_commit"))
+
+        if "version_code" not in output:
+            output = {"version_code": code, "download_url": download_url, "models": {}}
+
+        # if we have mixed codes/commits, store in device object
+        if output["version_code"] == code:
+            code = None
+
+        try:
+            if "profiles" in obj:
+                for id in obj["profiles"]:
+                    add_profile(path, id, obj.get("target"), obj["profiles"][id], code)
+            else:
+                add_profile(path, obj["id"], obj["target"], obj, code)
+        except json.decoder.JSONDecodeError as e:
+            sys.stderr.write("Skip {}\n   {}\n".format(path, e))
+        except KeyError as e:
+            sys.stderr.write("Abort on {}\n   Missing key {}\n".format(path, e))
+            exit(1)
+
+    return output
+
+
+def update_config(config_path, versions):
+    content = ""
+    with open(str(config_path), "r") as file:
+        content = file.read()
+
+    content = re.sub("versions:[\\s]*{[^}]*}", "versions: {}".format(versions), content)
+    with open(str(config_path), "w+") as file:
+        file.write(content)
+
+
+"""
+Scrape profiles.json using links like https://downloads.openwrt.org/releases/19.07.3/targets/?json
+Merge into overview.json files.
+Update config.json.
+"""
+
+
+def scrape(args):
+    url = args.domain
+    selector_path = args.selector
+    config_path = "{}/config.js".format(selector_path)
+    data_path = "{}/data".format(selector_path)
+    versions = {}
+
+    def handle_release(target):
+        profiles = {}
+        with urllib.request.urlopen("{}/?json".format(target)) as file:
+            array = json.loads(file.read().decode("utf-8"))
+            for profile in filter(lambda x: x.endswith("/profiles.json"), array):
+                with urllib.request.urlopen("{}/{}".format(target, profile)) as file:
+                    profiles["{}/{}".format(target, profile)] = file.read().decode(
+                        "utf-8"
+                    )
+        return profiles
+
+    if not os.path.isfile(config_path):
+        print("file not found: {}".format(config_path))
+        exit(1)
+
+    # fetch release URLs
+    with urllib.request.urlopen(url) as infile:
+        for path in re.findall(r"href=[\"']?([^'\" >]+)", str(infile.read())):
+            if not path.startswith("/") and path.endswith("targets/"):
+                release = path.strip("/").split("/")[-2]
+                download_url = "{}/{}/{{target}}".format(url, path)
+
+                profiles = handle_release("{}/{}".format(url, path))
+                output = merge_profiles(profiles, download_url)
+                if len(output) > 0:
+                    os.makedirs("{}/{}".format(data_path, release), exist_ok=True)
+                    # write overview.json
+                    with open(
+                        "{}/{}/overview.json".format(data_path, release), "w"
+                    ) as outfile:
+                        if args.formatted:
+                            json.dump(output, outfile, indent="  ", sort_keys=True)
+                        else:
+                            json.dump(output, outfile, sort_keys=True)
+
+                    versions[release.upper()] = "data/{}/overview.json".format(release)
+
+    update_config(config_path, versions)
+
+
+"""
+Scrape profiles.json using wget (slower but more generic).
+Merge into overview.json files.
+Update config.json.
+"""
+
+
+def scrape_wget(args):
+    url = args.domain
+    selector_path = args.selector
+    config_path = "{}/config.js".format(selector_path)
+    data_path = "{}/data".format(selector_path)
+    versions = {}
+
+    with tempfile.TemporaryDirectory() as tmp_dir:
+        # download all profiles.json files
+        os.system(
+            "wget -c -r -P {} -A 'profiles.json' --reject-regex 'kmods|packages' --no-parent {}".format(
+                tmp_dir, url
+            )
+        )
+
+        # delete empty folders
+        os.system("find {}/* -type d -empty -delete".format(tmp_dir))
+
+        # create overview.json files
+        for path in glob.glob("{}/*/snapshots".format(tmp_dir)) + glob.glob(
+            "{}/*/releases/*".format(tmp_dir)
+        ):
+            release = os.path.basename(path)
+            base = path[len(tmp_dir) + 1 :]
+
+            profiles = {}
+            for ppath in Path(path).rglob("profiles.json"):
+                with open(str(ppath), "r") as file:
+                    profiles[ppath] = file.read()
+
+            if len(profiles) == 0:
+                continue
+
+            versions[release.upper()] = "data/{}/overview.json".format(release)
+
+            output = merge_profiles(
+                profiles, "https://{}/targets/{{target}}".format(base)
+            )
+            os.makedirs("{}/{}".format(data_path, release), exist_ok=True)
+
+            # write overview.json
+            with open("{}/{}/overview.json".format(data_path, release), "w") as outfile:
+                if args.formatted:
+                    json.dump(output, outfile, indent="  ", sort_keys=True)
+                else:
+                    json.dump(output, outfile, sort_keys=True)
+
+        update_config(config_path, versions)
+
+
+"""
+Find and merge json files for a single release.
+"""
+
+
+def merge(args):
+    input_paths = args.input_path
+    # OpenWrt JSON device files
+    profiles = {}
+
+    def add_path(path):
+        with open(str(path), "r") as file:
+            profiles[path] = file.read()
+
+    for path in input_paths:
+        if os.path.isdir(path):
+            for filepath in Path(path).rglob("*.json"):
+                add_path(filepath)
+        else:
+            if not path.endswith(".json"):
+                sys.stderr.write("Folder does not exists: {}\n".format(path))
+                exit(1)
+            add_path(path)
+
+    output = merge_profiles(profiles, args.download_url)
+
+    if args.formatted:
+        json.dump(output, sys.stdout, indent="  ", sort_keys=True)
+    else:
+        json.dump(output, sys.stdout, sort_keys=True)
+
+
+"""
+Scan local directory for releases with profiles.json.
+Merge into overview.json files.
+Update config.json.
+"""
+
+
+def scan(args):
+    selector_path = args.selector
+    config_path = "{}/config.js".format(selector_path)
+    data_path = "{}/data".format(selector_path)
+    versions = {}
+
+    # create overview.json files
+    for path in glob.glob("{}/snapshots".format(args.directory)) + glob.glob(
+        "{}/releases/*".format(args.directory)
+    ):
+        release = os.path.basename(path)
+        base_dir = path[len(args.directory) + 1 :]
+
+        profiles = {}
+        for ppath in Path(path).rglob("profiles.json"):
+            with open(str(ppath), "r", encoding="utf-8") as file:
+                profiles[ppath] = file.read()
+
+        if len(profiles) == 0:
+            continue
+
+        versions[release.upper()] = "data/{}/overview.json".format(release)
+
+        output = merge_profiles(
+            profiles, "https://{}/{}/targets/{{target}}".format(args.domain, base_dir)
+        )
+        os.makedirs("{}/{}".format(data_path, release), exist_ok=True)
+
+        # write overview.json
+        with open("{}/{}/overview.json".format(data_path, release), "w") as outfile:
+            if args.formatted:
+                json.dump(output, outfile, indent="  ", sort_keys=True)
+            else:
+                json.dump(output, outfile, sort_keys=True)
+
+    update_config(config_path, versions)
+
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--formatted", action="store_true", help="Output formatted JSON data."
+    )
+    subparsers = parser.add_subparsers(dest="action")
+    subparsers.required = True
+
+    parser_merge = subparsers.add_parser(
+        "merge",
+        help="Create a grid structure with horizontal and vertical connections.",
+    )
+    parser_merge.add_argument(
+        "input_path",
+        nargs="+",
+        help="Input folder that is traversed for OpenWrt JSON device files.",
+    )
+    parser_merge.add_argument(
+        "--download-url",
+        action="store",
+        default="",
+        help="Link to get the image from. May contain {target}, {version} and {commit}",
+    )
+
+    parser_scrape = subparsers.add_parser("scrape", help="Scrape webpage for releases.")
+    parser_scrape.add_argument(
+        "domain", help="Domain to scrape. E.g. https://downloads.openwrt.org"
+    )
+    parser_scrape.add_argument("selector", help="Path the config.js file is in.")
+    parser_scrape.add_argument(
+        "--use-wget", action="store_true", help="Use wget to scrape the site."
+    )
+
+    parser_scan = subparsers.add_parser("scan", help="Scan directory for releases.")
+    parser_scan.add_argument(
+        "domain",
+        help="Domain for download_url attribute in overview.json. E.g. https://downloads.openwrt.org",
+    )
+    parser_scan.add_argument("directory", help="Directory to scan for releases.")
+    parser_scan.add_argument("selector", help="Path the config.js file is in.")
+
+    args = parser.parse_args()
+
+    if args.action == "merge":
+        merge(args)
+
+    if args.action == "scan":
+        scan(args)
+
+    if args.action == "scrape":
+        if args.use_wget:
+            scrape_wget(args)
+        else:
+            scrape(args)
 
-SUPPORTED_METADATA_VERSION = 1
 
-def change_prefix(images, old_prefix, new_prefix):
-    for image in images:
-        if image["name"].startswith(old_prefix):
-            image["name"] = new_prefix + image["name"][len(old_prefix):]
-
-# OpenWrt JSON device files
-paths = []
-
-# json output data
-output = {}
-
-for path in args.input_path:
-  if os.path.isdir(path):
-    for file in Path(path).rglob("*.json"):
-      paths.append(file)
-  else:
-    if not path.endswith(".json"):
-      sys.stderr.write(f"Folder does not exists: {path}\n")
-      exit(1)
-    paths.append(path)
-
-def get_title_name(title):
-  if "title" in title:
-    return title["title"]
-  else:
-    return "{} {} {}".format(title.get("vendor", ""), title["model"], title.get("variant", "")).strip()
-
-def add_profile(id, target, profile, code=None):
-  images = []
-  for image in profile["images"]:
-      images.append({"name": image["name"], "type": image["type"]})
-
-  if target is None:
-    target = profile["target"]
-
-  if args.change_prefix:
-      change_prefix(images, "openwrt-", args.change_prefix)
-
-  for title in profile["titles"]:
-    name = get_title_name(title)
-
-    if len(name) == 0:
-      sys.stderr.write(f"Empty title. Skip title in {path}\n")
-      continue
-
-    output["models"][name] = {"id": id, "target": target, "images": images}
-
-    if code is not None:
-      output["models"][name]["code"] = code
-
-for path in paths:
-  with open(path, "r") as file:
-    obj = json.load(file)
-
-    if obj["metadata_version"] != SUPPORTED_METADATA_VERSION:
-      sys.stderr.write(f"{path} has unsupported metadata version: {obj["metadata_version"]} => skip\n")
-      continue
-
-    code = obj.get("version_code", obj.get("version_commit"))
-
-    if not "version_code" in output:
-      output = {
-        "version_code": code,
-        "download_url": args.download_url,
-        "models" : {}
-      }
-
-    # if we have mixed codes/commits, store in device object
-    if output["version_code"] == code:
-      code = None;
-
-    try:
-      if "profiles" in obj:
-        for id in obj["profiles"]:
-          add_profile(id, obj.get("target"), obj["profiles"][id], code)
-      else:
-        add_profile(obj["id"], obj["target"], obj, code)
-    except json.decoder.JSONDecodeError as e:
-      sys.stderr.write(f"Skip {path}\n   {e}\n")
-    except KeyError as e:
-      sys.stderr.write(f"Abort on {path}\n   Missing key {e}\n")
-      exit(1)
-
-if args.formatted:
-  json.dump(output, sys.stdout, indent="  ", sort_keys=True)
-else:
-  json.dump(output, sys.stdout, sort_keys=True)
+if __name__ == "__main__":
+    main()