eede798169bde8a2deb1af4af5ec83f985bcc455
[web/firmware-selector-openwrt-org.git] / misc / collect.py
1 #!/usr/bin/env python3
2
3 from pathlib import Path
4 import urllib.request
5 import tempfile
6 import argparse
7 import json
8 import glob
9 import sys
10 import os
11 import re
12
13 '''
14 Tool to create overview.json files and update the config.js.
15 '''
16
# Command line interface: a global --formatted switch plus one sub-command
# per mode of operation ("merge" and "scrape").
parser = argparse.ArgumentParser()
parser.add_argument(
    "--formatted",
    action="store_true",
    help="Output formatted JSON data.",
)
# dest="action" records the chosen sub-command name in args.action;
# required=True makes argparse reject an invocation without a sub-command.
subparsers = parser.add_subparsers(dest="action", required=True)

# "merge": combine local OpenWrt JSON device files and print the result.
parser_merge = subparsers.add_parser(
    "merge",
    help="Merge OpenWrt JSON device files into one overview JSON document "
    "that is written to stdout.",
)
parser_merge.add_argument(
    "input_path",
    nargs="+",
    help="Input folder that is traversed for OpenWrt JSON device files.",
)
parser_merge.add_argument(
    "--download-url",
    action="store",
    default="",
    help="Link to get the image from. May contain {target}, {version} and {commit}",
)
#parser_merge.add_argument("--change-prefix",
#    help="Change the openwrt- file name prefix.")

# "scrape": collect profiles.json files from a download server.
parser_scrape = subparsers.add_parser(
    "scrape",
    help="Scrape a download server for profiles.json files, write the "
    "overview.json files and update the versions in config.js.",
)
parser_scrape.add_argument(
    "domain",
    help="Domain to scrape. E.g. https://downloads.openwrt.org",
)
parser_scrape.add_argument(
    "selector",
    help="Path the config.js file is in.",
)
parser_scrape.add_argument(
    "--use-wget",
    action="store_true",
    help="Use wget to scrape the site.",
)

# Parsed at import time; the functions below read the module-level `args`.
args = parser.parse_args()
41
# The only "metadata_version" value of OpenWrt JSON files this tool accepts.
SUPPORTED_METADATA_VERSION = 1


def merge_profiles(profiles, download_url):
    """Merge OpenWrt JSON device files into one overview structure.

    Args:
        profiles: Mapping of ``{<file-path>: <file-content>}``. The content is
            JSON text; the path is only used in diagnostics.
        download_url: Stored verbatim under ``"download_url"`` in the result.

    Returns:
        A dict ``{"version_code": ..., "download_url": ..., "models": {...}}``
        where ``models`` maps a device title to its ``id``, ``target`` and
        ``images`` (plus ``code`` when the file's version code differs from
        the first accepted file). An empty dict is returned when no file was
        accepted.

    Raises:
        SystemExit: With status 1 when an accepted file lacks a required key
            while its profiles are added.
        KeyError: When a file has no ``"metadata_version"`` key.
    """
    # json output data
    output = {}

    def get_title_name(title):
        """Return the explicit title, or one built from vendor/model/variant."""
        if "title" in title:
            return title["title"]
        return "{} {} {}".format(
            title.get("vendor", ""), title["model"], title.get("variant", "")
        ).strip()

    def add_profile(profile_id, target, profile, path, code=None):
        """Register one device profile under each of its titles."""
        # Only name and type of each image are carried over.
        images = [
            {"name": image["name"], "type": image["type"]}
            for image in profile["images"]
        ]

        if target is None:
            target = profile["target"]

        #if args.change_prefix:
        #    change_prefix(images, "openwrt-", args.change_prefix)

        for title in profile["titles"]:
            name = get_title_name(title)

            if not name:
                sys.stderr.write(f"Empty title. Skip title in {path}\n")
                continue

            entry = {"id": profile_id, "target": target, "images": images}
            if code is not None:
                entry["code"] = code
            output["models"][name] = entry

    for path, content in profiles.items():
        # Parsing must happen inside the try block, otherwise a malformed
        # file aborts the whole run instead of being skipped.
        try:
            obj = json.loads(content)
        except json.JSONDecodeError as e:
            sys.stderr.write(f"Skip {path}\n {e}\n")
            continue

        if obj["metadata_version"] != SUPPORTED_METADATA_VERSION:
            sys.stderr.write(f"{path} has unsupported metadata version: {obj['metadata_version']} => skip\n")
            continue

        code = obj.get("version_code", obj.get("version_commit"))

        # The first accepted file defines the overall version code.
        if "version_code" not in output:
            output = {
                "version_code": code,
                "download_url": download_url,
                "models": {},
            }

        # if we have mixed codes/commits, store in device object
        if output["version_code"] == code:
            code = None

        try:
            if "profiles" in obj:
                for profile_id, profile in obj["profiles"].items():
                    add_profile(profile_id, obj.get("target"), profile, path, code)
            else:
                add_profile(obj["id"], obj["target"], obj, path, code)
        except KeyError as e:
            sys.stderr.write(f"Abort on {path}\n Missing key {e}\n")
            sys.exit(1)

    return output
111
def update_config(config_path, versions):
    """Replace the ``versions: {...}`` object inside a config.js file.

    The file is read, every match of ``versions:`` followed by a brace
    enclosed block (without nested braces) is replaced by ``versions: ``
    plus the ``repr`` of ``versions``, and the file is written back in place.

    Args:
        config_path: Path of the config.js file to rewrite.
        versions: Dict that becomes the new ``versions`` object.
    """
    with open(config_path, "r") as file:
        content = file.read()

    replacement = f"versions: {versions}"
    # A callable replacement is inserted literally. A plain string would be
    # treated as a template, so backslashes in the dict repr would be
    # reinterpreted as escapes and corrupt the written value.
    content = re.sub(r"versions:[\s]*\{[^}]*\}", lambda _match: replacement, content)

    with open(config_path, "w") as file:
        # save updated config
        file.write(content)
121
# use faster ?json feature of downloads.openwrt.org
def scrape(url, selector_path):
    """Scrape release folders from ``url`` and write overview.json files.

    The index page at ``url`` is searched for relative links ending in
    ``targets/``. For each such release the profiles.json files are
    downloaded, merged with ``merge_profiles`` and written to
    ``<selector_path>/data/<release>/overview.json``. Finally the versions
    object in ``<selector_path>/config.js`` is updated.

    Args:
        url: Base URL of the download server.
        selector_path: Folder that contains config.js and the data folder.

    Raises:
        SystemExit: With status 1 when config.js does not exist.
    """
    config_path = f"{selector_path}/config.js"
    data_path = f"{selector_path}/data"
    versions = {}

    def handle_release(target):
        """Download all profiles.json files listed by ``<target>/?json``.

        The listing is expected to be a JSON array of relative file paths.
        Returns ``{<file-url>: <raw file content>}``.
        """
        profiles = {}
        with urllib.request.urlopen(f"{target}/?json") as listing:
            entries = json.loads(listing.read().decode("utf-8"))

        for profile in entries:
            if not profile.endswith("/profiles.json"):
                continue
            with urllib.request.urlopen(f"{target}/{profile}") as response:
                profiles[f"{target}/{profile}"] = response.read()
        return profiles

    if not os.path.isfile(config_path):
        # Errors go to stderr like everywhere else in this file.
        sys.stderr.write(f"file not found: {config_path}\n")
        sys.exit(1)

    # fetch release URLs
    with urllib.request.urlopen(url) as infile:
        # Decode the body; str() on bytes would yield its repr (b'...') with
        # escaped quotes, which breaks matching of single-quoted hrefs.
        index_html = infile.read().decode("utf-8", errors="replace")

    for path in re.findall(r"href=[\"']?([^'\" >]+)", index_html):
        if path.startswith("/") or not path.endswith("targets/"):
            continue

        parts = path.strip("/").split("/")
        if len(parts) < 2:
            # A bare "targets/" link has no release folder in front of it.
            continue
        release = parts[-2]

        # NOTE: path keeps its trailing slash, so these URLs contain "//".
        # Left unchanged to keep the emitted download_url as before.
        download_url = f"{url}/{path}/{{target}}"

        profiles = handle_release(f"{url}/{path}")
        output = merge_profiles(profiles, download_url)
        if not output:
            # Nothing usable found for this release; do not register it.
            continue

        Path(f"{data_path}/{release}").mkdir(parents=True, exist_ok=True)
        # write overview.json
        with open(f"{data_path}/{release}/overview.json", "w") as outfile:
            if args.formatted:
                json.dump(output, outfile, indent=" ", sort_keys=True)
            else:
                json.dump(output, outfile, sort_keys=True)

        versions[release.upper()] = f"data/{release}/overview.json"

    update_config(config_path, versions)
163
# use wget (slower but generic)
def scrape_wget(url, selector_path):
    """Mirror all profiles.json files with wget and write overview.json files.

    wget downloads recursively into a temporary folder. Every
    ``*/snapshots`` and ``*/releases/*`` folder found there is treated as one
    release: its profiles.json files are merged with ``merge_profiles`` and
    written to ``<selector_path>/data/<release>/overview.json``. Finally the
    versions object in ``<selector_path>/config.js`` is updated.

    Args:
        url: URL that is handed to wget as the start of the recursive download.
        selector_path: Folder that contains config.js and the data folder.

    Raises:
        SystemExit: With status 1 when config.js does not exist.
        FileNotFoundError: When the wget executable cannot be found.
    """
    # Local import: only this code path needs subprocess.
    import subprocess

    config_path = f"{selector_path}/config.js"
    data_path = f"{selector_path}/data"
    versions = {}

    # Same up-front check as scrape(): fail before the download instead of
    # in update_config() after it.
    if not os.path.isfile(config_path):
        sys.stderr.write(f"file not found: {config_path}\n")
        sys.exit(1)

    with tempfile.TemporaryDirectory() as tmp_dir:
        # download all profiles.json files
        # An argument list (no shell) keeps URLs with shell metacharacters
        # intact. The exit status is deliberately not checked, as before.
        subprocess.run(
            [
                "wget", "-c", "-r",
                "-P", tmp_dir,
                "-A", "profiles.json",
                "--reject-regex", "kmods|packages",
                "--no-parent",
                url,
            ],
            check=False,
        )

        # delete empty folders
        # Walk bottom-up so a folder emptied by removing its children is
        # removed as well; otherwise the globs below would pick it up.
        for root, _dirs, _files in os.walk(tmp_dir, topdown=False):
            if root != tmp_dir and not os.listdir(root):
                os.rmdir(root)

        # create overview.json files
        release_dirs = glob.glob(f"{tmp_dir}/*/snapshots") + glob.glob(f"{tmp_dir}/*/releases/*")
        for path in release_dirs:
            release = os.path.basename(path)
            # path relative to tmp_dir, e.g. <host>/releases/<release>
            base = path[len(tmp_dir) + 1:]

            versions[release.upper()] = f"data/{release}/overview.json"

            profiles = {}
            for ppath in Path(path).rglob("profiles.json"):
                with open(ppath, "r") as file:
                    profiles[ppath] = file.read()

            output = merge_profiles(profiles, f"https://{base}/targets/{{target}}")
            Path(f"{data_path}/{release}").mkdir(parents=True, exist_ok=True)

            # write overview.json
            with open(f"{data_path}/{release}/overview.json", "w") as outfile:
                if args.formatted:
                    json.dump(output, outfile, indent=" ", sort_keys=True)
                else:
                    json.dump(output, outfile, sort_keys=True)

    update_config(config_path, versions)
203
204 '''
205 def change_prefix(images, old_prefix, new_prefix):
206 for image in images:
207 if image["name"].startswith(old_prefix):
208 image["name"] = new_prefix + image["name"][len(old_prefix):]
209 '''
210
def merge(input_paths):
    """Merge OpenWrt JSON device files and print the overview to stdout.

    Args:
        input_paths: Folders (searched recursively for ``*.json`` files)
            and/or paths of individual ``.json`` files.

    Raises:
        SystemExit: With status 1 when an entry is neither a folder nor a
            path ending in ``.json``.
    """
    # OpenWrt JSON device files
    profiles = {}

    def add_path(path):
        """Read one file and store its content under its path."""
        with open(path, "r") as file:
            profiles[path] = file.read()

    for path in input_paths:
        if os.path.isdir(path):
            for filepath in Path(path).rglob("*.json"):
                add_path(filepath)
        elif path.endswith(".json"):
            add_path(path)
        else:
            # Reached for anything that is not a folder and has no .json
            # suffix, so the message must not claim a missing folder.
            sys.stderr.write(f"Not a folder or .json file: {path}\n")
            sys.exit(1)

    output = merge_profiles(profiles, args.download_url)

    if args.formatted:
        json.dump(output, sys.stdout, indent=" ", sort_keys=True)
    else:
        json.dump(output, sys.stdout, sort_keys=True)
236
# Run the sub-command that was selected on the command line.
if args.action == "merge":
    merge(args.input_path)
elif args.action == "scrape":
    # --use-wget switches from the ?json based scraper to the wget one.
    scraper = scrape_wget if args.use_wget else scrape
    scraper(args.domain, args.selector)