d5983a0e5f7a9aa7f47d4534e9610ba0cf92e3dc
[web/firmware-selector-openwrt-org.git] / misc / collect.py
1 #!/usr/bin/env python3
2 """
3 Tool to create overview.json files and update the config.js.
4 """
5
6 from pathlib import Path
7 import urllib.request
8 import tempfile
9 import argparse
10 import json
11 import glob
12 import sys
13 import os
14 import re
15
SUPPORTED_METADATA_VERSION = 1


def merge_profiles(profiles, download_url):
    """Merge several OpenWrt profile JSON documents into one overview dict.

    Args:
        profiles: Mapping ``{<file-path>: <file-content>}``. The content is the
            raw JSON text (``str`` or ``bytes``) of a profile file; the path is
            only used in diagnostics.
        download_url: Stored verbatim under ``"download_url"`` in the result.

    Returns:
        ``{"version_code": ..., "download_url": ..., "models": {...}}`` where
        ``models`` maps a device title to ``{"id", "target", "images"}`` (plus
        ``"code"`` when the file's version differs from the overview's), or
        an empty dict when no file with a supported metadata version was seen.

    Files that are not valid JSON or have an unsupported metadata version are
    reported on stderr and skipped. A file that lacks a required key aborts
    the process with exit status 1.
    """
    # json output data
    output = {}

    def get_title(title):
        # An explicit title wins; otherwise compose "vendor model variant".
        if "title" in title:
            return title["title"]
        return "{} {} {}".format(
            title.get("vendor", ""), title["model"], title.get("variant", "")
        ).strip()

    def add_profile(path, profile_id, target, profile, code=None):
        images = [
            {"name": image["name"], "type": image["type"]}
            for image in profile["images"]
        ]

        if target is None:
            target = profile["target"]

        for entry in profile["titles"]:
            title = get_title(entry)

            if not title:
                sys.stderr.write(f"Empty title. Skip title in {path}\n")
                continue

            model = {"id": profile_id, "target": target, "images": images}
            if code is not None:
                model["code"] = code
            output["models"][title] = model

    for path, content in profiles.items():
        # Parse separately so that one broken file is skipped, not fatal.
        try:
            obj = json.loads(content)
        except (json.JSONDecodeError, UnicodeDecodeError) as e:
            sys.stderr.write(f"Skip {path}\n {e}\n")
            continue

        try:
            if obj["metadata_version"] != SUPPORTED_METADATA_VERSION:
                sys.stderr.write(
                    f"{path} has unsupported metadata version: "
                    f"{obj['metadata_version']} => skip\n"
                )
                continue

            code = obj.get("version_code", obj.get("version_commit"))

            # The first accepted file defines the overview-wide version code.
            if "version_code" not in output:
                output = {
                    "version_code": code,
                    "download_url": download_url,
                    "models": {},
                }

            # if we have mixed codes/commits, store in device object
            if output["version_code"] == code:
                code = None

            if "profiles" in obj:
                for profile_id, profile in obj["profiles"].items():
                    add_profile(path, profile_id, obj.get("target"), profile, code)
            else:
                add_profile(path, obj["id"], obj["target"], obj, code)
        except KeyError as e:
            sys.stderr.write(f"Abort on {path}\n Missing key {e}\n")
            sys.exit(1)

    return output
83
84
def update_config(config_path, versions):
    """Rewrite the ``versions: {...}`` entry of a config.js file in place.

    Args:
        config_path: Path of the config.js file to read and overwrite.
        versions: Object whose ``str()`` form is written after ``versions: ``;
            callers in this file pass a dict of release name to overview path.

    Every match of ``versions:`` followed by a brace-delimited block without
    nested braces is replaced. If nothing matches, the file content is
    written back unchanged.
    """
    with open(config_path, "r") as file:
        content = file.read()

    # Use a callable replacement: a plain string would be treated as a regex
    # template and any backslash in the rendered dict would be re-interpreted.
    content = re.sub(
        r"versions:\s*\{[^}]*\}", lambda _match: f"versions: {versions}", content
    )

    with open(config_path, "w") as file:
        file.write(content)
93
94
95 """
96 Scrape profiles.json using links like https://downloads.openwrt.org/releases/19.07.3/targets/?json
97 Merge into overview.json files.
98 Update config.js.
99 """
100
101
def scrape(args):
    """Scrape profiles.json files through the server's ``?json`` listings.

    Fetches the page at ``args.domain``, follows every relative link ending
    in ``targets/`` (e.g. ``releases/19.07.3/targets/``), downloads all
    profiles.json files listed by ``<link>/?json``, merges them into
    ``<selector>/data/<release>/overview.json`` and finally updates the
    ``versions`` entry of ``<selector>/config.js``.

    Args:
        args: Parsed command line with ``domain``, ``selector`` and
            ``formatted`` attributes.

    Exits with status 1 if ``<selector>/config.js`` does not exist.
    """
    # Drop trailing slashes so that joined URLs do not contain "//".
    url = args.domain.rstrip("/")
    selector_path = args.selector
    config_path = f"{selector_path}/config.js"
    data_path = f"{selector_path}/data"
    versions = {}

    def handle_release(target):
        # Returns {<profiles.json URL>: <raw bytes>} for one targets/ folder.
        profiles = {}
        with urllib.request.urlopen(f"{target}/?json") as file:
            array = json.loads(file.read().decode("utf-8"))
        for profile in array:
            if not profile.endswith("/profiles.json"):
                continue
            with urllib.request.urlopen(f"{target}/{profile}") as file:
                profiles[f"{target}/{profile}"] = file.read()
        return profiles

    if not os.path.isfile(config_path):
        sys.stderr.write(f"file not found: {config_path}\n")
        sys.exit(1)

    # fetch release URLs
    with urllib.request.urlopen(url) as infile:
        # Decode instead of str(bytes): the latter yields the b'...' repr with
        # escaped quotes, which the href pattern below would mis-parse.
        page = infile.read().decode("utf-8", errors="replace")

    for path in re.findall(r"href=[\"']?([^'\" >]+)", page):
        if path.startswith("/") or not path.endswith("targets/"):
            continue

        path = path.rstrip("/")
        parts = path.split("/")
        if len(parts) < 2:
            # A bare "targets/" link has no release folder in front of it.
            continue
        release = parts[-2]
        download_url = f"{url}/{path}/{{target}}"

        profiles = handle_release(f"{url}/{path}")
        output = merge_profiles(profiles, download_url)
        if not output:
            continue

        os.makedirs(f"{data_path}/{release}", exist_ok=True)
        # write overview.json
        with open(f"{data_path}/{release}/overview.json", "w") as outfile:
            if args.formatted:
                json.dump(output, outfile, indent=" ", sort_keys=True)
            else:
                json.dump(output, outfile, sort_keys=True)

        versions[release.upper()] = f"data/{release}/overview.json"

    update_config(config_path, versions)
143
144
145 """
146 Scrape profiles.json using wget (slower but more generic).
147 Merge into overview.json files.
148 Update config.js.
149 """
150
151
def scrape_wget(args):
    """Scrape profiles.json files using wget (slower but more generic).

    Mirrors all profiles.json files below ``args.domain`` into a temporary
    directory, merges the files of each ``snapshots`` / ``releases/<name>``
    folder into ``<selector>/data/<release>/overview.json`` and finally
    updates the ``versions`` entry of ``<selector>/config.js``.

    Args:
        args: Parsed command line with ``domain``, ``selector`` and
            ``formatted`` attributes.

    Exits with status 1 if ``<selector>/config.js`` does not exist or the
    ``wget`` executable cannot be started.
    """
    # Local import keeps this block self-contained.
    import subprocess

    url = args.domain
    selector_path = args.selector
    config_path = f"{selector_path}/config.js"
    data_path = f"{selector_path}/data"
    versions = {}

    # Fail before the (slow) download instead of after it.
    if not os.path.isfile(config_path):
        sys.stderr.write(f"file not found: {config_path}\n")
        sys.exit(1)

    with tempfile.TemporaryDirectory() as tmp_dir:
        # download all profiles.json files
        # An argument list (no shell) keeps the URL from being interpreted by
        # a shell. wget's exit status is ignored, as before.
        wget_cmd = [
            "wget",
            "-c",
            "-r",
            "-P",
            tmp_dir,
            "-A",
            "profiles.json",
            "--reject-regex",
            "kmods|packages",
            "--no-parent",
            url,
        ]
        try:
            subprocess.run(wget_cmd, check=False)
        except FileNotFoundError:
            sys.stderr.write("wget executable not found\n")
            sys.exit(1)

        # delete empty folders (bottom-up, so emptied parents go as well)
        for root, dirs, _files in os.walk(tmp_dir, topdown=False):
            for name in dirs:
                folder = os.path.join(root, name)
                if not os.path.islink(folder) and not os.listdir(folder):
                    os.rmdir(folder)

        # create overview.json files
        release_dirs = glob.glob(f"{tmp_dir}/*/snapshots") + glob.glob(
            f"{tmp_dir}/*/releases/*"
        )
        for path in release_dirs:
            release = os.path.basename(path)
            # Path below the temp dir: "<host>/snapshots" or
            # "<host>/releases/<name>" for a mirror of the site root.
            base = os.path.relpath(path, tmp_dir)

            profiles = {}
            for ppath in Path(path).rglob("profiles.json"):
                with open(ppath, "r") as file:
                    profiles[ppath] = file.read()

            if not profiles:
                continue

            versions[release.upper()] = f"data/{release}/overview.json"

            output = merge_profiles(profiles, f"https://{base}/targets/{{target}}")
            os.makedirs(f"{data_path}/{release}", exist_ok=True)

            # write overview.json
            with open(f"{data_path}/{release}/overview.json", "w") as outfile:
                if args.formatted:
                    json.dump(output, outfile, indent=" ", sort_keys=True)
                else:
                    json.dump(output, outfile, sort_keys=True)

    update_config(config_path, versions)
196
197
198 """
199 Find and merge json files for a single release.
200 """
201
202
def merge(args):
    """Find and merge JSON files for a single release; print the result.

    Every entry of ``args.input_path`` is either a directory, which is
    searched recursively for ``*.json`` files, or a path ending in ``.json``.
    The merged overview is written to stdout as JSON.

    Args:
        args: Parsed command line with ``input_path``, ``download_url`` and
            ``formatted`` attributes.

    Exits with status 1 if an input path is neither a directory nor a path
    ending in ``.json``.
    """
    # OpenWrt JSON device files
    profiles = {}

    def add_path(path):
        with open(path, "r") as file:
            profiles[path] = file.read()

    for path in args.input_path:
        if os.path.isdir(path):
            for filepath in Path(path).rglob("*.json"):
                add_path(filepath)
        elif path.endswith(".json"):
            add_path(path)
        else:
            sys.stderr.write(f"Not a folder or .json file: {path}\n")
            sys.exit(1)

    output = merge_profiles(profiles, args.download_url)

    if args.formatted:
        json.dump(output, sys.stdout, indent=" ", sort_keys=True)
    else:
        json.dump(output, sys.stdout, sort_keys=True)
228
229
230 """
231 Scan local directory for releases with profiles.json.
232 Merge into overview.json files.
233 Update config.js.
234 """
235
236
def scan(args):
    """Scan a local directory for releases with profiles.json files.

    Looks at ``<directory>/snapshots`` and every ``<directory>/releases/*``
    folder, merges the profiles.json files found below each of them into
    ``<selector>/data/<release>/overview.json`` and finally updates the
    ``versions`` entry of ``<selector>/config.js``.

    Args:
        args: Parsed command line with ``domain``, ``directory``,
            ``selector`` and ``formatted`` attributes. ``domain`` may be
            given with or without a URL scheme; ``https://`` is assumed when
            it has none.

    Exits with status 1 if ``<selector>/config.js`` does not exist.
    """
    selector_path = args.selector
    config_path = f"{selector_path}/config.js"
    data_path = f"{selector_path}/data"
    versions = {}

    # Fail before writing any overview.json.
    if not os.path.isfile(config_path):
        sys.stderr.write(f"file not found: {config_path}\n")
        sys.exit(1)

    # The CLI help shows a full URL as example, so only add the scheme when
    # it is missing (otherwise the result would start with https://https://).
    domain = args.domain.rstrip("/")
    if "://" not in domain:
        domain = f"https://{domain}"

    # create overview.json files
    release_dirs = glob.glob(f"{args.directory}/snapshots") + glob.glob(
        f"{args.directory}/releases/*"
    )
    for path in release_dirs:
        release = os.path.basename(path)
        # "snapshots" or "releases/<name>", relative to the scanned directory.
        base_dir = os.path.relpath(path, args.directory)

        profiles = {}
        for ppath in Path(path).rglob("profiles.json"):
            with open(ppath, "r") as file:
                profiles[ppath] = file.read()

        if not profiles:
            continue

        versions[release.upper()] = f"data/{release}/overview.json"

        output = merge_profiles(profiles, f"{domain}/{base_dir}/targets/{{target}}")
        os.makedirs(f"{data_path}/{release}", exist_ok=True)

        # write overview.json
        with open(f"{data_path}/{release}/overview.json", "w") as outfile:
            if args.formatted:
                json.dump(output, outfile, indent=" ", sort_keys=True)
            else:
                json.dump(output, outfile, sort_keys=True)

    update_config(config_path, versions)
273
274
def main():
    """Parse the command line and run the selected action.

    Actions:
        merge:  merge JSON device files and print the overview to stdout.
        scrape: collect profiles from a web server (optionally via wget).
        scan:   collect profiles from a local directory tree.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--formatted", action="store_true", help="Output formatted JSON data."
    )
    subparsers = parser.add_subparsers(dest="action", required=True)

    parser_merge = subparsers.add_parser(
        "merge",
        help="Merge OpenWrt JSON device files into a single overview JSON.",
    )
    parser_merge.add_argument(
        "input_path",
        nargs="+",
        help="Input folder that is traversed for OpenWrt JSON device files.",
    )
    parser_merge.add_argument(
        "--download-url",
        action="store",
        default="",
        help="Link to get the image from. May contain {target}, {version} and {commit}",
    )

    parser_scrape = subparsers.add_parser("scrape", help="Scrape webpage for releases.")
    parser_scrape.add_argument(
        "domain", help="Domain to scrape. E.g. https://downloads.openwrt.org"
    )
    parser_scrape.add_argument("selector", help="Path the config.js file is in.")
    parser_scrape.add_argument(
        "--use-wget", action="store_true", help="Use wget to scrape the site."
    )

    parser_scan = subparsers.add_parser("scan", help="Scan directory for releases.")
    parser_scan.add_argument(
        "domain",
        help="Domain for download_url attribute in overview.json. E.g. https://downloads.openwrt.org",
    )
    parser_scan.add_argument("directory", help="Directory to scan for releases.")
    parser_scan.add_argument("selector", help="Path the config.js file is in.")

    args = parser.parse_args()

    # Exactly one action is set because the subparser is required.
    if args.action == "merge":
        merge(args)
    elif args.action == "scan":
        scan(args)
    elif args.action == "scrape":
        if args.use_wget:
            scrape_wget(args)
        else:
            scrape(args)


if __name__ == "__main__":
    main()