ar71xx: cap324: Use standard eth as static lan
[openwrt/openwrt.git] / scripts / dl_github_archive.py
1 #!/usr/bin/env python
2 #
3 # Copyright (c) 2018 Yousong Zhou <yszhou4tech@gmail.com>
4 #
5 # This is free software, licensed under the GNU General Public License v2.
6 # See /LICENSE for more information.
7
8 import argparse
9 import calendar
10 import datetime
11 import errno
12 import fcntl
13 import hashlib
14 import json
15 import os
16 import os.path
17 import re
18 import shutil
19 import ssl
20 import subprocess
21 import sys
22 import time
23 import urllib2
24
# Scratch area for intermediate downloads; honours the build system's
# TMP_DIR environment variable and falls back to /tmp.
TMPDIR = os.environ.get('TMP_DIR') or '/tmp'
# All tarball staging happens under $TMPDIR/dl.
TMPDIR_DL = os.path.join(TMPDIR, 'dl')
27
28
class PathException(Exception):
    """Raised by Path helpers on unexpected filesystem or tarball layout."""


class DownloadGitHubError(Exception):
    """Raised when fetching or verifying a GitHub tarball fails."""
31
32
class Path(object):
    """Context manager for preparing and cleaning up directories.

    If ``preclean`` is ``True``, ``path`` will be removed on context enter.

    If ``isdir`` is ``True``, ``path`` will be created on context enter.

    If ``keep`` is ``True``, then ``path`` will NOT be removed on context exit
    """

    def __init__(self, path, isdir=True, preclean=False, keep=False):
        self.path = path
        self.isdir = isdir
        self.preclean = preclean
        self.keep = keep

    def __enter__(self):
        if self.preclean:
            self.rm_all(self.path)
        if self.isdir:
            self.mkdir_all(self.path)
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        if not self.keep:
            self.rm_all(self.path)

    @staticmethod
    def mkdir_all(path):
        """Same as mkdir -p.

        All missing intermediate directories are created; components that
        already exist are left alone.
        """
        # BUGFIX: the previous loop iterated os.path.split(path), but
        # os.path.split() only separates the LAST component, so at most one
        # missing level could ever be created and deeper paths raised
        # ENOENT.  os.makedirs() creates the whole chain; EEXIST is
        # swallowed to keep the call idempotent, matching mkdir -p.
        Path._os_func(os.makedirs, path, errno.EEXIST)

    @staticmethod
    def _rmdir_dir(dir_):
        # Depth-first: remove dir_'s entries, then dir_ itself.
        names = Path._listdir(dir_)
        for name in names:
            p = os.path.join(dir_, name)
            Path.rm_all(p)
        Path._rmdir(dir_)

    @staticmethod
    def _mkdir(path):
        # mkdir that tolerates an already-existing directory.
        Path._os_func(os.mkdir, path, errno.EEXIST)

    @staticmethod
    def _rmdir(path):
        # rmdir that tolerates a missing directory.
        Path._os_func(os.rmdir, path, errno.ENOENT)

    @staticmethod
    def _remove(path):
        # remove that tolerates a missing file.
        Path._os_func(os.remove, path, errno.ENOENT)

    @staticmethod
    def _listdir(path):
        # listdir that returns [] for a missing directory.
        return Path._os_func(os.listdir, path, errno.ENOENT, default=[])

    @staticmethod
    def _os_func(func, path, ignored_errno, default=None):
        """Call func(path) in an idempotent way.

        On exception ``ex``, if the type is OSError and ``ex.errno ==
        ignored_errno``, return ``default``, otherwise, re-raise.
        """
        # parameter renamed from ``errno`` to avoid shadowing the errno module
        try:
            return func(path)
        except OSError as e:
            if e.errno == ignored_errno:
                return default
            else:
                raise

    @staticmethod
    def rm_all(path):
        """Same as rm -r."""
        if os.path.islink(path):
            # remove the link itself, never what it points at
            Path._remove(path)
        elif os.path.isdir(path):
            Path._rmdir_dir(path)
        else:
            Path._remove(path)

    @staticmethod
    def untar(path, into=None):
        """Extract tarball at ``path`` into subdir ``into``.

        return subdir name if and only if there exists one, otherwise raise
        PathException
        """
        args = ('tar', '-C', into, '-xzf', path, '--no-same-permissions')
        # umask 022 gives the extracted tree predictable permissions
        subprocess.check_call(args, preexec_fn=lambda: os.umask(0o22))
        dirs = os.listdir(into)
        if len(dirs) == 1:
            return dirs[0]
        else:
            raise PathException('untar %s: expecting a single subdir, got %s' % (path, dirs))

    @staticmethod
    def tar(path, subdir, into=None, ts=None):
        """Pack ``subdir`` under ``path`` into tarball ``into``.

        ``ts``, when given, is used as the mtime of every archive member so
        repeated runs produce bit-identical tarballs.
        """
        # --sort=name requires a recent build of GNU tar
        args = ['tar', '--numeric-owner', '--owner=0', '--group=0', '--sort=name']
        args += ['-C', path, '-cf', into, subdir]
        envs = os.environ.copy()
        if ts is not None:
            args.append('--mtime=@%d' % ts)
        if into.endswith('.xz'):
            envs['XZ_OPT'] = '-7e'
            args.append('-J')
        elif into.endswith('.bz2'):
            args.append('-j')
        elif into.endswith('.gz'):
            args.append('-z')
            # -n keeps gzip from embedding a timestamp (reproducibility)
            envs['GZIP'] = '-n'
        else:
            raise PathException('unknown compression type %s' % into)
        subprocess.check_call(args, env=envs)
152
153
class GitHubCommitTsCache(object):
    """File-backed cache mapping GitHub API URLs to commit timestamps.

    Entries are kept in a single text file under TMPDIR_DL, one per line in
    the form ``<key> <commit_ts> <updated_ts>``.  Readers take a shared
    fcntl lock and writers an exclusive one so concurrent downloads can
    share the cache safely.
    """

    __cachef = 'github.commit.ts.cache'
    # cap on cache size; least recently updated entries are dropped on flush
    __cachen = 2048

    def __init__(self):
        Path.mkdir_all(TMPDIR_DL)
        self.cachef = os.path.join(TMPDIR_DL, self.__cachef)
        self.cache = {}

    def get(self, k):
        """Get timestamp with key ``k``; return None when not cached."""
        fileno = os.open(self.cachef, os.O_RDONLY | os.O_CREAT)
        with os.fdopen(fileno) as fin:
            try:
                fcntl.lockf(fileno, fcntl.LOCK_SH)
                self._cache_init(fin)
                if k in self.cache:
                    ts = self.cache[k][0]
                    return ts
            finally:
                fcntl.lockf(fileno, fcntl.LOCK_UN)
        return None

    def set(self, k, v):
        """Update timestamp with ``k`` and persist the whole cache."""
        fileno = os.open(self.cachef, os.O_RDWR | os.O_CREAT)
        # FIX: open in text mode ('r+') instead of 'wb+' -- the cache lines
        # are str objects and writing them to a binary-mode file object
        # breaks on Python 3; behaviour on POSIX Python 2 is unchanged.
        with os.fdopen(fileno, 'r+') as f:
            try:
                fcntl.lockf(fileno, fcntl.LOCK_EX)
                self._cache_init(f)
                self.cache[k] = (v, int(time.time()))
                self._cache_flush(f)
            finally:
                fcntl.lockf(fileno, fcntl.LOCK_UN)

    def _cache_init(self, fin):
        """Load ``<key> <ts> <updated>`` lines from ``fin`` into self.cache."""
        for line in fin:
            k, ts, updated = line.split()
            self.cache[k] = (int(ts), int(updated))

    def _cache_flush(self, fout):
        """Write the most recently updated entries back to ``fout``."""
        # Sort newest-updated first.  key=/reverse= replaces the
        # Python-2-only sorted(cmp=...) + dict.iteritems() combination and
        # yields the same ordering.
        cache = sorted(self.cache.items(), key=lambda ent: ent[1][1], reverse=True)
        cache = cache[:self.__cachen]
        self.cache = {}
        os.ftruncate(fout.fileno(), 0)
        fout.seek(0, os.SEEK_SET)
        for k, ent in cache:
            ts, updated = ent
            line = '{0} {1} {2}\n'.format(k, ts, updated)
            fout.write(line)
207
208
class DownloadGitHubTarball(object):
    """Download and repack archive tarball from GitHub.

    Compared with the method of packing after cloning the whole repo, this
    method is more friendly to users with fragile internet connection.

    However, there are limitations with this method

     - GitHub imposes a 60 reqs/hour limit for unauthenticated API access.
       This affects fetching commit date for reproducible tarballs.  Download
       through the archive link is not affected.

     - GitHub archives do not contain source codes for submodules.

     - GitHub archives seem to respect .gitattributes and ignore paths with
       export-ignore attributes.

    For the first two issues, the method will fail loudly to allow fallback
    to clone-then-pack method.

    As for the 3rd issue, to make sure that this method only produces
    identical tarballs as the fallback method, we require the expected hash
    value to be supplied.  That means the first tarball will need to be
    prepared by the clone-then-pack method
    """

    # Accepts https://github.com/<owner>/<repo> and git://github.com/... URLs.
    __repo_url_regex = re.compile(r'^(?:https|git)://github.com/(?P<owner>[^/]+)/(?P<repo>[^/]+)')

    def __init__(self, args):
        """``args`` is the argparse namespace built in main()."""
        self.dl_dir = args.dl_dir
        self.version = args.version
        self.subdir = args.subdir
        self.source = args.source
        self.url = args.url
        self._init_owner_repo()
        self.xhash = args.hash
        self._init_hasher()
        self.commit_ts = None  # lazy load commit timestamp
        self.commit_ts_cache = GitHubCommitTsCache()
        self.name = 'github-tarball'

    def download(self):
        """Download and repack GitHub archive tarball.

        Raises DownloadGitHubError (or other exceptions) on any failure so
        the caller can fall back to the clone-then-pack method.
        """
        self._init_commit_ts()
        with Path(TMPDIR_DL, keep=True) as dir_dl:
            # fetch tarball from GitHub
            tarball_path = os.path.join(dir_dl.path, self.subdir + '.tar.gz.dl')
            with Path(tarball_path, isdir=False):
                self._fetch(tarball_path)
                # unpack
                d = os.path.join(dir_dl.path, self.subdir + '.untar')
                with Path(d, preclean=True) as dir_untar:
                    tarball_prefix = Path.untar(tarball_path, into=dir_untar.path)
                    dir0 = os.path.join(dir_untar.path, tarball_prefix)
                    dir1 = os.path.join(dir_untar.path, self.subdir)
                    # submodules check
                    if self._has_submodule(dir0):
                        raise self._error('Fetching submodules is not yet supported')
                    # rename subdir
                    os.rename(dir0, dir1)
                    # repack with a fixed mtime for reproducibility
                    into = os.path.join(TMPDIR_DL, self.source)
                    Path.tar(dir_untar.path, self.subdir, into=into, ts=self.commit_ts)
                    try:
                        self._hash_check(into)
                    except Exception:
                        # never leave an unverified tarball behind
                        Path.rm_all(into)
                        raise
                    # move to target location
                    file1 = os.path.join(self.dl_dir, self.source)
                    if into != file1:
                        shutil.move(into, file1)

    def _has_submodule(self, dir_):
        """Return True when the unpacked tree at ``dir_`` declares submodules."""
        m = os.path.join(dir_, '.gitmodules')
        try:
            st = os.stat(m)
            return st.st_size > 0
        except OSError as e:
            # missing .gitmodules means no submodules; any other stat error
            # is treated conservatively as "has submodules"
            return e.errno != errno.ENOENT

    def _init_owner_repo(self):
        """Parse owner and repo name out of the configured GitHub URL."""
        m = self.__repo_url_regex.search(self.url)
        if m is None:
            raise self._error('Invalid github url: {}'.format(self.url))
        owner = m.group('owner')
        repo = m.group('repo')
        if repo.endswith('.git'):
            repo = repo[:-4]
        self.owner = owner
        self.repo = repo

    def _init_hasher(self):
        """Select the hash algorithm from the expected digest's length.

        64 hex chars selects SHA-256, 32 selects MD5.  A missing or
        malformed --hash is rejected here, before any network traffic.
        """
        xhash = self.xhash
        # Guard against args.hash being None (flag omitted) in addition to
        # wrong-length values; len(None) previously raised a bare TypeError.
        if xhash and len(xhash) == 64:
            self.hasher = hashlib.sha256()
        elif xhash and len(xhash) == 32:
            self.hasher = hashlib.md5()
        else:
            raise self._error('Requires sha256sum for verification')
        self.xhash = xhash

    def _hash_check(self, f):
        """Verify that file ``f`` matches the expected digest or raise."""
        with open(f, 'rb') as fin:
            while True:
                d = fin.read(4096)
                if not d:
                    break
                self.hasher.update(d)
        xhash = self.hasher.hexdigest()
        if xhash != self.xhash:
            raise self._error('Wrong hash (probably caused by .gitattributes), expecting {}, got {}'.format(self.xhash, xhash))

    def _init_commit_ts(self):
        """Resolve self.version to a commit timestamp (epoch seconds, UTC)."""
        if self.commit_ts is not None:
            return
        # GitHub provides 2 APIs[1,2] for fetching commit data. API[1] is more
        # terse while API[2] provides more verbose info such as commit diff
        # etc. That's the main reason why API[1] is preferred: the response
        # size is predictable.
        #
        # However, API[1] only accepts complete commit sha1sum as the parameter
        # while API[2] is more liberal accepting also partial commit id and
        # tags, etc.
        #
        # [1] Get a single commit, Repositories, https://developer.github.com/v3/repos/commits/#get-a-single-commit
        # [2] Git Commits, Git Data, https://developer.github.com/v3/git/commits/#get-a-commit
        apis = [
            {
                'url': self._make_repo_url_path('git', 'commits', self.version),
                'attr_path': ('committer', 'date'),
            }, {
                'url': self._make_repo_url_path('commits', self.version),
                'attr_path': ('commit', 'committer', 'date'),
            },
        ]
        version_is_sha1sum = len(self.version) == 40
        if not version_is_sha1sum:
            # the more liberal API must go first for tags / partial ids
            apis.insert(0, apis.pop())
        for api in apis:
            url = api['url']
            attr_path = api['attr_path']
            try:
                ct = self.commit_ts_cache.get(url)
                if ct is not None:
                    self.commit_ts = ct
                    return
                ct = self._init_commit_ts_remote_get(url, attr_path)
                self.commit_ts = ct
                self.commit_ts_cache.set(url, ct)
                return
            except Exception:
                # best effort: fall through to the next API endpoint
                pass
        raise self._error('Cannot fetch commit ts: {}'.format(url))

    def _init_commit_ts_remote_get(self, url, attrpath):
        """Fetch commit JSON from ``url`` and dig out the committer date."""
        resp = self._make_request(url)
        data = resp.read()
        date = json.loads(data)
        for attr in attrpath:
            date = date[attr]
        # GitHub returns ISO-8601 UTC, e.g. 2018-01-02T03:04:05Z
        date = datetime.datetime.strptime(date, '%Y-%m-%dT%H:%M:%SZ')
        date = date.timetuple()
        ct = calendar.timegm(date)
        return ct

    def _fetch(self, path):
        """Fetch tarball of the specified version ref into local ``path``."""
        ref = self.version
        url = self._make_repo_url_path('tarball', ref)
        resp = self._make_request(url)
        with open(path, 'wb') as fout:
            while True:
                d = resp.read(4096)
                if not d:
                    break
                fout.write(d)

    def _make_repo_url_path(self, *args):
        """Build the API path /repos/<owner>/<repo>[/<args>...]."""
        url = '/repos/{0}/{1}'.format(self.owner, self.repo)
        if args:
            url += '/' + '/'.join(args)
        return url

    def _make_request(self, path):
        """Request GitHub API endpoint on ``path``."""
        url = 'https://api.github.com' + path
        headers = {
            'Accept': 'application/vnd.github.v3+json',
            'User-Agent': 'OpenWrt',
        }
        req = urllib2.Request(url, headers=headers)
        # NOTE(review): certificate verification is deliberately disabled so
        # the script works on hosts with broken CA bundles; integrity of the
        # result is still guarded by the expected-hash check above.  Confirm
        # this trade-off is still wanted before hardening.
        sslcontext = ssl._create_unverified_context()
        fileobj = urllib2.urlopen(req, context=sslcontext)
        return fileobj

    def _error(self, msg):
        """Wrap ``msg`` in a DownloadGitHubError tagged with the source name."""
        return DownloadGitHubError('{}: {}'.format(self.source, msg))
407
408
def main():
    """Parse command line options and run the GitHub tarball download."""
    argp = argparse.ArgumentParser()
    for flag, kwargs in (
            ('--dl-dir', {'default': os.getcwd(), 'help': 'Download dir'}),
            ('--url', {'help': 'Download URL'}),
            ('--subdir', {'help': 'Source code subdir name'}),
            ('--version', {'help': 'Source code version'}),
            ('--source', {'help': 'Source tarball filename'}),
            ('--hash', {'help': "Source tarball's expected sha256sum"})):
        argp.add_argument(flag, **kwargs)
    args = argp.parse_args()
    try:
        DownloadGitHubTarball(args).download()
    except Exception as ex:
        sys.stderr.write('{}: Download from {} failed\n'.format(args.source, args.url))
        sys.stderr.write('{}\n'.format(ex))
        sys.exit(1)
425
# Script entry point; main() exits non-zero on download failure.
if __name__ == '__main__':
    main()