build: download code from github using archive API
[openwrt/openwrt.git] / scripts / download.py
1 #!/usr/bin/env python
2 #
3 # Copyright (c) 2018 Yousong Zhou <yszhou4tech@gmail.com>
4 #
5 # This is free software, licensed under the GNU General Public License v2.
6 # See /LICENSE for more information.
7
8 import argparse
9 import calendar
10 import datetime
11 import errno
12 import fcntl
13 import json
14 import os
15 import os.path
16 import re
17 import shutil
18 import ssl
19 import subprocess
20 import sys
21 import time
22 import urllib2
23
# Scratch area: $TMP_DIR when it is set to a non-empty value, otherwise /tmp.
TMPDIR = os.getenv('TMP_DIR') or '/tmp'
# Staging directory for downloads, placed under the scratch area.
TMPDIR_DL = os.path.join(TMPDIR, 'dl')
# Placeholder; the name is rebound to the list of method classes further down.
DOWNLOAD_METHODS = []
27
class PathException(Exception):
    """Error raised by the ``Path`` tar/untar helpers."""


class DownloadException(Exception):
    """Error raised when a download method cannot carry out the download."""
30
31
class Path(object):
    """Context class for preparing and cleaning up directories.

    If ``isdir`` is true, ``path`` is created as a directory (together with
    any missing parents) on context enter.

    If ``keep`` is True, then ``path`` will NOT be removed on context exit.
    """

    def __init__(self, path, isdir=True, keep=False):
        self.path = path
        self.isdir = isdir
        self.keep = keep

    def __enter__(self):
        if self.isdir:
            self.mkdir_all(self.path)
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Cleanup runs whether or not the body raised; the exception (if any)
        # still propagates because nothing truthy is returned.
        if not self.keep:
            self.rm_all(self.path)

    @staticmethod
    def mkdir_all(path):
        """Same as mkdir -p.

        Create ``path`` and every missing parent directory.  A directory that
        already exists is not an error; any other failure re-raises OSError.
        """
        try:
            os.makedirs(path)
        except OSError as e:
            # Only "already there, and it is a directory" is benign.
            if e.errno != errno.EEXIST or not os.path.isdir(path):
                raise

    @staticmethod
    def _rmdir_all(dir_):
        """Remove directory ``dir_`` and everything below it."""
        for name in Path._listdir(dir_):
            p = os.path.join(dir_, name)
            # A symlink is unlinked, never followed, even when it points at a
            # directory: recursing would empty the link target instead.
            if os.path.isdir(p) and not os.path.islink(p):
                Path._rmdir_all(p)
            else:
                Path._remove(p)
        Path._rmdir(dir_)

    @staticmethod
    def _mkdir(path):
        """Create a single directory; an existing entry is not an error."""
        Path._os_func(os.mkdir, path, errno.EEXIST)

    @staticmethod
    def _rmdir(path):
        """Remove an empty directory; a missing one is not an error."""
        Path._os_func(os.rmdir, path, errno.ENOENT)

    @staticmethod
    def _remove(path):
        """Remove a file or symlink; a missing one is not an error."""
        Path._os_func(os.remove, path, errno.ENOENT)

    @staticmethod
    def _listdir(path):
        """List directory entries; a missing directory yields ``[]``."""
        return Path._os_func(os.listdir, path, errno.ENOENT, default=[])

    @staticmethod
    def _os_func(func, path, ignored_errno, default=None):
        """Call func(path) in an idempotent way.

        On exception ``ex``, if the type is OSError and
        ``ex.errno == ignored_errno``, return ``default``, otherwise re-raise.
        """
        try:
            return func(path)
        except OSError as e:
            if e.errno == ignored_errno:
                return default
            raise

    @staticmethod
    def rm_all(path):
        """Same as rm -r.

        A missing ``path`` is silently accepted.  When ``path`` itself is a
        symlink only the link is removed, not what it points to.
        """
        if os.path.isdir(path) and not os.path.islink(path):
            Path._rmdir_all(path)
        else:
            Path._remove(path)

    @staticmethod
    def untar(path, into=None):
        """Extract tarball at ``path`` into subdir ``into``.

        ``into`` must name an existing directory; it is handed to ``tar -C``.

        return subdir name if and only if there exists one, otherwise raise PathException
        """
        args = ('tar', '-C', into, '-xzf', path, '--no-same-permissions')
        # Run tar under umask 022 so extracted modes do not depend on the
        # caller's umask.
        subprocess.check_call(args, preexec_fn=lambda: os.umask(0o22))
        dirs = os.listdir(into)
        if len(dirs) == 1:
            return dirs[0]
        raise PathException('untar %s: expecting a single subdir, got %s' % (path, dirs))

    @staticmethod
    def tar(path, subdir, into=None, ts=None):
        """Pack ``subdir`` of directory ``path`` into tarball ``into``.

        The compression is picked from the suffix of ``into`` (``.xz``,
        ``.bz2`` or ``.gz``); any other name raises PathException.  When
        ``ts`` is given it is passed to tar as ``--mtime=@<ts>``.
        """
        # --sort=name requires a recent build of GNU tar
        args = ['tar', '--numeric-owner', '--owner=0', '--group=0', '--sort=name']
        args += ['-C', path, '-cf', into, subdir]
        envs = os.environ.copy()
        if ts is not None:
            args.append('--mtime=@%d' % ts)
        if into.endswith('.xz'):
            envs['XZ_OPT'] = '-7e'
            args.append('-J')
        elif into.endswith('.bz2'):
            args.append('-j')
        elif into.endswith('.gz'):
            args.append('-z')
            envs['GZIP'] = '-n'
        else:
            raise PathException('unknown compression type %s' % into)
        subprocess.check_call(args, env=envs)
147
148
class GitHubCommitTsCache(object):
    """File-backed cache of commit timestamps shared between processes.

    The cache file holds one ``<key> <timestamp> <updated>`` record per line.
    Access is serialized with ``fcntl.lockf``; at most ``__cachen`` of the
    most recently updated records are kept when the file is rewritten.
    """

    __cachef = 'github.commit.ts.cache'
    __cachen = 2048

    def __init__(self):
        Path.mkdir_all(TMPDIR_DL)
        self.cachef = os.path.join(TMPDIR_DL, self.__cachef)
        self.cache = {}

    def get(self, k):
        """Get timestamp with key ``k``.

        Return the cached timestamp, or None when ``k`` is not cached.
        """
        # 0o666 (minus umask): a data file has no reason to be executable.
        fileno = os.open(self.cachef, os.O_RDONLY | os.O_CREAT, 0o666)
        with os.fdopen(fileno) as fin:
            try:
                fcntl.lockf(fileno, fcntl.LOCK_SH)
                self._cache_init(fin)
                entry = self.cache.get(k)
                return None if entry is None else entry[0]
            finally:
                fcntl.lockf(fileno, fcntl.LOCK_UN)

    def set(self, k, v):
        """Update timestamp with ``k``."""
        fileno = os.open(self.cachef, os.O_RDWR | os.O_CREAT, 0o666)
        # Text mode: records are written as str.  fdopen() never truncates,
        # so the existing content is still there for _cache_init().
        with os.fdopen(fileno, 'r+') as f:
            try:
                fcntl.lockf(fileno, fcntl.LOCK_EX)
                self._cache_init(f)
                self.cache[k] = (v, int(time.time()))
                self._cache_flush(f)
            finally:
                fcntl.lockf(fileno, fcntl.LOCK_UN)

    def _cache_init(self, fin):
        """Merge the records read from ``fin`` into ``self.cache``.

        Malformed lines are skipped so that a damaged cache file degrades to
        cache misses instead of failing every later run.
        """
        for line in fin:
            fields = line.split()
            if len(fields) != 3:
                continue
            k, ts, updated = fields
            try:
                self.cache[k] = (int(ts), int(updated))
            except ValueError:
                continue

    def _cache_flush(self, fout):
        """Rewrite ``fout`` with the newest records and reset ``self.cache``."""
        # Most recently updated first; reverse=True keeps the sort stable.
        entries = sorted(self.cache.items(), key=lambda kv: kv[1][1], reverse=True)
        entries = entries[:self.__cachen]
        self.cache = {}
        fout.seek(0, os.SEEK_SET)
        fout.truncate()
        for k, (ts, updated) in entries:
            fout.write('{0} {1} {2}\n'.format(k, ts, updated))
        # Push the data to the file while the caller still holds the lock;
        # otherwise it would only be written at close(), after unlocking.
        fout.flush()
202
203
class DownloadMethod(object):
    """Base class of all download method."""

    def __init__(self, args):
        # ``args`` is the parsed command line; the first URL is the primary one.
        self.args = args
        self.urls = args.urls
        self.url = self.urls[0]
        self.dl_dir = args.dl_dir

    @classmethod
    def resolve(cls, args):
        """Resolve download method to use.

        The classes in ``DOWNLOAD_METHODS`` are tried in order and the first
        one whose ``match`` accepts ``args`` is instantiated.

        return instance of subclass of DownloadMethod, or None when no
        registered method matches
        """
        for c in DOWNLOAD_METHODS:
            if c.match(args):
                return c(args)
        return None

    @staticmethod
    def match(args):
        """Return True if it can do the download.

        Subclasses must override this.  The base raises NotImplementedError;
        returning ``NotImplemented`` would be truthy and so count as a match.
        """
        raise NotImplementedError

    def download(self):
        """Do the download and put it into the download dir.

        Subclasses must override this; the base raises NotImplementedError.
        """
        raise NotImplementedError
231
232
class DownloadMethodGitHubTarball(DownloadMethod):
    """Download and repack archive tarball from GitHub."""

    __repo_url_regex = re.compile(r'^(?:https|git)://github\.com/(?P<owner>[^/]+)/(?P<repo>[^/]+)')

    def __init__(self, args):
        super(DownloadMethodGitHubTarball, self).__init__(args)
        self._init_owner_repo()
        self.version = args.version
        self.subdir = args.subdir
        self.source = args.source
        self.commit_ts = None  # lazy load commit timestamp
        self.commit_ts_cache = GitHubCommitTsCache()
        self.name = 'github-tarball'

    @staticmethod
    def match(args):
        """Match if it's a GitHub clone url."""
        url = args.urls[0]
        if args.proto != 'git' or not isinstance(url, basestring):
            return False
        return url.startswith(('https://github.com/', 'git://github.com/'))

    def download(self):
        """Download and repack GitHub archive tarball.

        The archive is fetched and unpacked under ``TMPDIR_DL``, its single
        top-level directory is renamed to ``self.subdir``, the tree is packed
        again as ``self.source`` and the result is moved into ``self.dl_dir``.

        raise DownloadException when the tree carries a ``.gitmodules`` file,
        PathException when the archive layout is unexpected
        """
        self._init_commit_ts()
        with Path(TMPDIR_DL, keep=True) as dir_dl:
            # fetch tarball from GitHub
            tarball_path = os.path.join(dir_dl.path, self.subdir + '.tar.gz.dl')
            with Path(tarball_path, isdir=False):
                self._fetch(tarball_path)
                # unpack
                d = os.path.join(dir_dl.path, self.subdir + '.untar')
                with Path(d) as dir_untar:
                    tarball_prefix = Path.untar(tarball_path, into=dir_untar.path)
                    dir0 = os.path.join(dir_untar.path, tarball_prefix)
                    dir1 = os.path.join(dir_untar.path, self.subdir)
                    # submodules check
                    if self._has_submodule(dir0):
                        raise DownloadException('unable to fetch submodules\' source code')
                    # rename subdir
                    os.rename(dir0, dir1)
                    # repack
                    into = os.path.join(TMPDIR_DL, self.source)
                    Path.tar(dir_untar.path, self.subdir, into=into, ts=self.commit_ts)
                    # move to target location
                    file1 = os.path.join(self.dl_dir, self.source)
                    if into != file1:
                        shutil.move(into, file1)

    def _has_submodule(self, dir_):
        """Tell whether the tree at ``dir_`` has a non-empty ``.gitmodules``.

        A stat failure other than "no such file" is also reported as True.
        """
        m = os.path.join(dir_, '.gitmodules')
        try:
            st = os.stat(m)
            return st.st_size > 0
        except OSError as e:
            return e.errno != errno.ENOENT

    def _init_owner_repo(self):
        """Set ``self.owner`` and ``self.repo`` from the clone url.

        raise DownloadException when the url is not a GitHub repo url
        """
        url = self.url
        m = self.__repo_url_regex.search(url)
        if m is None:
            raise DownloadException('invalid github url: %s' % url)
        repo = m.group('repo')
        if repo.endswith('.git'):
            repo = repo[:-4]
        self.owner = m.group('owner')
        self.repo = repo

    def _init_commit_ts(self):
        """Load the committer timestamp of ``self.version`` into ``self.commit_ts``.

        The timestamp cache is consulted first; on a miss the commits API
        endpoint is queried and the answer is stored in the cache.
        """
        if self.commit_ts is not None:
            return
        url = self._make_repo_url_path('commits', self.version)
        ct = self.commit_ts_cache.get(url)
        if ct is not None:
            self.commit_ts = ct
            return
        resp = self._make_request(url)
        try:
            data = json.loads(resp.read())
        finally:
            # Release the connection even when reading or parsing fails.
            resp.close()
        date = data['commit']['committer']['date']
        date = datetime.datetime.strptime(date, '%Y-%m-%dT%H:%M:%SZ')
        # The date string ends in "Z", so treat the fields as UTC.
        ct = calendar.timegm(date.timetuple())
        self.commit_ts = ct
        self.commit_ts_cache.set(url, ct)

    def _fetch(self, path):
        """Fetch tarball of the specified version ref and save it at ``path``."""
        url = self._make_repo_url_path('tarball', self.version)
        resp = self._make_request(url)
        try:
            with open(path, 'wb') as fout:
                shutil.copyfileobj(resp, fout, 4096)
        finally:
            resp.close()

    def _make_repo_url_path(self, *args):
        """Return the API path ``/repos/<owner>/<repo>[/<arg>...]``."""
        url = '/repos/{0}/{1}'.format(self.owner, self.repo)
        if args:
            url += '/' + '/'.join(args)
        return url

    def _make_request(self, path):
        """Request GitHub API endpoint on ``path``.

        return the response file object; the caller has to close it
        """
        url = 'https://api.github.com' + path
        headers = {
            'Accept': 'application/vnd.github.v3+json',
            'User-Agent': 'OpenWrt',
        }
        req = urllib2.Request(url, headers=headers)
        # NOTE(review): TLS certificate verification is switched off here, so
        # the peer is not authenticated. Confirm this is intended before
        # relying on the downloaded data without a separate integrity check.
        sslcontext = ssl._create_unverified_context()
        return urllib2.urlopen(req, context=sslcontext)
352
353
class DownloadMethodCatchall(DownloadMethod):
    """Dummy method that knows names but not ways of download."""

    def __init__(self, args):
        # The base class already stores ``args``.
        super(DownloadMethodCatchall, self).__init__(args)
        self.proto = args.proto
        self.name = self._resolve_name()

    def _resolve_name(self):
        """Return the method name for this download.

        An explicit proto wins.  Otherwise the name of the first table row
        with a prefix that starts any of the urls is returned; the final
        empty prefix makes ``'unknown'`` the fallback.
        """
        if self.proto:
            return self.proto
        methods_map = (
            ('default', ('@APACHE/', '@GITHUB/', '@GNOME/', '@GNU/',
                         '@KERNEL/', '@SF/', '@SAVANNAH/', 'ftp://', 'http://',
                         'https://', 'file://')),
            ('git', ('git://', )),
            ('svn', ('svn://', )),
            ('cvs', ('cvs://', )),
            ('bzr', ('sftp://', )),
            ('unknown', ('', )),
        )
        for name, prefixes in methods_map:
            # str.startswith accepts a tuple of candidate prefixes.
            if any(url.startswith(prefixes) for url in self.urls):
                return name

    @staticmethod
    def match(args):
        """Return True."""
        return True

    def download(self):
        """Not implemented.

        raise DownloadException
        """
        raise DownloadException('download method for %s is not yet implemented' % self.name)
392
# Order matters: candidates are tried front to back and the first match wins,
# so the catch-all method has to stay last.
DOWNLOAD_METHODS = [DownloadMethodGitHubTarball, DownloadMethodCatchall]
398
399
def main():
    """Command line entry point.

    ``dl_method`` prints the name of the download method resolved for the
    given arguments; ``dl`` runs that method's download.  Exceptions raised
    by the download propagate to the caller.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('action', choices=('dl_method', 'dl'), help='Action to take')
    # Every download method indexes the first URL, so reject a missing
    # --urls up front with a usage error instead of a TypeError traceback.
    parser.add_argument('--urls', nargs='+', metavar='URL', required=True, help='Download URLs')
    parser.add_argument('--proto', help='Download proto')
    parser.add_argument('--subdir', help='Source code subdir name')
    parser.add_argument('--version', help='Source code version')
    parser.add_argument('--source', help='Source tarball filename')
    parser.add_argument('--dl-dir', default=os.getcwd(), help='Download dir')
    args = parser.parse_args()
    # Both actions need the resolved method.
    method = DownloadMethod.resolve(args)
    if args.action == 'dl_method':
        sys.stdout.write(method.name + '\n')
    elif args.action == 'dl':
        method.download()


if __name__ == '__main__':
    main()