744c441ca7067604f7549cbba914b54d67a083c2
3 # Copyright (c) 2018 Yousong Zhou <yszhou4tech@gmail.com>
5 # This is free software, licensed under the GNU General Public License v2.
6 # See /LICENSE for more information.
# Scratch directory for intermediate files; the TMP_DIR environment
# variable overrides the default of /tmp (an empty value also falls
# back to /tmp).
TMPDIR = os.environ.get('TMP_DIR', '') or '/tmp'
# Staging area under TMPDIR where downloads are assembled before being
# moved to their final location.
TMPDIR_DL = os.path.join(TMPDIR, 'dl')
class PathException(Exception):
    """Raised when a filesystem path operation cannot be completed."""
class DownloadGitHubError(Exception):
    """Raised when downloading or repacking a GitHub archive fails."""
34 """Context class for preparing and cleaning up directories.
36 If ```preclean` is ``False``, ``path`` will NOT be removed on context enter
38 If ``path`` ``isdir``, then it will be created on context enter.
40 If ``keep`` is True, then ``path`` will NOT be removed on context exit
43 def __init__(self
, path
, isdir
=True, preclean
=False, keep
=False):
46 self
.preclean
= preclean
51 self
.rm_all(self
.path
)
53 self
.mkdir_all(self
.path
)
56 def __exit__(self
, exc_type
, exc_value
, traceback
):
58 self
.rm_all(self
.path
)
62 """Same as mkdir -p."""
63 names
= os
.path
.split(path
)
66 p
= os
.path
.join(p
, name
)
71 names
= Path
._listdir
(dir_
)
73 p
= os
.path
.join(dir_
, name
)
79 Path
._os
_func
(os
.mkdir
, path
, errno
.EEXIST
)
83 Path
._os
_func
(os
.rmdir
, path
, errno
.ENOENT
)
87 Path
._os
_func
(os
.remove
, path
, errno
.ENOENT
)
91 return Path
._os
_func
(os
.listdir
, path
, errno
.ENOENT
, default
=[])
94 def _os_func(func
, path
, errno
, default
=None):
95 """Call func(path) in an idempotent way.
97 On exception ``ex``, if the type is OSError and ``ex.errno == errno``,
98 return ``default``, otherwise, re-raise
111 if os
.path
.islink(path
):
113 elif os
.path
.isdir(path
):
114 Path
._rmdir
_dir
(path
)
119 def untar(path
, into
=None):
120 """Extract tarball at ``path`` into subdir ``into``.
122 return subdir name if and only if there exists one, otherwise raise PathException
124 args
= ('tar', '-C', into
, '-xzf', path
, '--no-same-permissions')
125 subprocess
.check_call(args
, preexec_fn
=lambda: os
.umask(0o22))
126 dirs
= os
.listdir(into
)
130 raise PathException('untar %s: expecting a single subdir, got %s' % (path
, dirs
))
133 def tar(path
, subdir
, into
=None, ts
=None):
134 """Pack ``path`` into tarball ``into``."""
135 # --sort=name requires a recent build of GNU tar
136 args
= ['tar', '--numeric-owner', '--owner=0', '--group=0', '--sort=name', '--mode=a-s']
137 args
+= ['-C', path
, '-cf', into
, subdir
]
138 envs
= os
.environ
.copy()
140 args
.append('--mtime=@%d' % ts
)
141 if into
.endswith('.zst'):
142 envs
['ZSTD_CLEVEL'] = '20'
143 envs
['ZSTD_NBTHREADS'] = '0'
144 args
.append('--zstd')
145 elif into
.endswith('.xz'):
146 envs
['XZ_OPT'] = '-7e'
148 elif into
.endswith('.bz2'):
150 elif into
.endswith('.gz'):
154 raise PathException('unknown compression type %s' % into
)
155 subprocess
.check_call(args
, env
=envs
)
158 class GitHubCommitTsCache(object):
159 __cachef
= 'github.commit.ts.cache'
163 Path
.mkdir_all(TMPDIR_DL
)
164 self
.cachef
= os
.path
.join(TMPDIR_DL
, self
.__cachef
)
168 """Get timestamp with key ``k``."""
169 fileno
= os
.open(self
.cachef
, os
.O_RDONLY | os
.O_CREAT
)
170 with os
.fdopen(fileno
) as fin
:
172 fcntl
.lockf(fileno
, fcntl
.LOCK_SH
)
173 self
._cache
_init
(fin
)
175 ts
= self
.cache
[k
][0]
178 fcntl
.lockf(fileno
, fcntl
.LOCK_UN
)
182 """Update timestamp with ``k``."""
183 fileno
= os
.open(self
.cachef
, os
.O_RDWR | os
.O_CREAT
)
184 with os
.fdopen(fileno
, 'w+') as f
:
186 fcntl
.lockf(fileno
, fcntl
.LOCK_EX
)
188 self
.cache
[k
] = (v
, int(time
.time()))
191 fcntl
.lockf(fileno
, fcntl
.LOCK_UN
)
193 def _cache_init(self
, fin
):
195 k
, ts
, updated
= line
.split()
197 updated
= int(updated
)
198 self
.cache
[k
] = (ts
, updated
)
200 def _cache_flush(self
, fout
):
201 cache
= sorted(self
.cache
.items(), key
=lambda a
: a
[1][1])
202 cache
= cache
[:self
.__cachen
]
204 os
.ftruncate(fout
.fileno(), 0)
205 fout
.seek(0, os
.SEEK_SET
)
209 line
= '{0} {1} {2}\n'.format(k
, ts
, updated
)
213 class DownloadGitHubTarball(object):
214 """Download and repack archive tarball from GitHub.
216 Compared with the method of packing after cloning the whole repo, this
217 method is more friendly to users with fragile internet connection.
219 However, there are limitations with this method
221 - GitHub imposes a 60 reqs/hour limit for unauthenticated API access.
222 This affects fetching commit date for reproducible tarballs. Download
223 through the archive link is not affected.
225 - GitHub archives do not contain source codes for submodules.
227 - GitHub archives seem to respect .gitattributes and ignore paths with
228 export-ignore attributes.
230 For the first two issues, the method will fail loudly to allow fallback to
231 clone-then-pack method.
233 As for the 3rd issue, to make sure that this method only produces identical
234 tarballs as the fallback method, we require the expected hash value to be
235 supplied. That means the first tarball will need to be prepared by the
236 clone-then-pack method
239 __repo_url_regex
= re
.compile(r
'^(?:https|git)://github.com/(?P<owner>[^/]+)/(?P<repo>[^/]+)')
241 def __init__(self
, args
):
242 self
.dl_dir
= args
.dl_dir
243 self
.version
= args
.version
244 self
.subdir
= args
.subdir
245 self
.source
= args
.source
246 self
.submodules
= args
.submodules
248 self
._init
_owner
_repo
()
249 self
.xhash
= args
.hash
251 self
.commit_ts
= None # lazy load commit timestamp
252 self
.commit_ts_cache
= GitHubCommitTsCache()
253 self
.name
= 'github-tarball'
256 """Download and repack GitHub archive tarball."""
257 if self
.submodules
and self
.submodules
!= ['skip']:
258 raise self
._error
('Fetching submodules is not yet supported')
259 self
._init
_commit
_ts
()
260 with
Path(TMPDIR_DL
, keep
=True) as dir_dl
:
261 # fetch tarball from GitHub
262 tarball_path
= os
.path
.join(dir_dl
.path
, self
.subdir
+ '.tar.gz.dl')
263 with
Path(tarball_path
, isdir
=False):
264 self
._fetch
(tarball_path
)
266 d
= os
.path
.join(dir_dl
.path
, self
.subdir
+ '.untar')
267 with
Path(d
, preclean
=True) as dir_untar
:
268 tarball_prefix
= Path
.untar(tarball_path
, into
=dir_untar
.path
)
269 dir0
= os
.path
.join(dir_untar
.path
, tarball_prefix
)
270 dir1
= os
.path
.join(dir_untar
.path
, self
.subdir
)
272 if self
.submodules
!= ['skip'] and self
._has
_submodule
(dir0
):
273 raise self
._error
('Fetching submodules is not yet supported')
275 os
.rename(dir0
, dir1
)
277 into
=os
.path
.join(TMPDIR_DL
, self
.source
)
278 Path
.tar(dir_untar
.path
, self
.subdir
, into
=into
, ts
=self
.commit_ts
)
280 self
._hash
_check
(into
)
284 # move to target location
285 file1
= os
.path
.join(self
.dl_dir
, self
.source
)
287 shutil
.move(into
, file1
)
289 def _has_submodule(self
, dir_
):
290 m
= os
.path
.join(dir_
, '.gitmodules')
293 return st
.st_size
> 0
295 return e
.errno
!= errno
.ENOENT
297 def _init_owner_repo(self
):
298 m
= self
.__repo
_url
_regex
.search(self
.url
)
300 raise self
._error
('Invalid github url: {}'.format(self
.url
))
301 owner
= m
.group('owner')
302 repo
= m
.group('repo')
303 if repo
.endswith('.git'):
308 def _init_hasher(self
):
311 self
.hasher
= hashlib
.sha256()
312 elif len(xhash
) == 32:
313 self
.hasher
= hashlib
.md5()
315 raise self
._error
('Requires sha256sum for verification')
318 def _hash_check(self
, f
):
319 with
open(f
, 'rb') as fin
:
324 self
.hasher
.update(d
)
325 xhash
= self
.hasher
.hexdigest()
326 if xhash
!= self
.xhash
:
327 raise self
._error
('Wrong hash (probably caused by .gitattributes), expecting {}, got {}'.format(self
.xhash
, xhash
))
329 def _init_commit_ts(self
):
330 if self
.commit_ts
is not None:
332 # GitHub provides 2 APIs[1,2] for fetching commit data. API[1] is more
333 # terse while API[2] provides more verbose info such as commit diff
334 # etc. That's the main reason why API[1] is preferred: the response
335 # size is predictable.
337 # However, API[1] only accepts complete commit sha1sum as the parameter
338 # while API[2] is more liberal accepting also partial commit id and
341 # [1] Get a single commit, Repositories, https://developer.github.com/v3/repos/commits/#get-a-single-commit
342 # [2] Git Commits, Git Data, https://developer.github.com/v3/git/commits/#get-a-commit
345 'url': self
._make
_repo
_url
_path
('git', 'commits', self
.version
),
346 'attr_path': ('committer', 'date'),
348 'url': self
._make
_repo
_url
_path
('commits', self
.version
),
349 'attr_path': ('commit', 'committer', 'date'),
352 version_is_sha1sum
= len(self
.version
) == 40
353 if not version_is_sha1sum
:
354 apis
.insert(0, apis
.pop())
358 attr_path
= api
['attr_path']
360 ct
= self
.commit_ts_cache
.get(url
)
364 ct
= self
._init
_commit
_ts
_remote
_get
(url
, attr_path
)
366 self
.commit_ts_cache
.set(url
, ct
)
368 except Exception as e
:
369 reasons
+= '\n' + (" {}: {}".format(url
, e
))
370 raise self
._error
('Cannot fetch commit ts:{}'.format(reasons
))
372 def _init_commit_ts_remote_get(self
, url
, attrpath
):
373 resp
= self
._make
_request
(url
)
375 date
= json
.loads(data
)
376 for attr
in attrpath
:
378 date
= datetime
.datetime
.strptime(date
, '%Y-%m-%dT%H:%M:%SZ')
379 date
= date
.timetuple()
380 ct
= calendar
.timegm(date
)
383 def _fetch(self
, path
):
384 """Fetch tarball of the specified version ref."""
386 url
= self
._make
_repo
_url
_path
('tarball', ref
)
387 resp
= self
._make
_request
(url
)
388 with
open(path
, 'wb') as fout
:
395 def _make_repo_url_path(self
, *args
):
396 url
= '/repos/{0}/{1}'.format(self
.owner
, self
.repo
)
398 url
+= '/' + '/'.join(args
)
401 def _make_request(self
, path
):
402 """Request GitHub API endpoint on ``path``."""
403 url
= 'https://api.github.com' + path
405 'Accept': 'application/vnd.github.v3+json',
406 'User-Agent': 'OpenWrt',
408 req
= urllib
.request
.Request(url
, headers
=headers
)
409 sslcontext
= ssl
._create
_unverified
_context
()
410 fileobj
= urllib
.request
.urlopen(req
, context
=sslcontext
)
413 def _error(self
, msg
):
414 return DownloadGitHubError('{}: {}'.format(self
.source
, msg
))
418 parser
= argparse
.ArgumentParser()
419 parser
.add_argument('--dl-dir', default
=os
.getcwd(), help='Download dir')
420 parser
.add_argument('--url', help='Download URL')
421 parser
.add_argument('--subdir', help='Source code subdir name')
422 parser
.add_argument('--version', help='Source code version')
423 parser
.add_argument('--source', help='Source tarball filename')
424 parser
.add_argument('--hash', help='Source tarball\'s expected sha256sum')
425 parser
.add_argument('--submodules', nargs
='*', help='List of submodules, or "skip"')
426 args
= parser
.parse_args()
428 method
= DownloadGitHubTarball(args
)
430 except Exception as ex
:
431 sys
.stderr
.write('{}: Download from {} failed\n'.format(args
.source
, args
.url
))
432 sys
.stderr
.write('{}\n'.format(ex
))
435 if __name__
== '__main__':