# Copyright (c) 2018 Yousong Zhou <yszhou4tech@gmail.com>
#
# This is free software, licensed under the GNU General Public License v2.
# See /LICENSE for more information.
# Scratch space for downloads: honour caller-provided TMP_DIR when set,
# otherwise fall back to /tmp (an empty TMP_DIR also falls through via `or`).
TMPDIR = os.environ.get('TMP_DIR') or '/tmp'
# Staging sub-directory where tarballs are fetched, unpacked and repacked.
TMPDIR_DL = os.path.join(TMPDIR, 'dl')
class PathException(Exception):
    """Raised by Path helpers for filesystem/tarball handling failures."""
    pass
class DownloadGitHubError(Exception):
    """Raised when downloading/repacking a GitHub archive tarball fails."""
    pass
class Path(object):
    """Context class for preparing and cleaning up directories.

    If ``preclean`` is ``False``, ``path`` will NOT be removed on context enter

    If ``path`` ``isdir``, then it will be created on context enter

    If ``keep`` is True, then ``path`` will NOT be removed on context exit
    """

    def __init__(self, path, isdir=True, preclean=False, keep=False):
        self.path = path
        self.isdir = isdir
        self.preclean = preclean
        self.keep = keep

    def __enter__(self):
        if self.preclean:
            self.rm_all(self.path)
        if self.isdir:
            self.mkdir_all(self.path)
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Clean up unless the caller asked to keep the path around.
        if not self.keep:
            self.rm_all(self.path)

    @staticmethod
    def mkdir_all(path):
        """Same as mkdir -p.

        Fixed: the previous os.path.split()-based loop only created the last
        two path components and raised ENOENT when more than one level was
        missing; os.makedirs handles arbitrary depth and is idempotent.
        """
        os.makedirs(path, exist_ok=True)

    @staticmethod
    def rm_all(path):
        """Same as rm -r: remove ``path`` whatever it is, tolerating absence."""
        if os.path.islink(path):
            # Remove the link itself, never recurse through it.
            Path._remove(path)
        elif os.path.isdir(path):
            Path._rmdir_dir(path)
        else:
            Path._remove(path)

    @staticmethod
    def _rmdir_dir(dir_):
        # Depth-first removal of a directory tree.
        names = Path._listdir(dir_)
        for name in names:
            p = os.path.join(dir_, name)
            Path.rm_all(p)
        Path._rmdir(dir_)

    @staticmethod
    def _mkdir(path):
        # Create one directory level; already-exists is not an error.
        Path._os_func(os.mkdir, path, errno.EEXIST)

    @staticmethod
    def _rmdir(path):
        # Remove an (empty) directory; absence is not an error.
        Path._os_func(os.rmdir, path, errno.ENOENT)

    @staticmethod
    def _remove(path):
        # Remove a file or symlink; absence is not an error.
        Path._os_func(os.remove, path, errno.ENOENT)

    @staticmethod
    def _listdir(path):
        # List a directory; a missing directory yields an empty list.
        return Path._os_func(os.listdir, path, errno.ENOENT, default=[])

    @staticmethod
    def _os_func(func, path, errno, default=None):
        """Call func(path) in an idempotent way.

        On exception ``ex``, if the type is OSError and ``ex.errno == errno``,
        return ``default``, otherwise, re-raise

        NOTE: the ``errno`` parameter intentionally shadows the module of the
        same name inside this function; kept for signature compatibility.
        """
        try:
            return func(path)
        except OSError as e:
            if e.errno == errno:
                return default
            raise

    @staticmethod
    def untar(path, into=None):
        """Extract tarball at ``path`` into subdir ``into``.

        return subdir name if and only if there exists one, otherwise raise
        PathException
        """
        args = ('tar', '-C', into, '-xzf', path, '--no-same-permissions')
        # Force a sane umask in the child so extracted modes are reproducible.
        subprocess.check_call(args, preexec_fn=lambda: os.umask(0o22))
        dirs = os.listdir(into)
        if len(dirs) == 1:
            return dirs[0]
        raise PathException('untar %s: expecting a single subdir, got %s' % (path, dirs))

    @staticmethod
    def tar(path, subdir, into=None, ts=None):
        """Pack ``path`` into tarball ``into``."""
        # --sort=name requires a recent build of GNU tar
        args = ['tar', '--numeric-owner', '--owner=0', '--group=0', '--sort=name', '--mode=a-s']
        args += ['-C', path, '-cf', into, subdir]
        envs = os.environ.copy()
        if ts is not None:
            # Pin every member's mtime for reproducible output.
            args.append('--mtime=@%d' % ts)
        # Pick the compressor from the destination file extension.
        if into.endswith('.zst'):
            args.append('-I zstd -T0 --ultra -20')
        elif into.endswith('.xz'):
            envs['XZ_OPT'] = '-7e'
            args.append('-J')
        elif into.endswith('.bz2'):
            args.append('-j')
        elif into.endswith('.gz'):
            args.append('-z')
        else:
            raise PathException('unknown compression type %s' % into)
        subprocess.check_call(args, env=envs)
class GitHubCommitTsCache(object):
    """File-backed cache of GitHub commit timestamps.

    Entries live in ``TMPDIR_DL/github.commit.ts.cache`` as whitespace-separated
    ``key ts updated`` lines; access is serialized with fcntl file locks.
    """
    # Cache file name under TMPDIR_DL.
    __cachef = 'github.commit.ts.cache'
    # Maximum number of entries retained on flush.
    # NOTE(review): value reconstructed from upstream — confirm against the
    # original file (the line was lost in extraction).
    __cachen = 2048

    def __init__(self):
        Path.mkdir_all(TMPDIR_DL)
        self.cachef = os.path.join(TMPDIR_DL, self.__cachef)
        self.cache = {}

    def get(self, k):
        """Get timestamp with key ``k``; return None when not cached."""
        fileno = os.open(self.cachef, os.O_RDONLY | os.O_CREAT)
        with os.fdopen(fileno) as fin:
            try:
                fcntl.lockf(fileno, fcntl.LOCK_SH)
                self._cache_init(fin)
                if k in self.cache:
                    ts = self.cache[k][0]
                    return ts
            finally:
                fcntl.lockf(fileno, fcntl.LOCK_UN)
        return None

    def set(self, k, v):
        """Update timestamp with ``k``."""
        fileno = os.open(self.cachef, os.O_RDWR | os.O_CREAT)
        # fdopen() on an existing fd does not truncate, so existing entries
        # survive until _cache_flush rewrites the file.
        with os.fdopen(fileno, 'w+') as f:
            try:
                fcntl.lockf(fileno, fcntl.LOCK_EX)
                self._cache_init(f)
                self.cache[k] = (v, int(time.time()))
                self._cache_flush(f)
            finally:
                fcntl.lockf(fileno, fcntl.LOCK_UN)

    def _cache_init(self, fin):
        """Populate self.cache from the open cache file ``fin``."""
        for line in fin:
            k, ts, updated = line.split()
            ts = int(ts)
            updated = int(updated)
            self.cache[k] = (ts, updated)

    def _cache_flush(self, fout):
        """Rewrite ``fout`` with at most ``__cachen`` oldest-first entries."""
        # Sort by the 'updated' stamp so eviction drops the stalest entries.
        cache = sorted(self.cache.items(), key=lambda a: a[1][1])
        cache = cache[:self.__cachen]
        self.cache = {}
        os.ftruncate(fout.fileno(), 0)
        fout.seek(0, os.SEEK_SET)
        for k, ent in cache:
            ts, updated = ent
            line = '{0} {1} {2}\n'.format(k, ts, updated)
            fout.write(line)
class DownloadGitHubTarball(object):
    """Download and repack archive tarball from GitHub.

    Compared with the method of packing after cloning the whole repo, this
    method is more friendly to users with fragile internet connection.

    However, there are limitations with this method

     - GitHub imposes a 60 reqs/hour limit for unauthenticated API access.
       This affects fetching commit date for reproducible tarballs.  Download
       through the archive link is not affected.

     - GitHub archives do not contain source codes for submodules.

     - GitHub archives seem to respect .gitattributes and ignore paths with
       export-ignore attributes.

    For the first two issues, the method will fail loudly to allow fallback to
    clone-then-pack method.

    As for the 3rd issue, to make sure that this method only produces identical
    tarballs as the fallback method, we require the expected hash value to be
    supplied.  That means the first tarball will need to be prepared by the
    clone-then-pack method
    """

    __repo_url_regex = re.compile(r'^(?:https|git)://github.com/(?P<owner>[^/]+)/(?P<repo>[^/]+)')

    def __init__(self, args):
        self.dl_dir = args.dl_dir
        self.version = args.version
        self.subdir = args.subdir
        self.source = args.source
        self.submodules = args.submodules
        self.url = args.url  # reconstructed: _init_owner_repo() reads self.url
        self._init_owner_repo()
        self.xhash = args.hash
        self._init_hasher()  # reconstructed: _hash_check() reads self.hasher
        self.commit_ts = None  # lazy load commit timestamp
        self.commit_ts_cache = GitHubCommitTsCache()
        self.name = 'github-tarball'

    def download(self):
        """Download and repack GitHub archive tarball."""
        if self.submodules and self.submodules != ['skip']:
            raise self._error('Fetching submodules is not yet supported')
        self._init_commit_ts()
        with Path(TMPDIR_DL, keep=True) as dir_dl:
            # fetch tarball from GitHub
            tarball_path = os.path.join(dir_dl.path, self.subdir + '.tar.gz.dl')
            with Path(tarball_path, isdir=False):
                self._fetch(tarball_path)
                # unpack
                d = os.path.join(dir_dl.path, self.subdir + '.untar')
                with Path(d, preclean=True) as dir_untar:
                    tarball_prefix = Path.untar(tarball_path, into=dir_untar.path)
                    dir0 = os.path.join(dir_untar.path, tarball_prefix)
                    dir1 = os.path.join(dir_untar.path, self.subdir)
                    # fail loudly when the archive needs submodules we cannot fetch
                    if self.submodules != ['skip'] and self._has_submodule(dir0):
                        raise self._error('Fetching submodules is not yet supported')
                    # rename the GitHub-generated prefix to the expected subdir
                    os.rename(dir0, dir1)
                    # repack reproducibly, pinning mtimes to the commit ts
                    into = os.path.join(TMPDIR_DL, self.source)
                    Path.tar(dir_untar.path, self.subdir, into=into, ts=self.commit_ts)
                    try:
                        self._hash_check(into)
                    except Exception:
                        # never leave a corrupt artifact in the staging dir
                        Path.rm_all(into)
                        raise
                    # move to target location
                    file1 = os.path.join(self.dl_dir, self.source)
                    if into != file1:
                        shutil.move(into, file1)

    def _has_submodule(self, dir_):
        """Return True when ``dir_`` contains a non-empty .gitmodules file."""
        m = os.path.join(dir_, '.gitmodules')
        try:
            st = os.stat(m)
            return st.st_size > 0
        except OSError as e:
            # Missing .gitmodules means no submodules; any other stat failure
            # is treated conservatively as "has submodules".
            return e.errno != errno.ENOENT

    def _init_owner_repo(self):
        """Derive self.owner/self.repo from the GitHub URL."""
        m = self.__repo_url_regex.search(self.url)
        if m is None:
            raise self._error('Invalid github url: {}'.format(self.url))
        owner = m.group('owner')
        repo = m.group('repo')
        if repo.endswith('.git'):
            repo = repo[:-len('.git')]
        self.owner = owner
        self.repo = repo

    def _init_hasher(self):
        """Pick the digest algorithm from the expected hash's length."""
        xhash = self.xhash
        if len(xhash) == 64:
            self.hasher = hashlib.sha256()
        elif len(xhash) == 32:
            self.hasher = hashlib.md5()
        else:
            raise self._error('Requires sha256sum for verification')

    def _hash_check(self, f):
        """Stream file ``f`` through the hasher; raise on digest mismatch."""
        with open(f, 'rb') as fin:
            while True:
                d = fin.read(4096)
                if not d:
                    break
                self.hasher.update(d)
        xhash = self.hasher.hexdigest()
        if xhash != self.xhash:
            raise self._error('Wrong hash (probably caused by .gitattributes), expecting {}, got {}'.format(self.xhash, xhash))

    def _init_commit_ts(self):
        """Resolve the commit timestamp, trying cache then the GitHub APIs."""
        if self.commit_ts is not None:
            return
        # GitHub provides 2 APIs[1,2] for fetching commit data. API[1] is more
        # terse while API[2] provides more verbose info such as commit diff
        # etc. That's the main reason why API[1] is preferred: the response
        # size is predictable.
        #
        # However, API[1] only accepts complete commit sha1sum as the parameter
        # while API[2] is more liberal accepting also partial commit id and
        # tags, etc.
        #
        # [1] Get a single commit, Repositories, https://developer.github.com/v3/repos/commits/#get-a-single-commit
        # [2] Git Commits, Git Data, https://developer.github.com/v3/git/commits/#get-a-commit
        apis = [
            {
                'url': self._make_repo_url_path('git', 'commits', self.version),
                'attr_path': ('committer', 'date'),
            }, {
                'url': self._make_repo_url_path('commits', self.version),
                'attr_path': ('commit', 'committer', 'date'),
            },
        ]
        version_is_sha1sum = len(self.version) == 40
        if not version_is_sha1sum:
            # Only API[2] accepts tags/partial ids, so try it first.
            apis.insert(0, apis.pop())
        reasons = ''
        for api in apis:
            url = api['url']
            attr_path = api['attr_path']
            try:
                ct = self.commit_ts_cache.get(url)
                if ct is not None:
                    self.commit_ts = ct
                    return
                ct = self._init_commit_ts_remote_get(url, attr_path)
                self.commit_ts = ct
                self.commit_ts_cache.set(url, ct)
                return
            except Exception as e:
                reasons += '\n' + ("    {}: {}".format(url, e))
        raise self._error('Cannot fetch commit ts:{}'.format(reasons))

    def _init_commit_ts_remote_get(self, url, attrpath):
        """Fetch commit data from ``url`` and dig out the UTC timestamp."""
        resp = self._make_request(url)
        data = resp.read()
        date = json.loads(data)
        for attr in attrpath:
            date = date[attr]
        date = datetime.datetime.strptime(date, '%Y-%m-%dT%H:%M:%SZ')
        date = date.timetuple()
        ct = calendar.timegm(date)
        return ct

    def _fetch(self, path):
        """Fetch tarball of the specified version ref."""
        ref = self.version
        url = self._make_repo_url_path('tarball', ref)
        resp = self._make_request(url)
        with open(path, 'wb') as fout:
            while True:
                d = resp.read(4096)
                if not d:
                    break
                fout.write(d)

    def _make_repo_url_path(self, *args):
        """Build an API path like /repos/<owner>/<repo>[/extra/...]."""
        url = '/repos/{0}/{1}'.format(self.owner, self.repo)
        if args:
            url += '/' + '/'.join(args)
        return url

    def _make_request(self, path):
        """Request GitHub API endpoint on ``path``."""
        url = 'https://api.github.com' + path
        headers = {
            'Accept': 'application/vnd.github.v3+json',
            'User-Agent': 'OpenWrt',
        }
        req = urllib.request.Request(url, headers=headers)
        # NOTE(review): certificate verification is deliberately disabled
        # here (private API); confirm this is intended before reuse.
        sslcontext = ssl._create_unverified_context()
        fileobj = urllib.request.urlopen(req, context=sslcontext)
        return fileobj

    def _error(self, msg):
        """Build (not raise) a DownloadGitHubError prefixed with the source name."""
        return DownloadGitHubError('{}: {}'.format(self.source, msg))
def main():
    """CLI entry point: parse arguments and run the GitHub tarball download."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--dl-dir', default=os.getcwd(), help='Download dir')
    parser.add_argument('--url', help='Download URL')
    parser.add_argument('--subdir', help='Source code subdir name')
    parser.add_argument('--version', help='Source code version')
    parser.add_argument('--source', help='Source tarball filename')
    parser.add_argument('--hash', help='Source tarball\'s expected sha256sum')
    parser.add_argument('--submodules', nargs='*', help='List of submodules, or "skip"')
    args = parser.parse_args()
    try:
        method = DownloadGitHubTarball(args)
        method.download()
    except Exception as ex:
        # Fail loudly so the build system can fall back to clone-then-pack.
        sys.stderr.write('{}: Download from {} failed\n'.format(args.source, args.url))
        sys.stderr.write('{}\n'.format(ex))
        sys.exit(1)


if __name__ == '__main__':
    main()