# Copyright (c) 2018 Yousong Zhou <yszhou4tech@gmail.com>
#
# This is free software, licensed under the GNU General Public License v2.
# See /LICENSE for more information.
# Scratch directory for downloads; TMP_DIR env var wins, with the `or`
# form also falling back to /tmp when TMP_DIR is set but empty.
TMPDIR = os.environ.get('TMP_DIR') or '/tmp'
TMPDIR_DL = os.path.join(TMPDIR, 'dl')
class PathException(Exception):
    """Raised by Path helpers for filesystem/tar related failures."""
    pass
class DownloadGitHubError(Exception):
    """Raised when downloading/repacking a GitHub archive fails."""
    pass
34 """Context class for preparing and cleaning up directories.
36 If ```preclean` is ``False``, ``path`` will NOT be removed on context enter
38 If ``path`` ``isdir``, then it will be created on context enter.
40 If ``keep`` is True, then ``path`` will NOT be removed on context exit
43 def __init__(self
, path
, isdir
=True, preclean
=False, keep
=False):
46 self
.preclean
= preclean
51 self
.rm_all(self
.path
)
53 self
.mkdir_all(self
.path
)
56 def __exit__(self
, exc_type
, exc_value
, traceback
):
58 self
.rm_all(self
.path
)
62 """Same as mkdir -p."""
63 names
= os
.path
.split(path
)
66 p
= os
.path
.join(p
, name
)
71 names
= Path
._listdir
(dir_
)
73 p
= os
.path
.join(dir_
, name
)
79 Path
._os
_func
(os
.mkdir
, path
, errno
.EEXIST
)
83 Path
._os
_func
(os
.rmdir
, path
, errno
.ENOENT
)
87 Path
._os
_func
(os
.remove
, path
, errno
.ENOENT
)
91 return Path
._os
_func
(os
.listdir
, path
, errno
.ENOENT
, default
=[])
94 def _os_func(func
, path
, errno
, default
=None):
95 """Call func(path) in an idempotent way.
97 On exception ``ex``, if the type is OSError and ``ex.errno == errno``,
98 return ``default``, otherwise, re-raise
111 if os
.path
.islink(path
):
113 elif os
.path
.isdir(path
):
114 Path
._rmdir
_dir
(path
)
119 def untar(path
, into
=None):
120 """Extract tarball at ``path`` into subdir ``into``.
122 return subdir name if and only if there exists one, otherwise raise PathException
124 args
= ('tar', '-C', into
, '-xzf', path
, '--no-same-permissions')
125 subprocess
.check_call(args
, preexec_fn
=lambda: os
.umask(0o22))
126 dirs
= os
.listdir(into
)
130 raise PathException('untar %s: expecting a single subdir, got %s' % (path
, dirs
))
133 def tar(path
, subdir
, into
=None, ts
=None):
134 """Pack ``path`` into tarball ``into``."""
135 # --sort=name requires a recent build of GNU tar
136 args
= ['tar', '--numeric-owner', '--owner=0', '--group=0', '--sort=name', '--mode=a-s']
137 args
+= ['-C', path
, '-cf', into
, subdir
]
138 envs
= os
.environ
.copy()
140 args
.append('--mtime=@%d' % ts
)
141 if into
.endswith('.xz'):
142 envs
['XZ_OPT'] = '-7e'
144 elif into
.endswith('.bz2'):
146 elif into
.endswith('.gz'):
150 raise PathException('unknown compression type %s' % into
)
151 subprocess
.check_call(args
, env
=envs
)
class GitHubCommitTsCache(object):
    """File-backed cache mapping API URL -> (commit timestamp, updated).

    The cache file lives under TMPDIR_DL and may be shared by concurrent
    runs, hence the fcntl advisory locking around every access.
    """
    __cachef = 'github.commit.ts.cache'
    # Maximum number of entries kept when flushing.
    # NOTE(review): the original constant line was lost in mangling; 2048
    # matches upstream OpenWrt — confirm.
    __cachen = 2048

    def __init__(self):
        Path.mkdir_all(TMPDIR_DL)
        self.cachef = os.path.join(TMPDIR_DL, self.__cachef)
        self.cache = {}

    def get(self, k):
        """Get timestamp with key ``k``."""
        # O_CREAT so a missing cache file is created empty rather than failing.
        fileno = os.open(self.cachef, os.O_RDONLY | os.O_CREAT)
        with os.fdopen(fileno) as fin:
            try:
                fcntl.lockf(fileno, fcntl.LOCK_SH)
                self._cache_init(fin)
                if k in self.cache:
                    ts = self.cache[k][0]
                    return ts
            finally:
                fcntl.lockf(fileno, fcntl.LOCK_UN)
        return None

    def set(self, k, v):
        """Update timestamp with ``k``."""
        fileno = os.open(self.cachef, os.O_RDWR | os.O_CREAT)
        with os.fdopen(fileno, 'w+') as f:
            try:
                fcntl.lockf(fileno, fcntl.LOCK_EX)
                # merge with existing entries before rewriting the file
                self._cache_init(f)
                self.cache[k] = (v, int(time.time()))
                self._cache_flush(f)
            finally:
                fcntl.lockf(fileno, fcntl.LOCK_UN)

    def _cache_init(self, fin):
        """Load ``fin`` (one "key ts updated" triple per line) into self.cache."""
        for line in fin:
            k, ts, updated = line.split()
            ts = int(ts)
            updated = int(updated)
            self.cache[k] = (ts, updated)

    def _cache_flush(self, fout):
        """Rewrite ``fout`` with at most __cachen entries, LRU by update time."""
        cache = sorted(self.cache.items(), key=lambda a: a[1][1])
        cache = cache[:self.__cachen]
        self.cache = {}
        os.ftruncate(fout.fileno(), 0)
        fout.seek(0, os.SEEK_SET)
        for item in cache:
            k, ts, updated = item[0], item[1][0], item[1][1]
            line = '{0} {1} {2}\n'.format(k, ts, updated)
            fout.write(line)
class DownloadGitHubTarball(object):
    """Download and repack archive tarball from GitHub.

    Compared with the method of packing after cloning the whole repo, this
    method is more friendly to users with fragile internet connection.

    However, there are limitations with this method

     - GitHub imposes a 60 reqs/hour limit for unauthenticated API access.
       This affects fetching commit date for reproducible tarballs.  Download
       through the archive link is not affected.

     - GitHub archives do not contain source codes for submodules.

     - GitHub archives seem to respect .gitattributes and ignore paths with
       export-ignore attributes.

    For the first two issues, the method will fail loudly to allow fallback to
    clone-then-pack method.

    As for the 3rd issue, to make sure that this method only produces identical
    tarballs as the fallback method, we require the expected hash value to be
    supplied.  That means the first tarball will need to be prepared by the
    clone-then-pack method
    """

    __repo_url_regex = re.compile(r'^(?:https|git)://github.com/(?P<owner>[^/]+)/(?P<repo>[^/]+)')

    def __init__(self, args):
        self.dl_dir = args.dl_dir
        self.version = args.version
        self.subdir = args.subdir
        self.source = args.source
        self.submodules = args.submodules
        # NOTE(review): restored from mangling — _init_owner_repo reads
        # self.url, so it must be assigned first; confirm against upstream.
        self.url = args.url
        self._init_owner_repo()
        self.xhash = args.hash
        # NOTE(review): restored — selects sha256/md5 hasher from self.xhash.
        self._init_hasher()
        self.commit_ts = None  # lazy load commit timestamp
        self.commit_ts_cache = GitHubCommitTsCache()
        self.name = 'github-tarball'

    def download(self):
        """Download and repack GitHub archive tarball."""
        if self.submodules and self.submodules != ['skip']:
            raise self._error('Fetching submodules is not yet supported')
        self._init_commit_ts()
        with Path(TMPDIR_DL, keep=True) as dir_dl:
            # fetch tarball from GitHub
            tarball_path = os.path.join(dir_dl.path, self.subdir + '.tar.gz.dl')
            with Path(tarball_path, isdir=False):
                self._fetch(tarball_path)
                # unpack
                d = os.path.join(dir_dl.path, self.subdir + '.untar')
                with Path(d, preclean=True) as dir_untar:
                    tarball_prefix = Path.untar(tarball_path, into=dir_untar.path)
                    dir0 = os.path.join(dir_untar.path, tarball_prefix)
                    dir1 = os.path.join(dir_untar.path, self.subdir)
                    # fail loudly rather than produce a tarball missing submodules
                    if self.submodules != ['skip'] and self._has_submodule(dir0):
                        raise self._error('Fetching submodules is not yet supported')
                    # rename the GitHub-generated prefix to the expected subdir
                    os.rename(dir0, dir1)
                    # repack reproducibly, pinned to the commit timestamp
                    into = os.path.join(TMPDIR_DL, self.source)
                    Path.tar(dir_untar.path, self.subdir, into=into, ts=self.commit_ts)
                    self._hash_check(into)
                    # move to target location
                    file1 = os.path.join(self.dl_dir, self.source)
                    shutil.move(into, file1)
285 def _has_submodule(self
, dir_
):
286 m
= os
.path
.join(dir_
, '.gitmodules')
289 return st
.st_size
> 0
291 return e
.errno
!= errno
.ENOENT
293 def _init_owner_repo(self
):
294 m
= self
.__repo
_url
_regex
.search(self
.url
)
296 raise self
._error
('Invalid github url: {}'.format(self
.url
))
297 owner
= m
.group('owner')
298 repo
= m
.group('repo')
299 if repo
.endswith('.git'):
304 def _init_hasher(self
):
307 self
.hasher
= hashlib
.sha256()
308 elif len(xhash
) == 32:
309 self
.hasher
= hashlib
.md5()
311 raise self
._error
('Requires sha256sum for verification')
314 def _hash_check(self
, f
):
315 with
open(f
, 'rb') as fin
:
320 self
.hasher
.update(d
)
321 xhash
= self
.hasher
.hexdigest()
322 if xhash
!= self
.xhash
:
323 raise self
._error
('Wrong hash (probably caused by .gitattributes), expecting {}, got {}'.format(self
.xhash
, xhash
))
325 def _init_commit_ts(self
):
326 if self
.commit_ts
is not None:
328 # GitHub provides 2 APIs[1,2] for fetching commit data. API[1] is more
329 # terse while API[2] provides more verbose info such as commit diff
330 # etc. That's the main reason why API[1] is preferred: the response
331 # size is predictable.
333 # However, API[1] only accepts complete commit sha1sum as the parameter
334 # while API[2] is more liberal accepting also partial commit id and
337 # [1] Get a single commit, Repositories, https://developer.github.com/v3/repos/commits/#get-a-single-commit
338 # [2] Git Commits, Git Data, https://developer.github.com/v3/git/commits/#get-a-commit
341 'url': self
._make
_repo
_url
_path
('git', 'commits', self
.version
),
342 'attr_path': ('committer', 'date'),
344 'url': self
._make
_repo
_url
_path
('commits', self
.version
),
345 'attr_path': ('commit', 'committer', 'date'),
348 version_is_sha1sum
= len(self
.version
) == 40
349 if not version_is_sha1sum
:
350 apis
.insert(0, apis
.pop())
354 attr_path
= api
['attr_path']
356 ct
= self
.commit_ts_cache
.get(url
)
360 ct
= self
._init
_commit
_ts
_remote
_get
(url
, attr_path
)
362 self
.commit_ts_cache
.set(url
, ct
)
364 except Exception as e
:
365 reasons
+= '\n' + (" {}: {}".format(url
, e
))
366 raise self
._error
('Cannot fetch commit ts:{}'.format(reasons
))
368 def _init_commit_ts_remote_get(self
, url
, attrpath
):
369 resp
= self
._make
_request
(url
)
371 date
= json
.loads(data
)
372 for attr
in attrpath
:
374 date
= datetime
.datetime
.strptime(date
, '%Y-%m-%dT%H:%M:%SZ')
375 date
= date
.timetuple()
376 ct
= calendar
.timegm(date
)
379 def _fetch(self
, path
):
380 """Fetch tarball of the specified version ref."""
382 url
= self
._make
_repo
_url
_path
('tarball', ref
)
383 resp
= self
._make
_request
(url
)
384 with
open(path
, 'wb') as fout
:
391 def _make_repo_url_path(self
, *args
):
392 url
= '/repos/{0}/{1}'.format(self
.owner
, self
.repo
)
394 url
+= '/' + '/'.join(args
)
397 def _make_request(self
, path
):
398 """Request GitHub API endpoint on ``path``."""
399 url
= 'https://api.github.com' + path
401 'Accept': 'application/vnd.github.v3+json',
402 'User-Agent': 'OpenWrt',
404 req
= urllib
.request
.Request(url
, headers
=headers
)
405 sslcontext
= ssl
._create
_unverified
_context
()
406 fileobj
= urllib
.request
.urlopen(req
, context
=sslcontext
)
409 def _error(self
, msg
):
410 return DownloadGitHubError('{}: {}'.format(self
.source
, msg
))
def main():
    """CLI entry point: parse args and run the download, exiting 1 on failure."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--dl-dir', default=os.getcwd(), help='Download dir')
    parser.add_argument('--url', help='Download URL')
    parser.add_argument('--subdir', help='Source code subdir name')
    parser.add_argument('--version', help='Source code version')
    parser.add_argument('--source', help='Source tarball filename')
    parser.add_argument('--hash', help='Source tarball\'s expected sha256sum')
    parser.add_argument('--submodules', nargs='*', help='List of submodules, or "skip"')
    args = parser.parse_args()

    try:
        method = DownloadGitHubTarball(args)
        method.download()
    except Exception as ex:
        sys.stderr.write('{}: Download from {} failed\n'.format(args.source, args.url))
        sys.stderr.write('{}\n'.format(ex))
        # NOTE(review): non-zero exit restored from mangling so the build
        # system can fall back to clone-then-pack — confirm.
        sys.exit(1)
if __name__ == '__main__':
    # entry-point call restored; the mangled source truncated the guard body
    main()