libbb: use external gzip command as I/O layer
[project/opkg-lede.git] / libbb / unarchive.c
1 /*
2 * Copyright (C) 2000 by Glenn McGrath
3 * Copyright (C) 2001 by Laurence Anderson
4 *
5 * Based on previous work by busybox developers and others.
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU Library General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
20 */
21
22 #include <stdio.h>
23 #include <errno.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #include <unistd.h>
27 #include <utime.h>
28 #include <libgen.h>
29
30 #include "libbb.h"
31 #include "gzip.h"
32
/* Build-time feature switches for the tar header parser in this file. */
#define CONFIG_FEATURE_TAR_OLDGNU_COMPATABILITY 1
#define CONFIG_FEATURE_TAR_GNU_EXTENSIONS

#ifdef CONFIG_FEATURE_TAR_GNU_EXTENSIONS
/*
 * Name / link target carried by a GNU 'L' / 'K' pseudo-entry.  They are
 * filled in by get_header_tar() and handed over to the next real header.
 * Static storage is zero-initialised, so both start out as NULL.
 */
static char *longname;
static char *linkname;
#endif

/* Running count of the bytes consumed from the archive being read. */
off_t archive_offset;
42
43 static ssize_t
44 seek_forward(struct gzip_handle *zh, ssize_t len)
45 {
46 ssize_t slen = gzip_seek(zh, len);
47
48 if (slen == len)
49 archive_offset += len;
50
51 return slen;
52 }
53
54
55 /* Extract the data postioned at src_stream to either filesystem, stdout or
56 * buffer depending on the value of 'function' which is defined in libbb.h
57 *
58 * prefix doesnt have to be just a directory, it may prefix the filename as well.
59 *
60 * e.g. '/var/lib/dpkg/info/dpkg.' will extract all files to the base bath
61 * '/var/lib/dpkg/info/' and all files/dirs created in that dir will have
62 * 'dpkg.' as their prefix
63 *
64 * For this reason if prefix does point to a dir then it must end with a
65 * trailing '/' or else the last dir will be assumed to be the file prefix
66 */
67 static char *
68 extract_archive(struct gzip_handle *src_stream, FILE *out_stream,
69 const file_header_t *file_entry, const int function,
70 const char *prefix,
71 int *err)
72 {
73 FILE *dst_stream = NULL;
74 char *full_name = NULL;
75 char *full_link_name = NULL;
76 char *buffer = NULL;
77 struct utimbuf t;
78
79 *err = 0;
80
81 /* prefix doesnt have to be a proper path it may prepend
82 * the filename as well */
83 if (prefix != NULL) {
84 /* strip leading '/' in filename to extract as prefix may not be dir */
85 /* Cant use concat_path_file here as prefix might not be a directory */
86 char *path = file_entry->name;
87 if (strncmp("./", path, 2) == 0) {
88 path += 2;
89 if (strlen(path) == 0)
90 /* Do nothing, current dir already exists. */
91 return NULL;
92 }
93 full_name = xmalloc(strlen(prefix) + strlen(path) + 1);
94 strcpy(full_name, prefix);
95 strcat(full_name, path);
96 if ( file_entry->link_name ){
97 full_link_name = xmalloc(strlen(prefix) + strlen(file_entry->link_name) + 1);
98 strcpy(full_link_name, prefix);
99 strcat(full_link_name, file_entry->link_name);
100 }
101 } else {
102 full_name = xstrdup(file_entry->name);
103 if ( file_entry->link_name )
104 full_link_name = xstrdup(file_entry->link_name);
105 }
106
107
108 if (function & extract_to_stream) {
109 if (S_ISREG(file_entry->mode)) {
110 *err = gzip_copy(src_stream, out_stream, file_entry->size);
111 archive_offset += file_entry->size;
112 }
113 }
114 else if (function & extract_one_to_buffer) {
115 if (S_ISREG(file_entry->mode)) {
116 buffer = (char *) xmalloc(file_entry->size + 1);
117 gzip_read(src_stream, buffer, file_entry->size);
118 buffer[file_entry->size] = '\0';
119 archive_offset += file_entry->size;
120 goto cleanup;
121 }
122 }
123 else if (function & extract_all_to_fs) {
124 struct stat oldfile;
125 int stat_res;
126 stat_res = lstat (full_name, &oldfile);
127 if (stat_res == 0) { /* The file already exists */
128 if ((function & extract_unconditional) || (oldfile.st_mtime < file_entry->mtime)) {
129 if (!S_ISDIR(oldfile.st_mode)) {
130 unlink(full_name); /* Directories might not be empty etc */
131 }
132 } else {
133 if ((function & extract_quiet) != extract_quiet) {
134 *err = -1;
135 error_msg("%s not created: newer or same age file exists", file_entry->name);
136 }
137 seek_forward(src_stream, file_entry->size);
138 goto cleanup;
139 }
140 }
141 if (function & extract_create_leading_dirs) { /* Create leading directories with default umask */
142 char *buf, *parent;
143 buf = xstrdup(full_name);
144 parent = dirname(buf);
145 if (make_directory (parent, -1, FILEUTILS_RECUR) != 0) {
146 if ((function & extract_quiet) != extract_quiet) {
147 *err = -1;
148 error_msg("couldn't create leading directories");
149 }
150 }
151 free (buf);
152 }
153 switch(file_entry->mode & S_IFMT) {
154 case S_IFREG:
155 if (file_entry->link_name) { /* Found a cpio hard link */
156 if (link(full_link_name, full_name) != 0) {
157 if ((function & extract_quiet) != extract_quiet) {
158 *err = -1;
159 perror_msg("Cannot link from %s to '%s'",
160 file_entry->name, file_entry->link_name);
161 }
162 }
163 } else {
164 if ((dst_stream = wfopen(full_name, "w")) == NULL) {
165 *err = -1;
166 seek_forward(src_stream, file_entry->size);
167 goto cleanup;
168 }
169 archive_offset += file_entry->size;
170 *err = gzip_copy(src_stream, dst_stream, file_entry->size);
171 fclose(dst_stream);
172 }
173 break;
174 case S_IFDIR:
175 if (stat_res != 0) {
176 if (mkdir(full_name, file_entry->mode) < 0) {
177 if ((function & extract_quiet) != extract_quiet) {
178 *err = -1;
179 perror_msg("Cannot make dir %s", full_name);
180 }
181 }
182 }
183 break;
184 case S_IFLNK:
185 if (symlink(file_entry->link_name, full_name) < 0) {
186 if ((function & extract_quiet) != extract_quiet) {
187 *err = -1;
188 perror_msg("Cannot create symlink from %s to '%s'", file_entry->name, file_entry->link_name);
189 }
190 goto cleanup;
191 }
192 break;
193 case S_IFSOCK:
194 case S_IFBLK:
195 case S_IFCHR:
196 case S_IFIFO:
197 if (mknod(full_name, file_entry->mode, file_entry->device) == -1) {
198 if ((function & extract_quiet) != extract_quiet) {
199 *err = -1;
200 perror_msg("Cannot create node %s", file_entry->name);
201 }
202 goto cleanup;
203 }
204 break;
205 default:
206 *err = -1;
207 perror_msg("Don't know how to handle %s", full_name);
208
209 }
210
211 /* Changing a symlink's properties normally changes the properties of the
212 * file pointed to, so dont try and change the date or mode, lchown does
213 * does the right thing, but isnt available in older versions of libc */
214 if (S_ISLNK(file_entry->mode)) {
215 #if (__GLIBC__ > 2) && (__GLIBC_MINOR__ > 1)
216 lchown(full_name, file_entry->uid, file_entry->gid);
217 #endif
218 } else {
219 if (function & extract_preserve_date) {
220 t.actime = file_entry->mtime;
221 t.modtime = file_entry->mtime;
222 utime(full_name, &t);
223 }
224 chown(full_name, file_entry->uid, file_entry->gid);
225 chmod(full_name, file_entry->mode);
226 }
227 } else {
228 /* If we arent extracting data we have to skip it,
229 * if data size is 0 then then just do it anyway
230 * (saves testing for it) */
231 seek_forward(src_stream, file_entry->size);
232 }
233
234 /* extract_list and extract_verbose_list can be used in conjunction
235 * with one of the above four extraction functions, so do this seperately */
236 if (function & extract_verbose_list) {
237 fprintf(out_stream, "%s %d/%d %8d %s ", mode_string(file_entry->mode),
238 file_entry->uid, file_entry->gid,
239 (int) file_entry->size, time_string(file_entry->mtime));
240 }
241 if ((function & extract_list) || (function & extract_verbose_list)){
242 /* fputs doesnt add a trailing \n, so use fprintf */
243 fprintf(out_stream, "%s\n", file_entry->name);
244 }
245
246 cleanup:
247 free(full_name);
248 if ( full_link_name )
249 free(full_link_name);
250
251 return buffer;
252 }
253
254 static char *
255 unarchive(struct gzip_handle *src_stream, FILE *out_stream,
256 file_header_t *(*get_headers)(struct gzip_handle *),
257 void (*free_headers)(file_header_t *),
258 const int extract_function,
259 const char *prefix,
260 const char **extract_names,
261 int *err)
262 {
263 file_header_t *file_entry;
264 int extract_flag;
265 int i;
266 char *buffer = NULL;
267
268 *err = 0;
269
270 archive_offset = 0;
271 while ((file_entry = get_headers(src_stream)) != NULL) {
272 extract_flag = TRUE;
273
274 if (extract_names != NULL) {
275 int found_flag = FALSE;
276 char *p = file_entry->name;
277
278 if (p[0] == '.' && p[1] == '/')
279 p += 2;
280
281 for(i = 0; extract_names[i] != 0; i++) {
282 if (strcmp(extract_names[i], p) == 0) {
283 found_flag = TRUE;
284 break;
285 }
286 }
287 if (extract_function & extract_exclude_list) {
288 if (found_flag == TRUE) {
289 extract_flag = FALSE;
290 }
291 } else {
292 /* If its not found in the include list dont extract it */
293 if (found_flag == FALSE) {
294 extract_flag = FALSE;
295 }
296 }
297 }
298
299 if (extract_flag == TRUE) {
300 buffer = extract_archive(src_stream, out_stream,
301 file_entry, extract_function,
302 prefix, err);
303 *err = 0; /* XXX: ignore extraction errors */
304 if (*err) {
305 free_headers(file_entry);
306 break;
307 }
308 } else {
309 /* seek past the data entry */
310 seek_forward(src_stream, file_entry->size);
311 }
312 free_headers(file_entry);
313 }
314
315 return buffer;
316 }
317
318
319 static file_header_t *
320 get_header_tar(struct gzip_handle *tar_stream)
321 {
322 union {
323 unsigned char raw[512];
324 struct {
325 char name[100]; /* 0-99 */
326 char mode[8]; /* 100-107 */
327 char uid[8]; /* 108-115 */
328 char gid[8]; /* 116-123 */
329 char size[12]; /* 124-135 */
330 char mtime[12]; /* 136-147 */
331 char chksum[8]; /* 148-155 */
332 char typeflag; /* 156-156 */
333 char linkname[100]; /* 157-256 */
334 char magic[6]; /* 257-262 */
335 char version[2]; /* 263-264 */
336 char uname[32]; /* 265-296 */
337 char gname[32]; /* 297-328 */
338 char devmajor[8]; /* 329-336 */
339 char devminor[8]; /* 337-344 */
340 char prefix[155]; /* 345-499 */
341 char padding[12]; /* 500-512 */
342 } formated;
343 } tar;
344 file_header_t *tar_entry = NULL;
345 long i;
346 long sum = 0;
347
348 if (archive_offset % 512 != 0) {
349 seek_forward(tar_stream, 512 - (archive_offset % 512));
350 }
351
352 if (gzip_read(tar_stream, tar.raw, 512) != 512) {
353 /* Unfortunately its common for tar files to have all sorts of
354 * trailing garbage, fail silently */
355 // error_msg("Couldnt read header");
356 return(NULL);
357 }
358 archive_offset += 512;
359
360 /* Check header has valid magic, unfortunately some tar files
361 * have empty (0'ed) tar entries at the end, which will
362 * cause this to fail, so fail silently for now
363 */
364 if (strncmp(tar.formated.magic, "ustar", 5) != 0) {
365 #ifdef CONFIG_FEATURE_TAR_OLDGNU_COMPATABILITY
366 if (strncmp(tar.formated.magic, "\0\0\0\0\0", 5) != 0)
367 #endif
368 return(NULL);
369 }
370
371 /* Do checksum on headers */
372 for (i = 0; i < 148 ; i++) {
373 sum += tar.raw[i];
374 }
375 sum += ' ' * 8;
376 for (i = 156; i < 512 ; i++) {
377 sum += tar.raw[i];
378 }
379 if (sum != strtol(tar.formated.chksum, NULL, 8)) {
380 if ( strtol(tar.formated.chksum,NULL,8) != 0 )
381 error_msg("Invalid tar header checksum");
382 return(NULL);
383 }
384
385 /* convert to type'ed variables */
386 tar_entry = xcalloc(1, sizeof(file_header_t));
387
388
389
390 // tar_entry->name = xstrdup(tar.formated.name);
391
392 /*
393 parse_mode(tar.formated.mode, &tar_entry->mode);
394 */
395 tar_entry->mode = 07777 & strtol(tar.formated.mode, NULL, 8);
396
397 tar_entry->uid = strtol(tar.formated.uid, NULL, 8);
398 tar_entry->gid = strtol(tar.formated.gid, NULL, 8);
399 tar_entry->size = strtol(tar.formated.size, NULL, 8);
400 tar_entry->mtime = strtol(tar.formated.mtime, NULL, 8);
401
402 tar_entry->device = (strtol(tar.formated.devmajor, NULL, 8) << 8) +
403 strtol(tar.formated.devminor, NULL, 8);
404
405 /* Fix mode, used by the old format */
406 switch (tar.formated.typeflag) {
407 /* hard links are detected as regular files with 0 size and a link name */
408 case '1':
409 tar_entry->mode |= S_IFREG ;
410 break;
411 case 0:
412 case '0':
413
414 # ifdef CONFIG_FEATURE_TAR_OLDGNU_COMPATABILITY
415 if (last_char_is(tar_entry->name, '/')) {
416 tar_entry->mode |= S_IFDIR;
417 } else
418 # endif
419 tar_entry->mode |= S_IFREG;
420 break;
421 case '2':
422 tar_entry->mode |= S_IFLNK;
423 break;
424 case '3':
425 tar_entry->mode |= S_IFCHR;
426 break;
427 case '4':
428 tar_entry->mode |= S_IFBLK;
429 break;
430 case '5':
431 tar_entry->mode |= S_IFDIR;
432 break;
433 case '6':
434 tar_entry->mode |= S_IFIFO;
435 break;
436 # ifdef CONFIG_FEATURE_TAR_GNU_EXTENSIONS
437 case 'L': {
438 longname = xmalloc(tar_entry->size + 1);
439 if(gzip_read(tar_stream, longname, tar_entry->size) != tar_entry->size)
440 return NULL;
441 longname[tar_entry->size] = '\0';
442 archive_offset += tar_entry->size;
443
444 return(get_header_tar(tar_stream));
445 }
446 case 'K': {
447 linkname = xmalloc(tar_entry->size + 1);
448 if(gzip_read(tar_stream, linkname, tar_entry->size) != tar_entry->size)
449 return NULL;
450 linkname[tar_entry->size] = '\0';
451 archive_offset += tar_entry->size;
452
453 return(get_header_tar(tar_stream));
454 }
455 case 'D':
456 case 'M':
457 case 'N':
458 case 'S':
459 case 'V':
460 perror_msg("Ignoring GNU extension type %c", tar.formated.typeflag);
461 # endif
462 default:
463 perror_msg("Unknown typeflag: 0x%x", tar.formated.typeflag);
464 break;
465
466 }
467
468
469 #ifdef CONFIG_FEATURE_TAR_GNU_EXTENSIONS
470 if (longname) {
471 tar_entry->name = longname;
472 longname = NULL;
473 } else
474 #endif
475 {
476 tar_entry->name = xstrndup(tar.formated.name, 100);
477
478 if (tar.formated.prefix[0]) {
479 char *temp = tar_entry->name;
480 char *prefixTemp = xstrndup(tar.formated.prefix, 155);
481 tar_entry->name = concat_path_file(prefixTemp, temp);
482 free(temp);
483 free(prefixTemp);
484 }
485 }
486
487 if (linkname) {
488 tar_entry->link_name = linkname;
489 linkname = NULL;
490 } else
491 {
492 tar_entry->link_name = *tar.formated.linkname != '\0' ?
493 xstrndup(tar.formated.linkname, 100) : NULL;
494 }
495
496 return(tar_entry);
497 }
498
499 static void
500 free_header_tar(file_header_t *tar_entry)
501 {
502 if (tar_entry == NULL)
503 return;
504
505 free(tar_entry->name);
506 if (tar_entry->link_name)
507 free(tar_entry->link_name);
508
509 free(tar_entry);
510 }
511
512 char *
513 deb_extract(const char *package_filename, FILE *out_stream,
514 const int extract_function, const char *prefix,
515 const char *filename, int *err)
516 {
517 FILE *deb_stream = NULL;
518 file_header_t *ar_header = NULL;
519 const char **file_list = NULL;
520 char *output_buffer = NULL;
521 char *ared_file = NULL;
522 char ar_magic[8];
523 int gz_err;
524 struct gzip_handle tar_outer, tar_inner;
525 file_header_t *tar_header;
526 ssize_t len;
527
528 *err = 0;
529
530 if (filename != NULL) {
531 file_list = xmalloc(sizeof(char *) * 2);
532 file_list[0] = filename;
533 file_list[1] = NULL;
534 }
535
536 if (extract_function & extract_control_tar_gz) {
537 ared_file = "control.tar.gz";
538 }
539 else if (extract_function & extract_data_tar_gz) {
540 ared_file = "data.tar.gz";
541 } else {
542 opkg_msg(ERROR, "Internal error: extract_function=%x\n",
543 extract_function);
544 *err = -1;
545 goto cleanup;
546 }
547
548 /* open the debian package to be worked on */
549 deb_stream = wfopen(package_filename, "r");
550 if (deb_stream == NULL) {
551 *err = -1;
552 goto cleanup;
553 }
554 /* set the buffer size */
555 setvbuf(deb_stream, NULL, _IOFBF, 0x8000);
556
557 memset(&tar_outer, 0, sizeof(tar_outer));
558 tar_outer.file = deb_stream;
559 gzip_exec(&tar_outer, NULL);
560
561 /* walk through outer tar file to find ared_file */
562 while ((tar_header = get_header_tar(&tar_outer)) != NULL) {
563 int name_offset = 0;
564 if (strncmp(tar_header->name, "./", 2) == 0)
565 name_offset = 2;
566
567 if (strcmp(ared_file, tar_header->name+name_offset) == 0) {
568 memset(&tar_inner, 0, sizeof(tar_inner));
569 tar_inner.gzip = &tar_outer;
570 gzip_exec(&tar_inner, NULL);
571
572 archive_offset = 0;
573
574 output_buffer = unarchive(&tar_inner,
575 out_stream,
576 get_header_tar,
577 free_header_tar,
578 extract_function,
579 prefix,
580 file_list,
581 err);
582
583 free_header_tar(tar_header);
584 gzip_close(&tar_inner);
585 break;
586 }
587
588 seek_forward(&tar_outer, tar_header->size);
589 free_header_tar(tar_header);
590 }
591
592 cleanup:
593 gzip_close(&tar_outer);
594
595 if (file_list)
596 free(file_list);
597
598 return output_buffer;
599 }