diff options
Diffstat (limited to 'libbb/unarchive.c')
-rw-r--r-- | libbb/unarchive.c | 500 |
1 files changed, 500 insertions, 0 deletions
diff --git a/libbb/unarchive.c b/libbb/unarchive.c new file mode 100644 index 000000000..a0cc28eca --- /dev/null +++ b/libbb/unarchive.c | |||
@@ -0,0 +1,500 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2000 by Glenn McGrath | ||
3 | * Copyright (C) 2001 by Laurence Anderson | ||
4 | * | ||
5 | * Based on previous work by busybox developers and others. | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify | ||
8 | * it under the terms of the GNU General Public License as published by | ||
9 | * the Free Software Foundation; either version 2 of the License, or | ||
10 | * (at your option) any later version. | ||
11 | * | ||
12 | * This program is distributed in the hope that it will be useful, | ||
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
15 | * GNU Library General Public License for more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License | ||
18 | * along with this program; if not, write to the Free Software | ||
19 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
20 | */ | ||
21 | |||
22 | #include <stdio.h> | ||
23 | #include <errno.h> | ||
24 | #include <stdlib.h> | ||
25 | #include <string.h> | ||
26 | #include <unistd.h> | ||
27 | #include <utime.h> | ||
28 | #include "libbb.h" | ||
29 | |||
30 | typedef struct file_headers_s { /* Format-independent description of one archive entry, filled in by the get_header_* readers */ | |
31 | char *name; /* entry path as stored in the archive (heap allocated by the header readers) */ | |
32 | char *link_name; /* link target: used as symlink() target and as link() source; NULL when absent */ | |
33 | off_t size; /* number of data bytes that follow the header in the archive */ | |
34 | uid_t uid; /* owner id applied with lchown() on extraction */ | |
35 | gid_t gid; /* group id applied with lchown() on extraction */ | |
36 | mode_t mode; /* permission bits plus file type; tested with S_ISREG/S_ISLNK/S_IFMT */ | |
37 | time_t mtime; /* modification time, restored via utime() when requested */ | |
38 | dev_t device; /* device number handed to mknod(), built as (major << 8) | minor */ | |
39 | } file_header_t; | |
40 | |||
41 | off_t archive_offset; /* running count of bytes consumed from the current archive; the header readers use it to compute alignment padding */ | |
42 | |||
43 | void seek_sub_file(FILE *src_stream, const int count) | ||
44 | { | ||
45 | int i; | ||
46 | /* Try to fseek as faster */ | ||
47 | archive_offset += count; | ||
48 | if (fseek(src_stream, count, SEEK_CUR) != 0 && errno == ESPIPE) { | ||
49 | for (i = 0; i < count; i++) { | ||
50 | fgetc(src_stream); | ||
51 | } | ||
52 | } | ||
53 | return; | ||
54 | } | ||
55 | |||
56 | |||
57 | /* Extract the data postioned at src_stream to either filesystem, stdout or | ||
58 | * buffer depending on the value of 'function' which is defined in libbb.h | ||
59 | * | ||
60 | * prefix doesnt have to be just a directory, it may prefix the filename as well. | ||
61 | * | ||
62 | * e.g. '/var/lib/dpkg/info/dpkg.' will extract all files to the base bath | ||
63 | * '/var/lib/dpkg/info/' and all files/dirs created in that dir will have | ||
64 | * 'dpkg.' as their prefix | ||
65 | * | ||
66 | * For this reason if prefix does point to a dir then it must end with a | ||
67 | * trailing '/' or else the last dir will be assumed to be the file prefix | ||
68 | */ | ||
69 | char *extract_archive(FILE *src_stream, FILE *out_stream, const file_header_t *file_entry, | ||
70 | const int function, const char *prefix) | ||
71 | { | ||
72 | FILE *dst_stream = NULL; | ||
73 | char *full_name = NULL; | ||
74 | char *buffer = NULL; | ||
75 | struct utimbuf t; | ||
76 | |||
77 | /* prefix doesnt have to be a proper path it may prepend | ||
78 | * the filename as well */ | ||
79 | if (prefix != NULL) { | ||
80 | /* strip leading '/' in filename to extract as prefix may not be dir */ | ||
81 | /* Cant use concat_path_file here as prefix might not be a directory */ | ||
82 | char *path = file_entry->name; | ||
83 | if (*path == '/') { | ||
84 | path++; | ||
85 | } | ||
86 | full_name = xmalloc(strlen(prefix) + strlen(path) + 1); | ||
87 | strcpy(full_name, prefix); | ||
88 | strcat(full_name, path); | ||
89 | } else { | ||
90 | full_name = file_entry->name; | ||
91 | } | ||
92 | |||
93 | if (function & extract_to_stdout) { | ||
94 | if (S_ISREG(file_entry->mode)) { | ||
95 | copy_file_chunk(src_stream, out_stream, file_entry->size); | ||
96 | archive_offset += file_entry->size; | ||
97 | } | ||
98 | } | ||
99 | else if (function & extract_one_to_buffer) { | ||
100 | if (S_ISREG(file_entry->mode)) { | ||
101 | buffer = (char *) xmalloc(file_entry->size + 1); | ||
102 | fread(buffer, 1, file_entry->size, src_stream); | ||
103 | archive_offset += file_entry->size; | ||
104 | return(buffer); | ||
105 | } | ||
106 | } | ||
107 | else if (function & extract_all_to_fs) { | ||
108 | #if 0 | ||
109 | struct stat oldfile; | ||
110 | if ( (S_ISLNK(file_entry->mode) ? lstat (full_name, &oldfile) : stat (full_name, &oldfile)) == 0) { /* The file already exists */ | ||
111 | if (function & extract_unconditional || oldfile.st_mtime < file_entry->mtime) { | ||
112 | if (!S_ISDIR(oldfile.st_mode)) { | ||
113 | unlink(full_name); /* Directories might not be empty etc */ | ||
114 | } | ||
115 | } else { | ||
116 | error_msg("%s not created: newer or same age file exists", file_entry->name); | ||
117 | if (S_ISREG(file_entry->mode)) { | ||
118 | seek_sub_file(src_stream, file_entry->size); | ||
119 | } | ||
120 | return (NULL); | ||
121 | } | ||
122 | } | ||
123 | #endif | ||
124 | switch(file_entry->mode & S_IFMT) { | ||
125 | case S_IFREG: | ||
126 | if (file_entry->link_name) { /* Found a cpio hard link */ | ||
127 | if (link(file_entry->link_name, full_name) != 0) { | ||
128 | perror_msg("Cannot link from %s to '%s'", | ||
129 | file_entry->name, file_entry->link_name); | ||
130 | } | ||
131 | } else { | ||
132 | if ((dst_stream = wfopen(full_name, "w")) == NULL) { | ||
133 | seek_sub_file(src_stream, file_entry->size); | ||
134 | return NULL; | ||
135 | } | ||
136 | archive_offset += file_entry->size; | ||
137 | copy_file_chunk(src_stream, dst_stream, file_entry->size); | ||
138 | fclose(dst_stream); | ||
139 | } | ||
140 | break; | ||
141 | case S_IFDIR: | ||
142 | /* Use create_path instead of mkdir incase prefix path | ||
143 | * hasnt been created */ | ||
144 | if (function & extract_create_dirs) { | ||
145 | if (create_path(full_name, file_entry->mode) == FALSE) { | ||
146 | return NULL; | ||
147 | } | ||
148 | } | ||
149 | break; | ||
150 | case S_IFLNK: | ||
151 | if (symlink(file_entry->link_name, full_name) < 0) { | ||
152 | perror_msg("Cannot create symlink from %s to '%s'", file_entry->name, file_entry->link_name); | ||
153 | return NULL; | ||
154 | } | ||
155 | break; | ||
156 | case S_IFSOCK: | ||
157 | case S_IFBLK: | ||
158 | case S_IFCHR: | ||
159 | case S_IFIFO: | ||
160 | if (mknod(full_name, file_entry->mode, file_entry->device) == -1) { | ||
161 | perror_msg("Cannot create node %s", file_entry->name); | ||
162 | return NULL; | ||
163 | } | ||
164 | break; | ||
165 | } | ||
166 | if (function & extract_preserve_date) { | ||
167 | t.actime = file_entry->mtime; | ||
168 | t.modtime = file_entry->mtime; | ||
169 | utime(full_name, &t); | ||
170 | } | ||
171 | chmod(full_name, file_entry->mode); | ||
172 | lchown(full_name, file_entry->uid, file_entry->gid); | ||
173 | } else { | ||
174 | /* If we arent extracting data we have to skip it, | ||
175 | * if data size is 0 then then just do it anyway | ||
176 | * (saves testing for it) */ | ||
177 | seek_sub_file(src_stream, file_entry->size); | ||
178 | } | ||
179 | |||
180 | /* extract_list and extract_verbose_list can be used in conjunction | ||
181 | * with one of the above four extraction functions, so do this seperately */ | ||
182 | if (function & extract_verbose_list) { | ||
183 | fprintf(out_stream, "%s %d/%d %8d %s ", mode_string(file_entry->mode), | ||
184 | file_entry->uid, file_entry->gid, | ||
185 | (int) file_entry->size, time_string(file_entry->mtime)); | ||
186 | } | ||
187 | if ((function & extract_list) || (function & extract_verbose_list)){ | ||
188 | /* fputs doesnt add a trailing \n, so use fprintf */ | ||
189 | fprintf(out_stream, "%s\n", file_entry->name); | ||
190 | } | ||
191 | |||
192 | free(full_name); | ||
193 | |||
194 | return(NULL); /* Maybe we should say if failed */ | ||
195 | } | ||
196 | |||
197 | #if defined BB_AR || defined BB_CPIO || defined BB_UNTAR | ||
198 | char *unarchive(FILE *src_stream, void *(*get_headers)(FILE *), | ||
199 | const int extract_function, const char *prefix, char **extract_names) | ||
200 | { | ||
201 | file_header_t *file_entry; | ||
202 | int found; | ||
203 | int i; | ||
204 | char *buffer = NULL; | ||
205 | |||
206 | archive_offset = 0; | ||
207 | while ((file_entry = (file_header_t *) get_headers(src_stream)) != NULL) { | ||
208 | found = FALSE; | ||
209 | if (extract_names[0] != NULL) { | ||
210 | for(i = 0; extract_names[i] != 0; i++) { | ||
211 | if (strcmp(extract_names[i], file_entry->name) == 0) { | ||
212 | found = TRUE; | ||
213 | } | ||
214 | } | ||
215 | if (!found) { | ||
216 | /* seek past the data entry */ | ||
217 | if (!S_ISLNK(file_entry->mode) && file_entry->link_name && file_entry->size == 0) { | ||
218 | error_msg("You should extract %s as other files are hardlinked to it", file_entry->name); | ||
219 | } | ||
220 | seek_sub_file(src_stream, file_entry->size); | ||
221 | continue; | ||
222 | } | ||
223 | } | ||
224 | buffer = extract_archive(src_stream, stdout, file_entry, extract_function, prefix); | ||
225 | } | ||
226 | return(buffer); | ||
227 | } | ||
228 | #endif | ||
229 | |||
230 | #if defined BB_AR || defined BB_DPKG_DEB || defined BB_DPKG | ||
231 | void *get_header_ar(FILE *src_stream) | ||
232 | { | ||
233 | file_header_t *typed; | ||
234 | union { | ||
235 | char raw[60]; | ||
236 | struct { | ||
237 | char name[16]; | ||
238 | char date[12]; | ||
239 | char uid[6]; | ||
240 | char gid[6]; | ||
241 | char mode[8]; | ||
242 | char size[10]; | ||
243 | char magic[2]; | ||
244 | } formated; | ||
245 | } ar; | ||
246 | static char *ar_long_names; | ||
247 | |||
248 | if (fread(ar.raw, 1, 60, src_stream) != 60) { | ||
249 | return(NULL); | ||
250 | } | ||
251 | archive_offset += 60; | ||
252 | /* align the headers based on the header magic */ | ||
253 | if ((ar.formated.magic[0] != '`') || (ar.formated.magic[1] != '\n')) { | ||
254 | /* some version of ar, have an extra '\n' after each data entry, | ||
255 | * this puts the next header out by 1 */ | ||
256 | if (ar.formated.magic[1] != '`') { | ||
257 | error_msg("Invalid magic"); | ||
258 | return(NULL); | ||
259 | } | ||
260 | /* read the next char out of what would be the data section, | ||
261 | * if its a '\n' then it is a valid header offset by 1*/ | ||
262 | archive_offset++; | ||
263 | if (fgetc(src_stream) != '\n') { | ||
264 | error_msg("Invalid magic"); | ||
265 | return(NULL); | ||
266 | } | ||
267 | /* fix up the header, we started reading 1 byte too early */ | ||
268 | /* raw_header[60] wont be '\n' as it should, but it doesnt matter */ | ||
269 | memmove(ar.raw, &ar.raw[1], 59); | ||
270 | } | ||
271 | |||
272 | typed = (file_header_t *) xcalloc(1, sizeof(file_header_t)); | ||
273 | |||
274 | typed->size = (size_t) atoi(ar.formated.size); | ||
275 | /* long filenames have '/' as the first character */ | ||
276 | if (ar.formated.name[0] == '/') { | ||
277 | if (ar.formated.name[1] == '/') { | ||
278 | /* If the second char is a '/' then this entries data section | ||
279 | * stores long filename for multiple entries, they are stored | ||
280 | * in static variable long_names for use in future entries */ | ||
281 | ar_long_names = (char *) xrealloc(ar_long_names, typed->size); | ||
282 | fread(ar_long_names, 1, typed->size, src_stream); | ||
283 | archive_offset += typed->size; | ||
284 | /* This ar entries data section only contained filenames for other records | ||
285 | * they are stored in the static ar_long_names for future reference */ | ||
286 | return(NULL); | ||
287 | } else { | ||
288 | /* The number after the '/' indicates the offset in the ar data section | ||
289 | (saved in variable long_name) that conatains the real filename */ | ||
290 | if (!ar_long_names) { | ||
291 | error_msg("Cannot resolve long file name"); | ||
292 | return (NULL); | ||
293 | } | ||
294 | typed->name = xstrdup(ar_long_names + atoi(&ar.formated.name[1])); | ||
295 | } | ||
296 | } else { | ||
297 | /* short filenames */ | ||
298 | typed->name = xcalloc(1, 16); | ||
299 | strncpy(typed->name, ar.formated.name, 16); | ||
300 | } | ||
301 | typed->name[strcspn(typed->name, " /")]='\0'; | ||
302 | |||
303 | /* convert the rest of the now valid char header to its typed struct */ | ||
304 | parse_mode(ar.formated.mode, &typed->mode); | ||
305 | typed->mtime = atoi(ar.formated.date); | ||
306 | typed->uid = atoi(ar.formated.uid); | ||
307 | typed->gid = atoi(ar.formated.gid); | ||
308 | |||
309 | return(typed); | ||
310 | } | ||
311 | #endif | ||
312 | |||
313 | #if defined BB_CPIO | ||
314 | void *get_header_cpio(FILE *src_stream) | ||
315 | { | ||
316 | file_header_t *cpio_entry = NULL; | ||
317 | char cpio_header[110]; | ||
318 | char dummy[14]; | ||
319 | int namesize; | ||
320 | int major, minor, nlink; | ||
321 | |||
322 | /* There can be padding before archive header */ | ||
323 | seek_sub_file(src_stream, (4 - (archive_offset % 4)) % 4); | ||
324 | if (fread(cpio_header, 1, 110, src_stream) == 110) { | ||
325 | archive_offset += 110; | ||
326 | if (strncmp(cpio_header, "07070", 5) != 0) { | ||
327 | error_msg("Unsupported format or invalid magic"); | ||
328 | return(NULL); | ||
329 | } | ||
330 | switch (cpio_header[5]) { | ||
331 | case '2': /* "crc" header format */ | ||
332 | /* Doesnt do the crc check yet */ | ||
333 | case '1': /* "newc" header format */ | ||
334 | cpio_entry = (file_header_t *) xcalloc(1, sizeof(file_header_t)); | ||
335 | sscanf(cpio_header, "%14c%8x%8x%8x%8x%8lx%8lx%16c%8x%8x%8x%8c", | ||
336 | dummy, &cpio_entry->mode, &cpio_entry->uid, &cpio_entry->gid, | ||
337 | &nlink, &cpio_entry->mtime, &cpio_entry->size, | ||
338 | dummy, &major, &minor, &namesize, dummy); | ||
339 | |||
340 | cpio_entry->name = (char *) xcalloc(1, namesize); | ||
341 | fread(cpio_entry->name, 1, namesize, src_stream); /* Read in filename */ | ||
342 | archive_offset += namesize; | ||
343 | /* Skip padding before file contents */ | ||
344 | seek_sub_file(src_stream, (4 - (archive_offset % 4)) % 4); | ||
345 | if (strcmp(cpio_entry->name, "TRAILER!!!") == 0) { | ||
346 | printf("%d blocks\n", (int) (archive_offset % 512 ? (archive_offset / 512) + 1 : archive_offset / 512)); /* Always round up */ | ||
347 | return(NULL); | ||
348 | } | ||
349 | |||
350 | if (S_ISLNK(cpio_entry->mode)) { | ||
351 | cpio_entry->link_name = (char *) xcalloc(1, cpio_entry->size + 1); | ||
352 | fread(cpio_entry->link_name, 1, cpio_entry->size, src_stream); | ||
353 | archive_offset += cpio_entry->size; | ||
354 | } | ||
355 | if (nlink > 1 && !S_ISDIR(cpio_entry->mode) && cpio_entry->size == 0) { | ||
356 | error_msg("%s not extracted: Cannot handle hard links yet", cpio_entry->name); | ||
357 | return(get_header_cpio(src_stream)); /* Recurse to next file */ | ||
358 | } | ||
359 | cpio_entry->device = (major << 8) | minor; | ||
360 | break; | ||
361 | default: | ||
362 | error_msg("Unsupported format"); | ||
363 | return(NULL); | ||
364 | } | ||
365 | if (ferror(src_stream) || feof(src_stream)) { | ||
366 | perror_msg("Stream error"); | ||
367 | return(NULL); | ||
368 | } | ||
369 | } | ||
370 | return(cpio_entry); | ||
371 | } | ||
372 | #endif | ||
373 | |||
374 | #if defined BB_UNTAR || defined BB_DPKG_DEB || defined BB_DPKG | ||
375 | void *get_header_tar(FILE *tar_stream) | ||
376 | { | ||
377 | union { | ||
378 | unsigned char raw[512]; | ||
379 | struct { | ||
380 | char name[100]; /* 0-99 */ | ||
381 | char mode[8]; /* 100-107 */ | ||
382 | char uid[8]; /* 108-115 */ | ||
383 | char gid[8]; /* 116-123 */ | ||
384 | char size[12]; /* 124-135 */ | ||
385 | char mtime[12]; /* 136-147 */ | ||
386 | char chksum[8]; /* 148-155 */ | ||
387 | char typeflag; /* 156-156 */ | ||
388 | char linkname[100]; /* 157-256 */ | ||
389 | char magic[6]; /* 257-262 */ | ||
390 | char version[2]; /* 263-264 */ | ||
391 | char uname[32]; /* 265-296 */ | ||
392 | char gname[32]; /* 297-328 */ | ||
393 | char devmajor[8]; /* 329-336 */ | ||
394 | char devminor[8]; /* 337-344 */ | ||
395 | char prefix[155]; /* 345-499 */ | ||
396 | char padding[12]; /* 500-512 */ | ||
397 | } formated; | ||
398 | } tar; | ||
399 | file_header_t *tar_entry = NULL; | ||
400 | long i; | ||
401 | long sum = 0; | ||
402 | |||
403 | if (archive_offset % 512 != 0) { | ||
404 | seek_sub_file(tar_stream, 512 - (archive_offset % 512)); | ||
405 | } | ||
406 | |||
407 | if (fread(tar.raw, 1, 512, tar_stream) != 512) { | ||
408 | error_msg("Couldnt read header"); | ||
409 | return(NULL); | ||
410 | } | ||
411 | archive_offset += 512; | ||
412 | |||
413 | /* Check header has valid magic, unfortunately some tar files | ||
414 | * have empty (0'ed) tar entries at the end, which will | ||
415 | * cause this to fail, so fail silently for now | ||
416 | */ | ||
417 | if (strncmp(tar.formated.magic, "ustar", 5) != 0) { | ||
418 | return(NULL); | ||
419 | } | ||
420 | |||
421 | /* Do checksum on headers */ | ||
422 | for (i = 0; i < 148 ; i++) { | ||
423 | sum += tar.raw[i]; | ||
424 | } | ||
425 | sum += ' ' * 8; | ||
426 | for (i = 156; i < 512 ; i++) { | ||
427 | sum += tar.raw[i]; | ||
428 | } | ||
429 | if (sum != strtol(tar.formated.chksum, NULL, 8)) { | ||
430 | error_msg("Invalid tar header checksum"); | ||
431 | return(NULL); | ||
432 | } | ||
433 | |||
434 | /* convert to type'ed variables */ | ||
435 | tar_entry = xcalloc(1, sizeof(file_header_t)); | ||
436 | tar_entry->name = xstrdup(tar.formated.name); | ||
437 | |||
438 | parse_mode(tar.formated.mode, &tar_entry->mode); | ||
439 | tar_entry->uid = strtol(tar.formated.uid, NULL, 8); | ||
440 | tar_entry->gid = strtol(tar.formated.gid, NULL, 8); | ||
441 | tar_entry->size = strtol(tar.formated.size, NULL, 8); | ||
442 | tar_entry->mtime = strtol(tar.formated.mtime, NULL, 8); | ||
443 | tar_entry->link_name = strlen(tar.formated.linkname) ? xstrdup(tar.formated.linkname) : NULL; | ||
444 | tar_entry->device = (strtol(tar.formated.devmajor, NULL, 8) << 8) + | ||
445 | strtol(tar.formated.devminor, NULL, 8); | ||
446 | |||
447 | return(tar_entry); | ||
448 | } | ||
449 | #endif | ||
450 | |||
451 | #if defined BB_DPKG || defined BB_DPKG_DEB | ||
452 | char *deb_extract(const char *package_filename, FILE *out_stream, const int extract_function, | ||
453 | const char *prefix, const char *filename) | ||
454 | { | ||
455 | FILE *deb_stream; | ||
456 | FILE *uncompressed_stream = NULL; | ||
457 | file_header_t *ar_header = NULL; | ||
458 | char *output_buffer = NULL; | ||
459 | char *ared_file = NULL; | ||
460 | char ar_magic[8]; | ||
461 | char **file_list; | ||
462 | int gunzip_pid; | ||
463 | |||
464 | file_list = malloc(sizeof(char *)); | ||
465 | file_list[0] = xstrdup(filename); | ||
466 | file_list[1] = NULL; | ||
467 | |||
468 | if (extract_function & extract_control_tar_gz) { | ||
469 | ared_file = xstrdup("control.tar.gz"); | ||
470 | } | ||
471 | else if (extract_function & extract_data_tar_gz) { | ||
472 | ared_file = xstrdup("data.tar.gz"); | ||
473 | } | ||
474 | |||
475 | /* open the debian package to be worked on */ | ||
476 | deb_stream = wfopen(package_filename, "r"); | ||
477 | |||
478 | /* check ar magic */ | ||
479 | fread(ar_magic, 1, 8, deb_stream); | ||
480 | if (strncmp(ar_magic,"!<arch>",7) != 0) { | ||
481 | error_msg_and_die("invalid magic"); | ||
482 | } | ||
483 | archive_offset = 8; | ||
484 | |||
485 | while ((ar_header = get_header_ar(deb_stream)) != NULL) { | ||
486 | if (strcmp(ared_file, ar_header->name) == 0) { | ||
487 | /* open a stream of decompressed data */ | ||
488 | uncompressed_stream = gz_open(deb_stream, &gunzip_pid); | ||
489 | archive_offset = 0; | ||
490 | output_buffer = unarchive(uncompressed_stream, get_header_tar, extract_function, prefix, file_list); | ||
491 | } | ||
492 | seek_sub_file(deb_stream, ar_header->size); | ||
493 | } | ||
494 | gz_close(gunzip_pid); | ||
495 | fclose(deb_stream); | ||
496 | fclose(uncompressed_stream); | ||
497 | free(ared_file); | ||
498 | return(output_buffer); | ||
499 | } | ||
500 | #endif | ||