aboutsummaryrefslogtreecommitdiff
path: root/archival
diff options
context:
space:
mode:
Diffstat (limited to 'archival')
-rw-r--r--archival/Config.in30
-rw-r--r--archival/Kbuild19
-rw-r--r--archival/bbunzip.c173
-rw-r--r--archival/bzip2.c10
-rw-r--r--archival/gzip.c12
-rw-r--r--archival/libunarchive/Kbuild1
-rw-r--r--archival/libunarchive/decompress_uncompress.c2
-rw-r--r--archival/libunarchive/decompress_unxz.c122
-rw-r--r--archival/libunarchive/unxz/README136
-rw-r--r--archival/libunarchive/unxz/xz.h222
-rw-r--r--archival/libunarchive/unxz/xz_config.h121
-rw-r--r--archival/libunarchive/unxz/xz_dec_bcj.c564
-rw-r--r--archival/libunarchive/unxz/xz_dec_lzma2.c1155
-rw-r--r--archival/libunarchive/unxz/xz_dec_stream.c821
-rw-r--r--archival/libunarchive/unxz/xz_lzma2.h204
-rw-r--r--archival/libunarchive/unxz/xz_private.h120
-rw-r--r--archival/libunarchive/unxz/xz_stream.h57
-rw-r--r--archival/lzop.c14
-rw-r--r--archival/rpm2cpio.c35
-rw-r--r--archival/unzip.c154
20 files changed, 3778 insertions, 194 deletions
diff --git a/archival/Config.in b/archival/Config.in
index deacc2822..4f762e860 100644
--- a/archival/Config.in
+++ b/archival/Config.in
@@ -5,6 +5,12 @@
5 5
6menu "Archival Utilities" 6menu "Archival Utilities"
7 7
8config FEATURE_SEAMLESS_XZ
9 bool "Make tar, rpm, modprobe etc understand .xz data"
10 default n
11 help
12 Make tar, rpm, modprobe etc understand .xz data.
13
8config FEATURE_SEAMLESS_LZMA 14config FEATURE_SEAMLESS_LZMA
9 bool "Make tar, rpm, modprobe etc understand .lzma data" 15 bool "Make tar, rpm, modprobe etc understand .lzma data"
10 default n 16 default n
@@ -225,7 +231,7 @@ config FEATURE_TAR_CREATE
225config FEATURE_TAR_AUTODETECT 231config FEATURE_TAR_AUTODETECT
226 bool "Autodetect compressed tarballs" 232 bool "Autodetect compressed tarballs"
227 default n 233 default n
228 depends on TAR && (FEATURE_SEAMLESS_Z || FEATURE_SEAMLESS_GZ || FEATURE_SEAMLESS_BZ2 || FEATURE_SEAMLESS_LZMA) 234 depends on TAR && (FEATURE_SEAMLESS_Z || FEATURE_SEAMLESS_GZ || FEATURE_SEAMLESS_BZ2 || FEATURE_SEAMLESS_LZMA || FEATURE_SEAMLESS_XZ)
229 help 235 help
230 With this option tar can automatically detect compressed 236 With this option tar can automatically detect compressed
231 tarballs. Currently it works only on files (not pipes etc). 237 tarballs. Currently it works only on files (not pipes etc).
@@ -327,6 +333,28 @@ config FEATURE_LZMA_FAST
327 This option reduces decompression time by about 25% at the cost of 333 This option reduces decompression time by about 25% at the cost of
328 a 1K bigger binary. 334 a 1K bigger binary.
329 335
336config LZMA
337 bool "Provide lzma alias which supports only unpacking"
338 default n
339 depends on UNLZMA
340 help
341 Enable this option if you want commands like "lzma -d" to work.
342 IOW: you'll get lzma applet, but it will always require -d option.
343
344config UNXZ
345 bool "unxz"
346 default n
347 help
348 unxz is an unlzma successor.
349
350config XZ
351 bool "Provide xz alias which supports only unpacking"
352 default n
353 depends on UNXZ
354 help
355 Enable this option if you want commands like "xz -d" to work.
356 IOW: you'll get xz applet, but it will always require -d option.
357
330config UNZIP 358config UNZIP
331 bool "unzip" 359 bool "unzip"
332 default n 360 default n
diff --git a/archival/Kbuild b/archival/Kbuild
index 53bd7e21e..3300ea90f 100644
--- a/archival/Kbuild
+++ b/archival/Kbuild
@@ -8,18 +8,21 @@ libs-y += libunarchive/
8 8
9lib-y:= 9lib-y:=
10lib-$(CONFIG_AR) += ar.o 10lib-$(CONFIG_AR) += ar.o
11lib-$(CONFIG_BUNZIP2) += bbunzip.o
12lib-$(CONFIG_BZIP2) += bzip2.o bbunzip.o
13lib-$(CONFIG_UNLZMA) += bbunzip.o
14lib-$(CONFIG_CPIO) += cpio.o 11lib-$(CONFIG_CPIO) += cpio.o
15lib-$(CONFIG_DPKG) += dpkg.o 12lib-$(CONFIG_DPKG) += dpkg.o
16lib-$(CONFIG_DPKG_DEB) += dpkg_deb.o 13lib-$(CONFIG_DPKG_DEB) += dpkg_deb.o
17lib-$(CONFIG_GUNZIP) += bbunzip.o
18lib-$(CONFIG_GZIP) += gzip.o bbunzip.o
19lib-$(CONFIG_LZOP) += lzop.o lzo1x_1.o lzo1x_1o.o lzo1x_d.o bbunzip.o
20lib-$(CONFIG_LZOP_COMPR_HIGH) += lzo1x_9x.o
21lib-$(CONFIG_RPM2CPIO) += rpm2cpio.o 14lib-$(CONFIG_RPM2CPIO) += rpm2cpio.o
22lib-$(CONFIG_RPM) += rpm.o 15lib-$(CONFIG_RPM) += rpm.o
23lib-$(CONFIG_TAR) += tar.o 16lib-$(CONFIG_TAR) += tar.o
24lib-$(CONFIG_UNCOMPRESS) += bbunzip.o
25lib-$(CONFIG_UNZIP) += unzip.o 17lib-$(CONFIG_UNZIP) += unzip.o
18
19lib-$(CONFIG_LZOP) += lzop.o lzo1x_1.o lzo1x_1o.o lzo1x_d.o bbunzip.o
20lib-$(CONFIG_LZOP_COMPR_HIGH) += lzo1x_9x.o
21lib-$(CONFIG_GZIP) += gzip.o bbunzip.o
22lib-$(CONFIG_BZIP2) += bzip2.o bbunzip.o
23
24lib-$(CONFIG_UNXZ) += bbunzip.o
25lib-$(CONFIG_UNLZMA) += bbunzip.o
26lib-$(CONFIG_BUNZIP2) += bbunzip.o
27lib-$(CONFIG_GUNZIP) += bbunzip.o
28lib-$(CONFIG_UNCOMPRESS) += bbunzip.o
diff --git a/archival/bbunzip.c b/archival/bbunzip.c
index df674bc6c..08db2752c 100644
--- a/archival/bbunzip.c
+++ b/archival/bbunzip.c
@@ -4,17 +4,16 @@
4 * 4 *
5 * Licensed under GPLv2 or later, see file LICENSE in this tarball for details. 5 * Licensed under GPLv2 or later, see file LICENSE in this tarball for details.
6 */ 6 */
7
8#include "libbb.h" 7#include "libbb.h"
9#include "unarchive.h" 8#include "unarchive.h"
10 9
11enum { 10enum {
12 OPT_STDOUT = 0x1, 11 OPT_STDOUT = 1 << 0,
13 OPT_FORCE = 0x2, 12 OPT_FORCE = 1 << 1,
14/* gunzip and bunzip2 only: */ 13 /* only some decompressors: */
15 OPT_VERBOSE = 0x4, 14 OPT_VERBOSE = 1 << 2,
16 OPT_DECOMPRESS = 0x8, 15 OPT_DECOMPRESS = 1 << 3,
17 OPT_TEST = 0x10, 16 OPT_TEST = 1 << 4,
18}; 17};
19 18
20static 19static
@@ -28,9 +27,15 @@ int open_to_or_warn(int to_fd, const char *filename, int flags, int mode)
28 return 0; 27 return 0;
29} 28}
30 29
30char* FAST_FUNC append_ext(char *filename, const char *expected_ext)
31{
32 return xasprintf("%s.%s", filename, expected_ext);
33}
34
31int FAST_FUNC bbunpack(char **argv, 35int FAST_FUNC bbunpack(char **argv,
32 char* (*make_new_name)(char *filename), 36 IF_DESKTOP(long long) int FAST_FUNC (*unpacker)(unpack_info_t *info),
33 IF_DESKTOP(long long) int (*unpacker)(unpack_info_t *info) 37 char* FAST_FUNC (*make_new_name)(char *filename, const char *expected_ext),
38 const char *expected_ext
34) 39)
35{ 40{
36 struct stat stat_buf; 41 struct stat stat_buf;
@@ -69,7 +74,7 @@ int FAST_FUNC bbunpack(char **argv,
69 74
70 /* Open dst if we are going to unpack to file */ 75 /* Open dst if we are going to unpack to file */
71 if (filename) { 76 if (filename) {
72 new_name = make_new_name(filename); 77 new_name = make_new_name(filename, expected_ext);
73 if (!new_name) { 78 if (!new_name) {
74 bb_error_msg("%s: unknown suffix - ignored", filename); 79 bb_error_msg("%s: unknown suffix - ignored", filename);
75 goto err; 80 goto err;
@@ -141,10 +146,9 @@ int FAST_FUNC bbunpack(char **argv,
141 return exitcode; 146 return exitcode;
142} 147}
143 148
144#if ENABLE_BUNZIP2 || ENABLE_UNLZMA || ENABLE_UNCOMPRESS 149#if ENABLE_UNCOMPRESS || ENABLE_BUNZIP2 || ENABLE_UNLZMA || ENABLE_UNXZ
145
146static 150static
147char* make_new_name_generic(char *filename, const char *expected_ext) 151char* FAST_FUNC make_new_name_generic(char *filename, const char *expected_ext)
148{ 152{
149 char *extension = strrchr(filename, '.'); 153 char *extension = strrchr(filename, '.');
150 if (!extension || strcmp(extension + 1, expected_ext) != 0) { 154 if (!extension || strcmp(extension + 1, expected_ext) != 0) {
@@ -155,42 +159,35 @@ char* make_new_name_generic(char *filename, const char *expected_ext)
155 *extension = '\0'; 159 *extension = '\0';
156 return filename; 160 return filename;
157} 161}
158
159#endif 162#endif
160 163
161 164
162/* 165/*
163 * Modified for busybox by Glenn McGrath 166 * Uncompress applet for busybox (c) 2002 Glenn McGrath
164 * Added support output to stdout by Thomas Lundquist <thomasez@zelow.no>
165 * 167 *
166 * Licensed under GPLv2 or later, see file LICENSE in this tarball for details. 168 * Licensed under GPLv2 or later, see file LICENSE in this tarball for details.
167 */ 169 */
168 170#if ENABLE_UNCOMPRESS
169#if ENABLE_BUNZIP2
170
171static 171static
172char* make_new_name_bunzip2(char *filename) 172IF_DESKTOP(long long) int FAST_FUNC unpack_uncompress(unpack_info_t *info UNUSED_PARAM)
173{ 173{
174 return make_new_name_generic(filename, "bz2"); 174 IF_DESKTOP(long long) int status = -1;
175}
176 175
177static 176 if ((xread_char(STDIN_FILENO) != 0x1f) || (xread_char(STDIN_FILENO) != 0x9d)) {
178IF_DESKTOP(long long) int unpack_bunzip2(unpack_info_t *info UNUSED_PARAM) 177 bb_error_msg("invalid magic");
179{ 178 } else {
180 return unpack_bz2_stream_prime(STDIN_FILENO, STDOUT_FILENO); 179 status = unpack_Z_stream(STDIN_FILENO, STDOUT_FILENO);
180 }
181 return status;
181} 182}
182 183int uncompress_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
183int bunzip2_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; 184int uncompress_main(int argc UNUSED_PARAM, char **argv)
184int bunzip2_main(int argc UNUSED_PARAM, char **argv)
185{ 185{
186 getopt32(argv, "cfvdt"); 186 getopt32(argv, "cf");
187 argv += optind; 187 argv += optind;
188 if (applet_name[2] == 'c')
189 option_mask32 |= OPT_STDOUT;
190 188
191 return bbunpack(argv, make_new_name_bunzip2, unpack_bunzip2); 189 return bbunpack(argv, unpack_uncompress, make_new_name_generic, "Z");
192} 190}
193
194#endif 191#endif
195 192
196 193
@@ -221,11 +218,9 @@ int bunzip2_main(int argc UNUSED_PARAM, char **argv)
221 * See the license_msg below and the file COPYING for the software license. 218 * See the license_msg below and the file COPYING for the software license.
222 * See the file algorithm.doc for the compression algorithms and file formats. 219 * See the file algorithm.doc for the compression algorithms and file formats.
223 */ 220 */
224
225#if ENABLE_GUNZIP 221#if ENABLE_GUNZIP
226
227static 222static
228char* make_new_name_gunzip(char *filename) 223char* FAST_FUNC make_new_name_gunzip(char *filename, const char *expected_ext UNUSED_PARAM)
229{ 224{
230 char *extension = strrchr(filename, '.'); 225 char *extension = strrchr(filename, '.');
231 226
@@ -249,9 +244,8 @@ char* make_new_name_gunzip(char *filename)
249 } 244 }
250 return filename; 245 return filename;
251} 246}
252
253static 247static
254IF_DESKTOP(long long) int unpack_gunzip(unpack_info_t *info) 248IF_DESKTOP(long long) int FAST_FUNC unpack_gunzip(unpack_info_t *info)
255{ 249{
256 IF_DESKTOP(long long) int status = -1; 250 IF_DESKTOP(long long) int status = -1;
257 251
@@ -277,7 +271,6 @@ IF_DESKTOP(long long) int unpack_gunzip(unpack_info_t *info)
277 } 271 }
278 return status; 272 return status;
279} 273}
280
281/* 274/*
282 * Linux kernel build uses gzip -d -n. We accept and ignore it. 275 * Linux kernel build uses gzip -d -n. We accept and ignore it.
283 * Man page says: 276 * Man page says:
@@ -291,7 +284,6 @@ IF_DESKTOP(long long) int unpack_gunzip(unpack_info_t *info)
291 * gzip: always save the original file name and time stamp (this is the default) 284 * gzip: always save the original file name and time stamp (this is the default)
292 * gunzip: restore the original file name and time stamp if present. 285 * gunzip: restore the original file name and time stamp if present.
293 */ 286 */
294
295int gunzip_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; 287int gunzip_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
296int gunzip_main(int argc UNUSED_PARAM, char **argv) 288int gunzip_main(int argc UNUSED_PARAM, char **argv)
297{ 289{
@@ -301,9 +293,33 @@ int gunzip_main(int argc UNUSED_PARAM, char **argv)
301 if (applet_name[1] == 'c') 293 if (applet_name[1] == 'c')
302 option_mask32 |= OPT_STDOUT; 294 option_mask32 |= OPT_STDOUT;
303 295
304 return bbunpack(argv, make_new_name_gunzip, unpack_gunzip); 296 return bbunpack(argv, unpack_gunzip, make_new_name_gunzip, /*unused:*/ NULL);
305} 297}
298#endif
306 299
300
301/*
302 * Modified for busybox by Glenn McGrath
303 * Added support output to stdout by Thomas Lundquist <thomasez@zelow.no>
304 *
305 * Licensed under GPLv2 or later, see file LICENSE in this tarball for details.
306 */
307#if ENABLE_BUNZIP2
308static
309IF_DESKTOP(long long) int FAST_FUNC unpack_bunzip2(unpack_info_t *info UNUSED_PARAM)
310{
311 return unpack_bz2_stream_prime(STDIN_FILENO, STDOUT_FILENO);
312}
313int bunzip2_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
314int bunzip2_main(int argc UNUSED_PARAM, char **argv)
315{
316 getopt32(argv, "cfvdt");
317 argv += optind;
318 if (applet_name[2] == 'c') /* bzcat */
319 option_mask32 |= OPT_STDOUT;
320
321 return bbunpack(argv, unpack_bunzip2, make_new_name_generic, "bz2");
322}
307#endif 323#endif
308 324
309 325
@@ -315,70 +331,51 @@ int gunzip_main(int argc UNUSED_PARAM, char **argv)
315 * 331 *
316 * Licensed under GPL v2, see file LICENSE in this tarball for details. 332 * Licensed under GPL v2, see file LICENSE in this tarball for details.
317 */ 333 */
318
319#if ENABLE_UNLZMA 334#if ENABLE_UNLZMA
320
321static
322char* make_new_name_unlzma(char *filename)
323{
324 return make_new_name_generic(filename, "lzma");
325}
326
327static 335static
328IF_DESKTOP(long long) int unpack_unlzma(unpack_info_t *info UNUSED_PARAM) 336IF_DESKTOP(long long) int FAST_FUNC unpack_unlzma(unpack_info_t *info UNUSED_PARAM)
329{ 337{
330 return unpack_lzma_stream(STDIN_FILENO, STDOUT_FILENO); 338 return unpack_lzma_stream(STDIN_FILENO, STDOUT_FILENO);
331} 339}
332
333int unlzma_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; 340int unlzma_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
334int unlzma_main(int argc UNUSED_PARAM, char **argv) 341int unlzma_main(int argc UNUSED_PARAM, char **argv)
335{ 342{
336 getopt32(argv, "cf"); 343 IF_LZMA(int opts =) getopt32(argv, "cfvdt");
337 argv += optind; 344# if ENABLE_LZMA
338 /* lzmacat? */ 345 /* lzma without -d or -t? */
339 if (applet_name[4] == 'c') 346 if (applet_name[2] == 'm' && !(opts & (OPT_DECOMPRESS|OPT_TEST)))
347 bb_show_usage();
348# endif
349 /* lzcat? */
350 if (applet_name[2] == 'c')
340 option_mask32 |= OPT_STDOUT; 351 option_mask32 |= OPT_STDOUT;
341 352
342 return bbunpack(argv, make_new_name_unlzma, unpack_unlzma); 353 argv += optind;
354 return bbunpack(argv, unpack_unlzma, make_new_name_generic, "lzma");
343} 355}
344
345#endif 356#endif
346 357
347 358
348/* 359#if ENABLE_UNXZ
349 * Uncompress applet for busybox (c) 2002 Glenn McGrath
350 *
351 * Licensed under GPLv2 or later, see file LICENSE in this tarball for details.
352 */
353
354#if ENABLE_UNCOMPRESS
355
356static 360static
357char* make_new_name_uncompress(char *filename) 361IF_DESKTOP(long long) int FAST_FUNC unpack_unxz(unpack_info_t *info UNUSED_PARAM)
358{ 362{
359 return make_new_name_generic(filename, "Z"); 363 return unpack_xz_stream(STDIN_FILENO, STDOUT_FILENO);
360} 364}
361 365int unxz_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
362static 366int unxz_main(int argc UNUSED_PARAM, char **argv)
363IF_DESKTOP(long long) int unpack_uncompress(unpack_info_t *info UNUSED_PARAM)
364{ 367{
365 IF_DESKTOP(long long) int status = -1; 368 int opts = getopt32(argv, "cfvdt");
366 369# if ENABLE_XZ
367 if ((xread_char(STDIN_FILENO) != 0x1f) || (xread_char(STDIN_FILENO) != 0x9d)) { 370 /* xz without -d or -t? */
368 bb_error_msg("invalid magic"); 371 if (applet_name[2] == '\0' && !(opts & (OPT_DECOMPRESS|OPT_TEST)))
369 } else { 372 bb_show_usage();
370 status = unpack_Z_stream(STDIN_FILENO, STDOUT_FILENO); 373# endif
371 } 374 /* xzcat? */
372 return status; 375 if (applet_name[2] == 'c')
373} 376 option_mask32 |= OPT_STDOUT;
374 377
375int uncompress_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
376int uncompress_main(int argc UNUSED_PARAM, char **argv)
377{
378 getopt32(argv, "cf");
379 argv += optind; 378 argv += optind;
380 379 return bbunpack(argv, unpack_unxz, make_new_name_generic, "xz");
381 return bbunpack(argv, make_new_name_uncompress, unpack_uncompress);
382} 380}
383
384#endif 381#endif
diff --git a/archival/bzip2.c b/archival/bzip2.c
index bbaf56669..f1c84d681 100644
--- a/archival/bzip2.c
+++ b/archival/bzip2.c
@@ -102,7 +102,7 @@ IF_DESKTOP(long long) int bz_write(bz_stream *strm, void* rbuf, ssize_t rlen, vo
102} 102}
103 103
104static 104static
105IF_DESKTOP(long long) int compressStream(unpack_info_t *info UNUSED_PARAM) 105IF_DESKTOP(long long) int FAST_FUNC compressStream(unpack_info_t *info UNUSED_PARAM)
106{ 106{
107 IF_DESKTOP(long long) int total; 107 IF_DESKTOP(long long) int total;
108 ssize_t count; 108 ssize_t count;
@@ -135,12 +135,6 @@ IF_DESKTOP(long long) int compressStream(unpack_info_t *info UNUSED_PARAM)
135 return total; 135 return total;
136} 136}
137 137
138static
139char* make_new_name_bzip2(char *filename)
140{
141 return xasprintf("%s.bz2", filename);
142}
143
144int bzip2_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; 138int bzip2_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
145int bzip2_main(int argc UNUSED_PARAM, char **argv) 139int bzip2_main(int argc UNUSED_PARAM, char **argv)
146{ 140{
@@ -181,5 +175,5 @@ int bzip2_main(int argc UNUSED_PARAM, char **argv)
181 175
182 argv += optind; 176 argv += optind;
183 option_mask32 &= 0x7; /* ignore all except -cfv */ 177 option_mask32 &= 0x7; /* ignore all except -cfv */
184 return bbunpack(argv, make_new_name_bzip2, compressStream); 178 return bbunpack(argv, compressStream, append_ext, "bz2");
185} 179}
diff --git a/archival/gzip.c b/archival/gzip.c
index 958336303..9e915519d 100644
--- a/archival/gzip.c
+++ b/archival/gzip.c
@@ -1998,13 +1998,7 @@ static void zip(ulg time_stamp)
1998 1998
1999/* ======================================================================== */ 1999/* ======================================================================== */
2000static 2000static
2001char* make_new_name_gzip(char *filename) 2001IF_DESKTOP(long long) int FAST_FUNC pack_gzip(unpack_info_t *info UNUSED_PARAM)
2002{
2003 return xasprintf("%s.gz", filename);
2004}
2005
2006static
2007IF_DESKTOP(long long) int pack_gzip(unpack_info_t *info UNUSED_PARAM)
2008{ 2002{
2009 struct stat s; 2003 struct stat s;
2010 2004
@@ -2063,7 +2057,7 @@ static const char gzip_longopts[] ALIGN1 =
2063#endif 2057#endif
2064 2058
2065/* 2059/*
2066 * Linux kernel build uses gzip -d -n. We accept and ignore it. 2060 * Linux kernel build uses gzip -d -n. We accept and ignore -n.
2067 * Man page says: 2061 * Man page says:
2068 * -n --no-name 2062 * -n --no-name
2069 * gzip: do not save the original file name and time stamp. 2063 * gzip: do not save the original file name and time stamp.
@@ -2113,5 +2107,5 @@ int gzip_main(int argc UNUSED_PARAM, char **argv)
2113 /* Initialise the CRC32 table */ 2107 /* Initialise the CRC32 table */
2114 G1.crc_32_tab = crc32_filltable(NULL, 0); 2108 G1.crc_32_tab = crc32_filltable(NULL, 0);
2115 2109
2116 return bbunpack(argv, make_new_name_gzip, pack_gzip); 2110 return bbunpack(argv, pack_gzip, append_ext, "gz");
2117} 2111}
diff --git a/archival/libunarchive/Kbuild b/archival/libunarchive/Kbuild
index 11d23b25f..ed8e85793 100644
--- a/archival/libunarchive/Kbuild
+++ b/archival/libunarchive/Kbuild
@@ -49,6 +49,7 @@ lib-$(CONFIG_FEATURE_SEAMLESS_Z) += open_transformer.o decompress_uncompr
49lib-$(CONFIG_FEATURE_SEAMLESS_GZ) += open_transformer.o decompress_unzip.o get_header_tar_gz.o 49lib-$(CONFIG_FEATURE_SEAMLESS_GZ) += open_transformer.o decompress_unzip.o get_header_tar_gz.o
50lib-$(CONFIG_FEATURE_SEAMLESS_BZ2) += open_transformer.o decompress_bunzip2.o get_header_tar_bz2.o 50lib-$(CONFIG_FEATURE_SEAMLESS_BZ2) += open_transformer.o decompress_bunzip2.o get_header_tar_bz2.o
51lib-$(CONFIG_FEATURE_SEAMLESS_LZMA) += open_transformer.o decompress_unlzma.o get_header_tar_lzma.o 51lib-$(CONFIG_FEATURE_SEAMLESS_LZMA) += open_transformer.o decompress_unlzma.o get_header_tar_lzma.o
52lib-$(CONFIG_FEATURE_SEAMLESS_XZ) += open_transformer.o decompress_unxz.o
52lib-$(CONFIG_FEATURE_COMPRESS_USAGE) += decompress_bunzip2.o 53lib-$(CONFIG_FEATURE_COMPRESS_USAGE) += decompress_bunzip2.o
53 54
54ifneq ($(lib-y),) 55ifneq ($(lib-y),)
diff --git a/archival/libunarchive/decompress_uncompress.c b/archival/libunarchive/decompress_uncompress.c
index 2877c8981..1ff89ce3c 100644
--- a/archival/libunarchive/decompress_uncompress.c
+++ b/archival/libunarchive/decompress_uncompress.c
@@ -229,7 +229,7 @@ unpack_Z_stream(int fd_in, int fd_out)
229 ("insize:%d posbits:%d inbuf:%02X %02X %02X %02X %02X (%d)", 229 ("insize:%d posbits:%d inbuf:%02X %02X %02X %02X %02X (%d)",
230 insize, posbits, p[-1], p[0], p[1], p[2], p[3], 230 insize, posbits, p[-1], p[0], p[1], p[2], p[3],
231 (posbits & 07)); 231 (posbits & 07));
232 bb_error_msg("uncompress: corrupt input"); 232 bb_error_msg("corrupted data");
233 goto err; 233 goto err;
234 } 234 }
235 235
diff --git a/archival/libunarchive/decompress_unxz.c b/archival/libunarchive/decompress_unxz.c
new file mode 100644
index 000000000..3f9392984
--- /dev/null
+++ b/archival/libunarchive/decompress_unxz.c
@@ -0,0 +1,122 @@
1/*
2 * This file uses XZ Embedded library code which is written
3 * by Lasse Collin <lasse.collin@tukaani.org>
4 * and Igor Pavlov <http://7-zip.org/>
5 *
6 * See README file in unxz/ directory for more information.
7 *
8 * This file is:
9 * Copyright (C) 2010 Denys Vlasenko <vda.linux@googlemail.com>
10 * Licensed under GPLv2, see file LICENSE in this tarball for details.
11 */
12#include "libbb.h"
13#include "unarchive.h"
14
15#define XZ_REALLOC_DICT_BUF(ptr, size) xrealloc(ptr, size)
16#define XZ_FUNC FAST_FUNC
17#define XZ_EXTERN static
18
19/* Skip check (rather than fail) of unsupported hash functions */
20#define XZ_DEC_ANY_CHECK 1
21
22/* We use our own crc32 function */
23#define XZ_INTERNAL_CRC32 0
/* CRC32 lookup table; stays NULL until unpack_xz_stream() fills it via crc32_filltable() */
24static uint32_t *crc32_table;
/*
 * Table-driven CRC32, supplied because XZ_INTERNAL_CRC32 is defined to 0.
 *
 * buf:  bytes to checksum
 * size: number of bytes at buf
 * crc:  checksum of the data processed so far
 *
 * The value is bit-inverted on entry and again on return. Each input byte
 * is XORed with the low 8 bits of the running value to pick a table entry,
 * which is then XORed with the running value shifted right by 8.
 * crc32_table is dereferenced unconditionally, so it must be set before
 * the first call.
 */
25static uint32_t xz_crc32(const uint8_t *buf, size_t size, uint32_t crc)
26{
27 crc = ~crc;
28
29 while (size != 0) {
30 crc = crc32_table[*buf++ ^ (crc & 0xFF)] ^ (crc >> 8);
31 --size;
32 }
33
34 return ~crc;
35}
36
/*
 * Unaligned 32-bit accessors expected by the XZ Embedded sources,
 * built on the arch-optimized move_{from,to}_unaligned32 helpers.
 *
 * get_unaligned_{le,be}32(buf): GNU statement expression; loads 32 bits
 *   from buf and evaluates to that value passed through SWAP_{LE,BE}32.
 * put_unaligned_{le,be}32(val, buf): passes val through SWAP_{LE,BE}32
 *   and stores the result at buf.
 */
#define get_unaligned_le32(buf) ({ uint32_t v; move_from_unaligned32(v, buf); SWAP_LE32(v); })
#define get_unaligned_be32(buf) ({ uint32_t v; move_from_unaligned32(v, buf); SWAP_BE32(v); })
/* The store must be the 32-bit variant: a 16-bit store drops half of the value */
#define put_unaligned_le32(val, buf) move_to_unaligned32(buf, SWAP_LE32(val))
#define put_unaligned_be32(val, buf) move_to_unaligned32(buf, SWAP_BE32(val))
42
43#include "unxz/xz.h"
44#include "unxz/xz_config.h"
45
46#include "unxz/xz_dec_bcj.c"
47#include "unxz/xz_dec_lzma2.c"
48#include "unxz/xz_dec_stream.c"
49#include "unxz/xz_lzma2.h"
50#include "unxz/xz_private.h"
51#include "unxz/xz_stream.h"
52
/*
 * Decompress an .xz stream read from src_fd and write the result to dst_fd.
 *
 * Returns `total`: -1 after a read error or when the decoder reports
 * anything other than XZ_OK / XZ_UNSUPPORTED_CHECK / end of stream.
 * Otherwise it is the sum of the bytes written; that accumulation sits
 * inside IF_DESKTOP(), so presumably the sum stays 0 in builds where
 * IF_DESKTOP() expands to nothing (TODO confirm against libbb.h).
 */
53IF_DESKTOP(long long) int FAST_FUNC
54unpack_xz_stream(int src_fd, int dst_fd)
55{
56 struct xz_buf iobuf;
57 struct xz_dec *state;
58 unsigned char *membuf;
59 IF_DESKTOP(long long) int total = 0;
60 enum {
61 IN_SIZE = 4 * 1024,
62 OUT_SIZE = 60 * 1024,
63 };
64
 /* Build the table used by xz_crc32() on first use only */
65 if (!crc32_table)
66 crc32_table = crc32_filltable(NULL, /*endian:*/ 0);
67
 /* One allocation: input window in the first IN_SIZE bytes, output buffer after it */
68 membuf = xmalloc(IN_SIZE + OUT_SIZE);
69 memset(&iobuf, 0, sizeof(iobuf));
70 iobuf.in = membuf;
71 iobuf.out = membuf + IN_SIZE;
72 iobuf.out_size = OUT_SIZE;
73
74 state = xz_dec_init(64*1024); /* initial dict of 64k */
75
76 while (1) {
77 enum xz_ret r;
78 int insz, rd, outpos;
79
 /* Slide the input the decoder has not consumed yet to the start of the window */
80 iobuf.in_size -= iobuf.in_pos;
81 insz = iobuf.in_size;
82 if (insz)
83 memmove(membuf, membuf + iobuf.in_pos, insz);
84 iobuf.in_pos = 0;
 /* Top the window up to IN_SIZE; skipped when it is already full */
85 rd = IN_SIZE - insz;
86 if (rd) {
87 rd = safe_read(src_fd, membuf + insz, rd);
88 if (rd < 0) {
89 bb_error_msg("read error");
90 total = -1;
91 break;
92 }
93 iobuf.in_size = insz + rd;
94 }
95// bb_error_msg(">in pos:%d size:%d out pos:%d size:%d",
96// iobuf.in_pos, iobuf.in_size, iobuf.out_pos, iobuf.out_size);
97 r = xz_dec_run(state, &iobuf);
98// bb_error_msg("<in pos:%d size:%d out pos:%d size:%d r:%d",
99// iobuf.in_pos, iobuf.in_size, iobuf.out_pos, iobuf.out_size, r);
 /* Flush whatever this call produced before looking at the return code */
100 outpos = iobuf.out_pos;
101 if (outpos) {
102 xwrite(dst_fd, iobuf.out, outpos);
103 IF_DESKTOP(total += outpos;)
104 }
 /*
  * Normal termination. XZ_BUF_ERROR is also accepted as the end, but only
  * when no input was carried over into this pass and the call produced
  * no output.
  */
105 if (r == XZ_STREAM_END
106 /* this happens even with well-formed files: */
107 || (r == XZ_BUF_ERROR && insz == 0 && outpos == 0)
108 ) {
109 break;
110 }
 /* XZ_UNSUPPORTED_CHECK is tolerated (XZ_DEC_ANY_CHECK is defined above): keep decoding */
111 if (r != XZ_OK && r != XZ_UNSUPPORTED_CHECK) {
112 bb_error_msg("corrupted data");
113 total = -1;
114 break;
115 }
 /* Output was flushed above, so the whole output buffer is free again */
116 iobuf.out_pos = 0;
117 }
118 xz_dec_end(state);
119 free(membuf);
120
121 return total;
122}
diff --git a/archival/libunarchive/unxz/README b/archival/libunarchive/unxz/README
new file mode 100644
index 000000000..f79b0a404
--- /dev/null
+++ b/archival/libunarchive/unxz/README
@@ -0,0 +1,136 @@
1
2XZ Embedded
3===========
4
5 XZ Embedded is a relatively small, limited implementation of the .xz
6 file format. Currently only decoding is implemented.
7
8 XZ Embedded was written for use in the Linux kernel, but the code can
9 be easily used in other environments too, including regular userspace
10 applications.
11
12 This README contains information that is useful only when the copy
13 of XZ Embedded isn't part of the Linux kernel tree. You should also
14 read linux/Documentation/xz.txt even if you aren't using XZ Embedded
15 as part of Linux; information in that file is not repeated in this
16 README.
17
18Compiling the Linux kernel module
19
20 The xz_dec module depends on crc32 module, so make sure that you have
21 it enabled (CONFIG_CRC32).
22
23 Building the xz_dec and xz_dec_test modules without support for BCJ
24 filters:
25
26 cd linux/lib/xz
27 make -C /path/to/kernel/source \
28 KCPPFLAGS=-I"$(pwd)/../../include" M="$(pwd)" \
29 CONFIG_XZ_DEC=m CONFIG_XZ_DEC_TEST=m
30
31 Building the xz_dec and xz_dec_test modules with support for BCJ
32 filters:
33
34 cd linux/lib/xz
35 make -C /path/to/kernel/source \
36 KCPPFLAGS=-I"$(pwd)/../../include" M="$(pwd)" \
37 CONFIG_XZ_DEC=m CONFIG_XZ_DEC_TEST=m CONFIG_XZ_DEC_BCJ=y \
38 CONFIG_XZ_DEC_X86=y CONFIG_XZ_DEC_POWERPC=y \
39 CONFIG_XZ_DEC_IA64=y CONFIG_XZ_DEC_ARM=y \
40 CONFIG_XZ_DEC_ARMTHUMB=y CONFIG_XZ_DEC_SPARC=y
41
42 If you want only one or a few of the BCJ filters, omit the appropriate
43 variables. CONFIG_XZ_DEC_BCJ=y is always required to build the support
44 code shared between all BCJ filters.
45
46 Most people don't need the xz_dec_test module. You can skip building
47 it by omitting CONFIG_XZ_DEC_TEST=m from the make command line.
48
49Compiler requirements
50
51 XZ Embedded should compile as either GNU-C89 (used in the Linux
52 kernel) or with any C99 compiler. Getting the code to compile with
53 a non-GNU C89 compiler or a C++ compiler should be quite easy as
54 long as there is a data type for an unsigned 64-bit integer (or the
55 code is modified not to support large files, which needs some more
56 care than just using a 32-bit integer instead of a 64-bit one).
57
58 If you use GCC, try to use a recent version. For example, on x86,
59 xz_dec_lzma2.c compiled with GCC 3.3.6 is 15-25 % slower than when
60 compiled with GCC 4.3.3.
61
62Embedding into userspace applications
63
64 To embed the XZ decoder, copy the following files into a single
65 directory in your source code tree:
66
67 linux/include/linux/xz.h
68 linux/lib/xz/xz_crc32.c
69 linux/lib/xz/xz_dec_lzma2.c
70 linux/lib/xz/xz_dec_stream.c
71 linux/lib/xz/xz_lzma2.h
72 linux/lib/xz/xz_private.h
73 linux/lib/xz/xz_stream.h
74 userspace/xz_config.h
75
76 Alternatively, xz.h may be placed into a different directory but then
77 that directory must be in the compiler include path when compiling
78 the .c files.
79
80 Your code should use only the functions declared in xz.h. The rest of
81 the .h files are meant only for internal use in XZ Embedded.
82
83 You may want to modify xz_config.h to be more suitable for your build
84 environment. Probably you should at least skim through it even if the
85 default file works as is.
86
87BCJ filter support
88
89 If you want support for one or more BCJ filters, you also need to copy
90 linux/lib/xz/xz_dec_bcj.c into your application, and use appropriate
91 #defines in xz_config.h or in compiler flags. You don't need these
92 #defines in the code that just uses XZ Embedded via xz.h, but having
93 them always #defined doesn't hurt either.
94
95 #define Instruction set BCJ filter endianness
96 XZ_DEC_X86 x86 or x86-64 Little endian only
97 XZ_DEC_POWERPC PowerPC Big endian only
98 XZ_DEC_IA64 Itanium (IA-64) Big or little endian
99 XZ_DEC_ARM ARM Little endian only
100 XZ_DEC_ARMTHUMB ARM-Thumb Little endian only
101 XZ_DEC_SPARC SPARC Big or little endian
102
103 While some architectures are (partially) bi-endian, the endianness
104 setting doesn't change the endianness of the instructions on all
105 architectures. That's why Itanium and SPARC filters work for both big
106 and little endian executables (Itanium has little endian instructions
107 and SPARC has big endian instructions).
108
109 There currently is no filter for little endian PowerPC or big endian
110 ARM or ARM-Thumb. Implementing filters for them can be considered if
111 there is a need for such filters in real-world applications.
112
113Notes about shared libraries
114
115 If you are including XZ Embedded into a shared library, you very
116 probably should rename the xz_* functions to prevent symbol
117 conflicts in case your library is linked against some other library
118 or application that also has XZ Embedded in it (which may even be
119 a different version of XZ Embedded). TODO: Provide an easy way
120 to do this.
121
122 Please don't create a shared library of XZ Embedded itself unless
123 it is fine to rebuild everything depending on that shared library
124 every time you upgrade to a newer version of XZ Embedded. There are
125 no API or ABI stability guarantees between different versions of
126 XZ Embedded.
127
128Specifying the calling convention
129
130 XZ_FUNC macro was included to support declaring functions with __init
131 in Linux. Outside Linux, it can be used to specify the calling
132 convention on systems that support multiple calling conventions.
133 For example, on Windows, you may make all functions use the stdcall
134 calling convention by defining XZ_FUNC=__stdcall when building and
135 using the functions from XZ Embedded.
136
diff --git a/archival/libunarchive/unxz/xz.h b/archival/libunarchive/unxz/xz.h
new file mode 100644
index 000000000..eb82706b9
--- /dev/null
+++ b/archival/libunarchive/unxz/xz.h
@@ -0,0 +1,222 @@
1/*
2 * XZ decompressor
3 *
4 * Authors: Lasse Collin <lasse.collin@tukaani.org>
5 * Igor Pavlov <http://7-zip.org/>
6 *
7 * This file has been put into the public domain.
8 * You can do whatever you want with this file.
9 */
10
11#ifndef XZ_H
12#define XZ_H
13
14#ifdef __KERNEL__
15# include <linux/stddef.h>
16# include <linux/types.h>
17#else
18# include <stddef.h>
19# include <stdint.h>
20#endif
21
22/* In Linux, this is used to make extern functions static when needed. */
23#ifndef XZ_EXTERN
24# define XZ_EXTERN extern
25#endif
26
27/* In Linux, this is used to mark the functions with __init when needed. */
28#ifndef XZ_FUNC
29# define XZ_FUNC
30#endif
31
/**
 * enum xz_ret - Status codes reported by the XZ decoder
 *
 * The enumerators are numbered implicitly, starting from zero, in the
 * order in which they are declared.
 *
 * XZ_BUF_ERROR in multi-call mode: returned when two consecutive calls to
 * the XZ code can neither consume any input nor produce any new output.
 * That happens when no new input is available, or when the output buffer
 * is full while at least one output byte is still pending. Provided the
 * calling code is not buggy, this can only be seen when the compressed
 * stream is truncated or otherwise corrupt.
 *
 * XZ_BUF_ERROR in single-call mode: returned only when the output buffer
 * is too small, or when the input is corrupt in a way that makes the
 * decoder produce more output than the caller expected. When it is
 * (relatively) clear that the input is truncated, XZ_DATA_ERROR is
 * reported instead of XZ_BUF_ERROR.
 */
enum xz_ret {
	/*
	 * Everything is OK so far. More input or more output space is
	 * required to continue.
	 */
	XZ_OK,

	/* Operation finished successfully. */
	XZ_STREAM_END,

	/*
	 * Integrity check type is not supported. Decoding is still possible
	 * in multi-call mode by simply calling xz_dec_run() again.
	 *
	 * NOTE: This value is used only if XZ_DEC_ANY_CHECK was defined at
	 * build time, which is not done in the kernel. Without
	 * XZ_DEC_ANY_CHECK, unsupported check types are reported as
	 * XZ_OPTIONS_ERROR.
	 */
	XZ_UNSUPPORTED_CHECK,

	/* Not enough memory was preallocated at decoder initialization time. */
	XZ_MEMLIMIT_ERROR,

	/* File format was not recognized (wrong magic bytes). */
	XZ_FORMAT_ERROR,

	/*
	 * This implementation doesn't support the requested compression
	 * options. In the decoder this means that the header CRC32 matches,
	 * but the header itself specifies something that isn't supported.
	 */
	XZ_OPTIONS_ERROR,

	/* Compressed data is corrupt. */
	XZ_DATA_ERROR,

	/*
	 * Cannot make any progress. The details differ slightly between
	 * multi-call and single-call mode; see the description above.
	 */
	XZ_BUF_ERROR
};
81
/**
 * struct xz_buf - Input and output buffers handed to the XZ code
 *
 * The XZ code modifies only in_pos, out_pos, and the contents of the
 * output buffer from out[out_pos] onward.
 */
struct xz_buf {
	/*
	 * Beginning of the input buffer. May be NULL if and only if
	 * in_pos is equal to in_size.
	 */
	const uint8_t *in;

	/* Current position in the input buffer; must not exceed in_size. */
	size_t in_pos;

	/* Size of the input buffer */
	size_t in_size;

	/*
	 * Beginning of the output buffer. May be NULL if and only if
	 * out_pos is equal to out_size.
	 */
	uint8_t *out;

	/* Current position in the output buffer; must not exceed out_size. */
	size_t out_pos;

	/* Size of the output buffer */
	size_t out_size;
};
107
108/**
109 * struct xz_dec - Opaque type to hold the XZ decoder state
110 */
111struct xz_dec;
112
113/**
114 * xz_dec_init() - Allocate and initialize an XZ decoder state
115 * @dict_max: Maximum size of the LZMA2 dictionary (history buffer) for
116 * multi-call decoding, or special value of zero to indicate
117 * single-call decoding mode.
118 *
119 * If dict_max > 0, the decoder is initialized to work in multi-call mode.
120 * dict_max number of bytes of memory is preallocated for the LZMA2
121 * dictionary. This way there is no risk that xz_dec_run() could run out
122 * of memory, since xz_dec_run() will never allocate any memory. Instead,
123 * if the preallocated dictionary is too small for decoding the given input
124 * stream, xz_dec_run() will return XZ_MEMLIMIT_ERROR. Thus, it is important
125 * to know what kind of data will be decoded to avoid allocating excessive
126 * amount of memory for the dictionary.
127 *
128 * LZMA2 dictionary is always 2^n bytes or 2^n + 2^(n-1) bytes (the latter
129 * sizes are less common in practice). In the kernel, dictionary sizes of
130 * 64 KiB, 128 KiB, 256 KiB, 512 KiB, and 1 MiB are probably the only
131 * reasonable values.
132 *
133 * If dict_max == 0, the decoder is initialized to work in single-call mode.
134 * In single-call mode, xz_dec_run() decodes the whole stream at once. The
135 * caller must provide enough output space or the decoding will fail. The
136 * output space is used as the dictionary buffer, which is why there is
137 * no need to allocate the dictionary as part of the decoder's internal
138 * state.
139 *
140 * Because the output buffer is used as the workspace, streams encoded using
141 * a big dictionary are not a problem in single-call. It is enough that the
142 * output buffer is big enough to hold the actual uncompressed data; it
143 * can be smaller than the dictionary size stored in the stream headers.
144 *
145 * On success, xz_dec_init() returns a pointer to struct xz_dec, which is
146 * ready to be used with xz_dec_run(). On error, xz_dec_init() returns NULL.
147 */
148XZ_EXTERN struct xz_dec * XZ_FUNC xz_dec_init(uint32_t dict_max);
149
150/**
151 * xz_dec_run() - Run the XZ decoder
152 * @s: Decoder state allocated using xz_dec_init()
153 * @b: Input and output buffers
154 *
155 * In multi-call mode, this function may return any of the values listed in
156 * enum xz_ret.
157 *
158 * In single-call mode, this function never returns XZ_OK. If an error occurs
159 * in single-call mode (return value is not XZ_STREAM_END), b->in_pos and
160 * b->out_pos are not modified, and the contents of the output buffer from
161 * b->out[b->out_pos] onward are undefined.
162 *
163 * NOTE: In single-call mode, the contents of the output buffer are undefined
164 * also after XZ_BUF_ERROR. This is because with some filter chains, there
165 * may be a second pass over the output buffer, and this pass cannot be
166 * properly done if the output buffer is truncated. Thus, you cannot give
167 * the single-call decoder a too small buffer and then expect to get that
168 * amount of valid data from the beginning of the stream. You must use the
169 * multi-call decoder if you don't want to uncompress the whole stream.
170 */
171XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_run(struct xz_dec *s, struct xz_buf *b);
172
173/**
174 * xz_dec_reset() - Reset an already allocated decoder state
175 * @s: Decoder state allocated using xz_dec_init()
176 *
177 * This function can be used to reset the multi-call decoder state without
178 * freeing and reallocating memory with xz_dec_end() and xz_dec_init().
179 *
180 * In single-call mode, xz_dec_reset() is always called in the beginning of
181 * xz_dec_run(). Thus, explicit call to xz_dec_reset() is useful only in
182 * multi-call mode.
183 */
184XZ_EXTERN void XZ_FUNC xz_dec_reset(struct xz_dec *s);
185
186/**
187 * xz_dec_end() - Free the memory allocated for the decoder state
188 * @s: Decoder state allocated using xz_dec_init(). If s is NULL,
189 * this function does nothing.
190 */
191XZ_EXTERN void XZ_FUNC xz_dec_end(struct xz_dec *s);
192
193/*
194 * Standalone build (userspace build or in-kernel build for boot time use)
195 * needs a CRC32 implementation. For normal in-kernel use, kernel's own
196 * CRC32 module is used instead, and users of this module don't need to
197 * care about the functions below.
198 */
199#ifndef XZ_INTERNAL_CRC32
200# ifdef __KERNEL__
201# define XZ_INTERNAL_CRC32 0
202# else
203# define XZ_INTERNAL_CRC32 1
204# endif
205#endif
206
207#if XZ_INTERNAL_CRC32
208/*
209 * This must be called before any other xz_* function to initialize
210 * the CRC32 lookup table.
211 */
212XZ_EXTERN void XZ_FUNC xz_crc32_init(void);
213
214/*
215 * Update CRC32 value using the polynomial from IEEE-802.3. To start a new
216 * calculation, the third argument must be zero. To continue the calculation,
217 * the previously returned value is passed as the third argument.
218 */
219XZ_EXTERN uint32_t XZ_FUNC xz_crc32(
220 const uint8_t *buf, size_t size, uint32_t crc);
221#endif
222#endif
diff --git a/archival/libunarchive/unxz/xz_config.h b/archival/libunarchive/unxz/xz_config.h
new file mode 100644
index 000000000..ff90eff26
--- /dev/null
+++ b/archival/libunarchive/unxz/xz_config.h
@@ -0,0 +1,121 @@
1/*
2 * Private includes and definitions for userspace use of XZ Embedded
3 *
4 * Author: Lasse Collin <lasse.collin@tukaani.org>
5 *
6 * This file has been put into the public domain.
7 * You can do whatever you want with this file.
8 */
9
10#ifndef XZ_CONFIG_H
11#define XZ_CONFIG_H
12
13/* Uncomment as needed to enable BCJ filter decoders. */
14/* #define XZ_DEC_X86 */
15/* #define XZ_DEC_POWERPC */
16/* #define XZ_DEC_IA64 */
17/* #define XZ_DEC_ARM */
18/* #define XZ_DEC_ARMTHUMB */
19/* #define XZ_DEC_SPARC */
20
21#include <stdbool.h>
22#include <stdlib.h>
23#include <string.h>
24
25#include "xz.h"
26
/*
 * Map the kernel-style helpers used by the decoder onto the C library.
 * The allocation flags passed to kmalloc() are discarded without being
 * evaluated.
 */
#define kmalloc(size, flags) malloc(size)
#define kfree(ptr) free(ptr)
#define vmalloc(size) malloc(size)
#define vfree(ptr) free(ptr)

/* memeq() is true when the two regions hold identical bytes. */
#define memeq(a, b, size) (memcmp(a, b, size) == 0)
#define memzero(buf, size) memset(buf, 0, size)

/*
 * NOTE: min() and min_t() evaluate their arguments more than once, so
 * the arguments must not have side effects.
 */
#define min(x, y) ((x) < (y) ? (x) : (y))

/*
 * Like min(), but both operands are first converted to the given type so
 * that the comparison is done in that type regardless of the operands'
 * own types (and signedness).
 */
#define min_t(type, x, y) min((type)(x), (type)(y))
37
38/*
39 * Some functions have been marked with __always_inline to keep the
40 * performance reasonable even when the compiler is optimizing for
41 * small code size. You may be able to save a few bytes by #defining
42 * __always_inline to plain inline, but don't complain if the code
43 * becomes slow.
44 *
45 * NOTE: System headers on GNU/Linux may #define this macro already,
46 * so if you want to change it, you need to #undef it first.
47 */
48#ifndef __always_inline
49# ifdef __GNUC__
50# define __always_inline \
51 inline __attribute__((__always_inline__))
52# else
53# define __always_inline inline
54# endif
55#endif
56
57/*
58 * Some functions are marked to never be inlined to reduce stack usage.
59 * If you don't care about stack usage, you may want to modify this so
60 * that noinline_for_stack is #defined to be empty even when using GCC.
61 * Doing so may save a few bytes in binary size.
62 */
63#ifndef noinline_for_stack
64# ifdef __GNUC__
65# define noinline_for_stack __attribute__((__noinline__))
66# else
67# define noinline_for_stack
68# endif
69#endif
70
71/* Inline functions to access unaligned unsigned 32-bit integers */
72#ifndef get_unaligned_le32
73static inline uint32_t XZ_FUNC get_unaligned_le32(const uint8_t *buf)
74{
75 return (uint32_t)buf[0]
76 | ((uint32_t)buf[1] << 8)
77 | ((uint32_t)buf[2] << 16)
78 | ((uint32_t)buf[3] << 24);
79}
80#endif
81
82#ifndef get_unaligned_be32
83static inline uint32_t XZ_FUNC get_unaligned_be32(const uint8_t *buf)
84{
85 return (uint32_t)(buf[0] << 24)
86 | ((uint32_t)buf[1] << 16)
87 | ((uint32_t)buf[2] << 8)
88 | (uint32_t)buf[3];
89}
90#endif
91
92#ifndef put_unaligned_le32
93static inline void XZ_FUNC put_unaligned_le32(uint32_t val, uint8_t *buf)
94{
95 buf[0] = (uint8_t)val;
96 buf[1] = (uint8_t)(val >> 8);
97 buf[2] = (uint8_t)(val >> 16);
98 buf[3] = (uint8_t)(val >> 24);
99}
100#endif
101
102#ifndef put_unaligned_be32
103static inline void XZ_FUNC put_unaligned_be32(uint32_t val, uint8_t *buf)
104{
105 buf[0] = (uint8_t)(val >> 24);
106 buf[1] = (uint8_t)(val >> 16);
107 buf[2] = (uint8_t)(val >> 8);
108 buf[3] = (uint8_t)val;
109}
110#endif
111
112/*
113 * Use get_unaligned_le32() also for aligned access for simplicity. On
114 * little endian systems, #define get_le32(ptr) (*(const uint32_t *)(ptr))
115 * could save a few bytes in code size.
116 */
117#ifndef get_le32
118# define get_le32 get_unaligned_le32
119#endif
120
121#endif
diff --git a/archival/libunarchive/unxz/xz_dec_bcj.c b/archival/libunarchive/unxz/xz_dec_bcj.c
new file mode 100644
index 000000000..09162b51f
--- /dev/null
+++ b/archival/libunarchive/unxz/xz_dec_bcj.c
@@ -0,0 +1,564 @@
1/*
2 * Branch/Call/Jump (BCJ) filter decoders
3 *
4 * Authors: Lasse Collin <lasse.collin@tukaani.org>
5 * Igor Pavlov <http://7-zip.org/>
6 *
7 * This file has been put into the public domain.
8 * You can do whatever you want with this file.
9 */
10
11#include "xz_private.h"
12
13/*
14 * The rest of the file is inside this ifdef. It makes things a little more
15 * convenient when building without support for any BCJ filters.
16 */
17#ifdef XZ_DEC_BCJ
18
19struct xz_dec_bcj {
20 /* Type of the BCJ filter being used */
21 enum {
22 BCJ_X86 = 4, /* x86 or x86-64 */
23 BCJ_POWERPC = 5, /* Big endian only */
24 BCJ_IA64 = 6, /* Big or little endian */
25 BCJ_ARM = 7, /* Little endian only */
26 BCJ_ARMTHUMB = 8, /* Little endian only */
27 BCJ_SPARC = 9 /* Big or little endian */
28 } type;
29
30 /*
31 * Return value of the next filter in the chain. We need to preserve
32 * this information across calls, because we must not call the next
33 * filter anymore once it has returned XZ_STREAM_END.
34 */
35 enum xz_ret ret;
36
37 /* True if we are operating in single-call mode. */
38 bool single_call;
39
40 /*
41 * Absolute position relative to the beginning of the uncompressed
42 * data (in a single .xz Block). We care only about the lowest 32
43 * bits so this doesn't need to be uint64_t even with big files.
44 */
45 uint32_t pos;
46
47 /* x86 filter state */
48 uint32_t x86_prev_mask;
49
50 /* Temporary space to hold the variables from struct xz_buf */
51 uint8_t *out;
52 size_t out_pos;
53 size_t out_size;
54
55 struct {
56 /* Amount of already filtered data in the beginning of buf */
57 size_t filtered;
58
59 /* Total amount of data currently stored in buf */
60 size_t size;
61
62 /*
63 * Buffer to hold a mix of filtered and unfiltered data. This
64 * needs to be big enough to hold Alignment + 2 * Look-ahead:
65 *
66 * Type Alignment Look-ahead
67 * x86 1 4
68 * PowerPC 4 0
69 * IA-64 16 0
70 * ARM 4 0
71 * ARM-Thumb 2 2
72 * SPARC 4 0
73 */
74 uint8_t buf[16];
75 } temp;
76};
77
78#ifdef XZ_DEC_X86
79/*
80 * This is a macro used to test the most significant byte of a memory address
81 * in an x86 instruction.
82 */
83#define bcj_x86_test_msbyte(b) ((b) == 0x00 || (b) == 0xFF)
84
85static noinline_for_stack size_t XZ_FUNC bcj_x86(
86 struct xz_dec_bcj *s, uint8_t *buf, size_t size)
87{
88 static const bool mask_to_allowed_status[8]
89 = { true, true, true, false, true, false, false, false };
90
91 static const uint8_t mask_to_bit_num[8] = { 0, 1, 2, 2, 3, 3, 3, 3 };
92
93 size_t i;
94 size_t prev_pos = (size_t)-1;
95 uint32_t prev_mask = s->x86_prev_mask;
96 uint32_t src;
97 uint32_t dest;
98 uint32_t j;
99 uint8_t b;
100
101 if (size <= 4)
102 return 0;
103
104 size -= 4;
105 for (i = 0; i < size; ++i) {
106 if ((buf[i] & 0xFE) != 0xE8)
107 continue;
108
109 prev_pos = i - prev_pos;
110 if (prev_pos > 3) {
111 prev_mask = 0;
112 } else {
113 prev_mask = (prev_mask << (prev_pos - 1)) & 7;
114 if (prev_mask != 0) {
115 b = buf[i + 4 - mask_to_bit_num[prev_mask]];
116 if (!mask_to_allowed_status[prev_mask]
117 || bcj_x86_test_msbyte(b)) {
118 prev_pos = i;
119 prev_mask = (prev_mask << 1) | 1;
120 continue;
121 }
122 }
123 }
124
125 prev_pos = i;
126
127 if (bcj_x86_test_msbyte(buf[i + 4])) {
128 src = get_unaligned_le32(buf + i + 1);
129 while (true) {
130 dest = src - (s->pos + (uint32_t)i + 5);
131 if (prev_mask == 0)
132 break;
133
134 j = mask_to_bit_num[prev_mask] * 8;
135 b = (uint8_t)(dest >> (24 - j));
136 if (!bcj_x86_test_msbyte(b))
137 break;
138
139 src = dest ^ (((uint32_t)1 << (32 - j)) - 1);
140 }
141
142 dest &= 0x01FFFFFF;
143 dest |= (uint32_t)0 - (dest & 0x01000000);
144 put_unaligned_le32(dest, buf + i + 1);
145 i += 4;
146 } else {
147 prev_mask = (prev_mask << 1) | 1;
148 }
149 }
150
151 prev_pos = i - prev_pos;
152 s->x86_prev_mask = prev_pos > 3 ? 0 : prev_mask << (prev_pos - 1);
153 return i;
154}
155#endif
156
157#ifdef XZ_DEC_POWERPC
158static noinline_for_stack size_t XZ_FUNC bcj_powerpc(
159 struct xz_dec_bcj *s, uint8_t *buf, size_t size)
160{
161 size_t i;
162 uint32_t instr;
163
164 for (i = 0; i + 4 <= size; i += 4) {
165 instr = get_unaligned_be32(buf + i);
166 if ((instr & 0xFC000003) == 0x48000001) {
167 instr &= 0x03FFFFFC;
168 instr -= s->pos + (uint32_t)i;
169 instr &= 0x03FFFFFC;
170 instr |= 0x48000001;
171 put_unaligned_be32(instr, buf + i);
172 }
173 }
174
175 return i;
176}
177#endif
178
179#ifdef XZ_DEC_IA64
180static noinline_for_stack size_t XZ_FUNC bcj_ia64(
181 struct xz_dec_bcj *s, uint8_t *buf, size_t size)
182{
183 static const uint8_t branch_table[32] = {
184 0, 0, 0, 0, 0, 0, 0, 0,
185 0, 0, 0, 0, 0, 0, 0, 0,
186 4, 4, 6, 6, 0, 0, 7, 7,
187 4, 4, 0, 0, 4, 4, 0, 0
188 };
189
190 /*
191 * The local variables take a little bit stack space, but it's less
192 * than what LZMA2 decoder takes, so it doesn't make sense to reduce
193 * stack usage here without doing that for the LZMA2 decoder too.
194 */
195
196 /* Loop counters */
197 size_t i;
198 size_t j;
199
200 /* Instruction slot (0, 1, or 2) in the 128-bit instruction word */
201 uint32_t slot;
202
203 /* Bitwise offset of the instruction indicated by slot */
204 uint32_t bit_pos;
205
206 /* bit_pos split into byte and bit parts */
207 uint32_t byte_pos;
208 uint32_t bit_res;
209
210 /* Address part of an instruction */
211 uint32_t addr;
212
213 /* Mask used to detect which instructions to convert */
214 uint32_t mask;
215
216 /* 41-bit instruction stored somewhere in the lowest 48 bits */
217 uint64_t instr;
218
219 /* Instruction normalized with bit_res for easier manipulation */
220 uint64_t norm;
221
222 for (i = 0; i + 16 <= size; i += 16) {
223 mask = branch_table[buf[i] & 0x1F];
224 for (slot = 0, bit_pos = 5; slot < 3; ++slot, bit_pos += 41) {
225 if (((mask >> slot) & 1) == 0)
226 continue;
227
228 byte_pos = bit_pos >> 3;
229 bit_res = bit_pos & 7;
230 instr = 0;
231 for (j = 0; j < 6; ++j)
232 instr |= (uint64_t)(buf[i + j + byte_pos])
233 << (8 * j);
234
235 norm = instr >> bit_res;
236
237 if (((norm >> 37) & 0x0F) == 0x05
238 && ((norm >> 9) & 0x07) == 0) {
239 addr = (norm >> 13) & 0x0FFFFF;
240 addr |= ((uint32_t)(norm >> 36) & 1) << 20;
241 addr <<= 4;
242 addr -= s->pos + (uint32_t)i;
243 addr >>= 4;
244
245 norm &= ~((uint64_t)0x8FFFFF << 13);
246 norm |= (uint64_t)(addr & 0x0FFFFF) << 13;
247 norm |= (uint64_t)(addr & 0x100000)
248 << (36 - 20);
249
250 instr &= (1 << bit_res) - 1;
251 instr |= norm << bit_res;
252
253 for (j = 0; j < 6; j++)
254 buf[i + j + byte_pos]
255 = (uint8_t)(instr >> (8 * j));
256 }
257 }
258 }
259
260 return i;
261}
262#endif
263
264#ifdef XZ_DEC_ARM
265static noinline_for_stack size_t XZ_FUNC bcj_arm(
266 struct xz_dec_bcj *s, uint8_t *buf, size_t size)
267{
268 size_t i;
269 uint32_t addr;
270
271 for (i = 0; i + 4 <= size; i += 4) {
272 if (buf[i + 3] == 0xEB) {
273 addr = (uint32_t)buf[i] | ((uint32_t)buf[i + 1] << 8)
274 | ((uint32_t)buf[i + 2] << 16);
275 addr <<= 2;
276 addr -= s->pos + (uint32_t)i + 8;
277 addr >>= 2;
278 buf[i] = (uint8_t)addr;
279 buf[i + 1] = (uint8_t)(addr >> 8);
280 buf[i + 2] = (uint8_t)(addr >> 16);
281 }
282 }
283
284 return i;
285}
286#endif
287
288#ifdef XZ_DEC_ARMTHUMB
289static noinline_for_stack size_t XZ_FUNC bcj_armthumb(
290 struct xz_dec_bcj *s, uint8_t *buf, size_t size)
291{
292 size_t i;
293 uint32_t addr;
294
295 for (i = 0; i + 4 <= size; i += 2) {
296 if ((buf[i + 1] & 0xF8) == 0xF0
297 && (buf[i + 3] & 0xF8) == 0xF8) {
298 addr = (((uint32_t)buf[i + 1] & 0x07) << 19)
299 | ((uint32_t)buf[i] << 11)
300 | (((uint32_t)buf[i + 3] & 0x07) << 8)
301 | (uint32_t)buf[i + 2];
302 addr <<= 1;
303 addr -= s->pos + (uint32_t)i + 4;
304 addr >>= 1;
305 buf[i + 1] = (uint8_t)(0xF0 | ((addr >> 19) & 0x07));
306 buf[i] = (uint8_t)(addr >> 11);
307 buf[i + 3] = (uint8_t)(0xF8 | ((addr >> 8) & 0x07));
308 buf[i + 2] = (uint8_t)addr;
309 i += 2;
310 }
311 }
312
313 return i;
314}
315#endif
316
317#ifdef XZ_DEC_SPARC
318static noinline_for_stack size_t XZ_FUNC bcj_sparc(
319 struct xz_dec_bcj *s, uint8_t *buf, size_t size)
320{
321 size_t i;
322 uint32_t instr;
323
324 for (i = 0; i + 4 <= size; i += 4) {
325 instr = get_unaligned_be32(buf + i);
326 if ((instr >> 22) == 0x100 || (instr >> 22) == 0x1FF) {
327 instr <<= 2;
328 instr -= s->pos + (uint32_t)i;
329 instr >>= 2;
330 instr = ((uint32_t)0x40000000 - (instr & 0x400000))
331 | 0x40000000 | (instr & 0x3FFFFF);
332 put_unaligned_be32(instr, buf + i);
333 }
334 }
335
336 return i;
337}
338#endif
339
340/*
341 * Apply the selected BCJ filter. Update *pos and s->pos to match the amount
342 * of data that got filtered.
343 *
344 * NOTE: This is implemented as a switch statement to avoid using function
345 * pointers, which could be problematic in the kernel boot code, which must
346 * avoid pointers to static data (at least on x86).
347 */
348static void XZ_FUNC bcj_apply(struct xz_dec_bcj *s,
349 uint8_t *buf, size_t *pos, size_t size)
350{
351 size_t filtered;
352
353 buf += *pos;
354 size -= *pos;
355
356 switch (s->type) {
357#ifdef XZ_DEC_X86
358 case BCJ_X86:
359 filtered = bcj_x86(s, buf, size);
360 break;
361#endif
362#ifdef XZ_DEC_POWERPC
363 case BCJ_POWERPC:
364 filtered = bcj_powerpc(s, buf, size);
365 break;
366#endif
367#ifdef XZ_DEC_IA64
368 case BCJ_IA64:
369 filtered = bcj_ia64(s, buf, size);
370 break;
371#endif
372#ifdef XZ_DEC_ARM
373 case BCJ_ARM:
374 filtered = bcj_arm(s, buf, size);
375 break;
376#endif
377#ifdef XZ_DEC_ARMTHUMB
378 case BCJ_ARMTHUMB:
379 filtered = bcj_armthumb(s, buf, size);
380 break;
381#endif
382#ifdef XZ_DEC_SPARC
383 case BCJ_SPARC:
384 filtered = bcj_sparc(s, buf, size);
385 break;
386#endif
387 default:
388 /* Never reached but silence compiler warnings. */
389 filtered = 0;
390 break;
391 }
392
393 *pos += filtered;
394 s->pos += filtered;
395}
396
397/*
398 * Flush pending filtered data from temp to the output buffer.
399 * Move the remaining mixture of possibly filtered and unfiltered
400 * data to the beginning of temp.
401 */
402static void XZ_FUNC bcj_flush(struct xz_dec_bcj *s, struct xz_buf *b)
403{
404 size_t copy_size;
405
406 copy_size = min_t(size_t, s->temp.filtered, b->out_size - b->out_pos);
407 memcpy(b->out + b->out_pos, s->temp.buf, copy_size);
408 b->out_pos += copy_size;
409
410 s->temp.filtered -= copy_size;
411 s->temp.size -= copy_size;
412 memmove(s->temp.buf, s->temp.buf + copy_size, s->temp.size);
413}
414
415/*
416 * The BCJ filter functions are primitive in sense that they process the
417 * data in chunks of 1-16 bytes. To hide this issue, this function does
418 * some buffering.
419 */
420XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_bcj_run(struct xz_dec_bcj *s,
421 struct xz_dec_lzma2 *lzma2, struct xz_buf *b)
422{
423 size_t out_start;
424
425 /*
426 * Flush pending already filtered data to the output buffer. Return
427 * immediatelly if we couldn't flush everything, or if the next
428 * filter in the chain had already returned XZ_STREAM_END.
429 */
430 if (s->temp.filtered > 0) {
431 bcj_flush(s, b);
432 if (s->temp.filtered > 0)
433 return XZ_OK;
434
435 if (s->ret == XZ_STREAM_END)
436 return XZ_STREAM_END;
437 }
438
439 /*
440 * If we have more output space than what is currently pending in
441 * temp, copy the unfiltered data from temp to the output buffer
442 * and try to fill the output buffer by decoding more data from the
443 * next filter in the chain. Apply the BCJ filter on the new data
444 * in the output buffer. If everything cannot be filtered, copy it
445 * to temp and rewind the output buffer position accordingly.
446 */
447 if (s->temp.size < b->out_size - b->out_pos) {
448 out_start = b->out_pos;
449 memcpy(b->out + b->out_pos, s->temp.buf, s->temp.size);
450 b->out_pos += s->temp.size;
451
452 s->ret = xz_dec_lzma2_run(lzma2, b);
453 if (s->ret != XZ_STREAM_END
454 && (s->ret != XZ_OK || s->single_call))
455 return s->ret;
456
457 bcj_apply(s, b->out, &out_start, b->out_pos);
458
459 /*
460 * As an exception, if the next filter returned XZ_STREAM_END,
461 * we can do that too, since the last few bytes that remain
462 * unfiltered are meant to remain unfiltered.
463 */
464 if (s->ret == XZ_STREAM_END)
465 return XZ_STREAM_END;
466
467 s->temp.size = b->out_pos - out_start;
468 b->out_pos -= s->temp.size;
469 memcpy(s->temp.buf, b->out + b->out_pos, s->temp.size);
470 }
471
472 /*
473 * If we have unfiltered data in temp, try to fill by decoding more
474 * data from the next filter. Apply the BCJ filter on temp. Then we
475 * hopefully can fill the actual output buffer by copying filtered
476 * data from temp. A mix of filtered and unfiltered data may be left
477 * in temp; it will be taken care on the next call to this function.
478 */
479 if (s->temp.size > 0) {
480 /* Make b->out{,_pos,_size} temporarily point to s->temp. */
481 s->out = b->out;
482 s->out_pos = b->out_pos;
483 s->out_size = b->out_size;
484 b->out = s->temp.buf;
485 b->out_pos = s->temp.size;
486 b->out_size = sizeof(s->temp.buf);
487
488 s->ret = xz_dec_lzma2_run(lzma2, b);
489
490 s->temp.size = b->out_pos;
491 b->out = s->out;
492 b->out_pos = s->out_pos;
493 b->out_size = s->out_size;
494
495 if (s->ret != XZ_OK && s->ret != XZ_STREAM_END)
496 return s->ret;
497
498 bcj_apply(s, s->temp.buf, &s->temp.filtered, s->temp.size);
499
500 /*
501 * If the next filter returned XZ_STREAM_END, we mark that
502 * everything is filtered, since the last unfiltered bytes
503 * of the stream are meant to be left as is.
504 */
505 if (s->ret == XZ_STREAM_END)
506 s->temp.filtered = s->temp.size;
507
508 bcj_flush(s, b);
509 if (s->temp.filtered > 0)
510 return XZ_OK;
511 }
512
513 return s->ret;
514}
515
516XZ_EXTERN struct xz_dec_bcj * XZ_FUNC xz_dec_bcj_create(bool single_call)
517{
518 struct xz_dec_bcj *s = kmalloc(sizeof(*s), GFP_KERNEL);
519 if (s != NULL)
520 s->single_call = single_call;
521
522 return s;
523}
524
525XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_bcj_reset(
526 struct xz_dec_bcj *s, uint8_t id)
527{
528 switch (id) {
529#ifdef XZ_DEC_X86
530 case BCJ_X86:
531#endif
532#ifdef XZ_DEC_POWERPC
533 case BCJ_POWERPC:
534#endif
535#ifdef XZ_DEC_IA64
536 case BCJ_IA64:
537#endif
538#ifdef XZ_DEC_ARM
539 case BCJ_ARM:
540#endif
541#ifdef XZ_DEC_ARMTHUMB
542 case BCJ_ARMTHUMB:
543#endif
544#ifdef XZ_DEC_SPARC
545 case BCJ_SPARC:
546#endif
547 break;
548
549 default:
550 /* Unsupported Filter ID */
551 return XZ_OPTIONS_ERROR;
552 }
553
554 s->type = id;
555 s->ret = XZ_OK;
556 s->pos = 0;
557 s->x86_prev_mask = 0;
558 s->temp.filtered = 0;
559 s->temp.size = 0;
560
561 return XZ_OK;
562}
563
564#endif
diff --git a/archival/libunarchive/unxz/xz_dec_lzma2.c b/archival/libunarchive/unxz/xz_dec_lzma2.c
new file mode 100644
index 000000000..37de6fc32
--- /dev/null
+++ b/archival/libunarchive/unxz/xz_dec_lzma2.c
@@ -0,0 +1,1155 @@
1/*
2 * LZMA2 decoder
3 *
4 * Authors: Lasse Collin <lasse.collin@tukaani.org>
5 * Igor Pavlov <http://7-zip.org/>
6 *
7 * This file has been put into the public domain.
8 * You can do whatever you want with this file.
9 */
10
11#include "xz_private.h"
12#include "xz_lzma2.h"
13
14/*
15 * Range decoder initialization eats the first five bytes of each LZMA chunk.
16 */
17#define RC_INIT_BYTES 5
18
19/*
20 * Minimum number of usable input bytes to safely decode one LZMA symbol.
21 * The worst case is that we decode 22 bits using probabilities and 26
22 * direct bits. This may consume at most 20 bytes of input. However,
23 * lzma_main() does an extra normalization before returning, thus we
24 * need to put 21 here.
25 */
26#define LZMA_IN_REQUIRED 21
27
28/*
29 * Dictionary (history buffer)
30 *
31 * These are always true:
32 * start <= pos <= full <= end
33 * pos <= limit <= end
34 *
35 * In multi-call mode, also these are true:
36 * end == size
37 * size <= allocated
38 *
39 * Most of these variables are size_t to support single-call mode,
40 * in which the dictionary variables address the actual output
41 * buffer directly.
42 */
43struct dictionary {
44 /* Beginning of the history buffer */
45 uint8_t *buf;
46
47 /* Old position in buf (before decoding more data); see dict_flush() */
48 size_t start;
49
50 /* Position in buf where the next byte is written */
51 size_t pos;
52
53 /*
54 * How full the dictionary is. This is used to detect corrupt input that
55 * would read beyond the beginning of the uncompressed stream.
56 */
57 size_t full;
58
59 /* Write limit; we don't write to buf[limit] or later bytes. */
60 size_t limit;
61
62 /*
63 * End of the dictionary buffer. In multi-call mode, this is
64 * the same as the dictionary size. In single-call mode, this
65 * indicates the size of the output buffer.
66 */
67 size_t end;
68
69 /*
70 * Size of the dictionary as specified in Block Header. This is used
71 * together with "full" to detect corrupt input that would make us
72 * read beyond the beginning of the uncompressed stream.
73 */
74 uint32_t size;
75
76 /*
77 * Amount of memory allocated for the dictionary. A special
78 * value of zero indicates that we are in single-call mode,
79 * where the output buffer works as the dictionary.
80 */
81 uint32_t allocated;
82};
83
84/* Range decoder state; range and code are updated by rc_bit() and rc_direct() */
85struct rc_dec {
86 uint32_t range; /* shifted left by rc_normalize() when below RC_TOP_VALUE */
87 uint32_t code; /* refilled from in[] one byte at a time by rc_normalize() */
88
89 /*
90 * Number of initializing bytes remaining to be read
91 * by rc_read_init().
92 */
93 uint32_t init_bytes_left;
94
95 /*
96 * Buffer from which we read our input. It can be either
97 * temp.buf or the caller-provided input buffer.
98 */
99 const uint8_t *in;
100 size_t in_pos; /* index in "in" of the next byte to read */
101 size_t in_limit; /* decoding stops once in_pos > in_limit; see rc_limit_exceeded() */
102};
103
104/* Probabilities for a length decoder (used for match_len_dec and rep_len_dec). */
105struct lzma_len_dec {
106 /* Probability of match length being at least 10 */
107 uint16_t choice;
108
109 /* Probability of match length being at least 18 */
110 uint16_t choice2;
111
112 /* Probabilities for match lengths 2-9 */
113 uint16_t low[POS_STATES_MAX][LEN_LOW_SYMBOLS];
114
115 /* Probabilities for match lengths 10-17 */
116 uint16_t mid[POS_STATES_MAX][LEN_MID_SYMBOLS];
117
118 /* Probabilities for match lengths 18-273 */
119 uint16_t high[LEN_HIGH_SYMBOLS];
120};
121
122struct lzma_dec {
123 /*
124 * LZMA properties or related bit masks (number of literal
125 * context bits, a mask derived from the number of literal
126 * position bits, and a mask derived from the number of
127 * position bits)
128 */
129 uint32_t lc;
130 uint32_t literal_pos_mask; /* (1 << lp) - 1 */
131 uint32_t pos_mask; /* (1 << pb) - 1 */
132
133 /* Types of the most recently seen LZMA symbols */
134 enum lzma_state state;
135
136 /* Distances of latest four matches */
137 uint32_t rep0;
138 uint32_t rep1;
139 uint32_t rep2;
140 uint32_t rep3;
141
142 /*
143 * Length of a match. This is updated so that dict_repeat can
144 * be called again to finish repeating the whole match.
145 */
146 uint32_t len;
147
148 /* If 1, it's a match. Otherwise it's a single 8-bit literal. NOTE: lzma_reset() initializes PROBS_TOTAL entries starting at is_match[0], presumably covering every probability array below, so keep their order. */
149 uint16_t is_match[STATES][POS_STATES_MAX];
150
151 /* If 1, it's a repeated match. The distance is one of rep0 .. rep3. */
152 uint16_t is_rep[STATES];
153
154 /*
155 * If 0, distance of a repeated match is rep0.
156 * Otherwise check is_rep1.
157 */
158 uint16_t is_rep0[STATES];
159
160 /*
161 * If 0, distance of a repeated match is rep1.
162 * Otherwise check is_rep2.
163 */
164 uint16_t is_rep1[STATES];
165
166 /* If 0, distance of a repeated match is rep2. Otherwise it is rep3. */
167 uint16_t is_rep2[STATES];
168
169 /*
170 * If 0, the repeated match has length of one byte. Otherwise
171 * the length is decoded from rep_len_dec.
172 */
173 uint16_t is_rep0_long[STATES][POS_STATES_MAX];
174
175 /*
176 * Probability tree for the highest two bits of the match
177 * distance. There is a separate probability tree for match
178 * lengths of 2 (i.e. MATCH_LEN_MIN), 3, 4, and [5, 273].
179 */
180 uint16_t dist_slot[DIST_STATES][DIST_SLOTS];
181
182 /*
183 * Probability trees for additional bits for match distance
184 * when the distance is in the range [4, 127].
185 */
186 uint16_t dist_special[FULL_DISTANCES - DIST_MODEL_END];
187
188 /*
189 * Probability tree for the lowest four bits of a match
190 * distance that is equal to or greater than 128.
191 */
192 uint16_t dist_align[ALIGN_SIZE];
193
194 /* Length of a normal match */
195 struct lzma_len_dec match_len_dec;
196
197 /* Length of a repeated match */
198 struct lzma_len_dec rep_len_dec;
199
200 /* Probabilities of literals */
201 uint16_t literal[LITERAL_CODERS_MAX][LITERAL_CODER_SIZE];
202};
203
204struct xz_dec_lzma2 {
205 /* LZMA2 */
206 struct {
207 /* Position in xz_dec_lzma2_run(). */
208 enum lzma2_seq {
209 SEQ_CONTROL,
210 SEQ_UNCOMPRESSED_1,
211 SEQ_UNCOMPRESSED_2,
212 SEQ_COMPRESSED_0,
213 SEQ_COMPRESSED_1,
214 SEQ_PROPERTIES,
215 SEQ_LZMA_PREPARE,
216 SEQ_LZMA_RUN,
217 SEQ_COPY
218 } sequence;
219
220 /*
221 * Next position after decoding the compressed size of
222 * the chunk.
223 */
224 enum lzma2_seq next_sequence;
225
226 /* Uncompressed size of LZMA chunk (2 MiB at maximum) */
227 uint32_t uncompressed;
228
229 /*
230 * Compressed size of LZMA chunk or compressed/uncompressed
231 * size of uncompressed chunk (64 KiB at maximum)
232 */
233 uint32_t compressed;
234
235 /*
236 * True if dictionary reset is needed. This is true before
237 * the first chunk (LZMA or uncompressed).
238 */
239 bool need_dict_reset;
240
241 /*
242 * True if new LZMA properties are needed. This is set by a
243 * dictionary reset and cleared by a chunk with new properties.
244 */
245 bool need_props;
246 } lzma2;
247
248 /*
249 * Temporary buffer which holds small number of input bytes between
250 * decoder calls. See lzma2_lzma() for details.
251 */
252 struct {
253 uint32_t size;
254 uint8_t buf[3 * LZMA_IN_REQUIRED];
255 } temp;
256
257 struct dictionary dict;
258 struct rc_dec rc;
259 struct lzma_dec lzma;
260};
261
262/**************
263 * Dictionary *
264 **************/
265
266/*
267 * Reset the dictionary state. When in single-call mode, set up the beginning
268 * of the dictionary to point to the actual output buffer.
269 */
270static void XZ_FUNC dict_reset(struct dictionary *dict, struct xz_buf *b)
271{
272 if (dict->allocated == 0) {
273 dict->buf = b->out + b->out_pos;
274 dict->end = b->out_size - b->out_pos;
275 }
276
277 dict->start = 0;
278 dict->pos = 0;
279 dict->limit = 0;
280 dict->full = 0;
281}
282
283/* Set dictionary write limit */
284static void XZ_FUNC dict_limit(struct dictionary *dict, size_t out_max)
285{
286 if (dict->end - dict->pos <= out_max)
287 dict->limit = dict->end;
288 else
289 dict->limit = dict->pos + out_max;
290}
291
292/* Return true if at least one byte can be written into the dictionary. */
293static __always_inline bool XZ_FUNC dict_has_space(const struct dictionary *dict)
294{
295 return dict->pos < dict->limit;
296}
297
298/*
299 * Get a byte from the dictionary at the given distance. The distance is
300 * assumed to valid, or as a special case, zero when the dictionary is
301 * still empty. This special case is needed for single-call decoding to
302 * avoid writing a '\0' to the end of the destination buffer.
303 */
304static __always_inline uint32_t XZ_FUNC dict_get(
305 const struct dictionary *dict, uint32_t dist)
306{
307 size_t offset = dict->pos - dist - 1;
308
309 if (dist >= dict->pos)
310 offset += dict->end;
311
312 return dict->full > 0 ? dict->buf[offset] : 0;
313}
314
315/*
316 * Put one byte into the dictionary. It is assumed that there is space for it.
317 */
318static inline void XZ_FUNC dict_put(struct dictionary *dict, uint8_t byte)
319{
320 dict->buf[dict->pos++] = byte;
321
322 if (dict->full < dict->pos)
323 dict->full = dict->pos;
324}
325
326/*
327 * Repeat given number of bytes from the given distance. If the distance is
328 * invalid, false is returned. On success, true is returned and *len is
329 * updated to indicate how many bytes were left to be repeated.
330 */
331static bool XZ_FUNC dict_repeat(
332 struct dictionary *dict, uint32_t *len, uint32_t dist)
333{
334 size_t back;
335 uint32_t left;
336
337 if (dist >= dict->full || dist >= dict->size)
338 return false;
339
340 left = min_t(size_t, dict->limit - dict->pos, *len);
341 *len -= left;
342
343 back = dict->pos - dist - 1;
344 if (dist >= dict->pos)
345 back += dict->end;
346
347 do {
348 dict->buf[dict->pos++] = dict->buf[back++];
349 if (back == dict->end)
350 back = 0;
351 } while (--left > 0);
352
353 if (dict->full < dict->pos)
354 dict->full = dict->pos;
355
356 return true;
357}
358
359/* Copy uncompressed data as is from input to dictionary and output buffers. */
360static void XZ_FUNC dict_uncompressed(
361 struct dictionary *dict, struct xz_buf *b, uint32_t *left)
362{
363 size_t copy_size;
364
365 while (*left > 0 && b->in_pos < b->in_size
366 && b->out_pos < b->out_size) {
367 copy_size = min(b->in_size - b->in_pos,
368 b->out_size - b->out_pos);
369 if (copy_size > dict->end - dict->pos)
370 copy_size = dict->end - dict->pos;
371 if (copy_size > *left)
372 copy_size = *left;
373
374 *left -= copy_size;
375
376 memcpy(dict->buf + dict->pos, b->in + b->in_pos, copy_size);
377 dict->pos += copy_size;
378
379 if (dict->full < dict->pos)
380 dict->full = dict->pos;
381
382 if (dict->allocated != 0) {
383 if (dict->pos == dict->end)
384 dict->pos = 0;
385
386 memcpy(b->out + b->out_pos, b->in + b->in_pos,
387 copy_size);
388 }
389
390 dict->start = dict->pos;
391
392 b->out_pos += copy_size;
393 b->in_pos += copy_size;
394
395 }
396}
397
398/*
399 * Flush pending data from dictionary to b->out. It is assumed that there is
400 * enough space in b->out. This is guaranteed because caller uses dict_limit()
401 * before decoding data into the dictionary.
402 */
403static uint32_t XZ_FUNC dict_flush(struct dictionary *dict, struct xz_buf *b)
404{
405 size_t copy_size = dict->pos - dict->start;
406
407 if (dict->allocated != 0) {
408 if (dict->pos == dict->end)
409 dict->pos = 0;
410
411 memcpy(b->out + b->out_pos, dict->buf + dict->start,
412 copy_size);
413 }
414
415 dict->start = dict->pos;
416 b->out_pos += copy_size;
417 return copy_size;
418}
419
420/*****************
421 * Range decoder *
422 *****************/
423
424/* Reset the range decoder. */
425static __always_inline void XZ_FUNC rc_reset(struct rc_dec *rc)
426{
427 rc->range = (uint32_t)-1;
428 rc->code = 0;
429 rc->init_bytes_left = RC_INIT_BYTES;
430}
431
432/*
433 * Read the first five initial bytes into rc->code if they haven't been
434 * read already. (Yes, the first byte gets completely ignored.)
435 */
436static bool XZ_FUNC rc_read_init(struct rc_dec *rc, struct xz_buf *b)
437{
438 while (rc->init_bytes_left > 0) {
439 if (b->in_pos == b->in_size)
440 return false;
441
442 rc->code = (rc->code << 8) + b->in[b->in_pos++];
443 --rc->init_bytes_left;
444 }
445
446 return true;
447}
448
449/* Return true if there may not be enough input for the next decoding loop. */
450static inline bool XZ_FUNC rc_limit_exceeded(const struct rc_dec *rc)
451{
452 return rc->in_pos > rc->in_limit;
453}
454
455/*
456 * Return true if it is possible (from point of view of range decoder) that
457 * we have reached the end of the LZMA chunk.
458 */
459static inline bool XZ_FUNC rc_is_finished(const struct rc_dec *rc)
460{
461 return rc->code == 0;
462}
463
464/* Read the next input byte if needed. */
465static __always_inline void XZ_FUNC rc_normalize(struct rc_dec *rc)
466{
467 if (rc->range < RC_TOP_VALUE) {
468 rc->range <<= RC_SHIFT_BITS;
469 rc->code = (rc->code << RC_SHIFT_BITS) + rc->in[rc->in_pos++];
470 }
471}
472
473/*
474 * Decode one bit. In some versions, this function has been splitted in three
475 * functions so that the compiler is supposed to be able to more easily avoid
476 * an extra branch. In this particular version of the LZMA decoder, this
477 * doesn't seem to be a good idea (tested with GCC 3.3.6, 3.4.6, and 4.3.3
478 * on x86). Using a non-splitted version results in nicer looking code too.
479 *
480 * NOTE: This must return an int. Do not make it return a bool or the speed
481 * of the code generated by GCC 3.x decreases 10-15 %. (GCC 4.3 doesn't care,
482 * and it generates 10-20 % faster code than GCC 3.x from this file anyway.)
483 */
484static __always_inline int XZ_FUNC rc_bit(struct rc_dec *rc, uint16_t *prob)
485{
486 uint32_t bound;
487 int bit;
488
489 rc_normalize(rc);
490 bound = (rc->range >> RC_BIT_MODEL_TOTAL_BITS) * *prob;
491 if (rc->code < bound) {
492 rc->range = bound;
493 *prob += (RC_BIT_MODEL_TOTAL - *prob) >> RC_MOVE_BITS;
494 bit = 0;
495 } else {
496 rc->range -= bound;
497 rc->code -= bound;
498 *prob -= *prob >> RC_MOVE_BITS;
499 bit = 1;
500 }
501
502 return bit;
503}
504
505/* Decode a bittree starting from the most significant bit. */
506static __always_inline uint32_t XZ_FUNC rc_bittree(
507 struct rc_dec *rc, uint16_t *probs, uint32_t limit)
508{
509 uint32_t symbol = 1;
510
511 do {
512 if (rc_bit(rc, &probs[symbol]))
513 symbol = (symbol << 1) + 1;
514 else
515 symbol <<= 1;
516 } while (symbol < limit);
517
518 return symbol;
519}
520
521/* Decode a bittree starting from the least significant bit. */
522static __always_inline void XZ_FUNC rc_bittree_reverse(struct rc_dec *rc,
523 uint16_t *probs, uint32_t *dest, uint32_t limit)
524{
525 uint32_t symbol = 1;
526 uint32_t i = 0;
527
528 do {
529 if (rc_bit(rc, &probs[symbol])) {
530 symbol = (symbol << 1) + 1;
531 *dest += 1 << i;
532 } else {
533 symbol <<= 1;
534 }
535 } while (++i < limit);
536}
537
538/* Decode direct bits (fixed fifty-fifty probability) */
539static inline void XZ_FUNC rc_direct(
540 struct rc_dec *rc, uint32_t *dest, uint32_t limit)
541{
542 uint32_t mask;
543
544 do {
545 rc_normalize(rc);
546 rc->range >>= 1;
547 rc->code -= rc->range;
548 mask = (uint32_t)0 - (rc->code >> 31);
549 rc->code += rc->range & mask;
550 *dest = (*dest << 1) + (mask + 1);
551 } while (--limit > 0);
552}
553
554/********
555 * LZMA *
556 ********/
557
558/* Get pointer to literal coder probability array. */
559static uint16_t * XZ_FUNC lzma_literal_probs(struct xz_dec_lzma2 *s)
560{
561 uint32_t prev_byte = dict_get(&s->dict, 0);
562 uint32_t low = prev_byte >> (8 - s->lzma.lc);
563 uint32_t high = (s->dict.pos & s->lzma.literal_pos_mask) << s->lzma.lc;
564 return s->lzma.literal[low + high];
565}
566
567/* Decode a literal (one 8-bit byte) */
568static void XZ_FUNC lzma_literal(struct xz_dec_lzma2 *s)
569{
570 uint16_t *probs;
571 uint32_t symbol;
572 uint32_t match_byte;
573 uint32_t match_bit;
574 uint32_t offset;
575 uint32_t i;
576
577 probs = lzma_literal_probs(s);
578
579 if (lzma_state_is_literal(s->lzma.state)) {
580 symbol = rc_bittree(&s->rc, probs, 0x100);
581 } else {
582 symbol = 1;
583 match_byte = dict_get(&s->dict, s->lzma.rep0) << 1;
584 offset = 0x100;
585
586 do {
587 match_bit = match_byte & offset;
588 match_byte <<= 1;
589 i = offset + match_bit + symbol;
590
591 if (rc_bit(&s->rc, &probs[i])) {
592 symbol = (symbol << 1) + 1;
593 offset &= match_bit;
594 } else {
595 symbol <<= 1;
596 offset &= ~match_bit;
597 }
598 } while (symbol < 0x100);
599 }
600
601 dict_put(&s->dict, (uint8_t)symbol);
602 lzma_state_literal(&s->lzma.state);
603}
604
605/* Decode the length of the match into s->lzma.len. */
606static void XZ_FUNC lzma_len(struct xz_dec_lzma2 *s, struct lzma_len_dec *l,
607 uint32_t pos_state)
608{
609 uint16_t *probs;
610 uint32_t limit;
611
612 if (!rc_bit(&s->rc, &l->choice)) {
613 probs = l->low[pos_state];
614 limit = LEN_LOW_SYMBOLS;
615 s->lzma.len = MATCH_LEN_MIN;
616 } else {
617 if (!rc_bit(&s->rc, &l->choice2)) {
618 probs = l->mid[pos_state];
619 limit = LEN_MID_SYMBOLS;
620 s->lzma.len = MATCH_LEN_MIN + LEN_LOW_SYMBOLS;
621 } else {
622 probs = l->high;
623 limit = LEN_HIGH_SYMBOLS;
624 s->lzma.len = MATCH_LEN_MIN + LEN_LOW_SYMBOLS
625 + LEN_MID_SYMBOLS;
626 }
627 }
628
629 s->lzma.len += rc_bittree(&s->rc, probs, limit) - limit;
630}
631
632/* Decode a match. The distance will be stored in s->lzma.rep0. */
633static void XZ_FUNC lzma_match(struct xz_dec_lzma2 *s, uint32_t pos_state)
634{
635 uint16_t *probs;
636 uint32_t dist_slot;
637 uint32_t limit;
638
639 lzma_state_match(&s->lzma.state);
640
641 s->lzma.rep3 = s->lzma.rep2;
642 s->lzma.rep2 = s->lzma.rep1;
643 s->lzma.rep1 = s->lzma.rep0;
644
645 lzma_len(s, &s->lzma.match_len_dec, pos_state);
646
647 probs = s->lzma.dist_slot[lzma_get_dist_state(s->lzma.len)];
648 dist_slot = rc_bittree(&s->rc, probs, DIST_SLOTS) - DIST_SLOTS;
649
650 if (dist_slot < DIST_MODEL_START) {
651 s->lzma.rep0 = dist_slot;
652 } else {
653 limit = (dist_slot >> 1) - 1;
654 s->lzma.rep0 = 2 + (dist_slot & 1);
655
656 if (dist_slot < DIST_MODEL_END) {
657 s->lzma.rep0 <<= limit;
658 probs = s->lzma.dist_special + s->lzma.rep0
659 - dist_slot - 1;
660 rc_bittree_reverse(&s->rc, probs,
661 &s->lzma.rep0, limit);
662 } else {
663 rc_direct(&s->rc, &s->lzma.rep0, limit - ALIGN_BITS);
664 s->lzma.rep0 <<= ALIGN_BITS;
665 rc_bittree_reverse(&s->rc, s->lzma.dist_align,
666 &s->lzma.rep0, ALIGN_BITS);
667 }
668 }
669}
670
671/*
672 * Decode a repeated match. The distance is one of the four most recently
673 * seen matches. The distance will be stored in s->lzma.rep0.
674 */
675static void XZ_FUNC lzma_rep_match(struct xz_dec_lzma2 *s, uint32_t pos_state)
676{
677 uint32_t tmp;
678
679 if (!rc_bit(&s->rc, &s->lzma.is_rep0[s->lzma.state])) {
680 if (!rc_bit(&s->rc, &s->lzma.is_rep0_long[
681 s->lzma.state][pos_state])) {
682 lzma_state_short_rep(&s->lzma.state);
683 s->lzma.len = 1;
684 return;
685 }
686 } else {
687 if (!rc_bit(&s->rc, &s->lzma.is_rep1[s->lzma.state])) {
688 tmp = s->lzma.rep1;
689 } else {
690 if (!rc_bit(&s->rc, &s->lzma.is_rep2[s->lzma.state])) {
691 tmp = s->lzma.rep2;
692 } else {
693 tmp = s->lzma.rep3;
694 s->lzma.rep3 = s->lzma.rep2;
695 }
696
697 s->lzma.rep2 = s->lzma.rep1;
698 }
699
700 s->lzma.rep1 = s->lzma.rep0;
701 s->lzma.rep0 = tmp;
702 }
703
704 lzma_state_long_rep(&s->lzma.state);
705 lzma_len(s, &s->lzma.rep_len_dec, pos_state);
706}
707
708/* LZMA decoder core */
709static bool XZ_FUNC lzma_main(struct xz_dec_lzma2 *s)
710{
711 uint32_t pos_state;
712
713 /*
714 * If the dictionary was reached during the previous call, try to
715 * finish the possibly pending repeat in the dictionary.
716 */
717 if (dict_has_space(&s->dict) && s->lzma.len > 0)
718 dict_repeat(&s->dict, &s->lzma.len, s->lzma.rep0);
719
720 /*
721 * Decode more LZMA symbols. One iteration may consume up to
722 * LZMA_IN_REQUIRED - 1 bytes.
723 */
724 while (dict_has_space(&s->dict) && !rc_limit_exceeded(&s->rc)) {
725 pos_state = s->dict.pos & s->lzma.pos_mask;
726
727 if (!rc_bit(&s->rc, &s->lzma.is_match[
728 s->lzma.state][pos_state])) {
729 lzma_literal(s);
730 } else {
731 if (rc_bit(&s->rc, &s->lzma.is_rep[s->lzma.state]))
732 lzma_rep_match(s, pos_state);
733 else
734 lzma_match(s, pos_state);
735
736 if (!dict_repeat(&s->dict, &s->lzma.len, s->lzma.rep0))
737 return false;
738 }
739 }
740
741 /*
742 * Having the range decoder always normalized when we are outside
743 * this function makes it easier to correctly handle end of the chunk.
744 */
745 rc_normalize(&s->rc);
746
747 return true;
748}
749
750/*
751 * Reset the LZMA decoder and range decoder state. Dictionary is nore reset
752 * here, because LZMA state may be reset without resetting the dictionary.
753 */
754static void XZ_FUNC lzma_reset(struct xz_dec_lzma2 *s)
755{
756 uint16_t *probs;
757 size_t i;
758
759 s->lzma.state = STATE_LIT_LIT;
760 s->lzma.rep0 = 0;
761 s->lzma.rep1 = 0;
762 s->lzma.rep2 = 0;
763 s->lzma.rep3 = 0;
764
765 /*
766 * All probabilities are initialized to the same value. This hack
767 * makes the code smaller by avoiding a separate loop for each
768 * probability array.
769 *
770 * This could be optimized so that only that part of literal
771 * probabilities that are actually required. In the common case
772 * we would write 12 KiB less.
773 */
774 probs = s->lzma.is_match[0];
775 for (i = 0; i < PROBS_TOTAL; ++i)
776 probs[i] = RC_BIT_MODEL_TOTAL / 2;
777
778 rc_reset(&s->rc);
779}
780
781/*
782 * Decode and validate LZMA properties (lc/lp/pb) and calculate the bit masks
783 * from the decoded lp and pb values. On success, the LZMA decoder state is
784 * reset and true is returned.
785 */
786static bool XZ_FUNC lzma_props(struct xz_dec_lzma2 *s, uint8_t props)
787{
788 if (props > (4 * 5 + 4) * 9 + 8)
789 return false;
790
791 s->lzma.pos_mask = 0;
792 while (props >= 9 * 5) {
793 props -= 9 * 5;
794 ++s->lzma.pos_mask;
795 }
796
797 s->lzma.pos_mask = (1 << s->lzma.pos_mask) - 1;
798
799 s->lzma.literal_pos_mask = 0;
800 while (props >= 9) {
801 props -= 9;
802 ++s->lzma.literal_pos_mask;
803 }
804
805 s->lzma.lc = props;
806
807 if (s->lzma.lc + s->lzma.literal_pos_mask > 4)
808 return false;
809
810 s->lzma.literal_pos_mask = (1 << s->lzma.literal_pos_mask) - 1;
811
812 lzma_reset(s);
813
814 return true;
815}
816
817/*********
818 * LZMA2 *
819 *********/
820
821/*
822 * The LZMA decoder assumes that if the input limit (s->rc.in_limit) hasn't
823 * been exceeded, it is safe to read up to LZMA_IN_REQUIRED bytes. This
824 * wrapper function takes care of making the LZMA decoder's assumption safe.
825 *
826 * As long as there is plenty of input left to be decoded in the current LZMA
827 * chunk, we decode directly from the caller-supplied input buffer until
828 * there's LZMA_IN_REQUIRED bytes left. Those remaining bytes are copied into
829 * s->temp.buf, which (hopefully) gets filled on the next call to this
830 * function. We decode a few bytes from the temporary buffer so that we can
831 * continue decoding from the caller-supplied input buffer again.
832 */
833static bool XZ_FUNC lzma2_lzma(struct xz_dec_lzma2 *s, struct xz_buf *b)
834{
835 size_t in_avail;
836 uint32_t tmp;
837
838 in_avail = b->in_size - b->in_pos;
839 if (s->temp.size > 0 || s->lzma2.compressed == 0) {
840 tmp = 2 * LZMA_IN_REQUIRED - s->temp.size;
841 if (tmp > s->lzma2.compressed - s->temp.size)
842 tmp = s->lzma2.compressed - s->temp.size;
843 if (tmp > in_avail)
844 tmp = in_avail;
845
846 memcpy(s->temp.buf + s->temp.size, b->in + b->in_pos, tmp);
847
848 if (s->temp.size + tmp == s->lzma2.compressed) {
849 memzero(s->temp.buf + s->temp.size + tmp,
850 sizeof(s->temp.buf)
851 - s->temp.size - tmp);
852 s->rc.in_limit = s->temp.size + tmp;
853 } else if (s->temp.size + tmp < LZMA_IN_REQUIRED) {
854 s->temp.size += tmp;
855 b->in_pos += tmp;
856 return true;
857 } else {
858 s->rc.in_limit = s->temp.size + tmp - LZMA_IN_REQUIRED;
859 }
860
861 s->rc.in = s->temp.buf;
862 s->rc.in_pos = 0;
863
864 if (!lzma_main(s) || s->rc.in_pos > s->temp.size + tmp)
865 return false;
866
867 s->lzma2.compressed -= s->rc.in_pos;
868
869 if (s->rc.in_pos < s->temp.size) {
870 s->temp.size -= s->rc.in_pos;
871 memmove(s->temp.buf, s->temp.buf + s->rc.in_pos,
872 s->temp.size);
873 return true;
874 }
875
876 b->in_pos += s->rc.in_pos - s->temp.size;
877 s->temp.size = 0;
878 }
879
880 in_avail = b->in_size - b->in_pos;
881 if (in_avail >= LZMA_IN_REQUIRED) {
882 s->rc.in = b->in;
883 s->rc.in_pos = b->in_pos;
884
885 if (in_avail >= s->lzma2.compressed + LZMA_IN_REQUIRED)
886 s->rc.in_limit = b->in_pos + s->lzma2.compressed;
887 else
888 s->rc.in_limit = b->in_size - LZMA_IN_REQUIRED;
889
890 if (!lzma_main(s))
891 return false;
892
893 in_avail = s->rc.in_pos - b->in_pos;
894 if (in_avail > s->lzma2.compressed)
895 return false;
896
897 s->lzma2.compressed -= in_avail;
898 b->in_pos = s->rc.in_pos;
899 }
900
901 in_avail = b->in_size - b->in_pos;
902 if (in_avail < LZMA_IN_REQUIRED) {
903 if (in_avail > s->lzma2.compressed)
904 in_avail = s->lzma2.compressed;
905
906 memcpy(s->temp.buf, b->in + b->in_pos, in_avail);
907 s->temp.size = in_avail;
908 b->in_pos += in_avail;
909 }
910
911 return true;
912}
913
914/*
915 * Take care of the LZMA2 control layer, and forward the job of actual LZMA
916 * decoding or copying of uncompressed chunks to other functions.
917 */
918XZ_EXTERN NOINLINE enum xz_ret XZ_FUNC xz_dec_lzma2_run(
919 struct xz_dec_lzma2 *s, struct xz_buf *b)
920{
921 uint32_t tmp;
922
923 while (b->in_pos < b->in_size || s->lzma2.sequence == SEQ_LZMA_RUN) {
924 switch (s->lzma2.sequence) {
925 case SEQ_CONTROL:
926 /*
927 * LZMA2 control byte
928 *
929 * Exact values:
930 * 0x00 End marker
931 * 0x01 Dictionary reset followed by
932 * an uncompressed chunk
933 * 0x02 Uncompressed chunk (no dictionary reset)
934 *
935 * Highest three bits (s->control & 0xE0):
936 * 0xE0 Dictionary reset, new properties and state
937 * reset, followed by LZMA compressed chunk
938 * 0xC0 New properties and state reset, followed
939 * by LZMA compressed chunk (no dictionary
940 * reset)
941 * 0xA0 State reset using old properties,
942 * followed by LZMA compressed chunk (no
943 * dictionary reset)
944 * 0x80 LZMA chunk (no dictionary or state reset)
945 *
946 * For LZMA compressed chunks, the lowest five bits
947 * (s->control & 1F) are the highest bits of the
948 * uncompressed size (bits 16-20).
949 *
950 * A new LZMA2 stream must begin with a dictionary
951 * reset. The first LZMA chunk must set new
952 * properties and reset the LZMA state.
953 *
954 * Values that don't match anything described above
955 * are invalid and we return XZ_DATA_ERROR.
956 */
957 tmp = b->in[b->in_pos++];
958
959 if (tmp >= 0xE0 || tmp == 0x01) {
960 s->lzma2.need_props = true;
961 s->lzma2.need_dict_reset = false;
962 dict_reset(&s->dict, b);
963 } else if (s->lzma2.need_dict_reset) {
964 return XZ_DATA_ERROR;
965 }
966
967 if (tmp >= 0x80) {
968 s->lzma2.uncompressed = (tmp & 0x1F) << 16;
969 s->lzma2.sequence = SEQ_UNCOMPRESSED_1;
970
971 if (tmp >= 0xC0) {
972 /*
973 * When there are new properties,
974 * state reset is done at
975 * SEQ_PROPERTIES.
976 */
977 s->lzma2.need_props = false;
978 s->lzma2.next_sequence
979 = SEQ_PROPERTIES;
980
981 } else if (s->lzma2.need_props) {
982 return XZ_DATA_ERROR;
983
984 } else {
985 s->lzma2.next_sequence
986 = SEQ_LZMA_PREPARE;
987 if (tmp >= 0xA0)
988 lzma_reset(s);
989 }
990 } else {
991 if (tmp == 0x00)
992 return XZ_STREAM_END;
993
994 if (tmp > 0x02)
995 return XZ_DATA_ERROR;
996
997 s->lzma2.sequence = SEQ_COMPRESSED_0;
998 s->lzma2.next_sequence = SEQ_COPY;
999 }
1000
1001 break;
1002
1003 case SEQ_UNCOMPRESSED_1:
1004 s->lzma2.uncompressed
1005 += (uint32_t)b->in[b->in_pos++] << 8;
1006 s->lzma2.sequence = SEQ_UNCOMPRESSED_2;
1007 break;
1008
1009 case SEQ_UNCOMPRESSED_2:
1010 s->lzma2.uncompressed
1011 += (uint32_t)b->in[b->in_pos++] + 1;
1012 s->lzma2.sequence = SEQ_COMPRESSED_0;
1013 break;
1014
1015 case SEQ_COMPRESSED_0:
1016 s->lzma2.compressed
1017 = (uint32_t)b->in[b->in_pos++] << 8;
1018 s->lzma2.sequence = SEQ_COMPRESSED_1;
1019 break;
1020
1021 case SEQ_COMPRESSED_1:
1022 s->lzma2.compressed
1023 += (uint32_t)b->in[b->in_pos++] + 1;
1024 s->lzma2.sequence = s->lzma2.next_sequence;
1025 break;
1026
1027 case SEQ_PROPERTIES:
1028 if (!lzma_props(s, b->in[b->in_pos++]))
1029 return XZ_DATA_ERROR;
1030
1031 s->lzma2.sequence = SEQ_LZMA_PREPARE;
1032
1033 case SEQ_LZMA_PREPARE:
1034 if (s->lzma2.compressed < RC_INIT_BYTES)
1035 return XZ_DATA_ERROR;
1036
1037 if (!rc_read_init(&s->rc, b))
1038 return XZ_OK;
1039
1040 s->lzma2.compressed -= RC_INIT_BYTES;
1041 s->lzma2.sequence = SEQ_LZMA_RUN;
1042
1043 case SEQ_LZMA_RUN:
1044 /*
1045 * Set dictionary limit to indicate how much we want
1046 * to be encoded at maximum. Decode new data into the
1047 * dictionary. Flush the new data from dictionary to
1048 * b->out. Check if we finished decoding this chunk.
1049 * In case the dictionary got full but we didn't fill
1050 * the output buffer yet, we may run this loop
1051 * multiple times without changing s->lzma2.sequence.
1052 */
1053 dict_limit(&s->dict, min_t(size_t,
1054 b->out_size - b->out_pos,
1055 s->lzma2.uncompressed));
1056 if (!lzma2_lzma(s, b))
1057 return XZ_DATA_ERROR;
1058
1059 s->lzma2.uncompressed -= dict_flush(&s->dict, b);
1060
1061 if (s->lzma2.uncompressed == 0) {
1062 if (s->lzma2.compressed > 0 || s->lzma.len > 0
1063 || !rc_is_finished(&s->rc))
1064 return XZ_DATA_ERROR;
1065
1066 rc_reset(&s->rc);
1067 s->lzma2.sequence = SEQ_CONTROL;
1068
1069 } else if (b->out_pos == b->out_size
1070 || (b->in_pos == b->in_size
1071 && s->temp.size
1072 < s->lzma2.compressed)) {
1073 return XZ_OK;
1074 }
1075
1076 break;
1077
1078 case SEQ_COPY:
1079 dict_uncompressed(&s->dict, b, &s->lzma2.compressed);
1080 if (s->lzma2.compressed > 0)
1081 return XZ_OK;
1082
1083 s->lzma2.sequence = SEQ_CONTROL;
1084 break;
1085 }
1086 }
1087
1088 return XZ_OK;
1089}
1090
1091XZ_EXTERN struct xz_dec_lzma2 * XZ_FUNC xz_dec_lzma2_create(uint32_t dict_max)
1092{
1093 struct xz_dec_lzma2 *s;
1094
1095 /* Maximum supported dictionary by this implementation is 3 GiB. */
1096 if (dict_max > ((uint32_t)3 << 30))
1097 return NULL;
1098
1099 s = kmalloc(sizeof(*s), GFP_KERNEL);
1100 if (s == NULL)
1101 return NULL;
1102
1103 if (dict_max > 0) {
1104 s->dict.buf = vmalloc(dict_max);
1105 if (s->dict.buf == NULL) {
1106 kfree(s);
1107 return NULL;
1108 }
1109 }
1110
1111 s->dict.allocated = dict_max;
1112
1113 return s;
1114}
1115
1116XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_lzma2_reset(
1117 struct xz_dec_lzma2 *s, uint8_t props)
1118{
1119 /* This limits dictionary size to 3 GiB to keep parsing simpler. */
1120 if (props > 39)
1121 return XZ_OPTIONS_ERROR;
1122
1123 s->dict.size = 2 + (props & 1);
1124 s->dict.size <<= (props >> 1) + 11;
1125
1126 if (s->dict.allocated > 0 && s->dict.allocated < s->dict.size) {
1127#ifdef XZ_REALLOC_DICT_BUF
1128 s->dict.buf = XZ_REALLOC_DICT_BUF(s->dict.buf, s->dict.size);
1129 if (!s->dict.buf)
1130 return XZ_MEMLIMIT_ERROR;
1131 s->dict.allocated = s->dict.size;
1132#else
1133 return XZ_MEMLIMIT_ERROR;
1134#endif
1135 }
1136
1137 s->dict.end = s->dict.size;
1138
1139 s->lzma.len = 0;
1140
1141 s->lzma2.sequence = SEQ_CONTROL;
1142 s->lzma2.need_dict_reset = true;
1143
1144 s->temp.size = 0;
1145
1146 return XZ_OK;
1147}
1148
1149XZ_EXTERN void XZ_FUNC xz_dec_lzma2_end(struct xz_dec_lzma2 *s)
1150{
1151 if (s->dict.allocated > 0)
1152 vfree(s->dict.buf);
1153
1154 kfree(s);
1155}
diff --git a/archival/libunarchive/unxz/xz_dec_stream.c b/archival/libunarchive/unxz/xz_dec_stream.c
new file mode 100644
index 000000000..21db283fb
--- /dev/null
+++ b/archival/libunarchive/unxz/xz_dec_stream.c
@@ -0,0 +1,821 @@
1/*
2 * .xz Stream decoder
3 *
4 * Author: Lasse Collin <lasse.collin@tukaani.org>
5 *
6 * This file has been put into the public domain.
7 * You can do whatever you want with this file.
8 */
9
10#include "xz_private.h"
11#include "xz_stream.h"
12
/*
 * Hash used to validate the Index field.
 *
 * One instance is accumulated while decoding Blocks and another while
 * decoding the Index Records; the two are compared byte for byte once
 * the Index has been read.
 */
struct xz_dec_hash {
	/* Running sum of the Unpadded Size of every Block/Record */
	vli_type unpadded;

	/* Running sum of the Uncompressed Size of every Block/Record */
	vli_type uncompressed;

	/*
	 * CRC32 chained over the whole structure (sizeof(struct xz_dec_hash)
	 * bytes, including this field) after each Block/Record.
	 */
	uint32_t crc32;
};
19
/* Complete state of the .xz Stream decoder */
struct xz_dec {
	/* Position in dec_main() */
	enum {
		SEQ_STREAM_HEADER,
		SEQ_BLOCK_START,
		SEQ_BLOCK_HEADER,
		SEQ_BLOCK_UNCOMPRESS,
		SEQ_BLOCK_PADDING,
		SEQ_BLOCK_CHECK,
		SEQ_INDEX,
		SEQ_INDEX_PADDING,
		SEQ_INDEX_CRC32,
		SEQ_STREAM_FOOTER
	} sequence;

	/*
	 * Position in variable-length integers and Check fields.
	 * dec_vli() and crc32_validate() use it as a bit shift count
	 * (advancing by 7 and 8 respectively), check_skip() as a count of
	 * skipped bytes. Each of them sets it back to zero on completion.
	 */
	uint32_t pos;

	/* Variable-length integer decoded by dec_vli() */
	vli_type vli;

	/*
	 * Saved in_pos and out_pos. Set by dec_main() and dec_block() so
	 * that the amount of input consumed and output produced since then
	 * can be computed.
	 */
	size_t in_start;
	size_t out_start;

	/* CRC32 value in Block or Index */
	uint32_t crc32;

	/* Type of the integrity check calculated from uncompressed data */
	enum xz_check check_type;

	/* True if we are operating in single-call mode. */
	bool single_call;

	/*
	 * True if the next call to xz_dec_run() is allowed to return
	 * XZ_BUF_ERROR.
	 */
	bool allow_buf_error;

	/* Information stored in Block Header */
	struct {
		/*
		 * Value stored in the Compressed Size field, or
		 * VLI_UNKNOWN if Compressed Size is not present.
		 */
		vli_type compressed;

		/*
		 * Value stored in the Uncompressed Size field, or
		 * VLI_UNKNOWN if Uncompressed Size is not present.
		 */
		vli_type uncompressed;

		/* Size of the Block Header field */
		uint32_t size;
	} block_header;

	/* Information collected when decoding Blocks */
	struct {
		/*
		 * Observed compressed size of the current Block. After the
		 * Block has been decoded, dec_main() reuses this as a
		 * counter while reading Block Padding.
		 */
		vli_type compressed;

		/* Observed uncompressed size of the current Block */
		vli_type uncompressed;

		/* Number of Blocks decoded so far */
		vli_type count;

		/*
		 * Hash calculated from the Block sizes. This is used to
		 * validate the Index field.
		 */
		struct xz_dec_hash hash;
	} block;

	/* Variables needed when verifying the Index field */
	struct {
		/* Position in dec_index() */
		enum {
			SEQ_INDEX_COUNT,
			SEQ_INDEX_UNPADDED,
			SEQ_INDEX_UNCOMPRESSED
		} sequence;

		/*
		 * Size of the Index in bytes. The Index CRC32 field is
		 * never added to this.
		 */
		vli_type size;

		/*
		 * Number of Records (matches block.count in valid files).
		 * dec_index() counts this down as Records are decoded.
		 */
		vli_type count;

		/*
		 * Hash calculated from the Records (matches block.hash in
		 * valid files).
		 */
		struct xz_dec_hash hash;
	} index;

	/*
	 * Temporary buffer needed to hold Stream Header, Block Header,
	 * and Stream Footer. The Block Header is the biggest (1 KiB)
	 * so we reserve space according to that. buf[] has to be aligned
	 * to a multiple of four bytes; the size_t variables before it
	 * should guarantee this.
	 */
	struct {
		/* Number of bytes already copied into buf[] by fill_temp() */
		size_t pos;

		/* Number of bytes fill_temp() has to collect in total */
		size_t size;

		uint8_t buf[1024];
	} temp;

	/* LZMA2 decoder created by xz_dec_init() */
	struct xz_dec_lzma2 *lzma2;

#ifdef XZ_DEC_BCJ
	/* BCJ filter decoder created by xz_dec_init() */
	struct xz_dec_bcj *bcj;

	/* True if the current Block has a BCJ filter before LZMA2 */
	bool bcj_active;
#endif
};
138
139#ifdef XZ_DEC_ANY_CHECK
/* Size of the Check field in bytes, indexed by Check ID (0-15) */
static const uint8_t check_sizes[16] = {
	0,		/* ID 0 */
	4, 4, 4,	/* IDs 1-3 */
	8, 8, 8,	/* IDs 4-6 */
	16, 16, 16,	/* IDs 7-9 */
	32, 32, 32,	/* IDs 10-12 */
	64, 64, 64	/* IDs 13-15 */
};
149#endif
150
151/*
152 * Fill s->temp by copying data starting from b->in[b->in_pos]. Caller
153 * must have set s->temp.pos to indicate how much data we are supposed
154 * to copy into s->temp.buf. Return true once s->temp.pos has reached
155 * s->temp.size.
156 */
157static bool XZ_FUNC fill_temp(struct xz_dec *s, struct xz_buf *b)
158{
159 size_t copy_size = min_t(size_t,
160 b->in_size - b->in_pos, s->temp.size - s->temp.pos);
161
162 memcpy(s->temp.buf + s->temp.pos, b->in + b->in_pos, copy_size);
163 b->in_pos += copy_size;
164 s->temp.pos += copy_size;
165
166 if (s->temp.pos == s->temp.size) {
167 s->temp.pos = 0;
168 return true;
169 }
170
171 return false;
172}
173
174/* Decode a variable-length integer (little-endian base-128 encoding) */
175static enum xz_ret XZ_FUNC dec_vli(struct xz_dec *s,
176 const uint8_t *in, size_t *in_pos, size_t in_size)
177{
178 uint8_t byte;
179
180 if (s->pos == 0)
181 s->vli = 0;
182
183 while (*in_pos < in_size) {
184 byte = in[*in_pos];
185 ++*in_pos;
186
187 s->vli |= (vli_type)(byte & 0x7F) << s->pos;
188
189 if ((byte & 0x80) == 0) {
190 /* Don't allow non-minimal encodings. */
191 if (byte == 0 && s->pos != 0)
192 return XZ_DATA_ERROR;
193
194 s->pos = 0;
195 return XZ_STREAM_END;
196 }
197
198 s->pos += 7;
199 if (s->pos == 7 * VLI_BYTES_MAX)
200 return XZ_DATA_ERROR;
201 }
202
203 return XZ_OK;
204}
205
206/*
207 * Decode the Compressed Data field from a Block. Update and validate
208 * the observed compressed and uncompressed sizes of the Block so that
209 * they don't exceed the values possibly stored in the Block Header
210 * (validation assumes that no integer overflow occurs, since vli_type
211 * is normally uint64_t). Update the CRC32 if presence of the CRC32
212 * field was indicated in Stream Header.
213 *
214 * Once the decoding is finished, validate that the observed sizes match
215 * the sizes possibly stored in the Block Header. Update the hash and
216 * Block count, which are later used to validate the Index field.
217 */
218static enum xz_ret XZ_FUNC dec_block(struct xz_dec *s, struct xz_buf *b)
219{
220 enum xz_ret ret;
221
222 s->in_start = b->in_pos;
223 s->out_start = b->out_pos;
224
225#ifdef XZ_DEC_BCJ
226 if (s->bcj_active)
227 ret = xz_dec_bcj_run(s->bcj, s->lzma2, b);
228 else
229#endif
230 ret = xz_dec_lzma2_run(s->lzma2, b);
231
232 s->block.compressed += b->in_pos - s->in_start;
233 s->block.uncompressed += b->out_pos - s->out_start;
234
235 /*
236 * There is no need to separately check for VLI_UNKNOWN, since
237 * the observed sizes are always smaller than VLI_UNKNOWN.
238 */
239 if (s->block.compressed > s->block_header.compressed
240 || s->block.uncompressed
241 > s->block_header.uncompressed)
242 return XZ_DATA_ERROR;
243
244 if (s->check_type == XZ_CHECK_CRC32)
245 s->crc32 = xz_crc32(b->out + s->out_start,
246 b->out_pos - s->out_start, s->crc32);
247
248 if (ret == XZ_STREAM_END) {
249 if (s->block_header.compressed != VLI_UNKNOWN
250 && s->block_header.compressed
251 != s->block.compressed)
252 return XZ_DATA_ERROR;
253
254 if (s->block_header.uncompressed != VLI_UNKNOWN
255 && s->block_header.uncompressed
256 != s->block.uncompressed)
257 return XZ_DATA_ERROR;
258
259 s->block.hash.unpadded += s->block_header.size
260 + s->block.compressed;
261
262#ifdef XZ_DEC_ANY_CHECK
263 s->block.hash.unpadded += check_sizes[s->check_type];
264#else
265 if (s->check_type == XZ_CHECK_CRC32)
266 s->block.hash.unpadded += 4;
267#endif
268
269 s->block.hash.uncompressed += s->block.uncompressed;
270 s->block.hash.crc32 = xz_crc32(
271 (const uint8_t *)&s->block.hash,
272 sizeof(s->block.hash), s->block.hash.crc32);
273
274 ++s->block.count;
275 }
276
277 return ret;
278}
279
280/* Update the Index size and the CRC32 value. */
281static void XZ_FUNC index_update(struct xz_dec *s, const struct xz_buf *b)
282{
283 size_t in_used = b->in_pos - s->in_start;
284 s->index.size += in_used;
285 s->crc32 = xz_crc32(b->in + s->in_start, in_used, s->crc32);
286}
287
288/*
289 * Decode the Number of Records, Unpadded Size, and Uncompressed Size
290 * fields from the Index field. That is, Index Padding and CRC32 are not
291 * decoded by this function.
292 *
293 * This can return XZ_OK (more input needed), XZ_STREAM_END (everything
294 * successfully decoded), or XZ_DATA_ERROR (input is corrupt).
295 */
296static enum xz_ret XZ_FUNC dec_index(struct xz_dec *s, struct xz_buf *b)
297{
298 enum xz_ret ret;
299
300 do {
301 ret = dec_vli(s, b->in, &b->in_pos, b->in_size);
302 if (ret != XZ_STREAM_END) {
303 index_update(s, b);
304 return ret;
305 }
306
307 switch (s->index.sequence) {
308 case SEQ_INDEX_COUNT:
309 s->index.count = s->vli;
310
311 /*
312 * Validate that the Number of Records field
313 * indicates the same number of Records as
314 * there were Blocks in the Stream.
315 */
316 if (s->index.count != s->block.count)
317 return XZ_DATA_ERROR;
318
319 s->index.sequence = SEQ_INDEX_UNPADDED;
320 break;
321
322 case SEQ_INDEX_UNPADDED:
323 s->index.hash.unpadded += s->vli;
324 s->index.sequence = SEQ_INDEX_UNCOMPRESSED;
325 break;
326
327 case SEQ_INDEX_UNCOMPRESSED:
328 s->index.hash.uncompressed += s->vli;
329 s->index.hash.crc32 = xz_crc32(
330 (const uint8_t *)&s->index.hash,
331 sizeof(s->index.hash),
332 s->index.hash.crc32);
333 --s->index.count;
334 s->index.sequence = SEQ_INDEX_UNPADDED;
335 break;
336 }
337 } while (s->index.count > 0);
338
339 return XZ_STREAM_END;
340}
341
342/*
343 * Validate that the next four input bytes match the value of s->crc32.
344 * s->pos must be zero when starting to validate the first byte.
345 */
346static enum xz_ret XZ_FUNC crc32_validate(struct xz_dec *s, struct xz_buf *b)
347{
348 do {
349 if (b->in_pos == b->in_size)
350 return XZ_OK;
351
352 if (((s->crc32 >> s->pos) & 0xFF) != b->in[b->in_pos++])
353 return XZ_DATA_ERROR;
354
355 s->pos += 8;
356
357 } while (s->pos < 32);
358
359 s->crc32 = 0;
360 s->pos = 0;
361
362 return XZ_STREAM_END;
363}
364
365#ifdef XZ_DEC_ANY_CHECK
366/*
367 * Skip over the Check field when the Check ID is not supported.
368 * Returns true once the whole Check field has been skipped over.
369 */
370static bool XZ_FUNC check_skip(struct xz_dec *s, struct xz_buf *b)
371{
372 while (s->pos < check_sizes[s->check_type]) {
373 if (b->in_pos == b->in_size)
374 return false;
375
376 ++b->in_pos;
377 ++s->pos;
378 }
379
380 s->pos = 0;
381
382 return true;
383}
384#endif
385
386/* Decode the Stream Header field (the first 12 bytes of the .xz Stream). */
387static enum xz_ret XZ_FUNC dec_stream_header(struct xz_dec *s)
388{
389 if (!memeq(s->temp.buf, HEADER_MAGIC, HEADER_MAGIC_SIZE))
390 return XZ_FORMAT_ERROR;
391
392 if (xz_crc32(s->temp.buf + HEADER_MAGIC_SIZE, 2, 0)
393 != get_le32(s->temp.buf + HEADER_MAGIC_SIZE + 2))
394 return XZ_DATA_ERROR;
395
396 if (s->temp.buf[HEADER_MAGIC_SIZE] != 0)
397 return XZ_OPTIONS_ERROR;
398
399 /*
400 * Of integrity checks, we support only none (Check ID = 0) and
401 * CRC32 (Check ID = 1). However, if XZ_DEC_ANY_CHECK is defined,
402 * we will accept other check types too, but then the check won't
403 * be verified and a warning (XZ_UNSUPPORTED_CHECK) will be given.
404 */
405 s->check_type = s->temp.buf[HEADER_MAGIC_SIZE + 1];
406
407#ifdef XZ_DEC_ANY_CHECK
408 if (s->check_type > XZ_CHECK_MAX)
409 return XZ_OPTIONS_ERROR;
410
411 if (s->check_type > XZ_CHECK_CRC32)
412 return XZ_UNSUPPORTED_CHECK;
413#else
414 if (s->check_type > XZ_CHECK_CRC32)
415 return XZ_OPTIONS_ERROR;
416#endif
417
418 return XZ_OK;
419}
420
421/* Decode the Stream Footer field (the last 12 bytes of the .xz Stream) */
422static enum xz_ret XZ_FUNC dec_stream_footer(struct xz_dec *s)
423{
424 if (!memeq(s->temp.buf + 10, FOOTER_MAGIC, FOOTER_MAGIC_SIZE))
425 return XZ_DATA_ERROR;
426
427 if (xz_crc32(s->temp.buf + 4, 6, 0) != get_le32(s->temp.buf))
428 return XZ_DATA_ERROR;
429
430 /*
431 * Validate Backward Size. Note that we never added the size of the
432 * Index CRC32 field to s->index.size, thus we use s->index.size / 4
433 * instead of s->index.size / 4 - 1.
434 */
435 if ((s->index.size >> 2) != get_le32(s->temp.buf + 4))
436 return XZ_DATA_ERROR;
437
438 if (s->temp.buf[8] != 0 || s->temp.buf[9] != s->check_type)
439 return XZ_DATA_ERROR;
440
441 /*
442 * Use XZ_STREAM_END instead of XZ_OK to be more convenient
443 * for the caller.
444 */
445 return XZ_STREAM_END;
446}
447
448/* Decode the Block Header and initialize the filter chain. */
449static enum xz_ret XZ_FUNC dec_block_header(struct xz_dec *s)
450{
451 enum xz_ret ret;
452
453 /*
454 * Validate the CRC32. We know that the temp buffer is at least
455 * eight bytes so this is safe.
456 */
457 s->temp.size -= 4;
458 if (xz_crc32(s->temp.buf, s->temp.size, 0)
459 != get_le32(s->temp.buf + s->temp.size))
460 return XZ_DATA_ERROR;
461
462 s->temp.pos = 2;
463
464 /*
465 * Catch unsupported Block Flags. We support only one or two filters
466 * in the chain, so we catch that with the same test.
467 */
468#ifdef XZ_DEC_BCJ
469 if (s->temp.buf[1] & 0x3E)
470#else
471 if (s->temp.buf[1] & 0x3F)
472#endif
473 return XZ_OPTIONS_ERROR;
474
475 /* Compressed Size */
476 if (s->temp.buf[1] & 0x40) {
477 if (dec_vli(s, s->temp.buf, &s->temp.pos, s->temp.size)
478 != XZ_STREAM_END)
479 return XZ_DATA_ERROR;
480
481 s->block_header.compressed = s->vli;
482 } else {
483 s->block_header.compressed = VLI_UNKNOWN;
484 }
485
486 /* Uncompressed Size */
487 if (s->temp.buf[1] & 0x80) {
488 if (dec_vli(s, s->temp.buf, &s->temp.pos, s->temp.size)
489 != XZ_STREAM_END)
490 return XZ_DATA_ERROR;
491
492 s->block_header.uncompressed = s->vli;
493 } else {
494 s->block_header.uncompressed = VLI_UNKNOWN;
495 }
496
497#ifdef XZ_DEC_BCJ
498 /* If there are two filters, the first one must be a BCJ filter. */
499 s->bcj_active = s->temp.buf[1] & 0x01;
500 if (s->bcj_active) {
501 if (s->temp.size - s->temp.pos < 2)
502 return XZ_OPTIONS_ERROR;
503
504 ret = xz_dec_bcj_reset(s->bcj, s->temp.buf[s->temp.pos++]);
505 if (ret != XZ_OK)
506 return ret;
507
508 /*
509 * We don't support custom start offset,
510 * so Size of Properties must be zero.
511 */
512 if (s->temp.buf[s->temp.pos++] != 0x00)
513 return XZ_OPTIONS_ERROR;
514 }
515#endif
516
517 /* Valid Filter Flags always take at least two bytes. */
518 if (s->temp.size - s->temp.pos < 2)
519 return XZ_DATA_ERROR;
520
521 /* Filter ID = LZMA2 */
522 if (s->temp.buf[s->temp.pos++] != 0x21)
523 return XZ_OPTIONS_ERROR;
524
525 /* Size of Properties = 1-byte Filter Properties */
526 if (s->temp.buf[s->temp.pos++] != 0x01)
527 return XZ_OPTIONS_ERROR;
528
529 /* Filter Properties contains LZMA2 dictionary size. */
530 if (s->temp.size - s->temp.pos < 1)
531 return XZ_DATA_ERROR;
532
533 ret = xz_dec_lzma2_reset(s->lzma2, s->temp.buf[s->temp.pos++]);
534 if (ret != XZ_OK)
535 return ret;
536
537 /* The rest must be Header Padding. */
538 while (s->temp.pos < s->temp.size)
539 if (s->temp.buf[s->temp.pos++] != 0x00)
540 return XZ_OPTIONS_ERROR;
541
542 s->temp.pos = 0;
543 s->block.compressed = 0;
544 s->block.uncompressed = 0;
545
546 return XZ_OK;
547}
548
/*
 * Main state machine of the Stream decoder. s->sequence remembers which
 * field is being decoded, so the function can return whenever the input
 * (or output) runs out and continue from the same place on the next call.
 * Most cases deliberately fall through to the next one once their field
 * has been handled.
 *
 * Returns XZ_OK when a later call is needed to continue, XZ_STREAM_END
 * once the Stream Footer has been validated, or the error code reported
 * by the step that failed.
 */
static enum xz_ret XZ_FUNC dec_main(struct xz_dec *s, struct xz_buf *b)
{
	enum xz_ret ret;

	/*
	 * Store the start position for the case when we are in the middle
	 * of the Index field.
	 */
	s->in_start = b->in_pos;

	while (true) {
		switch (s->sequence) {
		case SEQ_STREAM_HEADER:
			/*
			 * Stream Header is copied to s->temp, and then
			 * decoded from there. This way if the caller
			 * gives us only little input at a time, we can
			 * still keep the Stream Header decoding code
			 * simple. Similar approach is used in many places
			 * in this file.
			 */
			if (!fill_temp(s, b))
				return XZ_OK;

			/*
			 * If dec_stream_header() returns
			 * XZ_UNSUPPORTED_CHECK, it is still possible
			 * to continue decoding if working in multi-call
			 * mode. Thus, update s->sequence before calling
			 * dec_stream_header().
			 */
			s->sequence = SEQ_BLOCK_START;

			ret = dec_stream_header(s);
			if (ret != XZ_OK)
				return ret;

			/* Fall through */

		case SEQ_BLOCK_START:
			/* We need one byte of input to continue. */
			if (b->in_pos == b->in_size)
				return XZ_OK;

			/* See if this is the beginning of the Index field. */
			if (b->in[b->in_pos] == 0) {
				/*
				 * The Index Indicator byte is consumed here
				 * but stays after in_start, so it is still
				 * included in the Index size and CRC32.
				 */
				s->in_start = b->in_pos++;
				s->sequence = SEQ_INDEX;
				break;
			}

			/*
			 * Calculate the size of the Block Header and
			 * prepare to decode it. The size byte itself is
			 * not consumed; it is copied into s->temp with
			 * the rest of the header.
			 */
			s->block_header.size
				= ((uint32_t)b->in[b->in_pos] + 1) * 4;

			s->temp.size = s->block_header.size;
			s->temp.pos = 0;
			s->sequence = SEQ_BLOCK_HEADER;

			/* Fall through */

		case SEQ_BLOCK_HEADER:
			if (!fill_temp(s, b))
				return XZ_OK;

			ret = dec_block_header(s);
			if (ret != XZ_OK)
				return ret;

			s->sequence = SEQ_BLOCK_UNCOMPRESS;

			/* Fall through */

		case SEQ_BLOCK_UNCOMPRESS:
			ret = dec_block(s, b);
			if (ret != XZ_STREAM_END)
				return ret;

			s->sequence = SEQ_BLOCK_PADDING;

			/* Fall through */

		case SEQ_BLOCK_PADDING:
			/*
			 * Size of Compressed Data + Block Padding
			 * must be a multiple of four. We don't need
			 * s->block.compressed for anything else
			 * anymore, so we use it here to test the size
			 * of the Block Padding field.
			 */
			while (s->block.compressed & 3) {
				if (b->in_pos == b->in_size)
					return XZ_OK;

				if (b->in[b->in_pos++] != 0)
					return XZ_DATA_ERROR;

				++s->block.compressed;
			}

			s->sequence = SEQ_BLOCK_CHECK;

			/* Fall through */

		case SEQ_BLOCK_CHECK:
			if (s->check_type == XZ_CHECK_CRC32) {
				ret = crc32_validate(s, b);
				if (ret != XZ_STREAM_END)
					return ret;
			}
#ifdef XZ_DEC_ANY_CHECK
			else if (!check_skip(s, b)) {
				return XZ_OK;
			}
#endif

			/* Go around the loop for the next Block or the Index. */
			s->sequence = SEQ_BLOCK_START;
			break;

		case SEQ_INDEX:
			ret = dec_index(s, b);
			if (ret != XZ_STREAM_END)
				return ret;

			s->sequence = SEQ_INDEX_PADDING;

			/* Fall through */

		case SEQ_INDEX_PADDING:
			/*
			 * s->index.size does not yet include the bytes
			 * consumed since in_start, so they are added here.
			 */
			while ((s->index.size + (b->in_pos - s->in_start))
					& 3) {
				if (b->in_pos == b->in_size) {
					index_update(s, b);
					return XZ_OK;
				}

				if (b->in[b->in_pos++] != 0)
					return XZ_DATA_ERROR;
			}

			/* Finish the CRC32 value and Index size. */
			index_update(s, b);

			/* Compare the hashes to validate the Index field. */
			if (!memeq(&s->block.hash, &s->index.hash,
					sizeof(s->block.hash)))
				return XZ_DATA_ERROR;

			s->sequence = SEQ_INDEX_CRC32;

			/* Fall through */

		case SEQ_INDEX_CRC32:
			ret = crc32_validate(s, b);
			if (ret != XZ_STREAM_END)
				return ret;

			/* Stream Footer is as long as the Stream Header. */
			s->temp.size = STREAM_HEADER_SIZE;
			s->sequence = SEQ_STREAM_FOOTER;

			/* Fall through */

		case SEQ_STREAM_FOOTER:
			if (!fill_temp(s, b))
				return XZ_OK;

			return dec_stream_footer(s);
		}
	}

	/* Never reached */
}
708
709/*
710 * xz_dec_run() is a wrapper for dec_main() to handle some special cases in
711 * multi-call and single-call decoding.
712 *
713 * In multi-call mode, we must return XZ_BUF_ERROR when it seems clear that we
714 * are not going to make any progress anymore. This is to prevent the caller
715 * from calling us infinitely when the input file is truncated or otherwise
716 * corrupt. Since zlib-style API allows that the caller fills the input buffer
717 * only when the decoder doesn't produce any new output, we have to be careful
718 * to avoid returning XZ_BUF_ERROR too easily: XZ_BUF_ERROR is returned only
719 * after the second consecutive call to xz_dec_run() that makes no progress.
720 *
721 * In single-call mode, if we couldn't decode everything and no error
722 * occurred, either the input is truncated or the output buffer is too small.
723 * Since we know that the last input byte never produces any output, we know
724 * that if all the input was consumed and decoding wasn't finished, the file
725 * must be corrupt. Otherwise the output buffer has to be too small or the
726 * file is corrupt in a way that decoding it produces too big output.
727 *
728 * If single-call decoding fails, we reset b->in_pos and b->out_pos back to
729 * their original values. This is because with some filter chains there won't
730 * be any valid uncompressed data in the output buffer unless the decoding
731 * actually succeeds (that's the price to pay of using the output buffer as
732 * the workspace).
733 */
734XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_run(struct xz_dec *s, struct xz_buf *b)
735{
736 size_t in_start;
737 size_t out_start;
738 enum xz_ret ret;
739
740 if (s->single_call)
741 xz_dec_reset(s);
742
743 in_start = b->in_pos;
744 out_start = b->out_pos;
745 ret = dec_main(s, b);
746
747 if (s->single_call) {
748 if (ret == XZ_OK)
749 ret = b->in_pos == b->in_size
750 ? XZ_DATA_ERROR : XZ_BUF_ERROR;
751
752 if (ret != XZ_STREAM_END) {
753 b->in_pos = in_start;
754 b->out_pos = out_start;
755 }
756
757 } else if (ret == XZ_OK && in_start == b->in_pos
758 && out_start == b->out_pos) {
759 if (s->allow_buf_error)
760 ret = XZ_BUF_ERROR;
761
762 s->allow_buf_error = true;
763 } else {
764 s->allow_buf_error = false;
765 }
766
767 return ret;
768}
769
770XZ_EXTERN struct xz_dec * XZ_FUNC xz_dec_init(uint32_t dict_max)
771{
772 struct xz_dec *s = kmalloc(sizeof(*s), GFP_KERNEL);
773 if (s == NULL)
774 return NULL;
775
776 s->single_call = dict_max == 0;
777
778#ifdef XZ_DEC_BCJ
779 s->bcj = xz_dec_bcj_create(s->single_call);
780 if (s->bcj == NULL)
781 goto error_bcj;
782#endif
783
784 s->lzma2 = xz_dec_lzma2_create(dict_max);
785 if (s->lzma2 == NULL)
786 goto error_lzma2;
787
788 xz_dec_reset(s);
789 return s;
790
791error_lzma2:
792#ifdef XZ_DEC_BCJ
793 xz_dec_bcj_end(s->bcj);
794error_bcj:
795#endif
796 kfree(s);
797 return NULL;
798}
799
800XZ_EXTERN void XZ_FUNC xz_dec_reset(struct xz_dec *s)
801{
802 s->sequence = SEQ_STREAM_HEADER;
803 s->allow_buf_error = false;
804 s->pos = 0;
805 s->crc32 = 0;
806 memzero(&s->block, sizeof(s->block));
807 memzero(&s->index, sizeof(s->index));
808 s->temp.pos = 0;
809 s->temp.size = STREAM_HEADER_SIZE;
810}
811
812XZ_EXTERN void XZ_FUNC xz_dec_end(struct xz_dec *s)
813{
814 if (s != NULL) {
815 xz_dec_lzma2_end(s->lzma2);
816#ifdef XZ_DEC_BCJ
817 xz_dec_bcj_end(s->bcj);
818#endif
819 kfree(s);
820 }
821}
diff --git a/archival/libunarchive/unxz/xz_lzma2.h b/archival/libunarchive/unxz/xz_lzma2.h
new file mode 100644
index 000000000..47f21afbc
--- /dev/null
+++ b/archival/libunarchive/unxz/xz_lzma2.h
@@ -0,0 +1,204 @@
1/*
2 * LZMA2 definitions
3 *
4 * Authors: Lasse Collin <lasse.collin@tukaani.org>
5 * Igor Pavlov <http://7-zip.org/>
6 *
7 * This file has been put into the public domain.
8 * You can do whatever you want with this file.
9 */
10
11#ifndef XZ_LZMA2_H
12#define XZ_LZMA2_H
13
14/* Range coder constants */
15#define RC_SHIFT_BITS 8
16#define RC_TOP_BITS 24
17#define RC_TOP_VALUE (1 << RC_TOP_BITS)
18#define RC_BIT_MODEL_TOTAL_BITS 11
19#define RC_BIT_MODEL_TOTAL (1 << RC_BIT_MODEL_TOTAL_BITS)
20#define RC_MOVE_BITS 5
21
22/*
23 * Maximum number of position states. A position state is the lowest pb
24 * number of bits of the current uncompressed offset. In some places there
25 * are different sets of probabilities for different position states.
26 */
27#define POS_STATES_MAX (1 << 4)
28
/*
 * This enum is used to track which LZMA symbols have occurred most recently
 * and in which order. This information is used to predict the next symbol.
 *
 * Symbols:
 *  - Literal: One 8-bit byte
 *  - Match: Repeat a chunk of data at some distance
 *  - Long repeat: Multi-byte match at a recently seen distance
 *  - Short repeat: One-byte repeat at a recently seen distance
 *
 * The symbol names are in the form STATE_oldest_older_previous. REP means
 * either short or long repeated match, and NONLIT means any non-literal.
 */
enum lzma_state {
	STATE_LIT_LIT,
	STATE_MATCH_LIT_LIT,
	STATE_REP_LIT_LIT,
	STATE_SHORTREP_LIT_LIT,
	STATE_MATCH_LIT,
	STATE_REP_LIT,
	STATE_SHORTREP_LIT,
	STATE_LIT_MATCH,
	STATE_LIT_LONGREP,
	STATE_LIT_SHORTREP,
	STATE_NONLIT_MATCH,
	STATE_NONLIT_REP
};

/* Total number of states */
#define STATES 12

/* The lowest 7 states indicate that the previous symbol was a literal. */
#define LIT_STATES 7

/* Indicate that the latest symbol was a literal. */
static inline void XZ_FUNC lzma_state_literal(enum lzma_state *state)
{
	const enum lzma_state cur = *state;

	if (cur > STATE_LIT_SHORTREP)
		*state = cur - 6;
	else if (cur > STATE_SHORTREP_LIT_LIT)
		*state = cur - 3;
	else
		*state = STATE_LIT_LIT;
}

/* Indicate that the latest symbol was a match. */
static inline void XZ_FUNC lzma_state_match(enum lzma_state *state)
{
	if (*state < LIT_STATES)
		*state = STATE_LIT_MATCH;
	else
		*state = STATE_NONLIT_MATCH;
}

/* Indicate that the latest symbol was a long repeated match. */
static inline void XZ_FUNC lzma_state_long_rep(enum lzma_state *state)
{
	if (*state < LIT_STATES)
		*state = STATE_LIT_LONGREP;
	else
		*state = STATE_NONLIT_REP;
}

/* Indicate that the latest symbol was a short repeated match. */
static inline void XZ_FUNC lzma_state_short_rep(enum lzma_state *state)
{
	if (*state < LIT_STATES)
		*state = STATE_LIT_SHORTREP;
	else
		*state = STATE_NONLIT_REP;
}

/* Test if the previous symbol was a literal. */
static inline bool XZ_FUNC lzma_state_is_literal(enum lzma_state state)
{
	return state < LIT_STATES;
}
97
98/* Each literal coder is divided in three sections:
99 * - 0x001-0x0FF: Without match byte
100 * - 0x101-0x1FF: With match byte; match bit is 0
101 * - 0x201-0x2FF: With match byte; match bit is 1
102 *
103 * Match byte is used when the previous LZMA symbol was something else than
104 * a literal (that is, it was some kind of match).
105 */
106#define LITERAL_CODER_SIZE 0x300
107
108/* Maximum number of literal coders */
109#define LITERAL_CODERS_MAX (1 << 4)
110
111/* Minimum length of a match is two bytes. */
112#define MATCH_LEN_MIN 2
113
114/* Match length is encoded with 4, 5, or 10 bits.
115 *
116 * Length Bits
117 * 2-9 4 = Choice=0 + 3 bits
118 * 10-17 5 = Choice=1 + Choice2=0 + 3 bits
119 * 18-273 10 = Choice=1 + Choice2=1 + 8 bits
120 */
121#define LEN_LOW_BITS 3
122#define LEN_LOW_SYMBOLS (1 << LEN_LOW_BITS)
123#define LEN_MID_BITS 3
124#define LEN_MID_SYMBOLS (1 << LEN_MID_BITS)
125#define LEN_HIGH_BITS 8
126#define LEN_HIGH_SYMBOLS (1 << LEN_HIGH_BITS)
127#define LEN_SYMBOLS (LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS + LEN_HIGH_SYMBOLS)
128
129/*
130 * Maximum length of a match is 273 which is a result of the encoding
131 * described above.
132 */
133#define MATCH_LEN_MAX (MATCH_LEN_MIN + LEN_SYMBOLS - 1)
134
135/*
136 * Different sets of probabilities are used for match distances that have
137 * very short match length: Lengths of 2, 3, and 4 bytes have a separate
138 * set of probabilities for each length. The matches with longer length
139 * use a shared set of probabilities.
140 */
141#define DIST_STATES 4
142
143/*
144 * Get the index of the appropriate probability array for decoding
145 * the distance slot.
146 */
147static inline uint32_t XZ_FUNC lzma_get_dist_state(uint32_t len)
148{
149 return len < DIST_STATES + MATCH_LEN_MIN
150 ? len - MATCH_LEN_MIN : DIST_STATES - 1;
151}
152
153/*
154 * The highest two bits of a 32-bit match distance are encoded using six bits.
155 * This six-bit value is called a distance slot. This way encoding a 32-bit
156 * value takes 6-36 bits, larger values taking more bits.
157 */
158#define DIST_SLOT_BITS 6
159#define DIST_SLOTS (1 << DIST_SLOT_BITS)
160
161/* Match distances up to 127 are fully encoded using probabilities. Since
162 * the highest two bits (distance slot) are always encoded using six bits,
163 * the distances 0-3 don't need any additional bits to encode, since the
164 * distance slot itself is the same as the actual distance. DIST_MODEL_START
165 * indicates the first distance slot where at least one additional bit is
166 * needed.
167 */
168#define DIST_MODEL_START 4
169
170/*
171 * Match distances greater than 127 are encoded in three pieces:
172 * - distance slot: the highest two bits
173 * - direct bits: 2-26 bits below the highest two bits
174 * - alignment bits: four lowest bits
175 *
176 * Direct bits don't use any probabilities.
177 *
178 * The distance slot value of 14 is for distances 128-191.
179 */
180#define DIST_MODEL_END 14
181
182/* Distance slots that indicate a distance <= 127. */
183#define FULL_DISTANCES_BITS (DIST_MODEL_END / 2)
184#define FULL_DISTANCES (1 << FULL_DISTANCES_BITS)
185
/*
 * For match distances greater than 127, only the highest two bits and the
 * lowest four bits (alignment) are encoded using probabilities.
 */
190#define ALIGN_BITS 4
191#define ALIGN_SIZE (1 << ALIGN_BITS)
192#define ALIGN_MASK (ALIGN_SIZE - 1)
193
194/* Total number of all probability variables */
195#define PROBS_TOTAL (1846 + LITERAL_CODERS_MAX * LITERAL_CODER_SIZE)
196
197/*
198 * LZMA remembers the four most recent match distances. Reusing these
199 * distances tends to take less space than re-encoding the actual
200 * distance value.
201 */
202#define REPS 4
203
204#endif
diff --git a/archival/libunarchive/unxz/xz_private.h b/archival/libunarchive/unxz/xz_private.h
new file mode 100644
index 000000000..f4e0b4010
--- /dev/null
+++ b/archival/libunarchive/unxz/xz_private.h
@@ -0,0 +1,120 @@
1/*
2 * Private includes and definitions
3 *
4 * Author: Lasse Collin <lasse.collin@tukaani.org>
5 *
6 * This file has been put into the public domain.
7 * You can do whatever you want with this file.
8 */
9
10#ifndef XZ_PRIVATE_H
11#define XZ_PRIVATE_H
12
13#ifdef __KERNEL__
14 /* XZ_PREBOOT may be defined only via decompress_unxz.c. */
15# ifndef XZ_PREBOOT
16# include <linux/slab.h>
17# include <linux/vmalloc.h>
18# include <linux/string.h>
19# define memeq(a, b, size) (memcmp(a, b, size) == 0)
20# define memzero(buf, size) memset(buf, 0, size)
21# endif
22# include <asm/byteorder.h>
23# include <asm/unaligned.h>
24# define get_le32(p) le32_to_cpup((const uint32_t *)(p))
25 /* XZ_IGNORE_KCONFIG may be defined only via decompress_unxz.c. */
26# ifndef XZ_IGNORE_KCONFIG
27# ifdef CONFIG_XZ_DEC_X86
28# define XZ_DEC_X86
29# endif
30# ifdef CONFIG_XZ_DEC_POWERPC
31# define XZ_DEC_POWERPC
32# endif
33# ifdef CONFIG_XZ_DEC_IA64
34# define XZ_DEC_IA64
35# endif
36# ifdef CONFIG_XZ_DEC_ARM
37# define XZ_DEC_ARM
38# endif
39# ifdef CONFIG_XZ_DEC_ARMTHUMB
40# define XZ_DEC_ARMTHUMB
41# endif
42# ifdef CONFIG_XZ_DEC_SPARC
43# define XZ_DEC_SPARC
44# endif
45# endif
46# include <linux/xz.h>
47#else
48 /*
49 * For userspace builds, use a separate header to define the required
50 * macros and functions. This makes it easier to adapt the code into
51 * different environments and avoids clutter in the Linux kernel tree.
52 */
53# include "xz_config.h"
54#endif
55
/*
 * If any of the BCJ filter decoders are wanted, define XZ_DEC_BCJ.
 * XZ_DEC_BCJ is used to enable generic support for BCJ decoders.
 * Each supported architecture filter is tested exactly once.
 */
#ifndef XZ_DEC_BCJ
#	if defined(XZ_DEC_X86) || defined(XZ_DEC_POWERPC) \
			|| defined(XZ_DEC_IA64) || defined(XZ_DEC_ARM) \
			|| defined(XZ_DEC_ARMTHUMB) || defined(XZ_DEC_SPARC)
#		define XZ_DEC_BCJ
#	endif
#endif
68
69/*
70 * Allocate memory for LZMA2 decoder. xz_dec_lzma2_reset() must be used
71 * before calling xz_dec_lzma2_run().
72 */
73XZ_EXTERN struct xz_dec_lzma2 * XZ_FUNC xz_dec_lzma2_create(
74 uint32_t dict_max);
75
76/*
77 * Decode the LZMA2 properties (one byte) and reset the decoder. Return
78 * XZ_OK on success, XZ_MEMLIMIT_ERROR if the preallocated dictionary is not
79 * big enough, and XZ_OPTIONS_ERROR if props indicates something that this
80 * decoder doesn't support.
81 */
82XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_lzma2_reset(
83 struct xz_dec_lzma2 *s, uint8_t props);
84
85/* Decode raw LZMA2 stream from b->in to b->out. */
86XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_lzma2_run(
87 struct xz_dec_lzma2 *s, struct xz_buf *b);
88
89/* Free the memory allocated for the LZMA2 decoder. */
90XZ_EXTERN void XZ_FUNC xz_dec_lzma2_end(struct xz_dec_lzma2 *s);
91
92#ifdef XZ_DEC_BCJ
93/*
94 * Allocate memory for BCJ decoders. xz_dec_bcj_reset() must be used before
95 * calling xz_dec_bcj_run().
96 */
97XZ_EXTERN struct xz_dec_bcj * XZ_FUNC xz_dec_bcj_create(bool single_call);
98
99/*
100 * Decode the Filter ID of a BCJ filter. This implementation doesn't
101 * support custom start offsets, so no decoding of Filter Properties
102 * is needed. Returns XZ_OK if the given Filter ID is supported.
103 * Otherwise XZ_OPTIONS_ERROR is returned.
104 */
105XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_bcj_reset(
106 struct xz_dec_bcj *s, uint8_t id);
107
108/*
109 * Decode raw BCJ + LZMA2 stream. This must be used only if there actually is
110 * a BCJ filter in the chain. If the chain has only LZMA2, xz_dec_lzma2_run()
111 * must be called directly.
112 */
113XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_bcj_run(struct xz_dec_bcj *s,
114 struct xz_dec_lzma2 *lzma2, struct xz_buf *b);
115
116/* Free the memory allocated for the BCJ filters. */
117#define xz_dec_bcj_end(s) kfree(s)
118#endif
119
120#endif
diff --git a/archival/libunarchive/unxz/xz_stream.h b/archival/libunarchive/unxz/xz_stream.h
new file mode 100644
index 000000000..36f2a7cbf
--- /dev/null
+++ b/archival/libunarchive/unxz/xz_stream.h
@@ -0,0 +1,57 @@
1/*
2 * Definitions for handling the .xz file format
3 *
4 * Author: Lasse Collin <lasse.collin@tukaani.org>
5 *
6 * This file has been put into the public domain.
7 * You can do whatever you want with this file.
8 */
9
10#ifndef XZ_STREAM_H
11#define XZ_STREAM_H
12
/*
 * Kernel builds in which XZ_INTERNAL_CRC32 is zero or undefined get
 * xz_crc32() as a thin wrapper around crc32_le() from <linux/crc32.h>.
 * The wrapper bitwise-complements the incoming crc (after casting it to
 * uint32_t) before the call and complements the result again afterwards.
 * NOTE(review): the #undef presumably removes a crc32 macro provided by
 * <linux/crc32.h> so the name does not clash with other code - confirm.
 */
13#if defined(__KERNEL__) && !XZ_INTERNAL_CRC32
14# include <linux/crc32.h>
15# undef crc32
16# define xz_crc32(buf, size, crc) \
17 (~crc32_le(~(uint32_t)(crc), buf, size))
18#endif /* __KERNEL__ && !XZ_INTERNAL_CRC32 */
19
20/*
21 * See the .xz file format specification at
22 * http://tukaani.org/xz/xz-file-format.txt
23 * to understand the container format.
24 */
25
26#define STREAM_HEADER_SIZE 12 /* in bytes; NOTE(review): presumably 6 magic + 2 stream-flag + 4 CRC32 bytes - confirm against the spec */
27
/*
 * The header magic is the six bytes 0xFD '7' 'z' 'X' 'Z' 0x00.  "\375" is an
 * octal escape (at most three octal digits), so the '7' that follows is a
 * separate character.  The explicit "\0" is the sixth magic byte; the
 * literal's implicit terminating NUL is not counted in HEADER_MAGIC_SIZE.
 */
28#define HEADER_MAGIC "\3757zXZ\0"
29#define HEADER_MAGIC_SIZE 6
30
31#define FOOTER_MAGIC "YZ" /* two ASCII bytes; the implicit NUL is not counted below */
32#define FOOTER_MAGIC_SIZE 2
33
34/*
35 * Variable-length integer can hold a 63-bit unsigned integer, or a special
36 * value to indicate that the value is unknown.
37 */
38typedef uint64_t vli_type;
39
/*
 * The cast binds tighter than the division, so VLI_MAX is the all-ones
 * uint64_t value divided by two, i.e. 2^63 - 1 (the largest 63-bit value).
 * VLI_UNKNOWN is the all-ones value itself; being greater than VLI_MAX it
 * cannot collide with any valid VLI.
 */
40#define VLI_MAX ((vli_type)-1 / 2)
41#define VLI_UNKNOWN ((vli_type)-1)
42
/*
 * 64 bits / 7 with integer division gives 9.  The expression has type size_t
 * because it is built on sizeof.
 * NOTE(review): the divisor presumably reflects 7 payload bits per encoded
 * byte - confirm against the spec.
 */
43/* Maximum encoded size of a VLI */
44#define VLI_BYTES_MAX (sizeof(vli_type) * 8 / 7)
45
/*
 * The enumerator values are sparse (0, 1, 4, 10): IDs in 0..XZ_CHECK_MAX
 * that are not listed have no enumerator here.
 * NOTE(review): presumably these are the Check IDs stored in the stream
 * flags, and unlisted IDs are reserved by the format - confirm against the
 * spec.
 */
46/* Integrity Check types */
47enum xz_check {
48 XZ_CHECK_NONE = 0,
49 XZ_CHECK_CRC32 = 1,
50 XZ_CHECK_CRC64 = 4,
51 XZ_CHECK_SHA256 = 10
52};
53
54/* Maximum possible Check ID */
55#define XZ_CHECK_MAX 15 /* larger than the highest enumerator above (XZ_CHECK_SHA256 = 10) */
56
57#endif
diff --git a/archival/lzop.c b/archival/lzop.c
index 0a15c51aa..ab4d34c88 100644
--- a/archival/lzop.c
+++ b/archival/lzop.c
@@ -475,7 +475,7 @@ lzo_crc32(uint32_t c, const uint8_t* buf, unsigned len)
475 475
476 crc = ~c; 476 crc = ~c;
477 if (len != 0) do { 477 if (len != 0) do {
478 crc = G.lzo_crc32_table[((int)crc ^ *buf) & 0xff] ^ (crc >> 8); 478 crc = G.lzo_crc32_table[(uint8_t)((int)crc ^ *buf)] ^ (crc >> 8);
479 buf += 1; 479 buf += 1;
480 len -= 1; 480 len -= 1;
481 } while (len > 0); 481 } while (len > 0);
@@ -738,12 +738,12 @@ static NOINLINE smallint lzo_decompress(const header_t *h)
738 bb_error_msg_and_die("this file is a split lzop file"); 738 bb_error_msg_and_die("this file is a split lzop file");
739 739
740 if (dst_len > MAX_BLOCK_SIZE) 740 if (dst_len > MAX_BLOCK_SIZE)
741 bb_error_msg_and_die("lzop file corrupted"); 741 bb_error_msg_and_die("corrupted data");
742 742
743 /* read compressed block size */ 743 /* read compressed block size */
744 src_len = read32(); 744 src_len = read32();
745 if (src_len <= 0 || src_len > dst_len) 745 if (src_len <= 0 || src_len > dst_len)
746 bb_error_msg_and_die("lzop file corrupted"); 746 bb_error_msg_and_die("corrupted data");
747 747
748 if (dst_len > block_size) { 748 if (dst_len > block_size) {
749 if (b2) { 749 if (b2) {
@@ -797,7 +797,7 @@ static NOINLINE smallint lzo_decompress(const header_t *h)
797 r = lzo1x_decompress_safe(b1, src_len, b2, &d, NULL); 797 r = lzo1x_decompress_safe(b1, src_len, b2, &d, NULL);
798 798
799 if (r != 0 /*LZO_E_OK*/ || dst_len != d) { 799 if (r != 0 /*LZO_E_OK*/ || dst_len != d) {
800 bb_error_msg_and_die("corrupted compressed data"); 800 bb_error_msg_and_die("corrupted data");
801 } 801 }
802 dst = b2; 802 dst = b2;
803 } else { 803 } else {
@@ -1042,7 +1042,7 @@ static smallint do_lzo_decompress(void)
1042 return lzo_decompress(&header); 1042 return lzo_decompress(&header);
1043} 1043}
1044 1044
1045static char* make_new_name_lzop(char *filename) 1045static char* FAST_FUNC make_new_name_lzop(char *filename, const char *expected_ext UNUSED_PARAM)
1046{ 1046{
1047 if (option_mask32 & OPT_DECOMPRESS) { 1047 if (option_mask32 & OPT_DECOMPRESS) {
1048 char *extension = strrchr(filename, '.'); 1048 char *extension = strrchr(filename, '.');
@@ -1054,7 +1054,7 @@ static char* make_new_name_lzop(char *filename)
1054 return xasprintf("%s.lzo", filename); 1054 return xasprintf("%s.lzo", filename);
1055} 1055}
1056 1056
1057static IF_DESKTOP(long long) int pack_lzop(unpack_info_t *info UNUSED_PARAM) 1057static IF_DESKTOP(long long) int FAST_FUNC pack_lzop(unpack_info_t *info UNUSED_PARAM)
1058{ 1058{
1059 if (option_mask32 & OPT_DECOMPRESS) 1059 if (option_mask32 & OPT_DECOMPRESS)
1060 return do_lzo_decompress(); 1060 return do_lzo_decompress();
@@ -1074,5 +1074,5 @@ int lzop_main(int argc UNUSED_PARAM, char **argv)
1074 option_mask32 |= OPT_DECOMPRESS; 1074 option_mask32 |= OPT_DECOMPRESS;
1075 1075
1076 G.lzo_crc32_table = crc32_filltable(NULL, 0); 1076 G.lzo_crc32_table = crc32_filltable(NULL, 0);
1077 return bbunpack(argv, make_new_name_lzop, pack_lzop); 1077 return bbunpack(argv, pack_lzop, make_new_name_lzop, /*unused:*/ NULL);
1078} 1078}
diff --git a/archival/rpm2cpio.c b/archival/rpm2cpio.c
index 4ed5b023b..598ec8670 100644
--- a/archival/rpm2cpio.c
+++ b/archival/rpm2cpio.c
@@ -68,22 +68,35 @@ int rpm2cpio_main(int argc UNUSED_PARAM, char **argv)
68#else 68#else
69 /* BLOAT */ 69 /* BLOAT */
70 { 70 {
71 unsigned char magic[2]; 71 unsigned char magic[8];
72 IF_DESKTOP(long long) int FAST_FUNC (*unpack)(int src_fd, int dst_fd); 72 IF_DESKTOP(long long) int FAST_FUNC (*unpack)(int src_fd, int dst_fd);
73 73
74 xread(rpm_fd, &magic, 2); 74 xread(rpm_fd, &magic, 2);
75 unpack = unpack_gz_stream; 75 if (magic[0] == 0x1f && magic[1] == 0x8b) {
76 if (magic[0] != 0x1f || magic[1] != 0x8b) { 76 unpack = unpack_gz_stream;
77 if (!ENABLE_FEATURE_SEAMLESS_BZ2 77 } else
78 || magic[0] != 'B' || magic[1] != 'Z' 78 if (ENABLE_FEATURE_SEAMLESS_BZ2
79 ) { 79 && magic[0] == 'B' && magic[1] == 'Z'
80 bb_error_msg_and_die("invalid gzip" 80 ) {
81 IF_FEATURE_SEAMLESS_BZ2("/bzip2")
82 " magic");
83 }
84 unpack = unpack_bz2_stream; 81 unpack = unpack_bz2_stream;
82 } else
83 if (ENABLE_FEATURE_SEAMLESS_XZ
84 && magic[0] == 0xfd && magic[1] == '7'
85 ) {
86 /* .xz signature: 0xfd, '7', 'z', 'X', 'Z', 0x00 */
87 /* More info at: http://tukaani.org/xz/xz-file-format.txt */
88 xread(rpm_fd, magic + 2, 4);
89 if (strcmp((char*)magic + 2, "zXZ") != 0)
90 goto no_magic;
91 xlseek(rpm_fd, -6, SEEK_CUR);
92 unpack = unpack_xz_stream;
93 } else {
94 no_magic:
95 bb_error_msg_and_die("no gzip"
96 IF_FEATURE_SEAMLESS_BZ2("/bzip2")
97 IF_FEATURE_SEAMLESS_XZ("/xz")
98 " magic");
85 } 99 }
86
87 if (unpack(rpm_fd, STDOUT_FILENO) < 0) 100 if (unpack(rpm_fd, STDOUT_FILENO) < 0)
88 bb_error_msg_and_die("error unpacking"); 101 bb_error_msg_and_die("error unpacking");
89 } 102 }
diff --git a/archival/unzip.c b/archival/unzip.c
index afab3280d..1d3291ab8 100644
--- a/archival/unzip.c
+++ b/archival/unzip.c
@@ -25,12 +25,12 @@
25enum { 25enum {
26#if BB_BIG_ENDIAN 26#if BB_BIG_ENDIAN
27 ZIP_FILEHEADER_MAGIC = 0x504b0304, 27 ZIP_FILEHEADER_MAGIC = 0x504b0304,
28 ZIP_CDS_MAGIC = 0x504b0102, 28 ZIP_CDF_MAGIC = 0x504b0102, /* central directory's file header */
29 ZIP_CDE_MAGIC = 0x504b0506, 29 ZIP_CDE_MAGIC = 0x504b0506, /* "end of central directory" record */
30 ZIP_DD_MAGIC = 0x504b0708, 30 ZIP_DD_MAGIC = 0x504b0708,
31#else 31#else
32 ZIP_FILEHEADER_MAGIC = 0x04034b50, 32 ZIP_FILEHEADER_MAGIC = 0x04034b50,
33 ZIP_CDS_MAGIC = 0x02014b50, 33 ZIP_CDF_MAGIC = 0x02014b50,
34 ZIP_CDE_MAGIC = 0x06054b50, 34 ZIP_CDE_MAGIC = 0x06054b50,
35 ZIP_DD_MAGIC = 0x08074b50, 35 ZIP_DD_MAGIC = 0x08074b50,
36#endif 36#endif
@@ -42,7 +42,7 @@ typedef union {
42 uint8_t raw[ZIP_HEADER_LEN]; 42 uint8_t raw[ZIP_HEADER_LEN];
43 struct { 43 struct {
44 uint16_t version; /* 0-1 */ 44 uint16_t version; /* 0-1 */
45 uint16_t flags; /* 2-3 */ 45 uint16_t zip_flags; /* 2-3 */
46 uint16_t method; /* 4-5 */ 46 uint16_t method; /* 4-5 */
47 uint16_t modtime; /* 6-7 */ 47 uint16_t modtime; /* 6-7 */
48 uint16_t moddate; /* 8-9 */ 48 uint16_t moddate; /* 8-9 */
@@ -66,7 +66,6 @@ struct BUG_zip_header_must_be_26_bytes {
66 66
67#define FIX_ENDIANNESS_ZIP(zip_header) do { \ 67#define FIX_ENDIANNESS_ZIP(zip_header) do { \
68 (zip_header).formatted.version = SWAP_LE16((zip_header).formatted.version ); \ 68 (zip_header).formatted.version = SWAP_LE16((zip_header).formatted.version ); \
69 (zip_header).formatted.flags = SWAP_LE16((zip_header).formatted.flags ); \
70 (zip_header).formatted.method = SWAP_LE16((zip_header).formatted.method ); \ 69 (zip_header).formatted.method = SWAP_LE16((zip_header).formatted.method ); \
71 (zip_header).formatted.modtime = SWAP_LE16((zip_header).formatted.modtime ); \ 70 (zip_header).formatted.modtime = SWAP_LE16((zip_header).formatted.modtime ); \
72 (zip_header).formatted.moddate = SWAP_LE16((zip_header).formatted.moddate ); \ 71 (zip_header).formatted.moddate = SWAP_LE16((zip_header).formatted.moddate ); \
@@ -77,15 +76,15 @@ struct BUG_zip_header_must_be_26_bytes {
77 (zip_header).formatted.extra_len = SWAP_LE16((zip_header).formatted.extra_len ); \ 76 (zip_header).formatted.extra_len = SWAP_LE16((zip_header).formatted.extra_len ); \
78} while (0) 77} while (0)
79 78
80#define CDS_HEADER_LEN 42 79#define CDF_HEADER_LEN 42
81 80
82typedef union { 81typedef union {
83 uint8_t raw[CDS_HEADER_LEN]; 82 uint8_t raw[CDF_HEADER_LEN];
84 struct { 83 struct {
85 /* uint32_t signature; 50 4b 01 02 */ 84 /* uint32_t signature; 50 4b 01 02 */
86 uint16_t version_made_by; /* 0-1 */ 85 uint16_t version_made_by; /* 0-1 */
87 uint16_t version_needed; /* 2-3 */ 86 uint16_t version_needed; /* 2-3 */
88 uint16_t cds_flags; /* 4-5 */ 87 uint16_t cdf_flags; /* 4-5 */
89 uint16_t method; /* 6-7 */ 88 uint16_t method; /* 6-7 */
90 uint16_t mtime; /* 8-9 */ 89 uint16_t mtime; /* 8-9 */
91 uint16_t mdate; /* 10-11 */ 90 uint16_t mdate; /* 10-11 */
@@ -100,21 +99,25 @@ typedef union {
100 uint32_t external_file_attributes PACKED; /* 34-37 */ 99 uint32_t external_file_attributes PACKED; /* 34-37 */
101 uint32_t relative_offset_of_local_header PACKED; /* 38-41 */ 100 uint32_t relative_offset_of_local_header PACKED; /* 38-41 */
102 } formatted PACKED; 101 } formatted PACKED;
103} cds_header_t; 102} cdf_header_t;
104 103
105struct BUG_cds_header_must_be_42_bytes { 104struct BUG_cdf_header_must_be_42_bytes {
106 char BUG_cds_header_must_be_42_bytes[ 105 char BUG_cdf_header_must_be_42_bytes[
107 offsetof(cds_header_t, formatted.relative_offset_of_local_header) + 4 106 offsetof(cdf_header_t, formatted.relative_offset_of_local_header) + 4
108 == CDS_HEADER_LEN ? 1 : -1]; 107 == CDF_HEADER_LEN ? 1 : -1];
109}; 108};
110 109
111#define FIX_ENDIANNESS_CDS(cds_header) do { \ 110#define FIX_ENDIANNESS_CDF(cdf_header) do { \
112 (cds_header).formatted.crc32 = SWAP_LE32((cds_header).formatted.crc32 ); \ 111 (cdf_header).formatted.crc32 = SWAP_LE32((cdf_header).formatted.crc32 ); \
113 (cds_header).formatted.cmpsize = SWAP_LE32((cds_header).formatted.cmpsize ); \ 112 (cdf_header).formatted.cmpsize = SWAP_LE32((cdf_header).formatted.cmpsize ); \
114 (cds_header).formatted.ucmpsize = SWAP_LE32((cds_header).formatted.ucmpsize ); \ 113 (cdf_header).formatted.ucmpsize = SWAP_LE32((cdf_header).formatted.ucmpsize ); \
115 (cds_header).formatted.file_name_length = SWAP_LE16((cds_header).formatted.file_name_length); \ 114 (cdf_header).formatted.file_name_length = SWAP_LE16((cdf_header).formatted.file_name_length); \
116 (cds_header).formatted.extra_field_length = SWAP_LE16((cds_header).formatted.extra_field_length); \ 115 (cdf_header).formatted.extra_field_length = SWAP_LE16((cdf_header).formatted.extra_field_length); \
117 (cds_header).formatted.file_comment_length = SWAP_LE16((cds_header).formatted.file_comment_length); \ 116 (cdf_header).formatted.file_comment_length = SWAP_LE16((cdf_header).formatted.file_comment_length); \
117 IF_DESKTOP( \
118 (cdf_header).formatted.version_made_by = SWAP_LE16((cdf_header).formatted.version_made_by); \
119 (cdf_header).formatted.external_file_attributes = SWAP_LE32((cdf_header).formatted.external_file_attributes); \
120 ) \
118} while (0) 121} while (0)
119 122
120#define CDE_HEADER_LEN 16 123#define CDE_HEADER_LEN 16
@@ -124,11 +127,11 @@ typedef union {
124 struct { 127 struct {
125 /* uint32_t signature; 50 4b 05 06 */ 128 /* uint32_t signature; 50 4b 05 06 */
126 uint16_t this_disk_no; 129 uint16_t this_disk_no;
127 uint16_t disk_with_cds_no; 130 uint16_t disk_with_cdf_no;
128 uint16_t cds_entries_on_this_disk; 131 uint16_t cdf_entries_on_this_disk;
129 uint16_t cds_entries_total; 132 uint16_t cdf_entries_total;
130 uint32_t cds_size; 133 uint32_t cdf_size;
131 uint32_t cds_offset; 134 uint32_t cdf_offset;
132 /* uint16_t file_comment_length; */ 135 /* uint16_t file_comment_length; */
133 /* .ZIP file comment (variable size) */ 136 /* .ZIP file comment (variable size) */
134 } formatted PACKED; 137 } formatted PACKED;
@@ -140,7 +143,7 @@ struct BUG_cde_header_must_be_16_bytes {
140}; 143};
141 144
142#define FIX_ENDIANNESS_CDE(cde_header) do { \ 145#define FIX_ENDIANNESS_CDE(cde_header) do { \
143 (cde_header).formatted.cds_offset = SWAP_LE32((cde_header).formatted.cds_offset); \ 146 (cde_header).formatted.cdf_offset = SWAP_LE32((cde_header).formatted.cdf_offset); \
144} while (0) 147} while (0)
145 148
146enum { zip_fd = 3 }; 149enum { zip_fd = 3 };
@@ -148,7 +151,7 @@ enum { zip_fd = 3 };
148 151
149#if ENABLE_DESKTOP 152#if ENABLE_DESKTOP
150/* NB: does not preserve file position! */ 153/* NB: does not preserve file position! */
151static uint32_t find_cds_offset(void) 154static uint32_t find_cdf_offset(void)
152{ 155{
153 unsigned char buf[1024]; 156 unsigned char buf[1024];
154 cde_header_t cde_header; 157 cde_header_t cde_header;
@@ -156,9 +159,9 @@ static uint32_t find_cds_offset(void)
156 off_t end; 159 off_t end;
157 160
158 end = xlseek(zip_fd, 0, SEEK_END); 161 end = xlseek(zip_fd, 0, SEEK_END);
159 if (end < 1024)
160 end = 1024;
161 end -= 1024; 162 end -= 1024;
163 if (end < 0)
164 end = 0;
162 xlseek(zip_fd, end, SEEK_SET); 165 xlseek(zip_fd, end, SEEK_SET);
163 full_read(zip_fd, buf, 1024); 166 full_read(zip_fd, buf, 1024);
164 167
@@ -177,32 +180,30 @@ static uint32_t find_cds_offset(void)
177 /* we found CDE! */ 180 /* we found CDE! */
178 memcpy(cde_header.raw, p + 1, CDE_HEADER_LEN); 181 memcpy(cde_header.raw, p + 1, CDE_HEADER_LEN);
179 FIX_ENDIANNESS_CDE(cde_header); 182 FIX_ENDIANNESS_CDE(cde_header);
180 return cde_header.formatted.cds_offset; 183 return cde_header.formatted.cdf_offset;
181 } 184 }
182 bb_error_msg_and_die("can't find file table"); 185 bb_error_msg_and_die("can't find file table");
183}; 186};
184 187
185static uint32_t read_next_cds(int count_m1, uint32_t cds_offset, cds_header_t *cds_ptr) 188static uint32_t read_next_cdf(uint32_t cdf_offset, cdf_header_t *cdf_ptr)
186{ 189{
187 off_t org; 190 off_t org;
188 191
189 org = xlseek(zip_fd, 0, SEEK_CUR); 192 org = xlseek(zip_fd, 0, SEEK_CUR);
190 193
191 if (!cds_offset) 194 if (!cdf_offset)
192 cds_offset = find_cds_offset(); 195 cdf_offset = find_cdf_offset();
193 196
194 while (count_m1-- >= 0) { 197 xlseek(zip_fd, cdf_offset + 4, SEEK_SET);
195 xlseek(zip_fd, cds_offset + 4, SEEK_SET); 198 xread(zip_fd, cdf_ptr->raw, CDF_HEADER_LEN);
196 xread(zip_fd, cds_ptr->raw, CDS_HEADER_LEN); 199 FIX_ENDIANNESS_CDF(*cdf_ptr);
197 FIX_ENDIANNESS_CDS(*cds_ptr); 200 cdf_offset += 4 + CDF_HEADER_LEN
198 cds_offset += 4 + CDS_HEADER_LEN 201 + cdf_ptr->formatted.file_name_length
199 + cds_ptr->formatted.file_name_length 202 + cdf_ptr->formatted.extra_field_length
200 + cds_ptr->formatted.extra_field_length 203 + cdf_ptr->formatted.file_comment_length;
201 + cds_ptr->formatted.file_comment_length;
202 }
203 204
204 xlseek(zip_fd, org, SEEK_SET); 205 xlseek(zip_fd, org, SEEK_SET);
205 return cds_offset; 206 return cdf_offset;
206}; 207};
207#endif 208#endif
208 209
@@ -258,8 +259,7 @@ int unzip_main(int argc, char **argv)
258 smallint listing = 0; 259 smallint listing = 0;
259 smallint overwrite = O_PROMPT; 260 smallint overwrite = O_PROMPT;
260#if ENABLE_DESKTOP 261#if ENABLE_DESKTOP
261 uint32_t cds_offset; 262 uint32_t cdf_offset;
262 unsigned cds_entries;
263#endif 263#endif
264 unsigned long total_usize; 264 unsigned long total_usize;
265 unsigned long total_size; 265 unsigned long total_size;
@@ -435,20 +435,42 @@ int unzip_main(int argc, char **argv)
435 } 435 }
436 } 436 }
437 437
438/* Example of an archive with one 0-byte long file named 'z'
439 * created by Zip 2.31 on Unix:
440 * 0000 [50 4b]03 04 0a 00 00 00 00 00 42 1a b8 3c 00 00 |PK........B..<..|
441 * sig........ vneed flags compr mtime mdate crc32>
442 * 0010 00 00 00 00 00 00 00 00 00 00 01 00 15 00 7a 55 |..............zU|
443 * >..... csize...... usize...... fnlen exlen fn ex>
444 * 0020 54 09 00 03 cc d3 f9 4b cc d3 f9 4b 55 78 04 00 |T......K...KUx..|
445 * >tra_field......................................
446 * 0030 00 00 00 00[50 4b]01 02 17 03 0a 00 00 00 00 00 |....PK..........|
447 * ........... sig........ vmade vneed flags compr
448 * 0040 42 1a b8 3c 00 00 00 00 00 00 00 00 00 00 00 00 |B..<............|
449 * mtime mdate crc32...... csize...... usize......
450 * 0050 01 00 0d 00 00 00 00 00 00 00 00 00 a4 81 00 00 |................|
451 * fnlen exlen clen. dnum. iattr eattr...... relofs> (eattr = rw-r--r--)
452 * 0060 00 00 7a 55 54 05 00 03 cc d3 f9 4b 55 78 00 00 |..zUT......KUx..|
453 * >..... fn extra_field...........................
454 * 0070 [50 4b]05 06 00 00 00 00 01 00 01 00 3c 00 00 00 |PK..........<...|
455 * 0080 34 00 00 00 00 00 |4.....|
456 */
438 total_usize = 0; 457 total_usize = 0;
439 total_size = 0; 458 total_size = 0;
440 total_entries = 0; 459 total_entries = 0;
441#if ENABLE_DESKTOP 460#if ENABLE_DESKTOP
442 cds_entries = 0; 461 cdf_offset = 0;
443 cds_offset = 0;
444#endif 462#endif
445 while (1) { 463 while (1) {
446 uint32_t magic; 464 uint32_t magic;
465 mode_t dir_mode = 0777;
466#if ENABLE_DESKTOP
467 mode_t file_mode = 0666;
468#endif
447 469
448 /* Check magic number */ 470 /* Check magic number */
449 xread(zip_fd, &magic, 4); 471 xread(zip_fd, &magic, 4);
450 /* Central directory? It's at the end, so exit */ 472 /* Central directory? It's at the end, so exit */
451 if (magic == ZIP_CDS_MAGIC) 473 if (magic == ZIP_CDF_MAGIC)
452 break; 474 break;
453#if ENABLE_DESKTOP 475#if ENABLE_DESKTOP
454 /* Data descriptor? It was a streaming file, go on */ 476 /* Data descriptor? It was a streaming file, go on */
@@ -468,23 +490,29 @@ int unzip_main(int argc, char **argv)
468 bb_error_msg_and_die("unsupported method %d", zip_header.formatted.method); 490 bb_error_msg_and_die("unsupported method %d", zip_header.formatted.method);
469 } 491 }
470#if !ENABLE_DESKTOP 492#if !ENABLE_DESKTOP
471 if (zip_header.formatted.flags & 0x0009) { 493 if (zip_header.formatted.zip_flags & SWAP_LE16(0x0009)) {
472 bb_error_msg_and_die("zip flags 1 and 8 are not supported"); 494 bb_error_msg_and_die("zip flags 1 and 8 are not supported");
473 } 495 }
474#else 496#else
475 if (zip_header.formatted.flags & 0x0001) { 497 if (zip_header.formatted.zip_flags & SWAP_LE16(0x0001)) {
476 /* 0x0001 - encrypted */ 498 /* 0x0001 - encrypted */
477 bb_error_msg_and_die("zip flag 1 (encryption) is not supported"); 499 bb_error_msg_and_die("zip flag 1 (encryption) is not supported");
478 } 500 }
479 if (zip_header.formatted.flags & 0x0008) { 501
480 cds_header_t cds_header; 502 {
481 /* 0x0008 - streaming. [u]cmpsize can be reliably gotten 503 cdf_header_t cdf_header;
482 * only from Central Directory. See unzip_doc.txt */ 504 cdf_offset = read_next_cdf(cdf_offset, &cdf_header);
483 cds_offset = read_next_cds(total_entries - cds_entries, cds_offset, &cds_header); 505 if (zip_header.formatted.zip_flags & SWAP_LE16(0x0008)) {
484 cds_entries = total_entries + 1; 506 /* 0x0008 - streaming. [u]cmpsize can be reliably gotten
485 zip_header.formatted.crc32 = cds_header.formatted.crc32; 507 * only from Central Directory. See unzip_doc.txt */
486 zip_header.formatted.cmpsize = cds_header.formatted.cmpsize; 508 zip_header.formatted.crc32 = cdf_header.formatted.crc32;
487 zip_header.formatted.ucmpsize = cds_header.formatted.ucmpsize; 509 zip_header.formatted.cmpsize = cdf_header.formatted.cmpsize;
510 zip_header.formatted.ucmpsize = cdf_header.formatted.ucmpsize;
511 }
512 if ((cdf_header.formatted.version_made_by >> 8) == 3) {
513 /* this archive is created on Unix */
514 dir_mode = file_mode = (cdf_header.formatted.external_file_attributes >> 16);
515 }
488 } 516 }
489#endif 517#endif
490 518
@@ -550,7 +578,7 @@ int unzip_main(int argc, char **argv)
550 printf(" creating: %s\n", dst_fn); 578 printf(" creating: %s\n", dst_fn);
551 } 579 }
552 unzip_create_leading_dirs(dst_fn); 580 unzip_create_leading_dirs(dst_fn);
553 if (bb_make_directory(dst_fn, 0777, 0)) { 581 if (bb_make_directory(dst_fn, dir_mode, 0)) {
554 bb_error_msg_and_die("exiting"); 582 bb_error_msg_and_die("exiting");
555 } 583 }
556 } else { 584 } else {
@@ -592,7 +620,11 @@ int unzip_main(int argc, char **argv)
592 overwrite = O_ALWAYS; 620 overwrite = O_ALWAYS;
593 case 'y': /* Open file and fall into unzip */ 621 case 'y': /* Open file and fall into unzip */
594 unzip_create_leading_dirs(dst_fn); 622 unzip_create_leading_dirs(dst_fn);
623#if ENABLE_DESKTOP
624 dst_fd = xopen3(dst_fn, O_WRONLY | O_CREAT | O_TRUNC, file_mode);
625#else
595 dst_fd = xopen(dst_fn, O_WRONLY | O_CREAT | O_TRUNC); 626 dst_fd = xopen(dst_fn, O_WRONLY | O_CREAT | O_TRUNC);
627#endif
596 case -1: /* Unzip */ 628 case -1: /* Unzip */
597 if (!quiet) { 629 if (!quiet) {
598 printf(" inflating: %s\n", dst_fn); 630 printf(" inflating: %s\n", dst_fn);