aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--archival/Config.in22
-rw-r--r--archival/Kbuild19
-rw-r--r--archival/bbunzip.c33
-rw-r--r--archival/libunarchive/Kbuild1
-rw-r--r--archival/libunarchive/unxz/README136
-rw-r--r--archival/libunarchive/unxz/xz.h212
-rw-r--r--archival/libunarchive/unxz/xz_config.h119
-rw-r--r--archival/libunarchive/unxz/xz_dec_bcj.c560
-rw-r--r--archival/libunarchive/unxz/xz_dec_lzma2.c1157
-rw-r--r--archival/libunarchive/unxz/xz_dec_stream.c787
-rw-r--r--archival/libunarchive/unxz/xz_lzma2.h204
-rw-r--r--archival/libunarchive/unxz/xz_private.h120
-rw-r--r--archival/libunarchive/unxz/xz_stream.h46
-rw-r--r--include/applets.h3
-rw-r--r--include/unarchive.h1
-rw-r--r--include/usage.h22
16 files changed, 3433 insertions, 9 deletions
diff --git a/archival/Config.in b/archival/Config.in
index 028fce32f..4f762e860 100644
--- a/archival/Config.in
+++ b/archival/Config.in
@@ -5,6 +5,12 @@
5 5
6menu "Archival Utilities" 6menu "Archival Utilities"
7 7
8config FEATURE_SEAMLESS_XZ
9 bool "Make tar, rpm, modprobe etc understand .xz data"
10 default n
11 help
12 Make tar, rpm, modprobe etc understand .xz data.
13
8config FEATURE_SEAMLESS_LZMA 14config FEATURE_SEAMLESS_LZMA
9 bool "Make tar, rpm, modprobe etc understand .lzma data" 15 bool "Make tar, rpm, modprobe etc understand .lzma data"
10 default n 16 default n
@@ -225,7 +231,7 @@ config FEATURE_TAR_CREATE
225config FEATURE_TAR_AUTODETECT 231config FEATURE_TAR_AUTODETECT
226 bool "Autodetect compressed tarballs" 232 bool "Autodetect compressed tarballs"
227 default n 233 default n
228 depends on TAR && (FEATURE_SEAMLESS_Z || FEATURE_SEAMLESS_GZ || FEATURE_SEAMLESS_BZ2 || FEATURE_SEAMLESS_LZMA) 234 depends on TAR && (FEATURE_SEAMLESS_Z || FEATURE_SEAMLESS_GZ || FEATURE_SEAMLESS_BZ2 || FEATURE_SEAMLESS_LZMA || FEATURE_SEAMLESS_XZ)
229 help 235 help
230 With this option tar can automatically detect compressed 236 With this option tar can automatically detect compressed
231 tarballs. Currently it works only on files (not pipes etc). 237 tarballs. Currently it works only on files (not pipes etc).
@@ -335,6 +341,20 @@ config LZMA
335 Enable this option if you want commands like "lzma -d" to work. 341 Enable this option if you want commands like "lzma -d" to work.
336 IOW: you'll get lzma applet, but it will always require -d option. 342 IOW: you'll get lzma applet, but it will always require -d option.
337 343
344config UNXZ
345 bool "unxz"
346 default n
347 help
348 unxz is an unlzma successor.
349
350config XZ
351 bool "Provide xz alias which supports only unpacking"
352 default n
353 depends on UNXZ
354 help
355 Enable this option if you want commands like "xz -d" to work.
356 IOW: you'll get xz applet, but it will always require -d option.
357
338config UNZIP 358config UNZIP
339 bool "unzip" 359 bool "unzip"
340 default n 360 default n
diff --git a/archival/Kbuild b/archival/Kbuild
index 53bd7e21e..3300ea90f 100644
--- a/archival/Kbuild
+++ b/archival/Kbuild
@@ -8,18 +8,21 @@ libs-y += libunarchive/
8 8
9lib-y:= 9lib-y:=
10lib-$(CONFIG_AR) += ar.o 10lib-$(CONFIG_AR) += ar.o
11lib-$(CONFIG_BUNZIP2) += bbunzip.o
12lib-$(CONFIG_BZIP2) += bzip2.o bbunzip.o
13lib-$(CONFIG_UNLZMA) += bbunzip.o
14lib-$(CONFIG_CPIO) += cpio.o 11lib-$(CONFIG_CPIO) += cpio.o
15lib-$(CONFIG_DPKG) += dpkg.o 12lib-$(CONFIG_DPKG) += dpkg.o
16lib-$(CONFIG_DPKG_DEB) += dpkg_deb.o 13lib-$(CONFIG_DPKG_DEB) += dpkg_deb.o
17lib-$(CONFIG_GUNZIP) += bbunzip.o
18lib-$(CONFIG_GZIP) += gzip.o bbunzip.o
19lib-$(CONFIG_LZOP) += lzop.o lzo1x_1.o lzo1x_1o.o lzo1x_d.o bbunzip.o
20lib-$(CONFIG_LZOP_COMPR_HIGH) += lzo1x_9x.o
21lib-$(CONFIG_RPM2CPIO) += rpm2cpio.o 14lib-$(CONFIG_RPM2CPIO) += rpm2cpio.o
22lib-$(CONFIG_RPM) += rpm.o 15lib-$(CONFIG_RPM) += rpm.o
23lib-$(CONFIG_TAR) += tar.o 16lib-$(CONFIG_TAR) += tar.o
24lib-$(CONFIG_UNCOMPRESS) += bbunzip.o
25lib-$(CONFIG_UNZIP) += unzip.o 17lib-$(CONFIG_UNZIP) += unzip.o
18
19lib-$(CONFIG_LZOP) += lzop.o lzo1x_1.o lzo1x_1o.o lzo1x_d.o bbunzip.o
20lib-$(CONFIG_LZOP_COMPR_HIGH) += lzo1x_9x.o
21lib-$(CONFIG_GZIP) += gzip.o bbunzip.o
22lib-$(CONFIG_BZIP2) += bzip2.o bbunzip.o
23
24lib-$(CONFIG_UNXZ) += bbunzip.o
25lib-$(CONFIG_UNLZMA) += bbunzip.o
26lib-$(CONFIG_BUNZIP2) += bbunzip.o
27lib-$(CONFIG_GUNZIP) += bbunzip.o
28lib-$(CONFIG_UNCOMPRESS) += bbunzip.o
diff --git a/archival/bbunzip.c b/archival/bbunzip.c
index 178dc63be..824b0027f 100644
--- a/archival/bbunzip.c
+++ b/archival/bbunzip.c
@@ -387,3 +387,36 @@ int uncompress_main(int argc UNUSED_PARAM, char **argv)
387} 387}
388 388
389#endif 389#endif
390
391#if ENABLE_UNXZ
392
393static
394char* make_new_name_unxz(char *filename)
395{
396 return make_new_name_generic(filename, "xz");
397}
398
399static
400IF_DESKTOP(long long) int unpack_unxz(unpack_info_t *info UNUSED_PARAM)
401{
402 return unpack_xz_stream_stdin();
403}
404
405int unxz_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
406int unxz_main(int argc UNUSED_PARAM, char **argv)
407{
408 int opts = getopt32(argv, "cfvdt");
409# if ENABLE_XZ
410 /* xz without -d or -t? */
411 if (applet_name[2] == '\0' && !(opts & (OPT_DECOMPRESS|OPT_TEST)))
412 bb_show_usage();
413# endif
414 /* xzcat? */
415 if (applet_name[2] == 'c')
416 option_mask32 |= OPT_STDOUT;
417
418 argv += optind;
419 return bbunpack(argv, make_new_name_unxz, unpack_unxz);
420}
421
422#endif
diff --git a/archival/libunarchive/Kbuild b/archival/libunarchive/Kbuild
index 11d23b25f..ed8e85793 100644
--- a/archival/libunarchive/Kbuild
+++ b/archival/libunarchive/Kbuild
@@ -49,6 +49,7 @@ lib-$(CONFIG_FEATURE_SEAMLESS_Z) += open_transformer.o decompress_uncompr
49lib-$(CONFIG_FEATURE_SEAMLESS_GZ) += open_transformer.o decompress_unzip.o get_header_tar_gz.o 49lib-$(CONFIG_FEATURE_SEAMLESS_GZ) += open_transformer.o decompress_unzip.o get_header_tar_gz.o
50lib-$(CONFIG_FEATURE_SEAMLESS_BZ2) += open_transformer.o decompress_bunzip2.o get_header_tar_bz2.o 50lib-$(CONFIG_FEATURE_SEAMLESS_BZ2) += open_transformer.o decompress_bunzip2.o get_header_tar_bz2.o
51lib-$(CONFIG_FEATURE_SEAMLESS_LZMA) += open_transformer.o decompress_unlzma.o get_header_tar_lzma.o 51lib-$(CONFIG_FEATURE_SEAMLESS_LZMA) += open_transformer.o decompress_unlzma.o get_header_tar_lzma.o
52lib-$(CONFIG_FEATURE_SEAMLESS_XZ) += open_transformer.o decompress_unxz.o
52lib-$(CONFIG_FEATURE_COMPRESS_USAGE) += decompress_bunzip2.o 53lib-$(CONFIG_FEATURE_COMPRESS_USAGE) += decompress_bunzip2.o
53 54
54ifneq ($(lib-y),) 55ifneq ($(lib-y),)
diff --git a/archival/libunarchive/unxz/README b/archival/libunarchive/unxz/README
new file mode 100644
index 000000000..f79b0a404
--- /dev/null
+++ b/archival/libunarchive/unxz/README
@@ -0,0 +1,136 @@
1
2XZ Embedded
3===========
4
5 XZ Embedded is a relatively small, limited implementation of the .xz
6 file format. Currently only decoding is implemented.
7
8 XZ Embedded was written for use in the Linux kernel, but the code can
9 be easily used in other environments too, including regular userspace
10 applications.
11
12 This README contains information that is useful only when the copy
13 of XZ Embedded isn't part of the Linux kernel tree. You should also
14 read linux/Documentation/xz.txt even if you aren't using XZ Embedded
15 as part of Linux; information in that file is not repeated in this
16 README.
17
18Compiling the Linux kernel module
19
20 The xz_dec module depends on crc32 module, so make sure that you have
21 it enabled (CONFIG_CRC32).
22
23 Building the xz_dec and xz_dec_test modules without support for BCJ
24 filters:
25
26 cd linux/lib/xz
27 make -C /path/to/kernel/source \
28 KCPPFLAGS=-I"$(pwd)/../../include" M="$(pwd)" \
29 CONFIG_XZ_DEC=m CONFIG_XZ_DEC_TEST=m
30
31 Building the xz_dec and xz_dec_test modules with support for BCJ
32 filters:
33
34 cd linux/lib/xz
35 make -C /path/to/kernel/source \
36 KCPPFLAGS=-I"$(pwd)/../../include" M="$(pwd)" \
37 CONFIG_XZ_DEC=m CONFIG_XZ_DEC_TEST=m CONFIG_XZ_DEC_BCJ=y \
38 CONFIG_XZ_DEC_X86=y CONFIG_XZ_DEC_POWERPC=y \
39 CONFIG_XZ_DEC_IA64=y CONFIG_XZ_DEC_ARM=y \
40 CONFIG_XZ_DEC_ARMTHUMB=y CONFIG_XZ_DEC_SPARC=y
41
42 If you want only one or a few of the BCJ filters, omit the appropriate
43 variables. CONFIG_XZ_DEC_BCJ=y is always required to build the support
44 code shared between all BCJ filters.
45
46 Most people don't need the xz_dec_test module. You can skip building
47 it by omitting CONFIG_XZ_DEC_TEST=m from the make command line.
48
49Compiler requirements
50
51 XZ Embedded should compile as either GNU-C89 (used in the Linux
52 kernel) or with any C99 compiler. Getting the code to compile with
53 non-GNU C89 compiler or a C++ compiler should be quite easy as
54 long as there is a data type for unsigned 64-bit integer (or the
55 code is modified not to support large files, which needs some more
56 care than just using 32-bit integer instead of 64-bit).
57
58 If you use GCC, try to use a recent version. For example, on x86,
59 xz_dec_lzma2.c compiled with GCC 3.3.6 is 15-25 % slower than when
60 compiled with GCC 4.3.3.
61
62Embedding into userspace applications
63
64 To embed the XZ decoder, copy the following files into a single
65 directory in your source code tree:
66
67 linux/include/linux/xz.h
68 linux/lib/xz/xz_crc32.c
69 linux/lib/xz/xz_dec_lzma2.c
70 linux/lib/xz/xz_dec_stream.c
71 linux/lib/xz/xz_lzma2.h
72 linux/lib/xz/xz_private.h
73 linux/lib/xz/xz_stream.h
74 userspace/xz_config.h
75
76 Alternatively, xz.h may be placed into a different directory but then
77 that directory must be in the compiler include path when compiling
78 the .c files.
79
80 Your code should use only the functions declared in xz.h. The rest of
81 the .h files are meant only for internal use in XZ Embedded.
82
83 You may want to modify xz_config.h to be more suitable for your build
84 environment. Probably you should at least skim through it even if the
85 default file works as is.
86
87BCJ filter support
88
89 If you want support for one or more BCJ filters, you need to copy also
90 linux/lib/xz/xz_dec_bcj.c into your application, and use appropriate
91 #defines in xz_config.h or in compiler flags. You don't need these
92 #defines in the code that just uses XZ Embedded via xz.h, but having
93 them always #defined doesn't hurt either.
94
95 #define Instruction set BCJ filter endianness
96 XZ_DEC_X86 x86 or x86-64 Little endian only
97 XZ_DEC_POWERPC PowerPC Big endian only
98 XZ_DEC_IA64 Itanium (IA-64) Big or little endian
99 XZ_DEC_ARM ARM Little endian only
100 XZ_DEC_ARMTHUMB ARM-Thumb Little endian only
101 XZ_DEC_SPARC SPARC Big or little endian
102
103 While some architectures are (partially) bi-endian, the endianness
104 setting doesn't change the endianness of the instructions on all
105 architectures. That's why Itanium and SPARC filters work for both big
106 and little endian executables (Itanium has little endian instructions
107 and SPARC has big endian instructions).
108
109 There currently is no filter for little endian PowerPC or big endian
110 ARM or ARM-Thumb. Implementing filters for them can be considered if
111 there is a need for such filters in real-world applications.
112
113Notes about shared libraries
114
115 If you are including XZ Embedded into a shared library, you very
116 probably should rename the xz_* functions to prevent symbol
117 conflicts in case your library is linked against some other library
118 or application that also has XZ Embedded in it (which may even be
119 a different version of XZ Embedded). TODO: Provide an easy way
120 to do this.
121
122 Please don't create a shared library of XZ Embedded itself unless
123 it is fine to rebuild everything depending on that shared library
124 every time you upgrade to a newer version of XZ Embedded. There are
125 no API or ABI stability guarantees between different versions of
126 XZ Embedded.
127
128Specifying the calling convention
129
130 XZ_FUNC macro was included to support declaring functions with __init
131 in Linux. Outside Linux, it can be used to specify the calling
132 convention on systems that support multiple calling conventions.
133 For example, on Windows, you may make all functions use the stdcall
134 calling convention by defining XZ_FUNC=__stdcall when building and
135 using the functions from XZ Embedded.
136
diff --git a/archival/libunarchive/unxz/xz.h b/archival/libunarchive/unxz/xz.h
new file mode 100644
index 000000000..82f16ee22
--- /dev/null
+++ b/archival/libunarchive/unxz/xz.h
@@ -0,0 +1,212 @@
1/*
2 * XZ decompressor
3 *
4 * Authors: Lasse Collin <lasse.collin@tukaani.org>
5 * Igor Pavlov <http://7-zip.org/>
6 *
7 * This file has been put into the public domain.
8 * You can do whatever you want with this file.
9 */
10
11#ifndef XZ_H
12#define XZ_H
13
14#ifdef __KERNEL__
15# include <linux/stddef.h>
16# include <linux/types.h>
17#else
18# include <stddef.h>
19# include <stdint.h>
20#endif
21
22#ifndef XZ_DEBUG_MSG
23# define XZ_DEBUG_MSG(...) ((void)0)
24#endif
25
26/* In Linux, this is used to make extern functions static when needed. */
27#ifndef XZ_EXTERN
28# define XZ_EXTERN extern
29#endif
30
31/* In Linux, this is used to mark the functions with __init when needed. */
32#ifndef XZ_FUNC
33# define XZ_FUNC
34#endif
35
36/**
37 * enum xz_ret - Return codes
38 * @XZ_OK: Everything is OK so far. More input or more output
39 * space is required to continue.
40 * @XZ_STREAM_END: Operation finished successfully.
41 * @XZ_MEMLIMIT_ERROR: Not enough memory was preallocated at decoder
42 * initialization time.
43 * @XZ_FORMAT_ERROR: File format was not recognized (wrong magic bytes).
44 * @XZ_OPTIONS_ERROR: This implementation doesn't support the requested
45 * compression options. In the decoder this means that
46 * the header CRC32 matches, but the header itself
47 * specifies something that we don't support.
48 * @XZ_DATA_ERROR: Compressed data is corrupt.
49 * @XZ_BUF_ERROR: Cannot make any progress. Details are slightly
50 * different between multi-call and single-call mode;
51 * more information below.
52 *
53 * In multi-call mode, XZ_BUF_ERROR is returned when two consecutive calls
54 * to XZ code cannot consume any input and cannot produce any new output.
55 * This happens when there is no new input available, or the output buffer
56 * is full while at least one output byte is still pending. Assuming your
57 * code is not buggy, you can get this error only when decoding a compressed
58 * stream that is truncated or otherwise corrupt.
59 *
60 * In single-call mode, XZ_BUF_ERROR is returned only when the output buffer
61 * is too small, or the compressed input is corrupt in a way that makes the
62 * decoder produce more output than the caller expected. When it is
63 * (relatively) clear that the compressed input is truncated, XZ_DATA_ERROR
64 * is used instead of XZ_BUF_ERROR.
65 */
66enum xz_ret {
67 XZ_OK,
68 XZ_STREAM_END,
69 XZ_MEMLIMIT_ERROR,
70 XZ_FORMAT_ERROR,
71 XZ_OPTIONS_ERROR,
72 XZ_DATA_ERROR,
73 XZ_BUF_ERROR
74};
75
76/**
77 * struct xz_buf - Passing input and output buffers to XZ code
78 * @in: Beginning of the input buffer. This may be NULL if and only
79 * if in_pos is equal to in_size.
80 * @in_pos: Current position in the input buffer. This must not exceed
81 * in_size.
82 * @in_size: Size of the input buffer
83 * @out: Beginning of the output buffer. This may be NULL if and only
84 * if out_pos is equal to out_size.
85 * @out_pos: Current position in the output buffer. This must not exceed
86 * out_size.
87 * @out_size: Size of the output buffer
88 *
89 * Only the contents of the output buffer from out[out_pos] onward, and
90 * the variables in_pos and out_pos are modified by the XZ code.
91 */
92struct xz_buf {
93 const uint8_t *in;
94 size_t in_pos;
95 size_t in_size;
96
97 uint8_t *out;
98 size_t out_pos;
99 size_t out_size;
100};
101
102/**
103 * struct xz_dec - Opaque type to hold the XZ decoder state
104 */
105struct xz_dec;
106
107/**
108 * xz_dec_init() - Allocate and initialize a XZ decoder state
109 * @dict_max: Maximum size of the LZMA2 dictionary (history buffer) for
110 * multi-call decoding, or special value of zero to indicate
111 * single-call decoding mode.
112 *
113 * If dict_max > 0, the decoder is initialized to work in multi-call mode.
114 * dict_max number of bytes of memory is preallocated for the LZMA2
115 * dictionary. This way there is no risk that xz_dec_run() could run out
116 * of memory, since xz_dec_run() will never allocate any memory. Instead,
117 * if the preallocated dictionary is too small for decoding the given input
118 * stream, xz_dec_run() will return XZ_MEMLIMIT_ERROR. Thus, it is important
119 * to know what kind of data will be decoded to avoid allocating excessive
120 * amount of memory for the dictionary.
121 *
122 * LZMA2 dictionary is always 2^n bytes or 2^n + 2^(n-1) bytes (the latter
123 * sizes are less common in practice). In the kernel, dictionary sizes of
124 * 64 KiB, 128 KiB, 256 KiB, 512 KiB, and 1 MiB are probably the only
125 * reasonable values.
126 *
127 * If dict_max == 0, the decoder is initialized to work in single-call mode.
128 * In single-call mode, xz_dec_run() decodes the whole stream at once. The
129 * caller must provide enough output space or the decoding will fail. The
130 * output space is used as the dictionary buffer, which is why there is
131 * no need to allocate the dictionary as part of the decoder's internal
132 * state.
133 *
134 * Because the output buffer is used as the workspace, streams encoded using
135 * a big dictionary are not a problem in single-call. It is enough that the
136 * output buffer is big enough to hold the actual uncompressed data; it
137 * can be smaller than the dictionary size stored in the stream headers.
138 *
139 * On success, xz_dec_init() returns a pointer to struct xz_dec, which is
140 * ready to be used with xz_dec_run(). On error, xz_dec_init() returns NULL.
141 */
142XZ_EXTERN struct xz_dec * XZ_FUNC xz_dec_init(uint32_t dict_max);
143
144/**
145 * xz_dec_run() - Run the XZ decoder
146 * @s: Decoder state allocated using xz_dec_init()
147 * @b: Input and output buffers
148 *
149 * In multi-call mode, this function may return any of the values listed in
150 * enum xz_ret.
151 *
152 * In single-call mode, this function never returns XZ_OK. If an error occurs
153 * in single-call mode (return value is not XZ_STREAM_END), b->in_pos and
154 * b->out_pos are not modified, and the contents of the output buffer from
155 * b->out[b->out_pos] onward are undefined.
156 *
157 * NOTE: In single-call mode, the contents of the output buffer are undefined
158 * also after XZ_BUF_ERROR. This is because with some filter chains, there
159 * may be a second pass over the output buffer, and this pass cannot be
160 * properly done if the output buffer is truncated. Thus, you cannot give
161 * the single-call decoder a too small buffer and then expect to get that
162 * amount valid data from the beginning of the stream. You must use the
163 * multi-call decoder if you don't want to uncompress the whole stream.
164 */
165XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_run(struct xz_dec *s, struct xz_buf *b);
166
167/**
168 * xz_dec_reset() - Reset an already allocated decoder state
169 * @s: Decoder state allocated using xz_dec_init()
170 *
171 * This function can be used to reset the multi-call decoder state without
172 * freeing and reallocating memory with xz_dec_end() and xz_dec_init().
173 *
174 * In single-call mode, xz_dec_reset() is always called in the beginning of
175 * xz_dec_run(). Thus, explicit call to xz_dec_reset() is useful only in
176 * multi-call mode.
177 */
178XZ_EXTERN void XZ_FUNC xz_dec_reset(struct xz_dec *s);
179
180/**
181 * xz_dec_end() - Free the memory allocated for the decoder state
182 * @s: Decoder state allocated using xz_dec_init(). If s is NULL,
183 * this function does nothing.
184 */
185XZ_EXTERN void XZ_FUNC xz_dec_end(struct xz_dec *s);
186
187/*
188 * Standalone build (userspace build or in-kernel build for boot time use)
189 * needs a CRC32 implementation. For normal in-kernel use, kernel's own
190 * CRC32 module is used instead, and users of this module don't need to
191 * care about the functions below.
192 */
193#if !defined(__KERNEL__) || defined(XZ_INTERNAL_CRC32)
194/*
195 * This must be called before any other xz_* function to initialize
196 * the CRC32 lookup table.
197 */
198#ifndef xz_crc32_init
199XZ_EXTERN void XZ_FUNC xz_crc32_init(uint32_t *crc32_table);
200#endif
201
202/*
203 * Update CRC32 value using the polynomial from IEEE-802.3. To start a new
204 * calculation, the third argument must be zero. To continue the calculation,
205 * the previously returned value is passed as the third argument.
206 */
207#ifndef xz_crc32
208XZ_EXTERN uint32_t XZ_FUNC xz_crc32(uint32_t *crc32_table,
209 const uint8_t *buf, size_t size, uint32_t crc);
210#endif
211#endif
212#endif
diff --git a/archival/libunarchive/unxz/xz_config.h b/archival/libunarchive/unxz/xz_config.h
new file mode 100644
index 000000000..3259815f0
--- /dev/null
+++ b/archival/libunarchive/unxz/xz_config.h
@@ -0,0 +1,119 @@
1/*
2 * Private includes and definitions for userspace use of XZ Embedded
3 *
4 * Author: Lasse Collin <lasse.collin@tukaani.org>
5 *
6 * This file has been put into the public domain.
7 * You can do whatever you want with this file.
8 */
9
10#ifndef XZ_CONFIG_H
11#define XZ_CONFIG_H
12
13/* Uncomment as needed to enable BCJ filter decoders. */
14/* #define XZ_DEC_X86 */
15/* #define XZ_DEC_POWERPC */
16/* #define XZ_DEC_IA64 */
17/* #define XZ_DEC_ARM */
18/* #define XZ_DEC_ARMTHUMB */
19/* #define XZ_DEC_SPARC */
20
21#include <stdbool.h>
22#include <stdlib.h>
23#include <string.h>
24
25#include "xz.h"
26
27#define kmalloc(size, flags) malloc(size)
28#define kfree(ptr) free(ptr)
29#define vmalloc(size) malloc(size)
30#define vfree(ptr) free(ptr)
31
32#define memeq(a, b, size) (memcmp(a, b, size) == 0)
33#define memzero(buf, size) memset(buf, 0, size)
34
35#define min(x, y) ((x) < (y) ? (x) : (y))
36#define min_t(type, x, y) min(x, y)
37
38/*
39 * Some functions have been marked with __always_inline to keep the
40 * performance reasonable even when the compiler is optimizing for
41 * small code size. You may be able to save a few bytes by #defining
42 * __always_inline to plain inline, but don't complain if the code
43 * becomes slow.
44 *
45 * NOTE: System headers on GNU/Linux may #define this macro already,
46 * so if you want to change it, you need to #undef it first.
47 */
48#ifndef __always_inline
49# ifdef __GNUC__
50# define __always_inline \
51 inline __attribute__((__always_inline__))
52# else
53# define __always_inline inline
54# endif
55#endif
56
57/*
58 * Some functions are marked to never be inlined to reduce stack usage.
59 * If you don't care about stack usage, you may want to modify this so
60 * that noinline_for_stack is #defined to be empty even when using GCC.
61 * Doing so may save a few bytes in binary size.
62 */
63#ifndef noinline_for_stack
64# ifdef __GNUC__
65# define noinline_for_stack __attribute__((__noinline__))
66# else
67# define noinline_for_stack
68# endif
69#endif
70
71/* Inline functions to access unaligned unsigned 32-bit integers */
72#ifndef get_unaligned_le32
73static inline uint32_t XZ_FUNC get_unaligned_le32(const uint8_t *buf)
74{
75 return (uint32_t)buf[0]
76 | ((uint32_t)buf[1] << 8)
77 | ((uint32_t)buf[2] << 16)
78 | ((uint32_t)buf[3] << 24);
79}
80#endif
81
82#ifndef get_unaligned_be32
83static inline uint32_t XZ_FUNC get_unaligned_be32(const uint8_t *buf)
84{
85 return (uint32_t)(buf[0] << 24)
86 | ((uint32_t)buf[1] << 16)
87 | ((uint32_t)buf[2] << 8)
88 | (uint32_t)buf[3];
89}
90#endif
91
92#ifndef put_unaligned_le32
93static inline void XZ_FUNC put_unaligned_le32(uint32_t val, uint8_t *buf)
94{
95 buf[0] = (uint8_t)val;
96 buf[1] = (uint8_t)(val >> 8);
97 buf[2] = (uint8_t)(val >> 16);
98 buf[3] = (uint8_t)(val >> 24);
99}
100#endif
101
102#ifndef put_unaligned_be32
103static inline void XZ_FUNC put_unaligned_be32(uint32_t val, uint8_t *buf)
104{
105 buf[0] = (uint8_t)(val >> 24);
106 buf[1] = (uint8_t)(val >> 16);
107 buf[2] = (uint8_t)(val >> 8);
108 buf[3] = (uint8_t)val;
109}
110#endif
111
112/*
113 * Use get_unaligned_le32() also for aligned access for simplicity. On
114 * little endian systems, #define get_le32(ptr) (*(const uint32_t *)(ptr))
115 * could save a few bytes in code size.
116 */
117#define get_le32 get_unaligned_le32
118
119#endif
diff --git a/archival/libunarchive/unxz/xz_dec_bcj.c b/archival/libunarchive/unxz/xz_dec_bcj.c
new file mode 100644
index 000000000..d4b6ef751
--- /dev/null
+++ b/archival/libunarchive/unxz/xz_dec_bcj.c
@@ -0,0 +1,560 @@
1/*
2 * Branch/Call/Jump (BCJ) filter decoders
3 *
4 * Authors: Lasse Collin <lasse.collin@tukaani.org>
5 * Igor Pavlov <http://7-zip.org/>
6 *
7 * This file has been put into the public domain.
8 * You can do whatever you want with this file.
9 */
10
11#include "xz_private.h"
12
13struct xz_dec_bcj {
14 /* Type of the BCJ filter being used */
15 enum {
16 BCJ_X86 = 4, /* x86 or x86-64 */
17 BCJ_POWERPC = 5, /* Big endian only */
18 BCJ_IA64 = 6, /* Big or little endian */
19 BCJ_ARM = 7, /* Little endian only */
20 BCJ_ARMTHUMB = 8, /* Little endian only */
21 BCJ_SPARC = 9 /* Big or little endian */
22 } type;
23
24 /*
25 * Return value of the next filter in the chain. We need to preserve
26 * this information across calls, because we must not call the next
27 * filter anymore once it has returned XZ_STREAM_END.
28 */
29 enum xz_ret ret;
30
31 /* True if we are operating in single-call mode. */
32 bool single_call;
33
34 /*
35 * Absolute position relative to the beginning of the uncompressed
36 * data (in a single .xz Block). We care only about the lowest 32
37 * bits so this doesn't need to be uint64_t even with big files.
38 */
39 uint32_t pos;
40
41 /* x86 filter state */
42 uint32_t x86_prev_mask;
43
44 /* Temporary space to hold the variables from struct xz_buf */
45 uint8_t *out;
46 size_t out_pos;
47 size_t out_size;
48
49 struct {
50 /* Amount of already filtered data in the beginning of buf */
51 size_t filtered;
52
53 /* Total amount of data currently stored in buf */
54 size_t size;
55
56 /*
57 * Buffer to hold a mix of filtered and unfiltered data. This
58 * needs to be big enough to hold Alignment + 2 * Look-ahead:
59 *
60 * Type Alignment Look-ahead
61 * x86 1 4
62 * PowerPC 4 0
63 * IA-64 16 0
64 * ARM 4 0
65 * ARM-Thumb 2 2
66 * SPARC 4 0
67 */
68 uint8_t buf[16];
69 } temp;
70};
71
72#ifdef XZ_DEC_X86
73/*
74 * This is a macro used to test the most significant byte of a memory address
75 * in an x86 instruction.
76 */
77#define bcj_x86_test_msbyte(b) ((b) == 0x00 || (b) == 0xFF)
78
79static noinline_for_stack size_t XZ_FUNC bcj_x86(
80 struct xz_dec_bcj *s, uint8_t *buf, size_t size)
81{
82 static const bool mask_to_allowed_status[8]
83 = { true, true, true, false, true, false, false, false };
84
85 static const uint8_t mask_to_bit_num[8] = { 0, 1, 2, 2, 3, 3, 3, 3 };
86
87 size_t i;
88 size_t prev_pos = (size_t)-1;
89 uint32_t prev_mask = s->x86_prev_mask;
90 uint32_t src;
91 uint32_t dest;
92 uint32_t j;
93 uint8_t b;
94
95 if (size <= 4)
96 return 0;
97
98 size -= 4;
99 for (i = 0; i < size; ++i) {
100 if ((buf[i] & 0xFE) != 0xE8)
101 continue;
102
103 prev_pos = i - prev_pos;
104 if (prev_pos > 3) {
105 prev_mask = 0;
106 } else {
107 prev_mask = (prev_mask << (prev_pos - 1)) & 7;
108 if (prev_mask != 0) {
109 b = buf[i + 4 - mask_to_bit_num[prev_mask]];
110 if (!mask_to_allowed_status[prev_mask]
111 || bcj_x86_test_msbyte(b)) {
112 prev_pos = i;
113 prev_mask = (prev_mask << 1) | 1;
114 continue;
115 }
116 }
117 }
118
119 prev_pos = i;
120
121 if (bcj_x86_test_msbyte(buf[i + 4])) {
122 src = get_unaligned_le32(buf + i + 1);
123 while (true) {
124 dest = src - (s->pos + (uint32_t)i + 5);
125 if (prev_mask == 0)
126 break;
127
128 j = mask_to_bit_num[prev_mask] * 8;
129 b = (uint8_t)(dest >> (24 - j));
130 if (!bcj_x86_test_msbyte(b))
131 break;
132
133 src = dest ^ (((uint32_t)1 << (32 - j)) - 1);
134 }
135
136 dest &= 0x01FFFFFF;
137 dest |= (uint32_t)0 - (dest & 0x01000000);
138 put_unaligned_le32(dest, buf + i + 1);
139 i += 4;
140 } else {
141 prev_mask = (prev_mask << 1) | 1;
142 }
143 }
144
145 prev_pos = i - prev_pos;
146 s->x86_prev_mask = prev_pos > 3 ? 0 : prev_mask << (prev_pos - 1);
147 return i;
148}
149#endif
150
151#ifdef XZ_DEC_POWERPC
152static noinline_for_stack size_t XZ_FUNC bcj_powerpc(
153 struct xz_dec_bcj *s, uint8_t *buf, size_t size)
154{
155 size_t i;
156 uint32_t instr;
157
158 for (i = 0; i + 4 <= size; i += 4) {
159 instr = get_unaligned_be32(buf + i);
160 if ((instr & 0xFC000003) == 0x48000001) {
161 instr &= 0x03FFFFFC;
162 instr -= s->pos + (uint32_t)i;
163 instr &= 0x03FFFFFC;
164 instr |= 0x48000001;
165 put_unaligned_be32(instr, buf + i);
166 }
167 }
168
169 return i;
170}
171#endif
172
173#ifdef XZ_DEC_IA64
174static noinline_for_stack size_t XZ_FUNC bcj_ia64(
175 struct xz_dec_bcj *s, uint8_t *buf, size_t size)
176{
177 static const uint8_t branch_table[32] = {
178 0, 0, 0, 0, 0, 0, 0, 0,
179 0, 0, 0, 0, 0, 0, 0, 0,
180 4, 4, 6, 6, 0, 0, 7, 7,
181 4, 4, 0, 0, 4, 4, 0, 0
182 };
183
184 /*
185 * The local variables take a little bit stack space, but it's less
186 * than what LZMA2 decoder takes, so it doesn't make sense to reduce
187 * stack usage here without doing that for the LZMA2 decoder too.
188 */
189
190 /* Loop counters */
191 size_t i;
192 size_t j;
193
194 /* Instruction slot (0, 1, or 2) in the 128-bit instruction word */
195 uint32_t slot;
196
197 /* Bitwise offset of the instruction indicated by slot */
198 uint32_t bit_pos;
199
200 /* bit_pos split into byte and bit parts */
201 uint32_t byte_pos;
202 uint32_t bit_res;
203
204 /* Address part of an instruction */
205 uint32_t addr;
206
207 /* Mask used to detect which instructions to convert */
208 uint32_t mask;
209
210 /* 41-bit instruction stored somewhere in the lowest 48 bits */
211 uint64_t instr;
212
213 /* Instruction normalized with bit_res for easier manipulation */
214 uint64_t norm;
215
216 for (i = 0; i + 16 <= size; i += 16) {
217 mask = branch_table[buf[i] & 0x1F];
218 for (slot = 0, bit_pos = 5; slot < 3; ++slot, bit_pos += 41) {
219 if (((mask >> slot) & 1) == 0)
220 continue;
221
222 byte_pos = bit_pos >> 3;
223 bit_res = bit_pos & 7;
224 instr = 0;
225 for (j = 0; j < 6; ++j)
226 instr |= (uint64_t)(buf[i + j + byte_pos])
227 << (8 * j);
228
229 norm = instr >> bit_res;
230
231 if (((norm >> 37) & 0x0F) == 0x05
232 && ((norm >> 9) & 0x07) == 0) {
233 addr = (norm >> 13) & 0x0FFFFF;
234 addr |= ((uint32_t)(norm >> 36) & 1) << 20;
235 addr <<= 4;
236 addr -= s->pos + (uint32_t)i;
237 addr >>= 4;
238
239 norm &= ~((uint64_t)0x8FFFFF << 13);
240 norm |= (uint64_t)(addr & 0x0FFFFF) << 13;
241 norm |= (uint64_t)(addr & 0x100000)
242 << (36 - 20);
243
244 instr &= (1 << bit_res) - 1;
245 instr |= norm << bit_res;
246
247 for (j = 0; j < 6; j++)
248 buf[i + j + byte_pos]
249 = (uint8_t)(instr >> (8 * j));
250 }
251 }
252 }
253
254 return i;
255}
256#endif
257
258#ifdef XZ_DEC_ARM
259static noinline_for_stack size_t XZ_FUNC bcj_arm(
260 struct xz_dec_bcj *s, uint8_t *buf, size_t size)
261{
262 size_t i;
263 uint32_t addr;
264
265 for (i = 0; i + 4 <= size; i += 4) {
266 if (buf[i + 3] == 0xEB) {
267 addr = (uint32_t)buf[i] | ((uint32_t)buf[i + 1] << 8)
268 | ((uint32_t)buf[i + 2] << 16);
269 addr <<= 2;
270 addr -= s->pos + (uint32_t)i + 8;
271 addr >>= 2;
272 buf[i] = (uint8_t)addr;
273 buf[i + 1] = (uint8_t)(addr >> 8);
274 buf[i + 2] = (uint8_t)(addr >> 16);
275 }
276 }
277
278 return i;
279}
280#endif
281
282#ifdef XZ_DEC_ARMTHUMB
283static noinline_for_stack size_t XZ_FUNC bcj_armthumb(
284 struct xz_dec_bcj *s, uint8_t *buf, size_t size)
285{
286 size_t i;
287 uint32_t addr;
288
289 for (i = 0; i + 4 <= size; i += 2) {
290 if ((buf[i + 1] & 0xF8) == 0xF0
291 && (buf[i + 3] & 0xF8) == 0xF8) {
292 addr = (((uint32_t)buf[i + 1] & 0x07) << 19)
293 | ((uint32_t)buf[i] << 11)
294 | (((uint32_t)buf[i + 3] & 0x07) << 8)
295 | (uint32_t)buf[i + 2];
296 addr <<= 1;
297 addr -= s->pos + (uint32_t)i + 4;
298 addr >>= 1;
299 buf[i + 1] = (uint8_t)(0xF0 | ((addr >> 19) & 0x07));
300 buf[i] = (uint8_t)(addr >> 11);
301 buf[i + 3] = (uint8_t)(0xF8 | ((addr >> 8) & 0x07));
302 buf[i + 2] = (uint8_t)addr;
303 i += 2;
304 }
305 }
306
307 return i;
308}
309#endif
310
311#ifdef XZ_DEC_SPARC
312static noinline_for_stack size_t XZ_FUNC bcj_sparc(
313 struct xz_dec_bcj *s, uint8_t *buf, size_t size)
314{
315 size_t i;
316 uint32_t instr;
317
318 for (i = 0; i + 4 <= size; i += 4) {
319 instr = get_unaligned_be32(buf + i);
320 if ((instr >> 22) == 0x100 || (instr >> 22) == 0x1FF) {
321 instr <<= 2;
322 instr -= s->pos + (uint32_t)i;
323 instr >>= 2;
324 instr = ((uint32_t)0x40000000 - (instr & 0x400000))
325 | 0x40000000 | (instr & 0x3FFFFF);
326 put_unaligned_be32(instr, buf + i);
327 }
328 }
329
330 return i;
331}
332#endif
333
334#ifdef XZ_DEC_BCJ
/*
 * Apply the selected BCJ filter. Update *pos and s->pos to match the amount
 * of data that got filtered.
 *
 * NOTE: This is implemented as a switch statement to avoid using function
 * pointers, which could be problematic in the kernel boot code, which must
 * avoid pointers to static data (at least on x86).
 */
static void XZ_FUNC bcj_apply(struct xz_dec_bcj *s,
		uint8_t *buf, size_t *pos, size_t size)
{
	size_t filtered;

	/* Filter only the not-yet-filtered tail of the buffer. */
	buf += *pos;
	size -= *pos;

	switch (s->type) {
#ifdef XZ_DEC_X86
	case BCJ_X86:
		filtered = bcj_x86(s, buf, size);
		break;
#endif
#ifdef XZ_DEC_POWERPC
	case BCJ_POWERPC:
		filtered = bcj_powerpc(s, buf, size);
		break;
#endif
#ifdef XZ_DEC_IA64
	case BCJ_IA64:
		filtered = bcj_ia64(s, buf, size);
		break;
#endif
#ifdef XZ_DEC_ARM
	case BCJ_ARM:
		filtered = bcj_arm(s, buf, size);
		break;
#endif
#ifdef XZ_DEC_ARMTHUMB
	case BCJ_ARMTHUMB:
		filtered = bcj_armthumb(s, buf, size);
		break;
#endif
#ifdef XZ_DEC_SPARC
	case BCJ_SPARC:
		filtered = bcj_sparc(s, buf, size);
		break;
#endif
	default:
		/* Never reached but silence compiler warnings. */
		filtered = 0;
		break;
	}

	/* s->pos tracks the absolute position in the uncompressed stream,
	 * which the filters need for their address arithmetic. */
	*pos += filtered;
	s->pos += filtered;
}
391#endif
392
393#ifdef XZ_DEC_BCJ
394/*
395 * Flush pending filtered data from temp to the output buffer.
396 * Move the remaining mixture of possibly filtered and unfiltered
397 * data to the beginning of temp.
398 */
399static void XZ_FUNC bcj_flush(struct xz_dec_bcj *s, struct xz_buf *b)
400{
401 size_t copy_size;
402
403 copy_size = min_t(size_t, s->temp.filtered, b->out_size - b->out_pos);
404 memcpy(b->out + b->out_pos, s->temp.buf, copy_size);
405 b->out_pos += copy_size;
406
407 s->temp.filtered -= copy_size;
408 s->temp.size -= copy_size;
409 memmove(s->temp.buf, s->temp.buf + copy_size, s->temp.size);
410}
411
/*
 * The BCJ filter functions are primitive in sense that they process the
 * data in chunks of 1-16 bytes. To hide this issue, this function does
 * some buffering.
 */
XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_bcj_run(struct xz_dec_bcj *s,
		struct xz_dec_lzma2 *lzma2, struct xz_buf *b)
{
	size_t out_start;

	/*
	 * Flush pending already filtered data to the output buffer. Return
	 * immediately if we couldn't flush everything, or if the next
	 * filter in the chain had already returned XZ_STREAM_END.
	 */
	if (s->temp.filtered > 0) {
		bcj_flush(s, b);
		if (s->temp.filtered > 0)
			return XZ_OK;

		if (s->ret == XZ_STREAM_END)
			return XZ_STREAM_END;
	}

	/*
	 * If we have more output space than what is currently pending in
	 * temp, copy the unfiltered data from temp to the output buffer
	 * and try to fill the output buffer by decoding more data from the
	 * next filter in the chain. Apply the BCJ filter on the new data
	 * in the output buffer. If everything cannot be filtered, copy it
	 * to temp and rewind the output buffer position accordingly.
	 */
	if (s->temp.size < b->out_size - b->out_pos) {
		out_start = b->out_pos;
		memcpy(b->out + b->out_pos, s->temp.buf, s->temp.size);
		b->out_pos += s->temp.size;

		s->ret = xz_dec_lzma2_run(lzma2, b);
		if (s->ret != XZ_STREAM_END
				&& (s->ret != XZ_OK || s->single_call))
			return s->ret;

		bcj_apply(s, b->out, &out_start, b->out_pos);

		/*
		 * As an exception, if the next filter returned XZ_STREAM_END,
		 * we can do that too, since the last few bytes that remain
		 * unfiltered are meant to remain unfiltered.
		 */
		if (s->ret == XZ_STREAM_END)
			return XZ_STREAM_END;

		/* Stash the unfiltered tail in temp and pretend it was
		 * never written to the output buffer. */
		s->temp.size = b->out_pos - out_start;
		b->out_pos -= s->temp.size;
		memcpy(s->temp.buf, b->out + b->out_pos, s->temp.size);
	}

	/*
	 * If we have unfiltered data in temp, try to fill by decoding more
	 * data from the next filter. Apply the BCJ filter on temp. Then we
	 * hopefully can fill the actual output buffer by copying filtered
	 * data from temp. A mix of filtered and unfiltered data may be left
	 * in temp; it will be taken care of on the next call to this
	 * function.
	 */
	if (s->temp.size > 0) {
		/* Make b->out{,_pos,_size} temporarily point to s->temp. */
		s->out = b->out;
		s->out_pos = b->out_pos;
		s->out_size = b->out_size;
		b->out = s->temp.buf;
		b->out_pos = s->temp.size;
		b->out_size = sizeof(s->temp.buf);

		s->ret = xz_dec_lzma2_run(lzma2, b);

		s->temp.size = b->out_pos;
		b->out = s->out;
		b->out_pos = s->out_pos;
		b->out_size = s->out_size;

		if (s->ret != XZ_OK && s->ret != XZ_STREAM_END)
			return s->ret;

		bcj_apply(s, s->temp.buf, &s->temp.filtered, s->temp.size);

		/*
		 * If the next filter returned XZ_STREAM_END, we mark that
		 * everything is filtered, since the last unfiltered bytes
		 * of the stream are meant to be left as is.
		 */
		if (s->ret == XZ_STREAM_END)
			s->temp.filtered = s->temp.size;

		bcj_flush(s, b);
		if (s->temp.filtered > 0)
			return XZ_OK;
	}

	return s->ret;
}
512
513XZ_EXTERN struct xz_dec_bcj * XZ_FUNC xz_dec_bcj_create(bool single_call)
514{
515 struct xz_dec_bcj *s = kmalloc(sizeof(*s), GFP_KERNEL);
516 if (s != NULL)
517 s->single_call = single_call;
518
519 return s;
520}
521
/*
 * Validate the Filter ID and reset the decoder state for a new Block.
 * Only IDs whose filter was compiled in fall through the switch.
 */
XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_bcj_reset(
		struct xz_dec_bcj *s, uint8_t id)
{
	switch (id) {
#ifdef XZ_DEC_X86
	case BCJ_X86:
#endif
#ifdef XZ_DEC_POWERPC
	case BCJ_POWERPC:
#endif
#ifdef XZ_DEC_IA64
	case BCJ_IA64:
#endif
#ifdef XZ_DEC_ARM
	case BCJ_ARM:
#endif
#ifdef XZ_DEC_ARMTHUMB
	case BCJ_ARMTHUMB:
#endif
#ifdef XZ_DEC_SPARC
	case BCJ_SPARC:
#endif
		break;

	default:
		/* Unsupported Filter ID */
		return XZ_OPTIONS_ERROR;
	}

	s->type = id;
	s->ret = XZ_OK;
	s->pos = 0;
	s->x86_prev_mask = 0;
	s->temp.filtered = 0;
	s->temp.size = 0;

	return XZ_OK;
}
560#endif
diff --git a/archival/libunarchive/unxz/xz_dec_lzma2.c b/archival/libunarchive/unxz/xz_dec_lzma2.c
new file mode 100644
index 000000000..890141b7c
--- /dev/null
+++ b/archival/libunarchive/unxz/xz_dec_lzma2.c
@@ -0,0 +1,1157 @@
1/*
2 * LZMA2 decoder
3 *
4 * Authors: Lasse Collin <lasse.collin@tukaani.org>
5 * Igor Pavlov <http://7-zip.org/>
6 *
7 * This file has been put into the public domain.
8 * You can do whatever you want with this file.
9 */
10
11#include "xz_private.h"
12#include "xz_lzma2.h"
13
14/*
15 * Range decoder initialization eats the first five bytes of each LZMA chunk.
16 */
17#define RC_INIT_BYTES 5
18
19/*
 * Minimum number of usable input bytes to safely decode one LZMA symbol.
21 * The worst case is that we decode 22 bits using probabilities and 26
22 * direct bits. This may decode at maximum of 20 bytes of input. However,
23 * lzma_main() does an extra normalization before returning, thus we
24 * need to put 21 here.
25 */
26#define LZMA_IN_REQUIRED 21
27
28/*
29 * Dictionary (history buffer)
30 *
31 * These are always true:
32 * start <= pos <= full <= end
33 * pos <= limit <= end
34 *
35 * In multi-call mode, also these are true:
36 * end == size
37 * size <= allocated
38 *
39 * Most of these variables are size_t to support single-call mode,
40 * in which the dictionary variables address the actual output
41 * buffer directly.
42 */
struct dictionary {
	/* Beginning of the history buffer */
	uint8_t *buf;

	/* Old position in buf (before decoding more data) */
	size_t start;

	/* Position in buf */
	size_t pos;

	/*
	 * How full dictionary is. This is used to detect corrupt input that
	 * would read beyond the beginning of the uncompressed stream.
	 */
	size_t full;

	/* Write limit; we don't write to buf[limit] or later bytes. */
	size_t limit;

	/*
	 * End of the dictionary buffer. In multi-call mode, this is
	 * the same as the dictionary size. In single-call mode, this
	 * indicates the size of the output buffer.
	 */
	size_t end;

	/*
	 * Size of the dictionary as specified in Block Header. This is used
	 * together with "full" to detect corrupt input that would make us
	 * read beyond the beginning of the uncompressed stream.
	 */
	uint32_t size;

	/*
	 * Amount of memory allocated for the dictionary. A special
	 * value of zero indicates that we are in single-call mode,
	 * where the output buffer works as the dictionary.
	 */
	uint32_t allocated;
};

/* Range decoder */
struct rc_dec {
	uint32_t range;
	uint32_t code;

	/*
	 * Number of initializing bytes remaining to be read
	 * by rc_read_init().
	 */
	uint32_t init_bytes_left;

	/*
	 * Buffer from which we read our input. It can be either
	 * temp.buf or the caller-provided input buffer.
	 */
	const uint8_t *in;
	size_t in_pos;
	size_t in_limit;
};

/* Probabilities for a length decoder. */
struct lzma_len_dec {
	/* Probability of match length being at least 10 */
	uint16_t choice;

	/* Probability of match length being at least 18 */
	uint16_t choice2;

	/* Probabilities for match lengths 2-9 */
	uint16_t low[POS_STATES_MAX][LEN_LOW_SYMBOLS];

	/* Probabilities for match lengths 10-17 */
	uint16_t mid[POS_STATES_MAX][LEN_MID_SYMBOLS];

	/* Probabilities for match lengths 18-273 */
	uint16_t high[LEN_HIGH_SYMBOLS];
};

struct lzma_dec {
	/*
	 * LZMA properties or related bit masks (number of literal
	 * context bits, a mask derived from the number of literal
	 * position bits, and a mask derived from the number of
	 * position bits)
	 */
	uint32_t lc;
	uint32_t literal_pos_mask; /* (1 << lp) - 1 */
	uint32_t pos_mask; /* (1 << pb) - 1 */

	/* Types of the most recently seen LZMA symbols */
	enum lzma_state state;

	/* Distances of latest four matches */
	uint32_t rep0;
	uint32_t rep1;
	uint32_t rep2;
	uint32_t rep3;

	/*
	 * Length of a match. This is updated so that dict_repeat can
	 * be called again to finish repeating the whole match.
	 */
	uint32_t len;

	/* If 1, it's a match. Otherwise it's a single 8-bit literal. */
	uint16_t is_match[STATES][POS_STATES_MAX];

	/* If 1, it's a repeated match. The distance is one of rep0 .. rep3. */
	uint16_t is_rep[STATES];

	/*
	 * If 0, distance of a repeated match is rep0.
	 * Otherwise check is_rep1.
	 */
	uint16_t is_rep0[STATES];

	/*
	 * If 0, distance of a repeated match is rep1.
	 * Otherwise check is_rep2.
	 */
	uint16_t is_rep1[STATES];

	/* If 0, distance of a repeated match is rep2. Otherwise it is rep3. */
	uint16_t is_rep2[STATES];

	/*
	 * If 1, the repeated match has length of one byte. Otherwise
	 * the length is decoded from rep_len_decoder.
	 */
	uint16_t is_rep0_long[STATES][POS_STATES_MAX];

	/*
	 * Probability tree for the highest two bits of the match
	 * distance. There is a separate probability tree for match
	 * lengths of 2 (i.e. MATCH_LEN_MIN), 3, 4, and [5, 273].
	 */
	uint16_t dist_slot[DIST_STATES][DIST_SLOTS];

	/*
	 * Probability trees for additional bits for match distance
	 * when the distance is in the range [4, 127].
	 */
	uint16_t dist_special[FULL_DISTANCES - DIST_MODEL_END];

	/*
	 * Probability tree for the lowest four bits of a match
	 * distance that is equal to or greater than 128.
	 */
	uint16_t dist_align[ALIGN_SIZE];

	/* Length of a normal match */
	struct lzma_len_dec match_len_dec;

	/* Length of a repeated match */
	struct lzma_len_dec rep_len_dec;

	/* Probabilities of literals */
	uint16_t literal[LITERAL_CODERS_MAX][LITERAL_CODER_SIZE];
};

struct xz_dec_lzma2 {
	/* LZMA2 */
	struct {
		/* Position in xz_dec_lzma2_run(). */
		enum lzma2_seq {
			SEQ_CONTROL,
			SEQ_UNCOMPRESSED_1,
			SEQ_UNCOMPRESSED_2,
			SEQ_COMPRESSED_0,
			SEQ_COMPRESSED_1,
			SEQ_PROPERTIES,
			SEQ_LZMA_PREPARE,
			SEQ_LZMA_RUN,
			SEQ_COPY
		} sequence;

		/*
		 * Next position after decoding the compressed size of
		 * the chunk.
		 */
		enum lzma2_seq next_sequence;

		/* Uncompressed size of LZMA chunk (2 MiB at maximum) */
		uint32_t uncompressed;

		/*
		 * Compressed size of LZMA chunk or compressed/uncompressed
		 * size of uncompressed chunk (64 KiB at maximum)
		 */
		uint32_t compressed;

		/*
		 * True if dictionary reset is needed. This is false before
		 * the first chunk (LZMA or uncompressed).
		 */
		bool need_dict_reset;

		/*
		 * True if new LZMA properties are needed. This is false
		 * before the first LZMA chunk.
		 */
		bool need_props;
	} lzma2;

	/*
	 * Temporary buffer which holds small number of input bytes between
	 * decoder calls. See lzma2_lzma() for details.
	 */
	struct {
		uint32_t size;
		uint8_t buf[3 * LZMA_IN_REQUIRED];
	} temp;

	struct dictionary dict;
	struct rc_dec rc;
	struct lzma_dec lzma;
};
261
262/**************
263 * Dictionary *
264 **************/
265
266/*
267 * Reset the dictionary state. When in single-call mode, set up the beginning
268 * of the dictionary to point to the actual output buffer.
269 */
270static void XZ_FUNC dict_reset(struct dictionary *dict, struct xz_buf *b)
271{
272 if (dict->allocated == 0) {
273 dict->buf = b->out + b->out_pos;
274 dict->end = b->out_size - b->out_pos;
275 }
276
277 dict->start = 0;
278 dict->pos = 0;
279 dict->limit = 0;
280 dict->full = 0;
281}
282
283/* Set dictionary write limit */
284static void XZ_FUNC dict_limit(struct dictionary *dict, size_t out_max)
285{
286 if (dict->end - dict->pos <= out_max)
287 dict->limit = dict->end;
288 else
289 dict->limit = dict->pos + out_max;
290}
291
292/* Return true if at least one byte can be written into the dictionary. */
293static __always_inline bool XZ_FUNC dict_has_space(const struct dictionary *dict)
294{
295 return dict->pos < dict->limit;
296}
297
298/*
299 * Get a byte from the dictionary at the given distance. The distance is
300 * assumed to valid, or as a special case, zero when the dictionary is
301 * still empty. This special case is needed for single-call decoding to
302 * avoid writing a '\0' to the end of the destination buffer.
303 */
304static __always_inline uint32_t XZ_FUNC dict_get(
305 const struct dictionary *dict, uint32_t dist)
306{
307 size_t offset = dict->pos - dist - 1;
308
309 if (dist >= dict->pos)
310 offset += dict->end;
311
312 return dict->full > 0 ? dict->buf[offset] : 0;
313}
314
315/*
316 * Put one byte into the dictionary. It is assumed that there is space for it.
317 */
318static inline void XZ_FUNC dict_put(struct dictionary *dict, uint8_t byte)
319{
320 dict->buf[dict->pos++] = byte;
321
322 if (dict->full < dict->pos)
323 dict->full = dict->pos;
324}
325
326/*
327 * Repeat given number of bytes from the given distance. If the distance is
328 * invalid, false is returned. On success, true is returned and *len is
329 * updated to indicate how many bytes were left to be repeated.
330 */
331static bool XZ_FUNC dict_repeat(
332 struct dictionary *dict, uint32_t *len, uint32_t dist)
333{
334 size_t back;
335 uint32_t left;
336
337 if (dist >= dict->full || dist >= dict->size)
338 return false;
339
340 left = min_t(size_t, dict->limit - dict->pos, *len);
341 *len -= left;
342
343 back = dict->pos - dist - 1;
344 if (dist >= dict->pos)
345 back += dict->end;
346
347 do {
348 dict->buf[dict->pos++] = dict->buf[back++];
349 if (back == dict->end)
350 back = 0;
351 } while (--left > 0);
352
353 if (dict->full < dict->pos)
354 dict->full = dict->pos;
355
356 return true;
357}
358
/* Copy uncompressed data as is from input to dictionary and output buffers. */
static void XZ_FUNC dict_uncompressed(
		struct dictionary *dict, struct xz_buf *b, uint32_t *left)
{
	size_t copy_size;

	while (*left > 0 && b->in_pos < b->in_size
			&& b->out_pos < b->out_size) {
		/* Copy at most what the input, the output, the remaining
		 * dictionary space, and the chunk size (*left) allow. */
		copy_size = min(b->in_size - b->in_pos,
				b->out_size - b->out_pos);
		if (copy_size > dict->end - dict->pos)
			copy_size = dict->end - dict->pos;
		if (copy_size > *left)
			copy_size = *left;

		*left -= copy_size;

		memcpy(dict->buf + dict->pos, b->in + b->in_pos, copy_size);
		dict->pos += copy_size;

		if (dict->full < dict->pos)
			dict->full = dict->pos;

		if (dict->allocated != 0) {
			/* Multi-call mode (allocated != 0): wrap the write
			 * position and mirror the data to the output buffer
			 * as well, since dict and b->out are separate. */
			if (dict->pos == dict->end)
				dict->pos = 0;

			memcpy(b->out + b->out_pos, b->in + b->in_pos,
					copy_size);
		}

		dict->start = dict->pos;

		b->out_pos += copy_size;
		b->in_pos += copy_size;

	}
}
397
398/*
399 * Flush pending data from dictionary to b->out. It is assumed that there is
400 * enough space in b->out. This is guaranteed because caller uses dict_limit()
401 * before decoding data into the dictionary.
402 */
403static uint32_t XZ_FUNC dict_flush(struct dictionary *dict, struct xz_buf *b)
404{
405 size_t copy_size = dict->pos - dict->start;
406
407 if (dict->allocated != 0) {
408 if (dict->pos == dict->end)
409 dict->pos = 0;
410
411 memcpy(b->out + b->out_pos, dict->buf + dict->start,
412 copy_size);
413 }
414
415 dict->start = dict->pos;
416 b->out_pos += copy_size;
417 return copy_size;
418}
419
420/*****************
421 * Range decoder *
422 *****************/
423
424/* Reset the range decoder. */
425static __always_inline void XZ_FUNC rc_reset(struct rc_dec *rc)
426{
427 rc->range = (uint32_t)-1;
428 rc->code = 0;
429 rc->init_bytes_left = RC_INIT_BYTES;
430}
431
432/*
433 * Read the first five initial bytes into rc->code if they haven't been
434 * read already. (Yes, the first byte gets completely ignored.)
435 */
436static bool XZ_FUNC rc_read_init(struct rc_dec *rc, struct xz_buf *b)
437{
438 while (rc->init_bytes_left > 0) {
439 if (b->in_pos == b->in_size)
440 return false;
441
442 rc->code = (rc->code << 8) + b->in[b->in_pos++];
443 --rc->init_bytes_left;
444 }
445
446 return true;
447}
448
449/* Return true if there may not be enough input for the next decoding loop. */
450static inline bool XZ_FUNC rc_limit_exceeded(const struct rc_dec *rc)
451{
452 return rc->in_pos > rc->in_limit;
453}
454
/*
 * Return true if it is possible (from point of view of range decoder) that
 * we have reached the end of the LZMA chunk.
 */
static inline bool XZ_FUNC rc_is_finished(const struct rc_dec *rc)
{
	/* A correctly terminated chunk leaves rc->code at exactly zero. */
	return rc->code == 0;
}
463
/* Read the next input byte if needed. */
static __always_inline void XZ_FUNC rc_normalize(struct rc_dec *rc)
{
	/* Keep range large enough for the probability arithmetic by
	 * shifting in another input byte once it drops below the top. */
	if (rc->range < RC_TOP_VALUE) {
		rc->range <<= RC_SHIFT_BITS;
		rc->code = (rc->code << RC_SHIFT_BITS) + rc->in[rc->in_pos++];
	}
}
472
/*
 * Decode one bit. In some versions, this function has been split in three
 * functions so that the compiler is supposed to be able to more easily avoid
 * an extra branch. In this particular version of the LZMA decoder, this
 * doesn't seem to be a good idea (tested with GCC 3.3.6, 3.4.6, and 4.3.3
 * on x86). Using a non-split version results in nicer looking code too.
 *
 * NOTE: This must return an int. Do not make it return a bool or the speed
 * of the code generated by GCC 3.x decreases 10-15 %. (GCC 4.3 doesn't care,
 * and it generates 10-20 % faster code than GCC 3.x from this file anyway.)
 */
static __always_inline int XZ_FUNC rc_bit(struct rc_dec *rc, uint16_t *prob)
{
	uint32_t bound;
	int bit;

	rc_normalize(rc);
	bound = (rc->range >> RC_BIT_MODEL_TOTAL_BITS) * *prob;
	if (rc->code < bound) {
		/* Bit 0: take the lower part of the range and adapt the
		 * probability towards zero being more likely. */
		rc->range = bound;
		*prob += (RC_BIT_MODEL_TOTAL - *prob) >> RC_MOVE_BITS;
		bit = 0;
	} else {
		/* Bit 1: take the upper part of the range. */
		rc->range -= bound;
		rc->code -= bound;
		*prob -= *prob >> RC_MOVE_BITS;
		bit = 1;
	}

	return bit;
}
504
505/* Decode a bittree starting from the most significant bit. */
506static __always_inline uint32_t XZ_FUNC rc_bittree(
507 struct rc_dec *rc, uint16_t *probs, uint32_t limit)
508{
509 uint32_t symbol = 1;
510
511 do {
512 if (rc_bit(rc, &probs[symbol]))
513 symbol = (symbol << 1) + 1;
514 else
515 symbol <<= 1;
516 } while (symbol < limit);
517
518 return symbol;
519}
520
521/* Decode a bittree starting from the least significant bit. */
522static __always_inline void XZ_FUNC rc_bittree_reverse(struct rc_dec *rc,
523 uint16_t *probs, uint32_t *dest, uint32_t limit)
524{
525 uint32_t symbol = 1;
526 uint32_t i = 0;
527
528 do {
529 if (rc_bit(rc, &probs[symbol])) {
530 symbol = (symbol << 1) + 1;
531 *dest += 1 << i;
532 } else {
533 symbol <<= 1;
534 }
535 } while (++i < limit);
536}
537
/* Decode direct bits (fixed fifty-fifty probability) */
static inline void XZ_FUNC rc_direct(
		struct rc_dec *rc, uint32_t *dest, uint32_t limit)
{
	uint32_t mask;

	do {
		rc_normalize(rc);
		rc->range >>= 1;
		rc->code -= rc->range;
		/* If the subtraction went negative (sign bit set), mask is
		 * all ones: undo the subtraction and the decoded bit is 0
		 * (mask + 1 == 0). Otherwise mask is 0 and the bit is 1. */
		mask = (uint32_t)0 - (rc->code >> 31);
		rc->code += rc->range & mask;
		*dest = (*dest << 1) + (mask + 1);
	} while (--limit > 0);
}
553
554/********
555 * LZMA *
556 ********/
557
558/* Get pointer to literal coder probability array. */
559static uint16_t * XZ_FUNC lzma_literal_probs(struct xz_dec_lzma2 *s)
560{
561 uint32_t prev_byte = dict_get(&s->dict, 0);
562 uint32_t low = prev_byte >> (8 - s->lzma.lc);
563 uint32_t high = (s->dict.pos & s->lzma.literal_pos_mask) << s->lzma.lc;
564 return s->lzma.literal[low + high];
565}
566
/* Decode a literal (one 8-bit byte) */
static void XZ_FUNC lzma_literal(struct xz_dec_lzma2 *s)
{
	uint16_t *probs;
	uint32_t symbol;
	uint32_t match_byte;
	uint32_t match_bit;
	uint32_t offset;
	uint32_t i;

	probs = lzma_literal_probs(s);

	if (lzma_state_is_literal(s->lzma.state)) {
		/* Previous symbol was a literal: plain bittree decode. */
		symbol = rc_bittree(&s->rc, probs, 0x100);
	} else {
		/*
		 * Previous symbol was a match: use the byte at distance
		 * rep0 as extra context until its bits stop agreeing with
		 * the decoded bits (offset tracks whether they still agree).
		 */
		symbol = 1;
		match_byte = dict_get(&s->dict, s->lzma.rep0) << 1;
		offset = 0x100;

		do {
			match_bit = match_byte & offset;
			match_byte <<= 1;
			i = offset + match_bit + symbol;

			if (rc_bit(&s->rc, &probs[i])) {
				symbol = (symbol << 1) + 1;
				offset &= match_bit;
			} else {
				symbol <<= 1;
				offset &= ~match_bit;
			}
		} while (symbol < 0x100);
	}

	dict_put(&s->dict, (uint8_t)symbol);
	lzma_state_literal(&s->lzma.state);
}
604
605/* Decode the length of the match into s->lzma.len. */
606static void XZ_FUNC lzma_len(struct xz_dec_lzma2 *s, struct lzma_len_dec *l,
607 uint32_t pos_state)
608{
609 uint16_t *probs;
610 uint32_t limit;
611
612 if (!rc_bit(&s->rc, &l->choice)) {
613 probs = l->low[pos_state];
614 limit = LEN_LOW_SYMBOLS;
615 s->lzma.len = MATCH_LEN_MIN;
616 } else {
617 if (!rc_bit(&s->rc, &l->choice2)) {
618 probs = l->mid[pos_state];
619 limit = LEN_MID_SYMBOLS;
620 s->lzma.len = MATCH_LEN_MIN + LEN_LOW_SYMBOLS;
621 } else {
622 probs = l->high;
623 limit = LEN_HIGH_SYMBOLS;
624 s->lzma.len = MATCH_LEN_MIN + LEN_LOW_SYMBOLS
625 + LEN_MID_SYMBOLS;
626 }
627 }
628
629 s->lzma.len += rc_bittree(&s->rc, probs, limit) - limit;
630}
631
/* Decode a match. The distance will be stored in s->lzma.rep0. */
static void XZ_FUNC lzma_match(struct xz_dec_lzma2 *s, uint32_t pos_state)
{
	uint16_t *probs;
	uint32_t dist_slot;
	uint32_t limit;

	lzma_state_match(&s->lzma.state);

	/* A new distance pushes the older ones down the rep queue. */
	s->lzma.rep3 = s->lzma.rep2;
	s->lzma.rep2 = s->lzma.rep1;
	s->lzma.rep1 = s->lzma.rep0;

	lzma_len(s, &s->lzma.match_len_dec, pos_state);

	probs = s->lzma.dist_slot[lzma_get_dist_state(s->lzma.len)];
	dist_slot = rc_bittree(&s->rc, probs, DIST_SLOTS) - DIST_SLOTS;

	if (dist_slot < DIST_MODEL_START) {
		/* Small distances are the slot value itself. */
		s->lzma.rep0 = dist_slot;
	} else {
		limit = (dist_slot >> 1) - 1;
		s->lzma.rep0 = 2 + (dist_slot & 1);

		if (dist_slot < DIST_MODEL_END) {
			/* Mid-range distance: extra bits come from a
			 * reverse bittree. */
			s->lzma.rep0 <<= limit;
			probs = s->lzma.dist_special + s->lzma.rep0
					- dist_slot - 1;
			rc_bittree_reverse(&s->rc, probs,
					&s->lzma.rep0, limit);
		} else {
			/* Large distance: direct bits followed by the
			 * four lowest ("align") bits. */
			rc_direct(&s->rc, &s->lzma.rep0, limit - ALIGN_BITS);
			s->lzma.rep0 <<= ALIGN_BITS;
			rc_bittree_reverse(&s->rc, s->lzma.dist_align,
					&s->lzma.rep0, ALIGN_BITS);
		}
	}
}
670
/*
 * Decode a repeated match. The distance is one of the four most recently
 * seen matches. The distance will be stored in s->lzma.rep0.
 */
static void XZ_FUNC lzma_rep_match(struct xz_dec_lzma2 *s, uint32_t pos_state)
{
	uint32_t tmp;

	if (!rc_bit(&s->rc, &s->lzma.is_rep0[s->lzma.state])) {
		if (!rc_bit(&s->rc, &s->lzma.is_rep0_long[
				s->lzma.state][pos_state])) {
			/* Short rep: a single byte from distance rep0. */
			lzma_state_short_rep(&s->lzma.state);
			s->lzma.len = 1;
			return;
		}
	} else {
		/* Pick rep1, rep2, or rep3 and rotate it to the front
		 * of the recently-used distance queue. */
		if (!rc_bit(&s->rc, &s->lzma.is_rep1[s->lzma.state])) {
			tmp = s->lzma.rep1;
		} else {
			if (!rc_bit(&s->rc, &s->lzma.is_rep2[s->lzma.state])) {
				tmp = s->lzma.rep2;
			} else {
				tmp = s->lzma.rep3;
				s->lzma.rep3 = s->lzma.rep2;
			}

			s->lzma.rep2 = s->lzma.rep1;
		}

		s->lzma.rep1 = s->lzma.rep0;
		s->lzma.rep0 = tmp;
	}

	lzma_state_long_rep(&s->lzma.state);
	lzma_len(s, &s->lzma.rep_len_dec, pos_state);
}
707
/* LZMA decoder core. Returns false on corrupt input (invalid distance). */
static bool XZ_FUNC lzma_main(struct xz_dec_lzma2 *s)
{
	uint32_t pos_state;

	/*
	 * If the dictionary was reached during the previous call, try to
	 * finish the possibly pending repeat in the dictionary.
	 */
	if (dict_has_space(&s->dict) && s->lzma.len > 0)
		dict_repeat(&s->dict, &s->lzma.len, s->lzma.rep0);

	/*
	 * Decode more LZMA symbols. One iteration may consume up to
	 * LZMA_IN_REQUIRED - 1 bytes.
	 */
	while (dict_has_space(&s->dict) && !rc_limit_exceeded(&s->rc)) {
		pos_state = s->dict.pos & s->lzma.pos_mask;

		if (!rc_bit(&s->rc, &s->lzma.is_match[
				s->lzma.state][pos_state])) {
			lzma_literal(s);
		} else {
			if (rc_bit(&s->rc, &s->lzma.is_rep[s->lzma.state]))
				lzma_rep_match(s, pos_state);
			else
				lzma_match(s, pos_state);

			/* dict_repeat() returning false means the decoded
			 * distance points before the start of the stream. */
			if (!dict_repeat(&s->dict, &s->lzma.len, s->lzma.rep0))
				return false;
		}
	}

	/*
	 * Having the range decoder always normalized when we are outside
	 * this function makes it easier to correctly handle end of the chunk.
	 */
	rc_normalize(&s->rc);

	return true;
}
749
/*
 * Reset the LZMA decoder and range decoder state. The dictionary is not
 * reset here, because LZMA state may be reset without resetting the
 * dictionary.
 */
static void XZ_FUNC lzma_reset(struct xz_dec_lzma2 *s)
{
	uint16_t *probs;
	size_t i;

	s->lzma.state = STATE_LIT_LIT;
	s->lzma.rep0 = 0;
	s->lzma.rep1 = 0;
	s->lzma.rep2 = 0;
	s->lzma.rep3 = 0;

	/*
	 * All probabilities are initialized to the same value. This hack
	 * makes the code smaller by avoiding a separate loop for each
	 * probability array.
	 *
	 * This could be optimized so that only the part of the literal
	 * probabilities that is actually required gets initialized. In
	 * the common case we would write 12 KiB less.
	 */
	probs = s->lzma.is_match[0];
	for (i = 0; i < PROBS_TOTAL; ++i)
		probs[i] = RC_BIT_MODEL_TOTAL / 2;

	rc_reset(&s->rc);
}
780
/*
 * Decode and validate LZMA properties (lc/lp/pb) and calculate the bit masks
 * from the decoded lp and pb values. On success, the LZMA decoder state is
 * reset and true is returned.
 */
static bool XZ_FUNC lzma_props(struct xz_dec_lzma2 *s, uint8_t props)
{
	/* The props byte encodes (pb * 5 + lp) * 9 + lc; 224 is the
	 * largest valid value (pb = 4, lp = 4, lc = 8). */
	if (props > (4 * 5 + 4) * 9 + 8)
		return false;

	/* Extract pb by counting multiples of 45 (= 9 * 5). */
	s->lzma.pos_mask = 0;
	while (props >= 9 * 5) {
		props -= 9 * 5;
		++s->lzma.pos_mask;
	}

	s->lzma.pos_mask = (1 << s->lzma.pos_mask) - 1;

	/* Extract lp by counting multiples of 9; what remains is lc. */
	s->lzma.literal_pos_mask = 0;
	while (props >= 9) {
		props -= 9;
		++s->lzma.literal_pos_mask;
	}

	s->lzma.lc = props;

	/* lc + lp > 4 is rejected; presumably this bounds the literal
	 * probability array (LITERAL_CODERS_MAX) -- see xz_lzma2.h. */
	if (s->lzma.lc + s->lzma.literal_pos_mask > 4)
		return false;

	s->lzma.literal_pos_mask = (1 << s->lzma.literal_pos_mask) - 1;

	lzma_reset(s);

	return true;
}
816
817/*********
818 * LZMA2 *
819 *********/
820
/*
 * The LZMA decoder assumes that if the input limit (s->rc.in_limit) hasn't
 * been exceeded, it is safe to read up to LZMA_IN_REQUIRED bytes. This
 * wrapper function takes care of making the LZMA decoder's assumption safe.
 *
 * As long as there is plenty of input left to be decoded in the current LZMA
 * chunk, we decode directly from the caller-supplied input buffer until
 * there's LZMA_IN_REQUIRED bytes left. Those remaining bytes are copied into
 * s->temp.buf, which (hopefully) gets filled on the next call to this
 * function. We decode a few bytes from the temporary buffer so that we can
 * continue decoding from the caller-supplied input buffer again.
 */
static bool XZ_FUNC lzma2_lzma(struct xz_dec_lzma2 *s, struct xz_buf *b)
{
	size_t in_avail;
	uint32_t tmp;

	in_avail = b->in_size - b->in_pos;
	if (s->temp.size > 0 || s->lzma2.compressed == 0) {
		/* Top up temp.buf from the caller's input, bounded by the
		 * chunk's remaining compressed size and available input. */
		tmp = 2 * LZMA_IN_REQUIRED - s->temp.size;
		if (tmp > s->lzma2.compressed - s->temp.size)
			tmp = s->lzma2.compressed - s->temp.size;
		if (tmp > in_avail)
			tmp = in_avail;

		memcpy(s->temp.buf + s->temp.size, b->in + b->in_pos, tmp);

		if (s->temp.size + tmp == s->lzma2.compressed) {
			/* The rest of the chunk fits entirely in temp;
			 * zero the tail so any overread past the limit
			 * sees deterministic bytes. */
			memzero(s->temp.buf + s->temp.size + tmp,
					sizeof(s->temp.buf)
						- s->temp.size - tmp);
			s->rc.in_limit = s->temp.size + tmp;
		} else if (s->temp.size + tmp < LZMA_IN_REQUIRED) {
			/* Still not enough bytes to decode even one symbol
			 * safely; stash them and wait for more input. */
			s->temp.size += tmp;
			b->in_pos += tmp;
			return true;
		} else {
			s->rc.in_limit = s->temp.size + tmp - LZMA_IN_REQUIRED;
		}

		/* Decode from temp instead of the caller's buffer. */
		s->rc.in = s->temp.buf;
		s->rc.in_pos = 0;

		if (!lzma_main(s) || s->rc.in_pos > s->temp.size + tmp)
			return false;

		s->lzma2.compressed -= s->rc.in_pos;

		if (s->rc.in_pos < s->temp.size) {
			/* Not all old temp bytes were consumed; move the
			 * leftovers to the front and try again later. */
			s->temp.size -= s->rc.in_pos;
			memmove(s->temp.buf, s->temp.buf + s->rc.in_pos,
					s->temp.size);
			return true;
		}

		/* All old temp bytes consumed; account for the part that
		 * came from the caller's buffer. */
		b->in_pos += s->rc.in_pos - s->temp.size;
		s->temp.size = 0;
	}

	in_avail = b->in_size - b->in_pos;
	if (in_avail >= LZMA_IN_REQUIRED) {
		/* Plenty of input: decode straight from the caller's
		 * buffer up to the safe limit. */
		s->rc.in = b->in;
		s->rc.in_pos = b->in_pos;

		if (in_avail >= s->lzma2.compressed + LZMA_IN_REQUIRED)
			s->rc.in_limit = b->in_pos + s->lzma2.compressed;
		else
			s->rc.in_limit = b->in_size - LZMA_IN_REQUIRED;

		if (!lzma_main(s))
			return false;

		in_avail = s->rc.in_pos - b->in_pos;
		if (in_avail > s->lzma2.compressed)
			return false;

		s->lzma2.compressed -= in_avail;
		b->in_pos = s->rc.in_pos;
	}

	in_avail = b->in_size - b->in_pos;
	if (in_avail < LZMA_IN_REQUIRED) {
		/* Save the short tail for the next call. */
		if (in_avail > s->lzma2.compressed)
			in_avail = s->lzma2.compressed;

		memcpy(s->temp.buf, b->in + b->in_pos, in_avail);
		s->temp.size = in_avail;
		b->in_pos += in_avail;
	}

	return true;
}
913
914/*
915 * Take care of the LZMA2 control layer, and forward the job of actual LZMA
916 * decoding or copying of uncompressed chunks to other functions.
917 */
XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_lzma2_run(
		struct xz_dec_lzma2 *s, struct xz_buf *b)
{
	uint32_t tmp;

	/*
	 * Keep going while there is input, or while an LZMA chunk is being
	 * decoded (SEQ_LZMA_RUN can make progress from buffered data in
	 * s->temp even when b->in is exhausted).
	 */
	while (b->in_pos < b->in_size || s->lzma2.sequence == SEQ_LZMA_RUN) {
		switch (s->lzma2.sequence) {
		case SEQ_CONTROL:
			/*
			 * LZMA2 control byte
			 *
			 * Exact values:
			 *   0x00   End marker
			 *   0x01   Dictionary reset followed by
			 *          an uncompressed chunk
			 *   0x02   Uncompressed chunk (no dictionary reset)
			 *
			 * Highest three bits (s->control & 0xE0):
			 *   0xE0   Dictionary reset, new properties and state
			 *          reset, followed by LZMA compressed chunk
			 *   0xC0   New properties and state reset, followed
			 *          by LZMA compressed chunk (no dictionary
			 *          reset)
			 *   0xA0   State reset using old properties,
			 *          followed by LZMA compressed chunk (no
			 *          dictionary reset)
			 *   0x80   LZMA chunk (no dictionary or state reset)
			 *
			 * For LZMA compressed chunks, the lowest five bits
			 * (s->control & 1F) are the highest bits of the
			 * uncompressed size (bits 16-20).
			 *
			 * A new LZMA2 stream must begin with a dictionary
			 * reset. The first LZMA chunk must set new
			 * properties and reset the LZMA state.
			 *
			 * Values that don't match anything described above
			 * are invalid and we return XZ_DATA_ERROR.
			 */
			tmp = b->in[b->in_pos++];

			if (tmp >= 0xE0 || tmp == 0x01) {
				s->lzma2.need_props = true;
				s->lzma2.need_dict_reset = false;
				dict_reset(&s->dict, b);
			} else if (s->lzma2.need_dict_reset) {
				return XZ_DATA_ERROR;
			}

			if (tmp >= 0x80) {
				/* LZMA chunk: bits 16-20 of uncompressed size */
				s->lzma2.uncompressed = (tmp & 0x1F) << 16;
				s->lzma2.sequence = SEQ_UNCOMPRESSED_1;

				if (tmp >= 0xC0) {
					/*
					 * When there are new properties,
					 * state reset is done at
					 * SEQ_PROPERTIES.
					 */
					s->lzma2.need_props = false;
					s->lzma2.next_sequence
							= SEQ_PROPERTIES;

				} else if (s->lzma2.need_props) {
					return XZ_DATA_ERROR;

				} else {
					s->lzma2.next_sequence
							= SEQ_LZMA_PREPARE;
					if (tmp >= 0xA0)
						lzma_reset(s);
				}
			} else {
				if (tmp == 0x00)
					return XZ_STREAM_END;

				if (tmp > 0x02)
					return XZ_DATA_ERROR;

				/* Uncompressed chunk */
				s->lzma2.sequence = SEQ_COMPRESSED_0;
				s->lzma2.next_sequence = SEQ_COPY;
			}

			break;

		case SEQ_UNCOMPRESSED_1:
			/* Middle byte (bits 8-15) of uncompressed size */
			s->lzma2.uncompressed
					+= (uint32_t)b->in[b->in_pos++] << 8;
			s->lzma2.sequence = SEQ_UNCOMPRESSED_2;
			break;

		case SEQ_UNCOMPRESSED_2:
			/* Low byte; stored value is size - 1 */
			s->lzma2.uncompressed
					+= (uint32_t)b->in[b->in_pos++] + 1;
			s->lzma2.sequence = SEQ_COMPRESSED_0;
			break;

		case SEQ_COMPRESSED_0:
			/* High byte of compressed (chunk) size */
			s->lzma2.compressed
					= (uint32_t)b->in[b->in_pos++] << 8;
			s->lzma2.sequence = SEQ_COMPRESSED_1;
			break;

		case SEQ_COMPRESSED_1:
			/* Low byte; stored value is size - 1 */
			s->lzma2.compressed
					+= (uint32_t)b->in[b->in_pos++] + 1;
			s->lzma2.sequence = s->lzma2.next_sequence;
			break;

		case SEQ_PROPERTIES:
			if (!lzma_props(s, b->in[b->in_pos++]))
				return XZ_DATA_ERROR;

			s->lzma2.sequence = SEQ_LZMA_PREPARE;

			/* fallthrough */

		case SEQ_LZMA_PREPARE:
			/* The range coder needs RC_INIT_BYTES to start up. */
			if (s->lzma2.compressed < RC_INIT_BYTES)
				return XZ_DATA_ERROR;

			if (!rc_read_init(&s->rc, b))
				return XZ_OK;

			s->lzma2.compressed -= RC_INIT_BYTES;
			s->lzma2.sequence = SEQ_LZMA_RUN;

			/* fallthrough */

		case SEQ_LZMA_RUN:
			/*
			 * Set dictionary limit to indicate how much we want
			 * to be encoded at maximum. Decode new data into the
			 * dictionary. Flush the new data from dictionary to
			 * b->out. Check if we finished decoding this chunk.
			 * In case the dictionary got full but we didn't fill
			 * the output buffer yet, we may run this loop
			 * multiple times without changing s->lzma2.sequence.
			 */
			dict_limit(&s->dict, min_t(size_t,
					b->out_size - b->out_pos,
					s->lzma2.uncompressed));
			if (!lzma2_lzma(s, b))
				return XZ_DATA_ERROR;

			s->lzma2.uncompressed -= dict_flush(&s->dict, b);

			if (s->lzma2.uncompressed == 0) {
				/* Chunk done: all input must be consumed and
				 * the range coder properly terminated. */
				if (s->lzma2.compressed > 0 || s->lzma.len > 0
						|| !rc_is_finished(&s->rc))
					return XZ_DATA_ERROR;

				rc_reset(&s->rc);
				s->lzma2.sequence = SEQ_CONTROL;

			} else if (b->out_pos == b->out_size
					|| (b->in_pos == b->in_size
						&& s->temp.size
						< s->lzma2.compressed)) {
				/* Can't make more progress this call. */
				return XZ_OK;
			}

			break;

		case SEQ_COPY:
			/* Copy an uncompressed chunk straight to the dict. */
			dict_uncompressed(&s->dict, b, &s->lzma2.compressed);
			if (s->lzma2.compressed > 0)
				return XZ_OK;

			s->lzma2.sequence = SEQ_CONTROL;
			break;
		}
	}

	return XZ_OK;
}
1090
1091XZ_EXTERN struct xz_dec_lzma2 * XZ_FUNC xz_dec_lzma2_create(uint32_t dict_max)
1092{
1093 struct xz_dec_lzma2 *s;
1094
1095 /* Maximum supported dictionary by this implementation is 3 GiB. */
1096 if (dict_max > ((uint32_t)3 << 30))
1097 return NULL;
1098
1099 s = kmalloc(sizeof(*s), GFP_KERNEL);
1100 if (s == NULL)
1101 return NULL;
1102
1103 if (dict_max > 0) {
1104 s->dict.buf = vmalloc(dict_max);
1105 if (s->dict.buf == NULL) {
1106 kfree(s);
1107 return NULL;
1108 }
1109 }
1110
1111 s->dict.allocated = dict_max;
1112
1113 return s;
1114}
1115
/*
 * Reset the decoder for a new LZMA2 filter instance. 'props' is the
 * one-byte LZMA2 Filter Properties field, which encodes the dictionary
 * size as (2 | (props & 1)) << (props / 2 + 11), giving sizes from
 * 4 KiB (props == 0) up to 4 GiB - 1 (props == 40).
 */
XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_lzma2_reset(
		struct xz_dec_lzma2 *s, uint8_t props)
{
	/* This limits dictionary size to 3 GiB to keep parsing simpler. */
	if (props > 39) {
		XZ_DEBUG_MSG("props:%d", props);
		return XZ_OPTIONS_ERROR;
	}

	/* Mantissa (2 or 3) ... */
	s->dict.size = 2 + (props & 1);
	/* ... times a power of two (2^11 .. 2^30). */
	s->dict.size <<= (props >> 1) + 11;

	if (s->dict.allocated > 0 && s->dict.allocated < s->dict.size) {
#ifdef XZ_REALLOC_DICT_BUF
		/* Grow the multi-call dictionary to the required size. */
		s->dict.buf = XZ_REALLOC_DICT_BUF(s->dict.buf, s->dict.size);
		if (!s->dict.buf)
			return XZ_MEMLIMIT_ERROR;
		s->dict.allocated = s->dict.size;
#else
		/* Multi-call dictionary is smaller than the stream needs. */
		return XZ_MEMLIMIT_ERROR;
#endif
	}

	s->dict.end = s->dict.size;

	s->lzma.len = 0;

	s->lzma2.sequence = SEQ_CONTROL;
	/* A valid LZMA2 stream must start with a dictionary reset. */
	s->lzma2.need_dict_reset = true;

	s->temp.size = 0;

	return XZ_OK;
}
1150
1151XZ_EXTERN void XZ_FUNC xz_dec_lzma2_end(struct xz_dec_lzma2 *s)
1152{
1153 if (s->dict.allocated > 0)
1154 vfree(s->dict.buf);
1155
1156 kfree(s);
1157}
diff --git a/archival/libunarchive/unxz/xz_dec_stream.c b/archival/libunarchive/unxz/xz_dec_stream.c
new file mode 100644
index 000000000..e10c9413d
--- /dev/null
+++ b/archival/libunarchive/unxz/xz_dec_stream.c
@@ -0,0 +1,787 @@
1/*
2 * .xz Stream decoder
3 *
4 * Author: Lasse Collin <lasse.collin@tukaani.org>
5 *
6 * This file has been put into the public domain.
7 * You can do whatever you want with this file.
8 */
9
10#include "xz_private.h"
11#include "xz_stream.h"
12
/*
 * Hash used to validate the Index field. One instance is accumulated
 * from the Blocks as they are decoded and another from the Records in
 * the Index; in a valid file the two end up identical. The struct is
 * hashed with xz_crc32() as one raw memory block and compared with
 * memeq(), so all three fields take part in the comparison.
 */
struct xz_dec_hash {
	vli_type unpadded;
	vli_type uncompressed;
	uint32_t crc32;
};
19
struct xz_dec {
	/* Position in dec_main() */
	enum {
		SEQ_STREAM_HEADER,
		SEQ_BLOCK_START,
		SEQ_BLOCK_HEADER,
		SEQ_BLOCK_UNCOMPRESS,
		SEQ_BLOCK_PADDING,
		SEQ_BLOCK_CHECK,
		SEQ_INDEX,
		SEQ_INDEX_PADDING,
		SEQ_INDEX_CRC32,
		SEQ_STREAM_FOOTER
	} sequence;

	/* Position in variable-length integers and Check fields */
	uint32_t pos;

	/* Variable-length integer decoded by dec_vli() */
	vli_type vli;

	/* Saved in_pos and out_pos */
	size_t in_start;
	size_t out_start;

	/* CRC32 value in Block or Index */
	uint32_t crc32;

	/*
	 * Check ID from the Stream Flags: 0x00 = none, 0x01 = CRC32
	 * (the only one actually verified), 0x04 = CRC64, 0x0A = SHA-256.
	 */
	uint8_t crc_type;

	/* True if we are operating in single-call mode. */
	bool single_call;

	/*
	 * True if the next call to xz_dec_run() is allowed to return
	 * XZ_BUF_ERROR.
	 */
	bool allow_buf_error;

	/* Information stored in Block Header */
	struct {
		/*
		 * Value stored in the Compressed Size field, or
		 * VLI_UNKNOWN if Compressed Size is not present.
		 */
		vli_type compressed;

		/*
		 * Value stored in the Uncompressed Size field, or
		 * VLI_UNKNOWN if Uncompressed Size is not present.
		 */
		vli_type uncompressed;

		/* Size of the Block Header field */
		uint32_t size;
	} block_header;

	/* Information collected when decoding Blocks */
	struct {
		/* Observed compressed size of the current Block */
		vli_type compressed;

		/* Observed uncompressed size of the current Block */
		vli_type uncompressed;

		/* Number of Blocks decoded so far */
		vli_type count;

		/*
		 * Hash calculated from the Block sizes. This is used to
		 * validate the Index field.
		 */
		struct xz_dec_hash hash;
	} block;

	/* Variables needed when verifying the Index field */
	struct {
		/* Position in dec_index() */
		enum {
			SEQ_INDEX_COUNT,
			SEQ_INDEX_UNPADDED,
			SEQ_INDEX_UNCOMPRESSED
		} sequence;

		/* Size of the Index in bytes */
		vli_type size;

		/* Number of Records (matches block.count in valid files) */
		vli_type count;

		/*
		 * Hash calculated from the Records (matches block.hash in
		 * valid files).
		 */
		struct xz_dec_hash hash;
	} index;

	/*
	 * Temporary buffer needed to hold Stream Header, Block Header,
	 * and Stream Footer. The Block Header is the biggest (1 KiB)
	 * so we reserve space according to that. buf[] has to be aligned
	 * to a multiple of four bytes; the size_t variables before it
	 * should guarantee this.
	 */
	struct {
		size_t pos;
		size_t size;
		uint8_t buf[1024];
	} temp;

	struct xz_dec_lzma2 *lzma2;

#ifdef XZ_DEC_BCJ
	struct xz_dec_bcj *bcj;
	bool bcj_active;
#endif

	/* Lookup table for the CRC32 calculations in this stream decoder */
	uint32_t crc32_table[256];
};
140
141/*
142 * Fill s->temp by copying data starting from b->in[b->in_pos]. Caller
143 * must have set s->temp.pos to indicate how much data we are supposed
144 * to copy into s->temp.buf. Return true once s->temp.pos has reached
145 * s->temp.size.
146 */
147static bool XZ_FUNC fill_temp(struct xz_dec *s, struct xz_buf *b)
148{
149 size_t copy_size = min_t(size_t,
150 b->in_size - b->in_pos, s->temp.size - s->temp.pos);
151
152 memcpy(s->temp.buf + s->temp.pos, b->in + b->in_pos, copy_size);
153 b->in_pos += copy_size;
154 s->temp.pos += copy_size;
155
156 if (s->temp.pos == s->temp.size) {
157 s->temp.pos = 0;
158 return true;
159 }
160
161 return false;
162}
163
/*
 * Decode a variable-length integer (little-endian base-128 encoding).
 * State is kept in s->pos (bit shift) and s->vli (accumulator) so the
 * integer may span multiple calls. Returns XZ_STREAM_END when the
 * integer is complete, XZ_OK when more input is needed, and
 * XZ_DATA_ERROR on a non-minimal or overlong encoding.
 */
static enum xz_ret XZ_FUNC dec_vli(struct xz_dec *s,
		const uint8_t *in, size_t *in_pos, size_t in_size)
{
	uint8_t byte;

	/* s->pos == 0 marks the start of a new integer. */
	if (s->pos == 0)
		s->vli = 0;

	while (*in_pos < in_size) {
		byte = in[*in_pos];
		++*in_pos;

		/* Each byte contributes its low seven bits. */
		s->vli |= (vli_type)(byte & 0x7F) << s->pos;

		if ((byte & 0x80) == 0) {
			/* Don't allow non-minimal encodings. */
			if (byte == 0 && s->pos != 0)
				return XZ_DATA_ERROR;

			s->pos = 0;
			return XZ_STREAM_END;
		}

		s->pos += 7;
		/* Reject integers longer than VLI_BYTES_MAX bytes. */
		if (s->pos == 7 * VLI_BYTES_MAX)
			return XZ_DATA_ERROR;
	}

	return XZ_OK;
}
195
196/*
197 * Decode the Compressed Data field from a Block. Update and validate
198 * the observed compressed and uncompressed sizes of the Block so that
199 * they don't exceed the values possibly stored in the Block Header
200 * (validation assumes that no integer overflow occurs, since vli_type
201 * is normally uint64_t). Update the CRC32 if presence of the CRC32
202 * field was indicated in Stream Header.
203 *
204 * Once the decoding is finished, validate that the observed sizes match
205 * the sizes possibly stored in the Block Header. Update the hash and
206 * Block count, which are later used to validate the Index field.
207 */
static enum xz_ret XZ_FUNC dec_block(struct xz_dec *s, struct xz_buf *b)
{
	enum xz_ret ret;

	s->in_start = b->in_pos;
	s->out_start = b->out_pos;

#ifdef XZ_DEC_BCJ
	if (s->bcj_active)
		ret = xz_dec_bcj_run(s->bcj, s->lzma2, b);
	else
#endif
		ret = xz_dec_lzma2_run(s->lzma2, b);

	/* Account the bytes consumed and produced by this call. */
	s->block.compressed += b->in_pos - s->in_start;
	s->block.uncompressed += b->out_pos - s->out_start;

	/*
	 * There is no need to separately check for VLI_UNKNOWN, since
	 * the observed sizes are always smaller than VLI_UNKNOWN.
	 */
	if (s->block.compressed > s->block_header.compressed
			|| s->block.uncompressed
				> s->block_header.uncompressed)
		return XZ_DATA_ERROR;

	/* Only CRC32 (Check ID 0x01) is actually verified. */
	if (s->crc_type == 0x01)
		s->crc32 = xz_crc32(s->crc32_table,
				b->out + s->out_start,
				b->out_pos - s->out_start, s->crc32);

	if (ret == XZ_STREAM_END) {
		/* Sizes declared in the Block Header must match reality. */
		if (s->block_header.compressed != VLI_UNKNOWN
				&& s->block_header.compressed
					!= s->block.compressed)
			return XZ_DATA_ERROR;

		if (s->block_header.uncompressed != VLI_UNKNOWN
				&& s->block_header.uncompressed
					!= s->block.uncompressed)
			return XZ_DATA_ERROR;

		/*
		 * Unpadded Size covers the Block Header, the compressed
		 * data, and the Check field (whose size depends on the
		 * Check ID), so add the Check size here.
		 */
		s->block.hash.unpadded += s->block_header.size
				+ s->block.compressed;
		if (s->crc_type == 0x01)
			s->block.hash.unpadded += 4;
		if (s->crc_type == 0x04) /* CRC64 */
			s->block.hash.unpadded += 8;
		if (s->crc_type == 0x0A) /* SHA-256 */
			s->block.hash.unpadded += 32;

		s->block.hash.uncompressed += s->block.uncompressed;
		s->block.hash.crc32 = xz_crc32(s->crc32_table,
				(const uint8_t *)&s->block.hash,
				sizeof(s->block.hash), s->block.hash.crc32);

		++s->block.count;
	}

	return ret;
}
269
270/* Update the Index size and the CRC32 value. */
271static void XZ_FUNC index_update(struct xz_dec *s, const struct xz_buf *b)
272{
273 size_t in_used = b->in_pos - s->in_start;
274 s->index.size += in_used;
275 s->crc32 = xz_crc32(s->crc32_table, b->in + s->in_start, in_used, s->crc32);
276}
277
278/*
279 * Decode the Number of Records, Unpadded Size, and Uncompressed Size
280 * fields from the Index field. That is, Index Padding and CRC32 are not
281 * decoded by this function.
282 *
283 * This can return XZ_OK (more input needed), XZ_STREAM_END (everything
284 * successfully decoded), or XZ_DATA_ERROR (input is corrupt).
285 */
static enum xz_ret XZ_FUNC dec_index(struct xz_dec *s, struct xz_buf *b)
{
	enum xz_ret ret;

	do {
		ret = dec_vli(s, b->in, &b->in_pos, b->in_size);
		if (ret != XZ_STREAM_END) {
			/* Partial progress: keep CRC32/size up to date. */
			index_update(s, b);
			return ret;
		}

		switch (s->index.sequence) {
		case SEQ_INDEX_COUNT:
			s->index.count = s->vli;

			/*
			 * Validate that the Number of Records field
			 * indicates the same number of Records as
			 * there were Blocks in the Stream.
			 */
			if (s->index.count != s->block.count)
				return XZ_DATA_ERROR;

			s->index.sequence = SEQ_INDEX_UNPADDED;
			break;

		case SEQ_INDEX_UNPADDED:
			s->index.hash.unpadded += s->vli;
			s->index.sequence = SEQ_INDEX_UNCOMPRESSED;
			break;

		case SEQ_INDEX_UNCOMPRESSED:
			/* A full Record decoded: fold it into the hash. */
			s->index.hash.uncompressed += s->vli;
			s->index.hash.crc32 = xz_crc32(s->crc32_table,
					(const uint8_t *)&s->index.hash,
					sizeof(s->index.hash),
					s->index.hash.crc32);
			--s->index.count;
			s->index.sequence = SEQ_INDEX_UNPADDED;
			break;
		}
	} while (s->index.count > 0);

	return XZ_STREAM_END;
}
331
332/*
333 * Validate that the next four input bytes match the value of s->crc32.
334 * s->pos must be zero when starting to validate the first byte.
335 */
336static enum xz_ret XZ_FUNC crc32_validate(struct xz_dec *s, struct xz_buf *b)
337{
338 do {
339 if (b->in_pos == b->in_size)
340 return XZ_OK;
341
342 if (((s->crc32 >> s->pos) & 0xFF) != b->in[b->in_pos++])
343 return XZ_DATA_ERROR;
344
345 s->pos += 8;
346
347 } while (s->pos < 32);
348
349 s->crc32 = 0;
350 s->pos = 0;
351
352 return XZ_STREAM_END;
353}
354
/* Decode the Stream Header field (the first 12 bytes of the .xz Stream). */
static enum xz_ret XZ_FUNC dec_stream_header(struct xz_dec *s)
{
	if (!memeq(s->temp.buf, HEADER_MAGIC, HEADER_MAGIC_SIZE))
		return XZ_FORMAT_ERROR;

	/* The two Stream Flags bytes are protected by their own CRC32. */
	if (xz_crc32(s->crc32_table, s->temp.buf + HEADER_MAGIC_SIZE, 2, 0)
			!= get_le32(s->temp.buf + HEADER_MAGIC_SIZE + 2))
		return XZ_DATA_ERROR;

	/*
	 * Decode the Stream Flags field. Of integrity checks, only
	 * none (Check ID = 0) and CRC32 (Check ID = 1) are verified;
	 * CRC64 (0x04) and SHA-256 (0x0A) are also accepted here, their
	 * Check fields being passed through without verification.
	 * Anything else is rejected.
	 */
	if (s->temp.buf[HEADER_MAGIC_SIZE] != 0
	 || (s->temp.buf[HEADER_MAGIC_SIZE + 1] > 1
	    && s->temp.buf[HEADER_MAGIC_SIZE + 1] != 0x04
	    && s->temp.buf[HEADER_MAGIC_SIZE + 1] != 0x0A
	    )
	) {
		XZ_DEBUG_MSG("unsupported stream flags %x:%x",
				s->temp.buf[HEADER_MAGIC_SIZE],
				s->temp.buf[HEADER_MAGIC_SIZE+1]);
		return XZ_OPTIONS_ERROR;
	}

	/* Remember the Check ID for the Block and Footer stages. */
	s->crc_type = s->temp.buf[HEADER_MAGIC_SIZE + 1];

	return XZ_OK;
}
385
/* Decode the Stream Footer field (the last 12 bytes of the .xz Stream) */
static enum xz_ret XZ_FUNC dec_stream_footer(struct xz_dec *s)
{
	if (!memeq(s->temp.buf + 10, FOOTER_MAGIC, FOOTER_MAGIC_SIZE))
		return XZ_DATA_ERROR;

	/* Bytes 4-9 (Backward Size + Stream Flags) carry a CRC32. */
	if (xz_crc32(s->crc32_table, s->temp.buf + 4, 6, 0) != get_le32(s->temp.buf))
		return XZ_DATA_ERROR;

	/*
	 * Validate Backward Size. Note that we never added the size of the
	 * Index CRC32 field to s->index.size, thus we use s->index.size / 4
	 * instead of s->index.size / 4 - 1.
	 */
	if ((s->index.size >> 2) != get_le32(s->temp.buf + 4))
		return XZ_DATA_ERROR;

	/* Stream Flags in the Footer must match those in the Header. */
	if (s->temp.buf[8] != 0 || s->temp.buf[9] != s->crc_type)
		return XZ_DATA_ERROR;

	/*
	 * Use XZ_STREAM_END instead of XZ_OK to be more convenient
	 * for the caller.
	 */
	return XZ_STREAM_END;
}
412
/* Decode the Block Header and initialize the filter chain. */
static enum xz_ret XZ_FUNC dec_block_header(struct xz_dec *s)
{
	enum xz_ret ret;

	/*
	 * Validate the CRC32. We know that the temp buffer is at least
	 * eight bytes so this is safe.
	 */
	s->temp.size -= 4;
	if (xz_crc32(s->crc32_table, s->temp.buf, s->temp.size, 0)
			!= get_le32(s->temp.buf + s->temp.size))
		return XZ_DATA_ERROR;

	/* Skip the Block Header Size byte and start at the Block Flags. */
	s->temp.pos = 2;

	/*
	 * Catch unsupported Block Flags. We support only one or two filters
	 * in the chain, so we catch that with the same test.
	 */
#ifdef XZ_DEC_BCJ
	if (s->temp.buf[1] & 0x3E)
#else
	if (s->temp.buf[1] & 0x3F)
#endif
	{
		XZ_DEBUG_MSG("s->temp.buf[1] & 0x3E/3F != 0");
		return XZ_OPTIONS_ERROR;
	}

	/* Compressed Size */
	if (s->temp.buf[1] & 0x40) {
		if (dec_vli(s, s->temp.buf, &s->temp.pos, s->temp.size)
					!= XZ_STREAM_END)
			return XZ_DATA_ERROR;

		s->block_header.compressed = s->vli;
	} else {
		s->block_header.compressed = VLI_UNKNOWN;
	}

	/* Uncompressed Size */
	if (s->temp.buf[1] & 0x80) {
		if (dec_vli(s, s->temp.buf, &s->temp.pos, s->temp.size)
				!= XZ_STREAM_END)
			return XZ_DATA_ERROR;

		s->block_header.uncompressed = s->vli;
	} else {
		s->block_header.uncompressed = VLI_UNKNOWN;
	}

#ifdef XZ_DEC_BCJ
	/* If there are two filters, the first one must be a BCJ filter. */
	s->bcj_active = s->temp.buf[1] & 0x01;
	if (s->bcj_active) {
		if (s->temp.size - s->temp.pos < 2) {
			XZ_DEBUG_MSG("temp.size - temp.pos < 2");
			return XZ_OPTIONS_ERROR;
		}

		ret = xz_dec_bcj_reset(s->bcj, s->temp.buf[s->temp.pos++]);
		if (ret != XZ_OK)
			return ret;

		/*
		 * We don't support custom start offset,
		 * so Size of Properties must be zero.
		 */
		if (s->temp.buf[s->temp.pos++] != 0x00) {
			XZ_DEBUG_MSG("size of properties != 0");
			return XZ_OPTIONS_ERROR;
		}
	}
#endif

	/* Valid Filter Flags always take at least two bytes. */
	if (s->temp.size - s->temp.pos < 2)
		return XZ_DATA_ERROR;

	/* Filter ID = LZMA2 */
	if (s->temp.buf[s->temp.pos++] != 0x21) {
		XZ_DEBUG_MSG("filter ID != 0x21");
		return XZ_OPTIONS_ERROR;
	}

	/* Size of Properties = 1-byte Filter Properties */
	if (s->temp.buf[s->temp.pos++] != 0x01) {
		XZ_DEBUG_MSG("size of properties != 1");
		return XZ_OPTIONS_ERROR;
	}

	/* Filter Properties contains LZMA2 dictionary size. */
	if (s->temp.size - s->temp.pos < 1)
		return XZ_DATA_ERROR;

	ret = xz_dec_lzma2_reset(s->lzma2, s->temp.buf[s->temp.pos++]);
	if (ret != XZ_OK)
		return ret;

	/* The rest must be Header Padding. */
	while (s->temp.pos < s->temp.size)
		if (s->temp.buf[s->temp.pos++] != 0x00) {
			XZ_DEBUG_MSG("padding is not zero-filled");
			return XZ_OPTIONS_ERROR;
		}

	/* Reset the per-Block counters for dec_block(). */
	s->temp.pos = 0;
	s->block.compressed = 0;
	s->block.uncompressed = 0;

	return XZ_OK;
}
526
527static enum xz_ret XZ_FUNC dec_main(struct xz_dec *s, struct xz_buf *b)
528{
529 enum xz_ret ret;
530
531 /*
532 * Store the start position for the case when we are in the middle
533 * of the Index field.
534 */
535 s->in_start = b->in_pos;
536
537 while (true) {
538 switch (s->sequence) {
539 case SEQ_STREAM_HEADER:
540 /*
541 * Stream Header is copied to s->temp, and then
542 * decoded from there. This way if the caller
543 * gives us only little input at a time, we can
544 * still keep the Stream Header decoding code
545 * simple. Similar approach is used in many places
546 * in this file.
547 */
548 if (!fill_temp(s, b))
549 return XZ_OK;
550
551 ret = dec_stream_header(s);
552 if (ret != XZ_OK)
553 return ret;
554
555 s->sequence = SEQ_BLOCK_START;
556
557 case SEQ_BLOCK_START:
558 /* We need one byte of input to continue. */
559 if (b->in_pos == b->in_size)
560 return XZ_OK;
561
562 /* See if this is the beginning of the Index field. */
563 if (b->in[b->in_pos] == 0) {
564 s->in_start = b->in_pos++;
565 s->sequence = SEQ_INDEX;
566 break;
567 }
568
569 /*
570 * Calculate the size of the Block Header and
571 * prepare to decode it.
572 */
573 s->block_header.size
574 = ((uint32_t)b->in[b->in_pos] + 1) * 4;
575
576 s->temp.size = s->block_header.size;
577 s->temp.pos = 0;
578 s->sequence = SEQ_BLOCK_HEADER;
579
580 case SEQ_BLOCK_HEADER:
581 if (!fill_temp(s, b))
582 return XZ_OK;
583
584 ret = dec_block_header(s);
585 if (ret != XZ_OK)
586 return ret;
587
588 s->sequence = SEQ_BLOCK_UNCOMPRESS;
589
590 case SEQ_BLOCK_UNCOMPRESS:
591 ret = dec_block(s, b);
592 if (ret != XZ_STREAM_END)
593 return ret;
594
595 s->sequence = SEQ_BLOCK_PADDING;
596
597 case SEQ_BLOCK_PADDING:
598 /*
599 * Size of Compressed Data + Block Padding
600 * must be a multiple of four. We don't need
601 * s->block.compressed for anything else
602 * anymore, so we use it here to test the size
603 * of the Block Padding field.
604 */
605 while (s->block.compressed & 3) {
606 if (b->in_pos == b->in_size)
607 return XZ_OK;
608
609 if (b->in[b->in_pos++] != 0)
610 return XZ_DATA_ERROR;
611
612 ++s->block.compressed;
613 }
614
615 s->sequence = SEQ_BLOCK_CHECK;
616
617 case SEQ_BLOCK_CHECK:
618 if (s->crc_type == 0x01) {
619 ret = crc32_validate(s, b);
620 if (ret != XZ_STREAM_END)
621 return ret;
622 }
623
624 s->sequence = SEQ_BLOCK_START;
625 break;
626
627 case SEQ_INDEX:
628 ret = dec_index(s, b);
629 if (ret != XZ_STREAM_END)
630 return ret;
631
632 s->sequence = SEQ_INDEX_PADDING;
633
634 case SEQ_INDEX_PADDING:
635 while ((s->index.size + (b->in_pos - s->in_start))
636 & 3) {
637 if (b->in_pos == b->in_size) {
638 index_update(s, b);
639 return XZ_OK;
640 }
641
642 if (b->in[b->in_pos++] != 0)
643 return XZ_DATA_ERROR;
644 }
645
646 /* Finish the CRC32 value and Index size. */
647 index_update(s, b);
648
649 /* Compare the hashes to validate the Index field. */
650 if (!memeq(&s->block.hash, &s->index.hash,
651 sizeof(s->block.hash)))
652 return XZ_DATA_ERROR;
653
654 s->sequence = SEQ_INDEX_CRC32;
655
656 case SEQ_INDEX_CRC32:
657 ret = crc32_validate(s, b);
658 if (ret != XZ_STREAM_END)
659 return ret;
660
661 s->temp.size = STREAM_HEADER_SIZE;
662 s->sequence = SEQ_STREAM_FOOTER;
663
664 case SEQ_STREAM_FOOTER:
665 if (!fill_temp(s, b))
666 return XZ_OK;
667
668 return dec_stream_footer(s);
669 }
670 }
671
672 /* Never reached */
673}
674
675/*
676 * xz_dec_run() is a wrapper for dec_main() to handle some special cases in
677 * multi-call and single-call decoding.
678 *
679 * In multi-call mode, we must return XZ_BUF_ERROR when it seems clear that we
680 * are not going to make any progress anymore. This is to prevent the caller
681 * from calling us infinitely when the input file is truncated or otherwise
682 * corrupt. Since zlib-style API allows that the caller fills the input buffer
683 * only when the decoder doesn't produce any new output, we have to be careful
684 * to avoid returning XZ_BUF_ERROR too easily: XZ_BUF_ERROR is returned only
685 * after the second consecutive call to xz_dec_run() that makes no progress.
686 *
687 * In single-call mode, if we couldn't decode everything and no error
688 * occurred, either the input is truncated or the output buffer is too small.
689 * Since we know that the last input byte never produces any output, we know
690 * that if all the input was consumed and decoding wasn't finished, the file
691 * must be corrupt. Otherwise the output buffer has to be too small or the
692 * file is corrupt in a way that decoding it produces too big output.
693 *
694 * If single-call decoding fails, we reset b->in_pos and b->out_pos back to
695 * their original values. This is because with some filter chains there won't
696 * be any valid uncompressed data in the output buffer unless the decoding
697 * actually succeeds (that's the price to pay of using the output buffer as
698 * the workspace).
699 */
700XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_run(struct xz_dec *s, struct xz_buf *b)
701{
702 size_t in_start;
703 size_t out_start;
704 enum xz_ret ret;
705
706 if (s->single_call)
707 xz_dec_reset(s);
708
709 in_start = b->in_pos;
710 out_start = b->out_pos;
711 ret = dec_main(s, b);
712
713 if (s->single_call) {
714 if (ret == XZ_OK)
715 ret = b->in_pos == b->in_size
716 ? XZ_DATA_ERROR : XZ_BUF_ERROR;
717
718 if (ret != XZ_STREAM_END) {
719 b->in_pos = in_start;
720 b->out_pos = out_start;
721 }
722
723 } else if (ret == XZ_OK && in_start == b->in_pos
724 && out_start == b->out_pos) {
725 if (s->allow_buf_error)
726 ret = XZ_BUF_ERROR;
727
728 s->allow_buf_error = true;
729 } else {
730 s->allow_buf_error = false;
731 }
732
733 return ret;
734}
735
/*
 * Allocate and initialize a .xz stream decoder. dict_max == 0 selects
 * single-call mode; a nonzero value selects multi-call mode with a
 * dictionary of at most dict_max bytes (see xz_dec_lzma2_create()).
 * Returns NULL on allocation failure.
 */
XZ_EXTERN struct xz_dec * XZ_FUNC xz_dec_init(uint32_t dict_max)
{
	struct xz_dec *s = kmalloc(sizeof(*s), GFP_KERNEL);
	if (s == NULL)
		return NULL;

	s->single_call = dict_max == 0;

#ifdef XZ_DEC_BCJ
	s->bcj = xz_dec_bcj_create(s->single_call);
	if (s->bcj == NULL)
		goto error_bcj;
#endif

	s->lzma2 = xz_dec_lzma2_create(dict_max);
	if (s->lzma2 == NULL)
		goto error_lzma2;

	xz_dec_reset(s);
	return s;

/* goto-based cleanup: free whatever was acquired before the failure */
error_lzma2:
#ifdef XZ_DEC_BCJ
	xz_dec_bcj_end(s->bcj);
error_bcj:
#endif
	kfree(s);
	return NULL;
}
765
766XZ_EXTERN void XZ_FUNC xz_dec_reset(struct xz_dec *s)
767{
768 s->sequence = SEQ_STREAM_HEADER;
769 s->allow_buf_error = false;
770 s->pos = 0;
771 s->crc32 = 0;
772 memzero(&s->block, sizeof(s->block));
773 memzero(&s->index, sizeof(s->index));
774 s->temp.pos = 0;
775 s->temp.size = STREAM_HEADER_SIZE;
776}
777
778XZ_EXTERN void XZ_FUNC xz_dec_end(struct xz_dec *s)
779{
780 if (s != NULL) {
781 xz_dec_lzma2_end(s->lzma2);
782#ifdef XZ_DEC_BCJ
783 xz_dec_bcj_end(s->bcj);
784#endif
785 kfree(s);
786 }
787}
diff --git a/archival/libunarchive/unxz/xz_lzma2.h b/archival/libunarchive/unxz/xz_lzma2.h
new file mode 100644
index 000000000..47f21afbc
--- /dev/null
+++ b/archival/libunarchive/unxz/xz_lzma2.h
@@ -0,0 +1,204 @@
1/*
2 * LZMA2 definitions
3 *
4 * Authors: Lasse Collin <lasse.collin@tukaani.org>
5 * Igor Pavlov <http://7-zip.org/>
6 *
7 * This file has been put into the public domain.
8 * You can do whatever you want with this file.
9 */
10
11#ifndef XZ_LZMA2_H
12#define XZ_LZMA2_H
13
14/* Range coder constants */
15#define RC_SHIFT_BITS 8
16#define RC_TOP_BITS 24
17#define RC_TOP_VALUE (1 << RC_TOP_BITS)
18#define RC_BIT_MODEL_TOTAL_BITS 11
19#define RC_BIT_MODEL_TOTAL (1 << RC_BIT_MODEL_TOTAL_BITS)
20#define RC_MOVE_BITS 5
21
22/*
23 * Maximum number of position states. A position state is the lowest pb
24 * number of bits of the current uncompressed offset. In some places there
25 * are different sets of probabilities for different position states.
26 */
27#define POS_STATES_MAX (1 << 4)
28
29/*
30 * This enum is used to track which LZMA symbols have occurred most recently
31 * and in which order. This information is used to predict the next symbol.
32 *
33 * Symbols:
34 * - Literal: One 8-bit byte
35 * - Match: Repeat a chunk of data at some distance
36 * - Long repeat: Multi-byte match at a recently seen distance
37 * - Short repeat: One-byte repeat at a recently seen distance
38 *
39 * The symbol names are in from STATE_oldest_older_previous. REP means
40 * either short or long repeated match, and NONLIT means any non-literal.
41 */
42enum lzma_state {
43 STATE_LIT_LIT,
44 STATE_MATCH_LIT_LIT,
45 STATE_REP_LIT_LIT,
46 STATE_SHORTREP_LIT_LIT,
47 STATE_MATCH_LIT,
48 STATE_REP_LIT,
49 STATE_SHORTREP_LIT,
50 STATE_LIT_MATCH,
51 STATE_LIT_LONGREP,
52 STATE_LIT_SHORTREP,
53 STATE_NONLIT_MATCH,
54 STATE_NONLIT_REP
55};
56
57/* Total number of states */
58#define STATES 12
59
60/* The lowest 7 states indicate that the previous state was a literal. */
61#define LIT_STATES 7
62
63/* Indicate that the latest symbol was a literal. */
64static inline void XZ_FUNC lzma_state_literal(enum lzma_state *state)
65{
66 if (*state <= STATE_SHORTREP_LIT_LIT)
67 *state = STATE_LIT_LIT;
68 else if (*state <= STATE_LIT_SHORTREP)
69 *state -= 3;
70 else
71 *state -= 6;
72}
73
74/* Indicate that the latest symbol was a match. */
75static inline void XZ_FUNC lzma_state_match(enum lzma_state *state)
76{
77 *state = *state < LIT_STATES ? STATE_LIT_MATCH : STATE_NONLIT_MATCH;
78}
79
80/* Indicate that the latest state was a long repeated match. */
81static inline void XZ_FUNC lzma_state_long_rep(enum lzma_state *state)
82{
83 *state = *state < LIT_STATES ? STATE_LIT_LONGREP : STATE_NONLIT_REP;
84}
85
86/* Indicate that the latest symbol was a short match. */
87static inline void XZ_FUNC lzma_state_short_rep(enum lzma_state *state)
88{
89 *state = *state < LIT_STATES ? STATE_LIT_SHORTREP : STATE_NONLIT_REP;
90}
91
92/* Test if the previous symbol was a literal. */
93static inline bool XZ_FUNC lzma_state_is_literal(enum lzma_state state)
94{
95 return state < LIT_STATES;
96}
97
98/* Each literal coder is divided in three sections:
99 * - 0x001-0x0FF: Without match byte
100 * - 0x101-0x1FF: With match byte; match bit is 0
101 * - 0x201-0x2FF: With match byte; match bit is 1
102 *
103 * Match byte is used when the previous LZMA symbol was something else than
104 * a literal (that is, it was some kind of match).
105 */
106#define LITERAL_CODER_SIZE 0x300
107
108/* Maximum number of literal coders */
109#define LITERAL_CODERS_MAX (1 << 4)
110
111/* Minimum length of a match is two bytes. */
112#define MATCH_LEN_MIN 2
113
114/* Match length is encoded with 4, 5, or 10 bits.
115 *
116 * Length Bits
117 * 2-9 4 = Choice=0 + 3 bits
118 * 10-17 5 = Choice=1 + Choice2=0 + 3 bits
119 * 18-273 10 = Choice=1 + Choice2=1 + 8 bits
120 */
121#define LEN_LOW_BITS 3
122#define LEN_LOW_SYMBOLS (1 << LEN_LOW_BITS)
123#define LEN_MID_BITS 3
124#define LEN_MID_SYMBOLS (1 << LEN_MID_BITS)
125#define LEN_HIGH_BITS 8
126#define LEN_HIGH_SYMBOLS (1 << LEN_HIGH_BITS)
127#define LEN_SYMBOLS (LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS + LEN_HIGH_SYMBOLS)
128
129/*
130 * Maximum length of a match is 273 which is a result of the encoding
131 * described above.
132 */
133#define MATCH_LEN_MAX (MATCH_LEN_MIN + LEN_SYMBOLS - 1)
134
135/*
136 * Different sets of probabilities are used for match distances that have
137 * very short match length: Lengths of 2, 3, and 4 bytes have a separate
138 * set of probabilities for each length. The matches with longer length
139 * use a shared set of probabilities.
140 */
141#define DIST_STATES 4
142
143/*
144 * Get the index of the appropriate probability array for decoding
145 * the distance slot.
146 */
147static inline uint32_t XZ_FUNC lzma_get_dist_state(uint32_t len)
148{
149 return len < DIST_STATES + MATCH_LEN_MIN
150 ? len - MATCH_LEN_MIN : DIST_STATES - 1;
151}
152
153/*
154 * The highest two bits of a 32-bit match distance are encoded using six bits.
155 * This six-bit value is called a distance slot. This way encoding a 32-bit
156 * value takes 6-36 bits, larger values taking more bits.
157 */
158#define DIST_SLOT_BITS 6
159#define DIST_SLOTS (1 << DIST_SLOT_BITS)
160
161/* Match distances up to 127 are fully encoded using probabilities. Since
162 * the highest two bits (distance slot) are always encoded using six bits,
163 * the distances 0-3 don't need any additional bits to encode, since the
164 * distance slot itself is the same as the actual distance. DIST_MODEL_START
165 * indicates the first distance slot where at least one additional bit is
166 * needed.
167 */
168#define DIST_MODEL_START 4
169
170/*
171 * Match distances greater than 127 are encoded in three pieces:
172 * - distance slot: the highest two bits
173 * - direct bits: 2-26 bits below the highest two bits
174 * - alignment bits: four lowest bits
175 *
176 * Direct bits don't use any probabilities.
177 *
178 * The distance slot value of 14 is for distances 128-191.
179 */
180#define DIST_MODEL_END 14
181
182/* Distance slots that indicate a distance <= 127. */
183#define FULL_DISTANCES_BITS (DIST_MODEL_END / 2)
184#define FULL_DISTANCES (1 << FULL_DISTANCES_BITS)
185
186/*
187 * For match distances greater than 127, only the highest two bits and the
188 * lowest four bits (alignment) is encoded using probabilities.
189 */
190#define ALIGN_BITS 4
191#define ALIGN_SIZE (1 << ALIGN_BITS)
192#define ALIGN_MASK (ALIGN_SIZE - 1)
193
194/* Total number of all probability variables */
195#define PROBS_TOTAL (1846 + LITERAL_CODERS_MAX * LITERAL_CODER_SIZE)
196
197/*
198 * LZMA remembers the four most recent match distances. Reusing these
199 * distances tends to take less space than re-encoding the actual
200 * distance value.
201 */
202#define REPS 4
203
204#endif
diff --git a/archival/libunarchive/unxz/xz_private.h b/archival/libunarchive/unxz/xz_private.h
new file mode 100644
index 000000000..9da8d7061
--- /dev/null
+++ b/archival/libunarchive/unxz/xz_private.h
@@ -0,0 +1,120 @@
1/*
2 * Private includes and definitions
3 *
4 * Author: Lasse Collin <lasse.collin@tukaani.org>
5 *
6 * This file has been put into the public domain.
7 * You can do whatever you want with this file.
8 */
9
10#ifndef XZ_PRIVATE_H
11#define XZ_PRIVATE_H
12
13#ifdef __KERNEL__
14 /* XZ_PREBOOT may be defined only via decompress_unxz.c. */
15# ifndef XZ_PREBOOT
16# include <linux/slab.h>
17# include <linux/vmalloc.h>
18# include <linux/string.h>
19# define memeq(a, b, size) (memcmp(a, b, size) == 0)
20# define memzero(buf, size) memset(buf, 0, size)
21# endif
22# include <asm/byteorder.h>
23# include <asm/unaligned.h>
24# define get_le32(p) le32_to_cpup((const uint32_t *)(p))
25 /* XZ_IGNORE_KCONFIG may be defined only via decompress_unxz.c. */
26# ifndef XZ_IGNORE_KCONFIG
27# ifdef CONFIG_XZ_DEC_X86
28# define XZ_DEC_X86
29# endif
30# ifdef CONFIG_XZ_DEC_POWERPC
31# define XZ_DEC_POWERPC
32# endif
33# ifdef CONFIG_XZ_DEC_IA64
34# define XZ_DEC_IA64
35# endif
36# ifdef CONFIG_XZ_DEC_ARM
37# define XZ_DEC_ARM
38# endif
39# ifdef CONFIG_XZ_DEC_ARMTHUMB
40# define XZ_DEC_ARMTHUMB
41# endif
42# ifdef CONFIG_XZ_DEC_SPARC
43# define XZ_DEC_SPARC
44# endif
45# endif
46# include <linux/xz.h>
47#else
48 /*
49 * For userspace builds, use a separate header to define the required
50 * macros and functions. This makes it easier to adapt the code into
51 * different environments and avoids clutter in the Linux kernel tree.
52 */
53# include "xz_config.h"
54#endif
55
56/*
57 * If any of the BCJ filter decoders are wanted, define XZ_DEC_BCJ.
58 * XZ_DEC_BCJ is used to enable generic support for BCJ decoders.
59 */
60#ifndef XZ_DEC_BCJ
61# if defined(XZ_DEC_X86) || defined(XZ_DEC_POWERPC) \
62 || defined(XZ_DEC_IA64) || defined(XZ_DEC_ARM) \
63 || defined(XZ_DEC_ARMTHUMB) \
64 || defined(XZ_DEC_SPARC)
65# define XZ_DEC_BCJ
66# endif
67#endif
68
69/*
70 * Allocate memory for LZMA2 decoder. xz_dec_lzma2_reset() must be used
71 * before calling xz_dec_lzma2_run().
72 */
73XZ_EXTERN struct xz_dec_lzma2 * XZ_FUNC xz_dec_lzma2_create(
74 uint32_t dict_max);
75
76/*
77 * Decode the LZMA2 properties (one byte) and reset the decoder. Return
78 * XZ_OK on success, XZ_MEMLIMIT_ERROR if the preallocated dictionary is not
79 * big enough, and XZ_OPTIONS_ERROR if props indicates something that this
80 * decoder doesn't support.
81 */
82XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_lzma2_reset(
83 struct xz_dec_lzma2 *s, uint8_t props);
84
85/* Decode raw LZMA2 stream from b->in to b->out. */
86XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_lzma2_run(
87 struct xz_dec_lzma2 *s, struct xz_buf *b);
88
89/* Free the memory allocated for the LZMA2 decoder. */
90XZ_EXTERN void XZ_FUNC xz_dec_lzma2_end(struct xz_dec_lzma2 *s);
91
92#ifdef XZ_DEC_BCJ
93/*
94 * Allocate memory for BCJ decoders. xz_dec_bcj_reset() must be used before
95 * calling xz_dec_bcj_run().
96 */
97XZ_EXTERN struct xz_dec_bcj * XZ_FUNC xz_dec_bcj_create(bool single_call);
98
99/*
100 * Decode the Filter ID of a BCJ filter. This implementation doesn't
101 * support custom start offsets, so no decoding of Filter Properties
102 * is needed. Returns XZ_OK if the given Filter ID is supported.
103 * Otherwise XZ_OPTIONS_ERROR is returned.
104 */
105XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_bcj_reset(
106 struct xz_dec_bcj *s, uint8_t id);
107
108/*
109 * Decode raw BCJ + LZMA2 stream. This must be used only if there actually is
110 * a BCJ filter in the chain. If the chain has only LZMA2, xz_dec_lzma2_run()
111 * must be called directly.
112 */
113XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_bcj_run(struct xz_dec_bcj *s,
114 struct xz_dec_lzma2 *lzma2, struct xz_buf *b);
115#endif
116
117/* Free the memory allocated for the BCJ filters. */
118#define xz_dec_bcj_end(s) kfree(s)
119
120#endif
diff --git a/archival/libunarchive/unxz/xz_stream.h b/archival/libunarchive/unxz/xz_stream.h
new file mode 100644
index 000000000..efbe75ae3
--- /dev/null
+++ b/archival/libunarchive/unxz/xz_stream.h
@@ -0,0 +1,46 @@
1/*
2 * Definitions for handling the .xz file format
3 *
4 * Author: Lasse Collin <lasse.collin@tukaani.org>
5 *
6 * This file has been put into the public domain.
7 * You can do whatever you want with this file.
8 */
9
10#ifndef XZ_STREAM_H
11#define XZ_STREAM_H
12
13#if defined(__KERNEL__) && !defined(XZ_INTERNAL_CRC32)
14# include <linux/crc32.h>
15# undef crc32
16# define xz_crc32(crc32_table, buf, size, crc) \
17 (~crc32_le(~(uint32_t)(crc), buf, size))
18#endif
19
20/*
21 * See the .xz file format specification at
22 * http://tukaani.org/xz/xz-file-format.txt
23 * to understand the container format.
24 */
25
26#define STREAM_HEADER_SIZE 12
27
28#define HEADER_MAGIC "\3757zXZ\0"
29#define HEADER_MAGIC_SIZE 6
30
31#define FOOTER_MAGIC "YZ"
32#define FOOTER_MAGIC_SIZE 2
33
34/*
35 * Variable-length integer can hold a 63-bit unsigned integer, or a special
36 * value to indicate that the value is unknown.
37 */
38typedef uint64_t vli_type;
39
40#define VLI_MAX ((vli_type)-1 / 2)
41#define VLI_UNKNOWN ((vli_type)-1)
42
43/* Maximum encoded size of a VLI */
44#define VLI_BYTES_MAX (sizeof(vli_type) * 8 / 7)
45
46#endif
diff --git a/include/applets.h b/include/applets.h
index a171c5449..d8a706b44 100644
--- a/include/applets.h
+++ b/include/applets.h
@@ -419,6 +419,7 @@ IF_UNCOMPRESS(APPLET(uncompress, _BB_DIR_BIN, _BB_SUID_DROP))
419IF_UNEXPAND(APPLET_ODDNAME(unexpand, expand, _BB_DIR_USR_BIN, _BB_SUID_DROP, unexpand)) 419IF_UNEXPAND(APPLET_ODDNAME(unexpand, expand, _BB_DIR_USR_BIN, _BB_SUID_DROP, unexpand))
420IF_UNIQ(APPLET(uniq, _BB_DIR_USR_BIN, _BB_SUID_DROP)) 420IF_UNIQ(APPLET(uniq, _BB_DIR_USR_BIN, _BB_SUID_DROP))
421IF_UNIX2DOS(APPLET_ODDNAME(unix2dos, dos2unix, _BB_DIR_USR_BIN, _BB_SUID_DROP, unix2dos)) 421IF_UNIX2DOS(APPLET_ODDNAME(unix2dos, dos2unix, _BB_DIR_USR_BIN, _BB_SUID_DROP, unix2dos))
422IF_UNXZ(APPLET(unxz, _BB_DIR_USR_BIN, _BB_SUID_DROP))
422IF_UNLZMA(APPLET(unlzma, _BB_DIR_USR_BIN, _BB_SUID_DROP)) 423IF_UNLZMA(APPLET(unlzma, _BB_DIR_USR_BIN, _BB_SUID_DROP))
423IF_LZOP(APPLET_ODDNAME(unlzop, lzop, _BB_DIR_USR_BIN, _BB_SUID_DROP, unlzop)) 424IF_LZOP(APPLET_ODDNAME(unlzop, lzop, _BB_DIR_USR_BIN, _BB_SUID_DROP, unlzop))
424IF_UNZIP(APPLET(unzip, _BB_DIR_USR_BIN, _BB_SUID_DROP)) 425IF_UNZIP(APPLET(unzip, _BB_DIR_USR_BIN, _BB_SUID_DROP))
@@ -439,6 +440,8 @@ IF_WHICH(APPLET(which, _BB_DIR_USR_BIN, _BB_SUID_DROP))
439IF_WHO(APPLET(who, _BB_DIR_USR_BIN, _BB_SUID_DROP)) 440IF_WHO(APPLET(who, _BB_DIR_USR_BIN, _BB_SUID_DROP))
440IF_WHOAMI(APPLET_NOFORK(whoami, whoami, _BB_DIR_USR_BIN, _BB_SUID_DROP, whoami)) 441IF_WHOAMI(APPLET_NOFORK(whoami, whoami, _BB_DIR_USR_BIN, _BB_SUID_DROP, whoami))
441IF_XARGS(APPLET_NOEXEC(xargs, xargs, _BB_DIR_USR_BIN, _BB_SUID_DROP, xargs)) 442IF_XARGS(APPLET_NOEXEC(xargs, xargs, _BB_DIR_USR_BIN, _BB_SUID_DROP, xargs))
443IF_UNXZ(APPLET_ODDNAME(xzcat, unxz, _BB_DIR_USR_BIN, _BB_SUID_DROP, xzcat))
444IF_XZ(APPLET_ODDNAME(xz, unxz, _BB_DIR_USR_BIN, _BB_SUID_DROP, xz))
442IF_YES(APPLET_NOFORK(yes, yes, _BB_DIR_USR_BIN, _BB_SUID_DROP, yes)) 445IF_YES(APPLET_NOFORK(yes, yes, _BB_DIR_USR_BIN, _BB_SUID_DROP, yes))
443IF_GUNZIP(APPLET_ODDNAME(zcat, gunzip, _BB_DIR_BIN, _BB_SUID_DROP, zcat)) 446IF_GUNZIP(APPLET_ODDNAME(zcat, gunzip, _BB_DIR_BIN, _BB_SUID_DROP, zcat))
444IF_ZCIP(APPLET(zcip, _BB_DIR_SBIN, _BB_SUID_DROP)) 447IF_ZCIP(APPLET(zcip, _BB_DIR_SBIN, _BB_SUID_DROP))
diff --git a/include/unarchive.h b/include/unarchive.h
index a834816ba..14cd98e24 100644
--- a/include/unarchive.h
+++ b/include/unarchive.h
@@ -143,6 +143,7 @@ typedef struct inflate_unzip_result {
143} inflate_unzip_result; 143} inflate_unzip_result;
144 144
145IF_DESKTOP(long long) int inflate_unzip(inflate_unzip_result *res, off_t compr_size, int src_fd, int dst_fd) FAST_FUNC; 145IF_DESKTOP(long long) int inflate_unzip(inflate_unzip_result *res, off_t compr_size, int src_fd, int dst_fd) FAST_FUNC;
146IF_DESKTOP(long long) int unpack_xz_stream_stdin(void) FAST_FUNC;
146/* lzma unpacker takes .lzma stream from offset 0 */ 147/* lzma unpacker takes .lzma stream from offset 0 */
147IF_DESKTOP(long long) int unpack_lzma_stream(int src_fd, int dst_fd) FAST_FUNC; 148IF_DESKTOP(long long) int unpack_lzma_stream(int src_fd, int dst_fd) FAST_FUNC;
148/* the rest wants 2 first bytes already skipped by the caller */ 149/* the rest wants 2 first bytes already skipped by the caller */
diff --git a/include/usage.h b/include/usage.h
index 3aa980cdc..a9c4c4294 100644
--- a/include/usage.h
+++ b/include/usage.h
@@ -269,6 +269,28 @@
269#define lzcat_full_usage "\n\n" \ 269#define lzcat_full_usage "\n\n" \
270 "Decompress to stdout" 270 "Decompress to stdout"
271 271
272#define unxz_trivial_usage \
273 "[OPTIONS] [FILE]..."
274#define unxz_full_usage "\n\n" \
275 "Decompress FILE (or stdin)\n" \
276 "\nOptions:" \
277 "\n -c Write to stdout" \
278 "\n -f Force" \
279
280#define xz_trivial_usage \
281 "-d [OPTIONS] [FILE]..."
282#define xz_full_usage "\n\n" \
283 "Decompress FILE (or stdin)\n" \
284 "\nOptions:" \
285 "\n -d Decompress" \
286 "\n -c Write to stdout" \
287 "\n -f Force" \
288
289#define xzcat_trivial_usage \
290 "FILE"
291#define xzcat_full_usage "\n\n" \
292 "Decompress to stdout"
293
272#define cal_trivial_usage \ 294#define cal_trivial_usage \
273 "[-jy] [[MONTH] YEAR]" 295 "[-jy] [[MONTH] YEAR]"
274#define cal_full_usage "\n\n" \ 296#define cal_full_usage "\n\n" \