aboutsummaryrefslogtreecommitdiff
path: root/archival/libarchive/unxz
diff options
context:
space:
mode:
authorDenys Vlasenko <vda.linux@googlemail.com>2010-11-03 02:38:31 +0100
committerDenys Vlasenko <vda.linux@googlemail.com>2010-11-03 02:38:31 +0100
commit833d4e7f84f59099ee66eabfa3457ebb7d37eaa8 (patch)
tree3be84e1049707ce8077291065fe3689497c69b9c /archival/libarchive/unxz
parent5e9934028aa030312a1a2e2e32d5ceade8672beb (diff)
downloadbusybox-w32-833d4e7f84f59099ee66eabfa3457ebb7d37eaa8.tar.gz
busybox-w32-833d4e7f84f59099ee66eabfa3457ebb7d37eaa8.tar.bz2
busybox-w32-833d4e7f84f59099ee66eabfa3457ebb7d37eaa8.zip
rename archival/libunarchive -> archival/libarchive; move bz/ into it
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
Diffstat (limited to 'archival/libarchive/unxz')
-rw-r--r--archival/libarchive/unxz/README135
-rw-r--r--archival/libarchive/unxz/xz.h271
-rw-r--r--archival/libarchive/unxz/xz_config.h123
-rw-r--r--archival/libarchive/unxz/xz_dec_bcj.c564
-rw-r--r--archival/libarchive/unxz/xz_dec_lzma2.c1175
-rw-r--r--archival/libarchive/unxz/xz_dec_stream.c822
-rw-r--r--archival/libarchive/unxz/xz_lzma2.h204
-rw-r--r--archival/libarchive/unxz/xz_private.h159
-rw-r--r--archival/libarchive/unxz/xz_stream.h57
9 files changed, 3510 insertions, 0 deletions
diff --git a/archival/libarchive/unxz/README b/archival/libarchive/unxz/README
new file mode 100644
index 000000000..c5972f6b8
--- /dev/null
+++ b/archival/libarchive/unxz/README
@@ -0,0 +1,135 @@
1
2XZ Embedded
3===========
4
5 XZ Embedded is a relatively small, limited implementation of the .xz
6 file format. Currently only decoding is implemented.
7
8 XZ Embedded was written for use in the Linux kernel, but the code can
9 be easily used in other environments too, including regular userspace
10 applications.
11
12 This README contains information that is useful only when the copy
13 of XZ Embedded isn't part of the Linux kernel tree. You should also
14 read linux/Documentation/xz.txt even if you aren't using XZ Embedded
15 as part of Linux; information in that file is not repeated in this
16 README.
17
18Compiling the Linux kernel module
19
20 The xz_dec module depends on crc32 module, so make sure that you have
21 it enabled (CONFIG_CRC32).
22
23 Building the xz_dec and xz_dec_test modules without support for BCJ
24 filters:
25
26 cd linux/lib/xz
27 make -C /path/to/kernel/source \
28 KCPPFLAGS=-I"$(pwd)/../../include" M="$(pwd)" \
29 CONFIG_XZ_DEC=m CONFIG_XZ_DEC_TEST=m
30
31 Building the xz_dec and xz_dec_test modules with support for BCJ
32 filters:
33
34 cd linux/lib/xz
35 make -C /path/to/kernel/source \
36 KCPPFLAGS=-I"$(pwd)/../../include" M="$(pwd)" \
37 CONFIG_XZ_DEC=m CONFIG_XZ_DEC_TEST=m CONFIG_XZ_DEC_BCJ=y \
38 CONFIG_XZ_DEC_X86=y CONFIG_XZ_DEC_POWERPC=y \
39 CONFIG_XZ_DEC_IA64=y CONFIG_XZ_DEC_ARM=y \
40 CONFIG_XZ_DEC_ARMTHUMB=y CONFIG_XZ_DEC_SPARC=y
41
42 If you want only one or a few of the BCJ filters, omit the appropriate
43 variables. CONFIG_XZ_DEC_BCJ=y is always required to build the support
44 code shared between all BCJ filters.
45
46 Most people don't need the xz_dec_test module. You can skip building
47 it by omitting CONFIG_XZ_DEC_TEST=m from the make command line.
48
49Compiler requirements
50
51 XZ Embedded should compile as either GNU-C89 (used in the Linux
52 kernel) or with any C99 compiler. Getting the code to compile with
53 non-GNU C89 compiler or a C++ compiler should be quite easy as
54 long as there is a data type for unsigned 64-bit integer (or the
55 code is modified not to support large files, which needs some more
56 care than just using 32-bit integer instead of 64-bit).
57
58 If you use GCC, try to use a recent version. For example, on x86,
59 xz_dec_lzma2.c compiled with GCC 3.3.6 is 15-25 % slower than when
60 compiled with GCC 4.3.3.
61
62Embedding into userspace applications
63
64 To embed the XZ decoder, copy the following files into a single
65 directory in your source code tree:
66
67 linux/include/linux/xz.h
68 linux/lib/xz/xz_crc32.c
69 linux/lib/xz/xz_dec_lzma2.c
70 linux/lib/xz/xz_dec_stream.c
71 linux/lib/xz/xz_lzma2.h
72 linux/lib/xz/xz_private.h
73 linux/lib/xz/xz_stream.h
74 userspace/xz_config.h
75
76 Alternatively, xz.h may be placed into a different directory but then
77 that directory must be in the compiler include path when compiling
78 the .c files.
79
80 Your code should use only the functions declared in xz.h. The rest of
81 the .h files are meant only for internal use in XZ Embedded.
82
83 You may want to modify xz_config.h to be more suitable for your build
84 environment. Probably you should at least skim through it even if the
85 default file works as is.
86
87BCJ filter support
88
89 If you want support for one or more BCJ filters, you need to copy also
90 linux/lib/xz/xz_dec_bcj.c into your application, and use appropriate
91 #defines in xz_config.h or in compiler flags. You don't need these
92 #defines in the code that just uses XZ Embedded via xz.h, but having
93 them always #defined doesn't hurt either.
94
95 #define Instruction set BCJ filter endianness
96 XZ_DEC_X86 x86 or x86-64 Little endian only
97 XZ_DEC_POWERPC PowerPC Big endian only
98 XZ_DEC_IA64 Itanium (IA-64) Big or little endian
99 XZ_DEC_ARM ARM Little endian only
100 XZ_DEC_ARMTHUMB ARM-Thumb Little endian only
101 XZ_DEC_SPARC SPARC Big or little endian
102
103 While some architectures are (partially) bi-endian, the endianness
104 setting doesn't change the endianness of the instructions on all
105 architectures. That's why Itanium and SPARC filters work for both big
106 and little endian executables (Itanium has little endian instructions
107 and SPARC has big endian instructions).
108
109 There currently is no filter for little endian PowerPC or big endian
110 ARM or ARM-Thumb. Implementing filters for them can be considered if
111 there is a need for such filters in real-world applications.
112
113Notes about shared libraries
114
115 If you are including XZ Embedded into a shared library, you very
116 probably should rename the xz_* functions to prevent symbol
117 conflicts in case your library is linked against some other library
118 or application that also has XZ Embedded in it (which may even be
119 a different version of XZ Embedded). TODO: Provide an easy way
120 to do this.
121
122 Please don't create a shared library of XZ Embedded itself unless
123 it is fine to rebuild everything depending on that shared library
124 every time you upgrade to a newer version of XZ Embedded. There are
125 no API or ABI stability guarantees between different versions of
126 XZ Embedded.
127
128Specifying the calling convention
129
130 XZ_FUNC macro was included to support declaring functions with __init
131 in Linux. Outside Linux, it can be used to specify the calling
132 convention on systems that support multiple calling conventions.
133 For example, on Windows, you may make all functions use the stdcall
134 calling convention by defining XZ_FUNC=__stdcall when building and
135 using the functions from XZ Embedded.
diff --git a/archival/libarchive/unxz/xz.h b/archival/libarchive/unxz/xz.h
new file mode 100644
index 000000000..c6c071c4a
--- /dev/null
+++ b/archival/libarchive/unxz/xz.h
@@ -0,0 +1,271 @@
/*
 * XZ decompressor
 *
 * Authors: Lasse Collin <lasse.collin@tukaani.org>
 *          Igor Pavlov <http://7-zip.org/>
 *
 * This file has been put into the public domain.
 * You can do whatever you want with this file.
 */

#ifndef XZ_H
#define XZ_H

#ifdef __KERNEL__
#	include <linux/stddef.h>
#	include <linux/types.h>
#else
#	include <stddef.h>
#	include <stdint.h>
#endif

/* In Linux, this is used to make extern functions static when needed. */
#ifndef XZ_EXTERN
#	define XZ_EXTERN extern
#endif

/* In Linux, this is used to mark the functions with __init when needed. */
#ifndef XZ_FUNC
#	define XZ_FUNC
#endif

/**
 * enum xz_mode - Operation mode
 *
 * @XZ_SINGLE:		Single-call mode. Uses less RAM than the multi-call
 *			modes because the LZMA2 dictionary does not need to
 *			be allocated as part of the decoder state; all data
 *			structures are allocated at initialization, so
 *			xz_dec_run() cannot return XZ_MEM_ERROR.
 * @XZ_PREALLOC:	Multi-call mode with a preallocated LZMA2 dictionary
 *			buffer. All data structures are allocated at
 *			initialization, so xz_dec_run() cannot return
 *			XZ_MEM_ERROR.
 * @XZ_DYNALLOC:	Multi-call mode. The LZMA2 dictionary is allocated
 *			once its required size has been parsed from the
 *			stream headers. If that allocation fails,
 *			xz_dec_run() returns XZ_MEM_ERROR.
 *
 * Support for a subset of these modes can be selected at compile time
 * by defining XZ_DEC_SINGLE, XZ_DEC_PREALLOC, or XZ_DEC_DYNALLOC. The
 * xz_dec kernel module is always built with all modes enabled, but the
 * preboot code may be built with fewer features to minimize code size.
 */
enum xz_mode {
	XZ_SINGLE,
	XZ_PREALLOC,
	XZ_DYNALLOC
};

/**
 * enum xz_ret - Return codes
 * @XZ_OK:			Everything is OK so far; more input or more
 *				output space is required to continue. Possible
 *				only in multi-call mode (XZ_PREALLOC or
 *				XZ_DYNALLOC).
 * @XZ_STREAM_END:		Operation finished successfully.
 * @XZ_UNSUPPORTED_CHECK:	Integrity check type is not supported;
 *				decoding can still continue in multi-call mode
 *				by simply calling xz_dec_run() again.
 *				NOTE: Used only if XZ_DEC_ANY_CHECK was
 *				defined at build time (it is not used in the
 *				kernel); otherwise unsupported check types
 *				produce XZ_OPTIONS_ERROR.
 * @XZ_MEM_ERROR:		Allocating memory failed. Possible only when
 *				the decoder was initialized with XZ_DYNALLOC.
 *				The attempted allocation never exceeds the
 *				dict_max argument given to xz_dec_init().
 * @XZ_MEMLIMIT_ERROR:		The stream needs a bigger LZMA2 dictionary
 *				than allowed by the dict_max argument given to
 *				xz_dec_init(). Possible only in multi-call
 *				mode; single-call mode (XZ_SINGLE) ignores
 *				dict_max.
 * @XZ_FORMAT_ERROR:		File format was not recognized (wrong magic
 *				bytes).
 * @XZ_OPTIONS_ERROR:		The header CRC32 matches, but the header
 *				itself requests compression options that this
 *				implementation does not support.
 * @XZ_DATA_ERROR:		Compressed data is corrupt.
 * @XZ_BUF_ERROR:		Cannot make any progress; the exact meaning
 *				differs between the two modes, see below.
 *
 * In multi-call mode, XZ_BUF_ERROR is returned when two consecutive calls
 * to the XZ code could neither consume any input nor produce any new
 * output: no new input is available, or the output buffer is full while at
 * least one output byte is still pending. Assuming the calling code is not
 * buggy, this can happen only with a truncated or otherwise corrupt stream.
 *
 * In single-call mode, XZ_BUF_ERROR is returned only when the output buffer
 * is too small, or corrupt input makes the decoder produce more output than
 * the caller expected. When it is (relatively) clear that the input is
 * truncated, XZ_DATA_ERROR is used instead of XZ_BUF_ERROR.
 */
enum xz_ret {
	XZ_OK,
	XZ_STREAM_END,
	XZ_UNSUPPORTED_CHECK,
	XZ_MEM_ERROR,
	XZ_MEMLIMIT_ERROR,
	XZ_FORMAT_ERROR,
	XZ_OPTIONS_ERROR,
	XZ_DATA_ERROR,
	XZ_BUF_ERROR
};

/**
 * struct xz_buf - Passing input and output buffers to XZ code
 * @in:		Beginning of the input buffer. May be NULL if and only if
 *		in_pos is equal to in_size.
 * @in_pos:	Current position in the input buffer; must not exceed
 *		in_size.
 * @in_size:	Size of the input buffer
 * @out:	Beginning of the output buffer. May be NULL if and only if
 *		out_pos is equal to out_size.
 * @out_pos:	Current position in the output buffer; must not exceed
 *		out_size.
 * @out_size:	Size of the output buffer
 *
 * The XZ code modifies only in_pos, out_pos, and the contents of the
 * output buffer from out[out_pos] onward.
 */
struct xz_buf {
	const uint8_t *in;
	size_t in_pos;
	size_t in_size;

	uint8_t *out;
	size_t out_pos;
	size_t out_size;
};

/**
 * struct xz_dec - Opaque type to hold the XZ decoder state
 */
struct xz_dec;

/**
 * xz_dec_init() - Allocate and initialize a XZ decoder state
 * @mode:	Operation mode
 * @dict_max:	Maximum size of the LZMA2 dictionary (history buffer) for
 *		multi-call decoding; ignored in single-call mode
 *		(mode == XZ_SINGLE). The LZMA2 dictionary is always 2^n or
 *		2^n + 2^(n-1) bytes (the latter is less common in practice),
 *		so other values for dict_max don't make sense. In the kernel,
 *		64 KiB to 1 MiB are probably the only reasonable values,
 *		except for kernel and initramfs images where a bigger
 *		dictionary can be fine and useful.
 *
 * Single-call mode (XZ_SINGLE): xz_dec_run() decodes the whole stream at
 * once. The caller must provide enough output space or the decoding fails.
 * The output space doubles as the dictionary buffer, which is why no
 * dictionary needs to be allocated inside the decoder state. For the same
 * reason, streams encoded with a big dictionary are not a problem: the
 * output buffer only needs to hold the actual uncompressed data and may be
 * smaller than the dictionary size stored in the stream headers.
 *
 * Multi-call mode with preallocated dictionary (XZ_PREALLOC): dict_max
 * bytes are preallocated for the LZMA2 dictionary, so xz_dec_run() never
 * allocates memory and cannot run out of it. If the preallocated dictionary
 * is too small for the given input stream, xz_dec_run() returns
 * XZ_MEMLIMIT_ERROR; knowing what kind of data will be decoded helps avoid
 * allocating an excessive dictionary.
 *
 * Multi-call mode with dynamically allocated dictionary (XZ_DYNALLOC):
 * dict_max caps the dictionary size that xz_dec_run() may allocate once it
 * has parsed the size from the stream headers. This avoids excessive
 * allocations while still bounding memory usage to a sane value when
 * decompressing streams from untrusted sources.
 *
 * On success, returns a pointer to a struct xz_dec ready for use with
 * xz_dec_run(); returns NULL if memory allocation fails.
 */
XZ_EXTERN struct xz_dec * XZ_FUNC xz_dec_init(
		enum xz_mode mode, uint32_t dict_max);

/**
 * xz_dec_run() - Run the XZ decoder
 * @s:		Decoder state allocated using xz_dec_init()
 * @b:		Input and output buffers
 *
 * The possible return values depend on build options and operation mode;
 * see enum xz_ret for details.
 *
 * NOTE: If an error occurs in single-call mode (return value is not
 * XZ_STREAM_END), b->in_pos and b->out_pos are not modified, and the
 * contents of the output buffer from b->out[b->out_pos] onward are
 * undefined. This holds even after XZ_BUF_ERROR, because some filter
 * chains require a second pass over the output buffer, which cannot be
 * done properly if the buffer is truncated. Thus, you cannot give the
 * single-call decoder a too-small buffer and expect that much valid data
 * from the beginning of the stream; use the multi-call decoder if you
 * don't want to uncompress the whole stream.
 */
XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_run(struct xz_dec *s, struct xz_buf *b);

/**
 * xz_dec_reset() - Reset an already allocated decoder state
 * @s:		Decoder state allocated using xz_dec_init()
 *
 * Resets the multi-call decoder state without freeing and reallocating
 * memory via xz_dec_end() and xz_dec_init(). In single-call mode,
 * xz_dec_run() always calls this itself, so an explicit call is useful
 * only in multi-call mode.
 */
XZ_EXTERN void XZ_FUNC xz_dec_reset(struct xz_dec *s);

/**
 * xz_dec_end() - Free the memory allocated for the decoder state
 * @s:		Decoder state allocated using xz_dec_init(). If s is NULL,
 *		this function does nothing.
 */
XZ_EXTERN void XZ_FUNC xz_dec_end(struct xz_dec *s);

/*
 * A standalone build (userspace, or in-kernel for boot-time use) needs its
 * own CRC32 implementation; normal in-kernel use relies on the kernel's
 * CRC32 module instead and can ignore the functions below.
 */
#ifndef XZ_INTERNAL_CRC32
#	ifdef __KERNEL__
#		define XZ_INTERNAL_CRC32 0
#	else
#		define XZ_INTERNAL_CRC32 1
#	endif
#endif

#if XZ_INTERNAL_CRC32
/*
 * This must be called before any other xz_* function to initialize
 * the CRC32 lookup table.
 */
XZ_EXTERN void XZ_FUNC xz_crc32_init(void);

/*
 * Update a CRC32 value using the polynomial from IEEE-802.3. Pass zero as
 * the third argument to start a new calculation, or the previously
 * returned value to continue one.
 */
XZ_EXTERN uint32_t XZ_FUNC xz_crc32(
		const uint8_t *buf, size_t size, uint32_t crc);
#endif
#endif
diff --git a/archival/libarchive/unxz/xz_config.h b/archival/libarchive/unxz/xz_config.h
new file mode 100644
index 000000000..187e1cbed
--- /dev/null
+++ b/archival/libarchive/unxz/xz_config.h
@@ -0,0 +1,123 @@
1/*
2 * Private includes and definitions for userspace use of XZ Embedded
3 *
4 * Author: Lasse Collin <lasse.collin@tukaani.org>
5 *
6 * This file has been put into the public domain.
7 * You can do whatever you want with this file.
8 */
9
10#ifndef XZ_CONFIG_H
11#define XZ_CONFIG_H
12
13/* Uncomment as needed to enable BCJ filter decoders. */
14/* #define XZ_DEC_X86 */
15/* #define XZ_DEC_POWERPC */
16/* #define XZ_DEC_IA64 */
17/* #define XZ_DEC_ARM */
18/* #define XZ_DEC_ARMTHUMB */
19/* #define XZ_DEC_SPARC */
20
21#include <stdbool.h>
22#include <stdlib.h>
23#include <string.h>
24
25#include "xz.h"
26
27#define kmalloc(size, flags) malloc(size)
28#define kfree(ptr) free(ptr)
29#define vmalloc(size) malloc(size)
30#define vfree(ptr) free(ptr)
31
32#define memeq(a, b, size) (memcmp(a, b, size) == 0)
33#define memzero(buf, size) memset(buf, 0, size)
34
35#undef min
36#undef min_t
37#define min(x, y) ((x) < (y) ? (x) : (y))
38#define min_t(type, x, y) min(x, y)
39
40/*
41 * Some functions have been marked with __always_inline to keep the
42 * performance reasonable even when the compiler is optimizing for
43 * small code size. You may be able to save a few bytes by #defining
44 * __always_inline to plain inline, but don't complain if the code
45 * becomes slow.
46 *
47 * NOTE: System headers on GNU/Linux may #define this macro already,
48 * so if you want to change it, you need to #undef it first.
49 */
50#ifndef __always_inline
51# ifdef __GNUC__
52# define __always_inline \
53 inline __attribute__((__always_inline__))
54# else
55# define __always_inline inline
56# endif
57#endif
58
59/*
60 * Some functions are marked to never be inlined to reduce stack usage.
61 * If you don't care about stack usage, you may want to modify this so
62 * that noinline_for_stack is #defined to be empty even when using GCC.
63 * Doing so may save a few bytes in binary size.
64 */
65#ifndef noinline_for_stack
66# ifdef __GNUC__
67# define noinline_for_stack __attribute__((__noinline__))
68# else
69# define noinline_for_stack
70# endif
71#endif
72
/* Inline functions to access unaligned unsigned 32-bit integers */
#ifndef XZ_FUNC
#	define XZ_FUNC
#endif
#ifndef get_unaligned_le32
/* Read a little endian 32-bit value from a possibly unaligned buffer. */
static inline uint32_t XZ_FUNC get_unaligned_le32(const uint8_t *buf)
{
	uint32_t val = buf[3];

	val = (val << 8) | buf[2];
	val = (val << 8) | buf[1];
	val = (val << 8) | buf[0];
	return val;
}
#endif
83
#ifndef XZ_FUNC
#	define XZ_FUNC
#endif
#ifndef get_unaligned_be32
/*
 * Read a big endian 32-bit value from a possibly unaligned buffer.
 *
 * buf[0] must be cast to uint32_t *before* shifting: the original
 * (uint32_t)(buf[0] << 24) promotes buf[0] to signed int first, and
 * left-shifting into the sign bit is undefined behavior for any
 * buf[0] >= 0x80 (CERT INT34-C).
 */
static inline uint32_t XZ_FUNC get_unaligned_be32(const uint8_t *buf)
{
	return ((uint32_t)buf[0] << 24)
			| ((uint32_t)buf[1] << 16)
			| ((uint32_t)buf[2] << 8)
			| (uint32_t)buf[3];
}
#endif
93
#ifndef XZ_FUNC
#	define XZ_FUNC
#endif
#ifndef put_unaligned_le32
/* Store a 32-bit value into a possibly unaligned buffer, little endian. */
static inline void XZ_FUNC put_unaligned_le32(uint32_t val, uint8_t *buf)
{
	unsigned k;

	for (k = 0; k < 4; ++k) {
		buf[k] = (uint8_t)val;
		val >>= 8;
	}
}
#endif
103
#ifndef XZ_FUNC
#	define XZ_FUNC
#endif
#ifndef put_unaligned_be32
/* Store a 32-bit value into a possibly unaligned buffer, big endian. */
static inline void XZ_FUNC put_unaligned_be32(uint32_t val, uint8_t *buf)
{
	buf[3] = (uint8_t)val;
	buf[2] = (uint8_t)(val >> 8);
	buf[1] = (uint8_t)(val >> 16);
	buf[0] = (uint8_t)(val >> 24);
}
#endif
113
114/*
115 * Use get_unaligned_le32() also for aligned access for simplicity. On
116 * little endian systems, #define get_le32(ptr) (*(const uint32_t *)(ptr))
117 * could save a few bytes in code size.
118 */
119#ifndef get_le32
120# define get_le32 get_unaligned_le32
121#endif
122
123#endif
diff --git a/archival/libarchive/unxz/xz_dec_bcj.c b/archival/libarchive/unxz/xz_dec_bcj.c
new file mode 100644
index 000000000..09162b51f
--- /dev/null
+++ b/archival/libarchive/unxz/xz_dec_bcj.c
@@ -0,0 +1,564 @@
1/*
2 * Branch/Call/Jump (BCJ) filter decoders
3 *
4 * Authors: Lasse Collin <lasse.collin@tukaani.org>
5 * Igor Pavlov <http://7-zip.org/>
6 *
7 * This file has been put into the public domain.
8 * You can do whatever you want with this file.
9 */
10
11#include "xz_private.h"
12
13/*
14 * The rest of the file is inside this ifdef. It makes things a little more
15 * convenient when building without support for any BCJ filters.
16 */
17#ifdef XZ_DEC_BCJ
18
19struct xz_dec_bcj {
20 /* Type of the BCJ filter being used */
21 enum {
22 BCJ_X86 = 4, /* x86 or x86-64 */
23 BCJ_POWERPC = 5, /* Big endian only */
24 BCJ_IA64 = 6, /* Big or little endian */
25 BCJ_ARM = 7, /* Little endian only */
26 BCJ_ARMTHUMB = 8, /* Little endian only */
27 BCJ_SPARC = 9 /* Big or little endian */
28 } type;
29
30 /*
31 * Return value of the next filter in the chain. We need to preserve
32 * this information across calls, because we must not call the next
33 * filter anymore once it has returned XZ_STREAM_END.
34 */
35 enum xz_ret ret;
36
37 /* True if we are operating in single-call mode. */
38 bool single_call;
39
40 /*
41 * Absolute position relative to the beginning of the uncompressed
42 * data (in a single .xz Block). We care only about the lowest 32
43 * bits so this doesn't need to be uint64_t even with big files.
44 */
45 uint32_t pos;
46
47 /* x86 filter state */
48 uint32_t x86_prev_mask;
49
50 /* Temporary space to hold the variables from struct xz_buf */
51 uint8_t *out;
52 size_t out_pos;
53 size_t out_size;
54
55 struct {
56 /* Amount of already filtered data in the beginning of buf */
57 size_t filtered;
58
59 /* Total amount of data currently stored in buf */
60 size_t size;
61
62 /*
63 * Buffer to hold a mix of filtered and unfiltered data. This
64 * needs to be big enough to hold Alignment + 2 * Look-ahead:
65 *
66 * Type Alignment Look-ahead
67 * x86 1 4
68 * PowerPC 4 0
69 * IA-64 16 0
70 * ARM 4 0
71 * ARM-Thumb 2 2
72 * SPARC 4 0
73 */
74 uint8_t buf[16];
75 } temp;
76};
77
#ifdef XZ_DEC_X86
/*
 * Test the most significant byte of an absolute address in an x86
 * instruction: only targets whose top byte is 0x00 or 0xFF are converted.
 */
#define bcj_x86_test_msbyte(b) ((b) == 0x00 || (b) == 0xFF)

static noinline_for_stack size_t XZ_FUNC bcj_x86(
		struct xz_dec_bcj *s, uint8_t *buf, size_t size)
{
	static const bool mask_to_allowed_status[8]
		= { true, true, true, false, true, false, false, false };

	static const uint8_t mask_to_bit_num[8] = { 0, 1, 2, 2, 3, 3, 3, 3 };

	size_t off;
	size_t last_e8 = (size_t)-1;	/* position of the previous E8/E9 */
	uint32_t mask = s->x86_prev_mask;
	uint32_t src_addr;
	uint32_t dst_addr;
	uint32_t shift;
	uint8_t msbyte;

	/* Each candidate needs four bytes of look-ahead after the opcode. */
	if (size <= 4)
		return 0;

	size -= 4;
	for (off = 0; off < size; ++off) {
		/* Scan for CALL (0xE8) and JMP (0xE9) opcodes. */
		if ((buf[off] & 0xFE) != 0xE8)
			continue;

		last_e8 = off - last_e8;
		if (last_e8 > 3) {
			mask = 0;
		} else {
			mask = (mask << (last_e8 - 1)) & 7;
			if (mask != 0) {
				msbyte = buf[off + 4 - mask_to_bit_num[mask]];
				if (!mask_to_allowed_status[mask]
						|| bcj_x86_test_msbyte(msbyte)) {
					last_e8 = off;
					mask = (mask << 1) | 1;
					continue;
				}
			}
		}

		last_e8 = off;

		if (bcj_x86_test_msbyte(buf[off + 4])) {
			src_addr = get_unaligned_le32(buf + off + 1);
			for (;;) {
				dst_addr = src_addr
						- (s->pos + (uint32_t)off + 5);
				if (mask == 0)
					break;

				shift = mask_to_bit_num[mask] * 8;
				msbyte = (uint8_t)(dst_addr >> (24 - shift));
				if (!bcj_x86_test_msbyte(msbyte))
					break;

				src_addr = dst_addr
					^ (((uint32_t)1 << (32 - shift)) - 1);
			}

			/* Sign-extend from 25 bits and write it back. */
			dst_addr &= 0x01FFFFFF;
			dst_addr |= (uint32_t)0 - (dst_addr & 0x01000000);
			put_unaligned_le32(dst_addr, buf + off + 1);
			off += 4;
		} else {
			mask = (mask << 1) | 1;
		}
	}

	last_e8 = off - last_e8;
	s->x86_prev_mask = last_e8 > 3 ? 0 : mask << (last_e8 - 1);
	return off;
}
#endif
156
#ifdef XZ_DEC_POWERPC
static noinline_for_stack size_t XZ_FUNC bcj_powerpc(
		struct xz_dec_bcj *s, uint8_t *buf, size_t size)
{
	size_t off;
	uint32_t insn;

	/* PowerPC instructions are fixed-size 4-byte big endian words. */
	for (off = 0; off + 4 <= size; off += 4) {
		insn = get_unaligned_be32(buf + off);
		/* Branch-and-link: primary opcode 18 with AA=0, LK=1 */
		if ((insn & 0xFC000003) == 0x48000001) {
			insn &= 0x03FFFFFC;
			insn -= s->pos + (uint32_t)off;
			insn &= 0x03FFFFFC;
			insn |= 0x48000001;
			put_unaligned_be32(insn, buf + off);
		}
	}

	return off;
}
#endif
178
#ifdef XZ_DEC_IA64
static noinline_for_stack size_t XZ_FUNC bcj_ia64(
		struct xz_dec_bcj *s, uint8_t *buf, size_t size)
{
	/*
	 * For each of the 32 possible template values in the low five bits
	 * of a bundle, a bitmask of which of the three instruction slots
	 * may contain branches to convert.
	 */
	static const uint8_t branch_table[32] = {
		0, 0, 0, 0, 0, 0, 0, 0,
		0, 0, 0, 0, 0, 0, 0, 0,
		4, 4, 6, 6, 0, 0, 7, 7,
		4, 4, 0, 0, 4, 4, 0, 0
	};

	/*
	 * These locals take a little stack space, but less than the LZMA2
	 * decoder does, so reducing usage here alone wouldn't help.
	 */

	size_t pos;		/* offset of the current 16-byte bundle */
	size_t k;		/* byte index while (de)serializing a slot */
	uint32_t slot;		/* instruction slot (0, 1, or 2) */
	uint32_t bit_pos;	/* bit offset of the slot in the bundle */
	uint32_t byte_pos;	/* bit_pos split into whole bytes... */
	uint32_t bit_res;	/* ...plus a residual bit count */
	uint32_t addr;		/* address part of an instruction */
	uint32_t mask;		/* which slots to convert in this bundle */
	uint64_t instr;		/* 41-bit instruction in the low 48 bits */
	uint64_t norm;		/* instr shifted down by bit_res */

	for (pos = 0; pos + 16 <= size; pos += 16) {
		mask = branch_table[buf[pos] & 0x1F];
		for (slot = 0, bit_pos = 5; slot < 3; ++slot, bit_pos += 41) {
			if (((mask >> slot) & 1) == 0)
				continue;

			byte_pos = bit_pos >> 3;
			bit_res = bit_pos & 7;

			/* Load six bytes covering the slot, little endian. */
			instr = 0;
			for (k = 0; k < 6; ++k)
				instr |= (uint64_t)(buf[pos + k + byte_pos])
						<< (8 * k);

			norm = instr >> bit_res;

			/* Relative branch with a 21-bit displacement? */
			if (((norm >> 37) & 0x0F) == 0x05
					&& ((norm >> 9) & 0x07) == 0) {
				addr = (norm >> 13) & 0x0FFFFF;
				addr |= ((uint32_t)(norm >> 36) & 1) << 20;
				addr <<= 4;
				addr -= s->pos + (uint32_t)pos;
				addr >>= 4;

				norm &= ~((uint64_t)0x8FFFFF << 13);
				norm |= (uint64_t)(addr & 0x0FFFFF) << 13;
				norm |= (uint64_t)(addr & 0x100000)
						<< (36 - 20);

				instr &= (1 << bit_res) - 1;
				instr |= norm << bit_res;

				/* Store the six bytes back. */
				for (k = 0; k < 6; ++k)
					buf[pos + k + byte_pos]
						= (uint8_t)(instr >> (8 * k));
			}
		}
	}

	return pos;
}
#endif
263
#ifdef XZ_DEC_ARM
static noinline_for_stack size_t XZ_FUNC bcj_arm(
		struct xz_dec_bcj *s, uint8_t *buf, size_t size)
{
	size_t off;
	uint32_t addr;

	for (off = 0; off + 4 <= size; off += 4) {
		/* BL: condition AL with opcode byte 0xEB */
		if (buf[off + 3] == 0xEB) {
			addr = (uint32_t)buf[off]
					| ((uint32_t)buf[off + 1] << 8)
					| ((uint32_t)buf[off + 2] << 16);
			addr <<= 2;
			/* On ARM, PC reads 8 bytes past the instruction. */
			addr -= s->pos + (uint32_t)off + 8;
			addr >>= 2;
			buf[off] = (uint8_t)addr;
			buf[off + 1] = (uint8_t)(addr >> 8);
			buf[off + 2] = (uint8_t)(addr >> 16);
		}
	}

	return off;
}
#endif
287
#ifdef XZ_DEC_ARMTHUMB
static noinline_for_stack size_t XZ_FUNC bcj_armthumb(
		struct xz_dec_bcj *s, uint8_t *buf, size_t size)
{
	size_t off;
	uint32_t addr;

	for (off = 0; off + 4 <= size; off += 2) {
		/* BL/BLX: a pair of 16-bit halfwords, 0xF0-0xF7 then 0xF8-0xFF */
		if ((buf[off + 1] & 0xF8) == 0xF0
				&& (buf[off + 3] & 0xF8) == 0xF8) {
			addr = (((uint32_t)buf[off + 1] & 0x07) << 19)
					| ((uint32_t)buf[off] << 11)
					| (((uint32_t)buf[off + 3] & 0x07) << 8)
					| (uint32_t)buf[off + 2];
			addr <<= 1;
			addr -= s->pos + (uint32_t)off + 4;
			addr >>= 1;
			buf[off + 1] = (uint8_t)(0xF0 | ((addr >> 19) & 0x07));
			buf[off] = (uint8_t)(addr >> 11);
			buf[off + 3] = (uint8_t)(0xF8 | ((addr >> 8) & 0x07));
			buf[off + 2] = (uint8_t)addr;
			/* Skip the second halfword of the converted pair. */
			off += 2;
		}
	}

	return off;
}
#endif
316
#ifdef XZ_DEC_SPARC
static noinline_for_stack size_t XZ_FUNC bcj_sparc(
		struct xz_dec_bcj *s, uint8_t *buf, size_t size)
{
	size_t off;
	uint32_t insn;

	for (off = 0; off + 4 <= size; off += 4) {
		insn = get_unaligned_be32(buf + off);
		/* CALL with a small forward or backward displacement */
		if ((insn >> 22) == 0x100 || (insn >> 22) == 0x1FF) {
			insn <<= 2;
			insn -= s->pos + (uint32_t)off;
			insn >>= 2;
			insn = ((uint32_t)0x40000000 - (insn & 0x400000))
					| 0x40000000 | (insn & 0x3FFFFF);
			put_unaligned_be32(insn, buf + off);
		}
	}

	return off;
}
#endif
339
340/*
341 * Apply the selected BCJ filter. Update *pos and s->pos to match the amount
342 * of data that got filtered.
343 *
344 * NOTE: This is implemented as a switch statement to avoid using function
345 * pointers, which could be problematic in the kernel boot code, which must
346 * avoid pointers to static data (at least on x86).
347 */
348static void XZ_FUNC bcj_apply(struct xz_dec_bcj *s,
349 uint8_t *buf, size_t *pos, size_t size)
350{
351 size_t filtered;
352
353 buf += *pos;
354 size -= *pos;
355
356 switch (s->type) {
357#ifdef XZ_DEC_X86
358 case BCJ_X86:
359 filtered = bcj_x86(s, buf, size);
360 break;
361#endif
362#ifdef XZ_DEC_POWERPC
363 case BCJ_POWERPC:
364 filtered = bcj_powerpc(s, buf, size);
365 break;
366#endif
367#ifdef XZ_DEC_IA64
368 case BCJ_IA64:
369 filtered = bcj_ia64(s, buf, size);
370 break;
371#endif
372#ifdef XZ_DEC_ARM
373 case BCJ_ARM:
374 filtered = bcj_arm(s, buf, size);
375 break;
376#endif
377#ifdef XZ_DEC_ARMTHUMB
378 case BCJ_ARMTHUMB:
379 filtered = bcj_armthumb(s, buf, size);
380 break;
381#endif
382#ifdef XZ_DEC_SPARC
383 case BCJ_SPARC:
384 filtered = bcj_sparc(s, buf, size);
385 break;
386#endif
387 default:
388 /* Never reached but silence compiler warnings. */
389 filtered = 0;
390 break;
391 }
392
393 *pos += filtered;
394 s->pos += filtered;
395}
396
397/*
398 * Flush pending filtered data from temp to the output buffer.
399 * Move the remaining mixture of possibly filtered and unfiltered
400 * data to the beginning of temp.
401 */
402static void XZ_FUNC bcj_flush(struct xz_dec_bcj *s, struct xz_buf *b)
403{
404 size_t copy_size;
405
406 copy_size = min_t(size_t, s->temp.filtered, b->out_size - b->out_pos);
407 memcpy(b->out + b->out_pos, s->temp.buf, copy_size);
408 b->out_pos += copy_size;
409
410 s->temp.filtered -= copy_size;
411 s->temp.size -= copy_size;
412 memmove(s->temp.buf, s->temp.buf + copy_size, s->temp.size);
413}
414
/*
 * The BCJ filter functions are primitive in the sense that they process
 * the data in chunks of 1-16 bytes. To hide this issue, this function
 * does some buffering.
 */
XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_bcj_run(struct xz_dec_bcj *s,
		struct xz_dec_lzma2 *lzma2, struct xz_buf *b)
{
	size_t out_start;

	/*
	 * Flush pending already filtered data to the output buffer. Return
	 * immediately if we couldn't flush everything, or if the next
	 * filter in the chain had already returned XZ_STREAM_END.
	 */
	if (s->temp.filtered > 0) {
		bcj_flush(s, b);
		if (s->temp.filtered > 0)
			return XZ_OK;

		if (s->ret == XZ_STREAM_END)
			return XZ_STREAM_END;
	}

	/*
	 * If we have more output space than what is currently pending in
	 * temp, copy the unfiltered data from temp to the output buffer
	 * and try to fill the output buffer by decoding more data from the
	 * next filter in the chain. Apply the BCJ filter on the new data
	 * in the output buffer. If everything cannot be filtered, copy it
	 * to temp and rewind the output buffer position accordingly.
	 */
	if (s->temp.size < b->out_size - b->out_pos) {
		out_start = b->out_pos;
		memcpy(b->out + b->out_pos, s->temp.buf, s->temp.size);
		b->out_pos += s->temp.size;

		s->ret = xz_dec_lzma2_run(lzma2, b);
		if (s->ret != XZ_STREAM_END
				&& (s->ret != XZ_OK || s->single_call))
			return s->ret;

		/* Filter in place; out_start is advanced past filtered data. */
		bcj_apply(s, b->out, &out_start, b->out_pos);

		/*
		 * As an exception, if the next filter returned XZ_STREAM_END,
		 * we can do that too, since the last few bytes that remain
		 * unfiltered are meant to remain unfiltered.
		 */
		if (s->ret == XZ_STREAM_END)
			return XZ_STREAM_END;

		/* Stash the unfiltered tail in temp for the next call. */
		s->temp.size = b->out_pos - out_start;
		b->out_pos -= s->temp.size;
		memcpy(s->temp.buf, b->out + b->out_pos, s->temp.size);
	}

	/*
	 * If we have unfiltered data in temp, try to fill by decoding more
	 * data from the next filter. Apply the BCJ filter on temp. Then we
	 * hopefully can fill the actual output buffer by copying filtered
	 * data from temp. A mix of filtered and unfiltered data may be left
	 * in temp; it will be taken care of on the next call to this
	 * function.
	 */
	if (s->temp.size > 0) {
		/* Make b->out{,_pos,_size} temporarily point to s->temp. */
		s->out = b->out;
		s->out_pos = b->out_pos;
		s->out_size = b->out_size;
		b->out = s->temp.buf;
		b->out_pos = s->temp.size;
		b->out_size = sizeof(s->temp.buf);

		s->ret = xz_dec_lzma2_run(lzma2, b);

		s->temp.size = b->out_pos;
		b->out = s->out;
		b->out_pos = s->out_pos;
		b->out_size = s->out_size;

		if (s->ret != XZ_OK && s->ret != XZ_STREAM_END)
			return s->ret;

		bcj_apply(s, s->temp.buf, &s->temp.filtered, s->temp.size);

		/*
		 * If the next filter returned XZ_STREAM_END, we mark that
		 * everything is filtered, since the last unfiltered bytes
		 * of the stream are meant to be left as is.
		 */
		if (s->ret == XZ_STREAM_END)
			s->temp.filtered = s->temp.size;

		bcj_flush(s, b);
		if (s->temp.filtered > 0)
			return XZ_OK;
	}

	return s->ret;
}
515
516XZ_EXTERN struct xz_dec_bcj * XZ_FUNC xz_dec_bcj_create(bool single_call)
517{
518 struct xz_dec_bcj *s = kmalloc(sizeof(*s), GFP_KERNEL);
519 if (s != NULL)
520 s->single_call = single_call;
521
522 return s;
523}
524
/*
 * Validate the Filter ID against the set of compiled-in BCJ filters and
 * reset the decoder state for a new Block. Returns XZ_OPTIONS_ERROR for
 * unsupported IDs, XZ_OK otherwise.
 */
XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_bcj_reset(
		struct xz_dec_bcj *s, uint8_t id)
{
	/* Empty cases fall through to "break": any compiled-in ID is OK. */
	switch (id) {
#ifdef XZ_DEC_X86
	case BCJ_X86:
#endif
#ifdef XZ_DEC_POWERPC
	case BCJ_POWERPC:
#endif
#ifdef XZ_DEC_IA64
	case BCJ_IA64:
#endif
#ifdef XZ_DEC_ARM
	case BCJ_ARM:
#endif
#ifdef XZ_DEC_ARMTHUMB
	case BCJ_ARMTHUMB:
#endif
#ifdef XZ_DEC_SPARC
	case BCJ_SPARC:
#endif
		break;

	default:
		/* Unsupported Filter ID */
		return XZ_OPTIONS_ERROR;
	}

	s->type = id;
	s->ret = XZ_OK;
	s->pos = 0;
	s->x86_prev_mask = 0;
	s->temp.filtered = 0;
	s->temp.size = 0;

	return XZ_OK;
}
563
564#endif
diff --git a/archival/libarchive/unxz/xz_dec_lzma2.c b/archival/libarchive/unxz/xz_dec_lzma2.c
new file mode 100644
index 000000000..da71cb4d4
--- /dev/null
+++ b/archival/libarchive/unxz/xz_dec_lzma2.c
@@ -0,0 +1,1175 @@
1/*
2 * LZMA2 decoder
3 *
4 * Authors: Lasse Collin <lasse.collin@tukaani.org>
5 * Igor Pavlov <http://7-zip.org/>
6 *
7 * This file has been put into the public domain.
8 * You can do whatever you want with this file.
9 */
10
11#include "xz_private.h"
12#include "xz_lzma2.h"
13
14/*
15 * Range decoder initialization eats the first five bytes of each LZMA chunk.
16 */
17#define RC_INIT_BYTES 5
18
19/*
20 * Minimum number of usable input buffer to safely decode one LZMA symbol.
21 * The worst case is that we decode 22 bits using probabilities and 26
22 * direct bits. This may decode at maximum of 20 bytes of input. However,
23 * lzma_main() does an extra normalization before returning, thus we
24 * need to put 21 here.
25 */
26#define LZMA_IN_REQUIRED 21
27
28/*
29 * Dictionary (history buffer)
30 *
31 * These are always true:
32 * start <= pos <= full <= end
33 * pos <= limit <= end
34 *
35 * In multi-call mode, also these are true:
36 * end == size
37 * size <= size_max
38 * allocated <= size
39 *
40 * Most of these variables are size_t to support single-call mode,
41 * in which the dictionary variables address the actual output
42 * buffer directly.
43 */
struct dictionary {
	/* Beginning of the history buffer */
	uint8_t *buf;

	/* Old position in buf (before decoding more data) */
	size_t start;

	/* Position in buf */
	size_t pos;

	/*
	 * How full the dictionary is. This is used to detect corrupt input
	 * that would read beyond the beginning of the uncompressed stream.
	 */
	size_t full;

	/* Write limit; we don't write to buf[limit] or later bytes. */
	size_t limit;

	/*
	 * End of the dictionary buffer. In multi-call mode, this is
	 * the same as the dictionary size. In single-call mode, this
	 * indicates the size of the output buffer.
	 */
	size_t end;

	/*
	 * Size of the dictionary as specified in Block Header. This is used
	 * together with "full" to detect corrupt input that would make us
	 * read beyond the beginning of the uncompressed stream.
	 */
	uint32_t size;

	/*
	 * Maximum allowed dictionary size in multi-call mode.
	 * This is ignored in single-call mode.
	 */
	uint32_t size_max;

	/*
	 * Amount of memory currently allocated for the dictionary.
	 * This is used only with XZ_DYNALLOC. (With XZ_PREALLOC,
	 * size_max is always the same as the allocated size.)
	 */
	uint32_t allocated;

	/* Operation mode (single-call vs. multi-call decoding) */
	enum xz_mode mode;
};
93
94/* Range decoder */
/* Range decoder state */
struct rc_dec {
	uint32_t range;
	uint32_t code;

	/*
	 * Number of initializing bytes remaining to be read
	 * by rc_read_init().
	 */
	uint32_t init_bytes_left;

	/*
	 * Buffer from which we read our input. It can be either
	 * temp.buf or the caller-provided input buffer.
	 */
	const uint8_t *in;
	size_t in_pos;
	size_t in_limit;
};
113
114/* Probabilities for a length decoder. */
/* Probabilities for a length decoder. */
struct lzma_len_dec {
	/* Probability of match length being at least 10 */
	uint16_t choice;

	/* Probability of match length being at least 18 */
	uint16_t choice2;

	/* Probabilities for match lengths 2-9 */
	uint16_t low[POS_STATES_MAX][LEN_LOW_SYMBOLS];

	/* Probabilities for match lengths 10-17 */
	uint16_t mid[POS_STATES_MAX][LEN_MID_SYMBOLS];

	/* Probabilities for match lengths 18-273 */
	uint16_t high[LEN_HIGH_SYMBOLS];
};
131
struct lzma_dec {
	/* Distances of latest four matches */
	uint32_t rep0;
	uint32_t rep1;
	uint32_t rep2;
	uint32_t rep3;

	/* Types of the most recently seen LZMA symbols */
	enum lzma_state state;

	/*
	 * Length of a match. This is updated so that dict_repeat can
	 * be called again to finish repeating the whole match.
	 */
	uint32_t len;

	/*
	 * LZMA properties or related bit masks (number of literal
	 * context bits, a mask derived from the number of literal
	 * position bits, and a mask derived from the number of
	 * position bits)
	 */
	uint32_t lc;
	uint32_t literal_pos_mask; /* (1 << lp) - 1 */
	uint32_t pos_mask; /* (1 << pb) - 1 */

	/* If 1, it's a match. Otherwise it's a single 8-bit literal. */
	uint16_t is_match[STATES][POS_STATES_MAX];

	/* If 1, it's a repeated match. The distance is one of rep0 .. rep3. */
	uint16_t is_rep[STATES];

	/*
	 * If 0, distance of a repeated match is rep0.
	 * Otherwise check is_rep1.
	 */
	uint16_t is_rep0[STATES];

	/*
	 * If 0, distance of a repeated match is rep1.
	 * Otherwise check is_rep2.
	 */
	uint16_t is_rep1[STATES];

	/* If 0, distance of a repeated match is rep2. Otherwise it is rep3. */
	uint16_t is_rep2[STATES];

	/*
	 * If 1, the repeated match has length of one byte. Otherwise
	 * the length is decoded from rep_len_decoder.
	 */
	uint16_t is_rep0_long[STATES][POS_STATES_MAX];

	/*
	 * Probability tree for the highest two bits of the match
	 * distance. There is a separate probability tree for match
	 * lengths of 2 (i.e. MATCH_LEN_MIN), 3, 4, and [5, 273].
	 */
	uint16_t dist_slot[DIST_STATES][DIST_SLOTS];

	/*
	 * Probability trees for additional bits for match distance
	 * when the distance is in the range [4, 127].
	 */
	uint16_t dist_special[FULL_DISTANCES - DIST_MODEL_END];

	/*
	 * Probability tree for the lowest four bits of a match
	 * distance that is equal to or greater than 128.
	 */
	uint16_t dist_align[ALIGN_SIZE];

	/* Length of a normal match */
	struct lzma_len_dec match_len_dec;

	/* Length of a repeated match */
	struct lzma_len_dec rep_len_dec;

	/* Probabilities of literals */
	uint16_t literal[LITERAL_CODERS_MAX][LITERAL_CODER_SIZE];
};
213
/* LZMA2 chunk-framing layer state */
struct lzma2_dec {
	/* Position in xz_dec_lzma2_run(). */
	enum lzma2_seq {
		SEQ_CONTROL,
		SEQ_UNCOMPRESSED_1,
		SEQ_UNCOMPRESSED_2,
		SEQ_COMPRESSED_0,
		SEQ_COMPRESSED_1,
		SEQ_PROPERTIES,
		SEQ_LZMA_PREPARE,
		SEQ_LZMA_RUN,
		SEQ_COPY
	} sequence;

	/* Next position after decoding the compressed size of the chunk. */
	enum lzma2_seq next_sequence;

	/* Uncompressed size of LZMA chunk (2 MiB at maximum) */
	uint32_t uncompressed;

	/*
	 * Compressed size of LZMA chunk or compressed/uncompressed
	 * size of uncompressed chunk (64 KiB at maximum)
	 */
	uint32_t compressed;

	/*
	 * True if dictionary reset is needed. This is false before
	 * the first chunk (LZMA or uncompressed).
	 */
	bool need_dict_reset;

	/*
	 * True if new LZMA properties are needed. This is false
	 * before the first LZMA chunk.
	 */
	bool need_props;
};
252
/* Complete LZMA2 decoder state: range decoder, dictionary, framing, LZMA. */
struct xz_dec_lzma2 {
	/*
	 * The order below is important on x86 to reduce code size and
	 * it shouldn't hurt on other platforms. Everything up to and
	 * including lzma.pos_mask are in the first 128 bytes on x86-32,
	 * which allows using smaller instructions to access those
	 * variables. On x86-64, fewer variables fit into the first 128
	 * bytes, but this is still the best order without sacrificing
	 * the readability by splitting the structures.
	 */
	struct rc_dec rc;
	struct dictionary dict;
	struct lzma2_dec lzma2;
	struct lzma_dec lzma;

	/*
	 * Temporary buffer which holds small number of input bytes between
	 * decoder calls. See lzma2_lzma() for details.
	 */
	struct {
		uint32_t size;
		uint8_t buf[3 * LZMA_IN_REQUIRED];
	} temp;
};
277
278/**************
279 * Dictionary *
280 **************/
281
282/*
283 * Reset the dictionary state. When in single-call mode, set up the beginning
284 * of the dictionary to point to the actual output buffer.
285 */
286static void XZ_FUNC dict_reset(struct dictionary *dict, struct xz_buf *b)
287{
288 if (DEC_IS_SINGLE(dict->mode)) {
289 dict->buf = b->out + b->out_pos;
290 dict->end = b->out_size - b->out_pos;
291 }
292
293 dict->start = 0;
294 dict->pos = 0;
295 dict->limit = 0;
296 dict->full = 0;
297}
298
299/* Set dictionary write limit */
300static void XZ_FUNC dict_limit(struct dictionary *dict, size_t out_max)
301{
302 if (dict->end - dict->pos <= out_max)
303 dict->limit = dict->end;
304 else
305 dict->limit = dict->pos + out_max;
306}
307
308/* Return true if at least one byte can be written into the dictionary. */
309static __always_inline bool XZ_FUNC dict_has_space(const struct dictionary *dict)
310{
311 return dict->pos < dict->limit;
312}
313
/*
 * Get a byte from the dictionary at the given distance. The distance is
 * assumed to be valid, or as a special case, zero when the dictionary is
 * still empty. This special case is needed for single-call decoding to
 * avoid writing a '\0' to the end of the destination buffer.
 */
static __always_inline uint32_t XZ_FUNC dict_get(
		const struct dictionary *dict, uint32_t dist)
{
	size_t offset = dict->pos - dist - 1;

	/* Wrap around in the circular buffer (multi-call mode). */
	if (dist >= dict->pos)
		offset += dict->end;

	/* An empty dictionary (full == 0) yields 0 instead of reading buf. */
	return dict->full > 0 ? dict->buf[offset] : 0;
}
330
331/*
332 * Put one byte into the dictionary. It is assumed that there is space for it.
333 */
334static inline void XZ_FUNC dict_put(struct dictionary *dict, uint8_t byte)
335{
336 dict->buf[dict->pos++] = byte;
337
338 if (dict->full < dict->pos)
339 dict->full = dict->pos;
340}
341
342/*
343 * Repeat given number of bytes from the given distance. If the distance is
344 * invalid, false is returned. On success, true is returned and *len is
345 * updated to indicate how many bytes were left to be repeated.
346 */
/*
 * Repeat given number of bytes from the given distance. If the distance is
 * invalid (reaches beyond the data decoded so far, or beyond the declared
 * dictionary size), false is returned. On success, true is returned and
 * *len is updated to indicate how many bytes were left to be repeated
 * (nonzero when the write limit was hit first).
 */
static bool XZ_FUNC dict_repeat(
		struct dictionary *dict, uint32_t *len, uint32_t dist)
{
	size_t back;
	uint32_t left;

	if (dist >= dict->full || dist >= dict->size)
		return false;

	/* Copy only as much as fits below the write limit. */
	left = min_t(size_t, dict->limit - dict->pos, *len);
	*len -= left;

	back = dict->pos - dist - 1;
	if (dist >= dict->pos)
		back += dict->end;

	/* Byte-at-a-time copy: source and destination may overlap. */
	do {
		dict->buf[dict->pos++] = dict->buf[back++];
		if (back == dict->end)
			back = 0;
	} while (--left > 0);

	if (dict->full < dict->pos)
		dict->full = dict->pos;

	return true;
}
374
375/* Copy uncompressed data as is from input to dictionary and output buffers. */
/* Copy uncompressed data as is from input to dictionary and output buffers. */
static void XZ_FUNC dict_uncompressed(
		struct dictionary *dict, struct xz_buf *b, uint32_t *left)
{
	size_t copy_size;

	while (*left > 0 && b->in_pos < b->in_size
			&& b->out_pos < b->out_size) {
		/* Bounded by input avail, output avail, dict room, and *left */
		copy_size = min(b->in_size - b->in_pos,
				b->out_size - b->out_pos);
		if (copy_size > dict->end - dict->pos)
			copy_size = dict->end - dict->pos;
		if (copy_size > *left)
			copy_size = *left;

		*left -= copy_size;

		memcpy(dict->buf + dict->pos, b->in + b->in_pos, copy_size);
		dict->pos += copy_size;

		if (dict->full < dict->pos)
			dict->full = dict->pos;

		if (DEC_IS_MULTI(dict->mode)) {
			/* Wrap the circular dictionary buffer. */
			if (dict->pos == dict->end)
				dict->pos = 0;

			/*
			 * In multi-call mode the dictionary is separate from
			 * b->out, so the output copy must be done here too.
			 */
			memcpy(b->out + b->out_pos, b->in + b->in_pos,
					copy_size);
		}

		dict->start = dict->pos;

		b->out_pos += copy_size;
		b->in_pos += copy_size;

	}
}
413
414/*
415 * Flush pending data from dictionary to b->out. It is assumed that there is
416 * enough space in b->out. This is guaranteed because caller uses dict_limit()
417 * before decoding data into the dictionary.
418 */
/*
 * Flush pending data from dictionary to b->out. It is assumed that there is
 * enough space in b->out. This is guaranteed because caller uses dict_limit()
 * before decoding data into the dictionary. Returns the number of bytes
 * flushed.
 */
static uint32_t XZ_FUNC dict_flush(struct dictionary *dict, struct xz_buf *b)
{
	size_t copy_size = dict->pos - dict->start;

	if (DEC_IS_MULTI(dict->mode)) {
		/*
		 * Wrap pos before copying: copy_size was computed above,
		 * and the copy itself reads from dict->start.
		 */
		if (dict->pos == dict->end)
			dict->pos = 0;

		memcpy(b->out + b->out_pos, dict->buf + dict->start,
				copy_size);
	}

	dict->start = dict->pos;
	b->out_pos += copy_size;
	return copy_size;
}
435
436/*****************
437 * Range decoder *
438 *****************/
439
440/* Reset the range decoder. */
441static void XZ_FUNC rc_reset(struct rc_dec *rc)
442{
443 rc->range = (uint32_t)-1;
444 rc->code = 0;
445 rc->init_bytes_left = RC_INIT_BYTES;
446}
447
448/*
449 * Read the first five initial bytes into rc->code if they haven't been
450 * read already. (Yes, the first byte gets completely ignored.)
451 */
452static bool XZ_FUNC rc_read_init(struct rc_dec *rc, struct xz_buf *b)
453{
454 while (rc->init_bytes_left > 0) {
455 if (b->in_pos == b->in_size)
456 return false;
457
458 rc->code = (rc->code << 8) + b->in[b->in_pos++];
459 --rc->init_bytes_left;
460 }
461
462 return true;
463}
464
465/* Return true if there may not be enough input for the next decoding loop. */
466static inline bool XZ_FUNC rc_limit_exceeded(const struct rc_dec *rc)
467{
468 return rc->in_pos > rc->in_limit;
469}
470
471/*
472 * Return true if it is possible (from point of view of range decoder) that
473 * we have reached the end of the LZMA chunk.
474 */
475static inline bool XZ_FUNC rc_is_finished(const struct rc_dec *rc)
476{
477 return rc->code == 0;
478}
479
480/* Read the next input byte if needed. */
/*
 * Read the next input byte if needed, keeping rc->range above
 * RC_TOP_VALUE. Callers guarantee input availability via in_limit.
 */
static __always_inline void XZ_FUNC rc_normalize(struct rc_dec *rc)
{
	if (rc->range < RC_TOP_VALUE) {
		rc->range <<= RC_SHIFT_BITS;
		rc->code = (rc->code << RC_SHIFT_BITS) + rc->in[rc->in_pos++];
	}
}
488
/*
 * Decode one bit. In some versions, this function has been split into
 * three functions so that the compiler is supposed to be able to more
 * easily avoid an extra branch. In this particular version of the LZMA
 * decoder, this doesn't seem to be a good idea (tested with GCC 3.3.6,
 * 3.4.6, and 4.3.3 on x86). Using a non-split version results in nicer
 * looking code too.
 *
 * NOTE: This must return an int. Do not make it return a bool or the speed
 * of the code generated by GCC 3.x decreases 10-15 %. (GCC 4.3 doesn't care,
 * and it generates 10-20 % faster code than GCC 3.x from this file anyway.)
 */
static __always_inline int XZ_FUNC rc_bit(struct rc_dec *rc, uint16_t *prob)
{
	uint32_t bound;
	int bit;

	rc_normalize(rc);
	/* Split the range in proportion to the current probability. */
	bound = (rc->range >> RC_BIT_MODEL_TOTAL_BITS) * *prob;
	if (rc->code < bound) {
		/* Bit was 0: shrink range and increase the probability. */
		rc->range = bound;
		*prob += (RC_BIT_MODEL_TOTAL - *prob) >> RC_MOVE_BITS;
		bit = 0;
	} else {
		/* Bit was 1: take the upper part and decrease probability. */
		rc->range -= bound;
		rc->code -= bound;
		*prob -= *prob >> RC_MOVE_BITS;
		bit = 1;
	}

	return bit;
}
520
521/* Decode a bittree starting from the most significant bit. */
522static __always_inline uint32_t XZ_FUNC rc_bittree(
523 struct rc_dec *rc, uint16_t *probs, uint32_t limit)
524{
525 uint32_t symbol = 1;
526
527 do {
528 if (rc_bit(rc, &probs[symbol]))
529 symbol = (symbol << 1) + 1;
530 else
531 symbol <<= 1;
532 } while (symbol < limit);
533
534 return symbol;
535}
536
537/* Decode a bittree starting from the least significant bit. */
538static __always_inline void XZ_FUNC rc_bittree_reverse(struct rc_dec *rc,
539 uint16_t *probs, uint32_t *dest, uint32_t limit)
540{
541 uint32_t symbol = 1;
542 uint32_t i = 0;
543
544 do {
545 if (rc_bit(rc, &probs[symbol])) {
546 symbol = (symbol << 1) + 1;
547 *dest += 1 << i;
548 } else {
549 symbol <<= 1;
550 }
551 } while (++i < limit);
552}
553
554/* Decode direct bits (fixed fifty-fifty probability) */
/* Decode direct bits (fixed fifty-fifty probability) */
static inline void XZ_FUNC rc_direct(
		struct rc_dec *rc, uint32_t *dest, uint32_t limit)
{
	uint32_t mask;

	do {
		rc_normalize(rc);
		rc->range >>= 1;
		rc->code -= rc->range;
		/* mask is all ones if rc->code underflowed, all zeros if not */
		mask = (uint32_t)0 - (rc->code >> 31);
		/* Undo the subtraction when the decoded bit was 0 */
		rc->code += rc->range & mask;
		/* mask + 1 is the decoded bit: 0 (mask all ones) or 1 */
		*dest = (*dest << 1) + (mask + 1);
	} while (--limit > 0);
}
569
570/********
571 * LZMA *
572 ********/
573
574/* Get pointer to literal coder probability array. */
575static uint16_t * XZ_FUNC lzma_literal_probs(struct xz_dec_lzma2 *s)
576{
577 uint32_t prev_byte = dict_get(&s->dict, 0);
578 uint32_t low = prev_byte >> (8 - s->lzma.lc);
579 uint32_t high = (s->dict.pos & s->lzma.literal_pos_mask) << s->lzma.lc;
580 return s->lzma.literal[low + high];
581}
582
583/* Decode a literal (one 8-bit byte) */
/* Decode a literal (one 8-bit byte) */
static void XZ_FUNC lzma_literal(struct xz_dec_lzma2 *s)
{
	uint16_t *probs;
	uint32_t symbol;
	uint32_t match_byte;
	uint32_t match_bit;
	uint32_t offset;
	uint32_t i;

	probs = lzma_literal_probs(s);

	if (lzma_state_is_literal(s->lzma.state)) {
		/* Previous symbol was a literal: plain 8-bit bittree decode */
		symbol = rc_bittree(&s->rc, probs, 0x100);
	} else {
		/*
		 * Previous symbol was a match: use the byte at distance
		 * rep0 ("match byte") to pick between two probability sets
		 * until the decoded bit disagrees with the match byte's bit.
		 */
		symbol = 1;
		match_byte = dict_get(&s->dict, s->lzma.rep0) << 1;
		offset = 0x100;

		do {
			match_bit = match_byte & offset;
			match_byte <<= 1;
			i = offset + match_bit + symbol;

			if (rc_bit(&s->rc, &probs[i])) {
				symbol = (symbol << 1) + 1;
				offset &= match_bit;
			} else {
				symbol <<= 1;
				offset &= ~match_bit;
			}
		} while (symbol < 0x100);
	}

	dict_put(&s->dict, (uint8_t)symbol);
	lzma_state_literal(&s->lzma.state);
}
620
621/* Decode the length of the match into s->lzma.len. */
/*
 * Decode the length of the match into s->lzma.len. Lengths are split
 * into three ranges (2-9, 10-17, 18-273), selected by choice/choice2.
 */
static void XZ_FUNC lzma_len(struct xz_dec_lzma2 *s, struct lzma_len_dec *l,
		uint32_t pos_state)
{
	uint16_t *probs;
	uint32_t limit;

	if (!rc_bit(&s->rc, &l->choice)) {
		probs = l->low[pos_state];
		limit = LEN_LOW_SYMBOLS;
		s->lzma.len = MATCH_LEN_MIN;
	} else {
		if (!rc_bit(&s->rc, &l->choice2)) {
			probs = l->mid[pos_state];
			limit = LEN_MID_SYMBOLS;
			s->lzma.len = MATCH_LEN_MIN + LEN_LOW_SYMBOLS;
		} else {
			probs = l->high;
			limit = LEN_HIGH_SYMBOLS;
			s->lzma.len = MATCH_LEN_MIN + LEN_LOW_SYMBOLS
					+ LEN_MID_SYMBOLS;
		}
	}

	/* rc_bittree returns limit + decoded value; subtract the offset. */
	s->lzma.len += rc_bittree(&s->rc, probs, limit) - limit;
}
647
648/* Decode a match. The distance will be stored in s->lzma.rep0. */
/* Decode a match. The distance will be stored in s->lzma.rep0. */
static void XZ_FUNC lzma_match(struct xz_dec_lzma2 *s, uint32_t pos_state)
{
	uint16_t *probs;
	uint32_t dist_slot;
	uint32_t limit;

	lzma_state_match(&s->lzma.state);

	/* Shift the recent-distance history; rep0 gets the new distance. */
	s->lzma.rep3 = s->lzma.rep2;
	s->lzma.rep2 = s->lzma.rep1;
	s->lzma.rep1 = s->lzma.rep0;

	lzma_len(s, &s->lzma.match_len_dec, pos_state);

	probs = s->lzma.dist_slot[lzma_get_dist_state(s->lzma.len)];
	dist_slot = rc_bittree(&s->rc, probs, DIST_SLOTS) - DIST_SLOTS;

	if (dist_slot < DIST_MODEL_START) {
		/* Small distances are encoded directly by the slot. */
		s->lzma.rep0 = dist_slot;
	} else {
		/* The slot encodes the two top bits plus the bit count. */
		limit = (dist_slot >> 1) - 1;
		s->lzma.rep0 = 2 + (dist_slot & 1);

		if (dist_slot < DIST_MODEL_END) {
			/* Mid-range distances: all extra bits use contexts. */
			s->lzma.rep0 <<= limit;
			probs = s->lzma.dist_special + s->lzma.rep0
					- dist_slot - 1;
			rc_bittree_reverse(&s->rc, probs,
					&s->lzma.rep0, limit);
		} else {
			/*
			 * Large distances: middle bits are direct bits,
			 * the lowest ALIGN_BITS bits use contexts.
			 */
			rc_direct(&s->rc, &s->lzma.rep0, limit - ALIGN_BITS);
			s->lzma.rep0 <<= ALIGN_BITS;
			rc_bittree_reverse(&s->rc, s->lzma.dist_align,
					&s->lzma.rep0, ALIGN_BITS);
		}
	}
}
686
687/*
688 * Decode a repeated match. The distance is one of the four most recently
689 * seen matches. The distance will be stored in s->lzma.rep0.
690 */
/*
 * Decode a repeated match. The distance is one of the four most recently
 * seen matches. The distance will be stored in s->lzma.rep0.
 */
static void XZ_FUNC lzma_rep_match(struct xz_dec_lzma2 *s, uint32_t pos_state)
{
	uint32_t tmp;

	if (!rc_bit(&s->rc, &s->lzma.is_rep0[s->lzma.state])) {
		if (!rc_bit(&s->rc, &s->lzma.is_rep0_long[
				s->lzma.state][pos_state])) {
			/* Short rep: a single byte at distance rep0. */
			lzma_state_short_rep(&s->lzma.state);
			s->lzma.len = 1;
			return;
		}
	} else {
		/* Pick rep1/rep2/rep3 and rotate it to the front (rep0). */
		if (!rc_bit(&s->rc, &s->lzma.is_rep1[s->lzma.state])) {
			tmp = s->lzma.rep1;
		} else {
			if (!rc_bit(&s->rc, &s->lzma.is_rep2[s->lzma.state])) {
				tmp = s->lzma.rep2;
			} else {
				tmp = s->lzma.rep3;
				s->lzma.rep3 = s->lzma.rep2;
			}

			s->lzma.rep2 = s->lzma.rep1;
		}

		s->lzma.rep1 = s->lzma.rep0;
		s->lzma.rep0 = tmp;
	}

	lzma_state_long_rep(&s->lzma.state);
	lzma_len(s, &s->lzma.rep_len_dec, pos_state);
}
723
724/* LZMA decoder core */
/*
 * LZMA decoder core. Decodes symbols until the dictionary write limit
 * or the input limit is reached. Returns false on corrupt input
 * (invalid match distance).
 */
static bool XZ_FUNC lzma_main(struct xz_dec_lzma2 *s)
{
	uint32_t pos_state;

	/*
	 * If the dictionary was reached during the previous call, try to
	 * finish the possibly pending repeat in the dictionary.
	 */
	if (dict_has_space(&s->dict) && s->lzma.len > 0)
		dict_repeat(&s->dict, &s->lzma.len, s->lzma.rep0);

	/*
	 * Decode more LZMA symbols. One iteration may consume up to
	 * LZMA_IN_REQUIRED - 1 bytes.
	 */
	while (dict_has_space(&s->dict) && !rc_limit_exceeded(&s->rc)) {
		pos_state = s->dict.pos & s->lzma.pos_mask;

		if (!rc_bit(&s->rc, &s->lzma.is_match[
				s->lzma.state][pos_state])) {
			lzma_literal(s);
		} else {
			if (rc_bit(&s->rc, &s->lzma.is_rep[s->lzma.state]))
				lzma_rep_match(s, pos_state);
			else
				lzma_match(s, pos_state);

			if (!dict_repeat(&s->dict, &s->lzma.len, s->lzma.rep0))
				return false;
		}
	}

	/*
	 * Having the range decoder always normalized when we are outside
	 * this function makes it easier to correctly handle end of the chunk.
	 */
	rc_normalize(&s->rc);

	return true;
}
765
/*
 * Reset the LZMA decoder and range decoder state. The dictionary is not
 * reset here, because LZMA state may be reset without resetting the
 * dictionary.
 */
static void XZ_FUNC lzma_reset(struct xz_dec_lzma2 *s)
{
	uint16_t *probs;
	size_t i;

	s->lzma.state = STATE_LIT_LIT;
	s->lzma.rep0 = 0;
	s->lzma.rep1 = 0;
	s->lzma.rep2 = 0;
	s->lzma.rep3 = 0;

	/*
	 * All probabilities are initialized to the same value. This hack
	 * makes the code smaller by avoiding a separate loop for each
	 * probability array. It relies on the probability arrays in
	 * struct lzma_dec being laid out contiguously starting at
	 * is_match (see PROBS_TOTAL).
	 *
	 * This could be optimized so that only that part of literal
	 * probabilities that are actually required is initialized. In the
	 * common case we would write 12 KiB less.
	 */
	probs = s->lzma.is_match[0];
	for (i = 0; i < PROBS_TOTAL; ++i)
		probs[i] = RC_BIT_MODEL_TOTAL / 2;

	rc_reset(&s->rc);
}
796
797/*
798 * Decode and validate LZMA properties (lc/lp/pb) and calculate the bit masks
799 * from the decoded lp and pb values. On success, the LZMA decoder state is
800 * reset and true is returned.
801 */
802static bool XZ_FUNC lzma_props(struct xz_dec_lzma2 *s, uint8_t props)
803{
804 if (props > (4 * 5 + 4) * 9 + 8)
805 return false;
806
807 s->lzma.pos_mask = 0;
808 while (props >= 9 * 5) {
809 props -= 9 * 5;
810 ++s->lzma.pos_mask;
811 }
812
813 s->lzma.pos_mask = (1 << s->lzma.pos_mask) - 1;
814
815 s->lzma.literal_pos_mask = 0;
816 while (props >= 9) {
817 props -= 9;
818 ++s->lzma.literal_pos_mask;
819 }
820
821 s->lzma.lc = props;
822
823 if (s->lzma.lc + s->lzma.literal_pos_mask > 4)
824 return false;
825
826 s->lzma.literal_pos_mask = (1 << s->lzma.literal_pos_mask) - 1;
827
828 lzma_reset(s);
829
830 return true;
831}
832
833/*********
834 * LZMA2 *
835 *********/
836
837/*
838 * The LZMA decoder assumes that if the input limit (s->rc.in_limit) hasn't
839 * been exceeded, it is safe to read up to LZMA_IN_REQUIRED bytes. This
840 * wrapper function takes care of making the LZMA decoder's assumption safe.
841 *
842 * As long as there is plenty of input left to be decoded in the current LZMA
843 * chunk, we decode directly from the caller-supplied input buffer until
844 * there's LZMA_IN_REQUIRED bytes left. Those remaining bytes are copied into
845 * s->temp.buf, which (hopefully) gets filled on the next call to this
846 * function. We decode a few bytes from the temporary buffer so that we can
847 * continue decoding from the caller-supplied input buffer again.
848 */
static bool XZ_FUNC lzma2_lzma(struct xz_dec_lzma2 *s, struct xz_buf *b)
{
	size_t in_avail;
	uint32_t tmp;

	in_avail = b->in_size - b->in_pos;
	if (s->temp.size > 0 || s->lzma2.compressed == 0) {
		/*
		 * Phase 1: decode from temp.buf. Top it up from the caller's
		 * buffer so it holds up to 2 * LZMA_IN_REQUIRED bytes, but
		 * never more than the rest of the chunk or the input avail.
		 */
		tmp = 2 * LZMA_IN_REQUIRED - s->temp.size;
		if (tmp > s->lzma2.compressed - s->temp.size)
			tmp = s->lzma2.compressed - s->temp.size;
		if (tmp > in_avail)
			tmp = in_avail;

		memcpy(s->temp.buf + s->temp.size, b->in + b->in_pos, tmp);

		if (s->temp.size + tmp == s->lzma2.compressed) {
			/*
			 * Whole chunk fits in temp: zero the tail so the
			 * decoder can safely overread, and allow it to use
			 * every real byte.
			 */
			memzero(s->temp.buf + s->temp.size + tmp,
					sizeof(s->temp.buf)
						- s->temp.size - tmp);
			s->rc.in_limit = s->temp.size + tmp;
		} else if (s->temp.size + tmp < LZMA_IN_REQUIRED) {
			/* Still too little input to decode safely; buffer it. */
			s->temp.size += tmp;
			b->in_pos += tmp;
			return true;
		} else {
			/* Keep LZMA_IN_REQUIRED bytes of safety margin. */
			s->rc.in_limit = s->temp.size + tmp - LZMA_IN_REQUIRED;
		}

		s->rc.in = s->temp.buf;
		s->rc.in_pos = 0;

		if (!lzma_main(s) || s->rc.in_pos > s->temp.size + tmp)
			return false;

		s->lzma2.compressed -= s->rc.in_pos;

		if (s->rc.in_pos < s->temp.size) {
			/* Only part of the old temp data was consumed. */
			s->temp.size -= s->rc.in_pos;
			memmove(s->temp.buf, s->temp.buf + s->rc.in_pos,
					s->temp.size);
			return true;
		}

		/* temp fully consumed; account the rest to the caller's buf. */
		b->in_pos += s->rc.in_pos - s->temp.size;
		s->temp.size = 0;
	}

	/* Phase 2: decode directly from the caller's input buffer. */
	in_avail = b->in_size - b->in_pos;
	if (in_avail >= LZMA_IN_REQUIRED) {
		s->rc.in = b->in;
		s->rc.in_pos = b->in_pos;

		if (in_avail >= s->lzma2.compressed + LZMA_IN_REQUIRED)
			s->rc.in_limit = b->in_pos + s->lzma2.compressed;
		else
			s->rc.in_limit = b->in_size - LZMA_IN_REQUIRED;

		if (!lzma_main(s))
			return false;

		in_avail = s->rc.in_pos - b->in_pos;
		if (in_avail > s->lzma2.compressed)
			return false;

		s->lzma2.compressed -= in_avail;
		b->in_pos = s->rc.in_pos;
	}

	/* Phase 3: stash any short remainder into temp for the next call. */
	in_avail = b->in_size - b->in_pos;
	if (in_avail < LZMA_IN_REQUIRED) {
		if (in_avail > s->lzma2.compressed)
			in_avail = s->lzma2.compressed;

		memcpy(s->temp.buf, b->in + b->in_pos, in_avail);
		s->temp.size = in_avail;
		b->in_pos += in_avail;
	}

	return true;
}
929
930/*
931 * Take care of the LZMA2 control layer, and forward the job of actual LZMA
932 * decoding or copying of uncompressed chunks to other functions.
933 */
XZ_EXTERN NOINLINE enum xz_ret XZ_FUNC xz_dec_lzma2_run(
		struct xz_dec_lzma2 *s, struct xz_buf *b)
{
	uint32_t tmp;

	/*
	 * Loop while input remains, or while in SEQ_LZMA_RUN, which can
	 * still make progress (flushing the dictionary to b->out) even
	 * when b->in is exhausted.
	 */
	while (b->in_pos < b->in_size || s->lzma2.sequence == SEQ_LZMA_RUN) {
		switch (s->lzma2.sequence) {
		case SEQ_CONTROL:
			/*
			 * LZMA2 control byte
			 *
			 * Exact values:
			 *   0x00   End marker
			 *   0x01   Dictionary reset followed by
			 *          an uncompressed chunk
			 *   0x02   Uncompressed chunk (no dictionary reset)
			 *
			 * Highest three bits (s->control & 0xE0):
			 *   0xE0   Dictionary reset, new properties and state
			 *          reset, followed by LZMA compressed chunk
			 *   0xC0   New properties and state reset, followed
			 *          by LZMA compressed chunk (no dictionary
			 *          reset)
			 *   0xA0   State reset using old properties,
			 *          followed by LZMA compressed chunk (no
			 *          dictionary reset)
			 *   0x80   LZMA chunk (no dictionary or state reset)
			 *
			 * For LZMA compressed chunks, the lowest five bits
			 * (s->control & 1F) are the highest bits of the
			 * uncompressed size (bits 16-20).
			 *
			 * A new LZMA2 stream must begin with a dictionary
			 * reset. The first LZMA chunk must set new
			 * properties and reset the LZMA state.
			 *
			 * Values that don't match anything described above
			 * are invalid and we return XZ_DATA_ERROR.
			 */
			tmp = b->in[b->in_pos++];

			if (tmp >= 0xE0 || tmp == 0x01) {
				s->lzma2.need_props = true;
				s->lzma2.need_dict_reset = false;
				dict_reset(&s->dict, b);
			} else if (s->lzma2.need_dict_reset) {
				return XZ_DATA_ERROR;
			}

			if (tmp >= 0x80) {
				/* Bits 16-20 of the uncompressed chunk size */
				s->lzma2.uncompressed = (tmp & 0x1F) << 16;
				s->lzma2.sequence = SEQ_UNCOMPRESSED_1;

				if (tmp >= 0xC0) {
					/*
					 * When there are new properties,
					 * state reset is done at
					 * SEQ_PROPERTIES.
					 */
					s->lzma2.need_props = false;
					s->lzma2.next_sequence
							= SEQ_PROPERTIES;

				} else if (s->lzma2.need_props) {
					return XZ_DATA_ERROR;

				} else {
					s->lzma2.next_sequence
							= SEQ_LZMA_PREPARE;
					if (tmp >= 0xA0)
						lzma_reset(s);
				}
			} else {
				if (tmp == 0x00)
					return XZ_STREAM_END;

				if (tmp > 0x02)
					return XZ_DATA_ERROR;

				/* Uncompressed chunk: only the 16-bit size
				 * field follows, then raw data. */
				s->lzma2.sequence = SEQ_COMPRESSED_0;
				s->lzma2.next_sequence = SEQ_COPY;
			}

			break;

		case SEQ_UNCOMPRESSED_1:
			/* Bits 8-15 of the uncompressed chunk size */
			s->lzma2.uncompressed
					+= (uint32_t)b->in[b->in_pos++] << 8;
			s->lzma2.sequence = SEQ_UNCOMPRESSED_2;
			break;

		case SEQ_UNCOMPRESSED_2:
			/* Bits 0-7; the stored size is one less than real */
			s->lzma2.uncompressed
					+= (uint32_t)b->in[b->in_pos++] + 1;
			s->lzma2.sequence = SEQ_COMPRESSED_0;
			break;

		case SEQ_COMPRESSED_0:
			/* Bits 8-15 of the compressed chunk size */
			s->lzma2.compressed
					= (uint32_t)b->in[b->in_pos++] << 8;
			s->lzma2.sequence = SEQ_COMPRESSED_1;
			break;

		case SEQ_COMPRESSED_1:
			/* Bits 0-7; the stored size is one less than real */
			s->lzma2.compressed
					+= (uint32_t)b->in[b->in_pos++] + 1;
			s->lzma2.sequence = s->lzma2.next_sequence;
			break;

		case SEQ_PROPERTIES:
			if (!lzma_props(s, b->in[b->in_pos++]))
				return XZ_DATA_ERROR;

			s->lzma2.sequence = SEQ_LZMA_PREPARE;

			/* fall through */

		case SEQ_LZMA_PREPARE:
			if (s->lzma2.compressed < RC_INIT_BYTES)
				return XZ_DATA_ERROR;

			if (!rc_read_init(&s->rc, b))
				return XZ_OK;

			s->lzma2.compressed -= RC_INIT_BYTES;
			s->lzma2.sequence = SEQ_LZMA_RUN;

			/* fall through */

		case SEQ_LZMA_RUN:
			/*
			 * Set dictionary limit to indicate how much we want
			 * to be encoded at maximum. Decode new data into the
			 * dictionary. Flush the new data from dictionary to
			 * b->out. Check if we finished decoding this chunk.
			 * In case the dictionary got full but we didn't fill
			 * the output buffer yet, we may run this loop
			 * multiple times without changing s->lzma2.sequence.
			 */
			dict_limit(&s->dict, min_t(size_t,
					b->out_size - b->out_pos,
					s->lzma2.uncompressed));
			if (!lzma2_lzma(s, b))
				return XZ_DATA_ERROR;

			s->lzma2.uncompressed -= dict_flush(&s->dict, b);

			if (s->lzma2.uncompressed == 0) {
				/* Chunk finished: its compressed data and
				 * range coder must be fully consumed too. */
				if (s->lzma2.compressed > 0 || s->lzma.len > 0
						|| !rc_is_finished(&s->rc))
					return XZ_DATA_ERROR;

				rc_reset(&s->rc);
				s->lzma2.sequence = SEQ_CONTROL;

			} else if (b->out_pos == b->out_size
					|| (b->in_pos == b->in_size
						&& s->temp.size
						< s->lzma2.compressed)) {
				return XZ_OK;
			}

			break;

		case SEQ_COPY:
			/* Copy an uncompressed chunk through the dictionary;
			 * dict_uncompressed() updates lzma2.compressed. */
			dict_uncompressed(&s->dict, b, &s->lzma2.compressed);
			if (s->lzma2.compressed > 0)
				return XZ_OK;

			s->lzma2.sequence = SEQ_CONTROL;
			break;
		}
	}

	return XZ_OK;
}
1106
1107XZ_EXTERN struct xz_dec_lzma2 * XZ_FUNC xz_dec_lzma2_create(
1108 enum xz_mode mode, uint32_t dict_max)
1109{
1110 struct xz_dec_lzma2 *s = kmalloc(sizeof(*s), GFP_KERNEL);
1111 if (s == NULL)
1112 return NULL;
1113
1114 s->dict.mode = mode;
1115 s->dict.size_max = dict_max;
1116
1117 if (DEC_IS_PREALLOC(mode)) {
1118 s->dict.buf = vmalloc(dict_max);
1119 if (s->dict.buf == NULL) {
1120 kfree(s);
1121 return NULL;
1122 }
1123 } else if (DEC_IS_DYNALLOC(mode)) {
1124 s->dict.buf = NULL;
1125 s->dict.allocated = 0;
1126 }
1127
1128 return s;
1129}
1130
1131XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_lzma2_reset(
1132 struct xz_dec_lzma2 *s, uint8_t props)
1133{
1134 /* This limits dictionary size to 3 GiB to keep parsing simpler. */
1135 if (props > 39)
1136 return XZ_OPTIONS_ERROR;
1137
1138 s->dict.size = 2 + (props & 1);
1139 s->dict.size <<= (props >> 1) + 11;
1140
1141 if (DEC_IS_MULTI(s->dict.mode)) {
1142 if (s->dict.size > s->dict.size_max)
1143 return XZ_MEMLIMIT_ERROR;
1144
1145 s->dict.end = s->dict.size;
1146
1147 if (DEC_IS_DYNALLOC(s->dict.mode)) {
1148 if (s->dict.allocated < s->dict.size) {
1149 vfree(s->dict.buf);
1150 s->dict.buf = vmalloc(s->dict.size);
1151 if (s->dict.buf == NULL) {
1152 s->dict.allocated = 0;
1153 return XZ_MEM_ERROR;
1154 }
1155 }
1156 }
1157 }
1158
1159 s->lzma.len = 0;
1160
1161 s->lzma2.sequence = SEQ_CONTROL;
1162 s->lzma2.need_dict_reset = true;
1163
1164 s->temp.size = 0;
1165
1166 return XZ_OK;
1167}
1168
/* Free the decoder state allocated by xz_dec_lzma2_create(). */
XZ_EXTERN void XZ_FUNC xz_dec_lzma2_end(struct xz_dec_lzma2 *s)
{
	/*
	 * xz_dec_lzma2_create() allocates dict.buf only for the multi-call
	 * modes, so only in those modes is there a buffer to free here.
	 */
	if (DEC_IS_MULTI(s->dict.mode))
		vfree(s->dict.buf);

	kfree(s);
}
diff --git a/archival/libarchive/unxz/xz_dec_stream.c b/archival/libarchive/unxz/xz_dec_stream.c
new file mode 100644
index 000000000..bdcbf1ba3
--- /dev/null
+++ b/archival/libarchive/unxz/xz_dec_stream.c
@@ -0,0 +1,822 @@
1/*
2 * .xz Stream decoder
3 *
4 * Author: Lasse Collin <lasse.collin@tukaani.org>
5 *
6 * This file has been put into the public domain.
7 * You can do whatever you want with this file.
8 */
9
10#include "xz_private.h"
11#include "xz_stream.h"
12
13/* Hash used to validate the Index field */
14struct xz_dec_hash {
15 vli_type unpadded;
16 vli_type uncompressed;
17 uint32_t crc32;
18};
19
/* Full .xz stream decoder state, wrapping the LZMA2 (and optional BCJ)
 * filter decoders. Allocated by xz_dec_init(). */
struct xz_dec {
	/* Position in dec_main() */
	enum {
		SEQ_STREAM_HEADER,
		SEQ_BLOCK_START,
		SEQ_BLOCK_HEADER,
		SEQ_BLOCK_UNCOMPRESS,
		SEQ_BLOCK_PADDING,
		SEQ_BLOCK_CHECK,
		SEQ_INDEX,
		SEQ_INDEX_PADDING,
		SEQ_INDEX_CRC32,
		SEQ_STREAM_FOOTER
	} sequence;

	/* Position in variable-length integers and Check fields */
	uint32_t pos;

	/* Variable-length integer decoded by dec_vli() */
	vli_type vli;

	/* Saved in_pos and out_pos */
	size_t in_start;
	size_t out_start;

	/* CRC32 value in Block or Index */
	uint32_t crc32;

	/* Type of the integrity check calculated from uncompressed data */
	enum xz_check check_type;

	/* Operation mode (single-call vs. multi-call, see enum xz_mode) */
	enum xz_mode mode;

	/*
	 * True if the next call to xz_dec_run() is allowed to return
	 * XZ_BUF_ERROR.
	 */
	bool allow_buf_error;

	/* Information stored in Block Header */
	struct {
		/*
		 * Value stored in the Compressed Size field, or
		 * VLI_UNKNOWN if Compressed Size is not present.
		 */
		vli_type compressed;

		/*
		 * Value stored in the Uncompressed Size field, or
		 * VLI_UNKNOWN if Uncompressed Size is not present.
		 */
		vli_type uncompressed;

		/* Size of the Block Header field */
		uint32_t size;
	} block_header;

	/* Information collected when decoding Blocks */
	struct {
		/* Observed compressed size of the current Block */
		vli_type compressed;

		/* Observed uncompressed size of the current Block */
		vli_type uncompressed;

		/* Number of Blocks decoded so far */
		vli_type count;

		/*
		 * Hash calculated from the Block sizes. This is used to
		 * validate the Index field.
		 */
		struct xz_dec_hash hash;
	} block;

	/* Variables needed when verifying the Index field */
	struct {
		/* Position in dec_index() */
		enum {
			SEQ_INDEX_COUNT,
			SEQ_INDEX_UNPADDED,
			SEQ_INDEX_UNCOMPRESSED
		} sequence;

		/* Size of the Index in bytes */
		vli_type size;

		/* Number of Records (matches block.count in valid files) */
		vli_type count;

		/*
		 * Hash calculated from the Records (matches block.hash in
		 * valid files).
		 */
		struct xz_dec_hash hash;
	} index;

	/*
	 * Temporary buffer needed to hold Stream Header, Block Header,
	 * and Stream Footer. The Block Header is the biggest (1 KiB)
	 * so we reserve space according to that. buf[] has to be aligned
	 * to a multiple of four bytes; the size_t variables before it
	 * should guarantee this.
	 */
	struct {
		size_t pos;
		size_t size;
		uint8_t buf[1024];
	} temp;

	/* LZMA2 filter decoder, created in xz_dec_init() */
	struct xz_dec_lzma2 *lzma2;

#ifdef XZ_DEC_BCJ
	/* Optional BCJ filter decoder; bcj_active is set per Block by
	 * dec_block_header() when the Block uses a BCJ filter. */
	struct xz_dec_bcj *bcj;
	bool bcj_active;
#endif
};
138
139#ifdef XZ_DEC_ANY_CHECK
140/* Sizes of the Check field with different Check IDs */
static const uint8_t check_sizes[16] = {
	0,		/* ID 0x00: no check */
	4, 4, 4,	/* IDs 0x01-0x03: 4-byte checks (0x01 is CRC32) */
	8, 8, 8,	/* IDs 0x04-0x06: 8-byte checks */
	16, 16, 16,	/* IDs 0x07-0x09: 16-byte checks */
	32, 32, 32,	/* IDs 0x0A-0x0C: 32-byte checks */
	64, 64, 64	/* IDs 0x0D-0x0F: 64-byte checks */
};
149#endif
150
151/*
152 * Fill s->temp by copying data starting from b->in[b->in_pos]. Caller
153 * must have set s->temp.pos to indicate how much data we are supposed
154 * to copy into s->temp.buf. Return true once s->temp.pos has reached
155 * s->temp.size.
156 */
157static bool XZ_FUNC fill_temp(struct xz_dec *s, struct xz_buf *b)
158{
159 size_t copy_size = min_t(size_t,
160 b->in_size - b->in_pos, s->temp.size - s->temp.pos);
161
162 memcpy(s->temp.buf + s->temp.pos, b->in + b->in_pos, copy_size);
163 b->in_pos += copy_size;
164 s->temp.pos += copy_size;
165
166 if (s->temp.pos == s->temp.size) {
167 s->temp.pos = 0;
168 return true;
169 }
170
171 return false;
172}
173
174/* Decode a variable-length integer (little-endian base-128 encoding) */
175static enum xz_ret XZ_FUNC dec_vli(struct xz_dec *s,
176 const uint8_t *in, size_t *in_pos, size_t in_size)
177{
178 uint8_t byte;
179
180 if (s->pos == 0)
181 s->vli = 0;
182
183 while (*in_pos < in_size) {
184 byte = in[*in_pos];
185 ++*in_pos;
186
187 s->vli |= (vli_type)(byte & 0x7F) << s->pos;
188
189 if ((byte & 0x80) == 0) {
190 /* Don't allow non-minimal encodings. */
191 if (byte == 0 && s->pos != 0)
192 return XZ_DATA_ERROR;
193
194 s->pos = 0;
195 return XZ_STREAM_END;
196 }
197
198 s->pos += 7;
199 if (s->pos == 7 * VLI_BYTES_MAX)
200 return XZ_DATA_ERROR;
201 }
202
203 return XZ_OK;
204}
205
206/*
207 * Decode the Compressed Data field from a Block. Update and validate
208 * the observed compressed and uncompressed sizes of the Block so that
209 * they don't exceed the values possibly stored in the Block Header
210 * (validation assumes that no integer overflow occurs, since vli_type
211 * is normally uint64_t). Update the CRC32 if presence of the CRC32
212 * field was indicated in Stream Header.
213 *
214 * Once the decoding is finished, validate that the observed sizes match
215 * the sizes possibly stored in the Block Header. Update the hash and
216 * Block count, which are later used to validate the Index field.
217 */
static enum xz_ret XZ_FUNC dec_block(struct xz_dec *s, struct xz_buf *b)
{
	enum xz_ret ret;

	/* Remember where this call started so the observed sizes and the
	 * CRC32 can be updated from the consumed/produced byte counts. */
	s->in_start = b->in_pos;
	s->out_start = b->out_pos;

#ifdef XZ_DEC_BCJ
	if (s->bcj_active)
		ret = xz_dec_bcj_run(s->bcj, s->lzma2, b);
	else
#endif
		ret = xz_dec_lzma2_run(s->lzma2, b);

	s->block.compressed += b->in_pos - s->in_start;
	s->block.uncompressed += b->out_pos - s->out_start;

	/*
	 * There is no need to separately check for VLI_UNKNOWN, since
	 * the observed sizes are always smaller than VLI_UNKNOWN.
	 */
	if (s->block.compressed > s->block_header.compressed
			|| s->block.uncompressed
				> s->block_header.uncompressed)
		return XZ_DATA_ERROR;

	/* Fold the newly produced output into the running CRC32. */
	if (s->check_type == XZ_CHECK_CRC32)
		s->crc32 = xz_crc32(b->out + s->out_start,
				b->out_pos - s->out_start, s->crc32);

	if (ret == XZ_STREAM_END) {
		/* Sizes stored in the Block Header (when present) must
		 * match what was actually observed. */
		if (s->block_header.compressed != VLI_UNKNOWN
				&& s->block_header.compressed
					!= s->block.compressed)
			return XZ_DATA_ERROR;

		if (s->block_header.uncompressed != VLI_UNKNOWN
				&& s->block_header.uncompressed
					!= s->block.uncompressed)
			return XZ_DATA_ERROR;

		/* Unpadded Size = Block Header + Compressed Data
		 * + Check field */
		s->block.hash.unpadded += s->block_header.size
				+ s->block.compressed;

#ifdef XZ_DEC_ANY_CHECK
		s->block.hash.unpadded += check_sizes[s->check_type];
#else
		if (s->check_type == XZ_CHECK_CRC32)
			s->block.hash.unpadded += 4;
#endif

		s->block.hash.uncompressed += s->block.uncompressed;
		s->block.hash.crc32 = xz_crc32(
				(const uint8_t *)&s->block.hash,
				sizeof(s->block.hash), s->block.hash.crc32);

		++s->block.count;
	}

	return ret;
}
279
280/* Update the Index size and the CRC32 value. */
281static void XZ_FUNC index_update(struct xz_dec *s, const struct xz_buf *b)
282{
283 size_t in_used = b->in_pos - s->in_start;
284 s->index.size += in_used;
285 s->crc32 = xz_crc32(b->in + s->in_start, in_used, s->crc32);
286}
287
288/*
289 * Decode the Number of Records, Unpadded Size, and Uncompressed Size
290 * fields from the Index field. That is, Index Padding and CRC32 are not
291 * decoded by this function.
292 *
293 * This can return XZ_OK (more input needed), XZ_STREAM_END (everything
294 * successfully decoded), or XZ_DATA_ERROR (input is corrupt).
295 */
static enum xz_ret XZ_FUNC dec_index(struct xz_dec *s, struct xz_buf *b)
{
	enum xz_ret ret;

	do {
		ret = dec_vli(s, b->in, &b->in_pos, b->in_size);
		if (ret != XZ_STREAM_END) {
			/* Out of input (XZ_OK) or corrupt VLI; account for
			 * the consumed bytes before returning. */
			index_update(s, b);
			return ret;
		}

		switch (s->index.sequence) {
		case SEQ_INDEX_COUNT:
			s->index.count = s->vli;

			/*
			 * Validate that the Number of Records field
			 * indicates the same number of Records as
			 * there were Blocks in the Stream.
			 */
			if (s->index.count != s->block.count)
				return XZ_DATA_ERROR;

			s->index.sequence = SEQ_INDEX_UNPADDED;
			break;

		case SEQ_INDEX_UNPADDED:
			s->index.hash.unpadded += s->vli;
			s->index.sequence = SEQ_INDEX_UNCOMPRESSED;
			break;

		case SEQ_INDEX_UNCOMPRESSED:
			/* A complete Record has been read: fold it into
			 * the hash compared against s->block.hash. */
			s->index.hash.uncompressed += s->vli;
			s->index.hash.crc32 = xz_crc32(
					(const uint8_t *)&s->index.hash,
					sizeof(s->index.hash),
					s->index.hash.crc32);
			--s->index.count;
			s->index.sequence = SEQ_INDEX_UNPADDED;
			break;
		}
	} while (s->index.count > 0);

	return XZ_STREAM_END;
}
341
342/*
343 * Validate that the next four input bytes match the value of s->crc32.
344 * s->pos must be zero when starting to validate the first byte.
345 */
346static enum xz_ret XZ_FUNC crc32_validate(struct xz_dec *s, struct xz_buf *b)
347{
348 do {
349 if (b->in_pos == b->in_size)
350 return XZ_OK;
351
352 if (((s->crc32 >> s->pos) & 0xFF) != b->in[b->in_pos++])
353 return XZ_DATA_ERROR;
354
355 s->pos += 8;
356
357 } while (s->pos < 32);
358
359 s->crc32 = 0;
360 s->pos = 0;
361
362 return XZ_STREAM_END;
363}
364
365#ifdef XZ_DEC_ANY_CHECK
366/*
367 * Skip over the Check field when the Check ID is not supported.
368 * Returns true once the whole Check field has been skipped over.
369 */
370static bool XZ_FUNC check_skip(struct xz_dec *s, struct xz_buf *b)
371{
372 while (s->pos < check_sizes[s->check_type]) {
373 if (b->in_pos == b->in_size)
374 return false;
375
376 ++b->in_pos;
377 ++s->pos;
378 }
379
380 s->pos = 0;
381
382 return true;
383}
384#endif
385
386/* Decode the Stream Header field (the first 12 bytes of the .xz Stream). */
static enum xz_ret XZ_FUNC dec_stream_header(struct xz_dec *s)
{
	/* The stream must begin with the Header Magic Bytes. */
	if (!memeq(s->temp.buf, HEADER_MAGIC, HEADER_MAGIC_SIZE))
		return XZ_FORMAT_ERROR;

	/* CRC32 of the two Stream Flags bytes follows them. */
	if (xz_crc32(s->temp.buf + HEADER_MAGIC_SIZE, 2, 0)
			!= get_le32(s->temp.buf + HEADER_MAGIC_SIZE + 2))
		return XZ_DATA_ERROR;

	/* The first Stream Flags byte must be null. */
	if (s->temp.buf[HEADER_MAGIC_SIZE] != 0)
		return XZ_OPTIONS_ERROR;

	/*
	 * Of integrity checks, we support only none (Check ID = 0) and
	 * CRC32 (Check ID = 1). However, if XZ_DEC_ANY_CHECK is defined,
	 * we will accept other check types too, but then the check won't
	 * be verified and a warning (XZ_UNSUPPORTED_CHECK) will be given.
	 */
	s->check_type = s->temp.buf[HEADER_MAGIC_SIZE + 1];

#ifdef XZ_DEC_ANY_CHECK
	if (s->check_type > XZ_CHECK_MAX)
		return XZ_OPTIONS_ERROR;

	if (s->check_type > XZ_CHECK_CRC32)
		return XZ_UNSUPPORTED_CHECK;
#else
	if (s->check_type > XZ_CHECK_CRC32)
		return XZ_OPTIONS_ERROR;
#endif

	return XZ_OK;
}
420
421/* Decode the Stream Footer field (the last 12 bytes of the .xz Stream) */
422static enum xz_ret XZ_FUNC dec_stream_footer(struct xz_dec *s)
423{
424 if (!memeq(s->temp.buf + 10, FOOTER_MAGIC, FOOTER_MAGIC_SIZE))
425 return XZ_DATA_ERROR;
426
427 if (xz_crc32(s->temp.buf + 4, 6, 0) != get_le32(s->temp.buf))
428 return XZ_DATA_ERROR;
429
430 /*
431 * Validate Backward Size. Note that we never added the size of the
432 * Index CRC32 field to s->index.size, thus we use s->index.size / 4
433 * instead of s->index.size / 4 - 1.
434 */
435 if ((s->index.size >> 2) != get_le32(s->temp.buf + 4))
436 return XZ_DATA_ERROR;
437
438 if (s->temp.buf[8] != 0 || s->temp.buf[9] != s->check_type)
439 return XZ_DATA_ERROR;
440
441 /*
442 * Use XZ_STREAM_END instead of XZ_OK to be more convenient
443 * for the caller.
444 */
445 return XZ_STREAM_END;
446}
447
448/* Decode the Block Header and initialize the filter chain. */
static enum xz_ret XZ_FUNC dec_block_header(struct xz_dec *s)
{
	enum xz_ret ret;

	/*
	 * Validate the CRC32. We know that the temp buffer is at least
	 * eight bytes so this is safe.
	 */
	s->temp.size -= 4;
	if (xz_crc32(s->temp.buf, s->temp.size, 0)
			!= get_le32(s->temp.buf + s->temp.size))
		return XZ_DATA_ERROR;

	/* Skip past the Block Header Size and Block Flags bytes. */
	s->temp.pos = 2;

	/*
	 * Catch unsupported Block Flags. We support only one or two filters
	 * in the chain, so we catch that with the same test.
	 */
#ifdef XZ_DEC_BCJ
	if (s->temp.buf[1] & 0x3E)
#else
	if (s->temp.buf[1] & 0x3F)
#endif
		return XZ_OPTIONS_ERROR;

	/* Compressed Size (optional, flagged by bit 0x40) */
	if (s->temp.buf[1] & 0x40) {
		if (dec_vli(s, s->temp.buf, &s->temp.pos, s->temp.size)
					!= XZ_STREAM_END)
			return XZ_DATA_ERROR;

		s->block_header.compressed = s->vli;
	} else {
		s->block_header.compressed = VLI_UNKNOWN;
	}

	/* Uncompressed Size (optional, flagged by bit 0x80) */
	if (s->temp.buf[1] & 0x80) {
		if (dec_vli(s, s->temp.buf, &s->temp.pos, s->temp.size)
					!= XZ_STREAM_END)
			return XZ_DATA_ERROR;

		s->block_header.uncompressed = s->vli;
	} else {
		s->block_header.uncompressed = VLI_UNKNOWN;
	}

#ifdef XZ_DEC_BCJ
	/* If there are two filters, the first one must be a BCJ filter. */
	s->bcj_active = s->temp.buf[1] & 0x01;
	if (s->bcj_active) {
		if (s->temp.size - s->temp.pos < 2)
			return XZ_OPTIONS_ERROR;

		ret = xz_dec_bcj_reset(s->bcj, s->temp.buf[s->temp.pos++]);
		if (ret != XZ_OK)
			return ret;

		/*
		 * We don't support custom start offset,
		 * so Size of Properties must be zero.
		 */
		if (s->temp.buf[s->temp.pos++] != 0x00)
			return XZ_OPTIONS_ERROR;
	}
#endif

	/* Valid Filter Flags always take at least two bytes. */
	if (s->temp.size - s->temp.pos < 2)
		return XZ_DATA_ERROR;

	/* Filter ID = LZMA2 */
	if (s->temp.buf[s->temp.pos++] != 0x21)
		return XZ_OPTIONS_ERROR;

	/* Size of Properties = 1-byte Filter Properties */
	if (s->temp.buf[s->temp.pos++] != 0x01)
		return XZ_OPTIONS_ERROR;

	/* Filter Properties contains LZMA2 dictionary size. */
	if (s->temp.size - s->temp.pos < 1)
		return XZ_DATA_ERROR;

	ret = xz_dec_lzma2_reset(s->lzma2, s->temp.buf[s->temp.pos++]);
	if (ret != XZ_OK)
		return ret;

	/* The rest must be Header Padding. */
	while (s->temp.pos < s->temp.size)
		if (s->temp.buf[s->temp.pos++] != 0x00)
			return XZ_OPTIONS_ERROR;

	/* Prepare for decoding this Block's Compressed Data. */
	s->temp.pos = 0;
	s->block.compressed = 0;
	s->block.uncompressed = 0;

	return XZ_OK;
}
548
static enum xz_ret XZ_FUNC dec_main(struct xz_dec *s, struct xz_buf *b)
{
	enum xz_ret ret;

	/*
	 * Store the start position for the case when we are in the middle
	 * of the Index field.
	 */
	s->in_start = b->in_pos;

	while (true) {
		switch (s->sequence) {
		case SEQ_STREAM_HEADER:
			/*
			 * Stream Header is copied to s->temp, and then
			 * decoded from there. This way if the caller
			 * gives us only little input at a time, we can
			 * still keep the Stream Header decoding code
			 * simple. Similar approach is used in many places
			 * in this file.
			 */
			if (!fill_temp(s, b))
				return XZ_OK;

			/*
			 * If dec_stream_header() returns
			 * XZ_UNSUPPORTED_CHECK, it is still possible
			 * to continue decoding if working in multi-call
			 * mode. Thus, update s->sequence before calling
			 * dec_stream_header().
			 */
			s->sequence = SEQ_BLOCK_START;

			ret = dec_stream_header(s);
			if (ret != XZ_OK)
				return ret;

			/* fall through */

		case SEQ_BLOCK_START:
			/* We need one byte of input to continue. */
			if (b->in_pos == b->in_size)
				return XZ_OK;

			/* See if this is the beginning of the Index field. */
			if (b->in[b->in_pos] == 0) {
				s->in_start = b->in_pos++;
				s->sequence = SEQ_INDEX;
				break;
			}

			/*
			 * Calculate the size of the Block Header and
			 * prepare to decode it.
			 */
			s->block_header.size
				= ((uint32_t)b->in[b->in_pos] + 1) * 4;

			s->temp.size = s->block_header.size;
			s->temp.pos = 0;
			s->sequence = SEQ_BLOCK_HEADER;

			/* fall through */

		case SEQ_BLOCK_HEADER:
			if (!fill_temp(s, b))
				return XZ_OK;

			ret = dec_block_header(s);
			if (ret != XZ_OK)
				return ret;

			s->sequence = SEQ_BLOCK_UNCOMPRESS;

			/* fall through */

		case SEQ_BLOCK_UNCOMPRESS:
			ret = dec_block(s, b);
			if (ret != XZ_STREAM_END)
				return ret;

			s->sequence = SEQ_BLOCK_PADDING;

			/* fall through */

		case SEQ_BLOCK_PADDING:
			/*
			 * Size of Compressed Data + Block Padding
			 * must be a multiple of four. We don't need
			 * s->block.compressed for anything else
			 * anymore, so we use it here to test the size
			 * of the Block Padding field.
			 */
			while (s->block.compressed & 3) {
				if (b->in_pos == b->in_size)
					return XZ_OK;

				if (b->in[b->in_pos++] != 0)
					return XZ_DATA_ERROR;

				++s->block.compressed;
			}

			s->sequence = SEQ_BLOCK_CHECK;

			/* fall through */

		case SEQ_BLOCK_CHECK:
			if (s->check_type == XZ_CHECK_CRC32) {
				ret = crc32_validate(s, b);
				if (ret != XZ_STREAM_END)
					return ret;
			}
#ifdef XZ_DEC_ANY_CHECK
			else if (!check_skip(s, b)) {
				return XZ_OK;
			}
#endif

			s->sequence = SEQ_BLOCK_START;
			break;

		case SEQ_INDEX:
			ret = dec_index(s, b);
			if (ret != XZ_STREAM_END)
				return ret;

			s->sequence = SEQ_INDEX_PADDING;

			/* fall through */

		case SEQ_INDEX_PADDING:
			/* Index Padding pads the Index to a multiple of
			 * four bytes; padding bytes must be zero. */
			while ((s->index.size + (b->in_pos - s->in_start))
					& 3) {
				if (b->in_pos == b->in_size) {
					index_update(s, b);
					return XZ_OK;
				}

				if (b->in[b->in_pos++] != 0)
					return XZ_DATA_ERROR;
			}

			/* Finish the CRC32 value and Index size. */
			index_update(s, b);

			/* Compare the hashes to validate the Index field. */
			if (!memeq(&s->block.hash, &s->index.hash,
					sizeof(s->block.hash)))
				return XZ_DATA_ERROR;

			s->sequence = SEQ_INDEX_CRC32;

			/* fall through */

		case SEQ_INDEX_CRC32:
			ret = crc32_validate(s, b);
			if (ret != XZ_STREAM_END)
				return ret;

			s->temp.size = STREAM_HEADER_SIZE;
			s->sequence = SEQ_STREAM_FOOTER;

			/* fall through */

		case SEQ_STREAM_FOOTER:
			if (!fill_temp(s, b))
				return XZ_OK;

			return dec_stream_footer(s);
		}
	}

	/* Never reached */
}
708
709/*
710 * xz_dec_run() is a wrapper for dec_main() to handle some special cases in
711 * multi-call and single-call decoding.
712 *
713 * In multi-call mode, we must return XZ_BUF_ERROR when it seems clear that we
714 * are not going to make any progress anymore. This is to prevent the caller
715 * from calling us infinitely when the input file is truncated or otherwise
716 * corrupt. Since zlib-style API allows that the caller fills the input buffer
717 * only when the decoder doesn't produce any new output, we have to be careful
718 * to avoid returning XZ_BUF_ERROR too easily: XZ_BUF_ERROR is returned only
719 * after the second consecutive call to xz_dec_run() that makes no progress.
720 *
721 * In single-call mode, if we couldn't decode everything and no error
722 * occurred, either the input is truncated or the output buffer is too small.
723 * Since we know that the last input byte never produces any output, we know
724 * that if all the input was consumed and decoding wasn't finished, the file
725 * must be corrupt. Otherwise the output buffer has to be too small or the
726 * file is corrupt in a way that decoding it produces too big output.
727 *
728 * If single-call decoding fails, we reset b->in_pos and b->out_pos back to
729 * their original values. This is because with some filter chains there won't
730 * be any valid uncompressed data in the output buffer unless the decoding
731 * actually succeeds (that's the price to pay of using the output buffer as
732 * the workspace).
733 */
XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_run(struct xz_dec *s, struct xz_buf *b)
{
	size_t in_start;
	size_t out_start;
	enum xz_ret ret;

	/* Single-call mode decodes a whole stream per call, so start from
	 * a clean state every time. */
	if (DEC_IS_SINGLE(s->mode))
		xz_dec_reset(s);

	in_start = b->in_pos;
	out_start = b->out_pos;
	ret = dec_main(s, b);

	if (DEC_IS_SINGLE(s->mode)) {
		/*
		 * XZ_OK here means dec_main() ran out of data: if all
		 * input was consumed the file is truncated or corrupt,
		 * otherwise the output buffer was too small.
		 */
		if (ret == XZ_OK)
			ret = b->in_pos == b->in_size
					? XZ_DATA_ERROR : XZ_BUF_ERROR;

		/* On any failure, restore the caller's positions (see the
		 * comment above this function). */
		if (ret != XZ_STREAM_END) {
			b->in_pos = in_start;
			b->out_pos = out_start;
		}

	} else if (ret == XZ_OK && in_start == b->in_pos
			&& out_start == b->out_pos) {
		/* No progress was made; only the second consecutive
		 * no-progress call reports XZ_BUF_ERROR. */
		if (s->allow_buf_error)
			ret = XZ_BUF_ERROR;

		s->allow_buf_error = true;
	} else {
		s->allow_buf_error = false;
	}

	return ret;
}
769
XZ_EXTERN struct xz_dec * XZ_FUNC xz_dec_init(
		enum xz_mode mode, uint32_t dict_max)
{
	struct xz_dec *s = kmalloc(sizeof(*s), GFP_KERNEL);
	if (s == NULL)
		return NULL;

	s->mode = mode;

#ifdef XZ_DEC_BCJ
	s->bcj = xz_dec_bcj_create(DEC_IS_SINGLE(mode));
	if (s->bcj == NULL)
		goto error_bcj;
#endif

	s->lzma2 = xz_dec_lzma2_create(mode, dict_max);
	if (s->lzma2 == NULL)
		goto error_lzma2;

	xz_dec_reset(s);
	return s;

	/* Unwind the partially constructed decoder on failure. */
error_lzma2:
#ifdef XZ_DEC_BCJ
	xz_dec_bcj_end(s->bcj);
error_bcj:
#endif
	kfree(s);
	return NULL;
}
800
801XZ_EXTERN void XZ_FUNC xz_dec_reset(struct xz_dec *s)
802{
803 s->sequence = SEQ_STREAM_HEADER;
804 s->allow_buf_error = false;
805 s->pos = 0;
806 s->crc32 = 0;
807 memzero(&s->block, sizeof(s->block));
808 memzero(&s->index, sizeof(s->index));
809 s->temp.pos = 0;
810 s->temp.size = STREAM_HEADER_SIZE;
811}
812
813XZ_EXTERN void XZ_FUNC xz_dec_end(struct xz_dec *s)
814{
815 if (s != NULL) {
816 xz_dec_lzma2_end(s->lzma2);
817#ifdef XZ_DEC_BCJ
818 xz_dec_bcj_end(s->bcj);
819#endif
820 kfree(s);
821 }
822}
diff --git a/archival/libarchive/unxz/xz_lzma2.h b/archival/libarchive/unxz/xz_lzma2.h
new file mode 100644
index 000000000..47f21afbc
--- /dev/null
+++ b/archival/libarchive/unxz/xz_lzma2.h
@@ -0,0 +1,204 @@
1/*
2 * LZMA2 definitions
3 *
4 * Authors: Lasse Collin <lasse.collin@tukaani.org>
5 * Igor Pavlov <http://7-zip.org/>
6 *
7 * This file has been put into the public domain.
8 * You can do whatever you want with this file.
9 */
10
11#ifndef XZ_LZMA2_H
12#define XZ_LZMA2_H
13
14/* Range coder constants */
15#define RC_SHIFT_BITS 8
16#define RC_TOP_BITS 24
17#define RC_TOP_VALUE (1 << RC_TOP_BITS)
18#define RC_BIT_MODEL_TOTAL_BITS 11
19#define RC_BIT_MODEL_TOTAL (1 << RC_BIT_MODEL_TOTAL_BITS)
20#define RC_MOVE_BITS 5
21
22/*
23 * Maximum number of position states. A position state is the lowest pb
24 * number of bits of the current uncompressed offset. In some places there
25 * are different sets of probabilities for different position states.
26 */
27#define POS_STATES_MAX (1 << 4)
28
29/*
30 * This enum is used to track which LZMA symbols have occurred most recently
31 * and in which order. This information is used to predict the next symbol.
32 *
33 * Symbols:
34 * - Literal: One 8-bit byte
35 * - Match: Repeat a chunk of data at some distance
36 * - Long repeat: Multi-byte match at a recently seen distance
37 * - Short repeat: One-byte repeat at a recently seen distance
38 *
39 * The symbol names are in from STATE_oldest_older_previous. REP means
40 * either short or long repeated match, and NONLIT means any non-literal.
41 */
/* Names follow STATE_oldest_older_previous (see the comment above). */
enum lzma_state {
	STATE_LIT_LIT,		/* literal, literal, literal */
	STATE_MATCH_LIT_LIT,	/* match, literal, literal */
	STATE_REP_LIT_LIT,	/* rep, literal, literal */
	STATE_SHORTREP_LIT_LIT,	/* short rep, literal, literal */
	STATE_MATCH_LIT,	/* match, literal */
	STATE_REP_LIT,		/* rep, literal */
	STATE_SHORTREP_LIT,	/* short rep, literal */
	STATE_LIT_MATCH,	/* literal, match */
	STATE_LIT_LONGREP,	/* literal, long rep */
	STATE_LIT_SHORTREP,	/* literal, short rep */
	STATE_NONLIT_MATCH,	/* non-literal, match */
	STATE_NONLIT_REP	/* non-literal, rep */
};
56
57/* Total number of states */
58#define STATES 12
59
60/* The lowest 7 states indicate that the previous state was a literal. */
61#define LIT_STATES 7
62
63/* Indicate that the latest symbol was a literal. */
64static inline void XZ_FUNC lzma_state_literal(enum lzma_state *state)
65{
66 if (*state <= STATE_SHORTREP_LIT_LIT)
67 *state = STATE_LIT_LIT;
68 else if (*state <= STATE_LIT_SHORTREP)
69 *state -= 3;
70 else
71 *state -= 6;
72}
73
74/* Indicate that the latest symbol was a match. */
75static inline void XZ_FUNC lzma_state_match(enum lzma_state *state)
76{
77 *state = *state < LIT_STATES ? STATE_LIT_MATCH : STATE_NONLIT_MATCH;
78}
79
80/* Indicate that the latest state was a long repeated match. */
81static inline void XZ_FUNC lzma_state_long_rep(enum lzma_state *state)
82{
83 *state = *state < LIT_STATES ? STATE_LIT_LONGREP : STATE_NONLIT_REP;
84}
85
86/* Indicate that the latest symbol was a short match. */
87static inline void XZ_FUNC lzma_state_short_rep(enum lzma_state *state)
88{
89 *state = *state < LIT_STATES ? STATE_LIT_SHORTREP : STATE_NONLIT_REP;
90}
91
92/* Test if the previous symbol was a literal. */
93static inline bool XZ_FUNC lzma_state_is_literal(enum lzma_state state)
94{
95 return state < LIT_STATES;
96}
97
98/* Each literal coder is divided in three sections:
99 * - 0x001-0x0FF: Without match byte
100 * - 0x101-0x1FF: With match byte; match bit is 0
101 * - 0x201-0x2FF: With match byte; match bit is 1
102 *
103 * Match byte is used when the previous LZMA symbol was something else than
104 * a literal (that is, it was some kind of match).
105 */
106#define LITERAL_CODER_SIZE 0x300
107
108/* Maximum number of literal coders */
109#define LITERAL_CODERS_MAX (1 << 4)
110
111/* Minimum length of a match is two bytes. */
112#define MATCH_LEN_MIN 2
113
114/* Match length is encoded with 4, 5, or 10 bits.
115 *
116 * Length Bits
117 * 2-9 4 = Choice=0 + 3 bits
118 * 10-17 5 = Choice=1 + Choice2=0 + 3 bits
119 * 18-273 10 = Choice=1 + Choice2=1 + 8 bits
120 */
121#define LEN_LOW_BITS 3
122#define LEN_LOW_SYMBOLS (1 << LEN_LOW_BITS)
123#define LEN_MID_BITS 3
124#define LEN_MID_SYMBOLS (1 << LEN_MID_BITS)
125#define LEN_HIGH_BITS 8
126#define LEN_HIGH_SYMBOLS (1 << LEN_HIGH_BITS)
127#define LEN_SYMBOLS (LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS + LEN_HIGH_SYMBOLS)
128
129/*
130 * Maximum length of a match is 273 which is a result of the encoding
131 * described above.
132 */
133#define MATCH_LEN_MAX (MATCH_LEN_MIN + LEN_SYMBOLS - 1)
134
135/*
136 * Different sets of probabilities are used for match distances that have
137 * very short match length: Lengths of 2, 3, and 4 bytes have a separate
138 * set of probabilities for each length. The matches with longer length
139 * use a shared set of probabilities.
140 */
141#define DIST_STATES 4
142
143/*
144 * Get the index of the appropriate probability array for decoding
145 * the distance slot.
146 */
147static inline uint32_t XZ_FUNC lzma_get_dist_state(uint32_t len)
148{
149 return len < DIST_STATES + MATCH_LEN_MIN
150 ? len - MATCH_LEN_MIN : DIST_STATES - 1;
151}
152
153/*
154 * The highest two bits of a 32-bit match distance are encoded using six bits.
155 * This six-bit value is called a distance slot. This way encoding a 32-bit
156 * value takes 6-36 bits, larger values taking more bits.
157 */
158#define DIST_SLOT_BITS 6
159#define DIST_SLOTS (1 << DIST_SLOT_BITS)
160
161/* Match distances up to 127 are fully encoded using probabilities. Since
162 * the highest two bits (distance slot) are always encoded using six bits,
163 * the distances 0-3 don't need any additional bits to encode, since the
164 * distance slot itself is the same as the actual distance. DIST_MODEL_START
165 * indicates the first distance slot where at least one additional bit is
166 * needed.
167 */
168#define DIST_MODEL_START 4
169
170/*
171 * Match distances greater than 127 are encoded in three pieces:
172 * - distance slot: the highest two bits
173 * - direct bits: 2-26 bits below the highest two bits
174 * - alignment bits: four lowest bits
175 *
176 * Direct bits don't use any probabilities.
177 *
178 * The distance slot value of 14 is for distances 128-191.
179 */
180#define DIST_MODEL_END 14
181
182/* Distance slots that indicate a distance <= 127. */
183#define FULL_DISTANCES_BITS (DIST_MODEL_END / 2)
184#define FULL_DISTANCES (1 << FULL_DISTANCES_BITS)
185
186/*
187 * For match distances greater than 127, only the highest two bits and the
188 * lowest four bits (alignment) is encoded using probabilities.
189 */
190#define ALIGN_BITS 4
191#define ALIGN_SIZE (1 << ALIGN_BITS)
192#define ALIGN_MASK (ALIGN_SIZE - 1)
193
194/* Total number of all probability variables */
195#define PROBS_TOTAL (1846 + LITERAL_CODERS_MAX * LITERAL_CODER_SIZE)
196
197/*
198 * LZMA remembers the four most recent match distances. Reusing these
199 * distances tends to take less space than re-encoding the actual
200 * distance value.
201 */
202#define REPS 4
203
204#endif
diff --git a/archival/libarchive/unxz/xz_private.h b/archival/libarchive/unxz/xz_private.h
new file mode 100644
index 000000000..145649a83
--- /dev/null
+++ b/archival/libarchive/unxz/xz_private.h
@@ -0,0 +1,159 @@
1/*
2 * Private includes and definitions
3 *
4 * Author: Lasse Collin <lasse.collin@tukaani.org>
5 *
6 * This file has been put into the public domain.
7 * You can do whatever you want with this file.
8 */
9
10#ifndef XZ_PRIVATE_H
11#define XZ_PRIVATE_H
12
13#ifdef __KERNEL__
14 /* XZ_PREBOOT may be defined only via decompress_unxz.c. */
15# ifndef XZ_PREBOOT
16# include <linux/slab.h>
17# include <linux/vmalloc.h>
18# include <linux/string.h>
19# define memeq(a, b, size) (memcmp(a, b, size) == 0)
20# define memzero(buf, size) memset(buf, 0, size)
21# endif
22# include <asm/byteorder.h>
23# include <asm/unaligned.h>
24# define get_le32(p) le32_to_cpup((const uint32_t *)(p))
25 /* XZ_IGNORE_KCONFIG may be defined only via decompress_unxz.c. */
26# ifndef XZ_IGNORE_KCONFIG
27# ifdef CONFIG_XZ_DEC_X86
28# define XZ_DEC_X86
29# endif
30# ifdef CONFIG_XZ_DEC_POWERPC
31# define XZ_DEC_POWERPC
32# endif
33# ifdef CONFIG_XZ_DEC_IA64
34# define XZ_DEC_IA64
35# endif
36# ifdef CONFIG_XZ_DEC_ARM
37# define XZ_DEC_ARM
38# endif
39# ifdef CONFIG_XZ_DEC_ARMTHUMB
40# define XZ_DEC_ARMTHUMB
41# endif
42# ifdef CONFIG_XZ_DEC_SPARC
43# define XZ_DEC_SPARC
44# endif
45# endif
46# include <linux/xz.h>
47#else
48 /*
49 * For userspace builds, use a separate header to define the required
50 * macros and functions. This makes it easier to adapt the code into
51 * different environments and avoids clutter in the Linux kernel tree.
52 */
53# include "xz_config.h"
54#endif
55
56/* If no specific decoding mode is requested, enable support for all modes. */
57#if !defined(XZ_DEC_SINGLE) && !defined(XZ_DEC_PREALLOC) \
58 && !defined(XZ_DEC_DYNALLOC)
59# define XZ_DEC_SINGLE
60# define XZ_DEC_PREALLOC
61# define XZ_DEC_DYNALLOC
62#endif
63
64/*
65 * The DEC_IS_foo(mode) macros are used in "if" statements. If only some
66 * of the supported modes are enabled, these macros will evaluate to true or
67 * false at compile time and thus allow the compiler to omit unneeded code.
68 */
69#ifdef XZ_DEC_SINGLE
70# define DEC_IS_SINGLE(mode) ((mode) == XZ_SINGLE)
71#else
72# define DEC_IS_SINGLE(mode) (false)
73#endif
74
75#ifdef XZ_DEC_PREALLOC
76# define DEC_IS_PREALLOC(mode) ((mode) == XZ_PREALLOC)
77#else
78# define DEC_IS_PREALLOC(mode) (false)
79#endif
80
81#ifdef XZ_DEC_DYNALLOC
82# define DEC_IS_DYNALLOC(mode) ((mode) == XZ_DYNALLOC)
83#else
84# define DEC_IS_DYNALLOC(mode) (false)
85#endif
86
87#if !defined(XZ_DEC_SINGLE)
88# define DEC_IS_MULTI(mode) (true)
89#elif defined(XZ_DEC_PREALLOC) || defined(XZ_DEC_DYNALLOC)
90# define DEC_IS_MULTI(mode) ((mode) != XZ_SINGLE)
91#else
92# define DEC_IS_MULTI(mode) (false)
93#endif
94
95/*
96 * If any of the BCJ filter decoders are wanted, define XZ_DEC_BCJ.
97 * XZ_DEC_BCJ is used to enable generic support for BCJ decoders.
98 */
99#ifndef XZ_DEC_BCJ
100# if defined(XZ_DEC_X86) || defined(XZ_DEC_POWERPC) \
101 || defined(XZ_DEC_IA64) || defined(XZ_DEC_ARM) \
102 || defined(XZ_DEC_ARM) || defined(XZ_DEC_ARMTHUMB) \
103 || defined(XZ_DEC_SPARC)
104# define XZ_DEC_BCJ
105# endif
106#endif
107
108/*
109 * Allocate memory for LZMA2 decoder. xz_dec_lzma2_reset() must be used
110 * before calling xz_dec_lzma2_run().
111 */
112XZ_EXTERN struct xz_dec_lzma2 * XZ_FUNC xz_dec_lzma2_create(
113 enum xz_mode mode, uint32_t dict_max);
114
115/*
116 * Decode the LZMA2 properties (one byte) and reset the decoder. Return
117 * XZ_OK on success, XZ_MEMLIMIT_ERROR if the preallocated dictionary is not
118 * big enough, and XZ_OPTIONS_ERROR if props indicates something that this
119 * decoder doesn't support.
120 */
121XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_lzma2_reset(
122 struct xz_dec_lzma2 *s, uint8_t props);
123
124/* Decode raw LZMA2 stream from b->in to b->out. */
125XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_lzma2_run(
126 struct xz_dec_lzma2 *s, struct xz_buf *b);
127
128/* Free the memory allocated for the LZMA2 decoder. */
129XZ_EXTERN void XZ_FUNC xz_dec_lzma2_end(struct xz_dec_lzma2 *s);
130
131#ifdef XZ_DEC_BCJ
132/*
133 * Allocate memory for BCJ decoders. xz_dec_bcj_reset() must be used before
134 * calling xz_dec_bcj_run().
135 */
136XZ_EXTERN struct xz_dec_bcj * XZ_FUNC xz_dec_bcj_create(bool single_call);
137
138/*
139 * Decode the Filter ID of a BCJ filter. This implementation doesn't
140 * support custom start offsets, so no decoding of Filter Properties
141 * is needed. Returns XZ_OK if the given Filter ID is supported.
142 * Otherwise XZ_OPTIONS_ERROR is returned.
143 */
144XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_bcj_reset(
145 struct xz_dec_bcj *s, uint8_t id);
146
147/*
148 * Decode raw BCJ + LZMA2 stream. This must be used only if there actually is
149 * a BCJ filter in the chain. If the chain has only LZMA2, xz_dec_lzma2_run()
150 * must be called directly.
151 */
152XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_bcj_run(struct xz_dec_bcj *s,
153 struct xz_dec_lzma2 *lzma2, struct xz_buf *b);
154
155/* Free the memory allocated for the BCJ filters. */
156#define xz_dec_bcj_end(s) kfree(s)
157#endif
158
159#endif
diff --git a/archival/libarchive/unxz/xz_stream.h b/archival/libarchive/unxz/xz_stream.h
new file mode 100644
index 000000000..36f2a7cbf
--- /dev/null
+++ b/archival/libarchive/unxz/xz_stream.h
@@ -0,0 +1,57 @@
/*
 * Definitions for handling the .xz file format
 *
 * Author: Lasse Collin <lasse.collin@tukaani.org>
 *
 * This file has been put into the public domain.
 * You can do whatever you want with this file.
 */

#ifndef XZ_STREAM_H
#define XZ_STREAM_H

#if defined(__KERNEL__) && !XZ_INTERNAL_CRC32
#	include <linux/crc32.h>
#	undef crc32
	/* crc32_le() uses an inverted pre/post condition vs. xz_crc32(). */
#	define xz_crc32(buf, size, crc) \
		(~crc32_le(~(uint32_t)(crc), buf, size))
#endif

/*
 * See the .xz file format specification at
 * http://tukaani.org/xz/xz-file-format.txt
 * to understand the container format.
 */

#define STREAM_HEADER_SIZE 12

#define HEADER_MAGIC "\3757zXZ\0"
#define HEADER_MAGIC_SIZE 6

#define FOOTER_MAGIC "YZ"
#define FOOTER_MAGIC_SIZE 2

/*
 * Variable-length integer can hold a 63-bit unsigned integer, or a special
 * value to indicate that the value is unknown.
 */
typedef uint64_t vli_type;

#define VLI_MAX ((vli_type)-1 / 2)
#define VLI_UNKNOWN ((vli_type)-1)

/* Maximum encoded size of a VLI (9 bytes: 63 bits at 7 payload bits/byte) */
#define VLI_BYTES_MAX (sizeof(vli_type) * 8 / 7)

/* Integrity Check types */
enum xz_check {
	XZ_CHECK_NONE = 0,
	XZ_CHECK_CRC32 = 1,
	XZ_CHECK_CRC64 = 4,
	XZ_CHECK_SHA256 = 10
};

/* Maximum possible Check ID */
#define XZ_CHECK_MAX 15

#endif