aboutsummaryrefslogtreecommitdiff
path: root/archival/libarchive
diff options
context:
space:
mode:
authorDenys Vlasenko <vda.linux@googlemail.com>2010-11-03 02:38:31 +0100
committerDenys Vlasenko <vda.linux@googlemail.com>2010-11-03 02:38:31 +0100
commit833d4e7f84f59099ee66eabfa3457ebb7d37eaa8 (patch)
tree3be84e1049707ce8077291065fe3689497c69b9c /archival/libarchive
parent5e9934028aa030312a1a2e2e32d5ceade8672beb (diff)
downloadbusybox-w32-833d4e7f84f59099ee66eabfa3457ebb7d37eaa8.tar.gz
busybox-w32-833d4e7f84f59099ee66eabfa3457ebb7d37eaa8.tar.bz2
busybox-w32-833d4e7f84f59099ee66eabfa3457ebb7d37eaa8.zip
rename archival/libunarchive -> archival/libarchive; move bz/ into it
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
Diffstat (limited to 'archival/libarchive')
-rw-r--r--archival/libarchive/Kbuild.src64
-rw-r--r--archival/libarchive/bz/LICENSE44
-rw-r--r--archival/libarchive/bz/README90
-rw-r--r--archival/libarchive/bz/blocksort.c1072
-rw-r--r--archival/libarchive/bz/bzlib.c431
-rw-r--r--archival/libarchive/bz/bzlib.h65
-rw-r--r--archival/libarchive/bz/bzlib_private.h219
-rw-r--r--archival/libarchive/bz/compress.c685
-rw-r--r--archival/libarchive/bz/huffman.c229
-rw-r--r--archival/libarchive/data_align.c15
-rw-r--r--archival/libarchive/data_extract_all.c200
-rw-r--r--archival/libarchive/data_extract_to_command.c134
-rw-r--r--archival/libarchive/data_extract_to_stdout.c14
-rw-r--r--archival/libarchive/data_skip.c12
-rw-r--r--archival/libarchive/decompress_bunzip2.c822
-rw-r--r--archival/libarchive/decompress_uncompress.c307
-rw-r--r--archival/libarchive/decompress_unlzma.c465
-rw-r--r--archival/libarchive/decompress_unxz.c98
-rw-r--r--archival/libarchive/decompress_unzip.c1252
-rw-r--r--archival/libarchive/filter_accept_all.c17
-rw-r--r--archival/libarchive/filter_accept_list.c19
-rw-r--r--archival/libarchive/filter_accept_list_reassign.c51
-rw-r--r--archival/libarchive/filter_accept_reject_list.c36
-rw-r--r--archival/libarchive/find_list_entry.c54
-rw-r--r--archival/libarchive/get_header_ar.c133
-rw-r--r--archival/libarchive/get_header_cpio.c186
-rw-r--r--archival/libarchive/get_header_tar.c461
-rw-r--r--archival/libarchive/get_header_tar_bz2.c21
-rw-r--r--archival/libarchive/get_header_tar_gz.c36
-rw-r--r--archival/libarchive/get_header_tar_lzma.c24
-rw-r--r--archival/libarchive/header_list.c12
-rw-r--r--archival/libarchive/header_skip.c10
-rw-r--r--archival/libarchive/header_verbose_list.c69
-rw-r--r--archival/libarchive/init_handle.c22
-rw-r--r--archival/libarchive/liblzo.h93
-rw-r--r--archival/libarchive/lzo1x_1.c35
-rw-r--r--archival/libarchive/lzo1x_1o.c35
-rw-r--r--archival/libarchive/lzo1x_9x.c921
-rw-r--r--archival/libarchive/lzo1x_c.c296
-rw-r--r--archival/libarchive/lzo1x_d.c420
-rw-r--r--archival/libarchive/open_transformer.c54
-rw-r--r--archival/libarchive/seek_by_jump.c19
-rw-r--r--archival/libarchive/seek_by_read.c16
-rw-r--r--archival/libarchive/unpack_ar_archive.c22
-rw-r--r--archival/libarchive/unxz/README135
-rw-r--r--archival/libarchive/unxz/xz.h271
-rw-r--r--archival/libarchive/unxz/xz_config.h123
-rw-r--r--archival/libarchive/unxz/xz_dec_bcj.c564
-rw-r--r--archival/libarchive/unxz/xz_dec_lzma2.c1175
-rw-r--r--archival/libarchive/unxz/xz_dec_stream.c822
-rw-r--r--archival/libarchive/unxz/xz_lzma2.h204
-rw-r--r--archival/libarchive/unxz/xz_private.h159
-rw-r--r--archival/libarchive/unxz/xz_stream.h57
53 files changed, 12790 insertions, 0 deletions
diff --git a/archival/libarchive/Kbuild.src b/archival/libarchive/Kbuild.src
new file mode 100644
index 000000000..b0bc4e5aa
--- /dev/null
+++ b/archival/libarchive/Kbuild.src
@@ -0,0 +1,64 @@
1# Makefile for busybox
2#
3# Copyright (C) 1999-2004 by Erik Andersen <andersen@codepoet.org>
4#
5# Licensed under GPLv2 or later, see file LICENSE in this source tree.
6
7lib-y:=
8
9COMMON_FILES:= \
10\
11 data_skip.o \
12 data_extract_all.o \
13 data_extract_to_stdout.o \
14\
15 filter_accept_all.o \
16 filter_accept_list.o \
17 filter_accept_reject_list.o \
18\
19 header_skip.o \
20 header_list.o \
21 header_verbose_list.o \
22\
23 seek_by_read.o \
24 seek_by_jump.o \
25\
26 data_align.o \
27 find_list_entry.o \
28 init_handle.o
29
30DPKG_FILES:= \
31 get_header_ar.o \
32 unpack_ar_archive.o \
33 get_header_tar.o \
34 filter_accept_list_reassign.o
35
36INSERT
37
38lib-$(CONFIG_AR) += get_header_ar.o unpack_ar_archive.o
39lib-$(CONFIG_BUNZIP2) += decompress_bunzip2.o
40lib-$(CONFIG_UNLZMA) += decompress_unlzma.o
41lib-$(CONFIG_UNXZ) += decompress_unxz.o
42lib-$(CONFIG_CPIO) += get_header_cpio.o
43lib-$(CONFIG_DPKG) += $(DPKG_FILES)
44lib-$(CONFIG_DPKG_DEB) += $(DPKG_FILES)
45lib-$(CONFIG_GUNZIP) += decompress_unzip.o
46lib-$(CONFIG_RPM2CPIO) += decompress_unzip.o get_header_cpio.o
47lib-$(CONFIG_RPM) += open_transformer.o decompress_unzip.o get_header_cpio.o
48lib-$(CONFIG_TAR) += get_header_tar.o
49lib-$(CONFIG_UNCOMPRESS) += decompress_uncompress.o
50lib-$(CONFIG_UNZIP) += decompress_unzip.o
51lib-$(CONFIG_LZOP) += lzo1x_1.o lzo1x_1o.o lzo1x_d.o
52lib-$(CONFIG_LZOP_COMPR_HIGH) += lzo1x_9x.o
53lib-$(CONFIG_FEATURE_SEAMLESS_Z) += open_transformer.o decompress_uncompress.o
54lib-$(CONFIG_FEATURE_SEAMLESS_GZ) += open_transformer.o decompress_unzip.o get_header_tar_gz.o
55lib-$(CONFIG_FEATURE_SEAMLESS_BZ2) += open_transformer.o decompress_bunzip2.o get_header_tar_bz2.o
56lib-$(CONFIG_FEATURE_SEAMLESS_LZMA) += open_transformer.o decompress_unlzma.o get_header_tar_lzma.o
57lib-$(CONFIG_FEATURE_SEAMLESS_XZ) += open_transformer.o decompress_unxz.o
58lib-$(CONFIG_FEATURE_COMPRESS_USAGE) += decompress_bunzip2.o
59lib-$(CONFIG_FEATURE_COMPRESS_BBCONFIG) += decompress_bunzip2.o
60lib-$(CONFIG_FEATURE_TAR_TO_COMMAND) += data_extract_to_command.o
61
62ifneq ($(lib-y),)
63lib-y += $(COMMON_FILES)
64endif
diff --git a/archival/libarchive/bz/LICENSE b/archival/libarchive/bz/LICENSE
new file mode 100644
index 000000000..da4346520
--- /dev/null
+++ b/archival/libarchive/bz/LICENSE
@@ -0,0 +1,44 @@
1bzip2 applet in busybox is based on lightly-modified source
2of bzip2 version 1.0.4. bzip2 source is distributed
3under the following conditions (copied verbatim from LICENSE file)
4===========================================================
5
6
7This program, "bzip2", the associated library "libbzip2", and all
8documentation, are copyright (C) 1996-2006 Julian R Seward. All
9rights reserved.
10
11Redistribution and use in source and binary forms, with or without
12modification, are permitted provided that the following conditions
13are met:
14
151. Redistributions of source code must retain the above copyright
16 notice, this list of conditions and the following disclaimer.
17
182. The origin of this software must not be misrepresented; you must
19 not claim that you wrote the original software. If you use this
20 software in a product, an acknowledgment in the product
21 documentation would be appreciated but is not required.
22
233. Altered source versions must be plainly marked as such, and must
24 not be misrepresented as being the original software.
25
264. The name of the author may not be used to endorse or promote
27 products derived from this software without specific prior written
28 permission.
29
30THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
31OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
32WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
33ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
34DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
35DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
36GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
37INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
38WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
39NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
40SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
41
42Julian Seward, Cambridge, UK.
43jseward@bzip.org
44bzip2/libbzip2 version 1.0.4 of 20 December 2006
diff --git a/archival/libarchive/bz/README b/archival/libarchive/bz/README
new file mode 100644
index 000000000..fffd47b8a
--- /dev/null
+++ b/archival/libarchive/bz/README
@@ -0,0 +1,90 @@
1This file is an abridged version of README from bzip2 1.0.4
2Build instructions (which are not relevant to busyboxed bzip2)
3are removed.
4===========================================================
5
6
7This is the README for bzip2/libbzip2.
8This version is fully compatible with the previous public releases.
9
10------------------------------------------------------------------
11This file is part of bzip2/libbzip2, a program and library for
12lossless, block-sorting data compression.
13
14bzip2/libbzip2 version 1.0.4 of 20 December 2006
15Copyright (C) 1996-2006 Julian Seward <jseward@bzip.org>
16
17Please read the WARNING, DISCLAIMER and PATENTS sections in this file.
18
19This program is released under the terms of the license contained
20in the file LICENSE.
21------------------------------------------------------------------
22
23Please read and be aware of the following:
24
25
26WARNING:
27
28 This program and library (attempts to) compress data by
29 performing several non-trivial transformations on it.
30 Unless you are 100% familiar with *all* the algorithms
31 contained herein, and with the consequences of modifying them,
32 you should NOT meddle with the compression or decompression
33 machinery. Incorrect changes can and very likely *will*
34 lead to disastrous loss of data.
35
36
37DISCLAIMER:
38
39 I TAKE NO RESPONSIBILITY FOR ANY LOSS OF DATA ARISING FROM THE
40 USE OF THIS PROGRAM/LIBRARY, HOWSOEVER CAUSED.
41
42 Every compression of a file implies an assumption that the
43 compressed file can be decompressed to reproduce the original.
44 Great efforts in design, coding and testing have been made to
45 ensure that this program works correctly. However, the complexity
46 of the algorithms, and, in particular, the presence of various
47 special cases in the code which occur with very low but non-zero
48 probability make it impossible to rule out the possibility of bugs
49 remaining in the program. DO NOT COMPRESS ANY DATA WITH THIS
50 PROGRAM UNLESS YOU ARE PREPARED TO ACCEPT THE POSSIBILITY, HOWEVER
51 SMALL, THAT THE DATA WILL NOT BE RECOVERABLE.
52
53 That is not to say this program is inherently unreliable.
54 Indeed, I very much hope the opposite is true. bzip2/libbzip2
55 has been carefully constructed and extensively tested.
56
57
58PATENTS:
59
60 To the best of my knowledge, bzip2/libbzip2 does not use any
61 patented algorithms. However, I do not have the resources
62 to carry out a patent search. Therefore I cannot give any
63 guarantee of the above statement.
64
65
66I hope you find bzip2 useful. Feel free to contact me at
67 jseward@bzip.org
68if you have any suggestions or queries. Many people mailed me with
69comments, suggestions and patches after the releases of bzip-0.15,
70bzip-0.21, and bzip2 versions 0.1pl2, 0.9.0, 0.9.5, 1.0.0, 1.0.1,
711.0.2 and 1.0.3, and the changes in bzip2 are largely a result of this
72feedback. I thank you for your comments.
73
74bzip2's "home" is http://www.bzip.org/
75
76Julian Seward
77jseward@bzip.org
78Cambridge, UK.
79
8018 July 1996 (version 0.15)
8125 August 1996 (version 0.21)
82 7 August 1997 (bzip2, version 0.1)
8329 August 1997 (bzip2, version 0.1pl2)
8423 August 1998 (bzip2, version 0.9.0)
85 8 June 1999 (bzip2, version 0.9.5)
86 4 Sept 1999 (bzip2, version 0.9.5d)
87 5 May 2000 (bzip2, version 1.0pre8)
8830 December 2001 (bzip2, version 1.0.2pre1)
8915 February 2005 (bzip2, version 1.0.3)
9020 December 2006 (bzip2, version 1.0.4)
diff --git a/archival/libarchive/bz/blocksort.c b/archival/libarchive/bz/blocksort.c
new file mode 100644
index 000000000..f70c3701d
--- /dev/null
+++ b/archival/libarchive/bz/blocksort.c
@@ -0,0 +1,1072 @@
1/*
2 * bzip2 is written by Julian Seward <jseward@bzip.org>.
3 * Adapted for busybox by Denys Vlasenko <vda.linux@googlemail.com>.
4 * See README and LICENSE files in this directory for more information.
5 */
6
7/*-------------------------------------------------------------*/
8/*--- Block sorting machinery ---*/
9/*--- blocksort.c ---*/
10/*-------------------------------------------------------------*/
11
12/* ------------------------------------------------------------------
13This file is part of bzip2/libbzip2, a program and library for
14lossless, block-sorting data compression.
15
16bzip2/libbzip2 version 1.0.4 of 20 December 2006
17Copyright (C) 1996-2006 Julian Seward <jseward@bzip.org>
18
19Please read the WARNING, DISCLAIMER and PATENTS sections in the
20README file.
21
22This program is released under the terms of the license contained
23in the file LICENSE.
24------------------------------------------------------------------ */
25
26/* #include "bzlib_private.h" */
27
/* Swap two int32_t lvalues.
 * Wrapped in do { } while (0) so the expansion is a single statement:
 * the original { ... } form breaks when used as the body of an
 * unbraced if/else ("if (c) mswap(a, b); else ..." fails to compile). */
#define mswap(zz1, zz2) \
do { \
	int32_t zztmp = zz1; \
	zz1 = zz2; \
	zz2 = zztmp; \
} while (0)
34
static
/* No measurable speed gain with inlining */
/* ALWAYS_INLINE */
void mvswap(uint32_t* ptr, int32_t zzp1, int32_t zzp2, int32_t zzn)
{
	/* Exchange the zzn-element runs ptr[zzp1..] and ptr[zzp2..].
	 * The two runs are assumed not to overlap. */
	int32_t off;
	for (off = 0; off < zzn; off++) {
		uint32_t hold = ptr[zzp1 + off];
		ptr[zzp1 + off] = ptr[zzp2 + off];
		ptr[zzp2 + off] = hold;
	}
}
47
48static
49ALWAYS_INLINE
50int32_t mmin(int32_t a, int32_t b)
51{
52 return (a < b) ? a : b;
53}
54
55
56/*---------------------------------------------*/
57/*--- Fallback O(N log(N)^2) sorting ---*/
58/*--- algorithm, for repetitive blocks ---*/
59/*---------------------------------------------*/
60
61/*---------------------------------------------*/
static
inline
void fallbackSimpleSort(uint32_t* fmap,
		uint32_t* eclass,
		int32_t lo,
		int32_t hi)
{
	/* Sort fmap[lo..hi] (inclusive) in ascending order of eclass[fmap[k]].
	 * A single shell-sort pass with gap 4 is run first for ranges longer
	 * than 4 elements, then a plain gap-1 insertion pass finishes.
	 * Both passes are kept exactly as in the original so that the final
	 * ordering of equal keys is unchanged. */
	int32_t k, m;
	uint32_t pivot;
	uint32_t pivot_ec;

	if (lo == hi)
		return;

	/*-- gap-4 pre-pass --*/
	if (hi - lo > 3) {
		for (k = hi - 4; k >= lo; k--) {
			pivot = fmap[k];
			pivot_ec = eclass[pivot];
			for (m = k + 4; m <= hi && pivot_ec > eclass[fmap[m]]; m += 4)
				fmap[m - 4] = fmap[m];
			fmap[m - 4] = pivot;
		}
	}

	/*-- final gap-1 insertion pass --*/
	for (k = hi - 1; k >= lo; k--) {
		pivot = fmap[k];
		pivot_ec = eclass[pivot];
		for (m = k + 1; m <= hi && pivot_ec > eclass[fmap[m]]; m++)
			fmap[m - 1] = fmap[m];
		fmap[m - 1] = pivot;
	}
}
92
93
94/*---------------------------------------------*/
/* Explicit-stack helpers for fallbackQSort3: push/pop a [lo, hi]
 * range onto the caller's local stackLo/stackHi arrays (and 'sp'). */
#define fpush(lz,hz) { \
	stackLo[sp] = lz; \
	stackHi[sp] = hz; \
	sp++; \
}

#define fpop(lz,hz) { \
	sp--; \
	lz = stackLo[sp]; \
	hz = stackHi[sp]; \
}

/* Ranges shorter than this are handed to fallbackSimpleSort instead. */
#define FALLBACK_QSORT_SMALL_THRESH 10
#define FALLBACK_QSORT_STACK_SIZE 100
109
/*
 * Iterative 3-way ("fat pivot") quicksort over fmap[loSt..hiSt],
 * ordering indices by their eclass values.  Recursion is simulated
 * with the fpush/fpop explicit stack; small ranges are finished by
 * fallbackSimpleSort.  Used by fallbackSort below.
 */
static
void fallbackQSort3(uint32_t* fmap,
		uint32_t* eclass,
		int32_t loSt,
		int32_t hiSt)
{
	int32_t unLo, unHi, ltLo, gtHi, n, m;
	int32_t sp, lo, hi;
	uint32_t med, r, r3;
	int32_t stackLo[FALLBACK_QSORT_STACK_SIZE];
	int32_t stackHi[FALLBACK_QSORT_STACK_SIZE];

	r = 0;

	sp = 0;
	fpush(loSt, hiSt);

	while (sp > 0) {
		AssertH(sp < FALLBACK_QSORT_STACK_SIZE - 1, 1004);

		fpop(lo, hi);
		if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) {
			fallbackSimpleSort(fmap, eclass, lo, hi);
			continue;
		}

		/* Random partitioning. Median of 3 sometimes fails to
		 * avoid bad cases. Median of 9 seems to help but
		 * looks rather expensive. This too seems to work but
		 * is cheaper. Guidance for the magic constants
		 * 7621 and 32768 is taken from Sedgewick's algorithms
		 * book, chapter 35.
		 */
		r = ((r * 7621) + 1) % 32768;
		r3 = r % 3;
		if (r3 == 0)
			med = eclass[fmap[lo]];
		else if (r3 == 1)
			med = eclass[fmap[(lo+hi)>>1]];
		else
			med = eclass[fmap[hi]];

		/* Bentley-McIlroy partition: keys equal to the pivot are
		 * parked at both ends ([lo..ltLo) and (gtHi..hi]) while
		 * unLo/unHi scan inward over the unexamined middle. */
		unLo = ltLo = lo;
		unHi = gtHi = hi;

		while (1) {
			while (1) {
				if (unLo > unHi) break;
				n = (int32_t)eclass[fmap[unLo]] - (int32_t)med;
				if (n == 0) {
					mswap(fmap[unLo], fmap[ltLo]);
					ltLo++;
					unLo++;
					continue;
				};
				if (n > 0) break;
				unLo++;
			}
			while (1) {
				if (unLo > unHi) break;
				n = (int32_t)eclass[fmap[unHi]] - (int32_t)med;
				if (n == 0) {
					mswap(fmap[unHi], fmap[gtHi]);
					gtHi--; unHi--;
					continue;
				};
				if (n < 0) break;
				unHi--;
			}
			if (unLo > unHi) break;
			mswap(fmap[unLo], fmap[unHi]); unLo++; unHi--;
		}

		AssertD(unHi == unLo-1, "fallbackQSort3(2)");

		/* All keys equal to the pivot: nothing left to sort. */
		if (gtHi < ltLo) continue;

		/* Swap the parked pivot-equal runs back into the middle. */
		n = mmin(ltLo-lo, unLo-ltLo); mvswap(fmap, lo, unLo-n, n);
		m = mmin(hi-gtHi, gtHi-unHi); mvswap(fmap, unLo, hi-m+1, m);

		/* [lo..n] = keys < pivot, [m..hi] = keys > pivot;
		 * the pivot-equal middle [n+1..m-1] is already in place. */
		n = lo + unLo - ltLo - 1;
		m = hi - (gtHi - unHi) + 1;

		/* Push the larger side first so the smaller is popped
		 * (and processed) first. */
		if (n - lo > hi - m) {
			fpush(lo, n);
			fpush(m, hi);
		} else {
			fpush(m, hi);
			fpush(lo, n);
		}
	}
}
202
203#undef fpush
204#undef fpop
205#undef FALLBACK_QSORT_SMALL_THRESH
206#undef FALLBACK_QSORT_STACK_SIZE
207
208
209/*---------------------------------------------*/
210/* Pre:
211 * nblock > 0
212 * eclass exists for [0 .. nblock-1]
213 * ((uint8_t*)eclass) [0 .. nblock-1] holds block
214 * ptr exists for [0 .. nblock-1]
215 *
216 * Post:
217 * ((uint8_t*)eclass) [0 .. nblock-1] holds block
218 * All other areas of eclass destroyed
219 * fmap [0 .. nblock-1] holds sorted order
220 * bhtab[0 .. 2+(nblock/32)] destroyed
221*/
222
/* Bit-table helpers over the caller-local 'bhtab' bitmap ("bucket
 * header" bits; bit zz marks a bucket boundary).  SET_BH/CLEAR_BH/
 * ISSET_BH touch a single bit, WORD_BH fetches the containing 32-bit
 * word, UNALIGNED_BH is nonzero unless zz sits on a 32-bit boundary. */
#define SET_BH(zz) bhtab[(zz) >> 5] |= (1 << ((zz) & 31))
#define CLEAR_BH(zz) bhtab[(zz) >> 5] &= ~(1 << ((zz) & 31))
#define ISSET_BH(zz) (bhtab[(zz) >> 5] & (1 << ((zz) & 31)))
#define WORD_BH(zz) bhtab[(zz) >> 5]
#define UNALIGNED_BH(zz) ((zz) & 0x01f)
228
/*
 * Fallback sorting entry point for repetitive blocks: computes the
 * sorted suffix order of the block into fmap[0..nblock-1] by doubling
 * the compared prefix length H each round (Manber-Myers style bucket
 * refinement; see the Pre/Post comment above for buffer contracts).
 */
static
void fallbackSort(uint32_t* fmap,
		uint32_t* eclass,
		uint32_t* bhtab,
		int32_t nblock)
{
	int32_t ftab[257];
	int32_t ftabCopy[256];
	int32_t H, i, j, k, l, r, cc, cc1;
	int32_t nNotDone;
	int32_t nBhtab;
	/* The block bytes live in the low bytes of eclass on entry. */
	uint8_t* eclass8 = (uint8_t*)eclass;

	/*
	 * Initial 1-char radix sort to generate
	 * initial fmap and initial BH bits.
	 */
	for (i = 0; i < 257; i++) ftab[i] = 0;
	for (i = 0; i < nblock; i++) ftab[eclass8[i]]++;
	for (i = 0; i < 256; i++) ftabCopy[i] = ftab[i];

	/* Turn counts into cumulative (exclusive-end) offsets. */
	j = ftab[0]; /* bbox: optimized */
	for (i = 1; i < 257; i++) {
		j += ftab[i];
		ftab[i] = j;
	}

	for (i = 0; i < nblock; i++) {
		j = eclass8[i];
		k = ftab[j] - 1;
		ftab[j] = k;
		fmap[k] = i;
	}

	nBhtab = 2 + ((uint32_t)nblock / 32); /* bbox: unsigned div is easier */
	for (i = 0; i < nBhtab; i++) bhtab[i] = 0;
	/* Mark the start of each initial 1-char bucket. */
	for (i = 0; i < 256; i++) SET_BH(ftab[i]);

	/*
	 * Inductively refine the buckets. Kind-of an
	 * "exponential radix sort" (!), inspired by the
	 * Manber-Myers suffix array construction algorithm.
	 */

	/*-- set sentinel bits for block-end detection --*/
	for (i = 0; i < 32; i++) {
		SET_BH(nblock + 2*i);
		CLEAR_BH(nblock + 2*i + 1);
	}

	/*-- the log(N) loop --*/
	H = 1;
	while (1) {
		/* Assign each suffix (shifted back by H) the start index
		 * of its current bucket as its equivalence class. */
		j = 0;
		for (i = 0; i < nblock; i++) {
			if (ISSET_BH(i))
				j = i;
			k = fmap[i] - H;
			if (k < 0)
				k += nblock;
			eclass[k] = j;
		}

		nNotDone = 0;
		r = -1;
		while (1) {

			/*-- find the next non-singleton bucket --*/
			k = r + 1;
			while (ISSET_BH(k) && UNALIGNED_BH(k))
				k++;
			if (ISSET_BH(k)) {
				/* Skip whole words of set bits at a time. */
				while (WORD_BH(k) == 0xffffffff) k += 32;
				while (ISSET_BH(k)) k++;
			}
			l = k - 1;
			if (l >= nblock)
				break;
			while (!ISSET_BH(k) && UNALIGNED_BH(k))
				k++;
			if (!ISSET_BH(k)) {
				while (WORD_BH(k) == 0x00000000) k += 32;
				while (!ISSET_BH(k)) k++;
			}
			r = k - 1;
			if (r >= nblock)
				break;

			/*-- now [l, r] bracket current bucket --*/
			if (r > l) {
				nNotDone += (r - l + 1);
				fallbackQSort3(fmap, eclass, l, r);

				/*-- scan bucket and generate header bits-- */
				cc = -1;
				for (i = l; i <= r; i++) {
					cc1 = eclass[fmap[i]];
					if (cc != cc1) {
						SET_BH(i);
						cc = cc1;
					};
				}
			}
		}

		/* Double the compared prefix; stop when every bucket is a
		 * singleton or the prefix covers the whole block. */
		H *= 2;
		if (H > nblock || nNotDone == 0)
			break;
	}

	/*
	 * Reconstruct the original block in
	 * eclass8 [0 .. nblock-1], since the
	 * previous phase destroyed it.
	 */
	j = 0;
	for (i = 0; i < nblock; i++) {
		while (ftabCopy[j] == 0)
			j++;
		ftabCopy[j]--;
		eclass8[fmap[i]] = (uint8_t)j;
	}
	AssertH(j < 256, 1005);
}
353
354#undef SET_BH
355#undef CLEAR_BH
356#undef ISSET_BH
357#undef WORD_BH
358#undef UNALIGNED_BH
359
360
361/*---------------------------------------------*/
362/*--- The main, O(N^2 log(N)) sorting ---*/
363/*--- algorithm. Faster for "normal" ---*/
364/*--- non-repetitive blocks. ---*/
365/*---------------------------------------------*/
366
367/*---------------------------------------------*/
/*
 * Compare two rotations of 'block' starting at offsets i1 and i2.
 * Returns nonzero if rotation i1 sorts strictly after rotation i2.
 * Bytes are compared first; on byte ties, quadrant[] values serve as
 * a tie-breaking rank.  Each 8 positions compared past the unrolled
 * prefix decrement *budget; after nblock+8 positions the rotations
 * are treated as equal (return False).
 */
static
NOINLINE
int mainGtU(
		uint32_t i1,
		uint32_t i2,
		uint8_t* block,
		uint16_t* quadrant,
		uint32_t nblock,
		int32_t* budget)
{
	int32_t k;
	uint8_t c1, c2;
	uint16_t s1, s2;

/* Loop unrolling here is actually very useful
 * (generated code is much simpler),
 * code size increase is only 270 bytes (i386)
 * but speeds up compression 10% overall
 */

#if CONFIG_BZIP2_FEATURE_SPEED >= 1

/* Fully unrolled repetition of 'code'. */
#define TIMES_8(code) \
	code; code; code; code; \
	code; code; code; code;
#define TIMES_12(code) \
	code; code; code; code; \
	code; code; code; code; \
	code; code; code; code;

#else

/* Compact looped repetition of 'code' (smaller, slower). */
#define TIMES_8(code) \
{ \
	int nn = 8; \
	do { \
		code; \
	} while (--nn); \
}
#define TIMES_12(code) \
{ \
	int nn = 12; \
	do { \
		code; \
	} while (--nn); \
}

#endif

	AssertD(i1 != i2, "mainGtU");
	/* First 12 positions: bytes only, no quadrant tie-break yet. */
	TIMES_12(
		c1 = block[i1]; c2 = block[i2];
		if (c1 != c2) return (c1 > c2);
		i1++; i2++;
	)

	k = nblock + 8;

	do {
		TIMES_8(
			c1 = block[i1]; c2 = block[i2];
			if (c1 != c2) return (c1 > c2);
			s1 = quadrant[i1]; s2 = quadrant[i2];
			if (s1 != s2) return (s1 > s2);
			i1++; i2++;
		)

		/* Wrap around the end of the block (rotation compare). */
		if (i1 >= nblock) i1 -= nblock;
		if (i2 >= nblock) i2 -= nblock;

		(*budget)--;
		k -= 8;
	} while (k >= 0);

	return False;
}
#undef TIMES_8
#undef TIMES_12
446
447/*---------------------------------------------*/
448/*
449 * Knuth's increments seem to work better
450 * than Incerpi-Sedgewick here. Possibly
451 * because the number of elems to sort is
452 * usually small, typically <= 20.
453 */
454static
455const int32_t incs[14] = {
456 1, 4, 13, 40, 121, 364, 1093, 3280,
457 9841, 29524, 88573, 265720,
458 797161, 2391484
459};
460
/*
 * Shell sort of ptr[lo..hi] using the Knuth increments in incs[],
 * comparing suffixes at offset d via mainGtU.  Gives up and returns
 * early once *budget (decremented inside mainGtU) goes negative;
 * the caller detects this and falls back to fallbackSort.
 */
static
void mainSimpleSort(uint32_t* ptr,
		uint8_t* block,
		uint16_t* quadrant,
		int32_t nblock,
		int32_t lo,
		int32_t hi,
		int32_t d,
		int32_t* budget)
{
	int32_t i, j, h, bigN, hp;
	uint32_t v;

	bigN = hi - lo + 1;
	if (bigN < 2) return;

	/* Largest increment smaller than the range size. */
	hp = 0;
	while (incs[hp] < bigN) hp++;
	hp--;

	for (; hp >= 0; hp--) {
		h = incs[hp];

		i = lo + h;
		while (1) {
			/*-- copy 1 --*/
			if (i > hi) break;
			v = ptr[i];
			j = i;
			while (mainGtU(ptr[j-h]+d, v+d, block, quadrant, nblock, budget)) {
				ptr[j] = ptr[j-h];
				j = j - h;
				if (j <= (lo + h - 1)) break;
			}
			ptr[j] = v;
			i++;

/* 1.5% overall speedup, +290 bytes */
#if CONFIG_BZIP2_FEATURE_SPEED >= 3
			/*-- copy 2 --*/
			if (i > hi) break;
			v = ptr[i];
			j = i;
			while (mainGtU(ptr[j-h]+d, v+d, block, quadrant, nblock, budget)) {
				ptr[j] = ptr[j-h];
				j = j - h;
				if (j <= (lo + h - 1)) break;
			}
			ptr[j] = v;
			i++;

			/*-- copy 3 --*/
			if (i > hi) break;
			v = ptr[i];
			j = i;
			while (mainGtU(ptr[j-h]+d, v+d, block, quadrant, nblock, budget)) {
				ptr[j] = ptr[j-h];
				j = j - h;
				if (j <= (lo + h - 1)) break;
			}
			ptr[j] = v;
			i++;
#endif
			/* Abandon the sort once the comparison budget runs out. */
			if (*budget < 0) return;
		}
	}
}
528
529
530/*---------------------------------------------*/
531/*
532 * The following is an implementation of
533 * an elegant 3-way quicksort for strings,
534 * described in a paper "Fast Algorithms for
535 * Sorting and Searching Strings", by Robert
536 * Sedgewick and Jon L. Bentley.
537 */
538
539static
540ALWAYS_INLINE
541uint8_t mmed3(uint8_t a, uint8_t b, uint8_t c)
542{
543 uint8_t t;
544 if (a > b) {
545 t = a;
546 a = b;
547 b = t;
548 };
549 /* here b >= a */
550 if (b > c) {
551 b = c;
552 if (a > b)
553 b = a;
554 }
555 return b;
556}
557
/* Explicit-stack helpers for mainQSort3: each entry is a (lo, hi, d)
 * triple — an index range plus the current comparison depth. */
#define mpush(lz,hz,dz) \
{ \
	stackLo[sp] = lz; \
	stackHi[sp] = hz; \
	stackD [sp] = dz; \
	sp++; \
}

#define mpop(lz,hz,dz) \
{ \
	sp--; \
	lz = stackLo[sp]; \
	hz = stackHi[sp]; \
	dz = stackD [sp]; \
}

/* Size of pending partition az in the nextLo/nextHi scratch arrays. */
#define mnextsize(az) (nextHi[az] - nextLo[az])

/* Swap pending partitions az and bz (all three fields). */
#define mnextswap(az,bz) \
{ \
	int32_t tz; \
	tz = nextLo[az]; nextLo[az] = nextLo[bz]; nextLo[bz] = tz; \
	tz = nextHi[az]; nextHi[az] = nextHi[bz]; nextHi[bz] = tz; \
	tz = nextD [az]; nextD [az] = nextD [bz]; nextD [bz] = tz; \
}

#define MAIN_QSORT_SMALL_THRESH 20
#define MAIN_QSORT_DEPTH_THRESH (BZ_N_RADIX + BZ_N_QSORT)
#define MAIN_QSORT_STACK_SIZE 100
587
/*
 * Iterative 3-way radix quicksort of ptr[loSt..hiSt] on the byte at
 * depth dSt (Bentley/Sedgewick "Fast Algorithms for Sorting and
 * Searching Strings" — see comment above).  Small or deep ranges go
 * to mainSimpleSort; returns early if *budget goes negative there.
 */
static NOINLINE
void mainQSort3(uint32_t* ptr,
		uint8_t* block,
		uint16_t* quadrant,
		int32_t nblock,
		int32_t loSt,
		int32_t hiSt,
		int32_t dSt,
		int32_t* budget)
{
	int32_t unLo, unHi, ltLo, gtHi, n, m, med;
	int32_t sp, lo, hi, d;

	int32_t stackLo[MAIN_QSORT_STACK_SIZE];
	int32_t stackHi[MAIN_QSORT_STACK_SIZE];
	int32_t stackD [MAIN_QSORT_STACK_SIZE];

	int32_t nextLo[3];
	int32_t nextHi[3];
	int32_t nextD [3];

	sp = 0;
	mpush(loSt, hiSt, dSt);

	while (sp > 0) {
		AssertH(sp < MAIN_QSORT_STACK_SIZE - 2, 1001);

		mpop(lo, hi, d);
		if (hi - lo < MAIN_QSORT_SMALL_THRESH
		 || d > MAIN_QSORT_DEPTH_THRESH
		) {
			mainSimpleSort(ptr, block, quadrant, nblock, lo, hi, d, budget);
			if (*budget < 0)
				return;
			continue;
		}
		/* Median-of-3 pivot on the byte at the current depth. */
		med = (int32_t) mmed3(block[ptr[lo ] + d],
				block[ptr[hi ] + d],
				block[ptr[(lo+hi) >> 1] + d]);

		/* Bentley-McIlroy partition: pivot-equal keys parked at
		 * both ends while unLo/unHi scan the middle inward. */
		unLo = ltLo = lo;
		unHi = gtHi = hi;

		while (1) {
			while (1) {
				if (unLo > unHi)
					break;
				n = ((int32_t)block[ptr[unLo]+d]) - med;
				if (n == 0) {
					mswap(ptr[unLo], ptr[ltLo]);
					ltLo++;
					unLo++;
					continue;
				};
				if (n > 0) break;
				unLo++;
			}
			while (1) {
				if (unLo > unHi)
					break;
				n = ((int32_t)block[ptr[unHi]+d]) - med;
				if (n == 0) {
					mswap(ptr[unHi], ptr[gtHi]);
					gtHi--;
					unHi--;
					continue;
				};
				if (n < 0) break;
				unHi--;
			}
			if (unLo > unHi)
				break;
			mswap(ptr[unLo], ptr[unHi]);
			unLo++;
			unHi--;
		}

		AssertD(unHi == unLo-1, "mainQSort3(2)");

		/* All keys equal to the pivot: recurse on next depth. */
		if (gtHi < ltLo) {
			mpush(lo, hi, d + 1);
			continue;
		}

		/* Swap the parked pivot-equal runs back into the middle. */
		n = mmin(ltLo-lo, unLo-ltLo); mvswap(ptr, lo, unLo-n, n);
		m = mmin(hi-gtHi, gtHi-unHi); mvswap(ptr, unLo, hi-m+1, m);

		/* [lo..n] < pivot, [n+1..m-1] == pivot (sorted at depth
		 * d+1), [m..hi] > pivot. */
		n = lo + unLo - ltLo - 1;
		m = hi - (gtHi - unHi) + 1;

		nextLo[0] = lo; nextHi[0] = n; nextD[0] = d;
		nextLo[1] = m; nextHi[1] = hi; nextD[1] = d;
		nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+1;

		/* Sort the three pending partitions by size (largest first)
		 * so the largest is pushed deepest / popped last. */
		if (mnextsize(0) < mnextsize(1)) mnextswap(0, 1);
		if (mnextsize(1) < mnextsize(2)) mnextswap(1, 2);
		if (mnextsize(0) < mnextsize(1)) mnextswap(0, 1);

		AssertD (mnextsize(0) >= mnextsize(1), "mainQSort3(8)");
		AssertD (mnextsize(1) >= mnextsize(2), "mainQSort3(9)");

		mpush(nextLo[0], nextHi[0], nextD[0]);
		mpush(nextLo[1], nextHi[1], nextD[1]);
		mpush(nextLo[2], nextHi[2], nextD[2]);
	}
}
694
695#undef mpush
696#undef mpop
697#undef mnextsize
698#undef mnextswap
699#undef MAIN_QSORT_SMALL_THRESH
700#undef MAIN_QSORT_DEPTH_THRESH
701#undef MAIN_QSORT_STACK_SIZE
702
703
704/*---------------------------------------------*/
705/* Pre:
706 * nblock > N_OVERSHOOT
707 * block32 exists for [0 .. nblock-1 +N_OVERSHOOT]
708 * ((uint8_t*)block32) [0 .. nblock-1] holds block
709 * ptr exists for [0 .. nblock-1]
710 *
711 * Post:
712 * ((uint8_t*)block32) [0 .. nblock-1] holds block
713 * All other areas of block32 destroyed
714 * ftab[0 .. 65536] destroyed
715 * ptr [0 .. nblock-1] holds sorted order
716 * if (*budget < 0), sorting was abandoned
717 */
718
719#define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8])
720#define SETMASK (1 << 21)
721#define CLEARMASK (~(SETMASK))
722
723static NOINLINE
724void mainSort(EState* state,
725 uint32_t* ptr,
726 uint8_t* block,
727 uint16_t* quadrant,
728 uint32_t* ftab,
729 int32_t nblock,
730 int32_t* budget)
731{
732 int32_t i, j, k, ss, sb;
733 uint8_t c1;
734 int32_t numQSorted;
735 uint16_t s;
736 Bool bigDone[256];
737 /* bbox: moved to EState to save stack
738 int32_t runningOrder[256];
739 int32_t copyStart[256];
740 int32_t copyEnd [256];
741 */
742#define runningOrder (state->mainSort__runningOrder)
743#define copyStart (state->mainSort__copyStart)
744#define copyEnd (state->mainSort__copyEnd)
745
746 /*-- set up the 2-byte frequency table --*/
747 /* was: for (i = 65536; i >= 0; i--) ftab[i] = 0; */
748 memset(ftab, 0, 65537 * sizeof(ftab[0]));
749
750 j = block[0] << 8;
751 i = nblock - 1;
752/* 3%, +300 bytes */
753#if CONFIG_BZIP2_FEATURE_SPEED >= 2
754 for (; i >= 3; i -= 4) {
755 quadrant[i] = 0;
756 j = (j >> 8) | (((uint16_t)block[i]) << 8);
757 ftab[j]++;
758 quadrant[i-1] = 0;
759 j = (j >> 8) | (((uint16_t)block[i-1]) << 8);
760 ftab[j]++;
761 quadrant[i-2] = 0;
762 j = (j >> 8) | (((uint16_t)block[i-2]) << 8);
763 ftab[j]++;
764 quadrant[i-3] = 0;
765 j = (j >> 8) | (((uint16_t)block[i-3]) << 8);
766 ftab[j]++;
767 }
768#endif
769 for (; i >= 0; i--) {
770 quadrant[i] = 0;
771 j = (j >> 8) | (((uint16_t)block[i]) << 8);
772 ftab[j]++;
773 }
774
775 /*-- (emphasises close relationship of block & quadrant) --*/
776 for (i = 0; i < BZ_N_OVERSHOOT; i++) {
777 block [nblock+i] = block[i];
778 quadrant[nblock+i] = 0;
779 }
780
781 /*-- Complete the initial radix sort --*/
782 j = ftab[0]; /* bbox: optimized */
783 for (i = 1; i <= 65536; i++) {
784 j += ftab[i];
785 ftab[i] = j;
786 }
787
788 s = block[0] << 8;
789 i = nblock - 1;
790#if CONFIG_BZIP2_FEATURE_SPEED >= 2
791 for (; i >= 3; i -= 4) {
792 s = (s >> 8) | (block[i] << 8);
793 j = ftab[s] - 1;
794 ftab[s] = j;
795 ptr[j] = i;
796 s = (s >> 8) | (block[i-1] << 8);
797 j = ftab[s] - 1;
798 ftab[s] = j;
799 ptr[j] = i-1;
800 s = (s >> 8) | (block[i-2] << 8);
801 j = ftab[s] - 1;
802 ftab[s] = j;
803 ptr[j] = i-2;
804 s = (s >> 8) | (block[i-3] << 8);
805 j = ftab[s] - 1;
806 ftab[s] = j;
807 ptr[j] = i-3;
808 }
809#endif
810 for (; i >= 0; i--) {
811 s = (s >> 8) | (block[i] << 8);
812 j = ftab[s] - 1;
813 ftab[s] = j;
814 ptr[j] = i;
815 }
816
817 /*
818 * Now ftab contains the first loc of every small bucket.
819 * Calculate the running order, from smallest to largest
820 * big bucket.
821 */
822 for (i = 0; i <= 255; i++) {
823 bigDone [i] = False;
824 runningOrder[i] = i;
825 }
826
827 {
828 int32_t vv;
829 /* bbox: was: int32_t h = 1; */
830 /* do h = 3 * h + 1; while (h <= 256); */
831 uint32_t h = 364;
832
833 do {
834 /*h = h / 3;*/
835 h = (h * 171) >> 9; /* bbox: fast h/3 */
836 for (i = h; i <= 255; i++) {
837 vv = runningOrder[i];
838 j = i;
839 while (BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv)) {
840 runningOrder[j] = runningOrder[j-h];
841 j = j - h;
842 if (j <= (h - 1))
843 goto zero;
844 }
845 zero:
846 runningOrder[j] = vv;
847 }
848 } while (h != 1);
849 }
850
851 /*
852 * The main sorting loop.
853 */
854
855 numQSorted = 0;
856
857 for (i = 0; i <= 255; i++) {
858
859 /*
860 * Process big buckets, starting with the least full.
861 * Basically this is a 3-step process in which we call
862 * mainQSort3 to sort the small buckets [ss, j], but
863 * also make a big effort to avoid the calls if we can.
864 */
865 ss = runningOrder[i];
866
867 /*
868 * Step 1:
869 * Complete the big bucket [ss] by quicksorting
870 * any unsorted small buckets [ss, j], for j != ss.
871 * Hopefully previous pointer-scanning phases have already
872 * completed many of the small buckets [ss, j], so
873 * we don't have to sort them at all.
874 */
875 for (j = 0; j <= 255; j++) {
876 if (j != ss) {
877 sb = (ss << 8) + j;
878 if (!(ftab[sb] & SETMASK)) {
879 int32_t lo = ftab[sb] & CLEARMASK;
880 int32_t hi = (ftab[sb+1] & CLEARMASK) - 1;
881 if (hi > lo) {
882 mainQSort3(
883 ptr, block, quadrant, nblock,
884 lo, hi, BZ_N_RADIX, budget
885 );
886 if (*budget < 0) return;
887 numQSorted += (hi - lo + 1);
888 }
889 }
890 ftab[sb] |= SETMASK;
891 }
892 }
893
894 AssertH(!bigDone[ss], 1006);
895
896 /*
897 * Step 2:
898 * Now scan this big bucket [ss] so as to synthesise the
899 * sorted order for small buckets [t, ss] for all t,
900 * including, magically, the bucket [ss,ss] too.
901 * This will avoid doing Real Work in subsequent Step 1's.
902 */
903 {
904 for (j = 0; j <= 255; j++) {
905 copyStart[j] = ftab[(j << 8) + ss] & CLEARMASK;
906 copyEnd [j] = (ftab[(j << 8) + ss + 1] & CLEARMASK) - 1;
907 }
908 for (j = ftab[ss << 8] & CLEARMASK; j < copyStart[ss]; j++) {
909 k = ptr[j] - 1;
910 if (k < 0)
911 k += nblock;
912 c1 = block[k];
913 if (!bigDone[c1])
914 ptr[copyStart[c1]++] = k;
915 }
916 for (j = (ftab[(ss+1) << 8] & CLEARMASK) - 1; j > copyEnd[ss]; j--) {
917 k = ptr[j]-1;
918 if (k < 0)
919 k += nblock;
920 c1 = block[k];
921 if (!bigDone[c1])
922 ptr[copyEnd[c1]--] = k;
923 }
924 }
925
926 /* Extremely rare case missing in bzip2-1.0.0 and 1.0.1.
927 * Necessity for this case is demonstrated by compressing
928 * a sequence of approximately 48.5 million of character
929 * 251; 1.0.0/1.0.1 will then die here. */
930 AssertH((copyStart[ss]-1 == copyEnd[ss]) \
931 || (copyStart[ss] == 0 && copyEnd[ss] == nblock-1), 1007);
932
933 for (j = 0; j <= 255; j++)
934 ftab[(j << 8) + ss] |= SETMASK;
935
936 /*
937 * Step 3:
938 * The [ss] big bucket is now done. Record this fact,
939 * and update the quadrant descriptors. Remember to
940 * update quadrants in the overshoot area too, if
941 * necessary. The "if (i < 255)" test merely skips
942 * this updating for the last bucket processed, since
943 * updating for the last bucket is pointless.
944 *
945 * The quadrant array provides a way to incrementally
946 * cache sort orderings, as they appear, so as to
947 * make subsequent comparisons in fullGtU() complete
948 * faster. For repetitive blocks this makes a big
949 * difference (but not big enough to be able to avoid
950 * the fallback sorting mechanism, exponential radix sort).
951 *
952 * The precise meaning is: at all times:
953 *
954 * for 0 <= i < nblock and 0 <= j <= nblock
955 *
956 * if block[i] != block[j],
957 *
958 * then the relative values of quadrant[i] and
959 * quadrant[j] are meaningless.
960 *
961 * else {
962 * if quadrant[i] < quadrant[j]
963 * then the string starting at i lexicographically
964 * precedes the string starting at j
965 *
966 * else if quadrant[i] > quadrant[j]
967 * then the string starting at j lexicographically
968 * precedes the string starting at i
969 *
970 * else
971 * the relative ordering of the strings starting
972 * at i and j has not yet been determined.
973 * }
974 */
975 bigDone[ss] = True;
976
977 if (i < 255) {
978 int32_t bbStart = ftab[ss << 8] & CLEARMASK;
979 int32_t bbSize = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart;
980 int32_t shifts = 0;
981
982 while ((bbSize >> shifts) > 65534) shifts++;
983
984 for (j = bbSize-1; j >= 0; j--) {
985 int32_t a2update = ptr[bbStart + j];
986 uint16_t qVal = (uint16_t)(j >> shifts);
987 quadrant[a2update] = qVal;
988 if (a2update < BZ_N_OVERSHOOT)
989 quadrant[a2update + nblock] = qVal;
990 }
991 AssertH(((bbSize-1) >> shifts) <= 65535, 1002);
992 }
993 }
994#undef runningOrder
995#undef copyStart
996#undef copyEnd
997}
998
999#undef BIGFREQ
1000#undef SETMASK
1001#undef CLEARMASK
1002
1003
1004/*---------------------------------------------*/
1005/* Pre:
1006 * nblock > 0
1007 * arr2 exists for [0 .. nblock-1 +N_OVERSHOOT]
1008 * ((uint8_t*)arr2)[0 .. nblock-1] holds block
1009 * arr1 exists for [0 .. nblock-1]
1010 *
1011 * Post:
1012 * ((uint8_t*)arr2) [0 .. nblock-1] holds block
1013 * All other areas of block destroyed
1014 * ftab[0 .. 65536] destroyed
1015 * arr1[0 .. nblock-1] holds sorted order
1016 */
1017static NOINLINE
1018void BZ2_blockSort(EState* s)
1019{
1020 /* In original bzip2 1.0.4, it's a parameter, but 30
1021 * (which was the default) should work ok. */
1022 enum { wfact = 30 };
1023
1024 uint32_t* ptr = s->ptr;
1025 uint8_t* block = s->block;
1026 uint32_t* ftab = s->ftab;
1027 int32_t nblock = s->nblock;
1028 uint16_t* quadrant;
1029 int32_t budget;
1030 int32_t i;
1031
1032 if (nblock < 10000) {
1033 fallbackSort(s->arr1, s->arr2, ftab, nblock);
1034 } else {
1035 /* Calculate the location for quadrant, remembering to get
1036 * the alignment right. Assumes that &(block[0]) is at least
1037 * 2-byte aligned -- this should be ok since block is really
1038 * the first section of arr2.
1039 */
1040 i = nblock + BZ_N_OVERSHOOT;
1041 if (i & 1) i++;
1042 quadrant = (uint16_t*)(&(block[i]));
1043
1044 /* (wfact-1) / 3 puts the default-factor-30
1045 * transition point at very roughly the same place as
1046 * with v0.1 and v0.9.0.
1047 * Not that it particularly matters any more, since the
1048 * resulting compressed stream is now the same regardless
1049 * of whether or not we use the main sort or fallback sort.
1050 */
1051 budget = nblock * ((wfact-1) / 3);
1052
1053 mainSort(s, ptr, block, quadrant, ftab, nblock, &budget);
1054 if (budget < 0) {
1055 fallbackSort(s->arr1, s->arr2, ftab, nblock);
1056 }
1057 }
1058
1059 s->origPtr = -1;
1060 for (i = 0; i < s->nblock; i++)
1061 if (ptr[i] == 0) {
1062 s->origPtr = i;
1063 break;
1064 };
1065
1066 AssertH(s->origPtr != -1, 1003);
1067}
1068
1069
1070/*-------------------------------------------------------------*/
1071/*--- end blocksort.c ---*/
1072/*-------------------------------------------------------------*/
diff --git a/archival/libarchive/bz/bzlib.c b/archival/libarchive/bz/bzlib.c
new file mode 100644
index 000000000..b3beeabed
--- /dev/null
+++ b/archival/libarchive/bz/bzlib.c
@@ -0,0 +1,431 @@
1/*
2 * bzip2 is written by Julian Seward <jseward@bzip.org>.
3 * Adapted for busybox by Denys Vlasenko <vda.linux@googlemail.com>.
4 * See README and LICENSE files in this directory for more information.
5 */
6
7/*-------------------------------------------------------------*/
8/*--- Library top-level functions. ---*/
9/*--- bzlib.c ---*/
10/*-------------------------------------------------------------*/
11
12/* ------------------------------------------------------------------
13This file is part of bzip2/libbzip2, a program and library for
14lossless, block-sorting data compression.
15
16bzip2/libbzip2 version 1.0.4 of 20 December 2006
17Copyright (C) 1996-2006 Julian Seward <jseward@bzip.org>
18
19Please read the WARNING, DISCLAIMER and PATENTS sections in the
20README file.
21
22This program is released under the terms of the license contained
23in the file LICENSE.
24------------------------------------------------------------------ */
25
26/* CHANGES
27 * 0.9.0 -- original version.
28 * 0.9.0a/b -- no changes in this file.
29 * 0.9.0c -- made zero-length BZ_FLUSH work correctly in bzCompress().
30 * fixed bzWrite/bzRead to ignore zero-length requests.
31 * fixed bzread to correctly handle read requests after EOF.
32 * wrong parameter order in call to bzDecompressInit in
33 * bzBuffToBuffDecompress. Fixed.
34 */
35
36/* #include "bzlib_private.h" */
37
38/*---------------------------------------------------*/
39/*--- Compression stuff ---*/
40/*---------------------------------------------------*/
41
42/*---------------------------------------------------*/
#if BZ_LIGHT_DEBUG
static
/* Report an internal sanity-check failure (AssertH) and terminate.
 * errcode identifies the failing check site; see the AssertH()
 * call sites for the individual numbers. */
void bz_assert_fail(int errcode)
{
	/* if (errcode == 1007) bb_error_msg_and_die("probably bad RAM"); */
	bb_error_msg_and_die("internal error %d", errcode);
}
#endif
51
52/*---------------------------------------------------*/
53static
54void prepare_new_block(EState* s)
55{
56 int i;
57 s->nblock = 0;
58 s->numZ = 0;
59 s->state_out_pos = 0;
60 BZ_INITIALISE_CRC(s->blockCRC);
61 /* inlined memset would be nice to have here */
62 for (i = 0; i < 256; i++)
63 s->inUse[i] = 0;
64 s->blockNo++;
65}
66
67
68/*---------------------------------------------------*/
static
ALWAYS_INLINE
/* Reset the run-length-encoder state: no pending character and a
 * zero-length run.  256 is outside the byte range and serves as
 * the "no character" sentinel. */
void init_RL(EState* s)
{
	s->state_in_ch = 256;
	s->state_in_len = 0;
}
76
77
78static
79int isempty_RL(EState* s)
80{
81 return (s->state_in_ch >= 256 || s->state_in_len <= 0);
82}
83
84
85/*---------------------------------------------------*/
static
/* Allocate and initialize all compressor state for the given block
 * size (blockSize100k = 1..9, i.e. 100k..900k data bytes per block)
 * and attach it to strm->state.  Allocation uses xmalloc/xzalloc,
 * which die on OOM rather than returning an error. */
void BZ2_bzCompressInit(bz_stream *strm, int blockSize100k)
{
	int32_t n;
	EState* s;

	s = xzalloc(sizeof(EState));
	s->strm = strm;

	n = 100000 * blockSize100k;
	/* arr1 is aliased as both the sort pointer array (ptr) and,
	 * later, the MTF value array (mtfv); arr2 holds the block data
	 * plus BZ_N_OVERSHOOT scratch used by the block sorter. */
	s->arr1 = xmalloc(n * sizeof(uint32_t));
	s->mtfv = (uint16_t*)s->arr1;
	s->ptr = (uint32_t*)s->arr1;
	s->arr2 = xmalloc((n + BZ_N_OVERSHOOT) * sizeof(uint32_t));
	s->block = (uint8_t*)s->arr2;
	/* 65537 = one bucket per 16-bit prefix, plus end sentinel */
	s->ftab = xmalloc(65537 * sizeof(uint32_t));

	s->crc32table = crc32_filltable(NULL, 1);

	s->state = BZ_S_INPUT;
	s->mode = BZ_M_RUNNING;
	s->blockSize100k = blockSize100k;
	/* keep some slack at the end of the block */
	s->nblockMAX = n - 19;

	strm->state = s;
	/*strm->total_in = 0;*/
	strm->total_out = 0;
	init_RL(s);
	prepare_new_block(s);
}
116
117
118/*---------------------------------------------------*/
static
/* Flush the pending run (state_in_ch repeated state_in_len times,
 * 1 <= state_in_len <= 255) into the block using bzip2's RLE1
 * encoding: runs of 1..3 bytes are stored literally; runs of 4..255
 * are stored as four literals followed by a count byte (len - 4). */
void add_pair_to_block(EState* s)
{
	int32_t i;
	uint8_t ch = (uint8_t)(s->state_in_ch);
	/* the CRC covers every repetition, not the encoded form */
	for (i = 0; i < s->state_in_len; i++) {
		BZ_UPDATE_CRC(s, s->blockCRC, ch);
	}
	s->inUse[s->state_in_ch] = 1;
	switch (s->state_in_len) {
	case 3:
		s->block[s->nblock] = (uint8_t)ch; s->nblock++;
		/* fall through */
	case 2:
		s->block[s->nblock] = (uint8_t)ch; s->nblock++;
		/* fall through */
	case 1:
		s->block[s->nblock] = (uint8_t)ch; s->nblock++;
		break;
	default:
		/* run of 4..255: the count byte (len - 4) enters the
		 * block too, so mark its value as used */
		s->inUse[s->state_in_len - 4] = 1;
		s->block[s->nblock] = (uint8_t)ch; s->nblock++;
		s->block[s->nblock] = (uint8_t)ch; s->nblock++;
		s->block[s->nblock] = (uint8_t)ch; s->nblock++;
		s->block[s->nblock] = (uint8_t)ch; s->nblock++;
		s->block[s->nblock] = (uint8_t)(s->state_in_len - 4);
		s->nblock++;
		break;
	}
}
149
150
151/*---------------------------------------------------*/
static
/* Emit any pending run into the block (state_in_ch < 256 means a
 * real byte is buffered), then reset the run-length-encoder state. */
void flush_RL(EState* s)
{
	if (s->state_in_ch < 256) add_pair_to_block(s);
	init_RL(s);
}
158
159
160/*---------------------------------------------------*/
/* RLE1-encode one input byte zchh0 into zs's current block.
 * Fast path: the byte differs from the pending one and the pending
 * run has length 1 -- emit the pending byte directly and make zchh
 * the new pending byte.  Otherwise either flush the pending run
 * (on a byte change, or when the run reaches 255, the longest
 * encodable length) and start a new one, or just extend the run.
 * NOTE: zs is evaluated many times -- pass a plain pointer. */
#define ADD_CHAR_TO_BLOCK(zs, zchh0) \
{ \
	uint32_t zchh = (uint32_t)(zchh0); \
	/*-- fast track the common case --*/ \
	if (zchh != zs->state_in_ch && zs->state_in_len == 1) { \
		uint8_t ch = (uint8_t)(zs->state_in_ch); \
		BZ_UPDATE_CRC(zs, zs->blockCRC, ch); \
		zs->inUse[zs->state_in_ch] = 1; \
		zs->block[zs->nblock] = (uint8_t)ch; \
		zs->nblock++; \
		zs->state_in_ch = zchh; \
	} \
	else \
	/*-- general, uncommon cases --*/ \
	if (zchh != zs->state_in_ch || zs->state_in_len == 255) { \
		if (zs->state_in_ch < 256) \
			add_pair_to_block(zs); \
		zs->state_in_ch = zchh; \
		zs->state_in_len = 1; \
	} else { \
		zs->state_in_len++; \
	} \
}
184
185
186/*---------------------------------------------------*/
static
/* Feed bytes from strm->next_in into the current block until either
 * the input is exhausted or the block is full (nblock >= nblockMAX).
 * The Bool "progress" return of upstream bzip2 is compiled out;
 * busybox's caller does not need it. */
void /*Bool*/ copy_input_until_stop(EState* s)
{
	/*Bool progress_in = False;*/

#ifdef SAME_CODE_AS_BELOW
	if (s->mode == BZ_M_RUNNING) {
		/*-- fast track the common case --*/
		while (1) {
			/*-- no input? --*/
			if (s->strm->avail_in == 0) break;
			/*-- block full? --*/
			if (s->nblock >= s->nblockMAX) break;
			/*progress_in = True;*/
			ADD_CHAR_TO_BLOCK(s, (uint32_t)(*(uint8_t*)(s->strm->next_in)));
			s->strm->next_in++;
			s->strm->avail_in--;
			/*s->strm->total_in++;*/
		}
	} else
#endif
	{
		/*-- general, uncommon case --*/
		while (1) {
			/*-- no input? --*/
			if (s->strm->avail_in == 0) break;
			/*-- block full? --*/
			if (s->nblock >= s->nblockMAX) break;
			//#	/*-- flush/finish end? --*/
			//#	if (s->avail_in_expect == 0) break;
			/*progress_in = True;*/
			ADD_CHAR_TO_BLOCK(s, *(uint8_t*)(s->strm->next_in));
			s->strm->next_in++;
			s->strm->avail_in--;
			/*s->strm->total_in++;*/
			//#	s->avail_in_expect--;
		}
	}
	/*return progress_in;*/
}
227
228
229/*---------------------------------------------------*/
230static
231void /*Bool*/ copy_output_until_stop(EState* s)
232{
233 /*Bool progress_out = False;*/
234
235 while (1) {
236 /*-- no output space? --*/
237 if (s->strm->avail_out == 0) break;
238
239 /*-- block done? --*/
240 if (s->state_out_pos >= s->numZ) break;
241
242 /*progress_out = True;*/
243 *(s->strm->next_out) = s->zbits[s->state_out_pos];
244 s->state_out_pos++;
245 s->strm->avail_out--;
246 s->strm->next_out++;
247 s->strm->total_out++;
248 }
249 /*return progress_out;*/
250}
251
252
253/*---------------------------------------------------*/
static
/* The compressor's main state machine.  Alternates between
 * BZ_S_INPUT (collect bytes into the block, compress the block when
 * it fills up or when finishing) and BZ_S_OUTPUT (drain the
 * compressed block to the caller's buffer), until no further
 * progress is possible with the caller-supplied buffers. */
void /*Bool*/ handle_compress(bz_stream *strm)
{
	/*Bool progress_in = False;*/
	/*Bool progress_out = False;*/
	EState* s = strm->state;

	while (1) {
		if (s->state == BZ_S_OUTPUT) {
			/*progress_out |=*/ copy_output_until_stop(s);
			/* output buffer full before the block drained? */
			if (s->state_out_pos < s->numZ) break;
			/* finishing and nothing left anywhere: done */
			if (s->mode == BZ_M_FINISHING
			//#	&& s->avail_in_expect == 0
			 && s->strm->avail_in == 0
			 && isempty_RL(s))
				break;
			prepare_new_block(s);
			s->state = BZ_S_INPUT;
#ifdef FLUSH_IS_UNUSED
			if (s->mode == BZ_M_FLUSHING
			 && s->avail_in_expect == 0
			 && isempty_RL(s))
				break;
#endif
		}

		if (s->state == BZ_S_INPUT) {
			/*progress_in |=*/ copy_input_until_stop(s);
			//#if (s->mode != BZ_M_RUNNING && s->avail_in_expect == 0) {
			if (s->mode != BZ_M_RUNNING && s->strm->avail_in == 0) {
				/* finishing: compress whatever we have, mark last */
				flush_RL(s);
				BZ2_compressBlock(s, (s->mode == BZ_M_FINISHING));
				s->state = BZ_S_OUTPUT;
			} else
			if (s->nblock >= s->nblockMAX) {
				/* block is full: compress it, not the last one */
				BZ2_compressBlock(s, 0);
				s->state = BZ_S_OUTPUT;
			} else
			if (s->strm->avail_in == 0) {
				/* ran out of input mid-block: wait for more */
				break;
			}
		}
	}

	/*return progress_in || progress_out;*/
}
300
301
302/*---------------------------------------------------*/
static
/* Drive the compressor.  action is BZ_RUN (consume input while
 * output space lasts; returns BZ_RUN_OK) or BZ_FINISH (flush and
 * finish the stream; returns BZ_FINISH_OK while output is still
 * pending, BZ_STREAM_END when complete).  BZ_FLUSH support is
 * compiled out (FLUSH_IS_UNUSED). */
int BZ2_bzCompress(bz_stream *strm, int action)
{
	/*Bool progress;*/
	EState* s;

	s = strm->state;

	switch (s->mode) {
	case BZ_M_RUNNING:
		if (action == BZ_RUN) {
			/*progress =*/ handle_compress(strm);
			/*return progress ? BZ_RUN_OK : BZ_PARAM_ERROR;*/
			return BZ_RUN_OK;
		}
#ifdef FLUSH_IS_UNUSED
		else
		if (action == BZ_FLUSH) {
			//#s->avail_in_expect = strm->avail_in;
			s->mode = BZ_M_FLUSHING;
			goto case_BZ_M_FLUSHING;
		}
#endif
		else
		/*if (action == BZ_FINISH)*/ {
			//#s->avail_in_expect = strm->avail_in;
			s->mode = BZ_M_FINISHING;
			goto case_BZ_M_FINISHING;
		}

#ifdef FLUSH_IS_UNUSED
 case_BZ_M_FLUSHING:
	case BZ_M_FLUSHING:
		/*if (s->avail_in_expect != s->strm->avail_in)
			return BZ_SEQUENCE_ERROR;*/
		/*progress =*/ handle_compress(strm);
		if (s->avail_in_expect > 0 || !isempty_RL(s) || s->state_out_pos < s->numZ)
			return BZ_FLUSH_OK;
		s->mode = BZ_M_RUNNING;
		return BZ_RUN_OK;
#endif

 case_BZ_M_FINISHING:
	/*case BZ_M_FINISHING:*/
	default:
		/*if (s->avail_in_expect != s->strm->avail_in)
			return BZ_SEQUENCE_ERROR;*/
		/*progress =*/ handle_compress(strm);
		/*if (!progress) return BZ_SEQUENCE_ERROR;*/
		//#if (s->avail_in_expect > 0 || !isempty_RL(s) || s->state_out_pos < s->numZ)
		//#	return BZ_FINISH_OK;
		/* anything left in input, RLE state, or output buffer? */
		if (s->strm->avail_in > 0 || !isempty_RL(s) || s->state_out_pos < s->numZ)
			return BZ_FINISH_OK;
		/*s->mode = BZ_M_IDLE;*/
		return BZ_STREAM_END;
	}
	/* return BZ_OK; --not reached--*/
}
361
362
363/*---------------------------------------------------*/
#if ENABLE_FEATURE_CLEAN_UP
static
/* Release every buffer owned by the compressor attached to strm,
 * then the state object itself. */
void BZ2_bzCompressEnd(bz_stream *strm)
{
	EState* s = strm->state;

	free(s->arr1);
	free(s->arr2);
	free(s->ftab);
	free(s->crc32table);
	free(s);
}
#endif
378
379
380/*---------------------------------------------------*/
381/*--- Misc convenience stuff ---*/
382/*---------------------------------------------------*/
383
384/*---------------------------------------------------*/
#ifdef EXAMPLE_CODE_FOR_MEM_TO_MEM_COMPRESSION
static
/* One-shot memory-to-memory compression (example code, not built).
 * On success *destLen is reduced to the compressed size.  Returns
 * BZ_OK, BZ_PARAM_ERROR, BZ_OUTBUFF_FULL (dest too small), or an
 * error code propagated from BZ2_bzCompress(). */
int BZ2_bzBuffToBuffCompress(char* dest,
		unsigned int* destLen,
		char* source,
		unsigned int sourceLen,
		int blockSize100k)
{
	bz_stream strm;
	int ret;

	if (dest == NULL || destLen == NULL
	 || source == NULL
	 || blockSize100k < 1 || blockSize100k > 9
	) {
		return BZ_PARAM_ERROR;
	}

	BZ2_bzCompressInit(&strm, blockSize100k);

	strm.next_in = source;
	strm.next_out = dest;
	strm.avail_in = sourceLen;
	strm.avail_out = *destLen;

	ret = BZ2_bzCompress(&strm, BZ_FINISH);
	/* BZ_FINISH_OK means output still pending: dest was too small */
	if (ret == BZ_FINISH_OK) goto output_overflow;
	if (ret != BZ_STREAM_END) goto errhandler;

	/* normal termination */
	*destLen -= strm.avail_out;
	BZ2_bzCompressEnd(&strm);
	return BZ_OK;

 output_overflow:
	BZ2_bzCompressEnd(&strm);
	return BZ_OUTBUFF_FULL;

 errhandler:
	BZ2_bzCompressEnd(&strm);
	return ret;
}
#endif
428
429/*-------------------------------------------------------------*/
430/*--- end bzlib.c ---*/
431/*-------------------------------------------------------------*/
diff --git a/archival/libarchive/bz/bzlib.h b/archival/libarchive/bz/bzlib.h
new file mode 100644
index 000000000..1bb811c4a
--- /dev/null
+++ b/archival/libarchive/bz/bzlib.h
@@ -0,0 +1,65 @@
1/*
2 * bzip2 is written by Julian Seward <jseward@bzip.org>.
3 * Adapted for busybox by Denys Vlasenko <vda.linux@googlemail.com>.
4 * See README and LICENSE files in this directory for more information.
5 */
6
7/*-------------------------------------------------------------*/
8/*--- Public header file for the library. ---*/
9/*--- bzlib.h ---*/
10/*-------------------------------------------------------------*/
11
12/* ------------------------------------------------------------------
13This file is part of bzip2/libbzip2, a program and library for
14lossless, block-sorting data compression.
15
16bzip2/libbzip2 version 1.0.4 of 20 December 2006
17Copyright (C) 1996-2006 Julian Seward <jseward@bzip.org>
18
19Please read the WARNING, DISCLAIMER and PATENTS sections in the
20README file.
21
22This program is released under the terms of the license contained
23in the file LICENSE.
24------------------------------------------------------------------ */
25
26#define BZ_RUN 0
27#define BZ_FLUSH 1
28#define BZ_FINISH 2
29
30#define BZ_OK 0
31#define BZ_RUN_OK 1
32#define BZ_FLUSH_OK 2
33#define BZ_FINISH_OK 3
34#define BZ_STREAM_END 4
35#define BZ_SEQUENCE_ERROR (-1)
36#define BZ_PARAM_ERROR (-2)
37#define BZ_MEM_ERROR (-3)
38#define BZ_DATA_ERROR (-4)
39#define BZ_DATA_ERROR_MAGIC (-5)
40#define BZ_IO_ERROR (-6)
41#define BZ_UNEXPECTED_EOF (-7)
42#define BZ_OUTBUFF_FULL (-8)
43#define BZ_CONFIG_ERROR (-9)
44
typedef struct bz_stream {
	void *state;        /* opaque: points at the EState for this stream */
	char *next_in;      /* next input byte to consume */
	char *next_out;     /* where the next output byte goes */
	unsigned avail_in;  /* bytes remaining at next_in */
	unsigned avail_out; /* free space remaining at next_out */
	/*unsigned long long total_in;*/
	unsigned long long total_out; /* total compressed bytes produced */
} bz_stream;
54
55/*-- Core (low-level) library functions --*/
56
57static void BZ2_bzCompressInit(bz_stream *strm, int blockSize100k);
58static int BZ2_bzCompress(bz_stream *strm, int action);
59#if ENABLE_FEATURE_CLEAN_UP
60static void BZ2_bzCompressEnd(bz_stream *strm);
61#endif
62
63/*-------------------------------------------------------------*/
64/*--- end bzlib.h ---*/
65/*-------------------------------------------------------------*/
diff --git a/archival/libarchive/bz/bzlib_private.h b/archival/libarchive/bz/bzlib_private.h
new file mode 100644
index 000000000..6430ce407
--- /dev/null
+++ b/archival/libarchive/bz/bzlib_private.h
@@ -0,0 +1,219 @@
1/*
2 * bzip2 is written by Julian Seward <jseward@bzip.org>.
3 * Adapted for busybox by Denys Vlasenko <vda.linux@googlemail.com>.
4 * See README and LICENSE files in this directory for more information.
5 */
6
7/*-------------------------------------------------------------*/
8/*--- Private header file for the library. ---*/
9/*--- bzlib_private.h ---*/
10/*-------------------------------------------------------------*/
11
12/* ------------------------------------------------------------------
13This file is part of bzip2/libbzip2, a program and library for
14lossless, block-sorting data compression.
15
16bzip2/libbzip2 version 1.0.4 of 20 December 2006
17Copyright (C) 1996-2006 Julian Seward <jseward@bzip.org>
18
19Please read the WARNING, DISCLAIMER and PATENTS sections in the
20README file.
21
22This program is released under the terms of the license contained
23in the file LICENSE.
24------------------------------------------------------------------ */
25
26/* #include "bzlib.h" */
27
28/*-- General stuff. --*/
29
30typedef unsigned char Bool;
31
32#define True ((Bool)1)
33#define False ((Bool)0)
34
/* AssertH: "hard" sanity check, active in light-debug builds;
 * dies via bz_assert_fail() with a numeric error code that
 * identifies the failing check site.  Compiles to nothing when
 * BZ_LIGHT_DEBUG is off. */
#if BZ_LIGHT_DEBUG
static void bz_assert_fail(int errcode) NORETURN;
#define AssertH(cond, errcode) \
do { \
	if (!(cond)) \
		bz_assert_fail(errcode); \
} while (0)
#else
#define AssertH(cond, msg) do { } while (0)
#endif

/* AssertD: extra checks for full-debug (BZ_DEBUG) builds only;
 * carries a descriptive message instead of a code. */
#if BZ_DEBUG
#define AssertD(cond, msg) \
do { \
	if (!(cond)) \
		bb_error_msg_and_die("(debug build): internal error %s", msg); \
} while (0)
#else
#define AssertD(cond, msg) do { } while (0)
#endif
55
56
57/*-- Header bytes. --*/
58
59#define BZ_HDR_B 0x42 /* 'B' */
60#define BZ_HDR_Z 0x5a /* 'Z' */
61#define BZ_HDR_h 0x68 /* 'h' */
62#define BZ_HDR_0 0x30 /* '0' */
63
64#define BZ_HDR_BZh0 0x425a6830
65
66/*-- Constants for the back end. --*/
67
68#define BZ_MAX_ALPHA_SIZE 258
69#define BZ_MAX_CODE_LEN 23
70
71#define BZ_RUNA 0
72#define BZ_RUNB 1
73
74#define BZ_N_GROUPS 6
75#define BZ_G_SIZE 50
76#define BZ_N_ITERS 4
77
78#define BZ_MAX_SELECTORS (2 + (900000 / BZ_G_SIZE))
79
80
81/*-- Stuff for doing CRCs. --*/
82
/* CRC over the pre-RLE input bytes.  Standard init/finalize with
 * all-ones and final inversion; the table comes from
 * crc32_filltable(NULL, 1) (see BZ2_bzCompressInit). */
#define BZ_INITIALISE_CRC(crcVar) \
{ \
	crcVar = 0xffffffffL; \
}

#define BZ_FINALISE_CRC(crcVar) \
{ \
	crcVar = ~(crcVar); \
}

/* Shift one byte through the CRC, MSB-first (table indexed by the
 * top byte of the running CRC). */
#define BZ_UPDATE_CRC(s, crcVar, cha) \
{ \
	crcVar = (crcVar << 8) ^ s->crc32table[(crcVar >> 24) ^ ((uint8_t)cha)]; \
}
97
98
99/*-- States and modes for compression. --*/
100
101#define BZ_M_IDLE 1
102#define BZ_M_RUNNING 2
103#define BZ_M_FLUSHING 3
104#define BZ_M_FINISHING 4
105
106#define BZ_S_OUTPUT 1
107#define BZ_S_INPUT 2
108
109#define BZ_N_RADIX 2
110#define BZ_N_QSORT 12
111#define BZ_N_SHELL 18
112#define BZ_N_OVERSHOOT (BZ_N_RADIX + BZ_N_QSORT + BZ_N_SHELL + 2)
113
114
115/*-- Structure holding all the compression-side stuff. --*/
116
/* All compression-side state; allocated in BZ2_bzCompressInit(). */
typedef struct EState {
	/* pointer back to the struct bz_stream */
	bz_stream *strm;

	/* mode this stream is in, and whether inputting */
	/* or outputting data */
	int32_t mode;   /* BZ_M_RUNNING / BZ_M_FINISHING (FLUSHING unused) */
	int32_t state;  /* BZ_S_INPUT or BZ_S_OUTPUT */

	/* remembers avail_in when flush/finish requested */
/* bbox: not needed, strm->avail_in always has the same value */
/* commented out with '//#' throughout the code */
	/* uint32_t avail_in_expect; */

	/* for doing the block sorting */
	int32_t origPtr;  /* sorted index of the unrotated string */
	uint32_t *arr1;
	uint32_t *arr2;
	uint32_t *ftab;

	/* aliases for arr1 and arr2 */
	uint32_t *ptr;    /* = arr1: sorted-order pointers */
	uint8_t *block;   /* = arr2: raw block bytes */
	uint16_t *mtfv;   /* = arr1: MTF values (reuses ptr's space) */
	uint8_t *zbits;   /* into arr2: the compressed bit stream */

	/* guess what */
	uint32_t *crc32table;

	/* run-length-encoding of the input */
	uint32_t state_in_ch;  /* pending byte, or 256 = "none" */
	int32_t state_in_len;  /* pending run length, 0..255 */

	/* input and output limits and current posns */
	int32_t nblock;        /* bytes collected into block[] so far */
	int32_t nblockMAX;     /* block capacity */
	int32_t numZ;          /* compressed bytes in zbits[] */
	int32_t state_out_pos; /* next zbits[] byte to hand to the caller */

	/* the buffer for bit stream creation */
	uint32_t bsBuff; /* bits queue at the top of this word (see bsW) */
	int32_t bsLive;  /* number of valid bits in bsBuff */

	/* block and combined CRCs */
	uint32_t blockCRC;
	uint32_t combinedCRC;

	/* misc administratium */
	int32_t blockNo;
	int32_t blockSize100k; /* 1..9: block size / 100000 */

	/* stuff for coding the MTF values */
	int32_t nMTF;

	/* map of bytes used in block */
	int32_t nInUse;
	Bool inUse[256] ALIGNED(sizeof(long));
	uint8_t unseqToSeq[256]; /* byte value -> dense symbol code */

	/* stuff for coding the MTF values */
	int32_t mtfFreq [BZ_MAX_ALPHA_SIZE];
	uint8_t selector [BZ_MAX_SELECTORS];
	uint8_t selectorMtf[BZ_MAX_SELECTORS];

	uint8_t len[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];

	/* stack-saving measures: these can be local, but they are too big */
	int32_t sendMTFValues__code [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
	int32_t sendMTFValues__rfreq[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
#if CONFIG_BZIP2_FEATURE_SPEED >= 5
	/* second dimension: only 3 needed; 4 makes index calculations faster */
	uint32_t sendMTFValues__len_pack[BZ_MAX_ALPHA_SIZE][4];
#endif
	int32_t BZ2_hbMakeCodeLengths__heap [BZ_MAX_ALPHA_SIZE + 2];
	int32_t BZ2_hbMakeCodeLengths__weight[BZ_MAX_ALPHA_SIZE * 2];
	int32_t BZ2_hbMakeCodeLengths__parent[BZ_MAX_ALPHA_SIZE * 2];

	int32_t mainSort__runningOrder[256];
	int32_t mainSort__copyStart[256];
	int32_t mainSort__copyEnd[256];
} EState;
198
199
200/*-- compression. --*/
201
202static void
203BZ2_blockSort(EState*);
204
205static void
206BZ2_compressBlock(EState*, int);
207
208static void
209BZ2_bsInitWrite(EState*);
210
211static void
212BZ2_hbAssignCodes(int32_t*, uint8_t*, int32_t, int32_t, int32_t);
213
214static void
215BZ2_hbMakeCodeLengths(EState*, uint8_t*, int32_t*, int32_t, int32_t);
216
217/*-------------------------------------------------------------*/
218/*--- end bzlib_private.h ---*/
219/*-------------------------------------------------------------*/
diff --git a/archival/libarchive/bz/compress.c b/archival/libarchive/bz/compress.c
new file mode 100644
index 000000000..6f1c70a08
--- /dev/null
+++ b/archival/libarchive/bz/compress.c
@@ -0,0 +1,685 @@
1/*
2 * bzip2 is written by Julian Seward <jseward@bzip.org>.
3 * Adapted for busybox by Denys Vlasenko <vda.linux@googlemail.com>.
4 * See README and LICENSE files in this directory for more information.
5 */
6
7/*-------------------------------------------------------------*/
8/*--- Compression machinery (not incl block sorting) ---*/
9/*--- compress.c ---*/
10/*-------------------------------------------------------------*/
11
12/* ------------------------------------------------------------------
13This file is part of bzip2/libbzip2, a program and library for
14lossless, block-sorting data compression.
15
16bzip2/libbzip2 version 1.0.4 of 20 December 2006
17Copyright (C) 1996-2006 Julian Seward <jseward@bzip.org>
18
19Please read the WARNING, DISCLAIMER and PATENTS sections in the
20README file.
21
22This program is released under the terms of the license contained
23in the file LICENSE.
24------------------------------------------------------------------ */
25
26/* CHANGES
27 * 0.9.0 -- original version.
28 * 0.9.0a/b -- no changes in this file.
29 * 0.9.0c -- changed setting of nGroups in sendMTFValues()
30 * so as to do a bit better on small files
31*/
32
33/* #include "bzlib_private.h" */
34
35/*---------------------------------------------------*/
36/*--- Bit stream I/O ---*/
37/*---------------------------------------------------*/
38
39/*---------------------------------------------------*/
static
/* Reset the bit-stream writer: empty accumulator, no live bits. */
void BZ2_bsInitWrite(EState* s)
{
	s->bsLive = 0;
	s->bsBuff = 0;
}
46
47
48/*---------------------------------------------------*/
49static NOINLINE
50void bsFinishWrite(EState* s)
51{
52 while (s->bsLive > 0) {
53 s->zbits[s->numZ] = (uint8_t)(s->bsBuff >> 24);
54 s->numZ++;
55 s->bsBuff <<= 8;
56 s->bsLive -= 8;
57 }
58}
59
60
61/*---------------------------------------------------*/
static
/* Helps only on level 5, on other levels hurts. ? */
#if CONFIG_BZIP2_FEATURE_SPEED >= 5
ALWAYS_INLINE
#endif
/* Append the low n bits of v to the output stream, MSB first.
 * Whole bytes are first drained from the top of the 32-bit
 * accumulator bsBuff; bsLive tracks how many bits are queued. */
void bsW(EState* s, int32_t n, uint32_t v)
{
	while (s->bsLive >= 8) {
		s->zbits[s->numZ] = (uint8_t)(s->bsBuff >> 24);
		s->numZ++;
		s->bsBuff <<= 8;
		s->bsLive -= 8;
	}
	/* after draining, bsLive <= 7, so the shift count stays < 32 */
	s->bsBuff |= (v << (32 - s->bsLive - n));
	s->bsLive += n;
}
78
79
80/*---------------------------------------------------*/
81static
82void bsPutU32(EState* s, unsigned u)
83{
84 bsW(s, 8, (u >> 24) & 0xff);
85 bsW(s, 8, (u >> 16) & 0xff);
86 bsW(s, 8, (u >> 8) & 0xff);
87 bsW(s, 8, u & 0xff);
88}
89
90
91/*---------------------------------------------------*/
92static
93void bsPutU16(EState* s, unsigned u)
94{
95 bsW(s, 8, (u >> 8) & 0xff);
96 bsW(s, 8, u & 0xff);
97}
98
99
100/*---------------------------------------------------*/
101/*--- The back end proper ---*/
102/*---------------------------------------------------*/
103
104/*---------------------------------------------------*/
105static
106void makeMaps_e(EState* s)
107{
108 int i;
109 s->nInUse = 0;
110 for (i = 0; i < 256; i++) {
111 if (s->inUse[i]) {
112 s->unseqToSeq[i] = s->nInUse;
113 s->nInUse++;
114 }
115 }
116}
117
118
119/*---------------------------------------------------*/
static NOINLINE
/* Apply the move-to-front transform to the sorted block, producing
 * MTF symbols in mtfv[] (zero runs encoded as RUNA/RUNB in bijective
 * base 2, terminated by an EOB symbol) and symbol frequencies in
 * s->mtfFreq[].  Sets s->nMTF to the number of symbols emitted. */
void generateMTFValues(EState* s)
{
	uint8_t yy[256];
	int32_t i, j;
	int32_t zPend;
	int32_t wr;
	int32_t EOB;

	/*
	 * After sorting (eg, here),
	 * s->arr1[0 .. s->nblock-1] holds sorted order,
	 * and
	 * ((uint8_t*)s->arr2)[0 .. s->nblock-1]
	 * holds the original block data.
	 *
	 * The first thing to do is generate the MTF values,
	 * and put them in ((uint16_t*)s->arr1)[0 .. s->nblock-1].
	 *
	 * Because there are strictly fewer or equal MTF values
	 * than block values, ptr values in this area are overwritten
	 * with MTF values only when they are no longer needed.
	 *
	 * The final compressed bitstream is generated into the
	 * area starting at &((uint8_t*)s->arr2)[s->nblock]
	 *
	 * These storage aliases are set up in bzCompressInit(),
	 * except for the last one, which is arranged in
	 * compressBlock().
	 */
	uint32_t* ptr = s->ptr;
	uint8_t* block = s->block;
	uint16_t* mtfv = s->mtfv;

	makeMaps_e(s);
	EOB = s->nInUse+1;

	for (i = 0; i <= EOB; i++)
		s->mtfFreq[i] = 0;

	wr = 0;
	zPend = 0;
	/* yy[] is the MTF list, initially the identity permutation */
	for (i = 0; i < s->nInUse; i++)
		yy[i] = (uint8_t) i;

	for (i = 0; i < s->nblock; i++) {
		uint8_t ll_i;
		AssertD(wr <= i, "generateMTFValues(1)");
		/* the BWT output byte is the one *preceding* each sorted
		 * rotation start (wrapping around the block) */
		j = ptr[i] - 1;
		if (j < 0)
			j += s->nblock;
		ll_i = s->unseqToSeq[block[j]];
		AssertD(ll_i < s->nInUse, "generateMTFValues(2a)");

		if (yy[0] == ll_i) {
			/* MTF value 0: just extend the pending zero run */
			zPend++;
		} else {
			if (zPend > 0) {
				/* flush the zero run as RUNA/RUNB symbols */
				zPend--;
				while (1) {
					if (zPend & 1) {
						mtfv[wr] = BZ_RUNB; wr++;
						s->mtfFreq[BZ_RUNB]++;
					} else {
						mtfv[wr] = BZ_RUNA; wr++;
						s->mtfFreq[BZ_RUNA]++;
					}
					if (zPend < 2) break;
					zPend = (uint32_t)(zPend - 2) / 2;
					/* bbox: unsigned div is easier */
				};
				zPend = 0;
			}
			{
				/* move ll_i to the front of yy[], shifting the
				 * intervening entries up by one */
				register uint8_t rtmp;
				register uint8_t* ryy_j;
				register uint8_t rll_i;
				rtmp = yy[1];
				yy[1] = yy[0];
				ryy_j = &(yy[1]);
				rll_i = ll_i;
				while (rll_i != rtmp) {
					register uint8_t rtmp2;
					ryy_j++;
					rtmp2 = rtmp;
					rtmp = *ryy_j;
					*ryy_j = rtmp2;
				};
				yy[0] = rtmp;
				j = ryy_j - &(yy[0]);
				/* emitted symbols are offset by 1 to leave room
				 * for RUNA/RUNB below them */
				mtfv[wr] = j+1;
				wr++;
				s->mtfFreq[j+1]++;
			}
		}
	}

	/* flush a zero run left pending at the end of the block */
	if (zPend > 0) {
		zPend--;
		while (1) {
			if (zPend & 1) {
				mtfv[wr] = BZ_RUNB;
				wr++;
				s->mtfFreq[BZ_RUNB]++;
			} else {
				mtfv[wr] = BZ_RUNA;
				wr++;
				s->mtfFreq[BZ_RUNA]++;
			}
			if (zPend < 2)
				break;
			zPend = (uint32_t)(zPend - 2) / 2;
			/* bbox: unsigned div is easier */
		};
		zPend = 0;
	}

	mtfv[wr] = EOB;
	wr++;
	s->mtfFreq[EOB]++;

	s->nMTF = wr;
}
243
244
245/*---------------------------------------------------*/
246#define BZ_LESSER_ICOST 0
247#define BZ_GREATER_ICOST 15
248
/*
 * Huffman-code the MTF/RLE2 symbol stream s->mtfv[0..s->nMTF-1] and write
 * it to the output bit stream via bsW():
 *  1. pick 2..6 coding tables based on s->nMTF;
 *  2. refine the tables over BZ_N_ITERS passes (each 50-symbol group is
 *     assigned its cheapest table, frequencies are re-accumulated, and the
 *     code lengths are rebuilt);
 *  3. emit: symbol usage map, selector list (MTF-coded, unary), per-table
 *     delta-coded code lengths, then the coded symbols themselves.
 */
static NOINLINE
void sendMTFValues(EState* s)
{
	int32_t v, t, i, j, gs, ge, totc, bt, bc, iter;
	int32_t nSelectors, alphaSize, minLen, maxLen, selCtr;
	int32_t nGroups, nBytes;

	/*
	 * uint8_t len[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
	 * is a global since the decoder also needs it.
	 *
	 * int32_t code[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
	 * int32_t rfreq[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
	 * are also globals only used in this proc.
	 * Made global to keep stack frame size small.
	 */
#define code sendMTFValues__code
#define rfreq sendMTFValues__rfreq
#define len_pack sendMTFValues__len_pack

	uint16_t cost[BZ_N_GROUPS];
	int32_t fave[BZ_N_GROUPS];

	uint16_t* mtfv = s->mtfv;

	/* alphabet = nInUse MTF symbols + RUNA/RUNB + EOB */
	alphaSize = s->nInUse + 2;
	for (t = 0; t < BZ_N_GROUPS; t++)
		for (v = 0; v < alphaSize; v++)
			s->len[t][v] = BZ_GREATER_ICOST;

	/*--- Decide how many coding tables to use ---*/
	AssertH(s->nMTF > 0, 3001);
	if (s->nMTF < 200) nGroups = 2; else
	if (s->nMTF < 600) nGroups = 3; else
	if (s->nMTF < 1200) nGroups = 4; else
	if (s->nMTF < 2400) nGroups = 5; else
	nGroups = 6;

	/*--- Generate an initial set of coding tables ---*/
	/* Partition the alphabet into nGroups runs of roughly equal total
	 * frequency; table nPart-1 gets cheap cost inside its run and
	 * expensive cost outside, so the first refinement pass below has a
	 * sensible starting point. */
	{
		int32_t nPart, remF, tFreq, aFreq;

		nPart = nGroups;
		remF = s->nMTF;
		gs = 0;
		while (nPart > 0) {
			tFreq = remF / nPart;
			ge = gs - 1;
			aFreq = 0;
			while (aFreq < tFreq && ge < alphaSize-1) {
				ge++;
				aFreq += s->mtfFreq[ge];
			}

			if (ge > gs
			 && nPart != nGroups && nPart != 1
			 && ((nGroups - nPart) % 2 == 1) /* bbox: can this be replaced by x & 1? */
			) {
				aFreq -= s->mtfFreq[ge];
				ge--;
			}

			for (v = 0; v < alphaSize; v++)
				if (v >= gs && v <= ge)
					s->len[nPart-1][v] = BZ_LESSER_ICOST;
				else
					s->len[nPart-1][v] = BZ_GREATER_ICOST;

			nPart--;
			gs = ge + 1;
			remF -= aFreq;
		}
	}

	/*
	 * Iterate up to BZ_N_ITERS times to improve the tables.
	 */
	for (iter = 0; iter < BZ_N_ITERS; iter++) {
		for (t = 0; t < nGroups; t++)
			fave[t] = 0;

		for (t = 0; t < nGroups; t++)
			for (v = 0; v < alphaSize; v++)
				s->rfreq[t][v] = 0;

#if CONFIG_BZIP2_FEATURE_SPEED >= 5
		/*
		 * Set up an auxiliary length table which is used to fast-track
		 * the common case (nGroups == 6).
		 * Each len_pack word holds two tables' costs (16 bits each),
		 * so all six costs are summed with three adds per symbol.
		 */
		if (nGroups == 6) {
			for (v = 0; v < alphaSize; v++) {
				s->len_pack[v][0] = (s->len[1][v] << 16) | s->len[0][v];
				s->len_pack[v][1] = (s->len[3][v] << 16) | s->len[2][v];
				s->len_pack[v][2] = (s->len[5][v] << 16) | s->len[4][v];
			}
		}
#endif
		nSelectors = 0;
		totc = 0;
		gs = 0;
		while (1) {
			/*--- Set group start & end marks. --*/
			if (gs >= s->nMTF)
				break;
			ge = gs + BZ_G_SIZE - 1;
			if (ge >= s->nMTF)
				ge = s->nMTF-1;

			/*
			 * Calculate the cost of this group as coded
			 * by each of the coding tables.
			 */
			for (t = 0; t < nGroups; t++)
				cost[t] = 0;
#if CONFIG_BZIP2_FEATURE_SPEED >= 5
			if (nGroups == 6 && 50 == ge-gs+1) {
				/*--- fast track the common case ---*/
				register uint32_t cost01, cost23, cost45;
				register uint16_t icv;
				cost01 = cost23 = cost45 = 0;
#define BZ_ITER(nn) \
	icv = mtfv[gs+(nn)]; \
	cost01 += s->len_pack[icv][0]; \
	cost23 += s->len_pack[icv][1]; \
	cost45 += s->len_pack[icv][2];
				BZ_ITER(0); BZ_ITER(1); BZ_ITER(2); BZ_ITER(3); BZ_ITER(4);
				BZ_ITER(5); BZ_ITER(6); BZ_ITER(7); BZ_ITER(8); BZ_ITER(9);
				BZ_ITER(10); BZ_ITER(11); BZ_ITER(12); BZ_ITER(13); BZ_ITER(14);
				BZ_ITER(15); BZ_ITER(16); BZ_ITER(17); BZ_ITER(18); BZ_ITER(19);
				BZ_ITER(20); BZ_ITER(21); BZ_ITER(22); BZ_ITER(23); BZ_ITER(24);
				BZ_ITER(25); BZ_ITER(26); BZ_ITER(27); BZ_ITER(28); BZ_ITER(29);
				BZ_ITER(30); BZ_ITER(31); BZ_ITER(32); BZ_ITER(33); BZ_ITER(34);
				BZ_ITER(35); BZ_ITER(36); BZ_ITER(37); BZ_ITER(38); BZ_ITER(39);
				BZ_ITER(40); BZ_ITER(41); BZ_ITER(42); BZ_ITER(43); BZ_ITER(44);
				BZ_ITER(45); BZ_ITER(46); BZ_ITER(47); BZ_ITER(48); BZ_ITER(49);
#undef BZ_ITER
				cost[0] = cost01 & 0xffff; cost[1] = cost01 >> 16;
				cost[2] = cost23 & 0xffff; cost[3] = cost23 >> 16;
				cost[4] = cost45 & 0xffff; cost[5] = cost45 >> 16;

			} else
#endif
			{
				/*--- slow version which correctly handles all situations ---*/
				for (i = gs; i <= ge; i++) {
					uint16_t icv = mtfv[i];
					for (t = 0; t < nGroups; t++)
						cost[t] += s->len[t][icv];
				}
			}
			/*
			 * Find the coding table which is best for this group,
			 * and record its identity in the selector table.
			 */
			/*bc = 999999999;*/
			/*bt = -1;*/
			bc = cost[0];
			bt = 0;
			for (t = 1 /*0*/; t < nGroups; t++) {
				if (cost[t] < bc) {
					bc = cost[t];
					bt = t;
				}
			}
			totc += bc;
			fave[bt]++;
			s->selector[nSelectors] = bt;
			nSelectors++;

			/*
			 * Increment the symbol frequencies for the selected table.
			 */
/* 1% faster compress. +800 bytes */
#if CONFIG_BZIP2_FEATURE_SPEED >= 4
			if (nGroups == 6 && 50 == ge-gs+1) {
				/*--- fast track the common case ---*/
#define BZ_ITUR(nn) s->rfreq[bt][mtfv[gs + (nn)]]++
				BZ_ITUR(0); BZ_ITUR(1); BZ_ITUR(2); BZ_ITUR(3); BZ_ITUR(4);
				BZ_ITUR(5); BZ_ITUR(6); BZ_ITUR(7); BZ_ITUR(8); BZ_ITUR(9);
				BZ_ITUR(10); BZ_ITUR(11); BZ_ITUR(12); BZ_ITUR(13); BZ_ITUR(14);
				BZ_ITUR(15); BZ_ITUR(16); BZ_ITUR(17); BZ_ITUR(18); BZ_ITUR(19);
				BZ_ITUR(20); BZ_ITUR(21); BZ_ITUR(22); BZ_ITUR(23); BZ_ITUR(24);
				BZ_ITUR(25); BZ_ITUR(26); BZ_ITUR(27); BZ_ITUR(28); BZ_ITUR(29);
				BZ_ITUR(30); BZ_ITUR(31); BZ_ITUR(32); BZ_ITUR(33); BZ_ITUR(34);
				BZ_ITUR(35); BZ_ITUR(36); BZ_ITUR(37); BZ_ITUR(38); BZ_ITUR(39);
				BZ_ITUR(40); BZ_ITUR(41); BZ_ITUR(42); BZ_ITUR(43); BZ_ITUR(44);
				BZ_ITUR(45); BZ_ITUR(46); BZ_ITUR(47); BZ_ITUR(48); BZ_ITUR(49);
#undef BZ_ITUR
				gs = ge + 1;
			} else
#endif
			{
				/*--- slow version which correctly handles all situations ---*/
				while (gs <= ge) {
					s->rfreq[bt][mtfv[gs]]++;
					gs++;
				}
				/* already is: gs = ge + 1; */
			}
		}

		/*
		 * Recompute the tables based on the accumulated frequencies.
		 */
		/* maxLen was changed from 20 to 17 in bzip2-1.0.3. See
		 * comment in huffman.c for details. */
		for (t = 0; t < nGroups; t++)
			BZ2_hbMakeCodeLengths(s, &(s->len[t][0]), &(s->rfreq[t][0]), alphaSize, 17 /*20*/);
	}

	AssertH(nGroups < 8, 3002);
	AssertH(nSelectors < 32768 && nSelectors <= (2 + (900000 / BZ_G_SIZE)), 3003);

	/*--- Compute MTF values for the selectors. ---*/
	{
		uint8_t pos[BZ_N_GROUPS], ll_i, tmp2, tmp;

		for (i = 0; i < nGroups; i++)
			pos[i] = i;
		for (i = 0; i < nSelectors; i++) {
			ll_i = s->selector[i];
			j = 0;
			tmp = pos[j];
			while (ll_i != tmp) {
				j++;
				tmp2 = tmp;
				tmp = pos[j];
				pos[j] = tmp2;
			};
			pos[0] = tmp;
			s->selectorMtf[i] = j;
		}
	};

	/*--- Assign actual codes for the tables. --*/
	for (t = 0; t < nGroups; t++) {
		minLen = 32;
		maxLen = 0;
		for (i = 0; i < alphaSize; i++) {
			if (s->len[t][i] > maxLen) maxLen = s->len[t][i];
			if (s->len[t][i] < minLen) minLen = s->len[t][i];
		}
		AssertH(!(maxLen > 17 /*20*/), 3004);
		AssertH(!(minLen < 1), 3005);
		BZ2_hbAssignCodes(&(s->code[t][0]), &(s->len[t][0]), minLen, maxLen, alphaSize);
	}

	/*--- Transmit the mapping table. ---*/
	{
		/* bbox: optimized a bit more than in bzip2 */
		/* NOTE(review): scans the inUse[] byte array through
		 * uint32_t*/uint64_t* casts — assumes the array is suitably
		 * aligned inside EState; confirm against bzlib_private.h. */
		int inUse16 = 0;
		for (i = 0; i < 16; i++) {
			if (sizeof(long) <= 4) {
				inUse16 = inUse16*2 +
					((*(uint32_t*)&(s->inUse[i * 16 + 0])
					| *(uint32_t*)&(s->inUse[i * 16 + 4])
					| *(uint32_t*)&(s->inUse[i * 16 + 8])
					| *(uint32_t*)&(s->inUse[i * 16 + 12])) != 0);
			} else { /* Our CPU can do better */
				inUse16 = inUse16*2 +
					((*(uint64_t*)&(s->inUse[i * 16 + 0])
					| *(uint64_t*)&(s->inUse[i * 16 + 8])) != 0);
			}
		}

		/* nBytes is assigned here and below but never read in this
		 * function (leftover from upstream bzip2's verbosity stats) */
		nBytes = s->numZ;
		bsW(s, 16, inUse16);

		inUse16 <<= (sizeof(int)*8 - 16); /* move 15th bit into sign bit */
		for (i = 0; i < 16; i++) {
			if (inUse16 < 0) {
				unsigned v16 = 0;
				for (j = 0; j < 16; j++)
					v16 = v16*2 + s->inUse[i * 16 + j];
				bsW(s, 16, v16);
			}
			inUse16 <<= 1;
		}
	}

	/*--- Now the selectors. ---*/
	nBytes = s->numZ;
	bsW(s, 3, nGroups);
	bsW(s, 15, nSelectors);
	for (i = 0; i < nSelectors; i++) {
		/* unary code: selectorMtf[i] ones followed by a zero */
		for (j = 0; j < s->selectorMtf[i]; j++)
			bsW(s, 1, 1);
		bsW(s, 1, 0);
	}

	/*--- Now the coding tables. ---*/
	nBytes = s->numZ;

	for (t = 0; t < nGroups; t++) {
		/* lengths are delta-coded: '10' = +1, '11' = -1, '0' = done */
		int32_t curr = s->len[t][0];
		bsW(s, 5, curr);
		for (i = 0; i < alphaSize; i++) {
			while (curr < s->len[t][i]) { bsW(s, 2, 2); curr++; /* 10 */ };
			while (curr > s->len[t][i]) { bsW(s, 2, 3); curr--; /* 11 */ };
			bsW(s, 1, 0);
		}
	}

	/*--- And finally, the block data proper ---*/
	nBytes = s->numZ;
	selCtr = 0;
	gs = 0;
	while (1) {
		if (gs >= s->nMTF)
			break;
		ge = gs + BZ_G_SIZE - 1;
		if (ge >= s->nMTF)
			ge = s->nMTF-1;
		AssertH(s->selector[selCtr] < nGroups, 3006);

/* Costs 1300 bytes and is _slower_ (on Intel Core 2) */
#if 0
		if (nGroups == 6 && 50 == ge-gs+1) {
			/*--- fast track the common case ---*/
			uint16_t mtfv_i;
			uint8_t* s_len_sel_selCtr = &(s->len[s->selector[selCtr]][0]);
			int32_t* s_code_sel_selCtr = &(s->code[s->selector[selCtr]][0]);
#define BZ_ITAH(nn) \
	mtfv_i = mtfv[gs+(nn)]; \
	bsW(s, s_len_sel_selCtr[mtfv_i], s_code_sel_selCtr[mtfv_i])
			BZ_ITAH(0); BZ_ITAH(1); BZ_ITAH(2); BZ_ITAH(3); BZ_ITAH(4);
			BZ_ITAH(5); BZ_ITAH(6); BZ_ITAH(7); BZ_ITAH(8); BZ_ITAH(9);
			BZ_ITAH(10); BZ_ITAH(11); BZ_ITAH(12); BZ_ITAH(13); BZ_ITAH(14);
			BZ_ITAH(15); BZ_ITAH(16); BZ_ITAH(17); BZ_ITAH(18); BZ_ITAH(19);
			BZ_ITAH(20); BZ_ITAH(21); BZ_ITAH(22); BZ_ITAH(23); BZ_ITAH(24);
			BZ_ITAH(25); BZ_ITAH(26); BZ_ITAH(27); BZ_ITAH(28); BZ_ITAH(29);
			BZ_ITAH(30); BZ_ITAH(31); BZ_ITAH(32); BZ_ITAH(33); BZ_ITAH(34);
			BZ_ITAH(35); BZ_ITAH(36); BZ_ITAH(37); BZ_ITAH(38); BZ_ITAH(39);
			BZ_ITAH(40); BZ_ITAH(41); BZ_ITAH(42); BZ_ITAH(43); BZ_ITAH(44);
			BZ_ITAH(45); BZ_ITAH(46); BZ_ITAH(47); BZ_ITAH(48); BZ_ITAH(49);
#undef BZ_ITAH
			gs = ge+1;
		} else
#endif
		{
			/*--- slow version which correctly handles all situations ---*/
			/* code is bit bigger, but moves multiply out of the loop */
			uint8_t* s_len_sel_selCtr = &(s->len [s->selector[selCtr]][0]);
			int32_t* s_code_sel_selCtr = &(s->code[s->selector[selCtr]][0]);
			while (gs <= ge) {
				bsW(s,
					s_len_sel_selCtr[mtfv[gs]],
					s_code_sel_selCtr[mtfv[gs]]
				);
				gs++;
			}
			/* already is: gs = ge+1; */
		}
		selCtr++;
	}
	AssertH(selCtr == nSelectors, 3007);
#undef code
#undef rfreq
#undef len_pack
}
610
611
612/*---------------------------------------------------*/
/*
 * Compress one block and append it to the output bit stream.
 * If s->nblock > 0, the block's CRC is finalised and folded into the
 * stream's combined CRC (rotate-left-1 then XOR), the block is BWT-sorted,
 * and block header + CRC + MTF/Huffman data are emitted.
 * The first block also writes the "BZh<level>" stream header; the last
 * block (is_last_block != 0) writes the stream trailer + combined CRC and
 * flushes the bit buffer. An empty block with is_last_block set produces
 * a valid empty stream.
 */
static
void BZ2_compressBlock(EState* s, int is_last_block)
{
	if (s->nblock > 0) {
		BZ_FINALISE_CRC(s->blockCRC);
		/* combinedCRC = rotl32(combinedCRC, 1) ^ blockCRC */
		s->combinedCRC = (s->combinedCRC << 1) | (s->combinedCRC >> 31);
		s->combinedCRC ^= s->blockCRC;
		/* for every block after the first, restart output accounting */
		if (s->blockNo > 1)
			s->numZ = 0;

		BZ2_blockSort(s);
	}

	/* compressed bits are written after the block data inside arr2 */
	s->zbits = &((uint8_t*)s->arr2)[s->nblock];

	/*-- If this is the first block, create the stream header. --*/
	if (s->blockNo == 1) {
		BZ2_bsInitWrite(s);
		/*bsPutU8(s, BZ_HDR_B);*/
		/*bsPutU8(s, BZ_HDR_Z);*/
		/*bsPutU8(s, BZ_HDR_h);*/
		/*bsPutU8(s, BZ_HDR_0 + s->blockSize100k);*/
		bsPutU32(s, BZ_HDR_BZh0 + s->blockSize100k);
	}

	if (s->nblock > 0) {
		/* block magic 0x314159265359 ("pi"), written as 32+16 bits */
		/*bsPutU8(s, 0x31);*/
		/*bsPutU8(s, 0x41);*/
		/*bsPutU8(s, 0x59);*/
		/*bsPutU8(s, 0x26);*/
		bsPutU32(s, 0x31415926);
		/*bsPutU8(s, 0x53);*/
		/*bsPutU8(s, 0x59);*/
		bsPutU16(s, 0x5359);

		/*-- Now the block's CRC, so it is in a known place. --*/
		bsPutU32(s, s->blockCRC);

		/*
		 * Now a single bit indicating (non-)randomisation.
		 * As of version 0.9.5, we use a better sorting algorithm
		 * which makes randomisation unnecessary. So always set
		 * the randomised bit to 'no'. Of course, the decoder
		 * still needs to be able to handle randomised blocks
		 * so as to maintain backwards compatibility with
		 * older versions of bzip2.
		 */
		bsW(s, 1, 0);

		bsW(s, 24, s->origPtr);
		generateMTFValues(s);
		sendMTFValues(s);
	}

	/*-- If this is the last block, add the stream trailer. --*/
	if (is_last_block) {
		/* stream end magic 0x177245385090 ("sqrt(pi)"), 32+16 bits */
		/*bsPutU8(s, 0x17);*/
		/*bsPutU8(s, 0x72);*/
		/*bsPutU8(s, 0x45);*/
		/*bsPutU8(s, 0x38);*/
		bsPutU32(s, 0x17724538);
		/*bsPutU8(s, 0x50);*/
		/*bsPutU8(s, 0x90);*/
		bsPutU16(s, 0x5090);
		bsPutU32(s, s->combinedCRC);
		bsFinishWrite(s);
	}
}
681
682
683/*-------------------------------------------------------------*/
684/*--- end compress.c ---*/
685/*-------------------------------------------------------------*/
diff --git a/archival/libarchive/bz/huffman.c b/archival/libarchive/bz/huffman.c
new file mode 100644
index 000000000..676b1af66
--- /dev/null
+++ b/archival/libarchive/bz/huffman.c
@@ -0,0 +1,229 @@
1/*
2 * bzip2 is written by Julian Seward <jseward@bzip.org>.
3 * Adapted for busybox by Denys Vlasenko <vda.linux@googlemail.com>.
4 * See README and LICENSE files in this directory for more information.
5 */
6
7/*-------------------------------------------------------------*/
8/*--- Huffman coding low-level stuff ---*/
9/*--- huffman.c ---*/
10/*-------------------------------------------------------------*/
11
12/* ------------------------------------------------------------------
13This file is part of bzip2/libbzip2, a program and library for
14lossless, block-sorting data compression.
15
16bzip2/libbzip2 version 1.0.4 of 20 December 2006
17Copyright (C) 1996-2006 Julian Seward <jseward@bzip.org>
18
19Please read the WARNING, DISCLAIMER and PATENTS sections in the
20README file.
21
22This program is released under the terms of the license contained
23in the file LICENSE.
24------------------------------------------------------------------ */
25
26/* #include "bzlib_private.h" */
27
28/*---------------------------------------------------*/
29#define WEIGHTOF(zz0) ((zz0) & 0xffffff00)
30#define DEPTHOF(zz1) ((zz1) & 0x000000ff)
31#define MYMAX(zz2,zz3) ((zz2) > (zz3) ? (zz2) : (zz3))
32
33#define ADDWEIGHTS(zw1,zw2) \
34 (WEIGHTOF(zw1)+WEIGHTOF(zw2)) | \
35 (1 + MYMAX(DEPTHOF(zw1),DEPTHOF(zw2)))
36
37#define UPHEAP(z) \
38{ \
39 int32_t zz, tmp; \
40 zz = z; \
41 tmp = heap[zz]; \
42 while (weight[tmp] < weight[heap[zz >> 1]]) { \
43 heap[zz] = heap[zz >> 1]; \
44 zz >>= 1; \
45 } \
46 heap[zz] = tmp; \
47}
48
49
50/* 90 bytes, 0.3% of overall compress speed */
51#if CONFIG_BZIP2_FEATURE_SPEED >= 1
52
53/* macro works better than inline (gcc 4.2.1) */
54#define DOWNHEAP1(heap, weight, Heap) \
55{ \
56 int32_t zz, yy, tmp; \
57 zz = 1; \
58 tmp = heap[zz]; \
59 while (1) { \
60 yy = zz << 1; \
61 if (yy > nHeap) \
62 break; \
63 if (yy < nHeap \
64 && weight[heap[yy+1]] < weight[heap[yy]]) \
65 yy++; \
66 if (weight[tmp] < weight[heap[yy]]) \
67 break; \
68 heap[zz] = heap[yy]; \
69 zz = yy; \
70 } \
71 heap[zz] = tmp; \
72}
73
74#else
75
/* Sift the root element heap[1] down until the min-heap property holds
 * again. heap[] stores 1-based node indices; weight[] gives the ordering
 * key for each node; nHeap is the current number of heap entries. */
static
void DOWNHEAP1(int32_t *heap, int32_t *weight, int32_t nHeap)
{
	int32_t hole = 1;
	int32_t moving = heap[hole];

	for (;;) {
		int32_t child = hole * 2;

		if (child > nHeap)
			break;
		/* of the two children, follow the lighter one (if it exists) */
		if (child < nHeap && weight[heap[child + 1]] < weight[heap[child]])
			child++;
		if (weight[moving] < weight[heap[child]])
			break;
		heap[hole] = heap[child];
		hole = child;
	}
	heap[hole] = moving;
}
96
97#endif
98
99/*---------------------------------------------------*/
/*
 * Build Huffman code lengths for alphaSize symbols from their frequencies.
 * freq[0..alphaSize-1] are the symbol counts (0 is treated as 1);
 * len[0..alphaSize-1] receives the resulting bit lengths, each <= maxLen.
 * Uses a min-heap keyed on node weight; if any code exceeds maxLen, all
 * frequencies are scaled down (roughly halved) and the tree is rebuilt.
 * Weights pack (frequency << 8) | depth, see WEIGHTOF/DEPTHOF/ADDWEIGHTS.
 */
static
void BZ2_hbMakeCodeLengths(EState *s,
		uint8_t *len,
		int32_t *freq,
		int32_t alphaSize,
		int32_t maxLen)
{
	/*
	 * Nodes and heap entries run from 1. Entry 0
	 * for both the heap and nodes is a sentinel.
	 */
	int32_t nNodes, nHeap, n1, n2, i, j, k;
	Bool tooLong;

	/* bbox: moved to EState to save stack
	int32_t heap [BZ_MAX_ALPHA_SIZE + 2];
	int32_t weight[BZ_MAX_ALPHA_SIZE * 2];
	int32_t parent[BZ_MAX_ALPHA_SIZE * 2];
	*/
#define heap (s->BZ2_hbMakeCodeLengths__heap)
#define weight (s->BZ2_hbMakeCodeLengths__weight)
#define parent (s->BZ2_hbMakeCodeLengths__parent)

	/* leaf weights: frequency in the high 24 bits, depth (0) in the low 8 */
	for (i = 0; i < alphaSize; i++)
		weight[i+1] = (freq[i] == 0 ? 1 : freq[i]) << 8;

	while (1) {
		nNodes = alphaSize;
		nHeap = 0;

		heap[0] = 0;
		weight[0] = 0;
		parent[0] = -2;

		/* push all leaves onto the min-heap */
		for (i = 1; i <= alphaSize; i++) {
			parent[i] = -1;
			nHeap++;
			heap[nHeap] = i;
			UPHEAP(nHeap);
		}

		AssertH(nHeap < (BZ_MAX_ALPHA_SIZE+2), 2001);

		/* standard Huffman: repeatedly merge the two lightest nodes */
		while (nHeap > 1) {
			n1 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP1(heap, weight, nHeap);
			n2 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP1(heap, weight, nHeap);
			nNodes++;
			parent[n1] = parent[n2] = nNodes;
			weight[nNodes] = ADDWEIGHTS(weight[n1], weight[n2]);
			parent[nNodes] = -1;
			nHeap++;
			heap[nHeap] = nNodes;
			UPHEAP(nHeap);
		}

		AssertH(nNodes < (BZ_MAX_ALPHA_SIZE * 2), 2002);

		/* a leaf's code length = number of parent links up to the root */
		tooLong = False;
		for (i = 1; i <= alphaSize; i++) {
			j = 0;
			k = i;
			while (parent[k] >= 0) {
				k = parent[k];
				j++;
			}
			len[i-1] = j;
			if (j > maxLen)
				tooLong = True;
		}

		if (!tooLong)
			break;

		/* 17 Oct 04: keep-going condition for the following loop used
		to be 'i < alphaSize', which missed the last element,
		theoretically leading to the possibility of the compressor
		looping. However, this count-scaling step is only needed if
		one of the generated Huffman code words is longer than
		maxLen, which up to and including version 1.0.2 was 20 bits,
		which is extremely unlikely. In version 1.0.3 maxLen was
		changed to 17 bits, which has minimal effect on compression
		ratio, but does mean this scaling step is used from time to
		time, enough to verify that it works.

		This means that bzip2-1.0.3 and later will only produce
		Huffman codes with a maximum length of 17 bits. However, in
		order to preserve backwards compatibility with bitstreams
		produced by versions pre-1.0.3, the decompressor must still
		handle lengths of up to 20. */

		/* flatten the frequency distribution and retry */
		for (i = 1; i <= alphaSize; i++) {
			j = weight[i] >> 8;
			/* bbox: yes, it is a signed division.
			 * don't replace with shift! */
			j = 1 + (j / 2);
			weight[i] = j << 8;
		}
	}
#undef heap
#undef weight
#undef parent
}
202
203
204/*---------------------------------------------------*/
/* Assign canonical Huffman codes from code lengths.
 * For every bit length from minLen to maxLen, symbols with that length
 * (length[i]) receive consecutive code values; the running value is
 * doubled when moving to the next length. Results go into code[i]. */
static
void BZ2_hbAssignCodes(int32_t *code,
		uint8_t *length,
		int32_t minLen,
		int32_t maxLen,
		int32_t alphaSize)
{
	int32_t bits;
	int32_t next_code = 0;

	for (bits = minLen; bits <= maxLen; bits++) {
		int32_t sym;

		for (sym = 0; sym < alphaSize; sym++) {
			if (length[sym] != bits)
				continue;
			code[sym] = next_code++;
		}
		next_code <<= 1;
	}
}
225
226
227/*-------------------------------------------------------------*/
228/*--- end huffman.c ---*/
229/*-------------------------------------------------------------*/
diff --git a/archival/libarchive/data_align.c b/archival/libarchive/data_align.c
new file mode 100644
index 000000000..2e56fa8ff
--- /dev/null
+++ b/archival/libarchive/data_align.c
@@ -0,0 +1,15 @@
1/* vi: set sw=4 ts=4: */
2/*
3 * Licensed under GPLv2 or later, see file LICENSE in this source tree.
4 */
5
6#include "libbb.h"
7#include "archive.h"
8
9void FAST_FUNC data_align(archive_handle_t *archive_handle, unsigned boundary)
10{
11 unsigned skip_amount = (boundary - (archive_handle->offset % boundary)) % boundary;
12
13 archive_handle->seek(archive_handle->src_fd, skip_amount);
14 archive_handle->offset += skip_amount;
15}
diff --git a/archival/libarchive/data_extract_all.c b/archival/libarchive/data_extract_all.c
new file mode 100644
index 000000000..1b25c8bd6
--- /dev/null
+++ b/archival/libarchive/data_extract_all.c
@@ -0,0 +1,200 @@
1/* vi: set sw=4 ts=4: */
2/*
3 * Licensed under GPLv2 or later, see file LICENSE in this source tree.
4 */
5
6#include "libbb.h"
7#include "archive.h"
8
/*
 * Create the filesystem object described by archive_handle->file_header:
 * regular file (copying file_header->size bytes from src_fd), directory,
 * symlink, hard link, or device/fifo/socket node. Honors the ah_flags
 * bits for creating leading directories, unlinking/replacing existing
 * entries, and restoring owner/permissions/mtime.
 * Hard links are encoded in-archive as zero-size regular files that also
 * carry a link_target (see comments below).
 */
void FAST_FUNC data_extract_all(archive_handle_t *archive_handle)
{
	file_header_t *file_header = archive_handle->file_header;
	int dst_fd;
	int res;

#if ENABLE_FEATURE_TAR_SELINUX
	char *sctx = archive_handle->tar__next_file_sctx;
	if (!sctx)
		sctx = archive_handle->tar__global_sctx;
	if (sctx) { /* setfscreatecon is 4 syscalls, avoid if possible */
		setfscreatecon(sctx);
		free(archive_handle->tar__next_file_sctx);
		archive_handle->tar__next_file_sctx = NULL;
	}
#endif

	if (archive_handle->ah_flags & ARCHIVE_CREATE_LEADING_DIRS) {
		/* mkdir -p the dirname() part of the entry, in place */
		char *slash = strrchr(file_header->name, '/');
		if (slash) {
			*slash = '\0';
			bb_make_directory(file_header->name, -1, FILEUTILS_RECUR);
			*slash = '/';
		}
	}

	if (archive_handle->ah_flags & ARCHIVE_UNLINK_OLD) {
		/* Remove the entry if it exists */
		if (!S_ISDIR(file_header->mode)) {
			/* Is it hardlink?
			 * We encode hard links as regular files of size 0 with a symlink */
			if (S_ISREG(file_header->mode)
			 && file_header->link_target
			 && file_header->size == 0
			) {
				/* Ugly special case:
				 * tar cf t.tar hardlink1 hardlink2 hardlink1
				 * results in this tarball structure:
				 * hardlink1
				 * hardlink2 -> hardlink1
				 * hardlink1 -> hardlink1 <== !!!
				 */
				if (strcmp(file_header->link_target, file_header->name) == 0)
					goto ret;
			}
			/* Proceed with deleting */
			if (unlink(file_header->name) == -1
			 && errno != ENOENT
			) {
				bb_perror_msg_and_die("can't remove old file %s",
						file_header->name);
			}
		}
	}
	else if (archive_handle->ah_flags & ARCHIVE_EXTRACT_NEWER) {
		/* Remove the existing entry if its older than the extracted entry */
		struct stat existing_sb;
		if (lstat(file_header->name, &existing_sb) == -1) {
			if (errno != ENOENT) {
				bb_perror_msg_and_die("can't stat old file");
			}
		}
		else if (existing_sb.st_mtime >= file_header->mtime) {
			/* on-disk copy is newer or same age: skip this entry's data */
			if (!(archive_handle->ah_flags & ARCHIVE_EXTRACT_QUIET)
			 && !S_ISDIR(file_header->mode)
			) {
				bb_error_msg("%s not created: newer or "
					"same age file exists", file_header->name);
			}
			data_skip(archive_handle);
			goto ret;
		}
		else if ((unlink(file_header->name) == -1) && (errno != EISDIR)) {
			bb_perror_msg_and_die("can't remove old file %s",
					file_header->name);
		}
	}

	/* Handle hard links separately
	 * We encode hard links as regular files of size 0 with a symlink */
	if (S_ISREG(file_header->mode)
	 && file_header->link_target
	 && file_header->size == 0
	) {
		/* hard link */
		res = link(file_header->link_target, file_header->name);
		if ((res == -1) && !(archive_handle->ah_flags & ARCHIVE_EXTRACT_QUIET)) {
			bb_perror_msg("can't create %slink "
					"from %s to %s", "hard",
					file_header->name,
					file_header->link_target);
		}
		/* Hardlinks have no separate mode/ownership, skip chown/chmod */
		goto ret;
	}

	/* Create the filesystem entry */
	switch (file_header->mode & S_IFMT) {
	case S_IFREG: {
		/* Regular file */
		int flags = O_WRONLY | O_CREAT | O_EXCL;
		if (archive_handle->ah_flags & ARCHIVE_O_TRUNC)
			flags = O_WRONLY | O_CREAT | O_TRUNC;
		dst_fd = xopen3(file_header->name,
			flags,
			file_header->mode
			);
		bb_copyfd_exact_size(archive_handle->src_fd, dst_fd, file_header->size);
		close(dst_fd);
		break;
	}
	case S_IFDIR:
		res = mkdir(file_header->name, file_header->mode);
		if ((res == -1)
		 && (errno != EISDIR) /* btw, Linux doesn't return this */
		 && (errno != EEXIST)
		 && !(archive_handle->ah_flags & ARCHIVE_EXTRACT_QUIET)
		) {
			bb_perror_msg("can't make dir %s", file_header->name);
		}
		break;
	case S_IFLNK:
		/* Symlink */
//TODO: what if file_header->link_target == NULL (say, corrupted tarball?)
		res = symlink(file_header->link_target, file_header->name);
		if ((res == -1)
		 && !(archive_handle->ah_flags & ARCHIVE_EXTRACT_QUIET)
		) {
			bb_perror_msg("can't create %slink "
				"from %s to %s", "sym",
				file_header->name,
				file_header->link_target);
		}
		break;
	case S_IFSOCK:
	case S_IFBLK:
	case S_IFCHR:
	case S_IFIFO:
		res = mknod(file_header->name, file_header->mode, file_header->device);
		if ((res == -1)
		 && !(archive_handle->ah_flags & ARCHIVE_EXTRACT_QUIET)
		) {
			bb_perror_msg("can't create node %s", file_header->name);
		}
		break;
	default:
		bb_error_msg_and_die("unrecognized file type");
	}

	/* restore metadata (not for symlinks: chmod/chown would follow them) */
	if (!S_ISLNK(file_header->mode)) {
		if (!(archive_handle->ah_flags & ARCHIVE_DONT_RESTORE_OWNER)) {
			uid_t uid = file_header->uid;
			gid_t gid = file_header->gid;
#if ENABLE_FEATURE_TAR_UNAME_GNAME
			/* prefer symbolic owner names over numeric ids, if present */
			if (!(archive_handle->ah_flags & ARCHIVE_NUMERIC_OWNER)) {
				if (file_header->tar__uname) {
//TODO: cache last name/id pair?
					struct passwd *pwd = getpwnam(file_header->tar__uname);
					if (pwd) uid = pwd->pw_uid;
				}
				if (file_header->tar__gname) {
					struct group *grp = getgrnam(file_header->tar__gname);
					if (grp) gid = grp->gr_gid;
				}
			}
#endif
			/* GNU tar 1.15.1 uses chown, not lchown */
			chown(file_header->name, uid, gid);
		}
		/* uclibc has no lchmod, glibc is even stranger -
		 * it has lchmod which seems to do nothing!
		 * so we use chmod... */
		if (!(archive_handle->ah_flags & ARCHIVE_DONT_RESTORE_PERM)) {
			chmod(file_header->name, file_header->mode);
		}
		if (archive_handle->ah_flags & ARCHIVE_RESTORE_DATE) {
			struct timeval t[2];

			/* set both atime and mtime to the archive's mtime */
			t[1].tv_sec = t[0].tv_sec = file_header->mtime;
			t[1].tv_usec = t[0].tv_usec = 0;
			utimes(file_header->name, t);
		}
	}

 ret: ;
#if ENABLE_FEATURE_TAR_SELINUX
	if (sctx) {
		/* reset the context after creating an entry */
		setfscreatecon(NULL);
	}
#endif
}
diff --git a/archival/libarchive/data_extract_to_command.c b/archival/libarchive/data_extract_to_command.c
new file mode 100644
index 000000000..2bbab7641
--- /dev/null
+++ b/archival/libarchive/data_extract_to_command.c
@@ -0,0 +1,134 @@
1/* vi: set sw=4 ts=4: */
2/*
3 * Licensed under GPLv2 or later, see file LICENSE in this source tree.
4 */
5
6#include "libbb.h"
7#include "archive.h"
8
/* Indices into the tar_env[] array of exported TAR_* variables.
 * Must stay in step with the tar_var[] name table below. */
enum {
	//TAR_FILETYPE,
	TAR_MODE,
	TAR_FILENAME,
	TAR_REALNAME,
#if ENABLE_FEATURE_TAR_UNAME_GNAME
	TAR_UNAME,
	TAR_GNAME,
#endif
	TAR_SIZE,
	TAR_UID,
	TAR_GID,
	TAR_MAX,
};

/* Variable name suffixes; the "TAR_" prefix is added when the
 * environment string is built (see str2env/dec2env/oct2env) */
static const char *const tar_var[] = {
	// "FILETYPE",
	"MODE",
	"FILENAME",
	"REALNAME",
#if ENABLE_FEATURE_TAR_UNAME_GNAME
	"UNAME",
	"GNAME",
#endif
	"SIZE",
	"UID",
	"GID",
};
37
/* putenv() wrapper that dies on failure.
 * str must stay valid for as long as it is part of the environment. */
static void xputenv(char *str)
{
	if (putenv(str))
		bb_error_msg_and_die(bb_msg_memory_exhausted);
}
43
/* Build "TAR_<name>=<str>" on the heap, remember it in env[idx]
 * (so the caller can unset/free it later) and export it */
static void str2env(char *env[], int idx, const char *str)
{
	env[idx] = xasprintf("TAR_%s=%s", tar_var[idx], str);
	xputenv(env[idx]);
}
49
/* Like str2env, but formats an unsigned value in decimal */
static void dec2env(char *env[], int idx, unsigned long long val)
{
	env[idx] = xasprintf("TAR_%s=%llu", tar_var[idx], val);
	xputenv(env[idx]);
}
55
/* Like str2env, but formats an unsigned value in octal (used for mode) */
static void oct2env(char *env[], int idx, unsigned long val)
{
	env[idx] = xasprintf("TAR_%s=%lo", tar_var[idx], val);
	xputenv(env[idx]);
}
61
/*
 * Pipe a regular-file archive member into a user-supplied shell command
 * (tar --to-command): fork, export TAR_MODE/TAR_FILENAME/... into the
 * child's environment, run "sh -c <tar__to_command>" with the member's
 * data on stdin, then wait and die loudly if the command failed.
 * Non-regular members are silently ignored.
 */
void FAST_FUNC data_extract_to_command(archive_handle_t *archive_handle)
{
	file_header_t *file_header = archive_handle->file_header;

#if 0 /* do we need this? ENABLE_FEATURE_TAR_SELINUX */
	char *sctx = archive_handle->tar__next_file_sctx;
	if (!sctx)
		sctx = archive_handle->tar__global_sctx;
	if (sctx) { /* setfscreatecon is 4 syscalls, avoid if possible */
		setfscreatecon(sctx);
		free(archive_handle->tar__next_file_sctx);
		archive_handle->tar__next_file_sctx = NULL;
	}
#endif

	if ((file_header->mode & S_IFMT) == S_IFREG) {
		pid_t pid;
		int p[2], status;
		char *tar_env[TAR_MAX];

		memset(tar_env, 0, sizeof(tar_env));

		xpipe(p);
		/* on NOMMU, vfork: child only sets env and execs */
		pid = BB_MMU ? xfork() : xvfork();
		if (pid == 0) {
			/* Child */
			/* str2env(tar_env, TAR_FILETYPE, "f"); - parent should do it once */
			oct2env(tar_env, TAR_MODE, file_header->mode);
			str2env(tar_env, TAR_FILENAME, file_header->name);
			str2env(tar_env, TAR_REALNAME, file_header->name);
#if ENABLE_FEATURE_TAR_UNAME_GNAME
			str2env(tar_env, TAR_UNAME, file_header->tar__uname);
			str2env(tar_env, TAR_GNAME, file_header->tar__gname);
#endif
			dec2env(tar_env, TAR_SIZE, file_header->size);
			dec2env(tar_env, TAR_UID, file_header->uid);
			dec2env(tar_env, TAR_GID, file_header->gid);
			close(p[1]);
			xdup2(p[0], STDIN_FILENO);
			signal(SIGPIPE, SIG_DFL);
			execl(DEFAULT_SHELL, DEFAULT_SHELL_SHORT_NAME, "-c", archive_handle->tar__to_command, NULL);
			bb_perror_msg_and_die("can't execute '%s'", DEFAULT_SHELL);
		}
		close(p[0]);
		/* Our caller is expected to do signal(SIGPIPE, SIG_IGN)
		 * so that we don't die if child don't read all the input: */
		/* NOTE(review): size is passed negated — presumably a libbb
		 * convention telling bb_copyfd_exact_size to tolerate a short
		 * copy; confirm against libbb's implementation. */
		bb_copyfd_exact_size(archive_handle->src_fd, p[1], -file_header->size);
		close(p[1]);

		if (safe_waitpid(pid, &status, 0) == -1)
			bb_perror_msg_and_die("waitpid");
		if (WIFEXITED(status) && WEXITSTATUS(status))
			bb_error_msg_and_die("'%s' returned status %d",
				archive_handle->tar__to_command, WEXITSTATUS(status));
		if (WIFSIGNALED(status))
			bb_error_msg_and_die("'%s' terminated on signal %d",
				archive_handle->tar__to_command, WTERMSIG(status));

		/* on NOMMU the child's putenv()s polluted OUR environment:
		 * undo them */
		if (!BB_MMU) {
			int i;
			for (i = 0; i < TAR_MAX; i++) {
				if (tar_env[i])
					bb_unsetenv_and_free(tar_env[i]);
			}
		}
	}

#if 0 /* ENABLE_FEATURE_TAR_SELINUX */
	if (sctx)
		/* reset the context after creating an entry */
		setfscreatecon(NULL);
#endif
}
diff --git a/archival/libarchive/data_extract_to_stdout.c b/archival/libarchive/data_extract_to_stdout.c
new file mode 100644
index 000000000..91f3f3539
--- /dev/null
+++ b/archival/libarchive/data_extract_to_stdout.c
@@ -0,0 +1,14 @@
1/* vi: set sw=4 ts=4: */
2/*
3 * Licensed under GPLv2 or later, see file LICENSE in this source tree.
4 */
5
6#include "libbb.h"
7#include "archive.h"
8
/* Copy exactly file_header->size bytes of the current archive member
 * from src_fd to stdout (dies on short read/write) */
void FAST_FUNC data_extract_to_stdout(archive_handle_t *archive_handle)
{
	bb_copyfd_exact_size(archive_handle->src_fd,
			STDOUT_FILENO,
			archive_handle->file_header->size);
}
diff --git a/archival/libarchive/data_skip.c b/archival/libarchive/data_skip.c
new file mode 100644
index 000000000..a055424e2
--- /dev/null
+++ b/archival/libarchive/data_skip.c
@@ -0,0 +1,12 @@
1/* vi: set sw=4 ts=4: */
2/*
3 * Licensed under GPLv2 or later, see file LICENSE in this source tree.
4 */
5
6#include "libbb.h"
7#include "archive.h"
8
/* Skip over the current archive member's data without extracting it,
 * using the handle's seek callback (lseek or read-and-discard) */
void FAST_FUNC data_skip(archive_handle_t *archive_handle)
{
	archive_handle->seek(archive_handle->src_fd, archive_handle->file_header->size);
}
diff --git a/archival/libarchive/decompress_bunzip2.c b/archival/libarchive/decompress_bunzip2.c
new file mode 100644
index 000000000..4e46e6849
--- /dev/null
+++ b/archival/libarchive/decompress_bunzip2.c
@@ -0,0 +1,822 @@
1/* vi: set sw=4 ts=4: */
2/* Small bzip2 deflate implementation, by Rob Landley (rob@landley.net).
3
4 Based on bzip2 decompression code by Julian R Seward (jseward@acm.org),
5 which also acknowledges contributions by Mike Burrows, David Wheeler,
6 Peter Fenwick, Alistair Moffat, Radford Neal, Ian H. Witten,
7 Robert Sedgewick, and Jon L. Bentley.
8
9 Licensed under GPLv2 or later, see file LICENSE in this source tree.
10*/
11
12/*
13 Size and speed optimizations by Manuel Novoa III (mjn3@codepoet.org).
14
15 More efficient reading of Huffman codes, a streamlined read_bunzip()
16 function, and various other tweaks. In (limited) tests, approximately
17 20% faster than bzcat on x86 and about 10% faster on arm.
18
19 Note that about 2/3 of the time is spent in read_bunzip() reversing
20 the Burrows-Wheeler transformation. Much of that time is delay
21 resulting from cache misses.
22
23 (2010 update by vda: profiled "bzcat <84mbyte.bz2 >/dev/null"
24 on x86-64 CPU with L2 > 1M: get_next_block is hotter than read_bunzip:
25 %time seconds calls function
26 71.01 12.69 444 get_next_block
27 28.65 5.12 93065 read_bunzip
28 00.22 0.04 7736490 get_bits
29 00.11 0.02 47 dealloc_bunzip
30 00.00 0.00 93018 full_write
31 ...)
32
33
34 I would ask that anyone benefiting from this work, especially those
35 using it in commercial products, consider making a donation to my local
36 non-profit hospice organization (www.hospiceacadiana.com) in the name of
37 the woman I loved, Toni W. Hagan, who passed away Feb. 12, 2003.
38
39 Manuel
40 */
41
42#include "libbb.h"
43#include "archive.h"
44
45/* Constants for Huffman coding */
46#define MAX_GROUPS 6
47#define GROUP_SIZE 50 /* 64 would have been more efficient */
48#define MAX_HUFCODE_BITS 20 /* Longest Huffman code allowed */
49#define MAX_SYMBOLS 258 /* 256 literals + RUNA + RUNB */
50#define SYMBOL_RUNA 0
51#define SYMBOL_RUNB 1
52
53/* Status return values */
54#define RETVAL_OK 0
55#define RETVAL_LAST_BLOCK (-1)
56#define RETVAL_NOT_BZIP_DATA (-2)
57#define RETVAL_UNEXPECTED_INPUT_EOF (-3)
58#define RETVAL_SHORT_WRITE (-4)
59#define RETVAL_DATA_ERROR (-5)
60#define RETVAL_OUT_OF_MEMORY (-6)
61#define RETVAL_OBSOLETE_INPUT (-7)
62
63/* Other housekeeping constants */
64#define IOBUF_SIZE 4096
65
/* This is what we know about each Huffman coding group */
struct group_data {
	/* We have an extra slot at the end of limit[] for a sentinel value. */
	int limit[MAX_HUFCODE_BITS+1], base[MAX_HUFCODE_BITS], permute[MAX_SYMBOLS];
	/* Shortest/longest code length used by this group; get_next_block
	 * rejects lengths outside 1..MAX_HUFCODE_BITS */
	int minLen, maxLen;
};
72
/* Structure holding all the housekeeping data, including IO buffers and
 * memory that persists between calls to bunzip
 * Found the most used member:
 * cat this_file.c | sed -e 's/"/ /g' -e "s/'/ /g" | xargs -n1 \
 * | grep 'bd->' | sed 's/^.*bd->/bd->/' | sort | $PAGER
 * and moved it (inbufBitCount) to offset 0.
 */
struct bunzip_data {
	/* I/O tracking data (file handles, buffers, positions, etc.) */
	unsigned inbufBitCount, inbufBits;
	int in_fd, out_fd, inbufCount, inbufPos /*, outbufPos*/;
	uint8_t *inbuf /*,*outbuf*/;

	/* State for interrupting output loop */
	/* writeCount < 0 doubles as a sticky error/EOF indicator
	 * (read_bunzip returns it immediately on re-entry) */
	int writeCopies, writePos, writeRunCountdown, writeCount;
	int writeCurrent; /* actually a uint8_t */

	/* The CRC values stored in the block header and calculated from the data */
	uint32_t headerCRC, totalCRC, writeCRC;

	/* Intermediate buffer and its size (in bytes) */
	uint32_t *dbuf;
	unsigned dbufSize;

	/* For I/O error handling */
	/* Set up in start_bunzip; get_bits longjmps here on input EOF */
	jmp_buf jmpbuf;

	/* Big things go last (register-relative addressing can be larger for big offsets) */
	uint32_t crc32Table[256];
	uint8_t selectors[32768]; /* nSelectors=15 bits */
	struct group_data groups[MAX_GROUPS]; /* Huffman coding tables */
};
105/* typedef struct bunzip_data bunzip_data; -- done in .h file */
106
107
/* Return the next nnn bits of input. All reads from the compressed input
   are done through this function. All reads are big endian.
   On input exhaustion this does NOT return an error: it longjmps out
   through bd->jmpbuf (set up in start_bunzip). */
static unsigned get_bits(bunzip_data *bd, int bits_wanted)
{
	unsigned bits = 0;
	/* Cache bd->inbufBitCount in a CPU register (hopefully): */
	int bit_count = bd->inbufBitCount;

	/* If we need to get more data from the byte buffer, do so. (Loop getting
	   one byte at a time to enforce endianness and avoid unaligned access.) */
	while (bit_count < bits_wanted) {

		/* If we need to read more data from file into byte buffer, do so */
		if (bd->inbufPos == bd->inbufCount) {
			/* if "no input fd" case: in_fd == -1, read fails, we jump */
			bd->inbufCount = read(bd->in_fd, bd->inbuf, IOBUF_SIZE);
			if (bd->inbufCount <= 0)
				longjmp(bd->jmpbuf, RETVAL_UNEXPECTED_INPUT_EOF);
			bd->inbufPos = 0;
		}

		/* Avoid 32-bit overflow (dump bit buffer to top of output) */
		if (bit_count >= 24) {
			bits = bd->inbufBits & ((1 << bit_count) - 1);
			bits_wanted -= bit_count;
			bits <<= bits_wanted;
			bit_count = 0;
		}

		/* Grab next 8 bits of input from buffer. */
		bd->inbufBits = (bd->inbufBits << 8) | bd->inbuf[bd->inbufPos++];
		bit_count += 8;
	}

	/* Calculate result: take bits_wanted bits from the top of the
	   accumulated bit buffer, leaving the remainder for the next call */
	bit_count -= bits_wanted;
	bd->inbufBitCount = bit_count;
	bits |= (bd->inbufBits >> bit_count) & ((1 << bits_wanted) - 1);

	return bits;
}
149
/* Unpacks the next block and sets up for the inverse Burrows-Wheeler step. */
/* Returns RETVAL_OK, RETVAL_LAST_BLOCK (end-of-stream signature seen), or a
   negative RETVAL_* error. On success the decoded block is in bd->dbuf and
   bd->write* state is primed for read_bunzip. Input EOF longjmps out via
   bd->jmpbuf (from get_bits). */
static int get_next_block(bunzip_data *bd)
{
	struct group_data *hufGroup;
	int dbufCount, dbufSize, groupCount, *base, *limit, selector,
		i, j, t, runPos, symCount, symTotal, nSelectors, byteCount[256];
	int runCnt = runCnt; /* for compiler: silence "may be used uninitialized" */
	uint8_t uc, symToByte[256], mtfSymbol[256], *selectors;
	uint32_t *dbuf;
	unsigned origPtr;

	dbuf = bd->dbuf;
	dbufSize = bd->dbufSize;
	selectors = bd->selectors;

/* In bbox, we are ok with aborting through setjmp which is set up in start_bunzip */
#if 0
	/* Reset longjmp I/O error handling */
	i = setjmp(bd->jmpbuf);
	if (i) return i;
#endif

	/* Read in header signature and CRC, then validate signature.
	   (last block signature means CRC is for whole file, return now) */
	i = get_bits(bd, 24);
	j = get_bits(bd, 24);
	bd->headerCRC = get_bits(bd, 32);
	if ((i == 0x177245) && (j == 0x385090)) return RETVAL_LAST_BLOCK;
	if ((i != 0x314159) || (j != 0x265359)) return RETVAL_NOT_BZIP_DATA;

	/* We can add support for blockRandomised if anybody complains. There was
	   some code for this in busybox 1.0.0-pre3, but nobody ever noticed that
	   it didn't actually work. */
	if (get_bits(bd, 1)) return RETVAL_OBSOLETE_INPUT;
	origPtr = get_bits(bd, 24);
	if ((int)origPtr > dbufSize) return RETVAL_DATA_ERROR;

	/* mapping table: if some byte values are never used (encoding things
	   like ascii text), the compression code removes the gaps to have fewer
	   symbols to deal with, and writes a sparse bitfield indicating which
	   values were present. We make a translation table to convert the symbols
	   back to the corresponding bytes. */
	symTotal = 0;
	i = 0;
	t = get_bits(bd, 16);
	do {
		if (t & (1 << 15)) {
			unsigned inner_map = get_bits(bd, 16);
			do {
				if (inner_map & (1 << 15))
					symToByte[symTotal++] = i;
				inner_map <<= 1;
				i++;
			} while (i & 15);
			i -= 16;
		}
		t <<= 1;
		i += 16;
	} while (i < 256);

	/* How many different Huffman coding groups does this block use? */
	groupCount = get_bits(bd, 3);
	if (groupCount < 2 || groupCount > MAX_GROUPS)
		return RETVAL_DATA_ERROR;

	/* nSelectors: Every GROUP_SIZE many symbols we select a new Huffman coding
	   group. Read in the group selector list, which is stored as MTF encoded
	   bit runs. (MTF=Move To Front, as each value is used it's moved to the
	   start of the list.) */
	for (i = 0; i < groupCount; i++)
		mtfSymbol[i] = i;
	nSelectors = get_bits(bd, 15);
	if (!nSelectors)
		return RETVAL_DATA_ERROR;
	for (i = 0; i < nSelectors; i++) {
		uint8_t tmp_byte;
		/* Get next value: unary-coded index into the MTF list */
		int n = 0;
		while (get_bits(bd, 1)) {
			if (n >= groupCount) return RETVAL_DATA_ERROR;
			n++;
		}
		/* Decode MTF to get the next selector */
		tmp_byte = mtfSymbol[n];
		while (--n >= 0)
			mtfSymbol[n + 1] = mtfSymbol[n];
		mtfSymbol[0] = selectors[i] = tmp_byte;
	}

	/* Read the Huffman coding tables for each group, which code for symTotal
	   literal symbols, plus two run symbols (RUNA, RUNB) */
	symCount = symTotal + 2;
	for (j = 0; j < groupCount; j++) {
		uint8_t length[MAX_SYMBOLS];
		/* 8 bits is ALMOST enough for temp[], see below */
		unsigned temp[MAX_HUFCODE_BITS+1];
		int minLen, maxLen, pp, len_m1;

		/* Read Huffman code lengths for each symbol. They're stored in
		   a way similar to mtf; record a starting value for the first symbol,
		   and an offset from the previous value for every symbol after that.
		   (Subtracting 1 before the loop and then adding it back at the end is
		   an optimization that makes the test inside the loop simpler: symbol
		   length 0 becomes negative, so an unsigned inequality catches it.) */
		len_m1 = get_bits(bd, 5) - 1;
		for (i = 0; i < symCount; i++) {
			for (;;) {
				int two_bits;
				if ((unsigned)len_m1 > (MAX_HUFCODE_BITS-1))
					return RETVAL_DATA_ERROR;

				/* If first bit is 0, stop. Else second bit indicates whether
				   to increment or decrement the value. Optimization: grab 2
				   bits and unget the second if the first was 0. */
				two_bits = get_bits(bd, 2);
				if (two_bits < 2) {
					bd->inbufBitCount++;
					break;
				}

				/* Add one if second bit 1, else subtract 1. Avoids if/else */
				len_m1 += (((two_bits+1) & 2) - 1);
			}

			/* Correct for the initial -1, to get the final symbol length */
			length[i] = len_m1 + 1;
		}

		/* Find largest and smallest lengths in this group */
		minLen = maxLen = length[0];
		for (i = 1; i < symCount; i++) {
			if (length[i] > maxLen) maxLen = length[i];
			else if (length[i] < minLen) minLen = length[i];
		}

		/* Calculate permute[], base[], and limit[] tables from length[].
		 *
		 * permute[] is the lookup table for converting Huffman coded symbols
		 * into decoded symbols. base[] is the amount to subtract from the
		 * value of a Huffman symbol of a given length when using permute[].
		 *
		 * limit[] indicates the largest numerical value a symbol with a given
		 * number of bits can have. This is how the Huffman codes can vary in
		 * length: each code with a value>limit[length] needs another bit.
		 */
		hufGroup = bd->groups + j;
		hufGroup->minLen = minLen;
		hufGroup->maxLen = maxLen;

		/* Note that minLen can't be smaller than 1, so we adjust the base
		   and limit array pointers so we're not always wasting the first
		   entry. We do this again when using them (during symbol decoding). */
		base = hufGroup->base - 1;
		limit = hufGroup->limit - 1;

		/* Calculate permute[]. Concurrently, initialize temp[] and limit[]. */
		pp = 0;
		for (i = minLen; i <= maxLen; i++) {
			int k;
			temp[i] = limit[i] = 0;
			for (k = 0; k < symCount; k++)
				if (length[k] == i)
					hufGroup->permute[pp++] = k;
		}

		/* Count symbols coded for at each bit length */
		/* NB: in pathological cases, temp[8] can end up being 256.
		 * That's why uint8_t is too small for temp[]. */
		for (i = 0; i < symCount; i++) temp[length[i]]++;

		/* Calculate limit[] (the largest symbol-coding value at each bit
		 * length, which is (previous limit<<1)+symbols at this level), and
		 * base[] (number of symbols to ignore at each bit length, which is
		 * limit minus the cumulative count of symbols coded for already). */
		pp = t = 0;
		for (i = minLen; i < maxLen;) {
			unsigned temp_i = temp[i];

			pp += temp_i;

			/* We read the largest possible symbol size and then unget bits
			   after determining how many we need, and those extra bits could
			   be set to anything. (They're noise from future symbols.) At
			   each level we're really only interested in the first few bits,
			   so here we set all the trailing to-be-ignored bits to 1 so they
			   don't affect the value>limit[length] comparison. */
			limit[i] = (pp << (maxLen - i)) - 1;
			pp <<= 1;
			t += temp_i;
			base[++i] = pp - t;
		}
		limit[maxLen] = pp + temp[maxLen] - 1;
		limit[maxLen+1] = INT_MAX; /* Sentinel value for reading next sym. */
		base[minLen] = 0;
	}

	/* We've finished reading and digesting the block header. Now read this
	   block's Huffman coded symbols from the file and undo the Huffman coding
	   and run length encoding, saving the result into dbuf[dbufCount++] = uc */

	/* Initialize symbol occurrence counters and symbol Move To Front table */
	/*memset(byteCount, 0, sizeof(byteCount)); - smaller, but slower */
	for (i = 0; i < 256; i++) {
		byteCount[i] = 0;
		mtfSymbol[i] = (uint8_t)i;
	}

	/* Loop through compressed symbols. */

	runPos = dbufCount = selector = 0;
	for (;;) {
		int nextSym;

		/* Fetch next Huffman coding group from list. */
		symCount = GROUP_SIZE - 1;
		if (selector >= nSelectors) return RETVAL_DATA_ERROR;
		hufGroup = bd->groups + selectors[selector++];
		base = hufGroup->base - 1;
		limit = hufGroup->limit - 1;

 continue_this_group:
		/* Read next Huffman-coded symbol. */

		/* Note: It is far cheaper to read maxLen bits and back up than it is
		   to read minLen bits and then add additional bit at a time, testing
		   as we go. Because there is a trailing last block (with file CRC),
		   there is no danger of the overread causing an unexpected EOF for a
		   valid compressed file.
		 */
		if (1) {
			/* As a further optimization, we do the read inline
			   (falling back to a call to get_bits if the buffer runs dry).
			 */
			int new_cnt;
			while ((new_cnt = bd->inbufBitCount - hufGroup->maxLen) < 0) {
				/* bd->inbufBitCount < hufGroup->maxLen */
				if (bd->inbufPos == bd->inbufCount) {
					nextSym = get_bits(bd, hufGroup->maxLen);
					goto got_huff_bits;
				}
				bd->inbufBits = (bd->inbufBits << 8) | bd->inbuf[bd->inbufPos++];
				bd->inbufBitCount += 8;
			};
			bd->inbufBitCount = new_cnt; /* "bd->inbufBitCount -= hufGroup->maxLen;" */
			nextSym = (bd->inbufBits >> new_cnt) & ((1 << hufGroup->maxLen) - 1);
 got_huff_bits: ;
		} else { /* unoptimized equivalent */
			nextSym = get_bits(bd, hufGroup->maxLen);
		}
		/* Figure how many bits are in next symbol and unget extras */
		i = hufGroup->minLen;
		while (nextSym > limit[i]) ++i;
		j = hufGroup->maxLen - i;
		if (j < 0)
			return RETVAL_DATA_ERROR;
		bd->inbufBitCount += j;

		/* Huffman decode value to get nextSym (with bounds checking) */
		nextSym = (nextSym >> j) - base[i];
		if ((unsigned)nextSym >= MAX_SYMBOLS)
			return RETVAL_DATA_ERROR;
		nextSym = hufGroup->permute[nextSym];

		/* We have now decoded the symbol, which indicates either a new literal
		   byte, or a repeated run of the most recent literal byte. First,
		   check if nextSym indicates a repeated run, and if so loop collecting
		   how many times to repeat the last literal. */
		if ((unsigned)nextSym <= SYMBOL_RUNB) { /* RUNA or RUNB */

			/* If this is the start of a new run, zero out counter */
			if (runPos == 0) {
				runPos = 1;
				runCnt = 0;
			}

			/* Neat trick that saves 1 symbol: instead of or-ing 0 or 1 at
			   each bit position, add 1 or 2 instead. For example,
			   1011 is 1<<0 + 1<<1 + 2<<2. 1010 is 2<<0 + 2<<1 + 1<<2.
			   You can make any bit pattern that way using 1 less symbol than
			   the basic or 0/1 method (except all bits 0, which would use no
			   symbols, but a run of length 0 doesn't mean anything in this
			   context). Thus space is saved. */
			runCnt += (runPos << nextSym); /* +runPos if RUNA; +2*runPos if RUNB */
			if (runPos < dbufSize) runPos <<= 1;
			goto end_of_huffman_loop;
		}

		/* When we hit the first non-run symbol after a run, we now know
		   how many times to repeat the last literal, so append that many
		   copies to our buffer of decoded symbols (dbuf) now. (The last
		   literal used is the one at the head of the mtfSymbol array.) */
		if (runPos != 0) {
			uint8_t tmp_byte;
			if (dbufCount + runCnt >= dbufSize) return RETVAL_DATA_ERROR;
			tmp_byte = symToByte[mtfSymbol[0]];
			byteCount[tmp_byte] += runCnt;
			while (--runCnt >= 0) dbuf[dbufCount++] = (uint32_t)tmp_byte;
			runPos = 0;
		}

		/* Is this the terminating symbol? */
		if (nextSym > symTotal) break;

		/* At this point, nextSym indicates a new literal character. Subtract
		   one to get the position in the MTF array at which this literal is
		   currently to be found. (Note that the result can't be -1 or 0,
		   because 0 and 1 are RUNA and RUNB. But another instance of the
		   first symbol in the mtf array, position 0, would have been handled
		   as part of a run above. Therefore 1 unused mtf position minus
		   2 non-literal nextSym values equals -1.) */
		if (dbufCount >= dbufSize) return RETVAL_DATA_ERROR;
		i = nextSym - 1;
		uc = mtfSymbol[i];

		/* Adjust the MTF array. Since we typically expect to move only a
		 * small number of symbols, and are bound by 256 in any case, using
		 * memmove here would typically be bigger and slower due to function
		 * call overhead and other assorted setup costs. */
		do {
			mtfSymbol[i] = mtfSymbol[i-1];
		} while (--i);
		mtfSymbol[0] = uc;
		uc = symToByte[uc];

		/* We have our literal byte. Save it into dbuf. */
		byteCount[uc]++;
		dbuf[dbufCount++] = (uint32_t)uc;

		/* Skip group initialization if we're not done with this group. Done
		 * this way to avoid compiler warning. */
 end_of_huffman_loop:
		if (--symCount >= 0) goto continue_this_group;
	}

	/* At this point, we've read all the Huffman-coded symbols (and repeated
	   runs) for this block from the input stream, and decoded them into the
	   intermediate buffer. There are dbufCount many decoded bytes in dbuf[].
	   Now undo the Burrows-Wheeler transform on dbuf.
	   See http://dogma.net/markn/articles/bwt/bwt.htm
	 */

	/* Turn byteCount into cumulative occurrence counts of 0 to n-1. */
	j = 0;
	for (i = 0; i < 256; i++) {
		int tmp_count = j + byteCount[i];
		byteCount[i] = j;
		j = tmp_count;
	}

	/* Figure out what order dbuf would be in if we sorted it. */
	/* (Each dbuf entry packs the successor index into bits 8.. above
	   the literal byte in bits 0..7.) */
	for (i = 0; i < dbufCount; i++) {
		uint8_t tmp_byte = (uint8_t)dbuf[i];
		int tmp_count = byteCount[tmp_byte];
		dbuf[tmp_count] |= (i << 8);
		byteCount[tmp_byte] = tmp_count + 1;
	}

	/* Decode first byte by hand to initialize "previous" byte. Note that it
	   doesn't get output, and if the first three characters are identical
	   it doesn't qualify as a run (hence writeRunCountdown=5). */
	if (dbufCount) {
		uint32_t tmp;
		if ((int)origPtr >= dbufCount) return RETVAL_DATA_ERROR;
		tmp = dbuf[origPtr];
		bd->writeCurrent = (uint8_t)tmp;
		bd->writePos = (tmp >> 8);
		bd->writeRunCountdown = 5;
	}
	bd->writeCount = dbufCount;

	return RETVAL_OK;
}
522
/* Undo Burrows-Wheeler transform on intermediate buffer to produce output.
   If start_bunzip was initialized with out_fd=-1, then up to len bytes of
   data are written to outbuf. Return value is number of bytes written or
   error (all errors are negative numbers). If out_fd!=-1, outbuf and len
   are ignored, data is written to out_fd and return is RETVAL_OK or error.

   NB: read_bunzip returns < 0 on error, or the number of *unfilled* bytes
   in outbuf. IOW: on EOF returns len ("all bytes are not filled"), not 0.
   (Why? This allows to get rid of one local variable)
*/
int FAST_FUNC read_bunzip(bunzip_data *bd, char *outbuf, int len)
{
	const uint32_t *dbuf;
	int pos, current, previous;
	uint32_t CRC;

	/* If we already have error/end indicator, return it */
	/* (writeCount < 0 is sticky: set once get_next_block errors or ends) */
	if (bd->writeCount < 0)
		return bd->writeCount;

	dbuf = bd->dbuf;

	/* Register-cached state (hopefully): */
	pos = bd->writePos;
	current = bd->writeCurrent;
	CRC = bd->writeCRC; /* small loss on x86-32 (not enough regs), win on x86-64 */

	/* We will always have pending decoded data to write into the output
	   buffer unless this is the very first call (in which case we haven't
	   Huffman-decoded a block into the intermediate buffer yet). */
	if (bd->writeCopies) {

 dec_writeCopies:
		/* Inside the loop, writeCopies means extra copies (beyond 1) */
		--bd->writeCopies;

		/* Loop outputting bytes */
		for (;;) {

			/* If the output buffer is full, save cached state and return */
			if (--len < 0) {
				/* Unlikely branch.
				 * Use of "goto" instead of keeping code here
				 * helps compiler to realize this. */
				goto outbuf_full;
			}

			/* Write next byte into output buffer, updating CRC */
			*outbuf++ = current;
			CRC = (CRC << 8) ^ bd->crc32Table[(CRC >> 24) ^ current];

			/* Loop now if we're outputting multiple copies of this byte */
			if (bd->writeCopies) {
				/* Unlikely branch */
				/*--bd->writeCopies;*/
				/*continue;*/
				/* Same, but (ab)using other existing --writeCopies operation
				 * (and this if() compiles into just test+branch pair): */
				goto dec_writeCopies;
			}
 decode_next_byte:
			if (--bd->writeCount < 0)
				break; /* input block is fully consumed, need next one */

			/* Follow sequence vector to undo Burrows-Wheeler transform */
			/* (dbuf entries pack next-index<<8 | literal byte) */
			previous = current;
			pos = dbuf[pos];
			current = (uint8_t)pos;
			pos >>= 8;

			/* After 3 consecutive copies of the same byte, the 4th
			 * is a repeat count. We count down from 4 instead
			 * of counting up because testing for non-zero is faster */
			if (--bd->writeRunCountdown != 0) {
				if (current != previous)
					bd->writeRunCountdown = 4;
			} else {
				/* Unlikely branch */
				/* We have a repeated run, this byte indicates the count */
				bd->writeCopies = current;
				current = previous;
				bd->writeRunCountdown = 5;

				/* Sometimes there are just 3 bytes (run length 0) */
				if (!bd->writeCopies) goto decode_next_byte;

				/* Subtract the 1 copy we'd output anyway to get extras */
				--bd->writeCopies;
			}
		} /* for(;;) */

		/* Decompression of this input block completed successfully */
		bd->writeCRC = CRC = ~CRC;
		bd->totalCRC = ((bd->totalCRC << 1) | (bd->totalCRC >> 31)) ^ CRC;

		/* If this block had a CRC error, force file level CRC error */
		if (CRC != bd->headerCRC) {
			bd->totalCRC = bd->headerCRC + 1;
			return RETVAL_LAST_BLOCK;
		}
	}

	/* Refill the intermediate buffer by Huffman-decoding next block of input */
	{
		int r = get_next_block(bd);
		if (r) { /* error/end */
			bd->writeCount = r;
			return (r != RETVAL_LAST_BLOCK) ? r : len;
		}
	}

	CRC = ~0;
	pos = bd->writePos;
	current = bd->writeCurrent;
	goto decode_next_byte;

 outbuf_full:
	/* Output buffer is full, save cached state and return */
	bd->writePos = pos;
	bd->writeCurrent = current;
	bd->writeCRC = CRC;

	/* Undo the speculative decrement at dec_writeCopies */
	bd->writeCopies++;

	return 0;
}
649
/* Allocate the structure, read file header. If in_fd==-1, inbuf must contain
   a complete bunzip file (len bytes long). If in_fd!=-1, inbuf and len are
   ignored, and data is read from file handle into temporary buffer. */

/* Because bunzip2 is used for help text unpacking, and because bb_show_usage()
   should work for NOFORK applets too, we must be extremely careful to not leak
   any allocations! */
int FAST_FUNC start_bunzip(bunzip_data **bdp, int in_fd,
		const void *inbuf, int len)
{
	bunzip_data *bd;
	unsigned i;
	enum {
		BZh0 = ('B' << 24) + ('Z' << 16) + ('h' << 8) + '0',
		h0 = ('h' << 8) + '0',
	};

	/* Figure out how much data to allocate */
	/* (IOBUF_SIZE extra bytes after the struct serve as the input buffer) */
	i = sizeof(bunzip_data);
	if (in_fd != -1) i += IOBUF_SIZE;

	/* Allocate bunzip_data. Most fields initialize to zero. */
	bd = *bdp = xzalloc(i);

	/* Setup input buffer */
	bd->in_fd = in_fd;
	if (-1 == in_fd) {
		/* in this case, bd->inbuf is read-only */
		bd->inbuf = (void*)inbuf; /* cast away const-ness */
	} else {
		bd->inbuf = (uint8_t*)(bd + 1);
		memcpy(bd->inbuf, inbuf, len);
	}
	bd->inbufCount = len;

	/* Init the CRC32 table (big endian) */
	crc32_filltable(bd->crc32Table, 1);

	/* Setup for I/O error handling via longjmp */
	/* (get_bits longjmps back here on input EOF, returning the error code) */
	i = setjmp(bd->jmpbuf);
	if (i) return i;

	/* Ensure that file starts with "BZh['1'-'9']." */
	/* Update: now caller verifies 1st two bytes, makes .gz/.bz2
	 * integration easier */
	/* was: */
	/* i = get_bits(bd, 32); */
	/* if ((unsigned)(i - BZh0 - 1) >= 9) return RETVAL_NOT_BZIP_DATA; */
	i = get_bits(bd, 16);
	if ((unsigned)(i - h0 - 1) >= 9) return RETVAL_NOT_BZIP_DATA;

	/* Fourth byte (ascii '1'-'9') indicates block size in units of 100k of
	   uncompressed data. Allocate intermediate buffer for block. */
	/* bd->dbufSize = 100000 * (i - BZh0); */
	bd->dbufSize = 100000 * (i - h0);

	/* Cannot use xmalloc - may leak bd in NOFORK case! */
	bd->dbuf = malloc_or_warn(bd->dbufSize * sizeof(bd->dbuf[0]));
	if (!bd->dbuf) {
		free(bd);
		xfunc_die();
	}
	return RETVAL_OK;
}
714
/* Free the intermediate buffer first, then the state structure itself
 * (order matters: bd->dbuf is read before bd is freed). */
void FAST_FUNC dealloc_bunzip(bunzip_data *bd)
{
	free(bd->dbuf);
	free(bd);
}
720
721
/* Decompress src_fd to dst_fd. Stops at end of bzip data, not end of file.
 * Handles several concatenated BZ streams back-to-back (as produced by
 * pbzip2). Returns negative RETVAL_* on error; on success returns 0
 * (or total bytes written when IF_DESKTOP is enabled). */
IF_DESKTOP(long long) int FAST_FUNC
unpack_bz2_stream(int src_fd, int dst_fd)
{
	IF_DESKTOP(long long total_written = 0;)
	bunzip_data *bd;
	char *outbuf;
	int i;
	unsigned len;

	outbuf = xmalloc(IOBUF_SIZE);
	len = 0;
	while (1) { /* "Process one BZ... stream" loop */

		/* outbuf + 2 skips the "BZ" magic already consumed by the caller
		 * (or re-detected below for a subsequent stream) */
		i = start_bunzip(&bd, src_fd, outbuf + 2, len);

		if (i == 0) {
			while (1) { /* "Produce some output bytes" loop */
				i = read_bunzip(bd, outbuf, IOBUF_SIZE);
				if (i < 0) /* error? */
					break;
				i = IOBUF_SIZE - i; /* number of bytes produced */
				if (i == 0) /* EOF? */
					break;
				if (i != full_write(dst_fd, outbuf, i)) {
					bb_error_msg("short write");
					i = RETVAL_SHORT_WRITE;
					goto release_mem;
				}
				IF_DESKTOP(total_written += i;)
			}
		}

		if (i != RETVAL_LAST_BLOCK) {
			bb_error_msg("bunzip error %d", i);
			break;
		}
		if (bd->headerCRC != bd->totalCRC) {
			bb_error_msg("CRC error");
			break;
		}

		/* Successfully unpacked one BZ stream */
		i = RETVAL_OK;

		/* Do we have "BZ..." after last processed byte?
		 * pbzip2 (parallelized bzip2) produces such files.
		 */
		len = bd->inbufCount - bd->inbufPos;
		memcpy(outbuf, &bd->inbuf[bd->inbufPos], len);
		if (len < 2) {
			if (safe_read(src_fd, outbuf + len, 2 - len) != 2 - len)
				break;
			len = 2;
		}
		/* NOTE(review): type-punned 16-bit read; outbuf is malloc'ed so
		 * alignment is fine in practice — confirm aliasing is acceptable */
		if (*(uint16_t*)outbuf != BZIP2_MAGIC) /* "BZ"? */
			break;
		dealloc_bunzip(bd);
		len -= 2;
	}

 release_mem:
	dealloc_bunzip(bd);
	free(outbuf);

	return i ? i : IF_DESKTOP(total_written) + 0;
}
789
790IF_DESKTOP(long long) int FAST_FUNC
791unpack_bz2_stream_prime(int src_fd, int dst_fd)
792{
793 uint16_t magic2;
794 xread(src_fd, &magic2, 2);
795 if (magic2 != BZIP2_MAGIC) {
796 bb_error_msg_and_die("invalid magic");
797 }
798 return unpack_bz2_stream(src_fd, dst_fd);
799}
800
801#ifdef TESTING
802
/* Error strings indexed by negated RETVAL_* codes (index 0 = RETVAL_OK).
 * Made fully const: the literals must never be written through, and
 * non-const char* to string literals trips -Wwrite-strings / risks UB. */
static const char *const bunzip_errors[] = {
	NULL, "Bad file checksum", "Not bzip data",
	"Unexpected input EOF", "Unexpected output EOF", "Data error",
	"Out of memory", "Obsolete (pre 0.9.5) bzip format not supported"
};
808
809/* Dumb little test thing, decompress stdin to stdout */
810int main(int argc, char **argv)
811{
812 int i;
813 char c;
814
815 int i = unpack_bz2_stream_prime(0, 1);
816 if (i < 0)
817 fprintf(stderr, "%s\n", bunzip_errors[-i]);
818 else if (read(STDIN_FILENO, &c, 1))
819 fprintf(stderr, "Trailing garbage ignored\n");
820 return -i;
821}
822#endif
diff --git a/archival/libarchive/decompress_uncompress.c b/archival/libarchive/decompress_uncompress.c
new file mode 100644
index 000000000..44d894244
--- /dev/null
+++ b/archival/libarchive/decompress_uncompress.c
@@ -0,0 +1,307 @@
1/* vi: set sw=4 ts=4: */
2/* uncompress for busybox -- (c) 2002 Robert Griebl
3 *
4 * based on the original compress42.c source
5 * (see disclaimer below)
6 */
7
8/* (N)compress42.c - File compression ala IEEE Computer, Mar 1992.
9 *
10 * Authors:
11 * Spencer W. Thomas (decvax!harpo!utah-cs!utah-gr!thomas)
12 * Jim McKie (decvax!mcvax!jim)
13 * Steve Davies (decvax!vax135!petsd!peora!srd)
14 * Ken Turkowski (decvax!decwrl!turtlevax!ken)
15 * James A. Woods (decvax!ihnp4!ames!jaw)
16 * Joe Orost (decvax!vax135!petsd!joe)
17 * Dave Mack (csu@alembic.acs.com)
18 * Peter Jannesen, Network Communication Systems
19 * (peter@ncs.nl)
20 *
21 * marc@suse.de : a small security fix for a buffer overflow
22 *
23 * [... History snipped ...]
24 *
25 */
26
27#include "libbb.h"
28#include "archive.h"
29
30
/* Default input buffer size */
#define IBUFSIZ 2048

/* Default output buffer size */
#define OBUFSIZ 2048

/* Defines for third byte of header */
#define BIT_MASK 0x1f /* Mask for 'number of compression bits' */
				/* Masks 0x20 and 0x40 are free. */
				/* I think 0x20 should mean that there is */
				/* a fourth header byte (for expansion). */
#define BLOCK_MODE 0x80 /* Block compression if table is full and */
				/* compression rate is dropping flush tables */
/* the next two codes should not be changed lightly, as they must not */
/* lie within the contiguous general code space. */
#define FIRST 257 /* first free entry */
#define CLEAR 256 /* table clear output code */

#define INIT_BITS 9 /* initial number of bits/code */


/* machine variants which require cc -Dmachine: pdp11, z8000, DOS */
#define HBITS 17 /* 50% occupancy */
#define HSIZE (1<<HBITS)
#define HMASK (HSIZE-1) /* unused */
#define HPRIME 9941 /* unused */
#define BITS 16
#define BITS_STR "16"
#undef MAXSEG_64K /* unused */
#define MAXCODE(n) (1L << (n))

/* Accessors for the LZW string table: codetab holds prefix codes,
 * the htab byte array doubles as suffix storage and (from its top,
 * growing downward via de_stack) as the expansion stack. */
#define htabof(i) htab[i]
#define codetabof(i) codetab[i]
#define tab_prefixof(i) codetabof(i)
#define tab_suffixof(i) ((unsigned char *)(htab))[i]
#define de_stack ((unsigned char *)&(htab[HSIZE-1]))
/* NOTE(review): clears only 256 bytes (= 128 uint16 entries); prefixes
 * of literal codes 0..255 are never dereferenced by the decode loop,
 * so this looks harmless - confirm against the decoder */
#define clear_tab_prefixof() memset(codetab, 0, 256)
68
69/*
70 * Decompress stdin to stdout. This routine adapts to the codes in the
71 * file building the "string" table on-the-fly; requiring no table to
72 * be stored in the compressed file.
73 */
74
75IF_DESKTOP(long long) int FAST_FUNC
76unpack_Z_stream(int fd_in, int fd_out)
77{
78 IF_DESKTOP(long long total_written = 0;)
79 IF_DESKTOP(long long) int retval = -1;
80 unsigned char *stackp;
81 long code;
82 int finchar;
83 long oldcode;
84 long incode;
85 int inbits;
86 int posbits;
87 int outpos;
88 int insize;
89 int bitmask;
90 long free_ent;
91 long maxcode;
92 long maxmaxcode;
93 int n_bits;
94 int rsize = 0;
95 unsigned char *inbuf; /* were eating insane amounts of stack - */
96 unsigned char *outbuf; /* bad for some embedded targets */
97 unsigned char *htab;
98 unsigned short *codetab;
99
100 /* Hmm, these were statics - why?! */
101 /* user settable max # bits/code */
102 int maxbits; /* = BITS; */
103 /* block compress mode -C compatible with 2.0 */
104 int block_mode; /* = BLOCK_MODE; */
105
106 inbuf = xzalloc(IBUFSIZ + 64);
107 outbuf = xzalloc(OBUFSIZ + 2048);
108 htab = xzalloc(HSIZE); /* wsn't zeroed out before, maybe can xmalloc? */
109 codetab = xzalloc(HSIZE * sizeof(codetab[0]));
110
111 insize = 0;
112
113 /* xread isn't good here, we have to return - caller may want
114 * to do some cleanup (e.g. delete incomplete unpacked file etc) */
115 if (full_read(fd_in, inbuf, 1) != 1) {
116 bb_error_msg("short read");
117 goto err;
118 }
119
120 maxbits = inbuf[0] & BIT_MASK;
121 block_mode = inbuf[0] & BLOCK_MODE;
122 maxmaxcode = MAXCODE(maxbits);
123
124 if (maxbits > BITS) {
125 bb_error_msg("compressed with %d bits, can only handle "
126 BITS_STR" bits", maxbits);
127 goto err;
128 }
129
130 n_bits = INIT_BITS;
131 maxcode = MAXCODE(INIT_BITS) - 1;
132 bitmask = (1 << INIT_BITS) - 1;
133 oldcode = -1;
134 finchar = 0;
135 outpos = 0;
136 posbits = 0 << 3;
137
138 free_ent = ((block_mode) ? FIRST : 256);
139
140 /* As above, initialize the first 256 entries in the table. */
141 /*clear_tab_prefixof(); - done by xzalloc */
142
143 for (code = 255; code >= 0; --code) {
144 tab_suffixof(code) = (unsigned char) code;
145 }
146
147 do {
148 resetbuf:
149 {
150 int i;
151 int e;
152 int o;
153
154 o = posbits >> 3;
155 e = insize - o;
156
157 for (i = 0; i < e; ++i)
158 inbuf[i] = inbuf[i + o];
159
160 insize = e;
161 posbits = 0;
162 }
163
164 if (insize < (int) (IBUFSIZ + 64) - IBUFSIZ) {
165 rsize = safe_read(fd_in, inbuf + insize, IBUFSIZ);
166//error check??
167 insize += rsize;
168 }
169
170 inbits = ((rsize > 0) ? (insize - insize % n_bits) << 3 :
171 (insize << 3) - (n_bits - 1));
172
173 while (inbits > posbits) {
174 if (free_ent > maxcode) {
175 posbits =
176 ((posbits - 1) +
177 ((n_bits << 3) -
178 (posbits - 1 + (n_bits << 3)) % (n_bits << 3)));
179 ++n_bits;
180 if (n_bits == maxbits) {
181 maxcode = maxmaxcode;
182 } else {
183 maxcode = MAXCODE(n_bits) - 1;
184 }
185 bitmask = (1 << n_bits) - 1;
186 goto resetbuf;
187 }
188 {
189 unsigned char *p = &inbuf[posbits >> 3];
190
191 code = ((((long) (p[0])) | ((long) (p[1]) << 8) |
192 ((long) (p[2]) << 16)) >> (posbits & 0x7)) & bitmask;
193 }
194 posbits += n_bits;
195
196
197 if (oldcode == -1) {
198 oldcode = code;
199 finchar = (int) oldcode;
200 outbuf[outpos++] = (unsigned char) finchar;
201 continue;
202 }
203
204 if (code == CLEAR && block_mode) {
205 clear_tab_prefixof();
206 free_ent = FIRST - 1;
207 posbits =
208 ((posbits - 1) +
209 ((n_bits << 3) -
210 (posbits - 1 + (n_bits << 3)) % (n_bits << 3)));
211 n_bits = INIT_BITS;
212 maxcode = MAXCODE(INIT_BITS) - 1;
213 bitmask = (1 << INIT_BITS) - 1;
214 goto resetbuf;
215 }
216
217 incode = code;
218 stackp = de_stack;
219
220 /* Special case for KwKwK string. */
221 if (code >= free_ent) {
222 if (code > free_ent) {
223 unsigned char *p;
224
225 posbits -= n_bits;
226 p = &inbuf[posbits >> 3];
227
228 bb_error_msg
229 ("insize:%d posbits:%d inbuf:%02X %02X %02X %02X %02X (%d)",
230 insize, posbits, p[-1], p[0], p[1], p[2], p[3],
231 (posbits & 07));
232 bb_error_msg("corrupted data");
233 goto err;
234 }
235
236 *--stackp = (unsigned char) finchar;
237 code = oldcode;
238 }
239
240 /* Generate output characters in reverse order */
241 while ((long) code >= (long) 256) {
242 *--stackp = tab_suffixof(code);
243 code = tab_prefixof(code);
244 }
245
246 finchar = tab_suffixof(code);
247 *--stackp = (unsigned char) finchar;
248
249 /* And put them out in forward order */
250 {
251 int i;
252
253 i = de_stack - stackp;
254 if (outpos + i >= OBUFSIZ) {
255 do {
256 if (i > OBUFSIZ - outpos) {
257 i = OBUFSIZ - outpos;
258 }
259
260 if (i > 0) {
261 memcpy(outbuf + outpos, stackp, i);
262 outpos += i;
263 }
264
265 if (outpos >= OBUFSIZ) {
266 full_write(fd_out, outbuf, outpos);
267//error check??
268 IF_DESKTOP(total_written += outpos;)
269 outpos = 0;
270 }
271 stackp += i;
272 i = de_stack - stackp;
273 } while (i > 0);
274 } else {
275 memcpy(outbuf + outpos, stackp, i);
276 outpos += i;
277 }
278 }
279
280 /* Generate the new entry. */
281 code = free_ent;
282 if (code < maxmaxcode) {
283 tab_prefixof(code) = (unsigned short) oldcode;
284 tab_suffixof(code) = (unsigned char) finchar;
285 free_ent = code + 1;
286 }
287
288 /* Remember previous code. */
289 oldcode = incode;
290 }
291
292 } while (rsize > 0);
293
294 if (outpos > 0) {
295 full_write(fd_out, outbuf, outpos);
296//error check??
297 IF_DESKTOP(total_written += outpos;)
298 }
299
300 retval = IF_DESKTOP(total_written) + 0;
301 err:
302 free(inbuf);
303 free(outbuf);
304 free(htab);
305 free(codetab);
306 return retval;
307}
diff --git a/archival/libarchive/decompress_unlzma.c b/archival/libarchive/decompress_unlzma.c
new file mode 100644
index 000000000..a04714341
--- /dev/null
+++ b/archival/libarchive/decompress_unlzma.c
@@ -0,0 +1,465 @@
1/* vi: set sw=4 ts=4: */
2/*
3 * Small lzma deflate implementation.
4 * Copyright (C) 2006 Aurelien Jacobs <aurel@gnuage.org>
5 *
6 * Based on LzmaDecode.c from the LZMA SDK 4.22 (http://www.7-zip.org/)
7 * Copyright (C) 1999-2005 Igor Pavlov
8 *
9 * Licensed under GPLv2 or later, see file LICENSE in this source tree.
10 */
11#include "libbb.h"
12#include "archive.h"
13
#if ENABLE_FEATURE_LZMA_FAST
/* Optimize for speed: force-inline the hot-path helpers */
# define speed_inline ALWAYS_INLINE
# define size_inline
#else
/* Optimize for size: force-inline only the rarely-used helpers */
# define speed_inline
# define size_inline ALWAYS_INLINE
#endif
21
22
/* Range-coder state; the input buffer is allocated in the same
 * chunk, immediately after this struct (see RC_BUFFER). */
typedef struct {
	int fd;			/* source file descriptor */
	uint8_t *ptr;		/* current read position in the attached buffer */

/* Was keeping rc on stack in unlzma and separately allocating buffer,
 * but with "buffer 'attached to' allocated rc" code is smaller: */
	/* uint8_t *buffer; */
#define RC_BUFFER ((uint8_t*)(rc+1))

	uint8_t *buffer_end;	/* one past the last valid byte in the buffer */

/* Had provisions for variable buffer, but we don't need it here */
	/* int buffer_size; */
#define RC_BUFFER_SIZE 0x10000

	uint32_t code;		/* current code word being decoded */
	uint32_t range;		/* current range of the coder */
	uint32_t bound;		/* scratch: threshold computed in rc_is_bit_1() */
} rc_t;

#define RC_TOP_BITS 24
#define RC_MOVE_BITS 5
#define RC_MODEL_TOTAL_BITS 11
46
47
48/* Called twice: once at startup (LZMA_FAST only) and once in rc_normalize() */
49static size_inline void rc_read(rc_t *rc)
50{
51 int buffer_size = safe_read(rc->fd, RC_BUFFER, RC_BUFFER_SIZE);
52//TODO: return -1 instead
53//This will make unlzma delete broken unpacked file on unpack errors
54 if (buffer_size <= 0)
55 bb_error_msg_and_die("unexpected EOF");
56 rc->ptr = RC_BUFFER;
57 rc->buffer_end = RC_BUFFER + buffer_size;
58}
59
60/* Called twice, but one callsite is in speed_inline'd rc_is_bit_1() */
61static void rc_do_normalize(rc_t *rc)
62{
63 if (rc->ptr >= rc->buffer_end)
64 rc_read(rc);
65 rc->range <<= 8;
66 rc->code = (rc->code << 8) | *rc->ptr++;
67}
68
69/* Called once */
70static ALWAYS_INLINE rc_t* rc_init(int fd) /*, int buffer_size) */
71{
72 int i;
73 rc_t *rc;
74
75 rc = xzalloc(sizeof(*rc) + RC_BUFFER_SIZE);
76
77 rc->fd = fd;
78 /* rc->ptr = rc->buffer_end; */
79
80 for (i = 0; i < 5; i++) {
81#if ENABLE_FEATURE_LZMA_FAST
82 if (rc->ptr >= rc->buffer_end)
83 rc_read(rc);
84 rc->code = (rc->code << 8) | *rc->ptr++;
85#else
86 rc_do_normalize(rc);
87#endif
88 }
89 rc->range = 0xFFFFFFFF;
90 return rc;
91}
92
93/* Called once */
static ALWAYS_INLINE void rc_free(rc_t *rc)
{
	/* rc and its attached input buffer are a single allocation */
	free(rc);
}
98
99static ALWAYS_INLINE void rc_normalize(rc_t *rc)
100{
101 if (rc->range < (1 << RC_TOP_BITS)) {
102 rc_do_normalize(rc);
103 }
104}
105
106/* rc_is_bit_1 is called 9 times */
static speed_inline int rc_is_bit_1(rc_t *rc, uint16_t *p)
{
	/* Decode one bit using (and adapting) the probability *p */
	rc_normalize(rc);
	rc->bound = *p * (rc->range >> RC_MODEL_TOTAL_BITS);
	if (rc->code < rc->bound) {
		/* Bit is 0: narrow range to [0, bound),
		 * nudge *p towards "0 is likely" */
		rc->range = rc->bound;
		*p += ((1 << RC_MODEL_TOTAL_BITS) - *p) >> RC_MOVE_BITS;
		return 0;
	}
	/* Bit is 1: use [bound, range), adapt *p the other way */
	rc->range -= rc->bound;
	rc->code -= rc->bound;
	*p -= *p >> RC_MOVE_BITS;
	return 1;
}
121
122/* Called 4 times in unlzma loop */
123static speed_inline int rc_get_bit(rc_t *rc, uint16_t *p, int *symbol)
124{
125 int ret = rc_is_bit_1(rc, p);
126 *symbol = *symbol * 2 + ret;
127 return ret;
128}
129
130/* Called once */
131static ALWAYS_INLINE int rc_direct_bit(rc_t *rc)
132{
133 rc_normalize(rc);
134 rc->range >>= 1;
135 if (rc->code >= rc->range) {
136 rc->code -= rc->range;
137 return 1;
138 }
139 return 0;
140}
141
142/* Called twice */
143static speed_inline void
144rc_bit_tree_decode(rc_t *rc, uint16_t *p, int num_levels, int *symbol)
145{
146 int i = num_levels;
147
148 *symbol = 1;
149 while (i--)
150 rc_get_bit(rc, p + *symbol, symbol);
151 *symbol -= 1 << num_levels;
152}
153
154
/* On-disk .lzma file header (little-endian; see SWAP_LE* below) */
typedef struct {
	uint8_t pos;		/* packed model params: (pb*5 + lp)*9 + lc */
	uint32_t dict_size;	/* dictionary (window) size in bytes */
	uint64_t dst_size;	/* uncompressed size */
} PACKED lzma_header_t;
160
161
162/* #defines will force compiler to compute/optimize each one with each usage.
163 * Have heart and use enum instead. */
enum {
	LZMA_BASE_SIZE = 1846,
	LZMA_LIT_SIZE = 768,

	LZMA_NUM_POS_BITS_MAX = 4,

	LZMA_LEN_NUM_LOW_BITS = 3,
	LZMA_LEN_NUM_MID_BITS = 3,
	LZMA_LEN_NUM_HIGH_BITS = 8,

	/* Offsets within a length-coder sub-model */
	LZMA_LEN_CHOICE = 0,
	LZMA_LEN_CHOICE_2 = (LZMA_LEN_CHOICE + 1),
	LZMA_LEN_LOW = (LZMA_LEN_CHOICE_2 + 1),
	LZMA_LEN_MID = (LZMA_LEN_LOW \
		+ (1 << (LZMA_NUM_POS_BITS_MAX + LZMA_LEN_NUM_LOW_BITS))),
	LZMA_LEN_HIGH = (LZMA_LEN_MID \
		+ (1 << (LZMA_NUM_POS_BITS_MAX + LZMA_LEN_NUM_MID_BITS))),
	LZMA_NUM_LEN_PROBS = (LZMA_LEN_HIGH + (1 << LZMA_LEN_NUM_HIGH_BITS)),

	LZMA_NUM_STATES = 12,
	LZMA_NUM_LIT_STATES = 7,

	LZMA_START_POS_MODEL_INDEX = 4,
	LZMA_END_POS_MODEL_INDEX = 14,
	LZMA_NUM_FULL_DISTANCES = (1 << (LZMA_END_POS_MODEL_INDEX >> 1)),

	LZMA_NUM_POS_SLOT_BITS = 6,
	LZMA_NUM_LEN_TO_POS_STATES = 4,

	LZMA_NUM_ALIGN_BITS = 4,

	LZMA_MATCH_MIN_LEN = 2,

	/* Offsets of the probability groups within the p[] array
	 * allocated in unpack_lzma_stream() */
	LZMA_IS_MATCH = 0,
	LZMA_IS_REP = (LZMA_IS_MATCH + (LZMA_NUM_STATES << LZMA_NUM_POS_BITS_MAX)),
	LZMA_IS_REP_G0 = (LZMA_IS_REP + LZMA_NUM_STATES),
	LZMA_IS_REP_G1 = (LZMA_IS_REP_G0 + LZMA_NUM_STATES),
	LZMA_IS_REP_G2 = (LZMA_IS_REP_G1 + LZMA_NUM_STATES),
	LZMA_IS_REP_0_LONG = (LZMA_IS_REP_G2 + LZMA_NUM_STATES),
	LZMA_POS_SLOT = (LZMA_IS_REP_0_LONG \
		+ (LZMA_NUM_STATES << LZMA_NUM_POS_BITS_MAX)),
	LZMA_SPEC_POS = (LZMA_POS_SLOT \
		+ (LZMA_NUM_LEN_TO_POS_STATES << LZMA_NUM_POS_SLOT_BITS)),
	LZMA_ALIGN = (LZMA_SPEC_POS \
		+ LZMA_NUM_FULL_DISTANCES - LZMA_END_POS_MODEL_INDEX),
	LZMA_LEN_CODER = (LZMA_ALIGN + (1 << LZMA_NUM_ALIGN_BITS)),
	LZMA_REP_LEN_CODER = (LZMA_LEN_CODER + LZMA_NUM_LEN_PROBS),
	LZMA_LITERAL = (LZMA_REP_LEN_CODER + LZMA_NUM_LEN_PROBS),
};
213
214
IF_DESKTOP(long long) int FAST_FUNC
unpack_lzma_stream(int src_fd, int dst_fd)
{
	/* Decompress a raw .lzma stream from src_fd to dst_fd.
	 * Returns bytes written (DESKTOP) or 0 on success, -1 on error. */
	IF_DESKTOP(long long total_written = 0;)
	lzma_header_t header;
	int lc, pb, lp;
	uint32_t pos_state_mask;
	uint32_t literal_pos_mask;
	uint16_t *p;
	int num_bits;
	int num_probs;
	rc_t *rc;
	int i;
	uint8_t *buffer;
	uint8_t previous_byte = 0;
	size_t buffer_pos = 0, global_pos = 0;
	int len = 0;
	int state = 0;
	uint32_t rep0 = 1, rep1 = 1, rep2 = 1, rep3 = 1;

	if (full_read(src_fd, &header, sizeof(header)) != sizeof(header)
	 || header.pos >= (9 * 5 * 5)
	) {
		bb_error_msg("bad lzma header");
		return -1;
	}

	/* Unpack the lc/lp/pb model parameters from header.pos */
	i = header.pos / 9;
	lc = header.pos % 9;
	pb = i / 5;
	lp = i % 5;
	pos_state_mask = (1 << pb) - 1;
	literal_pos_mask = (1 << lp) - 1;

	/* Header fields are little-endian on disk */
	header.dict_size = SWAP_LE32(header.dict_size);
	header.dst_size = SWAP_LE64(header.dst_size);

	if (header.dict_size == 0)
		header.dict_size++;

	buffer = xmalloc(MIN(header.dst_size, header.dict_size));

	/* Allocate all bit-probabilities, initialized to 0.5 */
	num_probs = LZMA_BASE_SIZE + (LZMA_LIT_SIZE << (lc + lp));
	p = xmalloc(num_probs * sizeof(*p));
	num_probs += LZMA_LITERAL - LZMA_BASE_SIZE;
	for (i = 0; i < num_probs; i++)
		p[i] = (1 << RC_MODEL_TOTAL_BITS) >> 1;

	rc = rc_init(src_fd); /*, RC_BUFFER_SIZE); */

	while (global_pos + buffer_pos < header.dst_size) {
		int pos_state = (buffer_pos + global_pos) & pos_state_mask;
		uint16_t *prob = p + LZMA_IS_MATCH + (state << LZMA_NUM_POS_BITS_MAX) + pos_state;

		if (!rc_is_bit_1(rc, prob)) {
			/* Literal byte */
			static const char next_state[LZMA_NUM_STATES] =
				{ 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5 };
			int mi = 1;

			prob = (p + LZMA_LITERAL
				+ (LZMA_LIT_SIZE * ((((buffer_pos + global_pos) & literal_pos_mask) << lc)
				    + (previous_byte >> (8 - lc))
				   )
				)
			);

			if (state >= LZMA_NUM_LIT_STATES) {
				/* After a match: decode literal against
				 * the byte at the last match distance */
				int match_byte;
				uint32_t pos = buffer_pos - rep0;

				while (pos >= header.dict_size)
					pos += header.dict_size;
				match_byte = buffer[pos];
				do {
					int bit;

					match_byte <<= 1;
					bit = match_byte & 0x100;
					bit ^= (rc_get_bit(rc, prob + 0x100 + bit + mi, &mi) << 8); /* 0x100 or 0 */
					if (bit)
						break;
				} while (mi < 0x100);
			}
			while (mi < 0x100) {
				rc_get_bit(rc, prob + mi, &mi);
			}

			state = next_state[state];

			previous_byte = (uint8_t) mi;
#if ENABLE_FEATURE_LZMA_FAST
 one_byte1:
			buffer[buffer_pos++] = previous_byte;
			if (buffer_pos == header.dict_size) {
				/* Dictionary window is full: flush it */
				buffer_pos = 0;
				global_pos += header.dict_size;
				if (full_write(dst_fd, buffer, header.dict_size) != (ssize_t)header.dict_size)
					goto bad;
				IF_DESKTOP(total_written += header.dict_size;)
			}
#else
			len = 1;
			goto one_byte2;
#endif
		} else {
			/* Match (new or repeated distance) */
			int offset;
			uint16_t *prob2;
#define prob_len prob2

			prob2 = p + LZMA_IS_REP + state;
			if (!rc_is_bit_1(rc, prob2)) {
				/* New match: rotate the distance history */
				rep3 = rep2;
				rep2 = rep1;
				rep1 = rep0;
				state = state < LZMA_NUM_LIT_STATES ? 0 : 3;
				prob2 = p + LZMA_LEN_CODER;
			} else {
				prob2 += LZMA_IS_REP_G0 - LZMA_IS_REP;
				if (!rc_is_bit_1(rc, prob2)) {
					prob2 = (p + LZMA_IS_REP_0_LONG
					         + (state << LZMA_NUM_POS_BITS_MAX)
					         + pos_state
					);
					if (!rc_is_bit_1(rc, prob2)) {
						/* "Short rep": single byte at distance rep0 */
#if ENABLE_FEATURE_LZMA_FAST
						uint32_t pos = buffer_pos - rep0;
						state = state < LZMA_NUM_LIT_STATES ? 9 : 11;
						while (pos >= header.dict_size)
							pos += header.dict_size;
						previous_byte = buffer[pos];
						goto one_byte1;
#else
						state = state < LZMA_NUM_LIT_STATES ? 9 : 11;
						len = 1;
						goto string;
#endif
					}
				} else {
					uint32_t distance;

					/* Reuse one of the older distances rep1..rep3 */
					prob2 += LZMA_IS_REP_G1 - LZMA_IS_REP_G0;
					distance = rep1;
					if (rc_is_bit_1(rc, prob2)) {
						prob2 += LZMA_IS_REP_G2 - LZMA_IS_REP_G1;
						distance = rep2;
						if (rc_is_bit_1(rc, prob2)) {
							distance = rep3;
							rep3 = rep2;
						}
						rep2 = rep1;
					}
					rep1 = rep0;
					rep0 = distance;
				}
				state = state < LZMA_NUM_LIT_STATES ? 8 : 11;
				prob2 = p + LZMA_REP_LEN_CODER;
			}

			/* Decode the match length (low/mid/high ranges) */
			prob_len = prob2 + LZMA_LEN_CHOICE;
			num_bits = LZMA_LEN_NUM_LOW_BITS;
			if (!rc_is_bit_1(rc, prob_len)) {
				prob_len += LZMA_LEN_LOW - LZMA_LEN_CHOICE
				            + (pos_state << LZMA_LEN_NUM_LOW_BITS);
				offset = 0;
			} else {
				prob_len += LZMA_LEN_CHOICE_2 - LZMA_LEN_CHOICE;
				if (!rc_is_bit_1(rc, prob_len)) {
					prob_len += LZMA_LEN_MID - LZMA_LEN_CHOICE_2
					            + (pos_state << LZMA_LEN_NUM_MID_BITS);
					offset = 1 << LZMA_LEN_NUM_LOW_BITS;
					num_bits += LZMA_LEN_NUM_MID_BITS - LZMA_LEN_NUM_LOW_BITS;
				} else {
					prob_len += LZMA_LEN_HIGH - LZMA_LEN_CHOICE_2;
					offset = ((1 << LZMA_LEN_NUM_LOW_BITS)
					          + (1 << LZMA_LEN_NUM_MID_BITS));
					num_bits += LZMA_LEN_NUM_HIGH_BITS - LZMA_LEN_NUM_LOW_BITS;
				}
			}
			rc_bit_tree_decode(rc, prob_len, num_bits, &len);
			len += offset;

			if (state < 4) {
				/* New match: decode the distance (pos slot
				 * plus model bits or direct+align bits) */
				int pos_slot;
				uint16_t *prob3;

				state += LZMA_NUM_LIT_STATES;
				prob3 = p + LZMA_POS_SLOT +
				       ((len < LZMA_NUM_LEN_TO_POS_STATES ? len :
				         LZMA_NUM_LEN_TO_POS_STATES - 1)
				         << LZMA_NUM_POS_SLOT_BITS);
				rc_bit_tree_decode(rc, prob3,
					LZMA_NUM_POS_SLOT_BITS, &pos_slot);
				rep0 = pos_slot;
				if (pos_slot >= LZMA_START_POS_MODEL_INDEX) {
					int i2, mi2, num_bits2 = (pos_slot >> 1) - 1;
					rep0 = 2 | (pos_slot & 1);
					if (pos_slot < LZMA_END_POS_MODEL_INDEX) {
						rep0 <<= num_bits2;
						prob3 = p + LZMA_SPEC_POS + rep0 - pos_slot - 1;
					} else {
						for (; num_bits2 != LZMA_NUM_ALIGN_BITS; num_bits2--)
							rep0 = (rep0 << 1) | rc_direct_bit(rc);
						rep0 <<= LZMA_NUM_ALIGN_BITS;
						prob3 = p + LZMA_ALIGN;
					}
					i2 = 1;
					mi2 = 1;
					while (num_bits2--) {
						if (rc_get_bit(rc, prob3 + mi2, &mi2))
							rep0 |= i2;
						i2 <<= 1;
					}
				}
				/* Distance 0xFFFFFFFF is the end-of-stream marker */
				if (++rep0 == 0)
					break;
			}

			len += LZMA_MATCH_MIN_LEN;
			IF_NOT_FEATURE_LZMA_FAST(string:)
			do {
				/* Copy len bytes from distance rep0, wrapping
				 * around the dictionary window */
				uint32_t pos = buffer_pos - rep0;
				while (pos >= header.dict_size)
					pos += header.dict_size;
				previous_byte = buffer[pos];
				IF_NOT_FEATURE_LZMA_FAST(one_byte2:)
				buffer[buffer_pos++] = previous_byte;
				if (buffer_pos == header.dict_size) {
					buffer_pos = 0;
					global_pos += header.dict_size;
					if (full_write(dst_fd, buffer, header.dict_size) != (ssize_t)header.dict_size)
						goto bad;
					IF_DESKTOP(total_written += header.dict_size;)
				}
				len--;
			} while (len != 0 && buffer_pos < header.dst_size);
		}
	}

	{
		IF_NOT_DESKTOP(int total_written = 0; /* success */)
		IF_DESKTOP(total_written += buffer_pos;)
		/* Flush the final, partially-filled window */
		if (full_write(dst_fd, buffer, buffer_pos) != (ssize_t)buffer_pos) {
 bad:
			total_written = -1; /* failure */
		}
		rc_free(rc);
		free(p);
		free(buffer);
		return total_written;
	}
}
diff --git a/archival/libarchive/decompress_unxz.c b/archival/libarchive/decompress_unxz.c
new file mode 100644
index 000000000..e90dfb06f
--- /dev/null
+++ b/archival/libarchive/decompress_unxz.c
@@ -0,0 +1,98 @@
1/*
2 * This file uses XZ Embedded library code which is written
3 * by Lasse Collin <lasse.collin@tukaani.org>
4 * and Igor Pavlov <http://7-zip.org/>
5 *
6 * See README file in unxz/ directory for more information.
7 *
8 * This file is:
9 * Copyright (C) 2010 Denys Vlasenko <vda.linux@googlemail.com>
10 * Licensed under GPLv2, see file LICENSE in this source tree.
11 */
12#include "libbb.h"
13#include "archive.h"
14
/* Configure the XZ Embedded sources #included below */
#define XZ_FUNC FAST_FUNC
#define XZ_EXTERN static

#define XZ_DEC_DYNALLOC

/* Skip check (rather than fail) of unsupported hash functions */
#define XZ_DEC_ANY_CHECK 1

/* We use our own crc32 function */
#define XZ_INTERNAL_CRC32 0
25static uint32_t xz_crc32(const uint8_t *buf, size_t size, uint32_t crc)
26{
27 return ~crc32_block_endian0(~crc, buf, size, global_crc32_table);
28}
29
/* We use arch-optimized unaligned accessors.
 * BUGFIX: put_unaligned_?e32 used move_to_unaligned16, which stored
 * only the low 16 bits of a 32-bit value. Must be move_to_unaligned32. */
#define get_unaligned_le32(buf) ({ uint32_t v; move_from_unaligned32(v, buf); SWAP_LE32(v); })
#define get_unaligned_be32(buf) ({ uint32_t v; move_from_unaligned32(v, buf); SWAP_BE32(v); })
#define put_unaligned_le32(val, buf) move_to_unaligned32(buf, SWAP_LE32(val))
#define put_unaligned_be32(val, buf) move_to_unaligned32(buf, SWAP_BE32(val))
35
36#include "unxz/xz_dec_bcj.c"
37#include "unxz/xz_dec_lzma2.c"
38#include "unxz/xz_dec_stream.c"
39
IF_DESKTOP(long long) int FAST_FUNC
unpack_xz_stream(int src_fd, int dst_fd)
{
	/* Decompress an .xz stream (whose 6 magic bytes were already
	 * consumed by the caller) from src_fd to dst_fd.
	 * Returns bytes written (DESKTOP) or 0 on success, -1 on error. */
	struct xz_buf iobuf;
	struct xz_dec *state;
	unsigned char *membuf;
	IF_DESKTOP(long long) int total = 0;

	if (!global_crc32_table)
		global_crc32_table = crc32_filltable(NULL, /*endian:*/ 0);

	memset(&iobuf, 0, sizeof(iobuf));
	/* Preload XZ file signature */
	membuf = (void*) strcpy(xmalloc(2 * BUFSIZ), HEADER_MAGIC);
	/* First half of membuf is the input window, second half output */
	iobuf.in = membuf;
	iobuf.in_size = HEADER_MAGIC_SIZE;
	iobuf.out = membuf + BUFSIZ;
	iobuf.out_size = BUFSIZ;

	/* Limit memory usage to about 64 MiB. */
	state = xz_dec_init(XZ_DYNALLOC, 64*1024*1024);

	while (1) {
		enum xz_ret r;

		if (iobuf.in_pos == iobuf.in_size) {
			/* Input exhausted: refill from src_fd */
			int rd = safe_read(src_fd, membuf, BUFSIZ);
			if (rd < 0) {
				bb_error_msg(bb_msg_read_error);
				total = -1;
				break;
			}
			iobuf.in_size = rd;
			iobuf.in_pos = 0;
		}
//		bb_error_msg(">in pos:%d size:%d out pos:%d size:%d",
//				iobuf.in_pos, iobuf.in_size, iobuf.out_pos, iobuf.out_size);
		r = xz_dec_run(state, &iobuf);
//		bb_error_msg("<in pos:%d size:%d out pos:%d size:%d r:%d",
//				iobuf.in_pos, iobuf.in_size, iobuf.out_pos, iobuf.out_size, r);
		if (iobuf.out_pos) {
			/* Flush whatever was produced this round */
			xwrite(dst_fd, iobuf.out, iobuf.out_pos);
			IF_DESKTOP(total += iobuf.out_pos;)
			iobuf.out_pos = 0;
		}
		if (r == XZ_STREAM_END) {
			break;
		}
		if (r != XZ_OK && r != XZ_UNSUPPORTED_CHECK) {
			bb_error_msg("corrupted data");
			total = -1;
			break;
		}
	}
	xz_dec_end(state);
	free(membuf);

	return total;
}
diff --git a/archival/libarchive/decompress_unzip.c b/archival/libarchive/decompress_unzip.c
new file mode 100644
index 000000000..a29eef837
--- /dev/null
+++ b/archival/libarchive/decompress_unzip.c
@@ -0,0 +1,1252 @@
1/* vi: set sw=4 ts=4: */
2/*
3 * gunzip implementation for busybox
4 *
5 * Based on GNU gzip v1.2.4 Copyright (C) 1992-1993 Jean-loup Gailly.
6 *
7 * Originally adjusted for busybox by Sven Rudolph <sr1@inf.tu-dresden.de>
8 * based on gzip sources
9 *
10 * Adjusted further by Erik Andersen <andersen@codepoet.org> to support
11 * files as well as stdin/stdout, and to generally behave itself wrt
12 * command line handling.
13 *
14 * General cleanup to better adhere to the style guide and make use of standard
15 * busybox functions by Glenn McGrath
16 *
17 * read_gz interface + associated hacking by Laurence Anderson
18 *
19 * Fixed huft_build() so decoding end-of-block code does not grab more bits
20 * than necessary (this is required by unzip applet), added inflate_cleanup()
21 * to free leaked bytebuffer memory (used in unzip.c), and some minor style
22 * guide cleanups by Ed Clark
23 *
24 * gzip (GNU zip) -- compress files with zip algorithm and 'compress' interface
25 * Copyright (C) 1992-1993 Jean-loup Gailly
26 * The unzip code was written and put in the public domain by Mark Adler.
27 * Portions of the lzw code are derived from the public domain 'compress'
28 * written by Spencer Thomas, Joe Orost, James Woods, Jim McKie, Steve Davies,
29 * Ken Turkowski, Dave Mack and Peter Jannesen.
30 *
31 * See the file algorithm.doc for the compression algorithms and file formats.
32 *
33 * Licensed under GPLv2 or later, see file LICENSE in this source tree.
34 */
35
36#include <setjmp.h>
37#include "libbb.h"
38#include "archive.h"
39
/* One entry of a Huffman decode table. Entries either resolve directly
 * to a value (v.n) or link to a next-level sub-table (v.t). */
typedef struct huft_t {
	unsigned char e;	/* number of extra bits or operation */
	unsigned char b;	/* number of bits in this code or subcode */
	union {
		unsigned short n;	/* literal, length base, or distance base */
		struct huft_t *t;	/* pointer to next level of table */
	} v;
} huft_t;
48
/* Compile-time constants for the inflate machinery */
enum {
	/* gunzip_window size--must be a power of two, and
	 * at least 32K for zip's deflate method */
	GUNZIP_WSIZE = 0x8000,
	/* If BMAX needs to be larger than 16, then h and x[] should be ulg. */
	BMAX = 16,	/* maximum bit length of any code (16 for explode) */
	N_MAX = 288,	/* maximum number of codes in any set */
};
57
58
59/* This is somewhat complex-looking arrangement, but it allows
60 * to place decompressor state either in bss or in
61 * malloc'ed space simply by changing #defines below.
62 * Sizes on i386:
63 * text data bss dec hex
64 * 5256 0 108 5364 14f4 - bss
65 * 4915 0 0 4915 1333 - malloc
66 */
#define STATE_IN_BSS 0
#define STATE_IN_MALLOC 1


/* All decompressor state is kept in one struct so it can live either
 * in bss or in malloc'ed memory, per the STATE_IN_* switches above. */
typedef struct state_t {
	off_t gunzip_bytes_out; /* number of output bytes */
	uint32_t gunzip_crc;

	int gunzip_src_fd;
	unsigned gunzip_outbuf_count; /* bytes in output buffer */

	unsigned char *gunzip_window;

	uint32_t *gunzip_crc_table;

	/* bitbuffer */
	unsigned gunzip_bb; /* bit buffer */
	unsigned char gunzip_bk; /* bits in bit buffer */

	/* input (compressed) data */
	unsigned char *bytebuffer; /* buffer itself */
	off_t to_read; /* compressed bytes to read (unzip only, -1 for gunzip) */
//	unsigned bytebuffer_max; /* buffer size */
	unsigned bytebuffer_offset; /* buffer position */
	unsigned bytebuffer_size; /* how much data is there (size <= max) */

	/* private data of inflate_codes() */
	unsigned inflate_codes_ml; /* masks for bl and bd bits */
	unsigned inflate_codes_md; /* masks for bl and bd bits */
	unsigned inflate_codes_bb; /* bit buffer */
	unsigned inflate_codes_k; /* number of bits in bit buffer */
	unsigned inflate_codes_w; /* current gunzip_window position */
	huft_t *inflate_codes_tl; /* literal/length decode table */
	huft_t *inflate_codes_td; /* distance decode table */
	unsigned inflate_codes_bl; /* lookup bits for tl */
	unsigned inflate_codes_bd; /* lookup bits for td */
	unsigned inflate_codes_nn; /* length and index for copy */
	unsigned inflate_codes_dd;

	smallint resume_copy; /* nonzero: window filled mid-copy, resume at do_copy */

	/* private data of inflate_get_next_window() */
	smallint method; /* method == -1 for stored, -2 for codes */
	smallint need_another_block;
	smallint end_reached;

	/* private data of inflate_stored() */
	unsigned inflate_stored_n;
	unsigned inflate_stored_b;
	unsigned inflate_stored_k;
	unsigned inflate_stored_w;

	const char *error_msg; /* message reported when error_jmp is taken */
	jmp_buf error_jmp;     /* setjmp target in inflate_unzip_internal() */
} state_t;
/* Accessor macros: each short name below expands to a member of the
 * state object via S(), which is "state." or "state->" depending on
 * the STATE_IN_* selection. NOTE: these names are effectively global
 * identifiers inside this file - do not shadow them with locals. */
#define gunzip_bytes_out (S()gunzip_bytes_out )
#define gunzip_crc (S()gunzip_crc )
#define gunzip_src_fd (S()gunzip_src_fd )
#define gunzip_outbuf_count (S()gunzip_outbuf_count)
#define gunzip_window (S()gunzip_window )
#define gunzip_crc_table (S()gunzip_crc_table )
#define gunzip_bb (S()gunzip_bb )
#define gunzip_bk (S()gunzip_bk )
#define to_read (S()to_read )
// #define bytebuffer_max (S()bytebuffer_max )
// Both gunzip and unzip can use constant buffer size now (16k):
#define bytebuffer_max 0x4000
#define bytebuffer (S()bytebuffer )
#define bytebuffer_offset (S()bytebuffer_offset )
#define bytebuffer_size (S()bytebuffer_size )
#define inflate_codes_ml (S()inflate_codes_ml )
#define inflate_codes_md (S()inflate_codes_md )
#define inflate_codes_bb (S()inflate_codes_bb )
#define inflate_codes_k (S()inflate_codes_k )
#define inflate_codes_w (S()inflate_codes_w )
#define inflate_codes_tl (S()inflate_codes_tl )
#define inflate_codes_td (S()inflate_codes_td )
#define inflate_codes_bl (S()inflate_codes_bl )
#define inflate_codes_bd (S()inflate_codes_bd )
#define inflate_codes_nn (S()inflate_codes_nn )
#define inflate_codes_dd (S()inflate_codes_dd )
#define resume_copy (S()resume_copy )
#define method (S()method )
#define need_another_block (S()need_another_block )
#define end_reached (S()end_reached )
#define inflate_stored_n (S()inflate_stored_n )
#define inflate_stored_b (S()inflate_stored_b )
#define inflate_stored_k (S()inflate_stored_k )
#define inflate_stored_w (S()inflate_stored_w )
#define error_msg (S()error_msg )
#define error_jmp (S()error_jmp )

/* This is a generic part */
#if STATE_IN_BSS /* Use global data segment */
#define DECLARE_STATE /*nothing*/
#define ALLOC_STATE /*nothing*/
#define DEALLOC_STATE ((void)0)
#define S() state.
#define PASS_STATE /*nothing*/
#define PASS_STATE_ONLY /*nothing*/
#define STATE_PARAM /*nothing*/
#define STATE_PARAM_ONLY void
static state_t state;
#endif

#if STATE_IN_MALLOC /* Use malloc space */
#define DECLARE_STATE state_t *state
#define ALLOC_STATE (state = xzalloc(sizeof(*state)))
#define DEALLOC_STATE free(state)
#define S() state->
#define PASS_STATE state,
#define PASS_STATE_ONLY state
#define STATE_PARAM state_t *state,
#define STATE_PARAM_ONLY state_t *state
#endif
182
183
/* mask_bits[n] has the low n bits set; used to extract n bits from
 * the bit buffer (bb & mask_bits[n]) */
static const uint16_t mask_bits[] ALIGN2 = {
	0x0000, 0x0001, 0x0003, 0x0007, 0x000f, 0x001f, 0x003f, 0x007f, 0x00ff,
	0x01ff, 0x03ff, 0x07ff, 0x0fff, 0x1fff, 0x3fff, 0x7fff, 0xffff
};

/* Copy lengths for literal codes 257..285 */
static const uint16_t cplens[] ALIGN2 = {
	3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, 35, 43, 51, 59,
	67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0
};

/* note: see note #13 above about the 258 in this list. */
/* Extra bits for literal codes 257..285 */
static const uint8_t cplext[] ALIGN1 = {
	0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5,
	5, 5, 5, 0, 99, 99
}; /* 99 == invalid */

/* Copy offsets for distance codes 0..29 */
static const uint16_t cpdist[] ALIGN2 = {
	1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 257, 385, 513,
	769, 1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577
};

/* Extra bits for distance codes */
static const uint8_t cpdext[] ALIGN1 = {
	0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10,
	11, 11, 12, 12, 13, 13
};

/* Tables for deflate from PKZIP's appnote.txt. */
/* Order of the bit length code lengths */
static const uint8_t border[] ALIGN1 = {
	16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15
};
219
220
221/*
222 * Free the malloc'ed tables built by huft_build(), which makes a linked
223 * list of the tables it made, with the links in a dummy first entry of
224 * each table.
225 * t: table to free
226 */
227static void huft_free(huft_t *p)
228{
229 huft_t *q;
230
231 /* Go through linked list, freeing from the malloced (t[-1]) address. */
232 while (p) {
233 q = (--p)->v.t;
234 free(p);
235 p = q;
236 }
237}
238
/* Free both decode tables (literal/length and distance) and clear the
 * pointers so a later free cannot double-free them. */
static void huft_free_all(STATE_PARAM_ONLY)
{
	huft_free(inflate_codes_tl);
	huft_free(inflate_codes_td);
	inflate_codes_tl = NULL;
	inflate_codes_td = NULL;
}
246
/* Abandon decompression: free decode tables and longjmp back to the
 * setjmp in inflate_unzip_internal(). error_msg is expected to have
 * been set by the caller (defaults to "corrupted data"). */
static void abort_unzip(STATE_PARAM_ONLY) NORETURN;
static void abort_unzip(STATE_PARAM_ONLY)
{
	huft_free_all(PASS_STATE_ONLY);
	longjmp(error_jmp, 1);
}
253
/* Ensure at least 'required' bits are available in 'bitbuffer',
 * refilling the byte buffer from gunzip_src_fd when it runs dry.
 * Returns the topped-up bit buffer; *current tracks the bit count.
 * On EOF/read error it does not return - aborts via abort_unzip(). */
static unsigned fill_bitbuffer(STATE_PARAM unsigned bitbuffer, unsigned *current, const unsigned required)
{
	while (*current < required) {
		if (bytebuffer_offset >= bytebuffer_size) {
			unsigned sz = bytebuffer_max - 4;
			if (to_read >= 0 && to_read < sz) /* unzip only */
				sz = to_read;
			/* Leave the first 4 bytes empty so we can always unwind the bitbuffer
			 * to the front of the bytebuffer */
			bytebuffer_size = safe_read(gunzip_src_fd, &bytebuffer[4], sz);
			if ((int)bytebuffer_size < 1) {
				error_msg = "unexpected end of file";
				abort_unzip(PASS_STATE_ONLY);
			}
			if (to_read >= 0) /* unzip only */
				to_read -= bytebuffer_size;
			bytebuffer_size += 4;
			bytebuffer_offset = 4;
		}
		bitbuffer |= ((unsigned) bytebuffer[bytebuffer_offset]) << *current;
		bytebuffer_offset++;
		*current += 8;
	}
	return bitbuffer;
}
279
280
281/* Given a list of code lengths and a maximum table size, make a set of
282 * tables to decode that set of codes. Return zero on success, one if
283 * the given code set is incomplete (the tables are still built in this
284 * case), two if the input is invalid (all zero length codes or an
285 * oversubscribed set of lengths) - in this case stores NULL in *t.
286 *
287 * b: code lengths in bits (all assumed <= BMAX)
288 * n: number of codes (assumed <= N_MAX)
289 * s: number of simple-valued codes (0..s-1)
290 * d: list of base values for non-simple codes
291 * e: list of extra bits for non-simple codes
292 * t: result: starting table
293 * m: maximum lookup bits, returns actual
294 */
/* NOTE(review): even when 1 is returned (incomplete code set), tables
 * have been built and linked via *t - caller must huft_free() them. */
static int huft_build(const unsigned *b, const unsigned n,
			const unsigned s, const unsigned short *d,
			const unsigned char *e, huft_t **t, unsigned *m)
{
	unsigned a;		/* counter for codes of length k */
	unsigned c[BMAX + 1];	/* bit length count table */
	unsigned eob_len;	/* length of end-of-block code (value 256) */
	unsigned f;		/* i repeats in table every f entries */
	int g;			/* maximum code length */
	int htl;		/* table level */
	unsigned i;		/* counter, current code */
	unsigned j;		/* counter */
	int k;			/* number of bits in current code */
	unsigned *p;		/* pointer into c[], b[], or v[] */
	huft_t *q;		/* points to current table */
	huft_t r;		/* table entry for structure assignment */
	huft_t *u[BMAX];	/* table stack */
	unsigned v[N_MAX];	/* values in order of bit length */
	int ws[BMAX + 1];	/* bits decoded stack */
	int w;			/* bits decoded */
	unsigned x[BMAX + 1];	/* bit offsets, then code stack */
	unsigned *xp;		/* pointer into x */
	int y;			/* number of dummy codes added */
	unsigned z;		/* number of entries in current table */

	/* Length of EOB code, if any */
	eob_len = n > 256 ? b[256] : BMAX;

	*t = NULL;

	/* Generate counts for each bit length */
	memset(c, 0, sizeof(c));
	p = (unsigned *) b; /* cast allows us to reuse p for pointing to b */
	i = n;
	do {
		c[*p]++; /* assume all entries <= BMAX */
		p++;	 /* can't combine with above line (Solaris bug) */
	} while (--i);
	if (c[0] == n) {  /* null input - all zero length codes */
		*m = 0;
		return 2;
	}

	/* Find minimum and maximum length, bound *m by those */
	for (j = 1; (c[j] == 0) && (j <= BMAX); j++)
		continue;
	k = j; /* minimum code length */
	for (i = BMAX; (c[i] == 0) && i; i--)
		continue;
	g = i; /* maximum code length */
	*m = (*m < j) ? j : ((*m > i) ? i : *m);

	/* Adjust last length count to fill out codes, if needed */
	for (y = 1 << j; j < i; j++, y <<= 1) {
		y -= c[j];
		if (y < 0)
			return 2; /* bad input: more codes than bits */
	}
	y -= c[i];
	if (y < 0)
		return 2;
	c[i] += y;

	/* Generate starting offsets into the value table for each length */
	x[1] = j = 0;
	p = c + 1;
	xp = x + 2;
	while (--i) { /* note that i == g from above */
		j += *p++;
		*xp++ = j;
	}

	/* Make a table of values in order of bit lengths */
	p = (unsigned *) b;
	i = 0;
	do {
		j = *p++;
		if (j != 0) {
			v[x[j]++] = i;
		}
	} while (++i < n);

	/* Generate the Huffman codes and for each, make the table entries */
	x[0] = i = 0;	/* first Huffman code is zero */
	p = v;		/* grab values in bit order */
	htl = -1;	/* no tables yet--level -1 */
	w = ws[0] = 0;	/* bits decoded */
	u[0] = NULL;	/* just to keep compilers happy */
	q = NULL;	/* ditto */
	z = 0;		/* ditto */

	/* go through the bit lengths (k already is bits in shortest code) */
	for (; k <= g; k++) {
		a = c[k];
		while (a--) {
			/* here i is the Huffman code of length k bits for value *p */
			/* make tables up to required level */
			while (k > ws[htl + 1]) {
				w = ws[++htl];

				/* compute minimum size table less than or equal to *m bits */
				z = g - w;
				z = z > *m ? *m : z; /* upper limit on table size */
				j = k - w;
				f = 1 << j;
				if (f > a + 1) { /* try a k-w bit table */
					/* too few codes for k-w bit table */
					f -= a + 1; /* deduct codes from patterns left */
					xp = c + k;
					while (++j < z) { /* try smaller tables up to z bits */
						f <<= 1;
						if (f <= *++xp) {
							break; /* enough codes to use up j bits */
						}
						f -= *xp; /* else deduct codes from patterns */
					}
				}
				j = (w + j > eob_len && w < eob_len) ? eob_len - w : j; /* make EOB code end at table */
				z = 1 << j;	/* table entries for j-bit table */
				ws[htl+1] = w + j;	/* set bits decoded in stack */

				/* allocate and link in new table */
				q = xzalloc((z + 1) * sizeof(huft_t));
				*t = q + 1;	/* link to list for huft_free() */
				t = &(q->v.t);
				u[htl] = ++q;	/* table starts after link */

				/* connect to last table, if there is one */
				if (htl) {
					x[htl] = i; /* save pattern for backing up */
					r.b = (unsigned char) (w - ws[htl - 1]); /* bits to dump before this table */
					r.e = (unsigned char) (16 + j); /* bits in this table */
					r.v.t = q; /* pointer to this table */
					j = (i & ((1 << w) - 1)) >> ws[htl - 1];
					u[htl - 1][j] = r; /* connect to last table */
				}
			}

			/* set up table entry in r */
			r.b = (unsigned char) (k - w);
			if (p >= v + n) {
				r.e = 99; /* out of values--invalid code */
			} else if (*p < s) {
				r.e = (unsigned char) (*p < 256 ? 16 : 15);	/* 256 is EOB code */
				r.v.n = (unsigned short) (*p++); /* simple code is just the value */
			} else {
				r.e = (unsigned char) e[*p - s]; /* non-simple--look up in lists */
				r.v.n = d[*p++ - s];
			}

			/* fill code-like entries with r */
			f = 1 << (k - w);
			for (j = i >> w; j < z; j += f) {
				q[j] = r;
			}

			/* backwards increment the k-bit code i */
			for (j = 1 << (k - 1); i & j; j >>= 1) {
				i ^= j;
			}
			i ^= j;

			/* backup over finished tables */
			while ((i & ((1 << w) - 1)) != x[htl]) {
				w = ws[--htl];
			}
		}
	}

	/* return actual size of base table */
	*m = ws[1];

	/* Return 1 if we were given an incomplete table */
	return y != 0 && g != 1;
}
470
471
472/*
473 * inflate (decompress) the codes in a deflated (compressed) block.
474 * Return an error code or zero if it all goes ok.
475 *
476 * tl, td: literal/length and distance decoder tables
477 * bl, bd: number of bits decoded by tl[] and td[]
478 */
479/* called once from inflate_block */
480
/* map formerly local static variables to globals */
/* (the short names expand to inflate_codes_* state members; any local
 * variable with one of these names would be silently captured) */
#define ml inflate_codes_ml
#define md inflate_codes_md
#define bb inflate_codes_bb
#define k inflate_codes_k
#define w inflate_codes_w
#define tl inflate_codes_tl
#define td inflate_codes_td
#define bl inflate_codes_bl
#define bd inflate_codes_bd
#define nn inflate_codes_nn
#define dd inflate_codes_dd
/* Prepare inflate_codes() state: record lookup-bit counts for the
 * (already built) tl/td tables and snapshot the global bit buffer.
 * Called once from inflate_block. */
static void inflate_codes_setup(STATE_PARAM unsigned my_bl, unsigned my_bd)
{
	bl = my_bl;
	bd = my_bd;
	/* make local copies of globals */
	bb = gunzip_bb; /* initialize bit buffer */
	k = gunzip_bk;
	w = gunzip_outbuf_count; /* initialize gunzip_window position */
	/* inflate the coded data */
	ml = mask_bits[bl]; /* precompute masks for speed */
	md = mask_bits[bd];
}
505/* called once from inflate_get_next_window */
506static NOINLINE int inflate_codes(STATE_PARAM_ONLY)
507{
508 unsigned e; /* table entry flag/number of extra bits */
509 huft_t *t; /* pointer to table entry */
510
511 if (resume_copy)
512 goto do_copy;
513
514 while (1) { /* do until end of block */
515 bb = fill_bitbuffer(PASS_STATE bb, &k, bl);
516 t = tl + ((unsigned) bb & ml);
517 e = t->e;
518 if (e > 16)
519 do {
520 if (e == 99)
521 abort_unzip(PASS_STATE_ONLY);;
522 bb >>= t->b;
523 k -= t->b;
524 e -= 16;
525 bb = fill_bitbuffer(PASS_STATE bb, &k, e);
526 t = t->v.t + ((unsigned) bb & mask_bits[e]);
527 e = t->e;
528 } while (e > 16);
529 bb >>= t->b;
530 k -= t->b;
531 if (e == 16) { /* then it's a literal */
532 gunzip_window[w++] = (unsigned char) t->v.n;
533 if (w == GUNZIP_WSIZE) {
534 gunzip_outbuf_count = w;
535 //flush_gunzip_window();
536 w = 0;
537 return 1; // We have a block to read
538 }
539 } else { /* it's an EOB or a length */
540 /* exit if end of block */
541 if (e == 15) {
542 break;
543 }
544
545 /* get length of block to copy */
546 bb = fill_bitbuffer(PASS_STATE bb, &k, e);
547 nn = t->v.n + ((unsigned) bb & mask_bits[e]);
548 bb >>= e;
549 k -= e;
550
551 /* decode distance of block to copy */
552 bb = fill_bitbuffer(PASS_STATE bb, &k, bd);
553 t = td + ((unsigned) bb & md);
554 e = t->e;
555 if (e > 16)
556 do {
557 if (e == 99)
558 abort_unzip(PASS_STATE_ONLY);
559 bb >>= t->b;
560 k -= t->b;
561 e -= 16;
562 bb = fill_bitbuffer(PASS_STATE bb, &k, e);
563 t = t->v.t + ((unsigned) bb & mask_bits[e]);
564 e = t->e;
565 } while (e > 16);
566 bb >>= t->b;
567 k -= t->b;
568 bb = fill_bitbuffer(PASS_STATE bb, &k, e);
569 dd = w - t->v.n - ((unsigned) bb & mask_bits[e]);
570 bb >>= e;
571 k -= e;
572
573 /* do the copy */
574 do_copy:
575 do {
576 /* Was: nn -= (e = (e = GUNZIP_WSIZE - ((dd &= GUNZIP_WSIZE - 1) > w ? dd : w)) > nn ? nn : e); */
577 /* Who wrote THAT?? rewritten as: */
578 unsigned delta;
579
580 dd &= GUNZIP_WSIZE - 1;
581 e = GUNZIP_WSIZE - (dd > w ? dd : w);
582 delta = w > dd ? w - dd : dd - w;
583 if (e > nn) e = nn;
584 nn -= e;
585
586 /* copy to new buffer to prevent possible overwrite */
587 if (delta >= e) {
588 memcpy(gunzip_window + w, gunzip_window + dd, e);
589 w += e;
590 dd += e;
591 } else {
592 /* do it slow to avoid memcpy() overlap */
593 /* !NOMEMCPY */
594 do {
595 gunzip_window[w++] = gunzip_window[dd++];
596 } while (--e);
597 }
598 if (w == GUNZIP_WSIZE) {
599 gunzip_outbuf_count = w;
600 resume_copy = (nn != 0);
601 //flush_gunzip_window();
602 w = 0;
603 return 1;
604 }
605 } while (nn);
606 resume_copy = 0;
607 }
608 }
609
610 /* restore the globals from the locals */
611 gunzip_outbuf_count = w; /* restore global gunzip_window pointer */
612 gunzip_bb = bb; /* restore global bit buffer */
613 gunzip_bk = k;
614
615 /* normally just after call to inflate_codes, but save code by putting it here */
616 /* free the decoding tables (tl and td), return */
617 huft_free_all(PASS_STATE_ONLY);
618
619 /* done */
620 return 0;
621}
622#undef ml
623#undef md
624#undef bb
625#undef k
626#undef w
627#undef tl
628#undef td
629#undef bl
630#undef bd
631#undef nn
632#undef dd
633
634
635/* called once from inflate_block */
/* Prepare inflate_stored() state: byte count, bit buffer snapshot and
 * window position. Called once from inflate_block. */
static void inflate_stored_setup(STATE_PARAM int my_n, int my_b, int my_k)
{
	inflate_stored_n = my_n;
	inflate_stored_b = my_b;
	inflate_stored_k = my_k;
	/* initialize gunzip_window position */
	inflate_stored_w = gunzip_outbuf_count;
}
644/* called once from inflate_get_next_window */
/* Copy the bytes of a stored (uncompressed) block into gunzip_window.
 * Returns 1 when the window fills up (call again to continue),
 * 0 when the block is done. Called from inflate_get_next_window. */
static int inflate_stored(STATE_PARAM_ONLY)
{
	/* read and output the compressed data */
	while (inflate_stored_n--) {
		inflate_stored_b = fill_bitbuffer(PASS_STATE inflate_stored_b, &inflate_stored_k, 8);
		gunzip_window[inflate_stored_w++] = (unsigned char) inflate_stored_b;
		if (inflate_stored_w == GUNZIP_WSIZE) {
			gunzip_outbuf_count = inflate_stored_w;
			//flush_gunzip_window();
			inflate_stored_w = 0;
			inflate_stored_b >>= 8;
			inflate_stored_k -= 8;
			return 1; /* We have a block */
		}
		inflate_stored_b >>= 8;
		inflate_stored_k -= 8;
	}

	/* restore the globals from the locals */
	gunzip_outbuf_count = inflate_stored_w; /* restore global gunzip_window pointer */
	gunzip_bb = inflate_stored_b; /* restore global bit buffer */
	gunzip_bk = inflate_stored_k;
	return 0; /* Finished */
}
669
670
671/*
672 * decompress an inflated block
673 * e: last block flag
674 *
675 * GLOBAL VARIABLES: bb, kk,
676 */
677/* Return values: -1 = inflate_stored, -2 = inflate_codes */
678/* One callsite in inflate_get_next_window */
/* Read one deflate block header, build its decode tables (for Huffman
 * block types), and stage the matching decoder via *_setup().
 * *e is set to the "last block" flag. Aborts via abort_unzip() on
 * malformed input. */
static int inflate_block(STATE_PARAM smallint *e)
{
	unsigned ll[286 + 30];	/* literal/length and distance code lengths */
	unsigned t;	/* block type */
	unsigned b;	/* bit buffer */
	unsigned k;	/* number of bits in bit buffer */

	/* make local bit buffer */

	b = gunzip_bb;
	k = gunzip_bk;

	/* read in last block bit */
	b = fill_bitbuffer(PASS_STATE b, &k, 1);
	*e = b & 1;
	b >>= 1;
	k -= 1;

	/* read in block type */
	b = fill_bitbuffer(PASS_STATE b, &k, 2);
	t = (unsigned) b & 3;
	b >>= 2;
	k -= 2;

	/* restore the global bit buffer */
	gunzip_bb = b;
	gunzip_bk = k;

	/* Do we see block type 1 often? Yes!
	 * TODO: fix performance problem (see below) */
	//bb_error_msg("blktype %d", t);

	/* inflate that block type */
	switch (t) {
	case 0: /* Inflate stored */
	{
		unsigned n;		/* number of bytes in block */
		unsigned b_stored;	/* bit buffer */
		unsigned k_stored;	/* number of bits in bit buffer */

		/* make local copies of globals */
		b_stored = gunzip_bb; /* initialize bit buffer */
		k_stored = gunzip_bk;

		/* go to byte boundary */
		n = k_stored & 7;
		b_stored >>= n;
		k_stored -= n;

		/* get the length and its complement */
		b_stored = fill_bitbuffer(PASS_STATE b_stored, &k_stored, 16);
		n = ((unsigned) b_stored & 0xffff);
		b_stored >>= 16;
		k_stored -= 16;

		b_stored = fill_bitbuffer(PASS_STATE b_stored, &k_stored, 16);
		if (n != (unsigned) ((~b_stored) & 0xffff)) {
			abort_unzip(PASS_STATE_ONLY); /* error in compressed data */
		}
		b_stored >>= 16;
		k_stored -= 16;

		inflate_stored_setup(PASS_STATE n, b_stored, k_stored);

		return -1;
	}
	case 1:
	/* Inflate fixed
	 * decompress an inflated type 1 (fixed Huffman codes) block. We should
	 * either replace this with a custom decoder, or at least precompute the
	 * Huffman tables. TODO */
	{
		int i;		/* temporary variable */
		unsigned bl;	/* lookup bits for tl */
		unsigned bd;	/* lookup bits for td */
		/* gcc 4.2.1 is too dumb to reuse stackspace. Moved up... */
		//unsigned ll[288]; /* length list for huft_build */

		/* set up literal table */
		for (i = 0; i < 144; i++)
			ll[i] = 8;
		for (; i < 256; i++)
			ll[i] = 9;
		for (; i < 280; i++)
			ll[i] = 7;
		for (; i < 288; i++) /* make a complete, but wrong code set */
			ll[i] = 8;
		bl = 7;
		huft_build(ll, 288, 257, cplens, cplext, &inflate_codes_tl, &bl);
		/* huft_build() never return nonzero - we use known data */

		/* set up distance table */
		for (i = 0; i < 30; i++) /* make an incomplete code set */
			ll[i] = 5;
		bd = 5;
		huft_build(ll, 30, 0, cpdist, cpdext, &inflate_codes_td, &bd);

		/* set up data for inflate_codes() */
		inflate_codes_setup(PASS_STATE bl, bd);

		/* huft_free code moved into inflate_codes */

		return -2;
	}
	case 2: /* Inflate dynamic */
	{
		enum { dbits = 6 };	/* bits in base distance lookup table */
		enum { lbits = 9 };	/* bits in base literal/length lookup table */

		huft_t *td;		/* distance code table */
		unsigned i;		/* temporary variables */
		unsigned j;
		unsigned l;		/* last length */
		unsigned m;		/* mask for bit lengths table */
		unsigned n;		/* number of lengths to get */
		unsigned bl;		/* lookup bits for tl */
		unsigned bd;		/* lookup bits for td */
		unsigned nb;		/* number of bit length codes */
		unsigned nl;		/* number of literal/length codes */
		unsigned nd;		/* number of distance codes */

		//unsigned ll[286 + 30];/* literal/length and distance code lengths */
		unsigned b_dynamic;	/* bit buffer */
		unsigned k_dynamic;	/* number of bits in bit buffer */

		/* make local bit buffer */
		b_dynamic = gunzip_bb;
		k_dynamic = gunzip_bk;

		/* read in table lengths */
		b_dynamic = fill_bitbuffer(PASS_STATE b_dynamic, &k_dynamic, 5);
		nl = 257 + ((unsigned) b_dynamic & 0x1f);	/* number of literal/length codes */

		b_dynamic >>= 5;
		k_dynamic -= 5;
		b_dynamic = fill_bitbuffer(PASS_STATE b_dynamic, &k_dynamic, 5);
		nd = 1 + ((unsigned) b_dynamic & 0x1f);	/* number of distance codes */

		b_dynamic >>= 5;
		k_dynamic -= 5;
		b_dynamic = fill_bitbuffer(PASS_STATE b_dynamic, &k_dynamic, 4);
		nb = 4 + ((unsigned) b_dynamic & 0xf);	/* number of bit length codes */

		b_dynamic >>= 4;
		k_dynamic -= 4;
		if (nl > 286 || nd > 30)
			abort_unzip(PASS_STATE_ONLY);	/* bad lengths */

		/* read in bit-length-code lengths */
		for (j = 0; j < nb; j++) {
			b_dynamic = fill_bitbuffer(PASS_STATE b_dynamic, &k_dynamic, 3);
			ll[border[j]] = (unsigned) b_dynamic & 7;
			b_dynamic >>= 3;
			k_dynamic -= 3;
		}
		for (; j < 19; j++)
			ll[border[j]] = 0;

		/* build decoding table for trees - single level, 7 bit lookup */
		bl = 7;
		i = huft_build(ll, 19, 19, NULL, NULL, &inflate_codes_tl, &bl);
		if (i != 0) {
			abort_unzip(PASS_STATE_ONLY); //return i; /* incomplete code set */
		}

		/* read in literal and distance code lengths */
		n = nl + nd;
		m = mask_bits[bl];
		i = l = 0;
		while ((unsigned) i < n) {
			b_dynamic = fill_bitbuffer(PASS_STATE b_dynamic, &k_dynamic, (unsigned)bl);
			td = inflate_codes_tl + ((unsigned) b_dynamic & m);
			j = td->b;
			b_dynamic >>= j;
			k_dynamic -= j;
			j = td->v.n;
			if (j < 16) {	/* length of code in bits (0..15) */
				ll[i++] = l = j;	/* save last length in l */
			} else if (j == 16) {	/* repeat last length 3 to 6 times */
				b_dynamic = fill_bitbuffer(PASS_STATE b_dynamic, &k_dynamic, 2);
				j = 3 + ((unsigned) b_dynamic & 3);
				b_dynamic >>= 2;
				k_dynamic -= 2;
				if ((unsigned) i + j > n) {
					abort_unzip(PASS_STATE_ONLY); //return 1;
				}
				while (j--) {
					ll[i++] = l;
				}
			} else if (j == 17) {	/* 3 to 10 zero length codes */
				b_dynamic = fill_bitbuffer(PASS_STATE b_dynamic, &k_dynamic, 3);
				j = 3 + ((unsigned) b_dynamic & 7);
				b_dynamic >>= 3;
				k_dynamic -= 3;
				if ((unsigned) i + j > n) {
					abort_unzip(PASS_STATE_ONLY); //return 1;
				}
				while (j--) {
					ll[i++] = 0;
				}
				l = 0;
			} else {	/* j == 18: 11 to 138 zero length codes */
				b_dynamic = fill_bitbuffer(PASS_STATE b_dynamic, &k_dynamic, 7);
				j = 11 + ((unsigned) b_dynamic & 0x7f);
				b_dynamic >>= 7;
				k_dynamic -= 7;
				if ((unsigned) i + j > n) {
					abort_unzip(PASS_STATE_ONLY); //return 1;
				}
				while (j--) {
					ll[i++] = 0;
				}
				l = 0;
			}
		}

		/* free decoding table for trees */
		huft_free(inflate_codes_tl);

		/* restore the global bit buffer */
		gunzip_bb = b_dynamic;
		gunzip_bk = k_dynamic;

		/* build the decoding tables for literal/length and distance codes */
		bl = lbits;

		i = huft_build(ll, nl, 257, cplens, cplext, &inflate_codes_tl, &bl);
		if (i != 0)
			abort_unzip(PASS_STATE_ONLY);
		bd = dbits;
		i = huft_build(ll + nl, nd, 0, cpdist, cpdext, &inflate_codes_td, &bd);
		if (i != 0)
			abort_unzip(PASS_STATE_ONLY);

		/* set up data for inflate_codes() */
		inflate_codes_setup(PASS_STATE bl, bd);

		/* huft_free code moved into inflate_codes */

		return -2;
	}
	default:
		abort_unzip(PASS_STATE_ONLY);
	}
}
924
925/* Two callsites, both in inflate_get_next_window */
/* Fold the current window contents into the running CRC32 and update
 * the output byte counter. Two callsites, both in inflate_get_next_window. */
static void calculate_gunzip_crc(STATE_PARAM_ONLY)
{
	gunzip_crc = crc32_block_endian0(gunzip_crc, gunzip_window, gunzip_outbuf_count, gunzip_crc_table);
	gunzip_bytes_out += gunzip_outbuf_count;
}
931
932/* One callsite in inflate_unzip_internal */
/* Produce the next window-full of decompressed data in gunzip_window
 * (count in gunzip_outbuf_count). Returns 1 if more data remains,
 * 0 after the last block. One callsite in inflate_unzip_internal. */
static int inflate_get_next_window(STATE_PARAM_ONLY)
{
	gunzip_outbuf_count = 0;

	while (1) {
		int ret;

		if (need_another_block) {
			if (end_reached) {
				calculate_gunzip_crc(PASS_STATE_ONLY);
				end_reached = 0;
				/* NB: need_another_block is still set */
				return 0; /* Last block */
			}
			method = inflate_block(PASS_STATE &end_reached);
			need_another_block = 0;
		}

		switch (method) {
		case -1:
			ret = inflate_stored(PASS_STATE_ONLY);
			break;
		case -2:
			ret = inflate_codes(PASS_STATE_ONLY);
			break;
		default: /* cannot happen */
			abort_unzip(PASS_STATE_ONLY);
		}

		if (ret == 1) {
			calculate_gunzip_crc(PASS_STATE_ONLY);
			return 1; /* more data left */
		}
		need_another_block = 1; /* end of that block */
	}
	/* Doesnt get here */
}
970
971
972/* Called from unpack_gz_stream() and inflate_unzip() */
/* Core inflate loop: decompress from fd 'in' to fd 'out'.
 * Returns bytes written (DESKTOP builds) or 0, and -1 on any error
 * (including errors longjmp'ed here from deep inside the machinery).
 * Called from unpack_gz_stream() and inflate_unzip(). */
static IF_DESKTOP(long long) int
inflate_unzip_internal(STATE_PARAM int in, int out)
{
	IF_DESKTOP(long long) int n = 0;
	ssize_t nwrote;

	/* Allocate all global buffers (for DYN_ALLOC option) */
	gunzip_window = xmalloc(GUNZIP_WSIZE);
	gunzip_outbuf_count = 0;
	gunzip_bytes_out = 0;
	gunzip_src_fd = in;

	/* (re) initialize state */
	method = -1;
	need_another_block = 1;
	resume_copy = 0;
	gunzip_bk = 0;
	gunzip_bb = 0;

	/* Create the crc table */
	gunzip_crc_table = crc32_filltable(NULL, 0);
	gunzip_crc = ~0;

	error_msg = "corrupted data";
	if (setjmp(error_jmp)) {
		/* Error from deep inside zip machinery */
		n = -1;
		goto ret;
	}

	while (1) {
		int r = inflate_get_next_window(PASS_STATE_ONLY);
		nwrote = full_write(out, gunzip_window, gunzip_outbuf_count);
		if (nwrote != (ssize_t)gunzip_outbuf_count) {
			bb_perror_msg("write");
			n = -1;
			goto ret;
		}
		IF_DESKTOP(n += nwrote;)
		if (r == 0) break;
	}

	/* Store unused bytes in a global buffer so calling applets can access it */
	if (gunzip_bk >= 8) {
		/* Undo too much lookahead. The next read will be byte aligned
		 * so we can discard unused bits in the last meaningful byte. */
		bytebuffer_offset--;
		bytebuffer[bytebuffer_offset] = gunzip_bb & 0xff;
		gunzip_bb >>= 8;
		gunzip_bk -= 8;
	}
 ret:
	/* Cleanup */
	free(gunzip_window);
	free(gunzip_crc_table);
	return n;
}
1030
1031
1032/* External entry points */
1033
1034/* For unzip */
1035
/* Entry point for the unzip applet: inflate 'compr_size' compressed
 * bytes from fd 'in' to fd 'out'.
 * On return, res->crc and res->bytes_out describe the decoded data.
 * Returns the value of inflate_unzip_internal() (-1 on error). */
IF_DESKTOP(long long) int FAST_FUNC
inflate_unzip(inflate_unzip_result *res, off_t compr_size, int in, int out)
{
	IF_DESKTOP(long long) int n;
	DECLARE_STATE;

	ALLOC_STATE;

	/* zip knows the member's compressed size upfront: cap our reads */
	to_read = compr_size;
//	bytebuffer_max = 0x8000;
	/* NOTE(review): offset 4 presumably leaves headroom for the
	 * "undo lookahead" push-back in inflate_unzip_internal — confirm */
	bytebuffer_offset = 4;
	bytebuffer = xmalloc(bytebuffer_max);
	n = inflate_unzip_internal(PASS_STATE in, out);
	free(bytebuffer);

	/* Export results for the caller's CRC/length validation */
	res->crc = gunzip_crc;
	res->bytes_out = gunzip_bytes_out;
	DEALLOC_STATE;
	return n;
}
1056
1057
1058/* For gunzip */
1059
1060/* helpers first */
1061
1062/* Top up the input buffer with at least n bytes. */
/* Ensure at least n readable bytes are available in bytebuffer at
 * bytebuffer_offset, refilling from gunzip_src_fd when necessary.
 * Returns 1 on success, 0 on read error or premature EOF. */
static int top_up(STATE_PARAM unsigned n)
{
	int count = bytebuffer_size - bytebuffer_offset;

	if (count < (int)n) {
		/* Slide the unread tail to the front, then refill the rest */
		memmove(bytebuffer, &bytebuffer[bytebuffer_offset], count);
		bytebuffer_offset = 0;
		bytebuffer_size = full_read(gunzip_src_fd, &bytebuffer[count], bytebuffer_max - count);
		if ((int)bytebuffer_size < 0) {
			bb_error_msg(bb_msg_read_error);
			return 0;
		}
		bytebuffer_size += count;
		/* Still fewer than n bytes: EOF came too early */
		if (bytebuffer_size < n)
			return 0;
	}
	return 1;
}
1081
/* Consume 2 bytes from bytebuffer and return them as a little-endian
 * uint16_t. Caller must have guaranteed availability via top_up(). */
static uint16_t buffer_read_le_u16(STATE_PARAM_ONLY)
{
	uint16_t res;
#if BB_LITTLE_ENDIAN
	/* Native order already matches: unaligned-safe direct load */
	move_from_unaligned16(res, &bytebuffer[bytebuffer_offset]);
#else
	res = bytebuffer[bytebuffer_offset];
	res |= bytebuffer[bytebuffer_offset + 1] << 8;
#endif
	bytebuffer_offset += 2;
	return res;
}
1094
/* Consume 4 bytes from bytebuffer and return them as a little-endian
 * uint32_t. Caller must have guaranteed availability via top_up(). */
static uint32_t buffer_read_le_u32(STATE_PARAM_ONLY)
{
	uint32_t res;
#if BB_LITTLE_ENDIAN
	/* Native order already matches: unaligned-safe direct load */
	move_from_unaligned32(res, &bytebuffer[bytebuffer_offset]);
#else
	res = bytebuffer[bytebuffer_offset];
	res |= bytebuffer[bytebuffer_offset + 1] << 8;
	res |= bytebuffer[bytebuffer_offset + 2] << 16;
	res |= bytebuffer[bytebuffer_offset + 3] << 24;
#endif
	bytebuffer_offset += 4;
	return res;
}
1109
/* Parse and consume one gzip member header from bytebuffer.
 * Skips optional fields (extra field, name, comment, header CRC) and,
 * when info is non-NULL, stores the member's mtime in info->mtime.
 * Returns 1 on a valid deflate-compressed header, 0 otherwise. */
static int check_header_gzip(STATE_PARAM unpack_info_t *info)
{
	union {
		unsigned char raw[8];
		struct {
			uint8_t gz_method;
			uint8_t flags;
			uint32_t mtime;
			uint8_t xtra_flags_UNUSED;
			uint8_t os_flags_UNUSED;
		} PACKED formatted;
	} header;
	/* Compile-time assertion: the packed layout must be exactly 8 bytes */
	struct BUG_header {
		char BUG_header[sizeof(header) == 8 ? 1 : -1];
	};

	/*
	 * Rewind bytebuffer. We use the beginning because the header has 8
	 * bytes, leaving enough for unwinding afterwards.
	 */
	bytebuffer_size -= bytebuffer_offset;
	memmove(bytebuffer, &bytebuffer[bytebuffer_offset], bytebuffer_size);
	bytebuffer_offset = 0;

	if (!top_up(PASS_STATE 8))
		return 0;
	memcpy(header.raw, &bytebuffer[bytebuffer_offset], 8);
	bytebuffer_offset += 8;

	/* Check the compression method: 8 = deflate, the only one we handle */
	if (header.formatted.gz_method != 8) {
		return 0;
	}

	if (header.formatted.flags & 0x04) {
		/* bit 2 set: extra field present */
		unsigned extra_short;

		if (!top_up(PASS_STATE 2))
			return 0;
		extra_short = buffer_read_le_u16(PASS_STATE_ONLY);
		if (!top_up(PASS_STATE extra_short))
			return 0;
		/* Ignore extra field */
		bytebuffer_offset += extra_short;
	}

	/* Discard original name and file comment if any */
	/* bit 3 set: original file name present */
	/* bit 4 set: file comment present */
	if (header.formatted.flags & 0x18) {
		/* Skip one NUL-terminated string per set bit (one or two) */
		while (1) {
			do {
				if (!top_up(PASS_STATE 1))
					return 0;
			} while (bytebuffer[bytebuffer_offset++] != 0);
			if ((header.formatted.flags & 0x18) != 0x18)
				break;
			/* Both were present: clear both bits, skip the second string */
			header.formatted.flags &= ~0x18;
		}
	}

	if (info)
		info->mtime = SWAP_LE32(header.formatted.mtime);

	/* Read the header checksum */
	if (header.formatted.flags & 0x02) {
		/* bit 1 set: 16-bit header CRC present — skipped, not verified */
		if (!top_up(PASS_STATE 2))
			return 0;
		bytebuffer_offset += 2;
	}
	return 1;
}
1183
1184IF_DESKTOP(long long) int FAST_FUNC
1185unpack_gz_stream_with_info(int in, int out, unpack_info_t *info)
1186{
1187 uint32_t v32;
1188 IF_DESKTOP(long long) int n;
1189 DECLARE_STATE;
1190
1191 n = 0;
1192
1193 ALLOC_STATE;
1194 to_read = -1;
1195// bytebuffer_max = 0x8000;
1196 bytebuffer = xmalloc(bytebuffer_max);
1197 gunzip_src_fd = in;
1198
1199 again:
1200 if (!check_header_gzip(PASS_STATE info)) {
1201 bb_error_msg("corrupted data");
1202 n = -1;
1203 goto ret;
1204 }
1205 n += inflate_unzip_internal(PASS_STATE in, out);
1206 if (n < 0)
1207 goto ret;
1208
1209 if (!top_up(PASS_STATE 8)) {
1210 bb_error_msg("corrupted data");
1211 n = -1;
1212 goto ret;
1213 }
1214
1215 /* Validate decompression - crc */
1216 v32 = buffer_read_le_u32(PASS_STATE_ONLY);
1217 if ((~gunzip_crc) != v32) {
1218 bb_error_msg("crc error");
1219 n = -1;
1220 goto ret;
1221 }
1222
1223 /* Validate decompression - size */
1224 v32 = buffer_read_le_u32(PASS_STATE_ONLY);
1225 if ((uint32_t)gunzip_bytes_out != v32) {
1226 bb_error_msg("incorrect length");
1227 n = -1;
1228 }
1229
1230 if (!top_up(PASS_STATE 2))
1231 goto ret; /* EOF */
1232
1233 if (bytebuffer[bytebuffer_offset] == 0x1f
1234 && bytebuffer[bytebuffer_offset + 1] == 0x8b
1235 ) {
1236 bytebuffer_offset += 2;
1237 goto again;
1238 }
1239 /* GNU gzip says: */
1240 /*bb_error_msg("decompression OK, trailing garbage ignored");*/
1241
1242 ret:
1243 free(bytebuffer);
1244 DEALLOC_STATE;
1245 return n;
1246}
1247
/* Convenience wrapper: decompress a gzip stream when the caller does
 * not need header metadata (mtime). */
IF_DESKTOP(long long) int FAST_FUNC
unpack_gz_stream(int in, int out)
{
	return unpack_gz_stream_with_info(in, out, NULL);
}
diff --git a/archival/libarchive/filter_accept_all.c b/archival/libarchive/filter_accept_all.c
new file mode 100644
index 000000000..e69deb679
--- /dev/null
+++ b/archival/libarchive/filter_accept_all.c
@@ -0,0 +1,17 @@
1/* vi: set sw=4 ts=4: */
2/*
3 * Copyright (C) 2002 by Glenn McGrath
4 *
5 * Licensed under GPLv2 or later, see file LICENSE in this source tree.
6 */
7
8#include "libbb.h"
9#include "archive.h"
10
11/* Accept any non-null name, its not really a filter at all */
12char FAST_FUNC filter_accept_all(archive_handle_t *archive_handle)
13{
14 if (archive_handle->file_header->name)
15 return EXIT_SUCCESS;
16 return EXIT_FAILURE;
17}
diff --git a/archival/libarchive/filter_accept_list.c b/archival/libarchive/filter_accept_list.c
new file mode 100644
index 000000000..a7640af79
--- /dev/null
+++ b/archival/libarchive/filter_accept_list.c
@@ -0,0 +1,19 @@
1/* vi: set sw=4 ts=4: */
2/*
3 * Copyright (C) 2002 by Glenn McGrath
4 *
5 * Licensed under GPLv2 or later, see file LICENSE in this source tree.
6 */
7
8#include "libbb.h"
9#include "archive.h"
10
11/*
12 * Accept names that are in the accept list, ignoring reject list.
13 */
14char FAST_FUNC filter_accept_list(archive_handle_t *archive_handle)
15{
16 if (find_list_entry(archive_handle->accept, archive_handle->file_header->name))
17 return EXIT_SUCCESS;
18 return EXIT_FAILURE;
19}
diff --git a/archival/libarchive/filter_accept_list_reassign.c b/archival/libarchive/filter_accept_list_reassign.c
new file mode 100644
index 000000000..d80f71668
--- /dev/null
+++ b/archival/libarchive/filter_accept_list_reassign.c
@@ -0,0 +1,51 @@
1/* vi: set sw=4 ts=4: */
2/*
3 * Copyright (C) 2002 by Glenn McGrath
4 *
5 * Licensed under GPLv2 or later, see file LICENSE in this source tree.
6 */
7
8#include "libbb.h"
9#include "archive.h"
10
11/* Built and used only if ENABLE_DPKG || ENABLE_DPKG_DEB */
12
13/*
14 * Reassign the subarchive metadata parser based on the filename extension
15 * e.g. if its a .tar.gz modify archive_handle->sub_archive to process a .tar.gz
16 * or if its a .tar.bz2 make archive_handle->sub_archive handle that
17 */
18char FAST_FUNC filter_accept_list_reassign(archive_handle_t *archive_handle)
19{
20 /* Check the file entry is in the accept list */
21 if (find_list_entry(archive_handle->accept, archive_handle->file_header->name)) {
22 const char *name_ptr;
23
24 /* Find extension */
25 name_ptr = strrchr(archive_handle->file_header->name, '.');
26 if (!name_ptr)
27 return EXIT_FAILURE;
28 name_ptr++;
29
30 /* Modify the subarchive handler based on the extension */
31 if (ENABLE_FEATURE_SEAMLESS_GZ
32 && strcmp(name_ptr, "gz") == 0
33 ) {
34 archive_handle->dpkg__action_data_subarchive = get_header_tar_gz;
35 return EXIT_SUCCESS;
36 }
37 if (ENABLE_FEATURE_SEAMLESS_BZ2
38 && strcmp(name_ptr, "bz2") == 0
39 ) {
40 archive_handle->dpkg__action_data_subarchive = get_header_tar_bz2;
41 return EXIT_SUCCESS;
42 }
43 if (ENABLE_FEATURE_SEAMLESS_LZMA
44 && strcmp(name_ptr, "lzma") == 0
45 ) {
46 archive_handle->dpkg__action_data_subarchive = get_header_tar_lzma;
47 return EXIT_SUCCESS;
48 }
49 }
50 return EXIT_FAILURE;
51}
diff --git a/archival/libarchive/filter_accept_reject_list.c b/archival/libarchive/filter_accept_reject_list.c
new file mode 100644
index 000000000..3e86cca65
--- /dev/null
+++ b/archival/libarchive/filter_accept_reject_list.c
@@ -0,0 +1,36 @@
1/* vi: set sw=4 ts=4: */
2/*
3 * Copyright (C) 2002 by Glenn McGrath
4 *
5 * Licensed under GPLv2 or later, see file LICENSE in this source tree.
6 */
7
8#include "libbb.h"
9#include "archive.h"
10
11/*
12 * Accept names that are in the accept list and not in the reject list
13 */
14char FAST_FUNC filter_accept_reject_list(archive_handle_t *archive_handle)
15{
16 const char *key;
17 const llist_t *reject_entry;
18 const llist_t *accept_entry;
19
20 key = archive_handle->file_header->name;
21
22 /* If the key is in a reject list fail */
23 reject_entry = find_list_entry2(archive_handle->reject, key);
24 if (reject_entry) {
25 return EXIT_FAILURE;
26 }
27 accept_entry = find_list_entry2(archive_handle->accept, key);
28
29 /* Fail if an accept list was specified and the key wasnt in there */
30 if ((accept_entry == NULL) && archive_handle->accept) {
31 return EXIT_FAILURE;
32 }
33
34 /* Accepted */
35 return EXIT_SUCCESS;
36}
diff --git a/archival/libarchive/find_list_entry.c b/archival/libarchive/find_list_entry.c
new file mode 100644
index 000000000..5efd1af2e
--- /dev/null
+++ b/archival/libarchive/find_list_entry.c
@@ -0,0 +1,54 @@
1/* vi: set sw=4 ts=4: */
2/*
3 * Copyright (C) 2002 by Glenn McGrath
4 *
5 * Licensed under GPLv2 or later, see file LICENSE in this source tree.
6 */
7
8#include <fnmatch.h>
9#include "libbb.h"
10#include "archive.h"
11
12/* Find a string in a shell pattern list */
13const llist_t* FAST_FUNC find_list_entry(const llist_t *list, const char *filename)
14{
15 while (list) {
16 if (fnmatch(list->data, filename, 0) == 0) {
17 return list;
18 }
19 list = list->link;
20 }
21 return NULL;
22}
23
24/* Same, but compares only path components present in pattern
25 * (extra trailing path components in filename are assumed to match)
26 */
27const llist_t* FAST_FUNC find_list_entry2(const llist_t *list, const char *filename)
28{
29 char buf[PATH_MAX];
30 int pattern_slash_cnt;
31 const char *c;
32 char *d;
33
34 while (list) {
35 c = list->data;
36 pattern_slash_cnt = 0;
37 while (*c)
38 if (*c++ == '/') pattern_slash_cnt++;
39 c = filename;
40 d = buf;
41 /* paranoia is better than buffer overflows */
42 while (*c && d != buf + sizeof(buf)-1) {
43 if (*c == '/' && --pattern_slash_cnt < 0)
44 break;
45 *d++ = *c++;
46 }
47 *d = '\0';
48 if (fnmatch(list->data, buf, 0) == 0) {
49 return list;
50 }
51 list = list->link;
52 }
53 return NULL;
54}
diff --git a/archival/libarchive/get_header_ar.c b/archival/libarchive/get_header_ar.c
new file mode 100644
index 000000000..df603b111
--- /dev/null
+++ b/archival/libarchive/get_header_ar.c
@@ -0,0 +1,133 @@
1/* vi: set sw=4 ts=4: */
2/* Copyright 2001 Glenn McGrath.
3 *
4 * Licensed under GPLv2 or later, see file LICENSE in this source tree.
5 */
6
7#include "libbb.h"
8#include "archive.h"
9#include "ar.h"
10
11static unsigned read_num(const char *str, int base)
12{
13 /* This code works because
14 * on misformatted numbers bb_strtou returns all-ones */
15 int err = bb_strtou(str, NULL, base);
16 if (err == -1)
17 bb_error_msg_and_die("invalid ar header");
18 return err;
19}
20
/* Read one ar(1) member header from archive_handle->src_fd, fill in
 * archive_handle->file_header, and run the configured filter/header/data
 * actions on the member. Index and long-filename pseudo members are
 * consumed internally (via tail recursion to the next real member).
 * Returns EXIT_SUCCESS when a member was processed, EXIT_FAILURE on EOF. */
char FAST_FUNC get_header_ar(archive_handle_t *archive_handle)
{
	file_header_t *typed = archive_handle->file_header;
	unsigned size;
	union {
		char raw[60];
		struct ar_header formatted;
	} ar;
#if ENABLE_FEATURE_AR_LONG_FILENAMES
	/* GNU "//" long-filename string table, kept across calls */
	static char *ar_long_names;
	static unsigned ar_long_name_size;
#endif

	/* don't use xread as we want to handle the error ourself */
	if (read(archive_handle->src_fd, ar.raw, 60) != 60) {
		/* End Of File */
		return EXIT_FAILURE;
	}

	/* ar header starts on an even byte (2 byte aligned)
	 * '\n' is used for padding
	 */
	if (ar.raw[0] == '\n') {
		/* fix up the header, we started reading 1 byte too early */
		memmove(ar.raw, &ar.raw[1], 59);
		ar.raw[59] = xread_char(archive_handle->src_fd);
		archive_handle->offset++;
	}
	archive_handle->offset += 60;

	if (ar.formatted.magic[0] != '`' || ar.formatted.magic[1] != '\n')
		bb_error_msg_and_die("invalid ar header");

	/* FIXME: more thorough routine would be in order here
	 * (we have something like that in tar)
	 * but for now we are lax. */
	ar.formatted.magic[0] = '\0'; /* else 4G-2 file will have size="4294967294`\n..." */
	typed->size = size = read_num(ar.formatted.size, 10);

	/* special filenames have '/' as the first character */
	if (ar.formatted.name[0] == '/') {
		if (ar.formatted.name[1] == ' ') {
			/* This is the index of symbols in the file for compilers */
			data_skip(archive_handle);
			archive_handle->offset += size;
			return get_header_ar(archive_handle); /* Return next header */
		}
#if ENABLE_FEATURE_AR_LONG_FILENAMES
		if (ar.formatted.name[1] == '/') {
			/* If the second char is a '/' then this entry's data section
			 * stores long filenames for multiple entries, they are stored
			 * in static variable long_names for use in future entries
			 */
			ar_long_name_size = size;
			free(ar_long_names);
			ar_long_names = xmalloc(size);
			xread(archive_handle->src_fd, ar_long_names, size);
			archive_handle->offset += size;
			/* Return next header */
			return get_header_ar(archive_handle);
		}
#else
		bb_error_msg_and_die("long filenames not supported");
#endif
	}
	/* Only size is always present, the rest may be missing in
	 * long filename pseudo file. Thus we decode the rest
	 * after dealing with long filename pseudo file.
	 */
	typed->mode = read_num(ar.formatted.mode, 8);
	typed->mtime = read_num(ar.formatted.date, 10);
	typed->uid = read_num(ar.formatted.uid, 10);
	typed->gid = read_num(ar.formatted.gid, 10);

#if ENABLE_FEATURE_AR_LONG_FILENAMES
	if (ar.formatted.name[0] == '/') {
		unsigned long_offset;

		/* The number after the '/' indicates the offset in the ar data section
		 * (saved in ar_long_names) that contains the real filename */
		long_offset = read_num(&ar.formatted.name[1], 10);
		if (long_offset >= ar_long_name_size) {
			bb_error_msg_and_die("can't resolve long filename");
		}
		typed->name = xstrdup(ar_long_names + long_offset);
	} else
#endif
	{
		/* short filenames */
		typed->name = xstrndup(ar.formatted.name, 16);
	}

	/* Trim the name at the first ' ' (padding) or '/' (GNU terminator) */
	typed->name[strcspn(typed->name, " /")] = '\0';

	if (archive_handle->filter(archive_handle) == EXIT_SUCCESS) {
		archive_handle->action_header(typed);
#if ENABLE_DPKG || ENABLE_DPKG_DEB
		if (archive_handle->dpkg__sub_archive) {
			/* Feed this member's data to the subarchive parser */
			while (archive_handle->dpkg__action_data_subarchive(archive_handle->dpkg__sub_archive) == EXIT_SUCCESS)
				continue;
		} else
#endif
			archive_handle->action_data(archive_handle);
	} else {
		data_skip(archive_handle);
	}

	archive_handle->offset += typed->size;
	/* Set the file pointer to the correct spot, we may have been reading a compressed file */
	lseek(archive_handle->src_fd, archive_handle->offset, SEEK_SET);

	return EXIT_SUCCESS;
}
diff --git a/archival/libarchive/get_header_cpio.c b/archival/libarchive/get_header_cpio.c
new file mode 100644
index 000000000..3d99b492a
--- /dev/null
+++ b/archival/libarchive/get_header_cpio.c
@@ -0,0 +1,186 @@
1/* vi: set sw=4 ts=4: */
2/* Copyright 2002 Laurence Anderson
3 *
4 * Licensed under GPLv2 or later, see file LICENSE in this source tree.
5 */
6
7#include "libbb.h"
8#include "archive.h"
9
10typedef struct hardlinks_t {
11 struct hardlinks_t *next;
12 int inode; /* TODO: must match maj/min too! */
13 int mode ;
14 int mtime; /* These three are useful only in corner case */
15 int uid ; /* of hardlinks with zero size body */
16 int gid ;
17 char name[1];
18} hardlinks_t;
19
/* Read one "newc"/"crc" cpio record from archive_handle->src_fd, fill in
 * archive_handle->file_header, and run the filter/data/header actions.
 * Hardlinks to zero-length files are deferred on a list and created when
 * the TRAILER!!! record (or EOF) is reached.
 * Returns EXIT_SUCCESS while members remain, EXIT_FAILURE at end of archive. */
char FAST_FUNC get_header_cpio(archive_handle_t *archive_handle)
{
	file_header_t *file_header = archive_handle->file_header;
	char cpio_header[110];
	int namesize;
	int major, minor, nlink, mode, inode;
	unsigned size, uid, gid, mtime;

	/* There can be padding before archive header */
	data_align(archive_handle, 4);

	size = full_read(archive_handle->src_fd, cpio_header, 110);
	if (size == 0) {
		/* EOF without trailer: still create any pending hardlinks */
		goto create_hardlinks;
	}
	if (size != 110) {
		bb_error_msg_and_die("short read");
	}
	archive_handle->offset += 110;

	/* Accept only the "new ASCII" magics: 070701 (newc) / 070702 (crc) */
	if (strncmp(&cpio_header[0], "07070", 5) != 0
	 || (cpio_header[5] != '1' && cpio_header[5] != '2')
	) {
		bb_error_msg_and_die("unsupported cpio format, use newc or crc");
	}

	/* Ten 8-hex-digit fields; maj/min of the containing device and the
	 * checksum field are skipped */
	if (sscanf(cpio_header + 6,
			"%8x" "%8x" "%8x" "%8x"
			"%8x" "%8x" "%8x" /*maj,min:*/ "%*16c"
			/*rmaj,rmin:*/"%8x" "%8x" "%8x" /*chksum: "%*8c"*/,
			&inode, &mode, &uid, &gid,
			&nlink, &mtime, &size,
			&major, &minor, &namesize) != 10)
		bb_error_msg_and_die("damaged cpio file");
	file_header->mode = mode;
	file_header->uid = uid;
	file_header->gid = gid;
	file_header->mtime = mtime;
	file_header->size = size;

	namesize &= 0x1fff; /* paranoia: limit names to 8k chars */
	file_header->name = xzalloc(namesize + 1);
	/* Read in filename */
	xread(archive_handle->src_fd, file_header->name, namesize);
	if (file_header->name[0] == '/') {
		/* Testcase: echo /etc/hosts | cpio -pvd /tmp
		 * Without this code, it tries to unpack /etc/hosts
		 * into "/etc/hosts", not "etc/hosts".
		 */
		char *p = file_header->name;
		do p++; while (*p == '/');
		overlapping_strcpy(file_header->name, p);
	}
	archive_handle->offset += namesize;

	/* Update offset amount and skip padding before file contents */
	data_align(archive_handle, 4);

	if (strcmp(file_header->name, "TRAILER!!!") == 0) {
		/* Always round up. ">> 9" divides by 512 */
		archive_handle->cpio__blocks = (uoff_t)(archive_handle->offset + 511) >> 9;
		goto create_hardlinks;
	}

	file_header->link_target = NULL;
	if (S_ISLNK(file_header->mode)) {
		/* Symlink target is stored as the member's data */
		file_header->size &= 0x1fff; /* paranoia: limit names to 8k chars */
		file_header->link_target = xzalloc(file_header->size + 1);
		xread(archive_handle->src_fd, file_header->link_target, file_header->size);
		archive_handle->offset += file_header->size;
		file_header->size = 0; /* Stop possible seeks in future */
	}

// TODO: data_extract_all can't deal with hardlinks to non-files...
// when fixed, change S_ISREG to !S_ISDIR here

	if (nlink > 1 && S_ISREG(file_header->mode)) {
		/* Remember this entry so later links to the same inode resolve */
		hardlinks_t *new = xmalloc(sizeof(*new) + namesize);
		new->inode = inode;
		new->mode  = mode ;
		new->mtime = mtime;
		new->uid   = uid  ;
		new->gid   = gid  ;
		strcpy(new->name, file_header->name);
		/* Put file on a linked list for later */
		if (size == 0) {
			new->next = archive_handle->cpio__hardlinks_to_create;
			archive_handle->cpio__hardlinks_to_create = new;
			return EXIT_SUCCESS; /* Skip this one */
			/* TODO: this breaks cpio -t (it does not show hardlinks) */
		}
		new->next = archive_handle->cpio__created_hardlinks;
		archive_handle->cpio__created_hardlinks = new;
	}
	file_header->device = makedev(major, minor);

	if (archive_handle->filter(archive_handle) == EXIT_SUCCESS) {
		archive_handle->action_data(archive_handle);
//TODO: run "echo /etc/hosts | cpio -pv /tmp" twice. On 2nd run:
//cpio: etc/hosts not created: newer or same age file exists
//etc/hosts <-- should NOT show it
//2 blocks <-- should say "0 blocks"
		archive_handle->action_header(file_header);
	} else {
		data_skip(archive_handle);
	}

	archive_handle->offset += file_header->size;

	free(file_header->link_target);
	free(file_header->name);
	file_header->link_target = NULL;
	file_header->name = NULL;

	return EXIT_SUCCESS;

 create_hardlinks:
	free(file_header->link_target);
	free(file_header->name);

	/* Create the hardlinks that were deferred (zero-size bodies) */
	while (archive_handle->cpio__hardlinks_to_create) {
		hardlinks_t *cur;
		hardlinks_t *make_me = archive_handle->cpio__hardlinks_to_create;

		archive_handle->cpio__hardlinks_to_create = make_me->next;

		memset(file_header, 0, sizeof(*file_header));
		file_header->mtime = make_me->mtime;
		file_header->name = make_me->name;
		file_header->mode = make_me->mode;
		file_header->uid = make_me->uid;
		file_header->gid = make_me->gid;
		/*file_header->size = 0;*/
		/*file_header->link_target = NULL;*/

		/* Try to find a file we are hardlinked to */
		cur = archive_handle->cpio__created_hardlinks;
		while (cur) {
			/* TODO: must match maj/min too! */
			if (cur->inode == make_me->inode) {
				file_header->link_target = cur->name;
				/* link_target != NULL, size = 0: "I am a hardlink" */
				if (archive_handle->filter(archive_handle) == EXIT_SUCCESS)
					archive_handle->action_data(archive_handle);
				free(make_me);
				goto next_link;
			}
			cur = cur->next;
		}
		/* Oops... no file with such inode was created... do it now
		 * (happens when hardlinked files are empty (zero length)) */
		if (archive_handle->filter(archive_handle) == EXIT_SUCCESS)
			archive_handle->action_data(archive_handle);
		/* Move to the list of created hardlinked files */
		make_me->next = archive_handle->cpio__created_hardlinks;
		archive_handle->cpio__created_hardlinks = make_me;
 next_link: ;
	}

	while (archive_handle->cpio__created_hardlinks) {
		hardlinks_t *p = archive_handle->cpio__created_hardlinks;
		archive_handle->cpio__created_hardlinks = p->next;
		free(p);
	}

	return EXIT_FAILURE; /* "No more files to process" */
}
diff --git a/archival/libarchive/get_header_tar.c b/archival/libarchive/get_header_tar.c
new file mode 100644
index 000000000..78b0ae25f
--- /dev/null
+++ b/archival/libarchive/get_header_tar.c
@@ -0,0 +1,461 @@
1/* vi: set sw=4 ts=4: */
2/* Licensed under GPLv2 or later, see file LICENSE in this source tree.
3 *
4 * FIXME:
5 * In privileged mode if uname and gname map to a uid and gid then use the
6 * mapped value instead of the uid/gid values in tar header
7 *
8 * References:
9 * GNU tar and star man pages,
10 * Opengroup's ustar interchange format,
11 * http://www.opengroup.org/onlinepubs/007904975/utilities/pax.html
12 */
13
14#include "libbb.h"
15#include "archive.h"
16
17typedef uint32_t aliased_uint32_t FIX_ALIASING;
18typedef off_t aliased_off_t FIX_ALIASING;
19
20
21/* NB: _DESTROYS_ str[len] character! */
/* Parse one numeric field of a tar header: the usual octal form
 * (leading spaces allowed, terminated by ' ' or NUL), or GNU tar's
 * "base-256" binary form for values that do not fit in octal.
 * NB: _DESTROYS_ str[len] character! */
static unsigned long long getOctal(char *str, int len)
{
	unsigned long long v;
	char *end;
	/* NB: leading spaces are allowed. Using strtoull to handle that.
	 * The downside is that we accept e.g. "-123" too :(
	 */
	str[len] = '\0';
	v = strtoull(str, &end, 8);
	/* std: "Each numeric field is terminated by one or more
	 * <space> or NUL characters". We must support ' '! */
	if (*end != '\0' && *end != ' ') {
		int8_t first = str[0];
		if (!(first & 0x80))
			bb_error_msg_and_die("corrupted octal value in tar header");
		/*
		 * GNU tar uses "base-256 encoding" for very large numbers.
		 * Encoding is binary, with highest bit always set as a marker
		 * and sign in next-highest bit:
		 * 80 00 .. 00 - zero
		 * bf ff .. ff - largest positive number
		 * ff ff .. ff - minus 1
		 * c0 00 .. 00 - smallest negative number
		 *
		 * Example of tar file with 8914993153 (0x213600001) byte file.
		 * Field starts at offset 7c:
		 * 00070 30 30 30 00 30 30 30 30 30 30 30 00 80 00 00 00 |000.0000000.....|
		 * 00080 00 00 00 02 13 60 00 01 31 31 31 32 30 33 33 36 |.....`..11120336|
		 *
		 * NB: tarballs with NEGATIVE unix times encoded that way were seen!
		 */
		v = first;
		/* Sign-extend using 6th bit: */
		v <<= sizeof(unsigned long long)*8 - 7;
		v = (long long)v >> (sizeof(unsigned long long)*8 - 7);
		/* BUG FIX: must be pre-increment. "*str++" re-read str[0]
		 * (whose payload bits are already in v) and never read the
		 * last byte str[len-1]; the worked example above only
		 * decodes to 0x213600001 with "*++str". */
		while (--len != 0)
			v = (v << 8) + (unsigned char) *++str;
	}
	return v;
}
62#define GET_OCTAL(a) getOctal((a), sizeof(a))
63
64#if ENABLE_FEATURE_TAR_SELINUX
65/* Scan a PAX header for SELinux contexts, via "RHT.security.selinux" keyword.
66 * This is what Red Hat's patched version of tar uses.
67 */
68# define SELINUX_CONTEXT_KEYWORD "RHT.security.selinux"
/* Read a PAX extended header of 'sz' bytes from the archive and scan its
 * "LEN NAME=VALUE\n" records for the SELINUX_CONTEXT_KEYWORD key.
 * Advances archive_handle->offset by sz.
 * Returns a malloc'd copy of the value, or NULL if absent or malformed
 * (caller frees). */
static char *get_selinux_sctx_from_pax_hdr(archive_handle_t *archive_handle, unsigned sz)
{
	char *buf, *p;
	char *result;

	p = buf = xmalloc(sz + 1);
	/* prevent bb_strtou from running off the buffer */
	buf[sz] = '\0';
	xread(archive_handle->src_fd, buf, sz);
	archive_handle->offset += sz;

	result = NULL;
	while (sz != 0) {
		char *end, *value;
		unsigned len;

		/* Every record has this format: "LEN NAME=VALUE\n",
		 * where LEN counts the whole record including itself */
		len = bb_strtou(p, &end, 10);
		/* expect errno to be EINVAL, because the character
		 * following the digits should be a space
		 */
		p += len;
		sz -= len;
		if ((int)sz < 0
		 || len == 0
		 || errno != EINVAL
		 || *end != ' '
		) {
			bb_error_msg("malformed extended header, skipped");
			// More verbose version:
			//bb_error_msg("malformed extended header at %"OFF_FMT"d, skipped",
			//		archive_handle->offset - (sz + len));
			break;
		}
		/* overwrite the terminating newline with NUL
		 * (we do not bother to check that it *was* a newline)
		 */
		p[-1] = '\0';
		/* Is it selinux security context? */
		value = end + 1;
		if (strncmp(value, SELINUX_CONTEXT_KEYWORD"=", sizeof(SELINUX_CONTEXT_KEYWORD"=") - 1) == 0) {
			value += sizeof(SELINUX_CONTEXT_KEYWORD"=") - 1;
			result = xstrdup(value);
			break;
		}
	}

	free(buf);
	return result;
}
119#endif
120
121char FAST_FUNC get_header_tar(archive_handle_t *archive_handle)
122{
123 file_header_t *file_header = archive_handle->file_header;
124 struct tar_header_t tar;
125 char *cp;
126 int i, sum_u, sum;
127#if ENABLE_FEATURE_TAR_OLDSUN_COMPATIBILITY
128 int sum_s;
129#endif
130 int parse_names;
131
132 /* Our "private data" */
133#if ENABLE_FEATURE_TAR_GNU_EXTENSIONS
134# define p_longname (archive_handle->tar__longname)
135# define p_linkname (archive_handle->tar__linkname)
136#else
137# define p_longname 0
138# define p_linkname 0
139#endif
140
141#if ENABLE_FEATURE_TAR_GNU_EXTENSIONS || ENABLE_FEATURE_TAR_SELINUX
142 again:
143#endif
144 /* Align header */
145 data_align(archive_handle, 512);
146
147 again_after_align:
148
149#if ENABLE_DESKTOP || ENABLE_FEATURE_TAR_AUTODETECT
150 /* to prevent misdetection of bz2 sig */
151 *(aliased_uint32_t*)&tar = 0;
152 i = full_read(archive_handle->src_fd, &tar, 512);
153 /* If GNU tar sees EOF in above read, it says:
154 * "tar: A lone zero block at N", where N = kilobyte
155 * where EOF was met (not EOF block, actual EOF!),
156 * and exits with EXIT_SUCCESS.
157 * We will mimic exit(EXIT_SUCCESS), although we will not mimic
158 * the message and we don't check whether we indeed
159 * saw zero block directly before this. */
160 if (i == 0) {
161 xfunc_error_retval = 0;
162 short_read:
163 bb_error_msg_and_die("short read");
164 }
165 if (i != 512) {
166 IF_FEATURE_TAR_AUTODETECT(goto autodetect;)
167 goto short_read;
168 }
169
170#else
171 i = 512;
172 xread(archive_handle->src_fd, &tar, i);
173#endif
174 archive_handle->offset += i;
175
176 /* If there is no filename its an empty header */
177 if (tar.name[0] == 0 && tar.prefix[0] == 0) {
178 if (archive_handle->tar__end) {
179 /* Second consecutive empty header - end of archive.
180 * Read until the end to empty the pipe from gz or bz2
181 */
182 while (full_read(archive_handle->src_fd, &tar, 512) == 512)
183 continue;
184 return EXIT_FAILURE;
185 }
186 archive_handle->tar__end = 1;
187 return EXIT_SUCCESS;
188 }
189 archive_handle->tar__end = 0;
190
191 /* Check header has valid magic, "ustar" is for the proper tar,
192 * five NULs are for the old tar format */
193 if (strncmp(tar.magic, "ustar", 5) != 0
194 && (!ENABLE_FEATURE_TAR_OLDGNU_COMPATIBILITY
195 || memcmp(tar.magic, "\0\0\0\0", 5) != 0)
196 ) {
197#if ENABLE_FEATURE_TAR_AUTODETECT
198 char FAST_FUNC (*get_header_ptr)(archive_handle_t *);
199 uint16_t magic2;
200
201 autodetect:
202 magic2 = *(uint16_t*)tar.name;
203 /* tar gz/bz autodetect: check for gz/bz2 magic.
204 * If we see the magic, and it is the very first block,
205 * we can switch to get_header_tar_gz/bz2/lzma().
206 * Needs seekable fd. I wish recv(MSG_PEEK) works
207 * on any fd... */
208# if ENABLE_FEATURE_SEAMLESS_GZ
209 if (magic2 == GZIP_MAGIC) {
210 get_header_ptr = get_header_tar_gz;
211 } else
212# endif
213# if ENABLE_FEATURE_SEAMLESS_BZ2
214 if (magic2 == BZIP2_MAGIC
215 && tar.name[2] == 'h' && isdigit(tar.name[3])
216 ) { /* bzip2 */
217 get_header_ptr = get_header_tar_bz2;
218 } else
219# endif
220# if ENABLE_FEATURE_SEAMLESS_XZ
221 //TODO: if (magic2 == XZ_MAGIC1)...
222 //else
223# endif
224 goto err;
225 /* Two different causes for lseek() != 0:
226 * unseekable fd (would like to support that too, but...),
227 * or not first block (false positive, it's not .gz/.bz2!) */
228 if (lseek(archive_handle->src_fd, -i, SEEK_CUR) != 0)
229 goto err;
230 while (get_header_ptr(archive_handle) == EXIT_SUCCESS)
231 continue;
232 return EXIT_FAILURE;
233 err:
234#endif /* FEATURE_TAR_AUTODETECT */
235 bb_error_msg_and_die("invalid tar magic");
236 }
237
238 /* Do checksum on headers.
239 * POSIX says that checksum is done on unsigned bytes, but
240 * Sun and HP-UX gets it wrong... more details in
241 * GNU tar source. */
242#if ENABLE_FEATURE_TAR_OLDSUN_COMPATIBILITY
243 sum_s = ' ' * sizeof(tar.chksum);
244#endif
245 sum_u = ' ' * sizeof(tar.chksum);
246 for (i = 0; i < 148; i++) {
247 sum_u += ((unsigned char*)&tar)[i];
248#if ENABLE_FEATURE_TAR_OLDSUN_COMPATIBILITY
249 sum_s += ((signed char*)&tar)[i];
250#endif
251 }
252 for (i = 156; i < 512; i++) {
253 sum_u += ((unsigned char*)&tar)[i];
254#if ENABLE_FEATURE_TAR_OLDSUN_COMPATIBILITY
255 sum_s += ((signed char*)&tar)[i];
256#endif
257 }
258 /* This field does not need special treatment (getOctal) */
259 {
260 char *endp; /* gcc likes temp var for &endp */
261 sum = strtoul(tar.chksum, &endp, 8);
262 if ((*endp != '\0' && *endp != ' ')
263 || (sum_u != sum IF_FEATURE_TAR_OLDSUN_COMPATIBILITY(&& sum_s != sum))
264 ) {
265 bb_error_msg_and_die("invalid tar header checksum");
266 }
267 }
268 /* don't use xstrtoul, tar.chksum may have leading spaces */
269 sum = strtoul(tar.chksum, NULL, 8);
270 if (sum_u != sum IF_FEATURE_TAR_OLDSUN_COMPATIBILITY(&& sum_s != sum)) {
271 bb_error_msg_and_die("invalid tar header checksum");
272 }
273
274 /* 0 is reserved for high perf file, treat as normal file */
275 if (!tar.typeflag) tar.typeflag = '0';
276 parse_names = (tar.typeflag >= '0' && tar.typeflag <= '7');
277
278 /* getOctal trashes subsequent field, therefore we call it
279 * on fields in reverse order */
280 if (tar.devmajor[0]) {
281 char t = tar.prefix[0];
282 /* we trash prefix[0] here, but we DO need it later! */
283 unsigned minor = GET_OCTAL(tar.devminor);
284 unsigned major = GET_OCTAL(tar.devmajor);
285 file_header->device = makedev(major, minor);
286 tar.prefix[0] = t;
287 }
288 file_header->link_target = NULL;
289 if (!p_linkname && parse_names && tar.linkname[0]) {
290 file_header->link_target = xstrndup(tar.linkname, sizeof(tar.linkname));
291 /* FIXME: what if we have non-link object with link_target? */
292 /* Will link_target be free()ed? */
293 }
294#if ENABLE_FEATURE_TAR_UNAME_GNAME
295 file_header->tar__uname = tar.uname[0] ? xstrndup(tar.uname, sizeof(tar.uname)) : NULL;
296 file_header->tar__gname = tar.gname[0] ? xstrndup(tar.gname, sizeof(tar.gname)) : NULL;
297#endif
298 file_header->mtime = GET_OCTAL(tar.mtime);
299 file_header->size = GET_OCTAL(tar.size);
300 file_header->gid = GET_OCTAL(tar.gid);
301 file_header->uid = GET_OCTAL(tar.uid);
302 /* Set bits 0-11 of the files mode */
303 file_header->mode = 07777 & GET_OCTAL(tar.mode);
304
305 file_header->name = NULL;
306 if (!p_longname && parse_names) {
307 /* we trash mode[0] here, it's ok */
308 //tar.name[sizeof(tar.name)] = '\0'; - gcc 4.3.0 would complain
309 tar.mode[0] = '\0';
310 if (tar.prefix[0]) {
311 /* and padding[0] */
312 //tar.prefix[sizeof(tar.prefix)] = '\0'; - gcc 4.3.0 would complain
313 tar.padding[0] = '\0';
314 file_header->name = concat_path_file(tar.prefix, tar.name);
315 } else
316 file_header->name = xstrdup(tar.name);
317 }
318
319 /* Set bits 12-15 of the files mode */
320 /* (typeflag was not trashed because chksum does not use getOctal) */
321 switch (tar.typeflag) {
322 /* busybox identifies hard links as being regular files with 0 size and a link name */
323 case '1':
324 file_header->mode |= S_IFREG;
325 break;
326 case '7':
327 /* case 0: */
328 case '0':
329#if ENABLE_FEATURE_TAR_OLDGNU_COMPATIBILITY
330 if (last_char_is(file_header->name, '/')) {
331 goto set_dir;
332 }
333#endif
334 file_header->mode |= S_IFREG;
335 break;
336 case '2':
337 file_header->mode |= S_IFLNK;
338 /* have seen tarballs with size field containing
339 * the size of the link target's name */
340 size0:
341 file_header->size = 0;
342 break;
343 case '3':
344 file_header->mode |= S_IFCHR;
345 goto size0; /* paranoia */
346 case '4':
347 file_header->mode |= S_IFBLK;
348 goto size0;
349 case '5':
350 IF_FEATURE_TAR_OLDGNU_COMPATIBILITY(set_dir:)
351 file_header->mode |= S_IFDIR;
352 goto size0;
353 case '6':
354 file_header->mode |= S_IFIFO;
355 goto size0;
356#if ENABLE_FEATURE_TAR_GNU_EXTENSIONS
357 case 'L':
358 /* free: paranoia: tar with several consecutive longnames */
359 free(p_longname);
360 /* For paranoia reasons we allocate extra NUL char */
361 p_longname = xzalloc(file_header->size + 1);
362 /* We read ASCIZ string, including NUL */
363 xread(archive_handle->src_fd, p_longname, file_header->size);
364 archive_handle->offset += file_header->size;
365 /* return get_header_tar(archive_handle); */
366 /* gcc 4.1.1 didn't optimize it into jump */
367 /* so we will do it ourself, this also saves stack */
368 goto again;
369 case 'K':
370 free(p_linkname);
371 p_linkname = xzalloc(file_header->size + 1);
372 xread(archive_handle->src_fd, p_linkname, file_header->size);
373 archive_handle->offset += file_header->size;
374 /* return get_header_tar(archive_handle); */
375 goto again;
376 case 'D': /* GNU dump dir */
377 case 'M': /* Continuation of multi volume archive */
378 case 'N': /* Old GNU for names > 100 characters */
379 case 'S': /* Sparse file */
380 case 'V': /* Volume header */
381#endif
382#if !ENABLE_FEATURE_TAR_SELINUX
383 case 'g': /* pax global header */
384 case 'x': /* pax extended header */
385#else
386 skip_ext_hdr:
387#endif
388 {
389 off_t sz;
390 bb_error_msg("warning: skipping header '%c'", tar.typeflag);
391 sz = (file_header->size + 511) & ~(off_t)511;
392 archive_handle->offset += sz;
393 sz >>= 9; /* sz /= 512 but w/o contortions for signed div */
394 while (sz--)
395 xread(archive_handle->src_fd, &tar, 512);
396 /* return get_header_tar(archive_handle); */
397 goto again_after_align;
398 }
399#if ENABLE_FEATURE_TAR_SELINUX
400 case 'g': /* pax global header */
401 case 'x': { /* pax extended header */
402 char **pp;
403 if ((uoff_t)file_header->size > 0xfffff) /* paranoia */
404 goto skip_ext_hdr;
405 pp = (tar.typeflag == 'g') ? &archive_handle->tar__global_sctx : &archive_handle->tar__next_file_sctx;
406 free(*pp);
407 *pp = get_selinux_sctx_from_pax_hdr(archive_handle, file_header->size);
408 goto again;
409 }
410#endif
411 default:
412 bb_error_msg_and_die("unknown typeflag: 0x%x", tar.typeflag);
413 }
414
415#if ENABLE_FEATURE_TAR_GNU_EXTENSIONS
416 if (p_longname) {
417 file_header->name = p_longname;
418 p_longname = NULL;
419 }
420 if (p_linkname) {
421 file_header->link_target = p_linkname;
422 p_linkname = NULL;
423 }
424#endif
425 if (strncmp(file_header->name, "/../"+1, 3) == 0
426 || strstr(file_header->name, "/../")
427 ) {
428 bb_error_msg_and_die("name with '..' encountered: '%s'",
429 file_header->name);
430 }
431
432 /* Strip trailing '/' in directories */
433 /* Must be done after mode is set as '/' is used to check if it's a directory */
434 cp = last_char_is(file_header->name, '/');
435
436 if (archive_handle->filter(archive_handle) == EXIT_SUCCESS) {
437 archive_handle->action_header(/*archive_handle->*/ file_header);
438 /* Note that we kill the '/' only after action_header() */
439 /* (like GNU tar 1.15.1: verbose mode outputs "dir/dir/") */
440 if (cp)
441 *cp = '\0';
442 archive_handle->action_data(archive_handle);
443 if (archive_handle->accept || archive_handle->reject)
444 llist_add_to(&archive_handle->passed, file_header->name);
445 else /* Caller isn't interested in list of unpacked files */
446 free(file_header->name);
447 } else {
448 data_skip(archive_handle);
449 free(file_header->name);
450 }
451 archive_handle->offset += file_header->size;
452
453 free(file_header->link_target);
454 /* Do not free(file_header->name)!
455 * It might be inserted in archive_handle->passed - see above */
456#if ENABLE_FEATURE_TAR_UNAME_GNAME
457 free(file_header->tar__uname);
458 free(file_header->tar__gname);
459#endif
460 return EXIT_SUCCESS;
461}
diff --git a/archival/libarchive/get_header_tar_bz2.c b/archival/libarchive/get_header_tar_bz2.c
new file mode 100644
index 000000000..60d32069f
--- /dev/null
+++ b/archival/libarchive/get_header_tar_bz2.c
@@ -0,0 +1,21 @@
1/* vi: set sw=4 ts=4: */
2/*
3 * Licensed under GPLv2 or later, see file LICENSE in this source tree.
4 */
5
6#include "libbb.h"
7#include "archive.h"
8
9char FAST_FUNC get_header_tar_bz2(archive_handle_t *archive_handle)
10{
11 /* Can't lseek over pipes */
12 archive_handle->seek = seek_by_read;
13
14 open_transformer(archive_handle->src_fd, unpack_bz2_stream_prime, "bunzip2");
15 archive_handle->offset = 0;
16 while (get_header_tar(archive_handle) == EXIT_SUCCESS)
17 continue;
18
19 /* Can only do one file at a time */
20 return EXIT_FAILURE;
21}
diff --git a/archival/libarchive/get_header_tar_gz.c b/archival/libarchive/get_header_tar_gz.c
new file mode 100644
index 000000000..b09f8691c
--- /dev/null
+++ b/archival/libarchive/get_header_tar_gz.c
@@ -0,0 +1,36 @@
1/* vi: set sw=4 ts=4: */
2/*
3 * Licensed under GPLv2 or later, see file LICENSE in this source tree.
4 */
5
6#include "libbb.h"
7#include "archive.h"
8
9char FAST_FUNC get_header_tar_gz(archive_handle_t *archive_handle)
10{
11#if BB_MMU
12 unsigned char magic[2];
13#endif
14
15 /* Can't lseek over pipes */
16 archive_handle->seek = seek_by_read;
17
18 /* Check gzip magic only if open_transformer will invoke unpack_gz_stream (MMU case).
19 * Otherwise, it will invoke an external helper "gunzip -cf" (NOMMU case) which will
20 * need the header. */
21#if BB_MMU
22 xread(archive_handle->src_fd, &magic, 2);
23 /* Can skip this check, but error message will be less clear */
24 if ((magic[0] != 0x1f) || (magic[1] != 0x8b)) {
25 bb_error_msg_and_die("invalid gzip magic");
26 }
27#endif
28
29 open_transformer(archive_handle->src_fd, unpack_gz_stream, "gunzip");
30 archive_handle->offset = 0;
31 while (get_header_tar(archive_handle) == EXIT_SUCCESS)
32 continue;
33
34 /* Can only do one file at a time */
35 return EXIT_FAILURE;
36}
diff --git a/archival/libarchive/get_header_tar_lzma.c b/archival/libarchive/get_header_tar_lzma.c
new file mode 100644
index 000000000..da08e0c72
--- /dev/null
+++ b/archival/libarchive/get_header_tar_lzma.c
@@ -0,0 +1,24 @@
1/* vi: set sw=4 ts=4: */
2/*
3 * Small lzma deflate implementation.
4 * Copyright (C) 2006 Aurelien Jacobs <aurel@gnuage.org>
5 *
6 * Licensed under GPLv2, see file LICENSE in this source tree.
7 */
8
9#include "libbb.h"
10#include "archive.h"
11
12char FAST_FUNC get_header_tar_lzma(archive_handle_t *archive_handle)
13{
14 /* Can't lseek over pipes */
15 archive_handle->seek = seek_by_read;
16
17 open_transformer(archive_handle->src_fd, unpack_lzma_stream, "unlzma");
18 archive_handle->offset = 0;
19 while (get_header_tar(archive_handle) == EXIT_SUCCESS)
20 continue;
21
22 /* Can only do one file at a time */
23 return EXIT_FAILURE;
24}
diff --git a/archival/libarchive/header_list.c b/archival/libarchive/header_list.c
new file mode 100644
index 000000000..c4fc75f38
--- /dev/null
+++ b/archival/libarchive/header_list.c
@@ -0,0 +1,12 @@
1/* vi: set sw=4 ts=4: */
2/*
3 * Licensed under GPLv2 or later, see file LICENSE in this source tree.
4 */
5#include "libbb.h"
6#include "archive.h"
7
8void FAST_FUNC header_list(const file_header_t *file_header)
9{
10//TODO: cpio -vp DIR should output "DIR/NAME", not just "NAME" */
11 puts(file_header->name);
12}
diff --git a/archival/libarchive/header_skip.c b/archival/libarchive/header_skip.c
new file mode 100644
index 000000000..2bfc5253c
--- /dev/null
+++ b/archival/libarchive/header_skip.c
@@ -0,0 +1,10 @@
1/* vi: set sw=4 ts=4: */
2/*
3 * Licensed under GPLv2 or later, see file LICENSE in this source tree.
4 */
5#include "libbb.h"
6#include "archive.h"
7
/* No-op action_header callback: used when the caller wants no per-file
 * output (it is the default installed by init_handle()). */
void FAST_FUNC header_skip(const file_header_t *file_header UNUSED_PARAM)
{
}
diff --git a/archival/libarchive/header_verbose_list.c b/archival/libarchive/header_verbose_list.c
new file mode 100644
index 000000000..bc4e4154b
--- /dev/null
+++ b/archival/libarchive/header_verbose_list.c
@@ -0,0 +1,69 @@
1/* vi: set sw=4 ts=4: */
2/*
3 * Licensed under GPLv2 or later, see file LICENSE in this source tree.
4 */
5
6#include "libbb.h"
7#include "archive.h"
8
/* action_header callback for verbose listing (tar -tv style): prints
 * mode string, owner/group, size, mtime and name on one line.
 * NOTE(review): &file_header->mtime is passed to localtime_r(), which
 * takes a time_t* - assumes mtime's type is time_t-compatible; confirm
 * against the file_header_t declaration. */
void FAST_FUNC header_verbose_list(const file_header_t *file_header)
{
	struct tm tm_time;
	struct tm *ptm = &tm_time; //localtime(&file_header->mtime);

#if ENABLE_FEATURE_TAR_UNAME_GNAME
	/* big enough for a decimal unsigned int (~3 chars/byte) + sign + NUL */
	char uid[sizeof(int)*3 + 2];
	/*char gid[sizeof(int)*3 + 2];*/
	char *user;
	char *group;

	localtime_r(&file_header->mtime, ptm);

	/* prefer symbolic names from the archive; fall back to numeric ids */
	user = file_header->tar__uname;
	if (user == NULL) {
		sprintf(uid, "%u", (unsigned)file_header->uid);
		user = uid;
	}
	group = file_header->tar__gname;
	if (group == NULL) {
		/*sprintf(gid, "%u", (unsigned)file_header->gid);*/
		group = utoa(file_header->gid);
	}
	printf("%s %s/%s %9"OFF_FMT"u %4u-%02u-%02u %02u:%02u:%02u %s",
		bb_mode_string(file_header->mode),
		user,
		group,
		file_header->size,
		1900 + ptm->tm_year,
		1 + ptm->tm_mon,
		ptm->tm_mday,
		ptm->tm_hour,
		ptm->tm_min,
		ptm->tm_sec,
		file_header->name);

#else /* !FEATURE_TAR_UNAME_GNAME */

	localtime_r(&file_header->mtime, ptm);

	/* no name lookup configured: always print numeric uid/gid */
	printf("%s %u/%u %9"OFF_FMT"u %4u-%02u-%02u %02u:%02u:%02u %s",
		bb_mode_string(file_header->mode),
		(unsigned)file_header->uid,
		(unsigned)file_header->gid,
		file_header->size,
		1900 + ptm->tm_year,
		1 + ptm->tm_mon,
		ptm->tm_mday,
		ptm->tm_hour,
		ptm->tm_min,
		ptm->tm_sec,
		file_header->name);

#endif /* FEATURE_TAR_UNAME_GNAME */

	/* NB: GNU tar shows "->" for symlinks and "link to" for hardlinks */
	if (file_header->link_target) {
		printf(" -> %s", file_header->link_target);
	}
	bb_putchar('\n');
}
diff --git a/archival/libarchive/init_handle.c b/archival/libarchive/init_handle.c
new file mode 100644
index 000000000..6644ea13b
--- /dev/null
+++ b/archival/libarchive/init_handle.c
@@ -0,0 +1,22 @@
1/* vi: set sw=4 ts=4: */
2/*
3 * Licensed under GPLv2 or later, see file LICENSE in this source tree.
4 */
5
6#include "libbb.h"
7#include "archive.h"
8
9archive_handle_t* FAST_FUNC init_handle(void)
10{
11 archive_handle_t *archive_handle;
12
13 /* Initialize default values */
14 archive_handle = xzalloc(sizeof(archive_handle_t));
15 archive_handle->file_header = xzalloc(sizeof(file_header_t));
16 archive_handle->action_header = header_skip;
17 archive_handle->action_data = data_skip;
18 archive_handle->filter = filter_accept_all;
19 archive_handle->seek = seek_by_jump;
20
21 return archive_handle;
22}
diff --git a/archival/libarchive/liblzo.h b/archival/libarchive/liblzo.h
new file mode 100644
index 000000000..843997cb9
--- /dev/null
+++ b/archival/libarchive/liblzo.h
@@ -0,0 +1,93 @@
1/*
2 This file is part of the LZO real-time data compression library.
3
4 Copyright (C) 1996..2008 Markus Franz Xaver Johannes Oberhumer
5 All Rights Reserved.
6
7 Markus F.X.J. Oberhumer <markus@oberhumer.com>
8 http://www.oberhumer.com/opensource/lzo/
9
10 The LZO library is free software; you can redistribute it and/or
11 modify it under the terms of the GNU General Public License as
12 published by the Free Software Foundation; either version 2 of
13 the License, or (at your option) any later version.
14
15 The LZO library is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with the LZO library; see the file COPYING.
22 If not, write to the Free Software Foundation, Inc.,
23 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
24 */
25
#include "liblzo_interface.h"

/* lzo-2.03/src/config1x.h */
/* Length and offset limits for the LZO1X match classes (M1..M4) */
#define M2_MIN_LEN 3
#define M2_MAX_LEN 8
#define M3_MAX_LEN 33
#define M4_MAX_LEN 9
#define M1_MAX_OFFSET 0x0400
#define M2_MAX_OFFSET 0x0800
#define M3_MAX_OFFSET 0x4000
#define M4_MAX_OFFSET 0xbfff
/* Marker bits that tag each match class in the encoded stream
 * (OR-ed into the opcode byte, see code_match in lzo1x_9x.c) */
#define M1_MARKER 0
#define M3_MARKER 32
#define M4_MARKER 16

#define MX_MAX_OFFSET (M1_MAX_OFFSET + M2_MAX_OFFSET)
#define MIN_LOOKAHEAD (M2_MAX_LEN + 1)

#define LZO_EOF_CODE

/* lzo-2.03/src/lzo_dict.h */
#define GINDEX(m_pos,m_off,dict,dindex,in) m_pos = dict[dindex]
/* Hash 2 or 3 consecutive bytes into a dictionary index */
#define DX2(p,s1,s2) \
	(((((unsigned)((p)[2]) << (s2)) ^ (p)[1]) << (s1)) ^ (p)[0])
//#define DA3(p,s1,s2,s3) ((DA2((p)+1,s2,s3) << (s1)) + (p)[0])
//#define DS3(p,s1,s2,s3) ((DS2((p)+1,s2,s3) << (s1)) - (p)[0])
#define DX3(p,s1,s2,s3) ((DX2((p)+1,s2,s3) << (s1)) ^ (p)[0])

/* Dictionary size is derived from D_BITS, which each including .c file
 * defines before pulling this header in (14 or 15 bits here) */
#define D_SIZE (1U << D_BITS)
#define D_MASK ((1U << D_BITS) - 1)
#define D_HIGH ((D_MASK >> 1) + 1)

/* Evaluates true when m_pos is out of the valid window: before the
 * input start or more than max_offset behind ip; sets m_off as a
 * side effect (comma expression) */
#define LZO_CHECK_MPOS_NON_DET(m_pos,m_off,in,ip,max_offset) \
	( \
		m_pos = ip - (unsigned)(ip - m_pos), \
		((uintptr_t)m_pos < (uintptr_t)in \
		|| (m_off = (unsigned)(ip - m_pos)) <= 0 \
		|| m_off > max_offset) \
	)

#define DENTRY(p,in) (p)
#define UPDATE_I(dict,drun,index,p,in) dict[index] = DENTRY(p,in)

#define DMS(v,s) ((unsigned) (((v) & (D_MASK >> (s))) << (s)))
#define DM(v) ((unsigned) ((v) & D_MASK))
#define DMUL(a,b) ((unsigned) ((a) * (b)))

/* lzo-2.03/src/lzo_ptr.h */
#define pd(a,b) ((unsigned)((a)-(b)))

/* Bounds checks: jump to the *_overrun labels that the including
 * file must define */
# define TEST_IP (ip < ip_end)
# define NEED_IP(x) \
	if ((unsigned)(ip_end - ip) < (unsigned)(x)) goto input_overrun

# undef TEST_OP /* don't need both of the tests here */
# define TEST_OP 1
# define NEED_OP(x) \
	if ((unsigned)(op_end - op) < (unsigned)(x)) goto output_overrun

#define HAVE_ANY_OP 1

//#if defined(LZO_TEST_OVERRUN_LOOKBEHIND)
# define TEST_LB(m_pos) if (m_pos < out || m_pos >= op) goto lookbehind_overrun
//# define TEST_LBO(m_pos,o) if (m_pos < out || m_pos >= op - (o)) goto lookbehind_overrun
//#else
//# define TEST_LB(m_pos) ((void) 0)
//# define TEST_LBO(m_pos,o) ((void) 0)
//#endif
diff --git a/archival/libarchive/lzo1x_1.c b/archival/libarchive/lzo1x_1.c
new file mode 100644
index 000000000..a88839846
--- /dev/null
+++ b/archival/libarchive/lzo1x_1.c
@@ -0,0 +1,35 @@
1/* LZO1X-1 compression
2
3 This file is part of the LZO real-time data compression library.
4
5 Copyright (C) 1996..2008 Markus Franz Xaver Johannes Oberhumer
6 All Rights Reserved.
7
8 Markus F.X.J. Oberhumer <markus@oberhumer.com>
9 http://www.oberhumer.com/opensource/lzo/
10
11 The LZO library is free software; you can redistribute it and/or
12 modify it under the terms of the GNU General Public License as
13 published by the Free Software Foundation; either version 2 of
14 the License, or (at your option) any later version.
15
16 The LZO library is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License for more details.
20
21 You should have received a copy of the GNU General Public License
22 along with the LZO library; see the file COPYING.
23 If not, write to the Free Software Foundation, Inc.,
24 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
25 */
#include "libbb.h"
#include "liblzo.h"

/* LZO1X-1 configuration: 14-bit dictionary (16K entries) */
#define D_BITS 14
/* Hash 3 input bytes to a dictionary index; D_INDEX2 derives the
 * second probe position from the first */
#define D_INDEX1(d,p) d = DM(DMUL(0x21,DX3(p,5,5,6)) >> 5)
#define D_INDEX2(d,p) d = (d & (D_MASK & 0x7ff)) ^ (D_HIGH | 0x1f)

#define DO_COMPRESS lzo1x_1_compress

/* the shared compressor body is instantiated with the settings above */
#include "lzo1x_c.c"
diff --git a/archival/libarchive/lzo1x_1o.c b/archival/libarchive/lzo1x_1o.c
new file mode 100644
index 000000000..3c61253e0
--- /dev/null
+++ b/archival/libarchive/lzo1x_1o.c
@@ -0,0 +1,35 @@
1/* LZO1X-1(15) compression
2
3 This file is part of the LZO real-time data compression library.
4
5 Copyright (C) 1996..2008 Markus Franz Xaver Johannes Oberhumer
6 All Rights Reserved.
7
8 Markus F.X.J. Oberhumer <markus@oberhumer.com>
9 http://www.oberhumer.com/opensource/lzo/
10
11 The LZO library is free software; you can redistribute it and/or
12 modify it under the terms of the GNU General Public License as
13 published by the Free Software Foundation; either version 2 of
14 the License, or (at your option) any later version.
15
16 The LZO library is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License for more details.
20
21 You should have received a copy of the GNU General Public License
22 along with the LZO library; see the file COPYING.
23 If not, write to the Free Software Foundation, Inc.,
24 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
25 */
#include "libbb.h"
#include "liblzo.h"

/* LZO1X-1(15) configuration: 15-bit dictionary (32K entries) */
#define D_BITS 15
/* Hash 3 input bytes to a dictionary index; D_INDEX2 derives the
 * second probe position from the first */
#define D_INDEX1(d,p) d = DM(DMUL(0x21,DX3(p,5,5,6)) >> 5)
#define D_INDEX2(d,p) d = (d & (D_MASK & 0x7ff)) ^ (D_HIGH | 0x1f)

#define DO_COMPRESS lzo1x_1_15_compress

/* the shared compressor body is instantiated with the settings above */
#include "lzo1x_c.c"
diff --git a/archival/libarchive/lzo1x_9x.c b/archival/libarchive/lzo1x_9x.c
new file mode 100644
index 000000000..483205155
--- /dev/null
+++ b/archival/libarchive/lzo1x_9x.c
@@ -0,0 +1,921 @@
1/* lzo1x_9x.c -- implementation of the LZO1X-999 compression algorithm
2
3 This file is part of the LZO real-time data compression library.
4
5 Copyright (C) 2008 Markus Franz Xaver Johannes Oberhumer
6 Copyright (C) 2007 Markus Franz Xaver Johannes Oberhumer
7 Copyright (C) 2006 Markus Franz Xaver Johannes Oberhumer
8 Copyright (C) 2005 Markus Franz Xaver Johannes Oberhumer
9 Copyright (C) 2004 Markus Franz Xaver Johannes Oberhumer
10 Copyright (C) 2003 Markus Franz Xaver Johannes Oberhumer
11 Copyright (C) 2002 Markus Franz Xaver Johannes Oberhumer
12 Copyright (C) 2001 Markus Franz Xaver Johannes Oberhumer
13 Copyright (C) 2000 Markus Franz Xaver Johannes Oberhumer
14 Copyright (C) 1999 Markus Franz Xaver Johannes Oberhumer
15 Copyright (C) 1998 Markus Franz Xaver Johannes Oberhumer
16 Copyright (C) 1997 Markus Franz Xaver Johannes Oberhumer
17 Copyright (C) 1996 Markus Franz Xaver Johannes Oberhumer
18 All Rights Reserved.
19
20 The LZO library is free software; you can redistribute it and/or
21 modify it under the terms of the GNU General Public License as
22 published by the Free Software Foundation; either version 2 of
23 the License, or (at your option) any later version.
24
25 The LZO library is distributed in the hope that it will be useful,
26 but WITHOUT ANY WARRANTY; without even the implied warranty of
27 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28 GNU General Public License for more details.
29
30 You should have received a copy of the GNU General Public License
31 along with the LZO library; see the file COPYING.
32 If not, write to the Free Software Foundation, Inc.,
33 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
34
35 Markus F.X.J. Oberhumer
36 <markus@oberhumer.com>
37 http://www.oberhumer.com/opensource/lzo/
38*/
39#include "libbb.h"
40
41/* The following is probably only safe on Intel-compatible processors ... */
42#define LZO_UNALIGNED_OK_2
43#define LZO_UNALIGNED_OK_4
44
45#include "liblzo.h"
46
47#define LZO_MAX(a,b) ((a) >= (b) ? (a) : (b))
48#define LZO_MIN(a,b) ((a) <= (b) ? (a) : (b))
49#define LZO_MAX3(a,b,c) ((a) >= (b) ? LZO_MAX(a,c) : LZO_MAX(b,c))
50
51/***********************************************************************
52//
53************************************************************************/
54#define SWD_N M4_MAX_OFFSET /* size of ring buffer */
55#define SWD_F 2048 /* upper limit for match length */
56
57#define SWD_BEST_OFF (LZO_MAX3(M2_MAX_LEN, M3_MAX_LEN, M4_MAX_LEN) + 1)
58
/* Compressor-side state for LZO1X-999 */
typedef struct {
	int init;		/* set once by init_match(); guards double init */

	unsigned look;		/* bytes in lookahead buffer */

	unsigned m_len;		/* current match length (0 = no match) */
	unsigned m_off;		/* current match offset */

	const uint8_t *bp;	/* ip minus lookahead (maintained in find_match) */
	const uint8_t *ip;	/* next unread input byte */
	const uint8_t *in;	/* input buffer start */
	const uint8_t *in_end;	/* one past the last input byte */
	uint8_t *out;		/* output buffer start */

	/* NOTE(review): presumably the length of the literal run preceding
	 * the current opcode - code_match asserts 0 < r1_lit < 4 for M1
	 * matches; it is not written anywhere in this chunk, confirm in
	 * the compressor driver */
	unsigned r1_lit;

} lzo1x_999_t;

/* Next input byte from compressor state c, or -1 when input is exhausted */
#define getbyte(c) ((c).ip < (c).in_end ? *((c).ip)++ : (-1))
78
79/* lzo_swd.c -- sliding window dictionary */
80
81/***********************************************************************
82//
83************************************************************************/
84#define SWD_UINT_MAX USHRT_MAX
85
86#ifndef SWD_HSIZE
87# define SWD_HSIZE 16384
88#endif
89#ifndef SWD_MAX_CHAIN
90# define SWD_MAX_CHAIN 2048
91#endif
92
93#define HEAD3(b, p) \
94 ( ((0x9f5f * ((((b[p]<<5)^b[p+1])<<5) ^ b[p+2])) >> 5) & (SWD_HSIZE-1) )
95
96#if defined(LZO_UNALIGNED_OK_2)
97# define HEAD2(b,p) (* (uint16_t *) &(b[p]))
98#else
99# define HEAD2(b,p) (b[p] ^ ((unsigned)b[p+1]<<8))
100#endif
101#define NIL2 SWD_UINT_MAX
102
/* Sliding-window dictionary state. The window is a B_SIZE ring buffer
 * indexed by 16-bit positions; candidate matches are found through hash
 * chains keyed on 3 bytes (head3/succ3) and, when HEAD2 is defined, a
 * direct-mapped 2-byte table (head2). */
typedef struct lzo_swd {
	/* public - "built-in" */

	/* public - configuration */
	unsigned max_chain;	/* cap on hash-chain nodes probed per position */
	int use_best_off;	/* also record best offset per match length */

	/* public - output */
	unsigned m_len;		/* best match length found so far */
	unsigned m_off;		/* offset of that match */
	unsigned look;		/* bytes available in the lookahead */
	int b_char;		/* current byte, or -1 at end of input */
#if defined(SWD_BEST_OFF)
	unsigned best_off[SWD_BEST_OFF];
#endif

	/* semi public */
	lzo1x_999_t *c;		/* back-pointer to the compressor state */
	unsigned m_pos;		/* buffer position of the best match */
#if defined(SWD_BEST_OFF)
	unsigned best_pos[SWD_BEST_OFF];	/* position+1 per length; 0 = none */
#endif

	/* private */
	unsigned ip;		/* input pointer (lookahead) */
	unsigned bp;		/* buffer pointer */
	unsigned rp;		/* remove pointer */

	unsigned node_count;	/* positions not yet registered (warm-up) */
	unsigned first_rp;

	uint8_t b[SWD_N + SWD_F];	/* window + lookahead ring buffer */
	uint8_t b_wrap[SWD_F];	/* must follow b */
	uint16_t head3[SWD_HSIZE];	/* 3-byte hash chain heads */
	uint16_t succ3[SWD_N + SWD_F];	/* chain links, one per position */
	uint16_t best3[SWD_N + SWD_F];
	uint16_t llen3[SWD_HSIZE];	/* chain length per hash slot */
#ifdef HEAD2
	uint16_t head2[65536L];	/* direct-mapped 2-byte heads (NIL2 = empty) */
#endif
} lzo_swd_t, *lzo_swd_p;
144
145#define SIZEOF_LZO_SWD_T (sizeof(lzo_swd_t))
146
147
148/* Access macro for head3.
149 * head3[key] may be uninitialized, but then its value will never be used.
150 */
151#define s_get_head3(s,key) s->head3[key]
152
153
154/***********************************************************************
155//
156************************************************************************/
157#define B_SIZE (SWD_N + SWD_F)
158
/* Initialize the sliding-window dictionary: clear the hash tables,
 * prefill the lookahead with up to SWD_F input bytes, and position
 * the remove pointer node_count slots behind ip (mod B_SIZE).
 * Returns LZO_E_OK. */
static int swd_init(lzo_swd_p s)
{
	/* defaults */
	s->node_count = SWD_N;

	memset(s->llen3, 0, sizeof(s->llen3[0]) * (unsigned)SWD_HSIZE);
#ifdef HEAD2
	/* 0xff-fill marks every 2-byte slot empty (NIL2 == SWD_UINT_MAX) */
	memset(s->head2, 0xff, sizeof(s->head2[0]) * 65536L);
	assert(s->head2[0] == NIL2);
#endif

	s->ip = 0;
	s->bp = s->ip;
	s->first_rp = s->ip;

	assert(s->ip + SWD_F <= B_SIZE);
	/* copy the initial lookahead window out of the caller's input */
	s->look = (unsigned) (s->c->in_end - s->c->ip);
	if (s->look > 0) {
		if (s->look > SWD_F)
			s->look = SWD_F;
		memcpy(&s->b[s->ip], s->c->ip, s->look);
		s->c->ip += s->look;
		s->ip += s->look;
	}
	if (s->ip == B_SIZE)
		s->ip = 0;

	/* rp trails ip by node_count positions, wrapping around B_SIZE */
	s->rp = s->first_rp;
	if (s->rp >= s->node_count)
		s->rp -= s->node_count;
	else
		s->rp += B_SIZE - s->node_count;

	return LZO_E_OK;
}
194
195#define swd_pos2off(s,pos) \
196 (s->bp > (pos) ? s->bp - (pos) : B_SIZE - ((pos) - s->bp))
197
198
199/***********************************************************************
200//
201************************************************************************/
202static void swd_getbyte(lzo_swd_p s)
203{
204 int c;
205
206 if ((c = getbyte(*(s->c))) < 0) {
207 if (s->look > 0)
208 --s->look;
209 } else {
210 s->b[s->ip] = c;
211 if (s->ip < SWD_F)
212 s->b_wrap[s->ip] = c;
213 }
214 if (++s->ip == B_SIZE)
215 s->ip = 0;
216 if (++s->bp == B_SIZE)
217 s->bp = 0;
218 if (++s->rp == B_SIZE)
219 s->rp = 0;
220}
221
222
223/***********************************************************************
224// remove node from lists
225************************************************************************/
226static void swd_remove_node(lzo_swd_p s, unsigned node)
227{
228 if (s->node_count == 0) {
229 unsigned key;
230
231 key = HEAD3(s->b,node);
232 assert(s->llen3[key] > 0);
233 --s->llen3[key];
234
235#ifdef HEAD2
236 key = HEAD2(s->b,node);
237 assert(s->head2[key] != NIL2);
238 if ((unsigned) s->head2[key] == node)
239 s->head2[key] = NIL2;
240#endif
241 } else
242 --s->node_count;
243}
244
245
246/***********************************************************************
247//
248************************************************************************/
/* Consume n lookahead bytes: for each one, retire the oldest position
 * (rp) and register the current position (bp) in the 3-byte (and,
 * optionally, 2-byte) hash tables, then slide the window forward. */
static void swd_accept(lzo_swd_p s, unsigned n)
{
	assert(n <= s->look);

	while (n--) {
		unsigned key;

		swd_remove_node(s,s->rp);

		/* add bp into HEAD3 */
		key = HEAD3(s->b, s->bp);
		s->succ3[s->bp] = s_get_head3(s, key);	/* link old head behind bp */
		s->head3[key] = s->bp;
		s->best3[s->bp] = SWD_F + 1;	/* "no best length known yet" */
		s->llen3[key]++;
		assert(s->llen3[key] <= SWD_N);

#ifdef HEAD2
		/* add bp into HEAD2 */
		key = HEAD2(s->b, s->bp);
		s->head2[key] = s->bp;
#endif

		swd_getbyte(s);
	}
}
275
276
277/***********************************************************************
278//
279************************************************************************/
/* Walk the 3-byte hash chain starting at 'node' for at most 'cnt' links,
 * looking for a match longer than the current s->m_len at position bp.
 * Updates s->m_len/s->m_pos (and best_pos[] when SWD_BEST_OFF) in place. */
static void swd_search(lzo_swd_p s, unsigned node, unsigned cnt)
{
	const uint8_t *p1;
	const uint8_t *p2;
	const uint8_t *px;
	unsigned m_len = s->m_len;
	const uint8_t *b = s->b;
	const uint8_t *bp = s->b + s->bp;
	const uint8_t *bx = s->b + s->bp + s->look;	/* end of lookahead */
	unsigned char scan_end1;

	assert(s->m_len > 0);

	/* cache the byte a candidate must have at position m_len-1 to
	 * possibly beat the current best - cheap rejection test */
	scan_end1 = bp[m_len - 1];
	for ( ; cnt-- > 0; node = s->succ3[node]) {
		p1 = bp;
		p2 = b + node;
		px = bx;

		assert(m_len < s->look);

		if (p2[m_len - 1] == scan_end1
		 && p2[m_len] == p1[m_len]
		 && p2[0] == p1[0]
		 && p2[1] == p1[1]
		) {
			unsigned i;
			assert(lzo_memcmp(bp, &b[node], 3) == 0);

			/* bytes 0-1 already compared; extend the match */
			p1 += 2; p2 += 2;
			do {} while (++p1 < px && *p1 == *++p2);
			i = p1-bp;

			assert(lzo_memcmp(bp, &b[node], i) == 0);

#if defined(SWD_BEST_OFF)
			/* remember the first (nearest) candidate per length */
			if (i < SWD_BEST_OFF) {
				if (s->best_pos[i] == 0)
					s->best_pos[i] = node + 1;
			}
#endif
			if (i > m_len) {
				s->m_len = m_len = i;
				s->m_pos = node;
				/* stop early: whole lookahead matched, or the
				 * cap is reached, or this chain can't improve */
				if (m_len == s->look)
					return;
				if (m_len >= SWD_F)
					return;
				if (m_len > (unsigned) s->best3[node])
					return;
				scan_end1 = bp[m_len - 1];
			}
		}
	}
}
335
336
337/***********************************************************************
338//
339************************************************************************/
340#ifdef HEAD2
341
/* Probe the direct-mapped 2-byte table for a match at bp.
 * Returns 1 if one exists (recording it when it beats the current
 * s->m_len), 0 if the slot is empty. */
static int swd_search2(lzo_swd_p s)
{
	unsigned key;

	assert(s->look >= 2);
	assert(s->m_len > 0);

	key = s->head2[HEAD2(s->b, s->bp)];
	if (key == NIL2)
		return 0;
	assert(lzo_memcmp(&s->b[s->bp], &s->b[key], 2) == 0);
#if defined(SWD_BEST_OFF)
	/* best_pos[] stores position+1 so 0 can mean "none" */
	if (s->best_pos[2] == 0)
		s->best_pos[2] = key + 1;
#endif

	if (s->m_len < 2) {
		s->m_len = 2;
		s->m_pos = key;
	}
	return 1;
}
364
365#endif
366
367
368/***********************************************************************
369//
370************************************************************************/
/* Find the best match for the byte at bp: register bp in the hash
 * chains, run the 2-byte and 3-byte searches, convert the winning
 * position into an offset, and retire the oldest buffer position.
 * Results are left in s->m_len / s->m_off / s->b_char. */
static void swd_findbest(lzo_swd_p s)
{
	unsigned key;
	unsigned cnt, node;
	unsigned len;

	assert(s->m_len > 0);

	/* get current head, add bp into HEAD3 */
	key = HEAD3(s->b,s->bp);
	node = s->succ3[s->bp] = s_get_head3(s, key);
	cnt = s->llen3[key]++;	/* chain length before adding bp */
	assert(s->llen3[key] <= SWD_N + SWD_F);
	if (cnt > s->max_chain)
		cnt = s->max_chain;	/* bound the search effort */
	s->head3[key] = s->bp;

	s->b_char = s->b[s->bp];
	len = s->m_len;
	if (s->m_len >= s->look) {
		/* lookahead can't hold a longer match; nothing to search */
		if (s->look == 0)
			s->b_char = -1;	/* signal end of input to the caller */
		s->m_off = 0;
		s->best3[s->bp] = SWD_F + 1;
	} else {
		/* NB: deliberately brace-less - when HEAD2 is defined, the
		 * 3-byte chain search runs only if the 2-byte probe hit */
#ifdef HEAD2
		if (swd_search2(s))
#endif
		if (s->look >= 3)
			swd_search(s, node, cnt);
		if (s->m_len > len)
			s->m_off = swd_pos2off(s,s->m_pos);
		s->best3[s->bp] = s->m_len;

#if defined(SWD_BEST_OFF)
		/* translate recorded positions (stored as pos+1) to offsets */
		if (s->use_best_off) {
			int i;
			for (i = 2; i < SWD_BEST_OFF; i++) {
				if (s->best_pos[i] > 0)
					s->best_off[i] = swd_pos2off(s, s->best_pos[i]-1);
				else
					s->best_off[i] = 0;
			}
		}
#endif
	}

	swd_remove_node(s,s->rp);

#ifdef HEAD2
	/* add bp into HEAD2 */
	key = HEAD2(s->b, s->bp);
	s->head2[key] = s->bp;
#endif
}
426
427#undef HEAD3
428#undef HEAD2
429#undef s_get_head3
430
431
432/***********************************************************************
433//
434************************************************************************/
435static int init_match(lzo1x_999_t *c, lzo_swd_p s, uint32_t use_best_off)
436{
437 int r;
438
439 assert(!c->init);
440 c->init = 1;
441
442 s->c = c;
443
444 r = swd_init(s);
445 if (r != 0)
446 return r;
447
448 s->use_best_off = use_best_off;
449 return r;
450}
451
452
453/***********************************************************************
454//
455************************************************************************/
456static int find_match(lzo1x_999_t *c, lzo_swd_p s,
457 unsigned this_len, unsigned skip)
458{
459 assert(c->init);
460
461 if (skip > 0) {
462 assert(this_len >= skip);
463 swd_accept(s, this_len - skip);
464 } else {
465 assert(this_len <= 1);
466 }
467
468 s->m_len = 1;
469 s->m_len = 1;
470#ifdef SWD_BEST_OFF
471 if (s->use_best_off)
472 memset(s->best_pos, 0, sizeof(s->best_pos));
473#endif
474 swd_findbest(s);
475 c->m_len = s->m_len;
476 c->m_off = s->m_off;
477
478 swd_getbyte(s);
479
480 if (s->b_char < 0) {
481 c->look = 0;
482 c->m_len = 0;
483 } else {
484 c->look = s->look + 1;
485 }
486 c->bp = c->ip - c->look;
487
488 return LZO_E_OK;
489}
490
491/* this is a public functions, but there is no prototype in a header file */
492static int lzo1x_999_compress_internal(const uint8_t *in , unsigned in_len,
493 uint8_t *out, unsigned *out_len,
494 void *wrkmem,
495 unsigned good_length,
496 unsigned max_lazy,
497 unsigned max_chain,
498 uint32_t use_best_off);
499
500
501/***********************************************************************
502//
503************************************************************************/
/* Emit the opcode bytes encoding one match of length m_len at distance
 * m_off, choosing among the four LZO1X match classes (M1/M2/M3/M4) by
 * length and offset range. Returns the advanced output pointer. */
static uint8_t *code_match(lzo1x_999_t *c,
		uint8_t *op, unsigned m_len, unsigned m_off)
{
	assert(op > c->out);
	if (m_len == 2) {
		/* M1: 2-byte match, small offset; only valid right after a
		 * short literal run (hence the r1_lit asserts) */
		assert(m_off <= M1_MAX_OFFSET);
		assert(c->r1_lit > 0);
		assert(c->r1_lit < 4);
		m_off -= 1;
		*op++ = M1_MARKER | ((m_off & 3) << 2);
		*op++ = m_off >> 2;
	} else if (m_len <= M2_MAX_LEN && m_off <= M2_MAX_OFFSET) {
		/* M2: short match, offset fits in 11 bits across two bytes */
		assert(m_len >= 3);
		m_off -= 1;
		*op++ = ((m_len - 1) << 5) | ((m_off & 7) << 2);
		*op++ = m_off >> 3;
		assert(op[-2] >= M2_MARKER);
	} else if (m_len == M2_MIN_LEN && m_off <= MX_MAX_OFFSET && c->r1_lit >= 4) {
		/* M1 variant: a 3-byte match whose offset lies just past the
		 * M2 range, rebased by M2_MAX_OFFSET */
		assert(m_len == 3);
		assert(m_off > M2_MAX_OFFSET);
		m_off -= 1 + M2_MAX_OFFSET;
		*op++ = M1_MARKER | ((m_off & 3) << 2);
		*op++ = m_off >> 2;
	} else if (m_off <= M3_MAX_OFFSET) {
		/* M3: offset up to 16K; lengths beyond M3_MAX_LEN spill into
		 * extra bytes, 255 units per zero byte */
		assert(m_len >= 3);
		m_off -= 1;
		if (m_len <= M3_MAX_LEN)
			*op++ = M3_MARKER | (m_len - 2);
		else {
			m_len -= M3_MAX_LEN;
			*op++ = M3_MARKER | 0;
			while (m_len > 255) {
				m_len -= 255;
				*op++ = 0;
			}
			assert(m_len > 0);
			*op++ = m_len;
		}
		*op++ = m_off << 2;
		*op++ = m_off >> 6;
	} else {
		/* M4: far offsets 0x4000..0xbfff; bit 14 of the rebased
		 * offset (k) travels inside the opcode byte */
		unsigned k;

		assert(m_len >= 3);
		assert(m_off > 0x4000);
		assert(m_off <= 0xbfff);
		m_off -= 0x4000;
		k = (m_off & 0x4000) >> 11;
		if (m_len <= M4_MAX_LEN)
			*op++ = M4_MARKER | k | (m_len - 2);
		else {
			m_len -= M4_MAX_LEN;
			*op++ = M4_MARKER | k | 0;
			while (m_len > 255) {
				m_len -= 255;
				*op++ = 0;
			}
			assert(m_len > 0);
			*op++ = m_len;
		}
		*op++ = m_off << 2;
		*op++ = m_off >> 6;
	}

	return op;
}
570
571
572static uint8_t *STORE_RUN(lzo1x_999_t *c, uint8_t *op,
573 const uint8_t *ii, unsigned t)
574{
575 if (op == c->out && t <= 238) {
576 *op++ = 17 + t;
577 } else if (t <= 3) {
578 op[-2] |= t;
579 } else if (t <= 18) {
580 *op++ = t - 3;
581 } else {
582 unsigned tt = t - 18;
583
584 *op++ = 0;
585 while (tt > 255) {
586 tt -= 255;
587 *op++ = 0;
588 }
589 assert(tt > 0);
590 *op++ = tt;
591 }
592 do *op++ = *ii++; while (--t > 0);
593
594 return op;
595}
596
597
598static uint8_t *code_run(lzo1x_999_t *c, uint8_t *op, const uint8_t *ii,
599 unsigned lit)
600{
601 if (lit > 0) {
602 assert(m_len >= 2);
603 op = STORE_RUN(c, op, ii, lit);
604 } else {
605 assert(m_len >= 3);
606 }
607 c->r1_lit = lit;
608
609 return op;
610}
611
612
613/***********************************************************************
614//
615************************************************************************/
616static int len_of_coded_match(unsigned m_len, unsigned m_off, unsigned lit)
617{
618 int n = 4;
619
620 if (m_len < 2)
621 return -1;
622 if (m_len == 2)
623 return (m_off <= M1_MAX_OFFSET && lit > 0 && lit < 4) ? 2 : -1;
624 if (m_len <= M2_MAX_LEN && m_off <= M2_MAX_OFFSET)
625 return 2;
626 if (m_len == M2_MIN_LEN && m_off <= MX_MAX_OFFSET && lit >= 4)
627 return 2;
628 if (m_off <= M3_MAX_OFFSET) {
629 if (m_len <= M3_MAX_LEN)
630 return 3;
631 m_len -= M3_MAX_LEN;
632 } else if (m_off <= M4_MAX_OFFSET) {
633 if (m_len <= M4_MAX_LEN)
634 return 3;
635 m_len -= M4_MAX_LEN;
636 } else
637 return -1;
638 while (m_len > 255) {
639 m_len -= 255;
640 n++;
641 }
642 return n;
643}
644
645
/* Compute the minimum extra match length a lazy match must offer before
 * it is preferred over the match already found. Never negative. */
static int min_gain(unsigned ahead, unsigned lit1,
		unsigned lit2, int l1, int l2, int l3)
{
	int gain;

	assert(ahead >= 1);
	gain = ahead;

	/* penalty when the lazy match pushes the literal run into a
	 * longer length-prefix class */
	if (lit1 <= 3) {
		if (lit2 > 3)
			gain += 2;
	} else if (lit1 <= 18) {
		if (lit2 > 18)
			gain += 1;
	}

	gain += (l2 - l1) * 2;
	if (l3 > 0)
		gain -= (ahead - l3) * 2;

	return (gain < 0) ? 0 : gain;
}
668
669
670/***********************************************************************
671//
672************************************************************************/
#if defined(SWD_BEST_OFF)

/* Given the best match (*m_len, *m_off) and the per-length best offsets
 * recorded by the match finder (swd->best_off[]), try trading one or two
 * bytes of length for a cheaper opcode format (M3/M4 -> M2, M4 -> M3).
 * Updates *m_len / *m_off in place when a cheaper coding exists. */
static void better_match(const lzo_swd_p swd,
		unsigned *m_len, unsigned *m_off)
{
	if (*m_len <= M2_MIN_LEN)
		return;

	/* already codable as M2 — nothing cheaper available */
	if (*m_off <= M2_MAX_OFFSET)
		return;

	/* M3/M4 -> M2 */
	if (*m_off > M2_MAX_OFFSET
	 && *m_len >= M2_MIN_LEN + 1 && *m_len <= M2_MAX_LEN + 1
	 && swd->best_off[*m_len-1] && swd->best_off[*m_len-1] <= M2_MAX_OFFSET
	) {
		/* drop one byte of length to get an M2-range offset */
		*m_len = *m_len - 1;
		*m_off = swd->best_off[*m_len];
		return;
	}

	/* M4 -> M2 */
	if (*m_off > M3_MAX_OFFSET
	 && *m_len >= M4_MAX_LEN + 1 && *m_len <= M2_MAX_LEN + 2
	 && swd->best_off[*m_len-2] && swd->best_off[*m_len-2] <= M2_MAX_OFFSET
	) {
		/* drop two bytes of length to get an M2-range offset */
		*m_len = *m_len - 2;
		*m_off = swd->best_off[*m_len];
		return;
	}
	/* M4 -> M3 */
	if (*m_off > M3_MAX_OFFSET
	 && *m_len >= M4_MAX_LEN + 1 && *m_len <= M3_MAX_LEN + 1
	 && swd->best_off[*m_len-1] && swd->best_off[*m_len-1] <= M3_MAX_OFFSET
	) {
		*m_len = *m_len - 1;
		*m_off = swd->best_off[*m_len];
	}
}

#endif
714
715
716/***********************************************************************
717//
718************************************************************************/
/* Core of the LZO1X-999 ("best compression") encoder. Walks the input,
 * alternating literal runs and matches; after each candidate match it may
 * probe up to max_ahead positions for a better ("lazy") match before
 * committing. good_length/max_lazy/max_chain/use_best_off are the tuning
 * knobs passed down from lzo1x_999_compress_level(). Returns LZO_E_OK or
 * the error from init_match(); *out_len receives the compressed size. */
static int lzo1x_999_compress_internal(const uint8_t *in, unsigned in_len,
		uint8_t *out, unsigned *out_len,
		void *wrkmem,
		unsigned good_length,
		unsigned max_lazy,
		unsigned max_chain,
		uint32_t use_best_off)
{
	uint8_t *op;
	const uint8_t *ii;
	unsigned lit;              /* length of the pending literal run */
	unsigned m_len, m_off;     /* current candidate match */
	lzo1x_999_t cc;
	lzo1x_999_t *const c = &cc;
	const lzo_swd_p swd = (lzo_swd_p) wrkmem;
	int r;

	c->init = 0;
	c->ip = c->in = in;
	c->in_end = in + in_len;
	c->out = out;

	op = out;
	ii = c->ip; /* point to start of literal run */
	lit = 0;
	c->r1_lit = 0;

	r = init_match(c, swd, use_best_off);
	if (r != 0)
		return r;
	swd->max_chain = max_chain;

	/* prime the match finder at position 0 */
	r = find_match(c, swd, 0, 0);
	if (r != 0)
		return r;

	while (c->look > 0) {
		unsigned ahead;
		unsigned max_ahead;
		int l1, l2, l3;

		m_len = c->m_len;
		m_off = c->m_off;

		assert(c->bp == c->ip - c->look);
		assert(c->bp >= in);
		if (lit == 0)
			ii = c->bp;
		assert(ii + lit == c->bp);
		assert(swd->b_char == *(c->bp));

		/* decide whether the match is usable at all; some short
		 * matches are rejected for format/compatibility reasons */
		if (m_len < 2
		 || (m_len == 2 && (m_off > M1_MAX_OFFSET || lit == 0 || lit >= 4))
			/* Do not accept this match for compressed-data compatibility
			 * with LZO v1.01 and before
			 * [ might be a problem for decompress() and optimize() ]
			 */
		 || (m_len == 2 && op == out)
		 || (op == out && lit == 0)
		) {
			/* a literal */
			m_len = 0;
		}
		else if (m_len == M2_MIN_LEN) {
			/* compression ratio improves if we code a literal in some cases */
			if (m_off > MX_MAX_OFFSET && lit >= 4)
				m_len = 0;
		}

		if (m_len == 0) {
			/* a literal: extend the run and advance one byte */
			lit++;
			swd->max_chain = max_chain;
			r = find_match(c, swd, 1, 0);
			assert(r == 0);
			continue;
		}

		/* a match */
#if defined(SWD_BEST_OFF)
		if (swd->use_best_off)
			better_match(swd, &m_len, &m_off);
#endif

		/* shall we try a lazy match ? */
		ahead = 0;
		if (m_len >= max_lazy) {
			/* no */
			l1 = 0;
			max_ahead = 0;
		} else {
			/* yes, try a lazy match */
			l1 = len_of_coded_match(m_len, m_off, lit);
			assert(l1 > 0);
			max_ahead = LZO_MIN(2, (unsigned)l1 - 1);
		}


		while (ahead < max_ahead && c->look > m_len) {
			int lazy_match_min_gain;

			/* spend less search effort once the match is long */
			if (m_len >= good_length)
				swd->max_chain = max_chain >> 2;
			else
				swd->max_chain = max_chain;
			r = find_match(c, swd, 1, 0);
			ahead++;

			assert(r == 0);
			assert(c->look > 0);
			assert(ii + lit + ahead == c->bp);

			/* the lazy candidate must strictly improve on the
			 * current match */
			if (c->m_len < m_len)
				continue;
			if (c->m_len == m_len && c->m_off >= m_off)
				continue;
#if defined(SWD_BEST_OFF)
			if (swd->use_best_off)
				better_match(swd, &c->m_len, &c->m_off);
#endif
			l2 = len_of_coded_match(c->m_len, c->m_off, lit+ahead);
			if (l2 < 0)
				continue;

			/* compressed-data compatibility [see above] */
			l3 = (op == out) ? -1 : len_of_coded_match(ahead, m_off, lit);

			lazy_match_min_gain = min_gain(ahead, lit, lit+ahead, l1, l2, l3);
			if (c->m_len >= m_len + lazy_match_min_gain) {
				if (l3 > 0) {
					/* code previous run */
					op = code_run(c, op, ii, lit);
					lit = 0;
					/* code shortened match */
					op = code_match(c, op, ahead, m_off);
				} else {
					/* fold the skipped bytes into the literal run */
					lit += ahead;
					assert(ii + lit == c->bp);
				}
				goto lazy_match_done;
			}
		}

		assert(ii + lit + ahead == c->bp);

		/* 1 - code run */
		op = code_run(c, op, ii, lit);
		lit = 0;

		/* 2 - code match */
		op = code_match(c, op, m_len, m_off);
		swd->max_chain = max_chain;
		r = find_match(c, swd, m_len, 1+ahead);
		assert(r == 0);

 lazy_match_done: ;
	}

	/* store final run */
	if (lit > 0)
		op = STORE_RUN(c, op, ii, lit);

#if defined(LZO_EOF_CODE)
	*op++ = M4_MARKER | 1;
	*op++ = 0;
	*op++ = 0;
#endif

	*out_len = op - out;

	return LZO_E_OK;
}
891
892
893/***********************************************************************
894//
895************************************************************************/
896int lzo1x_999_compress_level(const uint8_t *in, unsigned in_len,
897 uint8_t *out, unsigned *out_len,
898 void *wrkmem,
899 int compression_level)
900{
901 static const struct {
902 uint16_t good_length;
903 uint16_t max_lazy;
904 uint16_t max_chain;
905 uint16_t use_best_off;
906 } c[3] = {
907 { 8, 32, 256, 0 },
908 { 32, 128, 2048, 1 },
909 { SWD_F, SWD_F, 4096, 1 } /* max. compression */
910 };
911
912 if (compression_level < 7 || compression_level > 9)
913 return LZO_E_ERROR;
914
915 compression_level -= 7;
916 return lzo1x_999_compress_internal(in, in_len, out, out_len, wrkmem,
917 c[compression_level].good_length,
918 c[compression_level].max_lazy,
919 c[compression_level].max_chain,
920 c[compression_level].use_best_off);
921}
diff --git a/archival/libarchive/lzo1x_c.c b/archival/libarchive/lzo1x_c.c
new file mode 100644
index 000000000..cc86f74b1
--- /dev/null
+++ b/archival/libarchive/lzo1x_c.c
@@ -0,0 +1,296 @@
1/* implementation of the LZO1[XY]-1 compression algorithm
2
3 This file is part of the LZO real-time data compression library.
4
5 Copyright (C) 1996..2008 Markus Franz Xaver Johannes Oberhumer
6 All Rights Reserved.
7
8 Markus F.X.J. Oberhumer <markus@oberhumer.com>
9 http://www.oberhumer.com/opensource/lzo/
10
11 The LZO library is free software; you can redistribute it and/or
12 modify it under the terms of the GNU General Public License as
13 published by the Free Software Foundation; either version 2 of
14 the License, or (at your option) any later version.
15
16 The LZO library is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License for more details.
20
21 You should have received a copy of the GNU General Public License
22 along with the LZO library; see the file COPYING.
23 If not, write to the Free Software Foundation, Inc.,
24 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
25 */
26
27/***********************************************************************
28// compress a block of data.
29************************************************************************/
/* Core of the fast LZO1[XY]-1 compressor: a hash-table (dict) lookup per
 * position, emitting literal runs and M2/M3/M4 matches as it goes.
 * Returns the number of trailing input bytes that were NOT coded (the
 * caller appends them as a final literal run); *out_len receives the
 * number of output bytes produced. wrkmem holds the hash table. */
static NOINLINE unsigned
do_compress(const uint8_t* in, unsigned in_len,
		uint8_t* out, unsigned* out_len,
		void* wrkmem)
{
	register const uint8_t* ip;
	uint8_t* op;
	const uint8_t* const in_end = in + in_len;
	/* stop early so match extension can read ahead safely */
	const uint8_t* const ip_end = in + in_len - M2_MAX_LEN - 5;
	const uint8_t* ii;                 /* start of current literal run */
	const void* *const dict = (const void**) wrkmem;

	op = out;
	ip = in;
	ii = ip;

	ip += 4;
	for (;;) {
		register const uint8_t* m_pos;
		unsigned m_off;
		unsigned m_len;
		unsigned dindex;

		/* first hash probe */
		D_INDEX1(dindex,ip);
		GINDEX(m_pos,m_off,dict,dindex,in);
		if (LZO_CHECK_MPOS_NON_DET(m_pos,m_off,in,ip,M4_MAX_OFFSET))
			goto literal;
#if 1
		if (m_off <= M2_MAX_OFFSET || m_pos[3] == ip[3])
			goto try_match;
		/* second hash probe */
		D_INDEX2(dindex,ip);
#endif
		GINDEX(m_pos,m_off,dict,dindex,in);
		if (LZO_CHECK_MPOS_NON_DET(m_pos,m_off,in,ip,M4_MAX_OFFSET))
			goto literal;
		if (m_off <= M2_MAX_OFFSET || m_pos[3] == ip[3])
			goto try_match;
		goto literal;

 try_match:
		/* verify at least a 3-byte match at the candidate position */
#if 1 && defined(LZO_UNALIGNED_OK_2)
		if (* (const lzo_ushortp) m_pos != * (const lzo_ushortp) ip)
#else
		if (m_pos[0] != ip[0] || m_pos[1] != ip[1])
#endif
		{
		} else {
			if (m_pos[2] == ip[2]) {
#if 0
				if (m_off <= M2_MAX_OFFSET)
					goto match;
				if (lit <= 3)
					goto match;
				if (lit == 3) { /* better compression, but slower */
					assert(op - 2 > out); op[-2] |= (uint8_t)(3);
					*op++ = *ii++; *op++ = *ii++; *op++ = *ii++;
					goto code_match;
				}
				if (m_pos[3] == ip[3])
#endif
					goto match;
			}
			else {
				/* still need a better way for finding M1 matches */
#if 0
				/* a M1 match */
#if 0
				if (m_off <= M1_MAX_OFFSET && lit > 0 && lit <= 3)
#else
				if (m_off <= M1_MAX_OFFSET && lit == 3)
#endif
				{
					register unsigned t;

					t = lit;
					assert(op - 2 > out); op[-2] |= (uint8_t)(t);
					do *op++ = *ii++; while (--t > 0);
					assert(ii == ip);
					m_off -= 1;
					*op++ = (uint8_t)(M1_MARKER | ((m_off & 3) << 2));
					*op++ = (uint8_t)(m_off >> 2);
					ip += 2;
					goto match_done;
				}
#endif
			}
		}

		/* a literal */
 literal:
		UPDATE_I(dict, 0, dindex, ip, in);
		++ip;
		if (ip >= ip_end)
			break;
		continue;

		/* a match */
match:
		UPDATE_I(dict, 0, dindex, ip, in);
		/* store current literal run */
		if (pd(ip, ii) > 0) {
			register unsigned t = pd(ip, ii);

			if (t <= 3) {
				/* short run: length goes in the previous opcode */
				assert(op - 2 > out);
				op[-2] |= (uint8_t)(t);
			}
			else if (t <= 18)
				*op++ = (uint8_t)(t - 3);
			else {
				/* long run: zero byte + 255-saturated extension */
				register unsigned tt = t - 18;

				*op++ = 0;
				while (tt > 255) {
					tt -= 255;
					*op++ = 0;
				}
				assert(tt > 0);
				*op++ = (uint8_t)(tt);
			}
			do *op++ = *ii++; while (--t > 0);
		}

		/* code the match */
		assert(ii == ip);
		ip += 3;
		/* extend the match byte by byte (fails out of the if when a
		 * mismatch stops it within M2 length) */
		if (m_pos[3] != *ip++ || m_pos[4] != *ip++ || m_pos[5] != *ip++
		 || m_pos[6] != *ip++ || m_pos[7] != *ip++ || m_pos[8] != *ip++
#ifdef LZO1Y
		 || m_pos[ 9] != *ip++ || m_pos[10] != *ip++ || m_pos[11] != *ip++
		 || m_pos[12] != *ip++ || m_pos[13] != *ip++ || m_pos[14] != *ip++
#endif
		) {
			--ip;
			m_len = pd(ip, ii);
			assert(m_len >= 3);
			assert(m_len <= M2_MAX_LEN);

			if (m_off <= M2_MAX_OFFSET) {
				m_off -= 1;
#if defined(LZO1X)
				*op++ = (uint8_t)(((m_len - 1) << 5) | ((m_off & 7) << 2));
				*op++ = (uint8_t)(m_off >> 3);
#elif defined(LZO1Y)
				*op++ = (uint8_t)(((m_len + 1) << 4) | ((m_off & 3) << 2));
				*op++ = (uint8_t)(m_off >> 2);
#endif
			}
			else if (m_off <= M3_MAX_OFFSET) {
				m_off -= 1;
				*op++ = (uint8_t)(M3_MARKER | (m_len - 2));
				goto m3_m4_offset;
			} else {
#if defined(LZO1X)
				m_off -= 0x4000;
				assert(m_off > 0);
				assert(m_off <= 0x7fff);
				*op++ = (uint8_t)(M4_MARKER | ((m_off & 0x4000) >> 11) | (m_len - 2));
				goto m3_m4_offset;
#elif defined(LZO1Y)
				goto m4_match;
#endif
			}
		}
		else {
			/* match ran past the fast-test window: extend to the end */
			{
				const uint8_t* end = in_end;
				const uint8_t* m = m_pos + M2_MAX_LEN + 1;
				while (ip < end && *m == *ip)
					m++, ip++;
				m_len = pd(ip, ii);
			}
			assert(m_len > M2_MAX_LEN);

			if (m_off <= M3_MAX_OFFSET) {
				m_off -= 1;
				if (m_len <= 33)
					*op++ = (uint8_t)(M3_MARKER | (m_len - 2));
				else {
					m_len -= 33;
					*op++ = M3_MARKER | 0;
					goto m3_m4_len;
				}
			} else {
#if defined(LZO1Y)
 m4_match:
#endif
				m_off -= 0x4000;
				assert(m_off > 0);
				assert(m_off <= 0x7fff);
				if (m_len <= M4_MAX_LEN)
					*op++ = (uint8_t)(M4_MARKER | ((m_off & 0x4000) >> 11) | (m_len - 2));
				else {
					m_len -= M4_MAX_LEN;
					*op++ = (uint8_t)(M4_MARKER | ((m_off & 0x4000) >> 11));
 m3_m4_len:
					/* 255-saturated length extension bytes */
					while (m_len > 255) {
						m_len -= 255;
						*op++ = 0;
					}
					assert(m_len > 0);
					*op++ = (uint8_t)(m_len);
				}
			}
 m3_m4_offset:
			*op++ = (uint8_t)((m_off & 63) << 2);
			*op++ = (uint8_t)(m_off >> 6);
		}
#if 0
 match_done:
#endif
		ii = ip;
		if (ip >= ip_end)
			break;
	}

	*out_len = pd(op, out);
	return pd(in_end, ii);
}
249
250/***********************************************************************
251// public entry point
252************************************************************************/
253int DO_COMPRESS(const uint8_t* in, unsigned in_len,
254 uint8_t* out, unsigned* out_len,
255 void* wrkmem)
256{
257 uint8_t* op = out;
258 unsigned t;
259
260 if (in_len <= M2_MAX_LEN + 5)
261 t = in_len;
262 else {
263 t = do_compress(in,in_len,op,out_len,wrkmem);
264 op += *out_len;
265 }
266
267 if (t > 0) {
268 const uint8_t* ii = in + in_len - t;
269
270 if (op == out && t <= 238)
271 *op++ = (uint8_t)(17 + t);
272 else if (t <= 3)
273 op[-2] |= (uint8_t)(t);
274 else if (t <= 18)
275 *op++ = (uint8_t)(t - 3);
276 else {
277 unsigned tt = t - 18;
278
279 *op++ = 0;
280 while (tt > 255) {
281 tt -= 255;
282 *op++ = 0;
283 }
284 assert(tt > 0);
285 *op++ = (uint8_t)(tt);
286 }
287 do *op++ = *ii++; while (--t > 0);
288 }
289
290 *op++ = M4_MARKER | 1;
291 *op++ = 0;
292 *op++ = 0;
293
294 *out_len = pd(op, out);
295 return 0; /*LZO_E_OK*/
296}
diff --git a/archival/libarchive/lzo1x_d.c b/archival/libarchive/lzo1x_d.c
new file mode 100644
index 000000000..348a85510
--- /dev/null
+++ b/archival/libarchive/lzo1x_d.c
@@ -0,0 +1,420 @@
1/* implementation of the LZO1X decompression algorithm
2
3 This file is part of the LZO real-time data compression library.
4
5 Copyright (C) 1996..2008 Markus Franz Xaver Johannes Oberhumer
6 All Rights Reserved.
7
8 Markus F.X.J. Oberhumer <markus@oberhumer.com>
9 http://www.oberhumer.com/opensource/lzo/
10
11 The LZO library is free software; you can redistribute it and/or
12 modify it under the terms of the GNU General Public License as
13 published by the Free Software Foundation; either version 2 of
14 the License, or (at your option) any later version.
15
16 The LZO library is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License for more details.
20
21 You should have received a copy of the GNU General Public License
22 along with the LZO library; see the file COPYING.
23 If not, write to the Free Software Foundation, Inc.,
24 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
25 */
26#include "libbb.h"
27#include "liblzo.h"
28
29/***********************************************************************
30// decompress a block of data.
31************************************************************************/
32/* safe decompression with overrun testing */
/* Decompress in[0..in_len) into out, checking every read and write
 * against the buffer bounds (NEED_IP/NEED_OP/TEST_LB macros jump to the
 * *_overrun labels on violation). *out_len must hold the output buffer
 * size on entry and receives the number of bytes produced.
 * Returns LZO_E_OK, or an LZO_E_* error on truncated/corrupt input or
 * overrun. */
int lzo1x_decompress_safe(const uint8_t* in, unsigned in_len,
		uint8_t* out, unsigned* out_len,
		void* wrkmem UNUSED_PARAM)
{
	register uint8_t* op;
	register const uint8_t* ip;
	register unsigned t;           /* current run/match length */
#if defined(COPY_DICT)
	unsigned m_off;
	const uint8_t* dict_end;
#else
	register const uint8_t* m_pos = NULL; /* possibly not needed */
#endif
	const uint8_t* const ip_end = in + in_len;
#if defined(HAVE_ANY_OP)
	uint8_t* const op_end = out + *out_len;
#endif
#if defined(LZO1Z)
	unsigned last_m_off = 0;
#endif

//	LZO_UNUSED(wrkmem);

#if defined(COPY_DICT)
	if (dict) {
		/* clamp an oversized preset dictionary to the max offset */
		if (dict_len > M4_MAX_OFFSET) {
			dict += dict_len - M4_MAX_OFFSET;
			dict_len = M4_MAX_OFFSET;
		}
		dict_end = dict + dict_len;
	} else {
		dict_len = 0;
		dict_end = NULL;
	}
#endif /* COPY_DICT */

	*out_len = 0;

	op = out;
	ip = in;

	/* a first byte > 17 encodes an initial literal run of (byte - 17) */
	if (*ip > 17) {
		t = *ip++ - 17;
		if (t < 4)
			goto match_next;
		assert(t > 0); NEED_OP(t); NEED_IP(t+1);
		do *op++ = *ip++; while (--t > 0);
		goto first_literal_run;
	}

	while (TEST_IP && TEST_OP) {
		t = *ip++;
		if (t >= 16)
			goto match;
		/* a literal run */
		if (t == 0) {
			/* extended length: each 0 byte adds 255 */
			NEED_IP(1);
			while (*ip == 0) {
				t += 255;
				ip++;
				NEED_IP(1);
			}
			t += 15 + *ip++;
		}
		/* copy literals */
		assert(t > 0);
		NEED_OP(t+3);
		NEED_IP(t+4);
#if defined(LZO_UNALIGNED_OK_4) || defined(LZO_ALIGNED_OK_4)
# if !defined(LZO_UNALIGNED_OK_4)
		if (PTR_ALIGNED2_4(op, ip))
# endif
		{
			/* 4-bytes-at-a-time fast path */
			COPY4(op, ip);
			op += 4;
			ip += 4;
			if (--t > 0) {
				if (t >= 4) {
					do {
						COPY4(op, ip);
						op += 4;
						ip += 4;
						t -= 4;
					} while (t >= 4);
					if (t > 0)
						do *op++ = *ip++; while (--t > 0);
				} else {
					do *op++ = *ip++; while (--t > 0);
				}
			}
		}
# if !defined(LZO_UNALIGNED_OK_4)
		else
# endif
#endif
#if !defined(LZO_UNALIGNED_OK_4)
		{
			/* byte-by-byte fallback (run is always >= 4 bytes here) */
			*op++ = *ip++;
			*op++ = *ip++;
			*op++ = *ip++;
			do *op++ = *ip++; while (--t > 0);
		}
#endif

 first_literal_run:
		t = *ip++;
		if (t >= 16)
			goto match;
		/* special M2 match directly after the first literal run */
#if defined(COPY_DICT)
#if defined(LZO1Z)
		m_off = (1 + M2_MAX_OFFSET) + (t << 6) + (*ip++ >> 2);
		last_m_off = m_off;
#else
		m_off = (1 + M2_MAX_OFFSET) + (t >> 2) + (*ip++ << 2);
#endif
		NEED_OP(3);
		t = 3; COPY_DICT(t,m_off)
#else /* !COPY_DICT */
#if defined(LZO1Z)
		t = (1 + M2_MAX_OFFSET) + (t << 6) + (*ip++ >> 2);
		m_pos = op - t;
		last_m_off = t;
#else
		m_pos = op - (1 + M2_MAX_OFFSET);
		m_pos -= t >> 2;
		m_pos -= *ip++ << 2;
#endif
		TEST_LB(m_pos); NEED_OP(3);
		*op++ = *m_pos++;
		*op++ = *m_pos++;
		*op++ = *m_pos;
#endif /* COPY_DICT */
		goto match_done;

		/* handle matches */
		do {
 match:
			if (t >= 64) { /* a M2 match */
#if defined(COPY_DICT)
#if defined(LZO1X)
				m_off = 1 + ((t >> 2) & 7) + (*ip++ << 3);
				t = (t >> 5) - 1;
#elif defined(LZO1Y)
				m_off = 1 + ((t >> 2) & 3) + (*ip++ << 2);
				t = (t >> 4) - 3;
#elif defined(LZO1Z)
				m_off = t & 0x1f;
				if (m_off >= 0x1c)
					m_off = last_m_off;
				else {
					m_off = 1 + (m_off << 6) + (*ip++ >> 2);
					last_m_off = m_off;
				}
				t = (t >> 5) - 1;
#endif
#else /* !COPY_DICT */
#if defined(LZO1X)
				m_pos = op - 1;
				m_pos -= (t >> 2) & 7;
				m_pos -= *ip++ << 3;
				t = (t >> 5) - 1;
#elif defined(LZO1Y)
				m_pos = op - 1;
				m_pos -= (t >> 2) & 3;
				m_pos -= *ip++ << 2;
				t = (t >> 4) - 3;
#elif defined(LZO1Z)
				{
					unsigned off = t & 0x1f;
					m_pos = op;
					if (off >= 0x1c) {
						assert(last_m_off > 0);
						m_pos -= last_m_off;
					} else {
						off = 1 + (off << 6) + (*ip++ >> 2);
						m_pos -= off;
						last_m_off = off;
					}
				}
				t = (t >> 5) - 1;
#endif
				TEST_LB(m_pos); assert(t > 0); NEED_OP(t+3-1);
				goto copy_match;
#endif /* COPY_DICT */
			}
			else if (t >= 32) { /* a M3 match */
				t &= 31;
				if (t == 0) {
					/* extended length bytes */
					NEED_IP(1);
					while (*ip == 0) {
						t += 255;
						ip++;
						NEED_IP(1);
					}
					t += 31 + *ip++;
				}
#if defined(COPY_DICT)
#if defined(LZO1Z)
				m_off = 1 + (ip[0] << 6) + (ip[1] >> 2);
				last_m_off = m_off;
#else
				m_off = 1 + (ip[0] >> 2) + (ip[1] << 6);
#endif
#else /* !COPY_DICT */
#if defined(LZO1Z)
				{
					unsigned off = 1 + (ip[0] << 6) + (ip[1] >> 2);
					m_pos = op - off;
					last_m_off = off;
				}
#elif defined(LZO_UNALIGNED_OK_2) && defined(LZO_ABI_LITTLE_ENDIAN)
				m_pos = op - 1;
				m_pos -= (* (const lzo_ushortp) ip) >> 2;
#else
				m_pos = op - 1;
				m_pos -= (ip[0] >> 2) + (ip[1] << 6);
#endif
#endif /* COPY_DICT */
				ip += 2;
			}
			else if (t >= 16) { /* a M4 match */
#if defined(COPY_DICT)
				m_off = (t & 8) << 11;
#else /* !COPY_DICT */
				m_pos = op;
				m_pos -= (t & 8) << 11;
#endif /* COPY_DICT */
				t &= 7;
				if (t == 0) {
					NEED_IP(1);
					while (*ip == 0) {
						t += 255;
						ip++;
						NEED_IP(1);
					}
					t += 7 + *ip++;
				}
#if defined(COPY_DICT)
#if defined(LZO1Z)
				m_off += (ip[0] << 6) + (ip[1] >> 2);
#else
				m_off += (ip[0] >> 2) + (ip[1] << 6);
#endif
				ip += 2;
				if (m_off == 0)
					goto eof_found;
				m_off += 0x4000;
#if defined(LZO1Z)
				last_m_off = m_off;
#endif
#else /* !COPY_DICT */
#if defined(LZO1Z)
				m_pos -= (ip[0] << 6) + (ip[1] >> 2);
#elif defined(LZO_UNALIGNED_OK_2) && defined(LZO_ABI_LITTLE_ENDIAN)
				m_pos -= (* (const lzo_ushortp) ip) >> 2;
#else
				m_pos -= (ip[0] >> 2) + (ip[1] << 6);
#endif
				ip += 2;
				/* zero distance means EOF marker */
				if (m_pos == op)
					goto eof_found;
				m_pos -= 0x4000;
#if defined(LZO1Z)
				last_m_off = pd((const uint8_t*)op, m_pos);
#endif
#endif /* COPY_DICT */
			}
			else { /* a M1 match */
#if defined(COPY_DICT)
#if defined(LZO1Z)
				m_off = 1 + (t << 6) + (*ip++ >> 2);
				last_m_off = m_off;
#else
				m_off = 1 + (t >> 2) + (*ip++ << 2);
#endif
				NEED_OP(2);
				t = 2; COPY_DICT(t,m_off)
#else /* !COPY_DICT */
#if defined(LZO1Z)
				t = 1 + (t << 6) + (*ip++ >> 2);
				m_pos = op - t;
				last_m_off = t;
#else
				m_pos = op - 1;
				m_pos -= t >> 2;
				m_pos -= *ip++ << 2;
#endif
				TEST_LB(m_pos); NEED_OP(2);
				*op++ = *m_pos++;
				*op++ = *m_pos;
#endif /* COPY_DICT */
				goto match_done;
			}

			/* copy match */
#if defined(COPY_DICT)

			NEED_OP(t+3-1);
			t += 3-1; COPY_DICT(t,m_off)

#else /* !COPY_DICT */

			TEST_LB(m_pos); assert(t > 0); NEED_OP(t+3-1);
#if defined(LZO_UNALIGNED_OK_4) || defined(LZO_ALIGNED_OK_4)
# if !defined(LZO_UNALIGNED_OK_4)
			if (t >= 2 * 4 - (3 - 1) && PTR_ALIGNED2_4(op,m_pos)) {
				assert((op - m_pos) >= 4); /* both pointers are aligned */
# else
			if (t >= 2 * 4 - (3 - 1) && (op - m_pos) >= 4) {
# endif
				COPY4(op,m_pos);
				op += 4; m_pos += 4; t -= 4 - (3 - 1);
				do {
					COPY4(op,m_pos);
					op += 4; m_pos += 4; t -= 4;
				} while (t >= 4);
				if (t > 0)
					do *op++ = *m_pos++; while (--t > 0);
			}
			else
#endif
			{
 copy_match:
				*op++ = *m_pos++; *op++ = *m_pos++;
				do *op++ = *m_pos++; while (--t > 0);
			}

#endif /* COPY_DICT */

 match_done:
			/* low 2 bits of the offset byte hold the count of
			 * literals that follow the match (0 = fetch new opcode) */
#if defined(LZO1Z)
			t = ip[-1] & 3;
#else
			t = ip[-2] & 3;
#endif
			if (t == 0)
				break;

			/* copy literals */
 match_next:
			assert(t > 0);
			assert(t < 4);
			NEED_OP(t);
			NEED_IP(t+1);
#if 0
			do *op++ = *ip++; while (--t > 0);
#else
			*op++ = *ip++;
			if (t > 1) {
				*op++ = *ip++;
				if (t > 2)
					*op++ = *ip++;
			}
#endif
			t = *ip++;
		} while (TEST_IP && TEST_OP);
	}

//#if defined(HAVE_TEST_IP) || defined(HAVE_TEST_OP)
	/* no EOF code was found */
	*out_len = pd(op, out);
	return LZO_E_EOF_NOT_FOUND;
//#endif

 eof_found:
	assert(t == 1);
	*out_len = pd(op, out);
	return (ip == ip_end ? LZO_E_OK :
	       (ip < ip_end ? LZO_E_INPUT_NOT_CONSUMED : LZO_E_INPUT_OVERRUN));

//#if defined(HAVE_NEED_IP)
 input_overrun:
	*out_len = pd(op, out);
	return LZO_E_INPUT_OVERRUN;
//#endif

//#if defined(HAVE_NEED_OP)
 output_overrun:
	*out_len = pd(op, out);
	return LZO_E_OUTPUT_OVERRUN;
//#endif

//#if defined(LZO_TEST_OVERRUN_LOOKBEHIND)
 lookbehind_overrun:
	*out_len = pd(op, out);
	return LZO_E_LOOKBEHIND_OVERRUN;
//#endif
}
diff --git a/archival/libarchive/open_transformer.c b/archival/libarchive/open_transformer.c
new file mode 100644
index 000000000..26ae565f5
--- /dev/null
+++ b/archival/libarchive/open_transformer.c
@@ -0,0 +1,54 @@
1/* vi: set sw=4 ts=4: */
2/*
3 * Licensed under GPLv2 or later, see file LICENSE in this source tree.
4 */
5
6#include "libbb.h"
7#include "archive.h"
8
9/* transformer(), more than meets the eye */
10/*
11 * On MMU machine, the transform_prog is removed by macro magic
12 * in include/archive.h. On NOMMU, transformer is removed.
13 */
/* Replace fd with the read end of a pipe whose write end is fed by a
 * child running the decompressor. On MMU systems the child calls the
 * in-process transformer(); on NOMMU it execs transform_prog instead
 * (one of the two arguments is compiled out by macros in archive.h). */
void FAST_FUNC open_transformer(int fd,
	IF_DESKTOP(long long) int FAST_FUNC (*transformer)(int src_fd, int dst_fd),
	const char *transform_prog)
{
	struct fd_pair fd_pipe;
	int pid;

	xpiped_pair(fd_pipe);
	/* vfork on NOMMU: the child execs immediately, so it's safe */
	pid = BB_MMU ? xfork() : xvfork();
	if (pid == 0) {
		/* Child */
		close(fd_pipe.rd); /* we don't want to read from the parent */
		// FIXME: error check?
#if BB_MMU
		transformer(fd, fd_pipe.wr);
		if (ENABLE_FEATURE_CLEAN_UP) {
			close(fd_pipe.wr); /* send EOF */
			close(fd);
		}
		/* must be _exit! bug was actually seen here */
		_exit(EXIT_SUCCESS);
#else
		{
			char *argv[4];
			xmove_fd(fd, 0);
			xmove_fd(fd_pipe.wr, 1);
			argv[0] = (char*)transform_prog;
			argv[1] = (char*)"-cf";
			argv[2] = (char*)"-";
			argv[3] = NULL;
			BB_EXECVP(transform_prog, argv);
			bb_perror_msg_and_die("can't execute '%s'", transform_prog);
		}
#endif
		/* notreached */
	}

	/* parent process */
	close(fd_pipe.wr); /* don't want to write to the child */
	/* caller keeps using 'fd', now reading decompressed data */
	xmove_fd(fd_pipe.rd, fd);
}
diff --git a/archival/libarchive/seek_by_jump.c b/archival/libarchive/seek_by_jump.c
new file mode 100644
index 000000000..7c2c52ae1
--- /dev/null
+++ b/archival/libarchive/seek_by_jump.c
@@ -0,0 +1,19 @@
1/* vi: set sw=4 ts=4: */
2/*
3 * Licensed under GPLv2 or later, see file LICENSE in this source tree.
4 */
5
6#include "libbb.h"
7#include "archive.h"
8
9void FAST_FUNC seek_by_jump(int fd, off_t amount)
10{
11 if (amount
12 && lseek(fd, amount, SEEK_CUR) == (off_t) -1
13 ) {
14 if (errno == ESPIPE)
15 seek_by_read(fd, amount);
16 else
17 bb_perror_msg_and_die("seek failure");
18 }
19}
diff --git a/archival/libarchive/seek_by_read.c b/archival/libarchive/seek_by_read.c
new file mode 100644
index 000000000..ad931a8de
--- /dev/null
+++ b/archival/libarchive/seek_by_read.c
@@ -0,0 +1,16 @@
1/* vi: set sw=4 ts=4: */
2/*
3 * Licensed under GPLv2 or later, see file LICENSE in this source tree.
4 */
5
6#include "libbb.h"
7#include "archive.h"
8
9/* If we are reading through a pipe, or from stdin then we can't lseek,
10 * we must read and discard the data to skip over it.
11 */
12void FAST_FUNC seek_by_read(int fd, off_t amount)
13{
14 if (amount)
15 bb_copyfd_exact_size(fd, -1, amount);
16}
diff --git a/archival/libarchive/unpack_ar_archive.c b/archival/libarchive/unpack_ar_archive.c
new file mode 100644
index 000000000..18dbfd54d
--- /dev/null
+++ b/archival/libarchive/unpack_ar_archive.c
@@ -0,0 +1,22 @@
1/* vi: set sw=4 ts=4: */
2/*
3 * Licensed under GPLv2 or later, see file LICENSE in this source tree.
4 */
5
6#include "libbb.h"
7#include "archive.h"
8#include "ar.h"
9
10void FAST_FUNC unpack_ar_archive(archive_handle_t *ar_archive)
11{
12 char magic[7];
13
14 xread(ar_archive->src_fd, magic, AR_MAGIC_LEN);
15 if (strncmp(magic, AR_MAGIC, AR_MAGIC_LEN) != 0) {
16 bb_error_msg_and_die("invalid ar magic");
17 }
18 ar_archive->offset += AR_MAGIC_LEN;
19
20 while (get_header_ar(ar_archive) == EXIT_SUCCESS)
21 continue;
22}
diff --git a/archival/libarchive/unxz/README b/archival/libarchive/unxz/README
new file mode 100644
index 000000000..c5972f6b8
--- /dev/null
+++ b/archival/libarchive/unxz/README
@@ -0,0 +1,135 @@
1
2XZ Embedded
3===========
4
5 XZ Embedded is a relatively small, limited implementation of the .xz
6 file format. Currently only decoding is implemented.
7
8 XZ Embedded was written for use in the Linux kernel, but the code can
9 be easily used in other environments too, including regular userspace
10 applications.
11
12 This README contains information that is useful only when the copy
13 of XZ Embedded isn't part of the Linux kernel tree. You should also
14 read linux/Documentation/xz.txt even if you aren't using XZ Embedded
15 as part of Linux; information in that file is not repeated in this
16 README.
17
18Compiling the Linux kernel module
19
20 The xz_dec module depends on crc32 module, so make sure that you have
21 it enabled (CONFIG_CRC32).
22
23 Building the xz_dec and xz_dec_test modules without support for BCJ
24 filters:
25
26 cd linux/lib/xz
27 make -C /path/to/kernel/source \
28 KCPPFLAGS=-I"$(pwd)/../../include" M="$(pwd)" \
29 CONFIG_XZ_DEC=m CONFIG_XZ_DEC_TEST=m
30
31 Building the xz_dec and xz_dec_test modules with support for BCJ
32 filters:
33
34 cd linux/lib/xz
35 make -C /path/to/kernel/source \
36 KCPPFLAGS=-I"$(pwd)/../../include" M="$(pwd)" \
37 CONFIG_XZ_DEC=m CONFIG_XZ_DEC_TEST=m CONFIG_XZ_DEC_BCJ=y \
38 CONFIG_XZ_DEC_X86=y CONFIG_XZ_DEC_POWERPC=y \
39 CONFIG_XZ_DEC_IA64=y CONFIG_XZ_DEC_ARM=y \
40 CONFIG_XZ_DEC_ARMTHUMB=y CONFIG_XZ_DEC_SPARC=y
41
42 If you want only one or a few of the BCJ filters, omit the appropriate
43 variables. CONFIG_XZ_DEC_BCJ=y is always required to build the support
44 code shared between all BCJ filters.
45
46 Most people don't need the xz_dec_test module. You can skip building
47 it by omitting CONFIG_XZ_DEC_TEST=m from the make command line.
48
49Compiler requirements
50
51 XZ Embedded should compile as either GNU-C89 (used in the Linux
52 kernel) or with any C99 compiler. Getting the code to compile with
53 non-GNU C89 compiler or a C++ compiler should be quite easy as
54 long as there is a data type for unsigned 64-bit integer (or the
55 code is modified not to support large files, which needs some more
56 care than just using 32-bit integer instead of 64-bit).
57
58 If you use GCC, try to use a recent version. For example, on x86,
59 xz_dec_lzma2.c compiled with GCC 3.3.6 is 15-25 % slower than when
60 compiled with GCC 4.3.3.
61
62Embedding into userspace applications
63
64 To embed the XZ decoder, copy the following files into a single
65 directory in your source code tree:
66
67 linux/include/linux/xz.h
68 linux/lib/xz/xz_crc32.c
69 linux/lib/xz/xz_dec_lzma2.c
70 linux/lib/xz/xz_dec_stream.c
71 linux/lib/xz/xz_lzma2.h
72 linux/lib/xz/xz_private.h
73 linux/lib/xz/xz_stream.h
74 userspace/xz_config.h
75
76 Alternatively, xz.h may be placed into a different directory but then
77 that directory must be in the compiler include path when compiling
78 the .c files.
79
80 Your code should use only the functions declared in xz.h. The rest of
81 the .h files are meant only for internal use in XZ Embedded.
82
83 You may want to modify xz_config.h to be more suitable for your build
84 environment. Probably you should at least skim through it even if the
85 default file works as is.
86
87BCJ filter support
88
89 If you want support for one or more BCJ filters, you need to copy also
90 linux/lib/xz/xz_dec_bcj.c into your application, and use appropriate
91 #defines in xz_config.h or in compiler flags. You don't need these
92 #defines in the code that just uses XZ Embedded via xz.h, but having
93 them always #defined doesn't hurt either.
94
95 #define Instruction set BCJ filter endianness
96 XZ_DEC_X86 x86 or x86-64 Little endian only
97 XZ_DEC_POWERPC PowerPC Big endian only
98 XZ_DEC_IA64 Itanium (IA-64) Big or little endian
99 XZ_DEC_ARM ARM Little endian only
100 XZ_DEC_ARMTHUMB ARM-Thumb Little endian only
101 XZ_DEC_SPARC SPARC Big or little endian
102
103 While some architectures are (partially) bi-endian, the endianness
104 setting doesn't change the endianness of the instructions on all
105 architectures. That's why Itanium and SPARC filters work for both big
106 and little endian executables (Itanium has little endian instructions
107 and SPARC has big endian instructions).
108
109 There currently is no filter for little endian PowerPC or big endian
110 ARM or ARM-Thumb. Implementing filters for them can be considered if
111 there is a need for such filters in real-world applications.
112
113Notes about shared libraries
114
115 If you are including XZ Embedded into a shared library, you very
116 probably should rename the xz_* functions to prevent symbol
117 conflicts in case your library is linked against some other library
118 or application that also has XZ Embedded in it (which may even be
119 a different version of XZ Embedded). TODO: Provide an easy way
120 to do this.
121
122 Please don't create a shared library of XZ Embedded itself unless
123 it is fine to rebuild everything depending on that shared library
124 every time you upgrade to a newer version of XZ Embedded. There are
125 no API or ABI stability guarantees between different versions of
126 XZ Embedded.
127
128Specifying the calling convention
129
130 XZ_FUNC macro was included to support declaring functions with __init
131 in Linux. Outside Linux, it can be used to specify the calling
132 convention on systems that support multiple calling conventions.
133 For example, on Windows, you may make all functions use the stdcall
134 calling convention by defining XZ_FUNC=__stdcall when building and
135 using the functions from XZ Embedded.
diff --git a/archival/libarchive/unxz/xz.h b/archival/libarchive/unxz/xz.h
new file mode 100644
index 000000000..c6c071c4a
--- /dev/null
+++ b/archival/libarchive/unxz/xz.h
@@ -0,0 +1,271 @@
1/*
2 * XZ decompressor
3 *
4 * Authors: Lasse Collin <lasse.collin@tukaani.org>
5 * Igor Pavlov <http://7-zip.org/>
6 *
7 * This file has been put into the public domain.
8 * You can do whatever you want with this file.
9 */
10
11#ifndef XZ_H
12#define XZ_H
13
14#ifdef __KERNEL__
15# include <linux/stddef.h>
16# include <linux/types.h>
17#else
18# include <stddef.h>
19# include <stdint.h>
20#endif
21
22/* In Linux, this is used to make extern functions static when needed. */
23#ifndef XZ_EXTERN
24# define XZ_EXTERN extern
25#endif
26
27/* In Linux, this is used to mark the functions with __init when needed. */
28#ifndef XZ_FUNC
29# define XZ_FUNC
30#endif
31
32/**
33 * enum xz_mode - Operation mode
34 *
35 * @XZ_SINGLE: Single-call mode. This uses less RAM than
36 * than multi-call modes, because the LZMA2
37 * dictionary doesn't need to be allocated as
38 * part of the decoder state. All required data
39 * structures are allocated at initialization,
40 * so xz_dec_run() cannot return XZ_MEM_ERROR.
41 * @XZ_PREALLOC: Multi-call mode with preallocated LZMA2
42 * dictionary buffer. All data structures are
43 * allocated at initialization, so xz_dec_run()
44 * cannot return XZ_MEM_ERROR.
45 * @XZ_DYNALLOC: Multi-call mode. The LZMA2 dictionary is
46 * allocated once the required size has been
47 * parsed from the stream headers. If the
48 * allocation fails, xz_dec_run() will return
49 * XZ_MEM_ERROR.
50 *
51 * It is possible to enable support only for a subset of the above
52 * modes at compile time by defining XZ_DEC_SINGLE, XZ_DEC_PREALLOC,
53 * or XZ_DEC_DYNALLOC. The xz_dec kernel module is always compiled
54 * with support for all operation modes, but the preboot code may
55 * be built with fewer features to minimize code size.
56 */
57enum xz_mode {
58 XZ_SINGLE,
59 XZ_PREALLOC,
60 XZ_DYNALLOC
61};
62
63/**
64 * enum xz_ret - Return codes
65 * @XZ_OK: Everything is OK so far. More input or more
66 * output space is required to continue. This
67 * return code is possible only in multi-call mode
68 * (XZ_PREALLOC or XZ_DYNALLOC).
69 * @XZ_STREAM_END: Operation finished successfully.
70 * @XZ_UNSUPPORTED_CHECK: Integrity check type is not supported. Decoding
71 * is still possible in multi-call mode by simply
72 * calling xz_dec_run() again.
73 * NOTE: This return value is used only if
74 * XZ_DEC_ANY_CHECK was defined at build time,
75 * which is not used in the kernel. Unsupported
76 * check types return XZ_OPTIONS_ERROR if
77 * XZ_DEC_ANY_CHECK was not defined at build time.
78 * @XZ_MEM_ERROR: Allocating memory failed. This return code is
79 * possible only if the decoder was initialized
80 * with XZ_DYNALLOC. The amount of memory that was
81 * tried to be allocated was no more than the
82 * dict_max argument given to xz_dec_init().
83 * @XZ_MEMLIMIT_ERROR: A bigger LZMA2 dictionary would be needed than
84 * allowed by the dict_max argument given to
85 * xz_dec_init(). This return value is possible
86 * only in multi-call mode (XZ_PREALLOC or
87 * XZ_DYNALLOC); the single-call mode (XZ_SINGLE)
88 * ignores the dict_max argument.
89 * @XZ_FORMAT_ERROR: File format was not recognized (wrong magic
90 * bytes).
91 * @XZ_OPTIONS_ERROR: This implementation doesn't support the requested
92 * compression options. In the decoder this means
93 * that the header CRC32 matches, but the header
94 * itself specifies something that we don't support.
95 * @XZ_DATA_ERROR: Compressed data is corrupt.
96 * @XZ_BUF_ERROR: Cannot make any progress. Details are slightly
97 * different between multi-call and single-call
98 * mode; more information below.
99 *
100 * In multi-call mode, XZ_BUF_ERROR is returned when two consecutive calls
101 * to XZ code cannot consume any input and cannot produce any new output.
102 * This happens when there is no new input available, or the output buffer
103 * is full while at least one output byte is still pending. Assuming your
104 * code is not buggy, you can get this error only when decoding a compressed
105 * stream that is truncated or otherwise corrupt.
106 *
107 * In single-call mode, XZ_BUF_ERROR is returned only when the output buffer
108 * is too small, or the compressed input is corrupt in a way that makes the
109 * decoder produce more output than the caller expected. When it is
110 * (relatively) clear that the compressed input is truncated, XZ_DATA_ERROR
111 * is used instead of XZ_BUF_ERROR.
112 */
113enum xz_ret {
114 XZ_OK,
115 XZ_STREAM_END,
116 XZ_UNSUPPORTED_CHECK,
117 XZ_MEM_ERROR,
118 XZ_MEMLIMIT_ERROR,
119 XZ_FORMAT_ERROR,
120 XZ_OPTIONS_ERROR,
121 XZ_DATA_ERROR,
122 XZ_BUF_ERROR
123};
124
125/**
126 * struct xz_buf - Passing input and output buffers to XZ code
127 * @in: Beginning of the input buffer. This may be NULL if and only
128 * if in_pos is equal to in_size.
129 * @in_pos: Current position in the input buffer. This must not exceed
130 * in_size.
131 * @in_size: Size of the input buffer
132 * @out: Beginning of the output buffer. This may be NULL if and only
133 * if out_pos is equal to out_size.
134 * @out_pos: Current position in the output buffer. This must not exceed
135 * out_size.
136 * @out_size: Size of the output buffer
137 *
138 * Only the contents of the output buffer from out[out_pos] onward, and
139 * the variables in_pos and out_pos are modified by the XZ code.
140 */
141struct xz_buf {
142 const uint8_t *in;
143 size_t in_pos;
144 size_t in_size;
145
146 uint8_t *out;
147 size_t out_pos;
148 size_t out_size;
149};
150
151/**
152 * struct xz_dec - Opaque type to hold the XZ decoder state
153 */
154struct xz_dec;
155
156/**
157 * xz_dec_init() - Allocate and initialize a XZ decoder state
158 * @mode: Operation mode
159 * @dict_max: Maximum size of the LZMA2 dictionary (history buffer) for
160 * multi-call decoding. This is ignored in single-call mode
161 * (mode == XZ_SINGLE). LZMA2 dictionary is always 2^n bytes
162 * or 2^n + 2^(n-1) bytes (the latter sizes are less common
163 * in practice), so other values for dict_max don't make sense.
164 * In the kernel, dictionary sizes of 64 KiB, 128 KiB, 256 KiB,
165 * 512 KiB, and 1 MiB are probably the only reasonable values,
166 * except for kernel and initramfs images where a bigger
167 * dictionary can be fine and useful.
168 *
169 * Single-call mode (XZ_SINGLE): xz_dec_run() decodes the whole stream at
170 * once. The caller must provide enough output space or the decoding will
171 * fail. The output space is used as the dictionary buffer, which is why
172 * there is no need to allocate the dictionary as part of the decoder's
173 * internal state.
174 *
175 * Because the output buffer is used as the workspace, streams encoded using
176 * a big dictionary are not a problem in single-call mode. It is enough that
177 * the output buffer is big enough to hold the actual uncompressed data; it
178 * can be smaller than the dictionary size stored in the stream headers.
179 *
180 * Multi-call mode with preallocated dictionary (XZ_PREALLOC): dict_max bytes
181 * of memory is preallocated for the LZMA2 dictionary. This way there is no
182 * risk that xz_dec_run() could run out of memory, since xz_dec_run() will
183 * never allocate any memory. Instead, if the preallocated dictionary is too
184 * small for decoding the given input stream, xz_dec_run() will return
185 * XZ_MEMLIMIT_ERROR. Thus, it is important to know what kind of data will be
186 * decoded to avoid allocating excessive amount of memory for the dictionary.
187 *
188 * Multi-call mode with dynamically allocated dictionary (XZ_DYNALLOC):
189 * dict_max specifies the maximum allowed dictionary size that xz_dec_run()
190 * may allocate once it has parsed the dictionary size from the stream
191 * headers. This way excessive allocations can be avoided while still
192 * limiting the maximum memory usage to a sane value to prevent running the
193 * system out of memory when decompressing streams from untrusted sources.
194 *
195 * On success, xz_dec_init() returns a pointer to struct xz_dec, which is
196 * ready to be used with xz_dec_run(). If memory allocation fails,
197 * xz_dec_init() returns NULL.
198 */
199XZ_EXTERN struct xz_dec * XZ_FUNC xz_dec_init(
200 enum xz_mode mode, uint32_t dict_max);
201
202/**
203 * xz_dec_run() - Run the XZ decoder
204 * @s: Decoder state allocated using xz_dec_init()
205 * @b: Input and output buffers
206 *
207 * The possible return values depend on build options and operation mode.
208 * See enum xz_ret for details.
209 *
210 * NOTE: If an error occurs in single-call mode (return value is not
211 * XZ_STREAM_END), b->in_pos and b->out_pos are not modified, and the
212 * contents of the output buffer from b->out[b->out_pos] onward are
213 * undefined. This is true even after XZ_BUF_ERROR, because with some filter
214 * chains, there may be a second pass over the output buffer, and this pass
215 * cannot be properly done if the output buffer is truncated. Thus, you
216 * cannot give the single-call decoder a too small buffer and then expect to
217 * get that amount valid data from the beginning of the stream. You must use
218 * the multi-call decoder if you don't want to uncompress the whole stream.
219 */
220XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_run(struct xz_dec *s, struct xz_buf *b);
221
222/**
223 * xz_dec_reset() - Reset an already allocated decoder state
224 * @s: Decoder state allocated using xz_dec_init()
225 *
226 * This function can be used to reset the multi-call decoder state without
227 * freeing and reallocating memory with xz_dec_end() and xz_dec_init().
228 *
229 * In single-call mode, xz_dec_reset() is always called in the beginning of
230 * xz_dec_run(). Thus, explicit call to xz_dec_reset() is useful only in
231 * multi-call mode.
232 */
233XZ_EXTERN void XZ_FUNC xz_dec_reset(struct xz_dec *s);
234
235/**
236 * xz_dec_end() - Free the memory allocated for the decoder state
237 * @s: Decoder state allocated using xz_dec_init(). If s is NULL,
238 * this function does nothing.
239 */
240XZ_EXTERN void XZ_FUNC xz_dec_end(struct xz_dec *s);
241
242/*
243 * Standalone build (userspace build or in-kernel build for boot time use)
244 * needs a CRC32 implementation. For normal in-kernel use, kernel's own
245 * CRC32 module is used instead, and users of this module don't need to
246 * care about the functions below.
247 */
248#ifndef XZ_INTERNAL_CRC32
249# ifdef __KERNEL__
250# define XZ_INTERNAL_CRC32 0
251# else
252# define XZ_INTERNAL_CRC32 1
253# endif
254#endif
255
256#if XZ_INTERNAL_CRC32
257/*
258 * This must be called before any other xz_* function to initialize
259 * the CRC32 lookup table.
260 */
261XZ_EXTERN void XZ_FUNC xz_crc32_init(void);
262
263/*
264 * Update CRC32 value using the polynomial from IEEE-802.3. To start a new
265 * calculation, the third argument must be zero. To continue the calculation,
266 * the previously returned value is passed as the third argument.
267 */
268XZ_EXTERN uint32_t XZ_FUNC xz_crc32(
269 const uint8_t *buf, size_t size, uint32_t crc);
270#endif
271#endif
diff --git a/archival/libarchive/unxz/xz_config.h b/archival/libarchive/unxz/xz_config.h
new file mode 100644
index 000000000..187e1cbed
--- /dev/null
+++ b/archival/libarchive/unxz/xz_config.h
@@ -0,0 +1,123 @@
1/*
2 * Private includes and definitions for userspace use of XZ Embedded
3 *
4 * Author: Lasse Collin <lasse.collin@tukaani.org>
5 *
6 * This file has been put into the public domain.
7 * You can do whatever you want with this file.
8 */
9
10#ifndef XZ_CONFIG_H
11#define XZ_CONFIG_H
12
13/* Uncomment as needed to enable BCJ filter decoders. */
14/* #define XZ_DEC_X86 */
15/* #define XZ_DEC_POWERPC */
16/* #define XZ_DEC_IA64 */
17/* #define XZ_DEC_ARM */
18/* #define XZ_DEC_ARMTHUMB */
19/* #define XZ_DEC_SPARC */
20
21#include <stdbool.h>
22#include <stdlib.h>
23#include <string.h>
24
25#include "xz.h"
26
27#define kmalloc(size, flags) malloc(size)
28#define kfree(ptr) free(ptr)
29#define vmalloc(size) malloc(size)
30#define vfree(ptr) free(ptr)
31
32#define memeq(a, b, size) (memcmp(a, b, size) == 0)
33#define memzero(buf, size) memset(buf, 0, size)
34
35#undef min
36#undef min_t
37#define min(x, y) ((x) < (y) ? (x) : (y))
38#define min_t(type, x, y) min(x, y)
39
40/*
41 * Some functions have been marked with __always_inline to keep the
42 * performance reasonable even when the compiler is optimizing for
43 * small code size. You may be able to save a few bytes by #defining
44 * __always_inline to plain inline, but don't complain if the code
45 * becomes slow.
46 *
47 * NOTE: System headers on GNU/Linux may #define this macro already,
48 * so if you want to change it, you need to #undef it first.
49 */
50#ifndef __always_inline
51# ifdef __GNUC__
52# define __always_inline \
53 inline __attribute__((__always_inline__))
54# else
55# define __always_inline inline
56# endif
57#endif
58
59/*
60 * Some functions are marked to never be inlined to reduce stack usage.
61 * If you don't care about stack usage, you may want to modify this so
62 * that noinline_for_stack is #defined to be empty even when using GCC.
63 * Doing so may save a few bytes in binary size.
64 */
65#ifndef noinline_for_stack
66# ifdef __GNUC__
67# define noinline_for_stack __attribute__((__noinline__))
68# else
69# define noinline_for_stack
70# endif
71#endif
72
73/* Inline functions to access unaligned unsigned 32-bit integers */
74#ifndef get_unaligned_le32
75static inline uint32_t XZ_FUNC get_unaligned_le32(const uint8_t *buf)
76{
77 return (uint32_t)buf[0]
78 | ((uint32_t)buf[1] << 8)
79 | ((uint32_t)buf[2] << 16)
80 | ((uint32_t)buf[3] << 24);
81}
82#endif
83
84#ifndef get_unaligned_be32
85static inline uint32_t XZ_FUNC get_unaligned_be32(const uint8_t *buf)
86{
87 return (uint32_t)(buf[0] << 24)
88 | ((uint32_t)buf[1] << 16)
89 | ((uint32_t)buf[2] << 8)
90 | (uint32_t)buf[3];
91}
92#endif
93
94#ifndef put_unaligned_le32
95static inline void XZ_FUNC put_unaligned_le32(uint32_t val, uint8_t *buf)
96{
97 buf[0] = (uint8_t)val;
98 buf[1] = (uint8_t)(val >> 8);
99 buf[2] = (uint8_t)(val >> 16);
100 buf[3] = (uint8_t)(val >> 24);
101}
102#endif
103
104#ifndef put_unaligned_be32
105static inline void XZ_FUNC put_unaligned_be32(uint32_t val, uint8_t *buf)
106{
107 buf[0] = (uint8_t)(val >> 24);
108 buf[1] = (uint8_t)(val >> 16);
109 buf[2] = (uint8_t)(val >> 8);
110 buf[3] = (uint8_t)val;
111}
112#endif
113
114/*
115 * Use get_unaligned_le32() also for aligned access for simplicity. On
116 * little endian systems, #define get_le32(ptr) (*(const uint32_t *)(ptr))
117 * could save a few bytes in code size.
118 */
119#ifndef get_le32
120# define get_le32 get_unaligned_le32
121#endif
122
123#endif
diff --git a/archival/libarchive/unxz/xz_dec_bcj.c b/archival/libarchive/unxz/xz_dec_bcj.c
new file mode 100644
index 000000000..09162b51f
--- /dev/null
+++ b/archival/libarchive/unxz/xz_dec_bcj.c
@@ -0,0 +1,564 @@
1/*
2 * Branch/Call/Jump (BCJ) filter decoders
3 *
4 * Authors: Lasse Collin <lasse.collin@tukaani.org>
5 * Igor Pavlov <http://7-zip.org/>
6 *
7 * This file has been put into the public domain.
8 * You can do whatever you want with this file.
9 */
10
11#include "xz_private.h"
12
13/*
14 * The rest of the file is inside this ifdef. It makes things a little more
15 * convenient when building without support for any BCJ filters.
16 */
17#ifdef XZ_DEC_BCJ
18
19struct xz_dec_bcj {
20 /* Type of the BCJ filter being used */
21 enum {
22 BCJ_X86 = 4, /* x86 or x86-64 */
23 BCJ_POWERPC = 5, /* Big endian only */
24 BCJ_IA64 = 6, /* Big or little endian */
25 BCJ_ARM = 7, /* Little endian only */
26 BCJ_ARMTHUMB = 8, /* Little endian only */
27 BCJ_SPARC = 9 /* Big or little endian */
28 } type;
29
30 /*
31 * Return value of the next filter in the chain. We need to preserve
32 * this information across calls, because we must not call the next
33 * filter anymore once it has returned XZ_STREAM_END.
34 */
35 enum xz_ret ret;
36
37 /* True if we are operating in single-call mode. */
38 bool single_call;
39
40 /*
41 * Absolute position relative to the beginning of the uncompressed
42 * data (in a single .xz Block). We care only about the lowest 32
43 * bits so this doesn't need to be uint64_t even with big files.
44 */
45 uint32_t pos;
46
47 /* x86 filter state */
48 uint32_t x86_prev_mask;
49
50 /* Temporary space to hold the variables from struct xz_buf */
51 uint8_t *out;
52 size_t out_pos;
53 size_t out_size;
54
55 struct {
56 /* Amount of already filtered data in the beginning of buf */
57 size_t filtered;
58
59 /* Total amount of data currently stored in buf */
60 size_t size;
61
62 /*
63 * Buffer to hold a mix of filtered and unfiltered data. This
64 * needs to be big enough to hold Alignment + 2 * Look-ahead:
65 *
66 * Type Alignment Look-ahead
67 * x86 1 4
68 * PowerPC 4 0
69 * IA-64 16 0
70 * ARM 4 0
71 * ARM-Thumb 2 2
72 * SPARC 4 0
73 */
74 uint8_t buf[16];
75 } temp;
76};
77
78#ifdef XZ_DEC_X86
79/*
80 * This is a macro used to test the most significant byte of a memory address
81 * in an x86 instruction.
82 */
83#define bcj_x86_test_msbyte(b) ((b) == 0x00 || (b) == 0xFF)
84
85static noinline_for_stack size_t XZ_FUNC bcj_x86(
86 struct xz_dec_bcj *s, uint8_t *buf, size_t size)
87{
88 static const bool mask_to_allowed_status[8]
89 = { true, true, true, false, true, false, false, false };
90
91 static const uint8_t mask_to_bit_num[8] = { 0, 1, 2, 2, 3, 3, 3, 3 };
92
93 size_t i;
94 size_t prev_pos = (size_t)-1;
95 uint32_t prev_mask = s->x86_prev_mask;
96 uint32_t src;
97 uint32_t dest;
98 uint32_t j;
99 uint8_t b;
100
101 if (size <= 4)
102 return 0;
103
104 size -= 4;
105 for (i = 0; i < size; ++i) {
106 if ((buf[i] & 0xFE) != 0xE8)
107 continue;
108
109 prev_pos = i - prev_pos;
110 if (prev_pos > 3) {
111 prev_mask = 0;
112 } else {
113 prev_mask = (prev_mask << (prev_pos - 1)) & 7;
114 if (prev_mask != 0) {
115 b = buf[i + 4 - mask_to_bit_num[prev_mask]];
116 if (!mask_to_allowed_status[prev_mask]
117 || bcj_x86_test_msbyte(b)) {
118 prev_pos = i;
119 prev_mask = (prev_mask << 1) | 1;
120 continue;
121 }
122 }
123 }
124
125 prev_pos = i;
126
127 if (bcj_x86_test_msbyte(buf[i + 4])) {
128 src = get_unaligned_le32(buf + i + 1);
129 while (true) {
130 dest = src - (s->pos + (uint32_t)i + 5);
131 if (prev_mask == 0)
132 break;
133
134 j = mask_to_bit_num[prev_mask] * 8;
135 b = (uint8_t)(dest >> (24 - j));
136 if (!bcj_x86_test_msbyte(b))
137 break;
138
139 src = dest ^ (((uint32_t)1 << (32 - j)) - 1);
140 }
141
142 dest &= 0x01FFFFFF;
143 dest |= (uint32_t)0 - (dest & 0x01000000);
144 put_unaligned_le32(dest, buf + i + 1);
145 i += 4;
146 } else {
147 prev_mask = (prev_mask << 1) | 1;
148 }
149 }
150
151 prev_pos = i - prev_pos;
152 s->x86_prev_mask = prev_pos > 3 ? 0 : prev_mask << (prev_pos - 1);
153 return i;
154}
155#endif
156
157#ifdef XZ_DEC_POWERPC
158static noinline_for_stack size_t XZ_FUNC bcj_powerpc(
159 struct xz_dec_bcj *s, uint8_t *buf, size_t size)
160{
161 size_t i;
162 uint32_t instr;
163
164 for (i = 0; i + 4 <= size; i += 4) {
165 instr = get_unaligned_be32(buf + i);
166 if ((instr & 0xFC000003) == 0x48000001) {
167 instr &= 0x03FFFFFC;
168 instr -= s->pos + (uint32_t)i;
169 instr &= 0x03FFFFFC;
170 instr |= 0x48000001;
171 put_unaligned_be32(instr, buf + i);
172 }
173 }
174
175 return i;
176}
177#endif
178
179#ifdef XZ_DEC_IA64
180static noinline_for_stack size_t XZ_FUNC bcj_ia64(
181 struct xz_dec_bcj *s, uint8_t *buf, size_t size)
182{
183 static const uint8_t branch_table[32] = {
184 0, 0, 0, 0, 0, 0, 0, 0,
185 0, 0, 0, 0, 0, 0, 0, 0,
186 4, 4, 6, 6, 0, 0, 7, 7,
187 4, 4, 0, 0, 4, 4, 0, 0
188 };
189
190 /*
191 * The local variables take a little bit stack space, but it's less
192 * than what LZMA2 decoder takes, so it doesn't make sense to reduce
193 * stack usage here without doing that for the LZMA2 decoder too.
194 */
195
196 /* Loop counters */
197 size_t i;
198 size_t j;
199
200 /* Instruction slot (0, 1, or 2) in the 128-bit instruction word */
201 uint32_t slot;
202
203 /* Bitwise offset of the instruction indicated by slot */
204 uint32_t bit_pos;
205
206 /* bit_pos split into byte and bit parts */
207 uint32_t byte_pos;
208 uint32_t bit_res;
209
210 /* Address part of an instruction */
211 uint32_t addr;
212
213 /* Mask used to detect which instructions to convert */
214 uint32_t mask;
215
216 /* 41-bit instruction stored somewhere in the lowest 48 bits */
217 uint64_t instr;
218
219 /* Instruction normalized with bit_res for easier manipulation */
220 uint64_t norm;
221
222 for (i = 0; i + 16 <= size; i += 16) {
223 mask = branch_table[buf[i] & 0x1F];
224 for (slot = 0, bit_pos = 5; slot < 3; ++slot, bit_pos += 41) {
225 if (((mask >> slot) & 1) == 0)
226 continue;
227
228 byte_pos = bit_pos >> 3;
229 bit_res = bit_pos & 7;
230 instr = 0;
231 for (j = 0; j < 6; ++j)
232 instr |= (uint64_t)(buf[i + j + byte_pos])
233 << (8 * j);
234
235 norm = instr >> bit_res;
236
237 if (((norm >> 37) & 0x0F) == 0x05
238 && ((norm >> 9) & 0x07) == 0) {
239 addr = (norm >> 13) & 0x0FFFFF;
240 addr |= ((uint32_t)(norm >> 36) & 1) << 20;
241 addr <<= 4;
242 addr -= s->pos + (uint32_t)i;
243 addr >>= 4;
244
245 norm &= ~((uint64_t)0x8FFFFF << 13);
246 norm |= (uint64_t)(addr & 0x0FFFFF) << 13;
247 norm |= (uint64_t)(addr & 0x100000)
248 << (36 - 20);
249
250 instr &= (1 << bit_res) - 1;
251 instr |= norm << bit_res;
252
253 for (j = 0; j < 6; j++)
254 buf[i + j + byte_pos]
255 = (uint8_t)(instr >> (8 * j));
256 }
257 }
258 }
259
260 return i;
261}
262#endif
263
264#ifdef XZ_DEC_ARM
265static noinline_for_stack size_t XZ_FUNC bcj_arm(
266 struct xz_dec_bcj *s, uint8_t *buf, size_t size)
267{
268 size_t i;
269 uint32_t addr;
270
271 for (i = 0; i + 4 <= size; i += 4) {
272 if (buf[i + 3] == 0xEB) {
273 addr = (uint32_t)buf[i] | ((uint32_t)buf[i + 1] << 8)
274 | ((uint32_t)buf[i + 2] << 16);
275 addr <<= 2;
276 addr -= s->pos + (uint32_t)i + 8;
277 addr >>= 2;
278 buf[i] = (uint8_t)addr;
279 buf[i + 1] = (uint8_t)(addr >> 8);
280 buf[i + 2] = (uint8_t)(addr >> 16);
281 }
282 }
283
284 return i;
285}
286#endif
287
288#ifdef XZ_DEC_ARMTHUMB
289static noinline_for_stack size_t XZ_FUNC bcj_armthumb(
290 struct xz_dec_bcj *s, uint8_t *buf, size_t size)
291{
292 size_t i;
293 uint32_t addr;
294
295 for (i = 0; i + 4 <= size; i += 2) {
296 if ((buf[i + 1] & 0xF8) == 0xF0
297 && (buf[i + 3] & 0xF8) == 0xF8) {
298 addr = (((uint32_t)buf[i + 1] & 0x07) << 19)
299 | ((uint32_t)buf[i] << 11)
300 | (((uint32_t)buf[i + 3] & 0x07) << 8)
301 | (uint32_t)buf[i + 2];
302 addr <<= 1;
303 addr -= s->pos + (uint32_t)i + 4;
304 addr >>= 1;
305 buf[i + 1] = (uint8_t)(0xF0 | ((addr >> 19) & 0x07));
306 buf[i] = (uint8_t)(addr >> 11);
307 buf[i + 3] = (uint8_t)(0xF8 | ((addr >> 8) & 0x07));
308 buf[i + 2] = (uint8_t)addr;
309 i += 2;
310 }
311 }
312
313 return i;
314}
315#endif
316
317#ifdef XZ_DEC_SPARC
318static noinline_for_stack size_t XZ_FUNC bcj_sparc(
319 struct xz_dec_bcj *s, uint8_t *buf, size_t size)
320{
321 size_t i;
322 uint32_t instr;
323
324 for (i = 0; i + 4 <= size; i += 4) {
325 instr = get_unaligned_be32(buf + i);
326 if ((instr >> 22) == 0x100 || (instr >> 22) == 0x1FF) {
327 instr <<= 2;
328 instr -= s->pos + (uint32_t)i;
329 instr >>= 2;
330 instr = ((uint32_t)0x40000000 - (instr & 0x400000))
331 | 0x40000000 | (instr & 0x3FFFFF);
332 put_unaligned_be32(instr, buf + i);
333 }
334 }
335
336 return i;
337}
338#endif
339
340/*
341 * Apply the selected BCJ filter. Update *pos and s->pos to match the amount
342 * of data that got filtered.
343 *
344 * NOTE: This is implemented as a switch statement to avoid using function
345 * pointers, which could be problematic in the kernel boot code, which must
346 * avoid pointers to static data (at least on x86).
347 */
348static void XZ_FUNC bcj_apply(struct xz_dec_bcj *s,
349 uint8_t *buf, size_t *pos, size_t size)
350{
351 size_t filtered;
352
353 buf += *pos;
354 size -= *pos;
355
356 switch (s->type) {
357#ifdef XZ_DEC_X86
358 case BCJ_X86:
359 filtered = bcj_x86(s, buf, size);
360 break;
361#endif
362#ifdef XZ_DEC_POWERPC
363 case BCJ_POWERPC:
364 filtered = bcj_powerpc(s, buf, size);
365 break;
366#endif
367#ifdef XZ_DEC_IA64
368 case BCJ_IA64:
369 filtered = bcj_ia64(s, buf, size);
370 break;
371#endif
372#ifdef XZ_DEC_ARM
373 case BCJ_ARM:
374 filtered = bcj_arm(s, buf, size);
375 break;
376#endif
377#ifdef XZ_DEC_ARMTHUMB
378 case BCJ_ARMTHUMB:
379 filtered = bcj_armthumb(s, buf, size);
380 break;
381#endif
382#ifdef XZ_DEC_SPARC
383 case BCJ_SPARC:
384 filtered = bcj_sparc(s, buf, size);
385 break;
386#endif
387 default:
388 /* Never reached but silence compiler warnings. */
389 filtered = 0;
390 break;
391 }
392
393 *pos += filtered;
394 s->pos += filtered;
395}
396
397/*
398 * Flush pending filtered data from temp to the output buffer.
399 * Move the remaining mixture of possibly filtered and unfiltered
400 * data to the beginning of temp.
401 */
402static void XZ_FUNC bcj_flush(struct xz_dec_bcj *s, struct xz_buf *b)
403{
404 size_t copy_size;
405
406 copy_size = min_t(size_t, s->temp.filtered, b->out_size - b->out_pos);
407 memcpy(b->out + b->out_pos, s->temp.buf, copy_size);
408 b->out_pos += copy_size;
409
410 s->temp.filtered -= copy_size;
411 s->temp.size -= copy_size;
412 memmove(s->temp.buf, s->temp.buf + copy_size, s->temp.size);
413}
414
415/*
416 * The BCJ filter functions are primitive in the sense that they process the
417 * data in chunks of 1-16 bytes. To hide this issue, this function does
418 * some buffering.
419 */
420XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_bcj_run(struct xz_dec_bcj *s,
421 struct xz_dec_lzma2 *lzma2, struct xz_buf *b)
422{
423 size_t out_start;
424
425 /*
426 * Flush pending already filtered data to the output buffer. Return
427 * immediately if we couldn't flush everything, or if the next
428 * filter in the chain had already returned XZ_STREAM_END.
429 */
430 if (s->temp.filtered > 0) {
431 bcj_flush(s, b);
432 if (s->temp.filtered > 0)
433 return XZ_OK;
434
435 if (s->ret == XZ_STREAM_END)
436 return XZ_STREAM_END;
437 }
438
439 /*
440 * If we have more output space than what is currently pending in
441 * temp, copy the unfiltered data from temp to the output buffer
442 * and try to fill the output buffer by decoding more data from the
443 * next filter in the chain. Apply the BCJ filter on the new data
444 * in the output buffer. If everything cannot be filtered, copy it
445 * to temp and rewind the output buffer position accordingly.
446 */
447 if (s->temp.size < b->out_size - b->out_pos) {
448 out_start = b->out_pos;
449 memcpy(b->out + b->out_pos, s->temp.buf, s->temp.size);
450 b->out_pos += s->temp.size;
451
452 s->ret = xz_dec_lzma2_run(lzma2, b);
453 if (s->ret != XZ_STREAM_END
454 && (s->ret != XZ_OK || s->single_call))
455 return s->ret;
456
457 bcj_apply(s, b->out, &out_start, b->out_pos);
458
459 /*
460 * As an exception, if the next filter returned XZ_STREAM_END,
461 * we can do that too, since the last few bytes that remain
462 * unfiltered are meant to remain unfiltered.
463 */
464 if (s->ret == XZ_STREAM_END)
465 return XZ_STREAM_END;
466
467 s->temp.size = b->out_pos - out_start;
468 b->out_pos -= s->temp.size;
469 memcpy(s->temp.buf, b->out + b->out_pos, s->temp.size);
470 }
471
472 /*
473 * If we have unfiltered data in temp, try to fill by decoding more
474 * data from the next filter. Apply the BCJ filter on temp. Then we
475 * hopefully can fill the actual output buffer by copying filtered
476 * data from temp. A mix of filtered and unfiltered data may be left
477 * in temp; it will be taken care on the next call to this function.
478 */
479 if (s->temp.size > 0) {
480 /* Make b->out{,_pos,_size} temporarily point to s->temp. */
481 s->out = b->out;
482 s->out_pos = b->out_pos;
483 s->out_size = b->out_size;
484 b->out = s->temp.buf;
485 b->out_pos = s->temp.size;
486 b->out_size = sizeof(s->temp.buf);
487
488 s->ret = xz_dec_lzma2_run(lzma2, b);
489
490 s->temp.size = b->out_pos;
491 b->out = s->out;
492 b->out_pos = s->out_pos;
493 b->out_size = s->out_size;
494
495 if (s->ret != XZ_OK && s->ret != XZ_STREAM_END)
496 return s->ret;
497
498 bcj_apply(s, s->temp.buf, &s->temp.filtered, s->temp.size);
499
500 /*
501 * If the next filter returned XZ_STREAM_END, we mark that
502 * everything is filtered, since the last unfiltered bytes
503 * of the stream are meant to be left as is.
504 */
505 if (s->ret == XZ_STREAM_END)
506 s->temp.filtered = s->temp.size;
507
508 bcj_flush(s, b);
509 if (s->temp.filtered > 0)
510 return XZ_OK;
511 }
512
513 return s->ret;
514}
515
516XZ_EXTERN struct xz_dec_bcj * XZ_FUNC xz_dec_bcj_create(bool single_call)
517{
518 struct xz_dec_bcj *s = kmalloc(sizeof(*s), GFP_KERNEL);
519 if (s != NULL)
520 s->single_call = single_call;
521
522 return s;
523}
524
525XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_bcj_reset(
526 struct xz_dec_bcj *s, uint8_t id)
527{
528 switch (id) {
529#ifdef XZ_DEC_X86
530 case BCJ_X86:
531#endif
532#ifdef XZ_DEC_POWERPC
533 case BCJ_POWERPC:
534#endif
535#ifdef XZ_DEC_IA64
536 case BCJ_IA64:
537#endif
538#ifdef XZ_DEC_ARM
539 case BCJ_ARM:
540#endif
541#ifdef XZ_DEC_ARMTHUMB
542 case BCJ_ARMTHUMB:
543#endif
544#ifdef XZ_DEC_SPARC
545 case BCJ_SPARC:
546#endif
547 break;
548
549 default:
550 /* Unsupported Filter ID */
551 return XZ_OPTIONS_ERROR;
552 }
553
554 s->type = id;
555 s->ret = XZ_OK;
556 s->pos = 0;
557 s->x86_prev_mask = 0;
558 s->temp.filtered = 0;
559 s->temp.size = 0;
560
561 return XZ_OK;
562}
563
564#endif
diff --git a/archival/libarchive/unxz/xz_dec_lzma2.c b/archival/libarchive/unxz/xz_dec_lzma2.c
new file mode 100644
index 000000000..da71cb4d4
--- /dev/null
+++ b/archival/libarchive/unxz/xz_dec_lzma2.c
@@ -0,0 +1,1175 @@
1/*
2 * LZMA2 decoder
3 *
4 * Authors: Lasse Collin <lasse.collin@tukaani.org>
5 * Igor Pavlov <http://7-zip.org/>
6 *
7 * This file has been put into the public domain.
8 * You can do whatever you want with this file.
9 */
10
11#include "xz_private.h"
12#include "xz_lzma2.h"
13
14/*
15 * Range decoder initialization eats the first five bytes of each LZMA chunk.
16 */
17#define RC_INIT_BYTES 5
18
19/*
20 * Minimum number of usable input buffer to safely decode one LZMA symbol.
21 * The worst case is that we decode 22 bits using probabilities and 26
22 * direct bits. This may decode at maximum of 20 bytes of input. However,
23 * lzma_main() does an extra normalization before returning, thus we
24 * need to put 21 here.
25 */
26#define LZMA_IN_REQUIRED 21
27
28/*
29 * Dictionary (history buffer)
30 *
31 * These are always true:
32 * start <= pos <= full <= end
33 * pos <= limit <= end
34 *
35 * In multi-call mode, also these are true:
36 * end == size
37 * size <= size_max
38 * allocated <= size
39 *
40 * Most of these variables are size_t to support single-call mode,
41 * in which the dictionary variables address the actual output
42 * buffer directly.
43 */
struct dictionary {
	/*
	 * Beginning of the history buffer. In single-call mode this
	 * points directly into the caller's output buffer (set up by
	 * dict_reset()); in multi-call mode it is a separately
	 * allocated circular buffer.
	 */
	uint8_t *buf;

	/* Old position in buf (before decoding more data) */
	size_t start;

	/* Position in buf */
	size_t pos;

	/*
	 * How full dictionary is. This is used to detect corrupt input that
	 * would read beyond the beginning of the uncompressed stream.
	 */
	size_t full;

	/* Write limit; we don't write to buf[limit] or later bytes. */
	size_t limit;

	/*
	 * End of the dictionary buffer. In multi-call mode, this is
	 * the same as the dictionary size. In single-call mode, this
	 * indicates the size of the output buffer.
	 */
	size_t end;

	/*
	 * Size of the dictionary as specified in Block Header. This is used
	 * together with "full" to detect corrupt input that would make us
	 * read beyond the beginning of the uncompressed stream.
	 */
	uint32_t size;

	/*
	 * Maximum allowed dictionary size in multi-call mode.
	 * This is ignored in single-call mode.
	 */
	uint32_t size_max;

	/*
	 * Amount of memory currently allocated for the dictionary.
	 * This is used only with XZ_DYNALLOC. (With XZ_PREALLOC,
	 * size_max is always the same as the allocated size.)
	 */
	uint32_t allocated;

	/* Operation mode (single-call, preallocated, or dynamic) */
	enum xz_mode mode;
};
93
/* Range decoder */
struct rc_dec {
	/* Range of the range coder; refilled by rc_normalize() whenever
	 * it drops below RC_TOP_VALUE */
	uint32_t range;
	/* Code value being decoded; compared against "bound" in rc_bit() */
	uint32_t code;

	/*
	 * Number of initializing bytes remaining to be read
	 * by rc_read_init().
	 */
	uint32_t init_bytes_left;

	/*
	 * Buffer from which we read our input. It can be either
	 * temp.buf or the caller-provided input buffer.
	 */
	const uint8_t *in;
	size_t in_pos;
	size_t in_limit;
};
113
/*
 * Probabilities for a length decoder. The three length ranges each
 * have their own bittree; lzma_len() selects between them using the
 * "choice" and "choice2" probabilities.
 */
struct lzma_len_dec {
	/* Probability of match length being at least 10 */
	uint16_t choice;

	/* Probability of match length being at least 18 */
	uint16_t choice2;

	/* Probabilities for match lengths 2-9 */
	uint16_t low[POS_STATES_MAX][LEN_LOW_SYMBOLS];

	/* Probabilities for match lengths 10-17 */
	uint16_t mid[POS_STATES_MAX][LEN_MID_SYMBOLS];

	/* Probabilities for match lengths 18-273 */
	uint16_t high[LEN_HIGH_SYMBOLS];
};
131
struct lzma_dec {
	/* Distances of latest four matches */
	uint32_t rep0;
	uint32_t rep1;
	uint32_t rep2;
	uint32_t rep3;

	/* Types of the most recently seen LZMA symbols */
	enum lzma_state state;

	/*
	 * Length of a match. This is updated so that dict_repeat can
	 * be called again to finish repeating the whole match.
	 */
	uint32_t len;

	/*
	 * LZMA properties or related bit masks (number of literal
	 * context bits, a mask derived from the number of literal
	 * position bits, and a mask derived from the number of
	 * position bits)
	 */
	uint32_t lc;
	uint32_t literal_pos_mask; /* (1 << lp) - 1 */
	uint32_t pos_mask; /* (1 << pb) - 1 */

	/* If 1, it's a match. Otherwise it's a single 8-bit literal. */
	uint16_t is_match[STATES][POS_STATES_MAX];

	/* If 1, it's a repeated match. The distance is one of rep0 .. rep3. */
	uint16_t is_rep[STATES];

	/*
	 * If 0, distance of a repeated match is rep0.
	 * Otherwise check is_rep1.
	 */
	uint16_t is_rep0[STATES];

	/*
	 * If 0, distance of a repeated match is rep1.
	 * Otherwise check is_rep2.
	 */
	uint16_t is_rep1[STATES];

	/* If 0, distance of a repeated match is rep2. Otherwise it is rep3. */
	uint16_t is_rep2[STATES];

	/*
	 * If 0, the repeated match has length of one byte. Otherwise
	 * the length is decoded from rep_len_dec. (See the "short rep"
	 * branch in lzma_rep_match().)
	 */
	uint16_t is_rep0_long[STATES][POS_STATES_MAX];

	/*
	 * Probability tree for the highest two bits of the match
	 * distance. There is a separate probability tree for match
	 * lengths of 2 (i.e. MATCH_LEN_MIN), 3, 4, and [5, 273].
	 */
	uint16_t dist_slot[DIST_STATES][DIST_SLOTS];

	/*
	 * Probability trees for additional bits for match distance
	 * when the distance is in the range [4, 127].
	 */
	uint16_t dist_special[FULL_DISTANCES - DIST_MODEL_END];

	/*
	 * Probability tree for the lowest four bits of a match
	 * distance that is equal to or greater than 128.
	 */
	uint16_t dist_align[ALIGN_SIZE];

	/* Length of a normal match */
	struct lzma_len_dec match_len_dec;

	/* Length of a repeated match */
	struct lzma_len_dec rep_len_dec;

	/* Probabilities of literals */
	uint16_t literal[LITERAL_CODERS_MAX][LITERAL_CODER_SIZE];
};
213
struct lzma2_dec {
	/* Position in xz_dec_lzma2_run(). */
	enum lzma2_seq {
		SEQ_CONTROL,
		SEQ_UNCOMPRESSED_1,
		SEQ_UNCOMPRESSED_2,
		SEQ_COMPRESSED_0,
		SEQ_COMPRESSED_1,
		SEQ_PROPERTIES,
		SEQ_LZMA_PREPARE,
		SEQ_LZMA_RUN,
		SEQ_COPY
	} sequence;

	/* Next position after decoding the compressed size of the chunk. */
	enum lzma2_seq next_sequence;

	/* Uncompressed size of LZMA chunk (2 MiB at maximum) */
	uint32_t uncompressed;

	/*
	 * Compressed size of LZMA chunk or compressed/uncompressed
	 * size of uncompressed chunk (64 KiB at maximum)
	 * (stored in the stream as size - 1; see SEQ_COMPRESSED_1)
	 */
	uint32_t compressed;

	/*
	 * True if dictionary reset is needed. This is false before
	 * the first chunk (LZMA or uncompressed).
	 *
	 * NOTE(review): xz_dec_lzma2_reset() sets this field to true
	 * before the first chunk, so the sentence above looks inverted;
	 * verify against upstream before relying on it.
	 */
	bool need_dict_reset;

	/*
	 * True if new LZMA properties are needed. This is false
	 * before the first LZMA chunk.
	 */
	bool need_props;
};
252
struct xz_dec_lzma2 {
	/*
	 * The order below is important on x86 to reduce code size and
	 * it shouldn't hurt on other platforms. Everything up to and
	 * including lzma.pos_mask are in the first 128 bytes on x86-32,
	 * which allows using smaller instructions to access those
	 * variables. On x86-64, fewer variables fit into the first 128
	 * bytes, but this is still the best order without sacrificing
	 * the readability by splitting the structures.
	 */
	struct rc_dec rc;
	struct dictionary dict;
	struct lzma2_dec lzma2;
	struct lzma_dec lzma;

	/*
	 * Temporary buffer which holds small number of input bytes between
	 * decoder calls. See lzma2_lzma() for details.
	 */
	struct {
		/* Number of valid bytes currently stored in buf */
		uint32_t size;
		uint8_t buf[3 * LZMA_IN_REQUIRED];
	} temp;
};
277
278/**************
279 * Dictionary *
280 **************/
281
282/*
283 * Reset the dictionary state. When in single-call mode, set up the beginning
284 * of the dictionary to point to the actual output buffer.
285 */
286static void XZ_FUNC dict_reset(struct dictionary *dict, struct xz_buf *b)
287{
288 if (DEC_IS_SINGLE(dict->mode)) {
289 dict->buf = b->out + b->out_pos;
290 dict->end = b->out_size - b->out_pos;
291 }
292
293 dict->start = 0;
294 dict->pos = 0;
295 dict->limit = 0;
296 dict->full = 0;
297}
298
299/* Set dictionary write limit */
300static void XZ_FUNC dict_limit(struct dictionary *dict, size_t out_max)
301{
302 if (dict->end - dict->pos <= out_max)
303 dict->limit = dict->end;
304 else
305 dict->limit = dict->pos + out_max;
306}
307
308/* Return true if at least one byte can be written into the dictionary. */
309static __always_inline bool XZ_FUNC dict_has_space(const struct dictionary *dict)
310{
311 return dict->pos < dict->limit;
312}
313
314/*
315 * Get a byte from the dictionary at the given distance. The distance is
316 * assumed to valid, or as a special case, zero when the dictionary is
317 * still empty. This special case is needed for single-call decoding to
318 * avoid writing a '\0' to the end of the destination buffer.
319 */
320static __always_inline uint32_t XZ_FUNC dict_get(
321 const struct dictionary *dict, uint32_t dist)
322{
323 size_t offset = dict->pos - dist - 1;
324
325 if (dist >= dict->pos)
326 offset += dict->end;
327
328 return dict->full > 0 ? dict->buf[offset] : 0;
329}
330
331/*
332 * Put one byte into the dictionary. It is assumed that there is space for it.
333 */
334static inline void XZ_FUNC dict_put(struct dictionary *dict, uint8_t byte)
335{
336 dict->buf[dict->pos++] = byte;
337
338 if (dict->full < dict->pos)
339 dict->full = dict->pos;
340}
341
342/*
343 * Repeat given number of bytes from the given distance. If the distance is
344 * invalid, false is returned. On success, true is returned and *len is
345 * updated to indicate how many bytes were left to be repeated.
346 */
347static bool XZ_FUNC dict_repeat(
348 struct dictionary *dict, uint32_t *len, uint32_t dist)
349{
350 size_t back;
351 uint32_t left;
352
353 if (dist >= dict->full || dist >= dict->size)
354 return false;
355
356 left = min_t(size_t, dict->limit - dict->pos, *len);
357 *len -= left;
358
359 back = dict->pos - dist - 1;
360 if (dist >= dict->pos)
361 back += dict->end;
362
363 do {
364 dict->buf[dict->pos++] = dict->buf[back++];
365 if (back == dict->end)
366 back = 0;
367 } while (--left > 0);
368
369 if (dict->full < dict->pos)
370 dict->full = dict->pos;
371
372 return true;
373}
374
/* Copy uncompressed data as is from input to dictionary and output buffers. */
static void XZ_FUNC dict_uncompressed(
		struct dictionary *dict, struct xz_buf *b, uint32_t *left)
{
	size_t copy_size;

	while (*left > 0 && b->in_pos < b->in_size
			&& b->out_pos < b->out_size) {
		/* Copy no more than the input, the output, the dictionary
		 * space, or the remaining chunk size allows. */
		copy_size = min(b->in_size - b->in_pos,
				b->out_size - b->out_pos);
		if (copy_size > dict->end - dict->pos)
			copy_size = dict->end - dict->pos;
		if (copy_size > *left)
			copy_size = *left;

		*left -= copy_size;

		memcpy(dict->buf + dict->pos, b->in + b->in_pos, copy_size);
		dict->pos += copy_size;

		if (dict->full < dict->pos)
			dict->full = dict->pos;

		if (DEC_IS_MULTI(dict->mode)) {
			/* Wrap the circular dictionary buffer */
			if (dict->pos == dict->end)
				dict->pos = 0;

			/*
			 * In multi-call mode the dictionary is separate
			 * from b->out, so the bytes must be written to
			 * both; in single-call mode they alias.
			 */
			memcpy(b->out + b->out_pos, b->in + b->in_pos,
					copy_size);
		}

		dict->start = dict->pos;

		b->out_pos += copy_size;
		b->in_pos += copy_size;

	}
}
413
414/*
415 * Flush pending data from dictionary to b->out. It is assumed that there is
416 * enough space in b->out. This is guaranteed because caller uses dict_limit()
417 * before decoding data into the dictionary.
418 */
419static uint32_t XZ_FUNC dict_flush(struct dictionary *dict, struct xz_buf *b)
420{
421 size_t copy_size = dict->pos - dict->start;
422
423 if (DEC_IS_MULTI(dict->mode)) {
424 if (dict->pos == dict->end)
425 dict->pos = 0;
426
427 memcpy(b->out + b->out_pos, dict->buf + dict->start,
428 copy_size);
429 }
430
431 dict->start = dict->pos;
432 b->out_pos += copy_size;
433 return copy_size;
434}
435
436/*****************
437 * Range decoder *
438 *****************/
439
440/* Reset the range decoder. */
441static void XZ_FUNC rc_reset(struct rc_dec *rc)
442{
443 rc->range = (uint32_t)-1;
444 rc->code = 0;
445 rc->init_bytes_left = RC_INIT_BYTES;
446}
447
448/*
449 * Read the first five initial bytes into rc->code if they haven't been
450 * read already. (Yes, the first byte gets completely ignored.)
451 */
452static bool XZ_FUNC rc_read_init(struct rc_dec *rc, struct xz_buf *b)
453{
454 while (rc->init_bytes_left > 0) {
455 if (b->in_pos == b->in_size)
456 return false;
457
458 rc->code = (rc->code << 8) + b->in[b->in_pos++];
459 --rc->init_bytes_left;
460 }
461
462 return true;
463}
464
465/* Return true if there may not be enough input for the next decoding loop. */
466static inline bool XZ_FUNC rc_limit_exceeded(const struct rc_dec *rc)
467{
468 return rc->in_pos > rc->in_limit;
469}
470
471/*
472 * Return true if it is possible (from point of view of range decoder) that
473 * we have reached the end of the LZMA chunk.
474 */
475static inline bool XZ_FUNC rc_is_finished(const struct rc_dec *rc)
476{
477 return rc->code == 0;
478}
479
/* Read the next input byte if needed. */
static __always_inline void XZ_FUNC rc_normalize(struct rc_dec *rc)
{
	/* Refill the low bits of the coder state whenever the range
	 * has shrunk below RC_TOP_VALUE. */
	if (rc->range < RC_TOP_VALUE) {
		rc->range <<= RC_SHIFT_BITS;
		rc->code = (rc->code << RC_SHIFT_BITS) + rc->in[rc->in_pos++];
	}
}
488
489/*
490 * Decode one bit. In some versions, this function has been splitted in three
491 * functions so that the compiler is supposed to be able to more easily avoid
492 * an extra branch. In this particular version of the LZMA decoder, this
493 * doesn't seem to be a good idea (tested with GCC 3.3.6, 3.4.6, and 4.3.3
494 * on x86). Using a non-splitted version results in nicer looking code too.
495 *
496 * NOTE: This must return an int. Do not make it return a bool or the speed
497 * of the code generated by GCC 3.x decreases 10-15 %. (GCC 4.3 doesn't care,
498 * and it generates 10-20 % faster code than GCC 3.x from this file anyway.)
499 */
500static __always_inline int XZ_FUNC rc_bit(struct rc_dec *rc, uint16_t *prob)
501{
502 uint32_t bound;
503 int bit;
504
505 rc_normalize(rc);
506 bound = (rc->range >> RC_BIT_MODEL_TOTAL_BITS) * *prob;
507 if (rc->code < bound) {
508 rc->range = bound;
509 *prob += (RC_BIT_MODEL_TOTAL - *prob) >> RC_MOVE_BITS;
510 bit = 0;
511 } else {
512 rc->range -= bound;
513 rc->code -= bound;
514 *prob -= *prob >> RC_MOVE_BITS;
515 bit = 1;
516 }
517
518 return bit;
519}
520
521/* Decode a bittree starting from the most significant bit. */
522static __always_inline uint32_t XZ_FUNC rc_bittree(
523 struct rc_dec *rc, uint16_t *probs, uint32_t limit)
524{
525 uint32_t symbol = 1;
526
527 do {
528 if (rc_bit(rc, &probs[symbol]))
529 symbol = (symbol << 1) + 1;
530 else
531 symbol <<= 1;
532 } while (symbol < limit);
533
534 return symbol;
535}
536
537/* Decode a bittree starting from the least significant bit. */
538static __always_inline void XZ_FUNC rc_bittree_reverse(struct rc_dec *rc,
539 uint16_t *probs, uint32_t *dest, uint32_t limit)
540{
541 uint32_t symbol = 1;
542 uint32_t i = 0;
543
544 do {
545 if (rc_bit(rc, &probs[symbol])) {
546 symbol = (symbol << 1) + 1;
547 *dest += 1 << i;
548 } else {
549 symbol <<= 1;
550 }
551 } while (++i < limit);
552}
553
/* Decode direct bits (fixed fifty-fifty probability) */
static inline void XZ_FUNC rc_direct(
		struct rc_dec *rc, uint32_t *dest, uint32_t limit)
{
	uint32_t mask;

	do {
		rc_normalize(rc);
		rc->range >>= 1;
		rc->code -= rc->range;
		/*
		 * Branchless bit extraction: mask is all ones when the
		 * subtraction wrapped (decoded bit is 0) and zero when it
		 * didn't (decoded bit is 1), so mask + 1 is the bit value.
		 */
		mask = (uint32_t)0 - (rc->code >> 31);
		rc->code += rc->range & mask;
		*dest = (*dest << 1) + (mask + 1);
	} while (--limit > 0);
}
569
570/********
571 * LZMA *
572 ********/
573
/* Get pointer to literal coder probability array. */
static uint16_t * XZ_FUNC lzma_literal_probs(struct xz_dec_lzma2 *s)
{
	/* The coder is selected by the high lc bits of the previous byte
	 * and the low lp bits of the uncompressed position. */
	uint32_t prev_byte = dict_get(&s->dict, 0);
	uint32_t low = prev_byte >> (8 - s->lzma.lc);
	uint32_t high = (s->dict.pos & s->lzma.literal_pos_mask) << s->lzma.lc;
	return s->lzma.literal[low + high];
}
582
/* Decode a literal (one 8-bit byte) */
static void XZ_FUNC lzma_literal(struct xz_dec_lzma2 *s)
{
	uint16_t *probs;
	uint32_t symbol;
	uint32_t match_byte;
	uint32_t match_bit;
	uint32_t offset;
	uint32_t i;

	probs = lzma_literal_probs(s);

	if (lzma_state_is_literal(s->lzma.state)) {
		/* Previous symbol was a literal: plain 8-bit bittree decode */
		symbol = rc_bittree(&s->rc, probs, 0x100);
	} else {
		/*
		 * Previous symbol was a match: use the byte at distance
		 * rep0 as context. While the decoded bits agree with the
		 * match byte, a separate set of probabilities (selected
		 * via "offset") is used.
		 */
		symbol = 1;
		match_byte = dict_get(&s->dict, s->lzma.rep0) << 1;
		offset = 0x100;

		do {
			match_bit = match_byte & offset;
			match_byte <<= 1;
			i = offset + match_bit + symbol;

			if (rc_bit(&s->rc, &probs[i])) {
				symbol = (symbol << 1) + 1;
				offset &= match_bit;
			} else {
				symbol <<= 1;
				offset &= ~match_bit;
			}
		} while (symbol < 0x100);
	}

	dict_put(&s->dict, (uint8_t)symbol);
	lzma_state_literal(&s->lzma.state);
}
620
/* Decode the length of the match into s->lzma.len. */
static void XZ_FUNC lzma_len(struct xz_dec_lzma2 *s, struct lzma_len_dec *l,
		uint32_t pos_state)
{
	uint16_t *probs;
	uint32_t limit;

	if (!rc_bit(&s->rc, &l->choice)) {
		/* Lengths 2-9 */
		probs = l->low[pos_state];
		limit = LEN_LOW_SYMBOLS;
		s->lzma.len = MATCH_LEN_MIN;
	} else {
		if (!rc_bit(&s->rc, &l->choice2)) {
			/* Lengths 10-17 */
			probs = l->mid[pos_state];
			limit = LEN_MID_SYMBOLS;
			s->lzma.len = MATCH_LEN_MIN + LEN_LOW_SYMBOLS;
		} else {
			/* Lengths 18-273 */
			probs = l->high;
			limit = LEN_HIGH_SYMBOLS;
			s->lzma.len = MATCH_LEN_MIN + LEN_LOW_SYMBOLS
					+ LEN_MID_SYMBOLS;
		}
	}

	/* rc_bittree() returns the decoded value offset by limit */
	s->lzma.len += rc_bittree(&s->rc, probs, limit) - limit;
}
647
/* Decode a match. The distance will be stored in s->lzma.rep0. */
static void XZ_FUNC lzma_match(struct xz_dec_lzma2 *s, uint32_t pos_state)
{
	uint16_t *probs;
	uint32_t dist_slot;
	uint32_t limit;

	lzma_state_match(&s->lzma.state);

	/* Shift the distance history; rep0 receives the new distance below. */
	s->lzma.rep3 = s->lzma.rep2;
	s->lzma.rep2 = s->lzma.rep1;
	s->lzma.rep1 = s->lzma.rep0;

	lzma_len(s, &s->lzma.match_len_dec, pos_state);

	probs = s->lzma.dist_slot[lzma_get_dist_state(s->lzma.len)];
	dist_slot = rc_bittree(&s->rc, probs, DIST_SLOTS) - DIST_SLOTS;

	if (dist_slot < DIST_MODEL_START) {
		/* Small distances are coded directly by the slot value. */
		s->lzma.rep0 = dist_slot;
	} else {
		limit = (dist_slot >> 1) - 1;
		s->lzma.rep0 = 2 + (dist_slot & 1);

		if (dist_slot < DIST_MODEL_END) {
			/* Mid-range distances: the remaining bits come from
			 * a reverse bittree of context-coded bits. */
			s->lzma.rep0 <<= limit;
			probs = s->lzma.dist_special + s->lzma.rep0
					- dist_slot - 1;
			rc_bittree_reverse(&s->rc, probs,
					&s->lzma.rep0, limit);
		} else {
			/* Large distances: the high bits are direct bits,
			 * the lowest ALIGN_BITS come from the align tree. */
			rc_direct(&s->rc, &s->lzma.rep0, limit - ALIGN_BITS);
			s->lzma.rep0 <<= ALIGN_BITS;
			rc_bittree_reverse(&s->rc, s->lzma.dist_align,
					&s->lzma.rep0, ALIGN_BITS);
		}
	}
}
686
687/*
688 * Decode a repeated match. The distance is one of the four most recently
689 * seen matches. The distance will be stored in s->lzma.rep0.
690 */
691static void XZ_FUNC lzma_rep_match(struct xz_dec_lzma2 *s, uint32_t pos_state)
692{
693 uint32_t tmp;
694
695 if (!rc_bit(&s->rc, &s->lzma.is_rep0[s->lzma.state])) {
696 if (!rc_bit(&s->rc, &s->lzma.is_rep0_long[
697 s->lzma.state][pos_state])) {
698 lzma_state_short_rep(&s->lzma.state);
699 s->lzma.len = 1;
700 return;
701 }
702 } else {
703 if (!rc_bit(&s->rc, &s->lzma.is_rep1[s->lzma.state])) {
704 tmp = s->lzma.rep1;
705 } else {
706 if (!rc_bit(&s->rc, &s->lzma.is_rep2[s->lzma.state])) {
707 tmp = s->lzma.rep2;
708 } else {
709 tmp = s->lzma.rep3;
710 s->lzma.rep3 = s->lzma.rep2;
711 }
712
713 s->lzma.rep2 = s->lzma.rep1;
714 }
715
716 s->lzma.rep1 = s->lzma.rep0;
717 s->lzma.rep0 = tmp;
718 }
719
720 lzma_state_long_rep(&s->lzma.state);
721 lzma_len(s, &s->lzma.rep_len_dec, pos_state);
722}
723
/*
 * LZMA decoder core. Decodes symbols until the dictionary write limit or
 * the input limit is reached. Returns false on corrupt input (an invalid
 * match distance).
 */
static bool XZ_FUNC lzma_main(struct xz_dec_lzma2 *s)
{
	uint32_t pos_state;

	/*
	 * If the dictionary was reached during the previous call, try to
	 * finish the possibly pending repeat in the dictionary.
	 */
	if (dict_has_space(&s->dict) && s->lzma.len > 0)
		dict_repeat(&s->dict, &s->lzma.len, s->lzma.rep0);

	/*
	 * Decode more LZMA symbols. One iteration may consume up to
	 * LZMA_IN_REQUIRED - 1 bytes.
	 */
	while (dict_has_space(&s->dict) && !rc_limit_exceeded(&s->rc)) {
		pos_state = s->dict.pos & s->lzma.pos_mask;

		if (!rc_bit(&s->rc, &s->lzma.is_match[
				s->lzma.state][pos_state])) {
			lzma_literal(s);
		} else {
			/* A match: decode distance and length, then copy
			 * the bytes from the dictionary. */
			if (rc_bit(&s->rc, &s->lzma.is_rep[s->lzma.state]))
				lzma_rep_match(s, pos_state);
			else
				lzma_match(s, pos_state);

			/* dict_repeat() returns false on an invalid
			 * distance, i.e. corrupt input. */
			if (!dict_repeat(&s->dict, &s->lzma.len, s->lzma.rep0))
				return false;
		}
	}

	/*
	 * Having the range decoder always normalized when we are outside
	 * this function makes it easier to correctly handle end of the chunk.
	 */
	rc_normalize(&s->rc);

	return true;
}
765
766/*
767 * Reset the LZMA decoder and range decoder state. Dictionary is nore reset
768 * here, because LZMA state may be reset without resetting the dictionary.
769 */
770static void XZ_FUNC lzma_reset(struct xz_dec_lzma2 *s)
771{
772 uint16_t *probs;
773 size_t i;
774
775 s->lzma.state = STATE_LIT_LIT;
776 s->lzma.rep0 = 0;
777 s->lzma.rep1 = 0;
778 s->lzma.rep2 = 0;
779 s->lzma.rep3 = 0;
780
781 /*
782 * All probabilities are initialized to the same value. This hack
783 * makes the code smaller by avoiding a separate loop for each
784 * probability array.
785 *
786 * This could be optimized so that only that part of literal
787 * probabilities that are actually required. In the common case
788 * we would write 12 KiB less.
789 */
790 probs = s->lzma.is_match[0];
791 for (i = 0; i < PROBS_TOTAL; ++i)
792 probs[i] = RC_BIT_MODEL_TOTAL / 2;
793
794 rc_reset(&s->rc);
795}
796
797/*
798 * Decode and validate LZMA properties (lc/lp/pb) and calculate the bit masks
799 * from the decoded lp and pb values. On success, the LZMA decoder state is
800 * reset and true is returned.
801 */
802static bool XZ_FUNC lzma_props(struct xz_dec_lzma2 *s, uint8_t props)
803{
804 if (props > (4 * 5 + 4) * 9 + 8)
805 return false;
806
807 s->lzma.pos_mask = 0;
808 while (props >= 9 * 5) {
809 props -= 9 * 5;
810 ++s->lzma.pos_mask;
811 }
812
813 s->lzma.pos_mask = (1 << s->lzma.pos_mask) - 1;
814
815 s->lzma.literal_pos_mask = 0;
816 while (props >= 9) {
817 props -= 9;
818 ++s->lzma.literal_pos_mask;
819 }
820
821 s->lzma.lc = props;
822
823 if (s->lzma.lc + s->lzma.literal_pos_mask > 4)
824 return false;
825
826 s->lzma.literal_pos_mask = (1 << s->lzma.literal_pos_mask) - 1;
827
828 lzma_reset(s);
829
830 return true;
831}
832
833/*********
834 * LZMA2 *
835 *********/
836
837/*
838 * The LZMA decoder assumes that if the input limit (s->rc.in_limit) hasn't
839 * been exceeded, it is safe to read up to LZMA_IN_REQUIRED bytes. This
840 * wrapper function takes care of making the LZMA decoder's assumption safe.
841 *
842 * As long as there is plenty of input left to be decoded in the current LZMA
843 * chunk, we decode directly from the caller-supplied input buffer until
844 * there's LZMA_IN_REQUIRED bytes left. Those remaining bytes are copied into
845 * s->temp.buf, which (hopefully) gets filled on the next call to this
846 * function. We decode a few bytes from the temporary buffer so that we can
847 * continue decoding from the caller-supplied input buffer again.
848 */
849static bool XZ_FUNC lzma2_lzma(struct xz_dec_lzma2 *s, struct xz_buf *b)
850{
851 size_t in_avail;
852 uint32_t tmp;
853
854 in_avail = b->in_size - b->in_pos;
855 if (s->temp.size > 0 || s->lzma2.compressed == 0) {
856 tmp = 2 * LZMA_IN_REQUIRED - s->temp.size;
857 if (tmp > s->lzma2.compressed - s->temp.size)
858 tmp = s->lzma2.compressed - s->temp.size;
859 if (tmp > in_avail)
860 tmp = in_avail;
861
862 memcpy(s->temp.buf + s->temp.size, b->in + b->in_pos, tmp);
863
864 if (s->temp.size + tmp == s->lzma2.compressed) {
865 memzero(s->temp.buf + s->temp.size + tmp,
866 sizeof(s->temp.buf)
867 - s->temp.size - tmp);
868 s->rc.in_limit = s->temp.size + tmp;
869 } else if (s->temp.size + tmp < LZMA_IN_REQUIRED) {
870 s->temp.size += tmp;
871 b->in_pos += tmp;
872 return true;
873 } else {
874 s->rc.in_limit = s->temp.size + tmp - LZMA_IN_REQUIRED;
875 }
876
877 s->rc.in = s->temp.buf;
878 s->rc.in_pos = 0;
879
880 if (!lzma_main(s) || s->rc.in_pos > s->temp.size + tmp)
881 return false;
882
883 s->lzma2.compressed -= s->rc.in_pos;
884
885 if (s->rc.in_pos < s->temp.size) {
886 s->temp.size -= s->rc.in_pos;
887 memmove(s->temp.buf, s->temp.buf + s->rc.in_pos,
888 s->temp.size);
889 return true;
890 }
891
892 b->in_pos += s->rc.in_pos - s->temp.size;
893 s->temp.size = 0;
894 }
895
896 in_avail = b->in_size - b->in_pos;
897 if (in_avail >= LZMA_IN_REQUIRED) {
898 s->rc.in = b->in;
899 s->rc.in_pos = b->in_pos;
900
901 if (in_avail >= s->lzma2.compressed + LZMA_IN_REQUIRED)
902 s->rc.in_limit = b->in_pos + s->lzma2.compressed;
903 else
904 s->rc.in_limit = b->in_size - LZMA_IN_REQUIRED;
905
906 if (!lzma_main(s))
907 return false;
908
909 in_avail = s->rc.in_pos - b->in_pos;
910 if (in_avail > s->lzma2.compressed)
911 return false;
912
913 s->lzma2.compressed -= in_avail;
914 b->in_pos = s->rc.in_pos;
915 }
916
917 in_avail = b->in_size - b->in_pos;
918 if (in_avail < LZMA_IN_REQUIRED) {
919 if (in_avail > s->lzma2.compressed)
920 in_avail = s->lzma2.compressed;
921
922 memcpy(s->temp.buf, b->in + b->in_pos, in_avail);
923 s->temp.size = in_avail;
924 b->in_pos += in_avail;
925 }
926
927 return true;
928}
929
930/*
931 * Take care of the LZMA2 control layer, and forward the job of actual LZMA
932 * decoding or copying of uncompressed chunks to other functions.
933 */
934XZ_EXTERN NOINLINE enum xz_ret XZ_FUNC xz_dec_lzma2_run(
935 struct xz_dec_lzma2 *s, struct xz_buf *b)
936{
937 uint32_t tmp;
938
939 while (b->in_pos < b->in_size || s->lzma2.sequence == SEQ_LZMA_RUN) {
940 switch (s->lzma2.sequence) {
941 case SEQ_CONTROL:
942 /*
943 * LZMA2 control byte
944 *
945 * Exact values:
946 * 0x00 End marker
947 * 0x01 Dictionary reset followed by
948 * an uncompressed chunk
949 * 0x02 Uncompressed chunk (no dictionary reset)
950 *
951 * Highest three bits (s->control & 0xE0):
952 * 0xE0 Dictionary reset, new properties and state
953 * reset, followed by LZMA compressed chunk
954 * 0xC0 New properties and state reset, followed
955 * by LZMA compressed chunk (no dictionary
956 * reset)
957 * 0xA0 State reset using old properties,
958 * followed by LZMA compressed chunk (no
959 * dictionary reset)
960 * 0x80 LZMA chunk (no dictionary or state reset)
961 *
962 * For LZMA compressed chunks, the lowest five bits
963 * (s->control & 1F) are the highest bits of the
964 * uncompressed size (bits 16-20).
965 *
966 * A new LZMA2 stream must begin with a dictionary
967 * reset. The first LZMA chunk must set new
968 * properties and reset the LZMA state.
969 *
970 * Values that don't match anything described above
971 * are invalid and we return XZ_DATA_ERROR.
972 */
973 tmp = b->in[b->in_pos++];
974
975 if (tmp >= 0xE0 || tmp == 0x01) {
976 s->lzma2.need_props = true;
977 s->lzma2.need_dict_reset = false;
978 dict_reset(&s->dict, b);
979 } else if (s->lzma2.need_dict_reset) {
980 return XZ_DATA_ERROR;
981 }
982
983 if (tmp >= 0x80) {
984 s->lzma2.uncompressed = (tmp & 0x1F) << 16;
985 s->lzma2.sequence = SEQ_UNCOMPRESSED_1;
986
987 if (tmp >= 0xC0) {
988 /*
989 * When there are new properties,
990 * state reset is done at
991 * SEQ_PROPERTIES.
992 */
993 s->lzma2.need_props = false;
994 s->lzma2.next_sequence
995 = SEQ_PROPERTIES;
996
997 } else if (s->lzma2.need_props) {
998 return XZ_DATA_ERROR;
999
1000 } else {
1001 s->lzma2.next_sequence
1002 = SEQ_LZMA_PREPARE;
1003 if (tmp >= 0xA0)
1004 lzma_reset(s);
1005 }
1006 } else {
1007 if (tmp == 0x00)
1008 return XZ_STREAM_END;
1009
1010 if (tmp > 0x02)
1011 return XZ_DATA_ERROR;
1012
1013 s->lzma2.sequence = SEQ_COMPRESSED_0;
1014 s->lzma2.next_sequence = SEQ_COPY;
1015 }
1016
1017 break;
1018
1019 case SEQ_UNCOMPRESSED_1:
1020 s->lzma2.uncompressed
1021 += (uint32_t)b->in[b->in_pos++] << 8;
1022 s->lzma2.sequence = SEQ_UNCOMPRESSED_2;
1023 break;
1024
1025 case SEQ_UNCOMPRESSED_2:
1026 s->lzma2.uncompressed
1027 += (uint32_t)b->in[b->in_pos++] + 1;
1028 s->lzma2.sequence = SEQ_COMPRESSED_0;
1029 break;
1030
1031 case SEQ_COMPRESSED_0:
1032 s->lzma2.compressed
1033 = (uint32_t)b->in[b->in_pos++] << 8;
1034 s->lzma2.sequence = SEQ_COMPRESSED_1;
1035 break;
1036
1037 case SEQ_COMPRESSED_1:
1038 s->lzma2.compressed
1039 += (uint32_t)b->in[b->in_pos++] + 1;
1040 s->lzma2.sequence = s->lzma2.next_sequence;
1041 break;
1042
1043 case SEQ_PROPERTIES:
1044 if (!lzma_props(s, b->in[b->in_pos++]))
1045 return XZ_DATA_ERROR;
1046
1047 s->lzma2.sequence = SEQ_LZMA_PREPARE;
1048
1049 case SEQ_LZMA_PREPARE:
1050 if (s->lzma2.compressed < RC_INIT_BYTES)
1051 return XZ_DATA_ERROR;
1052
1053 if (!rc_read_init(&s->rc, b))
1054 return XZ_OK;
1055
1056 s->lzma2.compressed -= RC_INIT_BYTES;
1057 s->lzma2.sequence = SEQ_LZMA_RUN;
1058
1059 case SEQ_LZMA_RUN:
1060 /*
1061 * Set dictionary limit to indicate how much we want
1062 * to be encoded at maximum. Decode new data into the
1063 * dictionary. Flush the new data from dictionary to
1064 * b->out. Check if we finished decoding this chunk.
1065 * In case the dictionary got full but we didn't fill
1066 * the output buffer yet, we may run this loop
1067 * multiple times without changing s->lzma2.sequence.
1068 */
1069 dict_limit(&s->dict, min_t(size_t,
1070 b->out_size - b->out_pos,
1071 s->lzma2.uncompressed));
1072 if (!lzma2_lzma(s, b))
1073 return XZ_DATA_ERROR;
1074
1075 s->lzma2.uncompressed -= dict_flush(&s->dict, b);
1076
1077 if (s->lzma2.uncompressed == 0) {
1078 if (s->lzma2.compressed > 0 || s->lzma.len > 0
1079 || !rc_is_finished(&s->rc))
1080 return XZ_DATA_ERROR;
1081
1082 rc_reset(&s->rc);
1083 s->lzma2.sequence = SEQ_CONTROL;
1084
1085 } else if (b->out_pos == b->out_size
1086 || (b->in_pos == b->in_size
1087 && s->temp.size
1088 < s->lzma2.compressed)) {
1089 return XZ_OK;
1090 }
1091
1092 break;
1093
1094 case SEQ_COPY:
1095 dict_uncompressed(&s->dict, b, &s->lzma2.compressed);
1096 if (s->lzma2.compressed > 0)
1097 return XZ_OK;
1098
1099 s->lzma2.sequence = SEQ_CONTROL;
1100 break;
1101 }
1102 }
1103
1104 return XZ_OK;
1105}
1106
/*
 * Allocate and initialize an LZMA2 decoder. dict_max is the largest
 * dictionary allowed in multi-call modes; it is ignored in single-call
 * mode. Returns NULL on allocation failure.
 */
XZ_EXTERN struct xz_dec_lzma2 * XZ_FUNC xz_dec_lzma2_create(
		enum xz_mode mode, uint32_t dict_max)
{
	struct xz_dec_lzma2 *s = kmalloc(sizeof(*s), GFP_KERNEL);
	if (s == NULL)
		return NULL;

	s->dict.mode = mode;
	s->dict.size_max = dict_max;

	if (DEC_IS_PREALLOC(mode)) {
		/* Preallocate the maximum allowed dictionary up front */
		s->dict.buf = vmalloc(dict_max);
		if (s->dict.buf == NULL) {
			kfree(s);
			return NULL;
		}
	} else if (DEC_IS_DYNALLOC(mode)) {
		/* Allocated lazily in xz_dec_lzma2_reset() */
		s->dict.buf = NULL;
		s->dict.allocated = 0;
	}

	return s;
}
1130
1131XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_lzma2_reset(
1132 struct xz_dec_lzma2 *s, uint8_t props)
1133{
1134 /* This limits dictionary size to 3 GiB to keep parsing simpler. */
1135 if (props > 39)
1136 return XZ_OPTIONS_ERROR;
1137
1138 s->dict.size = 2 + (props & 1);
1139 s->dict.size <<= (props >> 1) + 11;
1140
1141 if (DEC_IS_MULTI(s->dict.mode)) {
1142 if (s->dict.size > s->dict.size_max)
1143 return XZ_MEMLIMIT_ERROR;
1144
1145 s->dict.end = s->dict.size;
1146
1147 if (DEC_IS_DYNALLOC(s->dict.mode)) {
1148 if (s->dict.allocated < s->dict.size) {
1149 vfree(s->dict.buf);
1150 s->dict.buf = vmalloc(s->dict.size);
1151 if (s->dict.buf == NULL) {
1152 s->dict.allocated = 0;
1153 return XZ_MEM_ERROR;
1154 }
1155 }
1156 }
1157 }
1158
1159 s->lzma.len = 0;
1160
1161 s->lzma2.sequence = SEQ_CONTROL;
1162 s->lzma2.need_dict_reset = true;
1163
1164 s->temp.size = 0;
1165
1166 return XZ_OK;
1167}
1168
1169XZ_EXTERN void XZ_FUNC xz_dec_lzma2_end(struct xz_dec_lzma2 *s)
1170{
1171 if (DEC_IS_MULTI(s->dict.mode))
1172 vfree(s->dict.buf);
1173
1174 kfree(s);
1175}
diff --git a/archival/libarchive/unxz/xz_dec_stream.c b/archival/libarchive/unxz/xz_dec_stream.c
new file mode 100644
index 000000000..bdcbf1ba3
--- /dev/null
+++ b/archival/libarchive/unxz/xz_dec_stream.c
@@ -0,0 +1,822 @@
1/*
2 * .xz Stream decoder
3 *
4 * Author: Lasse Collin <lasse.collin@tukaani.org>
5 *
6 * This file has been put into the public domain.
7 * You can do whatever you want with this file.
8 */
9
10#include "xz_private.h"
11#include "xz_stream.h"
12
/*
 * Hash used to validate the Index field. One instance is accumulated
 * while decoding Blocks (block.hash) and another while decoding the
 * Index (index.hash); the two must match in a valid file.
 *
 * NOTE: dec_block() and dec_index() CRC32 this struct as raw bytes
 * (xz_crc32((const uint8_t *)&hash, sizeof(hash), ...)), so the field
 * order and layout must not be changed.
 */
struct xz_dec_hash {
	vli_type unpadded;	/* Sum of Unpadded Sizes */
	vli_type uncompressed;	/* Sum of Uncompressed Sizes */
	uint32_t crc32;		/* Running CRC32 over this struct itself */
};
19
/* Main .xz stream decoder state, driven by dec_main(). */
struct xz_dec {
	/*
	 * Position in dec_main(). The enumerators are in stream order;
	 * dec_main()'s switch falls through from one state to the next.
	 */
	enum {
		SEQ_STREAM_HEADER,
		SEQ_BLOCK_START,
		SEQ_BLOCK_HEADER,
		SEQ_BLOCK_UNCOMPRESS,
		SEQ_BLOCK_PADDING,
		SEQ_BLOCK_CHECK,
		SEQ_INDEX,
		SEQ_INDEX_PADDING,
		SEQ_INDEX_CRC32,
		SEQ_STREAM_FOOTER
	} sequence;

	/* Position in variable-length integers and Check fields */
	uint32_t pos;

	/* Variable-length integer decoded by dec_vli() */
	vli_type vli;

	/* Saved in_pos and out_pos */
	size_t in_start;
	size_t out_start;

	/* CRC32 value in Block or Index */
	uint32_t crc32;

	/* Type of the integrity check calculated from uncompressed data */
	enum xz_check check_type;

	/* Operation mode */
	enum xz_mode mode;

	/*
	 * True if the next call to xz_dec_run() is allowed to return
	 * XZ_BUF_ERROR. Set after a call that made no progress; see
	 * xz_dec_run() for the rationale.
	 */
	bool allow_buf_error;

	/* Information stored in Block Header */
	struct {
		/*
		 * Value stored in the Compressed Size field, or
		 * VLI_UNKNOWN if Compressed Size is not present.
		 */
		vli_type compressed;

		/*
		 * Value stored in the Uncompressed Size field, or
		 * VLI_UNKNOWN if Uncompressed Size is not present.
		 */
		vli_type uncompressed;

		/* Size of the Block Header field */
		uint32_t size;
	} block_header;

	/* Information collected when decoding Blocks */
	struct {
		/* Observed compressed size of the current Block */
		vli_type compressed;

		/* Observed uncompressed size of the current Block */
		vli_type uncompressed;

		/* Number of Blocks decoded so far */
		vli_type count;

		/*
		 * Hash calculated from the Block sizes. This is used to
		 * validate the Index field.
		 */
		struct xz_dec_hash hash;
	} block;

	/* Variables needed when verifying the Index field */
	struct {
		/* Position in dec_index() */
		enum {
			SEQ_INDEX_COUNT,
			SEQ_INDEX_UNPADDED,
			SEQ_INDEX_UNCOMPRESSED
		} sequence;

		/* Size of the Index in bytes */
		vli_type size;

		/* Number of Records (matches block.count in valid files) */
		vli_type count;

		/*
		 * Hash calculated from the Records (matches block.hash in
		 * valid files).
		 */
		struct xz_dec_hash hash;
	} index;

	/*
	 * Temporary buffer needed to hold Stream Header, Block Header,
	 * and Stream Footer. The Block Header is the biggest (1 KiB)
	 * so we reserve space according to that. buf[] has to be aligned
	 * to a multiple of four bytes; the size_t variables before it
	 * should guarantee this.
	 */
	struct {
		size_t pos;
		size_t size;
		uint8_t buf[1024];
	} temp;

	/* LZMA2 decoder allocated by xz_dec_init() */
	struct xz_dec_lzma2 *lzma2;

#ifdef XZ_DEC_BCJ
	/* BCJ filter decoder; bcj_active is set per Block Header. */
	struct xz_dec_bcj *bcj;
	bool bcj_active;
#endif
};
138
#ifdef XZ_DEC_ANY_CHECK
/*
 * Sizes of the Check field with different Check IDs. Indexed by the
 * Check ID (0-15): ID 0 is "none", IDs 1-3 are 4-byte checks, and so
 * on. Used by check_skip() to skip over Check fields whose type this
 * decoder does not verify.
 */
static const uint8_t check_sizes[16] = {
	0,
	4, 4, 4,
	8, 8, 8,
	16, 16, 16,
	32, 32, 32,
	64, 64, 64
};
#endif
150
151/*
152 * Fill s->temp by copying data starting from b->in[b->in_pos]. Caller
153 * must have set s->temp.pos to indicate how much data we are supposed
154 * to copy into s->temp.buf. Return true once s->temp.pos has reached
155 * s->temp.size.
156 */
157static bool XZ_FUNC fill_temp(struct xz_dec *s, struct xz_buf *b)
158{
159 size_t copy_size = min_t(size_t,
160 b->in_size - b->in_pos, s->temp.size - s->temp.pos);
161
162 memcpy(s->temp.buf + s->temp.pos, b->in + b->in_pos, copy_size);
163 b->in_pos += copy_size;
164 s->temp.pos += copy_size;
165
166 if (s->temp.pos == s->temp.size) {
167 s->temp.pos = 0;
168 return true;
169 }
170
171 return false;
172}
173
174/* Decode a variable-length integer (little-endian base-128 encoding) */
175static enum xz_ret XZ_FUNC dec_vli(struct xz_dec *s,
176 const uint8_t *in, size_t *in_pos, size_t in_size)
177{
178 uint8_t byte;
179
180 if (s->pos == 0)
181 s->vli = 0;
182
183 while (*in_pos < in_size) {
184 byte = in[*in_pos];
185 ++*in_pos;
186
187 s->vli |= (vli_type)(byte & 0x7F) << s->pos;
188
189 if ((byte & 0x80) == 0) {
190 /* Don't allow non-minimal encodings. */
191 if (byte == 0 && s->pos != 0)
192 return XZ_DATA_ERROR;
193
194 s->pos = 0;
195 return XZ_STREAM_END;
196 }
197
198 s->pos += 7;
199 if (s->pos == 7 * VLI_BYTES_MAX)
200 return XZ_DATA_ERROR;
201 }
202
203 return XZ_OK;
204}
205
/*
 * Decode the Compressed Data field from a Block. Update and validate
 * the observed compressed and uncompressed sizes of the Block so that
 * they don't exceed the values possibly stored in the Block Header
 * (validation assumes that no integer overflow occurs, since vli_type
 * is normally uint64_t). Update the CRC32 if presence of the CRC32
 * field was indicated in Stream Header.
 *
 * Once the decoding is finished, validate that the observed sizes match
 * the sizes possibly stored in the Block Header. Update the hash and
 * Block count, which are later used to validate the Index field.
 */
static enum xz_ret XZ_FUNC dec_block(struct xz_dec *s, struct xz_buf *b)
{
	enum xz_ret ret;

	s->in_start = b->in_pos;
	s->out_start = b->out_pos;

#ifdef XZ_DEC_BCJ
	if (s->bcj_active)
		ret = xz_dec_bcj_run(s->bcj, s->lzma2, b);
	else
#endif
		ret = xz_dec_lzma2_run(s->lzma2, b);

	/* Account for what this call consumed and produced. */
	s->block.compressed += b->in_pos - s->in_start;
	s->block.uncompressed += b->out_pos - s->out_start;

	/*
	 * There is no need to separately check for VLI_UNKNOWN, since
	 * the observed sizes are always smaller than VLI_UNKNOWN.
	 */
	if (s->block.compressed > s->block_header.compressed
			|| s->block.uncompressed
				> s->block_header.uncompressed)
		return XZ_DATA_ERROR;

	/* CRC32 runs over the uncompressed output produced so far. */
	if (s->check_type == XZ_CHECK_CRC32)
		s->crc32 = xz_crc32(b->out + s->out_start,
				b->out_pos - s->out_start, s->crc32);

	if (ret == XZ_STREAM_END) {
		/* Sizes declared in the Block Header must match exactly. */
		if (s->block_header.compressed != VLI_UNKNOWN
				&& s->block_header.compressed
					!= s->block.compressed)
			return XZ_DATA_ERROR;

		if (s->block_header.uncompressed != VLI_UNKNOWN
				&& s->block_header.uncompressed
					!= s->block.uncompressed)
			return XZ_DATA_ERROR;

		/* Unpadded Size = header size + compressed data + check. */
		s->block.hash.unpadded += s->block_header.size
				+ s->block.compressed;

#ifdef XZ_DEC_ANY_CHECK
		s->block.hash.unpadded += check_sizes[s->check_type];
#else
		if (s->check_type == XZ_CHECK_CRC32)
			s->block.hash.unpadded += 4;
#endif

		s->block.hash.uncompressed += s->block.uncompressed;
		/* The hash struct is CRC'd as raw bytes; see xz_dec_hash. */
		s->block.hash.crc32 = xz_crc32(
				(const uint8_t *)&s->block.hash,
				sizeof(s->block.hash), s->block.hash.crc32);

		++s->block.count;
	}

	return ret;
}
279
280/* Update the Index size and the CRC32 value. */
281static void XZ_FUNC index_update(struct xz_dec *s, const struct xz_buf *b)
282{
283 size_t in_used = b->in_pos - s->in_start;
284 s->index.size += in_used;
285 s->crc32 = xz_crc32(b->in + s->in_start, in_used, s->crc32);
286}
287
/*
 * Decode the Number of Records, Unpadded Size, and Uncompressed Size
 * fields from the Index field. That is, Index Padding and CRC32 are not
 * decoded by this function.
 *
 * This can return XZ_OK (more input needed), XZ_STREAM_END (everything
 * successfully decoded), or XZ_DATA_ERROR (input is corrupt).
 */
static enum xz_ret XZ_FUNC dec_index(struct xz_dec *s, struct xz_buf *b)
{
	enum xz_ret ret;

	do {
		ret = dec_vli(s, b->in, &b->in_pos, b->in_size);
		if (ret != XZ_STREAM_END) {
			/* Keep the running Index size/CRC up to date. */
			index_update(s, b);
			return ret;
		}

		switch (s->index.sequence) {
		case SEQ_INDEX_COUNT:
			s->index.count = s->vli;

			/*
			 * Validate that the Number of Records field
			 * indicates the same number of Records as
			 * there were Blocks in the Stream.
			 */
			if (s->index.count != s->block.count)
				return XZ_DATA_ERROR;

			s->index.sequence = SEQ_INDEX_UNPADDED;
			break;

		case SEQ_INDEX_UNPADDED:
			s->index.hash.unpadded += s->vli;
			s->index.sequence = SEQ_INDEX_UNCOMPRESSED;
			break;

		case SEQ_INDEX_UNCOMPRESSED:
			s->index.hash.uncompressed += s->vli;
			/* Same raw-bytes CRC scheme as block.hash. */
			s->index.hash.crc32 = xz_crc32(
					(const uint8_t *)&s->index.hash,
					sizeof(s->index.hash),
					s->index.hash.crc32);
			/* One Record (Unpadded + Uncompressed) done. */
			--s->index.count;
			s->index.sequence = SEQ_INDEX_UNPADDED;
			break;
		}
	} while (s->index.count > 0);

	return XZ_STREAM_END;
}
341
342/*
343 * Validate that the next four input bytes match the value of s->crc32.
344 * s->pos must be zero when starting to validate the first byte.
345 */
346static enum xz_ret XZ_FUNC crc32_validate(struct xz_dec *s, struct xz_buf *b)
347{
348 do {
349 if (b->in_pos == b->in_size)
350 return XZ_OK;
351
352 if (((s->crc32 >> s->pos) & 0xFF) != b->in[b->in_pos++])
353 return XZ_DATA_ERROR;
354
355 s->pos += 8;
356
357 } while (s->pos < 32);
358
359 s->crc32 = 0;
360 s->pos = 0;
361
362 return XZ_STREAM_END;
363}
364
365#ifdef XZ_DEC_ANY_CHECK
366/*
367 * Skip over the Check field when the Check ID is not supported.
368 * Returns true once the whole Check field has been skipped over.
369 */
370static bool XZ_FUNC check_skip(struct xz_dec *s, struct xz_buf *b)
371{
372 while (s->pos < check_sizes[s->check_type]) {
373 if (b->in_pos == b->in_size)
374 return false;
375
376 ++b->in_pos;
377 ++s->pos;
378 }
379
380 s->pos = 0;
381
382 return true;
383}
384#endif
385
/* Decode the Stream Header field (the first 12 bytes of the .xz Stream). */
static enum xz_ret XZ_FUNC dec_stream_header(struct xz_dec *s)
{
	if (!memeq(s->temp.buf, HEADER_MAGIC, HEADER_MAGIC_SIZE))
		return XZ_FORMAT_ERROR;

	/* CRC32 of the two Stream Flags bytes follows them. */
	if (xz_crc32(s->temp.buf + HEADER_MAGIC_SIZE, 2, 0)
			!= get_le32(s->temp.buf + HEADER_MAGIC_SIZE + 2))
		return XZ_DATA_ERROR;

	/* First Stream Flags byte is reserved and must be zero. */
	if (s->temp.buf[HEADER_MAGIC_SIZE] != 0)
		return XZ_OPTIONS_ERROR;

	/*
	 * Of integrity checks, we support only none (Check ID = 0) and
	 * CRC32 (Check ID = 1). However, if XZ_DEC_ANY_CHECK is defined,
	 * we will accept other check types too, but then the check won't
	 * be verified and a warning (XZ_UNSUPPORTED_CHECK) will be given.
	 */
	s->check_type = s->temp.buf[HEADER_MAGIC_SIZE + 1];

#ifdef XZ_DEC_ANY_CHECK
	if (s->check_type > XZ_CHECK_MAX)
		return XZ_OPTIONS_ERROR;

	if (s->check_type > XZ_CHECK_CRC32)
		return XZ_UNSUPPORTED_CHECK;
#else
	if (s->check_type > XZ_CHECK_CRC32)
		return XZ_OPTIONS_ERROR;
#endif

	return XZ_OK;
}
420
421/* Decode the Stream Footer field (the last 12 bytes of the .xz Stream) */
422static enum xz_ret XZ_FUNC dec_stream_footer(struct xz_dec *s)
423{
424 if (!memeq(s->temp.buf + 10, FOOTER_MAGIC, FOOTER_MAGIC_SIZE))
425 return XZ_DATA_ERROR;
426
427 if (xz_crc32(s->temp.buf + 4, 6, 0) != get_le32(s->temp.buf))
428 return XZ_DATA_ERROR;
429
430 /*
431 * Validate Backward Size. Note that we never added the size of the
432 * Index CRC32 field to s->index.size, thus we use s->index.size / 4
433 * instead of s->index.size / 4 - 1.
434 */
435 if ((s->index.size >> 2) != get_le32(s->temp.buf + 4))
436 return XZ_DATA_ERROR;
437
438 if (s->temp.buf[8] != 0 || s->temp.buf[9] != s->check_type)
439 return XZ_DATA_ERROR;
440
441 /*
442 * Use XZ_STREAM_END instead of XZ_OK to be more convenient
443 * for the caller.
444 */
445 return XZ_STREAM_END;
446}
447
/* Decode the Block Header and initialize the filter chain. */
static enum xz_ret XZ_FUNC dec_block_header(struct xz_dec *s)
{
	enum xz_ret ret;

	/*
	 * Validate the CRC32. We know that the temp buffer is at least
	 * eight bytes so this is safe.
	 */
	s->temp.size -= 4;
	if (xz_crc32(s->temp.buf, s->temp.size, 0)
			!= get_le32(s->temp.buf + s->temp.size))
		return XZ_DATA_ERROR;

	/* Skip Block Header Size (buf[0]) and Block Flags (buf[1]). */
	s->temp.pos = 2;

	/*
	 * Catch unsupported Block Flags. We support only one or two filters
	 * in the chain, so we catch that with the same test.
	 */
#ifdef XZ_DEC_BCJ
	if (s->temp.buf[1] & 0x3E)
#else
	if (s->temp.buf[1] & 0x3F)
#endif
		return XZ_OPTIONS_ERROR;

	/* Compressed Size */
	if (s->temp.buf[1] & 0x40) {
		if (dec_vli(s, s->temp.buf, &s->temp.pos, s->temp.size)
					!= XZ_STREAM_END)
			return XZ_DATA_ERROR;

		s->block_header.compressed = s->vli;
	} else {
		s->block_header.compressed = VLI_UNKNOWN;
	}

	/* Uncompressed Size */
	if (s->temp.buf[1] & 0x80) {
		if (dec_vli(s, s->temp.buf, &s->temp.pos, s->temp.size)
					!= XZ_STREAM_END)
			return XZ_DATA_ERROR;

		s->block_header.uncompressed = s->vli;
	} else {
		s->block_header.uncompressed = VLI_UNKNOWN;
	}

#ifdef XZ_DEC_BCJ
	/* If there are two filters, the first one must be a BCJ filter. */
	s->bcj_active = s->temp.buf[1] & 0x01;
	if (s->bcj_active) {
		if (s->temp.size - s->temp.pos < 2)
			return XZ_OPTIONS_ERROR;

		ret = xz_dec_bcj_reset(s->bcj, s->temp.buf[s->temp.pos++]);
		if (ret != XZ_OK)
			return ret;

		/*
		 * We don't support custom start offset,
		 * so Size of Properties must be zero.
		 */
		if (s->temp.buf[s->temp.pos++] != 0x00)
			return XZ_OPTIONS_ERROR;
	}
#endif

	/* Valid Filter Flags always take at least two bytes. */
	if (s->temp.size - s->temp.pos < 2)
		return XZ_DATA_ERROR;

	/* Filter ID = LZMA2 */
	if (s->temp.buf[s->temp.pos++] != 0x21)
		return XZ_OPTIONS_ERROR;

	/* Size of Properties = 1-byte Filter Properties */
	if (s->temp.buf[s->temp.pos++] != 0x01)
		return XZ_OPTIONS_ERROR;

	/* Filter Properties contains LZMA2 dictionary size. */
	if (s->temp.size - s->temp.pos < 1)
		return XZ_DATA_ERROR;

	ret = xz_dec_lzma2_reset(s->lzma2, s->temp.buf[s->temp.pos++]);
	if (ret != XZ_OK)
		return ret;

	/* The rest must be Header Padding. */
	while (s->temp.pos < s->temp.size)
		if (s->temp.buf[s->temp.pos++] != 0x00)
			return XZ_OPTIONS_ERROR;

	/* Reset per-Block accounting used by dec_block(). */
	s->temp.pos = 0;
	s->block.compressed = 0;
	s->block.uncompressed = 0;

	return XZ_OK;
}
548
/*
 * Main .xz state machine: walks the stream from Stream Header through
 * Blocks, Index, and Stream Footer. Cases fall through intentionally
 * (the enumerators are declared in stream order); `break` returns to
 * the top of the loop when the sequence goes backwards (new Block).
 */
static enum xz_ret XZ_FUNC dec_main(struct xz_dec *s, struct xz_buf *b)
{
	enum xz_ret ret;

	/*
	 * Store the start position for the case when we are in the middle
	 * of the Index field.
	 */
	s->in_start = b->in_pos;

	while (true) {
		switch (s->sequence) {
		case SEQ_STREAM_HEADER:
			/*
			 * Stream Header is copied to s->temp, and then
			 * decoded from there. This way if the caller
			 * gives us only little input at a time, we can
			 * still keep the Stream Header decoding code
			 * simple. Similar approach is used in many places
			 * in this file.
			 */
			if (!fill_temp(s, b))
				return XZ_OK;

			/*
			 * If dec_stream_header() returns
			 * XZ_UNSUPPORTED_CHECK, it is still possible
			 * to continue decoding if working in multi-call
			 * mode. Thus, update s->sequence before calling
			 * dec_stream_header().
			 */
			s->sequence = SEQ_BLOCK_START;

			ret = dec_stream_header(s);
			if (ret != XZ_OK)
				return ret;

			/* fallthrough */

		case SEQ_BLOCK_START:
			/* We need one byte of input to continue. */
			if (b->in_pos == b->in_size)
				return XZ_OK;

			/* See if this is the beginning of the Index field. */
			if (b->in[b->in_pos] == 0) {
				s->in_start = b->in_pos++;
				s->sequence = SEQ_INDEX;
				break;
			}

			/*
			 * Calculate the size of the Block Header and
			 * prepare to decode it.
			 */
			s->block_header.size
				= ((uint32_t)b->in[b->in_pos] + 1) * 4;

			s->temp.size = s->block_header.size;
			s->temp.pos = 0;
			s->sequence = SEQ_BLOCK_HEADER;

			/* fallthrough */

		case SEQ_BLOCK_HEADER:
			if (!fill_temp(s, b))
				return XZ_OK;

			ret = dec_block_header(s);
			if (ret != XZ_OK)
				return ret;

			s->sequence = SEQ_BLOCK_UNCOMPRESS;

			/* fallthrough */

		case SEQ_BLOCK_UNCOMPRESS:
			ret = dec_block(s, b);
			if (ret != XZ_STREAM_END)
				return ret;

			s->sequence = SEQ_BLOCK_PADDING;

			/* fallthrough */

		case SEQ_BLOCK_PADDING:
			/*
			 * Size of Compressed Data + Block Padding
			 * must be a multiple of four. We don't need
			 * s->block.compressed for anything else
			 * anymore, so we use it here to test the size
			 * of the Block Padding field.
			 */
			while (s->block.compressed & 3) {
				if (b->in_pos == b->in_size)
					return XZ_OK;

				if (b->in[b->in_pos++] != 0)
					return XZ_DATA_ERROR;

				++s->block.compressed;
			}

			s->sequence = SEQ_BLOCK_CHECK;

			/* fallthrough */

		case SEQ_BLOCK_CHECK:
			if (s->check_type == XZ_CHECK_CRC32) {
				ret = crc32_validate(s, b);
				if (ret != XZ_STREAM_END)
					return ret;
			}
#ifdef XZ_DEC_ANY_CHECK
			else if (!check_skip(s, b)) {
				return XZ_OK;
			}
#endif

			s->sequence = SEQ_BLOCK_START;
			break;

		case SEQ_INDEX:
			ret = dec_index(s, b);
			if (ret != XZ_STREAM_END)
				return ret;

			s->sequence = SEQ_INDEX_PADDING;

			/* fallthrough */

		case SEQ_INDEX_PADDING:
			while ((s->index.size + (b->in_pos - s->in_start))
					& 3) {
				if (b->in_pos == b->in_size) {
					index_update(s, b);
					return XZ_OK;
				}

				if (b->in[b->in_pos++] != 0)
					return XZ_DATA_ERROR;
			}

			/* Finish the CRC32 value and Index size. */
			index_update(s, b);

			/* Compare the hashes to validate the Index field. */
			if (!memeq(&s->block.hash, &s->index.hash,
					sizeof(s->block.hash)))
				return XZ_DATA_ERROR;

			s->sequence = SEQ_INDEX_CRC32;

			/* fallthrough */

		case SEQ_INDEX_CRC32:
			ret = crc32_validate(s, b);
			if (ret != XZ_STREAM_END)
				return ret;

			s->temp.size = STREAM_HEADER_SIZE;
			s->sequence = SEQ_STREAM_FOOTER;

			/* fallthrough */

		case SEQ_STREAM_FOOTER:
			if (!fill_temp(s, b))
				return XZ_OK;

			return dec_stream_footer(s);
		}
	}

	/* Never reached */
}
708
/*
 * xz_dec_run() is a wrapper for dec_main() to handle some special cases in
 * multi-call and single-call decoding.
 *
 * In multi-call mode, we must return XZ_BUF_ERROR when it seems clear that we
 * are not going to make any progress anymore. This is to prevent the caller
 * from calling us infinitely when the input file is truncated or otherwise
 * corrupt. Since zlib-style API allows that the caller fills the input buffer
 * only when the decoder doesn't produce any new output, we have to be careful
 * to avoid returning XZ_BUF_ERROR too easily: XZ_BUF_ERROR is returned only
 * after the second consecutive call to xz_dec_run() that makes no progress.
 *
 * In single-call mode, if we couldn't decode everything and no error
 * occurred, either the input is truncated or the output buffer is too small.
 * Since we know that the last input byte never produces any output, we know
 * that if all the input was consumed and decoding wasn't finished, the file
 * must be corrupt. Otherwise the output buffer has to be too small or the
 * file is corrupt in a way that decoding it produces too big output.
 *
 * If single-call decoding fails, we reset b->in_pos and b->out_pos back to
 * their original values. This is because with some filter chains there won't
 * be any valid uncompressed data in the output buffer unless the decoding
 * actually succeeds (that's the price to pay of using the output buffer as
 * the workspace).
 */
XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_run(struct xz_dec *s, struct xz_buf *b)
{
	size_t in_start;
	size_t out_start;
	enum xz_ret ret;

	/* Single-call decoding always starts from a freshly reset state. */
	if (DEC_IS_SINGLE(s->mode))
		xz_dec_reset(s);

	in_start = b->in_pos;
	out_start = b->out_pos;
	ret = dec_main(s, b);

	if (DEC_IS_SINGLE(s->mode)) {
		/* See the single-call discussion in the comment above. */
		if (ret == XZ_OK)
			ret = b->in_pos == b->in_size
					? XZ_DATA_ERROR : XZ_BUF_ERROR;

		if (ret != XZ_STREAM_END) {
			b->in_pos = in_start;
			b->out_pos = out_start;
		}

	} else if (ret == XZ_OK && in_start == b->in_pos
			&& out_start == b->out_pos) {
		/* No progress: report XZ_BUF_ERROR on the second such call. */
		if (s->allow_buf_error)
			ret = XZ_BUF_ERROR;

		s->allow_buf_error = true;
	} else {
		s->allow_buf_error = false;
	}

	return ret;
}
769
/*
 * Allocate and initialize the .xz stream decoder: the top-level state,
 * the optional BCJ decoder, and the LZMA2 decoder (with dict_max as the
 * dictionary limit for multi-call modes). Returns NULL if any
 * allocation fails; partially constructed state is freed via the
 * goto-cleanup chain below.
 */
XZ_EXTERN struct xz_dec * XZ_FUNC xz_dec_init(
		enum xz_mode mode, uint32_t dict_max)
{
	struct xz_dec *s = kmalloc(sizeof(*s), GFP_KERNEL);
	if (s == NULL)
		return NULL;

	s->mode = mode;

#ifdef XZ_DEC_BCJ
	s->bcj = xz_dec_bcj_create(DEC_IS_SINGLE(mode));
	if (s->bcj == NULL)
		goto error_bcj;
#endif

	s->lzma2 = xz_dec_lzma2_create(mode, dict_max);
	if (s->lzma2 == NULL)
		goto error_lzma2;

	xz_dec_reset(s);
	return s;

error_lzma2:
#ifdef XZ_DEC_BCJ
	xz_dec_bcj_end(s->bcj);
error_bcj:
#endif
	kfree(s);
	return NULL;
}
800
801XZ_EXTERN void XZ_FUNC xz_dec_reset(struct xz_dec *s)
802{
803 s->sequence = SEQ_STREAM_HEADER;
804 s->allow_buf_error = false;
805 s->pos = 0;
806 s->crc32 = 0;
807 memzero(&s->block, sizeof(s->block));
808 memzero(&s->index, sizeof(s->index));
809 s->temp.pos = 0;
810 s->temp.size = STREAM_HEADER_SIZE;
811}
812
813XZ_EXTERN void XZ_FUNC xz_dec_end(struct xz_dec *s)
814{
815 if (s != NULL) {
816 xz_dec_lzma2_end(s->lzma2);
817#ifdef XZ_DEC_BCJ
818 xz_dec_bcj_end(s->bcj);
819#endif
820 kfree(s);
821 }
822}
diff --git a/archival/libarchive/unxz/xz_lzma2.h b/archival/libarchive/unxz/xz_lzma2.h
new file mode 100644
index 000000000..47f21afbc
--- /dev/null
+++ b/archival/libarchive/unxz/xz_lzma2.h
@@ -0,0 +1,204 @@
1/*
2 * LZMA2 definitions
3 *
4 * Authors: Lasse Collin <lasse.collin@tukaani.org>
5 * Igor Pavlov <http://7-zip.org/>
6 *
7 * This file has been put into the public domain.
8 * You can do whatever you want with this file.
9 */
10
11#ifndef XZ_LZMA2_H
12#define XZ_LZMA2_H
13
/* Range coder constants (normalization threshold and bit-model scaling) */
#define RC_SHIFT_BITS 8
#define RC_TOP_BITS 24
#define RC_TOP_VALUE (1 << RC_TOP_BITS)
#define RC_BIT_MODEL_TOTAL_BITS 11
#define RC_BIT_MODEL_TOTAL (1 << RC_BIT_MODEL_TOTAL_BITS)
#define RC_MOVE_BITS 5

/*
 * Maximum number of position states. A position state is the lowest pb
 * number of bits of the current uncompressed offset. In some places there
 * are different sets of probabilities for different position states.
 */
#define POS_STATES_MAX (1 << 4)
28
/*
 * This enum is used to track which LZMA symbols have occurred most recently
 * and in which order. This information is used to predict the next symbol.
 *
 * Symbols:
 *  - Literal: One 8-bit byte
 *  - Match: Repeat a chunk of data at some distance
 *  - Long repeat: Multi-byte match at a recently seen distance
 *  - Short repeat: One-byte repeat at a recently seen distance
 *
 * The symbol names are in from STATE_oldest_older_previous. REP means
 * either short or long repeated match, and NONLIT means any non-literal.
 *
 * The numeric ordering matters: the state-transition helpers below use
 * comparisons and fixed offsets (-3, -6) on these values.
 */
enum lzma_state {
	STATE_LIT_LIT,
	STATE_MATCH_LIT_LIT,
	STATE_REP_LIT_LIT,
	STATE_SHORTREP_LIT_LIT,
	STATE_MATCH_LIT,
	STATE_REP_LIT,
	STATE_SHORTREP_LIT,
	STATE_LIT_MATCH,
	STATE_LIT_LONGREP,
	STATE_LIT_SHORTREP,
	STATE_NONLIT_MATCH,
	STATE_NONLIT_REP
};

/* Total number of states */
#define STATES 12

/* The lowest 7 states indicate that the previous state was a literal. */
#define LIT_STATES 7
62
63/* Indicate that the latest symbol was a literal. */
64static inline void XZ_FUNC lzma_state_literal(enum lzma_state *state)
65{
66 if (*state <= STATE_SHORTREP_LIT_LIT)
67 *state = STATE_LIT_LIT;
68 else if (*state <= STATE_LIT_SHORTREP)
69 *state -= 3;
70 else
71 *state -= 6;
72}
73
74/* Indicate that the latest symbol was a match. */
75static inline void XZ_FUNC lzma_state_match(enum lzma_state *state)
76{
77 *state = *state < LIT_STATES ? STATE_LIT_MATCH : STATE_NONLIT_MATCH;
78}
79
80/* Indicate that the latest state was a long repeated match. */
81static inline void XZ_FUNC lzma_state_long_rep(enum lzma_state *state)
82{
83 *state = *state < LIT_STATES ? STATE_LIT_LONGREP : STATE_NONLIT_REP;
84}
85
86/* Indicate that the latest symbol was a short match. */
87static inline void XZ_FUNC lzma_state_short_rep(enum lzma_state *state)
88{
89 *state = *state < LIT_STATES ? STATE_LIT_SHORTREP : STATE_NONLIT_REP;
90}
91
92/* Test if the previous symbol was a literal. */
93static inline bool XZ_FUNC lzma_state_is_literal(enum lzma_state state)
94{
95 return state < LIT_STATES;
96}
97
/* Each literal coder is divided in three sections:
 *   - 0x001-0x0FF: Without match byte
 *   - 0x101-0x1FF: With match byte; match bit is 0
 *   - 0x201-0x2FF: With match byte; match bit is 1
 *
 * Match byte is used when the previous LZMA symbol was something else than
 * a literal (that is, it was some kind of match).
 */
#define LITERAL_CODER_SIZE 0x300

/* Maximum number of literal coders (selected by lc/lp bits) */
#define LITERAL_CODERS_MAX (1 << 4)

/* Minimum length of a match is two bytes. */
#define MATCH_LEN_MIN 2

/* Match length is encoded with 4, 5, or 10 bits.
 *
 * Length   Bits
 *  2-9      4 = Choice=0 + 3 bits
 * 10-17     5 = Choice=1 + Choice2=0 + 3 bits
 * 18-273   10 = Choice=1 + Choice2=1 + 8 bits
 */
#define LEN_LOW_BITS 3
#define LEN_LOW_SYMBOLS (1 << LEN_LOW_BITS)
#define LEN_MID_BITS 3
#define LEN_MID_SYMBOLS (1 << LEN_MID_BITS)
#define LEN_HIGH_BITS 8
#define LEN_HIGH_SYMBOLS (1 << LEN_HIGH_BITS)
#define LEN_SYMBOLS (LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS + LEN_HIGH_SYMBOLS)

/*
 * Maximum length of a match is 273 which is a result of the encoding
 * described above.
 */
#define MATCH_LEN_MAX (MATCH_LEN_MIN + LEN_SYMBOLS - 1)

/*
 * Different sets of probabilities are used for match distances that have
 * very short match length: Lengths of 2, 3, and 4 bytes have a separate
 * set of probabilities for each length. The matches with longer length
 * use a shared set of probabilities.
 */
#define DIST_STATES 4
142
143/*
144 * Get the index of the appropriate probability array for decoding
145 * the distance slot.
146 */
147static inline uint32_t XZ_FUNC lzma_get_dist_state(uint32_t len)
148{
149 return len < DIST_STATES + MATCH_LEN_MIN
150 ? len - MATCH_LEN_MIN : DIST_STATES - 1;
151}
152
/*
 * The highest two bits of a 32-bit match distance are encoded using six bits.
 * This six-bit value is called a distance slot. This way encoding a 32-bit
 * value takes 6-36 bits, larger values taking more bits.
 */
#define DIST_SLOT_BITS 6
#define DIST_SLOTS (1 << DIST_SLOT_BITS)

/* Match distances up to 127 are fully encoded using probabilities. Since
 * the highest two bits (distance slot) are always encoded using six bits,
 * the distances 0-3 don't need any additional bits to encode, since the
 * distance slot itself is the same as the actual distance. DIST_MODEL_START
 * indicates the first distance slot where at least one additional bit is
 * needed.
 */
#define DIST_MODEL_START 4

/*
 * Match distances greater than 127 are encoded in three pieces:
 *   - distance slot: the highest two bits
 *   - direct bits: 2-26 bits below the highest two bits
 *   - alignment bits: four lowest bits
 *
 * Direct bits don't use any probabilities.
 *
 * The distance slot value of 14 is for distances 128-191.
 */
#define DIST_MODEL_END 14

/* Distance slots that indicate a distance <= 127. */
#define FULL_DISTANCES_BITS (DIST_MODEL_END / 2)
#define FULL_DISTANCES (1 << FULL_DISTANCES_BITS)

/*
 * For match distances greater than 127, only the highest two bits and the
 * lowest four bits (alignment) is encoded using probabilities.
 */
#define ALIGN_BITS 4
#define ALIGN_SIZE (1 << ALIGN_BITS)
#define ALIGN_MASK (ALIGN_SIZE - 1)

/* Total number of all probability variables */
#define PROBS_TOTAL (1846 + LITERAL_CODERS_MAX * LITERAL_CODER_SIZE)

/*
 * LZMA remembers the four most recent match distances. Reusing these
 * distances tends to take less space than re-encoding the actual
 * distance value.
 */
#define REPS 4
203
204#endif
diff --git a/archival/libarchive/unxz/xz_private.h b/archival/libarchive/unxz/xz_private.h
new file mode 100644
index 000000000..145649a83
--- /dev/null
+++ b/archival/libarchive/unxz/xz_private.h
@@ -0,0 +1,159 @@
1/*
2 * Private includes and definitions
3 *
4 * Author: Lasse Collin <lasse.collin@tukaani.org>
5 *
6 * This file has been put into the public domain.
7 * You can do whatever you want with this file.
8 */
9
10#ifndef XZ_PRIVATE_H
11#define XZ_PRIVATE_H
12
13#ifdef __KERNEL__
14 /* XZ_PREBOOT may be defined only via decompress_unxz.c. */
15# ifndef XZ_PREBOOT
16# include <linux/slab.h>
17# include <linux/vmalloc.h>
18# include <linux/string.h>
19# define memeq(a, b, size) (memcmp(a, b, size) == 0)
20# define memzero(buf, size) memset(buf, 0, size)
21# endif
22# include <asm/byteorder.h>
23# include <asm/unaligned.h>
24# define get_le32(p) le32_to_cpup((const uint32_t *)(p))
25 /* XZ_IGNORE_KCONFIG may be defined only via decompress_unxz.c. */
26# ifndef XZ_IGNORE_KCONFIG
27# ifdef CONFIG_XZ_DEC_X86
28# define XZ_DEC_X86
29# endif
30# ifdef CONFIG_XZ_DEC_POWERPC
31# define XZ_DEC_POWERPC
32# endif
33# ifdef CONFIG_XZ_DEC_IA64
34# define XZ_DEC_IA64
35# endif
36# ifdef CONFIG_XZ_DEC_ARM
37# define XZ_DEC_ARM
38# endif
39# ifdef CONFIG_XZ_DEC_ARMTHUMB
40# define XZ_DEC_ARMTHUMB
41# endif
42# ifdef CONFIG_XZ_DEC_SPARC
43# define XZ_DEC_SPARC
44# endif
45# endif
46# include <linux/xz.h>
47#else
48 /*
49 * For userspace builds, use a separate header to define the required
50 * macros and functions. This makes it easier to adapt the code into
51 * different environments and avoids clutter in the Linux kernel tree.
52 */
53# include "xz_config.h"
54#endif
55
/* If no specific decoding mode is requested, enable support for all modes. */
#if !defined(XZ_DEC_SINGLE) && !defined(XZ_DEC_PREALLOC) \
		&& !defined(XZ_DEC_DYNALLOC)
#	define XZ_DEC_SINGLE
#	define XZ_DEC_PREALLOC
#	define XZ_DEC_DYNALLOC
#endif

/*
 * The DEC_IS_foo(mode) macros are used in "if" statements. If only some
 * of the supported modes are enabled, these macros will evaluate to true or
 * false at compile time and thus allow the compiler to omit unneeded code.
 */
#ifdef XZ_DEC_SINGLE
#	define DEC_IS_SINGLE(mode) ((mode) == XZ_SINGLE)
#else
#	define DEC_IS_SINGLE(mode) (false)
#endif

#ifdef XZ_DEC_PREALLOC
#	define DEC_IS_PREALLOC(mode) ((mode) == XZ_PREALLOC)
#else
#	define DEC_IS_PREALLOC(mode) (false)
#endif

#ifdef XZ_DEC_DYNALLOC
#	define DEC_IS_DYNALLOC(mode) ((mode) == XZ_DYNALLOC)
#else
#	define DEC_IS_DYNALLOC(mode) (false)
#endif

/* "Multi-call" means any mode other than single-call. */
#if !defined(XZ_DEC_SINGLE)
#	define DEC_IS_MULTI(mode) (true)
#elif defined(XZ_DEC_PREALLOC) || defined(XZ_DEC_DYNALLOC)
#	define DEC_IS_MULTI(mode) ((mode) != XZ_SINGLE)
#else
#	define DEC_IS_MULTI(mode) (false)
#endif
94
/*
 * If any of the BCJ filter decoders are wanted, define XZ_DEC_BCJ.
 * XZ_DEC_BCJ is used to enable generic support for BCJ decoders.
 * (The original condition listed defined(XZ_DEC_ARM) twice; the
 * duplicate has been removed.)
 */
#ifndef XZ_DEC_BCJ
#	if defined(XZ_DEC_X86) || defined(XZ_DEC_POWERPC) \
			|| defined(XZ_DEC_IA64) || defined(XZ_DEC_ARM) \
			|| defined(XZ_DEC_ARMTHUMB) || defined(XZ_DEC_SPARC)
#		define XZ_DEC_BCJ
#	endif
#endif
107
/*
 * Allocate memory for LZMA2 decoder. xz_dec_lzma2_reset() must be used
 * before calling xz_dec_lzma2_run(). Returns NULL on allocation failure.
 */
XZ_EXTERN struct xz_dec_lzma2 * XZ_FUNC xz_dec_lzma2_create(
		enum xz_mode mode, uint32_t dict_max);

/*
 * Decode the LZMA2 properties (one byte) and reset the decoder. Return
 * XZ_OK on success, XZ_MEMLIMIT_ERROR if the preallocated dictionary is not
 * big enough, and XZ_OPTIONS_ERROR if props indicates something that this
 * decoder doesn't support.
 */
XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_lzma2_reset(
		struct xz_dec_lzma2 *s, uint8_t props);

/* Decode raw LZMA2 stream from b->in to b->out. */
XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_lzma2_run(
		struct xz_dec_lzma2 *s, struct xz_buf *b);

/* Free the memory allocated for the LZMA2 decoder. */
XZ_EXTERN void XZ_FUNC xz_dec_lzma2_end(struct xz_dec_lzma2 *s);

#ifdef XZ_DEC_BCJ
/*
 * Allocate memory for BCJ decoders. xz_dec_bcj_reset() must be used before
 * calling xz_dec_bcj_run(). Returns NULL on allocation failure.
 */
XZ_EXTERN struct xz_dec_bcj * XZ_FUNC xz_dec_bcj_create(bool single_call);

/*
 * Decode the Filter ID of a BCJ filter. This implementation doesn't
 * support custom start offsets, so no decoding of Filter Properties
 * is needed. Returns XZ_OK if the given Filter ID is supported.
 * Otherwise XZ_OPTIONS_ERROR is returned.
 */
XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_bcj_reset(
		struct xz_dec_bcj *s, uint8_t id);

/*
 * Decode raw BCJ + LZMA2 stream. This must be used only if there actually is
 * a BCJ filter in the chain. If the chain has only LZMA2, xz_dec_lzma2_run()
 * must be called directly.
 */
XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_bcj_run(struct xz_dec_bcj *s,
		struct xz_dec_lzma2 *lzma2, struct xz_buf *b);

/* Free the memory allocated for the BCJ filters (plain kfree suffices). */
#define xz_dec_bcj_end(s) kfree(s)
#endif
158
159#endif
diff --git a/archival/libarchive/unxz/xz_stream.h b/archival/libarchive/unxz/xz_stream.h
new file mode 100644
index 000000000..36f2a7cbf
--- /dev/null
+++ b/archival/libarchive/unxz/xz_stream.h
@@ -0,0 +1,57 @@
1/*
2 * Definitions for handling the .xz file format
3 *
4 * Author: Lasse Collin <lasse.collin@tukaani.org>
5 *
6 * This file has been put into the public domain.
7 * You can do whatever you want with this file.
8 */
9
10#ifndef XZ_STREAM_H
11#define XZ_STREAM_H
12
/*
 * In the kernel, when the internal CRC32 table is not used, map
 * xz_crc32() onto the kernel's crc32_le() (note the pre/post inversion
 * to match the .xz convention).
 */
#if defined(__KERNEL__) && !XZ_INTERNAL_CRC32
#	include <linux/crc32.h>
#	undef crc32
#	define xz_crc32(buf, size, crc) \
		(~crc32_le(~(uint32_t)(crc), buf, size))
#endif

/*
 * See the .xz file format specification at
 * http://tukaani.org/xz/xz-file-format.txt
 * to understand the container format.
 */

#define STREAM_HEADER_SIZE 12

#define HEADER_MAGIC "\3757zXZ\0"
#define HEADER_MAGIC_SIZE 6

#define FOOTER_MAGIC "YZ"
#define FOOTER_MAGIC_SIZE 2

/*
 * Variable-length integer can hold a 63-bit unsigned integer, or a special
 * value to indicate that the value is unknown.
 */
typedef uint64_t vli_type;

#define VLI_MAX ((vli_type)-1 / 2)
#define VLI_UNKNOWN ((vli_type)-1)

/* Maximum encoded size of a VLI */
#define VLI_BYTES_MAX (sizeof(vli_type) * 8 / 7)

/* Integrity Check types (values are the Check IDs from the .xz spec) */
enum xz_check {
	XZ_CHECK_NONE = 0,
	XZ_CHECK_CRC32 = 1,
	XZ_CHECK_CRC64 = 4,
	XZ_CHECK_SHA256 = 10
};

/* Maximum possible Check ID */
#define XZ_CHECK_MAX 15
56
57#endif