diff options
Diffstat (limited to 'archival/libunarchive/decompress_unlzma.c')
-rw-r--r-- | archival/libunarchive/decompress_unlzma.c | 347 |
1 files changed, 347 insertions, 0 deletions
diff --git a/archival/libunarchive/decompress_unlzma.c b/archival/libunarchive/decompress_unlzma.c new file mode 100644 index 000000000..977cb48d0 --- /dev/null +++ b/archival/libunarchive/decompress_unlzma.c | |||
@@ -0,0 +1,347 @@ | |||
1 | /* | ||
2 | * Small lzma deflate implementation. | ||
3 | * Copyright (C) 2006 Aurelien Jacobs <aurel@gnuage.org> | ||
4 | * | ||
5 | * Based on LzmaDecode.c from the LZMA SDK 4.22 (http://www.7-zip.org/) | ||
6 | * Copyright (C) 1999-2005 Igor Pavlov | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or | ||
9 | * modify it under the terms of the GNU Lesser General Public | ||
10 | * License as published by the Free Software Foundation; either | ||
11 | * version 2.1 of the License, or (at your option) any later version. | ||
12 | * | ||
13 | * This program is distributed in the hope that it will be useful, | ||
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
16 | * Lesser General Public License for more details. | ||
17 | * | ||
18 | * You should have received a copy of the GNU Lesser General Public | ||
19 | * License along with this library; if not, write to the Free Software | ||
20 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
21 | */ | ||
22 | |||
23 | #include <stdint.h> | ||
24 | #include <unistd.h> | ||
25 | #include <stdio.h> | ||
26 | #include <byteswap.h> | ||
27 | |||
28 | #include "libbb.h" | ||
29 | |||
30 | #include "rangecoder.h" | ||
31 | |||
32 | |||
33 | typedef struct { | ||
34 | uint8_t pos; | ||
35 | uint32_t dict_size; | ||
36 | uint64_t dst_size; | ||
37 | } __attribute__ ((packed)) lzma_header_t; | ||
38 | |||
39 | |||
40 | #define LZMA_BASE_SIZE 1846 | ||
41 | #define LZMA_LIT_SIZE 768 | ||
42 | |||
43 | #define LZMA_NUM_POS_BITS_MAX 4 | ||
44 | |||
45 | #define LZMA_LEN_NUM_LOW_BITS 3 | ||
46 | #define LZMA_LEN_NUM_MID_BITS 3 | ||
47 | #define LZMA_LEN_NUM_HIGH_BITS 8 | ||
48 | |||
49 | #define LZMA_LEN_CHOICE 0 | ||
50 | #define LZMA_LEN_CHOICE_2 (LZMA_LEN_CHOICE + 1) | ||
51 | #define LZMA_LEN_LOW (LZMA_LEN_CHOICE_2 + 1) | ||
52 | #define LZMA_LEN_MID (LZMA_LEN_LOW \ | ||
53 | + (1 << (LZMA_NUM_POS_BITS_MAX + LZMA_LEN_NUM_LOW_BITS))) | ||
54 | #define LZMA_LEN_HIGH (LZMA_LEN_MID \ | ||
55 | +(1 << (LZMA_NUM_POS_BITS_MAX + LZMA_LEN_NUM_MID_BITS))) | ||
56 | #define LZMA_NUM_LEN_PROBS (LZMA_LEN_HIGH + (1 << LZMA_LEN_NUM_HIGH_BITS)) | ||
57 | |||
58 | #define LZMA_NUM_STATES 12 | ||
59 | #define LZMA_NUM_LIT_STATES 7 | ||
60 | |||
61 | #define LZMA_START_POS_MODEL_INDEX 4 | ||
62 | #define LZMA_END_POS_MODEL_INDEX 14 | ||
63 | #define LZMA_NUM_FULL_DISTANCES (1 << (LZMA_END_POS_MODEL_INDEX >> 1)) | ||
64 | |||
65 | #define LZMA_NUM_POS_SLOT_BITS 6 | ||
66 | #define LZMA_NUM_LEN_TO_POS_STATES 4 | ||
67 | |||
68 | #define LZMA_NUM_ALIGN_BITS 4 | ||
69 | |||
70 | #define LZMA_MATCH_MIN_LEN 2 | ||
71 | |||
72 | #define LZMA_IS_MATCH 0 | ||
73 | #define LZMA_IS_REP (LZMA_IS_MATCH + (LZMA_NUM_STATES <<LZMA_NUM_POS_BITS_MAX)) | ||
74 | #define LZMA_IS_REP_G0 (LZMA_IS_REP + LZMA_NUM_STATES) | ||
75 | #define LZMA_IS_REP_G1 (LZMA_IS_REP_G0 + LZMA_NUM_STATES) | ||
76 | #define LZMA_IS_REP_G2 (LZMA_IS_REP_G1 + LZMA_NUM_STATES) | ||
77 | #define LZMA_IS_REP_0_LONG (LZMA_IS_REP_G2 + LZMA_NUM_STATES) | ||
78 | #define LZMA_POS_SLOT (LZMA_IS_REP_0_LONG \ | ||
79 | + (LZMA_NUM_STATES << LZMA_NUM_POS_BITS_MAX)) | ||
80 | #define LZMA_SPEC_POS (LZMA_POS_SLOT \ | ||
81 | +(LZMA_NUM_LEN_TO_POS_STATES << LZMA_NUM_POS_SLOT_BITS)) | ||
82 | #define LZMA_ALIGN (LZMA_SPEC_POS \ | ||
83 | + LZMA_NUM_FULL_DISTANCES - LZMA_END_POS_MODEL_INDEX) | ||
84 | #define LZMA_LEN_CODER (LZMA_ALIGN + (1 << LZMA_NUM_ALIGN_BITS)) | ||
85 | #define LZMA_REP_LEN_CODER (LZMA_LEN_CODER + LZMA_NUM_LEN_PROBS) | ||
86 | #define LZMA_LITERAL (LZMA_REP_LEN_CODER + LZMA_NUM_LEN_PROBS) | ||
87 | |||
88 | |||
89 | int unlzma(int src_fd, int dst_fd) | ||
90 | { | ||
91 | lzma_header_t header; | ||
92 | int lc, pb, lp; | ||
93 | uint32_t pos_state_mask; | ||
94 | uint32_t literal_pos_mask; | ||
95 | uint32_t pos; | ||
96 | uint16_t *p; | ||
97 | uint16_t *prob; | ||
98 | uint16_t *prob_lit; | ||
99 | int num_bits; | ||
100 | int num_probs; | ||
101 | rc_t rc; | ||
102 | int i, mi; | ||
103 | uint8_t *buffer; | ||
104 | uint8_t previous_byte = 0; | ||
105 | size_t buffer_pos = 0, global_pos = 0; | ||
106 | int len = 0; | ||
107 | int state = 0; | ||
108 | uint32_t rep0 = 1, rep1 = 1, rep2 = 1, rep3 = 1; | ||
109 | |||
110 | if (read(src_fd, &header, sizeof(header)) != sizeof(header)) | ||
111 | bb_error_msg_and_die("can't read header"); | ||
112 | |||
113 | if (header.pos >= (9 * 5 * 5)) | ||
114 | bb_error_msg_and_die("bad header"); | ||
115 | mi = header.pos / 9; | ||
116 | lc = header.pos % 9; | ||
117 | pb = mi / 5; | ||
118 | lp = mi % 5; | ||
119 | pos_state_mask = (1 << pb) - 1; | ||
120 | literal_pos_mask = (1 << lp) - 1; | ||
121 | |||
122 | #if __BYTE_ORDER == __BIG_ENDIAN | ||
123 | header.dict_size = bswap_32(header.dict_size); | ||
124 | header.dst_size = bswap_64(header.dst_size); | ||
125 | #endif /* __BYTE_ORDER */ | ||
126 | |||
127 | if (header.dict_size == 0) | ||
128 | header.dict_size = 1; | ||
129 | |||
130 | buffer = xmalloc(MIN(header.dst_size, header.dict_size)); | ||
131 | |||
132 | num_probs = LZMA_BASE_SIZE + (LZMA_LIT_SIZE << (lc + lp)); | ||
133 | p = xmalloc(num_probs * sizeof(*p)); | ||
134 | num_probs = LZMA_LITERAL + (LZMA_LIT_SIZE << (lc + lp)); | ||
135 | for (i = 0; i < num_probs; i++) | ||
136 | p[i] = (1 << RC_MODEL_TOTAL_BITS) >> 1; | ||
137 | |||
138 | rc_init(&rc, src_fd, 0x10000); | ||
139 | |||
140 | while (global_pos + buffer_pos < header.dst_size) { | ||
141 | int pos_state = (buffer_pos + global_pos) & pos_state_mask; | ||
142 | |||
143 | prob = | ||
144 | p + LZMA_IS_MATCH + (state << LZMA_NUM_POS_BITS_MAX) + pos_state; | ||
145 | if (rc_is_bit_0(&rc, prob)) { | ||
146 | mi = 1; | ||
147 | rc_update_bit_0(&rc, prob); | ||
148 | prob = (p + LZMA_LITERAL + (LZMA_LIT_SIZE | ||
149 | * ((((buffer_pos + global_pos) & literal_pos_mask) << lc) | ||
150 | + (previous_byte >> (8 - lc))))); | ||
151 | |||
152 | if (state >= LZMA_NUM_LIT_STATES) { | ||
153 | int match_byte; | ||
154 | |||
155 | pos = buffer_pos - rep0; | ||
156 | while (pos >= header.dict_size) | ||
157 | pos += header.dict_size; | ||
158 | match_byte = buffer[pos]; | ||
159 | do { | ||
160 | int bit; | ||
161 | |||
162 | match_byte <<= 1; | ||
163 | bit = match_byte & 0x100; | ||
164 | prob_lit = prob + 0x100 + bit + mi; | ||
165 | if (rc_get_bit(&rc, prob_lit, &mi)) { | ||
166 | if (!bit) | ||
167 | break; | ||
168 | } else { | ||
169 | if (bit) | ||
170 | break; | ||
171 | } | ||
172 | } while (mi < 0x100); | ||
173 | } | ||
174 | while (mi < 0x100) { | ||
175 | prob_lit = prob + mi; | ||
176 | rc_get_bit(&rc, prob_lit, &mi); | ||
177 | } | ||
178 | previous_byte = (uint8_t) mi; | ||
179 | |||
180 | buffer[buffer_pos++] = previous_byte; | ||
181 | if (buffer_pos == header.dict_size) { | ||
182 | buffer_pos = 0; | ||
183 | global_pos += header.dict_size; | ||
184 | write(dst_fd, buffer, header.dict_size); | ||
185 | } | ||
186 | if (state < 4) | ||
187 | state = 0; | ||
188 | else if (state < 10) | ||
189 | state -= 3; | ||
190 | else | ||
191 | state -= 6; | ||
192 | } else { | ||
193 | int offset; | ||
194 | uint16_t *prob_len; | ||
195 | |||
196 | rc_update_bit_1(&rc, prob); | ||
197 | prob = p + LZMA_IS_REP + state; | ||
198 | if (rc_is_bit_0(&rc, prob)) { | ||
199 | rc_update_bit_0(&rc, prob); | ||
200 | rep3 = rep2; | ||
201 | rep2 = rep1; | ||
202 | rep1 = rep0; | ||
203 | state = state < LZMA_NUM_LIT_STATES ? 0 : 3; | ||
204 | prob = p + LZMA_LEN_CODER; | ||
205 | } else { | ||
206 | rc_update_bit_1(&rc, prob); | ||
207 | prob = p + LZMA_IS_REP_G0 + state; | ||
208 | if (rc_is_bit_0(&rc, prob)) { | ||
209 | rc_update_bit_0(&rc, prob); | ||
210 | prob = (p + LZMA_IS_REP_0_LONG | ||
211 | + (state << LZMA_NUM_POS_BITS_MAX) + pos_state); | ||
212 | if (rc_is_bit_0(&rc, prob)) { | ||
213 | rc_update_bit_0(&rc, prob); | ||
214 | |||
215 | state = state < LZMA_NUM_LIT_STATES ? 9 : 11; | ||
216 | pos = buffer_pos - rep0; | ||
217 | while (pos >= header.dict_size) | ||
218 | pos += header.dict_size; | ||
219 | previous_byte = buffer[pos]; | ||
220 | buffer[buffer_pos++] = previous_byte; | ||
221 | if (buffer_pos == header.dict_size) { | ||
222 | buffer_pos = 0; | ||
223 | global_pos += header.dict_size; | ||
224 | write(dst_fd, buffer, header.dict_size); | ||
225 | } | ||
226 | continue; | ||
227 | } else { | ||
228 | rc_update_bit_1(&rc, prob); | ||
229 | } | ||
230 | } else { | ||
231 | uint32_t distance; | ||
232 | |||
233 | rc_update_bit_1(&rc, prob); | ||
234 | prob = p + LZMA_IS_REP_G1 + state; | ||
235 | if (rc_is_bit_0(&rc, prob)) { | ||
236 | rc_update_bit_0(&rc, prob); | ||
237 | distance = rep1; | ||
238 | } else { | ||
239 | rc_update_bit_1(&rc, prob); | ||
240 | prob = p + LZMA_IS_REP_G2 + state; | ||
241 | if (rc_is_bit_0(&rc, prob)) { | ||
242 | rc_update_bit_0(&rc, prob); | ||
243 | distance = rep2; | ||
244 | } else { | ||
245 | rc_update_bit_1(&rc, prob); | ||
246 | distance = rep3; | ||
247 | rep3 = rep2; | ||
248 | } | ||
249 | rep2 = rep1; | ||
250 | } | ||
251 | rep1 = rep0; | ||
252 | rep0 = distance; | ||
253 | } | ||
254 | state = state < LZMA_NUM_LIT_STATES ? 8 : 11; | ||
255 | prob = p + LZMA_REP_LEN_CODER; | ||
256 | } | ||
257 | |||
258 | prob_len = prob + LZMA_LEN_CHOICE; | ||
259 | if (rc_is_bit_0(&rc, prob_len)) { | ||
260 | rc_update_bit_0(&rc, prob_len); | ||
261 | prob_len = (prob + LZMA_LEN_LOW | ||
262 | + (pos_state << LZMA_LEN_NUM_LOW_BITS)); | ||
263 | offset = 0; | ||
264 | num_bits = LZMA_LEN_NUM_LOW_BITS; | ||
265 | } else { | ||
266 | rc_update_bit_1(&rc, prob_len); | ||
267 | prob_len = prob + LZMA_LEN_CHOICE_2; | ||
268 | if (rc_is_bit_0(&rc, prob_len)) { | ||
269 | rc_update_bit_0(&rc, prob_len); | ||
270 | prob_len = (prob + LZMA_LEN_MID | ||
271 | + (pos_state << LZMA_LEN_NUM_MID_BITS)); | ||
272 | offset = 1 << LZMA_LEN_NUM_LOW_BITS; | ||
273 | num_bits = LZMA_LEN_NUM_MID_BITS; | ||
274 | } else { | ||
275 | rc_update_bit_1(&rc, prob_len); | ||
276 | prob_len = prob + LZMA_LEN_HIGH; | ||
277 | offset = ((1 << LZMA_LEN_NUM_LOW_BITS) | ||
278 | + (1 << LZMA_LEN_NUM_MID_BITS)); | ||
279 | num_bits = LZMA_LEN_NUM_HIGH_BITS; | ||
280 | } | ||
281 | } | ||
282 | rc_bit_tree_decode(&rc, prob_len, num_bits, &len); | ||
283 | len += offset; | ||
284 | |||
285 | if (state < 4) { | ||
286 | int pos_slot; | ||
287 | |||
288 | state += LZMA_NUM_LIT_STATES; | ||
289 | prob = | ||
290 | p + LZMA_POS_SLOT + | ||
291 | ((len < | ||
292 | LZMA_NUM_LEN_TO_POS_STATES ? len : | ||
293 | LZMA_NUM_LEN_TO_POS_STATES - 1) | ||
294 | << LZMA_NUM_POS_SLOT_BITS); | ||
295 | rc_bit_tree_decode(&rc, prob, LZMA_NUM_POS_SLOT_BITS, | ||
296 | &pos_slot); | ||
297 | if (pos_slot >= LZMA_START_POS_MODEL_INDEX) { | ||
298 | num_bits = (pos_slot >> 1) - 1; | ||
299 | rep0 = 2 | (pos_slot & 1); | ||
300 | if (pos_slot < LZMA_END_POS_MODEL_INDEX) { | ||
301 | rep0 <<= num_bits; | ||
302 | prob = p + LZMA_SPEC_POS + rep0 - pos_slot - 1; | ||
303 | } else { | ||
304 | num_bits -= LZMA_NUM_ALIGN_BITS; | ||
305 | while (num_bits--) | ||
306 | rep0 = (rep0 << 1) | rc_direct_bit(&rc); | ||
307 | prob = p + LZMA_ALIGN; | ||
308 | rep0 <<= LZMA_NUM_ALIGN_BITS; | ||
309 | num_bits = LZMA_NUM_ALIGN_BITS; | ||
310 | } | ||
311 | i = 1; | ||
312 | mi = 1; | ||
313 | while (num_bits--) { | ||
314 | if (rc_get_bit(&rc, prob + mi, &mi)) | ||
315 | rep0 |= i; | ||
316 | i <<= 1; | ||
317 | } | ||
318 | } else | ||
319 | rep0 = pos_slot; | ||
320 | if (++rep0 == 0) | ||
321 | break; | ||
322 | } | ||
323 | |||
324 | len += LZMA_MATCH_MIN_LEN; | ||
325 | |||
326 | do { | ||
327 | pos = buffer_pos - rep0; | ||
328 | while (pos >= header.dict_size) | ||
329 | pos += header.dict_size; | ||
330 | previous_byte = buffer[pos]; | ||
331 | buffer[buffer_pos++] = previous_byte; | ||
332 | if (buffer_pos == header.dict_size) { | ||
333 | buffer_pos = 0; | ||
334 | global_pos += header.dict_size; | ||
335 | write(dst_fd, buffer, header.dict_size); | ||
336 | } | ||
337 | len--; | ||
338 | } while (len != 0 && buffer_pos < header.dst_size); | ||
339 | } | ||
340 | } | ||
341 | |||
342 | write(dst_fd, buffer, buffer_pos); | ||
343 | rc_free(&rc); | ||
344 | return 0; | ||
345 | } | ||
346 | |||
347 | /* vi:set ts=4: */ | ||