diff options
author | Denys Vlasenko <vda.linux@googlemail.com> | 2009-09-15 23:40:08 +0200 |
---|---|---|
committer | Denys Vlasenko <vda.linux@googlemail.com> | 2009-09-15 23:40:08 +0200 |
commit | f2c184be835fbcbd04d06fce22783c6a5d37b563 (patch) | |
tree | 62f0f5ca34a5768e0472cd4a7527801018e2af25 | |
parent | ba98603264125aceac59c36a36dfbee0f7f67c7f (diff) | |
download | busybox-w32-f2c184be835fbcbd04d06fce22783c6a5d37b563.tar.gz busybox-w32-f2c184be835fbcbd04d06fce22783c6a5d37b563.tar.bz2 busybox-w32-f2c184be835fbcbd04d06fce22783c6a5d37b563.zip |
unlzma: fixed speedup/shrink by Pascal Bellard (pascal.bellard AT ads-lu.com)
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
-rw-r--r-- | archival/Config.in | 4 | ||||
-rw-r--r-- | archival/libunarchive/decompress_unlzma.c | 250 |
2 files changed, 108 insertions, 146 deletions
diff --git a/archival/Config.in b/archival/Config.in index 71b953819..cae7f20bb 100644 --- a/archival/Config.in +++ b/archival/Config.in | |||
@@ -298,8 +298,8 @@ config FEATURE_LZMA_FAST | |||
298 | default n | 298 | default n |
299 | depends on UNLZMA | 299 | depends on UNLZMA |
300 | help | 300 | help |
301 | This option reduces decompression time by about 33% at the cost of | 301 | This option reduces decompression time by about 25% at the cost of |
302 | a 2K bigger binary. | 302 | a 1K bigger binary. |
303 | 303 | ||
304 | config UNZIP | 304 | config UNZIP |
305 | bool "unzip" | 305 | bool "unzip" |
diff --git a/archival/libunarchive/decompress_unlzma.c b/archival/libunarchive/decompress_unlzma.c index 33e5cd65d..4478cd2e3 100644 --- a/archival/libunarchive/decompress_unlzma.c +++ b/archival/libunarchive/decompress_unlzma.c | |||
@@ -8,14 +8,15 @@ | |||
8 | * | 8 | * |
9 | * Licensed under GPLv2 or later, see file LICENSE in this tarball for details. | 9 | * Licensed under GPLv2 or later, see file LICENSE in this tarball for details. |
10 | */ | 10 | */ |
11 | |||
12 | #include "libbb.h" | 11 | #include "libbb.h" |
13 | #include "unarchive.h" | 12 | #include "unarchive.h" |
14 | 13 | ||
15 | #if ENABLE_FEATURE_LZMA_FAST | 14 | #if ENABLE_FEATURE_LZMA_FAST |
16 | # define speed_inline ALWAYS_INLINE | 15 | # define speed_inline ALWAYS_INLINE |
16 | # define size_inline | ||
17 | #else | 17 | #else |
18 | # define speed_inline | 18 | # define speed_inline |
19 | # define size_inline ALWAYS_INLINE | ||
19 | #endif | 20 | #endif |
20 | 21 | ||
21 | 22 | ||
@@ -44,36 +45,48 @@ typedef struct { | |||
44 | #define RC_MODEL_TOTAL_BITS 11 | 45 | #define RC_MODEL_TOTAL_BITS 11 |
45 | 46 | ||
46 | 47 | ||
47 | /* Called twice: once at startup and once in rc_normalize() */ | 48 | /* Called twice: once at startup (LZMA_FAST only) and once in rc_normalize() */ |
48 | static void rc_read(rc_t *rc) | 49 | static size_inline void rc_read(rc_t *rc) |
49 | { | 50 | { |
50 | int buffer_size = safe_read(rc->fd, RC_BUFFER, RC_BUFFER_SIZE); | 51 | int buffer_size = safe_read(rc->fd, RC_BUFFER, RC_BUFFER_SIZE); |
52 | //TODO: return -1 instead | ||
53 | //This will make unlzma delete broken unpacked file on unpack errors | ||
51 | if (buffer_size <= 0) | 54 | if (buffer_size <= 0) |
52 | bb_error_msg_and_die("unexpected EOF"); | 55 | bb_error_msg_and_die("unexpected EOF"); |
53 | rc->ptr = RC_BUFFER; | 56 | rc->ptr = RC_BUFFER; |
54 | rc->buffer_end = RC_BUFFER + buffer_size; | 57 | rc->buffer_end = RC_BUFFER + buffer_size; |
55 | } | 58 | } |
56 | 59 | ||
60 | /* Called twice, but one callsite is in speed_inline'd rc_is_bit_1() */ | ||
61 | static void rc_do_normalize(rc_t *rc) | ||
62 | { | ||
63 | if (rc->ptr >= rc->buffer_end) | ||
64 | rc_read(rc); | ||
65 | rc->range <<= 8; | ||
66 | rc->code = (rc->code << 8) | *rc->ptr++; | ||
67 | } | ||
68 | |||
57 | /* Called once */ | 69 | /* Called once */ |
58 | static rc_t* rc_init(int fd) /*, int buffer_size) */ | 70 | static ALWAYS_INLINE rc_t* rc_init(int fd) /*, int buffer_size) */ |
59 | { | 71 | { |
60 | int i; | 72 | int i; |
61 | rc_t *rc; | 73 | rc_t *rc; |
62 | 74 | ||
63 | rc = xmalloc(sizeof(*rc) + RC_BUFFER_SIZE); | 75 | rc = xzalloc(sizeof(*rc) + RC_BUFFER_SIZE); |
64 | 76 | ||
65 | rc->fd = fd; | 77 | rc->fd = fd; |
66 | /* rc->buffer_size = buffer_size; */ | 78 | /* rc->ptr = rc->buffer_end; */ |
67 | rc->buffer_end = RC_BUFFER + RC_BUFFER_SIZE; | ||
68 | rc->ptr = rc->buffer_end; | ||
69 | 79 | ||
70 | rc->code = 0; | ||
71 | rc->range = 0xFFFFFFFF; | ||
72 | for (i = 0; i < 5; i++) { | 80 | for (i = 0; i < 5; i++) { |
81 | #if ENABLE_FEATURE_LZMA_FAST | ||
73 | if (rc->ptr >= rc->buffer_end) | 82 | if (rc->ptr >= rc->buffer_end) |
74 | rc_read(rc); | 83 | rc_read(rc); |
75 | rc->code = (rc->code << 8) | *rc->ptr++; | 84 | rc->code = (rc->code << 8) | *rc->ptr++; |
85 | #else | ||
86 | rc_do_normalize(rc); | ||
87 | #endif | ||
76 | } | 88 | } |
89 | rc->range = 0xFFFFFFFF; | ||
77 | return rc; | 90 | return rc; |
78 | } | 91 | } |
79 | 92 | ||
@@ -83,14 +96,6 @@ static ALWAYS_INLINE void rc_free(rc_t *rc) | |||
83 | free(rc); | 96 | free(rc); |
84 | } | 97 | } |
85 | 98 | ||
86 | /* Called twice, but one callsite is in speed_inline'd rc_is_bit_0_helper() */ | ||
87 | static void rc_do_normalize(rc_t *rc) | ||
88 | { | ||
89 | if (rc->ptr >= rc->buffer_end) | ||
90 | rc_read(rc); | ||
91 | rc->range <<= 8; | ||
92 | rc->code = (rc->code << 8) | *rc->ptr++; | ||
93 | } | ||
94 | static ALWAYS_INLINE void rc_normalize(rc_t *rc) | 99 | static ALWAYS_INLINE void rc_normalize(rc_t *rc) |
95 | { | 100 | { |
96 | if (rc->range < (1 << RC_TOP_BITS)) { | 101 | if (rc->range < (1 << RC_TOP_BITS)) { |
@@ -98,49 +103,28 @@ static ALWAYS_INLINE void rc_normalize(rc_t *rc) | |||
98 | } | 103 | } |
99 | } | 104 | } |
100 | 105 | ||
101 | /* rc_is_bit_0 is called 9 times */ | 106 | /* rc_is_bit_1 is called 9 times */ |
102 | /* Why rc_is_bit_0_helper exists? | 107 | static speed_inline int rc_is_bit_1(rc_t *rc, uint16_t *p) |
103 | * Because we want to always expose (rc->code < rc->bound) to optimizer. | ||
104 | * Thus rc_is_bit_0 is always inlined, and rc_is_bit_0_helper is inlined | ||
105 | * only if we compile for speed. | ||
106 | */ | ||
107 | static speed_inline uint32_t rc_is_bit_0_helper(rc_t *rc, uint16_t *p) | ||
108 | { | 108 | { |
109 | rc_normalize(rc); | 109 | rc_normalize(rc); |
110 | rc->bound = *p * (rc->range >> RC_MODEL_TOTAL_BITS); | 110 | rc->bound = *p * (rc->range >> RC_MODEL_TOTAL_BITS); |
111 | return rc->bound; | 111 | if (rc->code < rc->bound) { |
112 | } | 112 | rc->range = rc->bound; |
113 | static ALWAYS_INLINE int rc_is_bit_0(rc_t *rc, uint16_t *p) | 113 | *p += ((1 << RC_MODEL_TOTAL_BITS) - *p) >> RC_MOVE_BITS; |
114 | { | 114 | return 0; |
115 | uint32_t t = rc_is_bit_0_helper(rc, p); | 115 | } |
116 | return rc->code < t; | ||
117 | } | ||
118 | |||
119 | /* Called ~10 times, but very small, thus inlined */ | ||
120 | static speed_inline void rc_update_bit_0(rc_t *rc, uint16_t *p) | ||
121 | { | ||
122 | rc->range = rc->bound; | ||
123 | *p += ((1 << RC_MODEL_TOTAL_BITS) - *p) >> RC_MOVE_BITS; | ||
124 | } | ||
125 | static speed_inline void rc_update_bit_1(rc_t *rc, uint16_t *p) | ||
126 | { | ||
127 | rc->range -= rc->bound; | 116 | rc->range -= rc->bound; |
128 | rc->code -= rc->bound; | 117 | rc->code -= rc->bound; |
129 | *p -= *p >> RC_MOVE_BITS; | 118 | *p -= *p >> RC_MOVE_BITS; |
119 | return 1; | ||
130 | } | 120 | } |
131 | 121 | ||
132 | /* Called 4 times in unlzma loop */ | 122 | /* Called 4 times in unlzma loop */ |
133 | static int rc_get_bit(rc_t *rc, uint16_t *p, int *symbol) | 123 | static speed_inline int rc_get_bit(rc_t *rc, uint16_t *p, int *symbol) |
134 | { | 124 | { |
135 | if (rc_is_bit_0(rc, p)) { | 125 | int ret = rc_is_bit_1(rc, p); |
136 | rc_update_bit_0(rc, p); | 126 | *symbol = *symbol * 2 + ret; |
137 | *symbol *= 2; | 127 | return ret; |
138 | return 0; | ||
139 | } else { | ||
140 | rc_update_bit_1(rc, p); | ||
141 | *symbol = *symbol * 2 + 1; | ||
142 | return 1; | ||
143 | } | ||
144 | } | 128 | } |
145 | 129 | ||
146 | /* Called once */ | 130 | /* Called once */ |
@@ -236,14 +220,11 @@ unpack_lzma_stream(int src_fd, int dst_fd) | |||
236 | int lc, pb, lp; | 220 | int lc, pb, lp; |
237 | uint32_t pos_state_mask; | 221 | uint32_t pos_state_mask; |
238 | uint32_t literal_pos_mask; | 222 | uint32_t literal_pos_mask; |
239 | uint32_t pos; | ||
240 | uint16_t *p; | 223 | uint16_t *p; |
241 | uint16_t *prob; | ||
242 | uint16_t *prob_lit; | ||
243 | int num_bits; | 224 | int num_bits; |
244 | int num_probs; | 225 | int num_probs; |
245 | rc_t *rc; | 226 | rc_t *rc; |
246 | int i, mi; | 227 | int i; |
247 | uint8_t *buffer; | 228 | uint8_t *buffer; |
248 | uint8_t previous_byte = 0; | 229 | uint8_t previous_byte = 0; |
249 | size_t buffer_pos = 0, global_pos = 0; | 230 | size_t buffer_pos = 0, global_pos = 0; |
@@ -251,14 +232,17 @@ unpack_lzma_stream(int src_fd, int dst_fd) | |||
251 | int state = 0; | 232 | int state = 0; |
252 | uint32_t rep0 = 1, rep1 = 1, rep2 = 1, rep3 = 1; | 233 | uint32_t rep0 = 1, rep1 = 1, rep2 = 1, rep3 = 1; |
253 | 234 | ||
254 | xread(src_fd, &header, sizeof(header)); | 235 | if (full_read(src_fd, &header, sizeof(header)) != sizeof(header) |
236 | || header.pos >= (9 * 5 * 5) | ||
237 | ) { | ||
238 | bb_error_msg("bad lzma header"); | ||
239 | return -1; | ||
240 | } | ||
255 | 241 | ||
256 | if (header.pos >= (9 * 5 * 5)) | 242 | i = header.pos / 9; |
257 | bb_error_msg_and_die("bad header"); | ||
258 | mi = header.pos / 9; | ||
259 | lc = header.pos % 9; | 243 | lc = header.pos % 9; |
260 | pb = mi / 5; | 244 | pb = i / 5; |
261 | lp = mi % 5; | 245 | lp = i % 5; |
262 | pos_state_mask = (1 << pb) - 1; | 246 | pos_state_mask = (1 << pb) - 1; |
263 | literal_pos_mask = (1 << lp) - 1; | 247 | literal_pos_mask = (1 << lp) - 1; |
264 | 248 | ||
@@ -266,13 +250,13 @@ unpack_lzma_stream(int src_fd, int dst_fd) | |||
266 | header.dst_size = SWAP_LE64(header.dst_size); | 250 | header.dst_size = SWAP_LE64(header.dst_size); |
267 | 251 | ||
268 | if (header.dict_size == 0) | 252 | if (header.dict_size == 0) |
269 | header.dict_size = 1; | 253 | header.dict_size++; |
270 | 254 | ||
271 | buffer = xmalloc(MIN(header.dst_size, header.dict_size)); | 255 | buffer = xmalloc(MIN(header.dst_size, header.dict_size)); |
272 | 256 | ||
273 | num_probs = LZMA_BASE_SIZE + (LZMA_LIT_SIZE << (lc + lp)); | 257 | num_probs = LZMA_BASE_SIZE + (LZMA_LIT_SIZE << (lc + lp)); |
274 | p = xmalloc(num_probs * sizeof(*p)); | 258 | p = xmalloc(num_probs * sizeof(*p)); |
275 | num_probs = LZMA_LITERAL + (LZMA_LIT_SIZE << (lc + lp)); | 259 | num_probs += LZMA_LITERAL - LZMA_BASE_SIZE; |
276 | for (i = 0; i < num_probs; i++) | 260 | for (i = 0; i < num_probs; i++) |
277 | p[i] = (1 << RC_MODEL_TOTAL_BITS) >> 1; | 261 | p[i] = (1 << RC_MODEL_TOTAL_BITS) >> 1; |
278 | 262 | ||
@@ -280,11 +264,13 @@ unpack_lzma_stream(int src_fd, int dst_fd) | |||
280 | 264 | ||
281 | while (global_pos + buffer_pos < header.dst_size) { | 265 | while (global_pos + buffer_pos < header.dst_size) { |
282 | int pos_state = (buffer_pos + global_pos) & pos_state_mask; | 266 | int pos_state = (buffer_pos + global_pos) & pos_state_mask; |
267 | uint16_t *prob = p + LZMA_IS_MATCH + (state << LZMA_NUM_POS_BITS_MAX) + pos_state; | ||
268 | |||
269 | if (!rc_is_bit_1(rc, prob)) { | ||
270 | static const char next_state[LZMA_NUM_STATES] = | ||
271 | { 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5 }; | ||
272 | int mi = 1; | ||
283 | 273 | ||
284 | prob = p + LZMA_IS_MATCH + (state << LZMA_NUM_POS_BITS_MAX) + pos_state; | ||
285 | if (rc_is_bit_0(rc, prob)) { | ||
286 | mi = 1; | ||
287 | rc_update_bit_0(rc, prob); | ||
288 | prob = (p + LZMA_LITERAL | 274 | prob = (p + LZMA_LITERAL |
289 | + (LZMA_LIT_SIZE * ((((buffer_pos + global_pos) & literal_pos_mask) << lc) | 275 | + (LZMA_LIT_SIZE * ((((buffer_pos + global_pos) & literal_pos_mask) << lc) |
290 | + (previous_byte >> (8 - lc)) | 276 | + (previous_byte >> (8 - lc)) |
@@ -294,8 +280,8 @@ unpack_lzma_stream(int src_fd, int dst_fd) | |||
294 | 280 | ||
295 | if (state >= LZMA_NUM_LIT_STATES) { | 281 | if (state >= LZMA_NUM_LIT_STATES) { |
296 | int match_byte; | 282 | int match_byte; |
283 | uint32_t pos = buffer_pos - rep0; | ||
297 | 284 | ||
298 | pos = buffer_pos - rep0; | ||
299 | while (pos >= header.dict_size) | 285 | while (pos >= header.dict_size) |
300 | pos += header.dict_size; | 286 | pos += header.dict_size; |
301 | match_byte = buffer[pos]; | 287 | match_byte = buffer[pos]; |
@@ -304,22 +290,16 @@ unpack_lzma_stream(int src_fd, int dst_fd) | |||
304 | 290 | ||
305 | match_byte <<= 1; | 291 | match_byte <<= 1; |
306 | bit = match_byte & 0x100; | 292 | bit = match_byte & 0x100; |
307 | prob_lit = prob + 0x100 + bit + mi; | 293 | bit ^= (rc_get_bit(rc, prob + 0x100 + bit + mi, &mi) << 8); /* 0x100 or 0 */ |
308 | bit ^= (rc_get_bit(rc, prob_lit, &mi) << 8); /* 0x100 or 0 */ | ||
309 | if (bit) | 294 | if (bit) |
310 | break; | 295 | break; |
311 | } while (mi < 0x100); | 296 | } while (mi < 0x100); |
312 | } | 297 | } |
313 | while (mi < 0x100) { | 298 | while (mi < 0x100) { |
314 | prob_lit = prob + mi; | 299 | rc_get_bit(rc, prob + mi, &mi); |
315 | rc_get_bit(rc, prob_lit, &mi); | ||
316 | } | 300 | } |
317 | 301 | ||
318 | state -= 3; | 302 | state = next_state[state]; |
319 | if (state < 4-3) | ||
320 | state = 0; | ||
321 | if (state >= 10-3) | ||
322 | state -= 6-3; | ||
323 | 303 | ||
324 | previous_byte = (uint8_t) mi; | 304 | previous_byte = (uint8_t) mi; |
325 | #if ENABLE_FEATURE_LZMA_FAST | 305 | #if ENABLE_FEATURE_LZMA_FAST |
@@ -338,59 +318,46 @@ unpack_lzma_stream(int src_fd, int dst_fd) | |||
338 | #endif | 318 | #endif |
339 | } else { | 319 | } else { |
340 | int offset; | 320 | int offset; |
341 | uint16_t *prob_len; | 321 | uint16_t *prob2; |
322 | #define prob_len prob2 | ||
342 | 323 | ||
343 | rc_update_bit_1(rc, prob); | 324 | prob2 = p + LZMA_IS_REP + state; |
344 | prob = p + LZMA_IS_REP + state; | 325 | if (!rc_is_bit_1(rc, prob2)) { |
345 | if (rc_is_bit_0(rc, prob)) { | ||
346 | rc_update_bit_0(rc, prob); | ||
347 | rep3 = rep2; | 326 | rep3 = rep2; |
348 | rep2 = rep1; | 327 | rep2 = rep1; |
349 | rep1 = rep0; | 328 | rep1 = rep0; |
350 | state = state < LZMA_NUM_LIT_STATES ? 0 : 3; | 329 | state = state < LZMA_NUM_LIT_STATES ? 0 : 3; |
351 | prob = p + LZMA_LEN_CODER; | 330 | prob2 = p + LZMA_LEN_CODER; |
352 | } else { | 331 | } else { |
353 | rc_update_bit_1(rc, prob); | 332 | prob2 += LZMA_IS_REP_G0 - LZMA_IS_REP; |
354 | prob = p + LZMA_IS_REP_G0 + state; | 333 | if (!rc_is_bit_1(rc, prob2)) { |
355 | if (rc_is_bit_0(rc, prob)) { | 334 | prob2 = (p + LZMA_IS_REP_0_LONG |
356 | rc_update_bit_0(rc, prob); | ||
357 | prob = (p + LZMA_IS_REP_0_LONG | ||
358 | + (state << LZMA_NUM_POS_BITS_MAX) | 335 | + (state << LZMA_NUM_POS_BITS_MAX) |
359 | + pos_state | 336 | + pos_state |
360 | ); | 337 | ); |
361 | if (rc_is_bit_0(rc, prob)) { | 338 | if (!rc_is_bit_1(rc, prob2)) { |
362 | rc_update_bit_0(rc, prob); | ||
363 | |||
364 | state = state < LZMA_NUM_LIT_STATES ? 9 : 11; | ||
365 | #if ENABLE_FEATURE_LZMA_FAST | 339 | #if ENABLE_FEATURE_LZMA_FAST |
366 | pos = buffer_pos - rep0; | 340 | uint32_t pos = buffer_pos - rep0; |
341 | state = state < LZMA_NUM_LIT_STATES ? 9 : 11; | ||
367 | while (pos >= header.dict_size) | 342 | while (pos >= header.dict_size) |
368 | pos += header.dict_size; | 343 | pos += header.dict_size; |
369 | previous_byte = buffer[pos]; | 344 | previous_byte = buffer[pos]; |
370 | goto one_byte1; | 345 | goto one_byte1; |
371 | #else | 346 | #else |
347 | state = state < LZMA_NUM_LIT_STATES ? 9 : 11; | ||
372 | len = 1; | 348 | len = 1; |
373 | goto string; | 349 | goto string; |
374 | #endif | 350 | #endif |
375 | } else { | ||
376 | rc_update_bit_1(rc, prob); | ||
377 | } | 351 | } |
378 | } else { | 352 | } else { |
379 | uint32_t distance; | 353 | uint32_t distance; |
380 | 354 | ||
381 | rc_update_bit_1(rc, prob); | 355 | prob2 += LZMA_IS_REP_G1 - LZMA_IS_REP_G0; |
382 | prob = p + LZMA_IS_REP_G1 + state; | 356 | distance = rep1; |
383 | if (rc_is_bit_0(rc, prob)) { | 357 | if (rc_is_bit_1(rc, prob2)) { |
384 | rc_update_bit_0(rc, prob); | 358 | prob2 += LZMA_IS_REP_G2 - LZMA_IS_REP_G1; |
385 | distance = rep1; | 359 | distance = rep2; |
386 | } else { | 360 | if (rc_is_bit_1(rc, prob2)) { |
387 | rc_update_bit_1(rc, prob); | ||
388 | prob = p + LZMA_IS_REP_G2 + state; | ||
389 | if (rc_is_bit_0(rc, prob)) { | ||
390 | rc_update_bit_0(rc, prob); | ||
391 | distance = rep2; | ||
392 | } else { | ||
393 | rc_update_bit_1(rc, prob); | ||
394 | distance = rep3; | 361 | distance = rep3; |
395 | rep3 = rep2; | 362 | rep3 = rep2; |
396 | } | 363 | } |
@@ -400,31 +367,27 @@ unpack_lzma_stream(int src_fd, int dst_fd) | |||
400 | rep0 = distance; | 367 | rep0 = distance; |
401 | } | 368 | } |
402 | state = state < LZMA_NUM_LIT_STATES ? 8 : 11; | 369 | state = state < LZMA_NUM_LIT_STATES ? 8 : 11; |
403 | prob = p + LZMA_REP_LEN_CODER; | 370 | prob2 = p + LZMA_REP_LEN_CODER; |
404 | } | 371 | } |
405 | 372 | ||
406 | prob_len = prob + LZMA_LEN_CHOICE; | 373 | prob_len = prob2 + LZMA_LEN_CHOICE; |
407 | if (rc_is_bit_0(rc, prob_len)) { | 374 | num_bits = LZMA_LEN_NUM_LOW_BITS; |
408 | rc_update_bit_0(rc, prob_len); | 375 | if (!rc_is_bit_1(rc, prob_len)) { |
409 | prob_len = (prob + LZMA_LEN_LOW | 376 | prob_len += LZMA_LEN_LOW - LZMA_LEN_CHOICE |
410 | + (pos_state << LZMA_LEN_NUM_LOW_BITS)); | 377 | + (pos_state << LZMA_LEN_NUM_LOW_BITS); |
411 | offset = 0; | 378 | offset = 0; |
412 | num_bits = LZMA_LEN_NUM_LOW_BITS; | ||
413 | } else { | 379 | } else { |
414 | rc_update_bit_1(rc, prob_len); | 380 | prob_len += LZMA_LEN_CHOICE_2 - LZMA_LEN_CHOICE; |
415 | prob_len = prob + LZMA_LEN_CHOICE_2; | 381 | if (!rc_is_bit_1(rc, prob_len)) { |
416 | if (rc_is_bit_0(rc, prob_len)) { | 382 | prob_len += LZMA_LEN_MID - LZMA_LEN_CHOICE_2 |
417 | rc_update_bit_0(rc, prob_len); | 383 | + (pos_state << LZMA_LEN_NUM_MID_BITS); |
418 | prob_len = (prob + LZMA_LEN_MID | ||
419 | + (pos_state << LZMA_LEN_NUM_MID_BITS)); | ||
420 | offset = 1 << LZMA_LEN_NUM_LOW_BITS; | 384 | offset = 1 << LZMA_LEN_NUM_LOW_BITS; |
421 | num_bits = LZMA_LEN_NUM_MID_BITS; | 385 | num_bits += LZMA_LEN_NUM_MID_BITS - LZMA_LEN_NUM_LOW_BITS; |
422 | } else { | 386 | } else { |
423 | rc_update_bit_1(rc, prob_len); | 387 | prob_len += LZMA_LEN_HIGH - LZMA_LEN_CHOICE_2; |
424 | prob_len = prob + LZMA_LEN_HIGH; | ||
425 | offset = ((1 << LZMA_LEN_NUM_LOW_BITS) | 388 | offset = ((1 << LZMA_LEN_NUM_LOW_BITS) |
426 | + (1 << LZMA_LEN_NUM_MID_BITS)); | 389 | + (1 << LZMA_LEN_NUM_MID_BITS)); |
427 | num_bits = LZMA_LEN_NUM_HIGH_BITS; | 390 | num_bits += LZMA_LEN_NUM_HIGH_BITS - LZMA_LEN_NUM_LOW_BITS; |
428 | } | 391 | } |
429 | } | 392 | } |
430 | rc_bit_tree_decode(rc, prob_len, num_bits, &len); | 393 | rc_bit_tree_decode(rc, prob_len, num_bits, &len); |
@@ -432,37 +395,36 @@ unpack_lzma_stream(int src_fd, int dst_fd) | |||
432 | 395 | ||
433 | if (state < 4) { | 396 | if (state < 4) { |
434 | int pos_slot; | 397 | int pos_slot; |
398 | uint16_t *prob3; | ||
435 | 399 | ||
436 | state += LZMA_NUM_LIT_STATES; | 400 | state += LZMA_NUM_LIT_STATES; |
437 | prob = p + LZMA_POS_SLOT + | 401 | prob3 = p + LZMA_POS_SLOT + |
438 | ((len < LZMA_NUM_LEN_TO_POS_STATES ? len : | 402 | ((len < LZMA_NUM_LEN_TO_POS_STATES ? len : |
439 | LZMA_NUM_LEN_TO_POS_STATES - 1) | 403 | LZMA_NUM_LEN_TO_POS_STATES - 1) |
440 | << LZMA_NUM_POS_SLOT_BITS); | 404 | << LZMA_NUM_POS_SLOT_BITS); |
441 | rc_bit_tree_decode(rc, prob, LZMA_NUM_POS_SLOT_BITS, | 405 | rc_bit_tree_decode(rc, prob3, |
442 | &pos_slot); | 406 | LZMA_NUM_POS_SLOT_BITS, &pos_slot); |
407 | rep0 = pos_slot; | ||
443 | if (pos_slot >= LZMA_START_POS_MODEL_INDEX) { | 408 | if (pos_slot >= LZMA_START_POS_MODEL_INDEX) { |
444 | num_bits = (pos_slot >> 1) - 1; | 409 | int i2, mi2, num_bits2 = (pos_slot >> 1) - 1; |
445 | rep0 = 2 | (pos_slot & 1); | 410 | rep0 = 2 | (pos_slot & 1); |
446 | if (pos_slot < LZMA_END_POS_MODEL_INDEX) { | 411 | if (pos_slot < LZMA_END_POS_MODEL_INDEX) { |
447 | rep0 <<= num_bits; | 412 | rep0 <<= num_bits2; |
448 | prob = p + LZMA_SPEC_POS + rep0 - pos_slot - 1; | 413 | prob3 = p + LZMA_SPEC_POS + rep0 - pos_slot - 1; |
449 | } else { | 414 | } else { |
450 | num_bits -= LZMA_NUM_ALIGN_BITS; | 415 | for (; num_bits2 != LZMA_NUM_ALIGN_BITS; num_bits2--) |
451 | while (num_bits--) | ||
452 | rep0 = (rep0 << 1) | rc_direct_bit(rc); | 416 | rep0 = (rep0 << 1) | rc_direct_bit(rc); |
453 | prob = p + LZMA_ALIGN; | ||
454 | rep0 <<= LZMA_NUM_ALIGN_BITS; | 417 | rep0 <<= LZMA_NUM_ALIGN_BITS; |
455 | num_bits = LZMA_NUM_ALIGN_BITS; | 418 | prob3 = p + LZMA_ALIGN; |
456 | } | 419 | } |
457 | i = 1; | 420 | i2 = 1; |
458 | mi = 1; | 421 | mi2 = 1; |
459 | while (num_bits--) { | 422 | while (num_bits2--) { |
460 | if (rc_get_bit(rc, prob + mi, &mi)) | 423 | if (rc_get_bit(rc, prob3 + mi2, &mi2)) |
461 | rep0 |= i; | 424 | rep0 |= i2; |
462 | i <<= 1; | 425 | i2 <<= 1; |
463 | } | 426 | } |
464 | } else | 427 | } |
465 | rep0 = pos_slot; | ||
466 | if (++rep0 == 0) | 428 | if (++rep0 == 0) |
467 | break; | 429 | break; |
468 | } | 430 | } |
@@ -470,7 +432,7 @@ unpack_lzma_stream(int src_fd, int dst_fd) | |||
470 | len += LZMA_MATCH_MIN_LEN; | 432 | len += LZMA_MATCH_MIN_LEN; |
471 | IF_NOT_FEATURE_LZMA_FAST(string:) | 433 | IF_NOT_FEATURE_LZMA_FAST(string:) |
472 | do { | 434 | do { |
473 | pos = buffer_pos - rep0; | 435 | uint32_t pos = buffer_pos - rep0; |
474 | while (pos >= header.dict_size) | 436 | while (pos >= header.dict_size) |
475 | pos += header.dict_size; | 437 | pos += header.dict_size; |
476 | previous_byte = buffer[pos]; | 438 | previous_byte = buffer[pos]; |