diff options
-rw-r--r-- | archival/Config.in | 4 | ||||
-rw-r--r-- | archival/libunarchive/decompress_unlzma.c | 154 |
2 files changed, 60 insertions, 98 deletions
diff --git a/archival/Config.in b/archival/Config.in index 64b44c218..e0d43c0ee 100644 --- a/archival/Config.in +++ b/archival/Config.in | |||
@@ -283,8 +283,8 @@ config FEATURE_LZMA_FAST | |||
283 | default n | 283 | default n |
284 | depends on UNLZMA | 284 | depends on UNLZMA |
285 | help | 285 | help |
286 | This option reduces decompression time by about 33% at the cost of | 286 | This option reduces decompression time by about 25% at the cost of |
287 | a 2K bigger binary. | 287 | a 1K bigger binary. |
288 | 288 | ||
289 | config UNZIP | 289 | config UNZIP |
290 | bool "unzip" | 290 | bool "unzip" |
diff --git a/archival/libunarchive/decompress_unlzma.c b/archival/libunarchive/decompress_unlzma.c index 33e5cd65d..68085d68c 100644 --- a/archival/libunarchive/decompress_unlzma.c +++ b/archival/libunarchive/decompress_unlzma.c | |||
@@ -8,14 +8,15 @@ | |||
8 | * | 8 | * |
9 | * Licensed under GPLv2 or later, see file LICENSE in this tarball for details. | 9 | * Licensed under GPLv2 or later, see file LICENSE in this tarball for details. |
10 | */ | 10 | */ |
11 | |||
12 | #include "libbb.h" | 11 | #include "libbb.h" |
13 | #include "unarchive.h" | 12 | #include "unarchive.h" |
14 | 13 | ||
15 | #if ENABLE_FEATURE_LZMA_FAST | 14 | #if ENABLE_FEATURE_LZMA_FAST |
16 | # define speed_inline ALWAYS_INLINE | 15 | # define speed_inline ALWAYS_INLINE |
16 | # define size_inline | ||
17 | #else | 17 | #else |
18 | # define speed_inline | 18 | # define speed_inline |
19 | # define size_inline ALWAYS_INLINE | ||
19 | #endif | 20 | #endif |
20 | 21 | ||
21 | 22 | ||
@@ -44,8 +45,8 @@ typedef struct { | |||
44 | #define RC_MODEL_TOTAL_BITS 11 | 45 | #define RC_MODEL_TOTAL_BITS 11 |
45 | 46 | ||
46 | 47 | ||
47 | /* Called twice: once at startup and once in rc_normalize() */ | 48 | /* Called twice: once at startup (LZMA_FAST only) and once in rc_normalize() */ |
48 | static void rc_read(rc_t *rc) | 49 | static size_inline void rc_read(rc_t *rc) |
49 | { | 50 | { |
50 | int buffer_size = safe_read(rc->fd, RC_BUFFER, RC_BUFFER_SIZE); | 51 | int buffer_size = safe_read(rc->fd, RC_BUFFER, RC_BUFFER_SIZE); |
51 | if (buffer_size <= 0) | 52 | if (buffer_size <= 0) |
@@ -54,8 +55,17 @@ static void rc_read(rc_t *rc) | |||
54 | rc->buffer_end = RC_BUFFER + buffer_size; | 55 | rc->buffer_end = RC_BUFFER + buffer_size; |
55 | } | 56 | } |
56 | 57 | ||
58 | /* Called twice, but one callsite is in speed_inline'd rc_is_bit_1() */ | ||
59 | static void rc_do_normalize(rc_t *rc) | ||
60 | { | ||
61 | if (rc->ptr >= rc->buffer_end) | ||
62 | rc_read(rc); | ||
63 | rc->range <<= 8; | ||
64 | rc->code = (rc->code << 8) | *rc->ptr++; | ||
65 | } | ||
66 | |||
57 | /* Called once */ | 67 | /* Called once */ |
58 | static rc_t* rc_init(int fd) /*, int buffer_size) */ | 68 | static ALWAYS_INLINE rc_t* rc_init(int fd) /*, int buffer_size) */ |
59 | { | 69 | { |
60 | int i; | 70 | int i; |
61 | rc_t *rc; | 71 | rc_t *rc; |
@@ -63,17 +73,18 @@ static rc_t* rc_init(int fd) /*, int buffer_size) */ | |||
63 | rc = xmalloc(sizeof(*rc) + RC_BUFFER_SIZE); | 73 | rc = xmalloc(sizeof(*rc) + RC_BUFFER_SIZE); |
64 | 74 | ||
65 | rc->fd = fd; | 75 | rc->fd = fd; |
66 | /* rc->buffer_size = buffer_size; */ | ||
67 | rc->buffer_end = RC_BUFFER + RC_BUFFER_SIZE; | ||
68 | rc->ptr = rc->buffer_end; | 76 | rc->ptr = rc->buffer_end; |
69 | 77 | ||
70 | rc->code = 0; | ||
71 | rc->range = 0xFFFFFFFF; | ||
72 | for (i = 0; i < 5; i++) { | 78 | for (i = 0; i < 5; i++) { |
79 | #if ENABLE_FEATURE_LZMA_FAST | ||
73 | if (rc->ptr >= rc->buffer_end) | 80 | if (rc->ptr >= rc->buffer_end) |
74 | rc_read(rc); | 81 | rc_read(rc); |
75 | rc->code = (rc->code << 8) | *rc->ptr++; | 82 | rc->code = (rc->code << 8) | *rc->ptr++; |
83 | #else | ||
84 | rc_do_normalize(rc); | ||
85 | #endif | ||
76 | } | 86 | } |
87 | rc->range = 0xFFFFFFFF; | ||
77 | return rc; | 88 | return rc; |
78 | } | 89 | } |
79 | 90 | ||
@@ -83,14 +94,6 @@ static ALWAYS_INLINE void rc_free(rc_t *rc) | |||
83 | free(rc); | 94 | free(rc); |
84 | } | 95 | } |
85 | 96 | ||
86 | /* Called twice, but one callsite is in speed_inline'd rc_is_bit_0_helper() */ | ||
87 | static void rc_do_normalize(rc_t *rc) | ||
88 | { | ||
89 | if (rc->ptr >= rc->buffer_end) | ||
90 | rc_read(rc); | ||
91 | rc->range <<= 8; | ||
92 | rc->code = (rc->code << 8) | *rc->ptr++; | ||
93 | } | ||
94 | static ALWAYS_INLINE void rc_normalize(rc_t *rc) | 97 | static ALWAYS_INLINE void rc_normalize(rc_t *rc) |
95 | { | 98 | { |
96 | if (rc->range < (1 << RC_TOP_BITS)) { | 99 | if (rc->range < (1 << RC_TOP_BITS)) { |
@@ -98,49 +101,28 @@ static ALWAYS_INLINE void rc_normalize(rc_t *rc) | |||
98 | } | 101 | } |
99 | } | 102 | } |
100 | 103 | ||
101 | /* rc_is_bit_0 is called 9 times */ | 104 | /* rc_is_bit_1 is called 9 times */ |
102 | /* Why rc_is_bit_0_helper exists? | 105 | static speed_inline int rc_is_bit_1(rc_t *rc, uint16_t *p) |
103 | * Because we want to always expose (rc->code < rc->bound) to optimizer. | ||
104 | * Thus rc_is_bit_0 is always inlined, and rc_is_bit_0_helper is inlined | ||
105 | * only if we compile for speed. | ||
106 | */ | ||
107 | static speed_inline uint32_t rc_is_bit_0_helper(rc_t *rc, uint16_t *p) | ||
108 | { | 106 | { |
109 | rc_normalize(rc); | 107 | rc_normalize(rc); |
110 | rc->bound = *p * (rc->range >> RC_MODEL_TOTAL_BITS); | 108 | rc->bound = *p * (rc->range >> RC_MODEL_TOTAL_BITS); |
111 | return rc->bound; | 109 | if (rc->code < rc->bound) { |
112 | } | 110 | rc->range = rc->bound; |
113 | static ALWAYS_INLINE int rc_is_bit_0(rc_t *rc, uint16_t *p) | 111 | *p += ((1 << RC_MODEL_TOTAL_BITS) - *p) >> RC_MOVE_BITS; |
114 | { | 112 | return 0; |
115 | uint32_t t = rc_is_bit_0_helper(rc, p); | 113 | } |
116 | return rc->code < t; | ||
117 | } | ||
118 | |||
119 | /* Called ~10 times, but very small, thus inlined */ | ||
120 | static speed_inline void rc_update_bit_0(rc_t *rc, uint16_t *p) | ||
121 | { | ||
122 | rc->range = rc->bound; | ||
123 | *p += ((1 << RC_MODEL_TOTAL_BITS) - *p) >> RC_MOVE_BITS; | ||
124 | } | ||
125 | static speed_inline void rc_update_bit_1(rc_t *rc, uint16_t *p) | ||
126 | { | ||
127 | rc->range -= rc->bound; | 114 | rc->range -= rc->bound; |
128 | rc->code -= rc->bound; | 115 | rc->code -= rc->bound; |
129 | *p -= *p >> RC_MOVE_BITS; | 116 | *p -= *p >> RC_MOVE_BITS; |
117 | return 1; | ||
130 | } | 118 | } |
131 | 119 | ||
132 | /* Called 4 times in unlzma loop */ | 120 | /* Called 4 times in unlzma loop */ |
133 | static int rc_get_bit(rc_t *rc, uint16_t *p, int *symbol) | 121 | static speed_inline int rc_get_bit(rc_t *rc, uint16_t *p, int *symbol) |
134 | { | 122 | { |
135 | if (rc_is_bit_0(rc, p)) { | 123 | int ret = rc_is_bit_1(rc, p); |
136 | rc_update_bit_0(rc, p); | 124 | *symbol = *symbol * 2 + ret; |
137 | *symbol *= 2; | 125 | return ret; |
138 | return 0; | ||
139 | } else { | ||
140 | rc_update_bit_1(rc, p); | ||
141 | *symbol = *symbol * 2 + 1; | ||
142 | return 1; | ||
143 | } | ||
144 | } | 126 | } |
145 | 127 | ||
146 | /* Called once */ | 128 | /* Called once */ |
@@ -266,13 +248,13 @@ unpack_lzma_stream(int src_fd, int dst_fd) | |||
266 | header.dst_size = SWAP_LE64(header.dst_size); | 248 | header.dst_size = SWAP_LE64(header.dst_size); |
267 | 249 | ||
268 | if (header.dict_size == 0) | 250 | if (header.dict_size == 0) |
269 | header.dict_size = 1; | 251 | header.dict_size++; |
270 | 252 | ||
271 | buffer = xmalloc(MIN(header.dst_size, header.dict_size)); | 253 | buffer = xmalloc(MIN(header.dst_size, header.dict_size)); |
272 | 254 | ||
273 | num_probs = LZMA_BASE_SIZE + (LZMA_LIT_SIZE << (lc + lp)); | 255 | num_probs = LZMA_BASE_SIZE + (LZMA_LIT_SIZE << (lc + lp)); |
274 | p = xmalloc(num_probs * sizeof(*p)); | 256 | p = xmalloc(num_probs * sizeof(*p)); |
275 | num_probs = LZMA_LITERAL + (LZMA_LIT_SIZE << (lc + lp)); | 257 | num_probs += LZMA_LITERAL - LZMA_BASE_SIZE; |
276 | for (i = 0; i < num_probs; i++) | 258 | for (i = 0; i < num_probs; i++) |
277 | p[i] = (1 << RC_MODEL_TOTAL_BITS) >> 1; | 259 | p[i] = (1 << RC_MODEL_TOTAL_BITS) >> 1; |
278 | 260 | ||
@@ -282,9 +264,8 @@ unpack_lzma_stream(int src_fd, int dst_fd) | |||
282 | int pos_state = (buffer_pos + global_pos) & pos_state_mask; | 264 | int pos_state = (buffer_pos + global_pos) & pos_state_mask; |
283 | 265 | ||
284 | prob = p + LZMA_IS_MATCH + (state << LZMA_NUM_POS_BITS_MAX) + pos_state; | 266 | prob = p + LZMA_IS_MATCH + (state << LZMA_NUM_POS_BITS_MAX) + pos_state; |
285 | if (rc_is_bit_0(rc, prob)) { | 267 | if (!rc_is_bit_1(rc, prob)) { |
286 | mi = 1; | 268 | mi = 1; |
287 | rc_update_bit_0(rc, prob); | ||
288 | prob = (p + LZMA_LITERAL | 269 | prob = (p + LZMA_LITERAL |
289 | + (LZMA_LIT_SIZE * ((((buffer_pos + global_pos) & literal_pos_mask) << lc) | 270 | + (LZMA_LIT_SIZE * ((((buffer_pos + global_pos) & literal_pos_mask) << lc) |
290 | + (previous_byte >> (8 - lc)) | 271 | + (previous_byte >> (8 - lc)) |
@@ -340,27 +321,21 @@ unpack_lzma_stream(int src_fd, int dst_fd) | |||
340 | int offset; | 321 | int offset; |
341 | uint16_t *prob_len; | 322 | uint16_t *prob_len; |
342 | 323 | ||
343 | rc_update_bit_1(rc, prob); | ||
344 | prob = p + LZMA_IS_REP + state; | 324 | prob = p + LZMA_IS_REP + state; |
345 | if (rc_is_bit_0(rc, prob)) { | 325 | if (!rc_is_bit_1(rc, prob)) { |
346 | rc_update_bit_0(rc, prob); | ||
347 | rep3 = rep2; | 326 | rep3 = rep2; |
348 | rep2 = rep1; | 327 | rep2 = rep1; |
349 | rep1 = rep0; | 328 | rep1 = rep0; |
350 | state = state < LZMA_NUM_LIT_STATES ? 0 : 3; | 329 | state = state < LZMA_NUM_LIT_STATES ? 0 : 3; |
351 | prob = p + LZMA_LEN_CODER; | 330 | prob = p + LZMA_LEN_CODER; |
352 | } else { | 331 | } else { |
353 | rc_update_bit_1(rc, prob); | 332 | prob += LZMA_IS_REP_G0 - LZMA_IS_REP; |
354 | prob = p + LZMA_IS_REP_G0 + state; | 333 | if (!rc_is_bit_1(rc, prob)) { |
355 | if (rc_is_bit_0(rc, prob)) { | ||
356 | rc_update_bit_0(rc, prob); | ||
357 | prob = (p + LZMA_IS_REP_0_LONG | 334 | prob = (p + LZMA_IS_REP_0_LONG |
358 | + (state << LZMA_NUM_POS_BITS_MAX) | 335 | + (state << LZMA_NUM_POS_BITS_MAX) |
359 | + pos_state | 336 | + pos_state |
360 | ); | 337 | ); |
361 | if (rc_is_bit_0(rc, prob)) { | 338 | if (!rc_is_bit_1(rc, prob)) { |
362 | rc_update_bit_0(rc, prob); | ||
363 | |||
364 | state = state < LZMA_NUM_LIT_STATES ? 9 : 11; | 339 | state = state < LZMA_NUM_LIT_STATES ? 9 : 11; |
365 | #if ENABLE_FEATURE_LZMA_FAST | 340 | #if ENABLE_FEATURE_LZMA_FAST |
366 | pos = buffer_pos - rep0; | 341 | pos = buffer_pos - rep0; |
@@ -372,25 +347,16 @@ unpack_lzma_stream(int src_fd, int dst_fd) | |||
372 | len = 1; | 347 | len = 1; |
373 | goto string; | 348 | goto string; |
374 | #endif | 349 | #endif |
375 | } else { | ||
376 | rc_update_bit_1(rc, prob); | ||
377 | } | 350 | } |
378 | } else { | 351 | } else { |
379 | uint32_t distance; | 352 | uint32_t distance; |
380 | 353 | ||
381 | rc_update_bit_1(rc, prob); | 354 | prob += LZMA_IS_REP_G1 - LZMA_IS_REP_G0; |
382 | prob = p + LZMA_IS_REP_G1 + state; | 355 | distance = rep1; |
383 | if (rc_is_bit_0(rc, prob)) { | 356 | if (rc_is_bit_1(rc, prob)) { |
384 | rc_update_bit_0(rc, prob); | 357 | prob += LZMA_IS_REP_G2 - LZMA_IS_REP_G1; |
385 | distance = rep1; | 358 | distance = rep2; |
386 | } else { | 359 | if (rc_is_bit_1(rc, prob)) { |
387 | rc_update_bit_1(rc, prob); | ||
388 | prob = p + LZMA_IS_REP_G2 + state; | ||
389 | if (rc_is_bit_0(rc, prob)) { | ||
390 | rc_update_bit_0(rc, prob); | ||
391 | distance = rep2; | ||
392 | } else { | ||
393 | rc_update_bit_1(rc, prob); | ||
394 | distance = rep3; | 360 | distance = rep3; |
395 | rep3 = rep2; | 361 | rep3 = rep2; |
396 | } | 362 | } |
@@ -404,24 +370,20 @@ unpack_lzma_stream(int src_fd, int dst_fd) | |||
404 | } | 370 | } |
405 | 371 | ||
406 | prob_len = prob + LZMA_LEN_CHOICE; | 372 | prob_len = prob + LZMA_LEN_CHOICE; |
407 | if (rc_is_bit_0(rc, prob_len)) { | 373 | if (!rc_is_bit_1(rc, prob_len)) { |
408 | rc_update_bit_0(rc, prob_len); | 374 | prob_len += LZMA_LEN_LOW - LZMA_LEN_CHOICE |
409 | prob_len = (prob + LZMA_LEN_LOW | 375 | + (pos_state << LZMA_LEN_NUM_LOW_BITS); |
410 | + (pos_state << LZMA_LEN_NUM_LOW_BITS)); | ||
411 | offset = 0; | 376 | offset = 0; |
412 | num_bits = LZMA_LEN_NUM_LOW_BITS; | 377 | num_bits = LZMA_LEN_NUM_LOW_BITS; |
413 | } else { | 378 | } else { |
414 | rc_update_bit_1(rc, prob_len); | 379 | prob_len += LZMA_LEN_CHOICE_2 - LZMA_LEN_CHOICE; |
415 | prob_len = prob + LZMA_LEN_CHOICE_2; | 380 | if (!rc_is_bit_1(rc, prob_len)) { |
416 | if (rc_is_bit_0(rc, prob_len)) { | 381 | prob_len += LZMA_LEN_MID - LZMA_LEN_CHOICE_2 |
417 | rc_update_bit_0(rc, prob_len); | 382 | + (pos_state << LZMA_LEN_NUM_MID_BITS); |
418 | prob_len = (prob + LZMA_LEN_MID | ||
419 | + (pos_state << LZMA_LEN_NUM_MID_BITS)); | ||
420 | offset = 1 << LZMA_LEN_NUM_LOW_BITS; | 383 | offset = 1 << LZMA_LEN_NUM_LOW_BITS; |
421 | num_bits = LZMA_LEN_NUM_MID_BITS; | 384 | num_bits = LZMA_LEN_NUM_MID_BITS; |
422 | } else { | 385 | } else { |
423 | rc_update_bit_1(rc, prob_len); | 386 | prob_len += LZMA_LEN_HIGH - LZMA_LEN_CHOICE_2; |
424 | prob_len = prob + LZMA_LEN_HIGH; | ||
425 | offset = ((1 << LZMA_LEN_NUM_LOW_BITS) | 387 | offset = ((1 << LZMA_LEN_NUM_LOW_BITS) |
426 | + (1 << LZMA_LEN_NUM_MID_BITS)); | 388 | + (1 << LZMA_LEN_NUM_MID_BITS)); |
427 | num_bits = LZMA_LEN_NUM_HIGH_BITS; | 389 | num_bits = LZMA_LEN_NUM_HIGH_BITS; |
@@ -438,19 +400,20 @@ unpack_lzma_stream(int src_fd, int dst_fd) | |||
438 | ((len < LZMA_NUM_LEN_TO_POS_STATES ? len : | 400 | ((len < LZMA_NUM_LEN_TO_POS_STATES ? len : |
439 | LZMA_NUM_LEN_TO_POS_STATES - 1) | 401 | LZMA_NUM_LEN_TO_POS_STATES - 1) |
440 | << LZMA_NUM_POS_SLOT_BITS); | 402 | << LZMA_NUM_POS_SLOT_BITS); |
441 | rc_bit_tree_decode(rc, prob, LZMA_NUM_POS_SLOT_BITS, | 403 | rc_bit_tree_decode(rc, prob, |
442 | &pos_slot); | 404 | LZMA_NUM_POS_SLOT_BITS, &pos_slot); |
405 | rep0 = pos_slot; | ||
443 | if (pos_slot >= LZMA_START_POS_MODEL_INDEX) { | 406 | if (pos_slot >= LZMA_START_POS_MODEL_INDEX) { |
444 | num_bits = (pos_slot >> 1) - 1; | 407 | num_bits = (pos_slot >> 1) - 1; |
445 | rep0 = 2 | (pos_slot & 1); | 408 | rep0 = 2 | (pos_slot & 1); |
409 | prob = p + LZMA_ALIGN; | ||
446 | if (pos_slot < LZMA_END_POS_MODEL_INDEX) { | 410 | if (pos_slot < LZMA_END_POS_MODEL_INDEX) { |
447 | rep0 <<= num_bits; | 411 | rep0 <<= num_bits; |
448 | prob = p + LZMA_SPEC_POS + rep0 - pos_slot - 1; | 412 | prob += LZMA_SPEC_POS - LZMA_ALIGN - 1 + rep0 - pos_slot; |
449 | } else { | 413 | } else { |
450 | num_bits -= LZMA_NUM_ALIGN_BITS; | 414 | num_bits -= LZMA_NUM_ALIGN_BITS; |
451 | while (num_bits--) | 415 | while (num_bits--) |
452 | rep0 = (rep0 << 1) | rc_direct_bit(rc); | 416 | rep0 = (rep0 << 1) | rc_direct_bit(rc); |
453 | prob = p + LZMA_ALIGN; | ||
454 | rep0 <<= LZMA_NUM_ALIGN_BITS; | 417 | rep0 <<= LZMA_NUM_ALIGN_BITS; |
455 | num_bits = LZMA_NUM_ALIGN_BITS; | 418 | num_bits = LZMA_NUM_ALIGN_BITS; |
456 | } | 419 | } |
@@ -461,8 +424,7 @@ unpack_lzma_stream(int src_fd, int dst_fd) | |||
461 | rep0 |= i; | 424 | rep0 |= i; |
462 | i <<= 1; | 425 | i <<= 1; |
463 | } | 426 | } |
464 | } else | 427 | } |
465 | rep0 = pos_slot; | ||
466 | if (++rep0 == 0) | 428 | if (++rep0 == 0) |
467 | break; | 429 | break; |
468 | } | 430 | } |