aboutsummaryrefslogtreecommitdiff
path: root/C/Bcj2.c
diff options
context:
space:
mode:
authorIgor Pavlov <87184205+ip7z@users.noreply.github.com>2023-06-21 00:00:00 +0000
committerIgor Pavlov <87184205+ip7z@users.noreply.github.com>2023-12-17 14:59:19 +0500
commit5b39dc76f1bc82f941d5c800ab9f34407a06b53a (patch)
treefe5e17420300b715021a76328444088d32047963 /C/Bcj2.c
parent93be7d4abfd4233228f58ee1fbbcd76d91be66a4 (diff)
download7zip-5b39dc76f1bc82f941d5c800ab9f34407a06b53a.tar.gz
7zip-5b39dc76f1bc82f941d5c800ab9f34407a06b53a.tar.bz2
7zip-5b39dc76f1bc82f941d5c800ab9f34407a06b53a.zip
23.0123.01
Diffstat (limited to 'C/Bcj2.c')
-rw-r--r--C/Bcj2.c319
1 files changed, 176 insertions, 143 deletions
diff --git a/C/Bcj2.c b/C/Bcj2.c
index c7b9567..7cb57ad 100644
--- a/C/Bcj2.c
+++ b/C/Bcj2.c
@@ -1,29 +1,24 @@
1/* Bcj2.c -- BCJ2 Decoder (Converter for x86 code) 1/* Bcj2.c -- BCJ2 Decoder (Converter for x86 code)
22021-02-09 : Igor Pavlov : Public domain */ 22023-03-01 : Igor Pavlov : Public domain */
3 3
4#include "Precomp.h" 4#include "Precomp.h"
5 5
6#include "Bcj2.h" 6#include "Bcj2.h"
7#include "CpuArch.h" 7#include "CpuArch.h"
8 8
9#define CProb UInt16
10
11#define kTopValue ((UInt32)1 << 24) 9#define kTopValue ((UInt32)1 << 24)
12#define kNumModelBits 11 10#define kNumBitModelTotalBits 11
13#define kBitModelTotal (1 << kNumModelBits) 11#define kBitModelTotal (1 << kNumBitModelTotalBits)
14#define kNumMoveBits 5 12#define kNumMoveBits 5
15 13
16#define _IF_BIT_0 ttt = *prob; bound = (p->range >> kNumModelBits) * ttt; if (p->code < bound) 14// UInt32 bcj2_stats[256 + 2][2];
17#define _UPDATE_0 p->range = bound; *prob = (CProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits));
18#define _UPDATE_1 p->range -= bound; p->code -= bound; *prob = (CProb)(ttt - (ttt >> kNumMoveBits));
19 15
20void Bcj2Dec_Init(CBcj2Dec *p) 16void Bcj2Dec_Init(CBcj2Dec *p)
21{ 17{
22 unsigned i; 18 unsigned i;
23 19 p->state = BCJ2_STREAM_RC; // BCJ2_DEC_STATE_OK;
24 p->state = BCJ2_DEC_STATE_OK;
25 p->ip = 0; 20 p->ip = 0;
26 p->temp[3] = 0; 21 p->temp = 0;
27 p->range = 0; 22 p->range = 0;
28 p->code = 0; 23 p->code = 0;
29 for (i = 0; i < sizeof(p->probs) / sizeof(p->probs[0]); i++) 24 for (i = 0; i < sizeof(p->probs) / sizeof(p->probs[0]); i++)
@@ -32,217 +27,248 @@ void Bcj2Dec_Init(CBcj2Dec *p)
32 27
33SRes Bcj2Dec_Decode(CBcj2Dec *p) 28SRes Bcj2Dec_Decode(CBcj2Dec *p)
34{ 29{
30 UInt32 v = p->temp;
31 // const Byte *src;
35 if (p->range <= 5) 32 if (p->range <= 5)
36 { 33 {
37 p->state = BCJ2_DEC_STATE_OK; 34 UInt32 code = p->code;
35 p->state = BCJ2_DEC_STATE_ERROR; /* for case if we return SZ_ERROR_DATA; */
38 for (; p->range != 5; p->range++) 36 for (; p->range != 5; p->range++)
39 { 37 {
40 if (p->range == 1 && p->code != 0) 38 if (p->range == 1 && code != 0)
41 return SZ_ERROR_DATA; 39 return SZ_ERROR_DATA;
42
43 if (p->bufs[BCJ2_STREAM_RC] == p->lims[BCJ2_STREAM_RC]) 40 if (p->bufs[BCJ2_STREAM_RC] == p->lims[BCJ2_STREAM_RC])
44 { 41 {
45 p->state = BCJ2_STREAM_RC; 42 p->state = BCJ2_STREAM_RC;
46 return SZ_OK; 43 return SZ_OK;
47 } 44 }
48 45 code = (code << 8) | *(p->bufs[BCJ2_STREAM_RC])++;
49 p->code = (p->code << 8) | *(p->bufs[BCJ2_STREAM_RC])++; 46 p->code = code;
50 } 47 }
51 48 if (code == 0xffffffff)
52 if (p->code == 0xFFFFFFFF)
53 return SZ_ERROR_DATA; 49 return SZ_ERROR_DATA;
54 50 p->range = 0xffffffff;
55 p->range = 0xFFFFFFFF;
56 } 51 }
57 else if (p->state >= BCJ2_DEC_STATE_ORIG_0) 52 // else
58 { 53 {
59 while (p->state <= BCJ2_DEC_STATE_ORIG_3) 54 unsigned state = p->state;
55 // we check BCJ2_IS_32BIT_STREAM() here instead of check in the main loop
56 if (BCJ2_IS_32BIT_STREAM(state))
60 { 57 {
61 Byte *dest = p->dest; 58 const Byte *cur = p->bufs[state];
62 if (dest == p->destLim) 59 if (cur == p->lims[state])
63 return SZ_OK; 60 return SZ_OK;
64 *dest = p->temp[(size_t)p->state - BCJ2_DEC_STATE_ORIG_0]; 61 p->bufs[state] = cur + 4;
65 p->state++; 62 {
66 p->dest = dest + 1; 63 const UInt32 ip = p->ip + 4;
64 v = GetBe32a(cur) - ip;
65 p->ip = ip;
66 }
67 state = BCJ2_DEC_STATE_ORIG_0;
67 } 68 }
68 } 69 if ((unsigned)(state - BCJ2_DEC_STATE_ORIG_0) < 4)
69
70 /*
71 if (BCJ2_IS_32BIT_STREAM(p->state))
72 {
73 const Byte *cur = p->bufs[p->state];
74 if (cur == p->lims[p->state])
75 return SZ_OK;
76 p->bufs[p->state] = cur + 4;
77
78 { 70 {
79 UInt32 val; 71 Byte *dest = p->dest;
80 Byte *dest; 72 for (;;)
81 SizeT rem;
82
83 p->ip += 4;
84 val = GetBe32(cur) - p->ip;
85 dest = p->dest;
86 rem = p->destLim - dest;
87 if (rem < 4)
88 { 73 {
89 SizeT i; 74 if (dest == p->destLim)
90 SetUi32(p->temp, val); 75 {
91 for (i = 0; i < rem; i++) 76 p->state = state;
92 dest[i] = p->temp[i]; 77 p->temp = v;
93 p->dest = dest + rem; 78 return SZ_OK;
94 p->state = BCJ2_DEC_STATE_ORIG_0 + (unsigned)rem; 79 }
95 return SZ_OK; 80 *dest++ = (Byte)v;
81 p->dest = dest;
82 if (++state == BCJ2_DEC_STATE_ORIG_3 + 1)
83 break;
84 v >>= 8;
96 } 85 }
97 SetUi32(dest, val);
98 p->temp[3] = (Byte)(val >> 24);
99 p->dest = dest + 4;
100 p->state = BCJ2_DEC_STATE_OK;
101 } 86 }
102 } 87 }
103 */
104 88
89 // src = p->bufs[BCJ2_STREAM_MAIN];
105 for (;;) 90 for (;;)
106 { 91 {
92 /*
107 if (BCJ2_IS_32BIT_STREAM(p->state)) 93 if (BCJ2_IS_32BIT_STREAM(p->state))
108 p->state = BCJ2_DEC_STATE_OK; 94 p->state = BCJ2_DEC_STATE_OK;
109 else 95 else
96 */
110 { 97 {
111 if (p->range < kTopValue) 98 if (p->range < kTopValue)
112 { 99 {
113 if (p->bufs[BCJ2_STREAM_RC] == p->lims[BCJ2_STREAM_RC]) 100 if (p->bufs[BCJ2_STREAM_RC] == p->lims[BCJ2_STREAM_RC])
114 { 101 {
115 p->state = BCJ2_STREAM_RC; 102 p->state = BCJ2_STREAM_RC;
103 p->temp = v;
116 return SZ_OK; 104 return SZ_OK;
117 } 105 }
118 p->range <<= 8; 106 p->range <<= 8;
119 p->code = (p->code << 8) | *(p->bufs[BCJ2_STREAM_RC])++; 107 p->code = (p->code << 8) | *(p->bufs[BCJ2_STREAM_RC])++;
120 } 108 }
121
122 { 109 {
123 const Byte *src = p->bufs[BCJ2_STREAM_MAIN]; 110 const Byte *src = p->bufs[BCJ2_STREAM_MAIN];
124 const Byte *srcLim; 111 const Byte *srcLim;
125 Byte *dest; 112 Byte *dest = p->dest;
126 SizeT num = (SizeT)(p->lims[BCJ2_STREAM_MAIN] - src);
127
128 if (num == 0)
129 { 113 {
130 p->state = BCJ2_STREAM_MAIN; 114 const SizeT rem = (SizeT)(p->lims[BCJ2_STREAM_MAIN] - src);
131 return SZ_OK; 115 SizeT num = (SizeT)(p->destLim - dest);
116 if (num >= rem)
117 num = rem;
118 #define NUM_ITERS 4
119 #if (NUM_ITERS & (NUM_ITERS - 1)) == 0
120 num &= ~((SizeT)NUM_ITERS - 1); // if (NUM_ITERS == (1 << x))
121 #else
122 num -= num % NUM_ITERS; // if (NUM_ITERS != (1 << x))
123 #endif
124 srcLim = src + num;
132 } 125 }
133 126
134 dest = p->dest; 127 #define NUM_SHIFT_BITS 24
135 if (num > (SizeT)(p->destLim - dest)) 128 #define ONE_ITER(indx) { \
129 const unsigned b = src[indx]; \
130 *dest++ = (Byte)b; \
131 v = (v << NUM_SHIFT_BITS) | b; \
132 if (((b + (0x100 - 0xe8)) & 0xfe) == 0) break; \
133 if (((v - (((UInt32)0x0f << (NUM_SHIFT_BITS)) + 0x80)) & \
134 ((((UInt32)1 << (4 + NUM_SHIFT_BITS)) - 0x1) << 4)) == 0) break; \
135 /* ++dest */; /* v = b; */ }
136
137 if (src != srcLim)
138 for (;;)
136 { 139 {
137 num = (SizeT)(p->destLim - dest); 140 /* The dependency chain of 2-cycle for (v) calculation is not big problem here.
138 if (num == 0) 141 But we can remove dependency chain with v = b in the end of loop. */
139 { 142 ONE_ITER(0)
140 p->state = BCJ2_DEC_STATE_ORIG; 143 #if (NUM_ITERS > 1)
141 return SZ_OK; 144 ONE_ITER(1)
142 } 145 #if (NUM_ITERS > 2)
146 ONE_ITER(2)
147 #if (NUM_ITERS > 3)
148 ONE_ITER(3)
149 #if (NUM_ITERS > 4)
150 ONE_ITER(4)
151 #if (NUM_ITERS > 5)
152 ONE_ITER(5)
153 #if (NUM_ITERS > 6)
154 ONE_ITER(6)
155 #if (NUM_ITERS > 7)
156 ONE_ITER(7)
157 #endif
158 #endif
159 #endif
160 #endif
161 #endif
162 #endif
163 #endif
164
165 src += NUM_ITERS;
166 if (src == srcLim)
167 break;
143 } 168 }
144
145 srcLim = src + num;
146 169
147 if (p->temp[3] == 0x0F && (src[0] & 0xF0) == 0x80) 170 if (src == srcLim)
148 *dest = src[0]; 171 #if (NUM_ITERS > 1)
149 else for (;;) 172 for (;;)
173 #endif
150 { 174 {
151 Byte b = *src; 175 #if (NUM_ITERS > 1)
152 *dest = b; 176 if (src == p->lims[BCJ2_STREAM_MAIN] || dest == p->destLim)
153 if (b != 0x0F) 177 #endif
154 { 178 {
155 if ((b & 0xFE) == 0xE8) 179 const SizeT num = (SizeT)(src - p->bufs[BCJ2_STREAM_MAIN]);
156 break; 180 p->bufs[BCJ2_STREAM_MAIN] = src;
157 dest++; 181 p->dest = dest;
158 if (++src != srcLim) 182 p->ip += (UInt32)num;
159 continue; 183 /* state BCJ2_STREAM_MAIN has more priority than BCJ2_STATE_ORIG */
160 break; 184 p->state =
185 src == p->lims[BCJ2_STREAM_MAIN] ?
186 (unsigned)BCJ2_STREAM_MAIN :
187 (unsigned)BCJ2_DEC_STATE_ORIG;
188 p->temp = v;
189 return SZ_OK;
161 } 190 }
162 dest++; 191 #if (NUM_ITERS > 1)
163 if (++src == srcLim) 192 ONE_ITER(0)
164 break; 193 src++;
165 if ((*src & 0xF0) != 0x80) 194 #endif
166 continue;
167 *dest = *src;
168 break;
169 } 195 }
170 196
171 num = (SizeT)(src - p->bufs[BCJ2_STREAM_MAIN]);
172
173 if (src == srcLim)
174 { 197 {
175 p->temp[3] = src[-1]; 198 const SizeT num = (SizeT)(dest - p->dest);
176 p->bufs[BCJ2_STREAM_MAIN] = src; 199 p->dest = dest; // p->dest += num;
200 p->bufs[BCJ2_STREAM_MAIN] += num; // = src;
177 p->ip += (UInt32)num; 201 p->ip += (UInt32)num;
178 p->dest += num;
179 p->state =
180 p->bufs[BCJ2_STREAM_MAIN] ==
181 p->lims[BCJ2_STREAM_MAIN] ?
182 (unsigned)BCJ2_STREAM_MAIN :
183 (unsigned)BCJ2_DEC_STATE_ORIG;
184 return SZ_OK;
185 } 202 }
186
187 { 203 {
188 UInt32 bound, ttt; 204 UInt32 bound, ttt;
189 CProb *prob; 205 CBcj2Prob *prob; // unsigned index;
190 Byte b = src[0]; 206 /*
191 Byte prev = (Byte)(num == 0 ? p->temp[3] : src[-1]); 207 prob = p->probs + (unsigned)((Byte)v == 0xe8 ?
192 208 2 + (Byte)(v >> 8) :
193 p->temp[3] = b; 209 ((v >> 5) & 1)); // ((Byte)v < 0xe8 ? 0 : 1));
194 p->bufs[BCJ2_STREAM_MAIN] = src + 1; 210 */
195 num++;
196 p->ip += (UInt32)num;
197 p->dest += num;
198
199 prob = p->probs + (unsigned)(b == 0xE8 ? 2 + (unsigned)prev : (b == 0xE9 ? 1 : 0));
200
201 _IF_BIT_0
202 { 211 {
203 _UPDATE_0 212 const unsigned c = ((v + 0x17) >> 6) & 1;
213 prob = p->probs + (unsigned)
214 (((0 - c) & (Byte)(v >> NUM_SHIFT_BITS)) + c + ((v >> 5) & 1));
215 // (Byte)
216 // 8x->0 : e9->1 : xxe8->xx+2
217 // 8x->0x100 : e9->0x101 : xxe8->xx
218 // (((0x100 - (e & ~v)) & (0x100 | (v >> 8))) + (e & v));
219 // (((0x101 + (~e | v)) & (0x100 | (v >> 8))) + (e & v));
220 }
221 ttt = *prob;
222 bound = (p->range >> kNumBitModelTotalBits) * ttt;
223 if (p->code < bound)
224 {
225 // bcj2_stats[prob - p->probs][0]++;
226 p->range = bound;
227 *prob = (CBcj2Prob)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits));
204 continue; 228 continue;
205 } 229 }
206 _UPDATE_1 230 {
207 231 // bcj2_stats[prob - p->probs][1]++;
232 p->range -= bound;
233 p->code -= bound;
234 *prob = (CBcj2Prob)(ttt - (ttt >> kNumMoveBits));
235 }
208 } 236 }
209 } 237 }
210 } 238 }
211
212 { 239 {
213 UInt32 val; 240 /* (v == 0xe8 ? 0 : 1) uses setcc instruction with additional zero register usage in x64 MSVC. */
214 unsigned cj = (p->temp[3] == 0xE8) ? BCJ2_STREAM_CALL : BCJ2_STREAM_JUMP; 241 // const unsigned cj = ((Byte)v == 0xe8) ? BCJ2_STREAM_CALL : BCJ2_STREAM_JUMP;
242 const unsigned cj = (((v + 0x57) >> 6) & 1) + BCJ2_STREAM_CALL;
215 const Byte *cur = p->bufs[cj]; 243 const Byte *cur = p->bufs[cj];
216 Byte *dest; 244 Byte *dest;
217 SizeT rem; 245 SizeT rem;
218
219 if (cur == p->lims[cj]) 246 if (cur == p->lims[cj])
220 { 247 {
221 p->state = cj; 248 p->state = cj;
222 break; 249 break;
223 } 250 }
224 251 v = GetBe32a(cur);
225 val = GetBe32(cur);
226 p->bufs[cj] = cur + 4; 252 p->bufs[cj] = cur + 4;
227 253 {
228 p->ip += 4; 254 const UInt32 ip = p->ip + 4;
229 val -= p->ip; 255 v -= ip;
256 p->ip = ip;
257 }
230 dest = p->dest; 258 dest = p->dest;
231 rem = (SizeT)(p->destLim - dest); 259 rem = (SizeT)(p->destLim - dest);
232
233 if (rem < 4) 260 if (rem < 4)
234 { 261 {
235 p->temp[0] = (Byte)val; if (rem > 0) dest[0] = (Byte)val; val >>= 8; 262 if ((unsigned)rem > 0) { dest[0] = (Byte)v; v >>= 8;
236 p->temp[1] = (Byte)val; if (rem > 1) dest[1] = (Byte)val; val >>= 8; 263 if ((unsigned)rem > 1) { dest[1] = (Byte)v; v >>= 8;
237 p->temp[2] = (Byte)val; if (rem > 2) dest[2] = (Byte)val; val >>= 8; 264 if ((unsigned)rem > 2) { dest[2] = (Byte)v; v >>= 8; }}}
238 p->temp[3] = (Byte)val; 265 p->temp = v;
239 p->dest = dest + rem; 266 p->dest = dest + rem;
240 p->state = BCJ2_DEC_STATE_ORIG_0 + (unsigned)rem; 267 p->state = BCJ2_DEC_STATE_ORIG_0 + (unsigned)rem;
241 break; 268 break;
242 } 269 }
243 270 SetUi32(dest, v)
244 SetUi32(dest, val); 271 v >>= 24;
245 p->temp[3] = (Byte)(val >> 24);
246 p->dest = dest + 4; 272 p->dest = dest + 4;
247 } 273 }
248 } 274 }
@@ -252,6 +278,13 @@ SRes Bcj2Dec_Decode(CBcj2Dec *p)
252 p->range <<= 8; 278 p->range <<= 8;
253 p->code = (p->code << 8) | *(p->bufs[BCJ2_STREAM_RC])++; 279 p->code = (p->code << 8) | *(p->bufs[BCJ2_STREAM_RC])++;
254 } 280 }
255
256 return SZ_OK; 281 return SZ_OK;
257} 282}
283
284#undef NUM_ITERS
285#undef ONE_ITER
286#undef NUM_SHIFT_BITS
287#undef kTopValue
288#undef kNumBitModelTotalBits
289#undef kBitModelTotal
290#undef kNumMoveBits