diff options
author | Igor Pavlov <87184205+ip7z@users.noreply.github.com> | 2023-06-21 00:00:00 +0000 |
---|---|---|
committer | Igor Pavlov <87184205+ip7z@users.noreply.github.com> | 2023-12-17 14:59:19 +0500 |
commit | 5b39dc76f1bc82f941d5c800ab9f34407a06b53a (patch) | |
tree | fe5e17420300b715021a76328444088d32047963 /C/Bcj2.c | |
parent | 93be7d4abfd4233228f58ee1fbbcd76d91be66a4 (diff) | |
download | 7zip-5b39dc76f1bc82f941d5c800ab9f34407a06b53a.tar.gz 7zip-5b39dc76f1bc82f941d5c800ab9f34407a06b53a.tar.bz2 7zip-5b39dc76f1bc82f941d5c800ab9f34407a06b53a.zip |
23.0123.01
Diffstat (limited to 'C/Bcj2.c')
-rw-r--r-- | C/Bcj2.c | 319 |
1 files changed, 176 insertions, 143 deletions
@@ -1,29 +1,24 @@ | |||
1 | /* Bcj2.c -- BCJ2 Decoder (Converter for x86 code) | 1 | /* Bcj2.c -- BCJ2 Decoder (Converter for x86 code) |
2 | 2021-02-09 : Igor Pavlov : Public domain */ | 2 | 2023-03-01 : Igor Pavlov : Public domain */ |
3 | 3 | ||
4 | #include "Precomp.h" | 4 | #include "Precomp.h" |
5 | 5 | ||
6 | #include "Bcj2.h" | 6 | #include "Bcj2.h" |
7 | #include "CpuArch.h" | 7 | #include "CpuArch.h" |
8 | 8 | ||
9 | #define CProb UInt16 | ||
10 | |||
11 | #define kTopValue ((UInt32)1 << 24) | 9 | #define kTopValue ((UInt32)1 << 24) |
12 | #define kNumModelBits 11 | 10 | #define kNumBitModelTotalBits 11 |
13 | #define kBitModelTotal (1 << kNumModelBits) | 11 | #define kBitModelTotal (1 << kNumBitModelTotalBits) |
14 | #define kNumMoveBits 5 | 12 | #define kNumMoveBits 5 |
15 | 13 | ||
16 | #define _IF_BIT_0 ttt = *prob; bound = (p->range >> kNumModelBits) * ttt; if (p->code < bound) | 14 | // UInt32 bcj2_stats[256 + 2][2]; |
17 | #define _UPDATE_0 p->range = bound; *prob = (CProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits)); | ||
18 | #define _UPDATE_1 p->range -= bound; p->code -= bound; *prob = (CProb)(ttt - (ttt >> kNumMoveBits)); | ||
19 | 15 | ||
20 | void Bcj2Dec_Init(CBcj2Dec *p) | 16 | void Bcj2Dec_Init(CBcj2Dec *p) |
21 | { | 17 | { |
22 | unsigned i; | 18 | unsigned i; |
23 | 19 | p->state = BCJ2_STREAM_RC; // BCJ2_DEC_STATE_OK; | |
24 | p->state = BCJ2_DEC_STATE_OK; | ||
25 | p->ip = 0; | 20 | p->ip = 0; |
26 | p->temp[3] = 0; | 21 | p->temp = 0; |
27 | p->range = 0; | 22 | p->range = 0; |
28 | p->code = 0; | 23 | p->code = 0; |
29 | for (i = 0; i < sizeof(p->probs) / sizeof(p->probs[0]); i++) | 24 | for (i = 0; i < sizeof(p->probs) / sizeof(p->probs[0]); i++) |
@@ -32,217 +27,248 @@ void Bcj2Dec_Init(CBcj2Dec *p) | |||
32 | 27 | ||
33 | SRes Bcj2Dec_Decode(CBcj2Dec *p) | 28 | SRes Bcj2Dec_Decode(CBcj2Dec *p) |
34 | { | 29 | { |
30 | UInt32 v = p->temp; | ||
31 | // const Byte *src; | ||
35 | if (p->range <= 5) | 32 | if (p->range <= 5) |
36 | { | 33 | { |
37 | p->state = BCJ2_DEC_STATE_OK; | 34 | UInt32 code = p->code; |
35 | p->state = BCJ2_DEC_STATE_ERROR; /* for case if we return SZ_ERROR_DATA; */ | ||
38 | for (; p->range != 5; p->range++) | 36 | for (; p->range != 5; p->range++) |
39 | { | 37 | { |
40 | if (p->range == 1 && p->code != 0) | 38 | if (p->range == 1 && code != 0) |
41 | return SZ_ERROR_DATA; | 39 | return SZ_ERROR_DATA; |
42 | |||
43 | if (p->bufs[BCJ2_STREAM_RC] == p->lims[BCJ2_STREAM_RC]) | 40 | if (p->bufs[BCJ2_STREAM_RC] == p->lims[BCJ2_STREAM_RC]) |
44 | { | 41 | { |
45 | p->state = BCJ2_STREAM_RC; | 42 | p->state = BCJ2_STREAM_RC; |
46 | return SZ_OK; | 43 | return SZ_OK; |
47 | } | 44 | } |
48 | 45 | code = (code << 8) | *(p->bufs[BCJ2_STREAM_RC])++; | |
49 | p->code = (p->code << 8) | *(p->bufs[BCJ2_STREAM_RC])++; | 46 | p->code = code; |
50 | } | 47 | } |
51 | 48 | if (code == 0xffffffff) | |
52 | if (p->code == 0xFFFFFFFF) | ||
53 | return SZ_ERROR_DATA; | 49 | return SZ_ERROR_DATA; |
54 | 50 | p->range = 0xffffffff; | |
55 | p->range = 0xFFFFFFFF; | ||
56 | } | 51 | } |
57 | else if (p->state >= BCJ2_DEC_STATE_ORIG_0) | 52 | // else |
58 | { | 53 | { |
59 | while (p->state <= BCJ2_DEC_STATE_ORIG_3) | 54 | unsigned state = p->state; |
55 | // we check BCJ2_IS_32BIT_STREAM() here instead of check in the main loop | ||
56 | if (BCJ2_IS_32BIT_STREAM(state)) | ||
60 | { | 57 | { |
61 | Byte *dest = p->dest; | 58 | const Byte *cur = p->bufs[state]; |
62 | if (dest == p->destLim) | 59 | if (cur == p->lims[state]) |
63 | return SZ_OK; | 60 | return SZ_OK; |
64 | *dest = p->temp[(size_t)p->state - BCJ2_DEC_STATE_ORIG_0]; | 61 | p->bufs[state] = cur + 4; |
65 | p->state++; | 62 | { |
66 | p->dest = dest + 1; | 63 | const UInt32 ip = p->ip + 4; |
64 | v = GetBe32a(cur) - ip; | ||
65 | p->ip = ip; | ||
66 | } | ||
67 | state = BCJ2_DEC_STATE_ORIG_0; | ||
67 | } | 68 | } |
68 | } | 69 | if ((unsigned)(state - BCJ2_DEC_STATE_ORIG_0) < 4) |
69 | |||
70 | /* | ||
71 | if (BCJ2_IS_32BIT_STREAM(p->state)) | ||
72 | { | ||
73 | const Byte *cur = p->bufs[p->state]; | ||
74 | if (cur == p->lims[p->state]) | ||
75 | return SZ_OK; | ||
76 | p->bufs[p->state] = cur + 4; | ||
77 | |||
78 | { | 70 | { |
79 | UInt32 val; | 71 | Byte *dest = p->dest; |
80 | Byte *dest; | 72 | for (;;) |
81 | SizeT rem; | ||
82 | |||
83 | p->ip += 4; | ||
84 | val = GetBe32(cur) - p->ip; | ||
85 | dest = p->dest; | ||
86 | rem = p->destLim - dest; | ||
87 | if (rem < 4) | ||
88 | { | 73 | { |
89 | SizeT i; | 74 | if (dest == p->destLim) |
90 | SetUi32(p->temp, val); | 75 | { |
91 | for (i = 0; i < rem; i++) | 76 | p->state = state; |
92 | dest[i] = p->temp[i]; | 77 | p->temp = v; |
93 | p->dest = dest + rem; | 78 | return SZ_OK; |
94 | p->state = BCJ2_DEC_STATE_ORIG_0 + (unsigned)rem; | 79 | } |
95 | return SZ_OK; | 80 | *dest++ = (Byte)v; |
81 | p->dest = dest; | ||
82 | if (++state == BCJ2_DEC_STATE_ORIG_3 + 1) | ||
83 | break; | ||
84 | v >>= 8; | ||
96 | } | 85 | } |
97 | SetUi32(dest, val); | ||
98 | p->temp[3] = (Byte)(val >> 24); | ||
99 | p->dest = dest + 4; | ||
100 | p->state = BCJ2_DEC_STATE_OK; | ||
101 | } | 86 | } |
102 | } | 87 | } |
103 | */ | ||
104 | 88 | ||
89 | // src = p->bufs[BCJ2_STREAM_MAIN]; | ||
105 | for (;;) | 90 | for (;;) |
106 | { | 91 | { |
92 | /* | ||
107 | if (BCJ2_IS_32BIT_STREAM(p->state)) | 93 | if (BCJ2_IS_32BIT_STREAM(p->state)) |
108 | p->state = BCJ2_DEC_STATE_OK; | 94 | p->state = BCJ2_DEC_STATE_OK; |
109 | else | 95 | else |
96 | */ | ||
110 | { | 97 | { |
111 | if (p->range < kTopValue) | 98 | if (p->range < kTopValue) |
112 | { | 99 | { |
113 | if (p->bufs[BCJ2_STREAM_RC] == p->lims[BCJ2_STREAM_RC]) | 100 | if (p->bufs[BCJ2_STREAM_RC] == p->lims[BCJ2_STREAM_RC]) |
114 | { | 101 | { |
115 | p->state = BCJ2_STREAM_RC; | 102 | p->state = BCJ2_STREAM_RC; |
103 | p->temp = v; | ||
116 | return SZ_OK; | 104 | return SZ_OK; |
117 | } | 105 | } |
118 | p->range <<= 8; | 106 | p->range <<= 8; |
119 | p->code = (p->code << 8) | *(p->bufs[BCJ2_STREAM_RC])++; | 107 | p->code = (p->code << 8) | *(p->bufs[BCJ2_STREAM_RC])++; |
120 | } | 108 | } |
121 | |||
122 | { | 109 | { |
123 | const Byte *src = p->bufs[BCJ2_STREAM_MAIN]; | 110 | const Byte *src = p->bufs[BCJ2_STREAM_MAIN]; |
124 | const Byte *srcLim; | 111 | const Byte *srcLim; |
125 | Byte *dest; | 112 | Byte *dest = p->dest; |
126 | SizeT num = (SizeT)(p->lims[BCJ2_STREAM_MAIN] - src); | ||
127 | |||
128 | if (num == 0) | ||
129 | { | 113 | { |
130 | p->state = BCJ2_STREAM_MAIN; | 114 | const SizeT rem = (SizeT)(p->lims[BCJ2_STREAM_MAIN] - src); |
131 | return SZ_OK; | 115 | SizeT num = (SizeT)(p->destLim - dest); |
116 | if (num >= rem) | ||
117 | num = rem; | ||
118 | #define NUM_ITERS 4 | ||
119 | #if (NUM_ITERS & (NUM_ITERS - 1)) == 0 | ||
120 | num &= ~((SizeT)NUM_ITERS - 1); // if (NUM_ITERS == (1 << x)) | ||
121 | #else | ||
122 | num -= num % NUM_ITERS; // if (NUM_ITERS != (1 << x)) | ||
123 | #endif | ||
124 | srcLim = src + num; | ||
132 | } | 125 | } |
133 | 126 | ||
134 | dest = p->dest; | 127 | #define NUM_SHIFT_BITS 24 |
135 | if (num > (SizeT)(p->destLim - dest)) | 128 | #define ONE_ITER(indx) { \ |
129 | const unsigned b = src[indx]; \ | ||
130 | *dest++ = (Byte)b; \ | ||
131 | v = (v << NUM_SHIFT_BITS) | b; \ | ||
132 | if (((b + (0x100 - 0xe8)) & 0xfe) == 0) break; \ | ||
133 | if (((v - (((UInt32)0x0f << (NUM_SHIFT_BITS)) + 0x80)) & \ | ||
134 | ((((UInt32)1 << (4 + NUM_SHIFT_BITS)) - 0x1) << 4)) == 0) break; \ | ||
135 | /* ++dest */; /* v = b; */ } | ||
136 | |||
137 | if (src != srcLim) | ||
138 | for (;;) | ||
136 | { | 139 | { |
137 | num = (SizeT)(p->destLim - dest); | 140 | /* The dependency chain of 2-cycle for (v) calculation is not big problem here. |
138 | if (num == 0) | 141 | But we can remove dependency chain with v = b in the end of loop. */ |
139 | { | 142 | ONE_ITER(0) |
140 | p->state = BCJ2_DEC_STATE_ORIG; | 143 | #if (NUM_ITERS > 1) |
141 | return SZ_OK; | 144 | ONE_ITER(1) |
142 | } | 145 | #if (NUM_ITERS > 2) |
146 | ONE_ITER(2) | ||
147 | #if (NUM_ITERS > 3) | ||
148 | ONE_ITER(3) | ||
149 | #if (NUM_ITERS > 4) | ||
150 | ONE_ITER(4) | ||
151 | #if (NUM_ITERS > 5) | ||
152 | ONE_ITER(5) | ||
153 | #if (NUM_ITERS > 6) | ||
154 | ONE_ITER(6) | ||
155 | #if (NUM_ITERS > 7) | ||
156 | ONE_ITER(7) | ||
157 | #endif | ||
158 | #endif | ||
159 | #endif | ||
160 | #endif | ||
161 | #endif | ||
162 | #endif | ||
163 | #endif | ||
164 | |||
165 | src += NUM_ITERS; | ||
166 | if (src == srcLim) | ||
167 | break; | ||
143 | } | 168 | } |
144 | |||
145 | srcLim = src + num; | ||
146 | 169 | ||
147 | if (p->temp[3] == 0x0F && (src[0] & 0xF0) == 0x80) | 170 | if (src == srcLim) |
148 | *dest = src[0]; | 171 | #if (NUM_ITERS > 1) |
149 | else for (;;) | 172 | for (;;) |
173 | #endif | ||
150 | { | 174 | { |
151 | Byte b = *src; | 175 | #if (NUM_ITERS > 1) |
152 | *dest = b; | 176 | if (src == p->lims[BCJ2_STREAM_MAIN] || dest == p->destLim) |
153 | if (b != 0x0F) | 177 | #endif |
154 | { | 178 | { |
155 | if ((b & 0xFE) == 0xE8) | 179 | const SizeT num = (SizeT)(src - p->bufs[BCJ2_STREAM_MAIN]); |
156 | break; | 180 | p->bufs[BCJ2_STREAM_MAIN] = src; |
157 | dest++; | 181 | p->dest = dest; |
158 | if (++src != srcLim) | 182 | p->ip += (UInt32)num; |
159 | continue; | 183 | /* state BCJ2_STREAM_MAIN has more priority than BCJ2_STATE_ORIG */ |
160 | break; | 184 | p->state = |
185 | src == p->lims[BCJ2_STREAM_MAIN] ? | ||
186 | (unsigned)BCJ2_STREAM_MAIN : | ||
187 | (unsigned)BCJ2_DEC_STATE_ORIG; | ||
188 | p->temp = v; | ||
189 | return SZ_OK; | ||
161 | } | 190 | } |
162 | dest++; | 191 | #if (NUM_ITERS > 1) |
163 | if (++src == srcLim) | 192 | ONE_ITER(0) |
164 | break; | 193 | src++; |
165 | if ((*src & 0xF0) != 0x80) | 194 | #endif |
166 | continue; | ||
167 | *dest = *src; | ||
168 | break; | ||
169 | } | 195 | } |
170 | 196 | ||
171 | num = (SizeT)(src - p->bufs[BCJ2_STREAM_MAIN]); | ||
172 | |||
173 | if (src == srcLim) | ||
174 | { | 197 | { |
175 | p->temp[3] = src[-1]; | 198 | const SizeT num = (SizeT)(dest - p->dest); |
176 | p->bufs[BCJ2_STREAM_MAIN] = src; | 199 | p->dest = dest; // p->dest += num; |
200 | p->bufs[BCJ2_STREAM_MAIN] += num; // = src; | ||
177 | p->ip += (UInt32)num; | 201 | p->ip += (UInt32)num; |
178 | p->dest += num; | ||
179 | p->state = | ||
180 | p->bufs[BCJ2_STREAM_MAIN] == | ||
181 | p->lims[BCJ2_STREAM_MAIN] ? | ||
182 | (unsigned)BCJ2_STREAM_MAIN : | ||
183 | (unsigned)BCJ2_DEC_STATE_ORIG; | ||
184 | return SZ_OK; | ||
185 | } | 202 | } |
186 | |||
187 | { | 203 | { |
188 | UInt32 bound, ttt; | 204 | UInt32 bound, ttt; |
189 | CProb *prob; | 205 | CBcj2Prob *prob; // unsigned index; |
190 | Byte b = src[0]; | 206 | /* |
191 | Byte prev = (Byte)(num == 0 ? p->temp[3] : src[-1]); | 207 | prob = p->probs + (unsigned)((Byte)v == 0xe8 ? |
192 | 208 | 2 + (Byte)(v >> 8) : | |
193 | p->temp[3] = b; | 209 | ((v >> 5) & 1)); // ((Byte)v < 0xe8 ? 0 : 1)); |
194 | p->bufs[BCJ2_STREAM_MAIN] = src + 1; | 210 | */ |
195 | num++; | ||
196 | p->ip += (UInt32)num; | ||
197 | p->dest += num; | ||
198 | |||
199 | prob = p->probs + (unsigned)(b == 0xE8 ? 2 + (unsigned)prev : (b == 0xE9 ? 1 : 0)); | ||
200 | |||
201 | _IF_BIT_0 | ||
202 | { | 211 | { |
203 | _UPDATE_0 | 212 | const unsigned c = ((v + 0x17) >> 6) & 1; |
213 | prob = p->probs + (unsigned) | ||
214 | (((0 - c) & (Byte)(v >> NUM_SHIFT_BITS)) + c + ((v >> 5) & 1)); | ||
215 | // (Byte) | ||
216 | // 8x->0 : e9->1 : xxe8->xx+2 | ||
217 | // 8x->0x100 : e9->0x101 : xxe8->xx | ||
218 | // (((0x100 - (e & ~v)) & (0x100 | (v >> 8))) + (e & v)); | ||
219 | // (((0x101 + (~e | v)) & (0x100 | (v >> 8))) + (e & v)); | ||
220 | } | ||
221 | ttt = *prob; | ||
222 | bound = (p->range >> kNumBitModelTotalBits) * ttt; | ||
223 | if (p->code < bound) | ||
224 | { | ||
225 | // bcj2_stats[prob - p->probs][0]++; | ||
226 | p->range = bound; | ||
227 | *prob = (CBcj2Prob)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits)); | ||
204 | continue; | 228 | continue; |
205 | } | 229 | } |
206 | _UPDATE_1 | 230 | { |
207 | 231 | // bcj2_stats[prob - p->probs][1]++; | |
232 | p->range -= bound; | ||
233 | p->code -= bound; | ||
234 | *prob = (CBcj2Prob)(ttt - (ttt >> kNumMoveBits)); | ||
235 | } | ||
208 | } | 236 | } |
209 | } | 237 | } |
210 | } | 238 | } |
211 | |||
212 | { | 239 | { |
213 | UInt32 val; | 240 | /* (v == 0xe8 ? 0 : 1) uses setcc instruction with additional zero register usage in x64 MSVC. */ |
214 | unsigned cj = (p->temp[3] == 0xE8) ? BCJ2_STREAM_CALL : BCJ2_STREAM_JUMP; | 241 | // const unsigned cj = ((Byte)v == 0xe8) ? BCJ2_STREAM_CALL : BCJ2_STREAM_JUMP; |
242 | const unsigned cj = (((v + 0x57) >> 6) & 1) + BCJ2_STREAM_CALL; | ||
215 | const Byte *cur = p->bufs[cj]; | 243 | const Byte *cur = p->bufs[cj]; |
216 | Byte *dest; | 244 | Byte *dest; |
217 | SizeT rem; | 245 | SizeT rem; |
218 | |||
219 | if (cur == p->lims[cj]) | 246 | if (cur == p->lims[cj]) |
220 | { | 247 | { |
221 | p->state = cj; | 248 | p->state = cj; |
222 | break; | 249 | break; |
223 | } | 250 | } |
224 | 251 | v = GetBe32a(cur); | |
225 | val = GetBe32(cur); | ||
226 | p->bufs[cj] = cur + 4; | 252 | p->bufs[cj] = cur + 4; |
227 | 253 | { | |
228 | p->ip += 4; | 254 | const UInt32 ip = p->ip + 4; |
229 | val -= p->ip; | 255 | v -= ip; |
256 | p->ip = ip; | ||
257 | } | ||
230 | dest = p->dest; | 258 | dest = p->dest; |
231 | rem = (SizeT)(p->destLim - dest); | 259 | rem = (SizeT)(p->destLim - dest); |
232 | |||
233 | if (rem < 4) | 260 | if (rem < 4) |
234 | { | 261 | { |
235 | p->temp[0] = (Byte)val; if (rem > 0) dest[0] = (Byte)val; val >>= 8; | 262 | if ((unsigned)rem > 0) { dest[0] = (Byte)v; v >>= 8; |
236 | p->temp[1] = (Byte)val; if (rem > 1) dest[1] = (Byte)val; val >>= 8; | 263 | if ((unsigned)rem > 1) { dest[1] = (Byte)v; v >>= 8; |
237 | p->temp[2] = (Byte)val; if (rem > 2) dest[2] = (Byte)val; val >>= 8; | 264 | if ((unsigned)rem > 2) { dest[2] = (Byte)v; v >>= 8; }}} |
238 | p->temp[3] = (Byte)val; | 265 | p->temp = v; |
239 | p->dest = dest + rem; | 266 | p->dest = dest + rem; |
240 | p->state = BCJ2_DEC_STATE_ORIG_0 + (unsigned)rem; | 267 | p->state = BCJ2_DEC_STATE_ORIG_0 + (unsigned)rem; |
241 | break; | 268 | break; |
242 | } | 269 | } |
243 | 270 | SetUi32(dest, v) | |
244 | SetUi32(dest, val); | 271 | v >>= 24; |
245 | p->temp[3] = (Byte)(val >> 24); | ||
246 | p->dest = dest + 4; | 272 | p->dest = dest + 4; |
247 | } | 273 | } |
248 | } | 274 | } |
@@ -252,6 +278,13 @@ SRes Bcj2Dec_Decode(CBcj2Dec *p) | |||
252 | p->range <<= 8; | 278 | p->range <<= 8; |
253 | p->code = (p->code << 8) | *(p->bufs[BCJ2_STREAM_RC])++; | 279 | p->code = (p->code << 8) | *(p->bufs[BCJ2_STREAM_RC])++; |
254 | } | 280 | } |
255 | |||
256 | return SZ_OK; | 281 | return SZ_OK; |
257 | } | 282 | } |
283 | |||
284 | #undef NUM_ITERS | ||
285 | #undef ONE_ITER | ||
286 | #undef NUM_SHIFT_BITS | ||
287 | #undef kTopValue | ||
288 | #undef kNumBitModelTotalBits | ||
289 | #undef kBitModelTotal | ||
290 | #undef kNumMoveBits | ||