diff options
Diffstat (limited to '')
-rw-r--r-- | C/Bcj2Enc.c | 559 |
1 files changed, 377 insertions, 182 deletions
diff --git a/C/Bcj2Enc.c b/C/Bcj2Enc.c index 682362a..79460bb 100644 --- a/C/Bcj2Enc.c +++ b/C/Bcj2Enc.c | |||
@@ -1,60 +1,62 @@ | |||
1 | /* Bcj2Enc.c -- BCJ2 Encoder (Converter for x86 code) | 1 | /* Bcj2Enc.c -- BCJ2 Encoder converter for x86 code (Branch CALL/JUMP variant2) |
2 | 2021-02-09 : Igor Pavlov : Public domain */ | 2 | 2023-04-02 : Igor Pavlov : Public domain */ |
3 | 3 | ||
4 | #include "Precomp.h" | 4 | #include "Precomp.h" |
5 | 5 | ||
6 | /* #define SHOW_STAT */ | 6 | /* #define SHOW_STAT */ |
7 | |||
8 | #ifdef SHOW_STAT | 7 | #ifdef SHOW_STAT |
9 | #include <stdio.h> | 8 | #include <stdio.h> |
10 | #define PRF(x) x | 9 | #define PRF2(s) printf("%s ip=%8x tempPos=%d src= %8x\n", s, (unsigned)p->ip64, p->tempPos, (unsigned)(p->srcLim - p->src)); |
11 | #else | 10 | #else |
12 | #define PRF(x) | 11 | #define PRF2(s) |
13 | #endif | 12 | #endif |
14 | 13 | ||
15 | #include <string.h> | ||
16 | |||
17 | #include "Bcj2.h" | 14 | #include "Bcj2.h" |
18 | #include "CpuArch.h" | 15 | #include "CpuArch.h" |
19 | 16 | ||
20 | #define CProb UInt16 | ||
21 | |||
22 | #define kTopValue ((UInt32)1 << 24) | 17 | #define kTopValue ((UInt32)1 << 24) |
23 | #define kNumModelBits 11 | 18 | #define kNumBitModelTotalBits 11 |
24 | #define kBitModelTotal (1 << kNumModelBits) | 19 | #define kBitModelTotal (1 << kNumBitModelTotalBits) |
25 | #define kNumMoveBits 5 | 20 | #define kNumMoveBits 5 |
26 | 21 | ||
27 | void Bcj2Enc_Init(CBcj2Enc *p) | 22 | void Bcj2Enc_Init(CBcj2Enc *p) |
28 | { | 23 | { |
29 | unsigned i; | 24 | unsigned i; |
30 | 25 | p->state = BCJ2_ENC_STATE_ORIG; | |
31 | p->state = BCJ2_ENC_STATE_OK; | ||
32 | p->finishMode = BCJ2_ENC_FINISH_MODE_CONTINUE; | 26 | p->finishMode = BCJ2_ENC_FINISH_MODE_CONTINUE; |
33 | 27 | p->context = 0; | |
34 | p->prevByte = 0; | 28 | p->flushRem = 5; |
35 | 29 | p->isFlushState = 0; | |
36 | p->cache = 0; | 30 | p->cache = 0; |
37 | p->range = 0xFFFFFFFF; | 31 | p->range = 0xffffffff; |
38 | p->low = 0; | 32 | p->low = 0; |
39 | p->cacheSize = 1; | 33 | p->cacheSize = 1; |
40 | 34 | p->ip64 = 0; | |
41 | p->ip = 0; | 35 | p->fileIp64 = 0; |
42 | 36 | p->fileSize64_minus1 = BCJ2_ENC_FileSizeField_UNLIMITED; | |
43 | p->fileIp = 0; | 37 | p->relatLimit = BCJ2_ENC_RELAT_LIMIT_DEFAULT; |
44 | p->fileSize = 0; | 38 | // p->relatExcludeBits = 0; |
45 | p->relatLimit = BCJ2_RELAT_LIMIT; | ||
46 | |||
47 | p->tempPos = 0; | 39 | p->tempPos = 0; |
48 | |||
49 | p->flushPos = 0; | ||
50 | |||
51 | for (i = 0; i < sizeof(p->probs) / sizeof(p->probs[0]); i++) | 40 | for (i = 0; i < sizeof(p->probs) / sizeof(p->probs[0]); i++) |
52 | p->probs[i] = kBitModelTotal >> 1; | 41 | p->probs[i] = kBitModelTotal >> 1; |
53 | } | 42 | } |
54 | 43 | ||
55 | static BoolInt MY_FAST_CALL RangeEnc_ShiftLow(CBcj2Enc *p) | 44 | // Z7_NO_INLINE |
45 | Z7_FORCE_INLINE | ||
46 | static BoolInt Bcj2_RangeEnc_ShiftLow(CBcj2Enc *p) | ||
56 | { | 47 | { |
57 | if ((UInt32)p->low < (UInt32)0xFF000000 || (UInt32)(p->low >> 32) != 0) | 48 | const UInt32 low = (UInt32)p->low; |
49 | const unsigned high = (unsigned) | ||
50 | #if defined(Z7_MSC_VER_ORIGINAL) \ | ||
51 | && defined(MY_CPU_X86) \ | ||
52 | && defined(MY_CPU_LE) \ | ||
53 | && !defined(MY_CPU_64BIT) | ||
54 | // we try to get rid of __aullshr() call in MSVS-x86 | ||
55 | (((const UInt32 *)&p->low)[1]); // [1] : for little-endian only | ||
56 | #else | ||
57 | (p->low >> 32); | ||
58 | #endif | ||
59 | if (low < (UInt32)0xff000000 || high != 0) | ||
58 | { | 60 | { |
59 | Byte *buf = p->bufs[BCJ2_STREAM_RC]; | 61 | Byte *buf = p->bufs[BCJ2_STREAM_RC]; |
60 | do | 62 | do |
@@ -65,247 +67,440 @@ static BoolInt MY_FAST_CALL RangeEnc_ShiftLow(CBcj2Enc *p) | |||
65 | p->bufs[BCJ2_STREAM_RC] = buf; | 67 | p->bufs[BCJ2_STREAM_RC] = buf; |
66 | return True; | 68 | return True; |
67 | } | 69 | } |
68 | *buf++ = (Byte)(p->cache + (Byte)(p->low >> 32)); | 70 | *buf++ = (Byte)(p->cache + high); |
69 | p->cache = 0xFF; | 71 | p->cache = 0xff; |
70 | } | 72 | } |
71 | while (--p->cacheSize); | 73 | while (--p->cacheSize); |
72 | p->bufs[BCJ2_STREAM_RC] = buf; | 74 | p->bufs[BCJ2_STREAM_RC] = buf; |
73 | p->cache = (Byte)((UInt32)p->low >> 24); | 75 | p->cache = (Byte)(low >> 24); |
74 | } | 76 | } |
75 | p->cacheSize++; | 77 | p->cacheSize++; |
76 | p->low = (UInt32)p->low << 8; | 78 | p->low = low << 8; |
77 | return False; | 79 | return False; |
78 | } | 80 | } |
79 | 81 | ||
80 | static void Bcj2Enc_Encode_2(CBcj2Enc *p) | 82 | |
81 | { | 83 | /* |
82 | if (BCJ2_IS_32BIT_STREAM(p->state)) | 84 | We can use 2 alternative versions of code: |
85 | 1) non-marker version: | ||
86 | Byte CBcj2Enc::context | ||
87 | Byte temp[8]; | ||
88 | Last byte of marker (e8/e9/[0f]8x) can be written to temp[] buffer. | ||
89 | Encoder writes last byte of marker (e8/e9/[0f]8x) to dest, only in conjunction | ||
90 | with writing branch symbol to range coder in same Bcj2Enc_Encode_2() call. | ||
91 | |||
92 | 2) marker version: | ||
93 | UInt32 CBcj2Enc::context | ||
94 | Byte CBcj2Enc::temp[4]; | ||
95 | MARKER_FLAG in CBcj2Enc::context shows that CBcj2Enc::context contains the found marker. | ||
96 | it's allowed that | ||
97 | one call of Bcj2Enc_Encode_2() writes last byte of marker (e8/e9/[0f]8x) to dest, | ||
98 | and another call of Bcj2Enc_Encode_2() does offset conversion. | ||
99 | So different values of (fileIp) and (fileSize) are possible | ||
100 | in these different Bcj2Enc_Encode_2() calls. | ||
101 | |||
102 | Also marker version requires additional if((v & MARKER_FLAG) == 0) check in main loop. | ||
103 | So we use non-marker version. | ||
104 | */ | ||
105 | |||
106 | /* | ||
107 | Corner cases with overlap in multi-block. | ||
108 | before v23: there was one corner case, where converted instruction | ||
109 | could start in one sub-stream and finish in next sub-stream. | ||
110 | If multi-block (solid) encoding is used, | ||
111 | and BCJ2_ENC_FINISH_MODE_END_BLOCK is used for each sub-stream. | ||
112 | and (0f) is last byte of previous sub-stream | ||
113 | and (8x) is first byte of current sub-stream | ||
114 | then (0f 8x) pair is treated as marker by BCJ2 encoder and decoder. | ||
115 | BCJ2 encoder can convert 32-bit offset for that (0f 8x) pair, | ||
116 | if that offset meets limit requirements. | ||
117 | If encoder allows 32-bit offset conversion for such overlap case, | ||
118 | then the data in 3 uncompressed BCJ2 streams for some sub-stream | ||
119 | can depend on data of previous sub-stream. | ||
120 | That corner case is not a big problem, and it's a rare case. | ||
121 | Since v23.00 we do additional check to prevent conversions in such overlap cases. | ||
122 | */ | ||
123 | |||
124 | /* | ||
125 | Bcj2Enc_Encode_2() output variables at exit: | ||
83 | { | 126 | { |
84 | Byte *cur = p->bufs[p->state]; | 127 | if (Bcj2Enc_Encode_2() exits with (p->state == BCJ2_ENC_STATE_ORIG)) |
85 | if (cur == p->lims[p->state]) | 128 | { |
86 | return; | 129 | it means that encoder needs more input data. |
87 | SetBe32(cur, p->tempTarget); | 130 | if (p->srcLim == p->src) at exit, then |
88 | p->bufs[p->state] = cur + 4; | 131 | { |
132 | (p->finishMode != BCJ2_ENC_FINISH_MODE_END_STREAM) | ||
133 | all input data were read and processed, and we are ready for | ||
134 | new input data. | ||
135 | } | ||
136 | else | ||
137 | { | ||
138 | (p->srcLim != p->src) | ||
139 | (p->finishMode == BCJ2_ENC_FINISH_MODE_CONTINUE) | ||
140 | The encoder has found e8/e9/0f_8x marker, | ||
141 | and p->src points to last byte of that marker, | ||
142 | Bcj2Enc_Encode_2() needs more input data to get totally | ||
143 | 5 bytes (last byte of marker and 32-bit branch offset) | ||
144 | as continuous array starting from p->src. | ||
145 | (p->srcLim - p->src < 5) requirement is met after exit. | ||
146 | So non-processed residue from p->src to p->srcLim is always less than 5 bytes. | ||
147 | } | ||
148 | } | ||
89 | } | 149 | } |
150 | */ | ||
90 | 151 | ||
91 | p->state = BCJ2_ENC_STATE_ORIG; | 152 | Z7_NO_INLINE |
92 | 153 | static void Bcj2Enc_Encode_2(CBcj2Enc *p) | |
93 | for (;;) | 154 | { |
155 | if (!p->isFlushState) | ||
94 | { | 156 | { |
95 | if (p->range < kTopValue) | 157 | const Byte *src; |
158 | UInt32 v; | ||
96 | { | 159 | { |
97 | if (RangeEnc_ShiftLow(p)) | 160 | const unsigned state = p->state; |
98 | return; | 161 | if (BCJ2_IS_32BIT_STREAM(state)) |
99 | p->range <<= 8; | 162 | { |
163 | Byte *cur = p->bufs[state]; | ||
164 | if (cur == p->lims[state]) | ||
165 | return; | ||
166 | SetBe32a(cur, p->tempTarget) | ||
167 | p->bufs[state] = cur + 4; | ||
168 | } | ||
100 | } | 169 | } |
170 | p->state = BCJ2_ENC_STATE_ORIG; // for main reason of exit | ||
171 | src = p->src; | ||
172 | v = p->context; | ||
173 | |||
174 | // #define WRITE_CONTEXT p->context = v; // for marker version | ||
175 | #define WRITE_CONTEXT p->context = (Byte)v; | ||
176 | #define WRITE_CONTEXT_AND_SRC p->src = src; WRITE_CONTEXT | ||
101 | 177 | ||
178 | for (;;) | ||
102 | { | 179 | { |
180 | // const Byte *src; | ||
181 | // UInt32 v; | ||
182 | CBcj2Enc_ip_unsigned ip; | ||
183 | if (p->range < kTopValue) | ||
184 | { | ||
185 | // to reduce register pressure and code size: we save and restore local variables. | ||
186 | WRITE_CONTEXT_AND_SRC | ||
187 | if (Bcj2_RangeEnc_ShiftLow(p)) | ||
188 | return; | ||
189 | p->range <<= 8; | ||
190 | src = p->src; | ||
191 | v = p->context; | ||
192 | } | ||
193 | // src = p->src; | ||
194 | // #define MARKER_FLAG ((UInt32)1 << 17) | ||
195 | // if ((v & MARKER_FLAG) == 0) // for marker version | ||
103 | { | 196 | { |
104 | const Byte *src = p->src; | ||
105 | const Byte *srcLim; | 197 | const Byte *srcLim; |
106 | Byte *dest; | 198 | Byte *dest = p->bufs[BCJ2_STREAM_MAIN]; |
107 | SizeT num = (SizeT)(p->srcLim - src); | ||
108 | |||
109 | if (p->finishMode == BCJ2_ENC_FINISH_MODE_CONTINUE) | ||
110 | { | 199 | { |
111 | if (num <= 4) | 200 | const SizeT remSrc = (SizeT)(p->srcLim - src); |
112 | return; | 201 | SizeT rem = (SizeT)(p->lims[BCJ2_STREAM_MAIN] - dest); |
113 | num -= 4; | 202 | if (rem >= remSrc) |
203 | rem = remSrc; | ||
204 | srcLim = src + rem; | ||
114 | } | 205 | } |
115 | else if (num == 0) | 206 | /* p->context contains context of previous byte: |
116 | break; | 207 | bits [0 : 7] : src[-1], if (src) was changed in this call |
117 | 208 | bits [8 : 31] : are undefined for non-marker version | |
118 | dest = p->bufs[BCJ2_STREAM_MAIN]; | 209 | */ |
119 | if (num > (SizeT)(p->lims[BCJ2_STREAM_MAIN] - dest)) | 210 | // v = p->context; |
211 | #define NUM_SHIFT_BITS 24 | ||
212 | #define CONV_FLAG ((UInt32)1 << 16) | ||
213 | #define ONE_ITER { \ | ||
214 | b = src[0]; \ | ||
215 | *dest++ = (Byte)b; \ | ||
216 | v = (v << NUM_SHIFT_BITS) | b; \ | ||
217 | if (((b + (0x100 - 0xe8)) & 0xfe) == 0) break; \ | ||
218 | if (((v - (((UInt32)0x0f << (NUM_SHIFT_BITS)) + 0x80)) & \ | ||
219 | ((((UInt32)1 << (4 + NUM_SHIFT_BITS)) - 0x1) << 4)) == 0) break; \ | ||
220 | src++; if (src == srcLim) { break; } } | ||
221 | |||
222 | if (src != srcLim) | ||
223 | for (;;) | ||
120 | { | 224 | { |
121 | num = (SizeT)(p->lims[BCJ2_STREAM_MAIN] - dest); | 225 | /* clang can generate inefficient code with setne instead of two jcc instructions. |
122 | if (num == 0) | 226 | we can use 2 iterations and external (unsigned b) to avoid that inefficient code generation. */ |
123 | { | 227 | unsigned b; |
124 | p->state = BCJ2_STREAM_MAIN; | 228 | ONE_ITER |
125 | return; | 229 | ONE_ITER |
126 | } | ||
127 | } | 230 | } |
128 | 231 | ||
129 | srcLim = src + num; | 232 | ip = p->ip64 + (CBcj2Enc_ip_unsigned)(SizeT)(dest - p->bufs[BCJ2_STREAM_MAIN]); |
233 | p->bufs[BCJ2_STREAM_MAIN] = dest; | ||
234 | p->ip64 = ip; | ||
130 | 235 | ||
131 | if (p->prevByte == 0x0F && (src[0] & 0xF0) == 0x80) | 236 | if (src == srcLim) |
132 | *dest = src[0]; | ||
133 | else for (;;) | ||
134 | { | 237 | { |
135 | Byte b = *src; | 238 | WRITE_CONTEXT_AND_SRC |
136 | *dest = b; | 239 | if (src != p->srcLim) |
137 | if (b != 0x0F) | ||
138 | { | 240 | { |
139 | if ((b & 0xFE) == 0xE8) | 241 | p->state = BCJ2_STREAM_MAIN; |
140 | break; | 242 | return; |
141 | dest++; | ||
142 | if (++src != srcLim) | ||
143 | continue; | ||
144 | break; | ||
145 | } | 243 | } |
146 | dest++; | 244 | /* (p->src == p->srcLim) |
147 | if (++src == srcLim) | 245 | (p->state == BCJ2_ENC_STATE_ORIG) */ |
148 | break; | 246 | if (p->finishMode != BCJ2_ENC_FINISH_MODE_END_STREAM) |
149 | if ((*src & 0xF0) != 0x80) | 247 | return; |
150 | continue; | 248 | /* (p->finishMode == BCJ2_ENC_FINISH_MODE_END_STREAM */ |
151 | *dest = *src; | 249 | // (p->flushRem == 5); |
250 | p->isFlushState = 1; | ||
152 | break; | 251 | break; |
153 | } | 252 | } |
154 | 253 | src++; | |
155 | num = (SizeT)(src - p->src); | 254 | // p->src = src; |
156 | 255 | } | |
157 | if (src == srcLim) | 256 | // ip = p->ip; // for marker version |
158 | { | 257 | /* marker was found */ |
159 | p->prevByte = src[-1]; | 258 | /* (v) contains marker that was found: |
160 | p->bufs[BCJ2_STREAM_MAIN] = dest; | 259 | bits [NUM_SHIFT_BITS : NUM_SHIFT_BITS + 7] |
161 | p->src = src; | 260 | : value of src[-2] : xx/xx/0f |
162 | p->ip += (UInt32)num; | 261 | bits [0 : 7] : value of src[-1] : e8/e9/8x |
163 | continue; | 262 | */ |
164 | } | 263 | { |
165 | |||
166 | { | 264 | { |
167 | Byte context = (Byte)(num == 0 ? p->prevByte : src[-1]); | 265 | #if NUM_SHIFT_BITS != 24 |
168 | BoolInt needConvert; | 266 | v &= ~(UInt32)CONV_FLAG; |
169 | 267 | #endif | |
170 | p->bufs[BCJ2_STREAM_MAIN] = dest + 1; | 268 | // UInt32 relat = 0; |
171 | p->ip += (UInt32)num + 1; | ||
172 | src++; | ||
173 | |||
174 | needConvert = False; | ||
175 | |||
176 | if ((SizeT)(p->srcLim - src) >= 4) | 269 | if ((SizeT)(p->srcLim - src) >= 4) |
177 | { | 270 | { |
178 | UInt32 relatVal = GetUi32(src); | 271 | /* |
179 | if ((p->fileSize == 0 || (UInt32)(p->ip + 4 + relatVal - p->fileIp) < p->fileSize) | 272 | if (relat != 0 || (Byte)v != 0xe8) |
180 | && ((relatVal + p->relatLimit) >> 1) < p->relatLimit) | 273 | BoolInt isBigOffset = True; |
181 | needConvert = True; | 274 | */ |
275 | const UInt32 relat = GetUi32(src); | ||
276 | /* | ||
277 | #define EXCLUDE_FLAG ((UInt32)1 << 4) | ||
278 | #define NEED_CONVERT(rel) ((((rel) + EXCLUDE_FLAG) & (0 - EXCLUDE_FLAG * 2)) != 0) | ||
279 | if (p->relatExcludeBits != 0) | ||
280 | { | ||
281 | const UInt32 flag = (UInt32)1 << (p->relatExcludeBits - 1); | ||
282 | isBigOffset = (((relat + flag) & (0 - flag * 2)) != 0); | ||
283 | } | ||
284 | // isBigOffset = False; // for debug | ||
285 | */ | ||
286 | ip -= p->fileIp64; | ||
287 | // Use the following if check, if (ip) is 64-bit: | ||
288 | if (ip > (((v + 0x20) >> 5) & 1)) // 23.00 : we eliminate multi-block overlap for (0f 80) and (e8/e9) | ||
289 | if ((CBcj2Enc_ip_unsigned)((CBcj2Enc_ip_signed)ip + 4 + (Int32)relat) <= p->fileSize64_minus1) | ||
290 | if (((UInt32)(relat + p->relatLimit) >> 1) < p->relatLimit) | ||
291 | v |= CONV_FLAG; | ||
182 | } | 292 | } |
183 | 293 | else if (p->finishMode == BCJ2_ENC_FINISH_MODE_CONTINUE) | |
184 | { | 294 | { |
185 | UInt32 bound; | 295 | // (p->srcLim - src < 4) |
186 | unsigned ttt; | 296 | // /* |
187 | Byte b = src[-1]; | 297 | // for non-marker version |
188 | CProb *prob = p->probs + (unsigned)(b == 0xE8 ? 2 + (unsigned)context : (b == 0xE9 ? 1 : 0)); | 298 | p->ip64--; // p->ip = ip - 1; |
189 | 299 | p->bufs[BCJ2_STREAM_MAIN]--; | |
190 | ttt = *prob; | 300 | src--; |
191 | bound = (p->range >> kNumModelBits) * ttt; | 301 | v >>= NUM_SHIFT_BITS; |
192 | 302 | // (0 < p->srcLim - p->src <= 4) | |
193 | if (!needConvert) | 303 | // */ |
304 | // v |= MARKER_FLAG; // for marker version | ||
305 | /* (p->state == BCJ2_ENC_STATE_ORIG) */ | ||
306 | WRITE_CONTEXT_AND_SRC | ||
307 | return; | ||
308 | } | ||
309 | { | ||
310 | const unsigned c = ((v + 0x17) >> 6) & 1; | ||
311 | CBcj2Prob *prob = p->probs + (unsigned) | ||
312 | (((0 - c) & (Byte)(v >> NUM_SHIFT_BITS)) + c + ((v >> 5) & 1)); | ||
313 | /* | ||
314 | ((Byte)v == 0xe8 ? 2 + ((Byte)(v >> 8)) : | ||
315 | ((Byte)v < 0xe8 ? 0 : 1)); // ((v >> 5) & 1)); | ||
316 | */ | ||
317 | const unsigned ttt = *prob; | ||
318 | const UInt32 bound = (p->range >> kNumBitModelTotalBits) * ttt; | ||
319 | if ((v & CONV_FLAG) == 0) | ||
194 | { | 320 | { |
321 | // static int yyy = 0; yyy++; printf("\n!needConvert = %d\n", yyy); | ||
322 | // v = (Byte)v; // for marker version | ||
195 | p->range = bound; | 323 | p->range = bound; |
196 | *prob = (CProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits)); | 324 | *prob = (CBcj2Prob)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits)); |
197 | p->src = src; | 325 | // WRITE_CONTEXT_AND_SRC |
198 | p->prevByte = b; | ||
199 | continue; | 326 | continue; |
200 | } | 327 | } |
201 | |||
202 | p->low += bound; | 328 | p->low += bound; |
203 | p->range -= bound; | 329 | p->range -= bound; |
204 | *prob = (CProb)(ttt - (ttt >> kNumMoveBits)); | 330 | *prob = (CBcj2Prob)(ttt - (ttt >> kNumMoveBits)); |
205 | 331 | } | |
332 | // p->context = src[3]; | ||
333 | { | ||
334 | // const unsigned cj = ((Byte)v == 0xe8 ? BCJ2_STREAM_CALL : BCJ2_STREAM_JUMP); | ||
335 | const unsigned cj = (((v + 0x57) >> 6) & 1) + BCJ2_STREAM_CALL; | ||
336 | ip = p->ip64; | ||
337 | v = GetUi32(src); // relat | ||
338 | ip += 4; | ||
339 | p->ip64 = ip; | ||
340 | src += 4; | ||
341 | // p->src = src; | ||
206 | { | 342 | { |
207 | UInt32 relatVal = GetUi32(src); | 343 | const UInt32 absol = (UInt32)ip + v; |
208 | UInt32 absVal; | 344 | Byte *cur = p->bufs[cj]; |
209 | p->ip += 4; | 345 | v >>= 24; |
210 | absVal = p->ip + relatVal; | 346 | // WRITE_CONTEXT |
211 | p->prevByte = src[3]; | 347 | if (cur == p->lims[cj]) |
212 | src += 4; | ||
213 | p->src = src; | ||
214 | { | 348 | { |
215 | unsigned cj = (b == 0xE8) ? BCJ2_STREAM_CALL : BCJ2_STREAM_JUMP; | 349 | p->state = cj; |
216 | Byte *cur = p->bufs[cj]; | 350 | p->tempTarget = absol; |
217 | if (cur == p->lims[cj]) | 351 | WRITE_CONTEXT_AND_SRC |
218 | { | 352 | return; |
219 | p->state = cj; | ||
220 | p->tempTarget = absVal; | ||
221 | return; | ||
222 | } | ||
223 | SetBe32(cur, absVal); | ||
224 | p->bufs[cj] = cur + 4; | ||
225 | } | 353 | } |
354 | SetBe32a(cur, absol) | ||
355 | p->bufs[cj] = cur + 4; | ||
226 | } | 356 | } |
227 | } | 357 | } |
228 | } | 358 | } |
229 | } | 359 | } |
230 | } | 360 | } // end of loop |
231 | } | 361 | } |
232 | 362 | ||
233 | if (p->finishMode != BCJ2_ENC_FINISH_MODE_END_STREAM) | 363 | for (; p->flushRem != 0; p->flushRem--) |
234 | return; | 364 | if (Bcj2_RangeEnc_ShiftLow(p)) |
235 | |||
236 | for (; p->flushPos < 5; p->flushPos++) | ||
237 | if (RangeEnc_ShiftLow(p)) | ||
238 | return; | 365 | return; |
239 | p->state = BCJ2_ENC_STATE_OK; | 366 | p->state = BCJ2_ENC_STATE_FINISHED; |
240 | } | 367 | } |
241 | 368 | ||
242 | 369 | ||
370 | /* | ||
371 | BCJ2 encoder needs look ahead for up to 4 bytes in (src) buffer. | ||
372 | So base function Bcj2Enc_Encode_2() | ||
373 | in BCJ2_ENC_FINISH_MODE_CONTINUE mode can return with | ||
374 | (p->state == BCJ2_ENC_STATE_ORIG && p->src < p->srcLim) | ||
375 | Bcj2Enc_Encode() solves that look ahead problem by using p->temp[] buffer. | ||
376 | so if (p->state == BCJ2_ENC_STATE_ORIG) after Bcj2Enc_Encode(), | ||
377 | then (p->src == p->srcLim). | ||
378 | And the caller's code is simpler with Bcj2Enc_Encode(). | ||
379 | */ | ||
380 | |||
381 | Z7_NO_INLINE | ||
243 | void Bcj2Enc_Encode(CBcj2Enc *p) | 382 | void Bcj2Enc_Encode(CBcj2Enc *p) |
244 | { | 383 | { |
245 | PRF(printf("\n")); | 384 | PRF2("\n----") |
246 | PRF(printf("---- ip = %8d tempPos = %8d src = %8d\n", p->ip, p->tempPos, p->srcLim - p->src)); | ||
247 | |||
248 | if (p->tempPos != 0) | 385 | if (p->tempPos != 0) |
249 | { | 386 | { |
387 | /* extra: number of bytes that were copied from (src) to (temp) buffer in this call */ | ||
250 | unsigned extra = 0; | 388 | unsigned extra = 0; |
251 | 389 | /* We will touch only minimal required number of bytes in input (src) stream. | |
390 | So we will add input bytes from (src) stream to temp[] with step of 1 byte. | ||
391 | We don't add new bytes to temp[] before Bcj2Enc_Encode_2() call | ||
392 | in first loop iteration because | ||
393 | - previous call of Bcj2Enc_Encode() could use another (finishMode), | ||
394 | - previous call could finish with (p->state != BCJ2_ENC_STATE_ORIG). | ||
395 | the case with full temp[] buffer (p->tempPos == 4) is possible here. | ||
396 | */ | ||
252 | for (;;) | 397 | for (;;) |
253 | { | 398 | { |
399 | // (0 < p->tempPos <= 5) // in non-marker version | ||
400 | /* p->src : the current src data position including extra bytes | ||
401 | that were copied to temp[] buffer in this call */ | ||
254 | const Byte *src = p->src; | 402 | const Byte *src = p->src; |
255 | const Byte *srcLim = p->srcLim; | 403 | const Byte *srcLim = p->srcLim; |
256 | EBcj2Enc_FinishMode finishMode = p->finishMode; | 404 | const EBcj2Enc_FinishMode finishMode = p->finishMode; |
257 | |||
258 | p->src = p->temp; | ||
259 | p->srcLim = p->temp + p->tempPos; | ||
260 | if (src != srcLim) | 405 | if (src != srcLim) |
406 | { | ||
407 | /* if there are some src data after the data copied to temp[], | ||
408 | then we use MODE_CONTINUE for temp data */ | ||
261 | p->finishMode = BCJ2_ENC_FINISH_MODE_CONTINUE; | 409 | p->finishMode = BCJ2_ENC_FINISH_MODE_CONTINUE; |
262 | 410 | } | |
263 | PRF(printf(" ip = %8d tempPos = %8d src = %8d\n", p->ip, p->tempPos, p->srcLim - p->src)); | 411 | p->src = p->temp; |
264 | 412 | p->srcLim = p->temp + p->tempPos; | |
413 | PRF2(" ") | ||
265 | Bcj2Enc_Encode_2(p); | 414 | Bcj2Enc_Encode_2(p); |
266 | |||
267 | { | 415 | { |
268 | unsigned num = (unsigned)(p->src - p->temp); | 416 | const unsigned num = (unsigned)(p->src - p->temp); |
269 | unsigned tempPos = p->tempPos - num; | 417 | const unsigned tempPos = p->tempPos - num; |
270 | unsigned i; | 418 | unsigned i; |
271 | p->tempPos = tempPos; | 419 | p->tempPos = tempPos; |
272 | for (i = 0; i < tempPos; i++) | 420 | for (i = 0; i < tempPos; i++) |
273 | p->temp[i] = p->temp[(size_t)i + num]; | 421 | p->temp[i] = p->temp[(SizeT)i + num]; |
274 | 422 | // tempPos : number of bytes in temp buffer | |
275 | p->src = src; | 423 | p->src = src; |
276 | p->srcLim = srcLim; | 424 | p->srcLim = srcLim; |
277 | p->finishMode = finishMode; | 425 | p->finishMode = finishMode; |
278 | 426 | if (p->state != BCJ2_ENC_STATE_ORIG) | |
279 | if (p->state != BCJ2_ENC_STATE_ORIG || src == srcLim) | 427 | { |
428 | // (p->tempPos <= 4) // in non-marker version | ||
429 | /* if (the reason of exit from Bcj2Enc_Encode_2() | ||
430 | is not BCJ2_ENC_STATE_ORIG), | ||
431 | then we exit from Bcj2Enc_Encode() with same reason */ | ||
432 | // optional code begin : we rollback (src) and tempPos, if it's possible: | ||
433 | if (extra >= tempPos) | ||
434 | extra = tempPos; | ||
435 | p->src = src - extra; | ||
436 | p->tempPos = tempPos - extra; | ||
437 | // optional code end : rollback of (src) and tempPos | ||
280 | return; | 438 | return; |
281 | 439 | } | |
440 | /* (p->tempPos <= 4) | ||
441 | (p->state == BCJ2_ENC_STATE_ORIG) | ||
442 | so encoder needs more data than in temp[] */ | ||
443 | if (src == srcLim) | ||
444 | return; // src buffer has no more input data. | ||
445 | /* (src != srcLim) | ||
446 | so we can provide more input data from src for Bcj2Enc_Encode_2() */ | ||
282 | if (extra >= tempPos) | 447 | if (extra >= tempPos) |
283 | { | 448 | { |
284 | p->src = src - tempPos; | 449 | /* (extra >= tempPos) means that temp buffer contains |
450 | only data from src buffer of this call. | ||
451 | So now we can encode without temp buffer */ | ||
452 | p->src = src - tempPos; // rollback (src) | ||
285 | p->tempPos = 0; | 453 | p->tempPos = 0; |
286 | break; | 454 | break; |
287 | } | 455 | } |
288 | 456 | // we append one additional extra byte from (src) to temp[] buffer: | |
289 | p->temp[tempPos] = src[0]; | 457 | p->temp[tempPos] = *src; |
290 | p->tempPos = tempPos + 1; | 458 | p->tempPos = tempPos + 1; |
459 | // (0 < p->tempPos <= 5) // in non-marker version | ||
291 | p->src = src + 1; | 460 | p->src = src + 1; |
292 | extra++; | 461 | extra++; |
293 | } | 462 | } |
294 | } | 463 | } |
295 | } | 464 | } |
296 | 465 | ||
297 | PRF(printf("++++ ip = %8d tempPos = %8d src = %8d\n", p->ip, p->tempPos, p->srcLim - p->src)); | 466 | PRF2("++++") |
298 | 467 | // (p->tempPos == 0) | |
299 | Bcj2Enc_Encode_2(p); | 468 | Bcj2Enc_Encode_2(p); |
469 | PRF2("====") | ||
300 | 470 | ||
301 | if (p->state == BCJ2_ENC_STATE_ORIG) | 471 | if (p->state == BCJ2_ENC_STATE_ORIG) |
302 | { | 472 | { |
303 | const Byte *src = p->src; | 473 | const Byte *src = p->src; |
304 | unsigned rem = (unsigned)(p->srcLim - src); | 474 | const Byte *srcLim = p->srcLim; |
305 | unsigned i; | 475 | const unsigned rem = (unsigned)(srcLim - src); |
306 | for (i = 0; i < rem; i++) | 476 | /* (rem <= 4) here. |
307 | p->temp[i] = src[i]; | 477 | if (p->src != p->srcLim), then |
308 | p->tempPos = rem; | 478 | - we copy non-processed bytes from (p->src) to temp[] buffer, |
309 | p->src = src + rem; | 479 | - we set p->src equal to p->srcLim. |
480 | */ | ||
481 | if (rem) | ||
482 | { | ||
483 | unsigned i = 0; | ||
484 | p->src = srcLim; | ||
485 | p->tempPos = rem; | ||
486 | // (0 < p->tempPos <= 4) | ||
487 | do | ||
488 | p->temp[i] = src[i]; | ||
489 | while (++i != rem); | ||
490 | } | ||
491 | // (p->tempPos <= 4) | ||
492 | // (p->src == p->srcLim) | ||
310 | } | 493 | } |
311 | } | 494 | } |
495 | |||
496 | #undef PRF2 | ||
497 | #undef CONV_FLAG | ||
498 | #undef MARKER_FLAG | ||
499 | #undef WRITE_CONTEXT | ||
500 | #undef WRITE_CONTEXT_AND_SRC | ||
501 | #undef ONE_ITER | ||
502 | #undef NUM_SHIFT_BITS | ||
503 | #undef kTopValue | ||
504 | #undef kNumBitModelTotalBits | ||
505 | #undef kBitModelTotal | ||
506 | #undef kNumMoveBits | ||