about | summary | refs | log | tree | commit | diff
path: root/C/Bcj2Enc.c
diff options
context:
space:
mode:
Diffstat (limited to 'C/Bcj2Enc.c')
-rw-r--r-- C/Bcj2Enc.c 559
1 files changed, 377 insertions, 182 deletions
diff --git a/C/Bcj2Enc.c b/C/Bcj2Enc.c
index 682362a..79460bb 100644
--- a/C/Bcj2Enc.c
+++ b/C/Bcj2Enc.c
@@ -1,60 +1,62 @@
1/* Bcj2Enc.c -- BCJ2 Encoder (Converter for x86 code) 1/* Bcj2Enc.c -- BCJ2 Encoder converter for x86 code (Branch CALL/JUMP variant2)
22021-02-09 : Igor Pavlov : Public domain */ 22023-04-02 : Igor Pavlov : Public domain */
3 3
4#include "Precomp.h" 4#include "Precomp.h"
5 5
6/* #define SHOW_STAT */ 6/* #define SHOW_STAT */
7
8#ifdef SHOW_STAT 7#ifdef SHOW_STAT
9#include <stdio.h> 8#include <stdio.h>
10#define PRF(x) x 9#define PRF2(s) printf("%s ip=%8x tempPos=%d src= %8x\n", s, (unsigned)p->ip64, p->tempPos, (unsigned)(p->srcLim - p->src));
11#else 10#else
12#define PRF(x) 11#define PRF2(s)
13#endif 12#endif
14 13
15#include <string.h>
16
17#include "Bcj2.h" 14#include "Bcj2.h"
18#include "CpuArch.h" 15#include "CpuArch.h"
19 16
20#define CProb UInt16
21
22#define kTopValue ((UInt32)1 << 24) 17#define kTopValue ((UInt32)1 << 24)
23#define kNumModelBits 11 18#define kNumBitModelTotalBits 11
24#define kBitModelTotal (1 << kNumModelBits) 19#define kBitModelTotal (1 << kNumBitModelTotalBits)
25#define kNumMoveBits 5 20#define kNumMoveBits 5
26 21
27void Bcj2Enc_Init(CBcj2Enc *p) 22void Bcj2Enc_Init(CBcj2Enc *p)
28{ 23{
29 unsigned i; 24 unsigned i;
30 25 p->state = BCJ2_ENC_STATE_ORIG;
31 p->state = BCJ2_ENC_STATE_OK;
32 p->finishMode = BCJ2_ENC_FINISH_MODE_CONTINUE; 26 p->finishMode = BCJ2_ENC_FINISH_MODE_CONTINUE;
33 27 p->context = 0;
34 p->prevByte = 0; 28 p->flushRem = 5;
35 29 p->isFlushState = 0;
36 p->cache = 0; 30 p->cache = 0;
37 p->range = 0xFFFFFFFF; 31 p->range = 0xffffffff;
38 p->low = 0; 32 p->low = 0;
39 p->cacheSize = 1; 33 p->cacheSize = 1;
40 34 p->ip64 = 0;
41 p->ip = 0; 35 p->fileIp64 = 0;
42 36 p->fileSize64_minus1 = BCJ2_ENC_FileSizeField_UNLIMITED;
43 p->fileIp = 0; 37 p->relatLimit = BCJ2_ENC_RELAT_LIMIT_DEFAULT;
44 p->fileSize = 0; 38 // p->relatExcludeBits = 0;
45 p->relatLimit = BCJ2_RELAT_LIMIT;
46
47 p->tempPos = 0; 39 p->tempPos = 0;
48
49 p->flushPos = 0;
50
51 for (i = 0; i < sizeof(p->probs) / sizeof(p->probs[0]); i++) 40 for (i = 0; i < sizeof(p->probs) / sizeof(p->probs[0]); i++)
52 p->probs[i] = kBitModelTotal >> 1; 41 p->probs[i] = kBitModelTotal >> 1;
53} 42}
54 43
55static BoolInt MY_FAST_CALL RangeEnc_ShiftLow(CBcj2Enc *p) 44// Z7_NO_INLINE
45Z7_FORCE_INLINE
46static BoolInt Bcj2_RangeEnc_ShiftLow(CBcj2Enc *p)
56{ 47{
57 if ((UInt32)p->low < (UInt32)0xFF000000 || (UInt32)(p->low >> 32) != 0) 48 const UInt32 low = (UInt32)p->low;
49 const unsigned high = (unsigned)
50 #if defined(Z7_MSC_VER_ORIGINAL) \
51 && defined(MY_CPU_X86) \
52 && defined(MY_CPU_LE) \
53 && !defined(MY_CPU_64BIT)
54 // we try to get rid of the __aullshr() call in MSVS-x86
55 (((const UInt32 *)&p->low)[1]); // [1] : for little-endian only
56 #else
57 (p->low >> 32);
58 #endif
59 if (low < (UInt32)0xff000000 || high != 0)
58 { 60 {
59 Byte *buf = p->bufs[BCJ2_STREAM_RC]; 61 Byte *buf = p->bufs[BCJ2_STREAM_RC];
60 do 62 do
@@ -65,247 +67,440 @@ static BoolInt MY_FAST_CALL RangeEnc_ShiftLow(CBcj2Enc *p)
65 p->bufs[BCJ2_STREAM_RC] = buf; 67 p->bufs[BCJ2_STREAM_RC] = buf;
66 return True; 68 return True;
67 } 69 }
68 *buf++ = (Byte)(p->cache + (Byte)(p->low >> 32)); 70 *buf++ = (Byte)(p->cache + high);
69 p->cache = 0xFF; 71 p->cache = 0xff;
70 } 72 }
71 while (--p->cacheSize); 73 while (--p->cacheSize);
72 p->bufs[BCJ2_STREAM_RC] = buf; 74 p->bufs[BCJ2_STREAM_RC] = buf;
73 p->cache = (Byte)((UInt32)p->low >> 24); 75 p->cache = (Byte)(low >> 24);
74 } 76 }
75 p->cacheSize++; 77 p->cacheSize++;
76 p->low = (UInt32)p->low << 8; 78 p->low = low << 8;
77 return False; 79 return False;
78} 80}
79 81
80static void Bcj2Enc_Encode_2(CBcj2Enc *p) 82
81{ 83/*
82 if (BCJ2_IS_32BIT_STREAM(p->state)) 84We can use 2 alternative versions of code:
851) non-marker version:
86 Byte CBcj2Enc::context
87 Byte temp[8];
88 Last byte of marker (e8/e9/[0f]8x) can be written to temp[] buffer.
89 Encoder writes last byte of marker (e8/e9/[0f]8x) to dest, only in conjunction
90 with writing branch symbol to range coder in same Bcj2Enc_Encode_2() call.
91
922) marker version:
93 UInt32 CBcj2Enc::context
94 Byte CBcj2Enc::temp[4];
95 MARKER_FLAG in CBcj2Enc::context shows that CBcj2Enc::context contains found marker.
96 it's allowed that
97 one call of Bcj2Enc_Encode_2() writes last byte of marker (e8/e9/[0f]8x) to dest,
98 and another call of Bcj2Enc_Encode_2() does offset conversion.
99 So different values of (fileIp) and (fileSize) are possible
100 in these different Bcj2Enc_Encode_2() calls.
101
102Also marker version requires additional if((v & MARKER_FLAG) == 0) check in main loop.
103So we use non-marker version.
104*/
105
106/*
107 Corner cases with overlap in multi-block.
108 before v23: there was one corner case, where converted instruction
109 could start in one sub-stream and finish in next sub-stream.
110 If multi-block (solid) encoding is used,
111 and BCJ2_ENC_FINISH_MODE_END_BLOCK is used for each sub-stream.
112 and (0f) is last byte of previous sub-stream
113 and (8x) is first byte of current sub-stream
114 then (0f 8x) pair is treated as marker by BCJ2 encoder and decoder.
115 BCJ2 encoder can convert 32-bit offset for that (0f 8x) pair,
116 if that offset meets limit requirements.
117 If encoder allows 32-bit offset conversion for such overlap case,
118 then the data in 3 uncompressed BCJ2 streams for some sub-stream
119 can depend on data of previous sub-stream.
120 That corner case is not a big problem, and it's a rare case.
121 Since v23.00 we do additional check to prevent conversions in such overlap cases.
122*/
123
124/*
125 Bcj2Enc_Encode_2() output variables at exit:
83 { 126 {
84 Byte *cur = p->bufs[p->state]; 127 if (Bcj2Enc_Encode_2() exits with (p->state == BCJ2_ENC_STATE_ORIG))
85 if (cur == p->lims[p->state]) 128 {
86 return; 129 it means that encoder needs more input data.
87 SetBe32(cur, p->tempTarget); 130 if (p->srcLim == p->src) at exit, then
88 p->bufs[p->state] = cur + 4; 131 {
132 (p->finishMode != BCJ2_ENC_FINISH_MODE_END_STREAM)
133 all input data were read and processed, and we are ready for
134 new input data.
135 }
136 else
137 {
138 (p->srcLim != p->src)
139 (p->finishMode == BCJ2_ENC_FINISH_MODE_CONTINUE)
140 The encoder has found e8/e9/0f_8x marker,
141 and p->src points to last byte of that marker,
142 Bcj2Enc_Encode_2() needs more input data to get totally
143 5 bytes (last byte of marker and 32-bit branch offset)
144 as continuous array starting from p->src.
145 (p->srcLim - p->src < 5) requirement is met after exit.
146 So non-processed residue from p->src to p->srcLim is always less than 5 bytes.
147 }
148 }
89 } 149 }
150*/
90 151
91 p->state = BCJ2_ENC_STATE_ORIG; 152Z7_NO_INLINE
92 153static void Bcj2Enc_Encode_2(CBcj2Enc *p)
93 for (;;) 154{
155 if (!p->isFlushState)
94 { 156 {
95 if (p->range < kTopValue) 157 const Byte *src;
158 UInt32 v;
96 { 159 {
97 if (RangeEnc_ShiftLow(p)) 160 const unsigned state = p->state;
98 return; 161 if (BCJ2_IS_32BIT_STREAM(state))
99 p->range <<= 8; 162 {
163 Byte *cur = p->bufs[state];
164 if (cur == p->lims[state])
165 return;
166 SetBe32a(cur, p->tempTarget)
167 p->bufs[state] = cur + 4;
168 }
100 } 169 }
170 p->state = BCJ2_ENC_STATE_ORIG; // for main reason of exit
171 src = p->src;
172 v = p->context;
173
174 // #define WRITE_CONTEXT p->context = v; // for marker version
175 #define WRITE_CONTEXT p->context = (Byte)v;
176 #define WRITE_CONTEXT_AND_SRC p->src = src; WRITE_CONTEXT
101 177
178 for (;;)
102 { 179 {
180 // const Byte *src;
181 // UInt32 v;
182 CBcj2Enc_ip_unsigned ip;
183 if (p->range < kTopValue)
184 {
185 // to reduce register pressure and code size: we save and restore local variables.
186 WRITE_CONTEXT_AND_SRC
187 if (Bcj2_RangeEnc_ShiftLow(p))
188 return;
189 p->range <<= 8;
190 src = p->src;
191 v = p->context;
192 }
193 // src = p->src;
194 // #define MARKER_FLAG ((UInt32)1 << 17)
195 // if ((v & MARKER_FLAG) == 0) // for marker version
103 { 196 {
104 const Byte *src = p->src;
105 const Byte *srcLim; 197 const Byte *srcLim;
106 Byte *dest; 198 Byte *dest = p->bufs[BCJ2_STREAM_MAIN];
107 SizeT num = (SizeT)(p->srcLim - src);
108
109 if (p->finishMode == BCJ2_ENC_FINISH_MODE_CONTINUE)
110 { 199 {
111 if (num <= 4) 200 const SizeT remSrc = (SizeT)(p->srcLim - src);
112 return; 201 SizeT rem = (SizeT)(p->lims[BCJ2_STREAM_MAIN] - dest);
113 num -= 4; 202 if (rem >= remSrc)
203 rem = remSrc;
204 srcLim = src + rem;
114 } 205 }
115 else if (num == 0) 206 /* p->context contains context of previous byte:
116 break; 207 bits [0 : 7] : src[-1], if (src) was changed in this call
117 208 bits [8 : 31] : are undefined for non-marker version
118 dest = p->bufs[BCJ2_STREAM_MAIN]; 209 */
119 if (num > (SizeT)(p->lims[BCJ2_STREAM_MAIN] - dest)) 210 // v = p->context;
211 #define NUM_SHIFT_BITS 24
212 #define CONV_FLAG ((UInt32)1 << 16)
213 #define ONE_ITER { \
214 b = src[0]; \
215 *dest++ = (Byte)b; \
216 v = (v << NUM_SHIFT_BITS) | b; \
217 if (((b + (0x100 - 0xe8)) & 0xfe) == 0) break; \
218 if (((v - (((UInt32)0x0f << (NUM_SHIFT_BITS)) + 0x80)) & \
219 ((((UInt32)1 << (4 + NUM_SHIFT_BITS)) - 0x1) << 4)) == 0) break; \
220 src++; if (src == srcLim) { break; } }
221
222 if (src != srcLim)
223 for (;;)
120 { 224 {
121 num = (SizeT)(p->lims[BCJ2_STREAM_MAIN] - dest); 225 /* clang can generate inefficient code with setne instead of two jcc instructions.
122 if (num == 0) 226 we can use 2 iterations and external (unsigned b) to avoid that inefficient code generation. */
123 { 227 unsigned b;
124 p->state = BCJ2_STREAM_MAIN; 228 ONE_ITER
125 return; 229 ONE_ITER
126 }
127 } 230 }
128 231
129 srcLim = src + num; 232 ip = p->ip64 + (CBcj2Enc_ip_unsigned)(SizeT)(dest - p->bufs[BCJ2_STREAM_MAIN]);
233 p->bufs[BCJ2_STREAM_MAIN] = dest;
234 p->ip64 = ip;
130 235
131 if (p->prevByte == 0x0F && (src[0] & 0xF0) == 0x80) 236 if (src == srcLim)
132 *dest = src[0];
133 else for (;;)
134 { 237 {
135 Byte b = *src; 238 WRITE_CONTEXT_AND_SRC
136 *dest = b; 239 if (src != p->srcLim)
137 if (b != 0x0F)
138 { 240 {
139 if ((b & 0xFE) == 0xE8) 241 p->state = BCJ2_STREAM_MAIN;
140 break; 242 return;
141 dest++;
142 if (++src != srcLim)
143 continue;
144 break;
145 } 243 }
146 dest++; 244 /* (p->src == p->srcLim)
147 if (++src == srcLim) 245 (p->state == BCJ2_ENC_STATE_ORIG) */
148 break; 246 if (p->finishMode != BCJ2_ENC_FINISH_MODE_END_STREAM)
149 if ((*src & 0xF0) != 0x80) 247 return;
150 continue; 248 /* (p->finishMode == BCJ2_ENC_FINISH_MODE_END_STREAM */
151 *dest = *src; 249 // (p->flushRem == 5);
250 p->isFlushState = 1;
152 break; 251 break;
153 } 252 }
154 253 src++;
155 num = (SizeT)(src - p->src); 254 // p->src = src;
156 255 }
157 if (src == srcLim) 256 // ip = p->ip; // for marker version
158 { 257 /* marker was found */
159 p->prevByte = src[-1]; 258 /* (v) contains marker that was found:
160 p->bufs[BCJ2_STREAM_MAIN] = dest; 259 bits [NUM_SHIFT_BITS : NUM_SHIFT_BITS + 7]
161 p->src = src; 260 : value of src[-2] : xx/xx/0f
162 p->ip += (UInt32)num; 261 bits [0 : 7] : value of src[-1] : e8/e9/8x
163 continue; 262 */
164 } 263 {
165
166 { 264 {
167 Byte context = (Byte)(num == 0 ? p->prevByte : src[-1]); 265 #if NUM_SHIFT_BITS != 24
168 BoolInt needConvert; 266 v &= ~(UInt32)CONV_FLAG;
169 267 #endif
170 p->bufs[BCJ2_STREAM_MAIN] = dest + 1; 268 // UInt32 relat = 0;
171 p->ip += (UInt32)num + 1;
172 src++;
173
174 needConvert = False;
175
176 if ((SizeT)(p->srcLim - src) >= 4) 269 if ((SizeT)(p->srcLim - src) >= 4)
177 { 270 {
178 UInt32 relatVal = GetUi32(src); 271 /*
179 if ((p->fileSize == 0 || (UInt32)(p->ip + 4 + relatVal - p->fileIp) < p->fileSize) 272 if (relat != 0 || (Byte)v != 0xe8)
180 && ((relatVal + p->relatLimit) >> 1) < p->relatLimit) 273 BoolInt isBigOffset = True;
181 needConvert = True; 274 */
275 const UInt32 relat = GetUi32(src);
276 /*
277 #define EXCLUDE_FLAG ((UInt32)1 << 4)
278 #define NEED_CONVERT(rel) ((((rel) + EXCLUDE_FLAG) & (0 - EXCLUDE_FLAG * 2)) != 0)
279 if (p->relatExcludeBits != 0)
280 {
281 const UInt32 flag = (UInt32)1 << (p->relatExcludeBits - 1);
282 isBigOffset = (((relat + flag) & (0 - flag * 2)) != 0);
283 }
284 // isBigOffset = False; // for debug
285 */
286 ip -= p->fileIp64;
287 // Use the following if check, if (ip) is 64-bit:
288 if (ip > (((v + 0x20) >> 5) & 1)) // 23.00 : we eliminate multi-block overlap for (0f 8x) and (e8/e9)
289 if ((CBcj2Enc_ip_unsigned)((CBcj2Enc_ip_signed)ip + 4 + (Int32)relat) <= p->fileSize64_minus1)
290 if (((UInt32)(relat + p->relatLimit) >> 1) < p->relatLimit)
291 v |= CONV_FLAG;
182 } 292 }
183 293 else if (p->finishMode == BCJ2_ENC_FINISH_MODE_CONTINUE)
184 { 294 {
185 UInt32 bound; 295 // (p->srcLim - src < 4)
186 unsigned ttt; 296 // /*
187 Byte b = src[-1]; 297 // for non-marker version
188 CProb *prob = p->probs + (unsigned)(b == 0xE8 ? 2 + (unsigned)context : (b == 0xE9 ? 1 : 0)); 298 p->ip64--; // p->ip = ip - 1;
189 299 p->bufs[BCJ2_STREAM_MAIN]--;
190 ttt = *prob; 300 src--;
191 bound = (p->range >> kNumModelBits) * ttt; 301 v >>= NUM_SHIFT_BITS;
192 302 // (0 < p->srcLim - p->src <= 4)
193 if (!needConvert) 303 // */
304 // v |= MARKER_FLAG; // for marker version
305 /* (p->state == BCJ2_ENC_STATE_ORIG) */
306 WRITE_CONTEXT_AND_SRC
307 return;
308 }
309 {
310 const unsigned c = ((v + 0x17) >> 6) & 1;
311 CBcj2Prob *prob = p->probs + (unsigned)
312 (((0 - c) & (Byte)(v >> NUM_SHIFT_BITS)) + c + ((v >> 5) & 1));
313 /*
314 ((Byte)v == 0xe8 ? 2 + ((Byte)(v >> 8)) :
315 ((Byte)v < 0xe8 ? 0 : 1)); // ((v >> 5) & 1));
316 */
317 const unsigned ttt = *prob;
318 const UInt32 bound = (p->range >> kNumBitModelTotalBits) * ttt;
319 if ((v & CONV_FLAG) == 0)
194 { 320 {
321 // static int yyy = 0; yyy++; printf("\n!needConvert = %d\n", yyy);
322 // v = (Byte)v; // for marker version
195 p->range = bound; 323 p->range = bound;
196 *prob = (CProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits)); 324 *prob = (CBcj2Prob)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits));
197 p->src = src; 325 // WRITE_CONTEXT_AND_SRC
198 p->prevByte = b;
199 continue; 326 continue;
200 } 327 }
201
202 p->low += bound; 328 p->low += bound;
203 p->range -= bound; 329 p->range -= bound;
204 *prob = (CProb)(ttt - (ttt >> kNumMoveBits)); 330 *prob = (CBcj2Prob)(ttt - (ttt >> kNumMoveBits));
205 331 }
332 // p->context = src[3];
333 {
334 // const unsigned cj = ((Byte)v == 0xe8 ? BCJ2_STREAM_CALL : BCJ2_STREAM_JUMP);
335 const unsigned cj = (((v + 0x57) >> 6) & 1) + BCJ2_STREAM_CALL;
336 ip = p->ip64;
337 v = GetUi32(src); // relat
338 ip += 4;
339 p->ip64 = ip;
340 src += 4;
341 // p->src = src;
206 { 342 {
207 UInt32 relatVal = GetUi32(src); 343 const UInt32 absol = (UInt32)ip + v;
208 UInt32 absVal; 344 Byte *cur = p->bufs[cj];
209 p->ip += 4; 345 v >>= 24;
210 absVal = p->ip + relatVal; 346 // WRITE_CONTEXT
211 p->prevByte = src[3]; 347 if (cur == p->lims[cj])
212 src += 4;
213 p->src = src;
214 { 348 {
215 unsigned cj = (b == 0xE8) ? BCJ2_STREAM_CALL : BCJ2_STREAM_JUMP; 349 p->state = cj;
216 Byte *cur = p->bufs[cj]; 350 p->tempTarget = absol;
217 if (cur == p->lims[cj]) 351 WRITE_CONTEXT_AND_SRC
218 { 352 return;
219 p->state = cj;
220 p->tempTarget = absVal;
221 return;
222 }
223 SetBe32(cur, absVal);
224 p->bufs[cj] = cur + 4;
225 } 353 }
354 SetBe32a(cur, absol)
355 p->bufs[cj] = cur + 4;
226 } 356 }
227 } 357 }
228 } 358 }
229 } 359 }
230 } 360 } // end of loop
231 } 361 }
232 362
233 if (p->finishMode != BCJ2_ENC_FINISH_MODE_END_STREAM) 363 for (; p->flushRem != 0; p->flushRem--)
234 return; 364 if (Bcj2_RangeEnc_ShiftLow(p))
235
236 for (; p->flushPos < 5; p->flushPos++)
237 if (RangeEnc_ShiftLow(p))
238 return; 365 return;
239 p->state = BCJ2_ENC_STATE_OK; 366 p->state = BCJ2_ENC_STATE_FINISHED;
240} 367}
241 368
242 369
370/*
371BCJ2 encoder needs look ahead for up to 4 bytes in (src) buffer.
372So base function Bcj2Enc_Encode_2()
373 in BCJ2_ENC_FINISH_MODE_CONTINUE mode can return with
374 (p->state == BCJ2_ENC_STATE_ORIG && p->src < p->srcLim)
375Bcj2Enc_Encode() solves that look ahead problem by using p->temp[] buffer.
376 so if (p->state == BCJ2_ENC_STATE_ORIG) after Bcj2Enc_Encode(),
377 then (p->src == p->srcLim).
378 And the caller's code is simpler with Bcj2Enc_Encode().
379*/
380
381Z7_NO_INLINE
243void Bcj2Enc_Encode(CBcj2Enc *p) 382void Bcj2Enc_Encode(CBcj2Enc *p)
244{ 383{
245 PRF(printf("\n")); 384 PRF2("\n----")
246 PRF(printf("---- ip = %8d tempPos = %8d src = %8d\n", p->ip, p->tempPos, p->srcLim - p->src));
247
248 if (p->tempPos != 0) 385 if (p->tempPos != 0)
249 { 386 {
387 /* extra: number of bytes that were copied from (src) to (temp) buffer in this call */
250 unsigned extra = 0; 388 unsigned extra = 0;
251 389 /* We will touch only minimal required number of bytes in input (src) stream.
390 So we will add input bytes from (src) stream to temp[] with step of 1 byte.
391 We don't add new bytes to temp[] before Bcj2Enc_Encode_2() call
392 in first loop iteration because
393 - previous call of Bcj2Enc_Encode() could use another (finishMode),
394 - previous call could finish with (p->state != BCJ2_ENC_STATE_ORIG).
395 the case with full temp[] buffer (p->tempPos == 4) is possible here.
396 */
252 for (;;) 397 for (;;)
253 { 398 {
399 // (0 < p->tempPos <= 5) // in non-marker version
400 /* p->src : the current src data position including extra bytes
401 that were copied to temp[] buffer in this call */
254 const Byte *src = p->src; 402 const Byte *src = p->src;
255 const Byte *srcLim = p->srcLim; 403 const Byte *srcLim = p->srcLim;
256 EBcj2Enc_FinishMode finishMode = p->finishMode; 404 const EBcj2Enc_FinishMode finishMode = p->finishMode;
257
258 p->src = p->temp;
259 p->srcLim = p->temp + p->tempPos;
260 if (src != srcLim) 405 if (src != srcLim)
406 {
407 /* if there are some src data after the data copied to temp[],
408 then we use MODE_CONTINUE for temp data */
261 p->finishMode = BCJ2_ENC_FINISH_MODE_CONTINUE; 409 p->finishMode = BCJ2_ENC_FINISH_MODE_CONTINUE;
262 410 }
263 PRF(printf(" ip = %8d tempPos = %8d src = %8d\n", p->ip, p->tempPos, p->srcLim - p->src)); 411 p->src = p->temp;
264 412 p->srcLim = p->temp + p->tempPos;
413 PRF2(" ")
265 Bcj2Enc_Encode_2(p); 414 Bcj2Enc_Encode_2(p);
266
267 { 415 {
268 unsigned num = (unsigned)(p->src - p->temp); 416 const unsigned num = (unsigned)(p->src - p->temp);
269 unsigned tempPos = p->tempPos - num; 417 const unsigned tempPos = p->tempPos - num;
270 unsigned i; 418 unsigned i;
271 p->tempPos = tempPos; 419 p->tempPos = tempPos;
272 for (i = 0; i < tempPos; i++) 420 for (i = 0; i < tempPos; i++)
273 p->temp[i] = p->temp[(size_t)i + num]; 421 p->temp[i] = p->temp[(SizeT)i + num];
274 422 // tempPos : number of bytes in temp buffer
275 p->src = src; 423 p->src = src;
276 p->srcLim = srcLim; 424 p->srcLim = srcLim;
277 p->finishMode = finishMode; 425 p->finishMode = finishMode;
278 426 if (p->state != BCJ2_ENC_STATE_ORIG)
279 if (p->state != BCJ2_ENC_STATE_ORIG || src == srcLim) 427 {
428 // (p->tempPos <= 4) // in non-marker version
429 /* if (the reason of exit from Bcj2Enc_Encode_2()
430 is not BCJ2_ENC_STATE_ORIG),
431 then we exit from Bcj2Enc_Encode() with same reason */
432 // optional code begin : we rollback (src) and tempPos, if it's possible:
433 if (extra >= tempPos)
434 extra = tempPos;
435 p->src = src - extra;
436 p->tempPos = tempPos - extra;
437 // optional code end : rollback of (src) and tempPos
280 return; 438 return;
281 439 }
440 /* (p->tempPos <= 4)
441 (p->state == BCJ2_ENC_STATE_ORIG)
442 so encoder needs more data than in temp[] */
443 if (src == srcLim)
444 return; // src buffer has no more input data.
445 /* (src != srcLim)
446 so we can provide more input data from src for Bcj2Enc_Encode_2() */
282 if (extra >= tempPos) 447 if (extra >= tempPos)
283 { 448 {
284 p->src = src - tempPos; 449 /* (extra >= tempPos) means that temp buffer contains
450 only data from src buffer of this call.
451 So now we can encode without temp buffer */
452 p->src = src - tempPos; // rollback (src)
285 p->tempPos = 0; 453 p->tempPos = 0;
286 break; 454 break;
287 } 455 }
288 456 // we append one additional extra byte from (src) to temp[] buffer:
289 p->temp[tempPos] = src[0]; 457 p->temp[tempPos] = *src;
290 p->tempPos = tempPos + 1; 458 p->tempPos = tempPos + 1;
459 // (0 < p->tempPos <= 5) // in non-marker version
291 p->src = src + 1; 460 p->src = src + 1;
292 extra++; 461 extra++;
293 } 462 }
294 } 463 }
295 } 464 }
296 465
297 PRF(printf("++++ ip = %8d tempPos = %8d src = %8d\n", p->ip, p->tempPos, p->srcLim - p->src)); 466 PRF2("++++")
298 467 // (p->tempPos == 0)
299 Bcj2Enc_Encode_2(p); 468 Bcj2Enc_Encode_2(p);
469 PRF2("====")
300 470
301 if (p->state == BCJ2_ENC_STATE_ORIG) 471 if (p->state == BCJ2_ENC_STATE_ORIG)
302 { 472 {
303 const Byte *src = p->src; 473 const Byte *src = p->src;
304 unsigned rem = (unsigned)(p->srcLim - src); 474 const Byte *srcLim = p->srcLim;
305 unsigned i; 475 const unsigned rem = (unsigned)(srcLim - src);
306 for (i = 0; i < rem; i++) 476 /* (rem <= 4) here.
307 p->temp[i] = src[i]; 477 if (p->src != p->srcLim), then
308 p->tempPos = rem; 478 - we copy non-processed bytes from (p->src) to temp[] buffer,
309 p->src = src + rem; 479 - we set p->src equal to p->srcLim.
480 */
481 if (rem)
482 {
483 unsigned i = 0;
484 p->src = srcLim;
485 p->tempPos = rem;
486 // (0 < p->tempPos <= 4)
487 do
488 p->temp[i] = src[i];
489 while (++i != rem);
490 }
491 // (p->tempPos <= 4)
492 // (p->src == p->srcLim)
310 } 493 }
311} 494}
495
496#undef PRF2
497#undef CONV_FLAG
498#undef MARKER_FLAG
499#undef WRITE_CONTEXT
500#undef WRITE_CONTEXT_AND_SRC
501#undef ONE_ITER
502#undef NUM_SHIFT_BITS
503#undef kTopValue
504#undef kNumBitModelTotalBits
505#undef kBitModelTotal
506#undef kNumMoveBits