diff options
author | Igor Pavlov <87184205+ip7z@users.noreply.github.com> | 2023-06-21 00:00:00 +0000 |
---|---|---|
committer | Igor Pavlov <87184205+ip7z@users.noreply.github.com> | 2023-12-17 14:59:19 +0500 |
commit | 5b39dc76f1bc82f941d5c800ab9f34407a06b53a (patch) | |
tree | fe5e17420300b715021a76328444088d32047963 /C/Bcj2.h | |
parent | 93be7d4abfd4233228f58ee1fbbcd76d91be66a4 (diff) | |
download | 7zip-5b39dc76f1bc82f941d5c800ab9f34407a06b53a.tar.gz 7zip-5b39dc76f1bc82f941d5c800ab9f34407a06b53a.tar.bz2 7zip-5b39dc76f1bc82f941d5c800ab9f34407a06b53a.zip |
23.01
Diffstat (limited to 'C/Bcj2.h')
-rw-r--r-- | C/Bcj2.h | 270 |
1 files changed, 228 insertions, 42 deletions
@@ -1,8 +1,8 @@ | |||
1 | /* Bcj2.h -- BCJ2 Converter for x86 code | 1 | /* Bcj2.h -- BCJ2 converter for x86 code (Branch CALL/JUMP variant2) |
2 | 2014-11-10 : Igor Pavlov : Public domain */ | 2 | 2023-03-02 : Igor Pavlov : Public domain */ |
3 | 3 | ||
4 | #ifndef __BCJ2_H | 4 | #ifndef ZIP7_INC_BCJ2_H |
5 | #define __BCJ2_H | 5 | #define ZIP7_INC_BCJ2_H |
6 | 6 | ||
7 | #include "7zTypes.h" | 7 | #include "7zTypes.h" |
8 | 8 | ||
@@ -26,37 +26,68 @@ enum | |||
26 | BCJ2_DEC_STATE_ORIG_3, | 26 | BCJ2_DEC_STATE_ORIG_3, |
27 | 27 | ||
28 | BCJ2_DEC_STATE_ORIG, | 28 | BCJ2_DEC_STATE_ORIG, |
29 | BCJ2_DEC_STATE_OK | 29 | BCJ2_DEC_STATE_ERROR /* after detected data error */ |
30 | }; | 30 | }; |
31 | 31 | ||
32 | enum | 32 | enum |
33 | { | 33 | { |
34 | BCJ2_ENC_STATE_ORIG = BCJ2_NUM_STREAMS, | 34 | BCJ2_ENC_STATE_ORIG = BCJ2_NUM_STREAMS, |
35 | BCJ2_ENC_STATE_OK | 35 | BCJ2_ENC_STATE_FINISHED /* it's state after fully encoded stream */ |
36 | }; | 36 | }; |
37 | 37 | ||
38 | 38 | ||
39 | #define BCJ2_IS_32BIT_STREAM(s) ((s) == BCJ2_STREAM_CALL || (s) == BCJ2_STREAM_JUMP) | 39 | /* #define BCJ2_IS_32BIT_STREAM(s) ((s) == BCJ2_STREAM_CALL || (s) == BCJ2_STREAM_JUMP) */ |
40 | #define BCJ2_IS_32BIT_STREAM(s) ((unsigned)((unsigned)(s) - (unsigned)BCJ2_STREAM_CALL) < 2) | ||
40 | 41 | ||
41 | /* | 42 | /* |
42 | CBcj2Dec / CBcj2Enc | 43 | CBcj2Dec / CBcj2Enc |
43 | bufs sizes: | 44 | bufs sizes: |
44 | BUF_SIZE(n) = lims[n] - bufs[n] | 45 | BUF_SIZE(n) = lims[n] - bufs[n] |
45 | bufs sizes for BCJ2_STREAM_CALL and BCJ2_STREAM_JUMP must be mutliply of 4: | 46 | bufs sizes for BCJ2_STREAM_CALL and BCJ2_STREAM_JUMP must be a multiple of 4: |
46 | (BUF_SIZE(BCJ2_STREAM_CALL) & 3) == 0 | 47 | (BUF_SIZE(BCJ2_STREAM_CALL) & 3) == 0 |
47 | (BUF_SIZE(BCJ2_STREAM_JUMP) & 3) == 0 | 48 | (BUF_SIZE(BCJ2_STREAM_JUMP) & 3) == 0 |
48 | */ | 49 | */ |
49 | 50 | ||
51 | // typedef UInt32 CBcj2Prob; | ||
52 | typedef UInt16 CBcj2Prob; | ||
53 | |||
54 | /* | ||
55 | BCJ2 encoder / decoder internal requirements: | ||
56 | - If last bytes of stream contain marker (e8/e9/0f8x), then | ||
57 | there is also encoded symbol (0 : no conversion) in RC stream. | ||
58 | - One case of overlapped instructions is supported, | ||
59 | if last byte of converted instruction is (0f) and next byte is (8x): | ||
60 | marker [xx xx xx 0f] 8x | ||
61 | then the pair (0f 8x) is treated as marker. | ||
62 | */ | ||
63 | |||
64 | /* ---------- BCJ2 Decoder ---------- */ | ||
65 | |||
50 | /* | 66 | /* |
51 | CBcj2Dec: | 67 | CBcj2Dec: |
52 | dest is allowed to overlap with bufs[BCJ2_STREAM_MAIN], with the following conditions: | 68 | (dest) is allowed to overlap with bufs[BCJ2_STREAM_MAIN], with the following conditions: |
53 | bufs[BCJ2_STREAM_MAIN] >= dest && | 69 | bufs[BCJ2_STREAM_MAIN] >= dest && |
54 | bufs[BCJ2_STREAM_MAIN] - dest >= tempReserv + | 70 | bufs[BCJ2_STREAM_MAIN] - dest >= |
55 | BUF_SIZE(BCJ2_STREAM_CALL) + | 71 | BUF_SIZE(BCJ2_STREAM_CALL) + |
56 | BUF_SIZE(BCJ2_STREAM_JUMP) | 72 | BUF_SIZE(BCJ2_STREAM_JUMP) |
57 | tempReserv = 0 : for first call of Bcj2Dec_Decode | 73 | reserve = bufs[BCJ2_STREAM_MAIN] - dest - |
58 | tempReserv = 4 : for any other calls of Bcj2Dec_Decode | 74 | ( BUF_SIZE(BCJ2_STREAM_CALL) + |
59 | overlap with offset = 1 is not allowed | 75 | BUF_SIZE(BCJ2_STREAM_JUMP) ) |
76 | and additional conditions: | ||
77 | if (it's first call of Bcj2Dec_Decode() after Bcj2Dec_Init()) | ||
78 | { | ||
79 | (reserve != 1) : if (ver < v23.00) | ||
80 | } | ||
81 | else // (if there is more than one call of Bcj2Dec_Decode() after Bcj2Dec_Init()) | ||
82 | { | ||
83 | (reserve >= 6) : if (ver < v23.00) | ||
84 | (reserve >= 4) : if (ver >= v23.00) | ||
85 | We need that (reserve) because after first call of Bcj2Dec_Decode(), | ||
86 | CBcj2Dec::temp can contain up to 4 bytes for writing to (dest). | ||
87 | } | ||
88 | (reserve == 0) is allowed, if we decode full stream via single call of Bcj2Dec_Decode(). | ||
89 | (reserve == 0) also is allowed in case of multi-call, if we use fixed buffers, | ||
90 | and (reserve) is calculated from full (final) sizes of all streams before first call. | ||
60 | */ | 91 | */ |
61 | 92 | ||
62 | typedef struct | 93 | typedef struct |
@@ -68,21 +99,65 @@ typedef struct | |||
68 | 99 | ||
69 | unsigned state; /* BCJ2_STREAM_MAIN has more priority than BCJ2_STATE_ORIG */ | 100 | unsigned state; /* BCJ2_STREAM_MAIN has more priority than BCJ2_STATE_ORIG */ |
70 | 101 | ||
71 | UInt32 ip; | 102 | UInt32 ip; /* property of starting base for decoding */ |
72 | Byte temp[4]; | 103 | UInt32 temp; /* Byte temp[4]; */ |
73 | UInt32 range; | 104 | UInt32 range; |
74 | UInt32 code; | 105 | UInt32 code; |
75 | UInt16 probs[2 + 256]; | 106 | CBcj2Prob probs[2 + 256]; |
76 | } CBcj2Dec; | 107 | } CBcj2Dec; |
77 | 108 | ||
109 | |||
110 | /* Note: | ||
111 | Bcj2Dec_Init() sets (CBcj2Dec::ip = 0) | ||
112 | if (ip != 0) property is required, the caller must set CBcj2Dec::ip after Bcj2Dec_Init() | ||
113 | */ | ||
78 | void Bcj2Dec_Init(CBcj2Dec *p); | 114 | void Bcj2Dec_Init(CBcj2Dec *p); |
79 | 115 | ||
80 | /* Returns: SZ_OK or SZ_ERROR_DATA */ | 116 | |
117 | /* Bcj2Dec_Decode(): | ||
118 | returns: | ||
119 | SZ_OK | ||
120 | SZ_ERROR_DATA : if data in 5 starting bytes of BCJ2_STREAM_RC stream are not correct | ||
121 | */ | ||
81 | SRes Bcj2Dec_Decode(CBcj2Dec *p); | 122 | SRes Bcj2Dec_Decode(CBcj2Dec *p); |
82 | 123 | ||
83 | #define Bcj2Dec_IsFinished(_p_) ((_p_)->code == 0) | 124 | /* To check that decoding was finished you can compare |
125 | sizes of processed streams with sizes known from another sources. | ||
126 | You must do at least one mandatory check from the two following options: | ||
127 | - the check for size of processed output (ORIG) stream. | ||
128 | - the check for size of processed input (MAIN) stream. | ||
129 | additional optional checks: | ||
130 | - the checks for processed sizes of all input streams (MAIN, CALL, JUMP, RC) | ||
131 | - the checks Bcj2Dec_IsMaybeFinished*() | ||
132 | also before actual decoding you can check that the | ||
133 | following condition is met for stream sizes: | ||
134 | ( size(ORIG) == size(MAIN) + size(CALL) + size(JUMP) ) | ||
135 | */ | ||
84 | 136 | ||
137 | /* (state == BCJ2_STREAM_MAIN) means that decoder is ready for | ||
138 | additional input data in BCJ2_STREAM_MAIN stream. | ||
139 | Note that (state == BCJ2_STREAM_MAIN) is allowed for non-finished decoding. | ||
140 | */ | ||
141 | #define Bcj2Dec_IsMaybeFinished_state_MAIN(_p_) ((_p_)->state == BCJ2_STREAM_MAIN) | ||
85 | 142 | ||
143 | /* if the stream decoding was finished correctly, then range decoder | ||
144 | part of CBcj2Dec also was finished, and then (CBcj2Dec::code == 0). | ||
145 | Note that (CBcj2Dec::code == 0) is allowed for non-finished decoding. | ||
146 | */ | ||
147 | #define Bcj2Dec_IsMaybeFinished_code(_p_) ((_p_)->code == 0) | ||
148 | |||
149 | /* use Bcj2Dec_IsMaybeFinished() only as additional check | ||
150 | after at least one mandatory check from the two following options: | ||
151 | - the check for size of processed output (ORIG) stream. | ||
152 | - the check for size of processed input (MAIN) stream. | ||
153 | */ | ||
154 | #define Bcj2Dec_IsMaybeFinished(_p_) ( \ | ||
155 | Bcj2Dec_IsMaybeFinished_state_MAIN(_p_) && \ | ||
156 | Bcj2Dec_IsMaybeFinished_code(_p_)) | ||
157 | |||
158 | |||
159 | |||
160 | /* ---------- BCJ2 Encoder ---------- */ | ||
86 | 161 | ||
87 | typedef enum | 162 | typedef enum |
88 | { | 163 | { |
@@ -91,6 +166,91 @@ typedef enum | |||
91 | BCJ2_ENC_FINISH_MODE_END_STREAM | 166 | BCJ2_ENC_FINISH_MODE_END_STREAM |
92 | } EBcj2Enc_FinishMode; | 167 | } EBcj2Enc_FinishMode; |
93 | 168 | ||
169 | /* | ||
170 | BCJ2_ENC_FINISH_MODE_CONTINUE: | ||
171 | process non finished encoding. | ||
172 | It notifies the encoder that additional further calls | ||
173 | can provide more input data (src) than provided by current call. | ||
174 | In that case the CBcj2Enc encoder still can move (src) pointer | ||
175 | up to (srcLim), but CBcj2Enc encoder can store some of the last | ||
176 | processed bytes (up to 4 bytes) from src to internal CBcj2Enc::temp[] buffer. | ||
177 | at return: | ||
178 | (CBcj2Enc::src will point to position that includes | ||
179 | processed data and data copied to (temp[]) buffer) | ||
180 | That data from (temp[]) buffer will be used in further calls. | ||
181 | |||
182 | BCJ2_ENC_FINISH_MODE_END_BLOCK: | ||
183 | finish encoding of current block (ended at srcLim) without RC flushing. | ||
184 | at return: if (CBcj2Enc::state == BCJ2_ENC_STATE_ORIG && | ||
185 | CBcj2Enc::src == CBcj2Enc::srcLim) | ||
186 | : it shows that block encoding was finished. And the encoder is | ||
187 | ready for new (src) data or for stream finish operation. | ||
188 | finished block means | ||
189 | { | ||
190 | CBcj2Enc has completed block encoding up to (srcLim). | ||
191 | (1 + 4 bytes) or (2 + 4 bytes) CALL/JUMP tuples will | ||
192 | not cross block boundary at (srcLim). | ||
193 | temporary CBcj2Enc buffer for (ORIG) src data is empty. | ||
194 | 3 output uncompressed streams (MAIN, CALL, JUMP) were flushed. | ||
195 | RC stream was not flushed. And RC stream will cross block boundary. | ||
196 | } | ||
197 | Note: some possible implementation of BCJ2 encoder could | ||
198 | write branch marker (e8/e9/0f8x) in one call of Bcj2Enc_Encode(), | ||
199 | and it could calculate symbol for RC in another call of Bcj2Enc_Encode(). | ||
200 | BCJ2 encoder uses ip/fileIp/fileSize/relatLimit values to calculate RC symbol. | ||
201 | And these CBcj2Enc variables can have different values in different Bcj2Enc_Encode() calls. | ||
202 | So caller must finish each block with BCJ2_ENC_FINISH_MODE_END_BLOCK | ||
203 | to ensure that RC symbol is calculated and written in proper block. | ||
204 | |||
205 | BCJ2_ENC_FINISH_MODE_END_STREAM | ||
206 | finish encoding of stream (ended at srcLim) fully including RC flushing. | ||
207 | at return: if (CBcj2Enc::state == BCJ2_ENC_STATE_FINISHED) | ||
208 | : it shows that stream encoding was finished fully, | ||
209 | and all output streams were flushed fully. | ||
210 | also Bcj2Enc_IsFinished() can be called. | ||
211 | */ | ||
212 | |||
213 | |||
214 | /* | ||
215 | 32-bit relative offset in JUMP/CALL commands is | ||
216 | - (mod 4 GiB) for 32-bit x86 code | ||
217 | - signed Int32 for 64-bit x86-64 code | ||
218 | BCJ2 encoder also does internal relative to absolute address conversions. | ||
219 | And there are 2 possible ways to do it: | ||
220 | before v23: we used 32-bit variables and (mod 4 GiB) conversion | ||
221 | since v23: we use 64-bit variables and (signed Int32 offset) conversion. | ||
222 | The absolute address condition for conversion in v23: | ||
223 | ((UInt64)((Int64)ip64 - (Int64)fileIp64 + 5 + (Int32)offset) < (UInt64)fileSize64) | ||
224 | Note that if (fileSize64 > 2 GiB), there is a difference between | ||
225 | old (mod 4 GiB) way (v22) and new (signed Int32 offset) way (v23). | ||
226 | And new (v23) way is more suitable to encode 64-bit x86-64 code for (fileSize64 > 2 GiB) cases. | ||
227 | */ | ||
228 | |||
229 | /* | ||
230 | // for old (v22) way for conversion: | ||
231 | typedef UInt32 CBcj2Enc_ip_unsigned; | ||
232 | typedef Int32 CBcj2Enc_ip_signed; | ||
233 | #define BCJ2_ENC_FileSize_MAX ((UInt32)1 << 31) | ||
234 | */ | ||
235 | typedef UInt64 CBcj2Enc_ip_unsigned; | ||
236 | typedef Int64 CBcj2Enc_ip_signed; | ||
237 | |||
238 | /* maximum size of file that can be used for conversion condition */ | ||
239 | #define BCJ2_ENC_FileSize_MAX ((CBcj2Enc_ip_unsigned)0 - 2) | ||
240 | |||
241 | /* default value of fileSize64_minus1 variable that means | ||
242 | that absolute address limitation will not be used */ | ||
243 | #define BCJ2_ENC_FileSizeField_UNLIMITED ((CBcj2Enc_ip_unsigned)0 - 1) | ||
244 | |||
245 | /* calculate value that later can be set to CBcj2Enc::fileSize64_minus1 */ | ||
246 | #define BCJ2_ENC_GET_FileSizeField_VAL_FROM_FileSize(fileSize) \ | ||
247 | ((CBcj2Enc_ip_unsigned)(fileSize) - 1) | ||
248 | |||
249 | /* set CBcj2Enc::fileSize64_minus1 variable from size of file */ | ||
250 | #define Bcj2Enc_SET_FileSize(p, fileSize) \ | ||
251 | (p)->fileSize64_minus1 = BCJ2_ENC_GET_FileSizeField_VAL_FROM_FileSize(fileSize); | ||
252 | |||
253 | |||
94 | typedef struct | 254 | typedef struct |
95 | { | 255 | { |
96 | Byte *bufs[BCJ2_NUM_STREAMS]; | 256 | Byte *bufs[BCJ2_NUM_STREAMS]; |
@@ -101,45 +261,71 @@ typedef struct | |||
101 | unsigned state; | 261 | unsigned state; |
102 | EBcj2Enc_FinishMode finishMode; | 262 | EBcj2Enc_FinishMode finishMode; |
103 | 263 | ||
104 | Byte prevByte; | 264 | Byte context; |
265 | Byte flushRem; | ||
266 | Byte isFlushState; | ||
105 | 267 | ||
106 | Byte cache; | 268 | Byte cache; |
107 | UInt32 range; | 269 | UInt32 range; |
108 | UInt64 low; | 270 | UInt64 low; |
109 | UInt64 cacheSize; | 271 | UInt64 cacheSize; |
272 | |||
273 | // UInt32 context; // for marker version, it can include marker flag. | ||
110 | 274 | ||
111 | UInt32 ip; | 275 | /* (ip64) and (fileIp64) correspond to virtual source stream position |
112 | 276 | that doesn't include data in temp[] */ | |
113 | /* 32-bit ralative offset in JUMP/CALL commands is | 277 | CBcj2Enc_ip_unsigned ip64; /* current (ip) position */ |
114 | - (mod 4 GB) in 32-bit mode | 278 | CBcj2Enc_ip_unsigned fileIp64; /* start (ip) position of current file */ |
115 | - signed Int32 in 64-bit mode | 279 | CBcj2Enc_ip_unsigned fileSize64_minus1; /* size of current file (for conversion limitation) */ |
116 | We use (mod 4 GB) check for fileSize. | 280 | UInt32 relatLimit; /* (relatLimit <= ((UInt32)1 << 31)) : 0 means disable_conversion */ |
117 | Use fileSize up to 2 GB, if you want to support 32-bit and 64-bit code conversion. */ | 281 | // UInt32 relatExcludeBits; |
118 | UInt32 fileIp; | ||
119 | UInt32 fileSize; /* (fileSize <= ((UInt32)1 << 31)), 0 means no_limit */ | ||
120 | UInt32 relatLimit; /* (relatLimit <= ((UInt32)1 << 31)), 0 means desable_conversion */ | ||
121 | 282 | ||
122 | UInt32 tempTarget; | 283 | UInt32 tempTarget; |
123 | unsigned tempPos; | 284 | unsigned tempPos; /* the number of bytes that were copied to temp[] buffer |
124 | Byte temp[4 * 2]; | 285 | (tempPos <= 4) outside of Bcj2Enc_Encode() */ |
125 | 286 | // Byte temp[4]; // for marker version | |
126 | unsigned flushPos; | 287 | Byte temp[8]; |
127 | 288 | CBcj2Prob probs[2 + 256]; | |
128 | UInt16 probs[2 + 256]; | ||
129 | } CBcj2Enc; | 289 | } CBcj2Enc; |
130 | 290 | ||
131 | void Bcj2Enc_Init(CBcj2Enc *p); | 291 | void Bcj2Enc_Init(CBcj2Enc *p); |
132 | void Bcj2Enc_Encode(CBcj2Enc *p); | ||
133 | 292 | ||
134 | #define Bcj2Enc_Get_InputData_Size(p) ((SizeT)((p)->srcLim - (p)->src) + (p)->tempPos) | ||
135 | #define Bcj2Enc_IsFinished(p) ((p)->flushPos == 5) | ||
136 | 293 | ||
294 | /* | ||
295 | Bcj2Enc_Encode(): at exit: | ||
296 | p->state < BCJ2_NUM_STREAMS : we need more buffer space for output stream | ||
297 | (bufs[p->state] == lims[p->state]) | ||
298 | p->state == BCJ2_ENC_STATE_ORIG : we need more data in input src stream | ||
299 | (src == srcLim) | ||
300 | p->state == BCJ2_ENC_STATE_FINISHED : after fully encoded stream | ||
301 | */ | ||
302 | void Bcj2Enc_Encode(CBcj2Enc *p); | ||
137 | 303 | ||
138 | #define BCJ2_RELAT_LIMIT_NUM_BITS 26 | 304 | /* Bcj2Enc encoder can look ahead for up to 4 bytes of source stream. |
139 | #define BCJ2_RELAT_LIMIT ((UInt32)1 << BCJ2_RELAT_LIMIT_NUM_BITS) | 305 | CBcj2Enc::tempPos : is the number of bytes that were copied from input stream to temp[] buffer. |
306 | (CBcj2Enc::src) after Bcj2Enc_Encode() is starting position after | ||
307 | fully processed data and after data copied to temp buffer. | ||
308 | So if the caller needs to get real number of fully processed input | ||
309 | bytes (without look ahead data in temp buffer), | ||
310 | the caller must subtract (CBcj2Enc::tempPos) value from processed size | ||
311 | value that is calculated based on current (CBcj2Enc::src): | ||
312 | cur_processed_pos = Calc_Big_Processed_Pos(enc.src) - | ||
313 | Bcj2Enc_Get_AvailInputSize_in_Temp(&enc); | ||
314 | */ | ||
315 | /* get the size of input data that was stored in temp[] buffer: */ | ||
316 | #define Bcj2Enc_Get_AvailInputSize_in_Temp(p) ((p)->tempPos) | ||
140 | 317 | ||
141 | /* limit for CBcj2Enc::fileSize variable */ | 318 | #define Bcj2Enc_IsFinished(p) ((p)->flushRem == 0) |
142 | #define BCJ2_FileSize_MAX ((UInt32)1 << 31) | 319 | |
320 | /* Note : the decoder supports overlapping of marker (0f 80). | ||
321 | But we can eliminate such overlapping cases by setting | ||
322 | the limit for relative offset conversion as | ||
323 | CBcj2Enc::relatLimit <= (0x0f << 24) == (240 MiB) | ||
324 | */ | ||
325 | /* default value for CBcj2Enc::relatLimit */ | ||
326 | #define BCJ2_ENC_RELAT_LIMIT_DEFAULT ((UInt32)0x0f << 24) | ||
327 | #define BCJ2_ENC_RELAT_LIMIT_MAX ((UInt32)1 << 31) | ||
328 | // #define BCJ2_RELAT_EXCLUDE_NUM_BITS 5 | ||
143 | 329 | ||
144 | EXTERN_C_END | 330 | EXTERN_C_END |
145 | 331 | ||