diff options
Diffstat (limited to 'C/7zCrcOpt.c')
-rw-r--r-- | C/7zCrcOpt.c | 244 |
1 files changed, 163 insertions, 81 deletions
diff --git a/C/7zCrcOpt.c b/C/7zCrcOpt.c index 9c64929..9408017 100644 --- a/C/7zCrcOpt.c +++ b/C/7zCrcOpt.c | |||
@@ -1,117 +1,199 @@ | |||
1 | /* 7zCrcOpt.c -- CRC32 calculation | 1 | /* 7zCrcOpt.c -- CRC32 calculation (optimized functions) |
2 | 2023-04-02 : Igor Pavlov : Public domain */ | 2 | 2023-12-07 : Igor Pavlov : Public domain */ |
3 | 3 | ||
4 | #include "Precomp.h" | 4 | #include "Precomp.h" |
5 | 5 | ||
6 | #include "CpuArch.h" | 6 | #include "CpuArch.h" |
7 | 7 | ||
8 | #if !defined(Z7_CRC_NUM_TABLES) || Z7_CRC_NUM_TABLES > 1 | ||
9 | |||
10 | // for debug only : define Z7_CRC_DEBUG_BE to test big-endian code on a little-endian cpu | ||
11 | // #define Z7_CRC_DEBUG_BE | ||
12 | #ifdef Z7_CRC_DEBUG_BE | ||
13 | #undef MY_CPU_LE | ||
14 | #define MY_CPU_BE | ||
15 | #endif | ||
16 | |||
17 | // the value Z7_CRC_NUM_TABLES_USE must be defined to the same value as in 7zCrc.c | ||
18 | #ifdef Z7_CRC_NUM_TABLES | ||
19 | #define Z7_CRC_NUM_TABLES_USE Z7_CRC_NUM_TABLES | ||
20 | #else | ||
21 | #define Z7_CRC_NUM_TABLES_USE 12 | ||
22 | #endif | ||
23 | |||
24 | #if Z7_CRC_NUM_TABLES_USE % 4 || \ | ||
25 | Z7_CRC_NUM_TABLES_USE < 4 * 1 || \ | ||
26 | Z7_CRC_NUM_TABLES_USE > 4 * 6 | ||
27 | #error Stop_Compiling_Bad_Z7_CRC_NUM_TABLES | ||
28 | #endif | ||
29 | |||
30 | |||
8 | #ifndef MY_CPU_BE | 31 | #ifndef MY_CPU_BE |
9 | 32 | ||
10 | #define CRC_UPDATE_BYTE_2(crc, b) (table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8)) | 33 | #define CRC_UPDATE_BYTE_2(crc, b) (table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8)) |
11 | 34 | ||
12 | UInt32 Z7_FASTCALL CrcUpdateT4(UInt32 v, const void *data, size_t size, const UInt32 *table); | 35 | #define Q(n, d) \ |
13 | UInt32 Z7_FASTCALL CrcUpdateT4(UInt32 v, const void *data, size_t size, const UInt32 *table) | 36 | ( (table + ((n) * 4 + 3) * 0x100)[(Byte)(d)] \ |
14 | { | 37 | ^ (table + ((n) * 4 + 2) * 0x100)[((d) >> 1 * 8) & 0xFF] \ |
15 | const Byte *p = (const Byte *)data; | 38 | ^ (table + ((n) * 4 + 1) * 0x100)[((d) >> 2 * 8) & 0xFF] \ |
16 | for (; size > 0 && ((unsigned)(ptrdiff_t)p & 3) != 0; size--, p++) | 39 | ^ (table + ((n) * 4 + 0) * 0x100)[((d) >> 3 * 8)] ) |
17 | v = CRC_UPDATE_BYTE_2(v, *p); | 40 | |
18 | for (; size >= 4; size -= 4, p += 4) | 41 | #define R(a) *((const UInt32 *)(const void *)p + (a)) |
19 | { | 42 | |
20 | v ^= *(const UInt32 *)(const void *)p; | 43 | #define CRC_FUNC_PRE_LE2(step) \ |
21 | v = | 44 | UInt32 Z7_FASTCALL CrcUpdateT ## step (UInt32 v, const void *data, size_t size, const UInt32 *table) |
22 | (table + 0x300)[((v ) & 0xFF)] | ||
23 | ^ (table + 0x200)[((v >> 8) & 0xFF)] | ||
24 | ^ (table + 0x100)[((v >> 16) & 0xFF)] | ||
25 | ^ (table + 0x000)[((v >> 24))]; | ||
26 | } | ||
27 | for (; size > 0; size--, p++) | ||
28 | v = CRC_UPDATE_BYTE_2(v, *p); | ||
29 | return v; | ||
30 | } | ||
31 | 45 | ||
32 | UInt32 Z7_FASTCALL CrcUpdateT8(UInt32 v, const void *data, size_t size, const UInt32 *table); | 46 | #define CRC_FUNC_PRE_LE(step) \ |
33 | UInt32 Z7_FASTCALL CrcUpdateT8(UInt32 v, const void *data, size_t size, const UInt32 *table) | 47 | CRC_FUNC_PRE_LE2(step); \ |
48 | CRC_FUNC_PRE_LE2(step) | ||
49 | |||
50 | CRC_FUNC_PRE_LE(Z7_CRC_NUM_TABLES_USE) | ||
34 | { | 51 | { |
35 | const Byte *p = (const Byte *)data; | 52 | const Byte *p = (const Byte *)data; |
36 | for (; size > 0 && ((unsigned)(ptrdiff_t)p & 7) != 0; size--, p++) | 53 | const Byte *lim; |
54 | for (; size && ((unsigned)(ptrdiff_t)p & (7 - (Z7_CRC_NUM_TABLES_USE & 4))) != 0; size--, p++) | ||
37 | v = CRC_UPDATE_BYTE_2(v, *p); | 55 | v = CRC_UPDATE_BYTE_2(v, *p); |
38 | for (; size >= 8; size -= 8, p += 8) | 56 | lim = p + size; |
57 | if (size >= Z7_CRC_NUM_TABLES_USE) | ||
39 | { | 58 | { |
40 | UInt32 d; | 59 | lim -= Z7_CRC_NUM_TABLES_USE; |
41 | v ^= *(const UInt32 *)(const void *)p; | 60 | do |
42 | v = | 61 | { |
43 | (table + 0x700)[((v ) & 0xFF)] | 62 | v ^= R(0); |
44 | ^ (table + 0x600)[((v >> 8) & 0xFF)] | 63 | { |
45 | ^ (table + 0x500)[((v >> 16) & 0xFF)] | 64 | #if Z7_CRC_NUM_TABLES_USE == 1 * 4 |
46 | ^ (table + 0x400)[((v >> 24))]; | 65 | v = Q(0, v); |
47 | d = *((const UInt32 *)(const void *)p + 1); | 66 | #else |
48 | v ^= | 67 | #define U2(r, op) \ |
49 | (table + 0x300)[((d ) & 0xFF)] | 68 | { d = R(r); x op Q(Z7_CRC_NUM_TABLES_USE / 4 - 1 - (r), d); } |
50 | ^ (table + 0x200)[((d >> 8) & 0xFF)] | 69 | UInt32 d, x; |
51 | ^ (table + 0x100)[((d >> 16) & 0xFF)] | 70 | U2(1, =) |
52 | ^ (table + 0x000)[((d >> 24))]; | 71 | #if Z7_CRC_NUM_TABLES_USE >= 3 * 4 |
72 | #define U(r) U2(r, ^=) | ||
73 | U(2) | ||
74 | #if Z7_CRC_NUM_TABLES_USE >= 4 * 4 | ||
75 | U(3) | ||
76 | #if Z7_CRC_NUM_TABLES_USE >= 5 * 4 | ||
77 | U(4) | ||
78 | #if Z7_CRC_NUM_TABLES_USE >= 6 * 4 | ||
79 | U(5) | ||
80 | #if Z7_CRC_NUM_TABLES_USE >= 7 * 4 | ||
81 | #error Stop_Compiling_Bad_Z7_CRC_NUM_TABLES | ||
82 | #endif | ||
83 | #endif | ||
84 | #endif | ||
85 | #endif | ||
86 | #endif | ||
87 | #undef U | ||
88 | #undef U2 | ||
89 | v = x ^ Q(Z7_CRC_NUM_TABLES_USE / 4 - 1, v); | ||
90 | #endif | ||
91 | } | ||
92 | p += Z7_CRC_NUM_TABLES_USE; | ||
93 | } | ||
94 | while (p <= lim); | ||
95 | lim += Z7_CRC_NUM_TABLES_USE; | ||
53 | } | 96 | } |
54 | for (; size > 0; size--, p++) | 97 | for (; p < lim; p++) |
55 | v = CRC_UPDATE_BYTE_2(v, *p); | 98 | v = CRC_UPDATE_BYTE_2(v, *p); |
56 | return v; | 99 | return v; |
57 | } | 100 | } |
58 | 101 | ||
102 | #undef CRC_UPDATE_BYTE_2 | ||
103 | #undef R | ||
104 | #undef Q | ||
105 | #undef CRC_FUNC_PRE_LE | ||
106 | #undef CRC_FUNC_PRE_LE2 | ||
107 | |||
59 | #endif | 108 | #endif |
60 | 109 | ||
61 | 110 | ||
111 | |||
112 | |||
62 | #ifndef MY_CPU_LE | 113 | #ifndef MY_CPU_LE |
63 | 114 | ||
64 | #define CRC_UINT32_SWAP(v) Z7_BSWAP32(v) | 115 | #define CRC_UPDATE_BYTE_2_BE(crc, b) (table[((crc) >> 24) ^ (b)] ^ ((crc) << 8)) |
65 | 116 | ||
66 | #define CRC_UPDATE_BYTE_2_BE(crc, b) (table[(((crc) >> 24) ^ (b))] ^ ((crc) << 8)) | 117 | #define Q(n, d) \ |
118 | ( (table + ((n) * 4 + 0) * 0x100)[((d)) & 0xFF] \ | ||
119 | ^ (table + ((n) * 4 + 1) * 0x100)[((d) >> 1 * 8) & 0xFF] \ | ||
120 | ^ (table + ((n) * 4 + 2) * 0x100)[((d) >> 2 * 8) & 0xFF] \ | ||
121 | ^ (table + ((n) * 4 + 3) * 0x100)[((d) >> 3 * 8)] ) | ||
67 | 122 | ||
68 | UInt32 Z7_FASTCALL CrcUpdateT1_BeT4(UInt32 v, const void *data, size_t size, const UInt32 *table) | 123 | #ifdef Z7_CRC_DEBUG_BE |
69 | { | 124 | #define R(a) GetBe32a((const UInt32 *)(const void *)p + (a)) |
70 | const Byte *p = (const Byte *)data; | 125 | #else |
71 | table += 0x100; | 126 | #define R(a) *((const UInt32 *)(const void *)p + (a)) |
72 | v = CRC_UINT32_SWAP(v); | 127 | #endif |
73 | for (; size > 0 && ((unsigned)(ptrdiff_t)p & 3) != 0; size--, p++) | 128 | |
74 | v = CRC_UPDATE_BYTE_2_BE(v, *p); | 129 | |
75 | for (; size >= 4; size -= 4, p += 4) | 130 | #define CRC_FUNC_PRE_BE2(step) \ |
76 | { | 131 | UInt32 Z7_FASTCALL CrcUpdateT1_BeT ## step (UInt32 v, const void *data, size_t size, const UInt32 *table) |
77 | v ^= *(const UInt32 *)(const void *)p; | ||
78 | v = | ||
79 | (table + 0x000)[((v ) & 0xFF)] | ||
80 | ^ (table + 0x100)[((v >> 8) & 0xFF)] | ||
81 | ^ (table + 0x200)[((v >> 16) & 0xFF)] | ||
82 | ^ (table + 0x300)[((v >> 24))]; | ||
83 | } | ||
84 | for (; size > 0; size--, p++) | ||
85 | v = CRC_UPDATE_BYTE_2_BE(v, *p); | ||
86 | return CRC_UINT32_SWAP(v); | ||
87 | } | ||
88 | 132 | ||
89 | UInt32 Z7_FASTCALL CrcUpdateT1_BeT8(UInt32 v, const void *data, size_t size, const UInt32 *table) | 133 | #define CRC_FUNC_PRE_BE(step) \ |
134 | CRC_FUNC_PRE_BE2(step); \ | ||
135 | CRC_FUNC_PRE_BE2(step) | ||
136 | |||
137 | CRC_FUNC_PRE_BE(Z7_CRC_NUM_TABLES_USE) | ||
90 | { | 138 | { |
91 | const Byte *p = (const Byte *)data; | 139 | const Byte *p = (const Byte *)data; |
140 | const Byte *lim; | ||
92 | table += 0x100; | 141 | table += 0x100; |
93 | v = CRC_UINT32_SWAP(v); | 142 | v = Z7_BSWAP32(v); |
94 | for (; size > 0 && ((unsigned)(ptrdiff_t)p & 7) != 0; size--, p++) | 143 | for (; size && ((unsigned)(ptrdiff_t)p & (7 - (Z7_CRC_NUM_TABLES_USE & 4))) != 0; size--, p++) |
95 | v = CRC_UPDATE_BYTE_2_BE(v, *p); | 144 | v = CRC_UPDATE_BYTE_2_BE(v, *p); |
96 | for (; size >= 8; size -= 8, p += 8) | 145 | lim = p + size; |
146 | if (size >= Z7_CRC_NUM_TABLES_USE) | ||
97 | { | 147 | { |
98 | UInt32 d; | 148 | lim -= Z7_CRC_NUM_TABLES_USE; |
99 | v ^= *(const UInt32 *)(const void *)p; | 149 | do |
100 | v = | 150 | { |
101 | (table + 0x400)[((v ) & 0xFF)] | 151 | v ^= R(0); |
102 | ^ (table + 0x500)[((v >> 8) & 0xFF)] | 152 | { |
103 | ^ (table + 0x600)[((v >> 16) & 0xFF)] | 153 | #if Z7_CRC_NUM_TABLES_USE == 1 * 4 |
104 | ^ (table + 0x700)[((v >> 24))]; | 154 | v = Q(0, v); |
105 | d = *((const UInt32 *)(const void *)p + 1); | 155 | #else |
106 | v ^= | 156 | #define U2(r, op) \ |
107 | (table + 0x000)[((d ) & 0xFF)] | 157 | { d = R(r); x op Q(Z7_CRC_NUM_TABLES_USE / 4 - 1 - (r), d); } |
108 | ^ (table + 0x100)[((d >> 8) & 0xFF)] | 158 | UInt32 d, x; |
109 | ^ (table + 0x200)[((d >> 16) & 0xFF)] | 159 | U2(1, =) |
110 | ^ (table + 0x300)[((d >> 24))]; | 160 | #if Z7_CRC_NUM_TABLES_USE >= 3 * 4 |
161 | #define U(r) U2(r, ^=) | ||
162 | U(2) | ||
163 | #if Z7_CRC_NUM_TABLES_USE >= 4 * 4 | ||
164 | U(3) | ||
165 | #if Z7_CRC_NUM_TABLES_USE >= 5 * 4 | ||
166 | U(4) | ||
167 | #if Z7_CRC_NUM_TABLES_USE >= 6 * 4 | ||
168 | U(5) | ||
169 | #if Z7_CRC_NUM_TABLES_USE >= 7 * 4 | ||
170 | #error Stop_Compiling_Bad_Z7_CRC_NUM_TABLES | ||
171 | #endif | ||
172 | #endif | ||
173 | #endif | ||
174 | #endif | ||
175 | #endif | ||
176 | #undef U | ||
177 | #undef U2 | ||
178 | v = x ^ Q(Z7_CRC_NUM_TABLES_USE / 4 - 1, v); | ||
179 | #endif | ||
180 | } | ||
181 | p += Z7_CRC_NUM_TABLES_USE; | ||
182 | } | ||
183 | while (p <= lim); | ||
184 | lim += Z7_CRC_NUM_TABLES_USE; | ||
111 | } | 185 | } |
112 | for (; size > 0; size--, p++) | 186 | for (; p < lim; p++) |
113 | v = CRC_UPDATE_BYTE_2_BE(v, *p); | 187 | v = CRC_UPDATE_BYTE_2_BE(v, *p); |
114 | return CRC_UINT32_SWAP(v); | 188 | return Z7_BSWAP32(v); |
115 | } | 189 | } |
116 | 190 | ||
191 | #undef CRC_UPDATE_BYTE_2_BE | ||
192 | #undef R | ||
193 | #undef Q | ||
194 | #undef CRC_FUNC_PRE_BE | ||
195 | #undef CRC_FUNC_PRE_BE2 | ||
196 | |||
197 | #endif | ||
198 | #undef Z7_CRC_NUM_TABLES_USE | ||
117 | #endif | 199 | #endif |