aboutsummaryrefslogtreecommitdiff
path: root/C/XzCrc64Opt.c
blob: 0c1fc2ffecb89ebe834823eeda226a9ae3366171 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
/* XzCrc64Opt.c -- CRC64 calculation (optimized functions)
2023-12-08 : Igor Pavlov : Public domain */

#include "Precomp.h"

#include "CpuArch.h"

#if !defined(Z7_CRC64_NUM_TABLES) || Z7_CRC64_NUM_TABLES > 1

// for debug only : define Z7_CRC64_DEBUG_BE to test big-endian code in little-endian cpu
// #define Z7_CRC64_DEBUG_BE
#ifdef Z7_CRC64_DEBUG_BE
// Pretend the target is big-endian: with MY_CPU_BE defined and MY_CPU_LE undefined,
// the little-endian function below is skipped and the big-endian one is compiled.
#undef MY_CPU_LE
#define MY_CPU_BE
#endif

// Z7_CRC64_USE_64BIT enables the variants that load and index with whole 64-bit words.
// They are selected further below only when the table count is a multiple of 8.
#if defined(MY_CPU_64BIT)
#define Z7_CRC64_USE_64BIT
#endif

// the value Z7_CRC64_NUM_TABLES_USE must be defined to same value as in XzCrc64.c
// Z7_CRC64_NUM_TABLES_USE is the number of 0x100-entry lookup tables used by the
// functions in this file, and also the number of input bytes consumed per iteration
// of their main loop. It is taken from Z7_CRC64_NUM_TABLES if defined, otherwise 12.
#ifdef Z7_CRC64_NUM_TABLES
#define Z7_CRC64_NUM_TABLES_USE  Z7_CRC64_NUM_TABLES
#else
#define Z7_CRC64_NUM_TABLES_USE  12
#endif

// Only multiples of 4 in the range [4, 16] are accepted, i.e. 4, 8, 12 or 16.
#if Z7_CRC64_NUM_TABLES_USE % 4 || \
    Z7_CRC64_NUM_TABLES_USE < 4 || \
    Z7_CRC64_NUM_TABLES_USE > 4 * 4
  #error Stop_Compiling_Bad_CRC64_NUM_TABLES
#endif


#ifndef MY_CPU_BE

// One-byte step of the little-endian update: the low byte of (crc ^ b) indexes the
// first 0x100-entry table and the result is XORed with crc shifted right by 8 bits.
// Uses the identifier `table` from the enclosing scope; `crc` is evaluated twice.
#define CRC64_UPDATE_BYTE_2(crc, b)  (table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8))

#if defined(Z7_CRC64_USE_64BIT) && (Z7_CRC64_NUM_TABLES_USE % 8 == 0)

// Q64LE(n, d): XOR of eight table lookups, one for each byte of the 64-bit value d.
// The least significant byte of d uses sub-table (n * 8 + 7) and the most significant
// byte uses sub-table (n * 8 + 0); every sub-table holds 0x100 entries.
// The last lookup is not masked: callers pass a UInt64, so (d >> 56) already fits in a byte.
// Uses `table` from the enclosing scope and evaluates d eight times.
#define Q64LE(n, d) \
    ( (table + ((n) * 8 + 7) * 0x100)[((d)         ) & 0xFF] \
    ^ (table + ((n) * 8 + 6) * 0x100)[((d) >> 1 * 8) & 0xFF] \
    ^ (table + ((n) * 8 + 5) * 0x100)[((d) >> 2 * 8) & 0xFF] \
    ^ (table + ((n) * 8 + 4) * 0x100)[((d) >> 3 * 8) & 0xFF] \
    ^ (table + ((n) * 8 + 3) * 0x100)[((d) >> 4 * 8) & 0xFF] \
    ^ (table + ((n) * 8 + 2) * 0x100)[((d) >> 5 * 8) & 0xFF] \
    ^ (table + ((n) * 8 + 1) * 0x100)[((d) >> 6 * 8) & 0xFF] \
    ^ (table + ((n) * 8 + 0) * 0x100)[((d) >> 7 * 8)] )

// R64(a): loads the a-th 64-bit word counted from `p` (taken from the enclosing scope)
// with a direct pointer dereference, so no byte-order conversion is applied.
// The function below advances p to an 8-byte boundary before using it.
#define R64(a)  *((const UInt64 *)(const void *)p + (a))

#else

// Q32LE(n, d): XOR of four table lookups, one for each byte of the 32-bit value d.
// The least significant byte of d uses sub-table (n * 4 + 3) and the most significant
// byte uses sub-table (n * 4 + 0); every sub-table holds 0x100 entries.
// The last lookup is not masked: callers pass a UInt32, so (d >> 24) already fits in a byte.
// Uses `table` from the enclosing scope and evaluates d four times.
#define Q32LE(n, d) \
    ( (table + ((n) * 4 + 3) * 0x100)[((d)         ) & 0xFF] \
    ^ (table + ((n) * 4 + 2) * 0x100)[((d) >> 1 * 8) & 0xFF] \
    ^ (table + ((n) * 4 + 1) * 0x100)[((d) >> 2 * 8) & 0xFF] \
    ^ (table + ((n) * 4 + 0) * 0x100)[((d) >> 3 * 8)] )

// R32(a): loads the a-th 32-bit word counted from `p` (taken from the enclosing scope)
// with a direct pointer dereference, so no byte-order conversion is applied.
// The function below advances p to at least a 4-byte boundary before using it.
#define R32(a)  *((const UInt32 *)(const void *)p + (a))

#endif


// Signature of the little-endian update routine; `step` is pasted onto the name,
// which gives XzCrc64UpdateT<step>.
#define CRC64_FUNC_PRE_LE2(step) \
UInt64 Z7_FASTCALL XzCrc64UpdateT ## step (UInt64 v, const void *data, size_t size, const UInt64 *table)

// Emits a prototype followed by the header of the definition. Going through this
// outer macro lets `step` expand to its numeric value before the ## pasting above.
#define CRC64_FUNC_PRE_LE(step)   \
        CRC64_FUNC_PRE_LE2(step); \
        CRC64_FUNC_PRE_LE2(step)

/*
  Little-endian CRC64 update that consumes Z7_CRC64_NUM_TABLES_USE bytes per main-loop pass.
    v     : CRC state on entry
    data  : input bytes
    size  : number of input bytes
    table : Z7_CRC64_NUM_TABLES_USE consecutive sub-tables of 0x100 entries each
  Returns the CRC state after all (size) bytes have been processed.
*/
CRC64_FUNC_PRE_LE(Z7_CRC64_NUM_TABLES_USE)
{
  const Byte *p = (const Byte *)data;
  const Byte *end;

  // Head: single bytes until p is aligned for the word loads below.
  // The mask is 3 for 4 or 12 tables and 7 for 8 or 16 tables.
  while (size != 0 && ((unsigned)(ptrdiff_t)p & (7 - (Z7_CRC64_NUM_TABLES_USE & 4))) != 0)
  {
    v = CRC64_UPDATE_BYTE_2(v, *p);
    p++;
    size--;
  }

  end = p + size;

  // Body: whole blocks of Z7_CRC64_NUM_TABLES_USE bytes.
  if (size >= Z7_CRC64_NUM_TABLES_USE)
  {
    // start address of the last position where a full block still fits
    const Byte * const lastBlock = end - Z7_CRC64_NUM_TABLES_USE;
    do
    {
#if Z7_CRC64_NUM_TABLES_USE == 4
      // only the low half of the state meets the input; the high half is shifted down
      const UInt32 lo = (UInt32)v ^ R32(0);
      v = (v >> 32) ^ Q32LE(0, lo);
#elif Z7_CRC64_NUM_TABLES_USE == 8
#ifdef Z7_CRC64_USE_64BIT
      const UInt64 cur = v ^ R64(0);
      v = Q64LE(0, cur);
#else
      const UInt32 lo = (UInt32)v         ^ R32(0);
      const UInt32 hi = (UInt32)(v >> 32) ^ R32(1);
      v = Q32LE(1, lo) ^ Q32LE(0, hi);
#endif
#elif Z7_CRC64_NUM_TABLES_USE == 12
      // the first 8 bytes are mixed with the state, the last 4 bytes are looked up as they are
      const UInt32 lo   = (UInt32)v         ^ R32(0);
      const UInt32 hi   = (UInt32)(v >> 32) ^ R32(1);
      const UInt32 next = R32(2);
      v = Q32LE(2, lo) ^ Q32LE(1, hi) ^ Q32LE(0, next);
#elif Z7_CRC64_NUM_TABLES_USE == 16
#ifdef Z7_CRC64_USE_64BIT
      // the first 8 bytes are mixed with the state, the last 8 bytes are looked up as they are
      const UInt64 cur  = v ^ R64(0);
      const UInt64 next = R64(1);
      v = Q64LE(1, cur) ^ Q64LE(0, next);
#else
      const UInt32 lo    = (UInt32)v         ^ R32(0);
      const UInt32 hi    = (UInt32)(v >> 32) ^ R32(1);
      const UInt32 next0 = R32(2);
      const UInt32 next1 = R32(3);
      v = Q32LE(3, lo) ^ Q32LE(2, hi) ^ Q32LE(1, next0) ^ Q32LE(0, next1);
#endif
#else
#error Stop_Compiling_Bad_CRC64_NUM_TABLES
#endif
      p += Z7_CRC64_NUM_TABLES_USE;
    }
    while (p <= lastBlock);
  }

  // Tail: remaining bytes that do not fill a whole block.
  while (p < end)
  {
    v = CRC64_UPDATE_BYTE_2(v, *p);
    p++;
  }

  return v;
}

#undef CRC64_UPDATE_BYTE_2
#undef R32
#undef R64
#undef Q32LE
#undef Q64LE
#undef CRC64_FUNC_PRE_LE
#undef CRC64_FUNC_PRE_LE2

#endif




#ifndef MY_CPU_LE

// One-byte step of the big-endian update: the top byte of crc XORed with b indexes the
// first 0x100-entry table and the result is XORed with crc shifted left by 8 bits.
// b is not masked, so it must already be a byte value (the function passes *p).
// Uses the identifier `table` from the enclosing scope; `crc` is evaluated twice.
#define CRC64_UPDATE_BYTE_2_BE(crc, b)  (table[((crc) >> 56) ^ (b)] ^ ((crc) << 8))

#if defined(Z7_CRC64_USE_64BIT) && (Z7_CRC64_NUM_TABLES_USE % 8 == 0)

// Q64BE(n, d): XOR of eight table lookups, one for each byte of the 64-bit value d.
// The least significant byte of d uses sub-table (n * 8 + 0) and the most significant
// byte uses sub-table (n * 8 + 7), i.e. the opposite sub-table order to Q64LE.
// The last lookup is not masked: callers pass a UInt64, so (d >> 56) already fits in a byte.
// Uses `table` from the enclosing scope and evaluates d eight times.
#define Q64BE(n, d) \
    ( (table + ((n) * 8 + 0) * 0x100)[(Byte)(d)] \
    ^ (table + ((n) * 8 + 1) * 0x100)[((d) >> 1 * 8) & 0xFF] \
    ^ (table + ((n) * 8 + 2) * 0x100)[((d) >> 2 * 8) & 0xFF] \
    ^ (table + ((n) * 8 + 3) * 0x100)[((d) >> 3 * 8) & 0xFF] \
    ^ (table + ((n) * 8 + 4) * 0x100)[((d) >> 4 * 8) & 0xFF] \
    ^ (table + ((n) * 8 + 5) * 0x100)[((d) >> 5 * 8) & 0xFF] \
    ^ (table + ((n) * 8 + 6) * 0x100)[((d) >> 6 * 8) & 0xFF] \
    ^ (table + ((n) * 8 + 7) * 0x100)[((d) >> 7 * 8)] )

// R64BE(a): the a-th 64-bit word counted from `p` (taken from the enclosing scope).
// In the debug build the word goes through GetBe64a (defined outside this file;
// presumably an aligned big-endian load - confirm in CpuArch.h); otherwise it is a
// direct dereference in the byte order of the target.
#ifdef Z7_CRC64_DEBUG_BE
  #define R64BE(a)  GetBe64a((const UInt64 *)(const void *)p + (a))
#else
  #define R64BE(a)         *((const UInt64 *)(const void *)p + (a))
#endif

#else

// Q32BE(n, d): XOR of four table lookups, one for each byte of the 32-bit value d.
// The least significant byte of d uses sub-table (n * 4 + 0) and the most significant
// byte uses sub-table (n * 4 + 3), i.e. the opposite sub-table order to Q32LE.
// The last lookup is not masked: callers pass a UInt32, so (d >> 24) already fits in a byte.
// Uses `table` from the enclosing scope and evaluates d four times.
#define Q32BE(n, d) \
    ( (table + ((n) * 4 + 0) * 0x100)[(Byte)(d)] \
    ^ (table + ((n) * 4 + 1) * 0x100)[((d) >> 1 * 8) & 0xFF] \
    ^ (table + ((n) * 4 + 2) * 0x100)[((d) >> 2 * 8) & 0xFF] \
    ^ (table + ((n) * 4 + 3) * 0x100)[((d) >> 3 * 8)] )

// R32BE(a): the a-th 32-bit word counted from `p` (taken from the enclosing scope).
// In the debug build the word goes through GetBe32a (defined outside this file;
// presumably an aligned big-endian load - confirm in CpuArch.h); otherwise it is a
// direct dereference in the byte order of the target.
#ifdef Z7_CRC64_DEBUG_BE
  #define R32BE(a)  GetBe32a((const UInt32 *)(const void *)p + (a))
#else
  #define R32BE(a)         *((const UInt32 *)(const void *)p + (a))
#endif

#endif

#define CRC64_FUNC_PRE_BE2(step) \
UInt64 Z7_FASTCALL XzCrc64UpdateBeT ## step (UInt64 v, const void *data, size_t size, const UInt64 *table)

#define CRC64_FUNC_PRE_BE(step)   \
        CRC64_FUNC_PRE_BE2(step); \
        CRC64_FUNC_PRE_BE2(step)

CRC64_FUNC_PRE_BE(Z7_CRC64_NUM_TABLES_USE)
{
  const Byte *p = (const Byte *)data;
  const Byte *lim;
  v = Z7_BSWAP64(v);
  for (; size && ((unsigned)(ptrdiff_t)p & (7 - (Z7_CRC64_NUM_TABLES_USE & 4))) != 0; size--, p++)
    v = CRC64_UPDATE_BYTE_2_BE(v, *p);
  lim = p + size;
  if (size >= Z7_CRC64_NUM_TABLES_USE)
  {
    lim -= Z7_CRC64_NUM_TABLES_USE;
    do
    {
#if   Z7_CRC64_NUM_TABLES_USE == 4
      const UInt32 d = (UInt32)(v >> 32) ^ R32BE(0);
      v = (v << 32) ^ Q32BE(0, d);
#elif Z7_CRC64_NUM_TABLES_USE == 12
      const UInt32 d1 = (UInt32)(v >> 32) ^ R32BE(0);
      const UInt32 d0 = (UInt32)(v      ) ^ R32BE(1);
      const UInt32 w =                      R32BE(2);
      v  = Q32BE(0, w);
      v ^= Q32BE(2, d1) ^ Q32BE(1, d0);

#elif Z7_CRC64_NUM_TABLES_USE == 8
  #ifdef Z7_CRC64_USE_64BIT
      v ^= R64BE(0);
      v  = Q64BE(0, v);
  #else
      const UInt32 d1 = (UInt32)(v >> 32) ^ R32BE(0);
      const UInt32 d0 = (UInt32)(v      ) ^ R32BE(1);
      v = Q32BE(1, d1) ^ Q32BE(0, d0);
  #endif
#elif Z7_CRC64_NUM_TABLES_USE == 16
  #ifdef Z7_CRC64_USE_64BIT
      const UInt64 w = R64BE(1);
      v ^= R64BE(0);
      v  = Q64BE(0, w) ^ Q64BE(1, v);
  #else
      const UInt32 d1 = (UInt32)(v >> 32) ^ R32BE(0);
      const UInt32 d0 = (UInt32)(v      ) ^ R32BE(1);
      const UInt32 w1 =                     R32BE(2);
      const UInt32 w0 =                     R32BE(3);
      v  = Q32BE(1, w1) ^ Q32BE(0, w0);
      v ^= Q32BE(3, d1) ^ Q32BE(2, d0);
  #endif
#elif
#error Stop_Compiling_Bad_CRC64_NUM_TABLES
#endif
      p += Z7_CRC64_NUM_TABLES_USE;
    }
    while (p <= lim);
    lim += Z7_CRC64_NUM_TABLES_USE;
  }
  for (; p < lim; p++)
    v = CRC64_UPDATE_BYTE_2_BE(v, *p);
  return Z7_BSWAP64(v);
}

#undef CRC64_UPDATE_BYTE_2_BE
#undef R32BE
#undef R64BE
#undef Q32BE
#undef Q64BE
#undef CRC64_FUNC_PRE_BE
#undef CRC64_FUNC_PRE_BE2

#endif
#undef Z7_CRC64_NUM_TABLES_USE
#endif