diff options
author | jsing <> | 2016-09-04 14:06:46 +0000 |
---|---|---|
committer | jsing <> | 2016-09-04 14:06:46 +0000 |
commit | 392813b7d9ed86b80127b58bc6e108cc28530eca (patch) | |
tree | 8494faa8d6a64a635803db2bcff9d555fe5adcae /src/lib/libcrypto/md5/asm | |
parent | becd55246777151f47f161f226165d6bbae02434 (diff) | |
download | openbsd-392813b7d9ed86b80127b58bc6e108cc28530eca.tar.gz openbsd-392813b7d9ed86b80127b58bc6e108cc28530eca.tar.bz2 openbsd-392813b7d9ed86b80127b58bc6e108cc28530eca.zip |
Less IA64.
ok deraadt@
Diffstat (limited to 'src/lib/libcrypto/md5/asm')
-rw-r--r-- | src/lib/libcrypto/md5/asm/md5-ia64.S | 992 |
1 files changed, 0 insertions, 992 deletions
diff --git a/src/lib/libcrypto/md5/asm/md5-ia64.S b/src/lib/libcrypto/md5/asm/md5-ia64.S deleted file mode 100644 index e7de08d46a..0000000000 --- a/src/lib/libcrypto/md5/asm/md5-ia64.S +++ /dev/null | |||
@@ -1,992 +0,0 @@ | |||
1 | /* Copyright (c) 2005 Hewlett-Packard Development Company, L.P. | ||
2 | |||
3 | Permission is hereby granted, free of charge, to any person obtaining | ||
4 | a copy of this software and associated documentation files (the | ||
5 | "Software"), to deal in the Software without restriction, including | ||
6 | without limitation the rights to use, copy, modify, merge, publish, | ||
7 | distribute, sublicense, and/or sell copies of the Software, and to | ||
8 | permit persons to whom the Software is furnished to do so, subject to | ||
9 | the following conditions: | ||
10 | |||
11 | The above copyright notice and this permission notice shall be | ||
12 | included in all copies or substantial portions of the Software. | ||
13 | |||
14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
17 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE | ||
18 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION | ||
19 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION | ||
20 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ | ||
21 | |||
22 | // Common registers are assigned as follows: | ||
23 | // | ||
24 | // COMMON | ||
25 | // | ||
26 | // t0 Const Tbl Ptr TPtr | ||
27 | // t1 Round Constant TRound | ||
28 | // t4 Block residual LenResid | ||
29 | // t5 Residual Data DTmp | ||
30 | // | ||
31 | // {in,out}0 Block 0 Cycle RotateM0 | ||
32 | // {in,out}1 Block Value 12 M12 | ||
33 | // {in,out}2 Block Value 8 M8 | ||
34 | // {in,out}3 Block Value 4 M4 | ||
35 | // {in,out}4 Block Value 0 M0 | ||
36 | // {in,out}5 Block 1 Cycle RotateM1 | ||
37 | // {in,out}6 Block Value 13 M13 | ||
38 | // {in,out}7 Block Value 9 M9 | ||
39 | // {in,out}8 Block Value 5 M5 | ||
40 | // {in,out}9 Block Value 1 M1 | ||
41 | // {in,out}10 Block 2 Cycle RotateM2 | ||
42 | // {in,out}11 Block Value 14 M14 | ||
43 | // {in,out}12 Block Value 10 M10 | ||
44 | // {in,out}13 Block Value 6 M6 | ||
45 | // {in,out}14 Block Value 2 M2 | ||
46 | // {in,out}15 Block 3 Cycle RotateM3 | ||
47 | // {in,out}16 Block Value 15 M15 | ||
48 | // {in,out}17 Block Value 11 M11 | ||
49 | // {in,out}18 Block Value 7 M7 | ||
50 | // {in,out}19 Block Value 3 M3 | ||
51 | // {in,out}20 Scratch Z | ||
52 | // {in,out}21 Scratch Y | ||
53 | // {in,out}22 Scratch X | ||
54 | // {in,out}23 Scratch W | ||
55 | // {in,out}24 Digest A A | ||
56 | // {in,out}25 Digest B B | ||
57 | // {in,out}26 Digest C C | ||
58 | // {in,out}27 Digest D D | ||
59 | // {in,out}28 Active Data Ptr DPtr | ||
60 | // in28 Dummy Value - | ||
61 | // out28 Dummy Value - | ||
62 | // bt0 Coroutine Link QUICK_RTN | ||
63 | // | ||
64 | /// These predicates are used for computing the padding block(s) and | ||
65 | /// are shared between the driver and digest co-routines | ||
66 | // | ||
67 | // pt0 Extra Pad Block pExtra | ||
68 | // pt1 Load next word pLoad | ||
69 | // pt2 Skip next word pSkip | ||
70 | // pt3 Search for Pad pNoPad | ||
71 | // pt4 Pad Word 0 pPad0 | ||
72 | // pt5 Pad Word 1 pPad1 | ||
73 | // pt6 Pad Word 2 pPad2 | ||
74 | // pt7 Pad Word 3 pPad3 | ||
75 | |||
// Physical registers and predicates behind the COMMON names in the table
// above. These names are used by both the driver and the digest co-routines.
76 | #define DTmp r19 | ||
77 | #define LenResid r18 | ||
78 | #define QUICK_RTN b6 | ||
79 | #define TPtr r14 | ||
80 | #define TRound r15 | ||
81 | #define pExtra p6 | ||
82 | #define pLoad p7 | ||
83 | #define pNoPad p9 | ||
84 | #define pPad0 p10 | ||
85 | #define pPad1 p11 | ||
86 | #define pPad2 p12 | ||
87 | #define pPad3 p13 | ||
88 | #define pSkip p8 | ||
89 | |||
// Driver-side names (trailing underscore): each maps to an out register of
// md5_block_asm_data_order. The slot numbers match the in-register names
// defined below (for example A_ is out24 and A is in24).
90 | #define A_ out24 | ||
91 | #define B_ out25 | ||
92 | #define C_ out26 | ||
93 | #define D_ out27 | ||
94 | #define DPtr_ out28 | ||
95 | #define M0_ out4 | ||
96 | #define M1_ out9 | ||
97 | #define M10_ out12 | ||
98 | #define M11_ out17 | ||
99 | #define M12_ out1 | ||
100 | #define M13_ out6 | ||
101 | #define M14_ out11 | ||
102 | #define M15_ out16 | ||
103 | #define M2_ out14 | ||
104 | #define M3_ out19 | ||
105 | #define M4_ out3 | ||
106 | #define M5_ out8 | ||
107 | #define M6_ out13 | ||
108 | #define M7_ out18 | ||
109 | #define M8_ out2 | ||
110 | #define M9_ out7 | ||
111 | #define RotateM0_ out0 | ||
112 | #define RotateM1_ out5 | ||
113 | #define RotateM2_ out10 | ||
114 | #define RotateM3_ out15 | ||
115 | #define W_ out23 | ||
116 | #define X_ out22 | ||
117 | #define Y_ out21 | ||
118 | #define Z_ out20 | ||
119 | |||
// Helper-side names (no underscore): each maps to an in register of the
// digest co-routines, using the same slot number as the matching
// underscore name above. Within each group of five slots the first is the
// RotateMn slot, followed by four message-word slots.
120 | #define A in24 | ||
121 | #define B in25 | ||
122 | #define C in26 | ||
123 | #define D in27 | ||
124 | #define DPtr in28 | ||
125 | #define M0 in4 | ||
126 | #define M1 in9 | ||
127 | #define M10 in12 | ||
128 | #define M11 in17 | ||
129 | #define M12 in1 | ||
130 | #define M13 in6 | ||
131 | #define M14 in11 | ||
132 | #define M15 in16 | ||
133 | #define M2 in14 | ||
134 | #define M3 in19 | ||
135 | #define M4 in3 | ||
136 | #define M5 in8 | ||
137 | #define M6 in13 | ||
138 | #define M7 in18 | ||
139 | #define M8 in2 | ||
140 | #define M9 in7 | ||
141 | #define RotateM0 in0 | ||
142 | #define RotateM1 in5 | ||
143 | #define RotateM2 in10 | ||
144 | #define RotateM3 in15 | ||
145 | #define W in23 | ||
146 | #define X in22 | ||
147 | #define Y in21 | ||
148 | #define Z in20 | ||
149 | |||
// Operands for the alloc instructions below. The helpers declare
// _NINPUTS = MD5_NOUT inputs, i.e. exactly as many in registers as the
// driver declares out registers, and rotate the first _NROTATE of them.
150 | /* register stack configuration for md5_block_asm_data_order(): */ | ||
151 | #define MD5_NINP 3 | ||
152 | #define MD5_NLOC 0 | ||
153 | #define MD5_NOUT 29 | ||
154 | #define MD5_NROT 0 | ||
155 | |||
156 | /* register stack configuration for helpers: */ | ||
157 | #define _NINPUTS MD5_NOUT | ||
158 | #define _NLOCALS 0 | ||
159 | #define _NOUTPUT 0 | ||
160 | #define _NROTATE 24 /* this must be <= _NINPUTS */ | ||
161 | |||
// ADDP is the add used on the incoming pointer arguments: addp4 when
// building for HP-UX without _LP64, plain add otherwise.
// NOTE(review): presumably addp4 is needed because pointers are 32 bits
// wide in that configuration - confirm against the HP-UX ABI.
162 | #if defined(_HPUX_SOURCE) && !defined(_LP64) | ||
163 | #define ADDP addp4 | ||
164 | #else | ||
165 | #define ADDP add | ||
166 | #endif | ||
167 | |||
// HOST_IS_BIG_ENDIAN is defined for HP-UX or when B_ENDIAN is set. When it
// is defined the driver executes "rum psr.be" before processing and
// "sum psr.be" at .md5_exit.
168 | #if defined(_HPUX_SOURCE) || defined(B_ENDIAN) | ||
169 | #define HOST_IS_BIG_ENDIAN | ||
170 | #endif | ||
171 | |||
172 | // Macros for getting the left and right portions of little-endian words | ||
173 | |||
// GETLW(dst, src, align): dep.z places the low 8*align bits of src at bit
// position 32 - 8*align of dst and zeroes every other bit.
// GETRW(dst, src, align): extr.u extracts the 32 - 8*align bits of src that
// start at bit 8*align and zero-extends them into dst.
// The align argument is pasted unparenthesized into the arithmetic, so it
// must be a plain literal; every use in this file passes 1, 2 or 3.
174 | #define GETLW(dst, src, align) dep.z dst = src, 32 - 8 * align, 8 * align | ||
175 | #define GETRW(dst, src, align) extr.u dst = src, 8 * align, 32 - 8 * align | ||
176 | |||
177 | // MD5 driver | ||
178 | // | ||
179 | // Reads an input block, then calls the digest block | ||
180 | // subroutine and adds the results to the accumulated | ||
181 | // digest. It allocates 29 outs (MD5_NOUT) which the subroutine | ||
182 | // uses as its inputs and rotating | ||
183 | // registers. Initializes the round constant pointer and | ||
184 | // takes care of saving/restoring ar.lc | ||
185 | // | ||
186 | /// INPUT | ||
187 | // | ||
188 | // in0 Context Ptr CtxPtr0 | ||
189 | // in1 Input Data Ptr DPtrIn | ||
190 | // in2 Integral Blocks BlockCount | ||
191 | // rp Return Address - | ||
192 | // | ||
193 | /// CODE | ||
194 | // | ||
195 | // v2 Input Align InAlign | ||
196 | // t0 Shared w/digest - | ||
197 | // t1 Shared w/digest - | ||
198 | // t2 Shared w/digest - | ||
199 | // t3 Shared w/digest - | ||
200 | // t4 Shared w/digest - | ||
201 | // t5 Shared w/digest - | ||
202 | // t6 PFS Save PFSSave | ||
203 | // t7 ar.lc Save LCSave | ||
204 | // t8 Saved PR PRSave | ||
205 | // t9 2nd CtxPtr CtxPtr1 | ||
206 | // t10 Table Base CTable | ||
207 | // t11 Table[0] CTable0 | ||
208 | // t13 Accumulator A AccumA | ||
209 | // t14 Accumulator B AccumB | ||
210 | // t15 Accumulator C AccumC | ||
211 | // t16 Accumulator D AccumD | ||
212 | // pt0 Shared w/digest - | ||
213 | // pt1 Shared w/digest - | ||
214 | // pt2 Shared w/digest - | ||
215 | // pt3 Shared w/digest - | ||
216 | // pt4 Shared w/digest - | ||
217 | // pt5 Shared w/digest - | ||
218 | // pt6 Shared w/digest - | ||
219 | // pt7 Shared w/digest - | ||
220 | // pt8 Not Aligned pOff | ||
221 | // pt8 Blocks Left pAgain | ||
222 | |||
// Registers used only by the driver md5_block_asm_data_order.
// pAgain and pOff are both p63; the driver uses pOff for the alignment
// tests and pAgain for the end-of-loop test.
223 | #define AccumA r27 | ||
224 | #define AccumB r28 | ||
225 | #define AccumC r29 | ||
226 | #define AccumD r30 | ||
227 | #define CTable r24 | ||
228 | #define CTable0 r25 | ||
229 | #define CtxPtr0 in0 | ||
230 | #define CtxPtr1 r23 | ||
231 | #define DPtrIn in1 | ||
232 | #define BlockCount in2 | ||
233 | #define InAlign r10 | ||
234 | #define LCSave r21 | ||
235 | #define PFSSave r20 | ||
236 | #define PRSave r22 | ||
237 | #define pAgain p63 | ||
238 | #define pOff p63 | ||
239 | |||
240 | .text | ||
241 | |||
242 | /* md5_block_asm_data_order(MD5_CTX *c, const void *data, size_t num) | ||
243 | |||
244 | where: | ||
245 | c: a pointer to a structure of this type: | ||
246 | |||
247 | typedef struct MD5state_st | ||
248 | { | ||
249 | MD5_LONG A,B,C,D; | ||
250 | MD5_LONG Nl,Nh; | ||
251 | MD5_LONG data[MD5_LBLOCK]; | ||
252 | unsigned int num; | ||
253 | } | ||
254 | MD5_CTX; | ||
255 | |||
256 | data: a pointer to the input data (may be misaligned) | ||
257 | num: the number of 64-byte blocks to hash (i.e., the length | ||
258 | of DATA is 64*NUM). | ||
259 | |||
260 | */ | ||
261 | |||
// Driver entry point. Loads the digest words A..D from the context into
// AccumA..AccumD. For every input block it then loads the first four
// message words into the out registers, calls the digest co-routine through
// QUICK_RTN and adds the returned A_..D_ to the accumulators. The
// accumulators are stored back to the context at .md5_exit.
262 | .type md5_block_asm_data_order, @function | ||
263 | .global md5_block_asm_data_order | ||
264 | .align 32 | ||
265 | .proc md5_block_asm_data_order | ||
266 | md5_block_asm_data_order: | ||
267 | .md5_block: | ||
268 | .prologue | ||
// CtxPtr1 = context + 8 (address of C); CTable temporarily holds ip.
269 | { .mmi | ||
270 | .save ar.pfs, PFSSave | ||
271 | alloc PFSSave = ar.pfs, MD5_NINP, MD5_NLOC, MD5_NOUT, MD5_NROT | ||
272 | ADDP CtxPtr1 = 8, CtxPtr0 | ||
273 | mov CTable = ip | ||
274 | } | ||
275 | { .mmi | ||
276 | ADDP DPtrIn = 0, DPtrIn | ||
277 | ADDP CtxPtr0 = 0, CtxPtr0 | ||
278 | .save ar.lc, LCSave | ||
279 | mov LCSave = ar.lc | ||
280 | } | ||
281 | ;; | ||
// CTable = address of the constant table, formed from ip plus the distance
// between .md5_block and .md5_tbl_data_order. InAlign = low two bits of the
// data pointer.
282 | { .mmi | ||
283 | add CTable = .md5_tbl_data_order#-.md5_block#, CTable | ||
284 | and InAlign = 0x3, DPtrIn | ||
285 | } | ||
286 | |||
// Load A and C (post-incrementing both pointers by 4), then B and D.
287 | { .mmi | ||
288 | ld4 AccumA = [CtxPtr0], 4 | ||
289 | ld4 AccumC = [CtxPtr1], 4 | ||
290 | .save pr, PRSave | ||
291 | mov PRSave = pr | ||
292 | .body | ||
293 | } | ||
294 | ;; | ||
// DPtr_ = DPtrIn with its low two bits cleared (4-byte aligned).
295 | { .mmi | ||
296 | ld4 AccumB = [CtxPtr0] | ||
297 | ld4 AccumD = [CtxPtr1] | ||
298 | dep DPtr_ = 0, DPtrIn, 0, 2 | ||
299 | } ;; | ||
300 | #ifdef HOST_IS_BIG_ENDIAN | ||
301 | rum psr.be;; // switch to little-endian | ||
302 | #endif | ||
// CTable0 = first table entry; CTable is left pointing at the second entry.
// A non-zero InAlign diverts to the unaligned variants.
303 | { .mmb | ||
304 | ld4 CTable0 = [CTable], 4 | ||
305 | cmp.ne pOff, p0 = 0, InAlign | ||
306 | (pOff) br.cond.spnt.many .md5_unaligned | ||
307 | } ;; | ||
308 | |||
309 | // The FF load/compute loop rotates values three times, so that | ||
310 | // loading into M12 here produces the M0 value, M13 -> M1, etc. | ||
311 | |||
312 | .md5_block_loop0: | ||
313 | { .mmi | ||
314 | ld4 M12_ = [DPtr_], 4 | ||
315 | mov TPtr = CTable | ||
316 | mov TRound = CTable0 | ||
317 | } ;; | ||
318 | { .mmi | ||
319 | ld4 M13_ = [DPtr_], 4 | ||
320 | mov A_ = AccumA | ||
321 | mov B_ = AccumB | ||
322 | } ;; | ||
323 | { .mmi | ||
324 | ld4 M14_ = [DPtr_], 4 | ||
325 | mov C_ = AccumC | ||
326 | mov D_ = AccumD | ||
327 | } ;; | ||
328 | { .mmb | ||
329 | ld4 M15_ = [DPtr_], 4 | ||
330 | add BlockCount = -1, BlockCount | ||
331 | br.call.sptk.many QUICK_RTN = md5_digest_block0 | ||
332 | } ;; | ||
333 | |||
334 | // Now, we add the new digest values and do some clean-up | ||
335 | // before checking if there's another full block to process | ||
336 | |||
337 | { .mmi | ||
338 | add AccumA = AccumA, A_ | ||
339 | add AccumB = AccumB, B_ | ||
340 | cmp.ne pAgain, p0 = 0, BlockCount | ||
341 | } | ||
342 | { .mib | ||
343 | add AccumC = AccumC, C_ | ||
344 | add AccumD = AccumD, D_ | ||
345 | (pAgain) br.cond.dptk.many .md5_block_loop0 | ||
346 | } ;; | ||
347 | |||
// Common exit: CtxPtr0/CtxPtr1 still point at B and D, so those are stored
// first with a post-decrement of 4, followed by A and C. Then pr, ar.lc and
// ar.pfs are restored from the values saved in the prologue.
348 | .md5_exit: | ||
349 | #ifdef HOST_IS_BIG_ENDIAN | ||
350 | sum psr.be;; // switch back to big-endian mode | ||
351 | #endif | ||
352 | { .mmi | ||
353 | st4 [CtxPtr0] = AccumB, -4 | ||
354 | st4 [CtxPtr1] = AccumD, -4 | ||
355 | mov pr = PRSave, 0x1ffff ;; | ||
356 | } | ||
357 | { .mmi | ||
358 | st4 [CtxPtr0] = AccumA | ||
359 | st4 [CtxPtr1] = AccumC | ||
360 | mov ar.lc = LCSave | ||
361 | } ;; | ||
362 | { .mib | ||
363 | mov ar.pfs = PFSSave | ||
364 | br.ret.sptk.few rp | ||
365 | } ;; | ||
366 | |||
// MD5UNALIGNED(offset) expands to the driver loop for a data pointer that is
// misaligned by offset bytes. On entry DTmp holds the first aligned word;
// GETRW keeps only its upper bytes. For each block the first word is read
// into Y_, M12_ is formed as GETLW(Y_) | DTmp and DTmp is refreshed from Y_.
// M13_..M15_ are loaded as raw aligned words; the digest helper
// md5_digest_block<offset> realigns them. When no blocks remain the
// expansion branches to .md5_exit.
367 | #define MD5UNALIGNED(offset) \ | ||
368 | .md5_process##offset: \ | ||
369 | { .mib ; \ | ||
370 | nop 0x0 ; \ | ||
371 | GETRW(DTmp, DTmp, offset) ; \ | ||
372 | } ;; \ | ||
373 | .md5_block_loop##offset: \ | ||
374 | { .mmi ; \ | ||
375 | ld4 Y_ = [DPtr_], 4 ; \ | ||
376 | mov TPtr = CTable ; \ | ||
377 | mov TRound = CTable0 ; \ | ||
378 | } ;; \ | ||
379 | { .mmi ; \ | ||
380 | ld4 M13_ = [DPtr_], 4 ; \ | ||
381 | mov A_ = AccumA ; \ | ||
382 | mov B_ = AccumB ; \ | ||
383 | } ;; \ | ||
384 | { .mii ; \ | ||
385 | ld4 M14_ = [DPtr_], 4 ; \ | ||
386 | GETLW(W_, Y_, offset) ; \ | ||
387 | mov C_ = AccumC ; \ | ||
388 | } \ | ||
389 | { .mmi ; \ | ||
390 | mov D_ = AccumD ;; \ | ||
391 | or M12_ = W_, DTmp ; \ | ||
392 | GETRW(DTmp, Y_, offset) ; \ | ||
393 | } \ | ||
394 | { .mib ; \ | ||
395 | ld4 M15_ = [DPtr_], 4 ; \ | ||
396 | add BlockCount = -1, BlockCount ; \ | ||
397 | br.call.sptk.many QUICK_RTN = md5_digest_block##offset; \ | ||
398 | } ;; \ | ||
399 | { .mmi ; \ | ||
400 | add AccumA = AccumA, A_ ; \ | ||
401 | add AccumB = AccumB, B_ ; \ | ||
402 | cmp.ne pAgain, p0 = 0, BlockCount ; \ | ||
403 | } \ | ||
404 | { .mib ; \ | ||
405 | add AccumC = AccumC, C_ ; \ | ||
406 | add AccumD = AccumD, D_ ; \ | ||
407 | (pAgain) br.cond.dptk.many .md5_block_loop##offset ; \ | ||
408 | } ;; \ | ||
409 | { .mib ; \ | ||
410 | nop 0x0 ; \ | ||
411 | nop 0x0 ; \ | ||
412 | br.cond.sptk.many .md5_exit ; \ | ||
413 | } ;; | ||
414 | |||
415 | .align 32 | ||
416 | .md5_unaligned: | ||
417 | // | ||
418 | // Because variable shifts are expensive, we special case each of | ||
419 | // the four alignments. In practice, this won't hurt too much | ||
420 | // since only one working set of code will be loaded. | ||
421 | // | ||
// Load the first aligned word into DTmp, then dispatch on InAlign:
// 1 -> .md5_process1, 2 -> .md5_process2, otherwise fall through into the
// MD5UNALIGNED(3) expansion that follows.
422 | { .mib | ||
423 | ld4 DTmp = [DPtr_], 4 | ||
424 | cmp.eq pOff, p0 = 1, InAlign | ||
425 | (pOff) br.cond.dpnt.many .md5_process1 | ||
426 | } ;; | ||
427 | { .mib | ||
428 | cmp.eq pOff, p0 = 2, InAlign | ||
429 | nop 0x0 | ||
430 | (pOff) br.cond.dpnt.many .md5_process2 | ||
431 | } ;; | ||
432 | MD5UNALIGNED(3) | ||
433 | MD5UNALIGNED(1) | ||
434 | MD5UNALIGNED(2) | ||
435 | |||
436 | .endp md5_block_asm_data_order | ||
437 | |||
438 | |||
439 | // MD5 Perform the F function and load | ||
440 | // | ||
441 | // Passed the first 4 words (M0 - M3) and initial (A, B, C, D) values, | ||
442 | // computes the FF() round of functions, then branches to the common | ||
443 | // digest code to finish up with GG(), HH(), and II(). | ||
444 | // | ||
445 | // INPUT | ||
446 | // | ||
447 | // rp Return Address - | ||
448 | // | ||
449 | // CODE | ||
450 | // | ||
451 | // v0 PFS bit bucket PFS | ||
452 | // v1 Loop Trip Count LTrip | ||
453 | // pt0 Load next word pMore | ||
454 | |||
// Scratch registers for the digest co-routines. Several of these reuse
// registers that already have other names in this file: LTrip and T are
// both r9, U is r10 (also InAlign in the driver), and pMore is p6 (also
// pExtra).
455 | /* For F round: */ | ||
456 | #define LTrip r9 | ||
457 | #define PFS r8 | ||
458 | #define pMore p6 | ||
459 | |||
460 | /* For GHI rounds: */ | ||
461 | #define T r9 | ||
462 | #define U r10 | ||
463 | #define V r11 | ||
464 | |||
// COMPUTE(a, b, s, M, R): finishes one G/H/I step whose sum is already in Z.
//  - loads the next round constant into TRound and advances TPtr by 4;
//  - dep.z/shrp: Y = low 32 bits of Z moved to bits 32..63, then the pair
//    Z:Y is shifted right by 64 - s, which leaves the low 32 bits of Z
//    rotated left by s in the low word of Z;
//  - a = Z + b;
//  - R = M copies the message word just used into the RotateMn slot.
465 | #define COMPUTE(a, b, s, M, R) \ | ||
466 | { \ | ||
467 | .mii ; \ | ||
468 | ld4 TRound = [TPtr], 4 ; \ | ||
469 | dep.z Y = Z, 32, 32 ;; \ | ||
470 | shrp Z = Z, Y, 64 - s ; \ | ||
471 | } ;; \ | ||
472 | { \ | ||
473 | .mmi ; \ | ||
474 | add a = Z, b ; \ | ||
475 | mov R = M ; \ | ||
476 | nop 0x0 ; \ | ||
477 | } ;; | ||
478 | |||
// LOOP(a, b, s, M, R, label): same work as COMPUTE (next constant load,
// rotate-left of Z by s, a = Z + b, R = M), but the third slot of the last
// bundle is a br.ctop back to label, so this closes a counted loop body.
479 | #define LOOP(a, b, s, M, R, label) \ | ||
480 | { .mii ; \ | ||
481 | ld4 TRound = [TPtr], 4 ; \ | ||
482 | dep.z Y = Z, 32, 32 ;; \ | ||
483 | shrp Z = Z, Y, 64 - s ; \ | ||
484 | } ;; \ | ||
485 | { .mib ; \ | ||
486 | add a = Z, b ; \ | ||
487 | mov R = M ; \ | ||
488 | br.ctop.sptk.many label ; \ | ||
489 | } ;; | ||
490 | |||
491 | // G(B, C, D) = (B & D) | (C & ~D) | ||
492 | |||
// G(a, b, c, d, M): leaves Z = M + TRound + a + ((b & d) | (c & ~d)).
// Y and X are used as temporaries; a itself is not written here.
493 | #define G(a, b, c, d, M) \ | ||
494 | { .mmi ; \ | ||
495 | add Z = M, TRound ; \ | ||
496 | and Y = b, d ; \ | ||
497 | andcm X = c, d ; \ | ||
498 | } ;; \ | ||
499 | { .mii ; \ | ||
500 | add Z = Z, a ; \ | ||
501 | or Y = Y, X ;; \ | ||
502 | add Z = Z, Y ; \ | ||
503 | } ;; | ||
504 | |||
505 | // H(B, C, D) = B ^ C ^ D | ||
506 | |||
// H(a, b, c, d, M): leaves Z = M + TRound + a + (b ^ c ^ d).
// Y is used as a temporary; a itself is not written here.
507 | #define H(a, b, c, d, M) \ | ||
508 | { .mmi ; \ | ||
509 | add Z = M, TRound ; \ | ||
510 | xor Y = b, c ; \ | ||
511 | nop 0x0 ; \ | ||
512 | } ;; \ | ||
513 | { .mii ; \ | ||
514 | add Z = Z, a ; \ | ||
515 | xor Y = Y, d ;; \ | ||
516 | add Z = Z, Y ; \ | ||
517 | } ;; | ||
518 | |||
519 | // I(B, C, D) = C ^ (B | ~D) | ||
520 | // | ||
521 | // However, since we have an andcm operator, we use the fact that | ||
522 | // | ||
523 | // Y ^ Z == ~Y ^ ~Z | ||
524 | // | ||
525 | // to rewrite the expression as | ||
526 | // | ||
527 | // I(B, C, D) = ~C ^ (~B & D) | ||
528 | |||
// I(a, b, c, d, M): leaves Z = M + TRound + a + (~c ^ (d & ~b)).
// The first andcm forms d & ~b, the second forms ~c (-1 & ~c).
// Y and X are used as temporaries; a itself is not written here.
529 | #define I(a, b, c, d, M) \ | ||
530 | { .mmi ; \ | ||
531 | add Z = M, TRound ; \ | ||
532 | andcm Y = d, b ; \ | ||
533 | andcm X = -1, c ; \ | ||
534 | } ;; \ | ||
535 | { .mii ; \ | ||
536 | add Z = Z, a ; \ | ||
537 | xor Y = Y, X ;; \ | ||
538 | add Z = Z, Y ; \ | ||
539 | } ;; | ||
540 | |||
// GG4(label): four consecutive G steps on M0..M3 with rotate amounts
// 5, 9, 14 and 20. The roles of A, B, C, D shift by one position per step.
// The last step uses LOOP, which branches back to label with br.ctop.
541 | #define GG4(label) \ | ||
542 | G(A, B, C, D, M0) \ | ||
543 | COMPUTE(A, B, 5, M0, RotateM0) \ | ||
544 | G(D, A, B, C, M1) \ | ||
545 | COMPUTE(D, A, 9, M1, RotateM1) \ | ||
546 | G(C, D, A, B, M2) \ | ||
547 | COMPUTE(C, D, 14, M2, RotateM2) \ | ||
548 | G(B, C, D, A, M3) \ | ||
549 | LOOP(B, C, 20, M3, RotateM3, label) | ||
550 | |||
// HH4(label): four consecutive H steps on M0..M3 with rotate amounts
// 4, 11, 16 and 23. The roles of A, B, C, D shift by one position per step.
// The last step uses LOOP, which branches back to label with br.ctop.
551 | #define HH4(label) \ | ||
552 | H(A, B, C, D, M0) \ | ||
553 | COMPUTE(A, B, 4, M0, RotateM0) \ | ||
554 | H(D, A, B, C, M1) \ | ||
555 | COMPUTE(D, A, 11, M1, RotateM1) \ | ||
556 | H(C, D, A, B, M2) \ | ||
557 | COMPUTE(C, D, 16, M2, RotateM2) \ | ||
558 | H(B, C, D, A, M3) \ | ||
559 | LOOP(B, C, 23, M3, RotateM3, label) | ||
560 | |||
// II4(label): four consecutive I steps on M0..M3 with rotate amounts
// 6, 10, 15 and 21. The roles of A, B, C, D shift by one position per step.
// The last step uses LOOP, which branches back to label with br.ctop.
561 | #define II4(label) \ | ||
562 | I(A, B, C, D, M0) \ | ||
563 | COMPUTE(A, B, 6, M0, RotateM0) \ | ||
564 | I(D, A, B, C, M1) \ | ||
565 | COMPUTE(D, A, 10, M1, RotateM1) \ | ||
566 | I(C, D, A, B, M2) \ | ||
567 | COMPUTE(C, D, 15, M2, RotateM2) \ | ||
568 | I(B, C, D, A, M3) \ | ||
569 | LOOP(B, C, 21, M3, RotateM3, label) | ||
570 | |||
// FFLOAD(a, b, c, d, M, N, s): one F step combined with a message load.
//  - if pMore is set, loads the next input word into N (DPtr advances by 4);
//  - Z = M + TRound + a + ((c & b) | (d & ~b));
//  - loads the next round constant into TRound;
//  - rotates the low 32 bits of Z left by s (dep.z/shrp) and sets a = Z + b.
571 | #define FFLOAD(a, b, c, d, M, N, s) \ | ||
572 | { .mii ; \ | ||
573 | (pMore) ld4 N = [DPtr], 4 ; \ | ||
574 | add Z = M, TRound ; \ | ||
575 | and Y = c, b ; \ | ||
576 | } \ | ||
577 | { .mmi ; \ | ||
578 | andcm X = d, b ;; \ | ||
579 | add Z = Z, a ; \ | ||
580 | or Y = Y, X ; \ | ||
581 | } ;; \ | ||
582 | { .mii ; \ | ||
583 | ld4 TRound = [TPtr], 4 ; \ | ||
584 | add Z = Z, Y ;; \ | ||
585 | dep.z Y = Z, 32, 32 ; \ | ||
586 | } ;; \ | ||
587 | { .mii ; \ | ||
588 | nop 0x0 ; \ | ||
589 | shrp Z = Z, Y, 64 - s ;; \ | ||
590 | add a = Z, b ; \ | ||
591 | } ;; | ||
592 | |||
// FFLOOP(a, b, c, d, M, N, s, dest): same F step and conditional load as
// FFLOAD, followed by the loop bookkeeping: pMore is recomputed as
// (LTrip != 0) from the value LTrip had before this bundle, LTrip is
// decremented, and br.ctop branches back to dest.
593 | #define FFLOOP(a, b, c, d, M, N, s, dest) \ | ||
594 | { .mii ; \ | ||
595 | (pMore) ld4 N = [DPtr], 4 ; \ | ||
596 | add Z = M, TRound ; \ | ||
597 | and Y = c, b ; \ | ||
598 | } \ | ||
599 | { .mmi ; \ | ||
600 | andcm X = d, b ;; \ | ||
601 | add Z = Z, a ; \ | ||
602 | or Y = Y, X ; \ | ||
603 | } ;; \ | ||
604 | { .mii ; \ | ||
605 | ld4 TRound = [TPtr], 4 ; \ | ||
606 | add Z = Z, Y ;; \ | ||
607 | dep.z Y = Z, 32, 32 ; \ | ||
608 | } ;; \ | ||
609 | { .mii ; \ | ||
610 | nop 0x0 ; \ | ||
611 | shrp Z = Z, Y, 64 - s ;; \ | ||
612 | add a = Z, b ; \ | ||
613 | } \ | ||
614 | { .mib ; \ | ||
615 | cmp.ne pMore, p0 = 0, LTrip ; \ | ||
616 | add LTrip = -1, LTrip ; \ | ||
617 | br.ctop.dptk.many dest ; \ | ||
618 | } ;; | ||
619 | |||
// Digest co-routine for word-aligned input, called from the driver with
// QUICK_RTN as the return link. It allocates a frame with _NINPUTS inputs
// of which _NROTATE rotate, then runs the FF round as a counted loop
// (ar.lc = 3, ar.ec = 0) over M12..M15. pMore starts true and LTrip starts
// at 2, so FFLOOP keeps pMore true for the first three passes and clears it
// for the fourth; the conditional loads therefore fetch twelve further
// message words. Execution then falls through into md5_digest_GHI.
620 | .type md5_digest_block0, @function | ||
621 | .align 32 | ||
622 | |||
623 | .proc md5_digest_block0 | ||
624 | .prologue | ||
625 | md5_digest_block0: | ||
626 | .altrp QUICK_RTN | ||
627 | .body | ||
628 | { .mmi | ||
629 | alloc PFS = ar.pfs, _NINPUTS, _NLOCALS, _NOUTPUT, _NROTATE | ||
630 | mov LTrip = 2 | ||
631 | mov ar.lc = 3 | ||
632 | } ;; | ||
// cmp.eq r0, r0 is always true: pMore starts out set.
633 | { .mii | ||
634 | cmp.eq pMore, p0 = r0, r0 | ||
635 | mov ar.ec = 0 | ||
636 | nop 0x0 | ||
637 | } ;; | ||
638 | |||
639 | .md5_FF_round0: | ||
640 | FFLOAD(A, B, C, D, M12, RotateM0, 7) | ||
641 | FFLOAD(D, A, B, C, M13, RotateM1, 12) | ||
642 | FFLOAD(C, D, A, B, M14, RotateM2, 17) | ||
643 | FFLOOP(B, C, D, A, M15, RotateM3, 22, .md5_FF_round0) | ||
644 | // | ||
645 | // !!! Fall through to md5_digest_GHI | ||
646 | // | ||
647 | .endp md5_digest_block0 | ||
648 | |||
// Shared tail of the digest co-routine. It is reached by fall-through from
// md5_digest_block0 and by branch from the md5_digest_block<offset>
// variants. Before each of the G, H and I rounds it permutes M0..M15
// through the scratch registers so that the round macro can consume
// M0..M3, sets ar.lc = 3 and ar.ec = 1, and runs the round loop. It
// returns to the driver through QUICK_RTN.
649 | .type md5_digest_GHI, @function | ||
650 | .align 32 | ||
651 | |||
652 | .proc md5_digest_GHI | ||
653 | .prologue | ||
654 | .regstk _NINPUTS, _NLOCALS, _NOUTPUT, _NROTATE | ||
655 | md5_digest_GHI: | ||
656 | .altrp QUICK_RTN | ||
657 | .body | ||
658 | // | ||
659 | // The following sequence shuffles the block constants round for the | ||
660 | // next round: | ||
661 | // | ||
662 | // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 | ||
663 | // 1 6 11 0 5 10 15 4 9 14 3 8 13 2 7 12 | ||
664 | // | ||
665 | { .mmi | ||
666 | mov Z = M0 | ||
667 | mov Y = M15 | ||
668 | mov ar.lc = 3 | ||
669 | } | ||
670 | { .mmi | ||
671 | mov X = M2 | ||
672 | mov W = M9 | ||
673 | mov V = M4 | ||
674 | } ;; | ||
675 | |||
676 | { .mmi | ||
677 | mov M0 = M1 | ||
678 | mov M15 = M12 | ||
679 | mov ar.ec = 1 | ||
680 | } | ||
681 | { .mmi | ||
682 | mov M2 = M11 | ||
683 | mov M9 = M14 | ||
684 | mov M4 = M5 | ||
685 | } ;; | ||
686 | |||
687 | { .mmi | ||
688 | mov M1 = M6 | ||
689 | mov M12 = M13 | ||
690 | mov U = M3 | ||
691 | } | ||
692 | { .mmi | ||
693 | mov M11 = M8 | ||
694 | mov M14 = M7 | ||
695 | mov M5 = M10 | ||
696 | } ;; | ||
697 | |||
698 | { .mmi | ||
699 | mov M6 = Y | ||
700 | mov M13 = X | ||
701 | mov M3 = Z | ||
702 | } | ||
703 | { .mmi | ||
704 | mov M8 = W | ||
705 | mov M7 = V | ||
706 | mov M10 = U | ||
707 | } ;; | ||
708 | |||
709 | .md5_GG_round: | ||
710 | GG4(.md5_GG_round) | ||
711 | |||
712 | // The following sequence shuffles the block constants round for the | ||
713 | // next round: | ||
714 | // | ||
715 | // 1 6 11 0 5 10 15 4 9 14 3 8 13 2 7 12 | ||
716 | // 5 8 11 14 1 4 7 10 13 0 3 6 9 12 15 2 | ||
717 | |||
718 | { .mmi | ||
719 | mov Z = M0 | ||
720 | mov Y = M1 | ||
721 | mov ar.lc = 3 | ||
722 | } | ||
723 | { .mmi | ||
724 | mov X = M3 | ||
725 | mov W = M5 | ||
726 | mov V = M6 | ||
727 | } ;; | ||
728 | |||
729 | { .mmi | ||
730 | mov M0 = M4 | ||
731 | mov M1 = M11 | ||
732 | mov ar.ec = 1 | ||
733 | } | ||
734 | { .mmi | ||
735 | mov M3 = M9 | ||
736 | mov U = M8 | ||
737 | mov T = M13 | ||
738 | } ;; | ||
739 | |||
740 | { .mmi | ||
741 | mov M4 = Z | ||
742 | mov M11 = Y | ||
743 | mov M5 = M7 | ||
744 | } | ||
745 | { .mmi | ||
746 | mov M6 = M14 | ||
747 | mov M8 = M12 | ||
748 | mov M13 = M15 | ||
749 | } ;; | ||
750 | |||
751 | { .mmi | ||
752 | mov M7 = W | ||
753 | mov M14 = V | ||
754 | nop 0x0 | ||
755 | } | ||
756 | { .mmi | ||
757 | mov M9 = X | ||
758 | mov M12 = U | ||
759 | mov M15 = T | ||
760 | } ;; | ||
761 | |||
762 | .md5_HH_round: | ||
763 | HH4(.md5_HH_round) | ||
764 | |||
765 | // The following sequence shuffles the block constants round for the | ||
766 | // next round: | ||
767 | // | ||
768 | // 5 8 11 14 1 4 7 10 13 0 3 6 9 12 15 2 | ||
769 | // 0 7 14 5 12 3 10 1 8 15 6 13 4 11 2 9 | ||
770 | |||
771 | { .mmi | ||
772 | mov Z = M0 | ||
773 | mov Y = M15 | ||
774 | mov ar.lc = 3 | ||
775 | } | ||
776 | { .mmi | ||
777 | mov X = M10 | ||
778 | mov W = M1 | ||
779 | mov V = M4 | ||
780 | } ;; | ||
781 | |||
782 | { .mmi | ||
783 | mov M0 = M9 | ||
784 | mov M15 = M12 | ||
785 | mov ar.ec = 1 | ||
786 | } | ||
787 | { .mmi | ||
788 | mov M10 = M11 | ||
789 | mov M1 = M6 | ||
790 | mov M4 = M13 | ||
791 | } ;; | ||
792 | |||
793 | { .mmi | ||
794 | mov M9 = M14 | ||
795 | mov M12 = M5 | ||
796 | mov U = M3 | ||
797 | } | ||
798 | { .mmi | ||
799 | mov M11 = M8 | ||
800 | mov M6 = M7 | ||
801 | mov M13 = M2 | ||
802 | } ;; | ||
803 | |||
804 | { .mmi | ||
805 | mov M14 = Y | ||
806 | mov M5 = X | ||
807 | mov M3 = Z | ||
808 | } | ||
809 | { .mmi | ||
810 | mov M8 = W | ||
811 | mov M7 = V | ||
812 | mov M2 = U | ||
813 | } ;; | ||
814 | |||
815 | .md5_II_round: | ||
816 | II4(.md5_II_round) | ||
817 | |||
// All four rounds are done; return to the driver, which adds A..D to its
// accumulators.
818 | { .mib | ||
819 | nop 0x0 | ||
820 | nop 0x0 | ||
821 | br.ret.sptk.many QUICK_RTN | ||
822 | } ;; | ||
823 | |||
824 | .endp md5_digest_GHI | ||
825 | |||
// FFLOADU(a, b, c, d, M, P, N, s, offset): FFLOAD for misaligned input.
// In addition to the F step on M and the conditional load into N, it
// realigns the raw word held in P for use by the next step:
// W = GETLW(P) | DTmp, DTmp = GETRW(P), then P = W.
826 | #define FFLOADU(a, b, c, d, M, P, N, s, offset) \ | ||
827 | { .mii ; \ | ||
828 | (pMore) ld4 N = [DPtr], 4 ; \ | ||
829 | add Z = M, TRound ; \ | ||
830 | and Y = c, b ; \ | ||
831 | } \ | ||
832 | { .mmi ; \ | ||
833 | andcm X = d, b ;; \ | ||
834 | add Z = Z, a ; \ | ||
835 | or Y = Y, X ; \ | ||
836 | } ;; \ | ||
837 | { .mii ; \ | ||
838 | ld4 TRound = [TPtr], 4 ; \ | ||
839 | GETLW(W, P, offset) ; \ | ||
840 | add Z = Z, Y ; \ | ||
841 | } ;; \ | ||
842 | { .mii ; \ | ||
843 | or W = W, DTmp ; \ | ||
844 | dep.z Y = Z, 32, 32 ;; \ | ||
845 | shrp Z = Z, Y, 64 - s ; \ | ||
846 | } ;; \ | ||
847 | { .mii ; \ | ||
848 | add a = Z, b ; \ | ||
849 | GETRW(DTmp, P, offset) ; \ | ||
850 | mov P = W ; \ | ||
851 | } ;; | ||
852 | |||
// FFLOOPU(a, b, c, d, M, P, N, s, offset): loop-closing form of FFLOADU.
// The realignment of P (GETLW, or with DTmp, GETRW, mov P = W) is executed
// only while pMore is set. The final bundle recomputes pMore as
// (LTrip != 0), decrements LTrip and branches back to
// .md5_FF_round<offset> with br.ctop.
853 | #define FFLOOPU(a, b, c, d, M, P, N, s, offset) \ | ||
854 | { .mii ; \ | ||
855 | (pMore) ld4 N = [DPtr], 4 ; \ | ||
856 | add Z = M, TRound ; \ | ||
857 | and Y = c, b ; \ | ||
858 | } \ | ||
859 | { .mmi ; \ | ||
860 | andcm X = d, b ;; \ | ||
861 | add Z = Z, a ; \ | ||
862 | or Y = Y, X ; \ | ||
863 | } ;; \ | ||
864 | { .mii ; \ | ||
865 | ld4 TRound = [TPtr], 4 ; \ | ||
866 | (pMore) GETLW(W, P, offset) ; \ | ||
867 | add Z = Z, Y ; \ | ||
868 | } ;; \ | ||
869 | { .mii ; \ | ||
870 | (pMore) or W = W, DTmp ; \ | ||
871 | dep.z Y = Z, 32, 32 ;; \ | ||
872 | shrp Z = Z, Y, 64 - s ; \ | ||
873 | } ;; \ | ||
874 | { .mii ; \ | ||
875 | add a = Z, b ; \ | ||
876 | (pMore) GETRW(DTmp, P, offset) ; \ | ||
877 | (pMore) mov P = W ; \ | ||
878 | } \ | ||
879 | { .mib ; \ | ||
880 | cmp.ne pMore, p0 = 0, LTrip ; \ | ||
881 | add LTrip = -1, LTrip ; \ | ||
882 | br.ctop.sptk.many .md5_FF_round##offset ; \ | ||
883 | } ;; | ||
884 | |||
// MD5FBLOCK(offset) defines md5_digest_block<offset>, the digest co-routine
// for input misaligned by offset bytes. Its setup matches
// md5_digest_block0 (same alloc, LTrip = 2, ar.lc = 3, ar.ec = 0, pMore
// set). The FF round uses the FFLOADU/FFLOOPU variants so each raw word is
// realigned before it is consumed. It then branches to md5_digest_GHI for
// the remaining rounds. Instantiated below for offsets 1, 2 and 3.
885 | #define MD5FBLOCK(offset) \ | ||
886 | .type md5_digest_block##offset, @function ; \ | ||
887 | \ | ||
888 | .align 32 ; \ | ||
889 | .proc md5_digest_block##offset ; \ | ||
890 | .prologue ; \ | ||
891 | .altrp QUICK_RTN ; \ | ||
892 | .body ; \ | ||
893 | md5_digest_block##offset: \ | ||
894 | { .mmi ; \ | ||
895 | alloc PFS = ar.pfs, _NINPUTS, _NLOCALS, _NOUTPUT, _NROTATE ; \ | ||
896 | mov LTrip = 2 ; \ | ||
897 | mov ar.lc = 3 ; \ | ||
898 | } ;; \ | ||
899 | { .mii ; \ | ||
900 | cmp.eq pMore, p0 = r0, r0 ; \ | ||
901 | mov ar.ec = 0 ; \ | ||
902 | nop 0x0 ; \ | ||
903 | } ;; \ | ||
904 | \ | ||
905 | .pred.rel "mutex", pLoad, pSkip ; \ | ||
906 | .md5_FF_round##offset: \ | ||
907 | FFLOADU(A, B, C, D, M12, M13, RotateM0, 7, offset) \ | ||
908 | FFLOADU(D, A, B, C, M13, M14, RotateM1, 12, offset) \ | ||
909 | FFLOADU(C, D, A, B, M14, M15, RotateM2, 17, offset) \ | ||
910 | FFLOOPU(B, C, D, A, M15, RotateM0, RotateM3, 22, offset) \ | ||
911 | \ | ||
912 | { .mib ; \ | ||
913 | nop 0x0 ; \ | ||
914 | nop 0x0 ; \ | ||
915 | br.cond.sptk.many md5_digest_GHI ; \ | ||
916 | } ;; \ | ||
917 | .endp md5_digest_block##offset | ||
918 | |||
919 | MD5FBLOCK(1) | ||
920 | MD5FBLOCK(2) | ||
921 | MD5FBLOCK(3) | ||
922 | |||
// Table of the 64 round constants, 4 bytes each (64*4 bytes in total),
// emitted byte by byte with the least significant byte first; entry 0 is
// 0xd76aa478. The number in each trailing comment is the entry index. The
// driver reads entry 0 into CTable0 and the rounds fetch the remaining
// entries sequentially through TPtr.
923 | .align 64 | ||
924 | .type md5_constants, @object | ||
925 | md5_constants: | ||
926 | .md5_tbl_data_order: // To ensure little-endian data | ||
927 | // order, code as bytes. | ||
928 | data1 0x78, 0xa4, 0x6a, 0xd7 // 0 | ||
929 | data1 0x56, 0xb7, 0xc7, 0xe8 // 1 | ||
930 | data1 0xdb, 0x70, 0x20, 0x24 // 2 | ||
931 | data1 0xee, 0xce, 0xbd, 0xc1 // 3 | ||
932 | data1 0xaf, 0x0f, 0x7c, 0xf5 // 4 | ||
933 | data1 0x2a, 0xc6, 0x87, 0x47 // 5 | ||
934 | data1 0x13, 0x46, 0x30, 0xa8 // 6 | ||
935 | data1 0x01, 0x95, 0x46, 0xfd // 7 | ||
936 | data1 0xd8, 0x98, 0x80, 0x69 // 8 | ||
937 | data1 0xaf, 0xf7, 0x44, 0x8b // 9 | ||
938 | data1 0xb1, 0x5b, 0xff, 0xff // 10 | ||
939 | data1 0xbe, 0xd7, 0x5c, 0x89 // 11 | ||
940 | data1 0x22, 0x11, 0x90, 0x6b // 12 | ||
941 | data1 0x93, 0x71, 0x98, 0xfd // 13 | ||
942 | data1 0x8e, 0x43, 0x79, 0xa6 // 14 | ||
943 | data1 0x21, 0x08, 0xb4, 0x49 // 15 | ||
944 | data1 0x62, 0x25, 0x1e, 0xf6 // 16 | ||
945 | data1 0x40, 0xb3, 0x40, 0xc0 // 17 | ||
946 | data1 0x51, 0x5a, 0x5e, 0x26 // 18 | ||
947 | data1 0xaa, 0xc7, 0xb6, 0xe9 // 19 | ||
948 | data1 0x5d, 0x10, 0x2f, 0xd6 // 20 | ||
949 | data1 0x53, 0x14, 0x44, 0x02 // 21 | ||
950 | data1 0x81, 0xe6, 0xa1, 0xd8 // 22 | ||
951 | data1 0xc8, 0xfb, 0xd3, 0xe7 // 23 | ||
952 | data1 0xe6, 0xcd, 0xe1, 0x21 // 24 | ||
953 | data1 0xd6, 0x07, 0x37, 0xc3 // 25 | ||
954 | data1 0x87, 0x0d, 0xd5, 0xf4 // 26 | ||
955 | data1 0xed, 0x14, 0x5a, 0x45 // 27 | ||
956 | data1 0x05, 0xe9, 0xe3, 0xa9 // 28 | ||
957 | data1 0xf8, 0xa3, 0xef, 0xfc // 29 | ||
958 | data1 0xd9, 0x02, 0x6f, 0x67 // 30 | ||
959 | data1 0x8a, 0x4c, 0x2a, 0x8d // 31 | ||
960 | data1 0x42, 0x39, 0xfa, 0xff // 32 | ||
961 | data1 0x81, 0xf6, 0x71, 0x87 // 33 | ||
962 | data1 0x22, 0x61, 0x9d, 0x6d // 34 | ||
963 | data1 0x0c, 0x38, 0xe5, 0xfd // 35 | ||
964 | data1 0x44, 0xea, 0xbe, 0xa4 // 36 | ||
965 | data1 0xa9, 0xcf, 0xde, 0x4b // 37 | ||
966 | data1 0x60, 0x4b, 0xbb, 0xf6 // 38 | ||
967 | data1 0x70, 0xbc, 0xbf, 0xbe // 39 | ||
968 | data1 0xc6, 0x7e, 0x9b, 0x28 // 40 | ||
969 | data1 0xfa, 0x27, 0xa1, 0xea // 41 | ||
970 | data1 0x85, 0x30, 0xef, 0xd4 // 42 | ||
971 | data1 0x05, 0x1d, 0x88, 0x04 // 43 | ||
972 | data1 0x39, 0xd0, 0xd4, 0xd9 // 44 | ||
973 | data1 0xe5, 0x99, 0xdb, 0xe6 // 45 | ||
974 | data1 0xf8, 0x7c, 0xa2, 0x1f // 46 | ||
975 | data1 0x65, 0x56, 0xac, 0xc4 // 47 | ||
976 | data1 0x44, 0x22, 0x29, 0xf4 // 48 | ||
977 | data1 0x97, 0xff, 0x2a, 0x43 // 49 | ||
978 | data1 0xa7, 0x23, 0x94, 0xab // 50 | ||
979 | data1 0x39, 0xa0, 0x93, 0xfc // 51 | ||
980 | data1 0xc3, 0x59, 0x5b, 0x65 // 52 | ||
981 | data1 0x92, 0xcc, 0x0c, 0x8f // 53 | ||
982 | data1 0x7d, 0xf4, 0xef, 0xff // 54 | ||
983 | data1 0xd1, 0x5d, 0x84, 0x85 // 55 | ||
984 | data1 0x4f, 0x7e, 0xa8, 0x6f // 56 | ||
985 | data1 0xe0, 0xe6, 0x2c, 0xfe // 57 | ||
986 | data1 0x14, 0x43, 0x01, 0xa3 // 58 | ||
987 | data1 0xa1, 0x11, 0x08, 0x4e // 59 | ||
988 | data1 0x82, 0x7e, 0x53, 0xf7 // 60 | ||
989 | data1 0x35, 0xf2, 0x3a, 0xbd // 61 | ||
990 | data1 0xbb, 0xd2, 0xd7, 0x2a // 62 | ||
991 | data1 0x91, 0xd3, 0x86, 0xeb // 63 | ||
992 | .size md5_constants#,64*4 | ||