summaryrefslogtreecommitdiff
path: root/src/lib/libssl/src/crypto/modes/asm/ghash-parisc.pl
diff options
context:
space:
mode:
Diffstat (limited to 'src/lib/libssl/src/crypto/modes/asm/ghash-parisc.pl')
-rw-r--r--src/lib/libssl/src/crypto/modes/asm/ghash-parisc.pl730
1 files changed, 730 insertions, 0 deletions
diff --git a/src/lib/libssl/src/crypto/modes/asm/ghash-parisc.pl b/src/lib/libssl/src/crypto/modes/asm/ghash-parisc.pl
new file mode 100644
index 0000000000..8c7454ee93
--- /dev/null
+++ b/src/lib/libssl/src/crypto/modes/asm/ghash-parisc.pl
@@ -0,0 +1,730 @@
1#!/usr/bin/env perl
2#
3# ====================================================================
4# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9#
10# April 2010
11#
12# The module implements "4-bit" GCM GHASH function and underlying
13# single multiplication operation in GF(2^128). "4-bit" means that it
14# uses 256 bytes per-key table [+128 bytes shared table]. On PA-7100LC
15# it processes one byte in 19.6 cycles, which is more than twice as
16# fast as code generated by gcc 3.2. PA-RISC 2.0 loop is scheduled for
17# 8 cycles, but measured performance on PA-8600 system is ~9 cycles per
18# processed byte. This is ~2.2x faster than 64-bit code generated by
19# vendor compiler (which used to be very hard to beat:-).
20#
21# Special thanks to polarhome.com for providing HP-UX account.
22
# Command line: <flavour> [<output file>].
#   $flavour - ABI selector, matched against /64/ further down
#   $output  - file that receives the generated assembly
$flavour = shift;
$output  = shift;

# Redirect STDOUT to the requested file.  The three-argument form keeps
# characters in the file name from being interpreted as an open mode, and a
# failed open is fatal rather than ignored.  Without an output argument the
# assembly goes to the inherited STDOUT.
if (defined $output && length $output) {
    open STDOUT, '>', $output or die "can't open $output: $!";
}
26
# ABI-dependent settings, selected by $flavour: a flavour containing "64"
# gets the 64-bit PA-RISC 2.0W values, anything else the 32-bit ones.
#   $LEVEL          argument of the .LEVEL directive
#   $SIZE_T         size in bytes of one saved register slot
#   $FRAME_MARKER   size of the frame marker
#   $SAVED_RP       offset below %sp at which %r2 is stored in the prologue
#   $PUSH/$PUSHMA   store mnemonics (plain / modify-after)
#   $POP/$POPMB     load mnemonics (plain / modify-before)
#   $NREGS          value emitted as ENTRY_GR in .CALLINFO
# (For the 32-bit $LEVEL an alternative "1.0\n\t.ALLOW\t2.0" was left
# commented out in the original.)
($LEVEL, $SIZE_T, $FRAME_MARKER, $SAVED_RP,
 $PUSH, $PUSHMA, $POP, $POPMB, $NREGS) =
    $flavour =~ /64/
        ? ("2.0W", 8, 80, 16, "std", "std,ma", "ldd", "ldd,mb", 6)
        : ("1.0",  4, 48, 20, "stw", "stwm",   "ldw", "ldwm",   11);

# Frame size: room for ten register slots on top of the frame marker
# [+ argument transfer].
$FRAME = $FRAME_MARKER + 10*$SIZE_T;
51
# Symbolic register names interpolated into the assembly templates.

################# volatile registers
# argument block
($Xi, $Htbl, $inp, $len) = ("%r26", "%r25", "%r24", "%r23");
# variables; $Hhh shares the register holding $Htbl
$Hhh = $Htbl;
($Hll, $Zhh, $Zll, $cnt)     = ("%r22", "%r21", "%r20", "%r19");
($rem_4bit, $rem, $mask0xf0) = ("%r28", "%r29", "%r31");

################# preserved registers
($Thh, $Tll, $nlo, $nhi, $byte) = ("%r1", "%r2", "%r3", "%r4", "%r5");

# Extra names that are only assigned for the 32-bit flavour.
($Zhl, $Zlh, $Hhl, $Hlh, $Thl, $Tlh) =
    ("%r6", "%r7", "%r8", "%r9", "%r10", "%r11") if ($SIZE_T==4);

$rem2 = "%r6";	# used in PA-RISC 2.0 code
81
# gcm_gmult_4bit entry: assembler level/section directives, the export of the
# symbol, and the standard prologue, which stores %r2 at -$SAVED_RP(%sp) and
# saves %r3-%r6 in the frame allocated by the $PUSHMA instruction.
82$code.=<<___;
83 .LEVEL $LEVEL
84 .SPACE \$TEXT\$
85 .SUBSPA \$CODE\$,QUAD=0,ALIGN=8,ACCESS=0x2C,CODE_ONLY
86
87 .EXPORT gcm_gmult_4bit,ENTRY,ARGW0=GR,ARGW1=GR
88 .ALIGN 64
89gcm_gmult_4bit
90 .PROC
91 .CALLINFO FRAME=`$FRAME-10*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=$NREGS
92 .ENTRY
93 $PUSH %r2,-$SAVED_RP(%sp) ; standard prologue
94 $PUSHMA %r3,$FRAME(%sp)
95 $PUSH %r4,`-$FRAME+1*$SIZE_T`(%sp)
96 $PUSH %r5,`-$FRAME+2*$SIZE_T`(%sp)
97 $PUSH %r6,`-$FRAME+3*$SIZE_T`(%sp)
98___
# 32-bit flavour only: additionally save %r7-%r11.
99$code.=<<___ if ($SIZE_T==4);
100 $PUSH %r7,`-$FRAME+4*$SIZE_T`(%sp)
101 $PUSH %r8,`-$FRAME+5*$SIZE_T`(%sp)
102 $PUSH %r9,`-$FRAME+6*$SIZE_T`(%sp)
103 $PUSH %r10,`-$FRAME+7*$SIZE_T`(%sp)
104 $PUSH %r11,`-$FRAME+8*$SIZE_T`(%sp)
105___
# Position-independent address of the L$rem_4bit table: blr deposits an
# address in $rem_4bit, andcm clears its two low bits (mask 3 in $rem), and
# ldo adds the distance L$rem_4bit-L$pic_gmult.  $len is set to $inp+$len
# and $mask0xf0 is loaded with 0xf0.
106$code.=<<___;
107 blr %r0,$rem_4bit
108 ldi 3,$rem
109L\$pic_gmult
110 andcm $rem_4bit,$rem,$rem_4bit
111 addl $inp,$len,$len
112 ldo L\$rem_4bit-L\$pic_gmult($rem_4bit),$rem_4bit
113 ldi 0xf0,$mask0xf0
114___
# 32-bit flavour only: run-time selection between the PA-RISC 2.0 code that
# follows and the 1.0 code at L$parisc1_gmult.
# NOTE(review): presumably the extrd,u,*= word nullifies the branch on 2.0
# CPUs so that they fall through -- confirm against the architecture manual.
115$code.=<<___ if ($SIZE_T==4);
116 ldi 31,$rem
117 mtctl $rem,%cr11
118 extrd,u,*= $rem,%sar,1,$rem ; executes on PA-RISC 1.0
119 b L\$parisc1_gmult
120 nop
121___
122
# gcm_gmult_4bit, PA-RISC 2.0 code path.  Z is kept in two 64-bit registers
# ($Zhh:$Zll).  Xi is read one byte at a time starting with byte 15; the low
# and high nibbles ($nlo/$nhi) index the table at $Htbl ($Hhh at offset 0,
# $Hll at offset 8), Z is shifted right by four bits per nibble, and the
# nibble shifted out selects an L$rem_4bit entry that is XORed into $Zhh.
# $cnt starts at 13 and indexes the remaining Xi bytes; the final Z is
# stored back to Xi.
123$code.=<<___;
124 ldb 15($Xi),$nlo
125 ldo 8($Htbl),$Hll
126
127 and $mask0xf0,$nlo,$nhi
128 depd,z $nlo,59,4,$nlo
129
130 ldd $nlo($Hll),$Zll
131 ldd $nlo($Hhh),$Zhh
132
133 depd,z $Zll,60,4,$rem
134 shrpd $Zhh,$Zll,4,$Zll
135 extrd,u $Zhh,59,60,$Zhh
136 ldb 14($Xi),$nlo
137
138 ldd $nhi($Hll),$Tll
139 ldd $nhi($Hhh),$Thh
140 and $mask0xf0,$nlo,$nhi
141 depd,z $nlo,59,4,$nlo
142
143 xor $Tll,$Zll,$Zll
144 xor $Thh,$Zhh,$Zhh
145 ldd $rem($rem_4bit),$rem
146 b L\$oop_gmult_pa2
147 ldi 13,$cnt
148
149 .ALIGN 8
150L\$oop_gmult_pa2
151 xor $rem,$Zhh,$Zhh ; moved here to work around gas bug
152 depd,z $Zll,60,4,$rem
153
154 shrpd $Zhh,$Zll,4,$Zll
155 extrd,u $Zhh,59,60,$Zhh
156 ldd $nlo($Hll),$Tll
157 ldd $nlo($Hhh),$Thh
158
159 xor $Tll,$Zll,$Zll
160 xor $Thh,$Zhh,$Zhh
161 ldd $rem($rem_4bit),$rem
162
163 xor $rem,$Zhh,$Zhh
164 depd,z $Zll,60,4,$rem
165 ldbx $cnt($Xi),$nlo
166
167 shrpd $Zhh,$Zll,4,$Zll
168 extrd,u $Zhh,59,60,$Zhh
169 ldd $nhi($Hll),$Tll
170 ldd $nhi($Hhh),$Thh
171
172 and $mask0xf0,$nlo,$nhi
173 depd,z $nlo,59,4,$nlo
174 ldd $rem($rem_4bit),$rem
175
176 xor $Tll,$Zll,$Zll
177 addib,uv -1,$cnt,L\$oop_gmult_pa2
178 xor $Thh,$Zhh,$Zhh
179
180 xor $rem,$Zhh,$Zhh
181 depd,z $Zll,60,4,$rem
182
183 shrpd $Zhh,$Zll,4,$Zll
184 extrd,u $Zhh,59,60,$Zhh
185 ldd $nlo($Hll),$Tll
186 ldd $nlo($Hhh),$Thh
187
188 xor $Tll,$Zll,$Zll
189 xor $Thh,$Zhh,$Zhh
190 ldd $rem($rem_4bit),$rem
191
192 xor $rem,$Zhh,$Zhh
193 depd,z $Zll,60,4,$rem
194
195 shrpd $Zhh,$Zll,4,$Zll
196 extrd,u $Zhh,59,60,$Zhh
197 ldd $nhi($Hll),$Tll
198 ldd $nhi($Hhh),$Thh
199
200 xor $Tll,$Zll,$Zll
201 xor $Thh,$Zhh,$Zhh
202 ldd $rem($rem_4bit),$rem
203
204 xor $rem,$Zhh,$Zhh
205 std $Zll,8($Xi)
206 std $Zhh,0($Xi)
207___
208
# 32-bit flavour only.  First a branch to L$done_gmult so that the 2.0 code
# above skips what follows, then the PA-RISC 1.0 variant of gcm_gmult_4bit
# at L$parisc1_gmult.  It keeps Z in four 32-bit registers
# ($Zhh,$Zhl,$Zlh,$Zll), addresses the table through four base registers
# ($Hhh at offset 0, $Hhl at 4, $Hlh at 8, $Hll at 12) and uses word-sized
# loads, shifts and stores; the nibble processing mirrors the 2.0 path.
209$code.=<<___ if ($SIZE_T==4);
210 b L\$done_gmult
211 nop
212
213L\$parisc1_gmult
214 ldb 15($Xi),$nlo
215 ldo 12($Htbl),$Hll
216 ldo 8($Htbl),$Hlh
217 ldo 4($Htbl),$Hhl
218
219 and $mask0xf0,$nlo,$nhi
220 zdep $nlo,27,4,$nlo
221
222 ldwx $nlo($Hll),$Zll
223 ldwx $nlo($Hlh),$Zlh
224 ldwx $nlo($Hhl),$Zhl
225 ldwx $nlo($Hhh),$Zhh
226 zdep $Zll,28,4,$rem
227 ldb 14($Xi),$nlo
228 ldwx $rem($rem_4bit),$rem
229 shrpw $Zlh,$Zll,4,$Zll
230 ldwx $nhi($Hll),$Tll
231 shrpw $Zhl,$Zlh,4,$Zlh
232 ldwx $nhi($Hlh),$Tlh
233 shrpw $Zhh,$Zhl,4,$Zhl
234 ldwx $nhi($Hhl),$Thl
235 extru $Zhh,27,28,$Zhh
236 ldwx $nhi($Hhh),$Thh
237 xor $rem,$Zhh,$Zhh
238 and $mask0xf0,$nlo,$nhi
239 zdep $nlo,27,4,$nlo
240
241 xor $Tll,$Zll,$Zll
242 ldwx $nlo($Hll),$Tll
243 xor $Tlh,$Zlh,$Zlh
244 ldwx $nlo($Hlh),$Tlh
245 xor $Thl,$Zhl,$Zhl
246 b L\$oop_gmult_pa1
247 ldi 13,$cnt
248
249 .ALIGN 8
250L\$oop_gmult_pa1
251 zdep $Zll,28,4,$rem
252 ldwx $nlo($Hhl),$Thl
253 xor $Thh,$Zhh,$Zhh
254 ldwx $rem($rem_4bit),$rem
255 shrpw $Zlh,$Zll,4,$Zll
256 ldwx $nlo($Hhh),$Thh
257 shrpw $Zhl,$Zlh,4,$Zlh
258 ldbx $cnt($Xi),$nlo
259 xor $Tll,$Zll,$Zll
260 ldwx $nhi($Hll),$Tll
261 shrpw $Zhh,$Zhl,4,$Zhl
262 xor $Tlh,$Zlh,$Zlh
263 ldwx $nhi($Hlh),$Tlh
264 extru $Zhh,27,28,$Zhh
265 xor $Thl,$Zhl,$Zhl
266 ldwx $nhi($Hhl),$Thl
267 xor $rem,$Zhh,$Zhh
268 zdep $Zll,28,4,$rem
269 xor $Thh,$Zhh,$Zhh
270 ldwx $nhi($Hhh),$Thh
271 shrpw $Zlh,$Zll,4,$Zll
272 ldwx $rem($rem_4bit),$rem
273 shrpw $Zhl,$Zlh,4,$Zlh
274 shrpw $Zhh,$Zhl,4,$Zhl
275 and $mask0xf0,$nlo,$nhi
276 extru $Zhh,27,28,$Zhh
277 zdep $nlo,27,4,$nlo
278 xor $Tll,$Zll,$Zll
279 ldwx $nlo($Hll),$Tll
280 xor $Tlh,$Zlh,$Zlh
281 ldwx $nlo($Hlh),$Tlh
282 xor $rem,$Zhh,$Zhh
283 addib,uv -1,$cnt,L\$oop_gmult_pa1
284 xor $Thl,$Zhl,$Zhl
285
286 zdep $Zll,28,4,$rem
287 ldwx $nlo($Hhl),$Thl
288 xor $Thh,$Zhh,$Zhh
289 ldwx $rem($rem_4bit),$rem
290 shrpw $Zlh,$Zll,4,$Zll
291 ldwx $nlo($Hhh),$Thh
292 shrpw $Zhl,$Zlh,4,$Zlh
293 xor $Tll,$Zll,$Zll
294 ldwx $nhi($Hll),$Tll
295 shrpw $Zhh,$Zhl,4,$Zhl
296 xor $Tlh,$Zlh,$Zlh
297 ldwx $nhi($Hlh),$Tlh
298 extru $Zhh,27,28,$Zhh
299 xor $rem,$Zhh,$Zhh
300 xor $Thl,$Zhl,$Zhl
301 ldwx $nhi($Hhl),$Thl
302 xor $Thh,$Zhh,$Zhh
303 ldwx $nhi($Hhh),$Thh
304 zdep $Zll,28,4,$rem
305 ldwx $rem($rem_4bit),$rem
306 shrpw $Zlh,$Zll,4,$Zll
307 shrpw $Zhl,$Zlh,4,$Zlh
308 shrpw $Zhh,$Zhl,4,$Zhl
309 extru $Zhh,27,28,$Zhh
310 xor $Tll,$Zll,$Zll
311 xor $Tlh,$Zlh,$Zlh
312 xor $rem,$Zhh,$Zhh
313 stw $Zll,12($Xi)
314 xor $Thl,$Zhl,$Zhl
315 stw $Zlh,8($Xi)
316 xor $Thh,$Zhh,$Zhh
317 stw $Zhl,4($Xi)
318 stw $Zhh,0($Xi)
319___
# gcm_gmult_4bit epilogue, reached by both code paths: reload %r2 and
# %r4-%r6 from the slots written by the prologue.
320$code.=<<___;
321L\$done_gmult
322 $POP `-$FRAME-$SAVED_RP`(%sp),%r2 ; standard epilogue
323 $POP `-$FRAME+1*$SIZE_T`(%sp),%r4
324 $POP `-$FRAME+2*$SIZE_T`(%sp),%r5
325 $POP `-$FRAME+3*$SIZE_T`(%sp),%r6
326___
# 32-bit flavour only: reload %r7-%r11 as well.
327$code.=<<___ if ($SIZE_T==4);
328 $POP `-$FRAME+4*$SIZE_T`(%sp),%r7
329 $POP `-$FRAME+5*$SIZE_T`(%sp),%r8
330 $POP `-$FRAME+6*$SIZE_T`(%sp),%r9
331 $POP `-$FRAME+7*$SIZE_T`(%sp),%r10
332 $POP `-$FRAME+8*$SIZE_T`(%sp),%r11
333___
# Return from gcm_gmult_4bit (%r3 is reloaded by the $POPMB instruction that
# follows the bv), then the entry of gcm_ghash_4bit: export directive and
# the standard prologue, identical to the one used by gcm_gmult_4bit.
#
# ENTRY_GR is taken from $NREGS, exactly as in gcm_gmult_4bit, instead of a
# hard-coded 11: the 64-bit flavour saves only %r3-%r6 in this prologue, so
# the literal 11 described register saves that are never performed there.
$code.=<<___;
 bv (%r2)
 .EXIT
 $POPMB -$FRAME(%sp),%r3
 .PROCEND

 .EXPORT gcm_ghash_4bit,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR
 .ALIGN 64
gcm_ghash_4bit
 .PROC
 .CALLINFO FRAME=`$FRAME-10*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=$NREGS
 .ENTRY
 $PUSH %r2,-$SAVED_RP(%sp) ; standard prologue
 $PUSHMA %r3,$FRAME(%sp)
 $PUSH %r4,`-$FRAME+1*$SIZE_T`(%sp)
 $PUSH %r5,`-$FRAME+2*$SIZE_T`(%sp)
 $PUSH %r6,`-$FRAME+3*$SIZE_T`(%sp)
___
# 32-bit flavour only: additionally save %r7-%r11 in the gcm_ghash_4bit
# prologue.
352$code.=<<___ if ($SIZE_T==4);
353 $PUSH %r7,`-$FRAME+4*$SIZE_T`(%sp)
354 $PUSH %r8,`-$FRAME+5*$SIZE_T`(%sp)
355 $PUSH %r9,`-$FRAME+6*$SIZE_T`(%sp)
356 $PUSH %r10,`-$FRAME+7*$SIZE_T`(%sp)
357 $PUSH %r11,`-$FRAME+8*$SIZE_T`(%sp)
358___
# Position-independent address of the L$rem_4bit table, computed relative to
# L$pic_ghash (blr result with its two low bits cleared, plus the label
# distance).  $len becomes $inp+$len, i.e. the end-of-input pointer that the
# outer loops compare $inp against; $mask0xf0 is loaded with 0xf0.
359$code.=<<___;
360 blr %r0,$rem_4bit
361 ldi 3,$rem
362L\$pic_ghash
363 andcm $rem_4bit,$rem,$rem_4bit
364 addl $inp,$len,$len
365 ldo L\$rem_4bit-L\$pic_ghash($rem_4bit),$rem_4bit
366 ldi 0xf0,$mask0xf0
367___
# 32-bit flavour only: run-time selection between the PA-RISC 2.0 code that
# follows and the 1.0 code at L$parisc1_ghash.
# NOTE(review): presumably the extrd,u,*= word nullifies the branch on 2.0
# CPUs so that they fall through -- confirm against the architecture manual.
368$code.=<<___ if ($SIZE_T==4);
369 ldi 31,$rem
370 mtctl $rem,%cr11
371 extrd,u,*= $rem,%sar,1,$rem ; executes on PA-RISC 1.0
372 b L\$parisc1_ghash
373 nop
374___
375
# gcm_ghash_4bit, PA-RISC 2.0 code path.  Each pass of L$outer_ghash_pa2
# consumes 16 input bytes: every Xi byte is XORed with the input byte at the
# same offset before being split into the nibbles $nlo/$nhi, which index the
# table at $Htbl as in gcm_gmult_4bit.  The inner loop uses two remainder
# registers ($rem and $rem2).  After the result is stored to Xi, $inp is
# advanced by 16 and the outer loop repeats while $inp differs from $len.
376$code.=<<___;
377 ldb 15($Xi),$nlo
378 ldo 8($Htbl),$Hll
379
380L\$outer_ghash_pa2
381 ldb 15($inp),$nhi
382 xor $nhi,$nlo,$nlo
383 and $mask0xf0,$nlo,$nhi
384 depd,z $nlo,59,4,$nlo
385
386 ldd $nlo($Hll),$Zll
387 ldd $nlo($Hhh),$Zhh
388
389 depd,z $Zll,60,4,$rem
390 shrpd $Zhh,$Zll,4,$Zll
391 extrd,u $Zhh,59,60,$Zhh
392 ldb 14($Xi),$nlo
393 ldb 14($inp),$byte
394
395 ldd $nhi($Hll),$Tll
396 ldd $nhi($Hhh),$Thh
397 xor $byte,$nlo,$nlo
398 and $mask0xf0,$nlo,$nhi
399 depd,z $nlo,59,4,$nlo
400
401 xor $Tll,$Zll,$Zll
402 xor $Thh,$Zhh,$Zhh
403 ldd $rem($rem_4bit),$rem
404 b L\$oop_ghash_pa2
405 ldi 13,$cnt
406
407 .ALIGN 8
408L\$oop_ghash_pa2
409 xor $rem,$Zhh,$Zhh ; moved here to work around gas bug
410 depd,z $Zll,60,4,$rem2
411
412 shrpd $Zhh,$Zll,4,$Zll
413 extrd,u $Zhh,59,60,$Zhh
414 ldd $nlo($Hll),$Tll
415 ldd $nlo($Hhh),$Thh
416
417 xor $Tll,$Zll,$Zll
418 xor $Thh,$Zhh,$Zhh
419 ldbx $cnt($Xi),$nlo
420 ldbx $cnt($inp),$byte
421
422 depd,z $Zll,60,4,$rem
423 shrpd $Zhh,$Zll,4,$Zll
424 ldd $rem2($rem_4bit),$rem2
425
426 xor $rem2,$Zhh,$Zhh
427 xor $byte,$nlo,$nlo
428 ldd $nhi($Hll),$Tll
429 ldd $nhi($Hhh),$Thh
430
431 and $mask0xf0,$nlo,$nhi
432 depd,z $nlo,59,4,$nlo
433
434 extrd,u $Zhh,59,60,$Zhh
435 xor $Tll,$Zll,$Zll
436
437 ldd $rem($rem_4bit),$rem
438 addib,uv -1,$cnt,L\$oop_ghash_pa2
439 xor $Thh,$Zhh,$Zhh
440
441 xor $rem,$Zhh,$Zhh
442 depd,z $Zll,60,4,$rem2
443
444 shrpd $Zhh,$Zll,4,$Zll
445 extrd,u $Zhh,59,60,$Zhh
446 ldd $nlo($Hll),$Tll
447 ldd $nlo($Hhh),$Thh
448
449 xor $Tll,$Zll,$Zll
450 xor $Thh,$Zhh,$Zhh
451
452 depd,z $Zll,60,4,$rem
453 shrpd $Zhh,$Zll,4,$Zll
454 ldd $rem2($rem_4bit),$rem2
455
456 xor $rem2,$Zhh,$Zhh
457 ldd $nhi($Hll),$Tll
458 ldd $nhi($Hhh),$Thh
459
460 extrd,u $Zhh,59,60,$Zhh
461 xor $Tll,$Zll,$Zll
462 xor $Thh,$Zhh,$Zhh
463 ldd $rem($rem_4bit),$rem
464
465 xor $rem,$Zhh,$Zhh
466 std $Zll,8($Xi)
467 ldo 16($inp),$inp
468 std $Zhh,0($Xi)
469 cmpb,*<> $inp,$len,L\$outer_ghash_pa2
470 copy $Zll,$nlo
471___
472
# 32-bit flavour only.  First a branch to L$done_ghash so that the 2.0 code
# above skips what follows, then the PA-RISC 1.0 variant of gcm_ghash_4bit
# at L$parisc1_ghash.  Z is held in four 32-bit registers
# ($Zhh,$Zhl,$Zlh,$Zll) and the table is addressed through four base
# registers ($Hhh at offset 0, $Hhl at 4, $Hlh at 8, $Hll at 12).  Each pass
# of L$outer_ghash_pa1 XORs 16 input bytes into Xi byte by byte, stores the
# result to Xi, advances $inp by 16 and repeats while $inp differs from
# $len.
473$code.=<<___ if ($SIZE_T==4);
474 b L\$done_ghash
475 nop
476
477L\$parisc1_ghash
478 ldb 15($Xi),$nlo
479 ldo 12($Htbl),$Hll
480 ldo 8($Htbl),$Hlh
481 ldo 4($Htbl),$Hhl
482
483L\$outer_ghash_pa1
484 ldb 15($inp),$byte
485 xor $byte,$nlo,$nlo
486 and $mask0xf0,$nlo,$nhi
487 zdep $nlo,27,4,$nlo
488
489 ldwx $nlo($Hll),$Zll
490 ldwx $nlo($Hlh),$Zlh
491 ldwx $nlo($Hhl),$Zhl
492 ldwx $nlo($Hhh),$Zhh
493 zdep $Zll,28,4,$rem
494 ldb 14($Xi),$nlo
495 ldb 14($inp),$byte
496 ldwx $rem($rem_4bit),$rem
497 shrpw $Zlh,$Zll,4,$Zll
498 ldwx $nhi($Hll),$Tll
499 shrpw $Zhl,$Zlh,4,$Zlh
500 ldwx $nhi($Hlh),$Tlh
501 shrpw $Zhh,$Zhl,4,$Zhl
502 ldwx $nhi($Hhl),$Thl
503 extru $Zhh,27,28,$Zhh
504 ldwx $nhi($Hhh),$Thh
505 xor $byte,$nlo,$nlo
506 xor $rem,$Zhh,$Zhh
507 and $mask0xf0,$nlo,$nhi
508 zdep $nlo,27,4,$nlo
509
510 xor $Tll,$Zll,$Zll
511 ldwx $nlo($Hll),$Tll
512 xor $Tlh,$Zlh,$Zlh
513 ldwx $nlo($Hlh),$Tlh
514 xor $Thl,$Zhl,$Zhl
515 b L\$oop_ghash_pa1
516 ldi 13,$cnt
517
518 .ALIGN 8
519L\$oop_ghash_pa1
520 zdep $Zll,28,4,$rem
521 ldwx $nlo($Hhl),$Thl
522 xor $Thh,$Zhh,$Zhh
523 ldwx $rem($rem_4bit),$rem
524 shrpw $Zlh,$Zll,4,$Zll
525 ldwx $nlo($Hhh),$Thh
526 shrpw $Zhl,$Zlh,4,$Zlh
527 ldbx $cnt($Xi),$nlo
528 xor $Tll,$Zll,$Zll
529 ldwx $nhi($Hll),$Tll
530 shrpw $Zhh,$Zhl,4,$Zhl
531 ldbx $cnt($inp),$byte
532 xor $Tlh,$Zlh,$Zlh
533 ldwx $nhi($Hlh),$Tlh
534 extru $Zhh,27,28,$Zhh
535 xor $Thl,$Zhl,$Zhl
536 ldwx $nhi($Hhl),$Thl
537 xor $rem,$Zhh,$Zhh
538 zdep $Zll,28,4,$rem
539 xor $Thh,$Zhh,$Zhh
540 ldwx $nhi($Hhh),$Thh
541 shrpw $Zlh,$Zll,4,$Zll
542 ldwx $rem($rem_4bit),$rem
543 shrpw $Zhl,$Zlh,4,$Zlh
544 xor $byte,$nlo,$nlo
545 shrpw $Zhh,$Zhl,4,$Zhl
546 and $mask0xf0,$nlo,$nhi
547 extru $Zhh,27,28,$Zhh
548 zdep $nlo,27,4,$nlo
549 xor $Tll,$Zll,$Zll
550 ldwx $nlo($Hll),$Tll
551 xor $Tlh,$Zlh,$Zlh
552 ldwx $nlo($Hlh),$Tlh
553 xor $rem,$Zhh,$Zhh
554 addib,uv -1,$cnt,L\$oop_ghash_pa1
555 xor $Thl,$Zhl,$Zhl
556
557 zdep $Zll,28,4,$rem
558 ldwx $nlo($Hhl),$Thl
559 xor $Thh,$Zhh,$Zhh
560 ldwx $rem($rem_4bit),$rem
561 shrpw $Zlh,$Zll,4,$Zll
562 ldwx $nlo($Hhh),$Thh
563 shrpw $Zhl,$Zlh,4,$Zlh
564 xor $Tll,$Zll,$Zll
565 ldwx $nhi($Hll),$Tll
566 shrpw $Zhh,$Zhl,4,$Zhl
567 xor $Tlh,$Zlh,$Zlh
568 ldwx $nhi($Hlh),$Tlh
569 extru $Zhh,27,28,$Zhh
570 xor $rem,$Zhh,$Zhh
571 xor $Thl,$Zhl,$Zhl
572 ldwx $nhi($Hhl),$Thl
573 xor $Thh,$Zhh,$Zhh
574 ldwx $nhi($Hhh),$Thh
575 zdep $Zll,28,4,$rem
576 ldwx $rem($rem_4bit),$rem
577 shrpw $Zlh,$Zll,4,$Zll
578 shrpw $Zhl,$Zlh,4,$Zlh
579 shrpw $Zhh,$Zhl,4,$Zhl
580 extru $Zhh,27,28,$Zhh
581 xor $Tll,$Zll,$Zll
582 xor $Tlh,$Zlh,$Zlh
583 xor $rem,$Zhh,$Zhh
584 stw $Zll,12($Xi)
585 xor $Thl,$Zhl,$Zhl
586 stw $Zlh,8($Xi)
587 xor $Thh,$Zhh,$Zhh
588 stw $Zhl,4($Xi)
589 ldo 16($inp),$inp
590 stw $Zhh,0($Xi)
591 comb,<> $inp,$len,L\$outer_ghash_pa1
592 copy $Zll,$nlo
593___
# gcm_ghash_4bit epilogue, reached by both code paths: reload %r2 and
# %r4-%r6 from the slots written by the prologue.
594$code.=<<___;
595L\$done_ghash
596 $POP `-$FRAME-$SAVED_RP`(%sp),%r2 ; standard epilogue
597 $POP `-$FRAME+1*$SIZE_T`(%sp),%r4
598 $POP `-$FRAME+2*$SIZE_T`(%sp),%r5
599 $POP `-$FRAME+3*$SIZE_T`(%sp),%r6
600___
# 32-bit flavour only: reload %r7-%r11 as well.
601$code.=<<___ if ($SIZE_T==4);
602 $POP `-$FRAME+4*$SIZE_T`(%sp),%r7
603 $POP `-$FRAME+5*$SIZE_T`(%sp),%r8
604 $POP `-$FRAME+6*$SIZE_T`(%sp),%r9
605 $POP `-$FRAME+7*$SIZE_T`(%sp),%r10
606 $POP `-$FRAME+8*$SIZE_T`(%sp),%r11
607___
# Return from gcm_ghash_4bit (%r3 is reloaded by the $POPMB instruction that
# follows the bv), then the shared L$rem_4bit table: 16 entries of two
# 32-bit words each, the first holding a 16-bit constant shifted into its
# upper half and the second zero.  The backtick expressions are evaluated by
# the output loop at the end of the script.  An identification string
# follows the table.
# NOTE(review): "GRYPTOGAMS" in the .STRINGZ below looks like a misspelling
# of "CRYPTOGAMS" (the spelling used in the file header); it is left as is
# because it is part of the emitted text.
608$code.=<<___;
609 bv (%r2)
610 .EXIT
611 $POPMB -$FRAME(%sp),%r3
612 .PROCEND
613
614 .ALIGN 64
615L\$rem_4bit
616 .WORD `0x0000<<16`,0,`0x1C20<<16`,0,`0x3840<<16`,0,`0x2460<<16`,0
617 .WORD `0x7080<<16`,0,`0x6CA0<<16`,0,`0x48C0<<16`,0,`0x54E0<<16`,0
618 .WORD `0xE100<<16`,0,`0xFD20<<16`,0,`0xD940<<16`,0,`0xC560<<16`,0
619 .WORD `0x9180<<16`,0,`0x8DA0<<16`,0,`0xA9C0<<16`,0,`0xB5E0<<16`,0
620 .STRINGZ "GHASH for PA-RISC, GRYPTOGAMS by <appro\@openssl.org>"
621 .ALIGN 64
622___
623
624# Explicitly encode PA-RISC 2.0 instructions used in this module, so
625# that it can be compiled with .LEVEL 1.0. It should be noted that I
626# wouldn't have to do this, if GNU assembler understood .ALLOW 2.0
627# directive...
628
# Encoder for the "ldd" instruction: turns the textual instruction into a
# .WORD directive carrying the machine encoding, with the original text kept
# as a trailing comment.
#   $mod  - completer text that followed the mnemonic ("" or e.g. ",mb")
#   $args - operand string
# Operands in any other form are returned as the unmodified instruction.
my $ldd = sub {
    my ($mod, $args) = @_;
    my $orig = "ldd$mod\t$args";

    # format 4: %rX(%rB),%rT
    if ($args =~ /%r([0-9]+)\(%r([0-9]+)\),%r([0-9]+)/) {
        my ($index, $base, $target) = ($1, $2, $3);
        my $opcode = (0x03<<26) | ($base<<21) | ($index<<16) | (3<<6) | $target;
        return sprintf "\t.WORD\t0x%08x\t; %s", $opcode, $orig;
    }

    # format 5: disp(%rB),%rT
    if ($args =~ /(\-?[0-9]+)\(%r([0-9]+)\),%r([0-9]+)/) {
        my ($disp, $base, $target) = ($1, $2, $3);
        my $opcode = (0x03<<26) | ($base<<21) | (1<<12) | (3<<6) | $target;
        # encode offset: low four bits first, bit 4 goes below them
        $opcode |= (($disp & 0xF)<<17) | (($disp & 0x10)<<12);
        $opcode |= (1<<5)  if ($mod =~ /^,m/);
        $opcode |= (1<<13) if ($mod =~ /^,mb/);
        return sprintf "\t.WORD\t0x%08x\t; %s", $opcode, $orig;
    }

    return "\t" . $orig;
};
646
# Encoder for the "std" instruction: turns "%rS,disp(%rB)" into a .WORD
# directive carrying the machine encoding, with the original text kept as a
# trailing comment.  The completer in $mod only appears in that comment.
# Operands in any other form are returned as the unmodified instruction.
my $std = sub {
    my ($mod, $args) = @_;
    my $orig = "std$mod\t$args";

    # format 3 suffices
    unless ($args =~ /%r([0-9]+),(\-?[0-9]+)\(%r([0-9]+)\)/) {
        return "\t" . $orig;
    }

    my ($source, $disp, $base) = ($1, $2, $3);
    my $opcode = (0x1c<<26) | ($base<<21) | ($source<<16);
    $opcode |= (($disp & 0x1FF8)<<1) | (($disp>>13) & 1);
    return sprintf "\t.WORD\t0x%08x\t; %s", $opcode, $orig;
};
657
# Encoder for the "extrd" instruction: produces a .WORD directive carrying
# the machine encoding, with the original text kept as a trailing comment.
# Two operand forms are recognised, a fixed bit position and a position
# taken from %sar; anything else is returned as the unmodified instruction.
# Only the ",u" completer is used by this module, so it is implicit in the
# encoding.
my $extrd = sub {
    my ($mod, $args) = @_;
    my $orig = "extrd$mod\t$args";

    # format 15: %rS,pos,len,%rT
    if ($args =~ /%r([0-9]+),([0-9]+),([0-9]+),%r([0-9]+)/) {
        my ($source, $pos, $width, $target) = ($1, $2, $3, $4);
        my $len    = 32 - $width;
        my $opcode = (0x36<<26) | ($source<<21) | ($target<<16);
        $opcode |= (($pos & 0x20)<<6) | (($pos & 0x1f)<<5);    # encode pos
        $opcode |= (($len & 0x20)<<7) | ($len & 0x1f);         # encode len
        return sprintf "\t.WORD\t0x%08x\t; %s", $opcode, $orig;
    }

    # format 12: %rS,%sar,len,%rT
    if ($args =~ /%r([0-9]+),%sar,([0-9]+),%r([0-9]+)/) {
        my ($source, $width, $target) = ($1, $2, $3);
        my $len    = 32 - $width;
        my $opcode = (0x34<<26) | ($source<<21) | ($target<<16) | (2<<11) | (1<<9);
        $opcode |= (($len & 0x20)<<3) | ($len & 0x1f);         # encode len
        $opcode |= (1<<13) if ($mod =~ /,\**=/);               # "=" condition
        return sprintf "\t.WORD\t0x%08x\t; %s", $opcode, $orig;
    }

    return "\t" . $orig;
};
679
# Encoder for the "shrpd" instruction: produces a .WORD directive carrying
# the machine encoding, with the original text kept as a trailing comment.
# Two operand forms are recognised, a fixed shift amount and a shift amount
# taken from %sar; anything else is returned as the unmodified instruction.
my $shrpd = sub {
    my ($mod, $args) = @_;
    my $orig = "shrpd$mod\t$args";

    # format 14: %rA,%rB,sa,%rT
    if ($args =~ /%r([0-9]+),%r([0-9]+),([0-9]+),%r([0-9]+)/) {
        my ($first, $second, $shift, $target) = ($1, $2, $3, $4);
        my $cpos   = 63 - $shift;
        my $opcode = (0x34<<26) | ($second<<21) | ($first<<16) | (1<<10) | $target;
        $opcode |= (($cpos & 0x20)<<6) | (($cpos & 0x1f)<<5);  # encode sa
        return sprintf "\t.WORD\t0x%08x\t; %s", $opcode, $orig;
    }

    # format 11: %rA,%rB,%sar,%rT
    if ($args =~ /%r([0-9]+),%r([0-9]+),%sar,%r([0-9]+)/) {
        my ($first, $second, $target) = ($1, $2, $3);
        my $opcode = (0x34<<26) | ($second<<21) | ($first<<16) | (1<<9) | $target;
        return sprintf "\t.WORD\t0x%08x\t; %s", $opcode, $orig;
    }

    return "\t" . $orig;
};
696
# Encoder for the "depd" instruction: turns "%rS,pos,len,%rT" into a .WORD
# directive carrying the machine encoding, with the original text kept as a
# trailing comment.  Only the ",z" completer is used by this module, so it
# is implicit in the encoding.  Operands in any other form are returned as
# the unmodified instruction.
my $depd = sub {
    my ($mod, $args) = @_;
    my $orig = "depd$mod\t$args";

    # format 16
    unless ($args =~ /%r([0-9]+),([0-9]+),([0-9]+),%r([0-9]+)/) {
        return "\t" . $orig;
    }

    my ($source, $pos, $width, $target) = ($1, $2, $3, $4);
    my $cpos   = 63 - $pos;
    my $len    = 32 - $width;
    my $opcode = (0x3c<<26) | ($target<<21) | ($source<<16);
    $opcode |= (($cpos & 0x20)<<6) | (($cpos & 0x1f)<<5);      # encode pos
    $opcode |= (($len & 0x20)<<7) | ($len & 0x1f);             # encode len
    return sprintf "\t.WORD\t0x%08x\t; %s", $opcode, $orig;
};
712
# Re-emit one instruction, hand-encoding it when an encoder exists.
#   $mnemonic - instruction name
#   $mod      - completer text that followed the mnemonic (may be empty)
#   $args     - operand string
# A scalar variable named after the mnemonic is looked up; if it holds a
# code reference that encoder produces the line, otherwise the instruction
# is returned as tab-separated text.
sub assemble {
    my ($mnemonic, $mod, $args) = @_;

    # String eval on purpose: it resolves the variable by name at run time.
    my $handler = eval("\$$mnemonic");

    return $handler->($mod, $args) if ref($handler) eq 'CODE';
    return "\t$mnemonic$mod\t$args";
}
719
# Post-process the accumulated template line by line and print it.
foreach (split("\n",$code)) {
    # Replace every `...` span with the value of the Perl expression in it.
    s/\`([^\`]*)\`/eval $1/ge;

    if ($SIZE_T==4) {
        # 32-bit flavour: pass each indented instruction through assemble()
        # so that instructions with an encoder become .WORD directives ...
        s/^\s+([a-z]+)([\S]*)\s+([\S]*)/assemble($1,$2,$3)/e;
        # ... then rewrite cmpb,* as comb, and drop the "*" from any
        # remaining ",*" completer.
        s/cmpb,\*/comb,/;
        s/,\*/,/;
    }
    print $_,"\n";
}

# Buffered write errors (e.g. a full disk) only surface when the handle is
# closed, so a failed close must not pass silently.
close STDOUT or die "error closing STDOUT: $!";