summaryrefslogtreecommitdiff
path: root/src/lib/libcrypto/modes/asm/ghash-parisc.pl
diff options
context:
space:
mode:
authorcvs2svn <admin@example.com>2025-04-14 17:32:06 +0000
committercvs2svn <admin@example.com>2025-04-14 17:32:06 +0000
commiteb8dd9dca1228af0cd132f515509051ecfabf6f6 (patch)
treeedb6da6af7e865d488dc1a29309f1e1ec226e603 /src/lib/libcrypto/modes/asm/ghash-parisc.pl
parent247f0352e0ed72a4f476db9dc91f4d982bc83eb2 (diff)
downloadopenbsd-tb_20250414.tar.gz
openbsd-tb_20250414.tar.bz2
openbsd-tb_20250414.zip
This commit was manufactured by cvs2git to create tag 'tb_20250414'.tb_20250414
Diffstat (limited to '')
-rw-r--r--src/lib/libcrypto/modes/asm/ghash-parisc.pl740
1 files changed, 0 insertions, 740 deletions
diff --git a/src/lib/libcrypto/modes/asm/ghash-parisc.pl b/src/lib/libcrypto/modes/asm/ghash-parisc.pl
deleted file mode 100644
index 3f98513105..0000000000
--- a/src/lib/libcrypto/modes/asm/ghash-parisc.pl
+++ /dev/null
@@ -1,740 +0,0 @@
#!/usr/bin/env perl
#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# April 2010
#
# The module implements "4-bit" GCM GHASH function and underlying
# single multiplication operation in GF(2^128). "4-bit" means that it
# uses 256 bytes per-key table [+128 bytes shared table]. On PA-7100LC
# it processes one byte in 19.6 cycles, which is more than twice as
# fast as code generated by gcc 3.2. PA-RISC 2.0 loop is scheduled for
# 8 cycles, but measured performance on PA-8600 system is ~9 cycles per
# processed byte. This is ~2.2x faster than 64-bit code generated by
# vendor compiler (which used to be very hard to beat:-).
#
# Special thanks to polarhome.com for providing HP-UX account.

# Command line: <flavour> <output-file>.
# A flavour containing "64" selects the 64-bit (2.0W) parameter set below;
# any other flavour selects the 32-bit one.
$flavour = shift;
$output = shift;

# Redirect STDOUT to the requested file.  Three-argument open keeps the
# file name from being parsed as a mode, and the result is checked so a
# bad path aborts the run instead of silently discarding the output.
# With no output argument the inherited STDOUT is left untouched.
if (defined($output) && length($output)) {
    open STDOUT, '>', $output or die "can't open $output: $!";
}

if ($flavour =~ /64/) {
    $LEVEL = "2.0W";
    $SIZE_T = 8;
    $FRAME_MARKER = 80;
    $SAVED_RP = 16;
    $PUSH = "std";
    $PUSHMA = "std,ma";
    $POP = "ldd";
    $POPMB = "ldd,mb";
    $NREGS = 6;
} else {
    $LEVEL = "1.0"; #"\n\t.ALLOW\t2.0";
    $SIZE_T = 4;
    $FRAME_MARKER = 48;
    $SAVED_RP = 20;
    $PUSH = "stw";
    $PUSHMA = "stwm";
    $POP = "ldw";
    $POPMB = "ldwm";
    $NREGS = 11;
}

$FRAME = 10*$SIZE_T + $FRAME_MARKER;    # NREGS saved regs + frame marker
                                        # [+ argument transfer]

################# volatile registers
$Xi = "%r26";       # argument block
$Htbl = "%r25";
$inp = "%r24";
$len = "%r23";
$Hhh = $Htbl;       # variables
$Hll = "%r22";
$Zhh = "%r21";
$Zll = "%r20";
$cnt = "%r19";
$rem_4bit = "%r28";
$rem = "%r29";
$mask0xf0 = "%r31";

################# preserved registers
$Thh = "%r1";
$Tll = "%r2";
$nlo = "%r3";
$nhi = "%r4";
$byte = "%r5";
# The 32-bit code keeps the 128-bit values in four words each, so it
# needs the extra middle-word registers below.
if ($SIZE_T == 4) {
    $Zhl = "%r6";
    $Zlh = "%r7";
    $Hhl = "%r8";
    $Hlh = "%r9";
    $Thl = "%r10";
    $Tlh = "%r11";
}
$rem2 = "%r6";      # used in PA-RISC 2.0 code

# gcm_gmult_4bit, entry: assembler level/section directives, the exported
# entry point with its .PROC/.CALLINFO, and the prologue that stores %r2
# and pushes %r3-%r6.  Backquoted expressions are left in the text here;
# they are evaluated by the output loop at the end of the script.
$code.=<<___;
 .LEVEL $LEVEL
 .text

 .EXPORT gcm_gmult_4bit,ENTRY,ARGW0=GR,ARGW1=GR
 .ALIGN 64
gcm_gmult_4bit
 .PROC
 .CALLINFO FRAME=`$FRAME-10*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=$NREGS
 .ENTRY
 $PUSH %r2,-$SAVED_RP(%sp) ; standard prologue
 $PUSHMA %r3,$FRAME(%sp)
 $PUSH %r4,`-$FRAME+1*$SIZE_T`(%sp)
 $PUSH %r5,`-$FRAME+2*$SIZE_T`(%sp)
 $PUSH %r6,`-$FRAME+3*$SIZE_T`(%sp)
___
# The 32-bit code additionally uses %r7-%r11, so save those too.
$code.=<<___ if ($SIZE_T==4);
 $PUSH %r7,`-$FRAME+4*$SIZE_T`(%sp)
 $PUSH %r8,`-$FRAME+5*$SIZE_T`(%sp)
 $PUSH %r9,`-$FRAME+6*$SIZE_T`(%sp)
 $PUSH %r10,`-$FRAME+7*$SIZE_T`(%sp)
 $PUSH %r11,`-$FRAME+8*$SIZE_T`(%sp)
___
# Load the address of the shared L$rem_4bit table (PIC and non-PIC
# variants are selected by the C preprocessor) and the 0xf0 nibble mask.
$code.=<<___;
 addl $inp,$len,$len
#ifdef __PIC__
 addil LT'L\$rem_4bit, %r19
 ldw RT'L\$rem_4bit(%r1), $rem_4bit
#else
 ldil L'L\$rem_4bit, %t1
 ldo R'L\$rem_4bit(%t1), $rem_4bit
#endif
 ldi 0xf0,$mask0xf0
___
# 32-bit builds (except when __OpenBSD__ is defined) emit a run-time
# branch to the PA-RISC 1.0 path; the matching #endif is emitted together
# with the L$parisc1_gmult label.
$code.=<<___ if ($SIZE_T==4);
#ifndef __OpenBSD__
 ldi 31,$rem
 mtctl $rem,%cr11
 extrd,u,*= $rem,%sar,1,$rem ; executes on PA-RISC 1.0
 b L\$parisc1_gmult
 nop
___

# gcm_gmult_4bit, PA-RISC 2.0 path (64-bit ldd/std/depd/extrd/shrpd).
# Bytes 15 and 14 of Xi are fetched explicitly, then L$oop_gmult_pa2
# walks the remaining bytes with the counter starting at 13 and
# decremented by addib,uv.  The two result doublewords are stored back
# to Xi at offsets 8 and 0.
$code.=<<___;
 ldb 15($Xi),$nlo
 ldo 8($Htbl),$Hll

 and $mask0xf0,$nlo,$nhi
 depd,z $nlo,59,4,$nlo

 ldd $nlo($Hll),$Zll
 ldd $nlo($Hhh),$Zhh

 depd,z $Zll,60,4,$rem
 shrpd $Zhh,$Zll,4,$Zll
 extrd,u $Zhh,59,60,$Zhh
 ldb 14($Xi),$nlo

 ldd $nhi($Hll),$Tll
 ldd $nhi($Hhh),$Thh
 and $mask0xf0,$nlo,$nhi
 depd,z $nlo,59,4,$nlo

 xor $Tll,$Zll,$Zll
 xor $Thh,$Zhh,$Zhh
 ldd $rem($rem_4bit),$rem
 b L\$oop_gmult_pa2
 ldi 13,$cnt

 .ALIGN 8
L\$oop_gmult_pa2
 xor $rem,$Zhh,$Zhh ; moved here to work around gas bug
 depd,z $Zll,60,4,$rem

 shrpd $Zhh,$Zll,4,$Zll
 extrd,u $Zhh,59,60,$Zhh
 ldd $nlo($Hll),$Tll
 ldd $nlo($Hhh),$Thh

 xor $Tll,$Zll,$Zll
 xor $Thh,$Zhh,$Zhh
 ldd $rem($rem_4bit),$rem

 xor $rem,$Zhh,$Zhh
 depd,z $Zll,60,4,$rem
 ldbx $cnt($Xi),$nlo

 shrpd $Zhh,$Zll,4,$Zll
 extrd,u $Zhh,59,60,$Zhh
 ldd $nhi($Hll),$Tll
 ldd $nhi($Hhh),$Thh

 and $mask0xf0,$nlo,$nhi
 depd,z $nlo,59,4,$nlo
 ldd $rem($rem_4bit),$rem

 xor $Tll,$Zll,$Zll
 addib,uv -1,$cnt,L\$oop_gmult_pa2
 xor $Thh,$Zhh,$Zhh

 xor $rem,$Zhh,$Zhh
 depd,z $Zll,60,4,$rem

 shrpd $Zhh,$Zll,4,$Zll
 extrd,u $Zhh,59,60,$Zhh
 ldd $nlo($Hll),$Tll
 ldd $nlo($Hhh),$Thh

 xor $Tll,$Zll,$Zll
 xor $Thh,$Zhh,$Zhh
 ldd $rem($rem_4bit),$rem

 xor $rem,$Zhh,$Zhh
 depd,z $Zll,60,4,$rem

 shrpd $Zhh,$Zll,4,$Zll
 extrd,u $Zhh,59,60,$Zhh
 ldd $nhi($Hll),$Tll
 ldd $nhi($Hhh),$Thh

 xor $Tll,$Zll,$Zll
 xor $Thh,$Zhh,$Zhh
 ldd $rem($rem_4bit),$rem

 xor $rem,$Zhh,$Zhh
 std $Zll,8($Xi)
 std $Zhh,0($Xi)
___

# gcm_gmult_4bit, PA-RISC 1.0 path; emitted for 32-bit builds only.
# The text starts by branching the 2.0 path to L$done_gmult and closing
# the "#ifndef __OpenBSD__" opened in the prologue.  The 128-bit values
# are handled as four 32-bit words (hh/hl/lh/ll); the result is stored
# back to Xi at offsets 12, 8, 4 and 0.
$code.=<<___ if ($SIZE_T==4);
 b L\$done_gmult
 nop

L\$parisc1_gmult
#endif
 ldb 15($Xi),$nlo
 ldo 12($Htbl),$Hll
 ldo 8($Htbl),$Hlh
 ldo 4($Htbl),$Hhl

 and $mask0xf0,$nlo,$nhi
 zdep $nlo,27,4,$nlo

 ldwx $nlo($Hll),$Zll
 ldwx $nlo($Hlh),$Zlh
 ldwx $nlo($Hhl),$Zhl
 ldwx $nlo($Hhh),$Zhh
 zdep $Zll,28,4,$rem
 ldb 14($Xi),$nlo
 ldwx $rem($rem_4bit),$rem
 shrpw $Zlh,$Zll,4,$Zll
 ldwx $nhi($Hll),$Tll
 shrpw $Zhl,$Zlh,4,$Zlh
 ldwx $nhi($Hlh),$Tlh
 shrpw $Zhh,$Zhl,4,$Zhl
 ldwx $nhi($Hhl),$Thl
 extru $Zhh,27,28,$Zhh
 ldwx $nhi($Hhh),$Thh
 xor $rem,$Zhh,$Zhh
 and $mask0xf0,$nlo,$nhi
 zdep $nlo,27,4,$nlo

 xor $Tll,$Zll,$Zll
 ldwx $nlo($Hll),$Tll
 xor $Tlh,$Zlh,$Zlh
 ldwx $nlo($Hlh),$Tlh
 xor $Thl,$Zhl,$Zhl
 b L\$oop_gmult_pa1
 ldi 13,$cnt

 .ALIGN 8
L\$oop_gmult_pa1
 zdep $Zll,28,4,$rem
 ldwx $nlo($Hhl),$Thl
 xor $Thh,$Zhh,$Zhh
 ldwx $rem($rem_4bit),$rem
 shrpw $Zlh,$Zll,4,$Zll
 ldwx $nlo($Hhh),$Thh
 shrpw $Zhl,$Zlh,4,$Zlh
 ldbx $cnt($Xi),$nlo
 xor $Tll,$Zll,$Zll
 ldwx $nhi($Hll),$Tll
 shrpw $Zhh,$Zhl,4,$Zhl
 xor $Tlh,$Zlh,$Zlh
 ldwx $nhi($Hlh),$Tlh
 extru $Zhh,27,28,$Zhh
 xor $Thl,$Zhl,$Zhl
 ldwx $nhi($Hhl),$Thl
 xor $rem,$Zhh,$Zhh
 zdep $Zll,28,4,$rem
 xor $Thh,$Zhh,$Zhh
 ldwx $nhi($Hhh),$Thh
 shrpw $Zlh,$Zll,4,$Zll
 ldwx $rem($rem_4bit),$rem
 shrpw $Zhl,$Zlh,4,$Zlh
 shrpw $Zhh,$Zhl,4,$Zhl
 and $mask0xf0,$nlo,$nhi
 extru $Zhh,27,28,$Zhh
 zdep $nlo,27,4,$nlo
 xor $Tll,$Zll,$Zll
 ldwx $nlo($Hll),$Tll
 xor $Tlh,$Zlh,$Zlh
 ldwx $nlo($Hlh),$Tlh
 xor $rem,$Zhh,$Zhh
 addib,uv -1,$cnt,L\$oop_gmult_pa1
 xor $Thl,$Zhl,$Zhl

 zdep $Zll,28,4,$rem
 ldwx $nlo($Hhl),$Thl
 xor $Thh,$Zhh,$Zhh
 ldwx $rem($rem_4bit),$rem
 shrpw $Zlh,$Zll,4,$Zll
 ldwx $nlo($Hhh),$Thh
 shrpw $Zhl,$Zlh,4,$Zlh
 xor $Tll,$Zll,$Zll
 ldwx $nhi($Hll),$Tll
 shrpw $Zhh,$Zhl,4,$Zhl
 xor $Tlh,$Zlh,$Zlh
 ldwx $nhi($Hlh),$Tlh
 extru $Zhh,27,28,$Zhh
 xor $rem,$Zhh,$Zhh
 xor $Thl,$Zhl,$Zhl
 ldwx $nhi($Hhl),$Thl
 xor $Thh,$Zhh,$Zhh
 ldwx $nhi($Hhh),$Thh
 zdep $Zll,28,4,$rem
 ldwx $rem($rem_4bit),$rem
 shrpw $Zlh,$Zll,4,$Zll
 shrpw $Zhl,$Zlh,4,$Zlh
 shrpw $Zhh,$Zhl,4,$Zhl
 extru $Zhh,27,28,$Zhh
 xor $Tll,$Zll,$Zll
 xor $Tlh,$Zlh,$Zlh
 xor $rem,$Zhh,$Zhh
 stw $Zll,12($Xi)
 xor $Thl,$Zhl,$Zhl
 stw $Zlh,8($Xi)
 xor $Thh,$Zhh,$Zhh
 stw $Zhl,4($Xi)
 stw $Zhh,0($Xi)
___
# gcm_gmult_4bit, common exit: reload the return pointer and the
# registers pushed by the prologue (%r3 is restored later, next to the
# return branch).
$code.=<<___;
L\$done_gmult
 $POP `-$FRAME-$SAVED_RP`(%sp),%r2 ; standard epilogue
 $POP `-$FRAME+1*$SIZE_T`(%sp),%r4
 $POP `-$FRAME+2*$SIZE_T`(%sp),%r5
 $POP `-$FRAME+3*$SIZE_T`(%sp),%r6
___
# 32-bit builds also saved %r7-%r11.
$code.=<<___ if ($SIZE_T==4);
 $POP `-$FRAME+4*$SIZE_T`(%sp),%r7
 $POP `-$FRAME+5*$SIZE_T`(%sp),%r8
 $POP `-$FRAME+6*$SIZE_T`(%sp),%r9
 $POP `-$FRAME+7*$SIZE_T`(%sp),%r10
 $POP `-$FRAME+8*$SIZE_T`(%sp),%r11
___
# Return from gcm_gmult_4bit (restoring %r3 with $POPMB after the bv),
# then open gcm_ghash_4bit: export, .PROC/.CALLINFO and the prologue.
# ENTRY_GR uses $NREGS, as gcm_gmult_4bit does, so the .CALLINFO
# matches the registers this prologue actually saves on both flavours
# (the previous hard-coded 11 was only right for the 32-bit build).
$code.=<<___;
 bv (%r2)
 .EXIT
 $POPMB -$FRAME(%sp),%r3
 .PROCEND

 .EXPORT gcm_ghash_4bit,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR
 .ALIGN 64
gcm_ghash_4bit
 .PROC
 .CALLINFO FRAME=`$FRAME-10*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=$NREGS
 .ENTRY
 $PUSH %r2,-$SAVED_RP(%sp) ; standard prologue
 $PUSHMA %r3,$FRAME(%sp)
 $PUSH %r4,`-$FRAME+1*$SIZE_T`(%sp)
 $PUSH %r5,`-$FRAME+2*$SIZE_T`(%sp)
 $PUSH %r6,`-$FRAME+3*$SIZE_T`(%sp)
___
# The 32-bit code additionally uses %r7-%r11, so save those too.
$code.=<<___ if ($SIZE_T==4);
 $PUSH %r7,`-$FRAME+4*$SIZE_T`(%sp)
 $PUSH %r8,`-$FRAME+5*$SIZE_T`(%sp)
 $PUSH %r9,`-$FRAME+6*$SIZE_T`(%sp)
 $PUSH %r10,`-$FRAME+7*$SIZE_T`(%sp)
 $PUSH %r11,`-$FRAME+8*$SIZE_T`(%sp)
___
# Turn the length into an end pointer ($len = $inp + $len), load the
# address of the shared L$rem_4bit table and the 0xf0 nibble mask.
$code.=<<___;
 addl $inp,$len,$len
#ifdef __PIC__
 addil LT'L\$rem_4bit, %r19
 ldw RT'L\$rem_4bit(%r1), $rem_4bit
#else
 ldil L'L\$rem_4bit, %t1
 ldo R'L\$rem_4bit(%t1), $rem_4bit
#endif
 ldi 0xf0,$mask0xf0
___
# 32-bit builds (except when __OpenBSD__ is defined) emit a run-time
# branch to the PA-RISC 1.0 path; the matching #endif is emitted together
# with the L$parisc1_ghash label.
$code.=<<___ if ($SIZE_T==4);
#ifndef __OpenBSD__
 ldi 31,$rem
 mtctl $rem,%cr11
 extrd,u,*= $rem,%sar,1,$rem ; executes on PA-RISC 1.0
 b L\$parisc1_ghash
 nop
___

# gcm_ghash_4bit, PA-RISC 2.0 path.  L$outer_ghash_pa2 runs once per
# 16 input bytes: each Xi byte is XORed with the matching input byte
# before the table lookups, the inner L$oop_ghash_pa2 counts down from
# 13, the result is stored to Xi, $inp advances by 16 and the outer loop
# repeats until $inp reaches $len.
$code.=<<___;
 ldb 15($Xi),$nlo
 ldo 8($Htbl),$Hll

L\$outer_ghash_pa2
 ldb 15($inp),$nhi
 xor $nhi,$nlo,$nlo
 and $mask0xf0,$nlo,$nhi
 depd,z $nlo,59,4,$nlo

 ldd $nlo($Hll),$Zll
 ldd $nlo($Hhh),$Zhh

 depd,z $Zll,60,4,$rem
 shrpd $Zhh,$Zll,4,$Zll
 extrd,u $Zhh,59,60,$Zhh
 ldb 14($Xi),$nlo
 ldb 14($inp),$byte

 ldd $nhi($Hll),$Tll
 ldd $nhi($Hhh),$Thh
 xor $byte,$nlo,$nlo
 and $mask0xf0,$nlo,$nhi
 depd,z $nlo,59,4,$nlo

 xor $Tll,$Zll,$Zll
 xor $Thh,$Zhh,$Zhh
 ldd $rem($rem_4bit),$rem
 b L\$oop_ghash_pa2
 ldi 13,$cnt

 .ALIGN 8
L\$oop_ghash_pa2
 xor $rem,$Zhh,$Zhh ; moved here to work around gas bug
 depd,z $Zll,60,4,$rem2

 shrpd $Zhh,$Zll,4,$Zll
 extrd,u $Zhh,59,60,$Zhh
 ldd $nlo($Hll),$Tll
 ldd $nlo($Hhh),$Thh

 xor $Tll,$Zll,$Zll
 xor $Thh,$Zhh,$Zhh
 ldbx $cnt($Xi),$nlo
 ldbx $cnt($inp),$byte

 depd,z $Zll,60,4,$rem
 shrpd $Zhh,$Zll,4,$Zll
 ldd $rem2($rem_4bit),$rem2

 xor $rem2,$Zhh,$Zhh
 xor $byte,$nlo,$nlo
 ldd $nhi($Hll),$Tll
 ldd $nhi($Hhh),$Thh

 and $mask0xf0,$nlo,$nhi
 depd,z $nlo,59,4,$nlo

 extrd,u $Zhh,59,60,$Zhh
 xor $Tll,$Zll,$Zll

 ldd $rem($rem_4bit),$rem
 addib,uv -1,$cnt,L\$oop_ghash_pa2
 xor $Thh,$Zhh,$Zhh

 xor $rem,$Zhh,$Zhh
 depd,z $Zll,60,4,$rem2

 shrpd $Zhh,$Zll,4,$Zll
 extrd,u $Zhh,59,60,$Zhh
 ldd $nlo($Hll),$Tll
 ldd $nlo($Hhh),$Thh

 xor $Tll,$Zll,$Zll
 xor $Thh,$Zhh,$Zhh

 depd,z $Zll,60,4,$rem
 shrpd $Zhh,$Zll,4,$Zll
 ldd $rem2($rem_4bit),$rem2

 xor $rem2,$Zhh,$Zhh
 ldd $nhi($Hll),$Tll
 ldd $nhi($Hhh),$Thh

 extrd,u $Zhh,59,60,$Zhh
 xor $Tll,$Zll,$Zll
 xor $Thh,$Zhh,$Zhh
 ldd $rem($rem_4bit),$rem

 xor $rem,$Zhh,$Zhh
 std $Zll,8($Xi)
 ldo 16($inp),$inp
 std $Zhh,0($Xi)
 cmpb,*<> $inp,$len,L\$outer_ghash_pa2
 copy $Zll,$nlo
___

# gcm_ghash_4bit, PA-RISC 1.0 path; emitted for 32-bit builds only.
# The text starts by branching the 2.0 path to L$done_ghash and closing
# the "#ifndef __OpenBSD__" opened in the prologue.  L$outer_ghash_pa1
# runs once per 16 input bytes using four 32-bit words per 128-bit
# value, stores the result to Xi, advances $inp by 16 and repeats until
# $inp reaches $len.
$code.=<<___ if ($SIZE_T==4);
 b L\$done_ghash
 nop

L\$parisc1_ghash
#endif
 ldb 15($Xi),$nlo
 ldo 12($Htbl),$Hll
 ldo 8($Htbl),$Hlh
 ldo 4($Htbl),$Hhl

L\$outer_ghash_pa1
 ldb 15($inp),$byte
 xor $byte,$nlo,$nlo
 and $mask0xf0,$nlo,$nhi
 zdep $nlo,27,4,$nlo

 ldwx $nlo($Hll),$Zll
 ldwx $nlo($Hlh),$Zlh
 ldwx $nlo($Hhl),$Zhl
 ldwx $nlo($Hhh),$Zhh
 zdep $Zll,28,4,$rem
 ldb 14($Xi),$nlo
 ldb 14($inp),$byte
 ldwx $rem($rem_4bit),$rem
 shrpw $Zlh,$Zll,4,$Zll
 ldwx $nhi($Hll),$Tll
 shrpw $Zhl,$Zlh,4,$Zlh
 ldwx $nhi($Hlh),$Tlh
 shrpw $Zhh,$Zhl,4,$Zhl
 ldwx $nhi($Hhl),$Thl
 extru $Zhh,27,28,$Zhh
 ldwx $nhi($Hhh),$Thh
 xor $byte,$nlo,$nlo
 xor $rem,$Zhh,$Zhh
 and $mask0xf0,$nlo,$nhi
 zdep $nlo,27,4,$nlo

 xor $Tll,$Zll,$Zll
 ldwx $nlo($Hll),$Tll
 xor $Tlh,$Zlh,$Zlh
 ldwx $nlo($Hlh),$Tlh
 xor $Thl,$Zhl,$Zhl
 b L\$oop_ghash_pa1
 ldi 13,$cnt

 .ALIGN 8
L\$oop_ghash_pa1
 zdep $Zll,28,4,$rem
 ldwx $nlo($Hhl),$Thl
 xor $Thh,$Zhh,$Zhh
 ldwx $rem($rem_4bit),$rem
 shrpw $Zlh,$Zll,4,$Zll
 ldwx $nlo($Hhh),$Thh
 shrpw $Zhl,$Zlh,4,$Zlh
 ldbx $cnt($Xi),$nlo
 xor $Tll,$Zll,$Zll
 ldwx $nhi($Hll),$Tll
 shrpw $Zhh,$Zhl,4,$Zhl
 ldbx $cnt($inp),$byte
 xor $Tlh,$Zlh,$Zlh
 ldwx $nhi($Hlh),$Tlh
 extru $Zhh,27,28,$Zhh
 xor $Thl,$Zhl,$Zhl
 ldwx $nhi($Hhl),$Thl
 xor $rem,$Zhh,$Zhh
 zdep $Zll,28,4,$rem
 xor $Thh,$Zhh,$Zhh
 ldwx $nhi($Hhh),$Thh
 shrpw $Zlh,$Zll,4,$Zll
 ldwx $rem($rem_4bit),$rem
 shrpw $Zhl,$Zlh,4,$Zlh
 xor $byte,$nlo,$nlo
 shrpw $Zhh,$Zhl,4,$Zhl
 and $mask0xf0,$nlo,$nhi
 extru $Zhh,27,28,$Zhh
 zdep $nlo,27,4,$nlo
 xor $Tll,$Zll,$Zll
 ldwx $nlo($Hll),$Tll
 xor $Tlh,$Zlh,$Zlh
 ldwx $nlo($Hlh),$Tlh
 xor $rem,$Zhh,$Zhh
 addib,uv -1,$cnt,L\$oop_ghash_pa1
 xor $Thl,$Zhl,$Zhl

 zdep $Zll,28,4,$rem
 ldwx $nlo($Hhl),$Thl
 xor $Thh,$Zhh,$Zhh
 ldwx $rem($rem_4bit),$rem
 shrpw $Zlh,$Zll,4,$Zll
 ldwx $nlo($Hhh),$Thh
 shrpw $Zhl,$Zlh,4,$Zlh
 xor $Tll,$Zll,$Zll
 ldwx $nhi($Hll),$Tll
 shrpw $Zhh,$Zhl,4,$Zhl
 xor $Tlh,$Zlh,$Zlh
 ldwx $nhi($Hlh),$Tlh
 extru $Zhh,27,28,$Zhh
 xor $rem,$Zhh,$Zhh
 xor $Thl,$Zhl,$Zhl
 ldwx $nhi($Hhl),$Thl
 xor $Thh,$Zhh,$Zhh
 ldwx $nhi($Hhh),$Thh
 zdep $Zll,28,4,$rem
 ldwx $rem($rem_4bit),$rem
 shrpw $Zlh,$Zll,4,$Zll
 shrpw $Zhl,$Zlh,4,$Zlh
 shrpw $Zhh,$Zhl,4,$Zhl
 extru $Zhh,27,28,$Zhh
 xor $Tll,$Zll,$Zll
 xor $Tlh,$Zlh,$Zlh
 xor $rem,$Zhh,$Zhh
 stw $Zll,12($Xi)
 xor $Thl,$Zhl,$Zhl
 stw $Zlh,8($Xi)
 xor $Thh,$Zhh,$Zhh
 stw $Zhl,4($Xi)
 ldo 16($inp),$inp
 stw $Zhh,0($Xi)
 comb,<> $inp,$len,L\$outer_ghash_pa1
 copy $Zll,$nlo
___
# gcm_ghash_4bit, common exit: reload the return pointer and the
# registers pushed by the prologue.
$code.=<<___;
L\$done_ghash
 $POP `-$FRAME-$SAVED_RP`(%sp),%r2 ; standard epilogue
 $POP `-$FRAME+1*$SIZE_T`(%sp),%r4
 $POP `-$FRAME+2*$SIZE_T`(%sp),%r5
 $POP `-$FRAME+3*$SIZE_T`(%sp),%r6
___
# 32-bit builds also saved %r7-%r11.
$code.=<<___ if ($SIZE_T==4);
 $POP `-$FRAME+4*$SIZE_T`(%sp),%r7
 $POP `-$FRAME+5*$SIZE_T`(%sp),%r8
 $POP `-$FRAME+6*$SIZE_T`(%sp),%r9
 $POP `-$FRAME+7*$SIZE_T`(%sp),%r10
 $POP `-$FRAME+8*$SIZE_T`(%sp),%r11
___
# Return (restoring %r3 with $POPMB after the bv), then the shared
# L$rem_4bit table in .rodata: sixteen entries of two words each, with
# the 16-bit constant in the upper half of the first word.  The
# backquoted shifts are evaluated by the output loop.
$code.=<<___;
 bv (%r2)
 .EXIT
 $POPMB -$FRAME(%sp),%r3
 .PROCEND

 .section .rodata
 .ALIGN 64
L\$rem_4bit
 .WORD `0x0000<<16`,0,`0x1C20<<16`,0,`0x3840<<16`,0,`0x2460<<16`,0
 .WORD `0x7080<<16`,0,`0x6CA0<<16`,0,`0x48C0<<16`,0,`0x54E0<<16`,0
 .WORD `0xE100<<16`,0,`0xFD20<<16`,0,`0xD940<<16`,0,`0xC560<<16`,0
 .WORD `0x9180<<16`,0,`0x8DA0<<16`,0,`0xA9C0<<16`,0,`0xB5E0<<16`,0
 .previous

 .ALIGN 64
___

# Explicitly encode PA-RISC 2.0 instructions used in this module, so
# that it can be compiled with .LEVEL 1.0. It should be noted that I
# wouldn't have to do this, if GNU assembler understood .ALLOW 2.0
# directive...

# Hand-encode an "ldd" instruction as a .WORD directive.
#   $mod  - completer text following the mnemonic (e.g. "" or ",mb")
#   $args - operand text
# Returns "\t.WORD\t0x<opcode>\t; <original instruction>" for the two
# operand shapes handled below, or the instruction text unchanged
# (tab-prefixed) when the operands match neither.
my $ldd = sub {
    my ($mod, $args) = @_;
    my $orig = "ldd$mod\t$args";

    # format 4: register-indexed, %rX(%rB),%rT
    if (my ($index, $base, $target) =
            $args =~ /%r([0-9]+)\(%r([0-9]+)\),%r([0-9]+)/) {
        my $opcode = (0x03<<26) | ($base<<21) | ($index<<16) | (3<<6) | $target;
        return sprintf "\t.WORD\t0x%08x\t; %s", $opcode, $orig;
    }

    # format 5: short displacement, disp(%rB),%rT
    if (my ($disp, $base, $target) =
            $args =~ /(\-?[0-9]+)\(%r([0-9]+)\),%r([0-9]+)/) {
        my $opcode = (0x03<<26) | ($base<<21) | (1<<12) | (3<<6) | $target;
        # encode offset: low four bits at bit 17, bit 4 moved to bit 16
        $opcode |= (($disp & 0xF)<<17) | (($disp & 0x10)<<12);
        $opcode |= (1<<5)  if ($mod =~ /^,m/);
        $opcode |= (1<<13) if ($mod =~ /^,mb/);
        return sprintf "\t.WORD\t0x%08x\t; %s", $opcode, $orig;
    }

    return "\t" . $orig;
};

# Hand-encode a "std" instruction as a .WORD directive.
#   $mod  - completer text following the mnemonic
#   $args - operand text of the form %rS,disp(%rB)
# Returns "\t.WORD\t0x<opcode>\t; <original instruction>", or the
# instruction text unchanged (tab-prefixed) when the operands do not
# match.  $mod only appears in the echoed comment; it does not alter the
# opcode.
my $std = sub {
    my ($mod, $args) = @_;
    my $orig = "std$mod\t$args";

    # format 3 suffices
    if (my ($source, $disp, $base) =
            $args =~ /%r([0-9]+),(\-?[0-9]+)\(%r([0-9]+)\)/) {
        my $opcode = (0x1c<<26) | ($base<<21) | ($source<<16)
                   | (($disp & 0x1FF8)<<1) | (($disp>>13) & 1);
        return sprintf "\t.WORD\t0x%08x\t; %s", $opcode, $orig;
    }

    return "\t" . $orig;
};

# Hand-encode an "extrd" instruction as a .WORD directive.
#   $mod  - completer text following the mnemonic (e.g. ",u" or ",u,*=")
#   $args - operand text
# Returns "\t.WORD\t0x<opcode>\t; <original instruction>" for the two
# operand shapes handled below, or the instruction text unchanged
# (tab-prefixed) when the operands match neither.
my $extrd = sub {
    my ($mod, $args) = @_;
    my $orig = "extrd$mod\t$args";

    # I only have ",u" completer, it's implicitly encoded...

    # format 15: immediate position, %rS,pos,len,%rT
    if (my ($source, $pos, $width, $target) =
            $args =~ /%r([0-9]+),([0-9]+),([0-9]+),%r([0-9]+)/) {
        my $opcode = (0x36<<26) | ($source<<21) | ($target<<16);
        my $len = 32 - $width;
        $opcode |= (($pos & 0x20)<<6) | (($pos & 0x1f)<<5);    # encode pos
        $opcode |= (($len & 0x20)<<7) | ($len & 0x1f);         # encode len
        return sprintf "\t.WORD\t0x%08x\t; %s", $opcode, $orig;
    }

    # format 12: position taken from %sar, %rS,%sar,len,%rT
    if (my ($source, $width, $target) =
            $args =~ /%r([0-9]+),%sar,([0-9]+),%r([0-9]+)/) {
        my $opcode = (0x34<<26) | ($source<<21) | ($target<<16) | (2<<11) | (1<<9);
        my $len = 32 - $width;
        $opcode |= (($len & 0x20)<<3) | ($len & 0x1f);         # encode len
        # a "=" condition completer (optionally "*=") sets bit 13
        $opcode |= (1<<13) if ($mod =~ /,\**=/);
        return sprintf "\t.WORD\t0x%08x\t; %s", $opcode, $orig;
    }

    return "\t" . $orig;
};

# Hand-encode a "shrpd" instruction as a .WORD directive.
#   $mod  - completer text following the mnemonic (echoed only)
#   $args - operand text
# Returns "\t.WORD\t0x<opcode>\t; <original instruction>" for the two
# operand shapes handled below, or the instruction text unchanged
# (tab-prefixed) when the operands match neither.
my $shrpd = sub {
    my ($mod, $args) = @_;
    my $orig = "shrpd$mod\t$args";

    # format 14: immediate shift amount, %r1,%r2,sa,%rT
    if (my ($first, $second, $shift, $target) =
            $args =~ /%r([0-9]+),%r([0-9]+),([0-9]+),%r([0-9]+)/) {
        my $opcode = (0x34<<26) | ($second<<21) | ($first<<16) | (1<<10) | $target;
        my $cpos = 63 - $shift;
        $opcode |= (($cpos & 0x20)<<6) | (($cpos & 0x1f)<<5);  # encode sa
        return sprintf "\t.WORD\t0x%08x\t; %s", $opcode, $orig;
    }

    # format 11: shift amount taken from %sar, %r1,%r2,%sar,%rT
    if (my ($first, $second, $target) =
            $args =~ /%r([0-9]+),%r([0-9]+),%sar,%r([0-9]+)/) {
        my $opcode = (0x34<<26) | ($second<<21) | ($first<<16) | (1<<9) | $target;
        return sprintf "\t.WORD\t0x%08x\t; %s", $opcode, $orig;
    }

    return "\t" . $orig;
};

# Hand-encode a "depd" instruction as a .WORD directive.
#   $mod  - completer text following the mnemonic (e.g. ",z")
#   $args - operand text of the form %rS,pos,len,%rT
# Returns "\t.WORD\t0x<opcode>\t; <original instruction>", or the
# instruction text unchanged (tab-prefixed) when the operands do not
# match.
my $depd = sub {
    my ($mod, $args) = @_;
    my $orig = "depd$mod\t$args";

    # I only have ",z" completer, it's implicitly encoded...

    # format 16
    if (my ($source, $pos, $width, $target) =
            $args =~ /%r([0-9]+),([0-9]+),([0-9]+),%r([0-9]+)/) {
        my $opcode = (0x3c<<26) | ($target<<21) | ($source<<16);
        my $cpos = 63 - $pos;
        my $len = 32 - $width;
        $opcode |= (($cpos & 0x20)<<6) | (($cpos & 0x1f)<<5);  # encode pos
        $opcode |= (($len & 0x20)<<7) | ($len & 0x1f);         # encode len
        return sprintf "\t.WORD\t0x%08x\t; %s", $opcode, $orig;
    }

    return "\t" . $orig;
};

# Translate one instruction for the 32-bit build.
#   $mnemonic - lower-case instruction name
#   $mod      - completer text glued to the mnemonic (may be empty)
#   $args     - operand text
# Returns the hand-encoded form when an encoder exists for the mnemonic,
# otherwise "\t<mnemonic><mod>\t<args>".
sub assemble {
    my ($mnemonic, $mod, $args) = @_;

    # Explicit dispatch table rather than string-eval'ing "\$$mnemonic":
    # only the five encoders can ever be selected, and instruction text
    # is never fed to the Perl parser.
    my %encoder_for = (
        ldd   => $ldd,
        std   => $std,
        extrd => $extrd,
        shrpd => $shrpd,
        depd  => $depd,
    );
    my $encoder = $encoder_for{$mnemonic};

    return ref($encoder) eq 'CODE'
        ? $encoder->($mod, $args)
        : "\t$mnemonic$mod\t$args";
}

# Post-process the accumulated assembly one line at a time and print it:
#  - every `...` span is replaced by the value of the Perl expression
#    inside it;
#  - for the 32-bit build, an indented lower-case mnemonic is run
#    through assemble(), "cmpb,*" becomes "comb," and the first
#    remaining ",*" becomes ",";
#  - for the 64-bit build, "bv" becomes "bve".
foreach (split("\n", $code)) {
    s/\`([^\`]*)\`/eval $1/ge;
    if ($SIZE_T == 4) {
        s/^\s+([a-z]+)([\S]*)\s+([\S]*)/assemble($1,$2,$3)/e;
        s/cmpb,\*/comb,/;
        s/,\*/,/;
    }
    s/\bbv\b/bve/ if ($SIZE_T == 8);
    print $_, "\n";
}

# Buffered write errors (e.g. a full disk) only surface at close time;
# fail loudly rather than leave a truncated assembly file behind.
close STDOUT or die "error closing STDOUT: $!";