author		djm <>	2012-10-13 21:23:57 +0000
committer	djm <>	2012-10-13 21:23:57 +0000
commit		5bb3399db864c8865e0df73bd1564407bac5d182
tree		51d00d308f46148b4b341133936e44706703ad8b
parent		7aa3571aba92d82f8dd3caabe48fad636f05a0fd
import OpenSSL-1.0.1c
Diffstat
-rw-r--r--  src/lib/libcrypto/camellia/Makefile  17
-rw-r--r--  src/lib/libcrypto/camellia/cmll_utl.c  64
-rw-r--r--  src/lib/libcrypto/cmac/Makefile  111
-rw-r--r--  src/lib/libcrypto/cms/Makefile  24
-rw-r--r--  src/lib/libcrypto/ecdh/ecdhtest.c  6
-rw-r--r--  src/lib/libcrypto/ecdh/ech_ossl.c  2
-rw-r--r--  src/lib/libcrypto/ecdsa/ecdsatest.c  14
-rw-r--r--  src/lib/libcrypto/engine/eng_rdrand.c  142
-rw-r--r--  src/lib/libcrypto/engine/eng_rsax.c  668
-rw-r--r--  src/lib/libcrypto/evp/evp_fips.c  113
-rw-r--r--  src/lib/libcrypto/fips_err.h  100
-rw-r--r--  src/lib/libcrypto/fips_ers.c  7
-rw-r--r--  src/lib/libcrypto/idea/idea_spd.c  299
-rw-r--r--  src/lib/libcrypto/mdc2/mdc2dgst.c  3
-rw-r--r--  src/lib/libcrypto/modes/Makefile  77
-rw-r--r--  src/lib/libcrypto/o_fips.c  96
-rw-r--r--  src/lib/libcrypto/perlasm/x86masm.pl  19
-rw-r--r--  src/lib/libcrypto/rc4/rc4_utl.c  62
-rw-r--r--  src/lib/libcrypto/seed/seed.c  13
-rw-r--r--  src/lib/libcrypto/seed/seed.h  4
-rw-r--r--  src/lib/libcrypto/srp/Makefile  98
-rw-r--r--  src/lib/libcrypto/srp/srp.h  172
-rw-r--r--  src/lib/libcrypto/srp/srp_grps.h  517
-rw-r--r--  src/lib/libcrypto/srp/srp_lcl.h  83
-rw-r--r--  src/lib/libcrypto/srp/srp_lib.c  357
-rw-r--r--  src/lib/libcrypto/srp/srp_vfy.c  657
-rw-r--r--  src/lib/libcrypto/srp/srptest.c  162
-rw-r--r--  src/lib/libcrypto/util/copy.pl  11
-rw-r--r--  src/lib/libcrypto/whrlpool/Makefile  5
-rw-r--r--  src/lib/libcrypto/x509v3/v3_asid.c  63
-rw-r--r--  src/lib/libssl/src/apps/cms.c  37
-rw-r--r--  src/lib/libssl/src/apps/demoSRP/srp_verifier.txt  6
-rw-r--r--  src/lib/libssl/src/apps/demoSRP/srp_verifier.txt.attr  1
-rw-r--r--  src/lib/libssl/src/apps/srp.c  756
-rw-r--r--  src/lib/libssl/src/crypto/aes/asm/aes-586.pl  14
-rw-r--r--  src/lib/libssl/src/crypto/aes/asm/aes-armv4.pl  182
-rw-r--r--  src/lib/libssl/src/crypto/aes/asm/aes-mips.pl  1611
-rw-r--r--  src/lib/libssl/src/crypto/aes/asm/aes-parisc.pl  1021
-rw-r--r--  src/lib/libssl/src/crypto/aes/asm/aes-ppc.pl  444
-rw-r--r--  src/lib/libssl/src/crypto/aes/asm/aes-s390x.pl  1071
-rwxr-xr-x  src/lib/libssl/src/crypto/aes/asm/aes-sparcv9.pl  3
-rwxr-xr-x  src/lib/libssl/src/crypto/aes/asm/aes-x86_64.pl  45
-rw-r--r--  src/lib/libssl/src/crypto/aes/asm/aesni-sha1-x86_64.pl  1249
-rw-r--r--  src/lib/libssl/src/crypto/aes/asm/aesni-x86.pl  2189
-rw-r--r--  src/lib/libssl/src/crypto/aes/asm/aesni-x86_64.pl  2478
-rw-r--r--  src/lib/libssl/src/crypto/aes/asm/bsaes-x86_64.pl  3044
-rw-r--r--  src/lib/libssl/src/crypto/aes/asm/vpaes-x86.pl  903
-rw-r--r--  src/lib/libssl/src/crypto/aes/asm/vpaes-x86_64.pl  1206
-rw-r--r--  src/lib/libssl/src/crypto/arm_arch.h  51
-rw-r--r--  src/lib/libssl/src/crypto/armcap.c  80
-rw-r--r--  src/lib/libssl/src/crypto/armv4cpuid.S  154
-rw-r--r--  src/lib/libssl/src/crypto/asn1/ameth_lib.c  12
-rw-r--r--  src/lib/libssl/src/crypto/asn1/asn1_locl.h  11
-rw-r--r--  src/lib/libssl/src/crypto/asn1/asn_mime.c  23
-rw-r--r--  src/lib/libssl/src/crypto/bio/bss_dgram.c  996
-rw-r--r--  src/lib/libssl/src/crypto/bn/asm/armv4-gf2m.pl  278
-rw-r--r--  src/lib/libssl/src/crypto/bn/asm/armv4-mont.pl  23
-rw-r--r--  src/lib/libssl/src/crypto/bn/asm/ia64-mont.pl  851
-rw-r--r--  src/lib/libssl/src/crypto/bn/asm/mips-mont.pl  426
-rw-r--r--  src/lib/libssl/src/crypto/bn/asm/mips.pl  2585
-rw-r--r--  src/lib/libssl/src/crypto/bn/asm/modexp512-x86_64.pl  1496
-rw-r--r--  src/lib/libssl/src/crypto/bn/asm/parisc-mont.pl  993
-rw-r--r--  src/lib/libssl/src/crypto/bn/asm/ppc-mont.pl  107
-rw-r--r--  src/lib/libssl/src/crypto/bn/asm/ppc64-mont.pl  338
-rw-r--r--  src/lib/libssl/src/crypto/bn/asm/s390x-gf2m.pl  221
-rw-r--r--  src/lib/libssl/src/crypto/bn/asm/s390x-mont.pl  102
-rw-r--r--  src/lib/libssl/src/crypto/bn/asm/x86-gf2m.pl  313
-rw-r--r--  src/lib/libssl/src/crypto/bn/asm/x86_64-gf2m.pl  389
-rwxr-xr-x  src/lib/libssl/src/crypto/bn/asm/x86_64-mont.pl  1486
-rwxr-xr-x  src/lib/libssl/src/crypto/bn/asm/x86_64-mont5.pl  1070
-rw-r--r--  src/lib/libssl/src/crypto/bn/bn_gf2m.c  114
-rw-r--r--  src/lib/libssl/src/crypto/bn/bn_nist.c  338
-rw-r--r--  src/lib/libssl/src/crypto/buffer/buf_str.c  99
-rw-r--r--  src/lib/libssl/src/crypto/camellia/Makefile  17
-rw-r--r--  src/lib/libssl/src/crypto/camellia/asm/cmll-x86.pl  6
-rw-r--r--  src/lib/libssl/src/crypto/camellia/camellia.h  4
-rw-r--r--  src/lib/libssl/src/crypto/camellia/cmll_locl.h  5
-rw-r--r--  src/lib/libssl/src/crypto/camellia/cmll_misc.c  3
-rw-r--r--  src/lib/libssl/src/crypto/camellia/cmll_utl.c  64
-rw-r--r--  src/lib/libssl/src/crypto/cmac/Makefile  111
-rw-r--r--  src/lib/libssl/src/crypto/cmac/cm_ameth.c  97
-rw-r--r--  src/lib/libssl/src/crypto/cmac/cm_pmeth.c  224
-rw-r--r--  src/lib/libssl/src/crypto/cmac/cmac.c  308
-rw-r--r--  src/lib/libssl/src/crypto/cmac/cmac.h  82
-rw-r--r--  src/lib/libssl/src/crypto/cms/Makefile  24
-rw-r--r--  src/lib/libssl/src/crypto/cms/cms.h  22
-rw-r--r--  src/lib/libssl/src/crypto/cms/cms_asn1.c  9
-rw-r--r--  src/lib/libssl/src/crypto/cms/cms_enc.c  60
-rw-r--r--  src/lib/libssl/src/crypto/cms/cms_env.c  22
-rw-r--r--  src/lib/libssl/src/crypto/cms/cms_err.c  13
-rw-r--r--  src/lib/libssl/src/crypto/cms/cms_lcl.h  12
-rw-r--r--  src/lib/libssl/src/crypto/cms/cms_lib.c  3
-rw-r--r--  src/lib/libssl/src/crypto/cms/cms_pwri.c  454
-rw-r--r--  src/lib/libssl/src/crypto/cms/cms_sd.c  3
-rw-r--r--  src/lib/libssl/src/crypto/cms/cms_smime.c  61
-rw-r--r--  src/lib/libssl/src/crypto/dh/dh_ameth.c  1
-rw-r--r--  src/lib/libssl/src/crypto/dsa/dsa_ameth.c  47
-rw-r--r--  src/lib/libssl/src/crypto/dsa/dsa_locl.h  1
-rw-r--r--  src/lib/libssl/src/crypto/dsa/dsa_pmeth.c  6
-rw-r--r--  src/lib/libssl/src/crypto/ec/ec2_mult.c  4
-rw-r--r--  src/lib/libssl/src/crypto/ec/ec2_oct.c  407
-rw-r--r--  src/lib/libssl/src/crypto/ec/ec2_smpl.c  351
-rw-r--r--  src/lib/libssl/src/crypto/ec/ec_ameth.c  1
-rw-r--r--  src/lib/libssl/src/crypto/ec/ec_asn1.c  24
-rw-r--r--  src/lib/libssl/src/crypto/ec/ec_curve.c  197
-rw-r--r--  src/lib/libssl/src/crypto/ec/ec_key.c  102
-rw-r--r--  src/lib/libssl/src/crypto/ec/ec_oct.c  199
-rw-r--r--  src/lib/libssl/src/crypto/ec/ec_pmeth.c  1
-rw-r--r--  src/lib/libssl/src/crypto/ec/eck_prn.c  3
-rw-r--r--  src/lib/libssl/src/crypto/ec/ecp_nistp224.c  1658
-rw-r--r--  src/lib/libssl/src/crypto/ec/ecp_nistp256.c  2171
-rw-r--r--  src/lib/libssl/src/crypto/ec/ecp_nistp521.c  2025
-rw-r--r--  src/lib/libssl/src/crypto/ec/ecp_nistputil.c  197
-rw-r--r--  src/lib/libssl/src/crypto/ec/ecp_oct.c  433
-rw-r--r--  src/lib/libssl/src/crypto/ecdh/ecdh.h  2
-rw-r--r--  src/lib/libssl/src/crypto/ecdh/ecdhtest.c  6
-rw-r--r--  src/lib/libssl/src/crypto/ecdh/ech_err.c  4
-rw-r--r--  src/lib/libssl/src/crypto/ecdh/ech_lib.c  20
-rw-r--r--  src/lib/libssl/src/crypto/ecdh/ech_locl.h  8
-rw-r--r--  src/lib/libssl/src/crypto/ecdh/ech_ossl.c  2
-rw-r--r--  src/lib/libssl/src/crypto/ecdsa/ecdsa.h  2
-rw-r--r--  src/lib/libssl/src/crypto/ecdsa/ecdsatest.c  14
-rw-r--r--  src/lib/libssl/src/crypto/ecdsa/ecs_err.c  4
-rw-r--r--  src/lib/libssl/src/crypto/ecdsa/ecs_lib.c  21
-rw-r--r--  src/lib/libssl/src/crypto/ecdsa/ecs_locl.h  8
-rw-r--r--  src/lib/libssl/src/crypto/ecdsa/ecs_ossl.c  5
-rw-r--r--  src/lib/libssl/src/crypto/engine/eng_rdrand.c  142
-rw-r--r--  src/lib/libssl/src/crypto/engine/eng_rsax.c  668
-rw-r--r--  src/lib/libssl/src/crypto/evp/e_aes_cbc_hmac_sha1.c  406
-rw-r--r--  src/lib/libssl/src/crypto/evp/e_rc4_hmac_md5.c  298
-rw-r--r--  src/lib/libssl/src/crypto/evp/evp_fips.c  113
-rw-r--r--  src/lib/libssl/src/crypto/evp/m_ecdsa.c  3
-rw-r--r--  src/lib/libssl/src/crypto/evp/m_wp.c  1
-rw-r--r--  src/lib/libssl/src/crypto/evp/pmeth_gn.c  5
-rw-r--r--  src/lib/libssl/src/crypto/evp/pmeth_lib.c  55
-rw-r--r--  src/lib/libssl/src/crypto/fips_err.h  100
-rw-r--r--  src/lib/libssl/src/crypto/fips_ers.c  7
-rw-r--r--  src/lib/libssl/src/crypto/hmac/hm_ameth.c  2
-rw-r--r--  src/lib/libssl/src/crypto/hmac/hm_pmeth.c  14
-rw-r--r--  src/lib/libssl/src/crypto/ia64cpuid.S  2
-rw-r--r--  src/lib/libssl/src/crypto/idea/i_cbc.c  168
-rw-r--r--  src/lib/libssl/src/crypto/idea/i_cfb64.c  122
-rw-r--r--  src/lib/libssl/src/crypto/idea/i_ecb.c  85
-rw-r--r--  src/lib/libssl/src/crypto/idea/i_ofb64.c  111
-rw-r--r--  src/lib/libssl/src/crypto/idea/i_skey.c  164
-rw-r--r--  src/lib/libssl/src/crypto/idea/idea_lcl.h  215
-rw-r--r--  src/lib/libssl/src/crypto/idea/idea_spd.c  299
-rw-r--r--  src/lib/libssl/src/crypto/mdc2/mdc2dgst.c  3
-rw-r--r--  src/lib/libssl/src/crypto/modes/Makefile  77
-rw-r--r--  src/lib/libssl/src/crypto/modes/asm/ghash-alpha.pl  451
-rw-r--r--  src/lib/libssl/src/crypto/modes/asm/ghash-armv4.pl  429
-rwxr-xr-x  src/lib/libssl/src/crypto/modes/asm/ghash-ia64.pl  463
-rw-r--r--  src/lib/libssl/src/crypto/modes/asm/ghash-parisc.pl  730
-rw-r--r--  src/lib/libssl/src/crypto/modes/asm/ghash-s390x.pl  262
-rw-r--r--  src/lib/libssl/src/crypto/modes/asm/ghash-sparcv9.pl  330
-rw-r--r--  src/lib/libssl/src/crypto/modes/asm/ghash-x86.pl  1342
-rw-r--r--  src/lib/libssl/src/crypto/modes/asm/ghash-x86_64.pl  805
-rw-r--r--  src/lib/libssl/src/crypto/modes/cbc128.c  10
-rw-r--r--  src/lib/libssl/src/crypto/modes/ccm128.c  441
-rw-r--r--  src/lib/libssl/src/crypto/modes/cfb128.c  11
-rw-r--r--  src/lib/libssl/src/crypto/modes/ctr128.c  92
-rw-r--r--  src/lib/libssl/src/crypto/modes/cts128.c  226
-rw-r--r--  src/lib/libssl/src/crypto/modes/gcm128.c  1757
-rw-r--r--  src/lib/libssl/src/crypto/modes/modes.h  76
-rw-r--r--  src/lib/libssl/src/crypto/modes/modes_lcl.h  131
-rw-r--r--  src/lib/libssl/src/crypto/modes/ofb128.c  11
-rw-r--r--  src/lib/libssl/src/crypto/modes/xts128.c  187
-rw-r--r--  src/lib/libssl/src/crypto/o_fips.c  96
-rw-r--r--  src/lib/libssl/src/crypto/o_init.c  34
-rw-r--r--  src/lib/libssl/src/crypto/objects/obj_xref.c  9
-rw-r--r--  src/lib/libssl/src/crypto/objects/obj_xref.h  2
-rw-r--r--  src/lib/libssl/src/crypto/objects/obj_xref.txt  4
-rw-r--r--  src/lib/libssl/src/crypto/pariscid.pl  224
-rw-r--r--  src/lib/libssl/src/crypto/pem/pvkfmt.c  58
-rwxr-xr-x  src/lib/libssl/src/crypto/perlasm/ppc-xlate.pl  13
-rwxr-xr-x  src/lib/libssl/src/crypto/perlasm/x86_64-xlate.pl  221
-rw-r--r--  src/lib/libssl/src/crypto/perlasm/x86gas.pl  34
-rw-r--r--  src/lib/libssl/src/crypto/perlasm/x86masm.pl  19
-rw-r--r--  src/lib/libssl/src/crypto/ppccap.c  115
-rwxr-xr-x  src/lib/libssl/src/crypto/ppccpuid.pl  48
-rw-r--r--  src/lib/libssl/src/crypto/rc4/asm/rc4-md5-x86_64.pl  631
-rw-r--r--  src/lib/libssl/src/crypto/rc4/asm/rc4-parisc.pl  313
-rw-r--r--  src/lib/libssl/src/crypto/rc4/asm/rc4-s390x.pl  47
-rwxr-xr-x  src/lib/libssl/src/crypto/rc4/asm/rc4-x86_64.pl  290
-rw-r--r--  src/lib/libssl/src/crypto/rc4/rc4_utl.c  62
-rw-r--r--  src/lib/libssl/src/crypto/rsa/rsa_ameth.c  351
-rw-r--r--  src/lib/libssl/src/crypto/rsa/rsa_crpt.c  257
-rw-r--r--  src/lib/libssl/src/crypto/rsa/rsa_pmeth.c  154
-rw-r--r--  src/lib/libssl/src/crypto/rsa/rsa_pss.c  81
-rw-r--r--  src/lib/libssl/src/crypto/s390xcap.c  12
-rw-r--r--  src/lib/libssl/src/crypto/s390xcpuid.S  17
-rw-r--r--  src/lib/libssl/src/crypto/seed/seed.c  13
-rw-r--r--  src/lib/libssl/src/crypto/seed/seed.h  4
-rw-r--r--  src/lib/libssl/src/crypto/sha/asm/sha1-alpha.pl  322
-rw-r--r--  src/lib/libssl/src/crypto/sha/asm/sha1-armv4-large.pl  38
-rw-r--r--  src/lib/libssl/src/crypto/sha/asm/sha1-mips.pl  354
-rw-r--r--  src/lib/libssl/src/crypto/sha/asm/sha1-parisc.pl  259
-rwxr-xr-x  src/lib/libssl/src/crypto/sha/asm/sha1-ppc.pl  83
-rw-r--r--  src/lib/libssl/src/crypto/sha/asm/sha1-s390x.pl  50
-rwxr-xr-x  src/lib/libssl/src/crypto/sha/asm/sha1-x86_64.pl  1185
-rw-r--r--  src/lib/libssl/src/crypto/sha/asm/sha256-586.pl  52
-rw-r--r--  src/lib/libssl/src/crypto/sha/asm/sha256-armv4.pl  55
-rw-r--r--  src/lib/libssl/src/crypto/sha/asm/sha512-armv4.pl  357
-rw-r--r--  src/lib/libssl/src/crypto/sha/asm/sha512-mips.pl  455
-rwxr-xr-x  src/lib/libssl/src/crypto/sha/asm/sha512-parisc.pl  791
-rwxr-xr-x  src/lib/libssl/src/crypto/sha/asm/sha512-ppc.pl  114
-rw-r--r--  src/lib/libssl/src/crypto/sha/asm/sha512-s390x.pl  63
-rw-r--r--  src/lib/libssl/src/crypto/sha/asm/sha512-sparcv9.pl  6
-rwxr-xr-x  src/lib/libssl/src/crypto/sha/asm/sha512-x86_64.pl  86
-rw-r--r--  src/lib/libssl/src/crypto/sha/sha256.c  4
-rw-r--r--  src/lib/libssl/src/crypto/sha/sha512.c  54
-rw-r--r--  src/lib/libssl/src/crypto/sparcv9cap.c  4
-rw-r--r--  src/lib/libssl/src/crypto/srp/Makefile  98
-rw-r--r--  src/lib/libssl/src/crypto/srp/srp.h  172
-rw-r--r--  src/lib/libssl/src/crypto/srp/srp_grps.h  517
-rw-r--r--  src/lib/libssl/src/crypto/srp/srp_lcl.h  83
-rw-r--r--  src/lib/libssl/src/crypto/srp/srp_lib.c  357
-rw-r--r--  src/lib/libssl/src/crypto/srp/srp_vfy.c  657
-rw-r--r--  src/lib/libssl/src/crypto/srp/srptest.c  162
-rw-r--r--  src/lib/libssl/src/crypto/ts/ts.h  3
-rw-r--r--  src/lib/libssl/src/crypto/ts/ts_rsp_verify.c  9
-rw-r--r--  src/lib/libssl/src/crypto/whrlpool/Makefile  5
-rw-r--r--  src/lib/libssl/src/crypto/whrlpool/whrlpool.h  3
-rw-r--r--  src/lib/libssl/src/crypto/whrlpool/wp_block.c  4
-rw-r--r--  src/lib/libssl/src/crypto/whrlpool/wp_dgst.c  3
-rw-r--r--  src/lib/libssl/src/crypto/x509v3/v3_asid.c  63
-rw-r--r--  src/lib/libssl/src/crypto/x86_64cpuid.pl  87
-rw-r--r--  src/lib/libssl/src/crypto/x86cpuid.pl  78
-rw-r--r--  src/lib/libssl/src/doc/apps/genpkey.pod  2
-rw-r--r--  src/lib/libssl/src/doc/crypto/ecdsa.pod  2
-rw-r--r--  src/lib/libssl/src/engines/ccgost/Makefile  14
-rw-r--r--  src/lib/libssl/src/engines/ccgost/gost_ameth.c  37
-rw-r--r--  src/lib/libssl/src/engines/ccgost/gost_pmeth.c  19
-rw-r--r--  src/lib/libssl/src/engines/e_aep.c  1
-rw-r--r--  src/lib/libssl/src/engines/e_capi.c  51
-rw-r--r--  src/lib/libssl/src/engines/e_padlock.c  8
-rw-r--r--  src/lib/libssl/src/ssl/d1_both.c  178
-rw-r--r--  src/lib/libssl/src/ssl/d1_clnt.c  194
-rw-r--r--  src/lib/libssl/src/ssl/d1_enc.c  2
-rw-r--r--  src/lib/libssl/src/ssl/d1_lib.c  54
-rw-r--r--  src/lib/libssl/src/ssl/d1_pkt.c  167
-rw-r--r--  src/lib/libssl/src/ssl/d1_srtp.c  493
-rw-r--r--  src/lib/libssl/src/ssl/d1_srvr.c  186
-rw-r--r--  src/lib/libssl/src/ssl/dtls1.h  18
-rw-r--r--  src/lib/libssl/src/ssl/srtp.h  145
-rw-r--r--  src/lib/libssl/src/ssl/tls_srp.c  506
-rw-r--r--  src/lib/libssl/src/test/pkits-test.pl  9
-rw-r--r--  src/lib/libssl/src/util/copy.pl  11
248 files changed, 62631 insertions, 2332 deletions
diff --git a/src/lib/libcrypto/camellia/Makefile b/src/lib/libcrypto/camellia/Makefile
index ff5fe4a01d..6ce6fc99cd 100644
--- a/src/lib/libcrypto/camellia/Makefile
+++ b/src/lib/libcrypto/camellia/Makefile
@@ -23,9 +23,9 @@ APPS=
23 23
24LIB=$(TOP)/libcrypto.a 24LIB=$(TOP)/libcrypto.a
25LIBSRC=camellia.c cmll_misc.c cmll_ecb.c cmll_cbc.c cmll_ofb.c \ 25LIBSRC=camellia.c cmll_misc.c cmll_ecb.c cmll_cbc.c cmll_ofb.c \
26 cmll_cfb.c cmll_ctr.c 26 cmll_cfb.c cmll_ctr.c cmll_utl.c
27 27
28LIBOBJ= cmll_ecb.o cmll_ofb.o cmll_cfb.o cmll_ctr.o $(CMLL_ENC) 28LIBOBJ= cmll_ecb.o cmll_ofb.o cmll_cfb.o cmll_ctr.o cmll_utl.o $(CMLL_ENC)
29 29
30SRC= $(LIBSRC) 30SRC= $(LIBSRC)
31 31
@@ -96,8 +96,15 @@ cmll_ctr.o: ../../include/openssl/camellia.h ../../include/openssl/modes.h
96cmll_ctr.o: ../../include/openssl/opensslconf.h cmll_ctr.c 96cmll_ctr.o: ../../include/openssl/opensslconf.h cmll_ctr.c
97cmll_ecb.o: ../../include/openssl/camellia.h 97cmll_ecb.o: ../../include/openssl/camellia.h
98cmll_ecb.o: ../../include/openssl/opensslconf.h cmll_ecb.c cmll_locl.h 98cmll_ecb.o: ../../include/openssl/opensslconf.h cmll_ecb.c cmll_locl.h
99cmll_misc.o: ../../include/openssl/camellia.h 99cmll_misc.o: ../../include/openssl/camellia.h ../../include/openssl/crypto.h
100cmll_misc.o: ../../include/openssl/opensslconf.h 100cmll_misc.o: ../../include/openssl/e_os2.h ../../include/openssl/opensslconf.h
101cmll_misc.o: ../../include/openssl/opensslv.h cmll_locl.h cmll_misc.c 101cmll_misc.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
102cmll_misc.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
103cmll_misc.o: ../../include/openssl/symhacks.h cmll_locl.h cmll_misc.c
102cmll_ofb.o: ../../include/openssl/camellia.h ../../include/openssl/modes.h 104cmll_ofb.o: ../../include/openssl/camellia.h ../../include/openssl/modes.h
103cmll_ofb.o: ../../include/openssl/opensslconf.h cmll_ofb.c 105cmll_ofb.o: ../../include/openssl/opensslconf.h cmll_ofb.c
106cmll_utl.o: ../../include/openssl/camellia.h ../../include/openssl/crypto.h
107cmll_utl.o: ../../include/openssl/e_os2.h ../../include/openssl/opensslconf.h
108cmll_utl.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
109cmll_utl.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
110cmll_utl.o: ../../include/openssl/symhacks.h cmll_locl.h cmll_utl.c
diff --git a/src/lib/libcrypto/camellia/cmll_utl.c b/src/lib/libcrypto/camellia/cmll_utl.c
new file mode 100644
index 0000000000..7a35711ec1
--- /dev/null
+++ b/src/lib/libcrypto/camellia/cmll_utl.c
@@ -0,0 +1,64 @@
1/* crypto/camellia/cmll_utl.c -*- mode:C; c-file-style: "eay" -*- */
2/* ====================================================================
3 * Copyright (c) 2011 The OpenSSL Project. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 *
17 * 3. All advertising materials mentioning features or use of this
18 * software must display the following acknowledgment:
19 * "This product includes software developed by the OpenSSL Project
20 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
21 *
22 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
23 * endorse or promote products derived from this software without
24 * prior written permission. For written permission, please contact
25 * openssl-core@openssl.org.
26 *
27 * 5. Products derived from this software may not be called "OpenSSL"
28 * nor may "OpenSSL" appear in their names without prior written
29 * permission of the OpenSSL Project.
30 *
31 * 6. Redistributions of any form whatsoever must retain the following
32 * acknowledgment:
33 * "This product includes software developed by the OpenSSL Project
34 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
35 *
36 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
37 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
38 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
39 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
40 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
42 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
43 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
44 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
45 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
46 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
47 * OF THE POSSIBILITY OF SUCH DAMAGE.
48 * ====================================================================
49 *
50 */
51
52#include <openssl/opensslv.h>
53#include <openssl/crypto.h>
54#include <openssl/camellia.h>
55#include "cmll_locl.h"
56
57int Camellia_set_key(const unsigned char *userKey, const int bits,
58 CAMELLIA_KEY *key)
59 {
60#ifdef OPENSSL_FIPS
61 fips_cipher_abort(Camellia);
62#endif
63 return private_Camellia_set_key(userKey, bits, key);
64 }
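The new cmll_utl.c above only re-routes Camellia_set_key() through private_Camellia_set_key() (adding a FIPS abort hook), so callers keep the familiar low-level API. A minimal caller sketch, assuming the standard <openssl/camellia.h> interface; the key and plaintext bytes below are made-up example values:

#include <stdio.h>
#include <string.h>
#include <openssl/camellia.h>

int main(void)
{
	unsigned char key[16], in[CAMELLIA_BLOCK_SIZE], out[CAMELLIA_BLOCK_SIZE];
	CAMELLIA_KEY ks;

	memset(key, 0x0f, sizeof(key));		/* throwaway 128-bit example key */
	memset(in, 0xab, sizeof(in));		/* one dummy 16-byte plaintext block */

	/* Camellia_set_key() now delegates to private_Camellia_set_key() */
	if (Camellia_set_key(key, 128, &ks) != 0) {
		fprintf(stderr, "Camellia_set_key failed\n");
		return 1;
	}
	Camellia_encrypt(in, out, &ks);		/* encrypt a single raw block */
	printf("first ciphertext byte: %02x\n", out[0]);
	return 0;
}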
diff --git a/src/lib/libcrypto/cmac/Makefile b/src/lib/libcrypto/cmac/Makefile
new file mode 100644
index 0000000000..54e7cc39d5
--- /dev/null
+++ b/src/lib/libcrypto/cmac/Makefile
@@ -0,0 +1,111 @@
1#
2# OpenSSL/crypto/cmac/Makefile
3#
4
5DIR= cmac
6TOP= ../..
7CC= cc
8INCLUDES=
9CFLAG=-g
10MAKEFILE= Makefile
11AR= ar r
12
13CFLAGS= $(INCLUDES) $(CFLAG)
14
15GENERAL=Makefile
16TEST=
17APPS=
18
19LIB=$(TOP)/libcrypto.a
20LIBSRC=cmac.c cm_ameth.c cm_pmeth.c
21LIBOBJ=cmac.o cm_ameth.o cm_pmeth.o
22
23SRC= $(LIBSRC)
24
25EXHEADER= cmac.h
26HEADER= $(EXHEADER)
27
28ALL= $(GENERAL) $(SRC) $(HEADER)
29
30top:
31 (cd ../..; $(MAKE) DIRS=crypto SDIRS=$(DIR) sub_all)
32
33all: lib
34
35lib: $(LIBOBJ)
36 $(AR) $(LIB) $(LIBOBJ)
37 $(RANLIB) $(LIB) || echo Never mind.
38 @touch lib
39
40files:
41 $(PERL) $(TOP)/util/files.pl Makefile >> $(TOP)/MINFO
42
43links:
44 @$(PERL) $(TOP)/util/mklink.pl ../../include/openssl $(EXHEADER)
45 @$(PERL) $(TOP)/util/mklink.pl ../../test $(TEST)
46 @$(PERL) $(TOP)/util/mklink.pl ../../apps $(APPS)
47
48install:
49 @[ -n "$(INSTALLTOP)" ] # should be set by top Makefile...
50 @headerlist="$(EXHEADER)"; for i in $$headerlist ; \
51 do \
52 (cp $$i $(INSTALL_PREFIX)$(INSTALLTOP)/include/openssl/$$i; \
53 chmod 644 $(INSTALL_PREFIX)$(INSTALLTOP)/include/openssl/$$i ); \
54 done;
55
56tags:
57 ctags $(SRC)
58
59tests:
60
61lint:
62 lint -DLINT $(INCLUDES) $(SRC)>fluff
63
64depend:
65 @[ -n "$(MAKEDEPEND)" ] # should be set by upper Makefile...
66 $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(LIBSRC)
67
68dclean:
69 $(PERL) -pe 'if (/^# DO NOT DELETE THIS LINE/) {print; exit(0);}' $(MAKEFILE) >Makefile.new
70 mv -f Makefile.new $(MAKEFILE)
71
72clean:
73 rm -f *.o *.obj lib tags core .pure .nfs* *.old *.bak fluff
74
75# DO NOT DELETE THIS LINE -- make depend depends on it.
76
77cm_ameth.o: ../../e_os.h ../../include/openssl/asn1.h
78cm_ameth.o: ../../include/openssl/bio.h ../../include/openssl/buffer.h
79cm_ameth.o: ../../include/openssl/cmac.h ../../include/openssl/crypto.h
80cm_ameth.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
81cm_ameth.o: ../../include/openssl/evp.h ../../include/openssl/lhash.h
82cm_ameth.o: ../../include/openssl/obj_mac.h ../../include/openssl/objects.h
83cm_ameth.o: ../../include/openssl/opensslconf.h
84cm_ameth.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
85cm_ameth.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
86cm_ameth.o: ../../include/openssl/symhacks.h ../asn1/asn1_locl.h ../cryptlib.h
87cm_ameth.o: cm_ameth.c
88cm_pmeth.o: ../../e_os.h ../../include/openssl/asn1.h
89cm_pmeth.o: ../../include/openssl/bio.h ../../include/openssl/buffer.h
90cm_pmeth.o: ../../include/openssl/cmac.h ../../include/openssl/conf.h
91cm_pmeth.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h
92cm_pmeth.o: ../../include/openssl/ec.h ../../include/openssl/ecdh.h
93cm_pmeth.o: ../../include/openssl/ecdsa.h ../../include/openssl/err.h
94cm_pmeth.o: ../../include/openssl/evp.h ../../include/openssl/lhash.h
95cm_pmeth.o: ../../include/openssl/obj_mac.h ../../include/openssl/objects.h
96cm_pmeth.o: ../../include/openssl/opensslconf.h
97cm_pmeth.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
98cm_pmeth.o: ../../include/openssl/pkcs7.h ../../include/openssl/safestack.h
99cm_pmeth.o: ../../include/openssl/sha.h ../../include/openssl/stack.h
100cm_pmeth.o: ../../include/openssl/symhacks.h ../../include/openssl/x509.h
101cm_pmeth.o: ../../include/openssl/x509_vfy.h ../../include/openssl/x509v3.h
102cm_pmeth.o: ../cryptlib.h ../evp/evp_locl.h cm_pmeth.c
103cmac.o: ../../e_os.h ../../include/openssl/asn1.h ../../include/openssl/bio.h
104cmac.o: ../../include/openssl/buffer.h ../../include/openssl/cmac.h
105cmac.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h
106cmac.o: ../../include/openssl/err.h ../../include/openssl/evp.h
107cmac.o: ../../include/openssl/lhash.h ../../include/openssl/obj_mac.h
108cmac.o: ../../include/openssl/objects.h ../../include/openssl/opensslconf.h
109cmac.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
110cmac.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
111cmac.o: ../../include/openssl/symhacks.h ../cryptlib.h cmac.c
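The Makefile above builds the new CMAC module (cmac.c plus its asn1/pkey method glue). A minimal sketch of computing an AES-128 CMAC with the <openssl/cmac.h> context API that cmac.c provides; the key and message values are illustrative only:

#include <stdio.h>
#include <string.h>
#include <openssl/cmac.h>
#include <openssl/evp.h>

int main(void)
{
	unsigned char key[16], mac[EVP_MAX_MD_SIZE];
	size_t maclen, i;
	const char *msg = "message to authenticate";
	CMAC_CTX *ctx = CMAC_CTX_new();

	memset(key, 0x2b, sizeof(key));		/* throwaway example key */
	if (ctx == NULL)
		return 1;
	if (!CMAC_Init(ctx, key, sizeof(key), EVP_aes_128_cbc(), NULL) ||
	    !CMAC_Update(ctx, msg, strlen(msg)) ||
	    !CMAC_Final(ctx, mac, &maclen)) {	/* 16-byte tag for an AES cipher */
		CMAC_CTX_free(ctx);
		return 1;
	}
	for (i = 0; i < maclen; i++)
		printf("%02x", mac[i]);
	printf("\n");
	CMAC_CTX_free(ctx);
	return 0;
}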
diff --git a/src/lib/libcrypto/cms/Makefile b/src/lib/libcrypto/cms/Makefile
index 5837049725..9820adb212 100644
--- a/src/lib/libcrypto/cms/Makefile
+++ b/src/lib/libcrypto/cms/Makefile
@@ -18,9 +18,11 @@ APPS=
18 18
19LIB=$(TOP)/libcrypto.a 19LIB=$(TOP)/libcrypto.a
20LIBSRC= cms_lib.c cms_asn1.c cms_att.c cms_io.c cms_smime.c cms_err.c \ 20LIBSRC= cms_lib.c cms_asn1.c cms_att.c cms_io.c cms_smime.c cms_err.c \
21 cms_sd.c cms_dd.c cms_cd.c cms_env.c cms_enc.c cms_ess.c 21 cms_sd.c cms_dd.c cms_cd.c cms_env.c cms_enc.c cms_ess.c \
22 cms_pwri.c
22LIBOBJ= cms_lib.o cms_asn1.o cms_att.o cms_io.o cms_smime.o cms_err.o \ 23LIBOBJ= cms_lib.o cms_asn1.o cms_att.o cms_io.o cms_smime.o cms_err.o \
23 cms_sd.o cms_dd.o cms_cd.o cms_env.o cms_enc.o cms_ess.o 24 cms_sd.o cms_dd.o cms_cd.o cms_env.o cms_enc.o cms_ess.o \
25 cms_pwri.o
24 26
25SRC= $(LIBSRC) 27SRC= $(LIBSRC)
26 28
@@ -230,6 +232,24 @@ cms_lib.o: ../../include/openssl/safestack.h ../../include/openssl/sha.h
230cms_lib.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h 232cms_lib.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
231cms_lib.o: ../../include/openssl/x509.h ../../include/openssl/x509_vfy.h cms.h 233cms_lib.o: ../../include/openssl/x509.h ../../include/openssl/x509_vfy.h cms.h
232cms_lib.o: cms_lcl.h cms_lib.c 234cms_lib.o: cms_lcl.h cms_lib.c
235cms_pwri.o: ../../e_os.h ../../include/openssl/aes.h
236cms_pwri.o: ../../include/openssl/asn1.h ../../include/openssl/asn1t.h
237cms_pwri.o: ../../include/openssl/bio.h ../../include/openssl/buffer.h
238cms_pwri.o: ../../include/openssl/cms.h ../../include/openssl/conf.h
239cms_pwri.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h
240cms_pwri.o: ../../include/openssl/ec.h ../../include/openssl/ecdh.h
241cms_pwri.o: ../../include/openssl/ecdsa.h ../../include/openssl/err.h
242cms_pwri.o: ../../include/openssl/evp.h ../../include/openssl/lhash.h
243cms_pwri.o: ../../include/openssl/obj_mac.h ../../include/openssl/objects.h
244cms_pwri.o: ../../include/openssl/opensslconf.h
245cms_pwri.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
246cms_pwri.o: ../../include/openssl/pem.h ../../include/openssl/pem2.h
247cms_pwri.o: ../../include/openssl/pkcs7.h ../../include/openssl/rand.h
248cms_pwri.o: ../../include/openssl/safestack.h ../../include/openssl/sha.h
249cms_pwri.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
250cms_pwri.o: ../../include/openssl/x509.h ../../include/openssl/x509_vfy.h
251cms_pwri.o: ../../include/openssl/x509v3.h ../asn1/asn1_locl.h ../cryptlib.h
252cms_pwri.o: cms_lcl.h cms_pwri.c
233cms_sd.o: ../../e_os.h ../../include/openssl/asn1.h 253cms_sd.o: ../../e_os.h ../../include/openssl/asn1.h
234cms_sd.o: ../../include/openssl/asn1t.h ../../include/openssl/bio.h 254cms_sd.o: ../../include/openssl/asn1t.h ../../include/openssl/bio.h
235cms_sd.o: ../../include/openssl/buffer.h ../../include/openssl/cms.h 255cms_sd.o: ../../include/openssl/buffer.h ../../include/openssl/cms.h
diff --git a/src/lib/libcrypto/ecdh/ecdhtest.c b/src/lib/libcrypto/ecdh/ecdhtest.c
index 212a87efa4..823d7baa65 100644
--- a/src/lib/libcrypto/ecdh/ecdhtest.c
+++ b/src/lib/libcrypto/ecdh/ecdhtest.c
@@ -158,11 +158,13 @@ static int test_ecdh_curve(int nid, const char *text, BN_CTX *ctx, BIO *out)
158 if (!EC_POINT_get_affine_coordinates_GFp(group, 158 if (!EC_POINT_get_affine_coordinates_GFp(group,
159 EC_KEY_get0_public_key(a), x_a, y_a, ctx)) goto err; 159 EC_KEY_get0_public_key(a), x_a, y_a, ctx)) goto err;
160 } 160 }
161#ifndef OPENSSL_NO_EC2M
161 else 162 else
162 { 163 {
163 if (!EC_POINT_get_affine_coordinates_GF2m(group, 164 if (!EC_POINT_get_affine_coordinates_GF2m(group,
164 EC_KEY_get0_public_key(a), x_a, y_a, ctx)) goto err; 165 EC_KEY_get0_public_key(a), x_a, y_a, ctx)) goto err;
165 } 166 }
167#endif
166#ifdef NOISY 168#ifdef NOISY
167 BIO_puts(out," pri 1="); 169 BIO_puts(out," pri 1=");
168 BN_print(out,a->priv_key); 170 BN_print(out,a->priv_key);
@@ -183,11 +185,13 @@ static int test_ecdh_curve(int nid, const char *text, BN_CTX *ctx, BIO *out)
183 if (!EC_POINT_get_affine_coordinates_GFp(group, 185 if (!EC_POINT_get_affine_coordinates_GFp(group,
184 EC_KEY_get0_public_key(b), x_b, y_b, ctx)) goto err; 186 EC_KEY_get0_public_key(b), x_b, y_b, ctx)) goto err;
185 } 187 }
188#ifndef OPENSSL_NO_EC2M
186 else 189 else
187 { 190 {
188 if (!EC_POINT_get_affine_coordinates_GF2m(group, 191 if (!EC_POINT_get_affine_coordinates_GF2m(group,
189 EC_KEY_get0_public_key(b), x_b, y_b, ctx)) goto err; 192 EC_KEY_get0_public_key(b), x_b, y_b, ctx)) goto err;
190 } 193 }
194#endif
191 195
192#ifdef NOISY 196#ifdef NOISY
193 BIO_puts(out," pri 2="); 197 BIO_puts(out," pri 2=");
@@ -324,6 +328,7 @@ int main(int argc, char *argv[])
324 if (!test_ecdh_curve(NID_X9_62_prime256v1, "NIST Prime-Curve P-256", ctx, out)) goto err; 328 if (!test_ecdh_curve(NID_X9_62_prime256v1, "NIST Prime-Curve P-256", ctx, out)) goto err;
325 if (!test_ecdh_curve(NID_secp384r1, "NIST Prime-Curve P-384", ctx, out)) goto err; 329 if (!test_ecdh_curve(NID_secp384r1, "NIST Prime-Curve P-384", ctx, out)) goto err;
326 if (!test_ecdh_curve(NID_secp521r1, "NIST Prime-Curve P-521", ctx, out)) goto err; 330 if (!test_ecdh_curve(NID_secp521r1, "NIST Prime-Curve P-521", ctx, out)) goto err;
331#ifndef OPENSSL_NO_EC2M
327 /* NIST BINARY CURVES TESTS */ 332 /* NIST BINARY CURVES TESTS */
328 if (!test_ecdh_curve(NID_sect163k1, "NIST Binary-Curve K-163", ctx, out)) goto err; 333 if (!test_ecdh_curve(NID_sect163k1, "NIST Binary-Curve K-163", ctx, out)) goto err;
329 if (!test_ecdh_curve(NID_sect163r2, "NIST Binary-Curve B-163", ctx, out)) goto err; 334 if (!test_ecdh_curve(NID_sect163r2, "NIST Binary-Curve B-163", ctx, out)) goto err;
@@ -335,6 +340,7 @@ int main(int argc, char *argv[])
335 if (!test_ecdh_curve(NID_sect409r1, "NIST Binary-Curve B-409", ctx, out)) goto err; 340 if (!test_ecdh_curve(NID_sect409r1, "NIST Binary-Curve B-409", ctx, out)) goto err;
336 if (!test_ecdh_curve(NID_sect571k1, "NIST Binary-Curve K-571", ctx, out)) goto err; 341 if (!test_ecdh_curve(NID_sect571k1, "NIST Binary-Curve K-571", ctx, out)) goto err;
337 if (!test_ecdh_curve(NID_sect571r1, "NIST Binary-Curve B-571", ctx, out)) goto err; 342 if (!test_ecdh_curve(NID_sect571r1, "NIST Binary-Curve B-571", ctx, out)) goto err;
343#endif
338 344
339 ret = 0; 345 ret = 0;
340 346
diff --git a/src/lib/libcrypto/ecdh/ech_ossl.c b/src/lib/libcrypto/ecdh/ech_ossl.c
index 2a40ff12df..4a30628fbc 100644
--- a/src/lib/libcrypto/ecdh/ech_ossl.c
+++ b/src/lib/libcrypto/ecdh/ech_ossl.c
@@ -157,6 +157,7 @@ static int ecdh_compute_key(void *out, size_t outlen, const EC_POINT *pub_key,
157 goto err; 157 goto err;
158 } 158 }
159 } 159 }
160#ifndef OPENSSL_NO_EC2M
160 else 161 else
161 { 162 {
162 if (!EC_POINT_get_affine_coordinates_GF2m(group, tmp, x, y, ctx)) 163 if (!EC_POINT_get_affine_coordinates_GF2m(group, tmp, x, y, ctx))
@@ -165,6 +166,7 @@ static int ecdh_compute_key(void *out, size_t outlen, const EC_POINT *pub_key,
165 goto err; 166 goto err;
166 } 167 }
167 } 168 }
169#endif
168 170
169 buflen = (EC_GROUP_get_degree(group) + 7)/8; 171 buflen = (EC_GROUP_get_degree(group) + 7)/8;
170 len = BN_num_bytes(x); 172 len = BN_num_bytes(x);
diff --git a/src/lib/libcrypto/ecdsa/ecdsatest.c b/src/lib/libcrypto/ecdsa/ecdsatest.c
index 54cfb8c753..537bb30362 100644
--- a/src/lib/libcrypto/ecdsa/ecdsatest.c
+++ b/src/lib/libcrypto/ecdsa/ecdsatest.c
@@ -262,6 +262,7 @@ int x9_62_tests(BIO *out)
262 "3238135532097973577080787768312505059318910517550078427819" 262 "3238135532097973577080787768312505059318910517550078427819"
263 "78505179448783")) 263 "78505179448783"))
264 goto x962_err; 264 goto x962_err;
265#ifndef OPENSSL_NO_EC2M
265 if (!x9_62_test_internal(out, NID_X9_62_c2tnb191v1, 266 if (!x9_62_test_internal(out, NID_X9_62_c2tnb191v1,
266 "87194383164871543355722284926904419997237591535066528048", 267 "87194383164871543355722284926904419997237591535066528048",
267 "308992691965804947361541664549085895292153777025772063598")) 268 "308992691965804947361541664549085895292153777025772063598"))
@@ -272,7 +273,7 @@ int x9_62_tests(BIO *out)
272 "1970303740007316867383349976549972270528498040721988191026" 273 "1970303740007316867383349976549972270528498040721988191026"
273 "49413465737174")) 274 "49413465737174"))
274 goto x962_err; 275 goto x962_err;
275 276#endif
276 ret = 1; 277 ret = 1;
277x962_err: 278x962_err:
278 if (!restore_rand()) 279 if (!restore_rand())
@@ -289,7 +290,8 @@ int test_builtin(BIO *out)
289 ECDSA_SIG *ecdsa_sig = NULL; 290 ECDSA_SIG *ecdsa_sig = NULL;
290 unsigned char digest[20], wrong_digest[20]; 291 unsigned char digest[20], wrong_digest[20];
291 unsigned char *signature = NULL; 292 unsigned char *signature = NULL;
292 unsigned char *sig_ptr; 293 const unsigned char *sig_ptr;
294 unsigned char *sig_ptr2;
293 unsigned char *raw_buf = NULL; 295 unsigned char *raw_buf = NULL;
294 unsigned int sig_len, degree, r_len, s_len, bn_len, buf_len; 296 unsigned int sig_len, degree, r_len, s_len, bn_len, buf_len;
295 int nid, ret = 0; 297 int nid, ret = 0;
@@ -464,8 +466,8 @@ int test_builtin(BIO *out)
464 (BN_bin2bn(raw_buf + bn_len, bn_len, ecdsa_sig->s) == NULL)) 466 (BN_bin2bn(raw_buf + bn_len, bn_len, ecdsa_sig->s) == NULL))
465 goto builtin_err; 467 goto builtin_err;
466 468
467 sig_ptr = signature; 469 sig_ptr2 = signature;
468 sig_len = i2d_ECDSA_SIG(ecdsa_sig, &sig_ptr); 470 sig_len = i2d_ECDSA_SIG(ecdsa_sig, &sig_ptr2);
469 if (ECDSA_verify(0, digest, 20, signature, sig_len, eckey) == 1) 471 if (ECDSA_verify(0, digest, 20, signature, sig_len, eckey) == 1)
470 { 472 {
471 BIO_printf(out, " failed\n"); 473 BIO_printf(out, " failed\n");
@@ -477,8 +479,8 @@ int test_builtin(BIO *out)
477 (BN_bin2bn(raw_buf + bn_len, bn_len, ecdsa_sig->s) == NULL)) 479 (BN_bin2bn(raw_buf + bn_len, bn_len, ecdsa_sig->s) == NULL))
478 goto builtin_err; 480 goto builtin_err;
479 481
480 sig_ptr = signature; 482 sig_ptr2 = signature;
481 sig_len = i2d_ECDSA_SIG(ecdsa_sig, &sig_ptr); 483 sig_len = i2d_ECDSA_SIG(ecdsa_sig, &sig_ptr2);
482 if (ECDSA_verify(0, digest, 20, signature, sig_len, eckey) != 1) 484 if (ECDSA_verify(0, digest, 20, signature, sig_len, eckey) != 1)
483 { 485 {
484 BIO_printf(out, " failed\n"); 486 BIO_printf(out, " failed\n");
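The test change above splits the signature cursor into a const pointer (for decoding) and a non-const pointer (for encoding), matching the prototypes of d2i_ECDSA_SIG() and i2d_ECDSA_SIG(). A short sketch of that encode/decode round trip with the correct constness; the buffer size and helper name are illustrative:

#include <openssl/ecdsa.h>

int roundtrip(ECDSA_SIG *sig)
{
	unsigned char buf[256], *wp = buf;	/* write cursor for i2d (non-const) */
	const unsigned char *rp = buf;		/* read cursor for d2i (const) */
	ECDSA_SIG *copy = NULL;
	int len;

	len = i2d_ECDSA_SIG(sig, NULL);		/* length query, nothing written */
	if (len <= 0 || len > (int)sizeof(buf))
		return 0;
	len = i2d_ECDSA_SIG(sig, &wp);		/* encode; wp advances past the DER */
	copy = d2i_ECDSA_SIG(&copy, &rp, len);	/* decode from the same buffer */
	if (copy == NULL)
		return 0;
	ECDSA_SIG_free(copy);
	return 1;
}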
diff --git a/src/lib/libcrypto/engine/eng_rdrand.c b/src/lib/libcrypto/engine/eng_rdrand.c
new file mode 100644
index 0000000000..a9ba5ae6f9
--- /dev/null
+++ b/src/lib/libcrypto/engine/eng_rdrand.c
@@ -0,0 +1,142 @@
1/* ====================================================================
2 * Copyright (c) 2011 The OpenSSL Project. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in
13 * the documentation and/or other materials provided with the
14 * distribution.
15 *
16 * 3. All advertising materials mentioning features or use of this
17 * software must display the following acknowledgment:
18 * "This product includes software developed by the OpenSSL Project
19 * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
20 *
21 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
22 * endorse or promote products derived from this software without
23 * prior written permission. For written permission, please contact
24 * licensing@OpenSSL.org.
25 *
26 * 5. Products derived from this software may not be called "OpenSSL"
27 * nor may "OpenSSL" appear in their names without prior written
28 * permission of the OpenSSL Project.
29 *
30 * 6. Redistributions of any form whatsoever must retain the following
31 * acknowledgment:
32 * "This product includes software developed by the OpenSSL Project
33 * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
34 *
35 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
36 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
37 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
38 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
39 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
40 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
41 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
42 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
43 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
44 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
45 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
46 * OF THE POSSIBILITY OF SUCH DAMAGE.
47 * ====================================================================
48 */
49
50#include <openssl/opensslconf.h>
51
52#include <stdio.h>
53#include <string.h>
54#include <openssl/engine.h>
55#include <openssl/rand.h>
56#include <openssl/err.h>
57
58#if (defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
59 defined(__x86_64) || defined(__x86_64__) || \
60 defined(_M_AMD64) || defined (_M_X64)) && defined(OPENSSL_CPUID_OBJ)
61
62size_t OPENSSL_ia32_rdrand(void);
63
64static int get_random_bytes (unsigned char *buf, int num)
65 {
66 size_t rnd;
67
68 while (num>=(int)sizeof(size_t)) {
69 if ((rnd = OPENSSL_ia32_rdrand()) == 0) return 0;
70
71 *((size_t *)buf) = rnd;
72 buf += sizeof(size_t);
73 num -= sizeof(size_t);
74 }
75 if (num) {
76 if ((rnd = OPENSSL_ia32_rdrand()) == 0) return 0;
77
78 memcpy (buf,&rnd,num);
79 }
80
81 return 1;
82 }
83
84static int random_status (void)
85{ return 1; }
86
87static RAND_METHOD rdrand_meth =
88 {
89 NULL, /* seed */
90 get_random_bytes,
91 NULL, /* cleanup */
92 NULL, /* add */
93 get_random_bytes,
94 random_status,
95 };
96
97static int rdrand_init(ENGINE *e)
98{ return 1; }
99
100static const char *engine_e_rdrand_id = "rdrand";
101static const char *engine_e_rdrand_name = "Intel RDRAND engine";
102
103static int bind_helper(ENGINE *e)
104 {
105 if (!ENGINE_set_id(e, engine_e_rdrand_id) ||
106 !ENGINE_set_name(e, engine_e_rdrand_name) ||
107 !ENGINE_set_init_function(e, rdrand_init) ||
108 !ENGINE_set_RAND(e, &rdrand_meth) )
109 return 0;
110
111 return 1;
112 }
113
114static ENGINE *ENGINE_rdrand(void)
115 {
116 ENGINE *ret = ENGINE_new();
117 if(!ret)
118 return NULL;
119 if(!bind_helper(ret))
120 {
121 ENGINE_free(ret);
122 return NULL;
123 }
124 return ret;
125 }
126
127void ENGINE_load_rdrand (void)
128 {
129 extern unsigned int OPENSSL_ia32cap_P[];
130
131 if (OPENSSL_ia32cap_P[1] & (1<<(62-32)))
132 {
133 ENGINE *toadd = ENGINE_rdrand();
134 if(!toadd) return;
135 ENGINE_add(toadd);
136 ENGINE_free(toadd);
137 ERR_clear_error();
138 }
139 }
140#else
141void ENGINE_load_rdrand (void) {}
142#endif
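eng_rdrand.c above registers a RAND-only ENGINE backed by the RDRAND instruction; ENGINE_load_rdrand() adds it only when the CPU capability bit is set. A usage sketch that assumes the standard ENGINE/RAND APIs, with error handling kept minimal:

#include <stdio.h>
#include <openssl/engine.h>
#include <openssl/rand.h>

int main(void)
{
	unsigned char buf[32];
	ENGINE *e;

	ENGINE_load_rdrand();			/* registers only on RDRAND-capable CPUs */
	e = ENGINE_by_id("rdrand");
	if (e == NULL) {
		fprintf(stderr, "rdrand engine not available on this CPU/build\n");
		return 1;
	}
	if (!ENGINE_init(e) || !ENGINE_set_default_RAND(e)) {
		ENGINE_free(e);
		return 1;
	}
	if (RAND_bytes(buf, sizeof(buf)) != 1)	/* now serviced by OPENSSL_ia32_rdrand() */
		fprintf(stderr, "RAND_bytes failed\n");
	ENGINE_finish(e);			/* the default table keeps its own reference */
	ENGINE_free(e);
	return 0;
}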
diff --git a/src/lib/libcrypto/engine/eng_rsax.c b/src/lib/libcrypto/engine/eng_rsax.c
new file mode 100644
index 0000000000..96e63477ee
--- /dev/null
+++ b/src/lib/libcrypto/engine/eng_rsax.c
@@ -0,0 +1,668 @@
1/* crypto/engine/eng_rsax.c */
2/* Copyright (c) 2010-2010 Intel Corp.
3 * Author: Vinodh.Gopal@intel.com
4 * Jim Guilford
5 * Erdinc.Ozturk@intel.com
6 * Maxim.Perminov@intel.com
7 * Ying.Huang@intel.com
8 *
9 * More information about algorithm used can be found at:
10 * http://www.cse.buffalo.edu/srds2009/escs2009_submission_Gopal.pdf
11 */
12/* ====================================================================
13 * Copyright (c) 1999-2001 The OpenSSL Project. All rights reserved.
14 *
15 * Redistribution and use in source and binary forms, with or without
16 * modification, are permitted provided that the following conditions
17 * are met:
18 *
19 * 1. Redistributions of source code must retain the above copyright
20 * notice, this list of conditions and the following disclaimer.
21 *
22 * 2. Redistributions in binary form must reproduce the above copyright
23 * notice, this list of conditions and the following disclaimer in
24 * the documentation and/or other materials provided with the
25 * distribution.
26 *
27 * 3. All advertising materials mentioning features or use of this
28 * software must display the following acknowledgment:
29 * "This product includes software developed by the OpenSSL Project
30 * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
31 *
32 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
33 * endorse or promote products derived from this software without
34 * prior written permission. For written permission, please contact
35 * licensing@OpenSSL.org.
36 *
37 * 5. Products derived from this software may not be called "OpenSSL"
38 * nor may "OpenSSL" appear in their names without prior written
39 * permission of the OpenSSL Project.
40 *
41 * 6. Redistributions of any form whatsoever must retain the following
42 * acknowledgment:
43 * "This product includes software developed by the OpenSSL Project
44 * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
45 *
46 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
47 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
48 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
49 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
50 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
51 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
52 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
53 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
54 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
55 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
56 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
57 * OF THE POSSIBILITY OF SUCH DAMAGE.
58 * ====================================================================
59 *
60 * This product includes cryptographic software written by Eric Young
61 * (eay@cryptsoft.com). This product includes software written by Tim
62 * Hudson (tjh@cryptsoft.com).
63 */
64
65#include <openssl/opensslconf.h>
66
67#include <stdio.h>
68#include <string.h>
69#include <openssl/crypto.h>
70#include <openssl/buffer.h>
71#include <openssl/engine.h>
72#ifndef OPENSSL_NO_RSA
73#include <openssl/rsa.h>
74#endif
75#include <openssl/bn.h>
76#include <openssl/err.h>
77
78/* RSAX is available **ONLY* on x86_64 CPUs */
79#undef COMPILE_RSAX
80
81#if (defined(__x86_64) || defined(__x86_64__) || \
82 defined(_M_AMD64) || defined (_M_X64)) && !defined(OPENSSL_NO_ASM)
83#define COMPILE_RSAX
84static ENGINE *ENGINE_rsax (void);
85#endif
86
87void ENGINE_load_rsax (void)
88 {
89/* On non-x86 CPUs it just returns. */
90#ifdef COMPILE_RSAX
91 ENGINE *toadd = ENGINE_rsax();
92 if(!toadd) return;
93 ENGINE_add(toadd);
94 ENGINE_free(toadd);
95 ERR_clear_error();
96#endif
97 }
98
99#ifdef COMPILE_RSAX
100#define E_RSAX_LIB_NAME "rsax engine"
101
102static int e_rsax_destroy(ENGINE *e);
103static int e_rsax_init(ENGINE *e);
104static int e_rsax_finish(ENGINE *e);
105static int e_rsax_ctrl(ENGINE *e, int cmd, long i, void *p, void (*f)(void));
106
107#ifndef OPENSSL_NO_RSA
108/* RSA stuff */
109static int e_rsax_rsa_mod_exp(BIGNUM *r, const BIGNUM *I, RSA *rsa, BN_CTX *ctx);
110static int e_rsax_rsa_finish(RSA *r);
111#endif
112
113static const ENGINE_CMD_DEFN e_rsax_cmd_defns[] = {
114 {0, NULL, NULL, 0}
115 };
116
117#ifndef OPENSSL_NO_RSA
118/* Our internal RSA_METHOD that we provide pointers to */
119static RSA_METHOD e_rsax_rsa =
120 {
121 "Intel RSA-X method",
122 NULL,
123 NULL,
124 NULL,
125 NULL,
126 e_rsax_rsa_mod_exp,
127 NULL,
128 NULL,
129 e_rsax_rsa_finish,
130 RSA_FLAG_CACHE_PUBLIC|RSA_FLAG_CACHE_PRIVATE,
131 NULL,
132 NULL,
133 NULL
134 };
135#endif
136
137/* Constants used when creating the ENGINE */
138static const char *engine_e_rsax_id = "rsax";
139static const char *engine_e_rsax_name = "RSAX engine support";
140
141/* This internal function is used by ENGINE_rsax() */
142static int bind_helper(ENGINE *e)
143 {
144#ifndef OPENSSL_NO_RSA
145 const RSA_METHOD *meth1;
146#endif
147 if(!ENGINE_set_id(e, engine_e_rsax_id) ||
148 !ENGINE_set_name(e, engine_e_rsax_name) ||
149#ifndef OPENSSL_NO_RSA
150 !ENGINE_set_RSA(e, &e_rsax_rsa) ||
151#endif
152 !ENGINE_set_destroy_function(e, e_rsax_destroy) ||
153 !ENGINE_set_init_function(e, e_rsax_init) ||
154 !ENGINE_set_finish_function(e, e_rsax_finish) ||
155 !ENGINE_set_ctrl_function(e, e_rsax_ctrl) ||
156 !ENGINE_set_cmd_defns(e, e_rsax_cmd_defns))
157 return 0;
158
159#ifndef OPENSSL_NO_RSA
160 meth1 = RSA_PKCS1_SSLeay();
161 e_rsax_rsa.rsa_pub_enc = meth1->rsa_pub_enc;
162 e_rsax_rsa.rsa_pub_dec = meth1->rsa_pub_dec;
163 e_rsax_rsa.rsa_priv_enc = meth1->rsa_priv_enc;
164 e_rsax_rsa.rsa_priv_dec = meth1->rsa_priv_dec;
165 e_rsax_rsa.bn_mod_exp = meth1->bn_mod_exp;
166#endif
167 return 1;
168 }
169
170static ENGINE *ENGINE_rsax(void)
171 {
172 ENGINE *ret = ENGINE_new();
173 if(!ret)
174 return NULL;
175 if(!bind_helper(ret))
176 {
177 ENGINE_free(ret);
178 return NULL;
179 }
180 return ret;
181 }
182
183#ifndef OPENSSL_NO_RSA
184/* Used to attach our own key-data to an RSA structure */
185static int rsax_ex_data_idx = -1;
186#endif
187
188static int e_rsax_destroy(ENGINE *e)
189 {
190 return 1;
191 }
192
193/* (de)initialisation functions. */
194static int e_rsax_init(ENGINE *e)
195 {
196#ifndef OPENSSL_NO_RSA
197 if (rsax_ex_data_idx == -1)
198 rsax_ex_data_idx = RSA_get_ex_new_index(0,
199 NULL,
200 NULL, NULL, NULL);
201#endif
202 if (rsax_ex_data_idx == -1)
203 return 0;
204 return 1;
205 }
206
207static int e_rsax_finish(ENGINE *e)
208 {
209 return 1;
210 }
211
212static int e_rsax_ctrl(ENGINE *e, int cmd, long i, void *p, void (*f)(void))
213 {
214 int to_return = 1;
215
216 switch(cmd)
217 {
218 /* The command isn't understood by this engine */
219 default:
220 to_return = 0;
221 break;
222 }
223
224 return to_return;
225 }
226
227
228#ifndef OPENSSL_NO_RSA
229
230#ifdef _WIN32
231typedef unsigned __int64 UINT64;
232#else
233typedef unsigned long long UINT64;
234#endif
235typedef unsigned short UINT16;
236
237/* Table t is interleaved in the following manner:
238 * The order in memory is t[0][0], t[0][1], ..., t[0][7], t[1][0], ...
239 * A particular 512-bit value is stored in t[][index] rather than the more
240 * normal t[index][]; i.e. the qwords of a particular entry in t are not
241 * adjacent in memory
242 */
243
244/* Init BIGNUM b from the interleaved UINT64 array */
245static int interleaved_array_to_bn_512(BIGNUM* b, UINT64 *array);
246
247/* Extract array elements from BIGNUM b
248 * To set the whole array from b, call with n=8
249 */
250static int bn_extract_to_array_512(const BIGNUM* b, unsigned int n, UINT64 *array);
251
252struct mod_ctx_512 {
253 UINT64 t[8][8];
254 UINT64 m[8];
255 UINT64 m1[8]; /* 2^278 % m */
256 UINT64 m2[8]; /* 2^640 % m */
257 UINT64 k1[2]; /* (- 1/m) % 2^128 */
258};
259
260static int mod_exp_pre_compute_data_512(UINT64 *m, struct mod_ctx_512 *data);
261
262void mod_exp_512(UINT64 *result, /* 512 bits, 8 qwords */
263 UINT64 *g, /* 512 bits, 8 qwords */
264 UINT64 *exp, /* 512 bits, 8 qwords */
265 struct mod_ctx_512 *data);
266
267typedef struct st_e_rsax_mod_ctx
268{
269 UINT64 type;
270 union {
271 struct mod_ctx_512 b512;
272 } ctx;
273
274} E_RSAX_MOD_CTX;
275
276static E_RSAX_MOD_CTX *e_rsax_get_ctx(RSA *rsa, int idx, BIGNUM* m)
277{
278 E_RSAX_MOD_CTX *hptr;
279
280 if (idx < 0 || idx > 2)
281 return NULL;
282
283 hptr = RSA_get_ex_data(rsa, rsax_ex_data_idx);
284 if (!hptr) {
285 hptr = OPENSSL_malloc(3*sizeof(E_RSAX_MOD_CTX));
286 if (!hptr) return NULL;
287 hptr[2].type = hptr[1].type= hptr[0].type = 0;
288 RSA_set_ex_data(rsa, rsax_ex_data_idx, hptr);
289 }
290
291 if (hptr[idx].type == (UINT64)BN_num_bits(m))
292 return hptr+idx;
293
294 if (BN_num_bits(m) == 512) {
295 UINT64 _m[8];
296 bn_extract_to_array_512(m, 8, _m);
297 memset( &hptr[idx].ctx.b512, 0, sizeof(struct mod_ctx_512));
298 mod_exp_pre_compute_data_512(_m, &hptr[idx].ctx.b512);
299 }
300
301 hptr[idx].type = BN_num_bits(m);
302 return hptr+idx;
303}
304
305static int e_rsax_rsa_finish(RSA *rsa)
306 {
307 E_RSAX_MOD_CTX *hptr = RSA_get_ex_data(rsa, rsax_ex_data_idx);
308 if(hptr)
309 {
310 OPENSSL_free(hptr);
311 RSA_set_ex_data(rsa, rsax_ex_data_idx, NULL);
312 }
313 if (rsa->_method_mod_n)
314 BN_MONT_CTX_free(rsa->_method_mod_n);
315 if (rsa->_method_mod_p)
316 BN_MONT_CTX_free(rsa->_method_mod_p);
317 if (rsa->_method_mod_q)
318 BN_MONT_CTX_free(rsa->_method_mod_q);
319 return 1;
320 }
321
322
323static int e_rsax_bn_mod_exp(BIGNUM *r, const BIGNUM *g, const BIGNUM *e,
324 const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont, E_RSAX_MOD_CTX* rsax_mod_ctx )
325{
326 if (rsax_mod_ctx && BN_get_flags(e, BN_FLG_CONSTTIME) != 0) {
327 if (BN_num_bits(m) == 512) {
328 UINT64 _r[8];
329 UINT64 _g[8];
330 UINT64 _e[8];
331
332 /* Init the arrays from the BIGNUMs */
333 bn_extract_to_array_512(g, 8, _g);
334 bn_extract_to_array_512(e, 8, _e);
335
336 mod_exp_512(_r, _g, _e, &rsax_mod_ctx->ctx.b512);
337 /* Return the result in the BIGNUM */
338 interleaved_array_to_bn_512(r, _r);
339 return 1;
340 }
341 }
342
343 return BN_mod_exp_mont(r, g, e, m, ctx, in_mont);
344}
345
346/* Declares for the Intel CIAP 512-bit / CRT / 1024 bit RSA modular
347 * exponentiation routine precalculations and a structure to hold the
348 * necessary values. These files are meant to live in crypto/rsa/ in
349 * the target openssl.
350 */
351
352/*
353 * Local method: extracts a piece from a BIGNUM, to fit it into
354 * an array. Call with n=8 to extract an entire 512-bit BIGNUM
355 */
356static int bn_extract_to_array_512(const BIGNUM* b, unsigned int n, UINT64 *array)
357{
358 int i;
359 UINT64 tmp;
360 unsigned char bn_buff[64];
361 memset(bn_buff, 0, 64);
362 if (BN_num_bytes(b) > 64) {
363 printf ("Can't support this byte size\n");
364 return 0; }
365 if (BN_num_bytes(b)!=0) {
366 if (!BN_bn2bin(b, bn_buff+(64-BN_num_bytes(b)))) {
367 printf ("Error's in bn2bin\n");
368 /* We have to error, here */
369 return 0; } }
370 while (n-- > 0) {
371 array[n] = 0;
372 for (i=7; i>=0; i--) {
373 tmp = bn_buff[63-(n*8+i)];
374 array[n] |= tmp << (8*i); } }
375 return 1;
376}
377
378/* Init a 512-bit BIGNUM from the UINT64*_ (8 * 64) interleaved array */
379static int interleaved_array_to_bn_512(BIGNUM* b, UINT64 *array)
380{
381 unsigned char tmp[64];
382 int n=8;
383 int i;
384 while (n-- > 0) {
385 for (i = 7; i>=0; i--) {
386 tmp[63-(n*8+i)] = (unsigned char)(array[n]>>(8*i)); } }
387 BN_bin2bn(tmp, 64, b);
388 return 0;
389}
390
391
392/* The main 512bit precompute call */
393static int mod_exp_pre_compute_data_512(UINT64 *m, struct mod_ctx_512 *data)
394 {
395 BIGNUM two_768, two_640, two_128, two_512, tmp, _m, tmp2;
396
397 /* We need a BN_CTX for the modulo functions */
398 BN_CTX* ctx;
399 /* Some tmps */
400 UINT64 _t[8];
401 int i, j, ret = 0;
402
403 /* Init _m with m */
404 BN_init(&_m);
405 interleaved_array_to_bn_512(&_m, m);
406 memset(_t, 0, 64);
407
408 /* Inits */
409 BN_init(&two_768);
410 BN_init(&two_640);
411 BN_init(&two_128);
412 BN_init(&two_512);
413 BN_init(&tmp);
414 BN_init(&tmp2);
415
416 /* Create our context */
417 if ((ctx=BN_CTX_new()) == NULL) { goto err; }
418 BN_CTX_start(ctx);
419
420 /*
421 * For production, if you care, these only need to be set once,
422 * and may be made constants.
423 */
424 BN_lshift(&two_768, BN_value_one(), 768);
425 BN_lshift(&two_640, BN_value_one(), 640);
426 BN_lshift(&two_128, BN_value_one(), 128);
427 BN_lshift(&two_512, BN_value_one(), 512);
428
429 if (0 == (m[7] & 0x8000000000000000)) {
430 exit(1);
431 }
432 if (0 == (m[0] & 0x1)) { /* Odd modulus required for Mont */
433 exit(1);
434 }
435
436 /* Precompute m1 */
437 BN_mod(&tmp, &two_768, &_m, ctx);
438 if (!bn_extract_to_array_512(&tmp, 8, &data->m1[0])) {
439 goto err; }
440
441 /* Precompute m2 */
442 BN_mod(&tmp, &two_640, &_m, ctx);
443 if (!bn_extract_to_array_512(&tmp, 8, &data->m2[0])) {
444 goto err;
445 }
446
447 /*
448 * Precompute k1, a 128b number = ((-1)* m-1 ) mod 2128; k1 should
449 * be non-negative.
450 */
451 BN_mod_inverse(&tmp, &_m, &two_128, ctx);
452 if (!BN_is_zero(&tmp)) { BN_sub(&tmp, &two_128, &tmp); }
453 if (!bn_extract_to_array_512(&tmp, 2, &data->k1[0])) {
454 goto err; }
455
456 /* Precompute t */
457 for (i=0; i<8; i++) {
458 BN_zero(&tmp);
459 if (i & 1) { BN_add(&tmp, &two_512, &tmp); }
460 if (i & 2) { BN_add(&tmp, &two_512, &tmp); }
461 if (i & 4) { BN_add(&tmp, &two_640, &tmp); }
462
463 BN_nnmod(&tmp2, &tmp, &_m, ctx);
464 if (!bn_extract_to_array_512(&tmp2, 8, _t)) {
465 goto err; }
466 for (j=0; j<8; j++) data->t[j][i] = _t[j]; }
467
468 /* Precompute m */
469 for (i=0; i<8; i++) {
470 data->m[i] = m[i]; }
471
472 ret = 1;
473
474err:
475 /* Cleanup */
476 if (ctx != NULL) {
477 BN_CTX_end(ctx); BN_CTX_free(ctx); }
478 BN_free(&two_768);
479 BN_free(&two_640);
480 BN_free(&two_128);
481 BN_free(&two_512);
482 BN_free(&tmp);
483 BN_free(&tmp2);
484 BN_free(&_m);
485
486 return ret;
487}
488
489
490static int e_rsax_rsa_mod_exp(BIGNUM *r0, const BIGNUM *I, RSA *rsa, BN_CTX *ctx)
491 {
492 BIGNUM *r1,*m1,*vrfy;
493 BIGNUM local_dmp1,local_dmq1,local_c,local_r1;
494 BIGNUM *dmp1,*dmq1,*c,*pr1;
495 int ret=0;
496
497 BN_CTX_start(ctx);
498 r1 = BN_CTX_get(ctx);
499 m1 = BN_CTX_get(ctx);
500 vrfy = BN_CTX_get(ctx);
501
502 {
503 BIGNUM local_p, local_q;
504 BIGNUM *p = NULL, *q = NULL;
505 int error = 0;
506
507 /* Make sure BN_mod_inverse in Montgomery
508 * intialization uses the BN_FLG_CONSTTIME flag
509 * (unless RSA_FLAG_NO_CONSTTIME is set)
510 */
511 if (!(rsa->flags & RSA_FLAG_NO_CONSTTIME))
512 {
513 BN_init(&local_p);
514 p = &local_p;
515 BN_with_flags(p, rsa->p, BN_FLG_CONSTTIME);
516
517 BN_init(&local_q);
518 q = &local_q;
519 BN_with_flags(q, rsa->q, BN_FLG_CONSTTIME);
520 }
521 else
522 {
523 p = rsa->p;
524 q = rsa->q;
525 }
526
527 if (rsa->flags & RSA_FLAG_CACHE_PRIVATE)
528 {
529 if (!BN_MONT_CTX_set_locked(&rsa->_method_mod_p, CRYPTO_LOCK_RSA, p, ctx))
530 error = 1;
531 if (!BN_MONT_CTX_set_locked(&rsa->_method_mod_q, CRYPTO_LOCK_RSA, q, ctx))
532 error = 1;
533 }
534
535 /* clean up */
536 if (!(rsa->flags & RSA_FLAG_NO_CONSTTIME))
537 {
538 BN_free(&local_p);
539 BN_free(&local_q);
540 }
541 if ( error )
542 goto err;
543 }
544
545 if (rsa->flags & RSA_FLAG_CACHE_PUBLIC)
546 if (!BN_MONT_CTX_set_locked(&rsa->_method_mod_n, CRYPTO_LOCK_RSA, rsa->n, ctx))
547 goto err;
548
549 /* compute I mod q */
550 if (!(rsa->flags & RSA_FLAG_NO_CONSTTIME))
551 {
552 c = &local_c;
553 BN_with_flags(c, I, BN_FLG_CONSTTIME);
554 if (!BN_mod(r1,c,rsa->q,ctx)) goto err;
555 }
556 else
557 {
558 if (!BN_mod(r1,I,rsa->q,ctx)) goto err;
559 }
560
561 /* compute r1^dmq1 mod q */
562 if (!(rsa->flags & RSA_FLAG_NO_CONSTTIME))
563 {
564 dmq1 = &local_dmq1;
565 BN_with_flags(dmq1, rsa->dmq1, BN_FLG_CONSTTIME);
566 }
567 else
568 dmq1 = rsa->dmq1;
569
570 if (!e_rsax_bn_mod_exp(m1,r1,dmq1,rsa->q,ctx,
571 rsa->_method_mod_q, e_rsax_get_ctx(rsa, 0, rsa->q) )) goto err;
572
573 /* compute I mod p */
574 if (!(rsa->flags & RSA_FLAG_NO_CONSTTIME))
575 {
576 c = &local_c;
577 BN_with_flags(c, I, BN_FLG_CONSTTIME);
578 if (!BN_mod(r1,c,rsa->p,ctx)) goto err;
579 }
580 else
581 {
582 if (!BN_mod(r1,I,rsa->p,ctx)) goto err;
583 }
584
585 /* compute r1^dmp1 mod p */
586 if (!(rsa->flags & RSA_FLAG_NO_CONSTTIME))
587 {
588 dmp1 = &local_dmp1;
589 BN_with_flags(dmp1, rsa->dmp1, BN_FLG_CONSTTIME);
590 }
591 else
592 dmp1 = rsa->dmp1;
593
594 if (!e_rsax_bn_mod_exp(r0,r1,dmp1,rsa->p,ctx,
595 rsa->_method_mod_p, e_rsax_get_ctx(rsa, 1, rsa->p) )) goto err;
596
597 if (!BN_sub(r0,r0,m1)) goto err;
598 /* This will help stop the size of r0 increasing, which does
599 * affect the multiply if it optimised for a power of 2 size */
600 if (BN_is_negative(r0))
601 if (!BN_add(r0,r0,rsa->p)) goto err;
602
603 if (!BN_mul(r1,r0,rsa->iqmp,ctx)) goto err;
604
605 /* Turn BN_FLG_CONSTTIME flag on before division operation */
606 if (!(rsa->flags & RSA_FLAG_NO_CONSTTIME))
607 {
608 pr1 = &local_r1;
609 BN_with_flags(pr1, r1, BN_FLG_CONSTTIME);
610 }
611 else
612 pr1 = r1;
613 if (!BN_mod(r0,pr1,rsa->p,ctx)) goto err;
614
615 /* If p < q it is occasionally possible for the correction of
616 * adding 'p' if r0 is negative above to leave the result still
617 * negative. This can break the private key operations: the following
618 * second correction should *always* correct this rare occurrence.
619 * This will *never* happen with OpenSSL generated keys because
620 * they ensure p > q [steve]
621 */
622 if (BN_is_negative(r0))
623 if (!BN_add(r0,r0,rsa->p)) goto err;
624 if (!BN_mul(r1,r0,rsa->q,ctx)) goto err;
625 if (!BN_add(r0,r1,m1)) goto err;
626
627 if (rsa->e && rsa->n)
628 {
629 if (!e_rsax_bn_mod_exp(vrfy,r0,rsa->e,rsa->n,ctx,rsa->_method_mod_n, e_rsax_get_ctx(rsa, 2, rsa->n) ))
630 goto err;
631
632 /* If 'I' was greater than (or equal to) rsa->n, the operation
633 * will be equivalent to using 'I mod n'. However, the result of
634 * the verify will *always* be less than 'n' so we don't check
635 * for absolute equality, just congruency. */
636 if (!BN_sub(vrfy, vrfy, I)) goto err;
637 if (!BN_mod(vrfy, vrfy, rsa->n, ctx)) goto err;
638 if (BN_is_negative(vrfy))
639 if (!BN_add(vrfy, vrfy, rsa->n)) goto err;
640 if (!BN_is_zero(vrfy))
641 {
642 /* 'I' and 'vrfy' aren't congruent mod n. Don't leak
643 * miscalculated CRT output, just do a raw (slower)
644 * mod_exp and return that instead. */
645
646 BIGNUM local_d;
647 BIGNUM *d = NULL;
648
649 if (!(rsa->flags & RSA_FLAG_NO_CONSTTIME))
650 {
651 d = &local_d;
652 BN_with_flags(d, rsa->d, BN_FLG_CONSTTIME);
653 }
654 else
655 d = rsa->d;
656 if (!e_rsax_bn_mod_exp(r0,I,d,rsa->n,ctx,
657 rsa->_method_mod_n, e_rsax_get_ctx(rsa, 2, rsa->n) )) goto err;
658 }
659 }
660 ret=1;
661
662err:
663 BN_CTX_end(ctx);
664
665 return ret;
666 }
667#endif /* !OPENSSL_NO_RSA */
668#endif /* !COMPILE_RSAX */
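The function above is the textbook RSA-CRT (Garner) private-key path: m1 = c^dmq1 mod q and r0 = c^dmp1 mod p are combined via iqmp, and the result is then re-encrypted with the public exponent so that a miscomputed CRT value is never returned directly. A minimal sketch of just that recombination with the plain BN API follows; it is illustrative only and deliberately omits the BN_FLG_CONSTTIME handling, the Montgomery caching and the verify/fallback step performed above.

#include <openssl/bn.h>

/* Textbook RSA-CRT recombination: m = c^d mod n computed from the
 * CRT components p, q, dmp1, dmq1, iqmp.  Sketch only -- no
 * constant-time flags, no Montgomery caching, no fault check. */
static int rsa_crt_sketch(BIGNUM *m, const BIGNUM *c,
                          const BIGNUM *p, const BIGNUM *q,
                          const BIGNUM *dmp1, const BIGNUM *dmq1,
                          const BIGNUM *iqmp, BN_CTX *ctx)
	{
	int ok = 0;
	BIGNUM *m1, *m2, *h;

	BN_CTX_start(ctx);
	m1 = BN_CTX_get(ctx);
	m2 = BN_CTX_get(ctx);
	h = BN_CTX_get(ctx);
	if (h == NULL) goto err;
	if (!BN_mod_exp(m1, c, dmq1, q, ctx)) goto err;  /* c^dmq1 mod q */
	if (!BN_mod_exp(m2, c, dmp1, p, ctx)) goto err;  /* c^dmp1 mod p */
	if (!BN_mod_sub(h, m2, m1, p, ctx)) goto err;    /* (m2 - m1) mod p */
	if (!BN_mod_mul(h, h, iqmp, p, ctx)) goto err;   /* * (q^-1 mod p) */
	if (!BN_mul(h, h, q, ctx)) goto err;
	if (!BN_add(m, m1, h)) goto err;                 /* m1 + h*q */
	ok = 1;
err:
	BN_CTX_end(ctx);
	return ok;
	}

The re-encryption check above (vrfy) exists precisely because returning a faulted CRT result would leak the private key; hence the raw mod_exp fallback when the congruence check fails.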
diff --git a/src/lib/libcrypto/evp/evp_fips.c b/src/lib/libcrypto/evp/evp_fips.c
new file mode 100644
index 0000000000..cb7f4fc0fa
--- /dev/null
+++ b/src/lib/libcrypto/evp/evp_fips.c
@@ -0,0 +1,113 @@
1/* crypto/evp/evp_fips.c */
2/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL
3 * project.
4 */
5/* ====================================================================
6 * Copyright (c) 2011 The OpenSSL Project. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 *
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in
17 * the documentation and/or other materials provided with the
18 * distribution.
19 *
20 * 3. All advertising materials mentioning features or use of this
21 * software must display the following acknowledgment:
22 * "This product includes software developed by the OpenSSL Project
23 * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
24 *
25 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
26 * endorse or promote products derived from this software without
27 * prior written permission. For written permission, please contact
28 * licensing@OpenSSL.org.
29 *
30 * 5. Products derived from this software may not be called "OpenSSL"
31 * nor may "OpenSSL" appear in their names without prior written
32 * permission of the OpenSSL Project.
33 *
34 * 6. Redistributions of any form whatsoever must retain the following
35 * acknowledgment:
36 * "This product includes software developed by the OpenSSL Project
37 * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
38 *
39 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
40 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
41 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
42 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
43 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
44 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
45 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
46 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
47 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
48 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
49 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
50 * OF THE POSSIBILITY OF SUCH DAMAGE.
51 * ====================================================================
52 */
53
54
55#include <openssl/evp.h>
56
57#ifdef OPENSSL_FIPS
58#include <openssl/fips.h>
59
60const EVP_CIPHER *EVP_aes_128_cbc(void) { return FIPS_evp_aes_128_cbc(); }
61const EVP_CIPHER *EVP_aes_128_ccm(void) { return FIPS_evp_aes_128_ccm(); }
62const EVP_CIPHER *EVP_aes_128_cfb1(void) { return FIPS_evp_aes_128_cfb1(); }
63const EVP_CIPHER *EVP_aes_128_cfb128(void) { return FIPS_evp_aes_128_cfb128(); }
64const EVP_CIPHER *EVP_aes_128_cfb8(void) { return FIPS_evp_aes_128_cfb8(); }
65const EVP_CIPHER *EVP_aes_128_ctr(void) { return FIPS_evp_aes_128_ctr(); }
66const EVP_CIPHER *EVP_aes_128_ecb(void) { return FIPS_evp_aes_128_ecb(); }
67const EVP_CIPHER *EVP_aes_128_gcm(void) { return FIPS_evp_aes_128_gcm(); }
68const EVP_CIPHER *EVP_aes_128_ofb(void) { return FIPS_evp_aes_128_ofb(); }
69const EVP_CIPHER *EVP_aes_128_xts(void) { return FIPS_evp_aes_128_xts(); }
70const EVP_CIPHER *EVP_aes_192_cbc(void) { return FIPS_evp_aes_192_cbc(); }
71const EVP_CIPHER *EVP_aes_192_ccm(void) { return FIPS_evp_aes_192_ccm(); }
72const EVP_CIPHER *EVP_aes_192_cfb1(void) { return FIPS_evp_aes_192_cfb1(); }
73const EVP_CIPHER *EVP_aes_192_cfb128(void) { return FIPS_evp_aes_192_cfb128(); }
74const EVP_CIPHER *EVP_aes_192_cfb8(void) { return FIPS_evp_aes_192_cfb8(); }
75const EVP_CIPHER *EVP_aes_192_ctr(void) { return FIPS_evp_aes_192_ctr(); }
76const EVP_CIPHER *EVP_aes_192_ecb(void) { return FIPS_evp_aes_192_ecb(); }
77const EVP_CIPHER *EVP_aes_192_gcm(void) { return FIPS_evp_aes_192_gcm(); }
78const EVP_CIPHER *EVP_aes_192_ofb(void) { return FIPS_evp_aes_192_ofb(); }
79const EVP_CIPHER *EVP_aes_256_cbc(void) { return FIPS_evp_aes_256_cbc(); }
80const EVP_CIPHER *EVP_aes_256_ccm(void) { return FIPS_evp_aes_256_ccm(); }
81const EVP_CIPHER *EVP_aes_256_cfb1(void) { return FIPS_evp_aes_256_cfb1(); }
82const EVP_CIPHER *EVP_aes_256_cfb128(void) { return FIPS_evp_aes_256_cfb128(); }
83const EVP_CIPHER *EVP_aes_256_cfb8(void) { return FIPS_evp_aes_256_cfb8(); }
84const EVP_CIPHER *EVP_aes_256_ctr(void) { return FIPS_evp_aes_256_ctr(); }
85const EVP_CIPHER *EVP_aes_256_ecb(void) { return FIPS_evp_aes_256_ecb(); }
86const EVP_CIPHER *EVP_aes_256_gcm(void) { return FIPS_evp_aes_256_gcm(); }
87const EVP_CIPHER *EVP_aes_256_ofb(void) { return FIPS_evp_aes_256_ofb(); }
88const EVP_CIPHER *EVP_aes_256_xts(void) { return FIPS_evp_aes_256_xts(); }
89const EVP_CIPHER *EVP_des_ede(void) { return FIPS_evp_des_ede(); }
90const EVP_CIPHER *EVP_des_ede3(void) { return FIPS_evp_des_ede3(); }
91const EVP_CIPHER *EVP_des_ede3_cbc(void) { return FIPS_evp_des_ede3_cbc(); }
92const EVP_CIPHER *EVP_des_ede3_cfb1(void) { return FIPS_evp_des_ede3_cfb1(); }
93const EVP_CIPHER *EVP_des_ede3_cfb64(void) { return FIPS_evp_des_ede3_cfb64(); }
94const EVP_CIPHER *EVP_des_ede3_cfb8(void) { return FIPS_evp_des_ede3_cfb8(); }
95const EVP_CIPHER *EVP_des_ede3_ecb(void) { return FIPS_evp_des_ede3_ecb(); }
96const EVP_CIPHER *EVP_des_ede3_ofb(void) { return FIPS_evp_des_ede3_ofb(); }
97const EVP_CIPHER *EVP_des_ede_cbc(void) { return FIPS_evp_des_ede_cbc(); }
98const EVP_CIPHER *EVP_des_ede_cfb64(void) { return FIPS_evp_des_ede_cfb64(); }
99const EVP_CIPHER *EVP_des_ede_ecb(void) { return FIPS_evp_des_ede_ecb(); }
100const EVP_CIPHER *EVP_des_ede_ofb(void) { return FIPS_evp_des_ede_ofb(); }
101const EVP_CIPHER *EVP_enc_null(void) { return FIPS_evp_enc_null(); }
102
103const EVP_MD *EVP_sha1(void) { return FIPS_evp_sha1(); }
104const EVP_MD *EVP_sha224(void) { return FIPS_evp_sha224(); }
105const EVP_MD *EVP_sha256(void) { return FIPS_evp_sha256(); }
106const EVP_MD *EVP_sha384(void) { return FIPS_evp_sha384(); }
107const EVP_MD *EVP_sha512(void) { return FIPS_evp_sha512(); }
108
109const EVP_MD *EVP_dss(void) { return FIPS_evp_dss(); }
110const EVP_MD *EVP_dss1(void) { return FIPS_evp_dss1(); }
111const EVP_MD *EVP_ecdsa(void) { return FIPS_evp_ecdsa(); }
112
113#endif
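In a FIPS-capable build these wrappers make the switch transparent: callers keep using the normal EVP entry points and pick up whichever implementation EVP_aes_128_cbc() and friends return. A short, hedged sketch of such a caller (key/IV handling and buffer sizing are the caller's responsibility; out must have room for inlen plus one block of padding):

#include <openssl/evp.h>

/* One-shot AES-128-CBC encryption through the EVP interface.  Whether
 * the cipher object comes from the FIPS module or the default
 * implementation is decided by EVP_aes_128_cbc() above. */
static int evp_encrypt_sketch(const unsigned char *key, const unsigned char *iv,
	const unsigned char *in, int inlen, unsigned char *out, int *outlen)
	{
	EVP_CIPHER_CTX *ctx = EVP_CIPHER_CTX_new();
	int len = 0, total = 0, ok = 0;

	if (ctx == NULL)
		return 0;
	if (!EVP_EncryptInit_ex(ctx, EVP_aes_128_cbc(), NULL, key, iv))
		goto done;
	if (!EVP_EncryptUpdate(ctx, out, &len, in, inlen))
		goto done;
	total = len;
	if (!EVP_EncryptFinal_ex(ctx, out + total, &len))
		goto done;
	total += len;
	*outlen = total;
	ok = 1;
done:
	EVP_CIPHER_CTX_free(ctx);
	return ok;
	}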
diff --git a/src/lib/libcrypto/fips_err.h b/src/lib/libcrypto/fips_err.h
index b328616858..c671691b47 100644
--- a/src/lib/libcrypto/fips_err.h
+++ b/src/lib/libcrypto/fips_err.h
@@ -1,6 +1,6 @@
1/* crypto/fips_err.h */ 1/* crypto/fips_err.h */
2/* ==================================================================== 2/* ====================================================================
3 * Copyright (c) 1999-2007 The OpenSSL Project. All rights reserved. 3 * Copyright (c) 1999-2011 The OpenSSL Project. All rights reserved.
4 * 4 *
5 * Redistribution and use in source and binary forms, with or without 5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions 6 * modification, are permitted provided that the following conditions
@@ -71,53 +71,125 @@
71static ERR_STRING_DATA FIPS_str_functs[]= 71static ERR_STRING_DATA FIPS_str_functs[]=
72 { 72 {
73{ERR_FUNC(FIPS_F_DH_BUILTIN_GENPARAMS), "DH_BUILTIN_GENPARAMS"}, 73{ERR_FUNC(FIPS_F_DH_BUILTIN_GENPARAMS), "DH_BUILTIN_GENPARAMS"},
74{ERR_FUNC(FIPS_F_DH_INIT), "DH_INIT"},
75{ERR_FUNC(FIPS_F_DRBG_RESEED), "DRBG_RESEED"},
74{ERR_FUNC(FIPS_F_DSA_BUILTIN_PARAMGEN), "DSA_BUILTIN_PARAMGEN"}, 76{ERR_FUNC(FIPS_F_DSA_BUILTIN_PARAMGEN), "DSA_BUILTIN_PARAMGEN"},
77{ERR_FUNC(FIPS_F_DSA_BUILTIN_PARAMGEN2), "DSA_BUILTIN_PARAMGEN2"},
75{ERR_FUNC(FIPS_F_DSA_DO_SIGN), "DSA_do_sign"}, 78{ERR_FUNC(FIPS_F_DSA_DO_SIGN), "DSA_do_sign"},
76{ERR_FUNC(FIPS_F_DSA_DO_VERIFY), "DSA_do_verify"}, 79{ERR_FUNC(FIPS_F_DSA_DO_VERIFY), "DSA_do_verify"},
77{ERR_FUNC(FIPS_F_EVP_CIPHERINIT_EX), "EVP_CipherInit_ex"},
78{ERR_FUNC(FIPS_F_EVP_DIGESTINIT_EX), "EVP_DigestInit_ex"},
79{ERR_FUNC(FIPS_F_FIPS_CHECK_DSA), "FIPS_CHECK_DSA"}, 80{ERR_FUNC(FIPS_F_FIPS_CHECK_DSA), "FIPS_CHECK_DSA"},
80{ERR_FUNC(FIPS_F_FIPS_CHECK_INCORE_FINGERPRINT), "FIPS_CHECK_INCORE_FINGERPRINT"}, 81{ERR_FUNC(FIPS_F_FIPS_CHECK_DSA_PRNG), "fips_check_dsa_prng"},
81{ERR_FUNC(FIPS_F_FIPS_CHECK_RSA), "FIPS_CHECK_RSA"}, 82{ERR_FUNC(FIPS_F_FIPS_CHECK_EC), "FIPS_CHECK_EC"},
82{ERR_FUNC(FIPS_F_FIPS_DSA_CHECK), "FIPS_DSA_CHECK"}, 83{ERR_FUNC(FIPS_F_FIPS_CHECK_EC_PRNG), "fips_check_ec_prng"},
83{ERR_FUNC(FIPS_F_FIPS_MODE_SET), "FIPS_mode_set"}, 84{ERR_FUNC(FIPS_F_FIPS_CHECK_INCORE_FINGERPRINT), "FIPS_check_incore_fingerprint"},
85{ERR_FUNC(FIPS_F_FIPS_CHECK_RSA), "fips_check_rsa"},
86{ERR_FUNC(FIPS_F_FIPS_CHECK_RSA_PRNG), "fips_check_rsa_prng"},
87{ERR_FUNC(FIPS_F_FIPS_CIPHER), "FIPS_cipher"},
88{ERR_FUNC(FIPS_F_FIPS_CIPHERINIT), "FIPS_cipherinit"},
89{ERR_FUNC(FIPS_F_FIPS_CIPHER_CTX_CTRL), "FIPS_CIPHER_CTX_CTRL"},
90{ERR_FUNC(FIPS_F_FIPS_DIGESTFINAL), "FIPS_digestfinal"},
91{ERR_FUNC(FIPS_F_FIPS_DIGESTINIT), "FIPS_digestinit"},
92{ERR_FUNC(FIPS_F_FIPS_DIGESTUPDATE), "FIPS_digestupdate"},
93{ERR_FUNC(FIPS_F_FIPS_DRBG_BYTES), "FIPS_DRBG_BYTES"},
94{ERR_FUNC(FIPS_F_FIPS_DRBG_CHECK), "FIPS_DRBG_CHECK"},
95{ERR_FUNC(FIPS_F_FIPS_DRBG_CPRNG_TEST), "FIPS_DRBG_CPRNG_TEST"},
96{ERR_FUNC(FIPS_F_FIPS_DRBG_ERROR_CHECK), "FIPS_DRBG_ERROR_CHECK"},
97{ERR_FUNC(FIPS_F_FIPS_DRBG_GENERATE), "FIPS_drbg_generate"},
98{ERR_FUNC(FIPS_F_FIPS_DRBG_INIT), "FIPS_drbg_init"},
99{ERR_FUNC(FIPS_F_FIPS_DRBG_INSTANTIATE), "FIPS_drbg_instantiate"},
100{ERR_FUNC(FIPS_F_FIPS_DRBG_NEW), "FIPS_drbg_new"},
101{ERR_FUNC(FIPS_F_FIPS_DRBG_RESEED), "FIPS_drbg_reseed"},
102{ERR_FUNC(FIPS_F_FIPS_DRBG_SINGLE_KAT), "FIPS_DRBG_SINGLE_KAT"},
103{ERR_FUNC(FIPS_F_FIPS_DSA_SIGN_DIGEST), "FIPS_dsa_sign_digest"},
104{ERR_FUNC(FIPS_F_FIPS_DSA_VERIFY_DIGEST), "FIPS_dsa_verify_digest"},
105{ERR_FUNC(FIPS_F_FIPS_GET_ENTROPY), "FIPS_GET_ENTROPY"},
106{ERR_FUNC(FIPS_F_FIPS_MODULE_MODE_SET), "FIPS_module_mode_set"},
84{ERR_FUNC(FIPS_F_FIPS_PKEY_SIGNATURE_TEST), "fips_pkey_signature_test"}, 107{ERR_FUNC(FIPS_F_FIPS_PKEY_SIGNATURE_TEST), "fips_pkey_signature_test"},
108{ERR_FUNC(FIPS_F_FIPS_RAND_ADD), "FIPS_rand_add"},
109{ERR_FUNC(FIPS_F_FIPS_RAND_BYTES), "FIPS_rand_bytes"},
110{ERR_FUNC(FIPS_F_FIPS_RAND_PSEUDO_BYTES), "FIPS_rand_pseudo_bytes"},
111{ERR_FUNC(FIPS_F_FIPS_RAND_SEED), "FIPS_rand_seed"},
112{ERR_FUNC(FIPS_F_FIPS_RAND_SET_METHOD), "FIPS_rand_set_method"},
113{ERR_FUNC(FIPS_F_FIPS_RAND_STATUS), "FIPS_rand_status"},
114{ERR_FUNC(FIPS_F_FIPS_RSA_SIGN_DIGEST), "FIPS_rsa_sign_digest"},
115{ERR_FUNC(FIPS_F_FIPS_RSA_VERIFY_DIGEST), "FIPS_rsa_verify_digest"},
85{ERR_FUNC(FIPS_F_FIPS_SELFTEST_AES), "FIPS_selftest_aes"}, 116{ERR_FUNC(FIPS_F_FIPS_SELFTEST_AES), "FIPS_selftest_aes"},
117{ERR_FUNC(FIPS_F_FIPS_SELFTEST_AES_CCM), "FIPS_selftest_aes_ccm"},
118{ERR_FUNC(FIPS_F_FIPS_SELFTEST_AES_GCM), "FIPS_selftest_aes_gcm"},
119{ERR_FUNC(FIPS_F_FIPS_SELFTEST_AES_XTS), "FIPS_selftest_aes_xts"},
120{ERR_FUNC(FIPS_F_FIPS_SELFTEST_CMAC), "FIPS_selftest_cmac"},
86{ERR_FUNC(FIPS_F_FIPS_SELFTEST_DES), "FIPS_selftest_des"}, 121{ERR_FUNC(FIPS_F_FIPS_SELFTEST_DES), "FIPS_selftest_des"},
87{ERR_FUNC(FIPS_F_FIPS_SELFTEST_DSA), "FIPS_selftest_dsa"}, 122{ERR_FUNC(FIPS_F_FIPS_SELFTEST_DSA), "FIPS_selftest_dsa"},
123{ERR_FUNC(FIPS_F_FIPS_SELFTEST_ECDSA), "FIPS_selftest_ecdsa"},
88{ERR_FUNC(FIPS_F_FIPS_SELFTEST_HMAC), "FIPS_selftest_hmac"}, 124{ERR_FUNC(FIPS_F_FIPS_SELFTEST_HMAC), "FIPS_selftest_hmac"},
89{ERR_FUNC(FIPS_F_FIPS_SELFTEST_RNG), "FIPS_selftest_rng"},
90{ERR_FUNC(FIPS_F_FIPS_SELFTEST_SHA1), "FIPS_selftest_sha1"}, 125{ERR_FUNC(FIPS_F_FIPS_SELFTEST_SHA1), "FIPS_selftest_sha1"},
126{ERR_FUNC(FIPS_F_FIPS_SELFTEST_X931), "FIPS_selftest_x931"},
127{ERR_FUNC(FIPS_F_FIPS_SET_PRNG_KEY), "FIPS_SET_PRNG_KEY"},
91{ERR_FUNC(FIPS_F_HASH_FINAL), "HASH_FINAL"}, 128{ERR_FUNC(FIPS_F_HASH_FINAL), "HASH_FINAL"},
92{ERR_FUNC(FIPS_F_RSA_BUILTIN_KEYGEN), "RSA_BUILTIN_KEYGEN"}, 129{ERR_FUNC(FIPS_F_RSA_BUILTIN_KEYGEN), "RSA_BUILTIN_KEYGEN"},
130{ERR_FUNC(FIPS_F_RSA_EAY_INIT), "RSA_EAY_INIT"},
93{ERR_FUNC(FIPS_F_RSA_EAY_PRIVATE_DECRYPT), "RSA_EAY_PRIVATE_DECRYPT"}, 131{ERR_FUNC(FIPS_F_RSA_EAY_PRIVATE_DECRYPT), "RSA_EAY_PRIVATE_DECRYPT"},
94{ERR_FUNC(FIPS_F_RSA_EAY_PRIVATE_ENCRYPT), "RSA_EAY_PRIVATE_ENCRYPT"}, 132{ERR_FUNC(FIPS_F_RSA_EAY_PRIVATE_ENCRYPT), "RSA_EAY_PRIVATE_ENCRYPT"},
95{ERR_FUNC(FIPS_F_RSA_EAY_PUBLIC_DECRYPT), "RSA_EAY_PUBLIC_DECRYPT"}, 133{ERR_FUNC(FIPS_F_RSA_EAY_PUBLIC_DECRYPT), "RSA_EAY_PUBLIC_DECRYPT"},
96{ERR_FUNC(FIPS_F_RSA_EAY_PUBLIC_ENCRYPT), "RSA_EAY_PUBLIC_ENCRYPT"}, 134{ERR_FUNC(FIPS_F_RSA_EAY_PUBLIC_ENCRYPT), "RSA_EAY_PUBLIC_ENCRYPT"},
97{ERR_FUNC(FIPS_F_RSA_X931_GENERATE_KEY_EX), "RSA_X931_generate_key_ex"}, 135{ERR_FUNC(FIPS_F_RSA_X931_GENERATE_KEY_EX), "RSA_X931_generate_key_ex"},
98{ERR_FUNC(FIPS_F_SSLEAY_RAND_BYTES), "SSLEAY_RAND_BYTES"},
99{0,NULL} 136{0,NULL}
100 }; 137 };
101 138
102static ERR_STRING_DATA FIPS_str_reasons[]= 139static ERR_STRING_DATA FIPS_str_reasons[]=
103 { 140 {
104{ERR_REASON(FIPS_R_CANNOT_READ_EXE) ,"cannot read exe"}, 141{ERR_REASON(FIPS_R_ADDITIONAL_INPUT_ERROR_UNDETECTED),"additional input error undetected"},
105{ERR_REASON(FIPS_R_CANNOT_READ_EXE_DIGEST),"cannot read exe digest"}, 142{ERR_REASON(FIPS_R_ADDITIONAL_INPUT_TOO_LONG),"additional input too long"},
143{ERR_REASON(FIPS_R_ALREADY_INSTANTIATED) ,"already instantiated"},
144{ERR_REASON(FIPS_R_AUTHENTICATION_FAILURE),"authentication failure"},
106{ERR_REASON(FIPS_R_CONTRADICTING_EVIDENCE),"contradicting evidence"}, 145{ERR_REASON(FIPS_R_CONTRADICTING_EVIDENCE),"contradicting evidence"},
107{ERR_REASON(FIPS_R_EXE_DIGEST_DOES_NOT_MATCH),"exe digest does not match"}, 146{ERR_REASON(FIPS_R_DRBG_NOT_INITIALISED) ,"drbg not initialised"},
147{ERR_REASON(FIPS_R_DRBG_STUCK) ,"drbg stuck"},
148{ERR_REASON(FIPS_R_ENTROPY_ERROR_UNDETECTED),"entropy error undetected"},
149{ERR_REASON(FIPS_R_ENTROPY_NOT_REQUESTED_FOR_RESEED),"entropy not requested for reseed"},
150{ERR_REASON(FIPS_R_ENTROPY_SOURCE_STUCK) ,"entropy source stuck"},
151{ERR_REASON(FIPS_R_ERROR_INITIALISING_DRBG),"error initialising drbg"},
152{ERR_REASON(FIPS_R_ERROR_INSTANTIATING_DRBG),"error instantiating drbg"},
153{ERR_REASON(FIPS_R_ERROR_RETRIEVING_ADDITIONAL_INPUT),"error retrieving additional input"},
154{ERR_REASON(FIPS_R_ERROR_RETRIEVING_ENTROPY),"error retrieving entropy"},
155{ERR_REASON(FIPS_R_ERROR_RETRIEVING_NONCE),"error retrieving nonce"},
108{ERR_REASON(FIPS_R_FINGERPRINT_DOES_NOT_MATCH),"fingerprint does not match"}, 156{ERR_REASON(FIPS_R_FINGERPRINT_DOES_NOT_MATCH),"fingerprint does not match"},
109{ERR_REASON(FIPS_R_FINGERPRINT_DOES_NOT_MATCH_NONPIC_RELOCATED),"fingerprint does not match nonpic relocated"}, 157{ERR_REASON(FIPS_R_FINGERPRINT_DOES_NOT_MATCH_NONPIC_RELOCATED),"fingerprint does not match nonpic relocated"},
110{ERR_REASON(FIPS_R_FINGERPRINT_DOES_NOT_MATCH_SEGMENT_ALIASING),"fingerprint does not match segment aliasing"}, 158{ERR_REASON(FIPS_R_FINGERPRINT_DOES_NOT_MATCH_SEGMENT_ALIASING),"fingerprint does not match segment aliasing"},
111{ERR_REASON(FIPS_R_FIPS_MODE_ALREADY_SET),"fips mode already set"}, 159{ERR_REASON(FIPS_R_FIPS_MODE_ALREADY_SET),"fips mode already set"},
112{ERR_REASON(FIPS_R_FIPS_SELFTEST_FAILED) ,"fips selftest failed"}, 160{ERR_REASON(FIPS_R_FIPS_SELFTEST_FAILED) ,"fips selftest failed"},
161{ERR_REASON(FIPS_R_FUNCTION_ERROR) ,"function error"},
162{ERR_REASON(FIPS_R_GENERATE_ERROR) ,"generate error"},
163{ERR_REASON(FIPS_R_GENERATE_ERROR_UNDETECTED),"generate error undetected"},
164{ERR_REASON(FIPS_R_INSTANTIATE_ERROR) ,"instantiate error"},
165{ERR_REASON(FIPS_R_INSUFFICIENT_SECURITY_STRENGTH),"insufficient security strength"},
166{ERR_REASON(FIPS_R_INTERNAL_ERROR) ,"internal error"},
113{ERR_REASON(FIPS_R_INVALID_KEY_LENGTH) ,"invalid key length"}, 167{ERR_REASON(FIPS_R_INVALID_KEY_LENGTH) ,"invalid key length"},
168{ERR_REASON(FIPS_R_INVALID_PARAMETERS) ,"invalid parameters"},
169{ERR_REASON(FIPS_R_IN_ERROR_STATE) ,"in error state"},
114{ERR_REASON(FIPS_R_KEY_TOO_SHORT) ,"key too short"}, 170{ERR_REASON(FIPS_R_KEY_TOO_SHORT) ,"key too short"},
171{ERR_REASON(FIPS_R_NONCE_ERROR_UNDETECTED),"nonce error undetected"},
115{ERR_REASON(FIPS_R_NON_FIPS_METHOD) ,"non fips method"}, 172{ERR_REASON(FIPS_R_NON_FIPS_METHOD) ,"non fips method"},
173{ERR_REASON(FIPS_R_NOPR_TEST1_FAILURE) ,"nopr test1 failure"},
174{ERR_REASON(FIPS_R_NOPR_TEST2_FAILURE) ,"nopr test2 failure"},
175{ERR_REASON(FIPS_R_NOT_INSTANTIATED) ,"not instantiated"},
116{ERR_REASON(FIPS_R_PAIRWISE_TEST_FAILED) ,"pairwise test failed"}, 176{ERR_REASON(FIPS_R_PAIRWISE_TEST_FAILED) ,"pairwise test failed"},
117{ERR_REASON(FIPS_R_RSA_DECRYPT_ERROR) ,"rsa decrypt error"}, 177{ERR_REASON(FIPS_R_PERSONALISATION_ERROR_UNDETECTED),"personalisation error undetected"},
118{ERR_REASON(FIPS_R_RSA_ENCRYPT_ERROR) ,"rsa encrypt error"}, 178{ERR_REASON(FIPS_R_PERSONALISATION_STRING_TOO_LONG),"personalisation string too long"},
179{ERR_REASON(FIPS_R_PRNG_STRENGTH_TOO_LOW),"prng strength too low"},
180{ERR_REASON(FIPS_R_PR_TEST1_FAILURE) ,"pr test1 failure"},
181{ERR_REASON(FIPS_R_PR_TEST2_FAILURE) ,"pr test2 failure"},
182{ERR_REASON(FIPS_R_REQUEST_LENGTH_ERROR_UNDETECTED),"request length error undetected"},
183{ERR_REASON(FIPS_R_REQUEST_TOO_LARGE_FOR_DRBG),"request too large for drbg"},
184{ERR_REASON(FIPS_R_RESEED_COUNTER_ERROR) ,"reseed counter error"},
185{ERR_REASON(FIPS_R_RESEED_ERROR) ,"reseed error"},
119{ERR_REASON(FIPS_R_SELFTEST_FAILED) ,"selftest failed"}, 186{ERR_REASON(FIPS_R_SELFTEST_FAILED) ,"selftest failed"},
187{ERR_REASON(FIPS_R_SELFTEST_FAILURE) ,"selftest failure"},
188{ERR_REASON(FIPS_R_STRENGTH_ERROR_UNDETECTED),"strength error undetected"},
120{ERR_REASON(FIPS_R_TEST_FAILURE) ,"test failure"}, 189{ERR_REASON(FIPS_R_TEST_FAILURE) ,"test failure"},
190{ERR_REASON(FIPS_R_UNINSTANTIATE_ERROR) ,"uninstantiate error"},
191{ERR_REASON(FIPS_R_UNINSTANTIATE_ZEROISE_ERROR),"uninstantiate zeroise error"},
192{ERR_REASON(FIPS_R_UNSUPPORTED_DRBG_TYPE),"unsupported drbg type"},
121{ERR_REASON(FIPS_R_UNSUPPORTED_PLATFORM) ,"unsupported platform"}, 193{ERR_REASON(FIPS_R_UNSUPPORTED_PLATFORM) ,"unsupported platform"},
122{0,NULL} 194{0,NULL}
123 }; 195 };
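These tables only translate FIPS function and reason codes into strings; applications see them through the usual ERR queue. A small sketch of how a caller would print them, assuming the FIPS strings are registered along with the rest of libcrypto's error strings:

#include <stdio.h>
#include <openssl/err.h>

/* Drain the error queue and print readable messages; for FIPS errors
 * the text comes from the FIPS_str_functs/FIPS_str_reasons tables above. */
static void print_crypto_errors(void)
	{
	unsigned long e;
	char buf[256];

	ERR_load_crypto_strings();
	while ((e = ERR_get_error()) != 0)
		{
		ERR_error_string_n(e, buf, sizeof(buf));
		fprintf(stderr, "%s\n", buf);
		}
	}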
diff --git a/src/lib/libcrypto/fips_ers.c b/src/lib/libcrypto/fips_ers.c
new file mode 100644
index 0000000000..09f11748f6
--- /dev/null
+++ b/src/lib/libcrypto/fips_ers.c
@@ -0,0 +1,7 @@
1#include <openssl/opensslconf.h>
2
3#ifdef OPENSSL_FIPS
4# include "fips_err.h"
5#else
6static void *dummy=&dummy;
7#endif
diff --git a/src/lib/libcrypto/idea/idea_spd.c b/src/lib/libcrypto/idea/idea_spd.c
new file mode 100644
index 0000000000..699353e871
--- /dev/null
+++ b/src/lib/libcrypto/idea/idea_spd.c
@@ -0,0 +1,299 @@
1/* crypto/idea/idea_spd.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
59/* 11-Sep-92 Andrew Daviel Support for Silicon Graphics IRIX added */
60/* 06-Apr-92 Luke Brennan Support for VMS and add extra signal calls */
61
62#if !defined(OPENSSL_SYS_MSDOS) && (!defined(OPENSSL_SYS_VMS) || defined(__DECC)) && !defined(OPENSSL_SYS_MACOSX)
63#define TIMES
64#endif
65
66#include <stdio.h>
67
68#include <openssl/e_os2.h>
69#include OPENSSL_UNISTD_IO
70OPENSSL_DECLARE_EXIT
71
72#ifndef OPENSSL_SYS_NETWARE
73#include <signal.h>
74#endif
75
76#ifndef _IRIX
77#include <time.h>
78#endif
79#ifdef TIMES
80#include <sys/types.h>
81#include <sys/times.h>
82#endif
83
84/* Depending on the VMS version, the tms structure may or may not be defined.
85 The __TMS macro will show if it was. If it wasn't defined, we should
86 undefine TIMES, since that tells the rest of the program how things
87 should be handled. -- Richard Levitte */
88#if defined(OPENSSL_SYS_VMS_DECC) && !defined(__TMS)
89#undef TIMES
90#endif
91
92#ifndef TIMES
93#include <sys/timeb.h>
94#endif
95
96#if defined(sun) || defined(__ultrix)
97#define _POSIX_SOURCE
98#include <limits.h>
99#include <sys/param.h>
100#endif
101
102#include <openssl/idea.h>
103
104/* The following is from the times(3) man page. It may need to be changed */
105#ifndef HZ
106#ifndef CLK_TCK
107#define HZ 100.0
108#else /* CLK_TCK */
109#define HZ ((double)CLK_TCK)
110#endif
111#endif
112
113#define BUFSIZE ((long)1024)
114long run=0;
115
116double Time_F(int s);
117#ifdef SIGALRM
118#if defined(__STDC__) || defined(sgi) || defined(_AIX)
119#define SIGRETTYPE void
120#else
121#define SIGRETTYPE int
122#endif
123
124SIGRETTYPE sig_done(int sig);
125SIGRETTYPE sig_done(int sig)
126 {
127 signal(SIGALRM,sig_done);
128 run=0;
129#ifdef LINT
130 sig=sig;
131#endif
132 }
133#endif
134
135#define START 0
136#define STOP 1
137
138double Time_F(int s)
139 {
140 double ret;
141#ifdef TIMES
142 static struct tms tstart,tend;
143
144 if (s == START)
145 {
146 times(&tstart);
147 return(0);
148 }
149 else
150 {
151 times(&tend);
152 ret=((double)(tend.tms_utime-tstart.tms_utime))/HZ;
153 return((ret == 0.0)?1e-6:ret);
154 }
155#else /* !times() */
156 static struct timeb tstart,tend;
157 long i;
158
159 if (s == START)
160 {
161 ftime(&tstart);
162 return(0);
163 }
164 else
165 {
166 ftime(&tend);
167 i=(long)tend.millitm-(long)tstart.millitm;
168 ret=((double)(tend.time-tstart.time))+((double)i)/1e3;
169 return((ret == 0.0)?1e-6:ret);
170 }
171#endif
172 }
173
174int main(int argc, char **argv)
175 {
176 long count;
177 static unsigned char buf[BUFSIZE];
178 static unsigned char key[] ={
179 0x12,0x34,0x56,0x78,0x9a,0xbc,0xde,0xf0,
180 0xfe,0xdc,0xba,0x98,0x76,0x54,0x32,0x10,
181 };
182 IDEA_KEY_SCHEDULE sch;
183 double a,aa,b,c,d;
184#ifndef SIGALRM
185 long ca,cca,cb,cc;
186#endif
187
188#ifndef TIMES
189 printf("To get the most accurate results, try to run this\n");
190 printf("program when this computer is idle.\n");
191#endif
192
193#ifndef SIGALRM
194 printf("First we calculate the approximate speed ...\n");
195 idea_set_encrypt_key(key,&sch);
196 count=10;
197 do {
198 long i;
199 IDEA_INT data[2];
200
201 count*=2;
202 Time_F(START);
203 for (i=count; i; i--)
204 idea_encrypt(data,&sch);
205 d=Time_F(STOP);
206 } while (d < 3.0);
207 ca=count/4;
208 cca=count/200;
209 cb=count;
210 cc=count*8/BUFSIZE+1;
211 printf("idea_set_encrypt_key %ld times\n",ca);
212#define COND(d) (count <= (d))
213#define COUNT(d) (d)
214#else
215#define COND(c) (run)
216#define COUNT(d) (count)
217 signal(SIGALRM,sig_done);
218 printf("Doing idea_set_encrypt_key for 10 seconds\n");
219 alarm(10);
220#endif
221
222 Time_F(START);
223 for (count=0,run=1; COND(ca); count+=4)
224 {
225 idea_set_encrypt_key(key,&sch);
226 idea_set_encrypt_key(key,&sch);
227 idea_set_encrypt_key(key,&sch);
228 idea_set_encrypt_key(key,&sch);
229 }
230 d=Time_F(STOP);
231 printf("%ld idea idea_set_encrypt_key's in %.2f seconds\n",count,d);
232 a=((double)COUNT(ca))/d;
233
234#ifdef SIGALRM
235 printf("Doing idea_set_decrypt_key for 10 seconds\n");
236 alarm(10);
237#else
238 printf("Doing idea_set_decrypt_key %ld times\n",cca);
239#endif
240
241 Time_F(START);
242 for (count=0,run=1; COND(cca); count+=4)
243 {
244 idea_set_decrypt_key(&sch,&sch);
245 idea_set_decrypt_key(&sch,&sch);
246 idea_set_decrypt_key(&sch,&sch);
247 idea_set_decrypt_key(&sch,&sch);
248 }
249 d=Time_F(STOP);
250 printf("%ld idea idea_set_decrypt_key's in %.2f seconds\n",count,d);
251 aa=((double)COUNT(cca))/d;
252
253#ifdef SIGALRM
254 printf("Doing idea_encrypt's for 10 seconds\n");
255 alarm(10);
256#else
257 printf("Doing idea_encrypt %ld times\n",cb);
258#endif
259 Time_F(START);
260 for (count=0,run=1; COND(cb); count+=4)
261 {
262 unsigned long data[2];
263
264 idea_encrypt(data,&sch);
265 idea_encrypt(data,&sch);
266 idea_encrypt(data,&sch);
267 idea_encrypt(data,&sch);
268 }
269 d=Time_F(STOP);
270	printf("%ld idea_encrypt's in %.2f seconds\n",count,d);
271 b=((double)COUNT(cb)*8)/d;
272
273#ifdef SIGALRM
274 printf("Doing idea_cbc_encrypt on %ld byte blocks for 10 seconds\n",
275 BUFSIZE);
276 alarm(10);
277#else
278 printf("Doing idea_cbc_encrypt %ld times on %ld byte blocks\n",cc,
279 BUFSIZE);
280#endif
281 Time_F(START);
282 for (count=0,run=1; COND(cc); count++)
283 idea_cbc_encrypt(buf,buf,BUFSIZE,&sch,
284 &(key[0]),IDEA_ENCRYPT);
285 d=Time_F(STOP);
286	printf("%ld idea_cbc_encrypt's of %ld byte blocks in %.2f seconds\n",
287 count,BUFSIZE,d);
288 c=((double)COUNT(cc)*BUFSIZE)/d;
289
290 printf("IDEA set_encrypt_key per sec = %12.2f (%9.3fuS)\n",a,1.0e6/a);
291 printf("IDEA set_decrypt_key per sec = %12.2f (%9.3fuS)\n",aa,1.0e6/aa);
292 printf("IDEA raw ecb bytes per sec = %12.2f (%9.3fuS)\n",b,8.0e6/b);
293 printf("IDEA cbc bytes per sec = %12.2f (%9.3fuS)\n",c,8.0e6/c);
294 exit(0);
295#if defined(LINT) || defined(OPENSSL_SYS_MSDOS)
296 return(0);
297#endif
298 }
299
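The speed test above uses the classic SIGALRM pattern: a handler clears a global flag, alarm(10) arms a ten-second window, and the loop spins until the flag drops, after which operations per second are derived from the count and the elapsed CPU time. The same pattern in isolation (the ten-second window and the empty loop body are placeholders):

#include <signal.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>

static volatile sig_atomic_t running;

static void stop(int sig)
	{
	(void)sig;
	running = 0;
	}

int main(void)
	{
	long count = 0;
	clock_t start, end;

	signal(SIGALRM, stop);
	running = 1;
	alarm(10);                      /* end the window after ~10s */
	start = clock();
	while (running)
		{
		/* operation being measured goes here */
		count++;
		}
	end = clock();
	printf("%ld ops in %.2f s CPU\n", count,
	    (double)(end - start) / CLOCKS_PER_SEC);
	return 0;
	}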
diff --git a/src/lib/libcrypto/mdc2/mdc2dgst.c b/src/lib/libcrypto/mdc2/mdc2dgst.c
index 4aa406edc3..b74bb1a759 100644
--- a/src/lib/libcrypto/mdc2/mdc2dgst.c
+++ b/src/lib/libcrypto/mdc2/mdc2dgst.c
@@ -61,6 +61,7 @@
61#include <string.h> 61#include <string.h>
62#include <openssl/des.h> 62#include <openssl/des.h>
63#include <openssl/mdc2.h> 63#include <openssl/mdc2.h>
64#include <openssl/crypto.h>
64 65
65#undef c2l 66#undef c2l
66#define c2l(c,l) (l =((DES_LONG)(*((c)++))) , \ 67#define c2l(c,l) (l =((DES_LONG)(*((c)++))) , \
@@ -75,7 +76,7 @@
75 *((c)++)=(unsigned char)(((l)>>24L)&0xff)) 76 *((c)++)=(unsigned char)(((l)>>24L)&0xff))
76 77
77static void mdc2_body(MDC2_CTX *c, const unsigned char *in, size_t len); 78static void mdc2_body(MDC2_CTX *c, const unsigned char *in, size_t len);
78int MDC2_Init(MDC2_CTX *c) 79fips_md_init(MDC2)
79 { 80 {
80 c->num=0; 81 c->num=0;
81 c->pad_type=1; 82 c->pad_type=1;
diff --git a/src/lib/libcrypto/modes/Makefile b/src/lib/libcrypto/modes/Makefile
index 6c85861b6c..c825b12f25 100644
--- a/src/lib/libcrypto/modes/Makefile
+++ b/src/lib/libcrypto/modes/Makefile
@@ -10,21 +10,27 @@ CFLAG=-g
10MAKEFILE= Makefile 10MAKEFILE= Makefile
11AR= ar r 11AR= ar r
12 12
13MODES_ASM_OBJ=
14
13CFLAGS= $(INCLUDES) $(CFLAG) 15CFLAGS= $(INCLUDES) $(CFLAG)
16ASFLAGS= $(INCLUDES) $(ASFLAG)
17AFLAGS= $(ASFLAGS)
14 18
15GENERAL=Makefile 19GENERAL=Makefile
16TEST= 20TEST=
17APPS= 21APPS=
18 22
19LIB=$(TOP)/libcrypto.a 23LIB=$(TOP)/libcrypto.a
20LIBSRC= cbc128.c ctr128.c cts128.c cfb128.c ofb128.c 24LIBSRC= cbc128.c ctr128.c cts128.c cfb128.c ofb128.c gcm128.c \
21LIBOBJ= cbc128.o ctr128.o cts128.o cfb128.o ofb128.o 25 ccm128.c xts128.c
26LIBOBJ= cbc128.o ctr128.o cts128.o cfb128.o ofb128.o gcm128.o \
27 ccm128.o xts128.o $(MODES_ASM_OBJ)
22 28
23SRC= $(LIBSRC) 29SRC= $(LIBSRC)
24 30
25#EXHEADER= store.h str_compat.h 31#EXHEADER= store.h str_compat.h
26EXHEADER= modes.h 32EXHEADER= modes.h
27HEADER= $(EXHEADER) 33HEADER= modes_lcl.h $(EXHEADER)
28 34
29ALL= $(GENERAL) $(SRC) $(HEADER) 35ALL= $(GENERAL) $(SRC) $(HEADER)
30 36
@@ -38,6 +44,24 @@ lib: $(LIBOBJ)
38 $(RANLIB) $(LIB) || echo Never mind. 44 $(RANLIB) $(LIB) || echo Never mind.
39 @touch lib 45 @touch lib
40 46
47ghash-ia64.s: asm/ghash-ia64.pl
48 $(PERL) asm/ghash-ia64.pl $@ $(CFLAGS)
49ghash-x86.s: asm/ghash-x86.pl
50 $(PERL) asm/ghash-x86.pl $(PERLASM_SCHEME) $(CFLAGS) $(PROCESSOR) > $@
51ghash-x86_64.s: asm/ghash-x86_64.pl
52 $(PERL) asm/ghash-x86_64.pl $(PERLASM_SCHEME) > $@
53ghash-sparcv9.s: asm/ghash-sparcv9.pl
54 $(PERL) asm/ghash-sparcv9.pl $@ $(CFLAGS)
55ghash-alpha.s: asm/ghash-alpha.pl
56 $(PERL) $< | $(CC) -E - | tee $@ > /dev/null
57ghash-parisc.s: asm/ghash-parisc.pl
58 $(PERL) asm/ghash-parisc.pl $(PERLASM_SCHEME) $@
59
60# GNU make "catch all"
61ghash-%.S: asm/ghash-%.pl; $(PERL) $< $(PERLASM_SCHEME) $@
62
63ghash-armv4.o: ghash-armv4.S
64
41files: 65files:
42 $(PERL) $(TOP)/util/files.pl Makefile >> $(TOP)/MINFO 66 $(PERL) $(TOP)/util/files.pl Makefile >> $(TOP)/MINFO
43 67
@@ -71,12 +95,47 @@ dclean:
71 mv -f Makefile.new $(MAKEFILE) 95 mv -f Makefile.new $(MAKEFILE)
72 96
73clean: 97clean:
74 rm -f *.o */*.o *.obj lib tags core .pure .nfs* *.old *.bak fluff 98 rm -f *.s *.o */*.o *.obj lib tags core .pure .nfs* *.old *.bak fluff
75 99
76# DO NOT DELETE THIS LINE -- make depend depends on it. 100# DO NOT DELETE THIS LINE -- make depend depends on it.
77 101
78cbc128.o: cbc128.c modes.h 102cbc128.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h
79cfb128.o: cfb128.c modes.h 103cbc128.o: ../../include/openssl/modes.h ../../include/openssl/opensslconf.h
80ctr128.o: ctr128.c modes.h 104cbc128.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
81cts128.o: cts128.c modes.h 105cbc128.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
82ofb128.o: modes.h ofb128.c 106cbc128.o: ../../include/openssl/symhacks.h cbc128.c modes_lcl.h
107ccm128.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h
108ccm128.o: ../../include/openssl/modes.h ../../include/openssl/opensslconf.h
109ccm128.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
110ccm128.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
111ccm128.o: ../../include/openssl/symhacks.h ccm128.c modes_lcl.h
112cfb128.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h
113cfb128.o: ../../include/openssl/modes.h ../../include/openssl/opensslconf.h
114cfb128.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
115cfb128.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
116cfb128.o: ../../include/openssl/symhacks.h cfb128.c modes_lcl.h
117ctr128.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h
118ctr128.o: ../../include/openssl/modes.h ../../include/openssl/opensslconf.h
119ctr128.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
120ctr128.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
121ctr128.o: ../../include/openssl/symhacks.h ctr128.c modes_lcl.h
122cts128.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h
123cts128.o: ../../include/openssl/modes.h ../../include/openssl/opensslconf.h
124cts128.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
125cts128.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
126cts128.o: ../../include/openssl/symhacks.h cts128.c modes_lcl.h
127gcm128.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h
128gcm128.o: ../../include/openssl/modes.h ../../include/openssl/opensslconf.h
129gcm128.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
130gcm128.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
131gcm128.o: ../../include/openssl/symhacks.h gcm128.c modes_lcl.h
132ofb128.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h
133ofb128.o: ../../include/openssl/modes.h ../../include/openssl/opensslconf.h
134ofb128.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
135ofb128.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
136ofb128.o: ../../include/openssl/symhacks.h modes_lcl.h ofb128.c
137xts128.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h
138xts128.o: ../../include/openssl/modes.h ../../include/openssl/opensslconf.h
139xts128.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
140xts128.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
141xts128.o: ../../include/openssl/symhacks.h modes_lcl.h xts128.c
diff --git a/src/lib/libcrypto/o_fips.c b/src/lib/libcrypto/o_fips.c
new file mode 100644
index 0000000000..f6d1b21855
--- /dev/null
+++ b/src/lib/libcrypto/o_fips.c
@@ -0,0 +1,96 @@
1/* Written by Stephen Henson (steve@openssl.org) for the OpenSSL
2 * project 2011.
3 */
4/* ====================================================================
5 * Copyright (c) 2011 The OpenSSL Project. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 *
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in
16 * the documentation and/or other materials provided with the
17 * distribution.
18 *
19 * 3. All advertising materials mentioning features or use of this
20 * software must display the following acknowledgment:
21 * "This product includes software developed by the OpenSSL Project
22 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
23 *
24 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
25 * endorse or promote products derived from this software without
26 * prior written permission. For written permission, please contact
27 * openssl-core@openssl.org.
28 *
29 * 5. Products derived from this software may not be called "OpenSSL"
30 * nor may "OpenSSL" appear in their names without prior written
31 * permission of the OpenSSL Project.
32 *
33 * 6. Redistributions of any form whatsoever must retain the following
34 * acknowledgment:
35 * "This product includes software developed by the OpenSSL Project
36 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
37 *
38 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
39 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
40 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
41 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
42 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
43 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
44 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
45 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
46 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
47 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
48 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
49 * OF THE POSSIBILITY OF SUCH DAMAGE.
50 * ====================================================================
51 *
52 * This product includes cryptographic software written by Eric Young
53 * (eay@cryptsoft.com). This product includes software written by Tim
54 * Hudson (tjh@cryptsoft.com).
55 *
56 */
57
58#include "cryptlib.h"
59#ifdef OPENSSL_FIPS
60#include <openssl/fips.h>
61#include <openssl/fips_rand.h>
62#include <openssl/rand.h>
63#endif
64
65int FIPS_mode(void)
66 {
67 OPENSSL_init();
68#ifdef OPENSSL_FIPS
69 return FIPS_module_mode();
70#else
71 return 0;
72#endif
73 }
74
75int FIPS_mode_set(int r)
76 {
77 OPENSSL_init();
78#ifdef OPENSSL_FIPS
79#ifndef FIPS_AUTH_USER_PASS
80#define FIPS_AUTH_USER_PASS "Default FIPS Crypto User Password"
81#endif
82 if (!FIPS_module_mode_set(r, FIPS_AUTH_USER_PASS))
83 return 0;
84 if (r)
85 RAND_set_rand_method(FIPS_rand_get_method());
86 else
87 RAND_set_rand_method(NULL);
88 return 1;
89#else
90 if (r == 0)
91 return 1;
92 CRYPTOerr(CRYPTO_F_FIPS_MODE_SET, CRYPTO_R_FIPS_MODE_NOT_SUPPORTED);
93 return 0;
94#endif
95 }
96
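FIPS_mode_set() both flips the module mode and, when entering FIPS mode, installs the validated DRBG as the default RAND method. Typical application start-up therefore looks like the following sketch (the error reporting is illustrative):

#include <stdio.h>
#include <openssl/crypto.h>
#include <openssl/err.h>

/* Request FIPS mode before any other crypto use; report failure if the
 * library was built without OPENSSL_FIPS or a self-test failed. */
static int enable_fips(void)
	{
	if (!FIPS_mode_set(1))
		{
		fprintf(stderr, "FIPS mode unavailable (err 0x%lx)\n",
		    ERR_get_error());
		return 0;
		}
	fprintf(stderr, "FIPS mode now %d\n", FIPS_mode());
	return 1;
	}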
diff --git a/src/lib/libcrypto/perlasm/x86masm.pl b/src/lib/libcrypto/perlasm/x86masm.pl
index 3d50e4a786..96b1b73e1a 100644
--- a/src/lib/libcrypto/perlasm/x86masm.pl
+++ b/src/lib/libcrypto/perlasm/x86masm.pl
@@ -14,9 +14,11 @@ sub ::generic
14{ my ($opcode,@arg)=@_; 14{ my ($opcode,@arg)=@_;
15 15
16 # fix hexadecimal constants 16 # fix hexadecimal constants
17 for (@arg) { s/0x([0-9a-f]+)/0$1h/oi; } 17 for (@arg) { s/(?<![\w\$\.])0x([0-9a-f]+)/0$1h/oi; }
18 18
19 if ($opcode !~ /movq/) 19 if ($opcode =~ /lea/ && @arg[1] =~ s/.*PTR\s+(\(.*\))$/OFFSET $1/) # no []
20 { $opcode="mov"; }
21 elsif ($opcode !~ /movq/)
20 { # fix xmm references 22 { # fix xmm references
21 $arg[0] =~ s/\b[A-Z]+WORD\s+PTR/XMMWORD PTR/i if ($arg[1]=~/\bxmm[0-7]\b/i); 23 $arg[0] =~ s/\b[A-Z]+WORD\s+PTR/XMMWORD PTR/i if ($arg[1]=~/\bxmm[0-7]\b/i);
22 $arg[1] =~ s/\b[A-Z]+WORD\s+PTR/XMMWORD PTR/i if ($arg[0]=~/\bxmm[0-7]\b/i); 24 $arg[1] =~ s/\b[A-Z]+WORD\s+PTR/XMMWORD PTR/i if ($arg[0]=~/\bxmm[0-7]\b/i);
@@ -65,6 +67,7 @@ sub get_mem
65 $ret; 67 $ret;
66} 68}
67sub ::BP { &get_mem("BYTE",@_); } 69sub ::BP { &get_mem("BYTE",@_); }
70sub ::WP { &get_mem("WORD",@_); }
68sub ::DWP { &get_mem("DWORD",@_); } 71sub ::DWP { &get_mem("DWORD",@_); }
69sub ::QWP { &get_mem("QWORD",@_); } 72sub ::QWP { &get_mem("QWORD",@_); }
70sub ::BC { "@_"; } 73sub ::BC { "@_"; }
@@ -129,7 +132,7 @@ ___
129 if (grep {/\b${nmdecor}OPENSSL_ia32cap_P\b/i} @out) 132 if (grep {/\b${nmdecor}OPENSSL_ia32cap_P\b/i} @out)
130 { my $comm=<<___; 133 { my $comm=<<___;
131.bss SEGMENT 'BSS' 134.bss SEGMENT 'BSS'
132COMM ${nmdecor}OPENSSL_ia32cap_P:DWORD 135COMM ${nmdecor}OPENSSL_ia32cap_P:QWORD
133.bss ENDS 136.bss ENDS
134___ 137___
135 # comment out OPENSSL_ia32cap_P declarations 138 # comment out OPENSSL_ia32cap_P declarations
@@ -156,6 +159,9 @@ sub ::public_label
156sub ::data_byte 159sub ::data_byte
157{ push(@out,("DB\t").join(',',@_)."\n"); } 160{ push(@out,("DB\t").join(',',@_)."\n"); }
158 161
162sub ::data_short
163{ push(@out,("DW\t").join(',',@_)."\n"); }
164
159sub ::data_word 165sub ::data_word
160{ push(@out,("DD\t").join(',',@_)."\n"); } 166{ push(@out,("DD\t").join(',',@_)."\n"); }
161 167
@@ -181,4 +187,11 @@ ___
181sub ::dataseg 187sub ::dataseg
182{ push(@out,"$segment\tENDS\n_DATA\tSEGMENT\n"); $segment="_DATA"; } 188{ push(@out,"$segment\tENDS\n_DATA\tSEGMENT\n"); $segment="_DATA"; }
183 189
190sub ::safeseh
191{ my $nm=shift;
192 push(@out,"IF \@Version GE 710\n");
193 push(@out,".SAFESEH ".&::LABEL($nm,$nmdecor.$nm)."\n");
194 push(@out,"ENDIF\n");
195}
196
1841; 1971;
diff --git a/src/lib/libcrypto/rc4/rc4_utl.c b/src/lib/libcrypto/rc4/rc4_utl.c
new file mode 100644
index 0000000000..ab3f02fe6a
--- /dev/null
+++ b/src/lib/libcrypto/rc4/rc4_utl.c
@@ -0,0 +1,62 @@
1/* crypto/rc4/rc4_utl.c -*- mode:C; c-file-style: "eay" -*- */
2/* ====================================================================
3 * Copyright (c) 2011 The OpenSSL Project. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 *
17 * 3. All advertising materials mentioning features or use of this
18 * software must display the following acknowledgment:
19 * "This product includes software developed by the OpenSSL Project
20 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
21 *
22 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
23 * endorse or promote products derived from this software without
24 * prior written permission. For written permission, please contact
25 * openssl-core@openssl.org.
26 *
27 * 5. Products derived from this software may not be called "OpenSSL"
28 * nor may "OpenSSL" appear in their names without prior written
29 * permission of the OpenSSL Project.
30 *
31 * 6. Redistributions of any form whatsoever must retain the following
32 * acknowledgment:
33 * "This product includes software developed by the OpenSSL Project
34 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
35 *
36 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
37 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
38 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
39 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
40 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
42 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
43 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
44 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
45 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
46 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
47 * OF THE POSSIBILITY OF SUCH DAMAGE.
48 * ====================================================================
49 *
50 */
51
52#include <openssl/opensslv.h>
53#include <openssl/crypto.h>
54#include <openssl/rc4.h>
55
56void RC4_set_key(RC4_KEY *key, int len, const unsigned char *data)
57 {
58#ifdef OPENSSL_FIPS
59 fips_cipher_abort(RC4);
60#endif
61 private_RC4_set_key(key, len, data);
62 }
diff --git a/src/lib/libcrypto/seed/seed.c b/src/lib/libcrypto/seed/seed.c
index 2bc384a19f..3e675a8d75 100644
--- a/src/lib/libcrypto/seed/seed.c
+++ b/src/lib/libcrypto/seed/seed.c
@@ -32,9 +32,14 @@
32#include <memory.h> 32#include <memory.h>
33#endif 33#endif
34 34
35#include <openssl/crypto.h>
35#include <openssl/seed.h> 36#include <openssl/seed.h>
36#include "seed_locl.h" 37#include "seed_locl.h"
37 38
39#ifdef SS /* can get defined on Solaris by inclusion of <stdlib.h> */
40#undef SS
41#endif
42
38static const seed_word SS[4][256] = { { 43static const seed_word SS[4][256] = { {
39 0x2989a1a8, 0x05858184, 0x16c6d2d4, 0x13c3d3d0, 0x14445054, 0x1d0d111c, 0x2c8ca0ac, 0x25052124, 44 0x2989a1a8, 0x05858184, 0x16c6d2d4, 0x13c3d3d0, 0x14445054, 0x1d0d111c, 0x2c8ca0ac, 0x25052124,
40 0x1d4d515c, 0x03434340, 0x18081018, 0x1e0e121c, 0x11415150, 0x3cccf0fc, 0x0acac2c8, 0x23436360, 45 0x1d4d515c, 0x03434340, 0x18081018, 0x1e0e121c, 0x11415150, 0x3cccf0fc, 0x0acac2c8, 0x23436360,
@@ -192,8 +197,14 @@ static const seed_word KC[] = {
192 KC0, KC1, KC2, KC3, KC4, KC5, KC6, KC7, 197 KC0, KC1, KC2, KC3, KC4, KC5, KC6, KC7,
193 KC8, KC9, KC10, KC11, KC12, KC13, KC14, KC15 }; 198 KC8, KC9, KC10, KC11, KC12, KC13, KC14, KC15 };
194#endif 199#endif
195
196void SEED_set_key(const unsigned char rawkey[SEED_KEY_LENGTH], SEED_KEY_SCHEDULE *ks) 200void SEED_set_key(const unsigned char rawkey[SEED_KEY_LENGTH], SEED_KEY_SCHEDULE *ks)
201#ifdef OPENSSL_FIPS
202 {
203 fips_cipher_abort(SEED);
204 private_SEED_set_key(rawkey, ks);
205 }
206void private_SEED_set_key(const unsigned char rawkey[SEED_KEY_LENGTH], SEED_KEY_SCHEDULE *ks)
207#endif
197{ 208{
198 seed_word x1, x2, x3, x4; 209 seed_word x1, x2, x3, x4;
199 seed_word t0, t1; 210 seed_word t0, t1;
diff --git a/src/lib/libcrypto/seed/seed.h b/src/lib/libcrypto/seed/seed.h
index 6ffa5f024e..c50fdd3607 100644
--- a/src/lib/libcrypto/seed/seed.h
+++ b/src/lib/libcrypto/seed/seed.h
@@ -116,7 +116,9 @@ typedef struct seed_key_st {
116#endif 116#endif
117} SEED_KEY_SCHEDULE; 117} SEED_KEY_SCHEDULE;
118 118
119 119#ifdef OPENSSL_FIPS
120void private_SEED_set_key(const unsigned char rawkey[SEED_KEY_LENGTH], SEED_KEY_SCHEDULE *ks);
121#endif
120void SEED_set_key(const unsigned char rawkey[SEED_KEY_LENGTH], SEED_KEY_SCHEDULE *ks); 122void SEED_set_key(const unsigned char rawkey[SEED_KEY_LENGTH], SEED_KEY_SCHEDULE *ks);
121 123
122void SEED_encrypt(const unsigned char s[SEED_BLOCK_SIZE], unsigned char d[SEED_BLOCK_SIZE], const SEED_KEY_SCHEDULE *ks); 124void SEED_encrypt(const unsigned char s[SEED_BLOCK_SIZE], unsigned char d[SEED_BLOCK_SIZE], const SEED_KEY_SCHEDULE *ks);
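The header change only adds the private_SEED_set_key() worker for FIPS builds; the public API is unchanged. A minimal use of the declared functions, expanding a key and encrypting a single 16-byte block (the key and plaintext values are placeholders):

#include <stdio.h>
#include <openssl/seed.h>

int main(void)
	{
	unsigned char key[SEED_KEY_LENGTH] = {0};   /* placeholder key */
	unsigned char in[SEED_BLOCK_SIZE] = {0};    /* placeholder block */
	unsigned char out[SEED_BLOCK_SIZE];
	SEED_KEY_SCHEDULE ks;
	int i;

	SEED_set_key(key, &ks);        /* expand the 128-bit key */
	SEED_encrypt(in, out, &ks);    /* one ECB block */
	for (i = 0; i < SEED_BLOCK_SIZE; i++)
		printf("%02x", out[i]);
	printf("\n");
	return 0;
	}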
diff --git a/src/lib/libcrypto/srp/Makefile b/src/lib/libcrypto/srp/Makefile
new file mode 100644
index 0000000000..41859d46fa
--- /dev/null
+++ b/src/lib/libcrypto/srp/Makefile
@@ -0,0 +1,98 @@
1DIR= srp
2TOP= ../..
3CC= cc
4INCLUDES= -I.. -I$(TOP) -I../../include
5CFLAG=-g
6INSTALL_PREFIX=
7OPENSSLDIR= /usr/local/ssl
8INSTALLTOP=/usr/local/ssl
9MAKE= make -f Makefile.ssl
10MAKEDEPPROG= makedepend
11MAKEDEPEND= $(TOP)/util/domd $(TOP) -MD $(MAKEDEPPROG)
12MAKEFILE= Makefile.ssl
13AR= ar r
14
15CFLAGS= $(INCLUDES) $(CFLAG)
16
17GENERAL=Makefile
18TEST=srptest.c
19APPS=
20
21LIB=$(TOP)/libcrypto.a
22LIBSRC=srp_lib.c srp_vfy.c
23LIBOBJ=srp_lib.o srp_vfy.o
24
25SRC= $(LIBSRC)
26
27EXHEADER= srp.h
28HEADER= $(EXHEADER)
29
30top:
31 (cd ../..; $(MAKE) DIRS=crypto SDIRS=$(DIR) sub_all)
32
33all: lib
34
35lib: $(LIBOBJ)
36 $(AR) $(LIB) $(LIBOBJ)
37 $(RANLIB) $(LIB) || echo Never mind.
38 @touch lib
39
40links:
41 @$(PERL) $(TOP)/util/mklink.pl ../../include/openssl $(EXHEADER)
42 @$(PERL) $(TOP)/util/mklink.pl ../../test $(TEST)
43 @$(PERL) $(TOP)/util/mklink.pl ../../apps $(APPS)
44
45install:
46 @[ -n "$(INSTALLTOP)" ] # should be set by top Makefile...
47 @headerlist="$(EXHEADER)"; for i in $$headerlist ; \
48 do \
49 (cp $$i $(INSTALL_PREFIX)$(INSTALLTOP)/include/openssl/$$i; \
50 chmod 644 $(INSTALL_PREFIX)$(INSTALLTOP)/include/openssl/$$i ); \
51 done;
52
53tags:
54 ctags $(SRC)
55
56tests:
57
58srptest: top srptest.c $(LIB)
59 $(CC) $(CFLAGS) -Wall -Werror -g -o srptest srptest.c $(LIB)
60
61lint:
62 lint -DLINT $(INCLUDES) $(SRC)>fluff
63
64depend:
65 $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(LIBSRC)
66
67dclean:
68 $(PERL) -pe 'if (/^# DO NOT DELETE THIS LINE/) {print; exit(0);}' $(MAKEFILE) >Makefile.new
69 mv -f Makefile.new $(MAKEFILE)
70
71clean:
72 rm -f *.o *.obj lib tags core .pure .nfs* *.old *.bak fluff
73
74# DO NOT DELETE THIS LINE -- make depend depends on it.
75
76srp_lib.o: ../../e_os.h ../../include/openssl/asn1.h
77srp_lib.o: ../../include/openssl/bio.h ../../include/openssl/bn.h
78srp_lib.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
79srp_lib.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
80srp_lib.o: ../../include/openssl/evp.h ../../include/openssl/lhash.h
81srp_lib.o: ../../include/openssl/obj_mac.h ../../include/openssl/objects.h
82srp_lib.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h
83srp_lib.o: ../../include/openssl/ossl_typ.h ../../include/openssl/safestack.h
84srp_lib.o: ../../include/openssl/sha.h ../../include/openssl/srp.h
85srp_lib.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
86srp_lib.o: ../cryptlib.h srp_grps.h srp_lcl.h srp_lib.c
87srp_vfy.o: ../../e_os.h ../../include/openssl/asn1.h
88srp_vfy.o: ../../include/openssl/bio.h ../../include/openssl/bn.h
89srp_vfy.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
90srp_vfy.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
91srp_vfy.o: ../../include/openssl/evp.h ../../include/openssl/lhash.h
92srp_vfy.o: ../../include/openssl/obj_mac.h ../../include/openssl/objects.h
93srp_vfy.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h
94srp_vfy.o: ../../include/openssl/ossl_typ.h ../../include/openssl/rand.h
95srp_vfy.o: ../../include/openssl/safestack.h ../../include/openssl/sha.h
96srp_vfy.o: ../../include/openssl/srp.h ../../include/openssl/stack.h
97srp_vfy.o: ../../include/openssl/symhacks.h ../../include/openssl/txt_db.h
98srp_vfy.o: ../cryptlib.h srp_lcl.h srp_vfy.c
diff --git a/src/lib/libcrypto/srp/srp.h b/src/lib/libcrypto/srp/srp.h
new file mode 100644
index 0000000000..7ec7825cad
--- /dev/null
+++ b/src/lib/libcrypto/srp/srp.h
@@ -0,0 +1,172 @@
1/* crypto/srp/srp.h */
2/* Written by Christophe Renou (christophe.renou@edelweb.fr) with
3 * the precious help of Peter Sylvester (peter.sylvester@edelweb.fr)
4 * for the EdelKey project and contributed to the OpenSSL project 2004.
5 */
6/* ====================================================================
7 * Copyright (c) 2004 The OpenSSL Project. All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 *
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 *
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in
18 * the documentation and/or other materials provided with the
19 * distribution.
20 *
21 * 3. All advertising materials mentioning features or use of this
22 * software must display the following acknowledgment:
23 * "This product includes software developed by the OpenSSL Project
24 * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
25 *
26 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
27 * endorse or promote products derived from this software without
28 * prior written permission. For written permission, please contact
29 * licensing@OpenSSL.org.
30 *
31 * 5. Products derived from this software may not be called "OpenSSL"
32 * nor may "OpenSSL" appear in their names without prior written
33 * permission of the OpenSSL Project.
34 *
35 * 6. Redistributions of any form whatsoever must retain the following
36 * acknowledgment:
37 * "This product includes software developed by the OpenSSL Project
38 * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
39 *
40 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
41 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
42 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
43 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
44 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
45 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
46 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
47 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
49 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
50 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
51 * OF THE POSSIBILITY OF SUCH DAMAGE.
52 * ====================================================================
53 *
54 * This product includes cryptographic software written by Eric Young
55 * (eay@cryptsoft.com). This product includes software written by Tim
56 * Hudson (tjh@cryptsoft.com).
57 *
58 */
59#ifndef __SRP_H__
60#define __SRP_H__
61
62#ifndef OPENSSL_NO_SRP
63
64#include <stdio.h>
65#include <string.h>
66
67#ifdef __cplusplus
68extern "C" {
69#endif
70
71#include <openssl/safestack.h>
72#include <openssl/bn.h>
73#include <openssl/crypto.h>
74
75typedef struct SRP_gN_cache_st
76 {
77 char *b64_bn;
78 BIGNUM *bn;
79 } SRP_gN_cache;
80
81
82DECLARE_STACK_OF(SRP_gN_cache)
83
84typedef struct SRP_user_pwd_st
85 {
86 char *id;
87 BIGNUM *s;
88 BIGNUM *v;
89 const BIGNUM *g;
90 const BIGNUM *N;
91 char *info;
92 } SRP_user_pwd;
93
94DECLARE_STACK_OF(SRP_user_pwd)
95
96typedef struct SRP_VBASE_st
97 {
98 STACK_OF(SRP_user_pwd) *users_pwd;
99 STACK_OF(SRP_gN_cache) *gN_cache;
100/* to simulate a user */
101 char *seed_key;
102 BIGNUM *default_g;
103 BIGNUM *default_N;
104 } SRP_VBASE;
105
106
107/* Internal structure to keep the N and g pairs */
108typedef struct SRP_gN_st
109 {
110 char *id;
111 BIGNUM *g;
112 BIGNUM *N;
113 } SRP_gN;
114
115DECLARE_STACK_OF(SRP_gN)
116
117SRP_VBASE *SRP_VBASE_new(char *seed_key);
118int SRP_VBASE_free(SRP_VBASE *vb);
119int SRP_VBASE_init(SRP_VBASE *vb, char * verifier_file);
120SRP_user_pwd *SRP_VBASE_get_by_user(SRP_VBASE *vb, char *username);
121char *SRP_create_verifier(const char *user, const char *pass, char **salt,
122 char **verifier, const char *N, const char *g);
123int SRP_create_verifier_BN(const char *user, const char *pass, BIGNUM **salt, BIGNUM **verifier, BIGNUM *N, BIGNUM *g);
124
125
126#define SRP_NO_ERROR 0
127#define SRP_ERR_VBASE_INCOMPLETE_FILE 1
128#define SRP_ERR_VBASE_BN_LIB 2
129#define SRP_ERR_OPEN_FILE 3
130#define SRP_ERR_MEMORY 4
131
132#define DB_srptype 0
133#define DB_srpverifier 1
134#define DB_srpsalt 2
135#define DB_srpid 3
136#define DB_srpgN 4
137#define DB_srpinfo 5
138#undef DB_NUMBER
139#define DB_NUMBER 6
140
141#define DB_SRP_INDEX 'I'
142#define DB_SRP_VALID 'V'
143#define DB_SRP_REVOKED 'R'
144#define DB_SRP_MODIF 'v'
145
146
147/* see srp.c */
148char * SRP_check_known_gN_param(BIGNUM* g, BIGNUM* N);
149SRP_gN *SRP_get_default_gN(const char * id) ;
150
151/* server side .... */
152BIGNUM *SRP_Calc_server_key(BIGNUM *A, BIGNUM *v, BIGNUM *u, BIGNUM *b, BIGNUM *N);
153BIGNUM *SRP_Calc_B(BIGNUM *b, BIGNUM *N, BIGNUM *g, BIGNUM *v);
154int SRP_Verify_A_mod_N(BIGNUM *A, BIGNUM *N);
155BIGNUM *SRP_Calc_u(BIGNUM *A, BIGNUM *B, BIGNUM *N) ;
156
157
158
159/* client side .... */
160BIGNUM *SRP_Calc_x(BIGNUM *s, const char *user, const char *pass);
161BIGNUM *SRP_Calc_A(BIGNUM *a, BIGNUM *N, BIGNUM *g);
162BIGNUM *SRP_Calc_client_key(BIGNUM *N, BIGNUM *B, BIGNUM *g, BIGNUM *x, BIGNUM *a, BIGNUM *u);
163int SRP_Verify_B_mod_N(BIGNUM *B, BIGNUM *N);
164
165#define SRP_MINIMAL_N 1024
166
167#ifdef __cplusplus
168}
169#endif
170
171#endif
172#endif
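For orientation, the declarations above cover both halves of the SRP exchange. A compact summary of what each routine computes, in the notation of the comments in srp_lib.c below:

    x        = SHA1(s | SHA1(user ":" pass))        (SRP_Calc_x)
    v        = g^x mod N                            (SRP_create_verifier_BN)
    A        = g^a mod N                            (SRP_Calc_A, client)
    k        = SHA1(N | PAD(g))                     (internal srp_Calc_k)
    B        = (k*v + g^b) mod N                    (SRP_Calc_B, server)
    u        = SHA1(PAD(A) | PAD(B))                (SRP_Calc_u)
    K_server = (A * v^u)^b mod N                    (SRP_Calc_server_key)
    K_client = (B - k*g^x)^(a + u*x) mod N          (SRP_Calc_client_key)

Both sides end up with the same key when the password fed into x matches the one behind the verifier v; srptest.c further down exercises exactly this round trip.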
diff --git a/src/lib/libcrypto/srp/srp_grps.h b/src/lib/libcrypto/srp/srp_grps.h
new file mode 100644
index 0000000000..d77c9fff4b
--- /dev/null
+++ b/src/lib/libcrypto/srp/srp_grps.h
@@ -0,0 +1,517 @@
1/* start of generated data */
2
3static BN_ULONG bn_group_1024_value[] = {
4 bn_pack4(9FC6,1D2F,C0EB,06E3),
5 bn_pack4(FD51,38FE,8376,435B),
6 bn_pack4(2FD4,CBF4,976E,AA9A),
7 bn_pack4(68ED,BC3C,0572,6CC0),
8 bn_pack4(C529,F566,660E,57EC),
9 bn_pack4(8255,9B29,7BCF,1885),
10 bn_pack4(CE8E,F4AD,69B1,5D49),
11 bn_pack4(5DC7,D7B4,6154,D6B6),
12 bn_pack4(8E49,5C1D,6089,DAD1),
13 bn_pack4(E0D5,D8E2,50B9,8BE4),
14 bn_pack4(383B,4813,D692,C6E0),
15 bn_pack4(D674,DF74,96EA,81D3),
16 bn_pack4(9EA2,314C,9C25,6576),
17 bn_pack4(6072,6187,75FF,3C0B),
18 bn_pack4(9C33,F80A,FA8F,C5E8),
19 bn_pack4(EEAF,0AB9,ADB3,8DD6)
20};
21static BIGNUM bn_group_1024 = {
22 bn_group_1024_value,
23 (sizeof bn_group_1024_value)/sizeof(BN_ULONG),
24 (sizeof bn_group_1024_value)/sizeof(BN_ULONG),
25 0,
26 BN_FLG_STATIC_DATA
27};
28
29static BN_ULONG bn_group_1536_value[] = {
30 bn_pack4(CF76,E3FE,D135,F9BB),
31 bn_pack4(1518,0F93,499A,234D),
32 bn_pack4(8CE7,A28C,2442,C6F3),
33 bn_pack4(5A02,1FFF,5E91,479E),
34 bn_pack4(7F8A,2FE9,B8B5,292E),
35 bn_pack4(837C,264A,E3A9,BEB8),
36 bn_pack4(E442,734A,F7CC,B7AE),
37 bn_pack4(6577,2E43,7D6C,7F8C),
38 bn_pack4(DB2F,D53D,24B7,C486),
39 bn_pack4(6EDF,0195,3934,9627),
40 bn_pack4(158B,FD3E,2B9C,8CF5),
41 bn_pack4(764E,3F4B,53DD,9DA1),
42 bn_pack4(4754,8381,DBC5,B1FC),
43 bn_pack4(9B60,9E0B,E3BA,B63D),
44 bn_pack4(8134,B1C8,B979,8914),
45 bn_pack4(DF02,8A7C,EC67,F0D0),
46 bn_pack4(80B6,55BB,9A22,E8DC),
47 bn_pack4(1558,903B,A0D0,F843),
48 bn_pack4(51C6,A94B,E460,7A29),
49 bn_pack4(5F4F,5F55,6E27,CBDE),
50 bn_pack4(BEEE,A961,4B19,CC4D),
51 bn_pack4(DBA5,1DF4,99AC,4C80),
52 bn_pack4(B1F1,2A86,17A4,7BBB),
53 bn_pack4(9DEF,3CAF,B939,277A)
54};
55static BIGNUM bn_group_1536 = {
56 bn_group_1536_value,
57 (sizeof bn_group_1536_value)/sizeof(BN_ULONG),
58 (sizeof bn_group_1536_value)/sizeof(BN_ULONG),
59 0,
60 BN_FLG_STATIC_DATA
61};
62
63static BN_ULONG bn_group_2048_value[] = {
64 bn_pack4(0FA7,111F,9E4A,FF73),
65 bn_pack4(9B65,E372,FCD6,8EF2),
66 bn_pack4(35DE,236D,525F,5475),
67 bn_pack4(94B5,C803,D89F,7AE4),
68 bn_pack4(71AE,35F8,E9DB,FBB6),
69 bn_pack4(2A56,98F3,A8D0,C382),
70 bn_pack4(9CCC,041C,7BC3,08D8),
71 bn_pack4(AF87,4E73,03CE,5329),
72 bn_pack4(6160,2790,04E5,7AE6),
73 bn_pack4(032C,FBDB,F52F,B378),
74 bn_pack4(5EA7,7A27,75D2,ECFA),
75 bn_pack4(5445,23B5,24B0,D57D),
76 bn_pack4(5B9D,32E6,88F8,7748),
77 bn_pack4(F1D2,B907,8717,461A),
78 bn_pack4(76BD,207A,436C,6481),
79 bn_pack4(CA97,B43A,23FB,8016),
80 bn_pack4(1D28,1E44,6B14,773B),
81 bn_pack4(7359,D041,D5C3,3EA7),
82 bn_pack4(A80D,740A,DBF4,FF74),
83 bn_pack4(55F9,7993,EC97,5EEA),
84 bn_pack4(2918,A996,2F0B,93B8),
85 bn_pack4(661A,05FB,D5FA,AAE8),
86 bn_pack4(CF60,9517,9A16,3AB3),
87 bn_pack4(E808,3969,EDB7,67B0),
88 bn_pack4(CD7F,48A9,DA04,FD50),
89 bn_pack4(D523,12AB,4B03,310D),
90 bn_pack4(8193,E075,7767,A13D),
91 bn_pack4(A373,29CB,B4A0,99ED),
92 bn_pack4(FC31,9294,3DB5,6050),
93 bn_pack4(AF72,B665,1987,EE07),
94 bn_pack4(F166,DE5E,1389,582F),
95 bn_pack4(AC6B,DB41,324A,9A9B)
96};
97static BIGNUM bn_group_2048 = {
98 bn_group_2048_value,
99 (sizeof bn_group_2048_value)/sizeof(BN_ULONG),
100 (sizeof bn_group_2048_value)/sizeof(BN_ULONG),
101 0,
102 BN_FLG_STATIC_DATA
103};
104
105static BN_ULONG bn_group_3072_value[] = {
106 bn_pack4(FFFF,FFFF,FFFF,FFFF),
107 bn_pack4(4B82,D120,A93A,D2CA),
108 bn_pack4(43DB,5BFC,E0FD,108E),
109 bn_pack4(08E2,4FA0,74E5,AB31),
110 bn_pack4(7709,88C0,BAD9,46E2),
111 bn_pack4(BBE1,1757,7A61,5D6C),
112 bn_pack4(521F,2B18,177B,200C),
113 bn_pack4(D876,0273,3EC8,6A64),
114 bn_pack4(F12F,FA06,D98A,0864),
115 bn_pack4(CEE3,D226,1AD2,EE6B),
116 bn_pack4(1E8C,94E0,4A25,619D),
117 bn_pack4(ABF5,AE8C,DB09,33D7),
118 bn_pack4(B397,0F85,A6E1,E4C7),
119 bn_pack4(8AEA,7157,5D06,0C7D),
120 bn_pack4(ECFB,8504,58DB,EF0A),
121 bn_pack4(A855,21AB,DF1C,BA64),
122 bn_pack4(AD33,170D,0450,7A33),
123 bn_pack4(1572,8E5A,8AAA,C42D),
124 bn_pack4(15D2,2618,98FA,0510),
125 bn_pack4(3995,497C,EA95,6AE5),
126 bn_pack4(DE2B,CBF6,9558,1718),
127 bn_pack4(B5C5,5DF0,6F4C,52C9),
128 bn_pack4(9B27,83A2,EC07,A28F),
129 bn_pack4(E39E,772C,180E,8603),
130 bn_pack4(3290,5E46,2E36,CE3B),
131 bn_pack4(F174,6C08,CA18,217C),
132 bn_pack4(670C,354E,4ABC,9804),
133 bn_pack4(9ED5,2907,7096,966D),
134 bn_pack4(1C62,F356,2085,52BB),
135 bn_pack4(8365,5D23,DCA3,AD96),
136 bn_pack4(6916,3FA8,FD24,CF5F),
137 bn_pack4(98DA,4836,1C55,D39A),
138 bn_pack4(C200,7CB8,A163,BF05),
139 bn_pack4(4928,6651,ECE4,5B3D),
140 bn_pack4(AE9F,2411,7C4B,1FE6),
141 bn_pack4(EE38,6BFB,5A89,9FA5),
142 bn_pack4(0BFF,5CB6,F406,B7ED),
143 bn_pack4(F44C,42E9,A637,ED6B),
144 bn_pack4(E485,B576,625E,7EC6),
145 bn_pack4(4FE1,356D,6D51,C245),
146 bn_pack4(302B,0A6D,F25F,1437),
147 bn_pack4(EF95,19B3,CD3A,431B),
148 bn_pack4(514A,0879,8E34,04DD),
149 bn_pack4(020B,BEA6,3B13,9B22),
150 bn_pack4(2902,4E08,8A67,CC74),
151 bn_pack4(C4C6,628B,80DC,1CD1),
152 bn_pack4(C90F,DAA2,2168,C234),
153 bn_pack4(FFFF,FFFF,FFFF,FFFF)
154};
155static BIGNUM bn_group_3072 = {
156 bn_group_3072_value,
157 (sizeof bn_group_3072_value)/sizeof(BN_ULONG),
158 (sizeof bn_group_3072_value)/sizeof(BN_ULONG),
159 0,
160 BN_FLG_STATIC_DATA
161};
162
163static BN_ULONG bn_group_4096_value[] = {
164 bn_pack4(FFFF,FFFF,FFFF,FFFF),
165 bn_pack4(4DF4,35C9,3406,3199),
166 bn_pack4(86FF,B7DC,90A6,C08F),
167 bn_pack4(93B4,EA98,8D8F,DDC1),
168 bn_pack4(D006,9127,D5B0,5AA9),
169 bn_pack4(B81B,DD76,2170,481C),
170 bn_pack4(1F61,2970,CEE2,D7AF),
171 bn_pack4(233B,A186,515B,E7ED),
172 bn_pack4(99B2,964F,A090,C3A2),
173 bn_pack4(287C,5947,4E6B,C05D),
174 bn_pack4(2E8E,FC14,1FBE,CAA6),
175 bn_pack4(DBBB,C2DB,04DE,8EF9),
176 bn_pack4(2583,E9CA,2AD4,4CE8),
177 bn_pack4(1A94,6834,B615,0BDA),
178 bn_pack4(99C3,2718,6AF4,E23C),
179 bn_pack4(8871,9A10,BDBA,5B26),
180 bn_pack4(1A72,3C12,A787,E6D7),
181 bn_pack4(4B82,D120,A921,0801),
182 bn_pack4(43DB,5BFC,E0FD,108E),
183 bn_pack4(08E2,4FA0,74E5,AB31),
184 bn_pack4(7709,88C0,BAD9,46E2),
185 bn_pack4(BBE1,1757,7A61,5D6C),
186 bn_pack4(521F,2B18,177B,200C),
187 bn_pack4(D876,0273,3EC8,6A64),
188 bn_pack4(F12F,FA06,D98A,0864),
189 bn_pack4(CEE3,D226,1AD2,EE6B),
190 bn_pack4(1E8C,94E0,4A25,619D),
191 bn_pack4(ABF5,AE8C,DB09,33D7),
192 bn_pack4(B397,0F85,A6E1,E4C7),
193 bn_pack4(8AEA,7157,5D06,0C7D),
194 bn_pack4(ECFB,8504,58DB,EF0A),
195 bn_pack4(A855,21AB,DF1C,BA64),
196 bn_pack4(AD33,170D,0450,7A33),
197 bn_pack4(1572,8E5A,8AAA,C42D),
198 bn_pack4(15D2,2618,98FA,0510),
199 bn_pack4(3995,497C,EA95,6AE5),
200 bn_pack4(DE2B,CBF6,9558,1718),
201 bn_pack4(B5C5,5DF0,6F4C,52C9),
202 bn_pack4(9B27,83A2,EC07,A28F),
203 bn_pack4(E39E,772C,180E,8603),
204 bn_pack4(3290,5E46,2E36,CE3B),
205 bn_pack4(F174,6C08,CA18,217C),
206 bn_pack4(670C,354E,4ABC,9804),
207 bn_pack4(9ED5,2907,7096,966D),
208 bn_pack4(1C62,F356,2085,52BB),
209 bn_pack4(8365,5D23,DCA3,AD96),
210 bn_pack4(6916,3FA8,FD24,CF5F),
211 bn_pack4(98DA,4836,1C55,D39A),
212 bn_pack4(C200,7CB8,A163,BF05),
213 bn_pack4(4928,6651,ECE4,5B3D),
214 bn_pack4(AE9F,2411,7C4B,1FE6),
215 bn_pack4(EE38,6BFB,5A89,9FA5),
216 bn_pack4(0BFF,5CB6,F406,B7ED),
217 bn_pack4(F44C,42E9,A637,ED6B),
218 bn_pack4(E485,B576,625E,7EC6),
219 bn_pack4(4FE1,356D,6D51,C245),
220 bn_pack4(302B,0A6D,F25F,1437),
221 bn_pack4(EF95,19B3,CD3A,431B),
222 bn_pack4(514A,0879,8E34,04DD),
223 bn_pack4(020B,BEA6,3B13,9B22),
224 bn_pack4(2902,4E08,8A67,CC74),
225 bn_pack4(C4C6,628B,80DC,1CD1),
226 bn_pack4(C90F,DAA2,2168,C234),
227 bn_pack4(FFFF,FFFF,FFFF,FFFF)
228};
229static BIGNUM bn_group_4096 = {
230 bn_group_4096_value,
231 (sizeof bn_group_4096_value)/sizeof(BN_ULONG),
232 (sizeof bn_group_4096_value)/sizeof(BN_ULONG),
233 0,
234 BN_FLG_STATIC_DATA
235};
236
237static BN_ULONG bn_group_6144_value[] = {
238 bn_pack4(FFFF,FFFF,FFFF,FFFF),
239 bn_pack4(E694,F91E,6DCC,4024),
240 bn_pack4(12BF,2D5B,0B74,74D6),
241 bn_pack4(043E,8F66,3F48,60EE),
242 bn_pack4(387F,E8D7,6E3C,0468),
243 bn_pack4(DA56,C9EC,2EF2,9632),
244 bn_pack4(EB19,CCB1,A313,D55C),
245 bn_pack4(F550,AA3D,8A1F,BFF0),
246 bn_pack4(06A1,D58B,B7C5,DA76),
247 bn_pack4(A797,15EE,F29B,E328),
248 bn_pack4(14CC,5ED2,0F80,37E0),
249 bn_pack4(CC8F,6D7E,BF48,E1D8),
250 bn_pack4(4BD4,07B2,2B41,54AA),
251 bn_pack4(0F1D,45B7,FF58,5AC5),
252 bn_pack4(23A9,7A7E,36CC,88BE),
253 bn_pack4(59E7,C97F,BEC7,E8F3),
254 bn_pack4(B5A8,4031,900B,1C9E),
255 bn_pack4(D55E,702F,4698,0C82),
256 bn_pack4(F482,D7CE,6E74,FEF6),
257 bn_pack4(F032,EA15,D172,1D03),
258 bn_pack4(5983,CA01,C64B,92EC),
259 bn_pack4(6FB8,F401,378C,D2BF),
260 bn_pack4(3320,5151,2BD7,AF42),
261 bn_pack4(DB7F,1447,E6CC,254B),
262 bn_pack4(44CE,6CBA,CED4,BB1B),
263 bn_pack4(DA3E,DBEB,CF9B,14ED),
264 bn_pack4(1797,27B0,865A,8918),
265 bn_pack4(B06A,53ED,9027,D831),
266 bn_pack4(E5DB,382F,4130,01AE),
267 bn_pack4(F8FF,9406,AD9E,530E),
268 bn_pack4(C975,1E76,3DBA,37BD),
269 bn_pack4(C1D4,DCB2,6026,46DE),
270 bn_pack4(36C3,FAB4,D27C,7026),
271 bn_pack4(4DF4,35C9,3402,8492),
272 bn_pack4(86FF,B7DC,90A6,C08F),
273 bn_pack4(93B4,EA98,8D8F,DDC1),
274 bn_pack4(D006,9127,D5B0,5AA9),
275 bn_pack4(B81B,DD76,2170,481C),
276 bn_pack4(1F61,2970,CEE2,D7AF),
277 bn_pack4(233B,A186,515B,E7ED),
278 bn_pack4(99B2,964F,A090,C3A2),
279 bn_pack4(287C,5947,4E6B,C05D),
280 bn_pack4(2E8E,FC14,1FBE,CAA6),
281 bn_pack4(DBBB,C2DB,04DE,8EF9),
282 bn_pack4(2583,E9CA,2AD4,4CE8),
283 bn_pack4(1A94,6834,B615,0BDA),
284 bn_pack4(99C3,2718,6AF4,E23C),
285 bn_pack4(8871,9A10,BDBA,5B26),
286 bn_pack4(1A72,3C12,A787,E6D7),
287 bn_pack4(4B82,D120,A921,0801),
288 bn_pack4(43DB,5BFC,E0FD,108E),
289 bn_pack4(08E2,4FA0,74E5,AB31),
290 bn_pack4(7709,88C0,BAD9,46E2),
291 bn_pack4(BBE1,1757,7A61,5D6C),
292 bn_pack4(521F,2B18,177B,200C),
293 bn_pack4(D876,0273,3EC8,6A64),
294 bn_pack4(F12F,FA06,D98A,0864),
295 bn_pack4(CEE3,D226,1AD2,EE6B),
296 bn_pack4(1E8C,94E0,4A25,619D),
297 bn_pack4(ABF5,AE8C,DB09,33D7),
298 bn_pack4(B397,0F85,A6E1,E4C7),
299 bn_pack4(8AEA,7157,5D06,0C7D),
300 bn_pack4(ECFB,8504,58DB,EF0A),
301 bn_pack4(A855,21AB,DF1C,BA64),
302 bn_pack4(AD33,170D,0450,7A33),
303 bn_pack4(1572,8E5A,8AAA,C42D),
304 bn_pack4(15D2,2618,98FA,0510),
305 bn_pack4(3995,497C,EA95,6AE5),
306 bn_pack4(DE2B,CBF6,9558,1718),
307 bn_pack4(B5C5,5DF0,6F4C,52C9),
308 bn_pack4(9B27,83A2,EC07,A28F),
309 bn_pack4(E39E,772C,180E,8603),
310 bn_pack4(3290,5E46,2E36,CE3B),
311 bn_pack4(F174,6C08,CA18,217C),
312 bn_pack4(670C,354E,4ABC,9804),
313 bn_pack4(9ED5,2907,7096,966D),
314 bn_pack4(1C62,F356,2085,52BB),
315 bn_pack4(8365,5D23,DCA3,AD96),
316 bn_pack4(6916,3FA8,FD24,CF5F),
317 bn_pack4(98DA,4836,1C55,D39A),
318 bn_pack4(C200,7CB8,A163,BF05),
319 bn_pack4(4928,6651,ECE4,5B3D),
320 bn_pack4(AE9F,2411,7C4B,1FE6),
321 bn_pack4(EE38,6BFB,5A89,9FA5),
322 bn_pack4(0BFF,5CB6,F406,B7ED),
323 bn_pack4(F44C,42E9,A637,ED6B),
324 bn_pack4(E485,B576,625E,7EC6),
325 bn_pack4(4FE1,356D,6D51,C245),
326 bn_pack4(302B,0A6D,F25F,1437),
327 bn_pack4(EF95,19B3,CD3A,431B),
328 bn_pack4(514A,0879,8E34,04DD),
329 bn_pack4(020B,BEA6,3B13,9B22),
330 bn_pack4(2902,4E08,8A67,CC74),
331 bn_pack4(C4C6,628B,80DC,1CD1),
332 bn_pack4(C90F,DAA2,2168,C234),
333 bn_pack4(FFFF,FFFF,FFFF,FFFF)
334};
335static BIGNUM bn_group_6144 = {
336 bn_group_6144_value,
337 (sizeof bn_group_6144_value)/sizeof(BN_ULONG),
338 (sizeof bn_group_6144_value)/sizeof(BN_ULONG),
339 0,
340 BN_FLG_STATIC_DATA
341};
342
343static BN_ULONG bn_group_8192_value[] = {
344 bn_pack4(FFFF,FFFF,FFFF,FFFF),
345 bn_pack4(60C9,80DD,98ED,D3DF),
346 bn_pack4(C81F,56E8,80B9,6E71),
347 bn_pack4(9E30,50E2,7656,94DF),
348 bn_pack4(9558,E447,5677,E9AA),
349 bn_pack4(C919,0DA6,FC02,6E47),
350 bn_pack4(889A,002E,D5EE,382B),
351 bn_pack4(4009,438B,481C,6CD7),
352 bn_pack4(3590,46F4,EB87,9F92),
353 bn_pack4(FAF3,6BC3,1ECF,A268),
354 bn_pack4(B1D5,10BD,7EE7,4D73),
355 bn_pack4(F9AB,4819,5DED,7EA1),
356 bn_pack4(64F3,1CC5,0846,851D),
357 bn_pack4(4597,E899,A025,5DC1),
358 bn_pack4(DF31,0EE0,74AB,6A36),
359 bn_pack4(6D2A,13F8,3F44,F82D),
360 bn_pack4(062B,3CF5,B3A2,78A6),
361 bn_pack4(7968,3303,ED5B,DD3A),
362 bn_pack4(FA9D,4B7F,A2C0,87E8),
363 bn_pack4(4BCB,C886,2F83,85DD),
364 bn_pack4(3473,FC64,6CEA,306B),
365 bn_pack4(13EB,57A8,1A23,F0C7),
366 bn_pack4(2222,2E04,A403,7C07),
367 bn_pack4(E3FD,B8BE,FC84,8AD9),
368 bn_pack4(238F,16CB,E39D,652D),
369 bn_pack4(3423,B474,2BF1,C978),
370 bn_pack4(3AAB,639C,5AE4,F568),
371 bn_pack4(2576,F693,6BA4,2466),
372 bn_pack4(741F,A7BF,8AFC,47ED),
373 bn_pack4(3BC8,32B6,8D9D,D300),
374 bn_pack4(D8BE,C4D0,73B9,31BA),
375 bn_pack4(3877,7CB6,A932,DF8C),
376 bn_pack4(74A3,926F,12FE,E5E4),
377 bn_pack4(E694,F91E,6DBE,1159),
378 bn_pack4(12BF,2D5B,0B74,74D6),
379 bn_pack4(043E,8F66,3F48,60EE),
380 bn_pack4(387F,E8D7,6E3C,0468),
381 bn_pack4(DA56,C9EC,2EF2,9632),
382 bn_pack4(EB19,CCB1,A313,D55C),
383 bn_pack4(F550,AA3D,8A1F,BFF0),
384 bn_pack4(06A1,D58B,B7C5,DA76),
385 bn_pack4(A797,15EE,F29B,E328),
386 bn_pack4(14CC,5ED2,0F80,37E0),
387 bn_pack4(CC8F,6D7E,BF48,E1D8),
388 bn_pack4(4BD4,07B2,2B41,54AA),
389 bn_pack4(0F1D,45B7,FF58,5AC5),
390 bn_pack4(23A9,7A7E,36CC,88BE),
391 bn_pack4(59E7,C97F,BEC7,E8F3),
392 bn_pack4(B5A8,4031,900B,1C9E),
393 bn_pack4(D55E,702F,4698,0C82),
394 bn_pack4(F482,D7CE,6E74,FEF6),
395 bn_pack4(F032,EA15,D172,1D03),
396 bn_pack4(5983,CA01,C64B,92EC),
397 bn_pack4(6FB8,F401,378C,D2BF),
398 bn_pack4(3320,5151,2BD7,AF42),
399 bn_pack4(DB7F,1447,E6CC,254B),
400 bn_pack4(44CE,6CBA,CED4,BB1B),
401 bn_pack4(DA3E,DBEB,CF9B,14ED),
402 bn_pack4(1797,27B0,865A,8918),
403 bn_pack4(B06A,53ED,9027,D831),
404 bn_pack4(E5DB,382F,4130,01AE),
405 bn_pack4(F8FF,9406,AD9E,530E),
406 bn_pack4(C975,1E76,3DBA,37BD),
407 bn_pack4(C1D4,DCB2,6026,46DE),
408 bn_pack4(36C3,FAB4,D27C,7026),
409 bn_pack4(4DF4,35C9,3402,8492),
410 bn_pack4(86FF,B7DC,90A6,C08F),
411 bn_pack4(93B4,EA98,8D8F,DDC1),
412 bn_pack4(D006,9127,D5B0,5AA9),
413 bn_pack4(B81B,DD76,2170,481C),
414 bn_pack4(1F61,2970,CEE2,D7AF),
415 bn_pack4(233B,A186,515B,E7ED),
416 bn_pack4(99B2,964F,A090,C3A2),
417 bn_pack4(287C,5947,4E6B,C05D),
418 bn_pack4(2E8E,FC14,1FBE,CAA6),
419 bn_pack4(DBBB,C2DB,04DE,8EF9),
420 bn_pack4(2583,E9CA,2AD4,4CE8),
421 bn_pack4(1A94,6834,B615,0BDA),
422 bn_pack4(99C3,2718,6AF4,E23C),
423 bn_pack4(8871,9A10,BDBA,5B26),
424 bn_pack4(1A72,3C12,A787,E6D7),
425 bn_pack4(4B82,D120,A921,0801),
426 bn_pack4(43DB,5BFC,E0FD,108E),
427 bn_pack4(08E2,4FA0,74E5,AB31),
428 bn_pack4(7709,88C0,BAD9,46E2),
429 bn_pack4(BBE1,1757,7A61,5D6C),
430 bn_pack4(521F,2B18,177B,200C),
431 bn_pack4(D876,0273,3EC8,6A64),
432 bn_pack4(F12F,FA06,D98A,0864),
433 bn_pack4(CEE3,D226,1AD2,EE6B),
434 bn_pack4(1E8C,94E0,4A25,619D),
435 bn_pack4(ABF5,AE8C,DB09,33D7),
436 bn_pack4(B397,0F85,A6E1,E4C7),
437 bn_pack4(8AEA,7157,5D06,0C7D),
438 bn_pack4(ECFB,8504,58DB,EF0A),
439 bn_pack4(A855,21AB,DF1C,BA64),
440 bn_pack4(AD33,170D,0450,7A33),
441 bn_pack4(1572,8E5A,8AAA,C42D),
442 bn_pack4(15D2,2618,98FA,0510),
443 bn_pack4(3995,497C,EA95,6AE5),
444 bn_pack4(DE2B,CBF6,9558,1718),
445 bn_pack4(B5C5,5DF0,6F4C,52C9),
446 bn_pack4(9B27,83A2,EC07,A28F),
447 bn_pack4(E39E,772C,180E,8603),
448 bn_pack4(3290,5E46,2E36,CE3B),
449 bn_pack4(F174,6C08,CA18,217C),
450 bn_pack4(670C,354E,4ABC,9804),
451 bn_pack4(9ED5,2907,7096,966D),
452 bn_pack4(1C62,F356,2085,52BB),
453 bn_pack4(8365,5D23,DCA3,AD96),
454 bn_pack4(6916,3FA8,FD24,CF5F),
455 bn_pack4(98DA,4836,1C55,D39A),
456 bn_pack4(C200,7CB8,A163,BF05),
457 bn_pack4(4928,6651,ECE4,5B3D),
458 bn_pack4(AE9F,2411,7C4B,1FE6),
459 bn_pack4(EE38,6BFB,5A89,9FA5),
460 bn_pack4(0BFF,5CB6,F406,B7ED),
461 bn_pack4(F44C,42E9,A637,ED6B),
462 bn_pack4(E485,B576,625E,7EC6),
463 bn_pack4(4FE1,356D,6D51,C245),
464 bn_pack4(302B,0A6D,F25F,1437),
465 bn_pack4(EF95,19B3,CD3A,431B),
466 bn_pack4(514A,0879,8E34,04DD),
467 bn_pack4(020B,BEA6,3B13,9B22),
468 bn_pack4(2902,4E08,8A67,CC74),
469 bn_pack4(C4C6,628B,80DC,1CD1),
470 bn_pack4(C90F,DAA2,2168,C234),
471 bn_pack4(FFFF,FFFF,FFFF,FFFF)
472};
473static BIGNUM bn_group_8192 = {
474 bn_group_8192_value,
475 (sizeof bn_group_8192_value)/sizeof(BN_ULONG),
476 (sizeof bn_group_8192_value)/sizeof(BN_ULONG),
477 0,
478 BN_FLG_STATIC_DATA
479};
480
481static BN_ULONG bn_generator_19_value[] = {19} ;
482static BIGNUM bn_generator_19 = {
483 bn_generator_19_value,
484 1,
485 1,
486 0,
487 BN_FLG_STATIC_DATA
488};
489static BN_ULONG bn_generator_5_value[] = {5} ;
490static BIGNUM bn_generator_5 = {
491 bn_generator_5_value,
492 1,
493 1,
494 0,
495 BN_FLG_STATIC_DATA
496};
497static BN_ULONG bn_generator_2_value[] = {2} ;
498static BIGNUM bn_generator_2 = {
499 bn_generator_2_value,
500 1,
501 1,
502 0,
503 BN_FLG_STATIC_DATA
504};
505
506static SRP_gN knowngN[] = {
507 {"8192",&bn_generator_19 , &bn_group_8192},
508 {"6144",&bn_generator_5 , &bn_group_6144},
509 {"4096",&bn_generator_5 , &bn_group_4096},
510 {"3072",&bn_generator_5 , &bn_group_3072},
511 {"2048",&bn_generator_2 , &bn_group_2048},
512 {"1536",&bn_generator_2 , &bn_group_1536},
513 {"1024",&bn_generator_2 , &bn_group_1024},
514};
515#define KNOWN_GN_NUMBER sizeof(knowngN) / sizeof(SRP_gN)
516
517/* end of generated data */
diff --git a/src/lib/libcrypto/srp/srp_lcl.h b/src/lib/libcrypto/srp/srp_lcl.h
new file mode 100644
index 0000000000..42bda3f148
--- /dev/null
+++ b/src/lib/libcrypto/srp/srp_lcl.h
@@ -0,0 +1,83 @@
1/* crypto/srp/srp_lcl.h */
2/* Written by Peter Sylvester (peter.sylvester@edelweb.fr)
3 * for the EdelKey project and contributed to the OpenSSL project 2004.
4 */
5/* ====================================================================
6 * Copyright (c) 2004 The OpenSSL Project. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 *
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in
17 * the documentation and/or other materials provided with the
18 * distribution.
19 *
20 * 3. All advertising materials mentioning features or use of this
21 * software must display the following acknowledgment:
22 * "This product includes software developed by the OpenSSL Project
23 * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
24 *
25 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
26 * endorse or promote products derived from this software without
27 * prior written permission. For written permission, please contact
28 * licensing@OpenSSL.org.
29 *
30 * 5. Products derived from this software may not be called "OpenSSL"
31 * nor may "OpenSSL" appear in their names without prior written
32 * permission of the OpenSSL Project.
33 *
34 * 6. Redistributions of any form whatsoever must retain the following
35 * acknowledgment:
36 * "This product includes software developed by the OpenSSL Project
37 * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
38 *
39 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
40 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
41 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
42 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
43 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
44 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
45 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
46 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
47 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
48 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
49 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
50 * OF THE POSSIBILITY OF SUCH DAMAGE.
51 * ====================================================================
52 *
53 * This product includes cryptographic software written by Eric Young
54 * (eay@cryptsoft.com). This product includes software written by Tim
55 * Hudson (tjh@cryptsoft.com).
56 *
57 */
58#ifndef HEADER_SRP_LCL_H
59#define HEADER_SRP_LCL_H
60
61#include <openssl/srp.h>
62#include <openssl/sha.h>
63
64#if 0
65#define srp_bn_print(a) {fprintf(stderr, #a "="); BN_print_fp(stderr,a); \
66 fprintf(stderr,"\n");}
67#else
68#define srp_bn_print(a)
69#endif
70
71
72
73#ifdef __cplusplus
74extern "C" {
75#endif
76
77
78
79#ifdef __cplusplus
80}
81#endif
82
83#endif
diff --git a/src/lib/libcrypto/srp/srp_lib.c b/src/lib/libcrypto/srp/srp_lib.c
new file mode 100644
index 0000000000..92cea98dcd
--- /dev/null
+++ b/src/lib/libcrypto/srp/srp_lib.c
@@ -0,0 +1,357 @@
1/* crypto/srp/srp_lib.c */
2/* Written by Christophe Renou (christophe.renou@edelweb.fr) with
3 * the precious help of Peter Sylvester (peter.sylvester@edelweb.fr)
4 * for the EdelKey project and contributed to the OpenSSL project 2004.
5 */
6/* ====================================================================
7 * Copyright (c) 2004 The OpenSSL Project. All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 *
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 *
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in
18 * the documentation and/or other materials provided with the
19 * distribution.
20 *
21 * 3. All advertising materials mentioning features or use of this
22 * software must display the following acknowledgment:
23 * "This product includes software developed by the OpenSSL Project
24 * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
25 *
26 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
27 * endorse or promote products derived from this software without
28 * prior written permission. For written permission, please contact
29 * licensing@OpenSSL.org.
30 *
31 * 5. Products derived from this software may not be called "OpenSSL"
32 * nor may "OpenSSL" appear in their names without prior written
33 * permission of the OpenSSL Project.
34 *
35 * 6. Redistributions of any form whatsoever must retain the following
36 * acknowledgment:
37 * "This product includes software developed by the OpenSSL Project
38 * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
39 *
40 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
41 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
42 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
43 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
44 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
45 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
46 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
47 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
49 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
50 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
51 * OF THE POSSIBILITY OF SUCH DAMAGE.
52 * ====================================================================
53 *
54 * This product includes cryptographic software written by Eric Young
55 * (eay@cryptsoft.com). This product includes software written by Tim
56 * Hudson (tjh@cryptsoft.com).
57 *
58 */
59#ifndef OPENSSL_NO_SRP
60#include "cryptlib.h"
61#include "srp_lcl.h"
62#include <openssl/srp.h>
63#include <openssl/evp.h>
64
65#if (BN_BYTES == 8)
66#define bn_pack4(a1,a2,a3,a4) 0x##a1##a2##a3##a4##ul
67#endif
68#if (BN_BYTES == 4)
69#define bn_pack4(a1,a2,a3,a4) 0x##a3##a4##ul, 0x##a1##a2##ul
70#endif
71#if (BN_BYTES == 2)
72#define bn_pack4(a1,a2,a3,a4) 0x##a4##u,0x##a3##u,0x##a2##u,0x##a1##u
73#endif
74
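A note on the bn_pack4 macro defined above: it turns each 64-bit group of hex digits into one, two or four BN_ULONG limbs depending on the platform's BN_BYTES, emitting the less significant limbs first. A minimal sketch of the expansion on a 32-bit build (BN_BYTES == 4 is an assumption about the target, not something fixed by this file):

    /* srp_grps.h source:   bn_pack4(EEAF,0AB9,ADB3,8DD6)              */
    /* preprocessor output: 0xADB38DD6ul, 0xEEAF0AB9ul                 */
    /* The low 32 bits come first, matching the least-significant-     */
    /* limb-first layout of a BIGNUM's d array, so the value arrays    */
    /* in srp_grps.h can be used directly as static BIGNUM data.       */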
75
76#include "srp_grps.h"
77
78static BIGNUM *srp_Calc_k(BIGNUM *N, BIGNUM *g)
79 {
80 /* k = SHA1(N | PAD(g)) -- tls-srp draft 8 */
81
82 unsigned char digest[SHA_DIGEST_LENGTH];
83 unsigned char *tmp;
84 EVP_MD_CTX ctxt;
85 int longg ;
86 int longN = BN_num_bytes(N);
87
88 if ((tmp = OPENSSL_malloc(longN)) == NULL)
89 return NULL;
90 BN_bn2bin(N,tmp) ;
91
92 EVP_MD_CTX_init(&ctxt);
93 EVP_DigestInit_ex(&ctxt, EVP_sha1(), NULL);
94 EVP_DigestUpdate(&ctxt, tmp, longN);
95
96 memset(tmp, 0, longN);
97 longg = BN_bn2bin(g,tmp) ;
 98	/* use the trailing zeros to pad on the left */
99 EVP_DigestUpdate(&ctxt, tmp + longg, longN-longg);
100 EVP_DigestUpdate(&ctxt, tmp, longg);
101 OPENSSL_free(tmp);
102
103 EVP_DigestFinal_ex(&ctxt, digest, NULL);
104 EVP_MD_CTX_cleanup(&ctxt);
105 return BN_bin2bn(digest, sizeof(digest), NULL);
106 }
107
108BIGNUM *SRP_Calc_u(BIGNUM *A, BIGNUM *B, BIGNUM *N)
109 {
 110	/* u = SHA1(PAD(A) || PAD(B)) -- tls-srp draft 8 */
111
112 BIGNUM *u;
113 unsigned char cu[SHA_DIGEST_LENGTH];
114 unsigned char *cAB;
115 EVP_MD_CTX ctxt;
116 int longN;
117 if ((A == NULL) ||(B == NULL) || (N == NULL))
118 return NULL;
119
120 longN= BN_num_bytes(N);
121
122 if ((cAB = OPENSSL_malloc(2*longN)) == NULL)
123 return NULL;
124
125 memset(cAB, 0, longN);
126
127 EVP_MD_CTX_init(&ctxt);
128 EVP_DigestInit_ex(&ctxt, EVP_sha1(), NULL);
129 EVP_DigestUpdate(&ctxt, cAB + BN_bn2bin(A,cAB+longN), longN);
130 EVP_DigestUpdate(&ctxt, cAB + BN_bn2bin(B,cAB+longN), longN);
131 OPENSSL_free(cAB);
132 EVP_DigestFinal_ex(&ctxt, cu, NULL);
133 EVP_MD_CTX_cleanup(&ctxt);
134
135 if (!(u = BN_bin2bn(cu, sizeof(cu), NULL)))
136 return NULL;
137 if (!BN_is_zero(u))
138 return u;
139 BN_free(u);
140 return NULL;
141}
142
143BIGNUM *SRP_Calc_server_key(BIGNUM *A, BIGNUM *v, BIGNUM *u, BIGNUM *b, BIGNUM *N)
144 {
145 BIGNUM *tmp = NULL, *S = NULL;
146 BN_CTX *bn_ctx;
147
148 if (u == NULL || A == NULL || v == NULL || b == NULL || N == NULL)
149 return NULL;
150
151 if ((bn_ctx = BN_CTX_new()) == NULL ||
152 (tmp = BN_new()) == NULL ||
153 (S = BN_new()) == NULL )
154 goto err;
155
156 /* S = (A*v**u) ** b */
157
158 if (!BN_mod_exp(tmp,v,u,N,bn_ctx))
159 goto err;
160 if (!BN_mod_mul(tmp,A,tmp,N,bn_ctx))
161 goto err;
162 if (!BN_mod_exp(S,tmp,b,N,bn_ctx))
163 goto err;
164err:
165 BN_CTX_free(bn_ctx);
166 BN_clear_free(tmp);
167 return S;
168 }
169
170BIGNUM *SRP_Calc_B(BIGNUM *b, BIGNUM *N, BIGNUM *g, BIGNUM *v)
171 {
172 BIGNUM *kv = NULL, *gb = NULL;
173 BIGNUM *B = NULL, *k = NULL;
174 BN_CTX *bn_ctx;
175
176 if (b == NULL || N == NULL || g == NULL || v == NULL ||
177 (bn_ctx = BN_CTX_new()) == NULL)
178 return NULL;
179
180 if ( (kv = BN_new()) == NULL ||
181 (gb = BN_new()) == NULL ||
182 (B = BN_new())== NULL)
183 goto err;
184
185 /* B = g**b + k*v */
186
187 if (!BN_mod_exp(gb,g,b,N,bn_ctx) ||
188 !(k = srp_Calc_k(N,g)) ||
189 !BN_mod_mul(kv,v,k,N,bn_ctx) ||
190 !BN_mod_add(B,gb,kv,N,bn_ctx))
191 {
192 BN_free(B);
193 B = NULL;
194 }
195err:
196 BN_CTX_free(bn_ctx);
197 BN_clear_free(kv);
198 BN_clear_free(gb);
199 BN_free(k);
200 return B;
201 }
202
203BIGNUM *SRP_Calc_x(BIGNUM *s, const char *user, const char *pass)
204 {
205 unsigned char dig[SHA_DIGEST_LENGTH];
206 EVP_MD_CTX ctxt;
207 unsigned char *cs;
208
209 if ((s == NULL) ||
210 (user == NULL) ||
211 (pass == NULL))
212 return NULL;
213
214 if ((cs = OPENSSL_malloc(BN_num_bytes(s))) == NULL)
215 return NULL;
216
217 EVP_MD_CTX_init(&ctxt);
218 EVP_DigestInit_ex(&ctxt, EVP_sha1(), NULL);
219 EVP_DigestUpdate(&ctxt, user, strlen(user));
220 EVP_DigestUpdate(&ctxt, ":", 1);
221 EVP_DigestUpdate(&ctxt, pass, strlen(pass));
222 EVP_DigestFinal_ex(&ctxt, dig, NULL);
223
224 EVP_DigestInit_ex(&ctxt, EVP_sha1(), NULL);
225 BN_bn2bin(s,cs);
226 EVP_DigestUpdate(&ctxt, cs, BN_num_bytes(s));
227 OPENSSL_free(cs);
228 EVP_DigestUpdate(&ctxt, dig, sizeof(dig));
229 EVP_DigestFinal_ex(&ctxt, dig, NULL);
230 EVP_MD_CTX_cleanup(&ctxt);
231
232 return BN_bin2bn(dig, sizeof(dig), NULL);
233 }
234
235BIGNUM *SRP_Calc_A(BIGNUM *a, BIGNUM *N, BIGNUM *g)
236 {
237 BN_CTX *bn_ctx;
238 BIGNUM * A = NULL;
239
240 if (a == NULL || N == NULL || g == NULL ||
241 (bn_ctx = BN_CTX_new()) == NULL)
242 return NULL;
243
244 if ((A = BN_new()) != NULL &&
245 !BN_mod_exp(A,g,a,N,bn_ctx))
246 {
247 BN_free(A);
248 A = NULL;
249 }
250 BN_CTX_free(bn_ctx);
251 return A;
252 }
253
254
255BIGNUM *SRP_Calc_client_key(BIGNUM *N, BIGNUM *B, BIGNUM *g, BIGNUM *x, BIGNUM *a, BIGNUM *u)
256 {
257 BIGNUM *tmp = NULL, *tmp2 = NULL, *tmp3 = NULL , *k = NULL, *K = NULL;
258 BN_CTX *bn_ctx;
259
260 if (u == NULL || B == NULL || N == NULL || g == NULL || x == NULL || a == NULL ||
261 (bn_ctx = BN_CTX_new()) == NULL)
262 return NULL;
263
264 if ((tmp = BN_new()) == NULL ||
265 (tmp2 = BN_new())== NULL ||
266 (tmp3 = BN_new())== NULL ||
267 (K = BN_new()) == NULL)
268 goto err;
269
270 if (!BN_mod_exp(tmp,g,x,N,bn_ctx))
271 goto err;
272 if (!(k = srp_Calc_k(N,g)))
273 goto err;
274 if (!BN_mod_mul(tmp2,tmp,k,N,bn_ctx))
275 goto err;
276 if (!BN_mod_sub(tmp,B,tmp2,N,bn_ctx))
277 goto err;
278
279 if (!BN_mod_mul(tmp3,u,x,N,bn_ctx))
280 goto err;
281 if (!BN_mod_add(tmp2,a,tmp3,N,bn_ctx))
282 goto err;
283 if (!BN_mod_exp(K,tmp,tmp2,N,bn_ctx))
284 goto err;
285
286err :
287 BN_CTX_free(bn_ctx);
288 BN_clear_free(tmp);
289 BN_clear_free(tmp2);
290 BN_clear_free(tmp3);
291 BN_free(k);
292 return K;
293 }
294
295int SRP_Verify_B_mod_N(BIGNUM *B, BIGNUM *N)
296 {
297 BIGNUM *r;
298 BN_CTX *bn_ctx;
299 int ret = 0;
300
301 if (B == NULL || N == NULL ||
302 (bn_ctx = BN_CTX_new()) == NULL)
303 return 0;
304
305 if ((r = BN_new()) == NULL)
306 goto err;
 307	/* reject B when B % N == 0 */
308 if (!BN_nnmod(r,B,N,bn_ctx))
309 goto err;
310 ret = !BN_is_zero(r);
311err:
312 BN_CTX_free(bn_ctx);
313 BN_free(r);
314 return ret;
315 }
316
317int SRP_Verify_A_mod_N(BIGNUM *A, BIGNUM *N)
318 {
 319	/* reject A when A % N == 0 */
320 return SRP_Verify_B_mod_N(A,N) ;
321 }
322
323
 324/* Check whether g and N are known parameters.
325 The values have been generated from the ietf-tls-srp draft version 8
326*/
327char *SRP_check_known_gN_param(BIGNUM *g, BIGNUM *N)
328 {
329 size_t i;
330 if ((g == NULL) || (N == NULL))
331 return 0;
332
333 srp_bn_print(g);
334 srp_bn_print(N);
335
336 for(i = 0; i < KNOWN_GN_NUMBER; i++)
337 {
338 if (BN_cmp(knowngN[i].g, g) == 0 && BN_cmp(knowngN[i].N, N) == 0)
339 return knowngN[i].id;
340 }
341 return NULL;
342 }
343
344SRP_gN *SRP_get_default_gN(const char *id)
345 {
346 size_t i;
347
348 if (id == NULL)
349 return knowngN;
350 for(i = 0; i < KNOWN_GN_NUMBER; i++)
351 {
352 if (strcmp(knowngN[i].id, id)==0)
353 return knowngN + i;
354 }
355 return NULL;
356 }
357#endif
diff --git a/src/lib/libcrypto/srp/srp_vfy.c b/src/lib/libcrypto/srp/srp_vfy.c
new file mode 100644
index 0000000000..c8be907d7f
--- /dev/null
+++ b/src/lib/libcrypto/srp/srp_vfy.c
@@ -0,0 +1,657 @@
1/* crypto/srp/srp_vfy.c */
2/* Written by Christophe Renou (christophe.renou@edelweb.fr) with
3 * the precious help of Peter Sylvester (peter.sylvester@edelweb.fr)
4 * for the EdelKey project and contributed to the OpenSSL project 2004.
5 */
6/* ====================================================================
7 * Copyright (c) 2004 The OpenSSL Project. All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 *
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 *
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in
18 * the documentation and/or other materials provided with the
19 * distribution.
20 *
21 * 3. All advertising materials mentioning features or use of this
22 * software must display the following acknowledgment:
23 * "This product includes software developed by the OpenSSL Project
24 * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
25 *
26 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
27 * endorse or promote products derived from this software without
28 * prior written permission. For written permission, please contact
29 * licensing@OpenSSL.org.
30 *
31 * 5. Products derived from this software may not be called "OpenSSL"
32 * nor may "OpenSSL" appear in their names without prior written
33 * permission of the OpenSSL Project.
34 *
35 * 6. Redistributions of any form whatsoever must retain the following
36 * acknowledgment:
37 * "This product includes software developed by the OpenSSL Project
38 * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
39 *
40 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
41 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
42 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
43 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
44 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
45 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
46 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
47 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
49 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
50 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
51 * OF THE POSSIBILITY OF SUCH DAMAGE.
52 * ====================================================================
53 *
54 * This product includes cryptographic software written by Eric Young
55 * (eay@cryptsoft.com). This product includes software written by Tim
56 * Hudson (tjh@cryptsoft.com).
57 *
58 */
59#ifndef OPENSSL_NO_SRP
60#include "cryptlib.h"
61#include "srp_lcl.h"
62#include <openssl/srp.h>
63#include <openssl/evp.h>
64#include <openssl/buffer.h>
65#include <openssl/rand.h>
66#include <openssl/txt_db.h>
67
68#define SRP_RANDOM_SALT_LEN 20
69#define MAX_LEN 2500
70
71static char b64table[] =
72 "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz./";
73
74/* the following two conversion routines have been inspired by code from Stanford */
75
76/*
77 * Convert a base64 string into raw byte array representation.
78 */
79static int t_fromb64(unsigned char *a, const char *src)
80 {
81 char *loc;
82 int i, j;
83 int size;
84
85 while(*src && (*src == ' ' || *src == '\t' || *src == '\n'))
86 ++src;
87 size = strlen(src);
88 i = 0;
89 while(i < size)
90 {
91 loc = strchr(b64table, src[i]);
92 if(loc == (char *) 0) break;
93 else a[i] = loc - b64table;
94 ++i;
95 }
96 size = i;
97 i = size - 1;
98 j = size;
99 while(1)
100 {
101 a[j] = a[i];
102 if(--i < 0) break;
103 a[j] |= (a[i] & 3) << 6;
104 --j;
105 a[j] = (unsigned char) ((a[i] & 0x3c) >> 2);
106 if(--i < 0) break;
107 a[j] |= (a[i] & 0xf) << 4;
108 --j;
109 a[j] = (unsigned char) ((a[i] & 0x30) >> 4);
110 if(--i < 0) break;
111 a[j] |= (a[i] << 2);
112
113 a[--j] = 0;
114 if(--i < 0) break;
115 }
116 while(a[j] == 0 && j <= size) ++j;
117 i = 0;
118 while (j <= size) a[i++] = a[j++];
119 return i;
120 }
121
122
123/*
124 * Convert a raw byte string into a null-terminated base64 ASCII string.
125 */
126static char *t_tob64(char *dst, const unsigned char *src, int size)
127 {
128 int c, pos = size % 3;
129 unsigned char b0 = 0, b1 = 0, b2 = 0, notleading = 0;
130 char *olddst = dst;
131
132 switch(pos)
133 {
134 case 1:
135 b2 = src[0];
136 break;
137 case 2:
138 b1 = src[0];
139 b2 = src[1];
140 break;
141 }
142
143 while(1)
144 {
145 c = (b0 & 0xfc) >> 2;
146 if(notleading || c != 0)
147 {
148 *dst++ = b64table[c];
149 notleading = 1;
150 }
151 c = ((b0 & 3) << 4) | ((b1 & 0xf0) >> 4);
152 if(notleading || c != 0)
153 {
154 *dst++ = b64table[c];
155 notleading = 1;
156 }
157 c = ((b1 & 0xf) << 2) | ((b2 & 0xc0) >> 6);
158 if(notleading || c != 0)
159 {
160 *dst++ = b64table[c];
161 notleading = 1;
162 }
163 c = b2 & 0x3f;
164 if(notleading || c != 0)
165 {
166 *dst++ = b64table[c];
167 notleading = 1;
168 }
169 if(pos >= size) break;
170 else
171 {
172 b0 = src[pos++];
173 b1 = src[pos++];
174 b2 = src[pos++];
175 }
176 }
177
178 *dst++ = '\0';
179 return olddst;
180 }
181
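These two helpers are not MIME base64: the alphabet starts with the digits and ends in "./", leading zero characters are suppressed on encode, and no '=' padding is used. A small round-trip sketch (a hypothetical caller placed in this same translation unit, since both helpers are static):

    unsigned char raw[1] = { 0x01 };
    char enc[8];
    unsigned char dec[8];
    int n;

    t_tob64(enc, raw, sizeof(raw));   /* enc becomes "1" in this alphabet */
    n = t_fromb64(dec, enc);          /* n == 1 and dec[0] == 0x01 again  */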
182static void SRP_user_pwd_free(SRP_user_pwd *user_pwd)
183 {
184 if (user_pwd == NULL)
185 return;
186 BN_free(user_pwd->s);
187 BN_clear_free(user_pwd->v);
188 OPENSSL_free(user_pwd->id);
189 OPENSSL_free(user_pwd->info);
190 OPENSSL_free(user_pwd);
191 }
192
193static SRP_user_pwd *SRP_user_pwd_new()
194 {
195 SRP_user_pwd *ret = OPENSSL_malloc(sizeof(SRP_user_pwd));
196 if (ret == NULL)
197 return NULL;
198 ret->N = NULL;
199 ret->g = NULL;
200 ret->s = NULL;
201 ret->v = NULL;
202 ret->id = NULL ;
203 ret->info = NULL;
204 return ret;
205 }
206
207static void SRP_user_pwd_set_gN(SRP_user_pwd *vinfo, const BIGNUM *g,
208 const BIGNUM *N)
209 {
210 vinfo->N = N;
211 vinfo->g = g;
212 }
213
214static int SRP_user_pwd_set_ids(SRP_user_pwd *vinfo, const char *id,
215 const char *info)
216 {
217 if (id != NULL && NULL == (vinfo->id = BUF_strdup(id)))
218 return 0;
219 return (info == NULL || NULL != (vinfo->info = BUF_strdup(info))) ;
220 }
221
222static int SRP_user_pwd_set_sv(SRP_user_pwd *vinfo, const char *s,
223 const char *v)
224 {
225 unsigned char tmp[MAX_LEN];
226 int len;
227
228 if (strlen(s) > MAX_LEN || strlen(v) > MAX_LEN)
229 return 0;
230 len = t_fromb64(tmp, v);
231 if (NULL == (vinfo->v = BN_bin2bn(tmp, len, NULL)) )
232 return 0;
233 len = t_fromb64(tmp, s);
234 return ((vinfo->s = BN_bin2bn(tmp, len, NULL)) != NULL) ;
235 }
236
237static int SRP_user_pwd_set_sv_BN(SRP_user_pwd *vinfo, BIGNUM *s, BIGNUM *v)
238 {
239 vinfo->v = v;
240 vinfo->s = s;
241 return (vinfo->s != NULL && vinfo->v != NULL) ;
242 }
243
244SRP_VBASE *SRP_VBASE_new(char *seed_key)
245 {
246 SRP_VBASE *vb = (SRP_VBASE *) OPENSSL_malloc(sizeof(SRP_VBASE));
247
248 if (vb == NULL)
249 return NULL;
250 if (!(vb->users_pwd = sk_SRP_user_pwd_new_null()) ||
251 !(vb->gN_cache = sk_SRP_gN_cache_new_null()))
252 {
253 OPENSSL_free(vb);
254 return NULL;
255 }
256 vb->default_g = NULL;
257 vb->default_N = NULL;
258 vb->seed_key = NULL;
259 if ((seed_key != NULL) &&
260 (vb->seed_key = BUF_strdup(seed_key)) == NULL)
261 {
262 sk_SRP_user_pwd_free(vb->users_pwd);
263 sk_SRP_gN_cache_free(vb->gN_cache);
264 OPENSSL_free(vb);
265 return NULL;
266 }
267 return vb;
268 }
269
270
271int SRP_VBASE_free(SRP_VBASE *vb)
272 {
273 sk_SRP_user_pwd_pop_free(vb->users_pwd,SRP_user_pwd_free);
274 sk_SRP_gN_cache_free(vb->gN_cache);
275 OPENSSL_free(vb->seed_key);
276 OPENSSL_free(vb);
277 return 0;
278 }
279
280
281static SRP_gN_cache *SRP_gN_new_init(const char *ch)
282 {
283 unsigned char tmp[MAX_LEN];
284 int len;
285
286 SRP_gN_cache *newgN = (SRP_gN_cache *)OPENSSL_malloc(sizeof(SRP_gN_cache));
287 if (newgN == NULL)
288 return NULL;
289
290 if ((newgN->b64_bn = BUF_strdup(ch)) == NULL)
291 goto err;
292
293 len = t_fromb64(tmp, ch);
294 if ((newgN->bn = BN_bin2bn(tmp, len, NULL)))
295 return newgN;
296
297 OPENSSL_free(newgN->b64_bn);
298err:
299 OPENSSL_free(newgN);
300 return NULL;
301 }
302
303
304static void SRP_gN_free(SRP_gN_cache *gN_cache)
305 {
306 if (gN_cache == NULL)
307 return;
308 OPENSSL_free(gN_cache->b64_bn);
309 BN_free(gN_cache->bn);
310 OPENSSL_free(gN_cache);
311 }
312
313static SRP_gN *SRP_get_gN_by_id(const char *id, STACK_OF(SRP_gN) *gN_tab)
314 {
315 int i;
316
317 SRP_gN *gN;
318 if (gN_tab != NULL)
319 for(i = 0; i < sk_SRP_gN_num(gN_tab); i++)
320 {
321 gN = sk_SRP_gN_value(gN_tab, i);
322 if (gN && (id == NULL || strcmp(gN->id,id)==0))
323 return gN;
324 }
325
326 return SRP_get_default_gN(id);
327 }
328
329static BIGNUM *SRP_gN_place_bn(STACK_OF(SRP_gN_cache) *gN_cache, char *ch)
330 {
331 int i;
332 if (gN_cache == NULL)
333 return NULL;
334
 335	/* check whether we already have this value... */
336 for(i = 0; i < sk_SRP_gN_cache_num(gN_cache); i++)
337 {
338 SRP_gN_cache *cache = sk_SRP_gN_cache_value(gN_cache, i);
339 if (strcmp(cache->b64_bn,ch)==0)
340 return cache->bn;
341 }
 342	{ /* first time we encounter this value */
343 SRP_gN_cache *newgN = SRP_gN_new_init(ch);
344 if (newgN)
345 {
346 if (sk_SRP_gN_cache_insert(gN_cache,newgN,0)>0)
347 return newgN->bn;
348 SRP_gN_free(newgN);
349 }
350 }
351 return NULL;
352 }
353
 354/* This function parses the verifier file. The format is:
355 * string(index):base64(N):base64(g):0
356 * string(username):base64(v):base64(salt):int(index)
357 */
358
359
360int SRP_VBASE_init(SRP_VBASE *vb, char *verifier_file)
361 {
362 int error_code ;
363 STACK_OF(SRP_gN) *SRP_gN_tab = sk_SRP_gN_new_null();
364 char *last_index = NULL;
365 int i;
366 char **pp;
367
368 SRP_gN *gN = NULL;
369 SRP_user_pwd *user_pwd = NULL ;
370
371 TXT_DB *tmpdb = NULL;
372 BIO *in = BIO_new(BIO_s_file());
373
374 error_code = SRP_ERR_OPEN_FILE;
375
376 if (in == NULL || BIO_read_filename(in,verifier_file) <= 0)
377 goto err;
378
379 error_code = SRP_ERR_VBASE_INCOMPLETE_FILE;
380
381 if ((tmpdb =TXT_DB_read(in,DB_NUMBER)) == NULL)
382 goto err;
383
384 error_code = SRP_ERR_MEMORY;
385
386
387 if (vb->seed_key)
388 {
389 last_index = SRP_get_default_gN(NULL)->id;
390 }
391 for (i = 0; i < sk_OPENSSL_PSTRING_num(tmpdb->data); i++)
392 {
393 pp = (char **)sk_OPENSSL_PSTRING_value(tmpdb->data,i);
394 if (pp[DB_srptype][0] == DB_SRP_INDEX)
395 {
 396			/* add this (N, g) pair to the internal stack */
397
398 if ((gN = (SRP_gN *)OPENSSL_malloc(sizeof(SRP_gN))) == NULL)
399 goto err;
400
401 if (!(gN->id = BUF_strdup(pp[DB_srpid]))
402 || !(gN->N = SRP_gN_place_bn(vb->gN_cache,pp[DB_srpverifier]))
403 || !(gN->g = SRP_gN_place_bn(vb->gN_cache,pp[DB_srpsalt]))
404 || sk_SRP_gN_insert(SRP_gN_tab,gN,0) == 0)
405 goto err;
406
407 gN = NULL;
408
409 if (vb->seed_key != NULL)
410 {
411 last_index = pp[DB_srpid];
412 }
413 }
414 else if (pp[DB_srptype][0] == DB_SRP_VALID)
415 {
 416			/* this is a user entry .... */
417 SRP_gN *lgN;
418 if ((lgN = SRP_get_gN_by_id(pp[DB_srpgN],SRP_gN_tab))!=NULL)
419 {
420 error_code = SRP_ERR_MEMORY;
421 if ((user_pwd = SRP_user_pwd_new()) == NULL)
422 goto err;
423
424 SRP_user_pwd_set_gN(user_pwd,lgN->g,lgN->N);
425 if (!SRP_user_pwd_set_ids(user_pwd, pp[DB_srpid],pp[DB_srpinfo]))
426 goto err;
427
428 error_code = SRP_ERR_VBASE_BN_LIB;
429 if (!SRP_user_pwd_set_sv(user_pwd, pp[DB_srpsalt],pp[DB_srpverifier]))
430 goto err;
431
432 if (sk_SRP_user_pwd_insert(vb->users_pwd, user_pwd, 0) == 0)
433 goto err;
 434				user_pwd = NULL; /* abandon responsibility: the stack owns it now */
435 }
436 }
437 }
438
439 if (last_index != NULL)
440 {
441 /* this means that we want to simulate a default user */
442
443 if (((gN = SRP_get_gN_by_id(last_index,SRP_gN_tab))==NULL))
444 {
445 error_code = SRP_ERR_VBASE_BN_LIB;
446 goto err;
447 }
448 vb->default_g = gN->g ;
449 vb->default_N = gN->N ;
450 gN = NULL ;
451 }
452 error_code = SRP_NO_ERROR;
453
454 err:
 455	/* there may still be some leaks to fix; if this fails, the application will most likely terminate */
456
457 if (gN != NULL)
458 {
459 OPENSSL_free(gN->id);
460 OPENSSL_free(gN);
461 }
462
463 SRP_user_pwd_free(user_pwd);
464
465 if (tmpdb) TXT_DB_free(tmpdb);
466 if (in) BIO_free_all(in);
467
468 sk_SRP_gN_free(SRP_gN_tab);
469
470 return error_code;
471
472 }
473
474
475SRP_user_pwd *SRP_VBASE_get_by_user(SRP_VBASE *vb, char *username)
476 {
477 int i;
478 SRP_user_pwd *user;
479 unsigned char digv[SHA_DIGEST_LENGTH];
480 unsigned char digs[SHA_DIGEST_LENGTH];
481 EVP_MD_CTX ctxt;
482
483 if (vb == NULL)
484 return NULL;
485 for(i = 0; i < sk_SRP_user_pwd_num(vb->users_pwd); i++)
486 {
487 user = sk_SRP_user_pwd_value(vb->users_pwd, i);
488 if (strcmp(user->id,username)==0)
489 return user;
490 }
491 if ((vb->seed_key == NULL) ||
492 (vb->default_g == NULL) ||
493 (vb->default_N == NULL))
494 return NULL;
495
 496/* if the user is unknown, still set up parameters, provided we have a seed_key */
497
498 if ((user = SRP_user_pwd_new()) == NULL)
499 return NULL;
500
501 SRP_user_pwd_set_gN(user,vb->default_g,vb->default_N);
502
503 if (!SRP_user_pwd_set_ids(user,username,NULL))
504 goto err;
505
506 RAND_pseudo_bytes(digv, SHA_DIGEST_LENGTH);
507 EVP_MD_CTX_init(&ctxt);
508 EVP_DigestInit_ex(&ctxt, EVP_sha1(), NULL);
509 EVP_DigestUpdate(&ctxt, vb->seed_key, strlen(vb->seed_key));
510 EVP_DigestUpdate(&ctxt, username, strlen(username));
511 EVP_DigestFinal_ex(&ctxt, digs, NULL);
512 EVP_MD_CTX_cleanup(&ctxt);
513 if (SRP_user_pwd_set_sv_BN(user, BN_bin2bn(digs,SHA_DIGEST_LENGTH,NULL), BN_bin2bn(digv,SHA_DIGEST_LENGTH, NULL)))
514 return user;
515
516err: SRP_user_pwd_free(user);
517 return NULL;
518 }
519
520
521/*
522 create a verifier (*salt,*verifier,g and N are in base64)
523*/
524char *SRP_create_verifier(const char *user, const char *pass, char **salt,
525 char **verifier, const char *N, const char *g)
526 {
527 int len;
528 char * result=NULL;
529 char *vf;
530 BIGNUM *N_bn = NULL, *g_bn = NULL, *s = NULL, *v = NULL;
531 unsigned char tmp[MAX_LEN];
532 unsigned char tmp2[MAX_LEN];
533 char * defgNid = NULL;
534
535 if ((user == NULL)||
536 (pass == NULL)||
537 (salt == NULL)||
538 (verifier == NULL))
539 goto err;
540
541 if (N)
542 {
543 if (!(len = t_fromb64(tmp, N))) goto err;
544 N_bn = BN_bin2bn(tmp, len, NULL);
545 if (!(len = t_fromb64(tmp, g))) goto err;
546 g_bn = BN_bin2bn(tmp, len, NULL);
547 defgNid = "*";
548 }
549 else
550 {
551 SRP_gN * gN = SRP_get_gN_by_id(g, NULL) ;
552 if (gN == NULL)
553 goto err;
554 N_bn = gN->N;
555 g_bn = gN->g;
556 defgNid = gN->id;
557 }
558
559 if (*salt == NULL)
560 {
561 RAND_pseudo_bytes(tmp2, SRP_RANDOM_SALT_LEN);
562
563 s = BN_bin2bn(tmp2, SRP_RANDOM_SALT_LEN, NULL);
564 }
565 else
566 {
567 if (!(len = t_fromb64(tmp2, *salt)))
568 goto err;
569 s = BN_bin2bn(tmp2, len, NULL);
570 }
571
572
573 if(!SRP_create_verifier_BN(user, pass, &s, &v, N_bn, g_bn)) goto err;
574
575 BN_bn2bin(v,tmp);
576 if (((vf = OPENSSL_malloc(BN_num_bytes(v)*2)) == NULL))
577 goto err;
578 t_tob64(vf, tmp, BN_num_bytes(v));
579
580 *verifier = vf;
581 if (*salt == NULL)
582 {
583 char *tmp_salt;
584 if ((tmp_salt = (char *)OPENSSL_malloc(SRP_RANDOM_SALT_LEN * 2)) == NULL)
585 {
586 OPENSSL_free(vf);
587 goto err;
588 }
589 t_tob64(tmp_salt, tmp2, SRP_RANDOM_SALT_LEN);
590 *salt = tmp_salt;
591 }
592
593 result=defgNid;
594
595err:
596 if(N)
597 {
598 BN_free(N_bn);
599 BN_free(g_bn);
600 }
601 return result;
602 }
603
604/*
605 create a verifier (*salt,*verifier,g and N are BIGNUMs)
606*/
607int SRP_create_verifier_BN(const char *user, const char *pass, BIGNUM **salt, BIGNUM **verifier, BIGNUM *N, BIGNUM *g)
608 {
609 int result=0;
610 BIGNUM *x = NULL;
611 BN_CTX *bn_ctx = BN_CTX_new();
612 unsigned char tmp2[MAX_LEN];
613
614 if ((user == NULL)||
615 (pass == NULL)||
616 (salt == NULL)||
617 (verifier == NULL)||
618 (N == NULL)||
619 (g == NULL)||
620 (bn_ctx == NULL))
621 goto err;
622
623 srp_bn_print(N);
624 srp_bn_print(g);
625
626 if (*salt == NULL)
627 {
628 RAND_pseudo_bytes(tmp2, SRP_RANDOM_SALT_LEN);
629
630 *salt = BN_bin2bn(tmp2,SRP_RANDOM_SALT_LEN,NULL);
631 }
632
633 x = SRP_Calc_x(*salt,user,pass);
634
635 *verifier = BN_new();
636 if(*verifier == NULL) goto err;
637
638 if (!BN_mod_exp(*verifier,g,x,N,bn_ctx))
639 {
640 BN_clear_free(*verifier);
641 goto err;
642 }
643
644 srp_bn_print(*verifier);
645
646 result=1;
647
648err:
649
650 BN_clear_free(x);
651 BN_CTX_free(bn_ctx);
652 return result;
653 }
654
655
656
657#endif
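Taken together, the SRP_VBASE routines above give a server its password database. A minimal usage sketch (the file name "passwd.srpv" and the user "alice" are placeholders; error handling is trimmed to the essentials):

    #include <stdio.h>
    #include <openssl/srp.h>

    static int show_verifier(void)
        {
        char file[] = "passwd.srpv";
        char alice[] = "alice";
        SRP_VBASE *vb = SRP_VBASE_new(NULL); /* no seed_key: unknown users return NULL */
        SRP_user_pwd *user;
        int found = 0;

        if (vb == NULL)
            return 0;
        if (SRP_VBASE_init(vb, file) == SRP_NO_ERROR &&
            (user = SRP_VBASE_get_by_user(vb, alice)) != NULL)
            {
            /* user->N, user->g, user->s and user->v are the inputs that
             * SRP_Calc_B() and SRP_Calc_server_key() need on the server */
            printf("found verifier for %s (%d-bit N)\n",
                   user->id, BN_num_bits(user->N));
            found = 1;
            }
        SRP_VBASE_free(vb); /* frees the entries it owns, including 'user' */
        return found;
        }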
diff --git a/src/lib/libcrypto/srp/srptest.c b/src/lib/libcrypto/srp/srptest.c
new file mode 100644
index 0000000000..04b66b4544
--- /dev/null
+++ b/src/lib/libcrypto/srp/srptest.c
@@ -0,0 +1,162 @@
1#include <openssl/opensslconf.h>
2#ifdef OPENSSL_NO_SRP
3
4#include <stdio.h>
5
6int main(int argc, char *argv[])
7 {
8 printf("No SRP support\n");
9 return(0);
10 }
11
12#else
13
14#include <openssl/srp.h>
15#include <openssl/rand.h>
16#include <openssl/err.h>
17
18static void showbn(const char *name, const BIGNUM *bn)
19 {
20 fputs(name, stdout);
21 fputs(" = ", stdout);
22 BN_print_fp(stdout, bn);
23 putc('\n', stdout);
24 }
25
26#define RANDOM_SIZE 32 /* use 256 bits on each side */
27
28static int run_srp(const char *username, const char *client_pass, const char *server_pass)
29 {
30 int ret=-1;
31 BIGNUM *s = NULL;
32 BIGNUM *v = NULL;
33 BIGNUM *a = NULL;
34 BIGNUM *b = NULL;
35 BIGNUM *u = NULL;
36 BIGNUM *x = NULL;
37 BIGNUM *Apub = NULL;
38 BIGNUM *Bpub = NULL;
39 BIGNUM *Kclient = NULL;
40 BIGNUM *Kserver = NULL;
41 unsigned char rand_tmp[RANDOM_SIZE];
42 /* use builtin 1024-bit params */
43 SRP_gN *GN = SRP_get_default_gN("1024");
44
45 if(GN == NULL)
46 {
47 fprintf(stderr, "Failed to get SRP parameters\n");
48 return -1;
49 }
50 /* Set up server's password entry */
51 if(!SRP_create_verifier_BN(username, server_pass, &s, &v, GN->N, GN->g))
52 {
53 fprintf(stderr, "Failed to create SRP verifier\n");
54 return -1;
55 }
56
57 showbn("N", GN->N);
58 showbn("g", GN->g);
59 showbn("Salt", s);
60 showbn("Verifier", v);
61
62 /* Server random */
63 RAND_pseudo_bytes(rand_tmp, sizeof(rand_tmp));
64 b = BN_bin2bn(rand_tmp, sizeof(rand_tmp), NULL);
65 /* TODO - check b != 0 */
66 showbn("b", b);
67
68 /* Server's first message */
69 Bpub = SRP_Calc_B(b, GN->N, GN->g, v);
70 showbn("B", Bpub);
71
72 if(!SRP_Verify_B_mod_N(Bpub, GN->N))
73 {
74 fprintf(stderr, "Invalid B\n");
75 return -1;
76 }
77
78 /* Client random */
79 RAND_pseudo_bytes(rand_tmp, sizeof(rand_tmp));
80 a = BN_bin2bn(rand_tmp, sizeof(rand_tmp), NULL);
81 /* TODO - check a != 0 */
82 showbn("a", a);
83
84 /* Client's response */
85 Apub = SRP_Calc_A(a, GN->N, GN->g);
86 showbn("A", Apub);
87
88 if(!SRP_Verify_A_mod_N(Apub, GN->N))
89 {
90 fprintf(stderr, "Invalid A\n");
91 return -1;
92 }
93
94 /* Both sides calculate u */
95 u = SRP_Calc_u(Apub, Bpub, GN->N);
96
97 /* Client's key */
98 x = SRP_Calc_x(s, username, client_pass);
99 Kclient = SRP_Calc_client_key(GN->N, Bpub, GN->g, x, a, u);
100 showbn("Client's key", Kclient);
101
102 /* Server's key */
103 Kserver = SRP_Calc_server_key(Apub, v, u, b, GN->N);
104 showbn("Server's key", Kserver);
105
106 if(BN_cmp(Kclient, Kserver) == 0)
107 {
108 ret = 0;
109 }
110 else
111 {
112 fprintf(stderr, "Keys mismatch\n");
113 ret = 1;
114 }
115
116 BN_clear_free(Kclient);
117 BN_clear_free(Kserver);
118 BN_clear_free(x);
119 BN_free(u);
120 BN_free(Apub);
121 BN_clear_free(a);
122 BN_free(Bpub);
123 BN_clear_free(b);
124 BN_free(s);
125 BN_clear_free(v);
126
127 return ret;
128 }
129
130int main(int argc, char **argv)
131 {
132 BIO *bio_err;
133 bio_err = BIO_new_fp(stderr, BIO_NOCLOSE);
134
135 CRYPTO_malloc_debug_init();
136 CRYPTO_dbg_set_options(V_CRYPTO_MDEBUG_ALL);
137 CRYPTO_mem_ctrl(CRYPTO_MEM_CHECK_ON);
138
139 ERR_load_crypto_strings();
140
141 /* "Negative" test, expect a mismatch */
142 if(run_srp("alice", "password1", "password2") == 0)
143 {
144 fprintf(stderr, "Mismatched SRP run failed\n");
145 return 1;
146 }
147
148 /* "Positive" test, should pass */
149 if(run_srp("alice", "password", "password") != 0)
150 {
151 fprintf(stderr, "Plain SRP run failed\n");
152 return 1;
153 }
154
155 CRYPTO_cleanup_all_ex_data();
156 ERR_remove_thread_state(NULL);
157 ERR_free_strings();
158 CRYPTO_mem_leaks(bio_err);
159
160 return 0;
161 }
162#endif
diff --git a/src/lib/libcrypto/util/copy.pl b/src/lib/libcrypto/util/copy.pl
index e20b45530a..eba6d5815e 100644
--- a/src/lib/libcrypto/util/copy.pl
+++ b/src/lib/libcrypto/util/copy.pl
@@ -8,9 +8,16 @@ use Fcntl;
8# Perl script 'copy' comment. On Windows the built in "copy" command also 8# Perl script 'copy' comment. On Windows the built in "copy" command also
9# copies timestamps: this messes up Makefile dependencies. 9# copies timestamps: this messes up Makefile dependencies.
10 10
11my $stripcr = 0;
12
11my $arg; 13my $arg;
12 14
13foreach $arg (@ARGV) { 15foreach $arg (@ARGV) {
16 if ($arg eq "-stripcr")
17 {
18 $stripcr = 1;
19 next;
20 }
14 $arg =~ s|\\|/|g; # compensate for bug/feature in cygwin glob... 21 $arg =~ s|\\|/|g; # compensate for bug/feature in cygwin glob...
15 foreach (glob $arg) 22 foreach (glob $arg)
16 { 23 {
@@ -49,6 +56,10 @@ foreach (@filelist)
49 || die "Can't Open $dfile"; 56 || die "Can't Open $dfile";
50 while (sysread IN, $buf, 10240) 57 while (sysread IN, $buf, 10240)
51 { 58 {
59 if ($stripcr)
60 {
61 $buf =~ tr/\015//d;
62 }
52 syswrite(OUT, $buf, length($buf)); 63 syswrite(OUT, $buf, length($buf));
53 } 64 }
54 close(IN); 65 close(IN);
diff --git a/src/lib/libcrypto/whrlpool/Makefile b/src/lib/libcrypto/whrlpool/Makefile
index 566b996290..f4d46e4d17 100644
--- a/src/lib/libcrypto/whrlpool/Makefile
+++ b/src/lib/libcrypto/whrlpool/Makefile
@@ -89,5 +89,8 @@ clean:
89 89
90wp_block.o: ../../include/openssl/e_os2.h ../../include/openssl/opensslconf.h 90wp_block.o: ../../include/openssl/e_os2.h ../../include/openssl/opensslconf.h
91wp_block.o: ../../include/openssl/whrlpool.h wp_block.c wp_locl.h 91wp_block.o: ../../include/openssl/whrlpool.h wp_block.c wp_locl.h
92wp_dgst.o: ../../include/openssl/e_os2.h ../../include/openssl/opensslconf.h 92wp_dgst.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h
93wp_dgst.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h
94wp_dgst.o: ../../include/openssl/ossl_typ.h ../../include/openssl/safestack.h
95wp_dgst.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
93wp_dgst.o: ../../include/openssl/whrlpool.h wp_dgst.c wp_locl.h 96wp_dgst.o: ../../include/openssl/whrlpool.h wp_dgst.c wp_locl.h
diff --git a/src/lib/libcrypto/x509v3/v3_asid.c b/src/lib/libcrypto/x509v3/v3_asid.c
index 3f434c0603..1587e8ed72 100644
--- a/src/lib/libcrypto/x509v3/v3_asid.c
+++ b/src/lib/libcrypto/x509v3/v3_asid.c
@@ -358,6 +358,20 @@ static int ASIdentifierChoice_is_canonical(ASIdentifierChoice *choice)
358 goto done; 358 goto done;
359 } 359 }
360 360
361 /*
362 * Check for inverted range.
363 */
364 i = sk_ASIdOrRange_num(choice->u.asIdsOrRanges) - 1;
365 {
366 ASIdOrRange *a = sk_ASIdOrRange_value(choice->u.asIdsOrRanges, i);
367 ASN1_INTEGER *a_min, *a_max;
368 if (a != NULL && a->type == ASIdOrRange_range) {
369 extract_min_max(a, &a_min, &a_max);
370 if (ASN1_INTEGER_cmp(a_min, a_max) > 0)
371 goto done;
372 }
373 }
374
361 ret = 1; 375 ret = 1;
362 376
363 done: 377 done:
@@ -392,9 +406,18 @@ static int ASIdentifierChoice_canonize(ASIdentifierChoice *choice)
392 return 1; 406 return 1;
393 407
394 /* 408 /*
395 * We have a list. Sort it. 409 * If not a list, or if empty list, it's broken.
410 */
411 if (choice->type != ASIdentifierChoice_asIdsOrRanges ||
412 sk_ASIdOrRange_num(choice->u.asIdsOrRanges) == 0) {
413 X509V3err(X509V3_F_ASIDENTIFIERCHOICE_CANONIZE,
414 X509V3_R_EXTENSION_VALUE_ERROR);
415 return 0;
416 }
417
418 /*
419 * We have a non-empty list. Sort it.
396 */ 420 */
397 OPENSSL_assert(choice->type == ASIdentifierChoice_asIdsOrRanges);
398 sk_ASIdOrRange_sort(choice->u.asIdsOrRanges); 421 sk_ASIdOrRange_sort(choice->u.asIdsOrRanges);
399 422
400 /* 423 /*
@@ -415,6 +438,13 @@ static int ASIdentifierChoice_canonize(ASIdentifierChoice *choice)
415 OPENSSL_assert(ASN1_INTEGER_cmp(a_min, b_min) <= 0); 438 OPENSSL_assert(ASN1_INTEGER_cmp(a_min, b_min) <= 0);
416 439
417 /* 440 /*
441 * Punt inverted ranges.
442 */
443 if (ASN1_INTEGER_cmp(a_min, a_max) > 0 ||
444 ASN1_INTEGER_cmp(b_min, b_max) > 0)
445 goto done;
446
447 /*
418 * Check for overlaps. 448 * Check for overlaps.
419 */ 449 */
420 if (ASN1_INTEGER_cmp(a_max, b_min) >= 0) { 450 if (ASN1_INTEGER_cmp(a_max, b_min) >= 0) {
@@ -465,12 +495,26 @@ static int ASIdentifierChoice_canonize(ASIdentifierChoice *choice)
465 break; 495 break;
466 } 496 }
467 ASIdOrRange_free(b); 497 ASIdOrRange_free(b);
468 sk_ASIdOrRange_delete(choice->u.asIdsOrRanges, i + 1); 498 (void) sk_ASIdOrRange_delete(choice->u.asIdsOrRanges, i + 1);
469 i--; 499 i--;
470 continue; 500 continue;
471 } 501 }
472 } 502 }
473 503
504 /*
505 * Check for final inverted range.
506 */
507 i = sk_ASIdOrRange_num(choice->u.asIdsOrRanges) - 1;
508 {
509 ASIdOrRange *a = sk_ASIdOrRange_value(choice->u.asIdsOrRanges, i);
510 ASN1_INTEGER *a_min, *a_max;
511 if (a != NULL && a->type == ASIdOrRange_range) {
512 extract_min_max(a, &a_min, &a_max);
513 if (ASN1_INTEGER_cmp(a_min, a_max) > 0)
514 goto done;
515 }
516 }
517
474 OPENSSL_assert(ASIdentifierChoice_is_canonical(choice)); /* Paranoia */ 518 OPENSSL_assert(ASIdentifierChoice_is_canonical(choice)); /* Paranoia */
475 519
476 ret = 1; 520 ret = 1;
@@ -498,6 +542,7 @@ static void *v2i_ASIdentifiers(const struct v3_ext_method *method,
498 struct v3_ext_ctx *ctx, 542 struct v3_ext_ctx *ctx,
499 STACK_OF(CONF_VALUE) *values) 543 STACK_OF(CONF_VALUE) *values)
500{ 544{
545 ASN1_INTEGER *min = NULL, *max = NULL;
501 ASIdentifiers *asid = NULL; 546 ASIdentifiers *asid = NULL;
502 int i; 547 int i;
503 548
@@ -508,7 +553,6 @@ static void *v2i_ASIdentifiers(const struct v3_ext_method *method,
508 553
509 for (i = 0; i < sk_CONF_VALUE_num(values); i++) { 554 for (i = 0; i < sk_CONF_VALUE_num(values); i++) {
510 CONF_VALUE *val = sk_CONF_VALUE_value(values, i); 555 CONF_VALUE *val = sk_CONF_VALUE_value(values, i);
511 ASN1_INTEGER *min = NULL, *max = NULL;
512 int i1, i2, i3, is_range, which; 556 int i1, i2, i3, is_range, which;
513 557
514 /* 558 /*
@@ -578,18 +622,19 @@ static void *v2i_ASIdentifiers(const struct v3_ext_method *method,
578 max = s2i_ASN1_INTEGER(NULL, s + i2); 622 max = s2i_ASN1_INTEGER(NULL, s + i2);
579 OPENSSL_free(s); 623 OPENSSL_free(s);
580 if (min == NULL || max == NULL) { 624 if (min == NULL || max == NULL) {
581 ASN1_INTEGER_free(min);
582 ASN1_INTEGER_free(max);
583 X509V3err(X509V3_F_V2I_ASIDENTIFIERS, ERR_R_MALLOC_FAILURE); 625 X509V3err(X509V3_F_V2I_ASIDENTIFIERS, ERR_R_MALLOC_FAILURE);
584 goto err; 626 goto err;
585 } 627 }
628 if (ASN1_INTEGER_cmp(min, max) > 0) {
629 X509V3err(X509V3_F_V2I_ASIDENTIFIERS, X509V3_R_EXTENSION_VALUE_ERROR);
630 goto err;
631 }
586 } 632 }
587 if (!v3_asid_add_id_or_range(asid, which, min, max)) { 633 if (!v3_asid_add_id_or_range(asid, which, min, max)) {
588 ASN1_INTEGER_free(min);
589 ASN1_INTEGER_free(max);
590 X509V3err(X509V3_F_V2I_ASIDENTIFIERS, ERR_R_MALLOC_FAILURE); 634 X509V3err(X509V3_F_V2I_ASIDENTIFIERS, ERR_R_MALLOC_FAILURE);
591 goto err; 635 goto err;
592 } 636 }
637 min = max = NULL;
593 } 638 }
594 639
595 /* 640 /*
@@ -601,6 +646,8 @@ static void *v2i_ASIdentifiers(const struct v3_ext_method *method,
601 646
602 err: 647 err:
603 ASIdentifiers_free(asid); 648 ASIdentifiers_free(asid);
649 ASN1_INTEGER_free(min);
650 ASN1_INTEGER_free(max);
604 return NULL; 651 return NULL;
605} 652}
606 653
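The v3_asid.c hunks above add two kinds of hardening: inverted AS ranges (min > max) are now rejected both when checking and when producing canonical form, and the min/max temporaries in v2i_ASIdentifiers() are hoisted out of the per-value loop so a single cleanup path frees them on every exit. A minimal sketch of the same pattern follows; parse_as_range() and the decimal-string inputs are hypothetical, while s2i_ASN1_INTEGER(), ASN1_INTEGER_cmp() and ASN1_INTEGER_free() are the real library calls the patch relies on.

/*
 * Sketch only: build a min-max pair, rejecting inverted ranges the way
 * the patched canonizer does, and free both integers on any failure.
 */
#include <openssl/asn1.h>
#include <openssl/x509v3.h>

static int parse_as_range(const char *lo, const char *hi,
    ASN1_INTEGER **out_min, ASN1_INTEGER **out_max)
{
	ASN1_INTEGER *min = s2i_ASN1_INTEGER(NULL, (char *)lo);
	ASN1_INTEGER *max = s2i_ASN1_INTEGER(NULL, (char *)hi);

	if (min == NULL || max == NULL)
		goto err;
	/* Punt inverted ranges up front, as ASIdentifierChoice_canonize now does. */
	if (ASN1_INTEGER_cmp(min, max) > 0)
		goto err;
	*out_min = min;
	*out_max = max;
	return 1;

 err:
	/* Single cleanup path; ASN1_INTEGER_free(NULL) is a no-op. */
	ASN1_INTEGER_free(min);
	ASN1_INTEGER_free(max);
	return 0;
}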
diff --git a/src/lib/libssl/src/apps/cms.c b/src/lib/libssl/src/apps/cms.c
index 3f5ee1b577..d754140987 100644
--- a/src/lib/libssl/src/apps/cms.c
+++ b/src/lib/libssl/src/apps/cms.c
@@ -136,6 +136,7 @@ int MAIN(int argc, char **argv)
136 char *engine=NULL; 136 char *engine=NULL;
137#endif 137#endif
138 unsigned char *secret_key = NULL, *secret_keyid = NULL; 138 unsigned char *secret_key = NULL, *secret_keyid = NULL;
139 unsigned char *pwri_pass = NULL, *pwri_tmp = NULL;
139 size_t secret_keylen = 0, secret_keyidlen = 0; 140 size_t secret_keylen = 0, secret_keyidlen = 0;
140 141
141 ASN1_OBJECT *econtent_type = NULL; 142 ASN1_OBJECT *econtent_type = NULL;
@@ -326,6 +327,13 @@ int MAIN(int argc, char **argv)
326 } 327 }
327 secret_keyidlen = (size_t)ltmp; 328 secret_keyidlen = (size_t)ltmp;
328 } 329 }
330 else if (!strcmp(*args,"-pwri_password"))
331 {
332 if (!args[1])
333 goto argerr;
334 args++;
335 pwri_pass = (unsigned char *)*args;
336 }
329 else if (!strcmp(*args,"-econtent_type")) 337 else if (!strcmp(*args,"-econtent_type"))
330 { 338 {
331 if (!args[1]) 339 if (!args[1])
@@ -559,7 +567,7 @@ int MAIN(int argc, char **argv)
559 567
560 else if (operation == SMIME_DECRYPT) 568 else if (operation == SMIME_DECRYPT)
561 { 569 {
562 if (!recipfile && !keyfile && !secret_key) 570 if (!recipfile && !keyfile && !secret_key && !pwri_pass)
563 { 571 {
564 BIO_printf(bio_err, "No recipient certificate or key specified\n"); 572 BIO_printf(bio_err, "No recipient certificate or key specified\n");
565 badarg = 1; 573 badarg = 1;
@@ -567,7 +575,7 @@ int MAIN(int argc, char **argv)
567 } 575 }
568 else if (operation == SMIME_ENCRYPT) 576 else if (operation == SMIME_ENCRYPT)
569 { 577 {
570 if (!*args && !secret_key) 578 if (!*args && !secret_key && !pwri_pass)
571 { 579 {
572 BIO_printf(bio_err, "No recipient(s) certificate(s) specified\n"); 580 BIO_printf(bio_err, "No recipient(s) certificate(s) specified\n");
573 badarg = 1; 581 badarg = 1;
@@ -618,7 +626,7 @@ int MAIN(int argc, char **argv)
618 BIO_printf (bio_err, "-certsout file certificate output file\n"); 626 BIO_printf (bio_err, "-certsout file certificate output file\n");
619 BIO_printf (bio_err, "-signer file signer certificate file\n"); 627 BIO_printf (bio_err, "-signer file signer certificate file\n");
620 BIO_printf (bio_err, "-recip file recipient certificate file for decryption\n"); 628 BIO_printf (bio_err, "-recip file recipient certificate file for decryption\n");
621 BIO_printf (bio_err, "-keyid use subject key identifier\n"); 629 BIO_printf (bio_err, "-keyid use subject key identifier\n");
622 BIO_printf (bio_err, "-in file input file\n"); 630 BIO_printf (bio_err, "-in file input file\n");
623 BIO_printf (bio_err, "-inform arg input format SMIME (default), PEM or DER\n"); 631 BIO_printf (bio_err, "-inform arg input format SMIME (default), PEM or DER\n");
624 BIO_printf (bio_err, "-inkey file input private key (if not signer or recipient)\n"); 632 BIO_printf (bio_err, "-inkey file input private key (if not signer or recipient)\n");
@@ -917,6 +925,17 @@ int MAIN(int argc, char **argv)
917 secret_key = NULL; 925 secret_key = NULL;
918 secret_keyid = NULL; 926 secret_keyid = NULL;
919 } 927 }
928 if (pwri_pass)
929 {
930 pwri_tmp = (unsigned char *)BUF_strdup((char *)pwri_pass);
931 if (!pwri_tmp)
932 goto end;
933 if (!CMS_add0_recipient_password(cms,
934 -1, NID_undef, NID_undef,
935 pwri_tmp, -1, NULL))
936 goto end;
937 pwri_tmp = NULL;
938 }
920 if (!(flags & CMS_STREAM)) 939 if (!(flags & CMS_STREAM))
921 { 940 {
922 if (!CMS_final(cms, in, NULL, flags)) 941 if (!CMS_final(cms, in, NULL, flags))
@@ -1043,6 +1062,16 @@ int MAIN(int argc, char **argv)
1043 } 1062 }
1044 } 1063 }
1045 1064
1065 if (pwri_pass)
1066 {
1067 if (!CMS_decrypt_set1_password(cms, pwri_pass, -1))
1068 {
1069 BIO_puts(bio_err,
1070 "Error decrypting CMS using password\n");
1071 goto end;
1072 }
1073 }
1074
1046 if (!CMS_decrypt(cms, NULL, NULL, indata, out, flags)) 1075 if (!CMS_decrypt(cms, NULL, NULL, indata, out, flags))
1047 { 1076 {
1048 BIO_printf(bio_err, "Error decrypting CMS structure\n"); 1077 BIO_printf(bio_err, "Error decrypting CMS structure\n");
@@ -1167,6 +1196,8 @@ end:
1167 OPENSSL_free(secret_key); 1196 OPENSSL_free(secret_key);
1168 if (secret_keyid) 1197 if (secret_keyid)
1169 OPENSSL_free(secret_keyid); 1198 OPENSSL_free(secret_keyid);
1199 if (pwri_tmp)
1200 OPENSSL_free(pwri_tmp);
1170 if (econtent_type) 1201 if (econtent_type)
1171 ASN1_OBJECT_free(econtent_type); 1202 ASN1_OBJECT_free(econtent_type);
1172 if (rr) 1203 if (rr)
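The cms.c changes wire a new -pwri_password option into the cms app: for encryption the password is attached with CMS_add0_recipient_password(), and for decryption it is handed to CMS_decrypt_set1_password() before CMS_decrypt() runs. Below is a rough, untested sketch of that round trip; pwri_roundtrip(), the AES-128-CBC choice and the assumption that enc_out is a memory BIO that can be read back are all illustrative, while the CMS_* calls mirror the ones the app now makes.

/* Password-based CMS envelope: encrypt, then decrypt with the same pass. */
#include <openssl/bio.h>
#include <openssl/cms.h>
#include <openssl/evp.h>
#include <openssl/objects.h>

static int pwri_roundtrip(BIO *in, BIO *enc_out, BIO *dec_out,
    unsigned char *pass)
{
	CMS_ContentInfo *cms;

	/* Build the envelope but do not finalize it yet (CMS_PARTIAL). */
	if ((cms = CMS_encrypt(NULL, in, EVP_aes_128_cbc(), CMS_PARTIAL)) == NULL)
		return 0;
	/* Defaults for iteration count, wrap and PBE algorithms, as the app does. */
	if (!CMS_add0_recipient_password(cms, -1, NID_undef, NID_undef,
	    pass, -1, NULL))
		return 0;
	if (!CMS_final(cms, in, NULL, 0) ||
	    !SMIME_write_CMS(enc_out, cms, NULL, 0))
		return 0;
	CMS_ContentInfo_free(cms);

	/* Decryption side: set the password before calling CMS_decrypt(). */
	if ((cms = SMIME_read_CMS(enc_out, NULL)) == NULL)
		return 0;
	if (!CMS_decrypt_set1_password(cms, pass, -1))
		return 0;
	if (!CMS_decrypt(cms, NULL, NULL, NULL, dec_out, 0))
		return 0;
	CMS_ContentInfo_free(cms);
	return 1;
}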
diff --git a/src/lib/libssl/src/apps/demoSRP/srp_verifier.txt b/src/lib/libssl/src/apps/demoSRP/srp_verifier.txt
new file mode 100644
index 0000000000..ccae629247
--- /dev/null
+++ b/src/lib/libssl/src/apps/demoSRP/srp_verifier.txt
@@ -0,0 +1,6 @@
1# This is a file that will be filled by the openssl srp routine.
2# You can initialize the file with additional groups; these are
3# records starting with an I, followed by the g and N values and the id.
4# The exact values ... you have to dig these out of the source of srp.c
5# or srp_vfy.c.
6# The last value of an I is used as the default group for new users.
diff --git a/src/lib/libssl/src/apps/demoSRP/srp_verifier.txt.attr b/src/lib/libssl/src/apps/demoSRP/srp_verifier.txt.attr
new file mode 100644
index 0000000000..8f7e63a347
--- /dev/null
+++ b/src/lib/libssl/src/apps/demoSRP/srp_verifier.txt.attr
@@ -0,0 +1 @@
unique_subject = yes
diff --git a/src/lib/libssl/src/apps/srp.c b/src/lib/libssl/src/apps/srp.c
new file mode 100644
index 0000000000..80e1b8a660
--- /dev/null
+++ b/src/lib/libssl/src/apps/srp.c
@@ -0,0 +1,756 @@
1/* apps/srp.c */
2/* Written by Peter Sylvester (peter.sylvester@edelweb.fr)
3 * for the EdelKey project and contributed to the OpenSSL project 2004.
4 */
5/* ====================================================================
6 * Copyright (c) 2004 The OpenSSL Project. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 *
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in
17 * the documentation and/or other materials provided with the
18 * distribution.
19 *
20 * 3. All advertising materials mentioning features or use of this
21 * software must display the following acknowledgment:
22 * "This product includes software developed by the OpenSSL Project
23 * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
24 *
25 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
26 * endorse or promote products derived from this software without
27 * prior written permission. For written permission, please contact
28 * licensing@OpenSSL.org.
29 *
30 * 5. Products derived from this software may not be called "OpenSSL"
31 * nor may "OpenSSL" appear in their names without prior written
32 * permission of the OpenSSL Project.
33 *
34 * 6. Redistributions of any form whatsoever must retain the following
35 * acknowledgment:
36 * "This product includes software developed by the OpenSSL Project
37 * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
38 *
39 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
40 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
41 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
42 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
43 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
44 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
45 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
46 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
47 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
48 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
49 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
50 * OF THE POSSIBILITY OF SUCH DAMAGE.
51 * ====================================================================
52 *
53 * This product includes cryptographic software written by Eric Young
54 * (eay@cryptsoft.com). This product includes software written by Tim
55 * Hudson (tjh@cryptsoft.com).
56 *
57 */
58#include <openssl/opensslconf.h>
59
60#ifndef OPENSSL_NO_SRP
61#include <stdio.h>
62#include <stdlib.h>
63#include <string.h>
64#include <openssl/conf.h>
65#include <openssl/bio.h>
66#include <openssl/err.h>
67#include <openssl/txt_db.h>
68#include <openssl/buffer.h>
69#include <openssl/srp.h>
70
71#include "apps.h"
72
73#undef PROG
74#define PROG srp_main
75
76#define BASE_SECTION "srp"
77#define CONFIG_FILE "openssl.cnf"
78
79#define ENV_RANDFILE "RANDFILE"
80
81#define ENV_DATABASE "srpvfile"
82#define ENV_DEFAULT_SRP "default_srp"
83
84static char *srp_usage[]={
85"usage: srp [args] [user] \n",
86"\n",
87" -verbose Talk alot while doing things\n",
88" -config file A config file\n",
89" -name arg The particular srp definition to use\n",
90" -srpvfile arg The srp verifier file name\n",
91" -add add an user and srp verifier\n",
92" -modify modify the srp verifier of an existing user\n",
93" -delete delete user from verifier file\n",
94" -list list user\n",
95" -gn arg g and N values to be used for new verifier\n",
96" -userinfo arg additional info to be set for user\n",
97" -passin arg input file pass phrase source\n",
98" -passout arg output file pass phrase source\n",
99#ifndef OPENSSL_NO_ENGINE
100" -engine e - use engine e, possibly a hardware device.\n",
101#endif
102NULL
103};
104
105#ifdef EFENCE
106extern int EF_PROTECT_FREE;
107extern int EF_PROTECT_BELOW;
108extern int EF_ALIGNMENT;
109#endif
110
111static CONF *conf=NULL;
112static char *section=NULL;
113
114#define VERBOSE if (verbose)
115#define VVERBOSE if (verbose>1)
116
117
118int MAIN(int, char **);
119
120static int get_index(CA_DB *db, char* id, char type)
121 {
122 char ** pp;
123 int i;
124 if (id == NULL) return -1;
125 if (type == DB_SRP_INDEX)
126 for (i = 0; i < sk_OPENSSL_PSTRING_num(db->db->data); i++)
127 {
128 pp = (char **)sk_OPENSSL_PSTRING_value(db->db->data, i);
129 if (pp[DB_srptype][0] == DB_SRP_INDEX && !strcmp(id, pp[DB_srpid]))
130 return i;
131 }
132 else for (i = 0; i < sk_OPENSSL_PSTRING_num(db->db->data); i++)
133 {
134 pp = (char **)sk_OPENSSL_PSTRING_value(db->db->data, i);
135
136 if (pp[DB_srptype][0] != DB_SRP_INDEX && !strcmp(id,pp[DB_srpid]))
137 return i;
138 }
139
140 return -1 ;
141 }
142
143static void print_entry(CA_DB *db, BIO *bio, int indx, int verbose, char *s)
144 {
145 if (indx >= 0 && verbose)
146 {
147 int j;
148 char **pp = (char **)sk_OPENSSL_PSTRING_value(db->db->data, indx);
149 BIO_printf(bio, "%s \"%s\"\n", s, pp[DB_srpid]);
150 for (j = 0; j < DB_NUMBER; j++)
151 {
152 BIO_printf(bio_err," %d = \"%s\"\n", j, pp[j]);
153 }
154 }
155 }
156
157static void print_index(CA_DB *db, BIO *bio, int indexindex, int verbose)
158 {
159 print_entry(db, bio, indexindex, verbose, "g N entry") ;
160 }
161
162static void print_user(CA_DB *db, BIO *bio, int userindex, int verbose)
163 {
164 if (verbose > 0)
165 {
166 char **pp = (char **)sk_OPENSSL_PSTRING_value(db->db->data, userindex);
167
168 if (pp[DB_srptype][0] != 'I')
169 {
170 print_entry(db, bio, userindex, verbose, "User entry");
171 print_entry(db, bio, get_index(db, pp[DB_srpgN], 'I'), verbose, "g N entry");
172 }
173
174 }
175 }
176
177static int update_index(CA_DB *db, BIO *bio, char **row)
178 {
179 char ** irow;
180 int i;
181
182 if ((irow=(char **)OPENSSL_malloc(sizeof(char *)*(DB_NUMBER+1))) == NULL)
183 {
184 BIO_printf(bio_err,"Memory allocation failure\n");
185 return 0;
186 }
187
188 for (i=0; i<DB_NUMBER; i++)
189 {
190 irow[i]=row[i];
191 row[i]=NULL;
192 }
193 irow[DB_NUMBER]=NULL;
194
195 if (!TXT_DB_insert(db->db,irow))
196 {
197 BIO_printf(bio,"failed to update srpvfile\n");
198 BIO_printf(bio,"TXT_DB error number %ld\n",db->db->error);
199 OPENSSL_free(irow);
200 return 0;
201 }
202 return 1;
203 }
204
205static void lookup_fail(const char *name, char *tag)
206 {
207 BIO_printf(bio_err,"variable lookup failed for %s::%s\n",name,tag);
208 }
209
210
211static char *srp_verify_user(const char *user, const char *srp_verifier,
212 char *srp_usersalt, const char *g, const char *N,
213 const char *passin, BIO *bio, int verbose)
214 {
215 char password[1024];
216 PW_CB_DATA cb_tmp;
217 char *verifier = NULL;
218 char *gNid = NULL;
219
220 cb_tmp.prompt_info = user;
221 cb_tmp.password = passin;
222
223 if (password_callback(password, 1024, 0, &cb_tmp) >0)
224 {
225 VERBOSE BIO_printf(bio,"Validating\n user=\"%s\"\n srp_verifier=\"%s\"\n srp_usersalt=\"%s\"\n g=\"%s\"\n N=\"%s\"\n",user,srp_verifier,srp_usersalt, g, N);
226 BIO_printf(bio, "Pass %s\n", password);
227
228 if (!(gNid=SRP_create_verifier(user, password, &srp_usersalt, &verifier, N, g)))
229 {
230 BIO_printf(bio, "Internal error validating SRP verifier\n");
231 }
232 else
233 {
234 if (strcmp(verifier, srp_verifier))
235 gNid = NULL;
236 OPENSSL_free(verifier);
237 }
238 }
239 return gNid;
240 }
241
242static char *srp_create_user(char *user, char **srp_verifier,
243 char **srp_usersalt, char *g, char *N,
244 char *passout, BIO *bio, int verbose)
245 {
246 char password[1024];
247 PW_CB_DATA cb_tmp;
248 char *gNid = NULL;
249 char *salt = NULL;
250 cb_tmp.prompt_info = user;
251 cb_tmp.password = passout;
252
253 if (password_callback(password,1024,1,&cb_tmp) >0)
254 {
255 VERBOSE BIO_printf(bio,"Creating\n user=\"%s\"\n g=\"%s\"\n N=\"%s\"\n",user,g,N);
256 if (!(gNid =SRP_create_verifier(user, password, &salt, srp_verifier, N, g)))
257 {
258 BIO_printf(bio,"Internal error creating SRP verifier\n");
259 }
260 else
261 *srp_usersalt = salt;
262 VVERBOSE BIO_printf(bio,"gNid=%s salt =\"%s\"\n verifier =\"%s\"\n", gNid,salt, *srp_verifier);
263
264 }
265 return gNid;
266 }
267
268int MAIN(int argc, char **argv)
269 {
270 int add_user = 0;
271 int list_user= 0;
272 int delete_user= 0;
273 int modify_user= 0;
274 char * user = NULL;
275
276 char *passargin = NULL, *passargout = NULL;
277 char *passin = NULL, *passout = NULL;
278 char * gN = NULL;
279 int gNindex = -1;
280 char ** gNrow = NULL;
281 int maxgN = -1;
282
283 char * userinfo = NULL;
284
285 int badops=0;
286 int ret=1;
287 int errors=0;
288 int verbose=0;
289 int doupdatedb=0;
290 char *configfile=NULL;
291 char *dbfile=NULL;
292 CA_DB *db=NULL;
293 char **pp ;
294 int i;
295 long errorline = -1;
296 char *randfile=NULL;
297#ifndef OPENSSL_NO_ENGINE
298 char *engine = NULL;
299#endif
300 char *tofree=NULL;
301 DB_ATTR db_attr;
302
303#ifdef EFENCE
304EF_PROTECT_FREE=1;
305EF_PROTECT_BELOW=1;
306EF_ALIGNMENT=0;
307#endif
308
309 apps_startup();
310
311 conf = NULL;
312 section = NULL;
313
314 if (bio_err == NULL)
315 if ((bio_err=BIO_new(BIO_s_file())) != NULL)
316 BIO_set_fp(bio_err,stderr,BIO_NOCLOSE|BIO_FP_TEXT);
317
318 argc--;
319 argv++;
320 while (argc >= 1 && badops == 0)
321 {
322 if (strcmp(*argv,"-verbose") == 0)
323 verbose++;
324 else if (strcmp(*argv,"-config") == 0)
325 {
326 if (--argc < 1) goto bad;
327 configfile= *(++argv);
328 }
329 else if (strcmp(*argv,"-name") == 0)
330 {
331 if (--argc < 1) goto bad;
332 section= *(++argv);
333 }
334 else if (strcmp(*argv,"-srpvfile") == 0)
335 {
336 if (--argc < 1) goto bad;
337 dbfile= *(++argv);
338 }
339 else if (strcmp(*argv,"-add") == 0)
340 add_user=1;
341 else if (strcmp(*argv,"-delete") == 0)
342 delete_user=1;
343 else if (strcmp(*argv,"-modify") == 0)
344 modify_user=1;
345 else if (strcmp(*argv,"-list") == 0)
346 list_user=1;
347 else if (strcmp(*argv,"-gn") == 0)
348 {
349 if (--argc < 1) goto bad;
350 gN= *(++argv);
351 }
352 else if (strcmp(*argv,"-userinfo") == 0)
353 {
354 if (--argc < 1) goto bad;
355 userinfo= *(++argv);
356 }
357 else if (strcmp(*argv,"-passin") == 0)
358 {
359 if (--argc < 1) goto bad;
360 passargin= *(++argv);
361 }
362 else if (strcmp(*argv,"-passout") == 0)
363 {
364 if (--argc < 1) goto bad;
365 passargout= *(++argv);
366 }
367#ifndef OPENSSL_NO_ENGINE
368 else if (strcmp(*argv,"-engine") == 0)
369 {
370 if (--argc < 1) goto bad;
371 engine= *(++argv);
372 }
373#endif
374
375 else if (**argv == '-')
376 {
377bad:
378 BIO_printf(bio_err,"unknown option %s\n",*argv);
379 badops=1;
380 break;
381 }
382 else
383 break;
384
385 argc--;
386 argv++;
387 }
388
389 if (dbfile && configfile)
390 {
391 BIO_printf(bio_err,"-dbfile and -configfile cannot be specified together.\n");
392 badops = 1;
393 }
394 if (add_user+delete_user+modify_user+list_user != 1)
395 {
396 BIO_printf(bio_err,"Exactly one of the options -add, -delete, -modify -list must be specified.\n");
397 badops = 1;
398 }
399 if (add_user+delete_user+modify_user == 1 && argc <= 0)
400 {
401 BIO_printf(bio_err,"Need at least one user for options -add, -delete, -modify. \n");
402 badops = 1;
403 }
404 if ((passin || passout) && argc != 1 )
405 {
406 BIO_printf(bio_err,"-passin, -passout arguments only valid with one user.\n");
407 badops = 1;
408 }
409
410 if (badops)
411 {
412 for (pp=srp_usage; (*pp != NULL); pp++)
413 BIO_printf(bio_err,"%s",*pp);
414
415 BIO_printf(bio_err," -rand file%cfile%c...\n", LIST_SEPARATOR_CHAR, LIST_SEPARATOR_CHAR);
416 BIO_printf(bio_err," load the file (or the files in the directory) into\n");
417 BIO_printf(bio_err," the random number generator\n");
418 goto err;
419 }
420
421 ERR_load_crypto_strings();
422
423#ifndef OPENSSL_NO_ENGINE
424 setup_engine(bio_err, engine, 0);
425#endif
426
427 if(!app_passwd(bio_err, passargin, passargout, &passin, &passout))
428 {
429 BIO_printf(bio_err, "Error getting passwords\n");
430 goto err;
431 }
432
433 if (!dbfile)
434 {
435
436
437 /*****************************************************************/
438 tofree=NULL;
439 if (configfile == NULL) configfile = getenv("OPENSSL_CONF");
440 if (configfile == NULL) configfile = getenv("SSLEAY_CONF");
441 if (configfile == NULL)
442 {
443 const char *s=X509_get_default_cert_area();
444 size_t len;
445
446#ifdef OPENSSL_SYS_VMS
447 len = strlen(s)+sizeof(CONFIG_FILE);
448 tofree=OPENSSL_malloc(len);
449 strcpy(tofree,s);
450#else
451 len = strlen(s)+sizeof(CONFIG_FILE)+1;
452 tofree=OPENSSL_malloc(len);
453 BUF_strlcpy(tofree,s,len);
454 BUF_strlcat(tofree,"/",len);
455#endif
456 BUF_strlcat(tofree,CONFIG_FILE,len);
457 configfile=tofree;
458 }
459
460 VERBOSE BIO_printf(bio_err,"Using configuration from %s\n",configfile);
461 conf = NCONF_new(NULL);
462 if (NCONF_load(conf,configfile,&errorline) <= 0)
463 {
464 if (errorline <= 0)
465 BIO_printf(bio_err,"error loading the config file '%s'\n",
466 configfile);
467 else
468 BIO_printf(bio_err,"error on line %ld of config file '%s'\n"
469 ,errorline,configfile);
470 goto err;
471 }
472 if(tofree)
473 {
474 OPENSSL_free(tofree);
475 tofree = NULL;
476 }
477
478 if (!load_config(bio_err, conf))
479 goto err;
480
481 /* Lets get the config section we are using */
482 if (section == NULL)
483 {
484 VERBOSE BIO_printf(bio_err,"trying to read " ENV_DEFAULT_SRP " in \"" BASE_SECTION "\"\n");
485
486 section=NCONF_get_string(conf,BASE_SECTION,ENV_DEFAULT_SRP);
487 if (section == NULL)
488 {
489 lookup_fail(BASE_SECTION,ENV_DEFAULT_SRP);
490 goto err;
491 }
492 }
493
494 if (randfile == NULL && conf)
495 randfile = NCONF_get_string(conf, BASE_SECTION, "RANDFILE");
496
497
498 VERBOSE BIO_printf(bio_err,"trying to read " ENV_DATABASE " in section \"%s\"\n",section);
499
500 if ((dbfile=NCONF_get_string(conf,section,ENV_DATABASE)) == NULL)
501 {
502 lookup_fail(section,ENV_DATABASE);
503 goto err;
504 }
505
506 }
507 if (randfile == NULL)
508 ERR_clear_error();
509 else
510 app_RAND_load_file(randfile, bio_err, 0);
511
512 VERBOSE BIO_printf(bio_err,"Trying to read SRP verifier file \"%s\"\n",dbfile);
513
514 db = load_index(dbfile, &db_attr);
515 if (db == NULL) goto err;
516
517 /* Lets check some fields */
518 for (i = 0; i < sk_OPENSSL_PSTRING_num(db->db->data); i++)
519 {
520 pp = (char **)sk_OPENSSL_PSTRING_value(db->db->data, i);
521
522 if (pp[DB_srptype][0] == DB_SRP_INDEX)
523 {
524 maxgN = i;
525 if (gNindex < 0 && gN != NULL && !strcmp(gN, pp[DB_srpid]))
526 gNindex = i;
527
528 print_index(db, bio_err, i, verbose > 1);
529 }
530 }
531
532 VERBOSE BIO_printf(bio_err, "Database initialised\n");
533
534 if (gNindex >= 0)
535 {
536 gNrow = (char **)sk_OPENSSL_PSTRING_value(db->db->data, gNindex);
537 print_entry(db, bio_err, gNindex, verbose > 1, "Default g and N") ;
538 }
539 else if (maxgN > 0 && !SRP_get_default_gN(gN))
540 {
541 BIO_printf(bio_err, "No g and N value for index \"%s\"\n", gN);
542 goto err;
543 }
544 else
545 {
546 VERBOSE BIO_printf(bio_err, "Database has no g N information.\n");
547 gNrow = NULL;
548 }
549
550
551 VVERBOSE BIO_printf(bio_err,"Starting user processing\n");
552
553 if (argc > 0)
554 user = *(argv++) ;
555
556 while (list_user || user)
557 {
558 int userindex = -1;
559 if (user)
560 VVERBOSE BIO_printf(bio_err, "Processing user \"%s\"\n", user);
561 if ((userindex = get_index(db, user, 'U')) >= 0)
562 {
563 print_user(db, bio_err, userindex, (verbose > 0) || list_user);
564 }
565
566 if (list_user)
567 {
568 if (user == NULL)
569 {
570 BIO_printf(bio_err,"List all users\n");
571
572 for (i = 0; i < sk_OPENSSL_PSTRING_num(db->db->data); i++)
573 {
574 print_user(db,bio_err, i, 1);
575 }
576 list_user = 0;
577 }
578 else if (userindex < 0)
579 {
580 BIO_printf(bio_err, "user \"%s\" does not exist, ignored. t\n",
581 user);
582 errors++;
583 }
584 }
585 else if (add_user)
586 {
587 if (userindex >= 0)
588 {
589 /* reactivation of an existing user */
590 char **row = (char **)sk_OPENSSL_PSTRING_value(db->db->data, userindex);
591 BIO_printf(bio_err, "user \"%s\" reactivated.\n", user);
592 row[DB_srptype][0] = 'V';
593
594 doupdatedb = 1;
595 }
596 else
597 {
598 char *row[DB_NUMBER] ; char *gNid;
599 row[DB_srpverifier] = NULL;
600 row[DB_srpsalt] = NULL;
601 row[DB_srpinfo] = NULL;
602 if (!(gNid = srp_create_user(user,&(row[DB_srpverifier]), &(row[DB_srpsalt]),gNrow?gNrow[DB_srpsalt]:gN,gNrow?gNrow[DB_srpverifier]:NULL, passout, bio_err,verbose)))
603 {
604 BIO_printf(bio_err, "Cannot create srp verifier for user \"%s\", operation abandoned .\n", user);
605 errors++;
606 goto err;
607 }
608 row[DB_srpid] = BUF_strdup(user);
609 row[DB_srptype] = BUF_strdup("v");
610 row[DB_srpgN] = BUF_strdup(gNid);
611
612 if (!row[DB_srpid] || !row[DB_srpgN] || !row[DB_srptype] || !row[DB_srpverifier] || !row[DB_srpsalt] ||
613 (userinfo && (!(row[DB_srpinfo] = BUF_strdup(userinfo)))) ||
614 !update_index(db, bio_err, row))
615 {
616 if (row[DB_srpid]) OPENSSL_free(row[DB_srpid]);
617 if (row[DB_srpgN]) OPENSSL_free(row[DB_srpgN]);
618 if (row[DB_srpinfo]) OPENSSL_free(row[DB_srpinfo]);
619 if (row[DB_srptype]) OPENSSL_free(row[DB_srptype]);
620 if (row[DB_srpverifier]) OPENSSL_free(row[DB_srpverifier]);
621 if (row[DB_srpsalt]) OPENSSL_free(row[DB_srpsalt]);
622 goto err;
623 }
624 doupdatedb = 1;
625 }
626 }
627 else if (modify_user)
628 {
629 if (userindex < 0)
630 {
631 BIO_printf(bio_err,"user \"%s\" does not exist, operation ignored.\n",user);
632 errors++;
633 }
634 else
635 {
636
637 char **row = (char **)sk_OPENSSL_PSTRING_value(db->db->data, userindex);
638 char type = row[DB_srptype][0];
639 if (type == 'v')
640 {
641 BIO_printf(bio_err,"user \"%s\" already updated, operation ignored.\n",user);
642 errors++;
643 }
644 else
645 {
646 char *gNid;
647
648 if (row[DB_srptype][0] == 'V')
649 {
650 int user_gN;
651 char **irow = NULL;
652 VERBOSE BIO_printf(bio_err,"Verifying password for user \"%s\"\n",user);
653 if ( (user_gN = get_index(db, row[DB_srpgN], DB_SRP_INDEX)) >= 0)
654 irow = (char **)sk_OPENSSL_PSTRING_value(db->db->data, user_gN);
655
656 if (!srp_verify_user(user, row[DB_srpverifier], row[DB_srpsalt], irow ? irow[DB_srpsalt] : row[DB_srpgN], irow ? irow[DB_srpverifier] : NULL, passin, bio_err, verbose))
657 {
658 BIO_printf(bio_err, "Invalid password for user \"%s\", operation abandoned.\n", user);
659 errors++;
660 goto err;
661 }
662 }
663 VERBOSE BIO_printf(bio_err,"Password for user \"%s\" ok.\n",user);
664
665 if (!(gNid=srp_create_user(user,&(row[DB_srpverifier]), &(row[DB_srpsalt]),gNrow?gNrow[DB_srpsalt]:NULL, gNrow?gNrow[DB_srpverifier]:NULL, passout, bio_err,verbose)))
666 {
667 BIO_printf(bio_err, "Cannot create srp verifier for user \"%s\", operation abandoned.\n", user);
668 errors++;
669 goto err;
670 }
671
672 row[DB_srptype][0] = 'v';
673 row[DB_srpgN] = BUF_strdup(gNid);
674
675 if (!row[DB_srpid] || !row[DB_srpgN] || !row[DB_srptype] || !row[DB_srpverifier] || !row[DB_srpsalt] ||
676 (userinfo && (!(row[DB_srpinfo] = BUF_strdup(userinfo)))))
677 goto err;
678
679 doupdatedb = 1;
680 }
681 }
682 }
683 else if (delete_user)
684 {
685 if (userindex < 0)
686 {
687 BIO_printf(bio_err, "user \"%s\" does not exist, operation ignored. t\n", user);
688 errors++;
689 }
690 else
691 {
692 char **xpp = (char **)sk_OPENSSL_PSTRING_value(db->db->data, userindex);
693 BIO_printf(bio_err, "user \"%s\" revoked. t\n", user);
694
695 xpp[DB_srptype][0] = 'R';
696
697 doupdatedb = 1;
698 }
699 }
700 if (--argc > 0)
701 user = *(argv++) ;
702 else
703 {
704 user = NULL;
705 list_user = 0;
706 }
707 }
708
709 VERBOSE BIO_printf(bio_err,"User processing done.\n");
710
711
712 if (doupdatedb)
713 {
714 /* Lets check some fields */
715 for (i = 0; i < sk_OPENSSL_PSTRING_num(db->db->data); i++)
716 {
717 pp = (char **)sk_OPENSSL_PSTRING_value(db->db->data, i);
718
719 if (pp[DB_srptype][0] == 'v')
720 {
721 pp[DB_srptype][0] = 'V';
722 print_user(db, bio_err, i, verbose);
723 }
724 }
725
726 VERBOSE BIO_printf(bio_err, "Trying to update srpvfile.\n");
727 if (!save_index(dbfile, "new", db)) goto err;
728
729 VERBOSE BIO_printf(bio_err, "Temporary srpvfile created.\n");
730 if (!rotate_index(dbfile, "new", "old")) goto err;
731
732 VERBOSE BIO_printf(bio_err, "srpvfile updated.\n");
733 }
734
735 ret = (errors != 0);
736err:
737 if (errors != 0)
738 VERBOSE BIO_printf(bio_err,"User errors %d.\n",errors);
739
740 VERBOSE BIO_printf(bio_err,"SRP terminating with code %d.\n",ret);
741 if(tofree)
742 OPENSSL_free(tofree);
743 if (ret) ERR_print_errors(bio_err);
744 if (randfile) app_RAND_write_file(randfile, bio_err);
745 if (conf) NCONF_free(conf);
746 if (db) free_index(db);
747
748 OBJ_cleanup();
749 apps_shutdown();
750 OPENSSL_EXIT(ret);
751 }
752
753
754
755#endif
756
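The heavy lifting in the new srp app is SRP_create_verifier(): it derives a salt/verifier pair for a user, and srp_verify_user() above re-checks a password simply by recomputing the verifier with the stored salt and comparing. The sketch below shows the same idea with the BIGNUM variants; it is an illustration rather than the app code, and it assumes that SRP_get_default_gN(NULL) returns a usable built-in group and that SRP_create_verifier_BN() reuses *salt when it is already set (which is how the verify path behaves).

/* Enroll "alice", then verify the password by recomputing the verifier. */
#include <stdio.h>
#include <openssl/bn.h>
#include <openssl/srp.h>

int main(void)
{
	SRP_gN *GN = SRP_get_default_gN(NULL);	/* built-in g,N pair */
	BIGNUM *salt = NULL, *verifier = NULL, *salt2 = NULL, *check = NULL;
	int ok = 0;

	/* Enrolment: a random salt is generated because *salt == NULL. */
	if (GN == NULL ||
	    !SRP_create_verifier_BN("alice", "secret", &salt, &verifier,
	    GN->N, GN->g))
		goto done;

	/* Verification: recompute with the stored salt and compare. */
	if ((salt2 = BN_dup(salt)) == NULL ||
	    !SRP_create_verifier_BN("alice", "secret", &salt2, &check,
	    GN->N, GN->g))
		goto done;
	ok = BN_cmp(check, verifier) == 0;
	printf("password %s\n", ok ? "ok" : "mismatch");

 done:
	BN_free(salt); BN_free(verifier); BN_free(salt2); BN_free(check);
	return ok ? 0 : 1;
}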
diff --git a/src/lib/libssl/src/crypto/aes/asm/aes-586.pl b/src/lib/libssl/src/crypto/aes/asm/aes-586.pl
index aab40e6f1c..687ed811be 100644
--- a/src/lib/libssl/src/crypto/aes/asm/aes-586.pl
+++ b/src/lib/libssl/src/crypto/aes/asm/aes-586.pl
@@ -39,7 +39,7 @@
39# but exhibits up to 10% improvement on other cores. 39# but exhibits up to 10% improvement on other cores.
40# 40#
41# Second version is "monolithic" replacement for aes_core.c, which in 41# Second version is "monolithic" replacement for aes_core.c, which in
42# addition to AES_[de|en]crypt implements AES_set_[de|en]cryption_key. 42# addition to AES_[de|en]crypt implements private_AES_set_[de|en]cryption_key.
43# This made it possible to implement little-endian variant of the 43# This made it possible to implement little-endian variant of the
44# algorithm without modifying the base C code. Motivating factor for 44# algorithm without modifying the base C code. Motivating factor for
45# the undertaken effort was that it appeared that in tight IA-32 45# the undertaken effort was that it appeared that in tight IA-32
@@ -2854,12 +2854,12 @@ sub enckey()
2854 &set_label("exit"); 2854 &set_label("exit");
2855&function_end("_x86_AES_set_encrypt_key"); 2855&function_end("_x86_AES_set_encrypt_key");
2856 2856
2857# int AES_set_encrypt_key(const unsigned char *userKey, const int bits, 2857# int private_AES_set_encrypt_key(const unsigned char *userKey, const int bits,
2858# AES_KEY *key) 2858# AES_KEY *key)
2859&function_begin_B("AES_set_encrypt_key"); 2859&function_begin_B("private_AES_set_encrypt_key");
2860 &call ("_x86_AES_set_encrypt_key"); 2860 &call ("_x86_AES_set_encrypt_key");
2861 &ret (); 2861 &ret ();
2862&function_end_B("AES_set_encrypt_key"); 2862&function_end_B("private_AES_set_encrypt_key");
2863 2863
2864sub deckey() 2864sub deckey()
2865{ my ($i,$key,$tp1,$tp2,$tp4,$tp8) = @_; 2865{ my ($i,$key,$tp1,$tp2,$tp4,$tp8) = @_;
@@ -2916,9 +2916,9 @@ sub deckey()
2916 &mov (&DWP(4*$i,$key),$tp1); 2916 &mov (&DWP(4*$i,$key),$tp1);
2917} 2917}
2918 2918
2919# int AES_set_decrypt_key(const unsigned char *userKey, const int bits, 2919# int private_AES_set_decrypt_key(const unsigned char *userKey, const int bits,
2920# AES_KEY *key) 2920# AES_KEY *key)
2921&function_begin_B("AES_set_decrypt_key"); 2921&function_begin_B("private_AES_set_decrypt_key");
2922 &call ("_x86_AES_set_encrypt_key"); 2922 &call ("_x86_AES_set_encrypt_key");
2923 &cmp ("eax",0); 2923 &cmp ("eax",0);
2924 &je (&label("proceed")); 2924 &je (&label("proceed"));
@@ -2974,7 +2974,7 @@ sub deckey()
2974 &jb (&label("permute")); 2974 &jb (&label("permute"));
2975 2975
2976 &xor ("eax","eax"); # return success 2976 &xor ("eax","eax"); # return success
2977&function_end("AES_set_decrypt_key"); 2977&function_end("private_AES_set_decrypt_key");
2978&asciz("AES for x86, CRYPTOGAMS by <appro\@openssl.org>"); 2978&asciz("AES for x86, CRYPTOGAMS by <appro\@openssl.org>");
2979 2979
2980&asm_finish(); 2980&asm_finish();
diff --git a/src/lib/libssl/src/crypto/aes/asm/aes-armv4.pl b/src/lib/libssl/src/crypto/aes/asm/aes-armv4.pl
index c51ee1fbf6..86b86c4a0f 100644
--- a/src/lib/libssl/src/crypto/aes/asm/aes-armv4.pl
+++ b/src/lib/libssl/src/crypto/aes/asm/aes-armv4.pl
@@ -27,6 +27,11 @@
27# Rescheduling for dual-issue pipeline resulted in 12% improvement on 27# Rescheduling for dual-issue pipeline resulted in 12% improvement on
28# Cortex A8 core and ~25 cycles per byte processed with 128-bit key. 28# Cortex A8 core and ~25 cycles per byte processed with 128-bit key.
29 29
30# February 2011.
31#
32# Profiler-assisted and platform-specific optimization resulted in 16%
33# improvement on Cortex A8 core and ~21.5 cycles per byte.
34
30while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} 35while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
31open STDOUT,">$output"; 36open STDOUT,">$output";
32 37
@@ -46,6 +51,7 @@ $key="r11";
46$rounds="r12"; 51$rounds="r12";
47 52
48$code=<<___; 53$code=<<___;
54#include "arm_arch.h"
49.text 55.text
50.code 32 56.code 32
51 57
@@ -166,7 +172,7 @@ AES_encrypt:
166 mov $rounds,r0 @ inp 172 mov $rounds,r0 @ inp
167 mov $key,r2 173 mov $key,r2
168 sub $tbl,r3,#AES_encrypt-AES_Te @ Te 174 sub $tbl,r3,#AES_encrypt-AES_Te @ Te
169 175#if __ARM_ARCH__<7
170 ldrb $s0,[$rounds,#3] @ load input data in endian-neutral 176 ldrb $s0,[$rounds,#3] @ load input data in endian-neutral
171 ldrb $t1,[$rounds,#2] @ manner... 177 ldrb $t1,[$rounds,#2] @ manner...
172 ldrb $t2,[$rounds,#1] 178 ldrb $t2,[$rounds,#1]
@@ -195,10 +201,33 @@ AES_encrypt:
195 orr $s3,$s3,$t1,lsl#8 201 orr $s3,$s3,$t1,lsl#8
196 orr $s3,$s3,$t2,lsl#16 202 orr $s3,$s3,$t2,lsl#16
197 orr $s3,$s3,$t3,lsl#24 203 orr $s3,$s3,$t3,lsl#24
198 204#else
205 ldr $s0,[$rounds,#0]
206 ldr $s1,[$rounds,#4]
207 ldr $s2,[$rounds,#8]
208 ldr $s3,[$rounds,#12]
209#ifdef __ARMEL__
210 rev $s0,$s0
211 rev $s1,$s1
212 rev $s2,$s2
213 rev $s3,$s3
214#endif
215#endif
199 bl _armv4_AES_encrypt 216 bl _armv4_AES_encrypt
200 217
201 ldr $rounds,[sp],#4 @ pop out 218 ldr $rounds,[sp],#4 @ pop out
219#if __ARM_ARCH__>=7
220#ifdef __ARMEL__
221 rev $s0,$s0
222 rev $s1,$s1
223 rev $s2,$s2
224 rev $s3,$s3
225#endif
226 str $s0,[$rounds,#0]
227 str $s1,[$rounds,#4]
228 str $s2,[$rounds,#8]
229 str $s3,[$rounds,#12]
230#else
202 mov $t1,$s0,lsr#24 @ write output in endian-neutral 231 mov $t1,$s0,lsr#24 @ write output in endian-neutral
203 mov $t2,$s0,lsr#16 @ manner... 232 mov $t2,$s0,lsr#16 @ manner...
204 mov $t3,$s0,lsr#8 233 mov $t3,$s0,lsr#8
@@ -227,11 +256,15 @@ AES_encrypt:
227 strb $t2,[$rounds,#13] 256 strb $t2,[$rounds,#13]
228 strb $t3,[$rounds,#14] 257 strb $t3,[$rounds,#14]
229 strb $s3,[$rounds,#15] 258 strb $s3,[$rounds,#15]
230 259#endif
260#if __ARM_ARCH__>=5
261 ldmia sp!,{r4-r12,pc}
262#else
231 ldmia sp!,{r4-r12,lr} 263 ldmia sp!,{r4-r12,lr}
232 tst lr,#1 264 tst lr,#1
233 moveq pc,lr @ be binary compatible with V4, yet 265 moveq pc,lr @ be binary compatible with V4, yet
234 bx lr @ interoperable with Thumb ISA:-) 266 bx lr @ interoperable with Thumb ISA:-)
267#endif
235.size AES_encrypt,.-AES_encrypt 268.size AES_encrypt,.-AES_encrypt
236 269
237.type _armv4_AES_encrypt,%function 270.type _armv4_AES_encrypt,%function
@@ -271,11 +304,11 @@ _armv4_AES_encrypt:
271 and $i2,lr,$s2,lsr#16 @ i1 304 and $i2,lr,$s2,lsr#16 @ i1
272 eor $t3,$t3,$i3,ror#8 305 eor $t3,$t3,$i3,ror#8
273 and $i3,lr,$s2 306 and $i3,lr,$s2
274 eor $s1,$s1,$t1,ror#24
275 ldr $i1,[$tbl,$i1,lsl#2] @ Te2[s2>>8] 307 ldr $i1,[$tbl,$i1,lsl#2] @ Te2[s2>>8]
308 eor $s1,$s1,$t1,ror#24
309 ldr $i2,[$tbl,$i2,lsl#2] @ Te1[s2>>16]
276 mov $s2,$s2,lsr#24 310 mov $s2,$s2,lsr#24
277 311
278 ldr $i2,[$tbl,$i2,lsl#2] @ Te1[s2>>16]
279 ldr $i3,[$tbl,$i3,lsl#2] @ Te3[s2>>0] 312 ldr $i3,[$tbl,$i3,lsl#2] @ Te3[s2>>0]
280 eor $s0,$s0,$i1,ror#16 313 eor $s0,$s0,$i1,ror#16
281 ldr $s2,[$tbl,$s2,lsl#2] @ Te0[s2>>24] 314 ldr $s2,[$tbl,$s2,lsl#2] @ Te0[s2>>24]
@@ -284,16 +317,16 @@ _armv4_AES_encrypt:
284 and $i2,lr,$s3,lsr#8 @ i1 317 and $i2,lr,$s3,lsr#8 @ i1
285 eor $t3,$t3,$i3,ror#16 318 eor $t3,$t3,$i3,ror#16
286 and $i3,lr,$s3,lsr#16 @ i2 319 and $i3,lr,$s3,lsr#16 @ i2
287 eor $s2,$s2,$t2,ror#16
288 ldr $i1,[$tbl,$i1,lsl#2] @ Te3[s3>>0] 320 ldr $i1,[$tbl,$i1,lsl#2] @ Te3[s3>>0]
321 eor $s2,$s2,$t2,ror#16
322 ldr $i2,[$tbl,$i2,lsl#2] @ Te2[s3>>8]
289 mov $s3,$s3,lsr#24 323 mov $s3,$s3,lsr#24
290 324
291 ldr $i2,[$tbl,$i2,lsl#2] @ Te2[s3>>8]
292 ldr $i3,[$tbl,$i3,lsl#2] @ Te1[s3>>16] 325 ldr $i3,[$tbl,$i3,lsl#2] @ Te1[s3>>16]
293 eor $s0,$s0,$i1,ror#24 326 eor $s0,$s0,$i1,ror#24
294 ldr $s3,[$tbl,$s3,lsl#2] @ Te0[s3>>24]
295 eor $s1,$s1,$i2,ror#16
296 ldr $i1,[$key],#16 327 ldr $i1,[$key],#16
328 eor $s1,$s1,$i2,ror#16
329 ldr $s3,[$tbl,$s3,lsl#2] @ Te0[s3>>24]
297 eor $s2,$s2,$i3,ror#8 330 eor $s2,$s2,$i3,ror#8
298 ldr $t1,[$key,#-12] 331 ldr $t1,[$key,#-12]
299 eor $s3,$s3,$t3,ror#8 332 eor $s3,$s3,$t3,ror#8
@@ -333,11 +366,11 @@ _armv4_AES_encrypt:
333 and $i2,lr,$s2,lsr#16 @ i1 366 and $i2,lr,$s2,lsr#16 @ i1
334 eor $t3,$i3,$t3,lsl#8 367 eor $t3,$i3,$t3,lsl#8
335 and $i3,lr,$s2 368 and $i3,lr,$s2
336 eor $s1,$t1,$s1,lsl#24
337 ldrb $i1,[$tbl,$i1,lsl#2] @ Te4[s2>>8] 369 ldrb $i1,[$tbl,$i1,lsl#2] @ Te4[s2>>8]
370 eor $s1,$t1,$s1,lsl#24
371 ldrb $i2,[$tbl,$i2,lsl#2] @ Te4[s2>>16]
338 mov $s2,$s2,lsr#24 372 mov $s2,$s2,lsr#24
339 373
340 ldrb $i2,[$tbl,$i2,lsl#2] @ Te4[s2>>16]
341 ldrb $i3,[$tbl,$i3,lsl#2] @ Te4[s2>>0] 374 ldrb $i3,[$tbl,$i3,lsl#2] @ Te4[s2>>0]
342 eor $s0,$i1,$s0,lsl#8 375 eor $s0,$i1,$s0,lsl#8
343 ldrb $s2,[$tbl,$s2,lsl#2] @ Te4[s2>>24] 376 ldrb $s2,[$tbl,$s2,lsl#2] @ Te4[s2>>24]
@@ -346,15 +379,15 @@ _armv4_AES_encrypt:
346 and $i2,lr,$s3,lsr#8 @ i1 379 and $i2,lr,$s3,lsr#8 @ i1
347 eor $t3,$i3,$t3,lsl#8 380 eor $t3,$i3,$t3,lsl#8
348 and $i3,lr,$s3,lsr#16 @ i2 381 and $i3,lr,$s3,lsr#16 @ i2
349 eor $s2,$t2,$s2,lsl#24
350 ldrb $i1,[$tbl,$i1,lsl#2] @ Te4[s3>>0] 382 ldrb $i1,[$tbl,$i1,lsl#2] @ Te4[s3>>0]
383 eor $s2,$t2,$s2,lsl#24
384 ldrb $i2,[$tbl,$i2,lsl#2] @ Te4[s3>>8]
351 mov $s3,$s3,lsr#24 385 mov $s3,$s3,lsr#24
352 386
353 ldrb $i2,[$tbl,$i2,lsl#2] @ Te4[s3>>8]
354 ldrb $i3,[$tbl,$i3,lsl#2] @ Te4[s3>>16] 387 ldrb $i3,[$tbl,$i3,lsl#2] @ Te4[s3>>16]
355 eor $s0,$i1,$s0,lsl#8 388 eor $s0,$i1,$s0,lsl#8
356 ldrb $s3,[$tbl,$s3,lsl#2] @ Te4[s3>>24]
357 ldr $i1,[$key,#0] 389 ldr $i1,[$key,#0]
390 ldrb $s3,[$tbl,$s3,lsl#2] @ Te4[s3>>24]
358 eor $s1,$s1,$i2,lsl#8 391 eor $s1,$s1,$i2,lsl#8
359 ldr $t1,[$key,#4] 392 ldr $t1,[$key,#4]
360 eor $s2,$s2,$i3,lsl#16 393 eor $s2,$s2,$i3,lsl#16
@@ -371,10 +404,11 @@ _armv4_AES_encrypt:
371 ldr pc,[sp],#4 @ pop and return 404 ldr pc,[sp],#4 @ pop and return
372.size _armv4_AES_encrypt,.-_armv4_AES_encrypt 405.size _armv4_AES_encrypt,.-_armv4_AES_encrypt
373 406
374.global AES_set_encrypt_key 407.global private_AES_set_encrypt_key
375.type AES_set_encrypt_key,%function 408.type private_AES_set_encrypt_key,%function
376.align 5 409.align 5
377AES_set_encrypt_key: 410private_AES_set_encrypt_key:
411_armv4_AES_set_encrypt_key:
378 sub r3,pc,#8 @ AES_set_encrypt_key 412 sub r3,pc,#8 @ AES_set_encrypt_key
379 teq r0,#0 413 teq r0,#0
380 moveq r0,#-1 414 moveq r0,#-1
@@ -392,12 +426,13 @@ AES_set_encrypt_key:
392 bne .Labrt 426 bne .Labrt
393 427
394.Lok: stmdb sp!,{r4-r12,lr} 428.Lok: stmdb sp!,{r4-r12,lr}
395 sub $tbl,r3,#AES_set_encrypt_key-AES_Te-1024 @ Te4 429 sub $tbl,r3,#_armv4_AES_set_encrypt_key-AES_Te-1024 @ Te4
396 430
397 mov $rounds,r0 @ inp 431 mov $rounds,r0 @ inp
398 mov lr,r1 @ bits 432 mov lr,r1 @ bits
399 mov $key,r2 @ key 433 mov $key,r2 @ key
400 434
435#if __ARM_ARCH__<7
401 ldrb $s0,[$rounds,#3] @ load input data in endian-neutral 436 ldrb $s0,[$rounds,#3] @ load input data in endian-neutral
402 ldrb $t1,[$rounds,#2] @ manner... 437 ldrb $t1,[$rounds,#2] @ manner...
403 ldrb $t2,[$rounds,#1] 438 ldrb $t2,[$rounds,#1]
@@ -430,6 +465,22 @@ AES_set_encrypt_key:
430 orr $s3,$s3,$t3,lsl#24 465 orr $s3,$s3,$t3,lsl#24
431 str $s2,[$key,#-8] 466 str $s2,[$key,#-8]
432 str $s3,[$key,#-4] 467 str $s3,[$key,#-4]
468#else
469 ldr $s0,[$rounds,#0]
470 ldr $s1,[$rounds,#4]
471 ldr $s2,[$rounds,#8]
472 ldr $s3,[$rounds,#12]
473#ifdef __ARMEL__
474 rev $s0,$s0
475 rev $s1,$s1
476 rev $s2,$s2
477 rev $s3,$s3
478#endif
479 str $s0,[$key],#16
480 str $s1,[$key,#-12]
481 str $s2,[$key,#-8]
482 str $s3,[$key,#-4]
483#endif
433 484
434 teq lr,#128 485 teq lr,#128
435 bne .Lnot128 486 bne .Lnot128
@@ -466,6 +517,7 @@ AES_set_encrypt_key:
466 b .Ldone 517 b .Ldone
467 518
468.Lnot128: 519.Lnot128:
520#if __ARM_ARCH__<7
469 ldrb $i2,[$rounds,#19] 521 ldrb $i2,[$rounds,#19]
470 ldrb $t1,[$rounds,#18] 522 ldrb $t1,[$rounds,#18]
471 ldrb $t2,[$rounds,#17] 523 ldrb $t2,[$rounds,#17]
@@ -482,6 +534,16 @@ AES_set_encrypt_key:
482 str $i2,[$key],#8 534 str $i2,[$key],#8
483 orr $i3,$i3,$t3,lsl#24 535 orr $i3,$i3,$t3,lsl#24
484 str $i3,[$key,#-4] 536 str $i3,[$key,#-4]
537#else
538 ldr $i2,[$rounds,#16]
539 ldr $i3,[$rounds,#20]
540#ifdef __ARMEL__
541 rev $i2,$i2
542 rev $i3,$i3
543#endif
544 str $i2,[$key],#8
545 str $i3,[$key,#-4]
546#endif
485 547
486 teq lr,#192 548 teq lr,#192
487 bne .Lnot192 549 bne .Lnot192
@@ -526,6 +588,7 @@ AES_set_encrypt_key:
526 b .L192_loop 588 b .L192_loop
527 589
528.Lnot192: 590.Lnot192:
591#if __ARM_ARCH__<7
529 ldrb $i2,[$rounds,#27] 592 ldrb $i2,[$rounds,#27]
530 ldrb $t1,[$rounds,#26] 593 ldrb $t1,[$rounds,#26]
531 ldrb $t2,[$rounds,#25] 594 ldrb $t2,[$rounds,#25]
@@ -542,6 +605,16 @@ AES_set_encrypt_key:
542 str $i2,[$key],#8 605 str $i2,[$key],#8
543 orr $i3,$i3,$t3,lsl#24 606 orr $i3,$i3,$t3,lsl#24
544 str $i3,[$key,#-4] 607 str $i3,[$key,#-4]
608#else
609 ldr $i2,[$rounds,#24]
610 ldr $i3,[$rounds,#28]
611#ifdef __ARMEL__
612 rev $i2,$i2
613 rev $i3,$i3
614#endif
615 str $i2,[$key],#8
616 str $i3,[$key,#-4]
617#endif
545 618
546 mov $rounds,#14 619 mov $rounds,#14
547 str $rounds,[$key,#240-32] 620 str $rounds,[$key,#240-32]
@@ -606,14 +679,14 @@ AES_set_encrypt_key:
606.Labrt: tst lr,#1 679.Labrt: tst lr,#1
607 moveq pc,lr @ be binary compatible with V4, yet 680 moveq pc,lr @ be binary compatible with V4, yet
608 bx lr @ interoperable with Thumb ISA:-) 681 bx lr @ interoperable with Thumb ISA:-)
609.size AES_set_encrypt_key,.-AES_set_encrypt_key 682.size private_AES_set_encrypt_key,.-private_AES_set_encrypt_key
610 683
611.global AES_set_decrypt_key 684.global private_AES_set_decrypt_key
612.type AES_set_decrypt_key,%function 685.type private_AES_set_decrypt_key,%function
613.align 5 686.align 5
614AES_set_decrypt_key: 687private_AES_set_decrypt_key:
615 str lr,[sp,#-4]! @ push lr 688 str lr,[sp,#-4]! @ push lr
616 bl AES_set_encrypt_key 689 bl _armv4_AES_set_encrypt_key
617 teq r0,#0 690 teq r0,#0
618 ldrne lr,[sp],#4 @ pop lr 691 ldrne lr,[sp],#4 @ pop lr
619 bne .Labrt 692 bne .Labrt
@@ -692,11 +765,15 @@ $code.=<<___;
692 bne .Lmix 765 bne .Lmix
693 766
694 mov r0,#0 767 mov r0,#0
768#if __ARM_ARCH__>=5
769 ldmia sp!,{r4-r12,pc}
770#else
695 ldmia sp!,{r4-r12,lr} 771 ldmia sp!,{r4-r12,lr}
696 tst lr,#1 772 tst lr,#1
697 moveq pc,lr @ be binary compatible with V4, yet 773 moveq pc,lr @ be binary compatible with V4, yet
698 bx lr @ interoperable with Thumb ISA:-) 774 bx lr @ interoperable with Thumb ISA:-)
699.size AES_set_decrypt_key,.-AES_set_decrypt_key 775#endif
776.size private_AES_set_decrypt_key,.-private_AES_set_decrypt_key
700 777
701.type AES_Td,%object 778.type AES_Td,%object
702.align 5 779.align 5
@@ -811,7 +888,7 @@ AES_decrypt:
811 mov $rounds,r0 @ inp 888 mov $rounds,r0 @ inp
812 mov $key,r2 889 mov $key,r2
813 sub $tbl,r3,#AES_decrypt-AES_Td @ Td 890 sub $tbl,r3,#AES_decrypt-AES_Td @ Td
814 891#if __ARM_ARCH__<7
815 ldrb $s0,[$rounds,#3] @ load input data in endian-neutral 892 ldrb $s0,[$rounds,#3] @ load input data in endian-neutral
816 ldrb $t1,[$rounds,#2] @ manner... 893 ldrb $t1,[$rounds,#2] @ manner...
817 ldrb $t2,[$rounds,#1] 894 ldrb $t2,[$rounds,#1]
@@ -840,10 +917,33 @@ AES_decrypt:
840 orr $s3,$s3,$t1,lsl#8 917 orr $s3,$s3,$t1,lsl#8
841 orr $s3,$s3,$t2,lsl#16 918 orr $s3,$s3,$t2,lsl#16
842 orr $s3,$s3,$t3,lsl#24 919 orr $s3,$s3,$t3,lsl#24
843 920#else
921 ldr $s0,[$rounds,#0]
922 ldr $s1,[$rounds,#4]
923 ldr $s2,[$rounds,#8]
924 ldr $s3,[$rounds,#12]
925#ifdef __ARMEL__
926 rev $s0,$s0
927 rev $s1,$s1
928 rev $s2,$s2
929 rev $s3,$s3
930#endif
931#endif
844 bl _armv4_AES_decrypt 932 bl _armv4_AES_decrypt
845 933
846 ldr $rounds,[sp],#4 @ pop out 934 ldr $rounds,[sp],#4 @ pop out
935#if __ARM_ARCH__>=7
936#ifdef __ARMEL__
937 rev $s0,$s0
938 rev $s1,$s1
939 rev $s2,$s2
940 rev $s3,$s3
941#endif
942 str $s0,[$rounds,#0]
943 str $s1,[$rounds,#4]
944 str $s2,[$rounds,#8]
945 str $s3,[$rounds,#12]
946#else
847 mov $t1,$s0,lsr#24 @ write output in endian-neutral 947 mov $t1,$s0,lsr#24 @ write output in endian-neutral
848 mov $t2,$s0,lsr#16 @ manner... 948 mov $t2,$s0,lsr#16 @ manner...
849 mov $t3,$s0,lsr#8 949 mov $t3,$s0,lsr#8
@@ -872,11 +972,15 @@ AES_decrypt:
872 strb $t2,[$rounds,#13] 972 strb $t2,[$rounds,#13]
873 strb $t3,[$rounds,#14] 973 strb $t3,[$rounds,#14]
874 strb $s3,[$rounds,#15] 974 strb $s3,[$rounds,#15]
875 975#endif
976#if __ARM_ARCH__>=5
977 ldmia sp!,{r4-r12,pc}
978#else
876 ldmia sp!,{r4-r12,lr} 979 ldmia sp!,{r4-r12,lr}
877 tst lr,#1 980 tst lr,#1
878 moveq pc,lr @ be binary compatible with V4, yet 981 moveq pc,lr @ be binary compatible with V4, yet
879 bx lr @ interoperable with Thumb ISA:-) 982 bx lr @ interoperable with Thumb ISA:-)
983#endif
880.size AES_decrypt,.-AES_decrypt 984.size AES_decrypt,.-AES_decrypt
881 985
882.type _armv4_AES_decrypt,%function 986.type _armv4_AES_decrypt,%function
@@ -916,11 +1020,11 @@ _armv4_AES_decrypt:
916 and $i2,lr,$s2 @ i1 1020 and $i2,lr,$s2 @ i1
917 eor $t3,$i3,$t3,ror#8 1021 eor $t3,$i3,$t3,ror#8
918 and $i3,lr,$s2,lsr#16 1022 and $i3,lr,$s2,lsr#16
919 eor $s1,$s1,$t1,ror#8
920 ldr $i1,[$tbl,$i1,lsl#2] @ Td2[s2>>8] 1023 ldr $i1,[$tbl,$i1,lsl#2] @ Td2[s2>>8]
1024 eor $s1,$s1,$t1,ror#8
1025 ldr $i2,[$tbl,$i2,lsl#2] @ Td3[s2>>0]
921 mov $s2,$s2,lsr#24 1026 mov $s2,$s2,lsr#24
922 1027
923 ldr $i2,[$tbl,$i2,lsl#2] @ Td3[s2>>0]
924 ldr $i3,[$tbl,$i3,lsl#2] @ Td1[s2>>16] 1028 ldr $i3,[$tbl,$i3,lsl#2] @ Td1[s2>>16]
925 eor $s0,$s0,$i1,ror#16 1029 eor $s0,$s0,$i1,ror#16
926 ldr $s2,[$tbl,$s2,lsl#2] @ Td0[s2>>24] 1030 ldr $s2,[$tbl,$s2,lsl#2] @ Td0[s2>>24]
@@ -929,22 +1033,22 @@ _armv4_AES_decrypt:
929 and $i2,lr,$s3,lsr#8 @ i1 1033 and $i2,lr,$s3,lsr#8 @ i1
930 eor $t3,$i3,$t3,ror#8 1034 eor $t3,$i3,$t3,ror#8
931 and $i3,lr,$s3 @ i2 1035 and $i3,lr,$s3 @ i2
932 eor $s2,$s2,$t2,ror#8
933 ldr $i1,[$tbl,$i1,lsl#2] @ Td1[s3>>16] 1036 ldr $i1,[$tbl,$i1,lsl#2] @ Td1[s3>>16]
1037 eor $s2,$s2,$t2,ror#8
1038 ldr $i2,[$tbl,$i2,lsl#2] @ Td2[s3>>8]
934 mov $s3,$s3,lsr#24 1039 mov $s3,$s3,lsr#24
935 1040
936 ldr $i2,[$tbl,$i2,lsl#2] @ Td2[s3>>8]
937 ldr $i3,[$tbl,$i3,lsl#2] @ Td3[s3>>0] 1041 ldr $i3,[$tbl,$i3,lsl#2] @ Td3[s3>>0]
938 eor $s0,$s0,$i1,ror#8 1042 eor $s0,$s0,$i1,ror#8
939 ldr $s3,[$tbl,$s3,lsl#2] @ Td0[s3>>24] 1043 ldr $i1,[$key],#16
940 eor $s1,$s1,$i2,ror#16 1044 eor $s1,$s1,$i2,ror#16
1045 ldr $s3,[$tbl,$s3,lsl#2] @ Td0[s3>>24]
941 eor $s2,$s2,$i3,ror#24 1046 eor $s2,$s2,$i3,ror#24
942 ldr $i1,[$key],#16
943 eor $s3,$s3,$t3,ror#8
944 1047
945 ldr $t1,[$key,#-12] 1048 ldr $t1,[$key,#-12]
946 ldr $t2,[$key,#-8]
947 eor $s0,$s0,$i1 1049 eor $s0,$s0,$i1
1050 ldr $t2,[$key,#-8]
1051 eor $s3,$s3,$t3,ror#8
948 ldr $t3,[$key,#-4] 1052 ldr $t3,[$key,#-4]
949 and $i1,lr,$s0,lsr#16 1053 and $i1,lr,$s0,lsr#16
950 eor $s1,$s1,$t1 1054 eor $s1,$s1,$t1
@@ -985,11 +1089,11 @@ _armv4_AES_decrypt:
985 and $i1,lr,$s2,lsr#8 @ i0 1089 and $i1,lr,$s2,lsr#8 @ i0
986 eor $t2,$t2,$i2,lsl#8 1090 eor $t2,$t2,$i2,lsl#8
987 and $i2,lr,$s2 @ i1 1091 and $i2,lr,$s2 @ i1
988 eor $t3,$t3,$i3,lsl#8
989 ldrb $i1,[$tbl,$i1] @ Td4[s2>>8] 1092 ldrb $i1,[$tbl,$i1] @ Td4[s2>>8]
1093 eor $t3,$t3,$i3,lsl#8
1094 ldrb $i2,[$tbl,$i2] @ Td4[s2>>0]
990 and $i3,lr,$s2,lsr#16 1095 and $i3,lr,$s2,lsr#16
991 1096
992 ldrb $i2,[$tbl,$i2] @ Td4[s2>>0]
993 ldrb $s2,[$tbl,$s2,lsr#24] @ Td4[s2>>24] 1097 ldrb $s2,[$tbl,$s2,lsr#24] @ Td4[s2>>24]
994 eor $s0,$s0,$i1,lsl#8 1098 eor $s0,$s0,$i1,lsl#8
995 ldrb $i3,[$tbl,$i3] @ Td4[s2>>16] 1099 ldrb $i3,[$tbl,$i3] @ Td4[s2>>16]
@@ -997,11 +1101,11 @@ _armv4_AES_decrypt:
997 and $i1,lr,$s3,lsr#16 @ i0 1101 and $i1,lr,$s3,lsr#16 @ i0
998 eor $s2,$t2,$s2,lsl#16 1102 eor $s2,$t2,$s2,lsl#16
999 and $i2,lr,$s3,lsr#8 @ i1 1103 and $i2,lr,$s3,lsr#8 @ i1
1000 eor $t3,$t3,$i3,lsl#16
1001 ldrb $i1,[$tbl,$i1] @ Td4[s3>>16] 1104 ldrb $i1,[$tbl,$i1] @ Td4[s3>>16]
1105 eor $t3,$t3,$i3,lsl#16
1106 ldrb $i2,[$tbl,$i2] @ Td4[s3>>8]
1002 and $i3,lr,$s3 @ i2 1107 and $i3,lr,$s3 @ i2
1003 1108
1004 ldrb $i2,[$tbl,$i2] @ Td4[s3>>8]
1005 ldrb $i3,[$tbl,$i3] @ Td4[s3>>0] 1109 ldrb $i3,[$tbl,$i3] @ Td4[s3>>0]
1006 ldrb $s3,[$tbl,$s3,lsr#24] @ Td4[s3>>24] 1110 ldrb $s3,[$tbl,$s3,lsr#24] @ Td4[s3>>24]
1007 eor $s0,$s0,$i1,lsl#16 1111 eor $s0,$s0,$i1,lsl#16
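Most of the aes-armv4.pl churn above is one transformation applied repeatedly: on __ARM_ARCH__>=7 the byte-by-byte, endian-neutral ldrb/orr sequences become word loads (ldr) plus a rev byte swap on little-endian (__ARMEL__) cores, and function exits collapse to a single ldmia ...,{...,pc} on ARMv5 and later. The C fragment below only illustrates the two equivalent big-endian load strategies; __builtin_bswap32 is a GCC/clang builtin standing in for the rev instruction and is an assumption, not something the perl script emits.

#include <stdint.h>
#include <string.h>

/* Byte-at-a-time load: alignment- and endian-neutral, the pattern the
 * pre-ARMv7 path spells out with ldrb/orr. */
static uint32_t load_be32_bytes(const unsigned char *p)
{
	return ((uint32_t)p[0] << 24) | ((uint32_t)p[1] << 16) |
	       ((uint32_t)p[2] << 8) | (uint32_t)p[3];
}

/* Word load plus byte swap: what the ARMv7 path does with ldr + rev on a
 * little-endian core; memcpy keeps the load alignment-safe in C. */
static uint32_t load_be32_word(const unsigned char *p)
{
	uint32_t w;

	memcpy(&w, p, sizeof(w));
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
	w = __builtin_bswap32(w);	/* plays the role of "rev" */
#endif
	return w;
}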
diff --git a/src/lib/libssl/src/crypto/aes/asm/aes-mips.pl b/src/lib/libssl/src/crypto/aes/asm/aes-mips.pl
new file mode 100644
index 0000000000..2ce6deffc8
--- /dev/null
+++ b/src/lib/libssl/src/crypto/aes/asm/aes-mips.pl
@@ -0,0 +1,1611 @@
1#!/usr/bin/env perl
2
3# ====================================================================
4# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9
10# AES for MIPS
11
12# October 2010
13#
14# Code uses 1K[+256B] S-box and on single-issue core [such as R5000]
15# spends ~68 cycles per byte processed with 128-bit key. This is ~16%
16# faster than gcc-generated code, which is not very impressive. But
17# recall that compressed S-box requires extra processing, namely
18# additional rotations. Rotations are implemented with lwl/lwr pairs,
19# which are normally used for loading unaligned data. Another cool
20# thing about this module is its endian neutrality, which means that
21# it processes data without ever changing byte order...
22
23######################################################################
24# There are a number of MIPS ABIs in use; O32 and N32/64 are the most
25# widely used. Then there is a new contender: NUBI. It appears that if
26# one picks the latter, it's possible to arrange code in an ABI-neutral
27# manner. Therefore let's stick to the NUBI register layout:
28#
29($zero,$at,$t0,$t1,$t2)=map("\$$_",(0..2,24,25));
30($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
31($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8,$s9,$s10,$s11)=map("\$$_",(12..23));
32($gp,$tp,$sp,$fp,$ra)=map("\$$_",(3,28..31));
33#
34# The return value is placed in $a0. Following coding rules facilitate
35# interoperability:
36#
37# - never ever touch $tp, "thread pointer", former $gp;
38# - copy return value to $t0, former $v0 [or to $a0 if you're adapting
39# old code];
40# - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary;
41#
42# For reference here is register layout for N32/64 MIPS ABIs:
43#
44# ($zero,$at,$v0,$v1)=map("\$$_",(0..3));
45# ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
46# ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25));
47# ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23));
48# ($gp,$sp,$fp,$ra)=map("\$$_",(28..31));
49#
50$flavour = shift; # supported flavours are o32,n32,64,nubi32,nubi64
51
52if ($flavour =~ /64|n32/i) {
53 $PTR_ADD="dadd"; # incidentally works even on n32
54 $PTR_SUB="dsub"; # incidentally works even on n32
55 $REG_S="sd";
56 $REG_L="ld";
57 $PTR_SLL="dsll"; # incidentally works even on n32
58 $SZREG=8;
59} else {
60 $PTR_ADD="add";
61 $PTR_SUB="sub";
62 $REG_S="sw";
63 $REG_L="lw";
64 $PTR_SLL="sll";
65 $SZREG=4;
66}
67$pf = ($flavour =~ /nubi/i) ? $t0 : $t2;
68#
69# <appro@openssl.org>
70#
71######################################################################
72
73$big_endian=(`echo MIPSEL | $ENV{CC} -E -P -`=~/MIPSEL/)?1:0;
74
75for (@ARGV) { $output=$_ if (/^\w[\w\-]*\.\w+$/); }
76open STDOUT,">$output";
77
78if (!defined($big_endian))
79{ $big_endian=(unpack('L',pack('N',1))==1); }
80
81while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
82open STDOUT,">$output";
83
84my ($MSB,$LSB)=(0,3); # automatically converted to little-endian
85
86$code.=<<___;
87.text
88#ifdef OPENSSL_FIPSCANISTER
89# include <openssl/fipssyms.h>
90#endif
91
92#if !defined(__vxworks) || defined(__pic__)
93.option pic2
94#endif
95.set noat
96___
97
98{{{
99my $FRAMESIZE=16*$SZREG;
100my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? 0xc0fff008 : 0xc0ff0000;
101
102my ($inp,$out,$key,$Tbl,$s0,$s1,$s2,$s3)=($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7);
103my ($i0,$i1,$i2,$i3)=($at,$t0,$t1,$t2);
104my ($t0,$t1,$t2,$t3,$t4,$t5,$t6,$t7,$t8,$t9,$t10,$t11) = map("\$$_",(12..23));
105my ($key0,$cnt)=($gp,$fp);
106
107# instruction ordering is "stolen" from the output of the MIPSpro assembler
108# invoked with -mips3 -O3 arguments...
109$code.=<<___;
110.align 5
111.ent _mips_AES_encrypt
112_mips_AES_encrypt:
113 .frame $sp,0,$ra
114 .set reorder
115 lw $t0,0($key)
116 lw $t1,4($key)
117 lw $t2,8($key)
118 lw $t3,12($key)
119 lw $cnt,240($key)
120 $PTR_ADD $key0,$key,16
121
122 xor $s0,$t0
123 xor $s1,$t1
124 xor $s2,$t2
125 xor $s3,$t3
126
127 sub $cnt,1
128 _xtr $i0,$s1,16-2
129.Loop_enc:
130 _xtr $i1,$s2,16-2
131 _xtr $i2,$s3,16-2
132 _xtr $i3,$s0,16-2
133 and $i0,0x3fc
134 and $i1,0x3fc
135 and $i2,0x3fc
136 and $i3,0x3fc
137 $PTR_ADD $i0,$Tbl
138 $PTR_ADD $i1,$Tbl
139 $PTR_ADD $i2,$Tbl
140 $PTR_ADD $i3,$Tbl
141 lwl $t0,3($i0) # Te1[s1>>16]
142 lwl $t1,3($i1) # Te1[s2>>16]
143 lwl $t2,3($i2) # Te1[s3>>16]
144 lwl $t3,3($i3) # Te1[s0>>16]
145 lwr $t0,2($i0) # Te1[s1>>16]
146 lwr $t1,2($i1) # Te1[s2>>16]
147 lwr $t2,2($i2) # Te1[s3>>16]
148 lwr $t3,2($i3) # Te1[s0>>16]
149
150 _xtr $i0,$s2,8-2
151 _xtr $i1,$s3,8-2
152 _xtr $i2,$s0,8-2
153 _xtr $i3,$s1,8-2
154 and $i0,0x3fc
155 and $i1,0x3fc
156 and $i2,0x3fc
157 and $i3,0x3fc
158 $PTR_ADD $i0,$Tbl
159 $PTR_ADD $i1,$Tbl
160 $PTR_ADD $i2,$Tbl
161 $PTR_ADD $i3,$Tbl
162 lwl $t4,2($i0) # Te2[s2>>8]
163 lwl $t5,2($i1) # Te2[s3>>8]
164 lwl $t6,2($i2) # Te2[s0>>8]
165 lwl $t7,2($i3) # Te2[s1>>8]
166 lwr $t4,1($i0) # Te2[s2>>8]
167 lwr $t5,1($i1) # Te2[s3>>8]
168 lwr $t6,1($i2) # Te2[s0>>8]
169 lwr $t7,1($i3) # Te2[s1>>8]
170
171 _xtr $i0,$s3,0-2
172 _xtr $i1,$s0,0-2
173 _xtr $i2,$s1,0-2
174 _xtr $i3,$s2,0-2
175 and $i0,0x3fc
176 and $i1,0x3fc
177 and $i2,0x3fc
178 and $i3,0x3fc
179 $PTR_ADD $i0,$Tbl
180 $PTR_ADD $i1,$Tbl
181 $PTR_ADD $i2,$Tbl
182 $PTR_ADD $i3,$Tbl
183 lwl $t8,1($i0) # Te3[s3]
184 lwl $t9,1($i1) # Te3[s0]
185 lwl $t10,1($i2) # Te3[s1]
186 lwl $t11,1($i3) # Te3[s2]
187 lwr $t8,0($i0) # Te3[s3]
188 lwr $t9,0($i1) # Te3[s0]
189 lwr $t10,0($i2) # Te3[s1]
190 lwr $t11,0($i3) # Te3[s2]
191
192 _xtr $i0,$s0,24-2
193 _xtr $i1,$s1,24-2
194 _xtr $i2,$s2,24-2
195 _xtr $i3,$s3,24-2
196 and $i0,0x3fc
197 and $i1,0x3fc
198 and $i2,0x3fc
199 and $i3,0x3fc
200 $PTR_ADD $i0,$Tbl
201 $PTR_ADD $i1,$Tbl
202 $PTR_ADD $i2,$Tbl
203 $PTR_ADD $i3,$Tbl
204 xor $t0,$t4
205 xor $t1,$t5
206 xor $t2,$t6
207 xor $t3,$t7
208 lw $t4,0($i0) # Te0[s0>>24]
209 lw $t5,0($i1) # Te0[s1>>24]
210 lw $t6,0($i2) # Te0[s2>>24]
211 lw $t7,0($i3) # Te0[s3>>24]
212
213 lw $s0,0($key0)
214 lw $s1,4($key0)
215 lw $s2,8($key0)
216 lw $s3,12($key0)
217
218 xor $t0,$t8
219 xor $t1,$t9
220 xor $t2,$t10
221 xor $t3,$t11
222
223 xor $t0,$t4
224 xor $t1,$t5
225 xor $t2,$t6
226 xor $t3,$t7
227
228 sub $cnt,1
229 $PTR_ADD $key0,16
230 xor $s0,$t0
231 xor $s1,$t1
232 xor $s2,$t2
233 xor $s3,$t3
234 .set noreorder
235 bnez $cnt,.Loop_enc
236 _xtr $i0,$s1,16-2
237
238 .set reorder
239 _xtr $i1,$s2,16-2
240 _xtr $i2,$s3,16-2
241 _xtr $i3,$s0,16-2
242 and $i0,0x3fc
243 and $i1,0x3fc
244 and $i2,0x3fc
245 and $i3,0x3fc
246 $PTR_ADD $i0,$Tbl
247 $PTR_ADD $i1,$Tbl
248 $PTR_ADD $i2,$Tbl
249 $PTR_ADD $i3,$Tbl
250 lbu $t0,2($i0) # Te4[s1>>16]
251 lbu $t1,2($i1) # Te4[s2>>16]
252 lbu $t2,2($i2) # Te4[s3>>16]
253 lbu $t3,2($i3) # Te4[s0>>16]
254
255 _xtr $i0,$s2,8-2
256 _xtr $i1,$s3,8-2
257 _xtr $i2,$s0,8-2
258 _xtr $i3,$s1,8-2
259 and $i0,0x3fc
260 and $i1,0x3fc
261 and $i2,0x3fc
262 and $i3,0x3fc
263 $PTR_ADD $i0,$Tbl
264 $PTR_ADD $i1,$Tbl
265 $PTR_ADD $i2,$Tbl
266 $PTR_ADD $i3,$Tbl
267 lbu $t4,2($i0) # Te4[s2>>8]
268 lbu $t5,2($i1) # Te4[s3>>8]
269 lbu $t6,2($i2) # Te4[s0>>8]
270 lbu $t7,2($i3) # Te4[s1>>8]
271
272 _xtr $i0,$s0,24-2
273 _xtr $i1,$s1,24-2
274 _xtr $i2,$s2,24-2
275 _xtr $i3,$s3,24-2
276 and $i0,0x3fc
277 and $i1,0x3fc
278 and $i2,0x3fc
279 and $i3,0x3fc
280 $PTR_ADD $i0,$Tbl
281 $PTR_ADD $i1,$Tbl
282 $PTR_ADD $i2,$Tbl
283 $PTR_ADD $i3,$Tbl
284 lbu $t8,2($i0) # Te4[s0>>24]
285 lbu $t9,2($i1) # Te4[s1>>24]
286 lbu $t10,2($i2) # Te4[s2>>24]
287 lbu $t11,2($i3) # Te4[s3>>24]
288
289 _xtr $i0,$s3,0-2
290 _xtr $i1,$s0,0-2
291 _xtr $i2,$s1,0-2
292 _xtr $i3,$s2,0-2
293 and $i0,0x3fc
294 and $i1,0x3fc
295 and $i2,0x3fc
296 and $i3,0x3fc
297
298 _ins $t0,16
299 _ins $t1,16
300 _ins $t2,16
301 _ins $t3,16
302
303 _ins $t4,8
304 _ins $t5,8
305 _ins $t6,8
306 _ins $t7,8
307
308 xor $t0,$t4
309 xor $t1,$t5
310 xor $t2,$t6
311 xor $t3,$t7
312
313 $PTR_ADD $i0,$Tbl
314 $PTR_ADD $i1,$Tbl
315 $PTR_ADD $i2,$Tbl
316 $PTR_ADD $i3,$Tbl
317 lbu $t4,2($i0) # Te4[s3]
318 lbu $t5,2($i1) # Te4[s0]
319 lbu $t6,2($i2) # Te4[s1]
320 lbu $t7,2($i3) # Te4[s2]
321
322 _ins $t8,24
323 _ins $t9,24
324 _ins $t10,24
325 _ins $t11,24
326
327 lw $s0,0($key0)
328 lw $s1,4($key0)
329 lw $s2,8($key0)
330 lw $s3,12($key0)
331
332 xor $t0,$t8
333 xor $t1,$t9
334 xor $t2,$t10
335 xor $t3,$t11
336
337 _ins $t4,0
338 _ins $t5,0
339 _ins $t6,0
340 _ins $t7,0
341
342 xor $t0,$t4
343 xor $t1,$t5
344 xor $t2,$t6
345 xor $t3,$t7
346
347 xor $s0,$t0
348 xor $s1,$t1
349 xor $s2,$t2
350 xor $s3,$t3
351
352 jr $ra
353.end _mips_AES_encrypt
354
355.align 5
356.globl AES_encrypt
357.ent AES_encrypt
358AES_encrypt:
359 .frame $sp,$FRAMESIZE,$ra
360 .mask $SAVED_REGS_MASK,-$SZREG
361 .set noreorder
362___
363$code.=<<___ if ($flavour =~ /o32/i); # o32 PIC-ification
364 .cpload $pf
365___
366$code.=<<___;
367 $PTR_SUB $sp,$FRAMESIZE
368 $REG_S $ra,$FRAMESIZE-1*$SZREG($sp)
369 $REG_S $fp,$FRAMESIZE-2*$SZREG($sp)
370 $REG_S $s11,$FRAMESIZE-3*$SZREG($sp)
371 $REG_S $s10,$FRAMESIZE-4*$SZREG($sp)
372 $REG_S $s9,$FRAMESIZE-5*$SZREG($sp)
373 $REG_S $s8,$FRAMESIZE-6*$SZREG($sp)
374 $REG_S $s7,$FRAMESIZE-7*$SZREG($sp)
375 $REG_S $s6,$FRAMESIZE-8*$SZREG($sp)
376 $REG_S $s5,$FRAMESIZE-9*$SZREG($sp)
377 $REG_S $s4,$FRAMESIZE-10*$SZREG($sp)
378___
379$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue
380 $REG_S \$15,$FRAMESIZE-11*$SZREG($sp)
381 $REG_S \$14,$FRAMESIZE-12*$SZREG($sp)
382 $REG_S \$13,$FRAMESIZE-13*$SZREG($sp)
383 $REG_S \$12,$FRAMESIZE-14*$SZREG($sp)
384 $REG_S $gp,$FRAMESIZE-15*$SZREG($sp)
385___
386$code.=<<___ if ($flavour !~ /o32/i); # non-o32 PIC-ification
387 .cplocal $Tbl
388 .cpsetup $pf,$zero,AES_encrypt
389___
390$code.=<<___;
391 .set reorder
392 la $Tbl,AES_Te # PIC-ified 'load address'
393
394 lwl $s0,0+$MSB($inp)
395 lwl $s1,4+$MSB($inp)
396 lwl $s2,8+$MSB($inp)
397 lwl $s3,12+$MSB($inp)
398 lwr $s0,0+$LSB($inp)
399 lwr $s1,4+$LSB($inp)
400 lwr $s2,8+$LSB($inp)
401 lwr $s3,12+$LSB($inp)
402
403 bal _mips_AES_encrypt
404
405 swr $s0,0+$LSB($out)
406 swr $s1,4+$LSB($out)
407 swr $s2,8+$LSB($out)
408 swr $s3,12+$LSB($out)
409 swl $s0,0+$MSB($out)
410 swl $s1,4+$MSB($out)
411 swl $s2,8+$MSB($out)
412 swl $s3,12+$MSB($out)
413
414 .set noreorder
415 $REG_L $ra,$FRAMESIZE-1*$SZREG($sp)
416 $REG_L $fp,$FRAMESIZE-2*$SZREG($sp)
417 $REG_L $s11,$FRAMESIZE-3*$SZREG($sp)
418 $REG_L $s10,$FRAMESIZE-4*$SZREG($sp)
419 $REG_L $s9,$FRAMESIZE-5*$SZREG($sp)
420 $REG_L $s8,$FRAMESIZE-6*$SZREG($sp)
421 $REG_L $s7,$FRAMESIZE-7*$SZREG($sp)
422 $REG_L $s6,$FRAMESIZE-8*$SZREG($sp)
423 $REG_L $s5,$FRAMESIZE-9*$SZREG($sp)
424 $REG_L $s4,$FRAMESIZE-10*$SZREG($sp)
425___
426$code.=<<___ if ($flavour =~ /nubi/i);
427 $REG_L \$15,$FRAMESIZE-11*$SZREG($sp)
428 $REG_L \$14,$FRAMESIZE-12*$SZREG($sp)
429 $REG_L \$13,$FRAMESIZE-13*$SZREG($sp)
430 $REG_L \$12,$FRAMESIZE-14*$SZREG($sp)
431 $REG_L $gp,$FRAMESIZE-15*$SZREG($sp)
432___
433$code.=<<___;
434 jr $ra
435 $PTR_ADD $sp,$FRAMESIZE
436.end AES_encrypt
437___
438
439$code.=<<___;
440.align 5
441.ent _mips_AES_decrypt
442_mips_AES_decrypt:
443 .frame $sp,0,$ra
444 .set reorder
445 lw $t0,0($key)
446 lw $t1,4($key)
447 lw $t2,8($key)
448 lw $t3,12($key)
449 lw $cnt,240($key)
450 $PTR_ADD $key0,$key,16
451
452 xor $s0,$t0
453 xor $s1,$t1
454 xor $s2,$t2
455 xor $s3,$t3
456
457 sub $cnt,1
458 _xtr $i0,$s3,16-2
459.Loop_dec:
460 _xtr $i1,$s0,16-2
461 _xtr $i2,$s1,16-2
462 _xtr $i3,$s2,16-2
463 and $i0,0x3fc
464 and $i1,0x3fc
465 and $i2,0x3fc
466 and $i3,0x3fc
467 $PTR_ADD $i0,$Tbl
468 $PTR_ADD $i1,$Tbl
469 $PTR_ADD $i2,$Tbl
470 $PTR_ADD $i3,$Tbl
471 lwl $t0,3($i0) # Td1[s3>>16]
472 lwl $t1,3($i1) # Td1[s0>>16]
473 lwl $t2,3($i2) # Td1[s1>>16]
474 lwl $t3,3($i3) # Td1[s2>>16]
475 lwr $t0,2($i0) # Td1[s3>>16]
476 lwr $t1,2($i1) # Td1[s0>>16]
477 lwr $t2,2($i2) # Td1[s1>>16]
478 lwr $t3,2($i3) # Td1[s2>>16]
479
480 _xtr $i0,$s2,8-2
481 _xtr $i1,$s3,8-2
482 _xtr $i2,$s0,8-2
483 _xtr $i3,$s1,8-2
484 and $i0,0x3fc
485 and $i1,0x3fc
486 and $i2,0x3fc
487 and $i3,0x3fc
488 $PTR_ADD $i0,$Tbl
489 $PTR_ADD $i1,$Tbl
490 $PTR_ADD $i2,$Tbl
491 $PTR_ADD $i3,$Tbl
492 lwl $t4,2($i0) # Td2[s2>>8]
493 lwl $t5,2($i1) # Td2[s3>>8]
494 lwl $t6,2($i2) # Td2[s0>>8]
495 lwl $t7,2($i3) # Td2[s1>>8]
496 lwr $t4,1($i0) # Td2[s2>>8]
497 lwr $t5,1($i1) # Td2[s3>>8]
498 lwr $t6,1($i2) # Td2[s0>>8]
499 lwr $t7,1($i3) # Td2[s1>>8]
500
501 _xtr $i0,$s1,0-2
502 _xtr $i1,$s2,0-2
503 _xtr $i2,$s3,0-2
504 _xtr $i3,$s0,0-2
505 and $i0,0x3fc
506 and $i1,0x3fc
507 and $i2,0x3fc
508 and $i3,0x3fc
509 $PTR_ADD $i0,$Tbl
510 $PTR_ADD $i1,$Tbl
511 $PTR_ADD $i2,$Tbl
512 $PTR_ADD $i3,$Tbl
513 lwl $t8,1($i0) # Td3[s1]
514 lwl $t9,1($i1) # Td3[s2]
515 lwl $t10,1($i2) # Td3[s3]
516 lwl $t11,1($i3) # Td3[s0]
517 lwr $t8,0($i0) # Td3[s1]
518 lwr $t9,0($i1) # Td3[s2]
519 lwr $t10,0($i2) # Td3[s3]
520 lwr $t11,0($i3) # Td3[s0]
521
522 _xtr $i0,$s0,24-2
523 _xtr $i1,$s1,24-2
524 _xtr $i2,$s2,24-2
525 _xtr $i3,$s3,24-2
526 and $i0,0x3fc
527 and $i1,0x3fc
528 and $i2,0x3fc
529 and $i3,0x3fc
530 $PTR_ADD $i0,$Tbl
531 $PTR_ADD $i1,$Tbl
532 $PTR_ADD $i2,$Tbl
533 $PTR_ADD $i3,$Tbl
534
535 xor $t0,$t4
536 xor $t1,$t5
537 xor $t2,$t6
538 xor $t3,$t7
539
540
541 lw $t4,0($i0) # Td0[s0>>24]
542 lw $t5,0($i1) # Td0[s1>>24]
543 lw $t6,0($i2) # Td0[s2>>24]
544 lw $t7,0($i3) # Td0[s3>>24]
545
546 lw $s0,0($key0)
547 lw $s1,4($key0)
548 lw $s2,8($key0)
549 lw $s3,12($key0)
550
551 xor $t0,$t8
552 xor $t1,$t9
553 xor $t2,$t10
554 xor $t3,$t11
555
556 xor $t0,$t4
557 xor $t1,$t5
558 xor $t2,$t6
559 xor $t3,$t7
560
561 sub $cnt,1
562 $PTR_ADD $key0,16
563 xor $s0,$t0
564 xor $s1,$t1
565 xor $s2,$t2
566 xor $s3,$t3
567 .set noreorder
568 bnez $cnt,.Loop_dec
569 _xtr $i0,$s3,16-2
570
571 .set reorder
572 lw $t4,1024($Tbl) # prefetch Td4
573 lw $t5,1024+32($Tbl)
574 lw $t6,1024+64($Tbl)
575 lw $t7,1024+96($Tbl)
576 lw $t8,1024+128($Tbl)
577 lw $t9,1024+160($Tbl)
578 lw $t10,1024+192($Tbl)
579 lw $t11,1024+224($Tbl)
580
581 _xtr $i0,$s3,16
582 _xtr $i1,$s0,16
583 _xtr $i2,$s1,16
584 _xtr $i3,$s2,16
585 and $i0,0xff
586 and $i1,0xff
587 and $i2,0xff
588 and $i3,0xff
589 $PTR_ADD $i0,$Tbl
590 $PTR_ADD $i1,$Tbl
591 $PTR_ADD $i2,$Tbl
592 $PTR_ADD $i3,$Tbl
593 lbu $t0,1024($i0) # Td4[s3>>16]
594 lbu $t1,1024($i1) # Td4[s0>>16]
595 lbu $t2,1024($i2) # Td4[s1>>16]
596 lbu $t3,1024($i3) # Td4[s2>>16]
597
598 _xtr $i0,$s2,8
599 _xtr $i1,$s3,8
600 _xtr $i2,$s0,8
601 _xtr $i3,$s1,8
602 and $i0,0xff
603 and $i1,0xff
604 and $i2,0xff
605 and $i3,0xff
606 $PTR_ADD $i0,$Tbl
607 $PTR_ADD $i1,$Tbl
608 $PTR_ADD $i2,$Tbl
609 $PTR_ADD $i3,$Tbl
610 lbu $t4,1024($i0) # Td4[s2>>8]
611 lbu $t5,1024($i1) # Td4[s3>>8]
612 lbu $t6,1024($i2) # Td4[s0>>8]
613 lbu $t7,1024($i3) # Td4[s1>>8]
614
615 _xtr $i0,$s0,24
616 _xtr $i1,$s1,24
617 _xtr $i2,$s2,24
618 _xtr $i3,$s3,24
619 $PTR_ADD $i0,$Tbl
620 $PTR_ADD $i1,$Tbl
621 $PTR_ADD $i2,$Tbl
622 $PTR_ADD $i3,$Tbl
623 lbu $t8,1024($i0) # Td4[s0>>24]
624 lbu $t9,1024($i1) # Td4[s1>>24]
625 lbu $t10,1024($i2) # Td4[s2>>24]
626 lbu $t11,1024($i3) # Td4[s3>>24]
627
628 _xtr $i0,$s1,0
629 _xtr $i1,$s2,0
630 _xtr $i2,$s3,0
631 _xtr $i3,$s0,0
632
633 _ins $t0,16
634 _ins $t1,16
635 _ins $t2,16
636 _ins $t3,16
637
638 _ins $t4,8
639 _ins $t5,8
640 _ins $t6,8
641 _ins $t7,8
642
643 xor $t0,$t4
644 xor $t1,$t5
645 xor $t2,$t6
646 xor $t3,$t7
647
648 $PTR_ADD $i0,$Tbl
649 $PTR_ADD $i1,$Tbl
650 $PTR_ADD $i2,$Tbl
651 $PTR_ADD $i3,$Tbl
652 lbu $t4,1024($i0) # Td4[s1]
653 lbu $t5,1024($i1) # Td4[s2]
654 lbu $t6,1024($i2) # Td4[s3]
655 lbu $t7,1024($i3) # Td4[s0]
656
657 _ins $t8,24
658 _ins $t9,24
659 _ins $t10,24
660 _ins $t11,24
661
662 lw $s0,0($key0)
663 lw $s1,4($key0)
664 lw $s2,8($key0)
665 lw $s3,12($key0)
666
667 _ins $t4,0
668 _ins $t5,0
669 _ins $t6,0
670 _ins $t7,0
671
672
673 xor $t0,$t8
674 xor $t1,$t9
675 xor $t2,$t10
676 xor $t3,$t11
677
678 xor $t0,$t4
679 xor $t1,$t5
680 xor $t2,$t6
681 xor $t3,$t7
682
683 xor $s0,$t0
684 xor $s1,$t1
685 xor $s2,$t2
686 xor $s3,$t3
687
688 jr $ra
689.end _mips_AES_decrypt
690
691.align 5
692.globl AES_decrypt
693.ent AES_decrypt
694AES_decrypt:
695 .frame $sp,$FRAMESIZE,$ra
696 .mask $SAVED_REGS_MASK,-$SZREG
697 .set noreorder
698___
699$code.=<<___ if ($flavour =~ /o32/i); # o32 PIC-ification
700 .cpload $pf
701___
702$code.=<<___;
703 $PTR_SUB $sp,$FRAMESIZE
704 $REG_S $ra,$FRAMESIZE-1*$SZREG($sp)
705 $REG_S $fp,$FRAMESIZE-2*$SZREG($sp)
706 $REG_S $s11,$FRAMESIZE-3*$SZREG($sp)
707 $REG_S $s10,$FRAMESIZE-4*$SZREG($sp)
708 $REG_S $s9,$FRAMESIZE-5*$SZREG($sp)
709 $REG_S $s8,$FRAMESIZE-6*$SZREG($sp)
710 $REG_S $s7,$FRAMESIZE-7*$SZREG($sp)
711 $REG_S $s6,$FRAMESIZE-8*$SZREG($sp)
712 $REG_S $s5,$FRAMESIZE-9*$SZREG($sp)
713 $REG_S $s4,$FRAMESIZE-10*$SZREG($sp)
714___
715$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue
716 $REG_S \$15,$FRAMESIZE-11*$SZREG($sp)
717 $REG_S \$14,$FRAMESIZE-12*$SZREG($sp)
718 $REG_S \$13,$FRAMESIZE-13*$SZREG($sp)
719 $REG_S \$12,$FRAMESIZE-14*$SZREG($sp)
720 $REG_S $gp,$FRAMESIZE-15*$SZREG($sp)
721___
722$code.=<<___ if ($flavour !~ /o32/i); # non-o32 PIC-ification
723 .cplocal $Tbl
724 .cpsetup $pf,$zero,AES_decrypt
725___
726$code.=<<___;
727 .set reorder
728 la $Tbl,AES_Td # PIC-ified 'load address'
729
730 lwl $s0,0+$MSB($inp)
731 lwl $s1,4+$MSB($inp)
732 lwl $s2,8+$MSB($inp)
733 lwl $s3,12+$MSB($inp)
734 lwr $s0,0+$LSB($inp)
735 lwr $s1,4+$LSB($inp)
736 lwr $s2,8+$LSB($inp)
737 lwr $s3,12+$LSB($inp)
738
739 bal _mips_AES_decrypt
740
741 swr $s0,0+$LSB($out)
742 swr $s1,4+$LSB($out)
743 swr $s2,8+$LSB($out)
744 swr $s3,12+$LSB($out)
745 swl $s0,0+$MSB($out)
746 swl $s1,4+$MSB($out)
747 swl $s2,8+$MSB($out)
748 swl $s3,12+$MSB($out)
749
750 .set noreorder
751 $REG_L $ra,$FRAMESIZE-1*$SZREG($sp)
752 $REG_L $fp,$FRAMESIZE-2*$SZREG($sp)
753 $REG_L $s11,$FRAMESIZE-3*$SZREG($sp)
754 $REG_L $s10,$FRAMESIZE-4*$SZREG($sp)
755 $REG_L $s9,$FRAMESIZE-5*$SZREG($sp)
756 $REG_L $s8,$FRAMESIZE-6*$SZREG($sp)
757 $REG_L $s7,$FRAMESIZE-7*$SZREG($sp)
758 $REG_L $s6,$FRAMESIZE-8*$SZREG($sp)
759 $REG_L $s5,$FRAMESIZE-9*$SZREG($sp)
760 $REG_L $s4,$FRAMESIZE-10*$SZREG($sp)
761___
762$code.=<<___ if ($flavour =~ /nubi/i);
763 $REG_L \$15,$FRAMESIZE-11*$SZREG($sp)
764 $REG_L \$14,$FRAMESIZE-12*$SZREG($sp)
765 $REG_L \$13,$FRAMESIZE-13*$SZREG($sp)
766 $REG_L \$12,$FRAMESIZE-14*$SZREG($sp)
767 $REG_L $gp,$FRAMESIZE-15*$SZREG($sp)
768___
769$code.=<<___;
770 jr $ra
771 $PTR_ADD $sp,$FRAMESIZE
772.end AES_decrypt
773___
774}}}
775
776{{{
777my $FRAMESIZE=8*$SZREG;
778my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? 0xc000f008 : 0xc0000000;
779
780my ($inp,$bits,$key,$Tbl)=($a0,$a1,$a2,$a3);
781my ($rk0,$rk1,$rk2,$rk3,$rk4,$rk5,$rk6,$rk7)=($a4,$a5,$a6,$a7,$s0,$s1,$s2,$s3);
782my ($i0,$i1,$i2,$i3)=($at,$t0,$t1,$t2);
783my ($rcon,$cnt)=($gp,$fp);
784
785$code.=<<___;
786.align 5
787.ent _mips_AES_set_encrypt_key
788_mips_AES_set_encrypt_key:
789 .frame $sp,0,$ra
790 .set noreorder
791 beqz $inp,.Lekey_done
792 li $t0,-1
793 beqz $key,.Lekey_done
794 $PTR_ADD $rcon,$Tbl,1024+256
795
796 .set reorder
797 lwl $rk0,0+$MSB($inp) # load 128 bits
798 lwl $rk1,4+$MSB($inp)
799 lwl $rk2,8+$MSB($inp)
800 lwl $rk3,12+$MSB($inp)
801 li $at,128
802 lwr $rk0,0+$LSB($inp)
803 lwr $rk1,4+$LSB($inp)
804 lwr $rk2,8+$LSB($inp)
805 lwr $rk3,12+$LSB($inp)
806 .set noreorder
807 beq $bits,$at,.L128bits
808 li $cnt,10
809
810 .set reorder
811 lwl $rk4,16+$MSB($inp) # load 192 bits
812 lwl $rk5,20+$MSB($inp)
813 li $at,192
814 lwr $rk4,16+$LSB($inp)
815 lwr $rk5,20+$LSB($inp)
816 .set noreorder
817 beq $bits,$at,.L192bits
818 li $cnt,8
819
820 .set reorder
821 lwl $rk6,24+$MSB($inp) # load 256 bits
822 lwl $rk7,28+$MSB($inp)
823 li $at,256
824 lwr $rk6,24+$LSB($inp)
825 lwr $rk7,28+$LSB($inp)
826 .set noreorder
827 beq $bits,$at,.L256bits
828 li $cnt,7
829
830 b .Lekey_done
831 li $t0,-2
832
833.align 4
834.L128bits:
835 .set reorder
836 srl $i0,$rk3,16
837 srl $i1,$rk3,8
838 and $i0,0xff
839 and $i1,0xff
840 and $i2,$rk3,0xff
841 srl $i3,$rk3,24
842 $PTR_ADD $i0,$Tbl
843 $PTR_ADD $i1,$Tbl
844 $PTR_ADD $i2,$Tbl
845 $PTR_ADD $i3,$Tbl
846 lbu $i0,1024($i0)
847 lbu $i1,1024($i1)
848 lbu $i2,1024($i2)
849 lbu $i3,1024($i3)
850
851 sw $rk0,0($key)
852 sw $rk1,4($key)
853 sw $rk2,8($key)
854 sw $rk3,12($key)
855 sub $cnt,1
856 $PTR_ADD $key,16
857
858 _bias $i0,24
859 _bias $i1,16
860 _bias $i2,8
861 _bias $i3,0
862
863 xor $rk0,$i0
864 lw $i0,0($rcon)
865 xor $rk0,$i1
866 xor $rk0,$i2
867 xor $rk0,$i3
868 xor $rk0,$i0
869
870 xor $rk1,$rk0
871 xor $rk2,$rk1
872 xor $rk3,$rk2
873
874 .set noreorder
875 bnez $cnt,.L128bits
876 $PTR_ADD $rcon,4
877
878 sw $rk0,0($key)
879 sw $rk1,4($key)
880 sw $rk2,8($key)
881 li $cnt,10
882 sw $rk3,12($key)
883 li $t0,0
884 sw $cnt,80($key)
885 b .Lekey_done
886 $PTR_SUB $key,10*16
887
888.align 4
889.L192bits:
890 .set reorder
891 srl $i0,$rk5,16
892 srl $i1,$rk5,8
893 and $i0,0xff
894 and $i1,0xff
895 and $i2,$rk5,0xff
896 srl $i3,$rk5,24
897 $PTR_ADD $i0,$Tbl
898 $PTR_ADD $i1,$Tbl
899 $PTR_ADD $i2,$Tbl
900 $PTR_ADD $i3,$Tbl
901 lbu $i0,1024($i0)
902 lbu $i1,1024($i1)
903 lbu $i2,1024($i2)
904 lbu $i3,1024($i3)
905
906 sw $rk0,0($key)
907 sw $rk1,4($key)
908 sw $rk2,8($key)
909 sw $rk3,12($key)
910 sw $rk4,16($key)
911 sw $rk5,20($key)
912 sub $cnt,1
913 $PTR_ADD $key,24
914
915 _bias $i0,24
916 _bias $i1,16
917 _bias $i2,8
918 _bias $i3,0
919
920 xor $rk0,$i0
921 lw $i0,0($rcon)
922 xor $rk0,$i1
923 xor $rk0,$i2
924 xor $rk0,$i3
925 xor $rk0,$i0
926
927 xor $rk1,$rk0
928 xor $rk2,$rk1
929 xor $rk3,$rk2
930 xor $rk4,$rk3
931 xor $rk5,$rk4
932
933 .set noreorder
934 bnez $cnt,.L192bits
935 $PTR_ADD $rcon,4
936
937 sw $rk0,0($key)
938 sw $rk1,4($key)
939 sw $rk2,8($key)
940 li $cnt,12
941 sw $rk3,12($key)
942 li $t0,0
943 sw $cnt,48($key)
944 b .Lekey_done
945 $PTR_SUB $key,12*16
946
947.align 4
948.L256bits:
949 .set reorder
950 srl $i0,$rk7,16
951 srl $i1,$rk7,8
952 and $i0,0xff
953 and $i1,0xff
954 and $i2,$rk7,0xff
955 srl $i3,$rk7,24
956 $PTR_ADD $i0,$Tbl
957 $PTR_ADD $i1,$Tbl
958 $PTR_ADD $i2,$Tbl
959 $PTR_ADD $i3,$Tbl
960 lbu $i0,1024($i0)
961 lbu $i1,1024($i1)
962 lbu $i2,1024($i2)
963 lbu $i3,1024($i3)
964
965 sw $rk0,0($key)
966 sw $rk1,4($key)
967 sw $rk2,8($key)
968 sw $rk3,12($key)
969 sw $rk4,16($key)
970 sw $rk5,20($key)
971 sw $rk6,24($key)
972 sw $rk7,28($key)
973 sub $cnt,1
974
975 _bias $i0,24
976 _bias $i1,16
977 _bias $i2,8
978 _bias $i3,0
979
980 xor $rk0,$i0
981 lw $i0,0($rcon)
982 xor $rk0,$i1
983 xor $rk0,$i2
984 xor $rk0,$i3
985 xor $rk0,$i0
986
987 xor $rk1,$rk0
988 xor $rk2,$rk1
989 xor $rk3,$rk2
990 beqz $cnt,.L256bits_done
991
992 srl $i0,$rk3,24
993 srl $i1,$rk3,16
994 srl $i2,$rk3,8
995 and $i3,$rk3,0xff
996 and $i1,0xff
997 and $i2,0xff
998 $PTR_ADD $i0,$Tbl
999 $PTR_ADD $i1,$Tbl
1000 $PTR_ADD $i2,$Tbl
1001 $PTR_ADD $i3,$Tbl
1002 lbu $i0,1024($i0)
1003 lbu $i1,1024($i1)
1004 lbu $i2,1024($i2)
1005 lbu $i3,1024($i3)
1006 sll $i0,24
1007 sll $i1,16
1008 sll $i2,8
1009
1010 xor $rk4,$i0
1011 xor $rk4,$i1
1012 xor $rk4,$i2
1013 xor $rk4,$i3
1014
1015 xor $rk5,$rk4
1016 xor $rk6,$rk5
1017 xor $rk7,$rk6
1018
1019 $PTR_ADD $key,32
1020 .set noreorder
1021 b .L256bits
1022 $PTR_ADD $rcon,4
1023
1024.L256bits_done:
1025 sw $rk0,32($key)
1026 sw $rk1,36($key)
1027 sw $rk2,40($key)
1028 li $cnt,14
1029 sw $rk3,44($key)
1030 li $t0,0
1031 sw $cnt,48($key)
1032 $PTR_SUB $key,12*16
1033
1034.Lekey_done:
1035 jr $ra
1036 nop
1037.end _mips_AES_set_encrypt_key
1038
1039.globl AES_set_encrypt_key
1040.ent AES_set_encrypt_key
1041AES_set_encrypt_key:
1042 .frame $sp,$FRAMESIZE,$ra
1043 .mask $SAVED_REGS_MASK,-$SZREG
1044 .set noreorder
1045___
1046$code.=<<___ if ($flavour =~ /o32/i); # o32 PIC-ification
1047 .cpload $pf
1048___
1049$code.=<<___;
1050 $PTR_SUB $sp,$FRAMESIZE
1051 $REG_S $ra,$FRAMESIZE-1*$SZREG($sp)
1052 $REG_S $fp,$FRAMESIZE-2*$SZREG($sp)
1053___
1054$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue
1055 $REG_S $s3,$FRAMESIZE-3*$SZREG($sp)
1056 $REG_S $s2,$FRAMESIZE-4*$SZREG($sp)
1057 $REG_S $s1,$FRAMESIZE-5*$SZREG($sp)
1058 $REG_S $s0,$FRAMESIZE-6*$SZREG($sp)
1059 $REG_S $gp,$FRAMESIZE-7*$SZREG($sp)
1060___
1061$code.=<<___ if ($flavour !~ /o32/i); # non-o32 PIC-ification
1062 .cplocal $Tbl
1063 .cpsetup $pf,$zero,AES_set_encrypt_key
1064___
1065$code.=<<___;
1066 .set reorder
1067 la $Tbl,AES_Te # PIC-ified 'load address'
1068
1069 bal _mips_AES_set_encrypt_key
1070
1071 .set noreorder
1072 move $a0,$t0
1073 $REG_L $ra,$FRAMESIZE-1*$SZREG($sp)
1074 $REG_L $fp,$FRAMESIZE-2*$SZREG($sp)
1075___
1076$code.=<<___ if ($flavour =~ /nubi/i);
1077 $REG_L $s3,$FRAMESIZE-11*$SZREG($sp)
1078 $REG_L $s2,$FRAMESIZE-12*$SZREG($sp)
1079 $REG_L $s1,$FRAMESIZE-13*$SZREG($sp)
1080 $REG_L $s0,$FRAMESIZE-14*$SZREG($sp)
1081 $REG_L $gp,$FRAMESIZE-15*$SZREG($sp)
1082___
1083$code.=<<___;
1084 jr $ra
1085 $PTR_ADD $sp,$FRAMESIZE
1086.end AES_set_encrypt_key
1087___
1088
1089my ($head,$tail)=($inp,$bits);
1090my ($tp1,$tp2,$tp4,$tp8,$tp9,$tpb,$tpd,$tpe)=($a4,$a5,$a6,$a7,$s0,$s1,$s2,$s3);
1091my ($m,$x80808080,$x7f7f7f7f,$x1b1b1b1b)=($at,$t0,$t1,$t2);
1092$code.=<<___;
1093.align 5
1094.globl AES_set_decrypt_key
1095.ent AES_set_decrypt_key
1096AES_set_decrypt_key:
1097 .frame $sp,$FRAMESIZE,$ra
1098 .mask $SAVED_REGS_MASK,-$SZREG
1099 .set noreorder
1100___
1101$code.=<<___ if ($flavour =~ /o32/i); # o32 PIC-ification
1102 .cpload $pf
1103___
1104$code.=<<___;
1105 $PTR_SUB $sp,$FRAMESIZE
1106 $REG_S $ra,$FRAMESIZE-1*$SZREG($sp)
1107 $REG_S $fp,$FRAMESIZE-2*$SZREG($sp)
1108___
1109$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue
1110 $REG_S $s3,$FRAMESIZE-3*$SZREG($sp)
1111 $REG_S $s2,$FRAMESIZE-4*$SZREG($sp)
1112 $REG_S $s1,$FRAMESIZE-5*$SZREG($sp)
1113 $REG_S $s0,$FRAMESIZE-6*$SZREG($sp)
1114 $REG_S $gp,$FRAMESIZE-7*$SZREG($sp)
1115___
1116$code.=<<___ if ($flavour !~ /o32/i); # non-o32 PIC-ification
1117 .cplocal $Tbl
1118 .cpsetup $pf,$zero,AES_set_decrypt_key
1119___
1120$code.=<<___;
1121 .set reorder
1122 la $Tbl,AES_Te # PIC-ified 'load address'
1123
1124 bal _mips_AES_set_encrypt_key
1125
1126 bltz $t0,.Ldkey_done
1127
1128 sll $at,$cnt,4
1129 $PTR_ADD $head,$key,0
1130 $PTR_ADD $tail,$key,$at
1131.align 4
1132.Lswap:
1133 lw $rk0,0($head)
1134 lw $rk1,4($head)
1135 lw $rk2,8($head)
1136 lw $rk3,12($head)
1137 lw $rk4,0($tail)
1138 lw $rk5,4($tail)
1139 lw $rk6,8($tail)
1140 lw $rk7,12($tail)
1141 sw $rk0,0($tail)
1142 sw $rk1,4($tail)
1143 sw $rk2,8($tail)
1144 sw $rk3,12($tail)
1145 $PTR_ADD $head,16
1146 $PTR_SUB $tail,16
1147 sw $rk4,-16($head)
1148 sw $rk5,-12($head)
1149 sw $rk6,-8($head)
1150 sw $rk7,-4($head)
1151 bne $head,$tail,.Lswap
1152
1153 lw $tp1,16($key) # modulo-scheduled
1154 lui $x80808080,0x8080
1155 sub $cnt,1
1156 or $x80808080,0x8080
1157 sll $cnt,2
1158 $PTR_ADD $key,16
1159 lui $x1b1b1b1b,0x1b1b
1160 nor $x7f7f7f7f,$zero,$x80808080
1161 or $x1b1b1b1b,0x1b1b
1162.align 4
1163.Lmix:
1164 and $m,$tp1,$x80808080
1165 and $tp2,$tp1,$x7f7f7f7f
1166 srl $tp4,$m,7
1167 addu $tp2,$tp2 # tp2<<1
1168 subu $m,$tp4
1169 and $m,$x1b1b1b1b
1170 xor $tp2,$m
1171
1172 and $m,$tp2,$x80808080
1173 and $tp4,$tp2,$x7f7f7f7f
1174 srl $tp8,$m,7
1175 addu $tp4,$tp4 # tp4<<1
1176 subu $m,$tp8
1177 and $m,$x1b1b1b1b
1178 xor $tp4,$m
1179
1180 and $m,$tp4,$x80808080
1181 and $tp8,$tp4,$x7f7f7f7f
1182 srl $tp9,$m,7
1183 addu $tp8,$tp8 # tp8<<1
1184 subu $m,$tp9
1185 and $m,$x1b1b1b1b
1186 xor $tp8,$m
1187
1188 xor $tp9,$tp8,$tp1
1189 xor $tpe,$tp8,$tp4
1190 xor $tpb,$tp9,$tp2
1191 xor $tpd,$tp9,$tp4
1192
1193 _ror $tp1,$tpd,16
1194 xor $tpe,$tp2
1195 _ror $tp2,$tpd,-16
1196 xor $tpe,$tp1
1197 _ror $tp1,$tp9,8
1198 xor $tpe,$tp2
1199 _ror $tp2,$tp9,-24
1200 xor $tpe,$tp1
1201 _ror $tp1,$tpb,24
1202 xor $tpe,$tp2
1203 _ror $tp2,$tpb,-8
1204 xor $tpe,$tp1
1205 lw $tp1,4($key) # modulo-scheduled
1206 xor $tpe,$tp2
1207 sub $cnt,1
1208 sw $tpe,0($key)
1209 $PTR_ADD $key,4
1210 bnez $cnt,.Lmix
1211
1212 li $t0,0
1213.Ldkey_done:
1214 .set noreorder
1215 move $a0,$t0
1216 $REG_L $ra,$FRAMESIZE-1*$SZREG($sp)
1217 $REG_L $fp,$FRAMESIZE-2*$SZREG($sp)
1218___
1219$code.=<<___ if ($flavour =~ /nubi/i);
1220 $REG_L $s3,$FRAMESIZE-11*$SZREG($sp)
1221 $REG_L $s2,$FRAMESIZE-12*$SZREG($sp)
1222 $REG_L $s1,$FRAMESIZE-13*$SZREG($sp)
1223 $REG_L $s0,$FRAMESIZE-14*$SZREG($sp)
1224 $REG_L $gp,$FRAMESIZE-15*$SZREG($sp)
1225___
1226$code.=<<___;
1227 jr $ra
1228 $PTR_ADD $sp,$FRAMESIZE
1229.end AES_set_decrypt_key
1230___
1231}}}
1232
1233######################################################################
1234# Tables are kept in an endian-neutral manner
1235$code.=<<___;
1236.rdata
1237.align 6
1238AES_Te:
1239.byte 0xc6,0x63,0x63,0xa5, 0xf8,0x7c,0x7c,0x84 # Te0
1240.byte 0xee,0x77,0x77,0x99, 0xf6,0x7b,0x7b,0x8d
1241.byte 0xff,0xf2,0xf2,0x0d, 0xd6,0x6b,0x6b,0xbd
1242.byte 0xde,0x6f,0x6f,0xb1, 0x91,0xc5,0xc5,0x54
1243.byte 0x60,0x30,0x30,0x50, 0x02,0x01,0x01,0x03
1244.byte 0xce,0x67,0x67,0xa9, 0x56,0x2b,0x2b,0x7d
1245.byte 0xe7,0xfe,0xfe,0x19, 0xb5,0xd7,0xd7,0x62
1246.byte 0x4d,0xab,0xab,0xe6, 0xec,0x76,0x76,0x9a
1247.byte 0x8f,0xca,0xca,0x45, 0x1f,0x82,0x82,0x9d
1248.byte 0x89,0xc9,0xc9,0x40, 0xfa,0x7d,0x7d,0x87
1249.byte 0xef,0xfa,0xfa,0x15, 0xb2,0x59,0x59,0xeb
1250.byte 0x8e,0x47,0x47,0xc9, 0xfb,0xf0,0xf0,0x0b
1251.byte 0x41,0xad,0xad,0xec, 0xb3,0xd4,0xd4,0x67
1252.byte 0x5f,0xa2,0xa2,0xfd, 0x45,0xaf,0xaf,0xea
1253.byte 0x23,0x9c,0x9c,0xbf, 0x53,0xa4,0xa4,0xf7
1254.byte 0xe4,0x72,0x72,0x96, 0x9b,0xc0,0xc0,0x5b
1255.byte 0x75,0xb7,0xb7,0xc2, 0xe1,0xfd,0xfd,0x1c
1256.byte 0x3d,0x93,0x93,0xae, 0x4c,0x26,0x26,0x6a
1257.byte 0x6c,0x36,0x36,0x5a, 0x7e,0x3f,0x3f,0x41
1258.byte 0xf5,0xf7,0xf7,0x02, 0x83,0xcc,0xcc,0x4f
1259.byte 0x68,0x34,0x34,0x5c, 0x51,0xa5,0xa5,0xf4
1260.byte 0xd1,0xe5,0xe5,0x34, 0xf9,0xf1,0xf1,0x08
1261.byte 0xe2,0x71,0x71,0x93, 0xab,0xd8,0xd8,0x73
1262.byte 0x62,0x31,0x31,0x53, 0x2a,0x15,0x15,0x3f
1263.byte 0x08,0x04,0x04,0x0c, 0x95,0xc7,0xc7,0x52
1264.byte 0x46,0x23,0x23,0x65, 0x9d,0xc3,0xc3,0x5e
1265.byte 0x30,0x18,0x18,0x28, 0x37,0x96,0x96,0xa1
1266.byte 0x0a,0x05,0x05,0x0f, 0x2f,0x9a,0x9a,0xb5
1267.byte 0x0e,0x07,0x07,0x09, 0x24,0x12,0x12,0x36
1268.byte 0x1b,0x80,0x80,0x9b, 0xdf,0xe2,0xe2,0x3d
1269.byte 0xcd,0xeb,0xeb,0x26, 0x4e,0x27,0x27,0x69
1270.byte 0x7f,0xb2,0xb2,0xcd, 0xea,0x75,0x75,0x9f
1271.byte 0x12,0x09,0x09,0x1b, 0x1d,0x83,0x83,0x9e
1272.byte 0x58,0x2c,0x2c,0x74, 0x34,0x1a,0x1a,0x2e
1273.byte 0x36,0x1b,0x1b,0x2d, 0xdc,0x6e,0x6e,0xb2
1274.byte 0xb4,0x5a,0x5a,0xee, 0x5b,0xa0,0xa0,0xfb
1275.byte 0xa4,0x52,0x52,0xf6, 0x76,0x3b,0x3b,0x4d
1276.byte 0xb7,0xd6,0xd6,0x61, 0x7d,0xb3,0xb3,0xce
1277.byte 0x52,0x29,0x29,0x7b, 0xdd,0xe3,0xe3,0x3e
1278.byte 0x5e,0x2f,0x2f,0x71, 0x13,0x84,0x84,0x97
1279.byte 0xa6,0x53,0x53,0xf5, 0xb9,0xd1,0xd1,0x68
1280.byte 0x00,0x00,0x00,0x00, 0xc1,0xed,0xed,0x2c
1281.byte 0x40,0x20,0x20,0x60, 0xe3,0xfc,0xfc,0x1f
1282.byte 0x79,0xb1,0xb1,0xc8, 0xb6,0x5b,0x5b,0xed
1283.byte 0xd4,0x6a,0x6a,0xbe, 0x8d,0xcb,0xcb,0x46
1284.byte 0x67,0xbe,0xbe,0xd9, 0x72,0x39,0x39,0x4b
1285.byte 0x94,0x4a,0x4a,0xde, 0x98,0x4c,0x4c,0xd4
1286.byte 0xb0,0x58,0x58,0xe8, 0x85,0xcf,0xcf,0x4a
1287.byte 0xbb,0xd0,0xd0,0x6b, 0xc5,0xef,0xef,0x2a
1288.byte 0x4f,0xaa,0xaa,0xe5, 0xed,0xfb,0xfb,0x16
1289.byte 0x86,0x43,0x43,0xc5, 0x9a,0x4d,0x4d,0xd7
1290.byte 0x66,0x33,0x33,0x55, 0x11,0x85,0x85,0x94
1291.byte 0x8a,0x45,0x45,0xcf, 0xe9,0xf9,0xf9,0x10
1292.byte 0x04,0x02,0x02,0x06, 0xfe,0x7f,0x7f,0x81
1293.byte 0xa0,0x50,0x50,0xf0, 0x78,0x3c,0x3c,0x44
1294.byte 0x25,0x9f,0x9f,0xba, 0x4b,0xa8,0xa8,0xe3
1295.byte 0xa2,0x51,0x51,0xf3, 0x5d,0xa3,0xa3,0xfe
1296.byte 0x80,0x40,0x40,0xc0, 0x05,0x8f,0x8f,0x8a
1297.byte 0x3f,0x92,0x92,0xad, 0x21,0x9d,0x9d,0xbc
1298.byte 0x70,0x38,0x38,0x48, 0xf1,0xf5,0xf5,0x04
1299.byte 0x63,0xbc,0xbc,0xdf, 0x77,0xb6,0xb6,0xc1
1300.byte 0xaf,0xda,0xda,0x75, 0x42,0x21,0x21,0x63
1301.byte 0x20,0x10,0x10,0x30, 0xe5,0xff,0xff,0x1a
1302.byte 0xfd,0xf3,0xf3,0x0e, 0xbf,0xd2,0xd2,0x6d
1303.byte 0x81,0xcd,0xcd,0x4c, 0x18,0x0c,0x0c,0x14
1304.byte 0x26,0x13,0x13,0x35, 0xc3,0xec,0xec,0x2f
1305.byte 0xbe,0x5f,0x5f,0xe1, 0x35,0x97,0x97,0xa2
1306.byte 0x88,0x44,0x44,0xcc, 0x2e,0x17,0x17,0x39
1307.byte 0x93,0xc4,0xc4,0x57, 0x55,0xa7,0xa7,0xf2
1308.byte 0xfc,0x7e,0x7e,0x82, 0x7a,0x3d,0x3d,0x47
1309.byte 0xc8,0x64,0x64,0xac, 0xba,0x5d,0x5d,0xe7
1310.byte 0x32,0x19,0x19,0x2b, 0xe6,0x73,0x73,0x95
1311.byte 0xc0,0x60,0x60,0xa0, 0x19,0x81,0x81,0x98
1312.byte 0x9e,0x4f,0x4f,0xd1, 0xa3,0xdc,0xdc,0x7f
1313.byte 0x44,0x22,0x22,0x66, 0x54,0x2a,0x2a,0x7e
1314.byte 0x3b,0x90,0x90,0xab, 0x0b,0x88,0x88,0x83
1315.byte 0x8c,0x46,0x46,0xca, 0xc7,0xee,0xee,0x29
1316.byte 0x6b,0xb8,0xb8,0xd3, 0x28,0x14,0x14,0x3c
1317.byte 0xa7,0xde,0xde,0x79, 0xbc,0x5e,0x5e,0xe2
1318.byte 0x16,0x0b,0x0b,0x1d, 0xad,0xdb,0xdb,0x76
1319.byte 0xdb,0xe0,0xe0,0x3b, 0x64,0x32,0x32,0x56
1320.byte 0x74,0x3a,0x3a,0x4e, 0x14,0x0a,0x0a,0x1e
1321.byte 0x92,0x49,0x49,0xdb, 0x0c,0x06,0x06,0x0a
1322.byte 0x48,0x24,0x24,0x6c, 0xb8,0x5c,0x5c,0xe4
1323.byte 0x9f,0xc2,0xc2,0x5d, 0xbd,0xd3,0xd3,0x6e
1324.byte 0x43,0xac,0xac,0xef, 0xc4,0x62,0x62,0xa6
1325.byte 0x39,0x91,0x91,0xa8, 0x31,0x95,0x95,0xa4
1326.byte 0xd3,0xe4,0xe4,0x37, 0xf2,0x79,0x79,0x8b
1327.byte 0xd5,0xe7,0xe7,0x32, 0x8b,0xc8,0xc8,0x43
1328.byte 0x6e,0x37,0x37,0x59, 0xda,0x6d,0x6d,0xb7
1329.byte 0x01,0x8d,0x8d,0x8c, 0xb1,0xd5,0xd5,0x64
1330.byte 0x9c,0x4e,0x4e,0xd2, 0x49,0xa9,0xa9,0xe0
1331.byte 0xd8,0x6c,0x6c,0xb4, 0xac,0x56,0x56,0xfa
1332.byte 0xf3,0xf4,0xf4,0x07, 0xcf,0xea,0xea,0x25
1333.byte 0xca,0x65,0x65,0xaf, 0xf4,0x7a,0x7a,0x8e
1334.byte 0x47,0xae,0xae,0xe9, 0x10,0x08,0x08,0x18
1335.byte 0x6f,0xba,0xba,0xd5, 0xf0,0x78,0x78,0x88
1336.byte 0x4a,0x25,0x25,0x6f, 0x5c,0x2e,0x2e,0x72
1337.byte 0x38,0x1c,0x1c,0x24, 0x57,0xa6,0xa6,0xf1
1338.byte 0x73,0xb4,0xb4,0xc7, 0x97,0xc6,0xc6,0x51
1339.byte 0xcb,0xe8,0xe8,0x23, 0xa1,0xdd,0xdd,0x7c
1340.byte 0xe8,0x74,0x74,0x9c, 0x3e,0x1f,0x1f,0x21
1341.byte 0x96,0x4b,0x4b,0xdd, 0x61,0xbd,0xbd,0xdc
1342.byte 0x0d,0x8b,0x8b,0x86, 0x0f,0x8a,0x8a,0x85
1343.byte 0xe0,0x70,0x70,0x90, 0x7c,0x3e,0x3e,0x42
1344.byte 0x71,0xb5,0xb5,0xc4, 0xcc,0x66,0x66,0xaa
1345.byte 0x90,0x48,0x48,0xd8, 0x06,0x03,0x03,0x05
1346.byte 0xf7,0xf6,0xf6,0x01, 0x1c,0x0e,0x0e,0x12
1347.byte 0xc2,0x61,0x61,0xa3, 0x6a,0x35,0x35,0x5f
1348.byte 0xae,0x57,0x57,0xf9, 0x69,0xb9,0xb9,0xd0
1349.byte 0x17,0x86,0x86,0x91, 0x99,0xc1,0xc1,0x58
1350.byte 0x3a,0x1d,0x1d,0x27, 0x27,0x9e,0x9e,0xb9
1351.byte 0xd9,0xe1,0xe1,0x38, 0xeb,0xf8,0xf8,0x13
1352.byte 0x2b,0x98,0x98,0xb3, 0x22,0x11,0x11,0x33
1353.byte 0xd2,0x69,0x69,0xbb, 0xa9,0xd9,0xd9,0x70
1354.byte 0x07,0x8e,0x8e,0x89, 0x33,0x94,0x94,0xa7
1355.byte 0x2d,0x9b,0x9b,0xb6, 0x3c,0x1e,0x1e,0x22
1356.byte 0x15,0x87,0x87,0x92, 0xc9,0xe9,0xe9,0x20
1357.byte 0x87,0xce,0xce,0x49, 0xaa,0x55,0x55,0xff
1358.byte 0x50,0x28,0x28,0x78, 0xa5,0xdf,0xdf,0x7a
1359.byte 0x03,0x8c,0x8c,0x8f, 0x59,0xa1,0xa1,0xf8
1360.byte 0x09,0x89,0x89,0x80, 0x1a,0x0d,0x0d,0x17
1361.byte 0x65,0xbf,0xbf,0xda, 0xd7,0xe6,0xe6,0x31
1362.byte 0x84,0x42,0x42,0xc6, 0xd0,0x68,0x68,0xb8
1363.byte 0x82,0x41,0x41,0xc3, 0x29,0x99,0x99,0xb0
1364.byte 0x5a,0x2d,0x2d,0x77, 0x1e,0x0f,0x0f,0x11
1365.byte 0x7b,0xb0,0xb0,0xcb, 0xa8,0x54,0x54,0xfc
1366.byte 0x6d,0xbb,0xbb,0xd6, 0x2c,0x16,0x16,0x3a
1367
1368.byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5 # Te4
1369.byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
1370.byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
1371.byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
1372.byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
1373.byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
1374.byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
1375.byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
1376.byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
1377.byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
1378.byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
1379.byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
1380.byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
1381.byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
1382.byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
1383.byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
1384.byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
1385.byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
1386.byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
1387.byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
1388.byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
1389.byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
1390.byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
1391.byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
1392.byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
1393.byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
1394.byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
1395.byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
1396.byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
1397.byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
1398.byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
1399.byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
1400
1401.byte 0x01,0x00,0x00,0x00, 0x02,0x00,0x00,0x00 # rcon
1402.byte 0x04,0x00,0x00,0x00, 0x08,0x00,0x00,0x00
1403.byte 0x10,0x00,0x00,0x00, 0x20,0x00,0x00,0x00
1404.byte 0x40,0x00,0x00,0x00, 0x80,0x00,0x00,0x00
1405.byte 0x1B,0x00,0x00,0x00, 0x36,0x00,0x00,0x00
1406
1407.align 6
1408AES_Td:
1409.byte 0x51,0xf4,0xa7,0x50, 0x7e,0x41,0x65,0x53 # Td0
1410.byte 0x1a,0x17,0xa4,0xc3, 0x3a,0x27,0x5e,0x96
1411.byte 0x3b,0xab,0x6b,0xcb, 0x1f,0x9d,0x45,0xf1
1412.byte 0xac,0xfa,0x58,0xab, 0x4b,0xe3,0x03,0x93
1413.byte 0x20,0x30,0xfa,0x55, 0xad,0x76,0x6d,0xf6
1414.byte 0x88,0xcc,0x76,0x91, 0xf5,0x02,0x4c,0x25
1415.byte 0x4f,0xe5,0xd7,0xfc, 0xc5,0x2a,0xcb,0xd7
1416.byte 0x26,0x35,0x44,0x80, 0xb5,0x62,0xa3,0x8f
1417.byte 0xde,0xb1,0x5a,0x49, 0x25,0xba,0x1b,0x67
1418.byte 0x45,0xea,0x0e,0x98, 0x5d,0xfe,0xc0,0xe1
1419.byte 0xc3,0x2f,0x75,0x02, 0x81,0x4c,0xf0,0x12
1420.byte 0x8d,0x46,0x97,0xa3, 0x6b,0xd3,0xf9,0xc6
1421.byte 0x03,0x8f,0x5f,0xe7, 0x15,0x92,0x9c,0x95
1422.byte 0xbf,0x6d,0x7a,0xeb, 0x95,0x52,0x59,0xda
1423.byte 0xd4,0xbe,0x83,0x2d, 0x58,0x74,0x21,0xd3
1424.byte 0x49,0xe0,0x69,0x29, 0x8e,0xc9,0xc8,0x44
1425.byte 0x75,0xc2,0x89,0x6a, 0xf4,0x8e,0x79,0x78
1426.byte 0x99,0x58,0x3e,0x6b, 0x27,0xb9,0x71,0xdd
1427.byte 0xbe,0xe1,0x4f,0xb6, 0xf0,0x88,0xad,0x17
1428.byte 0xc9,0x20,0xac,0x66, 0x7d,0xce,0x3a,0xb4
1429.byte 0x63,0xdf,0x4a,0x18, 0xe5,0x1a,0x31,0x82
1430.byte 0x97,0x51,0x33,0x60, 0x62,0x53,0x7f,0x45
1431.byte 0xb1,0x64,0x77,0xe0, 0xbb,0x6b,0xae,0x84
1432.byte 0xfe,0x81,0xa0,0x1c, 0xf9,0x08,0x2b,0x94
1433.byte 0x70,0x48,0x68,0x58, 0x8f,0x45,0xfd,0x19
1434.byte 0x94,0xde,0x6c,0x87, 0x52,0x7b,0xf8,0xb7
1435.byte 0xab,0x73,0xd3,0x23, 0x72,0x4b,0x02,0xe2
1436.byte 0xe3,0x1f,0x8f,0x57, 0x66,0x55,0xab,0x2a
1437.byte 0xb2,0xeb,0x28,0x07, 0x2f,0xb5,0xc2,0x03
1438.byte 0x86,0xc5,0x7b,0x9a, 0xd3,0x37,0x08,0xa5
1439.byte 0x30,0x28,0x87,0xf2, 0x23,0xbf,0xa5,0xb2
1440.byte 0x02,0x03,0x6a,0xba, 0xed,0x16,0x82,0x5c
1441.byte 0x8a,0xcf,0x1c,0x2b, 0xa7,0x79,0xb4,0x92
1442.byte 0xf3,0x07,0xf2,0xf0, 0x4e,0x69,0xe2,0xa1
1443.byte 0x65,0xda,0xf4,0xcd, 0x06,0x05,0xbe,0xd5
1444.byte 0xd1,0x34,0x62,0x1f, 0xc4,0xa6,0xfe,0x8a
1445.byte 0x34,0x2e,0x53,0x9d, 0xa2,0xf3,0x55,0xa0
1446.byte 0x05,0x8a,0xe1,0x32, 0xa4,0xf6,0xeb,0x75
1447.byte 0x0b,0x83,0xec,0x39, 0x40,0x60,0xef,0xaa
1448.byte 0x5e,0x71,0x9f,0x06, 0xbd,0x6e,0x10,0x51
1449.byte 0x3e,0x21,0x8a,0xf9, 0x96,0xdd,0x06,0x3d
1450.byte 0xdd,0x3e,0x05,0xae, 0x4d,0xe6,0xbd,0x46
1451.byte 0x91,0x54,0x8d,0xb5, 0x71,0xc4,0x5d,0x05
1452.byte 0x04,0x06,0xd4,0x6f, 0x60,0x50,0x15,0xff
1453.byte 0x19,0x98,0xfb,0x24, 0xd6,0xbd,0xe9,0x97
1454.byte 0x89,0x40,0x43,0xcc, 0x67,0xd9,0x9e,0x77
1455.byte 0xb0,0xe8,0x42,0xbd, 0x07,0x89,0x8b,0x88
1456.byte 0xe7,0x19,0x5b,0x38, 0x79,0xc8,0xee,0xdb
1457.byte 0xa1,0x7c,0x0a,0x47, 0x7c,0x42,0x0f,0xe9
1458.byte 0xf8,0x84,0x1e,0xc9, 0x00,0x00,0x00,0x00
1459.byte 0x09,0x80,0x86,0x83, 0x32,0x2b,0xed,0x48
1460.byte 0x1e,0x11,0x70,0xac, 0x6c,0x5a,0x72,0x4e
1461.byte 0xfd,0x0e,0xff,0xfb, 0x0f,0x85,0x38,0x56
1462.byte 0x3d,0xae,0xd5,0x1e, 0x36,0x2d,0x39,0x27
1463.byte 0x0a,0x0f,0xd9,0x64, 0x68,0x5c,0xa6,0x21
1464.byte 0x9b,0x5b,0x54,0xd1, 0x24,0x36,0x2e,0x3a
1465.byte 0x0c,0x0a,0x67,0xb1, 0x93,0x57,0xe7,0x0f
1466.byte 0xb4,0xee,0x96,0xd2, 0x1b,0x9b,0x91,0x9e
1467.byte 0x80,0xc0,0xc5,0x4f, 0x61,0xdc,0x20,0xa2
1468.byte 0x5a,0x77,0x4b,0x69, 0x1c,0x12,0x1a,0x16
1469.byte 0xe2,0x93,0xba,0x0a, 0xc0,0xa0,0x2a,0xe5
1470.byte 0x3c,0x22,0xe0,0x43, 0x12,0x1b,0x17,0x1d
1471.byte 0x0e,0x09,0x0d,0x0b, 0xf2,0x8b,0xc7,0xad
1472.byte 0x2d,0xb6,0xa8,0xb9, 0x14,0x1e,0xa9,0xc8
1473.byte 0x57,0xf1,0x19,0x85, 0xaf,0x75,0x07,0x4c
1474.byte 0xee,0x99,0xdd,0xbb, 0xa3,0x7f,0x60,0xfd
1475.byte 0xf7,0x01,0x26,0x9f, 0x5c,0x72,0xf5,0xbc
1476.byte 0x44,0x66,0x3b,0xc5, 0x5b,0xfb,0x7e,0x34
1477.byte 0x8b,0x43,0x29,0x76, 0xcb,0x23,0xc6,0xdc
1478.byte 0xb6,0xed,0xfc,0x68, 0xb8,0xe4,0xf1,0x63
1479.byte 0xd7,0x31,0xdc,0xca, 0x42,0x63,0x85,0x10
1480.byte 0x13,0x97,0x22,0x40, 0x84,0xc6,0x11,0x20
1481.byte 0x85,0x4a,0x24,0x7d, 0xd2,0xbb,0x3d,0xf8
1482.byte 0xae,0xf9,0x32,0x11, 0xc7,0x29,0xa1,0x6d
1483.byte 0x1d,0x9e,0x2f,0x4b, 0xdc,0xb2,0x30,0xf3
1484.byte 0x0d,0x86,0x52,0xec, 0x77,0xc1,0xe3,0xd0
1485.byte 0x2b,0xb3,0x16,0x6c, 0xa9,0x70,0xb9,0x99
1486.byte 0x11,0x94,0x48,0xfa, 0x47,0xe9,0x64,0x22
1487.byte 0xa8,0xfc,0x8c,0xc4, 0xa0,0xf0,0x3f,0x1a
1488.byte 0x56,0x7d,0x2c,0xd8, 0x22,0x33,0x90,0xef
1489.byte 0x87,0x49,0x4e,0xc7, 0xd9,0x38,0xd1,0xc1
1490.byte 0x8c,0xca,0xa2,0xfe, 0x98,0xd4,0x0b,0x36
1491.byte 0xa6,0xf5,0x81,0xcf, 0xa5,0x7a,0xde,0x28
1492.byte 0xda,0xb7,0x8e,0x26, 0x3f,0xad,0xbf,0xa4
1493.byte 0x2c,0x3a,0x9d,0xe4, 0x50,0x78,0x92,0x0d
1494.byte 0x6a,0x5f,0xcc,0x9b, 0x54,0x7e,0x46,0x62
1495.byte 0xf6,0x8d,0x13,0xc2, 0x90,0xd8,0xb8,0xe8
1496.byte 0x2e,0x39,0xf7,0x5e, 0x82,0xc3,0xaf,0xf5
1497.byte 0x9f,0x5d,0x80,0xbe, 0x69,0xd0,0x93,0x7c
1498.byte 0x6f,0xd5,0x2d,0xa9, 0xcf,0x25,0x12,0xb3
1499.byte 0xc8,0xac,0x99,0x3b, 0x10,0x18,0x7d,0xa7
1500.byte 0xe8,0x9c,0x63,0x6e, 0xdb,0x3b,0xbb,0x7b
1501.byte 0xcd,0x26,0x78,0x09, 0x6e,0x59,0x18,0xf4
1502.byte 0xec,0x9a,0xb7,0x01, 0x83,0x4f,0x9a,0xa8
1503.byte 0xe6,0x95,0x6e,0x65, 0xaa,0xff,0xe6,0x7e
1504.byte 0x21,0xbc,0xcf,0x08, 0xef,0x15,0xe8,0xe6
1505.byte 0xba,0xe7,0x9b,0xd9, 0x4a,0x6f,0x36,0xce
1506.byte 0xea,0x9f,0x09,0xd4, 0x29,0xb0,0x7c,0xd6
1507.byte 0x31,0xa4,0xb2,0xaf, 0x2a,0x3f,0x23,0x31
1508.byte 0xc6,0xa5,0x94,0x30, 0x35,0xa2,0x66,0xc0
1509.byte 0x74,0x4e,0xbc,0x37, 0xfc,0x82,0xca,0xa6
1510.byte 0xe0,0x90,0xd0,0xb0, 0x33,0xa7,0xd8,0x15
1511.byte 0xf1,0x04,0x98,0x4a, 0x41,0xec,0xda,0xf7
1512.byte 0x7f,0xcd,0x50,0x0e, 0x17,0x91,0xf6,0x2f
1513.byte 0x76,0x4d,0xd6,0x8d, 0x43,0xef,0xb0,0x4d
1514.byte 0xcc,0xaa,0x4d,0x54, 0xe4,0x96,0x04,0xdf
1515.byte 0x9e,0xd1,0xb5,0xe3, 0x4c,0x6a,0x88,0x1b
1516.byte 0xc1,0x2c,0x1f,0xb8, 0x46,0x65,0x51,0x7f
1517.byte 0x9d,0x5e,0xea,0x04, 0x01,0x8c,0x35,0x5d
1518.byte 0xfa,0x87,0x74,0x73, 0xfb,0x0b,0x41,0x2e
1519.byte 0xb3,0x67,0x1d,0x5a, 0x92,0xdb,0xd2,0x52
1520.byte 0xe9,0x10,0x56,0x33, 0x6d,0xd6,0x47,0x13
1521.byte 0x9a,0xd7,0x61,0x8c, 0x37,0xa1,0x0c,0x7a
1522.byte 0x59,0xf8,0x14,0x8e, 0xeb,0x13,0x3c,0x89
1523.byte 0xce,0xa9,0x27,0xee, 0xb7,0x61,0xc9,0x35
1524.byte 0xe1,0x1c,0xe5,0xed, 0x7a,0x47,0xb1,0x3c
1525.byte 0x9c,0xd2,0xdf,0x59, 0x55,0xf2,0x73,0x3f
1526.byte 0x18,0x14,0xce,0x79, 0x73,0xc7,0x37,0xbf
1527.byte 0x53,0xf7,0xcd,0xea, 0x5f,0xfd,0xaa,0x5b
1528.byte 0xdf,0x3d,0x6f,0x14, 0x78,0x44,0xdb,0x86
1529.byte 0xca,0xaf,0xf3,0x81, 0xb9,0x68,0xc4,0x3e
1530.byte 0x38,0x24,0x34,0x2c, 0xc2,0xa3,0x40,0x5f
1531.byte 0x16,0x1d,0xc3,0x72, 0xbc,0xe2,0x25,0x0c
1532.byte 0x28,0x3c,0x49,0x8b, 0xff,0x0d,0x95,0x41
1533.byte 0x39,0xa8,0x01,0x71, 0x08,0x0c,0xb3,0xde
1534.byte 0xd8,0xb4,0xe4,0x9c, 0x64,0x56,0xc1,0x90
1535.byte 0x7b,0xcb,0x84,0x61, 0xd5,0x32,0xb6,0x70
1536.byte 0x48,0x6c,0x5c,0x74, 0xd0,0xb8,0x57,0x42
1537
1538.byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38 # Td4
1539.byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
1540.byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
1541.byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
1542.byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
1543.byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
1544.byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
1545.byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
1546.byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
1547.byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
1548.byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
1549.byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
1550.byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
1551.byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
1552.byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
1553.byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
1554.byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
1555.byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
1556.byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
1557.byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
1558.byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
1559.byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
1560.byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
1561.byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
1562.byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
1563.byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
1564.byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
1565.byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
1566.byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
1567.byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
1568.byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
1569.byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
1570___
1571
1572foreach (split("\n",$code)) {
1573 s/\`([^\`]*)\`/eval $1/ge;
1574
1575 # made-up _instructions, _xtr, _ins, _ror and _bias, cope
1576 # with byte order dependencies...
1577 if (/^\s+_/) {
1578 s/(_[a-z]+\s+)(\$[0-9]+),([^,]+)(#.*)*$/$1$2,$2,$3/;
1579
1580 s/_xtr\s+(\$[0-9]+),(\$[0-9]+),([0-9]+(\-2)*)/
1581 sprintf("srl\t$1,$2,%d",$big_endian ? eval($3)
1582 : eval("24-$3"))/e or
1583 s/_ins\s+(\$[0-9]+),(\$[0-9]+),([0-9]+)/
1584 sprintf("sll\t$1,$2,%d",$big_endian ? eval($3)
1585 : eval("24-$3"))/e or
1586 s/_ror\s+(\$[0-9]+),(\$[0-9]+),(\-?[0-9]+)/
1587 sprintf("srl\t$1,$2,%d",$big_endian ? eval($3)
1588 : eval("$3*-1"))/e or
1589 s/_bias\s+(\$[0-9]+),(\$[0-9]+),([0-9]+)/
1590 sprintf("sll\t$1,$2,%d",$big_endian ? eval($3)
1591 : eval("($3-16)&31"))/e;
1592
1593 s/srl\s+(\$[0-9]+),(\$[0-9]+),\-([0-9]+)/
1594 sprintf("sll\t$1,$2,$3")/e or
1595 s/srl\s+(\$[0-9]+),(\$[0-9]+),0/
1596 sprintf("and\t$1,$2,0xff")/e or
1597 s/(sll\s+\$[0-9]+,\$[0-9]+,0)/#$1/;
1598 }
1599
1600 # convert lwl/lwr and swr/swl to little-endian order
1601 if (!$big_endian && /^\s+[sl]w[lr]\s+/) {
1602 s/([sl]wl.*)([0-9]+)\((\$[0-9]+)\)/
1603 sprintf("$1%d($3)",eval("$2-$2%4+($2%4-1)&3"))/e or
1604 s/([sl]wr.*)([0-9]+)\((\$[0-9]+)\)/
1605 sprintf("$1%d($3)",eval("$2-$2%4+($2%4+1)&3"))/e;
1606 }
1607
1608 print $_,"\n";
1609}
1610
1611close STDOUT;
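
For illustration, a minimal stand-alone Perl sketch of the byte-order rewrite that the post-processing loop above applies to the made-up _xtr mnemonic; the $big_endian value and the $8/$17 register numbers are assumptions chosen for the example, not taken from any particular build:

#!/usr/bin/env perl
use strict;
use warnings;

# Sketch: fold _xtr into a single srl, as the substitution above does.
# A big-endian target keeps the written count (16-2 = 14: bits 16..23,
# pre-scaled by 4 for Te/Td indexing); a little-endian target evaluates
# "24-16-2" = 6, because the byte that big-endian code finds at bits
# 16..23 sits at bits 8..15 after a little-endian load, and the -2
# scaling carries through unchanged.
my $big_endian = 0;                     # assumption: little-endian target

my $line = "_xtr\t\$8,\$17,16-2";
$line =~ s/_xtr\s+(\$[0-9]+),(\$[0-9]+),([0-9]+(\-2)*)/
        sprintf("srl\t$1,$2,%d",$big_endian ? eval($3) : eval("24-$3"))/e;

print $line,"\n";                       # prints "srl\t$8,$17,6"

Folding the endian swap and the x4 table-index scaling into one evaluated shift count is what lets the same generated text serve both byte orders.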
diff --git a/src/lib/libssl/src/crypto/aes/asm/aes-parisc.pl b/src/lib/libssl/src/crypto/aes/asm/aes-parisc.pl
new file mode 100644
index 0000000000..c36b6a2270
--- /dev/null
+++ b/src/lib/libssl/src/crypto/aes/asm/aes-parisc.pl
@@ -0,0 +1,1021 @@
1#!/usr/bin/env perl
2
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9
10# AES for PA-RISC.
11#
12# June 2009.
13#
14# The module is a mechanical transliteration of aes-sparcv9.pl, but with
15# a twist: S-boxes are compressed even further down to 1K+256B. On
16# PA-7100LC performance is ~40% better than gcc 3.2 generated code and
17# is about 33 cycles per byte processed with 128-bit key. Newer CPUs
18# perform at 16 cycles per byte. It's not faster than code generated
19# by the vendor compiler, but recall that it has compressed S-boxes, which
20# requires extra processing.
21#
22# Special thanks to polarhome.com for providing HP-UX account.
23
24$flavour = shift;
25$output = shift;
26open STDOUT,">$output";
27
28if ($flavour =~ /64/) {
29 $LEVEL ="2.0W";
30 $SIZE_T =8;
31 $FRAME_MARKER =80;
32 $SAVED_RP =16;
33 $PUSH ="std";
34 $PUSHMA ="std,ma";
35 $POP ="ldd";
36 $POPMB ="ldd,mb";
37} else {
38 $LEVEL ="1.0";
39 $SIZE_T =4;
40 $FRAME_MARKER =48;
41 $SAVED_RP =20;
42 $PUSH ="stw";
43 $PUSHMA ="stwm";
44 $POP ="ldw";
45 $POPMB ="ldwm";
46}
47
48$FRAME=16*$SIZE_T+$FRAME_MARKER;# 16 saved regs + frame marker
49 # [+ argument transfer]
50$inp="%r26"; # arg0
51$out="%r25"; # arg1
52$key="%r24"; # arg2
53
54($s0,$s1,$s2,$s3) = ("%r1","%r2","%r3","%r4");
55($t0,$t1,$t2,$t3) = ("%r5","%r6","%r7","%r8");
56
57($acc0, $acc1, $acc2, $acc3, $acc4, $acc5, $acc6, $acc7,
58 $acc8, $acc9,$acc10,$acc11,$acc12,$acc13,$acc14,$acc15) =
59("%r9","%r10","%r11","%r12","%r13","%r14","%r15","%r16",
60"%r17","%r18","%r19","%r20","%r21","%r22","%r23","%r26");
61
62$tbl="%r28";
63$rounds="%r29";
64
65$code=<<___;
66 .LEVEL $LEVEL
67 .SPACE \$TEXT\$
68 .SUBSPA \$CODE\$,QUAD=0,ALIGN=8,ACCESS=0x2C,CODE_ONLY
69
70 .EXPORT AES_encrypt,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR
71 .ALIGN 64
72AES_encrypt
73 .PROC
74 .CALLINFO FRAME=`$FRAME-16*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=18
75 .ENTRY
76 $PUSH %r2,-$SAVED_RP(%sp) ; standard prologue
77 $PUSHMA %r3,$FRAME(%sp)
78 $PUSH %r4,`-$FRAME+1*$SIZE_T`(%sp)
79 $PUSH %r5,`-$FRAME+2*$SIZE_T`(%sp)
80 $PUSH %r6,`-$FRAME+3*$SIZE_T`(%sp)
81 $PUSH %r7,`-$FRAME+4*$SIZE_T`(%sp)
82 $PUSH %r8,`-$FRAME+5*$SIZE_T`(%sp)
83 $PUSH %r9,`-$FRAME+6*$SIZE_T`(%sp)
84 $PUSH %r10,`-$FRAME+7*$SIZE_T`(%sp)
85 $PUSH %r11,`-$FRAME+8*$SIZE_T`(%sp)
86 $PUSH %r12,`-$FRAME+9*$SIZE_T`(%sp)
87 $PUSH %r13,`-$FRAME+10*$SIZE_T`(%sp)
88 $PUSH %r14,`-$FRAME+11*$SIZE_T`(%sp)
89 $PUSH %r15,`-$FRAME+12*$SIZE_T`(%sp)
90 $PUSH %r16,`-$FRAME+13*$SIZE_T`(%sp)
91 $PUSH %r17,`-$FRAME+14*$SIZE_T`(%sp)
92 $PUSH %r18,`-$FRAME+15*$SIZE_T`(%sp)
93
94 blr %r0,$tbl
95 ldi 3,$t0
96L\$enc_pic
97 andcm $tbl,$t0,$tbl
98 ldo L\$AES_Te-L\$enc_pic($tbl),$tbl
99
100 and $inp,$t0,$t0
101 sub $inp,$t0,$inp
102 ldw 0($inp),$s0
103 ldw 4($inp),$s1
104 ldw 8($inp),$s2
105 comib,= 0,$t0,L\$enc_inp_aligned
106 ldw 12($inp),$s3
107
108 sh3addl $t0,%r0,$t0
109 subi 32,$t0,$t0
110 mtctl $t0,%cr11
111 ldw 16($inp),$t1
112 vshd $s0,$s1,$s0
113 vshd $s1,$s2,$s1
114 vshd $s2,$s3,$s2
115 vshd $s3,$t1,$s3
116
117L\$enc_inp_aligned
118 bl _parisc_AES_encrypt,%r31
119 nop
120
121 extru,<> $out,31,2,%r0
122 b L\$enc_out_aligned
123 nop
124
125 _srm $s0,24,$acc0
126 _srm $s0,16,$acc1
127 stb $acc0,0($out)
128 _srm $s0,8,$acc2
129 stb $acc1,1($out)
130 _srm $s1,24,$acc4
131 stb $acc2,2($out)
132 _srm $s1,16,$acc5
133 stb $s0,3($out)
134 _srm $s1,8,$acc6
135 stb $acc4,4($out)
136 _srm $s2,24,$acc0
137 stb $acc5,5($out)
138 _srm $s2,16,$acc1
139 stb $acc6,6($out)
140 _srm $s2,8,$acc2
141 stb $s1,7($out)
142 _srm $s3,24,$acc4
143 stb $acc0,8($out)
144 _srm $s3,16,$acc5
145 stb $acc1,9($out)
146 _srm $s3,8,$acc6
147 stb $acc2,10($out)
148 stb $s2,11($out)
149 stb $acc4,12($out)
150 stb $acc5,13($out)
151 stb $acc6,14($out)
152 b L\$enc_done
153 stb $s3,15($out)
154
155L\$enc_out_aligned
156 stw $s0,0($out)
157 stw $s1,4($out)
158 stw $s2,8($out)
159 stw $s3,12($out)
160
161L\$enc_done
162 $POP `-$FRAME-$SAVED_RP`(%sp),%r2 ; standard epilogue
163 $POP `-$FRAME+1*$SIZE_T`(%sp),%r4
164 $POP `-$FRAME+2*$SIZE_T`(%sp),%r5
165 $POP `-$FRAME+3*$SIZE_T`(%sp),%r6
166 $POP `-$FRAME+4*$SIZE_T`(%sp),%r7
167 $POP `-$FRAME+5*$SIZE_T`(%sp),%r8
168 $POP `-$FRAME+6*$SIZE_T`(%sp),%r9
169 $POP `-$FRAME+7*$SIZE_T`(%sp),%r10
170 $POP `-$FRAME+8*$SIZE_T`(%sp),%r11
171 $POP `-$FRAME+9*$SIZE_T`(%sp),%r12
172 $POP `-$FRAME+10*$SIZE_T`(%sp),%r13
173 $POP `-$FRAME+11*$SIZE_T`(%sp),%r14
174 $POP `-$FRAME+12*$SIZE_T`(%sp),%r15
175 $POP `-$FRAME+13*$SIZE_T`(%sp),%r16
176 $POP `-$FRAME+14*$SIZE_T`(%sp),%r17
177 $POP `-$FRAME+15*$SIZE_T`(%sp),%r18
178 bv (%r2)
179 .EXIT
180 $POPMB -$FRAME(%sp),%r3
181 .PROCEND
182
183 .ALIGN 16
184_parisc_AES_encrypt
185 .PROC
186 .CALLINFO MILLICODE
187 .ENTRY
188 ldw 240($key),$rounds
189 ldw 0($key),$t0
190 ldw 4($key),$t1
191 ldw 8($key),$t2
192 _srm $rounds,1,$rounds
193 xor $t0,$s0,$s0
194 ldw 12($key),$t3
195 _srm $s0,24,$acc0
196 xor $t1,$s1,$s1
197 ldw 16($key),$t0
198 _srm $s1,16,$acc1
199 xor $t2,$s2,$s2
200 ldw 20($key),$t1
201 xor $t3,$s3,$s3
202 ldw 24($key),$t2
203 ldw 28($key),$t3
204L\$enc_loop
205 _srm $s2,8,$acc2
206 ldwx,s $acc0($tbl),$acc0
207 _srm $s3,0,$acc3
208 ldwx,s $acc1($tbl),$acc1
209 _srm $s1,24,$acc4
210 ldwx,s $acc2($tbl),$acc2
211 _srm $s2,16,$acc5
212 ldwx,s $acc3($tbl),$acc3
213 _srm $s3,8,$acc6
214 ldwx,s $acc4($tbl),$acc4
215 _srm $s0,0,$acc7
216 ldwx,s $acc5($tbl),$acc5
217 _srm $s2,24,$acc8
218 ldwx,s $acc6($tbl),$acc6
219 _srm $s3,16,$acc9
220 ldwx,s $acc7($tbl),$acc7
221 _srm $s0,8,$acc10
222 ldwx,s $acc8($tbl),$acc8
223 _srm $s1,0,$acc11
224 ldwx,s $acc9($tbl),$acc9
225 _srm $s3,24,$acc12
226 ldwx,s $acc10($tbl),$acc10
227 _srm $s0,16,$acc13
228 ldwx,s $acc11($tbl),$acc11
229 _srm $s1,8,$acc14
230 ldwx,s $acc12($tbl),$acc12
231 _srm $s2,0,$acc15
232 ldwx,s $acc13($tbl),$acc13
233 ldwx,s $acc14($tbl),$acc14
234 ldwx,s $acc15($tbl),$acc15
235 addib,= -1,$rounds,L\$enc_last
236 ldo 32($key),$key
237
238 _ror $acc1,8,$acc1
239 xor $acc0,$t0,$t0
240 ldw 0($key),$s0
241 _ror $acc2,16,$acc2
242 xor $acc1,$t0,$t0
243 ldw 4($key),$s1
244 _ror $acc3,24,$acc3
245 xor $acc2,$t0,$t0
246 ldw 8($key),$s2
247 _ror $acc5,8,$acc5
248 xor $acc3,$t0,$t0
249 ldw 12($key),$s3
250 _ror $acc6,16,$acc6
251 xor $acc4,$t1,$t1
252 _ror $acc7,24,$acc7
253 xor $acc5,$t1,$t1
254 _ror $acc9,8,$acc9
255 xor $acc6,$t1,$t1
256 _ror $acc10,16,$acc10
257 xor $acc7,$t1,$t1
258 _ror $acc11,24,$acc11
259 xor $acc8,$t2,$t2
260 _ror $acc13,8,$acc13
261 xor $acc9,$t2,$t2
262 _ror $acc14,16,$acc14
263 xor $acc10,$t2,$t2
264 _ror $acc15,24,$acc15
265 xor $acc11,$t2,$t2
266 xor $acc12,$acc14,$acc14
267 xor $acc13,$t3,$t3
268 _srm $t0,24,$acc0
269 xor $acc14,$t3,$t3
270 _srm $t1,16,$acc1
271 xor $acc15,$t3,$t3
272
273 _srm $t2,8,$acc2
274 ldwx,s $acc0($tbl),$acc0
275 _srm $t3,0,$acc3
276 ldwx,s $acc1($tbl),$acc1
277 _srm $t1,24,$acc4
278 ldwx,s $acc2($tbl),$acc2
279 _srm $t2,16,$acc5
280 ldwx,s $acc3($tbl),$acc3
281 _srm $t3,8,$acc6
282 ldwx,s $acc4($tbl),$acc4
283 _srm $t0,0,$acc7
284 ldwx,s $acc5($tbl),$acc5
285 _srm $t2,24,$acc8
286 ldwx,s $acc6($tbl),$acc6
287 _srm $t3,16,$acc9
288 ldwx,s $acc7($tbl),$acc7
289 _srm $t0,8,$acc10
290 ldwx,s $acc8($tbl),$acc8
291 _srm $t1,0,$acc11
292 ldwx,s $acc9($tbl),$acc9
293 _srm $t3,24,$acc12
294 ldwx,s $acc10($tbl),$acc10
295 _srm $t0,16,$acc13
296 ldwx,s $acc11($tbl),$acc11
297 _srm $t1,8,$acc14
298 ldwx,s $acc12($tbl),$acc12
299 _srm $t2,0,$acc15
300 ldwx,s $acc13($tbl),$acc13
301 _ror $acc1,8,$acc1
302 ldwx,s $acc14($tbl),$acc14
303
304 _ror $acc2,16,$acc2
305 xor $acc0,$s0,$s0
306 ldwx,s $acc15($tbl),$acc15
307 _ror $acc3,24,$acc3
308 xor $acc1,$s0,$s0
309 ldw 16($key),$t0
310 _ror $acc5,8,$acc5
311 xor $acc2,$s0,$s0
312 ldw 20($key),$t1
313 _ror $acc6,16,$acc6
314 xor $acc3,$s0,$s0
315 ldw 24($key),$t2
316 _ror $acc7,24,$acc7
317 xor $acc4,$s1,$s1
318 ldw 28($key),$t3
319 _ror $acc9,8,$acc9
320 xor $acc5,$s1,$s1
321 ldw 1024+0($tbl),%r0 ; prefetch te4
322 _ror $acc10,16,$acc10
323 xor $acc6,$s1,$s1
324 ldw 1024+32($tbl),%r0 ; prefetch te4
325 _ror $acc11,24,$acc11
326 xor $acc7,$s1,$s1
327 ldw 1024+64($tbl),%r0 ; prefetch te4
328 _ror $acc13,8,$acc13
329 xor $acc8,$s2,$s2
330 ldw 1024+96($tbl),%r0 ; prefetch te4
331 _ror $acc14,16,$acc14
332 xor $acc9,$s2,$s2
333 ldw 1024+128($tbl),%r0 ; prefetch te4
334 _ror $acc15,24,$acc15
335 xor $acc10,$s2,$s2
336 ldw 1024+160($tbl),%r0 ; prefetch te4
337 _srm $s0,24,$acc0
338 xor $acc11,$s2,$s2
339 ldw 1024+192($tbl),%r0 ; prefetch te4
340 xor $acc12,$acc14,$acc14
341 xor $acc13,$s3,$s3
342 ldw 1024+224($tbl),%r0 ; prefetch te4
343 _srm $s1,16,$acc1
344 xor $acc14,$s3,$s3
345 b L\$enc_loop
346 xor $acc15,$s3,$s3
347
348 .ALIGN 16
349L\$enc_last
350 ldo 1024($tbl),$rounds
351 _ror $acc1,8,$acc1
352 xor $acc0,$t0,$t0
353 ldw 0($key),$s0
354 _ror $acc2,16,$acc2
355 xor $acc1,$t0,$t0
356 ldw 4($key),$s1
357 _ror $acc3,24,$acc3
358 xor $acc2,$t0,$t0
359 ldw 8($key),$s2
360 _ror $acc5,8,$acc5
361 xor $acc3,$t0,$t0
362 ldw 12($key),$s3
363 _ror $acc6,16,$acc6
364 xor $acc4,$t1,$t1
365 _ror $acc7,24,$acc7
366 xor $acc5,$t1,$t1
367 _ror $acc9,8,$acc9
368 xor $acc6,$t1,$t1
369 _ror $acc10,16,$acc10
370 xor $acc7,$t1,$t1
371 _ror $acc11,24,$acc11
372 xor $acc8,$t2,$t2
373 _ror $acc13,8,$acc13
374 xor $acc9,$t2,$t2
375 _ror $acc14,16,$acc14
376 xor $acc10,$t2,$t2
377 _ror $acc15,24,$acc15
378 xor $acc11,$t2,$t2
379 xor $acc12,$acc14,$acc14
380 xor $acc13,$t3,$t3
381 _srm $t0,24,$acc0
382 xor $acc14,$t3,$t3
383 _srm $t1,16,$acc1
384 xor $acc15,$t3,$t3
385
386 _srm $t2,8,$acc2
387 ldbx $acc0($rounds),$acc0
388 _srm $t1,24,$acc4
389 ldbx $acc1($rounds),$acc1
390 _srm $t2,16,$acc5
391 _srm $t3,0,$acc3
392 ldbx $acc2($rounds),$acc2
393 ldbx $acc3($rounds),$acc3
394 _srm $t3,8,$acc6
395 ldbx $acc4($rounds),$acc4
396 _srm $t2,24,$acc8
397 ldbx $acc5($rounds),$acc5
398 _srm $t3,16,$acc9
399 _srm $t0,0,$acc7
400 ldbx $acc6($rounds),$acc6
401 ldbx $acc7($rounds),$acc7
402 _srm $t0,8,$acc10
403 ldbx $acc8($rounds),$acc8
404 _srm $t3,24,$acc12
405 ldbx $acc9($rounds),$acc9
406 _srm $t0,16,$acc13
407 _srm $t1,0,$acc11
408 ldbx $acc10($rounds),$acc10
409 _srm $t1,8,$acc14
410 ldbx $acc11($rounds),$acc11
411 ldbx $acc12($rounds),$acc12
412 ldbx $acc13($rounds),$acc13
413 _srm $t2,0,$acc15
414 ldbx $acc14($rounds),$acc14
415
416 dep $acc0,7,8,$acc3
417 ldbx $acc15($rounds),$acc15
418 dep $acc4,7,8,$acc7
419 dep $acc1,15,8,$acc3
420 dep $acc5,15,8,$acc7
421 dep $acc2,23,8,$acc3
422 dep $acc6,23,8,$acc7
423 xor $acc3,$s0,$s0
424 xor $acc7,$s1,$s1
425 dep $acc8,7,8,$acc11
426 dep $acc12,7,8,$acc15
427 dep $acc9,15,8,$acc11
428 dep $acc13,15,8,$acc15
429 dep $acc10,23,8,$acc11
430 dep $acc14,23,8,$acc15
431 xor $acc11,$s2,$s2
432
433 bv (%r31)
434 .EXIT
435 xor $acc15,$s3,$s3
436 .PROCEND
437
438 .ALIGN 64
439L\$AES_Te
440 .WORD 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d
441 .WORD 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554
442 .WORD 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d
443 .WORD 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a
444 .WORD 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87
445 .WORD 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b
446 .WORD 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea
447 .WORD 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b
448 .WORD 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a
449 .WORD 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f
450 .WORD 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108
451 .WORD 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f
452 .WORD 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e
453 .WORD 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5
454 .WORD 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d
455 .WORD 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f
456 .WORD 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e
457 .WORD 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb
458 .WORD 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce
459 .WORD 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497
460 .WORD 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c
461 .WORD 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed
462 .WORD 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b
463 .WORD 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a
464 .WORD 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16
465 .WORD 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594
466 .WORD 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81
467 .WORD 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3
468 .WORD 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a
469 .WORD 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504
470 .WORD 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163
471 .WORD 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d
472 .WORD 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f
473 .WORD 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739
474 .WORD 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47
475 .WORD 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395
476 .WORD 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f
477 .WORD 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883
478 .WORD 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c
479 .WORD 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76
480 .WORD 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e
481 .WORD 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4
482 .WORD 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6
483 .WORD 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b
484 .WORD 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7
485 .WORD 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0
486 .WORD 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25
487 .WORD 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818
488 .WORD 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72
489 .WORD 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651
490 .WORD 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21
491 .WORD 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85
492 .WORD 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa
493 .WORD 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12
494 .WORD 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0
495 .WORD 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9
496 .WORD 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133
497 .WORD 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7
498 .WORD 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920
499 .WORD 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a
500 .WORD 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17
501 .WORD 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8
502 .WORD 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11
503 .WORD 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a
504 .BYTE 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
505 .BYTE 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
506 .BYTE 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
507 .BYTE 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
508 .BYTE 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
509 .BYTE 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
510 .BYTE 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
511 .BYTE 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
512 .BYTE 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
513 .BYTE 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
514 .BYTE 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
515 .BYTE 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
516 .BYTE 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
517 .BYTE 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
518 .BYTE 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
519 .BYTE 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
520 .BYTE 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
521 .BYTE 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
522 .BYTE 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
523 .BYTE 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
524 .BYTE 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
525 .BYTE 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
526 .BYTE 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
527 .BYTE 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
528 .BYTE 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
529 .BYTE 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
530 .BYTE 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
531 .BYTE 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
532 .BYTE 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
533 .BYTE 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
534 .BYTE 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
535 .BYTE 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
536___
537
538$code.=<<___;
539 .EXPORT AES_decrypt,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR
540 .ALIGN 16
541AES_decrypt
542 .PROC
543 .CALLINFO FRAME=`$FRAME-16*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=18
544 .ENTRY
545 $PUSH %r2,-$SAVED_RP(%sp) ; standard prologue
546 $PUSHMA %r3,$FRAME(%sp)
547 $PUSH %r4,`-$FRAME+1*$SIZE_T`(%sp)
548 $PUSH %r5,`-$FRAME+2*$SIZE_T`(%sp)
549 $PUSH %r6,`-$FRAME+3*$SIZE_T`(%sp)
550 $PUSH %r7,`-$FRAME+4*$SIZE_T`(%sp)
551 $PUSH %r8,`-$FRAME+5*$SIZE_T`(%sp)
552 $PUSH %r9,`-$FRAME+6*$SIZE_T`(%sp)
553 $PUSH %r10,`-$FRAME+7*$SIZE_T`(%sp)
554 $PUSH %r11,`-$FRAME+8*$SIZE_T`(%sp)
555 $PUSH %r12,`-$FRAME+9*$SIZE_T`(%sp)
556 $PUSH %r13,`-$FRAME+10*$SIZE_T`(%sp)
557 $PUSH %r14,`-$FRAME+11*$SIZE_T`(%sp)
558 $PUSH %r15,`-$FRAME+12*$SIZE_T`(%sp)
559 $PUSH %r16,`-$FRAME+13*$SIZE_T`(%sp)
560 $PUSH %r17,`-$FRAME+14*$SIZE_T`(%sp)
561 $PUSH %r18,`-$FRAME+15*$SIZE_T`(%sp)
562
563 blr %r0,$tbl
564 ldi 3,$t0
565L\$dec_pic
566 andcm $tbl,$t0,$tbl
567 ldo L\$AES_Td-L\$dec_pic($tbl),$tbl
568
569 and $inp,$t0,$t0
570 sub $inp,$t0,$inp
571 ldw 0($inp),$s0
572 ldw 4($inp),$s1
573 ldw 8($inp),$s2
574 comib,= 0,$t0,L\$dec_inp_aligned
575 ldw 12($inp),$s3
576
577 sh3addl $t0,%r0,$t0
578 subi 32,$t0,$t0
579 mtctl $t0,%cr11
580 ldw 16($inp),$t1
581 vshd $s0,$s1,$s0
582 vshd $s1,$s2,$s1
583 vshd $s2,$s3,$s2
584 vshd $s3,$t1,$s3
585
586L\$dec_inp_aligned
587 bl _parisc_AES_decrypt,%r31
588 nop
589
590 extru,<> $out,31,2,%r0
591 b L\$dec_out_aligned
592 nop
593
594 _srm $s0,24,$acc0
595 _srm $s0,16,$acc1
596 stb $acc0,0($out)
597 _srm $s0,8,$acc2
598 stb $acc1,1($out)
599 _srm $s1,24,$acc4
600 stb $acc2,2($out)
601 _srm $s1,16,$acc5
602 stb $s0,3($out)
603 _srm $s1,8,$acc6
604 stb $acc4,4($out)
605 _srm $s2,24,$acc0
606 stb $acc5,5($out)
607 _srm $s2,16,$acc1
608 stb $acc6,6($out)
609 _srm $s2,8,$acc2
610 stb $s1,7($out)
611 _srm $s3,24,$acc4
612 stb $acc0,8($out)
613 _srm $s3,16,$acc5
614 stb $acc1,9($out)
615 _srm $s3,8,$acc6
616 stb $acc2,10($out)
617 stb $s2,11($out)
618 stb $acc4,12($out)
619 stb $acc5,13($out)
620 stb $acc6,14($out)
621 b L\$dec_done
622 stb $s3,15($out)
623
624L\$dec_out_aligned
625 stw $s0,0($out)
626 stw $s1,4($out)
627 stw $s2,8($out)
628 stw $s3,12($out)
629
630L\$dec_done
631 $POP `-$FRAME-$SAVED_RP`(%sp),%r2 ; standard epilogue
632 $POP `-$FRAME+1*$SIZE_T`(%sp),%r4
633 $POP `-$FRAME+2*$SIZE_T`(%sp),%r5
634 $POP `-$FRAME+3*$SIZE_T`(%sp),%r6
635 $POP `-$FRAME+4*$SIZE_T`(%sp),%r7
636 $POP `-$FRAME+5*$SIZE_T`(%sp),%r8
637 $POP `-$FRAME+6*$SIZE_T`(%sp),%r9
638 $POP `-$FRAME+7*$SIZE_T`(%sp),%r10
639 $POP `-$FRAME+8*$SIZE_T`(%sp),%r11
640 $POP `-$FRAME+9*$SIZE_T`(%sp),%r12
641 $POP `-$FRAME+10*$SIZE_T`(%sp),%r13
642 $POP `-$FRAME+11*$SIZE_T`(%sp),%r14
643 $POP `-$FRAME+12*$SIZE_T`(%sp),%r15
644 $POP `-$FRAME+13*$SIZE_T`(%sp),%r16
645 $POP `-$FRAME+14*$SIZE_T`(%sp),%r17
646 $POP `-$FRAME+15*$SIZE_T`(%sp),%r18
647 bv (%r2)
648 .EXIT
649 $POPMB -$FRAME(%sp),%r3
650 .PROCEND
651
652 .ALIGN 16
653_parisc_AES_decrypt
654 .PROC
655 .CALLINFO MILLICODE
656 .ENTRY
657 ldw 240($key),$rounds
658 ldw 0($key),$t0
659 ldw 4($key),$t1
660 ldw 8($key),$t2
661 ldw 12($key),$t3
662 _srm $rounds,1,$rounds
663 xor $t0,$s0,$s0
664 ldw 16($key),$t0
665 xor $t1,$s1,$s1
666 ldw 20($key),$t1
667 _srm $s0,24,$acc0
668 xor $t2,$s2,$s2
669 ldw 24($key),$t2
670 xor $t3,$s3,$s3
671 ldw 28($key),$t3
672 _srm $s3,16,$acc1
673L\$dec_loop
674 _srm $s2,8,$acc2
675 ldwx,s $acc0($tbl),$acc0
676 _srm $s1,0,$acc3
677 ldwx,s $acc1($tbl),$acc1
678 _srm $s1,24,$acc4
679 ldwx,s $acc2($tbl),$acc2
680 _srm $s0,16,$acc5
681 ldwx,s $acc3($tbl),$acc3
682 _srm $s3,8,$acc6
683 ldwx,s $acc4($tbl),$acc4
684 _srm $s2,0,$acc7
685 ldwx,s $acc5($tbl),$acc5
686 _srm $s2,24,$acc8
687 ldwx,s $acc6($tbl),$acc6
688 _srm $s1,16,$acc9
689 ldwx,s $acc7($tbl),$acc7
690 _srm $s0,8,$acc10
691 ldwx,s $acc8($tbl),$acc8
692 _srm $s3,0,$acc11
693 ldwx,s $acc9($tbl),$acc9
694 _srm $s3,24,$acc12
695 ldwx,s $acc10($tbl),$acc10
696 _srm $s2,16,$acc13
697 ldwx,s $acc11($tbl),$acc11
698 _srm $s1,8,$acc14
699 ldwx,s $acc12($tbl),$acc12
700 _srm $s0,0,$acc15
701 ldwx,s $acc13($tbl),$acc13
702 ldwx,s $acc14($tbl),$acc14
703 ldwx,s $acc15($tbl),$acc15
704 addib,= -1,$rounds,L\$dec_last
705 ldo 32($key),$key
706
707 _ror $acc1,8,$acc1
708 xor $acc0,$t0,$t0
709 ldw 0($key),$s0
710 _ror $acc2,16,$acc2
711 xor $acc1,$t0,$t0
712 ldw 4($key),$s1
713 _ror $acc3,24,$acc3
714 xor $acc2,$t0,$t0
715 ldw 8($key),$s2
716 _ror $acc5,8,$acc5
717 xor $acc3,$t0,$t0
718 ldw 12($key),$s3
719 _ror $acc6,16,$acc6
720 xor $acc4,$t1,$t1
721 _ror $acc7,24,$acc7
722 xor $acc5,$t1,$t1
723 _ror $acc9,8,$acc9
724 xor $acc6,$t1,$t1
725 _ror $acc10,16,$acc10
726 xor $acc7,$t1,$t1
727 _ror $acc11,24,$acc11
728 xor $acc8,$t2,$t2
729 _ror $acc13,8,$acc13
730 xor $acc9,$t2,$t2
731 _ror $acc14,16,$acc14
732 xor $acc10,$t2,$t2
733 _ror $acc15,24,$acc15
734 xor $acc11,$t2,$t2
735 xor $acc12,$acc14,$acc14
736 xor $acc13,$t3,$t3
737 _srm $t0,24,$acc0
738 xor $acc14,$t3,$t3
739 xor $acc15,$t3,$t3
740 _srm $t3,16,$acc1
741
742 _srm $t2,8,$acc2
743 ldwx,s $acc0($tbl),$acc0
744 _srm $t1,0,$acc3
745 ldwx,s $acc1($tbl),$acc1
746 _srm $t1,24,$acc4
747 ldwx,s $acc2($tbl),$acc2
748 _srm $t0,16,$acc5
749 ldwx,s $acc3($tbl),$acc3
750 _srm $t3,8,$acc6
751 ldwx,s $acc4($tbl),$acc4
752 _srm $t2,0,$acc7
753 ldwx,s $acc5($tbl),$acc5
754 _srm $t2,24,$acc8
755 ldwx,s $acc6($tbl),$acc6
756 _srm $t1,16,$acc9
757 ldwx,s $acc7($tbl),$acc7
758 _srm $t0,8,$acc10
759 ldwx,s $acc8($tbl),$acc8
760 _srm $t3,0,$acc11
761 ldwx,s $acc9($tbl),$acc9
762 _srm $t3,24,$acc12
763 ldwx,s $acc10($tbl),$acc10
764 _srm $t2,16,$acc13
765 ldwx,s $acc11($tbl),$acc11
766 _srm $t1,8,$acc14
767 ldwx,s $acc12($tbl),$acc12
768 _srm $t0,0,$acc15
769 ldwx,s $acc13($tbl),$acc13
770 _ror $acc1,8,$acc1
771 ldwx,s $acc14($tbl),$acc14
772
773 _ror $acc2,16,$acc2
774 xor $acc0,$s0,$s0
775 ldwx,s $acc15($tbl),$acc15
776 _ror $acc3,24,$acc3
777 xor $acc1,$s0,$s0
778 ldw 16($key),$t0
779 _ror $acc5,8,$acc5
780 xor $acc2,$s0,$s0
781 ldw 20($key),$t1
782 _ror $acc6,16,$acc6
783 xor $acc3,$s0,$s0
784 ldw 24($key),$t2
785 _ror $acc7,24,$acc7
786 xor $acc4,$s1,$s1
787 ldw 28($key),$t3
788 _ror $acc9,8,$acc9
789 xor $acc5,$s1,$s1
790 ldw 1024+0($tbl),%r0 ; prefetch td4
791 _ror $acc10,16,$acc10
792 xor $acc6,$s1,$s1
793 ldw 1024+32($tbl),%r0 ; prefetch td4
794 _ror $acc11,24,$acc11
795 xor $acc7,$s1,$s1
796 ldw 1024+64($tbl),%r0 ; prefetch td4
797 _ror $acc13,8,$acc13
798 xor $acc8,$s2,$s2
799 ldw 1024+96($tbl),%r0 ; prefetch td4
800 _ror $acc14,16,$acc14
801 xor $acc9,$s2,$s2
802 ldw 1024+128($tbl),%r0 ; prefetch td4
803 _ror $acc15,24,$acc15
804 xor $acc10,$s2,$s2
805 ldw 1024+160($tbl),%r0 ; prefetch td4
806 _srm $s0,24,$acc0
807 xor $acc11,$s2,$s2
808 ldw 1024+192($tbl),%r0 ; prefetch td4
809 xor $acc12,$acc14,$acc14
810 xor $acc13,$s3,$s3
811 ldw 1024+224($tbl),%r0 ; prefetch td4
812 xor $acc14,$s3,$s3
813 xor $acc15,$s3,$s3
814 b L\$dec_loop
815 _srm $s3,16,$acc1
816
817 .ALIGN 16
818L\$dec_last
819 ldo 1024($tbl),$rounds
820 _ror $acc1,8,$acc1
821 xor $acc0,$t0,$t0
822 ldw 0($key),$s0
823 _ror $acc2,16,$acc2
824 xor $acc1,$t0,$t0
825 ldw 4($key),$s1
826 _ror $acc3,24,$acc3
827 xor $acc2,$t0,$t0
828 ldw 8($key),$s2
829 _ror $acc5,8,$acc5
830 xor $acc3,$t0,$t0
831 ldw 12($key),$s3
832 _ror $acc6,16,$acc6
833 xor $acc4,$t1,$t1
834 _ror $acc7,24,$acc7
835 xor $acc5,$t1,$t1
836 _ror $acc9,8,$acc9
837 xor $acc6,$t1,$t1
838 _ror $acc10,16,$acc10
839 xor $acc7,$t1,$t1
840 _ror $acc11,24,$acc11
841 xor $acc8,$t2,$t2
842 _ror $acc13,8,$acc13
843 xor $acc9,$t2,$t2
844 _ror $acc14,16,$acc14
845 xor $acc10,$t2,$t2
846 _ror $acc15,24,$acc15
847 xor $acc11,$t2,$t2
848 xor $acc12,$acc14,$acc14
849 xor $acc13,$t3,$t3
850 _srm $t0,24,$acc0
851 xor $acc14,$t3,$t3
852 xor $acc15,$t3,$t3
853 _srm $t3,16,$acc1
854
855 _srm $t2,8,$acc2
856 ldbx $acc0($rounds),$acc0
857 _srm $t1,24,$acc4
858 ldbx $acc1($rounds),$acc1
859 _srm $t0,16,$acc5
860 _srm $t1,0,$acc3
861 ldbx $acc2($rounds),$acc2
862 ldbx $acc3($rounds),$acc3
863 _srm $t3,8,$acc6
864 ldbx $acc4($rounds),$acc4
865 _srm $t2,24,$acc8
866 ldbx $acc5($rounds),$acc5
867 _srm $t1,16,$acc9
868 _srm $t2,0,$acc7
869 ldbx $acc6($rounds),$acc6
870 ldbx $acc7($rounds),$acc7
871 _srm $t0,8,$acc10
872 ldbx $acc8($rounds),$acc8
873 _srm $t3,24,$acc12
874 ldbx $acc9($rounds),$acc9
875 _srm $t2,16,$acc13
876 _srm $t3,0,$acc11
877 ldbx $acc10($rounds),$acc10
878 _srm $t1,8,$acc14
879 ldbx $acc11($rounds),$acc11
880 ldbx $acc12($rounds),$acc12
881 ldbx $acc13($rounds),$acc13
882 _srm $t0,0,$acc15
883 ldbx $acc14($rounds),$acc14
884
885 dep $acc0,7,8,$acc3
886 ldbx $acc15($rounds),$acc15
887 dep $acc4,7,8,$acc7
888 dep $acc1,15,8,$acc3
889 dep $acc5,15,8,$acc7
890 dep $acc2,23,8,$acc3
891 dep $acc6,23,8,$acc7
892 xor $acc3,$s0,$s0
893 xor $acc7,$s1,$s1
894 dep $acc8,7,8,$acc11
895 dep $acc12,7,8,$acc15
896 dep $acc9,15,8,$acc11
897 dep $acc13,15,8,$acc15
898 dep $acc10,23,8,$acc11
899 dep $acc14,23,8,$acc15
900 xor $acc11,$s2,$s2
901
902 bv (%r31)
903 .EXIT
904 xor $acc15,$s3,$s3
905 .PROCEND
906
907 .ALIGN 64
908L\$AES_Td
909 .WORD 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96
910 .WORD 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393
911 .WORD 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25
912 .WORD 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f
913 .WORD 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1
914 .WORD 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6
915 .WORD 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da
916 .WORD 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844
917 .WORD 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd
918 .WORD 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4
919 .WORD 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45
920 .WORD 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94
921 .WORD 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7
922 .WORD 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a
923 .WORD 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5
924 .WORD 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c
925 .WORD 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1
926 .WORD 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a
927 .WORD 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75
928 .WORD 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051
929 .WORD 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46
930 .WORD 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff
931 .WORD 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77
932 .WORD 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb
933 .WORD 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000
934 .WORD 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e
935 .WORD 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927
936 .WORD 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a
937 .WORD 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e
938 .WORD 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16
939 .WORD 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d
940 .WORD 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8
941 .WORD 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd
942 .WORD 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34
943 .WORD 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163
944 .WORD 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120
945 .WORD 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d
946 .WORD 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0
947 .WORD 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422
948 .WORD 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef
949 .WORD 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36
950 .WORD 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4
951 .WORD 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662
952 .WORD 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5
953 .WORD 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3
954 .WORD 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b
955 .WORD 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8
956 .WORD 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6
957 .WORD 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6
958 .WORD 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0
959 .WORD 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815
960 .WORD 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f
961 .WORD 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df
962 .WORD 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f
963 .WORD 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e
964 .WORD 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713
965 .WORD 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89
966 .WORD 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c
967 .WORD 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf
968 .WORD 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86
969 .WORD 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f
970 .WORD 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541
971 .WORD 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190
972 .WORD 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742
973 .BYTE 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
974 .BYTE 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
975 .BYTE 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
976 .BYTE 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
977 .BYTE 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
978 .BYTE 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
979 .BYTE 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
980 .BYTE 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
981 .BYTE 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
982 .BYTE 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
983 .BYTE 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
984 .BYTE 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
985 .BYTE 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
986 .BYTE 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
987 .BYTE 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
988 .BYTE 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
989 .BYTE 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
990 .BYTE 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
991 .BYTE 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
992 .BYTE 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
993 .BYTE 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
994 .BYTE 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
995 .BYTE 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
996 .BYTE 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
997 .BYTE 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
998 .BYTE 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
999 .BYTE 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
1000 .BYTE 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
1001 .BYTE 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
1002 .BYTE 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
1003 .BYTE 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
1004 .BYTE 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
1005 .STRINGZ "AES for PA-RISC, CRYPTOGAMS by <appro\@openssl.org>"
1006___
1007
1008foreach (split("\n",$code)) {
1009 s/\`([^\`]*)\`/eval $1/ge;
1010
1011 # translate made-up instructions: _ror, _srm
1012 s/_ror(\s+)(%r[0-9]+),/shd$1$2,$2,/ or
1013
1014 s/_srm(\s+%r[0-9]+),([0-9]+),/
1015 $SIZE_T==4 ? sprintf("extru%s,%d,8,",$1,31-$2)
1016 : sprintf("extrd,u%s,%d,8,",$1,63-$2)/e;
1017
1018 s/,\*/,/ if ($SIZE_T==4);
1019 print $_,"\n";
1020}
1021close STDOUT;
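
The foreach loop above is the final pass of aes-parisc.pl: it expands the backtick expressions and rewrites the two made-up mnemonics, _ror and _srm, into real PA-RISC instructions (shd for a rotate, extru or extrd,u for a shift-and-extract whose bit position depends on the register width). A standalone sketch of that translation, reusing the same substitutions on two sample lines (the operands below are illustrative only, not taken from the file):

    #!/usr/bin/perl
    # Standalone sketch (not part of the imported file): exercise the same
    # _ror/_srm substitutions as the loop above on two sample lines.
    # _ror becomes "shd r,r,n" (a rotate via double-register shift); _srm
    # becomes extru (32-bit) or extrd,u (64-bit) with the position flipped.
    my $SIZE_T = 4;                     # 4 = 32-bit PA-RISC 1.1, 8 = PA-RISC 2.0W

    for my $line ("\t_ror\t%r20,8,%r20", "\t_srm\t%r26,24,%r1") {
        $line =~ s/_ror(\s+)(%r[0-9]+),/shd$1$2,$2,/ or
        $line =~ s/_srm(\s+%r[0-9]+),([0-9]+),/
                  $SIZE_T==4 ? sprintf("extru%s,%d,8,",$1,31-$2)
                             : sprintf("extrd,u%s,%d,8,",$1,63-$2)/e;
        print $line,"\n";   # -> "shd %r20,%r20,8,%r20" and "extru %r26,7,8,%r1"
    }
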
diff --git a/src/lib/libssl/src/crypto/aes/asm/aes-ppc.pl b/src/lib/libssl/src/crypto/aes/asm/aes-ppc.pl
index f82c5e1814..7c52cbe5f9 100644
--- a/src/lib/libssl/src/crypto/aes/asm/aes-ppc.pl
+++ b/src/lib/libssl/src/crypto/aes/asm/aes-ppc.pl
@@ -7,7 +7,7 @@
7# details see http://www.openssl.org/~appro/cryptogams/. 7# details see http://www.openssl.org/~appro/cryptogams/.
8# ==================================================================== 8# ====================================================================
9 9
10# Needs more work: key setup, page boundaries, CBC routine... 10# Needs more work: key setup, CBC routine...
11# 11#
12# ppc_AES_[en|de]crypt perform at 18 cycles per byte processed with 12# ppc_AES_[en|de]crypt perform at 18 cycles per byte processed with
13# 128-bit key, which is ~40% better than 64-bit code generated by gcc 13# 128-bit key, which is ~40% better than 64-bit code generated by gcc
@@ -18,7 +18,7 @@
18 18
19# February 2010 19# February 2010
20# 20#
21# Rescheduling instructions to favour Power6 pipeline gives 10% 21# Rescheduling instructions to favour Power6 pipeline gave 10%
22# performance improvement on the platfrom in question (and marginal 22# performance improvement on the platfrom in question (and marginal
23# improvement even on others). It should be noted that Power6 fails 23# improvement even on others). It should be noted that Power6 fails
24# to process byte in 18 cycles, only in 23, because it fails to issue 24# to process byte in 18 cycles, only in 23, because it fails to issue
@@ -33,11 +33,13 @@ $flavour = shift;
33 33
34if ($flavour =~ /64/) { 34if ($flavour =~ /64/) {
35 $SIZE_T =8; 35 $SIZE_T =8;
36 $LRSAVE =2*$SIZE_T;
36 $STU ="stdu"; 37 $STU ="stdu";
37 $POP ="ld"; 38 $POP ="ld";
38 $PUSH ="std"; 39 $PUSH ="std";
39} elsif ($flavour =~ /32/) { 40} elsif ($flavour =~ /32/) {
40 $SIZE_T =4; 41 $SIZE_T =4;
42 $LRSAVE =$SIZE_T;
41 $STU ="stwu"; 43 $STU ="stwu";
42 $POP ="lwz"; 44 $POP ="lwz";
43 $PUSH ="stw"; 45 $PUSH ="stw";
@@ -116,15 +118,19 @@ LAES_Te:
116 addi $Tbl0,$Tbl0,`128-8` 118 addi $Tbl0,$Tbl0,`128-8`
117 mtlr r0 119 mtlr r0
118 blr 120 blr
119 .space `32-24` 121 .long 0
122 .byte 0,12,0x14,0,0,0,0,0
123 .space `64-9*4`
120LAES_Td: 124LAES_Td:
121 mflr r0 125 mflr r0
122 bcl 20,31,\$+4 126 bcl 20,31,\$+4
123 mflr $Tbl0 ; vvvvvvvv "distance" between . and 1st data entry 127 mflr $Tbl0 ; vvvvvvvv "distance" between . and 1st data entry
124 addi $Tbl0,$Tbl0,`128-8-32+2048+256` 128 addi $Tbl0,$Tbl0,`128-64-8+2048+256`
125 mtlr r0 129 mtlr r0
126 blr 130 blr
127 .space `128-32-24` 131 .long 0
132 .byte 0,12,0x14,0,0,0,0,0
133 .space `128-64-9*4`
128___ 134___
129&_data_word( 135&_data_word(
130 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d, 136 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
@@ -328,10 +334,9 @@ $code.=<<___;
328.globl .AES_encrypt 334.globl .AES_encrypt
329.align 7 335.align 7
330.AES_encrypt: 336.AES_encrypt:
331 mflr r0
332 $STU $sp,-$FRAME($sp) 337 $STU $sp,-$FRAME($sp)
338 mflr r0
333 339
334 $PUSH r0,`$FRAME-$SIZE_T*21`($sp)
335 $PUSH $toc,`$FRAME-$SIZE_T*20`($sp) 340 $PUSH $toc,`$FRAME-$SIZE_T*20`($sp)
336 $PUSH r13,`$FRAME-$SIZE_T*19`($sp) 341 $PUSH r13,`$FRAME-$SIZE_T*19`($sp)
337 $PUSH r14,`$FRAME-$SIZE_T*18`($sp) 342 $PUSH r14,`$FRAME-$SIZE_T*18`($sp)
@@ -352,7 +357,14 @@ $code.=<<___;
352 $PUSH r29,`$FRAME-$SIZE_T*3`($sp) 357 $PUSH r29,`$FRAME-$SIZE_T*3`($sp)
353 $PUSH r30,`$FRAME-$SIZE_T*2`($sp) 358 $PUSH r30,`$FRAME-$SIZE_T*2`($sp)
354 $PUSH r31,`$FRAME-$SIZE_T*1`($sp) 359 $PUSH r31,`$FRAME-$SIZE_T*1`($sp)
360 $PUSH r0,`$FRAME+$LRSAVE`($sp)
361
362 andi. $t0,$inp,3
363 andi. $t1,$out,3
364 or. $t0,$t0,$t1
365 bne Lenc_unaligned
355 366
367Lenc_unaligned_ok:
356 lwz $s0,0($inp) 368 lwz $s0,0($inp)
357 lwz $s1,4($inp) 369 lwz $s1,4($inp)
358 lwz $s2,8($inp) 370 lwz $s2,8($inp)
@@ -363,8 +375,80 @@ $code.=<<___;
363 stw $s1,4($out) 375 stw $s1,4($out)
364 stw $s2,8($out) 376 stw $s2,8($out)
365 stw $s3,12($out) 377 stw $s3,12($out)
378 b Lenc_done
379
380Lenc_unaligned:
381 subfic $t0,$inp,4096
382 subfic $t1,$out,4096
383 andi. $t0,$t0,4096-16
384 beq Lenc_xpage
385 andi. $t1,$t1,4096-16
386 bne Lenc_unaligned_ok
387
388Lenc_xpage:
389 lbz $acc00,0($inp)
390 lbz $acc01,1($inp)
391 lbz $acc02,2($inp)
392 lbz $s0,3($inp)
393 lbz $acc04,4($inp)
394 lbz $acc05,5($inp)
395 lbz $acc06,6($inp)
396 lbz $s1,7($inp)
397 lbz $acc08,8($inp)
398 lbz $acc09,9($inp)
399 lbz $acc10,10($inp)
400 insrwi $s0,$acc00,8,0
401 lbz $s2,11($inp)
402 insrwi $s1,$acc04,8,0
403 lbz $acc12,12($inp)
404 insrwi $s0,$acc01,8,8
405 lbz $acc13,13($inp)
406 insrwi $s1,$acc05,8,8
407 lbz $acc14,14($inp)
408 insrwi $s0,$acc02,8,16
409 lbz $s3,15($inp)
410 insrwi $s1,$acc06,8,16
411 insrwi $s2,$acc08,8,0
412 insrwi $s3,$acc12,8,0
413 insrwi $s2,$acc09,8,8
414 insrwi $s3,$acc13,8,8
415 insrwi $s2,$acc10,8,16
416 insrwi $s3,$acc14,8,16
417
418 bl LAES_Te
419 bl Lppc_AES_encrypt_compact
420
421 extrwi $acc00,$s0,8,0
422 extrwi $acc01,$s0,8,8
423 stb $acc00,0($out)
424 extrwi $acc02,$s0,8,16
425 stb $acc01,1($out)
426 stb $acc02,2($out)
427 extrwi $acc04,$s1,8,0
428 stb $s0,3($out)
429 extrwi $acc05,$s1,8,8
430 stb $acc04,4($out)
431 extrwi $acc06,$s1,8,16
432 stb $acc05,5($out)
433 stb $acc06,6($out)
434 extrwi $acc08,$s2,8,0
435 stb $s1,7($out)
436 extrwi $acc09,$s2,8,8
437 stb $acc08,8($out)
438 extrwi $acc10,$s2,8,16
439 stb $acc09,9($out)
440 stb $acc10,10($out)
441 extrwi $acc12,$s3,8,0
442 stb $s2,11($out)
443 extrwi $acc13,$s3,8,8
444 stb $acc12,12($out)
445 extrwi $acc14,$s3,8,16
446 stb $acc13,13($out)
447 stb $acc14,14($out)
448 stb $s3,15($out)
366 449
367 $POP r0,`$FRAME-$SIZE_T*21`($sp) 450Lenc_done:
451 $POP r0,`$FRAME+$LRSAVE`($sp)
368 $POP $toc,`$FRAME-$SIZE_T*20`($sp) 452 $POP $toc,`$FRAME-$SIZE_T*20`($sp)
369 $POP r13,`$FRAME-$SIZE_T*19`($sp) 453 $POP r13,`$FRAME-$SIZE_T*19`($sp)
370 $POP r14,`$FRAME-$SIZE_T*18`($sp) 454 $POP r14,`$FRAME-$SIZE_T*18`($sp)
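
The Lenc_unaligned/Lenc_xpage path added above is what lets the header comment drop "page boundaries" from its to-do list: unaligned input or output is still served with plain lwz/stw as long as the 16-byte block stays inside one 4KB page, and only a block that would spill across a page boundary is gathered byte by byte (lbz/insrwi) and scattered back (extrwi/stb) around the compact rounds. A standalone sketch of the predicate the subfic/andi. pair computes (the addresses below are made up; as in the assembly, the test is only reached for pointers that are not word-aligned):

    #!/usr/bin/perl
    # Sketch of the page-crossing test introduced above: a 16-byte block at
    # $addr may be accessed with unaligned word loads/stores unless fewer
    # than 16 bytes remain before the next 4KB page boundary.
    use strict; use warnings;

    sub crosses_4k_page {
        my ($addr) = @_;
        my $room = (4096 - ($addr & 0xfff)) & (4096 - 16);  # subfic + andi.
        # A page-aligned pointer also yields 0, but it is word-aligned and
        # never reaches this test in the assembly above.
        return $room == 0;                                  # beq -> byte-wise path
    }

    for my $a (0x2001, 0x2ff0, 0x2ff7) {
        printf "0x%04x: %s\n", $a,
            crosses_4k_page($a) ? "byte-wise (Lenc_xpage)" : "unaligned lwz/stw ok";
    }
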
@@ -388,18 +472,21 @@ $code.=<<___;
388 mtlr r0 472 mtlr r0
389 addi $sp,$sp,$FRAME 473 addi $sp,$sp,$FRAME
390 blr 474 blr
475 .long 0
476 .byte 0,12,4,1,0x80,18,3,0
477 .long 0
391 478
392.align 5 479.align 5
393Lppc_AES_encrypt: 480Lppc_AES_encrypt:
394 lwz $acc00,240($key) 481 lwz $acc00,240($key)
395 lwz $t0,0($key)
396 lwz $t1,4($key)
397 lwz $t2,8($key)
398 lwz $t3,12($key)
399 addi $Tbl1,$Tbl0,3 482 addi $Tbl1,$Tbl0,3
483 lwz $t0,0($key)
400 addi $Tbl2,$Tbl0,2 484 addi $Tbl2,$Tbl0,2
485 lwz $t1,4($key)
401 addi $Tbl3,$Tbl0,1 486 addi $Tbl3,$Tbl0,1
487 lwz $t2,8($key)
402 addi $acc00,$acc00,-1 488 addi $acc00,$acc00,-1
489 lwz $t3,12($key)
403 addi $key,$key,16 490 addi $key,$key,16
404 xor $s0,$s0,$t0 491 xor $s0,$s0,$t0
405 xor $s1,$s1,$t1 492 xor $s1,$s1,$t1
@@ -413,44 +500,44 @@ Lenc_loop:
413 rlwinm $acc02,$s2,`32-24+3`,21,28 500 rlwinm $acc02,$s2,`32-24+3`,21,28
414 rlwinm $acc03,$s3,`32-24+3`,21,28 501 rlwinm $acc03,$s3,`32-24+3`,21,28
415 lwz $t0,0($key) 502 lwz $t0,0($key)
416 lwz $t1,4($key)
417 rlwinm $acc04,$s1,`32-16+3`,21,28 503 rlwinm $acc04,$s1,`32-16+3`,21,28
504 lwz $t1,4($key)
418 rlwinm $acc05,$s2,`32-16+3`,21,28 505 rlwinm $acc05,$s2,`32-16+3`,21,28
419 lwz $t2,8($key) 506 lwz $t2,8($key)
420 lwz $t3,12($key)
421 rlwinm $acc06,$s3,`32-16+3`,21,28 507 rlwinm $acc06,$s3,`32-16+3`,21,28
508 lwz $t3,12($key)
422 rlwinm $acc07,$s0,`32-16+3`,21,28 509 rlwinm $acc07,$s0,`32-16+3`,21,28
423 lwzx $acc00,$Tbl0,$acc00 510 lwzx $acc00,$Tbl0,$acc00
424 lwzx $acc01,$Tbl0,$acc01
425 rlwinm $acc08,$s2,`32-8+3`,21,28 511 rlwinm $acc08,$s2,`32-8+3`,21,28
512 lwzx $acc01,$Tbl0,$acc01
426 rlwinm $acc09,$s3,`32-8+3`,21,28 513 rlwinm $acc09,$s3,`32-8+3`,21,28
427 lwzx $acc02,$Tbl0,$acc02 514 lwzx $acc02,$Tbl0,$acc02
428 lwzx $acc03,$Tbl0,$acc03
429 rlwinm $acc10,$s0,`32-8+3`,21,28 515 rlwinm $acc10,$s0,`32-8+3`,21,28
516 lwzx $acc03,$Tbl0,$acc03
430 rlwinm $acc11,$s1,`32-8+3`,21,28 517 rlwinm $acc11,$s1,`32-8+3`,21,28
431 lwzx $acc04,$Tbl1,$acc04 518 lwzx $acc04,$Tbl1,$acc04
432 lwzx $acc05,$Tbl1,$acc05
433 rlwinm $acc12,$s3,`0+3`,21,28 519 rlwinm $acc12,$s3,`0+3`,21,28
520 lwzx $acc05,$Tbl1,$acc05
434 rlwinm $acc13,$s0,`0+3`,21,28 521 rlwinm $acc13,$s0,`0+3`,21,28
435 lwzx $acc06,$Tbl1,$acc06 522 lwzx $acc06,$Tbl1,$acc06
436 lwzx $acc07,$Tbl1,$acc07
437 rlwinm $acc14,$s1,`0+3`,21,28 523 rlwinm $acc14,$s1,`0+3`,21,28
524 lwzx $acc07,$Tbl1,$acc07
438 rlwinm $acc15,$s2,`0+3`,21,28 525 rlwinm $acc15,$s2,`0+3`,21,28
439 lwzx $acc08,$Tbl2,$acc08 526 lwzx $acc08,$Tbl2,$acc08
440 lwzx $acc09,$Tbl2,$acc09
441 xor $t0,$t0,$acc00 527 xor $t0,$t0,$acc00
528 lwzx $acc09,$Tbl2,$acc09
442 xor $t1,$t1,$acc01 529 xor $t1,$t1,$acc01
443 lwzx $acc10,$Tbl2,$acc10 530 lwzx $acc10,$Tbl2,$acc10
444 lwzx $acc11,$Tbl2,$acc11
445 xor $t2,$t2,$acc02 531 xor $t2,$t2,$acc02
532 lwzx $acc11,$Tbl2,$acc11
446 xor $t3,$t3,$acc03 533 xor $t3,$t3,$acc03
447 lwzx $acc12,$Tbl3,$acc12 534 lwzx $acc12,$Tbl3,$acc12
448 lwzx $acc13,$Tbl3,$acc13
449 xor $t0,$t0,$acc04 535 xor $t0,$t0,$acc04
536 lwzx $acc13,$Tbl3,$acc13
450 xor $t1,$t1,$acc05 537 xor $t1,$t1,$acc05
451 lwzx $acc14,$Tbl3,$acc14 538 lwzx $acc14,$Tbl3,$acc14
452 lwzx $acc15,$Tbl3,$acc15
453 xor $t2,$t2,$acc06 539 xor $t2,$t2,$acc06
540 lwzx $acc15,$Tbl3,$acc15
454 xor $t3,$t3,$acc07 541 xor $t3,$t3,$acc07
455 xor $t0,$t0,$acc08 542 xor $t0,$t0,$acc08
456 xor $t1,$t1,$acc09 543 xor $t1,$t1,$acc09
@@ -466,60 +553,60 @@ Lenc_loop:
466 addi $Tbl2,$Tbl0,2048 553 addi $Tbl2,$Tbl0,2048
467 nop 554 nop
468 lwz $t0,0($key) 555 lwz $t0,0($key)
469 lwz $t1,4($key)
470 rlwinm $acc00,$s0,`32-24`,24,31 556 rlwinm $acc00,$s0,`32-24`,24,31
557 lwz $t1,4($key)
471 rlwinm $acc01,$s1,`32-24`,24,31 558 rlwinm $acc01,$s1,`32-24`,24,31
472 lwz $t2,8($key) 559 lwz $t2,8($key)
473 lwz $t3,12($key)
474 rlwinm $acc02,$s2,`32-24`,24,31 560 rlwinm $acc02,$s2,`32-24`,24,31
561 lwz $t3,12($key)
475 rlwinm $acc03,$s3,`32-24`,24,31 562 rlwinm $acc03,$s3,`32-24`,24,31
476 lwz $acc08,`2048+0`($Tbl0) ! prefetch Te4 563 lwz $acc08,`2048+0`($Tbl0) ! prefetch Te4
477 lwz $acc09,`2048+32`($Tbl0)
478 rlwinm $acc04,$s1,`32-16`,24,31 564 rlwinm $acc04,$s1,`32-16`,24,31
565 lwz $acc09,`2048+32`($Tbl0)
479 rlwinm $acc05,$s2,`32-16`,24,31 566 rlwinm $acc05,$s2,`32-16`,24,31
480 lwz $acc10,`2048+64`($Tbl0) 567 lwz $acc10,`2048+64`($Tbl0)
481 lwz $acc11,`2048+96`($Tbl0)
482 rlwinm $acc06,$s3,`32-16`,24,31 568 rlwinm $acc06,$s3,`32-16`,24,31
569 lwz $acc11,`2048+96`($Tbl0)
483 rlwinm $acc07,$s0,`32-16`,24,31 570 rlwinm $acc07,$s0,`32-16`,24,31
484 lwz $acc12,`2048+128`($Tbl0) 571 lwz $acc12,`2048+128`($Tbl0)
485 lwz $acc13,`2048+160`($Tbl0)
486 rlwinm $acc08,$s2,`32-8`,24,31 572 rlwinm $acc08,$s2,`32-8`,24,31
573 lwz $acc13,`2048+160`($Tbl0)
487 rlwinm $acc09,$s3,`32-8`,24,31 574 rlwinm $acc09,$s3,`32-8`,24,31
488 lwz $acc14,`2048+192`($Tbl0) 575 lwz $acc14,`2048+192`($Tbl0)
489 lwz $acc15,`2048+224`($Tbl0)
490 rlwinm $acc10,$s0,`32-8`,24,31 576 rlwinm $acc10,$s0,`32-8`,24,31
577 lwz $acc15,`2048+224`($Tbl0)
491 rlwinm $acc11,$s1,`32-8`,24,31 578 rlwinm $acc11,$s1,`32-8`,24,31
492 lbzx $acc00,$Tbl2,$acc00 579 lbzx $acc00,$Tbl2,$acc00
493 lbzx $acc01,$Tbl2,$acc01
494 rlwinm $acc12,$s3,`0`,24,31 580 rlwinm $acc12,$s3,`0`,24,31
581 lbzx $acc01,$Tbl2,$acc01
495 rlwinm $acc13,$s0,`0`,24,31 582 rlwinm $acc13,$s0,`0`,24,31
496 lbzx $acc02,$Tbl2,$acc02 583 lbzx $acc02,$Tbl2,$acc02
497 lbzx $acc03,$Tbl2,$acc03
498 rlwinm $acc14,$s1,`0`,24,31 584 rlwinm $acc14,$s1,`0`,24,31
585 lbzx $acc03,$Tbl2,$acc03
499 rlwinm $acc15,$s2,`0`,24,31 586 rlwinm $acc15,$s2,`0`,24,31
500 lbzx $acc04,$Tbl2,$acc04 587 lbzx $acc04,$Tbl2,$acc04
501 lbzx $acc05,$Tbl2,$acc05
502 rlwinm $s0,$acc00,24,0,7 588 rlwinm $s0,$acc00,24,0,7
589 lbzx $acc05,$Tbl2,$acc05
503 rlwinm $s1,$acc01,24,0,7 590 rlwinm $s1,$acc01,24,0,7
504 lbzx $acc06,$Tbl2,$acc06 591 lbzx $acc06,$Tbl2,$acc06
505 lbzx $acc07,$Tbl2,$acc07
506 rlwinm $s2,$acc02,24,0,7 592 rlwinm $s2,$acc02,24,0,7
593 lbzx $acc07,$Tbl2,$acc07
507 rlwinm $s3,$acc03,24,0,7 594 rlwinm $s3,$acc03,24,0,7
508 lbzx $acc08,$Tbl2,$acc08 595 lbzx $acc08,$Tbl2,$acc08
509 lbzx $acc09,$Tbl2,$acc09
510 rlwimi $s0,$acc04,16,8,15 596 rlwimi $s0,$acc04,16,8,15
597 lbzx $acc09,$Tbl2,$acc09
511 rlwimi $s1,$acc05,16,8,15 598 rlwimi $s1,$acc05,16,8,15
512 lbzx $acc10,$Tbl2,$acc10 599 lbzx $acc10,$Tbl2,$acc10
513 lbzx $acc11,$Tbl2,$acc11
514 rlwimi $s2,$acc06,16,8,15 600 rlwimi $s2,$acc06,16,8,15
601 lbzx $acc11,$Tbl2,$acc11
515 rlwimi $s3,$acc07,16,8,15 602 rlwimi $s3,$acc07,16,8,15
516 lbzx $acc12,$Tbl2,$acc12 603 lbzx $acc12,$Tbl2,$acc12
517 lbzx $acc13,$Tbl2,$acc13
518 rlwimi $s0,$acc08,8,16,23 604 rlwimi $s0,$acc08,8,16,23
605 lbzx $acc13,$Tbl2,$acc13
519 rlwimi $s1,$acc09,8,16,23 606 rlwimi $s1,$acc09,8,16,23
520 lbzx $acc14,$Tbl2,$acc14 607 lbzx $acc14,$Tbl2,$acc14
521 lbzx $acc15,$Tbl2,$acc15
522 rlwimi $s2,$acc10,8,16,23 608 rlwimi $s2,$acc10,8,16,23
609 lbzx $acc15,$Tbl2,$acc15
523 rlwimi $s3,$acc11,8,16,23 610 rlwimi $s3,$acc11,8,16,23
524 or $s0,$s0,$acc12 611 or $s0,$s0,$acc12
525 or $s1,$s1,$acc13 612 or $s1,$s1,$acc13
@@ -530,29 +617,31 @@ Lenc_loop:
530 xor $s2,$s2,$t2 617 xor $s2,$s2,$t2
531 xor $s3,$s3,$t3 618 xor $s3,$s3,$t3
532 blr 619 blr
620 .long 0
621 .byte 0,12,0x14,0,0,0,0,0
533 622
534.align 4 623.align 4
535Lppc_AES_encrypt_compact: 624Lppc_AES_encrypt_compact:
536 lwz $acc00,240($key) 625 lwz $acc00,240($key)
537 lwz $t0,0($key)
538 lwz $t1,4($key)
539 lwz $t2,8($key)
540 lwz $t3,12($key)
541 addi $Tbl1,$Tbl0,2048 626 addi $Tbl1,$Tbl0,2048
627 lwz $t0,0($key)
542 lis $mask80,0x8080 628 lis $mask80,0x8080
629 lwz $t1,4($key)
543 lis $mask1b,0x1b1b 630 lis $mask1b,0x1b1b
544 addi $key,$key,16 631 lwz $t2,8($key)
545 ori $mask80,$mask80,0x8080 632 ori $mask80,$mask80,0x8080
633 lwz $t3,12($key)
546 ori $mask1b,$mask1b,0x1b1b 634 ori $mask1b,$mask1b,0x1b1b
635 addi $key,$key,16
547 mtctr $acc00 636 mtctr $acc00
548.align 4 637.align 4
549Lenc_compact_loop: 638Lenc_compact_loop:
550 xor $s0,$s0,$t0 639 xor $s0,$s0,$t0
551 xor $s1,$s1,$t1 640 xor $s1,$s1,$t1
552 xor $s2,$s2,$t2
553 xor $s3,$s3,$t3
554 rlwinm $acc00,$s0,`32-24`,24,31 641 rlwinm $acc00,$s0,`32-24`,24,31
642 xor $s2,$s2,$t2
555 rlwinm $acc01,$s1,`32-24`,24,31 643 rlwinm $acc01,$s1,`32-24`,24,31
644 xor $s3,$s3,$t3
556 rlwinm $acc02,$s2,`32-24`,24,31 645 rlwinm $acc02,$s2,`32-24`,24,31
557 rlwinm $acc03,$s3,`32-24`,24,31 646 rlwinm $acc03,$s3,`32-24`,24,31
558 rlwinm $acc04,$s1,`32-16`,24,31 647 rlwinm $acc04,$s1,`32-16`,24,31
@@ -560,48 +649,48 @@ Lenc_compact_loop:
560 rlwinm $acc06,$s3,`32-16`,24,31 649 rlwinm $acc06,$s3,`32-16`,24,31
561 rlwinm $acc07,$s0,`32-16`,24,31 650 rlwinm $acc07,$s0,`32-16`,24,31
562 lbzx $acc00,$Tbl1,$acc00 651 lbzx $acc00,$Tbl1,$acc00
563 lbzx $acc01,$Tbl1,$acc01
564 rlwinm $acc08,$s2,`32-8`,24,31 652 rlwinm $acc08,$s2,`32-8`,24,31
653 lbzx $acc01,$Tbl1,$acc01
565 rlwinm $acc09,$s3,`32-8`,24,31 654 rlwinm $acc09,$s3,`32-8`,24,31
566 lbzx $acc02,$Tbl1,$acc02 655 lbzx $acc02,$Tbl1,$acc02
567 lbzx $acc03,$Tbl1,$acc03
568 rlwinm $acc10,$s0,`32-8`,24,31 656 rlwinm $acc10,$s0,`32-8`,24,31
657 lbzx $acc03,$Tbl1,$acc03
569 rlwinm $acc11,$s1,`32-8`,24,31 658 rlwinm $acc11,$s1,`32-8`,24,31
570 lbzx $acc04,$Tbl1,$acc04 659 lbzx $acc04,$Tbl1,$acc04
571 lbzx $acc05,$Tbl1,$acc05
572 rlwinm $acc12,$s3,`0`,24,31 660 rlwinm $acc12,$s3,`0`,24,31
661 lbzx $acc05,$Tbl1,$acc05
573 rlwinm $acc13,$s0,`0`,24,31 662 rlwinm $acc13,$s0,`0`,24,31
574 lbzx $acc06,$Tbl1,$acc06 663 lbzx $acc06,$Tbl1,$acc06
575 lbzx $acc07,$Tbl1,$acc07
576 rlwinm $acc14,$s1,`0`,24,31 664 rlwinm $acc14,$s1,`0`,24,31
665 lbzx $acc07,$Tbl1,$acc07
577 rlwinm $acc15,$s2,`0`,24,31 666 rlwinm $acc15,$s2,`0`,24,31
578 lbzx $acc08,$Tbl1,$acc08 667 lbzx $acc08,$Tbl1,$acc08
579 lbzx $acc09,$Tbl1,$acc09
580 rlwinm $s0,$acc00,24,0,7 668 rlwinm $s0,$acc00,24,0,7
669 lbzx $acc09,$Tbl1,$acc09
581 rlwinm $s1,$acc01,24,0,7 670 rlwinm $s1,$acc01,24,0,7
582 lbzx $acc10,$Tbl1,$acc10 671 lbzx $acc10,$Tbl1,$acc10
583 lbzx $acc11,$Tbl1,$acc11
584 rlwinm $s2,$acc02,24,0,7 672 rlwinm $s2,$acc02,24,0,7
673 lbzx $acc11,$Tbl1,$acc11
585 rlwinm $s3,$acc03,24,0,7 674 rlwinm $s3,$acc03,24,0,7
586 lbzx $acc12,$Tbl1,$acc12 675 lbzx $acc12,$Tbl1,$acc12
587 lbzx $acc13,$Tbl1,$acc13
588 rlwimi $s0,$acc04,16,8,15 676 rlwimi $s0,$acc04,16,8,15
677 lbzx $acc13,$Tbl1,$acc13
589 rlwimi $s1,$acc05,16,8,15 678 rlwimi $s1,$acc05,16,8,15
590 lbzx $acc14,$Tbl1,$acc14 679 lbzx $acc14,$Tbl1,$acc14
591 lbzx $acc15,$Tbl1,$acc15
592 rlwimi $s2,$acc06,16,8,15 680 rlwimi $s2,$acc06,16,8,15
681 lbzx $acc15,$Tbl1,$acc15
593 rlwimi $s3,$acc07,16,8,15 682 rlwimi $s3,$acc07,16,8,15
594 rlwimi $s0,$acc08,8,16,23 683 rlwimi $s0,$acc08,8,16,23
595 rlwimi $s1,$acc09,8,16,23 684 rlwimi $s1,$acc09,8,16,23
596 rlwimi $s2,$acc10,8,16,23 685 rlwimi $s2,$acc10,8,16,23
597 rlwimi $s3,$acc11,8,16,23 686 rlwimi $s3,$acc11,8,16,23
598 lwz $t0,0($key) 687 lwz $t0,0($key)
599 lwz $t1,4($key)
600 or $s0,$s0,$acc12 688 or $s0,$s0,$acc12
689 lwz $t1,4($key)
601 or $s1,$s1,$acc13 690 or $s1,$s1,$acc13
602 lwz $t2,8($key) 691 lwz $t2,8($key)
603 lwz $t3,12($key)
604 or $s2,$s2,$acc14 692 or $s2,$s2,$acc14
693 lwz $t3,12($key)
605 or $s3,$s3,$acc15 694 or $s3,$s3,$acc15
606 695
607 addi $key,$key,16 696 addi $key,$key,16
@@ -612,12 +701,12 @@ Lenc_compact_loop:
612 and $acc02,$s2,$mask80 701 and $acc02,$s2,$mask80
613 and $acc03,$s3,$mask80 702 and $acc03,$s3,$mask80
614 srwi $acc04,$acc00,7 # r1>>7 703 srwi $acc04,$acc00,7 # r1>>7
615 srwi $acc05,$acc01,7
616 srwi $acc06,$acc02,7
617 srwi $acc07,$acc03,7
618 andc $acc08,$s0,$mask80 # r0&0x7f7f7f7f 704 andc $acc08,$s0,$mask80 # r0&0x7f7f7f7f
705 srwi $acc05,$acc01,7
619 andc $acc09,$s1,$mask80 706 andc $acc09,$s1,$mask80
707 srwi $acc06,$acc02,7
620 andc $acc10,$s2,$mask80 708 andc $acc10,$s2,$mask80
709 srwi $acc07,$acc03,7
621 andc $acc11,$s3,$mask80 710 andc $acc11,$s3,$mask80
622 sub $acc00,$acc00,$acc04 # r1-(r1>>7) 711 sub $acc00,$acc00,$acc04 # r1-(r1>>7)
623 sub $acc01,$acc01,$acc05 712 sub $acc01,$acc01,$acc05
@@ -633,32 +722,32 @@ Lenc_compact_loop:
633 and $acc03,$acc03,$mask1b 722 and $acc03,$acc03,$mask1b
634 xor $acc00,$acc00,$acc08 # r2 723 xor $acc00,$acc00,$acc08 # r2
635 xor $acc01,$acc01,$acc09 724 xor $acc01,$acc01,$acc09
725 rotlwi $acc12,$s0,16 # ROTATE(r0,16)
636 xor $acc02,$acc02,$acc10 726 xor $acc02,$acc02,$acc10
727 rotlwi $acc13,$s1,16
637 xor $acc03,$acc03,$acc11 728 xor $acc03,$acc03,$acc11
729 rotlwi $acc14,$s2,16
638 730
639 rotlwi $acc12,$s0,16 # ROTATE(r0,16)
640 rotlwi $acc13,$s1,16
641 rotlwi $acc14,$s2,16
642 rotlwi $acc15,$s3,16
643 xor $s0,$s0,$acc00 # r0^r2 731 xor $s0,$s0,$acc00 # r0^r2
732 rotlwi $acc15,$s3,16
644 xor $s1,$s1,$acc01 733 xor $s1,$s1,$acc01
645 xor $s2,$s2,$acc02
646 xor $s3,$s3,$acc03
647 rotrwi $s0,$s0,24 # ROTATE(r2^r0,24) 734 rotrwi $s0,$s0,24 # ROTATE(r2^r0,24)
735 xor $s2,$s2,$acc02
648 rotrwi $s1,$s1,24 736 rotrwi $s1,$s1,24
737 xor $s3,$s3,$acc03
649 rotrwi $s2,$s2,24 738 rotrwi $s2,$s2,24
650 rotrwi $s3,$s3,24
651 xor $s0,$s0,$acc00 # ROTATE(r2^r0,24)^r2 739 xor $s0,$s0,$acc00 # ROTATE(r2^r0,24)^r2
740 rotrwi $s3,$s3,24
652 xor $s1,$s1,$acc01 741 xor $s1,$s1,$acc01
653 xor $s2,$s2,$acc02 742 xor $s2,$s2,$acc02
654 xor $s3,$s3,$acc03 743 xor $s3,$s3,$acc03
655 rotlwi $acc08,$acc12,8 # ROTATE(r0,24) 744 rotlwi $acc08,$acc12,8 # ROTATE(r0,24)
656 rotlwi $acc09,$acc13,8
657 rotlwi $acc10,$acc14,8
658 rotlwi $acc11,$acc15,8
659 xor $s0,$s0,$acc12 # 745 xor $s0,$s0,$acc12 #
746 rotlwi $acc09,$acc13,8
660 xor $s1,$s1,$acc13 747 xor $s1,$s1,$acc13
748 rotlwi $acc10,$acc14,8
661 xor $s2,$s2,$acc14 749 xor $s2,$s2,$acc14
750 rotlwi $acc11,$acc15,8
662 xor $s3,$s3,$acc15 751 xor $s3,$s3,$acc15
663 xor $s0,$s0,$acc08 # 752 xor $s0,$s0,$acc08 #
664 xor $s1,$s1,$acc09 753 xor $s1,$s1,$acc09
@@ -673,14 +762,15 @@ Lenc_compact_done:
673 xor $s2,$s2,$t2 762 xor $s2,$s2,$t2
674 xor $s3,$s3,$t3 763 xor $s3,$s3,$t3
675 blr 764 blr
765 .long 0
766 .byte 0,12,0x14,0,0,0,0,0
676 767
677.globl .AES_decrypt 768.globl .AES_decrypt
678.align 7 769.align 7
679.AES_decrypt: 770.AES_decrypt:
680 mflr r0
681 $STU $sp,-$FRAME($sp) 771 $STU $sp,-$FRAME($sp)
772 mflr r0
682 773
683 $PUSH r0,`$FRAME-$SIZE_T*21`($sp)
684 $PUSH $toc,`$FRAME-$SIZE_T*20`($sp) 774 $PUSH $toc,`$FRAME-$SIZE_T*20`($sp)
685 $PUSH r13,`$FRAME-$SIZE_T*19`($sp) 775 $PUSH r13,`$FRAME-$SIZE_T*19`($sp)
686 $PUSH r14,`$FRAME-$SIZE_T*18`($sp) 776 $PUSH r14,`$FRAME-$SIZE_T*18`($sp)
@@ -701,7 +791,14 @@ Lenc_compact_done:
701 $PUSH r29,`$FRAME-$SIZE_T*3`($sp) 791 $PUSH r29,`$FRAME-$SIZE_T*3`($sp)
702 $PUSH r30,`$FRAME-$SIZE_T*2`($sp) 792 $PUSH r30,`$FRAME-$SIZE_T*2`($sp)
703 $PUSH r31,`$FRAME-$SIZE_T*1`($sp) 793 $PUSH r31,`$FRAME-$SIZE_T*1`($sp)
794 $PUSH r0,`$FRAME+$LRSAVE`($sp)
704 795
796 andi. $t0,$inp,3
797 andi. $t1,$out,3
798 or. $t0,$t0,$t1
799 bne Ldec_unaligned
800
801Ldec_unaligned_ok:
705 lwz $s0,0($inp) 802 lwz $s0,0($inp)
706 lwz $s1,4($inp) 803 lwz $s1,4($inp)
707 lwz $s2,8($inp) 804 lwz $s2,8($inp)
@@ -712,8 +809,80 @@ Lenc_compact_done:
712 stw $s1,4($out) 809 stw $s1,4($out)
713 stw $s2,8($out) 810 stw $s2,8($out)
714 stw $s3,12($out) 811 stw $s3,12($out)
812 b Ldec_done
813
814Ldec_unaligned:
815 subfic $t0,$inp,4096
816 subfic $t1,$out,4096
817 andi. $t0,$t0,4096-16
818 beq Ldec_xpage
819 andi. $t1,$t1,4096-16
820 bne Ldec_unaligned_ok
821
822Ldec_xpage:
823 lbz $acc00,0($inp)
824 lbz $acc01,1($inp)
825 lbz $acc02,2($inp)
826 lbz $s0,3($inp)
827 lbz $acc04,4($inp)
828 lbz $acc05,5($inp)
829 lbz $acc06,6($inp)
830 lbz $s1,7($inp)
831 lbz $acc08,8($inp)
832 lbz $acc09,9($inp)
833 lbz $acc10,10($inp)
834 insrwi $s0,$acc00,8,0
835 lbz $s2,11($inp)
836 insrwi $s1,$acc04,8,0
837 lbz $acc12,12($inp)
838 insrwi $s0,$acc01,8,8
839 lbz $acc13,13($inp)
840 insrwi $s1,$acc05,8,8
841 lbz $acc14,14($inp)
842 insrwi $s0,$acc02,8,16
843 lbz $s3,15($inp)
844 insrwi $s1,$acc06,8,16
845 insrwi $s2,$acc08,8,0
846 insrwi $s3,$acc12,8,0
847 insrwi $s2,$acc09,8,8
848 insrwi $s3,$acc13,8,8
849 insrwi $s2,$acc10,8,16
850 insrwi $s3,$acc14,8,16
851
852 bl LAES_Td
853 bl Lppc_AES_decrypt_compact
715 854
716 $POP r0,`$FRAME-$SIZE_T*21`($sp) 855 extrwi $acc00,$s0,8,0
856 extrwi $acc01,$s0,8,8
857 stb $acc00,0($out)
858 extrwi $acc02,$s0,8,16
859 stb $acc01,1($out)
860 stb $acc02,2($out)
861 extrwi $acc04,$s1,8,0
862 stb $s0,3($out)
863 extrwi $acc05,$s1,8,8
864 stb $acc04,4($out)
865 extrwi $acc06,$s1,8,16
866 stb $acc05,5($out)
867 stb $acc06,6($out)
868 extrwi $acc08,$s2,8,0
869 stb $s1,7($out)
870 extrwi $acc09,$s2,8,8
871 stb $acc08,8($out)
872 extrwi $acc10,$s2,8,16
873 stb $acc09,9($out)
874 stb $acc10,10($out)
875 extrwi $acc12,$s3,8,0
876 stb $s2,11($out)
877 extrwi $acc13,$s3,8,8
878 stb $acc12,12($out)
879 extrwi $acc14,$s3,8,16
880 stb $acc13,13($out)
881 stb $acc14,14($out)
882 stb $s3,15($out)
883
884Ldec_done:
885 $POP r0,`$FRAME+$LRSAVE`($sp)
717 $POP $toc,`$FRAME-$SIZE_T*20`($sp) 886 $POP $toc,`$FRAME-$SIZE_T*20`($sp)
718 $POP r13,`$FRAME-$SIZE_T*19`($sp) 887 $POP r13,`$FRAME-$SIZE_T*19`($sp)
719 $POP r14,`$FRAME-$SIZE_T*18`($sp) 888 $POP r14,`$FRAME-$SIZE_T*18`($sp)
@@ -737,18 +906,21 @@ Lenc_compact_done:
737 mtlr r0 906 mtlr r0
738 addi $sp,$sp,$FRAME 907 addi $sp,$sp,$FRAME
739 blr 908 blr
909 .long 0
910 .byte 0,12,4,1,0x80,18,3,0
911 .long 0
740 912
741.align 5 913.align 5
742Lppc_AES_decrypt: 914Lppc_AES_decrypt:
743 lwz $acc00,240($key) 915 lwz $acc00,240($key)
744 lwz $t0,0($key)
745 lwz $t1,4($key)
746 lwz $t2,8($key)
747 lwz $t3,12($key)
748 addi $Tbl1,$Tbl0,3 916 addi $Tbl1,$Tbl0,3
917 lwz $t0,0($key)
749 addi $Tbl2,$Tbl0,2 918 addi $Tbl2,$Tbl0,2
919 lwz $t1,4($key)
750 addi $Tbl3,$Tbl0,1 920 addi $Tbl3,$Tbl0,1
921 lwz $t2,8($key)
751 addi $acc00,$acc00,-1 922 addi $acc00,$acc00,-1
923 lwz $t3,12($key)
752 addi $key,$key,16 924 addi $key,$key,16
753 xor $s0,$s0,$t0 925 xor $s0,$s0,$t0
754 xor $s1,$s1,$t1 926 xor $s1,$s1,$t1
@@ -762,44 +934,44 @@ Ldec_loop:
762 rlwinm $acc02,$s2,`32-24+3`,21,28 934 rlwinm $acc02,$s2,`32-24+3`,21,28
763 rlwinm $acc03,$s3,`32-24+3`,21,28 935 rlwinm $acc03,$s3,`32-24+3`,21,28
764 lwz $t0,0($key) 936 lwz $t0,0($key)
765 lwz $t1,4($key)
766 rlwinm $acc04,$s3,`32-16+3`,21,28 937 rlwinm $acc04,$s3,`32-16+3`,21,28
938 lwz $t1,4($key)
767 rlwinm $acc05,$s0,`32-16+3`,21,28 939 rlwinm $acc05,$s0,`32-16+3`,21,28
768 lwz $t2,8($key) 940 lwz $t2,8($key)
769 lwz $t3,12($key)
770 rlwinm $acc06,$s1,`32-16+3`,21,28 941 rlwinm $acc06,$s1,`32-16+3`,21,28
942 lwz $t3,12($key)
771 rlwinm $acc07,$s2,`32-16+3`,21,28 943 rlwinm $acc07,$s2,`32-16+3`,21,28
772 lwzx $acc00,$Tbl0,$acc00 944 lwzx $acc00,$Tbl0,$acc00
773 lwzx $acc01,$Tbl0,$acc01
774 rlwinm $acc08,$s2,`32-8+3`,21,28 945 rlwinm $acc08,$s2,`32-8+3`,21,28
946 lwzx $acc01,$Tbl0,$acc01
775 rlwinm $acc09,$s3,`32-8+3`,21,28 947 rlwinm $acc09,$s3,`32-8+3`,21,28
776 lwzx $acc02,$Tbl0,$acc02 948 lwzx $acc02,$Tbl0,$acc02
777 lwzx $acc03,$Tbl0,$acc03
778 rlwinm $acc10,$s0,`32-8+3`,21,28 949 rlwinm $acc10,$s0,`32-8+3`,21,28
950 lwzx $acc03,$Tbl0,$acc03
779 rlwinm $acc11,$s1,`32-8+3`,21,28 951 rlwinm $acc11,$s1,`32-8+3`,21,28
780 lwzx $acc04,$Tbl1,$acc04 952 lwzx $acc04,$Tbl1,$acc04
781 lwzx $acc05,$Tbl1,$acc05
782 rlwinm $acc12,$s1,`0+3`,21,28 953 rlwinm $acc12,$s1,`0+3`,21,28
954 lwzx $acc05,$Tbl1,$acc05
783 rlwinm $acc13,$s2,`0+3`,21,28 955 rlwinm $acc13,$s2,`0+3`,21,28
784 lwzx $acc06,$Tbl1,$acc06 956 lwzx $acc06,$Tbl1,$acc06
785 lwzx $acc07,$Tbl1,$acc07
786 rlwinm $acc14,$s3,`0+3`,21,28 957 rlwinm $acc14,$s3,`0+3`,21,28
958 lwzx $acc07,$Tbl1,$acc07
787 rlwinm $acc15,$s0,`0+3`,21,28 959 rlwinm $acc15,$s0,`0+3`,21,28
788 lwzx $acc08,$Tbl2,$acc08 960 lwzx $acc08,$Tbl2,$acc08
789 lwzx $acc09,$Tbl2,$acc09
790 xor $t0,$t0,$acc00 961 xor $t0,$t0,$acc00
962 lwzx $acc09,$Tbl2,$acc09
791 xor $t1,$t1,$acc01 963 xor $t1,$t1,$acc01
792 lwzx $acc10,$Tbl2,$acc10 964 lwzx $acc10,$Tbl2,$acc10
793 lwzx $acc11,$Tbl2,$acc11
794 xor $t2,$t2,$acc02 965 xor $t2,$t2,$acc02
966 lwzx $acc11,$Tbl2,$acc11
795 xor $t3,$t3,$acc03 967 xor $t3,$t3,$acc03
796 lwzx $acc12,$Tbl3,$acc12 968 lwzx $acc12,$Tbl3,$acc12
797 lwzx $acc13,$Tbl3,$acc13
798 xor $t0,$t0,$acc04 969 xor $t0,$t0,$acc04
970 lwzx $acc13,$Tbl3,$acc13
799 xor $t1,$t1,$acc05 971 xor $t1,$t1,$acc05
800 lwzx $acc14,$Tbl3,$acc14 972 lwzx $acc14,$Tbl3,$acc14
801 lwzx $acc15,$Tbl3,$acc15
802 xor $t2,$t2,$acc06 973 xor $t2,$t2,$acc06
974 lwzx $acc15,$Tbl3,$acc15
803 xor $t3,$t3,$acc07 975 xor $t3,$t3,$acc07
804 xor $t0,$t0,$acc08 976 xor $t0,$t0,$acc08
805 xor $t1,$t1,$acc09 977 xor $t1,$t1,$acc09
@@ -815,56 +987,56 @@ Ldec_loop:
815 addi $Tbl2,$Tbl0,2048 987 addi $Tbl2,$Tbl0,2048
816 nop 988 nop
817 lwz $t0,0($key) 989 lwz $t0,0($key)
818 lwz $t1,4($key)
819 rlwinm $acc00,$s0,`32-24`,24,31 990 rlwinm $acc00,$s0,`32-24`,24,31
991 lwz $t1,4($key)
820 rlwinm $acc01,$s1,`32-24`,24,31 992 rlwinm $acc01,$s1,`32-24`,24,31
821 lwz $t2,8($key) 993 lwz $t2,8($key)
822 lwz $t3,12($key)
823 rlwinm $acc02,$s2,`32-24`,24,31 994 rlwinm $acc02,$s2,`32-24`,24,31
995 lwz $t3,12($key)
824 rlwinm $acc03,$s3,`32-24`,24,31 996 rlwinm $acc03,$s3,`32-24`,24,31
825 lwz $acc08,`2048+0`($Tbl0) ! prefetch Td4 997 lwz $acc08,`2048+0`($Tbl0) ! prefetch Td4
826 lwz $acc09,`2048+32`($Tbl0)
827 rlwinm $acc04,$s3,`32-16`,24,31 998 rlwinm $acc04,$s3,`32-16`,24,31
999 lwz $acc09,`2048+32`($Tbl0)
828 rlwinm $acc05,$s0,`32-16`,24,31 1000 rlwinm $acc05,$s0,`32-16`,24,31
829 lwz $acc10,`2048+64`($Tbl0) 1001 lwz $acc10,`2048+64`($Tbl0)
830 lwz $acc11,`2048+96`($Tbl0)
831 lbzx $acc00,$Tbl2,$acc00 1002 lbzx $acc00,$Tbl2,$acc00
1003 lwz $acc11,`2048+96`($Tbl0)
832 lbzx $acc01,$Tbl2,$acc01 1004 lbzx $acc01,$Tbl2,$acc01
833 lwz $acc12,`2048+128`($Tbl0) 1005 lwz $acc12,`2048+128`($Tbl0)
834 lwz $acc13,`2048+160`($Tbl0)
835 rlwinm $acc06,$s1,`32-16`,24,31 1006 rlwinm $acc06,$s1,`32-16`,24,31
1007 lwz $acc13,`2048+160`($Tbl0)
836 rlwinm $acc07,$s2,`32-16`,24,31 1008 rlwinm $acc07,$s2,`32-16`,24,31
837 lwz $acc14,`2048+192`($Tbl0) 1009 lwz $acc14,`2048+192`($Tbl0)
838 lwz $acc15,`2048+224`($Tbl0)
839 rlwinm $acc08,$s2,`32-8`,24,31 1010 rlwinm $acc08,$s2,`32-8`,24,31
1011 lwz $acc15,`2048+224`($Tbl0)
840 rlwinm $acc09,$s3,`32-8`,24,31 1012 rlwinm $acc09,$s3,`32-8`,24,31
841 lbzx $acc02,$Tbl2,$acc02 1013 lbzx $acc02,$Tbl2,$acc02
842 lbzx $acc03,$Tbl2,$acc03
843 rlwinm $acc10,$s0,`32-8`,24,31 1014 rlwinm $acc10,$s0,`32-8`,24,31
1015 lbzx $acc03,$Tbl2,$acc03
844 rlwinm $acc11,$s1,`32-8`,24,31 1016 rlwinm $acc11,$s1,`32-8`,24,31
845 lbzx $acc04,$Tbl2,$acc04 1017 lbzx $acc04,$Tbl2,$acc04
846 lbzx $acc05,$Tbl2,$acc05
847 rlwinm $acc12,$s1,`0`,24,31 1018 rlwinm $acc12,$s1,`0`,24,31
1019 lbzx $acc05,$Tbl2,$acc05
848 rlwinm $acc13,$s2,`0`,24,31 1020 rlwinm $acc13,$s2,`0`,24,31
849 lbzx $acc06,$Tbl2,$acc06 1021 lbzx $acc06,$Tbl2,$acc06
850 lbzx $acc07,$Tbl2,$acc07
851 rlwinm $acc14,$s3,`0`,24,31 1022 rlwinm $acc14,$s3,`0`,24,31
1023 lbzx $acc07,$Tbl2,$acc07
852 rlwinm $acc15,$s0,`0`,24,31 1024 rlwinm $acc15,$s0,`0`,24,31
853 lbzx $acc08,$Tbl2,$acc08 1025 lbzx $acc08,$Tbl2,$acc08
854 lbzx $acc09,$Tbl2,$acc09
855 rlwinm $s0,$acc00,24,0,7 1026 rlwinm $s0,$acc00,24,0,7
1027 lbzx $acc09,$Tbl2,$acc09
856 rlwinm $s1,$acc01,24,0,7 1028 rlwinm $s1,$acc01,24,0,7
857 lbzx $acc10,$Tbl2,$acc10 1029 lbzx $acc10,$Tbl2,$acc10
858 lbzx $acc11,$Tbl2,$acc11
859 rlwinm $s2,$acc02,24,0,7 1030 rlwinm $s2,$acc02,24,0,7
1031 lbzx $acc11,$Tbl2,$acc11
860 rlwinm $s3,$acc03,24,0,7 1032 rlwinm $s3,$acc03,24,0,7
861 lbzx $acc12,$Tbl2,$acc12 1033 lbzx $acc12,$Tbl2,$acc12
862 lbzx $acc13,$Tbl2,$acc13
863 rlwimi $s0,$acc04,16,8,15 1034 rlwimi $s0,$acc04,16,8,15
1035 lbzx $acc13,$Tbl2,$acc13
864 rlwimi $s1,$acc05,16,8,15 1036 rlwimi $s1,$acc05,16,8,15
865 lbzx $acc14,$Tbl2,$acc14 1037 lbzx $acc14,$Tbl2,$acc14
866 lbzx $acc15,$Tbl2,$acc15
867 rlwimi $s2,$acc06,16,8,15 1038 rlwimi $s2,$acc06,16,8,15
1039 lbzx $acc15,$Tbl2,$acc15
868 rlwimi $s3,$acc07,16,8,15 1040 rlwimi $s3,$acc07,16,8,15
869 rlwimi $s0,$acc08,8,16,23 1041 rlwimi $s0,$acc08,8,16,23
870 rlwimi $s1,$acc09,8,16,23 1042 rlwimi $s1,$acc09,8,16,23
@@ -879,20 +1051,22 @@ Ldec_loop:
879 xor $s2,$s2,$t2 1051 xor $s2,$s2,$t2
880 xor $s3,$s3,$t3 1052 xor $s3,$s3,$t3
881 blr 1053 blr
1054 .long 0
1055 .byte 0,12,0x14,0,0,0,0,0
882 1056
883.align 4 1057.align 4
884Lppc_AES_decrypt_compact: 1058Lppc_AES_decrypt_compact:
885 lwz $acc00,240($key) 1059 lwz $acc00,240($key)
886 lwz $t0,0($key)
887 lwz $t1,4($key)
888 lwz $t2,8($key)
889 lwz $t3,12($key)
890 addi $Tbl1,$Tbl0,2048 1060 addi $Tbl1,$Tbl0,2048
1061 lwz $t0,0($key)
891 lis $mask80,0x8080 1062 lis $mask80,0x8080
1063 lwz $t1,4($key)
892 lis $mask1b,0x1b1b 1064 lis $mask1b,0x1b1b
893 addi $key,$key,16 1065 lwz $t2,8($key)
894 ori $mask80,$mask80,0x8080 1066 ori $mask80,$mask80,0x8080
1067 lwz $t3,12($key)
895 ori $mask1b,$mask1b,0x1b1b 1068 ori $mask1b,$mask1b,0x1b1b
1069 addi $key,$key,16
896___ 1070___
897$code.=<<___ if ($SIZE_T==8); 1071$code.=<<___ if ($SIZE_T==8);
898 insrdi $mask80,$mask80,32,0 1072 insrdi $mask80,$mask80,32,0
@@ -904,10 +1078,10 @@ $code.=<<___;
904Ldec_compact_loop: 1078Ldec_compact_loop:
905 xor $s0,$s0,$t0 1079 xor $s0,$s0,$t0
906 xor $s1,$s1,$t1 1080 xor $s1,$s1,$t1
907 xor $s2,$s2,$t2
908 xor $s3,$s3,$t3
909 rlwinm $acc00,$s0,`32-24`,24,31 1081 rlwinm $acc00,$s0,`32-24`,24,31
1082 xor $s2,$s2,$t2
910 rlwinm $acc01,$s1,`32-24`,24,31 1083 rlwinm $acc01,$s1,`32-24`,24,31
1084 xor $s3,$s3,$t3
911 rlwinm $acc02,$s2,`32-24`,24,31 1085 rlwinm $acc02,$s2,`32-24`,24,31
912 rlwinm $acc03,$s3,`32-24`,24,31 1086 rlwinm $acc03,$s3,`32-24`,24,31
913 rlwinm $acc04,$s3,`32-16`,24,31 1087 rlwinm $acc04,$s3,`32-16`,24,31
@@ -915,48 +1089,48 @@ Ldec_compact_loop:
915 rlwinm $acc06,$s1,`32-16`,24,31 1089 rlwinm $acc06,$s1,`32-16`,24,31
916 rlwinm $acc07,$s2,`32-16`,24,31 1090 rlwinm $acc07,$s2,`32-16`,24,31
917 lbzx $acc00,$Tbl1,$acc00 1091 lbzx $acc00,$Tbl1,$acc00
918 lbzx $acc01,$Tbl1,$acc01
919 rlwinm $acc08,$s2,`32-8`,24,31 1092 rlwinm $acc08,$s2,`32-8`,24,31
1093 lbzx $acc01,$Tbl1,$acc01
920 rlwinm $acc09,$s3,`32-8`,24,31 1094 rlwinm $acc09,$s3,`32-8`,24,31
921 lbzx $acc02,$Tbl1,$acc02 1095 lbzx $acc02,$Tbl1,$acc02
922 lbzx $acc03,$Tbl1,$acc03
923 rlwinm $acc10,$s0,`32-8`,24,31 1096 rlwinm $acc10,$s0,`32-8`,24,31
1097 lbzx $acc03,$Tbl1,$acc03
924 rlwinm $acc11,$s1,`32-8`,24,31 1098 rlwinm $acc11,$s1,`32-8`,24,31
925 lbzx $acc04,$Tbl1,$acc04 1099 lbzx $acc04,$Tbl1,$acc04
926 lbzx $acc05,$Tbl1,$acc05
927 rlwinm $acc12,$s1,`0`,24,31 1100 rlwinm $acc12,$s1,`0`,24,31
1101 lbzx $acc05,$Tbl1,$acc05
928 rlwinm $acc13,$s2,`0`,24,31 1102 rlwinm $acc13,$s2,`0`,24,31
929 lbzx $acc06,$Tbl1,$acc06 1103 lbzx $acc06,$Tbl1,$acc06
930 lbzx $acc07,$Tbl1,$acc07
931 rlwinm $acc14,$s3,`0`,24,31 1104 rlwinm $acc14,$s3,`0`,24,31
1105 lbzx $acc07,$Tbl1,$acc07
932 rlwinm $acc15,$s0,`0`,24,31 1106 rlwinm $acc15,$s0,`0`,24,31
933 lbzx $acc08,$Tbl1,$acc08 1107 lbzx $acc08,$Tbl1,$acc08
934 lbzx $acc09,$Tbl1,$acc09
935 rlwinm $s0,$acc00,24,0,7 1108 rlwinm $s0,$acc00,24,0,7
1109 lbzx $acc09,$Tbl1,$acc09
936 rlwinm $s1,$acc01,24,0,7 1110 rlwinm $s1,$acc01,24,0,7
937 lbzx $acc10,$Tbl1,$acc10 1111 lbzx $acc10,$Tbl1,$acc10
938 lbzx $acc11,$Tbl1,$acc11
939 rlwinm $s2,$acc02,24,0,7 1112 rlwinm $s2,$acc02,24,0,7
1113 lbzx $acc11,$Tbl1,$acc11
940 rlwinm $s3,$acc03,24,0,7 1114 rlwinm $s3,$acc03,24,0,7
941 lbzx $acc12,$Tbl1,$acc12 1115 lbzx $acc12,$Tbl1,$acc12
942 lbzx $acc13,$Tbl1,$acc13
943 rlwimi $s0,$acc04,16,8,15 1116 rlwimi $s0,$acc04,16,8,15
1117 lbzx $acc13,$Tbl1,$acc13
944 rlwimi $s1,$acc05,16,8,15 1118 rlwimi $s1,$acc05,16,8,15
945 lbzx $acc14,$Tbl1,$acc14 1119 lbzx $acc14,$Tbl1,$acc14
946 lbzx $acc15,$Tbl1,$acc15
947 rlwimi $s2,$acc06,16,8,15 1120 rlwimi $s2,$acc06,16,8,15
1121 lbzx $acc15,$Tbl1,$acc15
948 rlwimi $s3,$acc07,16,8,15 1122 rlwimi $s3,$acc07,16,8,15
949 rlwimi $s0,$acc08,8,16,23 1123 rlwimi $s0,$acc08,8,16,23
950 rlwimi $s1,$acc09,8,16,23 1124 rlwimi $s1,$acc09,8,16,23
951 rlwimi $s2,$acc10,8,16,23 1125 rlwimi $s2,$acc10,8,16,23
952 rlwimi $s3,$acc11,8,16,23 1126 rlwimi $s3,$acc11,8,16,23
953 lwz $t0,0($key) 1127 lwz $t0,0($key)
954 lwz $t1,4($key)
955 or $s0,$s0,$acc12 1128 or $s0,$s0,$acc12
1129 lwz $t1,4($key)
956 or $s1,$s1,$acc13 1130 or $s1,$s1,$acc13
957 lwz $t2,8($key) 1131 lwz $t2,8($key)
958 lwz $t3,12($key)
959 or $s2,$s2,$acc14 1132 or $s2,$s2,$acc14
1133 lwz $t3,12($key)
960 or $s3,$s3,$acc15 1134 or $s3,$s3,$acc15
961 1135
962 addi $key,$key,16 1136 addi $key,$key,16
@@ -1030,12 +1204,12 @@ $code.=<<___ if ($SIZE_T==4);
1030 and $acc02,$s2,$mask80 1204 and $acc02,$s2,$mask80
1031 and $acc03,$s3,$mask80 1205 and $acc03,$s3,$mask80
1032 srwi $acc04,$acc00,7 # r1>>7 1206 srwi $acc04,$acc00,7 # r1>>7
1033 srwi $acc05,$acc01,7
1034 srwi $acc06,$acc02,7
1035 srwi $acc07,$acc03,7
1036 andc $acc08,$s0,$mask80 # r0&0x7f7f7f7f 1207 andc $acc08,$s0,$mask80 # r0&0x7f7f7f7f
1208 srwi $acc05,$acc01,7
1037 andc $acc09,$s1,$mask80 1209 andc $acc09,$s1,$mask80
1210 srwi $acc06,$acc02,7
1038 andc $acc10,$s2,$mask80 1211 andc $acc10,$s2,$mask80
1212 srwi $acc07,$acc03,7
1039 andc $acc11,$s3,$mask80 1213 andc $acc11,$s3,$mask80
1040 sub $acc00,$acc00,$acc04 # r1-(r1>>7) 1214 sub $acc00,$acc00,$acc04 # r1-(r1>>7)
1041 sub $acc01,$acc01,$acc05 1215 sub $acc01,$acc01,$acc05
@@ -1059,12 +1233,12 @@ $code.=<<___ if ($SIZE_T==4);
1059 and $acc06,$acc02,$mask80 1233 and $acc06,$acc02,$mask80
1060 and $acc07,$acc03,$mask80 1234 and $acc07,$acc03,$mask80
1061 srwi $acc08,$acc04,7 # r1>>7 1235 srwi $acc08,$acc04,7 # r1>>7
1062 srwi $acc09,$acc05,7
1063 srwi $acc10,$acc06,7
1064 srwi $acc11,$acc07,7
1065 andc $acc12,$acc00,$mask80 # r2&0x7f7f7f7f 1236 andc $acc12,$acc00,$mask80 # r2&0x7f7f7f7f
1237 srwi $acc09,$acc05,7
1066 andc $acc13,$acc01,$mask80 1238 andc $acc13,$acc01,$mask80
1239 srwi $acc10,$acc06,7
1067 andc $acc14,$acc02,$mask80 1240 andc $acc14,$acc02,$mask80
1241 srwi $acc11,$acc07,7
1068 andc $acc15,$acc03,$mask80 1242 andc $acc15,$acc03,$mask80
1069 sub $acc04,$acc04,$acc08 # r1-(r1>>7) 1243 sub $acc04,$acc04,$acc08 # r1-(r1>>7)
1070 sub $acc05,$acc05,$acc09 1244 sub $acc05,$acc05,$acc09
@@ -1085,13 +1259,13 @@ $code.=<<___ if ($SIZE_T==4);
1085 1259
1086 and $acc08,$acc04,$mask80 # r1=r4&0x80808080 1260 and $acc08,$acc04,$mask80 # r1=r4&0x80808080
1087 and $acc09,$acc05,$mask80 1261 and $acc09,$acc05,$mask80
1088 and $acc10,$acc06,$mask80
1089 and $acc11,$acc07,$mask80
1090 srwi $acc12,$acc08,7 # r1>>7 1262 srwi $acc12,$acc08,7 # r1>>7
1263 and $acc10,$acc06,$mask80
1091 srwi $acc13,$acc09,7 1264 srwi $acc13,$acc09,7
1265 and $acc11,$acc07,$mask80
1092 srwi $acc14,$acc10,7 1266 srwi $acc14,$acc10,7
1093 srwi $acc15,$acc11,7
1094 sub $acc08,$acc08,$acc12 # r1-(r1>>7) 1267 sub $acc08,$acc08,$acc12 # r1-(r1>>7)
1268 srwi $acc15,$acc11,7
1095 sub $acc09,$acc09,$acc13 1269 sub $acc09,$acc09,$acc13
1096 sub $acc10,$acc10,$acc14 1270 sub $acc10,$acc10,$acc14
1097 sub $acc11,$acc11,$acc15 1271 sub $acc11,$acc11,$acc15
@@ -1124,10 +1298,10 @@ ___
1124$code.=<<___; 1298$code.=<<___;
1125 rotrwi $s0,$s0,8 # = ROTATE(r0,8) 1299 rotrwi $s0,$s0,8 # = ROTATE(r0,8)
1126 rotrwi $s1,$s1,8 1300 rotrwi $s1,$s1,8
1127 rotrwi $s2,$s2,8
1128 rotrwi $s3,$s3,8
1129 xor $s0,$s0,$acc00 # ^= r2^r0 1301 xor $s0,$s0,$acc00 # ^= r2^r0
1302 rotrwi $s2,$s2,8
1130 xor $s1,$s1,$acc01 1303 xor $s1,$s1,$acc01
1304 rotrwi $s3,$s3,8
1131 xor $s2,$s2,$acc02 1305 xor $s2,$s2,$acc02
1132 xor $s3,$s3,$acc03 1306 xor $s3,$s3,$acc03
1133 xor $acc00,$acc00,$acc08 1307 xor $acc00,$acc00,$acc08
@@ -1135,32 +1309,32 @@ $code.=<<___;
1135 xor $acc02,$acc02,$acc10 1309 xor $acc02,$acc02,$acc10
1136 xor $acc03,$acc03,$acc11 1310 xor $acc03,$acc03,$acc11
1137 xor $s0,$s0,$acc04 # ^= r4^r0 1311 xor $s0,$s0,$acc04 # ^= r4^r0
1138 xor $s1,$s1,$acc05
1139 xor $s2,$s2,$acc06
1140 xor $s3,$s3,$acc07
1141 rotrwi $acc00,$acc00,24 1312 rotrwi $acc00,$acc00,24
1313 xor $s1,$s1,$acc05
1142 rotrwi $acc01,$acc01,24 1314 rotrwi $acc01,$acc01,24
1315 xor $s2,$s2,$acc06
1143 rotrwi $acc02,$acc02,24 1316 rotrwi $acc02,$acc02,24
1317 xor $s3,$s3,$acc07
1144 rotrwi $acc03,$acc03,24 1318 rotrwi $acc03,$acc03,24
1145 xor $acc04,$acc04,$acc08 1319 xor $acc04,$acc04,$acc08
1146 xor $acc05,$acc05,$acc09 1320 xor $acc05,$acc05,$acc09
1147 xor $acc06,$acc06,$acc10 1321 xor $acc06,$acc06,$acc10
1148 xor $acc07,$acc07,$acc11 1322 xor $acc07,$acc07,$acc11
1149 xor $s0,$s0,$acc08 # ^= r8 [^((r4^r0)^(r2^r0)=r4^r2)] 1323 xor $s0,$s0,$acc08 # ^= r8 [^((r4^r0)^(r2^r0)=r4^r2)]
1150 xor $s1,$s1,$acc09
1151 xor $s2,$s2,$acc10
1152 xor $s3,$s3,$acc11
1153 rotrwi $acc04,$acc04,16 1324 rotrwi $acc04,$acc04,16
1325 xor $s1,$s1,$acc09
1154 rotrwi $acc05,$acc05,16 1326 rotrwi $acc05,$acc05,16
1327 xor $s2,$s2,$acc10
1155 rotrwi $acc06,$acc06,16 1328 rotrwi $acc06,$acc06,16
1329 xor $s3,$s3,$acc11
1156 rotrwi $acc07,$acc07,16 1330 rotrwi $acc07,$acc07,16
1157 xor $s0,$s0,$acc00 # ^= ROTATE(r8^r2^r0,24) 1331 xor $s0,$s0,$acc00 # ^= ROTATE(r8^r2^r0,24)
1158 xor $s1,$s1,$acc01
1159 xor $s2,$s2,$acc02
1160 xor $s3,$s3,$acc03
1161 rotrwi $acc08,$acc08,8 1332 rotrwi $acc08,$acc08,8
1333 xor $s1,$s1,$acc01
1162 rotrwi $acc09,$acc09,8 1334 rotrwi $acc09,$acc09,8
1335 xor $s2,$s2,$acc02
1163 rotrwi $acc10,$acc10,8 1336 rotrwi $acc10,$acc10,8
1337 xor $s3,$s3,$acc03
1164 rotrwi $acc11,$acc11,8 1338 rotrwi $acc11,$acc11,8
1165 xor $s0,$s0,$acc04 # ^= ROTATE(r8^r4^r0,16) 1339 xor $s0,$s0,$acc04 # ^= ROTATE(r8^r4^r0,16)
1166 xor $s1,$s1,$acc05 1340 xor $s1,$s1,$acc05
@@ -1179,7 +1353,9 @@ Ldec_compact_done:
1179 xor $s2,$s2,$t2 1353 xor $s2,$s2,$t2
1180 xor $s3,$s3,$t3 1354 xor $s3,$s3,$t3
1181 blr 1355 blr
1182.long 0 1356 .long 0
1357 .byte 0,12,0x14,0,0,0,0,0
1358
1183.asciz "AES for PPC, CRYPTOGAMS by <appro\@openssl.org>" 1359.asciz "AES for PPC, CRYPTOGAMS by <appro\@openssl.org>"
1184.align 7 1360.align 7
1185___ 1361___
diff --git a/src/lib/libssl/src/crypto/aes/asm/aes-s390x.pl b/src/lib/libssl/src/crypto/aes/asm/aes-s390x.pl
index 7e01889298..445a1e6762 100644
--- a/src/lib/libssl/src/crypto/aes/asm/aes-s390x.pl
+++ b/src/lib/libssl/src/crypto/aes/asm/aes-s390x.pl
@@ -44,12 +44,57 @@
44# Unlike previous version hardware support detection takes place only 44# Unlike previous version hardware support detection takes place only
45# at the moment of key schedule setup, which is denoted in key->rounds. 45# at the moment of key schedule setup, which is denoted in key->rounds.
46# This is done, because deferred key setup can't be made MT-safe, not 46# This is done, because deferred key setup can't be made MT-safe, not
47# for key lengthes longer than 128 bits. 47# for keys longer than 128 bits.
48# 48#
49# Add AES_cbc_encrypt, which gives incredible performance improvement, 49# Add AES_cbc_encrypt, which gives incredible performance improvement,
50# it was measured to be ~6.6x. It's less than previously mentioned 8x, 50# it was measured to be ~6.6x. It's less than previously mentioned 8x,
51# because software implementation was optimized. 51# because software implementation was optimized.
52 52
53# May 2010.
54#
55# Add AES_ctr32_encrypt. If hardware-assisted, it provides up to 4.3x
56# performance improvement over "generic" counter mode routine relying
57# on single-block, also hardware-assisted, AES_encrypt. "Up to" refers
58# to the fact that exact throughput value depends on current stack
59# frame alignment within 4KB page. In worst case you get ~75% of the
60# maximum, but *on average* it would be as much as ~98%. Meaning that
61# the worst case is unlikely; it's like hitting a ravine on a plateau.
62
63# November 2010.
64#
65# Adapt for -m31 build. If kernel supports what's called "highgprs"
66# feature on Linux [see /proc/cpuinfo], it's possible to use 64-bit
67# instructions and achieve "64-bit" performance even in 31-bit legacy
68# application context. The feature is not specific to any particular
69# processor, as long as it's a "z-CPU". The latter implies that the code
70# remains z/Architecture specific. On z990 it was measured to perform
71# 2x better than code generated by gcc 4.3.
72
73# December 2010.
74#
75# Add support for z196 "cipher message with counter" instruction.
76# Note however that it's disengaged, because it was measured to
77# perform ~12% worse than vanilla km-based code...
78
79# February 2011.
80#
81# Add AES_xts_[en|de]crypt. This includes support for z196 km-xts-aes
82# instructions, which deliver ~70% improvement at 8KB block size over
83# vanilla km-based code, and 37% at smaller block sizes such as 512 bytes.
84
85$flavour = shift;
86
87if ($flavour =~ /3[12]/) {
88 $SIZE_T=4;
89 $g="";
90} else {
91 $SIZE_T=8;
92 $g="g";
93}
94
95while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
96open STDOUT,">$output";
97
53$softonly=0; # allow hardware support 98$softonly=0; # allow hardware support
54 99
55$t0="%r0"; $mask="%r0"; 100$t0="%r0"; $mask="%r0";
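
The $flavour block added above is what makes one aes-s390x.pl source serve both ABIs: $SIZE_T scales every stack-frame offset, and the empty-or-"g" $g suffix picks the 31-bit or 64-bit form of the save/restore mnemonics, which is exactly the stm${g}/lm${g}/st${g}/l${g} rewrite seen in the hunks below. A minimal standalone sketch of that expansion (the flavour strings here are assumptions following the usual perlasm convention):

    #!/usr/bin/perl
    # Sketch of how $SIZE_T and $g select 31-bit vs 64-bit forms of the same
    # register-save template (cf. "stm${g} %r3,$ra,3*$SIZE_T($sp)" below).
    use strict; use warnings;

    for my $flavour ("linux32", "linux64") {     # assumed flavour names
        my ($SIZE_T, $g) = ($flavour =~ /3[12]/) ? (4, "") : (8, "g");
        printf "%-8s  stm%s\t%%r3,%%r14,%d(%%r15)\n",
            $flavour, $g, 3*$SIZE_T;             # stm ...,12(%r15) / stmg ...,24(%r15)
    }
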
@@ -69,6 +114,8 @@ $rounds="%r13";
69$ra="%r14"; 114$ra="%r14";
70$sp="%r15"; 115$sp="%r15";
71 116
117$stdframe=16*$SIZE_T+4*8;
118
72sub _data_word() 119sub _data_word()
73{ my $i; 120{ my $i;
74 while(defined($i=shift)) { $code.=sprintf".long\t0x%08x,0x%08x\n",$i,$i; } 121 while(defined($i=shift)) { $code.=sprintf".long\t0x%08x,0x%08x\n",$i,$i; }
@@ -210,7 +257,7 @@ $code.=<<___ if (!$softonly);
210.Lesoft: 257.Lesoft:
211___ 258___
212$code.=<<___; 259$code.=<<___;
213 stmg %r3,$ra,24($sp) 260 stm${g} %r3,$ra,3*$SIZE_T($sp)
214 261
215 llgf $s0,0($inp) 262 llgf $s0,0($inp)
216 llgf $s1,4($inp) 263 llgf $s1,4($inp)
@@ -220,20 +267,20 @@ $code.=<<___;
220 larl $tbl,AES_Te 267 larl $tbl,AES_Te
221 bras $ra,_s390x_AES_encrypt 268 bras $ra,_s390x_AES_encrypt
222 269
223 lg $out,24($sp) 270 l${g} $out,3*$SIZE_T($sp)
224 st $s0,0($out) 271 st $s0,0($out)
225 st $s1,4($out) 272 st $s1,4($out)
226 st $s2,8($out) 273 st $s2,8($out)
227 st $s3,12($out) 274 st $s3,12($out)
228 275
229 lmg %r6,$ra,48($sp) 276 lm${g} %r6,$ra,6*$SIZE_T($sp)
230 br $ra 277 br $ra
231.size AES_encrypt,.-AES_encrypt 278.size AES_encrypt,.-AES_encrypt
232 279
233.type _s390x_AES_encrypt,\@function 280.type _s390x_AES_encrypt,\@function
234.align 16 281.align 16
235_s390x_AES_encrypt: 282_s390x_AES_encrypt:
236 stg $ra,152($sp) 283 st${g} $ra,15*$SIZE_T($sp)
237 x $s0,0($key) 284 x $s0,0($key)
238 x $s1,4($key) 285 x $s1,4($key)
239 x $s2,8($key) 286 x $s2,8($key)
@@ -397,7 +444,7 @@ _s390x_AES_encrypt:
397 or $s2,$i3 444 or $s2,$i3
398 or $s3,$t3 445 or $s3,$t3
399 446
400 lg $ra,152($sp) 447 l${g} $ra,15*$SIZE_T($sp)
401 xr $s0,$t0 448 xr $s0,$t0
402 xr $s1,$t2 449 xr $s1,$t2
403 x $s2,24($key) 450 x $s2,24($key)
@@ -536,7 +583,7 @@ $code.=<<___ if (!$softonly);
536.Ldsoft: 583.Ldsoft:
537___ 584___
538$code.=<<___; 585$code.=<<___;
539 stmg %r3,$ra,24($sp) 586 stm${g} %r3,$ra,3*$SIZE_T($sp)
540 587
541 llgf $s0,0($inp) 588 llgf $s0,0($inp)
542 llgf $s1,4($inp) 589 llgf $s1,4($inp)
@@ -546,20 +593,20 @@ $code.=<<___;
546 larl $tbl,AES_Td 593 larl $tbl,AES_Td
547 bras $ra,_s390x_AES_decrypt 594 bras $ra,_s390x_AES_decrypt
548 595
549 lg $out,24($sp) 596 l${g} $out,3*$SIZE_T($sp)
550 st $s0,0($out) 597 st $s0,0($out)
551 st $s1,4($out) 598 st $s1,4($out)
552 st $s2,8($out) 599 st $s2,8($out)
553 st $s3,12($out) 600 st $s3,12($out)
554 601
555 lmg %r6,$ra,48($sp) 602 lm${g} %r6,$ra,6*$SIZE_T($sp)
556 br $ra 603 br $ra
557.size AES_decrypt,.-AES_decrypt 604.size AES_decrypt,.-AES_decrypt
558 605
559.type _s390x_AES_decrypt,\@function 606.type _s390x_AES_decrypt,\@function
560.align 16 607.align 16
561_s390x_AES_decrypt: 608_s390x_AES_decrypt:
562 stg $ra,152($sp) 609 st${g} $ra,15*$SIZE_T($sp)
563 x $s0,0($key) 610 x $s0,0($key)
564 x $s1,4($key) 611 x $s1,4($key)
565 x $s2,8($key) 612 x $s2,8($key)
@@ -703,7 +750,7 @@ _s390x_AES_decrypt:
703 nr $i1,$mask 750 nr $i1,$mask
704 nr $i2,$mask 751 nr $i2,$mask
705 752
706 lg $ra,152($sp) 753 l${g} $ra,15*$SIZE_T($sp)
707 or $s1,$t1 754 or $s1,$t1
708 l $t0,16($key) 755 l $t0,16($key)
709 l $t1,20($key) 756 l $t1,20($key)
@@ -732,14 +779,15 @@ ___
732$code.=<<___; 779$code.=<<___;
733# void AES_set_encrypt_key(const unsigned char *in, int bits, 780# void AES_set_encrypt_key(const unsigned char *in, int bits,
734# AES_KEY *key) { 781# AES_KEY *key) {
735.globl AES_set_encrypt_key 782.globl private_AES_set_encrypt_key
736.type AES_set_encrypt_key,\@function 783.type private_AES_set_encrypt_key,\@function
737.align 16 784.align 16
738AES_set_encrypt_key: 785private_AES_set_encrypt_key:
786_s390x_AES_set_encrypt_key:
739 lghi $t0,0 787 lghi $t0,0
740 clgr $inp,$t0 788 cl${g}r $inp,$t0
741 je .Lminus1 789 je .Lminus1
742 clgr $key,$t0 790 cl${g}r $key,$t0
743 je .Lminus1 791 je .Lminus1
744 792
745 lghi $t0,128 793 lghi $t0,128
@@ -789,7 +837,8 @@ $code.=<<___ if (!$softonly);
789 je 1f 837 je 1f
790 lg %r1,24($inp) 838 lg %r1,24($inp)
791 stg %r1,24($key) 839 stg %r1,24($key)
7921: st $bits,236($key) # save bits 8401: st $bits,236($key) # save bits [for debugging purposes]
841 lgr $t0,%r5
793 st %r5,240($key) # save km code 842 st %r5,240($key) # save km code
794 lghi %r2,0 843 lghi %r2,0
795 br %r14 844 br %r14
@@ -797,7 +846,7 @@ ___
797$code.=<<___; 846$code.=<<___;
798.align 16 847.align 16
799.Lekey_internal: 848.Lekey_internal:
800 stmg %r6,%r13,48($sp) # all non-volatile regs 849 stm${g} %r4,%r13,4*$SIZE_T($sp) # all non-volatile regs and $key
801 850
802 larl $tbl,AES_Te+2048 851 larl $tbl,AES_Te+2048
803 852
@@ -857,8 +906,9 @@ $code.=<<___;
857 la $key,16($key) # key+=4 906 la $key,16($key) # key+=4
858 la $t3,4($t3) # i++ 907 la $t3,4($t3) # i++
859 brct $rounds,.L128_loop 908 brct $rounds,.L128_loop
909 lghi $t0,10
860 lghi %r2,0 910 lghi %r2,0
861 lmg %r6,%r13,48($sp) 911 lm${g} %r4,%r13,4*$SIZE_T($sp)
862 br $ra 912 br $ra
863 913
864.align 16 914.align 16
@@ -905,8 +955,9 @@ $code.=<<___;
905 st $s2,32($key) 955 st $s2,32($key)
906 st $s3,36($key) 956 st $s3,36($key)
907 brct $rounds,.L192_continue 957 brct $rounds,.L192_continue
958 lghi $t0,12
908 lghi %r2,0 959 lghi %r2,0
909 lmg %r6,%r13,48($sp) 960 lm${g} %r4,%r13,4*$SIZE_T($sp)
910 br $ra 961 br $ra
911 962
912.align 16 963.align 16
@@ -967,8 +1018,9 @@ $code.=<<___;
967 st $s2,40($key) 1018 st $s2,40($key)
968 st $s3,44($key) 1019 st $s3,44($key)
969 brct $rounds,.L256_continue 1020 brct $rounds,.L256_continue
1021 lghi $t0,14
970 lghi %r2,0 1022 lghi %r2,0
971 lmg %r6,%r13,48($sp) 1023 lm${g} %r4,%r13,4*$SIZE_T($sp)
972 br $ra 1024 br $ra
973 1025
974.align 16 1026.align 16
@@ -1011,42 +1063,34 @@ $code.=<<___;
1011.Lminus1: 1063.Lminus1:
1012 lghi %r2,-1 1064 lghi %r2,-1
1013 br $ra 1065 br $ra
1014.size AES_set_encrypt_key,.-AES_set_encrypt_key 1066.size private_AES_set_encrypt_key,.-private_AES_set_encrypt_key
1015 1067
1016# void AES_set_decrypt_key(const unsigned char *in, int bits, 1068# void AES_set_decrypt_key(const unsigned char *in, int bits,
1017# AES_KEY *key) { 1069# AES_KEY *key) {
1018.globl AES_set_decrypt_key 1070.globl private_AES_set_decrypt_key
1019.type AES_set_decrypt_key,\@function 1071.type private_AES_set_decrypt_key,\@function
1020.align 16 1072.align 16
1021AES_set_decrypt_key: 1073private_AES_set_decrypt_key:
1022 stg $key,32($sp) # I rely on AES_set_encrypt_key to 1074 #st${g} $key,4*$SIZE_T($sp) # I rely on AES_set_encrypt_key to
1023 stg $ra,112($sp) # save non-volatile registers! 1075 st${g} $ra,14*$SIZE_T($sp) # save non-volatile registers and $key!
1024 bras $ra,AES_set_encrypt_key 1076 bras $ra,_s390x_AES_set_encrypt_key
1025 lg $key,32($sp) 1077 #l${g} $key,4*$SIZE_T($sp)
1026 lg $ra,112($sp) 1078 l${g} $ra,14*$SIZE_T($sp)
1027 ltgr %r2,%r2 1079 ltgr %r2,%r2
1028 bnzr $ra 1080 bnzr $ra
1029___ 1081___
1030$code.=<<___ if (!$softonly); 1082$code.=<<___ if (!$softonly);
1031 l $t0,240($key) 1083 #l $t0,240($key)
1032 lhi $t1,16 1084 lhi $t1,16
1033 cr $t0,$t1 1085 cr $t0,$t1
1034 jl .Lgo 1086 jl .Lgo
1035 oill $t0,0x80 # set "decrypt" bit 1087 oill $t0,0x80 # set "decrypt" bit
1036 st $t0,240($key) 1088 st $t0,240($key)
1037 br $ra 1089 br $ra
1038
1039.align 16
1040.Ldkey_internal:
1041 stg $key,32($sp)
1042 stg $ra,40($sp)
1043 bras $ra,.Lekey_internal
1044 lg $key,32($sp)
1045 lg $ra,40($sp)
1046___ 1090___
1047$code.=<<___; 1091$code.=<<___;
1048 1092.align 16
1049.Lgo: llgf $rounds,240($key) 1093.Lgo: lgr $rounds,$t0 #llgf $rounds,240($key)
1050 la $i1,0($key) 1094 la $i1,0($key)
1051 sllg $i2,$rounds,4 1095 sllg $i2,$rounds,4
1052 la $i2,0($i2,$key) 1096 la $i2,0($i2,$key)
@@ -1123,13 +1167,14 @@ $code.=<<___;
1123 la $key,4($key) 1167 la $key,4($key)
1124 brct $rounds,.Lmix 1168 brct $rounds,.Lmix
1125 1169
1126 lmg %r6,%r13,48($sp)# as was saved by AES_set_encrypt_key! 1170 lm${g} %r6,%r13,6*$SIZE_T($sp)# as was saved by AES_set_encrypt_key!
1127 lghi %r2,0 1171 lghi %r2,0
1128 br $ra 1172 br $ra
1129.size AES_set_decrypt_key,.-AES_set_decrypt_key 1173.size private_AES_set_decrypt_key,.-private_AES_set_decrypt_key
1130___ 1174___
1131 1175
1132#void AES_cbc_encrypt(const unsigned char *in, unsigned char *out, 1176########################################################################
1177# void AES_cbc_encrypt(const unsigned char *in, unsigned char *out,
1133# size_t length, const AES_KEY *key, 1178# size_t length, const AES_KEY *key,
1134# unsigned char *ivec, const int enc) 1179# unsigned char *ivec, const int enc)
1135{ 1180{
@@ -1163,7 +1208,7 @@ $code.=<<___ if (!$softonly);
1163 l %r0,240($key) # load kmc code 1208 l %r0,240($key) # load kmc code
1164 lghi $key,15 # res=len%16, len-=res; 1209 lghi $key,15 # res=len%16, len-=res;
1165 ngr $key,$len 1210 ngr $key,$len
1166 slgr $len,$key 1211 sl${g}r $len,$key
1167 la %r1,16($sp) # parameter block - ivec || key 1212 la %r1,16($sp) # parameter block - ivec || key
1168 jz .Lkmc_truncated 1213 jz .Lkmc_truncated
1169 .long 0xb92f0042 # kmc %r4,%r2 1214 .long 0xb92f0042 # kmc %r4,%r2
@@ -1181,34 +1226,34 @@ $code.=<<___ if (!$softonly);
1181 tmll %r0,0x80 1226 tmll %r0,0x80
1182 jnz .Lkmc_truncated_dec 1227 jnz .Lkmc_truncated_dec
1183 lghi %r1,0 1228 lghi %r1,0
1184 stg %r1,128($sp) 1229 stg %r1,16*$SIZE_T($sp)
1185 stg %r1,136($sp) 1230 stg %r1,16*$SIZE_T+8($sp)
1186 bras %r1,1f 1231 bras %r1,1f
1187 mvc 128(1,$sp),0($inp) 1232 mvc 16*$SIZE_T(1,$sp),0($inp)
11881: ex $key,0(%r1) 12331: ex $key,0(%r1)
1189 la %r1,16($sp) # restore parameter block 1234 la %r1,16($sp) # restore parameter block
1190 la $inp,128($sp) 1235 la $inp,16*$SIZE_T($sp)
1191 lghi $len,16 1236 lghi $len,16
1192 .long 0xb92f0042 # kmc %r4,%r2 1237 .long 0xb92f0042 # kmc %r4,%r2
1193 j .Lkmc_done 1238 j .Lkmc_done
1194.align 16 1239.align 16
1195.Lkmc_truncated_dec: 1240.Lkmc_truncated_dec:
1196 stg $out,64($sp) 1241 st${g} $out,4*$SIZE_T($sp)
1197 la $out,128($sp) 1242 la $out,16*$SIZE_T($sp)
1198 lghi $len,16 1243 lghi $len,16
1199 .long 0xb92f0042 # kmc %r4,%r2 1244 .long 0xb92f0042 # kmc %r4,%r2
1200 lg $out,64($sp) 1245 l${g} $out,4*$SIZE_T($sp)
1201 bras %r1,2f 1246 bras %r1,2f
1202 mvc 0(1,$out),128($sp) 1247 mvc 0(1,$out),16*$SIZE_T($sp)
12032: ex $key,0(%r1) 12482: ex $key,0(%r1)
1204 j .Lkmc_done 1249 j .Lkmc_done
1205.align 16 1250.align 16
1206.Lcbc_software: 1251.Lcbc_software:
1207___ 1252___
1208$code.=<<___; 1253$code.=<<___;
1209 stmg $key,$ra,40($sp) 1254 stm${g} $key,$ra,5*$SIZE_T($sp)
1210 lhi %r0,0 1255 lhi %r0,0
1211 cl %r0,164($sp) 1256 cl %r0,`$stdframe+$SIZE_T-4`($sp)
1212 je .Lcbc_decrypt 1257 je .Lcbc_decrypt
1213 1258
1214 larl $tbl,AES_Te 1259 larl $tbl,AES_Te
@@ -1219,10 +1264,10 @@ $code.=<<___;
1219 llgf $s3,12($ivp) 1264 llgf $s3,12($ivp)
1220 1265
1221 lghi $t0,16 1266 lghi $t0,16
1222 slgr $len,$t0 1267 sl${g}r $len,$t0
1223 brc 4,.Lcbc_enc_tail # if borrow 1268 brc 4,.Lcbc_enc_tail # if borrow
1224.Lcbc_enc_loop: 1269.Lcbc_enc_loop:
1225 stmg $inp,$out,16($sp) 1270 stm${g} $inp,$out,2*$SIZE_T($sp)
1226 x $s0,0($inp) 1271 x $s0,0($inp)
1227 x $s1,4($inp) 1272 x $s1,4($inp)
1228 x $s2,8($inp) 1273 x $s2,8($inp)
@@ -1231,7 +1276,7 @@ $code.=<<___;
1231 1276
1232 bras $ra,_s390x_AES_encrypt 1277 bras $ra,_s390x_AES_encrypt
1233 1278
1234 lmg $inp,$key,16($sp) 1279 lm${g} $inp,$key,2*$SIZE_T($sp)
1235 st $s0,0($out) 1280 st $s0,0($out)
1236 st $s1,4($out) 1281 st $s1,4($out)
1237 st $s2,8($out) 1282 st $s2,8($out)
@@ -1240,33 +1285,33 @@ $code.=<<___;
1240 la $inp,16($inp) 1285 la $inp,16($inp)
1241 la $out,16($out) 1286 la $out,16($out)
1242 lghi $t0,16 1287 lghi $t0,16
1243 ltgr $len,$len 1288 lt${g}r $len,$len
1244 jz .Lcbc_enc_done 1289 jz .Lcbc_enc_done
1245 slgr $len,$t0 1290 sl${g}r $len,$t0
1246 brc 4,.Lcbc_enc_tail # if borrow 1291 brc 4,.Lcbc_enc_tail # if borrow
1247 j .Lcbc_enc_loop 1292 j .Lcbc_enc_loop
1248.align 16 1293.align 16
1249.Lcbc_enc_done: 1294.Lcbc_enc_done:
1250 lg $ivp,48($sp) 1295 l${g} $ivp,6*$SIZE_T($sp)
1251 st $s0,0($ivp) 1296 st $s0,0($ivp)
1252 st $s1,4($ivp) 1297 st $s1,4($ivp)
1253 st $s2,8($ivp) 1298 st $s2,8($ivp)
1254 st $s3,12($ivp) 1299 st $s3,12($ivp)
1255 1300
1256 lmg %r7,$ra,56($sp) 1301 lm${g} %r7,$ra,7*$SIZE_T($sp)
1257 br $ra 1302 br $ra
1258 1303
1259.align 16 1304.align 16
1260.Lcbc_enc_tail: 1305.Lcbc_enc_tail:
1261 aghi $len,15 1306 aghi $len,15
1262 lghi $t0,0 1307 lghi $t0,0
1263 stg $t0,128($sp) 1308 stg $t0,16*$SIZE_T($sp)
1264 stg $t0,136($sp) 1309 stg $t0,16*$SIZE_T+8($sp)
1265 bras $t1,3f 1310 bras $t1,3f
1266 mvc 128(1,$sp),0($inp) 1311 mvc 16*$SIZE_T(1,$sp),0($inp)
12673: ex $len,0($t1) 13123: ex $len,0($t1)
1268 lghi $len,0 1313 lghi $len,0
1269 la $inp,128($sp) 1314 la $inp,16*$SIZE_T($sp)
1270 j .Lcbc_enc_loop 1315 j .Lcbc_enc_loop
1271 1316
1272.align 16 1317.align 16
@@ -1275,10 +1320,10 @@ $code.=<<___;
1275 1320
1276 lg $t0,0($ivp) 1321 lg $t0,0($ivp)
1277 lg $t1,8($ivp) 1322 lg $t1,8($ivp)
1278 stmg $t0,$t1,128($sp) 1323 stmg $t0,$t1,16*$SIZE_T($sp)
1279 1324
1280.Lcbc_dec_loop: 1325.Lcbc_dec_loop:
1281 stmg $inp,$out,16($sp) 1326 stm${g} $inp,$out,2*$SIZE_T($sp)
1282 llgf $s0,0($inp) 1327 llgf $s0,0($inp)
1283 llgf $s1,4($inp) 1328 llgf $s1,4($inp)
1284 llgf $s2,8($inp) 1329 llgf $s2,8($inp)
@@ -1287,7 +1332,7 @@ $code.=<<___;
1287 1332
1288 bras $ra,_s390x_AES_decrypt 1333 bras $ra,_s390x_AES_decrypt
1289 1334
1290 lmg $inp,$key,16($sp) 1335 lm${g} $inp,$key,2*$SIZE_T($sp)
1291 sllg $s0,$s0,32 1336 sllg $s0,$s0,32
1292 sllg $s2,$s2,32 1337 sllg $s2,$s2,32
1293 lr $s0,$s1 1338 lr $s0,$s1
@@ -1295,15 +1340,15 @@ $code.=<<___;
1295 1340
1296 lg $t0,0($inp) 1341 lg $t0,0($inp)
1297 lg $t1,8($inp) 1342 lg $t1,8($inp)
1298 xg $s0,128($sp) 1343 xg $s0,16*$SIZE_T($sp)
1299 xg $s2,136($sp) 1344 xg $s2,16*$SIZE_T+8($sp)
1300 lghi $s1,16 1345 lghi $s1,16
1301 slgr $len,$s1 1346 sl${g}r $len,$s1
1302 brc 4,.Lcbc_dec_tail # if borrow 1347 brc 4,.Lcbc_dec_tail # if borrow
1303 brc 2,.Lcbc_dec_done # if zero 1348 brc 2,.Lcbc_dec_done # if zero
1304 stg $s0,0($out) 1349 stg $s0,0($out)
1305 stg $s2,8($out) 1350 stg $s2,8($out)
1306 stmg $t0,$t1,128($sp) 1351 stmg $t0,$t1,16*$SIZE_T($sp)
1307 1352
1308 la $inp,16($inp) 1353 la $inp,16($inp)
1309 la $out,16($out) 1354 la $out,16($out)
@@ -1313,7 +1358,7 @@ $code.=<<___;
1313 stg $s0,0($out) 1358 stg $s0,0($out)
1314 stg $s2,8($out) 1359 stg $s2,8($out)
1315.Lcbc_dec_exit: 1360.Lcbc_dec_exit:
1316 lmg $ivp,$ra,48($sp) 1361 lm${g} %r6,$ra,6*$SIZE_T($sp)
1317 stmg $t0,$t1,0($ivp) 1362 stmg $t0,$t1,0($ivp)
1318 1363
1319 br $ra 1364 br $ra
@@ -1321,19 +1366,889 @@ $code.=<<___;
1321.align 16 1366.align 16
1322.Lcbc_dec_tail: 1367.Lcbc_dec_tail:
1323 aghi $len,15 1368 aghi $len,15
1324 stg $s0,128($sp) 1369 stg $s0,16*$SIZE_T($sp)
1325 stg $s2,136($sp) 1370 stg $s2,16*$SIZE_T+8($sp)
1326 bras $s1,4f 1371 bras $s1,4f
1327 mvc 0(1,$out),128($sp) 1372 mvc 0(1,$out),16*$SIZE_T($sp)
13284: ex $len,0($s1) 13734: ex $len,0($s1)
1329 j .Lcbc_dec_exit 1374 j .Lcbc_dec_exit
1330.size AES_cbc_encrypt,.-AES_cbc_encrypt 1375.size AES_cbc_encrypt,.-AES_cbc_encrypt
1331.comm OPENSSL_s390xcap_P,8,8 1376___
1377}
1378########################################################################
1379# void AES_ctr32_encrypt(const unsigned char *in, unsigned char *out,
1380# size_t blocks, const AES_KEY *key,
1381# const unsigned char *ivec)
1382{
1383my $inp="%r2";
1384my $out="%r4"; # blocks and out are swapped
1385my $len="%r3";
1386my $key="%r5"; my $iv0="%r5";
1387my $ivp="%r6";
1388my $fp ="%r7";
1389
1390$code.=<<___;
1391.globl AES_ctr32_encrypt
1392.type AES_ctr32_encrypt,\@function
1393.align 16
1394AES_ctr32_encrypt:
1395 xgr %r3,%r4 # flip %r3 and %r4, $out and $len
1396 xgr %r4,%r3
1397 xgr %r3,%r4
1398 llgfr $len,$len # safe in ctr32 subroutine even in 64-bit case
1399___
1400$code.=<<___ if (!$softonly);
1401 l %r0,240($key)
1402 lhi %r1,16
1403 clr %r0,%r1
1404 jl .Lctr32_software
1405
1406 stm${g} %r6,$s3,6*$SIZE_T($sp)
1407
1408 slgr $out,$inp
1409 la %r1,0($key) # %r1 is permanent copy of $key
1410 lg $iv0,0($ivp) # load ivec
1411 lg $ivp,8($ivp)
1412
1413 # prepare and allocate stack frame at the top of 4K page
1414 # with 1K reserved for eventual signal handling
1415 lghi $s0,-1024-256-16# guarantee at least 256-bytes buffer
1416 lghi $s1,-4096
1417 algr $s0,$sp
1418 lgr $fp,$sp
1419 ngr $s0,$s1 # align at page boundary
1420 slgr $fp,$s0 # total buffer size
1421 lgr $s2,$sp
1422 lghi $s1,1024+16 # sl[g]fi is extended-immediate facility
1423 slgr $fp,$s1 # deduct reservation to get usable buffer size
1424					# buffer size is at least 256 and at most 3072+256-16
1425
1426 la $sp,1024($s0) # alloca
1427 srlg $fp,$fp,4 # convert bytes to blocks, minimum 16
1428 st${g} $s2,0($sp) # back-chain
1429 st${g} $fp,$SIZE_T($sp)
1430
1431 slgr $len,$fp
1432 brc 1,.Lctr32_hw_switch # not zero, no borrow
1433 algr $fp,$len # input is shorter than allocated buffer
1434 lghi $len,0
1435 st${g} $fp,$SIZE_T($sp)
1436
1437.Lctr32_hw_switch:
1438___
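The frame setup just emitted is what the earlier remark about throughput varying with stack alignment refers to: the scratch buffer is carved out between the enclosing 4KB page boundary and the current stack pointer, with 1KB plus 16 bytes kept in reserve (the code's own comment cites eventual signal handling). The arithmetic, as a C sketch only; the variable names are mine, not from the module:

    #include <stddef.h>
    #include <stdint.h>

    /* sketch of the ctr32 buffer carve-out; sp is the incoming stack pointer */
    static size_t carve_ctr32_buffer(uintptr_t sp)
    {
            uintptr_t page   = (sp - (1024 + 256 + 16)) & ~(uintptr_t)4095;
            uintptr_t total  = sp - page;           /* distance down to the page boundary */
            uintptr_t usable = total - (1024 + 16); /* at least 256 bytes survive */
            uintptr_t new_sp = page + 1024;         /* the "alloca": new %r15 */

            (void)new_sp;                           /* illustration only */
            return (size_t)(usable / 16);           /* ctr32 counts whole AES blocks */
    }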
1439$code.=<<___ if (0); ######### kmctr code was measured to be ~12% slower
1440 larl $s0,OPENSSL_s390xcap_P
1441 lg $s0,8($s0)
1442 tmhh $s0,0x0004 # check for message_security-assist-4
1443 jz .Lctr32_km_loop
1444
1445 llgfr $s0,%r0
1446 lgr $s1,%r1
1447 lghi %r0,0
1448 la %r1,16($sp)
1449 .long 0xb92d2042 # kmctr %r4,%r2,%r2
1450
1451 llihh %r0,0x8000 # check if kmctr supports the function code
1452 srlg %r0,%r0,0($s0)
1453 ng %r0,16($sp)
1454 lgr %r0,$s0
1455 lgr %r1,$s1
1456 jz .Lctr32_km_loop
1457
1458####### kmctr code
1459 algr $out,$inp # restore $out
1460	lgr	$s1,$len	# $s1 takes over $len
1461 j .Lctr32_kmctr_loop
1462.align 16
1463.Lctr32_kmctr_loop:
1464 la $s2,16($sp)
1465 lgr $s3,$fp
1466.Lctr32_kmctr_prepare:
1467 stg $iv0,0($s2)
1468 stg $ivp,8($s2)
1469 la $s2,16($s2)
1470 ahi $ivp,1 # 32-bit increment, preserves upper half
1471 brct $s3,.Lctr32_kmctr_prepare
1472
1473 #la $inp,0($inp) # inp
1474 sllg $len,$fp,4 # len
1475 #la $out,0($out) # out
1476 la $s2,16($sp) # iv
1477 .long 0xb92da042 # kmctr $out,$s2,$inp
1478 brc 1,.-4 # pay attention to "partial completion"
1479
1480 slgr $s1,$fp
1481 brc 1,.Lctr32_kmctr_loop # not zero, no borrow
1482 algr $fp,$s1
1483 lghi $s1,0
1484 brc 4+1,.Lctr32_kmctr_loop # not zero
1485
1486 l${g} $sp,0($sp)
1487 lm${g} %r6,$s3,6*$SIZE_T($sp)
1488 br $ra
1489.align 16
1490___
1491$code.=<<___;
1492.Lctr32_km_loop:
1493 la $s2,16($sp)
1494 lgr $s3,$fp
1495.Lctr32_km_prepare:
1496 stg $iv0,0($s2)
1497 stg $ivp,8($s2)
1498 la $s2,16($s2)
1499 ahi $ivp,1 # 32-bit increment, preserves upper half
1500 brct $s3,.Lctr32_km_prepare
1501
1502 la $s0,16($sp) # inp
1503 sllg $s1,$fp,4 # len
1504 la $s2,16($sp) # out
1505 .long 0xb92e00a8 # km %r10,%r8
1506 brc 1,.-4 # pay attention to "partial completion"
1507
1508 la $s2,16($sp)
1509 lgr $s3,$fp
1510 slgr $s2,$inp
1511.Lctr32_km_xor:
1512 lg $s0,0($inp)
1513 lg $s1,8($inp)
1514 xg $s0,0($s2,$inp)
1515 xg $s1,8($s2,$inp)
1516 stg $s0,0($out,$inp)
1517 stg $s1,8($out,$inp)
1518 la $inp,16($inp)
1519 brct $s3,.Lctr32_km_xor
1520
1521 slgr $len,$fp
1522 brc 1,.Lctr32_km_loop # not zero, no borrow
1523 algr $fp,$len
1524 lghi $len,0
1525 brc 4+1,.Lctr32_km_loop # not zero
1526
1527 l${g} $s0,0($sp)
1528 l${g} $s1,$SIZE_T($sp)
1529 la $s2,16($sp)
1530.Lctr32_km_zap:
1531 stg $s0,0($s2)
1532 stg $s0,8($s2)
1533 la $s2,16($s2)
1534 brct $s1,.Lctr32_km_zap
1535
1536 la $sp,0($s0)
1537 lm${g} %r6,$s3,6*$SIZE_T($sp)
1538 br $ra
1539.align 16
1540.Lctr32_software:
1541___
1542$code.=<<___;
1543 stm${g} $key,$ra,5*$SIZE_T($sp)
1544 sl${g}r $inp,$out
1545 larl $tbl,AES_Te
1546 llgf $t1,12($ivp)
1547
1548.Lctr32_loop:
1549 stm${g} $inp,$out,2*$SIZE_T($sp)
1550 llgf $s0,0($ivp)
1551 llgf $s1,4($ivp)
1552 llgf $s2,8($ivp)
1553 lgr $s3,$t1
1554 st $t1,16*$SIZE_T($sp)
1555 lgr %r4,$key
1556
1557 bras $ra,_s390x_AES_encrypt
1558
1559 lm${g} $inp,$ivp,2*$SIZE_T($sp)
1560 llgf $t1,16*$SIZE_T($sp)
1561 x $s0,0($inp,$out)
1562 x $s1,4($inp,$out)
1563 x $s2,8($inp,$out)
1564 x $s3,12($inp,$out)
1565 stm $s0,$s3,0($out)
1566
1567 la $out,16($out)
1568 ahi $t1,1 # 32-bit increment
1569 brct $len,.Lctr32_loop
1570
1571 lm${g} %r6,$ra,6*$SIZE_T($sp)
1572 br $ra
1573.size AES_ctr32_encrypt,.-AES_ctr32_encrypt
1574___
1575}
1576
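Across both the hardware (km) and software paths above, the counter handling is identical: only the last 32 bits of the 16-byte counter block are incremented ("ahi ...,1 # 32-bit increment, preserves upper half"), so the nonce/IV portion is never disturbed. A minimal C sketch of that convention, with an illustrative helper name that is not part of this module:

    #include <stdint.h>

    /* Bump only the trailing 32-bit big-endian counter of a 16-byte
     * counter block; the first 12 bytes are left untouched and the
     * counter wraps modulo 2^32, matching the ctr32 contract. */
    static void ctr32_inc(unsigned char ivec[16])
    {
            uint32_t ctr = ((uint32_t)ivec[12] << 24) | ((uint32_t)ivec[13] << 16) |
                           ((uint32_t)ivec[14] << 8)  |  (uint32_t)ivec[15];

            ctr++;

            ivec[12] = (unsigned char)(ctr >> 24);
            ivec[13] = (unsigned char)(ctr >> 16);
            ivec[14] = (unsigned char)(ctr >> 8);
            ivec[15] = (unsigned char)ctr;
    }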
1577########################################################################
1578# void AES_xts_encrypt(const char *inp,char *out,size_t len,
1579# const AES_KEY *key1, const AES_KEY *key2,
1580# const unsigned char iv[16]);
1581#
1582{
1583my $inp="%r2";
1584my $out="%r4"; # len and out are swapped
1585my $len="%r3";
1586my $key1="%r5"; # $i1
1587my $key2="%r6"; # $i2
1588my $fp="%r7"; # $i3
1589my $tweak=16*$SIZE_T+16; # or $stdframe-16, bottom of the frame...
1590
1591$code.=<<___;
1592.type _s390x_xts_km,\@function
1593.align 16
1594_s390x_xts_km:
1595___
1596$code.=<<___ if(1);
1597 llgfr $s0,%r0 # put aside the function code
1598 lghi $s1,0x7f
1599 nr $s1,%r0
1600 lghi %r0,0 # query capability vector
1601 la %r1,2*$SIZE_T($sp)
1602 .long 0xb92e0042 # km %r4,%r2
1603 llihh %r1,0x8000
1604 srlg %r1,%r1,32($s1) # check for 32+function code
1605 ng %r1,2*$SIZE_T($sp)
1606 lgr %r0,$s0 # restore the function code
1607 la %r1,0($key1) # restore $key1
1608 jz .Lxts_km_vanilla
1609
1610 lmg $i2,$i3,$tweak($sp) # put aside the tweak value
1611 algr $out,$inp
1612
1613 oill %r0,32 # switch to xts function code
1614 aghi $s1,-18 #
1615 sllg $s1,$s1,3 # (function code - 18)*8, 0 or 16
1616 la %r1,$tweak-16($sp)
1617 slgr %r1,$s1 # parameter block position
1618 lmg $s0,$s3,0($key1) # load 256 bits of key material,
1619 stmg $s0,$s3,0(%r1) # and copy it to parameter block.
1620 # yes, it contains junk and overlaps
1621 # with the tweak in 128-bit case.
1622 # it's done to avoid conditional
1623 # branch.
1624 stmg $i2,$i3,$tweak($sp) # "re-seat" the tweak value
1625
1626 .long 0xb92e0042 # km %r4,%r2
1627 brc 1,.-4 # pay attention to "partial completion"
1628
1629 lrvg $s0,$tweak+0($sp) # load the last tweak
1630 lrvg $s1,$tweak+8($sp)
1631 stmg %r0,%r3,$tweak-32(%r1) # wipe copy of the key
1632
1633 nill %r0,0xffdf # switch back to original function code
1634 la %r1,0($key1) # restore pointer to $key1
1635 slgr $out,$inp
1636
1637 llgc $len,2*$SIZE_T-1($sp)
1638 nill $len,0x0f # $len%=16
1639 br $ra
1640
1641.align 16
1642.Lxts_km_vanilla:
1643___
1644$code.=<<___;
1645 # prepare and allocate stack frame at the top of 4K page
1646 # with 1K reserved for eventual signal handling
1647 lghi $s0,-1024-256-16# guarantee at least 256-bytes buffer
1648 lghi $s1,-4096
1649 algr $s0,$sp
1650 lgr $fp,$sp
1651 ngr $s0,$s1 # align at page boundary
1652 slgr $fp,$s0 # total buffer size
1653 lgr $s2,$sp
1654 lghi $s1,1024+16 # sl[g]fi is extended-immediate facility
1655 slgr $fp,$s1 # deduct reservation to get usable buffer size
1656					# buffer size is at least 256 and at most 3072+256-16
1657
1658 la $sp,1024($s0) # alloca
1659 nill $fp,0xfff0 # round to 16*n
1660 st${g} $s2,0($sp) # back-chain
1661 nill $len,0xfff0 # redundant
1662 st${g} $fp,$SIZE_T($sp)
1663
1664 slgr $len,$fp
1665 brc 1,.Lxts_km_go # not zero, no borrow
1666 algr $fp,$len # input is shorter than allocated buffer
1667 lghi $len,0
1668 st${g} $fp,$SIZE_T($sp)
1669
1670.Lxts_km_go:
1671 lrvg $s0,$tweak+0($s2) # load the tweak value in little-endian
1672 lrvg $s1,$tweak+8($s2)
1673
1674 la $s2,16($sp) # vector of ascending tweak values
1675 slgr $s2,$inp
1676 srlg $s3,$fp,4
1677 j .Lxts_km_start
1678
1679.Lxts_km_loop:
1680 la $s2,16($sp)
1681 slgr $s2,$inp
1682 srlg $s3,$fp,4
1683.Lxts_km_prepare:
1684 lghi $i1,0x87
1685 srag $i2,$s1,63 # broadcast upper bit
1686 ngr $i1,$i2 # rem
1687 srlg $i2,$s0,63 # carry bit from lower half
1688 sllg $s0,$s0,1
1689 sllg $s1,$s1,1
1690 xgr $s0,$i1
1691 ogr $s1,$i2
1692.Lxts_km_start:
1693 lrvgr $i1,$s0 # flip byte order
1694 lrvgr $i2,$s1
1695 stg $i1,0($s2,$inp)
1696 stg $i2,8($s2,$inp)
1697 xg $i1,0($inp)
1698 xg $i2,8($inp)
1699 stg $i1,0($out,$inp)
1700 stg $i2,8($out,$inp)
1701 la $inp,16($inp)
1702 brct $s3,.Lxts_km_prepare
1703
1704 slgr $inp,$fp # rewind $inp
1705 la $s2,0($out,$inp)
1706 lgr $s3,$fp
1707 .long 0xb92e00aa # km $s2,$s2
1708 brc 1,.-4 # pay attention to "partial completion"
1709
1710 la $s2,16($sp)
1711 slgr $s2,$inp
1712 srlg $s3,$fp,4
1713.Lxts_km_xor:
1714 lg $i1,0($out,$inp)
1715 lg $i2,8($out,$inp)
1716 xg $i1,0($s2,$inp)
1717 xg $i2,8($s2,$inp)
1718 stg $i1,0($out,$inp)
1719 stg $i2,8($out,$inp)
1720 la $inp,16($inp)
1721 brct $s3,.Lxts_km_xor
1722
1723 slgr $len,$fp
1724 brc 1,.Lxts_km_loop # not zero, no borrow
1725 algr $fp,$len
1726 lghi $len,0
1727 brc 4+1,.Lxts_km_loop # not zero
1728
1729 l${g} $i1,0($sp) # back-chain
1730 llgf $fp,`2*$SIZE_T-4`($sp) # bytes used
1731 la $i2,16($sp)
1732 srlg $fp,$fp,4
1733.Lxts_km_zap:
1734 stg $i1,0($i2)
1735 stg $i1,8($i2)
1736 la $i2,16($i2)
1737 brct $fp,.Lxts_km_zap
1738
1739 la $sp,0($i1)
1740 llgc $len,2*$SIZE_T-1($i1)
1741 nill $len,0x0f # $len%=16
1742 bzr $ra
1743
1744 # generate one more tweak...
1745 lghi $i1,0x87
1746 srag $i2,$s1,63 # broadcast upper bit
1747 ngr $i1,$i2 # rem
1748 srlg $i2,$s0,63 # carry bit from lower half
1749 sllg $s0,$s0,1
1750 sllg $s1,$s1,1
1751 xgr $s0,$i1
1752 ogr $s1,$i2
1753
1754 ltr $len,$len # clear zero flag
1755 br $ra
1756.size _s390x_xts_km,.-_s390x_xts_km
1757
1758.globl AES_xts_encrypt
1759.type AES_xts_encrypt,\@function
1760.align 16
1761AES_xts_encrypt:
1762 xgr %r3,%r4 # flip %r3 and %r4, $out and $len
1763 xgr %r4,%r3
1764 xgr %r3,%r4
1765___
1766$code.=<<___ if ($SIZE_T==4);
1767 llgfr $len,$len
1768___
1769$code.=<<___;
1770 st${g} $len,1*$SIZE_T($sp) # save copy of $len
1771	srag	$len,$len,4	# formally wrong, because it extends
1772					# the sign bit, but who can afford asking
1773 # to process more than 2^63-1 bytes?
1774 # I use it, because it sets condition
1775 # code...
1776 bcr 8,$ra # abort if zero (i.e. less than 16)
1777___
1778$code.=<<___ if (!$softonly);
1779 llgf %r0,240($key2)
1780 lhi %r1,16
1781 clr %r0,%r1
1782 jl .Lxts_enc_software
1783
1784 stm${g} %r6,$s3,6*$SIZE_T($sp)
1785 st${g} $ra,14*$SIZE_T($sp)
1786
1787 sllg $len,$len,4 # $len&=~15
1788 slgr $out,$inp
1789
1790 # generate the tweak value
1791 l${g} $s3,$stdframe($sp) # pointer to iv
1792 la $s2,$tweak($sp)
1793 lmg $s0,$s1,0($s3)
1794 lghi $s3,16
1795 stmg $s0,$s1,0($s2)
1796 la %r1,0($key2) # $key2 is not needed anymore
1797 .long 0xb92e00aa # km $s2,$s2, generate the tweak
1798 brc 1,.-4 # can this happen?
1799
1800 l %r0,240($key1)
1801 la %r1,0($key1) # $key1 is not needed anymore
1802 bras $ra,_s390x_xts_km
1803 jz .Lxts_enc_km_done
1804
1805 aghi $inp,-16 # take one step back
1806 la $i3,0($out,$inp) # put aside real $out
1807.Lxts_enc_km_steal:
1808 llgc $i1,16($inp)
1809 llgc $i2,0($out,$inp)
1810 stc $i1,0($out,$inp)
1811 stc $i2,16($out,$inp)
1812 la $inp,1($inp)
1813 brct $len,.Lxts_enc_km_steal
1814
1815 la $s2,0($i3)
1816 lghi $s3,16
1817 lrvgr $i1,$s0 # flip byte order
1818 lrvgr $i2,$s1
1819 xg $i1,0($s2)
1820 xg $i2,8($s2)
1821 stg $i1,0($s2)
1822 stg $i2,8($s2)
1823 .long 0xb92e00aa # km $s2,$s2
1824 brc 1,.-4 # can this happen?
1825 lrvgr $i1,$s0 # flip byte order
1826 lrvgr $i2,$s1
1827 xg $i1,0($i3)
1828 xg $i2,8($i3)
1829 stg $i1,0($i3)
1830 stg $i2,8($i3)
1831
1832.Lxts_enc_km_done:
1833 l${g} $ra,14*$SIZE_T($sp)
1834 st${g} $sp,$tweak($sp) # wipe tweak
1835 st${g} $sp,$tweak($sp)
1836 lm${g} %r6,$s3,6*$SIZE_T($sp)
1837 br $ra
1838.align 16
1839.Lxts_enc_software:
1840___
1841$code.=<<___;
1842 stm${g} %r6,$ra,6*$SIZE_T($sp)
1843
1844 slgr $out,$inp
1845
1846 xgr $s0,$s0 # clear upper half
1847 xgr $s1,$s1
1848 lrv $s0,$stdframe+4($sp) # load secno
1849 lrv $s1,$stdframe+0($sp)
1850 xgr $s2,$s2
1851 xgr $s3,$s3
1852 stm${g} %r2,%r5,2*$SIZE_T($sp)
1853 la $key,0($key2)
1854 larl $tbl,AES_Te
1855 bras $ra,_s390x_AES_encrypt # generate the tweak
1856 lm${g} %r2,%r5,2*$SIZE_T($sp)
1857 stm $s0,$s3,$tweak($sp) # save the tweak
1858 j .Lxts_enc_enter
1859
1860.align 16
1861.Lxts_enc_loop:
1862 lrvg $s1,$tweak+0($sp) # load the tweak in little-endian
1863 lrvg $s3,$tweak+8($sp)
1864 lghi %r1,0x87
1865 srag %r0,$s3,63 # broadcast upper bit
1866 ngr %r1,%r0 # rem
1867 srlg %r0,$s1,63 # carry bit from lower half
1868 sllg $s1,$s1,1
1869 sllg $s3,$s3,1
1870 xgr $s1,%r1
1871 ogr $s3,%r0
1872 lrvgr $s1,$s1 # flip byte order
1873 lrvgr $s3,$s3
1874 srlg $s0,$s1,32 # smash the tweak to 4x32-bits
1875 stg $s1,$tweak+0($sp) # save the tweak
1876 llgfr $s1,$s1
1877 srlg $s2,$s3,32
1878 stg $s3,$tweak+8($sp)
1879 llgfr $s3,$s3
1880 la $inp,16($inp) # $inp+=16
1881.Lxts_enc_enter:
1882 x $s0,0($inp) # ^=*($inp)
1883 x $s1,4($inp)
1884 x $s2,8($inp)
1885 x $s3,12($inp)
1886 stm${g} %r2,%r3,2*$SIZE_T($sp) # only two registers are changing
1887 la $key,0($key1)
1888 bras $ra,_s390x_AES_encrypt
1889 lm${g} %r2,%r5,2*$SIZE_T($sp)
1890 x $s0,$tweak+0($sp) # ^=tweak
1891 x $s1,$tweak+4($sp)
1892 x $s2,$tweak+8($sp)
1893 x $s3,$tweak+12($sp)
1894 st $s0,0($out,$inp)
1895 st $s1,4($out,$inp)
1896 st $s2,8($out,$inp)
1897 st $s3,12($out,$inp)
1898 brct${g} $len,.Lxts_enc_loop
1899
1900 llgc $len,`2*$SIZE_T-1`($sp)
1901 nill $len,0x0f # $len%16
1902 jz .Lxts_enc_done
1903
1904 la $i3,0($inp,$out) # put aside real $out
1905.Lxts_enc_steal:
1906 llgc %r0,16($inp)
1907 llgc %r1,0($out,$inp)
1908 stc %r0,0($out,$inp)
1909 stc %r1,16($out,$inp)
1910 la $inp,1($inp)
1911 brct $len,.Lxts_enc_steal
1912 la $out,0($i3) # restore real $out
1913
1914 # generate last tweak...
1915 lrvg $s1,$tweak+0($sp) # load the tweak in little-endian
1916 lrvg $s3,$tweak+8($sp)
1917 lghi %r1,0x87
1918 srag %r0,$s3,63 # broadcast upper bit
1919 ngr %r1,%r0 # rem
1920 srlg %r0,$s1,63 # carry bit from lower half
1921 sllg $s1,$s1,1
1922 sllg $s3,$s3,1
1923 xgr $s1,%r1
1924 ogr $s3,%r0
1925 lrvgr $s1,$s1 # flip byte order
1926 lrvgr $s3,$s3
1927 srlg $s0,$s1,32 # smash the tweak to 4x32-bits
1928 stg $s1,$tweak+0($sp) # save the tweak
1929 llgfr $s1,$s1
1930 srlg $s2,$s3,32
1931 stg $s3,$tweak+8($sp)
1932 llgfr $s3,$s3
1933
1934	x	$s0,0($out)		# ^=*(inp)|stolen cipher-text
1935 x $s1,4($out)
1936 x $s2,8($out)
1937 x $s3,12($out)
1938 st${g} $out,4*$SIZE_T($sp)
1939 la $key,0($key1)
1940 bras $ra,_s390x_AES_encrypt
1941 l${g} $out,4*$SIZE_T($sp)
1942 x $s0,`$tweak+0`($sp) # ^=tweak
1943 x $s1,`$tweak+4`($sp)
1944 x $s2,`$tweak+8`($sp)
1945 x $s3,`$tweak+12`($sp)
1946 st $s0,0($out)
1947 st $s1,4($out)
1948 st $s2,8($out)
1949 st $s3,12($out)
1950
1951.Lxts_enc_done:
1952 stg $sp,$tweak+0($sp) # wipe tweak
1953	stg	$sp,$tweak+8($sp)
1954 lm${g} %r6,$ra,6*$SIZE_T($sp)
1955 br $ra
1956.size AES_xts_encrypt,.-AES_xts_encrypt
1957___
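The instruction pattern that recurs throughout the XTS code above (lghi 0x87, srag/ngr to broadcast and mask the top bit, srlg for the cross-half carry, then sllg/xgr/ogr) is the tweak update: multiplying the 128-bit tweak by x in GF(2^128), where 0x87 encodes the reduction polynomial x^128 + x^7 + x^2 + x + 1. A C sketch operating on the same two little-endian 64-bit halves; the helper name is mine, not from the module:

    #include <stdint.h>

    /* Multiply the XTS tweak by x in GF(2^128): shift the 128-bit value
     * left by one, folding the bit that falls off the top back in as 0x87. */
    static void xts_tweak_double(uint64_t *lo, uint64_t *hi)
    {
            uint64_t rem   = (*hi >> 63) ? 0x87 : 0;  /* reduce if bit 127 was set */
            uint64_t carry = *lo >> 63;               /* bit moving into the high half */

            *lo = (*lo << 1) ^ rem;
            *hi = (*hi << 1) | carry;
    }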
1958# void AES_xts_decrypt(const char *inp,char *out,size_t len,
1959# const AES_KEY *key1, const AES_KEY *key2,u64 secno);
1960#
1961$code.=<<___;
1962.globl AES_xts_decrypt
1963.type AES_xts_decrypt,\@function
1964.align 16
1965AES_xts_decrypt:
1966 xgr %r3,%r4 # flip %r3 and %r4, $out and $len
1967 xgr %r4,%r3
1968 xgr %r3,%r4
1969___
1970$code.=<<___ if ($SIZE_T==4);
1971 llgfr $len,$len
1972___
1973$code.=<<___;
1974 st${g} $len,1*$SIZE_T($sp) # save copy of $len
1975 aghi $len,-16
1976 bcr 4,$ra # abort if less than zero. formally
1977 # wrong, because $len is unsigned,
1978 # but who can afford asking to
1979 # process more than 2^63-1 bytes?
1980 tmll $len,0x0f
1981 jnz .Lxts_dec_proceed
1982 aghi $len,16
1983.Lxts_dec_proceed:
1984___
1985$code.=<<___ if (!$softonly);
1986 llgf %r0,240($key2)
1987 lhi %r1,16
1988 clr %r0,%r1
1989 jl .Lxts_dec_software
1990
1991 stm${g} %r6,$s3,6*$SIZE_T($sp)
1992 st${g} $ra,14*$SIZE_T($sp)
1993
1994 nill $len,0xfff0 # $len&=~15
1995 slgr $out,$inp
1996
1997 # generate the tweak value
1998 l${g} $s3,$stdframe($sp) # pointer to iv
1999 la $s2,$tweak($sp)
2000 lmg $s0,$s1,0($s3)
2001 lghi $s3,16
2002 stmg $s0,$s1,0($s2)
2003 la %r1,0($key2) # $key2 is not needed past this point
2004 .long 0xb92e00aa # km $s2,$s2, generate the tweak
2005 brc 1,.-4 # can this happen?
2006
2007 l %r0,240($key1)
2008 la %r1,0($key1) # $key1 is not needed anymore
2009
2010 ltgr $len,$len
2011 jz .Lxts_dec_km_short
2012 bras $ra,_s390x_xts_km
2013 jz .Lxts_dec_km_done
2014
2015 lrvgr $s2,$s0 # make copy in reverse byte order
2016 lrvgr $s3,$s1
2017 j .Lxts_dec_km_2ndtweak
2018
2019.Lxts_dec_km_short:
2020 llgc $len,`2*$SIZE_T-1`($sp)
2021 nill $len,0x0f # $len%=16
2022 lrvg $s0,$tweak+0($sp) # load the tweak
2023 lrvg $s1,$tweak+8($sp)
2024 lrvgr $s2,$s0 # make copy in reverse byte order
2025 lrvgr $s3,$s1
2026
2027.Lxts_dec_km_2ndtweak:
2028 lghi $i1,0x87
2029 srag $i2,$s1,63 # broadcast upper bit
2030 ngr $i1,$i2 # rem
2031 srlg $i2,$s0,63 # carry bit from lower half
2032 sllg $s0,$s0,1
2033 sllg $s1,$s1,1
2034 xgr $s0,$i1
2035 ogr $s1,$i2
2036 lrvgr $i1,$s0 # flip byte order
2037 lrvgr $i2,$s1
2038
2039 xg $i1,0($inp)
2040 xg $i2,8($inp)
2041 stg $i1,0($out,$inp)
2042 stg $i2,8($out,$inp)
2043 la $i2,0($out,$inp)
2044 lghi $i3,16
2045 .long 0xb92e0066 # km $i2,$i2
2046 brc 1,.-4 # can this happen?
2047 lrvgr $i1,$s0
2048 lrvgr $i2,$s1
2049 xg $i1,0($out,$inp)
2050 xg $i2,8($out,$inp)
2051 stg $i1,0($out,$inp)
2052 stg $i2,8($out,$inp)
2053
2054 la $i3,0($out,$inp) # put aside real $out
2055.Lxts_dec_km_steal:
2056 llgc $i1,16($inp)
2057 llgc $i2,0($out,$inp)
2058 stc $i1,0($out,$inp)
2059 stc $i2,16($out,$inp)
2060 la $inp,1($inp)
2061 brct $len,.Lxts_dec_km_steal
2062
2063 lgr $s0,$s2
2064 lgr $s1,$s3
2065 xg $s0,0($i3)
2066 xg $s1,8($i3)
2067 stg $s0,0($i3)
2068 stg $s1,8($i3)
2069 la $s0,0($i3)
2070 lghi $s1,16
2071 .long 0xb92e0088 # km $s0,$s0
2072 brc 1,.-4 # can this happen?
2073 xg $s2,0($i3)
2074 xg $s3,8($i3)
2075 stg $s2,0($i3)
2076 stg $s3,8($i3)
2077.Lxts_dec_km_done:
2078 l${g} $ra,14*$SIZE_T($sp)
2079 st${g} $sp,$tweak($sp) # wipe tweak
2080 st${g} $sp,$tweak($sp)
2081 lm${g} %r6,$s3,6*$SIZE_T($sp)
2082 br $ra
2083.align 16
2084.Lxts_dec_software:
2085___
2086$code.=<<___;
2087 stm${g} %r6,$ra,6*$SIZE_T($sp)
2088
2089 srlg $len,$len,4
2090 slgr $out,$inp
2091
2092 xgr $s0,$s0 # clear upper half
2093 xgr $s1,$s1
2094 lrv $s0,$stdframe+4($sp) # load secno
2095 lrv $s1,$stdframe+0($sp)
2096 xgr $s2,$s2
2097 xgr $s3,$s3
2098 stm${g} %r2,%r5,2*$SIZE_T($sp)
2099 la $key,0($key2)
2100 larl $tbl,AES_Te
2101 bras $ra,_s390x_AES_encrypt # generate the tweak
2102 lm${g} %r2,%r5,2*$SIZE_T($sp)
2103 larl $tbl,AES_Td
2104 lt${g}r $len,$len
2105 stm $s0,$s3,$tweak($sp) # save the tweak
2106 jz .Lxts_dec_short
2107 j .Lxts_dec_enter
2108
2109.align 16
2110.Lxts_dec_loop:
2111 lrvg $s1,$tweak+0($sp) # load the tweak in little-endian
2112 lrvg $s3,$tweak+8($sp)
2113 lghi %r1,0x87
2114 srag %r0,$s3,63 # broadcast upper bit
2115 ngr %r1,%r0 # rem
2116 srlg %r0,$s1,63 # carry bit from lower half
2117 sllg $s1,$s1,1
2118 sllg $s3,$s3,1
2119 xgr $s1,%r1
2120 ogr $s3,%r0
2121 lrvgr $s1,$s1 # flip byte order
2122 lrvgr $s3,$s3
2123 srlg $s0,$s1,32 # smash the tweak to 4x32-bits
2124 stg $s1,$tweak+0($sp) # save the tweak
2125 llgfr $s1,$s1
2126 srlg $s2,$s3,32
2127 stg $s3,$tweak+8($sp)
2128 llgfr $s3,$s3
2129.Lxts_dec_enter:
2130 x $s0,0($inp) # tweak^=*(inp)
2131 x $s1,4($inp)
2132 x $s2,8($inp)
2133 x $s3,12($inp)
2134 stm${g} %r2,%r3,2*$SIZE_T($sp) # only two registers are changing
2135 la $key,0($key1)
2136 bras $ra,_s390x_AES_decrypt
2137 lm${g} %r2,%r5,2*$SIZE_T($sp)
2138 x $s0,$tweak+0($sp) # ^=tweak
2139 x $s1,$tweak+4($sp)
2140 x $s2,$tweak+8($sp)
2141 x $s3,$tweak+12($sp)
2142 st $s0,0($out,$inp)
2143 st $s1,4($out,$inp)
2144 st $s2,8($out,$inp)
2145 st $s3,12($out,$inp)
2146 la $inp,16($inp)
2147 brct${g} $len,.Lxts_dec_loop
2148
2149 llgc $len,`2*$SIZE_T-1`($sp)
2150 nill $len,0x0f # $len%16
2151 jz .Lxts_dec_done
2152
2153 # generate pair of tweaks...
2154 lrvg $s1,$tweak+0($sp) # load the tweak in little-endian
2155 lrvg $s3,$tweak+8($sp)
2156 lghi %r1,0x87
2157 srag %r0,$s3,63 # broadcast upper bit
2158 ngr %r1,%r0 # rem
2159 srlg %r0,$s1,63 # carry bit from lower half
2160 sllg $s1,$s1,1
2161 sllg $s3,$s3,1
2162 xgr $s1,%r1
2163 ogr $s3,%r0
2164 lrvgr $i2,$s1 # flip byte order
2165 lrvgr $i3,$s3
2166 stmg $i2,$i3,$tweak($sp) # save the 1st tweak
2167 j .Lxts_dec_2ndtweak
2168
2169.align 16
2170.Lxts_dec_short:
2171 llgc $len,`2*$SIZE_T-1`($sp)
2172 nill $len,0x0f # $len%16
2173 lrvg $s1,$tweak+0($sp) # load the tweak in little-endian
2174 lrvg $s3,$tweak+8($sp)
2175.Lxts_dec_2ndtweak:
2176 lghi %r1,0x87
2177 srag %r0,$s3,63 # broadcast upper bit
2178 ngr %r1,%r0 # rem
2179 srlg %r0,$s1,63 # carry bit from lower half
2180 sllg $s1,$s1,1
2181 sllg $s3,$s3,1
2182 xgr $s1,%r1
2183 ogr $s3,%r0
2184 lrvgr $s1,$s1 # flip byte order
2185 lrvgr $s3,$s3
2186 srlg $s0,$s1,32 # smash the tweak to 4x32-bits
2187 stg $s1,$tweak-16+0($sp) # save the 2nd tweak
2188 llgfr $s1,$s1
2189 srlg $s2,$s3,32
2190 stg $s3,$tweak-16+8($sp)
2191 llgfr $s3,$s3
2192
2193 x $s0,0($inp) # tweak_the_2nd^=*(inp)
2194 x $s1,4($inp)
2195 x $s2,8($inp)
2196 x $s3,12($inp)
2197 stm${g} %r2,%r3,2*$SIZE_T($sp)
2198 la $key,0($key1)
2199 bras $ra,_s390x_AES_decrypt
2200 lm${g} %r2,%r5,2*$SIZE_T($sp)
2201 x $s0,$tweak-16+0($sp) # ^=tweak_the_2nd
2202 x $s1,$tweak-16+4($sp)
2203 x $s2,$tweak-16+8($sp)
2204 x $s3,$tweak-16+12($sp)
2205 st $s0,0($out,$inp)
2206 st $s1,4($out,$inp)
2207 st $s2,8($out,$inp)
2208 st $s3,12($out,$inp)
2209
2210 la $i3,0($out,$inp) # put aside real $out
2211.Lxts_dec_steal:
2212 llgc %r0,16($inp)
2213 llgc %r1,0($out,$inp)
2214 stc %r0,0($out,$inp)
2215 stc %r1,16($out,$inp)
2216 la $inp,1($inp)
2217 brct $len,.Lxts_dec_steal
2218 la $out,0($i3) # restore real $out
2219
2220 lm $s0,$s3,$tweak($sp) # load the 1st tweak
2221 x $s0,0($out) # tweak^=*(inp)|stolen cipher-text
2222 x $s1,4($out)
2223 x $s2,8($out)
2224 x $s3,12($out)
2225 st${g} $out,4*$SIZE_T($sp)
2226 la $key,0($key1)
2227 bras $ra,_s390x_AES_decrypt
2228 l${g} $out,4*$SIZE_T($sp)
2229 x $s0,$tweak+0($sp) # ^=tweak
2230 x $s1,$tweak+4($sp)
2231 x $s2,$tweak+8($sp)
2232 x $s3,$tweak+12($sp)
2233 st $s0,0($out)
2234 st $s1,4($out)
2235 st $s2,8($out)
2236 st $s3,12($out)
2237 stg $sp,$tweak-16+0($sp) # wipe 2nd tweak
2238 stg $sp,$tweak-16+8($sp)
2239.Lxts_dec_done:
2240 stg $sp,$tweak+0($sp) # wipe tweak
2241	stg	$sp,$tweak+8($sp)
2242 lm${g} %r6,$ra,6*$SIZE_T($sp)
2243 br $ra
2244.size AES_xts_decrypt,.-AES_xts_decrypt
1332___ 2245___
1333} 2246}
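When the length is not a multiple of 16, both the km and the software paths finish with the same ciphertext-stealing shuffle (.Lxts_enc_km_steal, .Lxts_enc_steal, .Lxts_dec_km_steal, .Lxts_dec_steal): the trailing input bytes are swapped with the tail of the 16-byte block just produced, and that block is then pushed through the cipher once more; decryption does the same swap with the two tweaks applied in the opposite order. Roughly, in C, with function and variable names that are illustrative only:

    #include <stddef.h>

    /* Swap the final partial block of the input with the tail of the
     * previously produced 16-byte block at out[last..last+15]; the caller
     * then runs that block through the cipher once more with the next tweak. */
    static void xts_steal_swap(const unsigned char *in, unsigned char *out,
                               size_t last, size_t tail)
    {
            size_t i;

            for (i = 0; i < tail; i++) {
                    unsigned char c = out[last + i];          /* stolen tail byte */
                    out[last + i]      = in[last + 16 + i];   /* partial-block input */
                    out[last + 16 + i] = c;                   /* becomes the final partial block */
            }
    }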
1334$code.=<<___; 2247$code.=<<___;
1335.string "AES for s390x, CRYPTOGAMS by <appro\@openssl.org>" 2248.string "AES for s390x, CRYPTOGAMS by <appro\@openssl.org>"
2249.comm OPENSSL_s390xcap_P,16,8
1336___ 2250___
1337 2251
1338$code =~ s/\`([^\`]*)\`/eval $1/gem; 2252$code =~ s/\`([^\`]*)\`/eval $1/gem;
1339print $code; 2253print $code;
2254close STDOUT; # force flush
diff --git a/src/lib/libssl/src/crypto/aes/asm/aes-sparcv9.pl b/src/lib/libssl/src/crypto/aes/asm/aes-sparcv9.pl
index c57b3a2d6d..403c4d1290 100755
--- a/src/lib/libssl/src/crypto/aes/asm/aes-sparcv9.pl
+++ b/src/lib/libssl/src/crypto/aes/asm/aes-sparcv9.pl
@@ -1176,6 +1176,7 @@ ___
1176# As UltraSPARC T1, a.k.a. Niagara, has shared FPU, FP nops can have 1176# As UltraSPARC T1, a.k.a. Niagara, has shared FPU, FP nops can have
1177# undesired effect, so just omit them and sacrifice some portion of 1177# undesired effect, so just omit them and sacrifice some portion of
1178# percent in performance... 1178# percent in performance...
1179$code =~ s/fmovs.*$//gem; 1179$code =~ s/fmovs.*$//gm;
1180 1180
1181print $code; 1181print $code;
1182close STDOUT; # ensure flush
diff --git a/src/lib/libssl/src/crypto/aes/asm/aes-x86_64.pl b/src/lib/libssl/src/crypto/aes/asm/aes-x86_64.pl
index a545e892ae..48fa857d5b 100755
--- a/src/lib/libssl/src/crypto/aes/asm/aes-x86_64.pl
+++ b/src/lib/libssl/src/crypto/aes/asm/aes-x86_64.pl
@@ -588,6 +588,9 @@ $code.=<<___;
588.globl AES_encrypt 588.globl AES_encrypt
589.type AES_encrypt,\@function,3 589.type AES_encrypt,\@function,3
590.align 16 590.align 16
591.globl asm_AES_encrypt
592.hidden asm_AES_encrypt
593asm_AES_encrypt:
591AES_encrypt: 594AES_encrypt:
592 push %rbx 595 push %rbx
593 push %rbp 596 push %rbp
@@ -1184,6 +1187,9 @@ $code.=<<___;
1184.globl AES_decrypt 1187.globl AES_decrypt
1185.type AES_decrypt,\@function,3 1188.type AES_decrypt,\@function,3
1186.align 16 1189.align 16
1190.globl asm_AES_decrypt
1191.hidden asm_AES_decrypt
1192asm_AES_decrypt:
1187AES_decrypt: 1193AES_decrypt:
1188 push %rbx 1194 push %rbx
1189 push %rbp 1195 push %rbp
@@ -1277,13 +1283,13 @@ $code.=<<___;
1277___ 1283___
1278} 1284}
1279 1285
1280# int AES_set_encrypt_key(const unsigned char *userKey, const int bits, 1286# int private_AES_set_encrypt_key(const unsigned char *userKey, const int bits,
1281# AES_KEY *key) 1287# AES_KEY *key)
1282$code.=<<___; 1288$code.=<<___;
1283.globl AES_set_encrypt_key 1289.globl private_AES_set_encrypt_key
1284.type AES_set_encrypt_key,\@function,3 1290.type private_AES_set_encrypt_key,\@function,3
1285.align 16 1291.align 16
1286AES_set_encrypt_key: 1292private_AES_set_encrypt_key:
1287 push %rbx 1293 push %rbx
1288 push %rbp 1294 push %rbp
1289 push %r12 # redundant, but allows to share 1295 push %r12 # redundant, but allows to share
@@ -1304,7 +1310,7 @@ AES_set_encrypt_key:
1304 add \$56,%rsp 1310 add \$56,%rsp
1305.Lenc_key_epilogue: 1311.Lenc_key_epilogue:
1306 ret 1312 ret
1307.size AES_set_encrypt_key,.-AES_set_encrypt_key 1313.size private_AES_set_encrypt_key,.-private_AES_set_encrypt_key
1308 1314
1309.type _x86_64_AES_set_encrypt_key,\@abi-omnipotent 1315.type _x86_64_AES_set_encrypt_key,\@abi-omnipotent
1310.align 16 1316.align 16
@@ -1547,13 +1553,13 @@ $code.=<<___;
1547___ 1553___
1548} 1554}
1549 1555
1550# int AES_set_decrypt_key(const unsigned char *userKey, const int bits, 1556# int private_AES_set_decrypt_key(const unsigned char *userKey, const int bits,
1551# AES_KEY *key) 1557# AES_KEY *key)
1552$code.=<<___; 1558$code.=<<___;
1553.globl AES_set_decrypt_key 1559.globl private_AES_set_decrypt_key
1554.type AES_set_decrypt_key,\@function,3 1560.type private_AES_set_decrypt_key,\@function,3
1555.align 16 1561.align 16
1556AES_set_decrypt_key: 1562private_AES_set_decrypt_key:
1557 push %rbx 1563 push %rbx
1558 push %rbp 1564 push %rbp
1559 push %r12 1565 push %r12
@@ -1622,7 +1628,7 @@ $code.=<<___;
1622 add \$56,%rsp 1628 add \$56,%rsp
1623.Ldec_key_epilogue: 1629.Ldec_key_epilogue:
1624 ret 1630 ret
1625.size AES_set_decrypt_key,.-AES_set_decrypt_key 1631.size private_AES_set_decrypt_key,.-private_AES_set_decrypt_key
1626___ 1632___
1627 1633
1628# void AES_cbc_encrypt (const void char *inp, unsigned char *out, 1634# void AES_cbc_encrypt (const void char *inp, unsigned char *out,
@@ -1648,6 +1654,9 @@ $code.=<<___;
1648.type AES_cbc_encrypt,\@function,6 1654.type AES_cbc_encrypt,\@function,6
1649.align 16 1655.align 16
1650.extern OPENSSL_ia32cap_P 1656.extern OPENSSL_ia32cap_P
1657.globl asm_AES_cbc_encrypt
1658.hidden asm_AES_cbc_encrypt
1659asm_AES_cbc_encrypt:
1651AES_cbc_encrypt: 1660AES_cbc_encrypt:
1652 cmp \$0,%rdx # check length 1661 cmp \$0,%rdx # check length
1653 je .Lcbc_epilogue 1662 je .Lcbc_epilogue
@@ -2766,13 +2775,13 @@ cbc_se_handler:
2766 .rva .LSEH_end_AES_decrypt 2775 .rva .LSEH_end_AES_decrypt
2767 .rva .LSEH_info_AES_decrypt 2776 .rva .LSEH_info_AES_decrypt
2768 2777
2769 .rva .LSEH_begin_AES_set_encrypt_key 2778 .rva .LSEH_begin_private_AES_set_encrypt_key
2770 .rva .LSEH_end_AES_set_encrypt_key 2779 .rva .LSEH_end_private_AES_set_encrypt_key
2771 .rva .LSEH_info_AES_set_encrypt_key 2780 .rva .LSEH_info_private_AES_set_encrypt_key
2772 2781
2773 .rva .LSEH_begin_AES_set_decrypt_key 2782 .rva .LSEH_begin_private_AES_set_decrypt_key
2774 .rva .LSEH_end_AES_set_decrypt_key 2783 .rva .LSEH_end_private_AES_set_decrypt_key
2775 .rva .LSEH_info_AES_set_decrypt_key 2784 .rva .LSEH_info_private_AES_set_decrypt_key
2776 2785
2777 .rva .LSEH_begin_AES_cbc_encrypt 2786 .rva .LSEH_begin_AES_cbc_encrypt
2778 .rva .LSEH_end_AES_cbc_encrypt 2787 .rva .LSEH_end_AES_cbc_encrypt
@@ -2788,11 +2797,11 @@ cbc_se_handler:
2788 .byte 9,0,0,0 2797 .byte 9,0,0,0
2789 .rva block_se_handler 2798 .rva block_se_handler
2790 .rva .Ldec_prologue,.Ldec_epilogue # HandlerData[] 2799 .rva .Ldec_prologue,.Ldec_epilogue # HandlerData[]
2791.LSEH_info_AES_set_encrypt_key: 2800.LSEH_info_private_AES_set_encrypt_key:
2792 .byte 9,0,0,0 2801 .byte 9,0,0,0
2793 .rva key_se_handler 2802 .rva key_se_handler
2794 .rva .Lenc_key_prologue,.Lenc_key_epilogue # HandlerData[] 2803 .rva .Lenc_key_prologue,.Lenc_key_epilogue # HandlerData[]
2795.LSEH_info_AES_set_decrypt_key: 2804.LSEH_info_private_AES_set_decrypt_key:
2796 .byte 9,0,0,0 2805 .byte 9,0,0,0
2797 .rva key_se_handler 2806 .rva key_se_handler
2798 .rva .Ldec_key_prologue,.Ldec_key_epilogue # HandlerData[] 2807 .rva .Ldec_key_prologue,.Ldec_key_epilogue # HandlerData[]
diff --git a/src/lib/libssl/src/crypto/aes/asm/aesni-sha1-x86_64.pl b/src/lib/libssl/src/crypto/aes/asm/aesni-sha1-x86_64.pl
new file mode 100644
index 0000000000..c6f6b3334a
--- /dev/null
+++ b/src/lib/libssl/src/crypto/aes/asm/aesni-sha1-x86_64.pl
@@ -0,0 +1,1249 @@
1#!/usr/bin/env perl
2#
3# ====================================================================
4# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9#
10# June 2011
11#
12# This is AESNI-CBC+SHA1 "stitch" implementation. The idea, as spelled
13# in http://download.intel.com/design/intarch/papers/323686.pdf, is
14# that since AESNI-CBC encrypt exhibits *very* low instruction-level
15# parallelism, interleaving it with another algorithm makes it possible
16# to utilize processor resources better and achieve better performance.
17# SHA1 instruction sequences(*) are taken from sha1-x86_64.pl and
18# AESNI code is woven into it. Below are performance numbers in
19# cycles per processed byte, less is better, for standalone AESNI-CBC
20# encrypt, sum of the latter and standalone SHA1, and "stitched"
21# subroutine:
22#
23# AES-128-CBC +SHA1 stitch gain
24# Westmere 3.77[+5.6] 9.37 6.65 +41%
25# Sandy Bridge 5.05[+5.2(6.3)] 10.25(11.35) 6.16(7.08) +67%(+60%)
26#
27# AES-192-CBC
28# Westmere 4.51 10.11 6.97 +45%
29# Sandy Bridge 6.05 11.25(12.35) 6.34(7.27) +77%(+70%)
30#
31# AES-256-CBC
32# Westmere 5.25 10.85 7.25 +50%
33# Sandy Bridge 7.05 12.25(13.35) 7.06(7.70) +74%(+73%)
34#
35# (*) There are two code paths: SSSE3 and AVX. See sha1-x86_64.pl for
36# background information. Above numbers in parentheses are SSSE3
37# results collected on AVX-capable CPU, i.e. apply on OSes that
38# don't support AVX.
39#
40# Needless to say, it makes no sense to implement a "stitched"
41# *decrypt* subroutine: *both* AESNI-CBC decrypt and SHA1 already
42# fully utilize parallelism, so stitching would not give any gain
43# anyway. Well, there might be some, e.g. because of better cache
44# locality... For reference, here are performance results for
45# standalone AESNI-CBC decrypt:
46#
47# AES-128-CBC AES-192-CBC AES-256-CBC
48# Westmere 1.31 1.55 1.80
49# Sandy Bridge 0.93 1.06 1.22
50
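Functionally the stitched routine advances two input streams in lock-step, 64 bytes per iteration: four AES-CBC blocks from one stream are encrypted into out while one 64-byte SHA1 block from the other stream is compressed, and the gain comes purely from interleaving the two at instruction level. Below is a behavioural model in C only, not the real control flow: the helper calls stand in for the raw block operations the assembly performs, the stream names are assumptions of mine, and the length argument appears to be counted in 64-byte blocks (the code shifts it left by 6 before use).

    #include <openssl/aes.h>
    #include <openssl/sha.h>

    /* behavioural model of a CBC+SHA1 "stitch": per 64-byte chunk, encrypt
     * enc_in with AES-CBC and hash the corresponding chunk of hash_in */
    static void cbc_sha1_stitch_model(const unsigned char *hash_in,
                                      unsigned char *out, size_t blocks64,
                                      const AES_KEY *key, unsigned char iv[16],
                                      SHA_CTX *ctx, const unsigned char *enc_in)
    {
            size_t i;

            for (i = 0; i < blocks64; i++) {
                    AES_cbc_encrypt(enc_in + 64 * i, out + 64 * i, 64, key, iv, 1);
                    SHA1_Update(ctx, hash_in + 64 * i, 64);
            }
    }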
51$flavour = shift;
52$output = shift;
53if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
54
55$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
56
57$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
58( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
59( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
60die "can't locate x86_64-xlate.pl";
61
62$avx=1 if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1`
63 =~ /GNU assembler version ([2-9]\.[0-9]+)/ &&
64 $1>=2.19);
65$avx=1 if (!$avx && $win64 && ($flavour =~ /nasm/ || $ENV{ASM} =~ /nasm/) &&
66 `nasm -v 2>&1` =~ /NASM version ([2-9]\.[0-9]+)/ &&
67 $1>=2.09);
68$avx=1 if (!$avx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) &&
69 `ml64 2>&1` =~ /Version ([0-9]+)\./ &&
70 $1>=10);
71
72open STDOUT,"| $^X $xlate $flavour $output";
73
74# void aesni_cbc_sha1_enc(const void *inp,
75# void *out,
76# size_t length,
77# const AES_KEY *key,
78# unsigned char *iv,
79# SHA_CTX *ctx,
80# const void *in0);
81
82$code.=<<___;
83.text
84.extern OPENSSL_ia32cap_P
85
86.globl aesni_cbc_sha1_enc
87.type aesni_cbc_sha1_enc,\@abi-omnipotent
88.align 16
89aesni_cbc_sha1_enc:
90 # caller should check for SSSE3 and AES-NI bits
91 mov OPENSSL_ia32cap_P+0(%rip),%r10d
92 mov OPENSSL_ia32cap_P+4(%rip),%r11d
93___
94$code.=<<___ if ($avx);
95 and \$`1<<28`,%r11d # mask AVX bit
96 and \$`1<<30`,%r10d # mask "Intel CPU" bit
97 or %r11d,%r10d
98 cmp \$`1<<28|1<<30`,%r10d
99 je aesni_cbc_sha1_enc_avx
100___
101$code.=<<___;
102 jmp aesni_cbc_sha1_enc_ssse3
103 ret
104.size aesni_cbc_sha1_enc,.-aesni_cbc_sha1_enc
105___
106
107my ($in0,$out,$len,$key,$ivp,$ctx,$inp)=("%rdi","%rsi","%rdx","%rcx","%r8","%r9","%r10");
108
109my $Xi=4;
110my @X=map("%xmm$_",(4..7,0..3));
111my @Tx=map("%xmm$_",(8..10));
112my @V=($A,$B,$C,$D,$E)=("%eax","%ebx","%ecx","%edx","%ebp"); # size optimization
113my @T=("%esi","%edi");
114my $j=0; my $jj=0; my $r=0; my $sn=0;
115my $K_XX_XX="%r11";
116my ($iv,$in,$rndkey0)=map("%xmm$_",(11..13));
117my @rndkey=("%xmm14","%xmm15");
118
119sub AUTOLOAD() # thunk [simplified] 32-bit style perlasm
120{ my $opcode = $AUTOLOAD; $opcode =~ s/.*:://;
121 my $arg = pop;
122 $arg = "\$$arg" if ($arg*1 eq $arg);
123 $code .= "\t$opcode\t".join(',',$arg,reverse @_)."\n";
124}
125
126my $_rol=sub { &rol(@_) };
127my $_ror=sub { &ror(@_) };
128
129$code.=<<___;
130.type aesni_cbc_sha1_enc_ssse3,\@function,6
131.align 16
132aesni_cbc_sha1_enc_ssse3:
133 mov `($win64?56:8)`(%rsp),$inp # load 7th argument
134 #shr \$6,$len # debugging artefact
135 #jz .Lepilogue_ssse3 # debugging artefact
136 push %rbx
137 push %rbp
138 push %r12
139 push %r13
140 push %r14
141 push %r15
142 lea `-104-($win64?10*16:0)`(%rsp),%rsp
143 #mov $in0,$inp # debugging artefact
144 #lea 64(%rsp),$ctx # debugging artefact
145___
146$code.=<<___ if ($win64);
147 movaps %xmm6,96+0(%rsp)
148 movaps %xmm7,96+16(%rsp)
149 movaps %xmm8,96+32(%rsp)
150 movaps %xmm9,96+48(%rsp)
151 movaps %xmm10,96+64(%rsp)
152 movaps %xmm11,96+80(%rsp)
153 movaps %xmm12,96+96(%rsp)
154 movaps %xmm13,96+112(%rsp)
155 movaps %xmm14,96+128(%rsp)
156 movaps %xmm15,96+144(%rsp)
157.Lprologue_ssse3:
158___
159$code.=<<___;
160 mov $in0,%r12 # reassign arguments
161 mov $out,%r13
162 mov $len,%r14
163 mov $key,%r15
164 movdqu ($ivp),$iv # load IV
165 mov $ivp,88(%rsp) # save $ivp
166___
167my ($in0,$out,$len,$key)=map("%r$_",(12..15)); # reassign arguments
168my $rounds="${ivp}d";
169$code.=<<___;
170 shl \$6,$len
171 sub $in0,$out
172 mov 240($key),$rounds
173 add $inp,$len # end of input
174
175 lea K_XX_XX(%rip),$K_XX_XX
176 mov 0($ctx),$A # load context
177 mov 4($ctx),$B
178 mov 8($ctx),$C
179 mov 12($ctx),$D
180 mov $B,@T[0] # magic seed
181 mov 16($ctx),$E
182
183 movdqa 64($K_XX_XX),@X[2] # pbswap mask
184 movdqa 0($K_XX_XX),@Tx[1] # K_00_19
185 movdqu 0($inp),@X[-4&7] # load input to %xmm[0-3]
186 movdqu 16($inp),@X[-3&7]
187 movdqu 32($inp),@X[-2&7]
188 movdqu 48($inp),@X[-1&7]
189 pshufb @X[2],@X[-4&7] # byte swap
190 add \$64,$inp
191 pshufb @X[2],@X[-3&7]
192 pshufb @X[2],@X[-2&7]
193 pshufb @X[2],@X[-1&7]
194 paddd @Tx[1],@X[-4&7] # add K_00_19
195 paddd @Tx[1],@X[-3&7]
196 paddd @Tx[1],@X[-2&7]
197 movdqa @X[-4&7],0(%rsp) # X[]+K xfer to IALU
198 psubd @Tx[1],@X[-4&7] # restore X[]
199 movdqa @X[-3&7],16(%rsp)
200 psubd @Tx[1],@X[-3&7]
201 movdqa @X[-2&7],32(%rsp)
202 psubd @Tx[1],@X[-2&7]
203 movups ($key),$rndkey0 # $key[0]
204 movups 16($key),$rndkey[0] # forward reference
205 jmp .Loop_ssse3
206___
207
208my $aesenc=sub {
209 use integer;
210 my ($n,$k)=($r/10,$r%10);
211 if ($k==0) {
212 $code.=<<___;
213 movups `16*$n`($in0),$in # load input
214 xorps $rndkey0,$in
215___
216 $code.=<<___ if ($n);
217 movups $iv,`16*($n-1)`($out,$in0) # write output
218___
219 $code.=<<___;
220 xorps $in,$iv
221 aesenc $rndkey[0],$iv
222 movups `32+16*$k`($key),$rndkey[1]
223___
224 } elsif ($k==9) {
225 $sn++;
226 $code.=<<___;
227 cmp \$11,$rounds
228 jb .Laesenclast$sn
229 movups `32+16*($k+0)`($key),$rndkey[1]
230 aesenc $rndkey[0],$iv
231 movups `32+16*($k+1)`($key),$rndkey[0]
232 aesenc $rndkey[1],$iv
233 je .Laesenclast$sn
234 movups `32+16*($k+2)`($key),$rndkey[1]
235 aesenc $rndkey[0],$iv
236 movups `32+16*($k+3)`($key),$rndkey[0]
237 aesenc $rndkey[1],$iv
238.Laesenclast$sn:
239 aesenclast $rndkey[0],$iv
240 movups 16($key),$rndkey[1] # forward reference
241___
242 } else {
243 $code.=<<___;
244 aesenc $rndkey[0],$iv
245 movups `32+16*$k`($key),$rndkey[1]
246___
247 }
248 $r++; unshift(@rndkey,pop(@rndkey));
249};
250
251sub Xupdate_ssse3_16_31()		# recall that $Xi starts with 4
252{ use integer;
253 my $body = shift;
254 my @insns = (&$body,&$body,&$body,&$body); # 40 instructions
255 my ($a,$b,$c,$d,$e);
256
257 &movdqa (@X[0],@X[-3&7]);
258 eval(shift(@insns));
259 eval(shift(@insns));
260 &movdqa (@Tx[0],@X[-1&7]);
261 &palignr(@X[0],@X[-4&7],8); # compose "X[-14]" in "X[0]"
262 eval(shift(@insns));
263 eval(shift(@insns));
264
265 &paddd (@Tx[1],@X[-1&7]);
266 eval(shift(@insns));
267 eval(shift(@insns));
268 &psrldq (@Tx[0],4); # "X[-3]", 3 dwords
269 eval(shift(@insns));
270 eval(shift(@insns));
271 &pxor (@X[0],@X[-4&7]); # "X[0]"^="X[-16]"
272 eval(shift(@insns));
273 eval(shift(@insns));
274
275 &pxor (@Tx[0],@X[-2&7]); # "X[-3]"^"X[-8]"
276 eval(shift(@insns));
277 eval(shift(@insns));
278 eval(shift(@insns));
279 eval(shift(@insns));
280
281 &pxor (@X[0],@Tx[0]); # "X[0]"^="X[-3]"^"X[-8]"
282 eval(shift(@insns));
283 eval(shift(@insns));
284 &movdqa (eval(16*(($Xi-1)&3))."(%rsp)",@Tx[1]); # X[]+K xfer to IALU
285 eval(shift(@insns));
286 eval(shift(@insns));
287
288 &movdqa (@Tx[2],@X[0]);
289 &movdqa (@Tx[0],@X[0]);
290 eval(shift(@insns));
291 eval(shift(@insns));
292 eval(shift(@insns));
293 eval(shift(@insns));
294
295 &pslldq (@Tx[2],12); # "X[0]"<<96, extract one dword
296 &paddd (@X[0],@X[0]);
297 eval(shift(@insns));
298 eval(shift(@insns));
299 eval(shift(@insns));
300 eval(shift(@insns));
301
302 &psrld (@Tx[0],31);
303 eval(shift(@insns));
304 eval(shift(@insns));
305 &movdqa (@Tx[1],@Tx[2]);
306 eval(shift(@insns));
307 eval(shift(@insns));
308
309 &psrld (@Tx[2],30);
310 &por (@X[0],@Tx[0]); # "X[0]"<<<=1
311 eval(shift(@insns));
312 eval(shift(@insns));
313 eval(shift(@insns));
314 eval(shift(@insns));
315
316 &pslld (@Tx[1],2);
317 &pxor (@X[0],@Tx[2]);
318 eval(shift(@insns));
319 eval(shift(@insns));
320 &movdqa (@Tx[2],eval(16*(($Xi)/5))."($K_XX_XX)"); # K_XX_XX
321 eval(shift(@insns));
322 eval(shift(@insns));
323
324 &pxor (@X[0],@Tx[1]); # "X[0]"^=("X[0]">>96)<<<2
325
326 foreach (@insns) { eval; } # remaining instructions [if any]
327
328 $Xi++; push(@X,shift(@X)); # "rotate" X[]
329 push(@Tx,shift(@Tx));
330}
331
332sub Xupdate_ssse3_32_79()
333{ use integer;
334 my $body = shift;
335 my @insns = (&$body,&$body,&$body,&$body); # 32 to 48 instructions
336 my ($a,$b,$c,$d,$e);
337
338 &movdqa (@Tx[0],@X[-1&7]) if ($Xi==8);
339 eval(shift(@insns)); # body_20_39
340 &pxor (@X[0],@X[-4&7]); # "X[0]"="X[-32]"^"X[-16]"
341 &palignr(@Tx[0],@X[-2&7],8); # compose "X[-6]"
342 eval(shift(@insns));
343 eval(shift(@insns));
344 eval(shift(@insns)); # rol
345
346 &pxor (@X[0],@X[-7&7]); # "X[0]"^="X[-28]"
347 eval(shift(@insns));
348 eval(shift(@insns)) if (@insns[0] !~ /&ro[rl]/);
349 if ($Xi%5) {
350 &movdqa (@Tx[2],@Tx[1]);# "perpetuate" K_XX_XX...
351 } else { # ... or load next one
352 &movdqa (@Tx[2],eval(16*($Xi/5))."($K_XX_XX)");
353 }
354 &paddd (@Tx[1],@X[-1&7]);
355 eval(shift(@insns)); # ror
356 eval(shift(@insns));
357
358 &pxor (@X[0],@Tx[0]); # "X[0]"^="X[-6]"
359 eval(shift(@insns)); # body_20_39
360 eval(shift(@insns));
361 eval(shift(@insns));
362 eval(shift(@insns)); # rol
363
364 &movdqa (@Tx[0],@X[0]);
365 &movdqa (eval(16*(($Xi-1)&3))."(%rsp)",@Tx[1]); # X[]+K xfer to IALU
366 eval(shift(@insns));
367 eval(shift(@insns));
368 eval(shift(@insns)); # ror
369 eval(shift(@insns));
370
371 &pslld (@X[0],2);
372 eval(shift(@insns)); # body_20_39
373 eval(shift(@insns));
374 &psrld (@Tx[0],30);
375 eval(shift(@insns));
376 eval(shift(@insns)); # rol
377 eval(shift(@insns));
378 eval(shift(@insns));
379 eval(shift(@insns)); # ror
380 eval(shift(@insns));
381
382 &por (@X[0],@Tx[0]); # "X[0]"<<<=2
383 eval(shift(@insns)); # body_20_39
384 eval(shift(@insns));
385 &movdqa (@Tx[1],@X[0]) if ($Xi<19);
386 eval(shift(@insns));
387 eval(shift(@insns)); # rol
388 eval(shift(@insns));
389 eval(shift(@insns));
390 eval(shift(@insns)); # rol
391 eval(shift(@insns));
392
393 foreach (@insns) { eval; } # remaining instructions
394
395 $Xi++; push(@X,shift(@X)); # "rotate" X[]
396 push(@Tx,shift(@Tx));
397}
398
399sub Xuplast_ssse3_80()
400{ use integer;
401 my $body = shift;
402 my @insns = (&$body,&$body,&$body,&$body); # 32 instructions
403 my ($a,$b,$c,$d,$e);
404
405 eval(shift(@insns));
406 &paddd (@Tx[1],@X[-1&7]);
407 eval(shift(@insns));
408 eval(shift(@insns));
409 eval(shift(@insns));
410 eval(shift(@insns));
411
412 &movdqa (eval(16*(($Xi-1)&3))."(%rsp)",@Tx[1]); # X[]+K xfer IALU
413
414 foreach (@insns) { eval; } # remaining instructions
415
416 &cmp ($inp,$len);
417 &je (".Ldone_ssse3");
418
419 unshift(@Tx,pop(@Tx));
420
421 &movdqa (@X[2],"64($K_XX_XX)"); # pbswap mask
422 &movdqa (@Tx[1],"0($K_XX_XX)"); # K_00_19
423 &movdqu (@X[-4&7],"0($inp)"); # load input
424 &movdqu (@X[-3&7],"16($inp)");
425 &movdqu (@X[-2&7],"32($inp)");
426 &movdqu (@X[-1&7],"48($inp)");
427 &pshufb (@X[-4&7],@X[2]); # byte swap
428 &add ($inp,64);
429
430 $Xi=0;
431}
432
433sub Xloop_ssse3()
434{ use integer;
435 my $body = shift;
436 my @insns = (&$body,&$body,&$body,&$body); # 32 instructions
437 my ($a,$b,$c,$d,$e);
438
439 eval(shift(@insns));
440 eval(shift(@insns));
441 &pshufb (@X[($Xi-3)&7],@X[2]);
442 eval(shift(@insns));
443 eval(shift(@insns));
444 &paddd (@X[($Xi-4)&7],@Tx[1]);
445 eval(shift(@insns));
446 eval(shift(@insns));
447 eval(shift(@insns));
448 eval(shift(@insns));
449 &movdqa (eval(16*$Xi)."(%rsp)",@X[($Xi-4)&7]); # X[]+K xfer to IALU
450 eval(shift(@insns));
451 eval(shift(@insns));
452 &psubd (@X[($Xi-4)&7],@Tx[1]);
453
454 foreach (@insns) { eval; }
455 $Xi++;
456}
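# Xloop_ssse3 finishes the next input block that Xuplast_ssse3_80 just
# loaded: it byte-swaps one 16-byte chunk, pre-adds K_00_19 for the
# X[]+K transfer to the stack, and then psubd's K back out so that
# @X[] keeps the raw message words the following Xupdate steps need.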
457
458sub Xtail_ssse3()
459{ use integer;
460 my $body = shift;
461 my @insns = (&$body,&$body,&$body,&$body); # 32 instructions
462 my ($a,$b,$c,$d,$e);
463
464 foreach (@insns) { eval; }
465}
466
467sub body_00_19 () {
468 use integer;
469 my ($k,$n);
470 my @r=(
471 '($a,$b,$c,$d,$e)=@V;'.
472 '&add ($e,eval(4*($j&15))."(%rsp)");', # X[]+K xfer
473 '&xor ($c,$d);',
474 '&mov (@T[1],$a);', # $b in next round
475 '&$_rol ($a,5);',
476 '&and (@T[0],$c);', # ($b&($c^$d))
477 '&xor ($c,$d);', # restore $c
478 '&xor (@T[0],$d);',
479 '&add ($e,$a);',
480 '&$_ror ($b,$j?7:2);', # $b>>>2
481 '&add ($e,@T[0]);' .'$j++; unshift(@V,pop(@V)); unshift(@T,pop(@T));'
482 );
483 $n = scalar(@r);
484 $k = (($jj+1)*12/20)*20*$n/12; # 12 aesencs per these 20 rounds
485 @r[$k%$n].='&$aesenc();' if ($jj==$k/$n);
486 $jj++;
487 return @r;
488}
489
490sub body_20_39 () {
491 use integer;
492 my ($k,$n);
493 my @r=(
494 '($a,$b,$c,$d,$e)=@V;'.
495 '&add ($e,eval(4*($j++&15))."(%rsp)");', # X[]+K xfer
496 '&xor (@T[0],$d);', # ($b^$d)
497 '&mov (@T[1],$a);', # $b in next round
498 '&$_rol ($a,5);',
499 '&xor (@T[0],$c);', # ($b^$d^$c)
500 '&add ($e,$a);',
501 '&$_ror ($b,7);', # $b>>>2
502 '&add ($e,@T[0]);' .'unshift(@V,pop(@V)); unshift(@T,pop(@T));'
503 );
504 $n = scalar(@r);
505 $k = (($jj+1)*8/20)*20*$n/8; # 8 aesencs per these 20 rounds
506 @r[$k%$n].='&$aesenc();' if ($jj==$k/$n);
507 $jj++;
508 return @r;
509}
510
511sub body_40_59 () {
512 use integer;
513 my ($k,$n);
514 my @r=(
515 '($a,$b,$c,$d,$e)=@V;'.
516 '&mov (@T[1],$c);',
517 '&xor ($c,$d);',
518 '&add ($e,eval(4*($j++&15))."(%rsp)");', # X[]+K xfer
519 '&and (@T[1],$d);',
520 '&and (@T[0],$c);', # ($b&($c^$d))
521 '&$_ror ($b,7);', # $b>>>2
522 '&add ($e,@T[1]);',
523 '&mov (@T[1],$a);', # $b in next round
524 '&$_rol ($a,5);',
525 '&add ($e,@T[0]);',
526 '&xor ($c,$d);', # restore $c
527 '&add ($e,$a);' .'unshift(@V,pop(@V)); unshift(@T,pop(@T));'
528 );
529 $n = scalar(@r);
530 $k=(($jj+1)*12/20)*20*$n/12; # 12 aesencs per these 20 rounds
531 @r[$k%$n].='&$aesenc();' if ($jj==$k/$n);
532 $jj++;
533 return @r;
534}
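# One pass over the schedule below covers a single 64-byte SHA-1 block:
# the four Xupdate_ssse3_16_31 calls produce W[16..31], the twelve
# Xupdate_ssse3_32_79 calls produce W[32..79], and every call also
# evaluates four scalar rounds supplied by its body_* closure.  The
# closures thread the AES-CBC work through at the same time: 12 aesenc
# slots in each of the 00-19 and 40-59 sections plus 8 in each of the
# two sections driven by body_20_39 add up to 40 slots, i.e. 4 CBC
# blocks of 10 AES rounds each with a 128-bit key (the last slot of
# every block expands at run time for longer keys).  The three
# Xloop_ssse3 calls that follow run the final 12 scalar rounds while
# byte-swapping and K-adding the next input block.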
535$code.=<<___;
536.align 16
537.Loop_ssse3:
538___
539 &Xupdate_ssse3_16_31(\&body_00_19);
540 &Xupdate_ssse3_16_31(\&body_00_19);
541 &Xupdate_ssse3_16_31(\&body_00_19);
542 &Xupdate_ssse3_16_31(\&body_00_19);
543 &Xupdate_ssse3_32_79(\&body_00_19);
544 &Xupdate_ssse3_32_79(\&body_20_39);
545 &Xupdate_ssse3_32_79(\&body_20_39);
546 &Xupdate_ssse3_32_79(\&body_20_39);
547 &Xupdate_ssse3_32_79(\&body_20_39);
548 &Xupdate_ssse3_32_79(\&body_20_39);
549 &Xupdate_ssse3_32_79(\&body_40_59);
550 &Xupdate_ssse3_32_79(\&body_40_59);
551 &Xupdate_ssse3_32_79(\&body_40_59);
552 &Xupdate_ssse3_32_79(\&body_40_59);
553 &Xupdate_ssse3_32_79(\&body_40_59);
554 &Xupdate_ssse3_32_79(\&body_20_39);
555 &Xuplast_ssse3_80(\&body_20_39); # can jump to "done"
556
557 $saved_j=$j; @saved_V=@V;
558 $saved_r=$r; @saved_rndkey=@rndkey;
559
560 &Xloop_ssse3(\&body_20_39);
561 &Xloop_ssse3(\&body_20_39);
562 &Xloop_ssse3(\&body_20_39);
563
564$code.=<<___;
565 movups $iv,48($out,$in0) # write output
566 lea 64($in0),$in0
567
568 add 0($ctx),$A # update context
569 add 4($ctx),@T[0]
570 add 8($ctx),$C
571 add 12($ctx),$D
572 mov $A,0($ctx)
573 add 16($ctx),$E
574 mov @T[0],4($ctx)
575 mov @T[0],$B # magic seed
576 mov $C,8($ctx)
577 mov $D,12($ctx)
578 mov $E,16($ctx)
579 jmp .Loop_ssse3
580
581.align 16
582.Ldone_ssse3:
583___
584 $jj=$j=$saved_j; @V=@saved_V;
585 $r=$saved_r; @rndkey=@saved_rndkey;
586
587 &Xtail_ssse3(\&body_20_39);
588 &Xtail_ssse3(\&body_20_39);
589 &Xtail_ssse3(\&body_20_39);
590
591$code.=<<___;
592 movups $iv,48($out,$in0) # write output
593 mov 88(%rsp),$ivp # restore $ivp
594
595 add 0($ctx),$A # update context
596 add 4($ctx),@T[0]
597 add 8($ctx),$C
598 mov $A,0($ctx)
599 add 12($ctx),$D
600 mov @T[0],4($ctx)
601 add 16($ctx),$E
602 mov $C,8($ctx)
603 mov $D,12($ctx)
604 mov $E,16($ctx)
605 movups $iv,($ivp) # write IV
606___
607$code.=<<___ if ($win64);
608 movaps 96+0(%rsp),%xmm6
609 movaps 96+16(%rsp),%xmm7
610 movaps 96+32(%rsp),%xmm8
611 movaps 96+48(%rsp),%xmm9
612 movaps 96+64(%rsp),%xmm10
613 movaps 96+80(%rsp),%xmm11
614 movaps 96+96(%rsp),%xmm12
615 movaps 96+112(%rsp),%xmm13
616 movaps 96+128(%rsp),%xmm14
617 movaps 96+144(%rsp),%xmm15
618___
619$code.=<<___;
620 lea `104+($win64?10*16:0)`(%rsp),%rsi
621 mov 0(%rsi),%r15
622 mov 8(%rsi),%r14
623 mov 16(%rsi),%r13
624 mov 24(%rsi),%r12
625 mov 32(%rsi),%rbp
626 mov 40(%rsi),%rbx
627 lea 48(%rsi),%rsp
628.Lepilogue_ssse3:
629 ret
630.size aesni_cbc_sha1_enc_ssse3,.-aesni_cbc_sha1_enc_ssse3
631___
632
633$j=$jj=$r=$sn=0;
634
635if ($avx) {
636my ($in0,$out,$len,$key,$ivp,$ctx,$inp)=("%rdi","%rsi","%rdx","%rcx","%r8","%r9","%r10");
637
638my $Xi=4;
639my @X=map("%xmm$_",(4..7,0..3));
640my @Tx=map("%xmm$_",(8..10));
641my @V=($A,$B,$C,$D,$E)=("%eax","%ebx","%ecx","%edx","%ebp"); # size optimization
642my @T=("%esi","%edi");
643
644my $_rol=sub { &shld(@_[0],@_) };
645my $_ror=sub { &shrd(@_[0],@_) };
646
647$code.=<<___;
648.type aesni_cbc_sha1_enc_avx,\@function,6
649.align 16
650aesni_cbc_sha1_enc_avx:
651 mov `($win64?56:8)`(%rsp),$inp # load 7th argument
652 #shr \$6,$len # debugging artefact
653 #jz .Lepilogue_avx # debugging artefact
654 push %rbx
655 push %rbp
656 push %r12
657 push %r13
658 push %r14
659 push %r15
660 lea `-104-($win64?10*16:0)`(%rsp),%rsp
661 #mov $in0,$inp # debugging artefact
662 #lea 64(%rsp),$ctx # debugging artefact
663___
664$code.=<<___ if ($win64);
665 movaps %xmm6,96+0(%rsp)
666 movaps %xmm7,96+16(%rsp)
667 movaps %xmm8,96+32(%rsp)
668 movaps %xmm9,96+48(%rsp)
669 movaps %xmm10,96+64(%rsp)
670 movaps %xmm11,96+80(%rsp)
671 movaps %xmm12,96+96(%rsp)
672 movaps %xmm13,96+112(%rsp)
673 movaps %xmm14,96+128(%rsp)
674 movaps %xmm15,96+144(%rsp)
675.Lprologue_avx:
676___
677$code.=<<___;
678 vzeroall
679 mov $in0,%r12 # reassign arguments
680 mov $out,%r13
681 mov $len,%r14
682 mov $key,%r15
683 vmovdqu ($ivp),$iv # load IV
684 mov $ivp,88(%rsp) # save $ivp
685___
686my ($in0,$out,$len,$key)=map("%r$_",(12..15)); # reassign arguments
687my $rounds="${ivp}d";
688$code.=<<___;
689 shl \$6,$len
690 sub $in0,$out
691 mov 240($key),$rounds
692 add \$112,$key # size optimization
693 add $inp,$len # end of input
694
695 lea K_XX_XX(%rip),$K_XX_XX
696 mov 0($ctx),$A # load context
697 mov 4($ctx),$B
698 mov 8($ctx),$C
699 mov 12($ctx),$D
700 mov $B,@T[0] # magic seed
701 mov 16($ctx),$E
702
703 vmovdqa 64($K_XX_XX),@X[2] # pbswap mask
704 vmovdqa 0($K_XX_XX),@Tx[1] # K_00_19
705 vmovdqu 0($inp),@X[-4&7] # load input to %xmm[0-3]
706 vmovdqu 16($inp),@X[-3&7]
707 vmovdqu 32($inp),@X[-2&7]
708 vmovdqu 48($inp),@X[-1&7]
709 vpshufb @X[2],@X[-4&7],@X[-4&7] # byte swap
710 add \$64,$inp
711 vpshufb @X[2],@X[-3&7],@X[-3&7]
712 vpshufb @X[2],@X[-2&7],@X[-2&7]
713 vpshufb @X[2],@X[-1&7],@X[-1&7]
714 vpaddd @Tx[1],@X[-4&7],@X[0] # add K_00_19
715 vpaddd @Tx[1],@X[-3&7],@X[1]
716 vpaddd @Tx[1],@X[-2&7],@X[2]
717 vmovdqa @X[0],0(%rsp) # X[]+K xfer to IALU
718 vmovdqa @X[1],16(%rsp)
719 vmovdqa @X[2],32(%rsp)
720 vmovups -112($key),$rndkey0 # $key[0]
721 vmovups 16-112($key),$rndkey[0] # forward reference
722 jmp .Loop_avx
723___
724
725my $aesenc=sub {
726 use integer;
727 my ($n,$k)=($r/10,$r%10);
728 if ($k==0) {
729 $code.=<<___;
730 vmovups `16*$n`($in0),$in # load input
731 vxorps $rndkey0,$in,$in
732___
733 $code.=<<___ if ($n);
734 vmovups $iv,`16*($n-1)`($out,$in0) # write output
735___
736 $code.=<<___;
737 vxorps $in,$iv,$iv
738 vaesenc $rndkey[0],$iv,$iv
739 vmovups `32+16*$k-112`($key),$rndkey[1]
740___
741 } elsif ($k==9) {
742 $sn++;
743 $code.=<<___;
744 cmp \$11,$rounds
745 jb .Lvaesenclast$sn
746 vaesenc $rndkey[0],$iv,$iv
747 vmovups `32+16*($k+0)-112`($key),$rndkey[1]
748 vaesenc $rndkey[1],$iv,$iv
749 vmovups `32+16*($k+1)-112`($key),$rndkey[0]
750 je .Lvaesenclast$sn
751 vaesenc $rndkey[0],$iv,$iv
752 vmovups `32+16*($k+2)-112`($key),$rndkey[1]
753 vaesenc $rndkey[1],$iv,$iv
754 vmovups `32+16*($k+3)-112`($key),$rndkey[0]
755.Lvaesenclast$sn:
756 vaesenclast $rndkey[0],$iv,$iv
757 vmovups 16-112($key),$rndkey[1] # forward reference
758___
759 } else {
760 $code.=<<___;
761 vaesenc $rndkey[0],$iv,$iv
762 vmovups `32+16*$k-112`($key),$rndkey[1]
763___
764 }
765 $r++; unshift(@rndkey,pop(@rndkey));
766};
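# The $aesenc closure above emits one AES-CBC round per invocation,
# rotating through @rndkey as it walks the key schedule.  Every 10th
# invocation ($k==0) fetches the next plaintext block, writes out the
# previous ciphertext block and xors the new input into $iv; the $k==9
# invocation finishes the block, branching at run time on the key
# length so that 128-bit keys go straight to vaesenclast while 192- and
# 256-bit keys run two or four extra vaesenc steps first.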
767
768sub Xupdate_avx_16_31() # recall that $Xi starts with 4
769{ use integer;
770 my $body = shift;
771 my @insns = (&$body,&$body,&$body,&$body); # 40 instructions
772 my ($a,$b,$c,$d,$e);
773
774 eval(shift(@insns));
775 eval(shift(@insns));
776 &vpalignr(@X[0],@X[-3&7],@X[-4&7],8); # compose "X[-14]" in "X[0]"
777 eval(shift(@insns));
778 eval(shift(@insns));
779
780 &vpaddd (@Tx[1],@Tx[1],@X[-1&7]);
781 eval(shift(@insns));
782 eval(shift(@insns));
783 &vpsrldq(@Tx[0],@X[-1&7],4); # "X[-3]", 3 dwords
784 eval(shift(@insns));
785 eval(shift(@insns));
786 &vpxor (@X[0],@X[0],@X[-4&7]); # "X[0]"^="X[-16]"
787 eval(shift(@insns));
788 eval(shift(@insns));
789
790 &vpxor (@Tx[0],@Tx[0],@X[-2&7]); # "X[-3]"^"X[-8]"
791 eval(shift(@insns));
792 eval(shift(@insns));
793 eval(shift(@insns));
794 eval(shift(@insns));
795
796 &vpxor (@X[0],@X[0],@Tx[0]); # "X[0]"^="X[-3]"^"X[-8]"
797 eval(shift(@insns));
798 eval(shift(@insns));
799 &vmovdqa (eval(16*(($Xi-1)&3))."(%rsp)",@Tx[1]); # X[]+K xfer to IALU
800 eval(shift(@insns));
801 eval(shift(@insns));
802
803 &vpsrld (@Tx[0],@X[0],31);
804 eval(shift(@insns));
805 eval(shift(@insns));
806 eval(shift(@insns));
807 eval(shift(@insns));
808
809 &vpslldq(@Tx[2],@X[0],12); # "X[0]"<<96, extract one dword
810 &vpaddd (@X[0],@X[0],@X[0]);
811 eval(shift(@insns));
812 eval(shift(@insns));
813 eval(shift(@insns));
814 eval(shift(@insns));
815
816 &vpsrld (@Tx[1],@Tx[2],30);
817 &vpor (@X[0],@X[0],@Tx[0]); # "X[0]"<<<=1
818 eval(shift(@insns));
819 eval(shift(@insns));
820 eval(shift(@insns));
821 eval(shift(@insns));
822
823 &vpslld (@Tx[2],@Tx[2],2);
824 &vpxor (@X[0],@X[0],@Tx[1]);
825 eval(shift(@insns));
826 eval(shift(@insns));
827 eval(shift(@insns));
828 eval(shift(@insns));
829
830 &vpxor (@X[0],@X[0],@Tx[2]); # "X[0]"^=("X[0]">>96)<<<2
831 eval(shift(@insns));
832 eval(shift(@insns));
833 &vmovdqa (@Tx[2],eval(16*(($Xi)/5))."($K_XX_XX)"); # K_XX_XX
834 eval(shift(@insns));
835 eval(shift(@insns));
836
837
838 foreach (@insns) { eval; } # remaining instructions [if any]
839
840 $Xi++; push(@X,shift(@X)); # "rotate" X[]
841 push(@Tx,shift(@Tx));
842}
843
844sub Xupdate_avx_32_79()
845{ use integer;
846 my $body = shift;
847 my @insns = (&$body,&$body,&$body,&$body); # 32 to 48 instructions
848 my ($a,$b,$c,$d,$e);
849
850 &vpalignr(@Tx[0],@X[-1&7],@X[-2&7],8); # compose "X[-6]"
851 &vpxor (@X[0],@X[0],@X[-4&7]); # "X[0]"="X[-32]"^"X[-16]"
852 eval(shift(@insns)); # body_20_39
853 eval(shift(@insns));
854 eval(shift(@insns));
855 eval(shift(@insns)); # rol
856
857 &vpxor (@X[0],@X[0],@X[-7&7]); # "X[0]"^="X[-28]"
858 eval(shift(@insns));
859 eval(shift(@insns)) if (@insns[0] !~ /&ro[rl]/);
860 if ($Xi%5) {
861 &vmovdqa (@Tx[2],@Tx[1]);# "perpetuate" K_XX_XX...
862 } else { # ... or load next one
863 &vmovdqa (@Tx[2],eval(16*($Xi/5))."($K_XX_XX)");
864 }
865 &vpaddd (@Tx[1],@Tx[1],@X[-1&7]);
866 eval(shift(@insns)); # ror
867 eval(shift(@insns));
868
869 &vpxor (@X[0],@X[0],@Tx[0]); # "X[0]"^="X[-6]"
870 eval(shift(@insns)); # body_20_39
871 eval(shift(@insns));
872 eval(shift(@insns));
873 eval(shift(@insns)); # rol
874
875 &vpsrld (@Tx[0],@X[0],30);
876 &vmovdqa (eval(16*(($Xi-1)&3))."(%rsp)",@Tx[1]); # X[]+K xfer to IALU
877 eval(shift(@insns));
878 eval(shift(@insns));
879 eval(shift(@insns)); # ror
880 eval(shift(@insns));
881
882 &vpslld (@X[0],@X[0],2);
883 eval(shift(@insns)); # body_20_39
884 eval(shift(@insns));
885 eval(shift(@insns));
886 eval(shift(@insns)); # rol
887 eval(shift(@insns));
888 eval(shift(@insns));
889 eval(shift(@insns)); # ror
890 eval(shift(@insns));
891
892 &vpor (@X[0],@X[0],@Tx[0]); # "X[0]"<<<=2
893 eval(shift(@insns)); # body_20_39
894 eval(shift(@insns));
895 &vmovdqa (@Tx[1],@X[0]) if ($Xi<19);
896 eval(shift(@insns));
897 eval(shift(@insns)); # rol
898 eval(shift(@insns));
899 eval(shift(@insns));
900 eval(shift(@insns)); # rol
901 eval(shift(@insns));
902
903 foreach (@insns) { eval; } # remaining instructions
904
905 $Xi++; push(@X,shift(@X)); # "rotate" X[]
906 push(@Tx,shift(@Tx));
907}
908
909sub Xuplast_avx_80()
910{ use integer;
911 my $body = shift;
912 my @insns = (&$body,&$body,&$body,&$body); # 32 instructions
913 my ($a,$b,$c,$d,$e);
914
915 eval(shift(@insns));
916 &vpaddd (@Tx[1],@Tx[1],@X[-1&7]);
917 eval(shift(@insns));
918 eval(shift(@insns));
919 eval(shift(@insns));
920 eval(shift(@insns));
921
922 &movdqa (eval(16*(($Xi-1)&3))."(%rsp)",@Tx[1]); # X[]+K xfer IALU
923
924 foreach (@insns) { eval; } # remaining instructions
925
926 &cmp ($inp,$len);
927 &je (".Ldone_avx");
928
929 unshift(@Tx,pop(@Tx));
930
931 &vmovdqa(@X[2],"64($K_XX_XX)"); # pbswap mask
932 &vmovdqa(@Tx[1],"0($K_XX_XX)"); # K_00_19
933 &vmovdqu(@X[-4&7],"0($inp)"); # load input
934 &vmovdqu(@X[-3&7],"16($inp)");
935 &vmovdqu(@X[-2&7],"32($inp)");
936 &vmovdqu(@X[-1&7],"48($inp)");
937 &vpshufb(@X[-4&7],@X[-4&7],@X[2]); # byte swap
938 &add ($inp,64);
939
940 $Xi=0;
941}
942
943sub Xloop_avx()
944{ use integer;
945 my $body = shift;
946 my @insns = (&$body,&$body,&$body,&$body); # 32 instructions
947 my ($a,$b,$c,$d,$e);
948
949 eval(shift(@insns));
950 eval(shift(@insns));
951 &vpshufb(@X[($Xi-3)&7],@X[($Xi-3)&7],@X[2]);
952 eval(shift(@insns));
953 eval(shift(@insns));
954 &vpaddd (@X[$Xi&7],@X[($Xi-4)&7],@Tx[1]);
955 eval(shift(@insns));
956 eval(shift(@insns));
957 eval(shift(@insns));
958 eval(shift(@insns));
959 &vmovdqa(eval(16*$Xi)."(%rsp)",@X[$Xi&7]); # X[]+K xfer to IALU
960 eval(shift(@insns));
961 eval(shift(@insns));
962
963 foreach (@insns) { eval; }
964 $Xi++;
965}
966
967sub Xtail_avx()
968{ use integer;
969 my $body = shift;
970 my @insns = (&$body,&$body,&$body,&$body); # 32 instructions
971 my ($a,$b,$c,$d,$e);
972
973 foreach (@insns) { eval; }
974}
975
976$code.=<<___;
977.align 16
978.Loop_avx:
979___
980 &Xupdate_avx_16_31(\&body_00_19);
981 &Xupdate_avx_16_31(\&body_00_19);
982 &Xupdate_avx_16_31(\&body_00_19);
983 &Xupdate_avx_16_31(\&body_00_19);
984 &Xupdate_avx_32_79(\&body_00_19);
985 &Xupdate_avx_32_79(\&body_20_39);
986 &Xupdate_avx_32_79(\&body_20_39);
987 &Xupdate_avx_32_79(\&body_20_39);
988 &Xupdate_avx_32_79(\&body_20_39);
989 &Xupdate_avx_32_79(\&body_20_39);
990 &Xupdate_avx_32_79(\&body_40_59);
991 &Xupdate_avx_32_79(\&body_40_59);
992 &Xupdate_avx_32_79(\&body_40_59);
993 &Xupdate_avx_32_79(\&body_40_59);
994 &Xupdate_avx_32_79(\&body_40_59);
995 &Xupdate_avx_32_79(\&body_20_39);
996 &Xuplast_avx_80(\&body_20_39); # can jump to "done"
997
998 $saved_j=$j; @saved_V=@V;
999 $saved_r=$r; @saved_rndkey=@rndkey;
1000
1001 &Xloop_avx(\&body_20_39);
1002 &Xloop_avx(\&body_20_39);
1003 &Xloop_avx(\&body_20_39);
1004
1005$code.=<<___;
1006 vmovups $iv,48($out,$in0) # write output
1007 lea 64($in0),$in0
1008
1009 add 0($ctx),$A # update context
1010 add 4($ctx),@T[0]
1011 add 8($ctx),$C
1012 add 12($ctx),$D
1013 mov $A,0($ctx)
1014 add 16($ctx),$E
1015 mov @T[0],4($ctx)
1016 mov @T[0],$B # magic seed
1017 mov $C,8($ctx)
1018 mov $D,12($ctx)
1019 mov $E,16($ctx)
1020 jmp .Loop_avx
1021
1022.align 16
1023.Ldone_avx:
1024___
1025 $jj=$j=$saved_j; @V=@saved_V;
1026 $r=$saved_r; @rndkey=@saved_rndkey;
1027
1028 &Xtail_avx(\&body_20_39);
1029 &Xtail_avx(\&body_20_39);
1030 &Xtail_avx(\&body_20_39);
1031
1032$code.=<<___;
1033 vmovups $iv,48($out,$in0) # write output
1034 mov 88(%rsp),$ivp # restore $ivp
1035
1036 add 0($ctx),$A # update context
1037 add 4($ctx),@T[0]
1038 add 8($ctx),$C
1039 mov $A,0($ctx)
1040 add 12($ctx),$D
1041 mov @T[0],4($ctx)
1042 add 16($ctx),$E
1043 mov $C,8($ctx)
1044 mov $D,12($ctx)
1045 mov $E,16($ctx)
1046 vmovups $iv,($ivp) # write IV
1047 vzeroall
1048___
1049$code.=<<___ if ($win64);
1050 movaps 96+0(%rsp),%xmm6
1051 movaps 96+16(%rsp),%xmm7
1052 movaps 96+32(%rsp),%xmm8
1053 movaps 96+48(%rsp),%xmm9
1054 movaps 96+64(%rsp),%xmm10
1055 movaps 96+80(%rsp),%xmm11
1056 movaps 96+96(%rsp),%xmm12
1057 movaps 96+112(%rsp),%xmm13
1058 movaps 96+128(%rsp),%xmm14
1059 movaps 96+144(%rsp),%xmm15
1060___
1061$code.=<<___;
1062 lea `104+($win64?10*16:0)`(%rsp),%rsi
1063 mov 0(%rsi),%r15
1064 mov 8(%rsi),%r14
1065 mov 16(%rsi),%r13
1066 mov 24(%rsi),%r12
1067 mov 32(%rsi),%rbp
1068 mov 40(%rsi),%rbx
1069 lea 48(%rsi),%rsp
1070.Lepilogue_avx:
1071 ret
1072.size aesni_cbc_sha1_enc_avx,.-aesni_cbc_sha1_enc_avx
1073___
1074}
1075$code.=<<___;
1076.align 64
1077K_XX_XX:
1078.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 # K_00_19
1079.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 # K_20_39
1080.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc # K_40_59
1081.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 # K_60_79
1082.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f # pbswap mask
1083
1084.asciz "AESNI-CBC+SHA1 stitch for x86_64, CRYPTOGAMS by <appro\@openssl.org>"
1085.align 64
1086___
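# Layout of the K_XX_XX pool above: the four SHA-1 round constants,
# each replicated across a 16-byte lane at offsets 0, 16, 32 and 48
# (hence the 16*($Xi/5) indexing in the Xupdate code), followed at
# offset 64 by the pshufb mask used to byte-swap big-endian input.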
1087
1088# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame,
1089# CONTEXT *context,DISPATCHER_CONTEXT *disp)
1090if ($win64) {
1091$rec="%rcx";
1092$frame="%rdx";
1093$context="%r8";
1094$disp="%r9";
1095
1096$code.=<<___;
1097.extern __imp_RtlVirtualUnwind
1098.type ssse3_handler,\@abi-omnipotent
1099.align 16
1100ssse3_handler:
1101 push %rsi
1102 push %rdi
1103 push %rbx
1104 push %rbp
1105 push %r12
1106 push %r13
1107 push %r14
1108 push %r15
1109 pushfq
1110 sub \$64,%rsp
1111
1112 mov 120($context),%rax # pull context->Rax
1113 mov 248($context),%rbx # pull context->Rip
1114
1115 mov 8($disp),%rsi # disp->ImageBase
1116 mov 56($disp),%r11 # disp->HandlerData
1117
1118 mov 0(%r11),%r10d # HandlerData[0]
1119 lea (%rsi,%r10),%r10 # prologue label
1120 cmp %r10,%rbx # context->Rip<prologue label
1121 jb .Lcommon_seh_tail
1122
1123 mov 152($context),%rax # pull context->Rsp
1124
1125 mov 4(%r11),%r10d # HandlerData[1]
1126 lea (%rsi,%r10),%r10 # epilogue label
1127 cmp %r10,%rbx # context->Rip>=epilogue label
1128 jae .Lcommon_seh_tail
1129
1130 lea 96(%rax),%rsi
1131 lea 512($context),%rdi # &context.Xmm6
1132 mov \$20,%ecx
1133 .long 0xa548f3fc # cld; rep movsq
1134 lea `104+10*16`(%rax),%rax # adjust stack pointer
1135
1136 mov 0(%rax),%r15
1137 mov 8(%rax),%r14
1138 mov 16(%rax),%r13
1139 mov 24(%rax),%r12
1140 mov 32(%rax),%rbp
1141 mov 40(%rax),%rbx
1142 lea 48(%rax),%rax
1143 mov %rbx,144($context) # restore context->Rbx
1144 mov %rbp,160($context) # restore context->Rbp
1145 mov %r12,216($context) # restore context->R12
1146 mov %r13,224($context) # restore context->R13
1147 mov %r14,232($context) # restore context->R14
1148 mov %r15,240($context) # restore context->R15
1149
1150.Lcommon_seh_tail:
1151 mov 8(%rax),%rdi
1152 mov 16(%rax),%rsi
1153 mov %rax,152($context) # restore context->Rsp
1154 mov %rsi,168($context) # restore context->Rsi
1155 mov %rdi,176($context) # restore context->Rdi
1156
1157 mov 40($disp),%rdi # disp->ContextRecord
1158 mov $context,%rsi # context
1159 mov \$154,%ecx # sizeof(CONTEXT)
1160 .long 0xa548f3fc # cld; rep movsq
1161
1162 mov $disp,%rsi
1163 xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER
1164 mov 8(%rsi),%rdx # arg2, disp->ImageBase
1165 mov 0(%rsi),%r8 # arg3, disp->ControlPc
1166 mov 16(%rsi),%r9 # arg4, disp->FunctionEntry
1167 mov 40(%rsi),%r10 # disp->ContextRecord
1168 lea 56(%rsi),%r11 # &disp->HandlerData
1169 lea 24(%rsi),%r12 # &disp->EstablisherFrame
1170 mov %r10,32(%rsp) # arg5
1171 mov %r11,40(%rsp) # arg6
1172 mov %r12,48(%rsp) # arg7
1173 mov %rcx,56(%rsp) # arg8, (NULL)
1174 call *__imp_RtlVirtualUnwind(%rip)
1175
1176 mov \$1,%eax # ExceptionContinueSearch
1177 add \$64,%rsp
1178 popfq
1179 pop %r15
1180 pop %r14
1181 pop %r13
1182 pop %r12
1183 pop %rbp
1184 pop %rbx
1185 pop %rdi
1186 pop %rsi
1187 ret
1188.size ssse3_handler,.-ssse3_handler
1189
1190.section .pdata
1191.align 4
1192 .rva .LSEH_begin_aesni_cbc_sha1_enc_ssse3
1193 .rva .LSEH_end_aesni_cbc_sha1_enc_ssse3
1194 .rva .LSEH_info_aesni_cbc_sha1_enc_ssse3
1195___
1196$code.=<<___ if ($avx);
1197 .rva .LSEH_begin_aesni_cbc_sha1_enc_avx
1198 .rva .LSEH_end_aesni_cbc_sha1_enc_avx
1199 .rva .LSEH_info_aesni_cbc_sha1_enc_avx
1200___
1201$code.=<<___;
1202.section .xdata
1203.align 8
1204.LSEH_info_aesni_cbc_sha1_enc_ssse3:
1205 .byte 9,0,0,0
1206 .rva ssse3_handler
1207 .rva .Lprologue_ssse3,.Lepilogue_ssse3 # HandlerData[]
1208___
1209$code.=<<___ if ($avx);
1210.LSEH_info_aesni_cbc_sha1_enc_avx:
1211 .byte 9,0,0,0
1212 .rva ssse3_handler
1213 .rva .Lprologue_avx,.Lepilogue_avx # HandlerData[]
1214___
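# Each .xdata record above starts with a ".byte 9,0,0,0" unwind-info
# header and names ssse3_handler; the two .rva entries that follow
# become HandlerData[0] and HandlerData[1], i.e. the prologue and
# epilogue labels the handler compares against context->Rip to decide
# whether the saved registers and the XMM spill area are live.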
1215}
1216
1217####################################################################
1218sub rex {
1219 local *opcode=shift;
1220 my ($dst,$src)=@_;
1221 my $rex=0;
1222
1223 $rex|=0x04 if($dst>=8);
1224 $rex|=0x01 if($src>=8);
1225 push @opcode,$rex|0x40 if($rex);
1226}
1227
1228sub aesni {
1229 my $line=shift;
1230 my @opcode=(0x66);
1231
1232 if ($line=~/(aes[a-z]+)\s+%xmm([0-9]+),\s*%xmm([0-9]+)/) {
1233 my %opcodelet = (
1234 "aesenc" => 0xdc, "aesenclast" => 0xdd
1235 );
1236 return undef if (!defined($opcodelet{$1}));
1237 rex(\@opcode,$3,$2);
1238 push @opcode,0x0f,0x38,$opcodelet{$1};
1239 push @opcode,0xc0|($2&7)|(($3&7)<<3); # ModR/M
1240 return ".byte\t".join(',',@opcode);
1241 }
1242 return $line;
1243}
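# For illustration, "aesenc %xmm1,%xmm2" matches the pattern above with
# $2=1 (source) and $3=2 (destination), needs no REX prefix and becomes
#	.byte	0x66,0x0f,0x38,0xdc,0xd1	# ModR/M = 0xc0|1|(2<<3)
# while "aesenclast %xmm15,%xmm0" picks up REX.B for the extended
# source register:
#	.byte	0x66,0x41,0x0f,0x38,0xdd,0xc7
# Emitting raw bytes this way keeps the module usable with assemblers
# that predate the AES-NI mnemonics.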
1244
1245$code =~ s/\`([^\`]*)\`/eval($1)/gem;
1246$code =~ s/\b(aes.*%xmm[0-9]+).*$/aesni($1)/gem;
1247
1248print $code;
1249close STDOUT;
diff --git a/src/lib/libssl/src/crypto/aes/asm/aesni-x86.pl b/src/lib/libssl/src/crypto/aes/asm/aesni-x86.pl
new file mode 100644
index 0000000000..3dc345b585
--- /dev/null
+++ b/src/lib/libssl/src/crypto/aes/asm/aesni-x86.pl
@@ -0,0 +1,2189 @@
1#!/usr/bin/env perl
2
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9#
10# This module implements support for the Intel AES-NI extension. In
11# the OpenSSL context it's used with the Intel engine, but it can also
12# be used as a drop-in replacement for crypto/aes/asm/aes-586.pl [see
13# below for details].
14#
15# Performance.
16#
17# To start with, see the corresponding paragraph in aesni-x86_64.pl...
18# Instead of filling in a table similar to the one found there, I've
19# chosen to summarize *comparison* results for raw ECB, CTR and CBC
20# benchmarks. The simplified table below shows 32-bit performance
21# relative to the 64-bit code at every given point. Ratios vary across
22# encryption modes, hence the interval values.
23#
24# 16-byte 64-byte 256-byte 1-KB 8-KB
25# 53-67% 67-84% 91-94% 95-98% 97-99.5%
26#
27# Lower ratios for smaller block sizes are perfectly understandable,
28# because function call overhead is higher in 32-bit mode. For the
29# largest, 8-KB, blocks performance is virtually the same: the 32-bit
30# code is less than 1% slower for ECB, CBC and CCM, and ~3% slower otherwise.
31
32# January 2011
33#
34# See aesni-x86_64.pl for details. Unlike the x86_64 version, this
35# module interleaves at most 6 aes[enc|dec] instructions, because there
36# are not enough registers for an 8x interleave [which should be
37# optimal for Sandy Bridge]. Actually, the performance results for the
38# 6x interleave factor presented in aesni-x86_64.pl (except for CTR)
39# were measured with this module.
40
41# April 2011
42#
43# Add aesni_xts_[en|de]crypt. Westmere spends 1.50 cycles processing
44# one byte out of 8KB with a 128-bit key; Sandy Bridge spends 1.09.
45
46$PREFIX="aesni"; # if $PREFIX is set to "AES", the script
47 # generates drop-in replacement for
48 # crypto/aes/asm/aes-586.pl:-)
49$inline=1; # inline _aesni_[en|de]crypt
50
51$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
52push(@INC,"${dir}","${dir}../../perlasm");
53require "x86asm.pl";
54
55&asm_init($ARGV[0],$0);
56
57if ($PREFIX eq "aesni") { $movekey=*movups; }
58else { $movekey=*movups; }
59
60$len="eax";
61$rounds="ecx";
62$key="edx";
63$inp="esi";
64$out="edi";
65$rounds_="ebx"; # backup copy for $rounds
66$key_="ebp"; # backup copy for $key
67
68$rndkey0="xmm0";
69$rndkey1="xmm1";
70$inout0="xmm2";
71$inout1="xmm3";
72$inout2="xmm4";
73$inout3="xmm5"; $in1="xmm5";
74$inout4="xmm6"; $in0="xmm6";
75$inout5="xmm7"; $ivec="xmm7";
76
77# AESNI extension
78sub aeskeygenassist
79{ my($dst,$src,$imm)=@_;
80 if ("$dst:$src" =~ /xmm([0-7]):xmm([0-7])/)
81 { &data_byte(0x66,0x0f,0x3a,0xdf,0xc0|($1<<3)|$2,$imm); }
82}
83sub aescommon
84{ my($opcodelet,$dst,$src)=@_;
85 if ("$dst:$src" =~ /xmm([0-7]):xmm([0-7])/)
86 { &data_byte(0x66,0x0f,0x38,$opcodelet,0xc0|($1<<3)|$2);}
87}
88sub aesimc { aescommon(0xdb,@_); }
89sub aesenc { aescommon(0xdc,@_); }
90sub aesenclast { aescommon(0xdd,@_); }
91sub aesdec { aescommon(0xde,@_); }
92sub aesdeclast { aescommon(0xdf,@_); }
93
94# Inline version of internal aesni_[en|de]crypt1
95{ my $sn;
96sub aesni_inline_generate1
97{ my ($p,$inout,$ivec)=@_; $inout=$inout0 if (!defined($inout));
98 $sn++;
99
100 &$movekey ($rndkey0,&QWP(0,$key));
101 &$movekey ($rndkey1,&QWP(16,$key));
102 &xorps ($ivec,$rndkey0) if (defined($ivec));
103 &lea ($key,&DWP(32,$key));
104 &xorps ($inout,$ivec) if (defined($ivec));
105 &xorps ($inout,$rndkey0) if (!defined($ivec));
106 &set_label("${p}1_loop_$sn");
107 eval"&aes${p} ($inout,$rndkey1)";
108 &dec ($rounds);
109 &$movekey ($rndkey1,&QWP(0,$key));
110 &lea ($key,&DWP(16,$key));
111 &jnz (&label("${p}1_loop_$sn"));
112 eval"&aes${p}last ($inout,$rndkey1)";
113}}
114
115sub aesni_generate1 # fully unrolled loop
116{ my ($p,$inout)=@_; $inout=$inout0 if (!defined($inout));
117
118 &function_begin_B("_aesni_${p}rypt1");
119 &movups ($rndkey0,&QWP(0,$key));
120 &$movekey ($rndkey1,&QWP(0x10,$key));
121 &xorps ($inout,$rndkey0);
122 &$movekey ($rndkey0,&QWP(0x20,$key));
123 &lea ($key,&DWP(0x30,$key));
124 &cmp ($rounds,11);
125 &jb (&label("${p}128"));
126 &lea ($key,&DWP(0x20,$key));
127 &je (&label("${p}192"));
128 &lea ($key,&DWP(0x20,$key));
129 eval"&aes${p} ($inout,$rndkey1)";
130 &$movekey ($rndkey1,&QWP(-0x40,$key));
131 eval"&aes${p} ($inout,$rndkey0)";
132 &$movekey ($rndkey0,&QWP(-0x30,$key));
133 &set_label("${p}192");
134 eval"&aes${p} ($inout,$rndkey1)";
135 &$movekey ($rndkey1,&QWP(-0x20,$key));
136 eval"&aes${p} ($inout,$rndkey0)";
137 &$movekey ($rndkey0,&QWP(-0x10,$key));
138 &set_label("${p}128");
139 eval"&aes${p} ($inout,$rndkey1)";
140 &$movekey ($rndkey1,&QWP(0,$key));
141 eval"&aes${p} ($inout,$rndkey0)";
142 &$movekey ($rndkey0,&QWP(0x10,$key));
143 eval"&aes${p} ($inout,$rndkey1)";
144 &$movekey ($rndkey1,&QWP(0x20,$key));
145 eval"&aes${p} ($inout,$rndkey0)";
146 &$movekey ($rndkey0,&QWP(0x30,$key));
147 eval"&aes${p} ($inout,$rndkey1)";
148 &$movekey ($rndkey1,&QWP(0x40,$key));
149 eval"&aes${p} ($inout,$rndkey0)";
150 &$movekey ($rndkey0,&QWP(0x50,$key));
151 eval"&aes${p} ($inout,$rndkey1)";
152 &$movekey ($rndkey1,&QWP(0x60,$key));
153 eval"&aes${p} ($inout,$rndkey0)";
154 &$movekey ($rndkey0,&QWP(0x70,$key));
155 eval"&aes${p} ($inout,$rndkey1)";
156 eval"&aes${p}last ($inout,$rndkey0)";
157 &ret();
158 &function_end_B("_aesni_${p}rypt1");
159}
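# The unrolled _aesni_[en|de]crypt1 dispatches on key size up front:
# the jb path runs the plain 10-round (128-bit) schedule, the je path
# prepends two rounds for 192-bit keys and the fall-through prepends
# four for 256-bit keys.  The lea adjustments advance $key far enough
# that the negative QWP offsets and the common tail always pick up the
# right round keys, and comparing against 11 works because the round
# count these routines read from 240($key) counts the aes[enc|dec]
# steps short of the final aes[enc|dec]last (9, 11 or 13), as the
# shr/dec loops and the "lea 1(,$rounds,2)" restore used further down
# in this file also assume.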
160
161# void $PREFIX_encrypt (const void *inp,void *out,const AES_KEY *key);
162&aesni_generate1("enc") if (!$inline);
163&function_begin_B("${PREFIX}_encrypt");
164 &mov ("eax",&wparam(0));
165 &mov ($key,&wparam(2));
166 &movups ($inout0,&QWP(0,"eax"));
167 &mov ($rounds,&DWP(240,$key));
168 &mov ("eax",&wparam(1));
169 if ($inline)
170 { &aesni_inline_generate1("enc"); }
171 else
172 { &call ("_aesni_encrypt1"); }
173 &movups (&QWP(0,"eax"),$inout0);
174 &ret ();
175&function_end_B("${PREFIX}_encrypt");
176
177# void $PREFIX_decrypt (const void *inp,void *out,const AES_KEY *key);
178&aesni_generate1("dec") if(!$inline);
179&function_begin_B("${PREFIX}_decrypt");
180 &mov ("eax",&wparam(0));
181 &mov ($key,&wparam(2));
182 &movups ($inout0,&QWP(0,"eax"));
183 &mov ($rounds,&DWP(240,$key));
184 &mov ("eax",&wparam(1));
185 if ($inline)
186 { &aesni_inline_generate1("dec"); }
187 else
188 { &call ("_aesni_decrypt1"); }
189 &movups (&QWP(0,"eax"),$inout0);
190 &ret ();
191&function_end_B("${PREFIX}_decrypt");
192
193# _aesni_[en|de]cryptN are private interfaces, where N denotes the
194# interleave factor. Why were 3x subroutines originally used in loops?
195# Even though aes[enc|dec] latency was originally 6, the instruction
196# could be scheduled only every *2nd* cycle. Thus 3x interleave provided
197# optimal utilization, i.e. the subroutine's throughput was virtually
198# the same as that of the non-interleaved subroutine [for up to 3 input
199# blocks]. This is also why it makes no sense to implement a 2x
200# subroutine. aes[enc|dec] latency in the next processor generation is
201# 8, but the instructions can be scheduled every cycle. The optimal
202# interleave for the new processor is therefore 8x, but it's unfeasible
203# to accommodate that many blocks in the XMM registers addressable in
204# 32-bit mode, and therefore 6x is used instead...
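# Put differently: with a latency of L cycles and an issue rate of one
# aes[enc|dec] every I cycles, about L/I independent blocks are needed
# to keep the unit busy, so 6/2 = 3 on the original implementation and
# 8/1 = 8 on the newer one; eight in-flight blocks plus round keys do
# not fit in the eight XMM registers 32-bit code can address, hence
# the 6x compromise.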
205
206sub aesni_generate3
207{ my $p=shift;
208
209 &function_begin_B("_aesni_${p}rypt3");
210 &$movekey ($rndkey0,&QWP(0,$key));
211 &shr ($rounds,1);
212 &$movekey ($rndkey1,&QWP(16,$key));
213 &lea ($key,&DWP(32,$key));
214 &xorps ($inout0,$rndkey0);
215 &pxor ($inout1,$rndkey0);
216 &pxor ($inout2,$rndkey0);
217 &$movekey ($rndkey0,&QWP(0,$key));
218
219 &set_label("${p}3_loop");
220 eval"&aes${p} ($inout0,$rndkey1)";
221 eval"&aes${p} ($inout1,$rndkey1)";
222 &dec ($rounds);
223 eval"&aes${p} ($inout2,$rndkey1)";
224 &$movekey ($rndkey1,&QWP(16,$key));
225 eval"&aes${p} ($inout0,$rndkey0)";
226 eval"&aes${p} ($inout1,$rndkey0)";
227 &lea ($key,&DWP(32,$key));
228 eval"&aes${p} ($inout2,$rndkey0)";
229 &$movekey ($rndkey0,&QWP(0,$key));
230 &jnz (&label("${p}3_loop"));
231 eval"&aes${p} ($inout0,$rndkey1)";
232 eval"&aes${p} ($inout1,$rndkey1)";
233 eval"&aes${p} ($inout2,$rndkey1)";
234 eval"&aes${p}last ($inout0,$rndkey0)";
235 eval"&aes${p}last ($inout1,$rndkey0)";
236 eval"&aes${p}last ($inout2,$rndkey0)";
237 &ret();
238 &function_end_B("_aesni_${p}rypt3");
239}
240
241# 4x interleave is implemented to improve small-block performance,
242# most notably [and naturally] 4-block performance by ~30%. One can
243# argue that 5x should have been implemented as well, but the
244# improvement would be <20%, so it's not worth it...
245sub aesni_generate4
246{ my $p=shift;
247
248 &function_begin_B("_aesni_${p}rypt4");
249 &$movekey ($rndkey0,&QWP(0,$key));
250 &$movekey ($rndkey1,&QWP(16,$key));
251 &shr ($rounds,1);
252 &lea ($key,&DWP(32,$key));
253 &xorps ($inout0,$rndkey0);
254 &pxor ($inout1,$rndkey0);
255 &pxor ($inout2,$rndkey0);
256 &pxor ($inout3,$rndkey0);
257 &$movekey ($rndkey0,&QWP(0,$key));
258
259 &set_label("${p}4_loop");
260 eval"&aes${p} ($inout0,$rndkey1)";
261 eval"&aes${p} ($inout1,$rndkey1)";
262 &dec ($rounds);
263 eval"&aes${p} ($inout2,$rndkey1)";
264 eval"&aes${p} ($inout3,$rndkey1)";
265 &$movekey ($rndkey1,&QWP(16,$key));
266 eval"&aes${p} ($inout0,$rndkey0)";
267 eval"&aes${p} ($inout1,$rndkey0)";
268 &lea ($key,&DWP(32,$key));
269 eval"&aes${p} ($inout2,$rndkey0)";
270 eval"&aes${p} ($inout3,$rndkey0)";
271 &$movekey ($rndkey0,&QWP(0,$key));
272 &jnz (&label("${p}4_loop"));
273
274 eval"&aes${p} ($inout0,$rndkey1)";
275 eval"&aes${p} ($inout1,$rndkey1)";
276 eval"&aes${p} ($inout2,$rndkey1)";
277 eval"&aes${p} ($inout3,$rndkey1)";
278 eval"&aes${p}last ($inout0,$rndkey0)";
279 eval"&aes${p}last ($inout1,$rndkey0)";
280 eval"&aes${p}last ($inout2,$rndkey0)";
281 eval"&aes${p}last ($inout3,$rndkey0)";
282 &ret();
283 &function_end_B("_aesni_${p}rypt4");
284}
285
286sub aesni_generate6
287{ my $p=shift;
288
289 &function_begin_B("_aesni_${p}rypt6");
290 &static_label("_aesni_${p}rypt6_enter");
291 &$movekey ($rndkey0,&QWP(0,$key));
292 &shr ($rounds,1);
293 &$movekey ($rndkey1,&QWP(16,$key));
294 &lea ($key,&DWP(32,$key));
295 &xorps ($inout0,$rndkey0);
296 &pxor ($inout1,$rndkey0); # pxor does better here
297 eval"&aes${p} ($inout0,$rndkey1)";
298 &pxor ($inout2,$rndkey0);
299 eval"&aes${p} ($inout1,$rndkey1)";
300 &pxor ($inout3,$rndkey0);
301 &dec ($rounds);
302 eval"&aes${p} ($inout2,$rndkey1)";
303 &pxor ($inout4,$rndkey0);
304 eval"&aes${p} ($inout3,$rndkey1)";
305 &pxor ($inout5,$rndkey0);
306 eval"&aes${p} ($inout4,$rndkey1)";
307 &$movekey ($rndkey0,&QWP(0,$key));
308 eval"&aes${p} ($inout5,$rndkey1)";
309 &jmp (&label("_aesni_${p}rypt6_enter"));
310
311 &set_label("${p}6_loop",16);
312 eval"&aes${p} ($inout0,$rndkey1)";
313 eval"&aes${p} ($inout1,$rndkey1)";
314 &dec ($rounds);
315 eval"&aes${p} ($inout2,$rndkey1)";
316 eval"&aes${p} ($inout3,$rndkey1)";
317 eval"&aes${p} ($inout4,$rndkey1)";
318 eval"&aes${p} ($inout5,$rndkey1)";
319 &set_label("_aesni_${p}rypt6_enter",16);
320 &$movekey ($rndkey1,&QWP(16,$key));
321 eval"&aes${p} ($inout0,$rndkey0)";
322 eval"&aes${p} ($inout1,$rndkey0)";
323 &lea ($key,&DWP(32,$key));
324 eval"&aes${p} ($inout2,$rndkey0)";
325 eval"&aes${p} ($inout3,$rndkey0)";
326 eval"&aes${p} ($inout4,$rndkey0)";
327 eval"&aes${p} ($inout5,$rndkey0)";
328 &$movekey ($rndkey0,&QWP(0,$key));
329 &jnz (&label("${p}6_loop"));
330
331 eval"&aes${p} ($inout0,$rndkey1)";
332 eval"&aes${p} ($inout1,$rndkey1)";
333 eval"&aes${p} ($inout2,$rndkey1)";
334 eval"&aes${p} ($inout3,$rndkey1)";
335 eval"&aes${p} ($inout4,$rndkey1)";
336 eval"&aes${p} ($inout5,$rndkey1)";
337 eval"&aes${p}last ($inout0,$rndkey0)";
338 eval"&aes${p}last ($inout1,$rndkey0)";
339 eval"&aes${p}last ($inout2,$rndkey0)";
340 eval"&aes${p}last ($inout3,$rndkey0)";
341 eval"&aes${p}last ($inout4,$rndkey0)";
342 eval"&aes${p}last ($inout5,$rndkey0)";
343 &ret();
344 &function_end_B("_aesni_${p}rypt6");
345}
346&aesni_generate3("enc") if ($PREFIX eq "aesni");
347&aesni_generate3("dec");
348&aesni_generate4("enc") if ($PREFIX eq "aesni");
349&aesni_generate4("dec");
350&aesni_generate6("enc") if ($PREFIX eq "aesni");
351&aesni_generate6("dec");
352
353if ($PREFIX eq "aesni") {
354######################################################################
355# void aesni_ecb_encrypt (const void *in, void *out,
356# size_t length, const AES_KEY *key,
357# int enc);
358&function_begin("aesni_ecb_encrypt");
359 &mov ($inp,&wparam(0));
360 &mov ($out,&wparam(1));
361 &mov ($len,&wparam(2));
362 &mov ($key,&wparam(3));
363 &mov ($rounds_,&wparam(4));
364 &and ($len,-16);
365 &jz (&label("ecb_ret"));
366 &mov ($rounds,&DWP(240,$key));
367 &test ($rounds_,$rounds_);
368 &jz (&label("ecb_decrypt"));
369
370 &mov ($key_,$key); # backup $key
371 &mov ($rounds_,$rounds); # backup $rounds
372 &cmp ($len,0x60);
373 &jb (&label("ecb_enc_tail"));
374
375 &movdqu ($inout0,&QWP(0,$inp));
376 &movdqu ($inout1,&QWP(0x10,$inp));
377 &movdqu ($inout2,&QWP(0x20,$inp));
378 &movdqu ($inout3,&QWP(0x30,$inp));
379 &movdqu ($inout4,&QWP(0x40,$inp));
380 &movdqu ($inout5,&QWP(0x50,$inp));
381 &lea ($inp,&DWP(0x60,$inp));
382 &sub ($len,0x60);
383 &jmp (&label("ecb_enc_loop6_enter"));
384
385&set_label("ecb_enc_loop6",16);
386 &movups (&QWP(0,$out),$inout0);
387 &movdqu ($inout0,&QWP(0,$inp));
388 &movups (&QWP(0x10,$out),$inout1);
389 &movdqu ($inout1,&QWP(0x10,$inp));
390 &movups (&QWP(0x20,$out),$inout2);
391 &movdqu ($inout2,&QWP(0x20,$inp));
392 &movups (&QWP(0x30,$out),$inout3);
393 &movdqu ($inout3,&QWP(0x30,$inp));
394 &movups (&QWP(0x40,$out),$inout4);
395 &movdqu ($inout4,&QWP(0x40,$inp));
396 &movups (&QWP(0x50,$out),$inout5);
397 &lea ($out,&DWP(0x60,$out));
398 &movdqu ($inout5,&QWP(0x50,$inp));
399 &lea ($inp,&DWP(0x60,$inp));
400&set_label("ecb_enc_loop6_enter");
401
402 &call ("_aesni_encrypt6");
403
404 &mov ($key,$key_); # restore $key
405 &mov ($rounds,$rounds_); # restore $rounds
406 &sub ($len,0x60);
407 &jnc (&label("ecb_enc_loop6"));
408
409 &movups (&QWP(0,$out),$inout0);
410 &movups (&QWP(0x10,$out),$inout1);
411 &movups (&QWP(0x20,$out),$inout2);
412 &movups (&QWP(0x30,$out),$inout3);
413 &movups (&QWP(0x40,$out),$inout4);
414 &movups (&QWP(0x50,$out),$inout5);
415 &lea ($out,&DWP(0x60,$out));
416 &add ($len,0x60);
417 &jz (&label("ecb_ret"));
418
419&set_label("ecb_enc_tail");
420 &movups ($inout0,&QWP(0,$inp));
421 &cmp ($len,0x20);
422 &jb (&label("ecb_enc_one"));
423 &movups ($inout1,&QWP(0x10,$inp));
424 &je (&label("ecb_enc_two"));
425 &movups ($inout2,&QWP(0x20,$inp));
426 &cmp ($len,0x40);
427 &jb (&label("ecb_enc_three"));
428 &movups ($inout3,&QWP(0x30,$inp));
429 &je (&label("ecb_enc_four"));
430 &movups ($inout4,&QWP(0x40,$inp));
431 &xorps ($inout5,$inout5);
432 &call ("_aesni_encrypt6");
433 &movups (&QWP(0,$out),$inout0);
434 &movups (&QWP(0x10,$out),$inout1);
435 &movups (&QWP(0x20,$out),$inout2);
436 &movups (&QWP(0x30,$out),$inout3);
437 &movups (&QWP(0x40,$out),$inout4);
438	&jmp	(&label("ecb_ret"));
439
440&set_label("ecb_enc_one",16);
441 if ($inline)
442 { &aesni_inline_generate1("enc"); }
443 else
444 { &call ("_aesni_encrypt1"); }
445 &movups (&QWP(0,$out),$inout0);
446 &jmp (&label("ecb_ret"));
447
448&set_label("ecb_enc_two",16);
449 &xorps ($inout2,$inout2);
450 &call ("_aesni_encrypt3");
451 &movups (&QWP(0,$out),$inout0);
452 &movups (&QWP(0x10,$out),$inout1);
453 &jmp (&label("ecb_ret"));
454
455&set_label("ecb_enc_three",16);
456 &call ("_aesni_encrypt3");
457 &movups (&QWP(0,$out),$inout0);
458 &movups (&QWP(0x10,$out),$inout1);
459 &movups (&QWP(0x20,$out),$inout2);
460 &jmp (&label("ecb_ret"));
461
462&set_label("ecb_enc_four",16);
463 &call ("_aesni_encrypt4");
464 &movups (&QWP(0,$out),$inout0);
465 &movups (&QWP(0x10,$out),$inout1);
466 &movups (&QWP(0x20,$out),$inout2);
467 &movups (&QWP(0x30,$out),$inout3);
468 &jmp (&label("ecb_ret"));
469######################################################################
470&set_label("ecb_decrypt",16);
471 &mov ($key_,$key); # backup $key
472 &mov ($rounds_,$rounds); # backup $rounds
473 &cmp ($len,0x60);
474 &jb (&label("ecb_dec_tail"));
475
476 &movdqu ($inout0,&QWP(0,$inp));
477 &movdqu ($inout1,&QWP(0x10,$inp));
478 &movdqu ($inout2,&QWP(0x20,$inp));
479 &movdqu ($inout3,&QWP(0x30,$inp));
480 &movdqu ($inout4,&QWP(0x40,$inp));
481 &movdqu ($inout5,&QWP(0x50,$inp));
482 &lea ($inp,&DWP(0x60,$inp));
483 &sub ($len,0x60);
484 &jmp (&label("ecb_dec_loop6_enter"));
485
486&set_label("ecb_dec_loop6",16);
487 &movups (&QWP(0,$out),$inout0);
488 &movdqu ($inout0,&QWP(0,$inp));
489 &movups (&QWP(0x10,$out),$inout1);
490 &movdqu ($inout1,&QWP(0x10,$inp));
491 &movups (&QWP(0x20,$out),$inout2);
492 &movdqu ($inout2,&QWP(0x20,$inp));
493 &movups (&QWP(0x30,$out),$inout3);
494 &movdqu ($inout3,&QWP(0x30,$inp));
495 &movups (&QWP(0x40,$out),$inout4);
496 &movdqu ($inout4,&QWP(0x40,$inp));
497 &movups (&QWP(0x50,$out),$inout5);
498 &lea ($out,&DWP(0x60,$out));
499 &movdqu ($inout5,&QWP(0x50,$inp));
500 &lea ($inp,&DWP(0x60,$inp));
501&set_label("ecb_dec_loop6_enter");
502
503 &call ("_aesni_decrypt6");
504
505 &mov ($key,$key_); # restore $key
506 &mov ($rounds,$rounds_); # restore $rounds
507 &sub ($len,0x60);
508 &jnc (&label("ecb_dec_loop6"));
509
510 &movups (&QWP(0,$out),$inout0);
511 &movups (&QWP(0x10,$out),$inout1);
512 &movups (&QWP(0x20,$out),$inout2);
513 &movups (&QWP(0x30,$out),$inout3);
514 &movups (&QWP(0x40,$out),$inout4);
515 &movups (&QWP(0x50,$out),$inout5);
516 &lea ($out,&DWP(0x60,$out));
517 &add ($len,0x60);
518 &jz (&label("ecb_ret"));
519
520&set_label("ecb_dec_tail");
521 &movups ($inout0,&QWP(0,$inp));
522 &cmp ($len,0x20);
523 &jb (&label("ecb_dec_one"));
524 &movups ($inout1,&QWP(0x10,$inp));
525 &je (&label("ecb_dec_two"));
526 &movups ($inout2,&QWP(0x20,$inp));
527 &cmp ($len,0x40);
528 &jb (&label("ecb_dec_three"));
529 &movups ($inout3,&QWP(0x30,$inp));
530 &je (&label("ecb_dec_four"));
531 &movups ($inout4,&QWP(0x40,$inp));
532 &xorps ($inout5,$inout5);
533 &call ("_aesni_decrypt6");
534 &movups (&QWP(0,$out),$inout0);
535 &movups (&QWP(0x10,$out),$inout1);
536 &movups (&QWP(0x20,$out),$inout2);
537 &movups (&QWP(0x30,$out),$inout3);
538 &movups (&QWP(0x40,$out),$inout4);
539 &jmp (&label("ecb_ret"));
540
541&set_label("ecb_dec_one",16);
542 if ($inline)
543 { &aesni_inline_generate1("dec"); }
544 else
545 { &call ("_aesni_decrypt1"); }
546 &movups (&QWP(0,$out),$inout0);
547 &jmp (&label("ecb_ret"));
548
549&set_label("ecb_dec_two",16);
550 &xorps ($inout2,$inout2);
551 &call ("_aesni_decrypt3");
552 &movups (&QWP(0,$out),$inout0);
553 &movups (&QWP(0x10,$out),$inout1);
554 &jmp (&label("ecb_ret"));
555
556&set_label("ecb_dec_three",16);
557 &call ("_aesni_decrypt3");
558 &movups (&QWP(0,$out),$inout0);
559 &movups (&QWP(0x10,$out),$inout1);
560 &movups (&QWP(0x20,$out),$inout2);
561 &jmp (&label("ecb_ret"));
562
563&set_label("ecb_dec_four",16);
564 &call ("_aesni_decrypt4");
565 &movups (&QWP(0,$out),$inout0);
566 &movups (&QWP(0x10,$out),$inout1);
567 &movups (&QWP(0x20,$out),$inout2);
568 &movups (&QWP(0x30,$out),$inout3);
569
570&set_label("ecb_ret");
571&function_end("aesni_ecb_encrypt");
572
573######################################################################
574# void aesni_ccm64_[en|de]crypt_blocks (const void *in, void *out,
575# size_t blocks, const AES_KEY *key,
576# const char *ivec,char *cmac);
577#
578# Handles only complete blocks, operates on a 64-bit counter and
579# does not update *ivec! Nor does it finalize the CMAC value
580# (see engine/eng_aesni.c for details)
581#
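# The counter block and the CMAC are pushed through AES in lock-step:
# on encryption each outer iteration folds the plaintext into $cmac
# (the initial xorps doubles as the whitening step), runs both states
# through the same key schedule, and xors the plaintext with E(counter)
# to form the output before bumping the 64-bit counter.  On decryption
# the CMAC has to absorb the recovered plaintext instead, so the
# counter block is encrypted one iteration ahead of the MAC update.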
582{ my $cmac=$inout1;
583&function_begin("aesni_ccm64_encrypt_blocks");
584 &mov ($inp,&wparam(0));
585 &mov ($out,&wparam(1));
586 &mov ($len,&wparam(2));
587 &mov ($key,&wparam(3));
588 &mov ($rounds_,&wparam(4));
589 &mov ($rounds,&wparam(5));
590 &mov ($key_,"esp");
591 &sub ("esp",60);
592 &and ("esp",-16); # align stack
593 &mov (&DWP(48,"esp"),$key_);
594
595 &movdqu ($ivec,&QWP(0,$rounds_)); # load ivec
596 &movdqu ($cmac,&QWP(0,$rounds)); # load cmac
597 &mov ($rounds,&DWP(240,$key));
598
599 # compose byte-swap control mask for pshufb on stack
600 &mov (&DWP(0,"esp"),0x0c0d0e0f);
601 &mov (&DWP(4,"esp"),0x08090a0b);
602 &mov (&DWP(8,"esp"),0x04050607);
603 &mov (&DWP(12,"esp"),0x00010203);
604
605 # compose counter increment vector on stack
606 &mov ($rounds_,1);
607 &xor ($key_,$key_);
608 &mov (&DWP(16,"esp"),$rounds_);
609 &mov (&DWP(20,"esp"),$key_);
610 &mov (&DWP(24,"esp"),$key_);
611 &mov (&DWP(28,"esp"),$key_);
612
613 &shr ($rounds,1);
614 &lea ($key_,&DWP(0,$key));
615 &movdqa ($inout3,&QWP(0,"esp"));
616 &movdqa ($inout0,$ivec);
617 &mov ($rounds_,$rounds);
618 &pshufb ($ivec,$inout3);
619
620&set_label("ccm64_enc_outer");
621 &$movekey ($rndkey0,&QWP(0,$key_));
622 &mov ($rounds,$rounds_);
623 &movups ($in0,&QWP(0,$inp));
624
625 &xorps ($inout0,$rndkey0);
626 &$movekey ($rndkey1,&QWP(16,$key_));
627 &xorps ($rndkey0,$in0);
628 &lea ($key,&DWP(32,$key_));
629 &xorps ($cmac,$rndkey0); # cmac^=inp
630 &$movekey ($rndkey0,&QWP(0,$key));
631
632&set_label("ccm64_enc2_loop");
633 &aesenc ($inout0,$rndkey1);
634 &dec ($rounds);
635 &aesenc ($cmac,$rndkey1);
636 &$movekey ($rndkey1,&QWP(16,$key));
637 &aesenc ($inout0,$rndkey0);
638 &lea ($key,&DWP(32,$key));
639 &aesenc ($cmac,$rndkey0);
640 &$movekey ($rndkey0,&QWP(0,$key));
641 &jnz (&label("ccm64_enc2_loop"));
642 &aesenc ($inout0,$rndkey1);
643 &aesenc ($cmac,$rndkey1);
644 &paddq ($ivec,&QWP(16,"esp"));
645 &aesenclast ($inout0,$rndkey0);
646 &aesenclast ($cmac,$rndkey0);
647
648 &dec ($len);
649 &lea ($inp,&DWP(16,$inp));
650 &xorps ($in0,$inout0); # inp^=E(ivec)
651 &movdqa ($inout0,$ivec);
652 &movups (&QWP(0,$out),$in0); # save output
653 &lea ($out,&DWP(16,$out));
654 &pshufb ($inout0,$inout3);
655 &jnz (&label("ccm64_enc_outer"));
656
657 &mov ("esp",&DWP(48,"esp"));
658 &mov ($out,&wparam(5));
659 &movups (&QWP(0,$out),$cmac);
660&function_end("aesni_ccm64_encrypt_blocks");
661
662&function_begin("aesni_ccm64_decrypt_blocks");
663 &mov ($inp,&wparam(0));
664 &mov ($out,&wparam(1));
665 &mov ($len,&wparam(2));
666 &mov ($key,&wparam(3));
667 &mov ($rounds_,&wparam(4));
668 &mov ($rounds,&wparam(5));
669 &mov ($key_,"esp");
670 &sub ("esp",60);
671 &and ("esp",-16); # align stack
672 &mov (&DWP(48,"esp"),$key_);
673
674 &movdqu ($ivec,&QWP(0,$rounds_)); # load ivec
675 &movdqu ($cmac,&QWP(0,$rounds)); # load cmac
676 &mov ($rounds,&DWP(240,$key));
677
678 # compose byte-swap control mask for pshufb on stack
679 &mov (&DWP(0,"esp"),0x0c0d0e0f);
680 &mov (&DWP(4,"esp"),0x08090a0b);
681 &mov (&DWP(8,"esp"),0x04050607);
682 &mov (&DWP(12,"esp"),0x00010203);
683
684 # compose counter increment vector on stack
685 &mov ($rounds_,1);
686 &xor ($key_,$key_);
687 &mov (&DWP(16,"esp"),$rounds_);
688 &mov (&DWP(20,"esp"),$key_);
689 &mov (&DWP(24,"esp"),$key_);
690 &mov (&DWP(28,"esp"),$key_);
691
692 &movdqa ($inout3,&QWP(0,"esp")); # bswap mask
693 &movdqa ($inout0,$ivec);
694
695 &mov ($key_,$key);
696 &mov ($rounds_,$rounds);
697
698 &pshufb ($ivec,$inout3);
699 if ($inline)
700 { &aesni_inline_generate1("enc"); }
701 else
702 { &call ("_aesni_encrypt1"); }
703 &movups ($in0,&QWP(0,$inp)); # load inp
704 &paddq ($ivec,&QWP(16,"esp"));
705 &lea ($inp,&QWP(16,$inp));
706 &jmp (&label("ccm64_dec_outer"));
707
708&set_label("ccm64_dec_outer",16);
709 &xorps ($in0,$inout0); # inp ^= E(ivec)
710 &movdqa ($inout0,$ivec);
711 &mov ($rounds,$rounds_);
712 &movups (&QWP(0,$out),$in0); # save output
713 &lea ($out,&DWP(16,$out));
714 &pshufb ($inout0,$inout3);
715
716 &sub ($len,1);
717 &jz (&label("ccm64_dec_break"));
718
719 &$movekey ($rndkey0,&QWP(0,$key_));
720 &shr ($rounds,1);
721 &$movekey ($rndkey1,&QWP(16,$key_));
722 &xorps ($in0,$rndkey0);
723 &lea ($key,&DWP(32,$key_));
724 &xorps ($inout0,$rndkey0);
725 &xorps ($cmac,$in0); # cmac^=out
726 &$movekey ($rndkey0,&QWP(0,$key));
727
728&set_label("ccm64_dec2_loop");
729 &aesenc ($inout0,$rndkey1);
730 &dec ($rounds);
731 &aesenc ($cmac,$rndkey1);
732 &$movekey ($rndkey1,&QWP(16,$key));
733 &aesenc ($inout0,$rndkey0);
734 &lea ($key,&DWP(32,$key));
735 &aesenc ($cmac,$rndkey0);
736 &$movekey ($rndkey0,&QWP(0,$key));
737 &jnz (&label("ccm64_dec2_loop"));
738 &movups ($in0,&QWP(0,$inp)); # load inp
739 &paddq ($ivec,&QWP(16,"esp"));
740 &aesenc ($inout0,$rndkey1);
741 &aesenc ($cmac,$rndkey1);
742 &lea ($inp,&QWP(16,$inp));
743 &aesenclast ($inout0,$rndkey0);
744 &aesenclast ($cmac,$rndkey0);
745 &jmp (&label("ccm64_dec_outer"));
746
747&set_label("ccm64_dec_break",16);
748 &mov ($key,$key_);
749 if ($inline)
750 { &aesni_inline_generate1("enc",$cmac,$in0); }
751 else
752 { &call ("_aesni_encrypt1",$cmac); }
753
754 &mov ("esp",&DWP(48,"esp"));
755 &mov ($out,&wparam(5));
756 &movups (&QWP(0,$out),$cmac);
757&function_end("aesni_ccm64_decrypt_blocks");
758}
759
760######################################################################
761# void aesni_ctr32_encrypt_blocks (const void *in, void *out,
762# size_t blocks, const AES_KEY *key,
763# const char *ivec);
764#
765# Handles only complete blocks, operates on a 32-bit counter and
766# does not update *ivec! (see engine/eng_aesni.c for details)
767#
768# stack layout:
769# 0 pshufb mask
770# 16 vector addend: 0,6,6,6
771# 32 counter-less ivec
772# 48 1st triplet of counter vector
773# 64 2nd triplet of counter vector
774# 80 saved %esp
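# Working from that layout, the code splits the big-endian 32-bit
# counter out of the IV with pextrd/pinsrd, keeps two little-endian
# triplets of counter values that the 0,6,6,6 addend advances by six
# per iteration, byte-swaps them back with pshufb and merges each one
# with the counter-less IV (pshufd/por), so that six blocks can be fed
# to the inlined _aesni_encrypt6 body per pass.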
775
776&function_begin("aesni_ctr32_encrypt_blocks");
777 &mov ($inp,&wparam(0));
778 &mov ($out,&wparam(1));
779 &mov ($len,&wparam(2));
780 &mov ($key,&wparam(3));
781 &mov ($rounds_,&wparam(4));
782 &mov ($key_,"esp");
783 &sub ("esp",88);
784 &and ("esp",-16); # align stack
785 &mov (&DWP(80,"esp"),$key_);
786
787 &cmp ($len,1);
788 &je (&label("ctr32_one_shortcut"));
789
790 &movdqu ($inout5,&QWP(0,$rounds_)); # load ivec
791
792 # compose byte-swap control mask for pshufb on stack
793 &mov (&DWP(0,"esp"),0x0c0d0e0f);
794 &mov (&DWP(4,"esp"),0x08090a0b);
795 &mov (&DWP(8,"esp"),0x04050607);
796 &mov (&DWP(12,"esp"),0x00010203);
797
798 # compose counter increment vector on stack
799 &mov ($rounds,6);
800 &xor ($key_,$key_);
801 &mov (&DWP(16,"esp"),$rounds);
802 &mov (&DWP(20,"esp"),$rounds);
803 &mov (&DWP(24,"esp"),$rounds);
804 &mov (&DWP(28,"esp"),$key_);
805
806 &pextrd ($rounds_,$inout5,3); # pull 32-bit counter
807 &pinsrd ($inout5,$key_,3); # wipe 32-bit counter
808
809 &mov ($rounds,&DWP(240,$key)); # key->rounds
810
811 # compose 2 vectors of 3x32-bit counters
812 &bswap ($rounds_);
813 &pxor ($rndkey1,$rndkey1);
814 &pxor ($rndkey0,$rndkey0);
815 &movdqa ($inout0,&QWP(0,"esp")); # load byte-swap mask
816 &pinsrd ($rndkey1,$rounds_,0);
817 &lea ($key_,&DWP(3,$rounds_));
818 &pinsrd ($rndkey0,$key_,0);
819 &inc ($rounds_);
820 &pinsrd ($rndkey1,$rounds_,1);
821 &inc ($key_);
822 &pinsrd ($rndkey0,$key_,1);
823 &inc ($rounds_);
824 &pinsrd ($rndkey1,$rounds_,2);
825 &inc ($key_);
826 &pinsrd ($rndkey0,$key_,2);
827 &movdqa (&QWP(48,"esp"),$rndkey1); # save 1st triplet
828 &pshufb ($rndkey1,$inout0); # byte swap
829 &movdqa (&QWP(64,"esp"),$rndkey0); # save 2nd triplet
830 &pshufb ($rndkey0,$inout0); # byte swap
831
832 &pshufd ($inout0,$rndkey1,3<<6); # place counter to upper dword
833 &pshufd ($inout1,$rndkey1,2<<6);
834 &cmp ($len,6);
835 &jb (&label("ctr32_tail"));
836 &movdqa (&QWP(32,"esp"),$inout5); # save counter-less ivec
837 &shr ($rounds,1);
838 &mov ($key_,$key); # backup $key
839 &mov ($rounds_,$rounds); # backup $rounds
840 &sub ($len,6);
841 &jmp (&label("ctr32_loop6"));
842
843&set_label("ctr32_loop6",16);
844 &pshufd ($inout2,$rndkey1,1<<6);
845 &movdqa ($rndkey1,&QWP(32,"esp")); # pull counter-less ivec
846 &pshufd ($inout3,$rndkey0,3<<6);
847 &por ($inout0,$rndkey1); # merge counter-less ivec
848 &pshufd ($inout4,$rndkey0,2<<6);
849 &por ($inout1,$rndkey1);
850 &pshufd ($inout5,$rndkey0,1<<6);
851 &por ($inout2,$rndkey1);
852 &por ($inout3,$rndkey1);
853 &por ($inout4,$rndkey1);
854 &por ($inout5,$rndkey1);
855
856 # inlining _aesni_encrypt6's prologue gives ~4% improvement...
857 &$movekey ($rndkey0,&QWP(0,$key_));
858 &$movekey ($rndkey1,&QWP(16,$key_));
859 &lea ($key,&DWP(32,$key_));
860 &dec ($rounds);
861 &pxor ($inout0,$rndkey0);
862 &pxor ($inout1,$rndkey0);
863 &aesenc ($inout0,$rndkey1);
864 &pxor ($inout2,$rndkey0);
865 &aesenc ($inout1,$rndkey1);
866 &pxor ($inout3,$rndkey0);
867 &aesenc ($inout2,$rndkey1);
868 &pxor ($inout4,$rndkey0);
869 &aesenc ($inout3,$rndkey1);
870 &pxor ($inout5,$rndkey0);
871 &aesenc ($inout4,$rndkey1);
872 &$movekey ($rndkey0,&QWP(0,$key));
873 &aesenc ($inout5,$rndkey1);
874
875 &call (&label("_aesni_encrypt6_enter"));
876
877 &movups ($rndkey1,&QWP(0,$inp));
878 &movups ($rndkey0,&QWP(0x10,$inp));
879 &xorps ($inout0,$rndkey1);
880 &movups ($rndkey1,&QWP(0x20,$inp));
881 &xorps ($inout1,$rndkey0);
882 &movups (&QWP(0,$out),$inout0);
883 &movdqa ($rndkey0,&QWP(16,"esp")); # load increment
884 &xorps ($inout2,$rndkey1);
885 &movdqa ($rndkey1,&QWP(48,"esp")); # load 1st triplet
886 &movups (&QWP(0x10,$out),$inout1);
887 &movups (&QWP(0x20,$out),$inout2);
888
889 &paddd ($rndkey1,$rndkey0); # 1st triplet increment
890 &paddd ($rndkey0,&QWP(64,"esp")); # 2nd triplet increment
891 &movdqa ($inout0,&QWP(0,"esp")); # load byte swap mask
892
893 &movups ($inout1,&QWP(0x30,$inp));
894 &movups ($inout2,&QWP(0x40,$inp));
895 &xorps ($inout3,$inout1);
896 &movups ($inout1,&QWP(0x50,$inp));
897 &lea ($inp,&DWP(0x60,$inp));
898 &movdqa (&QWP(48,"esp"),$rndkey1); # save 1st triplet
899 &pshufb ($rndkey1,$inout0); # byte swap
900 &xorps ($inout4,$inout2);
901 &movups (&QWP(0x30,$out),$inout3);
902 &xorps ($inout5,$inout1);
903 &movdqa (&QWP(64,"esp"),$rndkey0); # save 2nd triplet
904 &pshufb ($rndkey0,$inout0); # byte swap
905 &movups (&QWP(0x40,$out),$inout4);
906 &pshufd ($inout0,$rndkey1,3<<6);
907 &movups (&QWP(0x50,$out),$inout5);
908 &lea ($out,&DWP(0x60,$out));
909
910 &mov ($rounds,$rounds_);
911 &pshufd ($inout1,$rndkey1,2<<6);
912 &sub ($len,6);
913 &jnc (&label("ctr32_loop6"));
914
915 &add ($len,6);
916 &jz (&label("ctr32_ret"));
917 &mov ($key,$key_);
918 &lea ($rounds,&DWP(1,"",$rounds,2)); # restore $rounds
919	&movdqa	($inout5,&QWP(32,"esp"));	# pull counter-less ivec
920
921&set_label("ctr32_tail");
922 &por ($inout0,$inout5);
923 &cmp ($len,2);
924 &jb (&label("ctr32_one"));
925
926 &pshufd ($inout2,$rndkey1,1<<6);
927 &por ($inout1,$inout5);
928 &je (&label("ctr32_two"));
929
930 &pshufd ($inout3,$rndkey0,3<<6);
931 &por ($inout2,$inout5);
932 &cmp ($len,4);
933 &jb (&label("ctr32_three"));
934
935 &pshufd ($inout4,$rndkey0,2<<6);
936 &por ($inout3,$inout5);
937 &je (&label("ctr32_four"));
938
939 &por ($inout4,$inout5);
940 &call ("_aesni_encrypt6");
941 &movups ($rndkey1,&QWP(0,$inp));
942 &movups ($rndkey0,&QWP(0x10,$inp));
943 &xorps ($inout0,$rndkey1);
944 &movups ($rndkey1,&QWP(0x20,$inp));
945 &xorps ($inout1,$rndkey0);
946 &movups ($rndkey0,&QWP(0x30,$inp));
947 &xorps ($inout2,$rndkey1);
948 &movups ($rndkey1,&QWP(0x40,$inp));
949 &xorps ($inout3,$rndkey0);
950 &movups (&QWP(0,$out),$inout0);
951 &xorps ($inout4,$rndkey1);
952 &movups (&QWP(0x10,$out),$inout1);
953 &movups (&QWP(0x20,$out),$inout2);
954 &movups (&QWP(0x30,$out),$inout3);
955 &movups (&QWP(0x40,$out),$inout4);
956 &jmp (&label("ctr32_ret"));
957
958&set_label("ctr32_one_shortcut",16);
959 &movups ($inout0,&QWP(0,$rounds_)); # load ivec
960 &mov ($rounds,&DWP(240,$key));
961
962&set_label("ctr32_one");
963 if ($inline)
964 { &aesni_inline_generate1("enc"); }
965 else
966 { &call ("_aesni_encrypt1"); }
967 &movups ($in0,&QWP(0,$inp));
968 &xorps ($in0,$inout0);
969 &movups (&QWP(0,$out),$in0);
970 &jmp (&label("ctr32_ret"));
971
972&set_label("ctr32_two",16);
973 &call ("_aesni_encrypt3");
974 &movups ($inout3,&QWP(0,$inp));
975 &movups ($inout4,&QWP(0x10,$inp));
976 &xorps ($inout0,$inout3);
977 &xorps ($inout1,$inout4);
978 &movups (&QWP(0,$out),$inout0);
979 &movups (&QWP(0x10,$out),$inout1);
980 &jmp (&label("ctr32_ret"));
981
982&set_label("ctr32_three",16);
983 &call ("_aesni_encrypt3");
984 &movups ($inout3,&QWP(0,$inp));
985 &movups ($inout4,&QWP(0x10,$inp));
986 &xorps ($inout0,$inout3);
987 &movups ($inout5,&QWP(0x20,$inp));
988 &xorps ($inout1,$inout4);
989 &movups (&QWP(0,$out),$inout0);
990 &xorps ($inout2,$inout5);
991 &movups (&QWP(0x10,$out),$inout1);
992 &movups (&QWP(0x20,$out),$inout2);
993 &jmp (&label("ctr32_ret"));
994
995&set_label("ctr32_four",16);
996 &call ("_aesni_encrypt4");
997 &movups ($inout4,&QWP(0,$inp));
998 &movups ($inout5,&QWP(0x10,$inp));
999 &movups ($rndkey1,&QWP(0x20,$inp));
1000 &xorps ($inout0,$inout4);
1001 &movups ($rndkey0,&QWP(0x30,$inp));
1002 &xorps ($inout1,$inout5);
1003 &movups (&QWP(0,$out),$inout0);
1004 &xorps ($inout2,$rndkey1);
1005 &movups (&QWP(0x10,$out),$inout1);
1006 &xorps ($inout3,$rndkey0);
1007 &movups (&QWP(0x20,$out),$inout2);
1008 &movups (&QWP(0x30,$out),$inout3);
1009
1010&set_label("ctr32_ret");
1011 &mov ("esp",&DWP(80,"esp"));
1012&function_end("aesni_ctr32_encrypt_blocks");
1013
1014######################################################################
1015# void aesni_xts_[en|de]crypt(const char *inp,char *out,size_t len,
1016# const AES_KEY *key1, const AES_KEY *key2,
1017# const unsigned char iv[16]);
1018#
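# The tweak chain is maintained with integer SSE throughout: the
# clear-text tweak is first encrypted under key2, and every subsequent
# tweak is the previous one times x in GF(2^128), implemented as a
# paddq (shift of both 64-bit halves left by one) whose inter-half
# carry and 0x87 reduction are patched back in by the
# pcmpgtd/pshufd/pand sequence against the "magic constant" composed
# on the stack.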
1019{ my ($tweak,$twtmp,$twres,$twmask)=($rndkey1,$rndkey0,$inout0,$inout1);
1020
1021&function_begin("aesni_xts_encrypt");
1022 &mov ($key,&wparam(4)); # key2
1023 &mov ($inp,&wparam(5)); # clear-text tweak
1024
1025 &mov ($rounds,&DWP(240,$key)); # key2->rounds
1026 &movups ($inout0,&QWP(0,$inp));
1027 if ($inline)
1028 { &aesni_inline_generate1("enc"); }
1029 else
1030 { &call ("_aesni_encrypt1"); }
1031
1032 &mov ($inp,&wparam(0));
1033 &mov ($out,&wparam(1));
1034 &mov ($len,&wparam(2));
1035 &mov ($key,&wparam(3)); # key1
1036
1037 &mov ($key_,"esp");
1038 &sub ("esp",16*7+8);
1039 &mov ($rounds,&DWP(240,$key)); # key1->rounds
1040 &and ("esp",-16); # align stack
1041
1042 &mov (&DWP(16*6+0,"esp"),0x87); # compose the magic constant
1043 &mov (&DWP(16*6+4,"esp"),0);
1044 &mov (&DWP(16*6+8,"esp"),1);
1045 &mov (&DWP(16*6+12,"esp"),0);
1046 &mov (&DWP(16*7+0,"esp"),$len); # save original $len
1047 &mov (&DWP(16*7+4,"esp"),$key_); # save original %esp
1048
1049 &movdqa ($tweak,$inout0);
1050 &pxor ($twtmp,$twtmp);
1051 &movdqa ($twmask,&QWP(6*16,"esp")); # 0x0...010...87
1052 &pcmpgtd($twtmp,$tweak); # broadcast upper bits
1053
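# The pshufd/pcmpgtd/pand/pxor pattern used throughout below doubles the
# 128-bit tweak in GF(2^128) with the reduction polynomial
# x^128 + x^7 + x^2 + x + 1, hence the 0x87 constant. A scalar sketch of
# the same update, with a hypothetical helper name:
#
#	void xts_double(uint64_t t[2])          /* little-endian tweak */
#	{
#		uint64_t carry = t[1] >> 63;             /* bit shifted out */
#		t[1] = (t[1] << 1) | (t[0] >> 63);
#		t[0] = (t[0] << 1) ^ (carry * 0x87);     /* fold carry back */
#	}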
1054 &and ($len,-16);
1055 &mov ($key_,$key); # backup $key
1056 &mov ($rounds_,$rounds); # backup $rounds
1057 &sub ($len,16*6);
1058 &jc (&label("xts_enc_short"));
1059
1060 &shr ($rounds,1);
1061 &mov ($rounds_,$rounds);
1062 &jmp (&label("xts_enc_loop6"));
1063
1064&set_label("xts_enc_loop6",16);
1065 for ($i=0;$i<4;$i++) {
1066 &pshufd ($twres,$twtmp,0x13);
1067 &pxor ($twtmp,$twtmp);
1068 &movdqa (&QWP(16*$i,"esp"),$tweak);
1069 &paddq ($tweak,$tweak); # &psllq($tweak,1);
1070 &pand ($twres,$twmask); # isolate carry and residue
1071 &pcmpgtd ($twtmp,$tweak); # broadcast upper bits
1072 &pxor ($tweak,$twres);
1073 }
1074 &pshufd ($inout5,$twtmp,0x13);
1075 &movdqa (&QWP(16*$i++,"esp"),$tweak);
1076 &paddq ($tweak,$tweak); # &psllq($tweak,1);
1077 &$movekey ($rndkey0,&QWP(0,$key_));
1078 &pand ($inout5,$twmask); # isolate carry and residue
1079 &movups ($inout0,&QWP(0,$inp)); # load input
1080 &pxor ($inout5,$tweak);
1081
1082 # inline _aesni_encrypt6 prologue and flip xor with tweak and key[0]
1083 &movdqu ($inout1,&QWP(16*1,$inp));
1084 &xorps ($inout0,$rndkey0); # input^=rndkey[0]
1085 &movdqu ($inout2,&QWP(16*2,$inp));
1086 &pxor ($inout1,$rndkey0);
1087 &movdqu ($inout3,&QWP(16*3,$inp));
1088 &pxor ($inout2,$rndkey0);
1089 &movdqu ($inout4,&QWP(16*4,$inp));
1090 &pxor ($inout3,$rndkey0);
1091 &movdqu ($rndkey1,&QWP(16*5,$inp));
1092 &pxor ($inout4,$rndkey0);
1093 &lea ($inp,&DWP(16*6,$inp));
1094 &pxor ($inout0,&QWP(16*0,"esp")); # input^=tweak
1095 &movdqa (&QWP(16*$i,"esp"),$inout5); # save last tweak
1096 &pxor ($inout5,$rndkey1);
1097
1098 &$movekey ($rndkey1,&QWP(16,$key_));
1099 &lea ($key,&DWP(32,$key_));
1100 &pxor ($inout1,&QWP(16*1,"esp"));
1101 &aesenc ($inout0,$rndkey1);
1102 &pxor ($inout2,&QWP(16*2,"esp"));
1103 &aesenc ($inout1,$rndkey1);
1104 &pxor ($inout3,&QWP(16*3,"esp"));
1105 &dec ($rounds);
1106 &aesenc ($inout2,$rndkey1);
1107 &pxor ($inout4,&QWP(16*4,"esp"));
1108 &aesenc ($inout3,$rndkey1);
1109 &pxor ($inout5,$rndkey0);
1110 &aesenc ($inout4,$rndkey1);
1111 &$movekey ($rndkey0,&QWP(0,$key));
1112 &aesenc ($inout5,$rndkey1);
1113 &call (&label("_aesni_encrypt6_enter"));
1114
1115 &movdqa ($tweak,&QWP(16*5,"esp")); # last tweak
1116 &pxor ($twtmp,$twtmp);
1117 &xorps ($inout0,&QWP(16*0,"esp")); # output^=tweak
1118 &pcmpgtd ($twtmp,$tweak); # broadcast upper bits
1119 &xorps ($inout1,&QWP(16*1,"esp"));
1120 &movups (&QWP(16*0,$out),$inout0); # write output
1121 &xorps ($inout2,&QWP(16*2,"esp"));
1122 &movups (&QWP(16*1,$out),$inout1);
1123 &xorps ($inout3,&QWP(16*3,"esp"));
1124 &movups (&QWP(16*2,$out),$inout2);
1125 &xorps ($inout4,&QWP(16*4,"esp"));
1126 &movups (&QWP(16*3,$out),$inout3);
1127 &xorps ($inout5,$tweak);
1128 &movups (&QWP(16*4,$out),$inout4);
1129 &pshufd ($twres,$twtmp,0x13);
1130 &movups (&QWP(16*5,$out),$inout5);
1131 &lea ($out,&DWP(16*6,$out));
1132 &movdqa ($twmask,&QWP(16*6,"esp")); # 0x0...010...87
1133
1134 &pxor ($twtmp,$twtmp);
1135 &paddq ($tweak,$tweak); # &psllq($tweak,1);
1136 &pand ($twres,$twmask); # isolate carry and residue
1137 &pcmpgtd($twtmp,$tweak); # broadcast upper bits
1138 &mov ($rounds,$rounds_); # restore $rounds
1139 &pxor ($tweak,$twres);
1140
1141 &sub ($len,16*6);
1142 &jnc (&label("xts_enc_loop6"));
1143
1144 &lea ($rounds,&DWP(1,"",$rounds,2)); # restore $rounds
1145 &mov ($key,$key_); # restore $key
1146 &mov ($rounds_,$rounds);
1147
1148&set_label("xts_enc_short");
1149 &add ($len,16*6);
1150 &jz (&label("xts_enc_done6x"));
1151
1152 &movdqa ($inout3,$tweak); # put aside previous tweak
1153 &cmp ($len,0x20);
1154 &jb (&label("xts_enc_one"));
1155
1156 &pshufd ($twres,$twtmp,0x13);
1157 &pxor ($twtmp,$twtmp);
1158 &paddq ($tweak,$tweak); # &psllq($tweak,1);
1159 &pand ($twres,$twmask); # isolate carry and residue
1160 &pcmpgtd($twtmp,$tweak); # broadcast upper bits
1161 &pxor ($tweak,$twres);
1162 &je (&label("xts_enc_two"));
1163
1164 &pshufd ($twres,$twtmp,0x13);
1165 &pxor ($twtmp,$twtmp);
1166 &movdqa ($inout4,$tweak); # put aside previous tweak
1167 &paddq ($tweak,$tweak); # &psllq($tweak,1);
1168 &pand ($twres,$twmask); # isolate carry and residue
1169 &pcmpgtd($twtmp,$tweak); # broadcast upper bits
1170 &pxor ($tweak,$twres);
1171 &cmp ($len,0x40);
1172 &jb (&label("xts_enc_three"));
1173
1174 &pshufd ($twres,$twtmp,0x13);
1175 &pxor ($twtmp,$twtmp);
1176 &movdqa ($inout5,$tweak); # put aside previous tweak
1177 &paddq ($tweak,$tweak); # &psllq($tweak,1);
1178 &pand ($twres,$twmask); # isolate carry and residue
1179 &pcmpgtd($twtmp,$tweak); # broadcast upper bits
1180 &pxor ($tweak,$twres);
1181 &movdqa (&QWP(16*0,"esp"),$inout3);
1182 &movdqa (&QWP(16*1,"esp"),$inout4);
1183 &je (&label("xts_enc_four"));
1184
1185 &movdqa (&QWP(16*2,"esp"),$inout5);
1186 &pshufd ($inout5,$twtmp,0x13);
1187 &movdqa (&QWP(16*3,"esp"),$tweak);
1188	 &paddq ($tweak,$tweak); # &psllq($tweak,1);
1189 &pand ($inout5,$twmask); # isolate carry and residue
1190 &pxor ($inout5,$tweak);
1191
1192 &movdqu ($inout0,&QWP(16*0,$inp)); # load input
1193 &movdqu ($inout1,&QWP(16*1,$inp));
1194 &movdqu ($inout2,&QWP(16*2,$inp));
1195 &pxor ($inout0,&QWP(16*0,"esp")); # input^=tweak
1196 &movdqu ($inout3,&QWP(16*3,$inp));
1197 &pxor ($inout1,&QWP(16*1,"esp"));
1198 &movdqu ($inout4,&QWP(16*4,$inp));
1199 &pxor ($inout2,&QWP(16*2,"esp"));
1200 &lea ($inp,&DWP(16*5,$inp));
1201 &pxor ($inout3,&QWP(16*3,"esp"));
1202 &movdqa (&QWP(16*4,"esp"),$inout5); # save last tweak
1203 &pxor ($inout4,$inout5);
1204
1205 &call ("_aesni_encrypt6");
1206
1207 &movaps ($tweak,&QWP(16*4,"esp")); # last tweak
1208 &xorps ($inout0,&QWP(16*0,"esp")); # output^=tweak
1209 &xorps ($inout1,&QWP(16*1,"esp"));
1210 &xorps ($inout2,&QWP(16*2,"esp"));
1211 &movups (&QWP(16*0,$out),$inout0); # write output
1212 &xorps ($inout3,&QWP(16*3,"esp"));
1213 &movups (&QWP(16*1,$out),$inout1);
1214 &xorps ($inout4,$tweak);
1215 &movups (&QWP(16*2,$out),$inout2);
1216 &movups (&QWP(16*3,$out),$inout3);
1217 &movups (&QWP(16*4,$out),$inout4);
1218 &lea ($out,&DWP(16*5,$out));
1219 &jmp (&label("xts_enc_done"));
1220
1221&set_label("xts_enc_one",16);
1222 &movups ($inout0,&QWP(16*0,$inp)); # load input
1223 &lea ($inp,&DWP(16*1,$inp));
1224 &xorps ($inout0,$inout3); # input^=tweak
1225 if ($inline)
1226 { &aesni_inline_generate1("enc"); }
1227 else
1228 { &call ("_aesni_encrypt1"); }
1229 &xorps ($inout0,$inout3); # output^=tweak
1230 &movups (&QWP(16*0,$out),$inout0); # write output
1231 &lea ($out,&DWP(16*1,$out));
1232
1233 &movdqa ($tweak,$inout3); # last tweak
1234 &jmp (&label("xts_enc_done"));
1235
1236&set_label("xts_enc_two",16);
1237 &movaps ($inout4,$tweak); # put aside last tweak
1238
1239 &movups ($inout0,&QWP(16*0,$inp)); # load input
1240 &movups ($inout1,&QWP(16*1,$inp));
1241 &lea ($inp,&DWP(16*2,$inp));
1242 &xorps ($inout0,$inout3); # input^=tweak
1243 &xorps ($inout1,$inout4);
1244 &xorps ($inout2,$inout2);
1245
1246 &call ("_aesni_encrypt3");
1247
1248 &xorps ($inout0,$inout3); # output^=tweak
1249 &xorps ($inout1,$inout4);
1250 &movups (&QWP(16*0,$out),$inout0); # write output
1251 &movups (&QWP(16*1,$out),$inout1);
1252 &lea ($out,&DWP(16*2,$out));
1253
1254 &movdqa ($tweak,$inout4); # last tweak
1255 &jmp (&label("xts_enc_done"));
1256
1257&set_label("xts_enc_three",16);
1258 &movaps ($inout5,$tweak); # put aside last tweak
1259 &movups ($inout0,&QWP(16*0,$inp)); # load input
1260 &movups ($inout1,&QWP(16*1,$inp));
1261 &movups ($inout2,&QWP(16*2,$inp));
1262 &lea ($inp,&DWP(16*3,$inp));
1263 &xorps ($inout0,$inout3); # input^=tweak
1264 &xorps ($inout1,$inout4);
1265 &xorps ($inout2,$inout5);
1266
1267 &call ("_aesni_encrypt3");
1268
1269 &xorps ($inout0,$inout3); # output^=tweak
1270 &xorps ($inout1,$inout4);
1271 &xorps ($inout2,$inout5);
1272 &movups (&QWP(16*0,$out),$inout0); # write output
1273 &movups (&QWP(16*1,$out),$inout1);
1274 &movups (&QWP(16*2,$out),$inout2);
1275 &lea ($out,&DWP(16*3,$out));
1276
1277 &movdqa ($tweak,$inout5); # last tweak
1278 &jmp (&label("xts_enc_done"));
1279
1280&set_label("xts_enc_four",16);
1281 &movaps ($inout4,$tweak); # put aside last tweak
1282
1283 &movups ($inout0,&QWP(16*0,$inp)); # load input
1284 &movups ($inout1,&QWP(16*1,$inp));
1285 &movups ($inout2,&QWP(16*2,$inp));
1286 &xorps ($inout0,&QWP(16*0,"esp")); # input^=tweak
1287 &movups ($inout3,&QWP(16*3,$inp));
1288 &lea ($inp,&DWP(16*4,$inp));
1289 &xorps ($inout1,&QWP(16*1,"esp"));
1290 &xorps ($inout2,$inout5);
1291 &xorps ($inout3,$inout4);
1292
1293 &call ("_aesni_encrypt4");
1294
1295 &xorps ($inout0,&QWP(16*0,"esp")); # output^=tweak
1296 &xorps ($inout1,&QWP(16*1,"esp"));
1297 &xorps ($inout2,$inout5);
1298 &movups (&QWP(16*0,$out),$inout0); # write output
1299 &xorps ($inout3,$inout4);
1300 &movups (&QWP(16*1,$out),$inout1);
1301 &movups (&QWP(16*2,$out),$inout2);
1302 &movups (&QWP(16*3,$out),$inout3);
1303 &lea ($out,&DWP(16*4,$out));
1304
1305 &movdqa ($tweak,$inout4); # last tweak
1306 &jmp (&label("xts_enc_done"));
1307
1308&set_label("xts_enc_done6x",16); # $tweak is pre-calculated
1309 &mov ($len,&DWP(16*7+0,"esp")); # restore original $len
1310 &and ($len,15);
1311 &jz (&label("xts_enc_ret"));
1312 &movdqa ($inout3,$tweak);
1313 &mov (&DWP(16*7+0,"esp"),$len); # save $len%16
1314 &jmp (&label("xts_enc_steal"));
1315
1316&set_label("xts_enc_done",16);
1317 &mov ($len,&DWP(16*7+0,"esp")); # restore original $len
1318 &pxor ($twtmp,$twtmp);
1319 &and ($len,15);
1320 &jz (&label("xts_enc_ret"));
1321
1322 &pcmpgtd($twtmp,$tweak); # broadcast upper bits
1323 &mov (&DWP(16*7+0,"esp"),$len); # save $len%16
1324 &pshufd ($inout3,$twtmp,0x13);
1325 &paddq ($tweak,$tweak); # &psllq($tweak,1);
1326 &pand ($inout3,&QWP(16*6,"esp")); # isolate carry and residue
1327 &pxor ($inout3,$tweak);
1328
1329&set_label("xts_enc_steal");
1330 &movz ($rounds,&BP(0,$inp));
1331 &movz ($key,&BP(-16,$out));
1332 &lea ($inp,&DWP(1,$inp));
1333 &mov (&BP(-16,$out),&LB($rounds));
1334 &mov (&BP(0,$out),&LB($key));
1335 &lea ($out,&DWP(1,$out));
1336 &sub ($len,1);
1337 &jnz (&label("xts_enc_steal"));
1338
1339 &sub ($out,&DWP(16*7+0,"esp")); # rewind $out
1340 &mov ($key,$key_); # restore $key
1341 &mov ($rounds,$rounds_); # restore $rounds
1342
1343 &movups ($inout0,&QWP(-16,$out)); # load input
1344 &xorps ($inout0,$inout3); # input^=tweak
1345 if ($inline)
1346 { &aesni_inline_generate1("enc"); }
1347 else
1348 { &call ("_aesni_encrypt1"); }
1349 &xorps ($inout0,$inout3); # output^=tweak
1350 &movups (&QWP(-16,$out),$inout0); # write output
1351
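# The xts_enc_steal path above is standard ciphertext stealing; in scalar
# form (a sketch, with r = len%16, $out just past the last full ciphertext
# block and $inp at the r-byte plaintext tail):
#
#	for (i = 0; i < r; i++) {
#		unsigned char c = out[i - 16];  /* byte of last full ciphertext */
#		out[i - 16] = inp[i];           /* build the final input block  */
#		out[i] = c;                     /* stolen bytes become the tail */
#	}
#	/* then re-encrypt out[-16..-1] with the next tweak, as done above */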
1352&set_label("xts_enc_ret");
1353 &mov ("esp",&DWP(16*7+4,"esp")); # restore %esp
1354&function_end("aesni_xts_encrypt");
1355
1356&function_begin("aesni_xts_decrypt");
1357 &mov ($key,&wparam(4)); # key2
1358 &mov ($inp,&wparam(5)); # clear-text tweak
1359
1360 &mov ($rounds,&DWP(240,$key)); # key2->rounds
1361 &movups ($inout0,&QWP(0,$inp));
1362 if ($inline)
1363 { &aesni_inline_generate1("enc"); }
1364 else
1365 { &call ("_aesni_encrypt1"); }
1366
1367 &mov ($inp,&wparam(0));
1368 &mov ($out,&wparam(1));
1369 &mov ($len,&wparam(2));
1370 &mov ($key,&wparam(3)); # key1
1371
1372 &mov ($key_,"esp");
1373 &sub ("esp",16*7+8);
1374 &and ("esp",-16); # align stack
1375
1376 &xor ($rounds_,$rounds_); # if(len%16) len-=16;
1377 &test ($len,15);
1378 &setnz (&LB($rounds_));
1379 &shl ($rounds_,4);
1380 &sub ($len,$rounds_);
1381
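# The setnz/shl/sub sequence above is a branchless form of
# "if (len % 16) len -= 16": when a partial tail exists, the last full
# ciphertext block is held back so it can be processed with the later
# tweak required by ciphertext stealing. Equivalent C, for reference:
#
#	len -= ((len & 15) != 0) ? 16 : 0;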
1382 &mov (&DWP(16*6+0,"esp"),0x87); # compose the magic constant
1383 &mov (&DWP(16*6+4,"esp"),0);
1384 &mov (&DWP(16*6+8,"esp"),1);
1385 &mov (&DWP(16*6+12,"esp"),0);
1386 &mov (&DWP(16*7+0,"esp"),$len); # save original $len
1387 &mov (&DWP(16*7+4,"esp"),$key_); # save original %esp
1388
1389 &mov ($rounds,&DWP(240,$key)); # key1->rounds
1390 &mov ($key_,$key); # backup $key
1391 &mov ($rounds_,$rounds); # backup $rounds
1392
1393 &movdqa ($tweak,$inout0);
1394 &pxor ($twtmp,$twtmp);
1395 &movdqa ($twmask,&QWP(6*16,"esp")); # 0x0...010...87
1396 &pcmpgtd($twtmp,$tweak); # broadcast upper bits
1397
1398 &and ($len,-16);
1399 &sub ($len,16*6);
1400 &jc (&label("xts_dec_short"));
1401
1402 &shr ($rounds,1);
1403 &mov ($rounds_,$rounds);
1404 &jmp (&label("xts_dec_loop6"));
1405
1406&set_label("xts_dec_loop6",16);
1407 for ($i=0;$i<4;$i++) {
1408 &pshufd ($twres,$twtmp,0x13);
1409 &pxor ($twtmp,$twtmp);
1410 &movdqa (&QWP(16*$i,"esp"),$tweak);
1411 &paddq ($tweak,$tweak); # &psllq($tweak,1);
1412 &pand ($twres,$twmask); # isolate carry and residue
1413 &pcmpgtd ($twtmp,$tweak); # broadcast upper bits
1414 &pxor ($tweak,$twres);
1415 }
1416 &pshufd ($inout5,$twtmp,0x13);
1417 &movdqa (&QWP(16*$i++,"esp"),$tweak);
1418 &paddq ($tweak,$tweak); # &psllq($tweak,1);
1419 &$movekey ($rndkey0,&QWP(0,$key_));
1420 &pand ($inout5,$twmask); # isolate carry and residue
1421 &movups ($inout0,&QWP(0,$inp)); # load input
1422 &pxor ($inout5,$tweak);
1423
1424 # inline _aesni_encrypt6 prologue and flip xor with tweak and key[0]
1425 &movdqu ($inout1,&QWP(16*1,$inp));
1426 &xorps ($inout0,$rndkey0); # input^=rndkey[0]
1427 &movdqu ($inout2,&QWP(16*2,$inp));
1428 &pxor ($inout1,$rndkey0);
1429 &movdqu ($inout3,&QWP(16*3,$inp));
1430 &pxor ($inout2,$rndkey0);
1431 &movdqu ($inout4,&QWP(16*4,$inp));
1432 &pxor ($inout3,$rndkey0);
1433 &movdqu ($rndkey1,&QWP(16*5,$inp));
1434 &pxor ($inout4,$rndkey0);
1435 &lea ($inp,&DWP(16*6,$inp));
1436 &pxor ($inout0,&QWP(16*0,"esp")); # input^=tweak
1437 &movdqa (&QWP(16*$i,"esp"),$inout5); # save last tweak
1438 &pxor ($inout5,$rndkey1);
1439
1440 &$movekey ($rndkey1,&QWP(16,$key_));
1441 &lea ($key,&DWP(32,$key_));
1442 &pxor ($inout1,&QWP(16*1,"esp"));
1443 &aesdec ($inout0,$rndkey1);
1444 &pxor ($inout2,&QWP(16*2,"esp"));
1445 &aesdec ($inout1,$rndkey1);
1446 &pxor ($inout3,&QWP(16*3,"esp"));
1447 &dec ($rounds);
1448 &aesdec ($inout2,$rndkey1);
1449 &pxor ($inout4,&QWP(16*4,"esp"));
1450 &aesdec ($inout3,$rndkey1);
1451 &pxor ($inout5,$rndkey0);
1452 &aesdec ($inout4,$rndkey1);
1453 &$movekey ($rndkey0,&QWP(0,$key));
1454 &aesdec ($inout5,$rndkey1);
1455 &call (&label("_aesni_decrypt6_enter"));
1456
1457 &movdqa ($tweak,&QWP(16*5,"esp")); # last tweak
1458 &pxor ($twtmp,$twtmp);
1459 &xorps ($inout0,&QWP(16*0,"esp")); # output^=tweak
1460 &pcmpgtd ($twtmp,$tweak); # broadcast upper bits
1461 &xorps ($inout1,&QWP(16*1,"esp"));
1462 &movups (&QWP(16*0,$out),$inout0); # write output
1463 &xorps ($inout2,&QWP(16*2,"esp"));
1464 &movups (&QWP(16*1,$out),$inout1);
1465 &xorps ($inout3,&QWP(16*3,"esp"));
1466 &movups (&QWP(16*2,$out),$inout2);
1467 &xorps ($inout4,&QWP(16*4,"esp"));
1468 &movups (&QWP(16*3,$out),$inout3);
1469 &xorps ($inout5,$tweak);
1470 &movups (&QWP(16*4,$out),$inout4);
1471 &pshufd ($twres,$twtmp,0x13);
1472 &movups (&QWP(16*5,$out),$inout5);
1473 &lea ($out,&DWP(16*6,$out));
1474 &movdqa ($twmask,&QWP(16*6,"esp")); # 0x0...010...87
1475
1476 &pxor ($twtmp,$twtmp);
1477 &paddq ($tweak,$tweak); # &psllq($tweak,1);
1478 &pand ($twres,$twmask); # isolate carry and residue
1479 &pcmpgtd($twtmp,$tweak); # broadcast upper bits
1480 &mov ($rounds,$rounds_); # restore $rounds
1481 &pxor ($tweak,$twres);
1482
1483 &sub ($len,16*6);
1484 &jnc (&label("xts_dec_loop6"));
1485
1486 &lea ($rounds,&DWP(1,"",$rounds,2)); # restore $rounds
1487 &mov ($key,$key_); # restore $key
1488 &mov ($rounds_,$rounds);
1489
1490&set_label("xts_dec_short");
1491 &add ($len,16*6);
1492 &jz (&label("xts_dec_done6x"));
1493
1494 &movdqa ($inout3,$tweak); # put aside previous tweak
1495 &cmp ($len,0x20);
1496 &jb (&label("xts_dec_one"));
1497
1498 &pshufd ($twres,$twtmp,0x13);
1499 &pxor ($twtmp,$twtmp);
1500 &paddq ($tweak,$tweak); # &psllq($tweak,1);
1501 &pand ($twres,$twmask); # isolate carry and residue
1502 &pcmpgtd($twtmp,$tweak); # broadcast upper bits
1503 &pxor ($tweak,$twres);
1504 &je (&label("xts_dec_two"));
1505
1506 &pshufd ($twres,$twtmp,0x13);
1507 &pxor ($twtmp,$twtmp);
1508 &movdqa ($inout4,$tweak); # put aside previous tweak
1509 &paddq ($tweak,$tweak); # &psllq($tweak,1);
1510 &pand ($twres,$twmask); # isolate carry and residue
1511 &pcmpgtd($twtmp,$tweak); # broadcast upper bits
1512 &pxor ($tweak,$twres);
1513 &cmp ($len,0x40);
1514 &jb (&label("xts_dec_three"));
1515
1516 &pshufd ($twres,$twtmp,0x13);
1517 &pxor ($twtmp,$twtmp);
1518 &movdqa ($inout5,$tweak); # put aside previous tweak
1519 &paddq ($tweak,$tweak); # &psllq($tweak,1);
1520 &pand ($twres,$twmask); # isolate carry and residue
1521 &pcmpgtd($twtmp,$tweak); # broadcast upper bits
1522 &pxor ($tweak,$twres);
1523 &movdqa (&QWP(16*0,"esp"),$inout3);
1524 &movdqa (&QWP(16*1,"esp"),$inout4);
1525 &je (&label("xts_dec_four"));
1526
1527 &movdqa (&QWP(16*2,"esp"),$inout5);
1528 &pshufd ($inout5,$twtmp,0x13);
1529 &movdqa (&QWP(16*3,"esp"),$tweak);
1530	 &paddq ($tweak,$tweak); # &psllq($tweak,1);
1531 &pand ($inout5,$twmask); # isolate carry and residue
1532 &pxor ($inout5,$tweak);
1533
1534 &movdqu ($inout0,&QWP(16*0,$inp)); # load input
1535 &movdqu ($inout1,&QWP(16*1,$inp));
1536 &movdqu ($inout2,&QWP(16*2,$inp));
1537 &pxor ($inout0,&QWP(16*0,"esp")); # input^=tweak
1538 &movdqu ($inout3,&QWP(16*3,$inp));
1539 &pxor ($inout1,&QWP(16*1,"esp"));
1540 &movdqu ($inout4,&QWP(16*4,$inp));
1541 &pxor ($inout2,&QWP(16*2,"esp"));
1542 &lea ($inp,&DWP(16*5,$inp));
1543 &pxor ($inout3,&QWP(16*3,"esp"));
1544 &movdqa (&QWP(16*4,"esp"),$inout5); # save last tweak
1545 &pxor ($inout4,$inout5);
1546
1547 &call ("_aesni_decrypt6");
1548
1549 &movaps ($tweak,&QWP(16*4,"esp")); # last tweak
1550 &xorps ($inout0,&QWP(16*0,"esp")); # output^=tweak
1551 &xorps ($inout1,&QWP(16*1,"esp"));
1552 &xorps ($inout2,&QWP(16*2,"esp"));
1553 &movups (&QWP(16*0,$out),$inout0); # write output
1554 &xorps ($inout3,&QWP(16*3,"esp"));
1555 &movups (&QWP(16*1,$out),$inout1);
1556 &xorps ($inout4,$tweak);
1557 &movups (&QWP(16*2,$out),$inout2);
1558 &movups (&QWP(16*3,$out),$inout3);
1559 &movups (&QWP(16*4,$out),$inout4);
1560 &lea ($out,&DWP(16*5,$out));
1561 &jmp (&label("xts_dec_done"));
1562
1563&set_label("xts_dec_one",16);
1564 &movups ($inout0,&QWP(16*0,$inp)); # load input
1565 &lea ($inp,&DWP(16*1,$inp));
1566 &xorps ($inout0,$inout3); # input^=tweak
1567 if ($inline)
1568 { &aesni_inline_generate1("dec"); }
1569 else
1570 { &call ("_aesni_decrypt1"); }
1571 &xorps ($inout0,$inout3); # output^=tweak
1572 &movups (&QWP(16*0,$out),$inout0); # write output
1573 &lea ($out,&DWP(16*1,$out));
1574
1575 &movdqa ($tweak,$inout3); # last tweak
1576 &jmp (&label("xts_dec_done"));
1577
1578&set_label("xts_dec_two",16);
1579 &movaps ($inout4,$tweak); # put aside last tweak
1580
1581 &movups ($inout0,&QWP(16*0,$inp)); # load input
1582 &movups ($inout1,&QWP(16*1,$inp));
1583 &lea ($inp,&DWP(16*2,$inp));
1584 &xorps ($inout0,$inout3); # input^=tweak
1585 &xorps ($inout1,$inout4);
1586
1587 &call ("_aesni_decrypt3");
1588
1589 &xorps ($inout0,$inout3); # output^=tweak
1590 &xorps ($inout1,$inout4);
1591 &movups (&QWP(16*0,$out),$inout0); # write output
1592 &movups (&QWP(16*1,$out),$inout1);
1593 &lea ($out,&DWP(16*2,$out));
1594
1595 &movdqa ($tweak,$inout4); # last tweak
1596 &jmp (&label("xts_dec_done"));
1597
1598&set_label("xts_dec_three",16);
1599 &movaps ($inout5,$tweak); # put aside last tweak
1600 &movups ($inout0,&QWP(16*0,$inp)); # load input
1601 &movups ($inout1,&QWP(16*1,$inp));
1602 &movups ($inout2,&QWP(16*2,$inp));
1603 &lea ($inp,&DWP(16*3,$inp));
1604 &xorps ($inout0,$inout3); # input^=tweak
1605 &xorps ($inout1,$inout4);
1606 &xorps ($inout2,$inout5);
1607
1608 &call ("_aesni_decrypt3");
1609
1610 &xorps ($inout0,$inout3); # output^=tweak
1611 &xorps ($inout1,$inout4);
1612 &xorps ($inout2,$inout5);
1613 &movups (&QWP(16*0,$out),$inout0); # write output
1614 &movups (&QWP(16*1,$out),$inout1);
1615 &movups (&QWP(16*2,$out),$inout2);
1616 &lea ($out,&DWP(16*3,$out));
1617
1618 &movdqa ($tweak,$inout5); # last tweak
1619 &jmp (&label("xts_dec_done"));
1620
1621&set_label("xts_dec_four",16);
1622 &movaps ($inout4,$tweak); # put aside last tweak
1623
1624 &movups ($inout0,&QWP(16*0,$inp)); # load input
1625 &movups ($inout1,&QWP(16*1,$inp));
1626 &movups ($inout2,&QWP(16*2,$inp));
1627 &xorps ($inout0,&QWP(16*0,"esp")); # input^=tweak
1628 &movups ($inout3,&QWP(16*3,$inp));
1629 &lea ($inp,&DWP(16*4,$inp));
1630 &xorps ($inout1,&QWP(16*1,"esp"));
1631 &xorps ($inout2,$inout5);
1632 &xorps ($inout3,$inout4);
1633
1634 &call ("_aesni_decrypt4");
1635
1636 &xorps ($inout0,&QWP(16*0,"esp")); # output^=tweak
1637 &xorps ($inout1,&QWP(16*1,"esp"));
1638 &xorps ($inout2,$inout5);
1639 &movups (&QWP(16*0,$out),$inout0); # write output
1640 &xorps ($inout3,$inout4);
1641 &movups (&QWP(16*1,$out),$inout1);
1642 &movups (&QWP(16*2,$out),$inout2);
1643 &movups (&QWP(16*3,$out),$inout3);
1644 &lea ($out,&DWP(16*4,$out));
1645
1646 &movdqa ($tweak,$inout4); # last tweak
1647 &jmp (&label("xts_dec_done"));
1648
1649&set_label("xts_dec_done6x",16); # $tweak is pre-calculated
1650 &mov ($len,&DWP(16*7+0,"esp")); # restore original $len
1651 &and ($len,15);
1652 &jz (&label("xts_dec_ret"));
1653 &mov (&DWP(16*7+0,"esp"),$len); # save $len%16
1654 &jmp (&label("xts_dec_only_one_more"));
1655
1656&set_label("xts_dec_done",16);
1657 &mov ($len,&DWP(16*7+0,"esp")); # restore original $len
1658 &pxor ($twtmp,$twtmp);
1659 &and ($len,15);
1660 &jz (&label("xts_dec_ret"));
1661
1662 &pcmpgtd($twtmp,$tweak); # broadcast upper bits
1663 &mov (&DWP(16*7+0,"esp"),$len); # save $len%16
1664 &pshufd ($twres,$twtmp,0x13);
1665 &pxor ($twtmp,$twtmp);
1666 &movdqa ($twmask,&QWP(16*6,"esp"));
1667 &paddq ($tweak,$tweak); # &psllq($tweak,1);
1668 &pand ($twres,$twmask); # isolate carry and residue
1669 &pcmpgtd($twtmp,$tweak); # broadcast upper bits
1670 &pxor ($tweak,$twres);
1671
1672&set_label("xts_dec_only_one_more");
1673 &pshufd ($inout3,$twtmp,0x13);
1674 &movdqa ($inout4,$tweak); # put aside previous tweak
1675 &paddq ($tweak,$tweak); # &psllq($tweak,1);
1676 &pand ($inout3,$twmask); # isolate carry and residue
1677 &pxor ($inout3,$tweak);
1678
1679 &mov ($key,$key_); # restore $key
1680 &mov ($rounds,$rounds_); # restore $rounds
1681
1682 &movups ($inout0,&QWP(0,$inp)); # load input
1683 &xorps ($inout0,$inout3); # input^=tweak
1684 if ($inline)
1685 { &aesni_inline_generate1("dec"); }
1686 else
1687 { &call ("_aesni_decrypt1"); }
1688 &xorps ($inout0,$inout3); # output^=tweak
1689 &movups (&QWP(0,$out),$inout0); # write output
1690
1691&set_label("xts_dec_steal");
1692 &movz ($rounds,&BP(16,$inp));
1693 &movz ($key,&BP(0,$out));
1694 &lea ($inp,&DWP(1,$inp));
1695 &mov (&BP(0,$out),&LB($rounds));
1696 &mov (&BP(16,$out),&LB($key));
1697 &lea ($out,&DWP(1,$out));
1698 &sub ($len,1);
1699 &jnz (&label("xts_dec_steal"));
1700
1701 &sub ($out,&DWP(16*7+0,"esp")); # rewind $out
1702 &mov ($key,$key_); # restore $key
1703 &mov ($rounds,$rounds_); # restore $rounds
1704
1705 &movups ($inout0,&QWP(0,$out)); # load input
1706 &xorps ($inout0,$inout4); # input^=tweak
1707 if ($inline)
1708 { &aesni_inline_generate1("dec"); }
1709 else
1710 { &call ("_aesni_decrypt1"); }
1711 &xorps ($inout0,$inout4); # output^=tweak
1712 &movups (&QWP(0,$out),$inout0); # write output
1713
1714&set_label("xts_dec_ret");
1715 &mov ("esp",&DWP(16*7+4,"esp")); # restore %esp
1716&function_end("aesni_xts_decrypt");
1717}
1718}
1719
1720######################################################################
1721# void $PREFIX_cbc_encrypt (const void *inp, void *out,
1722# size_t length, const AES_KEY *key,
1723# unsigned char *ivp,const int enc);
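# With $PREFIX="aesni" this has the same shape as AES_cbc_encrypt: enc!=0
# encrypts, enc==0 decrypts, and ivp is read and then updated in place
# with the last block so calls can be chained. A caller sketch (assumed
# names; normally this is reached through EVP):
#
#	AES_KEY ks;
#	unsigned char iv[16] = {0};
#	AES_set_encrypt_key(user_key, 128, &ks);
#	aesni_cbc_encrypt(in, out, inlen, &ks, iv, 1);  /* encrypt */
#	/* decryption would use AES_set_decrypt_key() and enc == 0 */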
1724&function_begin("${PREFIX}_cbc_encrypt");
1725 &mov ($inp,&wparam(0));
1726 &mov ($rounds_,"esp");
1727 &mov ($out,&wparam(1));
1728 &sub ($rounds_,24);
1729 &mov ($len,&wparam(2));
1730 &and ($rounds_,-16);
1731 &mov ($key,&wparam(3));
1732 &mov ($key_,&wparam(4));
1733 &test ($len,$len);
1734 &jz (&label("cbc_abort"));
1735
1736 &cmp (&wparam(5),0);
1737 &xchg ($rounds_,"esp"); # alloca
1738 &movups ($ivec,&QWP(0,$key_)); # load IV
1739 &mov ($rounds,&DWP(240,$key));
1740 &mov ($key_,$key); # backup $key
1741 &mov (&DWP(16,"esp"),$rounds_); # save original %esp
1742 &mov ($rounds_,$rounds); # backup $rounds
1743 &je (&label("cbc_decrypt"));
1744
1745 &movaps ($inout0,$ivec);
1746 &cmp ($len,16);
1747 &jb (&label("cbc_enc_tail"));
1748 &sub ($len,16);
1749 &jmp (&label("cbc_enc_loop"));
1750
1751&set_label("cbc_enc_loop",16);
1752 &movups ($ivec,&QWP(0,$inp)); # input actually
1753 &lea ($inp,&DWP(16,$inp));
1754 if ($inline)
1755 { &aesni_inline_generate1("enc",$inout0,$ivec); }
1756 else
1757 { &xorps($inout0,$ivec); &call("_aesni_encrypt1"); }
1758 &mov ($rounds,$rounds_); # restore $rounds
1759 &mov ($key,$key_); # restore $key
1760 &movups (&QWP(0,$out),$inout0); # store output
1761 &lea ($out,&DWP(16,$out));
1762 &sub ($len,16);
1763 &jnc (&label("cbc_enc_loop"));
1764 &add ($len,16);
1765 &jnz (&label("cbc_enc_tail"));
1766 &movaps ($ivec,$inout0);
1767 &jmp (&label("cbc_ret"));
1768
1769&set_label("cbc_enc_tail");
1770 &mov ("ecx",$len); # zaps $rounds
1771 &data_word(0xA4F3F689); # rep movsb
1772 &mov ("ecx",16); # zero tail
1773 &sub ("ecx",$len);
1774 &xor ("eax","eax"); # zaps $len
1775 &data_word(0xAAF3F689); # rep stosb
1776 &lea ($out,&DWP(-16,$out)); # rewind $out by 1 block
1777 &mov ($rounds,$rounds_); # restore $rounds
1778 &mov ($inp,$out); # $inp and $out are the same
1779 &mov ($key,$key_); # restore $key
1780 &jmp (&label("cbc_enc_loop"));
1781######################################################################
1782&set_label("cbc_decrypt",16);
1783 &cmp ($len,0x50);
1784 &jbe (&label("cbc_dec_tail"));
1785 &movaps (&QWP(0,"esp"),$ivec); # save IV
1786 &sub ($len,0x50);
1787 &jmp (&label("cbc_dec_loop6_enter"));
1788
1789&set_label("cbc_dec_loop6",16);
1790 &movaps (&QWP(0,"esp"),$rndkey0); # save IV
1791 &movups (&QWP(0,$out),$inout5);
1792 &lea ($out,&DWP(0x10,$out));
1793&set_label("cbc_dec_loop6_enter");
1794 &movdqu ($inout0,&QWP(0,$inp));
1795 &movdqu ($inout1,&QWP(0x10,$inp));
1796 &movdqu ($inout2,&QWP(0x20,$inp));
1797 &movdqu ($inout3,&QWP(0x30,$inp));
1798 &movdqu ($inout4,&QWP(0x40,$inp));
1799 &movdqu ($inout5,&QWP(0x50,$inp));
1800
1801 &call ("_aesni_decrypt6");
1802
1803 &movups ($rndkey1,&QWP(0,$inp));
1804 &movups ($rndkey0,&QWP(0x10,$inp));
1805 &xorps ($inout0,&QWP(0,"esp")); # ^=IV
1806 &xorps ($inout1,$rndkey1);
1807 &movups ($rndkey1,&QWP(0x20,$inp));
1808 &xorps ($inout2,$rndkey0);
1809 &movups ($rndkey0,&QWP(0x30,$inp));
1810 &xorps ($inout3,$rndkey1);
1811 &movups ($rndkey1,&QWP(0x40,$inp));
1812 &xorps ($inout4,$rndkey0);
1813 &movups ($rndkey0,&QWP(0x50,$inp)); # IV
1814 &xorps ($inout5,$rndkey1);
1815 &movups (&QWP(0,$out),$inout0);
1816 &movups (&QWP(0x10,$out),$inout1);
1817 &lea ($inp,&DWP(0x60,$inp));
1818 &movups (&QWP(0x20,$out),$inout2);
1819	 &mov	($rounds,$rounds_);	# restore $rounds
1820 &movups (&QWP(0x30,$out),$inout3);
1821 &mov ($key,$key_); # restore $key
1822 &movups (&QWP(0x40,$out),$inout4);
1823 &lea ($out,&DWP(0x50,$out));
1824 &sub ($len,0x60);
1825 &ja (&label("cbc_dec_loop6"));
1826
1827 &movaps ($inout0,$inout5);
1828 &movaps ($ivec,$rndkey0);
1829 &add ($len,0x50);
1830 &jle (&label("cbc_dec_tail_collected"));
1831 &movups (&QWP(0,$out),$inout0);
1832 &lea ($out,&DWP(0x10,$out));
1833&set_label("cbc_dec_tail");
1834 &movups ($inout0,&QWP(0,$inp));
1835 &movaps ($in0,$inout0);
1836 &cmp ($len,0x10);
1837 &jbe (&label("cbc_dec_one"));
1838
1839 &movups ($inout1,&QWP(0x10,$inp));
1840 &movaps ($in1,$inout1);
1841 &cmp ($len,0x20);
1842 &jbe (&label("cbc_dec_two"));
1843
1844 &movups ($inout2,&QWP(0x20,$inp));
1845 &cmp ($len,0x30);
1846 &jbe (&label("cbc_dec_three"));
1847
1848 &movups ($inout3,&QWP(0x30,$inp));
1849 &cmp ($len,0x40);
1850 &jbe (&label("cbc_dec_four"));
1851
1852 &movups ($inout4,&QWP(0x40,$inp));
1853 &movaps (&QWP(0,"esp"),$ivec); # save IV
1854 &movups ($inout0,&QWP(0,$inp));
1855 &xorps ($inout5,$inout5);
1856 &call ("_aesni_decrypt6");
1857 &movups ($rndkey1,&QWP(0,$inp));
1858 &movups ($rndkey0,&QWP(0x10,$inp));
1859 &xorps ($inout0,&QWP(0,"esp")); # ^= IV
1860 &xorps ($inout1,$rndkey1);
1861 &movups ($rndkey1,&QWP(0x20,$inp));
1862 &xorps ($inout2,$rndkey0);
1863 &movups ($rndkey0,&QWP(0x30,$inp));
1864 &xorps ($inout3,$rndkey1);
1865 &movups ($ivec,&QWP(0x40,$inp)); # IV
1866 &xorps ($inout4,$rndkey0);
1867 &movups (&QWP(0,$out),$inout0);
1868 &movups (&QWP(0x10,$out),$inout1);
1869 &movups (&QWP(0x20,$out),$inout2);
1870 &movups (&QWP(0x30,$out),$inout3);
1871 &lea ($out,&DWP(0x40,$out));
1872 &movaps ($inout0,$inout4);
1873 &sub ($len,0x50);
1874 &jmp (&label("cbc_dec_tail_collected"));
1875
1876&set_label("cbc_dec_one",16);
1877 if ($inline)
1878 { &aesni_inline_generate1("dec"); }
1879 else
1880 { &call ("_aesni_decrypt1"); }
1881 &xorps ($inout0,$ivec);
1882 &movaps ($ivec,$in0);
1883 &sub ($len,0x10);
1884 &jmp (&label("cbc_dec_tail_collected"));
1885
1886&set_label("cbc_dec_two",16);
1887 &xorps ($inout2,$inout2);
1888 &call ("_aesni_decrypt3");
1889 &xorps ($inout0,$ivec);
1890 &xorps ($inout1,$in0);
1891 &movups (&QWP(0,$out),$inout0);
1892 &movaps ($inout0,$inout1);
1893 &lea ($out,&DWP(0x10,$out));
1894 &movaps ($ivec,$in1);
1895 &sub ($len,0x20);
1896 &jmp (&label("cbc_dec_tail_collected"));
1897
1898&set_label("cbc_dec_three",16);
1899 &call ("_aesni_decrypt3");
1900 &xorps ($inout0,$ivec);
1901 &xorps ($inout1,$in0);
1902 &xorps ($inout2,$in1);
1903 &movups (&QWP(0,$out),$inout0);
1904 &movaps ($inout0,$inout2);
1905 &movups (&QWP(0x10,$out),$inout1);
1906 &lea ($out,&DWP(0x20,$out));
1907 &movups ($ivec,&QWP(0x20,$inp));
1908 &sub ($len,0x30);
1909 &jmp (&label("cbc_dec_tail_collected"));
1910
1911&set_label("cbc_dec_four",16);
1912 &call ("_aesni_decrypt4");
1913 &movups ($rndkey1,&QWP(0x10,$inp));
1914 &movups ($rndkey0,&QWP(0x20,$inp));
1915 &xorps ($inout0,$ivec);
1916 &movups ($ivec,&QWP(0x30,$inp));
1917 &xorps ($inout1,$in0);
1918 &movups (&QWP(0,$out),$inout0);
1919 &xorps ($inout2,$rndkey1);
1920 &movups (&QWP(0x10,$out),$inout1);
1921 &xorps ($inout3,$rndkey0);
1922 &movups (&QWP(0x20,$out),$inout2);
1923 &lea ($out,&DWP(0x30,$out));
1924 &movaps ($inout0,$inout3);
1925 &sub ($len,0x40);
1926
1927&set_label("cbc_dec_tail_collected");
1928 &and ($len,15);
1929 &jnz (&label("cbc_dec_tail_partial"));
1930 &movups (&QWP(0,$out),$inout0);
1931 &jmp (&label("cbc_ret"));
1932
1933&set_label("cbc_dec_tail_partial",16);
1934 &movaps (&QWP(0,"esp"),$inout0);
1935 &mov ("ecx",16);
1936 &mov ($inp,"esp");
1937 &sub ("ecx",$len);
1938 &data_word(0xA4F3F689); # rep movsb
1939
1940&set_label("cbc_ret");
1941 &mov ("esp",&DWP(16,"esp")); # pull original %esp
1942 &mov ($key_,&wparam(4));
1943 &movups (&QWP(0,$key_),$ivec); # output IV
1944&set_label("cbc_abort");
1945&function_end("${PREFIX}_cbc_encrypt");
1946
1947######################################################################
1948# Mechanical port from aesni-x86_64.pl.
1949#
1950# _aesni_set_encrypt_key is private interface,
1951# input:
1952# "eax" const unsigned char *userKey
1953# $rounds int bits
1954# $key AES_KEY *key
1955# output:
1956# "eax" return code
1957#	$rounds	rounds
1958
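# Return-code convention visible below: 0 on success, -1 for a NULL key or
# userKey pointer, -2 for unsupported key sizes (only 128/192/256 bits are
# accepted). Through the public wrapper further down this looks like
# (sketch; user_key assumed):
#
#	AES_KEY ks;
#	int rc = aesni_set_encrypt_key(user_key, 128, &ks);
#	if (rc == -1) { /* bad pointer   */ }
#	if (rc == -2) { /* bad key bits  */ }
#	/* rc == 0: key schedule is ready */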
1959&function_begin_B("_aesni_set_encrypt_key");
1960 &test ("eax","eax");
1961 &jz (&label("bad_pointer"));
1962 &test ($key,$key);
1963 &jz (&label("bad_pointer"));
1964
1965 &movups ("xmm0",&QWP(0,"eax")); # pull first 128 bits of *userKey
1966 &xorps ("xmm4","xmm4"); # low dword of xmm4 is assumed 0
1967 &lea ($key,&DWP(16,$key));
1968 &cmp ($rounds,256);
1969 &je (&label("14rounds"));
1970 &cmp ($rounds,192);
1971 &je (&label("12rounds"));
1972 &cmp ($rounds,128);
1973 &jne (&label("bad_keybits"));
1974
1975&set_label("10rounds",16);
1976 &mov ($rounds,9);
1977 &$movekey (&QWP(-16,$key),"xmm0"); # round 0
1978 &aeskeygenassist("xmm1","xmm0",0x01); # round 1
1979 &call (&label("key_128_cold"));
1980 &aeskeygenassist("xmm1","xmm0",0x2); # round 2
1981 &call (&label("key_128"));
1982 &aeskeygenassist("xmm1","xmm0",0x04); # round 3
1983 &call (&label("key_128"));
1984 &aeskeygenassist("xmm1","xmm0",0x08); # round 4
1985 &call (&label("key_128"));
1986 &aeskeygenassist("xmm1","xmm0",0x10); # round 5
1987 &call (&label("key_128"));
1988 &aeskeygenassist("xmm1","xmm0",0x20); # round 6
1989 &call (&label("key_128"));
1990 &aeskeygenassist("xmm1","xmm0",0x40); # round 7
1991 &call (&label("key_128"));
1992 &aeskeygenassist("xmm1","xmm0",0x80); # round 8
1993 &call (&label("key_128"));
1994 &aeskeygenassist("xmm1","xmm0",0x1b); # round 9
1995 &call (&label("key_128"));
1996 &aeskeygenassist("xmm1","xmm0",0x36); # round 10
1997 &call (&label("key_128"));
1998 &$movekey (&QWP(0,$key),"xmm0");
1999 &mov (&DWP(80,$key),$rounds);
2000 &xor ("eax","eax");
2001 &ret();
2002
2003&set_label("key_128",16);
2004 &$movekey (&QWP(0,$key),"xmm0");
2005 &lea ($key,&DWP(16,$key));
2006&set_label("key_128_cold");
2007 &shufps ("xmm4","xmm0",0b00010000);
2008 &xorps ("xmm0","xmm4");
2009 &shufps ("xmm4","xmm0",0b10001100);
2010 &xorps ("xmm0","xmm4");
2011 &shufps ("xmm1","xmm1",0b11111111); # critical path
2012 &xorps ("xmm0","xmm1");
2013 &ret();
2014
2015&set_label("12rounds",16);
2016 &movq ("xmm2",&QWP(16,"eax")); # remaining 1/3 of *userKey
2017 &mov ($rounds,11);
2018	 &$movekey	(&QWP(-16,$key),"xmm0");	# round 0
2019 &aeskeygenassist("xmm1","xmm2",0x01); # round 1,2
2020 &call (&label("key_192a_cold"));
2021 &aeskeygenassist("xmm1","xmm2",0x02); # round 2,3
2022 &call (&label("key_192b"));
2023 &aeskeygenassist("xmm1","xmm2",0x04); # round 4,5
2024 &call (&label("key_192a"));
2025 &aeskeygenassist("xmm1","xmm2",0x08); # round 5,6
2026 &call (&label("key_192b"));
2027 &aeskeygenassist("xmm1","xmm2",0x10); # round 7,8
2028 &call (&label("key_192a"));
2029 &aeskeygenassist("xmm1","xmm2",0x20); # round 8,9
2030 &call (&label("key_192b"));
2031 &aeskeygenassist("xmm1","xmm2",0x40); # round 10,11
2032 &call (&label("key_192a"));
2033 &aeskeygenassist("xmm1","xmm2",0x80); # round 11,12
2034 &call (&label("key_192b"));
2035 &$movekey (&QWP(0,$key),"xmm0");
2036 &mov (&DWP(48,$key),$rounds);
2037 &xor ("eax","eax");
2038 &ret();
2039
2040&set_label("key_192a",16);
2041 &$movekey (&QWP(0,$key),"xmm0");
2042 &lea ($key,&DWP(16,$key));
2043&set_label("key_192a_cold",16);
2044 &movaps ("xmm5","xmm2");
2045&set_label("key_192b_warm");
2046 &shufps ("xmm4","xmm0",0b00010000);
2047 &movdqa ("xmm3","xmm2");
2048 &xorps ("xmm0","xmm4");
2049 &shufps ("xmm4","xmm0",0b10001100);
2050 &pslldq ("xmm3",4);
2051 &xorps ("xmm0","xmm4");
2052 &pshufd ("xmm1","xmm1",0b01010101); # critical path
2053 &pxor ("xmm2","xmm3");
2054 &pxor ("xmm0","xmm1");
2055 &pshufd ("xmm3","xmm0",0b11111111);
2056 &pxor ("xmm2","xmm3");
2057 &ret();
2058
2059&set_label("key_192b",16);
2060 &movaps ("xmm3","xmm0");
2061 &shufps ("xmm5","xmm0",0b01000100);
2062 &$movekey (&QWP(0,$key),"xmm5");
2063 &shufps ("xmm3","xmm2",0b01001110);
2064 &$movekey (&QWP(16,$key),"xmm3");
2065 &lea ($key,&DWP(32,$key));
2066 &jmp (&label("key_192b_warm"));
2067
2068&set_label("14rounds",16);
2069 &movups ("xmm2",&QWP(16,"eax")); # remaining half of *userKey
2070 &mov ($rounds,13);
2071 &lea ($key,&DWP(16,$key));
2072 &$movekey (&QWP(-32,$key),"xmm0"); # round 0
2073 &$movekey (&QWP(-16,$key),"xmm2"); # round 1
2074 &aeskeygenassist("xmm1","xmm2",0x01); # round 2
2075 &call (&label("key_256a_cold"));
2076 &aeskeygenassist("xmm1","xmm0",0x01); # round 3
2077 &call (&label("key_256b"));
2078 &aeskeygenassist("xmm1","xmm2",0x02); # round 4
2079 &call (&label("key_256a"));
2080 &aeskeygenassist("xmm1","xmm0",0x02); # round 5
2081 &call (&label("key_256b"));
2082 &aeskeygenassist("xmm1","xmm2",0x04); # round 6
2083 &call (&label("key_256a"));
2084 &aeskeygenassist("xmm1","xmm0",0x04); # round 7
2085 &call (&label("key_256b"));
2086 &aeskeygenassist("xmm1","xmm2",0x08); # round 8
2087 &call (&label("key_256a"));
2088 &aeskeygenassist("xmm1","xmm0",0x08); # round 9
2089 &call (&label("key_256b"));
2090 &aeskeygenassist("xmm1","xmm2",0x10); # round 10
2091 &call (&label("key_256a"));
2092 &aeskeygenassist("xmm1","xmm0",0x10); # round 11
2093 &call (&label("key_256b"));
2094 &aeskeygenassist("xmm1","xmm2",0x20); # round 12
2095 &call (&label("key_256a"));
2096 &aeskeygenassist("xmm1","xmm0",0x20); # round 13
2097 &call (&label("key_256b"));
2098 &aeskeygenassist("xmm1","xmm2",0x40); # round 14
2099 &call (&label("key_256a"));
2100 &$movekey (&QWP(0,$key),"xmm0");
2101 &mov (&DWP(16,$key),$rounds);
2102 &xor ("eax","eax");
2103 &ret();
2104
2105&set_label("key_256a",16);
2106 &$movekey (&QWP(0,$key),"xmm2");
2107 &lea ($key,&DWP(16,$key));
2108&set_label("key_256a_cold");
2109 &shufps ("xmm4","xmm0",0b00010000);
2110 &xorps ("xmm0","xmm4");
2111 &shufps ("xmm4","xmm0",0b10001100);
2112 &xorps ("xmm0","xmm4");
2113 &shufps ("xmm1","xmm1",0b11111111); # critical path
2114 &xorps ("xmm0","xmm1");
2115 &ret();
2116
2117&set_label("key_256b",16);
2118 &$movekey (&QWP(0,$key),"xmm0");
2119 &lea ($key,&DWP(16,$key));
2120
2121 &shufps ("xmm4","xmm2",0b00010000);
2122 &xorps ("xmm2","xmm4");
2123 &shufps ("xmm4","xmm2",0b10001100);
2124 &xorps ("xmm2","xmm4");
2125 &shufps ("xmm1","xmm1",0b10101010); # critical path
2126 &xorps ("xmm2","xmm1");
2127 &ret();
2128
2129&set_label("bad_pointer",4);
2130 &mov ("eax",-1);
2131 &ret ();
2132&set_label("bad_keybits",4);
2133 &mov ("eax",-2);
2134 &ret ();
2135&function_end_B("_aesni_set_encrypt_key");
2136
2137# int $PREFIX_set_encrypt_key (const unsigned char *userKey, int bits,
2138# AES_KEY *key)
2139&function_begin_B("${PREFIX}_set_encrypt_key");
2140 &mov ("eax",&wparam(0));
2141 &mov ($rounds,&wparam(1));
2142 &mov ($key,&wparam(2));
2143 &call ("_aesni_set_encrypt_key");
2144 &ret ();
2145&function_end_B("${PREFIX}_set_encrypt_key");
2146
2147# int $PREFIX_set_decrypt_key (const unsigned char *userKey, int bits,
2148# AES_KEY *key)
2149&function_begin_B("${PREFIX}_set_decrypt_key");
2150 &mov ("eax",&wparam(0));
2151 &mov ($rounds,&wparam(1));
2152 &mov ($key,&wparam(2));
2153 &call ("_aesni_set_encrypt_key");
2154 &mov ($key,&wparam(2));
2155	 &shl	($rounds,4);	# rounds-1 after _aesni_set_encrypt_key
2156 &test ("eax","eax");
2157 &jnz (&label("dec_key_ret"));
2158 &lea ("eax",&DWP(16,$key,$rounds)); # end of key schedule
2159
2160 &$movekey ("xmm0",&QWP(0,$key)); # just swap
2161 &$movekey ("xmm1",&QWP(0,"eax"));
2162 &$movekey (&QWP(0,"eax"),"xmm0");
2163 &$movekey (&QWP(0,$key),"xmm1");
2164 &lea ($key,&DWP(16,$key));
2165 &lea ("eax",&DWP(-16,"eax"));
2166
2167&set_label("dec_key_inverse");
2168 &$movekey ("xmm0",&QWP(0,$key)); # swap and inverse
2169 &$movekey ("xmm1",&QWP(0,"eax"));
2170 &aesimc ("xmm0","xmm0");
2171 &aesimc ("xmm1","xmm1");
2172 &lea ($key,&DWP(16,$key));
2173 &lea ("eax",&DWP(-16,"eax"));
2174 &$movekey (&QWP(16,"eax"),"xmm0");
2175 &$movekey (&QWP(-16,$key),"xmm1");
2176 &cmp ("eax",$key);
2177 &ja (&label("dec_key_inverse"));
2178
2179 &$movekey ("xmm0",&QWP(0,$key)); # inverse middle
2180 &aesimc ("xmm0","xmm0");
2181 &$movekey (&QWP(0,$key),"xmm0");
2182
2183 &xor ("eax","eax"); # return success
2184&set_label("dec_key_ret");
2185 &ret ();
2186&function_end_B("${PREFIX}_set_decrypt_key");
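# The dec_key_inverse loop above builds the decryption schedule in the
# equivalent-inverse-cipher form: round keys are reversed end to end and
# InvMixColumns (aesimc) is applied to every key except the first and
# last. The same transform with C intrinsics (a sketch, hypothetical
# helper; requires <wmmintrin.h>):
#
#	static void invert_key_schedule(__m128i *rk, int nkeys)
#	{
#		int i, j;
#		for (i = 0, j = nkeys - 1; i < j; i++, j--) {
#			__m128i t = rk[i]; rk[i] = rk[j]; rk[j] = t;
#		}
#		for (i = 1; i < nkeys - 1; i++)
#			rk[i] = _mm_aesimc_si128(rk[i]);
#	}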
2187&asciz("AES for Intel AES-NI, CRYPTOGAMS by <appro\@openssl.org>");
2188
2189&asm_finish();
diff --git a/src/lib/libssl/src/crypto/aes/asm/aesni-x86_64.pl b/src/lib/libssl/src/crypto/aes/asm/aesni-x86_64.pl
index 49e0f4b351..499f3b3f42 100644
--- a/src/lib/libssl/src/crypto/aes/asm/aesni-x86_64.pl
+++ b/src/lib/libssl/src/crypto/aes/asm/aesni-x86_64.pl
@@ -11,6 +11,151 @@
11# OpenSSL context it's used with Intel engine, but can also be used as 11# OpenSSL context it's used with Intel engine, but can also be used as
12# drop-in replacement for crypto/aes/asm/aes-x86_64.pl [see below for 12# drop-in replacement for crypto/aes/asm/aes-x86_64.pl [see below for
13# details]. 13# details].
14#
15# Performance.
16#
17# Given aes(enc|dec) instructions' latency asymptotic performance for
18# non-parallelizable modes such as CBC encrypt is 3.75 cycles per byte
19# processed with 128-bit key. And given their throughput asymptotic
20# performance for parallelizable modes is 1.25 cycles per byte. Being
21# an asymptotic limit, it's not something you commonly achieve in reality,
22# but how close does one get? Below are results collected for
23# different modes and block sizes. Pairs of numbers are for en-/
24# decryption.
25#
26# 16-byte 64-byte 256-byte 1-KB 8-KB
27# ECB 4.25/4.25 1.38/1.38 1.28/1.28 1.26/1.26 1.26/1.26
28# CTR 5.42/5.42 1.92/1.92 1.44/1.44 1.28/1.28 1.26/1.26
29# CBC 4.38/4.43 4.15/1.43 4.07/1.32 4.07/1.29 4.06/1.28
30# CCM 5.66/9.42 4.42/5.41 4.16/4.40 4.09/4.15 4.06/4.07
31# OFB 5.42/5.42 4.64/4.64 4.44/4.44 4.39/4.39 4.38/4.38
32# CFB 5.73/5.85 5.56/5.62 5.48/5.56 5.47/5.55 5.47/5.55
33#
34# ECB, CTR, CBC and CCM results are free from EVP overhead. This means
35# that otherwise used 'openssl speed -evp aes-128-??? -engine aesni
36# [-decrypt]' will exhibit 10-15% worse results for smaller blocks.
37# The results were collected with specially crafted speed.c benchmark
38# in order to compare them with results reported in "Intel Advanced
39# Encryption Standard (AES) New Instruction Set" White Paper Revision
40# 3.0 dated May 2010. All above results are consistently better. This
41# module also provides better performance for block sizes smaller than
42# 128 bytes in points *not* represented in the above table.
43#
44# Looking at the results for 8-KB buffer.
45#
46# CFB and OFB results are far from the limit, because implementation
47# uses "generic" CRYPTO_[c|o]fb128_encrypt interfaces relying on
48# single-block aesni_encrypt, which is not the most optimal way to go.
49# CBC encrypt result is unexpectedly high and there is no documented
50# explanation for it. Seemingly there is a small penalty for feeding
51# the result back to AES unit the way it's done in CBC mode. There is
52# nothing one can do and the result appears optimal. CCM result is
53# identical to CBC, because CBC-MAC is essentially CBC encrypt without
54# saving output. CCM CTR "stays invisible," because it's neatly
55# interleaved with CBC-MAC. This provides ~30% improvement over
56# "straightforward" CCM implementation with CTR and CBC-MAC performed
57# disjointly. Parallelizable modes practically achieve the theoretical
58# limit.
59#
60# Looking at how results vary with buffer size.
61#
62# Curves are practically saturated at 1-KB buffer size. In most cases
63# "256-byte" performance is >95%, and "64-byte" is ~90% of "8-KB" one.
65# CTR curve doesn't follow this pattern and is the slowest-changing one,
66# with the "256-byte" result being 87% of "8-KB." This is because the overhead
67# in CTR mode is the most computationally intensive. Small-block CCM
67# decrypt is slower than encrypt, because first CTR and last CBC-MAC
68# iterations can't be interleaved.
69#
70# Results for 192- and 256-bit keys.
71#
72# EVP-free results were observed to scale perfectly with number of
73# rounds for larger block sizes, i.e. 192-bit result being 10/12 times
74# lower and 256-bit one - 10/14. Well, in CBC encrypt case differences
75# are a tad smaller, because the above mentioned penalty biases all
76# results by the same constant value. In a similar way, function call
77# overhead affects small-block performance, as well as OFB and CFB
78# results. Differences are not large, most common coefficients are
79# 10/11.7 and 10/13.4 (as opposed to 10/12.0 and 10/14.0), but one
80# observes even 10/11.2 and 10/12.4 (CTR, OFB, CFB)...
81
82# January 2011
83#
84# While Westmere processor features 6 cycles latency for aes[enc|dec]
85# instructions, which can be scheduled every second cycle, Sandy
86# Bridge spends 8 cycles per instruction, but it can schedule them
87# every cycle. This means that code targeting Westmere would perform
88# suboptimally on Sandy Bridge. Therefore this update.
89#
90# In addition, non-parallelizable CBC encrypt (as well as CCM) is
91# optimized. Relative improvement might appear modest, 8% on Westmere,
92# but in absolute terms it's 3.77 cycles per byte encrypted with
93# 128-bit key on Westmere, and 5.07 - on Sandy Bridge. These numbers
94# should be compared to asymptotic limits of 3.75 for Westmere and
95# 5.00 for Sandy Bridge. Actually, the fact that they get this close
96# to asymptotic limits is quite amazing. Indeed, the limit is
97# calculated as latency times number of rounds, 10 for 128-bit key,
98# and divided by 16, the number of bytes in block, or in other words
99# it accounts *solely* for aesenc instructions. But there are extra
100# instructions, and numbers so close to the asymptotic limits mean
101# that it's as if it takes as little as *one* additional cycle to
102# execute all of them. How is it possible? It is possible thanks to
103# out-of-order execution logic, which manages to overlap post-
104# processing of previous block, things like saving the output, with
105# actual encryption of current block, as well as pre-processing of
106# current block, things like fetching input and xor-ing it with
107# 0-round element of the key schedule, with actual encryption of
108# previous block. Keep this in mind...
109#
110# For parallelizable modes, such as ECB, CBC decrypt, CTR, higher
111# performance is achieved by interleaving instructions working on
112# independent blocks. In which case asymptotic limit for such modes
113# can be obtained by dividing above mentioned numbers by AES
114# instructions' interleave factor. Westmere can execute at most 3
115# instructions at a time, meaning that optimal interleave factor is 3,
116# and that's where the "magic" number of 1.25 comes from. "Optimal
117# interleave factor" means that increase of interleave factor does
118# not improve performance. The formula has proven to reflect reality
119# pretty well on Westmere... Sandy Bridge on the other hand can
120# execute up to 8 AES instructions at a time, so how does varying
121# interleave factor affect the performance? Here is table for ECB
122# (numbers are cycles per byte processed with 128-bit key):
123#
124# instruction interleave factor 3x 6x 8x
125# theoretical asymptotic limit 1.67 0.83 0.625
126# measured performance for 8KB block 1.05 0.86 0.84
127#
128# "as if" interleave factor 4.7x 5.8x 6.0x
129#
130# Further data for other parallelizable modes:
131#
132# CBC decrypt 1.16 0.93 0.93
133# CTR 1.14 0.91 n/a
134#
135# Well, given 3x column it's probably inappropriate to call the limit
136# asymptotic, if it can be surpassed, isn't it? What happens there?
137# Rewind to CBC paragraph for the answer. Yes, out-of-order execution
138# magic is responsible for this. Processor overlaps not only the
139# additional instructions with AES ones, but even AES instructions
140# processing adjacent triplets of independent blocks. In the 6x case
141# additional instructions still claim a disproportionately small amount
142# of additional cycles, but in the 8x case the number of instructions must be
143# a tad too high for out-of-order logic to cope with, and the AES unit
144# remains underutilized... As you can see 8x interleave is hardly
145# justifiable, so there is no need to feel bad that 32-bit aesni-x86.pl
146# utilizes 6x interleave because of limited register bank capacity.
147#
148# Higher interleave factors do have negative impact on Westmere
149# performance. While for ECB mode it's negligible (~1.5%), other
150# parallelizable modes perform ~5% worse, which is outweighed by ~25%
151# improvement on Sandy Bridge. To balance regression on Westmere
152# CTR mode was implemented with 6x aesenc interleave factor.
153
154# April 2011
155#
156# Add aesni_xts_[en|de]crypt. Westmere spends 1.33 cycles processing
157# one byte out of 8KB with 128-bit key, Sandy Bridge - 0.97. Just like
158# in CTR mode AES instruction interleave factor was chosen to be 6x.
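# To make the arithmetic behind these limits explicit: for a 128-bit key
# (10 rounds, 16-byte blocks) Westmere's 6-cycle aesenc latency gives
# 6*10/16 = 3.75 cycles/byte for serial CBC encrypt, and its 2-cycle
# throughput gives 2*10/16 = 1.25 cycles/byte once enough independent
# blocks are interleaved. Sandy Bridge: 8*10/16 = 5.00 serial and
# 1*10/16 = 0.625 with full interleave, matching the 8x column above.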
14 159
15$PREFIX="aesni"; # if $PREFIX is set to "AES", the script 160$PREFIX="aesni"; # if $PREFIX is set to "AES", the script
16 # generates drop-in replacement for 161 # generates drop-in replacement for
@@ -29,7 +174,7 @@ die "can't locate x86_64-xlate.pl";
29 174
30open STDOUT,"| $^X $xlate $flavour $output"; 175open STDOUT,"| $^X $xlate $flavour $output";
31 176
32$movkey = $PREFIX eq "aesni" ? "movaps" : "movups"; 177$movkey = $PREFIX eq "aesni" ? "movups" : "movups";
33@_4args=$win64? ("%rcx","%rdx","%r8", "%r9") : # Win64 order 178@_4args=$win64? ("%rcx","%rdx","%r8", "%r9") : # Win64 order
34 ("%rdi","%rsi","%rdx","%rcx"); # Unix order 179 ("%rdi","%rsi","%rdx","%rcx"); # Unix order
35 180
@@ -41,18 +186,20 @@ $inp="%rdi";
41$out="%rsi"; 186$out="%rsi";
42$len="%rdx"; 187$len="%rdx";
43$key="%rcx"; # input to and changed by aesni_[en|de]cryptN !!! 188$key="%rcx"; # input to and changed by aesni_[en|de]cryptN !!!
44$ivp="%r8"; # cbc 189$ivp="%r8"; # cbc, ctr, ...
45 190
46$rnds_="%r10d"; # backup copy for $rounds 191$rnds_="%r10d"; # backup copy for $rounds
47$key_="%r11"; # backup copy for $key 192$key_="%r11"; # backup copy for $key
48 193
49# %xmm register layout 194# %xmm register layout
50$inout0="%xmm0"; $inout1="%xmm1"; 195$rndkey0="%xmm0"; $rndkey1="%xmm1";
51$inout2="%xmm2"; $inout3="%xmm3"; 196$inout0="%xmm2"; $inout1="%xmm3";
52$rndkey0="%xmm4"; $rndkey1="%xmm5"; 197$inout2="%xmm4"; $inout3="%xmm5";
53 198$inout4="%xmm6"; $inout5="%xmm7";
54$iv="%xmm6"; $in0="%xmm7"; # used in CBC decrypt 199$inout6="%xmm8"; $inout7="%xmm9";
55$in1="%xmm8"; $in2="%xmm9"; 200
201$in2="%xmm6"; $in1="%xmm7"; # used in CBC decrypt, CTR, ...
202$in0="%xmm8"; $iv="%xmm9";
56 203
57# Inline version of internal aesni_[en|de]crypt1. 204# Inline version of internal aesni_[en|de]crypt1.
58# 205#
@@ -60,20 +207,29 @@ $in1="%xmm8"; $in2="%xmm9";
60# cycles which take care of loop variables... 207# cycles which take care of loop variables...
61{ my $sn; 208{ my $sn;
62sub aesni_generate1 { 209sub aesni_generate1 {
63my ($p,$key,$rounds)=@_; 210my ($p,$key,$rounds,$inout,$ivec)=@_; $inout=$inout0 if (!defined($inout));
64++$sn; 211++$sn;
65$code.=<<___; 212$code.=<<___;
66 $movkey ($key),$rndkey0 213 $movkey ($key),$rndkey0
67 $movkey 16($key),$rndkey1 214 $movkey 16($key),$rndkey1
215___
216$code.=<<___ if (defined($ivec));
217 xorps $rndkey0,$ivec
218 lea 32($key),$key
219 xorps $ivec,$inout
220___
221$code.=<<___ if (!defined($ivec));
68 lea 32($key),$key 222 lea 32($key),$key
69 pxor $rndkey0,$inout0 223 xorps $rndkey0,$inout
224___
225$code.=<<___;
70.Loop_${p}1_$sn: 226.Loop_${p}1_$sn:
71 aes${p} $rndkey1,$inout0 227 aes${p} $rndkey1,$inout
72 dec $rounds 228 dec $rounds
73 $movkey ($key),$rndkey1 229 $movkey ($key),$rndkey1
74 lea 16($key),$key 230 lea 16($key),$key
75 jnz .Loop_${p}1_$sn # loop body is 16 bytes 231 jnz .Loop_${p}1_$sn # loop body is 16 bytes
76 aes${p}last $rndkey1,$inout0 232 aes${p}last $rndkey1,$inout
77___ 233___
78}} 234}}
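# For reference, the single-block loop generated above corresponds to this
# intrinsics sketch (hypothetical helper; rk[] is the expanded key and nr
# the number of aes${p} iterations counted by $rounds):
#
#	static __m128i aesni_encrypt_block(__m128i v, const __m128i *rk, int nr)
#	{
#		int i;
#		v = _mm_xor_si128(v, rk[0]);                /* whitening round */
#		for (i = 1; i <= nr; i++)
#			v = _mm_aesenc_si128(v, rk[i]);         /* middle rounds   */
#		return _mm_aesenclast_si128(v, rk[nr + 1]); /* final round     */
#	}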
79# void $PREFIX_[en|de]crypt (const void *inp,void *out,const AES_KEY *key); 235# void $PREFIX_[en|de]crypt (const void *inp,void *out,const AES_KEY *key);
@@ -86,7 +242,7 @@ $code.=<<___;
86.align 16 242.align 16
87${PREFIX}_encrypt: 243${PREFIX}_encrypt:
88 movups ($inp),$inout0 # load input 244 movups ($inp),$inout0 # load input
89 mov 240($key),$rounds # pull $rounds 245 mov 240($key),$rounds # key->rounds
90___ 246___
91 &aesni_generate1("enc",$key,$rounds); 247 &aesni_generate1("enc",$key,$rounds);
92$code.=<<___; 248$code.=<<___;
@@ -99,7 +255,7 @@ $code.=<<___;
99.align 16 255.align 16
100${PREFIX}_decrypt: 256${PREFIX}_decrypt:
101 movups ($inp),$inout0 # load input 257 movups ($inp),$inout0 # load input
102 mov 240($key),$rounds # pull $rounds 258 mov 240($key),$rounds # key->rounds
103___ 259___
104 &aesni_generate1("dec",$key,$rounds); 260 &aesni_generate1("dec",$key,$rounds);
105$code.=<<___; 261$code.=<<___;
@@ -109,16 +265,16 @@ $code.=<<___;
109___ 265___
110} 266}
111 267
112# _aesni_[en|de]crypt[34] are private interfaces, N denotes interleave 268# _aesni_[en|de]cryptN are private interfaces, N denotes interleave
113# factor. Why 3x subroutine is used in loops? Even though aes[enc|dec] 269# factor. Why 3x subroutines were originally used in loops? Even though
114# latency is 6, it turned out that it can be scheduled only every 270# aes[enc|dec] latency was originally 6, it could be scheduled only
115# *second* cycle. Thus 3x interleave is the one providing optimal 271# every *2nd* cycle. Thus 3x interleave was the one providing optimal
116# utilization, i.e. when subroutine's throughput is virtually same as 272# utilization, i.e. when subroutine's throughput is virtually same as
117# of non-interleaved subroutine [for number of input blocks up to 3]. 273# of non-interleaved subroutine [for number of input blocks up to 3].
118# This is why it makes no sense to implement 2x subroutine. As soon 274# This is why it makes no sense to implement 2x subroutine.
119# as/if Intel improves throughput by making it possible to schedule 275# aes[enc|dec] latency in next processor generation is 8, but the
120# the instructions in question *every* cycles I would have to 276# instructions can be scheduled every cycle. Optimal interleave for
121# implement 6x interleave and use it in loop... 277# new processor is therefore 8x...
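# The interleave idea in intrinsics form (a sketch, not this module's
# code): several independent blocks share one pass over the key schedule,
# so one block's aes[enc|dec] latency is hidden behind work on the others.
# As above, this relies on <wmmintrin.h>:
#
#	static void encrypt3_interleaved(__m128i b[3], const __m128i *rk, int nr)
#	{
#		int i, j;
#		for (j = 0; j < 3; j++)
#			b[j] = _mm_xor_si128(b[j], rk[0]);
#		for (i = 1; i <= nr; i++)
#			for (j = 0; j < 3; j++)
#				b[j] = _mm_aesenc_si128(b[j], rk[i]);
#		for (j = 0; j < 3; j++)
#			b[j] = _mm_aesenclast_si128(b[j], rk[nr + 1]);
#	}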
122sub aesni_generate3 { 278sub aesni_generate3 {
123my $dir=shift; 279my $dir=shift;
124# As already mentioned it takes in $key and $rounds, which are *not* 280# As already mentioned it takes in $key and $rounds, which are *not*
@@ -131,25 +287,25 @@ _aesni_${dir}rypt3:
131 shr \$1,$rounds 287 shr \$1,$rounds
132 $movkey 16($key),$rndkey1 288 $movkey 16($key),$rndkey1
133 lea 32($key),$key 289 lea 32($key),$key
134 pxor $rndkey0,$inout0 290 xorps $rndkey0,$inout0
135 pxor $rndkey0,$inout1 291 xorps $rndkey0,$inout1
136 pxor $rndkey0,$inout2 292 xorps $rndkey0,$inout2
293 $movkey ($key),$rndkey0
137 294
138.L${dir}_loop3: 295.L${dir}_loop3:
139 aes${dir} $rndkey1,$inout0 296 aes${dir} $rndkey1,$inout0
140 $movkey ($key),$rndkey0
141 aes${dir} $rndkey1,$inout1 297 aes${dir} $rndkey1,$inout1
142 dec $rounds 298 dec $rounds
143 aes${dir} $rndkey1,$inout2 299 aes${dir} $rndkey1,$inout2
144 aes${dir} $rndkey0,$inout0
145 $movkey 16($key),$rndkey1 300 $movkey 16($key),$rndkey1
301 aes${dir} $rndkey0,$inout0
146 aes${dir} $rndkey0,$inout1 302 aes${dir} $rndkey0,$inout1
147 lea 32($key),$key 303 lea 32($key),$key
148 aes${dir} $rndkey0,$inout2 304 aes${dir} $rndkey0,$inout2
305 $movkey ($key),$rndkey0
149 jnz .L${dir}_loop3 306 jnz .L${dir}_loop3
150 307
151 aes${dir} $rndkey1,$inout0 308 aes${dir} $rndkey1,$inout0
152 $movkey ($key),$rndkey0
153 aes${dir} $rndkey1,$inout1 309 aes${dir} $rndkey1,$inout1
154 aes${dir} $rndkey1,$inout2 310 aes${dir} $rndkey1,$inout2
155 aes${dir}last $rndkey0,$inout0 311 aes${dir}last $rndkey0,$inout0
@@ -175,28 +331,28 @@ _aesni_${dir}rypt4:
175 shr \$1,$rounds 331 shr \$1,$rounds
176 $movkey 16($key),$rndkey1 332 $movkey 16($key),$rndkey1
177 lea 32($key),$key 333 lea 32($key),$key
178 pxor $rndkey0,$inout0 334 xorps $rndkey0,$inout0
179 pxor $rndkey0,$inout1 335 xorps $rndkey0,$inout1
180 pxor $rndkey0,$inout2 336 xorps $rndkey0,$inout2
181 pxor $rndkey0,$inout3 337 xorps $rndkey0,$inout3
338 $movkey ($key),$rndkey0
182 339
183.L${dir}_loop4: 340.L${dir}_loop4:
184 aes${dir} $rndkey1,$inout0 341 aes${dir} $rndkey1,$inout0
185 $movkey ($key),$rndkey0
186 aes${dir} $rndkey1,$inout1 342 aes${dir} $rndkey1,$inout1
187 dec $rounds 343 dec $rounds
188 aes${dir} $rndkey1,$inout2 344 aes${dir} $rndkey1,$inout2
189 aes${dir} $rndkey1,$inout3 345 aes${dir} $rndkey1,$inout3
190 aes${dir} $rndkey0,$inout0
191 $movkey 16($key),$rndkey1 346 $movkey 16($key),$rndkey1
347 aes${dir} $rndkey0,$inout0
192 aes${dir} $rndkey0,$inout1 348 aes${dir} $rndkey0,$inout1
193 lea 32($key),$key 349 lea 32($key),$key
194 aes${dir} $rndkey0,$inout2 350 aes${dir} $rndkey0,$inout2
195 aes${dir} $rndkey0,$inout3 351 aes${dir} $rndkey0,$inout3
352 $movkey ($key),$rndkey0
196 jnz .L${dir}_loop4 353 jnz .L${dir}_loop4
197 354
198 aes${dir} $rndkey1,$inout0 355 aes${dir} $rndkey1,$inout0
199 $movkey ($key),$rndkey0
200 aes${dir} $rndkey1,$inout1 356 aes${dir} $rndkey1,$inout1
201 aes${dir} $rndkey1,$inout2 357 aes${dir} $rndkey1,$inout2
202 aes${dir} $rndkey1,$inout3 358 aes${dir} $rndkey1,$inout3
@@ -208,12 +364,158 @@ _aesni_${dir}rypt4:
208.size _aesni_${dir}rypt4,.-_aesni_${dir}rypt4 364.size _aesni_${dir}rypt4,.-_aesni_${dir}rypt4
209___ 365___
210} 366}
367sub aesni_generate6 {
368my $dir=shift;
369# As already mentioned it takes in $key and $rounds, which are *not*
370# preserved. $inout[0-5] is cipher/clear text...
371$code.=<<___;
372.type _aesni_${dir}rypt6,\@abi-omnipotent
373.align 16
374_aesni_${dir}rypt6:
375 $movkey ($key),$rndkey0
376 shr \$1,$rounds
377 $movkey 16($key),$rndkey1
378 lea 32($key),$key
379 xorps $rndkey0,$inout0
380 pxor $rndkey0,$inout1
381 aes${dir} $rndkey1,$inout0
382 pxor $rndkey0,$inout2
383 aes${dir} $rndkey1,$inout1
384 pxor $rndkey0,$inout3
385 aes${dir} $rndkey1,$inout2
386 pxor $rndkey0,$inout4
387 aes${dir} $rndkey1,$inout3
388 pxor $rndkey0,$inout5
389 dec $rounds
390 aes${dir} $rndkey1,$inout4
391 $movkey ($key),$rndkey0
392 aes${dir} $rndkey1,$inout5
393 jmp .L${dir}_loop6_enter
394.align 16
395.L${dir}_loop6:
396 aes${dir} $rndkey1,$inout0
397 aes${dir} $rndkey1,$inout1
398 dec $rounds
399 aes${dir} $rndkey1,$inout2
400 aes${dir} $rndkey1,$inout3
401 aes${dir} $rndkey1,$inout4
402 aes${dir} $rndkey1,$inout5
403.L${dir}_loop6_enter: # happens to be 16-byte aligned
404 $movkey 16($key),$rndkey1
405 aes${dir} $rndkey0,$inout0
406 aes${dir} $rndkey0,$inout1
407 lea 32($key),$key
408 aes${dir} $rndkey0,$inout2
409 aes${dir} $rndkey0,$inout3
410 aes${dir} $rndkey0,$inout4
411 aes${dir} $rndkey0,$inout5
412 $movkey ($key),$rndkey0
413 jnz .L${dir}_loop6
414
415 aes${dir} $rndkey1,$inout0
416 aes${dir} $rndkey1,$inout1
417 aes${dir} $rndkey1,$inout2
418 aes${dir} $rndkey1,$inout3
419 aes${dir} $rndkey1,$inout4
420 aes${dir} $rndkey1,$inout5
421 aes${dir}last $rndkey0,$inout0
422 aes${dir}last $rndkey0,$inout1
423 aes${dir}last $rndkey0,$inout2
424 aes${dir}last $rndkey0,$inout3
425 aes${dir}last $rndkey0,$inout4
426 aes${dir}last $rndkey0,$inout5
427 ret
428.size _aesni_${dir}rypt6,.-_aesni_${dir}rypt6
429___
430}
431sub aesni_generate8 {
432my $dir=shift;
433# As already mentioned it takes in $key and $rounds, which are *not*
434# preserved. $inout[0-7] is cipher/clear text...
435$code.=<<___;
436.type _aesni_${dir}rypt8,\@abi-omnipotent
437.align 16
438_aesni_${dir}rypt8:
439 $movkey ($key),$rndkey0
440 shr \$1,$rounds
441 $movkey 16($key),$rndkey1
442 lea 32($key),$key
443 xorps $rndkey0,$inout0
444 xorps $rndkey0,$inout1
445 aes${dir} $rndkey1,$inout0
446 pxor $rndkey0,$inout2
447 aes${dir} $rndkey1,$inout1
448 pxor $rndkey0,$inout3
449 aes${dir} $rndkey1,$inout2
450 pxor $rndkey0,$inout4
451 aes${dir} $rndkey1,$inout3
452 pxor $rndkey0,$inout5
453 dec $rounds
454 aes${dir} $rndkey1,$inout4
455 pxor $rndkey0,$inout6
456 aes${dir} $rndkey1,$inout5
457 pxor $rndkey0,$inout7
458 $movkey ($key),$rndkey0
459 aes${dir} $rndkey1,$inout6
460 aes${dir} $rndkey1,$inout7
461 $movkey 16($key),$rndkey1
462 jmp .L${dir}_loop8_enter
463.align 16
464.L${dir}_loop8:
465 aes${dir} $rndkey1,$inout0
466 aes${dir} $rndkey1,$inout1
467 dec $rounds
468 aes${dir} $rndkey1,$inout2
469 aes${dir} $rndkey1,$inout3
470 aes${dir} $rndkey1,$inout4
471 aes${dir} $rndkey1,$inout5
472 aes${dir} $rndkey1,$inout6
473 aes${dir} $rndkey1,$inout7
474 $movkey 16($key),$rndkey1
475.L${dir}_loop8_enter: # happens to be 16-byte aligned
476 aes${dir} $rndkey0,$inout0
477 aes${dir} $rndkey0,$inout1
478 lea 32($key),$key
479 aes${dir} $rndkey0,$inout2
480 aes${dir} $rndkey0,$inout3
481 aes${dir} $rndkey0,$inout4
482 aes${dir} $rndkey0,$inout5
483 aes${dir} $rndkey0,$inout6
484 aes${dir} $rndkey0,$inout7
485 $movkey ($key),$rndkey0
486 jnz .L${dir}_loop8
487
488 aes${dir} $rndkey1,$inout0
489 aes${dir} $rndkey1,$inout1
490 aes${dir} $rndkey1,$inout2
491 aes${dir} $rndkey1,$inout3
492 aes${dir} $rndkey1,$inout4
493 aes${dir} $rndkey1,$inout5
494 aes${dir} $rndkey1,$inout6
495 aes${dir} $rndkey1,$inout7
496 aes${dir}last $rndkey0,$inout0
497 aes${dir}last $rndkey0,$inout1
498 aes${dir}last $rndkey0,$inout2
499 aes${dir}last $rndkey0,$inout3
500 aes${dir}last $rndkey0,$inout4
501 aes${dir}last $rndkey0,$inout5
502 aes${dir}last $rndkey0,$inout6
503 aes${dir}last $rndkey0,$inout7
504 ret
505.size _aesni_${dir}rypt8,.-_aesni_${dir}rypt8
506___
507}
211&aesni_generate3("enc") if ($PREFIX eq "aesni"); 508&aesni_generate3("enc") if ($PREFIX eq "aesni");
212&aesni_generate3("dec"); 509&aesni_generate3("dec");
213&aesni_generate4("enc") if ($PREFIX eq "aesni"); 510&aesni_generate4("enc") if ($PREFIX eq "aesni");
214&aesni_generate4("dec"); 511&aesni_generate4("dec");
512&aesni_generate6("enc") if ($PREFIX eq "aesni");
513&aesni_generate6("dec");
514&aesni_generate8("enc") if ($PREFIX eq "aesni");
515&aesni_generate8("dec");
215 516
216if ($PREFIX eq "aesni") { 517if ($PREFIX eq "aesni") {
518########################################################################
217# void aesni_ecb_encrypt (const void *in, void *out, 519# void aesni_ecb_encrypt (const void *in, void *out,
218# size_t length, const AES_KEY *key, 520# size_t length, const AES_KEY *key,
219# int enc); 521# int enc);
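# A minimal C caller sketch for the prototype above; the key schedule is assumed
# to have been expanded by the companion ${PREFIX}_set_encrypt_key routine defined
# elsewhere in this file, and the wrapper name below is illustrative only.

#include <stddef.h>
#include <openssl/aes.h>

void aesni_ecb_encrypt(const void *in, void *out, size_t length,
                       const AES_KEY *key, int enc);

/* Hedged sketch: enc != 0 takes the encrypt path, enc == 0 the decrypt path.
 * The routine itself rounds length down to whole 16-byte blocks, so the
 * caller only supplies a schedule in this file's layout (rounds at offset
 * 240, round keys in order). */
static void ecb_encrypt_sketch(const unsigned char *in, unsigned char *out,
                               size_t len, const AES_KEY *enc_key)
{
        aesni_ecb_encrypt(in, out, len, enc_key, 1);
}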
@@ -222,54 +524,98 @@ $code.=<<___;
222.type aesni_ecb_encrypt,\@function,5 524.type aesni_ecb_encrypt,\@function,5
223.align 16 525.align 16
224aesni_ecb_encrypt: 526aesni_ecb_encrypt:
225 cmp \$16,$len # check length
226 jb .Lecb_ret
227
228 mov 240($key),$rounds # pull $rounds
229 and \$-16,$len 527 and \$-16,$len
528 jz .Lecb_ret
529
530 mov 240($key),$rounds # key->rounds
531 $movkey ($key),$rndkey0
230 mov $key,$key_ # backup $key 532 mov $key,$key_ # backup $key
231 test %r8d,%r8d # 5th argument
232 mov $rounds,$rnds_ # backup $rounds 533 mov $rounds,$rnds_ # backup $rounds
534 test %r8d,%r8d # 5th argument
233 jz .Lecb_decrypt 535 jz .Lecb_decrypt
234#--------------------------- ECB ENCRYPT ------------------------------# 536#--------------------------- ECB ENCRYPT ------------------------------#
235 sub \$0x40,$len 537 cmp \$0x80,$len
236 jbe .Lecb_enc_tail 538 jb .Lecb_enc_tail
237 jmp .Lecb_enc_loop3 539
540 movdqu ($inp),$inout0
541 movdqu 0x10($inp),$inout1
542 movdqu 0x20($inp),$inout2
543 movdqu 0x30($inp),$inout3
544 movdqu 0x40($inp),$inout4
545 movdqu 0x50($inp),$inout5
546 movdqu 0x60($inp),$inout6
547 movdqu 0x70($inp),$inout7
548 lea 0x80($inp),$inp
549 sub \$0x80,$len
550 jmp .Lecb_enc_loop8_enter
238.align 16 551.align 16
239.Lecb_enc_loop3: 552.Lecb_enc_loop8:
240 movups ($inp),$inout0 553 movups $inout0,($out)
241 movups 0x10($inp),$inout1
242 movups 0x20($inp),$inout2
243 call _aesni_encrypt3
244 sub \$0x30,$len
245 lea 0x30($inp),$inp
246 lea 0x30($out),$out
247 movups $inout0,-0x30($out)
248 mov $rnds_,$rounds # restore $rounds
249 movups $inout1,-0x20($out)
250 mov $key_,$key # restore $key 554 mov $key_,$key # restore $key
251 movups $inout2,-0x10($out) 555 movdqu ($inp),$inout0
252 ja .Lecb_enc_loop3 556 mov $rnds_,$rounds # restore $rounds
557 movups $inout1,0x10($out)
558 movdqu 0x10($inp),$inout1
559 movups $inout2,0x20($out)
560 movdqu 0x20($inp),$inout2
561 movups $inout3,0x30($out)
562 movdqu 0x30($inp),$inout3
563 movups $inout4,0x40($out)
564 movdqu 0x40($inp),$inout4
565 movups $inout5,0x50($out)
566 movdqu 0x50($inp),$inout5
567 movups $inout6,0x60($out)
568 movdqu 0x60($inp),$inout6
569 movups $inout7,0x70($out)
570 lea 0x80($out),$out
571 movdqu 0x70($inp),$inout7
572 lea 0x80($inp),$inp
573.Lecb_enc_loop8_enter:
574
575 call _aesni_encrypt8
576
577 sub \$0x80,$len
578 jnc .Lecb_enc_loop8
253 579
254.Lecb_enc_tail: 580 movups $inout0,($out)
255 add \$0x40,$len 581 mov $key_,$key # restore $key
582 movups $inout1,0x10($out)
583 mov $rnds_,$rounds # restore $rounds
584 movups $inout2,0x20($out)
585 movups $inout3,0x30($out)
586 movups $inout4,0x40($out)
587 movups $inout5,0x50($out)
588 movups $inout6,0x60($out)
589 movups $inout7,0x70($out)
590 lea 0x80($out),$out
591 add \$0x80,$len
256 jz .Lecb_ret 592 jz .Lecb_ret
257 593
258 cmp \$0x10,$len 594.Lecb_enc_tail:
259 movups ($inp),$inout0 595 movups ($inp),$inout0
260 je .Lecb_enc_one
261 cmp \$0x20,$len 596 cmp \$0x20,$len
597 jb .Lecb_enc_one
262 movups 0x10($inp),$inout1 598 movups 0x10($inp),$inout1
263 je .Lecb_enc_two 599 je .Lecb_enc_two
264 cmp \$0x30,$len
265 movups 0x20($inp),$inout2 600 movups 0x20($inp),$inout2
266 je .Lecb_enc_three 601 cmp \$0x40,$len
602 jb .Lecb_enc_three
267 movups 0x30($inp),$inout3 603 movups 0x30($inp),$inout3
268 call _aesni_encrypt4 604 je .Lecb_enc_four
605 movups 0x40($inp),$inout4
606 cmp \$0x60,$len
607 jb .Lecb_enc_five
608 movups 0x50($inp),$inout5
609 je .Lecb_enc_six
610 movdqu 0x60($inp),$inout6
611 call _aesni_encrypt8
269 movups $inout0,($out) 612 movups $inout0,($out)
270 movups $inout1,0x10($out) 613 movups $inout1,0x10($out)
271 movups $inout2,0x20($out) 614 movups $inout2,0x20($out)
272 movups $inout3,0x30($out) 615 movups $inout3,0x30($out)
616 movups $inout4,0x40($out)
617 movups $inout5,0x50($out)
618 movups $inout6,0x60($out)
273 jmp .Lecb_ret 619 jmp .Lecb_ret
274.align 16 620.align 16
275.Lecb_enc_one: 621.Lecb_enc_one:
@@ -280,6 +626,7 @@ $code.=<<___;
280 jmp .Lecb_ret 626 jmp .Lecb_ret
281.align 16 627.align 16
282.Lecb_enc_two: 628.Lecb_enc_two:
629 xorps $inout2,$inout2
283 call _aesni_encrypt3 630 call _aesni_encrypt3
284 movups $inout0,($out) 631 movups $inout0,($out)
285 movups $inout1,0x10($out) 632 movups $inout1,0x10($out)
@@ -291,47 +638,121 @@ $code.=<<___;
291 movups $inout1,0x10($out) 638 movups $inout1,0x10($out)
292 movups $inout2,0x20($out) 639 movups $inout2,0x20($out)
293 jmp .Lecb_ret 640 jmp .Lecb_ret
641.align 16
642.Lecb_enc_four:
643 call _aesni_encrypt4
644 movups $inout0,($out)
645 movups $inout1,0x10($out)
646 movups $inout2,0x20($out)
647 movups $inout3,0x30($out)
648 jmp .Lecb_ret
649.align 16
650.Lecb_enc_five:
651 xorps $inout5,$inout5
652 call _aesni_encrypt6
653 movups $inout0,($out)
654 movups $inout1,0x10($out)
655 movups $inout2,0x20($out)
656 movups $inout3,0x30($out)
657 movups $inout4,0x40($out)
658 jmp .Lecb_ret
659.align 16
660.Lecb_enc_six:
661 call _aesni_encrypt6
662 movups $inout0,($out)
663 movups $inout1,0x10($out)
664 movups $inout2,0x20($out)
665 movups $inout3,0x30($out)
666 movups $inout4,0x40($out)
667 movups $inout5,0x50($out)
668 jmp .Lecb_ret
294 #--------------------------- ECB DECRYPT ------------------------------# 669 #--------------------------- ECB DECRYPT ------------------------------#
295.align 16 670.align 16
296.Lecb_decrypt: 671.Lecb_decrypt:
297 sub \$0x40,$len 672 cmp \$0x80,$len
298 jbe .Lecb_dec_tail 673 jb .Lecb_dec_tail
299 jmp .Lecb_dec_loop3 674
675 movdqu ($inp),$inout0
676 movdqu 0x10($inp),$inout1
677 movdqu 0x20($inp),$inout2
678 movdqu 0x30($inp),$inout3
679 movdqu 0x40($inp),$inout4
680 movdqu 0x50($inp),$inout5
681 movdqu 0x60($inp),$inout6
682 movdqu 0x70($inp),$inout7
683 lea 0x80($inp),$inp
684 sub \$0x80,$len
685 jmp .Lecb_dec_loop8_enter
300.align 16 686.align 16
301.Lecb_dec_loop3: 687.Lecb_dec_loop8:
302 movups ($inp),$inout0 688 movups $inout0,($out)
303 movups 0x10($inp),$inout1
304 movups 0x20($inp),$inout2
305 call _aesni_decrypt3
306 sub \$0x30,$len
307 lea 0x30($inp),$inp
308 lea 0x30($out),$out
309 movups $inout0,-0x30($out)
310 mov $rnds_,$rounds # restore $rounds
311 movups $inout1,-0x20($out)
312 mov $key_,$key # restore $key 689 mov $key_,$key # restore $key
313 movups $inout2,-0x10($out) 690 movdqu ($inp),$inout0
314 ja .Lecb_dec_loop3 691 mov $rnds_,$rounds # restore $rounds
692 movups $inout1,0x10($out)
693 movdqu 0x10($inp),$inout1
694 movups $inout2,0x20($out)
695 movdqu 0x20($inp),$inout2
696 movups $inout3,0x30($out)
697 movdqu 0x30($inp),$inout3
698 movups $inout4,0x40($out)
699 movdqu 0x40($inp),$inout4
700 movups $inout5,0x50($out)
701 movdqu 0x50($inp),$inout5
702 movups $inout6,0x60($out)
703 movdqu 0x60($inp),$inout6
704 movups $inout7,0x70($out)
705 lea 0x80($out),$out
706 movdqu 0x70($inp),$inout7
707 lea 0x80($inp),$inp
708.Lecb_dec_loop8_enter:
709
710 call _aesni_decrypt8
711
712 $movkey ($key_),$rndkey0
713 sub \$0x80,$len
714 jnc .Lecb_dec_loop8
315 715
316.Lecb_dec_tail: 716 movups $inout0,($out)
317 add \$0x40,$len 717 mov $key_,$key # restore $key
718 movups $inout1,0x10($out)
719 mov $rnds_,$rounds # restore $rounds
720 movups $inout2,0x20($out)
721 movups $inout3,0x30($out)
722 movups $inout4,0x40($out)
723 movups $inout5,0x50($out)
724 movups $inout6,0x60($out)
725 movups $inout7,0x70($out)
726 lea 0x80($out),$out
727 add \$0x80,$len
318 jz .Lecb_ret 728 jz .Lecb_ret
319 729
320 cmp \$0x10,$len 730.Lecb_dec_tail:
321 movups ($inp),$inout0 731 movups ($inp),$inout0
322 je .Lecb_dec_one
323 cmp \$0x20,$len 732 cmp \$0x20,$len
733 jb .Lecb_dec_one
324 movups 0x10($inp),$inout1 734 movups 0x10($inp),$inout1
325 je .Lecb_dec_two 735 je .Lecb_dec_two
326 cmp \$0x30,$len
327 movups 0x20($inp),$inout2 736 movups 0x20($inp),$inout2
328 je .Lecb_dec_three 737 cmp \$0x40,$len
738 jb .Lecb_dec_three
329 movups 0x30($inp),$inout3 739 movups 0x30($inp),$inout3
330 call _aesni_decrypt4 740 je .Lecb_dec_four
741 movups 0x40($inp),$inout4
742 cmp \$0x60,$len
743 jb .Lecb_dec_five
744 movups 0x50($inp),$inout5
745 je .Lecb_dec_six
746 movups 0x60($inp),$inout6
747 $movkey ($key),$rndkey0
748 call _aesni_decrypt8
331 movups $inout0,($out) 749 movups $inout0,($out)
332 movups $inout1,0x10($out) 750 movups $inout1,0x10($out)
333 movups $inout2,0x20($out) 751 movups $inout2,0x20($out)
334 movups $inout3,0x30($out) 752 movups $inout3,0x30($out)
753 movups $inout4,0x40($out)
754 movups $inout5,0x50($out)
755 movups $inout6,0x60($out)
335 jmp .Lecb_ret 756 jmp .Lecb_ret
336.align 16 757.align 16
337.Lecb_dec_one: 758.Lecb_dec_one:
@@ -342,6 +763,7 @@ $code.=<<___;
342 jmp .Lecb_ret 763 jmp .Lecb_ret
343.align 16 764.align 16
344.Lecb_dec_two: 765.Lecb_dec_two:
766 xorps $inout2,$inout2
345 call _aesni_decrypt3 767 call _aesni_decrypt3
346 movups $inout0,($out) 768 movups $inout0,($out)
347 movups $inout1,0x10($out) 769 movups $inout1,0x10($out)
@@ -352,17 +774,1353 @@ $code.=<<___;
352 movups $inout0,($out) 774 movups $inout0,($out)
353 movups $inout1,0x10($out) 775 movups $inout1,0x10($out)
354 movups $inout2,0x20($out) 776 movups $inout2,0x20($out)
777 jmp .Lecb_ret
778.align 16
779.Lecb_dec_four:
780 call _aesni_decrypt4
781 movups $inout0,($out)
782 movups $inout1,0x10($out)
783 movups $inout2,0x20($out)
784 movups $inout3,0x30($out)
785 jmp .Lecb_ret
786.align 16
787.Lecb_dec_five:
788 xorps $inout5,$inout5
789 call _aesni_decrypt6
790 movups $inout0,($out)
791 movups $inout1,0x10($out)
792 movups $inout2,0x20($out)
793 movups $inout3,0x30($out)
794 movups $inout4,0x40($out)
795 jmp .Lecb_ret
796.align 16
797.Lecb_dec_six:
798 call _aesni_decrypt6
799 movups $inout0,($out)
800 movups $inout1,0x10($out)
801 movups $inout2,0x20($out)
802 movups $inout3,0x30($out)
803 movups $inout4,0x40($out)
804 movups $inout5,0x50($out)
355 805
356.Lecb_ret: 806.Lecb_ret:
357 ret 807 ret
358.size aesni_ecb_encrypt,.-aesni_ecb_encrypt 808.size aesni_ecb_encrypt,.-aesni_ecb_encrypt
359___ 809___
810
811{
812######################################################################
813# void aesni_ccm64_[en|de]crypt_blocks (const void *in, void *out,
814# size_t blocks, const AES_KEY *key,
815# const char *ivec,char *cmac);
816#
817# Handles only complete blocks, operates on 64-bit counter and
818# does not update *ivec! Nor does it finalize CMAC value
819# (see engine/eng_aesni.c for details)
820#
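# In caller terms the comment above makes this a block-level helper only.
# A hedged C sketch follows: key setup, counter/IV formatting and the final
# CMAC computation are all assumed to be handled by the caller, and the
# wrapper name is illustrative.

#include <stddef.h>
#include <openssl/aes.h>

void aesni_ccm64_encrypt_blocks(const void *in, void *out, size_t blocks,
                                const AES_KEY *key, const char *ivec,
                                char *cmac);

/* Processes whole 16-byte blocks only; *ivec is read but not written back,
 * and *cmac carries the running, still unfinalized CBC-MAC state. */
static void ccm64_encrypt_chunk(const unsigned char *in, unsigned char *out,
                                size_t nblocks, const AES_KEY *key,
                                const unsigned char ivec[16],
                                unsigned char cmac[16])
{
        aesni_ccm64_encrypt_blocks(in, out, nblocks, key,
                                   (const char *)ivec, (char *)cmac);
}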
821{
822my $cmac="%r9"; # 6th argument
823
824my $increment="%xmm6";
825my $bswap_mask="%xmm7";
826
827$code.=<<___;
828.globl aesni_ccm64_encrypt_blocks
829.type aesni_ccm64_encrypt_blocks,\@function,6
830.align 16
831aesni_ccm64_encrypt_blocks:
832___
833$code.=<<___ if ($win64);
834 lea -0x58(%rsp),%rsp
835 movaps %xmm6,(%rsp)
836 movaps %xmm7,0x10(%rsp)
837 movaps %xmm8,0x20(%rsp)
838 movaps %xmm9,0x30(%rsp)
839.Lccm64_enc_body:
840___
841$code.=<<___;
842 mov 240($key),$rounds # key->rounds
843 movdqu ($ivp),$iv
844 movdqa .Lincrement64(%rip),$increment
845 movdqa .Lbswap_mask(%rip),$bswap_mask
846
847 shr \$1,$rounds
848 lea 0($key),$key_
849 movdqu ($cmac),$inout1
850 movdqa $iv,$inout0
851 mov $rounds,$rnds_
852 pshufb $bswap_mask,$iv
853 jmp .Lccm64_enc_outer
854.align 16
855.Lccm64_enc_outer:
856 $movkey ($key_),$rndkey0
857 mov $rnds_,$rounds
858 movups ($inp),$in0 # load inp
859
860 xorps $rndkey0,$inout0 # counter
861 $movkey 16($key_),$rndkey1
862 xorps $in0,$rndkey0
863 lea 32($key_),$key
864 xorps $rndkey0,$inout1 # cmac^=inp
865 $movkey ($key),$rndkey0
866
867.Lccm64_enc2_loop:
868 aesenc $rndkey1,$inout0
869 dec $rounds
870 aesenc $rndkey1,$inout1
871 $movkey 16($key),$rndkey1
872 aesenc $rndkey0,$inout0
873 lea 32($key),$key
874 aesenc $rndkey0,$inout1
875 $movkey 0($key),$rndkey0
876 jnz .Lccm64_enc2_loop
877 aesenc $rndkey1,$inout0
878 aesenc $rndkey1,$inout1
879 paddq $increment,$iv
880 aesenclast $rndkey0,$inout0
881 aesenclast $rndkey0,$inout1
882
883 dec $len
884 lea 16($inp),$inp
885 xorps $inout0,$in0 # inp ^= E(iv)
886 movdqa $iv,$inout0
887 movups $in0,($out) # save output
888 lea 16($out),$out
889 pshufb $bswap_mask,$inout0
890 jnz .Lccm64_enc_outer
891
892 movups $inout1,($cmac)
893___
894$code.=<<___ if ($win64);
895 movaps (%rsp),%xmm6
896 movaps 0x10(%rsp),%xmm7
897 movaps 0x20(%rsp),%xmm8
898 movaps 0x30(%rsp),%xmm9
899 lea 0x58(%rsp),%rsp
900.Lccm64_enc_ret:
901___
902$code.=<<___;
903 ret
904.size aesni_ccm64_encrypt_blocks,.-aesni_ccm64_encrypt_blocks
905___
906######################################################################
907$code.=<<___;
908.globl aesni_ccm64_decrypt_blocks
909.type aesni_ccm64_decrypt_blocks,\@function,6
910.align 16
911aesni_ccm64_decrypt_blocks:
912___
913$code.=<<___ if ($win64);
914 lea -0x58(%rsp),%rsp
915 movaps %xmm6,(%rsp)
916 movaps %xmm7,0x10(%rsp)
917 movaps %xmm8,0x20(%rsp)
918 movaps %xmm9,0x30(%rsp)
919.Lccm64_dec_body:
920___
921$code.=<<___;
922 mov 240($key),$rounds # key->rounds
923 movups ($ivp),$iv
924 movdqu ($cmac),$inout1
925 movdqa .Lincrement64(%rip),$increment
926 movdqa .Lbswap_mask(%rip),$bswap_mask
927
928 movaps $iv,$inout0
929 mov $rounds,$rnds_
930 mov $key,$key_
931 pshufb $bswap_mask,$iv
932___
933 &aesni_generate1("enc",$key,$rounds);
934$code.=<<___;
935 movups ($inp),$in0 # load inp
936 paddq $increment,$iv
937 lea 16($inp),$inp
938 jmp .Lccm64_dec_outer
939.align 16
940.Lccm64_dec_outer:
941 xorps $inout0,$in0 # inp ^= E(iv)
942 movdqa $iv,$inout0
943 mov $rnds_,$rounds
944 movups $in0,($out) # save output
945 lea 16($out),$out
946 pshufb $bswap_mask,$inout0
947
948 sub \$1,$len
949 jz .Lccm64_dec_break
950
951 $movkey ($key_),$rndkey0
952 shr \$1,$rounds
953 $movkey 16($key_),$rndkey1
954 xorps $rndkey0,$in0
955 lea 32($key_),$key
956 xorps $rndkey0,$inout0
957 xorps $in0,$inout1 # cmac^=out
958 $movkey ($key),$rndkey0
959
960.Lccm64_dec2_loop:
961 aesenc $rndkey1,$inout0
962 dec $rounds
963 aesenc $rndkey1,$inout1
964 $movkey 16($key),$rndkey1
965 aesenc $rndkey0,$inout0
966 lea 32($key),$key
967 aesenc $rndkey0,$inout1
968 $movkey 0($key),$rndkey0
969 jnz .Lccm64_dec2_loop
970 movups ($inp),$in0 # load inp
971 paddq $increment,$iv
972 aesenc $rndkey1,$inout0
973 aesenc $rndkey1,$inout1
974 lea 16($inp),$inp
975 aesenclast $rndkey0,$inout0
976 aesenclast $rndkey0,$inout1
977 jmp .Lccm64_dec_outer
978
979.align 16
980.Lccm64_dec_break:
981 #xorps $in0,$inout1 # cmac^=out
982___
983 &aesni_generate1("enc",$key_,$rounds,$inout1,$in0);
984$code.=<<___;
985 movups $inout1,($cmac)
986___
987$code.=<<___ if ($win64);
988 movaps (%rsp),%xmm6
989 movaps 0x10(%rsp),%xmm7
990 movaps 0x20(%rsp),%xmm8
991 movaps 0x30(%rsp),%xmm9
992 lea 0x58(%rsp),%rsp
993.Lccm64_dec_ret:
994___
995$code.=<<___;
996 ret
997.size aesni_ccm64_decrypt_blocks,.-aesni_ccm64_decrypt_blocks
998___
999}
1000######################################################################
1001# void aesni_ctr32_encrypt_blocks (const void *in, void *out,
1002# size_t blocks, const AES_KEY *key,
1003# const char *ivec);
1004#
1005# Handles only complete blocks, operates on 32-bit counter and
1006# does not update *ivec! (see engine/eng_aesni.c for details)
1007#
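# Since the routine neither updates *ivec nor handles partial blocks, the
# caller must advance the counter itself.  A hedged C sketch follows; it
# assumes, based on the pextrd $3/bswap sequence below, that the 32-bit
# counter sits big-endian in the last four bytes of ivec.  Helper name is
# illustrative.

#include <stddef.h>
#include <stdint.h>
#include <openssl/aes.h>

void aesni_ctr32_encrypt_blocks(const void *in, void *out, size_t blocks,
                                const AES_KEY *key, const char *ivec);

static void ctr32_encrypt_and_bump(const unsigned char *in, unsigned char *out,
                                   size_t blocks, const AES_KEY *key,
                                   unsigned char ivec[16])
{
        uint32_t ctr;

        aesni_ctr32_encrypt_blocks(in, out, blocks, key, (const char *)ivec);

        /* counter assumed in ivec[12..15], big-endian; bump by block count */
        ctr = ((uint32_t)ivec[12] << 24) | ((uint32_t)ivec[13] << 16) |
              ((uint32_t)ivec[14] <<  8) |  (uint32_t)ivec[15];
        ctr += (uint32_t)blocks;
        ivec[12] = (unsigned char)(ctr >> 24);
        ivec[13] = (unsigned char)(ctr >> 16);
        ivec[14] = (unsigned char)(ctr >>  8);
        ivec[15] = (unsigned char)ctr;
}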
1008{
1009my $reserved = $win64?0:-0x28;
1010my ($in0,$in1,$in2,$in3)=map("%xmm$_",(8..11));
1011my ($iv0,$iv1,$ivec)=("%xmm12","%xmm13","%xmm14");
1012my $bswap_mask="%xmm15";
1013
1014$code.=<<___;
1015.globl aesni_ctr32_encrypt_blocks
1016.type aesni_ctr32_encrypt_blocks,\@function,5
1017.align 16
1018aesni_ctr32_encrypt_blocks:
1019___
1020$code.=<<___ if ($win64);
1021 lea -0xc8(%rsp),%rsp
1022 movaps %xmm6,0x20(%rsp)
1023 movaps %xmm7,0x30(%rsp)
1024 movaps %xmm8,0x40(%rsp)
1025 movaps %xmm9,0x50(%rsp)
1026 movaps %xmm10,0x60(%rsp)
1027 movaps %xmm11,0x70(%rsp)
1028 movaps %xmm12,0x80(%rsp)
1029 movaps %xmm13,0x90(%rsp)
1030 movaps %xmm14,0xa0(%rsp)
1031 movaps %xmm15,0xb0(%rsp)
1032.Lctr32_body:
1033___
1034$code.=<<___;
1035 cmp \$1,$len
1036 je .Lctr32_one_shortcut
1037
1038 movdqu ($ivp),$ivec
1039 movdqa .Lbswap_mask(%rip),$bswap_mask
1040 xor $rounds,$rounds
1041 pextrd \$3,$ivec,$rnds_ # pull 32-bit counter
1042 pinsrd \$3,$rounds,$ivec # wipe 32-bit counter
1043
1044 mov 240($key),$rounds # key->rounds
1045 bswap $rnds_
1046 pxor $iv0,$iv0 # vector of 3 32-bit counters
1047 pxor $iv1,$iv1 # vector of 3 32-bit counters
1048 pinsrd \$0,$rnds_,$iv0
1049 lea 3($rnds_),$key_
1050 pinsrd \$0,$key_,$iv1
1051 inc $rnds_
1052 pinsrd \$1,$rnds_,$iv0
1053 inc $key_
1054 pinsrd \$1,$key_,$iv1
1055 inc $rnds_
1056 pinsrd \$2,$rnds_,$iv0
1057 inc $key_
1058 pinsrd \$2,$key_,$iv1
1059 movdqa $iv0,$reserved(%rsp)
1060 pshufb $bswap_mask,$iv0
1061 movdqa $iv1,`$reserved+0x10`(%rsp)
1062 pshufb $bswap_mask,$iv1
1063
1064 pshufd \$`3<<6`,$iv0,$inout0 # place counter to upper dword
1065 pshufd \$`2<<6`,$iv0,$inout1
1066 pshufd \$`1<<6`,$iv0,$inout2
1067 cmp \$6,$len
1068 jb .Lctr32_tail
1069 shr \$1,$rounds
1070 mov $key,$key_ # backup $key
1071 mov $rounds,$rnds_ # backup $rounds
1072 sub \$6,$len
1073 jmp .Lctr32_loop6
1074
1075.align 16
1076.Lctr32_loop6:
1077 pshufd \$`3<<6`,$iv1,$inout3
1078 por $ivec,$inout0 # merge counter-less ivec
1079 $movkey ($key_),$rndkey0
1080 pshufd \$`2<<6`,$iv1,$inout4
1081 por $ivec,$inout1
1082 $movkey 16($key_),$rndkey1
1083 pshufd \$`1<<6`,$iv1,$inout5
1084 por $ivec,$inout2
1085 por $ivec,$inout3
1086 xorps $rndkey0,$inout0
1087 por $ivec,$inout4
1088 por $ivec,$inout5
1089
1090 # inline _aesni_encrypt6 and interleave last rounds
1091 # with own code...
1092
1093 pxor $rndkey0,$inout1
1094 aesenc $rndkey1,$inout0
1095 lea 32($key_),$key
1096 pxor $rndkey0,$inout2
1097 aesenc $rndkey1,$inout1
1098 movdqa .Lincrement32(%rip),$iv1
1099 pxor $rndkey0,$inout3
1100 aesenc $rndkey1,$inout2
1101 movdqa $reserved(%rsp),$iv0
1102 pxor $rndkey0,$inout4
1103 aesenc $rndkey1,$inout3
1104 pxor $rndkey0,$inout5
1105 $movkey ($key),$rndkey0
1106 dec $rounds
1107 aesenc $rndkey1,$inout4
1108 aesenc $rndkey1,$inout5
1109 jmp .Lctr32_enc_loop6_enter
1110.align 16
1111.Lctr32_enc_loop6:
1112 aesenc $rndkey1,$inout0
1113 aesenc $rndkey1,$inout1
1114 dec $rounds
1115 aesenc $rndkey1,$inout2
1116 aesenc $rndkey1,$inout3
1117 aesenc $rndkey1,$inout4
1118 aesenc $rndkey1,$inout5
1119.Lctr32_enc_loop6_enter:
1120 $movkey 16($key),$rndkey1
1121 aesenc $rndkey0,$inout0
1122 aesenc $rndkey0,$inout1
1123 lea 32($key),$key
1124 aesenc $rndkey0,$inout2
1125 aesenc $rndkey0,$inout3
1126 aesenc $rndkey0,$inout4
1127 aesenc $rndkey0,$inout5
1128 $movkey ($key),$rndkey0
1129 jnz .Lctr32_enc_loop6
1130
1131 aesenc $rndkey1,$inout0
1132 paddd $iv1,$iv0 # increment counter vector
1133 aesenc $rndkey1,$inout1
1134 paddd `$reserved+0x10`(%rsp),$iv1
1135 aesenc $rndkey1,$inout2
1136 movdqa $iv0,$reserved(%rsp) # save counter vector
1137 aesenc $rndkey1,$inout3
1138 movdqa $iv1,`$reserved+0x10`(%rsp)
1139 aesenc $rndkey1,$inout4
1140 pshufb $bswap_mask,$iv0 # byte swap
1141 aesenc $rndkey1,$inout5
1142 pshufb $bswap_mask,$iv1
1143
1144 aesenclast $rndkey0,$inout0
1145 movups ($inp),$in0 # load input
1146 aesenclast $rndkey0,$inout1
1147 movups 0x10($inp),$in1
1148 aesenclast $rndkey0,$inout2
1149 movups 0x20($inp),$in2
1150 aesenclast $rndkey0,$inout3
1151 movups 0x30($inp),$in3
1152 aesenclast $rndkey0,$inout4
1153 movups 0x40($inp),$rndkey1
1154 aesenclast $rndkey0,$inout5
1155 movups 0x50($inp),$rndkey0
1156 lea 0x60($inp),$inp
1157
1158 xorps $inout0,$in0 # xor
1159 pshufd \$`3<<6`,$iv0,$inout0
1160 xorps $inout1,$in1
1161 pshufd \$`2<<6`,$iv0,$inout1
1162 movups $in0,($out) # store output
1163 xorps $inout2,$in2
1164 pshufd \$`1<<6`,$iv0,$inout2
1165 movups $in1,0x10($out)
1166 xorps $inout3,$in3
1167 movups $in2,0x20($out)
1168 xorps $inout4,$rndkey1
1169 movups $in3,0x30($out)
1170 xorps $inout5,$rndkey0
1171 movups $rndkey1,0x40($out)
1172 movups $rndkey0,0x50($out)
1173 lea 0x60($out),$out
1174 mov $rnds_,$rounds
1175 sub \$6,$len
1176 jnc .Lctr32_loop6
1177
1178 add \$6,$len
1179 jz .Lctr32_done
1180 mov $key_,$key # restore $key
1181 lea 1($rounds,$rounds),$rounds # restore original value
1182
1183.Lctr32_tail:
1184 por $ivec,$inout0
1185 movups ($inp),$in0
1186 cmp \$2,$len
1187 jb .Lctr32_one
1188
1189 por $ivec,$inout1
1190 movups 0x10($inp),$in1
1191 je .Lctr32_two
1192
1193 pshufd \$`3<<6`,$iv1,$inout3
1194 por $ivec,$inout2
1195 movups 0x20($inp),$in2
1196 cmp \$4,$len
1197 jb .Lctr32_three
1198
1199 pshufd \$`2<<6`,$iv1,$inout4
1200 por $ivec,$inout3
1201 movups 0x30($inp),$in3
1202 je .Lctr32_four
1203
1204 por $ivec,$inout4
1205 xorps $inout5,$inout5
1206
1207 call _aesni_encrypt6
1208
1209 movups 0x40($inp),$rndkey1
1210 xorps $inout0,$in0
1211 xorps $inout1,$in1
1212 movups $in0,($out)
1213 xorps $inout2,$in2
1214 movups $in1,0x10($out)
1215 xorps $inout3,$in3
1216 movups $in2,0x20($out)
1217 xorps $inout4,$rndkey1
1218 movups $in3,0x30($out)
1219 movups $rndkey1,0x40($out)
1220 jmp .Lctr32_done
1221
1222.align 16
1223.Lctr32_one_shortcut:
1224 movups ($ivp),$inout0
1225 movups ($inp),$in0
1226 mov 240($key),$rounds # key->rounds
1227.Lctr32_one:
1228___
1229 &aesni_generate1("enc",$key,$rounds);
1230$code.=<<___;
1231 xorps $inout0,$in0
1232 movups $in0,($out)
1233 jmp .Lctr32_done
1234
1235.align 16
1236.Lctr32_two:
1237 xorps $inout2,$inout2
1238 call _aesni_encrypt3
1239 xorps $inout0,$in0
1240 xorps $inout1,$in1
1241 movups $in0,($out)
1242 movups $in1,0x10($out)
1243 jmp .Lctr32_done
1244
1245.align 16
1246.Lctr32_three:
1247 call _aesni_encrypt3
1248 xorps $inout0,$in0
1249 xorps $inout1,$in1
1250 movups $in0,($out)
1251 xorps $inout2,$in2
1252 movups $in1,0x10($out)
1253 movups $in2,0x20($out)
1254 jmp .Lctr32_done
1255
1256.align 16
1257.Lctr32_four:
1258 call _aesni_encrypt4
1259 xorps $inout0,$in0
1260 xorps $inout1,$in1
1261 movups $in0,($out)
1262 xorps $inout2,$in2
1263 movups $in1,0x10($out)
1264 xorps $inout3,$in3
1265 movups $in2,0x20($out)
1266 movups $in3,0x30($out)
1267
1268.Lctr32_done:
1269___
1270$code.=<<___ if ($win64);
1271 movaps 0x20(%rsp),%xmm6
1272 movaps 0x30(%rsp),%xmm7
1273 movaps 0x40(%rsp),%xmm8
1274 movaps 0x50(%rsp),%xmm9
1275 movaps 0x60(%rsp),%xmm10
1276 movaps 0x70(%rsp),%xmm11
1277 movaps 0x80(%rsp),%xmm12
1278 movaps 0x90(%rsp),%xmm13
1279 movaps 0xa0(%rsp),%xmm14
1280 movaps 0xb0(%rsp),%xmm15
1281 lea 0xc8(%rsp),%rsp
1282.Lctr32_ret:
1283___
1284$code.=<<___;
1285 ret
1286.size aesni_ctr32_encrypt_blocks,.-aesni_ctr32_encrypt_blocks
1287___
360} 1288}
361 1289
1290######################################################################
1291# void aesni_xts_[en|de]crypt(const char *inp,char *out,size_t len,
1292# const AES_KEY *key1, const AES_KEY *key2
1293# const unsigned char iv[16]);
1294#
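# A hedged C sketch of the calling convention above: key1 is the data
# schedule, key2 the tweak schedule, and iv[] seeds the per-unit tweak.
# Packing a 64-bit sector number little-endian into the IV is conventional
# XTS usage and an assumption of this sketch, not something the prototype
# dictates.

#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <openssl/aes.h>

void aesni_xts_encrypt(const char *inp, char *out, size_t len,
                       const AES_KEY *key1, const AES_KEY *key2,
                       const unsigned char iv[16]);

static void xts_encrypt_unit(const unsigned char *in, unsigned char *out,
                             size_t len, const AES_KEY *data_key,
                             const AES_KEY *tweak_key, uint64_t unit_no)
{
        unsigned char iv[16] = {0};

        memcpy(iv, &unit_no, sizeof(unit_no));  /* little-endian on x86-64 */
        aesni_xts_encrypt((const char *)in, (char *)out, len,
                          data_key, tweak_key, iv);
}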
1295{
1296my @tweak=map("%xmm$_",(10..15));
1297my ($twmask,$twres,$twtmp)=("%xmm8","%xmm9",@tweak[4]);
1298my ($key2,$ivp,$len_)=("%r8","%r9","%r9");
1299my $frame_size = 0x68 + ($win64?160:0);
1300
1301$code.=<<___;
1302.globl aesni_xts_encrypt
1303.type aesni_xts_encrypt,\@function,6
1304.align 16
1305aesni_xts_encrypt:
1306 lea -$frame_size(%rsp),%rsp
1307___
1308$code.=<<___ if ($win64);
1309 movaps %xmm6,0x60(%rsp)
1310 movaps %xmm7,0x70(%rsp)
1311 movaps %xmm8,0x80(%rsp)
1312 movaps %xmm9,0x90(%rsp)
1313 movaps %xmm10,0xa0(%rsp)
1314 movaps %xmm11,0xb0(%rsp)
1315 movaps %xmm12,0xc0(%rsp)
1316 movaps %xmm13,0xd0(%rsp)
1317 movaps %xmm14,0xe0(%rsp)
1318 movaps %xmm15,0xf0(%rsp)
1319.Lxts_enc_body:
1320___
1321$code.=<<___;
1322 movups ($ivp),@tweak[5] # load clear-text tweak
1323 mov 240(%r8),$rounds # key2->rounds
1324 mov 240($key),$rnds_ # key1->rounds
1325___
1326 # generate the tweak
1327 &aesni_generate1("enc",$key2,$rounds,@tweak[5]);
1328$code.=<<___;
1329 mov $key,$key_ # backup $key
1330 mov $rnds_,$rounds # backup $rounds
1331 mov $len,$len_ # backup $len
1332 and \$-16,$len
1333
1334 movdqa .Lxts_magic(%rip),$twmask
1335 pxor $twtmp,$twtmp
1336 pcmpgtd @tweak[5],$twtmp # broadcast upper bits
1337___
1338 for ($i=0;$i<4;$i++) {
1339 $code.=<<___;
1340 pshufd \$0x13,$twtmp,$twres
1341 pxor $twtmp,$twtmp
1342 movdqa @tweak[5],@tweak[$i]
1343 paddq @tweak[5],@tweak[5] # psllq 1,$tweak
1344 pand $twmask,$twres # isolate carry and residue
 1345 	pand	$twmask,$twres			# isolate carry and residue
1346 pxor $twres,@tweak[5]
1347___
1348 }
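# The pcmpgtd/pshufd/pand/paddq/pxor idiom emitted by the loop above
# multiplies the 128-bit tweak by x in GF(2^128).  A scalar C equivalent of
# that effect, assuming the usual XTS reduction constant 0x87 in .Lxts_magic,
# looks like this:

#include <stdint.h>

/* Hedged scalar sketch: shift the 128-bit tweak left by one bit and, if a
 * bit fell off the top, fold in x^128 + x^7 + x^2 + x + 1 (0x87). */
static void xts_double_tweak(uint64_t t[2])     /* t[0] low, t[1] high */
{
        uint64_t carry = t[1] >> 63;

        t[1] = (t[1] << 1) | (t[0] >> 63);
        t[0] = (t[0] << 1) ^ (carry ? 0x87 : 0);
}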
1349$code.=<<___;
1350 sub \$16*6,$len
1351 jc .Lxts_enc_short
1352
1353 shr \$1,$rounds
1354 sub \$1,$rounds
1355 mov $rounds,$rnds_
1356 jmp .Lxts_enc_grandloop
1357
1358.align 16
1359.Lxts_enc_grandloop:
1360 pshufd \$0x13,$twtmp,$twres
1361 movdqa @tweak[5],@tweak[4]
1362 paddq @tweak[5],@tweak[5] # psllq 1,$tweak
1363 movdqu `16*0`($inp),$inout0 # load input
1364 pand $twmask,$twres # isolate carry and residue
1365 movdqu `16*1`($inp),$inout1
1366 pxor $twres,@tweak[5]
1367
1368 movdqu `16*2`($inp),$inout2
1369 pxor @tweak[0],$inout0 # input^=tweak
1370 movdqu `16*3`($inp),$inout3
1371 pxor @tweak[1],$inout1
1372 movdqu `16*4`($inp),$inout4
1373 pxor @tweak[2],$inout2
1374 movdqu `16*5`($inp),$inout5
1375 lea `16*6`($inp),$inp
1376 pxor @tweak[3],$inout3
1377 $movkey ($key_),$rndkey0
1378 pxor @tweak[4],$inout4
1379 pxor @tweak[5],$inout5
1380
1381 # inline _aesni_encrypt6 and interleave first and last rounds
1382 # with own code...
1383 $movkey 16($key_),$rndkey1
1384 pxor $rndkey0,$inout0
1385 pxor $rndkey0,$inout1
1386 movdqa @tweak[0],`16*0`(%rsp) # put aside tweaks
1387 aesenc $rndkey1,$inout0
1388 lea 32($key_),$key
1389 pxor $rndkey0,$inout2
1390 movdqa @tweak[1],`16*1`(%rsp)
1391 aesenc $rndkey1,$inout1
1392 pxor $rndkey0,$inout3
1393 movdqa @tweak[2],`16*2`(%rsp)
1394 aesenc $rndkey1,$inout2
1395 pxor $rndkey0,$inout4
1396 movdqa @tweak[3],`16*3`(%rsp)
1397 aesenc $rndkey1,$inout3
1398 pxor $rndkey0,$inout5
1399 $movkey ($key),$rndkey0
1400 dec $rounds
1401 movdqa @tweak[4],`16*4`(%rsp)
1402 aesenc $rndkey1,$inout4
1403 movdqa @tweak[5],`16*5`(%rsp)
1404 aesenc $rndkey1,$inout5
1405 pxor $twtmp,$twtmp
1406 pcmpgtd @tweak[5],$twtmp
1407 jmp .Lxts_enc_loop6_enter
1408
1409.align 16
1410.Lxts_enc_loop6:
1411 aesenc $rndkey1,$inout0
1412 aesenc $rndkey1,$inout1
1413 dec $rounds
1414 aesenc $rndkey1,$inout2
1415 aesenc $rndkey1,$inout3
1416 aesenc $rndkey1,$inout4
1417 aesenc $rndkey1,$inout5
1418.Lxts_enc_loop6_enter:
1419 $movkey 16($key),$rndkey1
1420 aesenc $rndkey0,$inout0
1421 aesenc $rndkey0,$inout1
1422 lea 32($key),$key
1423 aesenc $rndkey0,$inout2
1424 aesenc $rndkey0,$inout3
1425 aesenc $rndkey0,$inout4
1426 aesenc $rndkey0,$inout5
1427 $movkey ($key),$rndkey0
1428 jnz .Lxts_enc_loop6
1429
1430 pshufd \$0x13,$twtmp,$twres
1431 pxor $twtmp,$twtmp
1432 paddq @tweak[5],@tweak[5] # psllq 1,$tweak
1433 aesenc $rndkey1,$inout0
1434 pand $twmask,$twres # isolate carry and residue
1435 aesenc $rndkey1,$inout1
1436 pcmpgtd @tweak[5],$twtmp # broadcast upper bits
1437 aesenc $rndkey1,$inout2
1438 pxor $twres,@tweak[5]
1439 aesenc $rndkey1,$inout3
1440 aesenc $rndkey1,$inout4
1441 aesenc $rndkey1,$inout5
1442 $movkey 16($key),$rndkey1
1443
1444 pshufd \$0x13,$twtmp,$twres
1445 pxor $twtmp,$twtmp
1446 movdqa @tweak[5],@tweak[0]
1447 paddq @tweak[5],@tweak[5] # psllq 1,$tweak
1448 aesenc $rndkey0,$inout0
1449 pand $twmask,$twres # isolate carry and residue
1450 aesenc $rndkey0,$inout1
 1451 	pcmpgtd	@tweak[5],$twtmp		# broadcast upper bits
1452 aesenc $rndkey0,$inout2
1453 pxor $twres,@tweak[5]
1454 aesenc $rndkey0,$inout3
1455 aesenc $rndkey0,$inout4
1456 aesenc $rndkey0,$inout5
1457 $movkey 32($key),$rndkey0
1458
1459 pshufd \$0x13,$twtmp,$twres
1460 pxor $twtmp,$twtmp
1461 movdqa @tweak[5],@tweak[1]
1462 paddq @tweak[5],@tweak[5] # psllq 1,$tweak
1463 aesenc $rndkey1,$inout0
1464 pand $twmask,$twres # isolate carry and residue
1465 aesenc $rndkey1,$inout1
 1466 	pcmpgtd	@tweak[5],$twtmp		# broadcast upper bits
1467 aesenc $rndkey1,$inout2
1468 pxor $twres,@tweak[5]
1469 aesenc $rndkey1,$inout3
1470 aesenc $rndkey1,$inout4
1471 aesenc $rndkey1,$inout5
1472
1473 pshufd \$0x13,$twtmp,$twres
1474 pxor $twtmp,$twtmp
1475 movdqa @tweak[5],@tweak[2]
1476 paddq @tweak[5],@tweak[5] # psllq 1,$tweak
1477 aesenclast $rndkey0,$inout0
1478 pand $twmask,$twres # isolate carry and residue
1479 aesenclast $rndkey0,$inout1
 1480 	pcmpgtd	@tweak[5],$twtmp		# broadcast upper bits
1481 aesenclast $rndkey0,$inout2
1482 pxor $twres,@tweak[5]
1483 aesenclast $rndkey0,$inout3
1484 aesenclast $rndkey0,$inout4
1485 aesenclast $rndkey0,$inout5
1486
1487 pshufd \$0x13,$twtmp,$twres
1488 pxor $twtmp,$twtmp
1489 movdqa @tweak[5],@tweak[3]
1490 paddq @tweak[5],@tweak[5] # psllq 1,$tweak
1491 xorps `16*0`(%rsp),$inout0 # output^=tweak
1492 pand $twmask,$twres # isolate carry and residue
1493 xorps `16*1`(%rsp),$inout1
 1494 	pcmpgtd	@tweak[5],$twtmp		# broadcast upper bits
1495 pxor $twres,@tweak[5]
1496
1497 xorps `16*2`(%rsp),$inout2
1498 movups $inout0,`16*0`($out) # write output
1499 xorps `16*3`(%rsp),$inout3
1500 movups $inout1,`16*1`($out)
1501 xorps `16*4`(%rsp),$inout4
1502 movups $inout2,`16*2`($out)
1503 xorps `16*5`(%rsp),$inout5
1504 movups $inout3,`16*3`($out)
1505 mov $rnds_,$rounds # restore $rounds
1506 movups $inout4,`16*4`($out)
1507 movups $inout5,`16*5`($out)
1508 lea `16*6`($out),$out
1509 sub \$16*6,$len
1510 jnc .Lxts_enc_grandloop
1511
1512 lea 3($rounds,$rounds),$rounds # restore original value
1513 mov $key_,$key # restore $key
1514 mov $rounds,$rnds_ # backup $rounds
1515
1516.Lxts_enc_short:
1517 add \$16*6,$len
1518 jz .Lxts_enc_done
1519
1520 cmp \$0x20,$len
1521 jb .Lxts_enc_one
1522 je .Lxts_enc_two
1523
1524 cmp \$0x40,$len
1525 jb .Lxts_enc_three
1526 je .Lxts_enc_four
1527
1528 pshufd \$0x13,$twtmp,$twres
1529 movdqa @tweak[5],@tweak[4]
1530 paddq @tweak[5],@tweak[5] # psllq 1,$tweak
1531 movdqu ($inp),$inout0
1532 pand $twmask,$twres # isolate carry and residue
1533 movdqu 16*1($inp),$inout1
1534 pxor $twres,@tweak[5]
1535
1536 movdqu 16*2($inp),$inout2
1537 pxor @tweak[0],$inout0
1538 movdqu 16*3($inp),$inout3
1539 pxor @tweak[1],$inout1
1540 movdqu 16*4($inp),$inout4
1541 lea 16*5($inp),$inp
1542 pxor @tweak[2],$inout2
1543 pxor @tweak[3],$inout3
1544 pxor @tweak[4],$inout4
1545
1546 call _aesni_encrypt6
1547
1548 xorps @tweak[0],$inout0
1549 movdqa @tweak[5],@tweak[0]
1550 xorps @tweak[1],$inout1
1551 xorps @tweak[2],$inout2
1552 movdqu $inout0,($out)
1553 xorps @tweak[3],$inout3
1554 movdqu $inout1,16*1($out)
1555 xorps @tweak[4],$inout4
1556 movdqu $inout2,16*2($out)
1557 movdqu $inout3,16*3($out)
1558 movdqu $inout4,16*4($out)
1559 lea 16*5($out),$out
1560 jmp .Lxts_enc_done
1561
1562.align 16
1563.Lxts_enc_one:
1564 movups ($inp),$inout0
1565 lea 16*1($inp),$inp
1566 xorps @tweak[0],$inout0
1567___
1568 &aesni_generate1("enc",$key,$rounds);
1569$code.=<<___;
1570 xorps @tweak[0],$inout0
1571 movdqa @tweak[1],@tweak[0]
1572 movups $inout0,($out)
1573 lea 16*1($out),$out
1574 jmp .Lxts_enc_done
1575
1576.align 16
1577.Lxts_enc_two:
1578 movups ($inp),$inout0
1579 movups 16($inp),$inout1
1580 lea 32($inp),$inp
1581 xorps @tweak[0],$inout0
1582 xorps @tweak[1],$inout1
1583
1584 call _aesni_encrypt3
1585
1586 xorps @tweak[0],$inout0
1587 movdqa @tweak[2],@tweak[0]
1588 xorps @tweak[1],$inout1
1589 movups $inout0,($out)
1590 movups $inout1,16*1($out)
1591 lea 16*2($out),$out
1592 jmp .Lxts_enc_done
1593
1594.align 16
1595.Lxts_enc_three:
1596 movups ($inp),$inout0
1597 movups 16*1($inp),$inout1
1598 movups 16*2($inp),$inout2
1599 lea 16*3($inp),$inp
1600 xorps @tweak[0],$inout0
1601 xorps @tweak[1],$inout1
1602 xorps @tweak[2],$inout2
1603
1604 call _aesni_encrypt3
1605
1606 xorps @tweak[0],$inout0
1607 movdqa @tweak[3],@tweak[0]
1608 xorps @tweak[1],$inout1
1609 xorps @tweak[2],$inout2
1610 movups $inout0,($out)
1611 movups $inout1,16*1($out)
1612 movups $inout2,16*2($out)
1613 lea 16*3($out),$out
1614 jmp .Lxts_enc_done
1615
1616.align 16
1617.Lxts_enc_four:
1618 movups ($inp),$inout0
1619 movups 16*1($inp),$inout1
1620 movups 16*2($inp),$inout2
1621 xorps @tweak[0],$inout0
1622 movups 16*3($inp),$inout3
1623 lea 16*4($inp),$inp
1624 xorps @tweak[1],$inout1
1625 xorps @tweak[2],$inout2
1626 xorps @tweak[3],$inout3
1627
1628 call _aesni_encrypt4
1629
1630 xorps @tweak[0],$inout0
1631 movdqa @tweak[5],@tweak[0]
1632 xorps @tweak[1],$inout1
1633 xorps @tweak[2],$inout2
1634 movups $inout0,($out)
1635 xorps @tweak[3],$inout3
1636 movups $inout1,16*1($out)
1637 movups $inout2,16*2($out)
1638 movups $inout3,16*3($out)
1639 lea 16*4($out),$out
1640 jmp .Lxts_enc_done
1641
1642.align 16
1643.Lxts_enc_done:
1644 and \$15,$len_
1645 jz .Lxts_enc_ret
1646 mov $len_,$len
1647
1648.Lxts_enc_steal:
1649 movzb ($inp),%eax # borrow $rounds ...
1650 movzb -16($out),%ecx # ... and $key
1651 lea 1($inp),$inp
1652 mov %al,-16($out)
1653 mov %cl,0($out)
1654 lea 1($out),$out
1655 sub \$1,$len
1656 jnz .Lxts_enc_steal
1657
1658 sub $len_,$out # rewind $out
1659 mov $key_,$key # restore $key
1660 mov $rnds_,$rounds # restore $rounds
1661
1662 movups -16($out),$inout0
1663 xorps @tweak[0],$inout0
1664___
1665 &aesni_generate1("enc",$key,$rounds);
1666$code.=<<___;
1667 xorps @tweak[0],$inout0
1668 movups $inout0,-16($out)
1669
1670.Lxts_enc_ret:
1671___
1672$code.=<<___ if ($win64);
1673 movaps 0x60(%rsp),%xmm6
1674 movaps 0x70(%rsp),%xmm7
1675 movaps 0x80(%rsp),%xmm8
1676 movaps 0x90(%rsp),%xmm9
1677 movaps 0xa0(%rsp),%xmm10
1678 movaps 0xb0(%rsp),%xmm11
1679 movaps 0xc0(%rsp),%xmm12
1680 movaps 0xd0(%rsp),%xmm13
1681 movaps 0xe0(%rsp),%xmm14
1682 movaps 0xf0(%rsp),%xmm15
1683___
1684$code.=<<___;
1685 lea $frame_size(%rsp),%rsp
1686.Lxts_enc_epilogue:
1687 ret
1688.size aesni_xts_encrypt,.-aesni_xts_encrypt
1689___
1690
1691$code.=<<___;
1692.globl aesni_xts_decrypt
1693.type aesni_xts_decrypt,\@function,6
1694.align 16
1695aesni_xts_decrypt:
1696 lea -$frame_size(%rsp),%rsp
1697___
1698$code.=<<___ if ($win64);
1699 movaps %xmm6,0x60(%rsp)
1700 movaps %xmm7,0x70(%rsp)
1701 movaps %xmm8,0x80(%rsp)
1702 movaps %xmm9,0x90(%rsp)
1703 movaps %xmm10,0xa0(%rsp)
1704 movaps %xmm11,0xb0(%rsp)
1705 movaps %xmm12,0xc0(%rsp)
1706 movaps %xmm13,0xd0(%rsp)
1707 movaps %xmm14,0xe0(%rsp)
1708 movaps %xmm15,0xf0(%rsp)
1709.Lxts_dec_body:
1710___
1711$code.=<<___;
1712 movups ($ivp),@tweak[5] # load clear-text tweak
1713 mov 240($key2),$rounds # key2->rounds
1714 mov 240($key),$rnds_ # key1->rounds
1715___
1716 # generate the tweak
1717 &aesni_generate1("enc",$key2,$rounds,@tweak[5]);
1718$code.=<<___;
1719 xor %eax,%eax # if ($len%16) len-=16;
1720 test \$15,$len
1721 setnz %al
1722 shl \$4,%rax
1723 sub %rax,$len
1724
1725 mov $key,$key_ # backup $key
1726 mov $rnds_,$rounds # backup $rounds
1727 mov $len,$len_ # backup $len
1728 and \$-16,$len
1729
1730 movdqa .Lxts_magic(%rip),$twmask
1731 pxor $twtmp,$twtmp
1732 pcmpgtd @tweak[5],$twtmp # broadcast upper bits
1733___
1734 for ($i=0;$i<4;$i++) {
1735 $code.=<<___;
1736 pshufd \$0x13,$twtmp,$twres
1737 pxor $twtmp,$twtmp
1738 movdqa @tweak[5],@tweak[$i]
1739 paddq @tweak[5],@tweak[5] # psllq 1,$tweak
1740 pand $twmask,$twres # isolate carry and residue
 1741 	pcmpgtd	@tweak[5],$twtmp		# broadcast upper bits
1742 pxor $twres,@tweak[5]
1743___
1744 }
1745$code.=<<___;
1746 sub \$16*6,$len
1747 jc .Lxts_dec_short
1748
1749 shr \$1,$rounds
1750 sub \$1,$rounds
1751 mov $rounds,$rnds_
1752 jmp .Lxts_dec_grandloop
1753
1754.align 16
1755.Lxts_dec_grandloop:
1756 pshufd \$0x13,$twtmp,$twres
1757 movdqa @tweak[5],@tweak[4]
1758 paddq @tweak[5],@tweak[5] # psllq 1,$tweak
1759 movdqu `16*0`($inp),$inout0 # load input
1760 pand $twmask,$twres # isolate carry and residue
1761 movdqu `16*1`($inp),$inout1
1762 pxor $twres,@tweak[5]
1763
1764 movdqu `16*2`($inp),$inout2
1765 pxor @tweak[0],$inout0 # input^=tweak
1766 movdqu `16*3`($inp),$inout3
1767 pxor @tweak[1],$inout1
1768 movdqu `16*4`($inp),$inout4
1769 pxor @tweak[2],$inout2
1770 movdqu `16*5`($inp),$inout5
1771 lea `16*6`($inp),$inp
1772 pxor @tweak[3],$inout3
1773 $movkey ($key_),$rndkey0
1774 pxor @tweak[4],$inout4
1775 pxor @tweak[5],$inout5
1776
1777 # inline _aesni_decrypt6 and interleave first and last rounds
1778 # with own code...
1779 $movkey 16($key_),$rndkey1
1780 pxor $rndkey0,$inout0
1781 pxor $rndkey0,$inout1
1782 movdqa @tweak[0],`16*0`(%rsp) # put aside tweaks
1783 aesdec $rndkey1,$inout0
1784 lea 32($key_),$key
1785 pxor $rndkey0,$inout2
1786 movdqa @tweak[1],`16*1`(%rsp)
1787 aesdec $rndkey1,$inout1
1788 pxor $rndkey0,$inout3
1789 movdqa @tweak[2],`16*2`(%rsp)
1790 aesdec $rndkey1,$inout2
1791 pxor $rndkey0,$inout4
1792 movdqa @tweak[3],`16*3`(%rsp)
1793 aesdec $rndkey1,$inout3
1794 pxor $rndkey0,$inout5
1795 $movkey ($key),$rndkey0
1796 dec $rounds
1797 movdqa @tweak[4],`16*4`(%rsp)
1798 aesdec $rndkey1,$inout4
1799 movdqa @tweak[5],`16*5`(%rsp)
1800 aesdec $rndkey1,$inout5
1801 pxor $twtmp,$twtmp
1802 pcmpgtd @tweak[5],$twtmp
1803 jmp .Lxts_dec_loop6_enter
1804
1805.align 16
1806.Lxts_dec_loop6:
1807 aesdec $rndkey1,$inout0
1808 aesdec $rndkey1,$inout1
1809 dec $rounds
1810 aesdec $rndkey1,$inout2
1811 aesdec $rndkey1,$inout3
1812 aesdec $rndkey1,$inout4
1813 aesdec $rndkey1,$inout5
1814.Lxts_dec_loop6_enter:
1815 $movkey 16($key),$rndkey1
1816 aesdec $rndkey0,$inout0
1817 aesdec $rndkey0,$inout1
1818 lea 32($key),$key
1819 aesdec $rndkey0,$inout2
1820 aesdec $rndkey0,$inout3
1821 aesdec $rndkey0,$inout4
1822 aesdec $rndkey0,$inout5
1823 $movkey ($key),$rndkey0
1824 jnz .Lxts_dec_loop6
1825
1826 pshufd \$0x13,$twtmp,$twres
1827 pxor $twtmp,$twtmp
1828 paddq @tweak[5],@tweak[5] # psllq 1,$tweak
1829 aesdec $rndkey1,$inout0
1830 pand $twmask,$twres # isolate carry and residue
1831 aesdec $rndkey1,$inout1
1832 pcmpgtd @tweak[5],$twtmp # broadcast upper bits
1833 aesdec $rndkey1,$inout2
1834 pxor $twres,@tweak[5]
1835 aesdec $rndkey1,$inout3
1836 aesdec $rndkey1,$inout4
1837 aesdec $rndkey1,$inout5
1838 $movkey 16($key),$rndkey1
1839
1840 pshufd \$0x13,$twtmp,$twres
1841 pxor $twtmp,$twtmp
1842 movdqa @tweak[5],@tweak[0]
1843 paddq @tweak[5],@tweak[5] # psllq 1,$tweak
1844 aesdec $rndkey0,$inout0
1845 pand $twmask,$twres # isolate carry and residue
1846 aesdec $rndkey0,$inout1
 1847 	pcmpgtd	@tweak[5],$twtmp		# broadcast upper bits
1848 aesdec $rndkey0,$inout2
1849 pxor $twres,@tweak[5]
1850 aesdec $rndkey0,$inout3
1851 aesdec $rndkey0,$inout4
1852 aesdec $rndkey0,$inout5
1853 $movkey 32($key),$rndkey0
1854
1855 pshufd \$0x13,$twtmp,$twres
1856 pxor $twtmp,$twtmp
1857 movdqa @tweak[5],@tweak[1]
1858 paddq @tweak[5],@tweak[5] # psllq 1,$tweak
1859 aesdec $rndkey1,$inout0
1860 pand $twmask,$twres # isolate carry and residue
1861 aesdec $rndkey1,$inout1
 1862 	pcmpgtd	@tweak[5],$twtmp		# broadcast upper bits
1863 aesdec $rndkey1,$inout2
1864 pxor $twres,@tweak[5]
1865 aesdec $rndkey1,$inout3
1866 aesdec $rndkey1,$inout4
1867 aesdec $rndkey1,$inout5
1868
1869 pshufd \$0x13,$twtmp,$twres
1870 pxor $twtmp,$twtmp
1871 movdqa @tweak[5],@tweak[2]
1872 paddq @tweak[5],@tweak[5] # psllq 1,$tweak
1873 aesdeclast $rndkey0,$inout0
1874 pand $twmask,$twres # isolate carry and residue
1875 aesdeclast $rndkey0,$inout1
 1876 	pcmpgtd	@tweak[5],$twtmp		# broadcast upper bits
1877 aesdeclast $rndkey0,$inout2
1878 pxor $twres,@tweak[5]
1879 aesdeclast $rndkey0,$inout3
1880 aesdeclast $rndkey0,$inout4
1881 aesdeclast $rndkey0,$inout5
1882
1883 pshufd \$0x13,$twtmp,$twres
1884 pxor $twtmp,$twtmp
1885 movdqa @tweak[5],@tweak[3]
1886 paddq @tweak[5],@tweak[5] # psllq 1,$tweak
1887 xorps `16*0`(%rsp),$inout0 # output^=tweak
1888 pand $twmask,$twres # isolate carry and residue
1889 xorps `16*1`(%rsp),$inout1
 1890 	pcmpgtd	@tweak[5],$twtmp		# broadcast upper bits
1891 pxor $twres,@tweak[5]
1892
1893 xorps `16*2`(%rsp),$inout2
1894 movups $inout0,`16*0`($out) # write output
1895 xorps `16*3`(%rsp),$inout3
1896 movups $inout1,`16*1`($out)
1897 xorps `16*4`(%rsp),$inout4
1898 movups $inout2,`16*2`($out)
1899 xorps `16*5`(%rsp),$inout5
1900 movups $inout3,`16*3`($out)
1901 mov $rnds_,$rounds # restore $rounds
1902 movups $inout4,`16*4`($out)
1903 movups $inout5,`16*5`($out)
1904 lea `16*6`($out),$out
1905 sub \$16*6,$len
1906 jnc .Lxts_dec_grandloop
1907
1908 lea 3($rounds,$rounds),$rounds # restore original value
1909 mov $key_,$key # restore $key
1910 mov $rounds,$rnds_ # backup $rounds
1911
1912.Lxts_dec_short:
1913 add \$16*6,$len
1914 jz .Lxts_dec_done
1915
1916 cmp \$0x20,$len
1917 jb .Lxts_dec_one
1918 je .Lxts_dec_two
1919
1920 cmp \$0x40,$len
1921 jb .Lxts_dec_three
1922 je .Lxts_dec_four
1923
1924 pshufd \$0x13,$twtmp,$twres
1925 movdqa @tweak[5],@tweak[4]
1926 paddq @tweak[5],@tweak[5] # psllq 1,$tweak
1927 movdqu ($inp),$inout0
1928 pand $twmask,$twres # isolate carry and residue
1929 movdqu 16*1($inp),$inout1
1930 pxor $twres,@tweak[5]
1931
1932 movdqu 16*2($inp),$inout2
1933 pxor @tweak[0],$inout0
1934 movdqu 16*3($inp),$inout3
1935 pxor @tweak[1],$inout1
1936 movdqu 16*4($inp),$inout4
1937 lea 16*5($inp),$inp
1938 pxor @tweak[2],$inout2
1939 pxor @tweak[3],$inout3
1940 pxor @tweak[4],$inout4
1941
1942 call _aesni_decrypt6
1943
1944 xorps @tweak[0],$inout0
1945 xorps @tweak[1],$inout1
1946 xorps @tweak[2],$inout2
1947 movdqu $inout0,($out)
1948 xorps @tweak[3],$inout3
1949 movdqu $inout1,16*1($out)
1950 xorps @tweak[4],$inout4
1951 movdqu $inout2,16*2($out)
1952 pxor $twtmp,$twtmp
1953 movdqu $inout3,16*3($out)
1954 pcmpgtd @tweak[5],$twtmp
1955 movdqu $inout4,16*4($out)
1956 lea 16*5($out),$out
1957 pshufd \$0x13,$twtmp,@tweak[1] # $twres
1958 and \$15,$len_
1959 jz .Lxts_dec_ret
1960
1961 movdqa @tweak[5],@tweak[0]
1962 paddq @tweak[5],@tweak[5] # psllq 1,$tweak
1963 pand $twmask,@tweak[1] # isolate carry and residue
1964 pxor @tweak[5],@tweak[1]
1965 jmp .Lxts_dec_done2
1966
1967.align 16
1968.Lxts_dec_one:
1969 movups ($inp),$inout0
1970 lea 16*1($inp),$inp
1971 xorps @tweak[0],$inout0
1972___
1973 &aesni_generate1("dec",$key,$rounds);
1974$code.=<<___;
1975 xorps @tweak[0],$inout0
1976 movdqa @tweak[1],@tweak[0]
1977 movups $inout0,($out)
1978 movdqa @tweak[2],@tweak[1]
1979 lea 16*1($out),$out
1980 jmp .Lxts_dec_done
1981
1982.align 16
1983.Lxts_dec_two:
1984 movups ($inp),$inout0
1985 movups 16($inp),$inout1
1986 lea 32($inp),$inp
1987 xorps @tweak[0],$inout0
1988 xorps @tweak[1],$inout1
1989
1990 call _aesni_decrypt3
1991
1992 xorps @tweak[0],$inout0
1993 movdqa @tweak[2],@tweak[0]
1994 xorps @tweak[1],$inout1
1995 movdqa @tweak[3],@tweak[1]
1996 movups $inout0,($out)
1997 movups $inout1,16*1($out)
1998 lea 16*2($out),$out
1999 jmp .Lxts_dec_done
2000
2001.align 16
2002.Lxts_dec_three:
2003 movups ($inp),$inout0
2004 movups 16*1($inp),$inout1
2005 movups 16*2($inp),$inout2
2006 lea 16*3($inp),$inp
2007 xorps @tweak[0],$inout0
2008 xorps @tweak[1],$inout1
2009 xorps @tweak[2],$inout2
2010
2011 call _aesni_decrypt3
2012
2013 xorps @tweak[0],$inout0
2014 movdqa @tweak[3],@tweak[0]
2015 xorps @tweak[1],$inout1
2016 movdqa @tweak[5],@tweak[1]
2017 xorps @tweak[2],$inout2
2018 movups $inout0,($out)
2019 movups $inout1,16*1($out)
2020 movups $inout2,16*2($out)
2021 lea 16*3($out),$out
2022 jmp .Lxts_dec_done
2023
2024.align 16
2025.Lxts_dec_four:
2026 pshufd \$0x13,$twtmp,$twres
2027 movdqa @tweak[5],@tweak[4]
2028 paddq @tweak[5],@tweak[5] # psllq 1,$tweak
2029 movups ($inp),$inout0
2030 pand $twmask,$twres # isolate carry and residue
2031 movups 16*1($inp),$inout1
2032 pxor $twres,@tweak[5]
2033
2034 movups 16*2($inp),$inout2
2035 xorps @tweak[0],$inout0
2036 movups 16*3($inp),$inout3
2037 lea 16*4($inp),$inp
2038 xorps @tweak[1],$inout1
2039 xorps @tweak[2],$inout2
2040 xorps @tweak[3],$inout3
2041
2042 call _aesni_decrypt4
2043
2044 xorps @tweak[0],$inout0
2045 movdqa @tweak[4],@tweak[0]
2046 xorps @tweak[1],$inout1
2047 movdqa @tweak[5],@tweak[1]
2048 xorps @tweak[2],$inout2
2049 movups $inout0,($out)
2050 xorps @tweak[3],$inout3
2051 movups $inout1,16*1($out)
2052 movups $inout2,16*2($out)
2053 movups $inout3,16*3($out)
2054 lea 16*4($out),$out
2055 jmp .Lxts_dec_done
2056
2057.align 16
2058.Lxts_dec_done:
2059 and \$15,$len_
2060 jz .Lxts_dec_ret
2061.Lxts_dec_done2:
2062 mov $len_,$len
2063 mov $key_,$key # restore $key
2064 mov $rnds_,$rounds # restore $rounds
2065
2066 movups ($inp),$inout0
2067 xorps @tweak[1],$inout0
2068___
2069 &aesni_generate1("dec",$key,$rounds);
2070$code.=<<___;
2071 xorps @tweak[1],$inout0
2072 movups $inout0,($out)
2073
2074.Lxts_dec_steal:
2075 movzb 16($inp),%eax # borrow $rounds ...
2076 movzb ($out),%ecx # ... and $key
2077 lea 1($inp),$inp
2078 mov %al,($out)
2079 mov %cl,16($out)
2080 lea 1($out),$out
2081 sub \$1,$len
2082 jnz .Lxts_dec_steal
2083
2084 sub $len_,$out # rewind $out
2085 mov $key_,$key # restore $key
2086 mov $rnds_,$rounds # restore $rounds
2087
2088 movups ($out),$inout0
2089 xorps @tweak[0],$inout0
2090___
2091 &aesni_generate1("dec",$key,$rounds);
2092$code.=<<___;
2093 xorps @tweak[0],$inout0
2094 movups $inout0,($out)
2095
2096.Lxts_dec_ret:
2097___
2098$code.=<<___ if ($win64);
2099 movaps 0x60(%rsp),%xmm6
2100 movaps 0x70(%rsp),%xmm7
2101 movaps 0x80(%rsp),%xmm8
2102 movaps 0x90(%rsp),%xmm9
2103 movaps 0xa0(%rsp),%xmm10
2104 movaps 0xb0(%rsp),%xmm11
2105 movaps 0xc0(%rsp),%xmm12
2106 movaps 0xd0(%rsp),%xmm13
2107 movaps 0xe0(%rsp),%xmm14
2108 movaps 0xf0(%rsp),%xmm15
2109___
2110$code.=<<___;
2111 lea $frame_size(%rsp),%rsp
2112.Lxts_dec_epilogue:
2113 ret
2114.size aesni_xts_decrypt,.-aesni_xts_decrypt
2115___
2116} }}
2117
2118########################################################################
362# void $PREFIX_cbc_encrypt (const void *inp, void *out, 2119# void $PREFIX_cbc_encrypt (const void *inp, void *out,
363# size_t length, const AES_KEY *key, 2120# size_t length, const AES_KEY *key,
364# unsigned char *ivp,const int enc); 2121# unsigned char *ivp,const int enc);
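# A hedged C sketch for the prototype above, taking $PREFIX as "aesni"; the
# key schedule and 16-byte IV buffer are assumed to be set up by the caller,
# and enc selects the path exactly as the test of %r9d below does (non-zero
# encrypts, zero decrypts).

#include <stddef.h>
#include <openssl/aes.h>

void aesni_cbc_encrypt(const void *inp, void *out, size_t length,
                       const AES_KEY *key, unsigned char *ivp, int enc);

/* Illustrative wrapper only: requests the encrypt path; length handling and
 * IV chaining follow the assembly generated below. */
static void cbc_encrypt_sketch(const unsigned char *in, unsigned char *out,
                               size_t len, const AES_KEY *enc_key,
                               unsigned char iv[16])
{
        aesni_cbc_encrypt(in, out, len, enc_key, iv, 1);
}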
365$reserved = $win64?0x40:-0x18; # used in decrypt 2122{
2123my $reserved = $win64?0x40:-0x18; # used in decrypt
366$code.=<<___; 2124$code.=<<___;
367.globl ${PREFIX}_cbc_encrypt 2125.globl ${PREFIX}_cbc_encrypt
368.type ${PREFIX}_cbc_encrypt,\@function,6 2126.type ${PREFIX}_cbc_encrypt,\@function,6
@@ -371,30 +2129,30 @@ ${PREFIX}_cbc_encrypt:
371 test $len,$len # check length 2129 test $len,$len # check length
372 jz .Lcbc_ret 2130 jz .Lcbc_ret
373 2131
374 mov 240($key),$rnds_ # pull $rounds 2132 mov 240($key),$rnds_ # key->rounds
375 mov $key,$key_ # backup $key 2133 mov $key,$key_ # backup $key
376 test %r9d,%r9d # 6th argument 2134 test %r9d,%r9d # 6th argument
377 jz .Lcbc_decrypt 2135 jz .Lcbc_decrypt
378#--------------------------- CBC ENCRYPT ------------------------------# 2136#--------------------------- CBC ENCRYPT ------------------------------#
379 movups ($ivp),$inout0 # load iv as initial state 2137 movups ($ivp),$inout0 # load iv as initial state
380 cmp \$16,$len
381 mov $rnds_,$rounds 2138 mov $rnds_,$rounds
2139 cmp \$16,$len
382 jb .Lcbc_enc_tail 2140 jb .Lcbc_enc_tail
383 sub \$16,$len 2141 sub \$16,$len
384 jmp .Lcbc_enc_loop 2142 jmp .Lcbc_enc_loop
385.align 16 2143.align 16
386.Lcbc_enc_loop: 2144.Lcbc_enc_loop:
387 movups ($inp),$inout1 # load input 2145 movups ($inp),$inout1 # load input
388 lea 16($inp),$inp 2146 lea 16($inp),$inp
389 pxor $inout1,$inout0 2147 #xorps $inout1,$inout0
390___ 2148___
391 &aesni_generate1("enc",$key,$rounds); 2149 &aesni_generate1("enc",$key,$rounds,$inout0,$inout1);
392$code.=<<___; 2150$code.=<<___;
393 sub \$16,$len
394 lea 16($out),$out
395 mov $rnds_,$rounds # restore $rounds 2151 mov $rnds_,$rounds # restore $rounds
396 mov $key_,$key # restore $key 2152 mov $key_,$key # restore $key
397 movups $inout0,-16($out) # store output 2153 movups $inout0,0($out) # store output
2154 lea 16($out),$out
2155 sub \$16,$len
398 jnc .Lcbc_enc_loop 2156 jnc .Lcbc_enc_loop
399 add \$16,$len 2157 add \$16,$len
400 jnz .Lcbc_enc_tail 2158 jnz .Lcbc_enc_tail
@@ -429,92 +2187,238 @@ $code.=<<___ if ($win64);
429___ 2187___
430$code.=<<___; 2188$code.=<<___;
431 movups ($ivp),$iv 2189 movups ($ivp),$iv
432 sub \$0x40,$len
433 mov $rnds_,$rounds 2190 mov $rnds_,$rounds
2191 cmp \$0x70,$len
434 jbe .Lcbc_dec_tail 2192 jbe .Lcbc_dec_tail
435 jmp .Lcbc_dec_loop3 2193 shr \$1,$rnds_
436.align 16 2194 sub \$0x70,$len
437.Lcbc_dec_loop3: 2195 mov $rnds_,$rounds
438 movups ($inp),$inout0 2196 movaps $iv,$reserved(%rsp)
2197 jmp .Lcbc_dec_loop8_enter
2198.align 16
2199.Lcbc_dec_loop8:
2200 movaps $rndkey0,$reserved(%rsp) # save IV
2201 movups $inout7,($out)
2202 lea 0x10($out),$out
2203.Lcbc_dec_loop8_enter:
2204 $movkey ($key),$rndkey0
2205 movups ($inp),$inout0 # load input
439 movups 0x10($inp),$inout1 2206 movups 0x10($inp),$inout1
440 movups 0x20($inp),$inout2 2207 $movkey 16($key),$rndkey1
441 movaps $inout0,$in0
442 movaps $inout1,$in1
443 movaps $inout2,$in2
444 call _aesni_decrypt3
445 sub \$0x30,$len
446 lea 0x30($inp),$inp
447 lea 0x30($out),$out
448 pxor $iv,$inout0
449 pxor $in0,$inout1
450 movaps $in2,$iv
451 pxor $in1,$inout2
452 movups $inout0,-0x30($out)
453 mov $rnds_,$rounds # restore $rounds
454 movups $inout1,-0x20($out)
455 mov $key_,$key # restore $key
456 movups $inout2,-0x10($out)
457 ja .Lcbc_dec_loop3
458 2208
459.Lcbc_dec_tail: 2209 lea 32($key),$key
460 add \$0x40,$len 2210 movdqu 0x20($inp),$inout2
461 movups $iv,($ivp) 2211 xorps $rndkey0,$inout0
462 jz .Lcbc_dec_ret 2212 movdqu 0x30($inp),$inout3
2213 xorps $rndkey0,$inout1
2214 movdqu 0x40($inp),$inout4
2215 aesdec $rndkey1,$inout0
2216 pxor $rndkey0,$inout2
2217 movdqu 0x50($inp),$inout5
2218 aesdec $rndkey1,$inout1
2219 pxor $rndkey0,$inout3
2220 movdqu 0x60($inp),$inout6
2221 aesdec $rndkey1,$inout2
2222 pxor $rndkey0,$inout4
2223 movdqu 0x70($inp),$inout7
2224 aesdec $rndkey1,$inout3
2225 pxor $rndkey0,$inout5
2226 dec $rounds
2227 aesdec $rndkey1,$inout4
2228 pxor $rndkey0,$inout6
2229 aesdec $rndkey1,$inout5
2230 pxor $rndkey0,$inout7
2231 $movkey ($key),$rndkey0
2232 aesdec $rndkey1,$inout6
2233 aesdec $rndkey1,$inout7
2234 $movkey 16($key),$rndkey1
463 2235
2236 call .Ldec_loop8_enter
2237
2238 movups ($inp),$rndkey1 # re-load input
2239 movups 0x10($inp),$rndkey0
2240 xorps $reserved(%rsp),$inout0 # ^= IV
2241 xorps $rndkey1,$inout1
2242 movups 0x20($inp),$rndkey1
2243 xorps $rndkey0,$inout2
2244 movups 0x30($inp),$rndkey0
2245 xorps $rndkey1,$inout3
2246 movups 0x40($inp),$rndkey1
2247 xorps $rndkey0,$inout4
2248 movups 0x50($inp),$rndkey0
2249 xorps $rndkey1,$inout5
2250 movups 0x60($inp),$rndkey1
2251 xorps $rndkey0,$inout6
2252 movups 0x70($inp),$rndkey0 # IV
2253 xorps $rndkey1,$inout7
2254 movups $inout0,($out)
2255 movups $inout1,0x10($out)
2256 movups $inout2,0x20($out)
2257 movups $inout3,0x30($out)
2258 mov $rnds_,$rounds # restore $rounds
2259 movups $inout4,0x40($out)
2260 mov $key_,$key # restore $key
2261 movups $inout5,0x50($out)
2262 lea 0x80($inp),$inp
2263 movups $inout6,0x60($out)
2264 lea 0x70($out),$out
2265 sub \$0x80,$len
2266 ja .Lcbc_dec_loop8
2267
2268 movaps $inout7,$inout0
2269 movaps $rndkey0,$iv
2270 add \$0x70,$len
2271 jle .Lcbc_dec_tail_collected
2272 movups $inout0,($out)
2273 lea 1($rnds_,$rnds_),$rounds
2274 lea 0x10($out),$out
2275.Lcbc_dec_tail:
464 movups ($inp),$inout0 2276 movups ($inp),$inout0
465 cmp \$0x10,$len
466 movaps $inout0,$in0 2277 movaps $inout0,$in0
2278 cmp \$0x10,$len
467 jbe .Lcbc_dec_one 2279 jbe .Lcbc_dec_one
2280
468 movups 0x10($inp),$inout1 2281 movups 0x10($inp),$inout1
469 cmp \$0x20,$len
470 movaps $inout1,$in1 2282 movaps $inout1,$in1
2283 cmp \$0x20,$len
471 jbe .Lcbc_dec_two 2284 jbe .Lcbc_dec_two
2285
472 movups 0x20($inp),$inout2 2286 movups 0x20($inp),$inout2
473 cmp \$0x30,$len
474 movaps $inout2,$in2 2287 movaps $inout2,$in2
2288 cmp \$0x30,$len
475 jbe .Lcbc_dec_three 2289 jbe .Lcbc_dec_three
2290
476 movups 0x30($inp),$inout3 2291 movups 0x30($inp),$inout3
477 call _aesni_decrypt4 2292 cmp \$0x40,$len
478 pxor $iv,$inout0 2293 jbe .Lcbc_dec_four
479 movups 0x30($inp),$iv 2294
480 pxor $in0,$inout1 2295 movups 0x40($inp),$inout4
2296 cmp \$0x50,$len
2297 jbe .Lcbc_dec_five
2298
2299 movups 0x50($inp),$inout5
2300 cmp \$0x60,$len
2301 jbe .Lcbc_dec_six
2302
2303 movups 0x60($inp),$inout6
2304 movaps $iv,$reserved(%rsp) # save IV
2305 call _aesni_decrypt8
2306 movups ($inp),$rndkey1
2307 movups 0x10($inp),$rndkey0
2308 xorps $reserved(%rsp),$inout0 # ^= IV
2309 xorps $rndkey1,$inout1
2310 movups 0x20($inp),$rndkey1
2311 xorps $rndkey0,$inout2
2312 movups 0x30($inp),$rndkey0
2313 xorps $rndkey1,$inout3
2314 movups 0x40($inp),$rndkey1
2315 xorps $rndkey0,$inout4
2316 movups 0x50($inp),$rndkey0
2317 xorps $rndkey1,$inout5
2318 movups 0x60($inp),$iv # IV
2319 xorps $rndkey0,$inout6
481 movups $inout0,($out) 2320 movups $inout0,($out)
482 pxor $in1,$inout2
483 movups $inout1,0x10($out) 2321 movups $inout1,0x10($out)
484 pxor $in2,$inout3
485 movups $inout2,0x20($out) 2322 movups $inout2,0x20($out)
486 movaps $inout3,$inout0 2323 movups $inout3,0x30($out)
487 lea 0x30($out),$out 2324 movups $inout4,0x40($out)
2325 movups $inout5,0x50($out)
2326 lea 0x60($out),$out
2327 movaps $inout6,$inout0
2328 sub \$0x70,$len
488 jmp .Lcbc_dec_tail_collected 2329 jmp .Lcbc_dec_tail_collected
489.align 16 2330.align 16
490.Lcbc_dec_one: 2331.Lcbc_dec_one:
491___ 2332___
492 &aesni_generate1("dec",$key,$rounds); 2333 &aesni_generate1("dec",$key,$rounds);
493$code.=<<___; 2334$code.=<<___;
494 pxor $iv,$inout0 2335 xorps $iv,$inout0
495 movaps $in0,$iv 2336 movaps $in0,$iv
2337 sub \$0x10,$len
496 jmp .Lcbc_dec_tail_collected 2338 jmp .Lcbc_dec_tail_collected
497.align 16 2339.align 16
498.Lcbc_dec_two: 2340.Lcbc_dec_two:
2341 xorps $inout2,$inout2
499 call _aesni_decrypt3 2342 call _aesni_decrypt3
500 pxor $iv,$inout0 2343 xorps $iv,$inout0
501 pxor $in0,$inout1 2344 xorps $in0,$inout1
502 movups $inout0,($out) 2345 movups $inout0,($out)
503 movaps $in1,$iv 2346 movaps $in1,$iv
504 movaps $inout1,$inout0 2347 movaps $inout1,$inout0
505 lea 0x10($out),$out 2348 lea 0x10($out),$out
2349 sub \$0x20,$len
506 jmp .Lcbc_dec_tail_collected 2350 jmp .Lcbc_dec_tail_collected
507.align 16 2351.align 16
508.Lcbc_dec_three: 2352.Lcbc_dec_three:
509 call _aesni_decrypt3 2353 call _aesni_decrypt3
510 pxor $iv,$inout0 2354 xorps $iv,$inout0
511 pxor $in0,$inout1 2355 xorps $in0,$inout1
512 movups $inout0,($out) 2356 movups $inout0,($out)
513 pxor $in1,$inout2 2357 xorps $in1,$inout2
514 movups $inout1,0x10($out) 2358 movups $inout1,0x10($out)
515 movaps $in2,$iv 2359 movaps $in2,$iv
516 movaps $inout2,$inout0 2360 movaps $inout2,$inout0
517 lea 0x20($out),$out 2361 lea 0x20($out),$out
2362 sub \$0x30,$len
2363 jmp .Lcbc_dec_tail_collected
2364.align 16
2365.Lcbc_dec_four:
2366 call _aesni_decrypt4
2367 xorps $iv,$inout0
2368 movups 0x30($inp),$iv
2369 xorps $in0,$inout1
2370 movups $inout0,($out)
2371 xorps $in1,$inout2
2372 movups $inout1,0x10($out)
2373 xorps $in2,$inout3
2374 movups $inout2,0x20($out)
2375 movaps $inout3,$inout0
2376 lea 0x30($out),$out
2377 sub \$0x40,$len
2378 jmp .Lcbc_dec_tail_collected
2379.align 16
2380.Lcbc_dec_five:
2381 xorps $inout5,$inout5
2382 call _aesni_decrypt6
2383 movups 0x10($inp),$rndkey1
2384 movups 0x20($inp),$rndkey0
2385 xorps $iv,$inout0
2386 xorps $in0,$inout1
2387 xorps $rndkey1,$inout2
2388 movups 0x30($inp),$rndkey1
2389 xorps $rndkey0,$inout3
2390 movups 0x40($inp),$iv
2391 xorps $rndkey1,$inout4
2392 movups $inout0,($out)
2393 movups $inout1,0x10($out)
2394 movups $inout2,0x20($out)
2395 movups $inout3,0x30($out)
2396 lea 0x40($out),$out
2397 movaps $inout4,$inout0
2398 sub \$0x50,$len
2399 jmp .Lcbc_dec_tail_collected
2400.align 16
2401.Lcbc_dec_six:
2402 call _aesni_decrypt6
2403 movups 0x10($inp),$rndkey1
2404 movups 0x20($inp),$rndkey0
2405 xorps $iv,$inout0
2406 xorps $in0,$inout1
2407 xorps $rndkey1,$inout2
2408 movups 0x30($inp),$rndkey1
2409 xorps $rndkey0,$inout3
2410 movups 0x40($inp),$rndkey0
2411 xorps $rndkey1,$inout4
2412 movups 0x50($inp),$iv
2413 xorps $rndkey0,$inout5
2414 movups $inout0,($out)
2415 movups $inout1,0x10($out)
2416 movups $inout2,0x20($out)
2417 movups $inout3,0x30($out)
2418 movups $inout4,0x40($out)
2419 lea 0x50($out),$out
2420 movaps $inout5,$inout0
2421 sub \$0x60,$len
518 jmp .Lcbc_dec_tail_collected 2422 jmp .Lcbc_dec_tail_collected
519.align 16 2423.align 16
520.Lcbc_dec_tail_collected: 2424.Lcbc_dec_tail_collected:
@@ -523,10 +2427,12 @@ $code.=<<___;
523 jnz .Lcbc_dec_tail_partial 2427 jnz .Lcbc_dec_tail_partial
524 movups $inout0,($out) 2428 movups $inout0,($out)
525 jmp .Lcbc_dec_ret 2429 jmp .Lcbc_dec_ret
2430.align 16
526.Lcbc_dec_tail_partial: 2431.Lcbc_dec_tail_partial:
527 movaps $inout0,$reserved(%rsp) 2432 movaps $inout0,$reserved(%rsp)
2433 mov \$16,%rcx
528 mov $out,%rdi 2434 mov $out,%rdi
529 mov $len,%rcx 2435 sub $len,%rcx
530 lea $reserved(%rsp),%rsi 2436 lea $reserved(%rsp),%rsi
531 .long 0x9066A4F3 # rep movsb 2437 .long 0x9066A4F3 # rep movsb
532 2438
@@ -544,7 +2450,7 @@ $code.=<<___;
544 ret 2450 ret
545.size ${PREFIX}_cbc_encrypt,.-${PREFIX}_cbc_encrypt 2451.size ${PREFIX}_cbc_encrypt,.-${PREFIX}_cbc_encrypt
546___ 2452___
547 2453}
548# int $PREFIX_set_[en|de]crypt_key (const unsigned char *userKey, 2454# int $PREFIX_set_[en|de]crypt_key (const unsigned char *userKey,
549# int bits, AES_KEY *key) 2455# int bits, AES_KEY *key)
550{ my ($inp,$bits,$key) = @_4args; 2456{ my ($inp,$bits,$key) = @_4args;
@@ -556,7 +2462,7 @@ $code.=<<___;
556.align 16 2462.align 16
557${PREFIX}_set_decrypt_key: 2463${PREFIX}_set_decrypt_key:
558 .byte 0x48,0x83,0xEC,0x08 # sub rsp,8 2464 .byte 0x48,0x83,0xEC,0x08 # sub rsp,8
559 call _aesni_set_encrypt_key 2465 call __aesni_set_encrypt_key
560 shl \$4,$bits # rounds-1 after _aesni_set_encrypt_key 2466 shl \$4,$bits # rounds-1 after _aesni_set_encrypt_key
561 test %eax,%eax 2467 test %eax,%eax
562 jnz .Ldec_key_ret 2468 jnz .Ldec_key_ret
@@ -576,9 +2482,9 @@ ${PREFIX}_set_decrypt_key:
576 aesimc %xmm1,%xmm1 2482 aesimc %xmm1,%xmm1
577 lea 16($key),$key 2483 lea 16($key),$key
578 lea -16($inp),$inp 2484 lea -16($inp),$inp
579 cmp $key,$inp
580 $movkey %xmm0,16($inp) 2485 $movkey %xmm0,16($inp)
581 $movkey %xmm1,-16($key) 2486 $movkey %xmm1,-16($key)
2487 cmp $key,$inp
582 ja .Ldec_key_inverse 2488 ja .Ldec_key_inverse
583 2489
584 $movkey ($key),%xmm0 # inverse middle 2490 $movkey ($key),%xmm0 # inverse middle
@@ -605,16 +2511,16 @@ $code.=<<___;
605.type ${PREFIX}_set_encrypt_key,\@abi-omnipotent 2511.type ${PREFIX}_set_encrypt_key,\@abi-omnipotent
606.align 16 2512.align 16
607${PREFIX}_set_encrypt_key: 2513${PREFIX}_set_encrypt_key:
608_aesni_set_encrypt_key: 2514__aesni_set_encrypt_key:
609 .byte 0x48,0x83,0xEC,0x08 # sub rsp,8 2515 .byte 0x48,0x83,0xEC,0x08 # sub rsp,8
610 test $inp,$inp
611 mov \$-1,%rax 2516 mov \$-1,%rax
2517 test $inp,$inp
612 jz .Lenc_key_ret 2518 jz .Lenc_key_ret
613 test $key,$key 2519 test $key,$key
614 jz .Lenc_key_ret 2520 jz .Lenc_key_ret
615 2521
616 movups ($inp),%xmm0 # pull first 128 bits of *userKey 2522 movups ($inp),%xmm0 # pull first 128 bits of *userKey
617 pxor %xmm4,%xmm4 # low dword of xmm4 is assumed 0 2523 xorps %xmm4,%xmm4 # low dword of xmm4 is assumed 0
618 lea 16($key),%rax 2524 lea 16($key),%rax
619 cmp \$256,$bits 2525 cmp \$256,$bits
620 je .L14rounds 2526 je .L14rounds
@@ -729,11 +2635,11 @@ _aesni_set_encrypt_key:
729 lea 16(%rax),%rax 2635 lea 16(%rax),%rax
730.Lkey_expansion_128_cold: 2636.Lkey_expansion_128_cold:
731 shufps \$0b00010000,%xmm0,%xmm4 2637 shufps \$0b00010000,%xmm0,%xmm4
732 pxor %xmm4, %xmm0 2638 xorps %xmm4, %xmm0
733 shufps \$0b10001100,%xmm0,%xmm4 2639 shufps \$0b10001100,%xmm0,%xmm4
734 pxor %xmm4, %xmm0 2640 xorps %xmm4, %xmm0
735 pshufd \$0b11111111,%xmm1,%xmm1 # critical path 2641 shufps \$0b11111111,%xmm1,%xmm1 # critical path
736 pxor %xmm1,%xmm0 2642 xorps %xmm1,%xmm0
737 ret 2643 ret
738 2644
739.align 16 2645.align 16
@@ -744,11 +2650,11 @@ _aesni_set_encrypt_key:
744 movaps %xmm2, %xmm5 2650 movaps %xmm2, %xmm5
745.Lkey_expansion_192b_warm: 2651.Lkey_expansion_192b_warm:
746 shufps \$0b00010000,%xmm0,%xmm4 2652 shufps \$0b00010000,%xmm0,%xmm4
747 movaps %xmm2,%xmm3 2653 movdqa %xmm2,%xmm3
748 pxor %xmm4,%xmm0 2654 xorps %xmm4,%xmm0
749 shufps \$0b10001100,%xmm0,%xmm4 2655 shufps \$0b10001100,%xmm0,%xmm4
750 pslldq \$4,%xmm3 2656 pslldq \$4,%xmm3
751 pxor %xmm4,%xmm0 2657 xorps %xmm4,%xmm0
752 pshufd \$0b01010101,%xmm1,%xmm1 # critical path 2658 pshufd \$0b01010101,%xmm1,%xmm1 # critical path
753 pxor %xmm3,%xmm2 2659 pxor %xmm3,%xmm2
754 pxor %xmm1,%xmm0 2660 pxor %xmm1,%xmm0
@@ -772,11 +2678,11 @@ _aesni_set_encrypt_key:
772 lea 16(%rax),%rax 2678 lea 16(%rax),%rax
773.Lkey_expansion_256a_cold: 2679.Lkey_expansion_256a_cold:
774 shufps \$0b00010000,%xmm0,%xmm4 2680 shufps \$0b00010000,%xmm0,%xmm4
775 pxor %xmm4,%xmm0 2681 xorps %xmm4,%xmm0
776 shufps \$0b10001100,%xmm0,%xmm4 2682 shufps \$0b10001100,%xmm0,%xmm4
777 pxor %xmm4,%xmm0 2683 xorps %xmm4,%xmm0
778 pshufd \$0b11111111,%xmm1,%xmm1 # critical path 2684 shufps \$0b11111111,%xmm1,%xmm1 # critical path
779 pxor %xmm1,%xmm0 2685 xorps %xmm1,%xmm0
780 ret 2686 ret
781 2687
782.align 16 2688.align 16
@@ -785,17 +2691,28 @@ _aesni_set_encrypt_key:
785 lea 16(%rax),%rax 2691 lea 16(%rax),%rax
786 2692
787 shufps \$0b00010000,%xmm2,%xmm4 2693 shufps \$0b00010000,%xmm2,%xmm4
788 pxor %xmm4,%xmm2 2694 xorps %xmm4,%xmm2
789 shufps \$0b10001100,%xmm2,%xmm4 2695 shufps \$0b10001100,%xmm2,%xmm4
790 pxor %xmm4,%xmm2 2696 xorps %xmm4,%xmm2
791 pshufd \$0b10101010,%xmm1,%xmm1 # critical path 2697 shufps \$0b10101010,%xmm1,%xmm1 # critical path
792 pxor %xmm1,%xmm2 2698 xorps %xmm1,%xmm2
793 ret 2699 ret
794.size ${PREFIX}_set_encrypt_key,.-${PREFIX}_set_encrypt_key 2700.size ${PREFIX}_set_encrypt_key,.-${PREFIX}_set_encrypt_key
2701.size __aesni_set_encrypt_key,.-__aesni_set_encrypt_key
795___ 2702___
796} 2703}
797 2704
798$code.=<<___; 2705$code.=<<___;
2706.align 64
2707.Lbswap_mask:
2708 .byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
2709.Lincrement32:
2710 .long 6,6,6,0
2711.Lincrement64:
2712 .long 1,0,0,0
2713.Lxts_magic:
2714 .long 0x87,0,1,0
2715
799.asciz "AES for Intel AES-NI, CRYPTOGAMS by <appro\@openssl.org>" 2716.asciz "AES for Intel AES-NI, CRYPTOGAMS by <appro\@openssl.org>"
800.align 64 2717.align 64
801___ 2718___
@@ -810,9 +2727,11 @@ $disp="%r9";
810 2727
811$code.=<<___; 2728$code.=<<___;
812.extern __imp_RtlVirtualUnwind 2729.extern __imp_RtlVirtualUnwind
813.type cbc_se_handler,\@abi-omnipotent 2730___
2731$code.=<<___ if ($PREFIX eq "aesni");
2732.type ecb_se_handler,\@abi-omnipotent
814.align 16 2733.align 16
815cbc_se_handler: 2734ecb_se_handler:
816 push %rsi 2735 push %rsi
817 push %rdi 2736 push %rdi
818 push %rbx 2737 push %rbx
@@ -825,42 +2744,132 @@ cbc_se_handler:
825 sub \$64,%rsp 2744 sub \$64,%rsp
826 2745
827 mov 152($context),%rax # pull context->Rsp 2746 mov 152($context),%rax # pull context->Rsp
2747
2748 jmp .Lcommon_seh_tail
2749.size ecb_se_handler,.-ecb_se_handler
2750
2751.type ccm64_se_handler,\@abi-omnipotent
2752.align 16
2753ccm64_se_handler:
2754 push %rsi
2755 push %rdi
2756 push %rbx
2757 push %rbp
2758 push %r12
2759 push %r13
2760 push %r14
2761 push %r15
2762 pushfq
2763 sub \$64,%rsp
2764
2765 mov 120($context),%rax # pull context->Rax
828 mov 248($context),%rbx # pull context->Rip 2766 mov 248($context),%rbx # pull context->Rip
829 2767
830 lea .Lcbc_decrypt(%rip),%r10 2768 mov 8($disp),%rsi # disp->ImageBase
831 cmp %r10,%rbx # context->Rip<"prologue" label 2769 mov 56($disp),%r11 # disp->HandlerData
832 jb .Lin_prologue
833 2770
834 lea .Lcbc_decrypt_body(%rip),%r10 2771 mov 0(%r11),%r10d # HandlerData[0]
835 cmp %r10,%rbx # context->Rip<cbc_decrypt_body 2772 lea (%rsi,%r10),%r10 # prologue label
836 jb .Lrestore_rax 2773 cmp %r10,%rbx # context->Rip<prologue label
2774 jb .Lcommon_seh_tail
837 2775
838 lea .Lcbc_ret(%rip),%r10 2776 mov 152($context),%rax # pull context->Rsp
839 cmp %r10,%rbx # context->Rip>="epilogue" label
840 jae .Lin_prologue
841 2777
842 lea 0(%rax),%rsi # top of stack 2778 mov 4(%r11),%r10d # HandlerData[1]
2779 lea (%rsi,%r10),%r10 # epilogue label
2780 cmp %r10,%rbx # context->Rip>=epilogue label
2781 jae .Lcommon_seh_tail
2782
2783 lea 0(%rax),%rsi # %xmm save area
843 lea 512($context),%rdi # &context.Xmm6 2784 lea 512($context),%rdi # &context.Xmm6
844 mov \$8,%ecx # 4*sizeof(%xmm0)/sizeof(%rax) 2785 mov \$8,%ecx # 4*sizeof(%xmm0)/sizeof(%rax)
845 .long 0xa548f3fc # cld; rep movsq 2786 .long 0xa548f3fc # cld; rep movsq
846 lea 0x58(%rax),%rax # adjust stack pointer 2787 lea 0x58(%rax),%rax # adjust stack pointer
847 jmp .Lin_prologue
848 2788
849.Lrestore_rax: 2789 jmp .Lcommon_seh_tail
850 mov 120($context),%rax 2790.size ccm64_se_handler,.-ccm64_se_handler
851.Lin_prologue:
852 mov 8(%rax),%rdi
853 mov 16(%rax),%rsi
854 mov %rax,152($context) # restore context->Rsp
855 mov %rsi,168($context) # restore context->Rsi
856 mov %rdi,176($context) # restore context->Rdi
857 2791
858 jmp .Lcommon_seh_exit 2792.type ctr32_se_handler,\@abi-omnipotent
859.size cbc_se_handler,.-cbc_se_handler 2793.align 16
2794ctr32_se_handler:
2795 push %rsi
2796 push %rdi
2797 push %rbx
2798 push %rbp
2799 push %r12
2800 push %r13
2801 push %r14
2802 push %r15
2803 pushfq
2804 sub \$64,%rsp
860 2805
861.type ecb_se_handler,\@abi-omnipotent 2806 mov 120($context),%rax # pull context->Rax
2807 mov 248($context),%rbx # pull context->Rip
2808
2809 lea .Lctr32_body(%rip),%r10
2810 cmp %r10,%rbx # context->Rip<"prologue" label
2811 jb .Lcommon_seh_tail
2812
2813 mov 152($context),%rax # pull context->Rsp
2814
2815 lea .Lctr32_ret(%rip),%r10
2816 cmp %r10,%rbx
2817 jae .Lcommon_seh_tail
2818
2819 lea 0x20(%rax),%rsi # %xmm save area
2820 lea 512($context),%rdi # &context.Xmm6
2821 mov \$20,%ecx # 10*sizeof(%xmm0)/sizeof(%rax)
2822 .long 0xa548f3fc # cld; rep movsq
2823 lea 0xc8(%rax),%rax # adjust stack pointer
2824
2825 jmp .Lcommon_seh_tail
2826.size ctr32_se_handler,.-ctr32_se_handler
2827
2828.type xts_se_handler,\@abi-omnipotent
862.align 16 2829.align 16
863ecb_se_handler: 2830xts_se_handler:
2831 push %rsi
2832 push %rdi
2833 push %rbx
2834 push %rbp
2835 push %r12
2836 push %r13
2837 push %r14
2838 push %r15
2839 pushfq
2840 sub \$64,%rsp
2841
2842 mov 120($context),%rax # pull context->Rax
2843 mov 248($context),%rbx # pull context->Rip
2844
2845 mov 8($disp),%rsi # disp->ImageBase
2846 mov 56($disp),%r11 # disp->HandlerData
2847
2848 mov 0(%r11),%r10d # HandlerData[0]
 2849 lea (%rsi,%r10),%r10 # prologue label
2850 cmp %r10,%rbx # context->Rip<prologue label
2851 jb .Lcommon_seh_tail
2852
2853 mov 152($context),%rax # pull context->Rsp
2854
2855 mov 4(%r11),%r10d # HandlerData[1]
2856 lea (%rsi,%r10),%r10 # epilogue label
2857 cmp %r10,%rbx # context->Rip>=epilogue label
2858 jae .Lcommon_seh_tail
2859
2860 lea 0x60(%rax),%rsi # %xmm save area
 2861 lea 512($context),%rdi # &context.Xmm6
2862 mov \$20,%ecx # 10*sizeof(%xmm0)/sizeof(%rax)
2863 .long 0xa548f3fc # cld; rep movsq
2864 lea 0x68+160(%rax),%rax # adjust stack pointer
2865
2866 jmp .Lcommon_seh_tail
2867.size xts_se_handler,.-xts_se_handler
2868___
2869$code.=<<___;
2870.type cbc_se_handler,\@abi-omnipotent
2871.align 16
2872cbc_se_handler:
864 push %rsi 2873 push %rsi
865 push %rdi 2874 push %rdi
866 push %rbx 2875 push %rbx
@@ -873,13 +2882,37 @@ ecb_se_handler:
873 sub \$64,%rsp 2882 sub \$64,%rsp
874 2883
875 mov 152($context),%rax # pull context->Rsp 2884 mov 152($context),%rax # pull context->Rsp
2885 mov 248($context),%rbx # pull context->Rip
2886
2887 lea .Lcbc_decrypt(%rip),%r10
2888 cmp %r10,%rbx # context->Rip<"prologue" label
2889 jb .Lcommon_seh_tail
2890
2891 lea .Lcbc_decrypt_body(%rip),%r10
2892 cmp %r10,%rbx # context->Rip<cbc_decrypt_body
2893 jb .Lrestore_cbc_rax
2894
2895 lea .Lcbc_ret(%rip),%r10
2896 cmp %r10,%rbx # context->Rip>="epilogue" label
2897 jae .Lcommon_seh_tail
2898
2899 lea 0(%rax),%rsi # top of stack
2900 lea 512($context),%rdi # &context.Xmm6
2901 mov \$8,%ecx # 4*sizeof(%xmm0)/sizeof(%rax)
2902 .long 0xa548f3fc # cld; rep movsq
2903 lea 0x58(%rax),%rax # adjust stack pointer
2904 jmp .Lcommon_seh_tail
2905
2906.Lrestore_cbc_rax:
2907 mov 120($context),%rax
2908
2909.Lcommon_seh_tail:
876 mov 8(%rax),%rdi 2910 mov 8(%rax),%rdi
877 mov 16(%rax),%rsi 2911 mov 16(%rax),%rsi
2912 mov %rax,152($context) # restore context->Rsp
878 mov %rsi,168($context) # restore context->Rsi 2913 mov %rsi,168($context) # restore context->Rsi
879 mov %rdi,176($context) # restore context->Rdi 2914 mov %rdi,176($context) # restore context->Rdi
880 2915
881.Lcommon_seh_exit:
882
883 mov 40($disp),%rdi # disp->ContextRecord 2916 mov 40($disp),%rdi # disp->ContextRecord
884 mov $context,%rsi # context 2917 mov $context,%rsi # context
885 mov \$154,%ecx # sizeof(CONTEXT) 2918 mov \$154,%ecx # sizeof(CONTEXT)
@@ -915,10 +2948,33 @@ ecb_se_handler:
915 2948
916.section .pdata 2949.section .pdata
917.align 4 2950.align 4
918 .rva .LSEH_begin_${PREFIX}_ecb_encrypt 2951___
919 .rva .LSEH_end_${PREFIX}_ecb_encrypt 2952$code.=<<___ if ($PREFIX eq "aesni");
2953 .rva .LSEH_begin_aesni_ecb_encrypt
2954 .rva .LSEH_end_aesni_ecb_encrypt
920 .rva .LSEH_info_ecb 2955 .rva .LSEH_info_ecb
921 2956
2957 .rva .LSEH_begin_aesni_ccm64_encrypt_blocks
2958 .rva .LSEH_end_aesni_ccm64_encrypt_blocks
2959 .rva .LSEH_info_ccm64_enc
2960
2961 .rva .LSEH_begin_aesni_ccm64_decrypt_blocks
2962 .rva .LSEH_end_aesni_ccm64_decrypt_blocks
2963 .rva .LSEH_info_ccm64_dec
2964
2965 .rva .LSEH_begin_aesni_ctr32_encrypt_blocks
2966 .rva .LSEH_end_aesni_ctr32_encrypt_blocks
2967 .rva .LSEH_info_ctr32
2968
2969 .rva .LSEH_begin_aesni_xts_encrypt
2970 .rva .LSEH_end_aesni_xts_encrypt
2971 .rva .LSEH_info_xts_enc
2972
2973 .rva .LSEH_begin_aesni_xts_decrypt
2974 .rva .LSEH_end_aesni_xts_decrypt
2975 .rva .LSEH_info_xts_dec
2976___
2977$code.=<<___;
922 .rva .LSEH_begin_${PREFIX}_cbc_encrypt 2978 .rva .LSEH_begin_${PREFIX}_cbc_encrypt
923 .rva .LSEH_end_${PREFIX}_cbc_encrypt 2979 .rva .LSEH_end_${PREFIX}_cbc_encrypt
924 .rva .LSEH_info_cbc 2980 .rva .LSEH_info_cbc
@@ -932,28 +2988,49 @@ ecb_se_handler:
932 .rva .LSEH_info_key 2988 .rva .LSEH_info_key
933.section .xdata 2989.section .xdata
934.align 8 2990.align 8
2991___
2992$code.=<<___ if ($PREFIX eq "aesni");
935.LSEH_info_ecb: 2993.LSEH_info_ecb:
936 .byte 9,0,0,0 2994 .byte 9,0,0,0
937 .rva ecb_se_handler 2995 .rva ecb_se_handler
2996.LSEH_info_ccm64_enc:
2997 .byte 9,0,0,0
2998 .rva ccm64_se_handler
2999 .rva .Lccm64_enc_body,.Lccm64_enc_ret # HandlerData[]
3000.LSEH_info_ccm64_dec:
3001 .byte 9,0,0,0
3002 .rva ccm64_se_handler
3003 .rva .Lccm64_dec_body,.Lccm64_dec_ret # HandlerData[]
3004.LSEH_info_ctr32:
3005 .byte 9,0,0,0
3006 .rva ctr32_se_handler
3007.LSEH_info_xts_enc:
3008 .byte 9,0,0,0
3009 .rva xts_se_handler
3010 .rva .Lxts_enc_body,.Lxts_enc_epilogue # HandlerData[]
3011.LSEH_info_xts_dec:
3012 .byte 9,0,0,0
3013 .rva xts_se_handler
3014 .rva .Lxts_dec_body,.Lxts_dec_epilogue # HandlerData[]
3015___
3016$code.=<<___;
938.LSEH_info_cbc: 3017.LSEH_info_cbc:
939 .byte 9,0,0,0 3018 .byte 9,0,0,0
940 .rva cbc_se_handler 3019 .rva cbc_se_handler
941.LSEH_info_key: 3020.LSEH_info_key:
942 .byte 0x01,0x04,0x01,0x00 3021 .byte 0x01,0x04,0x01,0x00
943 .byte 0x04,0x02,0x00,0x00 3022 .byte 0x04,0x02,0x00,0x00 # sub rsp,8
944___ 3023___
945} 3024}
946 3025
947sub rex { 3026sub rex {
948 local *opcode=shift; 3027 local *opcode=shift;
949 my ($dst,$src)=@_; 3028 my ($dst,$src)=@_;
950 3029 my $rex=0;
951 if ($dst>=8 || $src>=8) { 3030
952 $rex=0x40; 3031 $rex|=0x04 if($dst>=8);
953 $rex|=0x04 if($dst>=8); 3032 $rex|=0x01 if($src>=8);
954 $rex|=0x01 if($src>=8); 3033 push @opcode,$rex|0x40 if($rex);
955 push @opcode,$rex;
956 }
957} 3034}
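The rewritten rex() above emits the optional REX prefix for the hand-assembled AES-NI opcodes. A standalone sketch of the same logic (assuming the usual x86-64 encoding: 0x40 is the base prefix, bit 2 / REX.R extends the ModRM reg field and bit 0 / REX.B extends the ModRM r/m field, which is only needed for %xmm8-%xmm15):

sub rex_prefix {
    my ($dst, $src) = @_;              # numeric register indices
    my $rex = 0;
    $rex |= 0x04 if $dst >= 8;         # REX.R: reg field is xmm8-xmm15
    $rex |= 0x01 if $src >= 8;         # REX.B: r/m field is xmm8-xmm15
    return $rex ? ($rex | 0x40) : ();  # no prefix needed for xmm0-xmm7 pairs
}

Both the old and the new variant generate the same byte; the rewrite folds the either-operand-extended test into the final conditional push.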
958 3035
959sub aesni { 3036sub aesni {
@@ -989,4 +3066,3 @@ $code =~ s/\b(aes.*%xmm[0-9]+).*$/aesni($1)/gem;
989print $code; 3066print $code;
990 3067
991close STDOUT; 3068close STDOUT;
992
diff --git a/src/lib/libssl/src/crypto/aes/asm/bsaes-x86_64.pl b/src/lib/libssl/src/crypto/aes/asm/bsaes-x86_64.pl
new file mode 100644
index 0000000000..c9c6312fa7
--- /dev/null
+++ b/src/lib/libssl/src/crypto/aes/asm/bsaes-x86_64.pl
@@ -0,0 +1,3044 @@
1#!/usr/bin/env perl
2
3###################################################################
4### AES-128 [originally in CTR mode] ###
5### bitsliced implementation for Intel Core 2 processors ###
6### requires support of SSE extensions up to SSSE3 ###
7### Author: Emilia Käsper and Peter Schwabe ###
8### Date: 2009-03-19 ###
9### Public domain ###
10### ###
11### See http://homes.esat.kuleuven.be/~ekasper/#software for ###
12### further information. ###
13###################################################################
14#
15# September 2011.
16#
17# Started as a transliteration to "perlasm", the original code has
18# undergone the following changes:
19#
20# - code was made position-independent;
21# - rounds were folded into a loop resulting in >5x size reduction
22# from 12.5KB to 2.2KB;
23# - the above was possible thanks to a mixcolumns() modification that
24# allows its output to be fed back to aesenc[last]; this was
25# achieved at the cost of two additional inter-register moves;
26# - some instruction reordering and interleaving;
27# - this module doesn't implement a key setup subroutine; instead it
28# relies on conversion of the "conventional" key schedule as returned
29# by AES_set_encrypt_key (see discussion below);
30# - first and last round keys are treated differently, which made it
31# possible to skip one shiftrows(), reduce the bit-sliced key schedule
32# and speed up conversion by 22%;
33# - support for 192- and 256-bit keys was added;
34#
35# Resulting performance in CPU cycles spent to encrypt one byte out
36# of 4096-byte buffer with 128-bit key is:
37#
38# Emilia's this(*) difference
39#
40# Core 2 9.30 8.69 +7%
41# Nehalem(**) 7.63 6.98 +9%
42# Atom 17.1 17.4 -2%(***)
43#
44# (*) Comparison is not completely fair, because "this" is ECB,
45# i.e. no extra processing such as counter value calculation
46# and xor-ing of the input, as in Emilia's CTR implementation,
47# is performed. However, the CTR calculations account for no
48# more than 1% of total time, so the comparison is *rather* fair.
49#
50# (**) Results were collected on Westmere, which is considered to
51# be equivalent to Nehalem for this code.
52#
53# (***) The slowdown on Atom is rather strange per se, because the
54# original implementation has a number of 9+-byte instructions,
55# which are bad for the Atom front-end and which I eliminated
56# completely. In an attempt to address the deterioration, sbox()
57# was tested in the FP SIMD "domain" (movaps instead of movdqa,
58# xorps instead of pxor, etc.). While this gave a nominal 4%
59# improvement on Atom, it hurt Westmere by more than a 2x factor.
60#
61# As for the key schedule conversion subroutine: the OpenSSL interface
62# relies on per-invocation on-the-fly conversion. This naturally has
63# an impact on performance, especially for short inputs. Conversion
64# time in CPU cycles, and its ratio to the CPU cycles spent in the 8x
65# block function, is:
66#
67# conversion conversion/8x block
68# Core 2 240 0.22
69# Nehalem 180 0.20
70# Atom 430 0.19
71#
72# The ratio values mean that 128-byte inputs will be processed
73# 16-18% slower, 256-byte inputs 9-10%, 384-byte inputs 6-7%, etc.
74# Also keep in mind that input sizes not divisible by 128 are
75# *effectively* slower, especially the shortest ones; e.g. consecutive
76# 144-byte blocks are processed 44% slower than one would expect,
77# 272-byte ones 29%, 400-byte ones 22%, etc. Yet, despite all these
78# "shortcomings", it's still faster than the ["hyper-threading-safe"
79# code path in] aes-x86_64.pl on all lengths above 64 bytes...
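The per-length figures above follow directly from the conversion/8x-block ratio r: an input of n 128-byte units costs roughly n + r block-equivalents instead of n, so throughput drops by r/(n + r). A quick scalar check of that arithmetic (an illustrative sketch only, using the ratios quoted in the table):

my %ratio = ("Core 2" => 0.22, "Nehalem" => 0.20, "Atom" => 0.19);
for my $cpu (sort keys %ratio) {
    my $r = $ratio{$cpu};
    for my $n (1, 2, 3) {                  # 128-, 256-, 384-byte inputs
        printf "%-8s %4d bytes: %4.1f%% slower\n",
               $cpu, 128 * $n, 100 * $r / ($n + $r);
    }
}

which roughly matches the 16-18%, 9-10% and 6-7% figures quoted above.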
80#
81# October 2011.
82#
83# Add decryption procedure. Performance in CPU cycles spent to decrypt
84# one byte out of 4096-byte buffer with 128-bit key is:
85#
86# Core 2 11.0
87# Nehalem 9.16
88# Atom 20.9
89#
90# November 2011.
91#
92# Add bsaes_xts_[en|de]crypt. Less-than-80-bytes-block performance is
93# suboptimal, but XTS is meant to be used with larger blocks...
94#
95# <appro@openssl.org>
96
97$flavour = shift;
98$output = shift;
99if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
100
101$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
102
103$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
104( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
105( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
106die "can't locate x86_64-xlate.pl";
107
108open STDOUT,"| $^X $xlate $flavour $output";
109
110my ($inp,$out,$len,$key,$ivp)=("%rdi","%rsi","%rdx","%rcx");
111my @XMM=map("%xmm$_",(15,0..14)); # best on Atom, +10% over (0..15)
112my $ecb=0; # suppress unreferenced ECB subroutines, spare some space...
113
114{
115my ($key,$rounds,$const)=("%rax","%r10d","%r11");
116
117sub Sbox {
118# input in lsb > [b0, b1, b2, b3, b4, b5, b6, b7] < msb
119# output in lsb > [b0, b1, b4, b6, b3, b7, b2, b5] < msb
120my @b=@_[0..7];
121my @t=@_[8..11];
122my @s=@_[12..15];
123 &InBasisChange (@b);
124 &Inv_GF256 (@b[6,5,0,3,7,1,4,2],@t,@s);
125 &OutBasisChange (@b[7,1,4,2,6,5,0,3]);
126}
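Sbox() evaluates the AES S-box on bit-sliced data: change basis, invert in GF(2^8) via the Inv_GF256() network below, change basis back. As a reminder of the underlying byte-level math, a plain scalar sketch (unrelated to the bit-sliced basis used here): the S-box is inversion in GF(2^8) modulo x^8+x^4+x^3+x+1 followed by the fixed affine map of FIPS-197.

sub gf_mul {                           # multiply in GF(2^8) mod 0x11b
    my ($x, $y) = @_;
    my $r = 0;
    for (0 .. 7) {
        $r ^= $x if $y & 1;
        $x = ($x << 1) ^ (($x & 0x80) ? 0x11b : 0);
        $y >>= 1;
    }
    return $r;
}

sub sbox_byte {                        # S(b) = affine(b^-1), with 0^-1 := 0
    my ($b) = @_;
    my $inv = 0;
    if ($b) {
        for my $c (1 .. 255) {
            if (gf_mul($b, $c) == 1) { $inv = $c; last; }
        }
    }
    my $s = 0x63;                      # affine constant
    for my $i (0 .. 7) {
        my $bit = 0;
        $bit ^= ($inv >> (($i + $_) % 8)) & 1 for 0, 4, 5, 6, 7;
        $s ^= $bit << $i;
    }
    return $s;
}

printf "S(0x53) = 0x%02x\n", sbox_byte(0x53);   # 0xed, the FIPS-197 example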
127
128sub InBasisChange {
129# input in lsb > [b0, b1, b2, b3, b4, b5, b6, b7] < msb
130# output in lsb > [b6, b5, b0, b3, b7, b1, b4, b2] < msb
131my @b=@_[0..7];
132$code.=<<___;
133 pxor @b[6], @b[5]
134 pxor @b[1], @b[2]
135 pxor @b[0], @b[3]
136 pxor @b[2], @b[6]
137 pxor @b[0], @b[5]
138
139 pxor @b[3], @b[6]
140 pxor @b[7], @b[3]
141 pxor @b[5], @b[7]
142 pxor @b[4], @b[3]
143 pxor @b[5], @b[4]
144 pxor @b[1], @b[3]
145
146 pxor @b[7], @b[2]
147 pxor @b[5], @b[1]
148___
149}
150
151sub OutBasisChange {
152# input in lsb > [b0, b1, b2, b3, b4, b5, b6, b7] < msb
153# output in lsb > [b6, b1, b2, b4, b7, b0, b3, b5] < msb
154my @b=@_[0..7];
155$code.=<<___;
156 pxor @b[6], @b[0]
157 pxor @b[4], @b[1]
158 pxor @b[0], @b[2]
159 pxor @b[6], @b[4]
160 pxor @b[1], @b[6]
161
162 pxor @b[5], @b[1]
163 pxor @b[3], @b[5]
164 pxor @b[7], @b[3]
165 pxor @b[5], @b[7]
166 pxor @b[5], @b[2]
167
168 pxor @b[7], @b[4]
169___
170}
171
172sub InvSbox {
173# input in lsb > [b0, b1, b2, b3, b4, b5, b6, b7] < msb
174# output in lsb > [b0, b1, b6, b4, b2, b7, b3, b5] < msb
175my @b=@_[0..7];
176my @t=@_[8..11];
177my @s=@_[12..15];
178 &InvInBasisChange (@b);
179 &Inv_GF256 (@b[5,1,2,6,3,7,0,4],@t,@s);
180 &InvOutBasisChange (@b[3,7,0,4,5,1,2,6]);
181}
182
183sub InvInBasisChange { # OutBasisChange in reverse
184my @b=@_[5,1,2,6,3,7,0,4];
185$code.=<<___
186 pxor @b[7], @b[4]
187
188 pxor @b[5], @b[7]
189 pxor @b[5], @b[2]
190 pxor @b[7], @b[3]
191 pxor @b[3], @b[5]
192 pxor @b[5], @b[1]
193
194 pxor @b[1], @b[6]
195 pxor @b[0], @b[2]
196 pxor @b[6], @b[4]
197 pxor @b[6], @b[0]
198 pxor @b[4], @b[1]
199___
200}
201
202sub InvOutBasisChange { # InBasisChange in reverse
203my @b=@_[2,5,7,3,6,1,0,4];
204$code.=<<___;
205 pxor @b[5], @b[1]
206 pxor @b[7], @b[2]
207
208 pxor @b[1], @b[3]
209 pxor @b[5], @b[4]
210 pxor @b[5], @b[7]
211 pxor @b[4], @b[3]
212 pxor @b[0], @b[5]
213 pxor @b[7], @b[3]
214 pxor @b[2], @b[6]
215 pxor @b[1], @b[2]
216 pxor @b[3], @b[6]
217
218 pxor @b[0], @b[3]
219 pxor @b[6], @b[5]
220___
221}
222
223sub Mul_GF4 {
224#;*************************************************************
225#;* Mul_GF4: Input x0-x1,y0-y1 Output x0-x1 Temp t0 (8) *
226#;*************************************************************
227my ($x0,$x1,$y0,$y1,$t0)=@_;
228$code.=<<___;
229 movdqa $y0, $t0
230 pxor $y1, $t0
231 pand $x0, $t0
232 pxor $x1, $x0
233 pand $y0, $x1
234 pand $y1, $x0
235 pxor $x1, $x0
236 pxor $t0, $x1
237___
238}
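Read with one bit per plane instead of 128, the eight instructions above multiply two GF(2^2) elements whose bits are carried in separate planes. A scalar model of the same dataflow (names are illustrative; the planes go in and come out as (x0, x1) pairs):

sub mul_gf4_scalar {
    my ($x0, $x1, $y0, $y1) = @_;
    my $t0 = ($y0 ^ $y1) & $x0;          # movdqa/pxor/pand
    my $p  = $x1 & $y0;                  # pand y0,x1
    my $n0 = (($x0 ^ $x1) & $y1) ^ $p;   # new low plane
    my $n1 = $p ^ $t0;                   # new high plane
    return ($n0, $n1);
}

Mul_GF4_N below is the same instruction sequence with the destinations of the final two pxor swapped; Mul_GF4_N_GF4 interleaves the two variants.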
239
240sub Mul_GF4_N { # not used, see next subroutine
241# multiply and scale by N
242my ($x0,$x1,$y0,$y1,$t0)=@_;
243$code.=<<___;
244 movdqa $y0, $t0
245 pxor $y1, $t0
246 pand $x0, $t0
247 pxor $x1, $x0
248 pand $y0, $x1
249 pand $y1, $x0
250 pxor $x0, $x1
251 pxor $t0, $x0
252___
253}
254
255sub Mul_GF4_N_GF4 {
256# interleaved Mul_GF4_N and Mul_GF4
257my ($x0,$x1,$y0,$y1,$t0,
258 $x2,$x3,$y2,$y3,$t1)=@_;
259$code.=<<___;
260 movdqa $y0, $t0
261 movdqa $y2, $t1
262 pxor $y1, $t0
263 pxor $y3, $t1
264 pand $x0, $t0
265 pand $x2, $t1
266 pxor $x1, $x0
267 pxor $x3, $x2
268 pand $y0, $x1
269 pand $y2, $x3
270 pand $y1, $x0
271 pand $y3, $x2
272 pxor $x0, $x1
273 pxor $x3, $x2
274 pxor $t0, $x0
275 pxor $t1, $x3
276___
277}
278sub Mul_GF16_2 {
279my @x=@_[0..7];
280my @y=@_[8..11];
281my @t=@_[12..15];
282$code.=<<___;
283 movdqa @x[0], @t[0]
284 movdqa @x[1], @t[1]
285___
286 &Mul_GF4 (@x[0], @x[1], @y[0], @y[1], @t[2]);
287$code.=<<___;
288 pxor @x[2], @t[0]
289 pxor @x[3], @t[1]
290 pxor @y[2], @y[0]
291 pxor @y[3], @y[1]
292___
293 Mul_GF4_N_GF4 (@t[0], @t[1], @y[0], @y[1], @t[3],
294 @x[2], @x[3], @y[2], @y[3], @t[2]);
295$code.=<<___;
296 pxor @t[0], @x[0]
297 pxor @t[0], @x[2]
298 pxor @t[1], @x[1]
299 pxor @t[1], @x[3]
300
301 movdqa @x[4], @t[0]
302 movdqa @x[5], @t[1]
303 pxor @x[6], @t[0]
304 pxor @x[7], @t[1]
305___
306 &Mul_GF4_N_GF4 (@t[0], @t[1], @y[0], @y[1], @t[3],
307 @x[6], @x[7], @y[2], @y[3], @t[2]);
308$code.=<<___;
309 pxor @y[2], @y[0]
310 pxor @y[3], @y[1]
311___
312 &Mul_GF4 (@x[4], @x[5], @y[0], @y[1], @t[3]);
313$code.=<<___;
314 pxor @t[0], @x[4]
315 pxor @t[0], @x[6]
316 pxor @t[1], @x[5]
317 pxor @t[1], @x[7]
318___
319}
320sub Inv_GF256 {
321#;********************************************************************
322#;* Inv_GF256: Input x0-x7 Output x0-x7 Temp t0-t3,s0-s3 (144) *
323#;********************************************************************
324my @x=@_[0..7];
325my @t=@_[8..11];
326my @s=@_[12..15];
327# direct optimizations from hardware
328$code.=<<___;
329 movdqa @x[4], @t[3]
330 movdqa @x[5], @t[2]
331 movdqa @x[1], @t[1]
332 movdqa @x[7], @s[1]
333 movdqa @x[0], @s[0]
334
335 pxor @x[6], @t[3]
336 pxor @x[7], @t[2]
337 pxor @x[3], @t[1]
338 movdqa @t[3], @s[2]
339 pxor @x[6], @s[1]
340 movdqa @t[2], @t[0]
341 pxor @x[2], @s[0]
342 movdqa @t[3], @s[3]
343
344 por @t[1], @t[2]
345 por @s[0], @t[3]
346 pxor @t[0], @s[3]
347 pand @s[0], @s[2]
348 pxor @t[1], @s[0]
349 pand @t[1], @t[0]
350 pand @s[0], @s[3]
351 movdqa @x[3], @s[0]
352 pxor @x[2], @s[0]
353 pand @s[0], @s[1]
354 pxor @s[1], @t[3]
355 pxor @s[1], @t[2]
356 movdqa @x[4], @s[1]
357 movdqa @x[1], @s[0]
358 pxor @x[5], @s[1]
359 pxor @x[0], @s[0]
360 movdqa @s[1], @t[1]
361 pand @s[0], @s[1]
362 por @s[0], @t[1]
363 pxor @s[1], @t[0]
364 pxor @s[3], @t[3]
365 pxor @s[2], @t[2]
366 pxor @s[3], @t[1]
367 movdqa @x[7], @s[0]
368 pxor @s[2], @t[0]
369 movdqa @x[6], @s[1]
370 pxor @s[2], @t[1]
371 movdqa @x[5], @s[2]
372 pand @x[3], @s[0]
373 movdqa @x[4], @s[3]
374 pand @x[2], @s[1]
375 pand @x[1], @s[2]
376 por @x[0], @s[3]
377 pxor @s[0], @t[3]
378 pxor @s[1], @t[2]
379 pxor @s[2], @t[1]
380 pxor @s[3], @t[0]
381
382 #Inv_GF16 \t0, \t1, \t2, \t3, \s0, \s1, \s2, \s3
383
384 # new smaller inversion
385
386 movdqa @t[3], @s[0]
387 pand @t[1], @t[3]
388 pxor @t[2], @s[0]
389
390 movdqa @t[0], @s[2]
391 movdqa @s[0], @s[3]
392 pxor @t[3], @s[2]
393 pand @s[2], @s[3]
394
395 movdqa @t[1], @s[1]
396 pxor @t[2], @s[3]
397 pxor @t[0], @s[1]
398
399 pxor @t[2], @t[3]
400
401 pand @t[3], @s[1]
402
403 movdqa @s[2], @t[2]
404 pxor @t[0], @s[1]
405
406 pxor @s[1], @t[2]
407 pxor @s[1], @t[1]
408
409 pand @t[0], @t[2]
410
411 pxor @t[2], @s[2]
412 pxor @t[2], @t[1]
413
414 pand @s[3], @s[2]
415
416 pxor @s[0], @s[2]
417___
418# output in s3, s2, s1, t1
419
420# Mul_GF16_2 \x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \t2, \t3, \t0, \t1, \s0, \s1, \s2, \s3
421
422# Mul_GF16_2 \x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \s3, \s2, \s1, \t1, \s0, \t0, \t2, \t3
423 &Mul_GF16_2(@x,@s[3,2,1],@t[1],@s[0],@t[0,2,3]);
424
425### output msb > [x3,x2,x1,x0,x7,x6,x5,x4] < lsb
426}
427
428# AES linear components
429
430sub ShiftRows {
431my @x=@_[0..7];
432my $mask=pop;
433$code.=<<___;
434 pxor 0x00($key),@x[0]
435 pxor 0x10($key),@x[1]
436 pshufb $mask,@x[0]
437 pxor 0x20($key),@x[2]
438 pshufb $mask,@x[1]
439 pxor 0x30($key),@x[3]
440 pshufb $mask,@x[2]
441 pxor 0x40($key),@x[4]
442 pshufb $mask,@x[3]
443 pxor 0x50($key),@x[5]
444 pshufb $mask,@x[4]
445 pxor 0x60($key),@x[6]
446 pshufb $mask,@x[5]
447 pxor 0x70($key),@x[7]
448 pshufb $mask,@x[6]
449 lea 0x80($key),$key
450 pshufb $mask,@x[7]
451___
452}
453
454sub MixColumns {
455# modified to emit output in order suitable for feeding back to aesenc[last]
456my @x=@_[0..7];
457my @t=@_[8..15];
458$code.=<<___;
459 pshufd \$0x93, @x[0], @t[0] # x0 <<< 32
460 pshufd \$0x93, @x[1], @t[1]
461 pxor @t[0], @x[0] # x0 ^ (x0 <<< 32)
462 pshufd \$0x93, @x[2], @t[2]
463 pxor @t[1], @x[1]
464 pshufd \$0x93, @x[3], @t[3]
465 pxor @t[2], @x[2]
466 pshufd \$0x93, @x[4], @t[4]
467 pxor @t[3], @x[3]
468 pshufd \$0x93, @x[5], @t[5]
469 pxor @t[4], @x[4]
470 pshufd \$0x93, @x[6], @t[6]
471 pxor @t[5], @x[5]
472 pshufd \$0x93, @x[7], @t[7]
473 pxor @t[6], @x[6]
474 pxor @t[7], @x[7]
475
476 pxor @x[0], @t[1]
477 pxor @x[7], @t[0]
478 pxor @x[7], @t[1]
479 pshufd \$0x4E, @x[0], @x[0] # (x0 ^ (x0 <<< 32)) <<< 64
480 pxor @x[1], @t[2]
481 pshufd \$0x4E, @x[1], @x[1]
482 pxor @x[4], @t[5]
483 pxor @t[0], @x[0]
484 pxor @x[5], @t[6]
485 pxor @t[1], @x[1]
486 pxor @x[3], @t[4]
487 pshufd \$0x4E, @x[4], @t[0]
488 pxor @x[6], @t[7]
489 pshufd \$0x4E, @x[5], @t[1]
490 pxor @x[2], @t[3]
491 pshufd \$0x4E, @x[3], @x[4]
492 pxor @x[7], @t[3]
493 pshufd \$0x4E, @x[7], @x[5]
494 pxor @x[7], @t[4]
495 pshufd \$0x4E, @x[6], @x[3]
496 pxor @t[4], @t[0]
497 pshufd \$0x4E, @x[2], @x[6]
498 pxor @t[5], @t[1]
499
500 pxor @t[3], @x[4]
501 pxor @t[7], @x[5]
502 pxor @t[6], @x[3]
503 movdqa @t[0], @x[2]
504 pxor @t[2], @x[6]
505 movdqa @t[1], @x[7]
506___
507}
508
509sub InvMixColumns {
510my @x=@_[0..7];
511my @t=@_[8..15];
512
513$code.=<<___;
514 # multiplication by 0x0e
515 pshufd \$0x93, @x[7], @t[7]
516 movdqa @x[2], @t[2]
517 pxor @x[5], @x[7] # 7 5
518 pxor @x[5], @x[2] # 2 5
519 pshufd \$0x93, @x[0], @t[0]
520 movdqa @x[5], @t[5]
521 pxor @x[0], @x[5] # 5 0 [1]
522 pxor @x[1], @x[0] # 0 1
523 pshufd \$0x93, @x[1], @t[1]
524 pxor @x[2], @x[1] # 1 25
525 pxor @x[6], @x[0] # 01 6 [2]
526 pxor @x[3], @x[1] # 125 3 [4]
527 pshufd \$0x93, @x[3], @t[3]
528 pxor @x[0], @x[2] # 25 016 [3]
529 pxor @x[7], @x[3] # 3 75
530 pxor @x[6], @x[7] # 75 6 [0]
531 pshufd \$0x93, @x[6], @t[6]
532 movdqa @x[4], @t[4]
533 pxor @x[4], @x[6] # 6 4
534 pxor @x[3], @x[4] # 4 375 [6]
535 pxor @x[7], @x[3] # 375 756=36
536 pxor @t[5], @x[6] # 64 5 [7]
537 pxor @t[2], @x[3] # 36 2
538 pxor @t[4], @x[3] # 362 4 [5]
539 pshufd \$0x93, @t[5], @t[5]
540___
541 my @y = @x[7,5,0,2,1,3,4,6];
542$code.=<<___;
543 # multiplication by 0x0b
544 pxor @y[0], @y[1]
545 pxor @t[0], @y[0]
546 pxor @t[1], @y[1]
547 pshufd \$0x93, @t[2], @t[2]
548 pxor @t[5], @y[0]
549 pxor @t[6], @y[1]
550 pxor @t[7], @y[0]
551 pshufd \$0x93, @t[4], @t[4]
552 pxor @t[6], @t[7] # clobber t[7]
553 pxor @y[0], @y[1]
554
555 pxor @t[0], @y[3]
556 pshufd \$0x93, @t[0], @t[0]
557 pxor @t[1], @y[2]
558 pxor @t[1], @y[4]
559 pxor @t[2], @y[2]
560 pshufd \$0x93, @t[1], @t[1]
561 pxor @t[2], @y[3]
562 pxor @t[2], @y[5]
563 pxor @t[7], @y[2]
564 pshufd \$0x93, @t[2], @t[2]
565 pxor @t[3], @y[3]
566 pxor @t[3], @y[6]
567 pxor @t[3], @y[4]
568 pshufd \$0x93, @t[3], @t[3]
569 pxor @t[4], @y[7]
570 pxor @t[4], @y[5]
571 pxor @t[7], @y[7]
572 pxor @t[5], @y[3]
573 pxor @t[4], @y[4]
574 pxor @t[5], @t[7] # clobber t[7] even more
575
576 pxor @t[7], @y[5]
577 pshufd \$0x93, @t[4], @t[4]
578 pxor @t[7], @y[6]
579 pxor @t[7], @y[4]
580
581 pxor @t[5], @t[7]
582 pshufd \$0x93, @t[5], @t[5]
583 pxor @t[6], @t[7] # restore t[7]
584
585 # multiplication by 0x0d
586 pxor @y[7], @y[4]
587 pxor @t[4], @y[7]
588 pshufd \$0x93, @t[6], @t[6]
589 pxor @t[0], @y[2]
590 pxor @t[5], @y[7]
591 pxor @t[2], @y[2]
592 pshufd \$0x93, @t[7], @t[7]
593
594 pxor @y[1], @y[3]
595 pxor @t[1], @y[1]
596 pxor @t[0], @y[0]
597 pxor @t[0], @y[3]
598 pxor @t[5], @y[1]
599 pxor @t[5], @y[0]
600 pxor @t[7], @y[1]
601 pshufd \$0x93, @t[0], @t[0]
602 pxor @t[6], @y[0]
603 pxor @y[1], @y[3]
604 pxor @t[1], @y[4]
605 pshufd \$0x93, @t[1], @t[1]
606
607 pxor @t[7], @y[7]
608 pxor @t[2], @y[4]
609 pxor @t[2], @y[5]
610 pshufd \$0x93, @t[2], @t[2]
611 pxor @t[6], @y[2]
612 pxor @t[3], @t[6] # clobber t[6]
613 pxor @y[7], @y[4]
614 pxor @t[6], @y[3]
615
616 pxor @t[6], @y[6]
617 pxor @t[5], @y[5]
618 pxor @t[4], @y[6]
619 pshufd \$0x93, @t[4], @t[4]
620 pxor @t[6], @y[5]
621 pxor @t[7], @y[6]
622 pxor @t[3], @t[6] # restore t[6]
623
624 pshufd \$0x93, @t[5], @t[5]
625 pshufd \$0x93, @t[6], @t[6]
626 pshufd \$0x93, @t[7], @t[7]
627 pshufd \$0x93, @t[3], @t[3]
628
629 # multiplication by 0x09
630 pxor @y[1], @y[4]
631 pxor @y[1], @t[1] # t[1]=y[1]
632 pxor @t[5], @t[0] # clobber t[0]
633 pxor @t[5], @t[1]
634 pxor @t[0], @y[3]
635 pxor @y[0], @t[0] # t[0]=y[0]
636 pxor @t[6], @t[1]
637 pxor @t[7], @t[6] # clobber t[6]
638 pxor @t[1], @y[4]
639 pxor @t[4], @y[7]
640 pxor @y[4], @t[4] # t[4]=y[4]
641 pxor @t[3], @y[6]
642 pxor @y[3], @t[3] # t[3]=y[3]
643 pxor @t[2], @y[5]
644 pxor @y[2], @t[2] # t[2]=y[2]
645 pxor @t[7], @t[3]
646 pxor @y[5], @t[5] # t[5]=y[5]
647 pxor @t[6], @t[2]
648 pxor @t[6], @t[5]
649 pxor @y[6], @t[6] # t[6]=y[6]
650 pxor @y[7], @t[7] # t[7]=y[7]
651
652 movdqa @t[0],@XMM[0]
653 movdqa @t[1],@XMM[1]
654 movdqa @t[2],@XMM[2]
655 movdqa @t[3],@XMM[3]
656 movdqa @t[4],@XMM[4]
657 movdqa @t[5],@XMM[5]
658 movdqa @t[6],@XMM[6]
659 movdqa @t[7],@XMM[7]
660___
661}
662
663sub aesenc { # not used
664my @b=@_[0..7];
665my @t=@_[8..15];
666$code.=<<___;
667 movdqa 0x30($const),@t[0] # .LSR
668___
669 &ShiftRows (@b,@t[0]);
670 &Sbox (@b,@t);
671 &MixColumns (@b[0,1,4,6,3,7,2,5],@t);
672}
673
674sub aesenclast { # not used
675my @b=@_[0..7];
676my @t=@_[8..15];
677$code.=<<___;
678 movdqa 0x40($const),@t[0] # .LSRM0
679___
680 &ShiftRows (@b,@t[0]);
681 &Sbox (@b,@t);
682$code.=<<___
683 pxor 0x00($key),@b[0]
684 pxor 0x10($key),@b[1]
685 pxor 0x20($key),@b[4]
686 pxor 0x30($key),@b[6]
687 pxor 0x40($key),@b[3]
688 pxor 0x50($key),@b[7]
689 pxor 0x60($key),@b[2]
690 pxor 0x70($key),@b[5]
691___
692}
693
694sub swapmove {
695my ($a,$b,$n,$mask,$t)=@_;
696$code.=<<___;
697 movdqa $b,$t
698 psrlq \$$n,$b
699 pxor $a,$b
700 pand $mask,$b
701 pxor $b,$a
702 psllq \$$n,$b
703 pxor $t,$b
704___
705}
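swapmove() is the classic delta-swap: within each 64-bit lane it exchanges the bits of $a selected by $mask with the bits of $b sitting $n positions higher. The same trick on plain integers (a small illustrative sketch):

sub swapmove_scalar {
    my ($u, $v, $n, $mask) = @_;
    my $x = ($u ^ ($v >> $n)) & $mask;   # 1 where the paired bits differ
    return ($u ^ $x, $v ^ ($x << $n));   # toggling both sides completes the swap
}

# e.g. exchange the even bits of $u with the odd bits of $v:
my ($u, $v) = swapmove_scalar(0xaa, 0xcc, 1, 0x55);
printf "%08b %08b\n", $u, $v;            # 11101110 01000100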
706sub swapmove2x {
707my ($a0,$b0,$a1,$b1,$n,$mask,$t0,$t1)=@_;
708$code.=<<___;
709 movdqa $b0,$t0
710 psrlq \$$n,$b0
711 movdqa $b1,$t1
712 psrlq \$$n,$b1
713 pxor $a0,$b0
714 pxor $a1,$b1
715 pand $mask,$b0
716 pand $mask,$b1
717 pxor $b0,$a0
718 psllq \$$n,$b0
719 pxor $b1,$a1
720 psllq \$$n,$b1
721 pxor $t0,$b0
722 pxor $t1,$b1
723___
724}
725
726sub bitslice {
727my @x=reverse(@_[0..7]);
728my ($t0,$t1,$t2,$t3)=@_[8..11];
729$code.=<<___;
730 movdqa 0x00($const),$t0 # .LBS0
731 movdqa 0x10($const),$t1 # .LBS1
732___
733 &swapmove2x(@x[0,1,2,3],1,$t0,$t2,$t3);
734 &swapmove2x(@x[4,5,6,7],1,$t0,$t2,$t3);
735$code.=<<___;
736 movdqa 0x20($const),$t0 # .LBS2
737___
738 &swapmove2x(@x[0,2,1,3],2,$t1,$t2,$t3);
739 &swapmove2x(@x[4,6,5,7],2,$t1,$t2,$t3);
740
741 &swapmove2x(@x[0,4,1,5],4,$t0,$t2,$t3);
742 &swapmove2x(@x[2,6,3,7],4,$t0,$t2,$t3);
743}
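In effect, the swapmove2x passes at shifts 1, 2 and 4 transpose an 8x8 bit matrix across the eight registers for every byte position, which is what puts the state into bit-sliced form: each register ends up carrying one bit position of every byte. A direct scalar illustration of that end result (simplified; it ignores the exact register ordering and the 64-bit lane handling above):

sub transpose8x8 {
    my @row = @_;                        # eight bytes in, eight bit planes out
    my @plane = (0) x 8;
    for my $r (0 .. 7) {
        for my $b (0 .. 7) {
            $plane[$b] |= (($row[$r] >> $b) & 1) << $r;
        }
    }
    return @plane;
}

my @planes = transpose8x8(map { ord } split //, "bitslice");
printf "plane %d: %08b\n", $_, $planes[$_] for 0 .. 7;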
744
745$code.=<<___;
746.text
747
748.extern asm_AES_encrypt
749.extern asm_AES_decrypt
750
751.type _bsaes_encrypt8,\@abi-omnipotent
752.align 64
753_bsaes_encrypt8:
754 lea .LBS0(%rip), $const # constants table
755
756 movdqa ($key), @XMM[9] # round 0 key
757 lea 0x10($key), $key
758 movdqa 0x50($const), @XMM[8] # .LM0SR
759 pxor @XMM[9], @XMM[0] # xor with round0 key
760 pxor @XMM[9], @XMM[1]
761 pshufb @XMM[8], @XMM[0]
762 pxor @XMM[9], @XMM[2]
763 pshufb @XMM[8], @XMM[1]
764 pxor @XMM[9], @XMM[3]
765 pshufb @XMM[8], @XMM[2]
766 pxor @XMM[9], @XMM[4]
767 pshufb @XMM[8], @XMM[3]
768 pxor @XMM[9], @XMM[5]
769 pshufb @XMM[8], @XMM[4]
770 pxor @XMM[9], @XMM[6]
771 pshufb @XMM[8], @XMM[5]
772 pxor @XMM[9], @XMM[7]
773 pshufb @XMM[8], @XMM[6]
774 pshufb @XMM[8], @XMM[7]
775_bsaes_encrypt8_bitslice:
776___
777 &bitslice (@XMM[0..7, 8..11]);
778$code.=<<___;
779 dec $rounds
780 jmp .Lenc_sbox
781.align 16
782.Lenc_loop:
783___
784 &ShiftRows (@XMM[0..7, 8]);
785$code.=".Lenc_sbox:\n";
786 &Sbox (@XMM[0..7, 8..15]);
787$code.=<<___;
788 dec $rounds
789 jl .Lenc_done
790___
791 &MixColumns (@XMM[0,1,4,6,3,7,2,5, 8..15]);
792$code.=<<___;
793 movdqa 0x30($const), @XMM[8] # .LSR
794 jnz .Lenc_loop
795 movdqa 0x40($const), @XMM[8] # .LSRM0
796 jmp .Lenc_loop
797.align 16
798.Lenc_done:
799___
800 # output in lsb > [t0, t1, t4, t6, t3, t7, t2, t5] < msb
801 &bitslice (@XMM[0,1,4,6,3,7,2,5, 8..11]);
802$code.=<<___;
803 movdqa ($key), @XMM[8] # last round key
804 pxor @XMM[8], @XMM[4]
805 pxor @XMM[8], @XMM[6]
806 pxor @XMM[8], @XMM[3]
807 pxor @XMM[8], @XMM[7]
808 pxor @XMM[8], @XMM[2]
809 pxor @XMM[8], @XMM[5]
810 pxor @XMM[8], @XMM[0]
811 pxor @XMM[8], @XMM[1]
812 ret
813.size _bsaes_encrypt8,.-_bsaes_encrypt8
814
815.type _bsaes_decrypt8,\@abi-omnipotent
816.align 64
817_bsaes_decrypt8:
818 lea .LBS0(%rip), $const # constants table
819
820 movdqa ($key), @XMM[9] # round 0 key
821 lea 0x10($key), $key
822 movdqa -0x30($const), @XMM[8] # .LM0ISR
823 pxor @XMM[9], @XMM[0] # xor with round0 key
824 pxor @XMM[9], @XMM[1]
825 pshufb @XMM[8], @XMM[0]
826 pxor @XMM[9], @XMM[2]
827 pshufb @XMM[8], @XMM[1]
828 pxor @XMM[9], @XMM[3]
829 pshufb @XMM[8], @XMM[2]
830 pxor @XMM[9], @XMM[4]
831 pshufb @XMM[8], @XMM[3]
832 pxor @XMM[9], @XMM[5]
833 pshufb @XMM[8], @XMM[4]
834 pxor @XMM[9], @XMM[6]
835 pshufb @XMM[8], @XMM[5]
836 pxor @XMM[9], @XMM[7]
837 pshufb @XMM[8], @XMM[6]
838 pshufb @XMM[8], @XMM[7]
839___
840 &bitslice (@XMM[0..7, 8..11]);
841$code.=<<___;
842 dec $rounds
843 jmp .Ldec_sbox
844.align 16
845.Ldec_loop:
846___
847 &ShiftRows (@XMM[0..7, 8]);
848$code.=".Ldec_sbox:\n";
849 &InvSbox (@XMM[0..7, 8..15]);
850$code.=<<___;
851 dec $rounds
852 jl .Ldec_done
853___
854 &InvMixColumns (@XMM[0,1,6,4,2,7,3,5, 8..15]);
855$code.=<<___;
856 movdqa -0x10($const), @XMM[8] # .LISR
857 jnz .Ldec_loop
858 movdqa -0x20($const), @XMM[8] # .LISRM0
859 jmp .Ldec_loop
860.align 16
861.Ldec_done:
862___
863 &bitslice (@XMM[0,1,6,4,2,7,3,5, 8..11]);
864$code.=<<___;
865 movdqa ($key), @XMM[8] # last round key
866 pxor @XMM[8], @XMM[6]
867 pxor @XMM[8], @XMM[4]
868 pxor @XMM[8], @XMM[2]
869 pxor @XMM[8], @XMM[7]
870 pxor @XMM[8], @XMM[3]
871 pxor @XMM[8], @XMM[5]
872 pxor @XMM[8], @XMM[0]
873 pxor @XMM[8], @XMM[1]
874 ret
875.size _bsaes_decrypt8,.-_bsaes_decrypt8
876___
877}
878{
879my ($out,$inp,$rounds,$const)=("%rax","%rcx","%r10d","%r11");
880
881sub bitslice_key {
882my @x=reverse(@_[0..7]);
883my ($bs0,$bs1,$bs2,$t2,$t3)=@_[8..12];
884
885 &swapmove (@x[0,1],1,$bs0,$t2,$t3);
886$code.=<<___;
887 #&swapmove(@x[2,3],1,$t0,$t2,$t3);
888 movdqa @x[0], @x[2]
889 movdqa @x[1], @x[3]
890___
891 #&swapmove2x(@x[4,5,6,7],1,$t0,$t2,$t3);
892
893 &swapmove2x (@x[0,2,1,3],2,$bs1,$t2,$t3);
894$code.=<<___;
895 #&swapmove2x(@x[4,6,5,7],2,$t1,$t2,$t3);
896 movdqa @x[0], @x[4]
897 movdqa @x[2], @x[6]
898 movdqa @x[1], @x[5]
899 movdqa @x[3], @x[7]
900___
901 &swapmove2x (@x[0,4,1,5],4,$bs2,$t2,$t3);
902 &swapmove2x (@x[2,6,3,7],4,$bs2,$t2,$t3);
903}
904
905$code.=<<___;
906.type _bsaes_key_convert,\@abi-omnipotent
907.align 16
908_bsaes_key_convert:
909 lea .Lmasks(%rip), $const
910 movdqu ($inp), %xmm7 # load round 0 key
911 lea 0x10($inp), $inp
912 movdqa 0x00($const), %xmm0 # 0x01...
913 movdqa 0x10($const), %xmm1 # 0x02...
914 movdqa 0x20($const), %xmm2 # 0x04...
915 movdqa 0x30($const), %xmm3 # 0x08...
916 movdqa 0x40($const), %xmm4 # .LM0
917 pcmpeqd %xmm5, %xmm5 # .LNOT
918
919 movdqu ($inp), %xmm6 # load round 1 key
920 movdqa %xmm7, ($out) # save round 0 key
921 lea 0x10($out), $out
922 dec $rounds
923 jmp .Lkey_loop
924.align 16
925.Lkey_loop:
926 pshufb %xmm4, %xmm6 # .LM0
927
928 movdqa %xmm0, %xmm8
929 movdqa %xmm1, %xmm9
930
931 pand %xmm6, %xmm8
932 pand %xmm6, %xmm9
933 movdqa %xmm2, %xmm10
934 pcmpeqb %xmm0, %xmm8
935 psllq \$4, %xmm0 # 0x10...
936 movdqa %xmm3, %xmm11
937 pcmpeqb %xmm1, %xmm9
938 psllq \$4, %xmm1 # 0x20...
939
940 pand %xmm6, %xmm10
941 pand %xmm6, %xmm11
942 movdqa %xmm0, %xmm12
943 pcmpeqb %xmm2, %xmm10
944 psllq \$4, %xmm2 # 0x40...
945 movdqa %xmm1, %xmm13
946 pcmpeqb %xmm3, %xmm11
947 psllq \$4, %xmm3 # 0x80...
948
949 movdqa %xmm2, %xmm14
950 movdqa %xmm3, %xmm15
951 pxor %xmm5, %xmm8 # "pnot"
952 pxor %xmm5, %xmm9
953
954 pand %xmm6, %xmm12
955 pand %xmm6, %xmm13
956 movdqa %xmm8, 0x00($out) # write bit-sliced round key
957 pcmpeqb %xmm0, %xmm12
958 psrlq \$4, %xmm0 # 0x01...
959 movdqa %xmm9, 0x10($out)
960 pcmpeqb %xmm1, %xmm13
961 psrlq \$4, %xmm1 # 0x02...
962 lea 0x10($inp), $inp
963
964 pand %xmm6, %xmm14
965 pand %xmm6, %xmm15
966 movdqa %xmm10, 0x20($out)
967 pcmpeqb %xmm2, %xmm14
968 psrlq \$4, %xmm2 # 0x04...
969 movdqa %xmm11, 0x30($out)
970 pcmpeqb %xmm3, %xmm15
971 psrlq \$4, %xmm3 # 0x08...
972 movdqu ($inp), %xmm6 # load next round key
973
974 pxor %xmm5, %xmm13 # "pnot"
975 pxor %xmm5, %xmm14
976 movdqa %xmm12, 0x40($out)
977 movdqa %xmm13, 0x50($out)
978 movdqa %xmm14, 0x60($out)
979 movdqa %xmm15, 0x70($out)
980 lea 0x80($out),$out
981 dec $rounds
982 jnz .Lkey_loop
983
984 movdqa 0x50($const), %xmm7 # .L63
985 #movdqa %xmm6, ($out) # don't save last round key
986 ret
987.size _bsaes_key_convert,.-_bsaes_key_convert
988___
989}
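_bsaes_key_convert stretches each round-key byte into eight mask bytes, one per bit plane, so AddRoundKey on the bit-sliced state remains one pxor per plane (see the pxor 0x00($key)..0x70($key) sequence in ShiftRows above). A scalar sketch of the per-byte expansion (illustrative only; it leaves out the selective complementing done with the all-ones register, marked "pnot" above, and the .LM0 byte shuffle):

sub expand_key_byte {
    my ($kb) = @_;                       # one byte of the conventional round key
    # plane k gets 0xff where bit k of the key byte is set, else 0x00
    return map { ($kb >> $_) & 1 ? 0xff : 0x00 } 0 .. 7;
}

my @planes = expand_key_byte(0x6b);
printf "plane %d: 0x%02x\n", $_, $planes[$_] for 0 .. 7;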
990
991if (0 && !$win64) { # the following four functions are an unsupported interface
992 # used for benchmarking...
993$code.=<<___;
994.globl bsaes_enc_key_convert
995.type bsaes_enc_key_convert,\@function,2
996.align 16
997bsaes_enc_key_convert:
998 mov 240($inp),%r10d # pass rounds
999 mov $inp,%rcx # pass key
1000 mov $out,%rax # pass key schedule
1001 call _bsaes_key_convert
1002 pxor %xmm6,%xmm7 # fix up last round key
1003 movdqa %xmm7,(%rax) # save last round key
1004 ret
1005.size bsaes_enc_key_convert,.-bsaes_enc_key_convert
1006
1007.globl bsaes_encrypt_128
1008.type bsaes_encrypt_128,\@function,4
1009.align 16
1010bsaes_encrypt_128:
1011.Lenc128_loop:
1012 movdqu 0x00($inp), @XMM[0] # load input
1013 movdqu 0x10($inp), @XMM[1]
1014 movdqu 0x20($inp), @XMM[2]
1015 movdqu 0x30($inp), @XMM[3]
1016 movdqu 0x40($inp), @XMM[4]
1017 movdqu 0x50($inp), @XMM[5]
1018 movdqu 0x60($inp), @XMM[6]
1019 movdqu 0x70($inp), @XMM[7]
1020 mov $key, %rax # pass the $key
1021 lea 0x80($inp), $inp
1022 mov \$10,%r10d
1023
1024 call _bsaes_encrypt8
1025
1026 movdqu @XMM[0], 0x00($out) # write output
1027 movdqu @XMM[1], 0x10($out)
1028 movdqu @XMM[4], 0x20($out)
1029 movdqu @XMM[6], 0x30($out)
1030 movdqu @XMM[3], 0x40($out)
1031 movdqu @XMM[7], 0x50($out)
1032 movdqu @XMM[2], 0x60($out)
1033 movdqu @XMM[5], 0x70($out)
1034 lea 0x80($out), $out
1035 sub \$0x80,$len
1036 ja .Lenc128_loop
1037 ret
1038.size bsaes_encrypt_128,.-bsaes_encrypt_128
1039
1040.globl bsaes_dec_key_convert
1041.type bsaes_dec_key_convert,\@function,2
1042.align 16
1043bsaes_dec_key_convert:
1044 mov 240($inp),%r10d # pass rounds
1045 mov $inp,%rcx # pass key
1046 mov $out,%rax # pass key schedule
1047 call _bsaes_key_convert
1048 pxor ($out),%xmm7 # fix up round 0 key
1049 movdqa %xmm6,(%rax) # save last round key
1050 movdqa %xmm7,($out)
1051 ret
1052.size bsaes_dec_key_convert,.-bsaes_dec_key_convert
1053
1054.globl bsaes_decrypt_128
1055.type bsaes_decrypt_128,\@function,4
1056.align 16
1057bsaes_decrypt_128:
1058.Ldec128_loop:
1059 movdqu 0x00($inp), @XMM[0] # load input
1060 movdqu 0x10($inp), @XMM[1]
1061 movdqu 0x20($inp), @XMM[2]
1062 movdqu 0x30($inp), @XMM[3]
1063 movdqu 0x40($inp), @XMM[4]
1064 movdqu 0x50($inp), @XMM[5]
1065 movdqu 0x60($inp), @XMM[6]
1066 movdqu 0x70($inp), @XMM[7]
1067 mov $key, %rax # pass the $key
1068 lea 0x80($inp), $inp
1069 mov \$10,%r10d
1070
1071 call _bsaes_decrypt8
1072
1073 movdqu @XMM[0], 0x00($out) # write output
1074 movdqu @XMM[1], 0x10($out)
1075 movdqu @XMM[6], 0x20($out)
1076 movdqu @XMM[4], 0x30($out)
1077 movdqu @XMM[2], 0x40($out)
1078 movdqu @XMM[7], 0x50($out)
1079 movdqu @XMM[3], 0x60($out)
1080 movdqu @XMM[5], 0x70($out)
1081 lea 0x80($out), $out
1082 sub \$0x80,$len
1083 ja .Ldec128_loop
1084 ret
1085.size bsaes_decrypt_128,.-bsaes_decrypt_128
1086___
1087}
1088{
1089######################################################################
1090#
1091# OpenSSL interface
1092#
1093my ($arg1,$arg2,$arg3,$arg4,$arg5,$arg6)=$win64 ? ("%rcx","%rdx","%r8","%r9","%r10","%r11d")
1094 : ("%rdi","%rsi","%rdx","%rcx","%r8","%r9d");
1095my ($inp,$out,$len,$key)=("%r12","%r13","%r14","%r15");
1096
1097if ($ecb) {
1098$code.=<<___;
1099.globl bsaes_ecb_encrypt_blocks
1100.type bsaes_ecb_encrypt_blocks,\@abi-omnipotent
1101.align 16
1102bsaes_ecb_encrypt_blocks:
1103 mov %rsp, %rax
1104.Lecb_enc_prologue:
1105 push %rbp
1106 push %rbx
1107 push %r12
1108 push %r13
1109 push %r14
1110 push %r15
1111 lea -0x48(%rsp),%rsp
1112___
1113$code.=<<___ if ($win64);
1114 lea -0xa0(%rsp), %rsp
1115 movaps %xmm6, 0x40(%rsp)
1116 movaps %xmm7, 0x50(%rsp)
1117 movaps %xmm8, 0x60(%rsp)
1118 movaps %xmm9, 0x70(%rsp)
1119 movaps %xmm10, 0x80(%rsp)
1120 movaps %xmm11, 0x90(%rsp)
1121 movaps %xmm12, 0xa0(%rsp)
1122 movaps %xmm13, 0xb0(%rsp)
1123 movaps %xmm14, 0xc0(%rsp)
1124 movaps %xmm15, 0xd0(%rsp)
1125.Lecb_enc_body:
1126___
1127$code.=<<___;
1128 mov %rsp,%rbp # backup %rsp
1129 mov 240($arg4),%eax # rounds
1130 mov $arg1,$inp # backup arguments
1131 mov $arg2,$out
1132 mov $arg3,$len
1133 mov $arg4,$key
1134 cmp \$8,$arg3
1135 jb .Lecb_enc_short
1136
1137 mov %eax,%ebx # backup rounds
1138 shl \$7,%rax # 128 bytes per inner round key
1139 sub \$`128-32`,%rax # size of bit-sliced key schedule
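	# e.g. for AES-128 (rounds=10): 10*128-96 = 1184 bytes, i.e. a 16-byte
	# round-0 key, nine 128-byte bit-sliced round keys and a 16-byte last
	# round key stored by the caller after _bsaes_key_convert returns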
1140 sub %rax,%rsp
1141 mov %rsp,%rax # pass key schedule
1142 mov $key,%rcx # pass key
1143 mov %ebx,%r10d # pass rounds
1144 call _bsaes_key_convert
1145 pxor %xmm6,%xmm7 # fix up last round key
1146 movdqa %xmm7,(%rax) # save last round key
1147
1148 sub \$8,$len
1149.Lecb_enc_loop:
1150 movdqu 0x00($inp), @XMM[0] # load input
1151 movdqu 0x10($inp), @XMM[1]
1152 movdqu 0x20($inp), @XMM[2]
1153 movdqu 0x30($inp), @XMM[3]
1154 movdqu 0x40($inp), @XMM[4]
1155 movdqu 0x50($inp), @XMM[5]
1156 mov %rsp, %rax # pass key schedule
1157 movdqu 0x60($inp), @XMM[6]
1158 mov %ebx,%r10d # pass rounds
1159 movdqu 0x70($inp), @XMM[7]
1160 lea 0x80($inp), $inp
1161
1162 call _bsaes_encrypt8
1163
1164 movdqu @XMM[0], 0x00($out) # write output
1165 movdqu @XMM[1], 0x10($out)
1166 movdqu @XMM[4], 0x20($out)
1167 movdqu @XMM[6], 0x30($out)
1168 movdqu @XMM[3], 0x40($out)
1169 movdqu @XMM[7], 0x50($out)
1170 movdqu @XMM[2], 0x60($out)
1171 movdqu @XMM[5], 0x70($out)
1172 lea 0x80($out), $out
1173 sub \$8,$len
1174 jnc .Lecb_enc_loop
1175
1176 add \$8,$len
1177 jz .Lecb_enc_done
1178
1179 movdqu 0x00($inp), @XMM[0] # load input
1180 mov %rsp, %rax # pass key schedule
1181 mov %ebx,%r10d # pass rounds
1182 cmp \$2,$len
1183 jb .Lecb_enc_one
1184 movdqu 0x10($inp), @XMM[1]
1185 je .Lecb_enc_two
1186 movdqu 0x20($inp), @XMM[2]
1187 cmp \$4,$len
1188 jb .Lecb_enc_three
1189 movdqu 0x30($inp), @XMM[3]
1190 je .Lecb_enc_four
1191 movdqu 0x40($inp), @XMM[4]
1192 cmp \$6,$len
1193 jb .Lecb_enc_five
1194 movdqu 0x50($inp), @XMM[5]
1195 je .Lecb_enc_six
1196 movdqu 0x60($inp), @XMM[6]
1197 call _bsaes_encrypt8
1198 movdqu @XMM[0], 0x00($out) # write output
1199 movdqu @XMM[1], 0x10($out)
1200 movdqu @XMM[4], 0x20($out)
1201 movdqu @XMM[6], 0x30($out)
1202 movdqu @XMM[3], 0x40($out)
1203 movdqu @XMM[7], 0x50($out)
1204 movdqu @XMM[2], 0x60($out)
1205 jmp .Lecb_enc_done
1206.align 16
1207.Lecb_enc_six:
1208 call _bsaes_encrypt8
1209 movdqu @XMM[0], 0x00($out) # write output
1210 movdqu @XMM[1], 0x10($out)
1211 movdqu @XMM[4], 0x20($out)
1212 movdqu @XMM[6], 0x30($out)
1213 movdqu @XMM[3], 0x40($out)
1214 movdqu @XMM[7], 0x50($out)
1215 jmp .Lecb_enc_done
1216.align 16
1217.Lecb_enc_five:
1218 call _bsaes_encrypt8
1219 movdqu @XMM[0], 0x00($out) # write output
1220 movdqu @XMM[1], 0x10($out)
1221 movdqu @XMM[4], 0x20($out)
1222 movdqu @XMM[6], 0x30($out)
1223 movdqu @XMM[3], 0x40($out)
1224 jmp .Lecb_enc_done
1225.align 16
1226.Lecb_enc_four:
1227 call _bsaes_encrypt8
1228 movdqu @XMM[0], 0x00($out) # write output
1229 movdqu @XMM[1], 0x10($out)
1230 movdqu @XMM[4], 0x20($out)
1231 movdqu @XMM[6], 0x30($out)
1232 jmp .Lecb_enc_done
1233.align 16
1234.Lecb_enc_three:
1235 call _bsaes_encrypt8
1236 movdqu @XMM[0], 0x00($out) # write output
1237 movdqu @XMM[1], 0x10($out)
1238 movdqu @XMM[4], 0x20($out)
1239 jmp .Lecb_enc_done
1240.align 16
1241.Lecb_enc_two:
1242 call _bsaes_encrypt8
1243 movdqu @XMM[0], 0x00($out) # write output
1244 movdqu @XMM[1], 0x10($out)
1245 jmp .Lecb_enc_done
1246.align 16
1247.Lecb_enc_one:
1248 call _bsaes_encrypt8
1249 movdqu @XMM[0], 0x00($out) # write output
1250 jmp .Lecb_enc_done
1251.align 16
1252.Lecb_enc_short:
1253 lea ($inp), $arg1
1254 lea ($out), $arg2
1255 lea ($key), $arg3
1256 call asm_AES_encrypt
1257 lea 16($inp), $inp
1258 lea 16($out), $out
1259 dec $len
1260 jnz .Lecb_enc_short
1261
1262.Lecb_enc_done:
1263 lea (%rsp),%rax
1264 pxor %xmm0, %xmm0
1265.Lecb_enc_bzero: # wipe key schedule [if any]
1266 movdqa %xmm0, 0x00(%rax)
1267 movdqa %xmm0, 0x10(%rax)
1268 lea 0x20(%rax), %rax
1269 cmp %rax, %rbp
1270 jb .Lecb_enc_bzero
1271
1272 lea (%rbp),%rsp # restore %rsp
1273___
1274$code.=<<___ if ($win64);
1275 movaps 0x40(%rbp), %xmm6
1276 movaps 0x50(%rbp), %xmm7
1277 movaps 0x60(%rbp), %xmm8
1278 movaps 0x70(%rbp), %xmm9
1279 movaps 0x80(%rbp), %xmm10
1280 movaps 0x90(%rbp), %xmm11
1281 movaps 0xa0(%rbp), %xmm12
1282 movaps 0xb0(%rbp), %xmm13
1283 movaps 0xc0(%rbp), %xmm14
1284 movaps 0xd0(%rbp), %xmm15
1285 lea 0xa0(%rbp), %rsp
1286___
1287$code.=<<___;
1288 mov 0x48(%rsp), %r15
1289 mov 0x50(%rsp), %r14
1290 mov 0x58(%rsp), %r13
1291 mov 0x60(%rsp), %r12
1292 mov 0x68(%rsp), %rbx
1293 mov 0x70(%rsp), %rax
1294 lea 0x78(%rsp), %rsp
1295 mov %rax, %rbp
1296.Lecb_enc_epilogue:
1297 ret
1298.size bsaes_ecb_encrypt_blocks,.-bsaes_ecb_encrypt_blocks
1299
1300.globl bsaes_ecb_decrypt_blocks
1301.type bsaes_ecb_decrypt_blocks,\@abi-omnipotent
1302.align 16
1303bsaes_ecb_decrypt_blocks:
1304 mov %rsp, %rax
1305.Lecb_dec_prologue:
1306 push %rbp
1307 push %rbx
1308 push %r12
1309 push %r13
1310 push %r14
1311 push %r15
1312 lea -0x48(%rsp),%rsp
1313___
1314$code.=<<___ if ($win64);
1315 lea -0xa0(%rsp), %rsp
1316 movaps %xmm6, 0x40(%rsp)
1317 movaps %xmm7, 0x50(%rsp)
1318 movaps %xmm8, 0x60(%rsp)
1319 movaps %xmm9, 0x70(%rsp)
1320 movaps %xmm10, 0x80(%rsp)
1321 movaps %xmm11, 0x90(%rsp)
1322 movaps %xmm12, 0xa0(%rsp)
1323 movaps %xmm13, 0xb0(%rsp)
1324 movaps %xmm14, 0xc0(%rsp)
1325 movaps %xmm15, 0xd0(%rsp)
1326.Lecb_dec_body:
1327___
1328$code.=<<___;
1329 mov %rsp,%rbp # backup %rsp
1330 mov 240($arg4),%eax # rounds
1331 mov $arg1,$inp # backup arguments
1332 mov $arg2,$out
1333 mov $arg3,$len
1334 mov $arg4,$key
1335 cmp \$8,$arg3
1336 jb .Lecb_dec_short
1337
1338 mov %eax,%ebx # backup rounds
1339 shl \$7,%rax # 128 bytes per inner round key
1340 sub \$`128-32`,%rax # size of bit-sliced key schedule
1341 sub %rax,%rsp
1342 mov %rsp,%rax # pass key schedule
1343 mov $key,%rcx # pass key
1344 mov %ebx,%r10d # pass rounds
1345 call _bsaes_key_convert
1346 pxor (%rsp),%xmm7 # fix up 0 round key
1347 movdqa %xmm6,(%rax) # save last round key
1348 movdqa %xmm7,(%rsp)
1349
1350 sub \$8,$len
1351.Lecb_dec_loop:
1352 movdqu 0x00($inp), @XMM[0] # load input
1353 movdqu 0x10($inp), @XMM[1]
1354 movdqu 0x20($inp), @XMM[2]
1355 movdqu 0x30($inp), @XMM[3]
1356 movdqu 0x40($inp), @XMM[4]
1357 movdqu 0x50($inp), @XMM[5]
1358 mov %rsp, %rax # pass key schedule
1359 movdqu 0x60($inp), @XMM[6]
1360 mov %ebx,%r10d # pass rounds
1361 movdqu 0x70($inp), @XMM[7]
1362 lea 0x80($inp), $inp
1363
1364 call _bsaes_decrypt8
1365
1366 movdqu @XMM[0], 0x00($out) # write output
1367 movdqu @XMM[1], 0x10($out)
1368 movdqu @XMM[6], 0x20($out)
1369 movdqu @XMM[4], 0x30($out)
1370 movdqu @XMM[2], 0x40($out)
1371 movdqu @XMM[7], 0x50($out)
1372 movdqu @XMM[3], 0x60($out)
1373 movdqu @XMM[5], 0x70($out)
1374 lea 0x80($out), $out
1375 sub \$8,$len
1376 jnc .Lecb_dec_loop
1377
1378 add \$8,$len
1379 jz .Lecb_dec_done
1380
1381 movdqu 0x00($inp), @XMM[0] # load input
1382 mov %rsp, %rax # pass key schedule
1383 mov %ebx,%r10d # pass rounds
1384 cmp \$2,$len
1385 jb .Lecb_dec_one
1386 movdqu 0x10($inp), @XMM[1]
1387 je .Lecb_dec_two
1388 movdqu 0x20($inp), @XMM[2]
1389 cmp \$4,$len
1390 jb .Lecb_dec_three
1391 movdqu 0x30($inp), @XMM[3]
1392 je .Lecb_dec_four
1393 movdqu 0x40($inp), @XMM[4]
1394 cmp \$6,$len
1395 jb .Lecb_dec_five
1396 movdqu 0x50($inp), @XMM[5]
1397 je .Lecb_dec_six
1398 movdqu 0x60($inp), @XMM[6]
1399 call _bsaes_decrypt8
1400 movdqu @XMM[0], 0x00($out) # write output
1401 movdqu @XMM[1], 0x10($out)
1402 movdqu @XMM[6], 0x20($out)
1403 movdqu @XMM[4], 0x30($out)
1404 movdqu @XMM[2], 0x40($out)
1405 movdqu @XMM[7], 0x50($out)
1406 movdqu @XMM[3], 0x60($out)
1407 jmp .Lecb_dec_done
1408.align 16
1409.Lecb_dec_six:
1410 call _bsaes_decrypt8
1411 movdqu @XMM[0], 0x00($out) # write output
1412 movdqu @XMM[1], 0x10($out)
1413 movdqu @XMM[6], 0x20($out)
1414 movdqu @XMM[4], 0x30($out)
1415 movdqu @XMM[2], 0x40($out)
1416 movdqu @XMM[7], 0x50($out)
1417 jmp .Lecb_dec_done
1418.align 16
1419.Lecb_dec_five:
1420 call _bsaes_decrypt8
1421 movdqu @XMM[0], 0x00($out) # write output
1422 movdqu @XMM[1], 0x10($out)
1423 movdqu @XMM[6], 0x20($out)
1424 movdqu @XMM[4], 0x30($out)
1425 movdqu @XMM[2], 0x40($out)
1426 jmp .Lecb_dec_done
1427.align 16
1428.Lecb_dec_four:
1429 call _bsaes_decrypt8
1430 movdqu @XMM[0], 0x00($out) # write output
1431 movdqu @XMM[1], 0x10($out)
1432 movdqu @XMM[6], 0x20($out)
1433 movdqu @XMM[4], 0x30($out)
1434 jmp .Lecb_dec_done
1435.align 16
1436.Lecb_dec_three:
1437 call _bsaes_decrypt8
1438 movdqu @XMM[0], 0x00($out) # write output
1439 movdqu @XMM[1], 0x10($out)
1440 movdqu @XMM[6], 0x20($out)
1441 jmp .Lecb_dec_done
1442.align 16
1443.Lecb_dec_two:
1444 call _bsaes_decrypt8
1445 movdqu @XMM[0], 0x00($out) # write output
1446 movdqu @XMM[1], 0x10($out)
1447 jmp .Lecb_dec_done
1448.align 16
1449.Lecb_dec_one:
1450 call _bsaes_decrypt8
1451 movdqu @XMM[0], 0x00($out) # write output
1452 jmp .Lecb_dec_done
1453.align 16
1454.Lecb_dec_short:
1455 lea ($inp), $arg1
1456 lea ($out), $arg2
1457 lea ($key), $arg3
1458 call asm_AES_decrypt
1459 lea 16($inp), $inp
1460 lea 16($out), $out
1461 dec $len
1462 jnz .Lecb_dec_short
1463
1464.Lecb_dec_done:
1465 lea (%rsp),%rax
1466 pxor %xmm0, %xmm0
1467.Lecb_dec_bzero: # wipe key schedule [if any]
1468 movdqa %xmm0, 0x00(%rax)
1469 movdqa %xmm0, 0x10(%rax)
1470 lea 0x20(%rax), %rax
1471 cmp %rax, %rbp
1472 jb .Lecb_dec_bzero
1473
1474 lea (%rbp),%rsp # restore %rsp
1475___
1476$code.=<<___ if ($win64);
1477 movaps 0x40(%rbp), %xmm6
1478 movaps 0x50(%rbp), %xmm7
1479 movaps 0x60(%rbp), %xmm8
1480 movaps 0x70(%rbp), %xmm9
1481 movaps 0x80(%rbp), %xmm10
1482 movaps 0x90(%rbp), %xmm11
1483 movaps 0xa0(%rbp), %xmm12
1484 movaps 0xb0(%rbp), %xmm13
1485 movaps 0xc0(%rbp), %xmm14
1486 movaps 0xd0(%rbp), %xmm15
1487 lea 0xa0(%rbp), %rsp
1488___
1489$code.=<<___;
1490 mov 0x48(%rsp), %r15
1491 mov 0x50(%rsp), %r14
1492 mov 0x58(%rsp), %r13
1493 mov 0x60(%rsp), %r12
1494 mov 0x68(%rsp), %rbx
1495 mov 0x70(%rsp), %rax
1496 lea 0x78(%rsp), %rsp
1497 mov %rax, %rbp
1498.Lecb_dec_epilogue:
1499 ret
1500.size bsaes_ecb_decrypt_blocks,.-bsaes_ecb_decrypt_blocks
1501___
1502}
1503$code.=<<___;
1504.extern asm_AES_cbc_encrypt
1505.globl bsaes_cbc_encrypt
1506.type bsaes_cbc_encrypt,\@abi-omnipotent
1507.align 16
1508bsaes_cbc_encrypt:
1509___
1510$code.=<<___ if ($win64);
1511 mov 48(%rsp),$arg6 # pull direction flag
1512___
1513$code.=<<___;
1514 cmp \$0,$arg6
1515 jne asm_AES_cbc_encrypt
1516 cmp \$128,$arg3
1517 jb asm_AES_cbc_encrypt
1518
1519 mov %rsp, %rax
1520.Lcbc_dec_prologue:
1521 push %rbp
1522 push %rbx
1523 push %r12
1524 push %r13
1525 push %r14
1526 push %r15
1527 lea -0x48(%rsp), %rsp
1528___
1529$code.=<<___ if ($win64);
1530 mov 0xa0(%rsp),$arg5 # pull ivp
1531 lea -0xa0(%rsp), %rsp
1532 movaps %xmm6, 0x40(%rsp)
1533 movaps %xmm7, 0x50(%rsp)
1534 movaps %xmm8, 0x60(%rsp)
1535 movaps %xmm9, 0x70(%rsp)
1536 movaps %xmm10, 0x80(%rsp)
1537 movaps %xmm11, 0x90(%rsp)
1538 movaps %xmm12, 0xa0(%rsp)
1539 movaps %xmm13, 0xb0(%rsp)
1540 movaps %xmm14, 0xc0(%rsp)
1541 movaps %xmm15, 0xd0(%rsp)
1542.Lcbc_dec_body:
1543___
1544$code.=<<___;
1545 mov %rsp, %rbp # backup %rsp
1546 mov 240($arg4), %eax # rounds
1547 mov $arg1, $inp # backup arguments
1548 mov $arg2, $out
1549 mov $arg3, $len
1550 mov $arg4, $key
1551 mov $arg5, %rbx
1552 shr \$4, $len # bytes to blocks
1553
1554 mov %eax, %edx # rounds
1555 shl \$7, %rax # 128 bytes per inner round key
1556 sub \$`128-32`, %rax # size of bit-sliced key schedule
1557 sub %rax, %rsp
1558
1559 mov %rsp, %rax # pass key schedule
1560 mov $key, %rcx # pass key
1561 mov %edx, %r10d # pass rounds
1562 call _bsaes_key_convert
1563 pxor (%rsp),%xmm7 # fix up 0 round key
1564 movdqa %xmm6,(%rax) # save last round key
1565 movdqa %xmm7,(%rsp)
1566
1567 movdqu (%rbx), @XMM[15] # load IV
1568 sub \$8,$len
1569.Lcbc_dec_loop:
1570 movdqu 0x00($inp), @XMM[0] # load input
1571 movdqu 0x10($inp), @XMM[1]
1572 movdqu 0x20($inp), @XMM[2]
1573 movdqu 0x30($inp), @XMM[3]
1574 movdqu 0x40($inp), @XMM[4]
1575 movdqu 0x50($inp), @XMM[5]
1576 mov %rsp, %rax # pass key schedule
1577 movdqu 0x60($inp), @XMM[6]
1578 mov %edx,%r10d # pass rounds
1579 movdqu 0x70($inp), @XMM[7]
1580 movdqa @XMM[15], 0x20(%rbp) # put aside IV
1581
1582 call _bsaes_decrypt8
1583
1584 pxor 0x20(%rbp), @XMM[0] # ^= IV
1585 movdqu 0x00($inp), @XMM[8] # re-load input
1586 movdqu 0x10($inp), @XMM[9]
1587 pxor @XMM[8], @XMM[1]
1588 movdqu 0x20($inp), @XMM[10]
1589 pxor @XMM[9], @XMM[6]
1590 movdqu 0x30($inp), @XMM[11]
1591 pxor @XMM[10], @XMM[4]
1592 movdqu 0x40($inp), @XMM[12]
1593 pxor @XMM[11], @XMM[2]
1594 movdqu 0x50($inp), @XMM[13]
1595 pxor @XMM[12], @XMM[7]
1596 movdqu 0x60($inp), @XMM[14]
1597 pxor @XMM[13], @XMM[3]
1598 movdqu 0x70($inp), @XMM[15] # IV
1599 pxor @XMM[14], @XMM[5]
1600 movdqu @XMM[0], 0x00($out) # write output
1601 lea 0x80($inp), $inp
1602 movdqu @XMM[1], 0x10($out)
1603 movdqu @XMM[6], 0x20($out)
1604 movdqu @XMM[4], 0x30($out)
1605 movdqu @XMM[2], 0x40($out)
1606 movdqu @XMM[7], 0x50($out)
1607 movdqu @XMM[3], 0x60($out)
1608 movdqu @XMM[5], 0x70($out)
1609 lea 0x80($out), $out
1610 sub \$8,$len
1611 jnc .Lcbc_dec_loop
1612
1613 add \$8,$len
1614 jz .Lcbc_dec_done
1615
1616 movdqu 0x00($inp), @XMM[0] # load input
1617 mov %rsp, %rax # pass key schedule
1618 mov %edx, %r10d # pass rounds
1619 cmp \$2,$len
1620 jb .Lcbc_dec_one
1621 movdqu 0x10($inp), @XMM[1]
1622 je .Lcbc_dec_two
1623 movdqu 0x20($inp), @XMM[2]
1624 cmp \$4,$len
1625 jb .Lcbc_dec_three
1626 movdqu 0x30($inp), @XMM[3]
1627 je .Lcbc_dec_four
1628 movdqu 0x40($inp), @XMM[4]
1629 cmp \$6,$len
1630 jb .Lcbc_dec_five
1631 movdqu 0x50($inp), @XMM[5]
1632 je .Lcbc_dec_six
1633 movdqu 0x60($inp), @XMM[6]
1634 movdqa @XMM[15], 0x20(%rbp) # put aside IV
1635 call _bsaes_decrypt8
1636 pxor 0x20(%rbp), @XMM[0] # ^= IV
1637 movdqu 0x00($inp), @XMM[8] # re-load input
1638 movdqu 0x10($inp), @XMM[9]
1639 pxor @XMM[8], @XMM[1]
1640 movdqu 0x20($inp), @XMM[10]
1641 pxor @XMM[9], @XMM[6]
1642 movdqu 0x30($inp), @XMM[11]
1643 pxor @XMM[10], @XMM[4]
1644 movdqu 0x40($inp), @XMM[12]
1645 pxor @XMM[11], @XMM[2]
1646 movdqu 0x50($inp), @XMM[13]
1647 pxor @XMM[12], @XMM[7]
1648 movdqu 0x60($inp), @XMM[15] # IV
1649 pxor @XMM[13], @XMM[3]
1650 movdqu @XMM[0], 0x00($out) # write output
1651 movdqu @XMM[1], 0x10($out)
1652 movdqu @XMM[6], 0x20($out)
1653 movdqu @XMM[4], 0x30($out)
1654 movdqu @XMM[2], 0x40($out)
1655 movdqu @XMM[7], 0x50($out)
1656 movdqu @XMM[3], 0x60($out)
1657 jmp .Lcbc_dec_done
1658.align 16
1659.Lcbc_dec_six:
1660 movdqa @XMM[15], 0x20(%rbp) # put aside IV
1661 call _bsaes_decrypt8
1662 pxor 0x20(%rbp), @XMM[0] # ^= IV
1663 movdqu 0x00($inp), @XMM[8] # re-load input
1664 movdqu 0x10($inp), @XMM[9]
1665 pxor @XMM[8], @XMM[1]
1666 movdqu 0x20($inp), @XMM[10]
1667 pxor @XMM[9], @XMM[6]
1668 movdqu 0x30($inp), @XMM[11]
1669 pxor @XMM[10], @XMM[4]
1670 movdqu 0x40($inp), @XMM[12]
1671 pxor @XMM[11], @XMM[2]
1672 movdqu 0x50($inp), @XMM[15] # IV
1673 pxor @XMM[12], @XMM[7]
1674 movdqu @XMM[0], 0x00($out) # write output
1675 movdqu @XMM[1], 0x10($out)
1676 movdqu @XMM[6], 0x20($out)
1677 movdqu @XMM[4], 0x30($out)
1678 movdqu @XMM[2], 0x40($out)
1679 movdqu @XMM[7], 0x50($out)
1680 jmp .Lcbc_dec_done
1681.align 16
1682.Lcbc_dec_five:
1683 movdqa @XMM[15], 0x20(%rbp) # put aside IV
1684 call _bsaes_decrypt8
1685 pxor 0x20(%rbp), @XMM[0] # ^= IV
1686 movdqu 0x00($inp), @XMM[8] # re-load input
1687 movdqu 0x10($inp), @XMM[9]
1688 pxor @XMM[8], @XMM[1]
1689 movdqu 0x20($inp), @XMM[10]
1690 pxor @XMM[9], @XMM[6]
1691 movdqu 0x30($inp), @XMM[11]
1692 pxor @XMM[10], @XMM[4]
1693 movdqu 0x40($inp), @XMM[15] # IV
1694 pxor @XMM[11], @XMM[2]
1695 movdqu @XMM[0], 0x00($out) # write output
1696 movdqu @XMM[1], 0x10($out)
1697 movdqu @XMM[6], 0x20($out)
1698 movdqu @XMM[4], 0x30($out)
1699 movdqu @XMM[2], 0x40($out)
1700 jmp .Lcbc_dec_done
1701.align 16
1702.Lcbc_dec_four:
1703 movdqa @XMM[15], 0x20(%rbp) # put aside IV
1704 call _bsaes_decrypt8
1705 pxor 0x20(%rbp), @XMM[0] # ^= IV
1706 movdqu 0x00($inp), @XMM[8] # re-load input
1707 movdqu 0x10($inp), @XMM[9]
1708 pxor @XMM[8], @XMM[1]
1709 movdqu 0x20($inp), @XMM[10]
1710 pxor @XMM[9], @XMM[6]
1711 movdqu 0x30($inp), @XMM[15] # IV
1712 pxor @XMM[10], @XMM[4]
1713 movdqu @XMM[0], 0x00($out) # write output
1714 movdqu @XMM[1], 0x10($out)
1715 movdqu @XMM[6], 0x20($out)
1716 movdqu @XMM[4], 0x30($out)
1717 jmp .Lcbc_dec_done
1718.align 16
1719.Lcbc_dec_three:
1720 movdqa @XMM[15], 0x20(%rbp) # put aside IV
1721 call _bsaes_decrypt8
1722 pxor 0x20(%rbp), @XMM[0] # ^= IV
1723 movdqu 0x00($inp), @XMM[8] # re-load input
1724 movdqu 0x10($inp), @XMM[9]
1725 pxor @XMM[8], @XMM[1]
1726 movdqu 0x20($inp), @XMM[15] # IV
1727 pxor @XMM[9], @XMM[6]
1728 movdqu @XMM[0], 0x00($out) # write output
1729 movdqu @XMM[1], 0x10($out)
1730 movdqu @XMM[6], 0x20($out)
1731 jmp .Lcbc_dec_done
1732.align 16
1733.Lcbc_dec_two:
1734 movdqa @XMM[15], 0x20(%rbp) # put aside IV
1735 call _bsaes_decrypt8
1736 pxor 0x20(%rbp), @XMM[0] # ^= IV
1737 movdqu 0x00($inp), @XMM[8] # re-load input
1738 movdqu 0x10($inp), @XMM[15] # IV
1739 pxor @XMM[8], @XMM[1]
1740 movdqu @XMM[0], 0x00($out) # write output
1741 movdqu @XMM[1], 0x10($out)
1742 jmp .Lcbc_dec_done
1743.align 16
1744.Lcbc_dec_one:
1745 lea ($inp), $arg1
1746 lea 0x20(%rbp), $arg2 # buffer output
1747 lea ($key), $arg3
1748 call asm_AES_decrypt # doesn't touch %xmm
1749 pxor 0x20(%rbp), @XMM[15] # ^= IV
1750 movdqu @XMM[15], ($out) # write output
1751 movdqa @XMM[0], @XMM[15] # IV
1752
1753.Lcbc_dec_done:
1754 movdqu @XMM[15], (%rbx) # return IV
1755 lea (%rsp), %rax
1756 pxor %xmm0, %xmm0
1757.Lcbc_dec_bzero: # wipe key schedule [if any]
1758 movdqa %xmm0, 0x00(%rax)
1759 movdqa %xmm0, 0x10(%rax)
1760 lea 0x20(%rax), %rax
1761 cmp %rax, %rbp
1762 ja .Lcbc_dec_bzero
1763
1764 lea (%rbp),%rsp # restore %rsp
1765___
1766$code.=<<___ if ($win64);
1767 movaps 0x40(%rbp), %xmm6
1768 movaps 0x50(%rbp), %xmm7
1769 movaps 0x60(%rbp), %xmm8
1770 movaps 0x70(%rbp), %xmm9
1771 movaps 0x80(%rbp), %xmm10
1772 movaps 0x90(%rbp), %xmm11
1773 movaps 0xa0(%rbp), %xmm12
1774 movaps 0xb0(%rbp), %xmm13
1775 movaps 0xc0(%rbp), %xmm14
1776 movaps 0xd0(%rbp), %xmm15
1777 lea 0xa0(%rbp), %rsp
1778___
1779$code.=<<___;
1780 mov 0x48(%rsp), %r15
1781 mov 0x50(%rsp), %r14
1782 mov 0x58(%rsp), %r13
1783 mov 0x60(%rsp), %r12
1784 mov 0x68(%rsp), %rbx
1785 mov 0x70(%rsp), %rax
1786 lea 0x78(%rsp), %rsp
1787 mov %rax, %rbp
1788.Lcbc_dec_epilogue:
1789 ret
1790.size bsaes_cbc_encrypt,.-bsaes_cbc_encrypt
1791
1792.globl bsaes_ctr32_encrypt_blocks
1793.type bsaes_ctr32_encrypt_blocks,\@abi-omnipotent
1794.align 16
1795bsaes_ctr32_encrypt_blocks:
1796 mov %rsp, %rax
1797.Lctr_enc_prologue:
1798 push %rbp
1799 push %rbx
1800 push %r12
1801 push %r13
1802 push %r14
1803 push %r15
1804 lea -0x48(%rsp), %rsp
1805___
1806$code.=<<___ if ($win64);
1807 mov 0xa0(%rsp),$arg5 # pull ivp
1808 lea -0xa0(%rsp), %rsp
1809 movaps %xmm6, 0x40(%rsp)
1810 movaps %xmm7, 0x50(%rsp)
1811 movaps %xmm8, 0x60(%rsp)
1812 movaps %xmm9, 0x70(%rsp)
1813 movaps %xmm10, 0x80(%rsp)
1814 movaps %xmm11, 0x90(%rsp)
1815 movaps %xmm12, 0xa0(%rsp)
1816 movaps %xmm13, 0xb0(%rsp)
1817 movaps %xmm14, 0xc0(%rsp)
1818 movaps %xmm15, 0xd0(%rsp)
1819.Lctr_enc_body:
1820___
1821$code.=<<___;
1822 mov %rsp, %rbp # backup %rsp
1823 movdqu ($arg5), %xmm0 # load counter
1824 mov 240($arg4), %eax # rounds
1825 mov $arg1, $inp # backup arguments
1826 mov $arg2, $out
1827 mov $arg3, $len
1828 mov $arg4, $key
1829 movdqa %xmm0, 0x20(%rbp) # copy counter
1830 cmp \$8, $arg3
1831 jb .Lctr_enc_short
1832
1833 mov %eax, %ebx # rounds
1834 shl \$7, %rax # 128 bytes per inner round key
1835 sub \$`128-32`, %rax # size of bit-sliced key schedule
1836 sub %rax, %rsp
1837
1838 mov %rsp, %rax # pass key schedule
1839 mov $key, %rcx # pass key
1840 mov %ebx, %r10d # pass rounds
1841 call _bsaes_key_convert
1842 pxor %xmm6,%xmm7 # fix up last round key
1843 movdqa %xmm7,(%rax) # save last round key
1844
1845 movdqa (%rsp), @XMM[9] # load round0 key
1846 lea .LADD1(%rip), %r11
1847 movdqa 0x20(%rbp), @XMM[0] # counter copy
1848 movdqa -0x20(%r11), @XMM[8] # .LSWPUP
1849 pshufb @XMM[8], @XMM[9] # byte swap upper part
1850 pshufb @XMM[8], @XMM[0]
1851 movdqa @XMM[9], (%rsp) # save adjusted round0 key
1852 jmp .Lctr_enc_loop
1853.align 16
1854.Lctr_enc_loop:
1855 movdqa @XMM[0], 0x20(%rbp) # save counter
1856 movdqa @XMM[0], @XMM[1] # prepare 8 counter values
1857 movdqa @XMM[0], @XMM[2]
1858 paddd 0x00(%r11), @XMM[1] # .LADD1
1859 movdqa @XMM[0], @XMM[3]
1860 paddd 0x10(%r11), @XMM[2] # .LADD2
1861 movdqa @XMM[0], @XMM[4]
1862 paddd 0x20(%r11), @XMM[3] # .LADD3
1863 movdqa @XMM[0], @XMM[5]
1864 paddd 0x30(%r11), @XMM[4] # .LADD4
1865 movdqa @XMM[0], @XMM[6]
1866 paddd 0x40(%r11), @XMM[5] # .LADD5
1867 movdqa @XMM[0], @XMM[7]
1868 paddd 0x50(%r11), @XMM[6] # .LADD6
1869 paddd 0x60(%r11), @XMM[7] # .LADD7
1870
1871 # Borrow prologue from _bsaes_encrypt8 to use the opportunity
1872 # to flip byte order in 32-bit counter
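	# (.LSWPUPM0SR used below is the composition of the .LSWPUP
	#  upper-dword byte swap with the .LM0SR input permutation of
	#  _bsaes_encrypt8, so the big-endian 32-bit counter word is put
	#  back in place while the blocks are permuted into bit-sliced
	#  order -- one pshufb per block instead of two.)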
1873 movdqa (%rsp), @XMM[9] # round 0 key
1874 lea 0x10(%rsp), %rax # pass key schedule
1875 movdqa -0x10(%r11), @XMM[8] # .LSWPUPM0SR
1876 pxor @XMM[9], @XMM[0] # xor with round0 key
1877 pxor @XMM[9], @XMM[1]
1878 pshufb @XMM[8], @XMM[0]
1879 pxor @XMM[9], @XMM[2]
1880 pshufb @XMM[8], @XMM[1]
1881 pxor @XMM[9], @XMM[3]
1882 pshufb @XMM[8], @XMM[2]
1883 pxor @XMM[9], @XMM[4]
1884 pshufb @XMM[8], @XMM[3]
1885 pxor @XMM[9], @XMM[5]
1886 pshufb @XMM[8], @XMM[4]
1887 pxor @XMM[9], @XMM[6]
1888 pshufb @XMM[8], @XMM[5]
1889 pxor @XMM[9], @XMM[7]
1890 pshufb @XMM[8], @XMM[6]
1891 lea .LBS0(%rip), %r11 # constants table
1892 pshufb @XMM[8], @XMM[7]
1893 mov %ebx,%r10d # pass rounds
1894
1895 call _bsaes_encrypt8_bitslice
1896
1897 sub \$8,$len
1898 jc .Lctr_enc_loop_done
1899
1900 movdqu 0x00($inp), @XMM[8] # load input
1901 movdqu 0x10($inp), @XMM[9]
1902 movdqu 0x20($inp), @XMM[10]
1903 movdqu 0x30($inp), @XMM[11]
1904 movdqu 0x40($inp), @XMM[12]
1905 movdqu 0x50($inp), @XMM[13]
1906 movdqu 0x60($inp), @XMM[14]
1907 movdqu 0x70($inp), @XMM[15]
1908 lea 0x80($inp),$inp
1909 pxor @XMM[0], @XMM[8]
1910 movdqa 0x20(%rbp), @XMM[0] # load counter
1911 pxor @XMM[9], @XMM[1]
1912 movdqu @XMM[8], 0x00($out) # write output
1913 pxor @XMM[10], @XMM[4]
1914 movdqu @XMM[1], 0x10($out)
1915 pxor @XMM[11], @XMM[6]
1916 movdqu @XMM[4], 0x20($out)
1917 pxor @XMM[12], @XMM[3]
1918 movdqu @XMM[6], 0x30($out)
1919 pxor @XMM[13], @XMM[7]
1920 movdqu @XMM[3], 0x40($out)
1921 pxor @XMM[14], @XMM[2]
1922 movdqu @XMM[7], 0x50($out)
1923 pxor @XMM[15], @XMM[5]
1924 movdqu @XMM[2], 0x60($out)
1925 lea .LADD1(%rip), %r11
1926 movdqu @XMM[5], 0x70($out)
1927 lea 0x80($out), $out
1928 paddd 0x70(%r11), @XMM[0] # .LADD8
1929 jnz .Lctr_enc_loop
1930
1931 jmp .Lctr_enc_done
1932.align 16
1933.Lctr_enc_loop_done:
1934 add \$8, $len
1935 movdqu 0x00($inp), @XMM[8] # load input
1936 pxor @XMM[8], @XMM[0]
1937 movdqu @XMM[0], 0x00($out) # write output
1938 cmp \$2,$len
1939 jb .Lctr_enc_done
1940 movdqu 0x10($inp), @XMM[9]
1941 pxor @XMM[9], @XMM[1]
1942 movdqu @XMM[1], 0x10($out)
1943 je .Lctr_enc_done
1944 movdqu 0x20($inp), @XMM[10]
1945 pxor @XMM[10], @XMM[4]
1946 movdqu @XMM[4], 0x20($out)
1947 cmp \$4,$len
1948 jb .Lctr_enc_done
1949 movdqu 0x30($inp), @XMM[11]
1950 pxor @XMM[11], @XMM[6]
1951 movdqu @XMM[6], 0x30($out)
1952 je .Lctr_enc_done
1953 movdqu 0x40($inp), @XMM[12]
1954 pxor @XMM[12], @XMM[3]
1955 movdqu @XMM[3], 0x40($out)
1956 cmp \$6,$len
1957 jb .Lctr_enc_done
1958 movdqu 0x50($inp), @XMM[13]
1959 pxor @XMM[13], @XMM[7]
1960 movdqu @XMM[7], 0x50($out)
1961 je .Lctr_enc_done
1962 movdqu 0x60($inp), @XMM[14]
1963 pxor @XMM[14], @XMM[2]
1964 movdqu @XMM[2], 0x60($out)
1965 jmp .Lctr_enc_done
1966
1967.align 16
1968.Lctr_enc_short:
1969 lea 0x20(%rbp), $arg1
1970 lea 0x30(%rbp), $arg2
1971 lea ($key), $arg3
1972 call asm_AES_encrypt
1973 movdqu ($inp), @XMM[1]
1974 lea 16($inp), $inp
1975 mov 0x2c(%rbp), %eax # load 32-bit counter
1976 bswap %eax
1977 pxor 0x30(%rbp), @XMM[1]
1978 inc %eax # increment
1979 movdqu @XMM[1], ($out)
1980 bswap %eax
1981 lea 16($out), $out
1982 mov %eax, 0x2c(%rsp) # save 32-bit counter
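	# (%rbp still equals %rsp on this short path, as the bit-sliced
	#  key schedule was never allocated, so this stores back into the
	#  same counter word that was loaded from 0x2c(%rbp) above)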
1983 dec $len
1984 jnz .Lctr_enc_short
1985
1986.Lctr_enc_done:
1987 lea (%rsp), %rax
1988 pxor %xmm0, %xmm0
1989.Lctr_enc_bzero: # wipe key schedule [if any]
1990 movdqa %xmm0, 0x00(%rax)
1991 movdqa %xmm0, 0x10(%rax)
1992 lea 0x20(%rax), %rax
1993 cmp %rax, %rbp
1994 ja .Lctr_enc_bzero
1995
1996 lea (%rbp),%rsp # restore %rsp
1997___
1998$code.=<<___ if ($win64);
1999 movaps 0x40(%rbp), %xmm6
2000 movaps 0x50(%rbp), %xmm7
2001 movaps 0x60(%rbp), %xmm8
2002 movaps 0x70(%rbp), %xmm9
2003 movaps 0x80(%rbp), %xmm10
2004 movaps 0x90(%rbp), %xmm11
2005 movaps 0xa0(%rbp), %xmm12
2006 movaps 0xb0(%rbp), %xmm13
2007 movaps 0xc0(%rbp), %xmm14
2008 movaps 0xd0(%rbp), %xmm15
2009 lea 0xa0(%rbp), %rsp
2010___
2011$code.=<<___;
2012 mov 0x48(%rsp), %r15
2013 mov 0x50(%rsp), %r14
2014 mov 0x58(%rsp), %r13
2015 mov 0x60(%rsp), %r12
2016 mov 0x68(%rsp), %rbx
2017 mov 0x70(%rsp), %rax
2018 lea 0x78(%rsp), %rsp
2019 mov %rax, %rbp
2020.Lctr_enc_epilogue:
2021 ret
2022.size bsaes_ctr32_encrypt_blocks,.-bsaes_ctr32_encrypt_blocks
2023___
2024######################################################################
2025# void bsaes_xts_[en|de]crypt(const char *inp,char *out,size_t len,
2026# const AES_KEY *key1, const AES_KEY *key2,
2027# const unsigned char iv[16]);
2028#
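# For reference (standard IEEE P1619 XTS, sketched here, not generated
# code): the initial tweak is T = AES_encrypt(iv, key2) for both
# directions; each full 16-byte block is whitened before and after the
# block cipher, C = E_{key1}(P ^ T) ^ T (D_{key1} when decrypting), and
# T is multiplied by x in GF(2^128) between blocks, reduced by
# x^128 + x^7 + x^2 + x + 1 (the 0x87 in .Lxts_magic below). A trailing
# partial block is handled by ciphertext stealing in the
# .Lxts_{enc,dec}_steal loops.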
2029my ($twmask,$twres,$twtmp)=@XMM[13..15];
2030$code.=<<___;
2031.globl bsaes_xts_encrypt
2032.type bsaes_xts_encrypt,\@abi-omnipotent
2033.align 16
2034bsaes_xts_encrypt:
2035 mov %rsp, %rax
2036.Lxts_enc_prologue:
2037 push %rbp
2038 push %rbx
2039 push %r12
2040 push %r13
2041 push %r14
2042 push %r15
2043 lea -0x48(%rsp), %rsp
2044___
2045$code.=<<___ if ($win64);
2046 mov 0xa0(%rsp),$arg5 # pull key2
2047 mov 0xa8(%rsp),$arg6 # pull ivp
2048 lea -0xa0(%rsp), %rsp
2049 movaps %xmm6, 0x40(%rsp)
2050 movaps %xmm7, 0x50(%rsp)
2051 movaps %xmm8, 0x60(%rsp)
2052 movaps %xmm9, 0x70(%rsp)
2053 movaps %xmm10, 0x80(%rsp)
2054 movaps %xmm11, 0x90(%rsp)
2055 movaps %xmm12, 0xa0(%rsp)
2056 movaps %xmm13, 0xb0(%rsp)
2057 movaps %xmm14, 0xc0(%rsp)
2058 movaps %xmm15, 0xd0(%rsp)
2059.Lxts_enc_body:
2060___
2061$code.=<<___;
2062 mov %rsp, %rbp # backup %rsp
2063 mov $arg1, $inp # backup arguments
2064 mov $arg2, $out
2065 mov $arg3, $len
2066 mov $arg4, $key
2067
2068 lea ($arg6), $arg1
2069 lea 0x20(%rbp), $arg2
2070 lea ($arg5), $arg3
2071 call asm_AES_encrypt # generate initial tweak
2072
2073 mov 240($key), %eax # rounds
2074 mov $len, %rbx # backup $len
2075
2076 mov %eax, %edx # rounds
2077 shl \$7, %rax # 128 bytes per inner round key
2078 sub \$`128-32`, %rax # size of bit-sliced key schedule
2079 sub %rax, %rsp
2080
2081 mov %rsp, %rax # pass key schedule
2082 mov $key, %rcx # pass key
2083 mov %edx, %r10d # pass rounds
2084 call _bsaes_key_convert
2085 pxor %xmm6, %xmm7 # fix up last round key
2086 movdqa %xmm7, (%rax) # save last round key
2087
2088 and \$-16, $len
2089 sub \$0x80, %rsp # place for tweak[8]
2090 movdqa 0x20(%rbp), @XMM[7] # initial tweak
2091
2092 pxor $twtmp, $twtmp
2093 movdqa .Lxts_magic(%rip), $twmask
2094 pcmpgtd @XMM[7], $twtmp # broadcast upper bits
2095
2096 sub \$0x80, $len
2097 jc .Lxts_enc_short
2098 jmp .Lxts_enc_loop
2099
2100.align 16
2101.Lxts_enc_loop:
2102___
2103 for ($i=0;$i<7;$i++) {
2104 $code.=<<___;
2105 pshufd \$0x13, $twtmp, $twres
2106 pxor $twtmp, $twtmp
2107 movdqa @XMM[7], @XMM[$i]
2108 movdqa @XMM[7], `0x10*$i`(%rsp)# save tweak[$i]
2109 paddq @XMM[7], @XMM[7] # psllq 1,$tweak
2110 pand $twmask, $twres # isolate carry and residue
2111 pcmpgtd @XMM[7], $twtmp # broadcast upper bits
2112 pxor $twres, @XMM[7]
2113___
2114 $code.=<<___ if ($i>=1);
2115 movdqu `0x10*($i-1)`($inp), @XMM[8+$i-1]
2116___
2117 $code.=<<___ if ($i>=2);
2118 pxor @XMM[8+$i-2], @XMM[$i-2]# input[] ^ tweak[]
2119___
2120 }
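# Each pass of the loop above multiplies the 128-bit tweak by x in
# GF(2^128) using SSE compares, shifts and masks. As a reference only --
# this helper is illustrative, is never called by this generator, and
# its name is made up -- the same doubling can be written byte-wise as:
sub xts_double_tweak {
	my @t = @_;				# 16 tweak bytes, least significant first
	my $carry = ($t[15] & 0x80) ? 0x87 : 0;	# x^128 = x^7 + x^2 + x + 1
	for (my $i = 15; $i > 0; $i--) {
		$t[$i] = (($t[$i] << 1) | ($t[$i-1] >> 7)) & 0xff;
	}
	$t[0] = (($t[0] << 1) & 0xff) ^ $carry;
	return @t;
}
# e.g. @tweak = xts_double_tweak(@tweak);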
2121$code.=<<___;
2122 movdqu 0x60($inp), @XMM[8+6]
2123 pxor @XMM[8+5], @XMM[5]
2124 movdqu 0x70($inp), @XMM[8+7]
2125 lea 0x80($inp), $inp
2126 movdqa @XMM[7], 0x70(%rsp)
2127 pxor @XMM[8+6], @XMM[6]
2128 lea 0x80(%rsp), %rax # pass key schedule
2129 pxor @XMM[8+7], @XMM[7]
2130 mov %edx, %r10d # pass rounds
2131
2132 call _bsaes_encrypt8
2133
2134 pxor 0x00(%rsp), @XMM[0] # ^= tweak[]
2135 pxor 0x10(%rsp), @XMM[1]
2136 movdqu @XMM[0], 0x00($out) # write output
2137 pxor 0x20(%rsp), @XMM[4]
2138 movdqu @XMM[1], 0x10($out)
2139 pxor 0x30(%rsp), @XMM[6]
2140 movdqu @XMM[4], 0x20($out)
2141 pxor 0x40(%rsp), @XMM[3]
2142 movdqu @XMM[6], 0x30($out)
2143 pxor 0x50(%rsp), @XMM[7]
2144 movdqu @XMM[3], 0x40($out)
2145 pxor 0x60(%rsp), @XMM[2]
2146 movdqu @XMM[7], 0x50($out)
2147 pxor 0x70(%rsp), @XMM[5]
2148 movdqu @XMM[2], 0x60($out)
2149 movdqu @XMM[5], 0x70($out)
2150 lea 0x80($out), $out
2151
2152 movdqa 0x70(%rsp), @XMM[7] # prepare next iteration tweak
2153 pxor $twtmp, $twtmp
2154 movdqa .Lxts_magic(%rip), $twmask
2155 pcmpgtd @XMM[7], $twtmp
2156 pshufd \$0x13, $twtmp, $twres
2157 pxor $twtmp, $twtmp
2158 paddq @XMM[7], @XMM[7] # psllq 1,$tweak
2159 pand $twmask, $twres # isolate carry and residue
2160 pcmpgtd @XMM[7], $twtmp # broadcast upper bits
2161 pxor $twres, @XMM[7]
2162
2163 sub \$0x80,$len
2164 jnc .Lxts_enc_loop
2165
2166.Lxts_enc_short:
2167 add \$0x80, $len
2168 jz .Lxts_enc_done
2169___
2170 for ($i=0;$i<7;$i++) {
2171 $code.=<<___;
2172 pshufd \$0x13, $twtmp, $twres
2173 pxor $twtmp, $twtmp
2174 movdqa @XMM[7], @XMM[$i]
2175 movdqa @XMM[7], `0x10*$i`(%rsp)# save tweak[$i]
2176 paddq @XMM[7], @XMM[7] # psllq 1,$tweak
2177 pand $twmask, $twres # isolate carry and residue
2178 pcmpgtd @XMM[7], $twtmp # broadcast upper bits
2179 pxor $twres, @XMM[7]
2180___
2181 $code.=<<___ if ($i>=1);
2182 movdqu `0x10*($i-1)`($inp), @XMM[8+$i-1]
2183 cmp \$`0x10*$i`,$len
2184 je .Lxts_enc_$i
2185___
2186 $code.=<<___ if ($i>=2);
2187 pxor @XMM[8+$i-2], @XMM[$i-2]# input[] ^ tweak[]
2188___
2189 }
2190$code.=<<___;
2191 movdqu 0x60($inp), @XMM[8+6]
2192 pxor @XMM[8+5], @XMM[5]
2193 movdqa @XMM[7], 0x70(%rsp)
2194 lea 0x70($inp), $inp
2195 pxor @XMM[8+6], @XMM[6]
2196 lea 0x80(%rsp), %rax # pass key schedule
2197 mov %edx, %r10d # pass rounds
2198
2199 call _bsaes_encrypt8
2200
2201 pxor 0x00(%rsp), @XMM[0] # ^= tweak[]
2202 pxor 0x10(%rsp), @XMM[1]
2203 movdqu @XMM[0], 0x00($out) # write output
2204 pxor 0x20(%rsp), @XMM[4]
2205 movdqu @XMM[1], 0x10($out)
2206 pxor 0x30(%rsp), @XMM[6]
2207 movdqu @XMM[4], 0x20($out)
2208 pxor 0x40(%rsp), @XMM[3]
2209 movdqu @XMM[6], 0x30($out)
2210 pxor 0x50(%rsp), @XMM[7]
2211 movdqu @XMM[3], 0x40($out)
2212 pxor 0x60(%rsp), @XMM[2]
2213 movdqu @XMM[7], 0x50($out)
2214 movdqu @XMM[2], 0x60($out)
2215 lea 0x70($out), $out
2216
2217 movdqa 0x70(%rsp), @XMM[7] # next iteration tweak
2218 jmp .Lxts_enc_done
2219.align 16
2220.Lxts_enc_6:
2221 pxor @XMM[8+4], @XMM[4]
2222 lea 0x60($inp), $inp
2223 pxor @XMM[8+5], @XMM[5]
2224 lea 0x80(%rsp), %rax # pass key schedule
2225 mov %edx, %r10d # pass rounds
2226
2227 call _bsaes_encrypt8
2228
2229 pxor 0x00(%rsp), @XMM[0] # ^= tweak[]
2230 pxor 0x10(%rsp), @XMM[1]
2231 movdqu @XMM[0], 0x00($out) # write output
2232 pxor 0x20(%rsp), @XMM[4]
2233 movdqu @XMM[1], 0x10($out)
2234 pxor 0x30(%rsp), @XMM[6]
2235 movdqu @XMM[4], 0x20($out)
2236 pxor 0x40(%rsp), @XMM[3]
2237 movdqu @XMM[6], 0x30($out)
2238 pxor 0x50(%rsp), @XMM[7]
2239 movdqu @XMM[3], 0x40($out)
2240 movdqu @XMM[7], 0x50($out)
2241 lea 0x60($out), $out
2242
2243 movdqa 0x60(%rsp), @XMM[7] # next iteration tweak
2244 jmp .Lxts_enc_done
2245.align 16
2246.Lxts_enc_5:
2247 pxor @XMM[8+3], @XMM[3]
2248 lea 0x50($inp), $inp
2249 pxor @XMM[8+4], @XMM[4]
2250 lea 0x80(%rsp), %rax # pass key schedule
2251 mov %edx, %r10d # pass rounds
2252
2253 call _bsaes_encrypt8
2254
2255 pxor 0x00(%rsp), @XMM[0] # ^= tweak[]
2256 pxor 0x10(%rsp), @XMM[1]
2257 movdqu @XMM[0], 0x00($out) # write output
2258 pxor 0x20(%rsp), @XMM[4]
2259 movdqu @XMM[1], 0x10($out)
2260 pxor 0x30(%rsp), @XMM[6]
2261 movdqu @XMM[4], 0x20($out)
2262 pxor 0x40(%rsp), @XMM[3]
2263 movdqu @XMM[6], 0x30($out)
2264 movdqu @XMM[3], 0x40($out)
2265 lea 0x50($out), $out
2266
2267 movdqa 0x50(%rsp), @XMM[7] # next iteration tweak
2268 jmp .Lxts_enc_done
2269.align 16
2270.Lxts_enc_4:
2271 pxor @XMM[8+2], @XMM[2]
2272 lea 0x40($inp), $inp
2273 pxor @XMM[8+3], @XMM[3]
2274 lea 0x80(%rsp), %rax # pass key schedule
2275 mov %edx, %r10d # pass rounds
2276
2277 call _bsaes_encrypt8
2278
2279 pxor 0x00(%rsp), @XMM[0] # ^= tweak[]
2280 pxor 0x10(%rsp), @XMM[1]
2281 movdqu @XMM[0], 0x00($out) # write output
2282 pxor 0x20(%rsp), @XMM[4]
2283 movdqu @XMM[1], 0x10($out)
2284 pxor 0x30(%rsp), @XMM[6]
2285 movdqu @XMM[4], 0x20($out)
2286 movdqu @XMM[6], 0x30($out)
2287 lea 0x40($out), $out
2288
2289 movdqa 0x40(%rsp), @XMM[7] # next iteration tweak
2290 jmp .Lxts_enc_done
2291.align 16
2292.Lxts_enc_3:
2293 pxor @XMM[8+1], @XMM[1]
2294 lea 0x30($inp), $inp
2295 pxor @XMM[8+2], @XMM[2]
2296 lea 0x80(%rsp), %rax # pass key schedule
2297 mov %edx, %r10d # pass rounds
2298
2299 call _bsaes_encrypt8
2300
2301 pxor 0x00(%rsp), @XMM[0] # ^= tweak[]
2302 pxor 0x10(%rsp), @XMM[1]
2303 movdqu @XMM[0], 0x00($out) # write output
2304 pxor 0x20(%rsp), @XMM[4]
2305 movdqu @XMM[1], 0x10($out)
2306 movdqu @XMM[4], 0x20($out)
2307 lea 0x30($out), $out
2308
2309 movdqa 0x30(%rsp), @XMM[7] # next iteration tweak
2310 jmp .Lxts_enc_done
2311.align 16
2312.Lxts_enc_2:
2313 pxor @XMM[8+0], @XMM[0]
2314 lea 0x20($inp), $inp
2315 pxor @XMM[8+1], @XMM[1]
2316 lea 0x80(%rsp), %rax # pass key schedule
2317 mov %edx, %r10d # pass rounds
2318
2319 call _bsaes_encrypt8
2320
2321 pxor 0x00(%rsp), @XMM[0] # ^= tweak[]
2322 pxor 0x10(%rsp), @XMM[1]
2323 movdqu @XMM[0], 0x00($out) # write output
2324 movdqu @XMM[1], 0x10($out)
2325 lea 0x20($out), $out
2326
2327 movdqa 0x20(%rsp), @XMM[7] # next iteration tweak
2328 jmp .Lxts_enc_done
2329.align 16
2330.Lxts_enc_1:
2331 pxor @XMM[0], @XMM[8]
2332 lea 0x10($inp), $inp
2333 movdqa @XMM[8], 0x20(%rbp)
2334 lea 0x20(%rbp), $arg1
2335 lea 0x20(%rbp), $arg2
2336 lea ($key), $arg3
2337 call asm_AES_encrypt # doesn't touch %xmm
2338 pxor 0x20(%rbp), @XMM[0] # ^= tweak[]
2339 #pxor @XMM[8], @XMM[0]
2340 #lea 0x80(%rsp), %rax # pass key schedule
2341 #mov %edx, %r10d # pass rounds
2342 #call _bsaes_encrypt8
2343 #pxor 0x00(%rsp), @XMM[0] # ^= tweak[]
2344 movdqu @XMM[0], 0x00($out) # write output
2345 lea 0x10($out), $out
2346
2347 movdqa 0x10(%rsp), @XMM[7] # next iteration tweak
2348
2349.Lxts_enc_done:
2350 and \$15, %ebx
2351 jz .Lxts_enc_ret
2352 mov $out, %rdx
2353
2354.Lxts_enc_steal:
2355 movzb ($inp), %eax
2356 movzb -16(%rdx), %ecx
2357 lea 1($inp), $inp
2358 mov %al, -16(%rdx)
2359 mov %cl, 0(%rdx)
2360 lea 1(%rdx), %rdx
2361 sub \$1,%ebx
2362 jnz .Lxts_enc_steal
2363
2364 movdqu -16($out), @XMM[0]
2365 lea 0x20(%rbp), $arg1
2366 pxor @XMM[7], @XMM[0]
2367 lea 0x20(%rbp), $arg2
2368 movdqa @XMM[0], 0x20(%rbp)
2369 lea ($key), $arg3
2370 call asm_AES_encrypt # doesn't touch %xmm
2371 pxor 0x20(%rbp), @XMM[7]
2372 movdqu @XMM[7], -16($out)
2373
2374.Lxts_enc_ret:
2375 lea (%rsp), %rax
2376 pxor %xmm0, %xmm0
2377.Lxts_enc_bzero: # wipe key schedule [if any]
2378 movdqa %xmm0, 0x00(%rax)
2379 movdqa %xmm0, 0x10(%rax)
2380 lea 0x20(%rax), %rax
2381 cmp %rax, %rbp
2382 ja .Lxts_enc_bzero
2383
2384 lea (%rbp),%rsp # restore %rsp
2385___
2386$code.=<<___ if ($win64);
2387 movaps 0x40(%rbp), %xmm6
2388 movaps 0x50(%rbp), %xmm7
2389 movaps 0x60(%rbp), %xmm8
2390 movaps 0x70(%rbp), %xmm9
2391 movaps 0x80(%rbp), %xmm10
2392 movaps 0x90(%rbp), %xmm11
2393 movaps 0xa0(%rbp), %xmm12
2394 movaps 0xb0(%rbp), %xmm13
2395 movaps 0xc0(%rbp), %xmm14
2396 movaps 0xd0(%rbp), %xmm15
2397 lea 0xa0(%rbp), %rsp
2398___
2399$code.=<<___;
2400 mov 0x48(%rsp), %r15
2401 mov 0x50(%rsp), %r14
2402 mov 0x58(%rsp), %r13
2403 mov 0x60(%rsp), %r12
2404 mov 0x68(%rsp), %rbx
2405 mov 0x70(%rsp), %rax
2406 lea 0x78(%rsp), %rsp
2407 mov %rax, %rbp
2408.Lxts_enc_epilogue:
2409 ret
2410.size bsaes_xts_encrypt,.-bsaes_xts_encrypt
2411
2412.globl bsaes_xts_decrypt
2413.type bsaes_xts_decrypt,\@abi-omnipotent
2414.align 16
2415bsaes_xts_decrypt:
2416 mov %rsp, %rax
2417.Lxts_dec_prologue:
2418 push %rbp
2419 push %rbx
2420 push %r12
2421 push %r13
2422 push %r14
2423 push %r15
2424 lea -0x48(%rsp), %rsp
2425___
2426$code.=<<___ if ($win64);
2427 mov 0xa0(%rsp),$arg5 # pull key2
2428 mov 0xa8(%rsp),$arg6 # pull ivp
2429 lea -0xa0(%rsp), %rsp
2430 movaps %xmm6, 0x40(%rsp)
2431 movaps %xmm7, 0x50(%rsp)
2432 movaps %xmm8, 0x60(%rsp)
2433 movaps %xmm9, 0x70(%rsp)
2434 movaps %xmm10, 0x80(%rsp)
2435 movaps %xmm11, 0x90(%rsp)
2436 movaps %xmm12, 0xa0(%rsp)
2437 movaps %xmm13, 0xb0(%rsp)
2438 movaps %xmm14, 0xc0(%rsp)
2439 movaps %xmm15, 0xd0(%rsp)
2440.Lxts_dec_body:
2441___
2442$code.=<<___;
2443 mov %rsp, %rbp # backup %rsp
2444 mov $arg1, $inp # backup arguments
2445 mov $arg2, $out
2446 mov $arg3, $len
2447 mov $arg4, $key
2448
2449 lea ($arg6), $arg1
2450 lea 0x20(%rbp), $arg2
2451 lea ($arg5), $arg3
2452 call asm_AES_encrypt # generate initial tweak
2453
2454 mov 240($key), %eax # rounds
2455 mov $len, %rbx # backup $len
2456
2457 mov %eax, %edx # rounds
2458 shl \$7, %rax # 128 bytes per inner round key
2459 sub \$`128-32`, %rax # size of bit-sliced key schedule
2460 sub %rax, %rsp
2461
2462 mov %rsp, %rax # pass key schedule
2463 mov $key, %rcx # pass key
2464 mov %edx, %r10d # pass rounds
2465 call _bsaes_key_convert
2466 pxor (%rsp), %xmm7 # fix up round 0 key
2467 movdqa %xmm6, (%rax) # save last round key
2468 movdqa %xmm7, (%rsp)
2469
2470 xor %eax, %eax # if ($len%16) len-=16;
2471 and \$-16, $len
2472 test \$15, %ebx
2473 setnz %al
2474 shl \$4, %rax
2475 sub %rax, $len
2476
2477 sub \$0x80, %rsp # place for tweak[8]
2478 movdqa 0x20(%rbp), @XMM[7] # initial tweak
2479
2480 pxor $twtmp, $twtmp
2481 movdqa .Lxts_magic(%rip), $twmask
2482 pcmpgtd @XMM[7], $twtmp # broadcast upper bits
2483
2484 sub \$0x80, $len
2485 jc .Lxts_dec_short
2486 jmp .Lxts_dec_loop
2487
2488.align 16
2489.Lxts_dec_loop:
2490___
2491 for ($i=0;$i<7;$i++) {
2492 $code.=<<___;
2493 pshufd \$0x13, $twtmp, $twres
2494 pxor $twtmp, $twtmp
2495 movdqa @XMM[7], @XMM[$i]
2496 movdqa @XMM[7], `0x10*$i`(%rsp)# save tweak[$i]
2497 paddq @XMM[7], @XMM[7] # psllq 1,$tweak
2498 pand $twmask, $twres # isolate carry and residue
2499 pcmpgtd @XMM[7], $twtmp # broadcast upper bits
2500 pxor $twres, @XMM[7]
2501___
2502 $code.=<<___ if ($i>=1);
2503 movdqu `0x10*($i-1)`($inp), @XMM[8+$i-1]
2504___
2505 $code.=<<___ if ($i>=2);
2506 pxor @XMM[8+$i-2], @XMM[$i-2]# input[] ^ tweak[]
2507___
2508 }
2509$code.=<<___;
2510 movdqu 0x60($inp), @XMM[8+6]
2511 pxor @XMM[8+5], @XMM[5]
2512 movdqu 0x70($inp), @XMM[8+7]
2513 lea 0x80($inp), $inp
2514 movdqa @XMM[7], 0x70(%rsp)
2515 pxor @XMM[8+6], @XMM[6]
2516 lea 0x80(%rsp), %rax # pass key schedule
2517 pxor @XMM[8+7], @XMM[7]
2518 mov %edx, %r10d # pass rounds
2519
2520 call _bsaes_decrypt8
2521
2522 pxor 0x00(%rsp), @XMM[0] # ^= tweak[]
2523 pxor 0x10(%rsp), @XMM[1]
2524 movdqu @XMM[0], 0x00($out) # write output
2525 pxor 0x20(%rsp), @XMM[6]
2526 movdqu @XMM[1], 0x10($out)
2527 pxor 0x30(%rsp), @XMM[4]
2528 movdqu @XMM[6], 0x20($out)
2529 pxor 0x40(%rsp), @XMM[2]
2530 movdqu @XMM[4], 0x30($out)
2531 pxor 0x50(%rsp), @XMM[7]
2532 movdqu @XMM[2], 0x40($out)
2533 pxor 0x60(%rsp), @XMM[3]
2534 movdqu @XMM[7], 0x50($out)
2535 pxor 0x70(%rsp), @XMM[5]
2536 movdqu @XMM[3], 0x60($out)
2537 movdqu @XMM[5], 0x70($out)
2538 lea 0x80($out), $out
2539
2540 movdqa 0x70(%rsp), @XMM[7] # prepare next iteration tweak
2541 pxor $twtmp, $twtmp
2542 movdqa .Lxts_magic(%rip), $twmask
2543 pcmpgtd @XMM[7], $twtmp
2544 pshufd \$0x13, $twtmp, $twres
2545 pxor $twtmp, $twtmp
2546 paddq @XMM[7], @XMM[7] # psllq 1,$tweak
2547 pand $twmask, $twres # isolate carry and residue
2548 pcmpgtd @XMM[7], $twtmp # broadcast upper bits
2549 pxor $twres, @XMM[7]
2550
2551 sub \$0x80,$len
2552 jnc .Lxts_dec_loop
2553
2554.Lxts_dec_short:
2555 add \$0x80, $len
2556 jz .Lxts_dec_done
2557___
2558 for ($i=0;$i<7;$i++) {
2559 $code.=<<___;
2560 pshufd \$0x13, $twtmp, $twres
2561 pxor $twtmp, $twtmp
2562 movdqa @XMM[7], @XMM[$i]
2563 movdqa @XMM[7], `0x10*$i`(%rsp)# save tweak[$i]
2564 paddq @XMM[7], @XMM[7] # psllq 1,$tweak
2565 pand $twmask, $twres # isolate carry and residue
2566 pcmpgtd @XMM[7], $twtmp # broadcast upper bits
2567 pxor $twres, @XMM[7]
2568___
2569 $code.=<<___ if ($i>=1);
2570 movdqu `0x10*($i-1)`($inp), @XMM[8+$i-1]
2571 cmp \$`0x10*$i`,$len
2572 je .Lxts_dec_$i
2573___
2574 $code.=<<___ if ($i>=2);
2575 pxor @XMM[8+$i-2], @XMM[$i-2]# input[] ^ tweak[]
2576___
2577 }
2578$code.=<<___;
2579 movdqu 0x60($inp), @XMM[8+6]
2580 pxor @XMM[8+5], @XMM[5]
2581 movdqa @XMM[7], 0x70(%rsp)
2582 lea 0x70($inp), $inp
2583 pxor @XMM[8+6], @XMM[6]
2584 lea 0x80(%rsp), %rax # pass key schedule
2585 mov %edx, %r10d # pass rounds
2586
2587 call _bsaes_decrypt8
2588
2589 pxor 0x00(%rsp), @XMM[0] # ^= tweak[]
2590 pxor 0x10(%rsp), @XMM[1]
2591 movdqu @XMM[0], 0x00($out) # write output
2592 pxor 0x20(%rsp), @XMM[6]
2593 movdqu @XMM[1], 0x10($out)
2594 pxor 0x30(%rsp), @XMM[4]
2595 movdqu @XMM[6], 0x20($out)
2596 pxor 0x40(%rsp), @XMM[2]
2597 movdqu @XMM[4], 0x30($out)
2598 pxor 0x50(%rsp), @XMM[7]
2599 movdqu @XMM[2], 0x40($out)
2600 pxor 0x60(%rsp), @XMM[3]
2601 movdqu @XMM[7], 0x50($out)
2602 movdqu @XMM[3], 0x60($out)
2603 lea 0x70($out), $out
2604
2605 movdqa 0x70(%rsp), @XMM[7] # next iteration tweak
2606 jmp .Lxts_dec_done
2607.align 16
2608.Lxts_dec_6:
2609 pxor @XMM[8+4], @XMM[4]
2610 lea 0x60($inp), $inp
2611 pxor @XMM[8+5], @XMM[5]
2612 lea 0x80(%rsp), %rax # pass key schedule
2613 mov %edx, %r10d # pass rounds
2614
2615 call _bsaes_decrypt8
2616
2617 pxor 0x00(%rsp), @XMM[0] # ^= tweak[]
2618 pxor 0x10(%rsp), @XMM[1]
2619 movdqu @XMM[0], 0x00($out) # write output
2620 pxor 0x20(%rsp), @XMM[6]
2621 movdqu @XMM[1], 0x10($out)
2622 pxor 0x30(%rsp), @XMM[4]
2623 movdqu @XMM[6], 0x20($out)
2624 pxor 0x40(%rsp), @XMM[2]
2625 movdqu @XMM[4], 0x30($out)
2626 pxor 0x50(%rsp), @XMM[7]
2627 movdqu @XMM[2], 0x40($out)
2628 movdqu @XMM[7], 0x50($out)
2629 lea 0x60($out), $out
2630
2631 movdqa 0x60(%rsp), @XMM[7] # next iteration tweak
2632 jmp .Lxts_dec_done
2633.align 16
2634.Lxts_dec_5:
2635 pxor @XMM[8+3], @XMM[3]
2636 lea 0x50($inp), $inp
2637 pxor @XMM[8+4], @XMM[4]
2638 lea 0x80(%rsp), %rax # pass key schedule
2639 mov %edx, %r10d # pass rounds
2640
2641 call _bsaes_decrypt8
2642
2643 pxor 0x00(%rsp), @XMM[0] # ^= tweak[]
2644 pxor 0x10(%rsp), @XMM[1]
2645 movdqu @XMM[0], 0x00($out) # write output
2646 pxor 0x20(%rsp), @XMM[6]
2647 movdqu @XMM[1], 0x10($out)
2648 pxor 0x30(%rsp), @XMM[4]
2649 movdqu @XMM[6], 0x20($out)
2650 pxor 0x40(%rsp), @XMM[2]
2651 movdqu @XMM[4], 0x30($out)
2652 movdqu @XMM[2], 0x40($out)
2653 lea 0x50($out), $out
2654
2655 movdqa 0x50(%rsp), @XMM[7] # next iteration tweak
2656 jmp .Lxts_dec_done
2657.align 16
2658.Lxts_dec_4:
2659 pxor @XMM[8+2], @XMM[2]
2660 lea 0x40($inp), $inp
2661 pxor @XMM[8+3], @XMM[3]
2662 lea 0x80(%rsp), %rax # pass key schedule
2663 mov %edx, %r10d # pass rounds
2664
2665 call _bsaes_decrypt8
2666
2667 pxor 0x00(%rsp), @XMM[0] # ^= tweak[]
2668 pxor 0x10(%rsp), @XMM[1]
2669 movdqu @XMM[0], 0x00($out) # write output
2670 pxor 0x20(%rsp), @XMM[6]
2671 movdqu @XMM[1], 0x10($out)
2672 pxor 0x30(%rsp), @XMM[4]
2673 movdqu @XMM[6], 0x20($out)
2674 movdqu @XMM[4], 0x30($out)
2675 lea 0x40($out), $out
2676
2677 movdqa 0x40(%rsp), @XMM[7] # next iteration tweak
2678 jmp .Lxts_dec_done
2679.align 16
2680.Lxts_dec_3:
2681 pxor @XMM[8+1], @XMM[1]
2682 lea 0x30($inp), $inp
2683 pxor @XMM[8+2], @XMM[2]
2684 lea 0x80(%rsp), %rax # pass key schedule
2685 mov %edx, %r10d # pass rounds
2686
2687 call _bsaes_decrypt8
2688
2689 pxor 0x00(%rsp), @XMM[0] # ^= tweak[]
2690 pxor 0x10(%rsp), @XMM[1]
2691 movdqu @XMM[0], 0x00($out) # write output
2692 pxor 0x20(%rsp), @XMM[6]
2693 movdqu @XMM[1], 0x10($out)
2694 movdqu @XMM[6], 0x20($out)
2695 lea 0x30($out), $out
2696
2697 movdqa 0x30(%rsp), @XMM[7] # next iteration tweak
2698 jmp .Lxts_dec_done
2699.align 16
2700.Lxts_dec_2:
2701 pxor @XMM[8+0], @XMM[0]
2702 lea 0x20($inp), $inp
2703 pxor @XMM[8+1], @XMM[1]
2704 lea 0x80(%rsp), %rax # pass key schedule
2705 mov %edx, %r10d # pass rounds
2706
2707 call _bsaes_decrypt8
2708
2709 pxor 0x00(%rsp), @XMM[0] # ^= tweak[]
2710 pxor 0x10(%rsp), @XMM[1]
2711 movdqu @XMM[0], 0x00($out) # write output
2712 movdqu @XMM[1], 0x10($out)
2713 lea 0x20($out), $out
2714
2715 movdqa 0x20(%rsp), @XMM[7] # next iteration tweak
2716 jmp .Lxts_dec_done
2717.align 16
2718.Lxts_dec_1:
2719 pxor @XMM[0], @XMM[8]
2720 lea 0x10($inp), $inp
2721 movdqa @XMM[8], 0x20(%rbp)
2722 lea 0x20(%rbp), $arg1
2723 lea 0x20(%rbp), $arg2
2724 lea ($key), $arg3
2725 call asm_AES_decrypt # doesn't touch %xmm
2726 pxor 0x20(%rbp), @XMM[0] # ^= tweak[]
2727 #pxor @XMM[8], @XMM[0]
2728 #lea 0x80(%rsp), %rax # pass key schedule
2729 #mov %edx, %r10d # pass rounds
2730 #call _bsaes_decrypt8
2731 #pxor 0x00(%rsp), @XMM[0] # ^= tweak[]
2732 movdqu @XMM[0], 0x00($out) # write output
2733 lea 0x10($out), $out
2734
2735 movdqa 0x10(%rsp), @XMM[7] # next iteration tweak
2736
2737.Lxts_dec_done:
2738 and \$15, %ebx
2739 jz .Lxts_dec_ret
2740
2741 pxor $twtmp, $twtmp
2742 movdqa .Lxts_magic(%rip), $twmask
2743 pcmpgtd @XMM[7], $twtmp
2744 pshufd \$0x13, $twtmp, $twres
2745 movdqa @XMM[7], @XMM[6]
2746 paddq @XMM[7], @XMM[7] # psllq 1,$tweak
2747 pand $twmask, $twres # isolate carry and residue
2748 movdqu ($inp), @XMM[0]
2749 pxor $twres, @XMM[7]
2750
2751 lea 0x20(%rbp), $arg1
2752 pxor @XMM[7], @XMM[0]
2753 lea 0x20(%rbp), $arg2
2754 movdqa @XMM[0], 0x20(%rbp)
2755 lea ($key), $arg3
2756 call asm_AES_decrypt # doesn't touch %xmm
2757 pxor 0x20(%rbp), @XMM[7]
2758 mov $out, %rdx
2759 movdqu @XMM[7], ($out)
2760
2761.Lxts_dec_steal:
2762 movzb 16($inp), %eax
2763 movzb (%rdx), %ecx
2764 lea 1($inp), $inp
2765 mov %al, (%rdx)
2766 mov %cl, 16(%rdx)
2767 lea 1(%rdx), %rdx
2768 sub \$1,%ebx
2769 jnz .Lxts_dec_steal
2770
2771 movdqu ($out), @XMM[0]
2772 lea 0x20(%rbp), $arg1
2773 pxor @XMM[6], @XMM[0]
2774 lea 0x20(%rbp), $arg2
2775 movdqa @XMM[0], 0x20(%rbp)
2776 lea ($key), $arg3
2777 call asm_AES_decrypt # doesn't touch %xmm
2778 pxor 0x20(%rbp), @XMM[6]
2779 movdqu @XMM[6], ($out)
2780
2781.Lxts_dec_ret:
2782 lea (%rsp), %rax
2783 pxor %xmm0, %xmm0
2784.Lxts_dec_bzero: # wipe key schedule [if any]
2785 movdqa %xmm0, 0x00(%rax)
2786 movdqa %xmm0, 0x10(%rax)
2787 lea 0x20(%rax), %rax
2788 cmp %rax, %rbp
2789 ja .Lxts_dec_bzero
2790
2791 lea (%rbp),%rsp # restore %rsp
2792___
2793$code.=<<___ if ($win64);
2794 movaps 0x40(%rbp), %xmm6
2795 movaps 0x50(%rbp), %xmm7
2796 movaps 0x60(%rbp), %xmm8
2797 movaps 0x70(%rbp), %xmm9
2798 movaps 0x80(%rbp), %xmm10
2799 movaps 0x90(%rbp), %xmm11
2800 movaps 0xa0(%rbp), %xmm12
2801 movaps 0xb0(%rbp), %xmm13
2802 movaps 0xc0(%rbp), %xmm14
2803 movaps 0xd0(%rbp), %xmm15
2804 lea 0xa0(%rbp), %rsp
2805___
2806$code.=<<___;
2807 mov 0x48(%rsp), %r15
2808 mov 0x50(%rsp), %r14
2809 mov 0x58(%rsp), %r13
2810 mov 0x60(%rsp), %r12
2811 mov 0x68(%rsp), %rbx
2812 mov 0x70(%rsp), %rax
2813 lea 0x78(%rsp), %rsp
2814 mov %rax, %rbp
2815.Lxts_dec_epilogue:
2816 ret
2817.size bsaes_xts_decrypt,.-bsaes_xts_decrypt
2818___
2819}
2820$code.=<<___;
2821.type _bsaes_const,\@object
2822.align 64
2823_bsaes_const:
2824.LM0ISR: # InvShiftRows constants
2825 .quad 0x0a0e0206070b0f03, 0x0004080c0d010509
2826.LISRM0:
2827 .quad 0x01040b0e0205080f, 0x0306090c00070a0d
2828.LISR:
2829 .quad 0x0504070602010003, 0x0f0e0d0c080b0a09
2830.LBS0: # bit-slice constants
2831 .quad 0x5555555555555555, 0x5555555555555555
2832.LBS1:
2833 .quad 0x3333333333333333, 0x3333333333333333
2834.LBS2:
2835 .quad 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f
2836.LSR: # shiftrows constants
2837 .quad 0x0504070600030201, 0x0f0e0d0c0a09080b
2838.LSRM0:
2839 .quad 0x0304090e00050a0f, 0x01060b0c0207080d
2840.LM0SR:
2841 .quad 0x0a0e02060f03070b, 0x0004080c05090d01
2842.LSWPUP: # byte-swap upper dword
2843 .quad 0x0706050403020100, 0x0c0d0e0f0b0a0908
2844.LSWPUPM0SR:
2845 .quad 0x0a0d02060c03070b, 0x0004080f05090e01
2846.LADD1: # counter increment constants
2847 .quad 0x0000000000000000, 0x0000000100000000
2848.LADD2:
2849 .quad 0x0000000000000000, 0x0000000200000000
2850.LADD3:
2851 .quad 0x0000000000000000, 0x0000000300000000
2852.LADD4:
2853 .quad 0x0000000000000000, 0x0000000400000000
2854.LADD5:
2855 .quad 0x0000000000000000, 0x0000000500000000
2856.LADD6:
2857 .quad 0x0000000000000000, 0x0000000600000000
2858.LADD7:
2859 .quad 0x0000000000000000, 0x0000000700000000
2860.LADD8:
2861 .quad 0x0000000000000000, 0x0000000800000000
2862.Lxts_magic:
2863 .long 0x87,0,1,0
2864.Lmasks:
2865 .quad 0x0101010101010101, 0x0101010101010101
2866 .quad 0x0202020202020202, 0x0202020202020202
2867 .quad 0x0404040404040404, 0x0404040404040404
2868 .quad 0x0808080808080808, 0x0808080808080808
2869.LM0:
2870 .quad 0x02060a0e03070b0f, 0x0004080c0105090d
2871.L63:
2872 .quad 0x6363636363636363, 0x6363636363636363
2873.asciz "Bit-sliced AES for x86_64/SSSE3, Emilia Käsper, Peter Schwabe, Andy Polyakov"
2874.align 64
2875.size _bsaes_const,.-_bsaes_const
2876___
2877
2878# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame,
2879# CONTEXT *context,DISPATCHER_CONTEXT *disp)
2880if ($win64) {
2881$rec="%rcx";
2882$frame="%rdx";
2883$context="%r8";
2884$disp="%r9";
2885
2886$code.=<<___;
2887.extern __imp_RtlVirtualUnwind
2888.type se_handler,\@abi-omnipotent
2889.align 16
2890se_handler:
2891 push %rsi
2892 push %rdi
2893 push %rbx
2894 push %rbp
2895 push %r12
2896 push %r13
2897 push %r14
2898 push %r15
2899 pushfq
2900 sub \$64,%rsp
2901
2902 mov 120($context),%rax # pull context->Rax
2903 mov 248($context),%rbx # pull context->Rip
2904
2905 mov 8($disp),%rsi # disp->ImageBase
2906 mov 56($disp),%r11 # disp->HandlerData
2907
2908 mov 0(%r11),%r10d # HandlerData[0]
2909 lea (%rsi,%r10),%r10 # prologue label
2910 cmp %r10,%rbx # context->Rip<prologue label
2911 jb .Lin_prologue
2912
2913 mov 152($context),%rax # pull context->Rsp
2914
2915 mov 4(%r11),%r10d # HandlerData[1]
2916 lea (%rsi,%r10),%r10 # epilogue label
2917 cmp %r10,%rbx # context->Rip>=epilogue label
2918 jae .Lin_prologue
2919
2920 mov 160($context),%rax # pull context->Rbp
2921
2922 lea 0x40(%rax),%rsi # %xmm save area
2923 lea 512($context),%rdi # &context.Xmm6
2924 mov \$20,%ecx # 10*sizeof(%xmm0)/sizeof(%rax)
2925 .long 0xa548f3fc # cld; rep movsq
2926 lea 0xa0(%rax),%rax # adjust stack pointer
2927
2928 mov 0x70(%rax),%rbp
2929 mov 0x68(%rax),%rbx
2930 mov 0x60(%rax),%r12
2931 mov 0x58(%rax),%r13
2932 mov 0x50(%rax),%r14
2933 mov 0x48(%rax),%r15
2934 lea 0x78(%rax),%rax # adjust stack pointer
2935 mov %rbx,144($context) # restore context->Rbx
2936 mov %rbp,160($context) # restore context->Rbp
2937 mov %r12,216($context) # restore context->R12
2938 mov %r13,224($context) # restore context->R13
2939 mov %r14,232($context) # restore context->R14
2940 mov %r15,240($context) # restore context->R15
2941
2942.Lin_prologue:
2943 mov %rax,152($context) # restore context->Rsp
2944
2945 mov 40($disp),%rdi # disp->ContextRecord
2946 mov $context,%rsi # context
2947 mov \$`1232/8`,%ecx # sizeof(CONTEXT)
2948 .long 0xa548f3fc # cld; rep movsq
2949
2950 mov $disp,%rsi
2951 xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER
2952 mov 8(%rsi),%rdx # arg2, disp->ImageBase
2953 mov 0(%rsi),%r8 # arg3, disp->ControlPc
2954 mov 16(%rsi),%r9 # arg4, disp->FunctionEntry
2955 mov 40(%rsi),%r10 # disp->ContextRecord
2956 lea 56(%rsi),%r11 # &disp->HandlerData
2957 lea 24(%rsi),%r12 # &disp->EstablisherFrame
2958 mov %r10,32(%rsp) # arg5
2959 mov %r11,40(%rsp) # arg6
2960 mov %r12,48(%rsp) # arg7
2961 mov %rcx,56(%rsp) # arg8, (NULL)
2962 call *__imp_RtlVirtualUnwind(%rip)
2963
2964 mov \$1,%eax # ExceptionContinueSearch
2965 add \$64,%rsp
2966 popfq
2967 pop %r15
2968 pop %r14
2969 pop %r13
2970 pop %r12
2971 pop %rbp
2972 pop %rbx
2973 pop %rdi
2974 pop %rsi
2975 ret
2976.size se_handler,.-se_handler
2977
2978.section .pdata
2979.align 4
2980___
2981$code.=<<___ if ($ecb);
2982 .rva .Lecb_enc_prologue
2983 .rva .Lecb_enc_epilogue
2984 .rva .Lecb_enc_info
2985
2986 .rva .Lecb_dec_prologue
2987 .rva .Lecb_dec_epilogue
2988 .rva .Lecb_dec_info
2989___
2990$code.=<<___;
2991 .rva .Lcbc_dec_prologue
2992 .rva .Lcbc_dec_epilogue
2993 .rva .Lcbc_dec_info
2994
2995 .rva .Lctr_enc_prologue
2996 .rva .Lctr_enc_epilogue
2997 .rva .Lctr_enc_info
2998
2999 .rva .Lxts_enc_prologue
3000 .rva .Lxts_enc_epilogue
3001 .rva .Lxts_enc_info
3002
3003 .rva .Lxts_dec_prologue
3004 .rva .Lxts_dec_epilogue
3005 .rva .Lxts_dec_info
3006
3007.section .xdata
3008.align 8
3009___
3010$code.=<<___ if ($ecb);
3011.Lecb_enc_info:
3012 .byte 9,0,0,0
3013 .rva se_handler
3014 .rva .Lecb_enc_body,.Lecb_enc_epilogue # HandlerData[]
3015.Lecb_dec_info:
3016 .byte 9,0,0,0
3017 .rva se_handler
3018 .rva .Lecb_dec_body,.Lecb_dec_epilogue # HandlerData[]
3019___
3020$code.=<<___;
3021.Lcbc_dec_info:
3022 .byte 9,0,0,0
3023 .rva se_handler
3024 .rva .Lcbc_dec_body,.Lcbc_dec_epilogue # HandlerData[]
3025.Lctr_enc_info:
3026 .byte 9,0,0,0
3027 .rva se_handler
3028 .rva .Lctr_enc_body,.Lctr_enc_epilogue # HandlerData[]
3029.Lxts_enc_info:
3030 .byte 9,0,0,0
3031 .rva se_handler
3032 .rva .Lxts_enc_body,.Lxts_enc_epilogue # HandlerData[]
3033.Lxts_dec_info:
3034 .byte 9,0,0,0
3035 .rva se_handler
3036 .rva .Lxts_dec_body,.Lxts_dec_epilogue # HandlerData[]
3037___
3038}
3039
3040$code =~ s/\`([^\`]*)\`/eval($1)/gem;
3041
3042print $code;
3043
3044close STDOUT;
diff --git a/src/lib/libssl/src/crypto/aes/asm/vpaes-x86.pl b/src/lib/libssl/src/crypto/aes/asm/vpaes-x86.pl
new file mode 100644
index 0000000000..1533e2c304
--- /dev/null
+++ b/src/lib/libssl/src/crypto/aes/asm/vpaes-x86.pl
@@ -0,0 +1,903 @@
1#!/usr/bin/env perl
2
3######################################################################
4## Constant-time SSSE3 AES core implementation.
5## version 0.1
6##
7## By Mike Hamburg (Stanford University), 2009
8## Public domain.
9##
10## For details see http://shiftleft.org/papers/vector_aes/ and
11## http://crypto.stanford.edu/vpaes/.
12
13######################################################################
14# September 2011.
15#
16# Port vpaes-x86_64.pl as a 32-bit "almost" drop-in replacement for
17# aes-586.pl. "Almost" refers to the fact that AES_cbc_encrypt
18# doesn't handle partial vectors (it doesn't have to when called from
19# EVP only). "Drop-in" implies that this module neither shares key
20# schedule structure with the original nor makes assumptions about
21# its alignment...
22#
23# Performance summary. The aes-586.pl column lists large-block CBC
24# encrypt/decrypt/with-hyper-threading-off(*) results in cycles per
25# byte processed with a 128-bit key, and the vpaes-x86.pl column lists
26# [also large-block CBC] encrypt/decrypt results.
27#
28# aes-586.pl vpaes-x86.pl
29#
30# Core 2(**) 29.1/42.3/18.3 22.0/25.6(***)
31# Nehalem 27.9/40.4/18.1 10.3/12.0
32# Atom 102./119./60.1 64.5/85.3(***)
33#
34# (*)	"Hyper-threading" in this context refers to cache shared among
35#	multiple cores rather than to Intel HTT specifically. As the vast
36#	majority of contemporary cores share cache, the slower code path
37#	is commonplace. In other words, "with-hyper-threading-off"
38#	results are presented mostly for reference purposes.
39#
40# (**) "Core 2" refers to initial 65nm design, a.k.a. Conroe.
41#
42# (***)	The less impressive improvement on Core 2 and Atom is due to
43#	their slow pshufb, yet it is still a respectable +32%/65%
44#	improvement on Core 2 and +58%/40% on Atom (as implied, over the
45#	"hyper-threading-safe" code path).
46#
47# <appro@openssl.org>
48
49$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
50push(@INC,"${dir}","${dir}../../perlasm");
51require "x86asm.pl";
52
53&asm_init($ARGV[0],"vpaes-x86.pl",$x86only = $ARGV[$#ARGV] eq "386");
54
55$PREFIX="vpaes";
56
57my ($round, $base, $magic, $key, $const, $inp, $out)=
58 ("eax", "ebx", "ecx", "edx","ebp", "esi","edi");
59
60&static_label("_vpaes_consts");
61&static_label("_vpaes_schedule_low_round");
62
63&set_label("_vpaes_consts",64);
64$k_inv=-0x30; # inv, inva
65 &data_word(0x0D080180,0x0E05060F,0x0A0B0C02,0x04070309);
66 &data_word(0x0F0B0780,0x01040A06,0x02050809,0x030D0E0C);
67
68$k_s0F=-0x10; # s0F
69 &data_word(0x0F0F0F0F,0x0F0F0F0F,0x0F0F0F0F,0x0F0F0F0F);
70
71$k_ipt=0x00; # input transform (lo, hi)
72 &data_word(0x5A2A7000,0xC2B2E898,0x52227808,0xCABAE090);
73 &data_word(0x317C4D00,0x4C01307D,0xB0FDCC81,0xCD80B1FC);
74
75$k_sb1=0x20; # sb1u, sb1t
76 &data_word(0xCB503E00,0xB19BE18F,0x142AF544,0xA5DF7A6E);
77 &data_word(0xFAE22300,0x3618D415,0x0D2ED9EF,0x3BF7CCC1);
78$k_sb2=0x40; # sb2u, sb2t
79 &data_word(0x0B712400,0xE27A93C6,0xBC982FCD,0x5EB7E955);
80 &data_word(0x0AE12900,0x69EB8840,0xAB82234A,0xC2A163C8);
81$k_sbo=0x60; # sbou, sbot
82 &data_word(0x6FBDC700,0xD0D26D17,0xC502A878,0x15AABF7A);
83 &data_word(0x5FBB6A00,0xCFE474A5,0x412B35FA,0x8E1E90D1);
84
85$k_mc_forward=0x80; # mc_forward
86 &data_word(0x00030201,0x04070605,0x080B0A09,0x0C0F0E0D);
87 &data_word(0x04070605,0x080B0A09,0x0C0F0E0D,0x00030201);
88 &data_word(0x080B0A09,0x0C0F0E0D,0x00030201,0x04070605);
89 &data_word(0x0C0F0E0D,0x00030201,0x04070605,0x080B0A09);
90
91$k_mc_backward=0xc0; # mc_backward
92 &data_word(0x02010003,0x06050407,0x0A09080B,0x0E0D0C0F);
93 &data_word(0x0E0D0C0F,0x02010003,0x06050407,0x0A09080B);
94 &data_word(0x0A09080B,0x0E0D0C0F,0x02010003,0x06050407);
95 &data_word(0x06050407,0x0A09080B,0x0E0D0C0F,0x02010003);
96
97$k_sr=0x100; # sr
98 &data_word(0x03020100,0x07060504,0x0B0A0908,0x0F0E0D0C);
99 &data_word(0x0F0A0500,0x030E0904,0x07020D08,0x0B06010C);
100 &data_word(0x0B020900,0x0F060D04,0x030A0108,0x070E050C);
101 &data_word(0x070A0D00,0x0B0E0104,0x0F020508,0x0306090C);
102
103$k_rcon=0x140; # rcon
104 &data_word(0xAF9DEEB6,0x1F8391B9,0x4D7C7D81,0x702A9808);
105
106$k_s63=0x150; # s63: all equal to 0x63 transformed
107 &data_word(0x5B5B5B5B,0x5B5B5B5B,0x5B5B5B5B,0x5B5B5B5B);
108
109$k_opt=0x160; # output transform
110 &data_word(0xD6B66000,0xFF9F4929,0xDEBE6808,0xF7974121);
111 &data_word(0x50BCEC00,0x01EDBD51,0xB05C0CE0,0xE10D5DB1);
112
113$k_deskew=0x180; # deskew tables: inverts the sbox's "skew"
114 &data_word(0x47A4E300,0x07E4A340,0x5DBEF91A,0x1DFEB95A);
115 &data_word(0x83EA6900,0x5F36B5DC,0xF49D1E77,0x2841C2AB);
116##
117## Decryption stuff
118## Key schedule constants
119##
120$k_dksd=0x1a0; # decryption key schedule: invskew x*D
121 &data_word(0xA3E44700,0xFEB91A5D,0x5A1DBEF9,0x0740E3A4);
122 &data_word(0xB5368300,0x41C277F4,0xAB289D1E,0x5FDC69EA);
123$k_dksb=0x1c0; # decryption key schedule: invskew x*B
124 &data_word(0x8550D500,0x9A4FCA1F,0x1CC94C99,0x03D65386);
125 &data_word(0xB6FC4A00,0x115BEDA7,0x7E3482C8,0xD993256F);
126$k_dkse=0x1e0; # decryption key schedule: invskew x*E + 0x63
127 &data_word(0x1FC9D600,0xD5031CCA,0x994F5086,0x53859A4C);
128 &data_word(0x4FDC7BE8,0xA2319605,0x20B31487,0xCD5EF96A);
129$k_dks9=0x200; # decryption key schedule: invskew x*9
130 &data_word(0x7ED9A700,0xB6116FC8,0x82255BFC,0x4AED9334);
131 &data_word(0x27143300,0x45765162,0xE9DAFDCE,0x8BB89FAC);
132
133##
134## Decryption stuff
135## Round function constants
136##
137$k_dipt=0x220; # decryption input transform
138 &data_word(0x0B545F00,0x0F505B04,0x114E451A,0x154A411E);
139 &data_word(0x60056500,0x86E383E6,0xF491F194,0x12771772);
140
141$k_dsb9=0x240; # decryption sbox output *9*u, *9*t
142 &data_word(0x9A86D600,0x851C0353,0x4F994CC9,0xCAD51F50);
143 &data_word(0xECD74900,0xC03B1789,0xB2FBA565,0x725E2C9E);
144$k_dsbd=0x260; # decryption sbox output *D*u, *D*t
145 &data_word(0xE6B1A200,0x7D57CCDF,0x882A4439,0xF56E9B13);
146 &data_word(0x24C6CB00,0x3CE2FAF7,0x15DEEFD3,0x2931180D);
147$k_dsbb=0x280; # decryption sbox output *B*u, *B*t
148 &data_word(0x96B44200,0xD0226492,0xB0F2D404,0x602646F6);
149 &data_word(0xCD596700,0xC19498A6,0x3255AA6B,0xF3FF0C3E);
150$k_dsbe=0x2a0; # decryption sbox output *E*u, *E*t
151 &data_word(0x26D4D000,0x46F29296,0x64B4F6B0,0x22426004);
152 &data_word(0xFFAAC100,0x0C55A6CD,0x98593E32,0x9467F36B);
153$k_dsbo=0x2c0; # decryption sbox final output
154 &data_word(0x7EF94000,0x1387EA53,0xD4943E2D,0xC7AA6DB9);
155 &data_word(0x93441D00,0x12D7560F,0xD8C58E9C,0xCA4B8159);
156&asciz ("Vector Permutation AES for x86/SSSE3, Mike Hamburg (Stanford University)");
157&align (64);
158
159&function_begin_B("_vpaes_preheat");
160 &add ($const,&DWP(0,"esp"));
161 &movdqa ("xmm7",&QWP($k_inv,$const));
162 &movdqa ("xmm6",&QWP($k_s0F,$const));
163 &ret ();
164&function_end_B("_vpaes_preheat");
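##
## After _vpaes_preheat: %xmm7 holds the first 16 bytes of the $k_inv
## table and %xmm6 the 0x0F nibble mask ($k_s0F); both the encrypt and
## decrypt cores below expect them there.
##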
165
166##
167## _aes_encrypt_core
168##
169## AES-encrypt %xmm0.
170##
171## Inputs:
172## %xmm0 = input
173## %xmm6-%xmm7 as in _vpaes_preheat
174## (%edx) = scheduled keys
175##
176## Output in %xmm0
177## Clobbers %xmm1-%xmm5, %eax, %ebx, %ecx, %edx
178##
179##
180&function_begin_B("_vpaes_encrypt_core");
181 &mov ($magic,16);
182 &mov ($round,&DWP(240,$key));
183	&movdqa	("xmm1","xmm6");
184 &movdqa ("xmm2",&QWP($k_ipt,$const));
185 &pandn ("xmm1","xmm0");
186 &movdqu ("xmm5",&QWP(0,$key));
187 &psrld ("xmm1",4);
188 &pand ("xmm0","xmm6");
189 &pshufb ("xmm2","xmm0");
190 &movdqa ("xmm0",&QWP($k_ipt+16,$const));
191 &pshufb ("xmm0","xmm1");
192 &pxor ("xmm2","xmm5");
193 &pxor ("xmm0","xmm2");
194 &add ($key,16);
195 &lea ($base,&DWP($k_mc_backward,$const));
196 &jmp (&label("enc_entry"));
197
198
199&set_label("enc_loop",16);
200 # middle of middle round
201 &movdqa ("xmm4",&QWP($k_sb1,$const)); # 4 : sb1u
202 &pshufb ("xmm4","xmm2"); # 4 = sb1u
203 &pxor ("xmm4","xmm5"); # 4 = sb1u + k
204 &movdqa ("xmm0",&QWP($k_sb1+16,$const));# 0 : sb1t
205 &pshufb ("xmm0","xmm3"); # 0 = sb1t
206 &pxor ("xmm0","xmm4"); # 0 = A
207 &movdqa ("xmm5",&QWP($k_sb2,$const)); # 4 : sb2u
208 &pshufb ("xmm5","xmm2"); # 4 = sb2u
209 &movdqa ("xmm1",&QWP(-0x40,$base,$magic));# .Lk_mc_forward[]
210 &movdqa ("xmm2",&QWP($k_sb2+16,$const));# 2 : sb2t
211 &pshufb ("xmm2","xmm3"); # 2 = sb2t
212 &pxor ("xmm2","xmm5"); # 2 = 2A
213 &movdqa ("xmm4",&QWP(0,$base,$magic)); # .Lk_mc_backward[]
214 &movdqa ("xmm3","xmm0"); # 3 = A
215 &pshufb ("xmm0","xmm1"); # 0 = B
216 &add ($key,16); # next key
217 &pxor ("xmm0","xmm2"); # 0 = 2A+B
218 &pshufb ("xmm3","xmm4"); # 3 = D
219 &add ($magic,16); # next mc
220 &pxor ("xmm3","xmm0"); # 3 = 2A+B+D
221 &pshufb ("xmm0","xmm1"); # 0 = 2B+C
222 &and ($magic,0x30); # ... mod 4
223 &pxor ("xmm0","xmm3"); # 0 = 2A+3B+C+D
224 &sub ($round,1); # nr--
225
226&set_label("enc_entry");
227 # top of round
228 &movdqa ("xmm1","xmm6"); # 1 : i
229 &pandn ("xmm1","xmm0"); # 1 = i<<4
230 &psrld ("xmm1",4); # 1 = i
231 &pand ("xmm0","xmm6"); # 0 = k
232 &movdqa ("xmm5",&QWP($k_inv+16,$const));# 2 : a/k
233 &pshufb ("xmm5","xmm0"); # 2 = a/k
234 &pxor ("xmm0","xmm1"); # 0 = j
235 &movdqa ("xmm3","xmm7"); # 3 : 1/i
236 &pshufb ("xmm3","xmm1"); # 3 = 1/i
237 &pxor ("xmm3","xmm5"); # 3 = iak = 1/i + a/k
238 &movdqa ("xmm4","xmm7"); # 4 : 1/j
239 &pshufb ("xmm4","xmm0"); # 4 = 1/j
240 &pxor ("xmm4","xmm5"); # 4 = jak = 1/j + a/k
241 &movdqa ("xmm2","xmm7"); # 2 : 1/iak
242 &pshufb ("xmm2","xmm3"); # 2 = 1/iak
243 &pxor ("xmm2","xmm0"); # 2 = io
244 &movdqa ("xmm3","xmm7"); # 3 : 1/jak
245 &movdqu ("xmm5",&QWP(0,$key));
246 &pshufb ("xmm3","xmm4"); # 3 = 1/jak
247 &pxor ("xmm3","xmm1"); # 3 = jo
248 &jnz (&label("enc_loop"));
249
250 # middle of last round
251 &movdqa ("xmm4",&QWP($k_sbo,$const)); # 3 : sbou .Lk_sbo
252 &movdqa ("xmm0",&QWP($k_sbo+16,$const));# 3 : sbot .Lk_sbo+16
253 &pshufb ("xmm4","xmm2"); # 4 = sbou
254 &pxor ("xmm4","xmm5"); # 4 = sb1u + k
255 &pshufb ("xmm0","xmm3"); # 0 = sb1t
256 &movdqa ("xmm1",&QWP(0x40,$base,$magic));# .Lk_sr[]
257 &pxor ("xmm0","xmm4"); # 0 = A
258 &pshufb ("xmm0","xmm1");
259 &ret ();
260&function_end_B("_vpaes_encrypt_core");
261
262##
263## Decryption core
264##
265## Same API as encryption core.
266##
267&function_begin_B("_vpaes_decrypt_core");
268 &mov ($round,&DWP(240,$key));
269 &lea ($base,&DWP($k_dsbd,$const));
270 &movdqa ("xmm1","xmm6");
271 &movdqa ("xmm2",&QWP($k_dipt-$k_dsbd,$base));
272 &pandn ("xmm1","xmm0");
273 &mov ($magic,$round);
274	&psrld	("xmm1",4);
275 &movdqu ("xmm5",&QWP(0,$key));
276 &shl ($magic,4);
277 &pand ("xmm0","xmm6");
278 &pshufb ("xmm2","xmm0");
279 &movdqa ("xmm0",&QWP($k_dipt-$k_dsbd+16,$base));
280 &xor ($magic,0x30);
281 &pshufb ("xmm0","xmm1");
282 &and ($magic,0x30);
283 &pxor ("xmm2","xmm5");
284 &movdqa ("xmm5",&QWP($k_mc_forward+48,$const));
285 &pxor ("xmm0","xmm2");
286 &add ($key,16);
287 &lea ($magic,&DWP($k_sr-$k_dsbd,$base,$magic));
288 &jmp (&label("dec_entry"));
289
290&set_label("dec_loop",16);
291##
292## Inverse mix columns
293##
294 &movdqa ("xmm4",&QWP(-0x20,$base)); # 4 : sb9u
295 &pshufb ("xmm4","xmm2"); # 4 = sb9u
296 &pxor ("xmm4","xmm0");
297 &movdqa ("xmm0",&QWP(-0x10,$base)); # 0 : sb9t
298 &pshufb ("xmm0","xmm3"); # 0 = sb9t
299 &pxor ("xmm0","xmm4"); # 0 = ch
300 &add ($key,16); # next round key
301
302 &pshufb ("xmm0","xmm5"); # MC ch
303 &movdqa ("xmm4",&QWP(0,$base)); # 4 : sbdu
304 &pshufb ("xmm4","xmm2"); # 4 = sbdu
305 &pxor ("xmm4","xmm0"); # 4 = ch
306 &movdqa ("xmm0",&QWP(0x10,$base)); # 0 : sbdt
307 &pshufb ("xmm0","xmm3"); # 0 = sbdt
308 &pxor ("xmm0","xmm4"); # 0 = ch
309 &sub ($round,1); # nr--
310
311 &pshufb ("xmm0","xmm5"); # MC ch
312 &movdqa ("xmm4",&QWP(0x20,$base)); # 4 : sbbu
313 &pshufb ("xmm4","xmm2"); # 4 = sbbu
314 &pxor ("xmm4","xmm0"); # 4 = ch
315 &movdqa ("xmm0",&QWP(0x30,$base)); # 0 : sbbt
316 &pshufb ("xmm0","xmm3"); # 0 = sbbt
317 &pxor ("xmm0","xmm4"); # 0 = ch
318
319 &pshufb ("xmm0","xmm5"); # MC ch
320 &movdqa ("xmm4",&QWP(0x40,$base)); # 4 : sbeu
321 &pshufb ("xmm4","xmm2"); # 4 = sbeu
322 &pxor ("xmm4","xmm0"); # 4 = ch
323 &movdqa ("xmm0",&QWP(0x50,$base)); # 0 : sbet
324 &pshufb ("xmm0","xmm3"); # 0 = sbet
325 &pxor ("xmm0","xmm4"); # 0 = ch
326
327 &palignr("xmm5","xmm5",12);
328
329&set_label("dec_entry");
330 # top of round
331 &movdqa ("xmm1","xmm6"); # 1 : i
332 &pandn ("xmm1","xmm0"); # 1 = i<<4
333 &psrld ("xmm1",4); # 1 = i
334 &pand ("xmm0","xmm6"); # 0 = k
335 &movdqa ("xmm2",&QWP($k_inv+16,$const));# 2 : a/k
336 &pshufb ("xmm2","xmm0"); # 2 = a/k
337 &pxor ("xmm0","xmm1"); # 0 = j
338 &movdqa ("xmm3","xmm7"); # 3 : 1/i
339 &pshufb ("xmm3","xmm1"); # 3 = 1/i
340 &pxor ("xmm3","xmm2"); # 3 = iak = 1/i + a/k
341 &movdqa ("xmm4","xmm7"); # 4 : 1/j
342 &pshufb ("xmm4","xmm0"); # 4 = 1/j
343 &pxor ("xmm4","xmm2"); # 4 = jak = 1/j + a/k
344 &movdqa ("xmm2","xmm7"); # 2 : 1/iak
345 &pshufb ("xmm2","xmm3"); # 2 = 1/iak
346 &pxor ("xmm2","xmm0"); # 2 = io
347 &movdqa ("xmm3","xmm7"); # 3 : 1/jak
348 &pshufb ("xmm3","xmm4"); # 3 = 1/jak
349 &pxor ("xmm3","xmm1"); # 3 = jo
350 &movdqu ("xmm0",&QWP(0,$key));
351 &jnz (&label("dec_loop"));
352
353 # middle of last round
354 &movdqa ("xmm4",&QWP(0x60,$base)); # 3 : sbou
355 &pshufb ("xmm4","xmm2"); # 4 = sbou
356 &pxor ("xmm4","xmm0"); # 4 = sb1u + k
357 &movdqa ("xmm0",&QWP(0x70,$base)); # 0 : sbot
358 &movdqa ("xmm2",&QWP(0,$magic));
359 &pshufb ("xmm0","xmm3"); # 0 = sb1t
360 &pxor ("xmm0","xmm4"); # 0 = A
361 &pshufb ("xmm0","xmm2");
362 &ret ();
363&function_end_B("_vpaes_decrypt_core");
364
365########################################################
366## ##
367## AES key schedule ##
368## ##
369########################################################
370&function_begin_B("_vpaes_schedule_core");
371 &add ($const,&DWP(0,"esp"));
372 &movdqu ("xmm0",&QWP(0,$inp)); # load key (unaligned)
373 &movdqa ("xmm2",&QWP($k_rcon,$const)); # load rcon
374
375 # input transform
376 &movdqa ("xmm3","xmm0");
377 &lea ($base,&DWP($k_ipt,$const));
378 &movdqa (&QWP(4,"esp"),"xmm2"); # xmm8
379 &call ("_vpaes_schedule_transform");
380 &movdqa ("xmm7","xmm0");
381
382 &test ($out,$out);
383 &jnz (&label("schedule_am_decrypting"));
384
385 # encrypting, output zeroth round key after transform
386 &movdqu (&QWP(0,$key),"xmm0");
387 &jmp (&label("schedule_go"));
388
389&set_label("schedule_am_decrypting");
390 # decrypting, output zeroth round key after shiftrows
391 &movdqa ("xmm1",&QWP($k_sr,$const,$magic));
392 &pshufb ("xmm3","xmm1");
393 &movdqu (&QWP(0,$key),"xmm3");
394 &xor ($magic,0x30);
395
396&set_label("schedule_go");
397 &cmp ($round,192);
398 &ja (&label("schedule_256"));
399 &je (&label("schedule_192"));
400	# 128: fall through
401
402##
403## .schedule_128
404##
405## 128-bit specific part of key schedule.
406##
407## This schedule is really simple, because all its parts
408## are accomplished by the subroutines.
409##
410&set_label("schedule_128");
411 &mov ($round,10);
412
413&set_label("loop_schedule_128");
414 &call ("_vpaes_schedule_round");
415 &dec ($round);
416 &jz (&label("schedule_mangle_last"));
417 &call ("_vpaes_schedule_mangle"); # write output
418 &jmp (&label("loop_schedule_128"));
419
420##
421## .aes_schedule_192
422##
423## 192-bit specific part of key schedule.
424##
425## The main body of this schedule is the same as the 128-bit
426## schedule, but with more smearing. The long, high side is
427## stored in %xmm7 as before, and the short, low side is in
428## the high bits of %xmm6.
429##
430## This schedule is somewhat nastier, however, because each
431## round produces 192 bits of key material, or 1.5 round keys.
432## Therefore, on each cycle we do 2 rounds and produce 3 round
433## keys.
434##
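## As a check on the arithmetic: AES-192 needs Nr+1 = 13 round keys of
## 16 bytes each. The zeroth key is written before .schedule_go; with
## $round = 4, three full passes through the loop below write 3 keys each,
## and the final pass writes 2 before branching to schedule_mangle_last,
## which writes the last one: 1 + 3*3 + 2 + 1 = 13.
##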
435&set_label("schedule_192",16);
436 &movdqu ("xmm0",&QWP(8,$inp)); # load key part 2 (very unaligned)
437 &call ("_vpaes_schedule_transform"); # input transform
438 &movdqa ("xmm6","xmm0"); # save short part
439 &pxor ("xmm4","xmm4"); # clear 4
440 &movhlps("xmm6","xmm4"); # clobber low side with zeros
441 &mov ($round,4);
442
443&set_label("loop_schedule_192");
444 &call ("_vpaes_schedule_round");
445 &palignr("xmm0","xmm6",8);
446 &call ("_vpaes_schedule_mangle"); # save key n
447 &call ("_vpaes_schedule_192_smear");
448 &call ("_vpaes_schedule_mangle"); # save key n+1
449 &call ("_vpaes_schedule_round");
450 &dec ($round);
451 &jz (&label("schedule_mangle_last"));
452 &call ("_vpaes_schedule_mangle"); # save key n+2
453 &call ("_vpaes_schedule_192_smear");
454 &jmp (&label("loop_schedule_192"));
455
456##
457## .aes_schedule_256
458##
459## 256-bit specific part of key schedule.
460##
461## The structure here is very similar to the 128-bit
462## schedule, but with an additional "low side" in
463## %xmm6. The low side's rounds are the same as the
464## high side's, except no rcon and no rotation.
465##
466&set_label("schedule_256",16);
467 &movdqu ("xmm0",&QWP(16,$inp)); # load key part 2 (unaligned)
468 &call ("_vpaes_schedule_transform"); # input transform
469 &mov ($round,7);
470
471&set_label("loop_schedule_256");
472 &call ("_vpaes_schedule_mangle"); # output low result
473 &movdqa ("xmm6","xmm0"); # save cur_lo in xmm6
474
475 # high round
476 &call ("_vpaes_schedule_round");
477 &dec ($round);
478 &jz (&label("schedule_mangle_last"));
479 &call ("_vpaes_schedule_mangle");
480
481 # low round. swap xmm7 and xmm6
482 &pshufd ("xmm0","xmm0",0xFF);
483 &movdqa (&QWP(20,"esp"),"xmm7");
484 &movdqa ("xmm7","xmm6");
485 &call ("_vpaes_schedule_low_round");
486 &movdqa ("xmm7",&QWP(20,"esp"));
487
488 &jmp (&label("loop_schedule_256"));
489
490##
491## .aes_schedule_mangle_last
492##
493## Mangler for last round of key schedule
494## Mangles %xmm0
495## when encrypting, outputs out(%xmm0) ^ 63
496## when decrypting, outputs unskew(%xmm0)
497##
498## Always called right before return... jumps to cleanup and exits
499##
500&set_label("schedule_mangle_last",16);
501 # schedule last round key from xmm0
502 &lea ($base,&DWP($k_deskew,$const));
503 &test ($out,$out);
504 &jnz (&label("schedule_mangle_last_dec"));
505
506 # encrypting
507 &movdqa ("xmm1",&QWP($k_sr,$const,$magic));
508 &pshufb ("xmm0","xmm1"); # output permute
509 &lea ($base,&DWP($k_opt,$const)); # prepare to output transform
510 &add ($key,32);
511
512&set_label("schedule_mangle_last_dec");
513 &add ($key,-16);
514 &pxor ("xmm0",&QWP($k_s63,$const));
515 &call ("_vpaes_schedule_transform"); # output transform
516 &movdqu (&QWP(0,$key),"xmm0"); # save last key
517
518 # cleanup
519 &pxor ("xmm0","xmm0");
520 &pxor ("xmm1","xmm1");
521 &pxor ("xmm2","xmm2");
522 &pxor ("xmm3","xmm3");
523 &pxor ("xmm4","xmm4");
524 &pxor ("xmm5","xmm5");
525 &pxor ("xmm6","xmm6");
526 &pxor ("xmm7","xmm7");
527 &ret ();
528&function_end_B("_vpaes_schedule_core");
529
530##
531## .aes_schedule_192_smear
532##
533## Smear the short, low side in the 192-bit key schedule.
534##
535## Inputs:
536## %xmm7: high side, b a x y
537## %xmm6: low side, d c 0 0
538## %xmm13: 0
539##
540## Outputs:
541## %xmm6: b+c+d b+c 0 0
542## %xmm0: b+c+d b+c b a
543##
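## For reference, the two pshufd immediates used below decode as follows
## (dwords listed high to low, matching the notation above):
##
##	0x80 = 10 00 00 00b: dest = (src2, src0, src0, src0), so  d c 0 0 -> c 0 0 0
##	0xFE = 11 11 11 10b: dest = (src3, src3, src3, src2), so  b a x y -> b b b a
##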
544&function_begin_B("_vpaes_schedule_192_smear");
545 &pshufd ("xmm0","xmm6",0x80); # d c 0 0 -> c 0 0 0
546 &pxor ("xmm6","xmm0"); # -> c+d c 0 0
547 &pshufd ("xmm0","xmm7",0xFE); # b a _ _ -> b b b a
548 &pxor ("xmm6","xmm0"); # -> b+c+d b+c b a
549 &movdqa ("xmm0","xmm6");
550 &pxor ("xmm1","xmm1");
551 &movhlps("xmm6","xmm1"); # clobber low side with zeros
552 &ret ();
553&function_end_B("_vpaes_schedule_192_smear");
554
555##
556## .aes_schedule_round
557##
558## Runs one main round of the key schedule on %xmm0, %xmm7
559##
560## Specifically, runs subbytes on the high dword of %xmm0
561## then rotates it by one byte and xors into the low dword of
562## %xmm7.
563##
564## Adds rcon from low byte of %xmm8, then rotates %xmm8 for
565## next rcon.
566##
567## Smears the dwords of %xmm7 by xoring the low into the
568## second low, result into third, result into highest.
569##
570## Returns results in %xmm7 = %xmm0.
571## Clobbers %xmm1-%xmm5.
572##
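## Roughly the scalar shape of this computation, in the classic FIPS-197
## formulation (a sketch only, with each 4-byte word packed big-endian and
## an aes_sbox[] table assumed rather than spelled out):
##
##	#include <stdint.h>
##	extern const uint8_t aes_sbox[256];	/* assumed: standard AES S-box */
##
##	static uint32_t sub_word(uint32_t w)	/* S-box applied to each byte */
##	{
##		return (uint32_t)aes_sbox[w>>24]<<24 | (uint32_t)aes_sbox[(w>>16)&0xff]<<16 |
##		       (uint32_t)aes_sbox[(w>>8)&0xff]<<8 | aes_sbox[w&0xff];
##	}
##	static uint32_t rot_word(uint32_t w) { return (w<<8) | (w>>24); }
##
##	/* one expansion round: prev[0..3] -> next[0..3] */
##	static void expand_round(const uint32_t prev[4], uint32_t next[4], uint32_t rcon)
##	{
##		uint32_t t = sub_word(rot_word(prev[3])) ^ rcon;
##		next[0] = prev[0] ^ t;		/* then "smear" across the words */
##		next[1] = prev[1] ^ next[0];
##		next[2] = prev[2] ^ next[1];
##		next[3] = prev[3] ^ next[2];
##	}
##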
573&function_begin_B("_vpaes_schedule_round");
574 # extract rcon from xmm8
575 &movdqa ("xmm2",&QWP(8,"esp")); # xmm8
576 &pxor ("xmm1","xmm1");
577 &palignr("xmm1","xmm2",15);
578 &palignr("xmm2","xmm2",15);
579 &pxor ("xmm7","xmm1");
580
581 # rotate
582 &pshufd ("xmm0","xmm0",0xFF);
583 &palignr("xmm0","xmm0",1);
584
585 # fall through...
586 &movdqa (&QWP(8,"esp"),"xmm2"); # xmm8
587
588 # low round: same as high round, but no rotation and no rcon.
589&set_label("_vpaes_schedule_low_round");
590 # smear xmm7
591 &movdqa ("xmm1","xmm7");
592 &pslldq ("xmm7",4);
593 &pxor ("xmm7","xmm1");
594 &movdqa ("xmm1","xmm7");
595 &pslldq ("xmm7",8);
596 &pxor ("xmm7","xmm1");
597 &pxor ("xmm7",&QWP($k_s63,$const));
598
599 # subbyte
600 &movdqa ("xmm4",&QWP($k_s0F,$const));
601 &movdqa ("xmm5",&QWP($k_inv,$const)); # 4 : 1/j
602 &movdqa ("xmm1","xmm4");
603 &pandn ("xmm1","xmm0");
604 &psrld ("xmm1",4); # 1 = i
605 &pand ("xmm0","xmm4"); # 0 = k
606 &movdqa ("xmm2",&QWP($k_inv+16,$const));# 2 : a/k
607 &pshufb ("xmm2","xmm0"); # 2 = a/k
608 &pxor ("xmm0","xmm1"); # 0 = j
609 &movdqa ("xmm3","xmm5"); # 3 : 1/i
610 &pshufb ("xmm3","xmm1"); # 3 = 1/i
611 &pxor ("xmm3","xmm2"); # 3 = iak = 1/i + a/k
612 &movdqa ("xmm4","xmm5"); # 4 : 1/j
613 &pshufb ("xmm4","xmm0"); # 4 = 1/j
614 &pxor ("xmm4","xmm2"); # 4 = jak = 1/j + a/k
615 &movdqa ("xmm2","xmm5"); # 2 : 1/iak
616 &pshufb ("xmm2","xmm3"); # 2 = 1/iak
617 &pxor ("xmm2","xmm0"); # 2 = io
618 &movdqa ("xmm3","xmm5"); # 3 : 1/jak
619 &pshufb ("xmm3","xmm4"); # 3 = 1/jak
620 &pxor ("xmm3","xmm1"); # 3 = jo
621 &movdqa ("xmm4",&QWP($k_sb1,$const)); # 4 : sbou
622 &pshufb ("xmm4","xmm2"); # 4 = sbou
623 &movdqa ("xmm0",&QWP($k_sb1+16,$const));# 0 : sbot
624 &pshufb ("xmm0","xmm3"); # 0 = sb1t
625 &pxor ("xmm0","xmm4"); # 0 = sbox output
626
627 # add in smeared stuff
628 &pxor ("xmm0","xmm7");
629 &movdqa ("xmm7","xmm0");
630 &ret ();
631&function_end_B("_vpaes_schedule_round");
632
633##
634## .aes_schedule_transform
635##
636## Linear-transform %xmm0 according to tables at (%ebx)
637##
638## Output in %xmm0
639## Clobbers %xmm1, %xmm2
640##
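## The pshufb trick used here, in scalar form: each byte is split into its
## two nibbles, each nibble indexes a 16-entry table, and the two lookups
## are xored. A sketch, with lo_tbl/hi_tbl standing in for the constant
## pair at ($base) and ($base)+16:
##
##	#include <stdint.h>
##
##	static void nibble_transform(uint8_t blk[16],
##			const uint8_t lo_tbl[16], const uint8_t hi_tbl[16])
##	{
##		int i;
##		for (i = 0; i < 16; i++)
##			blk[i] = lo_tbl[blk[i] & 0x0f] ^ hi_tbl[blk[i] >> 4];
##	}
##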
641&function_begin_B("_vpaes_schedule_transform");
642 &movdqa ("xmm2",&QWP($k_s0F,$const));
643 &movdqa ("xmm1","xmm2");
644 &pandn ("xmm1","xmm0");
645 &psrld ("xmm1",4);
646 &pand ("xmm0","xmm2");
647 &movdqa ("xmm2",&QWP(0,$base));
648 &pshufb ("xmm2","xmm0");
649 &movdqa ("xmm0",&QWP(16,$base));
650 &pshufb ("xmm0","xmm1");
651 &pxor ("xmm0","xmm2");
652 &ret ();
653&function_end_B("_vpaes_schedule_transform");
654
655##
656## .aes_schedule_mangle
657##
658## Mangle xmm0 from (basis-transformed) standard version
659## to our version.
660##
661## On encrypt,
662## xor with 0x63
663## multiply by circulant 0,1,1,1
664## apply shiftrows transform
665##
666## On decrypt,
667## xor with 0x63
668## multiply by "inverse mixcolumns" circulant E,B,D,9
669## deskew
670## apply shiftrows transform
671##
672##
673## Writes out to (%edx), and increments or decrements it
674## Keeps track of round number mod 4 in %ecx
675## Preserves xmm0
676## Clobbers xmm1-xmm5
677##
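## On the encrypt side, "multiply by circulant 0,1,1,1" amounts to
## replacing each byte of a column with the xor of the other three bytes
## of that column, which is what the three pshufb/pxor steps below
## accumulate. A scalar sketch for one 4-byte column:
##
##	#include <stdint.h>
##
##	static void mul_circulant_0111(const uint8_t in[4], uint8_t out[4])
##	{
##		int i;
##		for (i = 0; i < 4; i++)
##			out[i] = in[(i+1)&3] ^ in[(i+2)&3] ^ in[(i+3)&3];
##	}
##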
678&function_begin_B("_vpaes_schedule_mangle");
679 &movdqa ("xmm4","xmm0"); # save xmm0 for later
680 &movdqa ("xmm5",&QWP($k_mc_forward,$const));
681 &test ($out,$out);
682 &jnz (&label("schedule_mangle_dec"));
683
684 # encrypting
685 &add ($key,16);
686 &pxor ("xmm4",&QWP($k_s63,$const));
687 &pshufb ("xmm4","xmm5");
688 &movdqa ("xmm3","xmm4");
689 &pshufb ("xmm4","xmm5");
690 &pxor ("xmm3","xmm4");
691 &pshufb ("xmm4","xmm5");
692 &pxor ("xmm3","xmm4");
693
694 &jmp (&label("schedule_mangle_both"));
695
696&set_label("schedule_mangle_dec",16);
697 # inverse mix columns
698 &movdqa ("xmm2",&QWP($k_s0F,$const));
699 &lea ($inp,&DWP($k_dksd,$const));
700 &movdqa ("xmm1","xmm2");
701 &pandn ("xmm1","xmm4");
702 &psrld ("xmm1",4); # 1 = hi
703 &pand ("xmm4","xmm2"); # 4 = lo
704
705 &movdqa ("xmm2",&QWP(0,$inp));
706 &pshufb ("xmm2","xmm4");
707 &movdqa ("xmm3",&QWP(0x10,$inp));
708 &pshufb ("xmm3","xmm1");
709 &pxor ("xmm3","xmm2");
710 &pshufb ("xmm3","xmm5");
711
712 &movdqa ("xmm2",&QWP(0x20,$inp));
713 &pshufb ("xmm2","xmm4");
714 &pxor ("xmm2","xmm3");
715 &movdqa ("xmm3",&QWP(0x30,$inp));
716 &pshufb ("xmm3","xmm1");
717 &pxor ("xmm3","xmm2");
718 &pshufb ("xmm3","xmm5");
719
720 &movdqa ("xmm2",&QWP(0x40,$inp));
721 &pshufb ("xmm2","xmm4");
722 &pxor ("xmm2","xmm3");
723 &movdqa ("xmm3",&QWP(0x50,$inp));
724 &pshufb ("xmm3","xmm1");
725 &pxor ("xmm3","xmm2");
726 &pshufb ("xmm3","xmm5");
727
728 &movdqa ("xmm2",&QWP(0x60,$inp));
729 &pshufb ("xmm2","xmm4");
730 &pxor ("xmm2","xmm3");
731 &movdqa ("xmm3",&QWP(0x70,$inp));
732 &pshufb ("xmm3","xmm1");
733 &pxor ("xmm3","xmm2");
734
735 &add ($key,-16);
736
737&set_label("schedule_mangle_both");
738 &movdqa ("xmm1",&QWP($k_sr,$const,$magic));
739 &pshufb ("xmm3","xmm1");
740 &add ($magic,-16);
741 &and ($magic,0x30);
742 &movdqu (&QWP(0,$key),"xmm3");
743 &ret ();
744&function_end_B("_vpaes_schedule_mangle");
745
746#
747# Interface to OpenSSL
748#
749&function_begin("${PREFIX}_set_encrypt_key");
750 &mov ($inp,&wparam(0)); # inp
751 &lea ($base,&DWP(-56,"esp"));
752 &mov ($round,&wparam(1)); # bits
753 &and ($base,-16);
754 &mov ($key,&wparam(2)); # key
755 &xchg ($base,"esp"); # alloca
756 &mov (&DWP(48,"esp"),$base);
757
758 &mov ($base,$round);
759 &shr ($base,5);
760 &add ($base,5);
761 &mov (&DWP(240,$key),$base); # AES_KEY->rounds = nbits/32+5;
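	# worked out: 128/32+5 = 9, 192/32+5 = 11, 256/32+5 = 13, i.e. the
	# number of full rounds the encrypt/decrypt cores loop over; the
	# final round is handled separately after their loops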
762 &mov ($magic,0x30);
763 &mov ($out,0);
764
765 &lea ($const,&DWP(&label("_vpaes_consts")."+0x30-".&label("pic_point")));
766 &call ("_vpaes_schedule_core");
767&set_label("pic_point");
768
769 &mov ("esp",&DWP(48,"esp"));
770 &xor ("eax","eax");
771&function_end("${PREFIX}_set_encrypt_key");
772
773&function_begin("${PREFIX}_set_decrypt_key");
774 &mov ($inp,&wparam(0)); # inp
775 &lea ($base,&DWP(-56,"esp"));
776 &mov ($round,&wparam(1)); # bits
777 &and ($base,-16);
778 &mov ($key,&wparam(2)); # key
779 &xchg ($base,"esp"); # alloca
780 &mov (&DWP(48,"esp"),$base);
781
782 &mov ($base,$round);
783 &shr ($base,5);
784 &add ($base,5);
785 &mov (&DWP(240,$key),$base); # AES_KEY->rounds = nbits/32+5;
786 &shl ($base,4);
787 &lea ($key,&DWP(16,$key,$base));
788
789 &mov ($out,1);
790 &mov ($magic,$round);
791 &shr ($magic,1);
792 &and ($magic,32);
793	&xor	($magic,32);			# nbits==192?0:32
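	# worked out: ((128>>1)&32)^32 = 32, ((192>>1)&32)^32 = 0,
	# ((256>>1)&32)^32 = 32, matching nbits==192?0:32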
794
795 &lea ($const,&DWP(&label("_vpaes_consts")."+0x30-".&label("pic_point")));
796 &call ("_vpaes_schedule_core");
797&set_label("pic_point");
798
799 &mov ("esp",&DWP(48,"esp"));
800 &xor ("eax","eax");
801&function_end("${PREFIX}_set_decrypt_key");
802
803&function_begin("${PREFIX}_encrypt");
804 &lea ($const,&DWP(&label("_vpaes_consts")."+0x30-".&label("pic_point")));
805 &call ("_vpaes_preheat");
806&set_label("pic_point");
807 &mov ($inp,&wparam(0)); # inp
808 &lea ($base,&DWP(-56,"esp"));
809 &mov ($out,&wparam(1)); # out
810 &and ($base,-16);
811 &mov ($key,&wparam(2)); # key
812 &xchg ($base,"esp"); # alloca
813 &mov (&DWP(48,"esp"),$base);
814
815 &movdqu ("xmm0",&QWP(0,$inp));
816 &call ("_vpaes_encrypt_core");
817 &movdqu (&QWP(0,$out),"xmm0");
818
819 &mov ("esp",&DWP(48,"esp"));
820&function_end("${PREFIX}_encrypt");
821
822&function_begin("${PREFIX}_decrypt");
823 &lea ($const,&DWP(&label("_vpaes_consts")."+0x30-".&label("pic_point")));
824 &call ("_vpaes_preheat");
825&set_label("pic_point");
826 &mov ($inp,&wparam(0)); # inp
827 &lea ($base,&DWP(-56,"esp"));
828 &mov ($out,&wparam(1)); # out
829 &and ($base,-16);
830 &mov ($key,&wparam(2)); # key
831 &xchg ($base,"esp"); # alloca
832 &mov (&DWP(48,"esp"),$base);
833
834 &movdqu ("xmm0",&QWP(0,$inp));
835 &call ("_vpaes_decrypt_core");
836 &movdqu (&QWP(0,$out),"xmm0");
837
838 &mov ("esp",&DWP(48,"esp"));
839&function_end("${PREFIX}_decrypt");
840
841&function_begin("${PREFIX}_cbc_encrypt");
842 &mov ($inp,&wparam(0)); # inp
843 &mov ($out,&wparam(1)); # out
844 &mov ($round,&wparam(2)); # len
845 &mov ($key,&wparam(3)); # key
846 &sub ($round,16);
847 &jc (&label("cbc_abort"));
848 &lea ($base,&DWP(-56,"esp"));
849 &mov ($const,&wparam(4)); # ivp
850 &and ($base,-16);
851 &mov ($magic,&wparam(5)); # enc
852 &xchg ($base,"esp"); # alloca
853 &movdqu ("xmm1",&QWP(0,$const)); # load IV
854 &sub ($out,$inp);
855 &mov (&DWP(48,"esp"),$base);
856
857 &mov (&DWP(0,"esp"),$out); # save out
858	&mov	(&DWP(4,"esp"),$key);		# save key
859 &mov (&DWP(8,"esp"),$const); # save ivp
860 &mov ($out,$round); # $out works as $len
861
862 &lea ($const,&DWP(&label("_vpaes_consts")."+0x30-".&label("pic_point")));
863 &call ("_vpaes_preheat");
864&set_label("pic_point");
865 &cmp ($magic,0);
866 &je (&label("cbc_dec_loop"));
867 &jmp (&label("cbc_enc_loop"));
868
869&set_label("cbc_enc_loop",16);
870 &movdqu ("xmm0",&QWP(0,$inp)); # load input
871 &pxor ("xmm0","xmm1"); # inp^=iv
872 &call ("_vpaes_encrypt_core");
873 &mov ($base,&DWP(0,"esp")); # restore out
874 &mov ($key,&DWP(4,"esp")); # restore key
875 &movdqa ("xmm1","xmm0");
876 &movdqu (&QWP(0,$base,$inp),"xmm0"); # write output
877 &lea ($inp,&DWP(16,$inp));
878 &sub ($out,16);
879 &jnc (&label("cbc_enc_loop"));
880 &jmp (&label("cbc_done"));
881
882&set_label("cbc_dec_loop",16);
883 &movdqu ("xmm0",&QWP(0,$inp)); # load input
884 &movdqa (&QWP(16,"esp"),"xmm1"); # save IV
885 &movdqa (&QWP(32,"esp"),"xmm0"); # save future IV
886 &call ("_vpaes_decrypt_core");
887 &mov ($base,&DWP(0,"esp")); # restore out
888 &mov ($key,&DWP(4,"esp")); # restore key
889 &pxor ("xmm0",&QWP(16,"esp")); # out^=iv
890 &movdqa ("xmm1",&QWP(32,"esp")); # load next IV
891 &movdqu (&QWP(0,$base,$inp),"xmm0"); # write output
892 &lea ($inp,&DWP(16,$inp));
893 &sub ($out,16);
894 &jnc (&label("cbc_dec_loop"));
895
896&set_label("cbc_done");
897 &mov ($base,&DWP(8,"esp")); # restore ivp
898 &mov ("esp",&DWP(48,"esp"));
899 &movdqu (&QWP(0,$base),"xmm1"); # write IV
900&set_label("cbc_abort");
901&function_end("${PREFIX}_cbc_encrypt");
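#
# For reference, the chaining the two loops above implement, written out in
# scalar C (a sketch only: in-place for brevity, whole blocks only, with
# block_encrypt/block_decrypt standing in for the vpaes cores):
#
#	#include <stdint.h>
#	#include <string.h>
#
#	extern void block_encrypt(uint8_t b[16], const void *key);
#	extern void block_decrypt(uint8_t b[16], const void *key);
#	static void xor16(uint8_t *d, const uint8_t *s)
#	{ int i; for (i = 0; i < 16; i++) d[i] ^= s[i]; }
#
#	void cbc_sketch(uint8_t *buf, size_t len, const void *key,
#			uint8_t iv[16], int enc)
#	{
#		uint8_t next_iv[16];
#		size_t off;
#		for (off = 0; off + 16 <= len; off += 16) {
#			if (enc) {
#				xor16(buf + off, iv);		/* inp ^= iv */
#				block_encrypt(buf + off, key);
#				memcpy(iv, buf + off, 16);	/* ciphertext is next IV */
#			} else {
#				memcpy(next_iv, buf + off, 16);	/* save future IV */
#				block_decrypt(buf + off, key);
#				xor16(buf + off, iv);		/* out ^= iv */
#				memcpy(iv, next_iv, 16);
#			}
#		}
#	}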
902
903&asm_finish();
diff --git a/src/lib/libssl/src/crypto/aes/asm/vpaes-x86_64.pl b/src/lib/libssl/src/crypto/aes/asm/vpaes-x86_64.pl
new file mode 100644
index 0000000000..37998db5e1
--- /dev/null
+++ b/src/lib/libssl/src/crypto/aes/asm/vpaes-x86_64.pl
@@ -0,0 +1,1206 @@
1#!/usr/bin/env perl
2
3######################################################################
4## Constant-time SSSE3 AES core implementation.
5## version 0.1
6##
7## By Mike Hamburg (Stanford University), 2009
8## Public domain.
9##
10## For details see http://shiftleft.org/papers/vector_aes/ and
11## http://crypto.stanford.edu/vpaes/.
12
13######################################################################
14# September 2011.
15#
16# Interface to OpenSSL as "almost" drop-in replacement for
17# aes-x86_64.pl. "Almost" refers to the fact that AES_cbc_encrypt
18# doesn't handle partial vectors (doesn't have to if called from
19# EVP only). "Drop-in" implies that this module doesn't share key
20# schedule structure with the original, nor does it make assumptions
21# about its alignment...
22#
23# Performance summary. aes-x86_64.pl column lists large-block CBC
24# encrypt/decrypt/with-hyper-threading-off(*) results in cycles per
25# byte processed with 128-bit key, and vpaes-x86_64.pl column -
26# [also large-block CBC] encrypt/decrypt.
27#
28# aes-x86_64.pl vpaes-x86_64.pl
29#
30# Core 2(**) 30.5/43.7/14.3 21.8/25.7(***)
31# Nehalem 30.5/42.2/14.6 9.8/11.8
32# Atom 63.9/79.0/32.1 64.0/84.8(***)
33#
34# (*)	"Hyper-threading" in this context refers to cache shared
35#	among multiple cores rather than to Intel HTT specifically. As the
36#	vast majority of contemporary cores share cache, the slower code
37#	path is commonplace. In other words, "with-hyper-threading-off"
38#	results are presented mostly for reference purposes.
39#
40# (**) "Core 2" refers to initial 65nm design, a.k.a. Conroe.
41#
42# (***)	The less impressive improvement on Core 2 and Atom is due to slow
43#	pshufb, yet it is still a respectable +40%/78% improvement on Core 2
44#	(as implied, over the "hyper-threading-safe" code path).
45#
46# <appro@openssl.org>
47
48$flavour = shift;
49$output = shift;
50if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
51
52$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
53
54$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
55( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
56( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
57die "can't locate x86_64-xlate.pl";
58
59open STDOUT,"| $^X $xlate $flavour $output";
60
61$PREFIX="vpaes";
62
63$code.=<<___;
64.text
65
66##
67## _aes_encrypt_core
68##
69## AES-encrypt %xmm0.
70##
71## Inputs:
72## %xmm0 = input
73## %xmm9-%xmm15 as in _vpaes_preheat
74## (%rdx) = scheduled keys
75##
76## Output in %xmm0
77## Clobbers %xmm1-%xmm5, %r9, %r10, %r11, %rax
78## Preserves %xmm6 - %xmm8 so you get some local vectors
79##
80##
81.type _vpaes_encrypt_core,\@abi-omnipotent
82.align 16
83_vpaes_encrypt_core:
84 mov %rdx, %r9
85 mov \$16, %r11
86 mov 240(%rdx),%eax
87 movdqa %xmm9, %xmm1
88 movdqa .Lk_ipt(%rip), %xmm2 # iptlo
89 pandn %xmm0, %xmm1
90 movdqu (%r9), %xmm5 # round0 key
91 psrld \$4, %xmm1
92 pand %xmm9, %xmm0
93 pshufb %xmm0, %xmm2
94 movdqa .Lk_ipt+16(%rip), %xmm0 # ipthi
95 pshufb %xmm1, %xmm0
96 pxor %xmm5, %xmm2
97 pxor %xmm2, %xmm0
98 add \$16, %r9
99 lea .Lk_mc_backward(%rip),%r10
100 jmp .Lenc_entry
101
102.align 16
103.Lenc_loop:
104 # middle of middle round
105 movdqa %xmm13, %xmm4 # 4 : sb1u
106 pshufb %xmm2, %xmm4 # 4 = sb1u
107 pxor %xmm5, %xmm4 # 4 = sb1u + k
108 movdqa %xmm12, %xmm0 # 0 : sb1t
109 pshufb %xmm3, %xmm0 # 0 = sb1t
110 pxor %xmm4, %xmm0 # 0 = A
111 movdqa %xmm15, %xmm5 # 4 : sb2u
112 pshufb %xmm2, %xmm5 # 4 = sb2u
113 movdqa -0x40(%r11,%r10), %xmm1 # .Lk_mc_forward[]
114 movdqa %xmm14, %xmm2 # 2 : sb2t
115 pshufb %xmm3, %xmm2 # 2 = sb2t
116 pxor %xmm5, %xmm2 # 2 = 2A
117 movdqa (%r11,%r10), %xmm4 # .Lk_mc_backward[]
118 movdqa %xmm0, %xmm3 # 3 = A
119 pshufb %xmm1, %xmm0 # 0 = B
120 add \$16, %r9 # next key
121 pxor %xmm2, %xmm0 # 0 = 2A+B
122 pshufb %xmm4, %xmm3 # 3 = D
123 add \$16, %r11 # next mc
124 pxor %xmm0, %xmm3 # 3 = 2A+B+D
125 pshufb %xmm1, %xmm0 # 0 = 2B+C
126 and \$0x30, %r11 # ... mod 4
127 pxor %xmm3, %xmm0 # 0 = 2A+3B+C+D
128 sub \$1,%rax # nr--
129
130.Lenc_entry:
131 # top of round
132 movdqa %xmm9, %xmm1 # 1 : i
133 pandn %xmm0, %xmm1 # 1 = i<<4
134 psrld \$4, %xmm1 # 1 = i
135 pand %xmm9, %xmm0 # 0 = k
136 movdqa %xmm11, %xmm5 # 2 : a/k
137 pshufb %xmm0, %xmm5 # 2 = a/k
138 pxor %xmm1, %xmm0 # 0 = j
139 movdqa %xmm10, %xmm3 # 3 : 1/i
140 pshufb %xmm1, %xmm3 # 3 = 1/i
141 pxor %xmm5, %xmm3 # 3 = iak = 1/i + a/k
142 movdqa %xmm10, %xmm4 # 4 : 1/j
143 pshufb %xmm0, %xmm4 # 4 = 1/j
144 pxor %xmm5, %xmm4 # 4 = jak = 1/j + a/k
145 movdqa %xmm10, %xmm2 # 2 : 1/iak
146 pshufb %xmm3, %xmm2 # 2 = 1/iak
147 pxor %xmm0, %xmm2 # 2 = io
148 movdqa %xmm10, %xmm3 # 3 : 1/jak
149 movdqu (%r9), %xmm5
150 pshufb %xmm4, %xmm3 # 3 = 1/jak
151 pxor %xmm1, %xmm3 # 3 = jo
152 jnz .Lenc_loop
153
154 # middle of last round
155 movdqa -0x60(%r10), %xmm4 # 3 : sbou .Lk_sbo
156 movdqa -0x50(%r10), %xmm0 # 0 : sbot .Lk_sbo+16
157 pshufb %xmm2, %xmm4 # 4 = sbou
158 pxor %xmm5, %xmm4 # 4 = sb1u + k
159 pshufb %xmm3, %xmm0 # 0 = sb1t
160 movdqa 0x40(%r11,%r10), %xmm1 # .Lk_sr[]
161 pxor %xmm4, %xmm0 # 0 = A
162 pshufb %xmm1, %xmm0
163 ret
164.size _vpaes_encrypt_core,.-_vpaes_encrypt_core
165
166##
167## Decryption core
168##
169## Same API as encryption core.
170##
171.type _vpaes_decrypt_core,\@abi-omnipotent
172.align 16
173_vpaes_decrypt_core:
174 mov %rdx, %r9 # load key
175 mov 240(%rdx),%eax
176 movdqa %xmm9, %xmm1
177 movdqa .Lk_dipt(%rip), %xmm2 # iptlo
178 pandn %xmm0, %xmm1
179 mov %rax, %r11
180 psrld \$4, %xmm1
181 movdqu (%r9), %xmm5 # round0 key
182 shl \$4, %r11
183 pand %xmm9, %xmm0
184 pshufb %xmm0, %xmm2
185 movdqa .Lk_dipt+16(%rip), %xmm0 # ipthi
186 xor \$0x30, %r11
187 lea .Lk_dsbd(%rip),%r10
188 pshufb %xmm1, %xmm0
189 and \$0x30, %r11
190 pxor %xmm5, %xmm2
191 movdqa .Lk_mc_forward+48(%rip), %xmm5
192 pxor %xmm2, %xmm0
193 add \$16, %r9
194 add %r10, %r11
195 jmp .Ldec_entry
196
197.align 16
198.Ldec_loop:
199##
200## Inverse mix columns
201##
202 movdqa -0x20(%r10),%xmm4 # 4 : sb9u
203 pshufb %xmm2, %xmm4 # 4 = sb9u
204 pxor %xmm0, %xmm4
205 movdqa -0x10(%r10),%xmm0 # 0 : sb9t
206 pshufb %xmm3, %xmm0 # 0 = sb9t
207 pxor %xmm4, %xmm0 # 0 = ch
208 add \$16, %r9 # next round key
209
210 pshufb %xmm5, %xmm0 # MC ch
211 movdqa 0x00(%r10),%xmm4 # 4 : sbdu
212 pshufb %xmm2, %xmm4 # 4 = sbdu
213 pxor %xmm0, %xmm4 # 4 = ch
214 movdqa 0x10(%r10),%xmm0 # 0 : sbdt
215 pshufb %xmm3, %xmm0 # 0 = sbdt
216 pxor %xmm4, %xmm0 # 0 = ch
217 sub \$1,%rax # nr--
218
219 pshufb %xmm5, %xmm0 # MC ch
220 movdqa 0x20(%r10),%xmm4 # 4 : sbbu
221 pshufb %xmm2, %xmm4 # 4 = sbbu
222 pxor %xmm0, %xmm4 # 4 = ch
223 movdqa 0x30(%r10),%xmm0 # 0 : sbbt
224 pshufb %xmm3, %xmm0 # 0 = sbbt
225 pxor %xmm4, %xmm0 # 0 = ch
226
227 pshufb %xmm5, %xmm0 # MC ch
228 movdqa 0x40(%r10),%xmm4 # 4 : sbeu
229 pshufb %xmm2, %xmm4 # 4 = sbeu
230 pxor %xmm0, %xmm4 # 4 = ch
231 movdqa 0x50(%r10),%xmm0 # 0 : sbet
232 pshufb %xmm3, %xmm0 # 0 = sbet
233 pxor %xmm4, %xmm0 # 0 = ch
234
235 palignr \$12, %xmm5, %xmm5
236
237.Ldec_entry:
238 # top of round
239 movdqa %xmm9, %xmm1 # 1 : i
240 pandn %xmm0, %xmm1 # 1 = i<<4
241 psrld \$4, %xmm1 # 1 = i
242 pand %xmm9, %xmm0 # 0 = k
243 movdqa %xmm11, %xmm2 # 2 : a/k
244 pshufb %xmm0, %xmm2 # 2 = a/k
245 pxor %xmm1, %xmm0 # 0 = j
246 movdqa %xmm10, %xmm3 # 3 : 1/i
247 pshufb %xmm1, %xmm3 # 3 = 1/i
248 pxor %xmm2, %xmm3 # 3 = iak = 1/i + a/k
249 movdqa %xmm10, %xmm4 # 4 : 1/j
250 pshufb %xmm0, %xmm4 # 4 = 1/j
251 pxor %xmm2, %xmm4 # 4 = jak = 1/j + a/k
252 movdqa %xmm10, %xmm2 # 2 : 1/iak
253 pshufb %xmm3, %xmm2 # 2 = 1/iak
254 pxor %xmm0, %xmm2 # 2 = io
255 movdqa %xmm10, %xmm3 # 3 : 1/jak
256 pshufb %xmm4, %xmm3 # 3 = 1/jak
257 pxor %xmm1, %xmm3 # 3 = jo
258 movdqu (%r9), %xmm0
259 jnz .Ldec_loop
260
261 # middle of last round
262 movdqa 0x60(%r10), %xmm4 # 3 : sbou
263 pshufb %xmm2, %xmm4 # 4 = sbou
264 pxor %xmm0, %xmm4 # 4 = sb1u + k
265 movdqa 0x70(%r10), %xmm0 # 0 : sbot
266 movdqa -0x160(%r11), %xmm2 # .Lk_sr-.Lk_dsbd=-0x160
267 pshufb %xmm3, %xmm0 # 0 = sb1t
268 pxor %xmm4, %xmm0 # 0 = A
269 pshufb %xmm2, %xmm0
270 ret
271.size _vpaes_decrypt_core,.-_vpaes_decrypt_core
272
273########################################################
274## ##
275## AES key schedule ##
276## ##
277########################################################
278.type _vpaes_schedule_core,\@abi-omnipotent
279.align 16
280_vpaes_schedule_core:
281 # rdi = key
282 # rsi = size in bits
283 # rdx = buffer
284 # rcx = direction. 0=encrypt, 1=decrypt
285
286 call _vpaes_preheat # load the tables
287 movdqa .Lk_rcon(%rip), %xmm8 # load rcon
288 movdqu (%rdi), %xmm0 # load key (unaligned)
289
290 # input transform
291 movdqa %xmm0, %xmm3
292 lea .Lk_ipt(%rip), %r11
293 call _vpaes_schedule_transform
294 movdqa %xmm0, %xmm7
295
296 lea .Lk_sr(%rip),%r10
297 test %rcx, %rcx
298 jnz .Lschedule_am_decrypting
299
300 # encrypting, output zeroth round key after transform
301 movdqu %xmm0, (%rdx)
302 jmp .Lschedule_go
303
304.Lschedule_am_decrypting:
305 # decrypting, output zeroth round key after shiftrows
306 movdqa (%r8,%r10),%xmm1
307 pshufb %xmm1, %xmm3
308 movdqu %xmm3, (%rdx)
309 xor \$0x30, %r8
310
311.Lschedule_go:
312 cmp \$192, %esi
313 ja .Lschedule_256
314 je .Lschedule_192
315	# 128: fall through
316
317##
318## .schedule_128
319##
320## 128-bit specific part of key schedule.
321##
322## This schedule is really simple, because all its parts
323## are accomplished by the subroutines.
324##
325.Lschedule_128:
326 mov \$10, %esi
327
328.Loop_schedule_128:
329 call _vpaes_schedule_round
330 dec %rsi
331 jz .Lschedule_mangle_last
332 call _vpaes_schedule_mangle # write output
333 jmp .Loop_schedule_128
334
335##
336## .aes_schedule_192
337##
338## 192-bit specific part of key schedule.
339##
340## The main body of this schedule is the same as the 128-bit
341## schedule, but with more smearing. The long, high side is
342## stored in %xmm7 as before, and the short, low side is in
343## the high bits of %xmm6.
344##
345## This schedule is somewhat nastier, however, because each
346## round produces 192 bits of key material, or 1.5 round keys.
347## Therefore, on each cycle we do 2 rounds and produce 3 round
348## keys.
349##
350.align 16
351.Lschedule_192:
352 movdqu 8(%rdi),%xmm0 # load key part 2 (very unaligned)
353 call _vpaes_schedule_transform # input transform
354 movdqa %xmm0, %xmm6 # save short part
355 pxor %xmm4, %xmm4 # clear 4
356 movhlps %xmm4, %xmm6 # clobber low side with zeros
357 mov \$4, %esi
358
359.Loop_schedule_192:
360 call _vpaes_schedule_round
361 palignr \$8,%xmm6,%xmm0
362 call _vpaes_schedule_mangle # save key n
363 call _vpaes_schedule_192_smear
364 call _vpaes_schedule_mangle # save key n+1
365 call _vpaes_schedule_round
366 dec %rsi
367 jz .Lschedule_mangle_last
368 call _vpaes_schedule_mangle # save key n+2
369 call _vpaes_schedule_192_smear
370 jmp .Loop_schedule_192
371
372##
373## .aes_schedule_256
374##
375## 256-bit specific part of key schedule.
376##
377## The structure here is very similar to the 128-bit
378## schedule, but with an additional "low side" in
379## %xmm6. The low side's rounds are the same as the
380## high side's, except no rcon and no rotation.
381##
382.align 16
383.Lschedule_256:
384 movdqu 16(%rdi),%xmm0 # load key part 2 (unaligned)
385 call _vpaes_schedule_transform # input transform
386 mov \$7, %esi
387
388.Loop_schedule_256:
389 call _vpaes_schedule_mangle # output low result
390 movdqa %xmm0, %xmm6 # save cur_lo in xmm6
391
392 # high round
393 call _vpaes_schedule_round
394 dec %rsi
395 jz .Lschedule_mangle_last
396 call _vpaes_schedule_mangle
397
398 # low round. swap xmm7 and xmm6
399 pshufd \$0xFF, %xmm0, %xmm0
400 movdqa %xmm7, %xmm5
401 movdqa %xmm6, %xmm7
402 call _vpaes_schedule_low_round
403 movdqa %xmm5, %xmm7
404
405 jmp .Loop_schedule_256
406
407
408##
409## .aes_schedule_mangle_last
410##
411## Mangler for last round of key schedule
412## Mangles %xmm0
413## when encrypting, outputs out(%xmm0) ^ 63
414## when decrypting, outputs unskew(%xmm0)
415##
416## Always called right before return... jumps to cleanup and exits
417##
418.align 16
419.Lschedule_mangle_last:
420 # schedule last round key from xmm0
421 lea .Lk_deskew(%rip),%r11 # prepare to deskew
422 test %rcx, %rcx
423 jnz .Lschedule_mangle_last_dec
424
425 # encrypting
426 movdqa (%r8,%r10),%xmm1
427 pshufb %xmm1, %xmm0 # output permute
428 lea .Lk_opt(%rip), %r11 # prepare to output transform
429 add \$32, %rdx
430
431.Lschedule_mangle_last_dec:
432 add \$-16, %rdx
433 pxor .Lk_s63(%rip), %xmm0
434 call _vpaes_schedule_transform # output transform
435 movdqu %xmm0, (%rdx) # save last key
436
437 # cleanup
438 pxor %xmm0, %xmm0
439 pxor %xmm1, %xmm1
440 pxor %xmm2, %xmm2
441 pxor %xmm3, %xmm3
442 pxor %xmm4, %xmm4
443 pxor %xmm5, %xmm5
444 pxor %xmm6, %xmm6
445 pxor %xmm7, %xmm7
446 ret
447.size _vpaes_schedule_core,.-_vpaes_schedule_core
448
449##
450## .aes_schedule_192_smear
451##
452## Smear the short, low side in the 192-bit key schedule.
453##
454## Inputs:
455## %xmm7: high side, b a x y
456## %xmm6: low side, d c 0 0
457## %xmm13: 0
458##
459## Outputs:
460## %xmm6: b+c+d b+c 0 0
461## %xmm0: b+c+d b+c b a
462##
463.type _vpaes_schedule_192_smear,\@abi-omnipotent
464.align 16
465_vpaes_schedule_192_smear:
466 pshufd \$0x80, %xmm6, %xmm0 # d c 0 0 -> c 0 0 0
467 pxor %xmm0, %xmm6 # -> c+d c 0 0
468 pshufd \$0xFE, %xmm7, %xmm0 # b a _ _ -> b b b a
469 pxor %xmm0, %xmm6 # -> b+c+d b+c b a
470 movdqa %xmm6, %xmm0
471 pxor %xmm1, %xmm1
472 movhlps %xmm1, %xmm6 # clobber low side with zeros
473 ret
474.size _vpaes_schedule_192_smear,.-_vpaes_schedule_192_smear
475
476##
477## .aes_schedule_round
478##
479## Runs one main round of the key schedule on %xmm0, %xmm7
480##
481## Specifically, runs subbytes on the high dword of %xmm0
482## then rotates it by one byte and xors into the low dword of
483## %xmm7.
484##
485## Adds rcon from low byte of %xmm8, then rotates %xmm8 for
486## next rcon.
487##
488## Smears the dwords of %xmm7 by xoring the low into the
489## second low, result into third, result into highest.
490##
491## Returns results in %xmm7 = %xmm0.
492## Clobbers %xmm1-%xmm4, %r11.
493##
494.type _vpaes_schedule_round,\@abi-omnipotent
495.align 16
496_vpaes_schedule_round:
497 # extract rcon from xmm8
498 pxor %xmm1, %xmm1
499 palignr \$15, %xmm8, %xmm1
500 palignr \$15, %xmm8, %xmm8
501 pxor %xmm1, %xmm7
502
503 # rotate
504 pshufd \$0xFF, %xmm0, %xmm0
505 palignr \$1, %xmm0, %xmm0
506
507 # fall through...
508
509 # low round: same as high round, but no rotation and no rcon.
510_vpaes_schedule_low_round:
511 # smear xmm7
512 movdqa %xmm7, %xmm1
513 pslldq \$4, %xmm7
514 pxor %xmm1, %xmm7
515 movdqa %xmm7, %xmm1
516 pslldq \$8, %xmm7
517 pxor %xmm1, %xmm7
518 pxor .Lk_s63(%rip), %xmm7
519
520 # subbytes
521 movdqa %xmm9, %xmm1
522 pandn %xmm0, %xmm1
523 psrld \$4, %xmm1 # 1 = i
524 pand %xmm9, %xmm0 # 0 = k
525 movdqa %xmm11, %xmm2 # 2 : a/k
526 pshufb %xmm0, %xmm2 # 2 = a/k
527 pxor %xmm1, %xmm0 # 0 = j
528 movdqa %xmm10, %xmm3 # 3 : 1/i
529 pshufb %xmm1, %xmm3 # 3 = 1/i
530 pxor %xmm2, %xmm3 # 3 = iak = 1/i + a/k
531 movdqa %xmm10, %xmm4 # 4 : 1/j
532 pshufb %xmm0, %xmm4 # 4 = 1/j
533 pxor %xmm2, %xmm4 # 4 = jak = 1/j + a/k
534 movdqa %xmm10, %xmm2 # 2 : 1/iak
535 pshufb %xmm3, %xmm2 # 2 = 1/iak
536 pxor %xmm0, %xmm2 # 2 = io
537 movdqa %xmm10, %xmm3 # 3 : 1/jak
538 pshufb %xmm4, %xmm3 # 3 = 1/jak
539 pxor %xmm1, %xmm3 # 3 = jo
540 movdqa %xmm13, %xmm4 # 4 : sbou
541 pshufb %xmm2, %xmm4 # 4 = sbou
542 movdqa %xmm12, %xmm0 # 0 : sbot
543 pshufb %xmm3, %xmm0 # 0 = sb1t
544 pxor %xmm4, %xmm0 # 0 = sbox output
545
546 # add in smeared stuff
547 pxor %xmm7, %xmm0
548 movdqa %xmm0, %xmm7
549 ret
550.size _vpaes_schedule_round,.-_vpaes_schedule_round
551
552##
553## .aes_schedule_transform
554##
555## Linear-transform %xmm0 according to tables at (%r11)
556##
557## Requires that %xmm9 = 0x0F0F... as in preheat
558## Output in %xmm0
559## Clobbers %xmm1, %xmm2
560##
561.type _vpaes_schedule_transform,\@abi-omnipotent
562.align 16
563_vpaes_schedule_transform:
564 movdqa %xmm9, %xmm1
565 pandn %xmm0, %xmm1
566 psrld \$4, %xmm1
567 pand %xmm9, %xmm0
568 movdqa (%r11), %xmm2 # lo
569 pshufb %xmm0, %xmm2
570 movdqa 16(%r11), %xmm0 # hi
571 pshufb %xmm1, %xmm0
572 pxor %xmm2, %xmm0
573 ret
574.size _vpaes_schedule_transform,.-_vpaes_schedule_transform
575
576##
577## .aes_schedule_mangle
578##
579## Mangle xmm0 from (basis-transformed) standard version
580## to our version.
581##
582## On encrypt,
583## xor with 0x63
584## multiply by circulant 0,1,1,1
585## apply shiftrows transform
586##
587## On decrypt,
588## xor with 0x63
589## multiply by "inverse mixcolumns" circulant E,B,D,9
590## deskew
591## apply shiftrows transform
592##
593##
594## Writes out to (%rdx), and increments or decrements it
595## Keeps track of round number mod 4 in %r8
596## Preserves xmm0
597## Clobbers xmm1-xmm5
598##
599.type _vpaes_schedule_mangle,\@abi-omnipotent
600.align 16
601_vpaes_schedule_mangle:
602 movdqa %xmm0, %xmm4 # save xmm0 for later
603 movdqa .Lk_mc_forward(%rip),%xmm5
604 test %rcx, %rcx
605 jnz .Lschedule_mangle_dec
606
607 # encrypting
608 add \$16, %rdx
609 pxor .Lk_s63(%rip),%xmm4
610 pshufb %xmm5, %xmm4
611 movdqa %xmm4, %xmm3
612 pshufb %xmm5, %xmm4
613 pxor %xmm4, %xmm3
614 pshufb %xmm5, %xmm4
615 pxor %xmm4, %xmm3
616
617 jmp .Lschedule_mangle_both
618.align 16
619.Lschedule_mangle_dec:
620 # inverse mix columns
621 lea .Lk_dksd(%rip),%r11
622 movdqa %xmm9, %xmm1
623 pandn %xmm4, %xmm1
624 psrld \$4, %xmm1 # 1 = hi
625 pand %xmm9, %xmm4 # 4 = lo
626
627 movdqa 0x00(%r11), %xmm2
628 pshufb %xmm4, %xmm2
629 movdqa 0x10(%r11), %xmm3
630 pshufb %xmm1, %xmm3
631 pxor %xmm2, %xmm3
632 pshufb %xmm5, %xmm3
633
634 movdqa 0x20(%r11), %xmm2
635 pshufb %xmm4, %xmm2
636 pxor %xmm3, %xmm2
637 movdqa 0x30(%r11), %xmm3
638 pshufb %xmm1, %xmm3
639 pxor %xmm2, %xmm3
640 pshufb %xmm5, %xmm3
641
642 movdqa 0x40(%r11), %xmm2
643 pshufb %xmm4, %xmm2
644 pxor %xmm3, %xmm2
645 movdqa 0x50(%r11), %xmm3
646 pshufb %xmm1, %xmm3
647 pxor %xmm2, %xmm3
648 pshufb %xmm5, %xmm3
649
650 movdqa 0x60(%r11), %xmm2
651 pshufb %xmm4, %xmm2
652 pxor %xmm3, %xmm2
653 movdqa 0x70(%r11), %xmm3
654 pshufb %xmm1, %xmm3
655 pxor %xmm2, %xmm3
656
657 add \$-16, %rdx
658
659.Lschedule_mangle_both:
660 movdqa (%r8,%r10),%xmm1
661 pshufb %xmm1,%xmm3
662 add \$-16, %r8
663 and \$0x30, %r8
664 movdqu %xmm3, (%rdx)
665 ret
666.size _vpaes_schedule_mangle,.-_vpaes_schedule_mangle
667
668#
669# Interface to OpenSSL
670#
671.globl ${PREFIX}_set_encrypt_key
672.type ${PREFIX}_set_encrypt_key,\@function,3
673.align 16
674${PREFIX}_set_encrypt_key:
675___
676$code.=<<___ if ($win64);
677 lea -0xb8(%rsp),%rsp
678 movaps %xmm6,0x10(%rsp)
679 movaps %xmm7,0x20(%rsp)
680 movaps %xmm8,0x30(%rsp)
681 movaps %xmm9,0x40(%rsp)
682 movaps %xmm10,0x50(%rsp)
683 movaps %xmm11,0x60(%rsp)
684 movaps %xmm12,0x70(%rsp)
685 movaps %xmm13,0x80(%rsp)
686 movaps %xmm14,0x90(%rsp)
687 movaps %xmm15,0xa0(%rsp)
688.Lenc_key_body:
689___
690$code.=<<___;
691 mov %esi,%eax
692 shr \$5,%eax
693 add \$5,%eax
694 mov %eax,240(%rdx) # AES_KEY->rounds = nbits/32+5;
695
696 mov \$0,%ecx
697 mov \$0x30,%r8d
698 call _vpaes_schedule_core
699___
700$code.=<<___ if ($win64);
701 movaps 0x10(%rsp),%xmm6
702 movaps 0x20(%rsp),%xmm7
703 movaps 0x30(%rsp),%xmm8
704 movaps 0x40(%rsp),%xmm9
705 movaps 0x50(%rsp),%xmm10
706 movaps 0x60(%rsp),%xmm11
707 movaps 0x70(%rsp),%xmm12
708 movaps 0x80(%rsp),%xmm13
709 movaps 0x90(%rsp),%xmm14
710 movaps 0xa0(%rsp),%xmm15
711 lea 0xb8(%rsp),%rsp
712.Lenc_key_epilogue:
713___
714$code.=<<___;
715 xor %eax,%eax
716 ret
717.size ${PREFIX}_set_encrypt_key,.-${PREFIX}_set_encrypt_key
718
719.globl ${PREFIX}_set_decrypt_key
720.type ${PREFIX}_set_decrypt_key,\@function,3
721.align 16
722${PREFIX}_set_decrypt_key:
723___
724$code.=<<___ if ($win64);
725 lea -0xb8(%rsp),%rsp
726 movaps %xmm6,0x10(%rsp)
727 movaps %xmm7,0x20(%rsp)
728 movaps %xmm8,0x30(%rsp)
729 movaps %xmm9,0x40(%rsp)
730 movaps %xmm10,0x50(%rsp)
731 movaps %xmm11,0x60(%rsp)
732 movaps %xmm12,0x70(%rsp)
733 movaps %xmm13,0x80(%rsp)
734 movaps %xmm14,0x90(%rsp)
735 movaps %xmm15,0xa0(%rsp)
736.Ldec_key_body:
737___
738$code.=<<___;
739 mov %esi,%eax
740 shr \$5,%eax
741 add \$5,%eax
742 mov %eax,240(%rdx) # AES_KEY->rounds = nbits/32+5;
743 shl \$4,%eax
744 lea 16(%rdx,%rax),%rdx
745
746 mov \$1,%ecx
747 mov %esi,%r8d
748 shr \$1,%r8d
749 and \$32,%r8d
750 xor \$32,%r8d # nbits==192?0:32
751 call _vpaes_schedule_core
752___
753$code.=<<___ if ($win64);
754 movaps 0x10(%rsp),%xmm6
755 movaps 0x20(%rsp),%xmm7
756 movaps 0x30(%rsp),%xmm8
757 movaps 0x40(%rsp),%xmm9
758 movaps 0x50(%rsp),%xmm10
759 movaps 0x60(%rsp),%xmm11
760 movaps 0x70(%rsp),%xmm12
761 movaps 0x80(%rsp),%xmm13
762 movaps 0x90(%rsp),%xmm14
763 movaps 0xa0(%rsp),%xmm15
764 lea 0xb8(%rsp),%rsp
765.Ldec_key_epilogue:
766___
767$code.=<<___;
768 xor %eax,%eax
769 ret
770.size ${PREFIX}_set_decrypt_key,.-${PREFIX}_set_decrypt_key
771
772.globl ${PREFIX}_encrypt
773.type ${PREFIX}_encrypt,\@function,3
774.align 16
775${PREFIX}_encrypt:
776___
777$code.=<<___ if ($win64);
778 lea -0xb8(%rsp),%rsp
779 movaps %xmm6,0x10(%rsp)
780 movaps %xmm7,0x20(%rsp)
781 movaps %xmm8,0x30(%rsp)
782 movaps %xmm9,0x40(%rsp)
783 movaps %xmm10,0x50(%rsp)
784 movaps %xmm11,0x60(%rsp)
785 movaps %xmm12,0x70(%rsp)
786 movaps %xmm13,0x80(%rsp)
787 movaps %xmm14,0x90(%rsp)
788 movaps %xmm15,0xa0(%rsp)
789.Lenc_body:
790___
791$code.=<<___;
792 movdqu (%rdi),%xmm0
793 call _vpaes_preheat
794 call _vpaes_encrypt_core
795 movdqu %xmm0,(%rsi)
796___
797$code.=<<___ if ($win64);
798 movaps 0x10(%rsp),%xmm6
799 movaps 0x20(%rsp),%xmm7
800 movaps 0x30(%rsp),%xmm8
801 movaps 0x40(%rsp),%xmm9
802 movaps 0x50(%rsp),%xmm10
803 movaps 0x60(%rsp),%xmm11
804 movaps 0x70(%rsp),%xmm12
805 movaps 0x80(%rsp),%xmm13
806 movaps 0x90(%rsp),%xmm14
807 movaps 0xa0(%rsp),%xmm15
808 lea 0xb8(%rsp),%rsp
809.Lenc_epilogue:
810___
811$code.=<<___;
812 ret
813.size ${PREFIX}_encrypt,.-${PREFIX}_encrypt
814
815.globl ${PREFIX}_decrypt
816.type ${PREFIX}_decrypt,\@function,3
817.align 16
818${PREFIX}_decrypt:
819___
820$code.=<<___ if ($win64);
821 lea -0xb8(%rsp),%rsp
822 movaps %xmm6,0x10(%rsp)
823 movaps %xmm7,0x20(%rsp)
824 movaps %xmm8,0x30(%rsp)
825 movaps %xmm9,0x40(%rsp)
826 movaps %xmm10,0x50(%rsp)
827 movaps %xmm11,0x60(%rsp)
828 movaps %xmm12,0x70(%rsp)
829 movaps %xmm13,0x80(%rsp)
830 movaps %xmm14,0x90(%rsp)
831 movaps %xmm15,0xa0(%rsp)
832.Ldec_body:
833___
834$code.=<<___;
835 movdqu (%rdi),%xmm0
836 call _vpaes_preheat
837 call _vpaes_decrypt_core
838 movdqu %xmm0,(%rsi)
839___
840$code.=<<___ if ($win64);
841 movaps 0x10(%rsp),%xmm6
842 movaps 0x20(%rsp),%xmm7
843 movaps 0x30(%rsp),%xmm8
844 movaps 0x40(%rsp),%xmm9
845 movaps 0x50(%rsp),%xmm10
846 movaps 0x60(%rsp),%xmm11
847 movaps 0x70(%rsp),%xmm12
848 movaps 0x80(%rsp),%xmm13
849 movaps 0x90(%rsp),%xmm14
850 movaps 0xa0(%rsp),%xmm15
851 lea 0xb8(%rsp),%rsp
852.Ldec_epilogue:
853___
854$code.=<<___;
855 ret
856.size ${PREFIX}_decrypt,.-${PREFIX}_decrypt
857___
858{
859my ($inp,$out,$len,$key,$ivp,$enc)=("%rdi","%rsi","%rdx","%rcx","%r8","%r9");
860# void AES_cbc_encrypt (const unsigned char *inp, unsigned char *out,
861# size_t length, const AES_KEY *key,
862# unsigned char *ivp,const int enc);
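#
# Illustrative call sequence (a sketch only; key_bytes, in, out and len are
# placeholders, the prototypes are written out by hand here, and in the tree
# these entry points are normally reached through the EVP AES glue):
#
#	int  vpaes_set_encrypt_key(const unsigned char *userKey, int bits,
#					AES_KEY *key);
#	void vpaes_cbc_encrypt(const unsigned char *in, unsigned char *out,
#					size_t length, const AES_KEY *key,
#					unsigned char *ivec, int enc);
#
#	AES_KEY ks;
#	unsigned char iv[16] = {0};
#	vpaes_set_encrypt_key(key_bytes, 128, &ks);
#	vpaes_cbc_encrypt(in, out, len, &ks, iv, 1);	/* len: multiple of 16 */
#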
863$code.=<<___;
864.globl ${PREFIX}_cbc_encrypt
865.type ${PREFIX}_cbc_encrypt,\@function,6
866.align 16
867${PREFIX}_cbc_encrypt:
868 xchg $key,$len
869___
870($len,$key)=($key,$len);
871$code.=<<___;
872 sub \$16,$len
873 jc .Lcbc_abort
874___
875$code.=<<___ if ($win64);
876 lea -0xb8(%rsp),%rsp
877 movaps %xmm6,0x10(%rsp)
878 movaps %xmm7,0x20(%rsp)
879 movaps %xmm8,0x30(%rsp)
880 movaps %xmm9,0x40(%rsp)
881 movaps %xmm10,0x50(%rsp)
882 movaps %xmm11,0x60(%rsp)
883 movaps %xmm12,0x70(%rsp)
884 movaps %xmm13,0x80(%rsp)
885 movaps %xmm14,0x90(%rsp)
886 movaps %xmm15,0xa0(%rsp)
887.Lcbc_body:
888___
889$code.=<<___;
890 movdqu ($ivp),%xmm6 # load IV
891 sub $inp,$out
892 call _vpaes_preheat
893 cmp \$0,${enc}d
894 je .Lcbc_dec_loop
895 jmp .Lcbc_enc_loop
896.align 16
897.Lcbc_enc_loop:
898 movdqu ($inp),%xmm0
899 pxor %xmm6,%xmm0
900 call _vpaes_encrypt_core
901 movdqa %xmm0,%xmm6
902 movdqu %xmm0,($out,$inp)
903 lea 16($inp),$inp
904 sub \$16,$len
905 jnc .Lcbc_enc_loop
906 jmp .Lcbc_done
907.align 16
908.Lcbc_dec_loop:
909 movdqu ($inp),%xmm0
910 movdqa %xmm0,%xmm7
911 call _vpaes_decrypt_core
912 pxor %xmm6,%xmm0
913 movdqa %xmm7,%xmm6
914 movdqu %xmm0,($out,$inp)
915 lea 16($inp),$inp
916 sub \$16,$len
917 jnc .Lcbc_dec_loop
918.Lcbc_done:
919 movdqu %xmm6,($ivp) # save IV
920___
921$code.=<<___ if ($win64);
922 movaps 0x10(%rsp),%xmm6
923 movaps 0x20(%rsp),%xmm7
924 movaps 0x30(%rsp),%xmm8
925 movaps 0x40(%rsp),%xmm9
926 movaps 0x50(%rsp),%xmm10
927 movaps 0x60(%rsp),%xmm11
928 movaps 0x70(%rsp),%xmm12
929 movaps 0x80(%rsp),%xmm13
930 movaps 0x90(%rsp),%xmm14
931 movaps 0xa0(%rsp),%xmm15
932 lea 0xb8(%rsp),%rsp
933.Lcbc_epilogue:
934___
935$code.=<<___;
936.Lcbc_abort:
937 ret
938.size ${PREFIX}_cbc_encrypt,.-${PREFIX}_cbc_encrypt
939___
940}
941$code.=<<___;
942##
943## _aes_preheat
944##
945## Fills register %r10 -> .aes_consts (so you can -fPIC)
946## and %xmm9-%xmm15 as specified below.
947##
948.type _vpaes_preheat,\@abi-omnipotent
949.align 16
950_vpaes_preheat:
951 lea .Lk_s0F(%rip), %r10
952 movdqa -0x20(%r10), %xmm10 # .Lk_inv
953 movdqa -0x10(%r10), %xmm11 # .Lk_inv+16
954 movdqa 0x00(%r10), %xmm9 # .Lk_s0F
955 movdqa 0x30(%r10), %xmm13 # .Lk_sb1
956 movdqa 0x40(%r10), %xmm12 # .Lk_sb1+16
957 movdqa 0x50(%r10), %xmm15 # .Lk_sb2
958 movdqa 0x60(%r10), %xmm14 # .Lk_sb2+16
959 ret
960.size _vpaes_preheat,.-_vpaes_preheat
961########################################################
962## ##
963## Constants ##
964## ##
965########################################################
966.type _vpaes_consts,\@object
967.align 64
968_vpaes_consts:
969.Lk_inv: # inv, inva
970 .quad 0x0E05060F0D080180, 0x040703090A0B0C02
971 .quad 0x01040A060F0B0780, 0x030D0E0C02050809
972
973.Lk_s0F: # s0F
974 .quad 0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F
975
976.Lk_ipt: # input transform (lo, hi)
977 .quad 0xC2B2E8985A2A7000, 0xCABAE09052227808
978 .quad 0x4C01307D317C4D00, 0xCD80B1FCB0FDCC81
979
980.Lk_sb1: # sb1u, sb1t
981 .quad 0xB19BE18FCB503E00, 0xA5DF7A6E142AF544
982 .quad 0x3618D415FAE22300, 0x3BF7CCC10D2ED9EF
983.Lk_sb2: # sb2u, sb2t
984 .quad 0xE27A93C60B712400, 0x5EB7E955BC982FCD
985 .quad 0x69EB88400AE12900, 0xC2A163C8AB82234A
986.Lk_sbo: # sbou, sbot
987 .quad 0xD0D26D176FBDC700, 0x15AABF7AC502A878
988 .quad 0xCFE474A55FBB6A00, 0x8E1E90D1412B35FA
989
990.Lk_mc_forward: # mc_forward
991 .quad 0x0407060500030201, 0x0C0F0E0D080B0A09
992 .quad 0x080B0A0904070605, 0x000302010C0F0E0D
993 .quad 0x0C0F0E0D080B0A09, 0x0407060500030201
994 .quad 0x000302010C0F0E0D, 0x080B0A0904070605
995
996.Lk_mc_backward:# mc_backward
997 .quad 0x0605040702010003, 0x0E0D0C0F0A09080B
998 .quad 0x020100030E0D0C0F, 0x0A09080B06050407
999 .quad 0x0E0D0C0F0A09080B, 0x0605040702010003
1000 .quad 0x0A09080B06050407, 0x020100030E0D0C0F
1001
1002.Lk_sr: # sr
1003 .quad 0x0706050403020100, 0x0F0E0D0C0B0A0908
1004 .quad 0x030E09040F0A0500, 0x0B06010C07020D08
1005 .quad 0x0F060D040B020900, 0x070E050C030A0108
1006 .quad 0x0B0E0104070A0D00, 0x0306090C0F020508
1007
1008.Lk_rcon: # rcon
1009 .quad 0x1F8391B9AF9DEEB6, 0x702A98084D7C7D81
1010
1011.Lk_s63: # s63: all equal to 0x63 transformed
1012 .quad 0x5B5B5B5B5B5B5B5B, 0x5B5B5B5B5B5B5B5B
1013
1014.Lk_opt: # output transform
1015 .quad 0xFF9F4929D6B66000, 0xF7974121DEBE6808
1016 .quad 0x01EDBD5150BCEC00, 0xE10D5DB1B05C0CE0
1017
1018.Lk_deskew: # deskew tables: inverts the sbox's "skew"
1019 .quad 0x07E4A34047A4E300, 0x1DFEB95A5DBEF91A
1020 .quad 0x5F36B5DC83EA6900, 0x2841C2ABF49D1E77
1021
1022##
1023## Decryption stuff
1024## Key schedule constants
1025##
1026.Lk_dksd: # decryption key schedule: invskew x*D
1027 .quad 0xFEB91A5DA3E44700, 0x0740E3A45A1DBEF9
1028 .quad 0x41C277F4B5368300, 0x5FDC69EAAB289D1E
1029.Lk_dksb: # decryption key schedule: invskew x*B
1030 .quad 0x9A4FCA1F8550D500, 0x03D653861CC94C99
1031 .quad 0x115BEDA7B6FC4A00, 0xD993256F7E3482C8
1032.Lk_dkse: # decryption key schedule: invskew x*E + 0x63
1033 .quad 0xD5031CCA1FC9D600, 0x53859A4C994F5086
1034 .quad 0xA23196054FDC7BE8, 0xCD5EF96A20B31487
1035.Lk_dks9: # decryption key schedule: invskew x*9
1036 .quad 0xB6116FC87ED9A700, 0x4AED933482255BFC
1037 .quad 0x4576516227143300, 0x8BB89FACE9DAFDCE
1038
1039##
1040## Decryption stuff
1041## Round function constants
1042##
1043.Lk_dipt: # decryption input transform
1044 .quad 0x0F505B040B545F00, 0x154A411E114E451A
1045 .quad 0x86E383E660056500, 0x12771772F491F194
1046
1047.Lk_dsb9: # decryption sbox output *9*u, *9*t
1048 .quad 0x851C03539A86D600, 0xCAD51F504F994CC9
1049 .quad 0xC03B1789ECD74900, 0x725E2C9EB2FBA565
1050.Lk_dsbd: # decryption sbox output *D*u, *D*t
1051 .quad 0x7D57CCDFE6B1A200, 0xF56E9B13882A4439
1052 .quad 0x3CE2FAF724C6CB00, 0x2931180D15DEEFD3
1053.Lk_dsbb: # decryption sbox output *B*u, *B*t
1054 .quad 0xD022649296B44200, 0x602646F6B0F2D404
1055 .quad 0xC19498A6CD596700, 0xF3FF0C3E3255AA6B
1056.Lk_dsbe: # decryption sbox output *E*u, *E*t
1057 .quad 0x46F2929626D4D000, 0x2242600464B4F6B0
1058 .quad 0x0C55A6CDFFAAC100, 0x9467F36B98593E32
1059.Lk_dsbo: # decryption sbox final output
1060 .quad 0x1387EA537EF94000, 0xC7AA6DB9D4943E2D
1061 .quad 0x12D7560F93441D00, 0xCA4B8159D8C58E9C
1062.asciz	"Vector Permutation AES for x86_64/SSSE3, Mike Hamburg (Stanford University)"
1063.align 64
1064.size _vpaes_consts,.-_vpaes_consts
1065___
1066
1067if ($win64) {
1068# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame,
1069# CONTEXT *context,DISPATCHER_CONTEXT *disp)
1070$rec="%rcx";
1071$frame="%rdx";
1072$context="%r8";
1073$disp="%r9";
1074
1075$code.=<<___;
1076.extern __imp_RtlVirtualUnwind
1077.type se_handler,\@abi-omnipotent
1078.align 16
1079se_handler:
1080 push %rsi
1081 push %rdi
1082 push %rbx
1083 push %rbp
1084 push %r12
1085 push %r13
1086 push %r14
1087 push %r15
1088 pushfq
1089 sub \$64,%rsp
1090
1091 mov 120($context),%rax # pull context->Rax
1092 mov 248($context),%rbx # pull context->Rip
1093
1094 mov 8($disp),%rsi # disp->ImageBase
1095 mov 56($disp),%r11 # disp->HandlerData
1096
1097 mov 0(%r11),%r10d # HandlerData[0]
1098 lea (%rsi,%r10),%r10 # prologue label
1099 cmp %r10,%rbx # context->Rip<prologue label
1100 jb .Lin_prologue
1101
1102 mov 152($context),%rax # pull context->Rsp
1103
1104 mov 4(%r11),%r10d # HandlerData[1]
1105 lea (%rsi,%r10),%r10 # epilogue label
1106 cmp %r10,%rbx # context->Rip>=epilogue label
1107 jae .Lin_prologue
1108
1109 lea 16(%rax),%rsi # %xmm save area
1110 lea 512($context),%rdi # &context.Xmm6
1111 mov \$20,%ecx # 10*sizeof(%xmm0)/sizeof(%rax)
1112 .long 0xa548f3fc # cld; rep movsq
1113 lea 0xb8(%rax),%rax # adjust stack pointer
1114
1115.Lin_prologue:
1116 mov 8(%rax),%rdi
1117 mov 16(%rax),%rsi
1118 mov %rax,152($context) # restore context->Rsp
1119 mov %rsi,168($context) # restore context->Rsi
1120 mov %rdi,176($context) # restore context->Rdi
1121
1122 mov 40($disp),%rdi # disp->ContextRecord
1123 mov $context,%rsi # context
1124 mov \$`1232/8`,%ecx # sizeof(CONTEXT)
1125 .long 0xa548f3fc # cld; rep movsq
1126
1127 mov $disp,%rsi
1128 xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER
1129 mov 8(%rsi),%rdx # arg2, disp->ImageBase
1130 mov 0(%rsi),%r8 # arg3, disp->ControlPc
1131 mov 16(%rsi),%r9 # arg4, disp->FunctionEntry
1132 mov 40(%rsi),%r10 # disp->ContextRecord
1133 lea 56(%rsi),%r11 # &disp->HandlerData
1134 lea 24(%rsi),%r12 # &disp->EstablisherFrame
1135 mov %r10,32(%rsp) # arg5
1136 mov %r11,40(%rsp) # arg6
1137 mov %r12,48(%rsp) # arg7
1138 mov %rcx,56(%rsp) # arg8, (NULL)
1139 call *__imp_RtlVirtualUnwind(%rip)
1140
1141 mov \$1,%eax # ExceptionContinueSearch
1142 add \$64,%rsp
1143 popfq
1144 pop %r15
1145 pop %r14
1146 pop %r13
1147 pop %r12
1148 pop %rbp
1149 pop %rbx
1150 pop %rdi
1151 pop %rsi
1152 ret
1153.size se_handler,.-se_handler
1154
1155.section .pdata
1156.align 4
1157 .rva .LSEH_begin_${PREFIX}_set_encrypt_key
1158 .rva .LSEH_end_${PREFIX}_set_encrypt_key
1159 .rva .LSEH_info_${PREFIX}_set_encrypt_key
1160
1161 .rva .LSEH_begin_${PREFIX}_set_decrypt_key
1162 .rva .LSEH_end_${PREFIX}_set_decrypt_key
1163 .rva .LSEH_info_${PREFIX}_set_decrypt_key
1164
1165 .rva .LSEH_begin_${PREFIX}_encrypt
1166 .rva .LSEH_end_${PREFIX}_encrypt
1167 .rva .LSEH_info_${PREFIX}_encrypt
1168
1169 .rva .LSEH_begin_${PREFIX}_decrypt
1170 .rva .LSEH_end_${PREFIX}_decrypt
1171 .rva .LSEH_info_${PREFIX}_decrypt
1172
1173 .rva .LSEH_begin_${PREFIX}_cbc_encrypt
1174 .rva .LSEH_end_${PREFIX}_cbc_encrypt
1175 .rva .LSEH_info_${PREFIX}_cbc_encrypt
1176
1177.section .xdata
1178.align 8
1179.LSEH_info_${PREFIX}_set_encrypt_key:
1180 .byte 9,0,0,0
1181 .rva se_handler
1182 .rva .Lenc_key_body,.Lenc_key_epilogue # HandlerData[]
1183.LSEH_info_${PREFIX}_set_decrypt_key:
1184 .byte 9,0,0,0
1185 .rva se_handler
1186 .rva .Ldec_key_body,.Ldec_key_epilogue # HandlerData[]
1187.LSEH_info_${PREFIX}_encrypt:
1188 .byte 9,0,0,0
1189 .rva se_handler
1190 .rva .Lenc_body,.Lenc_epilogue # HandlerData[]
1191.LSEH_info_${PREFIX}_decrypt:
1192 .byte 9,0,0,0
1193 .rva se_handler
1194 .rva .Ldec_body,.Ldec_epilogue # HandlerData[]
1195.LSEH_info_${PREFIX}_cbc_encrypt:
1196 .byte 9,0,0,0
1197 .rva se_handler
1198 .rva .Lcbc_body,.Lcbc_epilogue # HandlerData[]
1199___
1200}
1201
1202$code =~ s/\`([^\`]*)\`/eval($1)/gem;
1203
1204print $code;
1205
1206close STDOUT;
diff --git a/src/lib/libssl/src/crypto/arm_arch.h b/src/lib/libssl/src/crypto/arm_arch.h
new file mode 100644
index 0000000000..5a83107680
--- /dev/null
+++ b/src/lib/libssl/src/crypto/arm_arch.h
@@ -0,0 +1,51 @@
1#ifndef __ARM_ARCH_H__
2#define __ARM_ARCH_H__
3
4#if !defined(__ARM_ARCH__)
5# if defined(__CC_ARM)
6# define __ARM_ARCH__ __TARGET_ARCH_ARM
7# if defined(__BIG_ENDIAN)
8# define __ARMEB__
9# else
10# define __ARMEL__
11# endif
12# elif defined(__GNUC__)
13 /*
14	 * Why doesn't gcc define __ARM_ARCH__? Instead it defines a
15	 * bunch of the macros below. See the all_architectures[] table in
16 * gcc/config/arm/arm.c. On a side note it defines
17 * __ARMEL__/__ARMEB__ for little-/big-endian.
18 */
19# if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || \
20 defined(__ARM_ARCH_7R__)|| defined(__ARM_ARCH_7M__) || \
21 defined(__ARM_ARCH_7EM__)
22# define __ARM_ARCH__ 7
23# elif defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \
24 defined(__ARM_ARCH_6K__)|| defined(__ARM_ARCH_6M__) || \
25 defined(__ARM_ARCH_6Z__)|| defined(__ARM_ARCH_6ZK__) || \
26 defined(__ARM_ARCH_6T2__)
27# define __ARM_ARCH__ 6
28# elif defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5T__) || \
29 defined(__ARM_ARCH_5E__)|| defined(__ARM_ARCH_5TE__) || \
30 defined(__ARM_ARCH_5TEJ__)
31# define __ARM_ARCH__ 5
32# elif defined(__ARM_ARCH_4__) || defined(__ARM_ARCH_4T__)
33# define __ARM_ARCH__ 4
34# else
35# error "unsupported ARM architecture"
36# endif
37# endif
38#endif
39
40#ifdef OPENSSL_FIPSCANISTER
41#include <openssl/fipssyms.h>
42#endif
43
44#if !__ASSEMBLER__
45extern unsigned int OPENSSL_armcap_P;
46
47#define ARMV7_NEON (1<<0)
48#define ARMV7_TICK (1<<1)
49#endif
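
/*
 * Illustrative use of the capability bits above (a sketch only;
 * neon_path() and generic_path() are hypothetical helpers, not part of
 * this tree):
 *
 *	extern void neon_path(void);
 *	extern void generic_path(void);
 *
 *	if (OPENSSL_armcap_P & ARMV7_NEON)
 *		neon_path();
 *	else
 *		generic_path();
 */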
50
51#endif
diff --git a/src/lib/libssl/src/crypto/armcap.c b/src/lib/libssl/src/crypto/armcap.c
new file mode 100644
index 0000000000..5258d2fbdd
--- /dev/null
+++ b/src/lib/libssl/src/crypto/armcap.c
@@ -0,0 +1,80 @@
1#include <stdio.h>
2#include <stdlib.h>
3#include <string.h>
4#include <setjmp.h>
5#include <signal.h>
6#include <crypto.h>
7
8#include "arm_arch.h"
9
10unsigned int OPENSSL_armcap_P;
11
12static sigset_t all_masked;
13
14static sigjmp_buf ill_jmp;
15static void ill_handler (int sig) { siglongjmp(ill_jmp,sig); }
16
17/*
18 * The following subroutines could have been inlined, but not all
19 * ARM compilers support inline assembler...
20 */
21void _armv7_neon_probe(void);
22unsigned int _armv7_tick(void);
23
24unsigned int OPENSSL_rdtsc(void)
25 {
26	if (OPENSSL_armcap_P & ARMV7_TICK)
27 return _armv7_tick();
28 else
29 return 0;
30 }
31
32#if defined(__GNUC__) && __GNUC__>=2
33void OPENSSL_cpuid_setup(void) __attribute__((constructor));
34#endif
35void OPENSSL_cpuid_setup(void)
36 {
37 char *e;
38 struct sigaction ill_oact,ill_act;
39 sigset_t oset;
40 static int trigger=0;
41
42 if (trigger) return;
43 trigger=1;
44
45 if ((e=getenv("OPENSSL_armcap")))
46 {
47 OPENSSL_armcap_P=strtoul(e,NULL,0);
48 return;
49 }
50
51 sigfillset(&all_masked);
52 sigdelset(&all_masked,SIGILL);
53 sigdelset(&all_masked,SIGTRAP);
54 sigdelset(&all_masked,SIGFPE);
55 sigdelset(&all_masked,SIGBUS);
56 sigdelset(&all_masked,SIGSEGV);
57
58 OPENSSL_armcap_P = 0;
59
60 memset(&ill_act,0,sizeof(ill_act));
61 ill_act.sa_handler = ill_handler;
62 ill_act.sa_mask = all_masked;
63
64 sigprocmask(SIG_SETMASK,&ill_act.sa_mask,&oset);
65 sigaction(SIGILL,&ill_act,&ill_oact);
66
67 if (sigsetjmp(ill_jmp,1) == 0)
68 {
69 _armv7_neon_probe();
70 OPENSSL_armcap_P |= ARMV7_NEON;
71 }
72 if (sigsetjmp(ill_jmp,1) == 0)
73 {
74 _armv7_tick();
75 OPENSSL_armcap_P |= ARMV7_TICK;
76 }
77
78 sigaction (SIGILL,&ill_oact,NULL);
79 sigprocmask(SIG_SETMASK,&oset,NULL);
80 }
diff --git a/src/lib/libssl/src/crypto/armv4cpuid.S b/src/lib/libssl/src/crypto/armv4cpuid.S
new file mode 100644
index 0000000000..2d618deaa4
--- /dev/null
+++ b/src/lib/libssl/src/crypto/armv4cpuid.S
@@ -0,0 +1,154 @@
1#include "arm_arch.h"
2
3.text
4.code 32
5
6.align 5
7.global _armv7_neon_probe
8.type _armv7_neon_probe,%function
9_armv7_neon_probe:
10 .word 0xf26ee1fe @ vorr q15,q15,q15
11 .word 0xe12fff1e @ bx lr
12.size _armv7_neon_probe,.-_armv7_neon_probe
13
14.global _armv7_tick
15.type _armv7_tick,%function
16_armv7_tick:
17 mrc p15,0,r0,c9,c13,0
18 .word 0xe12fff1e @ bx lr
19.size _armv7_tick,.-_armv7_tick
20
21.global OPENSSL_atomic_add
22.type OPENSSL_atomic_add,%function
23OPENSSL_atomic_add:
24#if __ARM_ARCH__>=6
25.Ladd: ldrex r2,[r0]
26 add r3,r2,r1
27 strex r2,r3,[r0]
28 cmp r2,#0
29 bne .Ladd
30 mov r0,r3
31 .word 0xe12fff1e @ bx lr
32#else
33 stmdb sp!,{r4-r6,lr}
34 ldr r2,.Lspinlock
35 adr r3,.Lspinlock
36 mov r4,r0
37 mov r5,r1
38 add r6,r3,r2 @ &spinlock
39 b .+8
40.Lspin: bl sched_yield
41 mov r0,#-1
42 swp r0,r0,[r6]
43 cmp r0,#0
44 bne .Lspin
45
46 ldr r2,[r4]
47 add r2,r2,r5
48 str r2,[r4]
49 str r0,[r6] @ release spinlock
50 ldmia sp!,{r4-r6,lr}
51 tst lr,#1
52 moveq pc,lr
53 .word 0xe12fff1e @ bx lr
54#endif
55.size OPENSSL_atomic_add,.-OPENSSL_atomic_add
56
57.global OPENSSL_cleanse
58.type OPENSSL_cleanse,%function
59OPENSSL_cleanse:
60 eor ip,ip,ip
61 cmp r1,#7
62 subhs r1,r1,#4
63 bhs .Lot
64 cmp r1,#0
65 beq .Lcleanse_done
66.Little:
67 strb ip,[r0],#1
68 subs r1,r1,#1
69 bhi .Little
70 b .Lcleanse_done
71
72.Lot: tst r0,#3
73 beq .Laligned
74 strb ip,[r0],#1
75 sub r1,r1,#1
76 b .Lot
77.Laligned:
78 str ip,[r0],#4
79 subs r1,r1,#4
80 bhs .Laligned
81 adds r1,r1,#4
82 bne .Little
83.Lcleanse_done:
84 tst lr,#1
85 moveq pc,lr
86 .word 0xe12fff1e @ bx lr
87.size OPENSSL_cleanse,.-OPENSSL_cleanse
88
89.global OPENSSL_wipe_cpu
90.type OPENSSL_wipe_cpu,%function
91OPENSSL_wipe_cpu:
92 ldr r0,.LOPENSSL_armcap
93 adr r1,.LOPENSSL_armcap
94 ldr r0,[r1,r0]
95 eor r2,r2,r2
96 eor r3,r3,r3
97 eor ip,ip,ip
98 tst r0,#1
99 beq .Lwipe_done
100 .word 0xf3000150 @ veor q0, q0, q0
101 .word 0xf3022152 @ veor q1, q1, q1
102 .word 0xf3044154 @ veor q2, q2, q2
103 .word 0xf3066156 @ veor q3, q3, q3
104 .word 0xf34001f0 @ veor q8, q8, q8
105 .word 0xf34221f2 @ veor q9, q9, q9
106 .word 0xf34441f4 @ veor q10, q10, q10
107 .word 0xf34661f6 @ veor q11, q11, q11
108 .word 0xf34881f8 @ veor q12, q12, q12
109 .word 0xf34aa1fa @ veor q13, q13, q13
110 .word 0xf34cc1fc @ veor q14, q14, q14
111 .word 0xf34ee1fe @ veor q15, q15, q15
112.Lwipe_done:
113 mov r0,sp
114 tst lr,#1
115 moveq pc,lr
116 .word 0xe12fff1e @ bx lr
117.size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
118
119.global OPENSSL_instrument_bus
120.type OPENSSL_instrument_bus,%function
121OPENSSL_instrument_bus:
122 eor r0,r0,r0
123 tst lr,#1
124 moveq pc,lr
125 .word 0xe12fff1e @ bx lr
126.size OPENSSL_instrument_bus,.-OPENSSL_instrument_bus
127
128.global OPENSSL_instrument_bus2
129.type OPENSSL_instrument_bus2,%function
130OPENSSL_instrument_bus2:
131 eor r0,r0,r0
132 tst lr,#1
133 moveq pc,lr
134 .word 0xe12fff1e @ bx lr
135.size OPENSSL_instrument_bus2,.-OPENSSL_instrument_bus2
136
137.align 5
138.LOPENSSL_armcap:
139.word OPENSSL_armcap_P-.LOPENSSL_armcap
140#if __ARM_ARCH__>=6
141.align 5
142#else
143.Lspinlock:
144.word atomic_add_spinlock-.Lspinlock
145.align 5
146
147.data
148.align 2
149atomic_add_spinlock:
150.word 0
151#endif
152
153.comm OPENSSL_armcap_P,4,4
154.hidden OPENSSL_armcap_P
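
Among the routines above, OPENSSL_cleanse() zeroes leading bytes until the buffer is word aligned, then clears whole words, then the remaining tail bytes. The C sketch below models that alignment strategy; it is only an illustration and ignores the dead-store-elimination concerns a real cleanse has to address.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* C model of the cleanse loop above: head bytes until aligned, then whole
 * words, then tail bytes. */
static void cleanse_model(void *p, size_t len)
	{
	unsigned char *b = p;

	while (len > 0 && ((uintptr_t)b & 3) != 0)	/* head bytes */
		{
		*b++ = 0;
		len--;
		}
	while (len >= 4)				/* aligned words */
		{
		*(uint32_t *)b = 0;
		b += 4;
		len -= 4;
		}
	while (len-- > 0)				/* tail bytes */
		*b++ = 0;
	}

int main(void)
	{
	char secret[16] = "hunter2";
	cleanse_model(secret, sizeof(secret));
	printf("%d\n", secret[0] | secret[15]);		/* prints 0 */
	return 0;
	}
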
diff --git a/src/lib/libssl/src/crypto/asn1/ameth_lib.c b/src/lib/libssl/src/crypto/asn1/ameth_lib.c
index 5a581b90ea..a19e058fca 100644
--- a/src/lib/libssl/src/crypto/asn1/ameth_lib.c
+++ b/src/lib/libssl/src/crypto/asn1/ameth_lib.c
@@ -69,6 +69,7 @@ extern const EVP_PKEY_ASN1_METHOD dsa_asn1_meths[];
69extern const EVP_PKEY_ASN1_METHOD dh_asn1_meth; 69extern const EVP_PKEY_ASN1_METHOD dh_asn1_meth;
70extern const EVP_PKEY_ASN1_METHOD eckey_asn1_meth; 70extern const EVP_PKEY_ASN1_METHOD eckey_asn1_meth;
71extern const EVP_PKEY_ASN1_METHOD hmac_asn1_meth; 71extern const EVP_PKEY_ASN1_METHOD hmac_asn1_meth;
72extern const EVP_PKEY_ASN1_METHOD cmac_asn1_meth;
72 73
73/* Keep this sorted in type order !! */ 74/* Keep this sorted in type order !! */
74static const EVP_PKEY_ASN1_METHOD *standard_methods[] = 75static const EVP_PKEY_ASN1_METHOD *standard_methods[] =
@@ -90,7 +91,8 @@ static const EVP_PKEY_ASN1_METHOD *standard_methods[] =
90#ifndef OPENSSL_NO_EC 91#ifndef OPENSSL_NO_EC
91 &eckey_asn1_meth, 92 &eckey_asn1_meth,
92#endif 93#endif
93 &hmac_asn1_meth 94 &hmac_asn1_meth,
95 &cmac_asn1_meth
94 }; 96 };
95 97
96typedef int sk_cmp_fn_type(const char * const *a, const char * const *b); 98typedef int sk_cmp_fn_type(const char * const *a, const char * const *b);
@@ -291,6 +293,8 @@ EVP_PKEY_ASN1_METHOD* EVP_PKEY_asn1_new(int id, int flags,
291 if (!ameth) 293 if (!ameth)
292 return NULL; 294 return NULL;
293 295
296 memset(ameth, 0, sizeof(EVP_PKEY_ASN1_METHOD));
297
294 ameth->pkey_id = id; 298 ameth->pkey_id = id;
295 ameth->pkey_base_id = id; 299 ameth->pkey_base_id = id;
296 ameth->pkey_flags = flags | ASN1_PKEY_DYNAMIC; 300 ameth->pkey_flags = flags | ASN1_PKEY_DYNAMIC;
@@ -325,6 +329,9 @@ EVP_PKEY_ASN1_METHOD* EVP_PKEY_asn1_new(int id, int flags,
325 ameth->old_priv_encode = 0; 329 ameth->old_priv_encode = 0;
326 ameth->old_priv_decode = 0; 330 ameth->old_priv_decode = 0;
327 331
332 ameth->item_verify = 0;
333 ameth->item_sign = 0;
334
328 ameth->pkey_size = 0; 335 ameth->pkey_size = 0;
329 ameth->pkey_bits = 0; 336 ameth->pkey_bits = 0;
330 337
@@ -376,6 +383,9 @@ void EVP_PKEY_asn1_copy(EVP_PKEY_ASN1_METHOD *dst,
376 dst->pkey_free = src->pkey_free; 383 dst->pkey_free = src->pkey_free;
377 dst->pkey_ctrl = src->pkey_ctrl; 384 dst->pkey_ctrl = src->pkey_ctrl;
378 385
386 dst->item_sign = src->item_sign;
387 dst->item_verify = src->item_verify;
388
379 } 389 }
380 390
381void EVP_PKEY_asn1_free(EVP_PKEY_ASN1_METHOD *ameth) 391void EVP_PKEY_asn1_free(EVP_PKEY_ASN1_METHOD *ameth)
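
The memset() added above means a method created with EVP_PKEY_asn1_new() now starts with every hook zeroed, including the new item_sign/item_verify pointers. A hedged sketch of the public API this protects follows; it only allocates and frees a method without registering a working algorithm, and NID_undef plus the strings are placeholders.

#include <openssl/evp.h>
#include <openssl/objects.h>
#include <stdio.h>

int main(void)
	{
	/* A real engine would pass its own NID, flags and PEM tag here. */
	EVP_PKEY_ASN1_METHOD *ameth =
		EVP_PKEY_asn1_new(NID_undef, 0, "DEMO", "demo key");

	if (ameth == NULL)
		{
		fprintf(stderr, "EVP_PKEY_asn1_new failed\n");
		return 1;
		}

	/* Thanks to the zero-initialisation, every optional callback,
	 * item_sign/item_verify included, stays NULL until installed via
	 * the EVP_PKEY_asn1_set_*() setters. */
	EVP_PKEY_asn1_free(ameth);
	return 0;
	}
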
diff --git a/src/lib/libssl/src/crypto/asn1/asn1_locl.h b/src/lib/libssl/src/crypto/asn1/asn1_locl.h
index 5aa65e28f5..9fcf0d9530 100644
--- a/src/lib/libssl/src/crypto/asn1/asn1_locl.h
+++ b/src/lib/libssl/src/crypto/asn1/asn1_locl.h
@@ -102,6 +102,10 @@ struct evp_pkey_asn1_method_st
102 int (*param_cmp)(const EVP_PKEY *a, const EVP_PKEY *b); 102 int (*param_cmp)(const EVP_PKEY *a, const EVP_PKEY *b);
103 int (*param_print)(BIO *out, const EVP_PKEY *pkey, int indent, 103 int (*param_print)(BIO *out, const EVP_PKEY *pkey, int indent,
104 ASN1_PCTX *pctx); 104 ASN1_PCTX *pctx);
105 int (*sig_print)(BIO *out,
106 const X509_ALGOR *sigalg, const ASN1_STRING *sig,
107 int indent, ASN1_PCTX *pctx);
108
105 109
106 void (*pkey_free)(EVP_PKEY *pkey); 110 void (*pkey_free)(EVP_PKEY *pkey);
107 int (*pkey_ctrl)(EVP_PKEY *pkey, int op, long arg1, void *arg2); 111 int (*pkey_ctrl)(EVP_PKEY *pkey, int op, long arg1, void *arg2);
@@ -111,6 +115,13 @@ struct evp_pkey_asn1_method_st
111 int (*old_priv_decode)(EVP_PKEY *pkey, 115 int (*old_priv_decode)(EVP_PKEY *pkey,
112 const unsigned char **pder, int derlen); 116 const unsigned char **pder, int derlen);
113 int (*old_priv_encode)(const EVP_PKEY *pkey, unsigned char **pder); 117 int (*old_priv_encode)(const EVP_PKEY *pkey, unsigned char **pder);
118 /* Custom ASN1 signature verification */
119 int (*item_verify)(EVP_MD_CTX *ctx, const ASN1_ITEM *it, void *asn,
120 X509_ALGOR *a, ASN1_BIT_STRING *sig,
121 EVP_PKEY *pkey);
122 int (*item_sign)(EVP_MD_CTX *ctx, const ASN1_ITEM *it, void *asn,
123 X509_ALGOR *alg1, X509_ALGOR *alg2,
124 ASN1_BIT_STRING *sig);
114 125
115 } /* EVP_PKEY_ASN1_METHOD */; 126 } /* EVP_PKEY_ASN1_METHOD */;
116 127
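
For reference, here is a do-nothing callback matching the item_verify shape declared above. This is purely an illustration of the signature for readers of this internal header, not a usable verifier; the return-value convention noted in the comment follows the ASN1_item_verify() callers elsewhere in this import.

#include <openssl/asn1.h>
#include <openssl/evp.h>
#include <openssl/x509.h>

/* Matches the item_verify prototype added to EVP_PKEY_ASN1_METHOD above.
 * By convention a return of 2 tells ASN1_item_verify() to continue with the
 * default digest-and-verify path; any other value is returned as the final
 * result. */
static int demo_item_verify(EVP_MD_CTX *ctx, const ASN1_ITEM *it, void *asn,
			    X509_ALGOR *a, ASN1_BIT_STRING *sig, EVP_PKEY *pkey)
	{
	(void)ctx; (void)it; (void)asn; (void)a; (void)sig; (void)pkey;
	return -1;	/* this sketch refuses everything */
	}

int main(void)
	{
	/* Exercise the stub so the example is a complete translation unit. */
	return demo_item_verify(NULL, NULL, NULL, NULL, NULL, NULL) == -1 ? 0 : 1;
	}
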
diff --git a/src/lib/libssl/src/crypto/asn1/asn_mime.c b/src/lib/libssl/src/crypto/asn1/asn_mime.c
index c1d1b12291..54a704a969 100644
--- a/src/lib/libssl/src/crypto/asn1/asn_mime.c
+++ b/src/lib/libssl/src/crypto/asn1/asn_mime.c
@@ -377,8 +377,12 @@ static int asn1_output_data(BIO *out, BIO *data, ASN1_VALUE *val, int flags,
377 BIO *tmpbio; 377 BIO *tmpbio;
378 const ASN1_AUX *aux = it->funcs; 378 const ASN1_AUX *aux = it->funcs;
379 ASN1_STREAM_ARG sarg; 379 ASN1_STREAM_ARG sarg;
380 int rv = 1;
380 381
381	if (!(flags & SMIME_DETACHED))	382	/* If the data is not detached, or we are re-signing, then the output BIO is
383 * already set up to finalise when it is written through.
384 */
385 if (!(flags & SMIME_DETACHED) || (flags & PKCS7_REUSE_DIGEST))
382 { 386 {
383 SMIME_crlf_copy(data, out, flags); 387 SMIME_crlf_copy(data, out, flags);
384 return 1; 388 return 1;
@@ -405,7 +409,7 @@ static int asn1_output_data(BIO *out, BIO *data, ASN1_VALUE *val, int flags,
405 409
406 /* Finalize structure */ 410 /* Finalize structure */
407 if (aux->asn1_cb(ASN1_OP_DETACHED_POST, &val, it, &sarg) <= 0) 411 if (aux->asn1_cb(ASN1_OP_DETACHED_POST, &val, it, &sarg) <= 0)
408 return 0; 412 rv = 0;
409 413
410 /* Now remove any digests prepended to the BIO */ 414 /* Now remove any digests prepended to the BIO */
411 415
@@ -416,7 +420,7 @@ static int asn1_output_data(BIO *out, BIO *data, ASN1_VALUE *val, int flags,
416 sarg.ndef_bio = tmpbio; 420 sarg.ndef_bio = tmpbio;
417 } 421 }
418 422
419 return 1; 423 return rv;
420 424
421 } 425 }
422 426
@@ -486,9 +490,9 @@ ASN1_VALUE *SMIME_read_ASN1(BIO *bio, BIO **bcont, const ASN1_ITEM *it)
486 490
487 if(strcmp(hdr->value, "application/x-pkcs7-signature") && 491 if(strcmp(hdr->value, "application/x-pkcs7-signature") &&
488 strcmp(hdr->value, "application/pkcs7-signature")) { 492 strcmp(hdr->value, "application/pkcs7-signature")) {
489 sk_MIME_HEADER_pop_free(headers, mime_hdr_free);
490 ASN1err(ASN1_F_SMIME_READ_ASN1,ASN1_R_SIG_INVALID_MIME_TYPE); 493 ASN1err(ASN1_F_SMIME_READ_ASN1,ASN1_R_SIG_INVALID_MIME_TYPE);
491 ERR_add_error_data(2, "type: ", hdr->value); 494 ERR_add_error_data(2, "type: ", hdr->value);
495 sk_MIME_HEADER_pop_free(headers, mime_hdr_free);
492 sk_BIO_pop_free(parts, BIO_vfree); 496 sk_BIO_pop_free(parts, BIO_vfree);
493 return NULL; 497 return NULL;
494 } 498 }
@@ -801,7 +805,7 @@ static MIME_HEADER *mime_hdr_new(char *name, char *value)
801 if(name) { 805 if(name) {
802 if(!(tmpname = BUF_strdup(name))) return NULL; 806 if(!(tmpname = BUF_strdup(name))) return NULL;
803 for(p = tmpname ; *p; p++) { 807 for(p = tmpname ; *p; p++) {
804 c = *p; 808 c = (unsigned char)*p;
805 if(isupper(c)) { 809 if(isupper(c)) {
806 c = tolower(c); 810 c = tolower(c);
807 *p = c; 811 *p = c;
@@ -811,7 +815,7 @@ static MIME_HEADER *mime_hdr_new(char *name, char *value)
811 if(value) { 815 if(value) {
812 if(!(tmpval = BUF_strdup(value))) return NULL; 816 if(!(tmpval = BUF_strdup(value))) return NULL;
813 for(p = tmpval ; *p; p++) { 817 for(p = tmpval ; *p; p++) {
814 c = *p; 818 c = (unsigned char)*p;
815 if(isupper(c)) { 819 if(isupper(c)) {
816 c = tolower(c); 820 c = tolower(c);
817 *p = c; 821 *p = c;
@@ -835,7 +839,7 @@ static int mime_hdr_addparam(MIME_HEADER *mhdr, char *name, char *value)
835 tmpname = BUF_strdup(name); 839 tmpname = BUF_strdup(name);
836 if(!tmpname) return 0; 840 if(!tmpname) return 0;
837 for(p = tmpname ; *p; p++) { 841 for(p = tmpname ; *p; p++) {
838 c = *p; 842 c = (unsigned char)*p;
839 if(isupper(c)) { 843 if(isupper(c)) {
840 c = tolower(c); 844 c = tolower(c);
841 *p = c; 845 *p = c;
@@ -858,12 +862,17 @@ static int mime_hdr_addparam(MIME_HEADER *mhdr, char *name, char *value)
858static int mime_hdr_cmp(const MIME_HEADER * const *a, 862static int mime_hdr_cmp(const MIME_HEADER * const *a,
859 const MIME_HEADER * const *b) 863 const MIME_HEADER * const *b)
860{ 864{
865 if (!(*a)->name || !(*b)->name)
866 return !!(*a)->name - !!(*b)->name;
867
861 return(strcmp((*a)->name, (*b)->name)); 868 return(strcmp((*a)->name, (*b)->name));
862} 869}
863 870
864static int mime_param_cmp(const MIME_PARAM * const *a, 871static int mime_param_cmp(const MIME_PARAM * const *a,
865 const MIME_PARAM * const *b) 872 const MIME_PARAM * const *b)
866{ 873{
874 if (!(*a)->param_name || !(*b)->param_name)
875 return !!(*a)->param_name - !!(*b)->param_name;
867 return(strcmp((*a)->param_name, (*b)->param_name)); 876 return(strcmp((*a)->param_name, (*b)->param_name));
868} 877}
869 878
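
The comparator fixes above use the `!!a - !!b` idiom: if either name is NULL, order purely on NULL-ness and never hand a NULL pointer to strcmp(). A standalone restatement of that idiom:

#include <stdio.h>
#include <string.h>

/* NULL-safe string comparison as in mime_hdr_cmp()/mime_param_cmp() above:
 * NULL sorts before any non-NULL string, and strcmp() is never called with
 * a NULL argument. */
static int nullsafe_cmp(const char *a, const char *b)
	{
	if (a == NULL || b == NULL)
		return !!a - !!b;	/* -1, 0 or 1 */
	return strcmp(a, b);
	}

int main(void)
	{
	printf("%d\n", nullsafe_cmp(NULL, "content-type"));	/* negative */
	printf("%d\n", nullsafe_cmp(NULL, NULL));		/* 0 */
	printf("%d\n", nullsafe_cmp("boundary", "boundary"));	/* 0 */
	return 0;
	}
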
diff --git a/src/lib/libssl/src/crypto/bio/bss_dgram.c b/src/lib/libssl/src/crypto/bio/bss_dgram.c
index 71ebe987b6..1b1e4bec81 100644
--- a/src/lib/libssl/src/crypto/bio/bss_dgram.c
+++ b/src/lib/libssl/src/crypto/bio/bss_dgram.c
@@ -70,6 +70,13 @@
70#include <sys/timeb.h> 70#include <sys/timeb.h>
71#endif 71#endif
72 72
73#ifndef OPENSSL_NO_SCTP
74#include <netinet/sctp.h>
75#include <fcntl.h>
76#define OPENSSL_SCTP_DATA_CHUNK_TYPE 0x00
77#define OPENSSL_SCTP_FORWARD_CUM_TSN_CHUNK_TYPE 0xc0
78#endif
79
73#ifdef OPENSSL_SYS_LINUX 80#ifdef OPENSSL_SYS_LINUX
74#define IP_MTU 14 /* linux is lame */ 81#define IP_MTU 14 /* linux is lame */
75#endif 82#endif
@@ -88,6 +95,18 @@ static int dgram_new(BIO *h);
88static int dgram_free(BIO *data); 95static int dgram_free(BIO *data);
89static int dgram_clear(BIO *bio); 96static int dgram_clear(BIO *bio);
90 97
98#ifndef OPENSSL_NO_SCTP
99static int dgram_sctp_write(BIO *h, const char *buf, int num);
100static int dgram_sctp_read(BIO *h, char *buf, int size);
101static int dgram_sctp_puts(BIO *h, const char *str);
102static long dgram_sctp_ctrl(BIO *h, int cmd, long arg1, void *arg2);
103static int dgram_sctp_new(BIO *h);
104static int dgram_sctp_free(BIO *data);
105#ifdef SCTP_AUTHENTICATION_EVENT
106static void dgram_sctp_handle_auth_free_key_event(BIO *b, union sctp_notification *snp);
107#endif
108#endif
109
91static int BIO_dgram_should_retry(int s); 110static int BIO_dgram_should_retry(int s);
92 111
93static void get_current_time(struct timeval *t); 112static void get_current_time(struct timeval *t);
@@ -106,6 +125,22 @@ static BIO_METHOD methods_dgramp=
106 NULL, 125 NULL,
107 }; 126 };
108 127
128#ifndef OPENSSL_NO_SCTP
129static BIO_METHOD methods_dgramp_sctp=
130 {
131 BIO_TYPE_DGRAM_SCTP,
132 "datagram sctp socket",
133 dgram_sctp_write,
134 dgram_sctp_read,
135 dgram_sctp_puts,
136 NULL, /* dgram_gets, */
137 dgram_sctp_ctrl,
138 dgram_sctp_new,
139 dgram_sctp_free,
140 NULL,
141 };
142#endif
143
109typedef struct bio_dgram_data_st 144typedef struct bio_dgram_data_st
110 { 145 {
111 union { 146 union {
@@ -122,6 +157,40 @@ typedef struct bio_dgram_data_st
122 struct timeval socket_timeout; 157 struct timeval socket_timeout;
123 } bio_dgram_data; 158 } bio_dgram_data;
124 159
160#ifndef OPENSSL_NO_SCTP
161typedef struct bio_dgram_sctp_save_message_st
162 {
163 BIO *bio;
164 char *data;
165 int length;
166 } bio_dgram_sctp_save_message;
167
168typedef struct bio_dgram_sctp_data_st
169 {
170 union {
171 struct sockaddr sa;
172 struct sockaddr_in sa_in;
173#if OPENSSL_USE_IPV6
174 struct sockaddr_in6 sa_in6;
175#endif
176 } peer;
177 unsigned int connected;
178 unsigned int _errno;
179 unsigned int mtu;
180 struct bio_dgram_sctp_sndinfo sndinfo;
181 struct bio_dgram_sctp_rcvinfo rcvinfo;
182 struct bio_dgram_sctp_prinfo prinfo;
183 void (*handle_notifications)(BIO *bio, void *context, void *buf);
184 void* notification_context;
185 int in_handshake;
186 int ccs_rcvd;
187 int ccs_sent;
188 int save_shutdown;
189 int peer_auth_tested;
190 bio_dgram_sctp_save_message saved_message;
191 } bio_dgram_sctp_data;
192#endif
193
125BIO_METHOD *BIO_s_datagram(void) 194BIO_METHOD *BIO_s_datagram(void)
126 { 195 {
127 return(&methods_dgramp); 196 return(&methods_dgramp);
@@ -547,6 +616,27 @@ static long dgram_ctrl(BIO *b, int cmd, long num, void *ptr)
547 ret = 0; 616 ret = 0;
548#endif 617#endif
549 break; 618 break;
619 case BIO_CTRL_DGRAM_GET_FALLBACK_MTU:
620 switch (data->peer.sa.sa_family)
621 {
622 case AF_INET:
623 ret = 576 - 20 - 8;
624 break;
625#if OPENSSL_USE_IPV6
626 case AF_INET6:
627#ifdef IN6_IS_ADDR_V4MAPPED
628 if (IN6_IS_ADDR_V4MAPPED(&data->peer.sa_in6.sin6_addr))
629 ret = 576 - 20 - 8;
630 else
631#endif
632 ret = 1280 - 40 - 8;
633 break;
634#endif
635 default:
636 ret = 576 - 20 - 8;
637 break;
638 }
639 break;
550 case BIO_CTRL_DGRAM_GET_MTU: 640 case BIO_CTRL_DGRAM_GET_MTU:
551 return data->mtu; 641 return data->mtu;
552 break; 642 break;
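
The fallback values in the new BIO_CTRL_DGRAM_GET_FALLBACK_MTU case are the protocol minimums minus header overhead: 576 - 20 (IPv4 header) - 8 (UDP header) = 548 bytes, and 1280 - 40 (IPv6 header) - 8 = 1232 bytes, with V4-mapped IPv6 peers treated as IPv4. A standalone restatement of that arithmetic:

#include <stdio.h>

/* Mirrors the fallback-MTU switch above. */
static long fallback_mtu(int is_ipv6, int v4_mapped)
	{
	if (is_ipv6 && !v4_mapped)
		return 1280 - 40 - 8;	/* IPv6 min MTU - IPv6 hdr - UDP hdr */
	return 576 - 20 - 8;		/* IPv4 min reassembly - IP hdr - UDP hdr */
	}

int main(void)
	{
	printf("IPv4 fallback: %ld\n", fallback_mtu(0, 0));	/* 548 */
	printf("IPv6 fallback: %ld\n", fallback_mtu(1, 0));	/* 1232 */
	return 0;
	}
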
@@ -738,6 +828,912 @@ static int dgram_puts(BIO *bp, const char *str)
738 return(ret); 828 return(ret);
739 } 829 }
740 830
831#ifndef OPENSSL_NO_SCTP
832BIO_METHOD *BIO_s_datagram_sctp(void)
833 {
834 return(&methods_dgramp_sctp);
835 }
836
837BIO *BIO_new_dgram_sctp(int fd, int close_flag)
838 {
839 BIO *bio;
840 int ret, optval = 20000;
841 int auth_data = 0, auth_forward = 0;
842 unsigned char *p;
843 struct sctp_authchunk auth;
844 struct sctp_authchunks *authchunks;
845 socklen_t sockopt_len;
846#ifdef SCTP_AUTHENTICATION_EVENT
847#ifdef SCTP_EVENT
848 struct sctp_event event;
849#else
850 struct sctp_event_subscribe event;
851#endif
852#endif
853
854 bio=BIO_new(BIO_s_datagram_sctp());
855 if (bio == NULL) return(NULL);
856 BIO_set_fd(bio,fd,close_flag);
857
858 /* Activate SCTP-AUTH for DATA and FORWARD-TSN chunks */
859 auth.sauth_chunk = OPENSSL_SCTP_DATA_CHUNK_TYPE;
860 ret = setsockopt(fd, IPPROTO_SCTP, SCTP_AUTH_CHUNK, &auth, sizeof(struct sctp_authchunk));
861 OPENSSL_assert(ret >= 0);
862 auth.sauth_chunk = OPENSSL_SCTP_FORWARD_CUM_TSN_CHUNK_TYPE;
863 ret = setsockopt(fd, IPPROTO_SCTP, SCTP_AUTH_CHUNK, &auth, sizeof(struct sctp_authchunk));
864 OPENSSL_assert(ret >= 0);
865
866 /* Test if activation was successful. When using accept(),
867 * SCTP-AUTH has to be activated for the listening socket
868 * already, otherwise the connected socket won't use it. */
869 sockopt_len = (socklen_t)(sizeof(sctp_assoc_t) + 256 * sizeof(uint8_t));
870 authchunks = OPENSSL_malloc(sockopt_len);
871	memset(authchunks, 0, sockopt_len);	/* clear the whole buffer */
872 ret = getsockopt(fd, IPPROTO_SCTP, SCTP_LOCAL_AUTH_CHUNKS, authchunks, &sockopt_len);
873 OPENSSL_assert(ret >= 0);
874
875 for (p = (unsigned char*) authchunks + sizeof(sctp_assoc_t);
876 p < (unsigned char*) authchunks + sockopt_len;
877 p += sizeof(uint8_t))
878 {
879 if (*p == OPENSSL_SCTP_DATA_CHUNK_TYPE) auth_data = 1;
880 if (*p == OPENSSL_SCTP_FORWARD_CUM_TSN_CHUNK_TYPE) auth_forward = 1;
881 }
882
883 OPENSSL_free(authchunks);
884
885 OPENSSL_assert(auth_data);
886 OPENSSL_assert(auth_forward);
887
888#ifdef SCTP_AUTHENTICATION_EVENT
889#ifdef SCTP_EVENT
890 memset(&event, 0, sizeof(struct sctp_event));
891 event.se_assoc_id = 0;
892 event.se_type = SCTP_AUTHENTICATION_EVENT;
893 event.se_on = 1;
894 ret = setsockopt(fd, IPPROTO_SCTP, SCTP_EVENT, &event, sizeof(struct sctp_event));
895 OPENSSL_assert(ret >= 0);
896#else
897 sockopt_len = (socklen_t) sizeof(struct sctp_event_subscribe);
898 ret = getsockopt(fd, IPPROTO_SCTP, SCTP_EVENTS, &event, &sockopt_len);
899 OPENSSL_assert(ret >= 0);
900
901 event.sctp_authentication_event = 1;
902
903 ret = setsockopt(fd, IPPROTO_SCTP, SCTP_EVENTS, &event, sizeof(struct sctp_event_subscribe));
904 OPENSSL_assert(ret >= 0);
905#endif
906#endif
907
908 /* Disable partial delivery by setting the min size
909 * larger than the max record size of 2^14 + 2048 + 13
910 */
911 ret = setsockopt(fd, IPPROTO_SCTP, SCTP_PARTIAL_DELIVERY_POINT, &optval, sizeof(optval));
912 OPENSSL_assert(ret >= 0);
913
914 return(bio);
915 }
916
917int BIO_dgram_is_sctp(BIO *bio)
918 {
919 return (BIO_method_type(bio) == BIO_TYPE_DGRAM_SCTP);
920 }
921
922static int dgram_sctp_new(BIO *bi)
923 {
924 bio_dgram_sctp_data *data = NULL;
925
926 bi->init=0;
927 bi->num=0;
928 data = OPENSSL_malloc(sizeof(bio_dgram_sctp_data));
929 if (data == NULL)
930 return 0;
931 memset(data, 0x00, sizeof(bio_dgram_sctp_data));
932#ifdef SCTP_PR_SCTP_NONE
933 data->prinfo.pr_policy = SCTP_PR_SCTP_NONE;
934#endif
935 bi->ptr = data;
936
937 bi->flags=0;
938 return(1);
939 }
940
941static int dgram_sctp_free(BIO *a)
942 {
943 bio_dgram_sctp_data *data;
944
945 if (a == NULL) return(0);
946 if ( ! dgram_clear(a))
947 return 0;
948
949 data = (bio_dgram_sctp_data *)a->ptr;
950 if(data != NULL) OPENSSL_free(data);
951
952 return(1);
953 }
954
955#ifdef SCTP_AUTHENTICATION_EVENT
956void dgram_sctp_handle_auth_free_key_event(BIO *b, union sctp_notification *snp)
957 {
958 unsigned int sockopt_len = 0;
959 int ret;
960 struct sctp_authkey_event* authkeyevent = &snp->sn_auth_event;
961
962 if (authkeyevent->auth_indication == SCTP_AUTH_FREE_KEY)
963 {
964 struct sctp_authkeyid authkeyid;
965
966 /* delete key */
967 authkeyid.scact_keynumber = authkeyevent->auth_keynumber;
968 sockopt_len = sizeof(struct sctp_authkeyid);
969 ret = setsockopt(b->num, IPPROTO_SCTP, SCTP_AUTH_DELETE_KEY,
970 &authkeyid, sockopt_len);
971 }
972 }
973#endif
974
975static int dgram_sctp_read(BIO *b, char *out, int outl)
976 {
977 int ret = 0, n = 0, i, optval;
978 socklen_t optlen;
979 bio_dgram_sctp_data *data = (bio_dgram_sctp_data *)b->ptr;
980 union sctp_notification *snp;
981 struct msghdr msg;
982 struct iovec iov;
983 struct cmsghdr *cmsg;
984 char cmsgbuf[512];
985
986 if (out != NULL)
987 {
988 clear_socket_error();
989
990 do
991 {
992 memset(&data->rcvinfo, 0x00, sizeof(struct bio_dgram_sctp_rcvinfo));
993 iov.iov_base = out;
994 iov.iov_len = outl;
995 msg.msg_name = NULL;
996 msg.msg_namelen = 0;
997 msg.msg_iov = &iov;
998 msg.msg_iovlen = 1;
999 msg.msg_control = cmsgbuf;
1000 msg.msg_controllen = 512;
1001 msg.msg_flags = 0;
1002 n = recvmsg(b->num, &msg, 0);
1003
1004 if (msg.msg_controllen > 0)
1005 {
1006 for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg))
1007 {
1008 if (cmsg->cmsg_level != IPPROTO_SCTP)
1009 continue;
1010#ifdef SCTP_RCVINFO
1011 if (cmsg->cmsg_type == SCTP_RCVINFO)
1012 {
1013 struct sctp_rcvinfo *rcvinfo;
1014
1015 rcvinfo = (struct sctp_rcvinfo *)CMSG_DATA(cmsg);
1016 data->rcvinfo.rcv_sid = rcvinfo->rcv_sid;
1017 data->rcvinfo.rcv_ssn = rcvinfo->rcv_ssn;
1018 data->rcvinfo.rcv_flags = rcvinfo->rcv_flags;
1019 data->rcvinfo.rcv_ppid = rcvinfo->rcv_ppid;
1020 data->rcvinfo.rcv_tsn = rcvinfo->rcv_tsn;
1021 data->rcvinfo.rcv_cumtsn = rcvinfo->rcv_cumtsn;
1022 data->rcvinfo.rcv_context = rcvinfo->rcv_context;
1023 }
1024#endif
1025#ifdef SCTP_SNDRCV
1026 if (cmsg->cmsg_type == SCTP_SNDRCV)
1027 {
1028 struct sctp_sndrcvinfo *sndrcvinfo;
1029
1030 sndrcvinfo = (struct sctp_sndrcvinfo *)CMSG_DATA(cmsg);
1031 data->rcvinfo.rcv_sid = sndrcvinfo->sinfo_stream;
1032 data->rcvinfo.rcv_ssn = sndrcvinfo->sinfo_ssn;
1033 data->rcvinfo.rcv_flags = sndrcvinfo->sinfo_flags;
1034 data->rcvinfo.rcv_ppid = sndrcvinfo->sinfo_ppid;
1035 data->rcvinfo.rcv_tsn = sndrcvinfo->sinfo_tsn;
1036 data->rcvinfo.rcv_cumtsn = sndrcvinfo->sinfo_cumtsn;
1037 data->rcvinfo.rcv_context = sndrcvinfo->sinfo_context;
1038 }
1039#endif
1040 }
1041 }
1042
1043 if (n <= 0)
1044 {
1045 if (n < 0)
1046 ret = n;
1047 break;
1048 }
1049
1050 if (msg.msg_flags & MSG_NOTIFICATION)
1051 {
1052 snp = (union sctp_notification*) out;
1053 if (snp->sn_header.sn_type == SCTP_SENDER_DRY_EVENT)
1054 {
1055#ifdef SCTP_EVENT
1056 struct sctp_event event;
1057#else
1058 struct sctp_event_subscribe event;
1059 socklen_t eventsize;
1060#endif
1061 /* If a message has been delayed until the socket
1062 * is dry, it can be sent now.
1063 */
1064 if (data->saved_message.length > 0)
1065 {
1066 dgram_sctp_write(data->saved_message.bio, data->saved_message.data,
1067 data->saved_message.length);
1068 OPENSSL_free(data->saved_message.data);
1069 data->saved_message.length = 0;
1070 }
1071
1072 /* disable sender dry event */
1073#ifdef SCTP_EVENT
1074 memset(&event, 0, sizeof(struct sctp_event));
1075 event.se_assoc_id = 0;
1076 event.se_type = SCTP_SENDER_DRY_EVENT;
1077 event.se_on = 0;
1078 i = setsockopt(b->num, IPPROTO_SCTP, SCTP_EVENT, &event, sizeof(struct sctp_event));
1079 OPENSSL_assert(i >= 0);
1080#else
1081 eventsize = sizeof(struct sctp_event_subscribe);
1082 i = getsockopt(b->num, IPPROTO_SCTP, SCTP_EVENTS, &event, &eventsize);
1083 OPENSSL_assert(i >= 0);
1084
1085 event.sctp_sender_dry_event = 0;
1086
1087 i = setsockopt(b->num, IPPROTO_SCTP, SCTP_EVENTS, &event, sizeof(struct sctp_event_subscribe));
1088 OPENSSL_assert(i >= 0);
1089#endif
1090 }
1091
1092#ifdef SCTP_AUTHENTICATION_EVENT
1093 if (snp->sn_header.sn_type == SCTP_AUTHENTICATION_EVENT)
1094 dgram_sctp_handle_auth_free_key_event(b, snp);
1095#endif
1096
1097 if (data->handle_notifications != NULL)
1098 data->handle_notifications(b, data->notification_context, (void*) out);
1099
1100 memset(out, 0, outl);
1101 }
1102 else
1103 ret += n;
1104 }
1105 while ((msg.msg_flags & MSG_NOTIFICATION) && (msg.msg_flags & MSG_EOR) && (ret < outl));
1106
1107 if (ret > 0 && !(msg.msg_flags & MSG_EOR))
1108 {
1109 /* Partial message read, this should never happen! */
1110
1111 /* The buffer was too small, this means the peer sent
1112 * a message that was larger than allowed. */
1113 if (ret == outl)
1114 return -1;
1115
1116 /* Test if socket buffer can handle max record
1117 * size (2^14 + 2048 + 13)
1118 */
1119 optlen = (socklen_t) sizeof(int);
1120 ret = getsockopt(b->num, SOL_SOCKET, SO_RCVBUF, &optval, &optlen);
1121 OPENSSL_assert(ret >= 0);
1122 OPENSSL_assert(optval >= 18445);
1123
1124 /* Test if SCTP doesn't partially deliver below
1125 * max record size (2^14 + 2048 + 13)
1126 */
1127 optlen = (socklen_t) sizeof(int);
1128 ret = getsockopt(b->num, IPPROTO_SCTP, SCTP_PARTIAL_DELIVERY_POINT,
1129 &optval, &optlen);
1130 OPENSSL_assert(ret >= 0);
1131 OPENSSL_assert(optval >= 18445);
1132
1133 /* Partially delivered notification??? Probably a bug.... */
1134 OPENSSL_assert(!(msg.msg_flags & MSG_NOTIFICATION));
1135
1136 /* Everything seems ok till now, so it's most likely
1137 * a message dropped by PR-SCTP.
1138 */
1139 memset(out, 0, outl);
1140 BIO_set_retry_read(b);
1141 return -1;
1142 }
1143
1144 BIO_clear_retry_flags(b);
1145 if (ret < 0)
1146 {
1147 if (BIO_dgram_should_retry(ret))
1148 {
1149 BIO_set_retry_read(b);
1150 data->_errno = get_last_socket_error();
1151 }
1152 }
1153
1154 /* Test if peer uses SCTP-AUTH before continuing */
1155 if (!data->peer_auth_tested)
1156 {
1157 int ii, auth_data = 0, auth_forward = 0;
1158 unsigned char *p;
1159 struct sctp_authchunks *authchunks;
1160
1161 optlen = (socklen_t)(sizeof(sctp_assoc_t) + 256 * sizeof(uint8_t));
1162 authchunks = OPENSSL_malloc(optlen);
1163		memset(authchunks, 0, optlen);	/* clear the whole buffer */
1164 ii = getsockopt(b->num, IPPROTO_SCTP, SCTP_PEER_AUTH_CHUNKS, authchunks, &optlen);
1165 OPENSSL_assert(ii >= 0);
1166
1167 for (p = (unsigned char*) authchunks + sizeof(sctp_assoc_t);
1168 p < (unsigned char*) authchunks + optlen;
1169 p += sizeof(uint8_t))
1170 {
1171 if (*p == OPENSSL_SCTP_DATA_CHUNK_TYPE) auth_data = 1;
1172 if (*p == OPENSSL_SCTP_FORWARD_CUM_TSN_CHUNK_TYPE) auth_forward = 1;
1173 }
1174
1175 OPENSSL_free(authchunks);
1176
1177 if (!auth_data || !auth_forward)
1178 {
1179 BIOerr(BIO_F_DGRAM_SCTP_READ,BIO_R_CONNECT_ERROR);
1180 return -1;
1181 }
1182
1183 data->peer_auth_tested = 1;
1184 }
1185 }
1186 return(ret);
1187 }
1188
1189static int dgram_sctp_write(BIO *b, const char *in, int inl)
1190 {
1191 int ret;
1192 bio_dgram_sctp_data *data = (bio_dgram_sctp_data *)b->ptr;
1193 struct bio_dgram_sctp_sndinfo *sinfo = &(data->sndinfo);
1194 struct bio_dgram_sctp_prinfo *pinfo = &(data->prinfo);
1195 struct bio_dgram_sctp_sndinfo handshake_sinfo;
1196 struct iovec iov[1];
1197 struct msghdr msg;
1198 struct cmsghdr *cmsg;
1199#if defined(SCTP_SNDINFO) && defined(SCTP_PRINFO)
1200 char cmsgbuf[CMSG_SPACE(sizeof(struct sctp_sndinfo)) + CMSG_SPACE(sizeof(struct sctp_prinfo))];
1201 struct sctp_sndinfo *sndinfo;
1202 struct sctp_prinfo *prinfo;
1203#else
1204 char cmsgbuf[CMSG_SPACE(sizeof(struct sctp_sndrcvinfo))];
1205 struct sctp_sndrcvinfo *sndrcvinfo;
1206#endif
1207
1208 clear_socket_error();
1209
1210	/* If we're sending anything other than application data,
1211 * disable all user parameters and flags.
1212 */
1213 if (in[0] != 23) {
1214 memset(&handshake_sinfo, 0x00, sizeof(struct bio_dgram_sctp_sndinfo));
1215#ifdef SCTP_SACK_IMMEDIATELY
1216 handshake_sinfo.snd_flags = SCTP_SACK_IMMEDIATELY;
1217#endif
1218 sinfo = &handshake_sinfo;
1219 }
1220
1221 /* If we have to send a shutdown alert message and the
1222 * socket is not dry yet, we have to save it and send it
1223 * as soon as the socket gets dry.
1224 */
1225 if (data->save_shutdown && !BIO_dgram_sctp_wait_for_dry(b))
1226 {
1227 data->saved_message.bio = b;
1228 data->saved_message.length = inl;
1229 data->saved_message.data = OPENSSL_malloc(inl);
1230 memcpy(data->saved_message.data, in, inl);
1231 return inl;
1232 }
1233
1234 iov[0].iov_base = (char *)in;
1235 iov[0].iov_len = inl;
1236 msg.msg_name = NULL;
1237 msg.msg_namelen = 0;
1238 msg.msg_iov = iov;
1239 msg.msg_iovlen = 1;
1240 msg.msg_control = (caddr_t)cmsgbuf;
1241 msg.msg_controllen = 0;
1242 msg.msg_flags = 0;
1243#if defined(SCTP_SNDINFO) && defined(SCTP_PRINFO)
1244 cmsg = (struct cmsghdr *)cmsgbuf;
1245 cmsg->cmsg_level = IPPROTO_SCTP;
1246 cmsg->cmsg_type = SCTP_SNDINFO;
1247 cmsg->cmsg_len = CMSG_LEN(sizeof(struct sctp_sndinfo));
1248 sndinfo = (struct sctp_sndinfo *)CMSG_DATA(cmsg);
1249 memset(sndinfo, 0, sizeof(struct sctp_sndinfo));
1250 sndinfo->snd_sid = sinfo->snd_sid;
1251 sndinfo->snd_flags = sinfo->snd_flags;
1252 sndinfo->snd_ppid = sinfo->snd_ppid;
1253 sndinfo->snd_context = sinfo->snd_context;
1254 msg.msg_controllen += CMSG_SPACE(sizeof(struct sctp_sndinfo));
1255
1256 cmsg = (struct cmsghdr *)&cmsgbuf[CMSG_SPACE(sizeof(struct sctp_sndinfo))];
1257 cmsg->cmsg_level = IPPROTO_SCTP;
1258 cmsg->cmsg_type = SCTP_PRINFO;
1259 cmsg->cmsg_len = CMSG_LEN(sizeof(struct sctp_prinfo));
1260 prinfo = (struct sctp_prinfo *)CMSG_DATA(cmsg);
1261 memset(prinfo, 0, sizeof(struct sctp_prinfo));
1262 prinfo->pr_policy = pinfo->pr_policy;
1263 prinfo->pr_value = pinfo->pr_value;
1264 msg.msg_controllen += CMSG_SPACE(sizeof(struct sctp_prinfo));
1265#else
1266 cmsg = (struct cmsghdr *)cmsgbuf;
1267 cmsg->cmsg_level = IPPROTO_SCTP;
1268 cmsg->cmsg_type = SCTP_SNDRCV;
1269 cmsg->cmsg_len = CMSG_LEN(sizeof(struct sctp_sndrcvinfo));
1270 sndrcvinfo = (struct sctp_sndrcvinfo *)CMSG_DATA(cmsg);
1271 memset(sndrcvinfo, 0, sizeof(struct sctp_sndrcvinfo));
1272 sndrcvinfo->sinfo_stream = sinfo->snd_sid;
1273 sndrcvinfo->sinfo_flags = sinfo->snd_flags;
1274#ifdef __FreeBSD__
1275 sndrcvinfo->sinfo_flags |= pinfo->pr_policy;
1276#endif
1277 sndrcvinfo->sinfo_ppid = sinfo->snd_ppid;
1278 sndrcvinfo->sinfo_context = sinfo->snd_context;
1279 sndrcvinfo->sinfo_timetolive = pinfo->pr_value;
1280 msg.msg_controllen += CMSG_SPACE(sizeof(struct sctp_sndrcvinfo));
1281#endif
1282
1283 ret = sendmsg(b->num, &msg, 0);
1284
1285 BIO_clear_retry_flags(b);
1286 if (ret <= 0)
1287 {
1288 if (BIO_dgram_should_retry(ret))
1289 {
1290 BIO_set_retry_write(b);
1291 data->_errno = get_last_socket_error();
1292 }
1293 }
1294 return(ret);
1295 }
1296
1297static long dgram_sctp_ctrl(BIO *b, int cmd, long num, void *ptr)
1298 {
1299 long ret=1;
1300 bio_dgram_sctp_data *data = NULL;
1301 unsigned int sockopt_len = 0;
1302 struct sctp_authkeyid authkeyid;
1303 struct sctp_authkey *authkey;
1304
1305 data = (bio_dgram_sctp_data *)b->ptr;
1306
1307 switch (cmd)
1308 {
1309 case BIO_CTRL_DGRAM_QUERY_MTU:
1310 /* Set to maximum (2^14)
1311 * and ignore user input to enable transport
1312 * protocol fragmentation.
1313 * Returns always 2^14.
1314 */
1315 data->mtu = 16384;
1316 ret = data->mtu;
1317 break;
1318 case BIO_CTRL_DGRAM_SET_MTU:
1319 /* Set to maximum (2^14)
1320 * and ignore input to enable transport
1321 * protocol fragmentation.
1322 * Returns always 2^14.
1323 */
1324 data->mtu = 16384;
1325 ret = data->mtu;
1326 break;
1327 case BIO_CTRL_DGRAM_SET_CONNECTED:
1328 case BIO_CTRL_DGRAM_CONNECT:
1329 /* Returns always -1. */
1330 ret = -1;
1331 break;
1332 case BIO_CTRL_DGRAM_SET_NEXT_TIMEOUT:
1333 /* SCTP doesn't need the DTLS timer
1334 * Returns always 1.
1335 */
1336 break;
1337 case BIO_CTRL_DGRAM_SCTP_SET_IN_HANDSHAKE:
1338 if (num > 0)
1339 data->in_handshake = 1;
1340 else
1341 data->in_handshake = 0;
1342
1343 ret = setsockopt(b->num, IPPROTO_SCTP, SCTP_NODELAY, &data->in_handshake, sizeof(int));
1344 break;
1345 case BIO_CTRL_DGRAM_SCTP_ADD_AUTH_KEY:
1346 /* New shared key for SCTP AUTH.
1347 * Returns 0 on success, -1 otherwise.
1348 */
1349
1350 /* Get active key */
1351 sockopt_len = sizeof(struct sctp_authkeyid);
1352 ret = getsockopt(b->num, IPPROTO_SCTP, SCTP_AUTH_ACTIVE_KEY, &authkeyid, &sockopt_len);
1353 if (ret < 0) break;
1354
1355 /* Add new key */
1356 sockopt_len = sizeof(struct sctp_authkey) + 64 * sizeof(uint8_t);
1357 authkey = OPENSSL_malloc(sockopt_len);
1358 memset(authkey, 0x00, sockopt_len);
1359 authkey->sca_keynumber = authkeyid.scact_keynumber + 1;
1360#ifndef __FreeBSD__
1361 /* This field is missing in FreeBSD 8.2 and earlier,
1362 * and FreeBSD 8.3 and higher work without it.
1363 */
1364 authkey->sca_keylength = 64;
1365#endif
1366 memcpy(&authkey->sca_key[0], ptr, 64 * sizeof(uint8_t));
1367
1368 ret = setsockopt(b->num, IPPROTO_SCTP, SCTP_AUTH_KEY, authkey, sockopt_len);
1369 if (ret < 0) break;
1370
1371 /* Reset active key */
1372 ret = setsockopt(b->num, IPPROTO_SCTP, SCTP_AUTH_ACTIVE_KEY,
1373 &authkeyid, sizeof(struct sctp_authkeyid));
1374 if (ret < 0) break;
1375
1376 break;
1377 case BIO_CTRL_DGRAM_SCTP_NEXT_AUTH_KEY:
1378 /* Returns 0 on success, -1 otherwise. */
1379
1380 /* Get active key */
1381 sockopt_len = sizeof(struct sctp_authkeyid);
1382 ret = getsockopt(b->num, IPPROTO_SCTP, SCTP_AUTH_ACTIVE_KEY, &authkeyid, &sockopt_len);
1383 if (ret < 0) break;
1384
1385 /* Set active key */
1386 authkeyid.scact_keynumber = authkeyid.scact_keynumber + 1;
1387 ret = setsockopt(b->num, IPPROTO_SCTP, SCTP_AUTH_ACTIVE_KEY,
1388 &authkeyid, sizeof(struct sctp_authkeyid));
1389 if (ret < 0) break;
1390
1391 /* CCS has been sent, so remember that and fall through
1392 * to check if we need to deactivate an old key
1393 */
1394 data->ccs_sent = 1;
1395
1396 case BIO_CTRL_DGRAM_SCTP_AUTH_CCS_RCVD:
1397 /* Returns 0 on success, -1 otherwise. */
1398
1399 /* Has this command really been called or is this just a fall-through? */
1400 if (cmd == BIO_CTRL_DGRAM_SCTP_AUTH_CCS_RCVD)
1401 data->ccs_rcvd = 1;
1402
1403		/* CCS has been both received and sent, so deactivate an old key */
1404 if (data->ccs_rcvd == 1 && data->ccs_sent == 1)
1405 {
1406 /* Get active key */
1407 sockopt_len = sizeof(struct sctp_authkeyid);
1408 ret = getsockopt(b->num, IPPROTO_SCTP, SCTP_AUTH_ACTIVE_KEY, &authkeyid, &sockopt_len);
1409 if (ret < 0) break;
1410
1411 /* Deactivate key or delete second last key if
1412 * SCTP_AUTHENTICATION_EVENT is not available.
1413 */
1414 authkeyid.scact_keynumber = authkeyid.scact_keynumber - 1;
1415#ifdef SCTP_AUTH_DEACTIVATE_KEY
1416 sockopt_len = sizeof(struct sctp_authkeyid);
1417 ret = setsockopt(b->num, IPPROTO_SCTP, SCTP_AUTH_DEACTIVATE_KEY,
1418 &authkeyid, sockopt_len);
1419 if (ret < 0) break;
1420#endif
1421#ifndef SCTP_AUTHENTICATION_EVENT
1422 if (authkeyid.scact_keynumber > 0)
1423 {
1424 authkeyid.scact_keynumber = authkeyid.scact_keynumber - 1;
1425 ret = setsockopt(b->num, IPPROTO_SCTP, SCTP_AUTH_DELETE_KEY,
1426 &authkeyid, sizeof(struct sctp_authkeyid));
1427 if (ret < 0) break;
1428 }
1429#endif
1430
1431 data->ccs_rcvd = 0;
1432 data->ccs_sent = 0;
1433 }
1434 break;
1435 case BIO_CTRL_DGRAM_SCTP_GET_SNDINFO:
1436 /* Returns the size of the copied struct. */
1437 if (num > (long) sizeof(struct bio_dgram_sctp_sndinfo))
1438 num = sizeof(struct bio_dgram_sctp_sndinfo);
1439
1440 memcpy(ptr, &(data->sndinfo), num);
1441 ret = num;
1442 break;
1443 case BIO_CTRL_DGRAM_SCTP_SET_SNDINFO:
1444 /* Returns the size of the copied struct. */
1445 if (num > (long) sizeof(struct bio_dgram_sctp_sndinfo))
1446 num = sizeof(struct bio_dgram_sctp_sndinfo);
1447
1448 memcpy(&(data->sndinfo), ptr, num);
1449 break;
1450 case BIO_CTRL_DGRAM_SCTP_GET_RCVINFO:
1451 /* Returns the size of the copied struct. */
1452 if (num > (long) sizeof(struct bio_dgram_sctp_rcvinfo))
1453 num = sizeof(struct bio_dgram_sctp_rcvinfo);
1454
1455 memcpy(ptr, &data->rcvinfo, num);
1456
1457 ret = num;
1458 break;
1459 case BIO_CTRL_DGRAM_SCTP_SET_RCVINFO:
1460 /* Returns the size of the copied struct. */
1461 if (num > (long) sizeof(struct bio_dgram_sctp_rcvinfo))
1462 num = sizeof(struct bio_dgram_sctp_rcvinfo);
1463
1464 memcpy(&(data->rcvinfo), ptr, num);
1465 break;
1466 case BIO_CTRL_DGRAM_SCTP_GET_PRINFO:
1467 /* Returns the size of the copied struct. */
1468 if (num > (long) sizeof(struct bio_dgram_sctp_prinfo))
1469 num = sizeof(struct bio_dgram_sctp_prinfo);
1470
1471 memcpy(ptr, &(data->prinfo), num);
1472 ret = num;
1473 break;
1474 case BIO_CTRL_DGRAM_SCTP_SET_PRINFO:
1475 /* Returns the size of the copied struct. */
1476 if (num > (long) sizeof(struct bio_dgram_sctp_prinfo))
1477 num = sizeof(struct bio_dgram_sctp_prinfo);
1478
1479 memcpy(&(data->prinfo), ptr, num);
1480 break;
1481 case BIO_CTRL_DGRAM_SCTP_SAVE_SHUTDOWN:
1482 /* Returns always 1. */
1483 if (num > 0)
1484 data->save_shutdown = 1;
1485 else
1486 data->save_shutdown = 0;
1487 break;
1488
1489 default:
1490 /* Pass to default ctrl function to
1491 * process SCTP unspecific commands
1492 */
1493 ret=dgram_ctrl(b, cmd, num, ptr);
1494 break;
1495 }
1496 return(ret);
1497 }
1498
1499int BIO_dgram_sctp_notification_cb(BIO *b,
1500 void (*handle_notifications)(BIO *bio, void *context, void *buf),
1501 void *context)
1502 {
1503 bio_dgram_sctp_data *data = (bio_dgram_sctp_data *) b->ptr;
1504
1505 if (handle_notifications != NULL)
1506 {
1507 data->handle_notifications = handle_notifications;
1508 data->notification_context = context;
1509 }
1510 else
1511 return -1;
1512
1513 return 0;
1514 }
1515
1516int BIO_dgram_sctp_wait_for_dry(BIO *b)
1517{
1518 int is_dry = 0;
1519 int n, sockflags, ret;
1520 union sctp_notification snp;
1521 struct msghdr msg;
1522 struct iovec iov;
1523#ifdef SCTP_EVENT
1524 struct sctp_event event;
1525#else
1526 struct sctp_event_subscribe event;
1527 socklen_t eventsize;
1528#endif
1529 bio_dgram_sctp_data *data = (bio_dgram_sctp_data *)b->ptr;
1530
1531 /* set sender dry event */
1532#ifdef SCTP_EVENT
1533 memset(&event, 0, sizeof(struct sctp_event));
1534 event.se_assoc_id = 0;
1535 event.se_type = SCTP_SENDER_DRY_EVENT;
1536 event.se_on = 1;
1537 ret = setsockopt(b->num, IPPROTO_SCTP, SCTP_EVENT, &event, sizeof(struct sctp_event));
1538#else
1539 eventsize = sizeof(struct sctp_event_subscribe);
1540 ret = getsockopt(b->num, IPPROTO_SCTP, SCTP_EVENTS, &event, &eventsize);
1541 if (ret < 0)
1542 return -1;
1543
1544 event.sctp_sender_dry_event = 1;
1545
1546 ret = setsockopt(b->num, IPPROTO_SCTP, SCTP_EVENTS, &event, sizeof(struct sctp_event_subscribe));
1547#endif
1548 if (ret < 0)
1549 return -1;
1550
1551 /* peek for notification */
1552 memset(&snp, 0x00, sizeof(union sctp_notification));
1553 iov.iov_base = (char *)&snp;
1554 iov.iov_len = sizeof(union sctp_notification);
1555 msg.msg_name = NULL;
1556 msg.msg_namelen = 0;
1557 msg.msg_iov = &iov;
1558 msg.msg_iovlen = 1;
1559 msg.msg_control = NULL;
1560 msg.msg_controllen = 0;
1561 msg.msg_flags = 0;
1562
1563 n = recvmsg(b->num, &msg, MSG_PEEK);
1564 if (n <= 0)
1565 {
1566 if ((n < 0) && (get_last_socket_error() != EAGAIN) && (get_last_socket_error() != EWOULDBLOCK))
1567 return -1;
1568 else
1569 return 0;
1570 }
1571
1572 /* if we find a notification, process it and try again if necessary */
1573 while (msg.msg_flags & MSG_NOTIFICATION)
1574 {
1575 memset(&snp, 0x00, sizeof(union sctp_notification));
1576 iov.iov_base = (char *)&snp;
1577 iov.iov_len = sizeof(union sctp_notification);
1578 msg.msg_name = NULL;
1579 msg.msg_namelen = 0;
1580 msg.msg_iov = &iov;
1581 msg.msg_iovlen = 1;
1582 msg.msg_control = NULL;
1583 msg.msg_controllen = 0;
1584 msg.msg_flags = 0;
1585
1586 n = recvmsg(b->num, &msg, 0);
1587 if (n <= 0)
1588 {
1589 if ((n < 0) && (get_last_socket_error() != EAGAIN) && (get_last_socket_error() != EWOULDBLOCK))
1590 return -1;
1591 else
1592 return is_dry;
1593 }
1594
1595 if (snp.sn_header.sn_type == SCTP_SENDER_DRY_EVENT)
1596 {
1597 is_dry = 1;
1598
1599 /* disable sender dry event */
1600#ifdef SCTP_EVENT
1601 memset(&event, 0, sizeof(struct sctp_event));
1602 event.se_assoc_id = 0;
1603 event.se_type = SCTP_SENDER_DRY_EVENT;
1604 event.se_on = 0;
1605 ret = setsockopt(b->num, IPPROTO_SCTP, SCTP_EVENT, &event, sizeof(struct sctp_event));
1606#else
1607 eventsize = (socklen_t) sizeof(struct sctp_event_subscribe);
1608 ret = getsockopt(b->num, IPPROTO_SCTP, SCTP_EVENTS, &event, &eventsize);
1609 if (ret < 0)
1610 return -1;
1611
1612 event.sctp_sender_dry_event = 0;
1613
1614 ret = setsockopt(b->num, IPPROTO_SCTP, SCTP_EVENTS, &event, sizeof(struct sctp_event_subscribe));
1615#endif
1616 if (ret < 0)
1617 return -1;
1618 }
1619
1620#ifdef SCTP_AUTHENTICATION_EVENT
1621 if (snp.sn_header.sn_type == SCTP_AUTHENTICATION_EVENT)
1622 dgram_sctp_handle_auth_free_key_event(b, &snp);
1623#endif
1624
1625 if (data->handle_notifications != NULL)
1626 data->handle_notifications(b, data->notification_context, (void*) &snp);
1627
1628 /* found notification, peek again */
1629 memset(&snp, 0x00, sizeof(union sctp_notification));
1630 iov.iov_base = (char *)&snp;
1631 iov.iov_len = sizeof(union sctp_notification);
1632 msg.msg_name = NULL;
1633 msg.msg_namelen = 0;
1634 msg.msg_iov = &iov;
1635 msg.msg_iovlen = 1;
1636 msg.msg_control = NULL;
1637 msg.msg_controllen = 0;
1638 msg.msg_flags = 0;
1639
1640 /* if we have seen the dry already, don't wait */
1641 if (is_dry)
1642 {
1643 sockflags = fcntl(b->num, F_GETFL, 0);
1644 fcntl(b->num, F_SETFL, O_NONBLOCK);
1645 }
1646
1647 n = recvmsg(b->num, &msg, MSG_PEEK);
1648
1649 if (is_dry)
1650 {
1651 fcntl(b->num, F_SETFL, sockflags);
1652 }
1653
1654 if (n <= 0)
1655 {
1656 if ((n < 0) && (get_last_socket_error() != EAGAIN) && (get_last_socket_error() != EWOULDBLOCK))
1657 return -1;
1658 else
1659 return is_dry;
1660 }
1661 }
1662
1663 /* read anything else */
1664 return is_dry;
1665}
1666
1667int BIO_dgram_sctp_msg_waiting(BIO *b)
1668 {
1669 int n, sockflags;
1670 union sctp_notification snp;
1671 struct msghdr msg;
1672 struct iovec iov;
1673 bio_dgram_sctp_data *data = (bio_dgram_sctp_data *)b->ptr;
1674
1675 /* Check if there are any messages waiting to be read */
1676 do
1677 {
1678 memset(&snp, 0x00, sizeof(union sctp_notification));
1679 iov.iov_base = (char *)&snp;
1680 iov.iov_len = sizeof(union sctp_notification);
1681 msg.msg_name = NULL;
1682 msg.msg_namelen = 0;
1683 msg.msg_iov = &iov;
1684 msg.msg_iovlen = 1;
1685 msg.msg_control = NULL;
1686 msg.msg_controllen = 0;
1687 msg.msg_flags = 0;
1688
1689 sockflags = fcntl(b->num, F_GETFL, 0);
1690 fcntl(b->num, F_SETFL, O_NONBLOCK);
1691 n = recvmsg(b->num, &msg, MSG_PEEK);
1692 fcntl(b->num, F_SETFL, sockflags);
1693
1694 /* if notification, process and try again */
1695 if (n > 0 && (msg.msg_flags & MSG_NOTIFICATION))
1696 {
1697#ifdef SCTP_AUTHENTICATION_EVENT
1698 if (snp.sn_header.sn_type == SCTP_AUTHENTICATION_EVENT)
1699 dgram_sctp_handle_auth_free_key_event(b, &snp);
1700#endif
1701
1702 memset(&snp, 0x00, sizeof(union sctp_notification));
1703 iov.iov_base = (char *)&snp;
1704 iov.iov_len = sizeof(union sctp_notification);
1705 msg.msg_name = NULL;
1706 msg.msg_namelen = 0;
1707 msg.msg_iov = &iov;
1708 msg.msg_iovlen = 1;
1709 msg.msg_control = NULL;
1710 msg.msg_controllen = 0;
1711 msg.msg_flags = 0;
1712 n = recvmsg(b->num, &msg, 0);
1713
1714 if (data->handle_notifications != NULL)
1715 data->handle_notifications(b, data->notification_context, (void*) &snp);
1716 }
1717
1718 } while (n > 0 && (msg.msg_flags & MSG_NOTIFICATION));
1719
1720 /* Return 1 if there is a message to be read, return 0 otherwise. */
1721 if (n > 0)
1722 return 1;
1723 else
1724 return 0;
1725 }
1726
1727static int dgram_sctp_puts(BIO *bp, const char *str)
1728 {
1729 int n,ret;
1730
1731 n=strlen(str);
1732 ret=dgram_sctp_write(bp,str,n);
1733 return(ret);
1734 }
1735#endif
1736
741static int BIO_dgram_should_retry(int i) 1737static int BIO_dgram_should_retry(int i)
742 { 1738 {
743 int err; 1739 int err;
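
The functions added to bss_dgram.c above form the public surface of the SCTP BIO. The sketch below shows how they fit together; it assumes an OpenSSL build without OPENSSL_NO_SCTP, a kernel with SCTP (and SCTP-AUTH) enabled, and a one-to-one SCTP socket whose setup is only outlined. Note that BIO_new_dgram_sctp() aborts via OPENSSL_assert() if SCTP-AUTH cannot be activated on the socket.

#include <openssl/bio.h>
#include <stdio.h>
#include <sys/socket.h>
#include <netinet/in.h>

/* Application-level notification handler, as registered below; buf points
 * at the union sctp_notification received by dgram_sctp_read(). */
static void on_notification(BIO *bio, void *context, void *buf)
	{
	(void)bio; (void)context; (void)buf;
	}

int main(void)
	{
	BIO *bio;
	int fd = socket(AF_INET, SOCK_STREAM, IPPROTO_SCTP);

	if (fd < 0)
		{
		perror("socket(IPPROTO_SCTP)");
		return 1;
		}
	/* ... bind/connect the association here before handing it to the BIO ... */

	bio = BIO_new_dgram_sctp(fd, BIO_NOCLOSE);
	if (bio == NULL || !BIO_dgram_is_sctp(bio))
		return 1;

	/* Forward notifications (sender-dry, auth events, ...) to our handler. */
	BIO_dgram_sctp_notification_cb(bio, on_notification, NULL);

	printf("message waiting: %d\n", BIO_dgram_sctp_msg_waiting(bio));

	BIO_free(bio);
	return 0;
	}
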
diff --git a/src/lib/libssl/src/crypto/bn/asm/armv4-gf2m.pl b/src/lib/libssl/src/crypto/bn/asm/armv4-gf2m.pl
new file mode 100644
index 0000000000..c52e0b75b5
--- /dev/null
+++ b/src/lib/libssl/src/crypto/bn/asm/armv4-gf2m.pl
@@ -0,0 +1,278 @@
1#!/usr/bin/env perl
2#
3# ====================================================================
4# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9#
10# May 2011
11#
12# The module implements bn_GF2m_mul_2x2 polynomial multiplication
13# used in bn_gf2m.c. It's kind of a low-hanging mechanical port from
14# C for the time being... Except that it has two code paths: pure
15# integer code suitable for any ARMv4 and later CPU and NEON code
16# suitable for ARMv7. Pure integer 1x1 multiplication subroutine runs
17# in ~45 cycles on dual-issue core such as Cortex A8, which is ~50%
18# faster than compiler-generated code. For ECDH and ECDSA verify (but
19# not for ECDSA sign) it means 25%-45% improvement depending on key
20# length, more for longer keys. Even though NEON 1x1 multiplication
21# runs in even less cycles, ~30, improvement is measurable only on
22# longer keys. One has to optimize code elsewhere to get NEON glow...
23
24while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
25open STDOUT,">$output";
26
27sub Dlo() { shift=~m|q([1]?[0-9])|?"d".($1*2):""; }
28sub Dhi() { shift=~m|q([1]?[0-9])|?"d".($1*2+1):""; }
29sub Q() { shift=~m|d([1-3]?[02468])|?"q".($1/2):""; }
30
31$code=<<___;
32#include "arm_arch.h"
33
34.text
35.code 32
36
37#if __ARM_ARCH__>=7
38.fpu neon
39
40.type mul_1x1_neon,%function
41.align 5
42mul_1x1_neon:
43	vshl.u64	`&Dlo("q1")`,d16,#8	@ q1-q3 hold shifted copies of $a
44 vmull.p8 `&Q("d0")`,d16,d17 @ a·bb
45 vshl.u64 `&Dlo("q2")`,d16,#16
46 vmull.p8 q1,`&Dlo("q1")`,d17 @ a<<8·bb
47 vshl.u64 `&Dlo("q3")`,d16,#24
48 vmull.p8 q2,`&Dlo("q2")`,d17 @ a<<16·bb
49 vshr.u64 `&Dlo("q1")`,#8
50 vmull.p8 q3,`&Dlo("q3")`,d17 @ a<<24·bb
51 vshl.u64 `&Dhi("q1")`,#24
52 veor d0,`&Dlo("q1")`
53 vshr.u64 `&Dlo("q2")`,#16
54 veor d0,`&Dhi("q1")`
55 vshl.u64 `&Dhi("q2")`,#16
56 veor d0,`&Dlo("q2")`
57 vshr.u64 `&Dlo("q3")`,#24
58 veor d0,`&Dhi("q2")`
59 vshl.u64 `&Dhi("q3")`,#8
60 veor d0,`&Dlo("q3")`
61 veor d0,`&Dhi("q3")`
62 bx lr
63.size mul_1x1_neon,.-mul_1x1_neon
64#endif
65___
66################
67# private interface to mul_1x1_ialu
68#
69$a="r1";
70$b="r0";
71
72($a0,$a1,$a2,$a12,$a4,$a14)=
73($hi,$lo,$t0,$t1, $i0,$i1 )=map("r$_",(4..9),12);
74
75$mask="r12";
76
77$code.=<<___;
78.type mul_1x1_ialu,%function
79.align 5
80mul_1x1_ialu:
81 mov $a0,#0
82 bic $a1,$a,#3<<30 @ a1=a&0x3fffffff
83 str $a0,[sp,#0] @ tab[0]=0
84 add $a2,$a1,$a1 @ a2=a1<<1
85 str $a1,[sp,#4] @ tab[1]=a1
86 eor $a12,$a1,$a2 @ a1^a2
87 str $a2,[sp,#8] @ tab[2]=a2
88 mov $a4,$a1,lsl#2 @ a4=a1<<2
89 str $a12,[sp,#12] @ tab[3]=a1^a2
90 eor $a14,$a1,$a4 @ a1^a4
91 str $a4,[sp,#16] @ tab[4]=a4
92 eor $a0,$a2,$a4 @ a2^a4
93 str $a14,[sp,#20] @ tab[5]=a1^a4
94 eor $a12,$a12,$a4 @ a1^a2^a4
95 str $a0,[sp,#24] @ tab[6]=a2^a4
96 and $i0,$mask,$b,lsl#2
97 str $a12,[sp,#28] @ tab[7]=a1^a2^a4
98
99 and $i1,$mask,$b,lsr#1
100 ldr $lo,[sp,$i0] @ tab[b & 0x7]
101 and $i0,$mask,$b,lsr#4
102 ldr $t1,[sp,$i1] @ tab[b >> 3 & 0x7]
103 and $i1,$mask,$b,lsr#7
104 ldr $t0,[sp,$i0] @ tab[b >> 6 & 0x7]
105 eor $lo,$lo,$t1,lsl#3 @ stall
106 mov $hi,$t1,lsr#29
107 ldr $t1,[sp,$i1] @ tab[b >> 9 & 0x7]
108
109 and $i0,$mask,$b,lsr#10
110 eor $lo,$lo,$t0,lsl#6
111 eor $hi,$hi,$t0,lsr#26
112 ldr $t0,[sp,$i0] @ tab[b >> 12 & 0x7]
113
114 and $i1,$mask,$b,lsr#13
115 eor $lo,$lo,$t1,lsl#9
116 eor $hi,$hi,$t1,lsr#23
117 ldr $t1,[sp,$i1] @ tab[b >> 15 & 0x7]
118
119 and $i0,$mask,$b,lsr#16
120 eor $lo,$lo,$t0,lsl#12
121 eor $hi,$hi,$t0,lsr#20
122 ldr $t0,[sp,$i0] @ tab[b >> 18 & 0x7]
123
124 and $i1,$mask,$b,lsr#19
125 eor $lo,$lo,$t1,lsl#15
126 eor $hi,$hi,$t1,lsr#17
127 ldr $t1,[sp,$i1] @ tab[b >> 21 & 0x7]
128
129 and $i0,$mask,$b,lsr#22
130 eor $lo,$lo,$t0,lsl#18
131 eor $hi,$hi,$t0,lsr#14
132 ldr $t0,[sp,$i0] @ tab[b >> 24 & 0x7]
133
134 and $i1,$mask,$b,lsr#25
135 eor $lo,$lo,$t1,lsl#21
136 eor $hi,$hi,$t1,lsr#11
137 ldr $t1,[sp,$i1] @ tab[b >> 27 & 0x7]
138
139 tst $a,#1<<30
140 and $i0,$mask,$b,lsr#28
141 eor $lo,$lo,$t0,lsl#24
142 eor $hi,$hi,$t0,lsr#8
143 ldr $t0,[sp,$i0] @ tab[b >> 30 ]
144
145 eorne $lo,$lo,$b,lsl#30
146 eorne $hi,$hi,$b,lsr#2
147 tst $a,#1<<31
148 eor $lo,$lo,$t1,lsl#27
149 eor $hi,$hi,$t1,lsr#5
150 eorne $lo,$lo,$b,lsl#31
151 eorne $hi,$hi,$b,lsr#1
152 eor $lo,$lo,$t0,lsl#30
153 eor $hi,$hi,$t0,lsr#2
154
155 mov pc,lr
156.size mul_1x1_ialu,.-mul_1x1_ialu
157___
158################
159# void bn_GF2m_mul_2x2(BN_ULONG *r,
160# BN_ULONG a1,BN_ULONG a0,
161# BN_ULONG b1,BN_ULONG b0); # r[3..0]=a1a0·b1b0
162
163($A1,$B1,$A0,$B0,$A1B1,$A0B0)=map("d$_",(18..23));
164
165$code.=<<___;
166.global bn_GF2m_mul_2x2
167.type bn_GF2m_mul_2x2,%function
168.align 5
169bn_GF2m_mul_2x2:
170#if __ARM_ARCH__>=7
171 ldr r12,.LOPENSSL_armcap
172.Lpic: ldr r12,[pc,r12]
173 tst r12,#1
174 beq .Lialu
175
176 veor $A1,$A1
177 vmov.32 $B1,r3,r3 @ two copies of b1
178 vmov.32 ${A1}[0],r1 @ a1
179
180 veor $A0,$A0
181 vld1.32 ${B0}[],[sp,:32] @ two copies of b0
182 vmov.32 ${A0}[0],r2 @ a0
183 mov r12,lr
184
185 vmov d16,$A1
186 vmov d17,$B1
187 bl mul_1x1_neon @ a1·b1
188 vmov $A1B1,d0
189
190 vmov d16,$A0
191 vmov d17,$B0
192 bl mul_1x1_neon @ a0·b0
193 vmov $A0B0,d0
194
195 veor d16,$A0,$A1
196 veor d17,$B0,$B1
197 veor $A0,$A0B0,$A1B1
198 bl mul_1x1_neon @ (a0+a1)·(b0+b1)
199
200 veor d0,$A0 @ (a0+a1)·(b0+b1)-a0·b0-a1·b1
201 vshl.u64 d1,d0,#32
202 vshr.u64 d0,d0,#32
203 veor $A0B0,d1
204 veor $A1B1,d0
205 vst1.32 {${A0B0}[0]},[r0,:32]!
206 vst1.32 {${A0B0}[1]},[r0,:32]!
207 vst1.32 {${A1B1}[0]},[r0,:32]!
208 vst1.32 {${A1B1}[1]},[r0,:32]
209 bx r12
210.align 4
211.Lialu:
212#endif
213___
214$ret="r10"; # reassigned 1st argument
215$code.=<<___;
216 stmdb sp!,{r4-r10,lr}
217 mov $ret,r0 @ reassign 1st argument
218 mov $b,r3 @ $b=b1
219 ldr r3,[sp,#32] @ load b0
220 mov $mask,#7<<2
221 sub sp,sp,#32 @ allocate tab[8]
222
223 bl mul_1x1_ialu @ a1·b1
224 str $lo,[$ret,#8]
225 str $hi,[$ret,#12]
226
227 eor $b,$b,r3 @ flip b0 and b1
228 eor $a,$a,r2 @ flip a0 and a1
229 eor r3,r3,$b
230 eor r2,r2,$a
231 eor $b,$b,r3
232 eor $a,$a,r2
233 bl mul_1x1_ialu @ a0·b0
234 str $lo,[$ret]
235 str $hi,[$ret,#4]
236
237 eor $a,$a,r2
238 eor $b,$b,r3
239 bl mul_1x1_ialu @ (a1+a0)·(b1+b0)
240___
241@r=map("r$_",(6..9));
242$code.=<<___;
243 ldmia $ret,{@r[0]-@r[3]}
244 eor $lo,$lo,$hi
245 eor $hi,$hi,@r[1]
246 eor $lo,$lo,@r[0]
247 eor $hi,$hi,@r[2]
248 eor $lo,$lo,@r[3]
249 eor $hi,$hi,@r[3]
250 str $hi,[$ret,#8]
251 eor $lo,$lo,$hi
252 add sp,sp,#32 @ destroy tab[8]
253 str $lo,[$ret,#4]
254
255#if __ARM_ARCH__>=5
256 ldmia sp!,{r4-r10,pc}
257#else
258 ldmia sp!,{r4-r10,lr}
259 tst lr,#1
260 moveq pc,lr @ be binary compatible with V4, yet
261 bx lr @ interoperable with Thumb ISA:-)
262#endif
263.size bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2
264#if __ARM_ARCH__>=7
265.align 5
266.LOPENSSL_armcap:
267.word OPENSSL_armcap_P-(.Lpic+8)
268#endif
269.asciz "GF(2^m) Multiplication for ARMv4/NEON, CRYPTOGAMS by <appro\@openssl.org>"
270.align 5
271
272.comm OPENSSL_armcap_P,4,4
273___
274
275$code =~ s/\`([^\`]*)\`/eval $1/gem;
276$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm; # make it possible to compile with -march=armv4
277print $code;
278close STDOUT; # enforce flush
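
The module above computes products in GF(2)[x] (carry-less multiplication), where partial products are combined with XOR rather than addition, and builds the 2x2 product Karatsuba-style from three 1x1 multiplies. A plain-C reference for the 1x1 (32 x 32 -> 64 bit) multiply that both the integer and NEON paths accelerate; this is only a readability aid, not the routine OpenSSL uses.

#include <stdint.h>
#include <stdio.h>

/* Carry-less 32x32 -> 64 bit multiply: the operation mul_1x1_ialu and
 * mul_1x1_neon implement, written as a simple shift-and-xor reference. */
static uint64_t clmul32(uint32_t a, uint32_t b)
	{
	uint64_t r = 0;
	int i;

	for (i = 0; i < 32; i++)
		if (b & (1u << i))
			r ^= (uint64_t)a << i;	/* XOR, not add: GF(2)[x] */
	return r;
	}

int main(void)
	{
	/* (x+1)*(x+1) = x^2 + 1 over GF(2), so 3 * 3 = 5 here, not 9. */
	printf("0x%llx\n", (unsigned long long)clmul32(3, 3));
	return 0;
	}
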
diff --git a/src/lib/libssl/src/crypto/bn/asm/armv4-mont.pl b/src/lib/libssl/src/crypto/bn/asm/armv4-mont.pl
index 14e0d2d1dd..f78a8b5f0f 100644
--- a/src/lib/libssl/src/crypto/bn/asm/armv4-mont.pl
+++ b/src/lib/libssl/src/crypto/bn/asm/armv4-mont.pl
@@ -23,6 +23,9 @@
23# than 1/2KB. Windows CE port would be trivial, as it's exclusively 23# than 1/2KB. Windows CE port would be trivial, as it's exclusively
24# about decorations, ABI and instruction syntax are identical. 24# about decorations, ABI and instruction syntax are identical.
25 25
26while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
27open STDOUT,">$output";
28
26$num="r0"; # starts as num argument, but holds &tp[num-1] 29$num="r0"; # starts as num argument, but holds &tp[num-1]
27$ap="r1"; 30$ap="r1";
28$bp="r2"; $bi="r2"; $rp="r2"; 31$bp="r2"; $bi="r2"; $rp="r2";
@@ -89,9 +92,9 @@ bn_mul_mont:
89.L1st: 92.L1st:
90 ldr $aj,[$ap],#4 @ ap[j],ap++ 93 ldr $aj,[$ap],#4 @ ap[j],ap++
91 mov $alo,$ahi 94 mov $alo,$ahi
95 ldr $nj,[$np],#4 @ np[j],np++
92 mov $ahi,#0 96 mov $ahi,#0
93 umlal $alo,$ahi,$aj,$bi @ ap[j]*bp[0] 97 umlal $alo,$ahi,$aj,$bi @ ap[j]*bp[0]
94 ldr $nj,[$np],#4 @ np[j],np++
95 mov $nhi,#0 98 mov $nhi,#0
96 umlal $nlo,$nhi,$nj,$n0 @ np[j]*n0 99 umlal $nlo,$nhi,$nj,$n0 @ np[j]*n0
97 adds $nlo,$nlo,$alo 100 adds $nlo,$nlo,$alo
@@ -101,21 +104,21 @@ bn_mul_mont:
101 bne .L1st 104 bne .L1st
102 105
103 adds $nlo,$nlo,$ahi 106 adds $nlo,$nlo,$ahi
107 ldr $tp,[$_bp] @ restore bp
104 mov $nhi,#0 108 mov $nhi,#0
109 ldr $n0,[$_n0] @ restore n0
105 adc $nhi,$nhi,#0 110 adc $nhi,$nhi,#0
106 ldr $tp,[$_bp] @ restore bp
107 str $nlo,[$num] @ tp[num-1]= 111 str $nlo,[$num] @ tp[num-1]=
108 ldr $n0,[$_n0] @ restore n0
109 str $nhi,[$num,#4] @ tp[num]= 112 str $nhi,[$num,#4] @ tp[num]=
110 113
111.Louter: 114.Louter:
112 sub $tj,$num,sp @ "original" $num-1 value 115 sub $tj,$num,sp @ "original" $num-1 value
113 sub $ap,$ap,$tj @ "rewind" ap to &ap[1] 116 sub $ap,$ap,$tj @ "rewind" ap to &ap[1]
114 sub $np,$np,$tj @ "rewind" np to &np[1]
115 ldr $bi,[$tp,#4]! @ *(++bp) 117 ldr $bi,[$tp,#4]! @ *(++bp)
118 sub $np,$np,$tj @ "rewind" np to &np[1]
116 ldr $aj,[$ap,#-4] @ ap[0] 119 ldr $aj,[$ap,#-4] @ ap[0]
117 ldr $nj,[$np,#-4] @ np[0]
118 ldr $alo,[sp] @ tp[0] 120 ldr $alo,[sp] @ tp[0]
121 ldr $nj,[$np,#-4] @ np[0]
119 ldr $tj,[sp,#4] @ tp[1] 122 ldr $tj,[sp,#4] @ tp[1]
120 123
121 mov $ahi,#0 124 mov $ahi,#0
@@ -129,13 +132,13 @@ bn_mul_mont:
129.Linner: 132.Linner:
130 ldr $aj,[$ap],#4 @ ap[j],ap++ 133 ldr $aj,[$ap],#4 @ ap[j],ap++
131 adds $alo,$ahi,$tj @ +=tp[j] 134 adds $alo,$ahi,$tj @ +=tp[j]
135 ldr $nj,[$np],#4 @ np[j],np++
132 mov $ahi,#0 136 mov $ahi,#0
133 umlal $alo,$ahi,$aj,$bi @ ap[j]*bp[i] 137 umlal $alo,$ahi,$aj,$bi @ ap[j]*bp[i]
134 ldr $nj,[$np],#4 @ np[j],np++
135 mov $nhi,#0 138 mov $nhi,#0
136 umlal $nlo,$nhi,$nj,$n0 @ np[j]*n0 139 umlal $nlo,$nhi,$nj,$n0 @ np[j]*n0
137 ldr $tj,[$tp,#8] @ tp[j+1]
138 adc $ahi,$ahi,#0 140 adc $ahi,$ahi,#0
141 ldr $tj,[$tp,#8] @ tp[j+1]
139 adds $nlo,$nlo,$alo 142 adds $nlo,$nlo,$alo
140 str $nlo,[$tp],#4 @ tp[j-1]=,tp++ 143 str $nlo,[$tp],#4 @ tp[j-1]=,tp++
141 adc $nlo,$nhi,#0 144 adc $nlo,$nhi,#0
@@ -144,13 +147,13 @@ bn_mul_mont:
144 147
145 adds $nlo,$nlo,$ahi 148 adds $nlo,$nlo,$ahi
146 mov $nhi,#0 149 mov $nhi,#0
150 ldr $tp,[$_bp] @ restore bp
147 adc $nhi,$nhi,#0 151 adc $nhi,$nhi,#0
152 ldr $n0,[$_n0] @ restore n0
148 adds $nlo,$nlo,$tj 153 adds $nlo,$nlo,$tj
149 adc $nhi,$nhi,#0
150 ldr $tp,[$_bp] @ restore bp
151 ldr $tj,[$_bpend] @ restore &bp[num] 154 ldr $tj,[$_bpend] @ restore &bp[num]
155 adc $nhi,$nhi,#0
152 str $nlo,[$num] @ tp[num-1]= 156 str $nlo,[$num] @ tp[num-1]=
153 ldr $n0,[$_n0] @ restore n0
154 str $nhi,[$num,#4] @ tp[num]= 157 str $nhi,[$num,#4] @ tp[num]=
155 158
156 cmp $tp,$tj 159 cmp $tp,$tj
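
The rescheduling above only hoists loads away from the umlal instructions that consume them; the arithmetic is unchanged. Each umlal performs a 32x32->64 multiply accumulated into a {hi:lo} register pair, the primitive the word-wise Montgomery inner loops are built from. A C restatement of that single step (a model for reading the assembly, not the production bn_mul_mont):

#include <stdint.h>
#include <stdio.h>

/* One umlal: {hi:lo} += a * b. */
static void umlal_model(uint32_t *lo, uint32_t *hi, uint32_t a, uint32_t b)
	{
	uint64_t t = (uint64_t)a * b + *lo;

	*lo = (uint32_t)t;
	*hi += (uint32_t)(t >> 32);	/* carry the high word into hi */
	}

int main(void)
	{
	uint32_t lo = 0xffffffffu, hi = 0;

	umlal_model(&lo, &hi, 0xffffffffu, 2);	/* 0xffffffff*2 + 0xffffffff */
	printf("hi=0x%x lo=0x%x\n", hi, lo);	/* hi=0x2 lo=0xfffffffd */
	return 0;
	}
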
diff --git a/src/lib/libssl/src/crypto/bn/asm/ia64-mont.pl b/src/lib/libssl/src/crypto/bn/asm/ia64-mont.pl
new file mode 100644
index 0000000000..e258658428
--- /dev/null
+++ b/src/lib/libssl/src/crypto/bn/asm/ia64-mont.pl
@@ -0,0 +1,851 @@
1#!/usr/bin/env perl
2#
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9
10# January 2010
11#
12# "Teaser" Montgomery multiplication module for IA-64. There are
13# several possibilities for improvement:
14#
15# - modulo-scheduling outer loop would eliminate quite a number of
16# stalls after ldf8, xma and getf.sig outside inner loop and
17# improve shorter key performance;
18# - shorter vector support [with input vectors being fetched only
19# once] should be added;
20# - 2x unroll with help of n0[1] would make the code scalable on
21# "wider" IA-64, "wider" than Itanium 2 that is, which is not of
22# acute interest, because upcoming Tukwila's individual cores are
23# reportedly based on Itanium 2 design;
24# - dedicated squaring procedure(?);
25#
26# January 2010
27#
28# Shorter vector support is implemented by zero-padding ap and np
29# vectors up to 8 elements, or 512 bits. This means that 256-bit
30# inputs will be processed only 2 times faster than 512-bit inputs,
31# not 4 [as one would expect, because algorithm complexity is n^2].
32# The reason for padding is that inputs shorter than 512 bits won't
33# be processed faster anyway, because minimal critical path of the
34# core loop happens to match 512-bit timing. Either way, it resulted
35# in >100% improvement of 512-bit RSA sign benchmark and 50% - of
36# 1024-bit one [in comparison to original version of *this* module].
37#
38# So far 'openssl speed rsa dsa' output on 900MHz Itanium 2 *with*
39# this module is:
40# sign verify sign/s verify/s
41# rsa 512 bits 0.000290s 0.000024s 3452.8 42031.4
42# rsa 1024 bits 0.000793s 0.000058s 1261.7 17172.0
43# rsa 2048 bits 0.005908s 0.000148s 169.3 6754.0
44# rsa 4096 bits 0.033456s 0.000469s 29.9 2133.6
45# dsa 512 bits 0.000253s 0.000198s 3949.9 5057.0
46# dsa 1024 bits 0.000585s 0.000607s 1708.4 1647.4
47# dsa 2048 bits 0.001453s 0.001703s 688.1 587.4
48#
49# ... and *without* (but still with ia64.S):
50#
51# rsa 512 bits 0.000670s 0.000041s 1491.8 24145.5
52# rsa 1024 bits 0.001988s 0.000080s 502.9 12499.3
53# rsa 2048 bits 0.008702s 0.000189s 114.9 5293.9
54# rsa 4096 bits 0.043860s 0.000533s 22.8 1875.9
55# dsa 512 bits 0.000441s 0.000427s 2265.3 2340.6
56# dsa 1024 bits 0.000823s 0.000867s 1215.6 1153.2
57# dsa 2048 bits 0.001894s 0.002179s 528.1 458.9
58#
 59# As can be seen, RSA sign performance improves by 130-30% (less for
 60# longer keys), while verify improves by 74-13%.
61# DSA performance improves by 115-30%.
62
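
# Editor's note: for orientation, the following is a minimal word-level C
# sketch (CIOS method) of what bn_mul_mont computes; it is illustrative only,
# not OpenSSL code, and the name mont_mul_sketch, the VLA temporaries and the
# use of unsigned __int128 are assumptions (64-bit BN_ULONG, GCC/Clang-style
# 128-bit type).

#include <stdint.h>

/* rp = ap*bp*R^-1 mod np, with R = 2^(64*num) and n0p[0] = -np[0]^-1 mod 2^64 */
static void mont_mul_sketch(uint64_t *rp, const uint64_t *ap,
                            const uint64_t *bp, const uint64_t *np,
                            const uint64_t *n0p, int num)
{
    uint64_t n0 = n0p[0];
    uint64_t tp[num + 2];                       /* t[0..num+1], zeroed */
    for (int i = 0; i < num + 2; i++) tp[i] = 0;

    for (int i = 0; i < num; i++) {
        unsigned __int128 c = 0;
        for (int j = 0; j < num; j++) {         /* t += ap * bp[i] */
            c += (unsigned __int128)ap[j] * bp[i] + tp[j];
            tp[j] = (uint64_t)c; c >>= 64;
        }
        c += tp[num];
        tp[num] = (uint64_t)c; tp[num + 1] = (uint64_t)(c >> 64);

        uint64_t m = tp[0] * n0;                /* reduction multiplier */
        c = ((unsigned __int128)np[0] * m + tp[0]) >> 64;  /* low word is 0 */
        for (int j = 1; j < num; j++) {         /* t = (t + m*np) / 2^64 */
            c += (unsigned __int128)np[j] * m + tp[j];
            tp[j - 1] = (uint64_t)c; c >>= 64;
        }
        c += tp[num];
        tp[num - 1] = (uint64_t)c;
        tp[num] = tp[num + 1] + (uint64_t)(c >> 64);
        tp[num + 1] = 0;
    }

    /* final conditional subtraction: result is t - np if t >= np, else t */
    uint64_t sub[num];
    uint64_t borrow = 0;
    for (int j = 0; j < num; j++) {
        unsigned __int128 d = (unsigned __int128)tp[j] - np[j] - borrow;
        sub[j] = (uint64_t)d;
        borrow = (uint64_t)(d >> 64) & 1;       /* 1 if the subtraction wrapped */
    }
    int keep_sub = tp[num] >= borrow;
    for (int j = 0; j < num; j++)
        rp[j] = keep_sub ? sub[j] : tp[j];
}
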
63if ($^O eq "hpux") {
64 $ADDP="addp4";
65 for (@ARGV) { $ADDP="add" if (/[\+DD|\-mlp]64/); }
66} else { $ADDP="add"; }
67
68$code=<<___;
69.explicit
70.text
71
72// int bn_mul_mont (BN_ULONG *rp,const BN_ULONG *ap,
73// const BN_ULONG *bp,const BN_ULONG *np,
74// const BN_ULONG *n0p,int num);
75.align 64
76.global bn_mul_mont#
77.proc bn_mul_mont#
78bn_mul_mont:
79 .prologue
80 .body
81{ .mmi; cmp4.le p6,p7=2,r37;;
82(p6) cmp4.lt.unc p8,p9=8,r37
83 mov ret0=r0 };;
84{ .bbb;
85(p9) br.cond.dptk.many bn_mul_mont_8
86(p8) br.cond.dpnt.many bn_mul_mont_general
87(p7) br.ret.spnt.many b0 };;
88.endp bn_mul_mont#
89
90prevfs=r2; prevpr=r3; prevlc=r10; prevsp=r11;
91
92rptr=r8; aptr=r9; bptr=r14; nptr=r15;
93tptr=r16; // &tp[0]
94tp_1=r17; // &tp[-1]
95num=r18; len=r19; lc=r20;
96topbit=r21; // carry bit from tmp[num]
97
98n0=f6;
99m0=f7;
100bi=f8;
101
102.align 64
103.local bn_mul_mont_general#
104.proc bn_mul_mont_general#
105bn_mul_mont_general:
106 .prologue
107{ .mmi; .save ar.pfs,prevfs
108 alloc prevfs=ar.pfs,6,2,0,8
109 $ADDP aptr=0,in1
110 .save ar.lc,prevlc
111 mov prevlc=ar.lc }
112{ .mmi; .vframe prevsp
113 mov prevsp=sp
114 $ADDP bptr=0,in2
115 .save pr,prevpr
116 mov prevpr=pr };;
117
118 .body
119 .rotf alo[6],nlo[4],ahi[8],nhi[6]
120 .rotr a[3],n[3],t[2]
121
122{ .mmi; ldf8 bi=[bptr],8 // (*bp++)
123 ldf8 alo[4]=[aptr],16 // ap[0]
124 $ADDP r30=8,in1 };;
125{ .mmi; ldf8 alo[3]=[r30],16 // ap[1]
126 ldf8 alo[2]=[aptr],16 // ap[2]
127 $ADDP in4=0,in4 };;
128{ .mmi; ldf8 alo[1]=[r30] // ap[3]
129 ldf8 n0=[in4] // n0
130 $ADDP rptr=0,in0 }
131{ .mmi; $ADDP nptr=0,in3
132 mov r31=16
133 zxt4 num=in5 };;
134{ .mmi; ldf8 nlo[2]=[nptr],8 // np[0]
135 shladd len=num,3,r0
136 shladd r31=num,3,r31 };;
137{ .mmi; ldf8 nlo[1]=[nptr],8 // np[1]
138 add lc=-5,num
139 sub r31=sp,r31 };;
140{ .mfb; and sp=-16,r31 // alloca
141 xmpy.hu ahi[2]=alo[4],bi // ap[0]*bp[0]
142 nop.b 0 }
143{ .mfb; nop.m 0
144 xmpy.lu alo[4]=alo[4],bi
145 brp.loop.imp .L1st_ctop,.L1st_cend-16
146 };;
147{ .mfi; nop.m 0
148 xma.hu ahi[1]=alo[3],bi,ahi[2] // ap[1]*bp[0]
149 add tp_1=8,sp }
150{ .mfi; nop.m 0
151 xma.lu alo[3]=alo[3],bi,ahi[2]
152 mov pr.rot=0x20001f<<16
153 // ------^----- (p40) at first (p23)
154 // ----------^^ p[16:20]=1
155 };;
156{ .mfi; nop.m 0
157 xmpy.lu m0=alo[4],n0 // (ap[0]*bp[0])*n0
158 mov ar.lc=lc }
159{ .mfi; nop.m 0
160 fcvt.fxu.s1 nhi[1]=f0
161 mov ar.ec=8 };;
162
163.align 32
164.L1st_ctop:
165.pred.rel "mutex",p40,p42
166{ .mfi; (p16) ldf8 alo[0]=[aptr],8 // *(aptr++)
167 (p18) xma.hu ahi[0]=alo[2],bi,ahi[1]
168 (p40) add n[2]=n[2],a[2] } // (p23) }
169{ .mfi; (p18) ldf8 nlo[0]=[nptr],8 // *(nptr++)(p16)
170 (p18) xma.lu alo[2]=alo[2],bi,ahi[1]
171 (p42) add n[2]=n[2],a[2],1 };; // (p23)
172{ .mfi; (p21) getf.sig a[0]=alo[5]
173 (p20) xma.hu nhi[0]=nlo[2],m0,nhi[1]
174 (p42) cmp.leu p41,p39=n[2],a[2] } // (p23)
175{ .mfi; (p23) st8 [tp_1]=n[2],8
176 (p20) xma.lu nlo[2]=nlo[2],m0,nhi[1]
177 (p40) cmp.ltu p41,p39=n[2],a[2] } // (p23)
178{ .mmb; (p21) getf.sig n[0]=nlo[3]
179 (p16) nop.m 0
180 br.ctop.sptk .L1st_ctop };;
181.L1st_cend:
182
183{ .mmi; getf.sig a[0]=ahi[6] // (p24)
184 getf.sig n[0]=nhi[4]
185 add num=-1,num };; // num--
186{ .mmi; .pred.rel "mutex",p40,p42
187(p40) add n[0]=n[0],a[0]
188(p42) add n[0]=n[0],a[0],1
189 sub aptr=aptr,len };; // rewind
190{ .mmi; .pred.rel "mutex",p40,p42
191(p40) cmp.ltu p41,p39=n[0],a[0]
192(p42) cmp.leu p41,p39=n[0],a[0]
193 sub nptr=nptr,len };;
194{ .mmi; .pred.rel "mutex",p39,p41
195(p39) add topbit=r0,r0
196(p41) add topbit=r0,r0,1
197 nop.i 0 }
198{ .mmi; st8 [tp_1]=n[0]
199 add tptr=16,sp
200 add tp_1=8,sp };;
201
202.Louter:
203{ .mmi; ldf8 bi=[bptr],8 // (*bp++)
204 ldf8 ahi[3]=[tptr] // tp[0]
205 add r30=8,aptr };;
206{ .mmi; ldf8 alo[4]=[aptr],16 // ap[0]
207 ldf8 alo[3]=[r30],16 // ap[1]
208 add r31=8,nptr };;
209{ .mfb; ldf8 alo[2]=[aptr],16 // ap[2]
210 xma.hu ahi[2]=alo[4],bi,ahi[3] // ap[0]*bp[i]+tp[0]
211 brp.loop.imp .Linner_ctop,.Linner_cend-16
212 }
213{ .mfb; ldf8 alo[1]=[r30] // ap[3]
214 xma.lu alo[4]=alo[4],bi,ahi[3]
215 clrrrb.pr };;
216{ .mfi; ldf8 nlo[2]=[nptr],16 // np[0]
217 xma.hu ahi[1]=alo[3],bi,ahi[2] // ap[1]*bp[i]
218 nop.i 0 }
219{ .mfi; ldf8 nlo[1]=[r31] // np[1]
220 xma.lu alo[3]=alo[3],bi,ahi[2]
221 mov pr.rot=0x20101f<<16
222 // ------^----- (p40) at first (p23)
223 // --------^--- (p30) at first (p22)
224 // ----------^^ p[16:20]=1
225 };;
226{ .mfi; st8 [tptr]=r0 // tp[0] is already accounted
227 xmpy.lu m0=alo[4],n0 // (ap[0]*bp[i]+tp[0])*n0
228 mov ar.lc=lc }
229{ .mfi;
230 fcvt.fxu.s1 nhi[1]=f0
231 mov ar.ec=8 };;
232
233// This loop spins in 4*(n+7) ticks on Itanium 2 and should spin in
234// 7*(n+7) ticks on Itanium (the one codenamed Merced). Factor of 7
 235// in the latter case accounts for a two-tick pipeline stall, which means
 236// that its performance would be ~20% lower than optimal. No
 237// attempt was made to address this, because the original Itanium is
 238// hardly represented in the wild...
239.align 32
240.Linner_ctop:
241.pred.rel "mutex",p40,p42
242.pred.rel "mutex",p30,p32
243{ .mfi; (p16) ldf8 alo[0]=[aptr],8 // *(aptr++)
244 (p18) xma.hu ahi[0]=alo[2],bi,ahi[1]
245 (p40) add n[2]=n[2],a[2] } // (p23)
246{ .mfi; (p16) nop.m 0
247 (p18) xma.lu alo[2]=alo[2],bi,ahi[1]
248 (p42) add n[2]=n[2],a[2],1 };; // (p23)
249{ .mfi; (p21) getf.sig a[0]=alo[5]
250 (p16) nop.f 0
251 (p40) cmp.ltu p41,p39=n[2],a[2] } // (p23)
252{ .mfi; (p21) ld8 t[0]=[tptr],8
253 (p16) nop.f 0
254 (p42) cmp.leu p41,p39=n[2],a[2] };; // (p23)
255{ .mfi; (p18) ldf8 nlo[0]=[nptr],8 // *(nptr++)
256 (p20) xma.hu nhi[0]=nlo[2],m0,nhi[1]
257 (p30) add a[1]=a[1],t[1] } // (p22)
258{ .mfi; (p16) nop.m 0
259 (p20) xma.lu nlo[2]=nlo[2],m0,nhi[1]
260 (p32) add a[1]=a[1],t[1],1 };; // (p22)
261{ .mmi; (p21) getf.sig n[0]=nlo[3]
262 (p16) nop.m 0
263 (p30) cmp.ltu p31,p29=a[1],t[1] } // (p22)
264{ .mmb; (p23) st8 [tp_1]=n[2],8
265 (p32) cmp.leu p31,p29=a[1],t[1] // (p22)
266 br.ctop.sptk .Linner_ctop };;
267.Linner_cend:
268
269{ .mmi; getf.sig a[0]=ahi[6] // (p24)
270 getf.sig n[0]=nhi[4]
271 nop.i 0 };;
272
273{ .mmi; .pred.rel "mutex",p31,p33
274(p31) add a[0]=a[0],topbit
275(p33) add a[0]=a[0],topbit,1
276 mov topbit=r0 };;
277{ .mfi; .pred.rel "mutex",p31,p33
278(p31) cmp.ltu p32,p30=a[0],topbit
279(p33) cmp.leu p32,p30=a[0],topbit
280 }
281{ .mfi; .pred.rel "mutex",p40,p42
282(p40) add n[0]=n[0],a[0]
283(p42) add n[0]=n[0],a[0],1
284 };;
285{ .mmi; .pred.rel "mutex",p44,p46
286(p40) cmp.ltu p41,p39=n[0],a[0]
287(p42) cmp.leu p41,p39=n[0],a[0]
288(p32) add topbit=r0,r0,1 }
289
290{ .mmi; st8 [tp_1]=n[0],8
291 cmp4.ne p6,p0=1,num
292 sub aptr=aptr,len };; // rewind
293{ .mmi; sub nptr=nptr,len
294(p41) add topbit=r0,r0,1
295 add tptr=16,sp }
296{ .mmb; add tp_1=8,sp
297 add num=-1,num // num--
298(p6) br.cond.sptk.many .Louter };;
299
300{ .mbb; add lc=4,lc
301 brp.loop.imp .Lsub_ctop,.Lsub_cend-16
302 clrrrb.pr };;
303{ .mii; nop.m 0
304 mov pr.rot=0x10001<<16
305 // ------^---- (p33) at first (p17)
306 mov ar.lc=lc }
307{ .mii; nop.m 0
308 mov ar.ec=3
309 nop.i 0 };;
310
311.Lsub_ctop:
312.pred.rel "mutex",p33,p35
313{ .mfi; (p16) ld8 t[0]=[tptr],8 // t=*(tp++)
314 (p16) nop.f 0
315 (p33) sub n[1]=t[1],n[1] } // (p17)
316{ .mfi; (p16) ld8 n[0]=[nptr],8 // n=*(np++)
317 (p16) nop.f 0
318 (p35) sub n[1]=t[1],n[1],1 };; // (p17)
319{ .mib; (p18) st8 [rptr]=n[2],8 // *(rp++)=r
320 (p33) cmp.gtu p34,p32=n[1],t[1] // (p17)
321 (p18) nop.b 0 }
322{ .mib; (p18) nop.m 0
323 (p35) cmp.geu p34,p32=n[1],t[1] // (p17)
324 br.ctop.sptk .Lsub_ctop };;
325.Lsub_cend:
326
327{ .mmb; .pred.rel "mutex",p34,p36
328(p34) sub topbit=topbit,r0 // (p19)
329(p36) sub topbit=topbit,r0,1
330 brp.loop.imp .Lcopy_ctop,.Lcopy_cend-16
331 }
332{ .mmb; sub rptr=rptr,len // rewind
333 sub tptr=tptr,len
334 clrrrb.pr };;
335{ .mmi; and aptr=tptr,topbit
336 andcm bptr=rptr,topbit
337 mov pr.rot=1<<16 };;
338{ .mii; or nptr=aptr,bptr
339 mov ar.lc=lc
340 mov ar.ec=3 };;
341
342.Lcopy_ctop:
343{ .mmb; (p16) ld8 n[0]=[nptr],8
344 (p18) st8 [tptr]=r0,8
345 (p16) nop.b 0 }
346{ .mmb; (p16) nop.m 0
347 (p18) st8 [rptr]=n[2],8
348 br.ctop.sptk .Lcopy_ctop };;
349.Lcopy_cend:
350
351{ .mmi; mov ret0=1 // signal "handled"
352 rum 1<<5 // clear um.mfh
353 mov ar.lc=prevlc }
354{ .mib; .restore sp
355 mov sp=prevsp
356 mov pr=prevpr,0x1ffff
357 br.ret.sptk.many b0 };;
358.endp bn_mul_mont_general#
359
360a1=r16; a2=r17; a3=r18; a4=r19; a5=r20; a6=r21; a7=r22; a8=r23;
361n1=r24; n2=r25; n3=r26; n4=r27; n5=r28; n6=r29; n7=r30; n8=r31;
362t0=r15;
363
364ai0=f8; ai1=f9; ai2=f10; ai3=f11; ai4=f12; ai5=f13; ai6=f14; ai7=f15;
365ni0=f16; ni1=f17; ni2=f18; ni3=f19; ni4=f20; ni5=f21; ni6=f22; ni7=f23;
366
367.align 64
368.skip 48 // aligns loop body
369.local bn_mul_mont_8#
370.proc bn_mul_mont_8#
371bn_mul_mont_8:
372 .prologue
373{ .mmi; .save ar.pfs,prevfs
374 alloc prevfs=ar.pfs,6,2,0,8
375 .vframe prevsp
376 mov prevsp=sp
377 .save ar.lc,prevlc
378 mov prevlc=ar.lc }
379{ .mmi; add r17=-6*16,sp
380 add sp=-7*16,sp
381 .save pr,prevpr
382 mov prevpr=pr };;
383
384{ .mmi; .save.gf 0,0x10
385 stf.spill [sp]=f16,-16
386 .save.gf 0,0x20
387 stf.spill [r17]=f17,32
388 add r16=-5*16,prevsp};;
389{ .mmi; .save.gf 0,0x40
390 stf.spill [r16]=f18,32
391 .save.gf 0,0x80
392 stf.spill [r17]=f19,32
393 $ADDP aptr=0,in1 };;
394{ .mmi; .save.gf 0,0x100
395 stf.spill [r16]=f20,32
396 .save.gf 0,0x200
397 stf.spill [r17]=f21,32
398 $ADDP r29=8,in1 };;
399{ .mmi; .save.gf 0,0x400
400 stf.spill [r16]=f22
401 .save.gf 0,0x800
402 stf.spill [r17]=f23
403 $ADDP rptr=0,in0 };;
404
405 .body
406 .rotf bj[8],mj[2],tf[2],alo[10],ahi[10],nlo[10],nhi[10]
407 .rotr t[8]
408
409// load input vectors padding them to 8 elements
410{ .mmi; ldf8 ai0=[aptr],16 // ap[0]
411 ldf8 ai1=[r29],16 // ap[1]
412 $ADDP bptr=0,in2 }
413{ .mmi; $ADDP r30=8,in2
414 $ADDP nptr=0,in3
415 $ADDP r31=8,in3 };;
416{ .mmi; ldf8 bj[7]=[bptr],16 // bp[0]
417 ldf8 bj[6]=[r30],16 // bp[1]
418 cmp4.le p4,p5=3,in5 }
419{ .mmi; ldf8 ni0=[nptr],16 // np[0]
420 ldf8 ni1=[r31],16 // np[1]
421 cmp4.le p6,p7=4,in5 };;
422
423{ .mfi; (p4)ldf8 ai2=[aptr],16 // ap[2]
424 (p5)fcvt.fxu ai2=f0
425 cmp4.le p8,p9=5,in5 }
426{ .mfi; (p6)ldf8 ai3=[r29],16 // ap[3]
427 (p7)fcvt.fxu ai3=f0
428 cmp4.le p10,p11=6,in5 }
429{ .mfi; (p4)ldf8 bj[5]=[bptr],16 // bp[2]
430 (p5)fcvt.fxu bj[5]=f0
431 cmp4.le p12,p13=7,in5 }
432{ .mfi; (p6)ldf8 bj[4]=[r30],16 // bp[3]
433 (p7)fcvt.fxu bj[4]=f0
434 cmp4.le p14,p15=8,in5 }
435{ .mfi; (p4)ldf8 ni2=[nptr],16 // np[2]
436 (p5)fcvt.fxu ni2=f0
437 addp4 r28=-1,in5 }
438{ .mfi; (p6)ldf8 ni3=[r31],16 // np[3]
439 (p7)fcvt.fxu ni3=f0
440 $ADDP in4=0,in4 };;
441
442{ .mfi; ldf8 n0=[in4]
443 fcvt.fxu tf[1]=f0
444 nop.i 0 }
445
446{ .mfi; (p8)ldf8 ai4=[aptr],16 // ap[4]
447 (p9)fcvt.fxu ai4=f0
448 mov t[0]=r0 }
449{ .mfi; (p10)ldf8 ai5=[r29],16 // ap[5]
450 (p11)fcvt.fxu ai5=f0
451 mov t[1]=r0 }
452{ .mfi; (p8)ldf8 bj[3]=[bptr],16 // bp[4]
453 (p9)fcvt.fxu bj[3]=f0
454 mov t[2]=r0 }
455{ .mfi; (p10)ldf8 bj[2]=[r30],16 // bp[5]
456 (p11)fcvt.fxu bj[2]=f0
457 mov t[3]=r0 }
458{ .mfi; (p8)ldf8 ni4=[nptr],16 // np[4]
459 (p9)fcvt.fxu ni4=f0
460 mov t[4]=r0 }
461{ .mfi; (p10)ldf8 ni5=[r31],16 // np[5]
462 (p11)fcvt.fxu ni5=f0
463 mov t[5]=r0 };;
464
465{ .mfi; (p12)ldf8 ai6=[aptr],16 // ap[6]
466 (p13)fcvt.fxu ai6=f0
467 mov t[6]=r0 }
468{ .mfi; (p14)ldf8 ai7=[r29],16 // ap[7]
469 (p15)fcvt.fxu ai7=f0
470 mov t[7]=r0 }
471{ .mfi; (p12)ldf8 bj[1]=[bptr],16 // bp[6]
472 (p13)fcvt.fxu bj[1]=f0
473 mov ar.lc=r28 }
474{ .mfi; (p14)ldf8 bj[0]=[r30],16 // bp[7]
475 (p15)fcvt.fxu bj[0]=f0
476 mov ar.ec=1 }
477{ .mfi; (p12)ldf8 ni6=[nptr],16 // np[6]
478 (p13)fcvt.fxu ni6=f0
479 mov pr.rot=1<<16 }
480{ .mfb; (p14)ldf8 ni7=[r31],16 // np[7]
481 (p15)fcvt.fxu ni7=f0
482 brp.loop.imp .Louter_8_ctop,.Louter_8_cend-16
483 };;
484
485// The loop is scheduled for 32*n ticks on Itanium 2. Actual attempt
486// to measure with help of Interval Time Counter indicated that the
487// factor is a tad higher: 33 or 34, if not 35. Exact measurement and
488// addressing the issue is problematic, because I don't have access
 489// to a platform-specific instruction-level profiler. On Itanium it
490// should run in 56*n ticks, because of higher xma latency...
491.Louter_8_ctop:
492 .pred.rel "mutex",p40,p42
493 .pred.rel "mutex",p48,p50
494{ .mfi; (p16) nop.m 0 // 0:
495 (p16) xma.hu ahi[0]=ai0,bj[7],tf[1] // ap[0]*b[i]+t[0]
496 (p40) add a3=a3,n3 } // (p17) a3+=n3
497{ .mfi; (p42) add a3=a3,n3,1
498 (p16) xma.lu alo[0]=ai0,bj[7],tf[1]
499 (p16) nop.i 0 };;
500{ .mii; (p17) getf.sig a7=alo[8] // 1:
501 (p48) add t[6]=t[6],a3 // (p17) t[6]+=a3
502 (p50) add t[6]=t[6],a3,1 };;
503{ .mfi; (p17) getf.sig a8=ahi[8] // 2:
504 (p17) xma.hu nhi[7]=ni6,mj[1],nhi[6] // np[6]*m0
505 (p40) cmp.ltu p43,p41=a3,n3 }
506{ .mfi; (p42) cmp.leu p43,p41=a3,n3
507 (p17) xma.lu nlo[7]=ni6,mj[1],nhi[6]
508 (p16) nop.i 0 };;
509{ .mii; (p17) getf.sig n5=nlo[6] // 3:
510 (p48) cmp.ltu p51,p49=t[6],a3
511 (p50) cmp.leu p51,p49=t[6],a3 };;
512 .pred.rel "mutex",p41,p43
513 .pred.rel "mutex",p49,p51
514{ .mfi; (p16) nop.m 0 // 4:
515 (p16) xma.hu ahi[1]=ai1,bj[7],ahi[0] // ap[1]*b[i]
516 (p41) add a4=a4,n4 } // (p17) a4+=n4
517{ .mfi; (p43) add a4=a4,n4,1
518 (p16) xma.lu alo[1]=ai1,bj[7],ahi[0]
519 (p16) nop.i 0 };;
520{ .mfi; (p49) add t[5]=t[5],a4 // 5: (p17) t[5]+=a4
521 (p16) xmpy.lu mj[0]=alo[0],n0 // (ap[0]*b[i]+t[0])*n0
522 (p51) add t[5]=t[5],a4,1 };;
523{ .mfi; (p16) nop.m 0 // 6:
524 (p17) xma.hu nhi[8]=ni7,mj[1],nhi[7] // np[7]*m0
525 (p41) cmp.ltu p42,p40=a4,n4 }
526{ .mfi; (p43) cmp.leu p42,p40=a4,n4
527 (p17) xma.lu nlo[8]=ni7,mj[1],nhi[7]
528 (p16) nop.i 0 };;
529{ .mii; (p17) getf.sig n6=nlo[7] // 7:
530 (p49) cmp.ltu p50,p48=t[5],a4
531 (p51) cmp.leu p50,p48=t[5],a4 };;
532 .pred.rel "mutex",p40,p42
533 .pred.rel "mutex",p48,p50
534{ .mfi; (p16) nop.m 0 // 8:
535 (p16) xma.hu ahi[2]=ai2,bj[7],ahi[1] // ap[2]*b[i]
536 (p40) add a5=a5,n5 } // (p17) a5+=n5
537{ .mfi; (p42) add a5=a5,n5,1
538 (p16) xma.lu alo[2]=ai2,bj[7],ahi[1]
539 (p16) nop.i 0 };;
540{ .mii; (p16) getf.sig a1=alo[1] // 9:
541 (p48) add t[4]=t[4],a5 // p(17) t[4]+=a5
542 (p50) add t[4]=t[4],a5,1 };;
543{ .mfi; (p16) nop.m 0 // 10:
544 (p16) xma.hu nhi[0]=ni0,mj[0],alo[0] // np[0]*m0
545 (p40) cmp.ltu p43,p41=a5,n5 }
546{ .mfi; (p42) cmp.leu p43,p41=a5,n5
547 (p16) xma.lu nlo[0]=ni0,mj[0],alo[0]
548 (p16) nop.i 0 };;
549{ .mii; (p17) getf.sig n7=nlo[8] // 11:
550 (p48) cmp.ltu p51,p49=t[4],a5
551 (p50) cmp.leu p51,p49=t[4],a5 };;
552 .pred.rel "mutex",p41,p43
553 .pred.rel "mutex",p49,p51
554{ .mfi; (p17) getf.sig n8=nhi[8] // 12:
555 (p16) xma.hu ahi[3]=ai3,bj[7],ahi[2] // ap[3]*b[i]
556 (p41) add a6=a6,n6 } // (p17) a6+=n6
557{ .mfi; (p43) add a6=a6,n6,1
558 (p16) xma.lu alo[3]=ai3,bj[7],ahi[2]
559 (p16) nop.i 0 };;
560{ .mii; (p16) getf.sig a2=alo[2] // 13:
561 (p49) add t[3]=t[3],a6 // (p17) t[3]+=a6
562 (p51) add t[3]=t[3],a6,1 };;
563{ .mfi; (p16) nop.m 0 // 14:
564 (p16) xma.hu nhi[1]=ni1,mj[0],nhi[0] // np[1]*m0
565 (p41) cmp.ltu p42,p40=a6,n6 }
566{ .mfi; (p43) cmp.leu p42,p40=a6,n6
567 (p16) xma.lu nlo[1]=ni1,mj[0],nhi[0]
568 (p16) nop.i 0 };;
569{ .mii; (p16) nop.m 0 // 15:
570 (p49) cmp.ltu p50,p48=t[3],a6
571 (p51) cmp.leu p50,p48=t[3],a6 };;
572 .pred.rel "mutex",p40,p42
573 .pred.rel "mutex",p48,p50
574{ .mfi; (p16) nop.m 0 // 16:
575 (p16) xma.hu ahi[4]=ai4,bj[7],ahi[3] // ap[4]*b[i]
576 (p40) add a7=a7,n7 } // (p17) a7+=n7
577{ .mfi; (p42) add a7=a7,n7,1
578 (p16) xma.lu alo[4]=ai4,bj[7],ahi[3]
579 (p16) nop.i 0 };;
580{ .mii; (p16) getf.sig a3=alo[3] // 17:
581 (p48) add t[2]=t[2],a7 // (p17) t[2]+=a7
582 (p50) add t[2]=t[2],a7,1 };;
583{ .mfi; (p16) nop.m 0 // 18:
584 (p16) xma.hu nhi[2]=ni2,mj[0],nhi[1] // np[2]*m0
585 (p40) cmp.ltu p43,p41=a7,n7 }
586{ .mfi; (p42) cmp.leu p43,p41=a7,n7
587 (p16) xma.lu nlo[2]=ni2,mj[0],nhi[1]
588 (p16) nop.i 0 };;
589{ .mii; (p16) getf.sig n1=nlo[1] // 19:
590 (p48) cmp.ltu p51,p49=t[2],a7
591 (p50) cmp.leu p51,p49=t[2],a7 };;
592 .pred.rel "mutex",p41,p43
593 .pred.rel "mutex",p49,p51
594{ .mfi; (p16) nop.m 0 // 20:
595 (p16) xma.hu ahi[5]=ai5,bj[7],ahi[4] // ap[5]*b[i]
596 (p41) add a8=a8,n8 } // (p17) a8+=n8
597{ .mfi; (p43) add a8=a8,n8,1
598 (p16) xma.lu alo[5]=ai5,bj[7],ahi[4]
599 (p16) nop.i 0 };;
600{ .mii; (p16) getf.sig a4=alo[4] // 21:
601 (p49) add t[1]=t[1],a8 // (p17) t[1]+=a8
602 (p51) add t[1]=t[1],a8,1 };;
603{ .mfi; (p16) nop.m 0 // 22:
604 (p16) xma.hu nhi[3]=ni3,mj[0],nhi[2] // np[3]*m0
605 (p41) cmp.ltu p42,p40=a8,n8 }
606{ .mfi; (p43) cmp.leu p42,p40=a8,n8
607 (p16) xma.lu nlo[3]=ni3,mj[0],nhi[2]
608 (p16) nop.i 0 };;
609{ .mii; (p16) getf.sig n2=nlo[2] // 23:
610 (p49) cmp.ltu p50,p48=t[1],a8
611 (p51) cmp.leu p50,p48=t[1],a8 };;
612{ .mfi; (p16) nop.m 0 // 24:
613 (p16) xma.hu ahi[6]=ai6,bj[7],ahi[5] // ap[6]*b[i]
614 (p16) add a1=a1,n1 } // (p16) a1+=n1
615{ .mfi; (p16) nop.m 0
616 (p16) xma.lu alo[6]=ai6,bj[7],ahi[5]
617 (p17) mov t[0]=r0 };;
618{ .mii; (p16) getf.sig a5=alo[5] // 25:
619 (p16) add t0=t[7],a1 // (p16) t[7]+=a1
620 (p42) add t[0]=t[0],r0,1 };;
621{ .mfi; (p16) setf.sig tf[0]=t0 // 26:
622 (p16) xma.hu nhi[4]=ni4,mj[0],nhi[3] // np[4]*m0
623 (p50) add t[0]=t[0],r0,1 }
624{ .mfi; (p16) cmp.ltu.unc p42,p40=a1,n1
625 (p16) xma.lu nlo[4]=ni4,mj[0],nhi[3]
626 (p16) nop.i 0 };;
627{ .mii; (p16) getf.sig n3=nlo[3] // 27:
628 (p16) cmp.ltu.unc p50,p48=t0,a1
629 (p16) nop.i 0 };;
630 .pred.rel "mutex",p40,p42
631 .pred.rel "mutex",p48,p50
632{ .mfi; (p16) nop.m 0 // 28:
633 (p16) xma.hu ahi[7]=ai7,bj[7],ahi[6] // ap[7]*b[i]
634 (p40) add a2=a2,n2 } // (p16) a2+=n2
635{ .mfi; (p42) add a2=a2,n2,1
636 (p16) xma.lu alo[7]=ai7,bj[7],ahi[6]
637 (p16) nop.i 0 };;
638{ .mii; (p16) getf.sig a6=alo[6] // 29:
639 (p48) add t[6]=t[6],a2 // (p16) t[6]+=a2
640 (p50) add t[6]=t[6],a2,1 };;
641{ .mfi; (p16) nop.m 0 // 30:
642 (p16) xma.hu nhi[5]=ni5,mj[0],nhi[4] // np[5]*m0
643 (p40) cmp.ltu p41,p39=a2,n2 }
644{ .mfi; (p42) cmp.leu p41,p39=a2,n2
645 (p16) xma.lu nlo[5]=ni5,mj[0],nhi[4]
646 (p16) nop.i 0 };;
647{ .mfi; (p16) getf.sig n4=nlo[4] // 31:
648 (p16) nop.f 0
649 (p48) cmp.ltu p49,p47=t[6],a2 }
650{ .mfb; (p50) cmp.leu p49,p47=t[6],a2
651 (p16) nop.f 0
652 br.ctop.sptk.many .Louter_8_ctop };;
653.Louter_8_cend:
654
655// above loop has to execute one more time, without (p16), which is
656// replaced with merged move of np[8] to GPR bank
657 .pred.rel "mutex",p40,p42
658 .pred.rel "mutex",p48,p50
659{ .mmi; (p0) getf.sig n1=ni0 // 0:
660 (p40) add a3=a3,n3 // (p17) a3+=n3
661 (p42) add a3=a3,n3,1 };;
662{ .mii; (p17) getf.sig a7=alo[8] // 1:
663 (p48) add t[6]=t[6],a3 // (p17) t[6]+=a3
664 (p50) add t[6]=t[6],a3,1 };;
665{ .mfi; (p17) getf.sig a8=ahi[8] // 2:
666 (p17) xma.hu nhi[7]=ni6,mj[1],nhi[6] // np[6]*m0
667 (p40) cmp.ltu p43,p41=a3,n3 }
668{ .mfi; (p42) cmp.leu p43,p41=a3,n3
669 (p17) xma.lu nlo[7]=ni6,mj[1],nhi[6]
670 (p0) nop.i 0 };;
671{ .mii; (p17) getf.sig n5=nlo[6] // 3:
672 (p48) cmp.ltu p51,p49=t[6],a3
673 (p50) cmp.leu p51,p49=t[6],a3 };;
674 .pred.rel "mutex",p41,p43
675 .pred.rel "mutex",p49,p51
676{ .mmi; (p0) getf.sig n2=ni1 // 4:
677 (p41) add a4=a4,n4 // (p17) a4+=n4
678 (p43) add a4=a4,n4,1 };;
679{ .mfi; (p49) add t[5]=t[5],a4 // 5: (p17) t[5]+=a4
680 (p0) nop.f 0
681 (p51) add t[5]=t[5],a4,1 };;
682{ .mfi; (p0) getf.sig n3=ni2 // 6:
683 (p17) xma.hu nhi[8]=ni7,mj[1],nhi[7] // np[7]*m0
684 (p41) cmp.ltu p42,p40=a4,n4 }
685{ .mfi; (p43) cmp.leu p42,p40=a4,n4
686 (p17) xma.lu nlo[8]=ni7,mj[1],nhi[7]
687 (p0) nop.i 0 };;
688{ .mii; (p17) getf.sig n6=nlo[7] // 7:
689 (p49) cmp.ltu p50,p48=t[5],a4
690 (p51) cmp.leu p50,p48=t[5],a4 };;
691 .pred.rel "mutex",p40,p42
692 .pred.rel "mutex",p48,p50
693{ .mii; (p0) getf.sig n4=ni3 // 8:
694 (p40) add a5=a5,n5 // (p17) a5+=n5
695 (p42) add a5=a5,n5,1 };;
696{ .mii; (p0) nop.m 0 // 9:
697 (p48) add t[4]=t[4],a5 // p(17) t[4]+=a5
698 (p50) add t[4]=t[4],a5,1 };;
699{ .mii; (p0) nop.m 0 // 10:
700 (p40) cmp.ltu p43,p41=a5,n5
701 (p42) cmp.leu p43,p41=a5,n5 };;
702{ .mii; (p17) getf.sig n7=nlo[8] // 11:
703 (p48) cmp.ltu p51,p49=t[4],a5
704 (p50) cmp.leu p51,p49=t[4],a5 };;
705 .pred.rel "mutex",p41,p43
706 .pred.rel "mutex",p49,p51
707{ .mii; (p17) getf.sig n8=nhi[8] // 12:
708 (p41) add a6=a6,n6 // (p17) a6+=n6
709 (p43) add a6=a6,n6,1 };;
710{ .mii; (p0) getf.sig n5=ni4 // 13:
711 (p49) add t[3]=t[3],a6 // (p17) t[3]+=a6
712 (p51) add t[3]=t[3],a6,1 };;
713{ .mii; (p0) nop.m 0 // 14:
714 (p41) cmp.ltu p42,p40=a6,n6
715 (p43) cmp.leu p42,p40=a6,n6 };;
716{ .mii; (p0) getf.sig n6=ni5 // 15:
717 (p49) cmp.ltu p50,p48=t[3],a6
718 (p51) cmp.leu p50,p48=t[3],a6 };;
719 .pred.rel "mutex",p40,p42
720 .pred.rel "mutex",p48,p50
721{ .mii; (p0) nop.m 0 // 16:
722 (p40) add a7=a7,n7 // (p17) a7+=n7
723 (p42) add a7=a7,n7,1 };;
724{ .mii; (p0) nop.m 0 // 17:
725 (p48) add t[2]=t[2],a7 // (p17) t[2]+=a7
726 (p50) add t[2]=t[2],a7,1 };;
727{ .mii; (p0) nop.m 0 // 18:
728 (p40) cmp.ltu p43,p41=a7,n7
729 (p42) cmp.leu p43,p41=a7,n7 };;
730{ .mii; (p0) getf.sig n7=ni6 // 19:
731 (p48) cmp.ltu p51,p49=t[2],a7
732 (p50) cmp.leu p51,p49=t[2],a7 };;
733 .pred.rel "mutex",p41,p43
734 .pred.rel "mutex",p49,p51
735{ .mii; (p0) nop.m 0 // 20:
736 (p41) add a8=a8,n8 // (p17) a8+=n8
737 (p43) add a8=a8,n8,1 };;
738{ .mmi; (p0) nop.m 0 // 21:
739 (p49) add t[1]=t[1],a8 // (p17) t[1]+=a8
740 (p51) add t[1]=t[1],a8,1 }
741{ .mmi; (p17) mov t[0]=r0
742 (p41) cmp.ltu p42,p40=a8,n8
743 (p43) cmp.leu p42,p40=a8,n8 };;
744{ .mmi; (p0) getf.sig n8=ni7 // 22:
745 (p49) cmp.ltu p50,p48=t[1],a8
746 (p51) cmp.leu p50,p48=t[1],a8 }
747{ .mmi; (p42) add t[0]=t[0],r0,1
748 (p0) add r16=-7*16,prevsp
749 (p0) add r17=-6*16,prevsp };;
750
751// subtract np[8] from carrybit|tmp[8]
752// carrybit|tmp[8] layout upon exit from above loop is:
753// t[0]|t[1]|t[2]|t[3]|t[4]|t[5]|t[6]|t[7]|t0 (least significant)
754{ .mmi; (p50)add t[0]=t[0],r0,1
755 add r18=-5*16,prevsp
756 sub n1=t0,n1 };;
757{ .mmi; cmp.gtu p34,p32=n1,t0;;
758 .pred.rel "mutex",p32,p34
759 (p32)sub n2=t[7],n2
760 (p34)sub n2=t[7],n2,1 };;
761{ .mii; (p32)cmp.gtu p35,p33=n2,t[7]
762 (p34)cmp.geu p35,p33=n2,t[7];;
763 .pred.rel "mutex",p33,p35
764 (p33)sub n3=t[6],n3 }
765{ .mmi; (p35)sub n3=t[6],n3,1;;
766 (p33)cmp.gtu p34,p32=n3,t[6]
767 (p35)cmp.geu p34,p32=n3,t[6] };;
768 .pred.rel "mutex",p32,p34
769{ .mii; (p32)sub n4=t[5],n4
770 (p34)sub n4=t[5],n4,1;;
771 (p32)cmp.gtu p35,p33=n4,t[5] }
772{ .mmi; (p34)cmp.geu p35,p33=n4,t[5];;
773 .pred.rel "mutex",p33,p35
774 (p33)sub n5=t[4],n5
775 (p35)sub n5=t[4],n5,1 };;
776{ .mii; (p33)cmp.gtu p34,p32=n5,t[4]
777 (p35)cmp.geu p34,p32=n5,t[4];;
778 .pred.rel "mutex",p32,p34
779 (p32)sub n6=t[3],n6 }
780{ .mmi; (p34)sub n6=t[3],n6,1;;
781 (p32)cmp.gtu p35,p33=n6,t[3]
782 (p34)cmp.geu p35,p33=n6,t[3] };;
783 .pred.rel "mutex",p33,p35
784{ .mii; (p33)sub n7=t[2],n7
785 (p35)sub n7=t[2],n7,1;;
786 (p33)cmp.gtu p34,p32=n7,t[2] }
787{ .mmi; (p35)cmp.geu p34,p32=n7,t[2];;
788 .pred.rel "mutex",p32,p34
789 (p32)sub n8=t[1],n8
790 (p34)sub n8=t[1],n8,1 };;
791{ .mii; (p32)cmp.gtu p35,p33=n8,t[1]
792 (p34)cmp.geu p35,p33=n8,t[1];;
793 .pred.rel "mutex",p33,p35
794 (p33)sub a8=t[0],r0 }
795{ .mmi; (p35)sub a8=t[0],r0,1;;
796 (p33)cmp.gtu p34,p32=a8,t[0]
797 (p35)cmp.geu p34,p32=a8,t[0] };;
798
799// save the result, either tmp[num] or tmp[num]-np[num]
800 .pred.rel "mutex",p32,p34
801{ .mmi; (p32)st8 [rptr]=n1,8
802 (p34)st8 [rptr]=t0,8
803 add r19=-4*16,prevsp};;
804{ .mmb; (p32)st8 [rptr]=n2,8
805 (p34)st8 [rptr]=t[7],8
806 (p5)br.cond.dpnt.few .Ldone };;
807{ .mmb; (p32)st8 [rptr]=n3,8
808 (p34)st8 [rptr]=t[6],8
809 (p7)br.cond.dpnt.few .Ldone };;
810{ .mmb; (p32)st8 [rptr]=n4,8
811 (p34)st8 [rptr]=t[5],8
812 (p9)br.cond.dpnt.few .Ldone };;
813{ .mmb; (p32)st8 [rptr]=n5,8
814 (p34)st8 [rptr]=t[4],8
815 (p11)br.cond.dpnt.few .Ldone };;
816{ .mmb; (p32)st8 [rptr]=n6,8
817 (p34)st8 [rptr]=t[3],8
818 (p13)br.cond.dpnt.few .Ldone };;
819{ .mmb; (p32)st8 [rptr]=n7,8
820 (p34)st8 [rptr]=t[2],8
821 (p15)br.cond.dpnt.few .Ldone };;
822{ .mmb; (p32)st8 [rptr]=n8,8
823 (p34)st8 [rptr]=t[1],8
824 nop.b 0 };;
825.Ldone: // epilogue
826{ .mmi; ldf.fill f16=[r16],64
827 ldf.fill f17=[r17],64
828 nop.i 0 }
829{ .mmi; ldf.fill f18=[r18],64
830 ldf.fill f19=[r19],64
831 mov pr=prevpr,0x1ffff };;
832{ .mmi; ldf.fill f20=[r16]
833 ldf.fill f21=[r17]
834 mov ar.lc=prevlc }
835{ .mmi; ldf.fill f22=[r18]
836 ldf.fill f23=[r19]
837 mov ret0=1 } // signal "handled"
838{ .mib; rum 1<<5
839 .restore sp
840 mov sp=prevsp
841 br.ret.sptk.many b0 };;
842.endp bn_mul_mont_8#
843
844.type copyright#,\@object
845copyright:
846stringz "Montgomery multiplication for IA-64, CRYPTOGAMS by <appro\@openssl.org>"
847___
848
849$output=shift and open STDOUT,">$output";
850print $code;
851close STDOUT;
diff --git a/src/lib/libssl/src/crypto/bn/asm/mips-mont.pl b/src/lib/libssl/src/crypto/bn/asm/mips-mont.pl
new file mode 100644
index 0000000000..b944a12b8e
--- /dev/null
+++ b/src/lib/libssl/src/crypto/bn/asm/mips-mont.pl
@@ -0,0 +1,426 @@
1#!/usr/bin/env perl
2#
3# ====================================================================
4# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9
 10# This module is not of direct interest for OpenSSL, because it
 11# doesn't provide better performance for longer keys, at least not on
12# in-order-execution cores. While 512-bit RSA sign operations can be
13# 65% faster in 64-bit mode, 1024-bit ones are only 15% faster, and
14# 4096-bit ones are up to 15% slower. In 32-bit mode it varies from
15# 16% improvement for 512-bit RSA sign to -33% for 4096-bit RSA
16# verify:-( All comparisons are against bn_mul_mont-free assembler.
17# The module might be of interest to embedded system developers, as
18# the code is smaller than 1KB, yet offers >3x improvement on MIPS64
19# and 75-30% [less for longer keys] on MIPS32 over compiler-generated
20# code.
21
22######################################################################
 23# There are a number of MIPS ABIs in use; O32 and N32/64 are the most
 24# widely used. Then there is a new contender: NUBI. It appears that if
 25# one picks the latter, it's possible to arrange code in an ABI-neutral
 26# manner. Therefore let's stick to the NUBI register layout:
27#
28($zero,$at,$t0,$t1,$t2)=map("\$$_",(0..2,24,25));
29($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
30($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8,$s9,$s10,$s11)=map("\$$_",(12..23));
31($gp,$tp,$sp,$fp,$ra)=map("\$$_",(3,28..31));
32#
33# The return value is placed in $a0. Following coding rules facilitate
34# interoperability:
35#
36# - never ever touch $tp, "thread pointer", former $gp;
37# - copy return value to $t0, former $v0 [or to $a0 if you're adapting
38# old code];
39# - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary;
40#
41# For reference here is register layout for N32/64 MIPS ABIs:
42#
43# ($zero,$at,$v0,$v1)=map("\$$_",(0..3));
44# ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
45# ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25));
46# ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23));
47# ($gp,$sp,$fp,$ra)=map("\$$_",(28..31));
48#
49$flavour = shift; # supported flavours are o32,n32,64,nubi32,nubi64
50
51if ($flavour =~ /64|n32/i) {
52 $PTR_ADD="dadd"; # incidentally works even on n32
53 $PTR_SUB="dsub"; # incidentally works even on n32
54 $REG_S="sd";
55 $REG_L="ld";
56 $SZREG=8;
57} else {
58 $PTR_ADD="add";
59 $PTR_SUB="sub";
60 $REG_S="sw";
61 $REG_L="lw";
62 $SZREG=4;
63}
64$SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? 0x00fff000 : 0x00ff0000;
65#
66# <appro@openssl.org>
67#
68######################################################################
69
70while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
71open STDOUT,">$output";
72
73if ($flavour =~ /64|n32/i) {
74 $LD="ld";
75 $ST="sd";
76 $MULTU="dmultu";
77 $ADDU="daddu";
78 $SUBU="dsubu";
79 $BNSZ=8;
80} else {
81 $LD="lw";
82 $ST="sw";
83 $MULTU="multu";
84 $ADDU="addu";
85 $SUBU="subu";
86 $BNSZ=4;
87}
88
89# int bn_mul_mont(
90$rp=$a0; # BN_ULONG *rp,
91$ap=$a1; # const BN_ULONG *ap,
92$bp=$a2; # const BN_ULONG *bp,
93$np=$a3; # const BN_ULONG *np,
94$n0=$a4; # const BN_ULONG *n0,
95$num=$a5; # int num);
96
97$lo0=$a6;
98$hi0=$a7;
99$lo1=$t1;
100$hi1=$t2;
101$aj=$s0;
102$bi=$s1;
103$nj=$s2;
104$tp=$s3;
105$alo=$s4;
106$ahi=$s5;
107$nlo=$s6;
108$nhi=$s7;
109$tj=$s8;
110$i=$s9;
111$j=$s10;
112$m1=$s11;
113
114$FRAMESIZE=14;
115
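
# Editor's sketch of the caller-side contract documented by the prototype
# comment above: bn_mul_mont returns non-zero when it handled the
# multiplication and 0 when the caller should fall back to a generic
# multiply-and-reduce path (here, when num < 4 or num >= 17). Names other
# than bn_mul_mont are made up; BN_ULONG width is an assumption.

typedef unsigned long BN_ULONG;     /* assumption: matches the build's BN_ULONG */

extern int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
                       const BN_ULONG *np, const BN_ULONG *n0, int num);
extern void generic_mont_mul(BN_ULONG *rp, const BN_ULONG *ap,
                             const BN_ULONG *bp, const BN_ULONG *np,
                             const BN_ULONG *n0, int num);   /* hypothetical fallback */

static void mont_mul_or_fallback(BN_ULONG *rp, const BN_ULONG *ap,
                                 const BN_ULONG *bp, const BN_ULONG *np,
                                 const BN_ULONG *n0, int num)
{
    if (!bn_mul_mont(rp, ap, bp, np, n0, num))      /* 0 => not handled */
        generic_mont_mul(rp, ap, bp, np, n0, num);
}
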
116$code=<<___;
117.text
118
119.set noat
120.set noreorder
121
122.align 5
123.globl bn_mul_mont
124.ent bn_mul_mont
125bn_mul_mont:
126___
127$code.=<<___ if ($flavour =~ /o32/i);
128 lw $n0,16($sp)
129 lw $num,20($sp)
130___
131$code.=<<___;
132 slt $at,$num,4
133 bnez $at,1f
134 li $t0,0
135 slt $at,$num,17 # on in-order CPU
136 bnezl $at,bn_mul_mont_internal
137 nop
1381: jr $ra
139 li $a0,0
140.end bn_mul_mont
141
142.align 5
143.ent bn_mul_mont_internal
144bn_mul_mont_internal:
145 .frame $fp,$FRAMESIZE*$SZREG,$ra
146 .mask 0x40000000|$SAVED_REGS_MASK,-$SZREG
147 $PTR_SUB $sp,$FRAMESIZE*$SZREG
148 $REG_S $fp,($FRAMESIZE-1)*$SZREG($sp)
149 $REG_S $s11,($FRAMESIZE-2)*$SZREG($sp)
150 $REG_S $s10,($FRAMESIZE-3)*$SZREG($sp)
151 $REG_S $s9,($FRAMESIZE-4)*$SZREG($sp)
152 $REG_S $s8,($FRAMESIZE-5)*$SZREG($sp)
153 $REG_S $s7,($FRAMESIZE-6)*$SZREG($sp)
154 $REG_S $s6,($FRAMESIZE-7)*$SZREG($sp)
155 $REG_S $s5,($FRAMESIZE-8)*$SZREG($sp)
156 $REG_S $s4,($FRAMESIZE-9)*$SZREG($sp)
157___
158$code.=<<___ if ($flavour =~ /nubi/i);
159 $REG_S $s3,($FRAMESIZE-10)*$SZREG($sp)
160 $REG_S $s2,($FRAMESIZE-11)*$SZREG($sp)
161 $REG_S $s1,($FRAMESIZE-12)*$SZREG($sp)
162 $REG_S $s0,($FRAMESIZE-13)*$SZREG($sp)
163___
164$code.=<<___;
165 move $fp,$sp
166
167 .set reorder
168 $LD $n0,0($n0)
169 $LD $bi,0($bp) # bp[0]
170 $LD $aj,0($ap) # ap[0]
171 $LD $nj,0($np) # np[0]
172
173 $PTR_SUB $sp,2*$BNSZ # place for two extra words
174 sll $num,`log($BNSZ)/log(2)`
175 li $at,-4096
176 $PTR_SUB $sp,$num
177 and $sp,$at
178
179 $MULTU $aj,$bi
180 $LD $alo,$BNSZ($ap)
181 $LD $nlo,$BNSZ($np)
182 mflo $lo0
183 mfhi $hi0
184 $MULTU $lo0,$n0
185 mflo $m1
186
187 $MULTU $alo,$bi
188 mflo $alo
189 mfhi $ahi
190
191 $MULTU $nj,$m1
192 mflo $lo1
193 mfhi $hi1
194 $MULTU $nlo,$m1
195 $ADDU $lo1,$lo0
196 sltu $at,$lo1,$lo0
197 $ADDU $hi1,$at
198 mflo $nlo
199 mfhi $nhi
200
201 move $tp,$sp
202 li $j,2*$BNSZ
203.align 4
204.L1st:
205 .set noreorder
206 $PTR_ADD $aj,$ap,$j
207 $PTR_ADD $nj,$np,$j
208 $LD $aj,($aj)
209 $LD $nj,($nj)
210
211 $MULTU $aj,$bi
212 $ADDU $lo0,$alo,$hi0
213 $ADDU $lo1,$nlo,$hi1
214 sltu $at,$lo0,$hi0
215 sltu $t0,$lo1,$hi1
216 $ADDU $hi0,$ahi,$at
217 $ADDU $hi1,$nhi,$t0
218 mflo $alo
219 mfhi $ahi
220
221 $ADDU $lo1,$lo0
222 sltu $at,$lo1,$lo0
223 $MULTU $nj,$m1
224 $ADDU $hi1,$at
225 addu $j,$BNSZ
226 $ST $lo1,($tp)
227 sltu $t0,$j,$num
228 mflo $nlo
229 mfhi $nhi
230
231 bnez $t0,.L1st
232 $PTR_ADD $tp,$BNSZ
233 .set reorder
234
235 $ADDU $lo0,$alo,$hi0
236 sltu $at,$lo0,$hi0
237 $ADDU $hi0,$ahi,$at
238
239 $ADDU $lo1,$nlo,$hi1
240 sltu $t0,$lo1,$hi1
241 $ADDU $hi1,$nhi,$t0
242 $ADDU $lo1,$lo0
243 sltu $at,$lo1,$lo0
244 $ADDU $hi1,$at
245
246 $ST $lo1,($tp)
247
248 $ADDU $hi1,$hi0
249 sltu $at,$hi1,$hi0
250 $ST $hi1,$BNSZ($tp)
251 $ST $at,2*$BNSZ($tp)
252
253 li $i,$BNSZ
254.align 4
255.Louter:
256 $PTR_ADD $bi,$bp,$i
257 $LD $bi,($bi)
258 $LD $aj,($ap)
259 $LD $alo,$BNSZ($ap)
260 $LD $tj,($sp)
261
262 $MULTU $aj,$bi
263 $LD $nj,($np)
264 $LD $nlo,$BNSZ($np)
265 mflo $lo0
266 mfhi $hi0
267 $ADDU $lo0,$tj
268 $MULTU $lo0,$n0
269 sltu $at,$lo0,$tj
270 $ADDU $hi0,$at
271 mflo $m1
272
273 $MULTU $alo,$bi
274 mflo $alo
275 mfhi $ahi
276
277 $MULTU $nj,$m1
278 mflo $lo1
279 mfhi $hi1
280
281 $MULTU $nlo,$m1
282 $ADDU $lo1,$lo0
283 sltu $at,$lo1,$lo0
284 $ADDU $hi1,$at
285 mflo $nlo
286 mfhi $nhi
287
288 move $tp,$sp
289 li $j,2*$BNSZ
290 $LD $tj,$BNSZ($tp)
291.align 4
292.Linner:
293 .set noreorder
294 $PTR_ADD $aj,$ap,$j
295 $PTR_ADD $nj,$np,$j
296 $LD $aj,($aj)
297 $LD $nj,($nj)
298
299 $MULTU $aj,$bi
300 $ADDU $lo0,$alo,$hi0
301 $ADDU $lo1,$nlo,$hi1
302 sltu $at,$lo0,$hi0
303 sltu $t0,$lo1,$hi1
304 $ADDU $hi0,$ahi,$at
305 $ADDU $hi1,$nhi,$t0
306 mflo $alo
307 mfhi $ahi
308
309 $ADDU $lo0,$tj
310 addu $j,$BNSZ
311 $MULTU $nj,$m1
312 sltu $at,$lo0,$tj
313 $ADDU $lo1,$lo0
314 $ADDU $hi0,$at
315 sltu $t0,$lo1,$lo0
316 $LD $tj,2*$BNSZ($tp)
317 $ADDU $hi1,$t0
318 sltu $at,$j,$num
319 mflo $nlo
320 mfhi $nhi
321 $ST $lo1,($tp)
322 bnez $at,.Linner
323 $PTR_ADD $tp,$BNSZ
324 .set reorder
325
326 $ADDU $lo0,$alo,$hi0
327 sltu $at,$lo0,$hi0
328 $ADDU $hi0,$ahi,$at
329 $ADDU $lo0,$tj
330 sltu $t0,$lo0,$tj
331 $ADDU $hi0,$t0
332
333 $LD $tj,2*$BNSZ($tp)
334 $ADDU $lo1,$nlo,$hi1
335 sltu $at,$lo1,$hi1
336 $ADDU $hi1,$nhi,$at
337 $ADDU $lo1,$lo0
338 sltu $t0,$lo1,$lo0
339 $ADDU $hi1,$t0
340 $ST $lo1,($tp)
341
342 $ADDU $lo1,$hi1,$hi0
343 sltu $hi1,$lo1,$hi0
344 $ADDU $lo1,$tj
345 sltu $at,$lo1,$tj
346 $ADDU $hi1,$at
347 $ST $lo1,$BNSZ($tp)
348 $ST $hi1,2*$BNSZ($tp)
349
350 addu $i,$BNSZ
351 sltu $t0,$i,$num
352 bnez $t0,.Louter
353
354 .set noreorder
355 $PTR_ADD $tj,$sp,$num # &tp[num]
356 move $tp,$sp
357 move $ap,$sp
358 li $hi0,0 # clear borrow bit
359
360.align 4
361.Lsub: $LD $lo0,($tp)
362 $LD $lo1,($np)
363 $PTR_ADD $tp,$BNSZ
364 $PTR_ADD $np,$BNSZ
365 $SUBU $lo1,$lo0,$lo1 # tp[i]-np[i]
366 sgtu $at,$lo1,$lo0
367 $SUBU $lo0,$lo1,$hi0
368 sgtu $hi0,$lo0,$lo1
369 $ST $lo0,($rp)
370 or $hi0,$at
371 sltu $at,$tp,$tj
372 bnez $at,.Lsub
373 $PTR_ADD $rp,$BNSZ
374
375 $SUBU $hi0,$hi1,$hi0 # handle upmost overflow bit
376 move $tp,$sp
377 $PTR_SUB $rp,$num # restore rp
378 not $hi1,$hi0
379
380 and $ap,$hi0,$sp
381 and $bp,$hi1,$rp
382 or $ap,$ap,$bp # ap=borrow?tp:rp
383
384.align 4
385.Lcopy: $LD $aj,($ap)
386 $PTR_ADD $ap,$BNSZ
387 $ST $zero,($tp)
388 $PTR_ADD $tp,$BNSZ
389 sltu $at,$tp,$tj
390 $ST $aj,($rp)
391 bnez $at,.Lcopy
392 $PTR_ADD $rp,$BNSZ
393
394 li $a0,1
395 li $t0,1
396
397 .set noreorder
398 move $sp,$fp
399 $REG_L $fp,($FRAMESIZE-1)*$SZREG($sp)
400 $REG_L $s11,($FRAMESIZE-2)*$SZREG($sp)
401 $REG_L $s10,($FRAMESIZE-3)*$SZREG($sp)
402 $REG_L $s9,($FRAMESIZE-4)*$SZREG($sp)
403 $REG_L $s8,($FRAMESIZE-5)*$SZREG($sp)
404 $REG_L $s7,($FRAMESIZE-6)*$SZREG($sp)
405 $REG_L $s6,($FRAMESIZE-7)*$SZREG($sp)
406 $REG_L $s5,($FRAMESIZE-8)*$SZREG($sp)
407 $REG_L $s4,($FRAMESIZE-9)*$SZREG($sp)
408___
409$code.=<<___ if ($flavour =~ /nubi/i);
410 $REG_L $s3,($FRAMESIZE-10)*$SZREG($sp)
411 $REG_L $s2,($FRAMESIZE-11)*$SZREG($sp)
412 $REG_L $s1,($FRAMESIZE-12)*$SZREG($sp)
413 $REG_L $s0,($FRAMESIZE-13)*$SZREG($sp)
414___
415$code.=<<___;
416 jr $ra
417 $PTR_ADD $sp,$FRAMESIZE*$SZREG
418.end bn_mul_mont_internal
419.rdata
420.asciiz "Montgomery Multiplication for MIPS, CRYPTOGAMS by <appro\@openssl.org>"
421___
422
423$code =~ s/\`([^\`]*)\`/eval $1/gem;
424
425print $code;
426close STDOUT;
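
# Editor's note: the .Lsub/.Lcopy tail above picks the final result without a
# data-dependent branch -- the trial subtraction's borrow (adjusted by the top
# carry) is folded into the pointer values so the copy loop reads from either
# tp or rp, per the "ap=borrow?tp:rp" comment. A simplified C rendering of the
# idea (illustrative only; mask-on-values rather than mask-on-pointers):

#include <stdint.h>

/* rp already holds tp - np from the subtraction pass; keep that only
 * when no borrow remained, otherwise take the unreduced tp. */
static void copy_result(uint64_t *rp, const uint64_t *tp,
                        uint64_t borrow /* 1 if tp < np, else 0 */, int num)
{
    uint64_t mask = 0 - borrow;                   /* all ones if borrow, else 0 */
    for (int i = 0; i < num; i++)
        rp[i] = (tp[i] & mask) | (rp[i] & ~mask); /* borrow ? tp[i] : rp[i] */
}
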
diff --git a/src/lib/libssl/src/crypto/bn/asm/mips.pl b/src/lib/libssl/src/crypto/bn/asm/mips.pl
new file mode 100644
index 0000000000..c162a3ec23
--- /dev/null
+++ b/src/lib/libssl/src/crypto/bn/asm/mips.pl
@@ -0,0 +1,2585 @@
1#!/usr/bin/env perl
2#
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project.
6#
7# Rights for redistribution and usage in source and binary forms are
8# granted according to the OpenSSL license. Warranty of any kind is
9# disclaimed.
10# ====================================================================
11
12
13# July 1999
14#
15# This is drop-in MIPS III/IV ISA replacement for crypto/bn/bn_asm.c.
16#
 17# The module is designed to work with either of the "new" MIPS ABI(5),
 18# namely N32 or N64, offered by IRIX 6.x. It's not meant to work under
 19# IRIX 5.x, not only because it doesn't support the new ABIs, but also
 20# because 5.x kernels put the R4x00 CPU into 32-bit mode and all those
 21# 64-bit instructions (daddu, dmultu, etc.) found below would only
 22# cause an illegal instruction exception:-(
23#
24# In addition the code depends on preprocessor flags set up by MIPSpro
25# compiler driver (either as or cc) and therefore (probably?) can't be
26# compiled by the GNU assembler. GNU C driver manages fine though...
27# I mean as long as -mmips-as is specified or is the default option,
28# because then it simply invokes /usr/bin/as which in turn takes
29# perfect care of the preprocessor definitions. Another neat feature
30# offered by the MIPSpro assembler is an optimization pass. This gave
31# me the opportunity to have the code looking more regular as all those
32# architecture dependent instruction rescheduling details were left to
33# the assembler. Cool, huh?
34#
35# Performance improvement is astonishing! 'apps/openssl speed rsa dsa'
36# goes way over 3 times faster!
37#
38# <appro@fy.chalmers.se>
39
40# October 2010
41#
42# Adapt the module even for 32-bit ABIs and other OSes. The former was
43# achieved by mechanical replacement of 64-bit arithmetic instructions
44# such as dmultu, daddu, etc. with their 32-bit counterparts and
 45# adjusting offsets denoting multiples of BN_ULONG. The above-mentioned
 46# >3x performance improvement naturally does not apply to 32-bit code
 47# [because there is no instruction a 32-bit compiler can't use]; one
 48# has to be content with a 40-85% improvement depending on benchmark and
 49# key length, more for longer keys.
50
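
# Editor's note: the routines below replace the generic word-vector
# primitives from crypto/bn/bn_asm.c. As a reminder of the first contract,
# here is a rough C statement of it (a sketch assuming a 64-bit BN_ULONG and
# unsigned __int128, not a copy of bn_asm.c): bn_mul_add_words multiplies a
# vector by one word, accumulates into rp, and returns the outgoing carry;
# bn_mul_words is the same without the accumulate.

#include <stdint.h>
typedef uint64_t BN_ULONG;                    /* assumption: 64-bit build */

/* rp[i] += ap[i] * w with carry propagation; returns the final carry word. */
static BN_ULONG bn_mul_add_words_sketch(BN_ULONG *rp, const BN_ULONG *ap,
                                        int num, BN_ULONG w)
{
    unsigned __int128 c = 0;
    for (int i = 0; i < num; i++) {
        c += (unsigned __int128)ap[i] * w + rp[i];
        rp[i] = (BN_ULONG)c;
        c >>= 64;
    }
    return (BN_ULONG)c;
}
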
51$flavour = shift;
52while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
53open STDOUT,">$output";
54
55if ($flavour =~ /64|n32/i) {
56 $LD="ld";
57 $ST="sd";
58 $MULTU="dmultu";
59 $DIVU="ddivu";
60 $ADDU="daddu";
61 $SUBU="dsubu";
62 $SRL="dsrl";
63 $SLL="dsll";
64 $BNSZ=8;
65 $PTR_ADD="daddu";
66 $PTR_SUB="dsubu";
67 $SZREG=8;
68 $REG_S="sd";
69 $REG_L="ld";
70} else {
71 $LD="lw";
72 $ST="sw";
73 $MULTU="multu";
74 $DIVU="divu";
75 $ADDU="addu";
76 $SUBU="subu";
77 $SRL="srl";
78 $SLL="sll";
79 $BNSZ=4;
80 $PTR_ADD="addu";
81 $PTR_SUB="subu";
82 $SZREG=4;
83 $REG_S="sw";
84 $REG_L="lw";
85 $code=".set mips2\n";
86}
87
88# Below is N32/64 register layout used in the original module.
89#
90($zero,$at,$v0,$v1)=map("\$$_",(0..3));
91($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
92($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25));
93($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23));
94($gp,$sp,$fp,$ra)=map("\$$_",(28..31));
95($ta0,$ta1,$ta2,$ta3)=($a4,$a5,$a6,$a7);
96#
97# No special adaptation is required for O32. NUBI on the other hand
98# is treated by saving/restoring ($v1,$t0..$t3).
99
100$gp=$v1 if ($flavour =~ /nubi/i);
101
102$minus4=$v1;
103
104$code.=<<___;
105.rdata
106.asciiz "mips3.s, Version 1.2"
107.asciiz "MIPS II/III/IV ISA artwork by Andy Polyakov <appro\@fy.chalmers.se>"
108
109.text
110.set noat
111
112.align 5
113.globl bn_mul_add_words
114.ent bn_mul_add_words
115bn_mul_add_words:
116 .set noreorder
117 bgtz $a2,bn_mul_add_words_internal
118 move $v0,$zero
119 jr $ra
120 move $a0,$v0
121.end bn_mul_add_words
122
123.align 5
124.ent bn_mul_add_words_internal
125bn_mul_add_words_internal:
126___
127$code.=<<___ if ($flavour =~ /nubi/i);
128 .frame $sp,6*$SZREG,$ra
129 .mask 0x8000f008,-$SZREG
130 .set noreorder
131 $PTR_SUB $sp,6*$SZREG
132 $REG_S $ra,5*$SZREG($sp)
133 $REG_S $t3,4*$SZREG($sp)
134 $REG_S $t2,3*$SZREG($sp)
135 $REG_S $t1,2*$SZREG($sp)
136 $REG_S $t0,1*$SZREG($sp)
137 $REG_S $gp,0*$SZREG($sp)
138___
139$code.=<<___;
140 .set reorder
141 li $minus4,-4
142 and $ta0,$a2,$minus4
143 $LD $t0,0($a1)
144 beqz $ta0,.L_bn_mul_add_words_tail
145
146.L_bn_mul_add_words_loop:
147 $MULTU $t0,$a3
148 $LD $t1,0($a0)
149 $LD $t2,$BNSZ($a1)
150 $LD $t3,$BNSZ($a0)
151 $LD $ta0,2*$BNSZ($a1)
152 $LD $ta1,2*$BNSZ($a0)
153 $ADDU $t1,$v0
154 sltu $v0,$t1,$v0 # All manuals say it "compares 32-bit
155 # values", but it seems to work fine
156 # even on 64-bit registers.
157 mflo $at
158 mfhi $t0
159 $ADDU $t1,$at
160 $ADDU $v0,$t0
161 $MULTU $t2,$a3
162 sltu $at,$t1,$at
163 $ST $t1,0($a0)
164 $ADDU $v0,$at
165
166 $LD $ta2,3*$BNSZ($a1)
167 $LD $ta3,3*$BNSZ($a0)
168 $ADDU $t3,$v0
169 sltu $v0,$t3,$v0
170 mflo $at
171 mfhi $t2
172 $ADDU $t3,$at
173 $ADDU $v0,$t2
174 $MULTU $ta0,$a3
175 sltu $at,$t3,$at
176 $ST $t3,$BNSZ($a0)
177 $ADDU $v0,$at
178
179 subu $a2,4
180 $PTR_ADD $a0,4*$BNSZ
181 $PTR_ADD $a1,4*$BNSZ
182 $ADDU $ta1,$v0
183 sltu $v0,$ta1,$v0
184 mflo $at
185 mfhi $ta0
186 $ADDU $ta1,$at
187 $ADDU $v0,$ta0
188 $MULTU $ta2,$a3
189 sltu $at,$ta1,$at
190 $ST $ta1,-2*$BNSZ($a0)
191 $ADDU $v0,$at
192
193
194 and $ta0,$a2,$minus4
195 $ADDU $ta3,$v0
196 sltu $v0,$ta3,$v0
197 mflo $at
198 mfhi $ta2
199 $ADDU $ta3,$at
200 $ADDU $v0,$ta2
201 sltu $at,$ta3,$at
202 $ST $ta3,-$BNSZ($a0)
203 $ADDU $v0,$at
204 .set noreorder
205 bgtzl $ta0,.L_bn_mul_add_words_loop
206 $LD $t0,0($a1)
207
208 beqz $a2,.L_bn_mul_add_words_return
209 nop
210
211.L_bn_mul_add_words_tail:
212 .set reorder
213 $LD $t0,0($a1)
214 $MULTU $t0,$a3
215 $LD $t1,0($a0)
216 subu $a2,1
217 $ADDU $t1,$v0
218 sltu $v0,$t1,$v0
219 mflo $at
220 mfhi $t0
221 $ADDU $t1,$at
222 $ADDU $v0,$t0
223 sltu $at,$t1,$at
224 $ST $t1,0($a0)
225 $ADDU $v0,$at
226 beqz $a2,.L_bn_mul_add_words_return
227
228 $LD $t0,$BNSZ($a1)
229 $MULTU $t0,$a3
230 $LD $t1,$BNSZ($a0)
231 subu $a2,1
232 $ADDU $t1,$v0
233 sltu $v0,$t1,$v0
234 mflo $at
235 mfhi $t0
236 $ADDU $t1,$at
237 $ADDU $v0,$t0
238 sltu $at,$t1,$at
239 $ST $t1,$BNSZ($a0)
240 $ADDU $v0,$at
241 beqz $a2,.L_bn_mul_add_words_return
242
243 $LD $t0,2*$BNSZ($a1)
244 $MULTU $t0,$a3
245 $LD $t1,2*$BNSZ($a0)
246 $ADDU $t1,$v0
247 sltu $v0,$t1,$v0
248 mflo $at
249 mfhi $t0
250 $ADDU $t1,$at
251 $ADDU $v0,$t0
252 sltu $at,$t1,$at
253 $ST $t1,2*$BNSZ($a0)
254 $ADDU $v0,$at
255
256.L_bn_mul_add_words_return:
257 .set noreorder
258___
259$code.=<<___ if ($flavour =~ /nubi/i);
260 $REG_L $t3,4*$SZREG($sp)
261 $REG_L $t2,3*$SZREG($sp)
262 $REG_L $t1,2*$SZREG($sp)
263 $REG_L $t0,1*$SZREG($sp)
264 $REG_L $gp,0*$SZREG($sp)
265 $PTR_ADD $sp,6*$SZREG
266___
267$code.=<<___;
268 jr $ra
269 move $a0,$v0
270.end bn_mul_add_words_internal
271
272.align 5
273.globl bn_mul_words
274.ent bn_mul_words
275bn_mul_words:
276 .set noreorder
277 bgtz $a2,bn_mul_words_internal
278 move $v0,$zero
279 jr $ra
280 move $a0,$v0
281.end bn_mul_words
282
283.align 5
284.ent bn_mul_words_internal
285bn_mul_words_internal:
286___
287$code.=<<___ if ($flavour =~ /nubi/i);
288 .frame $sp,6*$SZREG,$ra
289 .mask 0x8000f008,-$SZREG
290 .set noreorder
291 $PTR_SUB $sp,6*$SZREG
292 $REG_S $ra,5*$SZREG($sp)
293 $REG_S $t3,4*$SZREG($sp)
294 $REG_S $t2,3*$SZREG($sp)
295 $REG_S $t1,2*$SZREG($sp)
296 $REG_S $t0,1*$SZREG($sp)
297 $REG_S $gp,0*$SZREG($sp)
298___
299$code.=<<___;
300 .set reorder
301 li $minus4,-4
302 and $ta0,$a2,$minus4
303 $LD $t0,0($a1)
304 beqz $ta0,.L_bn_mul_words_tail
305
306.L_bn_mul_words_loop:
307 $MULTU $t0,$a3
308 $LD $t2,$BNSZ($a1)
309 $LD $ta0,2*$BNSZ($a1)
310 $LD $ta2,3*$BNSZ($a1)
311 mflo $at
312 mfhi $t0
313 $ADDU $v0,$at
314 sltu $t1,$v0,$at
315 $MULTU $t2,$a3
316 $ST $v0,0($a0)
317 $ADDU $v0,$t1,$t0
318
319 subu $a2,4
320 $PTR_ADD $a0,4*$BNSZ
321 $PTR_ADD $a1,4*$BNSZ
322 mflo $at
323 mfhi $t2
324 $ADDU $v0,$at
325 sltu $t3,$v0,$at
326 $MULTU $ta0,$a3
327 $ST $v0,-3*$BNSZ($a0)
328 $ADDU $v0,$t3,$t2
329
330 mflo $at
331 mfhi $ta0
332 $ADDU $v0,$at
333 sltu $ta1,$v0,$at
334 $MULTU $ta2,$a3
335 $ST $v0,-2*$BNSZ($a0)
336 $ADDU $v0,$ta1,$ta0
337
338 and $ta0,$a2,$minus4
339 mflo $at
340 mfhi $ta2
341 $ADDU $v0,$at
342 sltu $ta3,$v0,$at
343 $ST $v0,-$BNSZ($a0)
344 $ADDU $v0,$ta3,$ta2
345 .set noreorder
346 bgtzl $ta0,.L_bn_mul_words_loop
347 $LD $t0,0($a1)
348
349 beqz $a2,.L_bn_mul_words_return
350 nop
351
352.L_bn_mul_words_tail:
353 .set reorder
354 $LD $t0,0($a1)
355 $MULTU $t0,$a3
356 subu $a2,1
357 mflo $at
358 mfhi $t0
359 $ADDU $v0,$at
360 sltu $t1,$v0,$at
361 $ST $v0,0($a0)
362 $ADDU $v0,$t1,$t0
363 beqz $a2,.L_bn_mul_words_return
364
365 $LD $t0,$BNSZ($a1)
366 $MULTU $t0,$a3
367 subu $a2,1
368 mflo $at
369 mfhi $t0
370 $ADDU $v0,$at
371 sltu $t1,$v0,$at
372 $ST $v0,$BNSZ($a0)
373 $ADDU $v0,$t1,$t0
374 beqz $a2,.L_bn_mul_words_return
375
376 $LD $t0,2*$BNSZ($a1)
377 $MULTU $t0,$a3
378 mflo $at
379 mfhi $t0
380 $ADDU $v0,$at
381 sltu $t1,$v0,$at
382 $ST $v0,2*$BNSZ($a0)
383 $ADDU $v0,$t1,$t0
384
385.L_bn_mul_words_return:
386 .set noreorder
387___
388$code.=<<___ if ($flavour =~ /nubi/i);
389 $REG_L $t3,4*$SZREG($sp)
390 $REG_L $t2,3*$SZREG($sp)
391 $REG_L $t1,2*$SZREG($sp)
392 $REG_L $t0,1*$SZREG($sp)
393 $REG_L $gp,0*$SZREG($sp)
394 $PTR_ADD $sp,6*$SZREG
395___
396$code.=<<___;
397 jr $ra
398 move $a0,$v0
399.end bn_mul_words_internal
400
401.align 5
402.globl bn_sqr_words
403.ent bn_sqr_words
404bn_sqr_words:
405 .set noreorder
406 bgtz $a2,bn_sqr_words_internal
407 move $v0,$zero
408 jr $ra
409 move $a0,$v0
410.end bn_sqr_words
411
412.align 5
413.ent bn_sqr_words_internal
414bn_sqr_words_internal:
415___
416$code.=<<___ if ($flavour =~ /nubi/i);
417 .frame $sp,6*$SZREG,$ra
418 .mask 0x8000f008,-$SZREG
419 .set noreorder
420 $PTR_SUB $sp,6*$SZREG
421 $REG_S $ra,5*$SZREG($sp)
422 $REG_S $t3,4*$SZREG($sp)
423 $REG_S $t2,3*$SZREG($sp)
424 $REG_S $t1,2*$SZREG($sp)
425 $REG_S $t0,1*$SZREG($sp)
426 $REG_S $gp,0*$SZREG($sp)
427___
428$code.=<<___;
429 .set reorder
430 li $minus4,-4
431 and $ta0,$a2,$minus4
432 $LD $t0,0($a1)
433 beqz $ta0,.L_bn_sqr_words_tail
434
435.L_bn_sqr_words_loop:
436 $MULTU $t0,$t0
437 $LD $t2,$BNSZ($a1)
438 $LD $ta0,2*$BNSZ($a1)
439 $LD $ta2,3*$BNSZ($a1)
440 mflo $t1
441 mfhi $t0
442 $ST $t1,0($a0)
443 $ST $t0,$BNSZ($a0)
444
445 $MULTU $t2,$t2
446 subu $a2,4
447 $PTR_ADD $a0,8*$BNSZ
448 $PTR_ADD $a1,4*$BNSZ
449 mflo $t3
450 mfhi $t2
451 $ST $t3,-6*$BNSZ($a0)
452 $ST $t2,-5*$BNSZ($a0)
453
454 $MULTU $ta0,$ta0
455 mflo $ta1
456 mfhi $ta0
457 $ST $ta1,-4*$BNSZ($a0)
458 $ST $ta0,-3*$BNSZ($a0)
459
460
461 $MULTU $ta2,$ta2
462 and $ta0,$a2,$minus4
463 mflo $ta3
464 mfhi $ta2
465 $ST $ta3,-2*$BNSZ($a0)
466 $ST $ta2,-$BNSZ($a0)
467
468 .set noreorder
469 bgtzl $ta0,.L_bn_sqr_words_loop
470 $LD $t0,0($a1)
471
472 beqz $a2,.L_bn_sqr_words_return
473 nop
474
475.L_bn_sqr_words_tail:
476 .set reorder
477 $LD $t0,0($a1)
478 $MULTU $t0,$t0
479 subu $a2,1
480 mflo $t1
481 mfhi $t0
482 $ST $t1,0($a0)
483 $ST $t0,$BNSZ($a0)
484 beqz $a2,.L_bn_sqr_words_return
485
486 $LD $t0,$BNSZ($a1)
487 $MULTU $t0,$t0
488 subu $a2,1
489 mflo $t1
490 mfhi $t0
491 $ST $t1,2*$BNSZ($a0)
492 $ST $t0,3*$BNSZ($a0)
493 beqz $a2,.L_bn_sqr_words_return
494
495 $LD $t0,2*$BNSZ($a1)
496 $MULTU $t0,$t0
497 mflo $t1
498 mfhi $t0
499 $ST $t1,4*$BNSZ($a0)
500 $ST $t0,5*$BNSZ($a0)
501
502.L_bn_sqr_words_return:
503 .set noreorder
504___
505$code.=<<___ if ($flavour =~ /nubi/i);
506 $REG_L $t3,4*$SZREG($sp)
507 $REG_L $t2,3*$SZREG($sp)
508 $REG_L $t1,2*$SZREG($sp)
509 $REG_L $t0,1*$SZREG($sp)
510 $REG_L $gp,0*$SZREG($sp)
511 $PTR_ADD $sp,6*$SZREG
512___
513$code.=<<___;
514 jr $ra
515 move $a0,$v0
516
517.end bn_sqr_words_internal
518
519.align 5
520.globl bn_add_words
521.ent bn_add_words
522bn_add_words:
523 .set noreorder
524 bgtz $a3,bn_add_words_internal
525 move $v0,$zero
526 jr $ra
527 move $a0,$v0
528.end bn_add_words
529
530.align 5
531.ent bn_add_words_internal
532bn_add_words_internal:
533___
534$code.=<<___ if ($flavour =~ /nubi/i);
535 .frame $sp,6*$SZREG,$ra
536 .mask 0x8000f008,-$SZREG
537 .set noreorder
538 $PTR_SUB $sp,6*$SZREG
539 $REG_S $ra,5*$SZREG($sp)
540 $REG_S $t3,4*$SZREG($sp)
541 $REG_S $t2,3*$SZREG($sp)
542 $REG_S $t1,2*$SZREG($sp)
543 $REG_S $t0,1*$SZREG($sp)
544 $REG_S $gp,0*$SZREG($sp)
545___
546$code.=<<___;
547 .set reorder
548 li $minus4,-4
549 and $at,$a3,$minus4
550 $LD $t0,0($a1)
551 beqz $at,.L_bn_add_words_tail
552
553.L_bn_add_words_loop:
554 $LD $ta0,0($a2)
555 subu $a3,4
556 $LD $t1,$BNSZ($a1)
557 and $at,$a3,$minus4
558 $LD $t2,2*$BNSZ($a1)
559 $PTR_ADD $a2,4*$BNSZ
560 $LD $t3,3*$BNSZ($a1)
561 $PTR_ADD $a0,4*$BNSZ
562 $LD $ta1,-3*$BNSZ($a2)
563 $PTR_ADD $a1,4*$BNSZ
564 $LD $ta2,-2*$BNSZ($a2)
565 $LD $ta3,-$BNSZ($a2)
566 $ADDU $ta0,$t0
567 sltu $t8,$ta0,$t0
568 $ADDU $t0,$ta0,$v0
569 sltu $v0,$t0,$ta0
570 $ST $t0,-4*$BNSZ($a0)
571 $ADDU $v0,$t8
572
573 $ADDU $ta1,$t1
574 sltu $t9,$ta1,$t1
575 $ADDU $t1,$ta1,$v0
576 sltu $v0,$t1,$ta1
577 $ST $t1,-3*$BNSZ($a0)
578 $ADDU $v0,$t9
579
580 $ADDU $ta2,$t2
581 sltu $t8,$ta2,$t2
582 $ADDU $t2,$ta2,$v0
583 sltu $v0,$t2,$ta2
584 $ST $t2,-2*$BNSZ($a0)
585 $ADDU $v0,$t8
586
587 $ADDU $ta3,$t3
588 sltu $t9,$ta3,$t3
589 $ADDU $t3,$ta3,$v0
590 sltu $v0,$t3,$ta3
591 $ST $t3,-$BNSZ($a0)
592 $ADDU $v0,$t9
593
594 .set noreorder
595 bgtzl $at,.L_bn_add_words_loop
596 $LD $t0,0($a1)
597
598 beqz $a3,.L_bn_add_words_return
599 nop
600
601.L_bn_add_words_tail:
602 .set reorder
603 $LD $t0,0($a1)
604 $LD $ta0,0($a2)
605 $ADDU $ta0,$t0
606 subu $a3,1
607 sltu $t8,$ta0,$t0
608 $ADDU $t0,$ta0,$v0
609 sltu $v0,$t0,$ta0
610 $ST $t0,0($a0)
611 $ADDU $v0,$t8
612 beqz $a3,.L_bn_add_words_return
613
614 $LD $t1,$BNSZ($a1)
615 $LD $ta1,$BNSZ($a2)
616 $ADDU $ta1,$t1
617 subu $a3,1
618 sltu $t9,$ta1,$t1
619 $ADDU $t1,$ta1,$v0
620 sltu $v0,$t1,$ta1
621 $ST $t1,$BNSZ($a0)
622 $ADDU $v0,$t9
623 beqz $a3,.L_bn_add_words_return
624
625 $LD $t2,2*$BNSZ($a1)
626 $LD $ta2,2*$BNSZ($a2)
627 $ADDU $ta2,$t2
628 sltu $t8,$ta2,$t2
629 $ADDU $t2,$ta2,$v0
630 sltu $v0,$t2,$ta2
631 $ST $t2,2*$BNSZ($a0)
632 $ADDU $v0,$t8
633
634.L_bn_add_words_return:
635 .set noreorder
636___
637$code.=<<___ if ($flavour =~ /nubi/i);
638 $REG_L $t3,4*$SZREG($sp)
639 $REG_L $t2,3*$SZREG($sp)
640 $REG_L $t1,2*$SZREG($sp)
641 $REG_L $t0,1*$SZREG($sp)
642 $REG_L $gp,0*$SZREG($sp)
643 $PTR_ADD $sp,6*$SZREG
644___
645$code.=<<___;
646 jr $ra
647 move $a0,$v0
648
649.end bn_add_words_internal
650
651.align 5
652.globl bn_sub_words
653.ent bn_sub_words
654bn_sub_words:
655 .set noreorder
656 bgtz $a3,bn_sub_words_internal
657 move $v0,$zero
658 jr $ra
659 move $a0,$zero
660.end bn_sub_words
661
662.align 5
663.ent bn_sub_words_internal
664bn_sub_words_internal:
665___
666$code.=<<___ if ($flavour =~ /nubi/i);
667 .frame $sp,6*$SZREG,$ra
668 .mask 0x8000f008,-$SZREG
669 .set noreorder
670 $PTR_SUB $sp,6*$SZREG
671 $REG_S $ra,5*$SZREG($sp)
672 $REG_S $t3,4*$SZREG($sp)
673 $REG_S $t2,3*$SZREG($sp)
674 $REG_S $t1,2*$SZREG($sp)
675 $REG_S $t0,1*$SZREG($sp)
676 $REG_S $gp,0*$SZREG($sp)
677___
678$code.=<<___;
679 .set reorder
680 li $minus4,-4
681 and $at,$a3,$minus4
682 $LD $t0,0($a1)
683 beqz $at,.L_bn_sub_words_tail
684
685.L_bn_sub_words_loop:
686 $LD $ta0,0($a2)
687 subu $a3,4
688 $LD $t1,$BNSZ($a1)
689 and $at,$a3,$minus4
690 $LD $t2,2*$BNSZ($a1)
691 $PTR_ADD $a2,4*$BNSZ
692 $LD $t3,3*$BNSZ($a1)
693 $PTR_ADD $a0,4*$BNSZ
694 $LD $ta1,-3*$BNSZ($a2)
695 $PTR_ADD $a1,4*$BNSZ
696 $LD $ta2,-2*$BNSZ($a2)
697 $LD $ta3,-$BNSZ($a2)
698 sltu $t8,$t0,$ta0
699 $SUBU $ta0,$t0,$ta0
700 $SUBU $t0,$ta0,$v0
701 sgtu $v0,$t0,$ta0
702 $ST $t0,-4*$BNSZ($a0)
703 $ADDU $v0,$t8
704
705 sltu $t9,$t1,$ta1
706 $SUBU $ta1,$t1,$ta1
707 $SUBU $t1,$ta1,$v0
708 sgtu $v0,$t1,$ta1
709 $ST $t1,-3*$BNSZ($a0)
710 $ADDU $v0,$t9
711
712
713 sltu $t8,$t2,$ta2
714 $SUBU $ta2,$t2,$ta2
715 $SUBU $t2,$ta2,$v0
716 sgtu $v0,$t2,$ta2
717 $ST $t2,-2*$BNSZ($a0)
718 $ADDU $v0,$t8
719
720 sltu $t9,$t3,$ta3
721 $SUBU $ta3,$t3,$ta3
722 $SUBU $t3,$ta3,$v0
723 sgtu $v0,$t3,$ta3
724 $ST $t3,-$BNSZ($a0)
725 $ADDU $v0,$t9
726
727 .set noreorder
728 bgtzl $at,.L_bn_sub_words_loop
729 $LD $t0,0($a1)
730
731 beqz $a3,.L_bn_sub_words_return
732 nop
733
734.L_bn_sub_words_tail:
735 .set reorder
736 $LD $t0,0($a1)
737 $LD $ta0,0($a2)
738 subu $a3,1
739 sltu $t8,$t0,$ta0
740 $SUBU $ta0,$t0,$ta0
741 $SUBU $t0,$ta0,$v0
742 sgtu $v0,$t0,$ta0
743 $ST $t0,0($a0)
744 $ADDU $v0,$t8
745 beqz $a3,.L_bn_sub_words_return
746
747 $LD $t1,$BNSZ($a1)
748 subu $a3,1
749 $LD $ta1,$BNSZ($a2)
750 sltu $t9,$t1,$ta1
751 $SUBU $ta1,$t1,$ta1
752 $SUBU $t1,$ta1,$v0
753 sgtu $v0,$t1,$ta1
754 $ST $t1,$BNSZ($a0)
755 $ADDU $v0,$t9
756 beqz $a3,.L_bn_sub_words_return
757
758 $LD $t2,2*$BNSZ($a1)
759 $LD $ta2,2*$BNSZ($a2)
760 sltu $t8,$t2,$ta2
761 $SUBU $ta2,$t2,$ta2
762 $SUBU $t2,$ta2,$v0
763 sgtu $v0,$t2,$ta2
764 $ST $t2,2*$BNSZ($a0)
765 $ADDU $v0,$t8
766
767.L_bn_sub_words_return:
768 .set noreorder
769___
770$code.=<<___ if ($flavour =~ /nubi/i);
771 $REG_L $t3,4*$SZREG($sp)
772 $REG_L $t2,3*$SZREG($sp)
773 $REG_L $t1,2*$SZREG($sp)
774 $REG_L $t0,1*$SZREG($sp)
775 $REG_L $gp,0*$SZREG($sp)
776 $PTR_ADD $sp,6*$SZREG
777___
778$code.=<<___;
779 jr $ra
780 move $a0,$v0
781.end bn_sub_words_internal
782
783.align 5
784.globl bn_div_3_words
785.ent bn_div_3_words
786bn_div_3_words:
787 .set noreorder
788 move $a3,$a0 # we know that bn_div_words does not
789 # touch $a3, $ta2, $ta3 and preserves $a2
790 # so that we can save two arguments
791 # and return address in registers
792 # instead of stack:-)
793
794 $LD $a0,($a3)
795 move $ta2,$a1
796 bne $a0,$a2,bn_div_3_words_internal
797 $LD $a1,-$BNSZ($a3)
798 li $v0,-1
799 jr $ra
800 move $a0,$v0
801.end bn_div_3_words
802
803.align 5
804.ent bn_div_3_words_internal
805bn_div_3_words_internal:
806___
807$code.=<<___ if ($flavour =~ /nubi/i);
808 .frame $sp,6*$SZREG,$ra
809 .mask 0x8000f008,-$SZREG
810 .set noreorder
811 $PTR_SUB $sp,6*$SZREG
812 $REG_S $ra,5*$SZREG($sp)
813 $REG_S $t3,4*$SZREG($sp)
814 $REG_S $t2,3*$SZREG($sp)
815 $REG_S $t1,2*$SZREG($sp)
816 $REG_S $t0,1*$SZREG($sp)
817 $REG_S $gp,0*$SZREG($sp)
818___
819$code.=<<___;
820 .set reorder
821 move $ta3,$ra
822 bal bn_div_words
823 move $ra,$ta3
824 $MULTU $ta2,$v0
825 $LD $t2,-2*$BNSZ($a3)
826 move $ta0,$zero
827 mfhi $t1
828 mflo $t0
829 sltu $t8,$t1,$a1
830.L_bn_div_3_words_inner_loop:
831 bnez $t8,.L_bn_div_3_words_inner_loop_done
832 sgeu $at,$t2,$t0
833 seq $t9,$t1,$a1
834 and $at,$t9
835 sltu $t3,$t0,$ta2
836 $ADDU $a1,$a2
837 $SUBU $t1,$t3
838 $SUBU $t0,$ta2
839 sltu $t8,$t1,$a1
840 sltu $ta0,$a1,$a2
841 or $t8,$ta0
842 .set noreorder
843 beqzl $at,.L_bn_div_3_words_inner_loop
844 $SUBU $v0,1
845 .set reorder
846.L_bn_div_3_words_inner_loop_done:
847 .set noreorder
848___
849$code.=<<___ if ($flavour =~ /nubi/i);
850 $REG_L $t3,4*$SZREG($sp)
851 $REG_L $t2,3*$SZREG($sp)
852 $REG_L $t1,2*$SZREG($sp)
853 $REG_L $t0,1*$SZREG($sp)
854 $REG_L $gp,0*$SZREG($sp)
855 $PTR_ADD $sp,6*$SZREG
856___
857$code.=<<___;
858 jr $ra
859 move $a0,$v0
860.end bn_div_3_words_internal
861
862.align 5
863.globl bn_div_words
864.ent bn_div_words
865bn_div_words:
866 .set noreorder
867 bnez $a2,bn_div_words_internal
868 li $v0,-1 # I would rather signal div-by-zero
869 # which can be done with 'break 7'
870 jr $ra
871 move $a0,$v0
872.end bn_div_words
873
874.align 5
875.ent bn_div_words_internal
876bn_div_words_internal:
877___
878$code.=<<___ if ($flavour =~ /nubi/i);
879 .frame $sp,6*$SZREG,$ra
880 .mask 0x8000f008,-$SZREG
881 .set noreorder
882 $PTR_SUB $sp,6*$SZREG
883 $REG_S $ra,5*$SZREG($sp)
884 $REG_S $t3,4*$SZREG($sp)
885 $REG_S $t2,3*$SZREG($sp)
886 $REG_S $t1,2*$SZREG($sp)
887 $REG_S $t0,1*$SZREG($sp)
888 $REG_S $gp,0*$SZREG($sp)
889___
890$code.=<<___;
891 move $v1,$zero
892 bltz $a2,.L_bn_div_words_body
893 move $t9,$v1
894 $SLL $a2,1
895 bgtz $a2,.-4
896 addu $t9,1
897
898 .set reorder
899 negu $t1,$t9
900 li $t2,-1
901 $SLL $t2,$t1
902 and $t2,$a0
903 $SRL $at,$a1,$t1
904 .set noreorder
905 bnezl $t2,.+8
906 break 6 # signal overflow
907 .set reorder
908 $SLL $a0,$t9
909 $SLL $a1,$t9
910 or $a0,$at
911___
912$QT=$ta0;
913$HH=$ta1;
914$DH=$v1;
915$code.=<<___;
916.L_bn_div_words_body:
917 $SRL $DH,$a2,4*$BNSZ # bits
918 sgeu $at,$a0,$a2
919 .set noreorder
920 bnezl $at,.+8
921 $SUBU $a0,$a2
922 .set reorder
923
924 li $QT,-1
925 $SRL $HH,$a0,4*$BNSZ # bits
926 $SRL $QT,4*$BNSZ # q=0xffffffff
927 beq $DH,$HH,.L_bn_div_words_skip_div1
928 $DIVU $zero,$a0,$DH
929 mflo $QT
930.L_bn_div_words_skip_div1:
931 $MULTU $a2,$QT
932 $SLL $t3,$a0,4*$BNSZ # bits
933 $SRL $at,$a1,4*$BNSZ # bits
934 or $t3,$at
935 mflo $t0
936 mfhi $t1
937.L_bn_div_words_inner_loop1:
938 sltu $t2,$t3,$t0
939 seq $t8,$HH,$t1
940 sltu $at,$HH,$t1
941 and $t2,$t8
942 sltu $v0,$t0,$a2
943 or $at,$t2
944 .set noreorder
945 beqz $at,.L_bn_div_words_inner_loop1_done
946 $SUBU $t1,$v0
947 $SUBU $t0,$a2
948 b .L_bn_div_words_inner_loop1
949 $SUBU $QT,1
950 .set reorder
951.L_bn_div_words_inner_loop1_done:
952
953 $SLL $a1,4*$BNSZ # bits
954 $SUBU $a0,$t3,$t0
955 $SLL $v0,$QT,4*$BNSZ # bits
956
957 li $QT,-1
958 $SRL $HH,$a0,4*$BNSZ # bits
959 $SRL $QT,4*$BNSZ # q=0xffffffff
960 beq $DH,$HH,.L_bn_div_words_skip_div2
961 $DIVU $zero,$a0,$DH
962 mflo $QT
963.L_bn_div_words_skip_div2:
964 $MULTU $a2,$QT
965 $SLL $t3,$a0,4*$BNSZ # bits
966 $SRL $at,$a1,4*$BNSZ # bits
967 or $t3,$at
968 mflo $t0
969 mfhi $t1
970.L_bn_div_words_inner_loop2:
971 sltu $t2,$t3,$t0
972 seq $t8,$HH,$t1
973 sltu $at,$HH,$t1
974 and $t2,$t8
975 sltu $v1,$t0,$a2
976 or $at,$t2
977 .set noreorder
978 beqz $at,.L_bn_div_words_inner_loop2_done
979 $SUBU $t1,$v1
980 $SUBU $t0,$a2
981 b .L_bn_div_words_inner_loop2
982 $SUBU $QT,1
983 .set reorder
984.L_bn_div_words_inner_loop2_done:
985
986 $SUBU $a0,$t3,$t0
987 or $v0,$QT
988 $SRL $v1,$a0,$t9 # $v1 contains remainder if anybody wants it
989 $SRL $a2,$t9 # restore $a2
990
991 .set noreorder
992 move $a1,$v1
993___
994$code.=<<___ if ($flavour =~ /nubi/i);
995 $REG_L $t3,4*$SZREG($sp)
996 $REG_L $t2,3*$SZREG($sp)
997 $REG_L $t1,2*$SZREG($sp)
998 $REG_L $t0,1*$SZREG($sp)
999 $REG_L $gp,0*$SZREG($sp)
1000 $PTR_ADD $sp,6*$SZREG
1001___
1002$code.=<<___;
1003 jr $ra
1004 move $a0,$v0
1005.end bn_div_words_internal
1006___
1007undef $HH; undef $QT; undef $DH;
1008
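# For reference, bn_div_words(h,l,d) above returns the quotient of the
# double-word value {h,l} divided by the single word d (the code also leaves
# the remainder in $v1/$a1).  A standalone sketch of that contract, assuming
# 32-bit BN_ULONG words ($BNSZ==4) and h<d so the quotient fits in one word;
# this is an illustration only and is never emitted into $code.
use Math::BigInt;
sub ref_bn_div_words {
	my ($h, $l, $d) = @_;			# plain non-negative integers
	my $n = (Math::BigInt->new($h) << 32) + $l;
	my ($q, $r) = $n->bdiv($d);		# floored quotient and remainder
	return ($q->numify, $r->numify);	# e.g. (1,0,3) -> (0x55555555, 1)
}
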
1009($a_0,$a_1,$a_2,$a_3)=($t0,$t1,$t2,$t3);
1010($b_0,$b_1,$b_2,$b_3)=($ta0,$ta1,$ta2,$ta3);
1011
1012($a_4,$a_5,$a_6,$a_7)=($s0,$s2,$s4,$a1); # once we load a[7], no use for $a1
1013($b_4,$b_5,$b_6,$b_7)=($s1,$s3,$s5,$a2); # once we load b[7], no use for $a2
1014
1015($t_1,$t_2,$c_1,$c_2,$c_3)=($t8,$t9,$v0,$v1,$a3);
1016
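# The mul_add_c(a[i],b[j],c1,c2,c3) comments in the comba code below refer to
# the word-level primitive of the C reference code: add the double-word
# product a[i]*b[j] into the three-word accumulator {c3,c2,c1}; the
# mflo/mfhi + $ADDU/sltu sequences are that carry propagation spelled out.
# A minimal model of one such step, assuming 32-bit words, illustration only:
use Math::BigInt;
sub ref_mul_add_c {
	my ($a, $b, $c1, $c2, $c3) = @_;	# words as plain integers
	my $W   = Math::BigInt->new(2)->bpow(32);
	my $acc = $c1 + $c2 * $W + $c3 * $W * $W;	# current accumulator
	$acc   += Math::BigInt->new($a) * $b;		# add the 2-word product
	return map { (($acc->copy >> (32 * $_)) % $W)->numify } 0 .. 2;
}
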
1017$code.=<<___;
1018
1019.align 5
1020.globl bn_mul_comba8
1021.ent bn_mul_comba8
1022bn_mul_comba8:
1023 .set noreorder
1024___
1025$code.=<<___ if ($flavour =~ /nubi/i);
1026 .frame $sp,12*$SZREG,$ra
1027 .mask 0x803ff008,-$SZREG
1028 $PTR_SUB $sp,12*$SZREG
1029 $REG_S $ra,11*$SZREG($sp)
1030 $REG_S $s5,10*$SZREG($sp)
1031 $REG_S $s4,9*$SZREG($sp)
1032 $REG_S $s3,8*$SZREG($sp)
1033 $REG_S $s2,7*$SZREG($sp)
1034 $REG_S $s1,6*$SZREG($sp)
1035 $REG_S $s0,5*$SZREG($sp)
1036 $REG_S $t3,4*$SZREG($sp)
1037 $REG_S $t2,3*$SZREG($sp)
1038 $REG_S $t1,2*$SZREG($sp)
1039 $REG_S $t0,1*$SZREG($sp)
1040 $REG_S $gp,0*$SZREG($sp)
1041___
1042$code.=<<___ if ($flavour !~ /nubi/i);
1043 .frame $sp,6*$SZREG,$ra
1044 .mask 0x003f0000,-$SZREG
1045 $PTR_SUB $sp,6*$SZREG
1046 $REG_S $s5,5*$SZREG($sp)
1047 $REG_S $s4,4*$SZREG($sp)
1048 $REG_S $s3,3*$SZREG($sp)
1049 $REG_S $s2,2*$SZREG($sp)
1050 $REG_S $s1,1*$SZREG($sp)
1051 $REG_S $s0,0*$SZREG($sp)
1052___
1053$code.=<<___;
1054
1055 .set reorder
1056 $LD $a_0,0($a1) # If compiled with -mips3 option on
1057 # R5000 box assembler barks on this
1058					# line with "should not have mult/div
1059 # as last instruction in bb (R10K
1060 # bug)" warning. If anybody out there
1061 # has a clue about how to circumvent
1062 # this do send me a note.
1063 # <appro\@fy.chalmers.se>
1064
1065 $LD $b_0,0($a2)
1066 $LD $a_1,$BNSZ($a1)
1067 $LD $a_2,2*$BNSZ($a1)
1068 $MULTU $a_0,$b_0 # mul_add_c(a[0],b[0],c1,c2,c3);
1069 $LD $a_3,3*$BNSZ($a1)
1070 $LD $b_1,$BNSZ($a2)
1071 $LD $b_2,2*$BNSZ($a2)
1072 $LD $b_3,3*$BNSZ($a2)
1073 mflo $c_1
1074 mfhi $c_2
1075
1076 $LD $a_4,4*$BNSZ($a1)
1077 $LD $a_5,5*$BNSZ($a1)
1078 $MULTU $a_0,$b_1 # mul_add_c(a[0],b[1],c2,c3,c1);
1079 $LD $a_6,6*$BNSZ($a1)
1080 $LD $a_7,7*$BNSZ($a1)
1081 $LD $b_4,4*$BNSZ($a2)
1082 $LD $b_5,5*$BNSZ($a2)
1083 mflo $t_1
1084 mfhi $t_2
1085 $ADDU $c_2,$t_1
1086 sltu $at,$c_2,$t_1
1087 $MULTU $a_1,$b_0 # mul_add_c(a[1],b[0],c2,c3,c1);
1088 $ADDU $c_3,$t_2,$at
1089 $LD $b_6,6*$BNSZ($a2)
1090 $LD $b_7,7*$BNSZ($a2)
1091 $ST $c_1,0($a0) # r[0]=c1;
1092 mflo $t_1
1093 mfhi $t_2
1094 $ADDU $c_2,$t_1
1095 sltu $at,$c_2,$t_1
1096 $MULTU $a_2,$b_0 # mul_add_c(a[2],b[0],c3,c1,c2);
1097 $ADDU $t_2,$at
1098 $ADDU $c_3,$t_2
1099 sltu $c_1,$c_3,$t_2
1100 $ST $c_2,$BNSZ($a0) # r[1]=c2;
1101
1102 mflo $t_1
1103 mfhi $t_2
1104 $ADDU $c_3,$t_1
1105 sltu $at,$c_3,$t_1
1106 $MULTU $a_1,$b_1 # mul_add_c(a[1],b[1],c3,c1,c2);
1107 $ADDU $t_2,$at
1108 $ADDU $c_1,$t_2
1109 mflo $t_1
1110 mfhi $t_2
1111 $ADDU $c_3,$t_1
1112 sltu $at,$c_3,$t_1
1113 $MULTU $a_0,$b_2 # mul_add_c(a[0],b[2],c3,c1,c2);
1114 $ADDU $t_2,$at
1115 $ADDU $c_1,$t_2
1116 sltu $c_2,$c_1,$t_2
1117 mflo $t_1
1118 mfhi $t_2
1119 $ADDU $c_3,$t_1
1120 sltu $at,$c_3,$t_1
1121 $MULTU $a_0,$b_3 # mul_add_c(a[0],b[3],c1,c2,c3);
1122 $ADDU $t_2,$at
1123 $ADDU $c_1,$t_2
1124 sltu $at,$c_1,$t_2
1125 $ADDU $c_2,$at
1126 $ST $c_3,2*$BNSZ($a0) # r[2]=c3;
1127
1128 mflo $t_1
1129 mfhi $t_2
1130 $ADDU $c_1,$t_1
1131 sltu $at,$c_1,$t_1
1132 $MULTU $a_1,$b_2 # mul_add_c(a[1],b[2],c1,c2,c3);
1133 $ADDU $t_2,$at
1134 $ADDU $c_2,$t_2
1135 sltu $c_3,$c_2,$t_2
1136 mflo $t_1
1137 mfhi $t_2
1138 $ADDU $c_1,$t_1
1139 sltu $at,$c_1,$t_1
1140 $MULTU $a_2,$b_1 # mul_add_c(a[2],b[1],c1,c2,c3);
1141 $ADDU $t_2,$at
1142 $ADDU $c_2,$t_2
1143 sltu $at,$c_2,$t_2
1144 $ADDU $c_3,$at
1145 mflo $t_1
1146 mfhi $t_2
1147 $ADDU $c_1,$t_1
1148 sltu $at,$c_1,$t_1
1149 $MULTU $a_3,$b_0 # mul_add_c(a[3],b[0],c1,c2,c3);
1150 $ADDU $t_2,$at
1151 $ADDU $c_2,$t_2
1152 sltu $at,$c_2,$t_2
1153 $ADDU $c_3,$at
1154 mflo $t_1
1155 mfhi $t_2
1156 $ADDU $c_1,$t_1
1157 sltu $at,$c_1,$t_1
1158 $MULTU $a_4,$b_0 # mul_add_c(a[4],b[0],c2,c3,c1);
1159 $ADDU $t_2,$at
1160 $ADDU $c_2,$t_2
1161 sltu $at,$c_2,$t_2
1162 $ADDU $c_3,$at
1163 $ST $c_1,3*$BNSZ($a0) # r[3]=c1;
1164
1165 mflo $t_1
1166 mfhi $t_2
1167 $ADDU $c_2,$t_1
1168 sltu $at,$c_2,$t_1
1169 $MULTU $a_3,$b_1 # mul_add_c(a[3],b[1],c2,c3,c1);
1170 $ADDU $t_2,$at
1171 $ADDU $c_3,$t_2
1172 sltu $c_1,$c_3,$t_2
1173 mflo $t_1
1174 mfhi $t_2
1175 $ADDU $c_2,$t_1
1176 sltu $at,$c_2,$t_1
1177 $MULTU $a_2,$b_2 # mul_add_c(a[2],b[2],c2,c3,c1);
1178 $ADDU $t_2,$at
1179 $ADDU $c_3,$t_2
1180 sltu $at,$c_3,$t_2
1181 $ADDU $c_1,$at
1182 mflo $t_1
1183 mfhi $t_2
1184 $ADDU $c_2,$t_1
1185 sltu $at,$c_2,$t_1
1186 $MULTU $a_1,$b_3 # mul_add_c(a[1],b[3],c2,c3,c1);
1187 $ADDU $t_2,$at
1188 $ADDU $c_3,$t_2
1189 sltu $at,$c_3,$t_2
1190 $ADDU $c_1,$at
1191 mflo $t_1
1192 mfhi $t_2
1193 $ADDU $c_2,$t_1
1194 sltu $at,$c_2,$t_1
1195 $MULTU $a_0,$b_4 # mul_add_c(a[0],b[4],c2,c3,c1);
1196 $ADDU $t_2,$at
1197 $ADDU $c_3,$t_2
1198 sltu $at,$c_3,$t_2
1199 $ADDU $c_1,$at
1200 mflo $t_1
1201 mfhi $t_2
1202 $ADDU $c_2,$t_1
1203 sltu $at,$c_2,$t_1
1204 $MULTU $a_0,$b_5 # mul_add_c(a[0],b[5],c3,c1,c2);
1205 $ADDU $t_2,$at
1206 $ADDU $c_3,$t_2
1207 sltu $at,$c_3,$t_2
1208 $ADDU $c_1,$at
1209 $ST $c_2,4*$BNSZ($a0) # r[4]=c2;
1210
1211 mflo $t_1
1212 mfhi $t_2
1213 $ADDU $c_3,$t_1
1214 sltu $at,$c_3,$t_1
1215 $MULTU $a_1,$b_4 # mul_add_c(a[1],b[4],c3,c1,c2);
1216 $ADDU $t_2,$at
1217 $ADDU $c_1,$t_2
1218 sltu $c_2,$c_1,$t_2
1219 mflo $t_1
1220 mfhi $t_2
1221 $ADDU $c_3,$t_1
1222 sltu $at,$c_3,$t_1
1223 $MULTU $a_2,$b_3 # mul_add_c(a[2],b[3],c3,c1,c2);
1224 $ADDU $t_2,$at
1225 $ADDU $c_1,$t_2
1226 sltu $at,$c_1,$t_2
1227 $ADDU $c_2,$at
1228 mflo $t_1
1229 mfhi $t_2
1230 $ADDU $c_3,$t_1
1231 sltu $at,$c_3,$t_1
1232 $MULTU $a_3,$b_2 # mul_add_c(a[3],b[2],c3,c1,c2);
1233 $ADDU $t_2,$at
1234 $ADDU $c_1,$t_2
1235 sltu $at,$c_1,$t_2
1236 $ADDU $c_2,$at
1237 mflo $t_1
1238 mfhi $t_2
1239 $ADDU $c_3,$t_1
1240 sltu $at,$c_3,$t_1
1241 $MULTU $a_4,$b_1 # mul_add_c(a[4],b[1],c3,c1,c2);
1242 $ADDU $t_2,$at
1243 $ADDU $c_1,$t_2
1244 sltu $at,$c_1,$t_2
1245 $ADDU $c_2,$at
1246 mflo $t_1
1247 mfhi $t_2
1248 $ADDU $c_3,$t_1
1249 sltu $at,$c_3,$t_1
1250 $MULTU $a_5,$b_0 # mul_add_c(a[5],b[0],c3,c1,c2);
1251 $ADDU $t_2,$at
1252 $ADDU $c_1,$t_2
1253 sltu $at,$c_1,$t_2
1254 $ADDU $c_2,$at
1255 mflo $t_1
1256 mfhi $t_2
1257 $ADDU $c_3,$t_1
1258 sltu $at,$c_3,$t_1
1259 $MULTU $a_6,$b_0 # mul_add_c(a[6],b[0],c1,c2,c3);
1260 $ADDU $t_2,$at
1261 $ADDU $c_1,$t_2
1262 sltu $at,$c_1,$t_2
1263 $ADDU $c_2,$at
1264 $ST $c_3,5*$BNSZ($a0) # r[5]=c3;
1265
1266 mflo $t_1
1267 mfhi $t_2
1268 $ADDU $c_1,$t_1
1269 sltu $at,$c_1,$t_1
1270 $MULTU $a_5,$b_1 # mul_add_c(a[5],b[1],c1,c2,c3);
1271 $ADDU $t_2,$at
1272 $ADDU $c_2,$t_2
1273 sltu $c_3,$c_2,$t_2
1274 mflo $t_1
1275 mfhi $t_2
1276 $ADDU $c_1,$t_1
1277 sltu $at,$c_1,$t_1
1278 $MULTU $a_4,$b_2 # mul_add_c(a[4],b[2],c1,c2,c3);
1279 $ADDU $t_2,$at
1280 $ADDU $c_2,$t_2
1281 sltu $at,$c_2,$t_2
1282 $ADDU $c_3,$at
1283 mflo $t_1
1284 mfhi $t_2
1285 $ADDU $c_1,$t_1
1286 sltu $at,$c_1,$t_1
1287 $MULTU $a_3,$b_3 # mul_add_c(a[3],b[3],c1,c2,c3);
1288 $ADDU $t_2,$at
1289 $ADDU $c_2,$t_2
1290 sltu $at,$c_2,$t_2
1291 $ADDU $c_3,$at
1292 mflo $t_1
1293 mfhi $t_2
1294 $ADDU $c_1,$t_1
1295 sltu $at,$c_1,$t_1
1296 $MULTU $a_2,$b_4 # mul_add_c(a[2],b[4],c1,c2,c3);
1297 $ADDU $t_2,$at
1298 $ADDU $c_2,$t_2
1299 sltu $at,$c_2,$t_2
1300 $ADDU $c_3,$at
1301 mflo $t_1
1302 mfhi $t_2
1303 $ADDU $c_1,$t_1
1304 sltu $at,$c_1,$t_1
1305 $MULTU $a_1,$b_5 # mul_add_c(a[1],b[5],c1,c2,c3);
1306 $ADDU $t_2,$at
1307 $ADDU $c_2,$t_2
1308 sltu $at,$c_2,$t_2
1309 $ADDU $c_3,$at
1310 mflo $t_1
1311 mfhi $t_2
1312 $ADDU $c_1,$t_1
1313 sltu $at,$c_1,$t_1
1314 $MULTU $a_0,$b_6 # mul_add_c(a[0],b[6],c1,c2,c3);
1315 $ADDU $t_2,$at
1316 $ADDU $c_2,$t_2
1317 sltu $at,$c_2,$t_2
1318 $ADDU $c_3,$at
1319 mflo $t_1
1320 mfhi $t_2
1321 $ADDU $c_1,$t_1
1322 sltu $at,$c_1,$t_1
1323 $MULTU $a_0,$b_7 # mul_add_c(a[0],b[7],c2,c3,c1);
1324 $ADDU $t_2,$at
1325 $ADDU $c_2,$t_2
1326 sltu $at,$c_2,$t_2
1327 $ADDU $c_3,$at
1328 $ST $c_1,6*$BNSZ($a0) # r[6]=c1;
1329
1330 mflo $t_1
1331 mfhi $t_2
1332 $ADDU $c_2,$t_1
1333 sltu $at,$c_2,$t_1
1334 $MULTU $a_1,$b_6 # mul_add_c(a[1],b[6],c2,c3,c1);
1335 $ADDU $t_2,$at
1336 $ADDU $c_3,$t_2
1337 sltu $c_1,$c_3,$t_2
1338 mflo $t_1
1339 mfhi $t_2
1340 $ADDU $c_2,$t_1
1341 sltu $at,$c_2,$t_1
1342 $MULTU $a_2,$b_5 # mul_add_c(a[2],b[5],c2,c3,c1);
1343 $ADDU $t_2,$at
1344 $ADDU $c_3,$t_2
1345 sltu $at,$c_3,$t_2
1346 $ADDU $c_1,$at
1347 mflo $t_1
1348 mfhi $t_2
1349 $ADDU $c_2,$t_1
1350 sltu $at,$c_2,$t_1
1351 $MULTU $a_3,$b_4 # mul_add_c(a[3],b[4],c2,c3,c1);
1352 $ADDU $t_2,$at
1353 $ADDU $c_3,$t_2
1354 sltu $at,$c_3,$t_2
1355 $ADDU $c_1,$at
1356 mflo $t_1
1357 mfhi $t_2
1358 $ADDU $c_2,$t_1
1359 sltu $at,$c_2,$t_1
1360 $MULTU $a_4,$b_3 # mul_add_c(a[4],b[3],c2,c3,c1);
1361 $ADDU $t_2,$at
1362 $ADDU $c_3,$t_2
1363 sltu $at,$c_3,$t_2
1364 $ADDU $c_1,$at
1365 mflo $t_1
1366 mfhi $t_2
1367 $ADDU $c_2,$t_1
1368 sltu $at,$c_2,$t_1
1369 $MULTU $a_5,$b_2 # mul_add_c(a[5],b[2],c2,c3,c1);
1370 $ADDU $t_2,$at
1371 $ADDU $c_3,$t_2
1372 sltu $at,$c_3,$t_2
1373 $ADDU $c_1,$at
1374 mflo $t_1
1375 mfhi $t_2
1376 $ADDU $c_2,$t_1
1377 sltu $at,$c_2,$t_1
1378 $MULTU $a_6,$b_1 # mul_add_c(a[6],b[1],c2,c3,c1);
1379 $ADDU $t_2,$at
1380 $ADDU $c_3,$t_2
1381 sltu $at,$c_3,$t_2
1382 $ADDU $c_1,$at
1383 mflo $t_1
1384 mfhi $t_2
1385 $ADDU $c_2,$t_1
1386 sltu $at,$c_2,$t_1
1387 $MULTU $a_7,$b_0 # mul_add_c(a[7],b[0],c2,c3,c1);
1388 $ADDU $t_2,$at
1389 $ADDU $c_3,$t_2
1390 sltu $at,$c_3,$t_2
1391 $ADDU $c_1,$at
1392 mflo $t_1
1393 mfhi $t_2
1394 $ADDU $c_2,$t_1
1395 sltu $at,$c_2,$t_1
1396 $MULTU $a_7,$b_1 # mul_add_c(a[7],b[1],c3,c1,c2);
1397 $ADDU $t_2,$at
1398 $ADDU $c_3,$t_2
1399 sltu $at,$c_3,$t_2
1400 $ADDU $c_1,$at
1401 $ST $c_2,7*$BNSZ($a0) # r[7]=c2;
1402
1403 mflo $t_1
1404 mfhi $t_2
1405 $ADDU $c_3,$t_1
1406 sltu $at,$c_3,$t_1
1407 $MULTU $a_6,$b_2 # mul_add_c(a[6],b[2],c3,c1,c2);
1408 $ADDU $t_2,$at
1409 $ADDU $c_1,$t_2
1410 sltu $c_2,$c_1,$t_2
1411 mflo $t_1
1412 mfhi $t_2
1413 $ADDU $c_3,$t_1
1414 sltu $at,$c_3,$t_1
1415 $MULTU $a_5,$b_3 # mul_add_c(a[5],b[3],c3,c1,c2);
1416 $ADDU $t_2,$at
1417 $ADDU $c_1,$t_2
1418 sltu $at,$c_1,$t_2
1419 $ADDU $c_2,$at
1420 mflo $t_1
1421 mfhi $t_2
1422 $ADDU $c_3,$t_1
1423 sltu $at,$c_3,$t_1
1424 $MULTU $a_4,$b_4 # mul_add_c(a[4],b[4],c3,c1,c2);
1425 $ADDU $t_2,$at
1426 $ADDU $c_1,$t_2
1427 sltu $at,$c_1,$t_2
1428 $ADDU $c_2,$at
1429 mflo $t_1
1430 mfhi $t_2
1431 $ADDU $c_3,$t_1
1432 sltu $at,$c_3,$t_1
1433 $MULTU $a_3,$b_5 # mul_add_c(a[3],b[5],c3,c1,c2);
1434 $ADDU $t_2,$at
1435 $ADDU $c_1,$t_2
1436 sltu $at,$c_1,$t_2
1437 $ADDU $c_2,$at
1438 mflo $t_1
1439 mfhi $t_2
1440 $ADDU $c_3,$t_1
1441 sltu $at,$c_3,$t_1
1442 $MULTU $a_2,$b_6 # mul_add_c(a[2],b[6],c3,c1,c2);
1443 $ADDU $t_2,$at
1444 $ADDU $c_1,$t_2
1445 sltu $at,$c_1,$t_2
1446 $ADDU $c_2,$at
1447 mflo $t_1
1448 mfhi $t_2
1449 $ADDU $c_3,$t_1
1450 sltu $at,$c_3,$t_1
1451 $MULTU $a_1,$b_7 # mul_add_c(a[1],b[7],c3,c1,c2);
1452 $ADDU $t_2,$at
1453 $ADDU $c_1,$t_2
1454 sltu $at,$c_1,$t_2
1455 $ADDU $c_2,$at
1456 mflo $t_1
1457 mfhi $t_2
1458 $ADDU $c_3,$t_1
1459 sltu $at,$c_3,$t_1
1460 $MULTU $a_2,$b_7 # mul_add_c(a[2],b[7],c1,c2,c3);
1461 $ADDU $t_2,$at
1462 $ADDU $c_1,$t_2
1463 sltu $at,$c_1,$t_2
1464 $ADDU $c_2,$at
1465 $ST $c_3,8*$BNSZ($a0) # r[8]=c3;
1466
1467 mflo $t_1
1468 mfhi $t_2
1469 $ADDU $c_1,$t_1
1470 sltu $at,$c_1,$t_1
1471 $MULTU $a_3,$b_6 # mul_add_c(a[3],b[6],c1,c2,c3);
1472 $ADDU $t_2,$at
1473 $ADDU $c_2,$t_2
1474 sltu $c_3,$c_2,$t_2
1475 mflo $t_1
1476 mfhi $t_2
1477 $ADDU $c_1,$t_1
1478 sltu $at,$c_1,$t_1
1479 $MULTU $a_4,$b_5 # mul_add_c(a[4],b[5],c1,c2,c3);
1480 $ADDU $t_2,$at
1481 $ADDU $c_2,$t_2
1482 sltu $at,$c_2,$t_2
1483 $ADDU $c_3,$at
1484 mflo $t_1
1485 mfhi $t_2
1486 $ADDU $c_1,$t_1
1487 sltu $at,$c_1,$t_1
1488 $MULTU $a_5,$b_4 # mul_add_c(a[5],b[4],c1,c2,c3);
1489 $ADDU $t_2,$at
1490 $ADDU $c_2,$t_2
1491 sltu $at,$c_2,$t_2
1492 $ADDU $c_3,$at
1493 mflo $t_1
1494 mfhi $t_2
1495 $ADDU $c_1,$t_1
1496 sltu $at,$c_1,$t_1
1497 $MULTU $a_6,$b_3 # mul_add_c(a[6],b[3],c1,c2,c3);
1498 $ADDU $t_2,$at
1499 $ADDU $c_2,$t_2
1500 sltu $at,$c_2,$t_2
1501 $ADDU $c_3,$at
1502 mflo $t_1
1503 mfhi $t_2
1504 $ADDU $c_1,$t_1
1505 sltu $at,$c_1,$t_1
1506 $MULTU $a_7,$b_2 # mul_add_c(a[7],b[2],c1,c2,c3);
1507 $ADDU $t_2,$at
1508 $ADDU $c_2,$t_2
1509 sltu $at,$c_2,$t_2
1510 $ADDU $c_3,$at
1511 mflo $t_1
1512 mfhi $t_2
1513 $ADDU $c_1,$t_1
1514 sltu $at,$c_1,$t_1
1515 $MULTU $a_7,$b_3 # mul_add_c(a[7],b[3],c2,c3,c1);
1516 $ADDU $t_2,$at
1517 $ADDU $c_2,$t_2
1518 sltu $at,$c_2,$t_2
1519 $ADDU $c_3,$at
1520 $ST $c_1,9*$BNSZ($a0) # r[9]=c1;
1521
1522 mflo $t_1
1523 mfhi $t_2
1524 $ADDU $c_2,$t_1
1525 sltu $at,$c_2,$t_1
1526 $MULTU $a_6,$b_4 # mul_add_c(a[6],b[4],c2,c3,c1);
1527 $ADDU $t_2,$at
1528 $ADDU $c_3,$t_2
1529 sltu $c_1,$c_3,$t_2
1530 mflo $t_1
1531 mfhi $t_2
1532 $ADDU $c_2,$t_1
1533 sltu $at,$c_2,$t_1
1534 $MULTU $a_5,$b_5 # mul_add_c(a[5],b[5],c2,c3,c1);
1535 $ADDU $t_2,$at
1536 $ADDU $c_3,$t_2
1537 sltu $at,$c_3,$t_2
1538 $ADDU $c_1,$at
1539 mflo $t_1
1540 mfhi $t_2
1541 $ADDU $c_2,$t_1
1542 sltu $at,$c_2,$t_1
1543 $MULTU $a_4,$b_6 # mul_add_c(a[4],b[6],c2,c3,c1);
1544 $ADDU $t_2,$at
1545 $ADDU $c_3,$t_2
1546 sltu $at,$c_3,$t_2
1547 $ADDU $c_1,$at
1548 mflo $t_1
1549 mfhi $t_2
1550 $ADDU $c_2,$t_1
1551 sltu $at,$c_2,$t_1
1552 $MULTU $a_3,$b_7 # mul_add_c(a[3],b[7],c2,c3,c1);
1553 $ADDU $t_2,$at
1554 $ADDU $c_3,$t_2
1555 sltu $at,$c_3,$t_2
1556 $ADDU $c_1,$at
1557 mflo $t_1
1558 mfhi $t_2
1559 $ADDU $c_2,$t_1
1560 sltu $at,$c_2,$t_1
1561 $MULTU $a_4,$b_7 # mul_add_c(a[4],b[7],c3,c1,c2);
1562 $ADDU $t_2,$at
1563 $ADDU $c_3,$t_2
1564 sltu $at,$c_3,$t_2
1565 $ADDU $c_1,$at
1566 $ST $c_2,10*$BNSZ($a0) # r[10]=c2;
1567
1568 mflo $t_1
1569 mfhi $t_2
1570 $ADDU $c_3,$t_1
1571 sltu $at,$c_3,$t_1
1572 $MULTU $a_5,$b_6 # mul_add_c(a[5],b[6],c3,c1,c2);
1573 $ADDU $t_2,$at
1574 $ADDU $c_1,$t_2
1575 sltu $c_2,$c_1,$t_2
1576 mflo $t_1
1577 mfhi $t_2
1578 $ADDU $c_3,$t_1
1579 sltu $at,$c_3,$t_1
1580 $MULTU $a_6,$b_5 # mul_add_c(a[6],b[5],c3,c1,c2);
1581 $ADDU $t_2,$at
1582 $ADDU $c_1,$t_2
1583 sltu $at,$c_1,$t_2
1584 $ADDU $c_2,$at
1585 mflo $t_1
1586 mfhi $t_2
1587 $ADDU $c_3,$t_1
1588 sltu $at,$c_3,$t_1
1589 $MULTU $a_7,$b_4 # mul_add_c(a[7],b[4],c3,c1,c2);
1590 $ADDU $t_2,$at
1591 $ADDU $c_1,$t_2
1592 sltu $at,$c_1,$t_2
1593 $ADDU $c_2,$at
1594 mflo $t_1
1595 mfhi $t_2
1596 $ADDU $c_3,$t_1
1597 sltu $at,$c_3,$t_1
1598 $MULTU $a_7,$b_5 # mul_add_c(a[7],b[5],c1,c2,c3);
1599 $ADDU $t_2,$at
1600 $ADDU $c_1,$t_2
1601 sltu $at,$c_1,$t_2
1602 $ADDU $c_2,$at
1603 $ST $c_3,11*$BNSZ($a0) # r[11]=c3;
1604
1605 mflo $t_1
1606 mfhi $t_2
1607 $ADDU $c_1,$t_1
1608 sltu $at,$c_1,$t_1
1609 $MULTU $a_6,$b_6 # mul_add_c(a[6],b[6],c1,c2,c3);
1610 $ADDU $t_2,$at
1611 $ADDU $c_2,$t_2
1612 sltu $c_3,$c_2,$t_2
1613 mflo $t_1
1614 mfhi $t_2
1615 $ADDU $c_1,$t_1
1616 sltu $at,$c_1,$t_1
1617 $MULTU $a_5,$b_7 # mul_add_c(a[5],b[7],c1,c2,c3);
1618 $ADDU $t_2,$at
1619 $ADDU $c_2,$t_2
1620 sltu $at,$c_2,$t_2
1621 $ADDU $c_3,$at
1622 mflo $t_1
1623 mfhi $t_2
1624 $ADDU $c_1,$t_1
1625 sltu $at,$c_1,$t_1
1626 $MULTU $a_6,$b_7 # mul_add_c(a[6],b[7],c2,c3,c1);
1627 $ADDU $t_2,$at
1628 $ADDU $c_2,$t_2
1629 sltu $at,$c_2,$t_2
1630 $ADDU $c_3,$at
1631 $ST $c_1,12*$BNSZ($a0) # r[12]=c1;
1632
1633 mflo $t_1
1634 mfhi $t_2
1635 $ADDU $c_2,$t_1
1636 sltu $at,$c_2,$t_1
1637 $MULTU $a_7,$b_6 # mul_add_c(a[7],b[6],c2,c3,c1);
1638 $ADDU $t_2,$at
1639 $ADDU $c_3,$t_2
1640 sltu $c_1,$c_3,$t_2
1641 mflo $t_1
1642 mfhi $t_2
1643 $ADDU $c_2,$t_1
1644 sltu $at,$c_2,$t_1
1645 $MULTU $a_7,$b_7 # mul_add_c(a[7],b[7],c3,c1,c2);
1646 $ADDU $t_2,$at
1647 $ADDU $c_3,$t_2
1648 sltu $at,$c_3,$t_2
1649 $ADDU $c_1,$at
1650 $ST $c_2,13*$BNSZ($a0) # r[13]=c2;
1651
1652 mflo $t_1
1653 mfhi $t_2
1654 $ADDU $c_3,$t_1
1655 sltu $at,$c_3,$t_1
1656 $ADDU $t_2,$at
1657 $ADDU $c_1,$t_2
1658 $ST $c_3,14*$BNSZ($a0) # r[14]=c3;
1659 $ST $c_1,15*$BNSZ($a0) # r[15]=c1;
1660
1661 .set noreorder
1662___
1663$code.=<<___ if ($flavour =~ /nubi/i);
1664 $REG_L $s5,10*$SZREG($sp)
1665 $REG_L $s4,9*$SZREG($sp)
1666 $REG_L $s3,8*$SZREG($sp)
1667 $REG_L $s2,7*$SZREG($sp)
1668 $REG_L $s1,6*$SZREG($sp)
1669 $REG_L $s0,5*$SZREG($sp)
1670 $REG_L $t3,4*$SZREG($sp)
1671 $REG_L $t2,3*$SZREG($sp)
1672 $REG_L $t1,2*$SZREG($sp)
1673 $REG_L $t0,1*$SZREG($sp)
1674 $REG_L $gp,0*$SZREG($sp)
1675 jr $ra
1676 $PTR_ADD $sp,12*$SZREG
1677___
1678$code.=<<___ if ($flavour !~ /nubi/i);
1679 $REG_L $s5,5*$SZREG($sp)
1680 $REG_L $s4,4*$SZREG($sp)
1681 $REG_L $s3,3*$SZREG($sp)
1682 $REG_L $s2,2*$SZREG($sp)
1683 $REG_L $s1,1*$SZREG($sp)
1684 $REG_L $s0,0*$SZREG($sp)
1685 jr $ra
1686 $PTR_ADD $sp,6*$SZREG
1687___
1688$code.=<<___;
1689.end bn_mul_comba8
1690
1691.align 5
1692.globl bn_mul_comba4
1693.ent bn_mul_comba4
1694bn_mul_comba4:
1695___
1696$code.=<<___ if ($flavour =~ /nubi/i);
1697 .frame $sp,6*$SZREG,$ra
1698 .mask 0x8000f008,-$SZREG
1699 .set noreorder
1700 $PTR_SUB $sp,6*$SZREG
1701 $REG_S $ra,5*$SZREG($sp)
1702 $REG_S $t3,4*$SZREG($sp)
1703 $REG_S $t2,3*$SZREG($sp)
1704 $REG_S $t1,2*$SZREG($sp)
1705 $REG_S $t0,1*$SZREG($sp)
1706 $REG_S $gp,0*$SZREG($sp)
1707___
1708$code.=<<___;
1709 .set reorder
1710 $LD $a_0,0($a1)
1711 $LD $b_0,0($a2)
1712 $LD $a_1,$BNSZ($a1)
1713 $LD $a_2,2*$BNSZ($a1)
1714 $MULTU $a_0,$b_0 # mul_add_c(a[0],b[0],c1,c2,c3);
1715 $LD $a_3,3*$BNSZ($a1)
1716 $LD $b_1,$BNSZ($a2)
1717 $LD $b_2,2*$BNSZ($a2)
1718 $LD $b_3,3*$BNSZ($a2)
1719 mflo $c_1
1720 mfhi $c_2
1721 $ST $c_1,0($a0)
1722
1723 $MULTU $a_0,$b_1 # mul_add_c(a[0],b[1],c2,c3,c1);
1724 mflo $t_1
1725 mfhi $t_2
1726 $ADDU $c_2,$t_1
1727 sltu $at,$c_2,$t_1
1728 $MULTU $a_1,$b_0 # mul_add_c(a[1],b[0],c2,c3,c1);
1729 $ADDU $c_3,$t_2,$at
1730 mflo $t_1
1731 mfhi $t_2
1732 $ADDU $c_2,$t_1
1733 sltu $at,$c_2,$t_1
1734 $MULTU $a_2,$b_0 # mul_add_c(a[2],b[0],c3,c1,c2);
1735 $ADDU $t_2,$at
1736 $ADDU $c_3,$t_2
1737 sltu $c_1,$c_3,$t_2
1738 $ST $c_2,$BNSZ($a0)
1739
1740 mflo $t_1
1741 mfhi $t_2
1742 $ADDU $c_3,$t_1
1743 sltu $at,$c_3,$t_1
1744 $MULTU $a_1,$b_1 # mul_add_c(a[1],b[1],c3,c1,c2);
1745 $ADDU $t_2,$at
1746 $ADDU $c_1,$t_2
1747 mflo $t_1
1748 mfhi $t_2
1749 $ADDU $c_3,$t_1
1750 sltu $at,$c_3,$t_1
1751 $MULTU $a_0,$b_2 # mul_add_c(a[0],b[2],c3,c1,c2);
1752 $ADDU $t_2,$at
1753 $ADDU $c_1,$t_2
1754 sltu $c_2,$c_1,$t_2
1755 mflo $t_1
1756 mfhi $t_2
1757 $ADDU $c_3,$t_1
1758 sltu $at,$c_3,$t_1
1759 $MULTU $a_0,$b_3 # mul_add_c(a[0],b[3],c1,c2,c3);
1760 $ADDU $t_2,$at
1761 $ADDU $c_1,$t_2
1762 sltu $at,$c_1,$t_2
1763 $ADDU $c_2,$at
1764 $ST $c_3,2*$BNSZ($a0)
1765
1766 mflo $t_1
1767 mfhi $t_2
1768 $ADDU $c_1,$t_1
1769 sltu $at,$c_1,$t_1
1770 $MULTU $a_1,$b_2 # mul_add_c(a[1],b[2],c1,c2,c3);
1771 $ADDU $t_2,$at
1772 $ADDU $c_2,$t_2
1773 sltu $c_3,$c_2,$t_2
1774 mflo $t_1
1775 mfhi $t_2
1776 $ADDU $c_1,$t_1
1777 sltu $at,$c_1,$t_1
1778 $MULTU $a_2,$b_1 # mul_add_c(a[2],b[1],c1,c2,c3);
1779 $ADDU $t_2,$at
1780 $ADDU $c_2,$t_2
1781 sltu $at,$c_2,$t_2
1782 $ADDU $c_3,$at
1783 mflo $t_1
1784 mfhi $t_2
1785 $ADDU $c_1,$t_1
1786 sltu $at,$c_1,$t_1
1787 $MULTU $a_3,$b_0 # mul_add_c(a[3],b[0],c1,c2,c3);
1788 $ADDU $t_2,$at
1789 $ADDU $c_2,$t_2
1790 sltu $at,$c_2,$t_2
1791 $ADDU $c_3,$at
1792 mflo $t_1
1793 mfhi $t_2
1794 $ADDU $c_1,$t_1
1795 sltu $at,$c_1,$t_1
1796 $MULTU $a_3,$b_1 # mul_add_c(a[3],b[1],c2,c3,c1);
1797 $ADDU $t_2,$at
1798 $ADDU $c_2,$t_2
1799 sltu $at,$c_2,$t_2
1800 $ADDU $c_3,$at
1801 $ST $c_1,3*$BNSZ($a0)
1802
1803 mflo $t_1
1804 mfhi $t_2
1805 $ADDU $c_2,$t_1
1806 sltu $at,$c_2,$t_1
1807 $MULTU $a_2,$b_2 # mul_add_c(a[2],b[2],c2,c3,c1);
1808 $ADDU $t_2,$at
1809 $ADDU $c_3,$t_2
1810 sltu $c_1,$c_3,$t_2
1811 mflo $t_1
1812 mfhi $t_2
1813 $ADDU $c_2,$t_1
1814 sltu $at,$c_2,$t_1
1815 $MULTU $a_1,$b_3 # mul_add_c(a[1],b[3],c2,c3,c1);
1816 $ADDU $t_2,$at
1817 $ADDU $c_3,$t_2
1818 sltu $at,$c_3,$t_2
1819 $ADDU $c_1,$at
1820 mflo $t_1
1821 mfhi $t_2
1822 $ADDU $c_2,$t_1
1823 sltu $at,$c_2,$t_1
1824 $MULTU $a_2,$b_3 # mul_add_c(a[2],b[3],c3,c1,c2);
1825 $ADDU $t_2,$at
1826 $ADDU $c_3,$t_2
1827 sltu $at,$c_3,$t_2
1828 $ADDU $c_1,$at
1829 $ST $c_2,4*$BNSZ($a0)
1830
1831 mflo $t_1
1832 mfhi $t_2
1833 $ADDU $c_3,$t_1
1834 sltu $at,$c_3,$t_1
1835 $MULTU $a_3,$b_2 # mul_add_c(a[3],b[2],c3,c1,c2);
1836 $ADDU $t_2,$at
1837 $ADDU $c_1,$t_2
1838 sltu $c_2,$c_1,$t_2
1839 mflo $t_1
1840 mfhi $t_2
1841 $ADDU $c_3,$t_1
1842 sltu $at,$c_3,$t_1
1843 $MULTU $a_3,$b_3 # mul_add_c(a[3],b[3],c1,c2,c3);
1844 $ADDU $t_2,$at
1845 $ADDU $c_1,$t_2
1846 sltu $at,$c_1,$t_2
1847 $ADDU $c_2,$at
1848 $ST $c_3,5*$BNSZ($a0)
1849
1850 mflo $t_1
1851 mfhi $t_2
1852 $ADDU $c_1,$t_1
1853 sltu $at,$c_1,$t_1
1854 $ADDU $t_2,$at
1855 $ADDU $c_2,$t_2
1856 $ST $c_1,6*$BNSZ($a0)
1857 $ST $c_2,7*$BNSZ($a0)
1858
1859 .set noreorder
1860___
1861$code.=<<___ if ($flavour =~ /nubi/i);
1862 $REG_L $t3,4*$SZREG($sp)
1863 $REG_L $t2,3*$SZREG($sp)
1864 $REG_L $t1,2*$SZREG($sp)
1865 $REG_L $t0,1*$SZREG($sp)
1866 $REG_L $gp,0*$SZREG($sp)
1867 $PTR_ADD $sp,6*$SZREG
1868___
1869$code.=<<___;
1870 jr $ra
1871 nop
1872.end bn_mul_comba4
1873___
1874
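# bn_mul_comba4 above and bn_mul_comba8 before it compute the full product of
# two 4- resp. 8-word little-endian operands into 8 resp. 16 result words,
# column by column.  A whole-number reference model that can be used to
# sanity-check the generated code, assuming 32-bit words, illustration only:
use Math::BigInt;
sub ref_bn_mul_comba {
	my ($a, $b) = @_;			# array refs, little-endian words
	my $W = Math::BigInt->new(2)->bpow(32);
	my $to_int = sub {
		my $v = Math::BigInt->bzero;
		$v = $v * $W + $_[0][$_] for reverse 0 .. $#{$_[0]};
		return $v;
	};
	my $p = $to_int->($a) * $to_int->($b);
	return [ map { (($p->copy >> (32 * $_)) % $W)->numify } 0 .. 2 * @$a - 1 ];
}
# e.g. ref_bn_mul_comba([1,2,3,4],[5,6,7,8]) lists r[0..7] as stored above.
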
1875($a_4,$a_5,$a_6,$a_7)=($b_0,$b_1,$b_2,$b_3);
1876
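# In the squaring code below, the mul_add_c2(...) comments mark off-diagonal
# products a[i]*a[j] (i != j), which must be counted twice.  The slt/$SLL
# pairs capture the bit shifted out of each half before doubling, so the
# 2-word product {hi,lo} becomes the 3-word value 2*(hi*2^32+lo).  A model of
# that doubling step, assuming 32-bit words, illustration only:
use Math::BigInt;
sub ref_double_product {
	my ($lo, $hi) = @_;			# the 2-word product, as plain words
	my $W = Math::BigInt->new(2)->bpow(32);
	my $d = (Math::BigInt->new($hi) * $W + $lo) * 2;
	return map { (($d->copy >> (32 * $_)) % $W)->numify } 0 .. 2;
}
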
1877$code.=<<___;
1878
1879.align 5
1880.globl bn_sqr_comba8
1881.ent bn_sqr_comba8
1882bn_sqr_comba8:
1883___
1884$code.=<<___ if ($flavour =~ /nubi/i);
1885 .frame $sp,6*$SZREG,$ra
1886 .mask 0x8000f008,-$SZREG
1887 .set noreorder
1888 $PTR_SUB $sp,6*$SZREG
1889 $REG_S $ra,5*$SZREG($sp)
1890 $REG_S $t3,4*$SZREG($sp)
1891 $REG_S $t2,3*$SZREG($sp)
1892 $REG_S $t1,2*$SZREG($sp)
1893 $REG_S $t0,1*$SZREG($sp)
1894 $REG_S $gp,0*$SZREG($sp)
1895___
1896$code.=<<___;
1897 .set reorder
1898 $LD $a_0,0($a1)
1899 $LD $a_1,$BNSZ($a1)
1900 $LD $a_2,2*$BNSZ($a1)
1901 $LD $a_3,3*$BNSZ($a1)
1902
1903 $MULTU $a_0,$a_0 # mul_add_c(a[0],b[0],c1,c2,c3);
1904 $LD $a_4,4*$BNSZ($a1)
1905 $LD $a_5,5*$BNSZ($a1)
1906 $LD $a_6,6*$BNSZ($a1)
1907 $LD $a_7,7*$BNSZ($a1)
1908 mflo $c_1
1909 mfhi $c_2
1910 $ST $c_1,0($a0)
1911
1912 $MULTU $a_0,$a_1 # mul_add_c2(a[0],b[1],c2,c3,c1);
1913 mflo $t_1
1914 mfhi $t_2
1915 slt $c_1,$t_2,$zero
1916 $SLL $t_2,1
1917 $MULTU $a_2,$a_0 # mul_add_c2(a[2],b[0],c3,c1,c2);
1918 slt $a2,$t_1,$zero
1919 $ADDU $t_2,$a2
1920 $SLL $t_1,1
1921 $ADDU $c_2,$t_1
1922 sltu $at,$c_2,$t_1
1923 $ADDU $c_3,$t_2,$at
1924 $ST $c_2,$BNSZ($a0)
1925
1926 mflo $t_1
1927 mfhi $t_2
1928 slt $c_2,$t_2,$zero
1929 $SLL $t_2,1
1930 $MULTU $a_1,$a_1 # mul_add_c(a[1],b[1],c3,c1,c2);
1931 slt $a2,$t_1,$zero
1932 $ADDU $t_2,$a2
1933 $SLL $t_1,1
1934 $ADDU $c_3,$t_1
1935 sltu $at,$c_3,$t_1
1936 $ADDU $t_2,$at
1937 $ADDU $c_1,$t_2
1938 sltu $at,$c_1,$t_2
1939 $ADDU $c_2,$at
1940 mflo $t_1
1941 mfhi $t_2
1942 $ADDU $c_3,$t_1
1943 sltu $at,$c_3,$t_1
1944 $MULTU $a_0,$a_3 # mul_add_c2(a[0],b[3],c1,c2,c3);
1945 $ADDU $t_2,$at
1946 $ADDU $c_1,$t_2
1947 sltu $at,$c_1,$t_2
1948 $ADDU $c_2,$at
1949 $ST $c_3,2*$BNSZ($a0)
1950
1951 mflo $t_1
1952 mfhi $t_2
1953 slt $c_3,$t_2,$zero
1954 $SLL $t_2,1
1955 $MULTU $a_1,$a_2 # mul_add_c2(a[1],b[2],c1,c2,c3);
1956 slt $a2,$t_1,$zero
1957 $ADDU $t_2,$a2
1958 $SLL $t_1,1
1959 $ADDU $c_1,$t_1
1960 sltu $at,$c_1,$t_1
1961 $ADDU $t_2,$at
1962 $ADDU $c_2,$t_2
1963 sltu $at,$c_2,$t_2
1964 $ADDU $c_3,$at
1965 mflo $t_1
1966 mfhi $t_2
1967 slt $at,$t_2,$zero
1968 $ADDU $c_3,$at
1969 $MULTU $a_4,$a_0 # mul_add_c2(a[4],b[0],c2,c3,c1);
1970 $SLL $t_2,1
1971 slt $a2,$t_1,$zero
1972 $ADDU $t_2,$a2
1973 $SLL $t_1,1
1974 $ADDU $c_1,$t_1
1975 sltu $at,$c_1,$t_1
1976 $ADDU $t_2,$at
1977 $ADDU $c_2,$t_2
1978 sltu $at,$c_2,$t_2
1979 $ADDU $c_3,$at
1980 $ST $c_1,3*$BNSZ($a0)
1981
1982 mflo $t_1
1983 mfhi $t_2
1984 slt $c_1,$t_2,$zero
1985 $SLL $t_2,1
1986 $MULTU $a_3,$a_1 # mul_add_c2(a[3],b[1],c2,c3,c1);
1987 slt $a2,$t_1,$zero
1988 $ADDU $t_2,$a2
1989 $SLL $t_1,1
1990 $ADDU $c_2,$t_1
1991 sltu $at,$c_2,$t_1
1992 $ADDU $t_2,$at
1993 $ADDU $c_3,$t_2
1994 sltu $at,$c_3,$t_2
1995 $ADDU $c_1,$at
1996 mflo $t_1
1997 mfhi $t_2
1998 slt $at,$t_2,$zero
1999 $ADDU $c_1,$at
2000 $MULTU $a_2,$a_2 # mul_add_c(a[2],b[2],c2,c3,c1);
2001 $SLL $t_2,1
2002 slt $a2,$t_1,$zero
2003 $ADDU $t_2,$a2
2004 $SLL $t_1,1
2005 $ADDU $c_2,$t_1
2006 sltu $at,$c_2,$t_1
2007 $ADDU $t_2,$at
2008 $ADDU $c_3,$t_2
2009 sltu $at,$c_3,$t_2
2010 $ADDU $c_1,$at
2011 mflo $t_1
2012 mfhi $t_2
2013 $ADDU $c_2,$t_1
2014 sltu $at,$c_2,$t_1
2015 $MULTU $a_0,$a_5 # mul_add_c2(a[0],b[5],c3,c1,c2);
2016 $ADDU $t_2,$at
2017 $ADDU $c_3,$t_2
2018 sltu $at,$c_3,$t_2
2019 $ADDU $c_1,$at
2020 $ST $c_2,4*$BNSZ($a0)
2021
2022 mflo $t_1
2023 mfhi $t_2
2024 slt $c_2,$t_2,$zero
2025 $SLL $t_2,1
2026 $MULTU $a_1,$a_4 # mul_add_c2(a[1],b[4],c3,c1,c2);
2027 slt $a2,$t_1,$zero
2028 $ADDU $t_2,$a2
2029 $SLL $t_1,1
2030 $ADDU $c_3,$t_1
2031 sltu $at,$c_3,$t_1
2032 $ADDU $t_2,$at
2033 $ADDU $c_1,$t_2
2034 sltu $at,$c_1,$t_2
2035 $ADDU $c_2,$at
2036 mflo $t_1
2037 mfhi $t_2
2038 slt $at,$t_2,$zero
2039 $ADDU $c_2,$at
2040 $MULTU $a_2,$a_3 # mul_add_c2(a[2],b[3],c3,c1,c2);
2041 $SLL $t_2,1
2042 slt $a2,$t_1,$zero
2043 $ADDU $t_2,$a2
2044 $SLL $t_1,1
2045 $ADDU $c_3,$t_1
2046 sltu $at,$c_3,$t_1
2047 $ADDU $t_2,$at
2048 $ADDU $c_1,$t_2
2049 sltu $at,$c_1,$t_2
2050 $ADDU $c_2,$at
2051 mflo $t_1
2052 mfhi $t_2
2053 slt $at,$t_2,$zero
2054 $MULTU $a_6,$a_0 # mul_add_c2(a[6],b[0],c1,c2,c3);
2055 $ADDU $c_2,$at
2056 $SLL $t_2,1
2057 slt $a2,$t_1,$zero
2058 $ADDU $t_2,$a2
2059 $SLL $t_1,1
2060 $ADDU $c_3,$t_1
2061 sltu $at,$c_3,$t_1
2062 $ADDU $t_2,$at
2063 $ADDU $c_1,$t_2
2064 sltu $at,$c_1,$t_2
2065 $ADDU $c_2,$at
2066 $ST $c_3,5*$BNSZ($a0)
2067
2068 mflo $t_1
2069 mfhi $t_2
2070 slt $c_3,$t_2,$zero
2071 $SLL $t_2,1
2072 $MULTU $a_5,$a_1 # mul_add_c2(a[5],b[1],c1,c2,c3);
2073 slt $a2,$t_1,$zero
2074 $ADDU $t_2,$a2
2075 $SLL $t_1,1
2076 $ADDU $c_1,$t_1
2077 sltu $at,$c_1,$t_1
2078 $ADDU $t_2,$at
2079 $ADDU $c_2,$t_2
2080 sltu $at,$c_2,$t_2
2081 $ADDU $c_3,$at
2082 mflo $t_1
2083 mfhi $t_2
2084 slt $at,$t_2,$zero
2085 $ADDU $c_3,$at
2086 $MULTU $a_4,$a_2 # mul_add_c2(a[4],b[2],c1,c2,c3);
2087 $SLL $t_2,1
2088 slt $a2,$t_1,$zero
2089 $ADDU $t_2,$a2
2090 $SLL $t_1,1
2091 $ADDU $c_1,$t_1
2092 sltu $at,$c_1,$t_1
2093 $ADDU $t_2,$at
2094 $ADDU $c_2,$t_2
2095 sltu $at,$c_2,$t_2
2096 $ADDU $c_3,$at
2097 mflo $t_1
2098 mfhi $t_2
2099 slt $at,$t_2,$zero
2100 $ADDU $c_3,$at
2101 $MULTU $a_3,$a_3 # mul_add_c(a[3],b[3],c1,c2,c3);
2102 $SLL $t_2,1
2103 slt $a2,$t_1,$zero
2104 $ADDU $t_2,$a2
2105 $SLL $t_1,1
2106 $ADDU $c_1,$t_1
2107 sltu $at,$c_1,$t_1
2108 $ADDU $t_2,$at
2109 $ADDU $c_2,$t_2
2110 sltu $at,$c_2,$t_2
2111 $ADDU $c_3,$at
2112 mflo $t_1
2113 mfhi $t_2
2114 $ADDU $c_1,$t_1
2115 sltu $at,$c_1,$t_1
2116 $MULTU $a_0,$a_7 # mul_add_c2(a[0],b[7],c2,c3,c1);
2117 $ADDU $t_2,$at
2118 $ADDU $c_2,$t_2
2119 sltu $at,$c_2,$t_2
2120 $ADDU $c_3,$at
2121 $ST $c_1,6*$BNSZ($a0)
2122
2123 mflo $t_1
2124 mfhi $t_2
2125 slt $c_1,$t_2,$zero
2126 $SLL $t_2,1
2127 $MULTU $a_1,$a_6 # mul_add_c2(a[1],b[6],c2,c3,c1);
2128 slt $a2,$t_1,$zero
2129 $ADDU $t_2,$a2
2130 $SLL $t_1,1
2131 $ADDU $c_2,$t_1
2132 sltu $at,$c_2,$t_1
2133 $ADDU $t_2,$at
2134 $ADDU $c_3,$t_2
2135 sltu $at,$c_3,$t_2
2136 $ADDU $c_1,$at
2137 mflo $t_1
2138 mfhi $t_2
2139 slt $at,$t_2,$zero
2140 $ADDU $c_1,$at
2141 $MULTU $a_2,$a_5 # mul_add_c2(a[2],b[5],c2,c3,c1);
2142 $SLL $t_2,1
2143 slt $a2,$t_1,$zero
2144 $ADDU $t_2,$a2
2145 $SLL $t_1,1
2146 $ADDU $c_2,$t_1
2147 sltu $at,$c_2,$t_1
2148 $ADDU $t_2,$at
2149 $ADDU $c_3,$t_2
2150 sltu $at,$c_3,$t_2
2151 $ADDU $c_1,$at
2152 mflo $t_1
2153 mfhi $t_2
2154 slt $at,$t_2,$zero
2155 $ADDU $c_1,$at
2156 $MULTU $a_3,$a_4 # mul_add_c2(a[3],b[4],c2,c3,c1);
2157 $SLL $t_2,1
2158 slt $a2,$t_1,$zero
2159 $ADDU $t_2,$a2
2160 $SLL $t_1,1
2161 $ADDU $c_2,$t_1
2162 sltu $at,$c_2,$t_1
2163 $ADDU $t_2,$at
2164 $ADDU $c_3,$t_2
2165 sltu $at,$c_3,$t_2
2166 $ADDU $c_1,$at
2167 mflo $t_1
2168 mfhi $t_2
2169 slt $at,$t_2,$zero
2170 $ADDU $c_1,$at
2171 $MULTU $a_7,$a_1 # mul_add_c2(a[7],b[1],c3,c1,c2);
2172 $SLL $t_2,1
2173 slt $a2,$t_1,$zero
2174 $ADDU $t_2,$a2
2175 $SLL $t_1,1
2176 $ADDU $c_2,$t_1
2177 sltu $at,$c_2,$t_1
2178 $ADDU $t_2,$at
2179 $ADDU $c_3,$t_2
2180 sltu $at,$c_3,$t_2
2181 $ADDU $c_1,$at
2182 $ST $c_2,7*$BNSZ($a0)
2183
2184 mflo $t_1
2185 mfhi $t_2
2186 slt $c_2,$t_2,$zero
2187 $SLL $t_2,1
2188 $MULTU $a_6,$a_2 # mul_add_c2(a[6],b[2],c3,c1,c2);
2189 slt $a2,$t_1,$zero
2190 $ADDU $t_2,$a2
2191 $SLL $t_1,1
2192 $ADDU $c_3,$t_1
2193 sltu $at,$c_3,$t_1
2194 $ADDU $t_2,$at
2195 $ADDU $c_1,$t_2
2196 sltu $at,$c_1,$t_2
2197 $ADDU $c_2,$at
2198 mflo $t_1
2199 mfhi $t_2
2200 slt $at,$t_2,$zero
2201 $ADDU $c_2,$at
2202 $MULTU $a_5,$a_3 # mul_add_c2(a[5],b[3],c3,c1,c2);
2203 $SLL $t_2,1
2204 slt $a2,$t_1,$zero
2205 $ADDU $t_2,$a2
2206 $SLL $t_1,1
2207 $ADDU $c_3,$t_1
2208 sltu $at,$c_3,$t_1
2209 $ADDU $t_2,$at
2210 $ADDU $c_1,$t_2
2211 sltu $at,$c_1,$t_2
2212 $ADDU $c_2,$at
2213 mflo $t_1
2214 mfhi $t_2
2215 slt $at,$t_2,$zero
2216 $ADDU $c_2,$at
2217 $MULTU $a_4,$a_4 # mul_add_c(a[4],b[4],c3,c1,c2);
2218 $SLL $t_2,1
2219 slt $a2,$t_1,$zero
2220 $ADDU $t_2,$a2
2221 $SLL $t_1,1
2222 $ADDU $c_3,$t_1
2223 sltu $at,$c_3,$t_1
2224 $ADDU $t_2,$at
2225 $ADDU $c_1,$t_2
2226 sltu $at,$c_1,$t_2
2227 $ADDU $c_2,$at
2228 mflo $t_1
2229 mfhi $t_2
2230 $ADDU $c_3,$t_1
2231 sltu $at,$c_3,$t_1
2232 $MULTU $a_2,$a_7 # mul_add_c2(a[2],b[7],c1,c2,c3);
2233 $ADDU $t_2,$at
2234 $ADDU $c_1,$t_2
2235 sltu $at,$c_1,$t_2
2236 $ADDU $c_2,$at
2237 $ST $c_3,8*$BNSZ($a0)
2238
2239 mflo $t_1
2240 mfhi $t_2
2241 slt $c_3,$t_2,$zero
2242 $SLL $t_2,1
2243 $MULTU $a_3,$a_6 # mul_add_c2(a[3],b[6],c1,c2,c3);
2244 slt $a2,$t_1,$zero
2245 $ADDU $t_2,$a2
2246 $SLL $t_1,1
2247 $ADDU $c_1,$t_1
2248 sltu $at,$c_1,$t_1
2249 $ADDU $t_2,$at
2250 $ADDU $c_2,$t_2
2251 sltu $at,$c_2,$t_2
2252 $ADDU $c_3,$at
2253 mflo $t_1
2254 mfhi $t_2
2255 slt $at,$t_2,$zero
2256 $ADDU $c_3,$at
2257 $MULTU $a_4,$a_5 # mul_add_c2(a[4],b[5],c1,c2,c3);
2258 $SLL $t_2,1
2259 slt $a2,$t_1,$zero
2260 $ADDU $t_2,$a2
2261 $SLL $t_1,1
2262 $ADDU $c_1,$t_1
2263 sltu $at,$c_1,$t_1
2264 $ADDU $t_2,$at
2265 $ADDU $c_2,$t_2
2266 sltu $at,$c_2,$t_2
2267 $ADDU $c_3,$at
2268 mflo $t_1
2269 mfhi $t_2
2270 slt $at,$t_2,$zero
2271 $ADDU $c_3,$at
2272 $MULTU $a_7,$a_3 # mul_add_c2(a[7],b[3],c2,c3,c1);
2273 $SLL $t_2,1
2274 slt $a2,$t_1,$zero
2275 $ADDU $t_2,$a2
2276 $SLL $t_1,1
2277 $ADDU $c_1,$t_1
2278 sltu $at,$c_1,$t_1
2279 $ADDU $t_2,$at
2280 $ADDU $c_2,$t_2
2281 sltu $at,$c_2,$t_2
2282 $ADDU $c_3,$at
2283 $ST $c_1,9*$BNSZ($a0)
2284
2285 mflo $t_1
2286 mfhi $t_2
2287 slt $c_1,$t_2,$zero
2288 $SLL $t_2,1
2289 $MULTU $a_6,$a_4 # mul_add_c2(a[6],b[4],c2,c3,c1);
2290 slt $a2,$t_1,$zero
2291 $ADDU $t_2,$a2
2292 $SLL $t_1,1
2293 $ADDU $c_2,$t_1
2294 sltu $at,$c_2,$t_1
2295 $ADDU $t_2,$at
2296 $ADDU $c_3,$t_2
2297 sltu $at,$c_3,$t_2
2298 $ADDU $c_1,$at
2299 mflo $t_1
2300 mfhi $t_2
2301 slt $at,$t_2,$zero
2302 $ADDU $c_1,$at
2303 $MULTU $a_5,$a_5 # mul_add_c(a[5],b[5],c2,c3,c1);
2304 $SLL $t_2,1
2305 slt $a2,$t_1,$zero
2306 $ADDU $t_2,$a2
2307 $SLL $t_1,1
2308 $ADDU $c_2,$t_1
2309 sltu $at,$c_2,$t_1
2310 $ADDU $t_2,$at
2311 $ADDU $c_3,$t_2
2312 sltu $at,$c_3,$t_2
2313 $ADDU $c_1,$at
2314 mflo $t_1
2315 mfhi $t_2
2316 $ADDU $c_2,$t_1
2317 sltu $at,$c_2,$t_1
2318 $MULTU $a_4,$a_7 # mul_add_c2(a[4],b[7],c3,c1,c2);
2319 $ADDU $t_2,$at
2320 $ADDU $c_3,$t_2
2321 sltu $at,$c_3,$t_2
2322 $ADDU $c_1,$at
2323 $ST $c_2,10*$BNSZ($a0)
2324
2325 mflo $t_1
2326 mfhi $t_2
2327 slt $c_2,$t_2,$zero
2328 $SLL $t_2,1
2329 $MULTU $a_5,$a_6 # mul_add_c2(a[5],b[6],c3,c1,c2);
2330 slt $a2,$t_1,$zero
2331 $ADDU $t_2,$a2
2332 $SLL $t_1,1
2333 $ADDU $c_3,$t_1
2334 sltu $at,$c_3,$t_1
2335 $ADDU $t_2,$at
2336 $ADDU $c_1,$t_2
2337 sltu $at,$c_1,$t_2
2338 $ADDU $c_2,$at
2339 mflo $t_1
2340 mfhi $t_2
2341 slt $at,$t_2,$zero
2342 $ADDU $c_2,$at
2343 $MULTU $a_7,$a_5 # mul_add_c2(a[7],b[5],c1,c2,c3);
2344 $SLL $t_2,1
2345 slt $a2,$t_1,$zero
2346 $ADDU $t_2,$a2
2347 $SLL $t_1,1
2348 $ADDU $c_3,$t_1
2349 sltu $at,$c_3,$t_1
2350 $ADDU $t_2,$at
2351 $ADDU $c_1,$t_2
2352 sltu $at,$c_1,$t_2
2353 $ADDU $c_2,$at
2354 $ST $c_3,11*$BNSZ($a0)
2355
2356 mflo $t_1
2357 mfhi $t_2
2358 slt $c_3,$t_2,$zero
2359 $SLL $t_2,1
2360 $MULTU $a_6,$a_6 # mul_add_c(a[6],b[6],c1,c2,c3);
2361 slt $a2,$t_1,$zero
2362 $ADDU $t_2,$a2
2363 $SLL $t_1,1
2364 $ADDU $c_1,$t_1
2365 sltu $at,$c_1,$t_1
2366 $ADDU $t_2,$at
2367 $ADDU $c_2,$t_2
2368 sltu $at,$c_2,$t_2
2369 $ADDU $c_3,$at
2370 mflo $t_1
2371 mfhi $t_2
2372 $ADDU $c_1,$t_1
2373 sltu $at,$c_1,$t_1
2374 $MULTU $a_6,$a_7 # mul_add_c2(a[6],b[7],c2,c3,c1);
2375 $ADDU $t_2,$at
2376 $ADDU $c_2,$t_2
2377 sltu $at,$c_2,$t_2
2378 $ADDU $c_3,$at
2379 $ST $c_1,12*$BNSZ($a0)
2380
2381 mflo $t_1
2382 mfhi $t_2
2383 slt $c_1,$t_2,$zero
2384 $SLL $t_2,1
2385 $MULTU $a_7,$a_7 # mul_add_c(a[7],b[7],c3,c1,c2);
2386 slt $a2,$t_1,$zero
2387 $ADDU $t_2,$a2
2388 $SLL $t_1,1
2389 $ADDU $c_2,$t_1
2390 sltu $at,$c_2,$t_1
2391 $ADDU $t_2,$at
2392 $ADDU $c_3,$t_2
2393 sltu $at,$c_3,$t_2
2394 $ADDU $c_1,$at
2395 $ST $c_2,13*$BNSZ($a0)
2396
2397 mflo $t_1
2398 mfhi $t_2
2399 $ADDU $c_3,$t_1
2400 sltu $at,$c_3,$t_1
2401 $ADDU $t_2,$at
2402 $ADDU $c_1,$t_2
2403 $ST $c_3,14*$BNSZ($a0)
2404 $ST $c_1,15*$BNSZ($a0)
2405
2406 .set noreorder
2407___
2408$code.=<<___ if ($flavour =~ /nubi/i);
2409 $REG_L $t3,4*$SZREG($sp)
2410 $REG_L $t2,3*$SZREG($sp)
2411 $REG_L $t1,2*$SZREG($sp)
2412 $REG_L $t0,1*$SZREG($sp)
2413 $REG_L $gp,0*$SZREG($sp)
2414 $PTR_ADD $sp,6*$SZREG
2415___
2416$code.=<<___;
2417 jr $ra
2418 nop
2419.end bn_sqr_comba8
2420
2421.align 5
2422.globl bn_sqr_comba4
2423.ent bn_sqr_comba4
2424bn_sqr_comba4:
2425___
2426$code.=<<___ if ($flavour =~ /nubi/i);
2427 .frame $sp,6*$SZREG,$ra
2428 .mask 0x8000f008,-$SZREG
2429 .set noreorder
2430 $PTR_SUB $sp,6*$SZREG
2431 $REG_S $ra,5*$SZREG($sp)
2432 $REG_S $t3,4*$SZREG($sp)
2433 $REG_S $t2,3*$SZREG($sp)
2434 $REG_S $t1,2*$SZREG($sp)
2435 $REG_S $t0,1*$SZREG($sp)
2436 $REG_S $gp,0*$SZREG($sp)
2437___
2438$code.=<<___;
2439 .set reorder
2440 $LD $a_0,0($a1)
2441 $LD $a_1,$BNSZ($a1)
2442 $MULTU $a_0,$a_0 # mul_add_c(a[0],b[0],c1,c2,c3);
2443 $LD $a_2,2*$BNSZ($a1)
2444 $LD $a_3,3*$BNSZ($a1)
2445 mflo $c_1
2446 mfhi $c_2
2447 $ST $c_1,0($a0)
2448
2449 $MULTU $a_0,$a_1 # mul_add_c2(a[0],b[1],c2,c3,c1);
2450 mflo $t_1
2451 mfhi $t_2
2452 slt $c_1,$t_2,$zero
2453 $SLL $t_2,1
2454 $MULTU $a_2,$a_0 # mul_add_c2(a[2],b[0],c3,c1,c2);
2455 slt $a2,$t_1,$zero
2456 $ADDU $t_2,$a2
2457 $SLL $t_1,1
2458 $ADDU $c_2,$t_1
2459 sltu $at,$c_2,$t_1
2460 $ADDU $c_3,$t_2,$at
2461 $ST $c_2,$BNSZ($a0)
2462
2463 mflo $t_1
2464 mfhi $t_2
2465 slt $c_2,$t_2,$zero
2466 $SLL $t_2,1
2467 $MULTU $a_1,$a_1 # mul_add_c(a[1],b[1],c3,c1,c2);
2468 slt $a2,$t_1,$zero
2469 $ADDU $t_2,$a2
2470 $SLL $t_1,1
2471 $ADDU $c_3,$t_1
2472 sltu $at,$c_3,$t_1
2473 $ADDU $t_2,$at
2474 $ADDU $c_1,$t_2
2475 sltu $at,$c_1,$t_2
2476 $ADDU $c_2,$at
2477 mflo $t_1
2478 mfhi $t_2
2479 $ADDU $c_3,$t_1
2480 sltu $at,$c_3,$t_1
2481 $MULTU $a_0,$a_3 # mul_add_c2(a[0],b[3],c1,c2,c3);
2482 $ADDU $t_2,$at
2483 $ADDU $c_1,$t_2
2484 sltu $at,$c_1,$t_2
2485 $ADDU $c_2,$at
2486 $ST $c_3,2*$BNSZ($a0)
2487
2488 mflo $t_1
2489 mfhi $t_2
2490 slt $c_3,$t_2,$zero
2491 $SLL $t_2,1
2492	$MULTU	$a_1,$a_2		# mul_add_c2(a[1],b[2],c1,c2,c3);
2493 slt $a2,$t_1,$zero
2494 $ADDU $t_2,$a2
2495 $SLL $t_1,1
2496 $ADDU $c_1,$t_1
2497 sltu $at,$c_1,$t_1
2498 $ADDU $t_2,$at
2499 $ADDU $c_2,$t_2
2500 sltu $at,$c_2,$t_2
2501 $ADDU $c_3,$at
2502 mflo $t_1
2503 mfhi $t_2
2504 slt $at,$t_2,$zero
2505 $ADDU $c_3,$at
2506 $MULTU $a_3,$a_1 # mul_add_c2(a[3],b[1],c2,c3,c1);
2507 $SLL $t_2,1
2508 slt $a2,$t_1,$zero
2509 $ADDU $t_2,$a2
2510 $SLL $t_1,1
2511 $ADDU $c_1,$t_1
2512 sltu $at,$c_1,$t_1
2513 $ADDU $t_2,$at
2514 $ADDU $c_2,$t_2
2515 sltu $at,$c_2,$t_2
2516 $ADDU $c_3,$at
2517 $ST $c_1,3*$BNSZ($a0)
2518
2519 mflo $t_1
2520 mfhi $t_2
2521 slt $c_1,$t_2,$zero
2522 $SLL $t_2,1
2523 $MULTU $a_2,$a_2 # mul_add_c(a[2],b[2],c2,c3,c1);
2524 slt $a2,$t_1,$zero
2525 $ADDU $t_2,$a2
2526 $SLL $t_1,1
2527 $ADDU $c_2,$t_1
2528 sltu $at,$c_2,$t_1
2529 $ADDU $t_2,$at
2530 $ADDU $c_3,$t_2
2531 sltu $at,$c_3,$t_2
2532 $ADDU $c_1,$at
2533 mflo $t_1
2534 mfhi $t_2
2535 $ADDU $c_2,$t_1
2536 sltu $at,$c_2,$t_1
2537 $MULTU $a_2,$a_3 # mul_add_c2(a[2],b[3],c3,c1,c2);
2538 $ADDU $t_2,$at
2539 $ADDU $c_3,$t_2
2540 sltu $at,$c_3,$t_2
2541 $ADDU $c_1,$at
2542 $ST $c_2,4*$BNSZ($a0)
2543
2544 mflo $t_1
2545 mfhi $t_2
2546 slt $c_2,$t_2,$zero
2547 $SLL $t_2,1
2548 $MULTU $a_3,$a_3 # mul_add_c(a[3],b[3],c1,c2,c3);
2549 slt $a2,$t_1,$zero
2550 $ADDU $t_2,$a2
2551 $SLL $t_1,1
2552 $ADDU $c_3,$t_1
2553 sltu $at,$c_3,$t_1
2554 $ADDU $t_2,$at
2555 $ADDU $c_1,$t_2
2556 sltu $at,$c_1,$t_2
2557 $ADDU $c_2,$at
2558 $ST $c_3,5*$BNSZ($a0)
2559
2560 mflo $t_1
2561 mfhi $t_2
2562 $ADDU $c_1,$t_1
2563 sltu $at,$c_1,$t_1
2564 $ADDU $t_2,$at
2565 $ADDU $c_2,$t_2
2566 $ST $c_1,6*$BNSZ($a0)
2567 $ST $c_2,7*$BNSZ($a0)
2568
2569 .set noreorder
2570___
2571$code.=<<___ if ($flavour =~ /nubi/i);
2572 $REG_L $t3,4*$SZREG($sp)
2573 $REG_L $t2,3*$SZREG($sp)
2574 $REG_L $t1,2*$SZREG($sp)
2575 $REG_L $t0,1*$SZREG($sp)
2576 $REG_L $gp,0*$SZREG($sp)
2577 $PTR_ADD $sp,6*$SZREG
2578___
2579$code.=<<___;
2580 jr $ra
2581 nop
2582.end bn_sqr_comba4
2583___
2584print $code;
2585close STDOUT;
diff --git a/src/lib/libssl/src/crypto/bn/asm/modexp512-x86_64.pl b/src/lib/libssl/src/crypto/bn/asm/modexp512-x86_64.pl
new file mode 100644
index 0000000000..54aeb01921
--- /dev/null
+++ b/src/lib/libssl/src/crypto/bn/asm/modexp512-x86_64.pl
@@ -0,0 +1,1496 @@
1#!/usr/bin/env perl
2#
3# Copyright (c) 2010-2011 Intel Corp.
4# Author: Vinodh.Gopal@intel.com
5# Jim Guilford
6# Erdinc.Ozturk@intel.com
7# Maxim.Perminov@intel.com
8#
9# More information about algorithm used can be found at:
10# http://www.cse.buffalo.edu/srds2009/escs2009_submission_Gopal.pdf
11#
12# ====================================================================
13# Copyright (c) 2011 The OpenSSL Project. All rights reserved.
14#
15# Redistribution and use in source and binary forms, with or without
16# modification, are permitted provided that the following conditions
17# are met:
18#
19# 1. Redistributions of source code must retain the above copyright
20# notice, this list of conditions and the following disclaimer.
21#
22# 2. Redistributions in binary form must reproduce the above copyright
23# notice, this list of conditions and the following disclaimer in
24# the documentation and/or other materials provided with the
25# distribution.
26#
27# 3. All advertising materials mentioning features or use of this
28# software must display the following acknowledgment:
29# "This product includes software developed by the OpenSSL Project
30# for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
31#
32# 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
33# endorse or promote products derived from this software without
34# prior written permission. For written permission, please contact
35# licensing@OpenSSL.org.
36#
37# 5. Products derived from this software may not be called "OpenSSL"
38# nor may "OpenSSL" appear in their names without prior written
39# permission of the OpenSSL Project.
40#
41# 6. Redistributions of any form whatsoever must retain the following
42# acknowledgment:
43# "This product includes software developed by the OpenSSL Project
44# for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
45#
46# THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
47# EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
48# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
49# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
50# ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
51# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
52# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
53# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
54# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
55# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
56# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
57# OF THE POSSIBILITY OF SUCH DAMAGE.
58# ====================================================================
59
60$flavour = shift;
61$output = shift;
62if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
63
64my $win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
65
66$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
67( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
68( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
69die "can't locate x86_64-xlate.pl";
70
71open STDOUT,"| $^X $xlate $flavour $output";
72
73use strict;
74my $code=".text\n\n";
75my $m=0;
76
77#
78# Define x512 macros
79#
80
81#MULSTEP_512_ADD MACRO x7, x6, x5, x4, x3, x2, x1, x0, dst, src1, src2, add_src, tmp1, tmp2
82#
83# uses rax, rdx, and args
84sub MULSTEP_512_ADD
85{
86 my ($x, $DST, $SRC2, $ASRC, $OP, $TMP)=@_;
87 my @X=@$x; # make a copy
88$code.=<<___;
89 mov (+8*0)($SRC2), %rax
90 mul $OP # rdx:rax = %OP * [0]
91 mov ($ASRC), $X[0]
92 add %rax, $X[0]
93 adc \$0, %rdx
94 mov $X[0], $DST
95___
96for(my $i=1;$i<8;$i++) {
97$code.=<<___;
98 mov %rdx, $TMP
99
100 mov (+8*$i)($SRC2), %rax
101 mul $OP # rdx:rax = %OP * [$i]
102 mov (+8*$i)($ASRC), $X[$i]
103 add %rax, $X[$i]
104 adc \$0, %rdx
105 add $TMP, $X[$i]
106 adc \$0, %rdx
107___
108}
109$code.=<<___;
110 mov %rdx, $X[0]
111___
112}
113
114#MULSTEP_512 MACRO x7, x6, x5, x4, x3, x2, x1, x0, dst, src2, src1_val, tmp
115#
116# uses rax, rdx, and args
117sub MULSTEP_512
118{
119 my ($x, $DST, $SRC2, $OP, $TMP)=@_;
120 my @X=@$x; # make a copy
121$code.=<<___;
122 mov (+8*0)($SRC2), %rax
123 mul $OP # rdx:rax = %OP * [0]
124 add %rax, $X[0]
125 adc \$0, %rdx
126 mov $X[0], $DST
127___
128for(my $i=1;$i<8;$i++) {
129$code.=<<___;
130 mov %rdx, $TMP
131
132 mov (+8*$i)($SRC2), %rax
133 mul $OP # rdx:rax = %OP * [$i]
134 add %rax, $X[$i]
135 adc \$0, %rdx
136 add $TMP, $X[$i]
137 adc \$0, %rdx
138___
139}
140$code.=<<___;
141 mov %rdx, $X[0]
142___
143}
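
# Both MULSTEP variants above emit one row of a schoolbook multiplication:
# add OP*[SRC2] (8 qwords) to the 512-bit value held in x0..x7, write the low
# qword of the sum to DST and keep the upper 512 bits in the registers (the
# callers then rotate @X so the register names line up for the next row).
# A whole-number model of that step, on little-endian qword arrays; this is
# an illustration only, never emitted into $code.
use Math::BigInt;
sub ref_mulstep_512 {
	my ($x, $op, $src2) = @_;		# $x, $src2: refs to 8 qwords each
	my $Q = Math::BigInt->new(2)->bpow(64);
	my $to_int = sub {
		my $v = Math::BigInt->bzero;
		$v = $v * $Q + $_[0][$_] for reverse 0 .. 7;
		return $v;
	};
	my $sum = $to_int->($x) + Math::BigInt->new($op) * $to_int->($src2);
	my $dst   = $sum->copy % $Q;		# low qword, stored to DST
	my @new_x = map { ($sum->copy >> (64 * $_)) % $Q } 1 .. 8;
	return ($dst, \@new_x);			# carry ends up in the top word
}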
144
145#
146# Swizzle Macros
147#
148
149# macro to copy data from flat space to swizzled table
150#MACRO swizzle pDst, pSrc, tmp1, tmp2
151# pDst and pSrc are modified
152sub swizzle
153{
154 my ($pDst, $pSrc, $cnt, $d0)=@_;
155$code.=<<___;
156 mov \$8, $cnt
157loop_$m:
158 mov ($pSrc), $d0
159 mov $d0#w, ($pDst)
160 shr \$16, $d0
161 mov $d0#w, (+64*1)($pDst)
162 shr \$16, $d0
163 mov $d0#w, (+64*2)($pDst)
164 shr \$16, $d0
165 mov $d0#w, (+64*3)($pDst)
166 lea 8($pSrc), $pSrc
167 lea 64*4($pDst), $pDst
168 dec $cnt
169 jnz loop_$m
170___
171
172 $m++;
173}
174
175# macro to copy data from swizzled table to flat space
176#MACRO unswizzle pDst, pSrc, tmp*3
177sub unswizzle
178{
179 my ($pDst, $pSrc, $cnt, $d0, $d1)=@_;
180$code.=<<___;
181 mov \$4, $cnt
182loop_$m:
183 movzxw (+64*3+256*0)($pSrc), $d0
184 movzxw (+64*3+256*1)($pSrc), $d1
185 shl \$16, $d0
186 shl \$16, $d1
187 mov (+64*2+256*0)($pSrc), $d0#w
188 mov (+64*2+256*1)($pSrc), $d1#w
189 shl \$16, $d0
190 shl \$16, $d1
191 mov (+64*1+256*0)($pSrc), $d0#w
192 mov (+64*1+256*1)($pSrc), $d1#w
193 shl \$16, $d0
194 shl \$16, $d1
195 mov (+64*0+256*0)($pSrc), $d0#w
196 mov (+64*0+256*1)($pSrc), $d1#w
197 mov $d0, (+8*0)($pDst)
198 mov $d1, (+8*1)($pDst)
199 lea 256*2($pSrc), $pSrc
200 lea 8*2($pDst), $pDst
201 sub \$1, $cnt
202 jnz loop_$m
203___
204
205 $m++;
206}
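
# swizzle() above scatters one 512-bit entry: each source qword is split into
# four 16-bit pieces stored 64 bytes apart, and the destination advances by
# 256 bytes per source qword, so an entry ends up as a narrow column running
# through the table rather than a contiguous 64-byte row (unswizzle reverses
# this).  The per-entry column offset is set up by the caller, outside this
# excerpt.  A pure-Perl sketch of the same address mapping, writing into a
# byte string; illustration only:
sub ref_swizzle {
	my ($table, $flat, $col_off) = @_;	# $table: ref to the byte string
	for my $i (0 .. 7) {			# 8 source qwords, 64 bytes total
		my @pieces = unpack("v4", substr($flat, 8 * $i, 8));
		for my $j (0 .. 3) {		# 16-bit chunks, 64 bytes apart
			substr($$table, $col_off + 256 * $i + 64 * $j, 2)
			    = pack("v", $pieces[$j]);
		}
	}
}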
207
208#
209# Data Structures
210#
211
212# Reduce Data
213#
214#
215# Offset Value
216# 0C0 Carries
217# 0B8 X2[10]
218# 0B0 X2[9]
219# 0A8 X2[8]
220# 0A0 X2[7]
221# 098 X2[6]
222# 090 X2[5]
223# 088 X2[4]
224# 080 X2[3]
225# 078 X2[2]
226# 070 X2[1]
227# 068 X2[0]
228# 060 X1[12] P[10]
229# 058 X1[11] P[9] Z[8]
230# 050 X1[10] P[8] Z[7]
231# 048 X1[9] P[7] Z[6]
232# 040 X1[8] P[6] Z[5]
233# 038 X1[7] P[5] Z[4]
234# 030 X1[6] P[4] Z[3]
235# 028 X1[5] P[3] Z[2]
236# 020 X1[4] P[2] Z[1]
237# 018 X1[3] P[1] Z[0]
238# 010 X1[2] P[0] Y[2]
239# 008 X1[1] Q[1] Y[1]
240# 000 X1[0] Q[0] Y[0]
241
242my $X1_offset = 0; # 13 qwords
243my $X2_offset = $X1_offset + 13*8; # 11 qwords
244my $Carries_offset = $X2_offset + 11*8; # 1 qword
245my $Q_offset = 0; # 2 qwords
246my $P_offset = $Q_offset + 2*8; # 11 qwords
247my $Y_offset = 0; # 3 qwords
248my $Z_offset = $Y_offset + 3*8; # 9 qwords
249
250my $Red_Data_Size = $Carries_offset + 1*8; # (25 qwords)
251
252#
253# Stack Frame
254#
255#
256# offset value
257# ... <old stack contents>
258# ...
259# 280 Garray
260
261# 278 tmp16[15]
262# ... ...
263# 200 tmp16[0]
264
265# 1F8 tmp[7]
266# ... ...
267# 1C0 tmp[0]
268
269# 1B8 GT[7]
270# ... ...
271# 180 GT[0]
272
273# 178 Reduce Data
274# ... ...
275# 0B8 Reduce Data
276# 0B0 reserved
277# 0A8 reserved
278# 0A0 reserved
279# 098 reserved
280# 090 reserved
281# 088 reduce result addr
282# 080 exp[8]
283
284# ...
285# 048 exp[1]
286# 040 exp[0]
287
288# 038 reserved
289# 030 loop_idx
290# 028 pg
291# 020 i
292# 018 pData ; arg 4
293# 010 pG ; arg 2
294# 008 pResult ; arg 1
295# 000 rsp ; stack pointer before subtract
296
297my $rsp_offset = 0;
298my $pResult_offset = 8*1 + $rsp_offset;
299my $pG_offset = 8*1 + $pResult_offset;
300my $pData_offset = 8*1 + $pG_offset;
301my $i_offset = 8*1 + $pData_offset;
302my $pg_offset = 8*1 + $i_offset;
303my $loop_idx_offset = 8*1 + $pg_offset;
304my $reserved1_offset = 8*1 + $loop_idx_offset;
305my $exp_offset = 8*1 + $reserved1_offset;
306my $red_result_addr_offset= 8*9 + $exp_offset;
307my $reserved2_offset = 8*1 + $red_result_addr_offset;
308my $Reduce_Data_offset = 8*5 + $reserved2_offset;
309my $GT_offset = $Red_Data_Size + $Reduce_Data_offset;
310my $tmp_offset = 8*8 + $GT_offset;
311my $tmp16_offset = 8*8 + $tmp_offset;
312my $garray_offset = 8*16 + $tmp16_offset;
313my $mem_size = 8*8*32 + $garray_offset;
314
315#
316# Offsets within Reduce Data
317#
318#
319# struct MODF_2FOLD_MONT_512_C1_DATA {
320# UINT64 t[8][8];
321# UINT64 m[8];
322# UINT64 m1[8]; /* 2^768 % m */
323# UINT64 m2[8]; /* 2^640 % m */
324# UINT64 k1[2]; /* (- 1/m) % 2^128 */
325# };
326
327my $T = 0;
328my $M = 512; # = 8 * 8 * 8
329my $M1 = 576; # = 8 * 8 * 9 /* += 8 * 8 */
330my $M2 = 640; # = 8 * 8 * 10 /* += 8 * 8 */
331my $K1 = 704; # = 8 * 8 * 11 /* += 8 * 8 */
332
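# The reduction constants described above are precomputed outside this file;
# per the struct comment, m1 = 2^768 % m, m2 = 2^640 % m and
# k1 = (-1/m) % 2^128 (m odd).  A Math::BigInt sketch of how such values can
# be derived, illustration only:
use Math::BigInt;
sub ref_reduce_constants {
	my ($m) = @_;				# the 512-bit modulus, a Math::BigInt
	my $two = Math::BigInt->new(2);
	my $m1  = $two->copy->bpow(768)->bmod($m);		# 2^768 % m
	my $m2  = $two->copy->bpow(640)->bmod($m);		# 2^640 % m
	my $r   = $two->copy->bpow(128);
	my $k1  = $m->copy->bmodinv($r)->bneg->bmod($r);	# (-1/m) % 2^128
	return ($m1, $m2, $k1);
}
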
333#
334# FUNCTIONS
335#
336
337{{{
338#
339# MULADD_128x512 : Function to multiply 128-bits (2 qwords) by 512-bits (8 qwords)
340# and add 512-bits (8 qwords)
341# to get 640 bits (10 qwords)
342# Input: 128-bit mul source: [rdi+8*1], rbp
343# 512-bit mul source: [rsi+8*n]
344# 512-bit add source: r15, r14, ..., r9, r8
345# Output: r9, r8, r15, r14, r13, r12, r11, r10, [rcx+8*1], [rcx+8*0]
346# Clobbers all regs except: rcx, rsi, rdi
347$code.=<<___;
348.type MULADD_128x512,\@abi-omnipotent
349.align 16
350MULADD_128x512:
351___
352 &MULSTEP_512([map("%r$_",(8..15))], "(+8*0)(%rcx)", "%rsi", "%rbp", "%rbx");
353$code.=<<___;
354 mov (+8*1)(%rdi), %rbp
355___
356 &MULSTEP_512([map("%r$_",(9..15,8))], "(+8*1)(%rcx)", "%rsi", "%rbp", "%rbx");
357$code.=<<___;
358 ret
359.size MULADD_128x512,.-MULADD_128x512
360___
361}}}
362
363{{{
364#MULADD_256x512 MACRO pDst, pA, pB, OP, TMP, X7, X6, X5, X4, X3, X2, X1, X0
365#
366# Inputs: pDst: Destination (768 bits, 12 qwords)
367# pA: Multiplicand (1024 bits, 16 qwords)
368# pB: Multiplicand (512 bits, 8 qwords)
369# Dst = Ah * B + Al
370# where Ah is (in qwords) A[15:12] (256 bits) and Al is A[7:0] (512 bits)
371# Results in X3 X2 X1 X0 X7 X6 X5 X4 Dst[3:0]
372# Uses registers: arguments, RAX, RDX
373sub MULADD_256x512
374{
375 my ($pDst, $pA, $pB, $OP, $TMP, $X)=@_;
376$code.=<<___;
377 mov (+8*12)($pA), $OP
378___
379 &MULSTEP_512_ADD($X, "(+8*0)($pDst)", $pB, $pA, $OP, $TMP);
380 push(@$X,shift(@$X));
381
382$code.=<<___;
383 mov (+8*13)($pA), $OP
384___
385 &MULSTEP_512($X, "(+8*1)($pDst)", $pB, $OP, $TMP);
386 push(@$X,shift(@$X));
387
388$code.=<<___;
389 mov (+8*14)($pA), $OP
390___
391 &MULSTEP_512($X, "(+8*2)($pDst)", $pB, $OP, $TMP);
392 push(@$X,shift(@$X));
393
394$code.=<<___;
395 mov (+8*15)($pA), $OP
396___
397 &MULSTEP_512($X, "(+8*3)($pDst)", $pB, $OP, $TMP);
398 push(@$X,shift(@$X));
399}
400
401#
402# mont_reduce(UINT64 *x, /* 1024 bits, 16 qwords */
403# UINT64 *m, /* 512 bits, 8 qwords */
404# MODF_2FOLD_MONT_512_C1_DATA *data,
405# UINT64 *r) /* 512 bits, 8 qwords */
406# Input: x (number to be reduced): tmp16 (Implicit)
407# m (modulus): [pM] (Implicit)
408# data (reduce data): [pData] (Implicit)
409# Output: r (result): Address in [red_res_addr]
410# result also in: r9, r8, r15, r14, r13, r12, r11, r10
411
412my @X=map("%r$_",(8..15));
413
414$code.=<<___;
415.type mont_reduce,\@abi-omnipotent
416.align 16
417mont_reduce:
418___
419
420my $STACK_DEPTH = 8;
421 #
422 # X1 = Xh * M1 + Xl
423$code.=<<___;
424 lea (+$Reduce_Data_offset+$X1_offset+$STACK_DEPTH)(%rsp), %rdi # pX1 (Dst) 769 bits, 13 qwords
425 mov (+$pData_offset+$STACK_DEPTH)(%rsp), %rsi # pM1 (Bsrc) 512 bits, 8 qwords
426 add \$$M1, %rsi
427 lea (+$tmp16_offset+$STACK_DEPTH)(%rsp), %rcx # X (Asrc) 1024 bits, 16 qwords
428
429___
430
431 &MULADD_256x512("%rdi", "%rcx", "%rsi", "%rbp", "%rbx", \@X); # rotates @X 4 times
432 # results in r11, r10, r9, r8, r15, r14, r13, r12, X1[3:0]
433
434$code.=<<___;
435 xor %rax, %rax
436 # X1 += xl
437 add (+8*8)(%rcx), $X[4]
438 adc (+8*9)(%rcx), $X[5]
439 adc (+8*10)(%rcx), $X[6]
440 adc (+8*11)(%rcx), $X[7]
441 adc \$0, %rax
442 # X1 is now rax, r11-r8, r15-r12, tmp16[3:0]
443
444 #
445 # check for carry ;; carry stored in rax
446 mov $X[4], (+8*8)(%rdi) # rdi points to X1
447 mov $X[5], (+8*9)(%rdi)
448 mov $X[6], %rbp
449 mov $X[7], (+8*11)(%rdi)
450
451 mov %rax, (+$Reduce_Data_offset+$Carries_offset+$STACK_DEPTH)(%rsp)
452
453 mov (+8*0)(%rdi), $X[4]
454 mov (+8*1)(%rdi), $X[5]
455 mov (+8*2)(%rdi), $X[6]
456 mov (+8*3)(%rdi), $X[7]
457
458 # X1 is now stored in: X1[11], rbp, X1[9:8], r15-r8
459 # rdi -> X1
460 # rsi -> M1
461
462 #
463 # X2 = Xh * M2 + Xl
464 # do first part (X2 = Xh * M2)
465 add \$8*10, %rdi # rdi -> pXh ; 128 bits, 2 qwords
466 # Xh is actually { [rdi+8*1], rbp }
467 add \$`$M2-$M1`, %rsi # rsi -> M2
468 lea (+$Reduce_Data_offset+$X2_offset+$STACK_DEPTH)(%rsp), %rcx # rcx -> pX2 ; 641 bits, 11 qwords
469___
470 unshift(@X,pop(@X)); unshift(@X,pop(@X));
471$code.=<<___;
472
473 call MULADD_128x512 # args in rcx, rdi / rbp, rsi, r15-r8
474 # result in r9, r8, r15, r14, r13, r12, r11, r10, X2[1:0]
475 mov (+$Reduce_Data_offset+$Carries_offset+$STACK_DEPTH)(%rsp), %rax
476
477 # X2 += Xl
478 add (+8*8-8*10)(%rdi), $X[6] # (-8*10) is to adjust rdi -> Xh to Xl
479 adc (+8*9-8*10)(%rdi), $X[7]
480 mov $X[6], (+8*8)(%rcx)
481 mov $X[7], (+8*9)(%rcx)
482
483 adc %rax, %rax
484 mov %rax, (+$Reduce_Data_offset+$Carries_offset+$STACK_DEPTH)(%rsp)
485
486 lea (+$Reduce_Data_offset+$Q_offset+$STACK_DEPTH)(%rsp), %rdi # rdi -> pQ ; 128 bits, 2 qwords
487 add \$`$K1-$M2`, %rsi # rsi -> pK1 ; 128 bits, 2 qwords
488
489 # MUL_128x128t128 rdi, rcx, rsi ; Q = X2 * K1 (bottom half)
490 # B1:B0 = rsi[1:0] = K1[1:0]
491 # A1:A0 = rcx[1:0] = X2[1:0]
492 # Result = rdi[1],rbp = Q[1],rbp
493 mov (%rsi), %r8 # B0
494 mov (+8*1)(%rsi), %rbx # B1
495
496 mov (%rcx), %rax # A0
497 mul %r8 # B0
498 mov %rax, %rbp
499 mov %rdx, %r9
500
501 mov (+8*1)(%rcx), %rax # A1
502 mul %r8 # B0
503 add %rax, %r9
504
505 mov (%rcx), %rax # A0
506 mul %rbx # B1
507 add %rax, %r9
508
509 mov %r9, (+8*1)(%rdi)
510 # end MUL_128x128t128
511
512 sub \$`$K1-$M`, %rsi
513
514 mov (%rcx), $X[6]
515 mov (+8*1)(%rcx), $X[7] # r9:r8 = X2[1:0]
516
517 call MULADD_128x512 # args in rcx, rdi / rbp, rsi, r15-r8
518 # result in r9, r8, r15, r14, r13, r12, r11, r10, X2[1:0]
519
520 # load first half of m to rdx, rdi, rbx, rax
521 # moved this here for efficiency
522 mov (+8*0)(%rsi), %rax
523 mov (+8*1)(%rsi), %rbx
524 mov (+8*2)(%rsi), %rdi
525 mov (+8*3)(%rsi), %rdx
526
527 # continue with reduction
528 mov (+$Reduce_Data_offset+$Carries_offset+$STACK_DEPTH)(%rsp), %rbp
529
530 add (+8*8)(%rcx), $X[6]
531 adc (+8*9)(%rcx), $X[7]
532
533 #accumulate the final carry to rbp
534 adc %rbp, %rbp
535
536 # Add in overflow corrections: R = (X2>>128) += T[overflow]
537 # R = {r9, r8, r15, r14, ..., r10}
538 shl \$3, %rbp
 539	mov	(+$pData_offset+$STACK_DEPTH)(%rsp), %rcx	# rcx -> Data (and points to T)
540 add %rcx, %rbp # pT ; 512 bits, 8 qwords, spread out
541
542 # rsi will be used to generate a mask after the addition
543 xor %rsi, %rsi
544
545 add (+8*8*0)(%rbp), $X[0]
546 adc (+8*8*1)(%rbp), $X[1]
547 adc (+8*8*2)(%rbp), $X[2]
548 adc (+8*8*3)(%rbp), $X[3]
549 adc (+8*8*4)(%rbp), $X[4]
550 adc (+8*8*5)(%rbp), $X[5]
551 adc (+8*8*6)(%rbp), $X[6]
552 adc (+8*8*7)(%rbp), $X[7]
553
554 # if there is a carry: rsi = 0xFFFFFFFFFFFFFFFF
555 # if carry is clear: rsi = 0x0000000000000000
556 sbb \$0, %rsi
557
558 # if carry is clear, subtract 0. Otherwise, subtract 256 bits of m
559 and %rsi, %rax
560 and %rsi, %rbx
561 and %rsi, %rdi
562 and %rsi, %rdx
563
564 mov \$1, %rbp
565 sub %rax, $X[0]
566 sbb %rbx, $X[1]
567 sbb %rdi, $X[2]
568 sbb %rdx, $X[3]
569
570 # if there is a borrow: rbp = 0
571 # if there is no borrow: rbp = 1
572 # this is used to save the borrows in between the first half and the 2nd half of the subtraction of m
573 sbb \$0, %rbp
574
575 #load second half of m to rdx, rdi, rbx, rax
576
577 add \$$M, %rcx
578 mov (+8*4)(%rcx), %rax
579 mov (+8*5)(%rcx), %rbx
580 mov (+8*6)(%rcx), %rdi
581 mov (+8*7)(%rcx), %rdx
582
583 # use the rsi mask as before
584 # if carry is clear, subtract 0. Otherwise, subtract 256 bits of m
585 and %rsi, %rax
586 and %rsi, %rbx
587 and %rsi, %rdi
588 and %rsi, %rdx
589
590 # if rbp = 0, there was a borrow before, it is moved to the carry flag
591 # if rbp = 1, there was not a borrow before, carry flag is cleared
592 sub \$1, %rbp
593
594 sbb %rax, $X[4]
595 sbb %rbx, $X[5]
596 sbb %rdi, $X[6]
597 sbb %rdx, $X[7]
598
599 # write R back to memory
600
601 mov (+$red_result_addr_offset+$STACK_DEPTH)(%rsp), %rsi
602 mov $X[0], (+8*0)(%rsi)
603 mov $X[1], (+8*1)(%rsi)
604 mov $X[2], (+8*2)(%rsi)
605 mov $X[3], (+8*3)(%rsi)
606 mov $X[4], (+8*4)(%rsi)
607 mov $X[5], (+8*5)(%rsi)
608 mov $X[6], (+8*6)(%rsi)
609 mov $X[7], (+8*7)(%rsi)
610
611 ret
612.size mont_reduce,.-mont_reduce
613___
614}}}
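
# mont_reduce above folds the 1024-bit input twice before the final 128-bit
# step: per its comments, X1 = Xh*m1 + Xl (split at 2^768) and
# X2 = X1h*m2 + X1l (split at 2^640), both congruent to x mod m because
# m1 = 2^768 % m and m2 = 2^640 % m; the k1 multiply then lets the low
# 128 bits be divided out Montgomery-style (the table-based overflow
# correction is not modelled here).  A sketch of one folding step only,
# illustration:
use Math::BigInt;
sub ref_fold {					# preserves x mod m, shrinks x
	my ($x, $m_fold, $split_bits) = @_;	# $m_fold == 2^$split_bits % m
	my $mask = Math::BigInt->new(2)->bpow($split_bits)->bsub(1);
	my $xh   = $x->copy->brsft($split_bits);
	my $xl   = $x->copy->band($mask);
	return $xh->bmul($m_fold)->badd($xl);
}
# my $X1 = ref_fold($x, $m1, 768);  my $X2 = ref_fold($X1, $m2, 640);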
615
616{{{
617#MUL_512x512 MACRO pDst, pA, pB, x7, x6, x5, x4, x3, x2, x1, x0, tmp*2
618#
619# Inputs: pDst: Destination (1024 bits, 16 qwords)
620# pA: Multiplicand (512 bits, 8 qwords)
621# pB: Multiplicand (512 bits, 8 qwords)
622# Uses registers rax, rdx, args
623# B operand in [pB] and also in x7...x0
624sub MUL_512x512
625{
626 my ($pDst, $pA, $pB, $x, $OP, $TMP, $pDst_o)=@_;
627 my ($pDst, $pDst_o) = ($pDst =~ m/([^+]*)\+?(.*)?/);
628 my @X=@$x; # make a copy
629
630$code.=<<___;
631 mov (+8*0)($pA), $OP
632
633 mov $X[0], %rax
634 mul $OP # rdx:rax = %OP * [0]
635 mov %rax, (+$pDst_o+8*0)($pDst)
636 mov %rdx, $X[0]
637___
638for(my $i=1;$i<8;$i++) {
639$code.=<<___;
640 mov $X[$i], %rax
641 mul $OP # rdx:rax = %OP * [$i]
642 add %rax, $X[$i-1]
643 adc \$0, %rdx
644 mov %rdx, $X[$i]
645___
646}
647
648for(my $i=1;$i<8;$i++) {
649$code.=<<___;
650 mov (+8*$i)($pA), $OP
651___
652
653 &MULSTEP_512(\@X, "(+$pDst_o+8*$i)($pDst)", $pB, $OP, $TMP);
654 push(@X,shift(@X));
655}
656
657$code.=<<___;
658 mov $X[0], (+$pDst_o+8*8)($pDst)
659 mov $X[1], (+$pDst_o+8*9)($pDst)
660 mov $X[2], (+$pDst_o+8*10)($pDst)
661 mov $X[3], (+$pDst_o+8*11)($pDst)
662 mov $X[4], (+$pDst_o+8*12)($pDst)
663 mov $X[5], (+$pDst_o+8*13)($pDst)
664 mov $X[6], (+$pDst_o+8*14)($pDst)
665 mov $X[7], (+$pDst_o+8*15)($pDst)
666___
667}
668
669#
670# mont_mul_a3b : subroutine to compute (Src1 * Src2) % M (all 512-bits)
671# Input: src1: Address of source 1: rdi
672# src2: Address of source 2: rsi
673# Output: dst: Address of destination: [red_res_addr]
674# src2 and result also in: r9, r8, r15, r14, r13, r12, r11, r10
675# Temp: Clobbers [tmp16], all registers
676$code.=<<___;
677.type mont_mul_a3b,\@abi-omnipotent
678.align 16
679mont_mul_a3b:
680 #
681 # multiply tmp = src1 * src2
682 # For multiply: dst = rcx, src1 = rdi, src2 = rsi
683 # stack depth is extra 8 from call
684___
685 &MUL_512x512("%rsp+$tmp16_offset+8", "%rdi", "%rsi", [map("%r$_",(10..15,8..9))], "%rbp", "%rbx");
686$code.=<<___;
687 #
688 # Dst = tmp % m
689 # Call reduce(tmp, m, data, dst)
690
691 # tail recursion optimization: jmp to mont_reduce and return from there
692 jmp mont_reduce
693 # call mont_reduce
694 # ret
695.size mont_mul_a3b,.-mont_mul_a3b
696___
697}}}
698
699{{{
700#SQR_512 MACRO pDest, pA, x7, x6, x5, x4, x3, x2, x1, x0, tmp*4
701#
702# Input in memory [pA] and also in x7...x0
703# Uses all argument registers plus rax and rdx
704#
705# This version computes all of the off-diagonal terms into memory,
706# and then it adds in the diagonal terms
707
708sub SQR_512
709{
710 my ($pDst, $pA, $x, $A, $tmp, $x7, $x6, $pDst_o)=@_;
711 my ($pDst, $pDst_o) = ($pDst =~ m/([^+]*)\+?(.*)?/);
712 my @X=@$x; # make a copy
713$code.=<<___;
714 # ------------------
715 # first pass 01...07
716 # ------------------
717 mov $X[0], $A
718
719 mov $X[1],%rax
720 mul $A
721 mov %rax, (+$pDst_o+8*1)($pDst)
722___
723for(my $i=2;$i<8;$i++) {
724$code.=<<___;
725 mov %rdx, $X[$i-2]
726 mov $X[$i],%rax
727 mul $A
728 add %rax, $X[$i-2]
729 adc \$0, %rdx
730___
731}
732$code.=<<___;
733 mov %rdx, $x7
734
735 mov $X[0], (+$pDst_o+8*2)($pDst)
736
737 # ------------------
738 # second pass 12...17
739 # ------------------
740
741 mov (+8*1)($pA), $A
742
743 mov (+8*2)($pA),%rax
744 mul $A
745 add %rax, $X[1]
746 adc \$0, %rdx
747 mov $X[1], (+$pDst_o+8*3)($pDst)
748
749 mov %rdx, $X[0]
750 mov (+8*3)($pA),%rax
751 mul $A
752 add %rax, $X[2]
753 adc \$0, %rdx
754 add $X[0], $X[2]
755 adc \$0, %rdx
756 mov $X[2], (+$pDst_o+8*4)($pDst)
757
758 mov %rdx, $X[0]
759 mov (+8*4)($pA),%rax
760 mul $A
761 add %rax, $X[3]
762 adc \$0, %rdx
763 add $X[0], $X[3]
764 adc \$0, %rdx
765
766 mov %rdx, $X[0]
767 mov (+8*5)($pA),%rax
768 mul $A
769 add %rax, $X[4]
770 adc \$0, %rdx
771 add $X[0], $X[4]
772 adc \$0, %rdx
773
774 mov %rdx, $X[0]
775 mov $X[6],%rax
776 mul $A
777 add %rax, $X[5]
778 adc \$0, %rdx
779 add $X[0], $X[5]
780 adc \$0, %rdx
781
782 mov %rdx, $X[0]
783 mov $X[7],%rax
784 mul $A
785 add %rax, $x7
786 adc \$0, %rdx
787 add $X[0], $x7
788 adc \$0, %rdx
789
790 mov %rdx, $X[1]
791
792 # ------------------
793 # third pass 23...27
794 # ------------------
795 mov (+8*2)($pA), $A
796
797 mov (+8*3)($pA),%rax
798 mul $A
799 add %rax, $X[3]
800 adc \$0, %rdx
801 mov $X[3], (+$pDst_o+8*5)($pDst)
802
803 mov %rdx, $X[0]
804 mov (+8*4)($pA),%rax
805 mul $A
806 add %rax, $X[4]
807 adc \$0, %rdx
808 add $X[0], $X[4]
809 adc \$0, %rdx
810 mov $X[4], (+$pDst_o+8*6)($pDst)
811
812 mov %rdx, $X[0]
813 mov (+8*5)($pA),%rax
814 mul $A
815 add %rax, $X[5]
816 adc \$0, %rdx
817 add $X[0], $X[5]
818 adc \$0, %rdx
819
820 mov %rdx, $X[0]
821 mov $X[6],%rax
822 mul $A
823 add %rax, $x7
824 adc \$0, %rdx
825 add $X[0], $x7
826 adc \$0, %rdx
827
828 mov %rdx, $X[0]
829 mov $X[7],%rax
830 mul $A
831 add %rax, $X[1]
832 adc \$0, %rdx
833 add $X[0], $X[1]
834 adc \$0, %rdx
835
836 mov %rdx, $X[2]
837
838 # ------------------
839 # fourth pass 34...37
840 # ------------------
841
842 mov (+8*3)($pA), $A
843
844 mov (+8*4)($pA),%rax
845 mul $A
846 add %rax, $X[5]
847 adc \$0, %rdx
848 mov $X[5], (+$pDst_o+8*7)($pDst)
849
850 mov %rdx, $X[0]
851 mov (+8*5)($pA),%rax
852 mul $A
853 add %rax, $x7
854 adc \$0, %rdx
855 add $X[0], $x7
856 adc \$0, %rdx
857 mov $x7, (+$pDst_o+8*8)($pDst)
858
859 mov %rdx, $X[0]
860 mov $X[6],%rax
861 mul $A
862 add %rax, $X[1]
863 adc \$0, %rdx
864 add $X[0], $X[1]
865 adc \$0, %rdx
866
867 mov %rdx, $X[0]
868 mov $X[7],%rax
869 mul $A
870 add %rax, $X[2]
871 adc \$0, %rdx
872 add $X[0], $X[2]
873 adc \$0, %rdx
874
875 mov %rdx, $X[5]
876
877 # ------------------
878 # fifth pass 45...47
879 # ------------------
880 mov (+8*4)($pA), $A
881
882 mov (+8*5)($pA),%rax
883 mul $A
884 add %rax, $X[1]
885 adc \$0, %rdx
886 mov $X[1], (+$pDst_o+8*9)($pDst)
887
888 mov %rdx, $X[0]
889 mov $X[6],%rax
890 mul $A
891 add %rax, $X[2]
892 adc \$0, %rdx
893 add $X[0], $X[2]
894 adc \$0, %rdx
895 mov $X[2], (+$pDst_o+8*10)($pDst)
896
897 mov %rdx, $X[0]
898 mov $X[7],%rax
899 mul $A
900 add %rax, $X[5]
901 adc \$0, %rdx
902 add $X[0], $X[5]
903 adc \$0, %rdx
904
905 mov %rdx, $X[1]
906
907 # ------------------
908 # sixth pass 56...57
909 # ------------------
910 mov (+8*5)($pA), $A
911
912 mov $X[6],%rax
913 mul $A
914 add %rax, $X[5]
915 adc \$0, %rdx
916 mov $X[5], (+$pDst_o+8*11)($pDst)
917
918 mov %rdx, $X[0]
919 mov $X[7],%rax
920 mul $A
921 add %rax, $X[1]
922 adc \$0, %rdx
923 add $X[0], $X[1]
924 adc \$0, %rdx
925 mov $X[1], (+$pDst_o+8*12)($pDst)
926
927 mov %rdx, $X[2]
928
929 # ------------------
930 # seventh pass 67
931 # ------------------
932 mov $X[6], $A
933
934 mov $X[7],%rax
935 mul $A
936 add %rax, $X[2]
937 adc \$0, %rdx
938 mov $X[2], (+$pDst_o+8*13)($pDst)
939
940 mov %rdx, (+$pDst_o+8*14)($pDst)
941
942 # start finalize (add in squares, and double off-terms)
943 mov (+$pDst_o+8*1)($pDst), $X[0]
944 mov (+$pDst_o+8*2)($pDst), $X[1]
945 mov (+$pDst_o+8*3)($pDst), $X[2]
946 mov (+$pDst_o+8*4)($pDst), $X[3]
947 mov (+$pDst_o+8*5)($pDst), $X[4]
948 mov (+$pDst_o+8*6)($pDst), $X[5]
949
950 mov (+8*3)($pA), %rax
951 mul %rax
952 mov %rax, $x6
953 mov %rdx, $X[6]
954
955 add $X[0], $X[0]
956 adc $X[1], $X[1]
957 adc $X[2], $X[2]
958 adc $X[3], $X[3]
959 adc $X[4], $X[4]
960 adc $X[5], $X[5]
961 adc \$0, $X[6]
962
963 mov (+8*0)($pA), %rax
964 mul %rax
965 mov %rax, (+$pDst_o+8*0)($pDst)
966 mov %rdx, $A
967
968 mov (+8*1)($pA), %rax
969 mul %rax
970
971 add $A, $X[0]
972 adc %rax, $X[1]
973 adc \$0, %rdx
974
975 mov %rdx, $A
976 mov $X[0], (+$pDst_o+8*1)($pDst)
977 mov $X[1], (+$pDst_o+8*2)($pDst)
978
979 mov (+8*2)($pA), %rax
980 mul %rax
981
982 add $A, $X[2]
983 adc %rax, $X[3]
984 adc \$0, %rdx
985
986 mov %rdx, $A
987
988 mov $X[2], (+$pDst_o+8*3)($pDst)
989 mov $X[3], (+$pDst_o+8*4)($pDst)
990
991 xor $tmp, $tmp
992 add $A, $X[4]
993 adc $x6, $X[5]
994 adc \$0, $tmp
995
996 mov $X[4], (+$pDst_o+8*5)($pDst)
997 mov $X[5], (+$pDst_o+8*6)($pDst)
998
999	# $tmp has the 0/1 carry into column 7
1000	# $X[6] has a full value for column 7
1001
1002 mov (+$pDst_o+8*7)($pDst), $X[0]
1003 mov (+$pDst_o+8*8)($pDst), $X[1]
1004 mov (+$pDst_o+8*9)($pDst), $X[2]
1005 mov (+$pDst_o+8*10)($pDst), $X[3]
1006 mov (+$pDst_o+8*11)($pDst), $X[4]
1007 mov (+$pDst_o+8*12)($pDst), $X[5]
1008 mov (+$pDst_o+8*13)($pDst), $x6
1009 mov (+$pDst_o+8*14)($pDst), $x7
1010
1011 mov $X[7], %rax
1012 mul %rax
1013 mov %rax, $X[7]
1014 mov %rdx, $A
1015
1016 add $X[0], $X[0]
1017 adc $X[1], $X[1]
1018 adc $X[2], $X[2]
1019 adc $X[3], $X[3]
1020 adc $X[4], $X[4]
1021 adc $X[5], $X[5]
1022 adc $x6, $x6
1023 adc $x7, $x7
1024 adc \$0, $A
1025
1026 add $tmp, $X[0]
1027
1028 mov (+8*4)($pA), %rax
1029 mul %rax
1030
1031 add $X[6], $X[0]
1032 adc %rax, $X[1]
1033 adc \$0, %rdx
1034
1035 mov %rdx, $tmp
1036
1037 mov $X[0], (+$pDst_o+8*7)($pDst)
1038 mov $X[1], (+$pDst_o+8*8)($pDst)
1039
1040 mov (+8*5)($pA), %rax
1041 mul %rax
1042
1043 add $tmp, $X[2]
1044 adc %rax, $X[3]
1045 adc \$0, %rdx
1046
1047 mov %rdx, $tmp
1048
1049 mov $X[2], (+$pDst_o+8*9)($pDst)
1050 mov $X[3], (+$pDst_o+8*10)($pDst)
1051
1052 mov (+8*6)($pA), %rax
1053 mul %rax
1054
1055 add $tmp, $X[4]
1056 adc %rax, $X[5]
1057 adc \$0, %rdx
1058
1059 mov $X[4], (+$pDst_o+8*11)($pDst)
1060 mov $X[5], (+$pDst_o+8*12)($pDst)
1061
1062 add %rdx, $x6
1063 adc $X[7], $x7
1064 adc \$0, $A
1065
1066 mov $x6, (+$pDst_o+8*13)($pDst)
1067 mov $x7, (+$pDst_o+8*14)($pDst)
1068 mov $A, (+$pDst_o+8*15)($pDst)
1069___
1070}
1071
1072#
1073# sqr_reduce: subroutine to compute Result = reduce(Result * Result)
1074#
1075# input and result also in: r9, r8, r15, r14, r13, r12, r11, r10
1076#
1077$code.=<<___;
1078.type sqr_reduce,\@abi-omnipotent
1079.align 16
1080sqr_reduce:
1081 mov (+$pResult_offset+8)(%rsp), %rcx
1082___
1083 &SQR_512("%rsp+$tmp16_offset+8", "%rcx", [map("%r$_",(10..15,8..9))], "%rbx", "%rbp", "%rsi", "%rdi");
1084$code.=<<___;
1085 # tail recursion optimization: jmp to mont_reduce and return from there
1086 jmp mont_reduce
1087 # call mont_reduce
1088 # ret
1089.size sqr_reduce,.-sqr_reduce
1090___
1091}}}
1092
1093#
1094# MAIN FUNCTION
1095#
1096
1097#mod_exp_512(UINT64 *result, /* 512 bits, 8 qwords */
1098# UINT64 *g, /* 512 bits, 8 qwords */
1099# UINT64 *exp, /* 512 bits, 8 qwords */
1100# struct mod_ctx_512 *data)
1101
1102# window size = 5
1103# table size = 2^5 = 32
1104#table_entries equ 32
1105#table_size equ table_entries * 8
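#
# Rough outline of the fixed-window algorithm driven by the code below
# (descriptive summary only, not part of the generated code):
#
#	GT     = reduce(g * 2^256)		# move g into Montgomery space
#	G[0]   = C;  G[i] = reduce(G[i-1] * GT)	# 32-entry table, stored swizzled
#	result = G[exp{511:507}]
#	loop:  square result (sqr_reduce) and multiply in the next window of
#	       the exponent (looked up via unswizzle), until loop_idx has run
#	       from 505 down to 0, five bits per main-loop iteration
#	result = reduce(result); if (result >= m) result -= m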
1106$code.=<<___;
1107.globl mod_exp_512
1108.type mod_exp_512,\@function,4
1109mod_exp_512:
1110 push %rbp
1111 push %rbx
1112 push %r12
1113 push %r13
1114 push %r14
1115 push %r15
1116
1117 # adjust stack down and then align it with cache boundary
1118 mov %rsp, %r8
1119 sub \$$mem_size, %rsp
1120 and \$-64, %rsp
1121
1122 # store previous stack pointer and arguments
1123 mov %r8, (+$rsp_offset)(%rsp)
1124 mov %rdi, (+$pResult_offset)(%rsp)
1125 mov %rsi, (+$pG_offset)(%rsp)
1126 mov %rcx, (+$pData_offset)(%rsp)
1127.Lbody:
1128 # transform g into montgomery space
1129 # GT = reduce(g * C2) = reduce(g * (2^256))
1130 # reduce expects to have the input in [tmp16]
1131 pxor %xmm4, %xmm4
1132 movdqu (+16*0)(%rsi), %xmm0
1133 movdqu (+16*1)(%rsi), %xmm1
1134 movdqu (+16*2)(%rsi), %xmm2
1135 movdqu (+16*3)(%rsi), %xmm3
1136 movdqa %xmm4, (+$tmp16_offset+16*0)(%rsp)
1137 movdqa %xmm4, (+$tmp16_offset+16*1)(%rsp)
1138 movdqa %xmm4, (+$tmp16_offset+16*6)(%rsp)
1139 movdqa %xmm4, (+$tmp16_offset+16*7)(%rsp)
1140 movdqa %xmm0, (+$tmp16_offset+16*2)(%rsp)
1141 movdqa %xmm1, (+$tmp16_offset+16*3)(%rsp)
1142 movdqa %xmm2, (+$tmp16_offset+16*4)(%rsp)
1143 movdqa %xmm3, (+$tmp16_offset+16*5)(%rsp)
1144
1145 # load pExp before rdx gets blown away
1146 movdqu (+16*0)(%rdx), %xmm0
1147 movdqu (+16*1)(%rdx), %xmm1
1148 movdqu (+16*2)(%rdx), %xmm2
1149 movdqu (+16*3)(%rdx), %xmm3
1150
1151 lea (+$GT_offset)(%rsp), %rbx
1152 mov %rbx, (+$red_result_addr_offset)(%rsp)
1153 call mont_reduce
1154
1155 # Initialize tmp = C
1156 lea (+$tmp_offset)(%rsp), %rcx
1157 xor %rax, %rax
1158 mov %rax, (+8*0)(%rcx)
1159 mov %rax, (+8*1)(%rcx)
1160 mov %rax, (+8*3)(%rcx)
1161 mov %rax, (+8*4)(%rcx)
1162 mov %rax, (+8*5)(%rcx)
1163 mov %rax, (+8*6)(%rcx)
1164 mov %rax, (+8*7)(%rcx)
1165 mov %rax, (+$exp_offset+8*8)(%rsp)
1166 movq \$1, (+8*2)(%rcx)
1167
1168 lea (+$garray_offset)(%rsp), %rbp
1169 mov %rcx, %rsi # pTmp
1170 mov %rbp, %rdi # Garray[][0]
1171___
1172
1173 &swizzle("%rdi", "%rcx", "%rax", "%rbx");
1174
1175 # for (rax = 31; rax != 0; rax--) {
1176 # tmp = reduce(tmp * G)
1177 # swizzle(pg, tmp);
1178 # pg += 2; }
1179$code.=<<___;
1180 mov \$31, %rax
1181 mov %rax, (+$i_offset)(%rsp)
1182 mov %rbp, (+$pg_offset)(%rsp)
1183 # rsi -> pTmp
1184 mov %rsi, (+$red_result_addr_offset)(%rsp)
1185 mov (+8*0)(%rsi), %r10
1186 mov (+8*1)(%rsi), %r11
1187 mov (+8*2)(%rsi), %r12
1188 mov (+8*3)(%rsi), %r13
1189 mov (+8*4)(%rsi), %r14
1190 mov (+8*5)(%rsi), %r15
1191 mov (+8*6)(%rsi), %r8
1192 mov (+8*7)(%rsi), %r9
1193init_loop:
1194 lea (+$GT_offset)(%rsp), %rdi
1195 call mont_mul_a3b
1196 lea (+$tmp_offset)(%rsp), %rsi
1197 mov (+$pg_offset)(%rsp), %rbp
1198 add \$2, %rbp
1199 mov %rbp, (+$pg_offset)(%rsp)
1200 mov %rsi, %rcx # rcx = rsi = addr of tmp
1201___
1202
1203 &swizzle("%rbp", "%rcx", "%rax", "%rbx");
1204$code.=<<___;
1205 mov (+$i_offset)(%rsp), %rax
1206 sub \$1, %rax
1207 mov %rax, (+$i_offset)(%rsp)
1208 jne init_loop
1209
1210 #
1211 # Copy exponent onto stack
1212 movdqa %xmm0, (+$exp_offset+16*0)(%rsp)
1213 movdqa %xmm1, (+$exp_offset+16*1)(%rsp)
1214 movdqa %xmm2, (+$exp_offset+16*2)(%rsp)
1215 movdqa %xmm3, (+$exp_offset+16*3)(%rsp)
1216
1217
1218 #
1219 # Do exponentiation
1220 # Initialize result to G[exp{511:507}]
1221 mov (+$exp_offset+62)(%rsp), %eax
1222 mov %rax, %rdx
1223 shr \$11, %rax
1224 and \$0x07FF, %edx
1225 mov %edx, (+$exp_offset+62)(%rsp)
1226 lea (+$garray_offset)(%rsp,%rax,2), %rsi
1227 mov (+$pResult_offset)(%rsp), %rdx
1228___
1229
1230 &unswizzle("%rdx", "%rsi", "%rbp", "%rbx", "%rax");
1231
1232 #
1233 # Loop variables
1234	# rcx = [loop_idx] = index: 505 (= 510-5) down to 0, step 5
1235$code.=<<___;
1236 movq \$505, (+$loop_idx_offset)(%rsp)
1237
1238 mov (+$pResult_offset)(%rsp), %rcx
1239 mov %rcx, (+$red_result_addr_offset)(%rsp)
1240 mov (+8*0)(%rcx), %r10
1241 mov (+8*1)(%rcx), %r11
1242 mov (+8*2)(%rcx), %r12
1243 mov (+8*3)(%rcx), %r13
1244 mov (+8*4)(%rcx), %r14
1245 mov (+8*5)(%rcx), %r15
1246 mov (+8*6)(%rcx), %r8
1247 mov (+8*7)(%rcx), %r9
1248 jmp sqr_2
1249
1250main_loop_a3b:
1251 call sqr_reduce
1252 call sqr_reduce
1253 call sqr_reduce
1254sqr_2:
1255 call sqr_reduce
1256 call sqr_reduce
1257
1258 #
1259 # Do multiply, first look up proper value in Garray
1260 mov (+$loop_idx_offset)(%rsp), %rcx # bit index
1261 mov %rcx, %rax
1262 shr \$4, %rax # rax is word pointer
1263 mov (+$exp_offset)(%rsp,%rax,2), %edx
1264 and \$15, %rcx
1265 shrq %cl, %rdx
1266 and \$0x1F, %rdx
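	# rdx now holds the 5-bit window value (0..31) used to index Garray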
1267
1268 lea (+$garray_offset)(%rsp,%rdx,2), %rsi
1269 lea (+$tmp_offset)(%rsp), %rdx
1270 mov %rdx, %rdi
1271___
1272
1273 &unswizzle("%rdx", "%rsi", "%rbp", "%rbx", "%rax");
1274 # rdi = tmp = pG
1275
1276 #
1277	# Call mont_mul_a3b(pDst,   pSrc1,  pSrc2, pM, pData)
1278	#                   result  result  pG     M   Data
1279$code.=<<___;
1280 mov (+$pResult_offset)(%rsp), %rsi
1281 call mont_mul_a3b
1282
1283 #
1284 # finish loop
1285 mov (+$loop_idx_offset)(%rsp), %rcx
1286 sub \$5, %rcx
1287 mov %rcx, (+$loop_idx_offset)(%rsp)
1288 jge main_loop_a3b
1289
1290 #
1291
1292end_main_loop_a3b:
1293 # transform result out of Montgomery space
1294 # result = reduce(result)
1295 mov (+$pResult_offset)(%rsp), %rdx
1296 pxor %xmm4, %xmm4
1297 movdqu (+16*0)(%rdx), %xmm0
1298 movdqu (+16*1)(%rdx), %xmm1
1299 movdqu (+16*2)(%rdx), %xmm2
1300 movdqu (+16*3)(%rdx), %xmm3
1301 movdqa %xmm4, (+$tmp16_offset+16*4)(%rsp)
1302 movdqa %xmm4, (+$tmp16_offset+16*5)(%rsp)
1303 movdqa %xmm4, (+$tmp16_offset+16*6)(%rsp)
1304 movdqa %xmm4, (+$tmp16_offset+16*7)(%rsp)
1305 movdqa %xmm0, (+$tmp16_offset+16*0)(%rsp)
1306 movdqa %xmm1, (+$tmp16_offset+16*1)(%rsp)
1307 movdqa %xmm2, (+$tmp16_offset+16*2)(%rsp)
1308 movdqa %xmm3, (+$tmp16_offset+16*3)(%rsp)
1309 call mont_reduce
1310
1311	# If result >= m, subtract m
1312 # load result into r15:r8
1313 mov (+$pResult_offset)(%rsp), %rax
1314 mov (+8*0)(%rax), %r8
1315 mov (+8*1)(%rax), %r9
1316 mov (+8*2)(%rax), %r10
1317 mov (+8*3)(%rax), %r11
1318 mov (+8*4)(%rax), %r12
1319 mov (+8*5)(%rax), %r13
1320 mov (+8*6)(%rax), %r14
1321 mov (+8*7)(%rax), %r15
1322
1323 # subtract m
1324 mov (+$pData_offset)(%rsp), %rbx
1325 add \$$M, %rbx
1326
1327 sub (+8*0)(%rbx), %r8
1328 sbb (+8*1)(%rbx), %r9
1329 sbb (+8*2)(%rbx), %r10
1330 sbb (+8*3)(%rbx), %r11
1331 sbb (+8*4)(%rbx), %r12
1332 sbb (+8*5)(%rbx), %r13
1333 sbb (+8*6)(%rbx), %r14
1334 sbb (+8*7)(%rbx), %r15
1335
1336 # if Carry is clear, replace result with difference
1337 mov (+8*0)(%rax), %rsi
1338 mov (+8*1)(%rax), %rdi
1339 mov (+8*2)(%rax), %rcx
1340 mov (+8*3)(%rax), %rdx
1341 cmovnc %r8, %rsi
1342 cmovnc %r9, %rdi
1343 cmovnc %r10, %rcx
1344 cmovnc %r11, %rdx
1345 mov %rsi, (+8*0)(%rax)
1346 mov %rdi, (+8*1)(%rax)
1347 mov %rcx, (+8*2)(%rax)
1348 mov %rdx, (+8*3)(%rax)
1349
1350 mov (+8*4)(%rax), %rsi
1351 mov (+8*5)(%rax), %rdi
1352 mov (+8*6)(%rax), %rcx
1353 mov (+8*7)(%rax), %rdx
1354 cmovnc %r12, %rsi
1355 cmovnc %r13, %rdi
1356 cmovnc %r14, %rcx
1357 cmovnc %r15, %rdx
1358 mov %rsi, (+8*4)(%rax)
1359 mov %rdi, (+8*5)(%rax)
1360 mov %rcx, (+8*6)(%rax)
1361 mov %rdx, (+8*7)(%rax)
1362
1363 mov (+$rsp_offset)(%rsp), %rsi
1364 mov 0(%rsi),%r15
1365 mov 8(%rsi),%r14
1366 mov 16(%rsi),%r13
1367 mov 24(%rsi),%r12
1368 mov 32(%rsi),%rbx
1369 mov 40(%rsi),%rbp
1370 lea 48(%rsi),%rsp
1371.Lepilogue:
1372 ret
1373.size mod_exp_512, . - mod_exp_512
1374___
1375
1376if ($win64) {
1377# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame,
1378# CONTEXT *context,DISPATCHER_CONTEXT *disp)
1379my $rec="%rcx";
1380my $frame="%rdx";
1381my $context="%r8";
1382my $disp="%r9";
1383
1384$code.=<<___;
1385.extern __imp_RtlVirtualUnwind
1386.type mod_exp_512_se_handler,\@abi-omnipotent
1387.align 16
1388mod_exp_512_se_handler:
1389 push %rsi
1390 push %rdi
1391 push %rbx
1392 push %rbp
1393 push %r12
1394 push %r13
1395 push %r14
1396 push %r15
1397 pushfq
1398 sub \$64,%rsp
1399
1400 mov 120($context),%rax # pull context->Rax
1401 mov 248($context),%rbx # pull context->Rip
1402
1403 lea .Lbody(%rip),%r10
1404 cmp %r10,%rbx # context->Rip<prologue label
1405 jb .Lin_prologue
1406
1407 mov 152($context),%rax # pull context->Rsp
1408
1409 lea .Lepilogue(%rip),%r10
1410 cmp %r10,%rbx # context->Rip>=epilogue label
1411 jae .Lin_prologue
1412
1413 mov $rsp_offset(%rax),%rax # pull saved Rsp
1414
1415 mov 32(%rax),%rbx
1416 mov 40(%rax),%rbp
1417 mov 24(%rax),%r12
1418 mov 16(%rax),%r13
1419 mov 8(%rax),%r14
1420 mov 0(%rax),%r15
1421 lea 48(%rax),%rax
1422 mov %rbx,144($context) # restore context->Rbx
1423 mov %rbp,160($context) # restore context->Rbp
1424 mov %r12,216($context) # restore context->R12
1425 mov %r13,224($context) # restore context->R13
1426 mov %r14,232($context) # restore context->R14
1427 mov %r15,240($context) # restore context->R15
1428
1429.Lin_prologue:
1430 mov 8(%rax),%rdi
1431 mov 16(%rax),%rsi
1432 mov %rax,152($context) # restore context->Rsp
1433 mov %rsi,168($context) # restore context->Rsi
1434 mov %rdi,176($context) # restore context->Rdi
1435
1436 mov 40($disp),%rdi # disp->ContextRecord
1437 mov $context,%rsi # context
1438 mov \$154,%ecx # sizeof(CONTEXT)
1439 .long 0xa548f3fc # cld; rep movsq
1440
1441 mov $disp,%rsi
1442 xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER
1443 mov 8(%rsi),%rdx # arg2, disp->ImageBase
1444 mov 0(%rsi),%r8 # arg3, disp->ControlPc
1445 mov 16(%rsi),%r9 # arg4, disp->FunctionEntry
1446 mov 40(%rsi),%r10 # disp->ContextRecord
1447 lea 56(%rsi),%r11 # &disp->HandlerData
1448 lea 24(%rsi),%r12 # &disp->EstablisherFrame
1449 mov %r10,32(%rsp) # arg5
1450 mov %r11,40(%rsp) # arg6
1451 mov %r12,48(%rsp) # arg7
1452 mov %rcx,56(%rsp) # arg8, (NULL)
1453 call *__imp_RtlVirtualUnwind(%rip)
1454
1455 mov \$1,%eax # ExceptionContinueSearch
1456 add \$64,%rsp
1457 popfq
1458 pop %r15
1459 pop %r14
1460 pop %r13
1461 pop %r12
1462 pop %rbp
1463 pop %rbx
1464 pop %rdi
1465 pop %rsi
1466 ret
1467.size mod_exp_512_se_handler,.-mod_exp_512_se_handler
1468
1469.section .pdata
1470.align 4
1471 .rva .LSEH_begin_mod_exp_512
1472 .rva .LSEH_end_mod_exp_512
1473 .rva .LSEH_info_mod_exp_512
1474
1475.section .xdata
1476.align 8
1477.LSEH_info_mod_exp_512:
1478 .byte 9,0,0,0
1479 .rva mod_exp_512_se_handler
1480___
1481}
1482
1483sub reg_part {
1484my ($reg,$conv)=@_;
1485 if ($reg =~ /%r[0-9]+/) { $reg .= $conv; }
1486 elsif ($conv eq "b") { $reg =~ s/%[er]([^x]+)x?/%$1l/; }
1487 elsif ($conv eq "w") { $reg =~ s/%[er](.+)/%$1/; }
1488 elsif ($conv eq "d") { $reg =~ s/%[er](.+)/%e$1/; }
1489 return $reg;
1490}
1491
1492$code =~ s/(%[a-z0-9]+)#([bwd])/reg_part($1,$2)/gem;
1493$code =~ s/\`([^\`]*)\`/eval $1/gem;
1494$code =~ s/(\(\+[^)]+\))/eval $1/gem;
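# Examples of the rewrites above (illustrative):
#   "%r8#d" -> "%r8d",  "%rax#w" -> "%ax",  "%rbx#b" -> "%bl"	(reg_part)
#   `...`   -> the evaluated Perl expression
#   "(+8*8*1)(%rbp)" -> "64(%rbp)"	(constant address offsets folded)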
1495print $code;
1496close STDOUT;
diff --git a/src/lib/libssl/src/crypto/bn/asm/parisc-mont.pl b/src/lib/libssl/src/crypto/bn/asm/parisc-mont.pl
new file mode 100644
index 0000000000..4a766a87fb
--- /dev/null
+++ b/src/lib/libssl/src/crypto/bn/asm/parisc-mont.pl
@@ -0,0 +1,993 @@
1#!/usr/bin/env perl
2
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9
10# On PA-7100LC this module performs ~90-50% better, less for longer
11# keys, than code generated by gcc 3.2 for PA-RISC 1.1. Latter means
12# that compiler utilized xmpyu instruction to perform 32x32=64-bit
13# multiplication, which in turn means that "baseline" performance was
14# optimal in respect to instruction set capabilities. Fair comparison
15# with vendor compiler is problematic, because OpenSSL doesn't define
16# BN_LLONG [presumably] for historical reasons, which drives compiler
17# toward 4 times 16x16=32-bit multiplications [plus complementary
18# shifts and additions] instead. This means that you should observe
19# several times improvement over code generated by vendor compiler
20# for PA-RISC 1.1, but the "baseline" is far from optimal. The actual
21# improvement coefficient was never collected on PA-7100LC, or any
22# other 1.1 CPU, because I don't have access to such machine with
23# vendor compiler. But to give you a taste, PA-RISC 1.1 code path
24# reportedly outperformed code generated by cc +DA1.1 +O3 by factor
25# of ~5x on PA-8600.
26#
27# On PA-RISC 2.0 it has to compete with pa-risc2[W].s, which is
28# reportedly ~2x faster than vendor compiler generated code [according
29# to comment in pa-risc2[W].s]. Here comes a catch. Execution core of
30# this implementation is actually 32-bit one, in the sense that it
31# operates on 32-bit values. But pa-risc2[W].s operates on arrays of
32# 64-bit BN_LONGs... How do they interoperate then? No problem. This
33# module picks halves of 64-bit values in reverse order and pretends
34# they were 32-bit BN_LONGs. But can 32-bit core compete with "pure"
35# 64-bit code such as pa-risc2[W].s then? Well, the thing is that
36# 32x32=64-bit multiplication is the best even PA-RISC 2.0 can do,
37# i.e. there is no "wider" multiplication like on most other 64-bit
38# platforms. This means that even being effectively 32-bit, this
39# implementation performs "64-bit" computational task in same amount
40# of arithmetic operations, most notably multiplications. It requires
41# more memory references, most notably to tp[num], but this doesn't
42# seem to exhaust memory port capacity. And indeed, dedicated PA-RISC
43# 2.0 code path, provides virtually same performance as pa-risc2[W].s:
44# it's ~10% better for shortest key length and ~10% worse for longest
45# one.
46#
47# In case it wasn't clear. The module has two distinct code paths:
48# PA-RISC 1.1 and PA-RISC 2.0 ones. Latter features carry-free 64-bit
49# additions and 64-bit integer loads, not to mention specific
50# instruction scheduling. In 64-bit build naturally only 2.0 code path
51# is assembled. In 32-bit application context both code paths are
52# assembled, PA-RISC 2.0 CPU is detected at run-time and proper path
53# is taken automatically. Also, in 32-bit build the module imposes
54# couple of limitations: vector lengths has to be even and vector
55# addresses has to be 64-bit aligned. Normally neither is a problem:
56# most common key lengths are even and vectors are commonly malloc-ed,
57# which ensures alignment.
58#
59# Special thanks to polarhome.com for providing HP-UX account on
60# PA-RISC 1.1 machine, and to correspondent who chose to remain
61# anonymous for testing the code on PA-RISC 2.0 machine.
62
63$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
64
65$flavour = shift;
66$output = shift;
67
68open STDOUT,">$output";
69
70if ($flavour =~ /64/) {
71 $LEVEL ="2.0W";
72 $SIZE_T =8;
73 $FRAME_MARKER =80;
74 $SAVED_RP =16;
75 $PUSH ="std";
76 $PUSHMA ="std,ma";
77 $POP ="ldd";
78 $POPMB ="ldd,mb";
79 $BN_SZ =$SIZE_T;
80} else {
81 $LEVEL ="1.1"; #$LEVEL.="\n\t.ALLOW\t2.0";
82 $SIZE_T =4;
83 $FRAME_MARKER =48;
84 $SAVED_RP =20;
85 $PUSH ="stw";
86 $PUSHMA ="stwm";
87 $POP ="ldw";
88 $POPMB ="ldwm";
89 $BN_SZ =$SIZE_T;
90 if (open CONF,"<${dir}../../opensslconf.h") {
91 while(<CONF>) {
92 if (m/#\s*define\s+SIXTY_FOUR_BIT/) {
93 $BN_SZ=8;
94 $LEVEL="2.0";
95 last;
96 }
97 }
98 close CONF;
99 }
100}
101
102$FRAME=8*$SIZE_T+$FRAME_MARKER; # 8 saved regs + frame marker
103 # [+ argument transfer]
104$LOCALS=$FRAME-$FRAME_MARKER;
105$FRAME+=32; # local variables
106
107$tp="%r31";
108$ti1="%r29";
109$ti0="%r28";
110
111$rp="%r26";
112$ap="%r25";
113$bp="%r24";
114$np="%r23";
115$n0="%r22"; # passed through stack in 32-bit
116$num="%r21"; # passed through stack in 32-bit
117$idx="%r20";
118$arrsz="%r19";
119
120$nm1="%r7";
121$nm0="%r6";
122$ab1="%r5";
123$ab0="%r4";
124
125$fp="%r3";
126$hi1="%r2";
127$hi0="%r1";
128
129$xfer=$n0;	# accommodates [-16..15] offset in fld[dw]s
130
131$fm0="%fr4"; $fti=$fm0;
132$fbi="%fr5L";
133$fn0="%fr5R";
134$fai="%fr6"; $fab0="%fr7"; $fab1="%fr8";
135$fni="%fr9"; $fnm0="%fr10"; $fnm1="%fr11";
136
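# The multiply-accumulate core is built around xmpyu (unsigned 32x32=64-bit
# FPU multiply): products are stored to the $xfer scratch area with fstds and
# immediately reloaded into integer registers, where the carry-propagating
# additions take place.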
137$code=<<___;
138 .LEVEL $LEVEL
139 .SPACE \$TEXT\$
140 .SUBSPA \$CODE\$,QUAD=0,ALIGN=8,ACCESS=0x2C,CODE_ONLY
141
142 .EXPORT bn_mul_mont,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR
143 .ALIGN 64
144bn_mul_mont
145 .PROC
146 .CALLINFO FRAME=`$FRAME-8*$SIZE_T`,NO_CALLS,SAVE_RP,SAVE_SP,ENTRY_GR=6
147 .ENTRY
148 $PUSH %r2,-$SAVED_RP(%sp) ; standard prologue
149 $PUSHMA %r3,$FRAME(%sp)
150 $PUSH %r4,`-$FRAME+1*$SIZE_T`(%sp)
151 $PUSH %r5,`-$FRAME+2*$SIZE_T`(%sp)
152 $PUSH %r6,`-$FRAME+3*$SIZE_T`(%sp)
153 $PUSH %r7,`-$FRAME+4*$SIZE_T`(%sp)
154 $PUSH %r8,`-$FRAME+5*$SIZE_T`(%sp)
155 $PUSH %r9,`-$FRAME+6*$SIZE_T`(%sp)
156 $PUSH %r10,`-$FRAME+7*$SIZE_T`(%sp)
157 ldo -$FRAME(%sp),$fp
158___
159$code.=<<___ if ($SIZE_T==4);
160 ldw `-$FRAME_MARKER-4`($fp),$n0
161 ldw `-$FRAME_MARKER-8`($fp),$num
162 nop
163 nop ; alignment
164___
165$code.=<<___ if ($BN_SZ==4);
166 comiclr,<= 6,$num,%r0 ; are vectors long enough?
167 b L\$abort
168 ldi 0,%r28 ; signal "unhandled"
169 add,ev %r0,$num,$num ; is $num even?
170 b L\$abort
171 nop
172 or $ap,$np,$ti1
173 extru,= $ti1,31,3,%r0 ; are ap and np 64-bit aligned?
174 b L\$abort
175 nop
176 nop ; alignment
177 nop
178
179 fldws 0($n0),${fn0}
180 fldws,ma 4($bp),${fbi} ; bp[0]
181___
182$code.=<<___ if ($BN_SZ==8);
183 comib,> 3,$num,L\$abort ; are vectors long enough?
184 ldi 0,%r28 ; signal "unhandled"
185 addl $num,$num,$num ; I operate on 32-bit values
186
187 fldws 4($n0),${fn0} ; only low part of n0
188 fldws 4($bp),${fbi} ; bp[0] in flipped word order
189___
190$code.=<<___;
191 fldds 0($ap),${fai} ; ap[0,1]
192 fldds 0($np),${fni} ; np[0,1]
193
194 sh2addl $num,%r0,$arrsz
195 ldi 31,$hi0
196 ldo 36($arrsz),$hi1 ; space for tp[num+1]
197 andcm $hi1,$hi0,$hi1 ; align
198 addl $hi1,%sp,%sp
199 $PUSH $fp,-$SIZE_T(%sp)
200
201 ldo `$LOCALS+16`($fp),$xfer
202 ldo `$LOCALS+32+4`($fp),$tp
203
204 xmpyu ${fai}L,${fbi},${fab0} ; ap[0]*bp[0]
205 xmpyu ${fai}R,${fbi},${fab1} ; ap[1]*bp[0]
206 xmpyu ${fn0},${fab0}R,${fm0}
207
208 addl $arrsz,$ap,$ap ; point at the end
209 addl $arrsz,$np,$np
210 subi 0,$arrsz,$idx ; j=0
211 ldo 8($idx),$idx ; j++++
212
213 xmpyu ${fni}L,${fm0}R,${fnm0} ; np[0]*m
214 xmpyu ${fni}R,${fm0}R,${fnm1} ; np[1]*m
215 fstds ${fab0},-16($xfer)
216 fstds ${fnm0},-8($xfer)
217 fstds ${fab1},0($xfer)
218 fstds ${fnm1},8($xfer)
219 flddx $idx($ap),${fai} ; ap[2,3]
220 flddx $idx($np),${fni} ; np[2,3]
221___
222$code.=<<___ if ($BN_SZ==4);
223 mtctl $hi0,%cr11 ; $hi0 still holds 31
224 extrd,u,*= $hi0,%sar,1,$hi0 ; executes on PA-RISC 1.0
225 b L\$parisc11
226 nop
227___
228$code.=<<___; # PA-RISC 2.0 code-path
229 xmpyu ${fai}L,${fbi},${fab0} ; ap[j]*bp[0]
230 xmpyu ${fni}L,${fm0}R,${fnm0} ; np[j]*m
231 ldd -16($xfer),$ab0
232 fstds ${fab0},-16($xfer)
233
234 extrd,u $ab0,31,32,$hi0
235 extrd,u $ab0,63,32,$ab0
236 ldd -8($xfer),$nm0
237 fstds ${fnm0},-8($xfer)
238 ldo 8($idx),$idx ; j++++
239 addl $ab0,$nm0,$nm0 ; low part is discarded
240 extrd,u $nm0,31,32,$hi1
241
242L\$1st
243 xmpyu ${fai}R,${fbi},${fab1} ; ap[j+1]*bp[0]
244 xmpyu ${fni}R,${fm0}R,${fnm1} ; np[j+1]*m
245 ldd 0($xfer),$ab1
246 fstds ${fab1},0($xfer)
247 addl $hi0,$ab1,$ab1
248 extrd,u $ab1,31,32,$hi0
249 ldd 8($xfer),$nm1
250 fstds ${fnm1},8($xfer)
251 extrd,u $ab1,63,32,$ab1
252 addl $hi1,$nm1,$nm1
253 flddx $idx($ap),${fai} ; ap[j,j+1]
254 flddx $idx($np),${fni} ; np[j,j+1]
255 addl $ab1,$nm1,$nm1
256 extrd,u $nm1,31,32,$hi1
257
258 xmpyu ${fai}L,${fbi},${fab0} ; ap[j]*bp[0]
259 xmpyu ${fni}L,${fm0}R,${fnm0} ; np[j]*m
260 ldd -16($xfer),$ab0
261 fstds ${fab0},-16($xfer)
262 addl $hi0,$ab0,$ab0
263 extrd,u $ab0,31,32,$hi0
264 ldd -8($xfer),$nm0
265 fstds ${fnm0},-8($xfer)
266 extrd,u $ab0,63,32,$ab0
267 addl $hi1,$nm0,$nm0
268 stw $nm1,-4($tp) ; tp[j-1]
269 addl $ab0,$nm0,$nm0
270 stw,ma $nm0,8($tp) ; tp[j-1]
271 addib,<> 8,$idx,L\$1st ; j++++
272 extrd,u $nm0,31,32,$hi1
273
274 xmpyu ${fai}R,${fbi},${fab1} ; ap[j]*bp[0]
275 xmpyu ${fni}R,${fm0}R,${fnm1} ; np[j]*m
276 ldd 0($xfer),$ab1
277 fstds ${fab1},0($xfer)
278 addl $hi0,$ab1,$ab1
279 extrd,u $ab1,31,32,$hi0
280 ldd 8($xfer),$nm1
281 fstds ${fnm1},8($xfer)
282 extrd,u $ab1,63,32,$ab1
283 addl $hi1,$nm1,$nm1
284 ldd -16($xfer),$ab0
285 addl $ab1,$nm1,$nm1
286 ldd -8($xfer),$nm0
287 extrd,u $nm1,31,32,$hi1
288
289 addl $hi0,$ab0,$ab0
290 extrd,u $ab0,31,32,$hi0
291 stw $nm1,-4($tp) ; tp[j-1]
292 extrd,u $ab0,63,32,$ab0
293 addl $hi1,$nm0,$nm0
294 ldd 0($xfer),$ab1
295 addl $ab0,$nm0,$nm0
296 ldd,mb 8($xfer),$nm1
297 extrd,u $nm0,31,32,$hi1
298 stw,ma $nm0,8($tp) ; tp[j-1]
299
300 ldo -1($num),$num ; i--
301 subi 0,$arrsz,$idx ; j=0
302___
303$code.=<<___ if ($BN_SZ==4);
304 fldws,ma 4($bp),${fbi} ; bp[1]
305___
306$code.=<<___ if ($BN_SZ==8);
307 fldws 0($bp),${fbi} ; bp[1] in flipped word order
308___
309$code.=<<___;
310 flddx $idx($ap),${fai} ; ap[0,1]
311 flddx $idx($np),${fni} ; np[0,1]
312 fldws 8($xfer),${fti}R ; tp[0]
313 addl $hi0,$ab1,$ab1
314 extrd,u $ab1,31,32,$hi0
315 extrd,u $ab1,63,32,$ab1
316 ldo 8($idx),$idx ; j++++
317 xmpyu ${fai}L,${fbi},${fab0} ; ap[0]*bp[1]
318 xmpyu ${fai}R,${fbi},${fab1} ; ap[1]*bp[1]
319 addl $hi1,$nm1,$nm1
320 addl $ab1,$nm1,$nm1
321 extrd,u $nm1,31,32,$hi1
322 fstws,mb ${fab0}L,-8($xfer) ; save high part
323 stw $nm1,-4($tp) ; tp[j-1]
324
325 fcpy,sgl %fr0,${fti}L ; zero high part
326 fcpy,sgl %fr0,${fab0}L
327 addl $hi1,$hi0,$hi0
328 extrd,u $hi0,31,32,$hi1
329 fcnvxf,dbl,dbl ${fti},${fti} ; 32-bit unsigned int -> double
330 fcnvxf,dbl,dbl ${fab0},${fab0}
331 stw $hi0,0($tp)
332 stw $hi1,4($tp)
333
334 fadd,dbl ${fti},${fab0},${fab0} ; add tp[0]
335 fcnvfx,dbl,dbl ${fab0},${fab0} ; double -> 33-bit unsigned int
336 xmpyu ${fn0},${fab0}R,${fm0}
337 ldo `$LOCALS+32+4`($fp),$tp
338L\$outer
339 xmpyu ${fni}L,${fm0}R,${fnm0} ; np[0]*m
340 xmpyu ${fni}R,${fm0}R,${fnm1} ; np[1]*m
341 fstds ${fab0},-16($xfer) ; 33-bit value
342 fstds ${fnm0},-8($xfer)
343 flddx $idx($ap),${fai} ; ap[2]
344 flddx $idx($np),${fni} ; np[2]
345 ldo 8($idx),$idx ; j++++
346 ldd -16($xfer),$ab0 ; 33-bit value
347 ldd -8($xfer),$nm0
348 ldw 0($xfer),$hi0 ; high part
349
350 xmpyu ${fai}L,${fbi},${fab0} ; ap[j]*bp[i]
351 xmpyu ${fni}L,${fm0}R,${fnm0} ; np[j]*m
352 extrd,u $ab0,31,32,$ti0 ; carry bit
353 extrd,u $ab0,63,32,$ab0
354 fstds ${fab1},0($xfer)
355 addl $ti0,$hi0,$hi0 ; account carry bit
356 fstds ${fnm1},8($xfer)
357 addl $ab0,$nm0,$nm0 ; low part is discarded
358 ldw 0($tp),$ti1 ; tp[1]
359 extrd,u $nm0,31,32,$hi1
360 fstds ${fab0},-16($xfer)
361 fstds ${fnm0},-8($xfer)
362
363L\$inner
364 xmpyu ${fai}R,${fbi},${fab1} ; ap[j+1]*bp[i]
365 xmpyu ${fni}R,${fm0}R,${fnm1} ; np[j+1]*m
366 ldd 0($xfer),$ab1
367 fstds ${fab1},0($xfer)
368 addl $hi0,$ti1,$ti1
369 addl $ti1,$ab1,$ab1
370 ldd 8($xfer),$nm1
371 fstds ${fnm1},8($xfer)
372 extrd,u $ab1,31,32,$hi0
373 extrd,u $ab1,63,32,$ab1
374 flddx $idx($ap),${fai} ; ap[j,j+1]
375 flddx $idx($np),${fni} ; np[j,j+1]
376 addl $hi1,$nm1,$nm1
377 addl $ab1,$nm1,$nm1
378 ldw 4($tp),$ti0 ; tp[j]
379 stw $nm1,-4($tp) ; tp[j-1]
380
381 xmpyu ${fai}L,${fbi},${fab0} ; ap[j]*bp[i]
382 xmpyu ${fni}L,${fm0}R,${fnm0} ; np[j]*m
383 ldd -16($xfer),$ab0
384 fstds ${fab0},-16($xfer)
385 addl $hi0,$ti0,$ti0
386 addl $ti0,$ab0,$ab0
387 ldd -8($xfer),$nm0
388 fstds ${fnm0},-8($xfer)
389 extrd,u $ab0,31,32,$hi0
390 extrd,u $nm1,31,32,$hi1
391 ldw 8($tp),$ti1 ; tp[j]
392 extrd,u $ab0,63,32,$ab0
393 addl $hi1,$nm0,$nm0
394 addl $ab0,$nm0,$nm0
395 stw,ma $nm0,8($tp) ; tp[j-1]
396 addib,<> 8,$idx,L\$inner ; j++++
397 extrd,u $nm0,31,32,$hi1
398
399 xmpyu ${fai}R,${fbi},${fab1} ; ap[j]*bp[i]
400 xmpyu ${fni}R,${fm0}R,${fnm1} ; np[j]*m
401 ldd 0($xfer),$ab1
402 fstds ${fab1},0($xfer)
403 addl $hi0,$ti1,$ti1
404 addl $ti1,$ab1,$ab1
405 ldd 8($xfer),$nm1
406 fstds ${fnm1},8($xfer)
407 extrd,u $ab1,31,32,$hi0
408 extrd,u $ab1,63,32,$ab1
409 ldw 4($tp),$ti0 ; tp[j]
410 addl $hi1,$nm1,$nm1
411 addl $ab1,$nm1,$nm1
412 ldd -16($xfer),$ab0
413 ldd -8($xfer),$nm0
414 extrd,u $nm1,31,32,$hi1
415
416 addl $hi0,$ab0,$ab0
417 addl $ti0,$ab0,$ab0
418 stw $nm1,-4($tp) ; tp[j-1]
419 extrd,u $ab0,31,32,$hi0
420 ldw 8($tp),$ti1 ; tp[j]
421 extrd,u $ab0,63,32,$ab0
422 addl $hi1,$nm0,$nm0
423 ldd 0($xfer),$ab1
424 addl $ab0,$nm0,$nm0
425 ldd,mb 8($xfer),$nm1
426 extrd,u $nm0,31,32,$hi1
427 stw,ma $nm0,8($tp) ; tp[j-1]
428
429 addib,= -1,$num,L\$outerdone ; i--
430 subi 0,$arrsz,$idx ; j=0
431___
432$code.=<<___ if ($BN_SZ==4);
433 fldws,ma 4($bp),${fbi} ; bp[i]
434___
435$code.=<<___ if ($BN_SZ==8);
436 ldi 12,$ti0 ; bp[i] in flipped word order
437 addl,ev %r0,$num,$num
438 ldi -4,$ti0
439 addl $ti0,$bp,$bp
440 fldws 0($bp),${fbi}
441___
442$code.=<<___;
443 flddx $idx($ap),${fai} ; ap[0]
444 addl $hi0,$ab1,$ab1
445 flddx $idx($np),${fni} ; np[0]
446 fldws 8($xfer),${fti}R ; tp[0]
447 addl $ti1,$ab1,$ab1
448 extrd,u $ab1,31,32,$hi0
449 extrd,u $ab1,63,32,$ab1
450
451 ldo 8($idx),$idx ; j++++
452 xmpyu ${fai}L,${fbi},${fab0} ; ap[0]*bp[i]
453 xmpyu ${fai}R,${fbi},${fab1} ; ap[1]*bp[i]
454 ldw 4($tp),$ti0 ; tp[j]
455
456 addl $hi1,$nm1,$nm1
457 fstws,mb ${fab0}L,-8($xfer) ; save high part
458 addl $ab1,$nm1,$nm1
459 extrd,u $nm1,31,32,$hi1
460 fcpy,sgl %fr0,${fti}L ; zero high part
461 fcpy,sgl %fr0,${fab0}L
462 stw $nm1,-4($tp) ; tp[j-1]
463
464 fcnvxf,dbl,dbl ${fti},${fti} ; 32-bit unsigned int -> double
465 fcnvxf,dbl,dbl ${fab0},${fab0}
466 addl $hi1,$hi0,$hi0
467 fadd,dbl ${fti},${fab0},${fab0} ; add tp[0]
468 addl $ti0,$hi0,$hi0
469 extrd,u $hi0,31,32,$hi1
470 fcnvfx,dbl,dbl ${fab0},${fab0} ; double -> 33-bit unsigned int
471 stw $hi0,0($tp)
472 stw $hi1,4($tp)
473 xmpyu ${fn0},${fab0}R,${fm0}
474
475 b L\$outer
476 ldo `$LOCALS+32+4`($fp),$tp
477
478L\$outerdone
479 addl $hi0,$ab1,$ab1
480 addl $ti1,$ab1,$ab1
481 extrd,u $ab1,31,32,$hi0
482 extrd,u $ab1,63,32,$ab1
483
484 ldw 4($tp),$ti0 ; tp[j]
485
486 addl $hi1,$nm1,$nm1
487 addl $ab1,$nm1,$nm1
488 extrd,u $nm1,31,32,$hi1
489 stw $nm1,-4($tp) ; tp[j-1]
490
491 addl $hi1,$hi0,$hi0
492 addl $ti0,$hi0,$hi0
493 extrd,u $hi0,31,32,$hi1
494 stw $hi0,0($tp)
495 stw $hi1,4($tp)
496
497 ldo `$LOCALS+32`($fp),$tp
498 sub %r0,%r0,%r0 ; clear borrow
499___
500$code.=<<___ if ($BN_SZ==4);
501 ldws,ma 4($tp),$ti0
502 extru,= $rp,31,3,%r0 ; is rp 64-bit aligned?
503 b L\$sub_pa11
504 addl $tp,$arrsz,$tp
505L\$sub
506 ldwx $idx($np),$hi0
507 subb $ti0,$hi0,$hi1
508 ldwx $idx($tp),$ti0
509 addib,<> 4,$idx,L\$sub
510 stws,ma $hi1,4($rp)
511
512 subb $ti0,%r0,$hi1
513 ldo -4($tp),$tp
514___
515$code.=<<___ if ($BN_SZ==8);
516 ldd,ma 8($tp),$ti0
517L\$sub
518 ldd $idx($np),$hi0
519 shrpd $ti0,$ti0,32,$ti0 ; flip word order
520 std $ti0,-8($tp) ; save flipped value
521 sub,db $ti0,$hi0,$hi1
522 ldd,ma 8($tp),$ti0
523 addib,<> 8,$idx,L\$sub
524 std,ma $hi1,8($rp)
525
526 extrd,u $ti0,31,32,$ti0 ; carry in flipped word order
527 sub,db $ti0,%r0,$hi1
528 ldo -8($tp),$tp
529___
530$code.=<<___;
531 and $tp,$hi1,$ap
532 andcm $rp,$hi1,$bp
533 or $ap,$bp,$np
534
535 sub $rp,$arrsz,$rp ; rewind rp
536 subi 0,$arrsz,$idx
537 ldo `$LOCALS+32`($fp),$tp
538L\$copy
539 ldd $idx($np),$hi0
540 std,ma %r0,8($tp)
541 addib,<> 8,$idx,.-8 ; L\$copy
542 std,ma $hi0,8($rp)
543___
544
545if ($BN_SZ==4) { # PA-RISC 1.1 code-path
546$ablo=$ab0;
547$abhi=$ab1;
548$nmlo0=$nm0;
549$nmhi0=$nm1;
550$nmlo1="%r9";
551$nmhi1="%r8";
552
553$code.=<<___;
554 b L\$done
555 nop
556
557 .ALIGN 8
558L\$parisc11
559 xmpyu ${fai}L,${fbi},${fab0} ; ap[j]*bp[0]
560 xmpyu ${fni}L,${fm0}R,${fnm0} ; np[j]*m
561 ldw -12($xfer),$ablo
562 ldw -16($xfer),$hi0
563 ldw -4($xfer),$nmlo0
564 ldw -8($xfer),$nmhi0
565 fstds ${fab0},-16($xfer)
566 fstds ${fnm0},-8($xfer)
567
568 ldo 8($idx),$idx ; j++++
569 add $ablo,$nmlo0,$nmlo0 ; discarded
570 addc %r0,$nmhi0,$hi1
571 ldw 4($xfer),$ablo
572 ldw 0($xfer),$abhi
573 nop
574
575L\$1st_pa11
576 xmpyu ${fai}R,${fbi},${fab1} ; ap[j+1]*bp[0]
577 flddx $idx($ap),${fai} ; ap[j,j+1]
578 xmpyu ${fni}R,${fm0}R,${fnm1} ; np[j+1]*m
579 flddx $idx($np),${fni} ; np[j,j+1]
580 add $hi0,$ablo,$ablo
581 ldw 12($xfer),$nmlo1
582 addc %r0,$abhi,$hi0
583 ldw 8($xfer),$nmhi1
584 add $ablo,$nmlo1,$nmlo1
585 fstds ${fab1},0($xfer)
586 addc %r0,$nmhi1,$nmhi1
587 fstds ${fnm1},8($xfer)
588 add $hi1,$nmlo1,$nmlo1
589 ldw -12($xfer),$ablo
590 addc %r0,$nmhi1,$hi1
591 ldw -16($xfer),$abhi
592
593 xmpyu ${fai}L,${fbi},${fab0} ; ap[j]*bp[0]
594 ldw -4($xfer),$nmlo0
595 xmpyu ${fni}L,${fm0}R,${fnm0} ; np[j]*m
596 ldw -8($xfer),$nmhi0
597 add $hi0,$ablo,$ablo
598 stw $nmlo1,-4($tp) ; tp[j-1]
599 addc %r0,$abhi,$hi0
600 fstds ${fab0},-16($xfer)
601 add $ablo,$nmlo0,$nmlo0
602 fstds ${fnm0},-8($xfer)
603 addc %r0,$nmhi0,$nmhi0
604 ldw 0($xfer),$abhi
605 add $hi1,$nmlo0,$nmlo0
606 ldw 4($xfer),$ablo
607 stws,ma $nmlo0,8($tp) ; tp[j-1]
608 addib,<> 8,$idx,L\$1st_pa11 ; j++++
609 addc %r0,$nmhi0,$hi1
610
611 ldw 8($xfer),$nmhi1
612 ldw 12($xfer),$nmlo1
613 xmpyu ${fai}R,${fbi},${fab1} ; ap[j]*bp[0]
614 xmpyu ${fni}R,${fm0}R,${fnm1} ; np[j]*m
615 add $hi0,$ablo,$ablo
616 fstds ${fab1},0($xfer)
617 addc %r0,$abhi,$hi0
618 fstds ${fnm1},8($xfer)
619 add $ablo,$nmlo1,$nmlo1
620 ldw -16($xfer),$abhi
621 addc %r0,$nmhi1,$nmhi1
622 ldw -12($xfer),$ablo
623 add $hi1,$nmlo1,$nmlo1
624 ldw -8($xfer),$nmhi0
625 addc %r0,$nmhi1,$hi1
626 ldw -4($xfer),$nmlo0
627
628 add $hi0,$ablo,$ablo
629 stw $nmlo1,-4($tp) ; tp[j-1]
630 addc %r0,$abhi,$hi0
631 ldw 0($xfer),$abhi
632 add $ablo,$nmlo0,$nmlo0
633 ldw 4($xfer),$ablo
634 addc %r0,$nmhi0,$nmhi0
635 ldws,mb 8($xfer),$nmhi1
636 add $hi1,$nmlo0,$nmlo0
637 ldw 4($xfer),$nmlo1
638 addc %r0,$nmhi0,$hi1
639 stws,ma $nmlo0,8($tp) ; tp[j-1]
640
641 ldo -1($num),$num ; i--
642 subi 0,$arrsz,$idx ; j=0
643
644 fldws,ma 4($bp),${fbi} ; bp[1]
645 flddx $idx($ap),${fai} ; ap[0,1]
646 flddx $idx($np),${fni} ; np[0,1]
647 fldws 8($xfer),${fti}R ; tp[0]
648 add $hi0,$ablo,$ablo
649 addc %r0,$abhi,$hi0
650 ldo 8($idx),$idx ; j++++
651 xmpyu ${fai}L,${fbi},${fab0} ; ap[0]*bp[1]
652 xmpyu ${fai}R,${fbi},${fab1} ; ap[1]*bp[1]
653 add $hi1,$nmlo1,$nmlo1
654 addc %r0,$nmhi1,$nmhi1
655 add $ablo,$nmlo1,$nmlo1
656 addc %r0,$nmhi1,$hi1
657 fstws,mb ${fab0}L,-8($xfer) ; save high part
658 stw $nmlo1,-4($tp) ; tp[j-1]
659
660 fcpy,sgl %fr0,${fti}L ; zero high part
661 fcpy,sgl %fr0,${fab0}L
662 add $hi1,$hi0,$hi0
663 addc %r0,%r0,$hi1
664 fcnvxf,dbl,dbl ${fti},${fti} ; 32-bit unsigned int -> double
665 fcnvxf,dbl,dbl ${fab0},${fab0}
666 stw $hi0,0($tp)
667 stw $hi1,4($tp)
668
669 fadd,dbl ${fti},${fab0},${fab0} ; add tp[0]
670 fcnvfx,dbl,dbl ${fab0},${fab0} ; double -> 33-bit unsigned int
671 xmpyu ${fn0},${fab0}R,${fm0}
672 ldo `$LOCALS+32+4`($fp),$tp
673L\$outer_pa11
674 xmpyu ${fni}L,${fm0}R,${fnm0} ; np[0]*m
675 xmpyu ${fni}R,${fm0}R,${fnm1} ; np[1]*m
676 fstds ${fab0},-16($xfer) ; 33-bit value
677 fstds ${fnm0},-8($xfer)
678 flddx $idx($ap),${fai} ; ap[2,3]
679 flddx $idx($np),${fni} ; np[2,3]
680 ldw -16($xfer),$abhi ; carry bit actually
681 ldo 8($idx),$idx ; j++++
682 ldw -12($xfer),$ablo
683 ldw -8($xfer),$nmhi0
684 ldw -4($xfer),$nmlo0
685 ldw 0($xfer),$hi0 ; high part
686
687 xmpyu ${fai}L,${fbi},${fab0} ; ap[j]*bp[i]
688 xmpyu ${fni}L,${fm0}R,${fnm0} ; np[j]*m
689 fstds ${fab1},0($xfer)
690 addl $abhi,$hi0,$hi0 ; account carry bit
691 fstds ${fnm1},8($xfer)
692 add $ablo,$nmlo0,$nmlo0 ; discarded
693 ldw 0($tp),$ti1 ; tp[1]
694 addc %r0,$nmhi0,$hi1
695 fstds ${fab0},-16($xfer)
696 fstds ${fnm0},-8($xfer)
697 ldw 4($xfer),$ablo
698 ldw 0($xfer),$abhi
699
700L\$inner_pa11
701 xmpyu ${fai}R,${fbi},${fab1} ; ap[j+1]*bp[i]
702 flddx $idx($ap),${fai} ; ap[j,j+1]
703 xmpyu ${fni}R,${fm0}R,${fnm1} ; np[j+1]*m
704 flddx $idx($np),${fni} ; np[j,j+1]
705 add $hi0,$ablo,$ablo
706 ldw 4($tp),$ti0 ; tp[j]
707 addc %r0,$abhi,$abhi
708 ldw 12($xfer),$nmlo1
709 add $ti1,$ablo,$ablo
710 ldw 8($xfer),$nmhi1
711 addc %r0,$abhi,$hi0
712 fstds ${fab1},0($xfer)
713 add $ablo,$nmlo1,$nmlo1
714 fstds ${fnm1},8($xfer)
715 addc %r0,$nmhi1,$nmhi1
716 ldw -12($xfer),$ablo
717 add $hi1,$nmlo1,$nmlo1
718 ldw -16($xfer),$abhi
719 addc %r0,$nmhi1,$hi1
720
721 xmpyu ${fai}L,${fbi},${fab0} ; ap[j]*bp[i]
722 ldw 8($tp),$ti1 ; tp[j]
723 xmpyu ${fni}L,${fm0}R,${fnm0} ; np[j]*m
724 ldw -4($xfer),$nmlo0
725 add $hi0,$ablo,$ablo
726 ldw -8($xfer),$nmhi0
727 addc %r0,$abhi,$abhi
728 stw $nmlo1,-4($tp) ; tp[j-1]
729 add $ti0,$ablo,$ablo
730 fstds ${fab0},-16($xfer)
731 addc %r0,$abhi,$hi0
732 fstds ${fnm0},-8($xfer)
733 add $ablo,$nmlo0,$nmlo0
734 ldw 4($xfer),$ablo
735 addc %r0,$nmhi0,$nmhi0
736 ldw 0($xfer),$abhi
737 add $hi1,$nmlo0,$nmlo0
738 stws,ma $nmlo0,8($tp) ; tp[j-1]
739 addib,<> 8,$idx,L\$inner_pa11 ; j++++
740 addc %r0,$nmhi0,$hi1
741
742 xmpyu ${fai}R,${fbi},${fab1} ; ap[j]*bp[i]
743 ldw 12($xfer),$nmlo1
744 xmpyu ${fni}R,${fm0}R,${fnm1} ; np[j]*m
745 ldw 8($xfer),$nmhi1
746 add $hi0,$ablo,$ablo
747 ldw 4($tp),$ti0 ; tp[j]
748 addc %r0,$abhi,$abhi
749 fstds ${fab1},0($xfer)
750 add $ti1,$ablo,$ablo
751 fstds ${fnm1},8($xfer)
752 addc %r0,$abhi,$hi0
753 ldw -16($xfer),$abhi
754 add $ablo,$nmlo1,$nmlo1
755 ldw -12($xfer),$ablo
756 addc %r0,$nmhi1,$nmhi1
757 ldw -8($xfer),$nmhi0
758 add $hi1,$nmlo1,$nmlo1
759 ldw -4($xfer),$nmlo0
760 addc %r0,$nmhi1,$hi1
761
762 add $hi0,$ablo,$ablo
763 stw $nmlo1,-4($tp) ; tp[j-1]
764 addc %r0,$abhi,$abhi
765 add $ti0,$ablo,$ablo
766 ldw 8($tp),$ti1 ; tp[j]
767 addc %r0,$abhi,$hi0
768 ldw 0($xfer),$abhi
769 add $ablo,$nmlo0,$nmlo0
770 ldw 4($xfer),$ablo
771 addc %r0,$nmhi0,$nmhi0
772 ldws,mb 8($xfer),$nmhi1
773 add $hi1,$nmlo0,$nmlo0
774 ldw 4($xfer),$nmlo1
775 addc %r0,$nmhi0,$hi1
776 stws,ma $nmlo0,8($tp) ; tp[j-1]
777
778 addib,= -1,$num,L\$outerdone_pa11; i--
779 subi 0,$arrsz,$idx ; j=0
780
781 fldws,ma 4($bp),${fbi} ; bp[i]
782 flddx $idx($ap),${fai} ; ap[0]
783 add $hi0,$ablo,$ablo
784 addc %r0,$abhi,$abhi
785 flddx $idx($np),${fni} ; np[0]
786 fldws 8($xfer),${fti}R ; tp[0]
787 add $ti1,$ablo,$ablo
788 addc %r0,$abhi,$hi0
789
790 ldo 8($idx),$idx ; j++++
791 xmpyu ${fai}L,${fbi},${fab0} ; ap[0]*bp[i]
792 xmpyu ${fai}R,${fbi},${fab1} ; ap[1]*bp[i]
793 ldw 4($tp),$ti0 ; tp[j]
794
795 add $hi1,$nmlo1,$nmlo1
796 addc %r0,$nmhi1,$nmhi1
797 fstws,mb ${fab0}L,-8($xfer) ; save high part
798 add $ablo,$nmlo1,$nmlo1
799 addc %r0,$nmhi1,$hi1
800 fcpy,sgl %fr0,${fti}L ; zero high part
801 fcpy,sgl %fr0,${fab0}L
802 stw $nmlo1,-4($tp) ; tp[j-1]
803
804 fcnvxf,dbl,dbl ${fti},${fti} ; 32-bit unsigned int -> double
805 fcnvxf,dbl,dbl ${fab0},${fab0}
806 add $hi1,$hi0,$hi0
807 addc %r0,%r0,$hi1
808 fadd,dbl ${fti},${fab0},${fab0} ; add tp[0]
809 add $ti0,$hi0,$hi0
810 addc %r0,$hi1,$hi1
811 fcnvfx,dbl,dbl ${fab0},${fab0} ; double -> 33-bit unsigned int
812 stw $hi0,0($tp)
813 stw $hi1,4($tp)
814 xmpyu ${fn0},${fab0}R,${fm0}
815
816 b L\$outer_pa11
817 ldo `$LOCALS+32+4`($fp),$tp
818
819L\$outerdone_pa11
820 add $hi0,$ablo,$ablo
821 addc %r0,$abhi,$abhi
822 add $ti1,$ablo,$ablo
823 addc %r0,$abhi,$hi0
824
825 ldw 4($tp),$ti0 ; tp[j]
826
827 add $hi1,$nmlo1,$nmlo1
828 addc %r0,$nmhi1,$nmhi1
829 add $ablo,$nmlo1,$nmlo1
830 addc %r0,$nmhi1,$hi1
831 stw $nmlo1,-4($tp) ; tp[j-1]
832
833 add $hi1,$hi0,$hi0
834 addc %r0,%r0,$hi1
835 add $ti0,$hi0,$hi0
836 addc %r0,$hi1,$hi1
837 stw $hi0,0($tp)
838 stw $hi1,4($tp)
839
840 ldo `$LOCALS+32+4`($fp),$tp
841 sub %r0,%r0,%r0 ; clear borrow
842 ldw -4($tp),$ti0
843 addl $tp,$arrsz,$tp
844L\$sub_pa11
845 ldwx $idx($np),$hi0
846 subb $ti0,$hi0,$hi1
847 ldwx $idx($tp),$ti0
848 addib,<> 4,$idx,L\$sub_pa11
849 stws,ma $hi1,4($rp)
850
851 subb $ti0,%r0,$hi1
852 ldo -4($tp),$tp
853 and $tp,$hi1,$ap
854 andcm $rp,$hi1,$bp
855 or $ap,$bp,$np
856
857 sub $rp,$arrsz,$rp ; rewind rp
858 subi 0,$arrsz,$idx
859 ldo `$LOCALS+32`($fp),$tp
860L\$copy_pa11
861 ldwx $idx($np),$hi0
862 stws,ma %r0,4($tp)
863 addib,<> 4,$idx,L\$copy_pa11
864 stws,ma $hi0,4($rp)
865
866 nop ; alignment
867L\$done
868___
869}
870
871$code.=<<___;
872 ldi 1,%r28 ; signal "handled"
873 ldo $FRAME($fp),%sp ; destroy tp[num+1]
874
875 $POP `-$FRAME-$SAVED_RP`(%sp),%r2 ; standard epilogue
876 $POP `-$FRAME+1*$SIZE_T`(%sp),%r4
877 $POP `-$FRAME+2*$SIZE_T`(%sp),%r5
878 $POP `-$FRAME+3*$SIZE_T`(%sp),%r6
879 $POP `-$FRAME+4*$SIZE_T`(%sp),%r7
880 $POP `-$FRAME+5*$SIZE_T`(%sp),%r8
881 $POP `-$FRAME+6*$SIZE_T`(%sp),%r9
882 $POP `-$FRAME+7*$SIZE_T`(%sp),%r10
883L\$abort
884 bv (%r2)
885 .EXIT
886 $POPMB -$FRAME(%sp),%r3
887 .PROCEND
888 .STRINGZ "Montgomery Multiplication for PA-RISC, CRYPTOGAMS by <appro\@openssl.org>"
889___
890
891# Explicitly encode PA-RISC 2.0 instructions used in this module, so
892# that it can be compiled with .LEVEL 1.0. It should be noted that I
893# wouldn't have to do this, if GNU assembler understood .ALLOW 2.0
894# directive...
895
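# Each closure below pattern-matches the operand forms actually used in this
# module and, when recognized, emits the instruction as a raw ".WORD 0x..."
# constant with the original mnemonic kept as a trailing comment; anything it
# does not recognize is passed through to the assembler unchanged.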
896my $ldd = sub {
897 my ($mod,$args) = @_;
898 my $orig = "ldd$mod\t$args";
899
900 if ($args =~ /%r([0-9]+)\(%r([0-9]+)\),%r([0-9]+)/) # format 4
901 { my $opcode=(0x03<<26)|($2<<21)|($1<<16)|(3<<6)|$3;
902 sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
903 }
904 elsif ($args =~ /(\-?[0-9]+)\(%r([0-9]+)\),%r([0-9]+)/) # format 5
905 { my $opcode=(0x03<<26)|($2<<21)|(1<<12)|(3<<6)|$3;
906 $opcode|=(($1&0xF)<<17)|(($1&0x10)<<12); # encode offset
907 $opcode|=(1<<5) if ($mod =~ /^,m/);
908 $opcode|=(1<<13) if ($mod =~ /^,mb/);
909 sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
910 }
911 else { "\t".$orig; }
912};
913
914my $std = sub {
915 my ($mod,$args) = @_;
916 my $orig = "std$mod\t$args";
917
918 if ($args =~ /%r([0-9]+),(\-?[0-9]+)\(%r([0-9]+)\)/) # format 6
919 { my $opcode=(0x03<<26)|($3<<21)|($1<<16)|(1<<12)|(0xB<<6);
920 $opcode|=(($2&0xF)<<1)|(($2&0x10)>>4); # encode offset
921 $opcode|=(1<<5) if ($mod =~ /^,m/);
922 $opcode|=(1<<13) if ($mod =~ /^,mb/);
923 sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
924 }
925 else { "\t".$orig; }
926};
927
928my $extrd = sub {
929 my ($mod,$args) = @_;
930 my $orig = "extrd$mod\t$args";
931
932 # I only have ",u" completer, it's implicitly encoded...
933 if ($args =~ /%r([0-9]+),([0-9]+),([0-9]+),%r([0-9]+)/) # format 15
934 { my $opcode=(0x36<<26)|($1<<21)|($4<<16);
935 my $len=32-$3;
936 $opcode |= (($2&0x20)<<6)|(($2&0x1f)<<5); # encode pos
937 $opcode |= (($len&0x20)<<7)|($len&0x1f); # encode len
938 sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
939 }
940 elsif ($args =~ /%r([0-9]+),%sar,([0-9]+),%r([0-9]+)/) # format 12
941 { my $opcode=(0x34<<26)|($1<<21)|($3<<16)|(2<<11)|(1<<9);
942 my $len=32-$2;
943 $opcode |= (($len&0x20)<<3)|($len&0x1f); # encode len
944 $opcode |= (1<<13) if ($mod =~ /,\**=/);
945 sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
946 }
947 else { "\t".$orig; }
948};
949
950my $shrpd = sub {
951 my ($mod,$args) = @_;
952 my $orig = "shrpd$mod\t$args";
953
954 if ($args =~ /%r([0-9]+),%r([0-9]+),([0-9]+),%r([0-9]+)/) # format 14
955 { my $opcode=(0x34<<26)|($2<<21)|($1<<16)|(1<<10)|$4;
956 my $cpos=63-$3;
957 $opcode |= (($cpos&0x20)<<6)|(($cpos&0x1f)<<5); # encode sa
958 sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
959 }
960 else { "\t".$orig; }
961};
962
963my $sub = sub {
964 my ($mod,$args) = @_;
965 my $orig = "sub$mod\t$args";
966
967 if ($mod eq ",db" && $args =~ /%r([0-9]+),%r([0-9]+),%r([0-9]+)/) {
968 my $opcode=(0x02<<26)|($2<<21)|($1<<16)|$3;
969 $opcode|=(1<<10); # e1
970 $opcode|=(1<<8); # e2
971 $opcode|=(1<<5); # d
972 sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig
973 }
974 else { "\t".$orig; }
975};
976
977sub assemble {
978 my ($mnemonic,$mod,$args)=@_;
979 my $opcode = eval("\$$mnemonic");
980
981 ref($opcode) eq 'CODE' ? &$opcode($mod,$args) : "\t$mnemonic$mod\t$args";
982}
983
984foreach (split("\n",$code)) {
985 s/\`([^\`]*)\`/eval $1/ge;
986 # flip word order in 64-bit mode...
987 s/(xmpyu\s+)($fai|$fni)([LR])/$1.$2.($3 eq "L"?"R":"L")/e if ($BN_SZ==8);
988 # assemble 2.0 instructions in 32-bit mode...
989 s/^\s+([a-z]+)([\S]*)\s+([\S]*)/&assemble($1,$2,$3)/e if ($BN_SZ==4);
990
991 print $_,"\n";
992}
993close STDOUT;
diff --git a/src/lib/libssl/src/crypto/bn/asm/ppc-mont.pl b/src/lib/libssl/src/crypto/bn/asm/ppc-mont.pl
index 7849eae959..f9b6992ccc 100644
--- a/src/lib/libssl/src/crypto/bn/asm/ppc-mont.pl
+++ b/src/lib/libssl/src/crypto/bn/asm/ppc-mont.pl
@@ -31,7 +31,6 @@ if ($flavour =~ /32/) {
31 $BNSZ= $BITS/8; 31 $BNSZ= $BITS/8;
32 $SIZE_T=4; 32 $SIZE_T=4;
33 $RZONE= 224; 33 $RZONE= 224;
34 $FRAME= $SIZE_T*16;
35 34
36 $LD= "lwz"; # load 35 $LD= "lwz"; # load
37 $LDU= "lwzu"; # load and update 36 $LDU= "lwzu"; # load and update
@@ -51,7 +50,6 @@ if ($flavour =~ /32/) {
51 $BNSZ= $BITS/8; 50 $BNSZ= $BITS/8;
52 $SIZE_T=8; 51 $SIZE_T=8;
53 $RZONE= 288; 52 $RZONE= 288;
54 $FRAME= $SIZE_T*16;
55 53
56 # same as above, but 64-bit mnemonics... 54 # same as above, but 64-bit mnemonics...
57 $LD= "ld"; # load 55 $LD= "ld"; # load
@@ -69,6 +67,9 @@ if ($flavour =~ /32/) {
69 $POP= $LD; 67 $POP= $LD;
70} else { die "nonsense $flavour"; } 68} else { die "nonsense $flavour"; }
71 69
70$FRAME=8*$SIZE_T+$RZONE;
71$LOCALS=8*$SIZE_T;
72
72$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; 73$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
73( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or 74( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
74( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or 75( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
@@ -89,18 +90,18 @@ $aj="r10";
89$nj="r11"; 90$nj="r11";
90$tj="r12"; 91$tj="r12";
91# non-volatile registers 92# non-volatile registers
92$i="r14"; 93$i="r20";
93$j="r15"; 94$j="r21";
94$tp="r16"; 95$tp="r22";
95$m0="r17"; 96$m0="r23";
96$m1="r18"; 97$m1="r24";
97$lo0="r19"; 98$lo0="r25";
98$hi0="r20"; 99$hi0="r26";
99$lo1="r21"; 100$lo1="r27";
100$hi1="r22"; 101$hi1="r28";
101$alo="r23"; 102$alo="r29";
102$ahi="r24"; 103$ahi="r30";
103$nlo="r25"; 104$nlo="r31";
104# 105#
105$nhi="r0"; 106$nhi="r0";
106 107
@@ -108,42 +109,48 @@ $code=<<___;
108.machine "any" 109.machine "any"
109.text 110.text
110 111
111.globl .bn_mul_mont 112.globl .bn_mul_mont_int
112.align 4 113.align 4
113.bn_mul_mont: 114.bn_mul_mont_int:
114 cmpwi $num,4 115 cmpwi $num,4
115 mr $rp,r3 ; $rp is reassigned 116 mr $rp,r3 ; $rp is reassigned
116 li r3,0 117 li r3,0
117 bltlr 118 bltlr
118 119___
120$code.=<<___ if ($BNSZ==4);
121 cmpwi $num,32 ; longer key performance is not better
122 bgelr
123___
124$code.=<<___;
119 slwi $num,$num,`log($BNSZ)/log(2)` 125 slwi $num,$num,`log($BNSZ)/log(2)`
120 li $tj,-4096 126 li $tj,-4096
121 addi $ovf,$num,`$FRAME+$RZONE` 127 addi $ovf,$num,$FRAME
122 subf $ovf,$ovf,$sp ; $sp-$ovf 128 subf $ovf,$ovf,$sp ; $sp-$ovf
123 and $ovf,$ovf,$tj ; minimize TLB usage 129 and $ovf,$ovf,$tj ; minimize TLB usage
124 subf $ovf,$sp,$ovf ; $ovf-$sp 130 subf $ovf,$sp,$ovf ; $ovf-$sp
131 mr $tj,$sp
125 srwi $num,$num,`log($BNSZ)/log(2)` 132 srwi $num,$num,`log($BNSZ)/log(2)`
126 $STUX $sp,$sp,$ovf 133 $STUX $sp,$sp,$ovf
127 134
128 $PUSH r14,`4*$SIZE_T`($sp) 135 $PUSH r20,`-12*$SIZE_T`($tj)
129 $PUSH r15,`5*$SIZE_T`($sp) 136 $PUSH r21,`-11*$SIZE_T`($tj)
130 $PUSH r16,`6*$SIZE_T`($sp) 137 $PUSH r22,`-10*$SIZE_T`($tj)
131 $PUSH r17,`7*$SIZE_T`($sp) 138 $PUSH r23,`-9*$SIZE_T`($tj)
132 $PUSH r18,`8*$SIZE_T`($sp) 139 $PUSH r24,`-8*$SIZE_T`($tj)
133 $PUSH r19,`9*$SIZE_T`($sp) 140 $PUSH r25,`-7*$SIZE_T`($tj)
134 $PUSH r20,`10*$SIZE_T`($sp) 141 $PUSH r26,`-6*$SIZE_T`($tj)
135 $PUSH r21,`11*$SIZE_T`($sp) 142 $PUSH r27,`-5*$SIZE_T`($tj)
136 $PUSH r22,`12*$SIZE_T`($sp) 143 $PUSH r28,`-4*$SIZE_T`($tj)
137 $PUSH r23,`13*$SIZE_T`($sp) 144 $PUSH r29,`-3*$SIZE_T`($tj)
138 $PUSH r24,`14*$SIZE_T`($sp) 145 $PUSH r30,`-2*$SIZE_T`($tj)
139 $PUSH r25,`15*$SIZE_T`($sp) 146 $PUSH r31,`-1*$SIZE_T`($tj)
140 147
141 $LD $n0,0($n0) ; pull n0[0] value 148 $LD $n0,0($n0) ; pull n0[0] value
142 addi $num,$num,-2 ; adjust $num for counter register 149 addi $num,$num,-2 ; adjust $num for counter register
143 150
144 $LD $m0,0($bp) ; m0=bp[0] 151 $LD $m0,0($bp) ; m0=bp[0]
145 $LD $aj,0($ap) ; ap[0] 152 $LD $aj,0($ap) ; ap[0]
146 addi $tp,$sp,$FRAME 153 addi $tp,$sp,$LOCALS
147 $UMULL $lo0,$aj,$m0 ; ap[0]*bp[0] 154 $UMULL $lo0,$aj,$m0 ; ap[0]*bp[0]
148 $UMULH $hi0,$aj,$m0 155 $UMULH $hi0,$aj,$m0
149 156
@@ -205,8 +212,8 @@ L1st:
205Louter: 212Louter:
206 $LDX $m0,$bp,$i ; m0=bp[i] 213 $LDX $m0,$bp,$i ; m0=bp[i]
207 $LD $aj,0($ap) ; ap[0] 214 $LD $aj,0($ap) ; ap[0]
208 addi $tp,$sp,$FRAME 215 addi $tp,$sp,$LOCALS
209 $LD $tj,$FRAME($sp) ; tp[0] 216 $LD $tj,$LOCALS($sp); tp[0]
210 $UMULL $lo0,$aj,$m0 ; ap[0]*bp[i] 217 $UMULL $lo0,$aj,$m0 ; ap[0]*bp[i]
211 $UMULH $hi0,$aj,$m0 218 $UMULH $hi0,$aj,$m0
212 $LD $aj,$BNSZ($ap) ; ap[1] 219 $LD $aj,$BNSZ($ap) ; ap[1]
@@ -273,7 +280,7 @@ Linner:
273 280
274 addi $num,$num,2 ; restore $num 281 addi $num,$num,2 ; restore $num
275 subfc $j,$j,$j ; j=0 and "clear" XER[CA] 282 subfc $j,$j,$j ; j=0 and "clear" XER[CA]
276 addi $tp,$sp,$FRAME 283 addi $tp,$sp,$LOCALS
277 mtctr $num 284 mtctr $num
278 285
279.align 4 286.align 4
@@ -299,23 +306,27 @@ Lcopy: ; copy or in-place refresh
299 addi $j,$j,$BNSZ 306 addi $j,$j,$BNSZ
300 bdnz- Lcopy 307 bdnz- Lcopy
301 308
302 $POP r14,`4*$SIZE_T`($sp) 309 $POP $tj,0($sp)
303 $POP r15,`5*$SIZE_T`($sp)
304 $POP r16,`6*$SIZE_T`($sp)
305 $POP r17,`7*$SIZE_T`($sp)
306 $POP r18,`8*$SIZE_T`($sp)
307 $POP r19,`9*$SIZE_T`($sp)
308 $POP r20,`10*$SIZE_T`($sp)
309 $POP r21,`11*$SIZE_T`($sp)
310 $POP r22,`12*$SIZE_T`($sp)
311 $POP r23,`13*$SIZE_T`($sp)
312 $POP r24,`14*$SIZE_T`($sp)
313 $POP r25,`15*$SIZE_T`($sp)
314 $POP $sp,0($sp)
315 li r3,1 310 li r3,1
311 $POP r20,`-12*$SIZE_T`($tj)
312 $POP r21,`-11*$SIZE_T`($tj)
313 $POP r22,`-10*$SIZE_T`($tj)
314 $POP r23,`-9*$SIZE_T`($tj)
315 $POP r24,`-8*$SIZE_T`($tj)
316 $POP r25,`-7*$SIZE_T`($tj)
317 $POP r26,`-6*$SIZE_T`($tj)
318 $POP r27,`-5*$SIZE_T`($tj)
319 $POP r28,`-4*$SIZE_T`($tj)
320 $POP r29,`-3*$SIZE_T`($tj)
321 $POP r30,`-2*$SIZE_T`($tj)
322 $POP r31,`-1*$SIZE_T`($tj)
323 mr $sp,$tj
316 blr 324 blr
317 .long 0 325 .long 0
318.asciz "Montgomery Multiplication for PPC, CRYPTOGAMS by <appro\@fy.chalmers.se>" 326 .byte 0,12,4,0,0x80,12,6,0
327 .long 0
328
329.asciz "Montgomery Multiplication for PPC, CRYPTOGAMS by <appro\@openssl.org>"
319___ 330___
320 331
321$code =~ s/\`([^\`]*)\`/eval $1/gem; 332$code =~ s/\`([^\`]*)\`/eval $1/gem;
diff --git a/src/lib/libssl/src/crypto/bn/asm/ppc64-mont.pl b/src/lib/libssl/src/crypto/bn/asm/ppc64-mont.pl
index 3449b35855..a14e769ad0 100644
--- a/src/lib/libssl/src/crypto/bn/asm/ppc64-mont.pl
+++ b/src/lib/libssl/src/crypto/bn/asm/ppc64-mont.pl
@@ -45,23 +45,40 @@
45# on 1.8GHz PPC970, it's only 5-55% faster. Still far from impressive 45# on 1.8GHz PPC970, it's only 5-55% faster. Still far from impressive
46# in absolute terms, but it's apparently the way Power 6 is... 46# in absolute terms, but it's apparently the way Power 6 is...
47 47
48# December 2009
49
50# Adapted for 32-bit build this module delivers 25-120%, yes, more
51# than *twice* for longer keys, performance improvement over 32-bit
52# ppc-mont.pl on 1.8GHz PPC970. However! This implementation utilizes
53# even 64-bit integer operations and the trouble is that most PPC
54# operating systems don't preserve upper halves of general purpose
55# registers upon 32-bit signal delivery. They do preserve them upon
56# context switch, but not signalling:-( This means that asynchronous
57# signals have to be blocked upon entry to this subroutine. Signal
58# masking (and of course complementary unmasking) has quite an impact
59# on performance, naturally larger for shorter keys. It's so severe
60# that 512-bit key performance can be as low as 1/3 of expected one.
61# This is why this routine can be engaged for longer key operations
62# only on these OSes, see crypto/ppccap.c for further details. MacOS X
63# is an exception from this and doesn't require signal masking, and
64# that's where above improvement coefficients were collected. For
65# others alternative would be to break dependence on upper halves of
66# GPRs by sticking to 32-bit integer operations...
67
48$flavour = shift; 68$flavour = shift;
49 69
50if ($flavour =~ /32/) { 70if ($flavour =~ /32/) {
51 $SIZE_T=4; 71 $SIZE_T=4;
52 $RZONE= 224; 72 $RZONE= 224;
53 $FRAME= $SIZE_T*12+8*12; 73 $fname= "bn_mul_mont_fpu64";
54 $fname= "bn_mul_mont_ppc64";
55 74
56 $STUX= "stwux"; # store indexed and update 75 $STUX= "stwux"; # store indexed and update
57 $PUSH= "stw"; 76 $PUSH= "stw";
58 $POP= "lwz"; 77 $POP= "lwz";
59 die "not implemented yet";
60} elsif ($flavour =~ /64/) { 78} elsif ($flavour =~ /64/) {
61 $SIZE_T=8; 79 $SIZE_T=8;
62 $RZONE= 288; 80 $RZONE= 288;
63 $FRAME= $SIZE_T*12+8*12; 81 $fname= "bn_mul_mont_fpu64";
64 $fname= "bn_mul_mont";
65 82
66 # same as above, but 64-bit mnemonics... 83 # same as above, but 64-bit mnemonics...
67 $STUX= "stdux"; # store indexed and update 84 $STUX= "stdux"; # store indexed and update
@@ -76,7 +93,7 @@ die "can't locate ppc-xlate.pl";
76 93
77open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!"; 94open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!";
78 95
79$FRAME=($FRAME+63)&~63; 96$FRAME=64; # padded frame header
80$TRANSFER=16*8; 97$TRANSFER=16*8;
81 98
82$carry="r0"; 99$carry="r0";
@@ -93,16 +110,16 @@ $tp="r10";
93$j="r11"; 110$j="r11";
94$i="r12"; 111$i="r12";
95# non-volatile registers 112# non-volatile registers
96$nap_d="r14"; # interleaved ap and np in double format 113$nap_d="r22"; # interleaved ap and np in double format
97$a0="r15"; # ap[0] 114$a0="r23"; # ap[0]
98$t0="r16"; # temporary registers 115$t0="r24"; # temporary registers
99$t1="r17"; 116$t1="r25";
100$t2="r18"; 117$t2="r26";
101$t3="r19"; 118$t3="r27";
102$t4="r20"; 119$t4="r28";
103$t5="r21"; 120$t5="r29";
104$t6="r22"; 121$t6="r30";
105$t7="r23"; 122$t7="r31";
106 123
107# PPC offers enough register bank capacity to unroll inner loops twice 124# PPC offers enough register bank capacity to unroll inner loops twice
108# 125#
@@ -132,28 +149,17 @@ $ba="f0"; $bb="f1"; $bc="f2"; $bd="f3";
132$na="f4"; $nb="f5"; $nc="f6"; $nd="f7"; 149$na="f4"; $nb="f5"; $nc="f6"; $nd="f7";
133$dota="f8"; $dotb="f9"; 150$dota="f8"; $dotb="f9";
134$A0="f10"; $A1="f11"; $A2="f12"; $A3="f13"; 151$A0="f10"; $A1="f11"; $A2="f12"; $A3="f13";
135$N0="f14"; $N1="f15"; $N2="f16"; $N3="f17"; 152$N0="f20"; $N1="f21"; $N2="f22"; $N3="f23";
136$T0a="f18"; $T0b="f19"; 153$T0a="f24"; $T0b="f25";
137$T1a="f20"; $T1b="f21"; 154$T1a="f26"; $T1b="f27";
138$T2a="f22"; $T2b="f23"; 155$T2a="f28"; $T2b="f29";
139$T3a="f24"; $T3b="f25"; 156$T3a="f30"; $T3b="f31";
140 157
141# sp----------->+-------------------------------+ 158# sp----------->+-------------------------------+
142# | saved sp | 159# | saved sp |
143# +-------------------------------+ 160# +-------------------------------+
144# | |
145# +-------------------------------+
146# | 10 saved gpr, r14-r23 |
147# . .
148# . .
149# +12*size_t +-------------------------------+
150# | 12 saved fpr, f14-f25 |
151# . . 161# . .
152# . . 162# +64 +-------------------------------+
153# +12*8 +-------------------------------+
154# | padding to 64 byte boundary |
155# . .
156# +X +-------------------------------+
157# | 16 gpr<->fpr transfer zone | 163# | 16 gpr<->fpr transfer zone |
158# . . 164# . .
159# . . 165# . .
@@ -173,6 +179,16 @@ $T3a="f24"; $T3b="f25";
173# . . 179# . .
174# . . 180# . .
175# +-------------------------------+ 181# +-------------------------------+
182# . .
183# -12*size_t +-------------------------------+
184# | 10 saved gpr, r22-r31 |
185# . .
186# . .
187# -12*8 +-------------------------------+
188# | 12 saved fpr, f20-f31 |
189# . .
190# . .
191# +-------------------------------+
176 192
177$code=<<___; 193$code=<<___;
178.machine "any" 194.machine "any"
@@ -181,14 +197,14 @@ $code=<<___;
181.globl .$fname 197.globl .$fname
182.align 5 198.align 5
183.$fname: 199.$fname:
184 cmpwi $num,4 200 cmpwi $num,`3*8/$SIZE_T`
185 mr $rp,r3 ; $rp is reassigned 201 mr $rp,r3 ; $rp is reassigned
186 li r3,0 ; possible "not handled" return code 202 li r3,0 ; possible "not handled" return code
187 bltlr- 203 bltlr-
188 andi. r0,$num,1 ; $num has to be even 204 andi. r0,$num,`16/$SIZE_T-1` ; $num has to be "even"
189 bnelr- 205 bnelr-
190 206
191 slwi $num,$num,3 ; num*=8 207 slwi $num,$num,`log($SIZE_T)/log(2)` ; num*=sizeof(BN_LONG)
192 li $i,-4096 208 li $i,-4096
193 slwi $tp,$num,2 ; place for {an}p_{lh}[num], i.e. 4*num 209 slwi $tp,$num,2 ; place for {an}p_{lh}[num], i.e. 4*num
194 add $tp,$tp,$num ; place for tp[num+1] 210 add $tp,$tp,$num ; place for tp[num+1]
@@ -196,35 +212,50 @@ $code=<<___;
196 subf $tp,$tp,$sp ; $sp-$tp 212 subf $tp,$tp,$sp ; $sp-$tp
197 and $tp,$tp,$i ; minimize TLB usage 213 and $tp,$tp,$i ; minimize TLB usage
198 subf $tp,$sp,$tp ; $tp-$sp 214 subf $tp,$sp,$tp ; $tp-$sp
215 mr $i,$sp
199 $STUX $sp,$sp,$tp ; alloca 216 $STUX $sp,$sp,$tp ; alloca
200 217
201 $PUSH r14,`2*$SIZE_T`($sp) 218 $PUSH r22,`-12*8-10*$SIZE_T`($i)
202 $PUSH r15,`3*$SIZE_T`($sp) 219 $PUSH r23,`-12*8-9*$SIZE_T`($i)
203 $PUSH r16,`4*$SIZE_T`($sp) 220 $PUSH r24,`-12*8-8*$SIZE_T`($i)
204 $PUSH r17,`5*$SIZE_T`($sp) 221 $PUSH r25,`-12*8-7*$SIZE_T`($i)
205 $PUSH r18,`6*$SIZE_T`($sp) 222 $PUSH r26,`-12*8-6*$SIZE_T`($i)
206 $PUSH r19,`7*$SIZE_T`($sp) 223 $PUSH r27,`-12*8-5*$SIZE_T`($i)
207 $PUSH r20,`8*$SIZE_T`($sp) 224 $PUSH r28,`-12*8-4*$SIZE_T`($i)
208 $PUSH r21,`9*$SIZE_T`($sp) 225 $PUSH r29,`-12*8-3*$SIZE_T`($i)
209 $PUSH r22,`10*$SIZE_T`($sp) 226 $PUSH r30,`-12*8-2*$SIZE_T`($i)
210 $PUSH r23,`11*$SIZE_T`($sp) 227 $PUSH r31,`-12*8-1*$SIZE_T`($i)
211 stfd f14,`12*$SIZE_T+0`($sp) 228 stfd f20,`-12*8`($i)
212 stfd f15,`12*$SIZE_T+8`($sp) 229 stfd f21,`-11*8`($i)
213 stfd f16,`12*$SIZE_T+16`($sp) 230 stfd f22,`-10*8`($i)
214 stfd f17,`12*$SIZE_T+24`($sp) 231 stfd f23,`-9*8`($i)
215 stfd f18,`12*$SIZE_T+32`($sp) 232 stfd f24,`-8*8`($i)
216 stfd f19,`12*$SIZE_T+40`($sp) 233 stfd f25,`-7*8`($i)
217 stfd f20,`12*$SIZE_T+48`($sp) 234 stfd f26,`-6*8`($i)
218 stfd f21,`12*$SIZE_T+56`($sp) 235 stfd f27,`-5*8`($i)
219 stfd f22,`12*$SIZE_T+64`($sp) 236 stfd f28,`-4*8`($i)
220 stfd f23,`12*$SIZE_T+72`($sp) 237 stfd f29,`-3*8`($i)
221 stfd f24,`12*$SIZE_T+80`($sp) 238 stfd f30,`-2*8`($i)
222 stfd f25,`12*$SIZE_T+88`($sp) 239 stfd f31,`-1*8`($i)
223 240___
241$code.=<<___ if ($SIZE_T==8);
224 ld $a0,0($ap) ; pull ap[0] value 242 ld $a0,0($ap) ; pull ap[0] value
225 ld $n0,0($n0) ; pull n0[0] value 243 ld $n0,0($n0) ; pull n0[0] value
226 ld $t3,0($bp) ; bp[0] 244 ld $t3,0($bp) ; bp[0]
227 245___
246$code.=<<___ if ($SIZE_T==4);
247 mr $t1,$n0
248 lwz $a0,0($ap) ; pull ap[0,1] value
249 lwz $t0,4($ap)
250 lwz $n0,0($t1) ; pull n0[0,1] value
251 lwz $t1,4($t1)
252 lwz $t3,0($bp) ; bp[0,1]
253 lwz $t2,4($bp)
254 insrdi $a0,$t0,32,0
255 insrdi $n0,$t1,32,0
256 insrdi $t3,$t2,32,0
257___
258$code.=<<___;
228 addi $tp,$sp,`$FRAME+$TRANSFER+8+64` 259 addi $tp,$sp,`$FRAME+$TRANSFER+8+64`
229 li $i,-64 260 li $i,-64
230 add $nap_d,$tp,$num 261 add $nap_d,$tp,$num
@@ -258,6 +289,8 @@ $code=<<___;
258 std $t5,`$FRAME+40`($sp) 289 std $t5,`$FRAME+40`($sp)
259 std $t6,`$FRAME+48`($sp) 290 std $t6,`$FRAME+48`($sp)
260 std $t7,`$FRAME+56`($sp) 291 std $t7,`$FRAME+56`($sp)
292___
293$code.=<<___ if ($SIZE_T==8);
261 lwz $t0,4($ap) ; load a[j] as 32-bit word pair 294 lwz $t0,4($ap) ; load a[j] as 32-bit word pair
262 lwz $t1,0($ap) 295 lwz $t1,0($ap)
263 lwz $t2,12($ap) ; load a[j+1] as 32-bit word pair 296 lwz $t2,12($ap) ; load a[j+1] as 32-bit word pair
@@ -266,6 +299,18 @@ $code=<<___;
266 lwz $t5,0($np) 299 lwz $t5,0($np)
267 lwz $t6,12($np) ; load n[j+1] as 32-bit word pair 300 lwz $t6,12($np) ; load n[j+1] as 32-bit word pair
268 lwz $t7,8($np) 301 lwz $t7,8($np)
302___
303$code.=<<___ if ($SIZE_T==4);
304 lwz $t0,0($ap) ; load a[j..j+3] as 32-bit word pairs
305 lwz $t1,4($ap)
306 lwz $t2,8($ap)
307 lwz $t3,12($ap)
308 lwz $t4,0($np) ; load n[j..j+3] as 32-bit word pairs
309 lwz $t5,4($np)
310 lwz $t6,8($np)
311 lwz $t7,12($np)
312___
313$code.=<<___;
269 lfd $ba,`$FRAME+0`($sp) 314 lfd $ba,`$FRAME+0`($sp)
270 lfd $bb,`$FRAME+8`($sp) 315 lfd $bb,`$FRAME+8`($sp)
271 lfd $bc,`$FRAME+16`($sp) 316 lfd $bc,`$FRAME+16`($sp)
@@ -374,6 +419,8 @@ $code=<<___;
374 419
375.align 5 420.align 5
376L1st: 421L1st:
422___
423$code.=<<___ if ($SIZE_T==8);
377 lwz $t0,4($ap) ; load a[j] as 32-bit word pair 424 lwz $t0,4($ap) ; load a[j] as 32-bit word pair
378 lwz $t1,0($ap) 425 lwz $t1,0($ap)
379 lwz $t2,12($ap) ; load a[j+1] as 32-bit word pair 426 lwz $t2,12($ap) ; load a[j+1] as 32-bit word pair
@@ -382,6 +429,18 @@ L1st:
382 lwz $t5,0($np) 429 lwz $t5,0($np)
383 lwz $t6,12($np) ; load n[j+1] as 32-bit word pair 430 lwz $t6,12($np) ; load n[j+1] as 32-bit word pair
384 lwz $t7,8($np) 431 lwz $t7,8($np)
432___
433$code.=<<___ if ($SIZE_T==4);
434 lwz $t0,0($ap) ; load a[j..j+3] as 32-bit word pairs
435 lwz $t1,4($ap)
436 lwz $t2,8($ap)
437 lwz $t3,12($ap)
438 lwz $t4,0($np) ; load n[j..j+3] as 32-bit word pairs
439 lwz $t5,4($np)
440 lwz $t6,8($np)
441 lwz $t7,12($np)
442___
443$code.=<<___;
385 std $t0,`$FRAME+64`($sp) 444 std $t0,`$FRAME+64`($sp)
386 std $t1,`$FRAME+72`($sp) 445 std $t1,`$FRAME+72`($sp)
387 std $t2,`$FRAME+80`($sp) 446 std $t2,`$FRAME+80`($sp)
@@ -559,7 +618,17 @@ L1st:
559 li $i,8 ; i=1 618 li $i,8 ; i=1
560.align 5 619.align 5
561Louter: 620Louter:
621___
622$code.=<<___ if ($SIZE_T==8);
562 ldx $t3,$bp,$i ; bp[i] 623 ldx $t3,$bp,$i ; bp[i]
624___
625$code.=<<___ if ($SIZE_T==4);
626 add $t0,$bp,$i
627 lwz $t3,0($t0) ; bp[i,i+1]
628 lwz $t0,4($t0)
629 insrdi $t3,$t0,32,0
630___
631$code.=<<___;
563 ld $t6,`$FRAME+$TRANSFER+8`($sp) ; tp[0] 632 ld $t6,`$FRAME+$TRANSFER+8`($sp) ; tp[0]
564 mulld $t7,$a0,$t3 ; ap[0]*bp[i] 633 mulld $t7,$a0,$t3 ; ap[0]*bp[i]
565 634
@@ -761,6 +830,13 @@ Linner:
761 stfd $T0b,`$FRAME+8`($sp) 830 stfd $T0b,`$FRAME+8`($sp)
762 add $t7,$t7,$carry 831 add $t7,$t7,$carry
763 addc $t3,$t0,$t1 832 addc $t3,$t0,$t1
833___
834$code.=<<___ if ($SIZE_T==4); # adjust XER[CA]
835 extrdi $t0,$t0,32,0
836 extrdi $t1,$t1,32,0
837 adde $t0,$t0,$t1
838___
839$code.=<<___;
764 stfd $T1a,`$FRAME+16`($sp) 840 stfd $T1a,`$FRAME+16`($sp)
765 stfd $T1b,`$FRAME+24`($sp) 841 stfd $T1b,`$FRAME+24`($sp)
766 insrdi $t4,$t7,16,0 ; 64..127 bits 842 insrdi $t4,$t7,16,0 ; 64..127 bits
@@ -768,6 +844,13 @@ Linner:
768 stfd $T2a,`$FRAME+32`($sp) 844 stfd $T2a,`$FRAME+32`($sp)
769 stfd $T2b,`$FRAME+40`($sp) 845 stfd $T2b,`$FRAME+40`($sp)
770 adde $t5,$t4,$t2 846 adde $t5,$t4,$t2
847___
848$code.=<<___ if ($SIZE_T==4); # adjust XER[CA]
849 extrdi $t4,$t4,32,0
850 extrdi $t2,$t2,32,0
851 adde $t4,$t4,$t2
852___
853$code.=<<___;
771 stfd $T3a,`$FRAME+48`($sp) 854 stfd $T3a,`$FRAME+48`($sp)
772 stfd $T3b,`$FRAME+56`($sp) 855 stfd $T3b,`$FRAME+56`($sp)
773 addze $carry,$carry 856 addze $carry,$carry
@@ -816,7 +899,21 @@ Linner:
816 ld $t7,`$FRAME+72`($sp) 899 ld $t7,`$FRAME+72`($sp)
817 900
818 addc $t3,$t0,$t1 901 addc $t3,$t0,$t1
902___
903$code.=<<___ if ($SIZE_T==4); # adjust XER[CA]
904 extrdi $t0,$t0,32,0
905 extrdi $t1,$t1,32,0
906 adde $t0,$t0,$t1
907___
908$code.=<<___;
819 adde $t5,$t4,$t2 909 adde $t5,$t4,$t2
910___
911$code.=<<___ if ($SIZE_T==4); # adjust XER[CA]
912 extrdi $t4,$t4,32,0
913 extrdi $t2,$t2,32,0
914 adde $t4,$t4,$t2
915___
916$code.=<<___;
820 addze $carry,$carry 917 addze $carry,$carry
821 918
822 std $t3,-16($tp) ; tp[j-1] 919 std $t3,-16($tp) ; tp[j-1]
@@ -835,7 +932,9 @@ Linner:
835 subf $nap_d,$t7,$nap_d ; rewind pointer 932 subf $nap_d,$t7,$nap_d ; rewind pointer
836 cmpw $i,$num 933 cmpw $i,$num
837 blt- Louter 934 blt- Louter
935___
838 936
937$code.=<<___ if ($SIZE_T==8);
839 subf $np,$num,$np ; rewind np 938 subf $np,$num,$np ; rewind np
840 addi $j,$j,1 ; restore counter 939 addi $j,$j,1 ; restore counter
841 subfc $i,$i,$i ; j=0 and "clear" XER[CA] 940 subfc $i,$i,$i ; j=0 and "clear" XER[CA]
@@ -883,34 +982,105 @@ Lcopy: ; copy or in-place refresh
883 stdx $i,$t4,$i 982 stdx $i,$t4,$i
884 addi $i,$i,16 983 addi $i,$i,16
885 bdnz- Lcopy 984 bdnz- Lcopy
985___
986$code.=<<___ if ($SIZE_T==4);
987 subf $np,$num,$np ; rewind np
988 addi $j,$j,1 ; restore counter
989 subfc $i,$i,$i ; j=0 and "clear" XER[CA]
990 addi $tp,$sp,`$FRAME+$TRANSFER`
991 addi $np,$np,-4
992 addi $rp,$rp,-4
993 addi $ap,$sp,`$FRAME+$TRANSFER+4`
994 mtctr $j
995
996.align 4
997Lsub: ld $t0,8($tp) ; load tp[j..j+3] in 64-bit word order
998 ldu $t2,16($tp)
999 lwz $t4,4($np) ; load np[j..j+3] in 32-bit word order
1000 lwz $t5,8($np)
1001 lwz $t6,12($np)
1002 lwzu $t7,16($np)
1003 extrdi $t1,$t0,32,0
1004 extrdi $t3,$t2,32,0
1005 subfe $t4,$t4,$t0 ; tp[j]-np[j]
1006 stw $t0,4($ap) ; save tp[j..j+3] in 32-bit word order
1007 subfe $t5,$t5,$t1 ; tp[j+1]-np[j+1]
1008 stw $t1,8($ap)
1009 subfe $t6,$t6,$t2 ; tp[j+2]-np[j+2]
1010 stw $t2,12($ap)
1011 subfe $t7,$t7,$t3 ; tp[j+3]-np[j+3]
1012 stwu $t3,16($ap)
1013 stw $t4,4($rp)
1014 stw $t5,8($rp)
1015 stw $t6,12($rp)
1016 stwu $t7,16($rp)
1017 bdnz- Lsub
1018
1019 li $i,0
1020 subfe $ovf,$i,$ovf ; handle upmost overflow bit
1021 addi $tp,$sp,`$FRAME+$TRANSFER+4`
1022 subf $rp,$num,$rp ; rewind rp
1023 and $ap,$tp,$ovf
1024 andc $np,$rp,$ovf
1025 or $ap,$ap,$np ; ap=borrow?tp:rp
1026 addi $tp,$sp,`$FRAME+$TRANSFER`
1027 mtctr $j
1028
1029.align 4
1030Lcopy: ; copy or in-place refresh
1031 lwz $t0,4($ap)
1032 lwz $t1,8($ap)
1033 lwz $t2,12($ap)
1034 lwzu $t3,16($ap)
1035 std $i,8($nap_d) ; zap nap_d
1036 std $i,16($nap_d)
1037 std $i,24($nap_d)
1038 std $i,32($nap_d)
1039 std $i,40($nap_d)
1040 std $i,48($nap_d)
1041 std $i,56($nap_d)
1042 stdu $i,64($nap_d)
1043 stw $t0,4($rp)
1044 stw $t1,8($rp)
1045 stw $t2,12($rp)
1046 stwu $t3,16($rp)
1047 std $i,8($tp) ; zap tp at once
1048 stdu $i,16($tp)
1049 bdnz- Lcopy
1050___
886 1051
887 $POP r14,`2*$SIZE_T`($sp) 1052$code.=<<___;
888 $POP r15,`3*$SIZE_T`($sp) 1053 $POP $i,0($sp)
889 $POP r16,`4*$SIZE_T`($sp)
890 $POP r17,`5*$SIZE_T`($sp)
891 $POP r18,`6*$SIZE_T`($sp)
892 $POP r19,`7*$SIZE_T`($sp)
893 $POP r20,`8*$SIZE_T`($sp)
894 $POP r21,`9*$SIZE_T`($sp)
895 $POP r22,`10*$SIZE_T`($sp)
896 $POP r23,`11*$SIZE_T`($sp)
897 lfd f14,`12*$SIZE_T+0`($sp)
898 lfd f15,`12*$SIZE_T+8`($sp)
899 lfd f16,`12*$SIZE_T+16`($sp)
900 lfd f17,`12*$SIZE_T+24`($sp)
901 lfd f18,`12*$SIZE_T+32`($sp)
902 lfd f19,`12*$SIZE_T+40`($sp)
903 lfd f20,`12*$SIZE_T+48`($sp)
904 lfd f21,`12*$SIZE_T+56`($sp)
905 lfd f22,`12*$SIZE_T+64`($sp)
906 lfd f23,`12*$SIZE_T+72`($sp)
907 lfd f24,`12*$SIZE_T+80`($sp)
908 lfd f25,`12*$SIZE_T+88`($sp)
909 $POP $sp,0($sp)
910 li r3,1 ; signal "handled" 1054 li r3,1 ; signal "handled"
1055 $POP r22,`-12*8-10*$SIZE_T`($i)
1056 $POP r23,`-12*8-9*$SIZE_T`($i)
1057 $POP r24,`-12*8-8*$SIZE_T`($i)
1058 $POP r25,`-12*8-7*$SIZE_T`($i)
1059 $POP r26,`-12*8-6*$SIZE_T`($i)
1060 $POP r27,`-12*8-5*$SIZE_T`($i)
1061 $POP r28,`-12*8-4*$SIZE_T`($i)
1062 $POP r29,`-12*8-3*$SIZE_T`($i)
1063 $POP r30,`-12*8-2*$SIZE_T`($i)
1064 $POP r31,`-12*8-1*$SIZE_T`($i)
1065 lfd f20,`-12*8`($i)
1066 lfd f21,`-11*8`($i)
1067 lfd f22,`-10*8`($i)
1068 lfd f23,`-9*8`($i)
1069 lfd f24,`-8*8`($i)
1070 lfd f25,`-7*8`($i)
1071 lfd f26,`-6*8`($i)
1072 lfd f27,`-5*8`($i)
1073 lfd f28,`-4*8`($i)
1074 lfd f29,`-3*8`($i)
1075 lfd f30,`-2*8`($i)
1076 lfd f31,`-1*8`($i)
1077 mr $sp,$i
911 blr 1078 blr
912 .long 0 1079 .long 0
913.asciz "Montgomery Multiplication for PPC64, CRYPTOGAMS by <appro\@fy.chalmers.se>" 1080 .byte 0,12,4,0,0x8c,10,6,0
1081 .long 0
1082
1083.asciz "Montgomery Multiplication for PPC64, CRYPTOGAMS by <appro\@openssl.org>"
914___ 1084___
915 1085
916$code =~ s/\`([^\`]*)\`/eval $1/gem; 1086$code =~ s/\`([^\`]*)\`/eval $1/gem;
diff --git a/src/lib/libssl/src/crypto/bn/asm/s390x-gf2m.pl b/src/lib/libssl/src/crypto/bn/asm/s390x-gf2m.pl
new file mode 100644
index 0000000000..cd9f13eca2
--- /dev/null
+++ b/src/lib/libssl/src/crypto/bn/asm/s390x-gf2m.pl
@@ -0,0 +1,221 @@
1#!/usr/bin/env perl
2#
3# ====================================================================
4# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9#
10# May 2011
11#
12# The module implements bn_GF2m_mul_2x2 polynomial multiplication used
 13# in bn_gf2m.c. It's kind of a low-hanging mechanical port from C for
14# the time being... gcc 4.3 appeared to generate poor code, therefore
15# the effort. And indeed, the module delivers 55%-90%(*) improvement
 16# on the heaviest ECDSA verify and ECDH benchmarks for 163- and 571-bit
17# key lengths on z990, 30%-55%(*) - on z10, and 70%-110%(*) - on z196.
 18# This is for the 64-bit build. In the 32-bit "highgprs" case the improvement is
 19# even higher; for example, on z990 it was measured at 80%-150%. ECDSA
 20# sign is a modest 9%-12% faster. Keep in mind that these coefficients
 21# are not the ones for bn_GF2m_mul_2x2 itself, as not all CPU time is
22# burnt in it...
23#
24# (*) gcc 4.1 was observed to deliver better results than gcc 4.3,
25# so that improvement coefficients can vary from one specific
26# setup to another.
27
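For reference, the table-driven carry-less multiplication that _mul_1x1 below implements looks roughly like this in portable C (a 32x32-bit variant so that no masking of the operand is needed; illustrative only, the function name is made up):

#include <stdint.h>

/* Carry-less (GF(2)) 32x32 -> 64-bit multiply: build a 16-entry table of
 * "a times a 4-bit value" products, then consume b four bits at a time.
 * This is the scheme _mul_1x1 applies to full 64x64 -> 128-bit products. */
static uint64_t clmul_32x32(uint32_t a, uint32_t b)
{
    uint64_t tab[16], r = 0;
    uint64_t a1 = a, a2 = a1 << 1, a4 = a1 << 2, a8 = a1 << 3;
    int s;

    for (s = 0; s < 16; s++)            /* tab[s] = a times s over GF(2) */
        tab[s] = ((s & 1) ? a1 : 0) ^ ((s & 2) ? a2 : 0) ^
                 ((s & 4) ? a4 : 0) ^ ((s & 8) ? a8 : 0);

    for (s = 0; s < 32; s += 4)         /* r ^= tab[nibble of b] << position */
        r ^= tab[(b >> s) & 0xf] << s;

    return r;
}

The 64-bit routine additionally has to mask off the top bits of the shifted operand (the nihh instructions) and patch their contribution back in with the sign-broadcast masks (srag/ngr), since a<<3 would not otherwise fit in a 64-bit register.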
28$flavour = shift;
29
30if ($flavour =~ /3[12]/) {
31 $SIZE_T=4;
32 $g="";
33} else {
34 $SIZE_T=8;
35 $g="g";
36}
37
38while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
39open STDOUT,">$output";
40
41$stdframe=16*$SIZE_T+4*8;
42
43$rp="%r2";
44$a1="%r3";
45$a0="%r4";
46$b1="%r5";
47$b0="%r6";
48
49$ra="%r14";
50$sp="%r15";
51
52@T=("%r0","%r1");
53@i=("%r12","%r13");
54
55($a1,$a2,$a4,$a8,$a12,$a48)=map("%r$_",(6..11));
56($lo,$hi,$b)=map("%r$_",(3..5)); $a=$lo; $mask=$a8;
57
58$code.=<<___;
59.text
60
61.type _mul_1x1,\@function
62.align 16
63_mul_1x1:
64 lgr $a1,$a
65 sllg $a2,$a,1
66 sllg $a4,$a,2
67 sllg $a8,$a,3
68
69 srag $lo,$a1,63 # broadcast 63rd bit
70 nihh $a1,0x1fff
71 srag @i[0],$a2,63 # broadcast 62nd bit
72 nihh $a2,0x3fff
73 srag @i[1],$a4,63 # broadcast 61st bit
74 nihh $a4,0x7fff
75 ngr $lo,$b
76 ngr @i[0],$b
77 ngr @i[1],$b
78
79 lghi @T[0],0
80 lgr $a12,$a1
81 stg @T[0],`$stdframe+0*8`($sp) # tab[0]=0
82 xgr $a12,$a2
83 stg $a1,`$stdframe+1*8`($sp) # tab[1]=a1
84 lgr $a48,$a4
85 stg $a2,`$stdframe+2*8`($sp) # tab[2]=a2
86 xgr $a48,$a8
87 stg $a12,`$stdframe+3*8`($sp) # tab[3]=a1^a2
88 xgr $a1,$a4
89
90 stg $a4,`$stdframe+4*8`($sp) # tab[4]=a4
91 xgr $a2,$a4
92 stg $a1,`$stdframe+5*8`($sp) # tab[5]=a1^a4
93 xgr $a12,$a4
94 stg $a2,`$stdframe+6*8`($sp) # tab[6]=a2^a4
95 xgr $a1,$a48
96 stg $a12,`$stdframe+7*8`($sp) # tab[7]=a1^a2^a4
97 xgr $a2,$a48
98
99 stg $a8,`$stdframe+8*8`($sp) # tab[8]=a8
100 xgr $a12,$a48
101 stg $a1,`$stdframe+9*8`($sp) # tab[9]=a1^a8
102 xgr $a1,$a4
103 stg $a2,`$stdframe+10*8`($sp) # tab[10]=a2^a8
104 xgr $a2,$a4
105 stg $a12,`$stdframe+11*8`($sp) # tab[11]=a1^a2^a8
106
107 xgr $a12,$a4
108 stg $a48,`$stdframe+12*8`($sp) # tab[12]=a4^a8
109 srlg $hi,$lo,1
110 stg $a1,`$stdframe+13*8`($sp) # tab[13]=a1^a4^a8
111 sllg $lo,$lo,63
112 stg $a2,`$stdframe+14*8`($sp) # tab[14]=a2^a4^a8
113 srlg @T[0],@i[0],2
114 stg $a12,`$stdframe+15*8`($sp) # tab[15]=a1^a2^a4^a8
115
116 lghi $mask,`0xf<<3`
117 sllg $a1,@i[0],62
118 sllg @i[0],$b,3
119 srlg @T[1],@i[1],3
120 ngr @i[0],$mask
121 sllg $a2,@i[1],61
122 srlg @i[1],$b,4-3
123 xgr $hi,@T[0]
124 ngr @i[1],$mask
125 xgr $lo,$a1
126 xgr $hi,@T[1]
127 xgr $lo,$a2
128
129 xg $lo,$stdframe(@i[0],$sp)
130 srlg @i[0],$b,8-3
131 ngr @i[0],$mask
132___
133for($n=1;$n<14;$n++) {
134$code.=<<___;
135 lg @T[1],$stdframe(@i[1],$sp)
136 srlg @i[1],$b,`($n+2)*4`-3
137 sllg @T[0],@T[1],`$n*4`
138 ngr @i[1],$mask
139 srlg @T[1],@T[1],`64-$n*4`
140 xgr $lo,@T[0]
141 xgr $hi,@T[1]
142___
143 push(@i,shift(@i)); push(@T,shift(@T));
144}
145$code.=<<___;
146 lg @T[1],$stdframe(@i[1],$sp)
147 sllg @T[0],@T[1],`$n*4`
148 srlg @T[1],@T[1],`64-$n*4`
149 xgr $lo,@T[0]
150 xgr $hi,@T[1]
151
152 lg @T[0],$stdframe(@i[0],$sp)
153 sllg @T[1],@T[0],`($n+1)*4`
154 srlg @T[0],@T[0],`64-($n+1)*4`
155 xgr $lo,@T[1]
156 xgr $hi,@T[0]
157
158 br $ra
159.size _mul_1x1,.-_mul_1x1
160
161.globl bn_GF2m_mul_2x2
162.type bn_GF2m_mul_2x2,\@function
163.align 16
164bn_GF2m_mul_2x2:
165 stm${g} %r3,%r15,3*$SIZE_T($sp)
166
167 lghi %r1,-$stdframe-128
168 la %r0,0($sp)
169 la $sp,0(%r1,$sp) # alloca
170 st${g} %r0,0($sp) # back chain
171___
172if ($SIZE_T==8) {
173my @r=map("%r$_",(6..9));
174$code.=<<___;
175 bras $ra,_mul_1x1 # a1·b1
176 stmg $lo,$hi,16($rp)
177
178 lg $a,`$stdframe+128+4*$SIZE_T`($sp)
179 lg $b,`$stdframe+128+6*$SIZE_T`($sp)
180 bras $ra,_mul_1x1 # a0·b0
181 stmg $lo,$hi,0($rp)
182
183 lg $a,`$stdframe+128+3*$SIZE_T`($sp)
184 lg $b,`$stdframe+128+5*$SIZE_T`($sp)
185 xg $a,`$stdframe+128+4*$SIZE_T`($sp)
186 xg $b,`$stdframe+128+6*$SIZE_T`($sp)
187 bras $ra,_mul_1x1 # (a0+a1)·(b0+b1)
188 lmg @r[0],@r[3],0($rp)
189
190 xgr $lo,$hi
191 xgr $hi,@r[1]
192 xgr $lo,@r[0]
193 xgr $hi,@r[2]
194 xgr $lo,@r[3]
195 xgr $hi,@r[3]
196 xgr $lo,$hi
197 stg $hi,16($rp)
198 stg $lo,8($rp)
199___
200} else {
201$code.=<<___;
202 sllg %r3,%r3,32
203 sllg %r5,%r5,32
204 or %r3,%r4
205 or %r5,%r6
206 bras $ra,_mul_1x1
207 rllg $lo,$lo,32
208 rllg $hi,$hi,32
209 stmg $lo,$hi,0($rp)
210___
211}
212$code.=<<___;
213 lm${g} %r6,%r15,`$stdframe+128+6*$SIZE_T`($sp)
214 br $ra
215.size bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2
216.string "GF(2^m) Multiplication for s390x, CRYPTOGAMS by <appro\@openssl.org>"
217___
218
219$code =~ s/\`([^\`]*)\`/eval($1)/gem;
220print $code;
221close STDOUT;
diff --git a/src/lib/libssl/src/crypto/bn/asm/s390x-mont.pl b/src/lib/libssl/src/crypto/bn/asm/s390x-mont.pl
index f61246f5b6..9fd64e81ee 100644
--- a/src/lib/libssl/src/crypto/bn/asm/s390x-mont.pl
+++ b/src/lib/libssl/src/crypto/bn/asm/s390x-mont.pl
@@ -32,6 +32,33 @@
32# Reschedule to minimize/avoid Address Generation Interlock hazard, 32# Reschedule to minimize/avoid Address Generation Interlock hazard,
33# make inner loops counter-based. 33# make inner loops counter-based.
34 34
35# November 2010.
36#
37# Adapt for -m31 build. If kernel supports what's called "highgprs"
38# feature on Linux [see /proc/cpuinfo], it's possible to use 64-bit
39# instructions and achieve "64-bit" performance even in 31-bit legacy
40# application context. The feature is not specific to any particular
 41# processor, as long as it's a "z-CPU". The latter implies that the code
42# remains z/Architecture specific. Compatibility with 32-bit BN_ULONG
43# is achieved by swapping words after 64-bit loads, follow _dswap-s.
44# On z990 it was measured to perform 2.6-2.2 times better than
45# compiler-generated code, less for longer keys...
46
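Concretely, each _dswap marker below becomes a rotate-by-32 in the 31-bit build, so that a pair of 32-bit BN_ULONG words fetched with a single 64-bit load ends up in the order the 64-bit arithmetic expects. An illustrative C equivalent (not part of the module):

#include <stdint.h>

/* What the emitted "rllg reg,reg,32" does: swap the two 32-bit halves of
 * a 64-bit value, i.e. a rotation by 32 bits in either direction. */
static inline uint64_t dswap64(uint64_t x)
{
    return (x << 32) | (x >> 32);
}

In the 64-bit build the substitution loop at the end of the script simply drops the _dswap markers, so that code path is unchanged.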
47$flavour = shift;
48
49if ($flavour =~ /3[12]/) {
50 $SIZE_T=4;
51 $g="";
52} else {
53 $SIZE_T=8;
54 $g="g";
55}
56
57while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
58open STDOUT,">$output";
59
60$stdframe=16*$SIZE_T+4*8;
61
35$mn0="%r0"; 62$mn0="%r0";
36$num="%r1"; 63$num="%r1";
37 64
@@ -60,34 +87,44 @@ $code.=<<___;
60.globl bn_mul_mont 87.globl bn_mul_mont
61.type bn_mul_mont,\@function 88.type bn_mul_mont,\@function
62bn_mul_mont: 89bn_mul_mont:
63 lgf $num,164($sp) # pull $num 90 lgf $num,`$stdframe+$SIZE_T-4`($sp) # pull $num
64 sla $num,3 # $num to enumerate bytes 91 sla $num,`log($SIZE_T)/log(2)` # $num to enumerate bytes
65 la $bp,0($num,$bp) 92 la $bp,0($num,$bp)
66 93
67 stg %r2,16($sp) 94 st${g} %r2,2*$SIZE_T($sp)
68 95
69 cghi $num,16 # 96 cghi $num,16 #
70 lghi %r2,0 # 97 lghi %r2,0 #
71 blr %r14 # if($num<16) return 0; 98 blr %r14 # if($num<16) return 0;
99___
100$code.=<<___ if ($flavour =~ /3[12]/);
101 tmll $num,4
102 bnzr %r14 # if ($num&1) return 0;
103___
104$code.=<<___ if ($flavour !~ /3[12]/);
72 cghi $num,96 # 105 cghi $num,96 #
73 bhr %r14 # if($num>96) return 0; 106 bhr %r14 # if($num>96) return 0;
107___
108$code.=<<___;
109 stm${g} %r3,%r15,3*$SIZE_T($sp)
74 110
75 stmg %r3,%r15,24($sp) 111 lghi $rp,-$stdframe-8 # leave room for carry bit
76
77 lghi $rp,-160-8 # leave room for carry bit
78 lcgr $j,$num # -$num 112 lcgr $j,$num # -$num
79 lgr %r0,$sp 113 lgr %r0,$sp
80 la $rp,0($rp,$sp) 114 la $rp,0($rp,$sp)
81 la $sp,0($j,$rp) # alloca 115 la $sp,0($j,$rp) # alloca
82 stg %r0,0($sp) # back chain 116 st${g} %r0,0($sp) # back chain
83 117
84 sra $num,3 # restore $num 118 sra $num,3 # restore $num
85 la $bp,0($j,$bp) # restore $bp 119 la $bp,0($j,$bp) # restore $bp
86 ahi $num,-1 # adjust $num for inner loop 120 ahi $num,-1 # adjust $num for inner loop
87 lg $n0,0($n0) # pull n0 121 lg $n0,0($n0) # pull n0
122 _dswap $n0
88 123
89 lg $bi,0($bp) 124 lg $bi,0($bp)
125 _dswap $bi
90 lg $alo,0($ap) 126 lg $alo,0($ap)
127 _dswap $alo
91 mlgr $ahi,$bi # ap[0]*bp[0] 128 mlgr $ahi,$bi # ap[0]*bp[0]
92 lgr $AHI,$ahi 129 lgr $AHI,$ahi
93 130
@@ -95,6 +132,7 @@ bn_mul_mont:
95 msgr $mn0,$n0 132 msgr $mn0,$n0
96 133
97 lg $nlo,0($np) # 134 lg $nlo,0($np) #
135 _dswap $nlo
98 mlgr $nhi,$mn0 # np[0]*m1 136 mlgr $nhi,$mn0 # np[0]*m1
99 algr $nlo,$alo # +="tp[0]" 137 algr $nlo,$alo # +="tp[0]"
100 lghi $NHI,0 138 lghi $NHI,0
@@ -106,12 +144,14 @@ bn_mul_mont:
106.align 16 144.align 16
107.L1st: 145.L1st:
108 lg $alo,0($j,$ap) 146 lg $alo,0($j,$ap)
147 _dswap $alo
109 mlgr $ahi,$bi # ap[j]*bp[0] 148 mlgr $ahi,$bi # ap[j]*bp[0]
110 algr $alo,$AHI 149 algr $alo,$AHI
111 lghi $AHI,0 150 lghi $AHI,0
112 alcgr $AHI,$ahi 151 alcgr $AHI,$ahi
113 152
114 lg $nlo,0($j,$np) 153 lg $nlo,0($j,$np)
154 _dswap $nlo
115 mlgr $nhi,$mn0 # np[j]*m1 155 mlgr $nhi,$mn0 # np[j]*m1
116 algr $nlo,$NHI 156 algr $nlo,$NHI
117 lghi $NHI,0 157 lghi $NHI,0
@@ -119,22 +159,24 @@ bn_mul_mont:
119 algr $nlo,$alo 159 algr $nlo,$alo
120 alcgr $NHI,$nhi 160 alcgr $NHI,$nhi
121 161
122 stg $nlo,160-8($j,$sp) # tp[j-1]= 162 stg $nlo,$stdframe-8($j,$sp) # tp[j-1]=
123 la $j,8($j) # j++ 163 la $j,8($j) # j++
124 brct $count,.L1st 164 brct $count,.L1st
125 165
126 algr $NHI,$AHI 166 algr $NHI,$AHI
127 lghi $AHI,0 167 lghi $AHI,0
128 alcgr $AHI,$AHI # upmost overflow bit 168 alcgr $AHI,$AHI # upmost overflow bit
129 stg $NHI,160-8($j,$sp) 169 stg $NHI,$stdframe-8($j,$sp)
130 stg $AHI,160($j,$sp) 170 stg $AHI,$stdframe($j,$sp)
131 la $bp,8($bp) # bp++ 171 la $bp,8($bp) # bp++
132 172
133.Louter: 173.Louter:
134 lg $bi,0($bp) # bp[i] 174 lg $bi,0($bp) # bp[i]
175 _dswap $bi
135 lg $alo,0($ap) 176 lg $alo,0($ap)
177 _dswap $alo
136 mlgr $ahi,$bi # ap[0]*bp[i] 178 mlgr $ahi,$bi # ap[0]*bp[i]
137 alg $alo,160($sp) # +=tp[0] 179 alg $alo,$stdframe($sp) # +=tp[0]
138 lghi $AHI,0 180 lghi $AHI,0
139 alcgr $AHI,$ahi 181 alcgr $AHI,$ahi
140 182
@@ -142,6 +184,7 @@ bn_mul_mont:
142 msgr $mn0,$n0 # tp[0]*n0 184 msgr $mn0,$n0 # tp[0]*n0
143 185
144 lg $nlo,0($np) # np[0] 186 lg $nlo,0($np) # np[0]
187 _dswap $nlo
145 mlgr $nhi,$mn0 # np[0]*m1 188 mlgr $nhi,$mn0 # np[0]*m1
146 algr $nlo,$alo # +="tp[0]" 189 algr $nlo,$alo # +="tp[0]"
147 lghi $NHI,0 190 lghi $NHI,0
@@ -153,14 +196,16 @@ bn_mul_mont:
153.align 16 196.align 16
154.Linner: 197.Linner:
155 lg $alo,0($j,$ap) 198 lg $alo,0($j,$ap)
199 _dswap $alo
156 mlgr $ahi,$bi # ap[j]*bp[i] 200 mlgr $ahi,$bi # ap[j]*bp[i]
157 algr $alo,$AHI 201 algr $alo,$AHI
158 lghi $AHI,0 202 lghi $AHI,0
159 alcgr $ahi,$AHI 203 alcgr $ahi,$AHI
160 alg $alo,160($j,$sp)# +=tp[j] 204 alg $alo,$stdframe($j,$sp)# +=tp[j]
161 alcgr $AHI,$ahi 205 alcgr $AHI,$ahi
162 206
163 lg $nlo,0($j,$np) 207 lg $nlo,0($j,$np)
208 _dswap $nlo
164 mlgr $nhi,$mn0 # np[j]*m1 209 mlgr $nhi,$mn0 # np[j]*m1
165 algr $nlo,$NHI 210 algr $nlo,$NHI
166 lghi $NHI,0 211 lghi $NHI,0
@@ -168,31 +213,33 @@ bn_mul_mont:
168 algr $nlo,$alo # +="tp[j]" 213 algr $nlo,$alo # +="tp[j]"
169 alcgr $NHI,$nhi 214 alcgr $NHI,$nhi
170 215
171 stg $nlo,160-8($j,$sp) # tp[j-1]= 216 stg $nlo,$stdframe-8($j,$sp) # tp[j-1]=
172 la $j,8($j) # j++ 217 la $j,8($j) # j++
173 brct $count,.Linner 218 brct $count,.Linner
174 219
175 algr $NHI,$AHI 220 algr $NHI,$AHI
176 lghi $AHI,0 221 lghi $AHI,0
177 alcgr $AHI,$AHI 222 alcgr $AHI,$AHI
178 alg $NHI,160($j,$sp)# accumulate previous upmost overflow bit 223 alg $NHI,$stdframe($j,$sp)# accumulate previous upmost overflow bit
179 lghi $ahi,0 224 lghi $ahi,0
180 alcgr $AHI,$ahi # new upmost overflow bit 225 alcgr $AHI,$ahi # new upmost overflow bit
181 stg $NHI,160-8($j,$sp) 226 stg $NHI,$stdframe-8($j,$sp)
182 stg $AHI,160($j,$sp) 227 stg $AHI,$stdframe($j,$sp)
183 228
184 la $bp,8($bp) # bp++ 229 la $bp,8($bp) # bp++
185 clg $bp,160+8+32($j,$sp) # compare to &bp[num] 230 cl${g} $bp,`$stdframe+8+4*$SIZE_T`($j,$sp) # compare to &bp[num]
186 jne .Louter 231 jne .Louter
187 232
188 lg $rp,160+8+16($j,$sp) # reincarnate rp 233 l${g} $rp,`$stdframe+8+2*$SIZE_T`($j,$sp) # reincarnate rp
189 la $ap,160($sp) 234 la $ap,$stdframe($sp)
190 ahi $num,1 # restore $num, incidentally clears "borrow" 235 ahi $num,1 # restore $num, incidentally clears "borrow"
191 236
192 la $j,0(%r0) 237 la $j,0(%r0)
193 lr $count,$num 238 lr $count,$num
194.Lsub: lg $alo,0($j,$ap) 239.Lsub: lg $alo,0($j,$ap)
195 slbg $alo,0($j,$np) 240 lg $nlo,0($j,$np)
241 _dswap $nlo
242 slbgr $alo,$nlo
196 stg $alo,0($j,$rp) 243 stg $alo,0($j,$rp)
197 la $j,8($j) 244 la $j,8($j)
198 brct $count,.Lsub 245 brct $count,.Lsub
@@ -207,19 +254,24 @@ bn_mul_mont:
207 254
208 la $j,0(%r0) 255 la $j,0(%r0)
209 lgr $count,$num 256 lgr $count,$num
210.Lcopy: lg $alo,0($j,$ap) # copy or in-place refresh 257.Lcopy: lg $alo,0($j,$ap) # copy or in-place refresh
211 stg $j,160($j,$sp) # zap tp 258 _dswap $alo
259 stg $j,$stdframe($j,$sp) # zap tp
212 stg $alo,0($j,$rp) 260 stg $alo,0($j,$rp)
213 la $j,8($j) 261 la $j,8($j)
214 brct $count,.Lcopy 262 brct $count,.Lcopy
215 263
216 la %r1,160+8+48($j,$sp) 264 la %r1,`$stdframe+8+6*$SIZE_T`($j,$sp)
217 lmg %r6,%r15,0(%r1) 265 lm${g} %r6,%r15,0(%r1)
218 lghi %r2,1 # signal "processed" 266 lghi %r2,1 # signal "processed"
219 br %r14 267 br %r14
220.size bn_mul_mont,.-bn_mul_mont 268.size bn_mul_mont,.-bn_mul_mont
221.string "Montgomery Multiplication for s390x, CRYPTOGAMS by <appro\@openssl.org>" 269.string "Montgomery Multiplication for s390x, CRYPTOGAMS by <appro\@openssl.org>"
222___ 270___
223 271
224print $code; 272foreach (split("\n",$code)) {
273 s/\`([^\`]*)\`/eval $1/ge;
274 s/_dswap\s+(%r[0-9]+)/sprintf("rllg\t%s,%s,32",$1,$1) if($SIZE_T==4)/e;
275 print $_,"\n";
276}
225close STDOUT; 277close STDOUT;
diff --git a/src/lib/libssl/src/crypto/bn/asm/x86-gf2m.pl b/src/lib/libssl/src/crypto/bn/asm/x86-gf2m.pl
new file mode 100644
index 0000000000..808a1e5969
--- /dev/null
+++ b/src/lib/libssl/src/crypto/bn/asm/x86-gf2m.pl
@@ -0,0 +1,313 @@
1#!/usr/bin/env perl
2#
3# ====================================================================
4# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9#
10# May 2011
11#
12# The module implements bn_GF2m_mul_2x2 polynomial multiplication used
 13# in bn_gf2m.c. It's kind of a low-hanging mechanical port from C for
14# the time being... Except that it has three code paths: pure integer
15# code suitable for any x86 CPU, MMX code suitable for PIII and later
16# and PCLMULQDQ suitable for Westmere and later. Improvement varies
17# from one benchmark and µ-arch to another. Below are interval values
18# for 163- and 571-bit ECDH benchmarks relative to compiler-generated
19# code:
20#
21# PIII 16%-30%
22# P4 12%-12%
23# Opteron 18%-40%
24# Core2 19%-44%
25# Atom 38%-64%
26# Westmere 53%-121%(PCLMULQDQ)/20%-32%(MMX)
27# Sandy Bridge 72%-127%(PCLMULQDQ)/27%-23%(MMX)
28#
 29# Note that the above improvement coefficients are not coefficients for
 30# bn_GF2m_mul_2x2 itself. For example, a 120% ECDH improvement is the result
 31# of bn_GF2m_mul_2x2 being >4x faster. As it gets faster, the benchmark
32# is more and more dominated by other subroutines, most notably by
33# BN_GF2m_mod[_mul]_arr...
34
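All three code paths share the same 2x2 structure: three 1x1 carry-less products combined Karatsuba-style. A plain-C sketch of that combination, with a deliberately simple bit-serial helper standing in for _mul_1x1_ialu/_mul_1x1_mmx (illustrative only; both C function names here are made up):

#include <stdint.h>

/* Bit-serial carry-less 32x32 -> 64-bit multiply, a stand-in for the
 * optimized 1x1 routines in this module. */
static uint64_t clmul32(uint32_t a, uint32_t b)
{
    uint64_t r = 0;
    int i;

    for (i = 0; i < 32; i++)
        if ((b >> i) & 1)
            r ^= (uint64_t)a << i;
    return r;
}

/* r[0..3] = (a1:a0) x (b1:b0) over GF(2), using three 1x1 products:
 *   lo = a0*b0, hi = a1*b1, mid = (a0^a1)*(b0^b1) ^ lo ^ hi,
 * then folding mid into the middle of the 128-bit result, which is
 * essentially how bn_GF2m_mul_2x2 below stitches its results together. */
static void gf2m_mul_2x2(uint32_t r[4], uint32_t a1, uint32_t a0,
                         uint32_t b1, uint32_t b0)
{
    uint64_t lo  = clmul32(a0, b0);
    uint64_t hi  = clmul32(a1, b1);
    uint64_t mid = clmul32(a0 ^ a1, b0 ^ b1) ^ lo ^ hi;

    lo ^= mid << 32;                    /* low half of mid lands in bits 32..63  */
    hi ^= mid >> 32;                    /* high half of mid lands in bits 64..95 */

    r[0] = (uint32_t)lo;  r[1] = (uint32_t)(lo >> 32);
    r[2] = (uint32_t)hi;  r[3] = (uint32_t)(hi >> 32);
}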
35$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
36push(@INC,"${dir}","${dir}../../perlasm");
37require "x86asm.pl";
38
39&asm_init($ARGV[0],$0,$x86only = $ARGV[$#ARGV] eq "386");
40
41$sse2=0;
42for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
43
44&external_label("OPENSSL_ia32cap_P") if ($sse2);
45
46$a="eax";
47$b="ebx";
48($a1,$a2,$a4)=("ecx","edx","ebp");
49
50$R="mm0";
51@T=("mm1","mm2");
52($A,$B,$B30,$B31)=("mm2","mm3","mm4","mm5");
53@i=("esi","edi");
54
55 if (!$x86only) {
56&function_begin_B("_mul_1x1_mmx");
57 &sub ("esp",32+4);
58 &mov ($a1,$a);
59 &lea ($a2,&DWP(0,$a,$a));
60 &and ($a1,0x3fffffff);
61 &lea ($a4,&DWP(0,$a2,$a2));
62 &mov (&DWP(0*4,"esp"),0);
63 &and ($a2,0x7fffffff);
64 &movd ($A,$a);
65 &movd ($B,$b);
66 &mov (&DWP(1*4,"esp"),$a1); # a1
67 &xor ($a1,$a2); # a1^a2
68 &pxor ($B31,$B31);
69 &pxor ($B30,$B30);
70 &mov (&DWP(2*4,"esp"),$a2); # a2
71 &xor ($a2,$a4); # a2^a4
72 &mov (&DWP(3*4,"esp"),$a1); # a1^a2
73 &pcmpgtd($B31,$A); # broadcast 31st bit
74 &paddd ($A,$A); # $A<<=1
75 &xor ($a1,$a2); # a1^a4=a1^a2^a2^a4
76 &mov (&DWP(4*4,"esp"),$a4); # a4
77 &xor ($a4,$a2); # a2=a4^a2^a4
78 &pand ($B31,$B);
79 &pcmpgtd($B30,$A); # broadcast 30th bit
80 &mov (&DWP(5*4,"esp"),$a1); # a1^a4
81 &xor ($a4,$a1); # a1^a2^a4
82 &psllq ($B31,31);
83 &pand ($B30,$B);
84 &mov (&DWP(6*4,"esp"),$a2); # a2^a4
85 &mov (@i[0],0x7);
86 &mov (&DWP(7*4,"esp"),$a4); # a1^a2^a4
87 &mov ($a4,@i[0]);
88 &and (@i[0],$b);
89 &shr ($b,3);
90 &mov (@i[1],$a4);
91 &psllq ($B30,30);
92 &and (@i[1],$b);
93 &shr ($b,3);
94 &movd ($R,&DWP(0,"esp",@i[0],4));
95 &mov (@i[0],$a4);
96 &and (@i[0],$b);
97 &shr ($b,3);
98 for($n=1;$n<9;$n++) {
99 &movd (@T[1],&DWP(0,"esp",@i[1],4));
100 &mov (@i[1],$a4);
101 &psllq (@T[1],3*$n);
102 &and (@i[1],$b);
103 &shr ($b,3);
104 &pxor ($R,@T[1]);
105
106 push(@i,shift(@i)); push(@T,shift(@T));
107 }
108 &movd (@T[1],&DWP(0,"esp",@i[1],4));
109 &pxor ($R,$B30);
110 &psllq (@T[1],3*$n++);
111 &pxor ($R,@T[1]);
112
113 &movd (@T[0],&DWP(0,"esp",@i[0],4));
114 &pxor ($R,$B31);
115 &psllq (@T[0],3*$n);
116 &add ("esp",32+4);
117 &pxor ($R,@T[0]);
118 &ret ();
119&function_end_B("_mul_1x1_mmx");
120 }
121
122($lo,$hi)=("eax","edx");
123@T=("ecx","ebp");
124
125&function_begin_B("_mul_1x1_ialu");
126 &sub ("esp",32+4);
127 &mov ($a1,$a);
128 &lea ($a2,&DWP(0,$a,$a));
129 &lea ($a4,&DWP(0,"",$a,4));
130 &and ($a1,0x3fffffff);
131 &lea (@i[1],&DWP(0,$lo,$lo));
132 &sar ($lo,31); # broadcast 31st bit
133 &mov (&DWP(0*4,"esp"),0);
134 &and ($a2,0x7fffffff);
135 &mov (&DWP(1*4,"esp"),$a1); # a1
136 &xor ($a1,$a2); # a1^a2
137 &mov (&DWP(2*4,"esp"),$a2); # a2
138 &xor ($a2,$a4); # a2^a4
139 &mov (&DWP(3*4,"esp"),$a1); # a1^a2
140 &xor ($a1,$a2); # a1^a4=a1^a2^a2^a4
141 &mov (&DWP(4*4,"esp"),$a4); # a4
142 &xor ($a4,$a2); # a2=a4^a2^a4
143 &mov (&DWP(5*4,"esp"),$a1); # a1^a4
144 &xor ($a4,$a1); # a1^a2^a4
 145 &sar (@i[1],31); # broadcast 30th bit
146 &and ($lo,$b);
147 &mov (&DWP(6*4,"esp"),$a2); # a2^a4
148 &and (@i[1],$b);
149 &mov (&DWP(7*4,"esp"),$a4); # a1^a2^a4
150 &mov ($hi,$lo);
151 &shl ($lo,31);
152 &mov (@T[0],@i[1]);
153 &shr ($hi,1);
154
155 &mov (@i[0],0x7);
156 &shl (@i[1],30);
157 &and (@i[0],$b);
158 &shr (@T[0],2);
159 &xor ($lo,@i[1]);
160
161 &shr ($b,3);
162 &mov (@i[1],0x7); # 5-byte instruction!?
163 &and (@i[1],$b);
164 &shr ($b,3);
165 &xor ($hi,@T[0]);
166 &xor ($lo,&DWP(0,"esp",@i[0],4));
167 &mov (@i[0],0x7);
168 &and (@i[0],$b);
169 &shr ($b,3);
170 for($n=1;$n<9;$n++) {
171 &mov (@T[1],&DWP(0,"esp",@i[1],4));
172 &mov (@i[1],0x7);
173 &mov (@T[0],@T[1]);
174 &shl (@T[1],3*$n);
175 &and (@i[1],$b);
176 &shr (@T[0],32-3*$n);
177 &xor ($lo,@T[1]);
178 &shr ($b,3);
179 &xor ($hi,@T[0]);
180
181 push(@i,shift(@i)); push(@T,shift(@T));
182 }
183 &mov (@T[1],&DWP(0,"esp",@i[1],4));
184 &mov (@T[0],@T[1]);
185 &shl (@T[1],3*$n);
186 &mov (@i[1],&DWP(0,"esp",@i[0],4));
187 &shr (@T[0],32-3*$n); $n++;
188 &mov (@i[0],@i[1]);
189 &xor ($lo,@T[1]);
190 &shl (@i[1],3*$n);
191 &xor ($hi,@T[0]);
192 &shr (@i[0],32-3*$n);
193 &xor ($lo,@i[1]);
194 &xor ($hi,@i[0]);
195
196 &add ("esp",32+4);
197 &ret ();
198&function_end_B("_mul_1x1_ialu");
199
200# void bn_GF2m_mul_2x2(BN_ULONG *r, BN_ULONG a1, BN_ULONG a0, BN_ULONG b1, BN_ULONG b0);
201&function_begin_B("bn_GF2m_mul_2x2");
202if (!$x86only) {
203 &picmeup("edx","OPENSSL_ia32cap_P");
204 &mov ("eax",&DWP(0,"edx"));
205 &mov ("edx",&DWP(4,"edx"));
206 &test ("eax",1<<23); # check MMX bit
207 &jz (&label("ialu"));
208if ($sse2) {
209 &test ("eax",1<<24); # check FXSR bit
210 &jz (&label("mmx"));
211 &test ("edx",1<<1); # check PCLMULQDQ bit
212 &jz (&label("mmx"));
213
214 &movups ("xmm0",&QWP(8,"esp"));
215 &shufps ("xmm0","xmm0",0b10110001);
216 &pclmulqdq ("xmm0","xmm0",1);
217 &mov ("eax",&DWP(4,"esp"));
218 &movups (&QWP(0,"eax"),"xmm0");
219 &ret ();
220
221&set_label("mmx",16);
222}
223 &push ("ebp");
224 &push ("ebx");
225 &push ("esi");
226 &push ("edi");
227 &mov ($a,&wparam(1));
228 &mov ($b,&wparam(3));
229 &call ("_mul_1x1_mmx"); # a1·b1
230 &movq ("mm7",$R);
231
232 &mov ($a,&wparam(2));
233 &mov ($b,&wparam(4));
234 &call ("_mul_1x1_mmx"); # a0·b0
235 &movq ("mm6",$R);
236
237 &mov ($a,&wparam(1));
238 &mov ($b,&wparam(3));
239 &xor ($a,&wparam(2));
240 &xor ($b,&wparam(4));
241 &call ("_mul_1x1_mmx"); # (a0+a1)·(b0+b1)
242 &pxor ($R,"mm7");
243 &mov ($a,&wparam(0));
244 &pxor ($R,"mm6"); # (a0+a1)·(b0+b1)-a1·b1-a0·b0
245
246 &movq ($A,$R);
247 &psllq ($R,32);
248 &pop ("edi");
249 &psrlq ($A,32);
250 &pop ("esi");
251 &pxor ($R,"mm6");
252 &pop ("ebx");
253 &pxor ($A,"mm7");
254 &movq (&QWP(0,$a),$R);
255 &pop ("ebp");
256 &movq (&QWP(8,$a),$A);
257 &emms ();
258 &ret ();
259&set_label("ialu",16);
260}
261 &push ("ebp");
262 &push ("ebx");
263 &push ("esi");
264 &push ("edi");
265 &stack_push(4+1);
266
267 &mov ($a,&wparam(1));
268 &mov ($b,&wparam(3));
269 &call ("_mul_1x1_ialu"); # a1·b1
270 &mov (&DWP(8,"esp"),$lo);
271 &mov (&DWP(12,"esp"),$hi);
272
273 &mov ($a,&wparam(2));
274 &mov ($b,&wparam(4));
275 &call ("_mul_1x1_ialu"); # a0·b0
276 &mov (&DWP(0,"esp"),$lo);
277 &mov (&DWP(4,"esp"),$hi);
278
279 &mov ($a,&wparam(1));
280 &mov ($b,&wparam(3));
281 &xor ($a,&wparam(2));
282 &xor ($b,&wparam(4));
283 &call ("_mul_1x1_ialu"); # (a0+a1)·(b0+b1)
284
285 &mov ("ebp",&wparam(0));
286 @r=("ebx","ecx","edi","esi");
287 &mov (@r[0],&DWP(0,"esp"));
288 &mov (@r[1],&DWP(4,"esp"));
289 &mov (@r[2],&DWP(8,"esp"));
290 &mov (@r[3],&DWP(12,"esp"));
291
292 &xor ($lo,$hi);
293 &xor ($hi,@r[1]);
294 &xor ($lo,@r[0]);
295 &mov (&DWP(0,"ebp"),@r[0]);
296 &xor ($hi,@r[2]);
297 &mov (&DWP(12,"ebp"),@r[3]);
298 &xor ($lo,@r[3]);
299 &stack_pop(4+1);
300 &xor ($hi,@r[3]);
301 &pop ("edi");
302 &xor ($lo,$hi);
303 &pop ("esi");
304 &mov (&DWP(8,"ebp"),$hi);
305 &pop ("ebx");
306 &mov (&DWP(4,"ebp"),$lo);
307 &pop ("ebp");
308 &ret ();
309&function_end_B("bn_GF2m_mul_2x2");
310
311&asciz ("GF(2^m) Multiplication for x86, CRYPTOGAMS by <appro\@openssl.org>");
312
313&asm_finish();
diff --git a/src/lib/libssl/src/crypto/bn/asm/x86_64-gf2m.pl b/src/lib/libssl/src/crypto/bn/asm/x86_64-gf2m.pl
new file mode 100644
index 0000000000..1658acbbdd
--- /dev/null
+++ b/src/lib/libssl/src/crypto/bn/asm/x86_64-gf2m.pl
@@ -0,0 +1,389 @@
1#!/usr/bin/env perl
2#
3# ====================================================================
4# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9#
10# May 2011
11#
12# The module implements bn_GF2m_mul_2x2 polynomial multiplication used
 13# in bn_gf2m.c. It's kind of a low-hanging mechanical port from C for
14# the time being... Except that it has two code paths: code suitable
15# for any x86_64 CPU and PCLMULQDQ one suitable for Westmere and
16# later. Improvement varies from one benchmark and µ-arch to another.
 17# The vanilla code path is at most 20% faster than compiler-generated code
 18# [not very impressive], while PCLMULQDQ is a whole 85%-160% better on
19# 163- and 571-bit ECDH benchmarks on Intel CPUs. Keep in mind that
 20# these coefficients are not the ones for bn_GF2m_mul_2x2 itself, as not
21# all CPU time is burnt in it...
22
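For the PCLMULQDQ path, the same Karatsuba scheme can be written with compiler intrinsics. The following is a sketch of what the hand-written xmm sequence in bn_GF2m_mul_2x2 below computes, not the module's output; it assumes a PCLMUL-capable target (e.g. gcc/clang with -mpclmul):

#include <stdint.h>
#include <immintrin.h>                  /* _mm_clmulepi64_si128 */

/* r[0..3] = (a1:a0) x (b1:b0) over GF(2), 64-bit limbs, 256-bit result,
 * via three carry-less multiplications and the same xor/shift folding
 * as the xmm code below. */
static void gf2m_mul_2x2_clmul(uint64_t r[4], uint64_t a1, uint64_t a0,
                               uint64_t b1, uint64_t b0)
{
    __m128i A = _mm_set_epi64x((long long)a1, (long long)a0);
    __m128i B = _mm_set_epi64x((long long)b1, (long long)b0);

    __m128i lo  = _mm_clmulepi64_si128(A, B, 0x00);   /* a0*b0 */
    __m128i hi  = _mm_clmulepi64_si128(A, B, 0x11);   /* a1*b1 */
    __m128i mid = _mm_clmulepi64_si128(
        _mm_xor_si128(A, _mm_srli_si128(A, 8)),       /* a0^a1 in the low lane */
        _mm_xor_si128(B, _mm_srli_si128(B, 8)),       /* b0^b1 in the low lane */
        0x00);

    mid = _mm_xor_si128(mid, _mm_xor_si128(lo, hi));  /* Karatsuba middle term */
    lo  = _mm_xor_si128(lo, _mm_slli_si128(mid, 8));  /* fold its low 64 bits  */
    hi  = _mm_xor_si128(hi, _mm_srli_si128(mid, 8));  /* fold its high 64 bits */

    _mm_storeu_si128((__m128i *)&r[0], lo);
    _mm_storeu_si128((__m128i *)&r[2], hi);
}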
23$flavour = shift;
24$output = shift;
25if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
26
27$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
28
29$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
30( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
31( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
32die "can't locate x86_64-xlate.pl";
33
34open STDOUT,"| $^X $xlate $flavour $output";
35
36($lo,$hi)=("%rax","%rdx"); $a=$lo;
37($i0,$i1)=("%rsi","%rdi");
38($t0,$t1)=("%rbx","%rcx");
39($b,$mask)=("%rbp","%r8");
40($a1,$a2,$a4,$a8,$a12,$a48)=map("%r$_",(9..15));
41($R,$Tx)=("%xmm0","%xmm1");
42
43$code.=<<___;
44.text
45
46.type _mul_1x1,\@abi-omnipotent
47.align 16
48_mul_1x1:
49 sub \$128+8,%rsp
50 mov \$-1,$a1
51 lea ($a,$a),$i0
52 shr \$3,$a1
53 lea (,$a,4),$i1
54 and $a,$a1 # a1=a&0x1fffffffffffffff
55 lea (,$a,8),$a8
56 sar \$63,$a # broadcast 63rd bit
57 lea ($a1,$a1),$a2
58 sar \$63,$i0 # broadcast 62nd bit
59 lea (,$a1,4),$a4
60 and $b,$a
 61 sar \$63,$i1 # broadcast 61st bit
62 mov $a,$hi # $a is $lo
63 shl \$63,$lo
64 and $b,$i0
65 shr \$1,$hi
66 mov $i0,$t1
67 shl \$62,$i0
68 and $b,$i1
69 shr \$2,$t1
70 xor $i0,$lo
71 mov $i1,$t0
72 shl \$61,$i1
73 xor $t1,$hi
74 shr \$3,$t0
75 xor $i1,$lo
76 xor $t0,$hi
77
78 mov $a1,$a12
79 movq \$0,0(%rsp) # tab[0]=0
80 xor $a2,$a12 # a1^a2
81 mov $a1,8(%rsp) # tab[1]=a1
82 mov $a4,$a48
83 mov $a2,16(%rsp) # tab[2]=a2
84 xor $a8,$a48 # a4^a8
85 mov $a12,24(%rsp) # tab[3]=a1^a2
86
87 xor $a4,$a1
88 mov $a4,32(%rsp) # tab[4]=a4
89 xor $a4,$a2
90 mov $a1,40(%rsp) # tab[5]=a1^a4
91 xor $a4,$a12
92 mov $a2,48(%rsp) # tab[6]=a2^a4
93 xor $a48,$a1 # a1^a4^a4^a8=a1^a8
94 mov $a12,56(%rsp) # tab[7]=a1^a2^a4
 95 xor $a48,$a2 # a2^a4^a4^a8=a2^a8
96
97 mov $a8,64(%rsp) # tab[8]=a8
98 xor $a48,$a12 # a1^a2^a4^a4^a8=a1^a2^a8
99 mov $a1,72(%rsp) # tab[9]=a1^a8
100 xor $a4,$a1 # a1^a8^a4
101 mov $a2,80(%rsp) # tab[10]=a2^a8
102 xor $a4,$a2 # a2^a8^a4
103 mov $a12,88(%rsp) # tab[11]=a1^a2^a8
104
105 xor $a4,$a12 # a1^a2^a8^a4
106 mov $a48,96(%rsp) # tab[12]=a4^a8
107 mov $mask,$i0
108 mov $a1,104(%rsp) # tab[13]=a1^a4^a8
109 and $b,$i0
110 mov $a2,112(%rsp) # tab[14]=a2^a4^a8
111 shr \$4,$b
112 mov $a12,120(%rsp) # tab[15]=a1^a2^a4^a8
113 mov $mask,$i1
114 and $b,$i1
115 shr \$4,$b
116
117 movq (%rsp,$i0,8),$R # half of calculations is done in SSE2
118 mov $mask,$i0
119 and $b,$i0
120 shr \$4,$b
121___
122 for ($n=1;$n<8;$n++) {
123 $code.=<<___;
124 mov (%rsp,$i1,8),$t1
125 mov $mask,$i1
126 mov $t1,$t0
127 shl \$`8*$n-4`,$t1
128 and $b,$i1
129 movq (%rsp,$i0,8),$Tx
130 shr \$`64-(8*$n-4)`,$t0
131 xor $t1,$lo
132 pslldq \$$n,$Tx
133 mov $mask,$i0
134 shr \$4,$b
135 xor $t0,$hi
136 and $b,$i0
137 shr \$4,$b
138 pxor $Tx,$R
139___
140 }
141$code.=<<___;
142 mov (%rsp,$i1,8),$t1
143 mov $t1,$t0
144 shl \$`8*$n-4`,$t1
145 movq $R,$i0
146 shr \$`64-(8*$n-4)`,$t0
147 xor $t1,$lo
148 psrldq \$8,$R
149 xor $t0,$hi
150 movq $R,$i1
151 xor $i0,$lo
152 xor $i1,$hi
153
154 add \$128+8,%rsp
155 ret
156.Lend_mul_1x1:
157.size _mul_1x1,.-_mul_1x1
158___
159
160($rp,$a1,$a0,$b1,$b0) = $win64? ("%rcx","%rdx","%r8", "%r9","%r10") : # Win64 order
161 ("%rdi","%rsi","%rdx","%rcx","%r8"); # Unix order
162
163$code.=<<___;
164.extern OPENSSL_ia32cap_P
165.globl bn_GF2m_mul_2x2
166.type bn_GF2m_mul_2x2,\@abi-omnipotent
167.align 16
168bn_GF2m_mul_2x2:
169 mov OPENSSL_ia32cap_P(%rip),%rax
170 bt \$33,%rax
171 jnc .Lvanilla_mul_2x2
172
173 movq $a1,%xmm0
174 movq $b1,%xmm1
175 movq $a0,%xmm2
176___
177$code.=<<___ if ($win64);
178 movq 40(%rsp),%xmm3
179___
180$code.=<<___ if (!$win64);
181 movq $b0,%xmm3
182___
183$code.=<<___;
184 movdqa %xmm0,%xmm4
185 movdqa %xmm1,%xmm5
186 pclmulqdq \$0,%xmm1,%xmm0 # a1·b1
187 pxor %xmm2,%xmm4
188 pxor %xmm3,%xmm5
189 pclmulqdq \$0,%xmm3,%xmm2 # a0·b0
190 pclmulqdq \$0,%xmm5,%xmm4 # (a0+a1)·(b0+b1)
191 xorps %xmm0,%xmm4
192 xorps %xmm2,%xmm4 # (a0+a1)·(b0+b1)-a0·b0-a1·b1
193 movdqa %xmm4,%xmm5
194 pslldq \$8,%xmm4
195 psrldq \$8,%xmm5
196 pxor %xmm4,%xmm2
197 pxor %xmm5,%xmm0
198 movdqu %xmm2,0($rp)
199 movdqu %xmm0,16($rp)
200 ret
201
202.align 16
203.Lvanilla_mul_2x2:
204 lea -8*17(%rsp),%rsp
205___
206$code.=<<___ if ($win64);
207 mov `8*17+40`(%rsp),$b0
208 mov %rdi,8*15(%rsp)
209 mov %rsi,8*16(%rsp)
210___
211$code.=<<___;
212 mov %r14,8*10(%rsp)
213 mov %r13,8*11(%rsp)
214 mov %r12,8*12(%rsp)
215 mov %rbp,8*13(%rsp)
216 mov %rbx,8*14(%rsp)
217.Lbody_mul_2x2:
218 mov $rp,32(%rsp) # save the arguments
219 mov $a1,40(%rsp)
220 mov $a0,48(%rsp)
221 mov $b1,56(%rsp)
222 mov $b0,64(%rsp)
223
224 mov \$0xf,$mask
225 mov $a1,$a
226 mov $b1,$b
227 call _mul_1x1 # a1·b1
228 mov $lo,16(%rsp)
229 mov $hi,24(%rsp)
230
231 mov 48(%rsp),$a
232 mov 64(%rsp),$b
233 call _mul_1x1 # a0·b0
234 mov $lo,0(%rsp)
235 mov $hi,8(%rsp)
236
237 mov 40(%rsp),$a
238 mov 56(%rsp),$b
239 xor 48(%rsp),$a
240 xor 64(%rsp),$b
241 call _mul_1x1 # (a0+a1)·(b0+b1)
242___
243 @r=("%rbx","%rcx","%rdi","%rsi");
244$code.=<<___;
245 mov 0(%rsp),@r[0]
246 mov 8(%rsp),@r[1]
247 mov 16(%rsp),@r[2]
248 mov 24(%rsp),@r[3]
249 mov 32(%rsp),%rbp
250
251 xor $hi,$lo
252 xor @r[1],$hi
253 xor @r[0],$lo
254 mov @r[0],0(%rbp)
255 xor @r[2],$hi
256 mov @r[3],24(%rbp)
257 xor @r[3],$lo
258 xor @r[3],$hi
259 xor $hi,$lo
260 mov $hi,16(%rbp)
261 mov $lo,8(%rbp)
262
263 mov 8*10(%rsp),%r14
264 mov 8*11(%rsp),%r13
265 mov 8*12(%rsp),%r12
266 mov 8*13(%rsp),%rbp
267 mov 8*14(%rsp),%rbx
268___
269$code.=<<___ if ($win64);
270 mov 8*15(%rsp),%rdi
271 mov 8*16(%rsp),%rsi
272___
273$code.=<<___;
274 lea 8*17(%rsp),%rsp
275 ret
276.Lend_mul_2x2:
277.size bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2
278.asciz "GF(2^m) Multiplication for x86_64, CRYPTOGAMS by <appro\@openssl.org>"
279.align 16
280___
281
282# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame,
283# CONTEXT *context,DISPATCHER_CONTEXT *disp)
284if ($win64) {
285$rec="%rcx";
286$frame="%rdx";
287$context="%r8";
288$disp="%r9";
289
290$code.=<<___;
291.extern __imp_RtlVirtualUnwind
292
293.type se_handler,\@abi-omnipotent
294.align 16
295se_handler:
296 push %rsi
297 push %rdi
298 push %rbx
299 push %rbp
300 push %r12
301 push %r13
302 push %r14
303 push %r15
304 pushfq
305 sub \$64,%rsp
306
307 mov 152($context),%rax # pull context->Rsp
308 mov 248($context),%rbx # pull context->Rip
309
310 lea .Lbody_mul_2x2(%rip),%r10
311 cmp %r10,%rbx # context->Rip<"prologue" label
312 jb .Lin_prologue
313
314 mov 8*10(%rax),%r14 # mimic epilogue
315 mov 8*11(%rax),%r13
316 mov 8*12(%rax),%r12
317 mov 8*13(%rax),%rbp
318 mov 8*14(%rax),%rbx
319 mov 8*15(%rax),%rdi
320 mov 8*16(%rax),%rsi
321
322 mov %rbx,144($context) # restore context->Rbx
323 mov %rbp,160($context) # restore context->Rbp
324 mov %rsi,168($context) # restore context->Rsi
325 mov %rdi,176($context) # restore context->Rdi
326 mov %r12,216($context) # restore context->R12
327 mov %r13,224($context) # restore context->R13
328 mov %r14,232($context) # restore context->R14
329
330.Lin_prologue:
331 lea 8*17(%rax),%rax
332 mov %rax,152($context) # restore context->Rsp
333
334 mov 40($disp),%rdi # disp->ContextRecord
335 mov $context,%rsi # context
336 mov \$154,%ecx # sizeof(CONTEXT)
337 .long 0xa548f3fc # cld; rep movsq
338
339 mov $disp,%rsi
340 xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER
341 mov 8(%rsi),%rdx # arg2, disp->ImageBase
342 mov 0(%rsi),%r8 # arg3, disp->ControlPc
343 mov 16(%rsi),%r9 # arg4, disp->FunctionEntry
344 mov 40(%rsi),%r10 # disp->ContextRecord
345 lea 56(%rsi),%r11 # &disp->HandlerData
346 lea 24(%rsi),%r12 # &disp->EstablisherFrame
347 mov %r10,32(%rsp) # arg5
348 mov %r11,40(%rsp) # arg6
349 mov %r12,48(%rsp) # arg7
350 mov %rcx,56(%rsp) # arg8, (NULL)
351 call *__imp_RtlVirtualUnwind(%rip)
352
353 mov \$1,%eax # ExceptionContinueSearch
354 add \$64,%rsp
355 popfq
356 pop %r15
357 pop %r14
358 pop %r13
359 pop %r12
360 pop %rbp
361 pop %rbx
362 pop %rdi
363 pop %rsi
364 ret
365.size se_handler,.-se_handler
366
367.section .pdata
368.align 4
369 .rva _mul_1x1
370 .rva .Lend_mul_1x1
371 .rva .LSEH_info_1x1
372
373 .rva .Lvanilla_mul_2x2
374 .rva .Lend_mul_2x2
375 .rva .LSEH_info_2x2
376.section .xdata
377.align 8
378.LSEH_info_1x1:
379 .byte 0x01,0x07,0x02,0x00
380 .byte 0x07,0x01,0x11,0x00 # sub rsp,128+8
381.LSEH_info_2x2:
382 .byte 9,0,0,0
383 .rva se_handler
384___
385}
386
387$code =~ s/\`([^\`]*)\`/eval($1)/gem;
388print $code;
389close STDOUT;
diff --git a/src/lib/libssl/src/crypto/bn/asm/x86_64-mont.pl b/src/lib/libssl/src/crypto/bn/asm/x86_64-mont.pl
index 3b7a6f243f..5d79b35e1c 100755
--- a/src/lib/libssl/src/crypto/bn/asm/x86_64-mont.pl
+++ b/src/lib/libssl/src/crypto/bn/asm/x86_64-mont.pl
@@ -1,7 +1,7 @@
1#!/usr/bin/env perl 1#!/usr/bin/env perl
2 2
3# ==================================================================== 3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL 4# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and 5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further 6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/. 7# details see http://www.openssl.org/~appro/cryptogams/.
@@ -15,6 +15,20 @@
 15# respectable 50%. It remains to be seen if loop unrolling and 15# respectable 50%. It remains to be seen if loop unrolling and
16# dedicated squaring routine can provide further improvement... 16# dedicated squaring routine can provide further improvement...
17 17
18# July 2011.
19#
 20# Add a dedicated squaring procedure. Performance improvement varies
 21# from platform to platform, but on average it's ~5%/15%/25%/33%
22# for 512-/1024-/2048-/4096-bit RSA *sign* benchmarks respectively.
23
24# August 2011.
25#
 26# Unroll and modulo-schedule inner loops in such a manner that they
 27# are "fallen through" for input lengths of 8, which is critical for
 28# 1024-bit RSA *sign*. The average performance improvement in comparison
 29# to the *initial* version of this module from 2005 is ~0%/30%/40%/45%
30# for 512-/1024-/2048-/4096-bit RSA *sign* benchmarks respectively.
31
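As a baseline for what every bn_mul_mont flavour in this file computes, here is a word-serial (CIOS-style) Montgomery multiplication sketch in C. It is illustrative only, uses 32-bit limbs so the double-width products fit in uint64_t, and leaves out the 4x-unrolled and dedicated squaring paths added here:

#include <stdint.h>

/* rp[] = ap[] * bp[] * R^-1 mod np[], with R = 2^(32*num) and
 * n0 = -np[0]^-1 mod 2^32, the same contract as bn_mul_mont.  The
 * conditional subtraction at the end is the analogue of the
 * .Lsub/.Lcopy tail below. */
static void mont_mul_ref(uint32_t *rp, const uint32_t *ap, const uint32_t *bp,
                         const uint32_t *np, uint32_t n0, int num)
{
    uint32_t tp[num + 2];               /* C99 VLA, fine for a sketch */
    uint64_t acc, borrow;
    int i, j;

    for (j = 0; j < num + 2; j++)
        tp[j] = 0;

    for (i = 0; i < num; i++) {
        uint64_t carry = 0;
        uint32_t m1;

        for (j = 0; j < num; j++) {     /* tp += ap[] * bp[i] */
            acc = (uint64_t)ap[j] * bp[i] + tp[j] + carry;
            tp[j] = (uint32_t)acc;
            carry = acc >> 32;
        }
        acc = (uint64_t)tp[num] + carry;
        tp[num] = (uint32_t)acc;
        tp[num + 1] = (uint32_t)(acc >> 32);

        m1 = (uint32_t)(tp[0] * (uint64_t)n0);   /* "tp[0]*n0" */
        carry = 0;
        for (j = 0; j < num; j++) {     /* tp += np[] * m1, shifted down a word */
            acc = (uint64_t)np[j] * m1 + tp[j] + carry;
            if (j > 0)
                tp[j - 1] = (uint32_t)acc;       /* low word is 0 when j == 0 */
            carry = acc >> 32;
        }
        acc = (uint64_t)tp[num] + carry;
        tp[num - 1] = (uint32_t)acc;
        tp[num] = tp[num + 1] + (uint32_t)(acc >> 32);
        tp[num + 1] = 0;
    }

    borrow = 0;                         /* rp = tp - np, or tp if that borrows */
    for (j = 0; j < num; j++) {
        acc = (uint64_t)tp[j] - np[j] - borrow;
        rp[j] = (uint32_t)acc;
        borrow = (acc >> 32) & 1;
    }
    if (tp[num] < borrow)
        for (j = 0; j < num; j++)
            rp[j] = tp[j];
}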
18$flavour = shift; 32$flavour = shift;
19$output = shift; 33$output = shift;
20if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } 34if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
@@ -37,7 +51,6 @@ $n0="%r8"; # const BN_ULONG *n0,
37$num="%r9"; # int num); 51$num="%r9"; # int num);
38$lo0="%r10"; 52$lo0="%r10";
39$hi0="%r11"; 53$hi0="%r11";
40$bp="%r12"; # reassign $bp
41$hi1="%r13"; 54$hi1="%r13";
42$i="%r14"; 55$i="%r14";
43$j="%r15"; 56$j="%r15";
@@ -51,6 +64,16 @@ $code=<<___;
51.type bn_mul_mont,\@function,6 64.type bn_mul_mont,\@function,6
52.align 16 65.align 16
53bn_mul_mont: 66bn_mul_mont:
67 test \$3,${num}d
68 jnz .Lmul_enter
69 cmp \$8,${num}d
70 jb .Lmul_enter
71 cmp $ap,$bp
72 jne .Lmul4x_enter
73 jmp .Lsqr4x_enter
74
75.align 16
76.Lmul_enter:
54 push %rbx 77 push %rbx
55 push %rbp 78 push %rbp
56 push %r12 79 push %r12
@@ -66,48 +89,66 @@ bn_mul_mont:
66 and \$-1024,%rsp # minimize TLB usage 89 and \$-1024,%rsp # minimize TLB usage
67 90
68 mov %r11,8(%rsp,$num,8) # tp[num+1]=%rsp 91 mov %r11,8(%rsp,$num,8) # tp[num+1]=%rsp
69.Lprologue: 92.Lmul_body:
70 mov %rdx,$bp # $bp reassigned, remember? 93 mov $bp,%r12 # reassign $bp
71 94___
95 $bp="%r12";
96$code.=<<___;
72 mov ($n0),$n0 # pull n0[0] value 97 mov ($n0),$n0 # pull n0[0] value
98 mov ($bp),$m0 # m0=bp[0]
99 mov ($ap),%rax
73 100
74 xor $i,$i # i=0 101 xor $i,$i # i=0
75 xor $j,$j # j=0 102 xor $j,$j # j=0
76 103
77 mov ($bp),$m0 # m0=bp[0] 104 mov $n0,$m1
78 mov ($ap),%rax
79 mulq $m0 # ap[0]*bp[0] 105 mulq $m0 # ap[0]*bp[0]
80 mov %rax,$lo0 106 mov %rax,$lo0
81 mov %rdx,$hi0 107 mov ($np),%rax
82 108
83 imulq $n0,%rax # "tp[0]"*n0 109 imulq $lo0,$m1 # "tp[0]"*n0
84 mov %rax,$m1 110 mov %rdx,$hi0
85 111
86 mulq ($np) # np[0]*m1 112 mulq $m1 # np[0]*m1
87 add $lo0,%rax # discarded 113 add %rax,$lo0 # discarded
114 mov 8($ap),%rax
88 adc \$0,%rdx 115 adc \$0,%rdx
89 mov %rdx,$hi1 116 mov %rdx,$hi1
90 117
91 lea 1($j),$j # j++ 118 lea 1($j),$j # j++
119 jmp .L1st_enter
120
121.align 16
92.L1st: 122.L1st:
123 add %rax,$hi1
93 mov ($ap,$j,8),%rax 124 mov ($ap,$j,8),%rax
94 mulq $m0 # ap[j]*bp[0]
95 add $hi0,%rax
96 adc \$0,%rdx 125 adc \$0,%rdx
97 mov %rax,$lo0 126 add $hi0,$hi1 # np[j]*m1+ap[j]*bp[0]
127 mov $lo0,$hi0
128 adc \$0,%rdx
129 mov $hi1,-16(%rsp,$j,8) # tp[j-1]
130 mov %rdx,$hi1
131
132.L1st_enter:
133 mulq $m0 # ap[j]*bp[0]
134 add %rax,$hi0
98 mov ($np,$j,8),%rax 135 mov ($np,$j,8),%rax
99 mov %rdx,$hi0 136 adc \$0,%rdx
137 lea 1($j),$j # j++
138 mov %rdx,$lo0
100 139
101 mulq $m1 # np[j]*m1 140 mulq $m1 # np[j]*m1
102 add $hi1,%rax 141 cmp $num,$j
103 lea 1($j),$j # j++ 142 jne .L1st
143
144 add %rax,$hi1
145 mov ($ap),%rax # ap[0]
104 adc \$0,%rdx 146 adc \$0,%rdx
105 add $lo0,%rax # np[j]*m1+ap[j]*bp[0] 147 add $hi0,$hi1 # np[j]*m1+ap[j]*bp[0]
106 adc \$0,%rdx 148 adc \$0,%rdx
107 mov %rax,-16(%rsp,$j,8) # tp[j-1] 149 mov $hi1,-16(%rsp,$j,8) # tp[j-1]
108 cmp $num,$j
109 mov %rdx,$hi1 150 mov %rdx,$hi1
110 jl .L1st 151 mov $lo0,$hi0
111 152
112 xor %rdx,%rdx 153 xor %rdx,%rdx
113 add $hi0,$hi1 154 add $hi0,$hi1
@@ -116,50 +157,64 @@ bn_mul_mont:
116 mov %rdx,(%rsp,$num,8) # store upmost overflow bit 157 mov %rdx,(%rsp,$num,8) # store upmost overflow bit
117 158
118 lea 1($i),$i # i++ 159 lea 1($i),$i # i++
119.align 4 160 jmp .Louter
161.align 16
120.Louter: 162.Louter:
121 xor $j,$j # j=0
122
123 mov ($bp,$i,8),$m0 # m0=bp[i] 163 mov ($bp,$i,8),$m0 # m0=bp[i]
124 mov ($ap),%rax # ap[0] 164 xor $j,$j # j=0
165 mov $n0,$m1
166 mov (%rsp),$lo0
125 mulq $m0 # ap[0]*bp[i] 167 mulq $m0 # ap[0]*bp[i]
126 add (%rsp),%rax # ap[0]*bp[i]+tp[0] 168 add %rax,$lo0 # ap[0]*bp[i]+tp[0]
169 mov ($np),%rax
127 adc \$0,%rdx 170 adc \$0,%rdx
128 mov %rax,$lo0
129 mov %rdx,$hi0
130 171
131 imulq $n0,%rax # tp[0]*n0 172 imulq $lo0,$m1 # tp[0]*n0
132 mov %rax,$m1 173 mov %rdx,$hi0
133 174
134 mulq ($np,$j,8) # np[0]*m1 175 mulq $m1 # np[0]*m1
135 add $lo0,%rax # discarded 176 add %rax,$lo0 # discarded
136 mov 8(%rsp),$lo0 # tp[1] 177 mov 8($ap),%rax
137 adc \$0,%rdx 178 adc \$0,%rdx
179 mov 8(%rsp),$lo0 # tp[1]
138 mov %rdx,$hi1 180 mov %rdx,$hi1
139 181
140 lea 1($j),$j # j++ 182 lea 1($j),$j # j++
141.align 4 183 jmp .Linner_enter
184
185.align 16
142.Linner: 186.Linner:
187 add %rax,$hi1
143 mov ($ap,$j,8),%rax 188 mov ($ap,$j,8),%rax
144 mulq $m0 # ap[j]*bp[i]
145 add $hi0,%rax
146 adc \$0,%rdx 189 adc \$0,%rdx
147 add %rax,$lo0 # ap[j]*bp[i]+tp[j] 190 add $lo0,$hi1 # np[j]*m1+ap[j]*bp[i]+tp[j]
191 mov (%rsp,$j,8),$lo0
192 adc \$0,%rdx
193 mov $hi1,-16(%rsp,$j,8) # tp[j-1]
194 mov %rdx,$hi1
195
196.Linner_enter:
197 mulq $m0 # ap[j]*bp[i]
198 add %rax,$hi0
148 mov ($np,$j,8),%rax 199 mov ($np,$j,8),%rax
149 adc \$0,%rdx 200 adc \$0,%rdx
201 add $hi0,$lo0 # ap[j]*bp[i]+tp[j]
150 mov %rdx,$hi0 202 mov %rdx,$hi0
203 adc \$0,$hi0
204 lea 1($j),$j # j++
151 205
152 mulq $m1 # np[j]*m1 206 mulq $m1 # np[j]*m1
153 add $hi1,%rax 207 cmp $num,$j
154 lea 1($j),$j # j++ 208 jne .Linner
155 adc \$0,%rdx 209
156 add $lo0,%rax # np[j]*m1+ap[j]*bp[i]+tp[j] 210 add %rax,$hi1
211 mov ($ap),%rax # ap[0]
157 adc \$0,%rdx 212 adc \$0,%rdx
213 add $lo0,$hi1 # np[j]*m1+ap[j]*bp[i]+tp[j]
158 mov (%rsp,$j,8),$lo0 214 mov (%rsp,$j,8),$lo0
159 cmp $num,$j 215 adc \$0,%rdx
160 mov %rax,-16(%rsp,$j,8) # tp[j-1] 216 mov $hi1,-16(%rsp,$j,8) # tp[j-1]
161 mov %rdx,$hi1 217 mov %rdx,$hi1
162 jl .Linner
163 218
164 xor %rdx,%rdx 219 xor %rdx,%rdx
165 add $hi0,$hi1 220 add $hi0,$hi1
@@ -173,35 +228,449 @@ bn_mul_mont:
173 cmp $num,$i 228 cmp $num,$i
174 jl .Louter 229 jl .Louter
175 230
176 lea (%rsp),$ap # borrow ap for tp
177 lea -1($num),$j # j=num-1
178
179 mov ($ap),%rax # tp[0]
180 xor $i,$i # i=0 and clear CF! 231 xor $i,$i # i=0 and clear CF!
232 mov (%rsp),%rax # tp[0]
233 lea (%rsp),$ap # borrow ap for tp
234 mov $num,$j # j=num
181 jmp .Lsub 235 jmp .Lsub
182.align 16 236.align 16
183.Lsub: sbb ($np,$i,8),%rax 237.Lsub: sbb ($np,$i,8),%rax
184 mov %rax,($rp,$i,8) # rp[i]=tp[i]-np[i] 238 mov %rax,($rp,$i,8) # rp[i]=tp[i]-np[i]
185 dec $j # doesn't affect CF!
186 mov 8($ap,$i,8),%rax # tp[i+1] 239 mov 8($ap,$i,8),%rax # tp[i+1]
187 lea 1($i),$i # i++ 240 lea 1($i),$i # i++
 188 jge .Lsub 241 dec $j # doesn't affect CF!
242 jnz .Lsub
189 243
190 sbb \$0,%rax # handle upmost overflow bit 244 sbb \$0,%rax # handle upmost overflow bit
245 xor $i,$i
191 and %rax,$ap 246 and %rax,$ap
192 not %rax 247 not %rax
193 mov $rp,$np 248 mov $rp,$np
194 and %rax,$np 249 and %rax,$np
195 lea -1($num),$j 250 mov $num,$j # j=num
196 or $np,$ap # ap=borrow?tp:rp 251 or $np,$ap # ap=borrow?tp:rp
197.align 16 252.align 16
198.Lcopy: # copy or in-place refresh 253.Lcopy: # copy or in-place refresh
254 mov ($ap,$i,8),%rax
255 mov $i,(%rsp,$i,8) # zap temporary vector
256 mov %rax,($rp,$i,8) # rp[i]=tp[i]
257 lea 1($i),$i
258 sub \$1,$j
259 jnz .Lcopy
260
261 mov 8(%rsp,$num,8),%rsi # restore %rsp
262 mov \$1,%rax
263 mov (%rsi),%r15
264 mov 8(%rsi),%r14
265 mov 16(%rsi),%r13
266 mov 24(%rsi),%r12
267 mov 32(%rsi),%rbp
268 mov 40(%rsi),%rbx
269 lea 48(%rsi),%rsp
270.Lmul_epilogue:
271 ret
272.size bn_mul_mont,.-bn_mul_mont
273___
274{{{
275my @A=("%r10","%r11");
276my @N=("%r13","%rdi");
277$code.=<<___;
278.type bn_mul4x_mont,\@function,6
279.align 16
280bn_mul4x_mont:
281.Lmul4x_enter:
282 push %rbx
283 push %rbp
284 push %r12
285 push %r13
286 push %r14
287 push %r15
288
289 mov ${num}d,${num}d
290 lea 4($num),%r10
291 mov %rsp,%r11
292 neg %r10
293 lea (%rsp,%r10,8),%rsp # tp=alloca(8*(num+4))
294 and \$-1024,%rsp # minimize TLB usage
295
296 mov %r11,8(%rsp,$num,8) # tp[num+1]=%rsp
297.Lmul4x_body:
298 mov $rp,16(%rsp,$num,8) # tp[num+2]=$rp
299 mov %rdx,%r12 # reassign $bp
300___
301 $bp="%r12";
302$code.=<<___;
303 mov ($n0),$n0 # pull n0[0] value
304 mov ($bp),$m0 # m0=bp[0]
305 mov ($ap),%rax
306
307 xor $i,$i # i=0
308 xor $j,$j # j=0
309
310 mov $n0,$m1
311 mulq $m0 # ap[0]*bp[0]
312 mov %rax,$A[0]
313 mov ($np),%rax
314
315 imulq $A[0],$m1 # "tp[0]"*n0
316 mov %rdx,$A[1]
317
318 mulq $m1 # np[0]*m1
319 add %rax,$A[0] # discarded
320 mov 8($ap),%rax
321 adc \$0,%rdx
322 mov %rdx,$N[1]
323
324 mulq $m0
325 add %rax,$A[1]
326 mov 8($np),%rax
327 adc \$0,%rdx
328 mov %rdx,$A[0]
329
330 mulq $m1
331 add %rax,$N[1]
332 mov 16($ap),%rax
333 adc \$0,%rdx
334 add $A[1],$N[1]
335 lea 4($j),$j # j++
336 adc \$0,%rdx
337 mov $N[1],(%rsp)
338 mov %rdx,$N[0]
339 jmp .L1st4x
340.align 16
341.L1st4x:
342 mulq $m0 # ap[j]*bp[0]
343 add %rax,$A[0]
344 mov -16($np,$j,8),%rax
345 adc \$0,%rdx
346 mov %rdx,$A[1]
347
348 mulq $m1 # np[j]*m1
349 add %rax,$N[0]
350 mov -8($ap,$j,8),%rax
351 adc \$0,%rdx
352 add $A[0],$N[0] # np[j]*m1+ap[j]*bp[0]
353 adc \$0,%rdx
354 mov $N[0],-24(%rsp,$j,8) # tp[j-1]
355 mov %rdx,$N[1]
356
357 mulq $m0 # ap[j]*bp[0]
358 add %rax,$A[1]
359 mov -8($np,$j,8),%rax
360 adc \$0,%rdx
361 mov %rdx,$A[0]
362
363 mulq $m1 # np[j]*m1
364 add %rax,$N[1]
199 mov ($ap,$j,8),%rax 365 mov ($ap,$j,8),%rax
200 mov %rax,($rp,$j,8) # rp[i]=tp[i] 366 adc \$0,%rdx
201 mov $i,(%rsp,$j,8) # zap temporary vector 367 add $A[1],$N[1] # np[j]*m1+ap[j]*bp[0]
368 adc \$0,%rdx
369 mov $N[1],-16(%rsp,$j,8) # tp[j-1]
370 mov %rdx,$N[0]
371
372 mulq $m0 # ap[j]*bp[0]
373 add %rax,$A[0]
374 mov ($np,$j,8),%rax
375 adc \$0,%rdx
376 mov %rdx,$A[1]
377
378 mulq $m1 # np[j]*m1
379 add %rax,$N[0]
380 mov 8($ap,$j,8),%rax
381 adc \$0,%rdx
382 add $A[0],$N[0] # np[j]*m1+ap[j]*bp[0]
383 adc \$0,%rdx
384 mov $N[0],-8(%rsp,$j,8) # tp[j-1]
385 mov %rdx,$N[1]
386
387 mulq $m0 # ap[j]*bp[0]
388 add %rax,$A[1]
389 mov 8($np,$j,8),%rax
390 adc \$0,%rdx
391 lea 4($j),$j # j++
392 mov %rdx,$A[0]
393
394 mulq $m1 # np[j]*m1
395 add %rax,$N[1]
396 mov -16($ap,$j,8),%rax
397 adc \$0,%rdx
398 add $A[1],$N[1] # np[j]*m1+ap[j]*bp[0]
399 adc \$0,%rdx
400 mov $N[1],-32(%rsp,$j,8) # tp[j-1]
401 mov %rdx,$N[0]
402 cmp $num,$j
403 jl .L1st4x
404
405 mulq $m0 # ap[j]*bp[0]
406 add %rax,$A[0]
407 mov -16($np,$j,8),%rax
408 adc \$0,%rdx
409 mov %rdx,$A[1]
410
411 mulq $m1 # np[j]*m1
412 add %rax,$N[0]
413 mov -8($ap,$j,8),%rax
414 adc \$0,%rdx
415 add $A[0],$N[0] # np[j]*m1+ap[j]*bp[0]
416 adc \$0,%rdx
417 mov $N[0],-24(%rsp,$j,8) # tp[j-1]
418 mov %rdx,$N[1]
419
420 mulq $m0 # ap[j]*bp[0]
421 add %rax,$A[1]
422 mov -8($np,$j,8),%rax
423 adc \$0,%rdx
424 mov %rdx,$A[0]
425
426 mulq $m1 # np[j]*m1
427 add %rax,$N[1]
428 mov ($ap),%rax # ap[0]
429 adc \$0,%rdx
430 add $A[1],$N[1] # np[j]*m1+ap[j]*bp[0]
431 adc \$0,%rdx
432 mov $N[1],-16(%rsp,$j,8) # tp[j-1]
433 mov %rdx,$N[0]
434
435 xor $N[1],$N[1]
436 add $A[0],$N[0]
437 adc \$0,$N[1]
438 mov $N[0],-8(%rsp,$j,8)
439 mov $N[1],(%rsp,$j,8) # store upmost overflow bit
440
441 lea 1($i),$i # i++
442.align 4
443.Louter4x:
444 mov ($bp,$i,8),$m0 # m0=bp[i]
445 xor $j,$j # j=0
446 mov (%rsp),$A[0]
447 mov $n0,$m1
448 mulq $m0 # ap[0]*bp[i]
449 add %rax,$A[0] # ap[0]*bp[i]+tp[0]
450 mov ($np),%rax
451 adc \$0,%rdx
452
453 imulq $A[0],$m1 # tp[0]*n0
454 mov %rdx,$A[1]
455
456 mulq $m1 # np[0]*m1
457 add %rax,$A[0] # "$N[0]", discarded
458 mov 8($ap),%rax
459 adc \$0,%rdx
460 mov %rdx,$N[1]
461
462 mulq $m0 # ap[j]*bp[i]
463 add %rax,$A[1]
464 mov 8($np),%rax
465 adc \$0,%rdx
466 add 8(%rsp),$A[1] # +tp[1]
467 adc \$0,%rdx
468 mov %rdx,$A[0]
469
470 mulq $m1 # np[j]*m1
471 add %rax,$N[1]
472 mov 16($ap),%rax
473 adc \$0,%rdx
474 add $A[1],$N[1] # np[j]*m1+ap[j]*bp[i]+tp[j]
475 lea 4($j),$j # j+=2
476 adc \$0,%rdx
477 mov $N[1],(%rsp) # tp[j-1]
478 mov %rdx,$N[0]
479 jmp .Linner4x
480.align 16
481.Linner4x:
482 mulq $m0 # ap[j]*bp[i]
483 add %rax,$A[0]
484 mov -16($np,$j,8),%rax
485 adc \$0,%rdx
486 add -16(%rsp,$j,8),$A[0] # ap[j]*bp[i]+tp[j]
487 adc \$0,%rdx
488 mov %rdx,$A[1]
489
490 mulq $m1 # np[j]*m1
491 add %rax,$N[0]
492 mov -8($ap,$j,8),%rax
493 adc \$0,%rdx
494 add $A[0],$N[0]
495 adc \$0,%rdx
496 mov $N[0],-24(%rsp,$j,8) # tp[j-1]
497 mov %rdx,$N[1]
498
499 mulq $m0 # ap[j]*bp[i]
500 add %rax,$A[1]
501 mov -8($np,$j,8),%rax
502 adc \$0,%rdx
503 add -8(%rsp,$j,8),$A[1]
504 adc \$0,%rdx
505 mov %rdx,$A[0]
506
507 mulq $m1 # np[j]*m1
508 add %rax,$N[1]
509 mov ($ap,$j,8),%rax
510 adc \$0,%rdx
511 add $A[1],$N[1]
512 adc \$0,%rdx
513 mov $N[1],-16(%rsp,$j,8) # tp[j-1]
514 mov %rdx,$N[0]
515
516 mulq $m0 # ap[j]*bp[i]
517 add %rax,$A[0]
518 mov ($np,$j,8),%rax
519 adc \$0,%rdx
520 add (%rsp,$j,8),$A[0] # ap[j]*bp[i]+tp[j]
521 adc \$0,%rdx
522 mov %rdx,$A[1]
523
524 mulq $m1 # np[j]*m1
525 add %rax,$N[0]
526 mov 8($ap,$j,8),%rax
527 adc \$0,%rdx
528 add $A[0],$N[0]
529 adc \$0,%rdx
530 mov $N[0],-8(%rsp,$j,8) # tp[j-1]
531 mov %rdx,$N[1]
532
533 mulq $m0 # ap[j]*bp[i]
534 add %rax,$A[1]
535 mov 8($np,$j,8),%rax
536 adc \$0,%rdx
537 add 8(%rsp,$j,8),$A[1]
538 adc \$0,%rdx
539 lea 4($j),$j # j++
540 mov %rdx,$A[0]
541
542 mulq $m1 # np[j]*m1
543 add %rax,$N[1]
544 mov -16($ap,$j,8),%rax
545 adc \$0,%rdx
546 add $A[1],$N[1]
547 adc \$0,%rdx
548 mov $N[1],-32(%rsp,$j,8) # tp[j-1]
549 mov %rdx,$N[0]
550 cmp $num,$j
551 jl .Linner4x
552
553 mulq $m0 # ap[j]*bp[i]
554 add %rax,$A[0]
555 mov -16($np,$j,8),%rax
556 adc \$0,%rdx
557 add -16(%rsp,$j,8),$A[0] # ap[j]*bp[i]+tp[j]
558 adc \$0,%rdx
559 mov %rdx,$A[1]
560
561 mulq $m1 # np[j]*m1
562 add %rax,$N[0]
563 mov -8($ap,$j,8),%rax
564 adc \$0,%rdx
565 add $A[0],$N[0]
566 adc \$0,%rdx
567 mov $N[0],-24(%rsp,$j,8) # tp[j-1]
568 mov %rdx,$N[1]
569
570 mulq $m0 # ap[j]*bp[i]
571 add %rax,$A[1]
572 mov -8($np,$j,8),%rax
573 adc \$0,%rdx
574 add -8(%rsp,$j,8),$A[1]
575 adc \$0,%rdx
576 lea 1($i),$i # i++
577 mov %rdx,$A[0]
578
579 mulq $m1 # np[j]*m1
580 add %rax,$N[1]
581 mov ($ap),%rax # ap[0]
582 adc \$0,%rdx
583 add $A[1],$N[1]
584 adc \$0,%rdx
585 mov $N[1],-16(%rsp,$j,8) # tp[j-1]
586 mov %rdx,$N[0]
587
588 xor $N[1],$N[1]
589 add $A[0],$N[0]
590 adc \$0,$N[1]
591 add (%rsp,$num,8),$N[0] # pull upmost overflow bit
592 adc \$0,$N[1]
593 mov $N[0],-8(%rsp,$j,8)
594 mov $N[1],(%rsp,$j,8) # store upmost overflow bit
595
596 cmp $num,$i
597 jl .Louter4x
598___
599{
600my @ri=("%rax","%rdx",$m0,$m1);
601$code.=<<___;
602 mov 16(%rsp,$num,8),$rp # restore $rp
603 mov 0(%rsp),@ri[0] # tp[0]
604 pxor %xmm0,%xmm0
605 mov 8(%rsp),@ri[1] # tp[1]
606 shr \$2,$num # num/=4
607 lea (%rsp),$ap # borrow ap for tp
608 xor $i,$i # i=0 and clear CF!
609
610 sub 0($np),@ri[0]
611 mov 16($ap),@ri[2] # tp[2]
612 mov 24($ap),@ri[3] # tp[3]
613 sbb 8($np),@ri[1]
614 lea -1($num),$j # j=num/4-1
615 jmp .Lsub4x
616.align 16
617.Lsub4x:
618 mov @ri[0],0($rp,$i,8) # rp[i]=tp[i]-np[i]
619 mov @ri[1],8($rp,$i,8) # rp[i]=tp[i]-np[i]
620 sbb 16($np,$i,8),@ri[2]
621 mov 32($ap,$i,8),@ri[0] # tp[i+1]
622 mov 40($ap,$i,8),@ri[1]
623 sbb 24($np,$i,8),@ri[3]
624 mov @ri[2],16($rp,$i,8) # rp[i]=tp[i]-np[i]
625 mov @ri[3],24($rp,$i,8) # rp[i]=tp[i]-np[i]
626 sbb 32($np,$i,8),@ri[0]
627 mov 48($ap,$i,8),@ri[2]
628 mov 56($ap,$i,8),@ri[3]
629 sbb 40($np,$i,8),@ri[1]
630 lea 4($i),$i # i++
631 dec $j # doesn't affect CF!
632 jnz .Lsub4x
633
634 mov @ri[0],0($rp,$i,8) # rp[i]=tp[i]-np[i]
635 mov 32($ap,$i,8),@ri[0] # load overflow bit
636 sbb 16($np,$i,8),@ri[2]
637 mov @ri[1],8($rp,$i,8) # rp[i]=tp[i]-np[i]
638 sbb 24($np,$i,8),@ri[3]
639 mov @ri[2],16($rp,$i,8) # rp[i]=tp[i]-np[i]
640
641 sbb \$0,@ri[0] # handle upmost overflow bit
642 mov @ri[3],24($rp,$i,8) # rp[i]=tp[i]-np[i]
643 xor $i,$i # i=0
644 and @ri[0],$ap
645 not @ri[0]
646 mov $rp,$np
647 and @ri[0],$np
648 lea -1($num),$j
649 or $np,$ap # ap=borrow?tp:rp
650
651 movdqu ($ap),%xmm1
652 movdqa %xmm0,(%rsp)
653 movdqu %xmm1,($rp)
654 jmp .Lcopy4x
655.align 16
656.Lcopy4x: # copy or in-place refresh
657 movdqu 16($ap,$i),%xmm2
658 movdqu 32($ap,$i),%xmm1
659 movdqa %xmm0,16(%rsp,$i)
660 movdqu %xmm2,16($rp,$i)
661 movdqa %xmm0,32(%rsp,$i)
662 movdqu %xmm1,32($rp,$i)
663 lea 32($i),$i
202 dec $j 664 dec $j
203 jge .Lcopy 665 jnz .Lcopy4x
204 666
667 shl \$2,$num
668 movdqu 16($ap,$i),%xmm2
669 movdqa %xmm0,16(%rsp,$i)
670 movdqu %xmm2,16($rp,$i)
671___
672}
673$code.=<<___;
205 mov 8(%rsp,$num,8),%rsi # restore %rsp 674 mov 8(%rsp,$num,8),%rsi # restore %rsp
206 mov \$1,%rax 675 mov \$1,%rax
207 mov (%rsi),%r15 676 mov (%rsi),%r15
@@ -211,9 +680,823 @@ bn_mul_mont:
211 mov 32(%rsi),%rbp 680 mov 32(%rsi),%rbp
212 mov 40(%rsi),%rbx 681 mov 40(%rsi),%rbx
213 lea 48(%rsi),%rsp 682 lea 48(%rsi),%rsp
214.Lepilogue: 683.Lmul4x_epilogue:
215 ret 684 ret
216.size bn_mul_mont,.-bn_mul_mont 685.size bn_mul4x_mont,.-bn_mul4x_mont
686___
687}}}
688 {{{
689######################################################################
690# void bn_sqr4x_mont(
691my $rptr="%rdi"; # const BN_ULONG *rptr,
692my $aptr="%rsi"; # const BN_ULONG *aptr,
693my $bptr="%rdx"; # not used
694my $nptr="%rcx"; # const BN_ULONG *nptr,
695my $n0 ="%r8"; # const BN_ULONG *n0);
696my $num ="%r9"; # int num, has to be divisible by 4 and
697 # not less than 8
698
699my ($i,$j,$tptr)=("%rbp","%rcx",$rptr);
700my @A0=("%r10","%r11");
701my @A1=("%r12","%r13");
702my ($a0,$a1,$ai)=("%r14","%r15","%rbx");
703
704$code.=<<___;
705.type bn_sqr4x_mont,\@function,6
706.align 16
707bn_sqr4x_mont:
708.Lsqr4x_enter:
709 push %rbx
710 push %rbp
711 push %r12
712 push %r13
713 push %r14
714 push %r15
715
716 shl \$3,${num}d # convert $num to bytes
717 xor %r10,%r10
718 mov %rsp,%r11 # put aside %rsp
719 sub $num,%r10 # -$num
720 mov ($n0),$n0 # *n0
721 lea -72(%rsp,%r10,2),%rsp # alloca(frame+2*$num)
722 and \$-1024,%rsp # minimize TLB usage
723 ##############################################################
724 # Stack layout
725 #
726 # +0 saved $num, used in reduction section
727 # +8 &t[2*$num], used in reduction section
728 # +32 saved $rptr
729 # +40 saved $nptr
730 # +48 saved *n0
731 # +56 saved %rsp
732 # +64 t[2*$num]
733 #
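 #	(t[] at +64 is 2*$num limbs wide: the squaring pass below fills it
 #	 with the full double-width square, and the reduction section then
 #	 folds it back down to $num limbs)
 #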
734 mov $rptr,32(%rsp) # save $rptr
735 mov $nptr,40(%rsp)
736 mov $n0, 48(%rsp)
737 mov %r11, 56(%rsp) # save original %rsp
738.Lsqr4x_body:
739 ##############################################################
740 # Squaring part:
741 #
742 # a) multiply-n-add everything but a[i]*a[i];
743 # b) shift result of a) by 1 to the left and accumulate
744 # a[i]*a[i] products;
745 #
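 #	 (put differently, with b=2^64:
 #	     a^2 = 2*sum_{i<j} a[i]*a[j]*b^(i+j) + sum_i a[i]^2*b^(2*i),
 #	  so step a) accumulates only the cross products, and the doubling
 #	  in step b) supplies the factor of 2 before the a[i]*a[i] terms
 #	  are folded in)
 #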
746 lea 32(%r10),$i # $i=-($num-32)
747 lea ($aptr,$num),$aptr # end of a[] buffer, ($aptr,$i)=&ap[2]
748
749 mov $num,$j # $j=$num
750
751 # comments apply to $num==8 case
752 mov -32($aptr,$i),$a0 # a[0]
753 lea 64(%rsp,$num,2),$tptr # end of tp[] buffer, &tp[2*$num]
754 mov -24($aptr,$i),%rax # a[1]
755 lea -32($tptr,$i),$tptr # end of tp[] window, &tp[2*$num-"$i"]
756 mov -16($aptr,$i),$ai # a[2]
757 mov %rax,$a1
758
759 mul $a0 # a[1]*a[0]
760 mov %rax,$A0[0] # a[1]*a[0]
761 mov $ai,%rax # a[2]
762 mov %rdx,$A0[1]
763 mov $A0[0],-24($tptr,$i) # t[1]
764
765 xor $A0[0],$A0[0]
766 mul $a0 # a[2]*a[0]
767 add %rax,$A0[1]
768 mov $ai,%rax
769 adc %rdx,$A0[0]
770 mov $A0[1],-16($tptr,$i) # t[2]
771
772 lea -16($i),$j # j=-16
773
774
775 mov 8($aptr,$j),$ai # a[3]
776 mul $a1 # a[2]*a[1]
777 mov %rax,$A1[0] # a[2]*a[1]+t[3]
778 mov $ai,%rax
779 mov %rdx,$A1[1]
780
781 xor $A0[1],$A0[1]
782 add $A1[0],$A0[0]
783 lea 16($j),$j
784 adc \$0,$A0[1]
785 mul $a0 # a[3]*a[0]
786 add %rax,$A0[0] # a[3]*a[0]+a[2]*a[1]+t[3]
787 mov $ai,%rax
788 adc %rdx,$A0[1]
789 mov $A0[0],-8($tptr,$j) # t[3]
790 jmp .Lsqr4x_1st
791
792.align 16
793.Lsqr4x_1st:
794 mov ($aptr,$j),$ai # a[4]
795 xor $A1[0],$A1[0]
796 mul $a1 # a[3]*a[1]
797 add %rax,$A1[1] # a[3]*a[1]+t[4]
798 mov $ai,%rax
799 adc %rdx,$A1[0]
800
801 xor $A0[0],$A0[0]
802 add $A1[1],$A0[1]
803 adc \$0,$A0[0]
804 mul $a0 # a[4]*a[0]
805 add %rax,$A0[1] # a[4]*a[0]+a[3]*a[1]+t[4]
806 mov $ai,%rax # a[3]
807 adc %rdx,$A0[0]
808 mov $A0[1],($tptr,$j) # t[4]
809
810
811 mov 8($aptr,$j),$ai # a[5]
812 xor $A1[1],$A1[1]
813 mul $a1 # a[4]*a[3]
814 add %rax,$A1[0] # a[4]*a[3]+t[5]
815 mov $ai,%rax
816 adc %rdx,$A1[1]
817
818 xor $A0[1],$A0[1]
819 add $A1[0],$A0[0]
820 adc \$0,$A0[1]
821 mul $a0 # a[5]*a[2]
822 add %rax,$A0[0] # a[5]*a[2]+a[4]*a[3]+t[5]
823 mov $ai,%rax
824 adc %rdx,$A0[1]
825 mov $A0[0],8($tptr,$j) # t[5]
826
827 mov 16($aptr,$j),$ai # a[6]
828 xor $A1[0],$A1[0]
829 mul $a1 # a[5]*a[3]
830 add %rax,$A1[1] # a[5]*a[3]+t[6]
831 mov $ai,%rax
832 adc %rdx,$A1[0]
833
834 xor $A0[0],$A0[0]
835 add $A1[1],$A0[1]
836 adc \$0,$A0[0]
837 mul $a0 # a[6]*a[2]
838 add %rax,$A0[1] # a[6]*a[2]+a[5]*a[3]+t[6]
839 mov $ai,%rax # a[3]
840 adc %rdx,$A0[0]
841 mov $A0[1],16($tptr,$j) # t[6]
842
843
844 mov 24($aptr,$j),$ai # a[7]
845 xor $A1[1],$A1[1]
846 mul $a1 # a[6]*a[5]
847 add %rax,$A1[0] # a[6]*a[5]+t[7]
848 mov $ai,%rax
849 adc %rdx,$A1[1]
850
851 xor $A0[1],$A0[1]
852 add $A1[0],$A0[0]
853 lea 32($j),$j
854 adc \$0,$A0[1]
855 mul $a0 # a[7]*a[4]
856 add %rax,$A0[0] # a[7]*a[4]+a[6]*a[5]+t[6]
857 mov $ai,%rax
858 adc %rdx,$A0[1]
859 mov $A0[0],-8($tptr,$j) # t[7]
860
861 cmp \$0,$j
862 jne .Lsqr4x_1st
863
864 xor $A1[0],$A1[0]
865 add $A0[1],$A1[1]
866 adc \$0,$A1[0]
867 mul $a1 # a[7]*a[5]
868 add %rax,$A1[1]
869 adc %rdx,$A1[0]
870
871 mov $A1[1],($tptr) # t[8]
872 lea 16($i),$i
873 mov $A1[0],8($tptr) # t[9]
874 jmp .Lsqr4x_outer
875
876.align 16
877.Lsqr4x_outer: # comments apply to $num==6 case
878 mov -32($aptr,$i),$a0 # a[0]
879 lea 64(%rsp,$num,2),$tptr # end of tp[] buffer, &tp[2*$num]
880 mov -24($aptr,$i),%rax # a[1]
881 lea -32($tptr,$i),$tptr # end of tp[] window, &tp[2*$num-"$i"]
882 mov -16($aptr,$i),$ai # a[2]
883 mov %rax,$a1
884
885 mov -24($tptr,$i),$A0[0] # t[1]
886 xor $A0[1],$A0[1]
887 mul $a0 # a[1]*a[0]
888 add %rax,$A0[0] # a[1]*a[0]+t[1]
889 mov $ai,%rax # a[2]
890 adc %rdx,$A0[1]
891 mov $A0[0],-24($tptr,$i) # t[1]
892
893 xor $A0[0],$A0[0]
894 add -16($tptr,$i),$A0[1] # a[2]*a[0]+t[2]
895 adc \$0,$A0[0]
896 mul $a0 # a[2]*a[0]
897 add %rax,$A0[1]
898 mov $ai,%rax
899 adc %rdx,$A0[0]
900 mov $A0[1],-16($tptr,$i) # t[2]
901
902 lea -16($i),$j # j=-16
903 xor $A1[0],$A1[0]
904
905
906 mov 8($aptr,$j),$ai # a[3]
907 xor $A1[1],$A1[1]
908 add 8($tptr,$j),$A1[0]
909 adc \$0,$A1[1]
910 mul $a1 # a[2]*a[1]
911 add %rax,$A1[0] # a[2]*a[1]+t[3]
912 mov $ai,%rax
913 adc %rdx,$A1[1]
914
915 xor $A0[1],$A0[1]
916 add $A1[0],$A0[0]
917 adc \$0,$A0[1]
918 mul $a0 # a[3]*a[0]
919 add %rax,$A0[0] # a[3]*a[0]+a[2]*a[1]+t[3]
920 mov $ai,%rax
921 adc %rdx,$A0[1]
922 mov $A0[0],8($tptr,$j) # t[3]
923
924 lea 16($j),$j
925 jmp .Lsqr4x_inner
926
927.align 16
928.Lsqr4x_inner:
929 mov ($aptr,$j),$ai # a[4]
930 xor $A1[0],$A1[0]
931 add ($tptr,$j),$A1[1]
932 adc \$0,$A1[0]
933 mul $a1 # a[3]*a[1]
934 add %rax,$A1[1] # a[3]*a[1]+t[4]
935 mov $ai,%rax
936 adc %rdx,$A1[0]
937
938 xor $A0[0],$A0[0]
939 add $A1[1],$A0[1]
940 adc \$0,$A0[0]
941 mul $a0 # a[4]*a[0]
942 add %rax,$A0[1] # a[4]*a[0]+a[3]*a[1]+t[4]
943 mov $ai,%rax # a[3]
944 adc %rdx,$A0[0]
945 mov $A0[1],($tptr,$j) # t[4]
946
947 mov 8($aptr,$j),$ai # a[5]
948 xor $A1[1],$A1[1]
949 add 8($tptr,$j),$A1[0]
950 adc \$0,$A1[1]
951 mul $a1 # a[4]*a[3]
952 add %rax,$A1[0] # a[4]*a[3]+t[5]
953 mov $ai,%rax
954 adc %rdx,$A1[1]
955
956 xor $A0[1],$A0[1]
957 add $A1[0],$A0[0]
958 lea 16($j),$j # j++
959 adc \$0,$A0[1]
960 mul $a0 # a[5]*a[2]
961 add %rax,$A0[0] # a[5]*a[2]+a[4]*a[3]+t[5]
962 mov $ai,%rax
963 adc %rdx,$A0[1]
964 mov $A0[0],-8($tptr,$j) # t[5], "preloaded t[1]" below
965
966 cmp \$0,$j
967 jne .Lsqr4x_inner
968
969 xor $A1[0],$A1[0]
970 add $A0[1],$A1[1]
971 adc \$0,$A1[0]
972 mul $a1 # a[5]*a[3]
973 add %rax,$A1[1]
974 adc %rdx,$A1[0]
975
976 mov $A1[1],($tptr) # t[6], "preloaded t[2]" below
977 mov $A1[0],8($tptr) # t[7], "preloaded t[3]" below
978
979 add \$16,$i
980 jnz .Lsqr4x_outer
981
982 # comments apply to $num==4 case
983 mov -32($aptr),$a0 # a[0]
984 lea 64(%rsp,$num,2),$tptr # end of tp[] buffer, &tp[2*$num]
985 mov -24($aptr),%rax # a[1]
986 lea -32($tptr,$i),$tptr # end of tp[] window, &tp[2*$num-"$i"]
987 mov -16($aptr),$ai # a[2]
988 mov %rax,$a1
989
990 xor $A0[1],$A0[1]
991 mul $a0 # a[1]*a[0]
992 add %rax,$A0[0] # a[1]*a[0]+t[1], preloaded t[1]
993 mov $ai,%rax # a[2]
994 adc %rdx,$A0[1]
995 mov $A0[0],-24($tptr) # t[1]
996
997 xor $A0[0],$A0[0]
998 add $A1[1],$A0[1] # a[2]*a[0]+t[2], preloaded t[2]
999 adc \$0,$A0[0]
1000 mul $a0 # a[2]*a[0]
1001 add %rax,$A0[1]
1002 mov $ai,%rax
1003 adc %rdx,$A0[0]
1004 mov $A0[1],-16($tptr) # t[2]
1005
1006 mov -8($aptr),$ai # a[3]
1007 mul $a1 # a[2]*a[1]
1008 add %rax,$A1[0] # a[2]*a[1]+t[3], preloaded t[3]
1009 mov $ai,%rax
1010 adc \$0,%rdx
1011
1012 xor $A0[1],$A0[1]
1013 add $A1[0],$A0[0]
1014 mov %rdx,$A1[1]
1015 adc \$0,$A0[1]
1016 mul $a0 # a[3]*a[0]
1017 add %rax,$A0[0] # a[3]*a[0]+a[2]*a[1]+t[3]
1018 mov $ai,%rax
1019 adc %rdx,$A0[1]
1020 mov $A0[0],-8($tptr) # t[3]
1021
1022 xor $A1[0],$A1[0]
1023 add $A0[1],$A1[1]
1024 adc \$0,$A1[0]
1025 mul $a1 # a[3]*a[1]
1026 add %rax,$A1[1]
1027 mov -16($aptr),%rax # a[2]
1028 adc %rdx,$A1[0]
1029
1030 mov $A1[1],($tptr) # t[4]
1031 mov $A1[0],8($tptr) # t[5]
1032
1033 mul $ai # a[2]*a[3]
1034___
1035{
1036my ($shift,$carry)=($a0,$a1);
1037my @S=(@A1,$ai,$n0);
1038$code.=<<___;
1039 add \$16,$i
1040 xor $shift,$shift
1041 sub $num,$i # $i=16-$num
1042 xor $carry,$carry
1043
1044 add $A1[0],%rax # t[5]
1045 adc \$0,%rdx
1046 mov %rax,8($tptr) # t[5]
1047 mov %rdx,16($tptr) # t[6]
1048 mov $carry,24($tptr) # t[7]
1049
1050 mov -16($aptr,$i),%rax # a[0]
1051 lea 64(%rsp,$num,2),$tptr
1052 xor $A0[0],$A0[0] # t[0]
1053 mov -24($tptr,$i,2),$A0[1] # t[1]
1054
1055 lea ($shift,$A0[0],2),$S[0] # t[2*i]<<1 | shift
1056 shr \$63,$A0[0]
1057 lea ($j,$A0[1],2),$S[1] # t[2*i+1]<<1 |
1058 shr \$63,$A0[1]
1059 or $A0[0],$S[1] # | t[2*i]>>63
1060 mov -16($tptr,$i,2),$A0[0] # t[2*i+2] # prefetch
1061 mov $A0[1],$shift # shift=t[2*i+1]>>63
1062 mul %rax # a[i]*a[i]
1063 neg $carry # mov $carry,cf
1064 mov -8($tptr,$i,2),$A0[1] # t[2*i+2+1] # prefetch
1065 adc %rax,$S[0]
1066 mov -8($aptr,$i),%rax # a[i+1] # prefetch
1067 mov $S[0],-32($tptr,$i,2)
1068 adc %rdx,$S[1]
1069
1070 lea ($shift,$A0[0],2),$S[2] # t[2*i]<<1 | shift
1071 mov $S[1],-24($tptr,$i,2)
1072 sbb $carry,$carry # mov cf,$carry
1073 shr \$63,$A0[0]
1074 lea ($j,$A0[1],2),$S[3] # t[2*i+1]<<1 |
1075 shr \$63,$A0[1]
1076 or $A0[0],$S[3] # | t[2*i]>>63
1077 mov 0($tptr,$i,2),$A0[0] # t[2*i+2] # prefetch
1078 mov $A0[1],$shift # shift=t[2*i+1]>>63
1079 mul %rax # a[i]*a[i]
1080 neg $carry # mov $carry,cf
1081 mov 8($tptr,$i,2),$A0[1] # t[2*i+2+1] # prefetch
1082 adc %rax,$S[2]
1083 mov 0($aptr,$i),%rax # a[i+1] # prefetch
1084 mov $S[2],-16($tptr,$i,2)
1085 adc %rdx,$S[3]
1086 lea 16($i),$i
1087 mov $S[3],-40($tptr,$i,2)
1088 sbb $carry,$carry # mov cf,$carry
1089 jmp .Lsqr4x_shift_n_add
1090
1091.align 16
1092.Lsqr4x_shift_n_add:
1093 lea ($shift,$A0[0],2),$S[0] # t[2*i]<<1 | shift
1094 shr \$63,$A0[0]
1095 lea ($j,$A0[1],2),$S[1] # t[2*i+1]<<1 |
1096 shr \$63,$A0[1]
1097 or $A0[0],$S[1] # | t[2*i]>>63
1098 mov -16($tptr,$i,2),$A0[0] # t[2*i+2] # prefetch
1099 mov $A0[1],$shift # shift=t[2*i+1]>>63
1100 mul %rax # a[i]*a[i]
1101 neg $carry # mov $carry,cf
1102 mov -8($tptr,$i,2),$A0[1] # t[2*i+2+1] # prefetch
1103 adc %rax,$S[0]
1104 mov -8($aptr,$i),%rax # a[i+1] # prefetch
1105 mov $S[0],-32($tptr,$i,2)
1106 adc %rdx,$S[1]
1107
1108 lea ($shift,$A0[0],2),$S[2] # t[2*i]<<1 | shift
1109 mov $S[1],-24($tptr,$i,2)
1110 sbb $carry,$carry # mov cf,$carry
1111 shr \$63,$A0[0]
1112 lea ($j,$A0[1],2),$S[3] # t[2*i+1]<<1 |
1113 shr \$63,$A0[1]
1114 or $A0[0],$S[3] # | t[2*i]>>63
1115 mov 0($tptr,$i,2),$A0[0] # t[2*i+2] # prefetch
1116 mov $A0[1],$shift # shift=t[2*i+1]>>63
1117 mul %rax # a[i]*a[i]
1118 neg $carry # mov $carry,cf
1119 mov 8($tptr,$i,2),$A0[1] # t[2*i+2+1] # prefetch
1120 adc %rax,$S[2]
1121 mov 0($aptr,$i),%rax # a[i+1] # prefetch
1122 mov $S[2],-16($tptr,$i,2)
1123 adc %rdx,$S[3]
1124
1125 lea ($shift,$A0[0],2),$S[0] # t[2*i]<<1 | shift
1126 mov $S[3],-8($tptr,$i,2)
1127 sbb $carry,$carry # mov cf,$carry
1128 shr \$63,$A0[0]
1129 lea ($j,$A0[1],2),$S[1] # t[2*i+1]<<1 |
1130 shr \$63,$A0[1]
1131 or $A0[0],$S[1] # | t[2*i]>>63
1132 mov 16($tptr,$i,2),$A0[0] # t[2*i+2] # prefetch
1133 mov $A0[1],$shift # shift=t[2*i+1]>>63
1134 mul %rax # a[i]*a[i]
1135 neg $carry # mov $carry,cf
1136 mov 24($tptr,$i,2),$A0[1] # t[2*i+2+1] # prefetch
1137 adc %rax,$S[0]
1138 mov 8($aptr,$i),%rax # a[i+1] # prefetch
1139 mov $S[0],0($tptr,$i,2)
1140 adc %rdx,$S[1]
1141
1142 lea ($shift,$A0[0],2),$S[2] # t[2*i]<<1 | shift
1143 mov $S[1],8($tptr,$i,2)
1144 sbb $carry,$carry # mov cf,$carry
1145 shr \$63,$A0[0]
1146 lea ($j,$A0[1],2),$S[3] # t[2*i+1]<<1 |
1147 shr \$63,$A0[1]
1148 or $A0[0],$S[3] # | t[2*i]>>63
1149 mov 32($tptr,$i,2),$A0[0] # t[2*i+2] # prefetch
1150 mov $A0[1],$shift # shift=t[2*i+1]>>63
1151 mul %rax # a[i]*a[i]
1152 neg $carry # mov $carry,cf
1153 mov 40($tptr,$i,2),$A0[1] # t[2*i+2+1] # prefetch
1154 adc %rax,$S[2]
1155 mov 16($aptr,$i),%rax # a[i+1] # prefetch
1156 mov $S[2],16($tptr,$i,2)
1157 adc %rdx,$S[3]
1158 mov $S[3],24($tptr,$i,2)
1159 sbb $carry,$carry # mov cf,$carry
1160 add \$32,$i
1161 jnz .Lsqr4x_shift_n_add
1162
1163 lea ($shift,$A0[0],2),$S[0] # t[2*i]<<1 | shift
1164 shr \$63,$A0[0]
1165 lea ($j,$A0[1],2),$S[1] # t[2*i+1]<<1 |
1166 shr \$63,$A0[1]
1167 or $A0[0],$S[1] # | t[2*i]>>63
1168 mov -16($tptr),$A0[0] # t[2*i+2] # prefetch
1169 mov $A0[1],$shift # shift=t[2*i+1]>>63
1170 mul %rax # a[i]*a[i]
1171 neg $carry # mov $carry,cf
1172 mov -8($tptr),$A0[1] # t[2*i+2+1] # prefetch
1173 adc %rax,$S[0]
1174 mov -8($aptr),%rax # a[i+1] # prefetch
1175 mov $S[0],-32($tptr)
1176 adc %rdx,$S[1]
1177
1178 lea ($shift,$A0[0],2),$S[2] # t[2*i]<<1|shift
1179 mov $S[1],-24($tptr)
1180 sbb $carry,$carry # mov cf,$carry
1181 shr \$63,$A0[0]
1182 lea ($j,$A0[1],2),$S[3] # t[2*i+1]<<1 |
1183 shr \$63,$A0[1]
1184 or $A0[0],$S[3] # | t[2*i]>>63
1185 mul %rax # a[i]*a[i]
1186 neg $carry # mov $carry,cf
1187 adc %rax,$S[2]
1188 adc %rdx,$S[3]
1189 mov $S[2],-16($tptr)
1190 mov $S[3],-8($tptr)
1191___
1192}
1193##############################################################
1194# Montgomery reduction part, "word-by-word" algorithm.
1195#
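# Roughly: for the low limb t[0] of the current window a multiplier
# m = t[0]*n0 mod 2^64 is formed so that t[0]+m*n[0] == 0 (mod 2^64);
# m*n[] is added to t[] and the zeroed low limb is dropped, moving the
# window up one limb per step (two such steps are software-pipelined
# per outer iteration, see the "modsched" comments), while $topbit
# collects the final carry for the conditional subtraction that follows.
#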
1196{
1197my ($topbit,$nptr)=("%rbp",$aptr);
1198my ($m0,$m1)=($a0,$a1);
1199my @Ni=("%rbx","%r9");
1200$code.=<<___;
1201 mov 40(%rsp),$nptr # restore $nptr
1202 mov 48(%rsp),$n0 # restore *n0
1203 xor $j,$j
1204 mov $num,0(%rsp) # save $num
1205 sub $num,$j # $j=-$num
1206 mov 64(%rsp),$A0[0] # t[0] # modsched #
1207 mov $n0,$m0 # # modsched #
1208 lea 64(%rsp,$num,2),%rax # end of t[] buffer
1209 lea 64(%rsp,$num),$tptr # end of t[] window
1210 mov %rax,8(%rsp) # save end of t[] buffer
1211 lea ($nptr,$num),$nptr # end of n[] buffer
1212 xor $topbit,$topbit # $topbit=0
1213
1214 mov 0($nptr,$j),%rax # n[0] # modsched #
1215 mov 8($nptr,$j),$Ni[1] # n[1] # modsched #
1216 imulq $A0[0],$m0 # m0=t[0]*n0 # modsched #
1217 mov %rax,$Ni[0] # # modsched #
1218 jmp .Lsqr4x_mont_outer
1219
1220.align 16
1221.Lsqr4x_mont_outer:
1222 xor $A0[1],$A0[1]
1223 mul $m0 # n[0]*m0
1224 add %rax,$A0[0] # n[0]*m0+t[0]
1225 mov $Ni[1],%rax
1226 adc %rdx,$A0[1]
1227 mov $n0,$m1
1228
1229 xor $A0[0],$A0[0]
1230 add 8($tptr,$j),$A0[1]
1231 adc \$0,$A0[0]
1232 mul $m0 # n[1]*m0
1233 add %rax,$A0[1] # n[1]*m0+t[1]
1234 mov $Ni[0],%rax
1235 adc %rdx,$A0[0]
1236
1237 imulq $A0[1],$m1
1238
1239 mov 16($nptr,$j),$Ni[0] # n[2]
1240 xor $A1[1],$A1[1]
1241 add $A0[1],$A1[0]
1242 adc \$0,$A1[1]
1243 mul $m1 # n[0]*m1
1244 add %rax,$A1[0] # n[0]*m1+"t[1]"
1245 mov $Ni[0],%rax
1246 adc %rdx,$A1[1]
1247 mov $A1[0],8($tptr,$j) # "t[1]"
1248
1249 xor $A0[1],$A0[1]
1250 add 16($tptr,$j),$A0[0]
1251 adc \$0,$A0[1]
1252 mul $m0 # n[2]*m0
1253 add %rax,$A0[0] # n[2]*m0+t[2]
1254 mov $Ni[1],%rax
1255 adc %rdx,$A0[1]
1256
1257 mov 24($nptr,$j),$Ni[1] # n[3]
1258 xor $A1[0],$A1[0]
1259 add $A0[0],$A1[1]
1260 adc \$0,$A1[0]
1261 mul $m1 # n[1]*m1
1262 add %rax,$A1[1] # n[1]*m1+"t[2]"
1263 mov $Ni[1],%rax
1264 adc %rdx,$A1[0]
1265 mov $A1[1],16($tptr,$j) # "t[2]"
1266
1267 xor $A0[0],$A0[0]
1268 add 24($tptr,$j),$A0[1]
1269 lea 32($j),$j
1270 adc \$0,$A0[0]
1271 mul $m0 # n[3]*m0
1272 add %rax,$A0[1] # n[3]*m0+t[3]
1273 mov $Ni[0],%rax
1274 adc %rdx,$A0[0]
1275 jmp .Lsqr4x_mont_inner
1276
1277.align 16
1278.Lsqr4x_mont_inner:
1279 mov ($nptr,$j),$Ni[0] # n[4]
1280 xor $A1[1],$A1[1]
1281 add $A0[1],$A1[0]
1282 adc \$0,$A1[1]
1283 mul $m1 # n[2]*m1
1284 add %rax,$A1[0] # n[2]*m1+"t[3]"
1285 mov $Ni[0],%rax
1286 adc %rdx,$A1[1]
1287 mov $A1[0],-8($tptr,$j) # "t[3]"
1288
1289 xor $A0[1],$A0[1]
1290 add ($tptr,$j),$A0[0]
1291 adc \$0,$A0[1]
1292 mul $m0 # n[4]*m0
1293 add %rax,$A0[0] # n[4]*m0+t[4]
1294 mov $Ni[1],%rax
1295 adc %rdx,$A0[1]
1296
1297 mov 8($nptr,$j),$Ni[1] # n[5]
1298 xor $A1[0],$A1[0]
1299 add $A0[0],$A1[1]
1300 adc \$0,$A1[0]
1301 mul $m1 # n[3]*m1
1302 add %rax,$A1[1] # n[3]*m1+"t[4]"
1303 mov $Ni[1],%rax
1304 adc %rdx,$A1[0]
1305 mov $A1[1],($tptr,$j) # "t[4]"
1306
1307 xor $A0[0],$A0[0]
1308 add 8($tptr,$j),$A0[1]
1309 adc \$0,$A0[0]
1310 mul $m0 # n[5]*m0
1311 add %rax,$A0[1] # n[5]*m0+t[5]
1312 mov $Ni[0],%rax
1313 adc %rdx,$A0[0]
1314
1315
1316 mov 16($nptr,$j),$Ni[0] # n[6]
1317 xor $A1[1],$A1[1]
1318 add $A0[1],$A1[0]
1319 adc \$0,$A1[1]
1320 mul $m1 # n[4]*m1
1321 add %rax,$A1[0] # n[4]*m1+"t[5]"
1322 mov $Ni[0],%rax
1323 adc %rdx,$A1[1]
1324 mov $A1[0],8($tptr,$j) # "t[5]"
1325
1326 xor $A0[1],$A0[1]
1327 add 16($tptr,$j),$A0[0]
1328 adc \$0,$A0[1]
1329 mul $m0 # n[6]*m0
1330 add %rax,$A0[0] # n[6]*m0+t[6]
1331 mov $Ni[1],%rax
1332 adc %rdx,$A0[1]
1333
1334 mov 24($nptr,$j),$Ni[1] # n[7]
1335 xor $A1[0],$A1[0]
1336 add $A0[0],$A1[1]
1337 adc \$0,$A1[0]
1338 mul $m1 # n[5]*m1
1339 add %rax,$A1[1] # n[5]*m1+"t[6]"
1340 mov $Ni[1],%rax
1341 adc %rdx,$A1[0]
1342 mov $A1[1],16($tptr,$j) # "t[6]"
1343
1344 xor $A0[0],$A0[0]
1345 add 24($tptr,$j),$A0[1]
1346 lea 32($j),$j
1347 adc \$0,$A0[0]
1348 mul $m0 # n[7]*m0
1349 add %rax,$A0[1] # n[7]*m0+t[7]
1350 mov $Ni[0],%rax
1351 adc %rdx,$A0[0]
1352 cmp \$0,$j
1353 jne .Lsqr4x_mont_inner
1354
1355 sub 0(%rsp),$j # $j=-$num # modsched #
1356 mov $n0,$m0 # # modsched #
1357
1358 xor $A1[1],$A1[1]
1359 add $A0[1],$A1[0]
1360 adc \$0,$A1[1]
1361 mul $m1 # n[6]*m1
1362 add %rax,$A1[0] # n[6]*m1+"t[7]"
1363 mov $Ni[1],%rax
1364 adc %rdx,$A1[1]
1365 mov $A1[0],-8($tptr) # "t[7]"
1366
1367 xor $A0[1],$A0[1]
1368 add ($tptr),$A0[0] # +t[8]
1369 adc \$0,$A0[1]
1370 mov 0($nptr,$j),$Ni[0] # n[0] # modsched #
1371 add $topbit,$A0[0]
1372 adc \$0,$A0[1]
1373
1374 imulq 16($tptr,$j),$m0 # m0=t[0]*n0 # modsched #
1375 xor $A1[0],$A1[0]
1376 mov 8($nptr,$j),$Ni[1] # n[1] # modsched #
1377 add $A0[0],$A1[1]
1378 mov 16($tptr,$j),$A0[0] # t[0] # modsched #
1379 adc \$0,$A1[0]
1380 mul $m1 # n[7]*m1
1381 add %rax,$A1[1] # n[7]*m1+"t[8]"
1382 mov $Ni[0],%rax # # modsched #
1383 adc %rdx,$A1[0]
1384 mov $A1[1],($tptr) # "t[8]"
1385
1386 xor $topbit,$topbit
1387 add 8($tptr),$A1[0] # +t[9]
1388 adc $topbit,$topbit
1389 add $A0[1],$A1[0]
1390 lea 16($tptr),$tptr # "t[$num]>>128"
1391 adc \$0,$topbit
1392 mov $A1[0],-8($tptr) # "t[9]"
1393 cmp 8(%rsp),$tptr # are we done?
1394 jb .Lsqr4x_mont_outer
1395
1396 mov 0(%rsp),$num # restore $num
1397 mov $topbit,($tptr) # save $topbit
1398___
1399}
1400##############################################################
1401# Post-condition, 4x unrolled copy from bn_mul_mont
1402#
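# That is: compute t-n with a running borrow, then use the final borrow
# as a mask (the and/not/or sequence below) to select either t or t-n,
# copy the winner to rp[] and zap the temporary vector on the way.
#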
1403{
1404my ($tptr,$nptr)=("%rbx",$aptr);
1405my @ri=("%rax","%rdx","%r10","%r11");
1406$code.=<<___;
1407 mov 64(%rsp,$num),@ri[0] # tp[0]
1408 lea 64(%rsp,$num),$tptr # upper half of t[2*$num] holds result
1409 mov 40(%rsp),$nptr # restore $nptr
1410 shr \$5,$num # num/4
1411 mov 8($tptr),@ri[1] # t[1]
1412 xor $i,$i # i=0 and clear CF!
1413
1414 mov 32(%rsp),$rptr # restore $rptr
1415 sub 0($nptr),@ri[0]
1416 mov 16($tptr),@ri[2] # t[2]
1417 mov 24($tptr),@ri[3] # t[3]
1418 sbb 8($nptr),@ri[1]
1419 lea -1($num),$j # j=num/4-1
1420 jmp .Lsqr4x_sub
1421.align 16
1422.Lsqr4x_sub:
1423 mov @ri[0],0($rptr,$i,8) # rp[i]=tp[i]-np[i]
1424 mov @ri[1],8($rptr,$i,8) # rp[i]=tp[i]-np[i]
1425 sbb 16($nptr,$i,8),@ri[2]
1426 mov 32($tptr,$i,8),@ri[0] # tp[i+1]
1427 mov 40($tptr,$i,8),@ri[1]
1428 sbb 24($nptr,$i,8),@ri[3]
1429 mov @ri[2],16($rptr,$i,8) # rp[i]=tp[i]-np[i]
1430 mov @ri[3],24($rptr,$i,8) # rp[i]=tp[i]-np[i]
1431 sbb 32($nptr,$i,8),@ri[0]
1432 mov 48($tptr,$i,8),@ri[2]
1433 mov 56($tptr,$i,8),@ri[3]
1434 sbb 40($nptr,$i,8),@ri[1]
1435 lea 4($i),$i # i++
1436 dec $j # doesn't affect CF!
1437 jnz .Lsqr4x_sub
1438
1439 mov @ri[0],0($rptr,$i,8) # rp[i]=tp[i]-np[i]
1440 mov 32($tptr,$i,8),@ri[0] # load overflow bit
1441 sbb 16($nptr,$i,8),@ri[2]
1442 mov @ri[1],8($rptr,$i,8) # rp[i]=tp[i]-np[i]
1443 sbb 24($nptr,$i,8),@ri[3]
1444 mov @ri[2],16($rptr,$i,8) # rp[i]=tp[i]-np[i]
1445
1446 sbb \$0,@ri[0] # handle upmost overflow bit
1447 mov @ri[3],24($rptr,$i,8) # rp[i]=tp[i]-np[i]
1448 xor $i,$i # i=0
1449 and @ri[0],$tptr
1450 not @ri[0]
1451 mov $rptr,$nptr
1452 and @ri[0],$nptr
1453 lea -1($num),$j
1454 or $nptr,$tptr # tp=borrow?tp:rp
1455
1456 pxor %xmm0,%xmm0
1457 lea 64(%rsp,$num,8),$nptr
1458 movdqu ($tptr),%xmm1
1459 lea ($nptr,$num,8),$nptr
1460 movdqa %xmm0,64(%rsp) # zap lower half of temporary vector
1461 movdqa %xmm0,($nptr) # zap upper half of temporary vector
1462 movdqu %xmm1,($rptr)
1463 jmp .Lsqr4x_copy
1464.align 16
1465.Lsqr4x_copy: # copy or in-place refresh
1466 movdqu 16($tptr,$i),%xmm2
1467 movdqu 32($tptr,$i),%xmm1
1468 movdqa %xmm0,80(%rsp,$i) # zap lower half of temporary vector
1469 movdqa %xmm0,96(%rsp,$i) # zap lower half of temporary vector
1470 movdqa %xmm0,16($nptr,$i) # zap upper half of temporary vector
1471 movdqa %xmm0,32($nptr,$i) # zap upper half of temporary vector
1472 movdqu %xmm2,16($rptr,$i)
1473 movdqu %xmm1,32($rptr,$i)
1474 lea 32($i),$i
1475 dec $j
1476 jnz .Lsqr4x_copy
1477
1478 movdqu 16($tptr,$i),%xmm2
1479 movdqa %xmm0,80(%rsp,$i) # zap lower half of temporary vector
1480 movdqa %xmm0,16($nptr,$i) # zap upper half of temporary vector
1481 movdqu %xmm2,16($rptr,$i)
1482___
1483}
1484$code.=<<___;
1485 mov 56(%rsp),%rsi # restore %rsp
1486 mov \$1,%rax
1487 mov 0(%rsi),%r15
1488 mov 8(%rsi),%r14
1489 mov 16(%rsi),%r13
1490 mov 24(%rsi),%r12
1491 mov 32(%rsi),%rbp
1492 mov 40(%rsi),%rbx
1493 lea 48(%rsi),%rsp
1494.Lsqr4x_epilogue:
1495 ret
1496.size bn_sqr4x_mont,.-bn_sqr4x_mont
1497___
1498}}}
1499$code.=<<___;
217.asciz "Montgomery Multiplication for x86_64, CRYPTOGAMS by <appro\@openssl.org>" 1500.asciz "Montgomery Multiplication for x86_64, CRYPTOGAMS by <appro\@openssl.org>"
218.align 16 1501.align 16
219___ 1502___
@@ -228,9 +1511,9 @@ $disp="%r9";
228 1511
229$code.=<<___; 1512$code.=<<___;
230.extern __imp_RtlVirtualUnwind 1513.extern __imp_RtlVirtualUnwind
231.type se_handler,\@abi-omnipotent 1514.type mul_handler,\@abi-omnipotent
232.align 16 1515.align 16
233se_handler: 1516mul_handler:
234 push %rsi 1517 push %rsi
235 push %rdi 1518 push %rdi
236 push %rbx 1519 push %rbx
@@ -245,15 +1528,20 @@ se_handler:
245 mov 120($context),%rax # pull context->Rax 1528 mov 120($context),%rax # pull context->Rax
246 mov 248($context),%rbx # pull context->Rip 1529 mov 248($context),%rbx # pull context->Rip
247 1530
248 lea .Lprologue(%rip),%r10 1531 mov 8($disp),%rsi # disp->ImageBase
249 cmp %r10,%rbx # context->Rip<.Lprologue 1532 mov 56($disp),%r11 # disp->HandlerData
250 jb .Lin_prologue 1533
1534 mov 0(%r11),%r10d # HandlerData[0]
1535 lea (%rsi,%r10),%r10 # end of prologue label
1536 cmp %r10,%rbx # context->Rip<end of prologue label
1537 jb .Lcommon_seh_tail
251 1538
252 mov 152($context),%rax # pull context->Rsp 1539 mov 152($context),%rax # pull context->Rsp
253 1540
254 lea .Lepilogue(%rip),%r10 1541 mov 4(%r11),%r10d # HandlerData[1]
255 cmp %r10,%rbx # context->Rip>=.Lepilogue 1542 lea (%rsi,%r10),%r10 # epilogue label
256 jae .Lin_prologue 1543 cmp %r10,%rbx # context->Rip>=epilogue label
1544 jae .Lcommon_seh_tail
257 1545
258 mov 192($context),%r10 # pull $num 1546 mov 192($context),%r10 # pull $num
259 mov 8(%rax,%r10,8),%rax # pull saved stack pointer 1547 mov 8(%rax,%r10,8),%rax # pull saved stack pointer
@@ -272,7 +1560,53 @@ se_handler:
272 mov %r14,232($context) # restore context->R14 1560 mov %r14,232($context) # restore context->R14
273 mov %r15,240($context) # restore context->R15 1561 mov %r15,240($context) # restore context->R15
274 1562
275.Lin_prologue: 1563 jmp .Lcommon_seh_tail
1564.size mul_handler,.-mul_handler
1565
1566.type sqr_handler,\@abi-omnipotent
1567.align 16
1568sqr_handler:
1569 push %rsi
1570 push %rdi
1571 push %rbx
1572 push %rbp
1573 push %r12
1574 push %r13
1575 push %r14
1576 push %r15
1577 pushfq
1578 sub \$64,%rsp
1579
1580 mov 120($context),%rax # pull context->Rax
1581 mov 248($context),%rbx # pull context->Rip
1582
1583 lea .Lsqr4x_body(%rip),%r10
1584 cmp %r10,%rbx # context->Rip<.Lsqr4x_body
1585 jb .Lcommon_seh_tail
1586
1587 mov 152($context),%rax # pull context->Rsp
1588
1589 lea .Lsqr4x_epilogue(%rip),%r10
1590 cmp %r10,%rbx # context->Rip>=.Lsqr4x_epilogue
1591 jae .Lcommon_seh_tail
1592
1593 mov 56(%rax),%rax # pull saved stack pointer
1594 lea 48(%rax),%rax
1595
1596 mov -8(%rax),%rbx
1597 mov -16(%rax),%rbp
1598 mov -24(%rax),%r12
1599 mov -32(%rax),%r13
1600 mov -40(%rax),%r14
1601 mov -48(%rax),%r15
1602 mov %rbx,144($context) # restore context->Rbx
1603 mov %rbp,160($context) # restore context->Rbp
1604 mov %r12,216($context) # restore context->R12
1605 mov %r13,224($context) # restore context->R13
1606 mov %r14,232($context) # restore context->R14
1607 mov %r15,240($context) # restore context->R15
1608
1609.Lcommon_seh_tail:
276 mov 8(%rax),%rdi 1610 mov 8(%rax),%rdi
277 mov 16(%rax),%rsi 1611 mov 16(%rax),%rsi
278 mov %rax,152($context) # restore context->Rsp 1612 mov %rax,152($context) # restore context->Rsp
@@ -310,7 +1644,7 @@ se_handler:
310 pop %rdi 1644 pop %rdi
311 pop %rsi 1645 pop %rsi
312 ret 1646 ret
313.size se_handler,.-se_handler 1647.size sqr_handler,.-sqr_handler
314 1648
315.section .pdata 1649.section .pdata
316.align 4 1650.align 4
@@ -318,11 +1652,27 @@ se_handler:
318 .rva .LSEH_end_bn_mul_mont 1652 .rva .LSEH_end_bn_mul_mont
319 .rva .LSEH_info_bn_mul_mont 1653 .rva .LSEH_info_bn_mul_mont
320 1654
1655 .rva .LSEH_begin_bn_mul4x_mont
1656 .rva .LSEH_end_bn_mul4x_mont
1657 .rva .LSEH_info_bn_mul4x_mont
1658
1659 .rva .LSEH_begin_bn_sqr4x_mont
1660 .rva .LSEH_end_bn_sqr4x_mont
1661 .rva .LSEH_info_bn_sqr4x_mont
1662
321.section .xdata 1663.section .xdata
322.align 8 1664.align 8
323.LSEH_info_bn_mul_mont: 1665.LSEH_info_bn_mul_mont:
324 .byte 9,0,0,0 1666 .byte 9,0,0,0
325 .rva se_handler 1667 .rva mul_handler
1668 .rva .Lmul_body,.Lmul_epilogue # HandlerData[]
1669.LSEH_info_bn_mul4x_mont:
1670 .byte 9,0,0,0
1671 .rva mul_handler
1672 .rva .Lmul4x_body,.Lmul4x_epilogue # HandlerData[]
1673.LSEH_info_bn_sqr4x_mont:
1674 .byte 9,0,0,0
1675 .rva sqr_handler
326___ 1676___
327} 1677}
328 1678
diff --git a/src/lib/libssl/src/crypto/bn/asm/x86_64-mont5.pl b/src/lib/libssl/src/crypto/bn/asm/x86_64-mont5.pl
new file mode 100755
index 0000000000..057cda28aa
--- /dev/null
+++ b/src/lib/libssl/src/crypto/bn/asm/x86_64-mont5.pl
@@ -0,0 +1,1070 @@
1#!/usr/bin/env perl
2
3# ====================================================================
4# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9
10# August 2011.
11#
12# Companion to x86_64-mont.pl that optimizes cache-timing attack
13# countermeasures. The subroutines are produced by replacing bp[i]
14# references in their x86_64-mont.pl counterparts with cache-neutral
15# references to the powers table computed in BN_mod_exp_mont_consttime.
16# In addition, a subroutine that scatters elements of the powers table
17# is implemented, so that scatter-/gathering can be tuned without
18# bn_exp.c modifications.
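# The gather reconstructs each limb of the selected power by loading one
# quadword from every candidate cache line, AND-ing the loads with the
# .Lmagic_masks entries (all-zero except for the line that actually holds
# the limb) and OR-ing them together, so the memory access pattern is
# independent of the power index.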
19
20$flavour = shift;
21$output = shift;
22if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
23
24$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
25
26$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
27( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
28( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
29die "can't locate x86_64-xlate.pl";
30
31open STDOUT,"| $^X $xlate $flavour $output";
32
33# int bn_mul_mont_gather5(
34$rp="%rdi"; # BN_ULONG *rp,
35$ap="%rsi"; # const BN_ULONG *ap,
36$bp="%rdx"; # const BN_ULONG *bp,
37$np="%rcx"; # const BN_ULONG *np,
38$n0="%r8"; # const BN_ULONG *n0,
39$num="%r9"; # int num,
40 # int idx); # 0 to 2^5-1, "index" in $bp holding
41 # pre-computed powers of a', interlaced
42 # in such manner that b[0] is $bp[idx],
43 # b[1] is [2^5+idx], etc.
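 # (put differently: limb j of power
 # idx lives at $bp[j*2^5+idx], one
 # limb from each of the 32 powers
 # per 32-quadword group)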
44$lo0="%r10";
45$hi0="%r11";
46$hi1="%r13";
47$i="%r14";
48$j="%r15";
49$m0="%rbx";
50$m1="%rbp";
51
52$code=<<___;
53.text
54
55.globl bn_mul_mont_gather5
56.type bn_mul_mont_gather5,\@function,6
57.align 64
58bn_mul_mont_gather5:
59 test \$3,${num}d
60 jnz .Lmul_enter
61 cmp \$8,${num}d
62 jb .Lmul_enter
63 jmp .Lmul4x_enter
64
65.align 16
66.Lmul_enter:
67 mov ${num}d,${num}d
68 mov `($win64?56:8)`(%rsp),%r10d # load 7th argument
69 push %rbx
70 push %rbp
71 push %r12
72 push %r13
73 push %r14
74 push %r15
75___
76$code.=<<___ if ($win64);
77 lea -0x28(%rsp),%rsp
78 movaps %xmm6,(%rsp)
79 movaps %xmm7,0x10(%rsp)
80.Lmul_alloca:
81___
82$code.=<<___;
83 mov %rsp,%rax
84 lea 2($num),%r11
85 neg %r11
86 lea (%rsp,%r11,8),%rsp # tp=alloca(8*(num+2))
87 and \$-1024,%rsp # minimize TLB usage
88
89 mov %rax,8(%rsp,$num,8) # tp[num+1]=%rsp
90.Lmul_body:
91 mov $bp,%r12 # reassign $bp
92___
93 $bp="%r12";
94 $STRIDE=2**5*8; # 5 is "window size"
95 $N=$STRIDE/4; # should match cache line size
96$code.=<<___;
97 mov %r10,%r11
98 shr \$`log($N/8)/log(2)`,%r10
99 and \$`$N/8-1`,%r11
100 not %r10
101 lea .Lmagic_masks(%rip),%rax
102 and \$`2**5/($N/8)-1`,%r10 # 5 is "window size"
103 lea 96($bp,%r11,8),$bp # pointer within 1st cache line
104 movq 0(%rax,%r10,8),%xmm4 # set of masks denoting which
105 movq 8(%rax,%r10,8),%xmm5 # cache line contains element
106 movq 16(%rax,%r10,8),%xmm6 # denoted by 7th argument
107 movq 24(%rax,%r10,8),%xmm7
108
109 movq `0*$STRIDE/4-96`($bp),%xmm0
110 movq `1*$STRIDE/4-96`($bp),%xmm1
111 pand %xmm4,%xmm0
112 movq `2*$STRIDE/4-96`($bp),%xmm2
113 pand %xmm5,%xmm1
114 movq `3*$STRIDE/4-96`($bp),%xmm3
115 pand %xmm6,%xmm2
116 por %xmm1,%xmm0
117 pand %xmm7,%xmm3
118 por %xmm2,%xmm0
119 lea $STRIDE($bp),$bp
120 por %xmm3,%xmm0
121
122 movq %xmm0,$m0 # m0=bp[0]
123
124 mov ($n0),$n0 # pull n0[0] value
125 mov ($ap),%rax
126
127 xor $i,$i # i=0
128 xor $j,$j # j=0
129
130 movq `0*$STRIDE/4-96`($bp),%xmm0
131 movq `1*$STRIDE/4-96`($bp),%xmm1
132 pand %xmm4,%xmm0
133 movq `2*$STRIDE/4-96`($bp),%xmm2
134 pand %xmm5,%xmm1
135
136 mov $n0,$m1
137 mulq $m0 # ap[0]*bp[0]
138 mov %rax,$lo0
139 mov ($np),%rax
140
141 movq `3*$STRIDE/4-96`($bp),%xmm3
142 pand %xmm6,%xmm2
143 por %xmm1,%xmm0
144 pand %xmm7,%xmm3
145
146 imulq $lo0,$m1 # "tp[0]"*n0
147 mov %rdx,$hi0
148
149 por %xmm2,%xmm0
150 lea $STRIDE($bp),$bp
151 por %xmm3,%xmm0
152
153 mulq $m1 # np[0]*m1
154 add %rax,$lo0 # discarded
155 mov 8($ap),%rax
156 adc \$0,%rdx
157 mov %rdx,$hi1
158
159 lea 1($j),$j # j++
160 jmp .L1st_enter
161
162.align 16
163.L1st:
164 add %rax,$hi1
165 mov ($ap,$j,8),%rax
166 adc \$0,%rdx
167 add $hi0,$hi1 # np[j]*m1+ap[j]*bp[0]
168 mov $lo0,$hi0
169 adc \$0,%rdx
170 mov $hi1,-16(%rsp,$j,8) # tp[j-1]
171 mov %rdx,$hi1
172
173.L1st_enter:
174 mulq $m0 # ap[j]*bp[0]
175 add %rax,$hi0
176 mov ($np,$j,8),%rax
177 adc \$0,%rdx
178 lea 1($j),$j # j++
179 mov %rdx,$lo0
180
181 mulq $m1 # np[j]*m1
182 cmp $num,$j
183 jne .L1st
184
185 movq %xmm0,$m0 # bp[1]
186
187 add %rax,$hi1
188 mov ($ap),%rax # ap[0]
189 adc \$0,%rdx
190 add $hi0,$hi1 # np[j]*m1+ap[j]*bp[0]
191 adc \$0,%rdx
192 mov $hi1,-16(%rsp,$j,8) # tp[j-1]
193 mov %rdx,$hi1
194 mov $lo0,$hi0
195
196 xor %rdx,%rdx
197 add $hi0,$hi1
198 adc \$0,%rdx
199 mov $hi1,-8(%rsp,$num,8)
200 mov %rdx,(%rsp,$num,8) # store upmost overflow bit
201
202 lea 1($i),$i # i++
203 jmp .Louter
204.align 16
205.Louter:
206 xor $j,$j # j=0
207 mov $n0,$m1
208 mov (%rsp),$lo0
209
210 movq `0*$STRIDE/4-96`($bp),%xmm0
211 movq `1*$STRIDE/4-96`($bp),%xmm1
212 pand %xmm4,%xmm0
213 movq `2*$STRIDE/4-96`($bp),%xmm2
214 pand %xmm5,%xmm1
215
216 mulq $m0 # ap[0]*bp[i]
217 add %rax,$lo0 # ap[0]*bp[i]+tp[0]
218 mov ($np),%rax
219 adc \$0,%rdx
220
221 movq `3*$STRIDE/4-96`($bp),%xmm3
222 pand %xmm6,%xmm2
223 por %xmm1,%xmm0
224 pand %xmm7,%xmm3
225
226 imulq $lo0,$m1 # tp[0]*n0
227 mov %rdx,$hi0
228
229 por %xmm2,%xmm0
230 lea $STRIDE($bp),$bp
231 por %xmm3,%xmm0
232
233 mulq $m1 # np[0]*m1
234 add %rax,$lo0 # discarded
235 mov 8($ap),%rax
236 adc \$0,%rdx
237 mov 8(%rsp),$lo0 # tp[1]
238 mov %rdx,$hi1
239
240 lea 1($j),$j # j++
241 jmp .Linner_enter
242
243.align 16
244.Linner:
245 add %rax,$hi1
246 mov ($ap,$j,8),%rax
247 adc \$0,%rdx
248 add $lo0,$hi1 # np[j]*m1+ap[j]*bp[i]+tp[j]
249 mov (%rsp,$j,8),$lo0
250 adc \$0,%rdx
251 mov $hi1,-16(%rsp,$j,8) # tp[j-1]
252 mov %rdx,$hi1
253
254.Linner_enter:
255 mulq $m0 # ap[j]*bp[i]
256 add %rax,$hi0
257 mov ($np,$j,8),%rax
258 adc \$0,%rdx
259 add $hi0,$lo0 # ap[j]*bp[i]+tp[j]
260 mov %rdx,$hi0
261 adc \$0,$hi0
262 lea 1($j),$j # j++
263
264 mulq $m1 # np[j]*m1
265 cmp $num,$j
266 jne .Linner
267
268 movq %xmm0,$m0 # bp[i+1]
269
270 add %rax,$hi1
271 mov ($ap),%rax # ap[0]
272 adc \$0,%rdx
273 add $lo0,$hi1 # np[j]*m1+ap[j]*bp[i]+tp[j]
274 mov (%rsp,$j,8),$lo0
275 adc \$0,%rdx
276 mov $hi1,-16(%rsp,$j,8) # tp[j-1]
277 mov %rdx,$hi1
278
279 xor %rdx,%rdx
280 add $hi0,$hi1
281 adc \$0,%rdx
282 add $lo0,$hi1 # pull upmost overflow bit
283 adc \$0,%rdx
284 mov $hi1,-8(%rsp,$num,8)
285 mov %rdx,(%rsp,$num,8) # store upmost overflow bit
286
287 lea 1($i),$i # i++
288 cmp $num,$i
289 jl .Louter
290
291 xor $i,$i # i=0 and clear CF!
292 mov (%rsp),%rax # tp[0]
293 lea (%rsp),$ap # borrow ap for tp
294 mov $num,$j # j=num
295 jmp .Lsub
296.align 16
297.Lsub: sbb ($np,$i,8),%rax
298 mov %rax,($rp,$i,8) # rp[i]=tp[i]-np[i]
299 mov 8($ap,$i,8),%rax # tp[i+1]
300 lea 1($i),$i # i++
301 dec $j # doesn't affect CF!
302 jnz .Lsub
303
304 sbb \$0,%rax # handle upmost overflow bit
305 xor $i,$i
306 and %rax,$ap
307 not %rax
308 mov $rp,$np
309 and %rax,$np
310 mov $num,$j # j=num
311 or $np,$ap # ap=borrow?tp:rp
312.align 16
313.Lcopy: # copy or in-place refresh
314 mov ($ap,$i,8),%rax
315 mov $i,(%rsp,$i,8) # zap temporary vector
316 mov %rax,($rp,$i,8) # rp[i]=tp[i]
317 lea 1($i),$i
318 sub \$1,$j
319 jnz .Lcopy
320
321 mov 8(%rsp,$num,8),%rsi # restore %rsp
322 mov \$1,%rax
323___
324$code.=<<___ if ($win64);
325 movaps (%rsi),%xmm6
326 movaps 0x10(%rsi),%xmm7
327 lea 0x28(%rsi),%rsi
328___
329$code.=<<___;
330 mov (%rsi),%r15
331 mov 8(%rsi),%r14
332 mov 16(%rsi),%r13
333 mov 24(%rsi),%r12
334 mov 32(%rsi),%rbp
335 mov 40(%rsi),%rbx
336 lea 48(%rsi),%rsp
337.Lmul_epilogue:
338 ret
339.size bn_mul_mont_gather5,.-bn_mul_mont_gather5
340___
341{{{
342my @A=("%r10","%r11");
343my @N=("%r13","%rdi");
344$code.=<<___;
345.type bn_mul4x_mont_gather5,\@function,6
346.align 16
347bn_mul4x_mont_gather5:
348.Lmul4x_enter:
349 mov ${num}d,${num}d
350 mov `($win64?56:8)`(%rsp),%r10d # load 7th argument
351 push %rbx
352 push %rbp
353 push %r12
354 push %r13
355 push %r14
356 push %r15
357___
358$code.=<<___ if ($win64);
359 lea -0x28(%rsp),%rsp
360 movaps %xmm6,(%rsp)
361 movaps %xmm7,0x10(%rsp)
362.Lmul4x_alloca:
363___
364$code.=<<___;
365 mov %rsp,%rax
366 lea 4($num),%r11
367 neg %r11
368 lea (%rsp,%r11,8),%rsp # tp=alloca(8*(num+4))
369 and \$-1024,%rsp # minimize TLB usage
370
371 mov %rax,8(%rsp,$num,8) # tp[num+1]=%rsp
372.Lmul4x_body:
373 mov $rp,16(%rsp,$num,8) # tp[num+2]=$rp
374 mov %rdx,%r12 # reassign $bp
375___
376 $bp="%r12";
377 $STRIDE=2**5*8; # 5 is "window size"
378 $N=$STRIDE/4; # should match cache line size
379$code.=<<___;
380 mov %r10,%r11
381 shr \$`log($N/8)/log(2)`,%r10
382 and \$`$N/8-1`,%r11
383 not %r10
384 lea .Lmagic_masks(%rip),%rax
385 and \$`2**5/($N/8)-1`,%r10 # 5 is "window size"
386 lea 96($bp,%r11,8),$bp # pointer within 1st cache line
387 movq 0(%rax,%r10,8),%xmm4 # set of masks denoting which
388 movq 8(%rax,%r10,8),%xmm5 # cache line contains element
389 movq 16(%rax,%r10,8),%xmm6 # denoted by 7th argument
390 movq 24(%rax,%r10,8),%xmm7
391
392 movq `0*$STRIDE/4-96`($bp),%xmm0
393 movq `1*$STRIDE/4-96`($bp),%xmm1
394 pand %xmm4,%xmm0
395 movq `2*$STRIDE/4-96`($bp),%xmm2
396 pand %xmm5,%xmm1
397 movq `3*$STRIDE/4-96`($bp),%xmm3
398 pand %xmm6,%xmm2
399 por %xmm1,%xmm0
400 pand %xmm7,%xmm3
401 por %xmm2,%xmm0
402 lea $STRIDE($bp),$bp
403 por %xmm3,%xmm0
404
405 movq %xmm0,$m0 # m0=bp[0]
406 mov ($n0),$n0 # pull n0[0] value
407 mov ($ap),%rax
408
409 xor $i,$i # i=0
410 xor $j,$j # j=0
411
412 movq `0*$STRIDE/4-96`($bp),%xmm0
413 movq `1*$STRIDE/4-96`($bp),%xmm1
414 pand %xmm4,%xmm0
415 movq `2*$STRIDE/4-96`($bp),%xmm2
416 pand %xmm5,%xmm1
417
418 mov $n0,$m1
419 mulq $m0 # ap[0]*bp[0]
420 mov %rax,$A[0]
421 mov ($np),%rax
422
423 movq `3*$STRIDE/4-96`($bp),%xmm3
424 pand %xmm6,%xmm2
425 por %xmm1,%xmm0
426 pand %xmm7,%xmm3
427
428 imulq $A[0],$m1 # "tp[0]"*n0
429 mov %rdx,$A[1]
430
431 por %xmm2,%xmm0
432 lea $STRIDE($bp),$bp
433 por %xmm3,%xmm0
434
435 mulq $m1 # np[0]*m1
436 add %rax,$A[0] # discarded
437 mov 8($ap),%rax
438 adc \$0,%rdx
439 mov %rdx,$N[1]
440
441 mulq $m0
442 add %rax,$A[1]
443 mov 8($np),%rax
444 adc \$0,%rdx
445 mov %rdx,$A[0]
446
447 mulq $m1
448 add %rax,$N[1]
449 mov 16($ap),%rax
450 adc \$0,%rdx
451 add $A[1],$N[1]
452 lea 4($j),$j # j++
453 adc \$0,%rdx
454 mov $N[1],(%rsp)
455 mov %rdx,$N[0]
456 jmp .L1st4x
457.align 16
458.L1st4x:
459 mulq $m0 # ap[j]*bp[0]
460 add %rax,$A[0]
461 mov -16($np,$j,8),%rax
462 adc \$0,%rdx
463 mov %rdx,$A[1]
464
465 mulq $m1 # np[j]*m1
466 add %rax,$N[0]
467 mov -8($ap,$j,8),%rax
468 adc \$0,%rdx
469 add $A[0],$N[0] # np[j]*m1+ap[j]*bp[0]
470 adc \$0,%rdx
471 mov $N[0],-24(%rsp,$j,8) # tp[j-1]
472 mov %rdx,$N[1]
473
474 mulq $m0 # ap[j]*bp[0]
475 add %rax,$A[1]
476 mov -8($np,$j,8),%rax
477 adc \$0,%rdx
478 mov %rdx,$A[0]
479
480 mulq $m1 # np[j]*m1
481 add %rax,$N[1]
482 mov ($ap,$j,8),%rax
483 adc \$0,%rdx
484 add $A[1],$N[1] # np[j]*m1+ap[j]*bp[0]
485 adc \$0,%rdx
486 mov $N[1],-16(%rsp,$j,8) # tp[j-1]
487 mov %rdx,$N[0]
488
489 mulq $m0 # ap[j]*bp[0]
490 add %rax,$A[0]
491 mov ($np,$j,8),%rax
492 adc \$0,%rdx
493 mov %rdx,$A[1]
494
495 mulq $m1 # np[j]*m1
496 add %rax,$N[0]
497 mov 8($ap,$j,8),%rax
498 adc \$0,%rdx
499 add $A[0],$N[0] # np[j]*m1+ap[j]*bp[0]
500 adc \$0,%rdx
501 mov $N[0],-8(%rsp,$j,8) # tp[j-1]
502 mov %rdx,$N[1]
503
504 mulq $m0 # ap[j]*bp[0]
505 add %rax,$A[1]
506 mov 8($np,$j,8),%rax
507 adc \$0,%rdx
508 lea 4($j),$j # j++
509 mov %rdx,$A[0]
510
511 mulq $m1 # np[j]*m1
512 add %rax,$N[1]
513 mov -16($ap,$j,8),%rax
514 adc \$0,%rdx
515 add $A[1],$N[1] # np[j]*m1+ap[j]*bp[0]
516 adc \$0,%rdx
517 mov $N[1],-32(%rsp,$j,8) # tp[j-1]
518 mov %rdx,$N[0]
519 cmp $num,$j
520 jl .L1st4x
521
522 mulq $m0 # ap[j]*bp[0]
523 add %rax,$A[0]
524 mov -16($np,$j,8),%rax
525 adc \$0,%rdx
526 mov %rdx,$A[1]
527
528 mulq $m1 # np[j]*m1
529 add %rax,$N[0]
530 mov -8($ap,$j,8),%rax
531 adc \$0,%rdx
532 add $A[0],$N[0] # np[j]*m1+ap[j]*bp[0]
533 adc \$0,%rdx
534 mov $N[0],-24(%rsp,$j,8) # tp[j-1]
535 mov %rdx,$N[1]
536
537 mulq $m0 # ap[j]*bp[0]
538 add %rax,$A[1]
539 mov -8($np,$j,8),%rax
540 adc \$0,%rdx
541 mov %rdx,$A[0]
542
543 mulq $m1 # np[j]*m1
544 add %rax,$N[1]
545 mov ($ap),%rax # ap[0]
546 adc \$0,%rdx
547 add $A[1],$N[1] # np[j]*m1+ap[j]*bp[0]
548 adc \$0,%rdx
549 mov $N[1],-16(%rsp,$j,8) # tp[j-1]
550 mov %rdx,$N[0]
551
552 movq %xmm0,$m0 # bp[1]
553
554 xor $N[1],$N[1]
555 add $A[0],$N[0]
556 adc \$0,$N[1]
557 mov $N[0],-8(%rsp,$j,8)
558 mov $N[1],(%rsp,$j,8) # store upmost overflow bit
559
560 lea 1($i),$i # i++
561.align 4
562.Louter4x:
563 xor $j,$j # j=0
564 movq `0*$STRIDE/4-96`($bp),%xmm0
565 movq `1*$STRIDE/4-96`($bp),%xmm1
566 pand %xmm4,%xmm0
567 movq `2*$STRIDE/4-96`($bp),%xmm2
568 pand %xmm5,%xmm1
569
570 mov (%rsp),$A[0]
571 mov $n0,$m1
572 mulq $m0 # ap[0]*bp[i]
573 add %rax,$A[0] # ap[0]*bp[i]+tp[0]
574 mov ($np),%rax
575 adc \$0,%rdx
576
577 movq `3*$STRIDE/4-96`($bp),%xmm3
578 pand %xmm6,%xmm2
579 por %xmm1,%xmm0
580 pand %xmm7,%xmm3
581
582 imulq $A[0],$m1 # tp[0]*n0
583 mov %rdx,$A[1]
584
585 por %xmm2,%xmm0
586 lea $STRIDE($bp),$bp
587 por %xmm3,%xmm0
588
589 mulq $m1 # np[0]*m1
590 add %rax,$A[0] # "$N[0]", discarded
591 mov 8($ap),%rax
592 adc \$0,%rdx
593 mov %rdx,$N[1]
594
595 mulq $m0 # ap[j]*bp[i]
596 add %rax,$A[1]
597 mov 8($np),%rax
598 adc \$0,%rdx
599 add 8(%rsp),$A[1] # +tp[1]
600 adc \$0,%rdx
601 mov %rdx,$A[0]
602
603 mulq $m1 # np[j]*m1
604 add %rax,$N[1]
605 mov 16($ap),%rax
606 adc \$0,%rdx
607 add $A[1],$N[1] # np[j]*m1+ap[j]*bp[i]+tp[j]
608 lea 4($j),$j # j+=2
609 adc \$0,%rdx
610 mov %rdx,$N[0]
611 jmp .Linner4x
612.align 16
613.Linner4x:
614 mulq $m0 # ap[j]*bp[i]
615 add %rax,$A[0]
616 mov -16($np,$j,8),%rax
617 adc \$0,%rdx
618 add -16(%rsp,$j,8),$A[0] # ap[j]*bp[i]+tp[j]
619 adc \$0,%rdx
620 mov %rdx,$A[1]
621
622 mulq $m1 # np[j]*m1
623 add %rax,$N[0]
624 mov -8($ap,$j,8),%rax
625 adc \$0,%rdx
626 add $A[0],$N[0]
627 adc \$0,%rdx
628 mov $N[1],-32(%rsp,$j,8) # tp[j-1]
629 mov %rdx,$N[1]
630
631 mulq $m0 # ap[j]*bp[i]
632 add %rax,$A[1]
633 mov -8($np,$j,8),%rax
634 adc \$0,%rdx
635 add -8(%rsp,$j,8),$A[1]
636 adc \$0,%rdx
637 mov %rdx,$A[0]
638
639 mulq $m1 # np[j]*m1
640 add %rax,$N[1]
641 mov ($ap,$j,8),%rax
642 adc \$0,%rdx
643 add $A[1],$N[1]
644 adc \$0,%rdx
645 mov $N[0],-24(%rsp,$j,8) # tp[j-1]
646 mov %rdx,$N[0]
647
648 mulq $m0 # ap[j]*bp[i]
649 add %rax,$A[0]
650 mov ($np,$j,8),%rax
651 adc \$0,%rdx
652 add (%rsp,$j,8),$A[0] # ap[j]*bp[i]+tp[j]
653 adc \$0,%rdx
654 mov %rdx,$A[1]
655
656 mulq $m1 # np[j]*m1
657 add %rax,$N[0]
658 mov 8($ap,$j,8),%rax
659 adc \$0,%rdx
660 add $A[0],$N[0]
661 adc \$0,%rdx
662 mov $N[1],-16(%rsp,$j,8) # tp[j-1]
663 mov %rdx,$N[1]
664
665 mulq $m0 # ap[j]*bp[i]
666 add %rax,$A[1]
667 mov 8($np,$j,8),%rax
668 adc \$0,%rdx
669 add 8(%rsp,$j,8),$A[1]
670 adc \$0,%rdx
671 lea 4($j),$j # j++
672 mov %rdx,$A[0]
673
674 mulq $m1 # np[j]*m1
675 add %rax,$N[1]
676 mov -16($ap,$j,8),%rax
677 adc \$0,%rdx
678 add $A[1],$N[1]
679 adc \$0,%rdx
680 mov $N[0],-40(%rsp,$j,8) # tp[j-1]
681 mov %rdx,$N[0]
682 cmp $num,$j
683 jl .Linner4x
684
685 mulq $m0 # ap[j]*bp[i]
686 add %rax,$A[0]
687 mov -16($np,$j,8),%rax
688 adc \$0,%rdx
689 add -16(%rsp,$j,8),$A[0] # ap[j]*bp[i]+tp[j]
690 adc \$0,%rdx
691 mov %rdx,$A[1]
692
693 mulq $m1 # np[j]*m1
694 add %rax,$N[0]
695 mov -8($ap,$j,8),%rax
696 adc \$0,%rdx
697 add $A[0],$N[0]
698 adc \$0,%rdx
699 mov $N[1],-32(%rsp,$j,8) # tp[j-1]
700 mov %rdx,$N[1]
701
702 mulq $m0 # ap[j]*bp[i]
703 add %rax,$A[1]
704 mov -8($np,$j,8),%rax
705 adc \$0,%rdx
706 add -8(%rsp,$j,8),$A[1]
707 adc \$0,%rdx
708 lea 1($i),$i # i++
709 mov %rdx,$A[0]
710
711 mulq $m1 # np[j]*m1
712 add %rax,$N[1]
713 mov ($ap),%rax # ap[0]
714 adc \$0,%rdx
715 add $A[1],$N[1]
716 adc \$0,%rdx
717 mov $N[0],-24(%rsp,$j,8) # tp[j-1]
718 mov %rdx,$N[0]
719
720 movq %xmm0,$m0 # bp[i+1]
721 mov $N[1],-16(%rsp,$j,8) # tp[j-1]
722
723 xor $N[1],$N[1]
724 add $A[0],$N[0]
725 adc \$0,$N[1]
726 add (%rsp,$num,8),$N[0] # pull upmost overflow bit
727 adc \$0,$N[1]
728 mov $N[0],-8(%rsp,$j,8)
729 mov $N[1],(%rsp,$j,8) # store upmost overflow bit
730
731 cmp $num,$i
732 jl .Louter4x
733___
734{
735my @ri=("%rax","%rdx",$m0,$m1);
736$code.=<<___;
737 mov 16(%rsp,$num,8),$rp # restore $rp
738 mov 0(%rsp),@ri[0] # tp[0]
739 pxor %xmm0,%xmm0
740 mov 8(%rsp),@ri[1] # tp[1]
741 shr \$2,$num # num/=4
742 lea (%rsp),$ap # borrow ap for tp
743 xor $i,$i # i=0 and clear CF!
744
745 sub 0($np),@ri[0]
746 mov 16($ap),@ri[2] # tp[2]
747 mov 24($ap),@ri[3] # tp[3]
748 sbb 8($np),@ri[1]
749 lea -1($num),$j # j=num/4-1
750 jmp .Lsub4x
751.align 16
752.Lsub4x:
753 mov @ri[0],0($rp,$i,8) # rp[i]=tp[i]-np[i]
754 mov @ri[1],8($rp,$i,8) # rp[i]=tp[i]-np[i]
755 sbb 16($np,$i,8),@ri[2]
756 mov 32($ap,$i,8),@ri[0] # tp[i+1]
757 mov 40($ap,$i,8),@ri[1]
758 sbb 24($np,$i,8),@ri[3]
759 mov @ri[2],16($rp,$i,8) # rp[i]=tp[i]-np[i]
760 mov @ri[3],24($rp,$i,8) # rp[i]=tp[i]-np[i]
761 sbb 32($np,$i,8),@ri[0]
762 mov 48($ap,$i,8),@ri[2]
763 mov 56($ap,$i,8),@ri[3]
764 sbb 40($np,$i,8),@ri[1]
765 lea 4($i),$i # i++
766 dec $j # doesn't affect CF!
767 jnz .Lsub4x
768
769 mov @ri[0],0($rp,$i,8) # rp[i]=tp[i]-np[i]
770 mov 32($ap,$i,8),@ri[0] # load overflow bit
771 sbb 16($np,$i,8),@ri[2]
772 mov @ri[1],8($rp,$i,8) # rp[i]=tp[i]-np[i]
773 sbb 24($np,$i,8),@ri[3]
774 mov @ri[2],16($rp,$i,8) # rp[i]=tp[i]-np[i]
775
776 sbb \$0,@ri[0] # handle upmost overflow bit
777 mov @ri[3],24($rp,$i,8) # rp[i]=tp[i]-np[i]
778 xor $i,$i # i=0
779 and @ri[0],$ap
780 not @ri[0]
781 mov $rp,$np
782 and @ri[0],$np
783 lea -1($num),$j
784 or $np,$ap # ap=borrow?tp:rp
785
786 movdqu ($ap),%xmm1
787 movdqa %xmm0,(%rsp)
788 movdqu %xmm1,($rp)
789 jmp .Lcopy4x
790.align 16
791.Lcopy4x: # copy or in-place refresh
792 movdqu 16($ap,$i),%xmm2
793 movdqu 32($ap,$i),%xmm1
794 movdqa %xmm0,16(%rsp,$i)
795 movdqu %xmm2,16($rp,$i)
796 movdqa %xmm0,32(%rsp,$i)
797 movdqu %xmm1,32($rp,$i)
798 lea 32($i),$i
799 dec $j
800 jnz .Lcopy4x
801
802 shl \$2,$num
803 movdqu 16($ap,$i),%xmm2
804 movdqa %xmm0,16(%rsp,$i)
805 movdqu %xmm2,16($rp,$i)
806___
807}
808$code.=<<___;
809 mov 8(%rsp,$num,8),%rsi # restore %rsp
810 mov \$1,%rax
811___
812$code.=<<___ if ($win64);
813 movaps (%rsi),%xmm6
814 movaps 0x10(%rsi),%xmm7
815 lea 0x28(%rsi),%rsi
816___
817$code.=<<___;
818 mov (%rsi),%r15
819 mov 8(%rsi),%r14
820 mov 16(%rsi),%r13
821 mov 24(%rsi),%r12
822 mov 32(%rsi),%rbp
823 mov 40(%rsi),%rbx
824 lea 48(%rsi),%rsp
825.Lmul4x_epilogue:
826 ret
827.size bn_mul4x_mont_gather5,.-bn_mul4x_mont_gather5
828___
829}}}
830
831{
832my ($inp,$num,$tbl,$idx)=$win64?("%rcx","%rdx","%r8", "%r9") : # Win64 order
833 ("%rdi","%rsi","%rdx","%rcx"); # Unix order
834my $out=$inp;
835my $STRIDE=2**5*8;
836my $N=$STRIDE/4;
837
838$code.=<<___;
839.globl bn_scatter5
840.type bn_scatter5,\@abi-omnipotent
841.align 16
842bn_scatter5:
843 cmp \$0, $num
844 jz .Lscatter_epilogue
845 lea ($tbl,$idx,8),$tbl
846.Lscatter:
847 mov ($inp),%rax
848 lea 8($inp),$inp
849 mov %rax,($tbl)
850 lea 32*8($tbl),$tbl
851 sub \$1,$num
852 jnz .Lscatter
853.Lscatter_epilogue:
854 ret
855.size bn_scatter5,.-bn_scatter5
856
857.globl bn_gather5
858.type bn_gather5,\@abi-omnipotent
859.align 16
860bn_gather5:
861___
862$code.=<<___ if ($win64);
863.LSEH_begin_bn_gather5:
864 # I can't trust assembler to use specific encoding:-(
865 .byte 0x48,0x83,0xec,0x28 #sub \$0x28,%rsp
866 .byte 0x0f,0x29,0x34,0x24 #movaps %xmm6,(%rsp)
867 .byte 0x0f,0x29,0x7c,0x24,0x10 #movaps %xmm7,0x10(%rsp)
868___
869$code.=<<___;
870 mov $idx,%r11
871 shr \$`log($N/8)/log(2)`,$idx
872 and \$`$N/8-1`,%r11
873 not $idx
874 lea .Lmagic_masks(%rip),%rax
875 and \$`2**5/($N/8)-1`,$idx # 5 is "window size"
876 lea 96($tbl,%r11,8),$tbl # pointer within 1st cache line
877 movq 0(%rax,$idx,8),%xmm4 # set of masks denoting which
878 movq 8(%rax,$idx,8),%xmm5 # cache line contains element
879 movq 16(%rax,$idx,8),%xmm6 # denoted by 7th argument
880 movq 24(%rax,$idx,8),%xmm7
881 jmp .Lgather
882.align 16
883.Lgather:
884 movq `0*$STRIDE/4-96`($tbl),%xmm0
885 movq `1*$STRIDE/4-96`($tbl),%xmm1
886 pand %xmm4,%xmm0
887 movq `2*$STRIDE/4-96`($tbl),%xmm2
888 pand %xmm5,%xmm1
889 movq `3*$STRIDE/4-96`($tbl),%xmm3
890 pand %xmm6,%xmm2
891 por %xmm1,%xmm0
892 pand %xmm7,%xmm3
893 por %xmm2,%xmm0
894 lea $STRIDE($tbl),$tbl
895 por %xmm3,%xmm0
896
897 movq %xmm0,($out) # m0=bp[0]
898 lea 8($out),$out
899 sub \$1,$num
900 jnz .Lgather
901___
902$code.=<<___ if ($win64);
903 movaps (%rsp),%xmm6
904 movaps 0x10(%rsp),%xmm7
905 lea 0x28(%rsp),%rsp
906___
907$code.=<<___;
908 ret
909.LSEH_end_bn_gather5:
910.size bn_gather5,.-bn_gather5
911___
912}
913$code.=<<___;
914.align 64
915.Lmagic_masks:
916 .long 0,0, 0,0, 0,0, -1,-1
917 .long 0,0, 0,0, 0,0, 0,0
918.asciz "Montgomery Multiplication with scatter/gather for x86_64, CRYPTOGAMS by <appro\@openssl.org>"
919___
920
921# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame,
922# CONTEXT *context,DISPATCHER_CONTEXT *disp)
923if ($win64) {
924$rec="%rcx";
925$frame="%rdx";
926$context="%r8";
927$disp="%r9";
928
929$code.=<<___;
930.extern __imp_RtlVirtualUnwind
931.type mul_handler,\@abi-omnipotent
932.align 16
933mul_handler:
934 push %rsi
935 push %rdi
936 push %rbx
937 push %rbp
938 push %r12
939 push %r13
940 push %r14
941 push %r15
942 pushfq
943 sub \$64,%rsp
944
945 mov 120($context),%rax # pull context->Rax
946 mov 248($context),%rbx # pull context->Rip
947
948 mov 8($disp),%rsi # disp->ImageBase
949 mov 56($disp),%r11 # disp->HandlerData
950
951 mov 0(%r11),%r10d # HandlerData[0]
952 lea (%rsi,%r10),%r10 # end of prologue label
953 cmp %r10,%rbx # context->Rip<end of prologue label
954 jb .Lcommon_seh_tail
955
956 lea `40+48`(%rax),%rax
957
958 mov 4(%r11),%r10d # HandlerData[1]
959 lea (%rsi,%r10),%r10 # end of alloca label
960 cmp %r10,%rbx # context->Rip<end of alloca label
961 jb .Lcommon_seh_tail
962
963 mov 152($context),%rax # pull context->Rsp
964
965 mov 8(%r11),%r10d # HandlerData[2]
966 lea (%rsi,%r10),%r10 # epilogue label
967 cmp %r10,%rbx # context->Rip>=epilogue label
968 jae .Lcommon_seh_tail
969
970 mov 192($context),%r10 # pull $num
971 mov 8(%rax,%r10,8),%rax # pull saved stack pointer
972
973 movaps (%rax),%xmm0
974 movaps 16(%rax),%xmm1
975 lea `40+48`(%rax),%rax
976
977 mov -8(%rax),%rbx
978 mov -16(%rax),%rbp
979 mov -24(%rax),%r12
980 mov -32(%rax),%r13
981 mov -40(%rax),%r14
982 mov -48(%rax),%r15
983 mov %rbx,144($context) # restore context->Rbx
984 mov %rbp,160($context) # restore context->Rbp
985 mov %r12,216($context) # restore context->R12
986 mov %r13,224($context) # restore context->R13
987 mov %r14,232($context) # restore context->R14
988 mov %r15,240($context) # restore context->R15
989 movups %xmm0,512($context) # restore context->Xmm6
990 movups %xmm1,528($context) # restore context->Xmm7
991
992.Lcommon_seh_tail:
993 mov 8(%rax),%rdi
994 mov 16(%rax),%rsi
995 mov %rax,152($context) # restore context->Rsp
996 mov %rsi,168($context) # restore context->Rsi
997 mov %rdi,176($context) # restore context->Rdi
998
999 mov 40($disp),%rdi # disp->ContextRecord
1000 mov $context,%rsi # context
1001 mov \$154,%ecx # sizeof(CONTEXT)
1002 .long 0xa548f3fc # cld; rep movsq
1003
1004 mov $disp,%rsi
1005 xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER
1006 mov 8(%rsi),%rdx # arg2, disp->ImageBase
1007 mov 0(%rsi),%r8 # arg3, disp->ControlPc
1008 mov 16(%rsi),%r9 # arg4, disp->FunctionEntry
1009 mov 40(%rsi),%r10 # disp->ContextRecord
1010 lea 56(%rsi),%r11 # &disp->HandlerData
1011 lea 24(%rsi),%r12 # &disp->EstablisherFrame
1012 mov %r10,32(%rsp) # arg5
1013 mov %r11,40(%rsp) # arg6
1014 mov %r12,48(%rsp) # arg7
1015 mov %rcx,56(%rsp) # arg8, (NULL)
1016 call *__imp_RtlVirtualUnwind(%rip)
1017
1018 mov \$1,%eax # ExceptionContinueSearch
1019 add \$64,%rsp
1020 popfq
1021 pop %r15
1022 pop %r14
1023 pop %r13
1024 pop %r12
1025 pop %rbp
1026 pop %rbx
1027 pop %rdi
1028 pop %rsi
1029 ret
1030.size mul_handler,.-mul_handler
1031
1032.section .pdata
1033.align 4
1034 .rva .LSEH_begin_bn_mul_mont_gather5
1035 .rva .LSEH_end_bn_mul_mont_gather5
1036 .rva .LSEH_info_bn_mul_mont_gather5
1037
1038 .rva .LSEH_begin_bn_mul4x_mont_gather5
1039 .rva .LSEH_end_bn_mul4x_mont_gather5
1040 .rva .LSEH_info_bn_mul4x_mont_gather5
1041
1042 .rva .LSEH_begin_bn_gather5
1043 .rva .LSEH_end_bn_gather5
1044 .rva .LSEH_info_bn_gather5
1045
1046.section .xdata
1047.align 8
1048.LSEH_info_bn_mul_mont_gather5:
1049 .byte 9,0,0,0
1050 .rva mul_handler
1051 .rva .Lmul_alloca,.Lmul_body,.Lmul_epilogue # HandlerData[]
1052.align 8
1053.LSEH_info_bn_mul4x_mont_gather5:
1054 .byte 9,0,0,0
1055 .rva mul_handler
1056 .rva .Lmul4x_alloca,.Lmul4x_body,.Lmul4x_epilogue # HandlerData[]
1057.align 8
1058.LSEH_info_bn_gather5:
1059 .byte 0x01,0x0d,0x05,0x00
1060 .byte 0x0d,0x78,0x01,0x00 #movaps 0x10(rsp),xmm7
1061 .byte 0x08,0x68,0x00,0x00 #movaps (rsp),xmm6
1062 .byte 0x04,0x42,0x00,0x00 #sub rsp,0x28
1063.align 8
1064___
1065}
1066
1067$code =~ s/\`([^\`]*)\`/eval($1)/gem;
1068
1069print $code;
1070close STDOUT;
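
The bn_scatter5/bn_gather5 pair above lays the precomputed table out in 256-byte strides and reads it back without a secret-indexed load: every gather step loads one candidate qword from each of the four possible slots and combines them with the .Lmagic_masks AND/OR masks, so all candidate locations are touched on every iteration and the selection itself is done arithmetically rather than by a secret-dependent load. A minimal sketch of that select-by-mask idea, editor-added and not part of the imported source, reduced to one 64-bit word per table entry:

/* Editor-added sketch, not part of the imported source: constant-time
 * table lookup by masking.  Every entry is read, so the memory-access
 * pattern does not depend on secret_idx; the wanted entry is picked
 * out with an all-ones/all-zeros mask. */
#include <stdint.h>
#include <stddef.h>

uint64_t ct_gather(const uint64_t *table, size_t nentries, size_t secret_idx)
    {
    uint64_t result = 0, diff, mask;
    size_t i;

    for (i = 0; i < nentries; i++)
        {
        diff = (uint64_t)i ^ (uint64_t)secret_idx;
        /* (diff | -diff) has its top bit set iff diff != 0, so mask is
         * all-ones exactly when i == secret_idx and all-zeros otherwise */
        mask = ((diff | (0 - diff)) >> 63) - 1;
        result |= table[i] & mask;
        }
    return result;
    }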
diff --git a/src/lib/libssl/src/crypto/bn/bn_gf2m.c b/src/lib/libssl/src/crypto/bn/bn_gf2m.c
index 432a3aa338..8a4dc20ad9 100644
--- a/src/lib/libssl/src/crypto/bn/bn_gf2m.c
+++ b/src/lib/libssl/src/crypto/bn/bn_gf2m.c
@@ -94,6 +94,8 @@
94#include "cryptlib.h" 94#include "cryptlib.h"
95#include "bn_lcl.h" 95#include "bn_lcl.h"
96 96
97#ifndef OPENSSL_NO_EC2M
98
97/* Maximum number of iterations before BN_GF2m_mod_solve_quad_arr should fail. */ 99/* Maximum number of iterations before BN_GF2m_mod_solve_quad_arr should fail. */
98#define MAX_ITERATIONS 50 100#define MAX_ITERATIONS 50
99 101
@@ -122,6 +124,7 @@ static const BN_ULONG SQR_tb[16] =
122 SQR_tb[(w) >> 4 & 0xF] << 8 | SQR_tb[(w) & 0xF] 124 SQR_tb[(w) >> 4 & 0xF] << 8 | SQR_tb[(w) & 0xF]
123#endif 125#endif
124 126
127#if !defined(OPENSSL_BN_ASM_GF2m)
125/* Product of two polynomials a, b each with degree < BN_BITS2 - 1, 128/* Product of two polynomials a, b each with degree < BN_BITS2 - 1,
126 * result is a polynomial r with degree < 2 * BN_BITS - 1 129 * result is a polynomial r with degree < 2 * BN_BITS - 1
127 * The caller MUST ensure that the variables have the right amount 130 * The caller MUST ensure that the variables have the right amount
@@ -216,7 +219,9 @@ static void bn_GF2m_mul_2x2(BN_ULONG *r, const BN_ULONG a1, const BN_ULONG a0, c
216 r[2] ^= m1 ^ r[1] ^ r[3]; /* h0 ^= m1 ^ l1 ^ h1; */ 219 r[2] ^= m1 ^ r[1] ^ r[3]; /* h0 ^= m1 ^ l1 ^ h1; */
217 r[1] = r[3] ^ r[2] ^ r[0] ^ m1 ^ m0; /* l1 ^= l0 ^ h0 ^ m0; */ 220 r[1] = r[3] ^ r[2] ^ r[0] ^ m1 ^ m0; /* l1 ^= l0 ^ h0 ^ m0; */
218 } 221 }
219 222#else
223void bn_GF2m_mul_2x2(BN_ULONG *r, BN_ULONG a1, BN_ULONG a0, BN_ULONG b1, BN_ULONG b0);
224#endif
220 225
221/* Add polynomials a and b and store result in r; r could be a or b, a and b 226/* Add polynomials a and b and store result in r; r could be a or b, a and b
222 * could be equal; r is the bitwise XOR of a and b. 227 * could be equal; r is the bitwise XOR of a and b.
@@ -360,21 +365,17 @@ int BN_GF2m_mod_arr(BIGNUM *r, const BIGNUM *a, const int p[])
360int BN_GF2m_mod(BIGNUM *r, const BIGNUM *a, const BIGNUM *p) 365int BN_GF2m_mod(BIGNUM *r, const BIGNUM *a, const BIGNUM *p)
361 { 366 {
362 int ret = 0; 367 int ret = 0;
363 const int max = BN_num_bits(p) + 1; 368 int arr[6];
364 int *arr=NULL;
365 bn_check_top(a); 369 bn_check_top(a);
366 bn_check_top(p); 370 bn_check_top(p);
367 if ((arr = (int *)OPENSSL_malloc(sizeof(int) * max)) == NULL) goto err; 371 ret = BN_GF2m_poly2arr(p, arr, sizeof(arr)/sizeof(arr[0]));
368 ret = BN_GF2m_poly2arr(p, arr, max); 372 if (!ret || ret > (int)(sizeof(arr)/sizeof(arr[0])))
369 if (!ret || ret > max)
370 { 373 {
371 BNerr(BN_F_BN_GF2M_MOD,BN_R_INVALID_LENGTH); 374 BNerr(BN_F_BN_GF2M_MOD,BN_R_INVALID_LENGTH);
372 goto err; 375 return 0;
373 } 376 }
374 ret = BN_GF2m_mod_arr(r, a, arr); 377 ret = BN_GF2m_mod_arr(r, a, arr);
375 bn_check_top(r); 378 bn_check_top(r);
376err:
377 if (arr) OPENSSL_free(arr);
378 return ret; 379 return ret;
379 } 380 }
380 381
@@ -521,7 +522,7 @@ err:
521 */ 522 */
522int BN_GF2m_mod_inv(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) 523int BN_GF2m_mod_inv(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
523 { 524 {
524 BIGNUM *b, *c, *u, *v, *tmp; 525 BIGNUM *b, *c = NULL, *u = NULL, *v = NULL, *tmp;
525 int ret = 0; 526 int ret = 0;
526 527
527 bn_check_top(a); 528 bn_check_top(a);
@@ -529,18 +530,18 @@ int BN_GF2m_mod_inv(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
529 530
530 BN_CTX_start(ctx); 531 BN_CTX_start(ctx);
531 532
532 b = BN_CTX_get(ctx); 533 if ((b = BN_CTX_get(ctx))==NULL) goto err;
533 c = BN_CTX_get(ctx); 534 if ((c = BN_CTX_get(ctx))==NULL) goto err;
534 u = BN_CTX_get(ctx); 535 if ((u = BN_CTX_get(ctx))==NULL) goto err;
535 v = BN_CTX_get(ctx); 536 if ((v = BN_CTX_get(ctx))==NULL) goto err;
536 if (v == NULL) goto err;
537 537
538 if (!BN_one(b)) goto err;
539 if (!BN_GF2m_mod(u, a, p)) goto err; 538 if (!BN_GF2m_mod(u, a, p)) goto err;
540 if (!BN_copy(v, p)) goto err;
541
542 if (BN_is_zero(u)) goto err; 539 if (BN_is_zero(u)) goto err;
543 540
541 if (!BN_copy(v, p)) goto err;
542#if 0
543 if (!BN_one(b)) goto err;
544
544 while (1) 545 while (1)
545 { 546 {
546 while (!BN_is_odd(u)) 547 while (!BN_is_odd(u))
@@ -565,13 +566,89 @@ int BN_GF2m_mod_inv(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
565 if (!BN_GF2m_add(u, u, v)) goto err; 566 if (!BN_GF2m_add(u, u, v)) goto err;
566 if (!BN_GF2m_add(b, b, c)) goto err; 567 if (!BN_GF2m_add(b, b, c)) goto err;
567 } 568 }
569#else
570 {
571 int i, ubits = BN_num_bits(u),
572 vbits = BN_num_bits(v), /* v is copy of p */
573 top = p->top;
574 BN_ULONG *udp,*bdp,*vdp,*cdp;
575
576 bn_wexpand(u,top); udp = u->d;
577 for (i=u->top;i<top;i++) udp[i] = 0;
578 u->top = top;
579 bn_wexpand(b,top); bdp = b->d;
580 bdp[0] = 1;
581 for (i=1;i<top;i++) bdp[i] = 0;
582 b->top = top;
583 bn_wexpand(c,top); cdp = c->d;
584 for (i=0;i<top;i++) cdp[i] = 0;
585 c->top = top;
586 vdp = v->d; /* It pays off to "cache" *->d pointers, because
587 * it allows optimizer to be more aggressive.
588 * But we don't have to "cache" p->d, because *p
589 * is declared 'const'... */
590 while (1)
591 {
592 while (ubits && !(udp[0]&1))
593 {
594 BN_ULONG u0,u1,b0,b1,mask;
595
596 u0 = udp[0];
597 b0 = bdp[0];
598 mask = (BN_ULONG)0-(b0&1);
599 b0 ^= p->d[0]&mask;
600 for (i=0;i<top-1;i++)
601 {
602 u1 = udp[i+1];
603 udp[i] = ((u0>>1)|(u1<<(BN_BITS2-1)))&BN_MASK2;
604 u0 = u1;
605 b1 = bdp[i+1]^(p->d[i+1]&mask);
606 bdp[i] = ((b0>>1)|(b1<<(BN_BITS2-1)))&BN_MASK2;
607 b0 = b1;
608 }
609 udp[i] = u0>>1;
610 bdp[i] = b0>>1;
611 ubits--;
612 }
568 613
614 if (ubits<=BN_BITS2 && udp[0]==1) break;
615
616 if (ubits<vbits)
617 {
618 i = ubits; ubits = vbits; vbits = i;
619 tmp = u; u = v; v = tmp;
620 tmp = b; b = c; c = tmp;
621 udp = vdp; vdp = v->d;
622 bdp = cdp; cdp = c->d;
623 }
624 for(i=0;i<top;i++)
625 {
626 udp[i] ^= vdp[i];
627 bdp[i] ^= cdp[i];
628 }
629 if (ubits==vbits)
630 {
631 BN_ULONG ul;
632 int utop = (ubits-1)/BN_BITS2;
633
634 while ((ul=udp[utop])==0 && utop) utop--;
635 ubits = utop*BN_BITS2 + BN_num_bits_word(ul);
636 }
637 }
638 bn_correct_top(b);
639 }
640#endif
569 641
570 if (!BN_copy(r, b)) goto err; 642 if (!BN_copy(r, b)) goto err;
571 bn_check_top(r); 643 bn_check_top(r);
572 ret = 1; 644 ret = 1;
573 645
574err: 646err:
647#ifdef BN_DEBUG /* BN_CTX_end would complain about the expanded form */
648 bn_correct_top(c);
649 bn_correct_top(u);
650 bn_correct_top(v);
651#endif
575 BN_CTX_end(ctx); 652 BN_CTX_end(ctx);
576 return ret; 653 return ret;
577 } 654 }
@@ -1033,3 +1110,4 @@ int BN_GF2m_arr2poly(const int p[], BIGNUM *a)
1033 return 1; 1110 return 1;
1034 } 1111 }
1035 1112
1113#endif
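
The new word-oriented branch of BN_GF2m_mod_inv above implements the standard binary inversion in GF(2^m): while u is divisible by x, u and b are shifted right (folding p into b first whenever b is odd); otherwise the shorter of u and v is XORed into the longer, together with the matching b or c, until u collapses to 1 and b holds a^-1 mod p. An editor-added, bit-level sketch of the same loop for polynomials that fit in a single 64-bit word (deg(p) <= 63, p irreducible, a nonzero and already reduced mod p):

/* Editor-added sketch, not part of the imported source.
 * Invariant maintained throughout: b*a == u and c*a == v (mod p). */
#include <stdint.h>

static int gf2_deg(uint64_t x)          /* degree of a GF(2) polynomial */
    {
    int d = -1;
    while (x) { x >>= 1; d++; }
    return d;
    }

uint64_t gf2m_inv(uint64_t a, uint64_t p)
    {
    uint64_t u = a, v = p, b = 1, c = 0, t;

    while (u != 1)
        {
        while (!(u & 1))                /* x divides u: divide u and b by x */
            {
            u >>= 1;
            if (b & 1)
                b ^= p;                 /* make b even first (p is odd) */
            b >>= 1;
            }
        if (u == 1)
            break;
        if (gf2_deg(u) < gf2_deg(v))    /* keep deg(u) >= deg(v) */
            {
            t = u; u = v; v = t;
            t = b; b = c; c = t;
            }
        u ^= v;                         /* u becomes even, so degrees shrink */
        b ^= c;
        }
    return b;                           /* b = a^-1 mod p */
    }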
diff --git a/src/lib/libssl/src/crypto/bn/bn_nist.c b/src/lib/libssl/src/crypto/bn/bn_nist.c
index c6de032696..43caee4770 100644
--- a/src/lib/libssl/src/crypto/bn/bn_nist.c
+++ b/src/lib/libssl/src/crypto/bn/bn_nist.c
@@ -319,6 +319,13 @@ static void nist_cp_bn(BN_ULONG *buf, BN_ULONG *a, int top)
319 :(to[(n)/2] =((m)&1)?(from[(m)/2]>>32):(from[(m)/2]&BN_MASK2l))) 319 :(to[(n)/2] =((m)&1)?(from[(m)/2]>>32):(from[(m)/2]&BN_MASK2l)))
320#define bn_32_set_0(to, n) (((n)&1)?(to[(n)/2]&=BN_MASK2l):(to[(n)/2]=0)); 320#define bn_32_set_0(to, n) (((n)&1)?(to[(n)/2]&=BN_MASK2l):(to[(n)/2]=0));
321#define bn_cp_32(to,n,from,m) ((m)>=0)?bn_cp_32_naked(to,n,from,m):bn_32_set_0(to,n) 321#define bn_cp_32(to,n,from,m) ((m)>=0)?bn_cp_32_naked(to,n,from,m):bn_32_set_0(to,n)
322# if defined(L_ENDIAN)
323# if defined(__arch64__)
324# define NIST_INT64 long
325# else
326# define NIST_INT64 long long
327# endif
328# endif
322#else 329#else
323#define bn_cp_64(to, n, from, m) \ 330#define bn_cp_64(to, n, from, m) \
324 { \ 331 { \
@@ -330,13 +337,15 @@ static void nist_cp_bn(BN_ULONG *buf, BN_ULONG *a, int top)
330 bn_32_set_0(to, (n)*2); \ 337 bn_32_set_0(to, (n)*2); \
331 bn_32_set_0(to, (n)*2+1); \ 338 bn_32_set_0(to, (n)*2+1); \
332 } 339 }
333#if BN_BITS2 == 32
334#define bn_cp_32(to, n, from, m) (to)[n] = (m>=0)?((from)[m]):0; 340#define bn_cp_32(to, n, from, m) (to)[n] = (m>=0)?((from)[m]):0;
335#define bn_32_set_0(to, n) (to)[n] = (BN_ULONG)0; 341#define bn_32_set_0(to, n) (to)[n] = (BN_ULONG)0;
336#endif 342# if defined(_WIN32) && !defined(__GNUC__)
343# define NIST_INT64 __int64
344# elif defined(BN_LLONG)
345# define NIST_INT64 long long
346# endif
337#endif /* BN_BITS2 != 64 */ 347#endif /* BN_BITS2 != 64 */
338 348
339
340#define nist_set_192(to, from, a1, a2, a3) \ 349#define nist_set_192(to, from, a1, a2, a3) \
341 { \ 350 { \
342 bn_cp_64(to, 0, from, (a3) - 3) \ 351 bn_cp_64(to, 0, from, (a3) - 3) \
@@ -350,9 +359,11 @@ int BN_nist_mod_192(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
350 int top = a->top, i; 359 int top = a->top, i;
351 int carry; 360 int carry;
352 register BN_ULONG *r_d, *a_d = a->d; 361 register BN_ULONG *r_d, *a_d = a->d;
353 BN_ULONG t_d[BN_NIST_192_TOP], 362 union {
354 buf[BN_NIST_192_TOP], 363 BN_ULONG bn[BN_NIST_192_TOP];
355 c_d[BN_NIST_192_TOP], 364 unsigned int ui[BN_NIST_192_TOP*sizeof(BN_ULONG)/sizeof(unsigned int)];
365 } buf;
366 BN_ULONG c_d[BN_NIST_192_TOP],
356 *res; 367 *res;
357 PTR_SIZE_INT mask; 368 PTR_SIZE_INT mask;
358 static const BIGNUM _bignum_nist_p_192_sqr = { 369 static const BIGNUM _bignum_nist_p_192_sqr = {
@@ -385,15 +396,48 @@ int BN_nist_mod_192(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
385 else 396 else
386 r_d = a_d; 397 r_d = a_d;
387 398
388 nist_cp_bn_0(buf, a_d + BN_NIST_192_TOP, top - BN_NIST_192_TOP, BN_NIST_192_TOP); 399 nist_cp_bn_0(buf.bn, a_d + BN_NIST_192_TOP, top - BN_NIST_192_TOP, BN_NIST_192_TOP);
400
401#if defined(NIST_INT64)
402 {
403 NIST_INT64 acc; /* accumulator */
404 unsigned int *rp=(unsigned int *)r_d;
405 const unsigned int *bp=(const unsigned int *)buf.ui;
406
407 acc = rp[0]; acc += bp[3*2-6];
408 acc += bp[5*2-6]; rp[0] = (unsigned int)acc; acc >>= 32;
409
410 acc += rp[1]; acc += bp[3*2-5];
411 acc += bp[5*2-5]; rp[1] = (unsigned int)acc; acc >>= 32;
389 412
390 nist_set_192(t_d, buf, 0, 3, 3); 413 acc += rp[2]; acc += bp[3*2-6];
414 acc += bp[4*2-6];
415 acc += bp[5*2-6]; rp[2] = (unsigned int)acc; acc >>= 32;
416
417 acc += rp[3]; acc += bp[3*2-5];
418 acc += bp[4*2-5];
419 acc += bp[5*2-5]; rp[3] = (unsigned int)acc; acc >>= 32;
420
421 acc += rp[4]; acc += bp[4*2-6];
422 acc += bp[5*2-6]; rp[4] = (unsigned int)acc; acc >>= 32;
423
424 acc += rp[5]; acc += bp[4*2-5];
425 acc += bp[5*2-5]; rp[5] = (unsigned int)acc;
426
427 carry = (int)(acc>>32);
428 }
429#else
430 {
431 BN_ULONG t_d[BN_NIST_192_TOP];
432
433 nist_set_192(t_d, buf.bn, 0, 3, 3);
391 carry = (int)bn_add_words(r_d, r_d, t_d, BN_NIST_192_TOP); 434 carry = (int)bn_add_words(r_d, r_d, t_d, BN_NIST_192_TOP);
392 nist_set_192(t_d, buf, 4, 4, 0); 435 nist_set_192(t_d, buf.bn, 4, 4, 0);
393 carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_192_TOP); 436 carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_192_TOP);
394 nist_set_192(t_d, buf, 5, 5, 5) 437 nist_set_192(t_d, buf.bn, 5, 5, 5)
395 carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_192_TOP); 438 carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_192_TOP);
396 439 }
440#endif
397 if (carry > 0) 441 if (carry > 0)
398 carry = (int)bn_sub_words(r_d,r_d,_nist_p_192[carry-1],BN_NIST_192_TOP); 442 carry = (int)bn_sub_words(r_d,r_d,_nist_p_192[carry-1],BN_NIST_192_TOP);
399 else 443 else
@@ -435,8 +479,7 @@ int BN_nist_mod_224(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
435 int top = a->top, i; 479 int top = a->top, i;
436 int carry; 480 int carry;
437 BN_ULONG *r_d, *a_d = a->d; 481 BN_ULONG *r_d, *a_d = a->d;
438 BN_ULONG t_d[BN_NIST_224_TOP], 482 BN_ULONG buf[BN_NIST_224_TOP],
439 buf[BN_NIST_224_TOP],
440 c_d[BN_NIST_224_TOP], 483 c_d[BN_NIST_224_TOP],
441 *res; 484 *res;
442 PTR_SIZE_INT mask; 485 PTR_SIZE_INT mask;
@@ -474,14 +517,54 @@ int BN_nist_mod_224(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
474 517
475#if BN_BITS2==64 518#if BN_BITS2==64
476 /* copy upper 256 bits of 448 bit number ... */ 519 /* copy upper 256 bits of 448 bit number ... */
477 nist_cp_bn_0(t_d, a_d + (BN_NIST_224_TOP-1), top - (BN_NIST_224_TOP-1), BN_NIST_224_TOP); 520 nist_cp_bn_0(c_d, a_d + (BN_NIST_224_TOP-1), top - (BN_NIST_224_TOP-1), BN_NIST_224_TOP);
478 /* ... and right shift by 32 to obtain upper 224 bits */ 521 /* ... and right shift by 32 to obtain upper 224 bits */
479 nist_set_224(buf, t_d, 14, 13, 12, 11, 10, 9, 8); 522 nist_set_224(buf, c_d, 14, 13, 12, 11, 10, 9, 8);
480 /* truncate lower part to 224 bits too */ 523 /* truncate lower part to 224 bits too */
481 r_d[BN_NIST_224_TOP-1] &= BN_MASK2l; 524 r_d[BN_NIST_224_TOP-1] &= BN_MASK2l;
482#else 525#else
483 nist_cp_bn_0(buf, a_d + BN_NIST_224_TOP, top - BN_NIST_224_TOP, BN_NIST_224_TOP); 526 nist_cp_bn_0(buf, a_d + BN_NIST_224_TOP, top - BN_NIST_224_TOP, BN_NIST_224_TOP);
484#endif 527#endif
528
529#if defined(NIST_INT64) && BN_BITS2!=64
530 {
531 NIST_INT64 acc; /* accumulator */
532 unsigned int *rp=(unsigned int *)r_d;
533 const unsigned int *bp=(const unsigned int *)buf;
534
535 acc = rp[0]; acc -= bp[7-7];
536 acc -= bp[11-7]; rp[0] = (unsigned int)acc; acc >>= 32;
537
538 acc += rp[1]; acc -= bp[8-7];
539 acc -= bp[12-7]; rp[1] = (unsigned int)acc; acc >>= 32;
540
541 acc += rp[2]; acc -= bp[9-7];
542 acc -= bp[13-7]; rp[2] = (unsigned int)acc; acc >>= 32;
543
544 acc += rp[3]; acc += bp[7-7];
545 acc += bp[11-7];
546 acc -= bp[10-7]; rp[3] = (unsigned int)acc; acc>>= 32;
547
548 acc += rp[4]; acc += bp[8-7];
549 acc += bp[12-7];
550 acc -= bp[11-7]; rp[4] = (unsigned int)acc; acc >>= 32;
551
552 acc += rp[5]; acc += bp[9-7];
553 acc += bp[13-7];
554 acc -= bp[12-7]; rp[5] = (unsigned int)acc; acc >>= 32;
555
556 acc += rp[6]; acc += bp[10-7];
557 acc -= bp[13-7]; rp[6] = (unsigned int)acc;
558
559 carry = (int)(acc>>32);
560# if BN_BITS2==64
561 rp[7] = carry;
562# endif
563 }
564#else
565 {
566 BN_ULONG t_d[BN_NIST_224_TOP];
567
485 nist_set_224(t_d, buf, 10, 9, 8, 7, 0, 0, 0); 568 nist_set_224(t_d, buf, 10, 9, 8, 7, 0, 0, 0);
486 carry = (int)bn_add_words(r_d, r_d, t_d, BN_NIST_224_TOP); 569 carry = (int)bn_add_words(r_d, r_d, t_d, BN_NIST_224_TOP);
487 nist_set_224(t_d, buf, 0, 13, 12, 11, 0, 0, 0); 570 nist_set_224(t_d, buf, 0, 13, 12, 11, 0, 0, 0);
@@ -494,6 +577,8 @@ int BN_nist_mod_224(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
494#if BN_BITS2==64 577#if BN_BITS2==64
495 carry = (int)(r_d[BN_NIST_224_TOP-1]>>32); 578 carry = (int)(r_d[BN_NIST_224_TOP-1]>>32);
496#endif 579#endif
580 }
581#endif
497 u.f = bn_sub_words; 582 u.f = bn_sub_words;
498 if (carry > 0) 583 if (carry > 0)
499 { 584 {
@@ -548,9 +633,11 @@ int BN_nist_mod_256(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
548 int i, top = a->top; 633 int i, top = a->top;
549 int carry = 0; 634 int carry = 0;
550 register BN_ULONG *a_d = a->d, *r_d; 635 register BN_ULONG *a_d = a->d, *r_d;
551 BN_ULONG t_d[BN_NIST_256_TOP], 636 union {
552 buf[BN_NIST_256_TOP], 637 BN_ULONG bn[BN_NIST_256_TOP];
553 c_d[BN_NIST_256_TOP], 638 unsigned int ui[BN_NIST_256_TOP*sizeof(BN_ULONG)/sizeof(unsigned int)];
639 } buf;
640 BN_ULONG c_d[BN_NIST_256_TOP],
554 *res; 641 *res;
555 PTR_SIZE_INT mask; 642 PTR_SIZE_INT mask;
556 union { bn_addsub_f f; PTR_SIZE_INT p; } u; 643 union { bn_addsub_f f; PTR_SIZE_INT p; } u;
@@ -584,12 +671,87 @@ int BN_nist_mod_256(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
584 else 671 else
585 r_d = a_d; 672 r_d = a_d;
586 673
587 nist_cp_bn_0(buf, a_d + BN_NIST_256_TOP, top - BN_NIST_256_TOP, BN_NIST_256_TOP); 674 nist_cp_bn_0(buf.bn, a_d + BN_NIST_256_TOP, top - BN_NIST_256_TOP, BN_NIST_256_TOP);
675
676#if defined(NIST_INT64)
677 {
678 NIST_INT64 acc; /* accumulator */
679 unsigned int *rp=(unsigned int *)r_d;
680 const unsigned int *bp=(const unsigned int *)buf.ui;
681
682 acc = rp[0]; acc += bp[8-8];
683 acc += bp[9-8];
684 acc -= bp[11-8];
685 acc -= bp[12-8];
686 acc -= bp[13-8];
687 acc -= bp[14-8]; rp[0] = (unsigned int)acc; acc >>= 32;
688
689 acc += rp[1]; acc += bp[9-8];
690 acc += bp[10-8];
691 acc -= bp[12-8];
692 acc -= bp[13-8];
693 acc -= bp[14-8];
694 acc -= bp[15-8]; rp[1] = (unsigned int)acc; acc >>= 32;
695
696 acc += rp[2]; acc += bp[10-8];
697 acc += bp[11-8];
698 acc -= bp[13-8];
699 acc -= bp[14-8];
700 acc -= bp[15-8]; rp[2] = (unsigned int)acc; acc >>= 32;
701
702 acc += rp[3]; acc += bp[11-8];
703 acc += bp[11-8];
704 acc += bp[12-8];
705 acc += bp[12-8];
706 acc += bp[13-8];
707 acc -= bp[15-8];
708 acc -= bp[8-8];
709 acc -= bp[9-8]; rp[3] = (unsigned int)acc; acc >>= 32;
710
711 acc += rp[4]; acc += bp[12-8];
712 acc += bp[12-8];
713 acc += bp[13-8];
714 acc += bp[13-8];
715 acc += bp[14-8];
716 acc -= bp[9-8];
717 acc -= bp[10-8]; rp[4] = (unsigned int)acc; acc >>= 32;
718
719 acc += rp[5]; acc += bp[13-8];
720 acc += bp[13-8];
721 acc += bp[14-8];
722 acc += bp[14-8];
723 acc += bp[15-8];
724 acc -= bp[10-8];
725 acc -= bp[11-8]; rp[5] = (unsigned int)acc; acc >>= 32;
726
727 acc += rp[6]; acc += bp[14-8];
728 acc += bp[14-8];
729 acc += bp[15-8];
730 acc += bp[15-8];
731 acc += bp[14-8];
732 acc += bp[13-8];
733 acc -= bp[8-8];
734 acc -= bp[9-8]; rp[6] = (unsigned int)acc; acc >>= 32;
735
736 acc += rp[7]; acc += bp[15-8];
737 acc += bp[15-8];
738 acc += bp[15-8];
739 acc += bp[8 -8];
740 acc -= bp[10-8];
741 acc -= bp[11-8];
742 acc -= bp[12-8];
743 acc -= bp[13-8]; rp[7] = (unsigned int)acc;
744
745 carry = (int)(acc>>32);
746 }
747#else
748 {
749 BN_ULONG t_d[BN_NIST_256_TOP];
588 750
589 /*S1*/ 751 /*S1*/
590 nist_set_256(t_d, buf, 15, 14, 13, 12, 11, 0, 0, 0); 752 nist_set_256(t_d, buf.bn, 15, 14, 13, 12, 11, 0, 0, 0);
591 /*S2*/ 753 /*S2*/
592 nist_set_256(c_d, buf, 0, 15, 14, 13, 12, 0, 0, 0); 754 nist_set_256(c_d, buf.bn, 0, 15, 14, 13, 12, 0, 0, 0);
593 carry = (int)bn_add_words(t_d, t_d, c_d, BN_NIST_256_TOP); 755 carry = (int)bn_add_words(t_d, t_d, c_d, BN_NIST_256_TOP);
594 /* left shift */ 756 /* left shift */
595 { 757 {
@@ -607,24 +769,26 @@ int BN_nist_mod_256(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
607 } 769 }
608 carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_256_TOP); 770 carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_256_TOP);
609 /*S3*/ 771 /*S3*/
610 nist_set_256(t_d, buf, 15, 14, 0, 0, 0, 10, 9, 8); 772 nist_set_256(t_d, buf.bn, 15, 14, 0, 0, 0, 10, 9, 8);
611 carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_256_TOP); 773 carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_256_TOP);
612 /*S4*/ 774 /*S4*/
613 nist_set_256(t_d, buf, 8, 13, 15, 14, 13, 11, 10, 9); 775 nist_set_256(t_d, buf.bn, 8, 13, 15, 14, 13, 11, 10, 9);
614 carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_256_TOP); 776 carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_256_TOP);
615 /*D1*/ 777 /*D1*/
616 nist_set_256(t_d, buf, 10, 8, 0, 0, 0, 13, 12, 11); 778 nist_set_256(t_d, buf.bn, 10, 8, 0, 0, 0, 13, 12, 11);
617 carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_256_TOP); 779 carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_256_TOP);
618 /*D2*/ 780 /*D2*/
619 nist_set_256(t_d, buf, 11, 9, 0, 0, 15, 14, 13, 12); 781 nist_set_256(t_d, buf.bn, 11, 9, 0, 0, 15, 14, 13, 12);
620 carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_256_TOP); 782 carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_256_TOP);
621 /*D3*/ 783 /*D3*/
622 nist_set_256(t_d, buf, 12, 0, 10, 9, 8, 15, 14, 13); 784 nist_set_256(t_d, buf.bn, 12, 0, 10, 9, 8, 15, 14, 13);
623 carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_256_TOP); 785 carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_256_TOP);
624 /*D4*/ 786 /*D4*/
625 nist_set_256(t_d, buf, 13, 0, 11, 10, 9, 0, 15, 14); 787 nist_set_256(t_d, buf.bn, 13, 0, 11, 10, 9, 0, 15, 14);
626 carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_256_TOP); 788 carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_256_TOP);
627 789
790 }
791#endif
628 /* see BN_nist_mod_224 for explanation */ 792 /* see BN_nist_mod_224 for explanation */
629 u.f = bn_sub_words; 793 u.f = bn_sub_words;
630 if (carry > 0) 794 if (carry > 0)
@@ -672,9 +836,11 @@ int BN_nist_mod_384(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
672 int i, top = a->top; 836 int i, top = a->top;
673 int carry = 0; 837 int carry = 0;
674 register BN_ULONG *r_d, *a_d = a->d; 838 register BN_ULONG *r_d, *a_d = a->d;
675 BN_ULONG t_d[BN_NIST_384_TOP], 839 union {
676 buf[BN_NIST_384_TOP], 840 BN_ULONG bn[BN_NIST_384_TOP];
677 c_d[BN_NIST_384_TOP], 841 unsigned int ui[BN_NIST_384_TOP*sizeof(BN_ULONG)/sizeof(unsigned int)];
842 } buf;
843 BN_ULONG c_d[BN_NIST_384_TOP],
678 *res; 844 *res;
679 PTR_SIZE_INT mask; 845 PTR_SIZE_INT mask;
680 union { bn_addsub_f f; PTR_SIZE_INT p; } u; 846 union { bn_addsub_f f; PTR_SIZE_INT p; } u;
@@ -709,10 +875,100 @@ int BN_nist_mod_384(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
709 else 875 else
710 r_d = a_d; 876 r_d = a_d;
711 877
712 nist_cp_bn_0(buf, a_d + BN_NIST_384_TOP, top - BN_NIST_384_TOP, BN_NIST_384_TOP); 878 nist_cp_bn_0(buf.bn, a_d + BN_NIST_384_TOP, top - BN_NIST_384_TOP, BN_NIST_384_TOP);
879
880#if defined(NIST_INT64)
881 {
882 NIST_INT64 acc; /* accumulator */
883 unsigned int *rp=(unsigned int *)r_d;
884 const unsigned int *bp=(const unsigned int *)buf.ui;
885
886 acc = rp[0]; acc += bp[12-12];
887 acc += bp[21-12];
888 acc += bp[20-12];
889 acc -= bp[23-12]; rp[0] = (unsigned int)acc; acc >>= 32;
890
891 acc += rp[1]; acc += bp[13-12];
892 acc += bp[22-12];
893 acc += bp[23-12];
894 acc -= bp[12-12];
895 acc -= bp[20-12]; rp[1] = (unsigned int)acc; acc >>= 32;
896
897 acc += rp[2]; acc += bp[14-12];
898 acc += bp[23-12];
899 acc -= bp[13-12];
900 acc -= bp[21-12]; rp[2] = (unsigned int)acc; acc >>= 32;
901
902 acc += rp[3]; acc += bp[15-12];
903 acc += bp[12-12];
904 acc += bp[20-12];
905 acc += bp[21-12];
906 acc -= bp[14-12];
907 acc -= bp[22-12];
908 acc -= bp[23-12]; rp[3] = (unsigned int)acc; acc >>= 32;
909
910 acc += rp[4]; acc += bp[21-12];
911 acc += bp[21-12];
912 acc += bp[16-12];
913 acc += bp[13-12];
914 acc += bp[12-12];
915 acc += bp[20-12];
916 acc += bp[22-12];
917 acc -= bp[15-12];
918 acc -= bp[23-12];
919 acc -= bp[23-12]; rp[4] = (unsigned int)acc; acc >>= 32;
920
921 acc += rp[5]; acc += bp[22-12];
922 acc += bp[22-12];
923 acc += bp[17-12];
924 acc += bp[14-12];
925 acc += bp[13-12];
926 acc += bp[21-12];
927 acc += bp[23-12];
928 acc -= bp[16-12]; rp[5] = (unsigned int)acc; acc >>= 32;
929
930 acc += rp[6]; acc += bp[23-12];
931 acc += bp[23-12];
932 acc += bp[18-12];
933 acc += bp[15-12];
934 acc += bp[14-12];
935 acc += bp[22-12];
936 acc -= bp[17-12]; rp[6] = (unsigned int)acc; acc >>= 32;
937
938 acc += rp[7]; acc += bp[19-12];
939 acc += bp[16-12];
940 acc += bp[15-12];
941 acc += bp[23-12];
942 acc -= bp[18-12]; rp[7] = (unsigned int)acc; acc >>= 32;
943
944 acc += rp[8]; acc += bp[20-12];
945 acc += bp[17-12];
946 acc += bp[16-12];
947 acc -= bp[19-12]; rp[8] = (unsigned int)acc; acc >>= 32;
948
949 acc += rp[9]; acc += bp[21-12];
950 acc += bp[18-12];
951 acc += bp[17-12];
952 acc -= bp[20-12]; rp[9] = (unsigned int)acc; acc >>= 32;
953
954 acc += rp[10]; acc += bp[22-12];
955 acc += bp[19-12];
956 acc += bp[18-12];
957 acc -= bp[21-12]; rp[10] = (unsigned int)acc; acc >>= 32;
958
959 acc += rp[11]; acc += bp[23-12];
960 acc += bp[20-12];
961 acc += bp[19-12];
962 acc -= bp[22-12]; rp[11] = (unsigned int)acc;
963
964 carry = (int)(acc>>32);
965 }
966#else
967 {
968 BN_ULONG t_d[BN_NIST_384_TOP];
713 969
714 /*S1*/ 970 /*S1*/
715 nist_set_256(t_d, buf, 0, 0, 0, 0, 0, 23-4, 22-4, 21-4); 971 nist_set_256(t_d, buf.bn, 0, 0, 0, 0, 0, 23-4, 22-4, 21-4);
716 /* left shift */ 972 /* left shift */
717 { 973 {
718 register BN_ULONG *ap,t,c; 974 register BN_ULONG *ap,t,c;
@@ -729,29 +985,31 @@ int BN_nist_mod_384(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
729 carry = (int)bn_add_words(r_d+(128/BN_BITS2), r_d+(128/BN_BITS2), 985 carry = (int)bn_add_words(r_d+(128/BN_BITS2), r_d+(128/BN_BITS2),
730 t_d, BN_NIST_256_TOP); 986 t_d, BN_NIST_256_TOP);
731 /*S2 */ 987 /*S2 */
732 carry += (int)bn_add_words(r_d, r_d, buf, BN_NIST_384_TOP); 988 carry += (int)bn_add_words(r_d, r_d, buf.bn, BN_NIST_384_TOP);
733 /*S3*/ 989 /*S3*/
734 nist_set_384(t_d,buf,20,19,18,17,16,15,14,13,12,23,22,21); 990 nist_set_384(t_d,buf.bn,20,19,18,17,16,15,14,13,12,23,22,21);
735 carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_384_TOP); 991 carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_384_TOP);
736 /*S4*/ 992 /*S4*/
737 nist_set_384(t_d,buf,19,18,17,16,15,14,13,12,20,0,23,0); 993 nist_set_384(t_d,buf.bn,19,18,17,16,15,14,13,12,20,0,23,0);
738 carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_384_TOP); 994 carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_384_TOP);
739 /*S5*/ 995 /*S5*/
740 nist_set_384(t_d, buf,0,0,0,0,23,22,21,20,0,0,0,0); 996 nist_set_384(t_d, buf.bn,0,0,0,0,23,22,21,20,0,0,0,0);
741 carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_384_TOP); 997 carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_384_TOP);
742 /*S6*/ 998 /*S6*/
743 nist_set_384(t_d,buf,0,0,0,0,0,0,23,22,21,0,0,20); 999 nist_set_384(t_d,buf.bn,0,0,0,0,0,0,23,22,21,0,0,20);
744 carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_384_TOP); 1000 carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_384_TOP);
745 /*D1*/ 1001 /*D1*/
746 nist_set_384(t_d,buf,22,21,20,19,18,17,16,15,14,13,12,23); 1002 nist_set_384(t_d,buf.bn,22,21,20,19,18,17,16,15,14,13,12,23);
747 carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_384_TOP); 1003 carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_384_TOP);
748 /*D2*/ 1004 /*D2*/
749 nist_set_384(t_d,buf,0,0,0,0,0,0,0,23,22,21,20,0); 1005 nist_set_384(t_d,buf.bn,0,0,0,0,0,0,0,23,22,21,20,0);
750 carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_384_TOP); 1006 carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_384_TOP);
751 /*D3*/ 1007 /*D3*/
752 nist_set_384(t_d,buf,0,0,0,0,0,0,0,23,23,0,0,0); 1008 nist_set_384(t_d,buf.bn,0,0,0,0,0,0,0,23,23,0,0,0);
753 carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_384_TOP); 1009 carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_384_TOP);
754 1010
1011 }
1012#endif
755 /* see BN_nist_mod_224 for explanation */ 1013 /* see BN_nist_mod_224 for explanation */
756 u.f = bn_sub_words; 1014 u.f = bn_sub_words;
757 if (carry > 0) 1015 if (carry > 0)
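
Both branches of BN_nist_mod_192 above perform the standard fast reduction for p = 2^192 - 2^64 - 1: the upper half of the product folds back into the lower half because 2^192 == 2^64 + 1, 2^256 == 2^128 + 2^64 and 2^320 == 2^128 + 2^64 + 1 (mod p). The NIST_INT64 path does that folding 32 bits at a time in a 64-bit accumulator, the other path with bn_add_words on whole limbs; the 224/256/384-bit routines follow the same pattern with their own folding tables. An editor-added sketch of the P-192 folding on 64-bit words, assuming a GCC/clang-style unsigned __int128 (illustration only, not the library's code path):

/* Editor-added sketch, not part of the imported source.  Input a[] is a
 * little-endian 384-bit value (e.g. the product of two reduced 192-bit
 * values); output r[] = a mod (2^192 - 2^64 - 1). */
#include <stdint.h>

typedef unsigned __int128 u128;

void p192_reduce(uint64_t r[3], const uint64_t a[6])
    {
    static const uint64_t p[3] =
        { ~(uint64_t)0, ~(uint64_t)1, ~(uint64_t)0 };  /* 2^192-2^64-1 */
    u128 s0, s1, s2;
    uint64_t t[3], carry;
    int i, ge;

    /* fold the upper three words into the lower three */
    s0 = (u128)a[0] + a[3] + a[5];
    s1 = (u128)a[1] + a[3] + a[4] + a[5];
    s2 = (u128)a[2] + a[4] + a[5];

    s1 += s0 >> 64;  t[0] = (uint64_t)s0;
    s2 += s1 >> 64;  t[1] = (uint64_t)s1;
    t[2] = (uint64_t)s2;
    carry = (uint64_t)(s2 >> 64);

    /* fold the small top carry back in: carry*2^192 == carry*(2^64+1) */
    while (carry)
        {
        s0 = (u128)t[0] + carry;
        s1 = (u128)t[1] + carry + (s0 >> 64);
        s2 = (u128)t[2] + (s1 >> 64);
        t[0] = (uint64_t)s0; t[1] = (uint64_t)s1; t[2] = (uint64_t)s2;
        carry = (uint64_t)(s2 >> 64);
        }

    /* result is now below 2^192, so at most one subtraction of p remains */
    ge = 1;
    for (i = 2; i >= 0; i--)
        {
        if (t[i] > p[i]) break;
        if (t[i] < p[i]) { ge = 0; break; }
        }
    if (ge)
        {
        u128 d;
        uint64_t borrow = 0;
        for (i = 0; i < 3; i++)
            {
            d = (u128)t[i] - p[i] - borrow;
            t[i] = (uint64_t)d;
            borrow = (uint64_t)(d >> 64) & 1;
            }
        }
    r[0] = t[0]; r[1] = t[1]; r[2] = t[2];
    }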
diff --git a/src/lib/libssl/src/crypto/buffer/buf_str.c b/src/lib/libssl/src/crypto/buffer/buf_str.c
index 28dd1e401e..151f5ea971 100644
--- a/src/lib/libssl/src/crypto/buffer/buf_str.c
+++ b/src/lib/libssl/src/crypto/buffer/buf_str.c
@@ -1,56 +1,59 @@
1/* crypto/buffer/buf_str.c */ 1/* crypto/buffer/buffer.c */
2/* ==================================================================== 2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * Copyright (c) 2007 The OpenSSL Project. All rights reserved. 3 * All rights reserved.
4 * 4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
5 * Redistribution and use in source and binary forms, with or without 23 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions 24 * modification, are permitted provided that the following conditions
7 * are met: 25 * are met:
8 * 26 * 1. Redistributions of source code must retain the copyright
9 * 1. Redistributions of source code must retain the above copyright 27 * notice, this list of conditions and the following disclaimer.
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright 28 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in 29 * notice, this list of conditions and the following disclaimer in the
14 * the documentation and/or other materials provided with the 30 * documentation and/or other materials provided with the distribution.
15 * distribution. 31 * 3. All advertising materials mentioning features or use of this software
16 * 32 * must display the following acknowledgement:
17 * 3. All advertising materials mentioning features or use of this 33 * "This product includes cryptographic software written by
18 * software must display the following acknowledgment: 34 * Eric Young (eay@cryptsoft.com)"
19 * "This product includes software developed by the OpenSSL Project 35 * The word 'cryptographic' can be left out if the rouines from the library
20 * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" 36 * being used are not cryptographic related :-).
21 * 37 * 4. If you include any Windows specific code (or a derivative thereof) from
22 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to 38 * the apps directory (application code) you must include an acknowledgement:
23 * endorse or promote products derived from this software without 39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
24 * prior written permission. For written permission, please contact 40 *
25 * licensing@OpenSSL.org. 41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
26 * 42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * 5. Products derived from this software may not be called "OpenSSL" 43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * nor may "OpenSSL" appear in their names without prior written 44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
29 * permission of the OpenSSL Project. 45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 * 46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 * 6. Redistributions of any form whatsoever must retain the following 47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * acknowledgment: 48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * "This product includes software developed by the OpenSSL Project 49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" 50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * 51 * SUCH DAMAGE.
36 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY 52 *
37 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 53 * The licence and distribution terms for any publically available version or
38 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 54 * derivative of this code cannot be changed. i.e. this code cannot simply be
39 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR 55 * copied and put under another distribution licence
40 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 56 * [including the GNU Public Licence.]
41 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
42 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
43 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
44 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
45 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
46 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
47 * OF THE POSSIBILITY OF SUCH DAMAGE.
48 * ====================================================================
49 *
50 * This product includes cryptographic software written by Eric Young
51 * (eay@cryptsoft.com). This product includes software written by Tim
52 * Hudson (tjh@cryptsoft.com).
53 *
54 */ 57 */
55 58
56#include <stdio.h> 59#include <stdio.h>
diff --git a/src/lib/libssl/src/crypto/camellia/Makefile b/src/lib/libssl/src/crypto/camellia/Makefile
index ff5fe4a01d..6ce6fc99cd 100644
--- a/src/lib/libssl/src/crypto/camellia/Makefile
+++ b/src/lib/libssl/src/crypto/camellia/Makefile
@@ -23,9 +23,9 @@ APPS=
23 23
24LIB=$(TOP)/libcrypto.a 24LIB=$(TOP)/libcrypto.a
25LIBSRC=camellia.c cmll_misc.c cmll_ecb.c cmll_cbc.c cmll_ofb.c \ 25LIBSRC=camellia.c cmll_misc.c cmll_ecb.c cmll_cbc.c cmll_ofb.c \
26 cmll_cfb.c cmll_ctr.c 26 cmll_cfb.c cmll_ctr.c cmll_utl.c
27 27
28LIBOBJ= cmll_ecb.o cmll_ofb.o cmll_cfb.o cmll_ctr.o $(CMLL_ENC) 28LIBOBJ= cmll_ecb.o cmll_ofb.o cmll_cfb.o cmll_ctr.o cmll_utl.o $(CMLL_ENC)
29 29
30SRC= $(LIBSRC) 30SRC= $(LIBSRC)
31 31
@@ -96,8 +96,15 @@ cmll_ctr.o: ../../include/openssl/camellia.h ../../include/openssl/modes.h
96cmll_ctr.o: ../../include/openssl/opensslconf.h cmll_ctr.c 96cmll_ctr.o: ../../include/openssl/opensslconf.h cmll_ctr.c
97cmll_ecb.o: ../../include/openssl/camellia.h 97cmll_ecb.o: ../../include/openssl/camellia.h
98cmll_ecb.o: ../../include/openssl/opensslconf.h cmll_ecb.c cmll_locl.h 98cmll_ecb.o: ../../include/openssl/opensslconf.h cmll_ecb.c cmll_locl.h
99cmll_misc.o: ../../include/openssl/camellia.h 99cmll_misc.o: ../../include/openssl/camellia.h ../../include/openssl/crypto.h
100cmll_misc.o: ../../include/openssl/opensslconf.h 100cmll_misc.o: ../../include/openssl/e_os2.h ../../include/openssl/opensslconf.h
101cmll_misc.o: ../../include/openssl/opensslv.h cmll_locl.h cmll_misc.c 101cmll_misc.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
102cmll_misc.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
103cmll_misc.o: ../../include/openssl/symhacks.h cmll_locl.h cmll_misc.c
102cmll_ofb.o: ../../include/openssl/camellia.h ../../include/openssl/modes.h 104cmll_ofb.o: ../../include/openssl/camellia.h ../../include/openssl/modes.h
103cmll_ofb.o: ../../include/openssl/opensslconf.h cmll_ofb.c 105cmll_ofb.o: ../../include/openssl/opensslconf.h cmll_ofb.c
106cmll_utl.o: ../../include/openssl/camellia.h ../../include/openssl/crypto.h
107cmll_utl.o: ../../include/openssl/e_os2.h ../../include/openssl/opensslconf.h
108cmll_utl.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
109cmll_utl.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
110cmll_utl.o: ../../include/openssl/symhacks.h cmll_locl.h cmll_utl.c
diff --git a/src/lib/libssl/src/crypto/camellia/asm/cmll-x86.pl b/src/lib/libssl/src/crypto/camellia/asm/cmll-x86.pl
index 027302ac86..c314d62312 100644
--- a/src/lib/libssl/src/crypto/camellia/asm/cmll-x86.pl
+++ b/src/lib/libssl/src/crypto/camellia/asm/cmll-x86.pl
@@ -723,11 +723,11 @@ my $bias=int(@T[0])?shift(@T):0;
723&function_end("Camellia_Ekeygen"); 723&function_end("Camellia_Ekeygen");
724 724
725if ($OPENSSL) { 725if ($OPENSSL) {
726# int Camellia_set_key ( 726# int private_Camellia_set_key (
727# const unsigned char *userKey, 727# const unsigned char *userKey,
728# int bits, 728# int bits,
729# CAMELLIA_KEY *key) 729# CAMELLIA_KEY *key)
730&function_begin_B("Camellia_set_key"); 730&function_begin_B("private_Camellia_set_key");
731 &push ("ebx"); 731 &push ("ebx");
732 &mov ("ecx",&wparam(0)); # pull arguments 732 &mov ("ecx",&wparam(0)); # pull arguments
733 &mov ("ebx",&wparam(1)); 733 &mov ("ebx",&wparam(1));
@@ -760,7 +760,7 @@ if ($OPENSSL) {
760&set_label("done",4); 760&set_label("done",4);
761 &pop ("ebx"); 761 &pop ("ebx");
762 &ret (); 762 &ret ();
763&function_end_B("Camellia_set_key"); 763&function_end_B("private_Camellia_set_key");
764} 764}
765 765
766@SBOX=( 766@SBOX=(
diff --git a/src/lib/libssl/src/crypto/camellia/camellia.h b/src/lib/libssl/src/crypto/camellia/camellia.h
index cf0457dd97..67911e0adf 100644
--- a/src/lib/libssl/src/crypto/camellia/camellia.h
+++ b/src/lib/libssl/src/crypto/camellia/camellia.h
@@ -88,6 +88,10 @@ struct camellia_key_st
88 }; 88 };
89typedef struct camellia_key_st CAMELLIA_KEY; 89typedef struct camellia_key_st CAMELLIA_KEY;
90 90
91#ifdef OPENSSL_FIPS
92int private_Camellia_set_key(const unsigned char *userKey, const int bits,
93 CAMELLIA_KEY *key);
94#endif
91int Camellia_set_key(const unsigned char *userKey, const int bits, 95int Camellia_set_key(const unsigned char *userKey, const int bits,
92 CAMELLIA_KEY *key); 96 CAMELLIA_KEY *key);
93 97
diff --git a/src/lib/libssl/src/crypto/camellia/cmll_locl.h b/src/lib/libssl/src/crypto/camellia/cmll_locl.h
index 4a4d880d16..246b6ce1d8 100644
--- a/src/lib/libssl/src/crypto/camellia/cmll_locl.h
+++ b/src/lib/libssl/src/crypto/camellia/cmll_locl.h
@@ -71,7 +71,8 @@
71typedef unsigned int u32; 71typedef unsigned int u32;
72typedef unsigned char u8; 72typedef unsigned char u8;
73 73
74int Camellia_Ekeygen(int keyBitLength, const u8 *rawKey, KEY_TABLE_TYPE keyTable); 74int Camellia_Ekeygen(int keyBitLength, const u8 *rawKey,
75 KEY_TABLE_TYPE keyTable);
75void Camellia_EncryptBlock_Rounds(int grandRounds, const u8 plaintext[], 76void Camellia_EncryptBlock_Rounds(int grandRounds, const u8 plaintext[],
76 const KEY_TABLE_TYPE keyTable, u8 ciphertext[]); 77 const KEY_TABLE_TYPE keyTable, u8 ciphertext[]);
77void Camellia_DecryptBlock_Rounds(int grandRounds, const u8 ciphertext[], 78void Camellia_DecryptBlock_Rounds(int grandRounds, const u8 ciphertext[],
@@ -80,4 +81,6 @@ void Camellia_EncryptBlock(int keyBitLength, const u8 plaintext[],
80 const KEY_TABLE_TYPE keyTable, u8 ciphertext[]); 81 const KEY_TABLE_TYPE keyTable, u8 ciphertext[]);
81void Camellia_DecryptBlock(int keyBitLength, const u8 ciphertext[], 82void Camellia_DecryptBlock(int keyBitLength, const u8 ciphertext[],
82 const KEY_TABLE_TYPE keyTable, u8 plaintext[]); 83 const KEY_TABLE_TYPE keyTable, u8 plaintext[]);
84int private_Camellia_set_key(const unsigned char *userKey, const int bits,
85 CAMELLIA_KEY *key);
83#endif /* #ifndef HEADER_CAMELLIA_LOCL_H */ 86#endif /* #ifndef HEADER_CAMELLIA_LOCL_H */
diff --git a/src/lib/libssl/src/crypto/camellia/cmll_misc.c b/src/lib/libssl/src/crypto/camellia/cmll_misc.c
index f44689124b..f44d48564c 100644
--- a/src/lib/libssl/src/crypto/camellia/cmll_misc.c
+++ b/src/lib/libssl/src/crypto/camellia/cmll_misc.c
@@ -50,12 +50,13 @@
50 */ 50 */
51 51
52#include <openssl/opensslv.h> 52#include <openssl/opensslv.h>
53#include <openssl/crypto.h>
53#include <openssl/camellia.h> 54#include <openssl/camellia.h>
54#include "cmll_locl.h" 55#include "cmll_locl.h"
55 56
56const char CAMELLIA_version[]="CAMELLIA" OPENSSL_VERSION_PTEXT; 57const char CAMELLIA_version[]="CAMELLIA" OPENSSL_VERSION_PTEXT;
57 58
58int Camellia_set_key(const unsigned char *userKey, const int bits, 59int private_Camellia_set_key(const unsigned char *userKey, const int bits,
59 CAMELLIA_KEY *key) 60 CAMELLIA_KEY *key)
60 { 61 {
61 if(!userKey || !key) 62 if(!userKey || !key)
diff --git a/src/lib/libssl/src/crypto/camellia/cmll_utl.c b/src/lib/libssl/src/crypto/camellia/cmll_utl.c
new file mode 100644
index 0000000000..7a35711ec1
--- /dev/null
+++ b/src/lib/libssl/src/crypto/camellia/cmll_utl.c
@@ -0,0 +1,64 @@
1/* crypto/camellia/cmll_utl.c -*- mode:C; c-file-style: "eay" -*- */
2/* ====================================================================
3 * Copyright (c) 2011 The OpenSSL Project. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 *
17 * 3. All advertising materials mentioning features or use of this
18 * software must display the following acknowledgment:
19 * "This product includes software developed by the OpenSSL Project
20 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
21 *
22 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
23 * endorse or promote products derived from this software without
24 * prior written permission. For written permission, please contact
25 * openssl-core@openssl.org.
26 *
27 * 5. Products derived from this software may not be called "OpenSSL"
28 * nor may "OpenSSL" appear in their names without prior written
29 * permission of the OpenSSL Project.
30 *
31 * 6. Redistributions of any form whatsoever must retain the following
32 * acknowledgment:
33 * "This product includes software developed by the OpenSSL Project
34 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
35 *
36 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
37 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
38 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
39 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
40 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
42 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
43 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
44 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
45 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
46 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
47 * OF THE POSSIBILITY OF SUCH DAMAGE.
48 * ====================================================================
49 *
50 */
51
52#include <openssl/opensslv.h>
53#include <openssl/crypto.h>
54#include <openssl/camellia.h>
55#include "cmll_locl.h"
56
57int Camellia_set_key(const unsigned char *userKey, const int bits,
58 CAMELLIA_KEY *key)
59 {
60#ifdef OPENSSL_FIPS
61 fips_cipher_abort(Camellia);
62#endif
63 return private_Camellia_set_key(userKey, bits, key);
64 }
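
With this split, Camellia_set_key remains the public entry point (now just the FIPS gate in cmll_utl.c around private_Camellia_set_key), so existing callers keep working. As an editor-added illustration only, and assuming the usual low-level block API declared in <openssl/camellia.h> (Camellia_encrypt, CAMELLIA_BLOCK_SIZE), a single-block encrypt looks like this; raw one-block ECB is shown purely for demonstration, not as a recommended mode:

/* Editor-added usage sketch, not part of the imported source. */
#include <stdio.h>
#include <openssl/camellia.h>

int main(void)
    {
    static const unsigned char key_bytes[16] =
        { 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 };      /* demo key only */
    static const unsigned char plain[CAMELLIA_BLOCK_SIZE] = "one 16-byte blk";
    unsigned char cipher[CAMELLIA_BLOCK_SIZE];
    CAMELLIA_KEY key;
    int i;

    if (Camellia_set_key(key_bytes, 128, &key) != 0)    /* 0 on success */
        return 1;
    Camellia_encrypt(plain, cipher, &key);

    for (i = 0; i < CAMELLIA_BLOCK_SIZE; i++)
        printf("%02x", cipher[i]);
    printf("\n");
    return 0;
    }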
diff --git a/src/lib/libssl/src/crypto/cmac/Makefile b/src/lib/libssl/src/crypto/cmac/Makefile
new file mode 100644
index 0000000000..54e7cc39d5
--- /dev/null
+++ b/src/lib/libssl/src/crypto/cmac/Makefile
@@ -0,0 +1,111 @@
1#
2# OpenSSL/crypto/cmac/Makefile
3#
4
5DIR= cmac
6TOP= ../..
7CC= cc
8INCLUDES=
9CFLAG=-g
10MAKEFILE= Makefile
11AR= ar r
12
13CFLAGS= $(INCLUDES) $(CFLAG)
14
15GENERAL=Makefile
16TEST=
17APPS=
18
19LIB=$(TOP)/libcrypto.a
20LIBSRC=cmac.c cm_ameth.c cm_pmeth.c
21LIBOBJ=cmac.o cm_ameth.o cm_pmeth.o
22
23SRC= $(LIBSRC)
24
25EXHEADER= cmac.h
26HEADER= $(EXHEADER)
27
28ALL= $(GENERAL) $(SRC) $(HEADER)
29
30top:
31 (cd ../..; $(MAKE) DIRS=crypto SDIRS=$(DIR) sub_all)
32
33all: lib
34
35lib: $(LIBOBJ)
36 $(AR) $(LIB) $(LIBOBJ)
37 $(RANLIB) $(LIB) || echo Never mind.
38 @touch lib
39
40files:
41 $(PERL) $(TOP)/util/files.pl Makefile >> $(TOP)/MINFO
42
43links:
44 @$(PERL) $(TOP)/util/mklink.pl ../../include/openssl $(EXHEADER)
45 @$(PERL) $(TOP)/util/mklink.pl ../../test $(TEST)
46 @$(PERL) $(TOP)/util/mklink.pl ../../apps $(APPS)
47
48install:
49 @[ -n "$(INSTALLTOP)" ] # should be set by top Makefile...
50 @headerlist="$(EXHEADER)"; for i in $$headerlist ; \
51 do \
52 (cp $$i $(INSTALL_PREFIX)$(INSTALLTOP)/include/openssl/$$i; \
53 chmod 644 $(INSTALL_PREFIX)$(INSTALLTOP)/include/openssl/$$i ); \
54 done;
55
56tags:
57 ctags $(SRC)
58
59tests:
60
61lint:
62 lint -DLINT $(INCLUDES) $(SRC)>fluff
63
64depend:
65 @[ -n "$(MAKEDEPEND)" ] # should be set by upper Makefile...
66 $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(LIBSRC)
67
68dclean:
69 $(PERL) -pe 'if (/^# DO NOT DELETE THIS LINE/) {print; exit(0);}' $(MAKEFILE) >Makefile.new
70 mv -f Makefile.new $(MAKEFILE)
71
72clean:
73 rm -f *.o *.obj lib tags core .pure .nfs* *.old *.bak fluff
74
75# DO NOT DELETE THIS LINE -- make depend depends on it.
76
77cm_ameth.o: ../../e_os.h ../../include/openssl/asn1.h
78cm_ameth.o: ../../include/openssl/bio.h ../../include/openssl/buffer.h
79cm_ameth.o: ../../include/openssl/cmac.h ../../include/openssl/crypto.h
80cm_ameth.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
81cm_ameth.o: ../../include/openssl/evp.h ../../include/openssl/lhash.h
82cm_ameth.o: ../../include/openssl/obj_mac.h ../../include/openssl/objects.h
83cm_ameth.o: ../../include/openssl/opensslconf.h
84cm_ameth.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
85cm_ameth.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
86cm_ameth.o: ../../include/openssl/symhacks.h ../asn1/asn1_locl.h ../cryptlib.h
87cm_ameth.o: cm_ameth.c
88cm_pmeth.o: ../../e_os.h ../../include/openssl/asn1.h
89cm_pmeth.o: ../../include/openssl/bio.h ../../include/openssl/buffer.h
90cm_pmeth.o: ../../include/openssl/cmac.h ../../include/openssl/conf.h
91cm_pmeth.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h
92cm_pmeth.o: ../../include/openssl/ec.h ../../include/openssl/ecdh.h
93cm_pmeth.o: ../../include/openssl/ecdsa.h ../../include/openssl/err.h
94cm_pmeth.o: ../../include/openssl/evp.h ../../include/openssl/lhash.h
95cm_pmeth.o: ../../include/openssl/obj_mac.h ../../include/openssl/objects.h
96cm_pmeth.o: ../../include/openssl/opensslconf.h
97cm_pmeth.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
98cm_pmeth.o: ../../include/openssl/pkcs7.h ../../include/openssl/safestack.h
99cm_pmeth.o: ../../include/openssl/sha.h ../../include/openssl/stack.h
100cm_pmeth.o: ../../include/openssl/symhacks.h ../../include/openssl/x509.h
101cm_pmeth.o: ../../include/openssl/x509_vfy.h ../../include/openssl/x509v3.h
102cm_pmeth.o: ../cryptlib.h ../evp/evp_locl.h cm_pmeth.c
103cmac.o: ../../e_os.h ../../include/openssl/asn1.h ../../include/openssl/bio.h
104cmac.o: ../../include/openssl/buffer.h ../../include/openssl/cmac.h
105cmac.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h
106cmac.o: ../../include/openssl/err.h ../../include/openssl/evp.h
107cmac.o: ../../include/openssl/lhash.h ../../include/openssl/obj_mac.h
108cmac.o: ../../include/openssl/objects.h ../../include/openssl/opensslconf.h
109cmac.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
110cmac.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
111cmac.o: ../../include/openssl/symhacks.h ../cryptlib.h cmac.c
diff --git a/src/lib/libssl/src/crypto/cmac/cm_ameth.c b/src/lib/libssl/src/crypto/cmac/cm_ameth.c
new file mode 100644
index 0000000000..0b8e5670b0
--- /dev/null
+++ b/src/lib/libssl/src/crypto/cmac/cm_ameth.c
@@ -0,0 +1,97 @@
1/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL
2 * project 2010.
3 */
4/* ====================================================================
5 * Copyright (c) 2010 The OpenSSL Project. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 *
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in
16 * the documentation and/or other materials provided with the
17 * distribution.
18 *
19 * 3. All advertising materials mentioning features or use of this
20 * software must display the following acknowledgment:
21 * "This product includes software developed by the OpenSSL Project
22 * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
23 *
24 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
25 * endorse or promote products derived from this software without
26 * prior written permission. For written permission, please contact
27 * licensing@OpenSSL.org.
28 *
29 * 5. Products derived from this software may not be called "OpenSSL"
30 * nor may "OpenSSL" appear in their names without prior written
31 * permission of the OpenSSL Project.
32 *
33 * 6. Redistributions of any form whatsoever must retain the following
34 * acknowledgment:
35 * "This product includes software developed by the OpenSSL Project
36 * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
37 *
38 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
39 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
40 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
41 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
42 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
43 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
44 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
45 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
46 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
47 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
48 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
49 * OF THE POSSIBILITY OF SUCH DAMAGE.
50 * ====================================================================
51 */
52
53#include <stdio.h>
54#include "cryptlib.h"
55#include <openssl/evp.h>
56#include <openssl/cmac.h>
57#include "asn1_locl.h"
58
59/* CMAC "ASN1" method. This is just here to indicate the
60 * maximum CMAC output length and to free up a CMAC
61 * key.
62 */
63
64static int cmac_size(const EVP_PKEY *pkey)
65 {
66 return EVP_MAX_BLOCK_LENGTH;
67 }
68
69static void cmac_key_free(EVP_PKEY *pkey)
70 {
71 CMAC_CTX *cmctx = (CMAC_CTX *)pkey->pkey.ptr;
72 if (cmctx)
73 CMAC_CTX_free(cmctx);
74 }
75
76const EVP_PKEY_ASN1_METHOD cmac_asn1_meth =
77 {
78 EVP_PKEY_CMAC,
79 EVP_PKEY_CMAC,
80 0,
81
82 "CMAC",
83 "OpenSSL CMAC method",
84
85 0,0,0,0,
86
87 0,0,0,
88
89 cmac_size,
90 0,
91 0,0,0,0,0,0,0,
92
93 cmac_key_free,
94 0,
95 0,0
96 };
97
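
The cm_ameth.c table above (and cm_pmeth.c below) only adapt CMAC keys to the EVP_PKEY framework; the underlying CMAC_CTX API introduced with this import can also be driven directly. An editor-added sketch computing AES-128-CMAC over a short buffer, with a demo all-zero key and minimal error handling:

/* Editor-added usage sketch, not part of the imported source. */
#include <stdio.h>
#include <string.h>
#include <openssl/cmac.h>
#include <openssl/evp.h>

int main(void)
    {
    static const unsigned char key[16] = {0};           /* demo key only */
    const char *msg = "hello, cmac";
    unsigned char mac[EVP_MAX_BLOCK_LENGTH];
    size_t maclen, i;
    CMAC_CTX *ctx = CMAC_CTX_new();

    if (ctx == NULL)
        return 1;
    if (!CMAC_Init(ctx, key, sizeof(key), EVP_aes_128_cbc(), NULL) ||
        !CMAC_Update(ctx, msg, strlen(msg)) ||
        !CMAC_Final(ctx, mac, &maclen))
        {
        CMAC_CTX_free(ctx);
        return 1;
        }
    for (i = 0; i < maclen; i++)
        printf("%02x", mac[i]);
    printf("\n");
    CMAC_CTX_free(ctx);
    return 0;
    }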
diff --git a/src/lib/libssl/src/crypto/cmac/cm_pmeth.c b/src/lib/libssl/src/crypto/cmac/cm_pmeth.c
new file mode 100644
index 0000000000..072228ec7f
--- /dev/null
+++ b/src/lib/libssl/src/crypto/cmac/cm_pmeth.c
@@ -0,0 +1,224 @@
1/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL
2 * project 2010.
3 */
4/* ====================================================================
5 * Copyright (c) 2010 The OpenSSL Project. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 *
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in
16 * the documentation and/or other materials provided with the
17 * distribution.
18 *
19 * 3. All advertising materials mentioning features or use of this
20 * software must display the following acknowledgment:
21 * "This product includes software developed by the OpenSSL Project
22 * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
23 *
24 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
25 * endorse or promote products derived from this software without
26 * prior written permission. For written permission, please contact
27 * licensing@OpenSSL.org.
28 *
29 * 5. Products derived from this software may not be called "OpenSSL"
30 * nor may "OpenSSL" appear in their names without prior written
31 * permission of the OpenSSL Project.
32 *
33 * 6. Redistributions of any form whatsoever must retain the following
34 * acknowledgment:
35 * "This product includes software developed by the OpenSSL Project
36 * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
37 *
38 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
39 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
40 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
41 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
42 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
43 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
44 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
45 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
46 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
47 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
48 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
49 * OF THE POSSIBILITY OF SUCH DAMAGE.
50 * ====================================================================
51 */
52
53#include <stdio.h>
54#include "cryptlib.h"
55#include <openssl/x509.h>
56#include <openssl/x509v3.h>
57#include <openssl/evp.h>
58#include <openssl/cmac.h>
59#include "evp_locl.h"
60
 61/* The context structure and the "key" are both simply a CMAC_CTX */
62
63static int pkey_cmac_init(EVP_PKEY_CTX *ctx)
64 {
65 ctx->data = CMAC_CTX_new();
66 if (!ctx->data)
67 return 0;
68 ctx->keygen_info_count = 0;
69 return 1;
70 }
71
72static int pkey_cmac_copy(EVP_PKEY_CTX *dst, EVP_PKEY_CTX *src)
73 {
74 if (!pkey_cmac_init(dst))
75 return 0;
76 if (!CMAC_CTX_copy(dst->data, src->data))
77 return 0;
78 return 1;
79 }
80
81static void pkey_cmac_cleanup(EVP_PKEY_CTX *ctx)
82 {
83 CMAC_CTX_free(ctx->data);
84 }
85
86static int pkey_cmac_keygen(EVP_PKEY_CTX *ctx, EVP_PKEY *pkey)
87 {
88 CMAC_CTX *cmkey = CMAC_CTX_new();
89 CMAC_CTX *cmctx = ctx->data;
90 if (!cmkey)
91 return 0;
92 if (!CMAC_CTX_copy(cmkey, cmctx))
93 {
94 CMAC_CTX_free(cmkey);
95 return 0;
96 }
97 EVP_PKEY_assign(pkey, EVP_PKEY_CMAC, cmkey);
98
99 return 1;
100 }
101
102static int int_update(EVP_MD_CTX *ctx,const void *data,size_t count)
103 {
104 if (!CMAC_Update(ctx->pctx->data, data, count))
105 return 0;
106 return 1;
107 }
108
109static int cmac_signctx_init(EVP_PKEY_CTX *ctx, EVP_MD_CTX *mctx)
110 {
111 EVP_MD_CTX_set_flags(mctx, EVP_MD_CTX_FLAG_NO_INIT);
112 mctx->update = int_update;
113 return 1;
114 }
115
116static int cmac_signctx(EVP_PKEY_CTX *ctx, unsigned char *sig, size_t *siglen,
117 EVP_MD_CTX *mctx)
118 {
119 return CMAC_Final(ctx->data, sig, siglen);
120 }
121
122static int pkey_cmac_ctrl(EVP_PKEY_CTX *ctx, int type, int p1, void *p2)
123 {
124 CMAC_CTX *cmctx = ctx->data;
125 switch (type)
126 {
127
128 case EVP_PKEY_CTRL_SET_MAC_KEY:
129 if (!p2 || p1 < 0)
130 return 0;
131 if (!CMAC_Init(cmctx, p2, p1, NULL, NULL))
132 return 0;
133 break;
134
135 case EVP_PKEY_CTRL_CIPHER:
136 if (!CMAC_Init(cmctx, NULL, 0, p2, ctx->engine))
137 return 0;
138 break;
139
140 case EVP_PKEY_CTRL_MD:
141 if (ctx->pkey && !CMAC_CTX_copy(ctx->data,
142 (CMAC_CTX *)ctx->pkey->pkey.ptr))
143 return 0;
144 if (!CMAC_Init(cmctx, NULL, 0, NULL, NULL))
145 return 0;
146 break;
147
148 default:
149 return -2;
150
151 }
152 return 1;
153 }
154
155static int pkey_cmac_ctrl_str(EVP_PKEY_CTX *ctx,
156 const char *type, const char *value)
157 {
158 if (!value)
159 {
160 return 0;
161 }
162 if (!strcmp(type, "key"))
163 {
164 void *p = (void *)value;
165 return pkey_cmac_ctrl(ctx, EVP_PKEY_CTRL_SET_MAC_KEY,
166 strlen(p), p);
167 }
168 if (!strcmp(type, "cipher"))
169 {
170 const EVP_CIPHER *c;
171 c = EVP_get_cipherbyname(value);
172 if (!c)
173 return 0;
174 return pkey_cmac_ctrl(ctx, EVP_PKEY_CTRL_CIPHER, -1, (void *)c);
175 }
176 if (!strcmp(type, "hexkey"))
177 {
178 unsigned char *key;
179 int r;
180 long keylen;
181 key = string_to_hex(value, &keylen);
182 if (!key)
183 return 0;
184 r = pkey_cmac_ctrl(ctx, EVP_PKEY_CTRL_SET_MAC_KEY, keylen, key);
185 OPENSSL_free(key);
186 return r;
187 }
188 return -2;
189 }
190
191const EVP_PKEY_METHOD cmac_pkey_meth =
192 {
193 EVP_PKEY_CMAC,
194 EVP_PKEY_FLAG_SIGCTX_CUSTOM,
195 pkey_cmac_init,
196 pkey_cmac_copy,
197 pkey_cmac_cleanup,
198
199 0, 0,
200
201 0,
202 pkey_cmac_keygen,
203
204 0, 0,
205
206 0, 0,
207
208 0,0,
209
210 cmac_signctx_init,
211 cmac_signctx,
212
213 0,0,
214
215 0,0,
216
217 0,0,
218
219 0,0,
220
221 pkey_cmac_ctrl,
222 pkey_cmac_ctrl_str
223
224 };
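The method table above plugs CMAC into the generic EVP_PKEY machinery: a "key" is generated by copying the CMAC_CTX built up through the ctrl interface, and signing is routed through cmac_signctx_init()/cmac_signctx(). The string ctrls accepted by pkey_cmac_ctrl_str() are "key", "hexkey" and "cipher". A minimal sketch, not part of this commit, of creating a CMAC EVP_PKEY through that interface; the cipher name and hex key are placeholders:

#include <openssl/evp.h>

/* Sketch only: build a CMAC EVP_PKEY via the string ctrls handled by
 * pkey_cmac_ctrl_str() above.  The cipher must be set before the key,
 * since CMAC_Init() needs a cipher to derive the subkeys. */
static EVP_PKEY *cmac_pkey_from_strings(void)
	{
	EVP_PKEY *pkey = NULL;
	EVP_PKEY_CTX *kctx = EVP_PKEY_CTX_new_id(EVP_PKEY_CMAC, NULL);
	if (!kctx)
		return NULL;
	if (EVP_PKEY_keygen_init(kctx) <= 0
	    || EVP_PKEY_CTX_ctrl_str(kctx, "cipher", "aes-128-cbc") <= 0
	    || EVP_PKEY_CTX_ctrl_str(kctx, "hexkey",
			"000102030405060708090A0B0C0D0E0F") <= 0
	    || EVP_PKEY_keygen(kctx, &pkey) <= 0)
		pkey = NULL;
	EVP_PKEY_CTX_free(kctx);
	return pkey;
	}
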
diff --git a/src/lib/libssl/src/crypto/cmac/cmac.c b/src/lib/libssl/src/crypto/cmac/cmac.c
new file mode 100644
index 0000000000..8b72b09681
--- /dev/null
+++ b/src/lib/libssl/src/crypto/cmac/cmac.c
@@ -0,0 +1,308 @@
1/* crypto/cmac/cmac.c */
2/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL
3 * project.
4 */
5/* ====================================================================
6 * Copyright (c) 2010 The OpenSSL Project. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 *
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in
17 * the documentation and/or other materials provided with the
18 * distribution.
19 *
20 * 3. All advertising materials mentioning features or use of this
21 * software must display the following acknowledgment:
22 * "This product includes software developed by the OpenSSL Project
23 * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
24 *
25 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
26 * endorse or promote products derived from this software without
27 * prior written permission. For written permission, please contact
28 * licensing@OpenSSL.org.
29 *
30 * 5. Products derived from this software may not be called "OpenSSL"
31 * nor may "OpenSSL" appear in their names without prior written
32 * permission of the OpenSSL Project.
33 *
34 * 6. Redistributions of any form whatsoever must retain the following
35 * acknowledgment:
36 * "This product includes software developed by the OpenSSL Project
37 * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
38 *
39 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
40 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
41 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
42 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
43 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
44 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
45 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
46 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
47 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
48 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
49 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
50 * OF THE POSSIBILITY OF SUCH DAMAGE.
51 * ====================================================================
52 */
53
54#include <stdio.h>
55#include <stdlib.h>
56#include <string.h>
57#include "cryptlib.h"
58#include <openssl/cmac.h>
59
60#ifdef OPENSSL_FIPS
61#include <openssl/fips.h>
62#endif
63
64struct CMAC_CTX_st
65 {
66 /* Cipher context to use */
67 EVP_CIPHER_CTX cctx;
68 /* Keys k1 and k2 */
69 unsigned char k1[EVP_MAX_BLOCK_LENGTH];
70 unsigned char k2[EVP_MAX_BLOCK_LENGTH];
71 /* Temporary block */
72 unsigned char tbl[EVP_MAX_BLOCK_LENGTH];
73 /* Last (possibly partial) block */
74 unsigned char last_block[EVP_MAX_BLOCK_LENGTH];
75 /* Number of bytes in last block: -1 means context not initialised */
76 int nlast_block;
77 };
78
79
80/* Make temporary keys K1 and K2 */
81
82static void make_kn(unsigned char *k1, unsigned char *l, int bl)
83 {
84 int i;
85 /* Shift block to left, including carry */
86 for (i = 0; i < bl; i++)
87 {
88 k1[i] = l[i] << 1;
89 if (i < bl - 1 && l[i + 1] & 0x80)
90 k1[i] |= 1;
91 }
92 /* If MSB set fixup with R */
93 if (l[0] & 0x80)
94 k1[bl - 1] ^= bl == 16 ? 0x87 : 0x1b;
95 }
96
97CMAC_CTX *CMAC_CTX_new(void)
98 {
99 CMAC_CTX *ctx;
100 ctx = OPENSSL_malloc(sizeof(CMAC_CTX));
101 if (!ctx)
102 return NULL;
103 EVP_CIPHER_CTX_init(&ctx->cctx);
104 ctx->nlast_block = -1;
105 return ctx;
106 }
107
108void CMAC_CTX_cleanup(CMAC_CTX *ctx)
109 {
110#ifdef OPENSSL_FIPS
111 if (FIPS_mode() && !ctx->cctx.engine)
112 {
113 FIPS_cmac_ctx_cleanup(ctx);
114 return;
115 }
116#endif
117 EVP_CIPHER_CTX_cleanup(&ctx->cctx);
118 OPENSSL_cleanse(ctx->tbl, EVP_MAX_BLOCK_LENGTH);
119 OPENSSL_cleanse(ctx->k1, EVP_MAX_BLOCK_LENGTH);
120 OPENSSL_cleanse(ctx->k2, EVP_MAX_BLOCK_LENGTH);
121 OPENSSL_cleanse(ctx->last_block, EVP_MAX_BLOCK_LENGTH);
122 ctx->nlast_block = -1;
123 }
124
125EVP_CIPHER_CTX *CMAC_CTX_get0_cipher_ctx(CMAC_CTX *ctx)
126 {
127 return &ctx->cctx;
128 }
129
130void CMAC_CTX_free(CMAC_CTX *ctx)
131 {
132 CMAC_CTX_cleanup(ctx);
133 OPENSSL_free(ctx);
134 }
135
136int CMAC_CTX_copy(CMAC_CTX *out, const CMAC_CTX *in)
137 {
138 int bl;
139 if (in->nlast_block == -1)
140 return 0;
141 if (!EVP_CIPHER_CTX_copy(&out->cctx, &in->cctx))
142 return 0;
143 bl = EVP_CIPHER_CTX_block_size(&in->cctx);
144 memcpy(out->k1, in->k1, bl);
145 memcpy(out->k2, in->k2, bl);
146 memcpy(out->tbl, in->tbl, bl);
147 memcpy(out->last_block, in->last_block, bl);
148 out->nlast_block = in->nlast_block;
149 return 1;
150 }
151
152int CMAC_Init(CMAC_CTX *ctx, const void *key, size_t keylen,
153 const EVP_CIPHER *cipher, ENGINE *impl)
154 {
155 static unsigned char zero_iv[EVP_MAX_BLOCK_LENGTH];
156#ifdef OPENSSL_FIPS
157 if (FIPS_mode())
158 {
 159		/* If we have an ENGINE we need to allow non-FIPS */
160 if ((impl || ctx->cctx.engine)
161 && !(ctx->cctx.flags & EVP_CIPH_FLAG_NON_FIPS_ALLOW))
162
163 {
164 EVPerr(EVP_F_CMAC_INIT, EVP_R_DISABLED_FOR_FIPS);
165 return 0;
166 }
167 /* Other algorithm blocking will be done in FIPS_cmac_init,
168 * via FIPS_cipherinit().
169 */
170 if (!impl && !ctx->cctx.engine)
171 return FIPS_cmac_init(ctx, key, keylen, cipher, NULL);
172 }
173#endif
174 /* All zeros means restart */
175 if (!key && !cipher && !impl && keylen == 0)
176 {
177 /* Not initialised */
178 if (ctx->nlast_block == -1)
179 return 0;
180 if (!EVP_EncryptInit_ex(&ctx->cctx, NULL, NULL, NULL, zero_iv))
181 return 0;
182 memset(ctx->tbl, 0, EVP_CIPHER_CTX_block_size(&ctx->cctx));
183 ctx->nlast_block = 0;
184 return 1;
185 }
 186	/* Initialise context */
187 if (cipher && !EVP_EncryptInit_ex(&ctx->cctx, cipher, impl, NULL, NULL))
188 return 0;
189 /* Non-NULL key means initialisation complete */
190 if (key)
191 {
192 int bl;
193 if (!EVP_CIPHER_CTX_cipher(&ctx->cctx))
194 return 0;
195 if (!EVP_CIPHER_CTX_set_key_length(&ctx->cctx, keylen))
196 return 0;
197 if (!EVP_EncryptInit_ex(&ctx->cctx, NULL, NULL, key, zero_iv))
198 return 0;
199 bl = EVP_CIPHER_CTX_block_size(&ctx->cctx);
200 if (!EVP_Cipher(&ctx->cctx, ctx->tbl, zero_iv, bl))
201 return 0;
202 make_kn(ctx->k1, ctx->tbl, bl);
203 make_kn(ctx->k2, ctx->k1, bl);
204 OPENSSL_cleanse(ctx->tbl, bl);
205 /* Reset context again ready for first data block */
206 if (!EVP_EncryptInit_ex(&ctx->cctx, NULL, NULL, NULL, zero_iv))
207 return 0;
208 /* Zero tbl so resume works */
209 memset(ctx->tbl, 0, bl);
210 ctx->nlast_block = 0;
211 }
212 return 1;
213 }
214
215int CMAC_Update(CMAC_CTX *ctx, const void *in, size_t dlen)
216 {
217 const unsigned char *data = in;
218 size_t bl;
219#ifdef OPENSSL_FIPS
220 if (FIPS_mode() && !ctx->cctx.engine)
221 return FIPS_cmac_update(ctx, in, dlen);
222#endif
223 if (ctx->nlast_block == -1)
224 return 0;
225 if (dlen == 0)
226 return 1;
227 bl = EVP_CIPHER_CTX_block_size(&ctx->cctx);
228 /* Copy into partial block if we need to */
229 if (ctx->nlast_block > 0)
230 {
231 size_t nleft;
232 nleft = bl - ctx->nlast_block;
233 if (dlen < nleft)
234 nleft = dlen;
235 memcpy(ctx->last_block + ctx->nlast_block, data, nleft);
236 dlen -= nleft;
237 ctx->nlast_block += nleft;
238 /* If no more to process return */
239 if (dlen == 0)
240 return 1;
241 data += nleft;
242 /* Else not final block so encrypt it */
243 if (!EVP_Cipher(&ctx->cctx, ctx->tbl, ctx->last_block,bl))
244 return 0;
245 }
246 /* Encrypt all but one of the complete blocks left */
247 while(dlen > bl)
248 {
249 if (!EVP_Cipher(&ctx->cctx, ctx->tbl, data, bl))
250 return 0;
251 dlen -= bl;
252 data += bl;
253 }
254 /* Copy any data left to last block buffer */
255 memcpy(ctx->last_block, data, dlen);
256 ctx->nlast_block = dlen;
257 return 1;
258
259 }
260
261int CMAC_Final(CMAC_CTX *ctx, unsigned char *out, size_t *poutlen)
262 {
263 int i, bl, lb;
264#ifdef OPENSSL_FIPS
265 if (FIPS_mode() && !ctx->cctx.engine)
266 return FIPS_cmac_final(ctx, out, poutlen);
267#endif
268 if (ctx->nlast_block == -1)
269 return 0;
270 bl = EVP_CIPHER_CTX_block_size(&ctx->cctx);
271 *poutlen = (size_t)bl;
272 if (!out)
273 return 1;
274 lb = ctx->nlast_block;
275 /* Is last block complete? */
276 if (lb == bl)
277 {
278 for (i = 0; i < bl; i++)
279 out[i] = ctx->last_block[i] ^ ctx->k1[i];
280 }
281 else
282 {
283 ctx->last_block[lb] = 0x80;
284 if (bl - lb > 1)
285 memset(ctx->last_block + lb + 1, 0, bl - lb - 1);
286 for (i = 0; i < bl; i++)
287 out[i] = ctx->last_block[i] ^ ctx->k2[i];
288 }
289 if (!EVP_Cipher(&ctx->cctx, out, out, bl))
290 {
291 OPENSSL_cleanse(out, bl);
292 return 0;
293 }
294 return 1;
295 }
296
297int CMAC_resume(CMAC_CTX *ctx)
298 {
299 if (ctx->nlast_block == -1)
300 return 0;
 301	/* The buffer "tbl" contains the last fully encrypted block
 302	 * which is the last IV (or all zeroes if no last encrypted block).
 303	 * The last block has not been modified since CMAC_Final().
 304	 * So reinitialising using the last encrypted block will allow
 305	 * CMAC to continue after calling CMAC_Final().
306 */
307 return EVP_EncryptInit_ex(&ctx->cctx, NULL, NULL, NULL, ctx->tbl);
308 }
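The low-level API above follows the usual init/update/final pattern: CMAC_Init() with a key and cipher derives the subkeys K1/K2, CMAC_Update() encrypts whole blocks and buffers any trailing partial block, CMAC_Final() pads and masks the last block, and CMAC_resume() re-primes the cipher IV from "tbl" so more data can be processed after a Final. A minimal usage sketch, not part of this commit; the all-zero key is a placeholder only:

#include <openssl/cmac.h>
#include <openssl/evp.h>

/* Sketch only: one-shot AES-128 CMAC over a message.  "mac" must hold at
 * least the cipher block size (16 bytes for AES); *maclen is set by
 * CMAC_Final().  Placeholder key, never use a fixed key in real code. */
static int aes_cmac_demo(const unsigned char *msg, size_t msglen,
	unsigned char *mac, size_t *maclen)
	{
	static const unsigned char key[16] = { 0 };
	int ok = 0;
	CMAC_CTX *ctx = CMAC_CTX_new();
	if (!ctx)
		return 0;
	if (CMAC_Init(ctx, key, sizeof(key), EVP_aes_128_cbc(), NULL)
	    && CMAC_Update(ctx, msg, msglen)
	    && CMAC_Final(ctx, mac, maclen))
		ok = 1;
	CMAC_CTX_free(ctx);
	return ok;
	}
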
diff --git a/src/lib/libssl/src/crypto/cmac/cmac.h b/src/lib/libssl/src/crypto/cmac/cmac.h
new file mode 100644
index 0000000000..712e92dced
--- /dev/null
+++ b/src/lib/libssl/src/crypto/cmac/cmac.h
@@ -0,0 +1,82 @@
1/* crypto/cmac/cmac.h */
2/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL
3 * project.
4 */
5/* ====================================================================
6 * Copyright (c) 2010 The OpenSSL Project. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 *
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in
17 * the documentation and/or other materials provided with the
18 * distribution.
19 *
20 * 3. All advertising materials mentioning features or use of this
21 * software must display the following acknowledgment:
22 * "This product includes software developed by the OpenSSL Project
23 * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
24 *
25 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
26 * endorse or promote products derived from this software without
27 * prior written permission. For written permission, please contact
28 * licensing@OpenSSL.org.
29 *
30 * 5. Products derived from this software may not be called "OpenSSL"
31 * nor may "OpenSSL" appear in their names without prior written
32 * permission of the OpenSSL Project.
33 *
34 * 6. Redistributions of any form whatsoever must retain the following
35 * acknowledgment:
36 * "This product includes software developed by the OpenSSL Project
37 * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
38 *
39 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
40 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
41 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
42 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
43 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
44 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
45 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
46 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
47 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
48 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
49 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
50 * OF THE POSSIBILITY OF SUCH DAMAGE.
51 * ====================================================================
52 */
53
54
55#ifndef HEADER_CMAC_H
56#define HEADER_CMAC_H
57
58#ifdef __cplusplus
59extern "C" {
60#endif
61
62#include <openssl/evp.h>
63
64/* Opaque */
65typedef struct CMAC_CTX_st CMAC_CTX;
66
67CMAC_CTX *CMAC_CTX_new(void);
68void CMAC_CTX_cleanup(CMAC_CTX *ctx);
69void CMAC_CTX_free(CMAC_CTX *ctx);
70EVP_CIPHER_CTX *CMAC_CTX_get0_cipher_ctx(CMAC_CTX *ctx);
71int CMAC_CTX_copy(CMAC_CTX *out, const CMAC_CTX *in);
72
73int CMAC_Init(CMAC_CTX *ctx, const void *key, size_t keylen,
74 const EVP_CIPHER *cipher, ENGINE *impl);
75int CMAC_Update(CMAC_CTX *ctx, const void *data, size_t dlen);
76int CMAC_Final(CMAC_CTX *ctx, unsigned char *out, size_t *poutlen);
77int CMAC_resume(CMAC_CTX *ctx);
78
79#ifdef __cplusplus
80}
81#endif
82#endif
diff --git a/src/lib/libssl/src/crypto/cms/Makefile b/src/lib/libssl/src/crypto/cms/Makefile
index 5837049725..9820adb212 100644
--- a/src/lib/libssl/src/crypto/cms/Makefile
+++ b/src/lib/libssl/src/crypto/cms/Makefile
@@ -18,9 +18,11 @@ APPS=
18 18
19LIB=$(TOP)/libcrypto.a 19LIB=$(TOP)/libcrypto.a
20LIBSRC= cms_lib.c cms_asn1.c cms_att.c cms_io.c cms_smime.c cms_err.c \ 20LIBSRC= cms_lib.c cms_asn1.c cms_att.c cms_io.c cms_smime.c cms_err.c \
21 cms_sd.c cms_dd.c cms_cd.c cms_env.c cms_enc.c cms_ess.c 21 cms_sd.c cms_dd.c cms_cd.c cms_env.c cms_enc.c cms_ess.c \
22 cms_pwri.c
22LIBOBJ= cms_lib.o cms_asn1.o cms_att.o cms_io.o cms_smime.o cms_err.o \ 23LIBOBJ= cms_lib.o cms_asn1.o cms_att.o cms_io.o cms_smime.o cms_err.o \
23 cms_sd.o cms_dd.o cms_cd.o cms_env.o cms_enc.o cms_ess.o 24 cms_sd.o cms_dd.o cms_cd.o cms_env.o cms_enc.o cms_ess.o \
25 cms_pwri.o
24 26
25SRC= $(LIBSRC) 27SRC= $(LIBSRC)
26 28
@@ -230,6 +232,24 @@ cms_lib.o: ../../include/openssl/safestack.h ../../include/openssl/sha.h
230cms_lib.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h 232cms_lib.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
231cms_lib.o: ../../include/openssl/x509.h ../../include/openssl/x509_vfy.h cms.h 233cms_lib.o: ../../include/openssl/x509.h ../../include/openssl/x509_vfy.h cms.h
232cms_lib.o: cms_lcl.h cms_lib.c 234cms_lib.o: cms_lcl.h cms_lib.c
235cms_pwri.o: ../../e_os.h ../../include/openssl/aes.h
236cms_pwri.o: ../../include/openssl/asn1.h ../../include/openssl/asn1t.h
237cms_pwri.o: ../../include/openssl/bio.h ../../include/openssl/buffer.h
238cms_pwri.o: ../../include/openssl/cms.h ../../include/openssl/conf.h
239cms_pwri.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h
240cms_pwri.o: ../../include/openssl/ec.h ../../include/openssl/ecdh.h
241cms_pwri.o: ../../include/openssl/ecdsa.h ../../include/openssl/err.h
242cms_pwri.o: ../../include/openssl/evp.h ../../include/openssl/lhash.h
243cms_pwri.o: ../../include/openssl/obj_mac.h ../../include/openssl/objects.h
244cms_pwri.o: ../../include/openssl/opensslconf.h
245cms_pwri.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
246cms_pwri.o: ../../include/openssl/pem.h ../../include/openssl/pem2.h
247cms_pwri.o: ../../include/openssl/pkcs7.h ../../include/openssl/rand.h
248cms_pwri.o: ../../include/openssl/safestack.h ../../include/openssl/sha.h
249cms_pwri.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
250cms_pwri.o: ../../include/openssl/x509.h ../../include/openssl/x509_vfy.h
251cms_pwri.o: ../../include/openssl/x509v3.h ../asn1/asn1_locl.h ../cryptlib.h
252cms_pwri.o: cms_lcl.h cms_pwri.c
233cms_sd.o: ../../e_os.h ../../include/openssl/asn1.h 253cms_sd.o: ../../e_os.h ../../include/openssl/asn1.h
234cms_sd.o: ../../include/openssl/asn1t.h ../../include/openssl/bio.h 254cms_sd.o: ../../include/openssl/asn1t.h ../../include/openssl/bio.h
235cms_sd.o: ../../include/openssl/buffer.h ../../include/openssl/cms.h 255cms_sd.o: ../../include/openssl/buffer.h ../../include/openssl/cms.h
diff --git a/src/lib/libssl/src/crypto/cms/cms.h b/src/lib/libssl/src/crypto/cms/cms.h
index 09c45d0412..36994fa6a2 100644
--- a/src/lib/libssl/src/crypto/cms/cms.h
+++ b/src/lib/libssl/src/crypto/cms/cms.h
@@ -111,6 +111,7 @@ DECLARE_ASN1_PRINT_FUNCTION(CMS_ContentInfo)
111#define CMS_PARTIAL 0x4000 111#define CMS_PARTIAL 0x4000
112#define CMS_REUSE_DIGEST 0x8000 112#define CMS_REUSE_DIGEST 0x8000
113#define CMS_USE_KEYID 0x10000 113#define CMS_USE_KEYID 0x10000
114#define CMS_DEBUG_DECRYPT 0x20000
114 115
115const ASN1_OBJECT *CMS_get0_type(CMS_ContentInfo *cms); 116const ASN1_OBJECT *CMS_get0_type(CMS_ContentInfo *cms);
116 117
@@ -184,6 +185,8 @@ int CMS_decrypt_set1_pkey(CMS_ContentInfo *cms, EVP_PKEY *pk, X509 *cert);
184int CMS_decrypt_set1_key(CMS_ContentInfo *cms, 185int CMS_decrypt_set1_key(CMS_ContentInfo *cms,
185 unsigned char *key, size_t keylen, 186 unsigned char *key, size_t keylen,
186 unsigned char *id, size_t idlen); 187 unsigned char *id, size_t idlen);
188int CMS_decrypt_set1_password(CMS_ContentInfo *cms,
189 unsigned char *pass, ossl_ssize_t passlen);
187 190
188STACK_OF(CMS_RecipientInfo) *CMS_get0_RecipientInfos(CMS_ContentInfo *cms); 191STACK_OF(CMS_RecipientInfo) *CMS_get0_RecipientInfos(CMS_ContentInfo *cms);
189int CMS_RecipientInfo_type(CMS_RecipientInfo *ri); 192int CMS_RecipientInfo_type(CMS_RecipientInfo *ri);
@@ -219,6 +222,16 @@ int CMS_RecipientInfo_set0_key(CMS_RecipientInfo *ri,
219int CMS_RecipientInfo_kekri_id_cmp(CMS_RecipientInfo *ri, 222int CMS_RecipientInfo_kekri_id_cmp(CMS_RecipientInfo *ri,
220 const unsigned char *id, size_t idlen); 223 const unsigned char *id, size_t idlen);
221 224
225int CMS_RecipientInfo_set0_password(CMS_RecipientInfo *ri,
226 unsigned char *pass,
227 ossl_ssize_t passlen);
228
229CMS_RecipientInfo *CMS_add0_recipient_password(CMS_ContentInfo *cms,
230 int iter, int wrap_nid, int pbe_nid,
231 unsigned char *pass,
232 ossl_ssize_t passlen,
233 const EVP_CIPHER *kekciph);
234
222int CMS_RecipientInfo_decrypt(CMS_ContentInfo *cms, CMS_RecipientInfo *ri); 235int CMS_RecipientInfo_decrypt(CMS_ContentInfo *cms, CMS_RecipientInfo *ri);
223 236
224int CMS_uncompress(CMS_ContentInfo *cms, BIO *dcont, BIO *out, 237int CMS_uncompress(CMS_ContentInfo *cms, BIO *dcont, BIO *out,
@@ -330,6 +343,7 @@ void ERR_load_CMS_strings(void);
330#define CMS_F_CHECK_CONTENT 99 343#define CMS_F_CHECK_CONTENT 99
331#define CMS_F_CMS_ADD0_CERT 164 344#define CMS_F_CMS_ADD0_CERT 164
332#define CMS_F_CMS_ADD0_RECIPIENT_KEY 100 345#define CMS_F_CMS_ADD0_RECIPIENT_KEY 100
346#define CMS_F_CMS_ADD0_RECIPIENT_PASSWORD 165
333#define CMS_F_CMS_ADD1_RECEIPTREQUEST 158 347#define CMS_F_CMS_ADD1_RECEIPTREQUEST 158
334#define CMS_F_CMS_ADD1_RECIPIENT_CERT 101 348#define CMS_F_CMS_ADD1_RECIPIENT_CERT 101
335#define CMS_F_CMS_ADD1_SIGNER 102 349#define CMS_F_CMS_ADD1_SIGNER 102
@@ -344,6 +358,7 @@ void ERR_load_CMS_strings(void);
344#define CMS_F_CMS_DATAINIT 111 358#define CMS_F_CMS_DATAINIT 111
345#define CMS_F_CMS_DECRYPT 112 359#define CMS_F_CMS_DECRYPT 112
346#define CMS_F_CMS_DECRYPT_SET1_KEY 113 360#define CMS_F_CMS_DECRYPT_SET1_KEY 113
361#define CMS_F_CMS_DECRYPT_SET1_PASSWORD 166
347#define CMS_F_CMS_DECRYPT_SET1_PKEY 114 362#define CMS_F_CMS_DECRYPT_SET1_PKEY 114
348#define CMS_F_CMS_DIGESTALGORITHM_FIND_CTX 115 363#define CMS_F_CMS_DIGESTALGORITHM_FIND_CTX 115
349#define CMS_F_CMS_DIGESTALGORITHM_INIT_BIO 116 364#define CMS_F_CMS_DIGESTALGORITHM_INIT_BIO 116
@@ -378,7 +393,9 @@ void ERR_load_CMS_strings(void);
378#define CMS_F_CMS_RECIPIENTINFO_KTRI_ENCRYPT 141 393#define CMS_F_CMS_RECIPIENTINFO_KTRI_ENCRYPT 141
379#define CMS_F_CMS_RECIPIENTINFO_KTRI_GET0_ALGS 142 394#define CMS_F_CMS_RECIPIENTINFO_KTRI_GET0_ALGS 142
380#define CMS_F_CMS_RECIPIENTINFO_KTRI_GET0_SIGNER_ID 143 395#define CMS_F_CMS_RECIPIENTINFO_KTRI_GET0_SIGNER_ID 143
396#define CMS_F_CMS_RECIPIENTINFO_PWRI_CRYPT 167
381#define CMS_F_CMS_RECIPIENTINFO_SET0_KEY 144 397#define CMS_F_CMS_RECIPIENTINFO_SET0_KEY 144
398#define CMS_F_CMS_RECIPIENTINFO_SET0_PASSWORD 168
382#define CMS_F_CMS_RECIPIENTINFO_SET0_PKEY 145 399#define CMS_F_CMS_RECIPIENTINFO_SET0_PKEY 145
383#define CMS_F_CMS_SET1_SIGNERIDENTIFIER 146 400#define CMS_F_CMS_SET1_SIGNERIDENTIFIER 146
384#define CMS_F_CMS_SET_DETACHED 147 401#define CMS_F_CMS_SET_DETACHED 147
@@ -419,6 +436,7 @@ void ERR_load_CMS_strings(void);
419#define CMS_R_ERROR_SETTING_KEY 115 436#define CMS_R_ERROR_SETTING_KEY 115
420#define CMS_R_ERROR_SETTING_RECIPIENTINFO 116 437#define CMS_R_ERROR_SETTING_RECIPIENTINFO 116
421#define CMS_R_INVALID_ENCRYPTED_KEY_LENGTH 117 438#define CMS_R_INVALID_ENCRYPTED_KEY_LENGTH 117
439#define CMS_R_INVALID_KEY_ENCRYPTION_PARAMETER 176
422#define CMS_R_INVALID_KEY_LENGTH 118 440#define CMS_R_INVALID_KEY_LENGTH 118
423#define CMS_R_MD_BIO_INIT_ERROR 119 441#define CMS_R_MD_BIO_INIT_ERROR 119
424#define CMS_R_MESSAGEDIGEST_ATTRIBUTE_WRONG_LENGTH 120 442#define CMS_R_MESSAGEDIGEST_ATTRIBUTE_WRONG_LENGTH 120
@@ -431,6 +449,7 @@ void ERR_load_CMS_strings(void);
431#define CMS_R_NOT_ENCRYPTED_DATA 122 449#define CMS_R_NOT_ENCRYPTED_DATA 122
432#define CMS_R_NOT_KEK 123 450#define CMS_R_NOT_KEK 123
433#define CMS_R_NOT_KEY_TRANSPORT 124 451#define CMS_R_NOT_KEY_TRANSPORT 124
452#define CMS_R_NOT_PWRI 177
434#define CMS_R_NOT_SUPPORTED_FOR_THIS_KEY_TYPE 125 453#define CMS_R_NOT_SUPPORTED_FOR_THIS_KEY_TYPE 125
435#define CMS_R_NO_CIPHER 126 454#define CMS_R_NO_CIPHER 126
436#define CMS_R_NO_CONTENT 127 455#define CMS_R_NO_CONTENT 127
@@ -443,6 +462,7 @@ void ERR_load_CMS_strings(void);
443#define CMS_R_NO_MATCHING_RECIPIENT 132 462#define CMS_R_NO_MATCHING_RECIPIENT 132
444#define CMS_R_NO_MATCHING_SIGNATURE 166 463#define CMS_R_NO_MATCHING_SIGNATURE 166
445#define CMS_R_NO_MSGSIGDIGEST 167 464#define CMS_R_NO_MSGSIGDIGEST 167
465#define CMS_R_NO_PASSWORD 178
446#define CMS_R_NO_PRIVATE_KEY 133 466#define CMS_R_NO_PRIVATE_KEY 133
447#define CMS_R_NO_PUBLIC_KEY 134 467#define CMS_R_NO_PUBLIC_KEY 134
448#define CMS_R_NO_RECEIPT_REQUEST 168 468#define CMS_R_NO_RECEIPT_REQUEST 168
@@ -466,10 +486,12 @@ void ERR_load_CMS_strings(void);
466#define CMS_R_UNSUPPORTED_COMPRESSION_ALGORITHM 151 486#define CMS_R_UNSUPPORTED_COMPRESSION_ALGORITHM 151
467#define CMS_R_UNSUPPORTED_CONTENT_TYPE 152 487#define CMS_R_UNSUPPORTED_CONTENT_TYPE 152
468#define CMS_R_UNSUPPORTED_KEK_ALGORITHM 153 488#define CMS_R_UNSUPPORTED_KEK_ALGORITHM 153
489#define CMS_R_UNSUPPORTED_KEY_ENCRYPTION_ALGORITHM 179
469#define CMS_R_UNSUPPORTED_RECIPIENT_TYPE 154 490#define CMS_R_UNSUPPORTED_RECIPIENT_TYPE 154
470#define CMS_R_UNSUPPORTED_RECPIENTINFO_TYPE 155 491#define CMS_R_UNSUPPORTED_RECPIENTINFO_TYPE 155
471#define CMS_R_UNSUPPORTED_TYPE 156 492#define CMS_R_UNSUPPORTED_TYPE 156
472#define CMS_R_UNWRAP_ERROR 157 493#define CMS_R_UNWRAP_ERROR 157
494#define CMS_R_UNWRAP_FAILURE 180
473#define CMS_R_VERIFICATION_FAILURE 158 495#define CMS_R_VERIFICATION_FAILURE 158
474#define CMS_R_WRAP_ERROR 159 496#define CMS_R_WRAP_ERROR 159
475 497
diff --git a/src/lib/libssl/src/crypto/cms/cms_asn1.c b/src/lib/libssl/src/crypto/cms/cms_asn1.c
index fcba4dcbcc..cfe67fb6c1 100644
--- a/src/lib/libssl/src/crypto/cms/cms_asn1.c
+++ b/src/lib/libssl/src/crypto/cms/cms_asn1.c
@@ -237,6 +237,15 @@ static int cms_ri_cb(int operation, ASN1_VALUE **pval, const ASN1_ITEM *it,
237 OPENSSL_free(kekri->key); 237 OPENSSL_free(kekri->key);
238 } 238 }
239 } 239 }
240 else if (ri->type == CMS_RECIPINFO_PASS)
241 {
242 CMS_PasswordRecipientInfo *pwri = ri->d.pwri;
243 if (pwri->pass)
244 {
245 OPENSSL_cleanse(pwri->pass, pwri->passlen);
246 OPENSSL_free(pwri->pass);
247 }
248 }
240 } 249 }
241 return 1; 250 return 1;
242 } 251 }
diff --git a/src/lib/libssl/src/crypto/cms/cms_enc.c b/src/lib/libssl/src/crypto/cms/cms_enc.c
index bab26235bd..f873ce3794 100644
--- a/src/lib/libssl/src/crypto/cms/cms_enc.c
+++ b/src/lib/libssl/src/crypto/cms/cms_enc.c
@@ -73,6 +73,8 @@ BIO *cms_EncryptedContent_init_bio(CMS_EncryptedContentInfo *ec)
73 const EVP_CIPHER *ciph; 73 const EVP_CIPHER *ciph;
74 X509_ALGOR *calg = ec->contentEncryptionAlgorithm; 74 X509_ALGOR *calg = ec->contentEncryptionAlgorithm;
75 unsigned char iv[EVP_MAX_IV_LENGTH], *piv = NULL; 75 unsigned char iv[EVP_MAX_IV_LENGTH], *piv = NULL;
76 unsigned char *tkey = NULL;
77 size_t tkeylen;
76 78
77 int ok = 0; 79 int ok = 0;
78 80
@@ -137,32 +139,57 @@ BIO *cms_EncryptedContent_init_bio(CMS_EncryptedContentInfo *ec)
137 CMS_R_CIPHER_PARAMETER_INITIALISATION_ERROR); 139 CMS_R_CIPHER_PARAMETER_INITIALISATION_ERROR);
138 goto err; 140 goto err;
139 } 141 }
140 142 tkeylen = EVP_CIPHER_CTX_key_length(ctx);
141 143 /* Generate random session key */
142 if (enc && !ec->key) 144 if (!enc || !ec->key)
143 { 145 {
144 /* Generate random key */ 146 tkey = OPENSSL_malloc(tkeylen);
145 if (!ec->keylen) 147 if (!tkey)
146 ec->keylen = EVP_CIPHER_CTX_key_length(ctx);
147 ec->key = OPENSSL_malloc(ec->keylen);
148 if (!ec->key)
149 { 148 {
150 CMSerr(CMS_F_CMS_ENCRYPTEDCONTENT_INIT_BIO, 149 CMSerr(CMS_F_CMS_ENCRYPTEDCONTENT_INIT_BIO,
151 ERR_R_MALLOC_FAILURE); 150 ERR_R_MALLOC_FAILURE);
152 goto err; 151 goto err;
153 } 152 }
154 if (EVP_CIPHER_CTX_rand_key(ctx, ec->key) <= 0) 153 if (EVP_CIPHER_CTX_rand_key(ctx, tkey) <= 0)
155 goto err; 154 goto err;
156 keep_key = 1;
157 } 155 }
158 else if (ec->keylen != (unsigned int)EVP_CIPHER_CTX_key_length(ctx)) 156
157 if (!ec->key)
158 {
159 ec->key = tkey;
160 ec->keylen = tkeylen;
161 tkey = NULL;
162 if (enc)
163 keep_key = 1;
164 else
165 ERR_clear_error();
166
167 }
168
169 if (ec->keylen != tkeylen)
159 { 170 {
160 /* If necessary set key length */ 171 /* If necessary set key length */
161 if (EVP_CIPHER_CTX_set_key_length(ctx, ec->keylen) <= 0) 172 if (EVP_CIPHER_CTX_set_key_length(ctx, ec->keylen) <= 0)
162 { 173 {
163 CMSerr(CMS_F_CMS_ENCRYPTEDCONTENT_INIT_BIO, 174 /* Only reveal failure if debugging so we don't
164 CMS_R_INVALID_KEY_LENGTH); 175 * leak information which may be useful in MMA.
165 goto err; 176 */
177 if (enc || ec->debug)
178 {
179 CMSerr(CMS_F_CMS_ENCRYPTEDCONTENT_INIT_BIO,
180 CMS_R_INVALID_KEY_LENGTH);
181 goto err;
182 }
183 else
184 {
185 /* Use random key */
186 OPENSSL_cleanse(ec->key, ec->keylen);
187 OPENSSL_free(ec->key);
188 ec->key = tkey;
189 ec->keylen = tkeylen;
190 tkey = NULL;
191 ERR_clear_error();
192 }
166 } 193 }
167 } 194 }
168 195
@@ -198,6 +225,11 @@ BIO *cms_EncryptedContent_init_bio(CMS_EncryptedContentInfo *ec)
198 OPENSSL_free(ec->key); 225 OPENSSL_free(ec->key);
199 ec->key = NULL; 226 ec->key = NULL;
200 } 227 }
228 if (tkey)
229 {
230 OPENSSL_cleanse(tkey, tkeylen);
231 OPENSSL_free(tkey);
232 }
201 if (ok) 233 if (ok)
202 return b; 234 return b;
203 BIO_free(b); 235 BIO_free(b);
diff --git a/src/lib/libssl/src/crypto/cms/cms_env.c b/src/lib/libssl/src/crypto/cms/cms_env.c
index b3237d4b94..be20b1c024 100644
--- a/src/lib/libssl/src/crypto/cms/cms_env.c
+++ b/src/lib/libssl/src/crypto/cms/cms_env.c
@@ -65,14 +65,13 @@
65/* CMS EnvelopedData Utilities */ 65/* CMS EnvelopedData Utilities */
66 66
67DECLARE_ASN1_ITEM(CMS_EnvelopedData) 67DECLARE_ASN1_ITEM(CMS_EnvelopedData)
68DECLARE_ASN1_ITEM(CMS_RecipientInfo)
69DECLARE_ASN1_ITEM(CMS_KeyTransRecipientInfo) 68DECLARE_ASN1_ITEM(CMS_KeyTransRecipientInfo)
70DECLARE_ASN1_ITEM(CMS_KEKRecipientInfo) 69DECLARE_ASN1_ITEM(CMS_KEKRecipientInfo)
71DECLARE_ASN1_ITEM(CMS_OtherKeyAttribute) 70DECLARE_ASN1_ITEM(CMS_OtherKeyAttribute)
72 71
73DECLARE_STACK_OF(CMS_RecipientInfo) 72DECLARE_STACK_OF(CMS_RecipientInfo)
74 73
75static CMS_EnvelopedData *cms_get0_enveloped(CMS_ContentInfo *cms) 74CMS_EnvelopedData *cms_get0_enveloped(CMS_ContentInfo *cms)
76 { 75 {
77 if (OBJ_obj2nid(cms->contentType) != NID_pkcs7_enveloped) 76 if (OBJ_obj2nid(cms->contentType) != NID_pkcs7_enveloped)
78 { 77 {
@@ -371,6 +370,8 @@ static int cms_RecipientInfo_ktri_decrypt(CMS_ContentInfo *cms,
371 unsigned char *ek = NULL; 370 unsigned char *ek = NULL;
372 size_t eklen; 371 size_t eklen;
373 int ret = 0; 372 int ret = 0;
373 CMS_EncryptedContentInfo *ec;
374 ec = cms->d.envelopedData->encryptedContentInfo;
374 375
375 if (ktri->pkey == NULL) 376 if (ktri->pkey == NULL)
376 { 377 {
@@ -417,8 +418,14 @@ static int cms_RecipientInfo_ktri_decrypt(CMS_ContentInfo *cms,
417 418
418 ret = 1; 419 ret = 1;
419 420
420 cms->d.envelopedData->encryptedContentInfo->key = ek; 421 if (ec->key)
421 cms->d.envelopedData->encryptedContentInfo->keylen = eklen; 422 {
423 OPENSSL_cleanse(ec->key, ec->keylen);
424 OPENSSL_free(ec->key);
425 }
426
427 ec->key = ek;
428 ec->keylen = eklen;
422 429
423 err: 430 err:
424 if (pctx) 431 if (pctx)
@@ -786,6 +793,9 @@ int CMS_RecipientInfo_decrypt(CMS_ContentInfo *cms, CMS_RecipientInfo *ri)
786 case CMS_RECIPINFO_KEK: 793 case CMS_RECIPINFO_KEK:
787 return cms_RecipientInfo_kekri_decrypt(cms, ri); 794 return cms_RecipientInfo_kekri_decrypt(cms, ri);
788 795
796 case CMS_RECIPINFO_PASS:
797 return cms_RecipientInfo_pwri_crypt(cms, ri, 0);
798
789 default: 799 default:
790 CMSerr(CMS_F_CMS_RECIPIENTINFO_DECRYPT, 800 CMSerr(CMS_F_CMS_RECIPIENTINFO_DECRYPT,
791 CMS_R_UNSUPPORTED_RECPIENTINFO_TYPE); 801 CMS_R_UNSUPPORTED_RECPIENTINFO_TYPE);
@@ -829,6 +839,10 @@ BIO *cms_EnvelopedData_init_bio(CMS_ContentInfo *cms)
829 r = cms_RecipientInfo_kekri_encrypt(cms, ri); 839 r = cms_RecipientInfo_kekri_encrypt(cms, ri);
830 break; 840 break;
831 841
842 case CMS_RECIPINFO_PASS:
843 r = cms_RecipientInfo_pwri_crypt(cms, ri, 1);
844 break;
845
832 default: 846 default:
833 CMSerr(CMS_F_CMS_ENVELOPEDDATA_INIT_BIO, 847 CMSerr(CMS_F_CMS_ENVELOPEDDATA_INIT_BIO,
834 CMS_R_UNSUPPORTED_RECIPIENT_TYPE); 848 CMS_R_UNSUPPORTED_RECIPIENT_TYPE);
diff --git a/src/lib/libssl/src/crypto/cms/cms_err.c b/src/lib/libssl/src/crypto/cms/cms_err.c
index ff7b0309e5..8330ead7ed 100644
--- a/src/lib/libssl/src/crypto/cms/cms_err.c
+++ b/src/lib/libssl/src/crypto/cms/cms_err.c
@@ -1,6 +1,6 @@
1/* crypto/cms/cms_err.c */ 1/* crypto/cms/cms_err.c */
2/* ==================================================================== 2/* ====================================================================
3 * Copyright (c) 1999-2007 The OpenSSL Project. All rights reserved. 3 * Copyright (c) 1999-2009 The OpenSSL Project. All rights reserved.
4 * 4 *
5 * Redistribution and use in source and binary forms, with or without 5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions 6 * modification, are permitted provided that the following conditions
@@ -73,6 +73,7 @@ static ERR_STRING_DATA CMS_str_functs[]=
73{ERR_FUNC(CMS_F_CHECK_CONTENT), "CHECK_CONTENT"}, 73{ERR_FUNC(CMS_F_CHECK_CONTENT), "CHECK_CONTENT"},
74{ERR_FUNC(CMS_F_CMS_ADD0_CERT), "CMS_add0_cert"}, 74{ERR_FUNC(CMS_F_CMS_ADD0_CERT), "CMS_add0_cert"},
75{ERR_FUNC(CMS_F_CMS_ADD0_RECIPIENT_KEY), "CMS_add0_recipient_key"}, 75{ERR_FUNC(CMS_F_CMS_ADD0_RECIPIENT_KEY), "CMS_add0_recipient_key"},
76{ERR_FUNC(CMS_F_CMS_ADD0_RECIPIENT_PASSWORD), "CMS_add0_recipient_password"},
76{ERR_FUNC(CMS_F_CMS_ADD1_RECEIPTREQUEST), "CMS_add1_ReceiptRequest"}, 77{ERR_FUNC(CMS_F_CMS_ADD1_RECEIPTREQUEST), "CMS_add1_ReceiptRequest"},
77{ERR_FUNC(CMS_F_CMS_ADD1_RECIPIENT_CERT), "CMS_add1_recipient_cert"}, 78{ERR_FUNC(CMS_F_CMS_ADD1_RECIPIENT_CERT), "CMS_add1_recipient_cert"},
78{ERR_FUNC(CMS_F_CMS_ADD1_SIGNER), "CMS_add1_signer"}, 79{ERR_FUNC(CMS_F_CMS_ADD1_SIGNER), "CMS_add1_signer"},
@@ -87,6 +88,7 @@ static ERR_STRING_DATA CMS_str_functs[]=
87{ERR_FUNC(CMS_F_CMS_DATAINIT), "CMS_dataInit"}, 88{ERR_FUNC(CMS_F_CMS_DATAINIT), "CMS_dataInit"},
88{ERR_FUNC(CMS_F_CMS_DECRYPT), "CMS_decrypt"}, 89{ERR_FUNC(CMS_F_CMS_DECRYPT), "CMS_decrypt"},
89{ERR_FUNC(CMS_F_CMS_DECRYPT_SET1_KEY), "CMS_decrypt_set1_key"}, 90{ERR_FUNC(CMS_F_CMS_DECRYPT_SET1_KEY), "CMS_decrypt_set1_key"},
91{ERR_FUNC(CMS_F_CMS_DECRYPT_SET1_PASSWORD), "CMS_decrypt_set1_password"},
90{ERR_FUNC(CMS_F_CMS_DECRYPT_SET1_PKEY), "CMS_decrypt_set1_pkey"}, 92{ERR_FUNC(CMS_F_CMS_DECRYPT_SET1_PKEY), "CMS_decrypt_set1_pkey"},
91{ERR_FUNC(CMS_F_CMS_DIGESTALGORITHM_FIND_CTX), "cms_DigestAlgorithm_find_ctx"}, 93{ERR_FUNC(CMS_F_CMS_DIGESTALGORITHM_FIND_CTX), "cms_DigestAlgorithm_find_ctx"},
92{ERR_FUNC(CMS_F_CMS_DIGESTALGORITHM_INIT_BIO), "cms_DigestAlgorithm_init_bio"}, 94{ERR_FUNC(CMS_F_CMS_DIGESTALGORITHM_INIT_BIO), "cms_DigestAlgorithm_init_bio"},
@@ -105,7 +107,7 @@ static ERR_STRING_DATA CMS_str_functs[]=
105{ERR_FUNC(CMS_F_CMS_GET0_CERTIFICATE_CHOICES), "CMS_GET0_CERTIFICATE_CHOICES"}, 107{ERR_FUNC(CMS_F_CMS_GET0_CERTIFICATE_CHOICES), "CMS_GET0_CERTIFICATE_CHOICES"},
106{ERR_FUNC(CMS_F_CMS_GET0_CONTENT), "CMS_get0_content"}, 108{ERR_FUNC(CMS_F_CMS_GET0_CONTENT), "CMS_get0_content"},
107{ERR_FUNC(CMS_F_CMS_GET0_ECONTENT_TYPE), "CMS_GET0_ECONTENT_TYPE"}, 109{ERR_FUNC(CMS_F_CMS_GET0_ECONTENT_TYPE), "CMS_GET0_ECONTENT_TYPE"},
108{ERR_FUNC(CMS_F_CMS_GET0_ENVELOPED), "CMS_GET0_ENVELOPED"}, 110{ERR_FUNC(CMS_F_CMS_GET0_ENVELOPED), "cms_get0_enveloped"},
109{ERR_FUNC(CMS_F_CMS_GET0_REVOCATION_CHOICES), "CMS_GET0_REVOCATION_CHOICES"}, 111{ERR_FUNC(CMS_F_CMS_GET0_REVOCATION_CHOICES), "CMS_GET0_REVOCATION_CHOICES"},
110{ERR_FUNC(CMS_F_CMS_GET0_SIGNED), "CMS_GET0_SIGNED"}, 112{ERR_FUNC(CMS_F_CMS_GET0_SIGNED), "CMS_GET0_SIGNED"},
111{ERR_FUNC(CMS_F_CMS_MSGSIGDIGEST_ADD1), "cms_msgSigDigest_add1"}, 113{ERR_FUNC(CMS_F_CMS_MSGSIGDIGEST_ADD1), "cms_msgSigDigest_add1"},
@@ -121,7 +123,9 @@ static ERR_STRING_DATA CMS_str_functs[]=
121{ERR_FUNC(CMS_F_CMS_RECIPIENTINFO_KTRI_ENCRYPT), "CMS_RECIPIENTINFO_KTRI_ENCRYPT"}, 123{ERR_FUNC(CMS_F_CMS_RECIPIENTINFO_KTRI_ENCRYPT), "CMS_RECIPIENTINFO_KTRI_ENCRYPT"},
122{ERR_FUNC(CMS_F_CMS_RECIPIENTINFO_KTRI_GET0_ALGS), "CMS_RecipientInfo_ktri_get0_algs"}, 124{ERR_FUNC(CMS_F_CMS_RECIPIENTINFO_KTRI_GET0_ALGS), "CMS_RecipientInfo_ktri_get0_algs"},
123{ERR_FUNC(CMS_F_CMS_RECIPIENTINFO_KTRI_GET0_SIGNER_ID), "CMS_RecipientInfo_ktri_get0_signer_id"}, 125{ERR_FUNC(CMS_F_CMS_RECIPIENTINFO_KTRI_GET0_SIGNER_ID), "CMS_RecipientInfo_ktri_get0_signer_id"},
126{ERR_FUNC(CMS_F_CMS_RECIPIENTINFO_PWRI_CRYPT), "cms_RecipientInfo_pwri_crypt"},
124{ERR_FUNC(CMS_F_CMS_RECIPIENTINFO_SET0_KEY), "CMS_RecipientInfo_set0_key"}, 127{ERR_FUNC(CMS_F_CMS_RECIPIENTINFO_SET0_KEY), "CMS_RecipientInfo_set0_key"},
128{ERR_FUNC(CMS_F_CMS_RECIPIENTINFO_SET0_PASSWORD), "CMS_RecipientInfo_set0_password"},
125{ERR_FUNC(CMS_F_CMS_RECIPIENTINFO_SET0_PKEY), "CMS_RecipientInfo_set0_pkey"}, 129{ERR_FUNC(CMS_F_CMS_RECIPIENTINFO_SET0_PKEY), "CMS_RecipientInfo_set0_pkey"},
126{ERR_FUNC(CMS_F_CMS_SET1_SIGNERIDENTIFIER), "cms_set1_SignerIdentifier"}, 130{ERR_FUNC(CMS_F_CMS_SET1_SIGNERIDENTIFIER), "cms_set1_SignerIdentifier"},
127{ERR_FUNC(CMS_F_CMS_SET_DETACHED), "CMS_set_detached"}, 131{ERR_FUNC(CMS_F_CMS_SET_DETACHED), "CMS_set_detached"},
@@ -165,6 +169,7 @@ static ERR_STRING_DATA CMS_str_reasons[]=
165{ERR_REASON(CMS_R_ERROR_SETTING_KEY) ,"error setting key"}, 169{ERR_REASON(CMS_R_ERROR_SETTING_KEY) ,"error setting key"},
166{ERR_REASON(CMS_R_ERROR_SETTING_RECIPIENTINFO),"error setting recipientinfo"}, 170{ERR_REASON(CMS_R_ERROR_SETTING_RECIPIENTINFO),"error setting recipientinfo"},
167{ERR_REASON(CMS_R_INVALID_ENCRYPTED_KEY_LENGTH),"invalid encrypted key length"}, 171{ERR_REASON(CMS_R_INVALID_ENCRYPTED_KEY_LENGTH),"invalid encrypted key length"},
172{ERR_REASON(CMS_R_INVALID_KEY_ENCRYPTION_PARAMETER),"invalid key encryption parameter"},
168{ERR_REASON(CMS_R_INVALID_KEY_LENGTH) ,"invalid key length"}, 173{ERR_REASON(CMS_R_INVALID_KEY_LENGTH) ,"invalid key length"},
169{ERR_REASON(CMS_R_MD_BIO_INIT_ERROR) ,"md bio init error"}, 174{ERR_REASON(CMS_R_MD_BIO_INIT_ERROR) ,"md bio init error"},
170{ERR_REASON(CMS_R_MESSAGEDIGEST_ATTRIBUTE_WRONG_LENGTH),"messagedigest attribute wrong length"}, 175{ERR_REASON(CMS_R_MESSAGEDIGEST_ATTRIBUTE_WRONG_LENGTH),"messagedigest attribute wrong length"},
@@ -177,6 +182,7 @@ static ERR_STRING_DATA CMS_str_reasons[]=
177{ERR_REASON(CMS_R_NOT_ENCRYPTED_DATA) ,"not encrypted data"}, 182{ERR_REASON(CMS_R_NOT_ENCRYPTED_DATA) ,"not encrypted data"},
178{ERR_REASON(CMS_R_NOT_KEK) ,"not kek"}, 183{ERR_REASON(CMS_R_NOT_KEK) ,"not kek"},
179{ERR_REASON(CMS_R_NOT_KEY_TRANSPORT) ,"not key transport"}, 184{ERR_REASON(CMS_R_NOT_KEY_TRANSPORT) ,"not key transport"},
185{ERR_REASON(CMS_R_NOT_PWRI) ,"not pwri"},
180{ERR_REASON(CMS_R_NOT_SUPPORTED_FOR_THIS_KEY_TYPE),"not supported for this key type"}, 186{ERR_REASON(CMS_R_NOT_SUPPORTED_FOR_THIS_KEY_TYPE),"not supported for this key type"},
181{ERR_REASON(CMS_R_NO_CIPHER) ,"no cipher"}, 187{ERR_REASON(CMS_R_NO_CIPHER) ,"no cipher"},
182{ERR_REASON(CMS_R_NO_CONTENT) ,"no content"}, 188{ERR_REASON(CMS_R_NO_CONTENT) ,"no content"},
@@ -189,6 +195,7 @@ static ERR_STRING_DATA CMS_str_reasons[]=
189{ERR_REASON(CMS_R_NO_MATCHING_RECIPIENT) ,"no matching recipient"}, 195{ERR_REASON(CMS_R_NO_MATCHING_RECIPIENT) ,"no matching recipient"},
190{ERR_REASON(CMS_R_NO_MATCHING_SIGNATURE) ,"no matching signature"}, 196{ERR_REASON(CMS_R_NO_MATCHING_SIGNATURE) ,"no matching signature"},
191{ERR_REASON(CMS_R_NO_MSGSIGDIGEST) ,"no msgsigdigest"}, 197{ERR_REASON(CMS_R_NO_MSGSIGDIGEST) ,"no msgsigdigest"},
198{ERR_REASON(CMS_R_NO_PASSWORD) ,"no password"},
192{ERR_REASON(CMS_R_NO_PRIVATE_KEY) ,"no private key"}, 199{ERR_REASON(CMS_R_NO_PRIVATE_KEY) ,"no private key"},
193{ERR_REASON(CMS_R_NO_PUBLIC_KEY) ,"no public key"}, 200{ERR_REASON(CMS_R_NO_PUBLIC_KEY) ,"no public key"},
194{ERR_REASON(CMS_R_NO_RECEIPT_REQUEST) ,"no receipt request"}, 201{ERR_REASON(CMS_R_NO_RECEIPT_REQUEST) ,"no receipt request"},
@@ -212,10 +219,12 @@ static ERR_STRING_DATA CMS_str_reasons[]=
212{ERR_REASON(CMS_R_UNSUPPORTED_COMPRESSION_ALGORITHM),"unsupported compression algorithm"}, 219{ERR_REASON(CMS_R_UNSUPPORTED_COMPRESSION_ALGORITHM),"unsupported compression algorithm"},
213{ERR_REASON(CMS_R_UNSUPPORTED_CONTENT_TYPE),"unsupported content type"}, 220{ERR_REASON(CMS_R_UNSUPPORTED_CONTENT_TYPE),"unsupported content type"},
214{ERR_REASON(CMS_R_UNSUPPORTED_KEK_ALGORITHM),"unsupported kek algorithm"}, 221{ERR_REASON(CMS_R_UNSUPPORTED_KEK_ALGORITHM),"unsupported kek algorithm"},
222{ERR_REASON(CMS_R_UNSUPPORTED_KEY_ENCRYPTION_ALGORITHM),"unsupported key encryption algorithm"},
215{ERR_REASON(CMS_R_UNSUPPORTED_RECIPIENT_TYPE),"unsupported recipient type"}, 223{ERR_REASON(CMS_R_UNSUPPORTED_RECIPIENT_TYPE),"unsupported recipient type"},
216{ERR_REASON(CMS_R_UNSUPPORTED_RECPIENTINFO_TYPE),"unsupported recpientinfo type"}, 224{ERR_REASON(CMS_R_UNSUPPORTED_RECPIENTINFO_TYPE),"unsupported recpientinfo type"},
217{ERR_REASON(CMS_R_UNSUPPORTED_TYPE) ,"unsupported type"}, 225{ERR_REASON(CMS_R_UNSUPPORTED_TYPE) ,"unsupported type"},
218{ERR_REASON(CMS_R_UNWRAP_ERROR) ,"unwrap error"}, 226{ERR_REASON(CMS_R_UNWRAP_ERROR) ,"unwrap error"},
227{ERR_REASON(CMS_R_UNWRAP_FAILURE) ,"unwrap failure"},
219{ERR_REASON(CMS_R_VERIFICATION_FAILURE) ,"verification failure"}, 228{ERR_REASON(CMS_R_VERIFICATION_FAILURE) ,"verification failure"},
220{ERR_REASON(CMS_R_WRAP_ERROR) ,"wrap error"}, 229{ERR_REASON(CMS_R_WRAP_ERROR) ,"wrap error"},
221{0,NULL} 230{0,NULL}
diff --git a/src/lib/libssl/src/crypto/cms/cms_lcl.h b/src/lib/libssl/src/crypto/cms/cms_lcl.h
index c8ecfa724a..a9f9730157 100644
--- a/src/lib/libssl/src/crypto/cms/cms_lcl.h
+++ b/src/lib/libssl/src/crypto/cms/cms_lcl.h
@@ -175,6 +175,8 @@ struct CMS_EncryptedContentInfo_st
175 const EVP_CIPHER *cipher; 175 const EVP_CIPHER *cipher;
176 unsigned char *key; 176 unsigned char *key;
177 size_t keylen; 177 size_t keylen;
178 /* Set to 1 if we are debugging decrypt and don't fake keys for MMA */
179 int debug;
178 }; 180 };
179 181
180struct CMS_RecipientInfo_st 182struct CMS_RecipientInfo_st
@@ -273,6 +275,9 @@ struct CMS_PasswordRecipientInfo_st
273 X509_ALGOR *keyDerivationAlgorithm; 275 X509_ALGOR *keyDerivationAlgorithm;
274 X509_ALGOR *keyEncryptionAlgorithm; 276 X509_ALGOR *keyEncryptionAlgorithm;
275 ASN1_OCTET_STRING *encryptedKey; 277 ASN1_OCTET_STRING *encryptedKey;
278 /* Extra info: password to use */
279 unsigned char *pass;
280 size_t passlen;
276 }; 281 };
277 282
278struct CMS_OtherRecipientInfo_st 283struct CMS_OtherRecipientInfo_st
@@ -411,6 +416,8 @@ DECLARE_ASN1_ITEM(CMS_SignerInfo)
411DECLARE_ASN1_ITEM(CMS_IssuerAndSerialNumber) 416DECLARE_ASN1_ITEM(CMS_IssuerAndSerialNumber)
412DECLARE_ASN1_ITEM(CMS_Attributes_Sign) 417DECLARE_ASN1_ITEM(CMS_Attributes_Sign)
413DECLARE_ASN1_ITEM(CMS_Attributes_Verify) 418DECLARE_ASN1_ITEM(CMS_Attributes_Verify)
419DECLARE_ASN1_ITEM(CMS_RecipientInfo)
420DECLARE_ASN1_ITEM(CMS_PasswordRecipientInfo)
414DECLARE_ASN1_ALLOC_FUNCTIONS(CMS_IssuerAndSerialNumber) 421DECLARE_ASN1_ALLOC_FUNCTIONS(CMS_IssuerAndSerialNumber)
415 422
416#define CMS_SIGNERINFO_ISSUER_SERIAL 0 423#define CMS_SIGNERINFO_ISSUER_SERIAL 0
@@ -454,6 +461,11 @@ int cms_msgSigDigest_add1(CMS_SignerInfo *dest, CMS_SignerInfo *src);
454ASN1_OCTET_STRING *cms_encode_Receipt(CMS_SignerInfo *si); 461ASN1_OCTET_STRING *cms_encode_Receipt(CMS_SignerInfo *si);
455 462
456BIO *cms_EnvelopedData_init_bio(CMS_ContentInfo *cms); 463BIO *cms_EnvelopedData_init_bio(CMS_ContentInfo *cms);
464CMS_EnvelopedData *cms_get0_enveloped(CMS_ContentInfo *cms);
465
466/* PWRI routines */
467int cms_RecipientInfo_pwri_crypt(CMS_ContentInfo *cms, CMS_RecipientInfo *ri,
468 int en_de);
457 469
458#ifdef __cplusplus 470#ifdef __cplusplus
459} 471}
diff --git a/src/lib/libssl/src/crypto/cms/cms_lib.c b/src/lib/libssl/src/crypto/cms/cms_lib.c
index d00fe0f87b..f88e8f3b52 100644
--- a/src/lib/libssl/src/crypto/cms/cms_lib.c
+++ b/src/lib/libssl/src/crypto/cms/cms_lib.c
@@ -412,8 +412,7 @@ int cms_DigestAlgorithm_find_ctx(EVP_MD_CTX *mctx, BIO *chain,
412 */ 412 */
413 || EVP_MD_pkey_type(EVP_MD_CTX_md(mtmp)) == nid) 413 || EVP_MD_pkey_type(EVP_MD_CTX_md(mtmp)) == nid)
414 { 414 {
415 EVP_MD_CTX_copy_ex(mctx, mtmp); 415 return EVP_MD_CTX_copy_ex(mctx, mtmp);
416 return 1;
417 } 416 }
418 chain = BIO_next(chain); 417 chain = BIO_next(chain);
419 } 418 }
diff --git a/src/lib/libssl/src/crypto/cms/cms_pwri.c b/src/lib/libssl/src/crypto/cms/cms_pwri.c
new file mode 100644
index 0000000000..b79612a12d
--- /dev/null
+++ b/src/lib/libssl/src/crypto/cms/cms_pwri.c
@@ -0,0 +1,454 @@
1/* crypto/cms/cms_pwri.c */
2/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL
3 * project.
4 */
5/* ====================================================================
6 * Copyright (c) 2009 The OpenSSL Project. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 *
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in
17 * the documentation and/or other materials provided with the
18 * distribution.
19 *
20 * 3. All advertising materials mentioning features or use of this
21 * software must display the following acknowledgment:
22 * "This product includes software developed by the OpenSSL Project
23 * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
24 *
25 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
26 * endorse or promote products derived from this software without
27 * prior written permission. For written permission, please contact
28 * licensing@OpenSSL.org.
29 *
30 * 5. Products derived from this software may not be called "OpenSSL"
31 * nor may "OpenSSL" appear in their names without prior written
32 * permission of the OpenSSL Project.
33 *
34 * 6. Redistributions of any form whatsoever must retain the following
35 * acknowledgment:
36 * "This product includes software developed by the OpenSSL Project
37 * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
38 *
39 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
40 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
41 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
42 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
43 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
44 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
45 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
46 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
47 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
48 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
49 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
50 * OF THE POSSIBILITY OF SUCH DAMAGE.
51 * ====================================================================
52 */
53
54#include "cryptlib.h"
55#include <openssl/asn1t.h>
56#include <openssl/pem.h>
57#include <openssl/x509v3.h>
58#include <openssl/err.h>
59#include <openssl/cms.h>
60#include <openssl/rand.h>
61#include <openssl/aes.h>
62#include "cms_lcl.h"
63#include "asn1_locl.h"
64
65int CMS_RecipientInfo_set0_password(CMS_RecipientInfo *ri,
66 unsigned char *pass, ossl_ssize_t passlen)
67 {
68 CMS_PasswordRecipientInfo *pwri;
69 if (ri->type != CMS_RECIPINFO_PASS)
70 {
71 CMSerr(CMS_F_CMS_RECIPIENTINFO_SET0_PASSWORD, CMS_R_NOT_PWRI);
72 return 0;
73 }
74
75 pwri = ri->d.pwri;
76 pwri->pass = pass;
77 if (pass && passlen < 0)
78 passlen = strlen((char *)pass);
79 pwri->passlen = passlen;
80 return 1;
81 }
82
83CMS_RecipientInfo *CMS_add0_recipient_password(CMS_ContentInfo *cms,
84 int iter, int wrap_nid, int pbe_nid,
85 unsigned char *pass,
86 ossl_ssize_t passlen,
87 const EVP_CIPHER *kekciph)
88 {
89 CMS_RecipientInfo *ri = NULL;
90 CMS_EnvelopedData *env;
91 CMS_PasswordRecipientInfo *pwri;
92 EVP_CIPHER_CTX ctx;
93 X509_ALGOR *encalg = NULL;
94 unsigned char iv[EVP_MAX_IV_LENGTH];
95 int ivlen;
96 env = cms_get0_enveloped(cms);
97 if (!env)
98 goto err;
99
100 if (wrap_nid <= 0)
101 wrap_nid = NID_id_alg_PWRI_KEK;
102
103 if (pbe_nid <= 0)
104 pbe_nid = NID_id_pbkdf2;
105
106 /* Get from enveloped data */
107 if (kekciph == NULL)
108 kekciph = env->encryptedContentInfo->cipher;
109
110 if (kekciph == NULL)
111 {
112 CMSerr(CMS_F_CMS_ADD0_RECIPIENT_PASSWORD, CMS_R_NO_CIPHER);
113 return NULL;
114 }
115 if (wrap_nid != NID_id_alg_PWRI_KEK)
116 {
117 CMSerr(CMS_F_CMS_ADD0_RECIPIENT_PASSWORD,
118 CMS_R_UNSUPPORTED_KEY_ENCRYPTION_ALGORITHM);
119 return NULL;
120 }
121
122 /* Setup algorithm identifier for cipher */
123 encalg = X509_ALGOR_new();
124 EVP_CIPHER_CTX_init(&ctx);
125
126 if (EVP_EncryptInit_ex(&ctx, kekciph, NULL, NULL, NULL) <= 0)
127 {
128 CMSerr(CMS_F_CMS_ADD0_RECIPIENT_PASSWORD, ERR_R_EVP_LIB);
129 goto err;
130 }
131
132 ivlen = EVP_CIPHER_CTX_iv_length(&ctx);
133
134 if (ivlen > 0)
135 {
136 if (RAND_pseudo_bytes(iv, ivlen) <= 0)
137 goto err;
138 if (EVP_EncryptInit_ex(&ctx, NULL, NULL, NULL, iv) <= 0)
139 {
140 CMSerr(CMS_F_CMS_ADD0_RECIPIENT_PASSWORD,
141 ERR_R_EVP_LIB);
142 goto err;
143 }
144 encalg->parameter = ASN1_TYPE_new();
145 if (!encalg->parameter)
146 {
147 CMSerr(CMS_F_CMS_ADD0_RECIPIENT_PASSWORD,
148 ERR_R_MALLOC_FAILURE);
149 goto err;
150 }
151 if (EVP_CIPHER_param_to_asn1(&ctx, encalg->parameter) <= 0)
152 {
153 CMSerr(CMS_F_CMS_ADD0_RECIPIENT_PASSWORD,
154 CMS_R_CIPHER_PARAMETER_INITIALISATION_ERROR);
155 goto err;
156 }
157 }
158
159
160 encalg->algorithm = OBJ_nid2obj(EVP_CIPHER_CTX_type(&ctx));
161
162 EVP_CIPHER_CTX_cleanup(&ctx);
163
164 /* Initialize recipient info */
165 ri = M_ASN1_new_of(CMS_RecipientInfo);
166 if (!ri)
167 goto merr;
168
169 ri->d.pwri = M_ASN1_new_of(CMS_PasswordRecipientInfo);
170 if (!ri->d.pwri)
171 goto merr;
172 ri->type = CMS_RECIPINFO_PASS;
173
174 pwri = ri->d.pwri;
175 /* Since this is overwritten, free up empty structure already there */
176 X509_ALGOR_free(pwri->keyEncryptionAlgorithm);
177 pwri->keyEncryptionAlgorithm = X509_ALGOR_new();
178 if (!pwri->keyEncryptionAlgorithm)
179 goto merr;
180 pwri->keyEncryptionAlgorithm->algorithm = OBJ_nid2obj(wrap_nid);
181 pwri->keyEncryptionAlgorithm->parameter = ASN1_TYPE_new();
182 if (!pwri->keyEncryptionAlgorithm->parameter)
183 goto merr;
184
185 if(!ASN1_item_pack(encalg, ASN1_ITEM_rptr(X509_ALGOR),
186 &pwri->keyEncryptionAlgorithm->parameter->value.sequence))
187 goto merr;
188 pwri->keyEncryptionAlgorithm->parameter->type = V_ASN1_SEQUENCE;
189
190 X509_ALGOR_free(encalg);
191 encalg = NULL;
192
193 /* Setup PBE algorithm */
194
195 pwri->keyDerivationAlgorithm = PKCS5_pbkdf2_set(iter, NULL, 0, -1, -1);
196
197 if (!pwri->keyDerivationAlgorithm)
198 goto err;
199
200 CMS_RecipientInfo_set0_password(ri, pass, passlen);
201 pwri->version = 0;
202
203 if (!sk_CMS_RecipientInfo_push(env->recipientInfos, ri))
204 goto merr;
205
206 return ri;
207
208 merr:
209 CMSerr(CMS_F_CMS_ADD0_RECIPIENT_PASSWORD, ERR_R_MALLOC_FAILURE);
210 err:
211 EVP_CIPHER_CTX_cleanup(&ctx);
212 if (ri)
213 M_ASN1_free_of(ri, CMS_RecipientInfo);
214 if (encalg)
215 X509_ALGOR_free(encalg);
216 return NULL;
217
218 }
219
 220/* This is an implementation of the key wrapping mechanism in RFC 3211;
 221 * at some point this should go into EVP.
222 */
223
224static int kek_unwrap_key(unsigned char *out, size_t *outlen,
225 const unsigned char *in, size_t inlen, EVP_CIPHER_CTX *ctx)
226 {
227 size_t blocklen = EVP_CIPHER_CTX_block_size(ctx);
228 unsigned char *tmp;
229 int outl, rv = 0;
230 if (inlen < 2 * blocklen)
231 {
232 /* too small */
233 return 0;
234 }
235 if (inlen % blocklen)
236 {
237 /* Invalid size */
238 return 0;
239 }
240 tmp = OPENSSL_malloc(inlen);
241 /* setup IV by decrypting last two blocks */
242 EVP_DecryptUpdate(ctx, tmp + inlen - 2 * blocklen, &outl,
243 in + inlen - 2 * blocklen, blocklen * 2);
 244	/* Do a decrypt of the last decrypted block to set the IV to the correct value;
 245	 * output it to the start of the buffer so we don't corrupt the decrypted block.
 246	 * This works because the buffer is at least two block lengths long.
247 */
248 EVP_DecryptUpdate(ctx, tmp, &outl,
249 tmp + inlen - blocklen, blocklen);
250 /* Can now decrypt first n - 1 blocks */
251 EVP_DecryptUpdate(ctx, tmp, &outl, in, inlen - blocklen);
252
253 /* Reset IV to original value */
254 EVP_DecryptInit_ex(ctx, NULL, NULL, NULL, NULL);
255 /* Decrypt again */
256 EVP_DecryptUpdate(ctx, tmp, &outl, tmp, inlen);
257 /* Check check bytes */
258 if (((tmp[1] ^ tmp[4]) & (tmp[2] ^ tmp[5]) & (tmp[3] ^ tmp[6])) != 0xff)
259 {
260 /* Check byte failure */
261 goto err;
262 }
263 if (inlen < (size_t)(tmp[0] - 4 ))
264 {
265 /* Invalid length value */
266 goto err;
267 }
268 *outlen = (size_t)tmp[0];
269 memcpy(out, tmp + 4, *outlen);
270 rv = 1;
271 err:
272 OPENSSL_cleanse(tmp, inlen);
273 OPENSSL_free(tmp);
274 return rv;
275
276 }
277
278static int kek_wrap_key(unsigned char *out, size_t *outlen,
279 const unsigned char *in, size_t inlen, EVP_CIPHER_CTX *ctx)
280 {
281 size_t blocklen = EVP_CIPHER_CTX_block_size(ctx);
282 size_t olen;
283 int dummy;
284 /* First decide length of output buffer: need header and round up to
285 * multiple of block length.
286 */
287 olen = (inlen + 4 + blocklen - 1)/blocklen;
288 olen *= blocklen;
289 if (olen < 2 * blocklen)
290 {
291 /* Key too small */
292 return 0;
293 }
294 if (inlen > 0xFF)
295 {
296 /* Key too large */
297 return 0;
298 }
299 if (out)
300 {
301 /* Set header */
302 out[0] = (unsigned char)inlen;
303 out[1] = in[0] ^ 0xFF;
304 out[2] = in[1] ^ 0xFF;
305 out[3] = in[2] ^ 0xFF;
306 memcpy(out + 4, in, inlen);
307 /* Add random padding to end */
308 if (olen > inlen + 4)
309 RAND_pseudo_bytes(out + 4 + inlen, olen - 4 - inlen);
310 /* Encrypt twice */
311 EVP_EncryptUpdate(ctx, out, &dummy, out, olen);
312 EVP_EncryptUpdate(ctx, out, &dummy, out, olen);
313 }
314
315 *outlen = olen;
316
317 return 1;
318 }
319
320/* Encrypt/Decrypt content key in PWRI recipient info */
321
322int cms_RecipientInfo_pwri_crypt(CMS_ContentInfo *cms, CMS_RecipientInfo *ri,
323 int en_de)
324 {
325 CMS_EncryptedContentInfo *ec;
326 CMS_PasswordRecipientInfo *pwri;
327 const unsigned char *p = NULL;
328 int plen;
329 int r = 0;
330 X509_ALGOR *algtmp, *kekalg = NULL;
331 EVP_CIPHER_CTX kekctx;
332 const EVP_CIPHER *kekcipher;
333 unsigned char *key = NULL;
334 size_t keylen;
335
336 ec = cms->d.envelopedData->encryptedContentInfo;
337
338 pwri = ri->d.pwri;
339 EVP_CIPHER_CTX_init(&kekctx);
340
341 if (!pwri->pass)
342 {
343 CMSerr(CMS_F_CMS_RECIPIENTINFO_PWRI_CRYPT, CMS_R_NO_PASSWORD);
344 return 0;
345 }
346 algtmp = pwri->keyEncryptionAlgorithm;
347
348 if (!algtmp || OBJ_obj2nid(algtmp->algorithm) != NID_id_alg_PWRI_KEK)
349 {
350 CMSerr(CMS_F_CMS_RECIPIENTINFO_PWRI_CRYPT,
351 CMS_R_UNSUPPORTED_KEY_ENCRYPTION_ALGORITHM);
352 return 0;
353 }
354
355 if (algtmp->parameter->type == V_ASN1_SEQUENCE)
356 {
357 p = algtmp->parameter->value.sequence->data;
358 plen = algtmp->parameter->value.sequence->length;
359 kekalg = d2i_X509_ALGOR(NULL, &p, plen);
360 }
361 if (kekalg == NULL)
362 {
363 CMSerr(CMS_F_CMS_RECIPIENTINFO_PWRI_CRYPT,
364 CMS_R_INVALID_KEY_ENCRYPTION_PARAMETER);
365 return 0;
366 }
367
368 kekcipher = EVP_get_cipherbyobj(kekalg->algorithm);
369
370 if(!kekcipher)
371 {
372 CMSerr(CMS_F_CMS_RECIPIENTINFO_PWRI_CRYPT,
373 CMS_R_UNKNOWN_CIPHER);
374 goto err;
375 }
376
377 /* Fixup cipher based on AlgorithmIdentifier to set IV etc */
378 if (!EVP_CipherInit_ex(&kekctx, kekcipher, NULL, NULL, NULL, en_de))
379 goto err;
380 EVP_CIPHER_CTX_set_padding(&kekctx, 0);
381 if(EVP_CIPHER_asn1_to_param(&kekctx, kekalg->parameter) < 0)
382 {
383 CMSerr(CMS_F_CMS_RECIPIENTINFO_PWRI_CRYPT,
384 CMS_R_CIPHER_PARAMETER_INITIALISATION_ERROR);
385 goto err;
386 }
387
388 algtmp = pwri->keyDerivationAlgorithm;
389
390 /* Finish password based key derivation to setup key in "ctx" */
391
392 if (EVP_PBE_CipherInit(algtmp->algorithm,
393 (char *)pwri->pass, pwri->passlen,
394 algtmp->parameter, &kekctx, en_de) < 0)
395 {
396 CMSerr(CMS_F_CMS_RECIPIENTINFO_PWRI_CRYPT, ERR_R_EVP_LIB);
397 goto err;
398 }
399
400 /* Finally wrap/unwrap the key */
401
402 if (en_de)
403 {
404
405 if (!kek_wrap_key(NULL, &keylen, ec->key, ec->keylen, &kekctx))
406 goto err;
407
408 key = OPENSSL_malloc(keylen);
409
410 if (!key)
411 goto err;
412
413 if (!kek_wrap_key(key, &keylen, ec->key, ec->keylen, &kekctx))
414 goto err;
415 pwri->encryptedKey->data = key;
416 pwri->encryptedKey->length = keylen;
417 }
418 else
419 {
420 key = OPENSSL_malloc(pwri->encryptedKey->length);
421
422 if (!key)
423 {
424 CMSerr(CMS_F_CMS_RECIPIENTINFO_PWRI_CRYPT,
425 ERR_R_MALLOC_FAILURE);
426 goto err;
427 }
428 if (!kek_unwrap_key(key, &keylen,
429 pwri->encryptedKey->data,
430 pwri->encryptedKey->length, &kekctx))
431 {
432 CMSerr(CMS_F_CMS_RECIPIENTINFO_PWRI_CRYPT,
433 CMS_R_UNWRAP_FAILURE);
434 goto err;
435 }
436
437 ec->key = key;
438 ec->keylen = keylen;
439
440 }
441
442 r = 1;
443
444 err:
445
446 EVP_CIPHER_CTX_cleanup(&kekctx);
447
448 if (!r && key)
449 OPENSSL_free(key);
450 X509_ALGOR_free(kekalg);
451
452 return r;
453
454 }
diff --git a/src/lib/libssl/src/crypto/cms/cms_sd.c b/src/lib/libssl/src/crypto/cms/cms_sd.c
index e3192b9c57..77fbd13596 100644
--- a/src/lib/libssl/src/crypto/cms/cms_sd.c
+++ b/src/lib/libssl/src/crypto/cms/cms_sd.c
@@ -641,7 +641,8 @@ static int cms_SignerInfo_content_sign(CMS_ContentInfo *cms,
641 cms->d.signedData->encapContentInfo->eContentType; 641 cms->d.signedData->encapContentInfo->eContentType;
642 unsigned char md[EVP_MAX_MD_SIZE]; 642 unsigned char md[EVP_MAX_MD_SIZE];
643 unsigned int mdlen; 643 unsigned int mdlen;
644 EVP_DigestFinal_ex(&mctx, md, &mdlen); 644 if (!EVP_DigestFinal_ex(&mctx, md, &mdlen))
645 goto err;
645 if (!CMS_signed_add1_attr_by_NID(si, NID_pkcs9_messageDigest, 646 if (!CMS_signed_add1_attr_by_NID(si, NID_pkcs9_messageDigest,
646 V_ASN1_OCTET_STRING, 647 V_ASN1_OCTET_STRING,
647 md, mdlen)) 648 md, mdlen))
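
The change above stops discarding the return value of EVP_DigestFinal_ex; any EVP digest call can fail (unavailable digest, failed FIPS self-test), so each step deserves a check. A small standalone sketch of the fully checked pattern, with an illustrative function name:

    #include <openssl/evp.h>

    /* Sketch: hash a buffer with SHA-256, checking every EVP return value. */
    static int digest_buf(const unsigned char *in, size_t inlen,
        unsigned char md[EVP_MAX_MD_SIZE], unsigned int *mdlen)
    {
        EVP_MD_CTX ctx;
        int ok;

        EVP_MD_CTX_init(&ctx);
        ok = EVP_DigestInit_ex(&ctx, EVP_sha256(), NULL)
            && EVP_DigestUpdate(&ctx, in, inlen)
            && EVP_DigestFinal_ex(&ctx, md, mdlen);
        EVP_MD_CTX_cleanup(&ctx);
        return ok;
    }
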
diff --git a/src/lib/libssl/src/crypto/cms/cms_smime.c b/src/lib/libssl/src/crypto/cms/cms_smime.c
index 4a799eb897..8c56e3a852 100644
--- a/src/lib/libssl/src/crypto/cms/cms_smime.c
+++ b/src/lib/libssl/src/crypto/cms/cms_smime.c
@@ -611,7 +611,10 @@ int CMS_decrypt_set1_pkey(CMS_ContentInfo *cms, EVP_PKEY *pk, X509 *cert)
611 STACK_OF(CMS_RecipientInfo) *ris; 611 STACK_OF(CMS_RecipientInfo) *ris;
612 CMS_RecipientInfo *ri; 612 CMS_RecipientInfo *ri;
613 int i, r; 613 int i, r;
614 int debug = 0;
614 ris = CMS_get0_RecipientInfos(cms); 615 ris = CMS_get0_RecipientInfos(cms);
616 if (ris)
617 debug = cms->d.envelopedData->encryptedContentInfo->debug;
615 for (i = 0; i < sk_CMS_RecipientInfo_num(ris); i++) 618 for (i = 0; i < sk_CMS_RecipientInfo_num(ris); i++)
616 { 619 {
617 ri = sk_CMS_RecipientInfo_value(ris, i); 620 ri = sk_CMS_RecipientInfo_value(ris, i);
@@ -625,17 +628,38 @@ int CMS_decrypt_set1_pkey(CMS_ContentInfo *cms, EVP_PKEY *pk, X509 *cert)
625 CMS_RecipientInfo_set0_pkey(ri, pk); 628 CMS_RecipientInfo_set0_pkey(ri, pk);
626 r = CMS_RecipientInfo_decrypt(cms, ri); 629 r = CMS_RecipientInfo_decrypt(cms, ri);
627 CMS_RecipientInfo_set0_pkey(ri, NULL); 630 CMS_RecipientInfo_set0_pkey(ri, NULL);
628 if (r > 0)
629 return 1;
630 if (cert) 631 if (cert)
631 { 632 {
633 /* If not debugging clear any error and
634 * return success to avoid leaking of
635 * information useful to MMA
636 */
637 if (!debug)
638 {
639 ERR_clear_error();
640 return 1;
641 }
642 if (r > 0)
643 return 1;
632 CMSerr(CMS_F_CMS_DECRYPT_SET1_PKEY, 644 CMSerr(CMS_F_CMS_DECRYPT_SET1_PKEY,
633 CMS_R_DECRYPT_ERROR); 645 CMS_R_DECRYPT_ERROR);
634 return 0; 646 return 0;
635 } 647 }
636 ERR_clear_error(); 648 /* If no cert and not debugging don't leave loop
649 * after first successful decrypt. Always attempt
650 * to decrypt all recipients to avoid leaking timing
651 * of a successful decrypt.
652 */
653 else if (r > 0 && debug)
654 return 1;
637 } 655 }
638 } 656 }
657 /* If no cert and not debugging always return success */
658 if (!cert && !debug)
659 {
660 ERR_clear_error();
661 return 1;
662 }
639 663
640 CMSerr(CMS_F_CMS_DECRYPT_SET1_PKEY, CMS_R_NO_MATCHING_RECIPIENT); 664 CMSerr(CMS_F_CMS_DECRYPT_SET1_PKEY, CMS_R_NO_MATCHING_RECIPIENT);
641 return 0; 665 return 0;
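
The rewritten loop above is a countermeasure against Bleichenbacher-style (MMA) attacks on RSA key transport: when no certificate is supplied and CMS_DEBUG_DECRYPT is off, an attacker can no longer tell from the error, or from which recipient ended the loop, whether a trial decryption succeeded. Distilled to its core, the pattern is "try every candidate, never bail out early, report the same outcome either way"; a rough sketch of that idea, not the library code:

    #include <openssl/err.h>

    /* Sketch: attempt every recipient with uniform control flow.  A successful
     * attempt records the recovered key as a side effect in 'arg'; the return
     * value and the error queue look identical whether or not one succeeded. */
    static int try_all_recipients(int (*try_one)(void *arg, int idx),
        void *arg, int n)
    {
        int i;

        for (i = 0; i < n; i++)
            (void)try_one(arg, i);  /* never break out early */
        ERR_clear_error();          /* hide which attempt, if any, failed */
        return 1;                   /* a wrong key only shows up later, as garbage content */
    }
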
@@ -680,6 +704,30 @@ int CMS_decrypt_set1_key(CMS_ContentInfo *cms,
680 return 0; 704 return 0;
681 705
682 } 706 }
707
708int CMS_decrypt_set1_password(CMS_ContentInfo *cms,
709 unsigned char *pass, ossl_ssize_t passlen)
710 {
711 STACK_OF(CMS_RecipientInfo) *ris;
712 CMS_RecipientInfo *ri;
713 int i, r;
714 ris = CMS_get0_RecipientInfos(cms);
715 for (i = 0; i < sk_CMS_RecipientInfo_num(ris); i++)
716 {
717 ri = sk_CMS_RecipientInfo_value(ris, i);
718 if (CMS_RecipientInfo_type(ri) != CMS_RECIPINFO_PASS)
719 continue;
720 CMS_RecipientInfo_set0_password(ri, pass, passlen);
721 r = CMS_RecipientInfo_decrypt(cms, ri);
722 CMS_RecipientInfo_set0_password(ri, NULL, 0);
723 if (r > 0)
724 return 1;
725 }
726
727 CMSerr(CMS_F_CMS_DECRYPT_SET1_PASSWORD, CMS_R_NO_MATCHING_RECIPIENT);
728 return 0;
729
730 }
683 731
684int CMS_decrypt(CMS_ContentInfo *cms, EVP_PKEY *pk, X509 *cert, 732int CMS_decrypt(CMS_ContentInfo *cms, EVP_PKEY *pk, X509 *cert,
685 BIO *dcont, BIO *out, 733 BIO *dcont, BIO *out,
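
CMS_decrypt_set1_password, added above, walks the RecipientInfos, tries the passphrase against every password recipient, and leaves the recovered content-encryption key in the ContentInfo. A minimal decrypt-side sketch for DER-encoded EnvelopedData with a NUL-terminated passphrase; the file handling is illustrative and error reporting is trimmed:

    #include <string.h>
    #include <openssl/bio.h>
    #include <openssl/cms.h>

    /* Sketch: decrypt password-protected CMS EnvelopedData (DER in, plaintext out). */
    static int decrypt_with_password(const char *infile, const char *outfile,
        unsigned char *pass)
    {
        BIO *in = BIO_new_file(infile, "rb");
        BIO *out = BIO_new_file(outfile, "wb");
        CMS_ContentInfo *cms = in ? d2i_CMS_bio(in, NULL) : NULL;
        int ok = 0;

        if (cms && out
            && CMS_decrypt_set1_password(cms, pass, strlen((char *)pass))
            && CMS_decrypt(cms, NULL, NULL, NULL, out, 0))
            ok = 1;                 /* no private key or certificate needed */
        CMS_ContentInfo_free(cms);
        BIO_free(in);
        BIO_free(out);
        return ok;
    }
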
@@ -694,9 +742,14 @@ int CMS_decrypt(CMS_ContentInfo *cms, EVP_PKEY *pk, X509 *cert,
694 } 742 }
695 if (!dcont && !check_content(cms)) 743 if (!dcont && !check_content(cms))
696 return 0; 744 return 0;
745 if (flags & CMS_DEBUG_DECRYPT)
746 cms->d.envelopedData->encryptedContentInfo->debug = 1;
747 else
748 cms->d.envelopedData->encryptedContentInfo->debug = 0;
749 if (!pk && !cert && !dcont && !out)
750 return 1;
697 if (pk && !CMS_decrypt_set1_pkey(cms, pk, cert)) 751 if (pk && !CMS_decrypt_set1_pkey(cms, pk, cert))
698 return 0; 752 return 0;
699
700 cont = CMS_dataInit(cms, dcont); 753 cont = CMS_dataInit(cms, dcont);
701 if (!cont) 754 if (!cont)
702 return 0; 755 return 0;
diff --git a/src/lib/libssl/src/crypto/dh/dh_ameth.c b/src/lib/libssl/src/crypto/dh/dh_ameth.c
index 377caf96c9..02ec2d47b4 100644
--- a/src/lib/libssl/src/crypto/dh/dh_ameth.c
+++ b/src/lib/libssl/src/crypto/dh/dh_ameth.c
@@ -493,6 +493,7 @@ const EVP_PKEY_ASN1_METHOD dh_asn1_meth =
493 dh_copy_parameters, 493 dh_copy_parameters,
494 dh_cmp_parameters, 494 dh_cmp_parameters,
495 dh_param_print, 495 dh_param_print,
496 0,
496 497
497 int_dh_free, 498 int_dh_free,
498 0 499 0
diff --git a/src/lib/libssl/src/crypto/dsa/dsa_ameth.c b/src/lib/libssl/src/crypto/dsa/dsa_ameth.c
index 6413aae46e..376156ec5e 100644
--- a/src/lib/libssl/src/crypto/dsa/dsa_ameth.c
+++ b/src/lib/libssl/src/crypto/dsa/dsa_ameth.c
@@ -542,6 +542,52 @@ static int old_dsa_priv_encode(const EVP_PKEY *pkey, unsigned char **pder)
542 return i2d_DSAPrivateKey(pkey->pkey.dsa, pder); 542 return i2d_DSAPrivateKey(pkey->pkey.dsa, pder);
543 } 543 }
544 544
545static int dsa_sig_print(BIO *bp, const X509_ALGOR *sigalg,
546 const ASN1_STRING *sig,
547 int indent, ASN1_PCTX *pctx)
548 {
549 DSA_SIG *dsa_sig;
550 const unsigned char *p;
551 if (!sig)
552 {
553 if (BIO_puts(bp, "\n") <= 0)
554 return 0;
555 else
556 return 1;
557 }
558 p = sig->data;
559 dsa_sig = d2i_DSA_SIG(NULL, &p, sig->length);
560 if (dsa_sig)
561 {
562 int rv = 0;
563 size_t buf_len = 0;
564 unsigned char *m=NULL;
565 update_buflen(dsa_sig->r, &buf_len);
566 update_buflen(dsa_sig->s, &buf_len);
567 m = OPENSSL_malloc(buf_len+10);
568 if (m == NULL)
569 {
570 DSAerr(DSA_F_DSA_SIG_PRINT,ERR_R_MALLOC_FAILURE);
571 goto err;
572 }
573
574 if (BIO_write(bp, "\n", 1) != 1)
575 goto err;
576
577 if (!ASN1_bn_print(bp,"r: ",dsa_sig->r,m,indent))
578 goto err;
579 if (!ASN1_bn_print(bp,"s: ",dsa_sig->s,m,indent))
580 goto err;
581 rv = 1;
582 err:
583 if (m)
584 OPENSSL_free(m);
585 DSA_SIG_free(dsa_sig);
586 return rv;
587 }
588 return X509_signature_dump(bp, sig, indent);
589 }
590
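
dsa_sig_print fills the previously empty sig_print slot of the DSA EVP_PKEY_ASN1_METHOD, so text dumps of DSA-signed objects can show r and s as bignums instead of a raw hex blob. The same decoding is available to applications through the public DSA_SIG routines; a minimal sketch:

    #include <stdio.h>
    #include <openssl/bn.h>
    #include <openssl/dsa.h>

    /* Sketch: decode a DER-encoded DSA signature and print its r and s values. */
    static int print_dsa_sig(FILE *fp, const unsigned char *der, long derlen)
    {
        const unsigned char *p = der;
        DSA_SIG *sig = d2i_DSA_SIG(NULL, &p, derlen);

        if (sig == NULL)
            return 0;
        fprintf(fp, "r: ");
        BN_print_fp(fp, sig->r);
        fprintf(fp, "\ns: ");
        BN_print_fp(fp, sig->s);
        fprintf(fp, "\n");
        DSA_SIG_free(sig);
        return 1;
    }
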
545static int dsa_pkey_ctrl(EVP_PKEY *pkey, int op, long arg1, void *arg2) 591static int dsa_pkey_ctrl(EVP_PKEY *pkey, int op, long arg1, void *arg2)
546 { 592 {
547 switch (op) 593 switch (op)
@@ -647,6 +693,7 @@ const EVP_PKEY_ASN1_METHOD dsa_asn1_meths[] =
647 dsa_copy_parameters, 693 dsa_copy_parameters,
648 dsa_cmp_parameters, 694 dsa_cmp_parameters,
649 dsa_param_print, 695 dsa_param_print,
696 dsa_sig_print,
650 697
651 int_dsa_free, 698 int_dsa_free,
652 dsa_pkey_ctrl, 699 dsa_pkey_ctrl,
diff --git a/src/lib/libssl/src/crypto/dsa/dsa_locl.h b/src/lib/libssl/src/crypto/dsa/dsa_locl.h
index 2b8cfee3db..21e2e45242 100644
--- a/src/lib/libssl/src/crypto/dsa/dsa_locl.h
+++ b/src/lib/libssl/src/crypto/dsa/dsa_locl.h
@@ -56,4 +56,5 @@
56 56
57int dsa_builtin_paramgen(DSA *ret, size_t bits, size_t qbits, 57int dsa_builtin_paramgen(DSA *ret, size_t bits, size_t qbits,
58 const EVP_MD *evpmd, const unsigned char *seed_in, size_t seed_len, 58 const EVP_MD *evpmd, const unsigned char *seed_in, size_t seed_len,
59 unsigned char *seed_out,
59 int *counter_ret, unsigned long *h_ret, BN_GENCB *cb); 60 int *counter_ret, unsigned long *h_ret, BN_GENCB *cb);
diff --git a/src/lib/libssl/src/crypto/dsa/dsa_pmeth.c b/src/lib/libssl/src/crypto/dsa/dsa_pmeth.c
index e2df54fec6..715d8d675b 100644
--- a/src/lib/libssl/src/crypto/dsa/dsa_pmeth.c
+++ b/src/lib/libssl/src/crypto/dsa/dsa_pmeth.c
@@ -189,7 +189,9 @@ static int pkey_dsa_ctrl(EVP_PKEY_CTX *ctx, int type, int p1, void *p2)
189 EVP_MD_type((const EVP_MD *)p2) != NID_dsa && 189 EVP_MD_type((const EVP_MD *)p2) != NID_dsa &&
190 EVP_MD_type((const EVP_MD *)p2) != NID_dsaWithSHA && 190 EVP_MD_type((const EVP_MD *)p2) != NID_dsaWithSHA &&
191 EVP_MD_type((const EVP_MD *)p2) != NID_sha224 && 191 EVP_MD_type((const EVP_MD *)p2) != NID_sha224 &&
192 EVP_MD_type((const EVP_MD *)p2) != NID_sha256) 192 EVP_MD_type((const EVP_MD *)p2) != NID_sha256 &&
193 EVP_MD_type((const EVP_MD *)p2) != NID_sha384 &&
194 EVP_MD_type((const EVP_MD *)p2) != NID_sha512)
193 { 195 {
194 DSAerr(DSA_F_PKEY_DSA_CTRL, DSA_R_INVALID_DIGEST_TYPE); 196 DSAerr(DSA_F_PKEY_DSA_CTRL, DSA_R_INVALID_DIGEST_TYPE);
195 return 0; 197 return 0;
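
The extended digest check above lets a DSA EVP_PKEY_CTX accept SHA-384 and SHA-512 alongside the digests that were already allowed, in line with FIPS 186-3 permitting the whole SHA-2 family. A minimal one-shot signing sketch through the EVP digest-sign interface, assuming pkey already carries a DSA private key and sig has at least EVP_PKEY_size(pkey) bytes of room:

    #include <openssl/evp.h>

    /* Sketch: sign a buffer with DSA and SHA-512 via the generic EVP interface. */
    static int dsa_sign_sha512(EVP_PKEY *pkey, const unsigned char *msg,
        size_t msglen, unsigned char *sig, size_t *siglen)
    {
        EVP_MD_CTX ctx;
        int ok;

        EVP_MD_CTX_init(&ctx);
        ok = EVP_DigestSignInit(&ctx, NULL, EVP_sha512(), NULL, pkey)
            && EVP_DigestSignUpdate(&ctx, msg, msglen)
            && EVP_DigestSignFinal(&ctx, sig, siglen);
        EVP_MD_CTX_cleanup(&ctx);
        return ok;
    }
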
@@ -253,7 +255,7 @@ static int pkey_dsa_paramgen(EVP_PKEY_CTX *ctx, EVP_PKEY *pkey)
253 if (!dsa) 255 if (!dsa)
254 return 0; 256 return 0;
255 ret = dsa_builtin_paramgen(dsa, dctx->nbits, dctx->qbits, dctx->pmd, 257 ret = dsa_builtin_paramgen(dsa, dctx->nbits, dctx->qbits, dctx->pmd,
256 NULL, 0, NULL, NULL, pcb); 258 NULL, 0, NULL, NULL, NULL, pcb);
257 if (ret) 259 if (ret)
258 EVP_PKEY_assign_DSA(pkey, dsa); 260 EVP_PKEY_assign_DSA(pkey, dsa);
259 else 261 else
diff --git a/src/lib/libssl/src/crypto/ec/ec2_mult.c b/src/lib/libssl/src/crypto/ec/ec2_mult.c
index e12b9b284a..26f4a783fc 100644
--- a/src/lib/libssl/src/crypto/ec/ec2_mult.c
+++ b/src/lib/libssl/src/crypto/ec/ec2_mult.c
@@ -71,6 +71,8 @@
71 71
72#include "ec_lcl.h" 72#include "ec_lcl.h"
73 73
74#ifndef OPENSSL_NO_EC2M
75
74 76
75/* Compute the x-coordinate x/z for the point 2*(x/z) in Montgomery projective 77/* Compute the x-coordinate x/z for the point 2*(x/z) in Montgomery projective
76 * coordinates. 78 * coordinates.
@@ -384,3 +386,5 @@ int ec_GF2m_have_precompute_mult(const EC_GROUP *group)
384 { 386 {
385 return ec_wNAF_have_precompute_mult(group); 387 return ec_wNAF_have_precompute_mult(group);
386 } 388 }
389
390#endif
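
The new OPENSSL_NO_EC2M guards allow libcrypto to be built without any GF(2^m) arithmetic, so code that touches binary curves has to be excluded the same way. A minimal application-side sketch of the matching guard; the curve is just an example:

    #include <openssl/opensslconf.h>
    #include <openssl/ec.h>
    #include <openssl/objects.h>

    /* Sketch: create a binary-field group only when EC2M support is compiled in. */
    static EC_GROUP *new_sect163k1(void)
    {
    #ifndef OPENSSL_NO_EC2M
        return EC_GROUP_new_by_curve_name(NID_sect163k1);
    #else
        return NULL;    /* binary curves unavailable in this build */
    #endif
    }
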
diff --git a/src/lib/libssl/src/crypto/ec/ec2_oct.c b/src/lib/libssl/src/crypto/ec/ec2_oct.c
new file mode 100644
index 0000000000..f1d75e5ddf
--- /dev/null
+++ b/src/lib/libssl/src/crypto/ec/ec2_oct.c
@@ -0,0 +1,407 @@
1/* crypto/ec/ec2_oct.c */
2/* ====================================================================
3 * Copyright 2002 Sun Microsystems, Inc. ALL RIGHTS RESERVED.
4 *
5 * The Elliptic Curve Public-Key Crypto Library (ECC Code) included
6 * herein is developed by SUN MICROSYSTEMS, INC., and is contributed
7 * to the OpenSSL project.
8 *
9 * The ECC Code is licensed pursuant to the OpenSSL open source
10 * license provided below.
11 *
12 * The software is originally written by Sheueling Chang Shantz and
13 * Douglas Stebila of Sun Microsystems Laboratories.
14 *
15 */
16/* ====================================================================
17 * Copyright (c) 1998-2005 The OpenSSL Project. All rights reserved.
18 *
19 * Redistribution and use in source and binary forms, with or without
20 * modification, are permitted provided that the following conditions
21 * are met:
22 *
23 * 1. Redistributions of source code must retain the above copyright
24 * notice, this list of conditions and the following disclaimer.
25 *
26 * 2. Redistributions in binary form must reproduce the above copyright
27 * notice, this list of conditions and the following disclaimer in
28 * the documentation and/or other materials provided with the
29 * distribution.
30 *
31 * 3. All advertising materials mentioning features or use of this
32 * software must display the following acknowledgment:
33 * "This product includes software developed by the OpenSSL Project
34 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
35 *
36 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
37 * endorse or promote products derived from this software without
38 * prior written permission. For written permission, please contact
39 * openssl-core@openssl.org.
40 *
41 * 5. Products derived from this software may not be called "OpenSSL"
42 * nor may "OpenSSL" appear in their names without prior written
43 * permission of the OpenSSL Project.
44 *
45 * 6. Redistributions of any form whatsoever must retain the following
46 * acknowledgment:
47 * "This product includes software developed by the OpenSSL Project
48 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
49 *
50 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
51 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
53 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
54 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
55 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
56 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
57 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
59 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
60 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
61 * OF THE POSSIBILITY OF SUCH DAMAGE.
62 * ====================================================================
63 *
64 * This product includes cryptographic software written by Eric Young
65 * (eay@cryptsoft.com). This product includes software written by Tim
66 * Hudson (tjh@cryptsoft.com).
67 *
68 */
69
70#include <openssl/err.h>
71
72#include "ec_lcl.h"
73
74#ifndef OPENSSL_NO_EC2M
75
76/* Calculates and sets the affine coordinates of an EC_POINT from the given
77 * compressed coordinates. Uses algorithm 2.3.4 of SEC 1.
78 * Note that the simple implementation only uses affine coordinates.
79 *
80 * The method is from the following publication:
81 *
82 * Harper, Menezes, Vanstone:
83 * "Public-Key Cryptosystems with Very Small Key Lengths",
84 * EUROCRYPT '92, Springer-Verlag LNCS 658,
85 * published February 1993
86 *
87 * US Patents 6,141,420 and 6,618,483 (Vanstone, Mullin, Agnew) describe
88 * the same method, but claim no priority date earlier than July 29, 1994
89 * (and additionally fail to cite the EUROCRYPT '92 publication as prior art).
90 */
91int ec_GF2m_simple_set_compressed_coordinates(const EC_GROUP *group, EC_POINT *point,
92 const BIGNUM *x_, int y_bit, BN_CTX *ctx)
93 {
94 BN_CTX *new_ctx = NULL;
95 BIGNUM *tmp, *x, *y, *z;
96 int ret = 0, z0;
97
98 /* clear error queue */
99 ERR_clear_error();
100
101 if (ctx == NULL)
102 {
103 ctx = new_ctx = BN_CTX_new();
104 if (ctx == NULL)
105 return 0;
106 }
107
108 y_bit = (y_bit != 0) ? 1 : 0;
109
110 BN_CTX_start(ctx);
111 tmp = BN_CTX_get(ctx);
112 x = BN_CTX_get(ctx);
113 y = BN_CTX_get(ctx);
114 z = BN_CTX_get(ctx);
115 if (z == NULL) goto err;
116
117 if (!BN_GF2m_mod_arr(x, x_, group->poly)) goto err;
118 if (BN_is_zero(x))
119 {
120 if (!BN_GF2m_mod_sqrt_arr(y, &group->b, group->poly, ctx)) goto err;
121 }
122 else
123 {
124 if (!group->meth->field_sqr(group, tmp, x, ctx)) goto err;
125 if (!group->meth->field_div(group, tmp, &group->b, tmp, ctx)) goto err;
126 if (!BN_GF2m_add(tmp, &group->a, tmp)) goto err;
127 if (!BN_GF2m_add(tmp, x, tmp)) goto err;
128 if (!BN_GF2m_mod_solve_quad_arr(z, tmp, group->poly, ctx))
129 {
130 unsigned long err = ERR_peek_last_error();
131
132 if (ERR_GET_LIB(err) == ERR_LIB_BN && ERR_GET_REASON(err) == BN_R_NO_SOLUTION)
133 {
134 ERR_clear_error();
135 ECerr(EC_F_EC_GF2M_SIMPLE_SET_COMPRESSED_COORDINATES, EC_R_INVALID_COMPRESSED_POINT);
136 }
137 else
138 ECerr(EC_F_EC_GF2M_SIMPLE_SET_COMPRESSED_COORDINATES, ERR_R_BN_LIB);
139 goto err;
140 }
141 z0 = (BN_is_odd(z)) ? 1 : 0;
142 if (!group->meth->field_mul(group, y, x, z, ctx)) goto err;
143 if (z0 != y_bit)
144 {
145 if (!BN_GF2m_add(y, y, x)) goto err;
146 }
147 }
148
149 if (!EC_POINT_set_affine_coordinates_GF2m(group, point, x, y, ctx)) goto err;
150
151 ret = 1;
152
153 err:
154 BN_CTX_end(ctx);
155 if (new_ctx != NULL)
156 BN_CTX_free(new_ctx);
157 return ret;
158 }
159
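
The decompression above follows SEC 1, 2.3.4 for curves y^2 + xy = x^3 + ax^2 + b over GF(2^m). Written out, the substitution it relies on is

    \[
        y = xz \;\Longrightarrow\; x^2 z^2 + x^2 z = x^3 + a x^2 + b
          \;\Longrightarrow\; z^2 + z = x + a + \frac{b}{x^{2}} \qquad (x \neq 0),
    \]

so the code solves that quadratic for z, picks the root whose low bit matches the compression bit (the other root is z + 1), and recovers y = x*z; for x = 0 it takes y = sqrt(b) directly via BN_GF2m_mod_sqrt_arr, since squaring is a bijection in characteristic 2.
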
160
161/* Converts an EC_POINT to an octet string.
162 * If buf is NULL, the encoded length will be returned.
163 * If the length len of buf is smaller than required an error will be returned.
164 */
165size_t ec_GF2m_simple_point2oct(const EC_GROUP *group, const EC_POINT *point, point_conversion_form_t form,
166 unsigned char *buf, size_t len, BN_CTX *ctx)
167 {
168 size_t ret;
169 BN_CTX *new_ctx = NULL;
170 int used_ctx = 0;
171 BIGNUM *x, *y, *yxi;
172 size_t field_len, i, skip;
173
174 if ((form != POINT_CONVERSION_COMPRESSED)
175 && (form != POINT_CONVERSION_UNCOMPRESSED)
176 && (form != POINT_CONVERSION_HYBRID))
177 {
178 ECerr(EC_F_EC_GF2M_SIMPLE_POINT2OCT, EC_R_INVALID_FORM);
179 goto err;
180 }
181
182 if (EC_POINT_is_at_infinity(group, point))
183 {
184 /* encodes to a single 0 octet */
185 if (buf != NULL)
186 {
187 if (len < 1)
188 {
189 ECerr(EC_F_EC_GF2M_SIMPLE_POINT2OCT, EC_R_BUFFER_TOO_SMALL);
190 return 0;
191 }
192 buf[0] = 0;
193 }
194 return 1;
195 }
196
197
198 /* ret := required output buffer length */
199 field_len = (EC_GROUP_get_degree(group) + 7) / 8;
200 ret = (form == POINT_CONVERSION_COMPRESSED) ? 1 + field_len : 1 + 2*field_len;
201
202 /* if 'buf' is NULL, just return required length */
203 if (buf != NULL)
204 {
205 if (len < ret)
206 {
207 ECerr(EC_F_EC_GF2M_SIMPLE_POINT2OCT, EC_R_BUFFER_TOO_SMALL);
208 goto err;
209 }
210
211 if (ctx == NULL)
212 {
213 ctx = new_ctx = BN_CTX_new();
214 if (ctx == NULL)
215 return 0;
216 }
217
218 BN_CTX_start(ctx);
219 used_ctx = 1;
220 x = BN_CTX_get(ctx);
221 y = BN_CTX_get(ctx);
222 yxi = BN_CTX_get(ctx);
223 if (yxi == NULL) goto err;
224
225 if (!EC_POINT_get_affine_coordinates_GF2m(group, point, x, y, ctx)) goto err;
226
227 buf[0] = form;
228 if ((form != POINT_CONVERSION_UNCOMPRESSED) && !BN_is_zero(x))
229 {
230 if (!group->meth->field_div(group, yxi, y, x, ctx)) goto err;
231 if (BN_is_odd(yxi)) buf[0]++;
232 }
233
234 i = 1;
235
236 skip = field_len - BN_num_bytes(x);
237 if (skip > field_len)
238 {
239 ECerr(EC_F_EC_GF2M_SIMPLE_POINT2OCT, ERR_R_INTERNAL_ERROR);
240 goto err;
241 }
242 while (skip > 0)
243 {
244 buf[i++] = 0;
245 skip--;
246 }
247 skip = BN_bn2bin(x, buf + i);
248 i += skip;
249 if (i != 1 + field_len)
250 {
251 ECerr(EC_F_EC_GF2M_SIMPLE_POINT2OCT, ERR_R_INTERNAL_ERROR);
252 goto err;
253 }
254
255 if (form == POINT_CONVERSION_UNCOMPRESSED || form == POINT_CONVERSION_HYBRID)
256 {
257 skip = field_len - BN_num_bytes(y);
258 if (skip > field_len)
259 {
260 ECerr(EC_F_EC_GF2M_SIMPLE_POINT2OCT, ERR_R_INTERNAL_ERROR);
261 goto err;
262 }
263 while (skip > 0)
264 {
265 buf[i++] = 0;
266 skip--;
267 }
268 skip = BN_bn2bin(y, buf + i);
269 i += skip;
270 }
271
272 if (i != ret)
273 {
274 ECerr(EC_F_EC_GF2M_SIMPLE_POINT2OCT, ERR_R_INTERNAL_ERROR);
275 goto err;
276 }
277 }
278
279 if (used_ctx)
280 BN_CTX_end(ctx);
281 if (new_ctx != NULL)
282 BN_CTX_free(new_ctx);
283 return ret;
284
285 err:
286 if (used_ctx)
287 BN_CTX_end(ctx);
288 if (new_ctx != NULL)
289 BN_CTX_free(new_ctx);
290 return 0;
291 }
292
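
ec_GF2m_simple_point2oct emits the X9.62/SEC 1 octet-string forms: a single 0x00 octet for the point at infinity, 0x02/0x03 followed by the zero-padded x coordinate for compressed points (the low bit of the tag carrying the parity of y/x), 0x04 followed by x and y for uncompressed, and 0x06/0x07 for hybrid. A small sketch that only reproduces the length computation from the field size, mirroring the ret calculation above; it is not library code:

    #include <stddef.h>
    #include <openssl/ec.h>

    /* Sketch: expected octet-string length of a finite point on 'group'. */
    static size_t point_encoding_len(const EC_GROUP *group,
        point_conversion_form_t form)
    {
        size_t field_len = (EC_GROUP_get_degree(group) + 7) / 8;

        return (form == POINT_CONVERSION_COMPRESSED)
            ? 1 + field_len         /* tag + x */
            : 1 + 2 * field_len;    /* tag + x + y */
    }

In practice the same number is obtained by calling the conversion routine with a NULL buffer, which, as documented above, returns the required length.
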
293
294/* Converts an octet string representation to an EC_POINT.
295 * Note that the simple implementation only uses affine coordinates.
296 */
297int ec_GF2m_simple_oct2point(const EC_GROUP *group, EC_POINT *point,
298 const unsigned char *buf, size_t len, BN_CTX *ctx)
299 {
300 point_conversion_form_t form;
301 int y_bit;
302 BN_CTX *new_ctx = NULL;
303 BIGNUM *x, *y, *yxi;
304 size_t field_len, enc_len;
305 int ret = 0;
306
307 if (len == 0)
308 {
309 ECerr(EC_F_EC_GF2M_SIMPLE_OCT2POINT, EC_R_BUFFER_TOO_SMALL);
310 return 0;
311 }
312 form = buf[0];
313 y_bit = form & 1;
314 form = form & ~1U;
315 if ((form != 0) && (form != POINT_CONVERSION_COMPRESSED)
316 && (form != POINT_CONVERSION_UNCOMPRESSED)
317 && (form != POINT_CONVERSION_HYBRID))
318 {
319 ECerr(EC_F_EC_GF2M_SIMPLE_OCT2POINT, EC_R_INVALID_ENCODING);
320 return 0;
321 }
322 if ((form == 0 || form == POINT_CONVERSION_UNCOMPRESSED) && y_bit)
323 {
324 ECerr(EC_F_EC_GF2M_SIMPLE_OCT2POINT, EC_R_INVALID_ENCODING);
325 return 0;
326 }
327
328 if (form == 0)
329 {
330 if (len != 1)
331 {
332 ECerr(EC_F_EC_GF2M_SIMPLE_OCT2POINT, EC_R_INVALID_ENCODING);
333 return 0;
334 }
335
336 return EC_POINT_set_to_infinity(group, point);
337 }
338
339 field_len = (EC_GROUP_get_degree(group) + 7) / 8;
340 enc_len = (form == POINT_CONVERSION_COMPRESSED) ? 1 + field_len : 1 + 2*field_len;
341
342 if (len != enc_len)
343 {
344 ECerr(EC_F_EC_GF2M_SIMPLE_OCT2POINT, EC_R_INVALID_ENCODING);
345 return 0;
346 }
347
348 if (ctx == NULL)
349 {
350 ctx = new_ctx = BN_CTX_new();
351 if (ctx == NULL)
352 return 0;
353 }
354
355 BN_CTX_start(ctx);
356 x = BN_CTX_get(ctx);
357 y = BN_CTX_get(ctx);
358 yxi = BN_CTX_get(ctx);
359 if (yxi == NULL) goto err;
360
361 if (!BN_bin2bn(buf + 1, field_len, x)) goto err;
362 if (BN_ucmp(x, &group->field) >= 0)
363 {
364 ECerr(EC_F_EC_GF2M_SIMPLE_OCT2POINT, EC_R_INVALID_ENCODING);
365 goto err;
366 }
367
368 if (form == POINT_CONVERSION_COMPRESSED)
369 {
370 if (!EC_POINT_set_compressed_coordinates_GF2m(group, point, x, y_bit, ctx)) goto err;
371 }
372 else
373 {
374 if (!BN_bin2bn(buf + 1 + field_len, field_len, y)) goto err;
375 if (BN_ucmp(y, &group->field) >= 0)
376 {
377 ECerr(EC_F_EC_GF2M_SIMPLE_OCT2POINT, EC_R_INVALID_ENCODING);
378 goto err;
379 }
380 if (form == POINT_CONVERSION_HYBRID)
381 {
382 if (!group->meth->field_div(group, yxi, y, x, ctx)) goto err;
383 if (y_bit != BN_is_odd(yxi))
384 {
385 ECerr(EC_F_EC_GF2M_SIMPLE_OCT2POINT, EC_R_INVALID_ENCODING);
386 goto err;
387 }
388 }
389
390 if (!EC_POINT_set_affine_coordinates_GF2m(group, point, x, y, ctx)) goto err;
391 }
392
393 if (!EC_POINT_is_on_curve(group, point, ctx)) /* test required by X9.62 */
394 {
395 ECerr(EC_F_EC_GF2M_SIMPLE_OCT2POINT, EC_R_POINT_IS_NOT_ON_CURVE);
396 goto err;
397 }
398
399 ret = 1;
400
401 err:
402 BN_CTX_end(ctx);
403 if (new_ctx != NULL)
404 BN_CTX_free(new_ctx);
405 return ret;
406 }
407#endif
diff --git a/src/lib/libssl/src/crypto/ec/ec2_smpl.c b/src/lib/libssl/src/crypto/ec/ec2_smpl.c
index 03deae6674..e0e59c7d82 100644
--- a/src/lib/libssl/src/crypto/ec/ec2_smpl.c
+++ b/src/lib/libssl/src/crypto/ec/ec2_smpl.c
@@ -71,10 +71,20 @@
71 71
72#include "ec_lcl.h" 72#include "ec_lcl.h"
73 73
74#ifndef OPENSSL_NO_EC2M
75
76#ifdef OPENSSL_FIPS
77#include <openssl/fips.h>
78#endif
79
74 80
75const EC_METHOD *EC_GF2m_simple_method(void) 81const EC_METHOD *EC_GF2m_simple_method(void)
76 { 82 {
83#ifdef OPENSSL_FIPS
84 return fips_ec_gf2m_simple_method();
85#else
77 static const EC_METHOD ret = { 86 static const EC_METHOD ret = {
87 EC_FLAGS_DEFAULT_OCT,
78 NID_X9_62_characteristic_two_field, 88 NID_X9_62_characteristic_two_field,
79 ec_GF2m_simple_group_init, 89 ec_GF2m_simple_group_init,
80 ec_GF2m_simple_group_finish, 90 ec_GF2m_simple_group_finish,
@@ -93,9 +103,7 @@ const EC_METHOD *EC_GF2m_simple_method(void)
93 0 /* get_Jprojective_coordinates_GFp */, 103 0 /* get_Jprojective_coordinates_GFp */,
94 ec_GF2m_simple_point_set_affine_coordinates, 104 ec_GF2m_simple_point_set_affine_coordinates,
95 ec_GF2m_simple_point_get_affine_coordinates, 105 ec_GF2m_simple_point_get_affine_coordinates,
96 ec_GF2m_simple_set_compressed_coordinates, 106 0,0,0,
97 ec_GF2m_simple_point2oct,
98 ec_GF2m_simple_oct2point,
99 ec_GF2m_simple_add, 107 ec_GF2m_simple_add,
100 ec_GF2m_simple_dbl, 108 ec_GF2m_simple_dbl,
101 ec_GF2m_simple_invert, 109 ec_GF2m_simple_invert,
@@ -118,6 +126,7 @@ const EC_METHOD *EC_GF2m_simple_method(void)
118 0 /* field_set_to_one */ }; 126 0 /* field_set_to_one */ };
119 127
120 return &ret; 128 return &ret;
129#endif
121 } 130 }
122 131
123 132
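
With EC_FLAGS_DEFAULT_OCT set, EC_GF2m_simple_method no longer installs its own point2oct/oct2point/compressed-coordinate handlers; the generic EC_POINT_* entry points are expected to fall back to the shared code now kept in ec2_oct.c. Nothing changes for callers, who keep using the public conversion functions; a minimal round-trip sketch, assuming a build with binary-curve support:

    #include <openssl/ec.h>
    #include <openssl/objects.h>

    /* Sketch: encode a group's generator in compressed form and decode it back. */
    static int roundtrip_generator(void)
    {
        EC_GROUP *group = EC_GROUP_new_by_curve_name(NID_sect233r1);
        EC_POINT *copy = NULL;
        unsigned char buf[128];
        size_t len;
        int ok = 0;

        if (group == NULL)
            return 0;
        len = EC_POINT_point2oct(group, EC_GROUP_get0_generator(group),
            POINT_CONVERSION_COMPRESSED, buf, sizeof(buf), NULL);
        copy = EC_POINT_new(group);
        if (len != 0 && copy != NULL
            && EC_POINT_oct2point(group, copy, buf, len, NULL))
            ok = 1;
        EC_POINT_free(copy);
        EC_GROUP_free(group);
        return ok;
    }
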
@@ -405,340 +414,6 @@ int ec_GF2m_simple_point_get_affine_coordinates(const EC_GROUP *group, const EC_
405 return ret; 414 return ret;
406 } 415 }
407 416
408
409/* Calculates and sets the affine coordinates of an EC_POINT from the given
410 * compressed coordinates. Uses algorithm 2.3.4 of SEC 1.
411 * Note that the simple implementation only uses affine coordinates.
412 *
413 * The method is from the following publication:
414 *
415 * Harper, Menezes, Vanstone:
416 * "Public-Key Cryptosystems with Very Small Key Lengths",
417 * EUROCRYPT '92, Springer-Verlag LNCS 658,
418 * published February 1993
419 *
420 * US Patents 6,141,420 and 6,618,483 (Vanstone, Mullin, Agnew) describe
421 * the same method, but claim no priority date earlier than July 29, 1994
422 * (and additionally fail to cite the EUROCRYPT '92 publication as prior art).
423 */
424int ec_GF2m_simple_set_compressed_coordinates(const EC_GROUP *group, EC_POINT *point,
425 const BIGNUM *x_, int y_bit, BN_CTX *ctx)
426 {
427 BN_CTX *new_ctx = NULL;
428 BIGNUM *tmp, *x, *y, *z;
429 int ret = 0, z0;
430
431 /* clear error queue */
432 ERR_clear_error();
433
434 if (ctx == NULL)
435 {
436 ctx = new_ctx = BN_CTX_new();
437 if (ctx == NULL)
438 return 0;
439 }
440
441 y_bit = (y_bit != 0) ? 1 : 0;
442
443 BN_CTX_start(ctx);
444 tmp = BN_CTX_get(ctx);
445 x = BN_CTX_get(ctx);
446 y = BN_CTX_get(ctx);
447 z = BN_CTX_get(ctx);
448 if (z == NULL) goto err;
449
450 if (!BN_GF2m_mod_arr(x, x_, group->poly)) goto err;
451 if (BN_is_zero(x))
452 {
453 if (!BN_GF2m_mod_sqrt_arr(y, &group->b, group->poly, ctx)) goto err;
454 }
455 else
456 {
457 if (!group->meth->field_sqr(group, tmp, x, ctx)) goto err;
458 if (!group->meth->field_div(group, tmp, &group->b, tmp, ctx)) goto err;
459 if (!BN_GF2m_add(tmp, &group->a, tmp)) goto err;
460 if (!BN_GF2m_add(tmp, x, tmp)) goto err;
461 if (!BN_GF2m_mod_solve_quad_arr(z, tmp, group->poly, ctx))
462 {
463 unsigned long err = ERR_peek_last_error();
464
465 if (ERR_GET_LIB(err) == ERR_LIB_BN && ERR_GET_REASON(err) == BN_R_NO_SOLUTION)
466 {
467 ERR_clear_error();
468 ECerr(EC_F_EC_GF2M_SIMPLE_SET_COMPRESSED_COORDINATES, EC_R_INVALID_COMPRESSED_POINT);
469 }
470 else
471 ECerr(EC_F_EC_GF2M_SIMPLE_SET_COMPRESSED_COORDINATES, ERR_R_BN_LIB);
472 goto err;
473 }
474 z0 = (BN_is_odd(z)) ? 1 : 0;
475 if (!group->meth->field_mul(group, y, x, z, ctx)) goto err;
476 if (z0 != y_bit)
477 {
478 if (!BN_GF2m_add(y, y, x)) goto err;
479 }
480 }
481
482 if (!EC_POINT_set_affine_coordinates_GF2m(group, point, x, y, ctx)) goto err;
483
484 ret = 1;
485
486 err:
487 BN_CTX_end(ctx);
488 if (new_ctx != NULL)
489 BN_CTX_free(new_ctx);
490 return ret;
491 }
492
493
494/* Converts an EC_POINT to an octet string.
495 * If buf is NULL, the encoded length will be returned.
496 * If the length len of buf is smaller than required an error will be returned.
497 */
498size_t ec_GF2m_simple_point2oct(const EC_GROUP *group, const EC_POINT *point, point_conversion_form_t form,
499 unsigned char *buf, size_t len, BN_CTX *ctx)
500 {
501 size_t ret;
502 BN_CTX *new_ctx = NULL;
503 int used_ctx = 0;
504 BIGNUM *x, *y, *yxi;
505 size_t field_len, i, skip;
506
507 if ((form != POINT_CONVERSION_COMPRESSED)
508 && (form != POINT_CONVERSION_UNCOMPRESSED)
509 && (form != POINT_CONVERSION_HYBRID))
510 {
511 ECerr(EC_F_EC_GF2M_SIMPLE_POINT2OCT, EC_R_INVALID_FORM);
512 goto err;
513 }
514
515 if (EC_POINT_is_at_infinity(group, point))
516 {
517 /* encodes to a single 0 octet */
518 if (buf != NULL)
519 {
520 if (len < 1)
521 {
522 ECerr(EC_F_EC_GF2M_SIMPLE_POINT2OCT, EC_R_BUFFER_TOO_SMALL);
523 return 0;
524 }
525 buf[0] = 0;
526 }
527 return 1;
528 }
529
530
531 /* ret := required output buffer length */
532 field_len = (EC_GROUP_get_degree(group) + 7) / 8;
533 ret = (form == POINT_CONVERSION_COMPRESSED) ? 1 + field_len : 1 + 2*field_len;
534
535 /* if 'buf' is NULL, just return required length */
536 if (buf != NULL)
537 {
538 if (len < ret)
539 {
540 ECerr(EC_F_EC_GF2M_SIMPLE_POINT2OCT, EC_R_BUFFER_TOO_SMALL);
541 goto err;
542 }
543
544 if (ctx == NULL)
545 {
546 ctx = new_ctx = BN_CTX_new();
547 if (ctx == NULL)
548 return 0;
549 }
550
551 BN_CTX_start(ctx);
552 used_ctx = 1;
553 x = BN_CTX_get(ctx);
554 y = BN_CTX_get(ctx);
555 yxi = BN_CTX_get(ctx);
556 if (yxi == NULL) goto err;
557
558 if (!EC_POINT_get_affine_coordinates_GF2m(group, point, x, y, ctx)) goto err;
559
560 buf[0] = form;
561 if ((form != POINT_CONVERSION_UNCOMPRESSED) && !BN_is_zero(x))
562 {
563 if (!group->meth->field_div(group, yxi, y, x, ctx)) goto err;
564 if (BN_is_odd(yxi)) buf[0]++;
565 }
566
567 i = 1;
568
569 skip = field_len - BN_num_bytes(x);
570 if (skip > field_len)
571 {
572 ECerr(EC_F_EC_GF2M_SIMPLE_POINT2OCT, ERR_R_INTERNAL_ERROR);
573 goto err;
574 }
575 while (skip > 0)
576 {
577 buf[i++] = 0;
578 skip--;
579 }
580 skip = BN_bn2bin(x, buf + i);
581 i += skip;
582 if (i != 1 + field_len)
583 {
584 ECerr(EC_F_EC_GF2M_SIMPLE_POINT2OCT, ERR_R_INTERNAL_ERROR);
585 goto err;
586 }
587
588 if (form == POINT_CONVERSION_UNCOMPRESSED || form == POINT_CONVERSION_HYBRID)
589 {
590 skip = field_len - BN_num_bytes(y);
591 if (skip > field_len)
592 {
593 ECerr(EC_F_EC_GF2M_SIMPLE_POINT2OCT, ERR_R_INTERNAL_ERROR);
594 goto err;
595 }
596 while (skip > 0)
597 {
598 buf[i++] = 0;
599 skip--;
600 }
601 skip = BN_bn2bin(y, buf + i);
602 i += skip;
603 }
604
605 if (i != ret)
606 {
607 ECerr(EC_F_EC_GF2M_SIMPLE_POINT2OCT, ERR_R_INTERNAL_ERROR);
608 goto err;
609 }
610 }
611
612 if (used_ctx)
613 BN_CTX_end(ctx);
614 if (new_ctx != NULL)
615 BN_CTX_free(new_ctx);
616 return ret;
617
618 err:
619 if (used_ctx)
620 BN_CTX_end(ctx);
621 if (new_ctx != NULL)
622 BN_CTX_free(new_ctx);
623 return 0;
624 }
625
626
627/* Converts an octet string representation to an EC_POINT.
628 * Note that the simple implementation only uses affine coordinates.
629 */
630int ec_GF2m_simple_oct2point(const EC_GROUP *group, EC_POINT *point,
631 const unsigned char *buf, size_t len, BN_CTX *ctx)
632 {
633 point_conversion_form_t form;
634 int y_bit;
635 BN_CTX *new_ctx = NULL;
636 BIGNUM *x, *y, *yxi;
637 size_t field_len, enc_len;
638 int ret = 0;
639
640 if (len == 0)
641 {
642 ECerr(EC_F_EC_GF2M_SIMPLE_OCT2POINT, EC_R_BUFFER_TOO_SMALL);
643 return 0;
644 }
645 form = buf[0];
646 y_bit = form & 1;
647 form = form & ~1U;
648 if ((form != 0) && (form != POINT_CONVERSION_COMPRESSED)
649 && (form != POINT_CONVERSION_UNCOMPRESSED)
650 && (form != POINT_CONVERSION_HYBRID))
651 {
652 ECerr(EC_F_EC_GF2M_SIMPLE_OCT2POINT, EC_R_INVALID_ENCODING);
653 return 0;
654 }
655 if ((form == 0 || form == POINT_CONVERSION_UNCOMPRESSED) && y_bit)
656 {
657 ECerr(EC_F_EC_GF2M_SIMPLE_OCT2POINT, EC_R_INVALID_ENCODING);
658 return 0;
659 }
660
661 if (form == 0)
662 {
663 if (len != 1)
664 {
665 ECerr(EC_F_EC_GF2M_SIMPLE_OCT2POINT, EC_R_INVALID_ENCODING);
666 return 0;
667 }
668
669 return EC_POINT_set_to_infinity(group, point);
670 }
671
672 field_len = (EC_GROUP_get_degree(group) + 7) / 8;
673 enc_len = (form == POINT_CONVERSION_COMPRESSED) ? 1 + field_len : 1 + 2*field_len;
674
675 if (len != enc_len)
676 {
677 ECerr(EC_F_EC_GF2M_SIMPLE_OCT2POINT, EC_R_INVALID_ENCODING);
678 return 0;
679 }
680
681 if (ctx == NULL)
682 {
683 ctx = new_ctx = BN_CTX_new();
684 if (ctx == NULL)
685 return 0;
686 }
687
688 BN_CTX_start(ctx);
689 x = BN_CTX_get(ctx);
690 y = BN_CTX_get(ctx);
691 yxi = BN_CTX_get(ctx);
692 if (yxi == NULL) goto err;
693
694 if (!BN_bin2bn(buf + 1, field_len, x)) goto err;
695 if (BN_ucmp(x, &group->field) >= 0)
696 {
697 ECerr(EC_F_EC_GF2M_SIMPLE_OCT2POINT, EC_R_INVALID_ENCODING);
698 goto err;
699 }
700
701 if (form == POINT_CONVERSION_COMPRESSED)
702 {
703 if (!EC_POINT_set_compressed_coordinates_GF2m(group, point, x, y_bit, ctx)) goto err;
704 }
705 else
706 {
707 if (!BN_bin2bn(buf + 1 + field_len, field_len, y)) goto err;
708 if (BN_ucmp(y, &group->field) >= 0)
709 {
710 ECerr(EC_F_EC_GF2M_SIMPLE_OCT2POINT, EC_R_INVALID_ENCODING);
711 goto err;
712 }
713 if (form == POINT_CONVERSION_HYBRID)
714 {
715 if (!group->meth->field_div(group, yxi, y, x, ctx)) goto err;
716 if (y_bit != BN_is_odd(yxi))
717 {
718 ECerr(EC_F_EC_GF2M_SIMPLE_OCT2POINT, EC_R_INVALID_ENCODING);
719 goto err;
720 }
721 }
722
723 if (!EC_POINT_set_affine_coordinates_GF2m(group, point, x, y, ctx)) goto err;
724 }
725
726 if (!EC_POINT_is_on_curve(group, point, ctx)) /* test required by X9.62 */
727 {
728 ECerr(EC_F_EC_GF2M_SIMPLE_OCT2POINT, EC_R_POINT_IS_NOT_ON_CURVE);
729 goto err;
730 }
731
732 ret = 1;
733
734 err:
735 BN_CTX_end(ctx);
736 if (new_ctx != NULL)
737 BN_CTX_free(new_ctx);
738 return ret;
739 }
740
741
742/* Computes a + b and stores the result in r. r could be a or b, a could be b. 417/* Computes a + b and stores the result in r. r could be a or b, a could be b.
743 * Uses algorithm A.10.2 of IEEE P1363. 418 * Uses algorithm A.10.2 of IEEE P1363.
744 */ 419 */
@@ -1040,3 +715,5 @@ int ec_GF2m_simple_field_div(const EC_GROUP *group, BIGNUM *r, const BIGNUM *a,
1040 { 715 {
1041 return BN_GF2m_mod_div(r, a, b, &group->field, ctx); 716 return BN_GF2m_mod_div(r, a, b, &group->field, ctx);
1042 } 717 }
718
719#endif
diff --git a/src/lib/libssl/src/crypto/ec/ec_ameth.c b/src/lib/libssl/src/crypto/ec/ec_ameth.c
index c00f7d746c..83909c1853 100644
--- a/src/lib/libssl/src/crypto/ec/ec_ameth.c
+++ b/src/lib/libssl/src/crypto/ec/ec_ameth.c
@@ -651,6 +651,7 @@ const EVP_PKEY_ASN1_METHOD eckey_asn1_meth =
651 ec_copy_parameters, 651 ec_copy_parameters,
652 ec_cmp_parameters, 652 ec_cmp_parameters,
653 eckey_param_print, 653 eckey_param_print,
654 0,
654 655
655 int_ec_free, 656 int_ec_free,
656 ec_pkey_ctrl, 657 ec_pkey_ctrl,
diff --git a/src/lib/libssl/src/crypto/ec/ec_asn1.c b/src/lib/libssl/src/crypto/ec/ec_asn1.c
index ae55539859..175eec5342 100644
--- a/src/lib/libssl/src/crypto/ec/ec_asn1.c
+++ b/src/lib/libssl/src/crypto/ec/ec_asn1.c
@@ -83,7 +83,7 @@ int EC_GROUP_get_basis_type(const EC_GROUP *group)
83 /* everything else is currently not supported */ 83 /* everything else is currently not supported */
84 return 0; 84 return 0;
85 } 85 }
86 86#ifndef OPENSSL_NO_EC2M
87int EC_GROUP_get_trinomial_basis(const EC_GROUP *group, unsigned int *k) 87int EC_GROUP_get_trinomial_basis(const EC_GROUP *group, unsigned int *k)
88 { 88 {
89 if (group == NULL) 89 if (group == NULL)
@@ -101,7 +101,6 @@ int EC_GROUP_get_trinomial_basis(const EC_GROUP *group, unsigned int *k)
101 101
102 return 1; 102 return 1;
103 } 103 }
104
105int EC_GROUP_get_pentanomial_basis(const EC_GROUP *group, unsigned int *k1, 104int EC_GROUP_get_pentanomial_basis(const EC_GROUP *group, unsigned int *k1,
106 unsigned int *k2, unsigned int *k3) 105 unsigned int *k2, unsigned int *k3)
107 { 106 {
@@ -124,7 +123,7 @@ int EC_GROUP_get_pentanomial_basis(const EC_GROUP *group, unsigned int *k1,
124 123
125 return 1; 124 return 1;
126 } 125 }
127 126#endif
128 127
129 128
130/* some structures needed for the asn1 encoding */ 129/* some structures needed for the asn1 encoding */
@@ -340,6 +339,12 @@ static int ec_asn1_group2fieldid(const EC_GROUP *group, X9_62_FIELDID *field)
340 } 339 }
341 } 340 }
342 else /* nid == NID_X9_62_characteristic_two_field */ 341 else /* nid == NID_X9_62_characteristic_two_field */
342#ifdef OPENSSL_NO_EC2M
343 {
344 ECerr(EC_F_EC_ASN1_GROUP2FIELDID, EC_R_GF2M_NOT_SUPPORTED);
345 goto err;
346 }
347#else
343 { 348 {
344 int field_type; 349 int field_type;
345 X9_62_CHARACTERISTIC_TWO *char_two; 350 X9_62_CHARACTERISTIC_TWO *char_two;
@@ -419,6 +424,7 @@ static int ec_asn1_group2fieldid(const EC_GROUP *group, X9_62_FIELDID *field)
419 } 424 }
420 } 425 }
421 } 426 }
427#endif
422 428
423 ok = 1; 429 ok = 1;
424 430
@@ -456,6 +462,7 @@ static int ec_asn1_group2curve(const EC_GROUP *group, X9_62_CURVE *curve)
456 goto err; 462 goto err;
457 } 463 }
458 } 464 }
465#ifndef OPENSSL_NO_EC2M
459 else /* nid == NID_X9_62_characteristic_two_field */ 466 else /* nid == NID_X9_62_characteristic_two_field */
460 { 467 {
461 if (!EC_GROUP_get_curve_GF2m(group, NULL, tmp_1, tmp_2, NULL)) 468 if (!EC_GROUP_get_curve_GF2m(group, NULL, tmp_1, tmp_2, NULL))
@@ -464,7 +471,7 @@ static int ec_asn1_group2curve(const EC_GROUP *group, X9_62_CURVE *curve)
464 goto err; 471 goto err;
465 } 472 }
466 } 473 }
467 474#endif
468 len_1 = (size_t)BN_num_bytes(tmp_1); 475 len_1 = (size_t)BN_num_bytes(tmp_1);
469 len_2 = (size_t)BN_num_bytes(tmp_2); 476 len_2 = (size_t)BN_num_bytes(tmp_2);
470 477
@@ -775,8 +782,13 @@ static EC_GROUP *ec_asn1_parameters2group(const ECPARAMETERS *params)
775 782
776 /* get the field parameters */ 783 /* get the field parameters */
777 tmp = OBJ_obj2nid(params->fieldID->fieldType); 784 tmp = OBJ_obj2nid(params->fieldID->fieldType);
778
779 if (tmp == NID_X9_62_characteristic_two_field) 785 if (tmp == NID_X9_62_characteristic_two_field)
786#ifdef OPENSSL_NO_EC2M
787 {
788 ECerr(EC_F_EC_ASN1_PARAMETERS2GROUP, EC_R_GF2M_NOT_SUPPORTED);
789 goto err;
790 }
791#else
780 { 792 {
781 X9_62_CHARACTERISTIC_TWO *char_two; 793 X9_62_CHARACTERISTIC_TWO *char_two;
782 794
@@ -862,6 +874,7 @@ static EC_GROUP *ec_asn1_parameters2group(const ECPARAMETERS *params)
862 /* create the EC_GROUP structure */ 874 /* create the EC_GROUP structure */
863 ret = EC_GROUP_new_curve_GF2m(p, a, b, NULL); 875 ret = EC_GROUP_new_curve_GF2m(p, a, b, NULL);
864 } 876 }
877#endif
865 else if (tmp == NID_X9_62_prime_field) 878 else if (tmp == NID_X9_62_prime_field)
866 { 879 {
867 /* we have a curve over a prime field */ 880 /* we have a curve over a prime field */
@@ -1065,6 +1078,7 @@ EC_GROUP *d2i_ECPKParameters(EC_GROUP **a, const unsigned char **in, long len)
1065 if ((group = ec_asn1_pkparameters2group(params)) == NULL) 1078 if ((group = ec_asn1_pkparameters2group(params)) == NULL)
1066 { 1079 {
1067 ECerr(EC_F_D2I_ECPKPARAMETERS, EC_R_PKPARAMETERS2GROUP_FAILURE); 1080 ECerr(EC_F_D2I_ECPKPARAMETERS, EC_R_PKPARAMETERS2GROUP_FAILURE);
1081 ECPKPARAMETERS_free(params);
1068 return NULL; 1082 return NULL;
1069 } 1083 }
1070 1084
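
The added ECPKPARAMETERS_free closes a small leak in d2i_ECPKParameters: the decoded parameter structure was previously dropped when building the EC_GROUP from it failed. For reference, typical use of that decoder (DER in, caller-owned EC_GROUP out):

    #include <openssl/ec.h>

    /* Sketch: decode X9.62 ECParameters (DER) into an EC_GROUP. */
    static EC_GROUP *group_from_der(const unsigned char *der, long derlen)
    {
        const unsigned char *p = der;

        /* NULL on parse failure or on an unsupported field type,
         * e.g. a GF(2^m) curve in an OPENSSL_NO_EC2M build */
        return d2i_ECPKParameters(NULL, &p, derlen);
    }
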
diff --git a/src/lib/libssl/src/crypto/ec/ec_curve.c b/src/lib/libssl/src/crypto/ec/ec_curve.c
index 23274e4031..c72fb2697c 100644
--- a/src/lib/libssl/src/crypto/ec/ec_curve.c
+++ b/src/lib/libssl/src/crypto/ec/ec_curve.c
@@ -3,7 +3,7 @@
3 * Written by Nils Larsch for the OpenSSL project. 3 * Written by Nils Larsch for the OpenSSL project.
4 */ 4 */
5/* ==================================================================== 5/* ====================================================================
6 * Copyright (c) 1998-2004 The OpenSSL Project. All rights reserved. 6 * Copyright (c) 1998-2010 The OpenSSL Project. All rights reserved.
7 * 7 *
8 * Redistribution and use in source and binary forms, with or without 8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions 9 * modification, are permitted provided that the following conditions
@@ -72,6 +72,7 @@
72#include "ec_lcl.h" 72#include "ec_lcl.h"
73#include <openssl/err.h> 73#include <openssl/err.h>
74#include <openssl/obj_mac.h> 74#include <openssl/obj_mac.h>
75#include <openssl/opensslconf.h>
75 76
76typedef struct { 77typedef struct {
77 int field_type, /* either NID_X9_62_prime_field or 78 int field_type, /* either NID_X9_62_prime_field or
@@ -703,6 +704,8 @@ static const struct { EC_CURVE_DATA h; unsigned char data[0+28*6]; }
703 0x13,0xDD,0x29,0x45,0x5C,0x5C,0x2A,0x3D } 704 0x13,0xDD,0x29,0x45,0x5C,0x5C,0x2A,0x3D }
704 }; 705 };
705 706
707#ifndef OPENSSL_NO_EC2M
708
706/* characteristic two curves */ 709/* characteristic two curves */
707static const struct { EC_CURVE_DATA h; unsigned char data[20+15*6]; } 710static const struct { EC_CURVE_DATA h; unsigned char data[20+15*6]; }
708 _EC_SECG_CHAR2_113R1 = { 711 _EC_SECG_CHAR2_113R1 = {
@@ -1300,7 +1303,7 @@ static const struct { EC_CURVE_DATA h; unsigned char data[20+21*6]; }
1300 { 0x53,0x81,0x4C,0x05,0x0D,0x44,0xD6,0x96,0xE6,0x76, /* seed */ 1303 { 0x53,0x81,0x4C,0x05,0x0D,0x44,0xD6,0x96,0xE6,0x76, /* seed */
1301 0x87,0x56,0x15,0x17,0x58,0x0C,0xA4,0xE2,0x9F,0xFD, 1304 0x87,0x56,0x15,0x17,0x58,0x0C,0xA4,0xE2,0x9F,0xFD,
1302 1305
1303 0x08,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* p */ 1306 0x08,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* p */
1304 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x01, 1307 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x01,
1305 0x07, 1308 0x07,
1306 0x01,0x08,0xB3,0x9E,0x77,0xC4,0xB1,0x08,0xBE,0xD9, /* a */ 1309 0x01,0x08,0xB3,0x9E,0x77,0xC4,0xB1,0x08,0xBE,0xD9, /* a */
@@ -1817,103 +1820,128 @@ static const struct { EC_CURVE_DATA h; unsigned char data[0+24*6]; }
1817 0xBA,0xFC,0xA7,0x5E } 1820 0xBA,0xFC,0xA7,0x5E }
1818 }; 1821 };
1819 1822
1823#endif
1824
1820typedef struct _ec_list_element_st { 1825typedef struct _ec_list_element_st {
1821 int nid; 1826 int nid;
1822 const EC_CURVE_DATA *data; 1827 const EC_CURVE_DATA *data;
1828 const EC_METHOD *(*meth)(void);
1823 const char *comment; 1829 const char *comment;
1824 } ec_list_element; 1830 } ec_list_element;
1825 1831
1826static const ec_list_element curve_list[] = { 1832static const ec_list_element curve_list[] = {
1827 /* prime field curves */ 1833 /* prime field curves */
1828 /* secg curves */ 1834 /* secg curves */
1829 { NID_secp112r1, &_EC_SECG_PRIME_112R1.h, "SECG/WTLS curve over a 112 bit prime field"}, 1835 { NID_secp112r1, &_EC_SECG_PRIME_112R1.h, 0, "SECG/WTLS curve over a 112 bit prime field" },
1830 { NID_secp112r2, &_EC_SECG_PRIME_112R2.h, "SECG curve over a 112 bit prime field"}, 1836 { NID_secp112r2, &_EC_SECG_PRIME_112R2.h, 0, "SECG curve over a 112 bit prime field" },
1831 { NID_secp128r1, &_EC_SECG_PRIME_128R1.h, "SECG curve over a 128 bit prime field"}, 1837 { NID_secp128r1, &_EC_SECG_PRIME_128R1.h, 0, "SECG curve over a 128 bit prime field" },
1832 { NID_secp128r2, &_EC_SECG_PRIME_128R2.h, "SECG curve over a 128 bit prime field"}, 1838 { NID_secp128r2, &_EC_SECG_PRIME_128R2.h, 0, "SECG curve over a 128 bit prime field" },
1833 { NID_secp160k1, &_EC_SECG_PRIME_160K1.h, "SECG curve over a 160 bit prime field"}, 1839 { NID_secp160k1, &_EC_SECG_PRIME_160K1.h, 0, "SECG curve over a 160 bit prime field" },
1834 { NID_secp160r1, &_EC_SECG_PRIME_160R1.h, "SECG curve over a 160 bit prime field"}, 1840 { NID_secp160r1, &_EC_SECG_PRIME_160R1.h, 0, "SECG curve over a 160 bit prime field" },
1835 { NID_secp160r2, &_EC_SECG_PRIME_160R2.h, "SECG/WTLS curve over a 160 bit prime field"}, 1841 { NID_secp160r2, &_EC_SECG_PRIME_160R2.h, 0, "SECG/WTLS curve over a 160 bit prime field" },
1836 /* SECG secp192r1 is the same as X9.62 prime192v1 and hence omitted */ 1842 /* SECG secp192r1 is the same as X9.62 prime192v1 and hence omitted */
1837 { NID_secp192k1, &_EC_SECG_PRIME_192K1.h, "SECG curve over a 192 bit prime field"}, 1843 { NID_secp192k1, &_EC_SECG_PRIME_192K1.h, 0, "SECG curve over a 192 bit prime field" },
1838 { NID_secp224k1, &_EC_SECG_PRIME_224K1.h, "SECG curve over a 224 bit prime field"}, 1844 { NID_secp224k1, &_EC_SECG_PRIME_224K1.h, 0, "SECG curve over a 224 bit prime field" },
1839 { NID_secp224r1, &_EC_NIST_PRIME_224.h, "NIST/SECG curve over a 224 bit prime field"}, 1845#ifndef OPENSSL_NO_EC_NISTP_64_GCC_128
1840 { NID_secp256k1, &_EC_SECG_PRIME_256K1.h, "SECG curve over a 256 bit prime field"}, 1846 { NID_secp224r1, &_EC_NIST_PRIME_224.h, EC_GFp_nistp224_method, "NIST/SECG curve over a 224 bit prime field" },
1847#else
1848 { NID_secp224r1, &_EC_NIST_PRIME_224.h, 0, "NIST/SECG curve over a 224 bit prime field" },
1849#endif
1850 { NID_secp256k1, &_EC_SECG_PRIME_256K1.h, 0, "SECG curve over a 256 bit prime field" },
1841 /* SECG secp256r1 is the same as X9.62 prime256v1 and hence omitted */ 1851 /* SECG secp256r1 is the same as X9.62 prime256v1 and hence omitted */
1842 { NID_secp384r1, &_EC_NIST_PRIME_384.h, "NIST/SECG curve over a 384 bit prime field"}, 1852 { NID_secp384r1, &_EC_NIST_PRIME_384.h, 0, "NIST/SECG curve over a 384 bit prime field" },
1843 { NID_secp521r1, &_EC_NIST_PRIME_521.h, "NIST/SECG curve over a 521 bit prime field"}, 1853#ifndef OPENSSL_NO_EC_NISTP_64_GCC_128
1854 { NID_secp521r1, &_EC_NIST_PRIME_521.h, EC_GFp_nistp521_method, "NIST/SECG curve over a 521 bit prime field" },
1855#else
1856 { NID_secp521r1, &_EC_NIST_PRIME_521.h, 0, "NIST/SECG curve over a 521 bit prime field" },
1857#endif
1844 /* X9.62 curves */ 1858 /* X9.62 curves */
1845 { NID_X9_62_prime192v1, &_EC_NIST_PRIME_192.h, "NIST/X9.62/SECG curve over a 192 bit prime field"}, 1859 { NID_X9_62_prime192v1, &_EC_NIST_PRIME_192.h, 0, "NIST/X9.62/SECG curve over a 192 bit prime field" },
1846 { NID_X9_62_prime192v2, &_EC_X9_62_PRIME_192V2.h, "X9.62 curve over a 192 bit prime field"}, 1860 { NID_X9_62_prime192v2, &_EC_X9_62_PRIME_192V2.h, 0, "X9.62 curve over a 192 bit prime field" },
1847 { NID_X9_62_prime192v3, &_EC_X9_62_PRIME_192V3.h, "X9.62 curve over a 192 bit prime field"}, 1861 { NID_X9_62_prime192v3, &_EC_X9_62_PRIME_192V3.h, 0, "X9.62 curve over a 192 bit prime field" },
1848 { NID_X9_62_prime239v1, &_EC_X9_62_PRIME_239V1.h, "X9.62 curve over a 239 bit prime field"}, 1862 { NID_X9_62_prime239v1, &_EC_X9_62_PRIME_239V1.h, 0, "X9.62 curve over a 239 bit prime field" },
1849 { NID_X9_62_prime239v2, &_EC_X9_62_PRIME_239V2.h, "X9.62 curve over a 239 bit prime field"}, 1863 { NID_X9_62_prime239v2, &_EC_X9_62_PRIME_239V2.h, 0, "X9.62 curve over a 239 bit prime field" },
1850 { NID_X9_62_prime239v3, &_EC_X9_62_PRIME_239V3.h, "X9.62 curve over a 239 bit prime field"}, 1864 { NID_X9_62_prime239v3, &_EC_X9_62_PRIME_239V3.h, 0, "X9.62 curve over a 239 bit prime field" },
1851 { NID_X9_62_prime256v1, &_EC_X9_62_PRIME_256V1.h, "X9.62/SECG curve over a 256 bit prime field"}, 1865#ifndef OPENSSL_NO_EC_NISTP_64_GCC_128
1866 { NID_X9_62_prime256v1, &_EC_X9_62_PRIME_256V1.h, EC_GFp_nistp256_method, "X9.62/SECG curve over a 256 bit prime field" },
1867#else
1868 { NID_X9_62_prime256v1, &_EC_X9_62_PRIME_256V1.h, 0, "X9.62/SECG curve over a 256 bit prime field" },
1869#endif
1870#ifndef OPENSSL_NO_EC2M
1852 /* characteristic two field curves */ 1871 /* characteristic two field curves */
1853 /* NIST/SECG curves */ 1872 /* NIST/SECG curves */
1854 { NID_sect113r1, &_EC_SECG_CHAR2_113R1.h, "SECG curve over a 113 bit binary field"}, 1873 { NID_sect113r1, &_EC_SECG_CHAR2_113R1.h, 0, "SECG curve over a 113 bit binary field" },
1855 { NID_sect113r2, &_EC_SECG_CHAR2_113R2.h, "SECG curve over a 113 bit binary field"}, 1874 { NID_sect113r2, &_EC_SECG_CHAR2_113R2.h, 0, "SECG curve over a 113 bit binary field" },
1856 { NID_sect131r1, &_EC_SECG_CHAR2_131R1.h, "SECG/WTLS curve over a 131 bit binary field"}, 1875 { NID_sect131r1, &_EC_SECG_CHAR2_131R1.h, 0, "SECG/WTLS curve over a 131 bit binary field" },
1857 { NID_sect131r2, &_EC_SECG_CHAR2_131R2.h, "SECG curve over a 131 bit binary field"}, 1876 { NID_sect131r2, &_EC_SECG_CHAR2_131R2.h, 0, "SECG curve over a 131 bit binary field" },
1858 { NID_sect163k1, &_EC_NIST_CHAR2_163K.h, "NIST/SECG/WTLS curve over a 163 bit binary field" }, 1877 { NID_sect163k1, &_EC_NIST_CHAR2_163K.h, 0, "NIST/SECG/WTLS curve over a 163 bit binary field" },
1859 { NID_sect163r1, &_EC_SECG_CHAR2_163R1.h, "SECG curve over a 163 bit binary field"}, 1878 { NID_sect163r1, &_EC_SECG_CHAR2_163R1.h, 0, "SECG curve over a 163 bit binary field" },
1860 { NID_sect163r2, &_EC_NIST_CHAR2_163B.h, "NIST/SECG curve over a 163 bit binary field" }, 1879 { NID_sect163r2, &_EC_NIST_CHAR2_163B.h, 0, "NIST/SECG curve over a 163 bit binary field" },
1861 { NID_sect193r1, &_EC_SECG_CHAR2_193R1.h, "SECG curve over a 193 bit binary field"}, 1880 { NID_sect193r1, &_EC_SECG_CHAR2_193R1.h, 0, "SECG curve over a 193 bit binary field" },
1862 { NID_sect193r2, &_EC_SECG_CHAR2_193R2.h, "SECG curve over a 193 bit binary field"}, 1881 { NID_sect193r2, &_EC_SECG_CHAR2_193R2.h, 0, "SECG curve over a 193 bit binary field" },
1863 { NID_sect233k1, &_EC_NIST_CHAR2_233K.h, "NIST/SECG/WTLS curve over a 233 bit binary field" }, 1882 { NID_sect233k1, &_EC_NIST_CHAR2_233K.h, 0, "NIST/SECG/WTLS curve over a 233 bit binary field" },
1864 { NID_sect233r1, &_EC_NIST_CHAR2_233B.h, "NIST/SECG/WTLS curve over a 233 bit binary field" }, 1883 { NID_sect233r1, &_EC_NIST_CHAR2_233B.h, 0, "NIST/SECG/WTLS curve over a 233 bit binary field" },
1865 { NID_sect239k1, &_EC_SECG_CHAR2_239K1.h, "SECG curve over a 239 bit binary field"}, 1884 { NID_sect239k1, &_EC_SECG_CHAR2_239K1.h, 0, "SECG curve over a 239 bit binary field" },
1866 { NID_sect283k1, &_EC_NIST_CHAR2_283K.h, "NIST/SECG curve over a 283 bit binary field" }, 1885 { NID_sect283k1, &_EC_NIST_CHAR2_283K.h, 0, "NIST/SECG curve over a 283 bit binary field" },
1867 { NID_sect283r1, &_EC_NIST_CHAR2_283B.h, "NIST/SECG curve over a 283 bit binary field" }, 1886 { NID_sect283r1, &_EC_NIST_CHAR2_283B.h, 0, "NIST/SECG curve over a 283 bit binary field" },
1868 { NID_sect409k1, &_EC_NIST_CHAR2_409K.h, "NIST/SECG curve over a 409 bit binary field" }, 1887 { NID_sect409k1, &_EC_NIST_CHAR2_409K.h, 0, "NIST/SECG curve over a 409 bit binary field" },
1869 { NID_sect409r1, &_EC_NIST_CHAR2_409B.h, "NIST/SECG curve over a 409 bit binary field" }, 1888 { NID_sect409r1, &_EC_NIST_CHAR2_409B.h, 0, "NIST/SECG curve over a 409 bit binary field" },
1870 { NID_sect571k1, &_EC_NIST_CHAR2_571K.h, "NIST/SECG curve over a 571 bit binary field" }, 1889 { NID_sect571k1, &_EC_NIST_CHAR2_571K.h, 0, "NIST/SECG curve over a 571 bit binary field" },
1871 { NID_sect571r1, &_EC_NIST_CHAR2_571B.h, "NIST/SECG curve over a 571 bit binary field" }, 1890 { NID_sect571r1, &_EC_NIST_CHAR2_571B.h, 0, "NIST/SECG curve over a 571 bit binary field" },
1872 /* X9.62 curves */ 1891 /* X9.62 curves */
1873 { NID_X9_62_c2pnb163v1, &_EC_X9_62_CHAR2_163V1.h, "X9.62 curve over a 163 bit binary field"}, 1892 { NID_X9_62_c2pnb163v1, &_EC_X9_62_CHAR2_163V1.h, 0, "X9.62 curve over a 163 bit binary field" },
1874 { NID_X9_62_c2pnb163v2, &_EC_X9_62_CHAR2_163V2.h, "X9.62 curve over a 163 bit binary field"}, 1893 { NID_X9_62_c2pnb163v2, &_EC_X9_62_CHAR2_163V2.h, 0, "X9.62 curve over a 163 bit binary field" },
1875 { NID_X9_62_c2pnb163v3, &_EC_X9_62_CHAR2_163V3.h, "X9.62 curve over a 163 bit binary field"}, 1894 { NID_X9_62_c2pnb163v3, &_EC_X9_62_CHAR2_163V3.h, 0, "X9.62 curve over a 163 bit binary field" },
1876 { NID_X9_62_c2pnb176v1, &_EC_X9_62_CHAR2_176V1.h, "X9.62 curve over a 176 bit binary field"}, 1895 { NID_X9_62_c2pnb176v1, &_EC_X9_62_CHAR2_176V1.h, 0, "X9.62 curve over a 176 bit binary field" },
1877 { NID_X9_62_c2tnb191v1, &_EC_X9_62_CHAR2_191V1.h, "X9.62 curve over a 191 bit binary field"}, 1896 { NID_X9_62_c2tnb191v1, &_EC_X9_62_CHAR2_191V1.h, 0, "X9.62 curve over a 191 bit binary field" },
1878 { NID_X9_62_c2tnb191v2, &_EC_X9_62_CHAR2_191V2.h, "X9.62 curve over a 191 bit binary field"}, 1897 { NID_X9_62_c2tnb191v2, &_EC_X9_62_CHAR2_191V2.h, 0, "X9.62 curve over a 191 bit binary field" },
1879 { NID_X9_62_c2tnb191v3, &_EC_X9_62_CHAR2_191V3.h, "X9.62 curve over a 191 bit binary field"}, 1898 { NID_X9_62_c2tnb191v3, &_EC_X9_62_CHAR2_191V3.h, 0, "X9.62 curve over a 191 bit binary field" },
1880 { NID_X9_62_c2pnb208w1, &_EC_X9_62_CHAR2_208W1.h, "X9.62 curve over a 208 bit binary field"}, 1899 { NID_X9_62_c2pnb208w1, &_EC_X9_62_CHAR2_208W1.h, 0, "X9.62 curve over a 208 bit binary field" },
1881 { NID_X9_62_c2tnb239v1, &_EC_X9_62_CHAR2_239V1.h, "X9.62 curve over a 239 bit binary field"}, 1900 { NID_X9_62_c2tnb239v1, &_EC_X9_62_CHAR2_239V1.h, 0, "X9.62 curve over a 239 bit binary field" },
1882 { NID_X9_62_c2tnb239v2, &_EC_X9_62_CHAR2_239V2.h, "X9.62 curve over a 239 bit binary field"}, 1901 { NID_X9_62_c2tnb239v2, &_EC_X9_62_CHAR2_239V2.h, 0, "X9.62 curve over a 239 bit binary field" },
1883 { NID_X9_62_c2tnb239v3, &_EC_X9_62_CHAR2_239V3.h, "X9.62 curve over a 239 bit binary field"}, 1902 { NID_X9_62_c2tnb239v3, &_EC_X9_62_CHAR2_239V3.h, 0, "X9.62 curve over a 239 bit binary field" },
1884 { NID_X9_62_c2pnb272w1, &_EC_X9_62_CHAR2_272W1.h, "X9.62 curve over a 272 bit binary field"}, 1903 { NID_X9_62_c2pnb272w1, &_EC_X9_62_CHAR2_272W1.h, 0, "X9.62 curve over a 272 bit binary field" },
1885 { NID_X9_62_c2pnb304w1, &_EC_X9_62_CHAR2_304W1.h, "X9.62 curve over a 304 bit binary field"}, 1904 { NID_X9_62_c2pnb304w1, &_EC_X9_62_CHAR2_304W1.h, 0, "X9.62 curve over a 304 bit binary field" },
1886 { NID_X9_62_c2tnb359v1, &_EC_X9_62_CHAR2_359V1.h, "X9.62 curve over a 359 bit binary field"}, 1905 { NID_X9_62_c2tnb359v1, &_EC_X9_62_CHAR2_359V1.h, 0, "X9.62 curve over a 359 bit binary field" },
1887 { NID_X9_62_c2pnb368w1, &_EC_X9_62_CHAR2_368W1.h, "X9.62 curve over a 368 bit binary field"}, 1906 { NID_X9_62_c2pnb368w1, &_EC_X9_62_CHAR2_368W1.h, 0, "X9.62 curve over a 368 bit binary field" },
1888 { NID_X9_62_c2tnb431r1, &_EC_X9_62_CHAR2_431R1.h, "X9.62 curve over a 431 bit binary field"}, 1907 { NID_X9_62_c2tnb431r1, &_EC_X9_62_CHAR2_431R1.h, 0, "X9.62 curve over a 431 bit binary field" },
1889 /* the WAP/WTLS curves 1908 /* the WAP/WTLS curves
1890 * [unlike SECG, spec has its own OIDs for curves from X9.62] */ 1909 * [unlike SECG, spec has its own OIDs for curves from X9.62] */
1891 { NID_wap_wsg_idm_ecid_wtls1, &_EC_WTLS_1.h, "WTLS curve over a 113 bit binary field"}, 1910 { NID_wap_wsg_idm_ecid_wtls1, &_EC_WTLS_1.h, 0, "WTLS curve over a 113 bit binary field" },
1892 { NID_wap_wsg_idm_ecid_wtls3, &_EC_NIST_CHAR2_163K.h, "NIST/SECG/WTLS curve over a 163 bit binary field"}, 1911 { NID_wap_wsg_idm_ecid_wtls3, &_EC_NIST_CHAR2_163K.h, 0, "NIST/SECG/WTLS curve over a 163 bit binary field" },
1893 { NID_wap_wsg_idm_ecid_wtls4, &_EC_SECG_CHAR2_113R1.h, "SECG curve over a 113 bit binary field"}, 1912 { NID_wap_wsg_idm_ecid_wtls4, &_EC_SECG_CHAR2_113R1.h, 0, "SECG curve over a 113 bit binary field" },
1894 { NID_wap_wsg_idm_ecid_wtls5, &_EC_X9_62_CHAR2_163V1.h, "X9.62 curve over a 163 bit binary field"}, 1913 { NID_wap_wsg_idm_ecid_wtls5, &_EC_X9_62_CHAR2_163V1.h, 0, "X9.62 curve over a 163 bit binary field" },
1895 { NID_wap_wsg_idm_ecid_wtls6, &_EC_SECG_PRIME_112R1.h, "SECG/WTLS curve over a 112 bit prime field"}, 1914#endif
1896 { NID_wap_wsg_idm_ecid_wtls7, &_EC_SECG_PRIME_160R2.h, "SECG/WTLS curve over a 160 bit prime field"}, 1915 { NID_wap_wsg_idm_ecid_wtls6, &_EC_SECG_PRIME_112R1.h, 0, "SECG/WTLS curve over a 112 bit prime field" },
1897 { NID_wap_wsg_idm_ecid_wtls8, &_EC_WTLS_8.h, "WTLS curve over a 112 bit prime field"}, 1916 { NID_wap_wsg_idm_ecid_wtls7, &_EC_SECG_PRIME_160R2.h, 0, "SECG/WTLS curve over a 160 bit prime field" },
1898 { NID_wap_wsg_idm_ecid_wtls9, &_EC_WTLS_9.h, "WTLS curve over a 160 bit prime field" }, 1917 { NID_wap_wsg_idm_ecid_wtls8, &_EC_WTLS_8.h, 0, "WTLS curve over a 112 bit prime field" },
1899 { NID_wap_wsg_idm_ecid_wtls10, &_EC_NIST_CHAR2_233K.h, "NIST/SECG/WTLS curve over a 233 bit binary field"}, 1918 { NID_wap_wsg_idm_ecid_wtls9, &_EC_WTLS_9.h, 0, "WTLS curve over a 160 bit prime field" },
1900 { NID_wap_wsg_idm_ecid_wtls11, &_EC_NIST_CHAR2_233B.h, "NIST/SECG/WTLS curve over a 233 bit binary field"}, 1919#ifndef OPENSSL_NO_EC2M
1901 { NID_wap_wsg_idm_ecid_wtls12, &_EC_WTLS_12.h, "WTLS curves over a 224 bit prime field"}, 1920 { NID_wap_wsg_idm_ecid_wtls10, &_EC_NIST_CHAR2_233K.h, 0, "NIST/SECG/WTLS curve over a 233 bit binary field" },
1921 { NID_wap_wsg_idm_ecid_wtls11, &_EC_NIST_CHAR2_233B.h, 0, "NIST/SECG/WTLS curve over a 233 bit binary field" },
1922#endif
 1923 { NID_wap_wsg_idm_ecid_wtls12, &_EC_WTLS_12.h, 0, "WTLS curves over a 224 bit prime field" },
1924#ifndef OPENSSL_NO_EC2M
1902 /* IPSec curves */ 1925 /* IPSec curves */
1903 { NID_ipsec3, &_EC_IPSEC_155_ID3.h, "\n\tIPSec/IKE/Oakley curve #3 over a 155 bit binary field.\n""\tNot suitable for ECDSA.\n\tQuestionable extension field!"}, 1926 { NID_ipsec3, &_EC_IPSEC_155_ID3.h, 0, "\n\tIPSec/IKE/Oakley curve #3 over a 155 bit binary field.\n"
1904 { NID_ipsec4, &_EC_IPSEC_185_ID4.h, "\n\tIPSec/IKE/Oakley curve #4 over a 185 bit binary field.\n""\tNot suitable for ECDSA.\n\tQuestionable extension field!"}, 1927 "\tNot suitable for ECDSA.\n\tQuestionable extension field!" },
1928 { NID_ipsec4, &_EC_IPSEC_185_ID4.h, 0, "\n\tIPSec/IKE/Oakley curve #4 over a 185 bit binary field.\n"
1929 "\tNot suitable for ECDSA.\n\tQuestionable extension field!" },
1930#endif
1905}; 1931};
1906 1932
1907#define curve_list_length (sizeof(curve_list)/sizeof(ec_list_element)) 1933#define curve_list_length (sizeof(curve_list)/sizeof(ec_list_element))
1908 1934
1909static EC_GROUP *ec_group_new_from_data(const EC_CURVE_DATA *data) 1935static EC_GROUP *ec_group_new_from_data(const ec_list_element curve)
1910 { 1936 {
1911 EC_GROUP *group=NULL; 1937 EC_GROUP *group=NULL;
1912 EC_POINT *P=NULL; 1938 EC_POINT *P=NULL;
1913 BN_CTX *ctx=NULL; 1939 BN_CTX *ctx=NULL;
1914 BIGNUM *p=NULL, *a=NULL, *b=NULL, *x=NULL, *y=NULL, *order=NULL; 1940 BIGNUM *p=NULL, *a=NULL, *b=NULL, *x=NULL, *y=NULL, *order=NULL;
1915 int ok=0; 1941 int ok=0;
1916 int seed_len,param_len; 1942 int seed_len,param_len;
1943 const EC_METHOD *meth;
1944 const EC_CURVE_DATA *data;
1917 const unsigned char *params; 1945 const unsigned char *params;
1918 1946
1919 if ((ctx = BN_CTX_new()) == NULL) 1947 if ((ctx = BN_CTX_new()) == NULL)
@@ -1922,10 +1950,11 @@ static EC_GROUP *ec_group_new_from_data(const EC_CURVE_DATA *data)
1922 goto err; 1950 goto err;
1923 } 1951 }
1924 1952
1953 data = curve.data;
1925 seed_len = data->seed_len; 1954 seed_len = data->seed_len;
1926 param_len = data->param_len; 1955 param_len = data->param_len;
1927 params = (const unsigned char *)(data+1); /* skip header */ 1956 params = (const unsigned char *)(data+1); /* skip header */
1928 params += seed_len; /* skip seed */ 1957 params += seed_len; /* skip seed */
1929 1958
1930 if (!(p = BN_bin2bn(params+0*param_len, param_len, NULL)) 1959 if (!(p = BN_bin2bn(params+0*param_len, param_len, NULL))
1931 || !(a = BN_bin2bn(params+1*param_len, param_len, NULL)) 1960 || !(a = BN_bin2bn(params+1*param_len, param_len, NULL))
@@ -1935,7 +1964,17 @@ static EC_GROUP *ec_group_new_from_data(const EC_CURVE_DATA *data)
1935 goto err; 1964 goto err;
1936 } 1965 }
1937 1966
1938 if (data->field_type == NID_X9_62_prime_field) 1967 if (curve.meth != 0)
1968 {
1969 meth = curve.meth();
1970 if (((group = EC_GROUP_new(meth)) == NULL) ||
1971 (!(group->meth->group_set_curve(group, p, a, b, ctx))))
1972 {
1973 ECerr(EC_F_EC_GROUP_NEW_FROM_DATA, ERR_R_EC_LIB);
1974 goto err;
1975 }
1976 }
1977 else if (data->field_type == NID_X9_62_prime_field)
1939 { 1978 {
1940 if ((group = EC_GROUP_new_curve_GFp(p, a, b, ctx)) == NULL) 1979 if ((group = EC_GROUP_new_curve_GFp(p, a, b, ctx)) == NULL)
1941 { 1980 {
@@ -1943,6 +1982,7 @@ static EC_GROUP *ec_group_new_from_data(const EC_CURVE_DATA *data)
1943 goto err; 1982 goto err;
1944 } 1983 }
1945 } 1984 }
1985#ifndef OPENSSL_NO_EC2M
1946 else /* field_type == NID_X9_62_characteristic_two_field */ 1986 else /* field_type == NID_X9_62_characteristic_two_field */
1947 { 1987 {
1948 if ((group = EC_GROUP_new_curve_GF2m(p, a, b, ctx)) == NULL) 1988 if ((group = EC_GROUP_new_curve_GF2m(p, a, b, ctx)) == NULL)
@@ -1951,20 +1991,21 @@ static EC_GROUP *ec_group_new_from_data(const EC_CURVE_DATA *data)
1951 goto err; 1991 goto err;
1952 } 1992 }
1953 } 1993 }
1994#endif
1954 1995
1955 if ((P = EC_POINT_new(group)) == NULL) 1996 if ((P = EC_POINT_new(group)) == NULL)
1956 { 1997 {
1957 ECerr(EC_F_EC_GROUP_NEW_FROM_DATA, ERR_R_EC_LIB); 1998 ECerr(EC_F_EC_GROUP_NEW_FROM_DATA, ERR_R_EC_LIB);
1958 goto err; 1999 goto err;
1959 } 2000 }
1960 2001
1961 if (!(x = BN_bin2bn(params+3*param_len, param_len, NULL)) 2002 if (!(x = BN_bin2bn(params+3*param_len, param_len, NULL))
1962 || !(y = BN_bin2bn(params+4*param_len, param_len, NULL))) 2003 || !(y = BN_bin2bn(params+4*param_len, param_len, NULL)))
1963 { 2004 {
1964 ECerr(EC_F_EC_GROUP_NEW_FROM_DATA, ERR_R_BN_LIB); 2005 ECerr(EC_F_EC_GROUP_NEW_FROM_DATA, ERR_R_BN_LIB);
1965 goto err; 2006 goto err;
1966 } 2007 }
1967 if (!EC_POINT_set_affine_coordinates_GF2m(group, P, x, y, ctx)) 2008 if (!EC_POINT_set_affine_coordinates_GFp(group, P, x, y, ctx))
1968 { 2009 {
1969 ECerr(EC_F_EC_GROUP_NEW_FROM_DATA, ERR_R_EC_LIB); 2010 ECerr(EC_F_EC_GROUP_NEW_FROM_DATA, ERR_R_EC_LIB);
1970 goto err; 2011 goto err;
@@ -2025,7 +2066,7 @@ EC_GROUP *EC_GROUP_new_by_curve_name(int nid)
2025 for (i=0; i<curve_list_length; i++) 2066 for (i=0; i<curve_list_length; i++)
2026 if (curve_list[i].nid == nid) 2067 if (curve_list[i].nid == nid)
2027 { 2068 {
2028 ret = ec_group_new_from_data(curve_list[i].data); 2069 ret = ec_group_new_from_data(curve_list[i]);
2029 break; 2070 break;
2030 } 2071 }
2031 2072
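
The hunk above switches named-curve construction to pass the whole ec_list_element, so ec_group_new_from_data() can prefer a curve-specific EC_METHOD via curve.meth() before falling back to the generic GFp/GF2m constructors. Callers still go through EC_GROUP_new_by_curve_name(); a minimal usage sketch (not part of this diff, the NID is only an example):

#include <openssl/ec.h>
#include <openssl/obj_mac.h>

/* Build a group for a curve listed in curve_list and free it again. */
static int named_curve_sketch(void)
	{
	EC_GROUP *group = EC_GROUP_new_by_curve_name(NID_X9_62_prime256v1);

	if (group == NULL)
		return 0;	/* unknown NID or construction failed */
	/* ... use the group ... */
	EC_GROUP_free(group);
	return 1;
	}
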
diff --git a/src/lib/libssl/src/crypto/ec/ec_key.c b/src/lib/libssl/src/crypto/ec/ec_key.c
index 522802c07a..bf9fd2dc2c 100644
--- a/src/lib/libssl/src/crypto/ec/ec_key.c
+++ b/src/lib/libssl/src/crypto/ec/ec_key.c
@@ -64,7 +64,9 @@
64#include <string.h> 64#include <string.h>
65#include "ec_lcl.h" 65#include "ec_lcl.h"
66#include <openssl/err.h> 66#include <openssl/err.h>
67#include <string.h> 67#ifdef OPENSSL_FIPS
68#include <openssl/fips.h>
69#endif
68 70
69EC_KEY *EC_KEY_new(void) 71EC_KEY *EC_KEY_new(void)
70 { 72 {
@@ -78,6 +80,7 @@ EC_KEY *EC_KEY_new(void)
78 } 80 }
79 81
80 ret->version = 1; 82 ret->version = 1;
83 ret->flags = 0;
81 ret->group = NULL; 84 ret->group = NULL;
82 ret->pub_key = NULL; 85 ret->pub_key = NULL;
83 ret->priv_key= NULL; 86 ret->priv_key= NULL;
@@ -197,6 +200,7 @@ EC_KEY *EC_KEY_copy(EC_KEY *dest, const EC_KEY *src)
197 dest->enc_flag = src->enc_flag; 200 dest->enc_flag = src->enc_flag;
198 dest->conv_form = src->conv_form; 201 dest->conv_form = src->conv_form;
199 dest->version = src->version; 202 dest->version = src->version;
203 dest->flags = src->flags;
200 204
201 return dest; 205 return dest;
202 } 206 }
@@ -237,6 +241,11 @@ int EC_KEY_generate_key(EC_KEY *eckey)
237 BIGNUM *priv_key = NULL, *order = NULL; 241 BIGNUM *priv_key = NULL, *order = NULL;
238 EC_POINT *pub_key = NULL; 242 EC_POINT *pub_key = NULL;
239 243
244#ifdef OPENSSL_FIPS
245 if (FIPS_mode())
246 return FIPS_ec_key_generate_key(eckey);
247#endif
248
240 if (!eckey || !eckey->group) 249 if (!eckey || !eckey->group)
241 { 250 {
242 ECerr(EC_F_EC_KEY_GENERATE_KEY, ERR_R_PASSED_NULL_PARAMETER); 251 ECerr(EC_F_EC_KEY_GENERATE_KEY, ERR_R_PASSED_NULL_PARAMETER);
@@ -371,6 +380,82 @@ err:
371 return(ok); 380 return(ok);
372 } 381 }
373 382
383int EC_KEY_set_public_key_affine_coordinates(EC_KEY *key, BIGNUM *x, BIGNUM *y)
384 {
385 BN_CTX *ctx = NULL;
386 BIGNUM *tx, *ty;
387 EC_POINT *point = NULL;
388 int ok = 0, tmp_nid, is_char_two = 0;
389
390 if (!key || !key->group || !x || !y)
391 {
392 ECerr(EC_F_EC_KEY_SET_PUBLIC_KEY_AFFINE_COORDINATES,
393 ERR_R_PASSED_NULL_PARAMETER);
394 return 0;
395 }
396 ctx = BN_CTX_new();
397 if (!ctx)
398 goto err;
399
400 point = EC_POINT_new(key->group);
401
402 if (!point)
403 goto err;
404
405 tmp_nid = EC_METHOD_get_field_type(EC_GROUP_method_of(key->group));
406
407 if (tmp_nid == NID_X9_62_characteristic_two_field)
408 is_char_two = 1;
409
410 tx = BN_CTX_get(ctx);
411 ty = BN_CTX_get(ctx);
412#ifndef OPENSSL_NO_EC2M
413 if (is_char_two)
414 {
415 if (!EC_POINT_set_affine_coordinates_GF2m(key->group, point,
416 x, y, ctx))
417 goto err;
418 if (!EC_POINT_get_affine_coordinates_GF2m(key->group, point,
419 tx, ty, ctx))
420 goto err;
421 }
422 else
423#endif
424 {
425 if (!EC_POINT_set_affine_coordinates_GFp(key->group, point,
426 x, y, ctx))
427 goto err;
428 if (!EC_POINT_get_affine_coordinates_GFp(key->group, point,
429 tx, ty, ctx))
430 goto err;
431 }
432 /* Check if retrieved coordinates match originals: if not values
433 * are out of range.
434 */
435 if (BN_cmp(x, tx) || BN_cmp(y, ty))
436 {
437 ECerr(EC_F_EC_KEY_SET_PUBLIC_KEY_AFFINE_COORDINATES,
438 EC_R_COORDINATES_OUT_OF_RANGE);
439 goto err;
440 }
441
442 if (!EC_KEY_set_public_key(key, point))
443 goto err;
444
445 if (EC_KEY_check_key(key) == 0)
446 goto err;
447
448 ok = 1;
449
450 err:
451 if (ctx)
452 BN_CTX_free(ctx);
453 if (point)
454 EC_POINT_free(point);
455 return ok;
456
457 }
458
374const EC_GROUP *EC_KEY_get0_group(const EC_KEY *key) 459const EC_GROUP *EC_KEY_get0_group(const EC_KEY *key)
375 { 460 {
376 return key->group; 461 return key->group;
@@ -461,3 +546,18 @@ int EC_KEY_precompute_mult(EC_KEY *key, BN_CTX *ctx)
461 return 0; 546 return 0;
462 return EC_GROUP_precompute_mult(key->group, ctx); 547 return EC_GROUP_precompute_mult(key->group, ctx);
463 } 548 }
549
550int EC_KEY_get_flags(const EC_KEY *key)
551 {
552 return key->flags;
553 }
554
555void EC_KEY_set_flags(EC_KEY *key, int flags)
556 {
557 key->flags |= flags;
558 }
559
560void EC_KEY_clear_flags(EC_KEY *key, int flags)
561 {
562 key->flags &= ~flags;
563 }
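
The additions above give EC_KEY a public API for loading a raw affine public point (with a round-trip range check plus EC_KEY_check_key()) and for manipulating the new flags word. A hedged usage sketch for the coordinate setter, assuming the caller already holds x and y as BIGNUMs:

#include <openssl/bn.h>
#include <openssl/ec.h>

/* Create an EC_KEY on the named curve and install the peer's affine
 * public point (x, y).  Returns the key, or NULL on any failure. */
static EC_KEY *load_pub_sketch(int nid, BIGNUM *x, BIGNUM *y)
	{
	EC_KEY *key = EC_KEY_new_by_curve_name(nid);

	if (key == NULL)
		return NULL;
	if (!EC_KEY_set_public_key_affine_coordinates(key, x, y))
		{
		/* coordinates out of range, or point failed EC_KEY_check_key() */
		EC_KEY_free(key);
		return NULL;
		}
	return key;
	}
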
diff --git a/src/lib/libssl/src/crypto/ec/ec_oct.c b/src/lib/libssl/src/crypto/ec/ec_oct.c
new file mode 100644
index 0000000000..fd9db0798d
--- /dev/null
+++ b/src/lib/libssl/src/crypto/ec/ec_oct.c
@@ -0,0 +1,199 @@
1/* crypto/ec/ec_lib.c */
2/*
3 * Originally written by Bodo Moeller for the OpenSSL project.
4 */
5/* ====================================================================
6 * Copyright (c) 1998-2003 The OpenSSL Project. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 *
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in
17 * the documentation and/or other materials provided with the
18 * distribution.
19 *
20 * 3. All advertising materials mentioning features or use of this
21 * software must display the following acknowledgment:
22 * "This product includes software developed by the OpenSSL Project
23 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
24 *
25 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
26 * endorse or promote products derived from this software without
27 * prior written permission. For written permission, please contact
28 * openssl-core@openssl.org.
29 *
30 * 5. Products derived from this software may not be called "OpenSSL"
31 * nor may "OpenSSL" appear in their names without prior written
32 * permission of the OpenSSL Project.
33 *
34 * 6. Redistributions of any form whatsoever must retain the following
35 * acknowledgment:
36 * "This product includes software developed by the OpenSSL Project
37 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
38 *
39 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
40 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
41 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
42 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
43 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
44 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
45 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
46 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
47 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
48 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
49 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
50 * OF THE POSSIBILITY OF SUCH DAMAGE.
51 * ====================================================================
52 *
53 * This product includes cryptographic software written by Eric Young
54 * (eay@cryptsoft.com). This product includes software written by Tim
55 * Hudson (tjh@cryptsoft.com).
56 *
57 */
58/* ====================================================================
59 * Copyright 2002 Sun Microsystems, Inc. ALL RIGHTS RESERVED.
60 * Binary polynomial ECC support in OpenSSL originally developed by
61 * SUN MICROSYSTEMS, INC., and contributed to the OpenSSL project.
62 */
63
64#include <string.h>
65
66#include <openssl/err.h>
67#include <openssl/opensslv.h>
68
69#include "ec_lcl.h"
70
71int EC_POINT_set_compressed_coordinates_GFp(const EC_GROUP *group, EC_POINT *point,
72 const BIGNUM *x, int y_bit, BN_CTX *ctx)
73 {
74 if (group->meth->point_set_compressed_coordinates == 0
75 && !(group->meth->flags & EC_FLAGS_DEFAULT_OCT))
76 {
77 ECerr(EC_F_EC_POINT_SET_COMPRESSED_COORDINATES_GFP, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
78 return 0;
79 }
80 if (group->meth != point->meth)
81 {
82 ECerr(EC_F_EC_POINT_SET_COMPRESSED_COORDINATES_GFP, EC_R_INCOMPATIBLE_OBJECTS);
83 return 0;
84 }
85 if(group->meth->flags & EC_FLAGS_DEFAULT_OCT)
86 {
87 if (group->meth->field_type == NID_X9_62_prime_field)
88 return ec_GFp_simple_set_compressed_coordinates(
89 group, point, x, y_bit, ctx);
90 else
91#ifdef OPENSSL_NO_EC2M
92 {
93 ECerr(EC_F_EC_POINT_SET_COMPRESSED_COORDINATES_GFP, EC_R_GF2M_NOT_SUPPORTED);
94 return 0;
95 }
96#else
97 return ec_GF2m_simple_set_compressed_coordinates(
98 group, point, x, y_bit, ctx);
99#endif
100 }
101 return group->meth->point_set_compressed_coordinates(group, point, x, y_bit, ctx);
102 }
103
104#ifndef OPENSSL_NO_EC2M
105int EC_POINT_set_compressed_coordinates_GF2m(const EC_GROUP *group, EC_POINT *point,
106 const BIGNUM *x, int y_bit, BN_CTX *ctx)
107 {
108 if (group->meth->point_set_compressed_coordinates == 0
109 && !(group->meth->flags & EC_FLAGS_DEFAULT_OCT))
110 {
111 ECerr(EC_F_EC_POINT_SET_COMPRESSED_COORDINATES_GF2M, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
112 return 0;
113 }
114 if (group->meth != point->meth)
115 {
116 ECerr(EC_F_EC_POINT_SET_COMPRESSED_COORDINATES_GF2M, EC_R_INCOMPATIBLE_OBJECTS);
117 return 0;
118 }
119 if(group->meth->flags & EC_FLAGS_DEFAULT_OCT)
120 {
121 if (group->meth->field_type == NID_X9_62_prime_field)
122 return ec_GFp_simple_set_compressed_coordinates(
123 group, point, x, y_bit, ctx);
124 else
125 return ec_GF2m_simple_set_compressed_coordinates(
126 group, point, x, y_bit, ctx);
127 }
128 return group->meth->point_set_compressed_coordinates(group, point, x, y_bit, ctx);
129 }
130#endif
131
132size_t EC_POINT_point2oct(const EC_GROUP *group, const EC_POINT *point, point_conversion_form_t form,
133 unsigned char *buf, size_t len, BN_CTX *ctx)
134 {
135 if (group->meth->point2oct == 0
136 && !(group->meth->flags & EC_FLAGS_DEFAULT_OCT))
137 {
138 ECerr(EC_F_EC_POINT_POINT2OCT, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
139 return 0;
140 }
141 if (group->meth != point->meth)
142 {
143 ECerr(EC_F_EC_POINT_POINT2OCT, EC_R_INCOMPATIBLE_OBJECTS);
144 return 0;
145 }
146 if(group->meth->flags & EC_FLAGS_DEFAULT_OCT)
147 {
148 if (group->meth->field_type == NID_X9_62_prime_field)
149 return ec_GFp_simple_point2oct(group, point,
150 form, buf, len, ctx);
151 else
152#ifdef OPENSSL_NO_EC2M
153 {
154 ECerr(EC_F_EC_POINT_POINT2OCT, EC_R_GF2M_NOT_SUPPORTED);
155 return 0;
156 }
157#else
158 return ec_GF2m_simple_point2oct(group, point,
159 form, buf, len, ctx);
160#endif
161 }
162
163 return group->meth->point2oct(group, point, form, buf, len, ctx);
164 }
165
166
167int EC_POINT_oct2point(const EC_GROUP *group, EC_POINT *point,
168 const unsigned char *buf, size_t len, BN_CTX *ctx)
169 {
170 if (group->meth->oct2point == 0
171 && !(group->meth->flags & EC_FLAGS_DEFAULT_OCT))
172 {
173 ECerr(EC_F_EC_POINT_OCT2POINT, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
174 return 0;
175 }
176 if (group->meth != point->meth)
177 {
178 ECerr(EC_F_EC_POINT_OCT2POINT, EC_R_INCOMPATIBLE_OBJECTS);
179 return 0;
180 }
181 if(group->meth->flags & EC_FLAGS_DEFAULT_OCT)
182 {
183 if (group->meth->field_type == NID_X9_62_prime_field)
184 return ec_GFp_simple_oct2point(group, point,
185 buf, len, ctx);
186 else
187#ifdef OPENSSL_NO_EC2M
188 {
189 ECerr(EC_F_EC_POINT_OCT2POINT, EC_R_GF2M_NOT_SUPPORTED);
190 return 0;
191 }
192#else
193 return ec_GF2m_simple_oct2point(group, point,
194 buf, len, ctx);
195#endif
196 }
197 return group->meth->oct2point(group, point, buf, len, ctx);
198 }
199
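
The new ec_oct.c moves the octet-string conversions out of ec_lib.c: an EC_METHOD may provide its own point2oct/oct2point, or set EC_FLAGS_DEFAULT_OCT and fall back to the simple GFp/GF2m routines. The caller-visible API is unchanged; a round-trip sketch (illustrative only), using the usual sizing call with a NULL buffer:

#include <openssl/bn.h>
#include <openssl/ec.h>
#include <openssl/crypto.h>

/* Encode a point in compressed form and decode it into another point. */
static int point_roundtrip_sketch(const EC_GROUP *group, const EC_POINT *in,
	EC_POINT *out, BN_CTX *ctx)
	{
	unsigned char *buf;
	size_t len;
	int ret = 0;

	/* A NULL buffer asks EC_POINT_point2oct() for the required length. */
	len = EC_POINT_point2oct(group, in, POINT_CONVERSION_COMPRESSED,
	    NULL, 0, ctx);
	if (len == 0 || (buf = OPENSSL_malloc(len)) == NULL)
		return 0;
	if (EC_POINT_point2oct(group, in, POINT_CONVERSION_COMPRESSED,
	    buf, len, ctx) == len &&
	    EC_POINT_oct2point(group, out, buf, len, ctx))
		ret = 1;
	OPENSSL_free(buf);
	return ret;
	}
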
diff --git a/src/lib/libssl/src/crypto/ec/ec_pmeth.c b/src/lib/libssl/src/crypto/ec/ec_pmeth.c
index f433076ca1..d1ed66c37e 100644
--- a/src/lib/libssl/src/crypto/ec/ec_pmeth.c
+++ b/src/lib/libssl/src/crypto/ec/ec_pmeth.c
@@ -221,6 +221,7 @@ static int pkey_ec_ctrl(EVP_PKEY_CTX *ctx, int type, int p1, void *p2)
221 221
222 case EVP_PKEY_CTRL_MD: 222 case EVP_PKEY_CTRL_MD:
223 if (EVP_MD_type((const EVP_MD *)p2) != NID_sha1 && 223 if (EVP_MD_type((const EVP_MD *)p2) != NID_sha1 &&
224 EVP_MD_type((const EVP_MD *)p2) != NID_ecdsa_with_SHA1 &&
224 EVP_MD_type((const EVP_MD *)p2) != NID_sha224 && 225 EVP_MD_type((const EVP_MD *)p2) != NID_sha224 &&
225 EVP_MD_type((const EVP_MD *)p2) != NID_sha256 && 226 EVP_MD_type((const EVP_MD *)p2) != NID_sha256 &&
226 EVP_MD_type((const EVP_MD *)p2) != NID_sha384 && 227 EVP_MD_type((const EVP_MD *)p2) != NID_sha384 &&
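
The one added line above makes pkey_ec_ctrl() accept NID_ecdsa_with_SHA1 as the signing digest; that is the NID EVP_DigestSignInit() passes down when it is handed the legacy EVP_ecdsa() digest. A hedged sketch of the call path this enables (the caller must supply a signature buffer of at least ECDSA_size() bytes):

#include <openssl/evp.h>

/* Sign msg with an EC key through the EVP interface, using the legacy
 * ECDSA-with-SHA1 digest that the hunk above now permits. */
static int ecdsa_sha1_sign_sketch(EVP_PKEY *pkey, const unsigned char *msg,
	size_t msglen, unsigned char *sig, size_t *siglen)
	{
	EVP_MD_CTX ctx;
	int ret = 0;

	EVP_MD_CTX_init(&ctx);
	if (EVP_DigestSignInit(&ctx, NULL, EVP_ecdsa(), NULL, pkey) &&
	    EVP_DigestSignUpdate(&ctx, msg, msglen) &&
	    EVP_DigestSignFinal(&ctx, sig, siglen))
		ret = 1;
	EVP_MD_CTX_cleanup(&ctx);
	return ret;
	}
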
diff --git a/src/lib/libssl/src/crypto/ec/eck_prn.c b/src/lib/libssl/src/crypto/ec/eck_prn.c
index 7d3e175ae7..06de8f3959 100644
--- a/src/lib/libssl/src/crypto/ec/eck_prn.c
+++ b/src/lib/libssl/src/crypto/ec/eck_prn.c
@@ -207,7 +207,7 @@ int ECPKParameters_print(BIO *bp, const EC_GROUP *x, int off)
207 reason = ERR_R_MALLOC_FAILURE; 207 reason = ERR_R_MALLOC_FAILURE;
208 goto err; 208 goto err;
209 } 209 }
210 210#ifndef OPENSSL_NO_EC2M
211 if (is_char_two) 211 if (is_char_two)
212 { 212 {
213 if (!EC_GROUP_get_curve_GF2m(x, p, a, b, ctx)) 213 if (!EC_GROUP_get_curve_GF2m(x, p, a, b, ctx))
@@ -217,6 +217,7 @@ int ECPKParameters_print(BIO *bp, const EC_GROUP *x, int off)
217 } 217 }
218 } 218 }
219 else /* prime field */ 219 else /* prime field */
220#endif
220 { 221 {
221 if (!EC_GROUP_get_curve_GFp(x, p, a, b, ctx)) 222 if (!EC_GROUP_get_curve_GFp(x, p, a, b, ctx))
222 { 223 {
diff --git a/src/lib/libssl/src/crypto/ec/ecp_nistp224.c b/src/lib/libssl/src/crypto/ec/ecp_nistp224.c
new file mode 100644
index 0000000000..b5ff56c252
--- /dev/null
+++ b/src/lib/libssl/src/crypto/ec/ecp_nistp224.c
@@ -0,0 +1,1658 @@
1/* crypto/ec/ecp_nistp224.c */
2/*
3 * Written by Emilia Kasper (Google) for the OpenSSL project.
4 */
5/* Copyright 2011 Google Inc.
6 *
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 *
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
11 *
12 * http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
19 */
20
21/*
22 * A 64-bit implementation of the NIST P-224 elliptic curve point multiplication
23 *
24 * Inspired by Daniel J. Bernstein's public domain nistp224 implementation
25 * and Adam Langley's public domain 64-bit C implementation of curve25519
26 */
27
28#include <openssl/opensslconf.h>
29#ifndef OPENSSL_NO_EC_NISTP_64_GCC_128
30
31#ifndef OPENSSL_SYS_VMS
32#include <stdint.h>
33#else
34#include <inttypes.h>
35#endif
36
37#include <string.h>
38#include <openssl/err.h>
39#include "ec_lcl.h"
40
41#if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
42 /* even with gcc, the typedef won't work for 32-bit platforms */
43 typedef __uint128_t uint128_t; /* nonstandard; implemented by gcc on 64-bit platforms */
44#else
45 #error "Need GCC 3.1 or later to define type uint128_t"
46#endif
47
48typedef uint8_t u8;
49typedef uint64_t u64;
50typedef int64_t s64;
51
52
53/******************************************************************************/
54/* INTERNAL REPRESENTATION OF FIELD ELEMENTS
55 *
56 * Field elements are represented as a_0 + 2^56*a_1 + 2^112*a_2 + 2^168*a_3
57 * using 64-bit coefficients called 'limbs',
58 * and sometimes (for multiplication results) as
59 * b_0 + 2^56*b_1 + 2^112*b_2 + 2^168*b_3 + 2^224*b_4 + 2^280*b_5 + 2^336*b_6
60 * using 128-bit coefficients called 'widelimbs'.
61 * A 4-limb representation is an 'felem';
62 * a 7-widelimb representation is a 'widefelem'.
63 * Even within felems, bits of adjacent limbs overlap, and we don't always
64 * reduce the representations: we ensure that inputs to each felem
65 * multiplication satisfy a_i < 2^60, so outputs satisfy b_i < 4*2^60*2^60,
66 * and fit into a 128-bit word without overflow. The coefficients are then
67 * again partially reduced to obtain an felem satisfying a_i < 2^57.
68 * We only reduce to the unique minimal representation at the end of the
69 * computation.
70 */
71
72typedef uint64_t limb;
73typedef uint128_t widelimb;
74
75typedef limb felem[4];
76typedef widelimb widefelem[7];
77
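/*
 * Illustrative sketch (not from the imported source): the curve prime
 * p = 2^224 - 2^96 + 1 written out in the 4 x 56-bit limb representation
 * described above.  These are the same limb constants that the zero test
 * felem_is_zero() compares against further down in this file.
 */
static const felem p224_limbs_sketch = {
	0x0000000000000001,	/* a_0: low 56 bits of p */
	0x00ffff0000000000,	/* a_1: only bits 96..111 of p are set */
	0x00ffffffffffffff,	/* a_2: bits 112..167 all set */
	0x00ffffffffffffff	/* a_3: bits 168..223 all set */
	};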
 78/* Field element represented as a byte array.
79 * 28*8 = 224 bits is also the group order size for the elliptic curve,
80 * and we also use this type for scalars for point multiplication.
81 */
82typedef u8 felem_bytearray[28];
83
84static const felem_bytearray nistp224_curve_params[5] = {
85 {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF, /* p */
86 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0x00,0x00,0x00,0x00,
87 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x01},
88 {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF, /* a */
89 0xFF,0xFF,0xFF,0xFF,0xFF,0xFE,0xFF,0xFF,0xFF,0xFF,
90 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFE},
91 {0xB4,0x05,0x0A,0x85,0x0C,0x04,0xB3,0xAB,0xF5,0x41, /* b */
92 0x32,0x56,0x50,0x44,0xB0,0xB7,0xD7,0xBF,0xD8,0xBA,
93 0x27,0x0B,0x39,0x43,0x23,0x55,0xFF,0xB4},
94 {0xB7,0x0E,0x0C,0xBD,0x6B,0xB4,0xBF,0x7F,0x32,0x13, /* x */
95 0x90,0xB9,0x4A,0x03,0xC1,0xD3,0x56,0xC2,0x11,0x22,
96 0x34,0x32,0x80,0xD6,0x11,0x5C,0x1D,0x21},
97 {0xbd,0x37,0x63,0x88,0xb5,0xf7,0x23,0xfb,0x4c,0x22, /* y */
98 0xdf,0xe6,0xcd,0x43,0x75,0xa0,0x5a,0x07,0x47,0x64,
99 0x44,0xd5,0x81,0x99,0x85,0x00,0x7e,0x34}
100};
101
102/* Precomputed multiples of the standard generator
103 * Points are given in coordinates (X, Y, Z) where Z normally is 1
104 * (0 for the point at infinity).
105 * For each field element, slice a_0 is word 0, etc.
106 *
107 * The table has 2 * 16 elements, starting with the following:
108 * index | bits | point
109 * ------+---------+------------------------------
110 * 0 | 0 0 0 0 | 0G
111 * 1 | 0 0 0 1 | 1G
112 * 2 | 0 0 1 0 | 2^56G
113 * 3 | 0 0 1 1 | (2^56 + 1)G
114 * 4 | 0 1 0 0 | 2^112G
115 * 5 | 0 1 0 1 | (2^112 + 1)G
116 * 6 | 0 1 1 0 | (2^112 + 2^56)G
117 * 7 | 0 1 1 1 | (2^112 + 2^56 + 1)G
118 * 8 | 1 0 0 0 | 2^168G
119 * 9 | 1 0 0 1 | (2^168 + 1)G
120 * 10 | 1 0 1 0 | (2^168 + 2^56)G
121 * 11 | 1 0 1 1 | (2^168 + 2^56 + 1)G
122 * 12 | 1 1 0 0 | (2^168 + 2^112)G
123 * 13 | 1 1 0 1 | (2^168 + 2^112 + 1)G
124 * 14 | 1 1 1 0 | (2^168 + 2^112 + 2^56)G
125 * 15 | 1 1 1 1 | (2^168 + 2^112 + 2^56 + 1)G
126 * followed by a copy of this with each element multiplied by 2^28.
127 *
128 * The reason for this is so that we can clock bits into four different
129 * locations when doing simple scalar multiplies against the base point,
130 * and then another four locations using the second 16 elements.
131 */
132static const felem gmul[2][16][3] =
133{{{{0, 0, 0, 0},
134 {0, 0, 0, 0},
135 {0, 0, 0, 0}},
136 {{0x3280d6115c1d21, 0xc1d356c2112234, 0x7f321390b94a03, 0xb70e0cbd6bb4bf},
137 {0xd5819985007e34, 0x75a05a07476444, 0xfb4c22dfe6cd43, 0xbd376388b5f723},
138 {1, 0, 0, 0}},
139 {{0xfd9675666ebbe9, 0xbca7664d40ce5e, 0x2242df8d8a2a43, 0x1f49bbb0f99bc5},
140 {0x29e0b892dc9c43, 0xece8608436e662, 0xdc858f185310d0, 0x9812dd4eb8d321},
141 {1, 0, 0, 0}},
142 {{0x6d3e678d5d8eb8, 0x559eed1cb362f1, 0x16e9a3bbce8a3f, 0xeedcccd8c2a748},
143 {0xf19f90ed50266d, 0xabf2b4bf65f9df, 0x313865468fafec, 0x5cb379ba910a17},
144 {1, 0, 0, 0}},
145 {{0x0641966cab26e3, 0x91fb2991fab0a0, 0xefec27a4e13a0b, 0x0499aa8a5f8ebe},
146 {0x7510407766af5d, 0x84d929610d5450, 0x81d77aae82f706, 0x6916f6d4338c5b},
147 {1, 0, 0, 0}},
148 {{0xea95ac3b1f15c6, 0x086000905e82d4, 0xdd323ae4d1c8b1, 0x932b56be7685a3},
149 {0x9ef93dea25dbbf, 0x41665960f390f0, 0xfdec76dbe2a8a7, 0x523e80f019062a},
150 {1, 0, 0, 0}},
151 {{0x822fdd26732c73, 0xa01c83531b5d0f, 0x363f37347c1ba4, 0xc391b45c84725c},
152 {0xbbd5e1b2d6ad24, 0xddfbcde19dfaec, 0xc393da7e222a7f, 0x1efb7890ede244},
153 {1, 0, 0, 0}},
154 {{0x4c9e90ca217da1, 0xd11beca79159bb, 0xff8d33c2c98b7c, 0x2610b39409f849},
155 {0x44d1352ac64da0, 0xcdbb7b2c46b4fb, 0x966c079b753c89, 0xfe67e4e820b112},
156 {1, 0, 0, 0}},
157 {{0xe28cae2df5312d, 0xc71b61d16f5c6e, 0x79b7619a3e7c4c, 0x05c73240899b47},
158 {0x9f7f6382c73e3a, 0x18615165c56bda, 0x641fab2116fd56, 0x72855882b08394},
159 {1, 0, 0, 0}},
160 {{0x0469182f161c09, 0x74a98ca8d00fb5, 0xb89da93489a3e0, 0x41c98768fb0c1d},
161 {0xe5ea05fb32da81, 0x3dce9ffbca6855, 0x1cfe2d3fbf59e6, 0x0e5e03408738a7},
162 {1, 0, 0, 0}},
163 {{0xdab22b2333e87f, 0x4430137a5dd2f6, 0xe03ab9f738beb8, 0xcb0c5d0dc34f24},
164 {0x764a7df0c8fda5, 0x185ba5c3fa2044, 0x9281d688bcbe50, 0xc40331df893881},
165 {1, 0, 0, 0}},
166 {{0xb89530796f0f60, 0xade92bd26909a3, 0x1a0c83fb4884da, 0x1765bf22a5a984},
167 {0x772a9ee75db09e, 0x23bc6c67cec16f, 0x4c1edba8b14e2f, 0xe2a215d9611369},
168 {1, 0, 0, 0}},
169 {{0x571e509fb5efb3, 0xade88696410552, 0xc8ae85fada74fe, 0x6c7e4be83bbde3},
170 {0xff9f51160f4652, 0xb47ce2495a6539, 0xa2946c53b582f4, 0x286d2db3ee9a60},
171 {1, 0, 0, 0}},
172 {{0x40bbd5081a44af, 0x0995183b13926c, 0xbcefba6f47f6d0, 0x215619e9cc0057},
173 {0x8bc94d3b0df45e, 0xf11c54a3694f6f, 0x8631b93cdfe8b5, 0xe7e3f4b0982db9},
174 {1, 0, 0, 0}},
175 {{0xb17048ab3e1c7b, 0xac38f36ff8a1d8, 0x1c29819435d2c6, 0xc813132f4c07e9},
176 {0x2891425503b11f, 0x08781030579fea, 0xf5426ba5cc9674, 0x1e28ebf18562bc},
177 {1, 0, 0, 0}},
178 {{0x9f31997cc864eb, 0x06cd91d28b5e4c, 0xff17036691a973, 0xf1aef351497c58},
179 {0xdd1f2d600564ff, 0xdead073b1402db, 0x74a684435bd693, 0xeea7471f962558},
180 {1, 0, 0, 0}}},
181 {{{0, 0, 0, 0},
182 {0, 0, 0, 0},
183 {0, 0, 0, 0}},
184 {{0x9665266dddf554, 0x9613d78b60ef2d, 0xce27a34cdba417, 0xd35ab74d6afc31},
185 {0x85ccdd22deb15e, 0x2137e5783a6aab, 0xa141cffd8c93c6, 0x355a1830e90f2d},
186 {1, 0, 0, 0}},
187 {{0x1a494eadaade65, 0xd6da4da77fe53c, 0xe7992996abec86, 0x65c3553c6090e3},
188 {0xfa610b1fb09346, 0xf1c6540b8a4aaf, 0xc51a13ccd3cbab, 0x02995b1b18c28a},
189 {1, 0, 0, 0}},
190 {{0x7874568e7295ef, 0x86b419fbe38d04, 0xdc0690a7550d9a, 0xd3966a44beac33},
191 {0x2b7280ec29132f, 0xbeaa3b6a032df3, 0xdc7dd88ae41200, 0xd25e2513e3a100},
192 {1, 0, 0, 0}},
193 {{0x924857eb2efafd, 0xac2bce41223190, 0x8edaa1445553fc, 0x825800fd3562d5},
194 {0x8d79148ea96621, 0x23a01c3dd9ed8d, 0xaf8b219f9416b5, 0xd8db0cc277daea},
195 {1, 0, 0, 0}},
196 {{0x76a9c3b1a700f0, 0xe9acd29bc7e691, 0x69212d1a6b0327, 0x6322e97fe154be},
197 {0x469fc5465d62aa, 0x8d41ed18883b05, 0x1f8eae66c52b88, 0xe4fcbe9325be51},
198 {1, 0, 0, 0}},
199 {{0x825fdf583cac16, 0x020b857c7b023a, 0x683c17744b0165, 0x14ffd0a2daf2f1},
200 {0x323b36184218f9, 0x4944ec4e3b47d4, 0xc15b3080841acf, 0x0bced4b01a28bb},
201 {1, 0, 0, 0}},
202 {{0x92ac22230df5c4, 0x52f33b4063eda8, 0xcb3f19870c0c93, 0x40064f2ba65233},
203 {0xfe16f0924f8992, 0x012da25af5b517, 0x1a57bb24f723a6, 0x06f8bc76760def},
204 {1, 0, 0, 0}},
205 {{0x4a7084f7817cb9, 0xbcab0738ee9a78, 0x3ec11e11d9c326, 0xdc0fe90e0f1aae},
206 {0xcf639ea5f98390, 0x5c350aa22ffb74, 0x9afae98a4047b7, 0x956ec2d617fc45},
207 {1, 0, 0, 0}},
208 {{0x4306d648c1be6a, 0x9247cd8bc9a462, 0xf5595e377d2f2e, 0xbd1c3caff1a52e},
209 {0x045e14472409d0, 0x29f3e17078f773, 0x745a602b2d4f7d, 0x191837685cdfbb},
210 {1, 0, 0, 0}},
211 {{0x5b6ee254a8cb79, 0x4953433f5e7026, 0xe21faeb1d1def4, 0xc4c225785c09de},
212 {0x307ce7bba1e518, 0x31b125b1036db8, 0x47e91868839e8f, 0xc765866e33b9f3},
213 {1, 0, 0, 0}},
214 {{0x3bfece24f96906, 0x4794da641e5093, 0xde5df64f95db26, 0x297ecd89714b05},
215 {0x701bd3ebb2c3aa, 0x7073b4f53cb1d5, 0x13c5665658af16, 0x9895089d66fe58},
216 {1, 0, 0, 0}},
217 {{0x0fef05f78c4790, 0x2d773633b05d2e, 0x94229c3a951c94, 0xbbbd70df4911bb},
218 {0xb2c6963d2c1168, 0x105f47a72b0d73, 0x9fdf6111614080, 0x7b7e94b39e67b0},
219 {1, 0, 0, 0}},
220 {{0xad1a7d6efbe2b3, 0xf012482c0da69d, 0x6b3bdf12438345, 0x40d7558d7aa4d9},
221 {0x8a09fffb5c6d3d, 0x9a356e5d9ffd38, 0x5973f15f4f9b1c, 0xdcd5f59f63c3ea},
222 {1, 0, 0, 0}},
223 {{0xacf39f4c5ca7ab, 0x4c8071cc5fd737, 0xc64e3602cd1184, 0x0acd4644c9abba},
224 {0x6c011a36d8bf6e, 0xfecd87ba24e32a, 0x19f6f56574fad8, 0x050b204ced9405},
225 {1, 0, 0, 0}},
226 {{0xed4f1cae7d9a96, 0x5ceef7ad94c40a, 0x778e4a3bf3ef9b, 0x7405783dc3b55e},
227 {0x32477c61b6e8c6, 0xb46a97570f018b, 0x91176d0a7e95d1, 0x3df90fbc4c7d0e},
228 {1, 0, 0, 0}}}};
229
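/*
 * Illustrative sketch (not from the imported source): how batch_mul(),
 * later in this file, assembles a 4-bit index into g_pre_comp[0] from a
 * generator scalar, clocking bit i of each 56-bit "column" into one index
 * bit exactly as laid out in the table comment above.
 */
static char get_bit(const felem_bytearray in, unsigned i);

static u64 gen_index_sketch(const felem_bytearray g_scalar, int i)
	{
	u64 bits;

	bits = get_bit(g_scalar, i + 168) << 3;		/* the 2^168*G component */
	bits |= get_bit(g_scalar, i + 112) << 2;	/* the 2^112*G component */
	bits |= get_bit(g_scalar, i + 56) << 1;		/* the 2^56*G component */
	bits |= get_bit(g_scalar, i);			/* the 1*G component */
	return bits;	/* e.g. 5 = 0101 selects (2^112 + 1)*G, row 5 above */
	}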
230/* Precomputation for the group generator. */
231typedef struct {
232 felem g_pre_comp[2][16][3];
233 int references;
234} NISTP224_PRE_COMP;
235
236const EC_METHOD *EC_GFp_nistp224_method(void)
237 {
238 static const EC_METHOD ret = {
239 EC_FLAGS_DEFAULT_OCT,
240 NID_X9_62_prime_field,
241 ec_GFp_nistp224_group_init,
242 ec_GFp_simple_group_finish,
243 ec_GFp_simple_group_clear_finish,
244 ec_GFp_nist_group_copy,
245 ec_GFp_nistp224_group_set_curve,
246 ec_GFp_simple_group_get_curve,
247 ec_GFp_simple_group_get_degree,
248 ec_GFp_simple_group_check_discriminant,
249 ec_GFp_simple_point_init,
250 ec_GFp_simple_point_finish,
251 ec_GFp_simple_point_clear_finish,
252 ec_GFp_simple_point_copy,
253 ec_GFp_simple_point_set_to_infinity,
254 ec_GFp_simple_set_Jprojective_coordinates_GFp,
255 ec_GFp_simple_get_Jprojective_coordinates_GFp,
256 ec_GFp_simple_point_set_affine_coordinates,
257 ec_GFp_nistp224_point_get_affine_coordinates,
258 0 /* point_set_compressed_coordinates */,
259 0 /* point2oct */,
260 0 /* oct2point */,
261 ec_GFp_simple_add,
262 ec_GFp_simple_dbl,
263 ec_GFp_simple_invert,
264 ec_GFp_simple_is_at_infinity,
265 ec_GFp_simple_is_on_curve,
266 ec_GFp_simple_cmp,
267 ec_GFp_simple_make_affine,
268 ec_GFp_simple_points_make_affine,
269 ec_GFp_nistp224_points_mul,
270 ec_GFp_nistp224_precompute_mult,
271 ec_GFp_nistp224_have_precompute_mult,
272 ec_GFp_nist_field_mul,
273 ec_GFp_nist_field_sqr,
274 0 /* field_div */,
275 0 /* field_encode */,
276 0 /* field_decode */,
277 0 /* field_set_to_one */ };
278
279 return &ret;
280 }
281
282/* Helper functions to convert field elements to/from internal representation */
283static void bin28_to_felem(felem out, const u8 in[28])
284 {
285 out[0] = *((const uint64_t *)(in)) & 0x00ffffffffffffff;
286 out[1] = (*((const uint64_t *)(in+7))) & 0x00ffffffffffffff;
287 out[2] = (*((const uint64_t *)(in+14))) & 0x00ffffffffffffff;
288 out[3] = (*((const uint64_t *)(in+21))) & 0x00ffffffffffffff;
289 }
290
291static void felem_to_bin28(u8 out[28], const felem in)
292 {
293 unsigned i;
294 for (i = 0; i < 7; ++i)
295 {
296 out[i] = in[0]>>(8*i);
297 out[i+7] = in[1]>>(8*i);
298 out[i+14] = in[2]>>(8*i);
299 out[i+21] = in[3]>>(8*i);
300 }
301 }
302
303/* To preserve endianness when using BN_bn2bin and BN_bin2bn */
304static void flip_endian(u8 *out, const u8 *in, unsigned len)
305 {
306 unsigned i;
307 for (i = 0; i < len; ++i)
308 out[i] = in[len-1-i];
309 }
310
311/* From OpenSSL BIGNUM to internal representation */
312static int BN_to_felem(felem out, const BIGNUM *bn)
313 {
314 felem_bytearray b_in;
315 felem_bytearray b_out;
316 unsigned num_bytes;
317
318 /* BN_bn2bin eats leading zeroes */
319 memset(b_out, 0, sizeof b_out);
320 num_bytes = BN_num_bytes(bn);
321 if (num_bytes > sizeof b_out)
322 {
323 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
324 return 0;
325 }
326 if (BN_is_negative(bn))
327 {
328 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
329 return 0;
330 }
331 num_bytes = BN_bn2bin(bn, b_in);
332 flip_endian(b_out, b_in, num_bytes);
333 bin28_to_felem(out, b_out);
334 return 1;
335 }
336
337/* From internal representation to OpenSSL BIGNUM */
338static BIGNUM *felem_to_BN(BIGNUM *out, const felem in)
339 {
340 felem_bytearray b_in, b_out;
341 felem_to_bin28(b_in, in);
342 flip_endian(b_out, b_in, sizeof b_out);
343 return BN_bin2bn(b_out, sizeof b_out, out);
344 }
345
346/******************************************************************************/
347/* FIELD OPERATIONS
348 *
349 * Field operations, using the internal representation of field elements.
350 * NB! These operations are specific to our point multiplication and cannot be
351 * expected to be correct in general - e.g., multiplication with a large scalar
352 * will cause an overflow.
353 *
354 */
355
356static void felem_one(felem out)
357 {
358 out[0] = 1;
359 out[1] = 0;
360 out[2] = 0;
361 out[3] = 0;
362 }
363
364static void felem_assign(felem out, const felem in)
365 {
366 out[0] = in[0];
367 out[1] = in[1];
368 out[2] = in[2];
369 out[3] = in[3];
370 }
371
372/* Sum two field elements: out += in */
373static void felem_sum(felem out, const felem in)
374 {
375 out[0] += in[0];
376 out[1] += in[1];
377 out[2] += in[2];
378 out[3] += in[3];
379 }
380
381/* Get negative value: out = -in */
382/* Assumes in[i] < 2^57 */
383static void felem_neg(felem out, const felem in)
384 {
385 static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
386 static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
387 static const limb two58m42m2 = (((limb) 1) << 58) -
388 (((limb) 1) << 42) - (((limb) 1) << 2);
389
390 /* Set to 0 mod 2^224-2^96+1 to ensure out > in */
391 out[0] = two58p2 - in[0];
392 out[1] = two58m42m2 - in[1];
393 out[2] = two58m2 - in[2];
394 out[3] = two58m2 - in[3];
395 }
396
397/* Subtract field elements: out -= in */
398/* Assumes in[i] < 2^57 */
399static void felem_diff(felem out, const felem in)
400 {
401 static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
402 static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
403 static const limb two58m42m2 = (((limb) 1) << 58) -
404 (((limb) 1) << 42) - (((limb) 1) << 2);
405
406 /* Add 0 mod 2^224-2^96+1 to ensure out > in */
407 out[0] += two58p2;
408 out[1] += two58m42m2;
409 out[2] += two58m2;
410 out[3] += two58m2;
411
412 out[0] -= in[0];
413 out[1] -= in[1];
414 out[2] -= in[2];
415 out[3] -= in[3];
416 }
417
418/* Subtract in unreduced 128-bit mode: out -= in */
419/* Assumes in[i] < 2^119 */
420static void widefelem_diff(widefelem out, const widefelem in)
421 {
422 static const widelimb two120 = ((widelimb) 1) << 120;
423 static const widelimb two120m64 = (((widelimb) 1) << 120) -
424 (((widelimb) 1) << 64);
425 static const widelimb two120m104m64 = (((widelimb) 1) << 120) -
426 (((widelimb) 1) << 104) - (((widelimb) 1) << 64);
427
428 /* Add 0 mod 2^224-2^96+1 to ensure out > in */
429 out[0] += two120;
430 out[1] += two120m64;
431 out[2] += two120m64;
432 out[3] += two120;
433 out[4] += two120m104m64;
434 out[5] += two120m64;
435 out[6] += two120m64;
436
437 out[0] -= in[0];
438 out[1] -= in[1];
439 out[2] -= in[2];
440 out[3] -= in[3];
441 out[4] -= in[4];
442 out[5] -= in[5];
443 out[6] -= in[6];
444 }
445
446/* Subtract in mixed mode: out128 -= in64 */
447/* in[i] < 2^63 */
448static void felem_diff_128_64(widefelem out, const felem in)
449 {
450 static const widelimb two64p8 = (((widelimb) 1) << 64) +
451 (((widelimb) 1) << 8);
452 static const widelimb two64m8 = (((widelimb) 1) << 64) -
453 (((widelimb) 1) << 8);
454 static const widelimb two64m48m8 = (((widelimb) 1) << 64) -
455 (((widelimb) 1) << 48) - (((widelimb) 1) << 8);
456
457 /* Add 0 mod 2^224-2^96+1 to ensure out > in */
458 out[0] += two64p8;
459 out[1] += two64m48m8;
460 out[2] += two64m8;
461 out[3] += two64m8;
462
463 out[0] -= in[0];
464 out[1] -= in[1];
465 out[2] -= in[2];
466 out[3] -= in[3];
467 }
468
469/* Multiply a field element by a scalar: out = out * scalar
470 * The scalars we actually use are small, so results fit without overflow */
471static void felem_scalar(felem out, const limb scalar)
472 {
473 out[0] *= scalar;
474 out[1] *= scalar;
475 out[2] *= scalar;
476 out[3] *= scalar;
477 }
478
479/* Multiply an unreduced field element by a scalar: out = out * scalar
480 * The scalars we actually use are small, so results fit without overflow */
481static void widefelem_scalar(widefelem out, const widelimb scalar)
482 {
483 out[0] *= scalar;
484 out[1] *= scalar;
485 out[2] *= scalar;
486 out[3] *= scalar;
487 out[4] *= scalar;
488 out[5] *= scalar;
489 out[6] *= scalar;
490 }
491
492/* Square a field element: out = in^2 */
493static void felem_square(widefelem out, const felem in)
494 {
495 limb tmp0, tmp1, tmp2;
496 tmp0 = 2 * in[0]; tmp1 = 2 * in[1]; tmp2 = 2 * in[2];
497 out[0] = ((widelimb) in[0]) * in[0];
498 out[1] = ((widelimb) in[0]) * tmp1;
499 out[2] = ((widelimb) in[0]) * tmp2 + ((widelimb) in[1]) * in[1];
500 out[3] = ((widelimb) in[3]) * tmp0 +
501 ((widelimb) in[1]) * tmp2;
502 out[4] = ((widelimb) in[3]) * tmp1 + ((widelimb) in[2]) * in[2];
503 out[5] = ((widelimb) in[3]) * tmp2;
504 out[6] = ((widelimb) in[3]) * in[3];
505 }
506
507/* Multiply two field elements: out = in1 * in2 */
508static void felem_mul(widefelem out, const felem in1, const felem in2)
509 {
510 out[0] = ((widelimb) in1[0]) * in2[0];
511 out[1] = ((widelimb) in1[0]) * in2[1] + ((widelimb) in1[1]) * in2[0];
512 out[2] = ((widelimb) in1[0]) * in2[2] + ((widelimb) in1[1]) * in2[1] +
513 ((widelimb) in1[2]) * in2[0];
514 out[3] = ((widelimb) in1[0]) * in2[3] + ((widelimb) in1[1]) * in2[2] +
515 ((widelimb) in1[2]) * in2[1] + ((widelimb) in1[3]) * in2[0];
516 out[4] = ((widelimb) in1[1]) * in2[3] + ((widelimb) in1[2]) * in2[2] +
517 ((widelimb) in1[3]) * in2[1];
518 out[5] = ((widelimb) in1[2]) * in2[3] + ((widelimb) in1[3]) * in2[2];
519 out[6] = ((widelimb) in1[3]) * in2[3];
520 }
521
522/* Reduce seven 128-bit coefficients to four 64-bit coefficients.
523 * Requires in[i] < 2^126,
524 * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16 */
525static void felem_reduce(felem out, const widefelem in)
526 {
527 static const widelimb two127p15 = (((widelimb) 1) << 127) +
528 (((widelimb) 1) << 15);
529 static const widelimb two127m71 = (((widelimb) 1) << 127) -
530 (((widelimb) 1) << 71);
531 static const widelimb two127m71m55 = (((widelimb) 1) << 127) -
532 (((widelimb) 1) << 71) - (((widelimb) 1) << 55);
533 widelimb output[5];
534
535 /* Add 0 mod 2^224-2^96+1 to ensure all differences are positive */
536 output[0] = in[0] + two127p15;
537 output[1] = in[1] + two127m71m55;
538 output[2] = in[2] + two127m71;
539 output[3] = in[3];
540 output[4] = in[4];
541
542 /* Eliminate in[4], in[5], in[6] */
543 output[4] += in[6] >> 16;
544 output[3] += (in[6] & 0xffff) << 40;
545 output[2] -= in[6];
546
547 output[3] += in[5] >> 16;
548 output[2] += (in[5] & 0xffff) << 40;
549 output[1] -= in[5];
550
551 output[2] += output[4] >> 16;
552 output[1] += (output[4] & 0xffff) << 40;
553 output[0] -= output[4];
554
555 /* Carry 2 -> 3 -> 4 */
556 output[3] += output[2] >> 56;
557 output[2] &= 0x00ffffffffffffff;
558
559 output[4] = output[3] >> 56;
560 output[3] &= 0x00ffffffffffffff;
561
562 /* Now output[2] < 2^56, output[3] < 2^56, output[4] < 2^72 */
563
564 /* Eliminate output[4] */
565 output[2] += output[4] >> 16;
566 /* output[2] < 2^56 + 2^56 = 2^57 */
567 output[1] += (output[4] & 0xffff) << 40;
568 output[0] -= output[4];
569
570 /* Carry 0 -> 1 -> 2 -> 3 */
571 output[1] += output[0] >> 56;
572 out[0] = output[0] & 0x00ffffffffffffff;
573
574 output[2] += output[1] >> 56;
575 /* output[2] < 2^57 + 2^72 */
576 out[1] = output[1] & 0x00ffffffffffffff;
577 output[3] += output[2] >> 56;
578 /* output[3] <= 2^56 + 2^16 */
579 out[2] = output[2] & 0x00ffffffffffffff;
580
581 /* out[0] < 2^56, out[1] < 2^56, out[2] < 2^56,
582 * out[3] <= 2^56 + 2^16 (due to final carry),
583 * so out < 2*p */
584 out[3] = output[3];
585 }
586
587static void felem_square_reduce(felem out, const felem in)
588 {
589 widefelem tmp;
590 felem_square(tmp, in);
591 felem_reduce(out, tmp);
592 }
593
594static void felem_mul_reduce(felem out, const felem in1, const felem in2)
595 {
596 widefelem tmp;
597 felem_mul(tmp, in1, in2);
598 felem_reduce(out, tmp);
599 }
600
601/* Reduce to unique minimal representation.
602 * Requires 0 <= in < 2*p (always call felem_reduce first) */
603static void felem_contract(felem out, const felem in)
604 {
605 static const int64_t two56 = ((limb) 1) << 56;
606 /* 0 <= in < 2*p, p = 2^224 - 2^96 + 1 */
607 /* if in > p , reduce in = in - 2^224 + 2^96 - 1 */
608 int64_t tmp[4], a;
609 tmp[0] = in[0];
610 tmp[1] = in[1];
611 tmp[2] = in[2];
612 tmp[3] = in[3];
613 /* Case 1: a = 1 iff in >= 2^224 */
614 a = (in[3] >> 56);
615 tmp[0] -= a;
616 tmp[1] += a << 40;
617 tmp[3] &= 0x00ffffffffffffff;
618 /* Case 2: a = 0 iff p <= in < 2^224, i.e.,
619 * the high 128 bits are all 1 and the lower part is non-zero */
620 a = ((in[3] & in[2] & (in[1] | 0x000000ffffffffff)) + 1) |
621 (((int64_t)(in[0] + (in[1] & 0x000000ffffffffff)) - 1) >> 63);
622 a &= 0x00ffffffffffffff;
623 /* turn a into an all-one mask (if a = 0) or an all-zero mask */
624 a = (a - 1) >> 63;
625 /* subtract 2^224 - 2^96 + 1 if a is all-one*/
626 tmp[3] &= a ^ 0xffffffffffffffff;
627 tmp[2] &= a ^ 0xffffffffffffffff;
628 tmp[1] &= (a ^ 0xffffffffffffffff) | 0x000000ffffffffff;
629 tmp[0] -= 1 & a;
630
631 /* eliminate negative coefficients: if tmp[0] is negative, tmp[1] must
632 * be non-zero, so we only need one step */
633 a = tmp[0] >> 63;
634 tmp[0] += two56 & a;
635 tmp[1] -= 1 & a;
636
637 /* carry 1 -> 2 -> 3 */
638 tmp[2] += tmp[1] >> 56;
639 tmp[1] &= 0x00ffffffffffffff;
640
641 tmp[3] += tmp[2] >> 56;
642 tmp[2] &= 0x00ffffffffffffff;
643
644 /* Now 0 <= out < p */
645 out[0] = tmp[0];
646 out[1] = tmp[1];
647 out[2] = tmp[2];
648 out[3] = tmp[3];
649 }
650
651/* Zero-check: returns 1 if input is 0, and 0 otherwise.
652 * We know that field elements are reduced to in < 2^225,
653 * so we only need to check three cases: 0, 2^224 - 2^96 + 1,
654 * and 2^225 - 2^97 + 2 */
655static limb felem_is_zero(const felem in)
656 {
657 limb zero, two224m96p1, two225m97p2;
658
659 zero = in[0] | in[1] | in[2] | in[3];
660 zero = (((int64_t)(zero) - 1) >> 63) & 1;
661 two224m96p1 = (in[0] ^ 1) | (in[1] ^ 0x00ffff0000000000)
662 | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x00ffffffffffffff);
663 two224m96p1 = (((int64_t)(two224m96p1) - 1) >> 63) & 1;
664 two225m97p2 = (in[0] ^ 2) | (in[1] ^ 0x00fffe0000000000)
665 | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x01ffffffffffffff);
666 two225m97p2 = (((int64_t)(two225m97p2) - 1) >> 63) & 1;
667 return (zero | two224m96p1 | two225m97p2);
668 }
669
670static limb felem_is_zero_int(const felem in)
671 {
672 return (int) (felem_is_zero(in) & ((limb)1));
673 }
674
675/* Invert a field element */
676/* Computation chain copied from djb's code */
677static void felem_inv(felem out, const felem in)
678 {
679 felem ftmp, ftmp2, ftmp3, ftmp4;
680 widefelem tmp;
681 unsigned i;
682
683 felem_square(tmp, in); felem_reduce(ftmp, tmp); /* 2 */
684 felem_mul(tmp, in, ftmp); felem_reduce(ftmp, tmp); /* 2^2 - 1 */
685 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^3 - 2 */
686 felem_mul(tmp, in, ftmp); felem_reduce(ftmp, tmp); /* 2^3 - 1 */
687 felem_square(tmp, ftmp); felem_reduce(ftmp2, tmp); /* 2^4 - 2 */
688 felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp); /* 2^5 - 4 */
689 felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp); /* 2^6 - 8 */
690 felem_mul(tmp, ftmp2, ftmp); felem_reduce(ftmp, tmp); /* 2^6 - 1 */
691 felem_square(tmp, ftmp); felem_reduce(ftmp2, tmp); /* 2^7 - 2 */
692 for (i = 0; i < 5; ++i) /* 2^12 - 2^6 */
693 {
694 felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp);
695 }
696 felem_mul(tmp, ftmp2, ftmp); felem_reduce(ftmp2, tmp); /* 2^12 - 1 */
697 felem_square(tmp, ftmp2); felem_reduce(ftmp3, tmp); /* 2^13 - 2 */
698 for (i = 0; i < 11; ++i) /* 2^24 - 2^12 */
699 {
700 felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp);
701 }
702 felem_mul(tmp, ftmp3, ftmp2); felem_reduce(ftmp2, tmp); /* 2^24 - 1 */
703 felem_square(tmp, ftmp2); felem_reduce(ftmp3, tmp); /* 2^25 - 2 */
704 for (i = 0; i < 23; ++i) /* 2^48 - 2^24 */
705 {
706 felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp);
707 }
708 felem_mul(tmp, ftmp3, ftmp2); felem_reduce(ftmp3, tmp); /* 2^48 - 1 */
709 felem_square(tmp, ftmp3); felem_reduce(ftmp4, tmp); /* 2^49 - 2 */
710 for (i = 0; i < 47; ++i) /* 2^96 - 2^48 */
711 {
712 felem_square(tmp, ftmp4); felem_reduce(ftmp4, tmp);
713 }
714 felem_mul(tmp, ftmp3, ftmp4); felem_reduce(ftmp3, tmp); /* 2^96 - 1 */
715 felem_square(tmp, ftmp3); felem_reduce(ftmp4, tmp); /* 2^97 - 2 */
716 for (i = 0; i < 23; ++i) /* 2^120 - 2^24 */
717 {
718 felem_square(tmp, ftmp4); felem_reduce(ftmp4, tmp);
719 }
720 felem_mul(tmp, ftmp2, ftmp4); felem_reduce(ftmp2, tmp); /* 2^120 - 1 */
721 for (i = 0; i < 6; ++i) /* 2^126 - 2^6 */
722 {
723 felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp);
724 }
725 felem_mul(tmp, ftmp2, ftmp); felem_reduce(ftmp, tmp); /* 2^126 - 1 */
726 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^127 - 2 */
727 felem_mul(tmp, ftmp, in); felem_reduce(ftmp, tmp); /* 2^127 - 1 */
728 for (i = 0; i < 97; ++i) /* 2^224 - 2^97 */
729 {
730 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp);
731 }
732 felem_mul(tmp, ftmp, ftmp3); felem_reduce(out, tmp); /* 2^224 - 2^96 - 1 */
733 }
734
735/* Copy in constant time:
736 * if icopy == 1, copy in to out,
737 * if icopy == 0, copy out to itself. */
738static void
739copy_conditional(felem out, const felem in, limb icopy)
740 {
741 unsigned i;
742 /* icopy is a (64-bit) 0 or 1, so copy is either all-zero or all-one */
743 const limb copy = -icopy;
744 for (i = 0; i < 4; ++i)
745 {
746 const limb tmp = copy & (in[i] ^ out[i]);
747 out[i] ^= tmp;
748 }
749 }
750
751/******************************************************************************/
752/* ELLIPTIC CURVE POINT OPERATIONS
753 *
754 * Points are represented in Jacobian projective coordinates:
755 * (X, Y, Z) corresponds to the affine point (X/Z^2, Y/Z^3),
756 * or to the point at infinity if Z == 0.
757 *
758 */
759
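/*
 * Illustrative sketch (not from the imported source): recovering affine
 * coordinates from a Jacobian triple with the helpers above, assuming
 * z != 0 and that the inputs are already reduced.  As stated in the
 * comment above, x_affine = X/Z^2 and y_affine = Y/Z^3.
 */
static void jacobian_to_affine_sketch(felem x_aff, felem y_aff,
	const felem x, const felem y, const felem z)
	{
	felem z_inv, z_inv_sq, z_inv_cube;
	widefelem tmp;

	felem_inv(z_inv, z);			/* 1/Z   */
	felem_square(tmp, z_inv);
	felem_reduce(z_inv_sq, tmp);		/* 1/Z^2 */
	felem_mul(tmp, z_inv_sq, z_inv);
	felem_reduce(z_inv_cube, tmp);		/* 1/Z^3 */
	felem_mul(tmp, x, z_inv_sq);
	felem_reduce(x_aff, tmp);
	felem_contract(x_aff, x_aff);		/* X/Z^2, unique representation */
	felem_mul(tmp, y, z_inv_cube);
	felem_reduce(y_aff, tmp);
	felem_contract(y_aff, y_aff);		/* Y/Z^3, unique representation */
	}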
760/* Double an elliptic curve point:
761 * (X', Y', Z') = 2 * (X, Y, Z), where
762 * X' = (3 * (X - Z^2) * (X + Z^2))^2 - 8 * X * Y^2
763 * Y' = 3 * (X - Z^2) * (X + Z^2) * (4 * X * Y^2 - X') - 8 * Y^2
764 * Z' = (Y + Z)^2 - Y^2 - Z^2 = 2 * Y * Z
765 * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed,
766 * while x_out == y_in is not (maybe this works, but it's not tested). */
767static void
768point_double(felem x_out, felem y_out, felem z_out,
769 const felem x_in, const felem y_in, const felem z_in)
770 {
771 widefelem tmp, tmp2;
772 felem delta, gamma, beta, alpha, ftmp, ftmp2;
773
774 felem_assign(ftmp, x_in);
775 felem_assign(ftmp2, x_in);
776
777 /* delta = z^2 */
778 felem_square(tmp, z_in);
779 felem_reduce(delta, tmp);
780
781 /* gamma = y^2 */
782 felem_square(tmp, y_in);
783 felem_reduce(gamma, tmp);
784
785 /* beta = x*gamma */
786 felem_mul(tmp, x_in, gamma);
787 felem_reduce(beta, tmp);
788
789 /* alpha = 3*(x-delta)*(x+delta) */
790 felem_diff(ftmp, delta);
791 /* ftmp[i] < 2^57 + 2^58 + 2 < 2^59 */
792 felem_sum(ftmp2, delta);
793 /* ftmp2[i] < 2^57 + 2^57 = 2^58 */
794 felem_scalar(ftmp2, 3);
795 /* ftmp2[i] < 3 * 2^58 < 2^60 */
796 felem_mul(tmp, ftmp, ftmp2);
797 /* tmp[i] < 2^60 * 2^59 * 4 = 2^121 */
798 felem_reduce(alpha, tmp);
799
800 /* x' = alpha^2 - 8*beta */
801 felem_square(tmp, alpha);
802 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
803 felem_assign(ftmp, beta);
804 felem_scalar(ftmp, 8);
805 /* ftmp[i] < 8 * 2^57 = 2^60 */
806 felem_diff_128_64(tmp, ftmp);
807 /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
808 felem_reduce(x_out, tmp);
809
810 /* z' = (y + z)^2 - gamma - delta */
811 felem_sum(delta, gamma);
812 /* delta[i] < 2^57 + 2^57 = 2^58 */
813 felem_assign(ftmp, y_in);
814 felem_sum(ftmp, z_in);
815 /* ftmp[i] < 2^57 + 2^57 = 2^58 */
816 felem_square(tmp, ftmp);
817 /* tmp[i] < 4 * 2^58 * 2^58 = 2^118 */
818 felem_diff_128_64(tmp, delta);
819 /* tmp[i] < 2^118 + 2^64 + 8 < 2^119 */
820 felem_reduce(z_out, tmp);
821
822 /* y' = alpha*(4*beta - x') - 8*gamma^2 */
823 felem_scalar(beta, 4);
824 /* beta[i] < 4 * 2^57 = 2^59 */
825 felem_diff(beta, x_out);
826 /* beta[i] < 2^59 + 2^58 + 2 < 2^60 */
827 felem_mul(tmp, alpha, beta);
828 /* tmp[i] < 4 * 2^57 * 2^60 = 2^119 */
829 felem_square(tmp2, gamma);
830 /* tmp2[i] < 4 * 2^57 * 2^57 = 2^116 */
831 widefelem_scalar(tmp2, 8);
832 /* tmp2[i] < 8 * 2^116 = 2^119 */
833 widefelem_diff(tmp, tmp2);
834 /* tmp[i] < 2^119 + 2^120 < 2^121 */
835 felem_reduce(y_out, tmp);
836 }
837
838/* Add two elliptic curve points:
839 * (X_1, Y_1, Z_1) + (X_2, Y_2, Z_2) = (X_3, Y_3, Z_3), where
840 * X_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1)^2 - (Z_1^2 * X_2 - Z_2^2 * X_1)^3 -
841 * 2 * Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2
842 * Y_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1) * (Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2 - X_3) -
843 * Z_2^3 * Y_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^3
844 * Z_3 = (Z_1^2 * X_2 - Z_2^2 * X_1) * (Z_1 * Z_2)
845 *
846 * This runs faster if 'mixed' is set, which requires Z_2 = 1 or Z_2 = 0.
847 */
848
849/* This function is not entirely constant-time:
850 * it includes a branch for checking whether the two input points are equal,
851 * (while not equal to the point at infinity).
852 * This case never happens during single point multiplication,
853 * so there is no timing leak for ECDH or ECDSA signing. */
854static void point_add(felem x3, felem y3, felem z3,
855 const felem x1, const felem y1, const felem z1,
856 const int mixed, const felem x2, const felem y2, const felem z2)
857 {
858 felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, x_out, y_out, z_out;
859 widefelem tmp, tmp2;
860 limb z1_is_zero, z2_is_zero, x_equal, y_equal;
861
862 if (!mixed)
863 {
864 /* ftmp2 = z2^2 */
865 felem_square(tmp, z2);
866 felem_reduce(ftmp2, tmp);
867
868 /* ftmp4 = z2^3 */
869 felem_mul(tmp, ftmp2, z2);
870 felem_reduce(ftmp4, tmp);
871
872 /* ftmp4 = z2^3*y1 */
873 felem_mul(tmp2, ftmp4, y1);
874 felem_reduce(ftmp4, tmp2);
875
876 /* ftmp2 = z2^2*x1 */
877 felem_mul(tmp2, ftmp2, x1);
878 felem_reduce(ftmp2, tmp2);
879 }
880 else
881 {
882 /* We'll assume z2 = 1 (special case z2 = 0 is handled later) */
883
884 /* ftmp4 = z2^3*y1 */
885 felem_assign(ftmp4, y1);
886
887 /* ftmp2 = z2^2*x1 */
888 felem_assign(ftmp2, x1);
889 }
890
891 /* ftmp = z1^2 */
892 felem_square(tmp, z1);
893 felem_reduce(ftmp, tmp);
894
895 /* ftmp3 = z1^3 */
896 felem_mul(tmp, ftmp, z1);
897 felem_reduce(ftmp3, tmp);
898
899 /* tmp = z1^3*y2 */
900 felem_mul(tmp, ftmp3, y2);
901 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
902
903 /* ftmp3 = z1^3*y2 - z2^3*y1 */
904 felem_diff_128_64(tmp, ftmp4);
905 /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
906 felem_reduce(ftmp3, tmp);
907
908 /* tmp = z1^2*x2 */
909 felem_mul(tmp, ftmp, x2);
910 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
911
912 /* ftmp = z1^2*x2 - z2^2*x1 */
913 felem_diff_128_64(tmp, ftmp2);
914 /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
915 felem_reduce(ftmp, tmp);
916
917 /* the formulae are incorrect if the points are equal
918 * so we check for this and do doubling if this happens */
919 x_equal = felem_is_zero(ftmp);
920 y_equal = felem_is_zero(ftmp3);
921 z1_is_zero = felem_is_zero(z1);
922 z2_is_zero = felem_is_zero(z2);
923 /* In affine coordinates, (X_1, Y_1) == (X_2, Y_2) */
924 if (x_equal && y_equal && !z1_is_zero && !z2_is_zero)
925 {
926 point_double(x3, y3, z3, x1, y1, z1);
927 return;
928 }
929
930 /* ftmp5 = z1*z2 */
931 if (!mixed)
932 {
933 felem_mul(tmp, z1, z2);
934 felem_reduce(ftmp5, tmp);
935 }
936 else
937 {
938 /* special case z2 = 0 is handled later */
939 felem_assign(ftmp5, z1);
940 }
941
942 /* z_out = (z1^2*x2 - z2^2*x1)*(z1*z2) */
943 felem_mul(tmp, ftmp, ftmp5);
944 felem_reduce(z_out, tmp);
945
946 /* ftmp = (z1^2*x2 - z2^2*x1)^2 */
947 felem_assign(ftmp5, ftmp);
948 felem_square(tmp, ftmp);
949 felem_reduce(ftmp, tmp);
950
951 /* ftmp5 = (z1^2*x2 - z2^2*x1)^3 */
952 felem_mul(tmp, ftmp, ftmp5);
953 felem_reduce(ftmp5, tmp);
954
955 /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
956 felem_mul(tmp, ftmp2, ftmp);
957 felem_reduce(ftmp2, tmp);
958
959 /* tmp = z2^3*y1*(z1^2*x2 - z2^2*x1)^3 */
960 felem_mul(tmp, ftmp4, ftmp5);
961 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
962
963 /* tmp2 = (z1^3*y2 - z2^3*y1)^2 */
964 felem_square(tmp2, ftmp3);
965 /* tmp2[i] < 4 * 2^57 * 2^57 < 2^116 */
966
967 /* tmp2 = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 */
968 felem_diff_128_64(tmp2, ftmp5);
969 /* tmp2[i] < 2^116 + 2^64 + 8 < 2^117 */
970
971 /* ftmp5 = 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
972 felem_assign(ftmp5, ftmp2);
973 felem_scalar(ftmp5, 2);
974 /* ftmp5[i] < 2 * 2^57 = 2^58 */
975
976 /* x_out = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 -
977 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
978 felem_diff_128_64(tmp2, ftmp5);
979 /* tmp2[i] < 2^117 + 2^64 + 8 < 2^118 */
980 felem_reduce(x_out, tmp2);
981
982 /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out */
983 felem_diff(ftmp2, x_out);
984 /* ftmp2[i] < 2^57 + 2^58 + 2 < 2^59 */
985
986 /* tmp2 = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out) */
987 felem_mul(tmp2, ftmp3, ftmp2);
988 /* tmp2[i] < 4 * 2^57 * 2^59 = 2^118 */
989
990 /* y_out = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out) -
991 z2^3*y1*(z1^2*x2 - z2^2*x1)^3 */
992 widefelem_diff(tmp2, tmp);
993 /* tmp2[i] < 2^118 + 2^120 < 2^121 */
994 felem_reduce(y_out, tmp2);
995
996 /* the result (x_out, y_out, z_out) is incorrect if one of the inputs is
997 * the point at infinity, so we need to check for this separately */
998
999 /* if point 1 is at infinity, copy point 2 to output, and vice versa */
1000 copy_conditional(x_out, x2, z1_is_zero);
1001 copy_conditional(x_out, x1, z2_is_zero);
1002 copy_conditional(y_out, y2, z1_is_zero);
1003 copy_conditional(y_out, y1, z2_is_zero);
1004 copy_conditional(z_out, z2, z1_is_zero);
1005 copy_conditional(z_out, z1, z2_is_zero);
1006 felem_assign(x3, x_out);
1007 felem_assign(y3, y_out);
1008 felem_assign(z3, z_out);
1009 }
1010
1011/* select_point selects the |idx|th point from a precomputation table and
1012 * copies it to out. */
1013static void select_point(const u64 idx, unsigned int size, const felem pre_comp[/*size*/][3], felem out[3])
1014 {
1015 unsigned i, j;
1016 limb *outlimbs = &out[0][0];
1017 memset(outlimbs, 0, 3 * sizeof(felem));
1018
1019 for (i = 0; i < size; i++)
1020 {
1021 const limb *inlimbs = &pre_comp[i][0][0];
1022 u64 mask = i ^ idx;
1023 mask |= mask >> 4;
1024 mask |= mask >> 2;
1025 mask |= mask >> 1;
1026 mask &= 1;
1027 mask--;
1028 for (j = 0; j < 4 * 3; j++)
1029 outlimbs[j] |= inlimbs[j] & mask;
1030 }
1031 }
1032
1033/* get_bit returns the |i|th bit in |in| */
1034static char get_bit(const felem_bytearray in, unsigned i)
1035 {
1036 if (i >= 224)
1037 return 0;
1038 return (in[i >> 3] >> (i & 7)) & 1;
1039 }
1040
1041/* Interleaved point multiplication using precomputed point multiples:
1042 * The small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[],
1043 * the scalars in scalars[]. If g_scalar is non-NULL, we also add this multiple
1044 * of the generator, using certain (large) precomputed multiples in g_pre_comp.
1045 * Output point (X, Y, Z) is stored in x_out, y_out, z_out */
1046static void batch_mul(felem x_out, felem y_out, felem z_out,
1047 const felem_bytearray scalars[], const unsigned num_points, const u8 *g_scalar,
1048 const int mixed, const felem pre_comp[][17][3], const felem g_pre_comp[2][16][3])
1049 {
1050 int i, skip;
1051 unsigned num;
1052 unsigned gen_mul = (g_scalar != NULL);
1053 felem nq[3], tmp[4];
1054 u64 bits;
1055 u8 sign, digit;
1056
1057 /* set nq to the point at infinity */
1058 memset(nq, 0, 3 * sizeof(felem));
1059
1060 /* Loop over all scalars msb-to-lsb, interleaving additions
1061 * of multiples of the generator (two in each of the last 28 rounds)
1062	 * and additions of other points' multiples (every 5th round).
1063 */
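	/* For the generator, the 224-bit scalar is handled as eight 28-bit
	 * windows: g_pre_comp[0] holds combinations of G, 2^56*G, 2^112*G and
	 * 2^168*G, while g_pre_comp[1] holds the same combinations shifted up
	 * by 28 bits (2^28*G, 2^84*G, 2^140*G, 2^196*G), so each of the last
	 * 28 rounds adds one point from each table. */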
1064 skip = 1; /* save two point operations in the first round */
1065 for (i = (num_points ? 220 : 27); i >= 0; --i)
1066 {
1067 /* double */
1068 if (!skip)
1069 point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
1070
1071 /* add multiples of the generator */
1072 if (gen_mul && (i <= 27))
1073 {
1074 /* first, look 28 bits upwards */
1075 bits = get_bit(g_scalar, i + 196) << 3;
1076 bits |= get_bit(g_scalar, i + 140) << 2;
1077 bits |= get_bit(g_scalar, i + 84) << 1;
1078 bits |= get_bit(g_scalar, i + 28);
1079 /* select the point to add, in constant time */
1080 select_point(bits, 16, g_pre_comp[1], tmp);
1081
1082 if (!skip)
1083 {
1084 point_add(nq[0], nq[1], nq[2],
1085 nq[0], nq[1], nq[2],
1086 1 /* mixed */, tmp[0], tmp[1], tmp[2]);
1087 }
1088 else
1089 {
1090 memcpy(nq, tmp, 3 * sizeof(felem));
1091 skip = 0;
1092 }
1093
1094 /* second, look at the current position */
1095 bits = get_bit(g_scalar, i + 168) << 3;
1096 bits |= get_bit(g_scalar, i + 112) << 2;
1097 bits |= get_bit(g_scalar, i + 56) << 1;
1098 bits |= get_bit(g_scalar, i);
1099 /* select the point to add, in constant time */
1100 select_point(bits, 16, g_pre_comp[0], tmp);
1101 point_add(nq[0], nq[1], nq[2],
1102 nq[0], nq[1], nq[2],
1103 1 /* mixed */, tmp[0], tmp[1], tmp[2]);
1104 }
1105
1106 /* do other additions every 5 doublings */
1107 if (num_points && (i % 5 == 0))
1108 {
1109 /* loop over all scalars */
1110 for (num = 0; num < num_points; ++num)
1111 {
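				/* gather the five scalar bits at positions i..i+4 plus
				 * the bit below them; the recoding turns this window into
				 * a sign and a digit indexing the 17-entry table */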
1112 bits = get_bit(scalars[num], i + 4) << 5;
1113 bits |= get_bit(scalars[num], i + 3) << 4;
1114 bits |= get_bit(scalars[num], i + 2) << 3;
1115 bits |= get_bit(scalars[num], i + 1) << 2;
1116 bits |= get_bit(scalars[num], i) << 1;
1117 bits |= get_bit(scalars[num], i - 1);
1118 ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
1119
1120 /* select the point to add or subtract */
1121 select_point(digit, 17, pre_comp[num], tmp);
1122 felem_neg(tmp[3], tmp[1]); /* (X, -Y, Z) is the negative point */
1123 copy_conditional(tmp[1], tmp[3], sign);
1124
1125 if (!skip)
1126 {
1127 point_add(nq[0], nq[1], nq[2],
1128 nq[0], nq[1], nq[2],
1129 mixed, tmp[0], tmp[1], tmp[2]);
1130 }
1131 else
1132 {
1133 memcpy(nq, tmp, 3 * sizeof(felem));
1134 skip = 0;
1135 }
1136 }
1137 }
1138 }
1139 felem_assign(x_out, nq[0]);
1140 felem_assign(y_out, nq[1]);
1141 felem_assign(z_out, nq[2]);
1142 }
1143
1144/******************************************************************************/
1145/* FUNCTIONS TO MANAGE PRECOMPUTATION
1146 */
1147
1148static NISTP224_PRE_COMP *nistp224_pre_comp_new()
1149 {
1150 NISTP224_PRE_COMP *ret = NULL;
1151 ret = (NISTP224_PRE_COMP *) OPENSSL_malloc(sizeof *ret);
1152 if (!ret)
1153 {
1154 ECerr(EC_F_NISTP224_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
1155 return ret;
1156 }
1157 memset(ret->g_pre_comp, 0, sizeof(ret->g_pre_comp));
1158 ret->references = 1;
1159 return ret;
1160 }
1161
1162static void *nistp224_pre_comp_dup(void *src_)
1163 {
1164 NISTP224_PRE_COMP *src = src_;
1165
1166 /* no need to actually copy, these objects never change! */
1167 CRYPTO_add(&src->references, 1, CRYPTO_LOCK_EC_PRE_COMP);
1168
1169 return src_;
1170 }
1171
1172static void nistp224_pre_comp_free(void *pre_)
1173 {
1174 int i;
1175 NISTP224_PRE_COMP *pre = pre_;
1176
1177 if (!pre)
1178 return;
1179
1180 i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
1181 if (i > 0)
1182 return;
1183
1184 OPENSSL_free(pre);
1185 }
1186
1187static void nistp224_pre_comp_clear_free(void *pre_)
1188 {
1189 int i;
1190 NISTP224_PRE_COMP *pre = pre_;
1191
1192 if (!pre)
1193 return;
1194
1195 i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
1196 if (i > 0)
1197 return;
1198
1199 OPENSSL_cleanse(pre, sizeof *pre);
1200 OPENSSL_free(pre);
1201 }
1202
1203/******************************************************************************/
1204/* OPENSSL EC_METHOD FUNCTIONS
1205 */
1206
1207int ec_GFp_nistp224_group_init(EC_GROUP *group)
1208 {
1209 int ret;
1210 ret = ec_GFp_simple_group_init(group);
1211 group->a_is_minus3 = 1;
1212 return ret;
1213 }
1214
1215int ec_GFp_nistp224_group_set_curve(EC_GROUP *group, const BIGNUM *p,
1216 const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx)
1217 {
1218 int ret = 0;
1219 BN_CTX *new_ctx = NULL;
1220 BIGNUM *curve_p, *curve_a, *curve_b;
1221
1222 if (ctx == NULL)
1223 if ((ctx = new_ctx = BN_CTX_new()) == NULL) return 0;
1224 BN_CTX_start(ctx);
1225 if (((curve_p = BN_CTX_get(ctx)) == NULL) ||
1226 ((curve_a = BN_CTX_get(ctx)) == NULL) ||
1227 ((curve_b = BN_CTX_get(ctx)) == NULL)) goto err;
1228 BN_bin2bn(nistp224_curve_params[0], sizeof(felem_bytearray), curve_p);
1229 BN_bin2bn(nistp224_curve_params[1], sizeof(felem_bytearray), curve_a);
1230 BN_bin2bn(nistp224_curve_params[2], sizeof(felem_bytearray), curve_b);
1231 if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) ||
1232 (BN_cmp(curve_b, b)))
1233 {
1234 ECerr(EC_F_EC_GFP_NISTP224_GROUP_SET_CURVE,
1235 EC_R_WRONG_CURVE_PARAMETERS);
1236 goto err;
1237 }
1238 group->field_mod_func = BN_nist_mod_224;
1239 ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
1240err:
1241 BN_CTX_end(ctx);
1242 if (new_ctx != NULL)
1243 BN_CTX_free(new_ctx);
1244 return ret;
1245 }
1246
1247/* Takes the Jacobian coordinates (X, Y, Z) of a point and returns
1248 * (X', Y') = (X/Z^2, Y/Z^3) */
1249int ec_GFp_nistp224_point_get_affine_coordinates(const EC_GROUP *group,
1250 const EC_POINT *point, BIGNUM *x, BIGNUM *y, BN_CTX *ctx)
1251 {
1252 felem z1, z2, x_in, y_in, x_out, y_out;
1253 widefelem tmp;
1254
1255 if (EC_POINT_is_at_infinity(group, point))
1256 {
1257 ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1258 EC_R_POINT_AT_INFINITY);
1259 return 0;
1260 }
1261 if ((!BN_to_felem(x_in, &point->X)) || (!BN_to_felem(y_in, &point->Y)) ||
1262 (!BN_to_felem(z1, &point->Z))) return 0;
1263 felem_inv(z2, z1);
1264 felem_square(tmp, z2); felem_reduce(z1, tmp);
1265 felem_mul(tmp, x_in, z1); felem_reduce(x_in, tmp);
1266 felem_contract(x_out, x_in);
1267 if (x != NULL)
1268 {
1269 if (!felem_to_BN(x, x_out)) {
1270 ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1271 ERR_R_BN_LIB);
1272 return 0;
1273 }
1274 }
1275 felem_mul(tmp, z1, z2); felem_reduce(z1, tmp);
1276 felem_mul(tmp, y_in, z1); felem_reduce(y_in, tmp);
1277 felem_contract(y_out, y_in);
1278 if (y != NULL)
1279 {
1280 if (!felem_to_BN(y, y_out)) {
1281 ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1282 ERR_R_BN_LIB);
1283 return 0;
1284 }
1285 }
1286 return 1;
1287 }
1288
1289static void make_points_affine(size_t num, felem points[/*num*/][3], felem tmp_felems[/*num+1*/])
1290 {
1291 /* Runs in constant time, unless an input is the point at infinity
1292 * (which normally shouldn't happen). */
1293 ec_GFp_nistp_points_make_affine_internal(
1294 num,
1295 points,
1296 sizeof(felem),
1297 tmp_felems,
1298 (void (*)(void *)) felem_one,
1299 (int (*)(const void *)) felem_is_zero_int,
1300 (void (*)(void *, const void *)) felem_assign,
1301 (void (*)(void *, const void *)) felem_square_reduce,
1302 (void (*)(void *, const void *, const void *)) felem_mul_reduce,
1303 (void (*)(void *, const void *)) felem_inv,
1304 (void (*)(void *, const void *)) felem_contract);
1305 }
1306
1307/* Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL values
1308 * Result is stored in r (r can equal one of the inputs). */
1309int ec_GFp_nistp224_points_mul(const EC_GROUP *group, EC_POINT *r,
1310 const BIGNUM *scalar, size_t num, const EC_POINT *points[],
1311 const BIGNUM *scalars[], BN_CTX *ctx)
1312 {
1313 int ret = 0;
1314 int j;
1315 unsigned i;
1316 int mixed = 0;
1317 BN_CTX *new_ctx = NULL;
1318 BIGNUM *x, *y, *z, *tmp_scalar;
1319 felem_bytearray g_secret;
1320 felem_bytearray *secrets = NULL;
1321 felem (*pre_comp)[17][3] = NULL;
1322 felem *tmp_felems = NULL;
1323 felem_bytearray tmp;
1324 unsigned num_bytes;
1325 int have_pre_comp = 0;
1326 size_t num_points = num;
1327 felem x_in, y_in, z_in, x_out, y_out, z_out;
1328 NISTP224_PRE_COMP *pre = NULL;
1329 const felem (*g_pre_comp)[16][3] = NULL;
1330 EC_POINT *generator = NULL;
1331 const EC_POINT *p = NULL;
1332 const BIGNUM *p_scalar = NULL;
1333
1334 if (ctx == NULL)
1335 if ((ctx = new_ctx = BN_CTX_new()) == NULL) return 0;
1336 BN_CTX_start(ctx);
1337 if (((x = BN_CTX_get(ctx)) == NULL) ||
1338 ((y = BN_CTX_get(ctx)) == NULL) ||
1339 ((z = BN_CTX_get(ctx)) == NULL) ||
1340 ((tmp_scalar = BN_CTX_get(ctx)) == NULL))
1341 goto err;
1342
1343 if (scalar != NULL)
1344 {
1345 pre = EC_EX_DATA_get_data(group->extra_data,
1346 nistp224_pre_comp_dup, nistp224_pre_comp_free,
1347 nistp224_pre_comp_clear_free);
1348 if (pre)
1349 /* we have precomputation, try to use it */
1350 g_pre_comp = (const felem (*)[16][3]) pre->g_pre_comp;
1351 else
1352 /* try to use the standard precomputation */
1353 g_pre_comp = &gmul[0];
1354 generator = EC_POINT_new(group);
1355 if (generator == NULL)
1356 goto err;
1357 /* get the generator from precomputation */
1358 if (!felem_to_BN(x, g_pre_comp[0][1][0]) ||
1359 !felem_to_BN(y, g_pre_comp[0][1][1]) ||
1360 !felem_to_BN(z, g_pre_comp[0][1][2]))
1361 {
1362 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1363 goto err;
1364 }
1365 if (!EC_POINT_set_Jprojective_coordinates_GFp(group,
1366 generator, x, y, z, ctx))
1367 goto err;
1368 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
1369 /* precomputation matches generator */
1370 have_pre_comp = 1;
1371 else
1372 /* we don't have valid precomputation:
1373 * treat the generator as a random point */
1374 num_points = num_points + 1;
1375 }
1376
1377 if (num_points > 0)
1378 {
1379 if (num_points >= 3)
1380 {
1381 /* unless we precompute multiples for just one or two points,
1382 * converting those into affine form is time well spent */
1383 mixed = 1;
1384 }
1385 secrets = OPENSSL_malloc(num_points * sizeof(felem_bytearray));
1386 pre_comp = OPENSSL_malloc(num_points * 17 * 3 * sizeof(felem));
1387 if (mixed)
1388 tmp_felems = OPENSSL_malloc((num_points * 17 + 1) * sizeof(felem));
1389 if ((secrets == NULL) || (pre_comp == NULL) || (mixed && (tmp_felems == NULL)))
1390 {
1391 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_MALLOC_FAILURE);
1392 goto err;
1393 }
1394
1395 /* we treat NULL scalars as 0, and NULL points as points at infinity,
1396 * i.e., they contribute nothing to the linear combination */
1397 memset(secrets, 0, num_points * sizeof(felem_bytearray));
1398 memset(pre_comp, 0, num_points * 17 * 3 * sizeof(felem));
1399 for (i = 0; i < num_points; ++i)
1400 {
1401 if (i == num)
1402 /* the generator */
1403 {
1404 p = EC_GROUP_get0_generator(group);
1405 p_scalar = scalar;
1406 }
1407 else
1408 /* the i^th point */
1409 {
1410 p = points[i];
1411 p_scalar = scalars[i];
1412 }
1413 if ((p_scalar != NULL) && (p != NULL))
1414 {
1415 /* reduce scalar to 0 <= scalar < 2^224 */
1416 if ((BN_num_bits(p_scalar) > 224) || (BN_is_negative(p_scalar)))
1417 {
1418 /* this is an unusual input, and we don't guarantee
1419 * constant-timeness */
1420 if (!BN_nnmod(tmp_scalar, p_scalar, &group->order, ctx))
1421 {
1422 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1423 goto err;
1424 }
1425 num_bytes = BN_bn2bin(tmp_scalar, tmp);
1426 }
1427 else
1428 num_bytes = BN_bn2bin(p_scalar, tmp);
1429 flip_endian(secrets[i], tmp, num_bytes);
1430 /* precompute multiples */
1431 if ((!BN_to_felem(x_out, &p->X)) ||
1432 (!BN_to_felem(y_out, &p->Y)) ||
1433 (!BN_to_felem(z_out, &p->Z))) goto err;
1434 felem_assign(pre_comp[i][1][0], x_out);
1435 felem_assign(pre_comp[i][1][1], y_out);
1436 felem_assign(pre_comp[i][1][2], z_out);
1437 for (j = 2; j <= 16; ++j)
1438 {
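				/* even j: 2*(j/2)*P by doubling; odd j: (j-1)*P + P */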
1439 if (j & 1)
1440 {
1441 point_add(
1442 pre_comp[i][j][0], pre_comp[i][j][1], pre_comp[i][j][2],
1443 pre_comp[i][1][0], pre_comp[i][1][1], pre_comp[i][1][2],
1444 0, pre_comp[i][j-1][0], pre_comp[i][j-1][1], pre_comp[i][j-1][2]);
1445 }
1446 else
1447 {
1448 point_double(
1449 pre_comp[i][j][0], pre_comp[i][j][1], pre_comp[i][j][2],
1450 pre_comp[i][j/2][0], pre_comp[i][j/2][1], pre_comp[i][j/2][2]);
1451 }
1452 }
1453 }
1454 }
1455 if (mixed)
1456 make_points_affine(num_points * 17, pre_comp[0], tmp_felems);
1457 }
1458
1459 /* the scalar for the generator */
1460 if ((scalar != NULL) && (have_pre_comp))
1461 {
1462 memset(g_secret, 0, sizeof g_secret);
1463 /* reduce scalar to 0 <= scalar < 2^224 */
1464 if ((BN_num_bits(scalar) > 224) || (BN_is_negative(scalar)))
1465 {
1466 /* this is an unusual input, and we don't guarantee
1467 * constant-timeness */
1468 if (!BN_nnmod(tmp_scalar, scalar, &group->order, ctx))
1469 {
1470 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1471 goto err;
1472 }
1473 num_bytes = BN_bn2bin(tmp_scalar, tmp);
1474 }
1475 else
1476 num_bytes = BN_bn2bin(scalar, tmp);
1477 flip_endian(g_secret, tmp, num_bytes);
1478		/* do the multiplication with generator precomputation */
1479 batch_mul(x_out, y_out, z_out,
1480 (const felem_bytearray (*)) secrets, num_points,
1481 g_secret,
1482 mixed, (const felem (*)[17][3]) pre_comp,
1483 g_pre_comp);
1484 }
1485 else
1486 /* do the multiplication without generator precomputation */
1487 batch_mul(x_out, y_out, z_out,
1488 (const felem_bytearray (*)) secrets, num_points,
1489 NULL, mixed, (const felem (*)[17][3]) pre_comp, NULL);
1490 /* reduce the output to its unique minimal representation */
1491 felem_contract(x_in, x_out);
1492 felem_contract(y_in, y_out);
1493 felem_contract(z_in, z_out);
1494 if ((!felem_to_BN(x, x_in)) || (!felem_to_BN(y, y_in)) ||
1495 (!felem_to_BN(z, z_in)))
1496 {
1497 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1498 goto err;
1499 }
1500 ret = EC_POINT_set_Jprojective_coordinates_GFp(group, r, x, y, z, ctx);
1501
1502err:
1503 BN_CTX_end(ctx);
1504 if (generator != NULL)
1505 EC_POINT_free(generator);
1506 if (new_ctx != NULL)
1507 BN_CTX_free(new_ctx);
1508 if (secrets != NULL)
1509 OPENSSL_free(secrets);
1510 if (pre_comp != NULL)
1511 OPENSSL_free(pre_comp);
1512 if (tmp_felems != NULL)
1513 OPENSSL_free(tmp_felems);
1514 return ret;
1515 }
1516
1517int ec_GFp_nistp224_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
1518 {
1519 int ret = 0;
1520 NISTP224_PRE_COMP *pre = NULL;
1521 int i, j;
1522 BN_CTX *new_ctx = NULL;
1523 BIGNUM *x, *y;
1524 EC_POINT *generator = NULL;
1525 felem tmp_felems[32];
1526
1527 /* throw away old precomputation */
1528 EC_EX_DATA_free_data(&group->extra_data, nistp224_pre_comp_dup,
1529 nistp224_pre_comp_free, nistp224_pre_comp_clear_free);
1530 if (ctx == NULL)
1531 if ((ctx = new_ctx = BN_CTX_new()) == NULL) return 0;
1532 BN_CTX_start(ctx);
1533 if (((x = BN_CTX_get(ctx)) == NULL) ||
1534 ((y = BN_CTX_get(ctx)) == NULL))
1535 goto err;
1536 /* get the generator */
1537 if (group->generator == NULL) goto err;
1538 generator = EC_POINT_new(group);
1539 if (generator == NULL)
1540 goto err;
1541 BN_bin2bn(nistp224_curve_params[3], sizeof (felem_bytearray), x);
1542 BN_bin2bn(nistp224_curve_params[4], sizeof (felem_bytearray), y);
1543 if (!EC_POINT_set_affine_coordinates_GFp(group, generator, x, y, ctx))
1544 goto err;
1545 if ((pre = nistp224_pre_comp_new()) == NULL)
1546 goto err;
1547 /* if the generator is the standard one, use built-in precomputation */
1548 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
1549 {
1550 memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
1551 ret = 1;
1552 goto err;
1553 }
1554 if ((!BN_to_felem(pre->g_pre_comp[0][1][0], &group->generator->X)) ||
1555 (!BN_to_felem(pre->g_pre_comp[0][1][1], &group->generator->Y)) ||
1556 (!BN_to_felem(pre->g_pre_comp[0][1][2], &group->generator->Z)))
1557 goto err;
1558 /* compute 2^56*G, 2^112*G, 2^168*G for the first table,
1559 * 2^28*G, 2^84*G, 2^140*G, 2^196*G for the second one
1560 */
1561 for (i = 1; i <= 8; i <<= 1)
1562 {
1563 point_double(
1564 pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2],
1565 pre->g_pre_comp[0][i][0], pre->g_pre_comp[0][i][1], pre->g_pre_comp[0][i][2]);
1566 for (j = 0; j < 27; ++j)
1567 {
1568 point_double(
1569 pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2],
1570 pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1571 }
1572 if (i == 8)
1573 break;
1574 point_double(
1575 pre->g_pre_comp[0][2*i][0], pre->g_pre_comp[0][2*i][1], pre->g_pre_comp[0][2*i][2],
1576 pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1577 for (j = 0; j < 27; ++j)
1578 {
1579 point_double(
1580 pre->g_pre_comp[0][2*i][0], pre->g_pre_comp[0][2*i][1], pre->g_pre_comp[0][2*i][2],
1581 pre->g_pre_comp[0][2*i][0], pre->g_pre_comp[0][2*i][1], pre->g_pre_comp[0][2*i][2]);
1582 }
1583 }
1584 for (i = 0; i < 2; i++)
1585 {
1586 /* g_pre_comp[i][0] is the point at infinity */
1587 memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0]));
1588 /* the remaining multiples */
1589 /* 2^56*G + 2^112*G resp. 2^84*G + 2^140*G */
1590 point_add(
1591 pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1],
1592 pre->g_pre_comp[i][6][2], pre->g_pre_comp[i][4][0],
1593 pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2],
1594 0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1595 pre->g_pre_comp[i][2][2]);
1596 /* 2^56*G + 2^168*G resp. 2^84*G + 2^196*G */
1597 point_add(
1598 pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1],
1599 pre->g_pre_comp[i][10][2], pre->g_pre_comp[i][8][0],
1600 pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1601 0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1602 pre->g_pre_comp[i][2][2]);
1603 /* 2^112*G + 2^168*G resp. 2^140*G + 2^196*G */
1604 point_add(
1605 pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1],
1606 pre->g_pre_comp[i][12][2], pre->g_pre_comp[i][8][0],
1607 pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1608 0, pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1],
1609 pre->g_pre_comp[i][4][2]);
1610 /* 2^56*G + 2^112*G + 2^168*G resp. 2^84*G + 2^140*G + 2^196*G */
1611 point_add(
1612 pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1],
1613 pre->g_pre_comp[i][14][2], pre->g_pre_comp[i][12][0],
1614 pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
1615 0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1616 pre->g_pre_comp[i][2][2]);
1617 for (j = 1; j < 8; ++j)
1618 {
1619 /* odd multiples: add G resp. 2^28*G */
1620 point_add(
1621 pre->g_pre_comp[i][2*j+1][0], pre->g_pre_comp[i][2*j+1][1],
1622 pre->g_pre_comp[i][2*j+1][2], pre->g_pre_comp[i][2*j][0],
1623 pre->g_pre_comp[i][2*j][1], pre->g_pre_comp[i][2*j][2],
1624 0, pre->g_pre_comp[i][1][0], pre->g_pre_comp[i][1][1],
1625 pre->g_pre_comp[i][1][2]);
1626 }
1627 }
1628 make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_felems);
1629
1630 if (!EC_EX_DATA_set_data(&group->extra_data, pre, nistp224_pre_comp_dup,
1631 nistp224_pre_comp_free, nistp224_pre_comp_clear_free))
1632 goto err;
1633 ret = 1;
1634 pre = NULL;
1635 err:
1636 BN_CTX_end(ctx);
1637 if (generator != NULL)
1638 EC_POINT_free(generator);
1639 if (new_ctx != NULL)
1640 BN_CTX_free(new_ctx);
1641 if (pre)
1642 nistp224_pre_comp_free(pre);
1643 return ret;
1644 }
1645
1646int ec_GFp_nistp224_have_precompute_mult(const EC_GROUP *group)
1647 {
1648 if (EC_EX_DATA_get_data(group->extra_data, nistp224_pre_comp_dup,
1649 nistp224_pre_comp_free, nistp224_pre_comp_clear_free)
1650 != NULL)
1651 return 1;
1652 else
1653 return 0;
1654 }
1655
1656#else
1657static void *dummy=&dummy;
1658#endif
diff --git a/src/lib/libssl/src/crypto/ec/ecp_nistp256.c b/src/lib/libssl/src/crypto/ec/ecp_nistp256.c
new file mode 100644
index 0000000000..4bc0f5dce0
--- /dev/null
+++ b/src/lib/libssl/src/crypto/ec/ecp_nistp256.c
@@ -0,0 +1,2171 @@
1/* crypto/ec/ecp_nistp256.c */
2/*
3 * Written by Adam Langley (Google) for the OpenSSL project
4 */
5/* Copyright 2011 Google Inc.
6 *
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 *
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
11 *
12 * http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
19 */
20
21/*
22 * A 64-bit implementation of the NIST P-256 elliptic curve point multiplication
23 *
24 * OpenSSL integration was taken from Emilia Kasper's work in ecp_nistp224.c.
25 * Otherwise based on Emilia's P224 work, which was inspired by my curve25519
26 * work which got its smarts from Daniel J. Bernstein's work on the same.
27 */
28
29#include <openssl/opensslconf.h>
30#ifndef OPENSSL_NO_EC_NISTP_64_GCC_128
31
32#ifndef OPENSSL_SYS_VMS
33#include <stdint.h>
34#else
35#include <inttypes.h>
36#endif
37
38#include <string.h>
39#include <openssl/err.h>
40#include "ec_lcl.h"
41
42#if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
43 /* even with gcc, the typedef won't work for 32-bit platforms */
44 typedef __uint128_t uint128_t; /* nonstandard; implemented by gcc on 64-bit platforms */
45 typedef __int128_t int128_t;
46#else
47 #error "Need GCC 3.1 or later to define type uint128_t"
48#endif
49
50typedef uint8_t u8;
51typedef uint32_t u32;
52typedef uint64_t u64;
53typedef int64_t s64;
54
55/* The underlying field.
56 *
57 * P256 operates over GF(2^256-2^224+2^192+2^96-1). We can serialise an element
58 * of this field into 32 bytes. We call this an felem_bytearray. */
59
60typedef u8 felem_bytearray[32];
61
62/* These are the parameters of P256, taken from FIPS 186-3, page 86. These
63 * values are big-endian. */
64static const felem_bytearray nistp256_curve_params[5] = {
65 {0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, /* p */
66 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
67 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
68 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
69 {0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, /* a = -3 */
70 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
71 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
  72	 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfc},
  73	{0x5a, 0xc6, 0x35, 0xd8, 0xaa, 0x3a, 0x93, 0xe7, /* b */
74 0xb3, 0xeb, 0xbd, 0x55, 0x76, 0x98, 0x86, 0xbc,
75 0x65, 0x1d, 0x06, 0xb0, 0xcc, 0x53, 0xb0, 0xf6,
76 0x3b, 0xce, 0x3c, 0x3e, 0x27, 0xd2, 0x60, 0x4b},
77 {0x6b, 0x17, 0xd1, 0xf2, 0xe1, 0x2c, 0x42, 0x47, /* x */
78 0xf8, 0xbc, 0xe6, 0xe5, 0x63, 0xa4, 0x40, 0xf2,
79 0x77, 0x03, 0x7d, 0x81, 0x2d, 0xeb, 0x33, 0xa0,
80 0xf4, 0xa1, 0x39, 0x45, 0xd8, 0x98, 0xc2, 0x96},
81 {0x4f, 0xe3, 0x42, 0xe2, 0xfe, 0x1a, 0x7f, 0x9b, /* y */
82 0x8e, 0xe7, 0xeb, 0x4a, 0x7c, 0x0f, 0x9e, 0x16,
83 0x2b, 0xce, 0x33, 0x57, 0x6b, 0x31, 0x5e, 0xce,
84 0xcb, 0xb6, 0x40, 0x68, 0x37, 0xbf, 0x51, 0xf5}
85};
86
87/* The representation of field elements.
88 * ------------------------------------
89 *
90 * We represent field elements with either four 128-bit values, eight 128-bit
91 * values, or four 64-bit values. The field element represented is:
92 * v[0]*2^0 + v[1]*2^64 + v[2]*2^128 + v[3]*2^192 (mod p)
93 * or:
  94 * v[0]*2^0 + v[1]*2^64 + v[2]*2^128 + ... + v[7]*2^448 (mod p)
  95 *
  96 * 128-bit values are called 'limbs'. Since the limbs are spaced only 64 bits
  97 * apart, but are 128 bits wide, the most significant bits of each limb overlap
  98 * with the least significant bits of the next.
  99 *
 100 * A field element with four limbs is an 'felem'. One with eight limbs is a
 101 * 'longfelem'.
 102 *
 103 * A field element with four 64-bit values is called a 'smallfelem'. Small
 104 * values are used as intermediate values before multiplication.
105 */
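/* For example, the field element 2^64 + 5 can be held as the felem
 * {5, 1, 0, 0} or, because the 128-bit limbs overlap, redundantly as
 * {2^64 + 5, 0, 0, 0}; both decode to the same value under the sums above. */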
106
107#define NLIMBS 4
108
109typedef uint128_t limb;
110typedef limb felem[NLIMBS];
111typedef limb longfelem[NLIMBS * 2];
112typedef u64 smallfelem[NLIMBS];
113
114/* This is the value of the prime as four 64-bit words, little-endian. */
115static const u64 kPrime[4] = { 0xfffffffffffffffful, 0xffffffff, 0, 0xffffffff00000001ul };
116static const limb bottom32bits = 0xffffffff;
117static const u64 bottom63bits = 0x7ffffffffffffffful;
118
119/* bin32_to_felem takes a little-endian byte array and converts it into felem
120 * form. This assumes that the CPU is little-endian. */
121static void bin32_to_felem(felem out, const u8 in[32])
122 {
123 out[0] = *((u64*) &in[0]);
124 out[1] = *((u64*) &in[8]);
125 out[2] = *((u64*) &in[16]);
126 out[3] = *((u64*) &in[24]);
127 }
128
 129/* smallfelem_to_bin32 takes a smallfelem and serialises it into a little-endian,
 130 * 32 byte array. This assumes that the CPU is little-endian. */
131static void smallfelem_to_bin32(u8 out[32], const smallfelem in)
132 {
133 *((u64*) &out[0]) = in[0];
134 *((u64*) &out[8]) = in[1];
135 *((u64*) &out[16]) = in[2];
136 *((u64*) &out[24]) = in[3];
137 }
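
/* A minimal portable sketch of bin32_to_felem (illustrative only): going
 * through memcpy avoids the u64 pointer casts used by the two helpers above,
 * while still assuming a little-endian CPU, exactly as they do. */
#if 0
static void bin32_to_felem_sketch(felem out, const u8 in[32])
	{
	u64 w;
	unsigned i;
	for (i = 0; i < 4; i++)
		{
		/* copy 8 little-endian bytes into a u64, then widen to a limb */
		memcpy(&w, in + 8 * i, sizeof(w));
		out[i] = w;
		}
	}
#endif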
138
139/* To preserve endianness when using BN_bn2bin and BN_bin2bn */
140static void flip_endian(u8 *out, const u8 *in, unsigned len)
141 {
142 unsigned i;
143 for (i = 0; i < len; ++i)
144 out[i] = in[len-1-i];
145 }
146
147/* BN_to_felem converts an OpenSSL BIGNUM into an felem */
148static int BN_to_felem(felem out, const BIGNUM *bn)
149 {
150 felem_bytearray b_in;
151 felem_bytearray b_out;
152 unsigned num_bytes;
153
154 /* BN_bn2bin eats leading zeroes */
155 memset(b_out, 0, sizeof b_out);
156 num_bytes = BN_num_bytes(bn);
157 if (num_bytes > sizeof b_out)
158 {
159 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
160 return 0;
161 }
162 if (BN_is_negative(bn))
163 {
164 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
165 return 0;
166 }
167 num_bytes = BN_bn2bin(bn, b_in);
168 flip_endian(b_out, b_in, num_bytes);
169 bin32_to_felem(out, b_out);
170 return 1;
171 }
172
 173/* smallfelem_to_BN converts a smallfelem into an OpenSSL BIGNUM */
174static BIGNUM *smallfelem_to_BN(BIGNUM *out, const smallfelem in)
175 {
176 felem_bytearray b_in, b_out;
177 smallfelem_to_bin32(b_in, in);
178 flip_endian(b_out, b_in, sizeof b_out);
179 return BN_bin2bn(b_out, sizeof b_out, out);
180 }
181
182
183/* Field operations
184 * ---------------- */
185
186static void smallfelem_one(smallfelem out)
187 {
188 out[0] = 1;
189 out[1] = 0;
190 out[2] = 0;
191 out[3] = 0;
192 }
193
194static void smallfelem_assign(smallfelem out, const smallfelem in)
195 {
196 out[0] = in[0];
197 out[1] = in[1];
198 out[2] = in[2];
199 out[3] = in[3];
200 }
201
202static void felem_assign(felem out, const felem in)
203 {
204 out[0] = in[0];
205 out[1] = in[1];
206 out[2] = in[2];
207 out[3] = in[3];
208 }
209
210/* felem_sum sets out = out + in. */
211static void felem_sum(felem out, const felem in)
212 {
213 out[0] += in[0];
214 out[1] += in[1];
215 out[2] += in[2];
216 out[3] += in[3];
217 }
218
219/* felem_small_sum sets out = out + in. */
220static void felem_small_sum(felem out, const smallfelem in)
221 {
222 out[0] += in[0];
223 out[1] += in[1];
224 out[2] += in[2];
225 out[3] += in[3];
226 }
227
228/* felem_scalar sets out = out * scalar */
229static void felem_scalar(felem out, const u64 scalar)
230 {
231 out[0] *= scalar;
232 out[1] *= scalar;
233 out[2] *= scalar;
234 out[3] *= scalar;
235 }
236
237/* longfelem_scalar sets out = out * scalar */
238static void longfelem_scalar(longfelem out, const u64 scalar)
239 {
240 out[0] *= scalar;
241 out[1] *= scalar;
242 out[2] *= scalar;
243 out[3] *= scalar;
244 out[4] *= scalar;
245 out[5] *= scalar;
246 out[6] *= scalar;
247 out[7] *= scalar;
248 }
249
250#define two105m41m9 (((limb)1) << 105) - (((limb)1) << 41) - (((limb)1) << 9)
251#define two105 (((limb)1) << 105)
252#define two105m41p9 (((limb)1) << 105) - (((limb)1) << 41) + (((limb)1) << 9)
253
254/* zero105 is 0 mod p */
255static const felem zero105 = { two105m41m9, two105, two105m41p9, two105m41p9 };
256
 257/* smallfelem_neg sets |out| to |-small|
 258 * On exit:
 259 *   out[i] < 2^106
 260 */
261static void smallfelem_neg(felem out, const smallfelem small)
262 {
263 /* In order to prevent underflow, we subtract from 0 mod p. */
264 out[0] = zero105[0] - small[0];
265 out[1] = zero105[1] - small[1];
266 out[2] = zero105[2] - small[2];
267 out[3] = zero105[3] - small[3];
268 }
269
270/* felem_diff subtracts |in| from |out|
271 * On entry:
272 * in[i] < 2^104
273 * On exit:
274 * out[i] < out[i] + 2^105
275 */
276static void felem_diff(felem out, const felem in)
277 {
278 /* In order to prevent underflow, we add 0 mod p before subtracting. */
279 out[0] += zero105[0];
280 out[1] += zero105[1];
281 out[2] += zero105[2];
282 out[3] += zero105[3];
283
284 out[0] -= in[0];
285 out[1] -= in[1];
286 out[2] -= in[2];
287 out[3] -= in[3];
288 }
289
290#define two107m43m11 (((limb)1) << 107) - (((limb)1) << 43) - (((limb)1) << 11)
291#define two107 (((limb)1) << 107)
292#define two107m43p11 (((limb)1) << 107) - (((limb)1) << 43) + (((limb)1) << 11)
293
294/* zero107 is 0 mod p */
295static const felem zero107 = { two107m43m11, two107, two107m43p11, two107m43p11 };
296
297/* An alternative felem_diff for larger inputs |in|
298 * felem_diff_zero107 subtracts |in| from |out|
299 * On entry:
300 * in[i] < 2^106
301 * On exit:
302 * out[i] < out[i] + 2^107
303 */
304static void felem_diff_zero107(felem out, const felem in)
305 {
306 /* In order to prevent underflow, we add 0 mod p before subtracting. */
307 out[0] += zero107[0];
308 out[1] += zero107[1];
309 out[2] += zero107[2];
310 out[3] += zero107[3];
311
312 out[0] -= in[0];
313 out[1] -= in[1];
314 out[2] -= in[2];
315 out[3] -= in[3];
316 }
317
318/* longfelem_diff subtracts |in| from |out|
319 * On entry:
320 * in[i] < 7*2^67
321 * On exit:
322 * out[i] < out[i] + 2^70 + 2^40
323 */
324static void longfelem_diff(longfelem out, const longfelem in)
325 {
326 static const limb two70m8p6 = (((limb)1) << 70) - (((limb)1) << 8) + (((limb)1) << 6);
327 static const limb two70p40 = (((limb)1) << 70) + (((limb)1) << 40);
328 static const limb two70 = (((limb)1) << 70);
329 static const limb two70m40m38p6 = (((limb)1) << 70) - (((limb)1) << 40) - (((limb)1) << 38) + (((limb)1) << 6);
330 static const limb two70m6 = (((limb)1) << 70) - (((limb)1) << 6);
331
332 /* add 0 mod p to avoid underflow */
333 out[0] += two70m8p6;
334 out[1] += two70p40;
335 out[2] += two70;
336 out[3] += two70m40m38p6;
337 out[4] += two70m6;
338 out[5] += two70m6;
339 out[6] += two70m6;
340 out[7] += two70m6;
341
342 /* in[i] < 7*2^67 < 2^70 - 2^40 - 2^38 + 2^6 */
343 out[0] -= in[0];
344 out[1] -= in[1];
345 out[2] -= in[2];
346 out[3] -= in[3];
347 out[4] -= in[4];
348 out[5] -= in[5];
349 out[6] -= in[6];
350 out[7] -= in[7];
351 }
352
353#define two64m0 (((limb)1) << 64) - 1
354#define two110p32m0 (((limb)1) << 110) + (((limb)1) << 32) - 1
355#define two64m46 (((limb)1) << 64) - (((limb)1) << 46)
356#define two64m32 (((limb)1) << 64) - (((limb)1) << 32)
357
358/* zero110 is 0 mod p */
359static const felem zero110 = { two64m0, two110p32m0, two64m46, two64m32 };
360
361/* felem_shrink converts an felem into a smallfelem. The result isn't quite
362 * minimal as the value may be greater than p.
363 *
364 * On entry:
365 * in[i] < 2^109
366 * On exit:
367 * out[i] < 2^64
368 */
369static void felem_shrink(smallfelem out, const felem in)
370 {
371 felem tmp;
372 u64 a, b, mask;
373 s64 high, low;
374 static const u64 kPrime3Test = 0x7fffffff00000001ul; /* 2^63 - 2^32 + 1 */
375
376 /* Carry 2->3 */
377 tmp[3] = zero110[3] + in[3] + ((u64) (in[2] >> 64));
378 /* tmp[3] < 2^110 */
379
380 tmp[2] = zero110[2] + (u64) in[2];
381 tmp[0] = zero110[0] + in[0];
382 tmp[1] = zero110[1] + in[1];
 383	/* tmp[0] < 2^110, tmp[1] < 2^111, tmp[2] < 2^65 */
384
385 /* We perform two partial reductions where we eliminate the
386 * high-word of tmp[3]. We don't update the other words till the end.
387 */
388 a = tmp[3] >> 64; /* a < 2^46 */
389 tmp[3] = (u64) tmp[3];
390 tmp[3] -= a;
391 tmp[3] += ((limb)a) << 32;
392 /* tmp[3] < 2^79 */
393
394 b = a;
395 a = tmp[3] >> 64; /* a < 2^15 */
396 b += a; /* b < 2^46 + 2^15 < 2^47 */
397 tmp[3] = (u64) tmp[3];
398 tmp[3] -= a;
399 tmp[3] += ((limb)a) << 32;
400 /* tmp[3] < 2^64 + 2^47 */
401
402 /* This adjusts the other two words to complete the two partial
403 * reductions. */
404 tmp[0] += b;
405 tmp[1] -= (((limb)b) << 32);
406
407 /* In order to make space in tmp[3] for the carry from 2 -> 3, we
408 * conditionally subtract kPrime if tmp[3] is large enough. */
409 high = tmp[3] >> 64;
410 /* As tmp[3] < 2^65, high is either 1 or 0 */
411 high <<= 63;
412 high >>= 63;
413 /* high is:
414 * all ones if the high word of tmp[3] is 1
 415	 * all zeros if the high word of tmp[3] is 0 */
416 low = tmp[3];
417 mask = low >> 63;
418 /* mask is:
419 * all ones if the MSB of low is 1
 420	 * all zeros if the MSB of low is 0 */
421 low &= bottom63bits;
422 low -= kPrime3Test;
423 /* if low was greater than kPrime3Test then the MSB is zero */
424 low = ~low;
425 low >>= 63;
426 /* low is:
427 * all ones if low was > kPrime3Test
428 * all zeros if low was <= kPrime3Test */
429 mask = (mask & low) | high;
430 tmp[0] -= mask & kPrime[0];
431 tmp[1] -= mask & kPrime[1];
432 /* kPrime[2] is zero, so omitted */
433 tmp[3] -= mask & kPrime[3];
 434	/* tmp[3] < 2^64 - 2^32 + 1 */
435
436 tmp[1] += ((u64) (tmp[0] >> 64)); tmp[0] = (u64) tmp[0];
437 tmp[2] += ((u64) (tmp[1] >> 64)); tmp[1] = (u64) tmp[1];
438 tmp[3] += ((u64) (tmp[2] >> 64)); tmp[2] = (u64) tmp[2];
439 /* tmp[i] < 2^64 */
440
441 out[0] = tmp[0];
442 out[1] = tmp[1];
443 out[2] = tmp[2];
444 out[3] = tmp[3];
445 }
446
447/* smallfelem_expand converts a smallfelem to an felem */
448static void smallfelem_expand(felem out, const smallfelem in)
449 {
450 out[0] = in[0];
451 out[1] = in[1];
452 out[2] = in[2];
453 out[3] = in[3];
454 }
455
456/* smallfelem_square sets |out| = |small|^2
457 * On entry:
458 * small[i] < 2^64
459 * On exit:
460 * out[i] < 7 * 2^64 < 2^67
461 */
462static void smallfelem_square(longfelem out, const smallfelem small)
463 {
464 limb a;
465 u64 high, low;
466
467 a = ((uint128_t) small[0]) * small[0];
468 low = a;
469 high = a >> 64;
470 out[0] = low;
471 out[1] = high;
472
473 a = ((uint128_t) small[0]) * small[1];
474 low = a;
475 high = a >> 64;
476 out[1] += low;
477 out[1] += low;
478 out[2] = high;
479
480 a = ((uint128_t) small[0]) * small[2];
481 low = a;
482 high = a >> 64;
483 out[2] += low;
484 out[2] *= 2;
485 out[3] = high;
486
487 a = ((uint128_t) small[0]) * small[3];
488 low = a;
489 high = a >> 64;
490 out[3] += low;
491 out[4] = high;
492
493 a = ((uint128_t) small[1]) * small[2];
494 low = a;
495 high = a >> 64;
496 out[3] += low;
497 out[3] *= 2;
498 out[4] += high;
499
500 a = ((uint128_t) small[1]) * small[1];
501 low = a;
502 high = a >> 64;
503 out[2] += low;
504 out[3] += high;
505
506 a = ((uint128_t) small[1]) * small[3];
507 low = a;
508 high = a >> 64;
509 out[4] += low;
510 out[4] *= 2;
511 out[5] = high;
512
513 a = ((uint128_t) small[2]) * small[3];
514 low = a;
515 high = a >> 64;
516 out[5] += low;
517 out[5] *= 2;
518 out[6] = high;
519 out[6] += high;
520
521 a = ((uint128_t) small[2]) * small[2];
522 low = a;
523 high = a >> 64;
524 out[4] += low;
525 out[5] += high;
526
527 a = ((uint128_t) small[3]) * small[3];
528 low = a;
529 high = a >> 64;
530 out[6] += low;
531 out[7] = high;
532 }
533
534/* felem_square sets |out| = |in|^2
535 * On entry:
536 * in[i] < 2^109
537 * On exit:
538 * out[i] < 7 * 2^64 < 2^67
539 */
540static void felem_square(longfelem out, const felem in)
541 {
542 u64 small[4];
543 felem_shrink(small, in);
544 smallfelem_square(out, small);
545 }
546
547/* smallfelem_mul sets |out| = |small1| * |small2|
548 * On entry:
549 * small1[i] < 2^64
550 * small2[i] < 2^64
551 * On exit:
552 * out[i] < 7 * 2^64 < 2^67
553 */
554static void smallfelem_mul(longfelem out, const smallfelem small1, const smallfelem small2)
555 {
556 limb a;
557 u64 high, low;
558
559 a = ((uint128_t) small1[0]) * small2[0];
560 low = a;
561 high = a >> 64;
562 out[0] = low;
563 out[1] = high;
564
565
566 a = ((uint128_t) small1[0]) * small2[1];
567 low = a;
568 high = a >> 64;
569 out[1] += low;
570 out[2] = high;
571
572 a = ((uint128_t) small1[1]) * small2[0];
573 low = a;
574 high = a >> 64;
575 out[1] += low;
576 out[2] += high;
577
578
579 a = ((uint128_t) small1[0]) * small2[2];
580 low = a;
581 high = a >> 64;
582 out[2] += low;
583 out[3] = high;
584
585 a = ((uint128_t) small1[1]) * small2[1];
586 low = a;
587 high = a >> 64;
588 out[2] += low;
589 out[3] += high;
590
591 a = ((uint128_t) small1[2]) * small2[0];
592 low = a;
593 high = a >> 64;
594 out[2] += low;
595 out[3] += high;
596
597
598 a = ((uint128_t) small1[0]) * small2[3];
599 low = a;
600 high = a >> 64;
601 out[3] += low;
602 out[4] = high;
603
604 a = ((uint128_t) small1[1]) * small2[2];
605 low = a;
606 high = a >> 64;
607 out[3] += low;
608 out[4] += high;
609
610 a = ((uint128_t) small1[2]) * small2[1];
611 low = a;
612 high = a >> 64;
613 out[3] += low;
614 out[4] += high;
615
616 a = ((uint128_t) small1[3]) * small2[0];
617 low = a;
618 high = a >> 64;
619 out[3] += low;
620 out[4] += high;
621
622
623 a = ((uint128_t) small1[1]) * small2[3];
624 low = a;
625 high = a >> 64;
626 out[4] += low;
627 out[5] = high;
628
629 a = ((uint128_t) small1[2]) * small2[2];
630 low = a;
631 high = a >> 64;
632 out[4] += low;
633 out[5] += high;
634
635 a = ((uint128_t) small1[3]) * small2[1];
636 low = a;
637 high = a >> 64;
638 out[4] += low;
639 out[5] += high;
640
641
642 a = ((uint128_t) small1[2]) * small2[3];
643 low = a;
644 high = a >> 64;
645 out[5] += low;
646 out[6] = high;
647
648 a = ((uint128_t) small1[3]) * small2[2];
649 low = a;
650 high = a >> 64;
651 out[5] += low;
652 out[6] += high;
653
654
655 a = ((uint128_t) small1[3]) * small2[3];
656 low = a;
657 high = a >> 64;
658 out[6] += low;
659 out[7] = high;
660 }
661
662/* felem_mul sets |out| = |in1| * |in2|
663 * On entry:
664 * in1[i] < 2^109
665 * in2[i] < 2^109
666 * On exit:
667 * out[i] < 7 * 2^64 < 2^67
668 */
669static void felem_mul(longfelem out, const felem in1, const felem in2)
670 {
671 smallfelem small1, small2;
672 felem_shrink(small1, in1);
673 felem_shrink(small2, in2);
674 smallfelem_mul(out, small1, small2);
675 }
676
677/* felem_small_mul sets |out| = |small1| * |in2|
678 * On entry:
679 * small1[i] < 2^64
680 * in2[i] < 2^109
681 * On exit:
682 * out[i] < 7 * 2^64 < 2^67
683 */
684static void felem_small_mul(longfelem out, const smallfelem small1, const felem in2)
685 {
686 smallfelem small2;
687 felem_shrink(small2, in2);
688 smallfelem_mul(out, small1, small2);
689 }
690
691#define two100m36m4 (((limb)1) << 100) - (((limb)1) << 36) - (((limb)1) << 4)
692#define two100 (((limb)1) << 100)
693#define two100m36p4 (((limb)1) << 100) - (((limb)1) << 36) + (((limb)1) << 4)
694/* zero100 is 0 mod p */
695static const felem zero100 = { two100m36m4, two100, two100m36p4, two100m36p4 };
696
697/* Internal function for the different flavours of felem_reduce.
698 * felem_reduce_ reduces the higher coefficients in[4]-in[7].
699 * On entry:
700 * out[0] >= in[6] + 2^32*in[6] + in[7] + 2^32*in[7]
701 * out[1] >= in[7] + 2^32*in[4]
702 * out[2] >= in[5] + 2^32*in[5]
703 * out[3] >= in[4] + 2^32*in[5] + 2^32*in[6]
704 * On exit:
705 * out[0] <= out[0] + in[4] + 2^32*in[5]
706 * out[1] <= out[1] + in[5] + 2^33*in[6]
707 * out[2] <= out[2] + in[7] + 2*in[6] + 2^33*in[7]
708 * out[3] <= out[3] + 2^32*in[4] + 3*in[7]
709 */
710static void felem_reduce_(felem out, const longfelem in)
711 {
712 int128_t c;
713 /* combine common terms from below */
714 c = in[4] + (in[5] << 32);
715 out[0] += c;
716 out[3] -= c;
717
718 c = in[5] - in[7];
719 out[1] += c;
720 out[2] -= c;
721
722 /* the remaining terms */
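	/* Notation: "k: [(a,s),...]" below means 2^k is congruent, mod p, to
	 * the sum of s*2^a over the listed pairs; e.g. 2^256 is congruent to
	 * 2^224 - 2^192 - 2^96 + 1. Each in[i] carries weight 2^(64*i). */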
723 /* 256: [(0,1),(96,-1),(192,-1),(224,1)] */
724 out[1] -= (in[4] << 32);
725 out[3] += (in[4] << 32);
726
727 /* 320: [(32,1),(64,1),(128,-1),(160,-1),(224,-1)] */
728 out[2] -= (in[5] << 32);
729
730 /* 384: [(0,-1),(32,-1),(96,2),(128,2),(224,-1)] */
731 out[0] -= in[6];
732 out[0] -= (in[6] << 32);
733 out[1] += (in[6] << 33);
734 out[2] += (in[6] * 2);
735 out[3] -= (in[6] << 32);
736
737 /* 448: [(0,-1),(32,-1),(64,-1),(128,1),(160,2),(192,3)] */
738 out[0] -= in[7];
739 out[0] -= (in[7] << 32);
740 out[2] += (in[7] << 33);
741 out[3] += (in[7] * 3);
742 }
743
744/* felem_reduce converts a longfelem into an felem.
745 * To be called directly after felem_square or felem_mul.
746 * On entry:
747 * in[0] < 2^64, in[1] < 3*2^64, in[2] < 5*2^64, in[3] < 7*2^64
 748 * in[4] < 7*2^64, in[5] < 5*2^64, in[6] < 3*2^64, in[7] < 2^64
749 * On exit:
750 * out[i] < 2^101
751 */
752static void felem_reduce(felem out, const longfelem in)
753 {
754 out[0] = zero100[0] + in[0];
755 out[1] = zero100[1] + in[1];
756 out[2] = zero100[2] + in[2];
757 out[3] = zero100[3] + in[3];
758
759 felem_reduce_(out, in);
760
761 /* out[0] > 2^100 - 2^36 - 2^4 - 3*2^64 - 3*2^96 - 2^64 - 2^96 > 0
762 * out[1] > 2^100 - 2^64 - 7*2^96 > 0
763 * out[2] > 2^100 - 2^36 + 2^4 - 5*2^64 - 5*2^96 > 0
764 * out[3] > 2^100 - 2^36 + 2^4 - 7*2^64 - 5*2^96 - 3*2^96 > 0
765 *
766 * out[0] < 2^100 + 2^64 + 7*2^64 + 5*2^96 < 2^101
767 * out[1] < 2^100 + 3*2^64 + 5*2^64 + 3*2^97 < 2^101
768 * out[2] < 2^100 + 5*2^64 + 2^64 + 3*2^65 + 2^97 < 2^101
769 * out[3] < 2^100 + 7*2^64 + 7*2^96 + 3*2^64 < 2^101
770 */
771 }
772
773/* felem_reduce_zero105 converts a larger longfelem into an felem.
774 * On entry:
 775 * in[i] < 2^71
776 * On exit:
777 * out[i] < 2^106
778 */
779static void felem_reduce_zero105(felem out, const longfelem in)
780 {
781 out[0] = zero105[0] + in[0];
782 out[1] = zero105[1] + in[1];
783 out[2] = zero105[2] + in[2];
784 out[3] = zero105[3] + in[3];
785
786 felem_reduce_(out, in);
787
788 /* out[0] > 2^105 - 2^41 - 2^9 - 2^71 - 2^103 - 2^71 - 2^103 > 0
789 * out[1] > 2^105 - 2^71 - 2^103 > 0
790 * out[2] > 2^105 - 2^41 + 2^9 - 2^71 - 2^103 > 0
791 * out[3] > 2^105 - 2^41 + 2^9 - 2^71 - 2^103 - 2^103 > 0
792 *
793 * out[0] < 2^105 + 2^71 + 2^71 + 2^103 < 2^106
794 * out[1] < 2^105 + 2^71 + 2^71 + 2^103 < 2^106
795 * out[2] < 2^105 + 2^71 + 2^71 + 2^71 + 2^103 < 2^106
796 * out[3] < 2^105 + 2^71 + 2^103 + 2^71 < 2^106
797 */
798 }
799
800/* subtract_u64 sets *result = *result - v and *carry to one if the subtraction
801 * underflowed. */
802static void subtract_u64(u64* result, u64* carry, u64 v)
803 {
804 uint128_t r = *result;
805 r -= v;
806 *carry = (r >> 64) & 1;
807 *result = (u64) r;
808 }
809
810/* felem_contract converts |in| to its unique, minimal representation.
811 * On entry:
812 * in[i] < 2^109
813 */
814static void felem_contract(smallfelem out, const felem in)
815 {
816 unsigned i;
817 u64 all_equal_so_far = 0, result = 0, carry;
818
819 felem_shrink(out, in);
 820	/* out now fits in four u64s, but its value may still be >= p */
821
822 all_equal_so_far--;
 823	/* We are doing a constant-time test if out >= kPrime. We need to
 824	 * compare each u64, from most-significant to least significant. For
 825	 * each one, if all words so far have been equal (all_equal_so_far is
 826	 * all ones) then a non-equal result is the answer. Otherwise we continue. */
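	/* i is unsigned, so after the i == 0 iteration the decrement wraps
	 * around and the i < 4 test ends the loop */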
827 for (i = 3; i < 4; i--)
828 {
829 u64 equal;
830 uint128_t a = ((uint128_t) kPrime[i]) - out[i];
831 /* if out[i] > kPrime[i] then a will underflow and the high
832 * 64-bits will all be set. */
833 result |= all_equal_so_far & ((u64) (a >> 64));
834
835 /* if kPrime[i] == out[i] then |equal| will be all zeros and
836 * the decrement will make it all ones. */
837 equal = kPrime[i] ^ out[i];
838 equal--;
839 equal &= equal << 32;
840 equal &= equal << 16;
841 equal &= equal << 8;
842 equal &= equal << 4;
843 equal &= equal << 2;
844 equal &= equal << 1;
845 equal = ((s64) equal) >> 63;
846
847 all_equal_so_far &= equal;
848 }
849
850 /* if all_equal_so_far is still all ones then the two values are equal
851 * and so out >= kPrime is true. */
852 result |= all_equal_so_far;
853
854 /* if out >= kPrime then we subtract kPrime. */
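	/* result is either all zeros or all ones, so the masked subtractions
	 * below either subtract kPrime in full or subtract nothing; the extra
	 * subtract_u64 calls propagate each borrow into the higher words */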
855 subtract_u64(&out[0], &carry, result & kPrime[0]);
856 subtract_u64(&out[1], &carry, carry);
857 subtract_u64(&out[2], &carry, carry);
858 subtract_u64(&out[3], &carry, carry);
859
860 subtract_u64(&out[1], &carry, result & kPrime[1]);
861 subtract_u64(&out[2], &carry, carry);
862 subtract_u64(&out[3], &carry, carry);
863
864 subtract_u64(&out[2], &carry, result & kPrime[2]);
865 subtract_u64(&out[3], &carry, carry);
866
867 subtract_u64(&out[3], &carry, result & kPrime[3]);
868 }
869
870static void smallfelem_square_contract(smallfelem out, const smallfelem in)
871 {
872 longfelem longtmp;
873 felem tmp;
874
875 smallfelem_square(longtmp, in);
876 felem_reduce(tmp, longtmp);
877 felem_contract(out, tmp);
878 }
879
880static void smallfelem_mul_contract(smallfelem out, const smallfelem in1, const smallfelem in2)
881 {
882 longfelem longtmp;
883 felem tmp;
884
885 smallfelem_mul(longtmp, in1, in2);
886 felem_reduce(tmp, longtmp);
887 felem_contract(out, tmp);
888 }
889
 890/* smallfelem_is_zero returns a limb with all bits set if |small| == 0 (mod p)
 891 * and 0 otherwise.
892 * On entry:
893 * small[i] < 2^64
894 */
895static limb smallfelem_is_zero(const smallfelem small)
896 {
897 limb result;
898 u64 is_p;
899
900 u64 is_zero = small[0] | small[1] | small[2] | small[3];
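	/* after the decrement, is_zero is all ones exactly when every word of
	 * small was zero; the shift-and-AND ladder then ANDs every bit into
	 * bit 63, and the arithmetic shift broadcasts that bit */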
901 is_zero--;
902 is_zero &= is_zero << 32;
903 is_zero &= is_zero << 16;
904 is_zero &= is_zero << 8;
905 is_zero &= is_zero << 4;
906 is_zero &= is_zero << 2;
907 is_zero &= is_zero << 1;
908 is_zero = ((s64) is_zero) >> 63;
909
910 is_p = (small[0] ^ kPrime[0]) |
911 (small[1] ^ kPrime[1]) |
912 (small[2] ^ kPrime[2]) |
913 (small[3] ^ kPrime[3]);
914 is_p--;
915 is_p &= is_p << 32;
916 is_p &= is_p << 16;
917 is_p &= is_p << 8;
918 is_p &= is_p << 4;
919 is_p &= is_p << 2;
920 is_p &= is_p << 1;
921 is_p = ((s64) is_p) >> 63;
922
923 is_zero |= is_p;
924
925 result = is_zero;
926 result |= ((limb) is_zero) << 64;
927 return result;
928 }
929
930static int smallfelem_is_zero_int(const smallfelem small)
931 {
932 return (int) (smallfelem_is_zero(small) & ((limb)1));
933 }
934
935/* felem_inv calculates |out| = |in|^{-1}
936 *
937 * Based on Fermat's Little Theorem:
938 * a^p = a (mod p)
939 * a^{p-1} = 1 (mod p)
940 * a^{p-2} = a^{-1} (mod p)
941 */
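/* Here p - 2 = 2^256 - 2^224 + 2^192 + 2^96 - 3; the addition chain below
 * assembles |in|^(p-2) from the intermediate powers |in|^{2^I - 1}. */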
942static void felem_inv(felem out, const felem in)
943 {
944 felem ftmp, ftmp2;
945 /* each e_I will hold |in|^{2^I - 1} */
946 felem e2, e4, e8, e16, e32, e64;
947 longfelem tmp;
948 unsigned i;
949
950 felem_square(tmp, in); felem_reduce(ftmp, tmp); /* 2^1 */
951 felem_mul(tmp, in, ftmp); felem_reduce(ftmp, tmp); /* 2^2 - 2^0 */
952 felem_assign(e2, ftmp);
953 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^3 - 2^1 */
954 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^4 - 2^2 */
955 felem_mul(tmp, ftmp, e2); felem_reduce(ftmp, tmp); /* 2^4 - 2^0 */
956 felem_assign(e4, ftmp);
957 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^5 - 2^1 */
958 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^6 - 2^2 */
959 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^7 - 2^3 */
960 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^8 - 2^4 */
961 felem_mul(tmp, ftmp, e4); felem_reduce(ftmp, tmp); /* 2^8 - 2^0 */
962 felem_assign(e8, ftmp);
963 for (i = 0; i < 8; i++) {
964 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp);
965 } /* 2^16 - 2^8 */
966 felem_mul(tmp, ftmp, e8); felem_reduce(ftmp, tmp); /* 2^16 - 2^0 */
967 felem_assign(e16, ftmp);
968 for (i = 0; i < 16; i++) {
969 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp);
970 } /* 2^32 - 2^16 */
971 felem_mul(tmp, ftmp, e16); felem_reduce(ftmp, tmp); /* 2^32 - 2^0 */
972 felem_assign(e32, ftmp);
973 for (i = 0; i < 32; i++) {
974 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp);
975 } /* 2^64 - 2^32 */
976 felem_assign(e64, ftmp);
977 felem_mul(tmp, ftmp, in); felem_reduce(ftmp, tmp); /* 2^64 - 2^32 + 2^0 */
978 for (i = 0; i < 192; i++) {
979 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp);
980 } /* 2^256 - 2^224 + 2^192 */
981
982 felem_mul(tmp, e64, e32); felem_reduce(ftmp2, tmp); /* 2^64 - 2^0 */
983 for (i = 0; i < 16; i++) {
984 felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp);
985 } /* 2^80 - 2^16 */
986 felem_mul(tmp, ftmp2, e16); felem_reduce(ftmp2, tmp); /* 2^80 - 2^0 */
987 for (i = 0; i < 8; i++) {
988 felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp);
989 } /* 2^88 - 2^8 */
990 felem_mul(tmp, ftmp2, e8); felem_reduce(ftmp2, tmp); /* 2^88 - 2^0 */
991 for (i = 0; i < 4; i++) {
992 felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp);
993 } /* 2^92 - 2^4 */
994 felem_mul(tmp, ftmp2, e4); felem_reduce(ftmp2, tmp); /* 2^92 - 2^0 */
995 felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp); /* 2^93 - 2^1 */
996 felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp); /* 2^94 - 2^2 */
997 felem_mul(tmp, ftmp2, e2); felem_reduce(ftmp2, tmp); /* 2^94 - 2^0 */
998 felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp); /* 2^95 - 2^1 */
999 felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp); /* 2^96 - 2^2 */
1000 felem_mul(tmp, ftmp2, in); felem_reduce(ftmp2, tmp); /* 2^96 - 3 */
1001
1002 felem_mul(tmp, ftmp2, ftmp); felem_reduce(out, tmp); /* 2^256 - 2^224 + 2^192 + 2^96 - 3 */
1003 }
1004
1005static void smallfelem_inv_contract(smallfelem out, const smallfelem in)
1006 {
1007 felem tmp;
1008
1009 smallfelem_expand(tmp, in);
1010 felem_inv(tmp, tmp);
1011 felem_contract(out, tmp);
1012 }
1013
1014/* Group operations
1015 * ----------------
1016 *
1017 * Building on top of the field operations we have the operations on the
1018 * elliptic curve group itself. Points on the curve are represented in Jacobian
1019 * coordinates */
1020
1021/* point_double calculates 2*(x_in, y_in, z_in)
1022 *
1023 * The method is taken from:
1024 * http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2001-b
1025 *
1026 * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed,
1027 * while x_out == y_in is not (maybe this works, but it's not tested). */
1028static void
1029point_double(felem x_out, felem y_out, felem z_out,
1030 const felem x_in, const felem y_in, const felem z_in)
1031 {
1032 longfelem tmp, tmp2;
1033 felem delta, gamma, beta, alpha, ftmp, ftmp2;
1034 smallfelem small1, small2;
1035
1036 felem_assign(ftmp, x_in);
1037 /* ftmp[i] < 2^106 */
1038 felem_assign(ftmp2, x_in);
1039 /* ftmp2[i] < 2^106 */
1040
1041 /* delta = z^2 */
1042 felem_square(tmp, z_in);
1043 felem_reduce(delta, tmp);
1044 /* delta[i] < 2^101 */
1045
1046 /* gamma = y^2 */
1047 felem_square(tmp, y_in);
1048 felem_reduce(gamma, tmp);
1049 /* gamma[i] < 2^101 */
1050 felem_shrink(small1, gamma);
1051
1052 /* beta = x*gamma */
1053 felem_small_mul(tmp, small1, x_in);
1054 felem_reduce(beta, tmp);
1055 /* beta[i] < 2^101 */
1056
1057 /* alpha = 3*(x-delta)*(x+delta) */
1058 felem_diff(ftmp, delta);
1059 /* ftmp[i] < 2^105 + 2^106 < 2^107 */
1060 felem_sum(ftmp2, delta);
1061 /* ftmp2[i] < 2^105 + 2^106 < 2^107 */
1062 felem_scalar(ftmp2, 3);
1063 /* ftmp2[i] < 3 * 2^107 < 2^109 */
1064 felem_mul(tmp, ftmp, ftmp2);
1065 felem_reduce(alpha, tmp);
1066 /* alpha[i] < 2^101 */
1067 felem_shrink(small2, alpha);
1068
1069 /* x' = alpha^2 - 8*beta */
1070 smallfelem_square(tmp, small2);
1071 felem_reduce(x_out, tmp);
1072 felem_assign(ftmp, beta);
1073 felem_scalar(ftmp, 8);
1074 /* ftmp[i] < 8 * 2^101 = 2^104 */
1075 felem_diff(x_out, ftmp);
1076 /* x_out[i] < 2^105 + 2^101 < 2^106 */
1077
1078 /* z' = (y + z)^2 - gamma - delta */
1079 felem_sum(delta, gamma);
1080 /* delta[i] < 2^101 + 2^101 = 2^102 */
1081 felem_assign(ftmp, y_in);
1082 felem_sum(ftmp, z_in);
1083 /* ftmp[i] < 2^106 + 2^106 = 2^107 */
1084 felem_square(tmp, ftmp);
1085 felem_reduce(z_out, tmp);
1086 felem_diff(z_out, delta);
1087 /* z_out[i] < 2^105 + 2^101 < 2^106 */
1088
1089 /* y' = alpha*(4*beta - x') - 8*gamma^2 */
1090 felem_scalar(beta, 4);
1091 /* beta[i] < 4 * 2^101 = 2^103 */
1092 felem_diff_zero107(beta, x_out);
1093 /* beta[i] < 2^107 + 2^103 < 2^108 */
1094 felem_small_mul(tmp, small2, beta);
1095 /* tmp[i] < 7 * 2^64 < 2^67 */
1096 smallfelem_square(tmp2, small1);
1097 /* tmp2[i] < 7 * 2^64 */
1098 longfelem_scalar(tmp2, 8);
1099 /* tmp2[i] < 8 * 7 * 2^64 = 7 * 2^67 */
1100 longfelem_diff(tmp, tmp2);
1101 /* tmp[i] < 2^67 + 2^70 + 2^40 < 2^71 */
1102 felem_reduce_zero105(y_out, tmp);
1103 /* y_out[i] < 2^106 */
1104 }
1105
1106/* point_double_small is the same as point_double, except that it operates on
1107 * smallfelems */
1108static void
1109point_double_small(smallfelem x_out, smallfelem y_out, smallfelem z_out,
1110 const smallfelem x_in, const smallfelem y_in, const smallfelem z_in)
1111 {
1112 felem felem_x_out, felem_y_out, felem_z_out;
1113 felem felem_x_in, felem_y_in, felem_z_in;
1114
1115 smallfelem_expand(felem_x_in, x_in);
1116 smallfelem_expand(felem_y_in, y_in);
1117 smallfelem_expand(felem_z_in, z_in);
1118 point_double(felem_x_out, felem_y_out, felem_z_out,
1119 felem_x_in, felem_y_in, felem_z_in);
1120 felem_shrink(x_out, felem_x_out);
1121 felem_shrink(y_out, felem_y_out);
1122 felem_shrink(z_out, felem_z_out);
1123 }
1124
1125/* copy_conditional copies in to out iff mask is all ones. */
1126static void
1127copy_conditional(felem out, const felem in, limb mask)
1128 {
1129 unsigned i;
1130 for (i = 0; i < NLIMBS; ++i)
1131 {
1132 const limb tmp = mask & (in[i] ^ out[i]);
1133 out[i] ^= tmp;
1134 }
1135 }
1136
1137/* copy_small_conditional copies in to out iff mask is all ones. */
1138static void
1139copy_small_conditional(felem out, const smallfelem in, limb mask)
1140 {
1141 unsigned i;
1142 const u64 mask64 = mask;
1143 for (i = 0; i < NLIMBS; ++i)
1144 {
1145 out[i] = ((limb) (in[i] & mask64)) | (out[i] & ~mask);
1146 }
1147 }
1148
1149/* point_add calculates (x1, y1, z1) + (x2, y2, z2)
1150 *
1151 * The method is taken from:
1152 * http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-2007-bl,
1153 * adapted for mixed addition (z2 = 1, or z2 = 0 for the point at infinity).
1154 *
1155 * This function includes a branch for checking whether the two input points
1156 * are equal (while not equal to the point at infinity). This case never
1157 * happens during single point multiplication, so there is no timing leak for
1158 * ECDH or ECDSA signing. */
1159static void point_add(felem x3, felem y3, felem z3,
1160 const felem x1, const felem y1, const felem z1,
1161 const int mixed, const smallfelem x2, const smallfelem y2, const smallfelem z2)
1162 {
1163 felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, ftmp6, x_out, y_out, z_out;
1164 longfelem tmp, tmp2;
1165 smallfelem small1, small2, small3, small4, small5;
1166 limb x_equal, y_equal, z1_is_zero, z2_is_zero;
1167
1168 felem_shrink(small3, z1);
1169
1170 z1_is_zero = smallfelem_is_zero(small3);
1171 z2_is_zero = smallfelem_is_zero(z2);
1172
1173 /* ftmp = z1z1 = z1**2 */
1174 smallfelem_square(tmp, small3);
1175 felem_reduce(ftmp, tmp);
1176 /* ftmp[i] < 2^101 */
1177 felem_shrink(small1, ftmp);
1178
1179 if(!mixed)
1180 {
1181 /* ftmp2 = z2z2 = z2**2 */
1182 smallfelem_square(tmp, z2);
1183 felem_reduce(ftmp2, tmp);
1184 /* ftmp2[i] < 2^101 */
1185 felem_shrink(small2, ftmp2);
1186
1187 felem_shrink(small5, x1);
1188
1189 /* u1 = ftmp3 = x1*z2z2 */
1190 smallfelem_mul(tmp, small5, small2);
1191 felem_reduce(ftmp3, tmp);
1192 /* ftmp3[i] < 2^101 */
1193
1194 /* ftmp5 = z1 + z2 */
1195 felem_assign(ftmp5, z1);
1196 felem_small_sum(ftmp5, z2);
1197 /* ftmp5[i] < 2^107 */
1198
1199 /* ftmp5 = (z1 + z2)**2 - (z1z1 + z2z2) = 2z1z2 */
1200 felem_square(tmp, ftmp5);
1201 felem_reduce(ftmp5, tmp);
1202 /* ftmp2 = z2z2 + z1z1 */
1203 felem_sum(ftmp2, ftmp);
1204 /* ftmp2[i] < 2^101 + 2^101 = 2^102 */
1205 felem_diff(ftmp5, ftmp2);
1206 /* ftmp5[i] < 2^105 + 2^101 < 2^106 */
1207
1208 /* ftmp2 = z2 * z2z2 */
1209 smallfelem_mul(tmp, small2, z2);
1210 felem_reduce(ftmp2, tmp);
1211
1212 /* s1 = ftmp2 = y1 * z2**3 */
1213 felem_mul(tmp, y1, ftmp2);
1214 felem_reduce(ftmp6, tmp);
1215 /* ftmp6[i] < 2^101 */
1216 }
1217 else
1218 {
1219 /* We'll assume z2 = 1 (special case z2 = 0 is handled later) */
1220
1221 /* u1 = ftmp3 = x1*z2z2 */
1222 felem_assign(ftmp3, x1);
1223 /* ftmp3[i] < 2^106 */
1224
1225 /* ftmp5 = 2z1z2 */
1226 felem_assign(ftmp5, z1);
1227 felem_scalar(ftmp5, 2);
1228 /* ftmp5[i] < 2*2^106 = 2^107 */
1229
1230 /* s1 = ftmp2 = y1 * z2**3 */
1231 felem_assign(ftmp6, y1);
1232 /* ftmp6[i] < 2^106 */
1233 }
1234
1235 /* u2 = x2*z1z1 */
1236 smallfelem_mul(tmp, x2, small1);
1237 felem_reduce(ftmp4, tmp);
1238
1239 /* h = ftmp4 = u2 - u1 */
1240 felem_diff_zero107(ftmp4, ftmp3);
1241 /* ftmp4[i] < 2^107 + 2^101 < 2^108 */
1242 felem_shrink(small4, ftmp4);
1243
1244 x_equal = smallfelem_is_zero(small4);
1245
1246 /* z_out = ftmp5 * h */
1247 felem_small_mul(tmp, small4, ftmp5);
1248 felem_reduce(z_out, tmp);
1249 /* z_out[i] < 2^101 */
1250
1251 /* ftmp = z1 * z1z1 */
1252 smallfelem_mul(tmp, small1, small3);
1253 felem_reduce(ftmp, tmp);
1254
1255 /* s2 = tmp = y2 * z1**3 */
1256 felem_small_mul(tmp, y2, ftmp);
1257 felem_reduce(ftmp5, tmp);
1258
1259 /* r = ftmp5 = (s2 - s1)*2 */
1260 felem_diff_zero107(ftmp5, ftmp6);
1261 /* ftmp5[i] < 2^107 + 2^107 = 2^108*/
1262 felem_scalar(ftmp5, 2);
1263 /* ftmp5[i] < 2^109 */
1264 felem_shrink(small1, ftmp5);
1265 y_equal = smallfelem_is_zero(small1);
1266
1267 if (x_equal && y_equal && !z1_is_zero && !z2_is_zero)
1268 {
1269 point_double(x3, y3, z3, x1, y1, z1);
1270 return;
1271 }
1272
1273 /* I = ftmp = (2h)**2 */
1274 felem_assign(ftmp, ftmp4);
1275 felem_scalar(ftmp, 2);
1276 /* ftmp[i] < 2*2^108 = 2^109 */
1277 felem_square(tmp, ftmp);
1278 felem_reduce(ftmp, tmp);
1279
1280 /* J = ftmp2 = h * I */
1281 felem_mul(tmp, ftmp4, ftmp);
1282 felem_reduce(ftmp2, tmp);
1283
1284 /* V = ftmp4 = U1 * I */
1285 felem_mul(tmp, ftmp3, ftmp);
1286 felem_reduce(ftmp4, tmp);
1287
1288 /* x_out = r**2 - J - 2V */
1289 smallfelem_square(tmp, small1);
1290 felem_reduce(x_out, tmp);
1291 felem_assign(ftmp3, ftmp4);
1292 felem_scalar(ftmp4, 2);
1293 felem_sum(ftmp4, ftmp2);
1294 /* ftmp4[i] < 2*2^101 + 2^101 < 2^103 */
1295 felem_diff(x_out, ftmp4);
1296 /* x_out[i] < 2^105 + 2^101 */
1297
1298 /* y_out = r(V-x_out) - 2 * s1 * J */
1299 felem_diff_zero107(ftmp3, x_out);
1300 /* ftmp3[i] < 2^107 + 2^101 < 2^108 */
1301 felem_small_mul(tmp, small1, ftmp3);
1302 felem_mul(tmp2, ftmp6, ftmp2);
1303 longfelem_scalar(tmp2, 2);
1304 /* tmp2[i] < 2*2^67 = 2^68 */
1305 longfelem_diff(tmp, tmp2);
1306 /* tmp[i] < 2^67 + 2^70 + 2^40 < 2^71 */
1307 felem_reduce_zero105(y_out, tmp);
1308 /* y_out[i] < 2^106 */
1309
1310 copy_small_conditional(x_out, x2, z1_is_zero);
1311 copy_conditional(x_out, x1, z2_is_zero);
1312 copy_small_conditional(y_out, y2, z1_is_zero);
1313 copy_conditional(y_out, y1, z2_is_zero);
1314 copy_small_conditional(z_out, z2, z1_is_zero);
1315 copy_conditional(z_out, z1, z2_is_zero);
1316 felem_assign(x3, x_out);
1317 felem_assign(y3, y_out);
1318 felem_assign(z3, z_out);
1319 }
1320
1321/* point_add_small is the same as point_add, except that it operates on
1322 * smallfelems */
1323static void point_add_small(smallfelem x3, smallfelem y3, smallfelem z3,
1324 smallfelem x1, smallfelem y1, smallfelem z1,
1325 smallfelem x2, smallfelem y2, smallfelem z2)
1326 {
1327 felem felem_x3, felem_y3, felem_z3;
1328 felem felem_x1, felem_y1, felem_z1;
1329 smallfelem_expand(felem_x1, x1);
1330 smallfelem_expand(felem_y1, y1);
1331 smallfelem_expand(felem_z1, z1);
1332 point_add(felem_x3, felem_y3, felem_z3, felem_x1, felem_y1, felem_z1, 0, x2, y2, z2);
1333 felem_shrink(x3, felem_x3);
1334 felem_shrink(y3, felem_y3);
1335 felem_shrink(z3, felem_z3);
1336 }
1337
1338/* Base point precomputation
1339 * --------------------------
1340 *
1341 * Two different sorts of precomputed tables are used in the following code.
1342 * Each contains various points on the curve, where each point is three field
1343 * elements (x, y, z).
1344 *
1345 * For the base point table, z is usually 1 (0 for the point at infinity).
1346 * This table has 2 * 16 elements, starting with the following:
1347 * index | bits | point
1348 * ------+---------+------------------------------
1349 * 0 | 0 0 0 0 | 0G
1350 * 1 | 0 0 0 1 | 1G
1351 * 2 | 0 0 1 0 | 2^64G
1352 * 3 | 0 0 1 1 | (2^64 + 1)G
1353 * 4 | 0 1 0 0 | 2^128G
1354 * 5 | 0 1 0 1 | (2^128 + 1)G
1355 * 6 | 0 1 1 0 | (2^128 + 2^64)G
1356 * 7 | 0 1 1 1 | (2^128 + 2^64 + 1)G
1357 * 8 | 1 0 0 0 | 2^192G
1358 * 9 | 1 0 0 1 | (2^192 + 1)G
1359 * 10 | 1 0 1 0 | (2^192 + 2^64)G
1360 * 11 | 1 0 1 1 | (2^192 + 2^64 + 1)G
1361 * 12 | 1 1 0 0 | (2^192 + 2^128)G
1362 * 13 | 1 1 0 1 | (2^192 + 2^128 + 1)G
1363 * 14 | 1 1 1 0 | (2^192 + 2^128 + 2^64)G
1364 * 15 | 1 1 1 1 | (2^192 + 2^128 + 2^64 + 1)G
1365 * followed by a copy of this with each element multiplied by 2^32.
1366 *
1367 * The reason for this is so that we can clock bits into four different
1368 * locations when doing simple scalar multiplies against the base point,
1369 * and then another four locations using the second 16 elements.
1370 *
1371 * Tables for other points P have table[i] = i*P for i in 0 .. 16. */
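
As a quick illustration of the layout described above, here is a small standalone sketch (my illustration, not part of the imported file) that decodes a 4-bit comb index from the first base-point table into the multiple of G it stores; entries in the second half of the table hold the same combinations multiplied by 2^32.

#include <stdio.h>

/* Decode a 4-bit index from the first base-point table: bit k of |idx|
 * contributes 2^(64*k) to the multiple of G stored at that index. */
static void print_comb_multiple(unsigned idx)
	{
	unsigned k;
	printf("index %2u = ", idx);
	if (idx == 0)
		printf("0");
	for (k = 0; k < 4; k++)
		if (idx & (1u << k))
			printf("%s2^%u", (idx & ((1u << k) - 1)) ? " + " : "", 64 * k);
	printf(" (times G)\n");
	}

int main(void)
	{
	unsigned i;
	for (i = 0; i < 16; i++)
		print_comb_multiple(i);
	return 0;
	}
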
1372
1373/* gmul is the table of precomputed base points */
1374static const smallfelem gmul[2][16][3] =
1375{{{{0, 0, 0, 0},
1376 {0, 0, 0, 0},
1377 {0, 0, 0, 0}},
1378 {{0xf4a13945d898c296, 0x77037d812deb33a0, 0xf8bce6e563a440f2, 0x6b17d1f2e12c4247},
1379 {0xcbb6406837bf51f5, 0x2bce33576b315ece, 0x8ee7eb4a7c0f9e16, 0x4fe342e2fe1a7f9b},
1380 {1, 0, 0, 0}},
1381 {{0x90e75cb48e14db63, 0x29493baaad651f7e, 0x8492592e326e25de, 0x0fa822bc2811aaa5},
1382 {0xe41124545f462ee7, 0x34b1a65050fe82f5, 0x6f4ad4bcb3df188b, 0xbff44ae8f5dba80d},
1383 {1, 0, 0, 0}},
1384 {{0x93391ce2097992af, 0xe96c98fd0d35f1fa, 0xb257c0de95e02789, 0x300a4bbc89d6726f},
1385 {0xaa54a291c08127a0, 0x5bb1eeada9d806a5, 0x7f1ddb25ff1e3c6f, 0x72aac7e0d09b4644},
1386 {1, 0, 0, 0}},
1387 {{0x57c84fc9d789bd85, 0xfc35ff7dc297eac3, 0xfb982fd588c6766e, 0x447d739beedb5e67},
1388 {0x0c7e33c972e25b32, 0x3d349b95a7fae500, 0xe12e9d953a4aaff7, 0x2d4825ab834131ee},
1389 {1, 0, 0, 0}},
1390 {{0x13949c932a1d367f, 0xef7fbd2b1a0a11b7, 0xddc6068bb91dfc60, 0xef9519328a9c72ff},
1391 {0x196035a77376d8a8, 0x23183b0895ca1740, 0xc1ee9807022c219c, 0x611e9fc37dbb2c9b},
1392 {1, 0, 0, 0}},
1393 {{0xcae2b1920b57f4bc, 0x2936df5ec6c9bc36, 0x7dea6482e11238bf, 0x550663797b51f5d8},
1394 {0x44ffe216348a964c, 0x9fb3d576dbdefbe1, 0x0afa40018d9d50e5, 0x157164848aecb851},
1395 {1, 0, 0, 0}},
1396 {{0xe48ecafffc5cde01, 0x7ccd84e70d715f26, 0xa2e8f483f43e4391, 0xeb5d7745b21141ea},
1397 {0xcac917e2731a3479, 0x85f22cfe2844b645, 0x0990e6a158006cee, 0xeafd72ebdbecc17b},
1398 {1, 0, 0, 0}},
1399 {{0x6cf20ffb313728be, 0x96439591a3c6b94a, 0x2736ff8344315fc5, 0xa6d39677a7849276},
1400 {0xf2bab833c357f5f4, 0x824a920c2284059b, 0x66b8babd2d27ecdf, 0x674f84749b0b8816},
1401 {1, 0, 0, 0}},
1402 {{0x2df48c04677c8a3e, 0x74e02f080203a56b, 0x31855f7db8c7fedb, 0x4e769e7672c9ddad},
1403 {0xa4c36165b824bbb0, 0xfb9ae16f3b9122a5, 0x1ec0057206947281, 0x42b99082de830663},
1404 {1, 0, 0, 0}},
1405 {{0x6ef95150dda868b9, 0xd1f89e799c0ce131, 0x7fdc1ca008a1c478, 0x78878ef61c6ce04d},
1406 {0x9c62b9121fe0d976, 0x6ace570ebde08d4f, 0xde53142c12309def, 0xb6cb3f5d7b72c321},
1407 {1, 0, 0, 0}},
1408 {{0x7f991ed2c31a3573, 0x5b82dd5bd54fb496, 0x595c5220812ffcae, 0x0c88bc4d716b1287},
1409 {0x3a57bf635f48aca8, 0x7c8181f4df2564f3, 0x18d1b5b39c04e6aa, 0xdd5ddea3f3901dc6},
1410 {1, 0, 0, 0}},
1411 {{0xe96a79fb3e72ad0c, 0x43a0a28c42ba792f, 0xefe0a423083e49f3, 0x68f344af6b317466},
1412 {0xcdfe17db3fb24d4a, 0x668bfc2271f5c626, 0x604ed93c24d67ff3, 0x31b9c405f8540a20},
1413 {1, 0, 0, 0}},
1414 {{0xd36b4789a2582e7f, 0x0d1a10144ec39c28, 0x663c62c3edbad7a0, 0x4052bf4b6f461db9},
1415 {0x235a27c3188d25eb, 0xe724f33999bfcc5b, 0x862be6bd71d70cc8, 0xfecf4d5190b0fc61},
1416 {1, 0, 0, 0}},
1417 {{0x74346c10a1d4cfac, 0xafdf5cc08526a7a4, 0x123202a8f62bff7a, 0x1eddbae2c802e41a},
1418 {0x8fa0af2dd603f844, 0x36e06b7e4c701917, 0x0c45f45273db33a0, 0x43104d86560ebcfc},
1419 {1, 0, 0, 0}},
1420 {{0x9615b5110d1d78e5, 0x66b0de3225c4744b, 0x0a4a46fb6aaf363a, 0xb48e26b484f7a21c},
1421 {0x06ebb0f621a01b2d, 0xc004e4048b7b0f98, 0x64131bcdfed6f668, 0xfac015404d4d3dab},
1422 {1, 0, 0, 0}}},
1423 {{{0, 0, 0, 0},
1424 {0, 0, 0, 0},
1425 {0, 0, 0, 0}},
1426 {{0x3a5a9e22185a5943, 0x1ab919365c65dfb6, 0x21656b32262c71da, 0x7fe36b40af22af89},
1427 {0xd50d152c699ca101, 0x74b3d5867b8af212, 0x9f09f40407dca6f1, 0xe697d45825b63624},
1428 {1, 0, 0, 0}},
1429 {{0xa84aa9397512218e, 0xe9a521b074ca0141, 0x57880b3a18a2e902, 0x4a5b506612a677a6},
1430 {0x0beada7a4c4f3840, 0x626db15419e26d9d, 0xc42604fbe1627d40, 0xeb13461ceac089f1},
1431 {1, 0, 0, 0}},
1432 {{0xf9faed0927a43281, 0x5e52c4144103ecbc, 0xc342967aa815c857, 0x0781b8291c6a220a},
1433 {0x5a8343ceeac55f80, 0x88f80eeee54a05e3, 0x97b2a14f12916434, 0x690cde8df0151593},
1434 {1, 0, 0, 0}},
1435 {{0xaee9c75df7f82f2a, 0x9e4c35874afdf43a, 0xf5622df437371326, 0x8a535f566ec73617},
1436 {0xc5f9a0ac223094b7, 0xcde533864c8c7669, 0x37e02819085a92bf, 0x0455c08468b08bd7},
1437 {1, 0, 0, 0}},
1438 {{0x0c0a6e2c9477b5d9, 0xf9a4bf62876dc444, 0x5050a949b6cdc279, 0x06bada7ab77f8276},
1439 {0xc8b4aed1ea48dac9, 0xdebd8a4b7ea1070f, 0x427d49101366eb70, 0x5b476dfd0e6cb18a},
1440 {1, 0, 0, 0}},
1441 {{0x7c5c3e44278c340a, 0x4d54606812d66f3b, 0x29a751b1ae23c5d8, 0x3e29864e8a2ec908},
1442 {0x142d2a6626dbb850, 0xad1744c4765bd780, 0x1f150e68e322d1ed, 0x239b90ea3dc31e7e},
1443 {1, 0, 0, 0}},
1444 {{0x78c416527a53322a, 0x305dde6709776f8e, 0xdbcab759f8862ed4, 0x820f4dd949f72ff7},
1445 {0x6cc544a62b5debd4, 0x75be5d937b4e8cc4, 0x1b481b1b215c14d3, 0x140406ec783a05ec},
1446 {1, 0, 0, 0}},
1447 {{0x6a703f10e895df07, 0xfd75f3fa01876bd8, 0xeb5b06e70ce08ffe, 0x68f6b8542783dfee},
1448 {0x90c76f8a78712655, 0xcf5293d2f310bf7f, 0xfbc8044dfda45028, 0xcbe1feba92e40ce6},
1449 {1, 0, 0, 0}},
1450 {{0xe998ceea4396e4c1, 0xfc82ef0b6acea274, 0x230f729f2250e927, 0xd0b2f94d2f420109},
1451 {0x4305adddb38d4966, 0x10b838f8624c3b45, 0x7db2636658954e7a, 0x971459828b0719e5},
1452 {1, 0, 0, 0}},
1453 {{0x4bd6b72623369fc9, 0x57f2929e53d0b876, 0xc2d5cba4f2340687, 0x961610004a866aba},
1454 {0x49997bcd2e407a5e, 0x69ab197d92ddcb24, 0x2cf1f2438fe5131c, 0x7acb9fadcee75e44},
1455 {1, 0, 0, 0}},
1456 {{0x254e839423d2d4c0, 0xf57f0c917aea685b, 0xa60d880f6f75aaea, 0x24eb9acca333bf5b},
1457 {0xe3de4ccb1cda5dea, 0xfeef9341c51a6b4f, 0x743125f88bac4c4d, 0x69f891c5acd079cc},
1458 {1, 0, 0, 0}},
1459 {{0xeee44b35702476b5, 0x7ed031a0e45c2258, 0xb422d1e7bd6f8514, 0xe51f547c5972a107},
1460 {0xa25bcd6fc9cf343d, 0x8ca922ee097c184e, 0xa62f98b3a9fe9a06, 0x1c309a2b25bb1387},
1461 {1, 0, 0, 0}},
1462 {{0x9295dbeb1967c459, 0xb00148833472c98e, 0xc504977708011828, 0x20b87b8aa2c4e503},
1463 {0x3063175de057c277, 0x1bd539338fe582dd, 0x0d11adef5f69a044, 0xf5c6fa49919776be},
1464 {1, 0, 0, 0}},
1465 {{0x8c944e760fd59e11, 0x3876cba1102fad5f, 0xa454c3fad83faa56, 0x1ed7d1b9332010b9},
1466 {0xa1011a270024b889, 0x05e4d0dcac0cd344, 0x52b520f0eb6a2a24, 0x3a2b03f03217257a},
1467 {1, 0, 0, 0}},
1468 {{0xf20fc2afdf1d043d, 0xf330240db58d5a62, 0xfc7d229ca0058c3b, 0x15fee545c78dd9f6},
1469 {0x501e82885bc98cda, 0x41ef80e5d046ac04, 0x557d9f49461210fb, 0x4ab5b6b2b8753f81},
1470 {1, 0, 0, 0}}}};
1471
1472/* select_point selects the |idx|th point from a precomputation table and
1473 * copies it to out. */
1474static void select_point(const u64 idx, unsigned int size, const smallfelem pre_comp[16][3], smallfelem out[3])
1475 {
1476 unsigned i, j;
1477 u64 *outlimbs = &out[0][0];
1478 memset(outlimbs, 0, 3 * sizeof(smallfelem));
1479
1480 for (i = 0; i < size; i++)
1481 {
1482 const u64 *inlimbs = (u64*) &pre_comp[i][0][0];
1483 u64 mask = i ^ idx;
1484 mask |= mask >> 4;
1485 mask |= mask >> 2;
1486 mask |= mask >> 1;
1487 mask &= 1;
1488 mask--;
1489 for (j = 0; j < NLIMBS * 3; j++)
1490 outlimbs[j] |= inlimbs[j] & mask;
1491 }
1492 }
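
The mask computation inside select_point is a standard constant-time equality test; a standalone sketch of the same idiom for full 64-bit values (my illustration, not part of the imported file):

#include <stdint.h>

/* Returns all-ones when a == b and 0 otherwise, with no data-dependent
 * branch.  select_point above only folds the low four bits because both
 * i and idx are known to be below 16; a general 64-bit version needs the
 * full fold shown here. */
static uint64_t eq_mask(uint64_t a, uint64_t b)
	{
	uint64_t mask = a ^ b;		/* zero iff a == b */
	mask |= mask >> 32;
	mask |= mask >> 16;
	mask |= mask >> 8;
	mask |= mask >> 4;
	mask |= mask >> 2;
	mask |= mask >> 1;		/* low bit is 1 iff a != b */
	mask &= 1;
	return mask - 1;		/* 0 - 1 wraps to all-ones when equal */
	}
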
1493
1494/* get_bit returns the |i|th bit in |in| */
1495static char get_bit(const felem_bytearray in, int i)
1496 {
1497 if ((i < 0) || (i >= 256))
1498 return 0;
1499 return (in[i >> 3] >> (i & 7)) & 1;
1500 }
1501
1502/* Interleaved point multiplication using precomputed point multiples:
1503 * The small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[],
1504 * the scalars in scalars[]. If g_scalar is non-NULL, we also add this multiple
1505 * of the generator, using certain (large) precomputed multiples in g_pre_comp.
1506 * Output point (X, Y, Z) is stored in x_out, y_out, z_out */
1507static void batch_mul(felem x_out, felem y_out, felem z_out,
1508 const felem_bytearray scalars[], const unsigned num_points, const u8 *g_scalar,
1509 const int mixed, const smallfelem pre_comp[][17][3], const smallfelem g_pre_comp[2][16][3])
1510 {
1511 int i, skip;
1512 unsigned num, gen_mul = (g_scalar != NULL);
1513 felem nq[3], ftmp;
1514 smallfelem tmp[3];
1515 u64 bits;
1516 u8 sign, digit;
1517
1518 /* set nq to the point at infinity */
1519 memset(nq, 0, 3 * sizeof(felem));
1520
1521 /* Loop over all scalars msb-to-lsb, interleaving additions
1522 * of multiples of the generator (two in each of the last 32 rounds)
1523 * and additions of other points' multiples (every 5th round).
1524 */
1525 skip = 1; /* save two point operations in the first round */
1526 for (i = (num_points ? 255 : 31); i >= 0; --i)
1527 {
1528 /* double */
1529 if (!skip)
1530 point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
1531
1532 /* add multiples of the generator */
1533 if (gen_mul && (i <= 31))
1534 {
1535 /* first, look 32 bits upwards */
1536 bits = get_bit(g_scalar, i + 224) << 3;
1537 bits |= get_bit(g_scalar, i + 160) << 2;
1538 bits |= get_bit(g_scalar, i + 96) << 1;
1539 bits |= get_bit(g_scalar, i + 32);
1540 /* select the point to add, in constant time */
1541 select_point(bits, 16, g_pre_comp[1], tmp);
1542
1543 if (!skip)
1544 {
1545 point_add(nq[0], nq[1], nq[2],
1546 nq[0], nq[1], nq[2],
1547 1 /* mixed */, tmp[0], tmp[1], tmp[2]);
1548 }
1549 else
1550 {
1551 smallfelem_expand(nq[0], tmp[0]);
1552 smallfelem_expand(nq[1], tmp[1]);
1553 smallfelem_expand(nq[2], tmp[2]);
1554 skip = 0;
1555 }
1556
1557 /* second, look at the current position */
1558 bits = get_bit(g_scalar, i + 192) << 3;
1559 bits |= get_bit(g_scalar, i + 128) << 2;
1560 bits |= get_bit(g_scalar, i + 64) << 1;
1561 bits |= get_bit(g_scalar, i);
1562 /* select the point to add, in constant time */
1563 select_point(bits, 16, g_pre_comp[0], tmp);
1564 point_add(nq[0], nq[1], nq[2],
1565 nq[0], nq[1], nq[2],
1566 1 /* mixed */, tmp[0], tmp[1], tmp[2]);
1567 }
1568
1569 /* do other additions every 5 doublings */
1570 if (num_points && (i % 5 == 0))
1571 {
1572 /* loop over all scalars */
1573 for (num = 0; num < num_points; ++num)
1574 {
1575 bits = get_bit(scalars[num], i + 4) << 5;
1576 bits |= get_bit(scalars[num], i + 3) << 4;
1577 bits |= get_bit(scalars[num], i + 2) << 3;
1578 bits |= get_bit(scalars[num], i + 1) << 2;
1579 bits |= get_bit(scalars[num], i) << 1;
1580 bits |= get_bit(scalars[num], i - 1);
1581 ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
1582
1583 /* select the point to add or subtract, in constant time */
1584 select_point(digit, 17, pre_comp[num], tmp);
1585 smallfelem_neg(ftmp, tmp[1]); /* (X, -Y, Z) is the negative point */
1586 copy_small_conditional(ftmp, tmp[1], (((limb) sign) - 1));
1587 felem_contract(tmp[1], ftmp);
1588
1589 if (!skip)
1590 {
1591 point_add(nq[0], nq[1], nq[2],
1592 nq[0], nq[1], nq[2],
1593 mixed, tmp[0], tmp[1], tmp[2]);
1594 }
1595 else
1596 {
1597 smallfelem_expand(nq[0], tmp[0]);
1598 smallfelem_expand(nq[1], tmp[1]);
1599 smallfelem_expand(nq[2], tmp[2]);
1600 skip = 0;
1601 }
1602 }
1603 }
1604 }
1605 felem_assign(x_out, nq[0]);
1606 felem_assign(y_out, nq[1]);
1607 felem_assign(z_out, nq[2]);
1608 }
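
To make the generator windowing above concrete: scalar bit j is consumed in inner round i = j mod 32, as comb column j/64 of table (j/32) mod 2, which is exactly the set of positions i, i+32, ..., i+224 read above. A standalone sketch (not part of the imported file) that prints this mapping:

#include <stdio.h>

int main(void)
	{
	int j;
	for (j = 0; j < 256; j++)
		printf("scalar bit %3d: round %2d, g_pre_comp table %d, column %d\n",
		    j, j % 32, (j / 32) % 2, j / 64);
	return 0;
	}
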
1609
1610/* Precomputation for the group generator. */
1611typedef struct {
1612 smallfelem g_pre_comp[2][16][3];
1613 int references;
1614} NISTP256_PRE_COMP;
1615
1616const EC_METHOD *EC_GFp_nistp256_method(void)
1617 {
1618 static const EC_METHOD ret = {
1619 EC_FLAGS_DEFAULT_OCT,
1620 NID_X9_62_prime_field,
1621 ec_GFp_nistp256_group_init,
1622 ec_GFp_simple_group_finish,
1623 ec_GFp_simple_group_clear_finish,
1624 ec_GFp_nist_group_copy,
1625 ec_GFp_nistp256_group_set_curve,
1626 ec_GFp_simple_group_get_curve,
1627 ec_GFp_simple_group_get_degree,
1628 ec_GFp_simple_group_check_discriminant,
1629 ec_GFp_simple_point_init,
1630 ec_GFp_simple_point_finish,
1631 ec_GFp_simple_point_clear_finish,
1632 ec_GFp_simple_point_copy,
1633 ec_GFp_simple_point_set_to_infinity,
1634 ec_GFp_simple_set_Jprojective_coordinates_GFp,
1635 ec_GFp_simple_get_Jprojective_coordinates_GFp,
1636 ec_GFp_simple_point_set_affine_coordinates,
1637 ec_GFp_nistp256_point_get_affine_coordinates,
1638 0 /* point_set_compressed_coordinates */,
1639 0 /* point2oct */,
1640 0 /* oct2point */,
1641 ec_GFp_simple_add,
1642 ec_GFp_simple_dbl,
1643 ec_GFp_simple_invert,
1644 ec_GFp_simple_is_at_infinity,
1645 ec_GFp_simple_is_on_curve,
1646 ec_GFp_simple_cmp,
1647 ec_GFp_simple_make_affine,
1648 ec_GFp_simple_points_make_affine,
1649 ec_GFp_nistp256_points_mul,
1650 ec_GFp_nistp256_precompute_mult,
1651 ec_GFp_nistp256_have_precompute_mult,
1652 ec_GFp_nist_field_mul,
1653 ec_GFp_nist_field_sqr,
1654 0 /* field_div */,
1655 0 /* field_encode */,
1656 0 /* field_decode */,
1657 0 /* field_set_to_one */ };
1658
1659 return &ret;
1660 }
1661
1662/******************************************************************************/
1663/* FUNCTIONS TO MANAGE PRECOMPUTATION
1664 */
1665
1666static NISTP256_PRE_COMP *nistp256_pre_comp_new()
1667 {
1668 NISTP256_PRE_COMP *ret = NULL;
1669 ret = (NISTP256_PRE_COMP *) OPENSSL_malloc(sizeof *ret);
1670 if (!ret)
1671 {
1672 ECerr(EC_F_NISTP256_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
1673 return ret;
1674 }
1675 memset(ret->g_pre_comp, 0, sizeof(ret->g_pre_comp));
1676 ret->references = 1;
1677 return ret;
1678 }
1679
1680static void *nistp256_pre_comp_dup(void *src_)
1681 {
1682 NISTP256_PRE_COMP *src = src_;
1683
1684 /* no need to actually copy, these objects never change! */
1685 CRYPTO_add(&src->references, 1, CRYPTO_LOCK_EC_PRE_COMP);
1686
1687 return src_;
1688 }
1689
1690static void nistp256_pre_comp_free(void *pre_)
1691 {
1692 int i;
1693 NISTP256_PRE_COMP *pre = pre_;
1694
1695 if (!pre)
1696 return;
1697
1698 i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
1699 if (i > 0)
1700 return;
1701
1702 OPENSSL_free(pre);
1703 }
1704
1705static void nistp256_pre_comp_clear_free(void *pre_)
1706 {
1707 int i;
1708 NISTP256_PRE_COMP *pre = pre_;
1709
1710 if (!pre)
1711 return;
1712
1713 i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
1714 if (i > 0)
1715 return;
1716
1717 OPENSSL_cleanse(pre, sizeof *pre);
1718 OPENSSL_free(pre);
1719 }
1720
1721/******************************************************************************/
1722/* OPENSSL EC_METHOD FUNCTIONS
1723 */
1724
1725int ec_GFp_nistp256_group_init(EC_GROUP *group)
1726 {
1727 int ret;
1728 ret = ec_GFp_simple_group_init(group);
1729 group->a_is_minus3 = 1;
1730 return ret;
1731 }
1732
1733int ec_GFp_nistp256_group_set_curve(EC_GROUP *group, const BIGNUM *p,
1734 const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx)
1735 {
1736 int ret = 0;
1737 BN_CTX *new_ctx = NULL;
1738 BIGNUM *curve_p, *curve_a, *curve_b;
1739
1740 if (ctx == NULL)
1741 if ((ctx = new_ctx = BN_CTX_new()) == NULL) return 0;
1742 BN_CTX_start(ctx);
1743 if (((curve_p = BN_CTX_get(ctx)) == NULL) ||
1744 ((curve_a = BN_CTX_get(ctx)) == NULL) ||
1745 ((curve_b = BN_CTX_get(ctx)) == NULL)) goto err;
1746 BN_bin2bn(nistp256_curve_params[0], sizeof(felem_bytearray), curve_p);
1747 BN_bin2bn(nistp256_curve_params[1], sizeof(felem_bytearray), curve_a);
1748 BN_bin2bn(nistp256_curve_params[2], sizeof(felem_bytearray), curve_b);
1749 if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) ||
1750 (BN_cmp(curve_b, b)))
1751 {
1752 ECerr(EC_F_EC_GFP_NISTP256_GROUP_SET_CURVE,
1753 EC_R_WRONG_CURVE_PARAMETERS);
1754 goto err;
1755 }
1756 group->field_mod_func = BN_nist_mod_256;
1757 ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
1758err:
1759 BN_CTX_end(ctx);
1760 if (new_ctx != NULL)
1761 BN_CTX_free(new_ctx);
1762 return ret;
1763 }
1764
1765/* Takes the Jacobian coordinates (X, Y, Z) of a point and returns
1766 * (X', Y') = (X/Z^2, Y/Z^3) */
1767int ec_GFp_nistp256_point_get_affine_coordinates(const EC_GROUP *group,
1768 const EC_POINT *point, BIGNUM *x, BIGNUM *y, BN_CTX *ctx)
1769 {
1770 felem z1, z2, x_in, y_in;
1771 smallfelem x_out, y_out;
1772 longfelem tmp;
1773
1774 if (EC_POINT_is_at_infinity(group, point))
1775 {
1776 ECerr(EC_F_EC_GFP_NISTP256_POINT_GET_AFFINE_COORDINATES,
1777 EC_R_POINT_AT_INFINITY);
1778 return 0;
1779 }
1780 if ((!BN_to_felem(x_in, &point->X)) || (!BN_to_felem(y_in, &point->Y)) ||
1781 (!BN_to_felem(z1, &point->Z))) return 0;
1782 felem_inv(z2, z1);
1783 felem_square(tmp, z2); felem_reduce(z1, tmp);
1784 felem_mul(tmp, x_in, z1); felem_reduce(x_in, tmp);
1785 felem_contract(x_out, x_in);
1786 if (x != NULL)
1787 {
1788 if (!smallfelem_to_BN(x, x_out)) {
1789 ECerr(EC_F_EC_GFP_NISTP256_POINT_GET_AFFINE_COORDINATES,
1790 ERR_R_BN_LIB);
1791 return 0;
1792 }
1793 }
1794 felem_mul(tmp, z1, z2); felem_reduce(z1, tmp);
1795 felem_mul(tmp, y_in, z1); felem_reduce(y_in, tmp);
1796 felem_contract(y_out, y_in);
1797 if (y != NULL)
1798 {
1799 if (!smallfelem_to_BN(y, y_out))
1800 {
1801 ECerr(EC_F_EC_GFP_NISTP256_POINT_GET_AFFINE_COORDINATES,
1802 ERR_R_BN_LIB);
1803 return 0;
1804 }
1805 }
1806 return 1;
1807 }
1808
1809static void make_points_affine(size_t num, smallfelem points[/* num */][3], smallfelem tmp_smallfelems[/* num+1 */])
1810 {
1811 /* Runs in constant time, unless an input is the point at infinity
1812 * (which normally shouldn't happen). */
1813 ec_GFp_nistp_points_make_affine_internal(
1814 num,
1815 points,
1816 sizeof(smallfelem),
1817 tmp_smallfelems,
1818 (void (*)(void *)) smallfelem_one,
1819 (int (*)(const void *)) smallfelem_is_zero_int,
1820 (void (*)(void *, const void *)) smallfelem_assign,
1821 (void (*)(void *, const void *)) smallfelem_square_contract,
1822 (void (*)(void *, const void *, const void *)) smallfelem_mul_contract,
1823 (void (*)(void *, const void *)) smallfelem_inv_contract,
1824 (void (*)(void *, const void *)) smallfelem_assign /* nothing to contract */);
1825 }
1826
1827/* Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL values.
1828 * Result is stored in r (r can equal one of the inputs). */
1829int ec_GFp_nistp256_points_mul(const EC_GROUP *group, EC_POINT *r,
1830 const BIGNUM *scalar, size_t num, const EC_POINT *points[],
1831 const BIGNUM *scalars[], BN_CTX *ctx)
1832 {
1833 int ret = 0;
1834 int j;
1835 int mixed = 0;
1836 BN_CTX *new_ctx = NULL;
1837 BIGNUM *x, *y, *z, *tmp_scalar;
1838 felem_bytearray g_secret;
1839 felem_bytearray *secrets = NULL;
1840 smallfelem (*pre_comp)[17][3] = NULL;
1841 smallfelem *tmp_smallfelems = NULL;
1842 felem_bytearray tmp;
1843 unsigned i, num_bytes;
1844 int have_pre_comp = 0;
1845 size_t num_points = num;
1846 smallfelem x_in, y_in, z_in;
1847 felem x_out, y_out, z_out;
1848 NISTP256_PRE_COMP *pre = NULL;
1849 const smallfelem (*g_pre_comp)[16][3] = NULL;
1850 EC_POINT *generator = NULL;
1851 const EC_POINT *p = NULL;
1852 const BIGNUM *p_scalar = NULL;
1853
1854 if (ctx == NULL)
1855 if ((ctx = new_ctx = BN_CTX_new()) == NULL) return 0;
1856 BN_CTX_start(ctx);
1857 if (((x = BN_CTX_get(ctx)) == NULL) ||
1858 ((y = BN_CTX_get(ctx)) == NULL) ||
1859 ((z = BN_CTX_get(ctx)) == NULL) ||
1860 ((tmp_scalar = BN_CTX_get(ctx)) == NULL))
1861 goto err;
1862
1863 if (scalar != NULL)
1864 {
1865 pre = EC_EX_DATA_get_data(group->extra_data,
1866 nistp256_pre_comp_dup, nistp256_pre_comp_free,
1867 nistp256_pre_comp_clear_free);
1868 if (pre)
1869 /* we have precomputation, try to use it */
1870 g_pre_comp = (const smallfelem (*)[16][3]) pre->g_pre_comp;
1871 else
1872 /* try to use the standard precomputation */
1873 g_pre_comp = &gmul[0];
1874 generator = EC_POINT_new(group);
1875 if (generator == NULL)
1876 goto err;
1877 /* get the generator from precomputation */
1878 if (!smallfelem_to_BN(x, g_pre_comp[0][1][0]) ||
1879 !smallfelem_to_BN(y, g_pre_comp[0][1][1]) ||
1880 !smallfelem_to_BN(z, g_pre_comp[0][1][2]))
1881 {
1882 ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_BN_LIB);
1883 goto err;
1884 }
1885 if (!EC_POINT_set_Jprojective_coordinates_GFp(group,
1886 generator, x, y, z, ctx))
1887 goto err;
1888 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
1889 /* precomputation matches generator */
1890 have_pre_comp = 1;
1891 else
1892 /* we don't have valid precomputation:
1893 * treat the generator as a random point */
1894 num_points++;
1895 }
1896 if (num_points > 0)
1897 {
1898 if (num_points >= 3)
1899 {
1900 /* unless we precompute multiples for just one or two points,
1901 * converting those into affine form is time well spent */
1902 mixed = 1;
1903 }
1904 secrets = OPENSSL_malloc(num_points * sizeof(felem_bytearray));
1905 pre_comp = OPENSSL_malloc(num_points * 17 * 3 * sizeof(smallfelem));
1906 if (mixed)
1907 tmp_smallfelems = OPENSSL_malloc((num_points * 17 + 1) * sizeof(smallfelem));
1908 if ((secrets == NULL) || (pre_comp == NULL) || (mixed && (tmp_smallfelems == NULL)))
1909 {
1910 ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_MALLOC_FAILURE);
1911 goto err;
1912 }
1913
1914 /* we treat NULL scalars as 0, and NULL points as points at infinity,
1915 * i.e., they contribute nothing to the linear combination */
1916 memset(secrets, 0, num_points * sizeof(felem_bytearray));
1917 memset(pre_comp, 0, num_points * 17 * 3 * sizeof(smallfelem));
1918 for (i = 0; i < num_points; ++i)
1919 {
1920 if (i == num)
1921 /* we didn't have a valid precomputation, so we pick
1922 * the generator */
1923 {
1924 p = EC_GROUP_get0_generator(group);
1925 p_scalar = scalar;
1926 }
1927 else
1928 /* the i^th point */
1929 {
1930 p = points[i];
1931 p_scalar = scalars[i];
1932 }
1933 if ((p_scalar != NULL) && (p != NULL))
1934 {
1935 /* reduce scalar to 0 <= scalar < 2^256 */
1936 if ((BN_num_bits(p_scalar) > 256) || (BN_is_negative(p_scalar)))
1937 {
1938 /* this is an unusual input, and we don't guarantee
1939 * constant-timeness */
1940 if (!BN_nnmod(tmp_scalar, p_scalar, &group->order, ctx))
1941 {
1942 ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_BN_LIB);
1943 goto err;
1944 }
1945 num_bytes = BN_bn2bin(tmp_scalar, tmp);
1946 }
1947 else
1948 num_bytes = BN_bn2bin(p_scalar, tmp);
1949 flip_endian(secrets[i], tmp, num_bytes);
1950 /* precompute multiples */
1951 if ((!BN_to_felem(x_out, &p->X)) ||
1952 (!BN_to_felem(y_out, &p->Y)) ||
1953 (!BN_to_felem(z_out, &p->Z))) goto err;
1954 felem_shrink(pre_comp[i][1][0], x_out);
1955 felem_shrink(pre_comp[i][1][1], y_out);
1956 felem_shrink(pre_comp[i][1][2], z_out);
1957 for (j = 2; j <= 16; ++j)
1958 {
1959 if (j & 1)
1960 {
1961 point_add_small(
1962 pre_comp[i][j][0], pre_comp[i][j][1], pre_comp[i][j][2],
1963 pre_comp[i][1][0], pre_comp[i][1][1], pre_comp[i][1][2],
1964 pre_comp[i][j-1][0], pre_comp[i][j-1][1], pre_comp[i][j-1][2]);
1965 }
1966 else
1967 {
1968 point_double_small(
1969 pre_comp[i][j][0], pre_comp[i][j][1], pre_comp[i][j][2],
1970 pre_comp[i][j/2][0], pre_comp[i][j/2][1], pre_comp[i][j/2][2]);
1971 }
1972 }
1973 }
1974 }
1975 if (mixed)
1976 make_points_affine(num_points * 17, pre_comp[0], tmp_smallfelems);
1977 }
1978
1979 /* the scalar for the generator */
1980 if ((scalar != NULL) && (have_pre_comp))
1981 {
1982 memset(g_secret, 0, sizeof(g_secret));
1983 /* reduce scalar to 0 <= scalar < 2^256 */
1984 if ((BN_num_bits(scalar) > 256) || (BN_is_negative(scalar)))
1985 {
1986 /* this is an unusual input, and we don't guarantee
1987 * constant-timeness */
1988 if (!BN_nnmod(tmp_scalar, scalar, &group->order, ctx))
1989 {
1990 ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_BN_LIB);
1991 goto err;
1992 }
1993 num_bytes = BN_bn2bin(tmp_scalar, tmp);
1994 }
1995 else
1996 num_bytes = BN_bn2bin(scalar, tmp);
1997 flip_endian(g_secret, tmp, num_bytes);
1998 /* do the multiplication with generator precomputation*/
1999 batch_mul(x_out, y_out, z_out,
2000 (const felem_bytearray (*)) secrets, num_points,
2001 g_secret,
2002 mixed, (const smallfelem (*)[17][3]) pre_comp,
2003 g_pre_comp);
2004 }
2005 else
2006 /* do the multiplication without generator precomputation */
2007 batch_mul(x_out, y_out, z_out,
2008 (const felem_bytearray (*)) secrets, num_points,
2009 NULL, mixed, (const smallfelem (*)[17][3]) pre_comp, NULL);
2010 /* reduce the output to its unique minimal representation */
2011 felem_contract(x_in, x_out);
2012 felem_contract(y_in, y_out);
2013 felem_contract(z_in, z_out);
2014 if ((!smallfelem_to_BN(x, x_in)) || (!smallfelem_to_BN(y, y_in)) ||
2015 (!smallfelem_to_BN(z, z_in)))
2016 {
2017 ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_BN_LIB);
2018 goto err;
2019 }
2020 ret = EC_POINT_set_Jprojective_coordinates_GFp(group, r, x, y, z, ctx);
2021
2022err:
2023 BN_CTX_end(ctx);
2024 if (generator != NULL)
2025 EC_POINT_free(generator);
2026 if (new_ctx != NULL)
2027 BN_CTX_free(new_ctx);
2028 if (secrets != NULL)
2029 OPENSSL_free(secrets);
2030 if (pre_comp != NULL)
2031 OPENSSL_free(pre_comp);
2032 if (tmp_smallfelems != NULL)
2033 OPENSSL_free(tmp_smallfelems);
2034 return ret;
2035 }
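
This function is normally reached through the public EC API rather than called directly; a hedged usage sketch, assuming a build in which EC_GROUP_new_by_curve_name selects EC_GFp_nistp256_method for prime256v1 (i.e. OPENSSL_NO_EC_NISTP_64_GCC_128 is not defined):

#include <openssl/bn.h>
#include <openssl/ec.h>
#include <openssl/obj_mac.h>

/* Computes scalar*G on P-256 and discards the result; returns 1 on success. */
static int p256_scalar_mult_example(const BIGNUM *scalar)
	{
	int ok = 0;
	EC_GROUP *group = EC_GROUP_new_by_curve_name(NID_X9_62_prime256v1);
	EC_POINT *r;

	if (group == NULL)
		return 0;
	if ((r = EC_POINT_new(group)) != NULL)
		ok = EC_POINT_mul(group, r, scalar, NULL, NULL, NULL);
	EC_POINT_free(r);
	EC_GROUP_free(group);
	return ok;
	}
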
2036
2037int ec_GFp_nistp256_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
2038 {
2039 int ret = 0;
2040 NISTP256_PRE_COMP *pre = NULL;
2041 int i, j;
2042 BN_CTX *new_ctx = NULL;
2043 BIGNUM *x, *y;
2044 EC_POINT *generator = NULL;
2045 smallfelem tmp_smallfelems[32];
2046 felem x_tmp, y_tmp, z_tmp;
2047
2048 /* throw away old precomputation */
2049 EC_EX_DATA_free_data(&group->extra_data, nistp256_pre_comp_dup,
2050 nistp256_pre_comp_free, nistp256_pre_comp_clear_free);
2051 if (ctx == NULL)
2052 if ((ctx = new_ctx = BN_CTX_new()) == NULL) return 0;
2053 BN_CTX_start(ctx);
2054 if (((x = BN_CTX_get(ctx)) == NULL) ||
2055 ((y = BN_CTX_get(ctx)) == NULL))
2056 goto err;
2057 /* get the generator */
2058 if (group->generator == NULL) goto err;
2059 generator = EC_POINT_new(group);
2060 if (generator == NULL)
2061 goto err;
2062 BN_bin2bn(nistp256_curve_params[3], sizeof (felem_bytearray), x);
2063 BN_bin2bn(nistp256_curve_params[4], sizeof (felem_bytearray), y);
2064 if (!EC_POINT_set_affine_coordinates_GFp(group, generator, x, y, ctx))
2065 goto err;
2066 if ((pre = nistp256_pre_comp_new()) == NULL)
2067 goto err;
2068 /* if the generator is the standard one, use built-in precomputation */
2069 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
2070 {
2071 memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
2072 ret = 1;
2073 goto err;
2074 }
2075 if ((!BN_to_felem(x_tmp, &group->generator->X)) ||
2076 (!BN_to_felem(y_tmp, &group->generator->Y)) ||
2077 (!BN_to_felem(z_tmp, &group->generator->Z)))
2078 goto err;
2079 felem_shrink(pre->g_pre_comp[0][1][0], x_tmp);
2080 felem_shrink(pre->g_pre_comp[0][1][1], y_tmp);
2081 felem_shrink(pre->g_pre_comp[0][1][2], z_tmp);
2082 /* compute 2^64*G, 2^128*G, 2^192*G for the first table,
2083 * 2^32*G, 2^96*G, 2^160*G, 2^224*G for the second one
2084 */
2085 for (i = 1; i <= 8; i <<= 1)
2086 {
2087 point_double_small(
2088 pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2],
2089 pre->g_pre_comp[0][i][0], pre->g_pre_comp[0][i][1], pre->g_pre_comp[0][i][2]);
2090 for (j = 0; j < 31; ++j)
2091 {
2092 point_double_small(
2093 pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2],
2094 pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
2095 }
2096 if (i == 8)
2097 break;
2098 point_double_small(
2099 pre->g_pre_comp[0][2*i][0], pre->g_pre_comp[0][2*i][1], pre->g_pre_comp[0][2*i][2],
2100 pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
2101 for (j = 0; j < 31; ++j)
2102 {
2103 point_double_small(
2104 pre->g_pre_comp[0][2*i][0], pre->g_pre_comp[0][2*i][1], pre->g_pre_comp[0][2*i][2],
2105 pre->g_pre_comp[0][2*i][0], pre->g_pre_comp[0][2*i][1], pre->g_pre_comp[0][2*i][2]);
2106 }
2107 }
2108 for (i = 0; i < 2; i++)
2109 {
2110 /* g_pre_comp[i][0] is the point at infinity */
2111 memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0]));
2112 /* the remaining multiples */
2113 /* 2^64*G + 2^128*G resp. 2^96*G + 2^160*G */
2114 point_add_small(
2115 pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1], pre->g_pre_comp[i][6][2],
2116 pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2],
2117 pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1], pre->g_pre_comp[i][2][2]);
2118 /* 2^64*G + 2^192*G resp. 2^96*G + 2^224*G */
2119 point_add_small(
2120 pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1], pre->g_pre_comp[i][10][2],
2121 pre->g_pre_comp[i][8][0], pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
2122 pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1], pre->g_pre_comp[i][2][2]);
2123 /* 2^128*G + 2^192*G resp. 2^160*G + 2^224*G */
2124 point_add_small(
2125 pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
2126 pre->g_pre_comp[i][8][0], pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
2127 pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2]);
2128 /* 2^64*G + 2^128*G + 2^192*G resp. 2^96*G + 2^160*G + 2^224*G */
2129 point_add_small(
2130 pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1], pre->g_pre_comp[i][14][2],
2131 pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
2132 pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1], pre->g_pre_comp[i][2][2]);
2133 for (j = 1; j < 8; ++j)
2134 {
2135 /* odd multiples: add G resp. 2^32*G */
2136 point_add_small(
2137 pre->g_pre_comp[i][2*j+1][0], pre->g_pre_comp[i][2*j+1][1], pre->g_pre_comp[i][2*j+1][2],
2138 pre->g_pre_comp[i][2*j][0], pre->g_pre_comp[i][2*j][1], pre->g_pre_comp[i][2*j][2],
2139 pre->g_pre_comp[i][1][0], pre->g_pre_comp[i][1][1], pre->g_pre_comp[i][1][2]);
2140 }
2141 }
2142 make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_smallfelems);
2143
2144 if (!EC_EX_DATA_set_data(&group->extra_data, pre, nistp256_pre_comp_dup,
2145 nistp256_pre_comp_free, nistp256_pre_comp_clear_free))
2146 goto err;
2147 ret = 1;
2148 pre = NULL;
2149 err:
2150 BN_CTX_end(ctx);
2151 if (generator != NULL)
2152 EC_POINT_free(generator);
2153 if (new_ctx != NULL)
2154 BN_CTX_free(new_ctx);
2155 if (pre)
2156 nistp256_pre_comp_free(pre);
2157 return ret;
2158 }
2159
2160int ec_GFp_nistp256_have_precompute_mult(const EC_GROUP *group)
2161 {
2162 if (EC_EX_DATA_get_data(group->extra_data, nistp256_pre_comp_dup,
2163 nistp256_pre_comp_free, nistp256_pre_comp_clear_free)
2164 != NULL)
2165 return 1;
2166 else
2167 return 0;
2168 }
2169#else
2170static void *dummy=&dummy;
2171#endif
diff --git a/src/lib/libssl/src/crypto/ec/ecp_nistp521.c b/src/lib/libssl/src/crypto/ec/ecp_nistp521.c
new file mode 100644
index 0000000000..178b655f7f
--- /dev/null
+++ b/src/lib/libssl/src/crypto/ec/ecp_nistp521.c
@@ -0,0 +1,2025 @@
1/* crypto/ec/ecp_nistp521.c */
2/*
3 * Written by Adam Langley (Google) for the OpenSSL project
4 */
5/* Copyright 2011 Google Inc.
6 *
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 *
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
11 *
12 * http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
19 */
20
21/*
22 * A 64-bit implementation of the NIST P-521 elliptic curve point multiplication
23 *
24 * OpenSSL integration was taken from Emilia Kasper's work in ecp_nistp224.c.
25 * Otherwise based on Emilia's P224 work, which was inspired by my curve25519
26 * work which got its smarts from Daniel J. Bernstein's work on the same.
27 */
28
29#include <openssl/opensslconf.h>
30#ifndef OPENSSL_NO_EC_NISTP_64_GCC_128
31
32#ifndef OPENSSL_SYS_VMS
33#include <stdint.h>
34#else
35#include <inttypes.h>
36#endif
37
38#include <string.h>
39#include <openssl/err.h>
40#include "ec_lcl.h"
41
42#if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
43 /* even with gcc, the typedef won't work for 32-bit platforms */
44 typedef __uint128_t uint128_t; /* nonstandard; implemented by gcc on 64-bit platforms */
45#else
46 #error "Need GCC 3.1 or later to define type uint128_t"
47#endif
48
49typedef uint8_t u8;
50typedef uint64_t u64;
51typedef int64_t s64;
52
53/* The underlying field.
54 *
55 * P521 operates over GF(2^521-1). We can serialise an element of this field
56 * into 66 bytes where the most significant byte contains only a single bit. We
57 * call this an felem_bytearray. */
58
59typedef u8 felem_bytearray[66];
60
61/* These are the parameters of P521, taken from FIPS 186-3, section D.1.2.5.
62 * These values are big-endian. */
63static const felem_bytearray nistp521_curve_params[5] =
64 {
65 {0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* p */
66 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
67 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
68 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
69 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
70 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
71 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
72 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
73 0xff, 0xff},
74 {0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* a = -3 */
75 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
76 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
77 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
78 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
79 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
80 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
81 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
82 0xff, 0xfc},
83 {0x00, 0x51, 0x95, 0x3e, 0xb9, 0x61, 0x8e, 0x1c, /* b */
84 0x9a, 0x1f, 0x92, 0x9a, 0x21, 0xa0, 0xb6, 0x85,
85 0x40, 0xee, 0xa2, 0xda, 0x72, 0x5b, 0x99, 0xb3,
86 0x15, 0xf3, 0xb8, 0xb4, 0x89, 0x91, 0x8e, 0xf1,
87 0x09, 0xe1, 0x56, 0x19, 0x39, 0x51, 0xec, 0x7e,
88 0x93, 0x7b, 0x16, 0x52, 0xc0, 0xbd, 0x3b, 0xb1,
89 0xbf, 0x07, 0x35, 0x73, 0xdf, 0x88, 0x3d, 0x2c,
90 0x34, 0xf1, 0xef, 0x45, 0x1f, 0xd4, 0x6b, 0x50,
91 0x3f, 0x00},
92 {0x00, 0xc6, 0x85, 0x8e, 0x06, 0xb7, 0x04, 0x04, /* x */
93 0xe9, 0xcd, 0x9e, 0x3e, 0xcb, 0x66, 0x23, 0x95,
94 0xb4, 0x42, 0x9c, 0x64, 0x81, 0x39, 0x05, 0x3f,
95 0xb5, 0x21, 0xf8, 0x28, 0xaf, 0x60, 0x6b, 0x4d,
96 0x3d, 0xba, 0xa1, 0x4b, 0x5e, 0x77, 0xef, 0xe7,
97 0x59, 0x28, 0xfe, 0x1d, 0xc1, 0x27, 0xa2, 0xff,
98 0xa8, 0xde, 0x33, 0x48, 0xb3, 0xc1, 0x85, 0x6a,
99 0x42, 0x9b, 0xf9, 0x7e, 0x7e, 0x31, 0xc2, 0xe5,
100 0xbd, 0x66},
101 {0x01, 0x18, 0x39, 0x29, 0x6a, 0x78, 0x9a, 0x3b, /* y */
102 0xc0, 0x04, 0x5c, 0x8a, 0x5f, 0xb4, 0x2c, 0x7d,
103 0x1b, 0xd9, 0x98, 0xf5, 0x44, 0x49, 0x57, 0x9b,
104 0x44, 0x68, 0x17, 0xaf, 0xbd, 0x17, 0x27, 0x3e,
105 0x66, 0x2c, 0x97, 0xee, 0x72, 0x99, 0x5e, 0xf4,
106 0x26, 0x40, 0xc5, 0x50, 0xb9, 0x01, 0x3f, 0xad,
107 0x07, 0x61, 0x35, 0x3c, 0x70, 0x86, 0xa2, 0x72,
108 0xc2, 0x40, 0x88, 0xbe, 0x94, 0x76, 0x9f, 0xd1,
109 0x66, 0x50}
110 };
111
112/* The representation of field elements.
113 * ------------------------------------
114 *
115 * We represent field elements with nine values. These values are either 64 or
116 * 128 bits and the field element represented is:
117 * v[0]*2^0 + v[1]*2^58 + v[2]*2^116 + ... + v[8]*2^464 (mod p)
118 * Each of the nine values is called a 'limb'. Since the limbs are spaced only
119 * 58 bits apart, but are greater than 58 bits in length, the most significant
120 * bits of each limb overlap with the least significant bits of the next.
121 *
122 * A field element with 64-bit limbs is an 'felem'. One with 128-bit limbs is a
123 * 'largefelem' */
124
125#define NLIMBS 9
126
127typedef uint64_t limb;
128typedef limb felem[NLIMBS];
129typedef uint128_t largefelem[NLIMBS];
130
131static const limb bottom57bits = 0x1ffffffffffffff;
132static const limb bottom58bits = 0x3ffffffffffffff;
133
134/* bin66_to_felem takes a little-endian byte array and converts it into felem
135 * form. This assumes that the CPU is little-endian. */
136static void bin66_to_felem(felem out, const u8 in[66])
137 {
138 out[0] = (*((limb*) &in[0])) & bottom58bits;
139 out[1] = (*((limb*) &in[7]) >> 2) & bottom58bits;
140 out[2] = (*((limb*) &in[14]) >> 4) & bottom58bits;
141 out[3] = (*((limb*) &in[21]) >> 6) & bottom58bits;
142 out[4] = (*((limb*) &in[29])) & bottom58bits;
143 out[5] = (*((limb*) &in[36]) >> 2) & bottom58bits;
144 out[6] = (*((limb*) &in[43]) >> 4) & bottom58bits;
145 out[7] = (*((limb*) &in[50]) >> 6) & bottom58bits;
146 out[8] = (*((limb*) &in[58])) & bottom57bits;
147 }
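
The byte offsets and shift amounts used above follow directly from the 58-bit limb spacing: limb i starts at bit 58*i, i.e. at byte (58*i)/8 with a shift of (58*i) mod 8. A standalone sketch (not part of the imported file) that reproduces the constants:

#include <stdio.h>

int main(void)
	{
	int i;
	for (i = 0; i < 9; i++)
		printf("limb %d: bit %3d -> byte offset %2d, shift %d\n",
		    i, 58 * i, (58 * i) / 8, (58 * i) % 8);
	return 0;
	}
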
148
149/* felem_to_bin66 takes an felem and serialises it into a little-endian, 66 byte
150 * array. This assumes that the CPU is little-endian. */
151static void felem_to_bin66(u8 out[66], const felem in)
152 {
153 memset(out, 0, 66);
154 (*((limb*) &out[0])) = in[0];
155 (*((limb*) &out[7])) |= in[1] << 2;
156 (*((limb*) &out[14])) |= in[2] << 4;
157 (*((limb*) &out[21])) |= in[3] << 6;
158 (*((limb*) &out[29])) = in[4];
159 (*((limb*) &out[36])) |= in[5] << 2;
160 (*((limb*) &out[43])) |= in[6] << 4;
161 (*((limb*) &out[50])) |= in[7] << 6;
162 (*((limb*) &out[58])) = in[8];
163 }
164
165/* To preserve endianness when using BN_bn2bin and BN_bin2bn */
166static void flip_endian(u8 *out, const u8 *in, unsigned len)
167 {
168 unsigned i;
169 for (i = 0; i < len; ++i)
170 out[i] = in[len-1-i];
171 }
172
173/* BN_to_felem converts an OpenSSL BIGNUM into an felem */
174static int BN_to_felem(felem out, const BIGNUM *bn)
175 {
176 felem_bytearray b_in;
177 felem_bytearray b_out;
178 unsigned num_bytes;
179
180 /* BN_bn2bin eats leading zeroes */
181 memset(b_out, 0, sizeof b_out);
182 num_bytes = BN_num_bytes(bn);
183 if (num_bytes > sizeof b_out)
184 {
185 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
186 return 0;
187 }
188 if (BN_is_negative(bn))
189 {
190 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
191 return 0;
192 }
193 num_bytes = BN_bn2bin(bn, b_in);
194 flip_endian(b_out, b_in, num_bytes);
195 bin66_to_felem(out, b_out);
196 return 1;
197 }
198
199/* felem_to_BN converts an felem into an OpenSSL BIGNUM */
200static BIGNUM *felem_to_BN(BIGNUM *out, const felem in)
201 {
202 felem_bytearray b_in, b_out;
203 felem_to_bin66(b_in, in);
204 flip_endian(b_out, b_in, sizeof b_out);
205 return BN_bin2bn(b_out, sizeof b_out, out);
206 }
207
208
209/* Field operations
210 * ---------------- */
211
212static void felem_one(felem out)
213 {
214 out[0] = 1;
215 out[1] = 0;
216 out[2] = 0;
217 out[3] = 0;
218 out[4] = 0;
219 out[5] = 0;
220 out[6] = 0;
221 out[7] = 0;
222 out[8] = 0;
223 }
224
225static void felem_assign(felem out, const felem in)
226 {
227 out[0] = in[0];
228 out[1] = in[1];
229 out[2] = in[2];
230 out[3] = in[3];
231 out[4] = in[4];
232 out[5] = in[5];
233 out[6] = in[6];
234 out[7] = in[7];
235 out[8] = in[8];
236 }
237
238/* felem_sum64 sets out = out + in. */
239static void felem_sum64(felem out, const felem in)
240 {
241 out[0] += in[0];
242 out[1] += in[1];
243 out[2] += in[2];
244 out[3] += in[3];
245 out[4] += in[4];
246 out[5] += in[5];
247 out[6] += in[6];
248 out[7] += in[7];
249 out[8] += in[8];
250 }
251
252/* felem_scalar sets out = in * scalar */
253static void felem_scalar(felem out, const felem in, limb scalar)
254 {
255 out[0] = in[0] * scalar;
256 out[1] = in[1] * scalar;
257 out[2] = in[2] * scalar;
258 out[3] = in[3] * scalar;
259 out[4] = in[4] * scalar;
260 out[5] = in[5] * scalar;
261 out[6] = in[6] * scalar;
262 out[7] = in[7] * scalar;
263 out[8] = in[8] * scalar;
264 }
265
266/* felem_scalar64 sets out = out * scalar */
267static void felem_scalar64(felem out, limb scalar)
268 {
269 out[0] *= scalar;
270 out[1] *= scalar;
271 out[2] *= scalar;
272 out[3] *= scalar;
273 out[4] *= scalar;
274 out[5] *= scalar;
275 out[6] *= scalar;
276 out[7] *= scalar;
277 out[8] *= scalar;
278 }
279
280/* felem_scalar128 sets out = out * scalar */
281static void felem_scalar128(largefelem out, limb scalar)
282 {
283 out[0] *= scalar;
284 out[1] *= scalar;
285 out[2] *= scalar;
286 out[3] *= scalar;
287 out[4] *= scalar;
288 out[5] *= scalar;
289 out[6] *= scalar;
290 out[7] *= scalar;
291 out[8] *= scalar;
292 }
293
294/* felem_neg sets |out| to |-in|
295 * On entry:
296 * in[i] < 2^59 + 2^14
297 * On exit:
298 * out[i] < 2^62
299 */
300static void felem_neg(felem out, const felem in)
301 {
302 /* In order to prevent underflow, we subtract from 0 mod p. */
303 static const limb two62m3 = (((limb)1) << 62) - (((limb)1) << 5);
304 static const limb two62m2 = (((limb)1) << 62) - (((limb)1) << 4);
305
306 out[0] = two62m3 - in[0];
307 out[1] = two62m2 - in[1];
308 out[2] = two62m2 - in[2];
309 out[3] = two62m2 - in[3];
310 out[4] = two62m2 - in[4];
311 out[5] = two62m2 - in[5];
312 out[6] = two62m2 - in[6];
313 out[7] = two62m2 - in[7];
314 out[8] = two62m2 - in[8];
315 }
316
317/* felem_diff64 subtracts |in| from |out|
318 * On entry:
319 * in[i] < 2^59 + 2^14
320 * On exit:
321 * out[i] < out[i] + 2^62
322 */
323static void felem_diff64(felem out, const felem in)
324 {
325 /* In order to prevent underflow, we add 0 mod p before subtracting. */
326 static const limb two62m3 = (((limb)1) << 62) - (((limb)1) << 5);
327 static const limb two62m2 = (((limb)1) << 62) - (((limb)1) << 4);
328
329 out[0] += two62m3 - in[0];
330 out[1] += two62m2 - in[1];
331 out[2] += two62m2 - in[2];
332 out[3] += two62m2 - in[3];
333 out[4] += two62m2 - in[4];
334 out[5] += two62m2 - in[5];
335 out[6] += two62m2 - in[6];
336 out[7] += two62m2 - in[7];
337 out[8] += two62m2 - in[8];
338 }
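
For reference (my derivation, not stated in the file): writing a = 2^58, the constant vector added in felem_neg and felem_diff64 evaluates to (2^62 - 2^5) + (2^62 - 2^4)*(a + a^2 + ... + a^8) = 16*a^9 - 32 = 2^526 - 2^5 = 32*(2^521 - 1), i.e. 32p, so adding it does not change the value mod p. A standalone check of the telescoping step used in that derivation:

#include <stdio.h>

int main(void)
	{
	/* Check (16a - 32) + (16a - 16)*(a + ... + a^8) == 16*a^9 - 32 with a
	 * small base; with a = 2^58 the right-hand side is 2^526 - 2^5
	 * = 32*(2^521 - 1), a multiple of p. */
	unsigned long long a = 3, sum, pow, lhs, rhs;
	int i;

	sum = 0;
	pow = 1;
	for (i = 1; i <= 8; i++)
		{
		pow *= a;		/* pow = a^i */
		sum += pow;
		}
	lhs = (16 * a - 32) + (16 * a - 16) * sum;
	rhs = 16 * pow * a - 32;	/* 16*a^9 - 32 */
	printf("identity holds: %s\n", lhs == rhs ? "yes" : "no");
	return 0;
	}
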
339
340/* felem_diff_128_64 subtracts |in| from |out|
341 * On entry:
342 * in[i] < 2^62 + 2^17
343 * On exit:
344 * out[i] < out[i] + 2^63
345 */
346static void felem_diff_128_64(largefelem out, const felem in)
347 {
348 /* In order to prevent underflow, we add 0 mod p before subtracting. */
349	static const limb two63m6 = (((limb)1) << 63) - (((limb)1) << 6);
350	static const limb two63m5 = (((limb)1) << 63) - (((limb)1) << 5);
351
352 out[0] += two63m6 - in[0];
353 out[1] += two63m5 - in[1];
354 out[2] += two63m5 - in[2];
355 out[3] += two63m5 - in[3];
356 out[4] += two63m5 - in[4];
357 out[5] += two63m5 - in[5];
358 out[6] += two63m5 - in[6];
359 out[7] += two63m5 - in[7];
360 out[8] += two63m5 - in[8];
361 }
362
363/* felem_diff128 subtracts |in| from |out|
364 * On entry:
365 * in[i] < 2^126
366 * On exit:
367 * out[i] < out[i] + 2^127 - 2^69
368 */
369static void felem_diff128(largefelem out, const largefelem in)
370 {
371 /* In order to prevent underflow, we add 0 mod p before subtracting. */
372 static const uint128_t two127m70 = (((uint128_t)1) << 127) - (((uint128_t)1) << 70);
373 static const uint128_t two127m69 = (((uint128_t)1) << 127) - (((uint128_t)1) << 69);
374
375 out[0] += (two127m70 - in[0]);
376 out[1] += (two127m69 - in[1]);
377 out[2] += (two127m69 - in[2]);
378 out[3] += (two127m69 - in[3]);
379 out[4] += (two127m69 - in[4]);
380 out[5] += (two127m69 - in[5]);
381 out[6] += (two127m69 - in[6]);
382 out[7] += (two127m69 - in[7]);
383 out[8] += (two127m69 - in[8]);
384 }
385
386/* felem_square sets |out| = |in|^2
387 * On entry:
388 * in[i] < 2^62
389 * On exit:
390 * out[i] < 17 * max(in[i]) * max(in[i])
391 */
392static void felem_square(largefelem out, const felem in)
393 {
394 felem inx2, inx4;
395 felem_scalar(inx2, in, 2);
396 felem_scalar(inx4, in, 4);
397
398	/* We have many cases where we want to do
399 * in[x] * in[y] +
400 * in[y] * in[x]
401 * This is obviously just
402 * 2 * in[x] * in[y]
403 * However, rather than do the doubling on the 128 bit result, we
404 * double one of the inputs to the multiplication by reading from
405 * |inx2| */
406
407 out[0] = ((uint128_t) in[0]) * in[0];
408 out[1] = ((uint128_t) in[0]) * inx2[1];
409 out[2] = ((uint128_t) in[0]) * inx2[2] +
410 ((uint128_t) in[1]) * in[1];
411 out[3] = ((uint128_t) in[0]) * inx2[3] +
412 ((uint128_t) in[1]) * inx2[2];
413 out[4] = ((uint128_t) in[0]) * inx2[4] +
414 ((uint128_t) in[1]) * inx2[3] +
415 ((uint128_t) in[2]) * in[2];
416 out[5] = ((uint128_t) in[0]) * inx2[5] +
417 ((uint128_t) in[1]) * inx2[4] +
418 ((uint128_t) in[2]) * inx2[3];
419 out[6] = ((uint128_t) in[0]) * inx2[6] +
420 ((uint128_t) in[1]) * inx2[5] +
421 ((uint128_t) in[2]) * inx2[4] +
422 ((uint128_t) in[3]) * in[3];
423 out[7] = ((uint128_t) in[0]) * inx2[7] +
424 ((uint128_t) in[1]) * inx2[6] +
425 ((uint128_t) in[2]) * inx2[5] +
426 ((uint128_t) in[3]) * inx2[4];
427 out[8] = ((uint128_t) in[0]) * inx2[8] +
428 ((uint128_t) in[1]) * inx2[7] +
429 ((uint128_t) in[2]) * inx2[6] +
430 ((uint128_t) in[3]) * inx2[5] +
431 ((uint128_t) in[4]) * in[4];
432
433 /* The remaining limbs fall above 2^521, with the first falling at
434 * 2^522. They correspond to locations one bit up from the limbs
435 * produced above so we would have to multiply by two to align them.
436 * Again, rather than operate on the 128-bit result, we double one of
437 * the inputs to the multiplication. If we want to double for both this
438 * reason, and the reason above, then we end up multiplying by four. */
439
440 /* 9 */
441 out[0] += ((uint128_t) in[1]) * inx4[8] +
442 ((uint128_t) in[2]) * inx4[7] +
443 ((uint128_t) in[3]) * inx4[6] +
444 ((uint128_t) in[4]) * inx4[5];
445
446 /* 10 */
447 out[1] += ((uint128_t) in[2]) * inx4[8] +
448 ((uint128_t) in[3]) * inx4[7] +
449 ((uint128_t) in[4]) * inx4[6] +
450 ((uint128_t) in[5]) * inx2[5];
451
452 /* 11 */
453 out[2] += ((uint128_t) in[3]) * inx4[8] +
454 ((uint128_t) in[4]) * inx4[7] +
455 ((uint128_t) in[5]) * inx4[6];
456
457 /* 12 */
458 out[3] += ((uint128_t) in[4]) * inx4[8] +
459 ((uint128_t) in[5]) * inx4[7] +
460 ((uint128_t) in[6]) * inx2[6];
461
462 /* 13 */
463 out[4] += ((uint128_t) in[5]) * inx4[8] +
464 ((uint128_t) in[6]) * inx4[7];
465
466 /* 14 */
467 out[5] += ((uint128_t) in[6]) * inx4[8] +
468 ((uint128_t) in[7]) * inx2[7];
469
470 /* 15 */
471 out[6] += ((uint128_t) in[7]) * inx4[8];
472
473 /* 16 */
474 out[7] += ((uint128_t) in[8]) * inx2[8];
475 }
476
477/* felem_mul sets |out| = |in1| * |in2|
478 * On entry:
479 * in1[i] < 2^64
480 * in2[i] < 2^63
481 * On exit:
482 * out[i] < 17 * max(in1[i]) * max(in2[i])
483 */
484static void felem_mul(largefelem out, const felem in1, const felem in2)
485 {
486 felem in2x2;
487 felem_scalar(in2x2, in2, 2);
488
489 out[0] = ((uint128_t) in1[0]) * in2[0];
490
491 out[1] = ((uint128_t) in1[0]) * in2[1] +
492 ((uint128_t) in1[1]) * in2[0];
493
494 out[2] = ((uint128_t) in1[0]) * in2[2] +
495 ((uint128_t) in1[1]) * in2[1] +
496 ((uint128_t) in1[2]) * in2[0];
497
498 out[3] = ((uint128_t) in1[0]) * in2[3] +
499 ((uint128_t) in1[1]) * in2[2] +
500 ((uint128_t) in1[2]) * in2[1] +
501 ((uint128_t) in1[3]) * in2[0];
502
503 out[4] = ((uint128_t) in1[0]) * in2[4] +
504 ((uint128_t) in1[1]) * in2[3] +
505 ((uint128_t) in1[2]) * in2[2] +
506 ((uint128_t) in1[3]) * in2[1] +
507 ((uint128_t) in1[4]) * in2[0];
508
509 out[5] = ((uint128_t) in1[0]) * in2[5] +
510 ((uint128_t) in1[1]) * in2[4] +
511 ((uint128_t) in1[2]) * in2[3] +
512 ((uint128_t) in1[3]) * in2[2] +
513 ((uint128_t) in1[4]) * in2[1] +
514 ((uint128_t) in1[5]) * in2[0];
515
516 out[6] = ((uint128_t) in1[0]) * in2[6] +
517 ((uint128_t) in1[1]) * in2[5] +
518 ((uint128_t) in1[2]) * in2[4] +
519 ((uint128_t) in1[3]) * in2[3] +
520 ((uint128_t) in1[4]) * in2[2] +
521 ((uint128_t) in1[5]) * in2[1] +
522 ((uint128_t) in1[6]) * in2[0];
523
524 out[7] = ((uint128_t) in1[0]) * in2[7] +
525 ((uint128_t) in1[1]) * in2[6] +
526 ((uint128_t) in1[2]) * in2[5] +
527 ((uint128_t) in1[3]) * in2[4] +
528 ((uint128_t) in1[4]) * in2[3] +
529 ((uint128_t) in1[5]) * in2[2] +
530 ((uint128_t) in1[6]) * in2[1] +
531 ((uint128_t) in1[7]) * in2[0];
532
533 out[8] = ((uint128_t) in1[0]) * in2[8] +
534 ((uint128_t) in1[1]) * in2[7] +
535 ((uint128_t) in1[2]) * in2[6] +
536 ((uint128_t) in1[3]) * in2[5] +
537 ((uint128_t) in1[4]) * in2[4] +
538 ((uint128_t) in1[5]) * in2[3] +
539 ((uint128_t) in1[6]) * in2[2] +
540 ((uint128_t) in1[7]) * in2[1] +
541 ((uint128_t) in1[8]) * in2[0];
542
543 /* See comment in felem_square about the use of in2x2 here */
544
545 out[0] += ((uint128_t) in1[1]) * in2x2[8] +
546 ((uint128_t) in1[2]) * in2x2[7] +
547 ((uint128_t) in1[3]) * in2x2[6] +
548 ((uint128_t) in1[4]) * in2x2[5] +
549 ((uint128_t) in1[5]) * in2x2[4] +
550 ((uint128_t) in1[6]) * in2x2[3] +
551 ((uint128_t) in1[7]) * in2x2[2] +
552 ((uint128_t) in1[8]) * in2x2[1];
553
554 out[1] += ((uint128_t) in1[2]) * in2x2[8] +
555 ((uint128_t) in1[3]) * in2x2[7] +
556 ((uint128_t) in1[4]) * in2x2[6] +
557 ((uint128_t) in1[5]) * in2x2[5] +
558 ((uint128_t) in1[6]) * in2x2[4] +
559 ((uint128_t) in1[7]) * in2x2[3] +
560 ((uint128_t) in1[8]) * in2x2[2];
561
562 out[2] += ((uint128_t) in1[3]) * in2x2[8] +
563 ((uint128_t) in1[4]) * in2x2[7] +
564 ((uint128_t) in1[5]) * in2x2[6] +
565 ((uint128_t) in1[6]) * in2x2[5] +
566 ((uint128_t) in1[7]) * in2x2[4] +
567 ((uint128_t) in1[8]) * in2x2[3];
568
569 out[3] += ((uint128_t) in1[4]) * in2x2[8] +
570 ((uint128_t) in1[5]) * in2x2[7] +
571 ((uint128_t) in1[6]) * in2x2[6] +
572 ((uint128_t) in1[7]) * in2x2[5] +
573 ((uint128_t) in1[8]) * in2x2[4];
574
575 out[4] += ((uint128_t) in1[5]) * in2x2[8] +
576 ((uint128_t) in1[6]) * in2x2[7] +
577 ((uint128_t) in1[7]) * in2x2[6] +
578 ((uint128_t) in1[8]) * in2x2[5];
579
580 out[5] += ((uint128_t) in1[6]) * in2x2[8] +
581 ((uint128_t) in1[7]) * in2x2[7] +
582 ((uint128_t) in1[8]) * in2x2[6];
583
584 out[6] += ((uint128_t) in1[7]) * in2x2[8] +
585 ((uint128_t) in1[8]) * in2x2[7];
586
587 out[7] += ((uint128_t) in1[8]) * in2x2[8];
588 }
589
590static const limb bottom52bits = 0xfffffffffffff;
591
592/* felem_reduce converts a largefelem to an felem.
593 * On entry:
594 * in[i] < 2^128
595 * On exit:
596 * out[i] < 2^59 + 2^14
597 */
598static void felem_reduce(felem out, const largefelem in)
599 {
600 u64 overflow1, overflow2;
601
602 out[0] = ((limb) in[0]) & bottom58bits;
603 out[1] = ((limb) in[1]) & bottom58bits;
604 out[2] = ((limb) in[2]) & bottom58bits;
605 out[3] = ((limb) in[3]) & bottom58bits;
606 out[4] = ((limb) in[4]) & bottom58bits;
607 out[5] = ((limb) in[5]) & bottom58bits;
608 out[6] = ((limb) in[6]) & bottom58bits;
609 out[7] = ((limb) in[7]) & bottom58bits;
610 out[8] = ((limb) in[8]) & bottom58bits;
611
612 /* out[i] < 2^58 */
613
614 out[1] += ((limb) in[0]) >> 58;
615 out[1] += (((limb) (in[0] >> 64)) & bottom52bits) << 6;
616 /* out[1] < 2^58 + 2^6 + 2^58
617 * = 2^59 + 2^6 */
618 out[2] += ((limb) (in[0] >> 64)) >> 52;
619
620 out[2] += ((limb) in[1]) >> 58;
621 out[2] += (((limb) (in[1] >> 64)) & bottom52bits) << 6;
622 out[3] += ((limb) (in[1] >> 64)) >> 52;
623
624 out[3] += ((limb) in[2]) >> 58;
625 out[3] += (((limb) (in[2] >> 64)) & bottom52bits) << 6;
626 out[4] += ((limb) (in[2] >> 64)) >> 52;
627
628 out[4] += ((limb) in[3]) >> 58;
629 out[4] += (((limb) (in[3] >> 64)) & bottom52bits) << 6;
630 out[5] += ((limb) (in[3] >> 64)) >> 52;
631
632 out[5] += ((limb) in[4]) >> 58;
633 out[5] += (((limb) (in[4] >> 64)) & bottom52bits) << 6;
634 out[6] += ((limb) (in[4] >> 64)) >> 52;
635
636 out[6] += ((limb) in[5]) >> 58;
637 out[6] += (((limb) (in[5] >> 64)) & bottom52bits) << 6;
638 out[7] += ((limb) (in[5] >> 64)) >> 52;
639
640 out[7] += ((limb) in[6]) >> 58;
641 out[7] += (((limb) (in[6] >> 64)) & bottom52bits) << 6;
642 out[8] += ((limb) (in[6] >> 64)) >> 52;
643
644 out[8] += ((limb) in[7]) >> 58;
645 out[8] += (((limb) (in[7] >> 64)) & bottom52bits) << 6;
646 /* out[x > 1] < 2^58 + 2^6 + 2^58 + 2^12
647 * < 2^59 + 2^13 */
648 overflow1 = ((limb) (in[7] >> 64)) >> 52;
649
650 overflow1 += ((limb) in[8]) >> 58;
651 overflow1 += (((limb) (in[8] >> 64)) & bottom52bits) << 6;
652 overflow2 = ((limb) (in[8] >> 64)) >> 52;
653
654 overflow1 <<= 1; /* overflow1 < 2^13 + 2^7 + 2^59 */
655 overflow2 <<= 1; /* overflow2 < 2^13 */
656
657 out[0] += overflow1; /* out[0] < 2^60 */
658 out[1] += overflow2; /* out[1] < 2^59 + 2^6 + 2^13 */
659
660 out[1] += out[0] >> 58; out[0] &= bottom58bits;
661 /* out[0] < 2^58
662 * out[1] < 2^59 + 2^6 + 2^13 + 2^2
663 * < 2^59 + 2^14 */
664 }
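
The shift-by-one applied to overflow1 and overflow2 above comes from the identity 2^(521+k) = 2^k (mod 2^521 - 1): bits that spill past bit 521 re-enter one position higher, doubled, at limb 0 (overflow1) and limb 1 (overflow2). The same identity for a toy Mersenne modulus can be checked directly (standalone sketch, not part of the imported file):

#include <stdio.h>

int main(void)
	{
	/* Toy check of 2^(n+k) == 2^k (mod 2^n - 1) for n = 61, using the
	 * compiler-provided 128-bit integers this file already relies on. */
	const unsigned __int128 p = (((unsigned __int128)1) << 61) - 1;
	unsigned k;

	for (k = 0; k < 8; k++)
		{
		unsigned __int128 v = ((unsigned __int128)1) << (61 + k);
		printf("2^(61+%u) mod (2^61-1) == 2^%u: %s\n", k, k,
		    (v % p) == (((unsigned __int128)1) << k) ? "yes" : "no");
		}
	return 0;
	}
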
665
666static void felem_square_reduce(felem out, const felem in)
667 {
668 largefelem tmp;
669 felem_square(tmp, in);
670 felem_reduce(out, tmp);
671 }
672
673static void felem_mul_reduce(felem out, const felem in1, const felem in2)
674 {
675 largefelem tmp;
676 felem_mul(tmp, in1, in2);
677 felem_reduce(out, tmp);
678 }
679
680/* felem_inv calculates |out| = |in|^{-1}
681 *
682 * Based on Fermat's Little Theorem:
683 * a^p = a (mod p)
684 * a^{p-1} = 1 (mod p)
685 * a^{p-2} = a^{-1} (mod p)
686 */
687static void felem_inv(felem out, const felem in)
688 {
689 felem ftmp, ftmp2, ftmp3, ftmp4;
690 largefelem tmp;
691 unsigned i;
692
693 felem_square(tmp, in); felem_reduce(ftmp, tmp); /* 2^1 */
694 felem_mul(tmp, in, ftmp); felem_reduce(ftmp, tmp); /* 2^2 - 2^0 */
695 felem_assign(ftmp2, ftmp);
696 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^3 - 2^1 */
697 felem_mul(tmp, in, ftmp); felem_reduce(ftmp, tmp); /* 2^3 - 2^0 */
698 felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^4 - 2^1 */
699
700 felem_square(tmp, ftmp2); felem_reduce(ftmp3, tmp); /* 2^3 - 2^1 */
701 felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp); /* 2^4 - 2^2 */
702 felem_mul(tmp, ftmp3, ftmp2); felem_reduce(ftmp3, tmp); /* 2^4 - 2^0 */
703
704 felem_assign(ftmp2, ftmp3);
705 felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp); /* 2^5 - 2^1 */
706 felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp); /* 2^6 - 2^2 */
707 felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp); /* 2^7 - 2^3 */
708 felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp); /* 2^8 - 2^4 */
709 felem_assign(ftmp4, ftmp3);
710 felem_mul(tmp, ftmp3, ftmp); felem_reduce(ftmp4, tmp); /* 2^8 - 2^1 */
711 felem_square(tmp, ftmp4); felem_reduce(ftmp4, tmp); /* 2^9 - 2^2 */
712 felem_mul(tmp, ftmp3, ftmp2); felem_reduce(ftmp3, tmp); /* 2^8 - 2^0 */
713 felem_assign(ftmp2, ftmp3);
714
715 for (i = 0; i < 8; i++)
716 {
717 felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp); /* 2^16 - 2^8 */
718 }
719 felem_mul(tmp, ftmp3, ftmp2); felem_reduce(ftmp3, tmp); /* 2^16 - 2^0 */
720 felem_assign(ftmp2, ftmp3);
721
722 for (i = 0; i < 16; i++)
723 {
724 felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp); /* 2^32 - 2^16 */
725 }
726 felem_mul(tmp, ftmp3, ftmp2); felem_reduce(ftmp3, tmp); /* 2^32 - 2^0 */
727 felem_assign(ftmp2, ftmp3);
728
729 for (i = 0; i < 32; i++)
730 {
731 felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp); /* 2^64 - 2^32 */
732 }
733 felem_mul(tmp, ftmp3, ftmp2); felem_reduce(ftmp3, tmp); /* 2^64 - 2^0 */
734 felem_assign(ftmp2, ftmp3);
735
736 for (i = 0; i < 64; i++)
737 {
738 felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp); /* 2^128 - 2^64 */
739 }
740 felem_mul(tmp, ftmp3, ftmp2); felem_reduce(ftmp3, tmp); /* 2^128 - 2^0 */
741 felem_assign(ftmp2, ftmp3);
742
743 for (i = 0; i < 128; i++)
744 {
745 felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp); /* 2^256 - 2^128 */
746 }
747 felem_mul(tmp, ftmp3, ftmp2); felem_reduce(ftmp3, tmp); /* 2^256 - 2^0 */
748 felem_assign(ftmp2, ftmp3);
749
750 for (i = 0; i < 256; i++)
751 {
752 felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp); /* 2^512 - 2^256 */
753 }
754 felem_mul(tmp, ftmp3, ftmp2); felem_reduce(ftmp3, tmp); /* 2^512 - 2^0 */
755
756 for (i = 0; i < 9; i++)
757 {
758 felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp); /* 2^521 - 2^9 */
759 }
 760	felem_mul(tmp, ftmp3, ftmp4); felem_reduce(ftmp3, tmp); /* 2^521 - 2^2 */
 761	felem_mul(tmp, ftmp3, in); felem_reduce(out, tmp); /* 2^521 - 3 */
762}
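
/* Editor's note -- illustrative sketch, not part of the imported OpenSSL
 * source.  felem_inv above computes a^(p-2) mod p via an addition chain over
 * the exponent 2^521 - 3.  The same Fermat-inversion idea, written as plain
 * square-and-multiply for a toy prime; all names here are hypothetical and
 * the block is kept under #if 0 so it does not affect any build: */
#if 0
#include <stdint.h>
#include <stdio.h>

static uint64_t toy_modexp(uint64_t a, uint64_t e, uint64_t p)
	{
	uint64_t r = 1;
	a %= p;
	while (e)
		{
		if (e & 1)
			r = (r * a) % p;	/* multiply step */
		a = (a * a) % p;		/* square step */
		e >>= 1;
		}
	return r;
	}

int main(void)
	{
	const uint64_t p = 521;	/* small prime, so products fit in 64 bits */
	uint64_t inv = toy_modexp(3, p - 2, p);	/* 3^(p-2) == 3^(-1) (mod p) */
	printf("3 * %llu %% %llu = %llu\n", (unsigned long long)inv,
	    (unsigned long long)p, (unsigned long long)((3 * inv) % p));
	return 0;	/* prints "... = 1" */
	}
#endif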
763
764/* This is 2^521-1, expressed as an felem */
765static const felem kPrime =
766 {
767 0x03ffffffffffffff, 0x03ffffffffffffff, 0x03ffffffffffffff,
768 0x03ffffffffffffff, 0x03ffffffffffffff, 0x03ffffffffffffff,
769 0x03ffffffffffffff, 0x03ffffffffffffff, 0x01ffffffffffffff
770 };
771
772/* felem_is_zero returns a limb with all bits set if |in| == 0 (mod p) and 0
773 * otherwise.
774 * On entry:
775 * in[i] < 2^59 + 2^14
776 */
777static limb felem_is_zero(const felem in)
778 {
779 felem ftmp;
780 limb is_zero, is_p;
781 felem_assign(ftmp, in);
782
783 ftmp[0] += ftmp[8] >> 57; ftmp[8] &= bottom57bits;
784 /* ftmp[8] < 2^57 */
785 ftmp[1] += ftmp[0] >> 58; ftmp[0] &= bottom58bits;
786 ftmp[2] += ftmp[1] >> 58; ftmp[1] &= bottom58bits;
787 ftmp[3] += ftmp[2] >> 58; ftmp[2] &= bottom58bits;
788 ftmp[4] += ftmp[3] >> 58; ftmp[3] &= bottom58bits;
789 ftmp[5] += ftmp[4] >> 58; ftmp[4] &= bottom58bits;
790 ftmp[6] += ftmp[5] >> 58; ftmp[5] &= bottom58bits;
791 ftmp[7] += ftmp[6] >> 58; ftmp[6] &= bottom58bits;
792 ftmp[8] += ftmp[7] >> 58; ftmp[7] &= bottom58bits;
793 /* ftmp[8] < 2^57 + 4 */
794
795 /* The ninth limb of 2*(2^521-1) is 0x03ffffffffffffff, which is
796 * greater than our bound for ftmp[8]. Therefore we only have to check
 797	 * whether the value is zero or 2^521-1. */
798
799 is_zero = 0;
800 is_zero |= ftmp[0];
801 is_zero |= ftmp[1];
802 is_zero |= ftmp[2];
803 is_zero |= ftmp[3];
804 is_zero |= ftmp[4];
805 is_zero |= ftmp[5];
806 is_zero |= ftmp[6];
807 is_zero |= ftmp[7];
808 is_zero |= ftmp[8];
809
810 is_zero--;
811 /* We know that ftmp[i] < 2^63, therefore the only way that the top bit
812 * can be set is if is_zero was 0 before the decrement. */
813 is_zero = ((s64) is_zero) >> 63;
814
815 is_p = ftmp[0] ^ kPrime[0];
816 is_p |= ftmp[1] ^ kPrime[1];
817 is_p |= ftmp[2] ^ kPrime[2];
818 is_p |= ftmp[3] ^ kPrime[3];
819 is_p |= ftmp[4] ^ kPrime[4];
820 is_p |= ftmp[5] ^ kPrime[5];
821 is_p |= ftmp[6] ^ kPrime[6];
822 is_p |= ftmp[7] ^ kPrime[7];
823 is_p |= ftmp[8] ^ kPrime[8];
824
825 is_p--;
826 is_p = ((s64) is_p) >> 63;
827
828 is_zero |= is_p;
829 return is_zero;
830 }
831
832static int felem_is_zero_int(const felem in)
833 {
834 return (int) (felem_is_zero(in) & ((limb)1));
835 }
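
/* Editor's note -- illustrative sketch, not part of the imported OpenSSL
 * source.  felem_is_zero relies on a branch-free pattern: OR all limbs
 * together, decrement, and arithmetic-shift the top bit across the word to
 * get an all-ones/all-zero mask.  Minimal form of the trick, valid for
 * x < 2^63 just like the code above (kept under #if 0): */
#if 0
#include <stdint.h>

/* Returns all-ones if x == 0, and 0 otherwise; requires x < 2^63. */
static uint64_t mask_if_zero(uint64_t x)
	{
	x--;	/* top bit becomes set only if x was 0 */
	return (uint64_t)(((int64_t)x) >> 63);	/* smear the top bit down */
	}
#endif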
836
837/* felem_contract converts |in| to its unique, minimal representation.
838 * On entry:
839 * in[i] < 2^59 + 2^14
840 */
841static void felem_contract(felem out, const felem in)
842 {
843 limb is_p, is_greater, sign;
844 static const limb two58 = ((limb)1) << 58;
845
846 felem_assign(out, in);
847
848 out[0] += out[8] >> 57; out[8] &= bottom57bits;
849 /* out[8] < 2^57 */
850 out[1] += out[0] >> 58; out[0] &= bottom58bits;
851 out[2] += out[1] >> 58; out[1] &= bottom58bits;
852 out[3] += out[2] >> 58; out[2] &= bottom58bits;
853 out[4] += out[3] >> 58; out[3] &= bottom58bits;
854 out[5] += out[4] >> 58; out[4] &= bottom58bits;
855 out[6] += out[5] >> 58; out[5] &= bottom58bits;
856 out[7] += out[6] >> 58; out[6] &= bottom58bits;
857 out[8] += out[7] >> 58; out[7] &= bottom58bits;
858 /* out[8] < 2^57 + 4 */
859
860 /* If the value is greater than 2^521-1 then we have to subtract
861 * 2^521-1 out. See the comments in felem_is_zero regarding why we
862 * don't test for other multiples of the prime. */
863
864 /* First, if |out| is equal to 2^521-1, we subtract it out to get zero. */
865
866 is_p = out[0] ^ kPrime[0];
867 is_p |= out[1] ^ kPrime[1];
868 is_p |= out[2] ^ kPrime[2];
869 is_p |= out[3] ^ kPrime[3];
870 is_p |= out[4] ^ kPrime[4];
871 is_p |= out[5] ^ kPrime[5];
872 is_p |= out[6] ^ kPrime[6];
873 is_p |= out[7] ^ kPrime[7];
874 is_p |= out[8] ^ kPrime[8];
875
876 is_p--;
877 is_p &= is_p << 32;
878 is_p &= is_p << 16;
879 is_p &= is_p << 8;
880 is_p &= is_p << 4;
881 is_p &= is_p << 2;
882 is_p &= is_p << 1;
883 is_p = ((s64) is_p) >> 63;
884 is_p = ~is_p;
885
886 /* is_p is 0 iff |out| == 2^521-1 and all ones otherwise */
887
888 out[0] &= is_p;
889 out[1] &= is_p;
890 out[2] &= is_p;
891 out[3] &= is_p;
892 out[4] &= is_p;
893 out[5] &= is_p;
894 out[6] &= is_p;
895 out[7] &= is_p;
896 out[8] &= is_p;
897
898 /* In order to test that |out| >= 2^521-1 we need only test if out[8]
899 * >> 57 is greater than zero as (2^521-1) + x >= 2^522 */
900 is_greater = out[8] >> 57;
901 is_greater |= is_greater << 32;
902 is_greater |= is_greater << 16;
903 is_greater |= is_greater << 8;
904 is_greater |= is_greater << 4;
905 is_greater |= is_greater << 2;
906 is_greater |= is_greater << 1;
907 is_greater = ((s64) is_greater) >> 63;
908
909 out[0] -= kPrime[0] & is_greater;
910 out[1] -= kPrime[1] & is_greater;
911 out[2] -= kPrime[2] & is_greater;
912 out[3] -= kPrime[3] & is_greater;
913 out[4] -= kPrime[4] & is_greater;
914 out[5] -= kPrime[5] & is_greater;
915 out[6] -= kPrime[6] & is_greater;
916 out[7] -= kPrime[7] & is_greater;
917 out[8] -= kPrime[8] & is_greater;
918
919 /* Eliminate negative coefficients */
920 sign = -(out[0] >> 63); out[0] += (two58 & sign); out[1] -= (1 & sign);
921 sign = -(out[1] >> 63); out[1] += (two58 & sign); out[2] -= (1 & sign);
922 sign = -(out[2] >> 63); out[2] += (two58 & sign); out[3] -= (1 & sign);
923 sign = -(out[3] >> 63); out[3] += (two58 & sign); out[4] -= (1 & sign);
924 sign = -(out[4] >> 63); out[4] += (two58 & sign); out[5] -= (1 & sign);
 925	sign = -(out[5] >> 63); out[5] += (two58 & sign); out[6] -= (1 & sign);
926 sign = -(out[6] >> 63); out[6] += (two58 & sign); out[7] -= (1 & sign);
927 sign = -(out[7] >> 63); out[7] += (two58 & sign); out[8] -= (1 & sign);
928 sign = -(out[5] >> 63); out[5] += (two58 & sign); out[6] -= (1 & sign);
929 sign = -(out[6] >> 63); out[6] += (two58 & sign); out[7] -= (1 & sign);
930 sign = -(out[7] >> 63); out[7] += (two58 & sign); out[8] -= (1 & sign);
931 }
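
/* Editor's note -- illustrative sketch, not part of the imported OpenSSL
 * source.  The XOR-accumulate / decrement / AND-fold ladder used above is a
 * constant-time equality test: it yields an all-zero mask when two limb
 * vectors are equal and an all-ones mask otherwise, without data-dependent
 * branches.  A generic, hypothetical helper in the same spirit (it relies on
 * arithmetic right shift of negative values, as the code above does; kept
 * under #if 0): */
#if 0
#include <stddef.h>
#include <stdint.h>

/* Returns 0 if a[0..n-1] == b[0..n-1], all-ones otherwise. */
static uint64_t ct_not_equal_mask(const uint64_t *a, const uint64_t *b,
    size_t n)
	{
	uint64_t d = 0;
	size_t i;
	for (i = 0; i < n; i++)
		d |= a[i] ^ b[i];	/* d == 0 iff all limbs match */
	d--;			/* all-ones iff d was 0 */
	d &= d << 32;		/* AND-fold: final top bit = AND of all bits */
	d &= d << 16;
	d &= d << 8;
	d &= d << 4;
	d &= d << 2;
	d &= d << 1;
	return ~(uint64_t)(((int64_t)d) >> 63);
	}
#endif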
932
933/* Group operations
934 * ----------------
935 *
936 * Building on top of the field operations we have the operations on the
937 * elliptic curve group itself. Points on the curve are represented in Jacobian
938 * coordinates */
939
 940/* point_double calculates 2*(x_in, y_in, z_in)
941 *
942 * The method is taken from:
943 * http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2001-b
944 *
 945 * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed,
946 * while x_out == y_in is not (maybe this works, but it's not tested). */
947static void
948point_double(felem x_out, felem y_out, felem z_out,
949 const felem x_in, const felem y_in, const felem z_in)
950 {
951 largefelem tmp, tmp2;
952 felem delta, gamma, beta, alpha, ftmp, ftmp2;
953
954 felem_assign(ftmp, x_in);
955 felem_assign(ftmp2, x_in);
956
957 /* delta = z^2 */
958 felem_square(tmp, z_in);
959 felem_reduce(delta, tmp); /* delta[i] < 2^59 + 2^14 */
960
961 /* gamma = y^2 */
962 felem_square(tmp, y_in);
963 felem_reduce(gamma, tmp); /* gamma[i] < 2^59 + 2^14 */
964
965 /* beta = x*gamma */
966 felem_mul(tmp, x_in, gamma);
967 felem_reduce(beta, tmp); /* beta[i] < 2^59 + 2^14 */
968
969 /* alpha = 3*(x-delta)*(x+delta) */
970 felem_diff64(ftmp, delta);
971 /* ftmp[i] < 2^61 */
972 felem_sum64(ftmp2, delta);
973 /* ftmp2[i] < 2^60 + 2^15 */
974 felem_scalar64(ftmp2, 3);
975 /* ftmp2[i] < 3*2^60 + 3*2^15 */
976 felem_mul(tmp, ftmp, ftmp2);
977 /* tmp[i] < 17(3*2^121 + 3*2^76)
978 * = 61*2^121 + 61*2^76
979 * < 64*2^121 + 64*2^76
980 * = 2^127 + 2^82
981 * < 2^128 */
982 felem_reduce(alpha, tmp);
983
984 /* x' = alpha^2 - 8*beta */
985 felem_square(tmp, alpha);
986 /* tmp[i] < 17*2^120
987 * < 2^125 */
988 felem_assign(ftmp, beta);
989 felem_scalar64(ftmp, 8);
990 /* ftmp[i] < 2^62 + 2^17 */
991 felem_diff_128_64(tmp, ftmp);
992 /* tmp[i] < 2^125 + 2^63 + 2^62 + 2^17 */
993 felem_reduce(x_out, tmp);
994
995 /* z' = (y + z)^2 - gamma - delta */
996 felem_sum64(delta, gamma);
997 /* delta[i] < 2^60 + 2^15 */
998 felem_assign(ftmp, y_in);
999 felem_sum64(ftmp, z_in);
1000 /* ftmp[i] < 2^60 + 2^15 */
1001 felem_square(tmp, ftmp);
1002 /* tmp[i] < 17(2^122)
1003 * < 2^127 */
1004 felem_diff_128_64(tmp, delta);
1005 /* tmp[i] < 2^127 + 2^63 */
1006 felem_reduce(z_out, tmp);
1007
1008 /* y' = alpha*(4*beta - x') - 8*gamma^2 */
1009 felem_scalar64(beta, 4);
1010 /* beta[i] < 2^61 + 2^16 */
1011 felem_diff64(beta, x_out);
1012 /* beta[i] < 2^61 + 2^60 + 2^16 */
1013 felem_mul(tmp, alpha, beta);
1014 /* tmp[i] < 17*((2^59 + 2^14)(2^61 + 2^60 + 2^16))
1015 * = 17*(2^120 + 2^75 + 2^119 + 2^74 + 2^75 + 2^30)
1016 * = 17*(2^120 + 2^119 + 2^76 + 2^74 + 2^30)
1017 * < 2^128 */
1018 felem_square(tmp2, gamma);
1019 /* tmp2[i] < 17*(2^59 + 2^14)^2
1020 * = 17*(2^118 + 2^74 + 2^28) */
1021 felem_scalar128(tmp2, 8);
1022 /* tmp2[i] < 8*17*(2^118 + 2^74 + 2^28)
1023 * = 2^125 + 2^121 + 2^81 + 2^77 + 2^35 + 2^31
1024 * < 2^126 */
1025 felem_diff128(tmp, tmp2);
1026 /* tmp[i] < 2^127 - 2^69 + 17(2^120 + 2^119 + 2^76 + 2^74 + 2^30)
1027 * = 2^127 + 2^124 + 2^122 + 2^120 + 2^118 + 2^80 + 2^78 + 2^76 +
1028 * 2^74 + 2^69 + 2^34 + 2^30
1029 * < 2^128 */
1030 felem_reduce(y_out, tmp);
1031 }
1032
1033/* copy_conditional copies in to out iff mask is all ones. */
1034static void
1035copy_conditional(felem out, const felem in, limb mask)
1036 {
1037 unsigned i;
1038 for (i = 0; i < NLIMBS; ++i)
1039 {
1040 const limb tmp = mask & (in[i] ^ out[i]);
1041 out[i] ^= tmp;
1042 }
1043 }
1044
 1045/* point_add calculates (x1, y1, z1) + (x2, y2, z2)
1046 *
1047 * The method is taken from
1048 * http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-2007-bl,
1049 * adapted for mixed addition (z2 = 1, or z2 = 0 for the point at infinity).
1050 *
1051 * This function includes a branch for checking whether the two input points
1052 * are equal (while not equal to the point at infinity). This case never
1053 * happens during single point multiplication, so there is no timing leak for
1054 * ECDH or ECDSA signing. */
1055static void point_add(felem x3, felem y3, felem z3,
1056 const felem x1, const felem y1, const felem z1,
1057 const int mixed, const felem x2, const felem y2, const felem z2)
1058 {
1059 felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, ftmp6, x_out, y_out, z_out;
1060 largefelem tmp, tmp2;
1061 limb x_equal, y_equal, z1_is_zero, z2_is_zero;
1062
1063 z1_is_zero = felem_is_zero(z1);
1064 z2_is_zero = felem_is_zero(z2);
1065
1066 /* ftmp = z1z1 = z1**2 */
1067 felem_square(tmp, z1);
1068 felem_reduce(ftmp, tmp);
1069
1070 if (!mixed)
1071 {
1072 /* ftmp2 = z2z2 = z2**2 */
1073 felem_square(tmp, z2);
1074 felem_reduce(ftmp2, tmp);
1075
1076 /* u1 = ftmp3 = x1*z2z2 */
1077 felem_mul(tmp, x1, ftmp2);
1078 felem_reduce(ftmp3, tmp);
1079
1080 /* ftmp5 = z1 + z2 */
1081 felem_assign(ftmp5, z1);
1082 felem_sum64(ftmp5, z2);
1083 /* ftmp5[i] < 2^61 */
1084
1085 /* ftmp5 = (z1 + z2)**2 - z1z1 - z2z2 = 2*z1z2 */
1086 felem_square(tmp, ftmp5);
1087 /* tmp[i] < 17*2^122 */
1088 felem_diff_128_64(tmp, ftmp);
1089 /* tmp[i] < 17*2^122 + 2^63 */
1090 felem_diff_128_64(tmp, ftmp2);
1091 /* tmp[i] < 17*2^122 + 2^64 */
1092 felem_reduce(ftmp5, tmp);
1093
1094 /* ftmp2 = z2 * z2z2 */
1095 felem_mul(tmp, ftmp2, z2);
1096 felem_reduce(ftmp2, tmp);
1097
1098 /* s1 = ftmp6 = y1 * z2**3 */
1099 felem_mul(tmp, y1, ftmp2);
1100 felem_reduce(ftmp6, tmp);
1101 }
1102 else
1103 {
1104 /* We'll assume z2 = 1 (special case z2 = 0 is handled later) */
1105
1106 /* u1 = ftmp3 = x1*z2z2 */
1107 felem_assign(ftmp3, x1);
1108
1109 /* ftmp5 = 2*z1z2 */
1110 felem_scalar(ftmp5, z1, 2);
1111
1112 /* s1 = ftmp6 = y1 * z2**3 */
1113 felem_assign(ftmp6, y1);
1114 }
1115
1116 /* u2 = x2*z1z1 */
1117 felem_mul(tmp, x2, ftmp);
1118 /* tmp[i] < 17*2^120 */
1119
1120 /* h = ftmp4 = u2 - u1 */
1121 felem_diff_128_64(tmp, ftmp3);
1122 /* tmp[i] < 17*2^120 + 2^63 */
1123 felem_reduce(ftmp4, tmp);
1124
1125 x_equal = felem_is_zero(ftmp4);
1126
1127 /* z_out = ftmp5 * h */
1128 felem_mul(tmp, ftmp5, ftmp4);
1129 felem_reduce(z_out, tmp);
1130
1131 /* ftmp = z1 * z1z1 */
1132 felem_mul(tmp, ftmp, z1);
1133 felem_reduce(ftmp, tmp);
1134
1135 /* s2 = tmp = y2 * z1**3 */
1136 felem_mul(tmp, y2, ftmp);
1137 /* tmp[i] < 17*2^120 */
1138
1139 /* r = ftmp5 = (s2 - s1)*2 */
1140 felem_diff_128_64(tmp, ftmp6);
1141 /* tmp[i] < 17*2^120 + 2^63 */
1142 felem_reduce(ftmp5, tmp);
1143 y_equal = felem_is_zero(ftmp5);
1144 felem_scalar64(ftmp5, 2);
1145 /* ftmp5[i] < 2^61 */
1146
1147 if (x_equal && y_equal && !z1_is_zero && !z2_is_zero)
1148 {
1149 point_double(x3, y3, z3, x1, y1, z1);
1150 return;
1151 }
1152
1153 /* I = ftmp = (2h)**2 */
1154 felem_assign(ftmp, ftmp4);
1155 felem_scalar64(ftmp, 2);
1156 /* ftmp[i] < 2^61 */
1157 felem_square(tmp, ftmp);
1158 /* tmp[i] < 17*2^122 */
1159 felem_reduce(ftmp, tmp);
1160
1161 /* J = ftmp2 = h * I */
1162 felem_mul(tmp, ftmp4, ftmp);
1163 felem_reduce(ftmp2, tmp);
1164
1165 /* V = ftmp4 = U1 * I */
1166 felem_mul(tmp, ftmp3, ftmp);
1167 felem_reduce(ftmp4, tmp);
1168
1169 /* x_out = r**2 - J - 2V */
1170 felem_square(tmp, ftmp5);
1171 /* tmp[i] < 17*2^122 */
1172 felem_diff_128_64(tmp, ftmp2);
1173 /* tmp[i] < 17*2^122 + 2^63 */
1174 felem_assign(ftmp3, ftmp4);
1175 felem_scalar64(ftmp4, 2);
1176 /* ftmp4[i] < 2^61 */
1177 felem_diff_128_64(tmp, ftmp4);
1178 /* tmp[i] < 17*2^122 + 2^64 */
1179 felem_reduce(x_out, tmp);
1180
1181 /* y_out = r(V-x_out) - 2 * s1 * J */
1182 felem_diff64(ftmp3, x_out);
1183 /* ftmp3[i] < 2^60 + 2^60
1184 * = 2^61 */
1185 felem_mul(tmp, ftmp5, ftmp3);
1186 /* tmp[i] < 17*2^122 */
1187 felem_mul(tmp2, ftmp6, ftmp2);
1188 /* tmp2[i] < 17*2^120 */
1189 felem_scalar128(tmp2, 2);
1190 /* tmp2[i] < 17*2^121 */
1191 felem_diff128(tmp, tmp2);
1192 /* tmp[i] < 2^127 - 2^69 + 17*2^122
1193 * = 2^126 - 2^122 - 2^6 - 2^2 - 1
1194 * < 2^127 */
1195 felem_reduce(y_out, tmp);
1196
1197 copy_conditional(x_out, x2, z1_is_zero);
1198 copy_conditional(x_out, x1, z2_is_zero);
1199 copy_conditional(y_out, y2, z1_is_zero);
1200 copy_conditional(y_out, y1, z2_is_zero);
1201 copy_conditional(z_out, z2, z1_is_zero);
1202 copy_conditional(z_out, z1, z2_is_zero);
1203 felem_assign(x3, x_out);
1204 felem_assign(y3, y_out);
1205 felem_assign(z3, z_out);
1206 }
1207
1208/* Base point pre computation
1209 * --------------------------
1210 *
1211 * Two different sorts of precomputed tables are used in the following code.
 1212 * Each contains various points on the curve, where each point is three field
1213 * elements (x, y, z).
1214 *
1215 * For the base point table, z is usually 1 (0 for the point at infinity).
1216 * This table has 16 elements:
1217 * index | bits | point
1218 * ------+---------+------------------------------
1219 * 0 | 0 0 0 0 | 0G
1220 * 1 | 0 0 0 1 | 1G
1221 * 2 | 0 0 1 0 | 2^130G
1222 * 3 | 0 0 1 1 | (2^130 + 1)G
1223 * 4 | 0 1 0 0 | 2^260G
1224 * 5 | 0 1 0 1 | (2^260 + 1)G
1225 * 6 | 0 1 1 0 | (2^260 + 2^130)G
1226 * 7 | 0 1 1 1 | (2^260 + 2^130 + 1)G
1227 * 8 | 1 0 0 0 | 2^390G
1228 * 9 | 1 0 0 1 | (2^390 + 1)G
1229 * 10 | 1 0 1 0 | (2^390 + 2^130)G
1230 * 11 | 1 0 1 1 | (2^390 + 2^130 + 1)G
1231 * 12 | 1 1 0 0 | (2^390 + 2^260)G
1232 * 13 | 1 1 0 1 | (2^390 + 2^260 + 1)G
1233 * 14 | 1 1 1 0 | (2^390 + 2^260 + 2^130)G
1234 * 15 | 1 1 1 1 | (2^390 + 2^260 + 2^130 + 1)G
1235 *
1236 * The reason for this is so that we can clock bits into four different
1237 * locations when doing simple scalar multiplies against the base point.
1238 *
1239 * Tables for other points have table[i] = iG for i in 0 .. 16. */
1240
1241/* gmul is the table of precomputed base points */
1242static const felem gmul[16][3] =
1243 {{{0, 0, 0, 0, 0, 0, 0, 0, 0},
1244 {0, 0, 0, 0, 0, 0, 0, 0, 0},
1245 {0, 0, 0, 0, 0, 0, 0, 0, 0}},
1246 {{0x017e7e31c2e5bd66, 0x022cf0615a90a6fe, 0x00127a2ffa8de334,
1247 0x01dfbf9d64a3f877, 0x006b4d3dbaa14b5e, 0x014fed487e0a2bd8,
1248 0x015b4429c6481390, 0x03a73678fb2d988e, 0x00c6858e06b70404},
1249 {0x00be94769fd16650, 0x031c21a89cb09022, 0x039013fad0761353,
1250 0x02657bd099031542, 0x03273e662c97ee72, 0x01e6d11a05ebef45,
1251 0x03d1bd998f544495, 0x03001172297ed0b1, 0x011839296a789a3b},
1252 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1253 {{0x0373faacbc875bae, 0x00f325023721c671, 0x00f666fd3dbde5ad,
1254 0x01a6932363f88ea7, 0x01fc6d9e13f9c47b, 0x03bcbffc2bbf734e,
1255 0x013ee3c3647f3a92, 0x029409fefe75d07d, 0x00ef9199963d85e5},
1256 {0x011173743ad5b178, 0x02499c7c21bf7d46, 0x035beaeabb8b1a58,
1257 0x00f989c4752ea0a3, 0x0101e1de48a9c1a3, 0x01a20076be28ba6c,
1258 0x02f8052e5eb2de95, 0x01bfe8f82dea117c, 0x0160074d3c36ddb7},
1259 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1260 {{0x012f3fc373393b3b, 0x03d3d6172f1419fa, 0x02adc943c0b86873,
1261 0x00d475584177952b, 0x012a4d1673750ee2, 0x00512517a0f13b0c,
1262 0x02b184671a7b1734, 0x0315b84236f1a50a, 0x00a4afc472edbdb9},
1263 {0x00152a7077f385c4, 0x03044007d8d1c2ee, 0x0065829d61d52b52,
1264 0x00494ff6b6631d0d, 0x00a11d94d5f06bcf, 0x02d2f89474d9282e,
1265 0x0241c5727c06eeb9, 0x0386928710fbdb9d, 0x01f883f727b0dfbe},
1266 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1267 {{0x019b0c3c9185544d, 0x006243a37c9d97db, 0x02ee3cbe030a2ad2,
1268 0x00cfdd946bb51e0d, 0x0271c00932606b91, 0x03f817d1ec68c561,
1269 0x03f37009806a369c, 0x03c1f30baf184fd5, 0x01091022d6d2f065},
1270 {0x0292c583514c45ed, 0x0316fca51f9a286c, 0x00300af507c1489a,
1271 0x0295f69008298cf1, 0x02c0ed8274943d7b, 0x016509b9b47a431e,
1272 0x02bc9de9634868ce, 0x005b34929bffcb09, 0x000c1a0121681524},
1273 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1274 {{0x0286abc0292fb9f2, 0x02665eee9805b3f7, 0x01ed7455f17f26d6,
1275 0x0346355b83175d13, 0x006284944cd0a097, 0x0191895bcdec5e51,
1276 0x02e288370afda7d9, 0x03b22312bfefa67a, 0x01d104d3fc0613fe},
1277 {0x0092421a12f7e47f, 0x0077a83fa373c501, 0x03bd25c5f696bd0d,
1278 0x035c41e4d5459761, 0x01ca0d1742b24f53, 0x00aaab27863a509c,
1279 0x018b6de47df73917, 0x025c0b771705cd01, 0x01fd51d566d760a7},
1280 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1281 {{0x01dd92ff6b0d1dbd, 0x039c5e2e8f8afa69, 0x0261ed13242c3b27,
1282 0x0382c6e67026e6a0, 0x01d60b10be2089f9, 0x03c15f3dce86723f,
1283 0x03c764a32d2a062d, 0x017307eac0fad056, 0x018207c0b96c5256},
1284 {0x0196a16d60e13154, 0x03e6ce74c0267030, 0x00ddbf2b4e52a5aa,
1285 0x012738241bbf31c8, 0x00ebe8dc04685a28, 0x024c2ad6d380d4a2,
1286 0x035ee062a6e62d0e, 0x0029ed74af7d3a0f, 0x00eef32aec142ebd},
1287 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1288 {{0x00c31ec398993b39, 0x03a9f45bcda68253, 0x00ac733c24c70890,
1289 0x00872b111401ff01, 0x01d178c23195eafb, 0x03bca2c816b87f74,
1290 0x0261a9af46fbad7a, 0x0324b2a8dd3d28f9, 0x00918121d8f24e23},
1291 {0x032bc8c1ca983cd7, 0x00d869dfb08fc8c6, 0x01693cb61fce1516,
1292 0x012a5ea68f4e88a8, 0x010869cab88d7ae3, 0x009081ad277ceee1,
1293 0x033a77166d064cdc, 0x03955235a1fb3a95, 0x01251a4a9b25b65e},
1294 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1295 {{0x00148a3a1b27f40b, 0x0123186df1b31fdc, 0x00026e7beaad34ce,
1296 0x01db446ac1d3dbba, 0x0299c1a33437eaec, 0x024540610183cbb7,
1297 0x0173bb0e9ce92e46, 0x02b937e43921214b, 0x01ab0436a9bf01b5},
1298 {0x0383381640d46948, 0x008dacbf0e7f330f, 0x03602122bcc3f318,
1299 0x01ee596b200620d6, 0x03bd0585fda430b3, 0x014aed77fd123a83,
1300 0x005ace749e52f742, 0x0390fe041da2b842, 0x0189a8ceb3299242},
1301 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1302 {{0x012a19d6b3282473, 0x00c0915918b423ce, 0x023a954eb94405ae,
1303 0x00529f692be26158, 0x0289fa1b6fa4b2aa, 0x0198ae4ceea346ef,
1304 0x0047d8cdfbdedd49, 0x00cc8c8953f0f6b8, 0x001424abbff49203},
1305 {0x0256732a1115a03a, 0x0351bc38665c6733, 0x03f7b950fb4a6447,
1306 0x000afffa94c22155, 0x025763d0a4dab540, 0x000511e92d4fc283,
1307 0x030a7e9eda0ee96c, 0x004c3cd93a28bf0a, 0x017edb3a8719217f},
1308 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1309 {{0x011de5675a88e673, 0x031d7d0f5e567fbe, 0x0016b2062c970ae5,
1310 0x03f4a2be49d90aa7, 0x03cef0bd13822866, 0x03f0923dcf774a6c,
1311 0x0284bebc4f322f72, 0x016ab2645302bb2c, 0x01793f95dace0e2a},
1312 {0x010646e13527a28f, 0x01ca1babd59dc5e7, 0x01afedfd9a5595df,
1313 0x01f15785212ea6b1, 0x0324e5d64f6ae3f4, 0x02d680f526d00645,
1314 0x0127920fadf627a7, 0x03b383f75df4f684, 0x0089e0057e783b0a},
1315 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1316 {{0x00f334b9eb3c26c6, 0x0298fdaa98568dce, 0x01c2d24843a82292,
1317 0x020bcb24fa1b0711, 0x02cbdb3d2b1875e6, 0x0014907598f89422,
1318 0x03abe3aa43b26664, 0x02cbf47f720bc168, 0x0133b5e73014b79b},
1319 {0x034aab5dab05779d, 0x00cdc5d71fee9abb, 0x0399f16bd4bd9d30,
1320 0x03582fa592d82647, 0x02be1cdfb775b0e9, 0x0034f7cea32e94cb,
1321 0x0335a7f08f56f286, 0x03b707e9565d1c8b, 0x0015c946ea5b614f},
1322 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1323 {{0x024676f6cff72255, 0x00d14625cac96378, 0x00532b6008bc3767,
1324 0x01fc16721b985322, 0x023355ea1b091668, 0x029de7afdc0317c3,
1325 0x02fc8a7ca2da037c, 0x02de1217d74a6f30, 0x013f7173175b73bf},
1326 {0x0344913f441490b5, 0x0200f9e272b61eca, 0x0258a246b1dd55d2,
1327 0x03753db9ea496f36, 0x025e02937a09c5ef, 0x030cbd3d14012692,
1328 0x01793a67e70dc72a, 0x03ec1d37048a662e, 0x006550f700c32a8d},
1329 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1330 {{0x00d3f48a347eba27, 0x008e636649b61bd8, 0x00d3b93716778fb3,
1331 0x004d1915757bd209, 0x019d5311a3da44e0, 0x016d1afcbbe6aade,
1332 0x0241bf5f73265616, 0x0384672e5d50d39b, 0x005009fee522b684},
1333 {0x029b4fab064435fe, 0x018868ee095bbb07, 0x01ea3d6936cc92b8,
1334 0x000608b00f78a2f3, 0x02db911073d1c20f, 0x018205938470100a,
1335 0x01f1e4964cbe6ff2, 0x021a19a29eed4663, 0x01414485f42afa81},
1336 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1337 {{0x01612b3a17f63e34, 0x03813992885428e6, 0x022b3c215b5a9608,
1338 0x029b4057e19f2fcb, 0x0384059a587af7e6, 0x02d6400ace6fe610,
1339 0x029354d896e8e331, 0x00c047ee6dfba65e, 0x0037720542e9d49d},
1340 {0x02ce9eed7c5e9278, 0x0374ed703e79643b, 0x01316c54c4072006,
1341 0x005aaa09054b2ee8, 0x002824000c840d57, 0x03d4eba24771ed86,
1342 0x0189c50aabc3bdae, 0x0338c01541e15510, 0x00466d56e38eed42},
1343 {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1344 {{0x007efd8330ad8bd6, 0x02465ed48047710b, 0x0034c6606b215e0c,
1345 0x016ae30c53cbf839, 0x01fa17bd37161216, 0x018ead4e61ce8ab9,
1346 0x005482ed5f5dee46, 0x037543755bba1d7f, 0x005e5ac7e70a9d0f},
1347 {0x0117e1bb2fdcb2a2, 0x03deea36249f40c4, 0x028d09b4a6246cb7,
1348 0x03524b8855bcf756, 0x023d7d109d5ceb58, 0x0178e43e3223ef9c,
1349 0x0154536a0c6e966a, 0x037964d1286ee9fe, 0x0199bcd90e125055},
1350 {1, 0, 0, 0, 0, 0, 0, 0, 0}}};
1351
1352/* select_point selects the |idx|th point from a precomputation table and
1353 * copies it to out. */
1354static void select_point(const limb idx, unsigned int size, const felem pre_comp[/* size */][3],
1355 felem out[3])
1356 {
1357 unsigned i, j;
1358 limb *outlimbs = &out[0][0];
1359 memset(outlimbs, 0, 3 * sizeof(felem));
1360
1361 for (i = 0; i < size; i++)
1362 {
1363 const limb *inlimbs = &pre_comp[i][0][0];
1364 limb mask = i ^ idx;
1365 mask |= mask >> 4;
1366 mask |= mask >> 2;
1367 mask |= mask >> 1;
1368 mask &= 1;
1369 mask--;
1370 for (j = 0; j < NLIMBS * 3; j++)
1371 outlimbs[j] |= inlimbs[j] & mask;
1372 }
1373 }
1374
1375/* get_bit returns the |i|th bit in |in| */
1376static char get_bit(const felem_bytearray in, int i)
1377 {
1378 if (i < 0)
1379 return 0;
1380 return (in[i >> 3] >> (i & 7)) & 1;
1381 }
1382
1383/* Interleaved point multiplication using precomputed point multiples:
1384 * The small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[],
1385 * the scalars in scalars[]. If g_scalar is non-NULL, we also add this multiple
1386 * of the generator, using certain (large) precomputed multiples in g_pre_comp.
1387 * Output point (X, Y, Z) is stored in x_out, y_out, z_out */
1388static void batch_mul(felem x_out, felem y_out, felem z_out,
1389 const felem_bytearray scalars[], const unsigned num_points, const u8 *g_scalar,
1390 const int mixed, const felem pre_comp[][17][3], const felem g_pre_comp[16][3])
1391 {
1392 int i, skip;
1393 unsigned num, gen_mul = (g_scalar != NULL);
1394 felem nq[3], tmp[4];
1395 limb bits;
1396 u8 sign, digit;
1397
1398 /* set nq to the point at infinity */
1399 memset(nq, 0, 3 * sizeof(felem));
1400
1401 /* Loop over all scalars msb-to-lsb, interleaving additions
1402 * of multiples of the generator (last quarter of rounds)
 1403	 * and additions of the other points' multiples (every 5th round).
1404 */
1405 skip = 1; /* save two point operations in the first round */
1406 for (i = (num_points ? 520 : 130); i >= 0; --i)
1407 {
1408 /* double */
1409 if (!skip)
1410 point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
1411
1412 /* add multiples of the generator */
1413 if (gen_mul && (i <= 130))
1414 {
1415 bits = get_bit(g_scalar, i + 390) << 3;
1416 if (i < 130)
1417 {
1418 bits |= get_bit(g_scalar, i + 260) << 2;
1419 bits |= get_bit(g_scalar, i + 130) << 1;
1420 bits |= get_bit(g_scalar, i);
1421 }
1422 /* select the point to add, in constant time */
1423 select_point(bits, 16, g_pre_comp, tmp);
1424 if (!skip)
1425 {
1426 point_add(nq[0], nq[1], nq[2],
1427 nq[0], nq[1], nq[2],
1428 1 /* mixed */, tmp[0], tmp[1], tmp[2]);
1429 }
1430 else
1431 {
1432 memcpy(nq, tmp, 3 * sizeof(felem));
1433 skip = 0;
1434 }
1435 }
1436
1437 /* do other additions every 5 doublings */
1438 if (num_points && (i % 5 == 0))
1439 {
1440 /* loop over all scalars */
1441 for (num = 0; num < num_points; ++num)
1442 {
1443 bits = get_bit(scalars[num], i + 4) << 5;
1444 bits |= get_bit(scalars[num], i + 3) << 4;
1445 bits |= get_bit(scalars[num], i + 2) << 3;
1446 bits |= get_bit(scalars[num], i + 1) << 2;
1447 bits |= get_bit(scalars[num], i) << 1;
1448 bits |= get_bit(scalars[num], i - 1);
1449 ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
1450
1451 /* select the point to add or subtract, in constant time */
1452 select_point(digit, 17, pre_comp[num], tmp);
1453 felem_neg(tmp[3], tmp[1]); /* (X, -Y, Z) is the negative point */
1454 copy_conditional(tmp[1], tmp[3], (-(limb) sign));
1455
1456 if (!skip)
1457 {
1458 point_add(nq[0], nq[1], nq[2],
1459 nq[0], nq[1], nq[2],
1460 mixed, tmp[0], tmp[1], tmp[2]);
1461 }
1462 else
1463 {
1464 memcpy(nq, tmp, 3 * sizeof(felem));
1465 skip = 0;
1466 }
1467 }
1468 }
1469 }
1470 felem_assign(x_out, nq[0]);
1471 felem_assign(y_out, nq[1]);
1472 felem_assign(z_out, nq[2]);
1473 }
1474
1475
1476/* Precomputation for the group generator. */
1477typedef struct {
1478 felem g_pre_comp[16][3];
1479 int references;
1480} NISTP521_PRE_COMP;
1481
1482const EC_METHOD *EC_GFp_nistp521_method(void)
1483 {
1484 static const EC_METHOD ret = {
1485 EC_FLAGS_DEFAULT_OCT,
1486 NID_X9_62_prime_field,
1487 ec_GFp_nistp521_group_init,
1488 ec_GFp_simple_group_finish,
1489 ec_GFp_simple_group_clear_finish,
1490 ec_GFp_nist_group_copy,
1491 ec_GFp_nistp521_group_set_curve,
1492 ec_GFp_simple_group_get_curve,
1493 ec_GFp_simple_group_get_degree,
1494 ec_GFp_simple_group_check_discriminant,
1495 ec_GFp_simple_point_init,
1496 ec_GFp_simple_point_finish,
1497 ec_GFp_simple_point_clear_finish,
1498 ec_GFp_simple_point_copy,
1499 ec_GFp_simple_point_set_to_infinity,
1500 ec_GFp_simple_set_Jprojective_coordinates_GFp,
1501 ec_GFp_simple_get_Jprojective_coordinates_GFp,
1502 ec_GFp_simple_point_set_affine_coordinates,
1503 ec_GFp_nistp521_point_get_affine_coordinates,
1504 0 /* point_set_compressed_coordinates */,
1505 0 /* point2oct */,
1506 0 /* oct2point */,
1507 ec_GFp_simple_add,
1508 ec_GFp_simple_dbl,
1509 ec_GFp_simple_invert,
1510 ec_GFp_simple_is_at_infinity,
1511 ec_GFp_simple_is_on_curve,
1512 ec_GFp_simple_cmp,
1513 ec_GFp_simple_make_affine,
1514 ec_GFp_simple_points_make_affine,
1515 ec_GFp_nistp521_points_mul,
1516 ec_GFp_nistp521_precompute_mult,
1517 ec_GFp_nistp521_have_precompute_mult,
1518 ec_GFp_nist_field_mul,
1519 ec_GFp_nist_field_sqr,
1520 0 /* field_div */,
1521 0 /* field_encode */,
1522 0 /* field_decode */,
1523 0 /* field_set_to_one */ };
1524
1525 return &ret;
1526 }
1527
1528
1529/******************************************************************************/
1530/* FUNCTIONS TO MANAGE PRECOMPUTATION
1531 */
1532
 1533static NISTP521_PRE_COMP *nistp521_pre_comp_new(void)
1534 {
1535 NISTP521_PRE_COMP *ret = NULL;
1536 ret = (NISTP521_PRE_COMP *)OPENSSL_malloc(sizeof(NISTP521_PRE_COMP));
1537 if (!ret)
1538 {
1539 ECerr(EC_F_NISTP521_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
1540 return ret;
1541 }
1542 memset(ret->g_pre_comp, 0, sizeof(ret->g_pre_comp));
1543 ret->references = 1;
1544 return ret;
1545 }
1546
1547static void *nistp521_pre_comp_dup(void *src_)
1548 {
1549 NISTP521_PRE_COMP *src = src_;
1550
1551 /* no need to actually copy, these objects never change! */
1552 CRYPTO_add(&src->references, 1, CRYPTO_LOCK_EC_PRE_COMP);
1553
1554 return src_;
1555 }
1556
1557static void nistp521_pre_comp_free(void *pre_)
1558 {
1559 int i;
1560 NISTP521_PRE_COMP *pre = pre_;
1561
1562 if (!pre)
1563 return;
1564
1565 i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
1566 if (i > 0)
1567 return;
1568
1569 OPENSSL_free(pre);
1570 }
1571
1572static void nistp521_pre_comp_clear_free(void *pre_)
1573 {
1574 int i;
1575 NISTP521_PRE_COMP *pre = pre_;
1576
1577 if (!pre)
1578 return;
1579
1580 i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
1581 if (i > 0)
1582 return;
1583
1584 OPENSSL_cleanse(pre, sizeof(*pre));
1585 OPENSSL_free(pre);
1586 }
1587
1588/******************************************************************************/
1589/* OPENSSL EC_METHOD FUNCTIONS
1590 */
1591
1592int ec_GFp_nistp521_group_init(EC_GROUP *group)
1593 {
1594 int ret;
1595 ret = ec_GFp_simple_group_init(group);
1596 group->a_is_minus3 = 1;
1597 return ret;
1598 }
1599
1600int ec_GFp_nistp521_group_set_curve(EC_GROUP *group, const BIGNUM *p,
1601 const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx)
1602 {
1603 int ret = 0;
1604 BN_CTX *new_ctx = NULL;
1605 BIGNUM *curve_p, *curve_a, *curve_b;
1606
1607 if (ctx == NULL)
1608 if ((ctx = new_ctx = BN_CTX_new()) == NULL) return 0;
1609 BN_CTX_start(ctx);
1610 if (((curve_p = BN_CTX_get(ctx)) == NULL) ||
1611 ((curve_a = BN_CTX_get(ctx)) == NULL) ||
1612 ((curve_b = BN_CTX_get(ctx)) == NULL)) goto err;
1613 BN_bin2bn(nistp521_curve_params[0], sizeof(felem_bytearray), curve_p);
1614 BN_bin2bn(nistp521_curve_params[1], sizeof(felem_bytearray), curve_a);
1615 BN_bin2bn(nistp521_curve_params[2], sizeof(felem_bytearray), curve_b);
1616 if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) ||
1617 (BN_cmp(curve_b, b)))
1618 {
1619 ECerr(EC_F_EC_GFP_NISTP521_GROUP_SET_CURVE,
1620 EC_R_WRONG_CURVE_PARAMETERS);
1621 goto err;
1622 }
1623 group->field_mod_func = BN_nist_mod_521;
1624 ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
1625err:
1626 BN_CTX_end(ctx);
1627 if (new_ctx != NULL)
1628 BN_CTX_free(new_ctx);
1629 return ret;
1630 }
1631
1632/* Takes the Jacobian coordinates (X, Y, Z) of a point and returns
1633 * (X', Y') = (X/Z^2, Y/Z^3) */
1634int ec_GFp_nistp521_point_get_affine_coordinates(const EC_GROUP *group,
1635 const EC_POINT *point, BIGNUM *x, BIGNUM *y, BN_CTX *ctx)
1636 {
1637 felem z1, z2, x_in, y_in, x_out, y_out;
1638 largefelem tmp;
1639
1640 if (EC_POINT_is_at_infinity(group, point))
1641 {
1642 ECerr(EC_F_EC_GFP_NISTP521_POINT_GET_AFFINE_COORDINATES,
1643 EC_R_POINT_AT_INFINITY);
1644 return 0;
1645 }
1646 if ((!BN_to_felem(x_in, &point->X)) || (!BN_to_felem(y_in, &point->Y)) ||
1647 (!BN_to_felem(z1, &point->Z))) return 0;
1648 felem_inv(z2, z1);
1649 felem_square(tmp, z2); felem_reduce(z1, tmp);
1650 felem_mul(tmp, x_in, z1); felem_reduce(x_in, tmp);
1651 felem_contract(x_out, x_in);
1652 if (x != NULL)
1653 {
1654 if (!felem_to_BN(x, x_out))
1655 {
1656 ECerr(EC_F_EC_GFP_NISTP521_POINT_GET_AFFINE_COORDINATES, ERR_R_BN_LIB);
1657 return 0;
1658 }
1659 }
1660 felem_mul(tmp, z1, z2); felem_reduce(z1, tmp);
1661 felem_mul(tmp, y_in, z1); felem_reduce(y_in, tmp);
1662 felem_contract(y_out, y_in);
1663 if (y != NULL)
1664 {
1665 if (!felem_to_BN(y, y_out))
1666 {
1667 ECerr(EC_F_EC_GFP_NISTP521_POINT_GET_AFFINE_COORDINATES, ERR_R_BN_LIB);
1668 return 0;
1669 }
1670 }
1671 return 1;
1672 }
1673
1674static void make_points_affine(size_t num, felem points[/* num */][3], felem tmp_felems[/* num+1 */])
1675 {
1676 /* Runs in constant time, unless an input is the point at infinity
1677 * (which normally shouldn't happen). */
1678 ec_GFp_nistp_points_make_affine_internal(
1679 num,
1680 points,
1681 sizeof(felem),
1682 tmp_felems,
1683 (void (*)(void *)) felem_one,
1684 (int (*)(const void *)) felem_is_zero_int,
1685 (void (*)(void *, const void *)) felem_assign,
1686 (void (*)(void *, const void *)) felem_square_reduce,
1687 (void (*)(void *, const void *, const void *)) felem_mul_reduce,
1688 (void (*)(void *, const void *)) felem_inv,
1689 (void (*)(void *, const void *)) felem_contract);
1690 }
1691
1692/* Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL values
1693 * Result is stored in r (r can equal one of the inputs). */
1694int ec_GFp_nistp521_points_mul(const EC_GROUP *group, EC_POINT *r,
1695 const BIGNUM *scalar, size_t num, const EC_POINT *points[],
1696 const BIGNUM *scalars[], BN_CTX *ctx)
1697 {
1698 int ret = 0;
1699 int j;
1700 int mixed = 0;
1701 BN_CTX *new_ctx = NULL;
1702 BIGNUM *x, *y, *z, *tmp_scalar;
1703 felem_bytearray g_secret;
1704 felem_bytearray *secrets = NULL;
1705 felem (*pre_comp)[17][3] = NULL;
1706 felem *tmp_felems = NULL;
1707 felem_bytearray tmp;
1708 unsigned i, num_bytes;
1709 int have_pre_comp = 0;
1710 size_t num_points = num;
1711 felem x_in, y_in, z_in, x_out, y_out, z_out;
1712 NISTP521_PRE_COMP *pre = NULL;
1713 felem (*g_pre_comp)[3] = NULL;
1714 EC_POINT *generator = NULL;
1715 const EC_POINT *p = NULL;
1716 const BIGNUM *p_scalar = NULL;
1717
1718 if (ctx == NULL)
1719 if ((ctx = new_ctx = BN_CTX_new()) == NULL) return 0;
1720 BN_CTX_start(ctx);
1721 if (((x = BN_CTX_get(ctx)) == NULL) ||
1722 ((y = BN_CTX_get(ctx)) == NULL) ||
1723 ((z = BN_CTX_get(ctx)) == NULL) ||
1724 ((tmp_scalar = BN_CTX_get(ctx)) == NULL))
1725 goto err;
1726
1727 if (scalar != NULL)
1728 {
1729 pre = EC_EX_DATA_get_data(group->extra_data,
1730 nistp521_pre_comp_dup, nistp521_pre_comp_free,
1731 nistp521_pre_comp_clear_free);
1732 if (pre)
1733 /* we have precomputation, try to use it */
1734 g_pre_comp = &pre->g_pre_comp[0];
1735 else
1736 /* try to use the standard precomputation */
1737 g_pre_comp = (felem (*)[3]) gmul;
1738 generator = EC_POINT_new(group);
1739 if (generator == NULL)
1740 goto err;
1741 /* get the generator from precomputation */
1742 if (!felem_to_BN(x, g_pre_comp[1][0]) ||
1743 !felem_to_BN(y, g_pre_comp[1][1]) ||
1744 !felem_to_BN(z, g_pre_comp[1][2]))
1745 {
1746 ECerr(EC_F_EC_GFP_NISTP521_POINTS_MUL, ERR_R_BN_LIB);
1747 goto err;
1748 }
1749 if (!EC_POINT_set_Jprojective_coordinates_GFp(group,
1750 generator, x, y, z, ctx))
1751 goto err;
1752 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
1753 /* precomputation matches generator */
1754 have_pre_comp = 1;
1755 else
1756 /* we don't have valid precomputation:
1757 * treat the generator as a random point */
1758 num_points++;
1759 }
1760
1761 if (num_points > 0)
1762 {
1763 if (num_points >= 2)
1764 {
1765 /* unless we precompute multiples for just one point,
1766 * converting those into affine form is time well spent */
1767 mixed = 1;
1768 }
1769 secrets = OPENSSL_malloc(num_points * sizeof(felem_bytearray));
1770 pre_comp = OPENSSL_malloc(num_points * 17 * 3 * sizeof(felem));
1771 if (mixed)
1772 tmp_felems = OPENSSL_malloc((num_points * 17 + 1) * sizeof(felem));
1773 if ((secrets == NULL) || (pre_comp == NULL) || (mixed && (tmp_felems == NULL)))
1774 {
1775 ECerr(EC_F_EC_GFP_NISTP521_POINTS_MUL, ERR_R_MALLOC_FAILURE);
1776 goto err;
1777 }
1778
1779 /* we treat NULL scalars as 0, and NULL points as points at infinity,
1780 * i.e., they contribute nothing to the linear combination */
1781 memset(secrets, 0, num_points * sizeof(felem_bytearray));
1782 memset(pre_comp, 0, num_points * 17 * 3 * sizeof(felem));
1783 for (i = 0; i < num_points; ++i)
1784 {
1785 if (i == num)
1786 /* we didn't have a valid precomputation, so we pick
1787 * the generator */
1788 {
1789 p = EC_GROUP_get0_generator(group);
1790 p_scalar = scalar;
1791 }
1792 else
1793 /* the i^th point */
1794 {
1795 p = points[i];
1796 p_scalar = scalars[i];
1797 }
1798 if ((p_scalar != NULL) && (p != NULL))
1799 {
1800 /* reduce scalar to 0 <= scalar < 2^521 */
1801 if ((BN_num_bits(p_scalar) > 521) || (BN_is_negative(p_scalar)))
1802 {
1803 /* this is an unusual input, and we don't guarantee
1804 * constant-timeness */
1805 if (!BN_nnmod(tmp_scalar, p_scalar, &group->order, ctx))
1806 {
1807 ECerr(EC_F_EC_GFP_NISTP521_POINTS_MUL, ERR_R_BN_LIB);
1808 goto err;
1809 }
1810 num_bytes = BN_bn2bin(tmp_scalar, tmp);
1811 }
1812 else
1813 num_bytes = BN_bn2bin(p_scalar, tmp);
1814 flip_endian(secrets[i], tmp, num_bytes);
1815 /* precompute multiples */
1816 if ((!BN_to_felem(x_out, &p->X)) ||
1817 (!BN_to_felem(y_out, &p->Y)) ||
1818 (!BN_to_felem(z_out, &p->Z))) goto err;
1819 memcpy(pre_comp[i][1][0], x_out, sizeof(felem));
1820 memcpy(pre_comp[i][1][1], y_out, sizeof(felem));
1821 memcpy(pre_comp[i][1][2], z_out, sizeof(felem));
1822 for (j = 2; j <= 16; ++j)
1823 {
1824 if (j & 1)
1825 {
1826 point_add(
1827 pre_comp[i][j][0], pre_comp[i][j][1], pre_comp[i][j][2],
1828 pre_comp[i][1][0], pre_comp[i][1][1], pre_comp[i][1][2],
1829 0, pre_comp[i][j-1][0], pre_comp[i][j-1][1], pre_comp[i][j-1][2]);
1830 }
1831 else
1832 {
1833 point_double(
1834 pre_comp[i][j][0], pre_comp[i][j][1], pre_comp[i][j][2],
1835 pre_comp[i][j/2][0], pre_comp[i][j/2][1], pre_comp[i][j/2][2]);
1836 }
1837 }
1838 }
1839 }
1840 if (mixed)
1841 make_points_affine(num_points * 17, pre_comp[0], tmp_felems);
1842 }
1843
1844 /* the scalar for the generator */
1845 if ((scalar != NULL) && (have_pre_comp))
1846 {
1847 memset(g_secret, 0, sizeof(g_secret));
1848 /* reduce scalar to 0 <= scalar < 2^521 */
1849 if ((BN_num_bits(scalar) > 521) || (BN_is_negative(scalar)))
1850 {
1851 /* this is an unusual input, and we don't guarantee
1852 * constant-timeness */
1853 if (!BN_nnmod(tmp_scalar, scalar, &group->order, ctx))
1854 {
1855 ECerr(EC_F_EC_GFP_NISTP521_POINTS_MUL, ERR_R_BN_LIB);
1856 goto err;
1857 }
1858 num_bytes = BN_bn2bin(tmp_scalar, tmp);
1859 }
1860 else
1861 num_bytes = BN_bn2bin(scalar, tmp);
1862 flip_endian(g_secret, tmp, num_bytes);
 1863	/* do the multiplication with generator precomputation */
1864 batch_mul(x_out, y_out, z_out,
1865 (const felem_bytearray (*)) secrets, num_points,
1866 g_secret,
1867 mixed, (const felem (*)[17][3]) pre_comp,
1868 (const felem (*)[3]) g_pre_comp);
1869 }
1870 else
1871 /* do the multiplication without generator precomputation */
1872 batch_mul(x_out, y_out, z_out,
1873 (const felem_bytearray (*)) secrets, num_points,
1874 NULL, mixed, (const felem (*)[17][3]) pre_comp, NULL);
1875 /* reduce the output to its unique minimal representation */
1876 felem_contract(x_in, x_out);
1877 felem_contract(y_in, y_out);
1878 felem_contract(z_in, z_out);
1879 if ((!felem_to_BN(x, x_in)) || (!felem_to_BN(y, y_in)) ||
1880 (!felem_to_BN(z, z_in)))
1881 {
1882 ECerr(EC_F_EC_GFP_NISTP521_POINTS_MUL, ERR_R_BN_LIB);
1883 goto err;
1884 }
1885 ret = EC_POINT_set_Jprojective_coordinates_GFp(group, r, x, y, z, ctx);
1886
1887err:
1888 BN_CTX_end(ctx);
1889 if (generator != NULL)
1890 EC_POINT_free(generator);
1891 if (new_ctx != NULL)
1892 BN_CTX_free(new_ctx);
1893 if (secrets != NULL)
1894 OPENSSL_free(secrets);
1895 if (pre_comp != NULL)
1896 OPENSSL_free(pre_comp);
1897 if (tmp_felems != NULL)
1898 OPENSSL_free(tmp_felems);
1899 return ret;
1900 }
1901
1902int ec_GFp_nistp521_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
1903 {
1904 int ret = 0;
1905 NISTP521_PRE_COMP *pre = NULL;
1906 int i, j;
1907 BN_CTX *new_ctx = NULL;
1908 BIGNUM *x, *y;
1909 EC_POINT *generator = NULL;
1910 felem tmp_felems[16];
1911
1912 /* throw away old precomputation */
1913 EC_EX_DATA_free_data(&group->extra_data, nistp521_pre_comp_dup,
1914 nistp521_pre_comp_free, nistp521_pre_comp_clear_free);
1915 if (ctx == NULL)
1916 if ((ctx = new_ctx = BN_CTX_new()) == NULL) return 0;
1917 BN_CTX_start(ctx);
1918 if (((x = BN_CTX_get(ctx)) == NULL) ||
1919 ((y = BN_CTX_get(ctx)) == NULL))
1920 goto err;
1921 /* get the generator */
1922 if (group->generator == NULL) goto err;
1923 generator = EC_POINT_new(group);
1924 if (generator == NULL)
1925 goto err;
1926 BN_bin2bn(nistp521_curve_params[3], sizeof (felem_bytearray), x);
1927 BN_bin2bn(nistp521_curve_params[4], sizeof (felem_bytearray), y);
1928 if (!EC_POINT_set_affine_coordinates_GFp(group, generator, x, y, ctx))
1929 goto err;
1930 if ((pre = nistp521_pre_comp_new()) == NULL)
1931 goto err;
1932 /* if the generator is the standard one, use built-in precomputation */
1933 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
1934 {
1935 memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
1936 ret = 1;
1937 goto err;
1938 }
1939 if ((!BN_to_felem(pre->g_pre_comp[1][0], &group->generator->X)) ||
1940 (!BN_to_felem(pre->g_pre_comp[1][1], &group->generator->Y)) ||
1941 (!BN_to_felem(pre->g_pre_comp[1][2], &group->generator->Z)))
1942 goto err;
1943 /* compute 2^130*G, 2^260*G, 2^390*G */
1944 for (i = 1; i <= 4; i <<= 1)
1945 {
1946 point_double(pre->g_pre_comp[2*i][0], pre->g_pre_comp[2*i][1],
1947 pre->g_pre_comp[2*i][2], pre->g_pre_comp[i][0],
1948 pre->g_pre_comp[i][1], pre->g_pre_comp[i][2]);
1949 for (j = 0; j < 129; ++j)
1950 {
1951 point_double(pre->g_pre_comp[2*i][0],
1952 pre->g_pre_comp[2*i][1],
1953 pre->g_pre_comp[2*i][2],
1954 pre->g_pre_comp[2*i][0],
1955 pre->g_pre_comp[2*i][1],
1956 pre->g_pre_comp[2*i][2]);
1957 }
1958 }
1959 /* g_pre_comp[0] is the point at infinity */
1960 memset(pre->g_pre_comp[0], 0, sizeof(pre->g_pre_comp[0]));
1961 /* the remaining multiples */
1962 /* 2^130*G + 2^260*G */
1963 point_add(pre->g_pre_comp[6][0], pre->g_pre_comp[6][1],
1964 pre->g_pre_comp[6][2], pre->g_pre_comp[4][0],
1965 pre->g_pre_comp[4][1], pre->g_pre_comp[4][2],
1966 0, pre->g_pre_comp[2][0], pre->g_pre_comp[2][1],
1967 pre->g_pre_comp[2][2]);
1968 /* 2^130*G + 2^390*G */
1969 point_add(pre->g_pre_comp[10][0], pre->g_pre_comp[10][1],
1970 pre->g_pre_comp[10][2], pre->g_pre_comp[8][0],
1971 pre->g_pre_comp[8][1], pre->g_pre_comp[8][2],
1972 0, pre->g_pre_comp[2][0], pre->g_pre_comp[2][1],
1973 pre->g_pre_comp[2][2]);
1974 /* 2^260*G + 2^390*G */
1975 point_add(pre->g_pre_comp[12][0], pre->g_pre_comp[12][1],
1976 pre->g_pre_comp[12][2], pre->g_pre_comp[8][0],
1977 pre->g_pre_comp[8][1], pre->g_pre_comp[8][2],
1978 0, pre->g_pre_comp[4][0], pre->g_pre_comp[4][1],
1979 pre->g_pre_comp[4][2]);
1980 /* 2^130*G + 2^260*G + 2^390*G */
1981 point_add(pre->g_pre_comp[14][0], pre->g_pre_comp[14][1],
1982 pre->g_pre_comp[14][2], pre->g_pre_comp[12][0],
1983 pre->g_pre_comp[12][1], pre->g_pre_comp[12][2],
1984 0, pre->g_pre_comp[2][0], pre->g_pre_comp[2][1],
1985 pre->g_pre_comp[2][2]);
1986 for (i = 1; i < 8; ++i)
1987 {
1988 /* odd multiples: add G */
1989 point_add(pre->g_pre_comp[2*i+1][0], pre->g_pre_comp[2*i+1][1],
1990 pre->g_pre_comp[2*i+1][2], pre->g_pre_comp[2*i][0],
1991 pre->g_pre_comp[2*i][1], pre->g_pre_comp[2*i][2],
1992 0, pre->g_pre_comp[1][0], pre->g_pre_comp[1][1],
1993 pre->g_pre_comp[1][2]);
1994 }
1995 make_points_affine(15, &(pre->g_pre_comp[1]), tmp_felems);
1996
1997 if (!EC_EX_DATA_set_data(&group->extra_data, pre, nistp521_pre_comp_dup,
1998 nistp521_pre_comp_free, nistp521_pre_comp_clear_free))
1999 goto err;
2000 ret = 1;
2001 pre = NULL;
2002 err:
2003 BN_CTX_end(ctx);
2004 if (generator != NULL)
2005 EC_POINT_free(generator);
2006 if (new_ctx != NULL)
2007 BN_CTX_free(new_ctx);
2008 if (pre)
2009 nistp521_pre_comp_free(pre);
2010 return ret;
2011 }
2012
2013int ec_GFp_nistp521_have_precompute_mult(const EC_GROUP *group)
2014 {
2015 if (EC_EX_DATA_get_data(group->extra_data, nistp521_pre_comp_dup,
2016 nistp521_pre_comp_free, nistp521_pre_comp_clear_free)
2017 != NULL)
2018 return 1;
2019 else
2020 return 0;
2021 }
2022
2023#else
2024static void *dummy=&dummy;
2025#endif
diff --git a/src/lib/libssl/src/crypto/ec/ecp_nistputil.c b/src/lib/libssl/src/crypto/ec/ecp_nistputil.c
new file mode 100644
index 0000000000..c8140c807f
--- /dev/null
+++ b/src/lib/libssl/src/crypto/ec/ecp_nistputil.c
@@ -0,0 +1,197 @@
1/* crypto/ec/ecp_nistputil.c */
2/*
3 * Written by Bodo Moeller for the OpenSSL project.
4 */
5/* Copyright 2011 Google Inc.
6 *
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 *
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
11 *
12 * http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
19 */
20
21#include <openssl/opensslconf.h>
22#ifndef OPENSSL_NO_EC_NISTP_64_GCC_128
23
24/*
25 * Common utility functions for ecp_nistp224.c, ecp_nistp256.c, ecp_nistp521.c.
26 */
27
28#include <stddef.h>
29#include "ec_lcl.h"
30
31/* Convert an array of points into affine coordinates.
32 * (If the point at infinity is found (Z = 0), it remains unchanged.)
33 * This function is essentially an equivalent to EC_POINTs_make_affine(), but
34 * works with the internal representation of points as used by ecp_nistp###.c
35 * rather than with (BIGNUM-based) EC_POINT data structures.
36 *
37 * point_array is the input/output buffer ('num' points in projective form,
38 * i.e. three coordinates each), based on an internal representation of
39 * field elements of size 'felem_size'.
40 *
41 * tmp_felems needs to point to a temporary array of 'num'+1 field elements
42 * for storage of intermediate values.
43 */
44void ec_GFp_nistp_points_make_affine_internal(size_t num, void *point_array,
45 size_t felem_size, void *tmp_felems,
46 void (*felem_one)(void *out),
47 int (*felem_is_zero)(const void *in),
48 void (*felem_assign)(void *out, const void *in),
49 void (*felem_square)(void *out, const void *in),
50 void (*felem_mul)(void *out, const void *in1, const void *in2),
51 void (*felem_inv)(void *out, const void *in),
52 void (*felem_contract)(void *out, const void *in))
53 {
54 int i = 0;
55
56#define tmp_felem(I) (&((char *)tmp_felems)[(I) * felem_size])
57#define X(I) (&((char *)point_array)[3*(I) * felem_size])
58#define Y(I) (&((char *)point_array)[(3*(I) + 1) * felem_size])
59#define Z(I) (&((char *)point_array)[(3*(I) + 2) * felem_size])
60
61 if (!felem_is_zero(Z(0)))
62 felem_assign(tmp_felem(0), Z(0));
63 else
64 felem_one(tmp_felem(0));
65 for (i = 1; i < (int)num; i++)
66 {
67 if (!felem_is_zero(Z(i)))
68 felem_mul(tmp_felem(i), tmp_felem(i-1), Z(i));
69 else
70 felem_assign(tmp_felem(i), tmp_felem(i-1));
71 }
72 /* Now each tmp_felem(i) is the product of Z(0) .. Z(i), skipping any zero-valued factors:
73 * if Z(i) = 0, we essentially pretend that Z(i) = 1 */
74
75 felem_inv(tmp_felem(num-1), tmp_felem(num-1));
76 for (i = num - 1; i >= 0; i--)
77 {
78 if (i > 0)
79 /* tmp_felem(i-1) is the product of Z(0) .. Z(i-1),
80 * tmp_felem(i) is the inverse of the product of Z(0) .. Z(i)
81 */
82 felem_mul(tmp_felem(num), tmp_felem(i-1), tmp_felem(i)); /* 1/Z(i) */
83 else
84 felem_assign(tmp_felem(num), tmp_felem(0)); /* 1/Z(0) */
85
86 if (!felem_is_zero(Z(i)))
87 {
88 if (i > 0)
89 /* For next iteration, replace tmp_felem(i-1) by its inverse */
90 felem_mul(tmp_felem(i-1), tmp_felem(i), Z(i));
91
92 /* Convert point (X, Y, Z) into affine form (X/(Z^2), Y/(Z^3), 1) */
93 felem_square(Z(i), tmp_felem(num)); /* 1/(Z^2) */
94 felem_mul(X(i), X(i), Z(i)); /* X/(Z^2) */
95 felem_mul(Z(i), Z(i), tmp_felem(num)); /* 1/(Z^3) */
96 felem_mul(Y(i), Y(i), Z(i)); /* Y/(Z^3) */
97 felem_contract(X(i), X(i));
98 felem_contract(Y(i), Y(i));
99 felem_one(Z(i));
100 }
101 else
102 {
103 if (i > 0)
104 /* For next iteration, replace tmp_felem(i-1) by its inverse */
105 felem_assign(tmp_felem(i-1), tmp_felem(i));
106 }
107 }
108 }
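
/* Editor's note -- illustrative sketch, not part of the imported OpenSSL
 * source.  The routine above applies Montgomery's simultaneous-inversion
 * trick: one field inversion plus O(num) multiplications instead of one
 * inversion per point.  The same bookkeeping on plain integers modulo a
 * small prime (p < 2^32 so products fit in 64 bits); all names are
 * hypothetical and the block is kept under #if 0: */
#if 0
#include <stddef.h>
#include <stdint.h>

static uint64_t toy_inv_mod(uint64_t a, uint64_t p)	/* a^(p-2) mod p, p prime */
	{
	uint64_t r = 1, e = p - 2;
	a %= p;
	while (e)
		{
		if (e & 1)
			r = (r * a) % p;
		a = (a * a) % p;
		e >>= 1;
		}
	return r;
	}

/* Replaces each z[i] (all nonzero) by 1/z[i] mod p; needs num <= 32 here. */
static void toy_batch_inv(uint64_t *z, size_t num, uint64_t p)
	{
	uint64_t prefix[32], acc = 1, inv;
	size_t i;
	for (i = 0; i < num; i++)
		{
		acc = (acc * z[i]) % p;	/* prefix[i] = z[0]*...*z[i] */
		prefix[i] = acc;
		}
	inv = toy_inv_mod(acc, p);	/* the single inversion */
	for (i = num; i-- > 0; )
		{
		uint64_t zi = z[i];
		z[i] = (i == 0) ? inv : (inv * prefix[i - 1]) % p;	/* 1/z[i] */
		inv = (inv * zi) % p;	/* now the inverse of z[0]*...*z[i-1] */
		}
	}
#endif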
109
110/*
111 * This function looks at 5+1 scalar bits (5 current, 1 adjacent less
112 * significant bit), and recodes them into a signed digit for use in fast point
113 * multiplication: the use of signed rather than unsigned digits means that
114 * fewer points need to be precomputed, given that point inversion is easy
115 * (a precomputed point dP makes -dP available as well).
116 *
117 * BACKGROUND:
118 *
119 * Signed digits for multiplication were introduced by Booth ("A signed binary
120 * multiplication technique", Quart. Journ. Mech. and Applied Math., vol. IV,
121 * pt. 2 (1951), pp. 236-240), in that case for multiplication of integers.
122 * Booth's original encoding did not generally improve the density of nonzero
123 * digits over the binary representation, and was merely meant to simplify the
124 * handling of signed factors given in two's complement; but it has since been
125 * shown to be the basis of various signed-digit representations that do have
126 * further advantages, including the wNAF, using the following general approach:
127 *
128 * (1) Given a binary representation
129 *
130 * b_k ... b_2 b_1 b_0,
131 *
132 * of a nonnegative integer (b_k in {0, 1}), rewrite it in digits 0, 1, -1
133 * by using bit-wise subtraction as follows:
134 *
135 * b_k b_(k-1) ... b_2 b_1 b_0
136 * - b_k ... b_3 b_2 b_1 b_0
137 * -------------------------------------
138 * s_k b_(k-1) ... s_3 s_2 s_1 s_0
139 *
140 * A left-shift followed by subtraction of the original value yields a new
141 * representation of the same value, using signed bits s_i = b_(i+1) - b_i.
142 * This representation from Booth's paper has since appeared in the
143 * literature under a variety of different names including "reversed binary
144 * form", "alternating greedy expansion", "mutual opposite form", and
145 * "sign-alternating {+-1}-representation".
146 *
147 * An interesting property is that among the nonzero bits, values 1 and -1
148 * strictly alternate.
149 *
150 * (2) Various window schemes can be applied to the Booth representation of
151 * integers: for example, right-to-left sliding windows yield the wNAF
152 * (a signed-digit encoding independently discovered by various researchers
153 * in the 1990s), and left-to-right sliding windows yield a left-to-right
154 * equivalent of the wNAF (independently discovered by various researchers
155 * around 2004).
156 *
157 * To prevent leaking information through side channels in point multiplication,
158 * we need to recode the given integer into a regular pattern: sliding windows
159 * as in wNAFs won't do, we need their fixed-window equivalent -- which is a few
160 * decades older: we'll be using the so-called "modified Booth encoding" due to
161 * MacSorley ("High-speed arithmetic in binary computers", Proc. IRE, vol. 49
162 * (1961), pp. 67-91), in a radix-2^5 setting. That is, we always combine five
163 * signed bits into a signed digit:
164 *
 165 *       s_(5j + 4) s_(5j + 3) s_(5j + 2) s_(5j + 1) s_(5j)
166 *
167 * The sign-alternating property implies that the resulting digit values are
168 * integers from -16 to 16.
169 *
170 * Of course, we don't actually need to compute the signed digits s_i as an
171 * intermediate step (that's just a nice way to see how this scheme relates
172 * to the wNAF): a direct computation obtains the recoded digit from the
 173 * six bits b_(5j + 4) ... b_(5j - 1).
174 *
 175 * This function takes those six bits as an integer (0 .. 63), writing the
 176 * recoded digit to *sign (0 for positive, 1 for negative) and *digit (absolute
 177 * value, in the range 0 .. 16). Note that this integer essentially provides the
178 * input bits "shifted to the left" by one position: for example, the input to
179 * compute the least significant recoded digit, given that there's no bit b_-1,
180 * has to be b_4 b_3 b_2 b_1 b_0 0.
181 *
182 */
183void ec_GFp_nistp_recode_scalar_bits(unsigned char *sign, unsigned char *digit, unsigned char in)
184 {
185 unsigned char s, d;
186
187 s = ~((in >> 5) - 1); /* sets all bits to MSB(in), 'in' seen as 6-bit value */
188 d = (1 << 6) - in - 1;
189 d = (d & s) | (in & ~s);
190 d = (d >> 1) + (d & 1);
191
192 *sign = s & 1;
193 *digit = d;
194 }
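
/* Editor's note -- illustrative sketch, not part of the imported OpenSSL
 * source.  A quick exhaustive self-check of the recoding above: for every
 * 6-bit input b_4 b_3 b_2 b_1 b_0 b_-1, the signed result (-1)^sign * digit
 * must equal b_-1 + b_0 + 2*b_1 + 4*b_2 + 8*b_3 - 16*b_4.  Hypothetical test
 * helper, kept under #if 0 so it does not affect any build: */
#if 0
#include <assert.h>

static void recode_self_check(void)
	{
	unsigned char in, sign, digit;
	for (in = 0; in < 64; in++)
		{
		int expect = (int)((in >> 1) & 31)	/* b_4..b_0 as 0..31 */
		    + (int)(in & 1)			/* adjacent bit b_-1 */
		    - ((in & 32) ? 32 : 0);		/* b_4 weighs -16, not +16 */
		ec_GFp_nistp_recode_scalar_bits(&sign, &digit, in);
		assert((sign ? -(int)digit : (int)digit) == expect);
		}
	}
#endif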
195#else
196static void *dummy=&dummy;
197#endif
diff --git a/src/lib/libssl/src/crypto/ec/ecp_oct.c b/src/lib/libssl/src/crypto/ec/ecp_oct.c
new file mode 100644
index 0000000000..374a0ee731
--- /dev/null
+++ b/src/lib/libssl/src/crypto/ec/ecp_oct.c
@@ -0,0 +1,433 @@
1/* crypto/ec/ecp_oct.c */
2/* Includes code written by Lenka Fibikova <fibikova@exp-math.uni-essen.de>
3 * for the OpenSSL project.
4 * Includes code written by Bodo Moeller for the OpenSSL project.
5*/
6/* ====================================================================
7 * Copyright (c) 1998-2002 The OpenSSL Project. All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 *
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 *
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in
18 * the documentation and/or other materials provided with the
19 * distribution.
20 *
21 * 3. All advertising materials mentioning features or use of this
22 * software must display the following acknowledgment:
23 * "This product includes software developed by the OpenSSL Project
24 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
25 *
26 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
27 * endorse or promote products derived from this software without
28 * prior written permission. For written permission, please contact
29 * openssl-core@openssl.org.
30 *
31 * 5. Products derived from this software may not be called "OpenSSL"
32 * nor may "OpenSSL" appear in their names without prior written
33 * permission of the OpenSSL Project.
34 *
35 * 6. Redistributions of any form whatsoever must retain the following
36 * acknowledgment:
37 * "This product includes software developed by the OpenSSL Project
38 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
39 *
40 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
41 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
42 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
43 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
44 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
45 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
46 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
47 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
49 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
50 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
51 * OF THE POSSIBILITY OF SUCH DAMAGE.
52 * ====================================================================
53 *
54 * This product includes cryptographic software written by Eric Young
55 * (eay@cryptsoft.com). This product includes software written by Tim
56 * Hudson (tjh@cryptsoft.com).
57 *
58 */
59/* ====================================================================
60 * Copyright 2002 Sun Microsystems, Inc. ALL RIGHTS RESERVED.
61 * Portions of this software developed by SUN MICROSYSTEMS, INC.,
62 * and contributed to the OpenSSL project.
63 */
64
65#include <openssl/err.h>
66#include <openssl/symhacks.h>
67
68#include "ec_lcl.h"
69
70int ec_GFp_simple_set_compressed_coordinates(const EC_GROUP *group, EC_POINT *point,
71 const BIGNUM *x_, int y_bit, BN_CTX *ctx)
72 {
73 BN_CTX *new_ctx = NULL;
74 BIGNUM *tmp1, *tmp2, *x, *y;
75 int ret = 0;
76
77 /* clear error queue*/
78 ERR_clear_error();
79
80 if (ctx == NULL)
81 {
82 ctx = new_ctx = BN_CTX_new();
83 if (ctx == NULL)
84 return 0;
85 }
86
87 y_bit = (y_bit != 0);
88
89 BN_CTX_start(ctx);
90 tmp1 = BN_CTX_get(ctx);
91 tmp2 = BN_CTX_get(ctx);
92 x = BN_CTX_get(ctx);
93 y = BN_CTX_get(ctx);
94 if (y == NULL) goto err;
95
96 /* Recover y. We have a Weierstrass equation
97 * y^2 = x^3 + a*x + b,
98 * so y is one of the square roots of x^3 + a*x + b.
99 */
100
101 /* tmp1 := x^3 */
102 if (!BN_nnmod(x, x_, &group->field,ctx)) goto err;
103 if (group->meth->field_decode == 0)
104 {
105 /* field_{sqr,mul} work on standard representation */
106 if (!group->meth->field_sqr(group, tmp2, x_, ctx)) goto err;
107 if (!group->meth->field_mul(group, tmp1, tmp2, x_, ctx)) goto err;
108 }
109 else
110 {
111 if (!BN_mod_sqr(tmp2, x_, &group->field, ctx)) goto err;
112 if (!BN_mod_mul(tmp1, tmp2, x_, &group->field, ctx)) goto err;
113 }
114
115 /* tmp1 := tmp1 + a*x */
116 if (group->a_is_minus3)
117 {
118 if (!BN_mod_lshift1_quick(tmp2, x, &group->field)) goto err;
119 if (!BN_mod_add_quick(tmp2, tmp2, x, &group->field)) goto err;
120 if (!BN_mod_sub_quick(tmp1, tmp1, tmp2, &group->field)) goto err;
121 }
122 else
123 {
124 if (group->meth->field_decode)
125 {
126 if (!group->meth->field_decode(group, tmp2, &group->a, ctx)) goto err;
127 if (!BN_mod_mul(tmp2, tmp2, x, &group->field, ctx)) goto err;
128 }
129 else
130 {
131 /* field_mul works on standard representation */
132 if (!group->meth->field_mul(group, tmp2, &group->a, x, ctx)) goto err;
133 }
134
135 if (!BN_mod_add_quick(tmp1, tmp1, tmp2, &group->field)) goto err;
136 }
137
138 /* tmp1 := tmp1 + b */
139 if (group->meth->field_decode)
140 {
141 if (!group->meth->field_decode(group, tmp2, &group->b, ctx)) goto err;
142 if (!BN_mod_add_quick(tmp1, tmp1, tmp2, &group->field)) goto err;
143 }
144 else
145 {
146 if (!BN_mod_add_quick(tmp1, tmp1, &group->b, &group->field)) goto err;
147 }
148
149 if (!BN_mod_sqrt(y, tmp1, &group->field, ctx))
150 {
151 unsigned long err = ERR_peek_last_error();
152
153 if (ERR_GET_LIB(err) == ERR_LIB_BN && ERR_GET_REASON(err) == BN_R_NOT_A_SQUARE)
154 {
155 ERR_clear_error();
156 ECerr(EC_F_EC_GFP_SIMPLE_SET_COMPRESSED_COORDINATES, EC_R_INVALID_COMPRESSED_POINT);
157 }
158 else
159 ECerr(EC_F_EC_GFP_SIMPLE_SET_COMPRESSED_COORDINATES, ERR_R_BN_LIB);
160 goto err;
161 }
162
163 if (y_bit != BN_is_odd(y))
164 {
165 if (BN_is_zero(y))
166 {
167 int kron;
168
169 kron = BN_kronecker(x, &group->field, ctx);
170 if (kron == -2) goto err;
171
172 if (kron == 1)
173 ECerr(EC_F_EC_GFP_SIMPLE_SET_COMPRESSED_COORDINATES, EC_R_INVALID_COMPRESSION_BIT);
174 else
175				/* BN_mod_sqrt() should have caught this error (not a square) */
176 ECerr(EC_F_EC_GFP_SIMPLE_SET_COMPRESSED_COORDINATES, EC_R_INVALID_COMPRESSED_POINT);
177 goto err;
178 }
179 if (!BN_usub(y, &group->field, y)) goto err;
180 }
181 if (y_bit != BN_is_odd(y))
182 {
183 ECerr(EC_F_EC_GFP_SIMPLE_SET_COMPRESSED_COORDINATES, ERR_R_INTERNAL_ERROR);
184 goto err;
185 }
186
187 if (!EC_POINT_set_affine_coordinates_GFp(group, point, x, y, ctx)) goto err;
188
189 ret = 1;
190
191 err:
192 BN_CTX_end(ctx);
193 if (new_ctx != NULL)
194 BN_CTX_free(new_ctx);
195 return ret;
196 }
197
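
The decompression above is normally reached through the public EC_POINT_set_compressed_coordinates_GFp() wrapper: given x and the parity bit of y it solves y^2 = x^3 + a*x + b and keeps the square root whose parity matches. A minimal caller sketch, assuming a named curve and an x coordinate supplied by the application (the curve choice and helper name are illustrative, not taken from this file):

#include <openssl/ec.h>
#include <openssl/bn.h>
#include <openssl/obj_mac.h>

/*
 * Decompress (x, y_bit) on P-256 and report whether a valid point was
 * recovered.  x_hex and y_bit are placeholders supplied by the caller.
 */
static int check_decompress(const char *x_hex, int y_bit)
	{
	EC_GROUP *group = EC_GROUP_new_by_curve_name(NID_X9_62_prime256v1);
	EC_POINT *point = NULL;
	BIGNUM *x = NULL;
	int ok = 0;

	if (group == NULL)
		goto done;
	point = EC_POINT_new(group);
	if (point == NULL || !BN_hex2bn(&x, x_hex))
		goto done;
	/* solves y^2 = x^3 + a*x + b and picks the root matching y_bit */
	if (!EC_POINT_set_compressed_coordinates_GFp(group, point, x, y_bit, NULL))
		goto done;
	ok = (EC_POINT_is_on_curve(group, point, NULL) == 1);
done:
	BN_free(x);
	EC_POINT_free(point);
	EC_GROUP_free(group);
	return ok;
	}
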
198
199size_t ec_GFp_simple_point2oct(const EC_GROUP *group, const EC_POINT *point, point_conversion_form_t form,
200 unsigned char *buf, size_t len, BN_CTX *ctx)
201 {
202 size_t ret;
203 BN_CTX *new_ctx = NULL;
204 int used_ctx = 0;
205 BIGNUM *x, *y;
206 size_t field_len, i, skip;
207
208 if ((form != POINT_CONVERSION_COMPRESSED)
209 && (form != POINT_CONVERSION_UNCOMPRESSED)
210 && (form != POINT_CONVERSION_HYBRID))
211 {
212 ECerr(EC_F_EC_GFP_SIMPLE_POINT2OCT, EC_R_INVALID_FORM);
213 goto err;
214 }
215
216 if (EC_POINT_is_at_infinity(group, point))
217 {
218 /* encodes to a single 0 octet */
219 if (buf != NULL)
220 {
221 if (len < 1)
222 {
223 ECerr(EC_F_EC_GFP_SIMPLE_POINT2OCT, EC_R_BUFFER_TOO_SMALL);
224 return 0;
225 }
226 buf[0] = 0;
227 }
228 return 1;
229 }
230
231
232 /* ret := required output buffer length */
233 field_len = BN_num_bytes(&group->field);
234 ret = (form == POINT_CONVERSION_COMPRESSED) ? 1 + field_len : 1 + 2*field_len;
235
236 /* if 'buf' is NULL, just return required length */
237 if (buf != NULL)
238 {
239 if (len < ret)
240 {
241 ECerr(EC_F_EC_GFP_SIMPLE_POINT2OCT, EC_R_BUFFER_TOO_SMALL);
242 goto err;
243 }
244
245 if (ctx == NULL)
246 {
247 ctx = new_ctx = BN_CTX_new();
248 if (ctx == NULL)
249 return 0;
250 }
251
252 BN_CTX_start(ctx);
253 used_ctx = 1;
254 x = BN_CTX_get(ctx);
255 y = BN_CTX_get(ctx);
256 if (y == NULL) goto err;
257
258 if (!EC_POINT_get_affine_coordinates_GFp(group, point, x, y, ctx)) goto err;
259
260 if ((form == POINT_CONVERSION_COMPRESSED || form == POINT_CONVERSION_HYBRID) && BN_is_odd(y))
261 buf[0] = form + 1;
262 else
263 buf[0] = form;
264
265 i = 1;
266
267 skip = field_len - BN_num_bytes(x);
268 if (skip > field_len)
269 {
270 ECerr(EC_F_EC_GFP_SIMPLE_POINT2OCT, ERR_R_INTERNAL_ERROR);
271 goto err;
272 }
273 while (skip > 0)
274 {
275 buf[i++] = 0;
276 skip--;
277 }
278 skip = BN_bn2bin(x, buf + i);
279 i += skip;
280 if (i != 1 + field_len)
281 {
282 ECerr(EC_F_EC_GFP_SIMPLE_POINT2OCT, ERR_R_INTERNAL_ERROR);
283 goto err;
284 }
285
286 if (form == POINT_CONVERSION_UNCOMPRESSED || form == POINT_CONVERSION_HYBRID)
287 {
288 skip = field_len - BN_num_bytes(y);
289 if (skip > field_len)
290 {
291 ECerr(EC_F_EC_GFP_SIMPLE_POINT2OCT, ERR_R_INTERNAL_ERROR);
292 goto err;
293 }
294 while (skip > 0)
295 {
296 buf[i++] = 0;
297 skip--;
298 }
299 skip = BN_bn2bin(y, buf + i);
300 i += skip;
301 }
302
303 if (i != ret)
304 {
305 ECerr(EC_F_EC_GFP_SIMPLE_POINT2OCT, ERR_R_INTERNAL_ERROR);
306 goto err;
307 }
308 }
309
310 if (used_ctx)
311 BN_CTX_end(ctx);
312 if (new_ctx != NULL)
313 BN_CTX_free(new_ctx);
314 return ret;
315
316 err:
317 if (used_ctx)
318 BN_CTX_end(ctx);
319 if (new_ctx != NULL)
320 BN_CTX_free(new_ctx);
321 return 0;
322 }
323
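
As the branches above show, the encoding is one tag byte followed by big-endian field-element bytes: a single 0x00 for the point at infinity, 0x02/0x03 for compressed (0x03 when y is odd), 0x04 for uncompressed and 0x06/0x07 for hybrid, giving 1+field_len or 1+2*field_len octets. Callers typically rely on the NULL-buffer branch to size the output first; a hedged sketch of that two-call pattern through the public EC_POINT_point2oct() wrapper (group and point assumed to exist, helper name illustrative):

#include <openssl/ec.h>
#include <stdlib.h>

/* Encode 'point' in compressed form; returns a malloc'd buffer, sets *outlen. */
static unsigned char *encode_point(const EC_GROUP *group, const EC_POINT *point,
    size_t *outlen)
	{
	unsigned char *buf;
	size_t len;

	/* first call with a NULL buffer just reports the required length */
	len = EC_POINT_point2oct(group, point, POINT_CONVERSION_COMPRESSED,
	    NULL, 0, NULL);
	if (len == 0)
		return NULL;
	buf = malloc(len);
	if (buf == NULL)
		return NULL;
	if (EC_POINT_point2oct(group, point, POINT_CONVERSION_COMPRESSED,
	    buf, len, NULL) != len)
		{
		free(buf);
		return NULL;
		}
	*outlen = len;
	return buf;
	}
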
324
325int ec_GFp_simple_oct2point(const EC_GROUP *group, EC_POINT *point,
326 const unsigned char *buf, size_t len, BN_CTX *ctx)
327 {
328 point_conversion_form_t form;
329 int y_bit;
330 BN_CTX *new_ctx = NULL;
331 BIGNUM *x, *y;
332 size_t field_len, enc_len;
333 int ret = 0;
334
335 if (len == 0)
336 {
337 ECerr(EC_F_EC_GFP_SIMPLE_OCT2POINT, EC_R_BUFFER_TOO_SMALL);
338 return 0;
339 }
340 form = buf[0];
341 y_bit = form & 1;
342 form = form & ~1U;
343 if ((form != 0) && (form != POINT_CONVERSION_COMPRESSED)
344 && (form != POINT_CONVERSION_UNCOMPRESSED)
345 && (form != POINT_CONVERSION_HYBRID))
346 {
347 ECerr(EC_F_EC_GFP_SIMPLE_OCT2POINT, EC_R_INVALID_ENCODING);
348 return 0;
349 }
350 if ((form == 0 || form == POINT_CONVERSION_UNCOMPRESSED) && y_bit)
351 {
352 ECerr(EC_F_EC_GFP_SIMPLE_OCT2POINT, EC_R_INVALID_ENCODING);
353 return 0;
354 }
355
356 if (form == 0)
357 {
358 if (len != 1)
359 {
360 ECerr(EC_F_EC_GFP_SIMPLE_OCT2POINT, EC_R_INVALID_ENCODING);
361 return 0;
362 }
363
364 return EC_POINT_set_to_infinity(group, point);
365 }
366
367 field_len = BN_num_bytes(&group->field);
368 enc_len = (form == POINT_CONVERSION_COMPRESSED) ? 1 + field_len : 1 + 2*field_len;
369
370 if (len != enc_len)
371 {
372 ECerr(EC_F_EC_GFP_SIMPLE_OCT2POINT, EC_R_INVALID_ENCODING);
373 return 0;
374 }
375
376 if (ctx == NULL)
377 {
378 ctx = new_ctx = BN_CTX_new();
379 if (ctx == NULL)
380 return 0;
381 }
382
383 BN_CTX_start(ctx);
384 x = BN_CTX_get(ctx);
385 y = BN_CTX_get(ctx);
386 if (y == NULL) goto err;
387
388 if (!BN_bin2bn(buf + 1, field_len, x)) goto err;
389 if (BN_ucmp(x, &group->field) >= 0)
390 {
391 ECerr(EC_F_EC_GFP_SIMPLE_OCT2POINT, EC_R_INVALID_ENCODING);
392 goto err;
393 }
394
395 if (form == POINT_CONVERSION_COMPRESSED)
396 {
397 if (!EC_POINT_set_compressed_coordinates_GFp(group, point, x, y_bit, ctx)) goto err;
398 }
399 else
400 {
401 if (!BN_bin2bn(buf + 1 + field_len, field_len, y)) goto err;
402 if (BN_ucmp(y, &group->field) >= 0)
403 {
404 ECerr(EC_F_EC_GFP_SIMPLE_OCT2POINT, EC_R_INVALID_ENCODING);
405 goto err;
406 }
407 if (form == POINT_CONVERSION_HYBRID)
408 {
409 if (y_bit != BN_is_odd(y))
410 {
411 ECerr(EC_F_EC_GFP_SIMPLE_OCT2POINT, EC_R_INVALID_ENCODING);
412 goto err;
413 }
414 }
415
416 if (!EC_POINT_set_affine_coordinates_GFp(group, point, x, y, ctx)) goto err;
417 }
418
419 if (!EC_POINT_is_on_curve(group, point, ctx)) /* test required by X9.62 */
420 {
421 ECerr(EC_F_EC_GFP_SIMPLE_OCT2POINT, EC_R_POINT_IS_NOT_ON_CURVE);
422 goto err;
423 }
424
425 ret = 1;
426
427 err:
428 BN_CTX_end(ctx);
429 if (new_ctx != NULL)
430 BN_CTX_free(new_ctx);
431 return ret;
432 }
433
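
ec_GFp_simple_oct2point is the exact inverse: it validates the tag byte and length, rejects x or y values that are not below the field prime, and ends with the on-curve check that X9.62 requires. A round-trip through the public wrappers makes a convenient self-test; the sketch below assumes the caller already has a group and a point (helper name illustrative):

#include <openssl/ec.h>
#include <stdlib.h>

/* Encode 'point', decode it again, and check the copy equals the original. */
static int roundtrip_ok(const EC_GROUP *group, const EC_POINT *point)
	{
	unsigned char *buf = NULL;
	size_t len;
	EC_POINT *copy = NULL;
	int ok = 0;

	len = EC_POINT_point2oct(group, point, POINT_CONVERSION_UNCOMPRESSED,
	    NULL, 0, NULL);
	if (len == 0 || (buf = malloc(len)) == NULL)
		goto done;
	if (EC_POINT_point2oct(group, point, POINT_CONVERSION_UNCOMPRESSED,
	    buf, len, NULL) != len)
		goto done;
	if ((copy = EC_POINT_new(group)) == NULL)
		goto done;
	if (!EC_POINT_oct2point(group, copy, buf, len, NULL))
		goto done;
	ok = (EC_POINT_cmp(group, point, copy, NULL) == 0);	/* 0 means equal */
done:
	EC_POINT_free(copy);
	free(buf);
	return ok;
	}
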
diff --git a/src/lib/libssl/src/crypto/ecdh/ecdh.h b/src/lib/libssl/src/crypto/ecdh/ecdh.h
index b4b58ee65b..8887102c0b 100644
--- a/src/lib/libssl/src/crypto/ecdh/ecdh.h
+++ b/src/lib/libssl/src/crypto/ecdh/ecdh.h
@@ -109,11 +109,13 @@ void ERR_load_ECDH_strings(void);
109/* Error codes for the ECDH functions. */ 109/* Error codes for the ECDH functions. */
110 110
111/* Function codes. */ 111/* Function codes. */
112#define ECDH_F_ECDH_CHECK 102
112#define ECDH_F_ECDH_COMPUTE_KEY 100 113#define ECDH_F_ECDH_COMPUTE_KEY 100
113#define ECDH_F_ECDH_DATA_NEW_METHOD 101 114#define ECDH_F_ECDH_DATA_NEW_METHOD 101
114 115
115/* Reason codes. */ 116/* Reason codes. */
116#define ECDH_R_KDF_FAILED 102 117#define ECDH_R_KDF_FAILED 102
118#define ECDH_R_NON_FIPS_METHOD 103
117#define ECDH_R_NO_PRIVATE_VALUE 100 119#define ECDH_R_NO_PRIVATE_VALUE 100
118#define ECDH_R_POINT_ARITHMETIC_FAILURE 101 120#define ECDH_R_POINT_ARITHMETIC_FAILURE 101
119 121
diff --git a/src/lib/libssl/src/crypto/ecdh/ecdhtest.c b/src/lib/libssl/src/crypto/ecdh/ecdhtest.c
index 212a87efa4..823d7baa65 100644
--- a/src/lib/libssl/src/crypto/ecdh/ecdhtest.c
+++ b/src/lib/libssl/src/crypto/ecdh/ecdhtest.c
@@ -158,11 +158,13 @@ static int test_ecdh_curve(int nid, const char *text, BN_CTX *ctx, BIO *out)
158 if (!EC_POINT_get_affine_coordinates_GFp(group, 158 if (!EC_POINT_get_affine_coordinates_GFp(group,
159 EC_KEY_get0_public_key(a), x_a, y_a, ctx)) goto err; 159 EC_KEY_get0_public_key(a), x_a, y_a, ctx)) goto err;
160 } 160 }
161#ifndef OPENSSL_NO_EC2M
161 else 162 else
162 { 163 {
163 if (!EC_POINT_get_affine_coordinates_GF2m(group, 164 if (!EC_POINT_get_affine_coordinates_GF2m(group,
164 EC_KEY_get0_public_key(a), x_a, y_a, ctx)) goto err; 165 EC_KEY_get0_public_key(a), x_a, y_a, ctx)) goto err;
165 } 166 }
167#endif
166#ifdef NOISY 168#ifdef NOISY
167 BIO_puts(out," pri 1="); 169 BIO_puts(out," pri 1=");
168 BN_print(out,a->priv_key); 170 BN_print(out,a->priv_key);
@@ -183,11 +185,13 @@ static int test_ecdh_curve(int nid, const char *text, BN_CTX *ctx, BIO *out)
183 if (!EC_POINT_get_affine_coordinates_GFp(group, 185 if (!EC_POINT_get_affine_coordinates_GFp(group,
184 EC_KEY_get0_public_key(b), x_b, y_b, ctx)) goto err; 186 EC_KEY_get0_public_key(b), x_b, y_b, ctx)) goto err;
185 } 187 }
188#ifndef OPENSSL_NO_EC2M
186 else 189 else
187 { 190 {
188 if (!EC_POINT_get_affine_coordinates_GF2m(group, 191 if (!EC_POINT_get_affine_coordinates_GF2m(group,
189 EC_KEY_get0_public_key(b), x_b, y_b, ctx)) goto err; 192 EC_KEY_get0_public_key(b), x_b, y_b, ctx)) goto err;
190 } 193 }
194#endif
191 195
192#ifdef NOISY 196#ifdef NOISY
193 BIO_puts(out," pri 2="); 197 BIO_puts(out," pri 2=");
@@ -324,6 +328,7 @@ int main(int argc, char *argv[])
324 if (!test_ecdh_curve(NID_X9_62_prime256v1, "NIST Prime-Curve P-256", ctx, out)) goto err; 328 if (!test_ecdh_curve(NID_X9_62_prime256v1, "NIST Prime-Curve P-256", ctx, out)) goto err;
325 if (!test_ecdh_curve(NID_secp384r1, "NIST Prime-Curve P-384", ctx, out)) goto err; 329 if (!test_ecdh_curve(NID_secp384r1, "NIST Prime-Curve P-384", ctx, out)) goto err;
326 if (!test_ecdh_curve(NID_secp521r1, "NIST Prime-Curve P-521", ctx, out)) goto err; 330 if (!test_ecdh_curve(NID_secp521r1, "NIST Prime-Curve P-521", ctx, out)) goto err;
331#ifndef OPENSSL_NO_EC2M
327 /* NIST BINARY CURVES TESTS */ 332 /* NIST BINARY CURVES TESTS */
328 if (!test_ecdh_curve(NID_sect163k1, "NIST Binary-Curve K-163", ctx, out)) goto err; 333 if (!test_ecdh_curve(NID_sect163k1, "NIST Binary-Curve K-163", ctx, out)) goto err;
329 if (!test_ecdh_curve(NID_sect163r2, "NIST Binary-Curve B-163", ctx, out)) goto err; 334 if (!test_ecdh_curve(NID_sect163r2, "NIST Binary-Curve B-163", ctx, out)) goto err;
@@ -335,6 +340,7 @@ int main(int argc, char *argv[])
335 if (!test_ecdh_curve(NID_sect409r1, "NIST Binary-Curve B-409", ctx, out)) goto err; 340 if (!test_ecdh_curve(NID_sect409r1, "NIST Binary-Curve B-409", ctx, out)) goto err;
336 if (!test_ecdh_curve(NID_sect571k1, "NIST Binary-Curve K-571", ctx, out)) goto err; 341 if (!test_ecdh_curve(NID_sect571k1, "NIST Binary-Curve K-571", ctx, out)) goto err;
337 if (!test_ecdh_curve(NID_sect571r1, "NIST Binary-Curve B-571", ctx, out)) goto err; 342 if (!test_ecdh_curve(NID_sect571r1, "NIST Binary-Curve B-571", ctx, out)) goto err;
343#endif
338 344
339 ret = 0; 345 ret = 0;
340 346
diff --git a/src/lib/libssl/src/crypto/ecdh/ech_err.c b/src/lib/libssl/src/crypto/ecdh/ech_err.c
index 6f4b0c9953..3bd247398d 100644
--- a/src/lib/libssl/src/crypto/ecdh/ech_err.c
+++ b/src/lib/libssl/src/crypto/ecdh/ech_err.c
@@ -1,6 +1,6 @@
1/* crypto/ecdh/ech_err.c */ 1/* crypto/ecdh/ech_err.c */
2/* ==================================================================== 2/* ====================================================================
3 * Copyright (c) 1999-2006 The OpenSSL Project. All rights reserved. 3 * Copyright (c) 1999-2011 The OpenSSL Project. All rights reserved.
4 * 4 *
5 * Redistribution and use in source and binary forms, with or without 5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions 6 * modification, are permitted provided that the following conditions
@@ -70,6 +70,7 @@
70 70
71static ERR_STRING_DATA ECDH_str_functs[]= 71static ERR_STRING_DATA ECDH_str_functs[]=
72 { 72 {
73{ERR_FUNC(ECDH_F_ECDH_CHECK), "ECDH_CHECK"},
73{ERR_FUNC(ECDH_F_ECDH_COMPUTE_KEY), "ECDH_compute_key"}, 74{ERR_FUNC(ECDH_F_ECDH_COMPUTE_KEY), "ECDH_compute_key"},
74{ERR_FUNC(ECDH_F_ECDH_DATA_NEW_METHOD), "ECDH_DATA_new_method"}, 75{ERR_FUNC(ECDH_F_ECDH_DATA_NEW_METHOD), "ECDH_DATA_new_method"},
75{0,NULL} 76{0,NULL}
@@ -78,6 +79,7 @@ static ERR_STRING_DATA ECDH_str_functs[]=
78static ERR_STRING_DATA ECDH_str_reasons[]= 79static ERR_STRING_DATA ECDH_str_reasons[]=
79 { 80 {
80{ERR_REASON(ECDH_R_KDF_FAILED) ,"KDF failed"}, 81{ERR_REASON(ECDH_R_KDF_FAILED) ,"KDF failed"},
82{ERR_REASON(ECDH_R_NON_FIPS_METHOD) ,"non fips method"},
81{ERR_REASON(ECDH_R_NO_PRIVATE_VALUE) ,"no private value"}, 83{ERR_REASON(ECDH_R_NO_PRIVATE_VALUE) ,"no private value"},
82{ERR_REASON(ECDH_R_POINT_ARITHMETIC_FAILURE),"point arithmetic failure"}, 84{ERR_REASON(ECDH_R_POINT_ARITHMETIC_FAILURE),"point arithmetic failure"},
83{0,NULL} 85{0,NULL}
diff --git a/src/lib/libssl/src/crypto/ecdh/ech_lib.c b/src/lib/libssl/src/crypto/ecdh/ech_lib.c
index 4d8ea03d3d..dadbfd3c49 100644
--- a/src/lib/libssl/src/crypto/ecdh/ech_lib.c
+++ b/src/lib/libssl/src/crypto/ecdh/ech_lib.c
@@ -73,6 +73,9 @@
73#include <openssl/engine.h> 73#include <openssl/engine.h>
74#endif 74#endif
75#include <openssl/err.h> 75#include <openssl/err.h>
76#ifdef OPENSSL_FIPS
77#include <openssl/fips.h>
78#endif
76 79
77const char ECDH_version[]="ECDH" OPENSSL_VERSION_PTEXT; 80const char ECDH_version[]="ECDH" OPENSSL_VERSION_PTEXT;
78 81
@@ -90,7 +93,16 @@ void ECDH_set_default_method(const ECDH_METHOD *meth)
90const ECDH_METHOD *ECDH_get_default_method(void) 93const ECDH_METHOD *ECDH_get_default_method(void)
91 { 94 {
92 if(!default_ECDH_method) 95 if(!default_ECDH_method)
96 {
97#ifdef OPENSSL_FIPS
98 if (FIPS_mode())
99 return FIPS_ecdh_openssl();
100 else
101 return ECDH_OpenSSL();
102#else
93 default_ECDH_method = ECDH_OpenSSL(); 103 default_ECDH_method = ECDH_OpenSSL();
104#endif
105 }
94 return default_ECDH_method; 106 return default_ECDH_method;
95 } 107 }
96 108
@@ -215,6 +227,14 @@ ECDH_DATA *ecdh_check(EC_KEY *key)
215 } 227 }
216 else 228 else
217 ecdh_data = (ECDH_DATA *)data; 229 ecdh_data = (ECDH_DATA *)data;
230#ifdef OPENSSL_FIPS
231 if (FIPS_mode() && !(ecdh_data->flags & ECDH_FLAG_FIPS_METHOD)
232 && !(EC_KEY_get_flags(key) & EC_FLAG_NON_FIPS_ALLOW))
233 {
234 ECDHerr(ECDH_F_ECDH_CHECK, ECDH_R_NON_FIPS_METHOD);
235 return NULL;
236 }
237#endif
218 238
219 239
220 return ecdh_data; 240 return ecdh_data;
diff --git a/src/lib/libssl/src/crypto/ecdh/ech_locl.h b/src/lib/libssl/src/crypto/ecdh/ech_locl.h
index f658526a7e..f6cad6a894 100644
--- a/src/lib/libssl/src/crypto/ecdh/ech_locl.h
+++ b/src/lib/libssl/src/crypto/ecdh/ech_locl.h
@@ -75,6 +75,14 @@ struct ecdh_method
75 char *app_data; 75 char *app_data;
76 }; 76 };
77 77
78/* If this flag is set the ECDH method is FIPS compliant and can be used
79 * in FIPS mode. This is set in the validated module method. If an
80 * application sets this flag in its own methods it is its responsibility
81 * to ensure the result is compliant.
82 */
83
84#define ECDH_FLAG_FIPS_METHOD 0x1
85
78typedef struct ecdh_data_st { 86typedef struct ecdh_data_st {
79 /* EC_KEY_METH_DATA part */ 87 /* EC_KEY_METH_DATA part */
80 int (*init)(EC_KEY *); 88 int (*init)(EC_KEY *);
diff --git a/src/lib/libssl/src/crypto/ecdh/ech_ossl.c b/src/lib/libssl/src/crypto/ecdh/ech_ossl.c
index 2a40ff12df..4a30628fbc 100644
--- a/src/lib/libssl/src/crypto/ecdh/ech_ossl.c
+++ b/src/lib/libssl/src/crypto/ecdh/ech_ossl.c
@@ -157,6 +157,7 @@ static int ecdh_compute_key(void *out, size_t outlen, const EC_POINT *pub_key,
157 goto err; 157 goto err;
158 } 158 }
159 } 159 }
160#ifndef OPENSSL_NO_EC2M
160 else 161 else
161 { 162 {
162 if (!EC_POINT_get_affine_coordinates_GF2m(group, tmp, x, y, ctx)) 163 if (!EC_POINT_get_affine_coordinates_GF2m(group, tmp, x, y, ctx))
@@ -165,6 +166,7 @@ static int ecdh_compute_key(void *out, size_t outlen, const EC_POINT *pub_key,
165 goto err; 166 goto err;
166 } 167 }
167 } 168 }
169#endif
168 170
169 buflen = (EC_GROUP_get_degree(group) + 7)/8; 171 buflen = (EC_GROUP_get_degree(group) + 7)/8;
170 len = BN_num_bytes(x); 172 len = BN_num_bytes(x);
diff --git a/src/lib/libssl/src/crypto/ecdsa/ecdsa.h b/src/lib/libssl/src/crypto/ecdsa/ecdsa.h
index e61c539812..7fb5254b62 100644
--- a/src/lib/libssl/src/crypto/ecdsa/ecdsa.h
+++ b/src/lib/libssl/src/crypto/ecdsa/ecdsa.h
@@ -238,6 +238,7 @@ void ERR_load_ECDSA_strings(void);
238/* Error codes for the ECDSA functions. */ 238/* Error codes for the ECDSA functions. */
239 239
240/* Function codes. */ 240/* Function codes. */
241#define ECDSA_F_ECDSA_CHECK 104
241#define ECDSA_F_ECDSA_DATA_NEW_METHOD 100 242#define ECDSA_F_ECDSA_DATA_NEW_METHOD 100
242#define ECDSA_F_ECDSA_DO_SIGN 101 243#define ECDSA_F_ECDSA_DO_SIGN 101
243#define ECDSA_F_ECDSA_DO_VERIFY 102 244#define ECDSA_F_ECDSA_DO_VERIFY 102
@@ -249,6 +250,7 @@ void ERR_load_ECDSA_strings(void);
249#define ECDSA_R_ERR_EC_LIB 102 250#define ECDSA_R_ERR_EC_LIB 102
250#define ECDSA_R_MISSING_PARAMETERS 103 251#define ECDSA_R_MISSING_PARAMETERS 103
251#define ECDSA_R_NEED_NEW_SETUP_VALUES 106 252#define ECDSA_R_NEED_NEW_SETUP_VALUES 106
253#define ECDSA_R_NON_FIPS_METHOD 107
252#define ECDSA_R_RANDOM_NUMBER_GENERATION_FAILED 104 254#define ECDSA_R_RANDOM_NUMBER_GENERATION_FAILED 104
253#define ECDSA_R_SIGNATURE_MALLOC_FAILED 105 255#define ECDSA_R_SIGNATURE_MALLOC_FAILED 105
254 256
diff --git a/src/lib/libssl/src/crypto/ecdsa/ecdsatest.c b/src/lib/libssl/src/crypto/ecdsa/ecdsatest.c
index 54cfb8c753..537bb30362 100644
--- a/src/lib/libssl/src/crypto/ecdsa/ecdsatest.c
+++ b/src/lib/libssl/src/crypto/ecdsa/ecdsatest.c
@@ -262,6 +262,7 @@ int x9_62_tests(BIO *out)
262 "3238135532097973577080787768312505059318910517550078427819" 262 "3238135532097973577080787768312505059318910517550078427819"
263 "78505179448783")) 263 "78505179448783"))
264 goto x962_err; 264 goto x962_err;
265#ifndef OPENSSL_NO_EC2M
265 if (!x9_62_test_internal(out, NID_X9_62_c2tnb191v1, 266 if (!x9_62_test_internal(out, NID_X9_62_c2tnb191v1,
266 "87194383164871543355722284926904419997237591535066528048", 267 "87194383164871543355722284926904419997237591535066528048",
267 "308992691965804947361541664549085895292153777025772063598")) 268 "308992691965804947361541664549085895292153777025772063598"))
@@ -272,7 +273,7 @@ int x9_62_tests(BIO *out)
272 "1970303740007316867383349976549972270528498040721988191026" 273 "1970303740007316867383349976549972270528498040721988191026"
273 "49413465737174")) 274 "49413465737174"))
274 goto x962_err; 275 goto x962_err;
275 276#endif
276 ret = 1; 277 ret = 1;
277x962_err: 278x962_err:
278 if (!restore_rand()) 279 if (!restore_rand())
@@ -289,7 +290,8 @@ int test_builtin(BIO *out)
289 ECDSA_SIG *ecdsa_sig = NULL; 290 ECDSA_SIG *ecdsa_sig = NULL;
290 unsigned char digest[20], wrong_digest[20]; 291 unsigned char digest[20], wrong_digest[20];
291 unsigned char *signature = NULL; 292 unsigned char *signature = NULL;
292 unsigned char *sig_ptr; 293 const unsigned char *sig_ptr;
294 unsigned char *sig_ptr2;
293 unsigned char *raw_buf = NULL; 295 unsigned char *raw_buf = NULL;
294 unsigned int sig_len, degree, r_len, s_len, bn_len, buf_len; 296 unsigned int sig_len, degree, r_len, s_len, bn_len, buf_len;
295 int nid, ret = 0; 297 int nid, ret = 0;
@@ -464,8 +466,8 @@ int test_builtin(BIO *out)
464 (BN_bin2bn(raw_buf + bn_len, bn_len, ecdsa_sig->s) == NULL)) 466 (BN_bin2bn(raw_buf + bn_len, bn_len, ecdsa_sig->s) == NULL))
465 goto builtin_err; 467 goto builtin_err;
466 468
467 sig_ptr = signature; 469 sig_ptr2 = signature;
468 sig_len = i2d_ECDSA_SIG(ecdsa_sig, &sig_ptr); 470 sig_len = i2d_ECDSA_SIG(ecdsa_sig, &sig_ptr2);
469 if (ECDSA_verify(0, digest, 20, signature, sig_len, eckey) == 1) 471 if (ECDSA_verify(0, digest, 20, signature, sig_len, eckey) == 1)
470 { 472 {
471 BIO_printf(out, " failed\n"); 473 BIO_printf(out, " failed\n");
@@ -477,8 +479,8 @@ int test_builtin(BIO *out)
477 (BN_bin2bn(raw_buf + bn_len, bn_len, ecdsa_sig->s) == NULL)) 479 (BN_bin2bn(raw_buf + bn_len, bn_len, ecdsa_sig->s) == NULL))
478 goto builtin_err; 480 goto builtin_err;
479 481
480 sig_ptr = signature; 482 sig_ptr2 = signature;
481 sig_len = i2d_ECDSA_SIG(ecdsa_sig, &sig_ptr); 483 sig_len = i2d_ECDSA_SIG(ecdsa_sig, &sig_ptr2);
482 if (ECDSA_verify(0, digest, 20, signature, sig_len, eckey) != 1) 484 if (ECDSA_verify(0, digest, 20, signature, sig_len, eckey) != 1)
483 { 485 {
484 BIO_printf(out, " failed\n"); 486 BIO_printf(out, " failed\n");
diff --git a/src/lib/libssl/src/crypto/ecdsa/ecs_err.c b/src/lib/libssl/src/crypto/ecdsa/ecs_err.c
index 98e38d537f..81542e6d15 100644
--- a/src/lib/libssl/src/crypto/ecdsa/ecs_err.c
+++ b/src/lib/libssl/src/crypto/ecdsa/ecs_err.c
@@ -1,6 +1,6 @@
1/* crypto/ecdsa/ecs_err.c */ 1/* crypto/ecdsa/ecs_err.c */
2/* ==================================================================== 2/* ====================================================================
3 * Copyright (c) 1999-2006 The OpenSSL Project. All rights reserved. 3 * Copyright (c) 1999-2011 The OpenSSL Project. All rights reserved.
4 * 4 *
5 * Redistribution and use in source and binary forms, with or without 5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions 6 * modification, are permitted provided that the following conditions
@@ -70,6 +70,7 @@
70 70
71static ERR_STRING_DATA ECDSA_str_functs[]= 71static ERR_STRING_DATA ECDSA_str_functs[]=
72 { 72 {
73{ERR_FUNC(ECDSA_F_ECDSA_CHECK), "ECDSA_CHECK"},
73{ERR_FUNC(ECDSA_F_ECDSA_DATA_NEW_METHOD), "ECDSA_DATA_NEW_METHOD"}, 74{ERR_FUNC(ECDSA_F_ECDSA_DATA_NEW_METHOD), "ECDSA_DATA_NEW_METHOD"},
74{ERR_FUNC(ECDSA_F_ECDSA_DO_SIGN), "ECDSA_do_sign"}, 75{ERR_FUNC(ECDSA_F_ECDSA_DO_SIGN), "ECDSA_do_sign"},
75{ERR_FUNC(ECDSA_F_ECDSA_DO_VERIFY), "ECDSA_do_verify"}, 76{ERR_FUNC(ECDSA_F_ECDSA_DO_VERIFY), "ECDSA_do_verify"},
@@ -84,6 +85,7 @@ static ERR_STRING_DATA ECDSA_str_reasons[]=
84{ERR_REASON(ECDSA_R_ERR_EC_LIB) ,"err ec lib"}, 85{ERR_REASON(ECDSA_R_ERR_EC_LIB) ,"err ec lib"},
85{ERR_REASON(ECDSA_R_MISSING_PARAMETERS) ,"missing parameters"}, 86{ERR_REASON(ECDSA_R_MISSING_PARAMETERS) ,"missing parameters"},
86{ERR_REASON(ECDSA_R_NEED_NEW_SETUP_VALUES),"need new setup values"}, 87{ERR_REASON(ECDSA_R_NEED_NEW_SETUP_VALUES),"need new setup values"},
88{ERR_REASON(ECDSA_R_NON_FIPS_METHOD) ,"non fips method"},
87{ERR_REASON(ECDSA_R_RANDOM_NUMBER_GENERATION_FAILED),"random number generation failed"}, 89{ERR_REASON(ECDSA_R_RANDOM_NUMBER_GENERATION_FAILED),"random number generation failed"},
88{ERR_REASON(ECDSA_R_SIGNATURE_MALLOC_FAILED),"signature malloc failed"}, 90{ERR_REASON(ECDSA_R_SIGNATURE_MALLOC_FAILED),"signature malloc failed"},
89{0,NULL} 91{0,NULL}
diff --git a/src/lib/libssl/src/crypto/ecdsa/ecs_lib.c b/src/lib/libssl/src/crypto/ecdsa/ecs_lib.c
index 2ebae3aa27..e477da430b 100644
--- a/src/lib/libssl/src/crypto/ecdsa/ecs_lib.c
+++ b/src/lib/libssl/src/crypto/ecdsa/ecs_lib.c
@@ -60,6 +60,9 @@
60#endif 60#endif
61#include <openssl/err.h> 61#include <openssl/err.h>
62#include <openssl/bn.h> 62#include <openssl/bn.h>
63#ifdef OPENSSL_FIPS
64#include <openssl/fips.h>
65#endif
63 66
64const char ECDSA_version[]="ECDSA" OPENSSL_VERSION_PTEXT; 67const char ECDSA_version[]="ECDSA" OPENSSL_VERSION_PTEXT;
65 68
@@ -77,7 +80,16 @@ void ECDSA_set_default_method(const ECDSA_METHOD *meth)
77const ECDSA_METHOD *ECDSA_get_default_method(void) 80const ECDSA_METHOD *ECDSA_get_default_method(void)
78{ 81{
79 if(!default_ECDSA_method) 82 if(!default_ECDSA_method)
83 {
84#ifdef OPENSSL_FIPS
85 if (FIPS_mode())
86 return FIPS_ecdsa_openssl();
87 else
88 return ECDSA_OpenSSL();
89#else
80 default_ECDSA_method = ECDSA_OpenSSL(); 90 default_ECDSA_method = ECDSA_OpenSSL();
91#endif
92 }
81 return default_ECDSA_method; 93 return default_ECDSA_method;
82} 94}
83 95
@@ -193,7 +205,14 @@ ECDSA_DATA *ecdsa_check(EC_KEY *key)
193 } 205 }
194 else 206 else
195 ecdsa_data = (ECDSA_DATA *)data; 207 ecdsa_data = (ECDSA_DATA *)data;
196 208#ifdef OPENSSL_FIPS
209 if (FIPS_mode() && !(ecdsa_data->flags & ECDSA_FLAG_FIPS_METHOD)
210 && !(EC_KEY_get_flags(key) & EC_FLAG_NON_FIPS_ALLOW))
211 {
212 ECDSAerr(ECDSA_F_ECDSA_CHECK, ECDSA_R_NON_FIPS_METHOD);
213 return NULL;
214 }
215#endif
197 216
198 return ecdsa_data; 217 return ecdsa_data;
199} 218}
diff --git a/src/lib/libssl/src/crypto/ecdsa/ecs_locl.h b/src/lib/libssl/src/crypto/ecdsa/ecs_locl.h
index 3a69a840e2..cb3be13cfc 100644
--- a/src/lib/libssl/src/crypto/ecdsa/ecs_locl.h
+++ b/src/lib/libssl/src/crypto/ecdsa/ecs_locl.h
@@ -82,6 +82,14 @@ struct ecdsa_method
82 char *app_data; 82 char *app_data;
83 }; 83 };
84 84
85/* If this flag is set the ECDSA method is FIPS compliant and can be used
86 * in FIPS mode. This is set in the validated module method. If an
87 * application sets this flag in its own methods it is its responsibility
88 * to ensure the result is compliant.
89 */
90
91#define ECDSA_FLAG_FIPS_METHOD 0x1
92
85typedef struct ecdsa_data_st { 93typedef struct ecdsa_data_st {
86 /* EC_KEY_METH_DATA part */ 94 /* EC_KEY_METH_DATA part */
87 int (*init)(EC_KEY *); 95 int (*init)(EC_KEY *);
diff --git a/src/lib/libssl/src/crypto/ecdsa/ecs_ossl.c b/src/lib/libssl/src/crypto/ecdsa/ecs_ossl.c
index 1bbf328de5..7725935610 100644
--- a/src/lib/libssl/src/crypto/ecdsa/ecs_ossl.c
+++ b/src/lib/libssl/src/crypto/ecdsa/ecs_ossl.c
@@ -167,6 +167,7 @@ static int ecdsa_sign_setup(EC_KEY *eckey, BN_CTX *ctx_in, BIGNUM **kinvp,
167 goto err; 167 goto err;
168 } 168 }
169 } 169 }
170#ifndef OPENSSL_NO_EC2M
170 else /* NID_X9_62_characteristic_two_field */ 171 else /* NID_X9_62_characteristic_two_field */
171 { 172 {
172 if (!EC_POINT_get_affine_coordinates_GF2m(group, 173 if (!EC_POINT_get_affine_coordinates_GF2m(group,
@@ -176,6 +177,7 @@ static int ecdsa_sign_setup(EC_KEY *eckey, BN_CTX *ctx_in, BIGNUM **kinvp,
176 goto err; 177 goto err;
177 } 178 }
178 } 179 }
180#endif
179 if (!BN_nnmod(r, X, order, ctx)) 181 if (!BN_nnmod(r, X, order, ctx))
180 { 182 {
181 ECDSAerr(ECDSA_F_ECDSA_SIGN_SETUP, ERR_R_BN_LIB); 183 ECDSAerr(ECDSA_F_ECDSA_SIGN_SETUP, ERR_R_BN_LIB);
@@ -454,6 +456,7 @@ static int ecdsa_do_verify(const unsigned char *dgst, int dgst_len,
454 goto err; 456 goto err;
455 } 457 }
456 } 458 }
459#ifndef OPENSSL_NO_EC2M
457 else /* NID_X9_62_characteristic_two_field */ 460 else /* NID_X9_62_characteristic_two_field */
458 { 461 {
459 if (!EC_POINT_get_affine_coordinates_GF2m(group, 462 if (!EC_POINT_get_affine_coordinates_GF2m(group,
@@ -463,7 +466,7 @@ static int ecdsa_do_verify(const unsigned char *dgst, int dgst_len,
463 goto err; 466 goto err;
464 } 467 }
465 } 468 }
466 469#endif
467 if (!BN_nnmod(u1, X, order, ctx)) 470 if (!BN_nnmod(u1, X, order, ctx))
468 { 471 {
469 ECDSAerr(ECDSA_F_ECDSA_DO_VERIFY, ERR_R_BN_LIB); 472 ECDSAerr(ECDSA_F_ECDSA_DO_VERIFY, ERR_R_BN_LIB);
diff --git a/src/lib/libssl/src/crypto/engine/eng_rdrand.c b/src/lib/libssl/src/crypto/engine/eng_rdrand.c
new file mode 100644
index 0000000000..a9ba5ae6f9
--- /dev/null
+++ b/src/lib/libssl/src/crypto/engine/eng_rdrand.c
@@ -0,0 +1,142 @@
1/* ====================================================================
2 * Copyright (c) 2011 The OpenSSL Project. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in
13 * the documentation and/or other materials provided with the
14 * distribution.
15 *
16 * 3. All advertising materials mentioning features or use of this
17 * software must display the following acknowledgment:
18 * "This product includes software developed by the OpenSSL Project
19 * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
20 *
21 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
22 * endorse or promote products derived from this software without
23 * prior written permission. For written permission, please contact
24 * licensing@OpenSSL.org.
25 *
26 * 5. Products derived from this software may not be called "OpenSSL"
27 * nor may "OpenSSL" appear in their names without prior written
28 * permission of the OpenSSL Project.
29 *
30 * 6. Redistributions of any form whatsoever must retain the following
31 * acknowledgment:
32 * "This product includes software developed by the OpenSSL Project
33 * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
34 *
35 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
36 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
37 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
38 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
39 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
40 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
41 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
42 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
43 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
44 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
45 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
46 * OF THE POSSIBILITY OF SUCH DAMAGE.
47 * ====================================================================
48 */
49
50#include <openssl/opensslconf.h>
51
52#include <stdio.h>
53#include <string.h>
54#include <openssl/engine.h>
55#include <openssl/rand.h>
56#include <openssl/err.h>
57
58#if (defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
59 defined(__x86_64) || defined(__x86_64__) || \
60 defined(_M_AMD64) || defined (_M_X64)) && defined(OPENSSL_CPUID_OBJ)
61
62size_t OPENSSL_ia32_rdrand(void);
63
64static int get_random_bytes (unsigned char *buf, int num)
65 {
66 size_t rnd;
67
68 while (num>=(int)sizeof(size_t)) {
69 if ((rnd = OPENSSL_ia32_rdrand()) == 0) return 0;
70
71 *((size_t *)buf) = rnd;
72 buf += sizeof(size_t);
73 num -= sizeof(size_t);
74 }
75 if (num) {
76 if ((rnd = OPENSSL_ia32_rdrand()) == 0) return 0;
77
78 memcpy (buf,&rnd,num);
79 }
80
81 return 1;
82 }
83
84static int random_status (void)
85{ return 1; }
86
87static RAND_METHOD rdrand_meth =
88 {
89 NULL, /* seed */
90 get_random_bytes,
91 NULL, /* cleanup */
92 NULL, /* add */
93 get_random_bytes,
94 random_status,
95 };
96
97static int rdrand_init(ENGINE *e)
98{ return 1; }
99
100static const char *engine_e_rdrand_id = "rdrand";
101static const char *engine_e_rdrand_name = "Intel RDRAND engine";
102
103static int bind_helper(ENGINE *e)
104 {
105 if (!ENGINE_set_id(e, engine_e_rdrand_id) ||
106 !ENGINE_set_name(e, engine_e_rdrand_name) ||
107 !ENGINE_set_init_function(e, rdrand_init) ||
108 !ENGINE_set_RAND(e, &rdrand_meth) )
109 return 0;
110
111 return 1;
112 }
113
114static ENGINE *ENGINE_rdrand(void)
115 {
116 ENGINE *ret = ENGINE_new();
117 if(!ret)
118 return NULL;
119 if(!bind_helper(ret))
120 {
121 ENGINE_free(ret);
122 return NULL;
123 }
124 return ret;
125 }
126
127void ENGINE_load_rdrand (void)
128 {
129 extern unsigned int OPENSSL_ia32cap_P[];
130
131 if (OPENSSL_ia32cap_P[1] & (1<<(62-32)))
132 {
133 ENGINE *toadd = ENGINE_rdrand();
134 if(!toadd) return;
135 ENGINE_add(toadd);
136 ENGINE_free(toadd);
137 ERR_clear_error();
138 }
139 }
140#else
141void ENGINE_load_rdrand (void) {}
142#endif
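
ENGINE_load_rdrand() only registers the engine when bit 62 of OPENSSL_ia32cap_P is set, i.e. the RDRAND feature flag (ECX bit 30 of CPUID leaf 1). Registering it does not by itself change RAND_bytes(); an application still has to select the engine. A minimal sketch of that selection, assuming the application wants RDRAND as its default RAND source and is happy to fall back silently:

#include <openssl/engine.h>
#include <openssl/rand.h>

/* Try to route RAND_bytes() through the rdrand engine; fall back silently. */
static void use_rdrand_if_available(void)
	{
	ENGINE *e;

	ENGINE_load_rdrand();			/* no-op on CPUs without RDRAND */
	e = ENGINE_by_id("rdrand");
	if (e == NULL)
		return;
	if (ENGINE_init(e))
		{
		ENGINE_set_default(e, ENGINE_METHOD_RAND);
		ENGINE_finish(e);
		}
	ENGINE_free(e);
	}

/* afterwards RAND_bytes(buf, len) is served by get_random_bytes() above */
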
diff --git a/src/lib/libssl/src/crypto/engine/eng_rsax.c b/src/lib/libssl/src/crypto/engine/eng_rsax.c
new file mode 100644
index 0000000000..96e63477ee
--- /dev/null
+++ b/src/lib/libssl/src/crypto/engine/eng_rsax.c
@@ -0,0 +1,668 @@
1/* crypto/engine/eng_rsax.c */
2/* Copyright (c) 2010-2010 Intel Corp.
3 * Author: Vinodh.Gopal@intel.com
4 * Jim Guilford
5 * Erdinc.Ozturk@intel.com
6 * Maxim.Perminov@intel.com
7 * Ying.Huang@intel.com
8 *
9 * More information about algorithm used can be found at:
10 * http://www.cse.buffalo.edu/srds2009/escs2009_submission_Gopal.pdf
11 */
12/* ====================================================================
13 * Copyright (c) 1999-2001 The OpenSSL Project. All rights reserved.
14 *
15 * Redistribution and use in source and binary forms, with or without
16 * modification, are permitted provided that the following conditions
17 * are met:
18 *
19 * 1. Redistributions of source code must retain the above copyright
20 * notice, this list of conditions and the following disclaimer.
21 *
22 * 2. Redistributions in binary form must reproduce the above copyright
23 * notice, this list of conditions and the following disclaimer in
24 * the documentation and/or other materials provided with the
25 * distribution.
26 *
27 * 3. All advertising materials mentioning features or use of this
28 * software must display the following acknowledgment:
29 * "This product includes software developed by the OpenSSL Project
30 * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
31 *
32 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
33 * endorse or promote products derived from this software without
34 * prior written permission. For written permission, please contact
35 * licensing@OpenSSL.org.
36 *
37 * 5. Products derived from this software may not be called "OpenSSL"
38 * nor may "OpenSSL" appear in their names without prior written
39 * permission of the OpenSSL Project.
40 *
41 * 6. Redistributions of any form whatsoever must retain the following
42 * acknowledgment:
43 * "This product includes software developed by the OpenSSL Project
44 * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
45 *
46 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
47 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
48 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
49 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
50 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
51 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
52 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
53 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
54 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
55 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
56 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
57 * OF THE POSSIBILITY OF SUCH DAMAGE.
58 * ====================================================================
59 *
60 * This product includes cryptographic software written by Eric Young
61 * (eay@cryptsoft.com). This product includes software written by Tim
62 * Hudson (tjh@cryptsoft.com).
63 */
64
65#include <openssl/opensslconf.h>
66
67#include <stdio.h>
68#include <string.h>
69#include <openssl/crypto.h>
70#include <openssl/buffer.h>
71#include <openssl/engine.h>
72#ifndef OPENSSL_NO_RSA
73#include <openssl/rsa.h>
74#endif
75#include <openssl/bn.h>
76#include <openssl/err.h>
77
78/* RSAX is available **ONLY** on x86_64 CPUs */
79#undef COMPILE_RSAX
80
81#if (defined(__x86_64) || defined(__x86_64__) || \
82 defined(_M_AMD64) || defined (_M_X64)) && !defined(OPENSSL_NO_ASM)
83#define COMPILE_RSAX
84static ENGINE *ENGINE_rsax (void);
85#endif
86
87void ENGINE_load_rsax (void)
88 {
89/* On non-x86_64 CPUs it just returns. */
90#ifdef COMPILE_RSAX
91 ENGINE *toadd = ENGINE_rsax();
92 if(!toadd) return;
93 ENGINE_add(toadd);
94 ENGINE_free(toadd);
95 ERR_clear_error();
96#endif
97 }
98
99#ifdef COMPILE_RSAX
100#define E_RSAX_LIB_NAME "rsax engine"
101
102static int e_rsax_destroy(ENGINE *e);
103static int e_rsax_init(ENGINE *e);
104static int e_rsax_finish(ENGINE *e);
105static int e_rsax_ctrl(ENGINE *e, int cmd, long i, void *p, void (*f)(void));
106
107#ifndef OPENSSL_NO_RSA
108/* RSA stuff */
109static int e_rsax_rsa_mod_exp(BIGNUM *r, const BIGNUM *I, RSA *rsa, BN_CTX *ctx);
110static int e_rsax_rsa_finish(RSA *r);
111#endif
112
113static const ENGINE_CMD_DEFN e_rsax_cmd_defns[] = {
114 {0, NULL, NULL, 0}
115 };
116
117#ifndef OPENSSL_NO_RSA
118/* Our internal RSA_METHOD that we provide pointers to */
119static RSA_METHOD e_rsax_rsa =
120 {
121 "Intel RSA-X method",
122 NULL,
123 NULL,
124 NULL,
125 NULL,
126 e_rsax_rsa_mod_exp,
127 NULL,
128 NULL,
129 e_rsax_rsa_finish,
130 RSA_FLAG_CACHE_PUBLIC|RSA_FLAG_CACHE_PRIVATE,
131 NULL,
132 NULL,
133 NULL
134 };
135#endif
136
137/* Constants used when creating the ENGINE */
138static const char *engine_e_rsax_id = "rsax";
139static const char *engine_e_rsax_name = "RSAX engine support";
140
141/* This internal function is used by ENGINE_rsax() */
142static int bind_helper(ENGINE *e)
143 {
144#ifndef OPENSSL_NO_RSA
145 const RSA_METHOD *meth1;
146#endif
147 if(!ENGINE_set_id(e, engine_e_rsax_id) ||
148 !ENGINE_set_name(e, engine_e_rsax_name) ||
149#ifndef OPENSSL_NO_RSA
150 !ENGINE_set_RSA(e, &e_rsax_rsa) ||
151#endif
152 !ENGINE_set_destroy_function(e, e_rsax_destroy) ||
153 !ENGINE_set_init_function(e, e_rsax_init) ||
154 !ENGINE_set_finish_function(e, e_rsax_finish) ||
155 !ENGINE_set_ctrl_function(e, e_rsax_ctrl) ||
156 !ENGINE_set_cmd_defns(e, e_rsax_cmd_defns))
157 return 0;
158
159#ifndef OPENSSL_NO_RSA
160 meth1 = RSA_PKCS1_SSLeay();
161 e_rsax_rsa.rsa_pub_enc = meth1->rsa_pub_enc;
162 e_rsax_rsa.rsa_pub_dec = meth1->rsa_pub_dec;
163 e_rsax_rsa.rsa_priv_enc = meth1->rsa_priv_enc;
164 e_rsax_rsa.rsa_priv_dec = meth1->rsa_priv_dec;
165 e_rsax_rsa.bn_mod_exp = meth1->bn_mod_exp;
166#endif
167 return 1;
168 }
169
170static ENGINE *ENGINE_rsax(void)
171 {
172 ENGINE *ret = ENGINE_new();
173 if(!ret)
174 return NULL;
175 if(!bind_helper(ret))
176 {
177 ENGINE_free(ret);
178 return NULL;
179 }
180 return ret;
181 }
182
183#ifndef OPENSSL_NO_RSA
184/* Used to attach our own key-data to an RSA structure */
185static int rsax_ex_data_idx = -1;
186#endif
187
188static int e_rsax_destroy(ENGINE *e)
189 {
190 return 1;
191 }
192
193/* (de)initialisation functions. */
194static int e_rsax_init(ENGINE *e)
195 {
196#ifndef OPENSSL_NO_RSA
197 if (rsax_ex_data_idx == -1)
198 rsax_ex_data_idx = RSA_get_ex_new_index(0,
199 NULL,
200 NULL, NULL, NULL);
201#endif
202 if (rsax_ex_data_idx == -1)
203 return 0;
204 return 1;
205 }
206
207static int e_rsax_finish(ENGINE *e)
208 {
209 return 1;
210 }
211
212static int e_rsax_ctrl(ENGINE *e, int cmd, long i, void *p, void (*f)(void))
213 {
214 int to_return = 1;
215
216 switch(cmd)
217 {
218 /* The command isn't understood by this engine */
219 default:
220 to_return = 0;
221 break;
222 }
223
224 return to_return;
225 }
226
227
228#ifndef OPENSSL_NO_RSA
229
230#ifdef _WIN32
231typedef unsigned __int64 UINT64;
232#else
233typedef unsigned long long UINT64;
234#endif
235typedef unsigned short UINT16;
236
237/* Table t is interleaved in the following manner:
238 * The order in memory is t[0][0], t[0][1], ..., t[0][7], t[1][0], ...
239 * A particular 512-bit value is stored in t[][index] rather than the more
240 * normal t[index][]; i.e. the qwords of a particular entry in t are not
241 * adjacent in memory
242 */
243
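
Put differently, qword j of 512-bit table entry i lives at t[j][i], so its flat offset is j*8 + i instead of the usual i*8 + j, which keeps the same qword position of all eight entries contiguous in memory. A tiny helper pair that spells out the indexing (illustration only, not part of the engine; the typedef mirrors the one defined earlier in this file):

typedef unsigned long long UINT64;	/* mirrors the UINT64 typedef above */

/* Read/write qword j of 512-bit table entry i in the interleaved layout. */
static UINT64 tbl_get(const UINT64 t[8][8], int i, int j)
	{
	return t[j][i];			/* flat offset j*8 + i */
	}

static void tbl_set(UINT64 t[8][8], int i, int j, UINT64 v)
	{
	t[j][i] = v;
	}
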
244/* Init BIGNUM b from the interleaved UINT64 array */
245static int interleaved_array_to_bn_512(BIGNUM* b, UINT64 *array);
246
247/* Extract array elements from BIGNUM b
248 * To set the whole array from b, call with n=8
249 */
250static int bn_extract_to_array_512(const BIGNUM* b, unsigned int n, UINT64 *array);
251
252struct mod_ctx_512 {
253 UINT64 t[8][8];
254 UINT64 m[8];
255    UINT64 m1[8]; /* 2^768 % m */
256 UINT64 m2[8]; /* 2^640 % m */
257 UINT64 k1[2]; /* (- 1/m) % 2^128 */
258};
259
260static int mod_exp_pre_compute_data_512(UINT64 *m, struct mod_ctx_512 *data);
261
262void mod_exp_512(UINT64 *result, /* 512 bits, 8 qwords */
263 UINT64 *g, /* 512 bits, 8 qwords */
264 UINT64 *exp, /* 512 bits, 8 qwords */
265 struct mod_ctx_512 *data);
266
267typedef struct st_e_rsax_mod_ctx
268{
269 UINT64 type;
270 union {
271 struct mod_ctx_512 b512;
272 } ctx;
273
274} E_RSAX_MOD_CTX;
275
276static E_RSAX_MOD_CTX *e_rsax_get_ctx(RSA *rsa, int idx, BIGNUM* m)
277{
278 E_RSAX_MOD_CTX *hptr;
279
280 if (idx < 0 || idx > 2)
281 return NULL;
282
283 hptr = RSA_get_ex_data(rsa, rsax_ex_data_idx);
284 if (!hptr) {
285 hptr = OPENSSL_malloc(3*sizeof(E_RSAX_MOD_CTX));
286 if (!hptr) return NULL;
287 hptr[2].type = hptr[1].type= hptr[0].type = 0;
288 RSA_set_ex_data(rsa, rsax_ex_data_idx, hptr);
289 }
290
291 if (hptr[idx].type == (UINT64)BN_num_bits(m))
292 return hptr+idx;
293
294 if (BN_num_bits(m) == 512) {
295 UINT64 _m[8];
296 bn_extract_to_array_512(m, 8, _m);
297 memset( &hptr[idx].ctx.b512, 0, sizeof(struct mod_ctx_512));
298 mod_exp_pre_compute_data_512(_m, &hptr[idx].ctx.b512);
299 }
300
301 hptr[idx].type = BN_num_bits(m);
302 return hptr+idx;
303}
304
305static int e_rsax_rsa_finish(RSA *rsa)
306 {
307 E_RSAX_MOD_CTX *hptr = RSA_get_ex_data(rsa, rsax_ex_data_idx);
308 if(hptr)
309 {
310 OPENSSL_free(hptr);
311 RSA_set_ex_data(rsa, rsax_ex_data_idx, NULL);
312 }
313 if (rsa->_method_mod_n)
314 BN_MONT_CTX_free(rsa->_method_mod_n);
315 if (rsa->_method_mod_p)
316 BN_MONT_CTX_free(rsa->_method_mod_p);
317 if (rsa->_method_mod_q)
318 BN_MONT_CTX_free(rsa->_method_mod_q);
319 return 1;
320 }
321
322
323static int e_rsax_bn_mod_exp(BIGNUM *r, const BIGNUM *g, const BIGNUM *e,
324 const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont, E_RSAX_MOD_CTX* rsax_mod_ctx )
325{
326 if (rsax_mod_ctx && BN_get_flags(e, BN_FLG_CONSTTIME) != 0) {
327 if (BN_num_bits(m) == 512) {
328 UINT64 _r[8];
329 UINT64 _g[8];
330 UINT64 _e[8];
331
332 /* Init the arrays from the BIGNUMs */
333 bn_extract_to_array_512(g, 8, _g);
334 bn_extract_to_array_512(e, 8, _e);
335
336 mod_exp_512(_r, _g, _e, &rsax_mod_ctx->ctx.b512);
337 /* Return the result in the BIGNUM */
338 interleaved_array_to_bn_512(r, _r);
339 return 1;
340 }
341 }
342
343 return BN_mod_exp_mont(r, g, e, m, ctx, in_mont);
344}
345
346/* Declarations for the Intel CIAP 512-bit / CRT / 1024 bit RSA modular
347 * exponentiation routine precalculations and a structure to hold the
348 * necessary values. These files are meant to live in crypto/rsa/ in
349 * the target openssl.
350 */
351
352/*
353 * Local method: extracts a piece from a BIGNUM, to fit it into
354 * an array. Call with n=8 to extract an entire 512-bit BIGNUM
355 */
356static int bn_extract_to_array_512(const BIGNUM* b, unsigned int n, UINT64 *array)
357{
358 int i;
359 UINT64 tmp;
360 unsigned char bn_buff[64];
361 memset(bn_buff, 0, 64);
362 if (BN_num_bytes(b) > 64) {
363 printf ("Can't support this byte size\n");
364 return 0; }
365 if (BN_num_bytes(b)!=0) {
366 if (!BN_bn2bin(b, bn_buff+(64-BN_num_bytes(b)))) {
367		printf ("Error in bn2bin\n");
368 /* We have to error, here */
369 return 0; } }
370 while (n-- > 0) {
371 array[n] = 0;
372 for (i=7; i>=0; i--) {
373 tmp = bn_buff[63-(n*8+i)];
374 array[n] |= tmp << (8*i); } }
375 return 1;
376}
377
378/* Init a 512-bit BIGNUM from the UINT64 (8 * 64-bit) interleaved array */
379static int interleaved_array_to_bn_512(BIGNUM* b, UINT64 *array)
380{
381 unsigned char tmp[64];
382 int n=8;
383 int i;
384 while (n-- > 0) {
385 for (i = 7; i>=0; i--) {
386 tmp[63-(n*8+i)] = (unsigned char)(array[n]>>(8*i)); } }
387 BN_bin2bn(tmp, 64, b);
388 return 0;
389}
390
391
392/* The main 512bit precompute call */
393static int mod_exp_pre_compute_data_512(UINT64 *m, struct mod_ctx_512 *data)
394 {
395 BIGNUM two_768, two_640, two_128, two_512, tmp, _m, tmp2;
396
397 /* We need a BN_CTX for the modulo functions */
398 BN_CTX* ctx;
399 /* Some tmps */
400 UINT64 _t[8];
401 int i, j, ret = 0;
402
403 /* Init _m with m */
404 BN_init(&_m);
405 interleaved_array_to_bn_512(&_m, m);
406 memset(_t, 0, 64);
407
408 /* Inits */
409 BN_init(&two_768);
410 BN_init(&two_640);
411 BN_init(&two_128);
412 BN_init(&two_512);
413 BN_init(&tmp);
414 BN_init(&tmp2);
415
416 /* Create our context */
417 if ((ctx=BN_CTX_new()) == NULL) { goto err; }
418 BN_CTX_start(ctx);
419
420 /*
421 * For production, if you care, these only need to be set once,
422 * and may be made constants.
423 */
424 BN_lshift(&two_768, BN_value_one(), 768);
425 BN_lshift(&two_640, BN_value_one(), 640);
426 BN_lshift(&two_128, BN_value_one(), 128);
427 BN_lshift(&two_512, BN_value_one(), 512);
428
429 if (0 == (m[7] & 0x8000000000000000)) {
430 exit(1);
431 }
432 if (0 == (m[0] & 0x1)) { /* Odd modulus required for Mont */
433 exit(1);
434 }
435
436 /* Precompute m1 */
437 BN_mod(&tmp, &two_768, &_m, ctx);
438 if (!bn_extract_to_array_512(&tmp, 8, &data->m1[0])) {
439 goto err; }
440
441 /* Precompute m2 */
442 BN_mod(&tmp, &two_640, &_m, ctx);
443 if (!bn_extract_to_array_512(&tmp, 8, &data->m2[0])) {
444 goto err;
445 }
446
447 /*
448	 * Precompute k1, a 128-bit number = ((-1) * m^-1) mod 2^128; k1 should
449 * be non-negative.
450 */
451 BN_mod_inverse(&tmp, &_m, &two_128, ctx);
452 if (!BN_is_zero(&tmp)) { BN_sub(&tmp, &two_128, &tmp); }
453 if (!bn_extract_to_array_512(&tmp, 2, &data->k1[0])) {
454 goto err; }
455
456 /* Precompute t */
457 for (i=0; i<8; i++) {
458 BN_zero(&tmp);
459 if (i & 1) { BN_add(&tmp, &two_512, &tmp); }
460 if (i & 2) { BN_add(&tmp, &two_512, &tmp); }
461 if (i & 4) { BN_add(&tmp, &two_640, &tmp); }
462
463 BN_nnmod(&tmp2, &tmp, &_m, ctx);
464 if (!bn_extract_to_array_512(&tmp2, 8, _t)) {
465 goto err; }
466 for (j=0; j<8; j++) data->t[j][i] = _t[j]; }
467
468 /* Precompute m */
469 for (i=0; i<8; i++) {
470 data->m[i] = m[i]; }
471
472 ret = 1;
473
474err:
475 /* Cleanup */
476 if (ctx != NULL) {
477 BN_CTX_end(ctx); BN_CTX_free(ctx); }
478 BN_free(&two_768);
479 BN_free(&two_640);
480 BN_free(&two_128);
481 BN_free(&two_512);
482 BN_free(&tmp);
483 BN_free(&tmp2);
484 BN_free(&_m);
485
486 return ret;
487}
488
489
490static int e_rsax_rsa_mod_exp(BIGNUM *r0, const BIGNUM *I, RSA *rsa, BN_CTX *ctx)
491 {
492 BIGNUM *r1,*m1,*vrfy;
493 BIGNUM local_dmp1,local_dmq1,local_c,local_r1;
494 BIGNUM *dmp1,*dmq1,*c,*pr1;
495 int ret=0;
496
497 BN_CTX_start(ctx);
498 r1 = BN_CTX_get(ctx);
499 m1 = BN_CTX_get(ctx);
500 vrfy = BN_CTX_get(ctx);
501
502 {
503 BIGNUM local_p, local_q;
504 BIGNUM *p = NULL, *q = NULL;
505 int error = 0;
506
507 /* Make sure BN_mod_inverse in Montgomery
508		 * initialization uses the BN_FLG_CONSTTIME flag
509 * (unless RSA_FLAG_NO_CONSTTIME is set)
510 */
511 if (!(rsa->flags & RSA_FLAG_NO_CONSTTIME))
512 {
513 BN_init(&local_p);
514 p = &local_p;
515 BN_with_flags(p, rsa->p, BN_FLG_CONSTTIME);
516
517 BN_init(&local_q);
518 q = &local_q;
519 BN_with_flags(q, rsa->q, BN_FLG_CONSTTIME);
520 }
521 else
522 {
523 p = rsa->p;
524 q = rsa->q;
525 }
526
527 if (rsa->flags & RSA_FLAG_CACHE_PRIVATE)
528 {
529 if (!BN_MONT_CTX_set_locked(&rsa->_method_mod_p, CRYPTO_LOCK_RSA, p, ctx))
530 error = 1;
531 if (!BN_MONT_CTX_set_locked(&rsa->_method_mod_q, CRYPTO_LOCK_RSA, q, ctx))
532 error = 1;
533 }
534
535 /* clean up */
536 if (!(rsa->flags & RSA_FLAG_NO_CONSTTIME))
537 {
538 BN_free(&local_p);
539 BN_free(&local_q);
540 }
541 if ( error )
542 goto err;
543 }
544
545 if (rsa->flags & RSA_FLAG_CACHE_PUBLIC)
546 if (!BN_MONT_CTX_set_locked(&rsa->_method_mod_n, CRYPTO_LOCK_RSA, rsa->n, ctx))
547 goto err;
548
549 /* compute I mod q */
550 if (!(rsa->flags & RSA_FLAG_NO_CONSTTIME))
551 {
552 c = &local_c;
553 BN_with_flags(c, I, BN_FLG_CONSTTIME);
554 if (!BN_mod(r1,c,rsa->q,ctx)) goto err;
555 }
556 else
557 {
558 if (!BN_mod(r1,I,rsa->q,ctx)) goto err;
559 }
560
561 /* compute r1^dmq1 mod q */
562 if (!(rsa->flags & RSA_FLAG_NO_CONSTTIME))
563 {
564 dmq1 = &local_dmq1;
565 BN_with_flags(dmq1, rsa->dmq1, BN_FLG_CONSTTIME);
566 }
567 else
568 dmq1 = rsa->dmq1;
569
570 if (!e_rsax_bn_mod_exp(m1,r1,dmq1,rsa->q,ctx,
571 rsa->_method_mod_q, e_rsax_get_ctx(rsa, 0, rsa->q) )) goto err;
572
573 /* compute I mod p */
574 if (!(rsa->flags & RSA_FLAG_NO_CONSTTIME))
575 {
576 c = &local_c;
577 BN_with_flags(c, I, BN_FLG_CONSTTIME);
578 if (!BN_mod(r1,c,rsa->p,ctx)) goto err;
579 }
580 else
581 {
582 if (!BN_mod(r1,I,rsa->p,ctx)) goto err;
583 }
584
585 /* compute r1^dmp1 mod p */
586 if (!(rsa->flags & RSA_FLAG_NO_CONSTTIME))
587 {
588 dmp1 = &local_dmp1;
589 BN_with_flags(dmp1, rsa->dmp1, BN_FLG_CONSTTIME);
590 }
591 else
592 dmp1 = rsa->dmp1;
593
594 if (!e_rsax_bn_mod_exp(r0,r1,dmp1,rsa->p,ctx,
595 rsa->_method_mod_p, e_rsax_get_ctx(rsa, 1, rsa->p) )) goto err;
596
597 if (!BN_sub(r0,r0,m1)) goto err;
598 /* This will help stop the size of r0 increasing, which does
599	 * affect the multiply if it is optimised for a power of 2 size */
600 if (BN_is_negative(r0))
601 if (!BN_add(r0,r0,rsa->p)) goto err;
602
603 if (!BN_mul(r1,r0,rsa->iqmp,ctx)) goto err;
604
605 /* Turn BN_FLG_CONSTTIME flag on before division operation */
606 if (!(rsa->flags & RSA_FLAG_NO_CONSTTIME))
607 {
608 pr1 = &local_r1;
609 BN_with_flags(pr1, r1, BN_FLG_CONSTTIME);
610 }
611 else
612 pr1 = r1;
613 if (!BN_mod(r0,pr1,rsa->p,ctx)) goto err;
614
615 /* If p < q it is occasionally possible for the correction of
616 * adding 'p' if r0 is negative above to leave the result still
617 * negative. This can break the private key operations: the following
618 * second correction should *always* correct this rare occurrence.
619 * This will *never* happen with OpenSSL generated keys because
620 * they ensure p > q [steve]
621 */
622 if (BN_is_negative(r0))
623 if (!BN_add(r0,r0,rsa->p)) goto err;
624 if (!BN_mul(r1,r0,rsa->q,ctx)) goto err;
625 if (!BN_add(r0,r1,m1)) goto err;
626
627 if (rsa->e && rsa->n)
628 {
629 if (!e_rsax_bn_mod_exp(vrfy,r0,rsa->e,rsa->n,ctx,rsa->_method_mod_n, e_rsax_get_ctx(rsa, 2, rsa->n) ))
630 goto err;
631
632 /* If 'I' was greater than (or equal to) rsa->n, the operation
633 * will be equivalent to using 'I mod n'. However, the result of
634 * the verify will *always* be less than 'n' so we don't check
635 * for absolute equality, just congruency. */
636 if (!BN_sub(vrfy, vrfy, I)) goto err;
637 if (!BN_mod(vrfy, vrfy, rsa->n, ctx)) goto err;
638 if (BN_is_negative(vrfy))
639 if (!BN_add(vrfy, vrfy, rsa->n)) goto err;
640 if (!BN_is_zero(vrfy))
641 {
642 /* 'I' and 'vrfy' aren't congruent mod n. Don't leak
643 * miscalculated CRT output, just do a raw (slower)
644 * mod_exp and return that instead. */
645
646 BIGNUM local_d;
647 BIGNUM *d = NULL;
648
649 if (!(rsa->flags & RSA_FLAG_NO_CONSTTIME))
650 {
651 d = &local_d;
652 BN_with_flags(d, rsa->d, BN_FLG_CONSTTIME);
653 }
654 else
655 d = rsa->d;
656 if (!e_rsax_bn_mod_exp(r0,I,d,rsa->n,ctx,
657 rsa->_method_mod_n, e_rsax_get_ctx(rsa, 2, rsa->n) )) goto err;
658 }
659 }
660 ret=1;
661
662err:
663 BN_CTX_end(ctx);
664
665 return ret;
666 }
667#endif /* !OPENSSL_NO_RSA */
668#endif /* COMPILE_RSAX */
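
The private-key path above is the standard CRT/Garner recombination: with m1 = I^dmq1 mod q and r0 = I^dmp1 mod p it forms h = (r0 - m1) * iqmp mod p and returns m1 + q*h, then re-checks the result against I via the public exponent and falls back to a plain I^d mod n if the check fails. A compact sketch of just the recombination step with plain BN calls (helper and variable names are illustrative):

#include <openssl/bn.h>

/*
 * Garner/CRT recombination as used above: given m1 = c^dQ mod q and
 * m2 = c^dP mod p, rebuild m = c^d mod pq.  Illustrative helper only.
 */
static int crt_combine(BIGNUM *m, const BIGNUM *m1, const BIGNUM *m2,
    const BIGNUM *p, const BIGNUM *q, const BIGNUM *iqmp, BN_CTX *ctx)
	{
	int ok = 0;
	BIGNUM *h;

	BN_CTX_start(ctx);
	if ((h = BN_CTX_get(ctx)) == NULL)
		goto err;
	if (!BN_mod_sub(h, m2, m1, p, ctx))	/* h = (m2 - m1) mod p */
		goto err;
	if (!BN_mod_mul(h, h, iqmp, p, ctx))	/* h = h * q^-1 mod p  */
		goto err;
	if (!BN_mul(m, q, h, ctx))		/* m = q*h             */
		goto err;
	if (!BN_add(m, m, m1))			/* m = m1 + q*h        */
		goto err;
	ok = 1;
err:
	BN_CTX_end(ctx);
	return ok;
	}
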
diff --git a/src/lib/libssl/src/crypto/evp/e_aes_cbc_hmac_sha1.c b/src/lib/libssl/src/crypto/evp/e_aes_cbc_hmac_sha1.c
new file mode 100644
index 0000000000..710fb79baf
--- /dev/null
+++ b/src/lib/libssl/src/crypto/evp/e_aes_cbc_hmac_sha1.c
@@ -0,0 +1,406 @@
1/* ====================================================================
2 * Copyright (c) 2011 The OpenSSL Project. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in
13 * the documentation and/or other materials provided with the
14 * distribution.
15 *
16 * 3. All advertising materials mentioning features or use of this
17 * software must display the following acknowledgment:
18 * "This product includes software developed by the OpenSSL Project
19 * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
20 *
21 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
22 * endorse or promote products derived from this software without
23 * prior written permission. For written permission, please contact
24 * licensing@OpenSSL.org.
25 *
26 * 5. Products derived from this software may not be called "OpenSSL"
27 * nor may "OpenSSL" appear in their names without prior written
28 * permission of the OpenSSL Project.
29 *
30 * 6. Redistributions of any form whatsoever must retain the following
31 * acknowledgment:
32 * "This product includes software developed by the OpenSSL Project
33 * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
34 *
35 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
36 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
37 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
38 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
39 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
40 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
41 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
42 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
43 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
44 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
45 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
46 * OF THE POSSIBILITY OF SUCH DAMAGE.
47 * ====================================================================
48 */
49
50#include <openssl/opensslconf.h>
51
52#include <stdio.h>
53#include <string.h>
54
55#if !defined(OPENSSL_NO_AES) && !defined(OPENSSL_NO_SHA1)
56
57#include <openssl/evp.h>
58#include <openssl/objects.h>
59#include <openssl/aes.h>
60#include <openssl/sha.h>
61#include "evp_locl.h"
62
63#ifndef EVP_CIPH_FLAG_AEAD_CIPHER
64#define EVP_CIPH_FLAG_AEAD_CIPHER 0x200000
65#define EVP_CTRL_AEAD_TLS1_AAD 0x16
66#define EVP_CTRL_AEAD_SET_MAC_KEY 0x17
67#endif
68
69#if !defined(EVP_CIPH_FLAG_DEFAULT_ASN1)
70#define EVP_CIPH_FLAG_DEFAULT_ASN1 0
71#endif
72
73#define TLS1_1_VERSION 0x0302
74
75typedef struct
76 {
77 AES_KEY ks;
78 SHA_CTX head,tail,md;
79 size_t payload_length; /* AAD length in decrypt case */
80 union {
81 unsigned int tls_ver;
82 unsigned char tls_aad[16]; /* 13 used */
83 } aux;
84 } EVP_AES_HMAC_SHA1;
85
86#define NO_PAYLOAD_LENGTH ((size_t)-1)
87
88#if defined(AES_ASM) && ( \
89 defined(__x86_64) || defined(__x86_64__) || \
90 defined(_M_AMD64) || defined(_M_X64) || \
91 defined(__INTEL__) )
92
93extern unsigned int OPENSSL_ia32cap_P[2];
94#define AESNI_CAPABLE (1<<(57-32))
95
96int aesni_set_encrypt_key(const unsigned char *userKey, int bits,
97 AES_KEY *key);
98int aesni_set_decrypt_key(const unsigned char *userKey, int bits,
99 AES_KEY *key);
100
101void aesni_cbc_encrypt(const unsigned char *in,
102 unsigned char *out,
103 size_t length,
104 const AES_KEY *key,
105 unsigned char *ivec, int enc);
106
107void aesni_cbc_sha1_enc (const void *inp, void *out, size_t blocks,
108 const AES_KEY *key, unsigned char iv[16],
109 SHA_CTX *ctx,const void *in0);
110
111#define data(ctx) ((EVP_AES_HMAC_SHA1 *)(ctx)->cipher_data)
112
113static int aesni_cbc_hmac_sha1_init_key(EVP_CIPHER_CTX *ctx,
114 const unsigned char *inkey,
115 const unsigned char *iv, int enc)
116 {
117 EVP_AES_HMAC_SHA1 *key = data(ctx);
118 int ret;
119
120 if (enc)
121 ret=aesni_set_encrypt_key(inkey,ctx->key_len*8,&key->ks);
122 else
123 ret=aesni_set_decrypt_key(inkey,ctx->key_len*8,&key->ks);
124
125 SHA1_Init(&key->head); /* handy when benchmarking */
126 key->tail = key->head;
127 key->md = key->head;
128
129 key->payload_length = NO_PAYLOAD_LENGTH;
130
131 return ret<0?0:1;
132 }
133
134#define STITCHED_CALL
135
136#if !defined(STITCHED_CALL)
137#define aes_off 0
138#endif
139
140void sha1_block_data_order (void *c,const void *p,size_t len);
141
142static void sha1_update(SHA_CTX *c,const void *data,size_t len)
143{ const unsigned char *ptr = data;
144 size_t res;
145
146 if ((res = c->num)) {
147 res = SHA_CBLOCK-res;
148 if (len<res) res=len;
149 SHA1_Update (c,ptr,res);
150 ptr += res;
151 len -= res;
152 }
153
154 res = len % SHA_CBLOCK;
155 len -= res;
156
157 if (len) {
158 sha1_block_data_order(c,ptr,len/SHA_CBLOCK);
159
160 ptr += len;
161 c->Nh += len>>29;
162 c->Nl += len<<=3;
163 if (c->Nl<(unsigned int)len) c->Nh++;
164 }
165
166 if (res)
167 SHA1_Update(c,ptr,res);
168}
169
170#define SHA1_Update sha1_update
171
172static int aesni_cbc_hmac_sha1_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
173 const unsigned char *in, size_t len)
174 {
175 EVP_AES_HMAC_SHA1 *key = data(ctx);
176 unsigned int l;
177 size_t plen = key->payload_length,
178 iv = 0, /* explicit IV in TLS 1.1 and later */
179 sha_off = 0;
180#if defined(STITCHED_CALL)
181 size_t aes_off = 0,
182 blocks;
183
184 sha_off = SHA_CBLOCK-key->md.num;
185#endif
186
187 if (len%AES_BLOCK_SIZE) return 0;
188
189 if (ctx->encrypt) {
190 if (plen==NO_PAYLOAD_LENGTH)
191 plen = len;
192 else if (len!=((plen+SHA_DIGEST_LENGTH+AES_BLOCK_SIZE)&-AES_BLOCK_SIZE))
193 return 0;
194 else if (key->aux.tls_ver >= TLS1_1_VERSION)
195 iv = AES_BLOCK_SIZE;
196
197#if defined(STITCHED_CALL)
198 if (plen>(sha_off+iv) && (blocks=(plen-(sha_off+iv))/SHA_CBLOCK)) {
199 SHA1_Update(&key->md,in+iv,sha_off);
200
201 aesni_cbc_sha1_enc(in,out,blocks,&key->ks,
202 ctx->iv,&key->md,in+iv+sha_off);
203 blocks *= SHA_CBLOCK;
204 aes_off += blocks;
205 sha_off += blocks;
206 key->md.Nh += blocks>>29;
207 key->md.Nl += blocks<<=3;
208 if (key->md.Nl<(unsigned int)blocks) key->md.Nh++;
209 } else {
210 sha_off = 0;
211 }
212#endif
213 sha_off += iv;
214 SHA1_Update(&key->md,in+sha_off,plen-sha_off);
215
216 if (plen!=len) { /* "TLS" mode of operation */
217 if (in!=out)
218 memcpy(out+aes_off,in+aes_off,plen-aes_off);
219
220 /* calculate HMAC and append it to payload */
221 SHA1_Final(out+plen,&key->md);
222 key->md = key->tail;
223 SHA1_Update(&key->md,out+plen,SHA_DIGEST_LENGTH);
224 SHA1_Final(out+plen,&key->md);
225
226 /* pad the payload|hmac */
227 plen += SHA_DIGEST_LENGTH;
228 for (l=len-plen-1;plen<len;plen++) out[plen]=l;
229 /* encrypt HMAC|padding at once */
230 aesni_cbc_encrypt(out+aes_off,out+aes_off,len-aes_off,
231 &key->ks,ctx->iv,1);
232 } else {
233 aesni_cbc_encrypt(in+aes_off,out+aes_off,len-aes_off,
234 &key->ks,ctx->iv,1);
235 }
236 } else {
237 unsigned char mac[SHA_DIGEST_LENGTH];
238
239 /* decrypt HMAC|padding at once */
240 aesni_cbc_encrypt(in,out,len,
241 &key->ks,ctx->iv,0);
242
243 if (plen) { /* "TLS" mode of operation */
244 /* figure out payload length */
245 if (len<(size_t)(out[len-1]+1+SHA_DIGEST_LENGTH))
246 return 0;
247
248 len -= (out[len-1]+1+SHA_DIGEST_LENGTH);
249
250 if ((key->aux.tls_aad[plen-4]<<8|key->aux.tls_aad[plen-3])
251 >= TLS1_1_VERSION) {
252 len -= AES_BLOCK_SIZE;
253 iv = AES_BLOCK_SIZE;
254 }
255
256 key->aux.tls_aad[plen-2] = len>>8;
257 key->aux.tls_aad[plen-1] = len;
258
259 /* calculate HMAC and verify it */
260 key->md = key->head;
261 SHA1_Update(&key->md,key->aux.tls_aad,plen);
262 SHA1_Update(&key->md,out+iv,len);
263 SHA1_Final(mac,&key->md);
264
265 key->md = key->tail;
266 SHA1_Update(&key->md,mac,SHA_DIGEST_LENGTH);
267 SHA1_Final(mac,&key->md);
268
269 if (memcmp(out+iv+len,mac,SHA_DIGEST_LENGTH))
270 return 0;
271 } else {
272 SHA1_Update(&key->md,out,len);
273 }
274 }
275
276 key->payload_length = NO_PAYLOAD_LENGTH;
277
278 return 1;
279 }
280
281static int aesni_cbc_hmac_sha1_ctrl(EVP_CIPHER_CTX *ctx, int type, int arg, void *ptr)
282 {
283 EVP_AES_HMAC_SHA1 *key = data(ctx);
284
285 switch (type)
286 {
287 case EVP_CTRL_AEAD_SET_MAC_KEY:
288 {
289 unsigned int i;
290 unsigned char hmac_key[64];
291
292 memset (hmac_key,0,sizeof(hmac_key));
293
294 if (arg > (int)sizeof(hmac_key)) {
295 SHA1_Init(&key->head);
296 SHA1_Update(&key->head,ptr,arg);
297 SHA1_Final(hmac_key,&key->head);
298 } else {
299 memcpy(hmac_key,ptr,arg);
300 }
301
302 for (i=0;i<sizeof(hmac_key);i++)
303 hmac_key[i] ^= 0x36; /* ipad */
304 SHA1_Init(&key->head);
305 SHA1_Update(&key->head,hmac_key,sizeof(hmac_key));
306
307 for (i=0;i<sizeof(hmac_key);i++)
308 hmac_key[i] ^= 0x36^0x5c; /* opad */
309 SHA1_Init(&key->tail);
310 SHA1_Update(&key->tail,hmac_key,sizeof(hmac_key));
311
312 return 1;
313 }
314 case EVP_CTRL_AEAD_TLS1_AAD:
315 {
316 unsigned char *p=ptr;
317 unsigned int len=p[arg-2]<<8|p[arg-1];
318
319 if (ctx->encrypt)
320 {
321 key->payload_length = len;
322 if ((key->aux.tls_ver=p[arg-4]<<8|p[arg-3]) >= TLS1_1_VERSION) {
323 len -= AES_BLOCK_SIZE;
324 p[arg-2] = len>>8;
325 p[arg-1] = len;
326 }
327 key->md = key->head;
328 SHA1_Update(&key->md,p,arg);
329
330 return (int)(((len+SHA_DIGEST_LENGTH+AES_BLOCK_SIZE)&-AES_BLOCK_SIZE)
331 - len);
332 }
333 else
334 {
335 if (arg>13) arg = 13;
336 memcpy(key->aux.tls_aad,ptr,arg);
337 key->payload_length = arg;
338
339 return SHA_DIGEST_LENGTH;
340 }
341 }
342 default:
343 return -1;
344 }
345 }
346
347static EVP_CIPHER aesni_128_cbc_hmac_sha1_cipher =
348 {
349#ifdef NID_aes_128_cbc_hmac_sha1
350 NID_aes_128_cbc_hmac_sha1,
351#else
352 NID_undef,
353#endif
354 16,16,16,
355 EVP_CIPH_CBC_MODE|EVP_CIPH_FLAG_DEFAULT_ASN1|EVP_CIPH_FLAG_AEAD_CIPHER,
356 aesni_cbc_hmac_sha1_init_key,
357 aesni_cbc_hmac_sha1_cipher,
358 NULL,
359 sizeof(EVP_AES_HMAC_SHA1),
360 EVP_CIPH_FLAG_DEFAULT_ASN1?NULL:EVP_CIPHER_set_asn1_iv,
361 EVP_CIPH_FLAG_DEFAULT_ASN1?NULL:EVP_CIPHER_get_asn1_iv,
362 aesni_cbc_hmac_sha1_ctrl,
363 NULL
364 };
365
366static EVP_CIPHER aesni_256_cbc_hmac_sha1_cipher =
367 {
368#ifdef NID_aes_256_cbc_hmac_sha1
369 NID_aes_256_cbc_hmac_sha1,
370#else
371 NID_undef,
372#endif
373 16,32,16,
374 EVP_CIPH_CBC_MODE|EVP_CIPH_FLAG_DEFAULT_ASN1|EVP_CIPH_FLAG_AEAD_CIPHER,
375 aesni_cbc_hmac_sha1_init_key,
376 aesni_cbc_hmac_sha1_cipher,
377 NULL,
378 sizeof(EVP_AES_HMAC_SHA1),
379 EVP_CIPH_FLAG_DEFAULT_ASN1?NULL:EVP_CIPHER_set_asn1_iv,
380 EVP_CIPH_FLAG_DEFAULT_ASN1?NULL:EVP_CIPHER_get_asn1_iv,
381 aesni_cbc_hmac_sha1_ctrl,
382 NULL
383 };
384
385const EVP_CIPHER *EVP_aes_128_cbc_hmac_sha1(void)
386 {
387 return(OPENSSL_ia32cap_P[1]&AESNI_CAPABLE?
388 &aesni_128_cbc_hmac_sha1_cipher:NULL);
389 }
390
391const EVP_CIPHER *EVP_aes_256_cbc_hmac_sha1(void)
392 {
393 return(OPENSSL_ia32cap_P[1]&AESNI_CAPABLE?
394 &aesni_256_cbc_hmac_sha1_cipher:NULL);
395 }
396#else
397const EVP_CIPHER *EVP_aes_128_cbc_hmac_sha1(void)
398 {
399 return NULL;
400 }
401const EVP_CIPHER *EVP_aes_256_cbc_hmac_sha1(void)
402 {
403 return NULL;
404 }
405#endif
406#endif
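
Editor's note: the stitched cipher above is driven entirely through EVP_CIPHER_CTX_ctrl(): the MAC key is loaded with EVP_CTRL_AEAD_SET_MAC_KEY, and the 13-byte TLS record header is supplied with EVP_CTRL_AEAD_TLS1_AAD, whose return value tells the caller how many MAC-plus-padding bytes the cipher will append. A minimal caller-side sketch follows; seal_tls10_record() and its buffers are hypothetical, it assumes a TLS 1.0-style record (no explicit IV), and the last two AAD bytes must already hold the payload length.

#include <openssl/evp.h>

/* Hypothetical helper: rec holds payload_len bytes of plaintext and has
 * room for the HMAC-SHA1 tag and CBC padding that the cipher appends. */
static int seal_tls10_record(unsigned char *rec, size_t payload_len,
	const unsigned char enc_key[16], const unsigned char mac_key[20],
	const unsigned char iv[16], unsigned char aad[13])
	{
	const EVP_CIPHER *c = EVP_aes_128_cbc_hmac_sha1();
	EVP_CIPHER_CTX ctx;
	int pad;

	if (c == NULL)
		return -1;	/* no AES-NI: use separate AES-CBC + HMAC instead */

	EVP_CIPHER_CTX_init(&ctx);
	if (!EVP_EncryptInit_ex(&ctx, c, NULL, enc_key, iv))
		goto err;
	/* derive the inner/outer HMAC-SHA1 contexts from the MAC key */
	EVP_CIPHER_CTX_ctrl(&ctx, EVP_CTRL_AEAD_SET_MAC_KEY, 20, (void *)mac_key);
	/* hand over the TLS AAD; pad = number of HMAC + padding bytes to append */
	pad = EVP_CIPHER_CTX_ctrl(&ctx, EVP_CTRL_AEAD_TLS1_AAD, 13, aad);
	if (pad < 0)
		goto err;
	/* one stitched pass: SHA-1 over AAD||payload, then AES-CBC encrypt */
	if (!EVP_Cipher(&ctx, rec, rec, payload_len + pad))
		goto err;
	EVP_CIPHER_CTX_cleanup(&ctx);
	return (int)(payload_len + pad);
err:
	EVP_CIPHER_CTX_cleanup(&ctx);
	return -1;
	}
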
diff --git a/src/lib/libssl/src/crypto/evp/e_rc4_hmac_md5.c b/src/lib/libssl/src/crypto/evp/e_rc4_hmac_md5.c
new file mode 100644
index 0000000000..56563191ba
--- /dev/null
+++ b/src/lib/libssl/src/crypto/evp/e_rc4_hmac_md5.c
@@ -0,0 +1,298 @@
1/* ====================================================================
2 * Copyright (c) 2011 The OpenSSL Project. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in
13 * the documentation and/or other materials provided with the
14 * distribution.
15 *
16 * 3. All advertising materials mentioning features or use of this
17 * software must display the following acknowledgment:
18 * "This product includes software developed by the OpenSSL Project
19 * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
20 *
21 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
22 * endorse or promote products derived from this software without
23 * prior written permission. For written permission, please contact
24 * licensing@OpenSSL.org.
25 *
26 * 5. Products derived from this software may not be called "OpenSSL"
27 * nor may "OpenSSL" appear in their names without prior written
28 * permission of the OpenSSL Project.
29 *
30 * 6. Redistributions of any form whatsoever must retain the following
31 * acknowledgment:
32 * "This product includes software developed by the OpenSSL Project
33 * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
34 *
35 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
36 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
37 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
38 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
39 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
40 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
41 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
42 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
43 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
44 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
45 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
46 * OF THE POSSIBILITY OF SUCH DAMAGE.
47 * ====================================================================
48 */
49
50#include <openssl/opensslconf.h>
51
52#include <stdio.h>
53#include <string.h>
54
55#if !defined(OPENSSL_NO_RC4) && !defined(OPENSSL_NO_MD5)
56
57#include <openssl/evp.h>
58#include <openssl/objects.h>
59#include <openssl/rc4.h>
60#include <openssl/md5.h>
61
62#ifndef EVP_CIPH_FLAG_AEAD_CIPHER
63#define EVP_CIPH_FLAG_AEAD_CIPHER 0x200000
64#define EVP_CTRL_AEAD_TLS1_AAD 0x16
65#define EVP_CTRL_AEAD_SET_MAC_KEY 0x17
66#endif
67
68/* FIXME: surely this is available elsewhere? */
69#define EVP_RC4_KEY_SIZE 16
70
71typedef struct
72 {
73 RC4_KEY ks;
74 MD5_CTX head,tail,md;
75 size_t payload_length;
76 } EVP_RC4_HMAC_MD5;
77
78#define NO_PAYLOAD_LENGTH ((size_t)-1)
79
80void rc4_md5_enc (RC4_KEY *key, const void *in0, void *out,
81 MD5_CTX *ctx,const void *inp,size_t blocks);
82
83#define data(ctx) ((EVP_RC4_HMAC_MD5 *)(ctx)->cipher_data)
84
85static int rc4_hmac_md5_init_key(EVP_CIPHER_CTX *ctx,
86 const unsigned char *inkey,
87 const unsigned char *iv, int enc)
88 {
89 EVP_RC4_HMAC_MD5 *key = data(ctx);
90
91 RC4_set_key(&key->ks,EVP_CIPHER_CTX_key_length(ctx),
92 inkey);
93
94 MD5_Init(&key->head); /* handy when benchmarking */
95 key->tail = key->head;
96 key->md = key->head;
97
98 key->payload_length = NO_PAYLOAD_LENGTH;
99
100 return 1;
101 }
102
103#if !defined(OPENSSL_NO_ASM) && ( \
104 defined(__x86_64) || defined(__x86_64__) || \
105 defined(_M_AMD64) || defined(_M_X64) || \
106 defined(__INTEL__) ) && \
107 !(defined(__APPLE__) && defined(__MACH__))
108#define STITCHED_CALL
109#endif
110
111#if !defined(STITCHED_CALL)
112#define rc4_off 0
113#define md5_off 0
114#endif
115
116static int rc4_hmac_md5_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
117 const unsigned char *in, size_t len)
118 {
119 EVP_RC4_HMAC_MD5 *key = data(ctx);
120#if defined(STITCHED_CALL)
121 size_t rc4_off = 32-1-(key->ks.x&(32-1)), /* 32 is $MOD from rc4_md5-x86_64.pl */
122 md5_off = MD5_CBLOCK-key->md.num,
123 blocks;
124 unsigned int l;
125 extern unsigned int OPENSSL_ia32cap_P[];
126#endif
127 size_t plen = key->payload_length;
128
129 if (plen!=NO_PAYLOAD_LENGTH && len!=(plen+MD5_DIGEST_LENGTH)) return 0;
130
131 if (ctx->encrypt) {
132 if (plen==NO_PAYLOAD_LENGTH) plen = len;
133#if defined(STITCHED_CALL)
134 /* cipher has to "fall behind" */
135 if (rc4_off>md5_off) md5_off+=MD5_CBLOCK;
136
137 if (plen>md5_off && (blocks=(plen-md5_off)/MD5_CBLOCK) &&
138 (OPENSSL_ia32cap_P[0]&(1<<20))==0) {
139 MD5_Update(&key->md,in,md5_off);
140 RC4(&key->ks,rc4_off,in,out);
141
142 rc4_md5_enc(&key->ks,in+rc4_off,out+rc4_off,
143 &key->md,in+md5_off,blocks);
144 blocks *= MD5_CBLOCK;
145 rc4_off += blocks;
146 md5_off += blocks;
147 key->md.Nh += blocks>>29;
148 key->md.Nl += blocks<<=3;
149 if (key->md.Nl<(unsigned int)blocks) key->md.Nh++;
150 } else {
151 rc4_off = 0;
152 md5_off = 0;
153 }
154#endif
155 MD5_Update(&key->md,in+md5_off,plen-md5_off);
156
157 if (plen!=len) { /* "TLS" mode of operation */
158 if (in!=out)
159 memcpy(out+rc4_off,in+rc4_off,plen-rc4_off);
160
161 /* calculate HMAC and append it to payload */
162 MD5_Final(out+plen,&key->md);
163 key->md = key->tail;
164 MD5_Update(&key->md,out+plen,MD5_DIGEST_LENGTH);
165 MD5_Final(out+plen,&key->md);
166 /* encrypt HMAC at once */
167 RC4(&key->ks,len-rc4_off,out+rc4_off,out+rc4_off);
168 } else {
169 RC4(&key->ks,len-rc4_off,in+rc4_off,out+rc4_off);
170 }
171 } else {
172 unsigned char mac[MD5_DIGEST_LENGTH];
173#if defined(STITCHED_CALL)
174 /* digest has to "fall behind" */
175 if (md5_off>rc4_off) rc4_off += 2*MD5_CBLOCK;
176 else rc4_off += MD5_CBLOCK;
177
178 if (len>rc4_off && (blocks=(len-rc4_off)/MD5_CBLOCK) &&
179 (OPENSSL_ia32cap_P[0]&(1<<20))==0) {
180 RC4(&key->ks,rc4_off,in,out);
181 MD5_Update(&key->md,out,md5_off);
182
183 rc4_md5_enc(&key->ks,in+rc4_off,out+rc4_off,
184 &key->md,out+md5_off,blocks);
185 blocks *= MD5_CBLOCK;
186 rc4_off += blocks;
187 md5_off += blocks;
188 l = (key->md.Nl+(blocks<<3))&0xffffffffU;
189 if (l<key->md.Nl) key->md.Nh++;
190 key->md.Nl = l;
191 key->md.Nh += blocks>>29;
192 } else {
193 md5_off=0;
194 rc4_off=0;
195 }
196#endif
197 /* decrypt HMAC at once */
198 RC4(&key->ks,len-rc4_off,in+rc4_off,out+rc4_off);
199 if (plen!=NO_PAYLOAD_LENGTH) { /* "TLS" mode of operation */
200 MD5_Update(&key->md,out+md5_off,plen-md5_off);
201
202 /* calculate HMAC and verify it */
203 MD5_Final(mac,&key->md);
204 key->md = key->tail;
205 MD5_Update(&key->md,mac,MD5_DIGEST_LENGTH);
206 MD5_Final(mac,&key->md);
207
208 if (memcmp(out+plen,mac,MD5_DIGEST_LENGTH))
209 return 0;
210 } else {
211 MD5_Update(&key->md,out+md5_off,len-md5_off);
212 }
213 }
214
215 key->payload_length = NO_PAYLOAD_LENGTH;
216
217 return 1;
218 }
219
220static int rc4_hmac_md5_ctrl(EVP_CIPHER_CTX *ctx, int type, int arg, void *ptr)
221 {
222 EVP_RC4_HMAC_MD5 *key = data(ctx);
223
224 switch (type)
225 {
226 case EVP_CTRL_AEAD_SET_MAC_KEY:
227 {
228 unsigned int i;
229 unsigned char hmac_key[64];
230
231 memset (hmac_key,0,sizeof(hmac_key));
232
233 if (arg > (int)sizeof(hmac_key)) {
234 MD5_Init(&key->head);
235 MD5_Update(&key->head,ptr,arg);
236 MD5_Final(hmac_key,&key->head);
237 } else {
238 memcpy(hmac_key,ptr,arg);
239 }
240
241 for (i=0;i<sizeof(hmac_key);i++)
242 hmac_key[i] ^= 0x36; /* ipad */
243 MD5_Init(&key->head);
244 MD5_Update(&key->head,hmac_key,sizeof(hmac_key));
245
246 for (i=0;i<sizeof(hmac_key);i++)
247 hmac_key[i] ^= 0x36^0x5c; /* opad */
248 MD5_Init(&key->tail);
249 MD5_Update(&key->tail,hmac_key,sizeof(hmac_key));
250
251 return 1;
252 }
253 case EVP_CTRL_AEAD_TLS1_AAD:
254 {
255 unsigned char *p=ptr;
256 unsigned int len=p[arg-2]<<8|p[arg-1];
257
258 if (!ctx->encrypt)
259 {
260 len -= MD5_DIGEST_LENGTH;
261 p[arg-2] = len>>8;
262 p[arg-1] = len;
263 }
264 key->payload_length=len;
265 key->md = key->head;
266 MD5_Update(&key->md,p,arg);
267
268 return MD5_DIGEST_LENGTH;
269 }
270 default:
271 return -1;
272 }
273 }
274
275static EVP_CIPHER r4_hmac_md5_cipher=
276 {
277#ifdef NID_rc4_hmac_md5
278 NID_rc4_hmac_md5,
279#else
280 NID_undef,
281#endif
282 1,EVP_RC4_KEY_SIZE,0,
283 EVP_CIPH_STREAM_CIPHER|EVP_CIPH_VARIABLE_LENGTH|EVP_CIPH_FLAG_AEAD_CIPHER,
284 rc4_hmac_md5_init_key,
285 rc4_hmac_md5_cipher,
286 NULL,
287 sizeof(EVP_RC4_HMAC_MD5),
288 NULL,
289 NULL,
290 rc4_hmac_md5_ctrl,
291 NULL
292 };
293
294const EVP_CIPHER *EVP_rc4_hmac_md5(void)
295 {
296 return(&r4_hmac_md5_cipher);
297 }
298#endif
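
Editor's note: the RC4-MD5 pairing uses the same two ctrl operations, but a stream cipher needs no IV and no padding, so EVP_CTRL_AEAD_TLS1_AAD simply returns MD5_DIGEST_LENGTH. The sketch below shows the decrypt/verify side, which is the less obvious direction; open_rc4_md5_record() and its buffers are hypothetical, and the incoming AAD length field is assumed to hold the ciphertext length (payload plus MAC), which the ctrl rewrites to the payload length before hashing.

#include <openssl/evp.h>

/* Hypothetical helper: decrypt rec_len bytes in place and verify the MAC. */
static int open_rc4_md5_record(unsigned char *rec, size_t rec_len,
	const unsigned char rc4_key[16], const unsigned char mac_key[16],
	unsigned char aad[13])
	{
	EVP_CIPHER_CTX ctx;

	EVP_CIPHER_CTX_init(&ctx);
	if (!EVP_DecryptInit_ex(&ctx, EVP_rc4_hmac_md5(), NULL, rc4_key, NULL))
		goto err;
	EVP_CIPHER_CTX_ctrl(&ctx, EVP_CTRL_AEAD_SET_MAC_KEY, 16, (void *)mac_key);
	if (EVP_CIPHER_CTX_ctrl(&ctx, EVP_CTRL_AEAD_TLS1_AAD, 13, aad) < 0)
		goto err;
	/* decrypts payload||HMAC in place and checks the MAC; 0 on failure */
	if (!EVP_Cipher(&ctx, rec, rec, rec_len))
		goto err;
	EVP_CIPHER_CTX_cleanup(&ctx);
	return 1;
err:
	EVP_CIPHER_CTX_cleanup(&ctx);
	return 0;
	}
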
diff --git a/src/lib/libssl/src/crypto/evp/evp_fips.c b/src/lib/libssl/src/crypto/evp/evp_fips.c
new file mode 100644
index 0000000000..cb7f4fc0fa
--- /dev/null
+++ b/src/lib/libssl/src/crypto/evp/evp_fips.c
@@ -0,0 +1,113 @@
1/* crypto/evp/evp_fips.c */
2/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL
3 * project.
4 */
5/* ====================================================================
6 * Copyright (c) 2011 The OpenSSL Project. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 *
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in
17 * the documentation and/or other materials provided with the
18 * distribution.
19 *
20 * 3. All advertising materials mentioning features or use of this
21 * software must display the following acknowledgment:
22 * "This product includes software developed by the OpenSSL Project
23 * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
24 *
25 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
26 * endorse or promote products derived from this software without
27 * prior written permission. For written permission, please contact
28 * licensing@OpenSSL.org.
29 *
30 * 5. Products derived from this software may not be called "OpenSSL"
31 * nor may "OpenSSL" appear in their names without prior written
32 * permission of the OpenSSL Project.
33 *
34 * 6. Redistributions of any form whatsoever must retain the following
35 * acknowledgment:
36 * "This product includes software developed by the OpenSSL Project
37 * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
38 *
39 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
40 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
41 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
42 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
43 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
44 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
45 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
46 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
47 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
48 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
49 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
50 * OF THE POSSIBILITY OF SUCH DAMAGE.
51 * ====================================================================
52 */
53
54
55#include <openssl/evp.h>
56
57#ifdef OPENSSL_FIPS
58#include <openssl/fips.h>
59
60const EVP_CIPHER *EVP_aes_128_cbc(void) { return FIPS_evp_aes_128_cbc(); }
61const EVP_CIPHER *EVP_aes_128_ccm(void) { return FIPS_evp_aes_128_ccm(); }
62const EVP_CIPHER *EVP_aes_128_cfb1(void) { return FIPS_evp_aes_128_cfb1(); }
63const EVP_CIPHER *EVP_aes_128_cfb128(void) { return FIPS_evp_aes_128_cfb128(); }
64const EVP_CIPHER *EVP_aes_128_cfb8(void) { return FIPS_evp_aes_128_cfb8(); }
65const EVP_CIPHER *EVP_aes_128_ctr(void) { return FIPS_evp_aes_128_ctr(); }
66const EVP_CIPHER *EVP_aes_128_ecb(void) { return FIPS_evp_aes_128_ecb(); }
67const EVP_CIPHER *EVP_aes_128_gcm(void) { return FIPS_evp_aes_128_gcm(); }
68const EVP_CIPHER *EVP_aes_128_ofb(void) { return FIPS_evp_aes_128_ofb(); }
69const EVP_CIPHER *EVP_aes_128_xts(void) { return FIPS_evp_aes_128_xts(); }
70const EVP_CIPHER *EVP_aes_192_cbc(void) { return FIPS_evp_aes_192_cbc(); }
71const EVP_CIPHER *EVP_aes_192_ccm(void) { return FIPS_evp_aes_192_ccm(); }
72const EVP_CIPHER *EVP_aes_192_cfb1(void) { return FIPS_evp_aes_192_cfb1(); }
73const EVP_CIPHER *EVP_aes_192_cfb128(void) { return FIPS_evp_aes_192_cfb128(); }
74const EVP_CIPHER *EVP_aes_192_cfb8(void) { return FIPS_evp_aes_192_cfb8(); }
75const EVP_CIPHER *EVP_aes_192_ctr(void) { return FIPS_evp_aes_192_ctr(); }
76const EVP_CIPHER *EVP_aes_192_ecb(void) { return FIPS_evp_aes_192_ecb(); }
77const EVP_CIPHER *EVP_aes_192_gcm(void) { return FIPS_evp_aes_192_gcm(); }
78const EVP_CIPHER *EVP_aes_192_ofb(void) { return FIPS_evp_aes_192_ofb(); }
79const EVP_CIPHER *EVP_aes_256_cbc(void) { return FIPS_evp_aes_256_cbc(); }
80const EVP_CIPHER *EVP_aes_256_ccm(void) { return FIPS_evp_aes_256_ccm(); }
81const EVP_CIPHER *EVP_aes_256_cfb1(void) { return FIPS_evp_aes_256_cfb1(); }
82const EVP_CIPHER *EVP_aes_256_cfb128(void) { return FIPS_evp_aes_256_cfb128(); }
83const EVP_CIPHER *EVP_aes_256_cfb8(void) { return FIPS_evp_aes_256_cfb8(); }
84const EVP_CIPHER *EVP_aes_256_ctr(void) { return FIPS_evp_aes_256_ctr(); }
85const EVP_CIPHER *EVP_aes_256_ecb(void) { return FIPS_evp_aes_256_ecb(); }
86const EVP_CIPHER *EVP_aes_256_gcm(void) { return FIPS_evp_aes_256_gcm(); }
87const EVP_CIPHER *EVP_aes_256_ofb(void) { return FIPS_evp_aes_256_ofb(); }
88const EVP_CIPHER *EVP_aes_256_xts(void) { return FIPS_evp_aes_256_xts(); }
89const EVP_CIPHER *EVP_des_ede(void) { return FIPS_evp_des_ede(); }
90const EVP_CIPHER *EVP_des_ede3(void) { return FIPS_evp_des_ede3(); }
91const EVP_CIPHER *EVP_des_ede3_cbc(void) { return FIPS_evp_des_ede3_cbc(); }
92const EVP_CIPHER *EVP_des_ede3_cfb1(void) { return FIPS_evp_des_ede3_cfb1(); }
93const EVP_CIPHER *EVP_des_ede3_cfb64(void) { return FIPS_evp_des_ede3_cfb64(); }
94const EVP_CIPHER *EVP_des_ede3_cfb8(void) { return FIPS_evp_des_ede3_cfb8(); }
95const EVP_CIPHER *EVP_des_ede3_ecb(void) { return FIPS_evp_des_ede3_ecb(); }
96const EVP_CIPHER *EVP_des_ede3_ofb(void) { return FIPS_evp_des_ede3_ofb(); }
97const EVP_CIPHER *EVP_des_ede_cbc(void) { return FIPS_evp_des_ede_cbc(); }
98const EVP_CIPHER *EVP_des_ede_cfb64(void) { return FIPS_evp_des_ede_cfb64(); }
99const EVP_CIPHER *EVP_des_ede_ecb(void) { return FIPS_evp_des_ede_ecb(); }
100const EVP_CIPHER *EVP_des_ede_ofb(void) { return FIPS_evp_des_ede_ofb(); }
101const EVP_CIPHER *EVP_enc_null(void) { return FIPS_evp_enc_null(); }
102
103const EVP_MD *EVP_sha1(void) { return FIPS_evp_sha1(); }
104const EVP_MD *EVP_sha224(void) { return FIPS_evp_sha224(); }
105const EVP_MD *EVP_sha256(void) { return FIPS_evp_sha256(); }
106const EVP_MD *EVP_sha384(void) { return FIPS_evp_sha384(); }
107const EVP_MD *EVP_sha512(void) { return FIPS_evp_sha512(); }
108
109const EVP_MD *EVP_dss(void) { return FIPS_evp_dss(); }
110const EVP_MD *EVP_dss1(void) { return FIPS_evp_dss1(); }
111const EVP_MD *EVP_ecdsa(void) { return FIPS_evp_ecdsa(); }
112
113#endif
diff --git a/src/lib/libssl/src/crypto/evp/m_ecdsa.c b/src/lib/libssl/src/crypto/evp/m_ecdsa.c
index 8d87a49ebe..4b15fb0f6c 100644
--- a/src/lib/libssl/src/crypto/evp/m_ecdsa.c
+++ b/src/lib/libssl/src/crypto/evp/m_ecdsa.c
@@ -116,6 +116,8 @@
116#include <openssl/x509.h> 116#include <openssl/x509.h>
117 117
118#ifndef OPENSSL_NO_SHA 118#ifndef OPENSSL_NO_SHA
119#ifndef OPENSSL_FIPS
120
119static int init(EVP_MD_CTX *ctx) 121static int init(EVP_MD_CTX *ctx)
120 { return SHA1_Init(ctx->md_data); } 122 { return SHA1_Init(ctx->md_data); }
121 123
@@ -146,3 +148,4 @@ const EVP_MD *EVP_ecdsa(void)
146 return(&ecdsa_md); 148 return(&ecdsa_md);
147 } 149 }
148#endif 150#endif
151#endif
diff --git a/src/lib/libssl/src/crypto/evp/m_wp.c b/src/lib/libssl/src/crypto/evp/m_wp.c
index 1ce47c040b..c51bc2d5d1 100644
--- a/src/lib/libssl/src/crypto/evp/m_wp.c
+++ b/src/lib/libssl/src/crypto/evp/m_wp.c
@@ -9,6 +9,7 @@
9#include <openssl/objects.h> 9#include <openssl/objects.h>
10#include <openssl/x509.h> 10#include <openssl/x509.h>
11#include <openssl/whrlpool.h> 11#include <openssl/whrlpool.h>
12#include "evp_locl.h"
12 13
13static int init(EVP_MD_CTX *ctx) 14static int init(EVP_MD_CTX *ctx)
14 { return WHIRLPOOL_Init(ctx->md_data); } 15 { return WHIRLPOOL_Init(ctx->md_data); }
diff --git a/src/lib/libssl/src/crypto/evp/pmeth_gn.c b/src/lib/libssl/src/crypto/evp/pmeth_gn.c
index 5d74161a09..4651c81370 100644
--- a/src/lib/libssl/src/crypto/evp/pmeth_gn.c
+++ b/src/lib/libssl/src/crypto/evp/pmeth_gn.c
@@ -199,7 +199,7 @@ int EVP_PKEY_CTX_get_keygen_info(EVP_PKEY_CTX *ctx, int idx)
199 } 199 }
200 200
201EVP_PKEY *EVP_PKEY_new_mac_key(int type, ENGINE *e, 201EVP_PKEY *EVP_PKEY_new_mac_key(int type, ENGINE *e,
202 unsigned char *key, int keylen) 202 const unsigned char *key, int keylen)
203 { 203 {
204 EVP_PKEY_CTX *mac_ctx = NULL; 204 EVP_PKEY_CTX *mac_ctx = NULL;
205 EVP_PKEY *mac_key = NULL; 205 EVP_PKEY *mac_key = NULL;
@@ -209,7 +209,8 @@ EVP_PKEY *EVP_PKEY_new_mac_key(int type, ENGINE *e,
209 if (EVP_PKEY_keygen_init(mac_ctx) <= 0) 209 if (EVP_PKEY_keygen_init(mac_ctx) <= 0)
210 goto merr; 210 goto merr;
211 if (EVP_PKEY_CTX_ctrl(mac_ctx, -1, EVP_PKEY_OP_KEYGEN, 211 if (EVP_PKEY_CTX_ctrl(mac_ctx, -1, EVP_PKEY_OP_KEYGEN,
212 EVP_PKEY_CTRL_SET_MAC_KEY, keylen, key) <= 0) 212 EVP_PKEY_CTRL_SET_MAC_KEY,
213 keylen, (void *)key) <= 0)
213 goto merr; 214 goto merr;
214 if (EVP_PKEY_keygen(mac_ctx, &mac_key) <= 0) 215 if (EVP_PKEY_keygen(mac_ctx, &mac_key) <= 0)
215 goto merr; 216 goto merr;
diff --git a/src/lib/libssl/src/crypto/evp/pmeth_lib.c b/src/lib/libssl/src/crypto/evp/pmeth_lib.c
index 5481d4b8a5..acfa7b6f87 100644
--- a/src/lib/libssl/src/crypto/evp/pmeth_lib.c
+++ b/src/lib/libssl/src/crypto/evp/pmeth_lib.c
@@ -73,7 +73,7 @@ DECLARE_STACK_OF(EVP_PKEY_METHOD)
73STACK_OF(EVP_PKEY_METHOD) *app_pkey_methods = NULL; 73STACK_OF(EVP_PKEY_METHOD) *app_pkey_methods = NULL;
74 74
75extern const EVP_PKEY_METHOD rsa_pkey_meth, dh_pkey_meth, dsa_pkey_meth; 75extern const EVP_PKEY_METHOD rsa_pkey_meth, dh_pkey_meth, dsa_pkey_meth;
76extern const EVP_PKEY_METHOD ec_pkey_meth, hmac_pkey_meth; 76extern const EVP_PKEY_METHOD ec_pkey_meth, hmac_pkey_meth, cmac_pkey_meth;
77 77
78static const EVP_PKEY_METHOD *standard_methods[] = 78static const EVP_PKEY_METHOD *standard_methods[] =
79 { 79 {
@@ -90,6 +90,7 @@ static const EVP_PKEY_METHOD *standard_methods[] =
90 &ec_pkey_meth, 90 &ec_pkey_meth,
91#endif 91#endif
92 &hmac_pkey_meth, 92 &hmac_pkey_meth,
93 &cmac_pkey_meth
93 }; 94 };
94 95
95DECLARE_OBJ_BSEARCH_CMP_FN(const EVP_PKEY_METHOD *, const EVP_PKEY_METHOD *, 96DECLARE_OBJ_BSEARCH_CMP_FN(const EVP_PKEY_METHOD *, const EVP_PKEY_METHOD *,
@@ -203,6 +204,8 @@ EVP_PKEY_METHOD* EVP_PKEY_meth_new(int id, int flags)
203 if (!pmeth) 204 if (!pmeth)
204 return NULL; 205 return NULL;
205 206
207 memset(pmeth, 0, sizeof(EVP_PKEY_METHOD));
208
206 pmeth->pkey_id = id; 209 pmeth->pkey_id = id;
207 pmeth->flags = flags | EVP_PKEY_FLAG_DYNAMIC; 210 pmeth->flags = flags | EVP_PKEY_FLAG_DYNAMIC;
208 211
@@ -235,6 +238,56 @@ EVP_PKEY_METHOD* EVP_PKEY_meth_new(int id, int flags)
235 return pmeth; 238 return pmeth;
236 } 239 }
237 240
241void EVP_PKEY_meth_get0_info(int *ppkey_id, int *pflags,
242 const EVP_PKEY_METHOD *meth)
243 {
244 if (ppkey_id)
245 *ppkey_id = meth->pkey_id;
246 if (pflags)
247 *pflags = meth->flags;
248 }
249
250void EVP_PKEY_meth_copy(EVP_PKEY_METHOD *dst, const EVP_PKEY_METHOD *src)
251 {
252
253 dst->init = src->init;
254 dst->copy = src->copy;
255 dst->cleanup = src->cleanup;
256
257 dst->paramgen_init = src->paramgen_init;
258 dst->paramgen = src->paramgen;
259
260 dst->keygen_init = src->keygen_init;
261 dst->keygen = src->keygen;
262
263 dst->sign_init = src->sign_init;
264 dst->sign = src->sign;
265
266 dst->verify_init = src->verify_init;
267 dst->verify = src->verify;
268
269 dst->verify_recover_init = src->verify_recover_init;
270 dst->verify_recover = src->verify_recover;
271
272 dst->signctx_init = src->signctx_init;
273 dst->signctx = src->signctx;
274
275 dst->verifyctx_init = src->verifyctx_init;
276 dst->verifyctx = src->verifyctx;
277
278 dst->encrypt_init = src->encrypt_init;
279 dst->encrypt = src->encrypt;
280
281 dst->decrypt_init = src->decrypt_init;
282 dst->decrypt = src->decrypt;
283
284 dst->derive_init = src->derive_init;
285 dst->derive = src->derive;
286
287 dst->ctrl = src->ctrl;
288 dst->ctrl_str = src->ctrl_str;
289 }
290
238void EVP_PKEY_meth_free(EVP_PKEY_METHOD *pmeth) 291void EVP_PKEY_meth_free(EVP_PKEY_METHOD *pmeth)
239 { 292 {
240 if (pmeth && (pmeth->flags & EVP_PKEY_FLAG_DYNAMIC)) 293 if (pmeth && (pmeth->flags & EVP_PKEY_FLAG_DYNAMIC))
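
Editor's note: the new EVP_PKEY_meth_copy() and EVP_PKEY_meth_get0_info() helpers, together with the memset() added to EVP_PKEY_meth_new(), let an application or engine derive a custom EVP_PKEY method from a built-in one instead of wiring up every callback by hand. A hypothetical sketch, with register_my_mac_method() and my_nid as placeholders:

#include <openssl/evp.h>

/* Hypothetical: clone the built-in HMAC EVP_PKEY method under a new id. */
static int register_my_mac_method(int my_nid)
	{
	const EVP_PKEY_METHOD *hmac = EVP_PKEY_meth_find(EVP_PKEY_HMAC);
	EVP_PKEY_METHOD *mine;

	if (hmac == NULL)
		return 0;
	mine = EVP_PKEY_meth_new(my_nid, 0);	/* now returned zero-initialised */
	if (mine == NULL)
		return 0;
	EVP_PKEY_meth_copy(mine, hmac);		/* inherit all callbacks */
	return EVP_PKEY_meth_add0(mine);	/* ownership passes to the library */
	}
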
diff --git a/src/lib/libssl/src/crypto/fips_err.h b/src/lib/libssl/src/crypto/fips_err.h
index b328616858..c671691b47 100644
--- a/src/lib/libssl/src/crypto/fips_err.h
+++ b/src/lib/libssl/src/crypto/fips_err.h
@@ -1,6 +1,6 @@
1/* crypto/fips_err.h */ 1/* crypto/fips_err.h */
2/* ==================================================================== 2/* ====================================================================
3 * Copyright (c) 1999-2007 The OpenSSL Project. All rights reserved. 3 * Copyright (c) 1999-2011 The OpenSSL Project. All rights reserved.
4 * 4 *
5 * Redistribution and use in source and binary forms, with or without 5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions 6 * modification, are permitted provided that the following conditions
@@ -71,53 +71,125 @@
71static ERR_STRING_DATA FIPS_str_functs[]= 71static ERR_STRING_DATA FIPS_str_functs[]=
72 { 72 {
73{ERR_FUNC(FIPS_F_DH_BUILTIN_GENPARAMS), "DH_BUILTIN_GENPARAMS"}, 73{ERR_FUNC(FIPS_F_DH_BUILTIN_GENPARAMS), "DH_BUILTIN_GENPARAMS"},
74{ERR_FUNC(FIPS_F_DH_INIT), "DH_INIT"},
75{ERR_FUNC(FIPS_F_DRBG_RESEED), "DRBG_RESEED"},
74{ERR_FUNC(FIPS_F_DSA_BUILTIN_PARAMGEN), "DSA_BUILTIN_PARAMGEN"}, 76{ERR_FUNC(FIPS_F_DSA_BUILTIN_PARAMGEN), "DSA_BUILTIN_PARAMGEN"},
77{ERR_FUNC(FIPS_F_DSA_BUILTIN_PARAMGEN2), "DSA_BUILTIN_PARAMGEN2"},
75{ERR_FUNC(FIPS_F_DSA_DO_SIGN), "DSA_do_sign"}, 78{ERR_FUNC(FIPS_F_DSA_DO_SIGN), "DSA_do_sign"},
76{ERR_FUNC(FIPS_F_DSA_DO_VERIFY), "DSA_do_verify"}, 79{ERR_FUNC(FIPS_F_DSA_DO_VERIFY), "DSA_do_verify"},
77{ERR_FUNC(FIPS_F_EVP_CIPHERINIT_EX), "EVP_CipherInit_ex"},
78{ERR_FUNC(FIPS_F_EVP_DIGESTINIT_EX), "EVP_DigestInit_ex"},
79{ERR_FUNC(FIPS_F_FIPS_CHECK_DSA), "FIPS_CHECK_DSA"}, 80{ERR_FUNC(FIPS_F_FIPS_CHECK_DSA), "FIPS_CHECK_DSA"},
80{ERR_FUNC(FIPS_F_FIPS_CHECK_INCORE_FINGERPRINT), "FIPS_CHECK_INCORE_FINGERPRINT"}, 81{ERR_FUNC(FIPS_F_FIPS_CHECK_DSA_PRNG), "fips_check_dsa_prng"},
81{ERR_FUNC(FIPS_F_FIPS_CHECK_RSA), "FIPS_CHECK_RSA"}, 82{ERR_FUNC(FIPS_F_FIPS_CHECK_EC), "FIPS_CHECK_EC"},
82{ERR_FUNC(FIPS_F_FIPS_DSA_CHECK), "FIPS_DSA_CHECK"}, 83{ERR_FUNC(FIPS_F_FIPS_CHECK_EC_PRNG), "fips_check_ec_prng"},
83{ERR_FUNC(FIPS_F_FIPS_MODE_SET), "FIPS_mode_set"}, 84{ERR_FUNC(FIPS_F_FIPS_CHECK_INCORE_FINGERPRINT), "FIPS_check_incore_fingerprint"},
85{ERR_FUNC(FIPS_F_FIPS_CHECK_RSA), "fips_check_rsa"},
86{ERR_FUNC(FIPS_F_FIPS_CHECK_RSA_PRNG), "fips_check_rsa_prng"},
87{ERR_FUNC(FIPS_F_FIPS_CIPHER), "FIPS_cipher"},
88{ERR_FUNC(FIPS_F_FIPS_CIPHERINIT), "FIPS_cipherinit"},
89{ERR_FUNC(FIPS_F_FIPS_CIPHER_CTX_CTRL), "FIPS_CIPHER_CTX_CTRL"},
90{ERR_FUNC(FIPS_F_FIPS_DIGESTFINAL), "FIPS_digestfinal"},
91{ERR_FUNC(FIPS_F_FIPS_DIGESTINIT), "FIPS_digestinit"},
92{ERR_FUNC(FIPS_F_FIPS_DIGESTUPDATE), "FIPS_digestupdate"},
93{ERR_FUNC(FIPS_F_FIPS_DRBG_BYTES), "FIPS_DRBG_BYTES"},
94{ERR_FUNC(FIPS_F_FIPS_DRBG_CHECK), "FIPS_DRBG_CHECK"},
95{ERR_FUNC(FIPS_F_FIPS_DRBG_CPRNG_TEST), "FIPS_DRBG_CPRNG_TEST"},
96{ERR_FUNC(FIPS_F_FIPS_DRBG_ERROR_CHECK), "FIPS_DRBG_ERROR_CHECK"},
97{ERR_FUNC(FIPS_F_FIPS_DRBG_GENERATE), "FIPS_drbg_generate"},
98{ERR_FUNC(FIPS_F_FIPS_DRBG_INIT), "FIPS_drbg_init"},
99{ERR_FUNC(FIPS_F_FIPS_DRBG_INSTANTIATE), "FIPS_drbg_instantiate"},
100{ERR_FUNC(FIPS_F_FIPS_DRBG_NEW), "FIPS_drbg_new"},
101{ERR_FUNC(FIPS_F_FIPS_DRBG_RESEED), "FIPS_drbg_reseed"},
102{ERR_FUNC(FIPS_F_FIPS_DRBG_SINGLE_KAT), "FIPS_DRBG_SINGLE_KAT"},
103{ERR_FUNC(FIPS_F_FIPS_DSA_SIGN_DIGEST), "FIPS_dsa_sign_digest"},
104{ERR_FUNC(FIPS_F_FIPS_DSA_VERIFY_DIGEST), "FIPS_dsa_verify_digest"},
105{ERR_FUNC(FIPS_F_FIPS_GET_ENTROPY), "FIPS_GET_ENTROPY"},
106{ERR_FUNC(FIPS_F_FIPS_MODULE_MODE_SET), "FIPS_module_mode_set"},
84{ERR_FUNC(FIPS_F_FIPS_PKEY_SIGNATURE_TEST), "fips_pkey_signature_test"}, 107{ERR_FUNC(FIPS_F_FIPS_PKEY_SIGNATURE_TEST), "fips_pkey_signature_test"},
108{ERR_FUNC(FIPS_F_FIPS_RAND_ADD), "FIPS_rand_add"},
109{ERR_FUNC(FIPS_F_FIPS_RAND_BYTES), "FIPS_rand_bytes"},
110{ERR_FUNC(FIPS_F_FIPS_RAND_PSEUDO_BYTES), "FIPS_rand_pseudo_bytes"},
111{ERR_FUNC(FIPS_F_FIPS_RAND_SEED), "FIPS_rand_seed"},
112{ERR_FUNC(FIPS_F_FIPS_RAND_SET_METHOD), "FIPS_rand_set_method"},
113{ERR_FUNC(FIPS_F_FIPS_RAND_STATUS), "FIPS_rand_status"},
114{ERR_FUNC(FIPS_F_FIPS_RSA_SIGN_DIGEST), "FIPS_rsa_sign_digest"},
115{ERR_FUNC(FIPS_F_FIPS_RSA_VERIFY_DIGEST), "FIPS_rsa_verify_digest"},
85{ERR_FUNC(FIPS_F_FIPS_SELFTEST_AES), "FIPS_selftest_aes"}, 116{ERR_FUNC(FIPS_F_FIPS_SELFTEST_AES), "FIPS_selftest_aes"},
117{ERR_FUNC(FIPS_F_FIPS_SELFTEST_AES_CCM), "FIPS_selftest_aes_ccm"},
118{ERR_FUNC(FIPS_F_FIPS_SELFTEST_AES_GCM), "FIPS_selftest_aes_gcm"},
119{ERR_FUNC(FIPS_F_FIPS_SELFTEST_AES_XTS), "FIPS_selftest_aes_xts"},
120{ERR_FUNC(FIPS_F_FIPS_SELFTEST_CMAC), "FIPS_selftest_cmac"},
86{ERR_FUNC(FIPS_F_FIPS_SELFTEST_DES), "FIPS_selftest_des"}, 121{ERR_FUNC(FIPS_F_FIPS_SELFTEST_DES), "FIPS_selftest_des"},
87{ERR_FUNC(FIPS_F_FIPS_SELFTEST_DSA), "FIPS_selftest_dsa"}, 122{ERR_FUNC(FIPS_F_FIPS_SELFTEST_DSA), "FIPS_selftest_dsa"},
123{ERR_FUNC(FIPS_F_FIPS_SELFTEST_ECDSA), "FIPS_selftest_ecdsa"},
88{ERR_FUNC(FIPS_F_FIPS_SELFTEST_HMAC), "FIPS_selftest_hmac"}, 124{ERR_FUNC(FIPS_F_FIPS_SELFTEST_HMAC), "FIPS_selftest_hmac"},
89{ERR_FUNC(FIPS_F_FIPS_SELFTEST_RNG), "FIPS_selftest_rng"},
90{ERR_FUNC(FIPS_F_FIPS_SELFTEST_SHA1), "FIPS_selftest_sha1"}, 125{ERR_FUNC(FIPS_F_FIPS_SELFTEST_SHA1), "FIPS_selftest_sha1"},
126{ERR_FUNC(FIPS_F_FIPS_SELFTEST_X931), "FIPS_selftest_x931"},
127{ERR_FUNC(FIPS_F_FIPS_SET_PRNG_KEY), "FIPS_SET_PRNG_KEY"},
91{ERR_FUNC(FIPS_F_HASH_FINAL), "HASH_FINAL"}, 128{ERR_FUNC(FIPS_F_HASH_FINAL), "HASH_FINAL"},
92{ERR_FUNC(FIPS_F_RSA_BUILTIN_KEYGEN), "RSA_BUILTIN_KEYGEN"}, 129{ERR_FUNC(FIPS_F_RSA_BUILTIN_KEYGEN), "RSA_BUILTIN_KEYGEN"},
130{ERR_FUNC(FIPS_F_RSA_EAY_INIT), "RSA_EAY_INIT"},
93{ERR_FUNC(FIPS_F_RSA_EAY_PRIVATE_DECRYPT), "RSA_EAY_PRIVATE_DECRYPT"}, 131{ERR_FUNC(FIPS_F_RSA_EAY_PRIVATE_DECRYPT), "RSA_EAY_PRIVATE_DECRYPT"},
94{ERR_FUNC(FIPS_F_RSA_EAY_PRIVATE_ENCRYPT), "RSA_EAY_PRIVATE_ENCRYPT"}, 132{ERR_FUNC(FIPS_F_RSA_EAY_PRIVATE_ENCRYPT), "RSA_EAY_PRIVATE_ENCRYPT"},
95{ERR_FUNC(FIPS_F_RSA_EAY_PUBLIC_DECRYPT), "RSA_EAY_PUBLIC_DECRYPT"}, 133{ERR_FUNC(FIPS_F_RSA_EAY_PUBLIC_DECRYPT), "RSA_EAY_PUBLIC_DECRYPT"},
96{ERR_FUNC(FIPS_F_RSA_EAY_PUBLIC_ENCRYPT), "RSA_EAY_PUBLIC_ENCRYPT"}, 134{ERR_FUNC(FIPS_F_RSA_EAY_PUBLIC_ENCRYPT), "RSA_EAY_PUBLIC_ENCRYPT"},
97{ERR_FUNC(FIPS_F_RSA_X931_GENERATE_KEY_EX), "RSA_X931_generate_key_ex"}, 135{ERR_FUNC(FIPS_F_RSA_X931_GENERATE_KEY_EX), "RSA_X931_generate_key_ex"},
98{ERR_FUNC(FIPS_F_SSLEAY_RAND_BYTES), "SSLEAY_RAND_BYTES"},
99{0,NULL} 136{0,NULL}
100 }; 137 };
101 138
102static ERR_STRING_DATA FIPS_str_reasons[]= 139static ERR_STRING_DATA FIPS_str_reasons[]=
103 { 140 {
104{ERR_REASON(FIPS_R_CANNOT_READ_EXE) ,"cannot read exe"}, 141{ERR_REASON(FIPS_R_ADDITIONAL_INPUT_ERROR_UNDETECTED),"additional input error undetected"},
105{ERR_REASON(FIPS_R_CANNOT_READ_EXE_DIGEST),"cannot read exe digest"}, 142{ERR_REASON(FIPS_R_ADDITIONAL_INPUT_TOO_LONG),"additional input too long"},
143{ERR_REASON(FIPS_R_ALREADY_INSTANTIATED) ,"already instantiated"},
144{ERR_REASON(FIPS_R_AUTHENTICATION_FAILURE),"authentication failure"},
106{ERR_REASON(FIPS_R_CONTRADICTING_EVIDENCE),"contradicting evidence"}, 145{ERR_REASON(FIPS_R_CONTRADICTING_EVIDENCE),"contradicting evidence"},
107{ERR_REASON(FIPS_R_EXE_DIGEST_DOES_NOT_MATCH),"exe digest does not match"}, 146{ERR_REASON(FIPS_R_DRBG_NOT_INITIALISED) ,"drbg not initialised"},
147{ERR_REASON(FIPS_R_DRBG_STUCK) ,"drbg stuck"},
148{ERR_REASON(FIPS_R_ENTROPY_ERROR_UNDETECTED),"entropy error undetected"},
149{ERR_REASON(FIPS_R_ENTROPY_NOT_REQUESTED_FOR_RESEED),"entropy not requested for reseed"},
150{ERR_REASON(FIPS_R_ENTROPY_SOURCE_STUCK) ,"entropy source stuck"},
151{ERR_REASON(FIPS_R_ERROR_INITIALISING_DRBG),"error initialising drbg"},
152{ERR_REASON(FIPS_R_ERROR_INSTANTIATING_DRBG),"error instantiating drbg"},
153{ERR_REASON(FIPS_R_ERROR_RETRIEVING_ADDITIONAL_INPUT),"error retrieving additional input"},
154{ERR_REASON(FIPS_R_ERROR_RETRIEVING_ENTROPY),"error retrieving entropy"},
155{ERR_REASON(FIPS_R_ERROR_RETRIEVING_NONCE),"error retrieving nonce"},
108{ERR_REASON(FIPS_R_FINGERPRINT_DOES_NOT_MATCH),"fingerprint does not match"}, 156{ERR_REASON(FIPS_R_FINGERPRINT_DOES_NOT_MATCH),"fingerprint does not match"},
109{ERR_REASON(FIPS_R_FINGERPRINT_DOES_NOT_MATCH_NONPIC_RELOCATED),"fingerprint does not match nonpic relocated"}, 157{ERR_REASON(FIPS_R_FINGERPRINT_DOES_NOT_MATCH_NONPIC_RELOCATED),"fingerprint does not match nonpic relocated"},
110{ERR_REASON(FIPS_R_FINGERPRINT_DOES_NOT_MATCH_SEGMENT_ALIASING),"fingerprint does not match segment aliasing"}, 158{ERR_REASON(FIPS_R_FINGERPRINT_DOES_NOT_MATCH_SEGMENT_ALIASING),"fingerprint does not match segment aliasing"},
111{ERR_REASON(FIPS_R_FIPS_MODE_ALREADY_SET),"fips mode already set"}, 159{ERR_REASON(FIPS_R_FIPS_MODE_ALREADY_SET),"fips mode already set"},
112{ERR_REASON(FIPS_R_FIPS_SELFTEST_FAILED) ,"fips selftest failed"}, 160{ERR_REASON(FIPS_R_FIPS_SELFTEST_FAILED) ,"fips selftest failed"},
161{ERR_REASON(FIPS_R_FUNCTION_ERROR) ,"function error"},
162{ERR_REASON(FIPS_R_GENERATE_ERROR) ,"generate error"},
163{ERR_REASON(FIPS_R_GENERATE_ERROR_UNDETECTED),"generate error undetected"},
164{ERR_REASON(FIPS_R_INSTANTIATE_ERROR) ,"instantiate error"},
165{ERR_REASON(FIPS_R_INSUFFICIENT_SECURITY_STRENGTH),"insufficient security strength"},
166{ERR_REASON(FIPS_R_INTERNAL_ERROR) ,"internal error"},
113{ERR_REASON(FIPS_R_INVALID_KEY_LENGTH) ,"invalid key length"}, 167{ERR_REASON(FIPS_R_INVALID_KEY_LENGTH) ,"invalid key length"},
168{ERR_REASON(FIPS_R_INVALID_PARAMETERS) ,"invalid parameters"},
169{ERR_REASON(FIPS_R_IN_ERROR_STATE) ,"in error state"},
114{ERR_REASON(FIPS_R_KEY_TOO_SHORT) ,"key too short"}, 170{ERR_REASON(FIPS_R_KEY_TOO_SHORT) ,"key too short"},
171{ERR_REASON(FIPS_R_NONCE_ERROR_UNDETECTED),"nonce error undetected"},
115{ERR_REASON(FIPS_R_NON_FIPS_METHOD) ,"non fips method"}, 172{ERR_REASON(FIPS_R_NON_FIPS_METHOD) ,"non fips method"},
173{ERR_REASON(FIPS_R_NOPR_TEST1_FAILURE) ,"nopr test1 failure"},
174{ERR_REASON(FIPS_R_NOPR_TEST2_FAILURE) ,"nopr test2 failure"},
175{ERR_REASON(FIPS_R_NOT_INSTANTIATED) ,"not instantiated"},
116{ERR_REASON(FIPS_R_PAIRWISE_TEST_FAILED) ,"pairwise test failed"}, 176{ERR_REASON(FIPS_R_PAIRWISE_TEST_FAILED) ,"pairwise test failed"},
117{ERR_REASON(FIPS_R_RSA_DECRYPT_ERROR) ,"rsa decrypt error"}, 177{ERR_REASON(FIPS_R_PERSONALISATION_ERROR_UNDETECTED),"personalisation error undetected"},
118{ERR_REASON(FIPS_R_RSA_ENCRYPT_ERROR) ,"rsa encrypt error"}, 178{ERR_REASON(FIPS_R_PERSONALISATION_STRING_TOO_LONG),"personalisation string too long"},
179{ERR_REASON(FIPS_R_PRNG_STRENGTH_TOO_LOW),"prng strength too low"},
180{ERR_REASON(FIPS_R_PR_TEST1_FAILURE) ,"pr test1 failure"},
181{ERR_REASON(FIPS_R_PR_TEST2_FAILURE) ,"pr test2 failure"},
182{ERR_REASON(FIPS_R_REQUEST_LENGTH_ERROR_UNDETECTED),"request length error undetected"},
183{ERR_REASON(FIPS_R_REQUEST_TOO_LARGE_FOR_DRBG),"request too large for drbg"},
184{ERR_REASON(FIPS_R_RESEED_COUNTER_ERROR) ,"reseed counter error"},
185{ERR_REASON(FIPS_R_RESEED_ERROR) ,"reseed error"},
119{ERR_REASON(FIPS_R_SELFTEST_FAILED) ,"selftest failed"}, 186{ERR_REASON(FIPS_R_SELFTEST_FAILED) ,"selftest failed"},
187{ERR_REASON(FIPS_R_SELFTEST_FAILURE) ,"selftest failure"},
188{ERR_REASON(FIPS_R_STRENGTH_ERROR_UNDETECTED),"strength error undetected"},
120{ERR_REASON(FIPS_R_TEST_FAILURE) ,"test failure"}, 189{ERR_REASON(FIPS_R_TEST_FAILURE) ,"test failure"},
190{ERR_REASON(FIPS_R_UNINSTANTIATE_ERROR) ,"uninstantiate error"},
191{ERR_REASON(FIPS_R_UNINSTANTIATE_ZEROISE_ERROR),"uninstantiate zeroise error"},
192{ERR_REASON(FIPS_R_UNSUPPORTED_DRBG_TYPE),"unsupported drbg type"},
121{ERR_REASON(FIPS_R_UNSUPPORTED_PLATFORM) ,"unsupported platform"}, 193{ERR_REASON(FIPS_R_UNSUPPORTED_PLATFORM) ,"unsupported platform"},
122{0,NULL} 194{0,NULL}
123 }; 195 };
diff --git a/src/lib/libssl/src/crypto/fips_ers.c b/src/lib/libssl/src/crypto/fips_ers.c
new file mode 100644
index 0000000000..09f11748f6
--- /dev/null
+++ b/src/lib/libssl/src/crypto/fips_ers.c
@@ -0,0 +1,7 @@
1#include <openssl/opensslconf.h>
2
3#ifdef OPENSSL_FIPS
4# include "fips_err.h"
5#else
6static void *dummy=&dummy;
7#endif
diff --git a/src/lib/libssl/src/crypto/hmac/hm_ameth.c b/src/lib/libssl/src/crypto/hmac/hm_ameth.c
index 6d8a89149e..e03f24aeda 100644
--- a/src/lib/libssl/src/crypto/hmac/hm_ameth.c
+++ b/src/lib/libssl/src/crypto/hmac/hm_ameth.c
@@ -153,7 +153,7 @@ const EVP_PKEY_ASN1_METHOD hmac_asn1_meth =
153 153
154 hmac_size, 154 hmac_size,
155 0, 155 0,
156 0,0,0,0,0,0, 156 0,0,0,0,0,0,0,
157 157
158 hmac_key_free, 158 hmac_key_free,
159 hmac_pkey_ctrl, 159 hmac_pkey_ctrl,
diff --git a/src/lib/libssl/src/crypto/hmac/hm_pmeth.c b/src/lib/libssl/src/crypto/hmac/hm_pmeth.c
index 71e8567a14..0daa44511d 100644
--- a/src/lib/libssl/src/crypto/hmac/hm_pmeth.c
+++ b/src/lib/libssl/src/crypto/hmac/hm_pmeth.c
@@ -100,7 +100,8 @@ static int pkey_hmac_copy(EVP_PKEY_CTX *dst, EVP_PKEY_CTX *src)
100 dctx = dst->data; 100 dctx = dst->data;
101 dctx->md = sctx->md; 101 dctx->md = sctx->md;
102 HMAC_CTX_init(&dctx->ctx); 102 HMAC_CTX_init(&dctx->ctx);
103 HMAC_CTX_copy(&dctx->ctx, &sctx->ctx); 103 if (!HMAC_CTX_copy(&dctx->ctx, &sctx->ctx))
104 return 0;
104 if (sctx->ktmp.data) 105 if (sctx->ktmp.data)
105 { 106 {
106 if (!ASN1_OCTET_STRING_set(&dctx->ktmp, 107 if (!ASN1_OCTET_STRING_set(&dctx->ktmp,
@@ -141,7 +142,8 @@ static int pkey_hmac_keygen(EVP_PKEY_CTX *ctx, EVP_PKEY *pkey)
141static int int_update(EVP_MD_CTX *ctx,const void *data,size_t count) 142static int int_update(EVP_MD_CTX *ctx,const void *data,size_t count)
142 { 143 {
143 HMAC_PKEY_CTX *hctx = ctx->pctx->data; 144 HMAC_PKEY_CTX *hctx = ctx->pctx->data;
144 HMAC_Update(&hctx->ctx, data, count); 145 if (!HMAC_Update(&hctx->ctx, data, count))
146 return 0;
145 return 1; 147 return 1;
146 } 148 }
147 149
@@ -167,7 +169,8 @@ static int hmac_signctx(EVP_PKEY_CTX *ctx, unsigned char *sig, size_t *siglen,
167 if (!sig) 169 if (!sig)
168 return 1; 170 return 1;
169 171
170 HMAC_Final(&hctx->ctx, sig, &hlen); 172 if (!HMAC_Final(&hctx->ctx, sig, &hlen))
173 return 0;
171 *siglen = (size_t)hlen; 174 *siglen = (size_t)hlen;
172 return 1; 175 return 1;
173 } 176 }
@@ -192,8 +195,9 @@ static int pkey_hmac_ctrl(EVP_PKEY_CTX *ctx, int type, int p1, void *p2)
192 195
193 case EVP_PKEY_CTRL_DIGESTINIT: 196 case EVP_PKEY_CTRL_DIGESTINIT:
194 key = (ASN1_OCTET_STRING *)ctx->pkey->pkey.ptr; 197 key = (ASN1_OCTET_STRING *)ctx->pkey->pkey.ptr;
195 HMAC_Init_ex(&hctx->ctx, key->data, key->length, hctx->md, 198 if (!HMAC_Init_ex(&hctx->ctx, key->data, key->length, hctx->md,
196 ctx->engine); 199 ctx->engine))
200 return 0;
197 break; 201 break;
198 202
199 default: 203 default:
diff --git a/src/lib/libssl/src/crypto/ia64cpuid.S b/src/lib/libssl/src/crypto/ia64cpuid.S
index d705fff7ee..7832b9b640 100644
--- a/src/lib/libssl/src/crypto/ia64cpuid.S
+++ b/src/lib/libssl/src/crypto/ia64cpuid.S
@@ -26,7 +26,7 @@ OPENSSL_atomic_add:
26{ .mii; mov ar.ccv=r2 26{ .mii; mov ar.ccv=r2
27 add r8=r2,r33 27 add r8=r2,r33
28 mov r3=r2 };; 28 mov r3=r2 };;
29{ .mmi; mf 29{ .mmi; mf;;
30 cmpxchg4.acq r2=[r32],r8,ar.ccv 30 cmpxchg4.acq r2=[r32],r8,ar.ccv
31 nop.i 0 };; 31 nop.i 0 };;
32{ .mib; cmp.ne p6,p0=r2,r3 32{ .mib; cmp.ne p6,p0=r2,r3
diff --git a/src/lib/libssl/src/crypto/idea/i_cbc.c b/src/lib/libssl/src/crypto/idea/i_cbc.c
new file mode 100644
index 0000000000..ecb9cb8b83
--- /dev/null
+++ b/src/lib/libssl/src/crypto/idea/i_cbc.c
@@ -0,0 +1,168 @@
1/* crypto/idea/i_cbc.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
59#include <openssl/idea.h>
60#include "idea_lcl.h"
61
62void idea_cbc_encrypt(const unsigned char *in, unsigned char *out, long length,
63 IDEA_KEY_SCHEDULE *ks, unsigned char *iv, int encrypt)
64 {
65 register unsigned long tin0,tin1;
66 register unsigned long tout0,tout1,xor0,xor1;
67 register long l=length;
68 unsigned long tin[2];
69
70 if (encrypt)
71 {
72 n2l(iv,tout0);
73 n2l(iv,tout1);
74 iv-=8;
75 for (l-=8; l>=0; l-=8)
76 {
77 n2l(in,tin0);
78 n2l(in,tin1);
79 tin0^=tout0;
80 tin1^=tout1;
81 tin[0]=tin0;
82 tin[1]=tin1;
83 idea_encrypt(tin,ks);
84 tout0=tin[0]; l2n(tout0,out);
85 tout1=tin[1]; l2n(tout1,out);
86 }
87 if (l != -8)
88 {
89 n2ln(in,tin0,tin1,l+8);
90 tin0^=tout0;
91 tin1^=tout1;
92 tin[0]=tin0;
93 tin[1]=tin1;
94 idea_encrypt(tin,ks);
95 tout0=tin[0]; l2n(tout0,out);
96 tout1=tin[1]; l2n(tout1,out);
97 }
98 l2n(tout0,iv);
99 l2n(tout1,iv);
100 }
101 else
102 {
103 n2l(iv,xor0);
104 n2l(iv,xor1);
105 iv-=8;
106 for (l-=8; l>=0; l-=8)
107 {
108 n2l(in,tin0); tin[0]=tin0;
109 n2l(in,tin1); tin[1]=tin1;
110 idea_encrypt(tin,ks);
111 tout0=tin[0]^xor0;
112 tout1=tin[1]^xor1;
113 l2n(tout0,out);
114 l2n(tout1,out);
115 xor0=tin0;
116 xor1=tin1;
117 }
118 if (l != -8)
119 {
120 n2l(in,tin0); tin[0]=tin0;
121 n2l(in,tin1); tin[1]=tin1;
122 idea_encrypt(tin,ks);
123 tout0=tin[0]^xor0;
124 tout1=tin[1]^xor1;
125 l2nn(tout0,tout1,out,l+8);
126 xor0=tin0;
127 xor1=tin1;
128 }
129 l2n(xor0,iv);
130 l2n(xor1,iv);
131 }
132 tin0=tin1=tout0=tout1=xor0=xor1=0;
133 tin[0]=tin[1]=0;
134 }
135
136void idea_encrypt(unsigned long *d, IDEA_KEY_SCHEDULE *key)
137 {
138 register IDEA_INT *p;
139 register unsigned long x1,x2,x3,x4,t0,t1,ul;
140
141 x2=d[0];
142 x1=(x2>>16);
143 x4=d[1];
144 x3=(x4>>16);
145
146 p= &(key->data[0][0]);
147
148 E_IDEA(0);
149 E_IDEA(1);
150 E_IDEA(2);
151 E_IDEA(3);
152 E_IDEA(4);
153 E_IDEA(5);
154 E_IDEA(6);
155 E_IDEA(7);
156
157 x1&=0xffff;
158 idea_mul(x1,x1,*p,ul); p++;
159
160 t0= x3+ *(p++);
161 t1= x2+ *(p++);
162
163 x4&=0xffff;
164 idea_mul(x4,x4,*p,ul);
165
166 d[0]=(t0&0xffff)|((x1&0xffff)<<16);
167 d[1]=(x4&0xffff)|((t1&0xffff)<<16);
168 }
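
Editor's note: idea_cbc_encrypt() processes whole 8-byte blocks and updates the IV buffer in place, so consecutive calls chain correctly. A minimal sketch under assumed placeholder key and IV buffers; idea_cbc_seal() is hypothetical and len is taken to be a multiple of 8.

#include <string.h>
#include <openssl/idea.h>

/* Hypothetical one-shot helper: encrypt buf in place with IDEA-CBC. */
static void idea_cbc_seal(unsigned char *buf, long len,
	const unsigned char key[16], const unsigned char iv_in[8])
	{
	IDEA_KEY_SCHEDULE ks;
	unsigned char iv[8];

	idea_set_encrypt_key(key, &ks);
	memcpy(iv, iv_in, 8);	/* the IV is updated in place for chaining */
	idea_cbc_encrypt(buf, buf, len, &ks, iv, IDEA_ENCRYPT);
	}

Decryption uses the same call with IDEA_DECRYPT, but against a schedule prepared with idea_set_decrypt_key().
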
diff --git a/src/lib/libssl/src/crypto/idea/i_cfb64.c b/src/lib/libssl/src/crypto/idea/i_cfb64.c
new file mode 100644
index 0000000000..66d49d520e
--- /dev/null
+++ b/src/lib/libssl/src/crypto/idea/i_cfb64.c
@@ -0,0 +1,122 @@
1/* crypto/idea/i_cfb64.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
59#include <openssl/idea.h>
60#include "idea_lcl.h"
61
62/* The input and output are encrypted as though 64-bit CFB mode is being
63 * used.  The extra state information recording how much of the current
64 * 64-bit block has been used is kept in *num.
65 */
66
67void idea_cfb64_encrypt(const unsigned char *in, unsigned char *out,
68 long length, IDEA_KEY_SCHEDULE *schedule,
69 unsigned char *ivec, int *num, int encrypt)
70 {
71 register unsigned long v0,v1,t;
72 register int n= *num;
73 register long l=length;
74 unsigned long ti[2];
75 unsigned char *iv,c,cc;
76
77 iv=(unsigned char *)ivec;
78 if (encrypt)
79 {
80 while (l--)
81 {
82 if (n == 0)
83 {
84 n2l(iv,v0); ti[0]=v0;
85 n2l(iv,v1); ti[1]=v1;
86 idea_encrypt((unsigned long *)ti,schedule);
87 iv=(unsigned char *)ivec;
88 t=ti[0]; l2n(t,iv);
89 t=ti[1]; l2n(t,iv);
90 iv=(unsigned char *)ivec;
91 }
92 c= *(in++)^iv[n];
93 *(out++)=c;
94 iv[n]=c;
95 n=(n+1)&0x07;
96 }
97 }
98 else
99 {
100 while (l--)
101 {
102 if (n == 0)
103 {
104 n2l(iv,v0); ti[0]=v0;
105 n2l(iv,v1); ti[1]=v1;
106 idea_encrypt((unsigned long *)ti,schedule);
107 iv=(unsigned char *)ivec;
108 t=ti[0]; l2n(t,iv);
109 t=ti[1]; l2n(t,iv);
110 iv=(unsigned char *)ivec;
111 }
112 cc= *(in++);
113 c=iv[n];
114 iv[n]=cc;
115 *(out++)=c^cc;
116 n=(n+1)&0x07;
117 }
118 }
119 v0=v1=ti[0]=ti[1]=t=c=cc=0;
120 *num=n;
121 }
122
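
Editor's note: because the residual keystream position lives in *num and the shift register lives in ivec, a caller can feed data in arbitrary-sized chunks as long as both are carried between calls; CFB also uses the encryption key schedule in both directions. A hypothetical streaming sketch (cfb_stream_demo() and its buffers are placeholders):

#include <string.h>
#include <openssl/idea.h>

/* Hypothetical: encrypt two chunks as one continuous CFB64 stream. */
static void cfb_stream_demo(unsigned char *out,
	const unsigned char *chunk1, long len1,
	const unsigned char *chunk2, long len2,
	const unsigned char key[16], const unsigned char iv_in[8])
	{
	IDEA_KEY_SCHEDULE ks;
	unsigned char iv[8];
	int num = 0;	/* bytes of the current keystream block already used */

	idea_set_encrypt_key(key, &ks);
	memcpy(iv, iv_in, 8);
	idea_cfb64_encrypt(chunk1, out, len1, &ks, iv, &num, IDEA_ENCRYPT);
	idea_cfb64_encrypt(chunk2, out + len1, len2, &ks, iv, &num, IDEA_ENCRYPT);
	}
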
diff --git a/src/lib/libssl/src/crypto/idea/i_ecb.c b/src/lib/libssl/src/crypto/idea/i_ecb.c
new file mode 100644
index 0000000000..fef38230a7
--- /dev/null
+++ b/src/lib/libssl/src/crypto/idea/i_ecb.c
@@ -0,0 +1,85 @@
1/* crypto/idea/i_ecb.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
59#include <openssl/idea.h>
60#include "idea_lcl.h"
61#include <openssl/opensslv.h>
62
63const char IDEA_version[]="IDEA" OPENSSL_VERSION_PTEXT;
64
65const char *idea_options(void)
66 {
67 if (sizeof(short) != sizeof(IDEA_INT))
68 return("idea(int)");
69 else
70 return("idea(short)");
71 }
72
73void idea_ecb_encrypt(const unsigned char *in, unsigned char *out,
74 IDEA_KEY_SCHEDULE *ks)
75 {
76 unsigned long l0,l1,d[2];
77
78 n2l(in,l0); d[0]=l0;
79 n2l(in,l1); d[1]=l1;
80 idea_encrypt(d,ks);
81 l0=d[0]; l2n(l0,out);
82 l1=d[1]; l2n(l1,out);
83 l0=l1=d[0]=d[1]=0;
84 }
85
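
A rough sketch of how the single-block ECB entry point is used together with the key-schedule routines from i_skey.c below; the key and plaintext bytes are made up for illustration:

	/* One-block ECB round trip (illustrative values only). */
	#include <stdio.h>
	#include <string.h>
	#include <openssl/idea.h>

	int main(void)
		{
		static const unsigned char key[16] = {
			0x12,0x34,0x56,0x78,0x9a,0xbc,0xde,0xf0,
			0xfe,0xdc,0xba,0x98,0x76,0x54,0x32,0x10 };
		unsigned char block[8] = { 'a','b','c','d','e','f','g','h' };
		unsigned char enc[8], dec[8];
		IDEA_KEY_SCHEDULE ek, dk;

		idea_set_encrypt_key(key, &ek);
		idea_set_decrypt_key(&ek, &dk);   /* decryption uses a derived schedule */

		idea_ecb_encrypt(block, enc, &ek);
		idea_ecb_encrypt(enc, dec, &dk);  /* same routine, decrypt schedule */

		printf("ecb round trip %s\n",
		       memcmp(block, dec, 8) == 0 ? "ok" : "FAILED");
		return 0;
		}
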
diff --git a/src/lib/libssl/src/crypto/idea/i_ofb64.c b/src/lib/libssl/src/crypto/idea/i_ofb64.c
new file mode 100644
index 0000000000..e749e88e34
--- /dev/null
+++ b/src/lib/libssl/src/crypto/idea/i_ofb64.c
@@ -0,0 +1,111 @@
1/* crypto/idea/i_ofb64.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
59#include <openssl/idea.h>
60#include "idea_lcl.h"
61
62/* The input and output are encrypted as though 64-bit ofb mode is being
63 * used. The extra state information to record how much of the
64 * 64-bit block we have used is contained in *num;
65 */
66void idea_ofb64_encrypt(const unsigned char *in, unsigned char *out,
67 long length, IDEA_KEY_SCHEDULE *schedule,
68 unsigned char *ivec, int *num)
69 {
70 register unsigned long v0,v1,t;
71 register int n= *num;
72 register long l=length;
73 unsigned char d[8];
74 register char *dp;
75 unsigned long ti[2];
76 unsigned char *iv;
77 int save=0;
78
79 iv=(unsigned char *)ivec;
80 n2l(iv,v0);
81 n2l(iv,v1);
82 ti[0]=v0;
83 ti[1]=v1;
84 dp=(char *)d;
85 l2n(v0,dp);
86 l2n(v1,dp);
87 while (l--)
88 {
89 if (n == 0)
90 {
91 idea_encrypt((unsigned long *)ti,schedule);
92 dp=(char *)d;
93 t=ti[0]; l2n(t,dp);
94 t=ti[1]; l2n(t,dp);
95 save++;
96 }
97 *(out++)= *(in++)^d[n];
98 n=(n+1)&0x07;
99 }
100 if (save)
101 {
102 v0=ti[0];
103 v1=ti[1];
104 iv=(unsigned char *)ivec;
105 l2n(v0,iv);
106 l2n(v1,iv);
107 }
108 t=v0=v1=ti[0]=ti[1]=0;
109 *num=n;
110 }
111
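
Unlike the CFB64 routine earlier in this import, the OFB64 function has no encrypt/decrypt flag: the keystream depends only on the key and IV, so applying the same call twice with the same starting IV restores the plaintext. A small sketch with illustrative key/IV values:

	/* OFB64: the same call both encrypts and decrypts. */
	#include <stdio.h>
	#include <string.h>
	#include <openssl/idea.h>

	int main(void)
		{
		static const unsigned char key[16] = { 0 };   /* illustrative */
		unsigned char iv[8] = { 0 }, iv2[8] = { 0 };
		unsigned char msg[] = "ofb is a pure stream mode";
		unsigned char tmp[sizeof(msg)], out[sizeof(msg)];
		IDEA_KEY_SCHEDULE ks;
		int num = 0;

		idea_set_encrypt_key(key, &ks);
		idea_ofb64_encrypt(msg, tmp, sizeof(msg), &ks, iv, &num);

		num = 0;                                      /* restart the keystream */
		idea_ofb64_encrypt(tmp, out, sizeof(msg), &ks, iv2, &num);

		printf("ofb round trip %s\n",
		       memcmp(msg, out, sizeof(msg)) == 0 ? "ok" : "FAILED");
		return 0;
		}
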
diff --git a/src/lib/libssl/src/crypto/idea/i_skey.c b/src/lib/libssl/src/crypto/idea/i_skey.c
new file mode 100644
index 0000000000..afb830964d
--- /dev/null
+++ b/src/lib/libssl/src/crypto/idea/i_skey.c
@@ -0,0 +1,164 @@
1/* crypto/idea/i_skey.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
59#include <openssl/crypto.h>
60#include <openssl/idea.h>
61#include "idea_lcl.h"
62
63static IDEA_INT inverse(unsigned int xin);
64void idea_set_encrypt_key(const unsigned char *key, IDEA_KEY_SCHEDULE *ks)
65#ifdef OPENSSL_FIPS
66 {
67 fips_cipher_abort(IDEA);
68 private_idea_set_encrypt_key(key, ks);
69 }
70void private_idea_set_encrypt_key(const unsigned char *key, IDEA_KEY_SCHEDULE *ks)
71#endif
72 {
73 int i;
74 register IDEA_INT *kt,*kf,r0,r1,r2;
75
76 kt= &(ks->data[0][0]);
77 n2s(key,kt[0]); n2s(key,kt[1]); n2s(key,kt[2]); n2s(key,kt[3]);
78 n2s(key,kt[4]); n2s(key,kt[5]); n2s(key,kt[6]); n2s(key,kt[7]);
79
80 kf=kt;
81 kt+=8;
82 for (i=0; i<6; i++)
83 {
84 r2= kf[1];
85 r1= kf[2];
86 *(kt++)= ((r2<<9) | (r1>>7))&0xffff;
87 r0= kf[3];
88 *(kt++)= ((r1<<9) | (r0>>7))&0xffff;
89 r1= kf[4];
90 *(kt++)= ((r0<<9) | (r1>>7))&0xffff;
91 r0= kf[5];
92 *(kt++)= ((r1<<9) | (r0>>7))&0xffff;
93 r1= kf[6];
94 *(kt++)= ((r0<<9) | (r1>>7))&0xffff;
95 r0= kf[7];
96 *(kt++)= ((r1<<9) | (r0>>7))&0xffff;
97 r1= kf[0];
98 if (i >= 5) break;
99 *(kt++)= ((r0<<9) | (r1>>7))&0xffff;
100 *(kt++)= ((r1<<9) | (r2>>7))&0xffff;
101 kf+=8;
102 }
103 }
104
105void idea_set_decrypt_key(IDEA_KEY_SCHEDULE *ek, IDEA_KEY_SCHEDULE *dk)
106 {
107 int r;
108 register IDEA_INT *fp,*tp,t;
109
110 tp= &(dk->data[0][0]);
111 fp= &(ek->data[8][0]);
112 for (r=0; r<9; r++)
113 {
114 *(tp++)=inverse(fp[0]);
115 *(tp++)=((int)(0x10000L-fp[2])&0xffff);
116 *(tp++)=((int)(0x10000L-fp[1])&0xffff);
117 *(tp++)=inverse(fp[3]);
118 if (r == 8) break;
119 fp-=6;
120 *(tp++)=fp[4];
121 *(tp++)=fp[5];
122 }
123
124 tp= &(dk->data[0][0]);
125 t=tp[1];
126 tp[1]=tp[2];
127 tp[2]=t;
128
129 t=tp[49];
130 tp[49]=tp[50];
131 tp[50]=t;
132 }
133
134/* Taken directly from the 'paper'; I'll have a look at it later */
135static IDEA_INT inverse(unsigned int xin)
136 {
137 long n1,n2,q,r,b1,b2,t;
138
139 if (xin == 0)
140 b2=0;
141 else
142 {
143 n1=0x10001;
144 n2=xin;
145 b2=1;
146 b1=0;
147
148 do {
149 r=(n1%n2);
150 q=(n1-r)/n2;
151 if (r == 0)
152 { if (b2 < 0) b2=0x10001+b2; }
153 else
154 {
155 n1=n2;
156 n2=r;
157 t=b2;
158 b2=b1-q*b2;
159 b1=t;
160 }
161 } while (r != 0);
162 }
163 return((IDEA_INT)b2);
164 }
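
The inverse() routine above is an extended-Euclid computation of the multiplicative inverse modulo 0x10001 (with the 16-bit value 0 standing in for 0x10000), which is what idea_set_decrypt_key needs to undo the key-schedule multiplications. A standalone check, written against a copy of the routine since the original is static; the helper names here are made up for the sketch:

	/* Standalone check of the mod-0x10001 inverse used by the key schedule.
	 * mul_0x10001() mirrors IDEA's convention that 0 encodes 0x10000. */
	#include <stdio.h>

	static unsigned int mul_0x10001(unsigned int a, unsigned int b)
		{
		unsigned long aa = a ? a : 0x10000UL;
		unsigned long bb = b ? b : 0x10000UL;
		return (unsigned int)((aa * bb) % 0x10001UL) & 0xffff;
		}

	/* Same algorithm as inverse() in i_skey.c above. */
	static unsigned int inv_0x10001(unsigned int xin)
		{
		long n1 = 0x10001, n2 = xin, b1 = 0, b2 = 1, q, r, t;

		if (xin == 0)
			return 0;                /* 0x10000 is its own inverse */
		do {
			r = n1 % n2;
			q = (n1 - r) / n2;
			if (r == 0)
				{ if (b2 < 0) b2 += 0x10001; }
			else
				{
				n1 = n2; n2 = r;
				t = b2; b2 = b1 - q * b2; b1 = t;
				}
			} while (r != 0);
		return (unsigned int)b2;
		}

	int main(void)
		{
		unsigned int x;

		for (x = 0; x <= 0xffff; x++)
			if (mul_0x10001(x, inv_0x10001(x)) != 1)
				{
				printf("inverse failed for %u\n", x);
				return 1;
				}
		printf("all 65536 inverses check out\n");
		return 0;
		}
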
diff --git a/src/lib/libssl/src/crypto/idea/idea_lcl.h b/src/lib/libssl/src/crypto/idea/idea_lcl.h
new file mode 100644
index 0000000000..f3dbfa67e9
--- /dev/null
+++ b/src/lib/libssl/src/crypto/idea/idea_lcl.h
@@ -0,0 +1,215 @@
1/* crypto/idea/idea_lcl.h */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
59/* The new form of this macro (check if a*b == 0) was suggested by
60 * Colin Plumb <colin@nyx10.cs.du.edu> */
61/* Removal of the inner if, from Wei Dai 24/4/96 */
62#define idea_mul(r,a,b,ul) \
63ul=(unsigned long)a*b; \
64if (ul != 0) \
65 { \
66 r=(ul&0xffff)-(ul>>16); \
67 r-=((r)>>16); \
68 } \
69else \
70 r=(-(int)a-b+1); /* assuming a or b is 0 and in range */
71
72#ifdef undef
73#define idea_mul(r,a,b,ul,sl) \
74if (a == 0) r=(0x10001-b)&0xffff; \
75else if (b == 0) r=(0x10001-a)&0xffff; \
76else { \
77 ul=(unsigned long)a*b; \
78 sl=(ul&0xffff)-(ul>>16); \
79 if (sl <= 0) sl+=0x10001; \
80 r=sl; \
81 }
82#endif
83
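
The trick behind idea_mul is that 2^16 is congruent to -1 (mod 0x10001), so for a non-zero 32-bit product ul the value (ul & 0xffff) - (ul >> 16) is already the product modulo 0x10001 up to one conditional correction; the disabled variant just above spells that correction out explicitly. A small self-check of the identity (a standalone sketch, not part of this header):

	/* Check: for non-zero 16-bit a, b, low16(a*b) - high16(a*b), corrected
	 * for borrow, equals (a*b) mod 0x10001 (with 0x10000 encoded as 0). */
	#include <stdio.h>

	int main(void)
		{
		unsigned long a, b, bad = 0;

		for (a = 1; a <= 0xffff; a += 257)        /* sparse sweep keeps it quick */
			for (b = 1; b <= 0xffff; b += 101)
				{
				unsigned long ul = a * b;
				unsigned long lo = ul & 0xffff, hi = ul >> 16;
				unsigned long r = (lo >= hi) ? lo - hi : lo - hi + 0x10001;
				unsigned long want = ul % 0x10001;

				if ((r & 0xffff) != (want & 0xffff))
					bad++;
				}
		printf("%lu mismatches\n", bad);
		return bad != 0;
		}
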
84/* 7/12/95 - Many thanks to Rhys Weatherley <rweather@us.oracle.com>
85 * for pointing out that I was assuming little-endian
86 * byte order for all quantities when IDEA
87 * actually uses big-endian.  Nowhere in the spec does it mention
88 * this; it is all in terms of 16-bit numbers and even the example
89 * does not use byte streams for the input :-(.
90 * If you byte swap each pair of input, keys and iv, the functions
91 * produce the same output as the old version :-(.
92 */
93
94/* NOTE - c is not incremented as per n2l */
95#define n2ln(c,l1,l2,n) { \
96 c+=n; \
97 l1=l2=0; \
98 switch (n) { \
99 case 8: l2 =((unsigned long)(*(--(c)))) ; \
100 case 7: l2|=((unsigned long)(*(--(c))))<< 8; \
101 case 6: l2|=((unsigned long)(*(--(c))))<<16; \
102 case 5: l2|=((unsigned long)(*(--(c))))<<24; \
103 case 4: l1 =((unsigned long)(*(--(c)))) ; \
104 case 3: l1|=((unsigned long)(*(--(c))))<< 8; \
105 case 2: l1|=((unsigned long)(*(--(c))))<<16; \
106 case 1: l1|=((unsigned long)(*(--(c))))<<24; \
107 } \
108 }
109
110/* NOTE - c is not incremented as per l2n */
111#define l2nn(l1,l2,c,n) { \
112 c+=n; \
113 switch (n) { \
114 case 8: *(--(c))=(unsigned char)(((l2) )&0xff); \
115 case 7: *(--(c))=(unsigned char)(((l2)>> 8)&0xff); \
116 case 6: *(--(c))=(unsigned char)(((l2)>>16)&0xff); \
117 case 5: *(--(c))=(unsigned char)(((l2)>>24)&0xff); \
118 case 4: *(--(c))=(unsigned char)(((l1) )&0xff); \
119 case 3: *(--(c))=(unsigned char)(((l1)>> 8)&0xff); \
120 case 2: *(--(c))=(unsigned char)(((l1)>>16)&0xff); \
121 case 1: *(--(c))=(unsigned char)(((l1)>>24)&0xff); \
122 } \
123 }
124
125#undef n2l
126#define n2l(c,l) (l =((unsigned long)(*((c)++)))<<24L, \
127 l|=((unsigned long)(*((c)++)))<<16L, \
128 l|=((unsigned long)(*((c)++)))<< 8L, \
129 l|=((unsigned long)(*((c)++))))
130
131#undef l2n
132#define l2n(l,c) (*((c)++)=(unsigned char)(((l)>>24L)&0xff), \
133 *((c)++)=(unsigned char)(((l)>>16L)&0xff), \
134 *((c)++)=(unsigned char)(((l)>> 8L)&0xff), \
135 *((c)++)=(unsigned char)(((l) )&0xff))
136
137#undef s2n
138#define s2n(l,c) (*((c)++)=(unsigned char)(((l) )&0xff), \
139 *((c)++)=(unsigned char)(((l)>> 8L)&0xff))
140
141#undef n2s
142#define n2s(c,l) (l =((IDEA_INT)(*((c)++)))<< 8L, \
143 l|=((IDEA_INT)(*((c)++))) )
144
145#ifdef undef
146/* NOTE - c is not incremented as per c2l */
147#define c2ln(c,l1,l2,n) { \
148 c+=n; \
149 l1=l2=0; \
150 switch (n) { \
151 case 8: l2 =((unsigned long)(*(--(c))))<<24; \
152 case 7: l2|=((unsigned long)(*(--(c))))<<16; \
153 case 6: l2|=((unsigned long)(*(--(c))))<< 8; \
154 case 5: l2|=((unsigned long)(*(--(c)))); \
155 case 4: l1 =((unsigned long)(*(--(c))))<<24; \
156 case 3: l1|=((unsigned long)(*(--(c))))<<16; \
157 case 2: l1|=((unsigned long)(*(--(c))))<< 8; \
158 case 1: l1|=((unsigned long)(*(--(c)))); \
159 } \
160 }
161
162/* NOTE - c is not incremented as per l2c */
163#define l2cn(l1,l2,c,n) { \
164 c+=n; \
165 switch (n) { \
166 case 8: *(--(c))=(unsigned char)(((l2)>>24)&0xff); \
167 case 7: *(--(c))=(unsigned char)(((l2)>>16)&0xff); \
168 case 6: *(--(c))=(unsigned char)(((l2)>> 8)&0xff); \
169 case 5: *(--(c))=(unsigned char)(((l2) )&0xff); \
170 case 4: *(--(c))=(unsigned char)(((l1)>>24)&0xff); \
171 case 3: *(--(c))=(unsigned char)(((l1)>>16)&0xff); \
172 case 2: *(--(c))=(unsigned char)(((l1)>> 8)&0xff); \
173 case 1: *(--(c))=(unsigned char)(((l1) )&0xff); \
174 } \
175 }
176
177#undef c2s
178#define c2s(c,l) (l =((unsigned long)(*((c)++))) , \
179 l|=((unsigned long)(*((c)++)))<< 8L)
180
181#undef s2c
182#define s2c(l,c) (*((c)++)=(unsigned char)(((l) )&0xff), \
183 *((c)++)=(unsigned char)(((l)>> 8L)&0xff))
184
185#undef c2l
186#define c2l(c,l) (l =((unsigned long)(*((c)++))) , \
187 l|=((unsigned long)(*((c)++)))<< 8L, \
188 l|=((unsigned long)(*((c)++)))<<16L, \
189 l|=((unsigned long)(*((c)++)))<<24L)
190
191#undef l2c
192#define l2c(l,c) (*((c)++)=(unsigned char)(((l) )&0xff), \
193 *((c)++)=(unsigned char)(((l)>> 8L)&0xff), \
194 *((c)++)=(unsigned char)(((l)>>16L)&0xff), \
195 *((c)++)=(unsigned char)(((l)>>24L)&0xff))
196#endif
197
198#define E_IDEA(num) \
199 x1&=0xffff; \
200 idea_mul(x1,x1,*p,ul); p++; \
201 x2+= *(p++); \
202 x3+= *(p++); \
203 x4&=0xffff; \
204 idea_mul(x4,x4,*p,ul); p++; \
205 t0=(x1^x3)&0xffff; \
206 idea_mul(t0,t0,*p,ul); p++; \
207 t1=(t0+(x2^x4))&0xffff; \
208 idea_mul(t1,t1,*p,ul); p++; \
209 t0+=t1; \
210 x1^=t1; \
211 x4^=t0; \
212 ul=x2^t0; /* do the swap to x3 */ \
213 x2=x3^t1; \
214 x3=ul;
215
diff --git a/src/lib/libssl/src/crypto/idea/idea_spd.c b/src/lib/libssl/src/crypto/idea/idea_spd.c
new file mode 100644
index 0000000000..699353e871
--- /dev/null
+++ b/src/lib/libssl/src/crypto/idea/idea_spd.c
@@ -0,0 +1,299 @@
1/* crypto/idea/idea_spd.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
59/* 11-Sep-92 Andrew Daviel Support for Silicon Graphics IRIX added */
60/* 06-Apr-92 Luke Brennan Support for VMS and add extra signal calls */
61
62#if !defined(OPENSSL_SYS_MSDOS) && (!defined(OPENSSL_SYS_VMS) || defined(__DECC)) && !defined(OPENSSL_SYS_MACOSX)
63#define TIMES
64#endif
65
66#include <stdio.h>
67
68#include <openssl/e_os2.h>
69#include OPENSSL_UNISTD_IO
70OPENSSL_DECLARE_EXIT
71
72#ifndef OPENSSL_SYS_NETWARE
73#include <signal.h>
74#endif
75
76#ifndef _IRIX
77#include <time.h>
78#endif
79#ifdef TIMES
80#include <sys/types.h>
81#include <sys/times.h>
82#endif
83
84/* Depending on the VMS version, the tms structure is perhaps defined.
85 The __TMS macro will show if it was. If it wasn't defined, we should
86 undefine TIMES, since that tells the rest of the program how things
87 should be handled. -- Richard Levitte */
88#if defined(OPENSSL_SYS_VMS_DECC) && !defined(__TMS)
89#undef TIMES
90#endif
91
92#ifndef TIMES
93#include <sys/timeb.h>
94#endif
95
96#if defined(sun) || defined(__ultrix)
97#define _POSIX_SOURCE
98#include <limits.h>
99#include <sys/param.h>
100#endif
101
102#include <openssl/idea.h>
103
104/* The following is from the times(3) man page.  It may need to be changed */
105#ifndef HZ
106#ifndef CLK_TCK
107#define HZ 100.0
108#else /* CLK_TCK */
109#define HZ ((double)CLK_TCK)
110#endif
111#endif
112
113#define BUFSIZE ((long)1024)
114long run=0;
115
116double Time_F(int s);
117#ifdef SIGALRM
118#if defined(__STDC__) || defined(sgi) || defined(_AIX)
119#define SIGRETTYPE void
120#else
121#define SIGRETTYPE int
122#endif
123
124SIGRETTYPE sig_done(int sig);
125SIGRETTYPE sig_done(int sig)
126 {
127 signal(SIGALRM,sig_done);
128 run=0;
129#ifdef LINT
130 sig=sig;
131#endif
132 }
133#endif
134
135#define START 0
136#define STOP 1
137
138double Time_F(int s)
139 {
140 double ret;
141#ifdef TIMES
142 static struct tms tstart,tend;
143
144 if (s == START)
145 {
146 times(&tstart);
147 return(0);
148 }
149 else
150 {
151 times(&tend);
152 ret=((double)(tend.tms_utime-tstart.tms_utime))/HZ;
153 return((ret == 0.0)?1e-6:ret);
154 }
155#else /* !times() */
156 static struct timeb tstart,tend;
157 long i;
158
159 if (s == START)
160 {
161 ftime(&tstart);
162 return(0);
163 }
164 else
165 {
166 ftime(&tend);
167 i=(long)tend.millitm-(long)tstart.millitm;
168 ret=((double)(tend.time-tstart.time))+((double)i)/1e3;
169 return((ret == 0.0)?1e-6:ret);
170 }
171#endif
172 }
173
174int main(int argc, char **argv)
175 {
176 long count;
177 static unsigned char buf[BUFSIZE];
178 static unsigned char key[] ={
179 0x12,0x34,0x56,0x78,0x9a,0xbc,0xde,0xf0,
180 0xfe,0xdc,0xba,0x98,0x76,0x54,0x32,0x10,
181 };
182 IDEA_KEY_SCHEDULE sch;
183 double a,aa,b,c,d;
184#ifndef SIGALRM
185 long ca,cca,cb,cc;
186#endif
187
188#ifndef TIMES
189 printf("To get the most accurate results, try to run this\n");
190 printf("program when this computer is idle.\n");
191#endif
192
193#ifndef SIGALRM
194 printf("First we calculate the approximate speed ...\n");
195 idea_set_encrypt_key(key,&sch);
196 count=10;
197 do {
198 long i;
199 IDEA_INT data[2];
200
201 count*=2;
202 Time_F(START);
203 for (i=count; i; i--)
204 idea_encrypt(data,&sch);
205 d=Time_F(STOP);
206 } while (d < 3.0);
207 ca=count/4;
208 cca=count/200;
209 cb=count;
210 cc=count*8/BUFSIZE+1;
211 printf("idea_set_encrypt_key %ld times\n",ca);
212#define COND(d) (count <= (d))
213#define COUNT(d) (d)
214#else
215#define COND(c) (run)
216#define COUNT(d) (count)
217 signal(SIGALRM,sig_done);
218 printf("Doing idea_set_encrypt_key for 10 seconds\n");
219 alarm(10);
220#endif
221
222 Time_F(START);
223 for (count=0,run=1; COND(ca); count+=4)
224 {
225 idea_set_encrypt_key(key,&sch);
226 idea_set_encrypt_key(key,&sch);
227 idea_set_encrypt_key(key,&sch);
228 idea_set_encrypt_key(key,&sch);
229 }
230 d=Time_F(STOP);
231 printf("%ld idea idea_set_encrypt_key's in %.2f seconds\n",count,d);
232 a=((double)COUNT(ca))/d;
233
234#ifdef SIGALRM
235 printf("Doing idea_set_decrypt_key for 10 seconds\n");
236 alarm(10);
237#else
238 printf("Doing idea_set_decrypt_key %ld times\n",cca);
239#endif
240
241 Time_F(START);
242 for (count=0,run=1; COND(cca); count+=4)
243 {
244 idea_set_decrypt_key(&sch,&sch);
245 idea_set_decrypt_key(&sch,&sch);
246 idea_set_decrypt_key(&sch,&sch);
247 idea_set_decrypt_key(&sch,&sch);
248 }
249 d=Time_F(STOP);
250 printf("%ld idea idea_set_decrypt_key's in %.2f seconds\n",count,d);
251 aa=((double)COUNT(cca))/d;
252
253#ifdef SIGALRM
254 printf("Doing idea_encrypt's for 10 seconds\n");
255 alarm(10);
256#else
257 printf("Doing idea_encrypt %ld times\n",cb);
258#endif
259 Time_F(START);
260 for (count=0,run=1; COND(cb); count+=4)
261 {
262 unsigned long data[2];
263
264 idea_encrypt(data,&sch);
265 idea_encrypt(data,&sch);
266 idea_encrypt(data,&sch);
267 idea_encrypt(data,&sch);
268 }
269 d=Time_F(STOP);
270 printf("%ld idea_encrypt's in %.2f second\n",count,d);
271 b=((double)COUNT(cb)*8)/d;
272
273#ifdef SIGALRM
274 printf("Doing idea_cbc_encrypt on %ld byte blocks for 10 seconds\n",
275 BUFSIZE);
276 alarm(10);
277#else
278 printf("Doing idea_cbc_encrypt %ld times on %ld byte blocks\n",cc,
279 BUFSIZE);
280#endif
281 Time_F(START);
282 for (count=0,run=1; COND(cc); count++)
283 idea_cbc_encrypt(buf,buf,BUFSIZE,&sch,
284 &(key[0]),IDEA_ENCRYPT);
285 d=Time_F(STOP);
286 printf("%ld idea_cbc_encrypt's of %ld byte blocks in %.2f second\n",
287 count,BUFSIZE,d);
288 c=((double)COUNT(cc)*BUFSIZE)/d;
289
290 printf("IDEA set_encrypt_key per sec = %12.2f (%9.3fuS)\n",a,1.0e6/a);
291 printf("IDEA set_decrypt_key per sec = %12.2f (%9.3fuS)\n",aa,1.0e6/aa);
292 printf("IDEA raw ecb bytes per sec = %12.2f (%9.3fuS)\n",b,8.0e6/b);
293 printf("IDEA cbc bytes per sec = %12.2f (%9.3fuS)\n",c,8.0e6/c);
294 exit(0);
295#if defined(LINT) || defined(OPENSSL_SYS_MSDOS)
296 return(0);
297#endif
298 }
299
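
The benchmark above measures speed in one of two ways: with SIGALRM available it lets an alarm clear a global run flag and simply counts iterations for a fixed 10 seconds, otherwise it calibrates an iteration count first and times it with times()/ftime(). A stripped-down sketch of the alarm-driven pattern on its own (POSIX-only, illustrative body in place of the cipher call):

	/* Alarm-driven "iterations per N seconds" pattern used by idea_spd.c. */
	#include <signal.h>
	#include <stdio.h>
	#include <unistd.h>

	static volatile sig_atomic_t run;

	static void sig_done(int sig)
		{
		(void)sig;
		run = 0;
		}

	int main(void)
		{
		long count = 0;
		volatile unsigned long x = 1;

		signal(SIGALRM, sig_done);
		run = 1;
		alarm(3);                        /* 3 seconds instead of 10 */
		while (run)
			{
			x = x * 2654435761UL + 1;    /* stand-in for idea_encrypt() */
			count++;
			}
		printf("%ld iterations in 3 seconds (%g/s)\n", count, count / 3.0);
		return 0;
		}
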
diff --git a/src/lib/libssl/src/crypto/mdc2/mdc2dgst.c b/src/lib/libssl/src/crypto/mdc2/mdc2dgst.c
index 4aa406edc3..b74bb1a759 100644
--- a/src/lib/libssl/src/crypto/mdc2/mdc2dgst.c
+++ b/src/lib/libssl/src/crypto/mdc2/mdc2dgst.c
@@ -61,6 +61,7 @@
61#include <string.h> 61#include <string.h>
62#include <openssl/des.h> 62#include <openssl/des.h>
63#include <openssl/mdc2.h> 63#include <openssl/mdc2.h>
64#include <openssl/crypto.h>
64 65
65#undef c2l 66#undef c2l
66#define c2l(c,l) (l =((DES_LONG)(*((c)++))) , \ 67#define c2l(c,l) (l =((DES_LONG)(*((c)++))) , \
@@ -75,7 +76,7 @@
75 *((c)++)=(unsigned char)(((l)>>24L)&0xff)) 76 *((c)++)=(unsigned char)(((l)>>24L)&0xff))
76 77
77static void mdc2_body(MDC2_CTX *c, const unsigned char *in, size_t len); 78static void mdc2_body(MDC2_CTX *c, const unsigned char *in, size_t len);
78int MDC2_Init(MDC2_CTX *c) 79fips_md_init(MDC2)
79 { 80 {
80 c->num=0; 81 c->num=0;
81 c->pad_type=1; 82 c->pad_type=1;
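
The MDC2_Init to fips_md_init(MDC2) hunk parallels the private_idea_set_encrypt_key / fips_cipher_abort(IDEA) wrapper visible in i_skey.c earlier in this import: in FIPS-capable builds the public init function becomes a thin guard around a private variant that carries the original body. The following standalone sketch only illustrates that wrapper technique with made-up names (demo_md_init, fips_mode_on); the real macro lives in crypto.h and differs in detail:

	/* Sketch of the guarded-init wrapper technique (illustrative names). */
	#include <stdio.h>

	typedef struct { int num; } DEMO_CTX;

	static int fips_mode_on = 0;          /* stand-in for FIPS_mode() */

	#define demo_md_init(alg) \
		static int private_##alg##_Init(DEMO_CTX *c); \
		int alg##_Init(DEMO_CTX *c) \
			{ \
			if (fips_mode_on) \
				{ \
				fprintf(stderr, "low-level " #alg " forbidden\n"); \
				return 0; \
				} \
			return private_##alg##_Init(c); \
			} \
		static int private_##alg##_Init(DEMO_CTX *c)

	demo_md_init(DEMO)                    /* expands into DEMO_Init + wrapper */
		{
		c->num = 0;
		return 1;
		}

	int main(void)
		{
		DEMO_CTX c;
		printf("init returned %d\n", DEMO_Init(&c));
		return 0;
		}
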
diff --git a/src/lib/libssl/src/crypto/modes/Makefile b/src/lib/libssl/src/crypto/modes/Makefile
index 6c85861b6c..c825b12f25 100644
--- a/src/lib/libssl/src/crypto/modes/Makefile
+++ b/src/lib/libssl/src/crypto/modes/Makefile
@@ -10,21 +10,27 @@ CFLAG=-g
10MAKEFILE= Makefile 10MAKEFILE= Makefile
11AR= ar r 11AR= ar r
12 12
13MODES_ASM_OBJ=
14
13CFLAGS= $(INCLUDES) $(CFLAG) 15CFLAGS= $(INCLUDES) $(CFLAG)
16ASFLAGS= $(INCLUDES) $(ASFLAG)
17AFLAGS= $(ASFLAGS)
14 18
15GENERAL=Makefile 19GENERAL=Makefile
16TEST= 20TEST=
17APPS= 21APPS=
18 22
19LIB=$(TOP)/libcrypto.a 23LIB=$(TOP)/libcrypto.a
20LIBSRC= cbc128.c ctr128.c cts128.c cfb128.c ofb128.c 24LIBSRC= cbc128.c ctr128.c cts128.c cfb128.c ofb128.c gcm128.c \
21LIBOBJ= cbc128.o ctr128.o cts128.o cfb128.o ofb128.o 25 ccm128.c xts128.c
26LIBOBJ= cbc128.o ctr128.o cts128.o cfb128.o ofb128.o gcm128.o \
27 ccm128.o xts128.o $(MODES_ASM_OBJ)
22 28
23SRC= $(LIBSRC) 29SRC= $(LIBSRC)
24 30
25#EXHEADER= store.h str_compat.h 31#EXHEADER= store.h str_compat.h
26EXHEADER= modes.h 32EXHEADER= modes.h
27HEADER= $(EXHEADER) 33HEADER= modes_lcl.h $(EXHEADER)
28 34
29ALL= $(GENERAL) $(SRC) $(HEADER) 35ALL= $(GENERAL) $(SRC) $(HEADER)
30 36
@@ -38,6 +44,24 @@ lib: $(LIBOBJ)
38 $(RANLIB) $(LIB) || echo Never mind. 44 $(RANLIB) $(LIB) || echo Never mind.
39 @touch lib 45 @touch lib
40 46
47ghash-ia64.s: asm/ghash-ia64.pl
48 $(PERL) asm/ghash-ia64.pl $@ $(CFLAGS)
49ghash-x86.s: asm/ghash-x86.pl
50 $(PERL) asm/ghash-x86.pl $(PERLASM_SCHEME) $(CFLAGS) $(PROCESSOR) > $@
51ghash-x86_64.s: asm/ghash-x86_64.pl
52 $(PERL) asm/ghash-x86_64.pl $(PERLASM_SCHEME) > $@
53ghash-sparcv9.s: asm/ghash-sparcv9.pl
54 $(PERL) asm/ghash-sparcv9.pl $@ $(CFLAGS)
55ghash-alpha.s: asm/ghash-alpha.pl
56 $(PERL) $< | $(CC) -E - | tee $@ > /dev/null
57ghash-parisc.s: asm/ghash-parisc.pl
58 $(PERL) asm/ghash-parisc.pl $(PERLASM_SCHEME) $@
59
60# GNU make "catch all"
61ghash-%.S: asm/ghash-%.pl; $(PERL) $< $(PERLASM_SCHEME) $@
62
63ghash-armv4.o: ghash-armv4.S
64
41files: 65files:
42 $(PERL) $(TOP)/util/files.pl Makefile >> $(TOP)/MINFO 66 $(PERL) $(TOP)/util/files.pl Makefile >> $(TOP)/MINFO
43 67
@@ -71,12 +95,47 @@ dclean:
71 mv -f Makefile.new $(MAKEFILE) 95 mv -f Makefile.new $(MAKEFILE)
72 96
73clean: 97clean:
74 rm -f *.o */*.o *.obj lib tags core .pure .nfs* *.old *.bak fluff 98 rm -f *.s *.o */*.o *.obj lib tags core .pure .nfs* *.old *.bak fluff
75 99
76# DO NOT DELETE THIS LINE -- make depend depends on it. 100# DO NOT DELETE THIS LINE -- make depend depends on it.
77 101
78cbc128.o: cbc128.c modes.h 102cbc128.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h
79cfb128.o: cfb128.c modes.h 103cbc128.o: ../../include/openssl/modes.h ../../include/openssl/opensslconf.h
80ctr128.o: ctr128.c modes.h 104cbc128.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
81cts128.o: cts128.c modes.h 105cbc128.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
82ofb128.o: modes.h ofb128.c 106cbc128.o: ../../include/openssl/symhacks.h cbc128.c modes_lcl.h
107ccm128.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h
108ccm128.o: ../../include/openssl/modes.h ../../include/openssl/opensslconf.h
109ccm128.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
110ccm128.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
111ccm128.o: ../../include/openssl/symhacks.h ccm128.c modes_lcl.h
112cfb128.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h
113cfb128.o: ../../include/openssl/modes.h ../../include/openssl/opensslconf.h
114cfb128.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
115cfb128.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
116cfb128.o: ../../include/openssl/symhacks.h cfb128.c modes_lcl.h
117ctr128.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h
118ctr128.o: ../../include/openssl/modes.h ../../include/openssl/opensslconf.h
119ctr128.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
120ctr128.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
121ctr128.o: ../../include/openssl/symhacks.h ctr128.c modes_lcl.h
122cts128.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h
123cts128.o: ../../include/openssl/modes.h ../../include/openssl/opensslconf.h
124cts128.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
125cts128.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
126cts128.o: ../../include/openssl/symhacks.h cts128.c modes_lcl.h
127gcm128.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h
128gcm128.o: ../../include/openssl/modes.h ../../include/openssl/opensslconf.h
129gcm128.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
130gcm128.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
131gcm128.o: ../../include/openssl/symhacks.h gcm128.c modes_lcl.h
132ofb128.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h
133ofb128.o: ../../include/openssl/modes.h ../../include/openssl/opensslconf.h
134ofb128.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
135ofb128.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
136ofb128.o: ../../include/openssl/symhacks.h modes_lcl.h ofb128.c
137xts128.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h
138xts128.o: ../../include/openssl/modes.h ../../include/openssl/opensslconf.h
139xts128.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
140xts128.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
141xts128.o: ../../include/openssl/symhacks.h modes_lcl.h xts128.c
diff --git a/src/lib/libssl/src/crypto/modes/asm/ghash-alpha.pl b/src/lib/libssl/src/crypto/modes/asm/ghash-alpha.pl
new file mode 100644
index 0000000000..6358b2750f
--- /dev/null
+++ b/src/lib/libssl/src/crypto/modes/asm/ghash-alpha.pl
@@ -0,0 +1,451 @@
1#!/usr/bin/env perl
2#
3# ====================================================================
4# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9#
10# March 2010
11#
12# The module implements "4-bit" GCM GHASH function and underlying
13# single multiplication operation in GF(2^128). "4-bit" means that it
14# uses 256 bytes per-key table [+128 bytes shared table]. Even though
15# loops are aggressively modulo-scheduled with respect to references to
16# Htbl and Z.hi updates for 8 cycles per byte, measured performance is
17# ~12 cycles per processed byte on a 21264 CPU. It seems to be a dynamic
18# scheduling "glitch," because uprofile(1) indicates uniform sample
19# distribution, as if all instruction bundles execute in 1.5 cycles.
20# Meaning that it could have been even faster, yet 12 cycles is ~60%
21# better than gcc-generated code and ~80% better than code generated by
22# the vendor compiler.
23
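
For readers following the assembly, this is the generic "4-bit" table-driven GHASH multiply the module implements: Xi is processed a nibble at a time against a 16-entry table of multiples of H, with the rem_4bit constants (the same values that appear at the bottom of this file) folding the bits shifted out of the low end back into the high word. A plain C rendition of the same algorithm, adapted from the shape of the generic code in this import; type and function names here are illustrative:

	/* Generic 4-bit GHASH multiply: Xi <- Xi * H, one nibble at a time.
	 * Htable[i] holds i*H; names and layout here are illustrative. */
	#include <stdint.h>

	typedef struct { uint64_t hi, lo; } u128;

	static const uint16_t rem_4bit[16] = {
		0x0000, 0x1C20, 0x3840, 0x2460, 0x7080, 0x6CA0, 0x48C0, 0x54E0,
		0xE100, 0xFD20, 0xD940, 0xC560, 0x9180, 0x8DA0, 0xA9C0, 0xB5E0
	};

	void gcm_gmult_4bit_c(uint8_t Xi[16], const u128 Htable[16])
	{
		u128 Z;
		int cnt = 15, i;
		unsigned int nlo, nhi, rem;

		nlo = Xi[15];
		nhi = nlo >> 4;
		nlo &= 0xf;

		Z.hi = Htable[nlo].hi;
		Z.lo = Htable[nlo].lo;

		for (;;) {
			/* fold the low nibble, shift right by 4, add Htable[nhi] */
			rem  = (unsigned int)Z.lo & 0xf;
			Z.lo = (Z.hi << 60) | (Z.lo >> 4);
			Z.hi = (Z.hi >> 4) ^ ((uint64_t)rem_4bit[rem] << 48);
			Z.hi ^= Htable[nhi].hi;
			Z.lo ^= Htable[nhi].lo;

			if (--cnt < 0)
				break;

			nlo = Xi[cnt];
			nhi = nlo >> 4;
			nlo &= 0xf;

			rem  = (unsigned int)Z.lo & 0xf;
			Z.lo = (Z.hi << 60) | (Z.lo >> 4);
			Z.hi = (Z.hi >> 4) ^ ((uint64_t)rem_4bit[rem] << 48);
			Z.hi ^= Htable[nlo].hi;
			Z.lo ^= Htable[nlo].lo;
		}

		/* store back big-endian, as the GHASH state is kept in network order */
		for (i = 0; i < 8; i++) {
			Xi[i]     = (uint8_t)(Z.hi >> (56 - 8 * i));
			Xi[8 + i] = (uint8_t)(Z.lo >> (56 - 8 * i));
		}
	}
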
24$cnt="v0"; # $0
25$t0="t0";
26$t1="t1";
27$t2="t2";
28$Thi0="t3"; # $4
29$Tlo0="t4";
30$Thi1="t5";
31$Tlo1="t6";
32$rem="t7"; # $8
33#################
34$Xi="a0"; # $16, input argument block
35$Htbl="a1";
36$inp="a2";
37$len="a3";
38$nlo="a4"; # $20
39$nhi="a5";
40$Zhi="t8";
41$Zlo="t9";
42$Xhi="t10"; # $24
43$Xlo="t11";
44$remp="t12";
45$rem_4bit="AT"; # $28
46
47{ my $N;
48 sub loop() {
49
50 $N++;
51$code.=<<___;
52.align 4
53 extbl $Xlo,7,$nlo
54 and $nlo,0xf0,$nhi
55 sll $nlo,4,$nlo
56 and $nlo,0xf0,$nlo
57
58 addq $nlo,$Htbl,$nlo
59 ldq $Zlo,8($nlo)
60 addq $nhi,$Htbl,$nhi
61 ldq $Zhi,0($nlo)
62
63 and $Zlo,0x0f,$remp
64 sll $Zhi,60,$t0
65 lda $cnt,6(zero)
66 extbl $Xlo,6,$nlo
67
68 ldq $Tlo1,8($nhi)
69 s8addq $remp,$rem_4bit,$remp
70 ldq $Thi1,0($nhi)
71 srl $Zlo,4,$Zlo
72
73 ldq $rem,0($remp)
74 srl $Zhi,4,$Zhi
75 xor $t0,$Zlo,$Zlo
76 and $nlo,0xf0,$nhi
77
78 xor $Tlo1,$Zlo,$Zlo
79 sll $nlo,4,$nlo
80 xor $Thi1,$Zhi,$Zhi
81 and $nlo,0xf0,$nlo
82
83 addq $nlo,$Htbl,$nlo
84 ldq $Tlo0,8($nlo)
85 addq $nhi,$Htbl,$nhi
86 ldq $Thi0,0($nlo)
87
88.Looplo$N:
89 and $Zlo,0x0f,$remp
90 sll $Zhi,60,$t0
91 subq $cnt,1,$cnt
92 srl $Zlo,4,$Zlo
93
94 ldq $Tlo1,8($nhi)
95 xor $rem,$Zhi,$Zhi
96 ldq $Thi1,0($nhi)
97 s8addq $remp,$rem_4bit,$remp
98
99 ldq $rem,0($remp)
100 srl $Zhi,4,$Zhi
101 xor $t0,$Zlo,$Zlo
102 extbl $Xlo,$cnt,$nlo
103
104 and $nlo,0xf0,$nhi
105 xor $Thi0,$Zhi,$Zhi
106 xor $Tlo0,$Zlo,$Zlo
107 sll $nlo,4,$nlo
108
109
110 and $Zlo,0x0f,$remp
111 sll $Zhi,60,$t0
112 and $nlo,0xf0,$nlo
113 srl $Zlo,4,$Zlo
114
115 s8addq $remp,$rem_4bit,$remp
116 xor $rem,$Zhi,$Zhi
117 addq $nlo,$Htbl,$nlo
118 addq $nhi,$Htbl,$nhi
119
120 ldq $rem,0($remp)
121 srl $Zhi,4,$Zhi
122 ldq $Tlo0,8($nlo)
123 xor $t0,$Zlo,$Zlo
124
125 xor $Tlo1,$Zlo,$Zlo
126 xor $Thi1,$Zhi,$Zhi
127 ldq $Thi0,0($nlo)
128 bne $cnt,.Looplo$N
129
130
131 and $Zlo,0x0f,$remp
132 sll $Zhi,60,$t0
133 lda $cnt,7(zero)
134 srl $Zlo,4,$Zlo
135
136 ldq $Tlo1,8($nhi)
137 xor $rem,$Zhi,$Zhi
138 ldq $Thi1,0($nhi)
139 s8addq $remp,$rem_4bit,$remp
140
141 ldq $rem,0($remp)
142 srl $Zhi,4,$Zhi
143 xor $t0,$Zlo,$Zlo
144 extbl $Xhi,$cnt,$nlo
145
146 and $nlo,0xf0,$nhi
147 xor $Thi0,$Zhi,$Zhi
148 xor $Tlo0,$Zlo,$Zlo
149 sll $nlo,4,$nlo
150
151 and $Zlo,0x0f,$remp
152 sll $Zhi,60,$t0
153 and $nlo,0xf0,$nlo
154 srl $Zlo,4,$Zlo
155
156 s8addq $remp,$rem_4bit,$remp
157 xor $rem,$Zhi,$Zhi
158 addq $nlo,$Htbl,$nlo
159 addq $nhi,$Htbl,$nhi
160
161 ldq $rem,0($remp)
162 srl $Zhi,4,$Zhi
163 ldq $Tlo0,8($nlo)
164 xor $t0,$Zlo,$Zlo
165
166 xor $Tlo1,$Zlo,$Zlo
167 xor $Thi1,$Zhi,$Zhi
168 ldq $Thi0,0($nlo)
169 unop
170
171
172.Loophi$N:
173 and $Zlo,0x0f,$remp
174 sll $Zhi,60,$t0
175 subq $cnt,1,$cnt
176 srl $Zlo,4,$Zlo
177
178 ldq $Tlo1,8($nhi)
179 xor $rem,$Zhi,$Zhi
180 ldq $Thi1,0($nhi)
181 s8addq $remp,$rem_4bit,$remp
182
183 ldq $rem,0($remp)
184 srl $Zhi,4,$Zhi
185 xor $t0,$Zlo,$Zlo
186 extbl $Xhi,$cnt,$nlo
187
188 and $nlo,0xf0,$nhi
189 xor $Thi0,$Zhi,$Zhi
190 xor $Tlo0,$Zlo,$Zlo
191 sll $nlo,4,$nlo
192
193
194 and $Zlo,0x0f,$remp
195 sll $Zhi,60,$t0
196 and $nlo,0xf0,$nlo
197 srl $Zlo,4,$Zlo
198
199 s8addq $remp,$rem_4bit,$remp
200 xor $rem,$Zhi,$Zhi
201 addq $nlo,$Htbl,$nlo
202 addq $nhi,$Htbl,$nhi
203
204 ldq $rem,0($remp)
205 srl $Zhi,4,$Zhi
206 ldq $Tlo0,8($nlo)
207 xor $t0,$Zlo,$Zlo
208
209 xor $Tlo1,$Zlo,$Zlo
210 xor $Thi1,$Zhi,$Zhi
211 ldq $Thi0,0($nlo)
212 bne $cnt,.Loophi$N
213
214
215 and $Zlo,0x0f,$remp
216 sll $Zhi,60,$t0
217 srl $Zlo,4,$Zlo
218
219 ldq $Tlo1,8($nhi)
220 xor $rem,$Zhi,$Zhi
221 ldq $Thi1,0($nhi)
222 s8addq $remp,$rem_4bit,$remp
223
224 ldq $rem,0($remp)
225 srl $Zhi,4,$Zhi
226 xor $t0,$Zlo,$Zlo
227
228 xor $Tlo0,$Zlo,$Zlo
229 xor $Thi0,$Zhi,$Zhi
230
231 and $Zlo,0x0f,$remp
232 sll $Zhi,60,$t0
233 srl $Zlo,4,$Zlo
234
235 s8addq $remp,$rem_4bit,$remp
236 xor $rem,$Zhi,$Zhi
237
238 ldq $rem,0($remp)
239 srl $Zhi,4,$Zhi
240 xor $Tlo1,$Zlo,$Zlo
241 xor $Thi1,$Zhi,$Zhi
242 xor $t0,$Zlo,$Zlo
243 xor $rem,$Zhi,$Zhi
244___
245}}
246
247$code=<<___;
248#ifdef __linux__
249#include <asm/regdef.h>
250#else
251#include <asm.h>
252#include <regdef.h>
253#endif
254
255.text
256
257.set noat
258.set noreorder
259.globl gcm_gmult_4bit
260.align 4
261.ent gcm_gmult_4bit
262gcm_gmult_4bit:
263 .frame sp,0,ra
264 .prologue 0
265
266 ldq $Xlo,8($Xi)
267 ldq $Xhi,0($Xi)
268
269 br $rem_4bit,.Lpic1
270.Lpic1: lda $rem_4bit,rem_4bit-.Lpic1($rem_4bit)
271___
272
273 &loop();
274
275$code.=<<___;
276 srl $Zlo,24,$t0 # byte swap
277 srl $Zlo,8,$t1
278
279 sll $Zlo,8,$t2
280 sll $Zlo,24,$Zlo
281 zapnot $t0,0x11,$t0
282 zapnot $t1,0x22,$t1
283
284 zapnot $Zlo,0x88,$Zlo
285 or $t0,$t1,$t0
286 zapnot $t2,0x44,$t2
287
288 or $Zlo,$t0,$Zlo
289 srl $Zhi,24,$t0
290 srl $Zhi,8,$t1
291
292 or $Zlo,$t2,$Zlo
293 sll $Zhi,8,$t2
294 sll $Zhi,24,$Zhi
295
296 srl $Zlo,32,$Xlo
297 sll $Zlo,32,$Zlo
298
299 zapnot $t0,0x11,$t0
300 zapnot $t1,0x22,$t1
301 or $Zlo,$Xlo,$Xlo
302
303 zapnot $Zhi,0x88,$Zhi
304 or $t0,$t1,$t0
305 zapnot $t2,0x44,$t2
306
307 or $Zhi,$t0,$Zhi
308 or $Zhi,$t2,$Zhi
309
310 srl $Zhi,32,$Xhi
311 sll $Zhi,32,$Zhi
312
313 or $Zhi,$Xhi,$Xhi
314 stq $Xlo,8($Xi)
315 stq $Xhi,0($Xi)
316
317 ret (ra)
318.end gcm_gmult_4bit
319___
320
321$inhi="s0";
322$inlo="s1";
323
324$code.=<<___;
325.globl gcm_ghash_4bit
326.align 4
327.ent gcm_ghash_4bit
328gcm_ghash_4bit:
329 lda sp,-32(sp)
330 stq ra,0(sp)
331 stq s0,8(sp)
332 stq s1,16(sp)
333 .mask 0x04000600,-32
334 .frame sp,32,ra
335 .prologue 0
336
337 ldq_u $inhi,0($inp)
338 ldq_u $Thi0,7($inp)
339 ldq_u $inlo,8($inp)
340 ldq_u $Tlo0,15($inp)
341 ldq $Xhi,0($Xi)
342 ldq $Xlo,8($Xi)
343
344 br $rem_4bit,.Lpic2
345.Lpic2: lda $rem_4bit,rem_4bit-.Lpic2($rem_4bit)
346
347.Louter:
348 extql $inhi,$inp,$inhi
349 extqh $Thi0,$inp,$Thi0
350 or $inhi,$Thi0,$inhi
351 lda $inp,16($inp)
352
353 extql $inlo,$inp,$inlo
354 extqh $Tlo0,$inp,$Tlo0
355 or $inlo,$Tlo0,$inlo
356 subq $len,16,$len
357
358 xor $Xlo,$inlo,$Xlo
359 xor $Xhi,$inhi,$Xhi
360___
361
362 &loop();
363
364$code.=<<___;
365 srl $Zlo,24,$t0 # byte swap
366 srl $Zlo,8,$t1
367
368 sll $Zlo,8,$t2
369 sll $Zlo,24,$Zlo
370 zapnot $t0,0x11,$t0
371 zapnot $t1,0x22,$t1
372
373 zapnot $Zlo,0x88,$Zlo
374 or $t0,$t1,$t0
375 zapnot $t2,0x44,$t2
376
377 or $Zlo,$t0,$Zlo
378 srl $Zhi,24,$t0
379 srl $Zhi,8,$t1
380
381 or $Zlo,$t2,$Zlo
382 sll $Zhi,8,$t2
383 sll $Zhi,24,$Zhi
384
385 srl $Zlo,32,$Xlo
386 sll $Zlo,32,$Zlo
387 beq $len,.Ldone
388
389 zapnot $t0,0x11,$t0
390 zapnot $t1,0x22,$t1
391 or $Zlo,$Xlo,$Xlo
392 ldq_u $inhi,0($inp)
393
394 zapnot $Zhi,0x88,$Zhi
395 or $t0,$t1,$t0
396 zapnot $t2,0x44,$t2
397 ldq_u $Thi0,7($inp)
398
399 or $Zhi,$t0,$Zhi
400 or $Zhi,$t2,$Zhi
401 ldq_u $inlo,8($inp)
402 ldq_u $Tlo0,15($inp)
403
404 srl $Zhi,32,$Xhi
405 sll $Zhi,32,$Zhi
406
407 or $Zhi,$Xhi,$Xhi
408 br zero,.Louter
409
410.Ldone:
411 zapnot $t0,0x11,$t0
412 zapnot $t1,0x22,$t1
413 or $Zlo,$Xlo,$Xlo
414
415 zapnot $Zhi,0x88,$Zhi
416 or $t0,$t1,$t0
417 zapnot $t2,0x44,$t2
418
419 or $Zhi,$t0,$Zhi
420 or $Zhi,$t2,$Zhi
421
422 srl $Zhi,32,$Xhi
423 sll $Zhi,32,$Zhi
424
425 or $Zhi,$Xhi,$Xhi
426
427 stq $Xlo,8($Xi)
428 stq $Xhi,0($Xi)
429
430 .set noreorder
431 /*ldq ra,0(sp)*/
432 ldq s0,8(sp)
433 ldq s1,16(sp)
434 lda sp,32(sp)
435 ret (ra)
436.end gcm_ghash_4bit
437
438.align 4
439rem_4bit:
440 .quad 0x0000<<48, 0x1C20<<48, 0x3840<<48, 0x2460<<48
441 .quad 0x7080<<48, 0x6CA0<<48, 0x48C0<<48, 0x54E0<<48
442 .quad 0xE100<<48, 0xFD20<<48, 0xD940<<48, 0xC560<<48
443 .quad 0x9180<<48, 0x8DA0<<48, 0xA9C0<<48, 0xB5E0<<48
444.ascii "GHASH for Alpha, CRYPTOGAMS by <appro\@openssl.org>"
445.align 4
446
447___
448$output=shift and open STDOUT,">$output";
449print $code;
450close STDOUT;
451
diff --git a/src/lib/libssl/src/crypto/modes/asm/ghash-armv4.pl b/src/lib/libssl/src/crypto/modes/asm/ghash-armv4.pl
new file mode 100644
index 0000000000..d91586ee29
--- /dev/null
+++ b/src/lib/libssl/src/crypto/modes/asm/ghash-armv4.pl
@@ -0,0 +1,429 @@
1#!/usr/bin/env perl
2#
3# ====================================================================
4# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9#
10# April 2010
11#
12# The module implements "4-bit" GCM GHASH function and underlying
13# single multiplication operation in GF(2^128). "4-bit" means that it
14# uses 256 bytes per-key table [+32 bytes shared table]. There is no
15# experimental performance data available yet. The only approximation
16# that can be made at this point is based on code size. Inner loop is
17# 32 instructions long and on single-issue core should execute in <40
18# cycles. Having verified that gcc 3.4 didn't unroll corresponding
19# loop, this assembler loop body was found to be ~3x smaller than
20# compiler-generated one...
21#
22# July 2010
23#
24# Rescheduling for dual-issue pipeline resulted in 8.5% improvement on
25# Cortex A8 core and ~25 cycles per processed byte (which was observed
26# to be ~3 times faster than gcc-generated code:-)
27#
28# February 2011
29#
30# Profiler-assisted and platform-specific optimization resulted in 7%
31# improvement on Cortex A8 core and ~23.5 cycles per byte.
32#
33# March 2011
34#
35# Add NEON implementation featuring polynomial multiplication, i.e. no
36# lookup tables involved. On Cortex A8 it was measured to process one
37# byte in 15 cycles or 55% faster than integer-only code.
38
39# ====================================================================
40# Note about the "528B" variant. In the ARM case it makes less sense to
41# implement it, for the following reasons:
42#
43# - performance improvement won't be anywhere near 50%, because 128-
44# bit shift operation is neatly fused with 128-bit xor here, and
45#   "528B" variant would eliminate only 4-5 instructions out of 32
46# in the inner loop (meaning that estimated improvement is ~15%);
47# - ARM-based systems are often embedded ones and extra memory
48# consumption might be unappreciated (for so little improvement);
49#
50# Byte order [in]dependence. =========================================
51#
52# Caller is expected to maintain specific *dword* order in Htable,
53# namely with *least* significant dword of 128-bit value at *lower*
54# address. This differs completely from C code and has everything to
55# do with ldm instruction and order in which dwords are "consumed" by
56# algorithm. *Byte* order within these dwords in turn is whatever
57# *native* byte order on current platform. See gcm128.c for working
58# example...
59
60while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
61open STDOUT,">$output";
62
63$Xi="r0"; # argument block
64$Htbl="r1";
65$inp="r2";
66$len="r3";
67
68$Zll="r4"; # variables
69$Zlh="r5";
70$Zhl="r6";
71$Zhh="r7";
72$Tll="r8";
73$Tlh="r9";
74$Thl="r10";
75$Thh="r11";
76$nlo="r12";
77################# r13 is stack pointer
78$nhi="r14";
79################# r15 is program counter
80
81$rem_4bit=$inp; # used in gcm_gmult_4bit
82$cnt=$len;
83
84sub Zsmash() {
85 my $i=12;
86 my @args=@_;
87 for ($Zll,$Zlh,$Zhl,$Zhh) {
88 $code.=<<___;
89#if __ARM_ARCH__>=7 && defined(__ARMEL__)
90 rev $_,$_
91 str $_,[$Xi,#$i]
92#elif defined(__ARMEB__)
93 str $_,[$Xi,#$i]
94#else
95 mov $Tlh,$_,lsr#8
96 strb $_,[$Xi,#$i+3]
97 mov $Thl,$_,lsr#16
98 strb $Tlh,[$Xi,#$i+2]
99 mov $Thh,$_,lsr#24
100 strb $Thl,[$Xi,#$i+1]
101 strb $Thh,[$Xi,#$i]
102#endif
103___
104 $code.="\t".shift(@args)."\n";
105 $i-=4;
106 }
107}
108
109$code=<<___;
110#include "arm_arch.h"
111
112.text
113.code 32
114
115.type rem_4bit,%object
116.align 5
117rem_4bit:
118.short 0x0000,0x1C20,0x3840,0x2460
119.short 0x7080,0x6CA0,0x48C0,0x54E0
120.short 0xE100,0xFD20,0xD940,0xC560
121.short 0x9180,0x8DA0,0xA9C0,0xB5E0
122.size rem_4bit,.-rem_4bit
123
124.type rem_4bit_get,%function
125rem_4bit_get:
126 sub $rem_4bit,pc,#8
127 sub $rem_4bit,$rem_4bit,#32 @ &rem_4bit
128 b .Lrem_4bit_got
129 nop
130.size rem_4bit_get,.-rem_4bit_get
131
132.global gcm_ghash_4bit
133.type gcm_ghash_4bit,%function
134gcm_ghash_4bit:
135 sub r12,pc,#8
136 add $len,$inp,$len @ $len to point at the end
137 stmdb sp!,{r3-r11,lr} @ save $len/end too
138 sub r12,r12,#48 @ &rem_4bit
139
140 ldmia r12,{r4-r11} @ copy rem_4bit ...
141 stmdb sp!,{r4-r11} @ ... to stack
142
143 ldrb $nlo,[$inp,#15]
144 ldrb $nhi,[$Xi,#15]
145.Louter:
146 eor $nlo,$nlo,$nhi
147 and $nhi,$nlo,#0xf0
148 and $nlo,$nlo,#0x0f
149 mov $cnt,#14
150
151 add $Zhh,$Htbl,$nlo,lsl#4
152 ldmia $Zhh,{$Zll-$Zhh} @ load Htbl[nlo]
153 add $Thh,$Htbl,$nhi
154 ldrb $nlo,[$inp,#14]
155
156 and $nhi,$Zll,#0xf @ rem
157 ldmia $Thh,{$Tll-$Thh} @ load Htbl[nhi]
158 add $nhi,$nhi,$nhi
159 eor $Zll,$Tll,$Zll,lsr#4
160 ldrh $Tll,[sp,$nhi] @ rem_4bit[rem]
161 eor $Zll,$Zll,$Zlh,lsl#28
162 ldrb $nhi,[$Xi,#14]
163 eor $Zlh,$Tlh,$Zlh,lsr#4
164 eor $Zlh,$Zlh,$Zhl,lsl#28
165 eor $Zhl,$Thl,$Zhl,lsr#4
166 eor $Zhl,$Zhl,$Zhh,lsl#28
167 eor $Zhh,$Thh,$Zhh,lsr#4
168 eor $nlo,$nlo,$nhi
169 and $nhi,$nlo,#0xf0
170 and $nlo,$nlo,#0x0f
171 eor $Zhh,$Zhh,$Tll,lsl#16
172
173.Linner:
174 add $Thh,$Htbl,$nlo,lsl#4
175 and $nlo,$Zll,#0xf @ rem
176 subs $cnt,$cnt,#1
177 add $nlo,$nlo,$nlo
178 ldmia $Thh,{$Tll-$Thh} @ load Htbl[nlo]
179 eor $Zll,$Tll,$Zll,lsr#4
180 eor $Zll,$Zll,$Zlh,lsl#28
181 eor $Zlh,$Tlh,$Zlh,lsr#4
182 eor $Zlh,$Zlh,$Zhl,lsl#28
183 ldrh $Tll,[sp,$nlo] @ rem_4bit[rem]
184 eor $Zhl,$Thl,$Zhl,lsr#4
185 ldrplb $nlo,[$inp,$cnt]
186 eor $Zhl,$Zhl,$Zhh,lsl#28
187 eor $Zhh,$Thh,$Zhh,lsr#4
188
189 add $Thh,$Htbl,$nhi
190 and $nhi,$Zll,#0xf @ rem
191 eor $Zhh,$Zhh,$Tll,lsl#16 @ ^= rem_4bit[rem]
192 add $nhi,$nhi,$nhi
193 ldmia $Thh,{$Tll-$Thh} @ load Htbl[nhi]
194 eor $Zll,$Tll,$Zll,lsr#4
195 ldrplb $Tll,[$Xi,$cnt]
196 eor $Zll,$Zll,$Zlh,lsl#28
197 eor $Zlh,$Tlh,$Zlh,lsr#4
198 ldrh $Tlh,[sp,$nhi]
199 eor $Zlh,$Zlh,$Zhl,lsl#28
200 eor $Zhl,$Thl,$Zhl,lsr#4
201 eor $Zhl,$Zhl,$Zhh,lsl#28
202 eorpl $nlo,$nlo,$Tll
203 eor $Zhh,$Thh,$Zhh,lsr#4
204 andpl $nhi,$nlo,#0xf0
205 andpl $nlo,$nlo,#0x0f
206 eor $Zhh,$Zhh,$Tlh,lsl#16 @ ^= rem_4bit[rem]
207 bpl .Linner
208
209 ldr $len,[sp,#32] @ re-load $len/end
210 add $inp,$inp,#16
211 mov $nhi,$Zll
212___
213 &Zsmash("cmp\t$inp,$len","ldrneb\t$nlo,[$inp,#15]");
214$code.=<<___;
215 bne .Louter
216
217 add sp,sp,#36
218#if __ARM_ARCH__>=5
219 ldmia sp!,{r4-r11,pc}
220#else
221 ldmia sp!,{r4-r11,lr}
222 tst lr,#1
223 moveq pc,lr @ be binary compatible with V4, yet
224 bx lr @ interoperable with Thumb ISA:-)
225#endif
226.size gcm_ghash_4bit,.-gcm_ghash_4bit
227
228.global gcm_gmult_4bit
229.type gcm_gmult_4bit,%function
230gcm_gmult_4bit:
231 stmdb sp!,{r4-r11,lr}
232 ldrb $nlo,[$Xi,#15]
233 b rem_4bit_get
234.Lrem_4bit_got:
235 and $nhi,$nlo,#0xf0
236 and $nlo,$nlo,#0x0f
237 mov $cnt,#14
238
239 add $Zhh,$Htbl,$nlo,lsl#4
240 ldmia $Zhh,{$Zll-$Zhh} @ load Htbl[nlo]
241 ldrb $nlo,[$Xi,#14]
242
243 add $Thh,$Htbl,$nhi
244 and $nhi,$Zll,#0xf @ rem
245 ldmia $Thh,{$Tll-$Thh} @ load Htbl[nhi]
246 add $nhi,$nhi,$nhi
247 eor $Zll,$Tll,$Zll,lsr#4
248 ldrh $Tll,[$rem_4bit,$nhi] @ rem_4bit[rem]
249 eor $Zll,$Zll,$Zlh,lsl#28
250 eor $Zlh,$Tlh,$Zlh,lsr#4
251 eor $Zlh,$Zlh,$Zhl,lsl#28
252 eor $Zhl,$Thl,$Zhl,lsr#4
253 eor $Zhl,$Zhl,$Zhh,lsl#28
254 eor $Zhh,$Thh,$Zhh,lsr#4
255 and $nhi,$nlo,#0xf0
256 eor $Zhh,$Zhh,$Tll,lsl#16
257 and $nlo,$nlo,#0x0f
258
259.Loop:
260 add $Thh,$Htbl,$nlo,lsl#4
261 and $nlo,$Zll,#0xf @ rem
262 subs $cnt,$cnt,#1
263 add $nlo,$nlo,$nlo
264 ldmia $Thh,{$Tll-$Thh} @ load Htbl[nlo]
265 eor $Zll,$Tll,$Zll,lsr#4
266 eor $Zll,$Zll,$Zlh,lsl#28
267 eor $Zlh,$Tlh,$Zlh,lsr#4
268 eor $Zlh,$Zlh,$Zhl,lsl#28
269 ldrh $Tll,[$rem_4bit,$nlo] @ rem_4bit[rem]
270 eor $Zhl,$Thl,$Zhl,lsr#4
271 ldrplb $nlo,[$Xi,$cnt]
272 eor $Zhl,$Zhl,$Zhh,lsl#28
273 eor $Zhh,$Thh,$Zhh,lsr#4
274
275 add $Thh,$Htbl,$nhi
276 and $nhi,$Zll,#0xf @ rem
277 eor $Zhh,$Zhh,$Tll,lsl#16 @ ^= rem_4bit[rem]
278 add $nhi,$nhi,$nhi
279 ldmia $Thh,{$Tll-$Thh} @ load Htbl[nhi]
280 eor $Zll,$Tll,$Zll,lsr#4
281 eor $Zll,$Zll,$Zlh,lsl#28
282 eor $Zlh,$Tlh,$Zlh,lsr#4
283 ldrh $Tll,[$rem_4bit,$nhi] @ rem_4bit[rem]
284 eor $Zlh,$Zlh,$Zhl,lsl#28
285 eor $Zhl,$Thl,$Zhl,lsr#4
286 eor $Zhl,$Zhl,$Zhh,lsl#28
287 eor $Zhh,$Thh,$Zhh,lsr#4
288 andpl $nhi,$nlo,#0xf0
289 andpl $nlo,$nlo,#0x0f
290 eor $Zhh,$Zhh,$Tll,lsl#16 @ ^= rem_4bit[rem]
291 bpl .Loop
292___
293 &Zsmash();
294$code.=<<___;
295#if __ARM_ARCH__>=5
296 ldmia sp!,{r4-r11,pc}
297#else
298 ldmia sp!,{r4-r11,lr}
299 tst lr,#1
300 moveq pc,lr @ be binary compatible with V4, yet
301 bx lr @ interoperable with Thumb ISA:-)
302#endif
303.size gcm_gmult_4bit,.-gcm_gmult_4bit
304___
305{
306my $cnt=$Htbl; # $Htbl is used once in the very beginning
307
308my ($Hhi, $Hlo, $Zo, $T, $xi, $mod) = map("d$_",(0..7));
309my ($Qhi, $Qlo, $Z, $R, $zero, $Qpost, $IN) = map("q$_",(8..15));
310
311# Z:Zo keeps 128-bit result shifted by 1 to the right, with bottom bit
312# in Zo. Or should I say "top bit", because GHASH is specified in
313# reverse bit order? Otherwise straightforward 128-bit H by one input
314# byte multiplication and modulo-reduction, times 16.
315
316sub Dlo() { shift=~m|q([1]?[0-9])|?"d".($1*2):""; }
317sub Dhi() { shift=~m|q([1]?[0-9])|?"d".($1*2+1):""; }
318sub Q() { shift=~m|d([1-3]?[02468])|?"q".($1/2):""; }
319
320$code.=<<___;
321#if __ARM_ARCH__>=7
322.fpu neon
323
324.global gcm_gmult_neon
325.type gcm_gmult_neon,%function
326.align 4
327gcm_gmult_neon:
328 sub $Htbl,#16 @ point at H in GCM128_CTX
329 vld1.64 `&Dhi("$IN")`,[$Xi,:64]!@ load Xi
330 vmov.i32 $mod,#0xe1 @ our irreducible polynomial
331 vld1.64 `&Dlo("$IN")`,[$Xi,:64]!
332 vshr.u64 $mod,#32
333 vldmia $Htbl,{$Hhi-$Hlo} @ load H
334 veor $zero,$zero
335#ifdef __ARMEL__
336 vrev64.8 $IN,$IN
337#endif
338 veor $Qpost,$Qpost
339 veor $R,$R
340 mov $cnt,#16
341 veor $Z,$Z
342 mov $len,#16
343 veor $Zo,$Zo
344 vdup.8 $xi,`&Dlo("$IN")`[0] @ broadcast lowest byte
345 b .Linner_neon
346.size gcm_gmult_neon,.-gcm_gmult_neon
347
348.global gcm_ghash_neon
349.type gcm_ghash_neon,%function
350.align 4
351gcm_ghash_neon:
352 vld1.64 `&Dhi("$Z")`,[$Xi,:64]! @ load Xi
353 vmov.i32 $mod,#0xe1 @ our irreducible polynomial
354 vld1.64 `&Dlo("$Z")`,[$Xi,:64]!
355 vshr.u64 $mod,#32
356 vldmia $Xi,{$Hhi-$Hlo} @ load H
357 veor $zero,$zero
358 nop
359#ifdef __ARMEL__
360 vrev64.8 $Z,$Z
361#endif
362.Louter_neon:
363 vld1.64 `&Dhi($IN)`,[$inp]! @ load inp
364 veor $Qpost,$Qpost
365 vld1.64 `&Dlo($IN)`,[$inp]!
366 veor $R,$R
367 mov $cnt,#16
368#ifdef __ARMEL__
369 vrev64.8 $IN,$IN
370#endif
371 veor $Zo,$Zo
372 veor $IN,$Z @ inp^=Xi
373 veor $Z,$Z
374 vdup.8 $xi,`&Dlo("$IN")`[0] @ broadcast lowest byte
375.Linner_neon:
376 subs $cnt,$cnt,#1
377 vmull.p8 $Qlo,$Hlo,$xi @ H.lo·Xi[i]
378 vmull.p8 $Qhi,$Hhi,$xi @ H.hi·Xi[i]
379 vext.8 $IN,$zero,#1 @ IN>>=8
380
381 veor $Z,$Qpost @ modulo-scheduled part
382 vshl.i64 `&Dlo("$R")`,#48
383 vdup.8 $xi,`&Dlo("$IN")`[0] @ broadcast lowest byte
384 veor $T,`&Dlo("$Qlo")`,`&Dlo("$Z")`
385
386 veor `&Dhi("$Z")`,`&Dlo("$R")`
387 vuzp.8 $Qlo,$Qhi
388 vsli.8 $Zo,$T,#1 @ compose the "carry" byte
389 vext.8 $Z,$zero,#1 @ Z>>=8
390
391 vmull.p8 $R,$Zo,$mod @ "carry"·0xe1
392 vshr.u8 $Zo,$T,#7 @ save Z's bottom bit
393 vext.8 $Qpost,$Qlo,$zero,#1 @ Qlo>>=8
394 veor $Z,$Qhi
395 bne .Linner_neon
396
397 veor $Z,$Qpost @ modulo-scheduled artefact
398 vshl.i64 `&Dlo("$R")`,#48
399 veor `&Dhi("$Z")`,`&Dlo("$R")`
400
401 @ finalization, normalize Z:Zo
402 vand $Zo,$mod @ suffices to mask the bit
403 vshr.u64 `&Dhi(&Q("$Zo"))`,`&Dlo("$Z")`,#63
404 vshl.i64 $Z,#1
405 subs $len,#16
406 vorr $Z,`&Q("$Zo")` @ Z=Z:Zo<<1
407 bne .Louter_neon
408
409#ifdef __ARMEL__
410 vrev64.8 $Z,$Z
411#endif
412 sub $Xi,#16
413 vst1.64 `&Dhi("$Z")`,[$Xi,:64]! @ write out Xi
414 vst1.64 `&Dlo("$Z")`,[$Xi,:64]
415
416 bx lr
417.size gcm_ghash_neon,.-gcm_ghash_neon
418#endif
419___
420}
421$code.=<<___;
422.asciz "GHASH for ARMv4/NEON, CRYPTOGAMS by <appro\@openssl.org>"
423.align 2
424___
425
426$code =~ s/\`([^\`]*)\`/eval $1/gem;
427$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm; # make it possible to compile with -march=armv4
428print $code;
429close STDOUT; # enforce flush
diff --git a/src/lib/libssl/src/crypto/modes/asm/ghash-ia64.pl b/src/lib/libssl/src/crypto/modes/asm/ghash-ia64.pl
new file mode 100755
index 0000000000..0354c95444
--- /dev/null
+++ b/src/lib/libssl/src/crypto/modes/asm/ghash-ia64.pl
@@ -0,0 +1,463 @@
1#!/usr/bin/env perl
2
3# ====================================================================
4# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9#
10# March 2010
11#
12# The module implements "4-bit" GCM GHASH function and underlying
13# single multiplication operation in GF(2^128). "4-bit" means that it
14# uses 256 bytes per-key table [+128 bytes shared table]. Streamed
15# GHASH performance was measured to be 6.67 cycles per processed byte
16# on Itanium 2, which is >90% better than Microsoft compiler-generated
17# code. To anchor to something else, the sha1-ia64.pl module processes one
18# byte in 5.7 cycles. On Itanium GHASH should run at ~8.5 cycles per
19# byte.
20
21# September 2010
22#
23# It was originally thought that it made less sense to implement the
24# "528B" variant on Itanium 2, for the following reason. Because the
25# number of functional units is naturally limited, it appeared impossible
26# to implement the "528B" loop in 4 cycles, only in 5. This would mean
27# that the theoretical performance improvement couldn't be more than 20%.
28# But occasionally you prove yourself wrong:-) I figured out a way to
29# fold a couple of instructions and freed yet another instruction
30# slot by unrolling the loop... The resulting performance is 4.45 cycles
31# per processed byte, 50% better than the "256B" version. On the original
32# Itanium performance should remain the same as for the "256B" version,
33# i.e. ~8.5 cycles.
34
35$output=shift and (open STDOUT,">$output" or die "can't open $output: $!");
36
37if ($^O eq "hpux") {
38 $ADDP="addp4";
39 for (@ARGV) { $ADDP="add" if (/[\+DD|\-mlp]64/); }
40} else { $ADDP="add"; }
41for (@ARGV) { $big_endian=1 if (/\-DB_ENDIAN/);
42 $big_endian=0 if (/\-DL_ENDIAN/); }
43if (!defined($big_endian))
44 { $big_endian=(unpack('L',pack('N',1))==1); }
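# (pack('N',1) produces big-endian 00 00 00 01; reading it back with the
# native 'L' template yields 1 only on a big-endian host.)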
45
46sub loop() {
47my $label=shift;
48my ($p16,$p17)=(shift)?("p63","p63"):("p16","p17"); # mask references to inp
49
50# The loop is scheduled for 6 ticks on Itanium 2 and 8 on Itanium, i.e.
51# in a scalable manner;-) Naturally assuming data is in the L1 cache...
52# A special note about the 'dep' instruction, which is used to construct
53# &rem_4bit[Zlo&0xf]. It works because rem_4bit is aligned at a 128-byte
54# boundary and the lower 7 bits of its address are guaranteed to
55# be zero.
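# In other words, with entries 8 bytes wide the deposit
#	dep	rem=Zlo,rem_4bitp,3,4
# places the low 4 bits of Zlo into bits 3..6 of the (128-byte aligned)
# table address, i.e. rem = rem_4bitp | ((Zlo & 0xf) << 3), which is
# exactly &rem_4bit[Zlo & 0xf].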
56$code.=<<___;
57$label:
58{ .mfi; (p18) ld8 Hlo=[Hi[1]],-8
59 (p19) dep rem=Zlo,rem_4bitp,3,4 }
60{ .mfi; (p19) xor Zhi=Zhi,Hhi
61 ($p17) xor xi[1]=xi[1],in[1] };;
62{ .mfi; (p18) ld8 Hhi=[Hi[1]]
63 (p19) shrp Zlo=Zhi,Zlo,4 }
64{ .mfi; (p19) ld8 rem=[rem]
65 (p18) and Hi[1]=mask0xf0,xi[2] };;
66{ .mmi; ($p16) ld1 in[0]=[inp],-1
67 (p18) xor Zlo=Zlo,Hlo
68 (p19) shr.u Zhi=Zhi,4 }
69{ .mib; (p19) xor Hhi=Hhi,rem
70 (p18) add Hi[1]=Htbl,Hi[1] };;
71
72{ .mfi; (p18) ld8 Hlo=[Hi[1]],-8
73 (p18) dep rem=Zlo,rem_4bitp,3,4 }
74{ .mfi; (p17) shladd Hi[0]=xi[1],4,r0
75 (p18) xor Zhi=Zhi,Hhi };;
76{ .mfi; (p18) ld8 Hhi=[Hi[1]]
77 (p18) shrp Zlo=Zhi,Zlo,4 }
78{ .mfi; (p18) ld8 rem=[rem]
79 (p17) and Hi[0]=mask0xf0,Hi[0] };;
80{ .mmi; (p16) ld1 xi[0]=[Xi],-1
81 (p18) xor Zlo=Zlo,Hlo
82 (p18) shr.u Zhi=Zhi,4 }
83{ .mib; (p18) xor Hhi=Hhi,rem
84 (p17) add Hi[0]=Htbl,Hi[0]
85 br.ctop.sptk $label };;
86___
87}
88
89$code=<<___;
90.explicit
91.text
92
93prevfs=r2; prevlc=r3; prevpr=r8;
94mask0xf0=r21;
95rem=r22; rem_4bitp=r23;
96Xi=r24; Htbl=r25;
97inp=r26; end=r27;
98Hhi=r28; Hlo=r29;
99Zhi=r30; Zlo=r31;
100
101.align 128
102.skip 16 // aligns loop body
103.global gcm_gmult_4bit#
104.proc gcm_gmult_4bit#
105gcm_gmult_4bit:
106 .prologue
107{ .mmi; .save ar.pfs,prevfs
108 alloc prevfs=ar.pfs,2,6,0,8
109 $ADDP Xi=15,in0 // &Xi[15]
110 mov rem_4bitp=ip }
111{ .mii; $ADDP Htbl=8,in1 // &Htbl[0].lo
112 .save ar.lc,prevlc
113 mov prevlc=ar.lc
114 .save pr,prevpr
115 mov prevpr=pr };;
116
117 .body
118 .rotr in[3],xi[3],Hi[2]
119
120{ .mib; ld1 xi[2]=[Xi],-1 // Xi[15]
121 mov mask0xf0=0xf0
122 brp.loop.imp .Loop1,.Lend1-16};;
123{ .mmi; ld1 xi[1]=[Xi],-1 // Xi[14]
124 };;
125{ .mii; shladd Hi[1]=xi[2],4,r0
126 mov pr.rot=0x7<<16
127 mov ar.lc=13 };;
128{ .mii; and Hi[1]=mask0xf0,Hi[1]
129 mov ar.ec=3
130 xor Zlo=Zlo,Zlo };;
131{ .mii; add Hi[1]=Htbl,Hi[1] // &Htbl[nlo].lo
132 add rem_4bitp=rem_4bit#-gcm_gmult_4bit#,rem_4bitp
133 xor Zhi=Zhi,Zhi };;
134___
135 &loop (".Loop1",1);
136$code.=<<___;
137.Lend1:
138{ .mib; xor Zhi=Zhi,Hhi };; // modulo-scheduling artefact
139{ .mib; mux1 Zlo=Zlo,\@rev };;
140{ .mib; mux1 Zhi=Zhi,\@rev };;
141{ .mmi; add Hlo=9,Xi;; // ;; is here to prevent
142 add Hhi=1,Xi };; // pipeline flush on Itanium
143{ .mib; st8 [Hlo]=Zlo
144 mov pr=prevpr,0x1ffff };;
145{ .mib; st8 [Hhi]=Zhi
146 mov ar.lc=prevlc
147 br.ret.sptk.many b0 };;
148.endp gcm_gmult_4bit#
149___
150
151######################################################################
152# "528B" (well, "512B" actualy) streamed GHASH
153#
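# ("512B" presumably counts the 256-byte Htable copy plus the 256-byte
# Hshr4 table that the prologue below builds on the stack; the shared
# rem_8bit table adds another 512 bytes on top of that.)
#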
154$Xip="in0";
155$Htbl="in1";
156$inp="in2";
157$len="in3";
158$rem_8bit="loc0";
159$mask0xff="loc1";
160($sum,$rum) = $big_endian ? ("nop.m","nop.m") : ("sum","rum");
161
162sub load_htable() {
163 for (my $i=0;$i<8;$i++) {
164 $code.=<<___;
165{ .mmi; ld8 r`16+2*$i+1`=[r8],16 // Htable[$i].hi
166 ld8 r`16+2*$i`=[r9],16 } // Htable[$i].lo
167{ .mmi; ldf8 f`32+2*$i+1`=[r10],16 // Htable[`8+$i`].hi
168 ldf8 f`32+2*$i`=[r11],16 // Htable[`8+$i`].lo
169___
170 $code.=shift if (($i+$#_)==7);
171 $code.="\t};;\n"
172 }
173}
174
175$code.=<<___;
176prevsp=r3;
177
178.align 32
179.skip 16 // aligns loop body
180.global gcm_ghash_4bit#
181.proc gcm_ghash_4bit#
182gcm_ghash_4bit:
183 .prologue
184{ .mmi; .save ar.pfs,prevfs
185 alloc prevfs=ar.pfs,4,2,0,0
186 .vframe prevsp
187 mov prevsp=sp
188 mov $rem_8bit=ip };;
189 .body
190{ .mfi; $ADDP r8=0+0,$Htbl
191 $ADDP r9=0+8,$Htbl }
192{ .mfi; $ADDP r10=128+0,$Htbl
193 $ADDP r11=128+8,$Htbl };;
194___
195 &load_htable(
196 " $ADDP $Xip=15,$Xip", # &Xi[15]
197 " $ADDP $len=$len,$inp", # &inp[len]
198 " $ADDP $inp=15,$inp", # &inp[15]
199 " mov $mask0xff=0xff",
200 " add sp=-512,sp",
201 " andcm sp=sp,$mask0xff", # align stack frame
202 " add r14=0,sp",
203 " add r15=8,sp");
204$code.=<<___;
205{ .mmi; $sum 1<<1 // go big-endian
206 add r8=256+0,sp
207 add r9=256+8,sp }
208{ .mmi; add r10=256+128+0,sp
209 add r11=256+128+8,sp
210 add $len=-17,$len };;
211___
212for($i=0;$i<8;$i++) { # generate first half of Hshr4[]
213my ($rlo,$rhi)=("r".eval(16+2*$i),"r".eval(16+2*$i+1));
214$code.=<<___;
215{ .mmi; st8 [r8]=$rlo,16 // Htable[$i].lo
216 st8 [r9]=$rhi,16 // Htable[$i].hi
217 shrp $rlo=$rhi,$rlo,4 }//;;
218{ .mmi; stf8 [r10]=f`32+2*$i`,16 // Htable[`8+$i`].lo
219 stf8 [r11]=f`32+2*$i+1`,16 // Htable[`8+$i`].hi
220 shr.u $rhi=$rhi,4 };;
221{ .mmi; st8 [r14]=$rlo,16 // Htable[$i].lo>>4
222 st8 [r15]=$rhi,16 }//;; // Htable[$i].hi>>4
223___
224}
225$code.=<<___;
226{ .mmi; ld8 r16=[r8],16 // Htable[8].lo
227 ld8 r17=[r9],16 };; // Htable[8].hi
228{ .mmi; ld8 r18=[r8],16 // Htable[9].lo
229 ld8 r19=[r9],16 } // Htable[9].hi
230{ .mmi; rum 1<<5 // clear um.mfh
231 shrp r16=r17,r16,4 };;
232___
233for($i=0;$i<6;$i++) { # generate second half of Hshr4[]
234$code.=<<___;
235{ .mmi; ld8 r`20+2*$i`=[r8],16 // Htable[`10+$i`].lo
236 ld8 r`20+2*$i+1`=[r9],16 // Htable[`10+$i`].hi
237 shr.u r`16+2*$i+1`=r`16+2*$i+1`,4 };;
238{ .mmi; st8 [r14]=r`16+2*$i`,16 // Htable[`8+$i`].lo>>4
239 st8 [r15]=r`16+2*$i+1`,16 // Htable[`8+$i`].hi>>4
240 shrp r`18+2*$i`=r`18+2*$i+1`,r`18+2*$i`,4 }
241___
242}
243$code.=<<___;
244{ .mmi; shr.u r`16+2*$i+1`=r`16+2*$i+1`,4 };;
245{ .mmi; st8 [r14]=r`16+2*$i`,16 // Htable[`8+$i`].lo>>4
246 st8 [r15]=r`16+2*$i+1`,16 // Htable[`8+$i`].hi>>4
247 shrp r`18+2*$i`=r`18+2*$i+1`,r`18+2*$i`,4 }
248{ .mmi; add $Htbl=256,sp // &Htable[0]
249 add $rem_8bit=rem_8bit#-gcm_ghash_4bit#,$rem_8bit
250 shr.u r`18+2*$i+1`=r`18+2*$i+1`,4 };;
251{ .mmi; st8 [r14]=r`18+2*$i` // Htable[`8+$i`].lo>>4
252 st8 [r15]=r`18+2*$i+1` } // Htable[`8+$i`].hi>>4
253___
254
255$in="r15";
256@xi=("r16","r17");
257@rem=("r18","r19");
258($Alo,$Ahi,$Blo,$Bhi,$Zlo,$Zhi)=("r20","r21","r22","r23","r24","r25");
259($Atbl,$Btbl)=("r26","r27");
260
261$code.=<<___; # (p16)
262{ .mmi; ld1 $in=[$inp],-1 //(p16) *inp--
263 ld1 $xi[0]=[$Xip],-1 //(p16) *Xi--
264 cmp.eq p0,p6=r0,r0 };; // clear p6
265___
266push (@xi,shift(@xi)); push (@rem,shift(@rem)); # "rotate" registers
267
268$code.=<<___; # (p16),(p17)
269{ .mmi; ld1 $xi[0]=[$Xip],-1 //(p16) *Xi--
270 xor $xi[1]=$xi[1],$in };; //(p17) xi=$xi[i]^inp[i]
271{ .mii; ld1 $in=[$inp],-1 //(p16) *inp--
272 dep $Atbl=$xi[1],$Htbl,4,4 //(p17) &Htable[nlo].lo
273 and $xi[1]=-16,$xi[1] };; //(p17) nhi=xi&0xf0
274.align 32
275.LOOP:
276{ .mmi;
277(p6) st8 [$Xip]=$Zhi,13
278 xor $Zlo=$Zlo,$Zlo
279 add $Btbl=$xi[1],$Htbl };; //(p17) &Htable[nhi].lo
280___
281push (@xi,shift(@xi)); push (@rem,shift(@rem)); # "rotate" registers
282
283$code.=<<___; # (p16),(p17),(p18)
284{ .mmi; ld8 $Alo=[$Atbl],8 //(p18) Htable[nlo].lo,&Htable[nlo].hi
285 ld8 $rem[0]=[$Btbl],-256 //(p18) Htable[nhi].lo,&Hshr4[nhi].lo
286 xor $xi[1]=$xi[1],$in };; //(p17) xi=$xi[i]^inp[i]
287{ .mfi; ld8 $Ahi=[$Atbl] //(p18) Htable[nlo].hi
288 dep $Atbl=$xi[1],$Htbl,4,4 } //(p17) &Htable[nlo].lo
289{ .mfi; shladd $rem[0]=$rem[0],4,r0 //(p18) Htable[nhi].lo<<4
290 xor $Zlo=$Zlo,$Alo };; //(p18) Z.lo^=Htable[nlo].lo
291{ .mmi; ld8 $Blo=[$Btbl],8 //(p18) Hshr4[nhi].lo,&Hshr4[nhi].hi
292 ld1 $in=[$inp],-1 } //(p16) *inp--
293{ .mmi; xor $rem[0]=$rem[0],$Zlo //(p18) Z.lo^(Htable[nhi].lo<<4)
294 mov $Zhi=$Ahi //(p18) Z.hi^=Htable[nlo].hi
295 and $xi[1]=-16,$xi[1] };; //(p17) nhi=xi&0xf0
296{ .mmi; ld8 $Bhi=[$Btbl] //(p18) Hshr4[nhi].hi
297 ld1 $xi[0]=[$Xip],-1 //(p16) *Xi--
298 shrp $Zlo=$Zhi,$Zlo,8 } //(p18) Z.lo=(Z.hi<<56)|(Z.lo>>8)
299{ .mmi; and $rem[0]=$rem[0],$mask0xff //(p18) rem=($Zlo^(Htable[nhi].lo<<4))&0xff
300 add $Btbl=$xi[1],$Htbl };; //(p17) &Htable[nhi]
301___
302push (@xi,shift(@xi)); push (@rem,shift(@rem)); # "rotate" registers
303
304for ($i=1;$i<14;$i++) {
305# Above and below fragments are derived from this one by removing
306# unsuitable (p??) instructions.
307$code.=<<___; # (p16),(p17),(p18),(p19)
308{ .mmi; ld8 $Alo=[$Atbl],8 //(p18) Htable[nlo].lo,&Htable[nlo].hi
309 ld8 $rem[0]=[$Btbl],-256 //(p18) Htable[nhi].lo,&Hshr4[nhi].lo
310 shr.u $Zhi=$Zhi,8 } //(p19) Z.hi>>=8
311{ .mmi; shladd $rem[1]=$rem[1],1,$rem_8bit //(p19) &rem_8bit[rem]
312 xor $Zlo=$Zlo,$Blo //(p19) Z.lo^=Hshr4[nhi].lo
313 xor $xi[1]=$xi[1],$in };; //(p17) xi=$xi[i]^inp[i]
314{ .mmi; ld8 $Ahi=[$Atbl] //(p18) Htable[nlo].hi
315 ld2 $rem[1]=[$rem[1]] //(p19) rem_8bit[rem]
316 dep $Atbl=$xi[1],$Htbl,4,4 } //(p17) &Htable[nlo].lo
317{ .mmi; shladd $rem[0]=$rem[0],4,r0 //(p18) Htable[nhi].lo<<4
318 xor $Zlo=$Zlo,$Alo //(p18) Z.lo^=Htable[nlo].lo
319 xor $Zhi=$Zhi,$Bhi };; //(p19) Z.hi^=Hshr4[nhi].hi
320{ .mmi; ld8 $Blo=[$Btbl],8 //(p18) Hshr4[nhi].lo,&Hshr4[nhi].hi
321 ld1 $in=[$inp],-1 //(p16) *inp--
322 shl $rem[1]=$rem[1],48 } //(p19) rem_8bit[rem]<<48
323{ .mmi; xor $rem[0]=$rem[0],$Zlo //(p18) Z.lo^(Htable[nhi].lo<<4)
324 xor $Zhi=$Zhi,$Ahi //(p18) Z.hi^=Htable[nlo].hi
325 and $xi[1]=-16,$xi[1] };; //(p17) nhi=xi&0xf0
326{ .mmi; ld8 $Bhi=[$Btbl] //(p18) Hshr4[nhi].hi
327 ld1 $xi[0]=[$Xip],-1 //(p16) *Xi--
328 shrp $Zlo=$Zhi,$Zlo,8 } //(p18) Z.lo=(Z.hi<<56)|(Z.lo>>8)
329{ .mmi; and $rem[0]=$rem[0],$mask0xff //(p18) rem=($Zlo^(Htable[nhi].lo<<4))&0xff
330 xor $Zhi=$Zhi,$rem[1] //(p19) Z.hi^=rem_8bit[rem]<<48
331 add $Btbl=$xi[1],$Htbl };; //(p17) &Htable[nhi]
332___
333push (@xi,shift(@xi)); push (@rem,shift(@rem)); # "rotate" registers
334}
335
336$code.=<<___; # (p17),(p18),(p19)
337{ .mmi; ld8 $Alo=[$Atbl],8 //(p18) Htable[nlo].lo,&Htable[nlo].hi
338 ld8 $rem[0]=[$Btbl],-256 //(p18) Htable[nhi].lo,&Hshr4[nhi].lo
339 shr.u $Zhi=$Zhi,8 } //(p19) Z.hi>>=8
340{ .mmi; shladd $rem[1]=$rem[1],1,$rem_8bit //(p19) &rem_8bit[rem]
341 xor $Zlo=$Zlo,$Blo //(p19) Z.lo^=Hshr4[nhi].lo
342 xor $xi[1]=$xi[1],$in };; //(p17) xi=$xi[i]^inp[i]
343{ .mmi; ld8 $Ahi=[$Atbl] //(p18) Htable[nlo].hi
344 ld2 $rem[1]=[$rem[1]] //(p19) rem_8bit[rem]
345 dep $Atbl=$xi[1],$Htbl,4,4 };; //(p17) &Htable[nlo].lo
346{ .mmi; shladd $rem[0]=$rem[0],4,r0 //(p18) Htable[nhi].lo<<4
347 xor $Zlo=$Zlo,$Alo //(p18) Z.lo^=Htable[nlo].lo
348 xor $Zhi=$Zhi,$Bhi };; //(p19) Z.hi^=Hshr4[nhi].hi
349{ .mmi; ld8 $Blo=[$Btbl],8 //(p18) Hshr4[nhi].lo,&Hshr4[nhi].hi
350 shl $rem[1]=$rem[1],48 } //(p19) rem_8bit[rem]<<48
351{ .mmi; xor $rem[0]=$rem[0],$Zlo //(p18) Z.lo^(Htable[nhi].lo<<4)
352 xor $Zhi=$Zhi,$Ahi //(p18) Z.hi^=Htable[nlo].hi
353 and $xi[1]=-16,$xi[1] };; //(p17) nhi=xi&0xf0
354{ .mmi; ld8 $Bhi=[$Btbl] //(p18) Hshr4[nhi].hi
355 shrp $Zlo=$Zhi,$Zlo,8 } //(p18) Z.lo=(Z.hi<<56)|(Z.lo>>8)
356{ .mmi; and $rem[0]=$rem[0],$mask0xff //(p18) rem=($Zlo^(Htable[nhi].lo<<4))&0xff
357 xor $Zhi=$Zhi,$rem[1] //(p19) Z.hi^=rem_8bit[rem]<<48
358 add $Btbl=$xi[1],$Htbl };; //(p17) &Htable[nhi]
359___
360push (@xi,shift(@xi)); push (@rem,shift(@rem)); # "rotate" registers
361
362$code.=<<___; # (p18),(p19)
363{ .mfi; ld8 $Alo=[$Atbl],8 //(p18) Htable[nlo].lo,&Htable[nlo].hi
364 shr.u $Zhi=$Zhi,8 } //(p19) Z.hi>>=8
365{ .mfi; shladd $rem[1]=$rem[1],1,$rem_8bit //(p19) &rem_8bit[rem]
366 xor $Zlo=$Zlo,$Blo };; //(p19) Z.lo^=Hshr4[nhi].lo
367{ .mfi; ld8 $Ahi=[$Atbl] //(p18) Htable[nlo].hi
368 xor $Zlo=$Zlo,$Alo } //(p18) Z.lo^=Htable[nlo].lo
369{ .mfi; ld2 $rem[1]=[$rem[1]] //(p19) rem_8bit[rem]
370 xor $Zhi=$Zhi,$Bhi };; //(p19) Z.hi^=Hshr4[nhi].hi
371{ .mfi; ld8 $Blo=[$Btbl],8 //(p18) Htable[nhi].lo,&Htable[nhi].hi
372 shl $rem[1]=$rem[1],48 } //(p19) rem_8bit[rem]<<48
373{ .mfi; shladd $rem[0]=$Zlo,4,r0 //(p18) Z.lo<<4
374 xor $Zhi=$Zhi,$Ahi };; //(p18) Z.hi^=Htable[nlo].hi
375{ .mfi; ld8 $Bhi=[$Btbl] //(p18) Htable[nhi].hi
376 shrp $Zlo=$Zhi,$Zlo,4 } //(p18) Z.lo=(Z.hi<<60)|(Z.lo>>4)
377{ .mfi; and $rem[0]=$rem[0],$mask0xff //(p18) rem=($Zlo^(Htable[nhi].lo<<4))&0xff
378 xor $Zhi=$Zhi,$rem[1] };; //(p19) Z.hi^=rem_8bit[rem]<<48
379___
380push (@xi,shift(@xi)); push (@rem,shift(@rem)); # "rotate" registers
381
382$code.=<<___; # (p19)
383{ .mmi; cmp.ltu p6,p0=$inp,$len
384 add $inp=32,$inp
385 shr.u $Zhi=$Zhi,4 } //(p19) Z.hi>>=4
386{ .mmi; shladd $rem[1]=$rem[1],1,$rem_8bit //(p19) &rem_8bit[rem]
387 xor $Zlo=$Zlo,$Blo //(p19) Z.lo^=Hshr4[nhi].lo
388 add $Xip=9,$Xip };; // &Xi.lo
389{ .mmi; ld2 $rem[1]=[$rem[1]] //(p19) rem_8bit[rem]
390(p6) ld1 $in=[$inp],-1 //[p16] *inp--
391(p6) extr.u $xi[1]=$Zlo,8,8 } //[p17] Xi[14]
392{ .mmi; xor $Zhi=$Zhi,$Bhi //(p19) Z.hi^=Hshr4[nhi].hi
393(p6) and $xi[0]=$Zlo,$mask0xff };; //[p16] Xi[15]
394{ .mmi; st8 [$Xip]=$Zlo,-8
395(p6) xor $xi[0]=$xi[0],$in //[p17] xi=$xi[i]^inp[i]
396 shl $rem[1]=$rem[1],48 };; //(p19) rem_8bit[rem]<<48
397{ .mmi;
398(p6) ld1 $in=[$inp],-1 //[p16] *inp--
399 xor $Zhi=$Zhi,$rem[1] //(p19) Z.hi^=rem_8bit[rem]<<48
400(p6) dep $Atbl=$xi[0],$Htbl,4,4 } //[p17] &Htable[nlo].lo
401{ .mib;
402(p6) and $xi[0]=-16,$xi[0] //[p17] nhi=xi&0xf0
403(p6) br.cond.dptk.many .LOOP };;
404
405{ .mib; st8 [$Xip]=$Zhi };;
406{ .mib; $rum 1<<1 // return to little-endian
407 .restore sp
408 mov sp=prevsp
409 br.ret.sptk.many b0 };;
410.endp gcm_ghash_4bit#
411___
412$code.=<<___;
413.align 128
414.type rem_4bit#,\@object
415rem_4bit:
416 data8 0x0000<<48, 0x1C20<<48, 0x3840<<48, 0x2460<<48
417 data8 0x7080<<48, 0x6CA0<<48, 0x48C0<<48, 0x54E0<<48
418 data8 0xE100<<48, 0xFD20<<48, 0xD940<<48, 0xC560<<48
419 data8 0x9180<<48, 0x8DA0<<48, 0xA9C0<<48, 0xB5E0<<48
420.size rem_4bit#,128
421.type rem_8bit#,\@object
422rem_8bit:
423 data1 0x00,0x00, 0x01,0xC2, 0x03,0x84, 0x02,0x46, 0x07,0x08, 0x06,0xCA, 0x04,0x8C, 0x05,0x4E
424 data1 0x0E,0x10, 0x0F,0xD2, 0x0D,0x94, 0x0C,0x56, 0x09,0x18, 0x08,0xDA, 0x0A,0x9C, 0x0B,0x5E
425 data1 0x1C,0x20, 0x1D,0xE2, 0x1F,0xA4, 0x1E,0x66, 0x1B,0x28, 0x1A,0xEA, 0x18,0xAC, 0x19,0x6E
426 data1 0x12,0x30, 0x13,0xF2, 0x11,0xB4, 0x10,0x76, 0x15,0x38, 0x14,0xFA, 0x16,0xBC, 0x17,0x7E
427 data1 0x38,0x40, 0x39,0x82, 0x3B,0xC4, 0x3A,0x06, 0x3F,0x48, 0x3E,0x8A, 0x3C,0xCC, 0x3D,0x0E
428 data1 0x36,0x50, 0x37,0x92, 0x35,0xD4, 0x34,0x16, 0x31,0x58, 0x30,0x9A, 0x32,0xDC, 0x33,0x1E
429 data1 0x24,0x60, 0x25,0xA2, 0x27,0xE4, 0x26,0x26, 0x23,0x68, 0x22,0xAA, 0x20,0xEC, 0x21,0x2E
430 data1 0x2A,0x70, 0x2B,0xB2, 0x29,0xF4, 0x28,0x36, 0x2D,0x78, 0x2C,0xBA, 0x2E,0xFC, 0x2F,0x3E
431 data1 0x70,0x80, 0x71,0x42, 0x73,0x04, 0x72,0xC6, 0x77,0x88, 0x76,0x4A, 0x74,0x0C, 0x75,0xCE
432 data1 0x7E,0x90, 0x7F,0x52, 0x7D,0x14, 0x7C,0xD6, 0x79,0x98, 0x78,0x5A, 0x7A,0x1C, 0x7B,0xDE
433 data1 0x6C,0xA0, 0x6D,0x62, 0x6F,0x24, 0x6E,0xE6, 0x6B,0xA8, 0x6A,0x6A, 0x68,0x2C, 0x69,0xEE
434 data1 0x62,0xB0, 0x63,0x72, 0x61,0x34, 0x60,0xF6, 0x65,0xB8, 0x64,0x7A, 0x66,0x3C, 0x67,0xFE
435 data1 0x48,0xC0, 0x49,0x02, 0x4B,0x44, 0x4A,0x86, 0x4F,0xC8, 0x4E,0x0A, 0x4C,0x4C, 0x4D,0x8E
436 data1 0x46,0xD0, 0x47,0x12, 0x45,0x54, 0x44,0x96, 0x41,0xD8, 0x40,0x1A, 0x42,0x5C, 0x43,0x9E
437 data1 0x54,0xE0, 0x55,0x22, 0x57,0x64, 0x56,0xA6, 0x53,0xE8, 0x52,0x2A, 0x50,0x6C, 0x51,0xAE
438 data1 0x5A,0xF0, 0x5B,0x32, 0x59,0x74, 0x58,0xB6, 0x5D,0xF8, 0x5C,0x3A, 0x5E,0x7C, 0x5F,0xBE
439 data1 0xE1,0x00, 0xE0,0xC2, 0xE2,0x84, 0xE3,0x46, 0xE6,0x08, 0xE7,0xCA, 0xE5,0x8C, 0xE4,0x4E
440 data1 0xEF,0x10, 0xEE,0xD2, 0xEC,0x94, 0xED,0x56, 0xE8,0x18, 0xE9,0xDA, 0xEB,0x9C, 0xEA,0x5E
441 data1 0xFD,0x20, 0xFC,0xE2, 0xFE,0xA4, 0xFF,0x66, 0xFA,0x28, 0xFB,0xEA, 0xF9,0xAC, 0xF8,0x6E
442 data1 0xF3,0x30, 0xF2,0xF2, 0xF0,0xB4, 0xF1,0x76, 0xF4,0x38, 0xF5,0xFA, 0xF7,0xBC, 0xF6,0x7E
443 data1 0xD9,0x40, 0xD8,0x82, 0xDA,0xC4, 0xDB,0x06, 0xDE,0x48, 0xDF,0x8A, 0xDD,0xCC, 0xDC,0x0E
444 data1 0xD7,0x50, 0xD6,0x92, 0xD4,0xD4, 0xD5,0x16, 0xD0,0x58, 0xD1,0x9A, 0xD3,0xDC, 0xD2,0x1E
445 data1 0xC5,0x60, 0xC4,0xA2, 0xC6,0xE4, 0xC7,0x26, 0xC2,0x68, 0xC3,0xAA, 0xC1,0xEC, 0xC0,0x2E
446 data1 0xCB,0x70, 0xCA,0xB2, 0xC8,0xF4, 0xC9,0x36, 0xCC,0x78, 0xCD,0xBA, 0xCF,0xFC, 0xCE,0x3E
447 data1 0x91,0x80, 0x90,0x42, 0x92,0x04, 0x93,0xC6, 0x96,0x88, 0x97,0x4A, 0x95,0x0C, 0x94,0xCE
448 data1 0x9F,0x90, 0x9E,0x52, 0x9C,0x14, 0x9D,0xD6, 0x98,0x98, 0x99,0x5A, 0x9B,0x1C, 0x9A,0xDE
449 data1 0x8D,0xA0, 0x8C,0x62, 0x8E,0x24, 0x8F,0xE6, 0x8A,0xA8, 0x8B,0x6A, 0x89,0x2C, 0x88,0xEE
450 data1 0x83,0xB0, 0x82,0x72, 0x80,0x34, 0x81,0xF6, 0x84,0xB8, 0x85,0x7A, 0x87,0x3C, 0x86,0xFE
451 data1 0xA9,0xC0, 0xA8,0x02, 0xAA,0x44, 0xAB,0x86, 0xAE,0xC8, 0xAF,0x0A, 0xAD,0x4C, 0xAC,0x8E
452 data1 0xA7,0xD0, 0xA6,0x12, 0xA4,0x54, 0xA5,0x96, 0xA0,0xD8, 0xA1,0x1A, 0xA3,0x5C, 0xA2,0x9E
453 data1 0xB5,0xE0, 0xB4,0x22, 0xB6,0x64, 0xB7,0xA6, 0xB2,0xE8, 0xB3,0x2A, 0xB1,0x6C, 0xB0,0xAE
454 data1 0xBB,0xF0, 0xBA,0x32, 0xB8,0x74, 0xB9,0xB6, 0xBC,0xF8, 0xBD,0x3A, 0xBF,0x7C, 0xBE,0xBE
455.size rem_8bit#,512
456stringz "GHASH for IA64, CRYPTOGAMS by <appro\@openssl.org>"
457___
458
459$code =~ s/mux1(\s+)\S+\@rev/nop.i$1 0x0/gm if ($big_endian);
460$code =~ s/\`([^\`]*)\`/eval $1/gem;
461
462print $code;
463close STDOUT;
diff --git a/src/lib/libssl/src/crypto/modes/asm/ghash-parisc.pl b/src/lib/libssl/src/crypto/modes/asm/ghash-parisc.pl
new file mode 100644
index 0000000000..8c7454ee93
--- /dev/null
+++ b/src/lib/libssl/src/crypto/modes/asm/ghash-parisc.pl
@@ -0,0 +1,730 @@
1#!/usr/bin/env perl
2#
3# ====================================================================
4# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9#
10# April 2010
11#
12# The module implements "4-bit" GCM GHASH function and underlying
13# single multiplication operation in GF(2^128). "4-bit" means that it
14# uses 256 bytes per-key table [+128 bytes shared table]. On PA-7100LC
15# it processes one byte in 19.6 cycles, which is more than twice as
16# fast as code generated by gcc 3.2. The PA-RISC 2.0 loop is scheduled
17# for 8 cycles, but measured performance on a PA-8600 system is ~9 cycles
18# per processed byte. This is ~2.2x faster than 64-bit code generated by
19# the vendor compiler (which used to be very hard to beat:-).
20#
21# Special thanks to polarhome.com for providing HP-UX account.
22
23$flavour = shift;
24$output = shift;
25open STDOUT,">$output";
26
27if ($flavour =~ /64/) {
28 $LEVEL ="2.0W";
29 $SIZE_T =8;
30 $FRAME_MARKER =80;
31 $SAVED_RP =16;
32 $PUSH ="std";
33 $PUSHMA ="std,ma";
34 $POP ="ldd";
35 $POPMB ="ldd,mb";
36 $NREGS =6;
37} else {
38 $LEVEL ="1.0"; #"\n\t.ALLOW\t2.0";
39 $SIZE_T =4;
40 $FRAME_MARKER =48;
41 $SAVED_RP =20;
42 $PUSH ="stw";
43 $PUSHMA ="stwm";
44 $POP ="ldw";
45 $POPMB ="ldwm";
46 $NREGS =11;
47}
48
49$FRAME=10*$SIZE_T+$FRAME_MARKER;# NREGS saved regs + frame marker
50 # [+ argument transfer]
51
52################# volatile registers
53$Xi="%r26"; # argument block
54$Htbl="%r25";
55$inp="%r24";
56$len="%r23";
57$Hhh=$Htbl; # variables
58$Hll="%r22";
59$Zhh="%r21";
60$Zll="%r20";
61$cnt="%r19";
62$rem_4bit="%r28";
63$rem="%r29";
64$mask0xf0="%r31";
65
66################# preserved registers
67$Thh="%r1";
68$Tll="%r2";
69$nlo="%r3";
70$nhi="%r4";
71$byte="%r5";
72if ($SIZE_T==4) {
73 $Zhl="%r6";
74 $Zlh="%r7";
75 $Hhl="%r8";
76 $Hlh="%r9";
77 $Thl="%r10";
78 $Tlh="%r11";
79}
80$rem2="%r6"; # used in PA-RISC 2.0 code
81
82$code.=<<___;
83 .LEVEL $LEVEL
84 .SPACE \$TEXT\$
85 .SUBSPA \$CODE\$,QUAD=0,ALIGN=8,ACCESS=0x2C,CODE_ONLY
86
87 .EXPORT gcm_gmult_4bit,ENTRY,ARGW0=GR,ARGW1=GR
88 .ALIGN 64
89gcm_gmult_4bit
90 .PROC
91 .CALLINFO FRAME=`$FRAME-10*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=$NREGS
92 .ENTRY
93 $PUSH %r2,-$SAVED_RP(%sp) ; standard prologue
94 $PUSHMA %r3,$FRAME(%sp)
95 $PUSH %r4,`-$FRAME+1*$SIZE_T`(%sp)
96 $PUSH %r5,`-$FRAME+2*$SIZE_T`(%sp)
97 $PUSH %r6,`-$FRAME+3*$SIZE_T`(%sp)
98___
99$code.=<<___ if ($SIZE_T==4);
100 $PUSH %r7,`-$FRAME+4*$SIZE_T`(%sp)
101 $PUSH %r8,`-$FRAME+5*$SIZE_T`(%sp)
102 $PUSH %r9,`-$FRAME+6*$SIZE_T`(%sp)
103 $PUSH %r10,`-$FRAME+7*$SIZE_T`(%sp)
104 $PUSH %r11,`-$FRAME+8*$SIZE_T`(%sp)
105___
106$code.=<<___;
107 blr %r0,$rem_4bit
108 ldi 3,$rem
109L\$pic_gmult
110 andcm $rem_4bit,$rem,$rem_4bit
111 addl $inp,$len,$len
112 ldo L\$rem_4bit-L\$pic_gmult($rem_4bit),$rem_4bit
113 ldi 0xf0,$mask0xf0
114___
115$code.=<<___ if ($SIZE_T==4);
116 ldi 31,$rem
117 mtctl $rem,%cr11
118 extrd,u,*= $rem,%sar,1,$rem ; executes on PA-RISC 1.0
119 b L\$parisc1_gmult
120 nop
121___
122
123$code.=<<___;
124 ldb 15($Xi),$nlo
125 ldo 8($Htbl),$Hll
126
127 and $mask0xf0,$nlo,$nhi
128 depd,z $nlo,59,4,$nlo
129
130 ldd $nlo($Hll),$Zll
131 ldd $nlo($Hhh),$Zhh
132
133 depd,z $Zll,60,4,$rem
134 shrpd $Zhh,$Zll,4,$Zll
135 extrd,u $Zhh,59,60,$Zhh
136 ldb 14($Xi),$nlo
137
138 ldd $nhi($Hll),$Tll
139 ldd $nhi($Hhh),$Thh
140 and $mask0xf0,$nlo,$nhi
141 depd,z $nlo,59,4,$nlo
142
143 xor $Tll,$Zll,$Zll
144 xor $Thh,$Zhh,$Zhh
145 ldd $rem($rem_4bit),$rem
146 b L\$oop_gmult_pa2
147 ldi 13,$cnt
148
149 .ALIGN 8
150L\$oop_gmult_pa2
151 xor $rem,$Zhh,$Zhh ; moved here to work around gas bug
152 depd,z $Zll,60,4,$rem
153
154 shrpd $Zhh,$Zll,4,$Zll
155 extrd,u $Zhh,59,60,$Zhh
156 ldd $nlo($Hll),$Tll
157 ldd $nlo($Hhh),$Thh
158
159 xor $Tll,$Zll,$Zll
160 xor $Thh,$Zhh,$Zhh
161 ldd $rem($rem_4bit),$rem
162
163 xor $rem,$Zhh,$Zhh
164 depd,z $Zll,60,4,$rem
165 ldbx $cnt($Xi),$nlo
166
167 shrpd $Zhh,$Zll,4,$Zll
168 extrd,u $Zhh,59,60,$Zhh
169 ldd $nhi($Hll),$Tll
170 ldd $nhi($Hhh),$Thh
171
172 and $mask0xf0,$nlo,$nhi
173 depd,z $nlo,59,4,$nlo
174 ldd $rem($rem_4bit),$rem
175
176 xor $Tll,$Zll,$Zll
177 addib,uv -1,$cnt,L\$oop_gmult_pa2
178 xor $Thh,$Zhh,$Zhh
179
180 xor $rem,$Zhh,$Zhh
181 depd,z $Zll,60,4,$rem
182
183 shrpd $Zhh,$Zll,4,$Zll
184 extrd,u $Zhh,59,60,$Zhh
185 ldd $nlo($Hll),$Tll
186 ldd $nlo($Hhh),$Thh
187
188 xor $Tll,$Zll,$Zll
189 xor $Thh,$Zhh,$Zhh
190 ldd $rem($rem_4bit),$rem
191
192 xor $rem,$Zhh,$Zhh
193 depd,z $Zll,60,4,$rem
194
195 shrpd $Zhh,$Zll,4,$Zll
196 extrd,u $Zhh,59,60,$Zhh
197 ldd $nhi($Hll),$Tll
198 ldd $nhi($Hhh),$Thh
199
200 xor $Tll,$Zll,$Zll
201 xor $Thh,$Zhh,$Zhh
202 ldd $rem($rem_4bit),$rem
203
204 xor $rem,$Zhh,$Zhh
205 std $Zll,8($Xi)
206 std $Zhh,0($Xi)
207___
208
209$code.=<<___ if ($SIZE_T==4);
210 b L\$done_gmult
211 nop
212
213L\$parisc1_gmult
214 ldb 15($Xi),$nlo
215 ldo 12($Htbl),$Hll
216 ldo 8($Htbl),$Hlh
217 ldo 4($Htbl),$Hhl
218
219 and $mask0xf0,$nlo,$nhi
220 zdep $nlo,27,4,$nlo
221
222 ldwx $nlo($Hll),$Zll
223 ldwx $nlo($Hlh),$Zlh
224 ldwx $nlo($Hhl),$Zhl
225 ldwx $nlo($Hhh),$Zhh
226 zdep $Zll,28,4,$rem
227 ldb 14($Xi),$nlo
228 ldwx $rem($rem_4bit),$rem
229 shrpw $Zlh,$Zll,4,$Zll
230 ldwx $nhi($Hll),$Tll
231 shrpw $Zhl,$Zlh,4,$Zlh
232 ldwx $nhi($Hlh),$Tlh
233 shrpw $Zhh,$Zhl,4,$Zhl
234 ldwx $nhi($Hhl),$Thl
235 extru $Zhh,27,28,$Zhh
236 ldwx $nhi($Hhh),$Thh
237 xor $rem,$Zhh,$Zhh
238 and $mask0xf0,$nlo,$nhi
239 zdep $nlo,27,4,$nlo
240
241 xor $Tll,$Zll,$Zll
242 ldwx $nlo($Hll),$Tll
243 xor $Tlh,$Zlh,$Zlh
244 ldwx $nlo($Hlh),$Tlh
245 xor $Thl,$Zhl,$Zhl
246 b L\$oop_gmult_pa1
247 ldi 13,$cnt
248
249 .ALIGN 8
250L\$oop_gmult_pa1
251 zdep $Zll,28,4,$rem
252 ldwx $nlo($Hhl),$Thl
253 xor $Thh,$Zhh,$Zhh
254 ldwx $rem($rem_4bit),$rem
255 shrpw $Zlh,$Zll,4,$Zll
256 ldwx $nlo($Hhh),$Thh
257 shrpw $Zhl,$Zlh,4,$Zlh
258 ldbx $cnt($Xi),$nlo
259 xor $Tll,$Zll,$Zll
260 ldwx $nhi($Hll),$Tll
261 shrpw $Zhh,$Zhl,4,$Zhl
262 xor $Tlh,$Zlh,$Zlh
263 ldwx $nhi($Hlh),$Tlh
264 extru $Zhh,27,28,$Zhh
265 xor $Thl,$Zhl,$Zhl
266 ldwx $nhi($Hhl),$Thl
267 xor $rem,$Zhh,$Zhh
268 zdep $Zll,28,4,$rem
269 xor $Thh,$Zhh,$Zhh
270 ldwx $nhi($Hhh),$Thh
271 shrpw $Zlh,$Zll,4,$Zll
272 ldwx $rem($rem_4bit),$rem
273 shrpw $Zhl,$Zlh,4,$Zlh
274 shrpw $Zhh,$Zhl,4,$Zhl
275 and $mask0xf0,$nlo,$nhi
276 extru $Zhh,27,28,$Zhh
277 zdep $nlo,27,4,$nlo
278 xor $Tll,$Zll,$Zll
279 ldwx $nlo($Hll),$Tll
280 xor $Tlh,$Zlh,$Zlh
281 ldwx $nlo($Hlh),$Tlh
282 xor $rem,$Zhh,$Zhh
283 addib,uv -1,$cnt,L\$oop_gmult_pa1
284 xor $Thl,$Zhl,$Zhl
285
286 zdep $Zll,28,4,$rem
287 ldwx $nlo($Hhl),$Thl
288 xor $Thh,$Zhh,$Zhh
289 ldwx $rem($rem_4bit),$rem
290 shrpw $Zlh,$Zll,4,$Zll
291 ldwx $nlo($Hhh),$Thh
292 shrpw $Zhl,$Zlh,4,$Zlh
293 xor $Tll,$Zll,$Zll
294 ldwx $nhi($Hll),$Tll
295 shrpw $Zhh,$Zhl,4,$Zhl
296 xor $Tlh,$Zlh,$Zlh
297 ldwx $nhi($Hlh),$Tlh
298 extru $Zhh,27,28,$Zhh
299 xor $rem,$Zhh,$Zhh
300 xor $Thl,$Zhl,$Zhl
301 ldwx $nhi($Hhl),$Thl
302 xor $Thh,$Zhh,$Zhh
303 ldwx $nhi($Hhh),$Thh
304 zdep $Zll,28,4,$rem
305 ldwx $rem($rem_4bit),$rem
306 shrpw $Zlh,$Zll,4,$Zll
307 shrpw $Zhl,$Zlh,4,$Zlh
308 shrpw $Zhh,$Zhl,4,$Zhl
309 extru $Zhh,27,28,$Zhh
310 xor $Tll,$Zll,$Zll
311 xor $Tlh,$Zlh,$Zlh
312 xor $rem,$Zhh,$Zhh
313 stw $Zll,12($Xi)
314 xor $Thl,$Zhl,$Zhl
315 stw $Zlh,8($Xi)
316 xor $Thh,$Zhh,$Zhh
317 stw $Zhl,4($Xi)
318 stw $Zhh,0($Xi)
319___
320$code.=<<___;
321L\$done_gmult
322 $POP `-$FRAME-$SAVED_RP`(%sp),%r2 ; standard epilogue
323 $POP `-$FRAME+1*$SIZE_T`(%sp),%r4
324 $POP `-$FRAME+2*$SIZE_T`(%sp),%r5
325 $POP `-$FRAME+3*$SIZE_T`(%sp),%r6
326___
327$code.=<<___ if ($SIZE_T==4);
328 $POP `-$FRAME+4*$SIZE_T`(%sp),%r7
329 $POP `-$FRAME+5*$SIZE_T`(%sp),%r8
330 $POP `-$FRAME+6*$SIZE_T`(%sp),%r9
331 $POP `-$FRAME+7*$SIZE_T`(%sp),%r10
332 $POP `-$FRAME+8*$SIZE_T`(%sp),%r11
333___
334$code.=<<___;
335 bv (%r2)
336 .EXIT
337 $POPMB -$FRAME(%sp),%r3
338 .PROCEND
339
340 .EXPORT gcm_ghash_4bit,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR
341 .ALIGN 64
342gcm_ghash_4bit
343 .PROC
344 .CALLINFO FRAME=`$FRAME-10*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=11
345 .ENTRY
346 $PUSH %r2,-$SAVED_RP(%sp) ; standard prologue
347 $PUSHMA %r3,$FRAME(%sp)
348 $PUSH %r4,`-$FRAME+1*$SIZE_T`(%sp)
349 $PUSH %r5,`-$FRAME+2*$SIZE_T`(%sp)
350 $PUSH %r6,`-$FRAME+3*$SIZE_T`(%sp)
351___
352$code.=<<___ if ($SIZE_T==4);
353 $PUSH %r7,`-$FRAME+4*$SIZE_T`(%sp)
354 $PUSH %r8,`-$FRAME+5*$SIZE_T`(%sp)
355 $PUSH %r9,`-$FRAME+6*$SIZE_T`(%sp)
356 $PUSH %r10,`-$FRAME+7*$SIZE_T`(%sp)
357 $PUSH %r11,`-$FRAME+8*$SIZE_T`(%sp)
358___
359$code.=<<___;
360 blr %r0,$rem_4bit
361 ldi 3,$rem
362L\$pic_ghash
363 andcm $rem_4bit,$rem,$rem_4bit
364 addl $inp,$len,$len
365 ldo L\$rem_4bit-L\$pic_ghash($rem_4bit),$rem_4bit
366 ldi 0xf0,$mask0xf0
367___
368$code.=<<___ if ($SIZE_T==4);
369 ldi 31,$rem
370 mtctl $rem,%cr11
371 extrd,u,*= $rem,%sar,1,$rem ; executes on PA-RISC 1.0
372 b L\$parisc1_ghash
373 nop
374___
375
376$code.=<<___;
377 ldb 15($Xi),$nlo
378 ldo 8($Htbl),$Hll
379
380L\$outer_ghash_pa2
381 ldb 15($inp),$nhi
382 xor $nhi,$nlo,$nlo
383 and $mask0xf0,$nlo,$nhi
384 depd,z $nlo,59,4,$nlo
385
386 ldd $nlo($Hll),$Zll
387 ldd $nlo($Hhh),$Zhh
388
389 depd,z $Zll,60,4,$rem
390 shrpd $Zhh,$Zll,4,$Zll
391 extrd,u $Zhh,59,60,$Zhh
392 ldb 14($Xi),$nlo
393 ldb 14($inp),$byte
394
395 ldd $nhi($Hll),$Tll
396 ldd $nhi($Hhh),$Thh
397 xor $byte,$nlo,$nlo
398 and $mask0xf0,$nlo,$nhi
399 depd,z $nlo,59,4,$nlo
400
401 xor $Tll,$Zll,$Zll
402 xor $Thh,$Zhh,$Zhh
403 ldd $rem($rem_4bit),$rem
404 b L\$oop_ghash_pa2
405 ldi 13,$cnt
406
407 .ALIGN 8
408L\$oop_ghash_pa2
409 xor $rem,$Zhh,$Zhh ; moved here to work around gas bug
410 depd,z $Zll,60,4,$rem2
411
412 shrpd $Zhh,$Zll,4,$Zll
413 extrd,u $Zhh,59,60,$Zhh
414 ldd $nlo($Hll),$Tll
415 ldd $nlo($Hhh),$Thh
416
417 xor $Tll,$Zll,$Zll
418 xor $Thh,$Zhh,$Zhh
419 ldbx $cnt($Xi),$nlo
420 ldbx $cnt($inp),$byte
421
422 depd,z $Zll,60,4,$rem
423 shrpd $Zhh,$Zll,4,$Zll
424 ldd $rem2($rem_4bit),$rem2
425
426 xor $rem2,$Zhh,$Zhh
427 xor $byte,$nlo,$nlo
428 ldd $nhi($Hll),$Tll
429 ldd $nhi($Hhh),$Thh
430
431 and $mask0xf0,$nlo,$nhi
432 depd,z $nlo,59,4,$nlo
433
434 extrd,u $Zhh,59,60,$Zhh
435 xor $Tll,$Zll,$Zll
436
437 ldd $rem($rem_4bit),$rem
438 addib,uv -1,$cnt,L\$oop_ghash_pa2
439 xor $Thh,$Zhh,$Zhh
440
441 xor $rem,$Zhh,$Zhh
442 depd,z $Zll,60,4,$rem2
443
444 shrpd $Zhh,$Zll,4,$Zll
445 extrd,u $Zhh,59,60,$Zhh
446 ldd $nlo($Hll),$Tll
447 ldd $nlo($Hhh),$Thh
448
449 xor $Tll,$Zll,$Zll
450 xor $Thh,$Zhh,$Zhh
451
452 depd,z $Zll,60,4,$rem
453 shrpd $Zhh,$Zll,4,$Zll
454 ldd $rem2($rem_4bit),$rem2
455
456 xor $rem2,$Zhh,$Zhh
457 ldd $nhi($Hll),$Tll
458 ldd $nhi($Hhh),$Thh
459
460 extrd,u $Zhh,59,60,$Zhh
461 xor $Tll,$Zll,$Zll
462 xor $Thh,$Zhh,$Zhh
463 ldd $rem($rem_4bit),$rem
464
465 xor $rem,$Zhh,$Zhh
466 std $Zll,8($Xi)
467 ldo 16($inp),$inp
468 std $Zhh,0($Xi)
469 cmpb,*<> $inp,$len,L\$outer_ghash_pa2
470 copy $Zll,$nlo
471___
472
473$code.=<<___ if ($SIZE_T==4);
474 b L\$done_ghash
475 nop
476
477L\$parisc1_ghash
478 ldb 15($Xi),$nlo
479 ldo 12($Htbl),$Hll
480 ldo 8($Htbl),$Hlh
481 ldo 4($Htbl),$Hhl
482
483L\$outer_ghash_pa1
484 ldb 15($inp),$byte
485 xor $byte,$nlo,$nlo
486 and $mask0xf0,$nlo,$nhi
487 zdep $nlo,27,4,$nlo
488
489 ldwx $nlo($Hll),$Zll
490 ldwx $nlo($Hlh),$Zlh
491 ldwx $nlo($Hhl),$Zhl
492 ldwx $nlo($Hhh),$Zhh
493 zdep $Zll,28,4,$rem
494 ldb 14($Xi),$nlo
495 ldb 14($inp),$byte
496 ldwx $rem($rem_4bit),$rem
497 shrpw $Zlh,$Zll,4,$Zll
498 ldwx $nhi($Hll),$Tll
499 shrpw $Zhl,$Zlh,4,$Zlh
500 ldwx $nhi($Hlh),$Tlh
501 shrpw $Zhh,$Zhl,4,$Zhl
502 ldwx $nhi($Hhl),$Thl
503 extru $Zhh,27,28,$Zhh
504 ldwx $nhi($Hhh),$Thh
505 xor $byte,$nlo,$nlo
506 xor $rem,$Zhh,$Zhh
507 and $mask0xf0,$nlo,$nhi
508 zdep $nlo,27,4,$nlo
509
510 xor $Tll,$Zll,$Zll
511 ldwx $nlo($Hll),$Tll
512 xor $Tlh,$Zlh,$Zlh
513 ldwx $nlo($Hlh),$Tlh
514 xor $Thl,$Zhl,$Zhl
515 b L\$oop_ghash_pa1
516 ldi 13,$cnt
517
518 .ALIGN 8
519L\$oop_ghash_pa1
520 zdep $Zll,28,4,$rem
521 ldwx $nlo($Hhl),$Thl
522 xor $Thh,$Zhh,$Zhh
523 ldwx $rem($rem_4bit),$rem
524 shrpw $Zlh,$Zll,4,$Zll
525 ldwx $nlo($Hhh),$Thh
526 shrpw $Zhl,$Zlh,4,$Zlh
527 ldbx $cnt($Xi),$nlo
528 xor $Tll,$Zll,$Zll
529 ldwx $nhi($Hll),$Tll
530 shrpw $Zhh,$Zhl,4,$Zhl
531 ldbx $cnt($inp),$byte
532 xor $Tlh,$Zlh,$Zlh
533 ldwx $nhi($Hlh),$Tlh
534 extru $Zhh,27,28,$Zhh
535 xor $Thl,$Zhl,$Zhl
536 ldwx $nhi($Hhl),$Thl
537 xor $rem,$Zhh,$Zhh
538 zdep $Zll,28,4,$rem
539 xor $Thh,$Zhh,$Zhh
540 ldwx $nhi($Hhh),$Thh
541 shrpw $Zlh,$Zll,4,$Zll
542 ldwx $rem($rem_4bit),$rem
543 shrpw $Zhl,$Zlh,4,$Zlh
544 xor $byte,$nlo,$nlo
545 shrpw $Zhh,$Zhl,4,$Zhl
546 and $mask0xf0,$nlo,$nhi
547 extru $Zhh,27,28,$Zhh
548 zdep $nlo,27,4,$nlo
549 xor $Tll,$Zll,$Zll
550 ldwx $nlo($Hll),$Tll
551 xor $Tlh,$Zlh,$Zlh
552 ldwx $nlo($Hlh),$Tlh
553 xor $rem,$Zhh,$Zhh
554 addib,uv -1,$cnt,L\$oop_ghash_pa1
555 xor $Thl,$Zhl,$Zhl
556
557 zdep $Zll,28,4,$rem
558 ldwx $nlo($Hhl),$Thl
559 xor $Thh,$Zhh,$Zhh
560 ldwx $rem($rem_4bit),$rem
561 shrpw $Zlh,$Zll,4,$Zll
562 ldwx $nlo($Hhh),$Thh
563 shrpw $Zhl,$Zlh,4,$Zlh
564 xor $Tll,$Zll,$Zll
565 ldwx $nhi($Hll),$Tll
566 shrpw $Zhh,$Zhl,4,$Zhl
567 xor $Tlh,$Zlh,$Zlh
568 ldwx $nhi($Hlh),$Tlh
569 extru $Zhh,27,28,$Zhh
570 xor $rem,$Zhh,$Zhh
571 xor $Thl,$Zhl,$Zhl
572 ldwx $nhi($Hhl),$Thl
573 xor $Thh,$Zhh,$Zhh
574 ldwx $nhi($Hhh),$Thh
575 zdep $Zll,28,4,$rem
576 ldwx $rem($rem_4bit),$rem
577 shrpw $Zlh,$Zll,4,$Zll
578 shrpw $Zhl,$Zlh,4,$Zlh
579 shrpw $Zhh,$Zhl,4,$Zhl
580 extru $Zhh,27,28,$Zhh
581 xor $Tll,$Zll,$Zll
582 xor $Tlh,$Zlh,$Zlh
583 xor $rem,$Zhh,$Zhh
584 stw $Zll,12($Xi)
585 xor $Thl,$Zhl,$Zhl
586 stw $Zlh,8($Xi)
587 xor $Thh,$Zhh,$Zhh
588 stw $Zhl,4($Xi)
589 ldo 16($inp),$inp
590 stw $Zhh,0($Xi)
591 comb,<> $inp,$len,L\$outer_ghash_pa1
592 copy $Zll,$nlo
593___
594$code.=<<___;
595L\$done_ghash
596 $POP `-$FRAME-$SAVED_RP`(%sp),%r2 ; standard epilogue
597 $POP `-$FRAME+1*$SIZE_T`(%sp),%r4
598 $POP `-$FRAME+2*$SIZE_T`(%sp),%r5
599 $POP `-$FRAME+3*$SIZE_T`(%sp),%r6
600___
601$code.=<<___ if ($SIZE_T==4);
602 $POP `-$FRAME+4*$SIZE_T`(%sp),%r7
603 $POP `-$FRAME+5*$SIZE_T`(%sp),%r8
604 $POP `-$FRAME+6*$SIZE_T`(%sp),%r9
605 $POP `-$FRAME+7*$SIZE_T`(%sp),%r10
606 $POP `-$FRAME+8*$SIZE_T`(%sp),%r11
607___
608$code.=<<___;
609 bv (%r2)
610 .EXIT
611 $POPMB -$FRAME(%sp),%r3
612 .PROCEND
613
614 .ALIGN 64
615L\$rem_4bit
616 .WORD `0x0000<<16`,0,`0x1C20<<16`,0,`0x3840<<16`,0,`0x2460<<16`,0
617 .WORD `0x7080<<16`,0,`0x6CA0<<16`,0,`0x48C0<<16`,0,`0x54E0<<16`,0
618 .WORD `0xE100<<16`,0,`0xFD20<<16`,0,`0xD940<<16`,0,`0xC560<<16`,0
619 .WORD `0x9180<<16`,0,`0x8DA0<<16`,0,`0xA9C0<<16`,0,`0xB5E0<<16`,0
620	.STRINGZ "GHASH for PA-RISC, CRYPTOGAMS by <appro\@openssl.org>"
621 .ALIGN 64
622___
623
624# Explicitly encode PA-RISC 2.0 instructions used in this module, so
625# that it can be compiled with .LEVEL 1.0. It should be noted that I
626# wouldn't have to do this, if GNU assembler understood .ALLOW 2.0
627# directive...
628
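# As an example of what the handlers below do: a "format 4" load such as
#	ldd	$nlo($Hll),$Zll
# is re-emitted as ".WORD (0x03<<26)|(base<<21)|(index<<16)|(3<<6)|target"
# with the original mnemonic preserved in a trailing comment, so the module
# still assembles at .LEVEL 1.0.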
629my $ldd = sub {
630 my ($mod,$args) = @_;
631 my $orig = "ldd$mod\t$args";
632
633 if ($args =~ /%r([0-9]+)\(%r([0-9]+)\),%r([0-9]+)/) # format 4
634 { my $opcode=(0x03<<26)|($2<<21)|($1<<16)|(3<<6)|$3;
635 sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
636 }
637 elsif ($args =~ /(\-?[0-9]+)\(%r([0-9]+)\),%r([0-9]+)/) # format 5
638 { my $opcode=(0x03<<26)|($2<<21)|(1<<12)|(3<<6)|$3;
639 $opcode|=(($1&0xF)<<17)|(($1&0x10)<<12); # encode offset
640 $opcode|=(1<<5) if ($mod =~ /^,m/);
641 $opcode|=(1<<13) if ($mod =~ /^,mb/);
642 sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
643 }
644 else { "\t".$orig; }
645};
646
647my $std = sub {
648 my ($mod,$args) = @_;
649 my $orig = "std$mod\t$args";
650
651 if ($args =~ /%r([0-9]+),(\-?[0-9]+)\(%r([0-9]+)\)/) # format 3 suffices
652 { my $opcode=(0x1c<<26)|($3<<21)|($1<<16)|(($2&0x1FF8)<<1)|(($2>>13)&1);
653 sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
654 }
655 else { "\t".$orig; }
656};
657
658my $extrd = sub {
659 my ($mod,$args) = @_;
660 my $orig = "extrd$mod\t$args";
661
662 # I only have ",u" completer, it's implicitly encoded...
663 if ($args =~ /%r([0-9]+),([0-9]+),([0-9]+),%r([0-9]+)/) # format 15
664 { my $opcode=(0x36<<26)|($1<<21)|($4<<16);
665 my $len=32-$3;
666 $opcode |= (($2&0x20)<<6)|(($2&0x1f)<<5); # encode pos
667 $opcode |= (($len&0x20)<<7)|($len&0x1f); # encode len
668 sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
669 }
670 elsif ($args =~ /%r([0-9]+),%sar,([0-9]+),%r([0-9]+)/) # format 12
671 { my $opcode=(0x34<<26)|($1<<21)|($3<<16)|(2<<11)|(1<<9);
672 my $len=32-$2;
673 $opcode |= (($len&0x20)<<3)|($len&0x1f); # encode len
674 $opcode |= (1<<13) if ($mod =~ /,\**=/);
675 sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
676 }
677 else { "\t".$orig; }
678};
679
680my $shrpd = sub {
681 my ($mod,$args) = @_;
682 my $orig = "shrpd$mod\t$args";
683
684 if ($args =~ /%r([0-9]+),%r([0-9]+),([0-9]+),%r([0-9]+)/) # format 14
685 { my $opcode=(0x34<<26)|($2<<21)|($1<<16)|(1<<10)|$4;
686 my $cpos=63-$3;
687 $opcode |= (($cpos&0x20)<<6)|(($cpos&0x1f)<<5); # encode sa
688 sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
689 }
690 elsif ($args =~ /%r([0-9]+),%r([0-9]+),%sar,%r([0-9]+)/) # format 11
691 { sprintf "\t.WORD\t0x%08x\t; %s",
692 (0x34<<26)|($2<<21)|($1<<16)|(1<<9)|$3,$orig;
693 }
694 else { "\t".$orig; }
695};
696
697my $depd = sub {
698 my ($mod,$args) = @_;
699 my $orig = "depd$mod\t$args";
700
701	# I only have ",z" completer, it's implicitly encoded...
702 if ($args =~ /%r([0-9]+),([0-9]+),([0-9]+),%r([0-9]+)/) # format 16
703 { my $opcode=(0x3c<<26)|($4<<21)|($1<<16);
704 my $cpos=63-$2;
705 my $len=32-$3;
706 $opcode |= (($cpos&0x20)<<6)|(($cpos&0x1f)<<5); # encode pos
707 $opcode |= (($len&0x20)<<7)|($len&0x1f); # encode len
708 sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
709 }
710 else { "\t".$orig; }
711};
712
713sub assemble {
714 my ($mnemonic,$mod,$args)=@_;
715 my $opcode = eval("\$$mnemonic");
716
717 ref($opcode) eq 'CODE' ? &$opcode($mod,$args) : "\t$mnemonic$mod\t$args";
718}
719
720foreach (split("\n",$code)) {
721 s/\`([^\`]*)\`/eval $1/ge;
722 if ($SIZE_T==4) {
723 s/^\s+([a-z]+)([\S]*)\s+([\S]*)/&assemble($1,$2,$3)/e;
724 s/cmpb,\*/comb,/;
725 s/,\*/,/;
726 }
727 print $_,"\n";
728}
729
730close STDOUT;
diff --git a/src/lib/libssl/src/crypto/modes/asm/ghash-s390x.pl b/src/lib/libssl/src/crypto/modes/asm/ghash-s390x.pl
new file mode 100644
index 0000000000..6a40d5d89c
--- /dev/null
+++ b/src/lib/libssl/src/crypto/modes/asm/ghash-s390x.pl
@@ -0,0 +1,262 @@
1#!/usr/bin/env perl
2
3# ====================================================================
4# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9
10# September 2010.
11#
12# The module implements "4-bit" GCM GHASH function and underlying
13# single multiplication operation in GF(2^128). "4-bit" means that it
14# uses 256 bytes per-key table [+128 bytes shared table]. Performance
15# was measured to be ~18 cycles per processed byte on z10, which is
16# almost 40% better than gcc-generated code. It should be noted that
17# 18 cycles is a worse result than expected: the loop is scheduled for
18# 12 and the result should be close to 12. In the absence of instruction-
19# level profiling data it's impossible to tell why...
20
21# November 2010.
22#
23# Adapt for -m31 build. If the kernel supports what's called the
24# "highgprs" feature on Linux [see /proc/cpuinfo], it's possible to use
25# 64-bit instructions and achieve "64-bit" performance even in a 31-bit
26# legacy application context. The feature is not specific to any
27# particular processor, as long as it's a "z-CPU"; the latter implies
28# that the code remains z/Architecture specific. On z990 it was measured
29# to perform 2.8x better than 32-bit code generated by gcc 4.3.
30
31# March 2011.
32#
33# Support for hardware KIMD-GHASH has been verified to produce correct
34# results and is therefore engaged. On z196 it was measured to process
35# an 8KB buffer ~7x faster than the software implementation. It's not as
36# impressive for smaller buffer sizes, and for the smallest 16-byte
37# buffer it's actually almost 2 times slower, which is the reason why
38# KIMD-GHASH is not used in gcm_gmult_4bit.
39
40$flavour = shift;
41
42if ($flavour =~ /3[12]/) {
43 $SIZE_T=4;
44 $g="";
45} else {
46 $SIZE_T=8;
47 $g="g";
48}
49
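# discard leading flag-style arguments; the first one that looks like a
# file name (word.ext) becomes the output file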
50while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
51open STDOUT,">$output";
52
53$softonly=0;
54
55$Zhi="%r0";
56$Zlo="%r1";
57
58$Xi="%r2"; # argument block
59$Htbl="%r3";
60$inp="%r4";
61$len="%r5";
62
63$rem0="%r6"; # variables
64$rem1="%r7";
65$nlo="%r8";
66$nhi="%r9";
67$xi="%r10";
68$cnt="%r11";
69$tmp="%r12";
70$x78="%r13";
71$rem_4bit="%r14";
72
73$sp="%r15";
74
75$code.=<<___;
76.text
77
78.globl gcm_gmult_4bit
79.align 32
80gcm_gmult_4bit:
81___
82$code.=<<___ if(!$softonly && 0); # hardware is slow for single block...
83 larl %r1,OPENSSL_s390xcap_P
84 lg %r0,0(%r1)
85 tmhl %r0,0x4000 # check for message-security-assist
86 jz .Lsoft_gmult
87 lghi %r0,0
88 la %r1,16($sp)
89 .long 0xb93e0004 # kimd %r0,%r4
90 lg %r1,24($sp)
91 tmhh %r1,0x4000 # check for function 65
92 jz .Lsoft_gmult
93 stg %r0,16($sp) # arrange 16 bytes of zero input
94 stg %r0,24($sp)
95 lghi %r0,65 # function 65
96 la %r1,0($Xi) # H lies right after Xi in gcm128_context
97 la $inp,16($sp)
98 lghi $len,16
99 .long 0xb93e0004 # kimd %r0,$inp
100 brc 1,.-4 # pay attention to "partial completion"
101 br %r14
102.align 32
103.Lsoft_gmult:
104___
105$code.=<<___;
106 stm${g} %r6,%r14,6*$SIZE_T($sp)
107
108 aghi $Xi,-1
109 lghi $len,1
110 lghi $x78,`0xf<<3`
111 larl $rem_4bit,rem_4bit
112
113 lg $Zlo,8+1($Xi) # Xi
114 j .Lgmult_shortcut
115.type gcm_gmult_4bit,\@function
116.size gcm_gmult_4bit,(.-gcm_gmult_4bit)
117
118.globl gcm_ghash_4bit
119.align 32
120gcm_ghash_4bit:
121___
122$code.=<<___ if(!$softonly);
123 larl %r1,OPENSSL_s390xcap_P
124 lg %r0,0(%r1)
125 tmhl %r0,0x4000 # check for message-security-assist
126 jz .Lsoft_ghash
127 lghi %r0,0
128 la %r1,16($sp)
129 .long 0xb93e0004 # kimd %r0,%r4
130 lg %r1,24($sp)
131 tmhh %r1,0x4000 # check for function 65
132 jz .Lsoft_ghash
133 lghi %r0,65 # function 65
134 la %r1,0($Xi) # H lies right after Xi in gcm128_context
135 .long 0xb93e0004 # kimd %r0,$inp
136 brc 1,.-4 # pay attention to "partial completion"
137 br %r14
138.align 32
139.Lsoft_ghash:
140___
141$code.=<<___ if ($flavour =~ /3[12]/);
142 llgfr $len,$len
143___
144$code.=<<___;
145 stm${g} %r6,%r14,6*$SIZE_T($sp)
146
147 aghi $Xi,-1
148 srlg $len,$len,4
149 lghi $x78,`0xf<<3`
150 larl $rem_4bit,rem_4bit
151
152 lg $Zlo,8+1($Xi) # Xi
153 lg $Zhi,0+1($Xi)
154 lghi $tmp,0
155.Louter:
156 xg $Zhi,0($inp) # Xi ^= inp
157 xg $Zlo,8($inp)
158 xgr $Zhi,$tmp
159 stg $Zlo,8+1($Xi)
160 stg $Zhi,0+1($Xi)
161
162.Lgmult_shortcut:
163 lghi $tmp,0xf0
164 sllg $nlo,$Zlo,4
165 srlg $xi,$Zlo,8 # extract second byte
166 ngr $nlo,$tmp
167 lgr $nhi,$Zlo
168 lghi $cnt,14
169 ngr $nhi,$tmp
170
171 lg $Zlo,8($nlo,$Htbl)
172 lg $Zhi,0($nlo,$Htbl)
173
174 sllg $nlo,$xi,4
175 sllg $rem0,$Zlo,3
176 ngr $nlo,$tmp
177 ngr $rem0,$x78
178 ngr $xi,$tmp
179
180 sllg $tmp,$Zhi,60
181 srlg $Zlo,$Zlo,4
182 srlg $Zhi,$Zhi,4
183 xg $Zlo,8($nhi,$Htbl)
184 xg $Zhi,0($nhi,$Htbl)
185 lgr $nhi,$xi
186 sllg $rem1,$Zlo,3
187 xgr $Zlo,$tmp
188 ngr $rem1,$x78
189 j .Lghash_inner
190.align 16
191.Lghash_inner:
192 srlg $Zlo,$Zlo,4
193 sllg $tmp,$Zhi,60
194 xg $Zlo,8($nlo,$Htbl)
195 srlg $Zhi,$Zhi,4
196 llgc $xi,0($cnt,$Xi)
197 xg $Zhi,0($nlo,$Htbl)
198 sllg $nlo,$xi,4
199 xg $Zhi,0($rem0,$rem_4bit)
200 nill $nlo,0xf0
201 sllg $rem0,$Zlo,3
202 xgr $Zlo,$tmp
203 ngr $rem0,$x78
204 nill $xi,0xf0
205
206 sllg $tmp,$Zhi,60
207 srlg $Zlo,$Zlo,4
208 srlg $Zhi,$Zhi,4
209 xg $Zlo,8($nhi,$Htbl)
210 xg $Zhi,0($nhi,$Htbl)
211 lgr $nhi,$xi
212 xg $Zhi,0($rem1,$rem_4bit)
213 sllg $rem1,$Zlo,3
214 xgr $Zlo,$tmp
215 ngr $rem1,$x78
216 brct $cnt,.Lghash_inner
217
218 sllg $tmp,$Zhi,60
219 srlg $Zlo,$Zlo,4
220 srlg $Zhi,$Zhi,4
221 xg $Zlo,8($nlo,$Htbl)
222 xg $Zhi,0($nlo,$Htbl)
223 sllg $xi,$Zlo,3
224 xg $Zhi,0($rem0,$rem_4bit)
225 xgr $Zlo,$tmp
226 ngr $xi,$x78
227
228 sllg $tmp,$Zhi,60
229 srlg $Zlo,$Zlo,4
230 srlg $Zhi,$Zhi,4
231 xg $Zlo,8($nhi,$Htbl)
232 xg $Zhi,0($nhi,$Htbl)
233 xgr $Zlo,$tmp
234 xg $Zhi,0($rem1,$rem_4bit)
235
236 lg $tmp,0($xi,$rem_4bit)
237 la $inp,16($inp)
238 sllg $tmp,$tmp,4 # correct last rem_4bit[rem]
239 brctg $len,.Louter
240
241 xgr $Zhi,$tmp
242 stg $Zlo,8+1($Xi)
243 stg $Zhi,0+1($Xi)
244 lm${g} %r6,%r14,6*$SIZE_T($sp)
245 br %r14
246.type gcm_ghash_4bit,\@function
247.size gcm_ghash_4bit,(.-gcm_ghash_4bit)
248
249.align 64
250rem_4bit:
251 .long `0x0000<<12`,0,`0x1C20<<12`,0,`0x3840<<12`,0,`0x2460<<12`,0
252 .long `0x7080<<12`,0,`0x6CA0<<12`,0,`0x48C0<<12`,0,`0x54E0<<12`,0
253 .long `0xE100<<12`,0,`0xFD20<<12`,0,`0xD940<<12`,0,`0xC560<<12`,0
254 .long `0x9180<<12`,0,`0x8DA0<<12`,0,`0xA9C0<<12`,0,`0xB5E0<<12`,0
255.type rem_4bit,\@object
256.size rem_4bit,(.-rem_4bit)
257.string "GHASH for s390x, CRYPTOGAMS by <appro\@openssl.org>"
258___
259
260$code =~ s/\`([^\`]*)\`/eval $1/gem;
261print $code;
262close STDOUT;
diff --git a/src/lib/libssl/src/crypto/modes/asm/ghash-sparcv9.pl b/src/lib/libssl/src/crypto/modes/asm/ghash-sparcv9.pl
new file mode 100644
index 0000000000..70e7b044a3
--- /dev/null
+++ b/src/lib/libssl/src/crypto/modes/asm/ghash-sparcv9.pl
@@ -0,0 +1,330 @@
1#!/usr/bin/env perl
2
3# ====================================================================
4# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9
10# March 2010
11#
12# The module implements "4-bit" GCM GHASH function and underlying
13# single multiplication operation in GF(2^128). "4-bit" means that it
14# uses 256 bytes per-key table [+128 bytes shared table]. Performance
15# results are for streamed GHASH subroutine on UltraSPARC pre-Tx CPU
16# and are expressed in cycles per processed byte, less is better:
17#
18# gcc 3.3.x cc 5.2 this assembler
19#
20# 32-bit build 81.4 43.3 12.6 (+546%/+244%)
21# 64-bit build 20.2 21.2 12.6 (+60%/+68%)
22#
23# Here is data collected on UltraSPARC T1 system running Linux:
24#
25# gcc 4.4.1 this assembler
26#
27# 32-bit build 566 50 (+1000%)
28# 64-bit build 56 50 (+12%)
29#
30# I don't quite understand why the difference between 32-bit and 64-bit
31# compiler-generated code is so big. The compilers *were* instructed to
32# generate code for UltraSPARC and should have used 64-bit registers
33# for the Z vector (see C code) even in the 32-bit build... Oh well, it
34# only means more impressive improvement coefficients for this assembler
35# module;-) Loops are aggressively modulo-scheduled with respect to
36# references to input data and Z.hi updates to achieve 12 cycles
37# timing. To anchor to something else, sha1-sparcv9.pl spends 11.6
38# cycles to process one byte on UltraSPARC pre-Tx CPU and ~24 on T1.
39
40$bits=32;
41for (@ARGV) { $bits=64 if (/\-m64/ || /\-xarch\=v9/); }
42if ($bits==64) { $bias=2047; $frame=192; }
43else { $bias=0; $frame=112; }
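# ($bias is the SPARC V9 ABI stack bias: 64-bit code addresses its register
# save area at %sp+2047, whereas 32-bit code uses an unbiased %sp.)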
44
45$output=shift;
46open STDOUT,">$output";
47
48$Zhi="%o0"; # 64-bit values
49$Zlo="%o1";
50$Thi="%o2";
51$Tlo="%o3";
52$rem="%o4";
53$tmp="%o5";
54
55$nhi="%l0"; # small values and pointers
56$nlo="%l1";
57$xi0="%l2";
58$xi1="%l3";
59$rem_4bit="%l4";
60$remi="%l5";
61$Htblo="%l6";
62$cnt="%l7";
63
64$Xi="%i0"; # input argument block
65$Htbl="%i1";
66$inp="%i2";
67$len="%i3";
68
69$code.=<<___;
70.section ".text",#alloc,#execinstr
71
72.align 64
73rem_4bit:
74 .long `0x0000<<16`,0,`0x1C20<<16`,0,`0x3840<<16`,0,`0x2460<<16`,0
75 .long `0x7080<<16`,0,`0x6CA0<<16`,0,`0x48C0<<16`,0,`0x54E0<<16`,0
76 .long `0xE100<<16`,0,`0xFD20<<16`,0,`0xD940<<16`,0,`0xC560<<16`,0
77 .long `0x9180<<16`,0,`0x8DA0<<16`,0,`0xA9C0<<16`,0,`0xB5E0<<16`,0
78.type rem_4bit,#object
79.size rem_4bit,(.-rem_4bit)
80
81.globl gcm_ghash_4bit
82.align 32
83gcm_ghash_4bit:
84 save %sp,-$frame,%sp
85 ldub [$inp+15],$nlo
86 ldub [$Xi+15],$xi0
87 ldub [$Xi+14],$xi1
88 add $len,$inp,$len
89 add $Htbl,8,$Htblo
90
911: call .+8
92 add %o7,rem_4bit-1b,$rem_4bit
93
94.Louter:
95 xor $xi0,$nlo,$nlo
96 and $nlo,0xf0,$nhi
97 and $nlo,0x0f,$nlo
98 sll $nlo,4,$nlo
99 ldx [$Htblo+$nlo],$Zlo
100 ldx [$Htbl+$nlo],$Zhi
101
102 ldub [$inp+14],$nlo
103
104 ldx [$Htblo+$nhi],$Tlo
105 and $Zlo,0xf,$remi
106 ldx [$Htbl+$nhi],$Thi
107 sll $remi,3,$remi
108 ldx [$rem_4bit+$remi],$rem
109 srlx $Zlo,4,$Zlo
110 mov 13,$cnt
111 sllx $Zhi,60,$tmp
112 xor $Tlo,$Zlo,$Zlo
113 srlx $Zhi,4,$Zhi
114 xor $Zlo,$tmp,$Zlo
115
116 xor $xi1,$nlo,$nlo
117 and $Zlo,0xf,$remi
118 and $nlo,0xf0,$nhi
119 and $nlo,0x0f,$nlo
120 ba .Lghash_inner
121 sll $nlo,4,$nlo
122.align 32
123.Lghash_inner:
124 ldx [$Htblo+$nlo],$Tlo
125 sll $remi,3,$remi
126 xor $Thi,$Zhi,$Zhi
127 ldx [$Htbl+$nlo],$Thi
128 srlx $Zlo,4,$Zlo
129 xor $rem,$Zhi,$Zhi
130 ldx [$rem_4bit+$remi],$rem
131 sllx $Zhi,60,$tmp
132 xor $Tlo,$Zlo,$Zlo
133 ldub [$inp+$cnt],$nlo
134 srlx $Zhi,4,$Zhi
135 xor $Zlo,$tmp,$Zlo
136 ldub [$Xi+$cnt],$xi1
137 xor $Thi,$Zhi,$Zhi
138 and $Zlo,0xf,$remi
139
140 ldx [$Htblo+$nhi],$Tlo
141 sll $remi,3,$remi
142 xor $rem,$Zhi,$Zhi
143 ldx [$Htbl+$nhi],$Thi
144 srlx $Zlo,4,$Zlo
145 ldx [$rem_4bit+$remi],$rem
146 sllx $Zhi,60,$tmp
147 xor $xi1,$nlo,$nlo
148 srlx $Zhi,4,$Zhi
149 and $nlo,0xf0,$nhi
150 addcc $cnt,-1,$cnt
151 xor $Zlo,$tmp,$Zlo
152 and $nlo,0x0f,$nlo
153 xor $Tlo,$Zlo,$Zlo
154 sll $nlo,4,$nlo
155 blu .Lghash_inner
156 and $Zlo,0xf,$remi
157
158 ldx [$Htblo+$nlo],$Tlo
159 sll $remi,3,$remi
160 xor $Thi,$Zhi,$Zhi
161 ldx [$Htbl+$nlo],$Thi
162 srlx $Zlo,4,$Zlo
163 xor $rem,$Zhi,$Zhi
164 ldx [$rem_4bit+$remi],$rem
165 sllx $Zhi,60,$tmp
166 xor $Tlo,$Zlo,$Zlo
167 srlx $Zhi,4,$Zhi
168 xor $Zlo,$tmp,$Zlo
169 xor $Thi,$Zhi,$Zhi
170
171 add $inp,16,$inp
172 cmp $inp,$len
173 be,pn `$bits==64?"%xcc":"%icc"`,.Ldone
174 and $Zlo,0xf,$remi
175
176 ldx [$Htblo+$nhi],$Tlo
177 sll $remi,3,$remi
178 xor $rem,$Zhi,$Zhi
179 ldx [$Htbl+$nhi],$Thi
180 srlx $Zlo,4,$Zlo
181 ldx [$rem_4bit+$remi],$rem
182 sllx $Zhi,60,$tmp
183 xor $Tlo,$Zlo,$Zlo
184 ldub [$inp+15],$nlo
185 srlx $Zhi,4,$Zhi
186 xor $Zlo,$tmp,$Zlo
187 xor $Thi,$Zhi,$Zhi
188 stx $Zlo,[$Xi+8]
189 xor $rem,$Zhi,$Zhi
190 stx $Zhi,[$Xi]
191 srl $Zlo,8,$xi1
192 and $Zlo,0xff,$xi0
193 ba .Louter
194 and $xi1,0xff,$xi1
195.align 32
196.Ldone:
197 ldx [$Htblo+$nhi],$Tlo
198 sll $remi,3,$remi
199 xor $rem,$Zhi,$Zhi
200 ldx [$Htbl+$nhi],$Thi
201 srlx $Zlo,4,$Zlo
202 ldx [$rem_4bit+$remi],$rem
203 sllx $Zhi,60,$tmp
204 xor $Tlo,$Zlo,$Zlo
205 srlx $Zhi,4,$Zhi
206 xor $Zlo,$tmp,$Zlo
207 xor $Thi,$Zhi,$Zhi
208 stx $Zlo,[$Xi+8]
209 xor $rem,$Zhi,$Zhi
210 stx $Zhi,[$Xi]
211
212 ret
213 restore
214.type gcm_ghash_4bit,#function
215.size gcm_ghash_4bit,(.-gcm_ghash_4bit)
216___
217
218undef $inp;
219undef $len;
220
221$code.=<<___;
222.globl gcm_gmult_4bit
223.align 32
224gcm_gmult_4bit:
225 save %sp,-$frame,%sp
226 ldub [$Xi+15],$nlo
227 add $Htbl,8,$Htblo
228
2291: call .+8
230 add %o7,rem_4bit-1b,$rem_4bit
231
232 and $nlo,0xf0,$nhi
233 and $nlo,0x0f,$nlo
234 sll $nlo,4,$nlo
235 ldx [$Htblo+$nlo],$Zlo
236 ldx [$Htbl+$nlo],$Zhi
237
238 ldub [$Xi+14],$nlo
239
240 ldx [$Htblo+$nhi],$Tlo
241 and $Zlo,0xf,$remi
242 ldx [$Htbl+$nhi],$Thi
243 sll $remi,3,$remi
244 ldx [$rem_4bit+$remi],$rem
245 srlx $Zlo,4,$Zlo
246 mov 13,$cnt
247 sllx $Zhi,60,$tmp
248 xor $Tlo,$Zlo,$Zlo
249 srlx $Zhi,4,$Zhi
250 xor $Zlo,$tmp,$Zlo
251
252 and $Zlo,0xf,$remi
253 and $nlo,0xf0,$nhi
254 and $nlo,0x0f,$nlo
255 ba .Lgmult_inner
256 sll $nlo,4,$nlo
257.align 32
258.Lgmult_inner:
259 ldx [$Htblo+$nlo],$Tlo
260 sll $remi,3,$remi
261 xor $Thi,$Zhi,$Zhi
262 ldx [$Htbl+$nlo],$Thi
263 srlx $Zlo,4,$Zlo
264 xor $rem,$Zhi,$Zhi
265 ldx [$rem_4bit+$remi],$rem
266 sllx $Zhi,60,$tmp
267 xor $Tlo,$Zlo,$Zlo
268 ldub [$Xi+$cnt],$nlo
269 srlx $Zhi,4,$Zhi
270 xor $Zlo,$tmp,$Zlo
271 xor $Thi,$Zhi,$Zhi
272 and $Zlo,0xf,$remi
273
274 ldx [$Htblo+$nhi],$Tlo
275 sll $remi,3,$remi
276 xor $rem,$Zhi,$Zhi
277 ldx [$Htbl+$nhi],$Thi
278 srlx $Zlo,4,$Zlo
279 ldx [$rem_4bit+$remi],$rem
280 sllx $Zhi,60,$tmp
281 srlx $Zhi,4,$Zhi
282 and $nlo,0xf0,$nhi
283 addcc $cnt,-1,$cnt
284 xor $Zlo,$tmp,$Zlo
285 and $nlo,0x0f,$nlo
286 xor $Tlo,$Zlo,$Zlo
287 sll $nlo,4,$nlo
288 blu .Lgmult_inner
289 and $Zlo,0xf,$remi
290
291 ldx [$Htblo+$nlo],$Tlo
292 sll $remi,3,$remi
293 xor $Thi,$Zhi,$Zhi
294 ldx [$Htbl+$nlo],$Thi
295 srlx $Zlo,4,$Zlo
296 xor $rem,$Zhi,$Zhi
297 ldx [$rem_4bit+$remi],$rem
298 sllx $Zhi,60,$tmp
299 xor $Tlo,$Zlo,$Zlo
300 srlx $Zhi,4,$Zhi
301 xor $Zlo,$tmp,$Zlo
302 xor $Thi,$Zhi,$Zhi
303 and $Zlo,0xf,$remi
304
305 ldx [$Htblo+$nhi],$Tlo
306 sll $remi,3,$remi
307 xor $rem,$Zhi,$Zhi
308 ldx [$Htbl+$nhi],$Thi
309 srlx $Zlo,4,$Zlo
310 ldx [$rem_4bit+$remi],$rem
311 sllx $Zhi,60,$tmp
312 xor $Tlo,$Zlo,$Zlo
313 srlx $Zhi,4,$Zhi
314 xor $Zlo,$tmp,$Zlo
315 xor $Thi,$Zhi,$Zhi
316 stx $Zlo,[$Xi+8]
317 xor $rem,$Zhi,$Zhi
318 stx $Zhi,[$Xi]
319
320 ret
321 restore
322.type gcm_gmult_4bit,#function
323.size gcm_gmult_4bit,(.-gcm_gmult_4bit)
324.asciz "GHASH for SPARCv9, CRYPTOGAMS by <appro\@openssl.org>"
325.align 4
326___
327
328$code =~ s/\`([^\`]*)\`/eval $1/gem;
329print $code;
330close STDOUT;
diff --git a/src/lib/libssl/src/crypto/modes/asm/ghash-x86.pl b/src/lib/libssl/src/crypto/modes/asm/ghash-x86.pl
new file mode 100644
index 0000000000..6b09669d47
--- /dev/null
+++ b/src/lib/libssl/src/crypto/modes/asm/ghash-x86.pl
@@ -0,0 +1,1342 @@
1#!/usr/bin/env perl
2#
3# ====================================================================
4# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9#
10# March, May, June 2010
11#
12# The module implements "4-bit" GCM GHASH function and underlying
13# single multiplication operation in GF(2^128). "4-bit" means that it
14# uses 256 bytes per-key table [+64/128 bytes fixed table]. It has two
15# code paths: vanilla x86 and vanilla MMX. The former will be executed on
16# 486 and Pentium, the latter on all others. MMX GHASH features the so-
17# called "528B" variant of the "4-bit" method utilizing additional 256+16
18# bytes of per-key storage [+512 bytes shared table]. Performance results
19# are for the streamed GHASH subroutine and are expressed in cycles per
20# processed byte, less is better:
21#
22# gcc 2.95.3(*) MMX assembler x86 assembler
23#
24# Pentium 105/111(**) - 50
25# PIII 68 /75 12.2 24
26# P4 125/125 17.8 84(***)
27# Opteron 66 /70 10.1 30
28# Core2 54 /67 8.4 18
29#
30# (*)	gcc 3.4.x was observed to generate a few percent slower code,
31#	which is one of the reasons why 2.95.3 results were chosen;
32#	another reason is the lack of 3.4.x results for older CPUs;
33# comparison with MMX results is not completely fair, because C
34# results are for vanilla "256B" implementation, while
35# assembler results are for "528B";-)
36# (**) second number is result for code compiled with -fPIC flag,
37# which is actually more relevant, because assembler code is
38# position-independent;
39# (***) see comment in non-MMX routine for further details;
40#
41# To summarize, it's >2-5 times faster than gcc-generated code. To
42# anchor it to something else, SHA1 assembler processes one byte in
43# 11-13 cycles on contemporary x86 cores. As for choice of MMX in
44# particular, see comment at the end of the file...
45
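
The "4-bit" method described above is easiest to follow in plain C: a minimal sketch, modelled on the portable fallback in gcm128.c, is shown below. The u128 type, the Htable of sixteen precomputed nibble multiples of H, and the name gmult_4bit are assumptions of the sketch, not part of this module.

    #include <stdint.h>

    typedef struct { uint64_t hi, lo; } u128;        /* one GF(2^128) element */

    /* reduction constants for the 4 bits dropped by every Z>>4 step */
    static const uint64_t rem_4bit[16] = {
        0x0000ULL << 48, 0x1C20ULL << 48, 0x3840ULL << 48, 0x2460ULL << 48,
        0x7080ULL << 48, 0x6CA0ULL << 48, 0x48C0ULL << 48, 0x54E0ULL << 48,
        0xE100ULL << 48, 0xFD20ULL << 48, 0xD940ULL << 48, 0xC560ULL << 48,
        0x9180ULL << 48, 0x8DA0ULL << 48, 0xA9C0ULL << 48, 0xB5E0ULL << 48
    };

    /* Xi <- Xi * H, with Htable[i] = i * H precomputed for every 4-bit i */
    static void gmult_4bit(uint8_t Xi[16], const u128 Htable[16])
    {
        u128 Z;
        uint64_t rem;
        int cnt = 15;
        unsigned nlo = Xi[15], nhi = nlo >> 4;

        nlo &= 0xf;
        Z = Htable[nlo];
        while (1) {
            /* shift Z right by 4 bits, fold the dropped nibble back in,
             * then accumulate the table entry for the next nibble */
            rem  = Z.lo & 0xf;
            Z.lo = (Z.hi << 60) | (Z.lo >> 4);
            Z.hi = (Z.hi >> 4) ^ rem_4bit[rem];
            Z.hi ^= Htable[nhi].hi;
            Z.lo ^= Htable[nhi].lo;

            if (--cnt < 0)
                break;

            nlo  = Xi[cnt];
            nhi  = nlo >> 4;
            nlo &= 0xf;

            rem  = Z.lo & 0xf;
            Z.lo = (Z.hi << 60) | (Z.lo >> 4);
            Z.hi = (Z.hi >> 4) ^ rem_4bit[rem];
            Z.hi ^= Htable[nlo].hi;
            Z.lo ^= Htable[nlo].lo;
        }
        for (int i = 0; i < 8; i++) {                /* store big-endian */
            Xi[i]     = (uint8_t)(Z.hi >> (56 - 8 * i));
            Xi[i + 8] = (uint8_t)(Z.lo >> (56 - 8 * i));
        }
    }

The 256 bytes of per-key storage quoted above are exactly this Htable (16 entries of 16 bytes); the 64/128-byte fixed table is rem_4bit, held as 4- or 8-byte words depending on the code path.
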
46# May 2010
47#
48# Add PCLMULQDQ version performing at 2.10 cycles per processed byte.
49# The question is how close is it to theoretical limit? The pclmulqdq
50# instruction latency appears to be 14 cycles and there can't be more
51# than 2 of them executing at any given time. This means that single
52# Karatsuba multiplication would take 28 cycles *plus* few cycles for
53# pre- and post-processing. Then multiplication has to be followed by
54# modulo-reduction. Given that aggregated reduction method [see
55# "Carry-less Multiplication and Its Usage for Computing the GCM Mode"
56# white paper by Intel] allows you to perform reduction only once in
57# a while, we can assume that asymptotic performance can be estimated
58# as (28+Tmod/Naggr)/16, where Tmod is time to perform reduction
59# and Naggr is the aggregation factor.
60#
61# Before we proceed to this implementation let's have closer look at
62# the best-performing code suggested by Intel in their white paper.
63# By tracing inter-register dependencies Tmod is estimated as ~19
64# cycles and Naggr chosen by Intel is 4, resulting in 2.05 cycles per
65# processed byte. As implied, this is quite an optimistic estimate,
66# because it does not account for Karatsuba pre- and post-processing,
67# which for a single multiplication is ~5 cycles. Unfortunately Intel
68# does not provide performance data for GHASH alone. But benchmarking
69# AES_GCM_encrypt ripped out of Fig. 15 of the white paper with aadt
70# alone resulted in 2.46 cycles per byte out of a 16KB buffer. Note that
71# the result accounts even for pre-computing of degrees of the hash
72# key H, but its portion is negligible at 16KB buffer size.
73#
74# Moving on to the implementation in question. Tmod is estimated as
75# ~13 cycles and Naggr is 2, giving asymptotic performance of ...
76# 2.16. How is it possible that measured performance is better than
77# optimistic theoretical estimate? There is one thing Intel failed
78# to recognize. By serializing GHASH with CTR in same subroutine
79# former's performance is really limited to above (Tmul + Tmod/Naggr)
80# equation. But if GHASH procedure is detached, the modulo-reduction
81# can be interleaved with Naggr-1 multiplications at instruction level
82# and under ideal conditions even disappear from the equation. So that
83# optimistic theoretical estimate for this implementation is ...
84# 28/16=1.75, and not 2.16. Well, it's probably way too optimistic,
85# at least for such small Naggr. I'd argue that (28+Tproc/Naggr),
86# where Tproc is time required for Karatsuba pre- and post-processing,
87# is more realistic estimate. In this case it gives ... 1.91 cycles.
88# Or in other words, depending on how well we can interleave reduction
89# and one of the two multiplications the performance should be between
90# 1.91 and 2.16. As already mentioned, this implementation processes
91# one byte out of 8KB buffer in 2.10 cycles, while x86_64 counterpart
92# - in 2.02. x86_64 performance is better, because the larger register
93# bank allows reduction and multiplication to be interleaved better.
94#
95# Does it make sense to increase Naggr? To start with it's virtually
96# impossible in 32-bit mode, because of limited register bank
97# capacity. Otherwise improvement has to be weighed against slower
98# setup, as well as code size and complexity increase. As even
99# optimistic estimate doesn't promise 30% performance improvement,
100# there are currently no plans to increase Naggr.
101#
102# Special thanks to David Woodhouse <dwmw2@infradead.org> for
103# providing access to a Westmere-based system on behalf of Intel
104# Open Source Technology Centre.
105
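
The aggregated reduction method mentioned above rests on the identity spelled out again as a comment in the clmul code further down; with an aggregation factor of two and P the GHASH reduction polynomial,

    X_{i+2} = [ H \cdot (I_{i+1} + X_{i+1}) ] \bmod P
            = [ H \cdot I_{i+1} + H^2 \cdot (I_i + X_i) ] \bmod P ,

so the products by H and by H^2 can be accumulated and the modulo-reduction performed only once per pair of blocks.
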
106# January 2010
107#
108# Tweaked to optimize transitions between integer and FP operations
109# on same XMM register, PCLMULQDQ subroutine was measured to process
110# one byte in 2.07 cycles on Sandy Bridge, and in 2.12 - on Westmere.
111# The minor regression on Westmere is outweighed by ~15% improvement
112# on Sandy Bridge. Strangely enough attempt to modify 64-bit code in
113# similar manner resulted in almost 20% degradation on Sandy Bridge,
114# where original 64-bit code processes one byte in 1.95 cycles.
115
116$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
117push(@INC,"${dir}","${dir}../../perlasm");
118require "x86asm.pl";
119
120&asm_init($ARGV[0],"ghash-x86.pl",$x86only = $ARGV[$#ARGV] eq "386");
121
122$sse2=0;
123for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
124
125($Zhh,$Zhl,$Zlh,$Zll) = ("ebp","edx","ecx","ebx");
126$inp = "edi";
127$Htbl = "esi";
128
129$unroll = 0; # Affects x86 loop. Folded loop performs ~7% worse
130		# than unrolled, which has to be weighed against
131 # 2.5x x86-specific code size reduction.
132
133sub x86_loop {
134 my $off = shift;
135 my $rem = "eax";
136
137 &mov ($Zhh,&DWP(4,$Htbl,$Zll));
138 &mov ($Zhl,&DWP(0,$Htbl,$Zll));
139 &mov ($Zlh,&DWP(12,$Htbl,$Zll));
140 &mov ($Zll,&DWP(8,$Htbl,$Zll));
141 &xor ($rem,$rem); # avoid partial register stalls on PIII
142
143 # shrd practically kills P4, 2.5x deterioration, but P4 has
144 # MMX code-path to execute. shrd runs tad faster [than twice
145 # the shifts, move's and or's] on pre-MMX Pentium (as well as
146 # PIII and Core2), *but* minimizes code size, spares register
147 # and thus allows to fold the loop...
148 if (!$unroll) {
149 my $cnt = $inp;
150 &mov ($cnt,15);
151 &jmp (&label("x86_loop"));
152 &set_label("x86_loop",16);
153 for($i=1;$i<=2;$i++) {
154 &mov (&LB($rem),&LB($Zll));
155 &shrd ($Zll,$Zlh,4);
156 &and (&LB($rem),0xf);
157 &shrd ($Zlh,$Zhl,4);
158 &shrd ($Zhl,$Zhh,4);
159 &shr ($Zhh,4);
160 &xor ($Zhh,&DWP($off+16,"esp",$rem,4));
161
162 &mov (&LB($rem),&BP($off,"esp",$cnt));
163 if ($i&1) {
164 &and (&LB($rem),0xf0);
165 } else {
166 &shl (&LB($rem),4);
167 }
168
169 &xor ($Zll,&DWP(8,$Htbl,$rem));
170 &xor ($Zlh,&DWP(12,$Htbl,$rem));
171 &xor ($Zhl,&DWP(0,$Htbl,$rem));
172 &xor ($Zhh,&DWP(4,$Htbl,$rem));
173
174 if ($i&1) {
175 &dec ($cnt);
176 &js (&label("x86_break"));
177 } else {
178 &jmp (&label("x86_loop"));
179 }
180 }
181 &set_label("x86_break",16);
182 } else {
183 for($i=1;$i<32;$i++) {
184 &comment($i);
185 &mov (&LB($rem),&LB($Zll));
186 &shrd ($Zll,$Zlh,4);
187 &and (&LB($rem),0xf);
188 &shrd ($Zlh,$Zhl,4);
189 &shrd ($Zhl,$Zhh,4);
190 &shr ($Zhh,4);
191 &xor ($Zhh,&DWP($off+16,"esp",$rem,4));
192
193 if ($i&1) {
194 &mov (&LB($rem),&BP($off+15-($i>>1),"esp"));
195 &and (&LB($rem),0xf0);
196 } else {
197 &mov (&LB($rem),&BP($off+15-($i>>1),"esp"));
198 &shl (&LB($rem),4);
199 }
200
201 &xor ($Zll,&DWP(8,$Htbl,$rem));
202 &xor ($Zlh,&DWP(12,$Htbl,$rem));
203 &xor ($Zhl,&DWP(0,$Htbl,$rem));
204 &xor ($Zhh,&DWP(4,$Htbl,$rem));
205 }
206 }
207 &bswap ($Zll);
208 &bswap ($Zlh);
209 &bswap ($Zhl);
210 if (!$x86only) {
211 &bswap ($Zhh);
212 } else {
213 &mov ("eax",$Zhh);
214 &bswap ("eax");
215 &mov ($Zhh,"eax");
216 }
217}
218
219if ($unroll) {
220 &function_begin_B("_x86_gmult_4bit_inner");
221 &x86_loop(4);
222 &ret ();
223 &function_end_B("_x86_gmult_4bit_inner");
224}
225
226sub deposit_rem_4bit {
227 my $bias = shift;
228
229 &mov (&DWP($bias+0, "esp"),0x0000<<16);
230 &mov (&DWP($bias+4, "esp"),0x1C20<<16);
231 &mov (&DWP($bias+8, "esp"),0x3840<<16);
232 &mov (&DWP($bias+12,"esp"),0x2460<<16);
233 &mov (&DWP($bias+16,"esp"),0x7080<<16);
234 &mov (&DWP($bias+20,"esp"),0x6CA0<<16);
235 &mov (&DWP($bias+24,"esp"),0x48C0<<16);
236 &mov (&DWP($bias+28,"esp"),0x54E0<<16);
237 &mov (&DWP($bias+32,"esp"),0xE100<<16);
238 &mov (&DWP($bias+36,"esp"),0xFD20<<16);
239 &mov (&DWP($bias+40,"esp"),0xD940<<16);
240 &mov (&DWP($bias+44,"esp"),0xC560<<16);
241 &mov (&DWP($bias+48,"esp"),0x9180<<16);
242 &mov (&DWP($bias+52,"esp"),0x8DA0<<16);
243 &mov (&DWP($bias+56,"esp"),0xA9C0<<16);
244 &mov (&DWP($bias+60,"esp"),0xB5E0<<16);
245}
246
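
The constants deposited above are not arbitrary: each rem_4bit value is the XOR of shifted copies of 0xE100, one copy per set bit of the index, 0xE1 being the byte that encodes the reduction polynomial x^128 + x^7 + x^2 + x + 1 in GHASH's bit-reflected convention. A hypothetical generator (names invented here) that reproduces the sixteen 16-bit values; the rem_8bit table at the end of this file follows the same pattern over eight bits:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        for (unsigned i = 0; i < 16; i++) {
            uint16_t v = 0;
            /* each set bit of the index contributes a shifted copy of 0xE100 */
            for (unsigned bit = 0; bit < 4; bit++)
                if (i & (1u << bit))
                    v ^= (uint16_t)(0xE100 >> (3 - bit));
            printf("0x%04X%c", v, (i % 4 == 3) ? '\n' : ' ');
        }
        return 0;
    }

The <<16 used here (and the <<$S in the data section) merely places the value in the upper half of the word that gets XORed into Z.
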
247$suffix = $x86only ? "" : "_x86";
248
249&function_begin("gcm_gmult_4bit".$suffix);
250 &stack_push(16+4+1); # +1 for stack alignment
251 &mov ($inp,&wparam(0)); # load Xi
252 &mov ($Htbl,&wparam(1)); # load Htable
253
254 &mov ($Zhh,&DWP(0,$inp)); # load Xi[16]
255 &mov ($Zhl,&DWP(4,$inp));
256 &mov ($Zlh,&DWP(8,$inp));
257 &mov ($Zll,&DWP(12,$inp));
258
259 &deposit_rem_4bit(16);
260
261 &mov (&DWP(0,"esp"),$Zhh); # copy Xi[16] on stack
262 &mov (&DWP(4,"esp"),$Zhl);
263 &mov (&DWP(8,"esp"),$Zlh);
264 &mov (&DWP(12,"esp"),$Zll);
265 &shr ($Zll,20);
266 &and ($Zll,0xf0);
267
268 if ($unroll) {
269 &call ("_x86_gmult_4bit_inner");
270 } else {
271 &x86_loop(0);
272 &mov ($inp,&wparam(0));
273 }
274
275 &mov (&DWP(12,$inp),$Zll);
276 &mov (&DWP(8,$inp),$Zlh);
277 &mov (&DWP(4,$inp),$Zhl);
278 &mov (&DWP(0,$inp),$Zhh);
279 &stack_pop(16+4+1);
280&function_end("gcm_gmult_4bit".$suffix);
281
282&function_begin("gcm_ghash_4bit".$suffix);
283 &stack_push(16+4+1); # +1 for 64-bit alignment
284 &mov ($Zll,&wparam(0)); # load Xi
285 &mov ($Htbl,&wparam(1)); # load Htable
286 &mov ($inp,&wparam(2)); # load in
287 &mov ("ecx",&wparam(3)); # load len
288 &add ("ecx",$inp);
289 &mov (&wparam(3),"ecx");
290
291 &mov ($Zhh,&DWP(0,$Zll)); # load Xi[16]
292 &mov ($Zhl,&DWP(4,$Zll));
293 &mov ($Zlh,&DWP(8,$Zll));
294 &mov ($Zll,&DWP(12,$Zll));
295
296 &deposit_rem_4bit(16);
297
298 &set_label("x86_outer_loop",16);
299 &xor ($Zll,&DWP(12,$inp)); # xor with input
300 &xor ($Zlh,&DWP(8,$inp));
301 &xor ($Zhl,&DWP(4,$inp));
302 &xor ($Zhh,&DWP(0,$inp));
303 &mov (&DWP(12,"esp"),$Zll); # dump it on stack
304 &mov (&DWP(8,"esp"),$Zlh);
305 &mov (&DWP(4,"esp"),$Zhl);
306 &mov (&DWP(0,"esp"),$Zhh);
307
308 &shr ($Zll,20);
309 &and ($Zll,0xf0);
310
311 if ($unroll) {
312 &call ("_x86_gmult_4bit_inner");
313 } else {
314 &x86_loop(0);
315 &mov ($inp,&wparam(2));
316 }
317 &lea ($inp,&DWP(16,$inp));
318 &cmp ($inp,&wparam(3));
319 &mov (&wparam(2),$inp) if (!$unroll);
320 &jb (&label("x86_outer_loop"));
321
322 &mov ($inp,&wparam(0)); # load Xi
323 &mov (&DWP(12,$inp),$Zll);
324 &mov (&DWP(8,$inp),$Zlh);
325 &mov (&DWP(4,$inp),$Zhl);
326 &mov (&DWP(0,$inp),$Zhh);
327 &stack_pop(16+4+1);
328&function_end("gcm_ghash_4bit".$suffix);
329
330if (!$x86only) {{{
331
332&static_label("rem_4bit");
333
334if (!$sse2) {{ # pure-MMX "May" version...
335
336$S=12; # shift factor for rem_4bit
337
338&function_begin_B("_mmx_gmult_4bit_inner");
339# MMX version performs 3.5 times better on P4 (see comment in non-MMX
340# routine for further details), 100% better on Opteron, ~70% better
341# on Core2 and PIII... In other words effort is considered to be well
342# spent... Since initial release the loop was unrolled in order to
343# "liberate" register previously used as loop counter. Instead it's
344# used to optimize critical path in 'Z.hi ^= rem_4bit[Z.lo&0xf]'.
345# The path involves move of Z.lo from MMX to integer register,
346# effective address calculation and finally merge of value to Z.hi.
347# Reference to rem_4bit is scheduled so late that I had to >>4
348# rem_4bit elements. This resulted in a 20-45% improvement
349# on contemporary µ-archs.
350{
351 my $cnt;
352 my $rem_4bit = "eax";
353 my @rem = ($Zhh,$Zll);
354 my $nhi = $Zhl;
355 my $nlo = $Zlh;
356
357 my ($Zlo,$Zhi) = ("mm0","mm1");
358 my $tmp = "mm2";
359
360 &xor ($nlo,$nlo); # avoid partial register stalls on PIII
361 &mov ($nhi,$Zll);
362 &mov (&LB($nlo),&LB($nhi));
363 &shl (&LB($nlo),4);
364 &and ($nhi,0xf0);
365 &movq ($Zlo,&QWP(8,$Htbl,$nlo));
366 &movq ($Zhi,&QWP(0,$Htbl,$nlo));
367 &movd ($rem[0],$Zlo);
368
369 for ($cnt=28;$cnt>=-2;$cnt--) {
370 my $odd = $cnt&1;
371 my $nix = $odd ? $nlo : $nhi;
372
373 &shl (&LB($nlo),4) if ($odd);
374 &psrlq ($Zlo,4);
375 &movq ($tmp,$Zhi);
376 &psrlq ($Zhi,4);
377 &pxor ($Zlo,&QWP(8,$Htbl,$nix));
378 &mov (&LB($nlo),&BP($cnt/2,$inp)) if (!$odd && $cnt>=0);
379 &psllq ($tmp,60);
380 &and ($nhi,0xf0) if ($odd);
381 &pxor ($Zhi,&QWP(0,$rem_4bit,$rem[1],8)) if ($cnt<28);
382 &and ($rem[0],0xf);
383 &pxor ($Zhi,&QWP(0,$Htbl,$nix));
384 &mov ($nhi,$nlo) if (!$odd && $cnt>=0);
385 &movd ($rem[1],$Zlo);
386 &pxor ($Zlo,$tmp);
387
388 push (@rem,shift(@rem)); # "rotate" registers
389 }
390
391 &mov ($inp,&DWP(4,$rem_4bit,$rem[1],8)); # last rem_4bit[rem]
392
393 &psrlq ($Zlo,32); # lower part of Zlo is already there
394 &movd ($Zhl,$Zhi);
395 &psrlq ($Zhi,32);
396 &movd ($Zlh,$Zlo);
397 &movd ($Zhh,$Zhi);
398 &shl ($inp,4); # compensate for rem_4bit[i] being >>4
399
400 &bswap ($Zll);
401 &bswap ($Zhl);
402 &bswap ($Zlh);
403 &xor ($Zhh,$inp);
404 &bswap ($Zhh);
405
406 &ret ();
407}
408&function_end_B("_mmx_gmult_4bit_inner");
409
410&function_begin("gcm_gmult_4bit_mmx");
411 &mov ($inp,&wparam(0)); # load Xi
412 &mov ($Htbl,&wparam(1)); # load Htable
413
414 &call (&label("pic_point"));
415 &set_label("pic_point");
416 &blindpop("eax");
417 &lea ("eax",&DWP(&label("rem_4bit")."-".&label("pic_point"),"eax"));
418
419 &movz ($Zll,&BP(15,$inp));
420
421 &call ("_mmx_gmult_4bit_inner");
422
423 &mov ($inp,&wparam(0)); # load Xi
424 &emms ();
425 &mov (&DWP(12,$inp),$Zll);
426 &mov (&DWP(4,$inp),$Zhl);
427 &mov (&DWP(8,$inp),$Zlh);
428 &mov (&DWP(0,$inp),$Zhh);
429&function_end("gcm_gmult_4bit_mmx");
430
431# Streamed version performs 20% better on P4, 7% on Opteron,
432# 10% on Core2 and PIII...
433&function_begin("gcm_ghash_4bit_mmx");
434 &mov ($Zhh,&wparam(0)); # load Xi
435 &mov ($Htbl,&wparam(1)); # load Htable
436 &mov ($inp,&wparam(2)); # load in
437 &mov ($Zlh,&wparam(3)); # load len
438
439 &call (&label("pic_point"));
440 &set_label("pic_point");
441 &blindpop("eax");
442 &lea ("eax",&DWP(&label("rem_4bit")."-".&label("pic_point"),"eax"));
443
444 &add ($Zlh,$inp);
445 &mov (&wparam(3),$Zlh); # len to point at the end of input
446 &stack_push(4+1); # +1 for stack alignment
447
448 &mov ($Zll,&DWP(12,$Zhh)); # load Xi[16]
449 &mov ($Zhl,&DWP(4,$Zhh));
450 &mov ($Zlh,&DWP(8,$Zhh));
451 &mov ($Zhh,&DWP(0,$Zhh));
452 &jmp (&label("mmx_outer_loop"));
453
454 &set_label("mmx_outer_loop",16);
455 &xor ($Zll,&DWP(12,$inp));
456 &xor ($Zhl,&DWP(4,$inp));
457 &xor ($Zlh,&DWP(8,$inp));
458 &xor ($Zhh,&DWP(0,$inp));
459 &mov (&wparam(2),$inp);
460 &mov (&DWP(12,"esp"),$Zll);
461 &mov (&DWP(4,"esp"),$Zhl);
462 &mov (&DWP(8,"esp"),$Zlh);
463 &mov (&DWP(0,"esp"),$Zhh);
464
465 &mov ($inp,"esp");
466 &shr ($Zll,24);
467
468 &call ("_mmx_gmult_4bit_inner");
469
470 &mov ($inp,&wparam(2));
471 &lea ($inp,&DWP(16,$inp));
472 &cmp ($inp,&wparam(3));
473 &jb (&label("mmx_outer_loop"));
474
475 &mov ($inp,&wparam(0)); # load Xi
476 &emms ();
477 &mov (&DWP(12,$inp),$Zll);
478 &mov (&DWP(4,$inp),$Zhl);
479 &mov (&DWP(8,$inp),$Zlh);
480 &mov (&DWP(0,$inp),$Zhh);
481
482 &stack_pop(4+1);
483&function_end("gcm_ghash_4bit_mmx");
484
485}} else {{ # "June" MMX version...
486 # ... has slower "April" gcm_gmult_4bit_mmx with folded
487 # loop. This is done to conserve code size...
488$S=16; # shift factor for rem_4bit
489
490sub mmx_loop() {
491# MMX version performs 2.8 times better on P4 (see comment in non-MMX
492# routine for further details), 40% better on Opteron and Core2, 50%
493# better on PIII... In other words effort is considered to be well
494# spent...
495 my $inp = shift;
496 my $rem_4bit = shift;
497 my $cnt = $Zhh;
498 my $nhi = $Zhl;
499 my $nlo = $Zlh;
500 my $rem = $Zll;
501
502 my ($Zlo,$Zhi) = ("mm0","mm1");
503 my $tmp = "mm2";
504
505 &xor ($nlo,$nlo); # avoid partial register stalls on PIII
506 &mov ($nhi,$Zll);
507 &mov (&LB($nlo),&LB($nhi));
508 &mov ($cnt,14);
509 &shl (&LB($nlo),4);
510 &and ($nhi,0xf0);
511 &movq ($Zlo,&QWP(8,$Htbl,$nlo));
512 &movq ($Zhi,&QWP(0,$Htbl,$nlo));
513 &movd ($rem,$Zlo);
514 &jmp (&label("mmx_loop"));
515
516 &set_label("mmx_loop",16);
517 &psrlq ($Zlo,4);
518 &and ($rem,0xf);
519 &movq ($tmp,$Zhi);
520 &psrlq ($Zhi,4);
521 &pxor ($Zlo,&QWP(8,$Htbl,$nhi));
522 &mov (&LB($nlo),&BP(0,$inp,$cnt));
523 &psllq ($tmp,60);
524 &pxor ($Zhi,&QWP(0,$rem_4bit,$rem,8));
525 &dec ($cnt);
526 &movd ($rem,$Zlo);
527 &pxor ($Zhi,&QWP(0,$Htbl,$nhi));
528 &mov ($nhi,$nlo);
529 &pxor ($Zlo,$tmp);
530 &js (&label("mmx_break"));
531
532 &shl (&LB($nlo),4);
533 &and ($rem,0xf);
534 &psrlq ($Zlo,4);
535 &and ($nhi,0xf0);
536 &movq ($tmp,$Zhi);
537 &psrlq ($Zhi,4);
538 &pxor ($Zlo,&QWP(8,$Htbl,$nlo));
539 &psllq ($tmp,60);
540 &pxor ($Zhi,&QWP(0,$rem_4bit,$rem,8));
541 &movd ($rem,$Zlo);
542 &pxor ($Zhi,&QWP(0,$Htbl,$nlo));
543 &pxor ($Zlo,$tmp);
544 &jmp (&label("mmx_loop"));
545
546 &set_label("mmx_break",16);
547 &shl (&LB($nlo),4);
548 &and ($rem,0xf);
549 &psrlq ($Zlo,4);
550 &and ($nhi,0xf0);
551 &movq ($tmp,$Zhi);
552 &psrlq ($Zhi,4);
553 &pxor ($Zlo,&QWP(8,$Htbl,$nlo));
554 &psllq ($tmp,60);
555 &pxor ($Zhi,&QWP(0,$rem_4bit,$rem,8));
556 &movd ($rem,$Zlo);
557 &pxor ($Zhi,&QWP(0,$Htbl,$nlo));
558 &pxor ($Zlo,$tmp);
559
560 &psrlq ($Zlo,4);
561 &and ($rem,0xf);
562 &movq ($tmp,$Zhi);
563 &psrlq ($Zhi,4);
564 &pxor ($Zlo,&QWP(8,$Htbl,$nhi));
565 &psllq ($tmp,60);
566 &pxor ($Zhi,&QWP(0,$rem_4bit,$rem,8));
567 &movd ($rem,$Zlo);
568 &pxor ($Zhi,&QWP(0,$Htbl,$nhi));
569 &pxor ($Zlo,$tmp);
570
571 &psrlq ($Zlo,32); # lower part of Zlo is already there
572 &movd ($Zhl,$Zhi);
573 &psrlq ($Zhi,32);
574 &movd ($Zlh,$Zlo);
575 &movd ($Zhh,$Zhi);
576
577 &bswap ($Zll);
578 &bswap ($Zhl);
579 &bswap ($Zlh);
580 &bswap ($Zhh);
581}
582
583&function_begin("gcm_gmult_4bit_mmx");
584 &mov ($inp,&wparam(0)); # load Xi
585 &mov ($Htbl,&wparam(1)); # load Htable
586
587 &call (&label("pic_point"));
588 &set_label("pic_point");
589 &blindpop("eax");
590 &lea ("eax",&DWP(&label("rem_4bit")."-".&label("pic_point"),"eax"));
591
592 &movz ($Zll,&BP(15,$inp));
593
594 &mmx_loop($inp,"eax");
595
596 &emms ();
597 &mov (&DWP(12,$inp),$Zll);
598 &mov (&DWP(4,$inp),$Zhl);
599 &mov (&DWP(8,$inp),$Zlh);
600 &mov (&DWP(0,$inp),$Zhh);
601&function_end("gcm_gmult_4bit_mmx");
602
603######################################################################
604# Below subroutine is "528B" variant of "4-bit" GCM GHASH function
605# (see gcm128.c for details). It provides further 20-40% performance
606# improvement over above mentioned "May" version.
607
608&static_label("rem_8bit");
609
610&function_begin("gcm_ghash_4bit_mmx");
611{ my ($Zlo,$Zhi) = ("mm7","mm6");
612 my $rem_8bit = "esi";
613 my $Htbl = "ebx";
614
615 # parameter block
616 &mov ("eax",&wparam(0)); # Xi
617 &mov ("ebx",&wparam(1)); # Htable
618 &mov ("ecx",&wparam(2)); # inp
619 &mov ("edx",&wparam(3)); # len
620 &mov ("ebp","esp"); # original %esp
621 &call (&label("pic_point"));
622 &set_label ("pic_point");
623 &blindpop ($rem_8bit);
624 &lea ($rem_8bit,&DWP(&label("rem_8bit")."-".&label("pic_point"),$rem_8bit));
625
626 &sub ("esp",512+16+16); # allocate stack frame...
627 &and ("esp",-64); # ...and align it
628 &sub ("esp",16); # place for (u8)(H[]<<4)
629
630 &add ("edx","ecx"); # pointer to the end of input
631 &mov (&DWP(528+16+0,"esp"),"eax"); # save Xi
632 &mov (&DWP(528+16+8,"esp"),"edx"); # save inp+len
633 &mov (&DWP(528+16+12,"esp"),"ebp"); # save original %esp
634
635 { my @lo = ("mm0","mm1","mm2");
636 my @hi = ("mm3","mm4","mm5");
637 my @tmp = ("mm6","mm7");
638 my $off1=0,$off2=0,$i;
639
640 &add ($Htbl,128); # optimize for size
641 &lea ("edi",&DWP(16+128,"esp"));
642 &lea ("ebp",&DWP(16+256+128,"esp"));
643
644 # decompose Htable (low and high parts are kept separately),
645 # generate Htable[]>>4, (u8)(Htable[]<<4), save to stack...
646 for ($i=0;$i<18;$i++) {
647
648 &mov ("edx",&DWP(16*$i+8-128,$Htbl)) if ($i<16);
649 &movq ($lo[0],&QWP(16*$i+8-128,$Htbl)) if ($i<16);
650 &psllq ($tmp[1],60) if ($i>1);
651 &movq ($hi[0],&QWP(16*$i+0-128,$Htbl)) if ($i<16);
652 &por ($lo[2],$tmp[1]) if ($i>1);
653 &movq (&QWP($off1-128,"edi"),$lo[1]) if ($i>0 && $i<17);
654 &psrlq ($lo[1],4) if ($i>0 && $i<17);
655 &movq (&QWP($off1,"edi"),$hi[1]) if ($i>0 && $i<17);
656 &movq ($tmp[0],$hi[1]) if ($i>0 && $i<17);
657 &movq (&QWP($off2-128,"ebp"),$lo[2]) if ($i>1);
658 &psrlq ($hi[1],4) if ($i>0 && $i<17);
659 &movq (&QWP($off2,"ebp"),$hi[2]) if ($i>1);
660 &shl ("edx",4) if ($i<16);
661 &mov (&BP($i,"esp"),&LB("edx")) if ($i<16);
662
663 unshift (@lo,pop(@lo)); # "rotate" registers
664 unshift (@hi,pop(@hi));
665 unshift (@tmp,pop(@tmp));
666 $off1 += 8 if ($i>0);
667 $off2 += 8 if ($i>1);
668 }
669 }
670
671 &movq ($Zhi,&QWP(0,"eax"));
672 &mov ("ebx",&DWP(8,"eax"));
673 &mov ("edx",&DWP(12,"eax")); # load Xi
674
675&set_label("outer",16);
676 { my $nlo = "eax";
677 my $dat = "edx";
678 my @nhi = ("edi","ebp");
679 my @rem = ("ebx","ecx");
680 my @red = ("mm0","mm1","mm2");
681 my $tmp = "mm3";
682
683 &xor ($dat,&DWP(12,"ecx")); # merge input data
684 &xor ("ebx",&DWP(8,"ecx"));
685 &pxor ($Zhi,&QWP(0,"ecx"));
686 &lea ("ecx",&DWP(16,"ecx")); # inp+=16
687 #&mov (&DWP(528+12,"esp"),$dat); # save inp^Xi
688 &mov (&DWP(528+8,"esp"),"ebx");
689 &movq (&QWP(528+0,"esp"),$Zhi);
690 &mov (&DWP(528+16+4,"esp"),"ecx"); # save inp
691
692 &xor ($nlo,$nlo);
693 &rol ($dat,8);
694 &mov (&LB($nlo),&LB($dat));
695 &mov ($nhi[1],$nlo);
696 &and (&LB($nlo),0x0f);
697 &shr ($nhi[1],4);
698 &pxor ($red[0],$red[0]);
699 &rol ($dat,8); # next byte
700 &pxor ($red[1],$red[1]);
701 &pxor ($red[2],$red[2]);
702
703	# Just like in the "May" version, modulo-schedule for critical path in
704 # 'Z.hi ^= rem_8bit[Z.lo&0xff^((u8)H[nhi]<<4)]<<48'. Final 'pxor'
705 # is scheduled so late that rem_8bit[] has to be shifted *right*
706 # by 16, which is why last argument to pinsrw is 2, which
707 # corresponds to <<32=<<48>>16...
708 for ($j=11,$i=0;$i<15;$i++) {
709
710 if ($i>0) {
711 &pxor ($Zlo,&QWP(16,"esp",$nlo,8)); # Z^=H[nlo]
712 &rol ($dat,8); # next byte
713 &pxor ($Zhi,&QWP(16+128,"esp",$nlo,8));
714
715 &pxor ($Zlo,$tmp);
716 &pxor ($Zhi,&QWP(16+256+128,"esp",$nhi[0],8));
717 &xor (&LB($rem[1]),&BP(0,"esp",$nhi[0])); # rem^(H[nhi]<<4)
718 } else {
719 &movq ($Zlo,&QWP(16,"esp",$nlo,8));
720 &movq ($Zhi,&QWP(16+128,"esp",$nlo,8));
721 }
722
723 &mov (&LB($nlo),&LB($dat));
724 &mov ($dat,&DWP(528+$j,"esp")) if (--$j%4==0);
725
726 &movd ($rem[0],$Zlo);
727 &movz ($rem[1],&LB($rem[1])) if ($i>0);
728 &psrlq ($Zlo,8); # Z>>=8
729
730 &movq ($tmp,$Zhi);
731 &mov ($nhi[0],$nlo);
732 &psrlq ($Zhi,8);
733
734 &pxor ($Zlo,&QWP(16+256+0,"esp",$nhi[1],8)); # Z^=H[nhi]>>4
735 &and (&LB($nlo),0x0f);
736 &psllq ($tmp,56);
737
738 &pxor ($Zhi,$red[1]) if ($i>1);
739 &shr ($nhi[0],4);
740 &pinsrw ($red[0],&WP(0,$rem_8bit,$rem[1],2),2) if ($i>0);
741
742 unshift (@red,pop(@red)); # "rotate" registers
743 unshift (@rem,pop(@rem));
744 unshift (@nhi,pop(@nhi));
745 }
746
747 &pxor ($Zlo,&QWP(16,"esp",$nlo,8)); # Z^=H[nlo]
748 &pxor ($Zhi,&QWP(16+128,"esp",$nlo,8));
749 &xor (&LB($rem[1]),&BP(0,"esp",$nhi[0])); # rem^(H[nhi]<<4)
750
751 &pxor ($Zlo,$tmp);
752 &pxor ($Zhi,&QWP(16+256+128,"esp",$nhi[0],8));
753 &movz ($rem[1],&LB($rem[1]));
754
755 &pxor ($red[2],$red[2]); # clear 2nd word
756 &psllq ($red[1],4);
757
758 &movd ($rem[0],$Zlo);
759 &psrlq ($Zlo,4); # Z>>=4
760
761 &movq ($tmp,$Zhi);
762 &psrlq ($Zhi,4);
763 &shl ($rem[0],4); # rem<<4
764
765 &pxor ($Zlo,&QWP(16,"esp",$nhi[1],8)); # Z^=H[nhi]
766 &psllq ($tmp,60);
767 &movz ($rem[0],&LB($rem[0]));
768
769 &pxor ($Zlo,$tmp);
770 &pxor ($Zhi,&QWP(16+128,"esp",$nhi[1],8));
771
772 &pinsrw ($red[0],&WP(0,$rem_8bit,$rem[1],2),2);
773 &pxor ($Zhi,$red[1]);
774
775 &movd ($dat,$Zlo);
776 &pinsrw ($red[2],&WP(0,$rem_8bit,$rem[0],2),3); # last is <<48
777
778 &psllq ($red[0],12); # correct by <<16>>4
779 &pxor ($Zhi,$red[0]);
780 &psrlq ($Zlo,32);
781 &pxor ($Zhi,$red[2]);
782
783 &mov ("ecx",&DWP(528+16+4,"esp")); # restore inp
784 &movd ("ebx",$Zlo);
785 &movq ($tmp,$Zhi); # 01234567
786 &psllw ($Zhi,8); # 1.3.5.7.
787 &psrlw ($tmp,8); # .0.2.4.6
788 &por ($Zhi,$tmp); # 10325476
789 &bswap ($dat);
790 &pshufw ($Zhi,$Zhi,0b00011011); # 76543210
791 &bswap ("ebx");
792
793 &cmp ("ecx",&DWP(528+16+8,"esp")); # are we done?
794 &jne (&label("outer"));
795 }
796
797 &mov ("eax",&DWP(528+16+0,"esp")); # restore Xi
798 &mov (&DWP(12,"eax"),"edx");
799 &mov (&DWP(8,"eax"),"ebx");
800 &movq (&QWP(0,"eax"),$Zhi);
801
802 &mov ("esp",&DWP(528+16+12,"esp")); # restore original %esp
803 &emms ();
804}
805&function_end("gcm_ghash_4bit_mmx");
806}}
807
808if ($sse2) {{
809######################################################################
810# PCLMULQDQ version.
811
812$Xip="eax";
813$Htbl="edx";
814$const="ecx";
815$inp="esi";
816$len="ebx";
817
818($Xi,$Xhi)=("xmm0","xmm1"); $Hkey="xmm2";
819($T1,$T2,$T3)=("xmm3","xmm4","xmm5");
820($Xn,$Xhn)=("xmm6","xmm7");
821
822&static_label("bswap");
823
824sub clmul64x64_T2 { # minimal "register" pressure
825my ($Xhi,$Xi,$Hkey)=@_;
826
827 &movdqa ($Xhi,$Xi); #
828 &pshufd ($T1,$Xi,0b01001110);
829 &pshufd ($T2,$Hkey,0b01001110);
830 &pxor ($T1,$Xi); #
831 &pxor ($T2,$Hkey);
832
833 &pclmulqdq ($Xi,$Hkey,0x00); #######
834 &pclmulqdq ($Xhi,$Hkey,0x11); #######
835 &pclmulqdq ($T1,$T2,0x00); #######
836 &xorps ($T1,$Xi); #
837 &xorps ($T1,$Xhi); #
838
839 &movdqa ($T2,$T1); #
840 &psrldq ($T1,8);
841 &pslldq ($T2,8); #
842 &pxor ($Xhi,$T1);
843 &pxor ($Xi,$T2); #
844}
845
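
For readers more at home with intrinsics than perlasm, the three-multiplication Karatsuba scheme of clmul64x64_T2 above corresponds roughly to the following C sketch (function and variable names are invented here; PCLMULQDQ is exposed as _mm_clmulepi64_si128 in <wmmintrin.h>):

    #include <emmintrin.h>
    #include <wmmintrin.h>                  /* PCLMULQDQ intrinsics */

    /* (Xhi:Xlo) = X * H as a 256-bit carry-less product, three PCLMULQDQs */
    static void clmul64x64(__m128i *Xhi, __m128i *Xlo, __m128i X, __m128i H)
    {
        __m128i T1  = _mm_xor_si128(_mm_shuffle_epi32(X, 0x4e), X); /* X.hi^X.lo */
        __m128i T2  = _mm_xor_si128(_mm_shuffle_epi32(H, 0x4e), H); /* H.hi^H.lo */
        __m128i lo  = _mm_clmulepi64_si128(X, H, 0x00);             /* X.lo*H.lo */
        __m128i hi  = _mm_clmulepi64_si128(X, H, 0x11);             /* X.hi*H.hi */
        __m128i mid = _mm_clmulepi64_si128(T1, T2, 0x00);
        mid  = _mm_xor_si128(mid, _mm_xor_si128(lo, hi));   /* Karatsuba middle */
        *Xlo = _mm_xor_si128(lo, _mm_slli_si128(mid, 8));   /* low 128 bits     */
        *Xhi = _mm_xor_si128(hi, _mm_srli_si128(mid, 8));   /* high 128 bits    */
    }

The 0x4e immediate is the same 0b01001110 used by the pshufd calls above: it swaps the two 64-bit halves so that an XOR with the original leaves hi^lo in both lanes.
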
846sub clmul64x64_T3 {
847# Even though this subroutine offers visually better ILP, it
848# was empirically found to be a tad slower than above version.
849# At least in gcm_ghash_clmul context. But it's just as well,
850# because loop modulo-scheduling is possible only thanks to
851# minimized "register" pressure...
852my ($Xhi,$Xi,$Hkey)=@_;
853
854 &movdqa ($T1,$Xi); #
855 &movdqa ($Xhi,$Xi);
856 &pclmulqdq ($Xi,$Hkey,0x00); #######
857 &pclmulqdq ($Xhi,$Hkey,0x11); #######
858 &pshufd ($T2,$T1,0b01001110); #
859 &pshufd ($T3,$Hkey,0b01001110);
860 &pxor ($T2,$T1); #
861 &pxor ($T3,$Hkey);
862 &pclmulqdq ($T2,$T3,0x00); #######
863 &pxor ($T2,$Xi); #
864 &pxor ($T2,$Xhi); #
865
866 &movdqa ($T3,$T2); #
867 &psrldq ($T2,8);
868 &pslldq ($T3,8); #
869 &pxor ($Xhi,$T2);
870 &pxor ($Xi,$T3); #
871}
872
873if (1) { # Algorithm 9 with <<1 twist.
874 # Reduction is shorter and uses only two
875 # temporary registers, which makes it better
876 # candidate for interleaving with 64x64
877 # multiplication. Pre-modulo-scheduled loop
878 # was found to be ~20% faster than Algorithm 5
879 # below. Algorithm 9 was therefore chosen for
880 # further optimization...
881
882sub reduction_alg9 { # 17/13 times faster than Intel version
883my ($Xhi,$Xi) = @_;
884
885 # 1st phase
886 &movdqa ($T1,$Xi) #
887 &psllq ($Xi,1);
888 &pxor ($Xi,$T1); #
889 &psllq ($Xi,5); #
890 &pxor ($Xi,$T1); #
891 &psllq ($Xi,57); #
892 &movdqa ($T2,$Xi); #
893 &pslldq ($Xi,8);
894 &psrldq ($T2,8); #
895 &pxor ($Xi,$T1);
896 &pxor ($Xhi,$T2); #
897
898 # 2nd phase
899 &movdqa ($T2,$Xi);
900 &psrlq ($Xi,5);
901 &pxor ($Xi,$T2); #
902 &psrlq ($Xi,1); #
903 &pxor ($Xi,$T2); #
904 &pxor ($T2,$Xhi);
905 &psrlq ($Xi,1); #
906 &pxor ($Xi,$T2); #
907}
908
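
The reduction can be transcribed instruction for instruction as well (again only a sketch; psllq/psrlq shift each 64-bit lane while pslldq/psrldq shift the whole 128-bit register, which the _epi64 versus _si128 suffixes below make explicit):

    #include <emmintrin.h>

    /* fold the 256-bit product (Xhi:Xi) back into a 128-bit residue in Xi */
    static void reduce_alg9(__m128i Xhi, __m128i *Xi_p)
    {
        __m128i Xi = *Xi_p, T1, T2;

        /* 1st phase */
        T1 = Xi;
        Xi = _mm_slli_epi64(Xi, 1);   Xi = _mm_xor_si128(Xi, T1);
        Xi = _mm_slli_epi64(Xi, 5);   Xi = _mm_xor_si128(Xi, T1);
        Xi = _mm_slli_epi64(Xi, 57);
        T2 = Xi;
        Xi = _mm_slli_si128(Xi, 8);                     /* pslldq $8 */
        T2 = _mm_srli_si128(T2, 8);                     /* psrldq $8 */
        Xi = _mm_xor_si128(Xi, T1);
        Xhi = _mm_xor_si128(Xhi, T2);

        /* 2nd phase */
        T2 = Xi;
        Xi = _mm_srli_epi64(Xi, 5);   Xi = _mm_xor_si128(Xi, T2);
        Xi = _mm_srli_epi64(Xi, 1);   Xi = _mm_xor_si128(Xi, T2);
        T2 = _mm_xor_si128(T2, Xhi);
        Xi = _mm_srli_epi64(Xi, 1);   Xi = _mm_xor_si128(Xi, T2);

        *Xi_p = Xi;                                     /* reduced result */
    }
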
909&function_begin_B("gcm_init_clmul");
910 &mov ($Htbl,&wparam(0));
911 &mov ($Xip,&wparam(1));
912
913 &call (&label("pic"));
914&set_label("pic");
915 &blindpop ($const);
916 &lea ($const,&DWP(&label("bswap")."-".&label("pic"),$const));
917
918 &movdqu ($Hkey,&QWP(0,$Xip));
919 &pshufd ($Hkey,$Hkey,0b01001110);# dword swap
920
921 # <<1 twist
922 &pshufd ($T2,$Hkey,0b11111111); # broadcast uppermost dword
923 &movdqa ($T1,$Hkey);
924 &psllq ($Hkey,1);
925 &pxor ($T3,$T3); #
926 &psrlq ($T1,63);
927 &pcmpgtd ($T3,$T2); # broadcast carry bit
928 &pslldq ($T1,8);
929 &por ($Hkey,$T1); # H<<=1
930
931 # magic reduction
932 &pand ($T3,&QWP(16,$const)); # 0x1c2_polynomial
933 &pxor ($Hkey,$T3); # if(carry) H^=0x1c2_polynomial
934
935 # calculate H^2
936 &movdqa ($Xi,$Hkey);
937 &clmul64x64_T2 ($Xhi,$Xi,$Hkey);
938 &reduction_alg9 ($Xhi,$Xi);
939
940 &movdqu (&QWP(0,$Htbl),$Hkey); # save H
941 &movdqu (&QWP(16,$Htbl),$Xi); # save H^2
942
943 &ret ();
944&function_end_B("gcm_init_clmul");
945
946&function_begin_B("gcm_gmult_clmul");
947 &mov ($Xip,&wparam(0));
948 &mov ($Htbl,&wparam(1));
949
950 &call (&label("pic"));
951&set_label("pic");
952 &blindpop ($const);
953 &lea ($const,&DWP(&label("bswap")."-".&label("pic"),$const));
954
955 &movdqu ($Xi,&QWP(0,$Xip));
956 &movdqa ($T3,&QWP(0,$const));
957 &movups ($Hkey,&QWP(0,$Htbl));
958 &pshufb ($Xi,$T3);
959
960 &clmul64x64_T2 ($Xhi,$Xi,$Hkey);
961 &reduction_alg9 ($Xhi,$Xi);
962
963 &pshufb ($Xi,$T3);
964 &movdqu (&QWP(0,$Xip),$Xi);
965
966 &ret ();
967&function_end_B("gcm_gmult_clmul");
968
969&function_begin("gcm_ghash_clmul");
970 &mov ($Xip,&wparam(0));
971 &mov ($Htbl,&wparam(1));
972 &mov ($inp,&wparam(2));
973 &mov ($len,&wparam(3));
974
975 &call (&label("pic"));
976&set_label("pic");
977 &blindpop ($const);
978 &lea ($const,&DWP(&label("bswap")."-".&label("pic"),$const));
979
980 &movdqu ($Xi,&QWP(0,$Xip));
981 &movdqa ($T3,&QWP(0,$const));
982 &movdqu ($Hkey,&QWP(0,$Htbl));
983 &pshufb ($Xi,$T3);
984
985 &sub ($len,0x10);
986 &jz (&label("odd_tail"));
987
988 #######
989 # Xi+2 =[H*(Ii+1 + Xi+1)] mod P =
990 # [(H*Ii+1) + (H*Xi+1)] mod P =
991 # [(H*Ii+1) + H^2*(Ii+Xi)] mod P
992 #
993 &movdqu ($T1,&QWP(0,$inp)); # Ii
994 &movdqu ($Xn,&QWP(16,$inp)); # Ii+1
995 &pshufb ($T1,$T3);
996 &pshufb ($Xn,$T3);
997 &pxor ($Xi,$T1); # Ii+Xi
998
999 &clmul64x64_T2 ($Xhn,$Xn,$Hkey); # H*Ii+1
1000 &movups ($Hkey,&QWP(16,$Htbl)); # load H^2
1001
1002 &lea ($inp,&DWP(32,$inp)); # i+=2
1003 &sub ($len,0x20);
1004 &jbe (&label("even_tail"));
1005
1006&set_label("mod_loop");
1007 &clmul64x64_T2 ($Xhi,$Xi,$Hkey); # H^2*(Ii+Xi)
1008 &movdqu ($T1,&QWP(0,$inp)); # Ii
1009 &movups ($Hkey,&QWP(0,$Htbl)); # load H
1010
1011 &pxor ($Xi,$Xn); # (H*Ii+1) + H^2*(Ii+Xi)
1012 &pxor ($Xhi,$Xhn);
1013
1014 &movdqu ($Xn,&QWP(16,$inp)); # Ii+1
1015 &pshufb ($T1,$T3);
1016 &pshufb ($Xn,$T3);
1017
1018 &movdqa ($T3,$Xn); #&clmul64x64_TX ($Xhn,$Xn,$Hkey); H*Ii+1
1019 &movdqa ($Xhn,$Xn);
1020 &pxor ($Xhi,$T1); # "Ii+Xi", consume early
1021
1022 &movdqa ($T1,$Xi) #&reduction_alg9($Xhi,$Xi); 1st phase
1023 &psllq ($Xi,1);
1024 &pxor ($Xi,$T1); #
1025 &psllq ($Xi,5); #
1026 &pxor ($Xi,$T1); #
1027 &pclmulqdq ($Xn,$Hkey,0x00); #######
1028 &psllq ($Xi,57); #
1029 &movdqa ($T2,$Xi); #
1030 &pslldq ($Xi,8);
1031 &psrldq ($T2,8); #
1032 &pxor ($Xi,$T1);
1033 &pshufd ($T1,$T3,0b01001110);
1034 &pxor ($Xhi,$T2); #
1035 &pxor ($T1,$T3);
1036 &pshufd ($T3,$Hkey,0b01001110);
1037 &pxor ($T3,$Hkey); #
1038
1039 &pclmulqdq ($Xhn,$Hkey,0x11); #######
1040 &movdqa ($T2,$Xi); # 2nd phase
1041 &psrlq ($Xi,5);
1042 &pxor ($Xi,$T2); #
1043 &psrlq ($Xi,1); #
1044 &pxor ($Xi,$T2); #
1045 &pxor ($T2,$Xhi);
1046 &psrlq ($Xi,1); #
1047 &pxor ($Xi,$T2); #
1048
1049 &pclmulqdq ($T1,$T3,0x00); #######
1050 &movups ($Hkey,&QWP(16,$Htbl)); # load H^2
1051 &xorps ($T1,$Xn); #
1052 &xorps ($T1,$Xhn); #
1053
1054 &movdqa ($T3,$T1); #
1055 &psrldq ($T1,8);
1056 &pslldq ($T3,8); #
1057 &pxor ($Xhn,$T1);
1058 &pxor ($Xn,$T3); #
1059 &movdqa ($T3,&QWP(0,$const));
1060
1061 &lea ($inp,&DWP(32,$inp));
1062 &sub ($len,0x20);
1063 &ja (&label("mod_loop"));
1064
1065&set_label("even_tail");
1066 &clmul64x64_T2 ($Xhi,$Xi,$Hkey); # H^2*(Ii+Xi)
1067
1068 &pxor ($Xi,$Xn); # (H*Ii+1) + H^2*(Ii+Xi)
1069 &pxor ($Xhi,$Xhn);
1070
1071 &reduction_alg9 ($Xhi,$Xi);
1072
1073 &test ($len,$len);
1074 &jnz (&label("done"));
1075
1076 &movups ($Hkey,&QWP(0,$Htbl)); # load H
1077&set_label("odd_tail");
1078 &movdqu ($T1,&QWP(0,$inp)); # Ii
1079 &pshufb ($T1,$T3);
1080 &pxor ($Xi,$T1); # Ii+Xi
1081
1082 &clmul64x64_T2 ($Xhi,$Xi,$Hkey); # H*(Ii+Xi)
1083 &reduction_alg9 ($Xhi,$Xi);
1084
1085&set_label("done");
1086 &pshufb ($Xi,$T3);
1087 &movdqu (&QWP(0,$Xip),$Xi);
1088&function_end("gcm_ghash_clmul");
1089
1090} else {	# Algorithm 5. Kept for reference purposes.
1091
1092sub reduction_alg5 { # 19/16 times faster than Intel version
1093my ($Xhi,$Xi)=@_;
1094
1095 # <<1
1096 &movdqa ($T1,$Xi); #
1097 &movdqa ($T2,$Xhi);
1098 &pslld ($Xi,1);
1099 &pslld ($Xhi,1); #
1100 &psrld ($T1,31);
1101 &psrld ($T2,31); #
1102 &movdqa ($T3,$T1);
1103 &pslldq ($T1,4);
1104 &psrldq ($T3,12); #
1105 &pslldq ($T2,4);
1106 &por ($Xhi,$T3); #
1107 &por ($Xi,$T1);
1108 &por ($Xhi,$T2); #
1109
1110 # 1st phase
1111 &movdqa ($T1,$Xi);
1112 &movdqa ($T2,$Xi);
1113 &movdqa ($T3,$Xi); #
1114 &pslld ($T1,31);
1115 &pslld ($T2,30);
1116 &pslld ($Xi,25); #
1117 &pxor ($T1,$T2);
1118 &pxor ($T1,$Xi); #
1119 &movdqa ($T2,$T1); #
1120 &pslldq ($T1,12);
1121 &psrldq ($T2,4); #
1122 &pxor ($T3,$T1);
1123
1124 # 2nd phase
1125 &pxor ($Xhi,$T3); #
1126 &movdqa ($Xi,$T3);
1127 &movdqa ($T1,$T3);
1128 &psrld ($Xi,1); #
1129 &psrld ($T1,2);
1130 &psrld ($T3,7); #
1131 &pxor ($Xi,$T1);
1132 &pxor ($Xhi,$T2);
1133 &pxor ($Xi,$T3); #
1134 &pxor ($Xi,$Xhi); #
1135}
1136
1137&function_begin_B("gcm_init_clmul");
1138 &mov ($Htbl,&wparam(0));
1139 &mov ($Xip,&wparam(1));
1140
1141 &call (&label("pic"));
1142&set_label("pic");
1143 &blindpop ($const);
1144 &lea ($const,&DWP(&label("bswap")."-".&label("pic"),$const));
1145
1146 &movdqu ($Hkey,&QWP(0,$Xip));
1147 &pshufd ($Hkey,$Hkey,0b01001110);# dword swap
1148
1149 # calculate H^2
1150 &movdqa ($Xi,$Hkey);
1151 &clmul64x64_T3 ($Xhi,$Xi,$Hkey);
1152 &reduction_alg5 ($Xhi,$Xi);
1153
1154 &movdqu (&QWP(0,$Htbl),$Hkey); # save H
1155 &movdqu (&QWP(16,$Htbl),$Xi); # save H^2
1156
1157 &ret ();
1158&function_end_B("gcm_init_clmul");
1159
1160&function_begin_B("gcm_gmult_clmul");
1161 &mov ($Xip,&wparam(0));
1162 &mov ($Htbl,&wparam(1));
1163
1164 &call (&label("pic"));
1165&set_label("pic");
1166 &blindpop ($const);
1167 &lea ($const,&DWP(&label("bswap")."-".&label("pic"),$const));
1168
1169 &movdqu ($Xi,&QWP(0,$Xip));
1170 &movdqa ($Xn,&QWP(0,$const));
1171 &movdqu ($Hkey,&QWP(0,$Htbl));
1172 &pshufb ($Xi,$Xn);
1173
1174 &clmul64x64_T3 ($Xhi,$Xi,$Hkey);
1175 &reduction_alg5 ($Xhi,$Xi);
1176
1177 &pshufb ($Xi,$Xn);
1178 &movdqu (&QWP(0,$Xip),$Xi);
1179
1180 &ret ();
1181&function_end_B("gcm_gmult_clmul");
1182
1183&function_begin("gcm_ghash_clmul");
1184 &mov ($Xip,&wparam(0));
1185 &mov ($Htbl,&wparam(1));
1186 &mov ($inp,&wparam(2));
1187 &mov ($len,&wparam(3));
1188
1189 &call (&label("pic"));
1190&set_label("pic");
1191 &blindpop ($const);
1192 &lea ($const,&DWP(&label("bswap")."-".&label("pic"),$const));
1193
1194 &movdqu ($Xi,&QWP(0,$Xip));
1195 &movdqa ($T3,&QWP(0,$const));
1196 &movdqu ($Hkey,&QWP(0,$Htbl));
1197 &pshufb ($Xi,$T3);
1198
1199 &sub ($len,0x10);
1200 &jz (&label("odd_tail"));
1201
1202 #######
1203 # Xi+2 =[H*(Ii+1 + Xi+1)] mod P =
1204 # [(H*Ii+1) + (H*Xi+1)] mod P =
1205 # [(H*Ii+1) + H^2*(Ii+Xi)] mod P
1206 #
1207 &movdqu ($T1,&QWP(0,$inp)); # Ii
1208 &movdqu ($Xn,&QWP(16,$inp)); # Ii+1
1209 &pshufb ($T1,$T3);
1210 &pshufb ($Xn,$T3);
1211 &pxor ($Xi,$T1); # Ii+Xi
1212
1213 &clmul64x64_T3 ($Xhn,$Xn,$Hkey); # H*Ii+1
1214 &movdqu ($Hkey,&QWP(16,$Htbl)); # load H^2
1215
1216 &sub ($len,0x20);
1217 &lea ($inp,&DWP(32,$inp)); # i+=2
1218 &jbe (&label("even_tail"));
1219
1220&set_label("mod_loop");
1221 &clmul64x64_T3 ($Xhi,$Xi,$Hkey); # H^2*(Ii+Xi)
1222 &movdqu ($Hkey,&QWP(0,$Htbl)); # load H
1223
1224 &pxor ($Xi,$Xn); # (H*Ii+1) + H^2*(Ii+Xi)
1225 &pxor ($Xhi,$Xhn);
1226
1227 &reduction_alg5 ($Xhi,$Xi);
1228
1229 #######
1230 &movdqa ($T3,&QWP(0,$const));
1231 &movdqu ($T1,&QWP(0,$inp)); # Ii
1232 &movdqu ($Xn,&QWP(16,$inp)); # Ii+1
1233 &pshufb ($T1,$T3);
1234 &pshufb ($Xn,$T3);
1235 &pxor ($Xi,$T1); # Ii+Xi
1236
1237 &clmul64x64_T3 ($Xhn,$Xn,$Hkey); # H*Ii+1
1238 &movdqu ($Hkey,&QWP(16,$Htbl)); # load H^2
1239
1240 &sub ($len,0x20);
1241 &lea ($inp,&DWP(32,$inp));
1242 &ja (&label("mod_loop"));
1243
1244&set_label("even_tail");
1245 &clmul64x64_T3 ($Xhi,$Xi,$Hkey); # H^2*(Ii+Xi)
1246
1247 &pxor ($Xi,$Xn); # (H*Ii+1) + H^2*(Ii+Xi)
1248 &pxor ($Xhi,$Xhn);
1249
1250 &reduction_alg5 ($Xhi,$Xi);
1251
1252 &movdqa ($T3,&QWP(0,$const));
1253 &test ($len,$len);
1254 &jnz (&label("done"));
1255
1256 &movdqu ($Hkey,&QWP(0,$Htbl)); # load H
1257&set_label("odd_tail");
1258 &movdqu ($T1,&QWP(0,$inp)); # Ii
1259 &pshufb ($T1,$T3);
1260 &pxor ($Xi,$T1); # Ii+Xi
1261
1262 &clmul64x64_T3 ($Xhi,$Xi,$Hkey); # H*(Ii+Xi)
1263 &reduction_alg5 ($Xhi,$Xi);
1264
1265 &movdqa ($T3,&QWP(0,$const));
1266&set_label("done");
1267 &pshufb ($Xi,$T3);
1268 &movdqu (&QWP(0,$Xip),$Xi);
1269&function_end("gcm_ghash_clmul");
1270
1271}
1272
1273&set_label("bswap",64);
1274 &data_byte(15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0);
1275 &data_byte(1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2); # 0x1c2_polynomial
1276}} # $sse2
1277
1278&set_label("rem_4bit",64);
1279 &data_word(0,0x0000<<$S,0,0x1C20<<$S,0,0x3840<<$S,0,0x2460<<$S);
1280 &data_word(0,0x7080<<$S,0,0x6CA0<<$S,0,0x48C0<<$S,0,0x54E0<<$S);
1281 &data_word(0,0xE100<<$S,0,0xFD20<<$S,0,0xD940<<$S,0,0xC560<<$S);
1282 &data_word(0,0x9180<<$S,0,0x8DA0<<$S,0,0xA9C0<<$S,0,0xB5E0<<$S);
1283&set_label("rem_8bit",64);
1284 &data_short(0x0000,0x01C2,0x0384,0x0246,0x0708,0x06CA,0x048C,0x054E);
1285 &data_short(0x0E10,0x0FD2,0x0D94,0x0C56,0x0918,0x08DA,0x0A9C,0x0B5E);
1286 &data_short(0x1C20,0x1DE2,0x1FA4,0x1E66,0x1B28,0x1AEA,0x18AC,0x196E);
1287 &data_short(0x1230,0x13F2,0x11B4,0x1076,0x1538,0x14FA,0x16BC,0x177E);
1288 &data_short(0x3840,0x3982,0x3BC4,0x3A06,0x3F48,0x3E8A,0x3CCC,0x3D0E);
1289 &data_short(0x3650,0x3792,0x35D4,0x3416,0x3158,0x309A,0x32DC,0x331E);
1290 &data_short(0x2460,0x25A2,0x27E4,0x2626,0x2368,0x22AA,0x20EC,0x212E);
1291 &data_short(0x2A70,0x2BB2,0x29F4,0x2836,0x2D78,0x2CBA,0x2EFC,0x2F3E);
1292 &data_short(0x7080,0x7142,0x7304,0x72C6,0x7788,0x764A,0x740C,0x75CE);
1293 &data_short(0x7E90,0x7F52,0x7D14,0x7CD6,0x7998,0x785A,0x7A1C,0x7BDE);
1294 &data_short(0x6CA0,0x6D62,0x6F24,0x6EE6,0x6BA8,0x6A6A,0x682C,0x69EE);
1295 &data_short(0x62B0,0x6372,0x6134,0x60F6,0x65B8,0x647A,0x663C,0x67FE);
1296 &data_short(0x48C0,0x4902,0x4B44,0x4A86,0x4FC8,0x4E0A,0x4C4C,0x4D8E);
1297 &data_short(0x46D0,0x4712,0x4554,0x4496,0x41D8,0x401A,0x425C,0x439E);
1298 &data_short(0x54E0,0x5522,0x5764,0x56A6,0x53E8,0x522A,0x506C,0x51AE);
1299 &data_short(0x5AF0,0x5B32,0x5974,0x58B6,0x5DF8,0x5C3A,0x5E7C,0x5FBE);
1300 &data_short(0xE100,0xE0C2,0xE284,0xE346,0xE608,0xE7CA,0xE58C,0xE44E);
1301 &data_short(0xEF10,0xEED2,0xEC94,0xED56,0xE818,0xE9DA,0xEB9C,0xEA5E);
1302 &data_short(0xFD20,0xFCE2,0xFEA4,0xFF66,0xFA28,0xFBEA,0xF9AC,0xF86E);
1303 &data_short(0xF330,0xF2F2,0xF0B4,0xF176,0xF438,0xF5FA,0xF7BC,0xF67E);
1304 &data_short(0xD940,0xD882,0xDAC4,0xDB06,0xDE48,0xDF8A,0xDDCC,0xDC0E);
1305 &data_short(0xD750,0xD692,0xD4D4,0xD516,0xD058,0xD19A,0xD3DC,0xD21E);
1306 &data_short(0xC560,0xC4A2,0xC6E4,0xC726,0xC268,0xC3AA,0xC1EC,0xC02E);
1307 &data_short(0xCB70,0xCAB2,0xC8F4,0xC936,0xCC78,0xCDBA,0xCFFC,0xCE3E);
1308 &data_short(0x9180,0x9042,0x9204,0x93C6,0x9688,0x974A,0x950C,0x94CE);
1309 &data_short(0x9F90,0x9E52,0x9C14,0x9DD6,0x9898,0x995A,0x9B1C,0x9ADE);
1310 &data_short(0x8DA0,0x8C62,0x8E24,0x8FE6,0x8AA8,0x8B6A,0x892C,0x88EE);
1311 &data_short(0x83B0,0x8272,0x8034,0x81F6,0x84B8,0x857A,0x873C,0x86FE);
1312 &data_short(0xA9C0,0xA802,0xAA44,0xAB86,0xAEC8,0xAF0A,0xAD4C,0xAC8E);
1313 &data_short(0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E);
1314 &data_short(0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE);
1315 &data_short(0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE);
1316}}} # !$x86only
1317
1318&asciz("GHASH for x86, CRYPTOGAMS by <appro\@openssl.org>");
1319&asm_finish();
1320
1321# A question was raised about the choice of vanilla MMX. Or rather, why wasn't
1322# SSE2 chosen instead? In addition to the fact that MMX runs on legacy
1323# CPUs such as PIII, "4-bit" MMX version was observed to provide better
1324# performance than *corresponding* SSE2 one even on contemporary CPUs.
1325# SSE2 results were provided by Peter-Michael Hager. He maintains SSE2
1326# implementation featuring full range of lookup-table sizes, but with
1327# per-invocation lookup table setup. Latter means that table size is
1328# chosen depending on how much data is to be hashed in every given call,
1329# more data - larger table. Best reported result for Core2 is ~4 cycles
1330# per processed byte out of 64KB block. This number accounts even for
1331# 64KB table setup overhead. As discussed in gcm128.c we choose to be
1332# more conservative in respect to lookup table sizes, but how do the
1333# results compare? Minimalistic "256B" MMX version delivers ~11 cycles
1334# on same platform. As also discussed in gcm128.c, next in line "8-bit
1335# Shoup's" or "4KB" method should deliver twice the performance of
1336# "256B" one, in other words not worse than ~6 cycles per byte. It
1337# should also be noted that in the SSE2 case improvement can be "super-
1338# linear," i.e. more than twice, mostly because >>8 maps to single
1339# instruction on SSE2 register. This is unlike "4-bit" case when >>4
1340# maps to same amount of instructions in both MMX and SSE2 cases.
1341# Bottom line is that switch to SSE2 is considered to be justifiable
1342# only in case we choose to implement "8-bit" method...
diff --git a/src/lib/libssl/src/crypto/modes/asm/ghash-x86_64.pl b/src/lib/libssl/src/crypto/modes/asm/ghash-x86_64.pl
new file mode 100644
index 0000000000..a5ae180882
--- /dev/null
+++ b/src/lib/libssl/src/crypto/modes/asm/ghash-x86_64.pl
@@ -0,0 +1,805 @@
1#!/usr/bin/env perl
2#
3# ====================================================================
4# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9#
10# March, June 2010
11#
12# The module implements "4-bit" GCM GHASH function and underlying
13# single multiplication operation in GF(2^128). "4-bit" means that
14# it uses 256 bytes per-key table [+128 bytes shared table]. GHASH
15# function features so called "528B" variant utilizing additional
16# 256+16 bytes of per-key storage [+512 bytes shared table].
17# Performance results are for this streamed GHASH subroutine and are
18# expressed in cycles per processed byte, less is better:
19#
20# gcc 3.4.x(*) assembler
21#
22# P4 28.6 14.0 +100%
23# Opteron 19.3 7.7 +150%
24# Core2 17.8 8.1(**) +120%
25#
26# (*) comparison is not completely fair, because C results are
27# for vanilla "256B" implementation, while assembler results
28# are for "528B";-)
29# (**)	it's a mystery [to me] why the Core2 result is not the same as for
30# Opteron;
31
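
At the algorithmic level the streamed subroutine benchmarked above is just the loop below; the sketch reuses the hypothetical gmult_4bit()/u128 declarations from the ghash-x86.pl notes earlier in this import and is not part of the module itself:

    #include <stddef.h>
    #include <stdint.h>

    typedef struct { uint64_t hi, lo; } u128;

    void gmult_4bit(uint8_t Xi[16], const u128 Htable[16]);   /* earlier sketch */

    /* Xi <- ( ... ((Xi ^ I0) * H ^ I1) * H ... ^ In-1 ) * H over whole blocks */
    static void ghash_4bit(uint8_t Xi[16], const u128 Htable[16],
                           const uint8_t *inp, size_t len)
    {
        while (len >= 16) {
            for (int i = 0; i < 16; i++)
                Xi[i] ^= inp[i];        /* fold the next block into Xi */
            gmult_4bit(Xi, Htable);     /* one GF(2^128) multiplication by H */
            inp += 16;
            len -= 16;
        }
    }

Everything below is about making that one multiplication per 16-byte block as cheap as possible.
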
32# May 2010
33#
34# Add PCLMULQDQ version performing at 2.02 cycles per processed byte.
35# See ghash-x86.pl for background information and details about coding
36# techniques.
37#
38# Special thanks to David Woodhouse <dwmw2@infradead.org> for
39# providing access to a Westmere-based system on behalf of Intel
40# Open Source Technology Centre.
41
42$flavour = shift;
43$output = shift;
44if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
45
46$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
47
48$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
49( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
50( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
51die "can't locate x86_64-xlate.pl";
52
53open STDOUT,"| $^X $xlate $flavour $output";
54
55# common register layout
56$nlo="%rax";
57$nhi="%rbx";
58$Zlo="%r8";
59$Zhi="%r9";
60$tmp="%r10";
61$rem_4bit = "%r11";
62
63$Xi="%rdi";
64$Htbl="%rsi";
65
66# per-function register layout
67$cnt="%rcx";
68$rem="%rdx";
69
70sub LB() { my $r=shift; $r =~ s/%[er]([a-d])x/%\1l/ or
71 $r =~ s/%[er]([sd]i)/%\1l/ or
72 $r =~ s/%[er](bp)/%\1l/ or
73 $r =~ s/%(r[0-9]+)[d]?/%\1b/; $r; }
74
75sub AUTOLOAD() # thunk [simplified] 32-bit style perlasm
76{ my $opcode = $AUTOLOAD; $opcode =~ s/.*:://;
77 my $arg = pop;
78 $arg = "\$$arg" if ($arg*1 eq $arg);
79 $code .= "\t$opcode\t".join(',',$arg,reverse @_)."\n";
80}
81
82{ my $N;
83 sub loop() {
84 my $inp = shift;
85
86 $N++;
87$code.=<<___;
88 xor $nlo,$nlo
89 xor $nhi,$nhi
90 mov `&LB("$Zlo")`,`&LB("$nlo")`
91 mov `&LB("$Zlo")`,`&LB("$nhi")`
92 shl \$4,`&LB("$nlo")`
93 mov \$14,$cnt
94 mov 8($Htbl,$nlo),$Zlo
95 mov ($Htbl,$nlo),$Zhi
96 and \$0xf0,`&LB("$nhi")`
97 mov $Zlo,$rem
98 jmp .Loop$N
99
100.align 16
101.Loop$N:
102 shr \$4,$Zlo
103 and \$0xf,$rem
104 mov $Zhi,$tmp
105 mov ($inp,$cnt),`&LB("$nlo")`
106 shr \$4,$Zhi
107 xor 8($Htbl,$nhi),$Zlo
108 shl \$60,$tmp
109 xor ($Htbl,$nhi),$Zhi
110 mov `&LB("$nlo")`,`&LB("$nhi")`
111 xor ($rem_4bit,$rem,8),$Zhi
112 mov $Zlo,$rem
113 shl \$4,`&LB("$nlo")`
114 xor $tmp,$Zlo
115 dec $cnt
116 js .Lbreak$N
117
118 shr \$4,$Zlo
119 and \$0xf,$rem
120 mov $Zhi,$tmp
121 shr \$4,$Zhi
122 xor 8($Htbl,$nlo),$Zlo
123 shl \$60,$tmp
124 xor ($Htbl,$nlo),$Zhi
125 and \$0xf0,`&LB("$nhi")`
126 xor ($rem_4bit,$rem,8),$Zhi
127 mov $Zlo,$rem
128 xor $tmp,$Zlo
129 jmp .Loop$N
130
131.align 16
132.Lbreak$N:
133 shr \$4,$Zlo
134 and \$0xf,$rem
135 mov $Zhi,$tmp
136 shr \$4,$Zhi
137 xor 8($Htbl,$nlo),$Zlo
138 shl \$60,$tmp
139 xor ($Htbl,$nlo),$Zhi
140 and \$0xf0,`&LB("$nhi")`
141 xor ($rem_4bit,$rem,8),$Zhi
142 mov $Zlo,$rem
143 xor $tmp,$Zlo
144
145 shr \$4,$Zlo
146 and \$0xf,$rem
147 mov $Zhi,$tmp
148 shr \$4,$Zhi
149 xor 8($Htbl,$nhi),$Zlo
150 shl \$60,$tmp
151 xor ($Htbl,$nhi),$Zhi
152 xor $tmp,$Zlo
153 xor ($rem_4bit,$rem,8),$Zhi
154
155 bswap $Zlo
156 bswap $Zhi
157___
158}}
159
160$code=<<___;
161.text
162
163.globl gcm_gmult_4bit
164.type gcm_gmult_4bit,\@function,2
165.align 16
166gcm_gmult_4bit:
167 push %rbx
168 push %rbp # %rbp and %r12 are pushed exclusively in
169 push %r12 # order to reuse Win64 exception handler...
170.Lgmult_prologue:
171
172 movzb 15($Xi),$Zlo
173 lea .Lrem_4bit(%rip),$rem_4bit
174___
175 &loop ($Xi);
176$code.=<<___;
177 mov $Zlo,8($Xi)
178 mov $Zhi,($Xi)
179
180 mov 16(%rsp),%rbx
181 lea 24(%rsp),%rsp
182.Lgmult_epilogue:
183 ret
184.size gcm_gmult_4bit,.-gcm_gmult_4bit
185___
186
187# per-function register layout
188$inp="%rdx";
189$len="%rcx";
190$rem_8bit=$rem_4bit;
191
192$code.=<<___;
193.globl gcm_ghash_4bit
194.type gcm_ghash_4bit,\@function,4
195.align 16
196gcm_ghash_4bit:
197 push %rbx
198 push %rbp
199 push %r12
200 push %r13
201 push %r14
202 push %r15
203 sub \$280,%rsp
204.Lghash_prologue:
205 mov $inp,%r14 # reassign couple of args
206 mov $len,%r15
207___
208{ my $inp="%r14";
209 my $dat="%edx";
210 my $len="%r15";
211 my @nhi=("%ebx","%ecx");
212 my @rem=("%r12","%r13");
213 my $Hshr4="%rbp";
214
215 &sub ($Htbl,-128); # size optimization
216 &lea ($Hshr4,"16+128(%rsp)");
217 { my @lo =($nlo,$nhi);
218 my @hi =($Zlo,$Zhi);
219
220 &xor ($dat,$dat);
221 for ($i=0,$j=-2;$i<18;$i++,$j++) {
222 &mov ("$j(%rsp)",&LB($dat)) if ($i>1);
223 &or ($lo[0],$tmp) if ($i>1);
224 &mov (&LB($dat),&LB($lo[1])) if ($i>0 && $i<17);
225 &shr ($lo[1],4) if ($i>0 && $i<17);
226 &mov ($tmp,$hi[1]) if ($i>0 && $i<17);
227 &shr ($hi[1],4) if ($i>0 && $i<17);
228 &mov ("8*$j($Hshr4)",$hi[0]) if ($i>1);
229 &mov ($hi[0],"16*$i+0-128($Htbl)") if ($i<16);
230 &shl (&LB($dat),4) if ($i>0 && $i<17);
231 &mov ("8*$j-128($Hshr4)",$lo[0]) if ($i>1);
232 &mov ($lo[0],"16*$i+8-128($Htbl)") if ($i<16);
233 &shl ($tmp,60) if ($i>0 && $i<17);
234
235 push (@lo,shift(@lo));
236 push (@hi,shift(@hi));
237 }
238 }
239 &add ($Htbl,-128);
240 &mov ($Zlo,"8($Xi)");
241 &mov ($Zhi,"0($Xi)");
242 &add ($len,$inp); # pointer to the end of data
243 &lea ($rem_8bit,".Lrem_8bit(%rip)");
244 &jmp (".Louter_loop");
245
246$code.=".align 16\n.Louter_loop:\n";
247 &xor ($Zhi,"($inp)");
248 &mov ("%rdx","8($inp)");
249 &lea ($inp,"16($inp)");
250 &xor ("%rdx",$Zlo);
251 &mov ("($Xi)",$Zhi);
252 &mov ("8($Xi)","%rdx");
253 &shr ("%rdx",32);
254
255 &xor ($nlo,$nlo);
256 &rol ($dat,8);
257 &mov (&LB($nlo),&LB($dat));
258 &movz ($nhi[0],&LB($dat));
259 &shl (&LB($nlo),4);
260 &shr ($nhi[0],4);
261
262 for ($j=11,$i=0;$i<15;$i++) {
263 &rol ($dat,8);
264 &xor ($Zlo,"8($Htbl,$nlo)") if ($i>0);
265 &xor ($Zhi,"($Htbl,$nlo)") if ($i>0);
266 &mov ($Zlo,"8($Htbl,$nlo)") if ($i==0);
267 &mov ($Zhi,"($Htbl,$nlo)") if ($i==0);
268
269 &mov (&LB($nlo),&LB($dat));
270 &xor ($Zlo,$tmp) if ($i>0);
271 &movzw ($rem[1],"($rem_8bit,$rem[1],2)") if ($i>0);
272
273 &movz ($nhi[1],&LB($dat));
274 &shl (&LB($nlo),4);
275 &movzb ($rem[0],"(%rsp,$nhi[0])");
276
277 &shr ($nhi[1],4) if ($i<14);
278 &and ($nhi[1],0xf0) if ($i==14);
279 &shl ($rem[1],48) if ($i>0);
280 &xor ($rem[0],$Zlo);
281
282 &mov ($tmp,$Zhi);
283 &xor ($Zhi,$rem[1]) if ($i>0);
284 &shr ($Zlo,8);
285
286 &movz ($rem[0],&LB($rem[0]));
287 &mov ($dat,"$j($Xi)") if (--$j%4==0);
288 &shr ($Zhi,8);
289
290 &xor ($Zlo,"-128($Hshr4,$nhi[0],8)");
291 &shl ($tmp,56);
292 &xor ($Zhi,"($Hshr4,$nhi[0],8)");
293
294 unshift (@nhi,pop(@nhi)); # "rotate" registers
295 unshift (@rem,pop(@rem));
296 }
297 &movzw ($rem[1],"($rem_8bit,$rem[1],2)");
298 &xor ($Zlo,"8($Htbl,$nlo)");
299 &xor ($Zhi,"($Htbl,$nlo)");
300
301 &shl ($rem[1],48);
302 &xor ($Zlo,$tmp);
303
304 &xor ($Zhi,$rem[1]);
305 &movz ($rem[0],&LB($Zlo));
306 &shr ($Zlo,4);
307
308 &mov ($tmp,$Zhi);
309 &shl (&LB($rem[0]),4);
310 &shr ($Zhi,4);
311
312 &xor ($Zlo,"8($Htbl,$nhi[0])");
313 &movzw ($rem[0],"($rem_8bit,$rem[0],2)");
314 &shl ($tmp,60);
315
316 &xor ($Zhi,"($Htbl,$nhi[0])");
317 &xor ($Zlo,$tmp);
318 &shl ($rem[0],48);
319
320 &bswap ($Zlo);
321 &xor ($Zhi,$rem[0]);
322
323 &bswap ($Zhi);
324 &cmp ($inp,$len);
325 &jb (".Louter_loop");
326}
327$code.=<<___;
328 mov $Zlo,8($Xi)
329 mov $Zhi,($Xi)
330
331 lea 280(%rsp),%rsi
332 mov 0(%rsi),%r15
333 mov 8(%rsi),%r14
334 mov 16(%rsi),%r13
335 mov 24(%rsi),%r12
336 mov 32(%rsi),%rbp
337 mov 40(%rsi),%rbx
338 lea 48(%rsi),%rsp
339.Lghash_epilogue:
340 ret
341.size gcm_ghash_4bit,.-gcm_ghash_4bit
342___
343
344######################################################################
345# PCLMULQDQ version.
346
347@_4args=$win64? ("%rcx","%rdx","%r8", "%r9") : # Win64 order
348 ("%rdi","%rsi","%rdx","%rcx"); # Unix order
349
350($Xi,$Xhi)=("%xmm0","%xmm1"); $Hkey="%xmm2";
351($T1,$T2,$T3)=("%xmm3","%xmm4","%xmm5");
352
353sub clmul64x64_T2 { # minimal register pressure
354my ($Xhi,$Xi,$Hkey,$modulo)=@_;
355
356$code.=<<___ if (!defined($modulo));
357 movdqa $Xi,$Xhi #
358 pshufd \$0b01001110,$Xi,$T1
359 pshufd \$0b01001110,$Hkey,$T2
360 pxor $Xi,$T1 #
361 pxor $Hkey,$T2
362___
363$code.=<<___;
364 pclmulqdq \$0x00,$Hkey,$Xi #######
365 pclmulqdq \$0x11,$Hkey,$Xhi #######
366 pclmulqdq \$0x00,$T2,$T1 #######
367 pxor $Xi,$T1 #
368 pxor $Xhi,$T1 #
369
370 movdqa $T1,$T2 #
371 psrldq \$8,$T1
372 pslldq \$8,$T2 #
373 pxor $T1,$Xhi
374 pxor $T2,$Xi #
375___
376}
377
378sub reduction_alg9 { # 17/13 times faster than Intel version
379my ($Xhi,$Xi) = @_;
380
381$code.=<<___;
382 # 1st phase
383 movdqa $Xi,$T1 #
384 psllq \$1,$Xi
385 pxor $T1,$Xi #
386 psllq \$5,$Xi #
387 pxor $T1,$Xi #
388 psllq \$57,$Xi #
389 movdqa $Xi,$T2 #
390 pslldq \$8,$Xi
391 psrldq \$8,$T2 #
392 pxor $T1,$Xi
393 pxor $T2,$Xhi #
394
395 # 2nd phase
396 movdqa $Xi,$T2
397 psrlq \$5,$Xi
398 pxor $T2,$Xi #
399 psrlq \$1,$Xi #
400 pxor $T2,$Xi #
401 pxor $Xhi,$T2
402 psrlq \$1,$Xi #
403 pxor $T2,$Xi #
404___
405}
406
407{ my ($Htbl,$Xip)=@_4args;
408
409$code.=<<___;
410.globl gcm_init_clmul
411.type gcm_init_clmul,\@abi-omnipotent
412.align 16
413gcm_init_clmul:
414 movdqu ($Xip),$Hkey
415 pshufd \$0b01001110,$Hkey,$Hkey # dword swap
416
417 # <<1 twist
418 pshufd \$0b11111111,$Hkey,$T2 # broadcast uppermost dword
419 movdqa $Hkey,$T1
420 psllq \$1,$Hkey
421 pxor $T3,$T3 #
422 psrlq \$63,$T1
423 pcmpgtd $T2,$T3 # broadcast carry bit
424 pslldq \$8,$T1
425 por $T1,$Hkey # H<<=1
426
427 # magic reduction
428 pand .L0x1c2_polynomial(%rip),$T3
429 pxor $T3,$Hkey # if(carry) H^=0x1c2_polynomial
430
431 # calculate H^2
432 movdqa $Hkey,$Xi
433___
434 &clmul64x64_T2 ($Xhi,$Xi,$Hkey);
435 &reduction_alg9 ($Xhi,$Xi);
436$code.=<<___;
437 movdqu $Hkey,($Htbl) # save H
438 movdqu $Xi,16($Htbl) # save H^2
439 ret
440.size gcm_init_clmul,.-gcm_init_clmul
441___
442}
443
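
Tying the two kinds of sketches together, gcm_init_clmul is, modulo the <<1 twist applied above, just a squaring of H; clmul64x64() and reduce_alg9() below stand for the hypothetical intrinsics sketches given with ghash-x86.pl earlier in this import:

    #include <emmintrin.h>

    void clmul64x64(__m128i *hi, __m128i *lo, __m128i a, __m128i b); /* earlier sketch */
    void reduce_alg9(__m128i hi, __m128i *lo);                       /* earlier sketch */

    /* precompute the powers of H used for two-block aggregation */
    static void init_clmul(__m128i Htbl[2], __m128i H)
    {
        __m128i Xhi, Xlo;
        clmul64x64(&Xhi, &Xlo, H, H);     /* H*H as a 256-bit carry-less product */
        reduce_alg9(Xhi, &Xlo);           /* reduce to H^2 in GF(2^128)          */
        Htbl[0] = H;                      /* save H                              */
        Htbl[1] = Xlo;                    /* save H^2                            */
    }
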
444{ my ($Xip,$Htbl)=@_4args;
445
446$code.=<<___;
447.globl gcm_gmult_clmul
448.type gcm_gmult_clmul,\@abi-omnipotent
449.align 16
450gcm_gmult_clmul:
451 movdqu ($Xip),$Xi
452 movdqa .Lbswap_mask(%rip),$T3
453 movdqu ($Htbl),$Hkey
454 pshufb $T3,$Xi
455___
456 &clmul64x64_T2 ($Xhi,$Xi,$Hkey);
457 &reduction_alg9 ($Xhi,$Xi);
458$code.=<<___;
459 pshufb $T3,$Xi
460 movdqu $Xi,($Xip)
461 ret
462.size gcm_gmult_clmul,.-gcm_gmult_clmul
463___
464}
465
466{ my ($Xip,$Htbl,$inp,$len)=@_4args;
467 my $Xn="%xmm6";
468 my $Xhn="%xmm7";
469 my $Hkey2="%xmm8";
470 my $T1n="%xmm9";
471 my $T2n="%xmm10";
472
473$code.=<<___;
474.globl gcm_ghash_clmul
475.type gcm_ghash_clmul,\@abi-omnipotent
476.align 16
477gcm_ghash_clmul:
478___
479$code.=<<___ if ($win64);
480.LSEH_begin_gcm_ghash_clmul:
481 # I can't trust assembler to use specific encoding:-(
482 .byte 0x48,0x83,0xec,0x58 #sub \$0x58,%rsp
483 .byte 0x0f,0x29,0x34,0x24 #movaps %xmm6,(%rsp)
484 .byte 0x0f,0x29,0x7c,0x24,0x10 #movdqa %xmm7,0x10(%rsp)
485 .byte 0x44,0x0f,0x29,0x44,0x24,0x20 #movaps %xmm8,0x20(%rsp)
486 .byte 0x44,0x0f,0x29,0x4c,0x24,0x30 #movaps %xmm9,0x30(%rsp)
487 .byte 0x44,0x0f,0x29,0x54,0x24,0x40 #movaps %xmm10,0x40(%rsp)
488___
489$code.=<<___;
490 movdqa .Lbswap_mask(%rip),$T3
491
492 movdqu ($Xip),$Xi
493 movdqu ($Htbl),$Hkey
494 pshufb $T3,$Xi
495
496 sub \$0x10,$len
497 jz .Lodd_tail
498
499 movdqu 16($Htbl),$Hkey2
500 #######
501 # Xi+2 =[H*(Ii+1 + Xi+1)] mod P =
502 # [(H*Ii+1) + (H*Xi+1)] mod P =
503 # [(H*Ii+1) + H^2*(Ii+Xi)] mod P
504 #
505 movdqu ($inp),$T1 # Ii
506 movdqu 16($inp),$Xn # Ii+1
507 pshufb $T3,$T1
508 pshufb $T3,$Xn
509 pxor $T1,$Xi # Ii+Xi
510___
511 &clmul64x64_T2 ($Xhn,$Xn,$Hkey); # H*Ii+1
512$code.=<<___;
513 movdqa $Xi,$Xhi #
514 pshufd \$0b01001110,$Xi,$T1
515 pshufd \$0b01001110,$Hkey2,$T2
516 pxor $Xi,$T1 #
517 pxor $Hkey2,$T2
518
519 lea 32($inp),$inp # i+=2
520 sub \$0x20,$len
521 jbe .Leven_tail
522
523.Lmod_loop:
524___
525 &clmul64x64_T2 ($Xhi,$Xi,$Hkey2,1); # H^2*(Ii+Xi)
526$code.=<<___;
527 movdqu ($inp),$T1 # Ii
528 pxor $Xn,$Xi # (H*Ii+1) + H^2*(Ii+Xi)
529 pxor $Xhn,$Xhi
530
531 movdqu 16($inp),$Xn # Ii+1
532 pshufb $T3,$T1
533 pshufb $T3,$Xn
534
535 movdqa $Xn,$Xhn #
536 pshufd \$0b01001110,$Xn,$T1n
537 pshufd \$0b01001110,$Hkey,$T2n
538 pxor $Xn,$T1n #
539 pxor $Hkey,$T2n
540 pxor $T1,$Xhi # "Ii+Xi", consume early
541
542 movdqa $Xi,$T1 # 1st phase
543 psllq \$1,$Xi
544 pxor $T1,$Xi #
545 psllq \$5,$Xi #
546 pxor $T1,$Xi #
547 pclmulqdq \$0x00,$Hkey,$Xn #######
548 psllq \$57,$Xi #
549 movdqa $Xi,$T2 #
550 pslldq \$8,$Xi
551 psrldq \$8,$T2 #
552 pxor $T1,$Xi
553 pxor $T2,$Xhi #
554
555 pclmulqdq \$0x11,$Hkey,$Xhn #######
556 movdqa $Xi,$T2 # 2nd phase
557 psrlq \$5,$Xi
558 pxor $T2,$Xi #
559 psrlq \$1,$Xi #
560 pxor $T2,$Xi #
561 pxor $Xhi,$T2
562 psrlq \$1,$Xi #
563 pxor $T2,$Xi #
564
565 pclmulqdq \$0x00,$T2n,$T1n #######
566 movdqa $Xi,$Xhi #
567 pshufd \$0b01001110,$Xi,$T1
568 pshufd \$0b01001110,$Hkey2,$T2
569 pxor $Xi,$T1 #
570 pxor $Hkey2,$T2
571
572 pxor $Xn,$T1n #
573 pxor $Xhn,$T1n #
574 movdqa $T1n,$T2n #
575 psrldq \$8,$T1n
576 pslldq \$8,$T2n #
577 pxor $T1n,$Xhn
578 pxor $T2n,$Xn #
579
580 lea 32($inp),$inp
581 sub \$0x20,$len
582 ja .Lmod_loop
583
584.Leven_tail:
585___
586 &clmul64x64_T2 ($Xhi,$Xi,$Hkey2,1); # H^2*(Ii+Xi)
587$code.=<<___;
588 pxor $Xn,$Xi # (H*Ii+1) + H^2*(Ii+Xi)
589 pxor $Xhn,$Xhi
590___
591 &reduction_alg9 ($Xhi,$Xi);
592$code.=<<___;
593 test $len,$len
594 jnz .Ldone
595
596.Lodd_tail:
597 movdqu ($inp),$T1 # Ii
598 pshufb $T3,$T1
599 pxor $T1,$Xi # Ii+Xi
600___
601 &clmul64x64_T2 ($Xhi,$Xi,$Hkey); # H*(Ii+Xi)
602 &reduction_alg9 ($Xhi,$Xi);
603$code.=<<___;
604.Ldone:
605 pshufb $T3,$Xi
606 movdqu $Xi,($Xip)
607___
608$code.=<<___ if ($win64);
609 movaps (%rsp),%xmm6
610 movaps 0x10(%rsp),%xmm7
611 movaps 0x20(%rsp),%xmm8
612 movaps 0x30(%rsp),%xmm9
613 movaps 0x40(%rsp),%xmm10
614 add \$0x58,%rsp
615___
616$code.=<<___;
617 ret
618.LSEH_end_gcm_ghash_clmul:
619.size gcm_ghash_clmul,.-gcm_ghash_clmul
620___
621}
622
623$code.=<<___;
624.align 64
625.Lbswap_mask:
626 .byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
627.L0x1c2_polynomial:
628 .byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
629.align 64
630.type .Lrem_4bit,\@object
631.Lrem_4bit:
632 .long 0,`0x0000<<16`,0,`0x1C20<<16`,0,`0x3840<<16`,0,`0x2460<<16`
633 .long 0,`0x7080<<16`,0,`0x6CA0<<16`,0,`0x48C0<<16`,0,`0x54E0<<16`
634 .long 0,`0xE100<<16`,0,`0xFD20<<16`,0,`0xD940<<16`,0,`0xC560<<16`
635 .long 0,`0x9180<<16`,0,`0x8DA0<<16`,0,`0xA9C0<<16`,0,`0xB5E0<<16`
636.type .Lrem_8bit,\@object
637.Lrem_8bit:
638 .value 0x0000,0x01C2,0x0384,0x0246,0x0708,0x06CA,0x048C,0x054E
639 .value 0x0E10,0x0FD2,0x0D94,0x0C56,0x0918,0x08DA,0x0A9C,0x0B5E
640 .value 0x1C20,0x1DE2,0x1FA4,0x1E66,0x1B28,0x1AEA,0x18AC,0x196E
641 .value 0x1230,0x13F2,0x11B4,0x1076,0x1538,0x14FA,0x16BC,0x177E
642 .value 0x3840,0x3982,0x3BC4,0x3A06,0x3F48,0x3E8A,0x3CCC,0x3D0E
643 .value 0x3650,0x3792,0x35D4,0x3416,0x3158,0x309A,0x32DC,0x331E
644 .value 0x2460,0x25A2,0x27E4,0x2626,0x2368,0x22AA,0x20EC,0x212E
645 .value 0x2A70,0x2BB2,0x29F4,0x2836,0x2D78,0x2CBA,0x2EFC,0x2F3E
646 .value 0x7080,0x7142,0x7304,0x72C6,0x7788,0x764A,0x740C,0x75CE
647 .value 0x7E90,0x7F52,0x7D14,0x7CD6,0x7998,0x785A,0x7A1C,0x7BDE
648 .value 0x6CA0,0x6D62,0x6F24,0x6EE6,0x6BA8,0x6A6A,0x682C,0x69EE
649 .value 0x62B0,0x6372,0x6134,0x60F6,0x65B8,0x647A,0x663C,0x67FE
650 .value 0x48C0,0x4902,0x4B44,0x4A86,0x4FC8,0x4E0A,0x4C4C,0x4D8E
651 .value 0x46D0,0x4712,0x4554,0x4496,0x41D8,0x401A,0x425C,0x439E
652 .value 0x54E0,0x5522,0x5764,0x56A6,0x53E8,0x522A,0x506C,0x51AE
653 .value 0x5AF0,0x5B32,0x5974,0x58B6,0x5DF8,0x5C3A,0x5E7C,0x5FBE
654 .value 0xE100,0xE0C2,0xE284,0xE346,0xE608,0xE7CA,0xE58C,0xE44E
655 .value 0xEF10,0xEED2,0xEC94,0xED56,0xE818,0xE9DA,0xEB9C,0xEA5E
656 .value 0xFD20,0xFCE2,0xFEA4,0xFF66,0xFA28,0xFBEA,0xF9AC,0xF86E
657 .value 0xF330,0xF2F2,0xF0B4,0xF176,0xF438,0xF5FA,0xF7BC,0xF67E
658 .value 0xD940,0xD882,0xDAC4,0xDB06,0xDE48,0xDF8A,0xDDCC,0xDC0E
659 .value 0xD750,0xD692,0xD4D4,0xD516,0xD058,0xD19A,0xD3DC,0xD21E
660 .value 0xC560,0xC4A2,0xC6E4,0xC726,0xC268,0xC3AA,0xC1EC,0xC02E
661 .value 0xCB70,0xCAB2,0xC8F4,0xC936,0xCC78,0xCDBA,0xCFFC,0xCE3E
662 .value 0x9180,0x9042,0x9204,0x93C6,0x9688,0x974A,0x950C,0x94CE
663 .value 0x9F90,0x9E52,0x9C14,0x9DD6,0x9898,0x995A,0x9B1C,0x9ADE
664 .value 0x8DA0,0x8C62,0x8E24,0x8FE6,0x8AA8,0x8B6A,0x892C,0x88EE
665 .value 0x83B0,0x8272,0x8034,0x81F6,0x84B8,0x857A,0x873C,0x86FE
666 .value 0xA9C0,0xA802,0xAA44,0xAB86,0xAEC8,0xAF0A,0xAD4C,0xAC8E
667 .value 0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E
668 .value 0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE
669 .value 0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE
670
671.asciz "GHASH for x86_64, CRYPTOGAMS by <appro\@openssl.org>"
672.align 64
673___
674
675# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame,
676# CONTEXT *context,DISPATCHER_CONTEXT *disp)
677if ($win64) {
678$rec="%rcx";
679$frame="%rdx";
680$context="%r8";
681$disp="%r9";
682
683$code.=<<___;
684.extern __imp_RtlVirtualUnwind
685.type se_handler,\@abi-omnipotent
686.align 16
687se_handler:
688 push %rsi
689 push %rdi
690 push %rbx
691 push %rbp
692 push %r12
693 push %r13
694 push %r14
695 push %r15
696 pushfq
697 sub \$64,%rsp
698
699 mov 120($context),%rax # pull context->Rax
700 mov 248($context),%rbx # pull context->Rip
701
702 mov 8($disp),%rsi # disp->ImageBase
703 mov 56($disp),%r11 # disp->HandlerData
704
705 mov 0(%r11),%r10d # HandlerData[0]
706 lea (%rsi,%r10),%r10 # prologue label
707 cmp %r10,%rbx # context->Rip<prologue label
708 jb .Lin_prologue
709
710 mov 152($context),%rax # pull context->Rsp
711
712 mov 4(%r11),%r10d # HandlerData[1]
713 lea (%rsi,%r10),%r10 # epilogue label
714 cmp %r10,%rbx # context->Rip>=epilogue label
715 jae .Lin_prologue
716
717 lea 24(%rax),%rax # adjust "rsp"
718
719 mov -8(%rax),%rbx
720 mov -16(%rax),%rbp
721 mov -24(%rax),%r12
722 mov %rbx,144($context) # restore context->Rbx
723 mov %rbp,160($context) # restore context->Rbp
724 mov %r12,216($context) # restore context->R12
725
726.Lin_prologue:
727 mov 8(%rax),%rdi
728 mov 16(%rax),%rsi
729 mov %rax,152($context) # restore context->Rsp
730 mov %rsi,168($context) # restore context->Rsi
731 mov %rdi,176($context) # restore context->Rdi
732
733 mov 40($disp),%rdi # disp->ContextRecord
734 mov $context,%rsi # context
735 mov \$`1232/8`,%ecx # sizeof(CONTEXT)
736 .long 0xa548f3fc # cld; rep movsq
737
738 mov $disp,%rsi
739 xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER
740 mov 8(%rsi),%rdx # arg2, disp->ImageBase
741 mov 0(%rsi),%r8 # arg3, disp->ControlPc
742 mov 16(%rsi),%r9 # arg4, disp->FunctionEntry
743 mov 40(%rsi),%r10 # disp->ContextRecord
744 lea 56(%rsi),%r11 # &disp->HandlerData
745 lea 24(%rsi),%r12 # &disp->EstablisherFrame
746 mov %r10,32(%rsp) # arg5
747 mov %r11,40(%rsp) # arg6
748 mov %r12,48(%rsp) # arg7
749 mov %rcx,56(%rsp) # arg8, (NULL)
750 call *__imp_RtlVirtualUnwind(%rip)
751
752 mov \$1,%eax # ExceptionContinueSearch
753 add \$64,%rsp
754 popfq
755 pop %r15
756 pop %r14
757 pop %r13
758 pop %r12
759 pop %rbp
760 pop %rbx
761 pop %rdi
762 pop %rsi
763 ret
764.size se_handler,.-se_handler
765
766.section .pdata
767.align 4
768 .rva .LSEH_begin_gcm_gmult_4bit
769 .rva .LSEH_end_gcm_gmult_4bit
770 .rva .LSEH_info_gcm_gmult_4bit
771
772 .rva .LSEH_begin_gcm_ghash_4bit
773 .rva .LSEH_end_gcm_ghash_4bit
774 .rva .LSEH_info_gcm_ghash_4bit
775
776 .rva .LSEH_begin_gcm_ghash_clmul
777 .rva .LSEH_end_gcm_ghash_clmul
778 .rva .LSEH_info_gcm_ghash_clmul
779
780.section .xdata
781.align 8
782.LSEH_info_gcm_gmult_4bit:
783 .byte 9,0,0,0
784 .rva se_handler
785 .rva .Lgmult_prologue,.Lgmult_epilogue # HandlerData
786.LSEH_info_gcm_ghash_4bit:
787 .byte 9,0,0,0
788 .rva se_handler
789 .rva .Lghash_prologue,.Lghash_epilogue # HandlerData
790.LSEH_info_gcm_ghash_clmul:
791 .byte 0x01,0x1f,0x0b,0x00
792 .byte 0x1f,0xa8,0x04,0x00 #movaps 0x40(rsp),xmm10
793 .byte 0x19,0x98,0x03,0x00 #movaps 0x30(rsp),xmm9
794 .byte 0x13,0x88,0x02,0x00 #movaps 0x20(rsp),xmm8
795 .byte 0x0d,0x78,0x01,0x00 #movaps 0x10(rsp),xmm7
796 .byte 0x08,0x68,0x00,0x00 #movaps (rsp),xmm6
797 .byte 0x04,0xa2,0x00,0x00 #sub rsp,0x58
798___
799}
800
801$code =~ s/\`([^\`]*)\`/eval($1)/gem;
802
803print $code;
804
805close STDOUT;
diff --git a/src/lib/libssl/src/crypto/modes/cbc128.c b/src/lib/libssl/src/crypto/modes/cbc128.c
index 8f8bd563b9..3d3782cbe1 100644
--- a/src/lib/libssl/src/crypto/modes/cbc128.c
+++ b/src/lib/libssl/src/crypto/modes/cbc128.c
@@ -48,7 +48,8 @@
  *
  */
 
-#include "modes.h"
+#include <openssl/crypto.h>
+#include "modes_lcl.h"
 #include <string.h>
 
 #ifndef MODES_DEBUG
@@ -58,12 +59,7 @@
 #endif
 #include <assert.h>
 
-#define STRICT_ALIGNMENT 1
-#if defined(__i386) || defined(__i386__) || \
-    defined(__x86_64) || defined(__x86_64__) || \
-    defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64) || \
-    defined(__s390__) || defined(__s390x__)
-# undef STRICT_ALIGNMENT
+#ifndef STRICT_ALIGNMENT
 # define STRICT_ALIGNMENT 0
 #endif
 
diff --git a/src/lib/libssl/src/crypto/modes/ccm128.c b/src/lib/libssl/src/crypto/modes/ccm128.c
new file mode 100644
index 0000000000..c9b35e5b35
--- /dev/null
+++ b/src/lib/libssl/src/crypto/modes/ccm128.c
@@ -0,0 +1,441 @@
1/* ====================================================================
2 * Copyright (c) 2011 The OpenSSL Project. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in
13 * the documentation and/or other materials provided with the
14 * distribution.
15 *
16 * 3. All advertising materials mentioning features or use of this
17 * software must display the following acknowledgment:
18 * "This product includes software developed by the OpenSSL Project
19 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
20 *
21 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
22 * endorse or promote products derived from this software without
23 * prior written permission. For written permission, please contact
24 * openssl-core@openssl.org.
25 *
26 * 5. Products derived from this software may not be called "OpenSSL"
27 * nor may "OpenSSL" appear in their names without prior written
28 * permission of the OpenSSL Project.
29 *
30 * 6. Redistributions of any form whatsoever must retain the following
31 * acknowledgment:
32 * "This product includes software developed by the OpenSSL Project
33 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
34 *
35 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
36 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
37 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
38 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
39 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
40 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
41 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
42 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
43 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
44 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
45 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
46 * OF THE POSSIBILITY OF SUCH DAMAGE.
47 * ====================================================================
48 */
49
50#include <openssl/crypto.h>
51#include "modes_lcl.h"
52#include <string.h>
53
54#ifndef MODES_DEBUG
55# ifndef NDEBUG
56# define NDEBUG
57# endif
58#endif
59#include <assert.h>
60
61/* First you setup M and L parameters and pass the key schedule.
62 * This is called once per session setup... */
63void CRYPTO_ccm128_init(CCM128_CONTEXT *ctx,
64 unsigned int M,unsigned int L,void *key,block128_f block)
65{
66 memset(ctx->nonce.c,0,sizeof(ctx->nonce.c));
67 ctx->nonce.c[0] = ((u8)(L-1)&7) | (u8)(((M-2)/2)&7)<<3;
68 ctx->blocks = 0;
69 ctx->block = block;
70 ctx->key = key;
71}
72
73/* !!! Following interfaces are to be called *once* per packet !!! */
74
75/* Then you setup per-message nonce and pass the length of the message */
76int CRYPTO_ccm128_setiv(CCM128_CONTEXT *ctx,
77 const unsigned char *nonce,size_t nlen,size_t mlen)
78{
79 unsigned int L = ctx->nonce.c[0]&7; /* the L parameter */
80
81 if (nlen<(14-L)) return -1; /* nonce is too short */
82
83 if (sizeof(mlen)==8 && L>=3) {
84 ctx->nonce.c[8] = (u8)(mlen>>(56%(sizeof(mlen)*8)));
85 ctx->nonce.c[9] = (u8)(mlen>>(48%(sizeof(mlen)*8)));
86 ctx->nonce.c[10] = (u8)(mlen>>(40%(sizeof(mlen)*8)));
87 ctx->nonce.c[11] = (u8)(mlen>>(32%(sizeof(mlen)*8)));
88 }
89 else
90 *(u32*)(&ctx->nonce.c[8]) = 0;
91
92 ctx->nonce.c[12] = (u8)(mlen>>24);
93 ctx->nonce.c[13] = (u8)(mlen>>16);
94 ctx->nonce.c[14] = (u8)(mlen>>8);
95 ctx->nonce.c[15] = (u8)mlen;
96
97 ctx->nonce.c[0] &= ~0x40; /* clear Adata flag */
98 memcpy(&ctx->nonce.c[1],nonce,14-L);
99
100 return 0;
101}
102
103/* Then you pass additional authentication data, this is optional */
104void CRYPTO_ccm128_aad(CCM128_CONTEXT *ctx,
105 const unsigned char *aad,size_t alen)
106{ unsigned int i;
107 block128_f block = ctx->block;
108
109 if (alen==0) return;
110
111 ctx->nonce.c[0] |= 0x40; /* set Adata flag */
112 (*block)(ctx->nonce.c,ctx->cmac.c,ctx->key),
113 ctx->blocks++;
114
115 if (alen<(0x10000-0x100)) {
116 ctx->cmac.c[0] ^= (u8)(alen>>8);
117 ctx->cmac.c[1] ^= (u8)alen;
118 i=2;
119 }
120 else if (sizeof(alen)==8 && alen>=(size_t)1<<(32%(sizeof(alen)*8))) {
121 ctx->cmac.c[0] ^= 0xFF;
122 ctx->cmac.c[1] ^= 0xFF;
123 ctx->cmac.c[2] ^= (u8)(alen>>(56%(sizeof(alen)*8)));
124 ctx->cmac.c[3] ^= (u8)(alen>>(48%(sizeof(alen)*8)));
125 ctx->cmac.c[4] ^= (u8)(alen>>(40%(sizeof(alen)*8)));
126 ctx->cmac.c[5] ^= (u8)(alen>>(32%(sizeof(alen)*8)));
127 ctx->cmac.c[6] ^= (u8)(alen>>24);
128 ctx->cmac.c[7] ^= (u8)(alen>>16);
129 ctx->cmac.c[8] ^= (u8)(alen>>8);
130 ctx->cmac.c[9] ^= (u8)alen;
131 i=10;
132 }
133 else {
134 ctx->cmac.c[0] ^= 0xFF;
135 ctx->cmac.c[1] ^= 0xFE;
136 ctx->cmac.c[2] ^= (u8)(alen>>24);
137 ctx->cmac.c[3] ^= (u8)(alen>>16);
138 ctx->cmac.c[4] ^= (u8)(alen>>8);
139 ctx->cmac.c[5] ^= (u8)alen;
140 i=6;
141 }
142
143 do {
144 for(;i<16 && alen;++i,++aad,--alen)
145 ctx->cmac.c[i] ^= *aad;
146 (*block)(ctx->cmac.c,ctx->cmac.c,ctx->key),
147 ctx->blocks++;
148 i=0;
149 } while (alen);
150}
151
152/* Finally you encrypt or decrypt the message */
153
154/* counter part of nonce may not be larger than L*8 bits,
155 * L is not larger than 8, therefore 64-bit counter... */
156static void ctr64_inc(unsigned char *counter) {
157 unsigned int n=8;
158 u8 c;
159
160 counter += 8;
161 do {
162 --n;
163 c = counter[n];
164 ++c;
165 counter[n] = c;
166 if (c) return;
167 } while (n);
168}
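The sizing remark above can be written out: the counter occupies the trailing L bytes of the counter block and the format caps L at 8, so

\[
\text{counter width} \;=\; 8L \;\le\; 64 \text{ bits},
\]

which is why ctr64_inc() only ever needs to touch the last eight bytes of ctx->nonce.c.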
169
170int CRYPTO_ccm128_encrypt(CCM128_CONTEXT *ctx,
171 const unsigned char *inp, unsigned char *out,
172 size_t len)
173{
174 size_t n;
175 unsigned int i,L;
176 unsigned char flags0 = ctx->nonce.c[0];
177 block128_f block = ctx->block;
178 void * key = ctx->key;
179 union { u64 u[2]; u8 c[16]; } scratch;
180
181 if (!(flags0&0x40))
182 (*block)(ctx->nonce.c,ctx->cmac.c,key),
183 ctx->blocks++;
184
185 ctx->nonce.c[0] = L = flags0&7;
186 for (n=0,i=15-L;i<15;++i) {
187 n |= ctx->nonce.c[i];
188 ctx->nonce.c[i]=0;
189 n <<= 8;
190 }
191 n |= ctx->nonce.c[15]; /* reconstructed length */
192 ctx->nonce.c[15]=1;
193
194 if (n!=len) return -1; /* length mismatch */
195
196 ctx->blocks += ((len+15)>>3)|1;
197 if (ctx->blocks > (U64(1)<<61)) return -2; /* too much data */
198
199 while (len>=16) {
200#if defined(STRICT_ALIGNMENT)
201 union { u64 u[2]; u8 c[16]; } temp;
202
203 memcpy (temp.c,inp,16);
204 ctx->cmac.u[0] ^= temp.u[0];
205 ctx->cmac.u[1] ^= temp.u[1];
206#else
207 ctx->cmac.u[0] ^= ((u64*)inp)[0];
208 ctx->cmac.u[1] ^= ((u64*)inp)[1];
209#endif
210 (*block)(ctx->cmac.c,ctx->cmac.c,key);
211 (*block)(ctx->nonce.c,scratch.c,key);
212 ctr64_inc(ctx->nonce.c);
213#if defined(STRICT_ALIGNMENT)
214 temp.u[0] ^= scratch.u[0];
215 temp.u[1] ^= scratch.u[1];
216 memcpy(out,temp.c,16);
217#else
218 ((u64*)out)[0] = scratch.u[0]^((u64*)inp)[0];
219 ((u64*)out)[1] = scratch.u[1]^((u64*)inp)[1];
220#endif
221 inp += 16;
222 out += 16;
223 len -= 16;
224 }
225
226 if (len) {
227 for (i=0; i<len; ++i) ctx->cmac.c[i] ^= inp[i];
228 (*block)(ctx->cmac.c,ctx->cmac.c,key);
229 (*block)(ctx->nonce.c,scratch.c,key);
230 for (i=0; i<len; ++i) out[i] = scratch.c[i]^inp[i];
231 }
232
233 for (i=15-L;i<16;++i)
234 ctx->nonce.c[i]=0;
235
236 (*block)(ctx->nonce.c,scratch.c,key);
237 ctx->cmac.u[0] ^= scratch.u[0];
238 ctx->cmac.u[1] ^= scratch.u[1];
239
240 ctx->nonce.c[0] = flags0;
241
242 return 0;
243}
244
245int CRYPTO_ccm128_decrypt(CCM128_CONTEXT *ctx,
246 const unsigned char *inp, unsigned char *out,
247 size_t len)
248{
249 size_t n;
250 unsigned int i,L;
251 unsigned char flags0 = ctx->nonce.c[0];
252 block128_f block = ctx->block;
253 void * key = ctx->key;
254 union { u64 u[2]; u8 c[16]; } scratch;
255
256 if (!(flags0&0x40))
257 (*block)(ctx->nonce.c,ctx->cmac.c,key);
258
259 ctx->nonce.c[0] = L = flags0&7;
260 for (n=0,i=15-L;i<15;++i) {
261 n |= ctx->nonce.c[i];
262 ctx->nonce.c[i]=0;
263 n <<= 8;
264 }
265 n |= ctx->nonce.c[15]; /* reconstructed length */
266 ctx->nonce.c[15]=1;
267
268 if (n!=len) return -1;
269
270 while (len>=16) {
271#if defined(STRICT_ALIGNMENT)
272 union { u64 u[2]; u8 c[16]; } temp;
273#endif
274 (*block)(ctx->nonce.c,scratch.c,key);
275 ctr64_inc(ctx->nonce.c);
276#if defined(STRICT_ALIGNMENT)
277 memcpy (temp.c,inp,16);
278 ctx->cmac.u[0] ^= (scratch.u[0] ^= temp.u[0]);
279 ctx->cmac.u[1] ^= (scratch.u[1] ^= temp.u[1]);
280 memcpy (out,scratch.c,16);
281#else
282 ctx->cmac.u[0] ^= (((u64*)out)[0] = scratch.u[0]^((u64*)inp)[0]);
283 ctx->cmac.u[1] ^= (((u64*)out)[1] = scratch.u[1]^((u64*)inp)[1]);
284#endif
285 (*block)(ctx->cmac.c,ctx->cmac.c,key);
286
287 inp += 16;
288 out += 16;
289 len -= 16;
290 }
291
292 if (len) {
293 (*block)(ctx->nonce.c,scratch.c,key);
294 for (i=0; i<len; ++i)
295 ctx->cmac.c[i] ^= (out[i] = scratch.c[i]^inp[i]);
296 (*block)(ctx->cmac.c,ctx->cmac.c,key);
297 }
298
299 for (i=15-L;i<16;++i)
300 ctx->nonce.c[i]=0;
301
302 (*block)(ctx->nonce.c,scratch.c,key);
303 ctx->cmac.u[0] ^= scratch.u[0];
304 ctx->cmac.u[1] ^= scratch.u[1];
305
306 ctx->nonce.c[0] = flags0;
307
308 return 0;
309}
310
311static void ctr64_add (unsigned char *counter,size_t inc)
312{ size_t n=8, val=0;
313
314 counter += 8;
315 do {
316 --n;
317 val += counter[n] + (inc&0xff);
318 counter[n] = (unsigned char)val;
319 val >>= 8; /* carry bit */
320 inc >>= 8;
321 } while(n && (inc || val));
322}
323
324int CRYPTO_ccm128_encrypt_ccm64(CCM128_CONTEXT *ctx,
325 const unsigned char *inp, unsigned char *out,
326 size_t len,ccm128_f stream)
327{
328 size_t n;
329 unsigned int i,L;
330 unsigned char flags0 = ctx->nonce.c[0];
331 block128_f block = ctx->block;
332 void * key = ctx->key;
333 union { u64 u[2]; u8 c[16]; } scratch;
334
335 if (!(flags0&0x40))
336 (*block)(ctx->nonce.c,ctx->cmac.c,key),
337 ctx->blocks++;
338
339 ctx->nonce.c[0] = L = flags0&7;
340 for (n=0,i=15-L;i<15;++i) {
341 n |= ctx->nonce.c[i];
342 ctx->nonce.c[i]=0;
343 n <<= 8;
344 }
345 n |= ctx->nonce.c[15]; /* reconstructed length */
346 ctx->nonce.c[15]=1;
347
348 if (n!=len) return -1; /* length mismatch */
349
350 ctx->blocks += ((len+15)>>3)|1;
351 if (ctx->blocks > (U64(1)<<61)) return -2; /* too much data */
352
353 if ((n=len/16)) {
354 (*stream)(inp,out,n,key,ctx->nonce.c,ctx->cmac.c);
355 n *= 16;
356 inp += n;
357 out += n;
358 len -= n;
359 if (len) ctr64_add(ctx->nonce.c,n/16);
360 }
361
362 if (len) {
363 for (i=0; i<len; ++i) ctx->cmac.c[i] ^= inp[i];
364 (*block)(ctx->cmac.c,ctx->cmac.c,key);
365 (*block)(ctx->nonce.c,scratch.c,key);
366 for (i=0; i<len; ++i) out[i] = scratch.c[i]^inp[i];
367 }
368
369 for (i=15-L;i<16;++i)
370 ctx->nonce.c[i]=0;
371
372 (*block)(ctx->nonce.c,scratch.c,key);
373 ctx->cmac.u[0] ^= scratch.u[0];
374 ctx->cmac.u[1] ^= scratch.u[1];
375
376 ctx->nonce.c[0] = flags0;
377
378 return 0;
379}
380
381int CRYPTO_ccm128_decrypt_ccm64(CCM128_CONTEXT *ctx,
382 const unsigned char *inp, unsigned char *out,
383 size_t len,ccm128_f stream)
384{
385 size_t n;
386 unsigned int i,L;
387 unsigned char flags0 = ctx->nonce.c[0];
388 block128_f block = ctx->block;
389 void * key = ctx->key;
390 union { u64 u[2]; u8 c[16]; } scratch;
391
392 if (!(flags0&0x40))
393 (*block)(ctx->nonce.c,ctx->cmac.c,key);
394
395 ctx->nonce.c[0] = L = flags0&7;
396 for (n=0,i=15-L;i<15;++i) {
397 n |= ctx->nonce.c[i];
398 ctx->nonce.c[i]=0;
399 n <<= 8;
400 }
401 n |= ctx->nonce.c[15]; /* reconstructed length */
402 ctx->nonce.c[15]=1;
403
404 if (n!=len) return -1;
405
406 if ((n=len/16)) {
407 (*stream)(inp,out,n,key,ctx->nonce.c,ctx->cmac.c);
408 n *= 16;
409 inp += n;
410 out += n;
411 len -= n;
412 if (len) ctr64_add(ctx->nonce.c,n/16);
413 }
414
415 if (len) {
416 (*block)(ctx->nonce.c,scratch.c,key);
417 for (i=0; i<len; ++i)
418 ctx->cmac.c[i] ^= (out[i] = scratch.c[i]^inp[i]);
419 (*block)(ctx->cmac.c,ctx->cmac.c,key);
420 }
421
422 for (i=15-L;i<16;++i)
423 ctx->nonce.c[i]=0;
424
425 (*block)(ctx->nonce.c,scratch.c,key);
426 ctx->cmac.u[0] ^= scratch.u[0];
427 ctx->cmac.u[1] ^= scratch.u[1];
428
429 ctx->nonce.c[0] = flags0;
430
431 return 0;
432}
433
434size_t CRYPTO_ccm128_tag(CCM128_CONTEXT *ctx,unsigned char *tag,size_t len)
435{ unsigned int M = (ctx->nonce.c[0]>>3)&7; /* the M parameter */
436
437 M *= 2; M += 2;
438 if (len<M) return 0;
439 memcpy(tag,ctx->cmac.c,M);
440 return M;
441}
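For orientation, here is a minimal, hypothetical caller of the CCM interfaces added above, in the order the comments prescribe (init once per key schedule, then setiv/aad/encrypt/tag once per packet). The helper name, the choice of a 16-byte tag with a 4-byte length field, the 11-byte nonce and the inclusion of "modes_lcl.h" for the complete CCM128_CONTEXT definition are ours, not part of the commit:

#include <stddef.h>
#include <openssl/aes.h>
#include "modes_lcl.h"		/* assumed: complete CCM128_CONTEXT, block128_f */

/* Hypothetical one-shot CCM seal: M = 16-byte tag, L = 4-byte length field. */
static int ccm_seal(const unsigned char key[16],
		    const unsigned char nonce[11],	/* 15-L bytes, usual CCM convention */
		    const unsigned char *aad, size_t alen,
		    const unsigned char *msg, size_t mlen,
		    unsigned char *out, unsigned char tag[16])
{
	AES_KEY ks;
	CCM128_CONTEXT ccm;

	if (AES_set_encrypt_key(key, 128, &ks) != 0)
		return -1;

	/* once per key schedule: record M, L and the block cipher */
	CRYPTO_ccm128_init(&ccm, 16, 4, &ks, (block128_f)AES_encrypt);

	/* once per packet: bind nonce and payload length ... */
	if (CRYPTO_ccm128_setiv(&ccm, nonce, 11, mlen))
		return -1;
	/* ... then the optional additional authenticated data ... */
	if (alen)
		CRYPTO_ccm128_aad(&ccm, aad, alen);
	/* ... then the payload ... */
	if (CRYPTO_ccm128_encrypt(&ccm, msg, out, mlen))
		return -1;
	/* ... and finally pull out the authentication tag. */
	return CRYPTO_ccm128_tag(&ccm, tag, 16) == 16 ? 0 : -1;
}

Decryption is symmetric: CRYPTO_ccm128_decrypt() followed by comparing the tag returned by CRYPTO_ccm128_tag() against the received one.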
diff --git a/src/lib/libssl/src/crypto/modes/cfb128.c b/src/lib/libssl/src/crypto/modes/cfb128.c
index e5938c6137..4e6f5d35e1 100644
--- a/src/lib/libssl/src/crypto/modes/cfb128.c
+++ b/src/lib/libssl/src/crypto/modes/cfb128.c
@@ -48,7 +48,8 @@
  *
  */
 
-#include "modes.h"
+#include <openssl/crypto.h>
+#include "modes_lcl.h"
 #include <string.h>
 
 #ifndef MODES_DEBUG
@@ -58,14 +59,6 @@
 #endif
 #include <assert.h>
 
-#define STRICT_ALIGNMENT
-#if defined(__i386) || defined(__i386__) || \
-    defined(__x86_64) || defined(__x86_64__) || \
-    defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64) || \
-    defined(__s390__) || defined(__s390x__)
-# undef STRICT_ALIGNMENT
-#endif
-
 /* The input and output encrypted as though 128bit cfb mode is being
  * used. The extra state information to record how much of the
  * 128bit block we have used is contained in *num;
diff --git a/src/lib/libssl/src/crypto/modes/ctr128.c b/src/lib/libssl/src/crypto/modes/ctr128.c
index 932037f551..ee642c5863 100644
--- a/src/lib/libssl/src/crypto/modes/ctr128.c
+++ b/src/lib/libssl/src/crypto/modes/ctr128.c
@@ -48,7 +48,8 @@
  *
  */
 
-#include "modes.h"
+#include <openssl/crypto.h>
+#include "modes_lcl.h"
 #include <string.h>
 
 #ifndef MODES_DEBUG
@@ -58,17 +59,6 @@
 #endif
 #include <assert.h>
 
-typedef unsigned int u32;
-typedef unsigned char u8;
-
-#define STRICT_ALIGNMENT
-#if defined(__i386) || defined(__i386__) || \
-    defined(__x86_64) || defined(__x86_64__) || \
-    defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64) || \
-    defined(__s390__) || defined(__s390x__)
-# undef STRICT_ALIGNMENT
-#endif
-
 /* NOTE: the IV/counter CTR mode is big-endian. The code itself
  * is endian-neutral. */
 
@@ -182,3 +172,81 @@ void CRYPTO_ctr128_encrypt(const unsigned char *in, unsigned char *out,
 
 	*num=n;
 }
175
176/* increment upper 96 bits of 128-bit counter by 1 */
177static void ctr96_inc(unsigned char *counter) {
178 u32 n=12;
179 u8 c;
180
181 do {
182 --n;
183 c = counter[n];
184 ++c;
185 counter[n] = c;
186 if (c) return;
187 } while (n);
188}
189
190void CRYPTO_ctr128_encrypt_ctr32(const unsigned char *in, unsigned char *out,
191 size_t len, const void *key,
192 unsigned char ivec[16], unsigned char ecount_buf[16],
193 unsigned int *num, ctr128_f func)
194{
195 unsigned int n,ctr32;
196
197 assert(in && out && key && ecount_buf && num);
198 assert(*num < 16);
199
200 n = *num;
201
202 while (n && len) {
203 *(out++) = *(in++) ^ ecount_buf[n];
204 --len;
205 n = (n+1) % 16;
206 }
207
208 ctr32 = GETU32(ivec+12);
209 while (len>=16) {
210 size_t blocks = len/16;
211 /*
212 * 1<<28 is just a not-so-small yet not-so-large number...
213 * Below condition is practically never met, but it has to
214 * be checked for code correctness.
215 */
216 if (sizeof(size_t)>sizeof(unsigned int) && blocks>(1U<<28))
217 blocks = (1U<<28);
218 /*
219 * As (*func) operates on 32-bit counter, caller
220 * has to handle overflow. 'if' below detects the
221 * overflow, which is then handled by limiting the
222 * amount of blocks to the exact overflow point...
223 */
224 ctr32 += (u32)blocks;
225 if (ctr32 < blocks) {
226 blocks -= ctr32;
227 ctr32 = 0;
228 }
229 (*func)(in,out,blocks,key,ivec);
230 /* (*ctr) does not update ivec, caller does: */
231 PUTU32(ivec+12,ctr32);
232		 * ... overflow was detected, propagate carry. */
233 if (ctr32 == 0) ctr96_inc(ivec);
234 blocks *= 16;
235 len -= blocks;
236 out += blocks;
237 in += blocks;
238 }
239 if (len) {
240 memset(ecount_buf,0,16);
241 (*func)(ecount_buf,ecount_buf,1,key,ivec);
242 ++ctr32;
243 PUTU32(ivec+12,ctr32);
244 if (ctr32 == 0) ctr96_inc(ivec);
245 while (len--) {
246 out[n] = in[n] ^ ecount_buf[n];
247 ++n;
248 }
249 }
250
251 *num=n;
252}
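A note on the overflow handling in CRYPTO_ctr128_encrypt_ctr32() above: because (*func) only sees the low 32 counter bits, the loop first caps the block count at 1<<28 and then clamps it so each call stops exactly where those 32 bits would wrap, after which the upper 96 bits are bumped with ctr96_inc(). The clamp, restated as a standalone helper (name ours, purely illustrative):

#include <stddef.h>
#include <stdint.h>

/* How many 16-byte blocks may be handed to a ctr32-style routine before
 * the low 32 bits of the counter wrap to zero (mirrors the clamp above). */
static size_t ctr32_blocks_before_wrap(uint32_t ctr32, size_t blocks)
{
	uint32_t sum = ctr32 + (uint32_t)blocks;	/* wraps modulo 2^32 */

	if (sum < (uint32_t)blocks)	/* wrapped: same test as "ctr32 < blocks" above */
		blocks -= sum;		/* stop exactly at the wrap point */
	return blocks;
}

For example, with ctr32 == 0xFFFFFFFE and blocks == 8 the helper returns 2, which is precisely where the code above writes back a zero counter and lets ctr96_inc() propagate the carry.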
diff --git a/src/lib/libssl/src/crypto/modes/cts128.c b/src/lib/libssl/src/crypto/modes/cts128.c
index e0430f9fdc..c0e1f3696c 100644
--- a/src/lib/libssl/src/crypto/modes/cts128.c
+++ b/src/lib/libssl/src/crypto/modes/cts128.c
@@ -5,7 +5,8 @@
  * forms are granted according to the OpenSSL license.
  */
 
-#include "modes.h"
+#include <openssl/crypto.h>
+#include "modes_lcl.h"
 #include <string.h>
 
 #ifndef MODES_DEBUG
@@ -23,8 +24,9 @@
  * deviates from mentioned RFCs. Most notably it allows input to be
  * of block length and it doesn't flip the order of the last two
  * blocks. CTS is being discussed even in ECB context, but it's not
- * adopted for any known application. This implementation complies
- * with mentioned RFCs and [as such] extends CBC mode.
+ * adopted for any known application. This implementation provides
+ * two interfaces: one compliant with above mentioned RFCs and one
+ * compliant with the NIST proposal, both extending CBC mode.
  */
 
 size_t CRYPTO_cts128_encrypt_block(const unsigned char *in, unsigned char *out,
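As the rewritten comment notes, the NIST proposal differs from the RFC behaviour mainly in accepting block-aligned input and in not flipping the last two ciphertext blocks, which is what the new CRYPTO_nistcts128_* entry points below implement. The selftest's test_nistvector() derives its expected vectors from the RFC ones by exactly that flip; pulled out as a hypothetical helper (name and guard ours):

#include <stddef.h>
#include <string.h>

/* Reorder an RFC-style CTS ciphertext into NIST block order by swapping the
 * final full block with the trailing (possibly short) block, mirroring the
 * "flip two last blocks" step in test_nistvector() further down. */
static void cts_rfc_to_nist_order(unsigned char *ct, size_t len)
{
	unsigned char full_blk[16], tail_blk[16];
	size_t tail = len % 16;

	if (tail == 0) tail = 16;
	if (len < 16 + tail) return;		/* a single block: nothing to flip */

	memcpy(full_blk, ct + len - 16 - tail, 16);	/* next-to-last (full) block */
	memcpy(tail_blk, ct + len - tail, tail);	/* final, possibly short block */
	memcpy(ct + len - 16 - tail, tail_blk, tail);
	memcpy(ct + len - 16, full_blk, 16);
}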
@@ -54,6 +56,34 @@ size_t CRYPTO_cts128_encrypt_block(const unsigned char *in, unsigned char *out,
 	return len+residue;
 }
 
59size_t CRYPTO_nistcts128_encrypt_block(const unsigned char *in, unsigned char *out,
60 size_t len, const void *key,
61 unsigned char ivec[16], block128_f block)
62{ size_t residue, n;
63
64 assert (in && out && key && ivec);
65
66 if (len < 16) return 0;
67
68 residue=len%16;
69
70 len -= residue;
71
72 CRYPTO_cbc128_encrypt(in,out,len,key,ivec,block);
73
74 if (residue==0) return len;
75
76 in += len;
77 out += len;
78
79 for (n=0; n<residue; ++n)
80 ivec[n] ^= in[n];
81 (*block)(ivec,ivec,key);
82 memcpy(out-16+residue,ivec,16);
83
84 return len+residue;
85}
86
 size_t CRYPTO_cts128_encrypt(const unsigned char *in, unsigned char *out,
 			size_t len, const void *key,
 			unsigned char ivec[16], cbc128_f cbc)
@@ -90,6 +120,41 @@ size_t CRYPTO_cts128_encrypt(const unsigned char *in, unsigned char *out,
 	return len+residue;
 }
 
123size_t CRYPTO_nistcts128_encrypt(const unsigned char *in, unsigned char *out,
124 size_t len, const void *key,
125 unsigned char ivec[16], cbc128_f cbc)
126{ size_t residue;
127 union { size_t align; unsigned char c[16]; } tmp;
128
129 assert (in && out && key && ivec);
130
131 if (len < 16) return 0;
132
133 residue=len%16;
134
135 len -= residue;
136
137 (*cbc)(in,out,len,key,ivec,1);
138
139 if (residue==0) return len;
140
141 in += len;
142 out += len;
143
144#if defined(CBC_HANDLES_TRUNCATED_IO)
145 (*cbc)(in,out-16+residue,residue,key,ivec,1);
146#else
147 {
148 size_t n;
149 for (n=0; n<16; n+=sizeof(size_t))
150 *(size_t *)(tmp.c+n) = 0;
151 memcpy(tmp.c,in,residue);
152 }
153 (*cbc)(tmp.c,out-16+residue,16,key,ivec,1);
154#endif
155 return len+residue;
156}
157
 size_t CRYPTO_cts128_decrypt_block(const unsigned char *in, unsigned char *out,
 			size_t len, const void *key,
 			unsigned char ivec[16], block128_f block)
@@ -125,7 +190,51 @@ size_t CRYPTO_cts128_decrypt_block(const unsigned char *in, unsigned char *out,
 	for(residue+=16; n<residue; ++n)
 		out[n] = tmp.c[n] ^ in[n];
 
-	return len+residue-16;
+	return 16+len+residue;
194}
195
196size_t CRYPTO_nistcts128_decrypt_block(const unsigned char *in, unsigned char *out,
197 size_t len, const void *key,
198 unsigned char ivec[16], block128_f block)
199{ size_t residue, n;
200 union { size_t align; unsigned char c[32]; } tmp;
201
202 assert (in && out && key && ivec);
203
204 if (len<16) return 0;
205
206 residue=len%16;
207
208 if (residue==0) {
209 CRYPTO_cbc128_decrypt(in,out,len,key,ivec,block);
210 return len;
211 }
212
213 len -= 16+residue;
214
215 if (len) {
216 CRYPTO_cbc128_decrypt(in,out,len,key,ivec,block);
217 in += len;
218 out += len;
219 }
220
221 (*block)(in+residue,tmp.c+16,key);
222
223 for (n=0; n<16; n+=sizeof(size_t))
224 *(size_t *)(tmp.c+n) = *(size_t *)(tmp.c+16+n);
225 memcpy(tmp.c,in,residue);
226 (*block)(tmp.c,tmp.c,key);
227
228 for(n=0; n<16; ++n) {
229 unsigned char c = in[n];
230 out[n] = tmp.c[n] ^ ivec[n];
231 ivec[n] = in[n+residue];
232 tmp.c[n] = c;
233 }
234 for(residue+=16; n<residue; ++n)
235 out[n] = tmp.c[n] ^ tmp.c[n-16];
236
237 return 16+len+residue;
 }
 
 size_t CRYPTO_cts128_decrypt(const unsigned char *in, unsigned char *out,
@@ -160,7 +269,47 @@ size_t CRYPTO_cts128_decrypt(const unsigned char *in, unsigned char *out,
 	(*cbc)(tmp.c,tmp.c,32,key,ivec,0);
 	memcpy(out,tmp.c,16+residue);
 #endif
-	return len+residue;
+	return 16+len+residue;
273}
274
275size_t CRYPTO_nistcts128_decrypt(const unsigned char *in, unsigned char *out,
276 size_t len, const void *key,
277 unsigned char ivec[16], cbc128_f cbc)
278{ size_t residue, n;
279 union { size_t align; unsigned char c[32]; } tmp;
280
281 assert (in && out && key && ivec);
282
283 if (len<16) return 0;
284
285 residue=len%16;
286
287 if (residue==0) {
288 (*cbc)(in,out,len,key,ivec,0);
289 return len;
290 }
291
292 len -= 16+residue;
293
294 if (len) {
295 (*cbc)(in,out,len,key,ivec,0);
296 in += len;
297 out += len;
298 }
299
300 for (n=16; n<32; n+=sizeof(size_t))
301 *(size_t *)(tmp.c+n) = 0;
302 /* this places in[16] at &tmp.c[16] and decrypted block at &tmp.c[0] */
303 (*cbc)(in+residue,tmp.c,16,key,tmp.c+16,0);
304
305 memcpy(tmp.c,in,residue);
306#if defined(CBC_HANDLES_TRUNCATED_IO)
307 (*cbc)(tmp.c,out,16+residue,key,ivec,0);
308#else
309 (*cbc)(tmp.c,tmp.c,32,key,ivec,0);
310 memcpy(out,tmp.c,16+residue);
311#endif
312 return 16+len+residue;
 }
 
 #if defined(SELFTEST)
@@ -200,9 +349,8 @@ static const unsigned char vector_64[64] =
 static AES_KEY encks, decks;
 
 void test_vector(const unsigned char *vector,size_t len)
-{	unsigned char cleartext[64];
-	unsigned char iv[sizeof(test_iv)];
-	unsigned char ciphertext[64];
+{	unsigned char iv[sizeof(test_iv)];
+	unsigned char cleartext[64],ciphertext[64];
 	size_t tail;
 
 	printf("vector_%d\n",len); fflush(stdout);
@@ -243,7 +391,57 @@ void test_vector(const unsigned char *vector,size_t len)
 		fprintf(stderr,"iv_%d mismatch\n",len), exit(4);
 }
 
-main()
+void test_nistvector(const unsigned char *vector,size_t len)
395{ unsigned char iv[sizeof(test_iv)];
396 unsigned char cleartext[64],ciphertext[64],nistvector[64];
397 size_t tail;
398
399 printf("nistvector_%d\n",len); fflush(stdout);
400
401 if ((tail=len%16) == 0) tail = 16;
402
403 len -= 16 + tail;
404 memcpy(nistvector,vector,len);
405 /* flip two last blocks */
406 memcpy(nistvector+len,vector+len+16,tail);
407 memcpy(nistvector+len+tail,vector+len,16);
408 len += 16 + tail;
409 tail = 16;
410
411 /* test block-based encryption */
412 memcpy(iv,test_iv,sizeof(test_iv));
413 CRYPTO_nistcts128_encrypt_block(test_input,ciphertext,len,&encks,iv,(block128_f)AES_encrypt);
414 if (memcmp(ciphertext,nistvector,len))
415 fprintf(stderr,"output_%d mismatch\n",len), exit(1);
416 if (memcmp(iv,nistvector+len-tail,sizeof(iv)))
417 fprintf(stderr,"iv_%d mismatch\n",len), exit(1);
418
419 /* test block-based decryption */
420 memcpy(iv,test_iv,sizeof(test_iv));
421 CRYPTO_nistcts128_decrypt_block(ciphertext,cleartext,len,&decks,iv,(block128_f)AES_decrypt);
422 if (memcmp(cleartext,test_input,len))
423 fprintf(stderr,"input_%d mismatch\n",len), exit(2);
424 if (memcmp(iv,nistvector+len-tail,sizeof(iv)))
425 fprintf(stderr,"iv_%d mismatch\n",len), exit(2);
426
427 /* test streamed encryption */
428 memcpy(iv,test_iv,sizeof(test_iv));
429 CRYPTO_nistcts128_encrypt(test_input,ciphertext,len,&encks,iv,(cbc128_f)AES_cbc_encrypt);
430 if (memcmp(ciphertext,nistvector,len))
431 fprintf(stderr,"output_%d mismatch\n",len), exit(3);
432 if (memcmp(iv,nistvector+len-tail,sizeof(iv)))
433 fprintf(stderr,"iv_%d mismatch\n",len), exit(3);
434
435 /* test streamed decryption */
436 memcpy(iv,test_iv,sizeof(test_iv));
437 CRYPTO_nistcts128_decrypt(ciphertext,cleartext,len,&decks,iv,(cbc128_f)AES_cbc_encrypt);
438 if (memcmp(cleartext,test_input,len))
439 fprintf(stderr,"input_%d mismatch\n",len), exit(4);
440 if (memcmp(iv,nistvector+len-tail,sizeof(iv)))
441 fprintf(stderr,"iv_%d mismatch\n",len), exit(4);
442}
443
444int main()
 {
 	AES_set_encrypt_key(test_key,128,&encks);
 	AES_set_decrypt_key(test_key,128,&decks);
@@ -254,6 +452,14 @@ main()
 	test_vector(vector_47,sizeof(vector_47));
 	test_vector(vector_48,sizeof(vector_48));
 	test_vector(vector_64,sizeof(vector_64));
-	exit(0);
+
+	test_nistvector(vector_17,sizeof(vector_17));
+	test_nistvector(vector_31,sizeof(vector_31));
+	test_nistvector(vector_32,sizeof(vector_32));
+	test_nistvector(vector_47,sizeof(vector_47));
+	test_nistvector(vector_48,sizeof(vector_48));
+	test_nistvector(vector_64,sizeof(vector_64));
+
+	return 0;
 }
 #endif
diff --git a/src/lib/libssl/src/crypto/modes/gcm128.c b/src/lib/libssl/src/crypto/modes/gcm128.c
new file mode 100644
index 0000000000..7d6d034970
--- /dev/null
+++ b/src/lib/libssl/src/crypto/modes/gcm128.c
@@ -0,0 +1,1757 @@
1/* ====================================================================
2 * Copyright (c) 2010 The OpenSSL Project. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in
13 * the documentation and/or other materials provided with the
14 * distribution.
15 *
16 * 3. All advertising materials mentioning features or use of this
17 * software must display the following acknowledgment:
18 * "This product includes software developed by the OpenSSL Project
19 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
20 *
21 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
22 * endorse or promote products derived from this software without
23 * prior written permission. For written permission, please contact
24 * openssl-core@openssl.org.
25 *
26 * 5. Products derived from this software may not be called "OpenSSL"
27 * nor may "OpenSSL" appear in their names without prior written
28 * permission of the OpenSSL Project.
29 *
30 * 6. Redistributions of any form whatsoever must retain the following
31 * acknowledgment:
32 * "This product includes software developed by the OpenSSL Project
33 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
34 *
35 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
36 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
37 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
38 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
39 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
40 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
41 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
42 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
43 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
44 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
45 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
46 * OF THE POSSIBILITY OF SUCH DAMAGE.
47 * ====================================================================
48 */
49
50#define OPENSSL_FIPSAPI
51
52#include <openssl/crypto.h>
53#include "modes_lcl.h"
54#include <string.h>
55
56#ifndef MODES_DEBUG
57# ifndef NDEBUG
58# define NDEBUG
59# endif
60#endif
61#include <assert.h>
62
63#if defined(BSWAP4) && defined(STRICT_ALIGNMENT)
64/* redefine, because alignment is ensured */
65#undef GETU32
66#define GETU32(p) BSWAP4(*(const u32 *)(p))
67#undef PUTU32
68#define PUTU32(p,v) *(u32 *)(p) = BSWAP4(v)
69#endif
70
71#define PACK(s) ((size_t)(s)<<(sizeof(size_t)*8-16))
72#define REDUCE1BIT(V) do { \
73 if (sizeof(size_t)==8) { \
74 u64 T = U64(0xe100000000000000) & (0-(V.lo&1)); \
75 V.lo = (V.hi<<63)|(V.lo>>1); \
76 V.hi = (V.hi>>1 )^T; \
77 } \
78 else { \
79 u32 T = 0xe1000000U & (0-(u32)(V.lo&1)); \
80 V.lo = (V.hi<<63)|(V.lo>>1); \
81 V.hi = (V.hi>>1 )^((u64)T<<32); \
82 } \
83} while(0)
84
85/*
86 * Even though permitted values for TABLE_BITS are 8, 4 and 1, it should
87 * never be set to 8. 8 is effectively reserved for testing purposes.
88 * TABLE_BITS>1 are lookup-table-driven implementations referred to as
89 * "Shoup's" in GCM specification. In other words OpenSSL does not cover
90 * whole spectrum of possible table driven implementations. Why? In
91 * non-"Shoup's" case memory access pattern is segmented in such manner,
92 * that it's trivial to see that cache timing information can reveal
93 * fair portion of intermediate hash value. Given that ciphertext is
94 * always available to attacker, it's possible for him to attempt to
95 * deduce secret parameter H and if successful, tamper with messages
96 * [which is nothing but trivial in CTR mode]. In "Shoup's" case it's
97 * not as trivial, but there is no reason to believe that it's resistant
98 * to cache-timing attack. And the thing about "8-bit" implementation is
99 * that it consumes 16 (sixteen) times more memory, 4KB per individual
100 * key + 1KB shared. Well, on pros side it should be twice as fast as
101 * "4-bit" version. And for gcc-generated x86[_64] code, "8-bit" version
102 * was observed to run ~75% faster, closer to 100% for commercial
103 * compilers... Yet "4-bit" procedure is preferred, because it's
104 * believed to provide better security-performance balance and adequate
105 * all-round performance. "All-round" refers to things like:
106 *
107 * - shorter setup time effectively improves overall timing for
108 * handling short messages;
109 * - larger table allocation can become unbearable because of VM
110 * subsystem penalties (for example on Windows large enough free
111 * results in VM working set trimming, meaning that consequent
112 * malloc would immediately incur working set expansion);
113 * - larger table has larger cache footprint, which can affect
114 * performance of other code paths (not necessarily even from same
115 * thread in Hyper-Threading world);
116 *
117 * Value of 1 is not appropriate for performance reasons.
118 */
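Whatever the table size, the operation all of the variants below implement is the same GHASH multiplication; for reference (notation ours, glossing over GCM's reflected bit order):

\[
\begin{aligned}
&\text{over } \mathrm{GF}(2^{128}) \text{ with } P(x)=x^{128}+x^7+x^2+x+1: \qquad
Y_0 = 0, \qquad Y_i = (Y_{i-1} \oplus X_i)\cdot H \bmod P(x),\\
&\text{where } H = E_K(0^{128}) \text{ and the } X_i \text{ are the 128-bit input blocks.}
\end{aligned}
\]

The "Shoup" 4-bit variant precomputes the sixteen products j·H for 4-bit j, so one 128-bit multiplication becomes 32 table lookups interleaved with 4-bit shifts, each shift corrected via the small rem_4bit table; the 8-bit variant does the same with 256 entries per key.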
119#if TABLE_BITS==8
120
121static void gcm_init_8bit(u128 Htable[256], u64 H[2])
122{
123 int i, j;
124 u128 V;
125
126 Htable[0].hi = 0;
127 Htable[0].lo = 0;
128 V.hi = H[0];
129 V.lo = H[1];
130
131 for (Htable[128]=V, i=64; i>0; i>>=1) {
132 REDUCE1BIT(V);
133 Htable[i] = V;
134 }
135
136 for (i=2; i<256; i<<=1) {
137 u128 *Hi = Htable+i, H0 = *Hi;
138 for (j=1; j<i; ++j) {
139 Hi[j].hi = H0.hi^Htable[j].hi;
140 Hi[j].lo = H0.lo^Htable[j].lo;
141 }
142 }
143}
144
145static void gcm_gmult_8bit(u64 Xi[2], const u128 Htable[256])
146{
147 u128 Z = { 0, 0};
148 const u8 *xi = (const u8 *)Xi+15;
149 size_t rem, n = *xi;
150 const union { long one; char little; } is_endian = {1};
151 static const size_t rem_8bit[256] = {
152 PACK(0x0000), PACK(0x01C2), PACK(0x0384), PACK(0x0246),
153 PACK(0x0708), PACK(0x06CA), PACK(0x048C), PACK(0x054E),
154 PACK(0x0E10), PACK(0x0FD2), PACK(0x0D94), PACK(0x0C56),
155 PACK(0x0918), PACK(0x08DA), PACK(0x0A9C), PACK(0x0B5E),
156 PACK(0x1C20), PACK(0x1DE2), PACK(0x1FA4), PACK(0x1E66),
157 PACK(0x1B28), PACK(0x1AEA), PACK(0x18AC), PACK(0x196E),
158 PACK(0x1230), PACK(0x13F2), PACK(0x11B4), PACK(0x1076),
159 PACK(0x1538), PACK(0x14FA), PACK(0x16BC), PACK(0x177E),
160 PACK(0x3840), PACK(0x3982), PACK(0x3BC4), PACK(0x3A06),
161 PACK(0x3F48), PACK(0x3E8A), PACK(0x3CCC), PACK(0x3D0E),
162 PACK(0x3650), PACK(0x3792), PACK(0x35D4), PACK(0x3416),
163 PACK(0x3158), PACK(0x309A), PACK(0x32DC), PACK(0x331E),
164 PACK(0x2460), PACK(0x25A2), PACK(0x27E4), PACK(0x2626),
165 PACK(0x2368), PACK(0x22AA), PACK(0x20EC), PACK(0x212E),
166 PACK(0x2A70), PACK(0x2BB2), PACK(0x29F4), PACK(0x2836),
167 PACK(0x2D78), PACK(0x2CBA), PACK(0x2EFC), PACK(0x2F3E),
168 PACK(0x7080), PACK(0x7142), PACK(0x7304), PACK(0x72C6),
169 PACK(0x7788), PACK(0x764A), PACK(0x740C), PACK(0x75CE),
170 PACK(0x7E90), PACK(0x7F52), PACK(0x7D14), PACK(0x7CD6),
171 PACK(0x7998), PACK(0x785A), PACK(0x7A1C), PACK(0x7BDE),
172 PACK(0x6CA0), PACK(0x6D62), PACK(0x6F24), PACK(0x6EE6),
173 PACK(0x6BA8), PACK(0x6A6A), PACK(0x682C), PACK(0x69EE),
174 PACK(0x62B0), PACK(0x6372), PACK(0x6134), PACK(0x60F6),
175 PACK(0x65B8), PACK(0x647A), PACK(0x663C), PACK(0x67FE),
176 PACK(0x48C0), PACK(0x4902), PACK(0x4B44), PACK(0x4A86),
177 PACK(0x4FC8), PACK(0x4E0A), PACK(0x4C4C), PACK(0x4D8E),
178 PACK(0x46D0), PACK(0x4712), PACK(0x4554), PACK(0x4496),
179 PACK(0x41D8), PACK(0x401A), PACK(0x425C), PACK(0x439E),
180 PACK(0x54E0), PACK(0x5522), PACK(0x5764), PACK(0x56A6),
181 PACK(0x53E8), PACK(0x522A), PACK(0x506C), PACK(0x51AE),
182 PACK(0x5AF0), PACK(0x5B32), PACK(0x5974), PACK(0x58B6),
183 PACK(0x5DF8), PACK(0x5C3A), PACK(0x5E7C), PACK(0x5FBE),
184 PACK(0xE100), PACK(0xE0C2), PACK(0xE284), PACK(0xE346),
185 PACK(0xE608), PACK(0xE7CA), PACK(0xE58C), PACK(0xE44E),
186 PACK(0xEF10), PACK(0xEED2), PACK(0xEC94), PACK(0xED56),
187 PACK(0xE818), PACK(0xE9DA), PACK(0xEB9C), PACK(0xEA5E),
188 PACK(0xFD20), PACK(0xFCE2), PACK(0xFEA4), PACK(0xFF66),
189 PACK(0xFA28), PACK(0xFBEA), PACK(0xF9AC), PACK(0xF86E),
190 PACK(0xF330), PACK(0xF2F2), PACK(0xF0B4), PACK(0xF176),
191 PACK(0xF438), PACK(0xF5FA), PACK(0xF7BC), PACK(0xF67E),
192 PACK(0xD940), PACK(0xD882), PACK(0xDAC4), PACK(0xDB06),
193 PACK(0xDE48), PACK(0xDF8A), PACK(0xDDCC), PACK(0xDC0E),
194 PACK(0xD750), PACK(0xD692), PACK(0xD4D4), PACK(0xD516),
195 PACK(0xD058), PACK(0xD19A), PACK(0xD3DC), PACK(0xD21E),
196 PACK(0xC560), PACK(0xC4A2), PACK(0xC6E4), PACK(0xC726),
197 PACK(0xC268), PACK(0xC3AA), PACK(0xC1EC), PACK(0xC02E),
198 PACK(0xCB70), PACK(0xCAB2), PACK(0xC8F4), PACK(0xC936),
199 PACK(0xCC78), PACK(0xCDBA), PACK(0xCFFC), PACK(0xCE3E),
200 PACK(0x9180), PACK(0x9042), PACK(0x9204), PACK(0x93C6),
201 PACK(0x9688), PACK(0x974A), PACK(0x950C), PACK(0x94CE),
202 PACK(0x9F90), PACK(0x9E52), PACK(0x9C14), PACK(0x9DD6),
203 PACK(0x9898), PACK(0x995A), PACK(0x9B1C), PACK(0x9ADE),
204 PACK(0x8DA0), PACK(0x8C62), PACK(0x8E24), PACK(0x8FE6),
205 PACK(0x8AA8), PACK(0x8B6A), PACK(0x892C), PACK(0x88EE),
206 PACK(0x83B0), PACK(0x8272), PACK(0x8034), PACK(0x81F6),
207 PACK(0x84B8), PACK(0x857A), PACK(0x873C), PACK(0x86FE),
208 PACK(0xA9C0), PACK(0xA802), PACK(0xAA44), PACK(0xAB86),
209 PACK(0xAEC8), PACK(0xAF0A), PACK(0xAD4C), PACK(0xAC8E),
210 PACK(0xA7D0), PACK(0xA612), PACK(0xA454), PACK(0xA596),
211 PACK(0xA0D8), PACK(0xA11A), PACK(0xA35C), PACK(0xA29E),
212 PACK(0xB5E0), PACK(0xB422), PACK(0xB664), PACK(0xB7A6),
213 PACK(0xB2E8), PACK(0xB32A), PACK(0xB16C), PACK(0xB0AE),
214 PACK(0xBBF0), PACK(0xBA32), PACK(0xB874), PACK(0xB9B6),
215 PACK(0xBCF8), PACK(0xBD3A), PACK(0xBF7C), PACK(0xBEBE) };
216
217 while (1) {
218 Z.hi ^= Htable[n].hi;
219 Z.lo ^= Htable[n].lo;
220
221 if ((u8 *)Xi==xi) break;
222
223 n = *(--xi);
224
225 rem = (size_t)Z.lo&0xff;
226 Z.lo = (Z.hi<<56)|(Z.lo>>8);
227 Z.hi = (Z.hi>>8);
228 if (sizeof(size_t)==8)
229 Z.hi ^= rem_8bit[rem];
230 else
231 Z.hi ^= (u64)rem_8bit[rem]<<32;
232 }
233
234 if (is_endian.little) {
235#ifdef BSWAP8
236 Xi[0] = BSWAP8(Z.hi);
237 Xi[1] = BSWAP8(Z.lo);
238#else
239 u8 *p = (u8 *)Xi;
240 u32 v;
241 v = (u32)(Z.hi>>32); PUTU32(p,v);
242 v = (u32)(Z.hi); PUTU32(p+4,v);
243 v = (u32)(Z.lo>>32); PUTU32(p+8,v);
244 v = (u32)(Z.lo); PUTU32(p+12,v);
245#endif
246 }
247 else {
248 Xi[0] = Z.hi;
249 Xi[1] = Z.lo;
250 }
251}
252#define GCM_MUL(ctx,Xi) gcm_gmult_8bit(ctx->Xi.u,ctx->Htable)
253
254#elif TABLE_BITS==4
255
256static void gcm_init_4bit(u128 Htable[16], u64 H[2])
257{
258 u128 V;
259#if defined(OPENSSL_SMALL_FOOTPRINT)
260 int i;
261#endif
262
263 Htable[0].hi = 0;
264 Htable[0].lo = 0;
265 V.hi = H[0];
266 V.lo = H[1];
267
268#if defined(OPENSSL_SMALL_FOOTPRINT)
269 for (Htable[8]=V, i=4; i>0; i>>=1) {
270 REDUCE1BIT(V);
271 Htable[i] = V;
272 }
273
274 for (i=2; i<16; i<<=1) {
275 u128 *Hi = Htable+i;
276 int j;
277 for (V=*Hi, j=1; j<i; ++j) {
278 Hi[j].hi = V.hi^Htable[j].hi;
279 Hi[j].lo = V.lo^Htable[j].lo;
280 }
281 }
282#else
283 Htable[8] = V;
284 REDUCE1BIT(V);
285 Htable[4] = V;
286 REDUCE1BIT(V);
287 Htable[2] = V;
288 REDUCE1BIT(V);
289 Htable[1] = V;
290 Htable[3].hi = V.hi^Htable[2].hi, Htable[3].lo = V.lo^Htable[2].lo;
291 V=Htable[4];
292 Htable[5].hi = V.hi^Htable[1].hi, Htable[5].lo = V.lo^Htable[1].lo;
293 Htable[6].hi = V.hi^Htable[2].hi, Htable[6].lo = V.lo^Htable[2].lo;
294 Htable[7].hi = V.hi^Htable[3].hi, Htable[7].lo = V.lo^Htable[3].lo;
295 V=Htable[8];
296 Htable[9].hi = V.hi^Htable[1].hi, Htable[9].lo = V.lo^Htable[1].lo;
297 Htable[10].hi = V.hi^Htable[2].hi, Htable[10].lo = V.lo^Htable[2].lo;
298 Htable[11].hi = V.hi^Htable[3].hi, Htable[11].lo = V.lo^Htable[3].lo;
299 Htable[12].hi = V.hi^Htable[4].hi, Htable[12].lo = V.lo^Htable[4].lo;
300 Htable[13].hi = V.hi^Htable[5].hi, Htable[13].lo = V.lo^Htable[5].lo;
301 Htable[14].hi = V.hi^Htable[6].hi, Htable[14].lo = V.lo^Htable[6].lo;
302 Htable[15].hi = V.hi^Htable[7].hi, Htable[15].lo = V.lo^Htable[7].lo;
303#endif
304#if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm))
305 /*
306 * ARM assembler expects specific dword order in Htable.
307 */
308 {
309 int j;
310 const union { long one; char little; } is_endian = {1};
311
312 if (is_endian.little)
313 for (j=0;j<16;++j) {
314 V = Htable[j];
315 Htable[j].hi = V.lo;
316 Htable[j].lo = V.hi;
317 }
318 else
319 for (j=0;j<16;++j) {
320 V = Htable[j];
321 Htable[j].hi = V.lo<<32|V.lo>>32;
322 Htable[j].lo = V.hi<<32|V.hi>>32;
323 }
324 }
325#endif
326}
327
328#ifndef GHASH_ASM
329static const size_t rem_4bit[16] = {
330 PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
331 PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
332 PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
333 PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0) };
334
335static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
336{
337 u128 Z;
338 int cnt = 15;
339 size_t rem, nlo, nhi;
340 const union { long one; char little; } is_endian = {1};
341
342 nlo = ((const u8 *)Xi)[15];
343 nhi = nlo>>4;
344 nlo &= 0xf;
345
346 Z.hi = Htable[nlo].hi;
347 Z.lo = Htable[nlo].lo;
348
349 while (1) {
350 rem = (size_t)Z.lo&0xf;
351 Z.lo = (Z.hi<<60)|(Z.lo>>4);
352 Z.hi = (Z.hi>>4);
353 if (sizeof(size_t)==8)
354 Z.hi ^= rem_4bit[rem];
355 else
356 Z.hi ^= (u64)rem_4bit[rem]<<32;
357
358 Z.hi ^= Htable[nhi].hi;
359 Z.lo ^= Htable[nhi].lo;
360
361 if (--cnt<0) break;
362
363 nlo = ((const u8 *)Xi)[cnt];
364 nhi = nlo>>4;
365 nlo &= 0xf;
366
367 rem = (size_t)Z.lo&0xf;
368 Z.lo = (Z.hi<<60)|(Z.lo>>4);
369 Z.hi = (Z.hi>>4);
370 if (sizeof(size_t)==8)
371 Z.hi ^= rem_4bit[rem];
372 else
373 Z.hi ^= (u64)rem_4bit[rem]<<32;
374
375 Z.hi ^= Htable[nlo].hi;
376 Z.lo ^= Htable[nlo].lo;
377 }
378
379 if (is_endian.little) {
380#ifdef BSWAP8
381 Xi[0] = BSWAP8(Z.hi);
382 Xi[1] = BSWAP8(Z.lo);
383#else
384 u8 *p = (u8 *)Xi;
385 u32 v;
386 v = (u32)(Z.hi>>32); PUTU32(p,v);
387 v = (u32)(Z.hi); PUTU32(p+4,v);
388 v = (u32)(Z.lo>>32); PUTU32(p+8,v);
389 v = (u32)(Z.lo); PUTU32(p+12,v);
390#endif
391 }
392 else {
393 Xi[0] = Z.hi;
394 Xi[1] = Z.lo;
395 }
396}
397
398#if !defined(OPENSSL_SMALL_FOOTPRINT)
399/*
400 * Streamed gcm_mult_4bit, see CRYPTO_gcm128_[en|de]crypt for
401 * details... Compiler-generated code doesn't seem to give any
402 * performance improvement, at least not on x86[_64]. It's here
403 * mostly as reference and a placeholder for possible future
404 * non-trivial optimization[s]...
405 */
406static void gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16],
407 const u8 *inp,size_t len)
408{
409 u128 Z;
410 int cnt;
411 size_t rem, nlo, nhi;
412 const union { long one; char little; } is_endian = {1};
413
414#if 1
415 do {
416 cnt = 15;
417 nlo = ((const u8 *)Xi)[15];
418 nlo ^= inp[15];
419 nhi = nlo>>4;
420 nlo &= 0xf;
421
422 Z.hi = Htable[nlo].hi;
423 Z.lo = Htable[nlo].lo;
424
425 while (1) {
426 rem = (size_t)Z.lo&0xf;
427 Z.lo = (Z.hi<<60)|(Z.lo>>4);
428 Z.hi = (Z.hi>>4);
429 if (sizeof(size_t)==8)
430 Z.hi ^= rem_4bit[rem];
431 else
432 Z.hi ^= (u64)rem_4bit[rem]<<32;
433
434 Z.hi ^= Htable[nhi].hi;
435 Z.lo ^= Htable[nhi].lo;
436
437 if (--cnt<0) break;
438
439 nlo = ((const u8 *)Xi)[cnt];
440 nlo ^= inp[cnt];
441 nhi = nlo>>4;
442 nlo &= 0xf;
443
444 rem = (size_t)Z.lo&0xf;
445 Z.lo = (Z.hi<<60)|(Z.lo>>4);
446 Z.hi = (Z.hi>>4);
447 if (sizeof(size_t)==8)
448 Z.hi ^= rem_4bit[rem];
449 else
450 Z.hi ^= (u64)rem_4bit[rem]<<32;
451
452 Z.hi ^= Htable[nlo].hi;
453 Z.lo ^= Htable[nlo].lo;
454 }
455#else
456 /*
457 * Extra 256+16 bytes per-key plus 512 bytes shared tables
458 * [should] give ~50% improvement... One could have PACK()-ed
459 * the rem_8bit even here, but the priority is to minimize
460 * cache footprint...
461 */
462 u128 Hshr4[16]; /* Htable shifted right by 4 bits */
463 u8 Hshl4[16]; /* Htable shifted left by 4 bits */
464 static const unsigned short rem_8bit[256] = {
465 0x0000, 0x01C2, 0x0384, 0x0246, 0x0708, 0x06CA, 0x048C, 0x054E,
466 0x0E10, 0x0FD2, 0x0D94, 0x0C56, 0x0918, 0x08DA, 0x0A9C, 0x0B5E,
467 0x1C20, 0x1DE2, 0x1FA4, 0x1E66, 0x1B28, 0x1AEA, 0x18AC, 0x196E,
468 0x1230, 0x13F2, 0x11B4, 0x1076, 0x1538, 0x14FA, 0x16BC, 0x177E,
469 0x3840, 0x3982, 0x3BC4, 0x3A06, 0x3F48, 0x3E8A, 0x3CCC, 0x3D0E,
470 0x3650, 0x3792, 0x35D4, 0x3416, 0x3158, 0x309A, 0x32DC, 0x331E,
471 0x2460, 0x25A2, 0x27E4, 0x2626, 0x2368, 0x22AA, 0x20EC, 0x212E,
472 0x2A70, 0x2BB2, 0x29F4, 0x2836, 0x2D78, 0x2CBA, 0x2EFC, 0x2F3E,
473 0x7080, 0x7142, 0x7304, 0x72C6, 0x7788, 0x764A, 0x740C, 0x75CE,
474 0x7E90, 0x7F52, 0x7D14, 0x7CD6, 0x7998, 0x785A, 0x7A1C, 0x7BDE,
475 0x6CA0, 0x6D62, 0x6F24, 0x6EE6, 0x6BA8, 0x6A6A, 0x682C, 0x69EE,
476 0x62B0, 0x6372, 0x6134, 0x60F6, 0x65B8, 0x647A, 0x663C, 0x67FE,
477 0x48C0, 0x4902, 0x4B44, 0x4A86, 0x4FC8, 0x4E0A, 0x4C4C, 0x4D8E,
478 0x46D0, 0x4712, 0x4554, 0x4496, 0x41D8, 0x401A, 0x425C, 0x439E,
479 0x54E0, 0x5522, 0x5764, 0x56A6, 0x53E8, 0x522A, 0x506C, 0x51AE,
480 0x5AF0, 0x5B32, 0x5974, 0x58B6, 0x5DF8, 0x5C3A, 0x5E7C, 0x5FBE,
481 0xE100, 0xE0C2, 0xE284, 0xE346, 0xE608, 0xE7CA, 0xE58C, 0xE44E,
482 0xEF10, 0xEED2, 0xEC94, 0xED56, 0xE818, 0xE9DA, 0xEB9C, 0xEA5E,
483 0xFD20, 0xFCE2, 0xFEA4, 0xFF66, 0xFA28, 0xFBEA, 0xF9AC, 0xF86E,
484 0xF330, 0xF2F2, 0xF0B4, 0xF176, 0xF438, 0xF5FA, 0xF7BC, 0xF67E,
485 0xD940, 0xD882, 0xDAC4, 0xDB06, 0xDE48, 0xDF8A, 0xDDCC, 0xDC0E,
486 0xD750, 0xD692, 0xD4D4, 0xD516, 0xD058, 0xD19A, 0xD3DC, 0xD21E,
487 0xC560, 0xC4A2, 0xC6E4, 0xC726, 0xC268, 0xC3AA, 0xC1EC, 0xC02E,
488 0xCB70, 0xCAB2, 0xC8F4, 0xC936, 0xCC78, 0xCDBA, 0xCFFC, 0xCE3E,
489 0x9180, 0x9042, 0x9204, 0x93C6, 0x9688, 0x974A, 0x950C, 0x94CE,
490 0x9F90, 0x9E52, 0x9C14, 0x9DD6, 0x9898, 0x995A, 0x9B1C, 0x9ADE,
491 0x8DA0, 0x8C62, 0x8E24, 0x8FE6, 0x8AA8, 0x8B6A, 0x892C, 0x88EE,
492 0x83B0, 0x8272, 0x8034, 0x81F6, 0x84B8, 0x857A, 0x873C, 0x86FE,
493 0xA9C0, 0xA802, 0xAA44, 0xAB86, 0xAEC8, 0xAF0A, 0xAD4C, 0xAC8E,
494 0xA7D0, 0xA612, 0xA454, 0xA596, 0xA0D8, 0xA11A, 0xA35C, 0xA29E,
495 0xB5E0, 0xB422, 0xB664, 0xB7A6, 0xB2E8, 0xB32A, 0xB16C, 0xB0AE,
496 0xBBF0, 0xBA32, 0xB874, 0xB9B6, 0xBCF8, 0xBD3A, 0xBF7C, 0xBEBE };
497 /*
498 * This pre-processing phase slows down procedure by approximately
499 * same time as it makes each loop spin faster. In other words
500 * single block performance is approximately same as straightforward
501 * "4-bit" implementation, and then it goes only faster...
502 */
503 for (cnt=0; cnt<16; ++cnt) {
504 Z.hi = Htable[cnt].hi;
505 Z.lo = Htable[cnt].lo;
506 Hshr4[cnt].lo = (Z.hi<<60)|(Z.lo>>4);
507 Hshr4[cnt].hi = (Z.hi>>4);
508 Hshl4[cnt] = (u8)(Z.lo<<4);
509 }
510
511 do {
512 for (Z.lo=0, Z.hi=0, cnt=15; cnt; --cnt) {
513 nlo = ((const u8 *)Xi)[cnt];
514 nlo ^= inp[cnt];
515 nhi = nlo>>4;
516 nlo &= 0xf;
517
518 Z.hi ^= Htable[nlo].hi;
519 Z.lo ^= Htable[nlo].lo;
520
521 rem = (size_t)Z.lo&0xff;
522
523 Z.lo = (Z.hi<<56)|(Z.lo>>8);
524 Z.hi = (Z.hi>>8);
525
526 Z.hi ^= Hshr4[nhi].hi;
527 Z.lo ^= Hshr4[nhi].lo;
528 Z.hi ^= (u64)rem_8bit[rem^Hshl4[nhi]]<<48;
529 }
530
531 nlo = ((const u8 *)Xi)[0];
532 nlo ^= inp[0];
533 nhi = nlo>>4;
534 nlo &= 0xf;
535
536 Z.hi ^= Htable[nlo].hi;
537 Z.lo ^= Htable[nlo].lo;
538
539 rem = (size_t)Z.lo&0xf;
540
541 Z.lo = (Z.hi<<60)|(Z.lo>>4);
542 Z.hi = (Z.hi>>4);
543
544 Z.hi ^= Htable[nhi].hi;
545 Z.lo ^= Htable[nhi].lo;
546 Z.hi ^= ((u64)rem_8bit[rem<<4])<<48;
547#endif
548
549 if (is_endian.little) {
550#ifdef BSWAP8
551 Xi[0] = BSWAP8(Z.hi);
552 Xi[1] = BSWAP8(Z.lo);
553#else
554 u8 *p = (u8 *)Xi;
555 u32 v;
556 v = (u32)(Z.hi>>32); PUTU32(p,v);
557 v = (u32)(Z.hi); PUTU32(p+4,v);
558 v = (u32)(Z.lo>>32); PUTU32(p+8,v);
559 v = (u32)(Z.lo); PUTU32(p+12,v);
560#endif
561 }
562 else {
563 Xi[0] = Z.hi;
564 Xi[1] = Z.lo;
565 }
566 } while (inp+=16, len-=16);
567}
568#endif
569#else
570void gcm_gmult_4bit(u64 Xi[2],const u128 Htable[16]);
571void gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
572#endif
573
574#define GCM_MUL(ctx,Xi) gcm_gmult_4bit(ctx->Xi.u,ctx->Htable)
575#if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT)
576#define GHASH(ctx,in,len) gcm_ghash_4bit((ctx)->Xi.u,(ctx)->Htable,in,len)
577/* GHASH_CHUNK is "stride parameter" missioned to mitigate cache
578 * thrashing effect. In other words idea is to hash data while it's
579 * still in L1 cache after encryption pass... */
580#define GHASH_CHUNK (3*1024)
581#endif
582
583#else /* TABLE_BITS */
584
585static void gcm_gmult_1bit(u64 Xi[2],const u64 H[2])
586{
587 u128 V,Z = { 0,0 };
588 long X;
589 int i,j;
590 const long *xi = (const long *)Xi;
591 const union { long one; char little; } is_endian = {1};
592
593 V.hi = H[0]; /* H is in host byte order, no byte swapping */
594 V.lo = H[1];
595
596 for (j=0; j<16/sizeof(long); ++j) {
597 if (is_endian.little) {
598 if (sizeof(long)==8) {
599#ifdef BSWAP8
600 X = (long)(BSWAP8(xi[j]));
601#else
602 const u8 *p = (const u8 *)(xi+j);
603 X = (long)((u64)GETU32(p)<<32|GETU32(p+4));
604#endif
605 }
606 else {
607 const u8 *p = (const u8 *)(xi+j);
608 X = (long)GETU32(p);
609 }
610 }
611 else
612 X = xi[j];
613
614 for (i=0; i<8*sizeof(long); ++i, X<<=1) {
615 u64 M = (u64)(X>>(8*sizeof(long)-1));
616 Z.hi ^= V.hi&M;
617 Z.lo ^= V.lo&M;
618
619 REDUCE1BIT(V);
620 }
621 }
622
623 if (is_endian.little) {
624#ifdef BSWAP8
625 Xi[0] = BSWAP8(Z.hi);
626 Xi[1] = BSWAP8(Z.lo);
627#else
628 u8 *p = (u8 *)Xi;
629 u32 v;
630 v = (u32)(Z.hi>>32); PUTU32(p,v);
631 v = (u32)(Z.hi); PUTU32(p+4,v);
632 v = (u32)(Z.lo>>32); PUTU32(p+8,v);
633 v = (u32)(Z.lo); PUTU32(p+12,v);
634#endif
635 }
636 else {
637 Xi[0] = Z.hi;
638 Xi[1] = Z.lo;
639 }
640}
641#define GCM_MUL(ctx,Xi) gcm_gmult_1bit(ctx->Xi.u,ctx->H.u)
642
643#endif
644
645#if TABLE_BITS==4 && defined(GHASH_ASM)
646# if !defined(I386_ONLY) && \
647 (defined(__i386) || defined(__i386__) || \
648 defined(__x86_64) || defined(__x86_64__) || \
649 defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64))
650# define GHASH_ASM_X86_OR_64
651# define GCM_FUNCREF_4BIT
652extern unsigned int OPENSSL_ia32cap_P[2];
653
654void gcm_init_clmul(u128 Htable[16],const u64 Xi[2]);
655void gcm_gmult_clmul(u64 Xi[2],const u128 Htable[16]);
656void gcm_ghash_clmul(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
657
658# if defined(__i386) || defined(__i386__) || defined(_M_IX86)
659# define GHASH_ASM_X86
660void gcm_gmult_4bit_mmx(u64 Xi[2],const u128 Htable[16]);
661void gcm_ghash_4bit_mmx(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
662
663void gcm_gmult_4bit_x86(u64 Xi[2],const u128 Htable[16]);
664void gcm_ghash_4bit_x86(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
665# endif
666# elif defined(__arm__) || defined(__arm)
667# include "arm_arch.h"
668# if __ARM_ARCH__>=7
669# define GHASH_ASM_ARM
670# define GCM_FUNCREF_4BIT
671void gcm_gmult_neon(u64 Xi[2],const u128 Htable[16]);
672void gcm_ghash_neon(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
673# endif
674# endif
675#endif
676
677#ifdef GCM_FUNCREF_4BIT
678# undef GCM_MUL
679# define GCM_MUL(ctx,Xi) (*gcm_gmult_p)(ctx->Xi.u,ctx->Htable)
680# ifdef GHASH
681# undef GHASH
682# define GHASH(ctx,in,len) (*gcm_ghash_p)(ctx->Xi.u,ctx->Htable,in,len)
683# endif
684#endif
685
686void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx,void *key,block128_f block)
687{
688 const union { long one; char little; } is_endian = {1};
689
690 memset(ctx,0,sizeof(*ctx));
691 ctx->block = block;
692 ctx->key = key;
693
694 (*block)(ctx->H.c,ctx->H.c,key);
695
696 if (is_endian.little) {
697 /* H is stored in host byte order */
698#ifdef BSWAP8
699 ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
700 ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
701#else
702 u8 *p = ctx->H.c;
703 u64 hi,lo;
704 hi = (u64)GETU32(p) <<32|GETU32(p+4);
705 lo = (u64)GETU32(p+8)<<32|GETU32(p+12);
706 ctx->H.u[0] = hi;
707 ctx->H.u[1] = lo;
708#endif
709 }
710
711#if TABLE_BITS==8
712 gcm_init_8bit(ctx->Htable,ctx->H.u);
713#elif TABLE_BITS==4
714# if defined(GHASH_ASM_X86_OR_64)
715# if !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2)
716 if (OPENSSL_ia32cap_P[0]&(1<<24) && /* check FXSR bit */
717 OPENSSL_ia32cap_P[1]&(1<<1) ) { /* check PCLMULQDQ bit */
718 gcm_init_clmul(ctx->Htable,ctx->H.u);
719 ctx->gmult = gcm_gmult_clmul;
720 ctx->ghash = gcm_ghash_clmul;
721 return;
722 }
723# endif
724 gcm_init_4bit(ctx->Htable,ctx->H.u);
725# if defined(GHASH_ASM_X86) /* x86 only */
726# if defined(OPENSSL_IA32_SSE2)
727 if (OPENSSL_ia32cap_P[0]&(1<<25)) { /* check SSE bit */
728# else
729 if (OPENSSL_ia32cap_P[0]&(1<<23)) { /* check MMX bit */
730# endif
731 ctx->gmult = gcm_gmult_4bit_mmx;
732 ctx->ghash = gcm_ghash_4bit_mmx;
733 } else {
734 ctx->gmult = gcm_gmult_4bit_x86;
735 ctx->ghash = gcm_ghash_4bit_x86;
736 }
737# else
738 ctx->gmult = gcm_gmult_4bit;
739 ctx->ghash = gcm_ghash_4bit;
740# endif
741# elif defined(GHASH_ASM_ARM)
742 if (OPENSSL_armcap_P & ARMV7_NEON) {
743 ctx->gmult = gcm_gmult_neon;
744 ctx->ghash = gcm_ghash_neon;
745 } else {
746 gcm_init_4bit(ctx->Htable,ctx->H.u);
747 ctx->gmult = gcm_gmult_4bit;
748 ctx->ghash = gcm_ghash_4bit;
749 }
750# else
751 gcm_init_4bit(ctx->Htable,ctx->H.u);
752# endif
753#endif
754}
755
756void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx,const unsigned char *iv,size_t len)
757{
758 const union { long one; char little; } is_endian = {1};
759 unsigned int ctr;
760#ifdef GCM_FUNCREF_4BIT
761 void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
762#endif
763
764 ctx->Yi.u[0] = 0;
765 ctx->Yi.u[1] = 0;
766 ctx->Xi.u[0] = 0;
767 ctx->Xi.u[1] = 0;
768 ctx->len.u[0] = 0; /* AAD length */
769 ctx->len.u[1] = 0; /* message length */
770 ctx->ares = 0;
771 ctx->mres = 0;
772
773 if (len==12) {
774 memcpy(ctx->Yi.c,iv,12);
775 ctx->Yi.c[15]=1;
776 ctr=1;
777 }
778 else {
779 size_t i;
780 u64 len0 = len;
781
782 while (len>=16) {
783 for (i=0; i<16; ++i) ctx->Yi.c[i] ^= iv[i];
784 GCM_MUL(ctx,Yi);
785 iv += 16;
786 len -= 16;
787 }
788 if (len) {
789 for (i=0; i<len; ++i) ctx->Yi.c[i] ^= iv[i];
790 GCM_MUL(ctx,Yi);
791 }
792 len0 <<= 3;
793 if (is_endian.little) {
794#ifdef BSWAP8
795 ctx->Yi.u[1] ^= BSWAP8(len0);
796#else
797 ctx->Yi.c[8] ^= (u8)(len0>>56);
798 ctx->Yi.c[9] ^= (u8)(len0>>48);
799 ctx->Yi.c[10] ^= (u8)(len0>>40);
800 ctx->Yi.c[11] ^= (u8)(len0>>32);
801 ctx->Yi.c[12] ^= (u8)(len0>>24);
802 ctx->Yi.c[13] ^= (u8)(len0>>16);
803 ctx->Yi.c[14] ^= (u8)(len0>>8);
804 ctx->Yi.c[15] ^= (u8)(len0);
805#endif
806 }
807 else
808 ctx->Yi.u[1] ^= len0;
809
810 GCM_MUL(ctx,Yi);
811
812 if (is_endian.little)
813 ctr = GETU32(ctx->Yi.c+12);
814 else
815 ctr = ctx->Yi.d[3];
816 }
817
818 (*ctx->block)(ctx->Yi.c,ctx->EK0.c,ctx->key);
819 ++ctr;
820 if (is_endian.little)
821 PUTU32(ctx->Yi.c+12,ctr);
822 else
823 ctx->Yi.d[3] = ctr;
824}
825
826int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx,const unsigned char *aad,size_t len)
827{
828 size_t i;
829 unsigned int n;
830 u64 alen = ctx->len.u[0];
831#ifdef GCM_FUNCREF_4BIT
832 void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
833# ifdef GHASH
834 void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
835 const u8 *inp,size_t len) = ctx->ghash;
836# endif
837#endif
838
839 if (ctx->len.u[1]) return -2;
840
841 alen += len;
842 if (alen>(U64(1)<<61) || (sizeof(len)==8 && alen<len))
843 return -1;
844 ctx->len.u[0] = alen;
845
846 n = ctx->ares;
847 if (n) {
848 while (n && len) {
849 ctx->Xi.c[n] ^= *(aad++);
850 --len;
851 n = (n+1)%16;
852 }
853 if (n==0) GCM_MUL(ctx,Xi);
854 else {
855 ctx->ares = n;
856 return 0;
857 }
858 }
859
860#ifdef GHASH
861 if ((i = (len&(size_t)-16))) {
862 GHASH(ctx,aad,i);
863 aad += i;
864 len -= i;
865 }
866#else
867 while (len>=16) {
868 for (i=0; i<16; ++i) ctx->Xi.c[i] ^= aad[i];
869 GCM_MUL(ctx,Xi);
870 aad += 16;
871 len -= 16;
872 }
873#endif
874 if (len) {
875 n = (unsigned int)len;
876 for (i=0; i<len; ++i) ctx->Xi.c[i] ^= aad[i];
877 }
878
879 ctx->ares = n;
880 return 0;
881}
882
883int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
884 const unsigned char *in, unsigned char *out,
885 size_t len)
886{
887 const union { long one; char little; } is_endian = {1};
888 unsigned int n, ctr;
889 size_t i;
890 u64 mlen = ctx->len.u[1];
891 block128_f block = ctx->block;
892 void *key = ctx->key;
893#ifdef GCM_FUNCREF_4BIT
894 void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
895# ifdef GHASH
896 void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
897 const u8 *inp,size_t len) = ctx->ghash;
898# endif
899#endif
900
901#if 0
902 n = (unsigned int)mlen%16; /* alternative to ctx->mres */
903#endif
904 mlen += len;
905 if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
906 return -1;
907 ctx->len.u[1] = mlen;
908
909 if (ctx->ares) {
910 /* First call to encrypt finalizes GHASH(AAD) */
911 GCM_MUL(ctx,Xi);
912 ctx->ares = 0;
913 }
914
915 if (is_endian.little)
916 ctr = GETU32(ctx->Yi.c+12);
917 else
918 ctr = ctx->Yi.d[3];
919
920 n = ctx->mres;
921#if !defined(OPENSSL_SMALL_FOOTPRINT)
922 if (16%sizeof(size_t) == 0) do { /* always true actually */
923 if (n) {
924 while (n && len) {
925 ctx->Xi.c[n] ^= *(out++) = *(in++)^ctx->EKi.c[n];
926 --len;
927 n = (n+1)%16;
928 }
929 if (n==0) GCM_MUL(ctx,Xi);
930 else {
931 ctx->mres = n;
932 return 0;
933 }
934 }
935#if defined(STRICT_ALIGNMENT)
936 if (((size_t)in|(size_t)out)%sizeof(size_t) != 0)
937 break;
938#endif
939#if defined(GHASH) && defined(GHASH_CHUNK)
940 while (len>=GHASH_CHUNK) {
941 size_t j=GHASH_CHUNK;
942
943 while (j) {
944 (*block)(ctx->Yi.c,ctx->EKi.c,key);
945 ++ctr;
946 if (is_endian.little)
947 PUTU32(ctx->Yi.c+12,ctr);
948 else
949 ctx->Yi.d[3] = ctr;
950 for (i=0; i<16; i+=sizeof(size_t))
951 *(size_t *)(out+i) =
952 *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
953 out += 16;
954 in += 16;
955 j -= 16;
956 }
957 GHASH(ctx,out-GHASH_CHUNK,GHASH_CHUNK);
958 len -= GHASH_CHUNK;
959 }
960 if ((i = (len&(size_t)-16))) {
961 size_t j=i;
962
963 while (len>=16) {
964 (*block)(ctx->Yi.c,ctx->EKi.c,key);
965 ++ctr;
966 if (is_endian.little)
967 PUTU32(ctx->Yi.c+12,ctr);
968 else
969 ctx->Yi.d[3] = ctr;
970 for (i=0; i<16; i+=sizeof(size_t))
971 *(size_t *)(out+i) =
972 *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
973 out += 16;
974 in += 16;
975 len -= 16;
976 }
977 GHASH(ctx,out-j,j);
978 }
979#else
980 while (len>=16) {
981 (*block)(ctx->Yi.c,ctx->EKi.c,key);
982 ++ctr;
983 if (is_endian.little)
984 PUTU32(ctx->Yi.c+12,ctr);
985 else
986 ctx->Yi.d[3] = ctr;
987 for (i=0; i<16; i+=sizeof(size_t))
988 *(size_t *)(ctx->Xi.c+i) ^=
989 *(size_t *)(out+i) =
990 *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
991 GCM_MUL(ctx,Xi);
992 out += 16;
993 in += 16;
994 len -= 16;
995 }
996#endif
997 if (len) {
998 (*block)(ctx->Yi.c,ctx->EKi.c,key);
999 ++ctr;
1000 if (is_endian.little)
1001 PUTU32(ctx->Yi.c+12,ctr);
1002 else
1003 ctx->Yi.d[3] = ctr;
1004 while (len--) {
1005 ctx->Xi.c[n] ^= out[n] = in[n]^ctx->EKi.c[n];
1006 ++n;
1007 }
1008 }
1009
1010 ctx->mres = n;
1011 return 0;
1012 } while(0);
1013#endif
1014 for (i=0;i<len;++i) {
1015 if (n==0) {
1016 (*block)(ctx->Yi.c,ctx->EKi.c,key);
1017 ++ctr;
1018 if (is_endian.little)
1019 PUTU32(ctx->Yi.c+12,ctr);
1020 else
1021 ctx->Yi.d[3] = ctr;
1022 }
1023 ctx->Xi.c[n] ^= out[i] = in[i]^ctx->EKi.c[n];
1024 n = (n+1)%16;
1025 if (n==0)
1026 GCM_MUL(ctx,Xi);
1027 }
1028
1029 ctx->mres = n;
1030 return 0;
1031}
1032
1033int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
1034 const unsigned char *in, unsigned char *out,
1035 size_t len)
1036{
1037 const union { long one; char little; } is_endian = {1};
1038 unsigned int n, ctr;
1039 size_t i;
1040 u64 mlen = ctx->len.u[1];
1041 block128_f block = ctx->block;
1042 void *key = ctx->key;
1043#ifdef GCM_FUNCREF_4BIT
1044 void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
1045# ifdef GHASH
1046 void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
1047 const u8 *inp,size_t len) = ctx->ghash;
1048# endif
1049#endif
1050
1051 mlen += len;
1052 if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
1053 return -1;
1054 ctx->len.u[1] = mlen;
1055
1056 if (ctx->ares) {
1057 /* First call to decrypt finalizes GHASH(AAD) */
1058 GCM_MUL(ctx,Xi);
1059 ctx->ares = 0;
1060 }
1061
1062 if (is_endian.little)
1063 ctr = GETU32(ctx->Yi.c+12);
1064 else
1065 ctr = ctx->Yi.d[3];
1066
1067 n = ctx->mres;
1068#if !defined(OPENSSL_SMALL_FOOTPRINT)
1069 if (16%sizeof(size_t) == 0) do { /* always true actually */
1070 if (n) {
1071 while (n && len) {
1072 u8 c = *(in++);
1073 *(out++) = c^ctx->EKi.c[n];
1074 ctx->Xi.c[n] ^= c;
1075 --len;
1076 n = (n+1)%16;
1077 }
1078 if (n==0) GCM_MUL (ctx,Xi);
1079 else {
1080 ctx->mres = n;
1081 return 0;
1082 }
1083 }
1084#if defined(STRICT_ALIGNMENT)
1085 if (((size_t)in|(size_t)out)%sizeof(size_t) != 0)
1086 break;
1087#endif
1088#if defined(GHASH) && defined(GHASH_CHUNK)
1089 while (len>=GHASH_CHUNK) {
1090 size_t j=GHASH_CHUNK;
1091
1092 GHASH(ctx,in,GHASH_CHUNK);
1093 while (j) {
1094 (*block)(ctx->Yi.c,ctx->EKi.c,key);
1095 ++ctr;
1096 if (is_endian.little)
1097 PUTU32(ctx->Yi.c+12,ctr);
1098 else
1099 ctx->Yi.d[3] = ctr;
1100 for (i=0; i<16; i+=sizeof(size_t))
1101 *(size_t *)(out+i) =
1102 *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
1103 out += 16;
1104 in += 16;
1105 j -= 16;
1106 }
1107 len -= GHASH_CHUNK;
1108 }
1109 if ((i = (len&(size_t)-16))) {
1110 GHASH(ctx,in,i);
1111 while (len>=16) {
1112 (*block)(ctx->Yi.c,ctx->EKi.c,key);
1113 ++ctr;
1114 if (is_endian.little)
1115 PUTU32(ctx->Yi.c+12,ctr);
1116 else
1117 ctx->Yi.d[3] = ctr;
1118 for (i=0; i<16; i+=sizeof(size_t))
1119 *(size_t *)(out+i) =
1120 *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
1121 out += 16;
1122 in += 16;
1123 len -= 16;
1124 }
1125 }
1126#else
1127 while (len>=16) {
1128 (*block)(ctx->Yi.c,ctx->EKi.c,key);
1129 ++ctr;
1130 if (is_endian.little)
1131 PUTU32(ctx->Yi.c+12,ctr);
1132 else
1133 ctx->Yi.d[3] = ctr;
1134 for (i=0; i<16; i+=sizeof(size_t)) {
1135 size_t c = *(size_t *)(in+i);
1136 *(size_t *)(out+i) = c^*(size_t *)(ctx->EKi.c+i);
1137 *(size_t *)(ctx->Xi.c+i) ^= c;
1138 }
1139 GCM_MUL(ctx,Xi);
1140 out += 16;
1141 in += 16;
1142 len -= 16;
1143 }
1144#endif
1145 if (len) {
1146 (*block)(ctx->Yi.c,ctx->EKi.c,key);
1147 ++ctr;
1148 if (is_endian.little)
1149 PUTU32(ctx->Yi.c+12,ctr);
1150 else
1151 ctx->Yi.d[3] = ctr;
1152 while (len--) {
1153 u8 c = in[n];
1154 ctx->Xi.c[n] ^= c;
1155 out[n] = c^ctx->EKi.c[n];
1156 ++n;
1157 }
1158 }
1159
1160 ctx->mres = n;
1161 return 0;
1162 } while(0);
1163#endif
1164 for (i=0;i<len;++i) {
1165 u8 c;
1166 if (n==0) {
1167 (*block)(ctx->Yi.c,ctx->EKi.c,key);
1168 ++ctr;
1169 if (is_endian.little)
1170 PUTU32(ctx->Yi.c+12,ctr);
1171 else
1172 ctx->Yi.d[3] = ctr;
1173 }
1174 c = in[i];
1175 out[i] = c^ctx->EKi.c[n];
1176 ctx->Xi.c[n] ^= c;
1177 n = (n+1)%16;
1178 if (n==0)
1179 GCM_MUL(ctx,Xi);
1180 }
1181
1182 ctx->mres = n;
1183 return 0;
1184}
1185
1186int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
1187 const unsigned char *in, unsigned char *out,
1188 size_t len, ctr128_f stream)
1189{
1190 const union { long one; char little; } is_endian = {1};
1191 unsigned int n, ctr;
1192 size_t i;
1193 u64 mlen = ctx->len.u[1];
1194 void *key = ctx->key;
1195#ifdef GCM_FUNCREF_4BIT
1196 void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
1197# ifdef GHASH
1198 void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
1199 const u8 *inp,size_t len) = ctx->ghash;
1200# endif
1201#endif
1202
1203 mlen += len;
1204 if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
1205 return -1;
1206 ctx->len.u[1] = mlen;
1207
1208 if (ctx->ares) {
1209 /* First call to encrypt finalizes GHASH(AAD) */
1210 GCM_MUL(ctx,Xi);
1211 ctx->ares = 0;
1212 }
1213
1214 if (is_endian.little)
1215 ctr = GETU32(ctx->Yi.c+12);
1216 else
1217 ctr = ctx->Yi.d[3];
1218
1219 n = ctx->mres;
1220 if (n) {
1221 while (n && len) {
1222 ctx->Xi.c[n] ^= *(out++) = *(in++)^ctx->EKi.c[n];
1223 --len;
1224 n = (n+1)%16;
1225 }
1226 if (n==0) GCM_MUL(ctx,Xi);
1227 else {
1228 ctx->mres = n;
1229 return 0;
1230 }
1231 }
1232#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1233 while (len>=GHASH_CHUNK) {
1234 (*stream)(in,out,GHASH_CHUNK/16,key,ctx->Yi.c);
1235 ctr += GHASH_CHUNK/16;
1236 if (is_endian.little)
1237 PUTU32(ctx->Yi.c+12,ctr);
1238 else
1239 ctx->Yi.d[3] = ctr;
1240 GHASH(ctx,out,GHASH_CHUNK);
1241 out += GHASH_CHUNK;
1242 in += GHASH_CHUNK;
1243 len -= GHASH_CHUNK;
1244 }
1245#endif
1246 if ((i = (len&(size_t)-16))) {
1247 size_t j=i/16;
1248
1249 (*stream)(in,out,j,key,ctx->Yi.c);
1250 ctr += (unsigned int)j;
1251 if (is_endian.little)
1252 PUTU32(ctx->Yi.c+12,ctr);
1253 else
1254 ctx->Yi.d[3] = ctr;
1255 in += i;
1256 len -= i;
1257#if defined(GHASH)
1258 GHASH(ctx,out,i);
1259 out += i;
1260#else
1261 while (j--) {
1262 for (i=0;i<16;++i) ctx->Xi.c[i] ^= out[i];
1263 GCM_MUL(ctx,Xi);
1264 out += 16;
1265 }
1266#endif
1267 }
1268 if (len) {
1269 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,key);
1270 ++ctr;
1271 if (is_endian.little)
1272 PUTU32(ctx->Yi.c+12,ctr);
1273 else
1274 ctx->Yi.d[3] = ctr;
1275 while (len--) {
1276 ctx->Xi.c[n] ^= out[n] = in[n]^ctx->EKi.c[n];
1277 ++n;
1278 }
1279 }
1280
1281 ctx->mres = n;
1282 return 0;
1283}
1284
1285int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
1286 const unsigned char *in, unsigned char *out,
1287 size_t len,ctr128_f stream)
1288{
1289 const union { long one; char little; } is_endian = {1};
1290 unsigned int n, ctr;
1291 size_t i;
1292 u64 mlen = ctx->len.u[1];
1293 void *key = ctx->key;
1294#ifdef GCM_FUNCREF_4BIT
1295 void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
1296# ifdef GHASH
1297 void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
1298 const u8 *inp,size_t len) = ctx->ghash;
1299# endif
1300#endif
1301
1302 mlen += len;
1303 if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
1304 return -1;
1305 ctx->len.u[1] = mlen;
1306
1307 if (ctx->ares) {
1308 /* First call to decrypt finalizes GHASH(AAD) */
1309 GCM_MUL(ctx,Xi);
1310 ctx->ares = 0;
1311 }
1312
1313 if (is_endian.little)
1314 ctr = GETU32(ctx->Yi.c+12);
1315 else
1316 ctr = ctx->Yi.d[3];
1317
1318 n = ctx->mres;
1319 if (n) {
1320 while (n && len) {
1321 u8 c = *(in++);
1322 *(out++) = c^ctx->EKi.c[n];
1323 ctx->Xi.c[n] ^= c;
1324 --len;
1325 n = (n+1)%16;
1326 }
1327 if (n==0) GCM_MUL (ctx,Xi);
1328 else {
1329 ctx->mres = n;
1330 return 0;
1331 }
1332 }
1333#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1334 while (len>=GHASH_CHUNK) {
1335 GHASH(ctx,in,GHASH_CHUNK);
1336 (*stream)(in,out,GHASH_CHUNK/16,key,ctx->Yi.c);
1337 ctr += GHASH_CHUNK/16;
1338 if (is_endian.little)
1339 PUTU32(ctx->Yi.c+12,ctr);
1340 else
1341 ctx->Yi.d[3] = ctr;
1342 out += GHASH_CHUNK;
1343 in += GHASH_CHUNK;
1344 len -= GHASH_CHUNK;
1345 }
1346#endif
1347 if ((i = (len&(size_t)-16))) {
1348 size_t j=i/16;
1349
1350#if defined(GHASH)
1351 GHASH(ctx,in,i);
1352#else
1353 while (j--) {
1354 size_t k;
1355 for (k=0;k<16;++k) ctx->Xi.c[k] ^= in[k];
1356 GCM_MUL(ctx,Xi);
1357 in += 16;
1358 }
1359 j = i/16;
1360 in -= i;
1361#endif
1362 (*stream)(in,out,j,key,ctx->Yi.c);
1363 ctr += (unsigned int)j;
1364 if (is_endian.little)
1365 PUTU32(ctx->Yi.c+12,ctr);
1366 else
1367 ctx->Yi.d[3] = ctr;
1368 out += i;
1369 in += i;
1370 len -= i;
1371 }
1372 if (len) {
1373 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,key);
1374 ++ctr;
1375 if (is_endian.little)
1376 PUTU32(ctx->Yi.c+12,ctr);
1377 else
1378 ctx->Yi.d[3] = ctr;
1379 while (len--) {
1380 u8 c = in[n];
1381 ctx->Xi.c[n] ^= c;
1382 out[n] = c^ctx->EKi.c[n];
1383 ++n;
1384 }
1385 }
1386
1387 ctx->mres = n;
1388 return 0;
1389}
1390
1391int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx,const unsigned char *tag,
1392 size_t len)
1393{
1394 const union { long one; char little; } is_endian = {1};
1395 u64 alen = ctx->len.u[0]<<3;
1396 u64 clen = ctx->len.u[1]<<3;
1397#ifdef GCM_FUNCREF_4BIT
1398 void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
1399#endif
1400
1401 if (ctx->mres)
1402 GCM_MUL(ctx,Xi);
1403
1404 if (is_endian.little) {
1405#ifdef BSWAP8
1406 alen = BSWAP8(alen);
1407 clen = BSWAP8(clen);
1408#else
1409 u8 *p = ctx->len.c;
1410
1411 ctx->len.u[0] = alen;
1412 ctx->len.u[1] = clen;
1413
1414 alen = (u64)GETU32(p) <<32|GETU32(p+4);
1415 clen = (u64)GETU32(p+8)<<32|GETU32(p+12);
1416#endif
1417 }
1418
1419 ctx->Xi.u[0] ^= alen;
1420 ctx->Xi.u[1] ^= clen;
1421 GCM_MUL(ctx,Xi);
1422
1423 ctx->Xi.u[0] ^= ctx->EK0.u[0];
1424 ctx->Xi.u[1] ^= ctx->EK0.u[1];
1425
1426 if (tag && len<=sizeof(ctx->Xi))
1427 return memcmp(ctx->Xi.c,tag,len);
1428 else
1429 return -1;
1430}
1431
1432void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len)
1433{
1434 CRYPTO_gcm128_finish(ctx, NULL, 0);
1435 memcpy(tag, ctx->Xi.c, len<=sizeof(ctx->Xi.c)?len:sizeof(ctx->Xi.c));
1436}
1437
1438GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block)
1439{
1440 GCM128_CONTEXT *ret;
1441
1442 if ((ret = (GCM128_CONTEXT *)OPENSSL_malloc(sizeof(GCM128_CONTEXT))))
1443 CRYPTO_gcm128_init(ret,key,block);
1444
1445 return ret;
1446}
1447
1448void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx)
1449{
1450 if (ctx) {
1451 OPENSSL_cleanse(ctx,sizeof(*ctx));
1452 OPENSSL_free(ctx);
1453 }
1454}
1455
1456#if defined(SELFTEST)
1457#include <stdio.h>
1458#include <openssl/aes.h>
1459
1460/* Test Case 1 */
1461static const u8 K1[16],
1462 *P1=NULL,
1463 *A1=NULL,
1464 IV1[12],
1465 *C1=NULL,
1466 T1[]= {0x58,0xe2,0xfc,0xce,0xfa,0x7e,0x30,0x61,0x36,0x7f,0x1d,0x57,0xa4,0xe7,0x45,0x5a};
1467
1468/* Test Case 2 */
1469#define K2 K1
1470#define A2 A1
1471#define IV2 IV1
1472static const u8 P2[16],
1473 C2[]= {0x03,0x88,0xda,0xce,0x60,0xb6,0xa3,0x92,0xf3,0x28,0xc2,0xb9,0x71,0xb2,0xfe,0x78},
1474 T2[]= {0xab,0x6e,0x47,0xd4,0x2c,0xec,0x13,0xbd,0xf5,0x3a,0x67,0xb2,0x12,0x57,0xbd,0xdf};
1475
1476/* Test Case 3 */
1477#define A3 A2
1478static const u8 K3[]= {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08},
1479 P3[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1480 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1481 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1482 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
1483 IV3[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
1484 C3[]= {0x42,0x83,0x1e,0xc2,0x21,0x77,0x74,0x24,0x4b,0x72,0x21,0xb7,0x84,0xd0,0xd4,0x9c,
1485 0xe3,0xaa,0x21,0x2f,0x2c,0x02,0xa4,0xe0,0x35,0xc1,0x7e,0x23,0x29,0xac,0xa1,0x2e,
1486 0x21,0xd5,0x14,0xb2,0x54,0x66,0x93,0x1c,0x7d,0x8f,0x6a,0x5a,0xac,0x84,0xaa,0x05,
1487 0x1b,0xa3,0x0b,0x39,0x6a,0x0a,0xac,0x97,0x3d,0x58,0xe0,0x91,0x47,0x3f,0x59,0x85},
1488 T3[]= {0x4d,0x5c,0x2a,0xf3,0x27,0xcd,0x64,0xa6,0x2c,0xf3,0x5a,0xbd,0x2b,0xa6,0xfa,0xb4};
1489
1490/* Test Case 4 */
1491#define K4 K3
1492#define IV4 IV3
1493static const u8 P4[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1494 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1495 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1496 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
1497 A4[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1498 0xab,0xad,0xda,0xd2},
1499 C4[]= {0x42,0x83,0x1e,0xc2,0x21,0x77,0x74,0x24,0x4b,0x72,0x21,0xb7,0x84,0xd0,0xd4,0x9c,
1500 0xe3,0xaa,0x21,0x2f,0x2c,0x02,0xa4,0xe0,0x35,0xc1,0x7e,0x23,0x29,0xac,0xa1,0x2e,
1501 0x21,0xd5,0x14,0xb2,0x54,0x66,0x93,0x1c,0x7d,0x8f,0x6a,0x5a,0xac,0x84,0xaa,0x05,
1502 0x1b,0xa3,0x0b,0x39,0x6a,0x0a,0xac,0x97,0x3d,0x58,0xe0,0x91},
1503 T4[]= {0x5b,0xc9,0x4f,0xbc,0x32,0x21,0xa5,0xdb,0x94,0xfa,0xe9,0x5a,0xe7,0x12,0x1a,0x47};
1504
1505/* Test Case 5 */
1506#define K5 K4
1507#define P5 P4
1508#define A5 A4
1509static const u8 IV5[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
1510 C5[]= {0x61,0x35,0x3b,0x4c,0x28,0x06,0x93,0x4a,0x77,0x7f,0xf5,0x1f,0xa2,0x2a,0x47,0x55,
1511 0x69,0x9b,0x2a,0x71,0x4f,0xcd,0xc6,0xf8,0x37,0x66,0xe5,0xf9,0x7b,0x6c,0x74,0x23,
1512 0x73,0x80,0x69,0x00,0xe4,0x9f,0x24,0xb2,0x2b,0x09,0x75,0x44,0xd4,0x89,0x6b,0x42,
1513 0x49,0x89,0xb5,0xe1,0xeb,0xac,0x0f,0x07,0xc2,0x3f,0x45,0x98},
1514 T5[]= {0x36,0x12,0xd2,0xe7,0x9e,0x3b,0x07,0x85,0x56,0x1b,0xe1,0x4a,0xac,0xa2,0xfc,0xcb};
1515
1516/* Test Case 6 */
1517#define K6 K5
1518#define P6 P5
1519#define A6 A5
1520static const u8 IV6[]= {0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
1521 0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
1522 0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
1523 0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
1524 C6[]= {0x8c,0xe2,0x49,0x98,0x62,0x56,0x15,0xb6,0x03,0xa0,0x33,0xac,0xa1,0x3f,0xb8,0x94,
1525 0xbe,0x91,0x12,0xa5,0xc3,0xa2,0x11,0xa8,0xba,0x26,0x2a,0x3c,0xca,0x7e,0x2c,0xa7,
1526 0x01,0xe4,0xa9,0xa4,0xfb,0xa4,0x3c,0x90,0xcc,0xdc,0xb2,0x81,0xd4,0x8c,0x7c,0x6f,
1527 0xd6,0x28,0x75,0xd2,0xac,0xa4,0x17,0x03,0x4c,0x34,0xae,0xe5},
1528 T6[]= {0x61,0x9c,0xc5,0xae,0xff,0xfe,0x0b,0xfa,0x46,0x2a,0xf4,0x3c,0x16,0x99,0xd0,0x50};
1529
1530/* Test Case 7 */
1531static const u8 K7[24],
1532 *P7=NULL,
1533 *A7=NULL,
1534 IV7[12],
1535 *C7=NULL,
1536 T7[]= {0xcd,0x33,0xb2,0x8a,0xc7,0x73,0xf7,0x4b,0xa0,0x0e,0xd1,0xf3,0x12,0x57,0x24,0x35};
1537
1538/* Test Case 8 */
1539#define K8 K7
1540#define IV8 IV7
1541#define A8 A7
1542static const u8 P8[16],
1543 C8[]= {0x98,0xe7,0x24,0x7c,0x07,0xf0,0xfe,0x41,0x1c,0x26,0x7e,0x43,0x84,0xb0,0xf6,0x00},
1544 T8[]= {0x2f,0xf5,0x8d,0x80,0x03,0x39,0x27,0xab,0x8e,0xf4,0xd4,0x58,0x75,0x14,0xf0,0xfb};
1545
1546/* Test Case 9 */
1547#define A9 A8
1548static const u8 K9[]= {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08,
1549 0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c},
1550 P9[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1551 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1552 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1553 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
1554 IV9[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
1555 C9[]= {0x39,0x80,0xca,0x0b,0x3c,0x00,0xe8,0x41,0xeb,0x06,0xfa,0xc4,0x87,0x2a,0x27,0x57,
1556 0x85,0x9e,0x1c,0xea,0xa6,0xef,0xd9,0x84,0x62,0x85,0x93,0xb4,0x0c,0xa1,0xe1,0x9c,
1557 0x7d,0x77,0x3d,0x00,0xc1,0x44,0xc5,0x25,0xac,0x61,0x9d,0x18,0xc8,0x4a,0x3f,0x47,
1558 0x18,0xe2,0x44,0x8b,0x2f,0xe3,0x24,0xd9,0xcc,0xda,0x27,0x10,0xac,0xad,0xe2,0x56},
1559 T9[]= {0x99,0x24,0xa7,0xc8,0x58,0x73,0x36,0xbf,0xb1,0x18,0x02,0x4d,0xb8,0x67,0x4a,0x14};
1560
1561/* Test Case 10 */
1562#define K10 K9
1563#define IV10 IV9
1564static const u8 P10[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1565 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1566 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1567 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
1568 A10[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1569 0xab,0xad,0xda,0xd2},
1570 C10[]= {0x39,0x80,0xca,0x0b,0x3c,0x00,0xe8,0x41,0xeb,0x06,0xfa,0xc4,0x87,0x2a,0x27,0x57,
1571 0x85,0x9e,0x1c,0xea,0xa6,0xef,0xd9,0x84,0x62,0x85,0x93,0xb4,0x0c,0xa1,0xe1,0x9c,
1572 0x7d,0x77,0x3d,0x00,0xc1,0x44,0xc5,0x25,0xac,0x61,0x9d,0x18,0xc8,0x4a,0x3f,0x47,
1573 0x18,0xe2,0x44,0x8b,0x2f,0xe3,0x24,0xd9,0xcc,0xda,0x27,0x10},
1574 T10[]= {0x25,0x19,0x49,0x8e,0x80,0xf1,0x47,0x8f,0x37,0xba,0x55,0xbd,0x6d,0x27,0x61,0x8c};
1575
1576/* Test Case 11 */
1577#define K11 K10
1578#define P11 P10
1579#define A11 A10
1580static const u8 IV11[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
1581 C11[]= {0x0f,0x10,0xf5,0x99,0xae,0x14,0xa1,0x54,0xed,0x24,0xb3,0x6e,0x25,0x32,0x4d,0xb8,
1582 0xc5,0x66,0x63,0x2e,0xf2,0xbb,0xb3,0x4f,0x83,0x47,0x28,0x0f,0xc4,0x50,0x70,0x57,
1583 0xfd,0xdc,0x29,0xdf,0x9a,0x47,0x1f,0x75,0xc6,0x65,0x41,0xd4,0xd4,0xda,0xd1,0xc9,
1584 0xe9,0x3a,0x19,0xa5,0x8e,0x8b,0x47,0x3f,0xa0,0xf0,0x62,0xf7},
1585 T11[]= {0x65,0xdc,0xc5,0x7f,0xcf,0x62,0x3a,0x24,0x09,0x4f,0xcc,0xa4,0x0d,0x35,0x33,0xf8};
1586
1587/* Test Case 12 */
1588#define K12 K11
1589#define P12 P11
1590#define A12 A11
1591static const u8 IV12[]={0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
1592 0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
1593 0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
1594 0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
1595 C12[]= {0xd2,0x7e,0x88,0x68,0x1c,0xe3,0x24,0x3c,0x48,0x30,0x16,0x5a,0x8f,0xdc,0xf9,0xff,
1596 0x1d,0xe9,0xa1,0xd8,0xe6,0xb4,0x47,0xef,0x6e,0xf7,0xb7,0x98,0x28,0x66,0x6e,0x45,
1597 0x81,0xe7,0x90,0x12,0xaf,0x34,0xdd,0xd9,0xe2,0xf0,0x37,0x58,0x9b,0x29,0x2d,0xb3,
1598 0xe6,0x7c,0x03,0x67,0x45,0xfa,0x22,0xe7,0xe9,0xb7,0x37,0x3b},
1599 T12[]= {0xdc,0xf5,0x66,0xff,0x29,0x1c,0x25,0xbb,0xb8,0x56,0x8f,0xc3,0xd3,0x76,0xa6,0xd9};
1600
1601/* Test Case 13 */
1602static const u8 K13[32],
1603 *P13=NULL,
1604 *A13=NULL,
1605 IV13[12],
1606 *C13=NULL,
1607 T13[]={0x53,0x0f,0x8a,0xfb,0xc7,0x45,0x36,0xb9,0xa9,0x63,0xb4,0xf1,0xc4,0xcb,0x73,0x8b};
1608
1609/* Test Case 14 */
1610#define K14 K13
1611#define A14 A13
1612static const u8 P14[16],
1613 IV14[12],
1614 C14[]= {0xce,0xa7,0x40,0x3d,0x4d,0x60,0x6b,0x6e,0x07,0x4e,0xc5,0xd3,0xba,0xf3,0x9d,0x18},
1615 T14[]= {0xd0,0xd1,0xc8,0xa7,0x99,0x99,0x6b,0xf0,0x26,0x5b,0x98,0xb5,0xd4,0x8a,0xb9,0x19};
1616
1617/* Test Case 15 */
1618#define A15 A14
1619static const u8 K15[]= {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08,
1620 0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08},
1621 P15[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1622 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1623 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1624 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
1625 IV15[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
1626 C15[]= {0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
1627 0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
1628 0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
1629 0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62,0x89,0x80,0x15,0xad},
1630 T15[]= {0xb0,0x94,0xda,0xc5,0xd9,0x34,0x71,0xbd,0xec,0x1a,0x50,0x22,0x70,0xe3,0xcc,0x6c};
1631
1632/* Test Case 16 */
1633#define K16 K15
1634#define IV16 IV15
1635static const u8 P16[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1636 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1637 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1638 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
1639 A16[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1640 0xab,0xad,0xda,0xd2},
1641 C16[]= {0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
1642 0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
1643 0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
1644 0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62},
1645 T16[]= {0x76,0xfc,0x6e,0xce,0x0f,0x4e,0x17,0x68,0xcd,0xdf,0x88,0x53,0xbb,0x2d,0x55,0x1b};
1646
1647/* Test Case 17 */
1648#define K17 K16
1649#define P17 P16
1650#define A17 A16
1651static const u8 IV17[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
1652 C17[]= {0xc3,0x76,0x2d,0xf1,0xca,0x78,0x7d,0x32,0xae,0x47,0xc1,0x3b,0xf1,0x98,0x44,0xcb,
1653 0xaf,0x1a,0xe1,0x4d,0x0b,0x97,0x6a,0xfa,0xc5,0x2f,0xf7,0xd7,0x9b,0xba,0x9d,0xe0,
1654 0xfe,0xb5,0x82,0xd3,0x39,0x34,0xa4,0xf0,0x95,0x4c,0xc2,0x36,0x3b,0xc7,0x3f,0x78,
1655 0x62,0xac,0x43,0x0e,0x64,0xab,0xe4,0x99,0xf4,0x7c,0x9b,0x1f},
1656 T17[]= {0x3a,0x33,0x7d,0xbf,0x46,0xa7,0x92,0xc4,0x5e,0x45,0x49,0x13,0xfe,0x2e,0xa8,0xf2};
1657
1658/* Test Case 18 */
1659#define K18 K17
1660#define P18 P17
1661#define A18 A17
1662static const u8 IV18[]={0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
1663 0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
1664 0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
1665 0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
1666 C18[]= {0x5a,0x8d,0xef,0x2f,0x0c,0x9e,0x53,0xf1,0xf7,0x5d,0x78,0x53,0x65,0x9e,0x2a,0x20,
1667 0xee,0xb2,0xb2,0x2a,0xaf,0xde,0x64,0x19,0xa0,0x58,0xab,0x4f,0x6f,0x74,0x6b,0xf4,
1668 0x0f,0xc0,0xc3,0xb7,0x80,0xf2,0x44,0x45,0x2d,0xa3,0xeb,0xf1,0xc5,0xd8,0x2c,0xde,
1669 0xa2,0x41,0x89,0x97,0x20,0x0e,0xf8,0x2e,0x44,0xae,0x7e,0x3f},
1670 T18[]= {0xa4,0x4a,0x82,0x66,0xee,0x1c,0x8e,0xb0,0xc8,0xb5,0xd4,0xcf,0x5a,0xe9,0xf1,0x9a};
1671
1672#define TEST_CASE(n) do { \
1673 u8 out[sizeof(P##n)]; \
1674 AES_set_encrypt_key(K##n,sizeof(K##n)*8,&key); \
1675 CRYPTO_gcm128_init(&ctx,&key,(block128_f)AES_encrypt); \
1676 CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n)); \
1677 memset(out,0,sizeof(out)); \
1678 if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n)); \
1679 if (P##n) CRYPTO_gcm128_encrypt(&ctx,P##n,out,sizeof(out)); \
1680 if (CRYPTO_gcm128_finish(&ctx,T##n,16) || \
1681 (C##n && memcmp(out,C##n,sizeof(out)))) \
1682 ret++, printf ("encrypt test#%d failed.\n",n); \
1683 CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n)); \
1684 memset(out,0,sizeof(out)); \
1685 if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n)); \
1686 if (C##n) CRYPTO_gcm128_decrypt(&ctx,C##n,out,sizeof(out)); \
1687 if (CRYPTO_gcm128_finish(&ctx,T##n,16) || \
1688 (P##n && memcmp(out,P##n,sizeof(out)))) \
1689 ret++, printf ("decrypt test#%d failed.\n",n); \
1690 } while(0)
1691
1692int main()
1693{
1694 GCM128_CONTEXT ctx;
1695 AES_KEY key;
1696 int ret=0;
1697
1698 TEST_CASE(1);
1699 TEST_CASE(2);
1700 TEST_CASE(3);
1701 TEST_CASE(4);
1702 TEST_CASE(5);
1703 TEST_CASE(6);
1704 TEST_CASE(7);
1705 TEST_CASE(8);
1706 TEST_CASE(9);
1707 TEST_CASE(10);
1708 TEST_CASE(11);
1709 TEST_CASE(12);
1710 TEST_CASE(13);
1711 TEST_CASE(14);
1712 TEST_CASE(15);
1713 TEST_CASE(16);
1714 TEST_CASE(17);
1715 TEST_CASE(18);
1716
1717#ifdef OPENSSL_CPUID_OBJ
1718 {
1719 size_t start,stop,gcm_t,ctr_t,OPENSSL_rdtsc();
1720 union { u64 u; u8 c[1024]; } buf;
1721 int i;
1722
1723 AES_set_encrypt_key(K1,sizeof(K1)*8,&key);
1724 CRYPTO_gcm128_init(&ctx,&key,(block128_f)AES_encrypt);
1725 CRYPTO_gcm128_setiv(&ctx,IV1,sizeof(IV1));
1726
1727 CRYPTO_gcm128_encrypt(&ctx,buf.c,buf.c,sizeof(buf));
1728 start = OPENSSL_rdtsc();
1729 CRYPTO_gcm128_encrypt(&ctx,buf.c,buf.c,sizeof(buf));
1730 gcm_t = OPENSSL_rdtsc() - start;
1731
1732 CRYPTO_ctr128_encrypt(buf.c,buf.c,sizeof(buf),
1733 &key,ctx.Yi.c,ctx.EKi.c,&ctx.mres,
1734 (block128_f)AES_encrypt);
1735 start = OPENSSL_rdtsc();
1736 CRYPTO_ctr128_encrypt(buf.c,buf.c,sizeof(buf),
1737 &key,ctx.Yi.c,ctx.EKi.c,&ctx.mres,
1738 (block128_f)AES_encrypt);
1739 ctr_t = OPENSSL_rdtsc() - start;
1740
1741 printf("%.2f-%.2f=%.2f\n",
1742 gcm_t/(double)sizeof(buf),
1743 ctr_t/(double)sizeof(buf),
1744 (gcm_t-ctr_t)/(double)sizeof(buf));
1745#ifdef GHASH
1746 GHASH(&ctx,buf.c,sizeof(buf));
1747 start = OPENSSL_rdtsc();
1748 for (i=0;i<100;++i) GHASH(&ctx,buf.c,sizeof(buf));
1749 gcm_t = OPENSSL_rdtsc() - start;
1750 printf("%.2f\n",gcm_t/(double)sizeof(buf)/(double)i);
1751#endif
1752 }
1753#endif
1754
1755 return ret;
1756}
1757#endif
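
For orientation, here is a minimal caller-side sketch (not part of the imported file) showing how the CRYPTO_gcm128_* entry points above fit together for a one-shot encrypt-and-tag, following the same sequence as the SELFTEST harness. The helper name, the 96-bit IV, the AES-128 key setup and all buffer names are illustrative assumptions.

#include <openssl/aes.h>
#include <openssl/modes.h>

/* Illustrative only: encrypt pt[0..len) under a 128-bit AES key and
 * emit a 16-byte tag. Returns 0 on success, -1 on failure. */
static int gcm_seal_sketch(const unsigned char k[16],
                           const unsigned char iv[12],
                           const unsigned char *aad, size_t alen,
                           const unsigned char *pt, unsigned char *ct,
                           size_t len, unsigned char tag[16])
{
    AES_KEY aes;
    GCM128_CONTEXT *gcm;

    AES_set_encrypt_key(k, 128, &aes);
    if ((gcm = CRYPTO_gcm128_new(&aes, (block128_f)AES_encrypt)) == NULL)
        return -1;
    CRYPTO_gcm128_setiv(gcm, iv, 12);
    if ((alen && CRYPTO_gcm128_aad(gcm, aad, alen)) ||
        (len && CRYPTO_gcm128_encrypt(gcm, pt, ct, len))) {
        CRYPTO_gcm128_release(gcm);
        return -1;
    }
    CRYPTO_gcm128_tag(gcm, tag, 16);
    CRYPTO_gcm128_release(gcm);
    return 0;
}

Decryption is the same sequence with CRYPTO_gcm128_decrypt, finishing with CRYPTO_gcm128_finish(gcm, expected_tag, 16), which returns 0 only when the tag matches.
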
diff --git a/src/lib/libssl/src/crypto/modes/modes.h b/src/lib/libssl/src/crypto/modes/modes.h
index af8d97d795..f18215bb2b 100644
--- a/src/lib/libssl/src/crypto/modes/modes.h
+++ b/src/lib/libssl/src/crypto/modes/modes.h
@@ -15,6 +15,14 @@ typedef void (*cbc128_f)(const unsigned char *in, unsigned char *out,
15 size_t len, const void *key, 15 size_t len, const void *key,
16 unsigned char ivec[16], int enc); 16 unsigned char ivec[16], int enc);
17 17
18typedef void (*ctr128_f)(const unsigned char *in, unsigned char *out,
19 size_t blocks, const void *key,
20 const unsigned char ivec[16]);
21
22typedef void (*ccm128_f)(const unsigned char *in, unsigned char *out,
23 size_t blocks, const void *key,
24 const unsigned char ivec[16],unsigned char cmac[16]);
25
18void CRYPTO_cbc128_encrypt(const unsigned char *in, unsigned char *out, 26void CRYPTO_cbc128_encrypt(const unsigned char *in, unsigned char *out,
19 size_t len, const void *key, 27 size_t len, const void *key,
20 unsigned char ivec[16], block128_f block); 28 unsigned char ivec[16], block128_f block);
@@ -27,6 +35,11 @@ void CRYPTO_ctr128_encrypt(const unsigned char *in, unsigned char *out,
27 unsigned char ivec[16], unsigned char ecount_buf[16], 35 unsigned char ivec[16], unsigned char ecount_buf[16],
28 unsigned int *num, block128_f block); 36 unsigned int *num, block128_f block);
29 37
38void CRYPTO_ctr128_encrypt_ctr32(const unsigned char *in, unsigned char *out,
39 size_t len, const void *key,
40 unsigned char ivec[16], unsigned char ecount_buf[16],
41 unsigned int *num, ctr128_f ctr);
42
30void CRYPTO_ofb128_encrypt(const unsigned char *in, unsigned char *out, 43void CRYPTO_ofb128_encrypt(const unsigned char *in, unsigned char *out,
31 size_t len, const void *key, 44 size_t len, const void *key,
32 unsigned char ivec[16], int *num, 45 unsigned char ivec[16], int *num,
@@ -57,3 +70,66 @@ size_t CRYPTO_cts128_decrypt_block(const unsigned char *in, unsigned char *out,
57size_t CRYPTO_cts128_decrypt(const unsigned char *in, unsigned char *out, 70size_t CRYPTO_cts128_decrypt(const unsigned char *in, unsigned char *out,
58 size_t len, const void *key, 71 size_t len, const void *key,
59 unsigned char ivec[16], cbc128_f cbc); 72 unsigned char ivec[16], cbc128_f cbc);
73
74size_t CRYPTO_nistcts128_encrypt_block(const unsigned char *in, unsigned char *out,
75 size_t len, const void *key,
76 unsigned char ivec[16], block128_f block);
77size_t CRYPTO_nistcts128_encrypt(const unsigned char *in, unsigned char *out,
78 size_t len, const void *key,
79 unsigned char ivec[16], cbc128_f cbc);
80size_t CRYPTO_nistcts128_decrypt_block(const unsigned char *in, unsigned char *out,
81 size_t len, const void *key,
82 unsigned char ivec[16], block128_f block);
83size_t CRYPTO_nistcts128_decrypt(const unsigned char *in, unsigned char *out,
84 size_t len, const void *key,
85 unsigned char ivec[16], cbc128_f cbc);
86
87typedef struct gcm128_context GCM128_CONTEXT;
88
89GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block);
90void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx,void *key,block128_f block);
91void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const unsigned char *iv,
92 size_t len);
93int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const unsigned char *aad,
94 size_t len);
95int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
96 const unsigned char *in, unsigned char *out,
97 size_t len);
98int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
99 const unsigned char *in, unsigned char *out,
100 size_t len);
101int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
102 const unsigned char *in, unsigned char *out,
103 size_t len, ctr128_f stream);
104int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
105 const unsigned char *in, unsigned char *out,
106 size_t len, ctr128_f stream);
107int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx,const unsigned char *tag,
108 size_t len);
109void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len);
110void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx);
111
112typedef struct ccm128_context CCM128_CONTEXT;
113
114void CRYPTO_ccm128_init(CCM128_CONTEXT *ctx,
115 unsigned int M, unsigned int L, void *key,block128_f block);
116int CRYPTO_ccm128_setiv(CCM128_CONTEXT *ctx,
117 const unsigned char *nonce, size_t nlen, size_t mlen);
118void CRYPTO_ccm128_aad(CCM128_CONTEXT *ctx,
119 const unsigned char *aad, size_t alen);
120int CRYPTO_ccm128_encrypt(CCM128_CONTEXT *ctx,
121 const unsigned char *inp, unsigned char *out, size_t len);
122int CRYPTO_ccm128_decrypt(CCM128_CONTEXT *ctx,
123 const unsigned char *inp, unsigned char *out, size_t len);
124int CRYPTO_ccm128_encrypt_ccm64(CCM128_CONTEXT *ctx,
125 const unsigned char *inp, unsigned char *out, size_t len,
126 ccm128_f stream);
127int CRYPTO_ccm128_decrypt_ccm64(CCM128_CONTEXT *ctx,
128 const unsigned char *inp, unsigned char *out, size_t len,
129 ccm128_f stream);
130size_t CRYPTO_ccm128_tag(CCM128_CONTEXT *ctx, unsigned char *tag, size_t len);
131
132typedef struct xts128_context XTS128_CONTEXT;
133
134int CRYPTO_xts128_encrypt(const XTS128_CONTEXT *ctx, const unsigned char iv[16],
135 const unsigned char *inp, unsigned char *out, size_t len, int enc);
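
The same hunk also adds the CCM and XTS entry points. Below is a rough sketch of how the CCM interface is driven (init, setiv, aad, encrypt, tag); note that CCM128_CONTEXT is opaque in this public header and its full definition lives in the private modes_lcl.h added further down, so the snippet assumes in-tree compilation, and every name and buffer is hypothetical.

#include <openssl/aes.h>
#include "modes_lcl.h"          /* private header: full CCM128_CONTEXT definition */

/* Illustrative only: CCM with a 13-byte nonce (so L=2) and a 12-byte tag. */
static int ccm_seal_sketch(const unsigned char k[16],
                           const unsigned char nonce[13],
                           const unsigned char *aad, size_t alen,
                           const unsigned char *msg, unsigned char *out,
                           size_t mlen, unsigned char tag[12])
{
    CCM128_CONTEXT ccm;
    AES_KEY aes;

    AES_set_encrypt_key(k, 128, &aes);
    CRYPTO_ccm128_init(&ccm, 12, 2, &aes, (block128_f)AES_encrypt);
    if (CRYPTO_ccm128_setiv(&ccm, nonce, 13, mlen))
        return -1;              /* e.g. nonce too short for this L */
    if (alen)
        CRYPTO_ccm128_aad(&ccm, aad, alen);
    if (CRYPTO_ccm128_encrypt(&ccm, msg, out, mlen))
        return -1;              /* encryption failed */
    return CRYPTO_ccm128_tag(&ccm, tag, 12) ? 0 : -1;
}
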
diff --git a/src/lib/libssl/src/crypto/modes/modes_lcl.h b/src/lib/libssl/src/crypto/modes/modes_lcl.h
new file mode 100644
index 0000000000..b6dc3c336f
--- /dev/null
+++ b/src/lib/libssl/src/crypto/modes/modes_lcl.h
@@ -0,0 +1,131 @@
1/* ====================================================================
2 * Copyright (c) 2010 The OpenSSL Project. All rights reserved.
3 *
4 * Redistribution and use is governed by OpenSSL license.
5 * ====================================================================
6 */
7
8#include <openssl/modes.h>
9
10
11#if (defined(_WIN32) || defined(_WIN64)) && !defined(__MINGW32__)
12typedef __int64 i64;
13typedef unsigned __int64 u64;
14#define U64(C) C##UI64
15#elif defined(__arch64__)
16typedef long i64;
17typedef unsigned long u64;
18#define U64(C) C##UL
19#else
20typedef long long i64;
21typedef unsigned long long u64;
22#define U64(C) C##ULL
23#endif
24
25typedef unsigned int u32;
26typedef unsigned char u8;
27
28#define STRICT_ALIGNMENT 1
29#if defined(__i386) || defined(__i386__) || \
30 defined(__x86_64) || defined(__x86_64__) || \
31 defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64) || \
32 defined(__s390__) || defined(__s390x__) || \
33 ( (defined(__arm__) || defined(__arm)) && \
34 (defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || \
35 defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__)) )
36# undef STRICT_ALIGNMENT
37#endif
38
39#if !defined(PEDANTIC) && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
40#if defined(__GNUC__) && __GNUC__>=2
41# if defined(__x86_64) || defined(__x86_64__)
42# define BSWAP8(x) ({ u64 ret=(x); \
43 asm ("bswapq %0" \
44 : "+r"(ret)); ret; })
45# define BSWAP4(x) ({ u32 ret=(x); \
46 asm ("bswapl %0" \
47 : "+r"(ret)); ret; })
48# elif (defined(__i386) || defined(__i386__)) && !defined(I386_ONLY)
49# define BSWAP8(x) ({ u32 lo=(u64)(x)>>32,hi=(x); \
50 asm ("bswapl %0; bswapl %1" \
51 : "+r"(hi),"+r"(lo)); \
52 (u64)hi<<32|lo; })
53# define BSWAP4(x) ({ u32 ret=(x); \
54 asm ("bswapl %0" \
55 : "+r"(ret)); ret; })
56# elif (defined(__arm__) || defined(__arm)) && !defined(STRICT_ALIGNMENT)
57# define BSWAP8(x) ({ u32 lo=(u64)(x)>>32,hi=(x); \
58 asm ("rev %0,%0; rev %1,%1" \
59 : "+r"(hi),"+r"(lo)); \
60 (u64)hi<<32|lo; })
61# define BSWAP4(x) ({ u32 ret; \
62 asm ("rev %0,%1" \
63 : "=r"(ret) : "r"((u32)(x))); \
64 ret; })
65# endif
66#elif defined(_MSC_VER)
67# if _MSC_VER>=1300
68# pragma intrinsic(_byteswap_uint64,_byteswap_ulong)
69# define BSWAP8(x) _byteswap_uint64((u64)(x))
70# define BSWAP4(x) _byteswap_ulong((u32)(x))
71# elif defined(_M_IX86)
72 __inline u32 _bswap4(u32 val) {
73 _asm mov eax,val
74 _asm bswap eax
75 }
76# define BSWAP4(x) _bswap4(x)
77# endif
78#endif
79#endif
80
81#if defined(BSWAP4) && !defined(STRICT_ALIGNMENT)
82#define GETU32(p) BSWAP4(*(const u32 *)(p))
83#define PUTU32(p,v) *(u32 *)(p) = BSWAP4(v)
84#else
85#define GETU32(p) ((u32)(p)[0]<<24|(u32)(p)[1]<<16|(u32)(p)[2]<<8|(u32)(p)[3])
86#define PUTU32(p,v) ((p)[0]=(u8)((v)>>24),(p)[1]=(u8)((v)>>16),(p)[2]=(u8)((v)>>8),(p)[3]=(u8)(v))
87#endif
88
89/* GCM definitions */
90
91typedef struct { u64 hi,lo; } u128;
92
93#ifdef TABLE_BITS
94#undef TABLE_BITS
95#endif
96/*
97 * Even though permitted values for TABLE_BITS are 8, 4 and 1, it should
98 * never be set to 8 [or 1]. For further information see gcm128.c.
99 */
100#define TABLE_BITS 4
101
102struct gcm128_context {
 103 /* The following six names match those used in the GCM specification */
104 union { u64 u[2]; u32 d[4]; u8 c[16]; } Yi,EKi,EK0,len,
105 Xi,H;
106 /* Relative position of Xi, H and pre-computed Htable is used
107 * in some assembler modules, i.e. don't change the order! */
108#if TABLE_BITS==8
109 u128 Htable[256];
110#else
111 u128 Htable[16];
112 void (*gmult)(u64 Xi[2],const u128 Htable[16]);
113 void (*ghash)(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
114#endif
115 unsigned int mres, ares;
116 block128_f block;
117 void *key;
118};
119
120struct xts128_context {
121 void *key1, *key2;
122 block128_f block1,block2;
123};
124
125struct ccm128_context {
126 union { u64 u[2]; u8 c[16]; } nonce, cmac;
127 u64 blocks;
128 block128_f block;
129 void *key;
130};
131
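
A quick note on the GETU32/PUTU32 macros defined above: whichever branch is selected, the intent is the same, the byte string is treated as big-endian regardless of host order. An illustrative two-liner (names are hypothetical):

    u8 b[4] = {0x01, 0x02, 0x03, 0x04};
    u32 v = GETU32(b);          /* v == 0x01020304 on any host */
    PUTU32(b, 0x0a0b0c0d);      /* b[] becomes {0x0a,0x0b,0x0c,0x0d} */
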
diff --git a/src/lib/libssl/src/crypto/modes/ofb128.c b/src/lib/libssl/src/crypto/modes/ofb128.c
index c732e2ec58..01c01702c4 100644
--- a/src/lib/libssl/src/crypto/modes/ofb128.c
+++ b/src/lib/libssl/src/crypto/modes/ofb128.c
@@ -48,7 +48,8 @@
48 * 48 *
49 */ 49 */
50 50
51#include "modes.h" 51#include <openssl/crypto.h>
52#include "modes_lcl.h"
52#include <string.h> 53#include <string.h>
53 54
54#ifndef MODES_DEBUG 55#ifndef MODES_DEBUG
@@ -58,14 +59,6 @@
58#endif 59#endif
59#include <assert.h> 60#include <assert.h>
60 61
61#define STRICT_ALIGNMENT
62#if defined(__i386) || defined(__i386__) || \
63 defined(__x86_64) || defined(__x86_64__) || \
64 defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64) || \
65 defined(__s390__) || defined(__s390x__)
66# undef STRICT_ALIGNMENT
67#endif
68
69/* The input and output encrypted as though 128bit ofb mode is being 62/* The input and output encrypted as though 128bit ofb mode is being
70 * used. The extra state information to record how much of the 63 * used. The extra state information to record how much of the
71 * 128bit block we have used is contained in *num; 64 * 128bit block we have used is contained in *num;
diff --git a/src/lib/libssl/src/crypto/modes/xts128.c b/src/lib/libssl/src/crypto/modes/xts128.c
new file mode 100644
index 0000000000..9cf27a25e9
--- /dev/null
+++ b/src/lib/libssl/src/crypto/modes/xts128.c
@@ -0,0 +1,187 @@
1/* ====================================================================
2 * Copyright (c) 2011 The OpenSSL Project. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in
13 * the documentation and/or other materials provided with the
14 * distribution.
15 *
16 * 3. All advertising materials mentioning features or use of this
17 * software must display the following acknowledgment:
18 * "This product includes software developed by the OpenSSL Project
19 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
20 *
21 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
22 * endorse or promote products derived from this software without
23 * prior written permission. For written permission, please contact
24 * openssl-core@openssl.org.
25 *
26 * 5. Products derived from this software may not be called "OpenSSL"
27 * nor may "OpenSSL" appear in their names without prior written
28 * permission of the OpenSSL Project.
29 *
30 * 6. Redistributions of any form whatsoever must retain the following
31 * acknowledgment:
32 * "This product includes software developed by the OpenSSL Project
33 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
34 *
35 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
36 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
37 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
38 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
39 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
40 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
41 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
42 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
43 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
44 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
45 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
46 * OF THE POSSIBILITY OF SUCH DAMAGE.
47 * ====================================================================
48 */
49
50#include <openssl/crypto.h>
51#include "modes_lcl.h"
52#include <string.h>
53
54#ifndef MODES_DEBUG
55# ifndef NDEBUG
56# define NDEBUG
57# endif
58#endif
59#include <assert.h>
60
61int CRYPTO_xts128_encrypt(const XTS128_CONTEXT *ctx, const unsigned char iv[16],
62 const unsigned char *inp, unsigned char *out,
63 size_t len, int enc)
64{
65 const union { long one; char little; } is_endian = {1};
66 union { u64 u[2]; u32 d[4]; u8 c[16]; } tweak, scratch;
67 unsigned int i;
68
69 if (len<16) return -1;
70
71 memcpy(tweak.c, iv, 16);
72
73 (*ctx->block2)(tweak.c,tweak.c,ctx->key2);
74
75 if (!enc && (len%16)) len-=16;
76
77 while (len>=16) {
78#if defined(STRICT_ALIGNMENT)
79 memcpy(scratch.c,inp,16);
80 scratch.u[0] ^= tweak.u[0];
81 scratch.u[1] ^= tweak.u[1];
82#else
83 scratch.u[0] = ((u64*)inp)[0]^tweak.u[0];
84 scratch.u[1] = ((u64*)inp)[1]^tweak.u[1];
85#endif
86 (*ctx->block1)(scratch.c,scratch.c,ctx->key1);
87#if defined(STRICT_ALIGNMENT)
88 scratch.u[0] ^= tweak.u[0];
89 scratch.u[1] ^= tweak.u[1];
90 memcpy(out,scratch.c,16);
91#else
92 ((u64*)out)[0] = scratch.u[0]^=tweak.u[0];
93 ((u64*)out)[1] = scratch.u[1]^=tweak.u[1];
94#endif
95 inp += 16;
96 out += 16;
97 len -= 16;
98
99 if (len==0) return 0;
100
101 if (is_endian.little) {
102 unsigned int carry,res;
103
104 res = 0x87&(((int)tweak.d[3])>>31);
105 carry = (unsigned int)(tweak.u[0]>>63);
106 tweak.u[0] = (tweak.u[0]<<1)^res;
107 tweak.u[1] = (tweak.u[1]<<1)|carry;
108 }
109 else {
110 size_t c;
111
112 for (c=0,i=0;i<16;++i) {
 113 /* '+' substitutes for '|', because c is 1 bit */
114 c += ((size_t)tweak.c[i])<<1;
115 tweak.c[i] = (u8)c;
116 c = c>>8;
117 }
118 tweak.c[0] ^= (u8)(0x87&(0-c));
119 }
120 }
121 if (enc) {
122 for (i=0;i<len;++i) {
123 u8 c = inp[i];
124 out[i] = scratch.c[i];
125 scratch.c[i] = c;
126 }
127 scratch.u[0] ^= tweak.u[0];
128 scratch.u[1] ^= tweak.u[1];
129 (*ctx->block1)(scratch.c,scratch.c,ctx->key1);
130 scratch.u[0] ^= tweak.u[0];
131 scratch.u[1] ^= tweak.u[1];
132 memcpy(out-16,scratch.c,16);
133 }
134 else {
135 union { u64 u[2]; u8 c[16]; } tweak1;
136
137 if (is_endian.little) {
138 unsigned int carry,res;
139
140 res = 0x87&(((int)tweak.d[3])>>31);
141 carry = (unsigned int)(tweak.u[0]>>63);
142 tweak1.u[0] = (tweak.u[0]<<1)^res;
143 tweak1.u[1] = (tweak.u[1]<<1)|carry;
144 }
145 else {
146 size_t c;
147
148 for (c=0,i=0;i<16;++i) {
 149 /* '+' substitutes for '|', because c is 1 bit */
150 c += ((size_t)tweak.c[i])<<1;
151 tweak1.c[i] = (u8)c;
152 c = c>>8;
153 }
154 tweak1.c[0] ^= (u8)(0x87&(0-c));
155 }
156#if defined(STRICT_ALIGNMENT)
157 memcpy(scratch.c,inp,16);
158 scratch.u[0] ^= tweak1.u[0];
159 scratch.u[1] ^= tweak1.u[1];
160#else
161 scratch.u[0] = ((u64*)inp)[0]^tweak1.u[0];
162 scratch.u[1] = ((u64*)inp)[1]^tweak1.u[1];
163#endif
164 (*ctx->block1)(scratch.c,scratch.c,ctx->key1);
165 scratch.u[0] ^= tweak1.u[0];
166 scratch.u[1] ^= tweak1.u[1];
167
168 for (i=0;i<len;++i) {
169 u8 c = inp[16+i];
170 out[16+i] = scratch.c[i];
171 scratch.c[i] = c;
172 }
173 scratch.u[0] ^= tweak.u[0];
174 scratch.u[1] ^= tweak.u[1];
175 (*ctx->block1)(scratch.c,scratch.c,ctx->key1);
176#if defined(STRICT_ALIGNMENT)
177 scratch.u[0] ^= tweak.u[0];
178 scratch.u[1] ^= tweak.u[1];
179 memcpy (out,scratch.c,16);
180#else
181 ((u64*)out)[0] = scratch.u[0]^tweak.u[0];
182 ((u64*)out)[1] = scratch.u[1]^tweak.u[1];
183#endif
184 }
185
186 return 0;
187}
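
Below the new file, a caller-side sketch (illustrative, not part of the commit) of AES-128-XTS encryption of one data unit. It assumes in-tree compilation so the XTS128_CONTEXT layout from modes_lcl.h is visible; key1/key2 are the two independent halves of the XTS key, and block2 is only used to encrypt the tweak, so it stays in the encrypt direction even when block1 is a decryption function.

#include <openssl/aes.h>
#include "modes_lcl.h"          /* private header: struct xts128_context */

/* Illustrative only: returns 0 on success, -1 if len < 16. */
static int xts_encrypt_sketch(const unsigned char k1[16],
                              const unsigned char k2[16],
                              const unsigned char tweak[16],
                              const unsigned char *in, unsigned char *out,
                              size_t len)
{
    XTS128_CONTEXT xts;
    AES_KEY aes1, aes2;

    AES_set_encrypt_key(k1, 128, &aes1);
    AES_set_encrypt_key(k2, 128, &aes2);
    xts.key1 = &aes1;  xts.block1 = (block128_f)AES_encrypt;
    xts.key2 = &aes2;  xts.block2 = (block128_f)AES_encrypt;

    return CRYPTO_xts128_encrypt(&xts, tweak, in, out, len, 1);
}
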
diff --git a/src/lib/libssl/src/crypto/o_fips.c b/src/lib/libssl/src/crypto/o_fips.c
new file mode 100644
index 0000000000..f6d1b21855
--- /dev/null
+++ b/src/lib/libssl/src/crypto/o_fips.c
@@ -0,0 +1,96 @@
 1/* Written by Stephen Henson (steve@openssl.org) for the OpenSSL
2 * project 2011.
3 */
4/* ====================================================================
5 * Copyright (c) 2011 The OpenSSL Project. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 *
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in
16 * the documentation and/or other materials provided with the
17 * distribution.
18 *
19 * 3. All advertising materials mentioning features or use of this
20 * software must display the following acknowledgment:
21 * "This product includes software developed by the OpenSSL Project
22 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
23 *
24 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
25 * endorse or promote products derived from this software without
26 * prior written permission. For written permission, please contact
27 * openssl-core@openssl.org.
28 *
29 * 5. Products derived from this software may not be called "OpenSSL"
30 * nor may "OpenSSL" appear in their names without prior written
31 * permission of the OpenSSL Project.
32 *
33 * 6. Redistributions of any form whatsoever must retain the following
34 * acknowledgment:
35 * "This product includes software developed by the OpenSSL Project
36 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
37 *
38 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
39 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
40 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
41 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
42 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
43 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
44 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
45 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
46 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
47 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
48 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
49 * OF THE POSSIBILITY OF SUCH DAMAGE.
50 * ====================================================================
51 *
52 * This product includes cryptographic software written by Eric Young
53 * (eay@cryptsoft.com). This product includes software written by Tim
54 * Hudson (tjh@cryptsoft.com).
55 *
56 */
57
58#include "cryptlib.h"
59#ifdef OPENSSL_FIPS
60#include <openssl/fips.h>
61#include <openssl/fips_rand.h>
62#include <openssl/rand.h>
63#endif
64
65int FIPS_mode(void)
66 {
67 OPENSSL_init();
68#ifdef OPENSSL_FIPS
69 return FIPS_module_mode();
70#else
71 return 0;
72#endif
73 }
74
75int FIPS_mode_set(int r)
76 {
77 OPENSSL_init();
78#ifdef OPENSSL_FIPS
79#ifndef FIPS_AUTH_USER_PASS
80#define FIPS_AUTH_USER_PASS "Default FIPS Crypto User Password"
81#endif
82 if (!FIPS_module_mode_set(r, FIPS_AUTH_USER_PASS))
83 return 0;
84 if (r)
85 RAND_set_rand_method(FIPS_rand_get_method());
86 else
87 RAND_set_rand_method(NULL);
88 return 1;
89#else
90 if (r == 0)
91 return 1;
92 CRYPTOerr(CRYPTO_F_FIPS_MODE_SET, CRYPTO_R_FIPS_MODE_NOT_SUPPORTED);
93 return 0;
94#endif
95 }
96
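
A short caller-side sketch of how the two functions above are typically paired (illustrative only; the helper name is hypothetical and error reporting is kept to a single message):

#include <stdio.h>
#include <openssl/crypto.h>

/* Illustrative only: try to enable FIPS mode, report if unavailable. */
static int enable_fips_if_possible(void)
{
    if (FIPS_mode())
        return 1;               /* already enabled */
    if (FIPS_mode_set(1))
        return 1;
    fprintf(stderr, "FIPS mode not supported in this build\n");
    return 0;
}
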
diff --git a/src/lib/libssl/src/crypto/o_init.c b/src/lib/libssl/src/crypto/o_init.c
index 00ed65a6cf..db4cdc443b 100644
--- a/src/lib/libssl/src/crypto/o_init.c
+++ b/src/lib/libssl/src/crypto/o_init.c
@@ -3,7 +3,7 @@
3 * project. 3 * project.
4 */ 4 */
5/* ==================================================================== 5/* ====================================================================
6 * Copyright (c) 2007 The OpenSSL Project. All rights reserved. 6 * Copyright (c) 2011 The OpenSSL Project. All rights reserved.
7 * 7 *
8 * Redistribution and use in source and binary forms, with or without 8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions 9 * modification, are permitted provided that the following conditions
@@ -50,14 +50,14 @@
50 * OF THE POSSIBILITY OF SUCH DAMAGE. 50 * OF THE POSSIBILITY OF SUCH DAMAGE.
51 * ==================================================================== 51 * ====================================================================
52 * 52 *
53 * This product includes cryptographic software written by Eric Young
54 * (eay@cryptsoft.com). This product includes software written by Tim
55 * Hudson (tjh@cryptsoft.com).
56 *
57 */ 53 */
58 54
59#include <e_os.h> 55#include <e_os.h>
60#include <openssl/err.h> 56#include <openssl/err.h>
57#ifdef OPENSSL_FIPS
58#include <openssl/fips.h>
59#include <openssl/rand.h>
60#endif
61 61
62/* Perform any essential OpenSSL initialization operations. 62/* Perform any essential OpenSSL initialization operations.
63 * Currently only sets FIPS callbacks 63 * Currently only sets FIPS callbacks
@@ -65,22 +65,18 @@
65 65
66void OPENSSL_init(void) 66void OPENSSL_init(void)
67 { 67 {
68#ifdef OPENSSL_FIPS
69 static int done = 0; 68 static int done = 0;
70 if (!done) 69 if (done)
71 { 70 return;
72 int_ERR_lib_init(); 71 done = 1;
73#ifdef CRYPTO_MDEBUG 72#ifdef OPENSSL_FIPS
74 CRYPTO_malloc_debug_init(); 73 FIPS_set_locking_callbacks(CRYPTO_lock, CRYPTO_add_lock);
75#endif 74 FIPS_set_error_callbacks(ERR_put_error, ERR_add_error_vdata);
76#ifdef OPENSSL_ENGINE 75 FIPS_set_malloc_callbacks(CRYPTO_malloc, CRYPTO_free);
77 int_EVP_MD_init_engine_callbacks(); 76 RAND_init_fips();
78 int_EVP_CIPHER_init_engine_callbacks();
79 int_RAND_init_engine_callbacks();
80#endif 77#endif
81 done = 1; 78#if 0
82 } 79 fprintf(stderr, "Called OPENSSL_init\n");
83#endif 80#endif
84 } 81 }
85
86 82
diff --git a/src/lib/libssl/src/crypto/objects/obj_xref.c b/src/lib/libssl/src/crypto/objects/obj_xref.c
index 152eca5c67..9f744bcede 100644
--- a/src/lib/libssl/src/crypto/objects/obj_xref.c
+++ b/src/lib/libssl/src/crypto/objects/obj_xref.c
@@ -110,8 +110,10 @@ int OBJ_find_sigid_algs(int signid, int *pdig_nid, int *ppkey_nid)
110#endif 110#endif
111 if (rv == NULL) 111 if (rv == NULL)
112 return 0; 112 return 0;
113 *pdig_nid = rv->hash_id; 113 if (pdig_nid)
114 *ppkey_nid = rv->pkey_id; 114 *pdig_nid = rv->hash_id;
115 if (ppkey_nid)
116 *ppkey_nid = rv->pkey_id;
115 return 1; 117 return 1;
116 } 118 }
117 119
@@ -144,7 +146,8 @@ int OBJ_find_sigid_by_algs(int *psignid, int dig_nid, int pkey_nid)
144#endif 146#endif
145 if (rv == NULL) 147 if (rv == NULL)
146 return 0; 148 return 0;
147 *psignid = (*rv)->sign_id; 149 if (psignid)
150 *psignid = (*rv)->sign_id;
148 return 1; 151 return 1;
149 } 152 }
150 153
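
With the NULL checks added above, a caller that only needs one of the two outputs can pass NULL for the other. A short usage sketch (the helper name is made up):

#include <openssl/objects.h>

/* Return the digest NID implied by a signature algorithm, or NID_undef. */
int digest_for_sig(int sig_nid)
	{
	int md_nid;

	if (!OBJ_find_sigid_algs(sig_nid, &md_nid, NULL))
		return NID_undef;
	return md_nid;	/* e.g. NID_sha256 for NID_sha256WithRSAEncryption */
	}
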
diff --git a/src/lib/libssl/src/crypto/objects/obj_xref.h b/src/lib/libssl/src/crypto/objects/obj_xref.h
index d5b9b8e198..e23938c296 100644
--- a/src/lib/libssl/src/crypto/objects/obj_xref.h
+++ b/src/lib/libssl/src/crypto/objects/obj_xref.h
@@ -38,10 +38,12 @@ static const nid_triple sigoid_srt[] =
38 {NID_id_GostR3411_94_with_GostR3410_94, NID_id_GostR3411_94, NID_id_GostR3410_94}, 38 {NID_id_GostR3411_94_with_GostR3410_94, NID_id_GostR3411_94, NID_id_GostR3410_94},
39 {NID_id_GostR3411_94_with_GostR3410_94_cc, NID_id_GostR3411_94, NID_id_GostR3410_94_cc}, 39 {NID_id_GostR3411_94_with_GostR3410_94_cc, NID_id_GostR3411_94, NID_id_GostR3410_94_cc},
40 {NID_id_GostR3411_94_with_GostR3410_2001_cc, NID_id_GostR3411_94, NID_id_GostR3410_2001_cc}, 40 {NID_id_GostR3411_94_with_GostR3410_2001_cc, NID_id_GostR3411_94, NID_id_GostR3410_2001_cc},
41 {NID_rsassaPss, NID_undef, NID_rsaEncryption},
41 }; 42 };
42 43
43static const nid_triple * const sigoid_srt_xref[] = 44static const nid_triple * const sigoid_srt_xref[] =
44 { 45 {
46 &sigoid_srt[29],
45 &sigoid_srt[17], 47 &sigoid_srt[17],
46 &sigoid_srt[18], 48 &sigoid_srt[18],
47 &sigoid_srt[0], 49 &sigoid_srt[0],
diff --git a/src/lib/libssl/src/crypto/objects/obj_xref.txt b/src/lib/libssl/src/crypto/objects/obj_xref.txt
index e45b3d34b9..cb917182ee 100644
--- a/src/lib/libssl/src/crypto/objects/obj_xref.txt
+++ b/src/lib/libssl/src/crypto/objects/obj_xref.txt
@@ -13,6 +13,10 @@ sha512WithRSAEncryption sha512 rsaEncryption
13sha224WithRSAEncryption sha224 rsaEncryption 13sha224WithRSAEncryption sha224 rsaEncryption
14mdc2WithRSA mdc2 rsaEncryption 14mdc2WithRSA mdc2 rsaEncryption
15ripemd160WithRSA ripemd160 rsaEncryption 15ripemd160WithRSA ripemd160 rsaEncryption
16# For PSS the digest algorithm can vary and depends on the included
17# AlgorithmIdentifier. The digest "undef" indicates the public key
18# method should handle this explicitly.
19rsassaPss undef rsaEncryption
16 20
17# Alternative deprecated OIDs. By using the older "rsa" OID this 21# Alternative deprecated OIDs. By using the older "rsa" OID this
18# type will be recognized but not normally used. 22# type will be recognized but not normally used.
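
The new table entry documents that for RSA-PSS the digest half of the cross-reference is deliberately "undef". A hedged sketch of what that means for lookup code (the check itself is illustrative, not from the tree):

#include <openssl/objects.h>

int pss_digest_is_deferred(void)
	{
	int md_nid, pkey_nid;

	if (!OBJ_find_sigid_algs(NID_rsassaPss, &md_nid, &pkey_nid))
		return -1;
	/* md_nid comes back as NID_undef: the digest must be taken from
	 * the PSS AlgorithmIdentifier parameters instead. */
	return md_nid == NID_undef && pkey_nid == NID_rsaEncryption;
	}
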
diff --git a/src/lib/libssl/src/crypto/pariscid.pl b/src/lib/libssl/src/crypto/pariscid.pl
new file mode 100644
index 0000000000..477ec9b87d
--- /dev/null
+++ b/src/lib/libssl/src/crypto/pariscid.pl
@@ -0,0 +1,224 @@
1#!/usr/bin/env perl
2
3$flavour = shift;
4$output = shift;
5open STDOUT,">$output";
6
7if ($flavour =~ /64/) {
8 $LEVEL ="2.0W";
9 $SIZE_T =8;
10 $ST ="std";
11} else {
12 $LEVEL ="1.1";
13 $SIZE_T =4;
14 $ST ="stw";
15}
16
17$rp="%r2";
18$sp="%r30";
19$rv="%r28";
20
21$code=<<___;
22 .LEVEL $LEVEL
23 .SPACE \$TEXT\$
24 .SUBSPA \$CODE\$,QUAD=0,ALIGN=8,ACCESS=0x2C,CODE_ONLY
25
26 .EXPORT OPENSSL_cpuid_setup,ENTRY
27 .ALIGN 8
28OPENSSL_cpuid_setup
29 .PROC
30 .CALLINFO NO_CALLS
31 .ENTRY
32 bv ($rp)
33 .EXIT
34 nop
35 .PROCEND
36
37 .EXPORT OPENSSL_rdtsc,ENTRY
38 .ALIGN 8
39OPENSSL_rdtsc
40 .PROC
41 .CALLINFO NO_CALLS
42 .ENTRY
43 mfctl %cr16,$rv
44 bv ($rp)
45 .EXIT
46 nop
47 .PROCEND
48
49 .EXPORT OPENSSL_wipe_cpu,ENTRY
50 .ALIGN 8
51OPENSSL_wipe_cpu
52 .PROC
53 .CALLINFO NO_CALLS
54 .ENTRY
55 xor %r0,%r0,%r1
56 fcpy,dbl %fr0,%fr4
57 xor %r0,%r0,%r19
58 fcpy,dbl %fr0,%fr5
59 xor %r0,%r0,%r20
60 fcpy,dbl %fr0,%fr6
61 xor %r0,%r0,%r21
62 fcpy,dbl %fr0,%fr7
63 xor %r0,%r0,%r22
64 fcpy,dbl %fr0,%fr8
65 xor %r0,%r0,%r23
66 fcpy,dbl %fr0,%fr9
67 xor %r0,%r0,%r24
68 fcpy,dbl %fr0,%fr10
69 xor %r0,%r0,%r25
70 fcpy,dbl %fr0,%fr11
71 xor %r0,%r0,%r26
72 fcpy,dbl %fr0,%fr22
73 xor %r0,%r0,%r29
74 fcpy,dbl %fr0,%fr23
75 xor %r0,%r0,%r31
76 fcpy,dbl %fr0,%fr24
77 fcpy,dbl %fr0,%fr25
78 fcpy,dbl %fr0,%fr26
79 fcpy,dbl %fr0,%fr27
80 fcpy,dbl %fr0,%fr28
81 fcpy,dbl %fr0,%fr29
82 fcpy,dbl %fr0,%fr30
83 fcpy,dbl %fr0,%fr31
84 bv ($rp)
85 .EXIT
86 ldo 0($sp),$rv
87 .PROCEND
88___
89{
90my $inp="%r26";
91my $len="%r25";
92
93$code.=<<___;
94 .EXPORT OPENSSL_cleanse,ENTRY,ARGW0=GR,ARGW1=GR
95 .ALIGN 8
96OPENSSL_cleanse
97 .PROC
98 .CALLINFO NO_CALLS
99 .ENTRY
100 cmpib,*= 0,$len,Ldone
101 nop
102 cmpib,*>>= 15,$len,Little
103 ldi $SIZE_T-1,%r1
104
105Lalign
106 and,*<> $inp,%r1,%r28
107 b,n Laligned
108 stb %r0,0($inp)
109 ldo -1($len),$len
110 b Lalign
111 ldo 1($inp),$inp
112
113Laligned
114 andcm $len,%r1,%r28
115Lot
116 $ST %r0,0($inp)
117 addib,*<> -$SIZE_T,%r28,Lot
118 ldo $SIZE_T($inp),$inp
119
120 and,*<> $len,%r1,$len
121 b,n Ldone
122Little
123 stb %r0,0($inp)
124 addib,*<> -1,$len,Little
125 ldo 1($inp),$inp
126Ldone
127 bv ($rp)
128 .EXIT
129 nop
130 .PROCEND
131___
132}
133{
134my ($out,$cnt,$max)=("%r26","%r25","%r24");
135my ($tick,$lasttick)=("%r23","%r22");
136my ($diff,$lastdiff)=("%r21","%r20");
137
138$code.=<<___;
139 .EXPORT OPENSSL_instrument_bus,ENTRY,ARGW0=GR,ARGW1=GR
140 .ALIGN 8
141OPENSSL_instrument_bus
142 .PROC
143 .CALLINFO NO_CALLS
144 .ENTRY
145 copy $cnt,$rv
146 mfctl %cr16,$tick
147 copy $tick,$lasttick
148 ldi 0,$diff
149
150 fdc 0($out)
151 ldw 0($out),$tick
152 add $diff,$tick,$tick
153 stw $tick,0($out)
154Loop
155 mfctl %cr16,$tick
156 sub $tick,$lasttick,$diff
157 copy $tick,$lasttick
158
159 fdc 0($out)
160 ldw 0($out),$tick
161 add $diff,$tick,$tick
162 stw $tick,0($out)
163
164 addib,<> -1,$cnt,Loop
165 addi 4,$out,$out
166
167 bv ($rp)
168 .EXIT
169 sub $rv,$cnt,$rv
170 .PROCEND
171
172 .EXPORT OPENSSL_instrument_bus2,ENTRY,ARGW0=GR,ARGW1=GR
173 .ALIGN 8
174OPENSSL_instrument_bus2
175 .PROC
176 .CALLINFO NO_CALLS
177 .ENTRY
178 copy $cnt,$rv
179 sub %r0,$cnt,$cnt
180
181 mfctl %cr16,$tick
182 copy $tick,$lasttick
183 ldi 0,$diff
184
185 fdc 0($out)
186 ldw 0($out),$tick
187 add $diff,$tick,$tick
188 stw $tick,0($out)
189
190 mfctl %cr16,$tick
191 sub $tick,$lasttick,$diff
192 copy $tick,$lasttick
193Loop2
194 copy $diff,$lastdiff
195 fdc 0($out)
196 ldw 0($out),$tick
197 add $diff,$tick,$tick
198 stw $tick,0($out)
199
200 addib,= -1,$max,Ldone2
201 nop
202
203 mfctl %cr16,$tick
204 sub $tick,$lasttick,$diff
205 copy $tick,$lasttick
206 cmpclr,<> $lastdiff,$diff,$tick
207 ldi 1,$tick
208
209 ldi 1,%r1
210 xor %r1,$tick,$tick
211 addb,<> $tick,$cnt,Loop2
212 shladd,l $tick,2,$out,$out
213Ldone2
214 bv ($rp)
215 .EXIT
216 add $rv,$cnt,$rv
217 .PROCEND
218___
219}
220$code =~ s/cmpib,\*/comib,/gm if ($SIZE_T==4);
221$code =~ s/,\*/,/gm if ($SIZE_T==4);
222print $code;
223close STDOUT;
224
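
The OPENSSL_cleanse routine above follows the usual shape: zero single bytes until the pointer is word aligned, clear whole words, then mop up the tail. A simplified portable C analogue, only to illustrate that structure (it is not the C fallback libcrypto actually ships):

#include <stdint.h>
#include <stddef.h>

void cleanse_sketch(void *ptr, size_t len)
	{
	volatile unsigned char *p = ptr;

	while (len > 0 && ((uintptr_t)p & (sizeof(size_t) - 1)))
		{
		*p++ = 0;			/* leading bytes up to alignment */
		len--;
		}
	while (len >= sizeof(size_t))
		{
		*(volatile size_t *)p = 0;	/* aligned word stores */
		p += sizeof(size_t);
		len -= sizeof(size_t);
		}
	while (len > 0)
		{
		*p++ = 0;			/* remaining tail bytes */
		len--;
		}
	}
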
diff --git a/src/lib/libssl/src/crypto/pem/pvkfmt.c b/src/lib/libssl/src/crypto/pem/pvkfmt.c
index 5f130c4528..b1bf71a5da 100644
--- a/src/lib/libssl/src/crypto/pem/pvkfmt.c
+++ b/src/lib/libssl/src/crypto/pem/pvkfmt.c
@@ -709,13 +709,16 @@ static int derive_pvk_key(unsigned char *key,
709 const unsigned char *pass, int passlen) 709 const unsigned char *pass, int passlen)
710 { 710 {
711 EVP_MD_CTX mctx; 711 EVP_MD_CTX mctx;
712 int rv = 1;
712 EVP_MD_CTX_init(&mctx); 713 EVP_MD_CTX_init(&mctx);
713 EVP_DigestInit_ex(&mctx, EVP_sha1(), NULL); 714 if (!EVP_DigestInit_ex(&mctx, EVP_sha1(), NULL)
714 EVP_DigestUpdate(&mctx, salt, saltlen); 715 || !EVP_DigestUpdate(&mctx, salt, saltlen)
715 EVP_DigestUpdate(&mctx, pass, passlen); 716 || !EVP_DigestUpdate(&mctx, pass, passlen)
716 EVP_DigestFinal_ex(&mctx, key, NULL); 717 || !EVP_DigestFinal_ex(&mctx, key, NULL))
718 rv = 0;
719
717 EVP_MD_CTX_cleanup(&mctx); 720 EVP_MD_CTX_cleanup(&mctx);
718 return 1; 721 return rv;
719 } 722 }
720 723
721 724
@@ -727,11 +730,12 @@ static EVP_PKEY *do_PVK_body(const unsigned char **in,
727 const unsigned char *p = *in; 730 const unsigned char *p = *in;
728 unsigned int magic; 731 unsigned int magic;
729 unsigned char *enctmp = NULL, *q; 732 unsigned char *enctmp = NULL, *q;
733 EVP_CIPHER_CTX cctx;
734 EVP_CIPHER_CTX_init(&cctx);
730 if (saltlen) 735 if (saltlen)
731 { 736 {
732 char psbuf[PEM_BUFSIZE]; 737 char psbuf[PEM_BUFSIZE];
733 unsigned char keybuf[20]; 738 unsigned char keybuf[20];
734 EVP_CIPHER_CTX cctx;
735 int enctmplen, inlen; 739 int enctmplen, inlen;
736 if (cb) 740 if (cb)
737 inlen=cb(psbuf,PEM_BUFSIZE,0,u); 741 inlen=cb(psbuf,PEM_BUFSIZE,0,u);
@@ -757,37 +761,41 @@ static EVP_PKEY *do_PVK_body(const unsigned char **in,
757 p += 8; 761 p += 8;
758 inlen = keylen - 8; 762 inlen = keylen - 8;
759 q = enctmp + 8; 763 q = enctmp + 8;
760 EVP_CIPHER_CTX_init(&cctx); 764 if (!EVP_DecryptInit_ex(&cctx, EVP_rc4(), NULL, keybuf, NULL))
761 EVP_DecryptInit_ex(&cctx, EVP_rc4(), NULL, keybuf, NULL); 765 goto err;
762 EVP_DecryptUpdate(&cctx, q, &enctmplen, p, inlen); 766 if (!EVP_DecryptUpdate(&cctx, q, &enctmplen, p, inlen))
763 EVP_DecryptFinal_ex(&cctx, q + enctmplen, &enctmplen); 767 goto err;
768 if (!EVP_DecryptFinal_ex(&cctx, q + enctmplen, &enctmplen))
769 goto err;
764 magic = read_ledword((const unsigned char **)&q); 770 magic = read_ledword((const unsigned char **)&q);
765 if (magic != MS_RSA2MAGIC && magic != MS_DSS2MAGIC) 771 if (magic != MS_RSA2MAGIC && magic != MS_DSS2MAGIC)
766 { 772 {
767 q = enctmp + 8; 773 q = enctmp + 8;
768 memset(keybuf + 5, 0, 11); 774 memset(keybuf + 5, 0, 11);
769 EVP_DecryptInit_ex(&cctx, EVP_rc4(), NULL, keybuf, 775 if (!EVP_DecryptInit_ex(&cctx, EVP_rc4(), NULL, keybuf,
770 NULL); 776 NULL))
777 goto err;
771 OPENSSL_cleanse(keybuf, 20); 778 OPENSSL_cleanse(keybuf, 20);
772 EVP_DecryptUpdate(&cctx, q, &enctmplen, p, inlen); 779 if (!EVP_DecryptUpdate(&cctx, q, &enctmplen, p, inlen))
773 EVP_DecryptFinal_ex(&cctx, q + enctmplen, 780 goto err;
774 &enctmplen); 781 if (!EVP_DecryptFinal_ex(&cctx, q + enctmplen,
782 &enctmplen))
783 goto err;
775 magic = read_ledword((const unsigned char **)&q); 784 magic = read_ledword((const unsigned char **)&q);
776 if (magic != MS_RSA2MAGIC && magic != MS_DSS2MAGIC) 785 if (magic != MS_RSA2MAGIC && magic != MS_DSS2MAGIC)
777 { 786 {
778 EVP_CIPHER_CTX_cleanup(&cctx);
779 PEMerr(PEM_F_DO_PVK_BODY, PEM_R_BAD_DECRYPT); 787 PEMerr(PEM_F_DO_PVK_BODY, PEM_R_BAD_DECRYPT);
780 goto err; 788 goto err;
781 } 789 }
782 } 790 }
783 else 791 else
784 OPENSSL_cleanse(keybuf, 20); 792 OPENSSL_cleanse(keybuf, 20);
785 EVP_CIPHER_CTX_cleanup(&cctx);
786 p = enctmp; 793 p = enctmp;
787 } 794 }
788 795
789 ret = b2i_PrivateKey(&p, keylen); 796 ret = b2i_PrivateKey(&p, keylen);
790 err: 797 err:
798 EVP_CIPHER_CTX_cleanup(&cctx);
791 if (enctmp && saltlen) 799 if (enctmp && saltlen)
792 OPENSSL_free(enctmp); 800 OPENSSL_free(enctmp);
793 return ret; 801 return ret;
@@ -841,6 +849,8 @@ static int i2b_PVK(unsigned char **out, EVP_PKEY*pk, int enclevel,
841 { 849 {
842 int outlen = 24, pklen; 850 int outlen = 24, pklen;
843 unsigned char *p, *salt = NULL; 851 unsigned char *p, *salt = NULL;
852 EVP_CIPHER_CTX cctx;
853 EVP_CIPHER_CTX_init(&cctx);
844 if (enclevel) 854 if (enclevel)
845 outlen += PVK_SALTLEN; 855 outlen += PVK_SALTLEN;
846 pklen = do_i2b(NULL, pk, 0); 856 pklen = do_i2b(NULL, pk, 0);
@@ -885,7 +895,6 @@ static int i2b_PVK(unsigned char **out, EVP_PKEY*pk, int enclevel,
885 { 895 {
886 char psbuf[PEM_BUFSIZE]; 896 char psbuf[PEM_BUFSIZE];
887 unsigned char keybuf[20]; 897 unsigned char keybuf[20];
888 EVP_CIPHER_CTX cctx;
889 int enctmplen, inlen; 898 int enctmplen, inlen;
890 if (cb) 899 if (cb)
891 inlen=cb(psbuf,PEM_BUFSIZE,1,u); 900 inlen=cb(psbuf,PEM_BUFSIZE,1,u);
@@ -902,16 +911,19 @@ static int i2b_PVK(unsigned char **out, EVP_PKEY*pk, int enclevel,
902 if (enclevel == 1) 911 if (enclevel == 1)
903 memset(keybuf + 5, 0, 11); 912 memset(keybuf + 5, 0, 11);
904 p = salt + PVK_SALTLEN + 8; 913 p = salt + PVK_SALTLEN + 8;
905 EVP_CIPHER_CTX_init(&cctx); 914 if (!EVP_EncryptInit_ex(&cctx, EVP_rc4(), NULL, keybuf, NULL))
906 EVP_EncryptInit_ex(&cctx, EVP_rc4(), NULL, keybuf, NULL); 915 goto error;
907 OPENSSL_cleanse(keybuf, 20); 916 OPENSSL_cleanse(keybuf, 20);
908 EVP_DecryptUpdate(&cctx, p, &enctmplen, p, pklen - 8); 917 if (!EVP_DecryptUpdate(&cctx, p, &enctmplen, p, pklen - 8))
909 EVP_DecryptFinal_ex(&cctx, p + enctmplen, &enctmplen); 918 goto error;
910 EVP_CIPHER_CTX_cleanup(&cctx); 919 if (!EVP_DecryptFinal_ex(&cctx, p + enctmplen, &enctmplen))
920 goto error;
911 } 921 }
922 EVP_CIPHER_CTX_cleanup(&cctx);
912 return outlen; 923 return outlen;
913 924
914 error: 925 error:
926 EVP_CIPHER_CTX_cleanup(&cctx);
915 return -1; 927 return -1;
916 } 928 }
917 929
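
The pvkfmt.c changes all follow one pattern: initialise the context once, check every EVP call, and run the cleanup on both the success and the error path. A minimal sketch of the same pattern with the 1.0.1-era stack-allocated contexts (the helper function is hypothetical):

#include <stddef.h>
#include <openssl/evp.h>

static int sha1_two_parts(unsigned char out[20],
    const unsigned char *a, size_t alen,
    const unsigned char *b, size_t blen)
	{
	EVP_MD_CTX ctx;
	int rv = 0;

	EVP_MD_CTX_init(&ctx);
	if (!EVP_DigestInit_ex(&ctx, EVP_sha1(), NULL)
	    || !EVP_DigestUpdate(&ctx, a, alen)
	    || !EVP_DigestUpdate(&ctx, b, blen)
	    || !EVP_DigestFinal_ex(&ctx, out, NULL))
		goto err;
	rv = 1;
	err:
	EVP_MD_CTX_cleanup(&ctx);
	return rv;
	}
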
diff --git a/src/lib/libssl/src/crypto/perlasm/ppc-xlate.pl b/src/lib/libssl/src/crypto/perlasm/ppc-xlate.pl
index 4579671c97..a3edd982b6 100755
--- a/src/lib/libssl/src/crypto/perlasm/ppc-xlate.pl
+++ b/src/lib/libssl/src/crypto/perlasm/ppc-xlate.pl
@@ -31,10 +31,9 @@ my $globl = sub {
31 $ret .= ".type $name,\@function"; 31 $ret .= ".type $name,\@function";
32 last; 32 last;
33 }; 33 };
34 /linux.*64/ && do { $ret .= ".globl .$name\n"; 34 /linux.*64/ && do { $ret .= ".globl $name\n";
35 $ret .= ".type .$name,\@function\n"; 35 $ret .= ".type $name,\@function\n";
36 $ret .= ".section \".opd\",\"aw\"\n"; 36 $ret .= ".section \".opd\",\"aw\"\n";
37 $ret .= ".globl $name\n";
38 $ret .= ".align 3\n"; 37 $ret .= ".align 3\n";
39 $ret .= "$name:\n"; 38 $ret .= "$name:\n";
40 $ret .= ".quad .$name,.TOC.\@tocbase,0\n"; 39 $ret .= ".quad .$name,.TOC.\@tocbase,0\n";
@@ -62,6 +61,14 @@ my $machine = sub {
62 } 61 }
63 ".machine $arch"; 62 ".machine $arch";
64}; 63};
64my $size = sub {
65 if ($flavour =~ /linux.*32/)
66 { shift;
67 ".size " . join(",",@_);
68 }
69 else
70 { ""; }
71};
65my $asciz = sub { 72my $asciz = sub {
66 shift; 73 shift;
67 my $line = join(",",@_); 74 my $line = join(",",@_);
diff --git a/src/lib/libssl/src/crypto/perlasm/x86_64-xlate.pl b/src/lib/libssl/src/crypto/perlasm/x86_64-xlate.pl
index e47116b74b..56d9b64b6f 100755
--- a/src/lib/libssl/src/crypto/perlasm/x86_64-xlate.pl
+++ b/src/lib/libssl/src/crypto/perlasm/x86_64-xlate.pl
@@ -62,12 +62,8 @@ my $flavour = shift;
62my $output = shift; 62my $output = shift;
63if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } 63if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
64 64
65{ my ($stddev,$stdino,@junk)=stat(STDOUT); 65open STDOUT,">$output" || die "can't open $output: $!"
66 my ($outdev,$outino,@junk)=stat($output); 66 if (defined($output));
67
68 open STDOUT,">$output" || die "can't open $output: $!"
69 if ($stddev!=$outdev || $stdino!=$outino);
70}
71 67
72my $gas=1; $gas=0 if ($output =~ /\.asm$/); 68my $gas=1; $gas=0 if ($output =~ /\.asm$/);
73my $elf=1; $elf=0 if (!$gas); 69my $elf=1; $elf=0 if (!$gas);
@@ -116,12 +112,16 @@ my %globals;
116 $line = substr($line,@+[0]); $line =~ s/^\s+//; 112 $line = substr($line,@+[0]); $line =~ s/^\s+//;
117 113
118 undef $self->{sz}; 114 undef $self->{sz};
119 if ($self->{op} =~ /^(movz)b.*/) { # movz is pain... 115 if ($self->{op} =~ /^(movz)x?([bw]).*/) { # movz is pain...
120 $self->{op} = $1; 116 $self->{op} = $1;
121 $self->{sz} = "b"; 117 $self->{sz} = $2;
122 } elsif ($self->{op} =~ /call|jmp/) { 118 } elsif ($self->{op} =~ /call|jmp/) {
123 $self->{sz} = ""; 119 $self->{sz} = "";
124 } elsif ($self->{op} =~ /^p/ && $' !~ /^(ush|op)/) { # SSEn 120 } elsif ($self->{op} =~ /^p/ && $' !~ /^(ush|op|insrw)/) { # SSEn
121 $self->{sz} = "";
122 } elsif ($self->{op} =~ /^v/) { # VEX
123 $self->{sz} = "";
124 } elsif ($self->{op} =~ /movq/ && $line =~ /%xmm/) {
125 $self->{sz} = ""; 125 $self->{sz} = "";
126 } elsif ($self->{op} =~ /([a-z]{3,})([qlwb])$/) { 126 } elsif ($self->{op} =~ /([a-z]{3,})([qlwb])$/) {
127 $self->{op} = $1; 127 $self->{op} = $1;
@@ -246,35 +246,39 @@ my %globals;
246 $self->{index} =~ s/^[er](.?[0-9xpi])[d]?$/r\1/; 246 $self->{index} =~ s/^[er](.?[0-9xpi])[d]?$/r\1/;
247 $self->{base} =~ s/^[er](.?[0-9xpi])[d]?$/r\1/; 247 $self->{base} =~ s/^[er](.?[0-9xpi])[d]?$/r\1/;
248 248
249 # Solaris /usr/ccs/bin/as can't handle multiplications
250 # in $self->{label}, new gas requires sign extension...
251 use integer;
252 $self->{label} =~ s/(?<![\w\$\.])(0x?[0-9a-f]+)/oct($1)/egi;
253 $self->{label} =~ s/([0-9]+\s*[\*\/\%]\s*[0-9]+)/eval($1)/eg;
254 $self->{label} =~ s/([0-9]+)/$1<<32>>32/eg;
255
249 if ($gas) { 256 if ($gas) {
250 # Solaris /usr/ccs/bin/as can't handle multiplications
251 # in $self->{label}, new gas requires sign extension...
252 use integer;
253 $self->{label} =~ s/(?<![\w\$\.])(0x?[0-9a-f]+)/oct($1)/egi;
254 $self->{label} =~ s/([0-9]+\s*[\*\/\%]\s*[0-9]+)/eval($1)/eg;
255 $self->{label} =~ s/([0-9]+)/$1<<32>>32/eg;
256 $self->{label} =~ s/^___imp_/__imp__/ if ($flavour eq "mingw64"); 257 $self->{label} =~ s/^___imp_/__imp__/ if ($flavour eq "mingw64");
257 258
258 if (defined($self->{index})) { 259 if (defined($self->{index})) {
259 sprintf "%s%s(%%%s,%%%s,%d)",$self->{asterisk}, 260 sprintf "%s%s(%s,%%%s,%d)",$self->{asterisk},
260 $self->{label},$self->{base}, 261 $self->{label},
262 $self->{base}?"%$self->{base}":"",
261 $self->{index},$self->{scale}; 263 $self->{index},$self->{scale};
262 } else { 264 } else {
263 sprintf "%s%s(%%%s)", $self->{asterisk},$self->{label},$self->{base}; 265 sprintf "%s%s(%%%s)", $self->{asterisk},$self->{label},$self->{base};
264 } 266 }
265 } else { 267 } else {
266 %szmap = ( b=>"BYTE$PTR", w=>"WORD$PTR", l=>"DWORD$PTR", q=>"QWORD$PTR" ); 268 %szmap = ( b=>"BYTE$PTR", w=>"WORD$PTR", l=>"DWORD$PTR",
269 q=>"QWORD$PTR",o=>"OWORD$PTR",x=>"XMMWORD$PTR" );
267 270
268 $self->{label} =~ s/\./\$/g; 271 $self->{label} =~ s/\./\$/g;
269 $self->{label} =~ s/(?<![\w\$\.])0x([0-9a-f]+)/0$1h/ig; 272 $self->{label} =~ s/(?<![\w\$\.])0x([0-9a-f]+)/0$1h/ig;
270 $self->{label} = "($self->{label})" if ($self->{label} =~ /[\*\+\-\/]/); 273 $self->{label} = "($self->{label})" if ($self->{label} =~ /[\*\+\-\/]/);
271 $sz="q" if ($self->{asterisk}); 274 $sz="q" if ($self->{asterisk} || opcode->mnemonic() eq "movq");
275 $sz="l" if (opcode->mnemonic() eq "movd");
272 276
273 if (defined($self->{index})) { 277 if (defined($self->{index})) {
274 sprintf "%s[%s%s*%d+%s]",$szmap{$sz}, 278 sprintf "%s[%s%s*%d%s]",$szmap{$sz},
275 $self->{label}?"$self->{label}+":"", 279 $self->{label}?"$self->{label}+":"",
276 $self->{index},$self->{scale}, 280 $self->{index},$self->{scale},
277 $self->{base}; 281 $self->{base}?"+$self->{base}":"";
278 } elsif ($self->{base} eq "rip") { 282 } elsif ($self->{base} eq "rip") {
279 sprintf "%s[%s]",$szmap{$sz},$self->{label}; 283 sprintf "%s[%s]",$szmap{$sz},$self->{label};
280 } else { 284 } else {
@@ -506,6 +510,12 @@ my %globals;
506 } 510 }
507 } elsif ($dir =~ /\.(text|data)/) { 511 } elsif ($dir =~ /\.(text|data)/) {
508 $current_segment=".$1"; 512 $current_segment=".$1";
513 } elsif ($dir =~ /\.hidden/) {
514 if ($flavour eq "macosx") { $self->{value} = ".private_extern\t$prefix$line"; }
515 elsif ($flavour eq "mingw64") { $self->{value} = ""; }
516 } elsif ($dir =~ /\.comm/) {
517 $self->{value} = "$dir\t$prefix$line";
518 $self->{value} =~ s|,([0-9]+),([0-9]+)$|",$1,".log($2)/log(2)|e if ($flavour eq "macosx");
509 } 519 }
510 $line = ""; 520 $line = "";
511 return $self; 521 return $self;
@@ -555,7 +565,8 @@ my %globals;
555 $v.=" READONLY"; 565 $v.=" READONLY";
556 $v.=" ALIGN(".($1 eq "p" ? 4 : 8).")" if ($masm>=$masmref); 566 $v.=" ALIGN(".($1 eq "p" ? 4 : 8).")" if ($masm>=$masmref);
557 } elsif ($line=~/\.CRT\$/i) { 567 } elsif ($line=~/\.CRT\$/i) {
558 $v.=" READONLY DWORD"; 568 $v.=" READONLY ";
569 $v.=$masm>=$masmref ? "ALIGN(8)" : "DWORD";
559 } 570 }
560 } 571 }
561 $current_segment = $line; 572 $current_segment = $line;
@@ -577,7 +588,7 @@ my %globals;
577 $self->{value}="${decor}SEH_end_$current_function->{name}:"; 588 $self->{value}="${decor}SEH_end_$current_function->{name}:";
578 $self->{value}.=":\n" if($masm); 589 $self->{value}.=":\n" if($masm);
579 } 590 }
580 $self->{value}.="$current_function->{name}\tENDP" if($masm); 591 $self->{value}.="$current_function->{name}\tENDP" if($masm && $current_function->{name});
581 undef $current_function; 592 undef $current_function;
582 } 593 }
583 last; 594 last;
@@ -613,6 +624,19 @@ my %globals;
613 .join(",",@str) if (@str); 624 .join(",",@str) if (@str);
614 last; 625 last;
615 }; 626 };
627 /\.comm/ && do { my @str=split(/,\s*/,$line);
628 my $v=undef;
629 if ($nasm) {
630 $v.="common $prefix@str[0] @str[1]";
631 } else {
632 $v="$current_segment\tENDS\n" if ($current_segment);
633 $current_segment = "_DATA";
634 $v.="$current_segment\tSEGMENT\n";
635 $v.="COMM @str[0]:DWORD:".@str[1]/4;
636 }
637 $self->{value} = $v;
638 last;
639 };
616 } 640 }
617 $line = ""; 641 $line = "";
618 } 642 }
@@ -625,9 +649,133 @@ my %globals;
625 } 649 }
626} 650}
627 651
652sub rex {
653 local *opcode=shift;
654 my ($dst,$src,$rex)=@_;
655
656 $rex|=0x04 if($dst>=8);
657 $rex|=0x01 if($src>=8);
658 push @opcode,($rex|0x40) if ($rex);
659}
660
661# older gas and ml64 don't handle SSE>2 instructions
662my %regrm = ( "%eax"=>0, "%ecx"=>1, "%edx"=>2, "%ebx"=>3,
663 "%esp"=>4, "%ebp"=>5, "%esi"=>6, "%edi"=>7 );
664
665my $movq = sub { # elderly gas can't handle inter-register movq
666 my $arg = shift;
667 my @opcode=(0x66);
668 if ($arg =~ /%xmm([0-9]+),\s*%r(\w+)/) {
669 my ($src,$dst)=($1,$2);
670 if ($dst !~ /[0-9]+/) { $dst = $regrm{"%e$dst"}; }
671 rex(\@opcode,$src,$dst,0x8);
672 push @opcode,0x0f,0x7e;
673 push @opcode,0xc0|(($src&7)<<3)|($dst&7); # ModR/M
674 @opcode;
675 } elsif ($arg =~ /%r(\w+),\s*%xmm([0-9]+)/) {
676 my ($src,$dst)=($2,$1);
677 if ($dst !~ /[0-9]+/) { $dst = $regrm{"%e$dst"}; }
678 rex(\@opcode,$src,$dst,0x8);
679 push @opcode,0x0f,0x6e;
680 push @opcode,0xc0|(($src&7)<<3)|($dst&7); # ModR/M
681 @opcode;
682 } else {
683 ();
684 }
685};
686
687my $pextrd = sub {
688 if (shift =~ /\$([0-9]+),\s*%xmm([0-9]+),\s*(%\w+)/) {
689 my @opcode=(0x66);
690 $imm=$1;
691 $src=$2;
692 $dst=$3;
693 if ($dst =~ /%r([0-9]+)d/) { $dst = $1; }
694 elsif ($dst =~ /%e/) { $dst = $regrm{$dst}; }
695 rex(\@opcode,$src,$dst);
696 push @opcode,0x0f,0x3a,0x16;
697 push @opcode,0xc0|(($src&7)<<3)|($dst&7); # ModR/M
698 push @opcode,$imm;
699 @opcode;
700 } else {
701 ();
702 }
703};
704
705my $pinsrd = sub {
706 if (shift =~ /\$([0-9]+),\s*(%\w+),\s*%xmm([0-9]+)/) {
707 my @opcode=(0x66);
708 $imm=$1;
709 $src=$2;
710 $dst=$3;
711 if ($src =~ /%r([0-9]+)/) { $src = $1; }
712 elsif ($src =~ /%e/) { $src = $regrm{$src}; }
713 rex(\@opcode,$dst,$src);
714 push @opcode,0x0f,0x3a,0x22;
715 push @opcode,0xc0|(($dst&7)<<3)|($src&7); # ModR/M
716 push @opcode,$imm;
717 @opcode;
718 } else {
719 ();
720 }
721};
722
723my $pshufb = sub {
724 if (shift =~ /%xmm([0-9]+),\s*%xmm([0-9]+)/) {
725 my @opcode=(0x66);
726 rex(\@opcode,$2,$1);
727 push @opcode,0x0f,0x38,0x00;
728 push @opcode,0xc0|($1&7)|(($2&7)<<3); # ModR/M
729 @opcode;
730 } else {
731 ();
732 }
733};
734
735my $palignr = sub {
736 if (shift =~ /\$([0-9]+),\s*%xmm([0-9]+),\s*%xmm([0-9]+)/) {
737 my @opcode=(0x66);
738 rex(\@opcode,$3,$2);
739 push @opcode,0x0f,0x3a,0x0f;
740 push @opcode,0xc0|($2&7)|(($3&7)<<3); # ModR/M
741 push @opcode,$1;
742 @opcode;
743 } else {
744 ();
745 }
746};
747
748my $pclmulqdq = sub {
749 if (shift =~ /\$([x0-9a-f]+),\s*%xmm([0-9]+),\s*%xmm([0-9]+)/) {
750 my @opcode=(0x66);
751 rex(\@opcode,$3,$2);
752 push @opcode,0x0f,0x3a,0x44;
753 push @opcode,0xc0|($2&7)|(($3&7)<<3); # ModR/M
754 my $c=$1;
755 push @opcode,$c=~/^0/?oct($c):$c;
756 @opcode;
757 } else {
758 ();
759 }
760};
761
762my $rdrand = sub {
763 if (shift =~ /%[er](\w+)/) {
764 my @opcode=();
765 my $dst=$1;
766 if ($dst !~ /[0-9]+/) { $dst = $regrm{"%e$dst"}; }
767 rex(\@opcode,0,$1,8);
768 push @opcode,0x0f,0xc7,0xf0|($dst&7);
769 @opcode;
770 } else {
771 ();
772 }
773};
774
628if ($nasm) { 775if ($nasm) {
629 print <<___; 776 print <<___;
630default rel 777default rel
778%define XMMWORD
631___ 779___
632} elsif ($masm) { 780} elsif ($masm) {
633 print <<___; 781 print <<___;
@@ -644,14 +792,22 @@ while($line=<>) {
644 792
645 undef $label; 793 undef $label;
646 undef $opcode; 794 undef $opcode;
647 undef $sz;
648 undef @args; 795 undef @args;
649 796
650 if ($label=label->re(\$line)) { print $label->out(); } 797 if ($label=label->re(\$line)) { print $label->out(); }
651 798
652 if (directive->re(\$line)) { 799 if (directive->re(\$line)) {
653 printf "%s",directive->out(); 800 printf "%s",directive->out();
654 } elsif ($opcode=opcode->re(\$line)) { ARGUMENT: while (1) { 801 } elsif ($opcode=opcode->re(\$line)) {
802 my $asm = eval("\$".$opcode->mnemonic());
803 undef @bytes;
804
805 if ((ref($asm) eq 'CODE') && scalar(@bytes=&$asm($line))) {
806 print $gas?".byte\t":"DB\t",join(',',@bytes),"\n";
807 next;
808 }
809
810 ARGUMENT: while (1) {
655 my $arg; 811 my $arg;
656 812
657 if ($arg=register->re(\$line)) { opcode->size($arg->size()); } 813 if ($arg=register->re(\$line)) { opcode->size($arg->size()); }
@@ -667,19 +823,26 @@ while($line=<>) {
667 $line =~ s/^,\s*//; 823 $line =~ s/^,\s*//;
668 } # ARGUMENT: 824 } # ARGUMENT:
669 825
670 $sz=opcode->size();
671
672 if ($#args>=0) { 826 if ($#args>=0) {
673 my $insn; 827 my $insn;
828 my $sz=opcode->size();
829
674 if ($gas) { 830 if ($gas) {
675 $insn = $opcode->out($#args>=1?$args[$#args]->size():$sz); 831 $insn = $opcode->out($#args>=1?$args[$#args]->size():$sz);
832 @args = map($_->out($sz),@args);
833 printf "\t%s\t%s",$insn,join(",",@args);
676 } else { 834 } else {
677 $insn = $opcode->out(); 835 $insn = $opcode->out();
678 $insn .= $sz if (map($_->out() =~ /x?mm/,@args)); 836 foreach (@args) {
837 my $arg = $_->out();
838 # $insn.=$sz compensates for movq, pinsrw, ...
839 if ($arg =~ /^xmm[0-9]+$/) { $insn.=$sz; $sz="x" if(!$sz); last; }
840 if ($arg =~ /^mm[0-9]+$/) { $insn.=$sz; $sz="q" if(!$sz); last; }
841 }
679 @args = reverse(@args); 842 @args = reverse(@args);
680 undef $sz if ($nasm && $opcode->mnemonic() eq "lea"); 843 undef $sz if ($nasm && $opcode->mnemonic() eq "lea");
844 printf "\t%s\t%s",$insn,join(",",map($_->out($sz),@args));
681 } 845 }
682 printf "\t%s\t%s",$insn,join(",",map($_->out($sz),@args));
683 } else { 846 } else {
684 printf "\t%s",$opcode->out(); 847 printf "\t%s",$opcode->out();
685 } 848 }
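
The block of handlers added above ($movq, $pshufb, $pclmulqdq, ...) exists because older gas and ml64 do not know the SSSE3/AES-era mnemonics, so the translator emits the raw bytes itself: an optional REX prefix built from the high register bits, the fixed opcode, and a ModR/M byte. A C sketch of what the $pshufb handler produces for "pshufb %xmmS,%xmmD" (the function name is made up):

#include <stddef.h>

static size_t encode_pshufb(unsigned char *out, int dst, int src)
	{
	size_t n = 0;
	unsigned char rex = 0;

	out[n++] = 0x66;			/* mandatory prefix */
	if (dst >= 8) rex |= 0x04;		/* REX.R: reg field is xmm8..15 */
	if (src >= 8) rex |= 0x01;		/* REX.B: r/m field is xmm8..15 */
	if (rex) out[n++] = 0x40 | rex;
	out[n++] = 0x0f;
	out[n++] = 0x38;
	out[n++] = 0x00;			/* pshufb opcode: 66 0F 38 00 /r */
	out[n++] = 0xc0 | ((dst & 7) << 3) | (src & 7);	/* ModR/M, register form */
	return n;
	}
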
diff --git a/src/lib/libssl/src/crypto/perlasm/x86gas.pl b/src/lib/libssl/src/crypto/perlasm/x86gas.pl
index 6eab727fd4..682a3a3163 100644
--- a/src/lib/libssl/src/crypto/perlasm/x86gas.pl
+++ b/src/lib/libssl/src/crypto/perlasm/x86gas.pl
@@ -45,9 +45,8 @@ sub ::generic
45 undef $suffix if ($dst =~ m/^%[xm]/o || $src =~ m/^%[xm]/o); 45 undef $suffix if ($dst =~ m/^%[xm]/o || $src =~ m/^%[xm]/o);
46 46
47 if ($#_==0) { &::emit($opcode); } 47 if ($#_==0) { &::emit($opcode); }
48 elsif ($opcode =~ m/^j/o && $#_==1) { &::emit($opcode,@arg); } 48 elsif ($#_==1 && $opcode =~ m/^(call|clflush|j|loop|set)/o)
49 elsif ($opcode eq "call" && $#_==1) { &::emit($opcode,@arg); } 49 { &::emit($opcode,@arg); }
50 elsif ($opcode =~ m/^set/&& $#_==1) { &::emit($opcode,@arg); }
51 else { &::emit($opcode.$suffix,@arg);} 50 else { &::emit($opcode.$suffix,@arg);}
52 51
53 1; 52 1;
@@ -91,6 +90,7 @@ sub ::DWP
91} 90}
92sub ::QWP { &::DWP(@_); } 91sub ::QWP { &::DWP(@_); }
93sub ::BP { &::DWP(@_); } 92sub ::BP { &::DWP(@_); }
93sub ::WP { &::DWP(@_); }
94sub ::BC { @_; } 94sub ::BC { @_; }
95sub ::DWC { @_; } 95sub ::DWC { @_; }
96 96
@@ -149,22 +149,24 @@ sub ::public_label
149{ push(@out,".globl\t".&::LABEL($_[0],$nmdecor.$_[0])."\n"); } 149{ push(@out,".globl\t".&::LABEL($_[0],$nmdecor.$_[0])."\n"); }
150 150
151sub ::file_end 151sub ::file_end
152{ if (grep {/\b${nmdecor}OPENSSL_ia32cap_P\b/i} @out) { 152{ if ($::macosx)
153 my $tmp=".comm\t${nmdecor}OPENSSL_ia32cap_P,4";
154 if ($::elf) { push (@out,"$tmp,4\n"); }
155 else { push (@out,"$tmp\n"); }
156 }
157 if ($::macosx)
158 { if (%non_lazy_ptr) 153 { if (%non_lazy_ptr)
159 { push(@out,".section __IMPORT,__pointers,non_lazy_symbol_pointers\n"); 154 { push(@out,".section __IMPORT,__pointers,non_lazy_symbol_pointers\n");
160 foreach $i (keys %non_lazy_ptr) 155 foreach $i (keys %non_lazy_ptr)
161 { push(@out,"$non_lazy_ptr{$i}:\n.indirect_symbol\t$i\n.long\t0\n"); } 156 { push(@out,"$non_lazy_ptr{$i}:\n.indirect_symbol\t$i\n.long\t0\n"); }
162 } 157 }
163 } 158 }
159 if (grep {/\b${nmdecor}OPENSSL_ia32cap_P\b/i} @out) {
160 my $tmp=".comm\t${nmdecor}OPENSSL_ia32cap_P,8";
161 if ($::macosx) { push (@out,"$tmp,2\n"); }
162 elsif ($::elf) { push (@out,"$tmp,4\n"); }
163 else { push (@out,"$tmp\n"); }
164 }
164 push(@out,$initseg) if ($initseg); 165 push(@out,$initseg) if ($initseg);
165} 166}
166 167
167sub ::data_byte { push(@out,".byte\t".join(',',@_)."\n"); } 168sub ::data_byte { push(@out,".byte\t".join(',',@_)."\n"); }
169sub ::data_short{ push(@out,".value\t".join(',',@_)."\n"); }
168sub ::data_word { push(@out,".long\t".join(',',@_)."\n"); } 170sub ::data_word { push(@out,".long\t".join(',',@_)."\n"); }
169 171
170sub ::align 172sub ::align
@@ -180,7 +182,7 @@ sub ::align
180sub ::picmeup 182sub ::picmeup
181{ my($dst,$sym,$base,$reflabel)=@_; 183{ my($dst,$sym,$base,$reflabel)=@_;
182 184
183 if ($::pic && ($::elf || $::aout)) 185 if (($::pic && ($::elf || $::aout)) || $::macosx)
184 { if (!defined($base)) 186 { if (!defined($base))
185 { &::call(&::label("PIC_me_up")); 187 { &::call(&::label("PIC_me_up"));
186 &::set_label("PIC_me_up"); 188 &::set_label("PIC_me_up");
@@ -206,13 +208,17 @@ sub ::picmeup
206sub ::initseg 208sub ::initseg
207{ my $f=$nmdecor.shift; 209{ my $f=$nmdecor.shift;
208 210
209 if ($::elf) 211 if ($::android)
212 { $initseg.=<<___;
213.section .init_array
214.align 4
215.long $f
216___
217 }
218 elsif ($::elf)
210 { $initseg.=<<___; 219 { $initseg.=<<___;
211.section .init 220.section .init
212 call $f 221 call $f
213 jmp .Linitalign
214.align $align
215.Linitalign:
216___ 222___
217 } 223 }
218 elsif ($::coff) 224 elsif ($::coff)
diff --git a/src/lib/libssl/src/crypto/perlasm/x86masm.pl b/src/lib/libssl/src/crypto/perlasm/x86masm.pl
index 3d50e4a786..96b1b73e1a 100644
--- a/src/lib/libssl/src/crypto/perlasm/x86masm.pl
+++ b/src/lib/libssl/src/crypto/perlasm/x86masm.pl
@@ -14,9 +14,11 @@ sub ::generic
14{ my ($opcode,@arg)=@_; 14{ my ($opcode,@arg)=@_;
15 15
16 # fix hexadecimal constants 16 # fix hexadecimal constants
17 for (@arg) { s/0x([0-9a-f]+)/0$1h/oi; } 17 for (@arg) { s/(?<![\w\$\.])0x([0-9a-f]+)/0$1h/oi; }
18 18
19 if ($opcode !~ /movq/) 19 if ($opcode =~ /lea/ && @arg[1] =~ s/.*PTR\s+(\(.*\))$/OFFSET $1/) # no []
20 { $opcode="mov"; }
21 elsif ($opcode !~ /movq/)
20 { # fix xmm references 22 { # fix xmm references
21 $arg[0] =~ s/\b[A-Z]+WORD\s+PTR/XMMWORD PTR/i if ($arg[1]=~/\bxmm[0-7]\b/i); 23 $arg[0] =~ s/\b[A-Z]+WORD\s+PTR/XMMWORD PTR/i if ($arg[1]=~/\bxmm[0-7]\b/i);
22 $arg[1] =~ s/\b[A-Z]+WORD\s+PTR/XMMWORD PTR/i if ($arg[0]=~/\bxmm[0-7]\b/i); 24 $arg[1] =~ s/\b[A-Z]+WORD\s+PTR/XMMWORD PTR/i if ($arg[0]=~/\bxmm[0-7]\b/i);
@@ -65,6 +67,7 @@ sub get_mem
65 $ret; 67 $ret;
66} 68}
67sub ::BP { &get_mem("BYTE",@_); } 69sub ::BP { &get_mem("BYTE",@_); }
70sub ::WP { &get_mem("WORD",@_); }
68sub ::DWP { &get_mem("DWORD",@_); } 71sub ::DWP { &get_mem("DWORD",@_); }
69sub ::QWP { &get_mem("QWORD",@_); } 72sub ::QWP { &get_mem("QWORD",@_); }
70sub ::BC { "@_"; } 73sub ::BC { "@_"; }
@@ -129,7 +132,7 @@ ___
129 if (grep {/\b${nmdecor}OPENSSL_ia32cap_P\b/i} @out) 132 if (grep {/\b${nmdecor}OPENSSL_ia32cap_P\b/i} @out)
130 { my $comm=<<___; 133 { my $comm=<<___;
131.bss SEGMENT 'BSS' 134.bss SEGMENT 'BSS'
132COMM ${nmdecor}OPENSSL_ia32cap_P:DWORD 135COMM ${nmdecor}OPENSSL_ia32cap_P:QWORD
133.bss ENDS 136.bss ENDS
134___ 137___
135 # comment out OPENSSL_ia32cap_P declarations 138 # comment out OPENSSL_ia32cap_P declarations
@@ -156,6 +159,9 @@ sub ::public_label
156sub ::data_byte 159sub ::data_byte
157{ push(@out,("DB\t").join(',',@_)."\n"); } 160{ push(@out,("DB\t").join(',',@_)."\n"); }
158 161
162sub ::data_short
163{ push(@out,("DW\t").join(',',@_)."\n"); }
164
159sub ::data_word 165sub ::data_word
160{ push(@out,("DD\t").join(',',@_)."\n"); } 166{ push(@out,("DD\t").join(',',@_)."\n"); }
161 167
@@ -181,4 +187,11 @@ ___
181sub ::dataseg 187sub ::dataseg
182{ push(@out,"$segment\tENDS\n_DATA\tSEGMENT\n"); $segment="_DATA"; } 188{ push(@out,"$segment\tENDS\n_DATA\tSEGMENT\n"); $segment="_DATA"; }
183 189
190sub ::safeseh
191{ my $nm=shift;
192 push(@out,"IF \@Version GE 710\n");
193 push(@out,".SAFESEH ".&::LABEL($nm,$nmdecor.$nm)."\n");
194 push(@out,"ENDIF\n");
195}
196
1841; 1971;
diff --git a/src/lib/libssl/src/crypto/ppccap.c b/src/lib/libssl/src/crypto/ppccap.c
new file mode 100644
index 0000000000..ab89ccaa12
--- /dev/null
+++ b/src/lib/libssl/src/crypto/ppccap.c
@@ -0,0 +1,115 @@
1#include <stdio.h>
2#include <stdlib.h>
3#include <string.h>
4#include <setjmp.h>
5#include <signal.h>
6#include <crypto.h>
7#include <openssl/bn.h>
8
9#define PPC_FPU64 (1<<0)
10#define PPC_ALTIVEC (1<<1)
11
12static int OPENSSL_ppccap_P = 0;
13
14static sigset_t all_masked;
15
16#ifdef OPENSSL_BN_ASM_MONT
17int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np, const BN_ULONG *n0, int num)
18 {
19 int bn_mul_mont_fpu64(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np, const BN_ULONG *n0, int num);
20 int bn_mul_mont_int(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np, const BN_ULONG *n0, int num);
21
22 if (sizeof(size_t)==4)
23 {
24#if (defined(__APPLE__) && defined(__MACH__))
25 if (num>=8 && (num&3)==0 && (OPENSSL_ppccap_P&PPC_FPU64))
26 return bn_mul_mont_fpu64(rp,ap,bp,np,n0,num);
27#else
28 /* boundary of 32 was experimentally determined on
29 Linux 2.6.22, might have to be adjusted on AIX... */
30 if (num>=32 && (num&3)==0 && (OPENSSL_ppccap_P&PPC_FPU64))
31 {
32 sigset_t oset;
33 int ret;
34
35 sigprocmask(SIG_SETMASK,&all_masked,&oset);
36 ret=bn_mul_mont_fpu64(rp,ap,bp,np,n0,num);
37 sigprocmask(SIG_SETMASK,&oset,NULL);
38
39 return ret;
40 }
41#endif
42 }
43 else if ((OPENSSL_ppccap_P&PPC_FPU64))
44 /* this is a "must" on POWER6, but run-time detection
45 * is not implemented yet... */
46 return bn_mul_mont_fpu64(rp,ap,bp,np,n0,num);
47
48 return bn_mul_mont_int(rp,ap,bp,np,n0,num);
49 }
50#endif
51
52static sigjmp_buf ill_jmp;
53static void ill_handler (int sig) { siglongjmp(ill_jmp,sig); }
54
55void OPENSSL_ppc64_probe(void);
56
57void OPENSSL_cpuid_setup(void)
58 {
59 char *e;
60 struct sigaction ill_oact,ill_act;
61 sigset_t oset;
62 static int trigger=0;
63
64 if (trigger) return;
65 trigger=1;
66
67 sigfillset(&all_masked);
68 sigdelset(&all_masked,SIGILL);
69 sigdelset(&all_masked,SIGTRAP);
70#ifdef SIGEMT
71 sigdelset(&all_masked,SIGEMT);
72#endif
73 sigdelset(&all_masked,SIGFPE);
74 sigdelset(&all_masked,SIGBUS);
75 sigdelset(&all_masked,SIGSEGV);
76
77 if ((e=getenv("OPENSSL_ppccap")))
78 {
79 OPENSSL_ppccap_P=strtoul(e,NULL,0);
80 return;
81 }
82
83 OPENSSL_ppccap_P = 0;
84
85 memset(&ill_act,0,sizeof(ill_act));
86 ill_act.sa_handler = ill_handler;
87 ill_act.sa_mask = all_masked;
88
89 sigprocmask(SIG_SETMASK,&ill_act.sa_mask,&oset);
90 sigaction(SIGILL,&ill_act,&ill_oact);
91
92 if (sizeof(size_t)==4)
93 {
94 if (sigsetjmp(ill_jmp,1) == 0)
95 {
96 OPENSSL_ppc64_probe();
97 OPENSSL_ppccap_P |= PPC_FPU64;
98 }
99 }
100 else
101 {
102 /*
103 * Wanted code detecting POWER6 CPU and setting PPC_FPU64
104 */
105 }
106
107 if (sigsetjmp(ill_jmp,1) == 0)
108 {
109 OPENSSL_altivec_probe();
110 OPENSSL_ppccap_P |= PPC_ALTIVEC;
111 }
112
113 sigaction (SIGILL,&ill_oact,NULL);
114 sigprocmask(SIG_SETMASK,&oset,NULL);
115 }
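
Before probing anything, OPENSSL_cpuid_setup() above honours the OPENSSL_ppccap environment variable (parsed with strtoul(..., 0), so hex is accepted) and skips the SIGILL-based detection entirely. A sketch of how a test run could pin the capability mask, assuming it executes before the first libcrypto call:

#include <stdlib.h>

int main(void)
	{
	/* bit 0 = PPC_FPU64, bit 1 = PPC_ALTIVEC; "0" forces the generic paths */
	setenv("OPENSSL_ppccap", "0", 1);
	/* ... initialise and use libcrypto as usual ... */
	return 0;
	}
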
diff --git a/src/lib/libssl/src/crypto/ppccpuid.pl b/src/lib/libssl/src/crypto/ppccpuid.pl
index 369e1d0df9..4ba736a1d1 100755
--- a/src/lib/libssl/src/crypto/ppccpuid.pl
+++ b/src/lib/libssl/src/crypto/ppccpuid.pl
@@ -23,36 +23,67 @@ $code=<<___;
23.machine "any" 23.machine "any"
24.text 24.text
25 25
26.globl .OPENSSL_cpuid_setup 26.globl .OPENSSL_ppc64_probe
27.align 4 27.align 4
28.OPENSSL_cpuid_setup: 28.OPENSSL_ppc64_probe:
29 fcfid f1,f1
30 extrdi r0,r0,32,0
29 blr 31 blr
32 .long 0
33 .byte 0,12,0x14,0,0,0,0,0
34
35.globl .OPENSSL_altivec_probe
36.align 4
37.OPENSSL_altivec_probe:
38 .long 0x10000484 # vor v0,v0,v0
39 blr
40 .long 0
41 .byte 0,12,0x14,0,0,0,0,0
30 42
31.globl .OPENSSL_wipe_cpu 43.globl .OPENSSL_wipe_cpu
32.align 4 44.align 4
33.OPENSSL_wipe_cpu: 45.OPENSSL_wipe_cpu:
34 xor r0,r0,r0 46 xor r0,r0,r0
47 fmr f0,f31
48 fmr f1,f31
49 fmr f2,f31
35 mr r3,r1 50 mr r3,r1
51 fmr f3,f31
36 xor r4,r4,r4 52 xor r4,r4,r4
53 fmr f4,f31
37 xor r5,r5,r5 54 xor r5,r5,r5
55 fmr f5,f31
38 xor r6,r6,r6 56 xor r6,r6,r6
57 fmr f6,f31
39 xor r7,r7,r7 58 xor r7,r7,r7
59 fmr f7,f31
40 xor r8,r8,r8 60 xor r8,r8,r8
61 fmr f8,f31
41 xor r9,r9,r9 62 xor r9,r9,r9
63 fmr f9,f31
42 xor r10,r10,r10 64 xor r10,r10,r10
65 fmr f10,f31
43 xor r11,r11,r11 66 xor r11,r11,r11
67 fmr f11,f31
44 xor r12,r12,r12 68 xor r12,r12,r12
69 fmr f12,f31
70 fmr f13,f31
45 blr 71 blr
72 .long 0
73 .byte 0,12,0x14,0,0,0,0,0
46 74
47.globl .OPENSSL_atomic_add 75.globl .OPENSSL_atomic_add
48.align 4 76.align 4
49.OPENSSL_atomic_add: 77.OPENSSL_atomic_add:
50Loop: lwarx r5,0,r3 78Ladd: lwarx r5,0,r3
51 add r0,r4,r5 79 add r0,r4,r5
52 stwcx. r0,0,r3 80 stwcx. r0,0,r3
53 bne- Loop 81 bne- Ladd
54 $SIGNX r3,r0 82 $SIGNX r3,r0
55 blr 83 blr
84 .long 0
85 .byte 0,12,0x14,0,0,0,2,0
86 .long 0
56 87
57.globl .OPENSSL_rdtsc 88.globl .OPENSSL_rdtsc
58.align 4 89.align 4
@@ -60,6 +91,8 @@ Loop: lwarx r5,0,r3
60 mftb r3 91 mftb r3
61 mftbu r4 92 mftbu r4
62 blr 93 blr
94 .long 0
95 .byte 0,12,0x14,0,0,0,0,0
63 96
64.globl .OPENSSL_cleanse 97.globl .OPENSSL_cleanse
65.align 4 98.align 4
@@ -72,7 +105,7 @@ Loop: lwarx r5,0,r3
72Little: mtctr r4 105Little: mtctr r4
73 stb r0,0(r3) 106 stb r0,0(r3)
74 addi r3,r3,1 107 addi r3,r3,1
75 bdnz- \$-8 108 bdnz \$-8
76 blr 109 blr
77Lot: andi. r5,r3,3 110Lot: andi. r5,r3,3
78 beq Laligned 111 beq Laligned
@@ -85,10 +118,13 @@ Laligned:
85 mtctr r5 118 mtctr r5
86 stw r0,0(r3) 119 stw r0,0(r3)
87 addi r3,r3,4 120 addi r3,r3,4
88 bdnz- \$-8 121 bdnz \$-8
89 andi. r4,r4,3 122 andi. r4,r4,3
90 bne Little 123 bne Little
91 blr 124 blr
125 .long 0
126 .byte 0,12,0x14,0,0,0,2,0
127 .long 0
92___ 128___
93 129
94$code =~ s/\`([^\`]*)\`/eval $1/gem; 130$code =~ s/\`([^\`]*)\`/eval $1/gem;
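
The Ladd loop above is the PowerPC load-reserve/store-conditional idiom: lwarx reserves the word, stwcx. succeeds only if nothing else touched it, and bne- retries otherwise, so OPENSSL_atomic_add returns the updated value. A one-line C equivalent using the GCC-style builtin, shown purely as an illustration of the semantics:

#include <stdint.h>

int32_t atomic_add_sketch(volatile int32_t *p, int32_t amount)
	{
	/* returns the new value, retrying internally on contention */
	return __sync_add_and_fetch(p, amount);
	}
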
diff --git a/src/lib/libssl/src/crypto/rc4/asm/rc4-md5-x86_64.pl b/src/lib/libssl/src/crypto/rc4/asm/rc4-md5-x86_64.pl
new file mode 100644
index 0000000000..7f684092d4
--- /dev/null
+++ b/src/lib/libssl/src/crypto/rc4/asm/rc4-md5-x86_64.pl
@@ -0,0 +1,631 @@
1#!/usr/bin/env perl
2#
3# ====================================================================
4# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9
10# June 2011
11#
12# This is an RC4+MD5 "stitch" implementation. The idea, as spelled out in
13# http://download.intel.com/design/intarch/papers/323686.pdf, is that
14# since both algorithms exhibit instruction-level parallelism (ILP)
15# below the theoretical maximum, interleaving them allows better use of
16# processor resources and hence better performance. The RC4 instruction
17# sequence is virtually identical to rc4-x86_64.pl, which is heavily
18# based on a submission by Maxim Perminov, Maxim Locktyukhin and Jim
19# Guilford of Intel. MD5 is a fresh implementation aiming to minimize
20# register usage; it was used as the "main thread", with RC4 woven into
21# it, one RC4 round per MD5 round. In addition to the stitched
22# subroutine the script can generate standalone replacements for
23# md5_block_asm_data_order and RC4. Below are performance numbers in
24# cycles per processed byte (less is better) for the standalone
25# subroutines, their sum, and the stitched one:
26#
27# RC4 MD5 RC4+MD5 stitch gain
28# Opteron 6.5(*) 5.4 11.9 7.0 +70%(*)
29# Core2 6.5 5.8 12.3 7.7 +60%
30# Westmere 4.3 5.2 9.5 7.0 +36%
31# Sandy Bridge 4.2 5.5 9.7 6.8 +43%
32# Atom 9.3 6.5 15.8 11.1 +42%
33#
34# (*) rc4-x86_64.pl delivers 5.3 on Opteron, so real improvement
35# is +53%...
36
37my ($rc4,$md5)=(1,1); # what to generate?
38my $D="#" if (!$md5); # if set to "#", MD5 is stitched into RC4(),
39 # but its result is discarded. Idea here is
40 # to be able to use 'openssl speed rc4' for
41 # benchmarking the stitched subroutine...
42
43my $flavour = shift;
44my $output = shift;
45if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
46
47my $win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
48
49$0 =~ m/(.*[\/\\])[^\/\\]+$/; my $dir=$1; my $xlate;
50( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
51( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
52die "can't locate x86_64-xlate.pl";
53
54open STDOUT,"| $^X $xlate $flavour $output";
55
56my ($dat,$in0,$out,$ctx,$inp,$len, $func,$nargs);
57
58if ($rc4 && !$md5) {
59 ($dat,$len,$in0,$out) = ("%rdi","%rsi","%rdx","%rcx");
60 $func="RC4"; $nargs=4;
61} elsif ($md5 && !$rc4) {
62 ($ctx,$inp,$len) = ("%rdi","%rsi","%rdx");
63 $func="md5_block_asm_data_order"; $nargs=3;
64} else {
65 ($dat,$in0,$out,$ctx,$inp,$len) = ("%rdi","%rsi","%rdx","%rcx","%r8","%r9");
66 $func="rc4_md5_enc"; $nargs=6;
67 # void rc4_md5_enc(
68 # RC4_KEY *key, #
69 # const void *in0, # RC4 input
70 # void *out, # RC4 output
71 # MD5_CTX *ctx, #
72 # const void *inp, # MD5 input
73 # size_t len); # number of 64-byte blocks
74}
75
76my @K=( 0xd76aa478,0xe8c7b756,0x242070db,0xc1bdceee,
77 0xf57c0faf,0x4787c62a,0xa8304613,0xfd469501,
78 0x698098d8,0x8b44f7af,0xffff5bb1,0x895cd7be,
79 0x6b901122,0xfd987193,0xa679438e,0x49b40821,
80
81 0xf61e2562,0xc040b340,0x265e5a51,0xe9b6c7aa,
82 0xd62f105d,0x02441453,0xd8a1e681,0xe7d3fbc8,
83 0x21e1cde6,0xc33707d6,0xf4d50d87,0x455a14ed,
84 0xa9e3e905,0xfcefa3f8,0x676f02d9,0x8d2a4c8a,
85
86 0xfffa3942,0x8771f681,0x6d9d6122,0xfde5380c,
87 0xa4beea44,0x4bdecfa9,0xf6bb4b60,0xbebfbc70,
88 0x289b7ec6,0xeaa127fa,0xd4ef3085,0x04881d05,
89 0xd9d4d039,0xe6db99e5,0x1fa27cf8,0xc4ac5665,
90
91 0xf4292244,0x432aff97,0xab9423a7,0xfc93a039,
92 0x655b59c3,0x8f0ccc92,0xffeff47d,0x85845dd1,
93 0x6fa87e4f,0xfe2ce6e0,0xa3014314,0x4e0811a1,
94 0xf7537e82,0xbd3af235,0x2ad7d2bb,0xeb86d391 );
95
96my @V=("%r8d","%r9d","%r10d","%r11d"); # MD5 registers
97my $tmp="%r12d";
98
99my @XX=("%rbp","%rsi"); # RC4 registers
100my @TX=("%rax","%rbx");
101my $YY="%rcx";
102my $TY="%rdx";
103
104my $MOD=32; # 16, 32 or 64
105
106$code.=<<___;
107.text
108.align 16
109
110.globl $func
111.type $func,\@function,$nargs
112$func:
113 cmp \$0,$len
114 je .Labort
115 push %rbx
116 push %rbp
117 push %r12
118 push %r13
119 push %r14
120 push %r15
121 sub \$40,%rsp
122.Lbody:
123___
124if ($rc4) {
125$code.=<<___;
126$D#md5# mov $ctx,%r11 # reassign arguments
127 mov $len,%r12
128 mov $in0,%r13
129 mov $out,%r14
130$D#md5# mov $inp,%r15
131___
132 $ctx="%r11" if ($md5); # reassign arguments
133 $len="%r12";
134 $in0="%r13";
135 $out="%r14";
136 $inp="%r15" if ($md5);
137 $inp=$in0 if (!$md5);
138$code.=<<___;
139 xor $XX[0],$XX[0]
140 xor $YY,$YY
141
142 lea 8($dat),$dat
143 mov -8($dat),$XX[0]#b
144 mov -4($dat),$YY#b
145
146 inc $XX[0]#b
147 sub $in0,$out
148 movl ($dat,$XX[0],4),$TX[0]#d
149___
150$code.=<<___ if (!$md5);
151 xor $TX[1],$TX[1]
152 test \$-128,$len
153 jz .Loop1
154 sub $XX[0],$TX[1]
155 and \$`$MOD-1`,$TX[1]
156 jz .Loop${MOD}_is_hot
157 sub $TX[1],$len
158.Loop${MOD}_warmup:
159 add $TX[0]#b,$YY#b
160 movl ($dat,$YY,4),$TY#d
161 movl $TX[0]#d,($dat,$YY,4)
162 movl $TY#d,($dat,$XX[0],4)
163 add $TY#b,$TX[0]#b
164 inc $XX[0]#b
165 movl ($dat,$TX[0],4),$TY#d
166 movl ($dat,$XX[0],4),$TX[0]#d
167 xorb ($in0),$TY#b
168 movb $TY#b,($out,$in0)
169 lea 1($in0),$in0
170 dec $TX[1]
171 jnz .Loop${MOD}_warmup
172
173 mov $YY,$TX[1]
174 xor $YY,$YY
175 mov $TX[1]#b,$YY#b
176
177.Loop${MOD}_is_hot:
178 mov $len,32(%rsp) # save original $len
179 shr \$6,$len # number of 64-byte blocks
180___
181 if ($D && !$md5) { # stitch in dummy MD5
182 $md5=1;
183 $ctx="%r11";
184 $inp="%r15";
185 $code.=<<___;
186 mov %rsp,$ctx
187 mov $in0,$inp
188___
189 }
190}
191$code.=<<___;
192#rc4# add $TX[0]#b,$YY#b
193#rc4# lea ($dat,$XX[0],4),$XX[1]
194 shl \$6,$len
195 add $inp,$len # pointer to the end of input
196 mov $len,16(%rsp)
197
198#md5# mov $ctx,24(%rsp) # save pointer to MD5_CTX
199#md5# mov 0*4($ctx),$V[0] # load current hash value from MD5_CTX
200#md5# mov 1*4($ctx),$V[1]
201#md5# mov 2*4($ctx),$V[2]
202#md5# mov 3*4($ctx),$V[3]
203 jmp .Loop
204
205.align 16
206.Loop:
207#md5# mov $V[0],0*4(%rsp) # put aside current hash value
208#md5# mov $V[1],1*4(%rsp)
209#md5# mov $V[2],2*4(%rsp)
210#md5# mov $V[3],$tmp # forward reference
211#md5# mov $V[3],3*4(%rsp)
212___
213
214sub R0 {
215 my ($i,$a,$b,$c,$d)=@_;
216 my @rot0=(7,12,17,22);
217 my $j=$i%16;
218 my $k=$i%$MOD;
219 my $xmm="%xmm".($j&1);
220 $code.=" movdqu ($in0),%xmm2\n" if ($rc4 && $j==15);
221 $code.=" add \$$MOD,$XX[0]#b\n" if ($rc4 && $j==15 && $k==$MOD-1);
222 $code.=" pxor $xmm,$xmm\n" if ($rc4 && $j<=1);
223 $code.=<<___;
224#rc4# movl ($dat,$YY,4),$TY#d
225#md5# xor $c,$tmp
226#rc4# movl $TX[0]#d,($dat,$YY,4)
227#md5# and $b,$tmp
228#md5# add 4*`$j`($inp),$a
229#rc4# add $TY#b,$TX[0]#b
230#rc4# movl `4*(($k+1)%$MOD)`(`$k==$MOD-1?"$dat,$XX[0],4":"$XX[1]"`),$TX[1]#d
231#md5# add \$$K[$i],$a
232#md5# xor $d,$tmp
233#rc4# movz $TX[0]#b,$TX[0]#d
234#rc4# movl $TY#d,4*$k($XX[1])
235#md5# add $tmp,$a
236#rc4# add $TX[1]#b,$YY#b
237#md5# rol \$$rot0[$j%4],$a
238#md5# mov `$j==15?"$b":"$c"`,$tmp # forward reference
239#rc4# pinsrw \$`($j>>1)&7`,($dat,$TX[0],4),$xmm\n
240#md5# add $b,$a
241___
242 $code.=<<___ if ($rc4 && $j==15 && $k==$MOD-1);
243 mov $YY,$XX[1]
244 xor $YY,$YY # keyword to partial register
245 mov $XX[1]#b,$YY#b
246 lea ($dat,$XX[0],4),$XX[1]
247___
248 $code.=<<___ if ($rc4 && $j==15);
249 psllq \$8,%xmm1
250 pxor %xmm0,%xmm2
251 pxor %xmm1,%xmm2
252___
253}
254sub R1 {
255 my ($i,$a,$b,$c,$d)=@_;
256 my @rot1=(5,9,14,20);
257 my $j=$i%16;
258 my $k=$i%$MOD;
259 my $xmm="%xmm".($j&1);
260 $code.=" movdqu 16($in0),%xmm3\n" if ($rc4 && $j==15);
261 $code.=" add \$$MOD,$XX[0]#b\n" if ($rc4 && $j==15 && $k==$MOD-1);
262 $code.=" pxor $xmm,$xmm\n" if ($rc4 && $j<=1);
263 $code.=<<___;
264#rc4# movl ($dat,$YY,4),$TY#d
265#md5# xor $b,$tmp
266#rc4# movl $TX[0]#d,($dat,$YY,4)
267#md5# and $d,$tmp
268#md5# add 4*`((1+5*$j)%16)`($inp),$a
269#rc4# add $TY#b,$TX[0]#b
270#rc4# movl `4*(($k+1)%$MOD)`(`$k==$MOD-1?"$dat,$XX[0],4":"$XX[1]"`),$TX[1]#d
271#md5# add \$$K[$i],$a
272#md5# xor $c,$tmp
273#rc4# movz $TX[0]#b,$TX[0]#d
274#rc4# movl $TY#d,4*$k($XX[1])
275#md5# add $tmp,$a
276#rc4# add $TX[1]#b,$YY#b
277#md5# rol \$$rot1[$j%4],$a
278#md5# mov `$j==15?"$c":"$b"`,$tmp # forward reference
279#rc4# pinsrw \$`($j>>1)&7`,($dat,$TX[0],4),$xmm\n
280#md5# add $b,$a
281___
282 $code.=<<___ if ($rc4 && $j==15 && $k==$MOD-1);
283 mov $YY,$XX[1]
284 xor $YY,$YY # keyword to partial register
285 mov $XX[1]#b,$YY#b
286 lea ($dat,$XX[0],4),$XX[1]
287___
288 $code.=<<___ if ($rc4 && $j==15);
289 psllq \$8,%xmm1
290 pxor %xmm0,%xmm3
291 pxor %xmm1,%xmm3
292___
293}
294sub R2 {
295 my ($i,$a,$b,$c,$d)=@_;
296 my @rot2=(4,11,16,23);
297 my $j=$i%16;
298 my $k=$i%$MOD;
299 my $xmm="%xmm".($j&1);
300 $code.=" movdqu 32($in0),%xmm4\n" if ($rc4 && $j==15);
301 $code.=" add \$$MOD,$XX[0]#b\n" if ($rc4 && $j==15 && $k==$MOD-1);
302 $code.=" pxor $xmm,$xmm\n" if ($rc4 && $j<=1);
303 $code.=<<___;
304#rc4# movl ($dat,$YY,4),$TY#d
305#md5# xor $c,$tmp
306#rc4# movl $TX[0]#d,($dat,$YY,4)
307#md5# xor $b,$tmp
308#md5# add 4*`((5+3*$j)%16)`($inp),$a
309#rc4# add $TY#b,$TX[0]#b
310#rc4# movl `4*(($k+1)%$MOD)`(`$k==$MOD-1?"$dat,$XX[0],4":"$XX[1]"`),$TX[1]#d
311#md5# add \$$K[$i],$a
312#rc4# movz $TX[0]#b,$TX[0]#d
313#md5# add $tmp,$a
314#rc4# movl $TY#d,4*$k($XX[1])
315#rc4# add $TX[1]#b,$YY#b
316#md5# rol \$$rot2[$j%4],$a
317#md5# mov `$j==15?"\\\$-1":"$c"`,$tmp # forward reference
318#rc4# pinsrw \$`($j>>1)&7`,($dat,$TX[0],4),$xmm\n
319#md5# add $b,$a
320___
321 $code.=<<___ if ($rc4 && $j==15 && $k==$MOD-1);
322 mov $YY,$XX[1]
323 xor $YY,$YY # keyword to partial register
324 mov $XX[1]#b,$YY#b
325 lea ($dat,$XX[0],4),$XX[1]
326___
327 $code.=<<___ if ($rc4 && $j==15);
328 psllq \$8,%xmm1
329 pxor %xmm0,%xmm4
330 pxor %xmm1,%xmm4
331___
332}
333sub R3 {
334 my ($i,$a,$b,$c,$d)=@_;
335 my @rot3=(6,10,15,21);
336 my $j=$i%16;
337 my $k=$i%$MOD;
338 my $xmm="%xmm".($j&1);
339 $code.=" movdqu 48($in0),%xmm5\n" if ($rc4 && $j==15);
340 $code.=" add \$$MOD,$XX[0]#b\n" if ($rc4 && $j==15 && $k==$MOD-1);
341 $code.=" pxor $xmm,$xmm\n" if ($rc4 && $j<=1);
342 $code.=<<___;
343#rc4# movl ($dat,$YY,4),$TY#d
344#md5# xor $d,$tmp
345#rc4# movl $TX[0]#d,($dat,$YY,4)
346#md5# or $b,$tmp
347#md5# add 4*`((7*$j)%16)`($inp),$a
348#rc4# add $TY#b,$TX[0]#b
349#rc4# movl `4*(($k+1)%$MOD)`(`$k==$MOD-1?"$dat,$XX[0],4":"$XX[1]"`),$TX[1]#d
350#md5# add \$$K[$i],$a
351#rc4# movz $TX[0]#b,$TX[0]#d
352#md5# xor $c,$tmp
353#rc4# movl $TY#d,4*$k($XX[1])
354#md5# add $tmp,$a
355#rc4# add $TX[1]#b,$YY#b
356#md5# rol \$$rot3[$j%4],$a
357#md5# mov \$-1,$tmp # forward reference
358#rc4# pinsrw \$`($j>>1)&7`,($dat,$TX[0],4),$xmm\n
359#md5# add $b,$a
360___
361 $code.=<<___ if ($rc4 && $j==15);
362 mov $XX[0],$XX[1]
363 xor $XX[0],$XX[0] # keyword to partial register
364 mov $XX[1]#b,$XX[0]#b
365 mov $YY,$XX[1]
366 xor $YY,$YY # keyword to partial register
367 mov $XX[1]#b,$YY#b
368 lea ($dat,$XX[0],4),$XX[1]
369 psllq \$8,%xmm1
370 pxor %xmm0,%xmm5
371 pxor %xmm1,%xmm5
372___
373}
374
375my $i=0;
376for(;$i<16;$i++) { R0($i,@V); unshift(@V,pop(@V)); push(@TX,shift(@TX)); }
377for(;$i<32;$i++) { R1($i,@V); unshift(@V,pop(@V)); push(@TX,shift(@TX)); }
378for(;$i<48;$i++) { R2($i,@V); unshift(@V,pop(@V)); push(@TX,shift(@TX)); }
379for(;$i<64;$i++) { R3($i,@V); unshift(@V,pop(@V)); push(@TX,shift(@TX)); }
380
381$code.=<<___;
382#md5# add 0*4(%rsp),$V[0] # accumulate hash value
383#md5# add 1*4(%rsp),$V[1]
384#md5# add 2*4(%rsp),$V[2]
385#md5# add 3*4(%rsp),$V[3]
386
387#rc4# movdqu %xmm2,($out,$in0) # write RC4 output
388#rc4# movdqu %xmm3,16($out,$in0)
389#rc4# movdqu %xmm4,32($out,$in0)
390#rc4# movdqu %xmm5,48($out,$in0)
391#md5# lea 64($inp),$inp
392#rc4# lea 64($in0),$in0
393 cmp 16(%rsp),$inp # are we done?
394 jb .Loop
395
396#md5# mov 24(%rsp),$len # restore pointer to MD5_CTX
397#rc4# sub $TX[0]#b,$YY#b # correct $YY
398#md5# mov $V[0],0*4($len) # write MD5_CTX
399#md5# mov $V[1],1*4($len)
400#md5# mov $V[2],2*4($len)
401#md5# mov $V[3],3*4($len)
402___
403$code.=<<___ if ($rc4 && (!$md5 || $D));
404 mov 32(%rsp),$len # restore original $len
405 and \$63,$len # remaining bytes
406 jnz .Loop1
407 jmp .Ldone
408
409.align 16
410.Loop1:
411 add $TX[0]#b,$YY#b
412 movl ($dat,$YY,4),$TY#d
413 movl $TX[0]#d,($dat,$YY,4)
414 movl $TY#d,($dat,$XX[0],4)
415 add $TY#b,$TX[0]#b
416 inc $XX[0]#b
417 movl ($dat,$TX[0],4),$TY#d
418 movl ($dat,$XX[0],4),$TX[0]#d
419 xorb ($in0),$TY#b
420 movb $TY#b,($out,$in0)
421 lea 1($in0),$in0
422 dec $len
423 jnz .Loop1
424
425.Ldone:
426___
427$code.=<<___;
428#rc4# sub \$1,$XX[0]#b
429#rc4# movl $XX[0]#d,-8($dat)
430#rc4# movl $YY#d,-4($dat)
431
432 mov 40(%rsp),%r15
433 mov 48(%rsp),%r14
434 mov 56(%rsp),%r13
435 mov 64(%rsp),%r12
436 mov 72(%rsp),%rbp
437 mov 80(%rsp),%rbx
438 lea 88(%rsp),%rsp
439.Lepilogue:
440.Labort:
441 ret
442.size $func,.-$func
443___
444
445if ($rc4 && $D) { # sole purpose of this section is to provide
446 # option to use the generated module as drop-in
447 # replacement for rc4-x86_64.pl for debugging
448 # and testing purposes...
449my ($idx,$ido)=("%r8","%r9");
450my ($dat,$len,$inp)=("%rdi","%rsi","%rdx");
451
452$code.=<<___;
453.globl RC4_set_key
454.type RC4_set_key,\@function,3
455.align 16
456RC4_set_key:
457 lea 8($dat),$dat
458 lea ($inp,$len),$inp
459 neg $len
460 mov $len,%rcx
461 xor %eax,%eax
462 xor $ido,$ido
463 xor %r10,%r10
464 xor %r11,%r11
465 jmp .Lw1stloop
466
467.align 16
468.Lw1stloop:
469 mov %eax,($dat,%rax,4)
470 add \$1,%al
471 jnc .Lw1stloop
472
473 xor $ido,$ido
474 xor $idx,$idx
475.align 16
476.Lw2ndloop:
477 mov ($dat,$ido,4),%r10d
478 add ($inp,$len,1),$idx#b
479 add %r10b,$idx#b
480 add \$1,$len
481 mov ($dat,$idx,4),%r11d
482 cmovz %rcx,$len
483 mov %r10d,($dat,$idx,4)
484 mov %r11d,($dat,$ido,4)
485 add \$1,$ido#b
486 jnc .Lw2ndloop
487
488 xor %eax,%eax
489 mov %eax,-8($dat)
490 mov %eax,-4($dat)
491 ret
492.size RC4_set_key,.-RC4_set_key
493
494.globl RC4_options
495.type RC4_options,\@abi-omnipotent
496.align 16
497RC4_options:
498 lea .Lopts(%rip),%rax
499 ret
500.align 64
501.Lopts:
502.asciz "rc4(64x,int)"
503.align 64
504.size RC4_options,.-RC4_options
505___
506}
507# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame,
508# CONTEXT *context,DISPATCHER_CONTEXT *disp)
509if ($win64) {
510my $rec="%rcx";
511my $frame="%rdx";
512my $context="%r8";
513my $disp="%r9";
514
515$code.=<<___;
516.extern __imp_RtlVirtualUnwind
517.type se_handler,\@abi-omnipotent
518.align 16
519se_handler:
520 push %rsi
521 push %rdi
522 push %rbx
523 push %rbp
524 push %r12
525 push %r13
526 push %r14
527 push %r15
528 pushfq
529 sub \$64,%rsp
530
531 mov 120($context),%rax # pull context->Rax
532 mov 248($context),%rbx # pull context->Rip
533
534 lea .Lbody(%rip),%r10
535 cmp %r10,%rbx # context->Rip<.Lbody
536 jb .Lin_prologue
537
538 mov 152($context),%rax # pull context->Rsp
539
540 lea .Lepilogue(%rip),%r10
541 cmp %r10,%rbx # context->Rip>=.Lepilogue
542 jae .Lin_prologue
543
544 mov 40(%rax),%r15
545 mov 48(%rax),%r14
546 mov 56(%rax),%r13
547 mov 64(%rax),%r12
548 mov 72(%rax),%rbp
549 mov 80(%rax),%rbx
550 lea 88(%rax),%rax
551
552 mov %rbx,144($context) # restore context->Rbx
553 mov %rbp,160($context) # restore context->Rbp
554 mov %r12,216($context) # restore context->R12
555 mov %r13,224($context) # restore context->R13
556 mov %r14,232($context) # restore context->R14
557 mov %r15,240($context) # restore context->R15
558
559.Lin_prologue:
560 mov 8(%rax),%rdi
561 mov 16(%rax),%rsi
562 mov %rax,152($context) # restore context->Rsp
563 mov %rsi,168($context) # restore context->Rsi
564 mov %rdi,176($context) # restore context->Rdi
565
566 mov 40($disp),%rdi # disp->ContextRecord
567 mov $context,%rsi # context
568 mov \$154,%ecx # sizeof(CONTEXT)
569 .long 0xa548f3fc # cld; rep movsq
570
571 mov $disp,%rsi
572 xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER
573 mov 8(%rsi),%rdx # arg2, disp->ImageBase
574 mov 0(%rsi),%r8 # arg3, disp->ControlPc
575 mov 16(%rsi),%r9 # arg4, disp->FunctionEntry
576 mov 40(%rsi),%r10 # disp->ContextRecord
577 lea 56(%rsi),%r11 # &disp->HandlerData
578 lea 24(%rsi),%r12 # &disp->EstablisherFrame
579 mov %r10,32(%rsp) # arg5
580 mov %r11,40(%rsp) # arg6
581 mov %r12,48(%rsp) # arg7
582 mov %rcx,56(%rsp) # arg8, (NULL)
583 call *__imp_RtlVirtualUnwind(%rip)
584
585 mov \$1,%eax # ExceptionContinueSearch
586 add \$64,%rsp
587 popfq
588 pop %r15
589 pop %r14
590 pop %r13
591 pop %r12
592 pop %rbp
593 pop %rbx
594 pop %rdi
595 pop %rsi
596 ret
597.size se_handler,.-se_handler
598
599.section .pdata
600.align 4
601 .rva .LSEH_begin_$func
602 .rva .LSEH_end_$func
603 .rva .LSEH_info_$func
604
605.section .xdata
606.align 8
607.LSEH_info_$func:
608 .byte 9,0,0,0
609 .rva se_handler
610___
611}
612
613sub reg_part {
614my ($reg,$conv)=@_;
615 if ($reg =~ /%r[0-9]+/) { $reg .= $conv; }
616 elsif ($conv eq "b") { $reg =~ s/%[er]([^x]+)x?/%$1l/; }
617 elsif ($conv eq "w") { $reg =~ s/%[er](.+)/%$1/; }
618 elsif ($conv eq "d") { $reg =~ s/%[er](.+)/%e$1/; }
619 return $reg;
620}
621
622$code =~ s/(%[a-z0-9]+)#([bwd])/reg_part($1,$2)/gem;
623$code =~ s/\`([^\`]*)\`/eval $1/gem;
624$code =~ s/pinsrw\s+\$0,/movd /gm;
625
626$code =~ s/#md5#//gm if ($md5);
627$code =~ s/#rc4#//gm if ($rc4);
628
629print $code;
630
631close STDOUT;
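
The prototype spelled out in the comments (rc4_md5_enc(key, in0, out, ctx, inp, len) with len counted in 64-byte blocks) suggests the calling convention: whole blocks go through the stitched routine, anything left over is fed to the standalone primitives. A hedged sketch of such a driver, hashing the same buffer it encrypts; the real glue in the tree may differ:

#include <stddef.h>
#include <openssl/md5.h>
#include <openssl/rc4.h>

/* provided by the module generated above */
void rc4_md5_enc(RC4_KEY *key, const void *in0, void *out,
    MD5_CTX *ctx, const void *inp, size_t blocks);

void encrypt_and_hash(RC4_KEY *key, MD5_CTX *ctx,
    const unsigned char *in, unsigned char *out, size_t len)
	{
	size_t blocks = len / 64, tail = len % 64;

	if (blocks)
		rc4_md5_enc(key, in, out, ctx, in, blocks);
	if (tail)
		{
		RC4(key, tail, in + blocks * 64, out + blocks * 64);
		MD5_Update(ctx, in + blocks * 64, tail);
		}
	}
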
diff --git a/src/lib/libssl/src/crypto/rc4/asm/rc4-parisc.pl b/src/lib/libssl/src/crypto/rc4/asm/rc4-parisc.pl
new file mode 100644
index 0000000000..9165067080
--- /dev/null
+++ b/src/lib/libssl/src/crypto/rc4/asm/rc4-parisc.pl
@@ -0,0 +1,313 @@
1#!/usr/bin/env perl
2
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9
10# RC4 for PA-RISC.
11
12# June 2009.
13#
14# Performance is 33% better than gcc 3.2 generated code on PA-7100LC.
15# For reference, the [4x] unrolled loop is >40% faster than the folded one.
16# It is possible to unroll the loop 8 times on PA-RISC 2.0, but the
17# improvement is not believed to be sufficient to justify the effort...
18#
19# Special thanks to polarhome.com for providing HP-UX account.
20
21$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
22
23$flavour = shift;
24$output = shift;
25open STDOUT,">$output";
26
27if ($flavour =~ /64/) {
28 $LEVEL ="2.0W";
29 $SIZE_T =8;
30 $FRAME_MARKER =80;
31 $SAVED_RP =16;
32 $PUSH ="std";
33 $PUSHMA ="std,ma";
34 $POP ="ldd";
35 $POPMB ="ldd,mb";
36} else {
37 $LEVEL ="1.0";
38 $SIZE_T =4;
39 $FRAME_MARKER =48;
40 $SAVED_RP =20;
41 $PUSH ="stw";
42 $PUSHMA ="stwm";
43 $POP ="ldw";
44 $POPMB ="ldwm";
45}
46
47$FRAME=4*$SIZE_T+$FRAME_MARKER; # 4 saved regs + frame marker
48 # [+ argument transfer]
49$SZ=1; # defaults to RC4_CHAR
50if (open CONF,"<${dir}../../opensslconf.h") {
51 while(<CONF>) {
52 if (m/#\s*define\s+RC4_INT\s+(.*)/) {
53 $SZ = ($1=~/char$/) ? 1 : 4;
54 last;
55 }
56 }
57 close CONF;
58}
59
60if ($SZ==1) { # RC4_CHAR
61 $LD="ldb";
62 $LDX="ldbx";
63 $MKX="addl";
64 $ST="stb";
65} else { # RC4_INT (~5% faster than RC4_CHAR on PA-7100LC)
66 $LD="ldw";
67 $LDX="ldwx,s";
68 $MKX="sh2addl";
69 $ST="stw";
70}
71
72$key="%r26";
73$len="%r25";
74$inp="%r24";
75$out="%r23";
76
77@XX=("%r19","%r20");
78@TX=("%r21","%r22");
79$YY="%r28";
80$TY="%r29";
81
82$acc="%r1";
83$ix="%r2";
84$iy="%r3";
85$dat0="%r4";
86$dat1="%r5";
87$rem="%r6";
88$mask="%r31";
89
90sub unrolledloopbody {
91for ($i=0;$i<4;$i++) {
92$code.=<<___;
93 ldo 1($XX[0]),$XX[1]
94 `sprintf("$LDX %$TY(%$key),%$dat1") if ($i>0)`
95 and $mask,$XX[1],$XX[1]
96 $LDX $YY($key),$TY
97 $MKX $YY,$key,$ix
98 $LDX $XX[1]($key),$TX[1]
99 $MKX $XX[0],$key,$iy
100 $ST $TX[0],0($ix)
101 comclr,<> $XX[1],$YY,%r0 ; conditional
102 copy $TX[0],$TX[1] ; move
103 `sprintf("%sdep %$dat1,%d,8,%$acc",$i==1?"z":"",8*($i-1)+7) if ($i>0)`
104 $ST $TY,0($iy)
105 addl $TX[0],$TY,$TY
106 addl $TX[1],$YY,$YY
107 and $mask,$TY,$TY
108 and $mask,$YY,$YY
109___
110push(@TX,shift(@TX)); push(@XX,shift(@XX)); # "rotate" registers
111} }
112
113sub foldedloop {
114my ($label,$count)=@_;
115$code.=<<___;
116$label
117 $MKX $YY,$key,$iy
118 $LDX $YY($key),$TY
119 $MKX $XX[0],$key,$ix
120 $ST $TX[0],0($iy)
121 ldo 1($XX[0]),$XX[0]
122 $ST $TY,0($ix)
123 addl $TX[0],$TY,$TY
124 ldbx $inp($out),$dat1
125 and $mask,$TY,$TY
126 and $mask,$XX[0],$XX[0]
127 $LDX $TY($key),$acc
128 $LDX $XX[0]($key),$TX[0]
129 ldo 1($out),$out
130 xor $dat1,$acc,$acc
131 addl $TX[0],$YY,$YY
132 stb $acc,-1($out)
133 addib,<> -1,$count,$label ; $count is always small
134 and $mask,$YY,$YY
135___
136}
137
138$code=<<___;
139 .LEVEL $LEVEL
140 .SPACE \$TEXT\$
141 .SUBSPA \$CODE\$,QUAD=0,ALIGN=8,ACCESS=0x2C,CODE_ONLY
142
143 .EXPORT RC4,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR
144RC4
145 .PROC
146 .CALLINFO FRAME=`$FRAME-4*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=6
147 .ENTRY
148 $PUSH %r2,-$SAVED_RP(%sp) ; standard prologue
149 $PUSHMA %r3,$FRAME(%sp)
150 $PUSH %r4,`-$FRAME+1*$SIZE_T`(%sp)
151 $PUSH %r5,`-$FRAME+2*$SIZE_T`(%sp)
152 $PUSH %r6,`-$FRAME+3*$SIZE_T`(%sp)
153
154 cmpib,*= 0,$len,L\$abort
155 sub $inp,$out,$inp ; distance between $inp and $out
156
157 $LD `0*$SZ`($key),$XX[0]
158 $LD `1*$SZ`($key),$YY
159 ldo `2*$SZ`($key),$key
160
161 ldi 0xff,$mask
162 ldi 3,$dat0
163
164 ldo 1($XX[0]),$XX[0] ; warm up loop
165 and $mask,$XX[0],$XX[0]
166 $LDX $XX[0]($key),$TX[0]
167 addl $TX[0],$YY,$YY
168 cmpib,*>>= 6,$len,L\$oop1 ; is $len large enough to bother?
169 and $mask,$YY,$YY
170
171 and,<> $out,$dat0,$rem ; is $out aligned?
172 b L\$alignedout
173 subi 4,$rem,$rem
174 sub $len,$rem,$len
175___
176&foldedloop("L\$alignout",$rem); # process till $out is aligned
177
178$code.=<<___;
179L\$alignedout ; $len is at least 4 here
180 and,<> $inp,$dat0,$acc ; is $inp aligned?
181 b L\$oop4
182 sub $inp,$acc,$rem ; align $inp
183
184 sh3addl $acc,%r0,$acc
185 subi 32,$acc,$acc
186 mtctl $acc,%cr11 ; load %sar with vshd align factor
187 ldwx $rem($out),$dat0
188 ldo 4($rem),$rem
189L\$oop4misalignedinp
190___
191&unrolledloopbody();
192$code.=<<___;
193 $LDX $TY($key),$ix
194 ldwx $rem($out),$dat1
195 ldo -4($len),$len
196 or $ix,$acc,$acc ; last piece, no need to dep
197 vshd $dat0,$dat1,$iy ; align data
198 copy $dat1,$dat0
199 xor $iy,$acc,$acc
200 stw $acc,0($out)
201 cmpib,*<< 3,$len,L\$oop4misalignedinp
202 ldo 4($out),$out
203 cmpib,*= 0,$len,L\$done
204 nop
205 b L\$oop1
206 nop
207
208 .ALIGN 8
209L\$oop4
210___
211&unrolledloopbody();
212$code.=<<___;
213 $LDX $TY($key),$ix
214 ldwx $inp($out),$dat0
215 ldo -4($len),$len
216 or $ix,$acc,$acc ; last piece, no need to dep
217 xor $dat0,$acc,$acc
218 stw $acc,0($out)
219 cmpib,*<< 3,$len,L\$oop4
220 ldo 4($out),$out
221 cmpib,*= 0,$len,L\$done
222 nop
223___
224&foldedloop("L\$oop1",$len);
225$code.=<<___;
226L\$done
227 $POP `-$FRAME-$SAVED_RP`(%sp),%r2
228 ldo -1($XX[0]),$XX[0] ; chill out loop
229 sub $YY,$TX[0],$YY
230 and $mask,$XX[0],$XX[0]
231 and $mask,$YY,$YY
232 $ST $XX[0],`-2*$SZ`($key)
233 $ST $YY,`-1*$SZ`($key)
234 $POP `-$FRAME+1*$SIZE_T`(%sp),%r4
235 $POP `-$FRAME+2*$SIZE_T`(%sp),%r5
236 $POP `-$FRAME+3*$SIZE_T`(%sp),%r6
237L\$abort
238 bv (%r2)
239 .EXIT
240 $POPMB -$FRAME(%sp),%r3
241 .PROCEND
242___
243
244$code.=<<___;
245
246 .EXPORT private_RC4_set_key,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR
247 .ALIGN 8
248private_RC4_set_key
249 .PROC
250 .CALLINFO NO_CALLS
251 .ENTRY
252 $ST %r0,`0*$SZ`($key)
253 $ST %r0,`1*$SZ`($key)
254 ldo `2*$SZ`($key),$key
255 copy %r0,@XX[0]
256L\$1st
257 $ST @XX[0],0($key)
258 ldo 1(@XX[0]),@XX[0]
259 bb,>= @XX[0],`31-8`,L\$1st ; @XX[0]<256
260 ldo $SZ($key),$key
261
262 ldo `-256*$SZ`($key),$key ; rewind $key
263 addl $len,$inp,$inp ; $inp to point at the end
264 sub %r0,$len,%r23 ; inverse index
265 copy %r0,@XX[0]
266 copy %r0,@XX[1]
267 ldi 0xff,$mask
268
269L\$2nd
270 $LDX @XX[0]($key),@TX[0]
271 ldbx %r23($inp),@TX[1]
272 addi,nuv 1,%r23,%r23 ; increment and conditional
273 sub %r0,$len,%r23 ; inverse index
274 addl @TX[0],@XX[1],@XX[1]
275 addl @TX[1],@XX[1],@XX[1]
276 and $mask,@XX[1],@XX[1]
277 $MKX @XX[0],$key,$TY
278 $LDX @XX[1]($key),@TX[1]
279 $MKX @XX[1],$key,$YY
280 ldo 1(@XX[0]),@XX[0]
281 $ST @TX[0],0($YY)
282 bb,>= @XX[0],`31-8`,L\$2nd ; @XX[0]<256
283 $ST @TX[1],0($TY)
284
285 bv,n (%r2)
286 .EXIT
287 nop
288 .PROCEND
289
290 .EXPORT RC4_options,ENTRY
291 .ALIGN 8
292RC4_options
293 .PROC
294 .CALLINFO NO_CALLS
295 .ENTRY
296 blr %r0,%r28
297 ldi 3,%r1
298L\$pic
299 andcm %r28,%r1,%r28
300 bv (%r2)
301 .EXIT
302 ldo L\$opts-L\$pic(%r28),%r28
303 .PROCEND
304 .ALIGN 8
305L\$opts
306 .STRINGZ "rc4(4x,`$SZ==1?"char":"int"`)"
307 .STRINGZ "RC4 for PA-RISC, CRYPTOGAMS by <appro\@openssl.org>"
308___
309$code =~ s/\`([^\`]*)\`/eval $1/gem;
310$code =~ s/cmpib,\*/comib,/gm if ($SIZE_T==4);
311
312print $code;
313close STDOUT;
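
For readers lining the PA-RISC code up against the algorithm it implements, a plain C reference of the RC4 key schedule and the byte-at-a-time stream loop may help. This is a minimal sketch of textbook RC4 with an unsigned char state array (the RC4_CHAR layout); the rc4_ref_* names are illustrative only and not part of the library.

#include <stddef.h>

typedef struct {
	unsigned char S[256];
	unsigned char x, y;
} RC4_REF_KEY;

/* Key schedule: start from the identity permutation, then swap entries
 * driven by the key bytes (what private_RC4_set_key generates above). */
static void rc4_ref_set_key(RC4_REF_KEY *k, const unsigned char *key, size_t len)
{
	unsigned int i, j = 0;
	unsigned char t;

	for (i = 0; i < 256; i++)
		k->S[i] = (unsigned char)i;
	for (i = 0; i < 256; i++) {
		j = (j + k->S[i] + key[i % len]) & 0xff;
		t = k->S[i]; k->S[i] = k->S[j]; k->S[j] = t;
	}
	k->x = k->y = 0;
}

/* Stream generation: one swap and one extra lookup per output byte; this is
 * the "folded" loop, which the generator unrolls 4x so it can store a whole
 * word at a time (hence the vshd alignment dance for misaligned input). */
static void rc4_ref(RC4_REF_KEY *k, size_t len, const unsigned char *in,
    unsigned char *out)
{
	unsigned char x = k->x, y = k->y, t;

	while (len--) {
		x = (x + 1) & 0xff;
		y = (y + k->S[x]) & 0xff;
		t = k->S[x]; k->S[x] = k->S[y]; k->S[y] = t;
		*out++ = *in++ ^ k->S[(k->S[x] + k->S[y]) & 0xff];
	}
	k->x = x;
	k->y = y;
}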
diff --git a/src/lib/libssl/src/crypto/rc4/asm/rc4-s390x.pl b/src/lib/libssl/src/crypto/rc4/asm/rc4-s390x.pl
index 96681fa05e..7528ece13c 100644
--- a/src/lib/libssl/src/crypto/rc4/asm/rc4-s390x.pl
+++ b/src/lib/libssl/src/crypto/rc4/asm/rc4-s390x.pl
@@ -13,6 +13,29 @@
13# "cluster" Address Generation Interlocks, so that one pipeline stall 13# "cluster" Address Generation Interlocks, so that one pipeline stall
14# resolves several dependencies. 14# resolves several dependencies.
15 15
16# November 2010.
17#
18# Adapt for -m31 build. If the kernel supports what's called the "highgprs"
19# feature on Linux [see /proc/cpuinfo], it's possible to use 64-bit
20# instructions and achieve "64-bit" performance even in a 31-bit legacy
21# application context. The feature is not specific to any particular
22# processor, as long as it's a "z-CPU". The latter implies that the code
23# remains z/Architecture specific. On z990 it was measured to perform
24# 50% better than code generated by gcc 4.3.
25
26$flavour = shift;
27
28if ($flavour =~ /3[12]/) {
29 $SIZE_T=4;
30 $g="";
31} else {
32 $SIZE_T=8;
33 $g="g";
34}
35
36while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
37open STDOUT,">$output";
38
16$rp="%r14"; 39$rp="%r14";
17$sp="%r15"; 40$sp="%r15";
18$code=<<___; 41$code=<<___;
@@ -39,7 +62,12 @@ $code.=<<___;
39.type RC4,\@function 62.type RC4,\@function
40.align 64 63.align 64
41RC4: 64RC4:
42 stmg %r6,%r11,48($sp) 65 stm${g} %r6,%r11,6*$SIZE_T($sp)
66___
67$code.=<<___ if ($flavour =~ /3[12]/);
68 llgfr $len,$len
69___
70$code.=<<___;
43 llgc $XX[0],0($key) 71 llgc $XX[0],0($key)
44 llgc $YY,1($key) 72 llgc $YY,1($key)
45 la $XX[0],1($XX[0]) 73 la $XX[0],1($XX[0])
@@ -90,7 +118,7 @@ $code.=<<___;
90 xgr $acc,$TX[1] 118 xgr $acc,$TX[1]
91 stg $acc,0($out) 119 stg $acc,0($out)
92 la $out,8($out) 120 la $out,8($out)
93 brct $cnt,.Loop8 121 brctg $cnt,.Loop8
94 122
95.Lshort: 123.Lshort:
96 lghi $acc,7 124 lghi $acc,7
@@ -122,7 +150,7 @@ $code.=<<___;
122 ahi $XX[0],-1 150 ahi $XX[0],-1
123 stc $XX[0],0($key) 151 stc $XX[0],0($key)
124 stc $YY,1($key) 152 stc $YY,1($key)
125 lmg %r6,%r11,48($sp) 153 lm${g} %r6,%r11,6*$SIZE_T($sp)
126 br $rp 154 br $rp
127.size RC4,.-RC4 155.size RC4,.-RC4
128.string "RC4 for s390x, CRYPTOGAMS by <appro\@openssl.org>" 156.string "RC4 for s390x, CRYPTOGAMS by <appro\@openssl.org>"
@@ -143,11 +171,11 @@ $ikey="%r7";
143$iinp="%r8"; 171$iinp="%r8";
144 172
145$code.=<<___; 173$code.=<<___;
146.globl RC4_set_key 174.globl private_RC4_set_key
147.type RC4_set_key,\@function 175.type private_RC4_set_key,\@function
148.align 64 176.align 64
149RC4_set_key: 177private_RC4_set_key:
150 stmg %r6,%r8,48($sp) 178 stm${g} %r6,%r8,6*$SIZE_T($sp)
151 lhi $cnt,256 179 lhi $cnt,256
152 la $idx,0(%r0) 180 la $idx,0(%r0)
153 sth $idx,0($key) 181 sth $idx,0($key)
@@ -180,9 +208,9 @@ RC4_set_key:
180 la $iinp,0(%r0) 208 la $iinp,0(%r0)
181 j .L2ndloop 209 j .L2ndloop
182.Ldone: 210.Ldone:
183 lmg %r6,%r8,48($sp) 211 lm${g} %r6,%r8,6*$SIZE_T($sp)
184 br $rp 212 br $rp
185.size RC4_set_key,.-RC4_set_key 213.size private_RC4_set_key,.-private_RC4_set_key
186 214
187___ 215___
188} 216}
@@ -203,3 +231,4 @@ RC4_options:
203___ 231___
204 232
205print $code; 233print $code;
234close STDOUT; # force flush
diff --git a/src/lib/libssl/src/crypto/rc4/asm/rc4-x86_64.pl b/src/lib/libssl/src/crypto/rc4/asm/rc4-x86_64.pl
index 677be5fe25..d6eac205e9 100755
--- a/src/lib/libssl/src/crypto/rc4/asm/rc4-x86_64.pl
+++ b/src/lib/libssl/src/crypto/rc4/asm/rc4-x86_64.pl
@@ -7,6 +7,8 @@
7# details see http://www.openssl.org/~appro/cryptogams/. 7# details see http://www.openssl.org/~appro/cryptogams/.
8# ==================================================================== 8# ====================================================================
9# 9#
10# July 2004
11#
10# 2.22x RC4 tune-up:-) It should be noted though that my hand [as in 12# 2.22x RC4 tune-up:-) It should be noted though that my hand [as in
11# "hand-coded assembler"] doesn't stand for the whole improvement 13# "hand-coded assembler"] doesn't stand for the whole improvement
12# coefficient. It turned out that eliminating RC4_CHAR from config 14# coefficient. It turned out that eliminating RC4_CHAR from config
@@ -19,6 +21,8 @@
19# to operate on partial registers, it turned out to be the best bet. 21# to operate on partial registers, it turned out to be the best bet.
20# At least for AMD... How IA32E would perform remains to be seen... 22# At least for AMD... How IA32E would perform remains to be seen...
21 23
24# November 2004
25#
22# As was shown by Marc Bevand reordering of couple of load operations 26# As was shown by Marc Bevand reordering of couple of load operations
23# results in even higher performance gain of 3.3x:-) At least on 27# results in even higher performance gain of 3.3x:-) At least on
24# Opteron... For reference, 1x in this case is RC4_CHAR C-code 28# Opteron... For reference, 1x in this case is RC4_CHAR C-code
@@ -26,6 +30,8 @@
26# Latter means that if you want to *estimate* what to expect from 30# Latter means that if you want to *estimate* what to expect from
27# *your* Opteron, then multiply 54 by 3.3 and clock frequency in GHz. 31# *your* Opteron, then multiply 54 by 3.3 and clock frequency in GHz.
28 32
33# November 2004
34#
29# Intel P4 EM64T core was found to run the AMD64 code really slow... 35# Intel P4 EM64T core was found to run the AMD64 code really slow...
30# The only way to achieve comparable performance on P4 was to keep 36# The only way to achieve comparable performance on P4 was to keep
31# RC4_CHAR. Kind of ironic, huh? As it's apparently impossible to 37# RC4_CHAR. Kind of ironic, huh? As it's apparently impossible to
@@ -33,10 +39,14 @@
33# on either AMD and Intel platforms, I implement both cases. See 39# on either AMD and Intel platforms, I implement both cases. See
34# rc4_skey.c for further details... 40# rc4_skey.c for further details...
35 41
42# April 2005
43#
36# P4 EM64T core appears to be "allergic" to 64-bit inc/dec. Replacing 44# P4 EM64T core appears to be "allergic" to 64-bit inc/dec. Replacing
37# those with add/sub results in 50% performance improvement of folded 45# those with add/sub results in 50% performance improvement of folded
38# loop... 46# loop...
39 47
48# May 2005
49#
40# As was shown by Zou Nanhai loop unrolling can improve Intel EM64T 50# As was shown by Zou Nanhai loop unrolling can improve Intel EM64T
41# performance by >30% [unlike P4 32-bit case that is]. But this is 51# performance by >30% [unlike P4 32-bit case that is]. But this is
42# provided that loads are reordered even more aggressively! Both code 52# provided that loads are reordered even more aggressively! Both code
@@ -50,6 +60,8 @@
50# is not implemented, then this final RC4_CHAR code-path should be 60# is not implemented, then this final RC4_CHAR code-path should be
51# preferred, as it provides better *all-round* performance]. 61# preferred, as it provides better *all-round* performance].
52 62
63# March 2007
64#
53# Intel Core2 was observed to perform poorly on both code paths:-( It 65# Intel Core2 was observed to perform poorly on both code paths:-( It
54# apparently suffers from some kind of partial register stall, which 66# apparently suffers from some kind of partial register stall, which
55# occurs in 64-bit mode only [as virtually identical 32-bit loop was 67# occurs in 64-bit mode only [as virtually identical 32-bit loop was
@@ -58,6 +70,37 @@
58# fit for Core2 and therefore the code was modified to skip cloop8 on 70# fit for Core2 and therefore the code was modified to skip cloop8 on
59# this CPU. 71# this CPU.
60 72
73# May 2010
74#
75# Intel Westmere was observed to perform suboptimally. Adding yet
76# another movzb to cloop1 improved performance by almost 50%! Core2
77# performance is improved too, but nominally...
78
79# May 2011
80#
81# The only code path that was not modified is the P4-specific one. The
82# non-P4 Intel code path optimization is heavily based on a submission by
83# Maxim Perminov, Maxim Locktyukhin and Jim Guilford of Intel. I've used
84# some of the ideas even in an attempt to optimize the original RC4_INT
85# code path... Current performance in cycles per processed byte (less
86# is better) and improvement coefficients relative to previous
87# version of this module are:
88#
89# Opteron 5.3/+0%(*)
90# P4 6.5
91# Core2 6.2/+15%(**)
92# Westmere 4.2/+60%
93# Sandy Bridge 4.2/+120%
94# Atom 9.3/+80%
95#
96# (*)	But the corresponding loop has fewer instructions, which should have
97#	a positive effect on the upcoming Bulldozer, which has one less ALU.
98# For reference, Intel code runs at 6.8 cpb rate on Opteron.
99# (**) Note that Core2 result is ~15% lower than corresponding result
100# for 32-bit code, meaning that it's possible to improve it,
101# but more than likely at the cost of the others (see rc4-586.pl
102# to get the idea)...
103
61$flavour = shift; 104$flavour = shift;
62$output = shift; 105$output = shift;
63if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } 106if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
@@ -76,13 +119,10 @@ $len="%rsi"; # arg2
76$inp="%rdx"; # arg3 119$inp="%rdx"; # arg3
77$out="%rcx"; # arg4 120$out="%rcx"; # arg4
78 121
79@XX=("%r8","%r10"); 122{
80@TX=("%r9","%r11");
81$YY="%r12";
82$TY="%r13";
83
84$code=<<___; 123$code=<<___;
85.text 124.text
125.extern OPENSSL_ia32cap_P
86 126
87.globl RC4 127.globl RC4
88.type RC4,\@function,4 128.type RC4,\@function,4
@@ -95,48 +135,173 @@ RC4: or $len,$len
95 push %r12 135 push %r12
96 push %r13 136 push %r13
97.Lprologue: 137.Lprologue:
138 mov $len,%r11
139 mov $inp,%r12
140 mov $out,%r13
141___
142my $len="%r11"; # reassign input arguments
143my $inp="%r12";
144my $out="%r13";
98 145
99 add \$8,$dat 146my @XX=("%r10","%rsi");
100 movl -8($dat),$XX[0]#d 147my @TX=("%rax","%rbx");
101 movl -4($dat),$YY#d 148my $YY="%rcx";
149my $TY="%rdx";
150
151$code.=<<___;
152 xor $XX[0],$XX[0]
153 xor $YY,$YY
154
155 lea 8($dat),$dat
156 mov -8($dat),$XX[0]#b
157 mov -4($dat),$YY#b
102 cmpl \$-1,256($dat) 158 cmpl \$-1,256($dat)
103 je .LRC4_CHAR 159 je .LRC4_CHAR
160 mov OPENSSL_ia32cap_P(%rip),%r8d
161 xor $TX[1],$TX[1]
104 inc $XX[0]#b 162 inc $XX[0]#b
163 sub $XX[0],$TX[1]
164 sub $inp,$out
105 movl ($dat,$XX[0],4),$TX[0]#d 165 movl ($dat,$XX[0],4),$TX[0]#d
106 test \$-8,$len 166 test \$-16,$len
107 jz .Lloop1 167 jz .Lloop1
108 jmp .Lloop8 168 bt \$30,%r8d # Intel CPU?
169 jc .Lintel
170 and \$7,$TX[1]
171 lea 1($XX[0]),$XX[1]
172 jz .Loop8
173 sub $TX[1],$len
174.Loop8_warmup:
175 add $TX[0]#b,$YY#b
176 movl ($dat,$YY,4),$TY#d
177 movl $TX[0]#d,($dat,$YY,4)
178 movl $TY#d,($dat,$XX[0],4)
179 add $TY#b,$TX[0]#b
180 inc $XX[0]#b
181 movl ($dat,$TX[0],4),$TY#d
182 movl ($dat,$XX[0],4),$TX[0]#d
183 xorb ($inp),$TY#b
184 movb $TY#b,($out,$inp)
185 lea 1($inp),$inp
186 dec $TX[1]
187 jnz .Loop8_warmup
188
189 lea 1($XX[0]),$XX[1]
190 jmp .Loop8
109.align 16 191.align 16
110.Lloop8: 192.Loop8:
111___ 193___
112for ($i=0;$i<8;$i++) { 194for ($i=0;$i<8;$i++) {
195$code.=<<___ if ($i==7);
196 add \$8,$XX[1]#b
197___
113$code.=<<___; 198$code.=<<___;
114 add $TX[0]#b,$YY#b 199 add $TX[0]#b,$YY#b
115 mov $XX[0],$XX[1]
116 movl ($dat,$YY,4),$TY#d 200 movl ($dat,$YY,4),$TY#d
117 ror \$8,%rax # ror is redundant when $i=0
118 inc $XX[1]#b
119 movl ($dat,$XX[1],4),$TX[1]#d
120 cmp $XX[1],$YY
121 movl $TX[0]#d,($dat,$YY,4) 201 movl $TX[0]#d,($dat,$YY,4)
122 cmove $TX[0],$TX[1] 202 movl `4*($i==7?-1:$i)`($dat,$XX[1],4),$TX[1]#d
123 movl $TY#d,($dat,$XX[0],4) 203 ror \$8,%r8 # ror is redundant when $i=0
204 movl $TY#d,4*$i($dat,$XX[0],4)
124 add $TX[0]#b,$TY#b 205 add $TX[0]#b,$TY#b
125 movb ($dat,$TY,4),%al 206 movb ($dat,$TY,4),%r8b
126___ 207___
127push(@TX,shift(@TX)); push(@XX,shift(@XX)); # "rotate" registers 208push(@TX,shift(@TX)); #push(@XX,shift(@XX)); # "rotate" registers
128} 209}
129$code.=<<___; 210$code.=<<___;
130 ror \$8,%rax 211 add \$8,$XX[0]#b
212 ror \$8,%r8
131 sub \$8,$len 213 sub \$8,$len
132 214
133 xor ($inp),%rax 215 xor ($inp),%r8
134 add \$8,$inp 216 mov %r8,($out,$inp)
135 mov %rax,($out) 217 lea 8($inp),$inp
136 add \$8,$out
137 218
138 test \$-8,$len 219 test \$-8,$len
139 jnz .Lloop8 220 jnz .Loop8
221 cmp \$0,$len
222 jne .Lloop1
223 jmp .Lexit
224
225.align 16
226.Lintel:
227 test \$-32,$len
228 jz .Lloop1
229 and \$15,$TX[1]
230 jz .Loop16_is_hot
231 sub $TX[1],$len
232.Loop16_warmup:
233 add $TX[0]#b,$YY#b
234 movl ($dat,$YY,4),$TY#d
235 movl $TX[0]#d,($dat,$YY,4)
236 movl $TY#d,($dat,$XX[0],4)
237 add $TY#b,$TX[0]#b
238 inc $XX[0]#b
239 movl ($dat,$TX[0],4),$TY#d
240 movl ($dat,$XX[0],4),$TX[0]#d
241 xorb ($inp),$TY#b
242 movb $TY#b,($out,$inp)
243 lea 1($inp),$inp
244 dec $TX[1]
245 jnz .Loop16_warmup
246
247 mov $YY,$TX[1]
248 xor $YY,$YY
249 mov $TX[1]#b,$YY#b
250
251.Loop16_is_hot:
252 lea ($dat,$XX[0],4),$XX[1]
253___
254sub RC4_loop {
255 my $i=shift;
256 my $j=$i<0?0:$i;
257 my $xmm="%xmm".($j&1);
258
259 $code.=" add \$16,$XX[0]#b\n" if ($i==15);
260 $code.=" movdqu ($inp),%xmm2\n" if ($i==15);
261 $code.=" add $TX[0]#b,$YY#b\n" if ($i<=0);
262 $code.=" movl ($dat,$YY,4),$TY#d\n";
263 $code.=" pxor %xmm0,%xmm2\n" if ($i==0);
264 $code.=" psllq \$8,%xmm1\n" if ($i==0);
265 $code.=" pxor $xmm,$xmm\n" if ($i<=1);
266 $code.=" movl $TX[0]#d,($dat,$YY,4)\n";
267 $code.=" add $TY#b,$TX[0]#b\n";
268 $code.=" movl `4*($j+1)`($XX[1]),$TX[1]#d\n" if ($i<15);
269 $code.=" movz $TX[0]#b,$TX[0]#d\n";
270 $code.=" movl $TY#d,4*$j($XX[1])\n";
271 $code.=" pxor %xmm1,%xmm2\n" if ($i==0);
272 $code.=" lea ($dat,$XX[0],4),$XX[1]\n" if ($i==15);
273 $code.=" add $TX[1]#b,$YY#b\n" if ($i<15);
274 $code.=" pinsrw \$`($j>>1)&7`,($dat,$TX[0],4),$xmm\n";
275 $code.=" movdqu %xmm2,($out,$inp)\n" if ($i==0);
276 $code.=" lea 16($inp),$inp\n" if ($i==0);
277 $code.=" movl ($XX[1]),$TX[1]#d\n" if ($i==15);
278}
279 RC4_loop(-1);
280$code.=<<___;
281 jmp .Loop16_enter
282.align 16
283.Loop16:
284___
285
286for ($i=0;$i<16;$i++) {
287 $code.=".Loop16_enter:\n" if ($i==1);
288 RC4_loop($i);
289 push(@TX,shift(@TX)); # "rotate" registers
290}
291$code.=<<___;
292 mov $YY,$TX[1]
293 xor $YY,$YY # keyword to partial register
294 sub \$16,$len
295 mov $TX[1]#b,$YY#b
296 test \$-16,$len
297 jnz .Loop16
298
299 psllq \$8,%xmm1
300 pxor %xmm0,%xmm2
301 pxor %xmm1,%xmm2
302 movdqu %xmm2,($out,$inp)
303 lea 16($inp),$inp
304
140 cmp \$0,$len 305 cmp \$0,$len
141 jne .Lloop1 306 jne .Lloop1
142 jmp .Lexit 307 jmp .Lexit
@@ -152,9 +317,8 @@ $code.=<<___;
152 movl ($dat,$TX[0],4),$TY#d 317 movl ($dat,$TX[0],4),$TY#d
153 movl ($dat,$XX[0],4),$TX[0]#d 318 movl ($dat,$XX[0],4),$TX[0]#d
154 xorb ($inp),$TY#b 319 xorb ($inp),$TY#b
155 inc $inp 320 movb $TY#b,($out,$inp)
156 movb $TY#b,($out) 321 lea 1($inp),$inp
157 inc $out
158 dec $len 322 dec $len
159 jnz .Lloop1 323 jnz .Lloop1
160 jmp .Lexit 324 jmp .Lexit
@@ -165,13 +329,11 @@ $code.=<<___;
165 movzb ($dat,$XX[0]),$TX[0]#d 329 movzb ($dat,$XX[0]),$TX[0]#d
166 test \$-8,$len 330 test \$-8,$len
167 jz .Lcloop1 331 jz .Lcloop1
168 cmpl \$0,260($dat)
169 jnz .Lcloop1
170 jmp .Lcloop8 332 jmp .Lcloop8
171.align 16 333.align 16
172.Lcloop8: 334.Lcloop8:
173 mov ($inp),%eax 335 mov ($inp),%r8d
174 mov 4($inp),%ebx 336 mov 4($inp),%r9d
175___ 337___
176# unroll 2x4-wise, because 64-bit rotates kill Intel P4... 338# unroll 2x4-wise, because 64-bit rotates kill Intel P4...
177for ($i=0;$i<4;$i++) { 339for ($i=0;$i<4;$i++) {
@@ -188,8 +350,8 @@ $code.=<<___;
188 mov $TX[0],$TX[1] 350 mov $TX[0],$TX[1]
189.Lcmov$i: 351.Lcmov$i:
190 add $TX[0]#b,$TY#b 352 add $TX[0]#b,$TY#b
191 xor ($dat,$TY),%al 353 xor ($dat,$TY),%r8b
192 ror \$8,%eax 354 ror \$8,%r8d
193___ 355___
194push(@TX,shift(@TX)); push(@XX,shift(@XX)); # "rotate" registers 356push(@TX,shift(@TX)); push(@XX,shift(@XX)); # "rotate" registers
195} 357}
@@ -207,16 +369,16 @@ $code.=<<___;
207 mov $TX[0],$TX[1] 369 mov $TX[0],$TX[1]
208.Lcmov$i: 370.Lcmov$i:
209 add $TX[0]#b,$TY#b 371 add $TX[0]#b,$TY#b
210 xor ($dat,$TY),%bl 372 xor ($dat,$TY),%r9b
211 ror \$8,%ebx 373 ror \$8,%r9d
212___ 374___
213push(@TX,shift(@TX)); push(@XX,shift(@XX)); # "rotate" registers 375push(@TX,shift(@TX)); push(@XX,shift(@XX)); # "rotate" registers
214} 376}
215$code.=<<___; 377$code.=<<___;
216 lea -8($len),$len 378 lea -8($len),$len
217 mov %eax,($out) 379 mov %r8d,($out)
218 lea 8($inp),$inp 380 lea 8($inp),$inp
219 mov %ebx,4($out) 381 mov %r9d,4($out)
220 lea 8($out),$out 382 lea 8($out),$out
221 383
222 test \$-8,$len 384 test \$-8,$len
@@ -229,6 +391,7 @@ $code.=<<___;
229.align 16 391.align 16
230.Lcloop1: 392.Lcloop1:
231 add $TX[0]#b,$YY#b 393 add $TX[0]#b,$YY#b
394 movzb $YY#b,$YY#d
232 movzb ($dat,$YY),$TY#d 395 movzb ($dat,$YY),$TY#d
233 movb $TX[0]#b,($dat,$YY) 396 movb $TX[0]#b,($dat,$YY)
234 movb $TY#b,($dat,$XX[0]) 397 movb $TY#b,($dat,$XX[0])
@@ -260,16 +423,16 @@ $code.=<<___;
260 ret 423 ret
261.size RC4,.-RC4 424.size RC4,.-RC4
262___ 425___
426}
263 427
264$idx="%r8"; 428$idx="%r8";
265$ido="%r9"; 429$ido="%r9";
266 430
267$code.=<<___; 431$code.=<<___;
268.extern OPENSSL_ia32cap_P 432.globl private_RC4_set_key
269.globl RC4_set_key 433.type private_RC4_set_key,\@function,3
270.type RC4_set_key,\@function,3
271.align 16 434.align 16
272RC4_set_key: 435private_RC4_set_key:
273 lea 8($dat),$dat 436 lea 8($dat),$dat
274 lea ($inp,$len),$inp 437 lea ($inp,$len),$inp
275 neg $len 438 neg $len
@@ -280,12 +443,9 @@ RC4_set_key:
280 xor %r11,%r11 443 xor %r11,%r11
281 444
282 mov OPENSSL_ia32cap_P(%rip),$idx#d 445 mov OPENSSL_ia32cap_P(%rip),$idx#d
283 bt \$20,$idx#d 446 bt \$20,$idx#d # RC4_CHAR?
284 jnc .Lw1stloop 447 jc .Lc1stloop
285 bt \$30,$idx#d 448 jmp .Lw1stloop
286 setc $ido#b
287 mov $ido#d,260($dat)
288 jmp .Lc1stloop
289 449
290.align 16 450.align 16
291.Lw1stloop: 451.Lw1stloop:
@@ -339,7 +499,7 @@ RC4_set_key:
339 mov %eax,-8($dat) 499 mov %eax,-8($dat)
340 mov %eax,-4($dat) 500 mov %eax,-4($dat)
341 ret 501 ret
342.size RC4_set_key,.-RC4_set_key 502.size private_RC4_set_key,.-private_RC4_set_key
343 503
344.globl RC4_options 504.globl RC4_options
345.type RC4_options,\@abi-omnipotent 505.type RC4_options,\@abi-omnipotent
@@ -348,18 +508,20 @@ RC4_options:
348 lea .Lopts(%rip),%rax 508 lea .Lopts(%rip),%rax
349 mov OPENSSL_ia32cap_P(%rip),%edx 509 mov OPENSSL_ia32cap_P(%rip),%edx
350 bt \$20,%edx 510 bt \$20,%edx
351 jnc .Ldone 511 jc .L8xchar
352 add \$12,%rax
353 bt \$30,%edx 512 bt \$30,%edx
354 jnc .Ldone 513 jnc .Ldone
355 add \$13,%rax 514 add \$25,%rax
515 ret
516.L8xchar:
517 add \$12,%rax
356.Ldone: 518.Ldone:
357 ret 519 ret
358.align 64 520.align 64
359.Lopts: 521.Lopts:
360.asciz "rc4(8x,int)" 522.asciz "rc4(8x,int)"
361.asciz "rc4(8x,char)" 523.asciz "rc4(8x,char)"
362.asciz "rc4(1x,char)" 524.asciz "rc4(16x,int)"
363.asciz "RC4 for x86_64, CRYPTOGAMS by <appro\@openssl.org>" 525.asciz "RC4 for x86_64, CRYPTOGAMS by <appro\@openssl.org>"
364.align 64 526.align 64
365.size RC4_options,.-RC4_options 527.size RC4_options,.-RC4_options
@@ -482,22 +644,32 @@ key_se_handler:
482 .rva .LSEH_end_RC4 644 .rva .LSEH_end_RC4
483 .rva .LSEH_info_RC4 645 .rva .LSEH_info_RC4
484 646
485 .rva .LSEH_begin_RC4_set_key 647 .rva .LSEH_begin_private_RC4_set_key
486 .rva .LSEH_end_RC4_set_key 648 .rva .LSEH_end_private_RC4_set_key
487 .rva .LSEH_info_RC4_set_key 649 .rva .LSEH_info_private_RC4_set_key
488 650
489.section .xdata 651.section .xdata
490.align 8 652.align 8
491.LSEH_info_RC4: 653.LSEH_info_RC4:
492 .byte 9,0,0,0 654 .byte 9,0,0,0
493 .rva stream_se_handler 655 .rva stream_se_handler
494.LSEH_info_RC4_set_key: 656.LSEH_info_private_RC4_set_key:
495 .byte 9,0,0,0 657 .byte 9,0,0,0
496 .rva key_se_handler 658 .rva key_se_handler
497___ 659___
498} 660}
499 661
500$code =~ s/#([bwd])/$1/gm; 662sub reg_part {
663my ($reg,$conv)=@_;
664 if ($reg =~ /%r[0-9]+/) { $reg .= $conv; }
665 elsif ($conv eq "b") { $reg =~ s/%[er]([^x]+)x?/%$1l/; }
666 elsif ($conv eq "w") { $reg =~ s/%[er](.+)/%$1/; }
667 elsif ($conv eq "d") { $reg =~ s/%[er](.+)/%e$1/; }
668 return $reg;
669}
670
671$code =~ s/(%[a-z0-9]+)#([bwd])/reg_part($1,$2)/gem;
672$code =~ s/\`([^\`]*)\`/eval $1/gem;
501 673
502print $code; 674print $code;
503 675
diff --git a/src/lib/libssl/src/crypto/rc4/rc4_utl.c b/src/lib/libssl/src/crypto/rc4/rc4_utl.c
new file mode 100644
index 0000000000..ab3f02fe6a
--- /dev/null
+++ b/src/lib/libssl/src/crypto/rc4/rc4_utl.c
@@ -0,0 +1,62 @@
1/* crypto/rc4/rc4_utl.c -*- mode:C; c-file-style: "eay" -*- */
2/* ====================================================================
3 * Copyright (c) 2011 The OpenSSL Project. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 *
17 * 3. All advertising materials mentioning features or use of this
18 * software must display the following acknowledgment:
19 * "This product includes software developed by the OpenSSL Project
20 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
21 *
22 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
23 * endorse or promote products derived from this software without
24 * prior written permission. For written permission, please contact
25 * openssl-core@openssl.org.
26 *
27 * 5. Products derived from this software may not be called "OpenSSL"
28 * nor may "OpenSSL" appear in their names without prior written
29 * permission of the OpenSSL Project.
30 *
31 * 6. Redistributions of any form whatsoever must retain the following
32 * acknowledgment:
33 * "This product includes software developed by the OpenSSL Project
34 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
35 *
36 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
37 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
38 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
39 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
40 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
42 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
43 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
44 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
45 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
46 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
47 * OF THE POSSIBILITY OF SUCH DAMAGE.
48 * ====================================================================
49 *
50 */
51
52#include <openssl/opensslv.h>
53#include <openssl/crypto.h>
54#include <openssl/rc4.h>
55
56void RC4_set_key(RC4_KEY *key, int len, const unsigned char *data)
57 {
58#ifdef OPENSSL_FIPS
59 fips_cipher_abort(RC4);
60#endif
61 private_RC4_set_key(key, len, data);
62 }
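
The wrapper keeps the public RC4_set_key() entry point stable while the assembler back-ends now export private_RC4_set_key(); under OPENSSL_FIPS the fips_cipher_abort() check runs first. For context, a minimal caller looks like this (a sketch assuming a non-FIPS build; the key and data values are arbitrary):

#include <string.h>
#include <openssl/rc4.h>

int main(void)
{
	static const unsigned char key[] = "an arbitrary demo key";
	unsigned char buf[5];
	RC4_KEY k;

	/* public entry point; forwards to private_RC4_set_key() above */
	RC4_set_key(&k, (int)(sizeof(key) - 1), key);

	/* encrypt in place; a second pass with a freshly keyed RC4_KEY and
	 * the ciphertext recovers the plaintext, as RC4 is a stream cipher */
	memcpy(buf, "hello", sizeof(buf));
	RC4(&k, sizeof(buf), buf, buf);
	return 0;
}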
diff --git a/src/lib/libssl/src/crypto/rsa/rsa_ameth.c b/src/lib/libssl/src/crypto/rsa/rsa_ameth.c
index 8c3209885e..2460910ab2 100644
--- a/src/lib/libssl/src/crypto/rsa/rsa_ameth.c
+++ b/src/lib/libssl/src/crypto/rsa/rsa_ameth.c
@@ -265,6 +265,147 @@ static int rsa_priv_print(BIO *bp, const EVP_PKEY *pkey, int indent,
265 return do_rsa_print(bp, pkey->pkey.rsa, indent, 1); 265 return do_rsa_print(bp, pkey->pkey.rsa, indent, 1);
266 } 266 }
267 267
268static RSA_PSS_PARAMS *rsa_pss_decode(const X509_ALGOR *alg,
269 X509_ALGOR **pmaskHash)
270 {
271 const unsigned char *p;
272 int plen;
273 RSA_PSS_PARAMS *pss;
274
275 *pmaskHash = NULL;
276
277 if (!alg->parameter || alg->parameter->type != V_ASN1_SEQUENCE)
278 return NULL;
279 p = alg->parameter->value.sequence->data;
280 plen = alg->parameter->value.sequence->length;
281 pss = d2i_RSA_PSS_PARAMS(NULL, &p, plen);
282
283 if (!pss)
284 return NULL;
285
286 if (pss->maskGenAlgorithm)
287 {
288 ASN1_TYPE *param = pss->maskGenAlgorithm->parameter;
289 if (OBJ_obj2nid(pss->maskGenAlgorithm->algorithm) == NID_mgf1
290 && param->type == V_ASN1_SEQUENCE)
291 {
292 p = param->value.sequence->data;
293 plen = param->value.sequence->length;
294 *pmaskHash = d2i_X509_ALGOR(NULL, &p, plen);
295 }
296 }
297
298 return pss;
299 }
300
301static int rsa_pss_param_print(BIO *bp, RSA_PSS_PARAMS *pss,
302 X509_ALGOR *maskHash, int indent)
303 {
304 int rv = 0;
305 if (!pss)
306 {
307 if (BIO_puts(bp, " (INVALID PSS PARAMETERS)\n") <= 0)
308 return 0;
309 return 1;
310 }
311 if (BIO_puts(bp, "\n") <= 0)
312 goto err;
313 if (!BIO_indent(bp, indent, 128))
314 goto err;
315 if (BIO_puts(bp, "Hash Algorithm: ") <= 0)
316 goto err;
317
318 if (pss->hashAlgorithm)
319 {
320 if (i2a_ASN1_OBJECT(bp, pss->hashAlgorithm->algorithm) <= 0)
321 goto err;
322 }
323 else if (BIO_puts(bp, "sha1 (default)") <= 0)
324 goto err;
325
326 if (BIO_puts(bp, "\n") <= 0)
327 goto err;
328
329 if (!BIO_indent(bp, indent, 128))
330 goto err;
331
332 if (BIO_puts(bp, "Mask Algorithm: ") <= 0)
333 goto err;
334 if (pss->maskGenAlgorithm)
335 {
336 if (i2a_ASN1_OBJECT(bp, pss->maskGenAlgorithm->algorithm) <= 0)
337 goto err;
338 if (BIO_puts(bp, " with ") <= 0)
339 goto err;
340 if (maskHash)
341 {
342 if (i2a_ASN1_OBJECT(bp, maskHash->algorithm) <= 0)
343 goto err;
344 }
345 else if (BIO_puts(bp, "INVALID") <= 0)
346 goto err;
347 }
348 else if (BIO_puts(bp, "mgf1 with sha1 (default)") <= 0)
349 goto err;
350 BIO_puts(bp, "\n");
351
352 if (!BIO_indent(bp, indent, 128))
353 goto err;
354 if (BIO_puts(bp, "Salt Length: ") <= 0)
355 goto err;
356 if (pss->saltLength)
357 {
358 if (i2a_ASN1_INTEGER(bp, pss->saltLength) <= 0)
359 goto err;
360 }
361 else if (BIO_puts(bp, "20 (default)") <= 0)
362 goto err;
363 BIO_puts(bp, "\n");
364
365 if (!BIO_indent(bp, indent, 128))
366 goto err;
367 if (BIO_puts(bp, "Trailer Field: ") <= 0)
368 goto err;
369 if (pss->trailerField)
370 {
371 if (i2a_ASN1_INTEGER(bp, pss->trailerField) <= 0)
372 goto err;
373 }
374 else if (BIO_puts(bp, "0xbc (default)") <= 0)
375 goto err;
376 BIO_puts(bp, "\n");
377
378 rv = 1;
379
380 err:
381 return rv;
382
383 }
384
385static int rsa_sig_print(BIO *bp, const X509_ALGOR *sigalg,
386 const ASN1_STRING *sig,
387 int indent, ASN1_PCTX *pctx)
388 {
389 if (OBJ_obj2nid(sigalg->algorithm) == NID_rsassaPss)
390 {
391 int rv;
392 RSA_PSS_PARAMS *pss;
393 X509_ALGOR *maskHash;
394 pss = rsa_pss_decode(sigalg, &maskHash);
395 rv = rsa_pss_param_print(bp, pss, maskHash, indent);
396 if (pss)
397 RSA_PSS_PARAMS_free(pss);
398 if (maskHash)
399 X509_ALGOR_free(maskHash);
400 if (!rv)
401 return 0;
402 }
403 else if (!sig && BIO_puts(bp, "\n") <= 0)
404 return 0;
405 if (sig)
406 return X509_signature_dump(bp, sig, indent);
407 return 1;
408 }
268 409
269static int rsa_pkey_ctrl(EVP_PKEY *pkey, int op, long arg1, void *arg2) 410static int rsa_pkey_ctrl(EVP_PKEY *pkey, int op, long arg1, void *arg2)
270 { 411 {
@@ -310,6 +451,211 @@ static int rsa_pkey_ctrl(EVP_PKEY *pkey, int op, long arg1, void *arg2)
310 451
311 } 452 }
312 453
454/* Customised RSA item verification routine. This is called
455 * when a signature is encountered requiring special handling. We
456 * currently only handle PSS.
457 */
458
459
460static int rsa_item_verify(EVP_MD_CTX *ctx, const ASN1_ITEM *it, void *asn,
461 X509_ALGOR *sigalg, ASN1_BIT_STRING *sig,
462 EVP_PKEY *pkey)
463 {
464 int rv = -1;
465 int saltlen;
466 const EVP_MD *mgf1md = NULL, *md = NULL;
467 RSA_PSS_PARAMS *pss;
468 X509_ALGOR *maskHash;
469 EVP_PKEY_CTX *pkctx;
470 /* Sanity check: make sure it is PSS */
471 if (OBJ_obj2nid(sigalg->algorithm) != NID_rsassaPss)
472 {
473 RSAerr(RSA_F_RSA_ITEM_VERIFY, RSA_R_UNSUPPORTED_SIGNATURE_TYPE);
474 return -1;
475 }
476 /* Decode PSS parameters */
477 pss = rsa_pss_decode(sigalg, &maskHash);
478
479 if (pss == NULL)
480 {
481 RSAerr(RSA_F_RSA_ITEM_VERIFY, RSA_R_INVALID_PSS_PARAMETERS);
482 goto err;
483 }
484 /* Check mask and lookup mask hash algorithm */
485 if (pss->maskGenAlgorithm)
486 {
487 if (OBJ_obj2nid(pss->maskGenAlgorithm->algorithm) != NID_mgf1)
488 {
489 RSAerr(RSA_F_RSA_ITEM_VERIFY, RSA_R_UNSUPPORTED_MASK_ALGORITHM);
490 goto err;
491 }
492 if (!maskHash)
493 {
494 RSAerr(RSA_F_RSA_ITEM_VERIFY, RSA_R_UNSUPPORTED_MASK_PARAMETER);
495 goto err;
496 }
497 mgf1md = EVP_get_digestbyobj(maskHash->algorithm);
498 if (mgf1md == NULL)
499 {
500 RSAerr(RSA_F_RSA_ITEM_VERIFY, RSA_R_UNKNOWN_MASK_DIGEST);
501 goto err;
502 }
503 }
504 else
505 mgf1md = EVP_sha1();
506
507 if (pss->hashAlgorithm)
508 {
509 md = EVP_get_digestbyobj(pss->hashAlgorithm->algorithm);
510 if (md == NULL)
511 {
512 RSAerr(RSA_F_RSA_ITEM_VERIFY, RSA_R_UNKNOWN_PSS_DIGEST);
513 goto err;
514 }
515 }
516 else
517 md = EVP_sha1();
518
519 if (pss->saltLength)
520 {
521 saltlen = ASN1_INTEGER_get(pss->saltLength);
522
523 /* Could perform more salt length sanity checks but the main
524 * RSA routines will trap other invalid values anyway.
525 */
526 if (saltlen < 0)
527 {
528 RSAerr(RSA_F_RSA_ITEM_VERIFY, RSA_R_INVALID_SALT_LENGTH);
529 goto err;
530 }
531 }
532 else
533 saltlen = 20;
534
535 /* low-level routines support only trailer field 0xbc (value 1)
536 * and PKCS#1 says we should reject any other value anyway.
537 */
538 if (pss->trailerField && ASN1_INTEGER_get(pss->trailerField) != 1)
539 {
540 RSAerr(RSA_F_RSA_ITEM_VERIFY, RSA_R_INVALID_TRAILER);
541 goto err;
542 }
543
544	/* We have all parameters, now set up the context */
545
546 if (!EVP_DigestVerifyInit(ctx, &pkctx, md, NULL, pkey))
547 goto err;
548
549 if (EVP_PKEY_CTX_set_rsa_padding(pkctx, RSA_PKCS1_PSS_PADDING) <= 0)
550 goto err;
551
552 if (EVP_PKEY_CTX_set_rsa_pss_saltlen(pkctx, saltlen) <= 0)
553 goto err;
554
555 if (EVP_PKEY_CTX_set_rsa_mgf1_md(pkctx, mgf1md) <= 0)
556 goto err;
557 /* Carry on */
558 rv = 2;
559
560 err:
561 RSA_PSS_PARAMS_free(pss);
562 if (maskHash)
563 X509_ALGOR_free(maskHash);
564 return rv;
565 }
566
567static int rsa_item_sign(EVP_MD_CTX *ctx, const ASN1_ITEM *it, void *asn,
568 X509_ALGOR *alg1, X509_ALGOR *alg2,
569 ASN1_BIT_STRING *sig)
570 {
571 int pad_mode;
572 EVP_PKEY_CTX *pkctx = ctx->pctx;
573 if (EVP_PKEY_CTX_get_rsa_padding(pkctx, &pad_mode) <= 0)
574 return 0;
575 if (pad_mode == RSA_PKCS1_PADDING)
576 return 2;
577 if (pad_mode == RSA_PKCS1_PSS_PADDING)
578 {
579 const EVP_MD *sigmd, *mgf1md;
580 RSA_PSS_PARAMS *pss = NULL;
581 X509_ALGOR *mgf1alg = NULL;
582 ASN1_STRING *os1 = NULL, *os2 = NULL;
583 EVP_PKEY *pk = EVP_PKEY_CTX_get0_pkey(pkctx);
584 int saltlen, rv = 0;
585 sigmd = EVP_MD_CTX_md(ctx);
586 if (EVP_PKEY_CTX_get_rsa_mgf1_md(pkctx, &mgf1md) <= 0)
587 goto err;
588 if (!EVP_PKEY_CTX_get_rsa_pss_saltlen(pkctx, &saltlen))
589 goto err;
590 if (saltlen == -1)
591 saltlen = EVP_MD_size(sigmd);
592 else if (saltlen == -2)
593 {
594 saltlen = EVP_PKEY_size(pk) - EVP_MD_size(sigmd) - 2;
595 if (((EVP_PKEY_bits(pk) - 1) & 0x7) == 0)
596 saltlen--;
597 }
598 pss = RSA_PSS_PARAMS_new();
599 if (!pss)
600 goto err;
601 if (saltlen != 20)
602 {
603 pss->saltLength = ASN1_INTEGER_new();
604 if (!pss->saltLength)
605 goto err;
606 if (!ASN1_INTEGER_set(pss->saltLength, saltlen))
607 goto err;
608 }
609 if (EVP_MD_type(sigmd) != NID_sha1)
610 {
611 pss->hashAlgorithm = X509_ALGOR_new();
612 if (!pss->hashAlgorithm)
613 goto err;
614 X509_ALGOR_set_md(pss->hashAlgorithm, sigmd);
615 }
616 if (EVP_MD_type(mgf1md) != NID_sha1)
617 {
618 ASN1_STRING *stmp = NULL;
619 /* need to embed algorithm ID inside another */
620 mgf1alg = X509_ALGOR_new();
621 X509_ALGOR_set_md(mgf1alg, mgf1md);
622 if (!ASN1_item_pack(mgf1alg, ASN1_ITEM_rptr(X509_ALGOR),
623 &stmp))
624 goto err;
625 pss->maskGenAlgorithm = X509_ALGOR_new();
626 if (!pss->maskGenAlgorithm)
627 goto err;
628 X509_ALGOR_set0(pss->maskGenAlgorithm,
629 OBJ_nid2obj(NID_mgf1),
630 V_ASN1_SEQUENCE, stmp);
631 }
632 /* Finally create string with pss parameter encoding. */
633 if (!ASN1_item_pack(pss, ASN1_ITEM_rptr(RSA_PSS_PARAMS), &os1))
634 goto err;
635 if (alg2)
636 {
637 os2 = ASN1_STRING_dup(os1);
638 if (!os2)
639 goto err;
640 X509_ALGOR_set0(alg2, OBJ_nid2obj(NID_rsassaPss),
641 V_ASN1_SEQUENCE, os2);
642 }
643 X509_ALGOR_set0(alg1, OBJ_nid2obj(NID_rsassaPss),
644 V_ASN1_SEQUENCE, os1);
645 os1 = os2 = NULL;
646 rv = 3;
647 err:
648 if (mgf1alg)
649 X509_ALGOR_free(mgf1alg);
650 if (pss)
651 RSA_PSS_PARAMS_free(pss);
652 if (os1)
653 ASN1_STRING_free(os1);
654 return rv;
655
656 }
657 return 2;
658 }
313 659
314const EVP_PKEY_ASN1_METHOD rsa_asn1_meths[] = 660const EVP_PKEY_ASN1_METHOD rsa_asn1_meths[] =
315 { 661 {
@@ -335,10 +681,13 @@ const EVP_PKEY_ASN1_METHOD rsa_asn1_meths[] =
335 681
336 0,0,0,0,0,0, 682 0,0,0,0,0,0,
337 683
684 rsa_sig_print,
338 int_rsa_free, 685 int_rsa_free,
339 rsa_pkey_ctrl, 686 rsa_pkey_ctrl,
340 old_rsa_priv_decode, 687 old_rsa_priv_decode,
341 old_rsa_priv_encode 688 old_rsa_priv_encode,
689 rsa_item_verify,
690 rsa_item_sign
342 }, 691 },
343 692
344 { 693 {
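
The decode, print and verify helpers added here all apply the same defaulting rules for RSA-PSS parameters: an absent hashAlgorithm means SHA-1, an absent maskGenAlgorithm means MGF1 with SHA-1, an absent saltLength means 20, and an absent trailerField means 0xbc (encoded as the value 1, the only value the low-level routines accept). A hedged sketch of a helper that resolves the effective values from a decoded RSA_PSS_PARAMS, in the spirit of rsa_item_verify() above (the resolve_pss_defaults name is illustrative; error handling is reduced to NULL/range checks):

#include <openssl/asn1.h>
#include <openssl/evp.h>
#include <openssl/objects.h>
#include <openssl/rsa.h>
#include <openssl/x509.h>

/* Resolve the effective digest, MGF1 digest and salt length, applying the
 * PKCS#1 defaults. maskHash is the inner AlgorithmIdentifier pulled out by
 * rsa_pss_decode(). Returns 1 on success, 0 on unsupported parameters. */
static int resolve_pss_defaults(const RSA_PSS_PARAMS *pss,
    const X509_ALGOR *maskHash, const EVP_MD **pmd, const EVP_MD **pmgf1md,
    int *psaltlen)
{
	*pmd = pss->hashAlgorithm ?
	    EVP_get_digestbyobj(pss->hashAlgorithm->algorithm) : EVP_sha1();

	if (pss->maskGenAlgorithm) {
		/* only MGF1 is defined; its parameter names the MGF1 digest */
		if (OBJ_obj2nid(pss->maskGenAlgorithm->algorithm) != NID_mgf1
		    || maskHash == NULL)
			return 0;
		*pmgf1md = EVP_get_digestbyobj(maskHash->algorithm);
	} else
		*pmgf1md = EVP_sha1();

	*psaltlen = pss->saltLength ? (int)ASN1_INTEGER_get(pss->saltLength) : 20;

	return *pmd != NULL && *pmgf1md != NULL && *psaltlen >= 0;
}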
diff --git a/src/lib/libssl/src/crypto/rsa/rsa_crpt.c b/src/lib/libssl/src/crypto/rsa/rsa_crpt.c
new file mode 100644
index 0000000000..d3e44785dc
--- /dev/null
+++ b/src/lib/libssl/src/crypto/rsa/rsa_crpt.c
@@ -0,0 +1,257 @@
1/* crypto/rsa/rsa_lib.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
59#include <stdio.h>
60#include <openssl/crypto.h>
61#include "cryptlib.h"
62#include <openssl/lhash.h>
63#include <openssl/bn.h>
64#include <openssl/rsa.h>
65#include <openssl/rand.h>
66#ifndef OPENSSL_NO_ENGINE
67#include <openssl/engine.h>
68#endif
69
70int RSA_size(const RSA *r)
71 {
72 return(BN_num_bytes(r->n));
73 }
74
75int RSA_public_encrypt(int flen, const unsigned char *from, unsigned char *to,
76 RSA *rsa, int padding)
77 {
78#ifdef OPENSSL_FIPS
79 if (FIPS_mode() && !(rsa->meth->flags & RSA_FLAG_FIPS_METHOD)
80 && !(rsa->flags & RSA_FLAG_NON_FIPS_ALLOW))
81 {
82 RSAerr(RSA_F_RSA_PUBLIC_ENCRYPT, RSA_R_NON_FIPS_RSA_METHOD);
83 return -1;
84 }
85#endif
86 return(rsa->meth->rsa_pub_enc(flen, from, to, rsa, padding));
87 }
88
89int RSA_private_encrypt(int flen, const unsigned char *from, unsigned char *to,
90 RSA *rsa, int padding)
91 {
92#ifdef OPENSSL_FIPS
93 if (FIPS_mode() && !(rsa->meth->flags & RSA_FLAG_FIPS_METHOD)
94 && !(rsa->flags & RSA_FLAG_NON_FIPS_ALLOW))
95 {
96 RSAerr(RSA_F_RSA_PRIVATE_ENCRYPT, RSA_R_NON_FIPS_RSA_METHOD);
97 return -1;
98 }
99#endif
100 return(rsa->meth->rsa_priv_enc(flen, from, to, rsa, padding));
101 }
102
103int RSA_private_decrypt(int flen, const unsigned char *from, unsigned char *to,
104 RSA *rsa, int padding)
105 {
106#ifdef OPENSSL_FIPS
107 if (FIPS_mode() && !(rsa->meth->flags & RSA_FLAG_FIPS_METHOD)
108 && !(rsa->flags & RSA_FLAG_NON_FIPS_ALLOW))
109 {
110 RSAerr(RSA_F_RSA_PRIVATE_DECRYPT, RSA_R_NON_FIPS_RSA_METHOD);
111 return -1;
112 }
113#endif
114 return(rsa->meth->rsa_priv_dec(flen, from, to, rsa, padding));
115 }
116
117int RSA_public_decrypt(int flen, const unsigned char *from, unsigned char *to,
118 RSA *rsa, int padding)
119 {
120#ifdef OPENSSL_FIPS
121 if (FIPS_mode() && !(rsa->meth->flags & RSA_FLAG_FIPS_METHOD)
122 && !(rsa->flags & RSA_FLAG_NON_FIPS_ALLOW))
123 {
124 RSAerr(RSA_F_RSA_PUBLIC_DECRYPT, RSA_R_NON_FIPS_RSA_METHOD);
125 return -1;
126 }
127#endif
128 return(rsa->meth->rsa_pub_dec(flen, from, to, rsa, padding));
129 }
130
131int RSA_flags(const RSA *r)
132 {
133 return((r == NULL)?0:r->meth->flags);
134 }
135
136void RSA_blinding_off(RSA *rsa)
137 {
138 if (rsa->blinding != NULL)
139 {
140 BN_BLINDING_free(rsa->blinding);
141 rsa->blinding=NULL;
142 }
143 rsa->flags &= ~RSA_FLAG_BLINDING;
144 rsa->flags |= RSA_FLAG_NO_BLINDING;
145 }
146
147int RSA_blinding_on(RSA *rsa, BN_CTX *ctx)
148 {
149 int ret=0;
150
151 if (rsa->blinding != NULL)
152 RSA_blinding_off(rsa);
153
154 rsa->blinding = RSA_setup_blinding(rsa, ctx);
155 if (rsa->blinding == NULL)
156 goto err;
157
158 rsa->flags |= RSA_FLAG_BLINDING;
159 rsa->flags &= ~RSA_FLAG_NO_BLINDING;
160 ret=1;
161err:
162 return(ret);
163 }
164
165static BIGNUM *rsa_get_public_exp(const BIGNUM *d, const BIGNUM *p,
166 const BIGNUM *q, BN_CTX *ctx)
167{
168 BIGNUM *ret = NULL, *r0, *r1, *r2;
169
170 if (d == NULL || p == NULL || q == NULL)
171 return NULL;
172
173 BN_CTX_start(ctx);
174 r0 = BN_CTX_get(ctx);
175 r1 = BN_CTX_get(ctx);
176 r2 = BN_CTX_get(ctx);
177 if (r2 == NULL)
178 goto err;
179
180 if (!BN_sub(r1, p, BN_value_one())) goto err;
181 if (!BN_sub(r2, q, BN_value_one())) goto err;
182 if (!BN_mul(r0, r1, r2, ctx)) goto err;
183
184 ret = BN_mod_inverse(NULL, d, r0, ctx);
185err:
186 BN_CTX_end(ctx);
187 return ret;
188}
189
190BN_BLINDING *RSA_setup_blinding(RSA *rsa, BN_CTX *in_ctx)
191{
192 BIGNUM local_n;
193 BIGNUM *e,*n;
194 BN_CTX *ctx;
195 BN_BLINDING *ret = NULL;
196
197 if (in_ctx == NULL)
198 {
199 if ((ctx = BN_CTX_new()) == NULL) return 0;
200 }
201 else
202 ctx = in_ctx;
203
204 BN_CTX_start(ctx);
205 e = BN_CTX_get(ctx);
206 if (e == NULL)
207 {
208 RSAerr(RSA_F_RSA_SETUP_BLINDING, ERR_R_MALLOC_FAILURE);
209 goto err;
210 }
211
212 if (rsa->e == NULL)
213 {
214 e = rsa_get_public_exp(rsa->d, rsa->p, rsa->q, ctx);
215 if (e == NULL)
216 {
217 RSAerr(RSA_F_RSA_SETUP_BLINDING, RSA_R_NO_PUBLIC_EXPONENT);
218 goto err;
219 }
220 }
221 else
222 e = rsa->e;
223
224
225 if ((RAND_status() == 0) && rsa->d != NULL && rsa->d->d != NULL)
226 {
227 /* if PRNG is not properly seeded, resort to secret
228 * exponent as unpredictable seed */
229 RAND_add(rsa->d->d, rsa->d->dmax * sizeof rsa->d->d[0], 0.0);
230 }
231
232 if (!(rsa->flags & RSA_FLAG_NO_CONSTTIME))
233 {
234 /* Set BN_FLG_CONSTTIME flag */
235 n = &local_n;
236 BN_with_flags(n, rsa->n, BN_FLG_CONSTTIME);
237 }
238 else
239 n = rsa->n;
240
241 ret = BN_BLINDING_create_param(NULL, e, n, ctx,
242 rsa->meth->bn_mod_exp, rsa->_method_mod_n);
243 if (ret == NULL)
244 {
245 RSAerr(RSA_F_RSA_SETUP_BLINDING, ERR_R_BN_LIB);
246 goto err;
247 }
248 CRYPTO_THREADID_current(BN_BLINDING_thread_id(ret));
249err:
250 BN_CTX_end(ctx);
251 if (in_ctx == NULL)
252 BN_CTX_free(ctx);
253 if(rsa->e == NULL)
254 BN_free(e);
255
256 return ret;
257}
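
RSA_setup_blinding() is where the work happens: when rsa->e is missing it reconstructs the public exponent from the private material via e = d^-1 mod (p-1)(q-1) (rsa_get_public_exp), then builds BN_BLINDING parameters bound to the current thread id. Callers rarely touch it directly, since the private-key operations enable blinding implicitly, but it can be driven explicitly through RSA_blinding_on(), e.g. to refresh blinding state in a child process. A minimal sketch (the enable_blinding name is illustrative; rsa is assumed to be an already-loaded private key):

#include <openssl/bn.h>
#include <openssl/rsa.h>

/* Explicitly (re)enable blinding on a private key. RSA_blinding_on() calls
 * RSA_setup_blinding() above and flips RSA_FLAG_BLINDING/RSA_FLAG_NO_BLINDING. */
static int enable_blinding(RSA *rsa)
{
	BN_CTX *ctx = BN_CTX_new();
	int ok;

	if (ctx == NULL)
		return 0;
	ok = RSA_blinding_on(rsa, ctx);
	BN_CTX_free(ctx);
	return ok;
}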
diff --git a/src/lib/libssl/src/crypto/rsa/rsa_pmeth.c b/src/lib/libssl/src/crypto/rsa/rsa_pmeth.c
index c6892ecd09..5b2ecf56ad 100644
--- a/src/lib/libssl/src/crypto/rsa/rsa_pmeth.c
+++ b/src/lib/libssl/src/crypto/rsa/rsa_pmeth.c
@@ -63,6 +63,12 @@
63#include <openssl/rsa.h> 63#include <openssl/rsa.h>
64#include <openssl/bn.h> 64#include <openssl/bn.h>
65#include <openssl/evp.h> 65#include <openssl/evp.h>
66#ifndef OPENSSL_NO_CMS
67#include <openssl/cms.h>
68#endif
69#ifdef OPENSSL_FIPS
70#include <openssl/fips.h>
71#endif
66#include "evp_locl.h" 72#include "evp_locl.h"
67#include "rsa_locl.h" 73#include "rsa_locl.h"
68 74
@@ -79,6 +85,8 @@ typedef struct
79 int pad_mode; 85 int pad_mode;
80 /* message digest */ 86 /* message digest */
81 const EVP_MD *md; 87 const EVP_MD *md;
88 /* message digest for MGF1 */
89 const EVP_MD *mgf1md;
82 /* PSS/OAEP salt length */ 90 /* PSS/OAEP salt length */
83 int saltlen; 91 int saltlen;
84 /* Temp buffer */ 92 /* Temp buffer */
@@ -95,6 +103,7 @@ static int pkey_rsa_init(EVP_PKEY_CTX *ctx)
95 rctx->pub_exp = NULL; 103 rctx->pub_exp = NULL;
96 rctx->pad_mode = RSA_PKCS1_PADDING; 104 rctx->pad_mode = RSA_PKCS1_PADDING;
97 rctx->md = NULL; 105 rctx->md = NULL;
106 rctx->mgf1md = NULL;
98 rctx->tbuf = NULL; 107 rctx->tbuf = NULL;
99 108
100 rctx->saltlen = -2; 109 rctx->saltlen = -2;
@@ -147,6 +156,31 @@ static void pkey_rsa_cleanup(EVP_PKEY_CTX *ctx)
147 OPENSSL_free(rctx); 156 OPENSSL_free(rctx);
148 } 157 }
149 } 158 }
159#ifdef OPENSSL_FIPS
160/* FIPS checker. Return value indicates status of context parameters:
161 * 1 : redirect to FIPS.
162 * 0 : don't redirect to FIPS.
163 * -1 : illegal operation in FIPS mode.
164 */
165
166static int pkey_fips_check_ctx(EVP_PKEY_CTX *ctx)
167 {
168 RSA_PKEY_CTX *rctx = ctx->data;
169 RSA *rsa = ctx->pkey->pkey.rsa;
170 int rv = -1;
171 if (!FIPS_mode())
172 return 0;
173 if (rsa->flags & RSA_FLAG_NON_FIPS_ALLOW)
174 rv = 0;
175 if (!(rsa->meth->flags & RSA_FLAG_FIPS_METHOD) && rv)
176 return -1;
177 if (rctx->md && !(rctx->md->flags & EVP_MD_FLAG_FIPS))
178 return rv;
179 if (rctx->mgf1md && !(rctx->mgf1md->flags & EVP_MD_FLAG_FIPS))
180 return rv;
181 return 1;
182 }
183#endif
150 184
151static int pkey_rsa_sign(EVP_PKEY_CTX *ctx, unsigned char *sig, size_t *siglen, 185static int pkey_rsa_sign(EVP_PKEY_CTX *ctx, unsigned char *sig, size_t *siglen,
152 const unsigned char *tbs, size_t tbslen) 186 const unsigned char *tbs, size_t tbslen)
@@ -155,6 +189,15 @@ static int pkey_rsa_sign(EVP_PKEY_CTX *ctx, unsigned char *sig, size_t *siglen,
155 RSA_PKEY_CTX *rctx = ctx->data; 189 RSA_PKEY_CTX *rctx = ctx->data;
156 RSA *rsa = ctx->pkey->pkey.rsa; 190 RSA *rsa = ctx->pkey->pkey.rsa;
157 191
192#ifdef OPENSSL_FIPS
193 ret = pkey_fips_check_ctx(ctx);
194 if (ret < 0)
195 {
196 RSAerr(RSA_F_PKEY_RSA_SIGN, RSA_R_OPERATION_NOT_ALLOWED_IN_FIPS_MODE);
197 return -1;
198 }
199#endif
200
158 if (rctx->md) 201 if (rctx->md)
159 { 202 {
160 if (tbslen != (size_t)EVP_MD_size(rctx->md)) 203 if (tbslen != (size_t)EVP_MD_size(rctx->md))
@@ -163,7 +206,36 @@ static int pkey_rsa_sign(EVP_PKEY_CTX *ctx, unsigned char *sig, size_t *siglen,
163 RSA_R_INVALID_DIGEST_LENGTH); 206 RSA_R_INVALID_DIGEST_LENGTH);
164 return -1; 207 return -1;
165 } 208 }
166 if (rctx->pad_mode == RSA_X931_PADDING) 209#ifdef OPENSSL_FIPS
210 if (ret > 0)
211 {
212 unsigned int slen;
213 ret = FIPS_rsa_sign_digest(rsa, tbs, tbslen, rctx->md,
214 rctx->pad_mode,
215 rctx->saltlen,
216 rctx->mgf1md,
217 sig, &slen);
218 if (ret > 0)
219 *siglen = slen;
220 else
221 *siglen = 0;
222 return ret;
223 }
224#endif
225
226 if (EVP_MD_type(rctx->md) == NID_mdc2)
227 {
228 unsigned int sltmp;
229 if (rctx->pad_mode != RSA_PKCS1_PADDING)
230 return -1;
231 ret = RSA_sign_ASN1_OCTET_STRING(NID_mdc2,
232 tbs, tbslen, sig, &sltmp, rsa);
233
234 if (ret <= 0)
235 return ret;
236 ret = sltmp;
237 }
238 else if (rctx->pad_mode == RSA_X931_PADDING)
167 { 239 {
168 if (!setup_tbuf(rctx, ctx)) 240 if (!setup_tbuf(rctx, ctx))
169 return -1; 241 return -1;
@@ -186,8 +258,10 @@ static int pkey_rsa_sign(EVP_PKEY_CTX *ctx, unsigned char *sig, size_t *siglen,
186 { 258 {
187 if (!setup_tbuf(rctx, ctx)) 259 if (!setup_tbuf(rctx, ctx))
188 return -1; 260 return -1;
189 if (!RSA_padding_add_PKCS1_PSS(rsa, rctx->tbuf, tbs, 261 if (!RSA_padding_add_PKCS1_PSS_mgf1(rsa,
190 rctx->md, rctx->saltlen)) 262 rctx->tbuf, tbs,
263 rctx->md, rctx->mgf1md,
264 rctx->saltlen))
191 return -1; 265 return -1;
192 ret = RSA_private_encrypt(RSA_size(rsa), rctx->tbuf, 266 ret = RSA_private_encrypt(RSA_size(rsa), rctx->tbuf,
193 sig, rsa, RSA_NO_PADDING); 267 sig, rsa, RSA_NO_PADDING);
@@ -269,8 +343,30 @@ static int pkey_rsa_verify(EVP_PKEY_CTX *ctx,
269 RSA_PKEY_CTX *rctx = ctx->data; 343 RSA_PKEY_CTX *rctx = ctx->data;
270 RSA *rsa = ctx->pkey->pkey.rsa; 344 RSA *rsa = ctx->pkey->pkey.rsa;
271 size_t rslen; 345 size_t rslen;
346#ifdef OPENSSL_FIPS
347 int rv;
348 rv = pkey_fips_check_ctx(ctx);
349 if (rv < 0)
350 {
351 RSAerr(RSA_F_PKEY_RSA_VERIFY, RSA_R_OPERATION_NOT_ALLOWED_IN_FIPS_MODE);
352 return -1;
353 }
354#endif
272 if (rctx->md) 355 if (rctx->md)
273 { 356 {
357#ifdef OPENSSL_FIPS
358 if (rv > 0)
359 {
360 return FIPS_rsa_verify_digest(rsa,
361 tbs, tbslen,
362 rctx->md,
363 rctx->pad_mode,
364 rctx->saltlen,
365 rctx->mgf1md,
366 sig, siglen);
367
368 }
369#endif
274 if (rctx->pad_mode == RSA_PKCS1_PADDING) 370 if (rctx->pad_mode == RSA_PKCS1_PADDING)
275 return RSA_verify(EVP_MD_type(rctx->md), tbs, tbslen, 371 return RSA_verify(EVP_MD_type(rctx->md), tbs, tbslen,
276 sig, siglen, rsa); 372 sig, siglen, rsa);
@@ -289,7 +385,8 @@ static int pkey_rsa_verify(EVP_PKEY_CTX *ctx,
289 rsa, RSA_NO_PADDING); 385 rsa, RSA_NO_PADDING);
290 if (ret <= 0) 386 if (ret <= 0)
291 return 0; 387 return 0;
292 ret = RSA_verify_PKCS1_PSS(rsa, tbs, rctx->md, 388 ret = RSA_verify_PKCS1_PSS_mgf1(rsa, tbs,
389 rctx->md, rctx->mgf1md,
293 rctx->tbuf, rctx->saltlen); 390 rctx->tbuf, rctx->saltlen);
294 if (ret <= 0) 391 if (ret <= 0)
295 return 0; 392 return 0;
@@ -403,15 +500,25 @@ static int pkey_rsa_ctrl(EVP_PKEY_CTX *ctx, int type, int p1, void *p2)
403 RSA_R_ILLEGAL_OR_UNSUPPORTED_PADDING_MODE); 500 RSA_R_ILLEGAL_OR_UNSUPPORTED_PADDING_MODE);
404 return -2; 501 return -2;
405 502
503 case EVP_PKEY_CTRL_GET_RSA_PADDING:
504 *(int *)p2 = rctx->pad_mode;
505 return 1;
506
406 case EVP_PKEY_CTRL_RSA_PSS_SALTLEN: 507 case EVP_PKEY_CTRL_RSA_PSS_SALTLEN:
407 if (p1 < -2) 508 case EVP_PKEY_CTRL_GET_RSA_PSS_SALTLEN:
408 return -2;
409 if (rctx->pad_mode != RSA_PKCS1_PSS_PADDING) 509 if (rctx->pad_mode != RSA_PKCS1_PSS_PADDING)
410 { 510 {
411 RSAerr(RSA_F_PKEY_RSA_CTRL, RSA_R_INVALID_PSS_SALTLEN); 511 RSAerr(RSA_F_PKEY_RSA_CTRL, RSA_R_INVALID_PSS_SALTLEN);
412 return -2; 512 return -2;
413 } 513 }
414 rctx->saltlen = p1; 514 if (type == EVP_PKEY_CTRL_GET_RSA_PSS_SALTLEN)
515 *(int *)p2 = rctx->saltlen;
516 else
517 {
518 if (p1 < -2)
519 return -2;
520 rctx->saltlen = p1;
521 }
415 return 1; 522 return 1;
416 523
417 case EVP_PKEY_CTRL_RSA_KEYGEN_BITS: 524 case EVP_PKEY_CTRL_RSA_KEYGEN_BITS:
@@ -435,16 +542,45 @@ static int pkey_rsa_ctrl(EVP_PKEY_CTX *ctx, int type, int p1, void *p2)
435 rctx->md = p2; 542 rctx->md = p2;
436 return 1; 543 return 1;
437 544
545 case EVP_PKEY_CTRL_RSA_MGF1_MD:
546 case EVP_PKEY_CTRL_GET_RSA_MGF1_MD:
547 if (rctx->pad_mode != RSA_PKCS1_PSS_PADDING)
548 {
549 RSAerr(RSA_F_PKEY_RSA_CTRL, RSA_R_INVALID_MGF1_MD);
550 return -2;
551 }
552 if (type == EVP_PKEY_CTRL_GET_RSA_MGF1_MD)
553 {
554 if (rctx->mgf1md)
555 *(const EVP_MD **)p2 = rctx->mgf1md;
556 else
557 *(const EVP_MD **)p2 = rctx->md;
558 }
559 else
560 rctx->mgf1md = p2;
561 return 1;
562
438 case EVP_PKEY_CTRL_DIGESTINIT: 563 case EVP_PKEY_CTRL_DIGESTINIT:
439 case EVP_PKEY_CTRL_PKCS7_ENCRYPT: 564 case EVP_PKEY_CTRL_PKCS7_ENCRYPT:
440 case EVP_PKEY_CTRL_PKCS7_DECRYPT: 565 case EVP_PKEY_CTRL_PKCS7_DECRYPT:
441 case EVP_PKEY_CTRL_PKCS7_SIGN: 566 case EVP_PKEY_CTRL_PKCS7_SIGN:
567 return 1;
442#ifndef OPENSSL_NO_CMS 568#ifndef OPENSSL_NO_CMS
443 case EVP_PKEY_CTRL_CMS_ENCRYPT:
444 case EVP_PKEY_CTRL_CMS_DECRYPT: 569 case EVP_PKEY_CTRL_CMS_DECRYPT:
570 {
571 X509_ALGOR *alg = NULL;
572 ASN1_OBJECT *encalg = NULL;
573 if (p2)
574 CMS_RecipientInfo_ktri_get0_algs(p2, NULL, NULL, &alg);
575 if (alg)
576 X509_ALGOR_get0(&encalg, NULL, NULL, alg);
577 if (encalg && OBJ_obj2nid(encalg) == NID_rsaesOaep)
578 rctx->pad_mode = RSA_PKCS1_OAEP_PADDING;
579 }
580 case EVP_PKEY_CTRL_CMS_ENCRYPT:
445 case EVP_PKEY_CTRL_CMS_SIGN: 581 case EVP_PKEY_CTRL_CMS_SIGN:
446#endif
447 return 1; 582 return 1;
583#endif
448 case EVP_PKEY_CTRL_PEER_KEY: 584 case EVP_PKEY_CTRL_PEER_KEY:
449 RSAerr(RSA_F_PKEY_RSA_CTRL, 585 RSAerr(RSA_F_PKEY_RSA_CTRL,
450 RSA_R_OPERATION_NOT_SUPPORTED_FOR_THIS_KEYTYPE); 586 RSA_R_OPERATION_NOT_SUPPORTED_FOR_THIS_KEYTYPE);
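
Note on the pkey_rsa_ctrl() hunks above: the new controls expose the current padding mode and PSS salt length for reading, and let PSS use an MGF1 digest different from the message digest; the handler refuses an MGF1 digest unless PSS padding has already been selected. A minimal sketch of driving these knobs through the public EVP_PKEY_CTX macros, with SHA-256 for both digests purely as an illustration and error handling compressed:

#include <openssl/evp.h>
#include <openssl/rsa.h>

/* Verify an RSA-PSS signature over a precomputed SHA-256 digest, using the
 * pad-mode, MGF1-digest and salt-length controls handled above.  Returns
 * 1 for a good signature, 0 for a bad one, <0 on other errors. */
int verify_pss_sha256(EVP_PKEY *pkey,
                      const unsigned char *digest, size_t digestlen,
                      const unsigned char *sig, size_t siglen)
{
    EVP_PKEY_CTX *pctx = EVP_PKEY_CTX_new(pkey, NULL);
    int rc = -1;

    if (pctx == NULL)
        return -1;
    if (EVP_PKEY_verify_init(pctx) > 0
        && EVP_PKEY_CTX_set_rsa_padding(pctx, RSA_PKCS1_PSS_PADDING) > 0
        /* MGF1 and salt-length controls are only accepted once PSS is set */
        && EVP_PKEY_CTX_set_signature_md(pctx, EVP_sha256()) > 0
        && EVP_PKEY_CTX_set_rsa_mgf1_md(pctx, EVP_sha256()) > 0
        && EVP_PKEY_CTX_set_rsa_pss_saltlen(pctx, -1) > 0) /* -1: salt = digest len */
        rc = EVP_PKEY_verify(pctx, sig, siglen, digest, digestlen);

    EVP_PKEY_CTX_free(pctx);
    return rc;
}

Because of the pad-mode check above, EVP_PKEY_CTX_set_rsa_padding() has to run before the MGF1 and salt-length controls.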
diff --git a/src/lib/libssl/src/crypto/rsa/rsa_pss.c b/src/lib/libssl/src/crypto/rsa/rsa_pss.c
index ac211e2ffe..5f9f533d0c 100644
--- a/src/lib/libssl/src/crypto/rsa/rsa_pss.c
+++ b/src/lib/libssl/src/crypto/rsa/rsa_pss.c
@@ -73,6 +73,13 @@ static const unsigned char zeroes[] = {0,0,0,0,0,0,0,0};
73int RSA_verify_PKCS1_PSS(RSA *rsa, const unsigned char *mHash, 73int RSA_verify_PKCS1_PSS(RSA *rsa, const unsigned char *mHash,
74 const EVP_MD *Hash, const unsigned char *EM, int sLen) 74 const EVP_MD *Hash, const unsigned char *EM, int sLen)
75 { 75 {
76 return RSA_verify_PKCS1_PSS_mgf1(rsa, mHash, Hash, NULL, EM, sLen);
77 }
78
79int RSA_verify_PKCS1_PSS_mgf1(RSA *rsa, const unsigned char *mHash,
80 const EVP_MD *Hash, const EVP_MD *mgf1Hash,
81 const unsigned char *EM, int sLen)
82 {
76 int i; 83 int i;
77 int ret = 0; 84 int ret = 0;
78 int hLen, maskedDBLen, MSBits, emLen; 85 int hLen, maskedDBLen, MSBits, emLen;
@@ -80,6 +87,10 @@ int RSA_verify_PKCS1_PSS(RSA *rsa, const unsigned char *mHash,
80 unsigned char *DB = NULL; 87 unsigned char *DB = NULL;
81 EVP_MD_CTX ctx; 88 EVP_MD_CTX ctx;
82 unsigned char H_[EVP_MAX_MD_SIZE]; 89 unsigned char H_[EVP_MAX_MD_SIZE];
90 EVP_MD_CTX_init(&ctx);
91
92 if (mgf1Hash == NULL)
93 mgf1Hash = Hash;
83 94
84 hLen = EVP_MD_size(Hash); 95 hLen = EVP_MD_size(Hash);
85 if (hLen < 0) 96 if (hLen < 0)
@@ -94,7 +105,7 @@ int RSA_verify_PKCS1_PSS(RSA *rsa, const unsigned char *mHash,
94 else if (sLen == -2) sLen = -2; 105 else if (sLen == -2) sLen = -2;
95 else if (sLen < -2) 106 else if (sLen < -2)
96 { 107 {
97 RSAerr(RSA_F_RSA_VERIFY_PKCS1_PSS, RSA_R_SLEN_CHECK_FAILED); 108 RSAerr(RSA_F_RSA_VERIFY_PKCS1_PSS_MGF1, RSA_R_SLEN_CHECK_FAILED);
98 goto err; 109 goto err;
99 } 110 }
100 111
@@ -102,7 +113,7 @@ int RSA_verify_PKCS1_PSS(RSA *rsa, const unsigned char *mHash,
102 emLen = RSA_size(rsa); 113 emLen = RSA_size(rsa);
103 if (EM[0] & (0xFF << MSBits)) 114 if (EM[0] & (0xFF << MSBits))
104 { 115 {
105 RSAerr(RSA_F_RSA_VERIFY_PKCS1_PSS, RSA_R_FIRST_OCTET_INVALID); 116 RSAerr(RSA_F_RSA_VERIFY_PKCS1_PSS_MGF1, RSA_R_FIRST_OCTET_INVALID);
106 goto err; 117 goto err;
107 } 118 }
108 if (MSBits == 0) 119 if (MSBits == 0)
@@ -112,12 +123,12 @@ int RSA_verify_PKCS1_PSS(RSA *rsa, const unsigned char *mHash,
112 } 123 }
113 if (emLen < (hLen + sLen + 2)) /* sLen can be small negative */ 124 if (emLen < (hLen + sLen + 2)) /* sLen can be small negative */
114 { 125 {
115 RSAerr(RSA_F_RSA_VERIFY_PKCS1_PSS, RSA_R_DATA_TOO_LARGE); 126 RSAerr(RSA_F_RSA_VERIFY_PKCS1_PSS_MGF1, RSA_R_DATA_TOO_LARGE);
116 goto err; 127 goto err;
117 } 128 }
118 if (EM[emLen - 1] != 0xbc) 129 if (EM[emLen - 1] != 0xbc)
119 { 130 {
120 RSAerr(RSA_F_RSA_VERIFY_PKCS1_PSS, RSA_R_LAST_OCTET_INVALID); 131 RSAerr(RSA_F_RSA_VERIFY_PKCS1_PSS_MGF1, RSA_R_LAST_OCTET_INVALID);
121 goto err; 132 goto err;
122 } 133 }
123 maskedDBLen = emLen - hLen - 1; 134 maskedDBLen = emLen - hLen - 1;
@@ -125,10 +136,10 @@ int RSA_verify_PKCS1_PSS(RSA *rsa, const unsigned char *mHash,
125 DB = OPENSSL_malloc(maskedDBLen); 136 DB = OPENSSL_malloc(maskedDBLen);
126 if (!DB) 137 if (!DB)
127 { 138 {
128 RSAerr(RSA_F_RSA_VERIFY_PKCS1_PSS, ERR_R_MALLOC_FAILURE); 139 RSAerr(RSA_F_RSA_VERIFY_PKCS1_PSS_MGF1, ERR_R_MALLOC_FAILURE);
129 goto err; 140 goto err;
130 } 141 }
131 if (PKCS1_MGF1(DB, maskedDBLen, H, hLen, Hash) < 0) 142 if (PKCS1_MGF1(DB, maskedDBLen, H, hLen, mgf1Hash) < 0)
132 goto err; 143 goto err;
133 for (i = 0; i < maskedDBLen; i++) 144 for (i = 0; i < maskedDBLen; i++)
134 DB[i] ^= EM[i]; 145 DB[i] ^= EM[i];
@@ -137,25 +148,28 @@ int RSA_verify_PKCS1_PSS(RSA *rsa, const unsigned char *mHash,
137 for (i = 0; DB[i] == 0 && i < (maskedDBLen-1); i++) ; 148 for (i = 0; DB[i] == 0 && i < (maskedDBLen-1); i++) ;
138 if (DB[i++] != 0x1) 149 if (DB[i++] != 0x1)
139 { 150 {
140 RSAerr(RSA_F_RSA_VERIFY_PKCS1_PSS, RSA_R_SLEN_RECOVERY_FAILED); 151 RSAerr(RSA_F_RSA_VERIFY_PKCS1_PSS_MGF1, RSA_R_SLEN_RECOVERY_FAILED);
141 goto err; 152 goto err;
142 } 153 }
143 if (sLen >= 0 && (maskedDBLen - i) != sLen) 154 if (sLen >= 0 && (maskedDBLen - i) != sLen)
144 { 155 {
145 RSAerr(RSA_F_RSA_VERIFY_PKCS1_PSS, RSA_R_SLEN_CHECK_FAILED); 156 RSAerr(RSA_F_RSA_VERIFY_PKCS1_PSS_MGF1, RSA_R_SLEN_CHECK_FAILED);
146 goto err; 157 goto err;
147 } 158 }
148 EVP_MD_CTX_init(&ctx); 159 if (!EVP_DigestInit_ex(&ctx, Hash, NULL)
149 EVP_DigestInit_ex(&ctx, Hash, NULL); 160 || !EVP_DigestUpdate(&ctx, zeroes, sizeof zeroes)
150 EVP_DigestUpdate(&ctx, zeroes, sizeof zeroes); 161 || !EVP_DigestUpdate(&ctx, mHash, hLen))
151 EVP_DigestUpdate(&ctx, mHash, hLen); 162 goto err;
152 if (maskedDBLen - i) 163 if (maskedDBLen - i)
153 EVP_DigestUpdate(&ctx, DB + i, maskedDBLen - i); 164 {
154 EVP_DigestFinal(&ctx, H_, NULL); 165 if (!EVP_DigestUpdate(&ctx, DB + i, maskedDBLen - i))
155 EVP_MD_CTX_cleanup(&ctx); 166 goto err;
167 }
168 if (!EVP_DigestFinal_ex(&ctx, H_, NULL))
169 goto err;
156 if (memcmp(H_, H, hLen)) 170 if (memcmp(H_, H, hLen))
157 { 171 {
158 RSAerr(RSA_F_RSA_VERIFY_PKCS1_PSS, RSA_R_BAD_SIGNATURE); 172 RSAerr(RSA_F_RSA_VERIFY_PKCS1_PSS_MGF1, RSA_R_BAD_SIGNATURE);
159 ret = 0; 173 ret = 0;
160 } 174 }
161 else 175 else
@@ -164,6 +178,7 @@ int RSA_verify_PKCS1_PSS(RSA *rsa, const unsigned char *mHash,
164 err: 178 err:
165 if (DB) 179 if (DB)
166 OPENSSL_free(DB); 180 OPENSSL_free(DB);
181 EVP_MD_CTX_cleanup(&ctx);
167 182
168 return ret; 183 return ret;
169 184
@@ -173,12 +188,22 @@ int RSA_padding_add_PKCS1_PSS(RSA *rsa, unsigned char *EM,
173 const unsigned char *mHash, 188 const unsigned char *mHash,
174 const EVP_MD *Hash, int sLen) 189 const EVP_MD *Hash, int sLen)
175 { 190 {
191 return RSA_padding_add_PKCS1_PSS_mgf1(rsa, EM, mHash, Hash, NULL, sLen);
192 }
193
194int RSA_padding_add_PKCS1_PSS_mgf1(RSA *rsa, unsigned char *EM,
195 const unsigned char *mHash,
196 const EVP_MD *Hash, const EVP_MD *mgf1Hash, int sLen)
197 {
176 int i; 198 int i;
177 int ret = 0; 199 int ret = 0;
178 int hLen, maskedDBLen, MSBits, emLen; 200 int hLen, maskedDBLen, MSBits, emLen;
179 unsigned char *H, *salt = NULL, *p; 201 unsigned char *H, *salt = NULL, *p;
180 EVP_MD_CTX ctx; 202 EVP_MD_CTX ctx;
181 203
204 if (mgf1Hash == NULL)
205 mgf1Hash = Hash;
206
182 hLen = EVP_MD_size(Hash); 207 hLen = EVP_MD_size(Hash);
183 if (hLen < 0) 208 if (hLen < 0)
184 goto err; 209 goto err;
@@ -192,7 +217,7 @@ int RSA_padding_add_PKCS1_PSS(RSA *rsa, unsigned char *EM,
192 else if (sLen == -2) sLen = -2; 217 else if (sLen == -2) sLen = -2;
193 else if (sLen < -2) 218 else if (sLen < -2)
194 { 219 {
195 RSAerr(RSA_F_RSA_PADDING_ADD_PKCS1_PSS, RSA_R_SLEN_CHECK_FAILED); 220 RSAerr(RSA_F_RSA_PADDING_ADD_PKCS1_PSS_MGF1, RSA_R_SLEN_CHECK_FAILED);
196 goto err; 221 goto err;
197 } 222 }
198 223
@@ -209,8 +234,7 @@ int RSA_padding_add_PKCS1_PSS(RSA *rsa, unsigned char *EM,
209 } 234 }
210 else if (emLen < (hLen + sLen + 2)) 235 else if (emLen < (hLen + sLen + 2))
211 { 236 {
212 RSAerr(RSA_F_RSA_PADDING_ADD_PKCS1_PSS, 237 RSAerr(RSA_F_RSA_PADDING_ADD_PKCS1_PSS_MGF1,RSA_R_DATA_TOO_LARGE_FOR_KEY_SIZE);
213 RSA_R_DATA_TOO_LARGE_FOR_KEY_SIZE);
214 goto err; 238 goto err;
215 } 239 }
216 if (sLen > 0) 240 if (sLen > 0)
@@ -218,8 +242,7 @@ int RSA_padding_add_PKCS1_PSS(RSA *rsa, unsigned char *EM,
218 salt = OPENSSL_malloc(sLen); 242 salt = OPENSSL_malloc(sLen);
219 if (!salt) 243 if (!salt)
220 { 244 {
221 RSAerr(RSA_F_RSA_PADDING_ADD_PKCS1_PSS, 245 RSAerr(RSA_F_RSA_PADDING_ADD_PKCS1_PSS_MGF1,ERR_R_MALLOC_FAILURE);
222 ERR_R_MALLOC_FAILURE);
223 goto err; 246 goto err;
224 } 247 }
225 if (RAND_bytes(salt, sLen) <= 0) 248 if (RAND_bytes(salt, sLen) <= 0)
@@ -228,16 +251,18 @@ int RSA_padding_add_PKCS1_PSS(RSA *rsa, unsigned char *EM,
228 maskedDBLen = emLen - hLen - 1; 251 maskedDBLen = emLen - hLen - 1;
229 H = EM + maskedDBLen; 252 H = EM + maskedDBLen;
230 EVP_MD_CTX_init(&ctx); 253 EVP_MD_CTX_init(&ctx);
231 EVP_DigestInit_ex(&ctx, Hash, NULL); 254 if (!EVP_DigestInit_ex(&ctx, Hash, NULL)
232 EVP_DigestUpdate(&ctx, zeroes, sizeof zeroes); 255 || !EVP_DigestUpdate(&ctx, zeroes, sizeof zeroes)
233 EVP_DigestUpdate(&ctx, mHash, hLen); 256 || !EVP_DigestUpdate(&ctx, mHash, hLen))
234 if (sLen) 257 goto err;
235 EVP_DigestUpdate(&ctx, salt, sLen); 258 if (sLen && !EVP_DigestUpdate(&ctx, salt, sLen))
236 EVP_DigestFinal(&ctx, H, NULL); 259 goto err;
260 if (!EVP_DigestFinal_ex(&ctx, H, NULL))
261 goto err;
237 EVP_MD_CTX_cleanup(&ctx); 262 EVP_MD_CTX_cleanup(&ctx);
238 263
239 /* Generate dbMask in place then perform XOR on it */ 264 /* Generate dbMask in place then perform XOR on it */
240 if (PKCS1_MGF1(EM, maskedDBLen, H, hLen, Hash)) 265 if (PKCS1_MGF1(EM, maskedDBLen, H, hLen, mgf1Hash))
241 goto err; 266 goto err;
242 267
243 p = EM; 268 p = EM;
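
In rsa_pss.c the old entry points survive as thin wrappers that pass mgf1Hash = NULL, which the new *_mgf1 variants fold back to the message hash; the digest calls also gained error checking, and the EVP_MD_CTX is now cleaned up on every exit path. A hedged sketch of the raw flow these primitives serve, mirroring the recover-then-verify sequence visible in pkey_rsa_verify() above plus the matching signing side; SHA-1 as the MGF1 digest only demonstrates that the two hashes may differ, and the fixed 512-byte buffers are an illustrative shortcut:

#include <openssl/evp.h>
#include <openssl/rsa.h>

/* Sign and verify a precomputed SHA-256 digest with "raw" PSS: encode with
 * RSA_padding_add_PKCS1_PSS_mgf1(), run the private-key operation with
 * RSA_NO_PADDING, then recover and check the encoded message on the other
 * side.  "sig" must hold at least RSA_size(rsa) bytes. */
int pss_sign_verify_demo(RSA *rsa, const unsigned char mhash[32],
                         unsigned char *sig, int *siglen)
{
    int klen = RSA_size(rsa);
    unsigned char em[512], em2[512];

    if (klen <= 0 || klen > (int)sizeof(em))
        return 0;
    /* EMSA-PSS encode: SHA-256 message hash, SHA-1 MGF1, 32-byte salt */
    if (RSA_padding_add_PKCS1_PSS_mgf1(rsa, em, mhash,
                                       EVP_sha256(), EVP_sha1(), 32) != 1)
        return 0;
    *siglen = RSA_private_encrypt(klen, em, sig, rsa, RSA_NO_PADDING);
    if (*siglen <= 0)
        return 0;
    /* receiving side: raw public-key operation, then PSS verification */
    if (RSA_public_decrypt(*siglen, sig, em2, rsa, RSA_NO_PADDING) <= 0)
        return 0;
    return RSA_verify_PKCS1_PSS_mgf1(rsa, mhash, EVP_sha256(), EVP_sha1(),
                                     em2, 32) == 1;
}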
diff --git a/src/lib/libssl/src/crypto/s390xcap.c b/src/lib/libssl/src/crypto/s390xcap.c
index ffbe0235f9..f2e94ef47e 100644
--- a/src/lib/libssl/src/crypto/s390xcap.c
+++ b/src/lib/libssl/src/crypto/s390xcap.c
@@ -4,7 +4,7 @@
4#include <setjmp.h> 4#include <setjmp.h>
5#include <signal.h> 5#include <signal.h>
6 6
7extern unsigned long OPENSSL_s390xcap_P; 7extern unsigned long OPENSSL_s390xcap_P[];
8 8
9static sigjmp_buf ill_jmp; 9static sigjmp_buf ill_jmp;
10static void ill_handler (int sig) { siglongjmp(ill_jmp,sig); } 10static void ill_handler (int sig) { siglongjmp(ill_jmp,sig); }
@@ -16,7 +16,9 @@ void OPENSSL_cpuid_setup(void)
16 sigset_t oset; 16 sigset_t oset;
17 struct sigaction ill_act,oact; 17 struct sigaction ill_act,oact;
18 18
19 if (OPENSSL_s390xcap_P) return; 19 if (OPENSSL_s390xcap_P[0]) return;
20
21 OPENSSL_s390xcap_P[0] = 1UL<<(8*sizeof(unsigned long)-1);
20 22
21 memset(&ill_act,0,sizeof(ill_act)); 23 memset(&ill_act,0,sizeof(ill_act));
22 ill_act.sa_handler = ill_handler; 24 ill_act.sa_handler = ill_handler;
@@ -27,10 +29,8 @@ void OPENSSL_cpuid_setup(void)
27 sigaction (SIGILL,&ill_act,&oact); 29 sigaction (SIGILL,&ill_act,&oact);
28 30
29 /* protection against missing store-facility-list-extended */ 31 /* protection against missing store-facility-list-extended */
30 if (sigsetjmp(ill_jmp,0) == 0) 32 if (sigsetjmp(ill_jmp,1) == 0)
31 OPENSSL_s390xcap_P = OPENSSL_s390x_facilities(); 33 OPENSSL_s390x_facilities();
32 else
33 OPENSSL_s390xcap_P = 1UL<<63;
34 34
35 sigaction (SIGILL,&oact,NULL); 35 sigaction (SIGILL,&oact,NULL);
36 sigprocmask(SIG_SETMASK,&oset,NULL); 36 sigprocmask(SIG_SETMASK,&oset,NULL);
diff --git a/src/lib/libssl/src/crypto/s390xcpuid.S b/src/lib/libssl/src/crypto/s390xcpuid.S
index b053c6a281..06815347e6 100644
--- a/src/lib/libssl/src/crypto/s390xcpuid.S
+++ b/src/lib/libssl/src/crypto/s390xcpuid.S
@@ -5,10 +5,14 @@
5.align 16 5.align 16
6OPENSSL_s390x_facilities: 6OPENSSL_s390x_facilities:
7 lghi %r0,0 7 lghi %r0,0
8 .long 0xb2b0f010 # stfle 16(%r15) 8 larl %r2,OPENSSL_s390xcap_P
9 lg %r2,16(%r15) 9 stg %r0,8(%r2)
10 larl %r1,OPENSSL_s390xcap_P 10 .long 0xb2b02000 # stfle 0(%r2)
11 stg %r2,0(%r1) 11 brc 8,.Ldone
12 lghi %r0,1
13 .long 0xb2b02000 # stfle 0(%r2)
14.Ldone:
15 lg %r2,0(%r2)
12 br %r14 16 br %r14
13.size OPENSSL_s390x_facilities,.-OPENSSL_s390x_facilities 17.size OPENSSL_s390x_facilities,.-OPENSSL_s390x_facilities
14 18
@@ -58,6 +62,9 @@ OPENSSL_wipe_cpu:
58.type OPENSSL_cleanse,@function 62.type OPENSSL_cleanse,@function
59.align 16 63.align 16
60OPENSSL_cleanse: 64OPENSSL_cleanse:
65#if !defined(__s390x__) && !defined(__s390x)
66 llgfr %r3,%r3
67#endif
61 lghi %r4,15 68 lghi %r4,15
62 lghi %r0,0 69 lghi %r0,0
63 clgr %r3,%r4 70 clgr %r3,%r4
@@ -89,4 +96,4 @@ OPENSSL_cleanse:
89.section .init 96.section .init
90 brasl %r14,OPENSSL_cpuid_setup 97 brasl %r14,OPENSSL_cpuid_setup
91 98
92.comm OPENSSL_s390xcap_P,8,8 99.comm OPENSSL_s390xcap_P,16,8
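
In the two s390x files above, OPENSSL_s390xcap_P grows from a single word into a 16-byte facility-bit vector, the probe routine stores the stfle result into it directly, and sigsetjmp() is now called with savesigs=1 so the signal mask is restored when the SIGILL handler longjmps out of a missing instruction. For readers unfamiliar with the idiom, a stripped-down sketch of probing an optional instruction under a temporary SIGILL handler; probe_insn() is a hypothetical stand-in for OPENSSL_s390x_facilities(), and the sigprocmask bookkeeping of the real code is omitted:

#include <setjmp.h>
#include <signal.h>
#include <string.h>

static sigjmp_buf probe_jmp;

static void probe_ill(int sig)
{
    siglongjmp(probe_jmp, sig);
}

extern void probe_insn(void);   /* hypothetical: may raise SIGILL */

int cpu_has_feature(void)
{
    struct sigaction ill_act, oact;
    int ok = 0;

    memset(&ill_act, 0, sizeof(ill_act));
    ill_act.sa_handler = probe_ill;
    sigaction(SIGILL, &ill_act, &oact);

    /* savesigs=1, as in the patched code above, so the mask saved here is
     * restored when siglongjmp() fires from the handler */
    if (sigsetjmp(probe_jmp, 1) == 0) {
        probe_insn();
        ok = 1;                 /* instruction executed without faulting */
    }

    sigaction(SIGILL, &oact, NULL);
    return ok;
}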
diff --git a/src/lib/libssl/src/crypto/seed/seed.c b/src/lib/libssl/src/crypto/seed/seed.c
index 2bc384a19f..3e675a8d75 100644
--- a/src/lib/libssl/src/crypto/seed/seed.c
+++ b/src/lib/libssl/src/crypto/seed/seed.c
@@ -32,9 +32,14 @@
32#include <memory.h> 32#include <memory.h>
33#endif 33#endif
34 34
35#include <openssl/crypto.h>
35#include <openssl/seed.h> 36#include <openssl/seed.h>
36#include "seed_locl.h" 37#include "seed_locl.h"
37 38
39#ifdef SS /* can get defined on Solaris by inclusion of <stdlib.h> */
40#undef SS
41#endif
42
38static const seed_word SS[4][256] = { { 43static const seed_word SS[4][256] = { {
39 0x2989a1a8, 0x05858184, 0x16c6d2d4, 0x13c3d3d0, 0x14445054, 0x1d0d111c, 0x2c8ca0ac, 0x25052124, 44 0x2989a1a8, 0x05858184, 0x16c6d2d4, 0x13c3d3d0, 0x14445054, 0x1d0d111c, 0x2c8ca0ac, 0x25052124,
40 0x1d4d515c, 0x03434340, 0x18081018, 0x1e0e121c, 0x11415150, 0x3cccf0fc, 0x0acac2c8, 0x23436360, 45 0x1d4d515c, 0x03434340, 0x18081018, 0x1e0e121c, 0x11415150, 0x3cccf0fc, 0x0acac2c8, 0x23436360,
@@ -192,8 +197,14 @@ static const seed_word KC[] = {
192 KC0, KC1, KC2, KC3, KC4, KC5, KC6, KC7, 197 KC0, KC1, KC2, KC3, KC4, KC5, KC6, KC7,
193 KC8, KC9, KC10, KC11, KC12, KC13, KC14, KC15 }; 198 KC8, KC9, KC10, KC11, KC12, KC13, KC14, KC15 };
194#endif 199#endif
195
196void SEED_set_key(const unsigned char rawkey[SEED_KEY_LENGTH], SEED_KEY_SCHEDULE *ks) 200void SEED_set_key(const unsigned char rawkey[SEED_KEY_LENGTH], SEED_KEY_SCHEDULE *ks)
201#ifdef OPENSSL_FIPS
202 {
203 fips_cipher_abort(SEED);
204 private_SEED_set_key(rawkey, ks);
205 }
206void private_SEED_set_key(const unsigned char rawkey[SEED_KEY_LENGTH], SEED_KEY_SCHEDULE *ks)
207#endif
197{ 208{
198 seed_word x1, x2, x3, x4; 209 seed_word x1, x2, x3, x4;
199 seed_word t0, t1; 210 seed_word t0, t1;
diff --git a/src/lib/libssl/src/crypto/seed/seed.h b/src/lib/libssl/src/crypto/seed/seed.h
index 6ffa5f024e..c50fdd3607 100644
--- a/src/lib/libssl/src/crypto/seed/seed.h
+++ b/src/lib/libssl/src/crypto/seed/seed.h
@@ -116,7 +116,9 @@ typedef struct seed_key_st {
116#endif 116#endif
117} SEED_KEY_SCHEDULE; 117} SEED_KEY_SCHEDULE;
118 118
119 119#ifdef OPENSSL_FIPS
120void private_SEED_set_key(const unsigned char rawkey[SEED_KEY_LENGTH], SEED_KEY_SCHEDULE *ks);
121#endif
120void SEED_set_key(const unsigned char rawkey[SEED_KEY_LENGTH], SEED_KEY_SCHEDULE *ks); 122void SEED_set_key(const unsigned char rawkey[SEED_KEY_LENGTH], SEED_KEY_SCHEDULE *ks);
121 123
122void SEED_encrypt(const unsigned char s[SEED_BLOCK_SIZE], unsigned char d[SEED_BLOCK_SIZE], const SEED_KEY_SCHEDULE *ks); 124void SEED_encrypt(const unsigned char s[SEED_BLOCK_SIZE], unsigned char d[SEED_BLOCK_SIZE], const SEED_KEY_SCHEDULE *ks);
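
Under OPENSSL_FIPS, SEED_set_key() turns into a guard-and-forward shim: fips_cipher_abort(SEED) refuses the low-level call when the library is in FIPS mode, and the real key schedule moves to private_SEED_set_key(), which seed.h now declares. The #ifdef splice above can look odd out of context, so here is the same pattern in isolation with invented names; reject_if_fips_mode() is a hypothetical stand-in for the fips_cipher_abort() macro and the EXAMPLE_* types are made up:

typedef struct { unsigned int rd_key[32]; } EXAMPLE_KEY_SCHEDULE;

void private_EXAMPLE_set_key(const unsigned char *rawkey, EXAMPLE_KEY_SCHEDULE *ks);
static void reject_if_fips_mode(const char *alg) { (void)alg; /* hypothetical guard */ }

void EXAMPLE_set_key(const unsigned char *rawkey, EXAMPLE_KEY_SCHEDULE *ks)
#ifdef OPENSSL_FIPS
{
    /* public symbol: refuse the low-level call in FIPS mode, then forward */
    reject_if_fips_mode("EXAMPLE");
    private_EXAMPLE_set_key(rawkey, ks);
}
void private_EXAMPLE_set_key(const unsigned char *rawkey, EXAMPLE_KEY_SCHEDULE *ks)
#endif
{
    /* the real key schedule lives here: it is EXAMPLE_set_key() in a normal
     * build and private_EXAMPLE_set_key() in a FIPS-capable one */
    (void)rawkey;
    (void)ks;
}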
diff --git a/src/lib/libssl/src/crypto/sha/asm/sha1-alpha.pl b/src/lib/libssl/src/crypto/sha/asm/sha1-alpha.pl
new file mode 100644
index 0000000000..6c4b9251fd
--- /dev/null
+++ b/src/lib/libssl/src/crypto/sha/asm/sha1-alpha.pl
@@ -0,0 +1,322 @@
1#!/usr/bin/env perl
2
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9
10# SHA1 block procedure for Alpha.
11
12# On 21264 performance is 33% better than code generated by vendor
13# compiler, and 75% better than GCC [3.4], and in absolute terms is
14# 8.7 cycles per processed byte. Implementation features vectorized
15# byte swap, but not Xupdate.
16
17@X=( "\$0", "\$1", "\$2", "\$3", "\$4", "\$5", "\$6", "\$7",
18 "\$8", "\$9", "\$10", "\$11", "\$12", "\$13", "\$14", "\$15");
19$ctx="a0"; # $16
20$inp="a1";
21$num="a2";
22$A="a3";
23$B="a4"; # 20
24$C="a5";
25$D="t8";
26$E="t9"; @V=($A,$B,$C,$D,$E);
27$t0="t10"; # 24
28$t1="t11";
29$t2="ra";
30$t3="t12";
31$K="AT"; # 28
32
33sub BODY_00_19 {
34my ($i,$a,$b,$c,$d,$e)=@_;
35my $j=$i+1;
36$code.=<<___ if ($i==0);
37 ldq_u @X[0],0+0($inp)
38 ldq_u @X[1],0+7($inp)
39___
40$code.=<<___ if (!($i&1) && $i<14);
41 ldq_u @X[$i+2],($i+2)*4+0($inp)
42 ldq_u @X[$i+3],($i+2)*4+7($inp)
43___
44$code.=<<___ if (!($i&1) && $i<15);
45 extql @X[$i],$inp,@X[$i]
46 extqh @X[$i+1],$inp,@X[$i+1]
47
48 or @X[$i+1],@X[$i],@X[$i] # pair of 32-bit values are fetched
49
50 srl @X[$i],24,$t0 # vectorized byte swap
51 srl @X[$i],8,$t2
52
53 sll @X[$i],8,$t3
54 sll @X[$i],24,@X[$i]
55 zapnot $t0,0x11,$t0
56 zapnot $t2,0x22,$t2
57
58 zapnot @X[$i],0x88,@X[$i]
59 or $t0,$t2,$t0
60 zapnot $t3,0x44,$t3
61 sll $a,5,$t1
62
63 or @X[$i],$t0,@X[$i]
64 addl $K,$e,$e
65 and $b,$c,$t2
66 zapnot $a,0xf,$a
67
68 or @X[$i],$t3,@X[$i]
69 srl $a,27,$t0
70 bic $d,$b,$t3
71 sll $b,30,$b
72
73 extll @X[$i],4,@X[$i+1] # extract upper half
74 or $t2,$t3,$t2
75 addl @X[$i],$e,$e
76
77 addl $t1,$e,$e
78 srl $b,32,$t3
79 zapnot @X[$i],0xf,@X[$i]
80
81 addl $t0,$e,$e
82 addl $t2,$e,$e
83 or $t3,$b,$b
84___
85$code.=<<___ if (($i&1) && $i<15);
86 sll $a,5,$t1
87 addl $K,$e,$e
88 and $b,$c,$t2
89 zapnot $a,0xf,$a
90
91 srl $a,27,$t0
92 addl @X[$i%16],$e,$e
93 bic $d,$b,$t3
94 sll $b,30,$b
95
96 or $t2,$t3,$t2
97 addl $t1,$e,$e
98 srl $b,32,$t3
99 zapnot @X[$i],0xf,@X[$i]
100
101 addl $t0,$e,$e
102 addl $t2,$e,$e
103 or $t3,$b,$b
104___
105$code.=<<___ if ($i>=15); # with forward Xupdate
106 sll $a,5,$t1
107 addl $K,$e,$e
108 and $b,$c,$t2
109 xor @X[($j+2)%16],@X[$j%16],@X[$j%16]
110
111 zapnot $a,0xf,$a
112 addl @X[$i%16],$e,$e
113 bic $d,$b,$t3
114 xor @X[($j+8)%16],@X[$j%16],@X[$j%16]
115
116 srl $a,27,$t0
117 addl $t1,$e,$e
118 or $t2,$t3,$t2
119 xor @X[($j+13)%16],@X[$j%16],@X[$j%16]
120
121 sll $b,30,$b
122 addl $t0,$e,$e
123 srl @X[$j%16],31,$t1
124
125 addl $t2,$e,$e
126 srl $b,32,$t3
127 addl @X[$j%16],@X[$j%16],@X[$j%16]
128
129 or $t3,$b,$b
130 zapnot @X[$i%16],0xf,@X[$i%16]
131 or $t1,@X[$j%16],@X[$j%16]
132___
133}
134
135sub BODY_20_39 {
136my ($i,$a,$b,$c,$d,$e)=@_;
137my $j=$i+1;
138$code.=<<___ if ($i<79); # with forward Xupdate
139 sll $a,5,$t1
140 addl $K,$e,$e
141 zapnot $a,0xf,$a
142 xor @X[($j+2)%16],@X[$j%16],@X[$j%16]
143
144 sll $b,30,$t3
145 addl $t1,$e,$e
146 xor $b,$c,$t2
147 xor @X[($j+8)%16],@X[$j%16],@X[$j%16]
148
149 srl $b,2,$b
150 addl @X[$i%16],$e,$e
151 xor $d,$t2,$t2
152 xor @X[($j+13)%16],@X[$j%16],@X[$j%16]
153
154 srl @X[$j%16],31,$t1
155 addl $t2,$e,$e
156 srl $a,27,$t0
157 addl @X[$j%16],@X[$j%16],@X[$j%16]
158
159 or $t3,$b,$b
160 addl $t0,$e,$e
161 or $t1,@X[$j%16],@X[$j%16]
162___
163$code.=<<___ if ($i<77);
164 zapnot @X[$i%16],0xf,@X[$i%16]
165___
166$code.=<<___ if ($i==79); # with context fetch
167 sll $a,5,$t1
168 addl $K,$e,$e
169 zapnot $a,0xf,$a
170 ldl @X[0],0($ctx)
171
172 sll $b,30,$t3
173 addl $t1,$e,$e
174 xor $b,$c,$t2
175 ldl @X[1],4($ctx)
176
177 srl $b,2,$b
178 addl @X[$i%16],$e,$e
179 xor $d,$t2,$t2
180 ldl @X[2],8($ctx)
181
182 srl $a,27,$t0
183 addl $t2,$e,$e
184 ldl @X[3],12($ctx)
185
186 or $t3,$b,$b
187 addl $t0,$e,$e
188 ldl @X[4],16($ctx)
189___
190}
191
192sub BODY_40_59 {
193my ($i,$a,$b,$c,$d,$e)=@_;
194my $j=$i+1;
195$code.=<<___; # with forward Xupdate
196 sll $a,5,$t1
197 addl $K,$e,$e
198 zapnot $a,0xf,$a
199 xor @X[($j+2)%16],@X[$j%16],@X[$j%16]
200
201 srl $a,27,$t0
202 and $b,$c,$t2
203 and $b,$d,$t3
204 xor @X[($j+8)%16],@X[$j%16],@X[$j%16]
205
206 sll $b,30,$b
207 addl $t1,$e,$e
208 xor @X[($j+13)%16],@X[$j%16],@X[$j%16]
209
210 srl @X[$j%16],31,$t1
211 addl $t0,$e,$e
212 or $t2,$t3,$t2
213 and $c,$d,$t3
214
215 or $t2,$t3,$t2
216 srl $b,32,$t3
217 addl @X[$i%16],$e,$e
218 addl @X[$j%16],@X[$j%16],@X[$j%16]
219
220 or $t3,$b,$b
221 addl $t2,$e,$e
222 or $t1,@X[$j%16],@X[$j%16]
223 zapnot @X[$i%16],0xf,@X[$i%16]
224___
225}
226
227$code=<<___;
228#ifdef __linux__
229#include <asm/regdef.h>
230#else
231#include <asm.h>
232#include <regdef.h>
233#endif
234
235.text
236
237.set noat
238.set noreorder
239.globl sha1_block_data_order
240.align 5
241.ent sha1_block_data_order
242sha1_block_data_order:
243 lda sp,-64(sp)
244 stq ra,0(sp)
245 stq s0,8(sp)
246 stq s1,16(sp)
247 stq s2,24(sp)
248 stq s3,32(sp)
249 stq s4,40(sp)
250 stq s5,48(sp)
251 stq fp,56(sp)
252 .mask 0x0400fe00,-64
253 .frame sp,64,ra
254 .prologue 0
255
256 ldl $A,0($ctx)
257 ldl $B,4($ctx)
258 sll $num,6,$num
259 ldl $C,8($ctx)
260 ldl $D,12($ctx)
261 ldl $E,16($ctx)
262 addq $inp,$num,$num
263
264.Lloop:
265 .set noreorder
266 ldah $K,23170(zero)
267 zapnot $B,0xf,$B
268 lda $K,31129($K) # K_00_19
269___
270for ($i=0;$i<20;$i++) { &BODY_00_19($i,@V); unshift(@V,pop(@V)); }
271
272$code.=<<___;
273 ldah $K,28378(zero)
274 lda $K,-5215($K) # K_20_39
275___
276for (;$i<40;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
277
278$code.=<<___;
279 ldah $K,-28900(zero)
280 lda $K,-17188($K) # K_40_59
281___
282for (;$i<60;$i++) { &BODY_40_59($i,@V); unshift(@V,pop(@V)); }
283
284$code.=<<___;
285 ldah $K,-13725(zero)
286 lda $K,-15914($K) # K_60_79
287___
288for (;$i<80;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
289
290$code.=<<___;
291 addl @X[0],$A,$A
292 addl @X[1],$B,$B
293 addl @X[2],$C,$C
294 addl @X[3],$D,$D
295 addl @X[4],$E,$E
296 stl $A,0($ctx)
297 stl $B,4($ctx)
298 addq $inp,64,$inp
299 stl $C,8($ctx)
300 stl $D,12($ctx)
301 stl $E,16($ctx)
302 cmpult $inp,$num,$t1
303 bne $t1,.Lloop
304
305 .set noreorder
306 ldq ra,0(sp)
307 ldq s0,8(sp)
308 ldq s1,16(sp)
309 ldq s2,24(sp)
310 ldq s3,32(sp)
311 ldq s4,40(sp)
312 ldq s5,48(sp)
313 ldq fp,56(sp)
314 lda sp,64(sp)
315 ret (ra)
316.end sha1_block_data_order
317.ascii "SHA1 block transform for Alpha, CRYPTOGAMS by <appro\@openssl.org>"
318.align 2
319___
320$output=shift and open STDOUT,">$output";
321print $code;
322close STDOUT;
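
Much of the new Alpha module is the vectorized byte swap its header mentions: SHA-1 consumes each 64-byte block as sixteen big-endian 32-bit words, and the ldq_u/extql/extqh/zapnot sequences assemble those words from possibly unaligned input on the little-endian Alpha. A plain-C reference for what that load stage computes (not how the Alpha code computes it):

#include <stdint.h>
#include <stddef.h>

/* Assemble one big-endian 32-bit word from an arbitrary byte pointer. */
uint32_t load_be32(const unsigned char *p)
{
    return ((uint32_t)p[0] << 24) | ((uint32_t)p[1] << 16) |
           ((uint32_t)p[2] <<  8) |  (uint32_t)p[3];
}

/* Fill the 16-word message-schedule seed W[0..15] from a 64-byte block. */
void sha1_load_block(uint32_t W[16], const unsigned char *in)
{
    size_t i;

    for (i = 0; i < 16; i++)
        W[i] = load_be32(in + 4 * i);   /* unaligned input is fine */
}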
diff --git a/src/lib/libssl/src/crypto/sha/asm/sha1-armv4-large.pl b/src/lib/libssl/src/crypto/sha/asm/sha1-armv4-large.pl
index 6e65fe3e01..fe8207f77f 100644
--- a/src/lib/libssl/src/crypto/sha/asm/sha1-armv4-large.pl
+++ b/src/lib/libssl/src/crypto/sha/asm/sha1-armv4-large.pl
@@ -47,6 +47,10 @@
47# Cortex A8 core and in absolute terms ~870 cycles per input block 47# Cortex A8 core and in absolute terms ~870 cycles per input block
48# [or 13.6 cycles per byte]. 48# [or 13.6 cycles per byte].
49 49
50# February 2011.
51#
52# Profiler-assisted and platform-specific optimization resulted in 10%
53# improvement on Cortex A8 core and 12.2 cycles per byte.
50 54
51while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} 55while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
52open STDOUT,">$output"; 56open STDOUT,">$output";
@@ -76,31 +80,41 @@ $code.=<<___;
76 add $e,$K,$e,ror#2 @ E+=K_xx_xx 80 add $e,$K,$e,ror#2 @ E+=K_xx_xx
77 ldr $t3,[$Xi,#2*4] 81 ldr $t3,[$Xi,#2*4]
78 eor $t0,$t0,$t1 82 eor $t0,$t0,$t1
79 eor $t2,$t2,$t3 83 eor $t2,$t2,$t3 @ 1 cycle stall
80 eor $t1,$c,$d @ F_xx_xx 84 eor $t1,$c,$d @ F_xx_xx
81 mov $t0,$t0,ror#31 85 mov $t0,$t0,ror#31
82 add $e,$e,$a,ror#27 @ E+=ROR(A,27) 86 add $e,$e,$a,ror#27 @ E+=ROR(A,27)
83 eor $t0,$t0,$t2,ror#31 87 eor $t0,$t0,$t2,ror#31
88 str $t0,[$Xi,#-4]!
84 $opt1 @ F_xx_xx 89 $opt1 @ F_xx_xx
85 $opt2 @ F_xx_xx 90 $opt2 @ F_xx_xx
86 add $e,$e,$t0 @ E+=X[i] 91 add $e,$e,$t0 @ E+=X[i]
87 str $t0,[$Xi,#-4]!
88___ 92___
89} 93}
90 94
91sub BODY_00_15 { 95sub BODY_00_15 {
92my ($a,$b,$c,$d,$e)=@_; 96my ($a,$b,$c,$d,$e)=@_;
93$code.=<<___; 97$code.=<<___;
94 ldrb $t0,[$inp],#4 98#if __ARM_ARCH__<7
95 ldrb $t1,[$inp,#-1] 99 ldrb $t1,[$inp,#2]
96 ldrb $t2,[$inp,#-2] 100 ldrb $t0,[$inp,#3]
101 ldrb $t2,[$inp,#1]
97 add $e,$K,$e,ror#2 @ E+=K_00_19 102 add $e,$K,$e,ror#2 @ E+=K_00_19
98 ldrb $t3,[$inp,#-3] 103 ldrb $t3,[$inp],#4
104 orr $t0,$t0,$t1,lsl#8
105 eor $t1,$c,$d @ F_xx_xx
106 orr $t0,$t0,$t2,lsl#16
99 add $e,$e,$a,ror#27 @ E+=ROR(A,27) 107 add $e,$e,$a,ror#27 @ E+=ROR(A,27)
100 orr $t0,$t1,$t0,lsl#24 108 orr $t0,$t0,$t3,lsl#24
109#else
110 ldr $t0,[$inp],#4 @ handles unaligned
111 add $e,$K,$e,ror#2 @ E+=K_00_19
101 eor $t1,$c,$d @ F_xx_xx 112 eor $t1,$c,$d @ F_xx_xx
102 orr $t0,$t0,$t2,lsl#8 113 add $e,$e,$a,ror#27 @ E+=ROR(A,27)
103 orr $t0,$t0,$t3,lsl#16 114#ifdef __ARMEL__
115 rev $t0,$t0 @ byte swap
116#endif
117#endif
104 and $t1,$b,$t1,ror#2 118 and $t1,$b,$t1,ror#2
105 add $e,$e,$t0 @ E+=X[i] 119 add $e,$e,$t0 @ E+=X[i]
106 eor $t1,$t1,$d,ror#2 @ F_00_19(B,C,D) 120 eor $t1,$t1,$d,ror#2 @ F_00_19(B,C,D)
@@ -136,6 +150,8 @@ ___
136} 150}
137 151
138$code=<<___; 152$code=<<___;
153#include "arm_arch.h"
154
139.text 155.text
140 156
141.global sha1_block_data_order 157.global sha1_block_data_order
@@ -209,10 +225,14 @@ $code.=<<___;
209 teq $inp,$len 225 teq $inp,$len
210 bne .Lloop @ [+18], total 1307 226 bne .Lloop @ [+18], total 1307
211 227
228#if __ARM_ARCH__>=5
229 ldmia sp!,{r4-r12,pc}
230#else
212 ldmia sp!,{r4-r12,lr} 231 ldmia sp!,{r4-r12,lr}
213 tst lr,#1 232 tst lr,#1
214 moveq pc,lr @ be binary compatible with V4, yet 233 moveq pc,lr @ be binary compatible with V4, yet
215 bx lr @ interoperable with Thumb ISA:-) 234 bx lr @ interoperable with Thumb ISA:-)
235#endif
216.align 2 236.align 2
217.LK_00_19: .word 0x5a827999 237.LK_00_19: .word 0x5a827999
218.LK_20_39: .word 0x6ed9eba1 238.LK_20_39: .word 0x6ed9eba1
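
The eor/and/eor clusters and the $opt1/$opt2 slots in the ARM rounds above are the per-round SHA-1 boolean functions, with the round 0-19 "choose" function rewritten as d ^ (b & (c ^ d)) to save an instruction. For reference, the three functions in plain C, following the FIPS 180 definitions:

#include <stdint.h>

/* F_00_19: Ch(b,c,d) = (b & c) | (~b & d), computed here in the
 * instruction-saving form d ^ (b & (c ^ d)) used by the ARM code. */
uint32_t f_00_19(uint32_t b, uint32_t c, uint32_t d)
{
    return d ^ (b & (c ^ d));
}

/* F_20_39 and F_60_79: Parity(b,c,d) */
uint32_t f_20_39(uint32_t b, uint32_t c, uint32_t d)
{
    return b ^ c ^ d;
}

/* F_40_59: Maj(b,c,d) */
uint32_t f_40_59(uint32_t b, uint32_t c, uint32_t d)
{
    return (b & c) | (b & d) | (c & d);
}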
diff --git a/src/lib/libssl/src/crypto/sha/asm/sha1-mips.pl b/src/lib/libssl/src/crypto/sha/asm/sha1-mips.pl
new file mode 100644
index 0000000000..f1a702f38f
--- /dev/null
+++ b/src/lib/libssl/src/crypto/sha/asm/sha1-mips.pl
@@ -0,0 +1,354 @@
1#!/usr/bin/env perl
2
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9
10# SHA1 block procedure for MIPS.
11
 12# The performance improvement is 30% on unaligned input. The "secret" is
 13# to deploy the lwl/lwr pair to load unaligned input. One could have
 14# vectorized Xupdate on MIPSIII/IV, but the goal was to code a MIPS32-
 15# compatible subroutine. There is room for minor optimization on
 16# little-endian platforms...
17
18######################################################################
 19# There are a number of MIPS ABIs in use; O32 and N32/64 are the most
 20# widely used. Then there is a new contender: NUBI. It appears that if
 21# one picks the latter, it's possible to arrange code in an ABI-neutral
 22# manner. Therefore let's stick to the NUBI register layout:
23#
24($zero,$at,$t0,$t1,$t2)=map("\$$_",(0..2,24,25));
25($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
26($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8,$s9,$s10,$s11)=map("\$$_",(12..23));
27($gp,$tp,$sp,$fp,$ra)=map("\$$_",(3,28..31));
28#
29# The return value is placed in $a0. Following coding rules facilitate
30# interoperability:
31#
32# - never ever touch $tp, "thread pointer", former $gp;
33# - copy return value to $t0, former $v0 [or to $a0 if you're adapting
34# old code];
35# - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary;
36#
37# For reference here is register layout for N32/64 MIPS ABIs:
38#
39# ($zero,$at,$v0,$v1)=map("\$$_",(0..3));
40# ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
41# ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25));
42# ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23));
43# ($gp,$sp,$fp,$ra)=map("\$$_",(28..31));
44#
45$flavour = shift; # supported flavours are o32,n32,64,nubi32,nubi64
46
47if ($flavour =~ /64|n32/i) {
48 $PTR_ADD="dadd"; # incidentally works even on n32
49 $PTR_SUB="dsub"; # incidentally works even on n32
50 $REG_S="sd";
51 $REG_L="ld";
52 $PTR_SLL="dsll"; # incidentally works even on n32
53 $SZREG=8;
54} else {
55 $PTR_ADD="add";
56 $PTR_SUB="sub";
57 $REG_S="sw";
58 $REG_L="lw";
59 $PTR_SLL="sll";
60 $SZREG=4;
61}
62#
63# <appro@openssl.org>
64#
65######################################################################
66
67$big_endian=(`echo MIPSEL | $ENV{CC} -E -P -`=~/MIPSEL/)?1:0;
68
69for (@ARGV) { $output=$_ if (/^\w[\w\-]*\.\w+$/); }
70open STDOUT,">$output";
71
72if (!defined($big_endian))
73 { $big_endian=(unpack('L',pack('N',1))==1); }
74
75# offsets of the Most and Least Significant Bytes
76$MSB=$big_endian?0:3;
77$LSB=3&~$MSB;
78
79@X=map("\$$_",(8..23)); # a4-a7,s0-s11
80
81$ctx=$a0;
82$inp=$a1;
83$num=$a2;
84$A="\$1";
85$B="\$2";
86$C="\$3";
87$D="\$7";
88$E="\$24"; @V=($A,$B,$C,$D,$E);
89$t0="\$25";
90$t1=$num; # $num is offloaded to stack
91$t2="\$30"; # fp
92$K="\$31"; # ra
93
94sub BODY_00_14 {
95my ($i,$a,$b,$c,$d,$e)=@_;
96my $j=$i+1;
97$code.=<<___ if (!$big_endian);
98 srl $t0,@X[$i],24 # byte swap($i)
99 srl $t1,@X[$i],8
100 andi $t2,@X[$i],0xFF00
101 sll @X[$i],@X[$i],24
102 andi $t1,0xFF00
103 sll $t2,$t2,8
104 or @X[$i],$t0
105 or $t1,$t2
106 or @X[$i],$t1
107___
108$code.=<<___;
109 lwl @X[$j],$j*4+$MSB($inp)
110 sll $t0,$a,5 # $i
111 addu $e,$K
112 lwr @X[$j],$j*4+$LSB($inp)
113 srl $t1,$a,27
114 addu $e,$t0
115 xor $t0,$c,$d
116 addu $e,$t1
117 sll $t2,$b,30
118 and $t0,$b
119 srl $b,$b,2
120 xor $t0,$d
121 addu $e,@X[$i]
122 or $b,$t2
123 addu $e,$t0
124___
125}
126
127sub BODY_15_19 {
128my ($i,$a,$b,$c,$d,$e)=@_;
129my $j=$i+1;
130
131$code.=<<___ if (!$big_endian && $i==15);
132 srl $t0,@X[$i],24 # byte swap($i)
133 srl $t1,@X[$i],8
134 andi $t2,@X[$i],0xFF00
135 sll @X[$i],@X[$i],24
136 andi $t1,0xFF00
137 sll $t2,$t2,8
138 or @X[$i],$t0
139 or @X[$i],$t1
140 or @X[$i],$t2
141___
142$code.=<<___;
143 xor @X[$j%16],@X[($j+2)%16]
144 sll $t0,$a,5 # $i
145 addu $e,$K
146 srl $t1,$a,27
147 addu $e,$t0
148 xor @X[$j%16],@X[($j+8)%16]
149 xor $t0,$c,$d
150 addu $e,$t1
151 xor @X[$j%16],@X[($j+13)%16]
152 sll $t2,$b,30
153 and $t0,$b
154 srl $t1,@X[$j%16],31
155 addu @X[$j%16],@X[$j%16]
156 srl $b,$b,2
157 xor $t0,$d
158 or @X[$j%16],$t1
159 addu $e,@X[$i%16]
160 or $b,$t2
161 addu $e,$t0
162___
163}
164
165sub BODY_20_39 {
166my ($i,$a,$b,$c,$d,$e)=@_;
167my $j=$i+1;
168$code.=<<___ if ($i<79);
169 xor @X[$j%16],@X[($j+2)%16]
170 sll $t0,$a,5 # $i
171 addu $e,$K
172 srl $t1,$a,27
173 addu $e,$t0
174 xor @X[$j%16],@X[($j+8)%16]
175 xor $t0,$c,$d
176 addu $e,$t1
177 xor @X[$j%16],@X[($j+13)%16]
178 sll $t2,$b,30
179 xor $t0,$b
180 srl $t1,@X[$j%16],31
181 addu @X[$j%16],@X[$j%16]
182 srl $b,$b,2
183 addu $e,@X[$i%16]
184 or @X[$j%16],$t1
185 or $b,$t2
186 addu $e,$t0
187___
188$code.=<<___ if ($i==79);
189 lw @X[0],0($ctx)
190 sll $t0,$a,5 # $i
191 addu $e,$K
192 lw @X[1],4($ctx)
193 srl $t1,$a,27
194 addu $e,$t0
195 lw @X[2],8($ctx)
196 xor $t0,$c,$d
197 addu $e,$t1
198 lw @X[3],12($ctx)
199 sll $t2,$b,30
200 xor $t0,$b
201 lw @X[4],16($ctx)
202 srl $b,$b,2
203 addu $e,@X[$i%16]
204 or $b,$t2
205 addu $e,$t0
206___
207}
208
209sub BODY_40_59 {
210my ($i,$a,$b,$c,$d,$e)=@_;
211my $j=$i+1;
212$code.=<<___ if ($i<79);
213 xor @X[$j%16],@X[($j+2)%16]
214 sll $t0,$a,5 # $i
215 addu $e,$K
216 srl $t1,$a,27
217 addu $e,$t0
218 xor @X[$j%16],@X[($j+8)%16]
219 and $t0,$c,$d
220 addu $e,$t1
221 xor @X[$j%16],@X[($j+13)%16]
222 sll $t2,$b,30
223 addu $e,$t0
224 srl $t1,@X[$j%16],31
225 xor $t0,$c,$d
226 addu @X[$j%16],@X[$j%16]
227 and $t0,$b
228 srl $b,$b,2
229 or @X[$j%16],$t1
230 addu $e,@X[$i%16]
231 or $b,$t2
232 addu $e,$t0
233___
234}
235
236$FRAMESIZE=16;	# large enough to accommodate NUBI saved registers
237$SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? 0xc0fff008 : 0xc0ff0000;
238
239$code=<<___;
240#ifdef OPENSSL_FIPSCANISTER
241# include <openssl/fipssyms.h>
242#endif
243
244.text
245
246.set noat
247.set noreorder
248.align 5
249.globl sha1_block_data_order
250.ent sha1_block_data_order
251sha1_block_data_order:
252 .frame $sp,$FRAMESIZE*$SZREG,$ra
253 .mask $SAVED_REGS_MASK,-$SZREG
254 .set noreorder
255 $PTR_SUB $sp,$FRAMESIZE*$SZREG
256 $REG_S $ra,($FRAMESIZE-1)*$SZREG($sp)
257 $REG_S $fp,($FRAMESIZE-2)*$SZREG($sp)
258 $REG_S $s11,($FRAMESIZE-3)*$SZREG($sp)
259 $REG_S $s10,($FRAMESIZE-4)*$SZREG($sp)
260 $REG_S $s9,($FRAMESIZE-5)*$SZREG($sp)
261 $REG_S $s8,($FRAMESIZE-6)*$SZREG($sp)
262 $REG_S $s7,($FRAMESIZE-7)*$SZREG($sp)
263 $REG_S $s6,($FRAMESIZE-8)*$SZREG($sp)
264 $REG_S $s5,($FRAMESIZE-9)*$SZREG($sp)
265 $REG_S $s4,($FRAMESIZE-10)*$SZREG($sp)
266___
267$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue
268 $REG_S $s3,($FRAMESIZE-11)*$SZREG($sp)
269 $REG_S $s2,($FRAMESIZE-12)*$SZREG($sp)
270 $REG_S $s1,($FRAMESIZE-13)*$SZREG($sp)
271 $REG_S $s0,($FRAMESIZE-14)*$SZREG($sp)
272 $REG_S $gp,($FRAMESIZE-15)*$SZREG($sp)
273___
274$code.=<<___;
275 $PTR_SLL $num,6
276 $PTR_ADD $num,$inp
277 $REG_S $num,0($sp)
278 lw $A,0($ctx)
279 lw $B,4($ctx)
280 lw $C,8($ctx)
281 lw $D,12($ctx)
282 b .Loop
283 lw $E,16($ctx)
284.align 4
285.Loop:
286 .set reorder
287 lwl @X[0],$MSB($inp)
288 lui $K,0x5a82
289 lwr @X[0],$LSB($inp)
290 ori $K,0x7999 # K_00_19
291___
292for ($i=0;$i<15;$i++) { &BODY_00_14($i,@V); unshift(@V,pop(@V)); }
293for (;$i<20;$i++) { &BODY_15_19($i,@V); unshift(@V,pop(@V)); }
294$code.=<<___;
295 lui $K,0x6ed9
296 ori $K,0xeba1 # K_20_39
297___
298for (;$i<40;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
299$code.=<<___;
300 lui $K,0x8f1b
301 ori $K,0xbcdc # K_40_59
302___
303for (;$i<60;$i++) { &BODY_40_59($i,@V); unshift(@V,pop(@V)); }
304$code.=<<___;
305 lui $K,0xca62
306 ori $K,0xc1d6 # K_60_79
307___
308for (;$i<80;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
309$code.=<<___;
310 $PTR_ADD $inp,64
311 $REG_L $num,0($sp)
312
313 addu $A,$X[0]
314 addu $B,$X[1]
315 sw $A,0($ctx)
316 addu $C,$X[2]
317 addu $D,$X[3]
318 sw $B,4($ctx)
319 addu $E,$X[4]
320 sw $C,8($ctx)
321 sw $D,12($ctx)
322 sw $E,16($ctx)
323 .set noreorder
324 bne $inp,$num,.Loop
325 nop
326
327 .set noreorder
328 $REG_L $ra,($FRAMESIZE-1)*$SZREG($sp)
329 $REG_L $fp,($FRAMESIZE-2)*$SZREG($sp)
330 $REG_L $s11,($FRAMESIZE-3)*$SZREG($sp)
331 $REG_L $s10,($FRAMESIZE-4)*$SZREG($sp)
332 $REG_L $s9,($FRAMESIZE-5)*$SZREG($sp)
333 $REG_L $s8,($FRAMESIZE-6)*$SZREG($sp)
334 $REG_L $s7,($FRAMESIZE-7)*$SZREG($sp)
335 $REG_L $s6,($FRAMESIZE-8)*$SZREG($sp)
336 $REG_L $s5,($FRAMESIZE-9)*$SZREG($sp)
337 $REG_L $s4,($FRAMESIZE-10)*$SZREG($sp)
338___
339$code.=<<___ if ($flavour =~ /nubi/i);
340 $REG_L $s3,($FRAMESIZE-11)*$SZREG($sp)
341 $REG_L $s2,($FRAMESIZE-12)*$SZREG($sp)
342 $REG_L $s1,($FRAMESIZE-13)*$SZREG($sp)
343 $REG_L $s0,($FRAMESIZE-14)*$SZREG($sp)
344 $REG_L $gp,($FRAMESIZE-15)*$SZREG($sp)
345___
346$code.=<<___;
347 jr $ra
348 $PTR_ADD $sp,$FRAMESIZE*$SZREG
349.end sha1_block_data_order
350.rdata
351.asciiz "SHA1 for MIPS, CRYPTOGAMS by <appro\@openssl.org>"
352___
353print $code;
354close STDOUT;
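
In BODY_15_19/BODY_20_39/BODY_40_59 above, the xor/srl/addu/or cluster on @X[$j%16] is the SHA-1 message-schedule update kept in a 16-word circular buffer: W[j] = ROTL1(W[j-3] ^ W[j-8] ^ W[j-14] ^ W[j-16]), with the addu/srl/or triple standing in for the rotate. The same recurrence in plain C over the same 16-entry window:

#include <stdint.h>

uint32_t rotl1(uint32_t x)
{
    return (x << 1) | (x >> 31);    /* the addu + srl + or triple above */
}

/* One schedule update in the 16-word circular buffer: after this call
 * X[j % 16] holds W[j], where previously it held W[j - 16]. */
void xupdate(uint32_t X[16], unsigned int j)
{
    X[j % 16] = rotl1(X[j % 16] ^ X[(j + 2) % 16] ^
                      X[(j + 8) % 16] ^ X[(j + 13) % 16]);
}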
diff --git a/src/lib/libssl/src/crypto/sha/asm/sha1-parisc.pl b/src/lib/libssl/src/crypto/sha/asm/sha1-parisc.pl
new file mode 100644
index 0000000000..6d7bf495b2
--- /dev/null
+++ b/src/lib/libssl/src/crypto/sha/asm/sha1-parisc.pl
@@ -0,0 +1,259 @@
1#!/usr/bin/env perl
2
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9
10# SHA1 block procedure for PA-RISC.
11
12# June 2009.
13#
14# On PA-7100LC performance is >30% better than gcc 3.2 generated code
15# for aligned input and >50% better for unaligned. Compared to vendor
 16# compiler on PA-8600 it's almost 60% faster in a 64-bit build and just a
 17# few percent faster in a 32-bit one (this is for aligned input; data for
 18# unaligned input is not available).
19#
20# Special thanks to polarhome.com for providing HP-UX account.
21
22$flavour = shift;
23$output = shift;
24open STDOUT,">$output";
25
26if ($flavour =~ /64/) {
27 $LEVEL ="2.0W";
28 $SIZE_T =8;
29 $FRAME_MARKER =80;
30 $SAVED_RP =16;
31 $PUSH ="std";
32 $PUSHMA ="std,ma";
33 $POP ="ldd";
34 $POPMB ="ldd,mb";
35} else {
36 $LEVEL ="1.0";
37 $SIZE_T =4;
38 $FRAME_MARKER =48;
39 $SAVED_RP =20;
40 $PUSH ="stw";
41 $PUSHMA ="stwm";
42 $POP ="ldw";
43 $POPMB ="ldwm";
44}
45
46$FRAME=14*$SIZE_T+$FRAME_MARKER;# 14 saved regs + frame marker
47 # [+ argument transfer]
48$ctx="%r26"; # arg0
49$inp="%r25"; # arg1
50$num="%r24"; # arg2
51
52$t0="%r28";
53$t1="%r29";
54$K="%r31";
55
56@X=("%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7", "%r8",
57 "%r9", "%r10","%r11","%r12","%r13","%r14","%r15","%r16",$t0);
58
59@V=($A,$B,$C,$D,$E)=("%r19","%r20","%r21","%r22","%r23");
60
61sub BODY_00_19 {
62my ($i,$a,$b,$c,$d,$e)=@_;
63my $j=$i+1;
64$code.=<<___ if ($i<15);
65 addl $K,$e,$e ; $i
66 shd $a,$a,27,$t1
67 addl @X[$i],$e,$e
68 and $c,$b,$t0
69 addl $t1,$e,$e
70 andcm $d,$b,$t1
71 shd $b,$b,2,$b
72 or $t1,$t0,$t0
73 addl $t0,$e,$e
74___
75$code.=<<___ if ($i>=15); # with forward Xupdate
76 addl $K,$e,$e ; $i
77 shd $a,$a,27,$t1
78 xor @X[($j+2)%16],@X[$j%16],@X[$j%16]
79 addl @X[$i%16],$e,$e
80 and $c,$b,$t0
81 xor @X[($j+8)%16],@X[$j%16],@X[$j%16]
82 addl $t1,$e,$e
83 andcm $d,$b,$t1
84 shd $b,$b,2,$b
85 or $t1,$t0,$t0
86 xor @X[($j+13)%16],@X[$j%16],@X[$j%16]
87 add $t0,$e,$e
88 shd @X[$j%16],@X[$j%16],31,@X[$j%16]
89___
90}
91
92sub BODY_20_39 {
93my ($i,$a,$b,$c,$d,$e)=@_;
94my $j=$i+1;
95$code.=<<___ if ($i<79);
96 xor @X[($j+2)%16],@X[$j%16],@X[$j%16] ; $i
97 addl $K,$e,$e
98 shd $a,$a,27,$t1
99 xor @X[($j+8)%16],@X[$j%16],@X[$j%16]
100 addl @X[$i%16],$e,$e
101 xor $b,$c,$t0
102 xor @X[($j+13)%16],@X[$j%16],@X[$j%16]
103 addl $t1,$e,$e
104 shd $b,$b,2,$b
105 xor $d,$t0,$t0
106 shd @X[$j%16],@X[$j%16],31,@X[$j%16]
107 addl $t0,$e,$e
108___
109$code.=<<___ if ($i==79); # with context load
110 ldw 0($ctx),@X[0] ; $i
111 addl $K,$e,$e
112 shd $a,$a,27,$t1
113 ldw 4($ctx),@X[1]
114 addl @X[$i%16],$e,$e
115 xor $b,$c,$t0
116 ldw 8($ctx),@X[2]
117 addl $t1,$e,$e
118 shd $b,$b,2,$b
119 xor $d,$t0,$t0
120 ldw 12($ctx),@X[3]
121 addl $t0,$e,$e
122 ldw 16($ctx),@X[4]
123___
124}
125
126sub BODY_40_59 {
127my ($i,$a,$b,$c,$d,$e)=@_;
128my $j=$i+1;
129$code.=<<___;
130 shd $a,$a,27,$t1 ; $i
131 addl $K,$e,$e
132 xor @X[($j+2)%16],@X[$j%16],@X[$j%16]
133 xor $d,$c,$t0
134 addl @X[$i%16],$e,$e
135 xor @X[($j+8)%16],@X[$j%16],@X[$j%16]
136 and $b,$t0,$t0
137 addl $t1,$e,$e
138 shd $b,$b,2,$b
139 xor @X[($j+13)%16],@X[$j%16],@X[$j%16]
140 addl $t0,$e,$e
141 and $d,$c,$t1
142 shd @X[$j%16],@X[$j%16],31,@X[$j%16]
143 addl $t1,$e,$e
144___
145}
146
147$code=<<___;
148 .LEVEL $LEVEL
149 .SPACE \$TEXT\$
150 .SUBSPA \$CODE\$,QUAD=0,ALIGN=8,ACCESS=0x2C,CODE_ONLY
151
152 .EXPORT sha1_block_data_order,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR
153sha1_block_data_order
154 .PROC
155 .CALLINFO FRAME=`$FRAME-14*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=16
156 .ENTRY
157 $PUSH %r2,-$SAVED_RP(%sp) ; standard prologue
158 $PUSHMA %r3,$FRAME(%sp)
159 $PUSH %r4,`-$FRAME+1*$SIZE_T`(%sp)
160 $PUSH %r5,`-$FRAME+2*$SIZE_T`(%sp)
161 $PUSH %r6,`-$FRAME+3*$SIZE_T`(%sp)
162 $PUSH %r7,`-$FRAME+4*$SIZE_T`(%sp)
163 $PUSH %r8,`-$FRAME+5*$SIZE_T`(%sp)
164 $PUSH %r9,`-$FRAME+6*$SIZE_T`(%sp)
165 $PUSH %r10,`-$FRAME+7*$SIZE_T`(%sp)
166 $PUSH %r11,`-$FRAME+8*$SIZE_T`(%sp)
167 $PUSH %r12,`-$FRAME+9*$SIZE_T`(%sp)
168 $PUSH %r13,`-$FRAME+10*$SIZE_T`(%sp)
169 $PUSH %r14,`-$FRAME+11*$SIZE_T`(%sp)
170 $PUSH %r15,`-$FRAME+12*$SIZE_T`(%sp)
171 $PUSH %r16,`-$FRAME+13*$SIZE_T`(%sp)
172
173 ldw 0($ctx),$A
174 ldw 4($ctx),$B
175 ldw 8($ctx),$C
176 ldw 12($ctx),$D
177 ldw 16($ctx),$E
178
179 extru $inp,31,2,$t0 ; t0=inp&3;
180 sh3addl $t0,%r0,$t0 ; t0*=8;
181 subi 32,$t0,$t0 ; t0=32-t0;
182 mtctl $t0,%cr11 ; %sar=t0;
183
184L\$oop
185 ldi 3,$t0
186 andcm $inp,$t0,$t0 ; 64-bit neutral
187___
188 for ($i=0;$i<15;$i++) { # load input block
189 $code.="\tldw `4*$i`($t0),@X[$i]\n"; }
190$code.=<<___;
191 cmpb,*= $inp,$t0,L\$aligned
192 ldw 60($t0),@X[15]
193 ldw 64($t0),@X[16]
194___
195 for ($i=0;$i<16;$i++) { # align input
196 $code.="\tvshd @X[$i],@X[$i+1],@X[$i]\n"; }
197$code.=<<___;
198L\$aligned
199 ldil L'0x5a827000,$K ; K_00_19
200 ldo 0x999($K),$K
201___
202for ($i=0;$i<20;$i++) { &BODY_00_19($i,@V); unshift(@V,pop(@V)); }
203$code.=<<___;
204 ldil L'0x6ed9e000,$K ; K_20_39
205 ldo 0xba1($K),$K
206___
207
208for (;$i<40;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
209$code.=<<___;
210 ldil L'0x8f1bb000,$K ; K_40_59
211 ldo 0xcdc($K),$K
212___
213
214for (;$i<60;$i++) { &BODY_40_59($i,@V); unshift(@V,pop(@V)); }
215$code.=<<___;
216 ldil L'0xca62c000,$K ; K_60_79
217 ldo 0x1d6($K),$K
218___
219for (;$i<80;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
220
221$code.=<<___;
222 addl @X[0],$A,$A
223 addl @X[1],$B,$B
224 addl @X[2],$C,$C
225 addl @X[3],$D,$D
226 addl @X[4],$E,$E
227 stw $A,0($ctx)
228 stw $B,4($ctx)
229 stw $C,8($ctx)
230 stw $D,12($ctx)
231 stw $E,16($ctx)
232 addib,*<> -1,$num,L\$oop
233 ldo 64($inp),$inp
234
235 $POP `-$FRAME-$SAVED_RP`(%sp),%r2 ; standard epilogue
236 $POP `-$FRAME+1*$SIZE_T`(%sp),%r4
237 $POP `-$FRAME+2*$SIZE_T`(%sp),%r5
238 $POP `-$FRAME+3*$SIZE_T`(%sp),%r6
239 $POP `-$FRAME+4*$SIZE_T`(%sp),%r7
240 $POP `-$FRAME+5*$SIZE_T`(%sp),%r8
241 $POP `-$FRAME+6*$SIZE_T`(%sp),%r9
242 $POP `-$FRAME+7*$SIZE_T`(%sp),%r10
243 $POP `-$FRAME+8*$SIZE_T`(%sp),%r11
244 $POP `-$FRAME+9*$SIZE_T`(%sp),%r12
245 $POP `-$FRAME+10*$SIZE_T`(%sp),%r13
246 $POP `-$FRAME+11*$SIZE_T`(%sp),%r14
247 $POP `-$FRAME+12*$SIZE_T`(%sp),%r15
248 $POP `-$FRAME+13*$SIZE_T`(%sp),%r16
249 bv (%r2)
250 .EXIT
251 $POPMB -$FRAME(%sp),%r3
252 .PROCEND
253 .STRINGZ "SHA1 block transform for PA-RISC, CRYPTOGAMS by <appro\@openssl.org>"
254___
255
256$code =~ s/\`([^\`]*)\`/eval $1/gem;
257$code =~ s/,\*/,/gm if ($SIZE_T==4);
258print $code;
259close STDOUT;
diff --git a/src/lib/libssl/src/crypto/sha/asm/sha1-ppc.pl b/src/lib/libssl/src/crypto/sha/asm/sha1-ppc.pl
index dcd0fcdfcf..2140dd2f8d 100755
--- a/src/lib/libssl/src/crypto/sha/asm/sha1-ppc.pl
+++ b/src/lib/libssl/src/crypto/sha/asm/sha1-ppc.pl
@@ -24,12 +24,14 @@ $flavour = shift;
24 24
25if ($flavour =~ /64/) { 25if ($flavour =~ /64/) {
26 $SIZE_T =8; 26 $SIZE_T =8;
27 $LRSAVE =2*$SIZE_T;
27 $UCMP ="cmpld"; 28 $UCMP ="cmpld";
28 $STU ="stdu"; 29 $STU ="stdu";
29 $POP ="ld"; 30 $POP ="ld";
30 $PUSH ="std"; 31 $PUSH ="std";
31} elsif ($flavour =~ /32/) { 32} elsif ($flavour =~ /32/) {
32 $SIZE_T =4; 33 $SIZE_T =4;
34 $LRSAVE =$SIZE_T;
33 $UCMP ="cmplw"; 35 $UCMP ="cmplw";
34 $STU ="stwu"; 36 $STU ="stwu";
35 $POP ="lwz"; 37 $POP ="lwz";
@@ -43,7 +45,8 @@ die "can't locate ppc-xlate.pl";
43 45
44open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!"; 46open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!";
45 47
46$FRAME=24*$SIZE_T; 48$FRAME=24*$SIZE_T+64;
49$LOCALS=6*$SIZE_T;
47 50
48$K ="r0"; 51$K ="r0";
49$sp ="r1"; 52$sp ="r1";
@@ -162,9 +165,8 @@ $code=<<___;
162.globl .sha1_block_data_order 165.globl .sha1_block_data_order
163.align 4 166.align 4
164.sha1_block_data_order: 167.sha1_block_data_order:
168 $STU $sp,-$FRAME($sp)
165 mflr r0 169 mflr r0
166 $STU $sp,`-($FRAME+64)`($sp)
167 $PUSH r0,`$FRAME-$SIZE_T*18`($sp)
168 $PUSH r15,`$FRAME-$SIZE_T*17`($sp) 170 $PUSH r15,`$FRAME-$SIZE_T*17`($sp)
169 $PUSH r16,`$FRAME-$SIZE_T*16`($sp) 171 $PUSH r16,`$FRAME-$SIZE_T*16`($sp)
170 $PUSH r17,`$FRAME-$SIZE_T*15`($sp) 172 $PUSH r17,`$FRAME-$SIZE_T*15`($sp)
@@ -182,6 +184,7 @@ $code=<<___;
182 $PUSH r29,`$FRAME-$SIZE_T*3`($sp) 184 $PUSH r29,`$FRAME-$SIZE_T*3`($sp)
183 $PUSH r30,`$FRAME-$SIZE_T*2`($sp) 185 $PUSH r30,`$FRAME-$SIZE_T*2`($sp)
184 $PUSH r31,`$FRAME-$SIZE_T*1`($sp) 186 $PUSH r31,`$FRAME-$SIZE_T*1`($sp)
187 $PUSH r0,`$FRAME+$LRSAVE`($sp)
185 lwz $A,0($ctx) 188 lwz $A,0($ctx)
186 lwz $B,4($ctx) 189 lwz $B,4($ctx)
187 lwz $C,8($ctx) 190 lwz $C,8($ctx)
@@ -192,37 +195,14 @@ $code=<<___;
192Laligned: 195Laligned:
193 mtctr $num 196 mtctr $num
194 bl Lsha1_block_private 197 bl Lsha1_block_private
195Ldone: 198 b Ldone
196 $POP r0,`$FRAME-$SIZE_T*18`($sp)
197 $POP r15,`$FRAME-$SIZE_T*17`($sp)
198 $POP r16,`$FRAME-$SIZE_T*16`($sp)
199 $POP r17,`$FRAME-$SIZE_T*15`($sp)
200 $POP r18,`$FRAME-$SIZE_T*14`($sp)
201 $POP r19,`$FRAME-$SIZE_T*13`($sp)
202 $POP r20,`$FRAME-$SIZE_T*12`($sp)
203 $POP r21,`$FRAME-$SIZE_T*11`($sp)
204 $POP r22,`$FRAME-$SIZE_T*10`($sp)
205 $POP r23,`$FRAME-$SIZE_T*9`($sp)
206 $POP r24,`$FRAME-$SIZE_T*8`($sp)
207 $POP r25,`$FRAME-$SIZE_T*7`($sp)
208 $POP r26,`$FRAME-$SIZE_T*6`($sp)
209 $POP r27,`$FRAME-$SIZE_T*5`($sp)
210 $POP r28,`$FRAME-$SIZE_T*4`($sp)
211 $POP r29,`$FRAME-$SIZE_T*3`($sp)
212 $POP r30,`$FRAME-$SIZE_T*2`($sp)
213 $POP r31,`$FRAME-$SIZE_T*1`($sp)
214 mtlr r0
215 addi $sp,$sp,`$FRAME+64`
216 blr
217___
218 199
219# PowerPC specification allows an implementation to be ill-behaved 200; PowerPC specification allows an implementation to be ill-behaved
220# upon unaligned access which crosses page boundary. "Better safe 201; upon unaligned access which crosses page boundary. "Better safe
221# than sorry" principle makes me treat it specially. But I don't 202; than sorry" principle makes me treat it specially. But I don't
222# look for particular offending word, but rather for 64-byte input 203; look for particular offending word, but rather for 64-byte input
223# block which crosses the boundary. Once found that block is aligned 204; block which crosses the boundary. Once found that block is aligned
224# and hashed separately... 205; and hashed separately...
225$code.=<<___;
226.align 4 206.align 4
227Lunaligned: 207Lunaligned:
228 subfic $t1,$inp,4096 208 subfic $t1,$inp,4096
@@ -237,7 +217,7 @@ Lunaligned:
237Lcross_page: 217Lcross_page:
238 li $t1,16 218 li $t1,16
239 mtctr $t1 219 mtctr $t1
240 addi r20,$sp,$FRAME ; spot below the frame 220 addi r20,$sp,$LOCALS ; spot within the frame
241Lmemcpy: 221Lmemcpy:
242 lbz r16,0($inp) 222 lbz r16,0($inp)
243 lbz r17,1($inp) 223 lbz r17,1($inp)
@@ -251,15 +231,40 @@ Lmemcpy:
251 addi r20,r20,4 231 addi r20,r20,4
252 bdnz Lmemcpy 232 bdnz Lmemcpy
253 233
254 $PUSH $inp,`$FRAME-$SIZE_T*19`($sp) 234 $PUSH $inp,`$FRAME-$SIZE_T*18`($sp)
255 li $t1,1 235 li $t1,1
256 addi $inp,$sp,$FRAME 236 addi $inp,$sp,$LOCALS
257 mtctr $t1 237 mtctr $t1
258 bl Lsha1_block_private 238 bl Lsha1_block_private
259 $POP $inp,`$FRAME-$SIZE_T*19`($sp) 239 $POP $inp,`$FRAME-$SIZE_T*18`($sp)
260 addic. $num,$num,-1 240 addic. $num,$num,-1
261 bne- Lunaligned 241 bne- Lunaligned
262 b Ldone 242
243Ldone:
244 $POP r0,`$FRAME+$LRSAVE`($sp)
245 $POP r15,`$FRAME-$SIZE_T*17`($sp)
246 $POP r16,`$FRAME-$SIZE_T*16`($sp)
247 $POP r17,`$FRAME-$SIZE_T*15`($sp)
248 $POP r18,`$FRAME-$SIZE_T*14`($sp)
249 $POP r19,`$FRAME-$SIZE_T*13`($sp)
250 $POP r20,`$FRAME-$SIZE_T*12`($sp)
251 $POP r21,`$FRAME-$SIZE_T*11`($sp)
252 $POP r22,`$FRAME-$SIZE_T*10`($sp)
253 $POP r23,`$FRAME-$SIZE_T*9`($sp)
254 $POP r24,`$FRAME-$SIZE_T*8`($sp)
255 $POP r25,`$FRAME-$SIZE_T*7`($sp)
256 $POP r26,`$FRAME-$SIZE_T*6`($sp)
257 $POP r27,`$FRAME-$SIZE_T*5`($sp)
258 $POP r28,`$FRAME-$SIZE_T*4`($sp)
259 $POP r29,`$FRAME-$SIZE_T*3`($sp)
260 $POP r30,`$FRAME-$SIZE_T*2`($sp)
261 $POP r31,`$FRAME-$SIZE_T*1`($sp)
262 mtlr r0
263 addi $sp,$sp,$FRAME
264 blr
265 .long 0
266 .byte 0,12,4,1,0x80,18,3,0
267 .long 0
263___ 268___
264 269
265# This is private block function, which uses tailored calling 270# This is private block function, which uses tailored calling
@@ -309,6 +314,8 @@ $code.=<<___;
309 addi $inp,$inp,`16*4` 314 addi $inp,$inp,`16*4`
310 bdnz- Lsha1_block_private 315 bdnz- Lsha1_block_private
311 blr 316 blr
317 .long 0
318 .byte 0,12,0x14,0,0,0,0,0
312___ 319___
313$code.=<<___; 320$code.=<<___;
314.asciz "SHA1 block transform for PPC, CRYPTOGAMS by <appro\@fy.chalmers.se>" 321.asciz "SHA1 block transform for PPC, CRYPTOGAMS by <appro\@fy.chalmers.se>"
diff --git a/src/lib/libssl/src/crypto/sha/asm/sha1-s390x.pl b/src/lib/libssl/src/crypto/sha/asm/sha1-s390x.pl
index 4b17848287..9193dda45e 100644
--- a/src/lib/libssl/src/crypto/sha/asm/sha1-s390x.pl
+++ b/src/lib/libssl/src/crypto/sha/asm/sha1-s390x.pl
@@ -21,9 +21,28 @@
21# instructions to favour dual-issue z10 pipeline. On z10 hardware is 21# instructions to favour dual-issue z10 pipeline. On z10 hardware is
22# "only" ~2.3x faster than software. 22# "only" ~2.3x faster than software.
23 23
24# November 2010.
25#
 26# Adapt for the -m31 build. If the kernel supports what's called the
 27# "highgprs" feature on Linux [see /proc/cpuinfo], it's possible to use
 28# 64-bit instructions and achieve "64-bit" performance even in a 31-bit
 29# legacy application context. The feature is not specific to any
 30# particular processor, as long as it's a "z-CPU". The latter implies
 31# that the code remains z/Architecture-specific.
32
24$kimdfunc=1; # magic function code for kimd instruction 33$kimdfunc=1; # magic function code for kimd instruction
25 34
26$output=shift; 35$flavour = shift;
36
37if ($flavour =~ /3[12]/) {
38 $SIZE_T=4;
39 $g="";
40} else {
41 $SIZE_T=8;
42 $g="g";
43}
44
45while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
27open STDOUT,">$output"; 46open STDOUT,">$output";
28 47
29$K_00_39="%r0"; $K=$K_00_39; 48$K_00_39="%r0"; $K=$K_00_39;
@@ -42,13 +61,14 @@ $t1="%r11";
42@X=("%r12","%r13","%r14"); 61@X=("%r12","%r13","%r14");
43$sp="%r15"; 62$sp="%r15";
44 63
45$frame=160+16*4; 64$stdframe=16*$SIZE_T+4*8;
65$frame=$stdframe+16*4;
46 66
47sub Xupdate { 67sub Xupdate {
48my $i=shift; 68my $i=shift;
49 69
50$code.=<<___ if ($i==15); 70$code.=<<___ if ($i==15);
51 lg $prefetch,160($sp) ### Xupdate(16) warm-up 71 lg $prefetch,$stdframe($sp) ### Xupdate(16) warm-up
52 lr $X[0],$X[2] 72 lr $X[0],$X[2]
53___ 73___
54return if ($i&1); # Xupdate is vectorized and executed every 2nd cycle 74return if ($i&1); # Xupdate is vectorized and executed every 2nd cycle
@@ -58,8 +78,8 @@ $code.=<<___ if ($i<16);
58___ 78___
59$code.=<<___ if ($i>=16); 79$code.=<<___ if ($i>=16);
60 xgr $X[0],$prefetch ### Xupdate($i) 80 xgr $X[0],$prefetch ### Xupdate($i)
61 lg $prefetch,`160+4*(($i+2)%16)`($sp) 81 lg $prefetch,`$stdframe+4*(($i+2)%16)`($sp)
62 xg $X[0],`160+4*(($i+8)%16)`($sp) 82 xg $X[0],`$stdframe+4*(($i+8)%16)`($sp)
63 xgr $X[0],$prefetch 83 xgr $X[0],$prefetch
64 rll $X[0],$X[0],1 84 rll $X[0],$X[0],1
65 rllg $X[1],$X[0],32 85 rllg $X[1],$X[0],32
@@ -68,7 +88,7 @@ $code.=<<___ if ($i>=16);
68 lr $X[2],$X[1] # feedback 88 lr $X[2],$X[1] # feedback
69___ 89___
70$code.=<<___ if ($i<=70); 90$code.=<<___ if ($i<=70);
71 stg $X[0],`160+4*($i%16)`($sp) 91 stg $X[0],`$stdframe+4*($i%16)`($sp)
72___ 92___
73unshift(@X,pop(@X)); 93unshift(@X,pop(@X));
74} 94}
@@ -148,9 +168,9 @@ $code.=<<___ if ($kimdfunc);
148 tmhl %r0,0x4000 # check for message-security assist 168 tmhl %r0,0x4000 # check for message-security assist
149 jz .Lsoftware 169 jz .Lsoftware
150 lghi %r0,0 170 lghi %r0,0
151 la %r1,16($sp) 171 la %r1,`2*$SIZE_T`($sp)
152 .long 0xb93e0002 # kimd %r0,%r2 172 .long 0xb93e0002 # kimd %r0,%r2
153 lg %r0,16($sp) 173 lg %r0,`2*$SIZE_T`($sp)
154 tmhh %r0,`0x8000>>$kimdfunc` 174 tmhh %r0,`0x8000>>$kimdfunc`
155 jz .Lsoftware 175 jz .Lsoftware
156 lghi %r0,$kimdfunc 176 lghi %r0,$kimdfunc
@@ -165,11 +185,11 @@ $code.=<<___ if ($kimdfunc);
165___ 185___
166$code.=<<___; 186$code.=<<___;
167 lghi %r1,-$frame 187 lghi %r1,-$frame
168 stg $ctx,16($sp) 188 st${g} $ctx,`2*$SIZE_T`($sp)
169 stmg %r6,%r15,48($sp) 189 stm${g} %r6,%r15,`6*$SIZE_T`($sp)
170 lgr %r0,$sp 190 lgr %r0,$sp
171 la $sp,0(%r1,$sp) 191 la $sp,0(%r1,$sp)
172 stg %r0,0($sp) 192 st${g} %r0,0($sp)
173 193
174 larl $t0,Ktable 194 larl $t0,Ktable
175 llgf $A,0($ctx) 195 llgf $A,0($ctx)
@@ -199,7 +219,7 @@ ___
199for (;$i<80;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); } 219for (;$i<80;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
200$code.=<<___; 220$code.=<<___;
201 221
202 lg $ctx,`$frame+16`($sp) 222 l${g} $ctx,`$frame+2*$SIZE_T`($sp)
203 la $inp,64($inp) 223 la $inp,64($inp)
204 al $A,0($ctx) 224 al $A,0($ctx)
205 al $B,4($ctx) 225 al $B,4($ctx)
@@ -211,13 +231,13 @@ $code.=<<___;
211 st $C,8($ctx) 231 st $C,8($ctx)
212 st $D,12($ctx) 232 st $D,12($ctx)
213 st $E,16($ctx) 233 st $E,16($ctx)
214 brct $len,.Lloop 234 brct${g} $len,.Lloop
215 235
216 lmg %r6,%r15,`$frame+48`($sp) 236 lm${g} %r6,%r15,`$frame+6*$SIZE_T`($sp)
217 br %r14 237 br %r14
218.size sha1_block_data_order,.-sha1_block_data_order 238.size sha1_block_data_order,.-sha1_block_data_order
219.string "SHA1 block transform for s390x, CRYPTOGAMS by <appro\@openssl.org>" 239.string "SHA1 block transform for s390x, CRYPTOGAMS by <appro\@openssl.org>"
220.comm OPENSSL_s390xcap_P,8,8 240.comm OPENSSL_s390xcap_P,16,8
221___ 241___
222 242
223$code =~ s/\`([^\`]*)\`/eval $1/gem; 243$code =~ s/\`([^\`]*)\`/eval $1/gem;
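
Besides parameterizing the module for 31-bit builds ($SIZE_T and the st${g}/lm${g} suffixes), the s390x code keeps its kimd fast path: it tests the message-security-assist facility and, when the KIMD SHA-1 function is present, hands whole blocks to the hardware instead of running the software rounds. The shape of that dispatch, sketched in C with hypothetical helper names:

#include <stddef.h>

/* All three helpers are hypothetical stand-ins for the assembler paths. */
int  kimd_sha1_available(void);                              /* facility probe  */
void sha1_blocks_kimd(void *ctx, const void *inp, size_t n); /* hardware KIMD   */
void sha1_blocks_soft(void *ctx, const void *inp, size_t n); /* software rounds */

void sha1_blocks(void *ctx, const void *inp, size_t nblocks)
{
    if (kimd_sha1_available())
        sha1_blocks_kimd(ctx, inp, nblocks);
    else
        sha1_blocks_soft(ctx, inp, nblocks);
}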
diff --git a/src/lib/libssl/src/crypto/sha/asm/sha1-x86_64.pl b/src/lib/libssl/src/crypto/sha/asm/sha1-x86_64.pl
index 4edc5ea9ad..f27c1e3fb0 100755
--- a/src/lib/libssl/src/crypto/sha/asm/sha1-x86_64.pl
+++ b/src/lib/libssl/src/crypto/sha/asm/sha1-x86_64.pl
@@ -16,7 +16,7 @@
16# There was suggestion to mechanically translate 32-bit code, but I 16# There was suggestion to mechanically translate 32-bit code, but I
17# dismissed it, reasoning that x86_64 offers enough register bank 17# dismissed it, reasoning that x86_64 offers enough register bank
18# capacity to fully utilize SHA-1 parallelism. Therefore this fresh 18# capacity to fully utilize SHA-1 parallelism. Therefore this fresh
19# implementation:-) However! While 64-bit code does performs better 19# implementation:-) However! While 64-bit code does perform better
20# on Opteron, I failed to beat 32-bit assembler on EM64T core. Well, 20# on Opteron, I failed to beat 32-bit assembler on EM64T core. Well,
21# x86_64 does offer larger *addressable* bank, but out-of-order core 21# x86_64 does offer larger *addressable* bank, but out-of-order core
22# reaches for even more registers through dynamic aliasing, and EM64T 22# reaches for even more registers through dynamic aliasing, and EM64T
@@ -29,6 +29,38 @@
29# Xeon P4 +65% +0% 9.9 29# Xeon P4 +65% +0% 9.9
30# Core2 +60% +10% 7.0 30# Core2 +60% +10% 7.0
31 31
32# August 2009.
33#
34# The code was revised to minimize code size and to maximize
35# "distance" between instructions producing input to 'lea'
36# instruction and the 'lea' instruction itself, which is essential
37# for Intel Atom core.
38
39# October 2010.
40#
41# Add SSSE3, Supplemental[!] SSE3, implementation. The idea behind it
42# is to offload message schedule denoted by Wt in NIST specification,
43# or Xupdate in OpenSSL source, to SIMD unit. See sha1-586.pl module
44# for background and implementation details. The only difference from
45# 32-bit code is that 64-bit code doesn't have to spill @X[] elements
46# to free temporary registers.
47
48# April 2011.
49#
50# Add AVX code path. See sha1-586.pl for further information.
51
52######################################################################
53# Current performance is summarized in following table. Numbers are
54# CPU clock cycles spent to process single byte (less is better).
55#
56# x86_64 SSSE3 AVX
57# P4 9.8 -
58# Opteron 6.6 -
59# Core2 6.7 6.1/+10% -
60# Atom 11.0 9.7/+13% -
61# Westmere 7.1 5.6/+27% -
62# Sandy Bridge 7.9 6.3/+25% 5.2/+51%
63
32$flavour = shift; 64$flavour = shift;
33$output = shift; 65$output = shift;
34if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } 66if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
@@ -40,6 +72,16 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
40( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or 72( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
41die "can't locate x86_64-xlate.pl"; 73die "can't locate x86_64-xlate.pl";
42 74
75$avx=1 if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1`
76 =~ /GNU assembler version ([2-9]\.[0-9]+)/ &&
77 $1>=2.19);
78$avx=1 if (!$avx && $win64 && ($flavour =~ /nasm/ || $ENV{ASM} =~ /nasm/) &&
79 `nasm -v 2>&1` =~ /NASM version ([2-9]\.[0-9]+)/ &&
80 $1>=2.09);
81$avx=1 if (!$avx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) &&
82 `ml64 2>&1` =~ /Version ([0-9]+)\./ &&
83 $1>=10);
84
43open STDOUT,"| $^X $xlate $flavour $output"; 85open STDOUT,"| $^X $xlate $flavour $output";
44 86
45$ctx="%rdi"; # 1st arg 87$ctx="%rdi"; # 1st arg
@@ -51,196 +93,994 @@ $ctx="%r8";
51$inp="%r9"; 93$inp="%r9";
52$num="%r10"; 94$num="%r10";
53 95
54$xi="%eax"; 96$t0="%eax";
55$t0="%ebx"; 97$t1="%ebx";
56$t1="%ecx"; 98$t2="%ecx";
57$A="%edx"; 99@xi=("%edx","%ebp");
58$B="%esi"; 100$A="%esi";
59$C="%edi"; 101$B="%edi";
60$D="%ebp"; 102$C="%r11d";
61$E="%r11d"; 103$D="%r12d";
62$T="%r12d"; 104$E="%r13d";
63
64@V=($A,$B,$C,$D,$E,$T);
65 105
66sub PROLOGUE { 106@V=($A,$B,$C,$D,$E);
67my $func=shift;
68$code.=<<___;
69.globl $func
70.type $func,\@function,3
71.align 16
72$func:
73 push %rbx
74 push %rbp
75 push %r12
76 mov %rsp,%r11
77 mov %rdi,$ctx # reassigned argument
78 sub \$`8+16*4`,%rsp
79 mov %rsi,$inp # reassigned argument
80 and \$-64,%rsp
81 mov %rdx,$num # reassigned argument
82 mov %r11,`16*4`(%rsp)
83.Lprologue:
84
85 mov 0($ctx),$A
86 mov 4($ctx),$B
87 mov 8($ctx),$C
88 mov 12($ctx),$D
89 mov 16($ctx),$E
90___
91}
92
93sub EPILOGUE {
94my $func=shift;
95$code.=<<___;
96 mov `16*4`(%rsp),%rsi
97 mov (%rsi),%r12
98 mov 8(%rsi),%rbp
99 mov 16(%rsi),%rbx
100 lea 24(%rsi),%rsp
101.Lepilogue:
102 ret
103.size $func,.-$func
104___
105}
106 107
107sub BODY_00_19 { 108sub BODY_00_19 {
108my ($i,$a,$b,$c,$d,$e,$f,$host)=@_; 109my ($i,$a,$b,$c,$d,$e)=@_;
109my $j=$i+1; 110my $j=$i+1;
110$code.=<<___ if ($i==0); 111$code.=<<___ if ($i==0);
111 mov `4*$i`($inp),$xi 112 mov `4*$i`($inp),$xi[0]
112 `"bswap $xi" if(!defined($host))` 113 bswap $xi[0]
113 mov $xi,`4*$i`(%rsp) 114 mov $xi[0],`4*$i`(%rsp)
114___ 115___
115$code.=<<___ if ($i<15); 116$code.=<<___ if ($i<15);
116 lea 0x5a827999($xi,$e),$f
117 mov $c,$t0 117 mov $c,$t0
118 mov `4*$j`($inp),$xi 118 mov `4*$j`($inp),$xi[1]
119 mov $a,$e 119 mov $a,$t2
120 xor $d,$t0 120 xor $d,$t0
121 `"bswap $xi" if(!defined($host))` 121 bswap $xi[1]
122 rol \$5,$e 122 rol \$5,$t2
123 lea 0x5a827999($xi[0],$e),$e
123 and $b,$t0 124 and $b,$t0
124 mov $xi,`4*$j`(%rsp) 125 mov $xi[1],`4*$j`(%rsp)
125 add $e,$f 126 add $t2,$e
126 xor $d,$t0 127 xor $d,$t0
127 rol \$30,$b 128 rol \$30,$b
128 add $t0,$f 129 add $t0,$e
129___ 130___
130$code.=<<___ if ($i>=15); 131$code.=<<___ if ($i>=15);
131 lea 0x5a827999($xi,$e),$f 132 mov `4*($j%16)`(%rsp),$xi[1]
132 mov `4*($j%16)`(%rsp),$xi
133 mov $c,$t0 133 mov $c,$t0
134 mov $a,$e 134 mov $a,$t2
135 xor `4*(($j+2)%16)`(%rsp),$xi 135 xor `4*(($j+2)%16)`(%rsp),$xi[1]
136 xor $d,$t0 136 xor $d,$t0
137 rol \$5,$e 137 rol \$5,$t2
138 xor `4*(($j+8)%16)`(%rsp),$xi 138 xor `4*(($j+8)%16)`(%rsp),$xi[1]
139 and $b,$t0 139 and $b,$t0
140 add $e,$f 140 lea 0x5a827999($xi[0],$e),$e
141 xor `4*(($j+13)%16)`(%rsp),$xi 141 xor `4*(($j+13)%16)`(%rsp),$xi[1]
142 xor $d,$t0 142 xor $d,$t0
143 rol \$1,$xi[1]
144 add $t2,$e
143 rol \$30,$b 145 rol \$30,$b
144 add $t0,$f 146 mov $xi[1],`4*($j%16)`(%rsp)
145 rol \$1,$xi 147 add $t0,$e
146 mov $xi,`4*($j%16)`(%rsp)
147___ 148___
149unshift(@xi,pop(@xi));
148} 150}
149 151
150sub BODY_20_39 { 152sub BODY_20_39 {
151my ($i,$a,$b,$c,$d,$e,$f)=@_; 153my ($i,$a,$b,$c,$d,$e)=@_;
152my $j=$i+1; 154my $j=$i+1;
153my $K=($i<40)?0x6ed9eba1:0xca62c1d6; 155my $K=($i<40)?0x6ed9eba1:0xca62c1d6;
154$code.=<<___ if ($i<79); 156$code.=<<___ if ($i<79);
155 lea $K($xi,$e),$f 157 mov `4*($j%16)`(%rsp),$xi[1]
156 mov `4*($j%16)`(%rsp),$xi
157 mov $c,$t0 158 mov $c,$t0
158 mov $a,$e 159 mov $a,$t2
159 xor `4*(($j+2)%16)`(%rsp),$xi 160 xor `4*(($j+2)%16)`(%rsp),$xi[1]
160 xor $b,$t0 161 xor $b,$t0
161 rol \$5,$e 162 rol \$5,$t2
162 xor `4*(($j+8)%16)`(%rsp),$xi 163 lea $K($xi[0],$e),$e
164 xor `4*(($j+8)%16)`(%rsp),$xi[1]
163 xor $d,$t0 165 xor $d,$t0
164 add $e,$f 166 add $t2,$e
165 xor `4*(($j+13)%16)`(%rsp),$xi 167 xor `4*(($j+13)%16)`(%rsp),$xi[1]
166 rol \$30,$b 168 rol \$30,$b
167 add $t0,$f 169 add $t0,$e
168 rol \$1,$xi 170 rol \$1,$xi[1]
169___ 171___
170$code.=<<___ if ($i<76); 172$code.=<<___ if ($i<76);
171 mov $xi,`4*($j%16)`(%rsp) 173 mov $xi[1],`4*($j%16)`(%rsp)
172___ 174___
173$code.=<<___ if ($i==79); 175$code.=<<___ if ($i==79);
174 lea $K($xi,$e),$f
175 mov $c,$t0 176 mov $c,$t0
176 mov $a,$e 177 mov $a,$t2
177 xor $b,$t0 178 xor $b,$t0
178 rol \$5,$e 179 lea $K($xi[0],$e),$e
180 rol \$5,$t2
179 xor $d,$t0 181 xor $d,$t0
180 add $e,$f 182 add $t2,$e
181 rol \$30,$b 183 rol \$30,$b
182 add $t0,$f 184 add $t0,$e
183___ 185___
186unshift(@xi,pop(@xi));
184} 187}
185 188
186sub BODY_40_59 { 189sub BODY_40_59 {
187my ($i,$a,$b,$c,$d,$e,$f)=@_; 190my ($i,$a,$b,$c,$d,$e)=@_;
188my $j=$i+1; 191my $j=$i+1;
189$code.=<<___; 192$code.=<<___;
190 lea 0x8f1bbcdc($xi,$e),$f 193 mov `4*($j%16)`(%rsp),$xi[1]
191 mov `4*($j%16)`(%rsp),$xi 194 mov $c,$t0
192 mov $b,$t0 195 mov $c,$t1
193 mov $b,$t1 196 xor `4*(($j+2)%16)`(%rsp),$xi[1]
194 xor `4*(($j+2)%16)`(%rsp),$xi 197 and $d,$t0
195 mov $a,$e 198 mov $a,$t2
196 and $c,$t0 199 xor `4*(($j+8)%16)`(%rsp),$xi[1]
197 xor `4*(($j+8)%16)`(%rsp),$xi 200 xor $d,$t1
198 or $c,$t1 201 lea 0x8f1bbcdc($xi[0],$e),$e
199 rol \$5,$e 202 rol \$5,$t2
200 xor `4*(($j+13)%16)`(%rsp),$xi 203 xor `4*(($j+13)%16)`(%rsp),$xi[1]
201 and $d,$t1 204 add $t0,$e
202 add $e,$f 205 and $b,$t1
203 rol \$1,$xi 206 rol \$1,$xi[1]
204 or $t1,$t0 207 add $t1,$e
205 rol \$30,$b 208 rol \$30,$b
206 mov $xi,`4*($j%16)`(%rsp) 209 mov $xi[1],`4*($j%16)`(%rsp)
207 add $t0,$f 210 add $t2,$e
208___ 211___
212unshift(@xi,pop(@xi));
209} 213}
210 214
211$code=".text\n"; 215$code.=<<___;
216.text
217.extern OPENSSL_ia32cap_P
212 218
213&PROLOGUE("sha1_block_data_order"); 219.globl sha1_block_data_order
214$code.=".align 4\n.Lloop:\n"; 220.type sha1_block_data_order,\@function,3
221.align 16
222sha1_block_data_order:
223 mov OPENSSL_ia32cap_P+0(%rip),%r9d
224 mov OPENSSL_ia32cap_P+4(%rip),%r8d
225 test \$`1<<9`,%r8d # check SSSE3 bit
226 jz .Lialu
227___
228$code.=<<___ if ($avx);
229 and \$`1<<28`,%r8d # mask AVX bit
230 and \$`1<<30`,%r9d # mask "Intel CPU" bit
231 or %r9d,%r8d
232 cmp \$`1<<28|1<<30`,%r8d
233 je _avx_shortcut
234___
235$code.=<<___;
236 jmp _ssse3_shortcut
237
238.align 16
239.Lialu:
240 push %rbx
241 push %rbp
242 push %r12
243 push %r13
244 mov %rsp,%r11
245 mov %rdi,$ctx # reassigned argument
246 sub \$`8+16*4`,%rsp
247 mov %rsi,$inp # reassigned argument
248 and \$-64,%rsp
249 mov %rdx,$num # reassigned argument
250 mov %r11,`16*4`(%rsp)
251.Lprologue:
252
253 mov 0($ctx),$A
254 mov 4($ctx),$B
255 mov 8($ctx),$C
256 mov 12($ctx),$D
257 mov 16($ctx),$E
258 jmp .Lloop
259
260.align 16
261.Lloop:
262___
215for($i=0;$i<20;$i++) { &BODY_00_19($i,@V); unshift(@V,pop(@V)); } 263for($i=0;$i<20;$i++) { &BODY_00_19($i,@V); unshift(@V,pop(@V)); }
216for(;$i<40;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); } 264for(;$i<40;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
217for(;$i<60;$i++) { &BODY_40_59($i,@V); unshift(@V,pop(@V)); } 265for(;$i<60;$i++) { &BODY_40_59($i,@V); unshift(@V,pop(@V)); }
218for(;$i<80;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); } 266for(;$i<80;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
219$code.=<<___; 267$code.=<<___;
220 add 0($ctx),$E 268 add 0($ctx),$A
221 add 4($ctx),$T 269 add 4($ctx),$B
222 add 8($ctx),$A 270 add 8($ctx),$C
223 add 12($ctx),$B 271 add 12($ctx),$D
224 add 16($ctx),$C 272 add 16($ctx),$E
225 mov $E,0($ctx) 273 mov $A,0($ctx)
226 mov $T,4($ctx) 274 mov $B,4($ctx)
227 mov $A,8($ctx) 275 mov $C,8($ctx)
228 mov $B,12($ctx) 276 mov $D,12($ctx)
229 mov $C,16($ctx) 277 mov $E,16($ctx)
230 278
231 xchg $E,$A # mov $E,$A
232 xchg $T,$B # mov $T,$B
233 xchg $E,$C # mov $A,$C
234 xchg $T,$D # mov $B,$D
235 # mov $C,$E
236 lea `16*4`($inp),$inp
237 sub \$1,$num 279 sub \$1,$num
280 lea `16*4`($inp),$inp
238 jnz .Lloop 281 jnz .Lloop
282
283 mov `16*4`(%rsp),%rsi
284 mov (%rsi),%r13
285 mov 8(%rsi),%r12
286 mov 16(%rsi),%rbp
287 mov 24(%rsi),%rbx
288 lea 32(%rsi),%rsp
289.Lepilogue:
290 ret
291.size sha1_block_data_order,.-sha1_block_data_order
239___ 292___
240&EPILOGUE("sha1_block_data_order"); 293{{{
294my $Xi=4;
295my @X=map("%xmm$_",(4..7,0..3));
296my @Tx=map("%xmm$_",(8..10));
297my @V=($A,$B,$C,$D,$E)=("%eax","%ebx","%ecx","%edx","%ebp"); # size optimization
298my @T=("%esi","%edi");
299my $j=0;
300my $K_XX_XX="%r11";
301
302my $_rol=sub { &rol(@_) };
303my $_ror=sub { &ror(@_) };
304
305$code.=<<___;
306.type sha1_block_data_order_ssse3,\@function,3
307.align 16
308sha1_block_data_order_ssse3:
309_ssse3_shortcut:
310 push %rbx
311 push %rbp
312 push %r12
313 lea `-64-($win64?5*16:0)`(%rsp),%rsp
314___
315$code.=<<___ if ($win64);
316 movaps %xmm6,64+0(%rsp)
317 movaps %xmm7,64+16(%rsp)
318 movaps %xmm8,64+32(%rsp)
319 movaps %xmm9,64+48(%rsp)
320 movaps %xmm10,64+64(%rsp)
321.Lprologue_ssse3:
322___
323$code.=<<___;
324 mov %rdi,$ctx # reassigned argument
325 mov %rsi,$inp # reassigned argument
326 mov %rdx,$num # reassigned argument
327
328 shl \$6,$num
329 add $inp,$num
330 lea K_XX_XX(%rip),$K_XX_XX
331
332 mov 0($ctx),$A # load context
333 mov 4($ctx),$B
334 mov 8($ctx),$C
335 mov 12($ctx),$D
336 mov $B,@T[0] # magic seed
337 mov 16($ctx),$E
338
339 movdqa 64($K_XX_XX),@X[2] # pbswap mask
340 movdqa 0($K_XX_XX),@Tx[1] # K_00_19
341 movdqu 0($inp),@X[-4&7] # load input to %xmm[0-3]
342 movdqu 16($inp),@X[-3&7]
343 movdqu 32($inp),@X[-2&7]
344 movdqu 48($inp),@X[-1&7]
345 pshufb @X[2],@X[-4&7] # byte swap
346 add \$64,$inp
347 pshufb @X[2],@X[-3&7]
348 pshufb @X[2],@X[-2&7]
349 pshufb @X[2],@X[-1&7]
350 paddd @Tx[1],@X[-4&7] # add K_00_19
351 paddd @Tx[1],@X[-3&7]
352 paddd @Tx[1],@X[-2&7]
353 movdqa @X[-4&7],0(%rsp) # X[]+K xfer to IALU
354 psubd @Tx[1],@X[-4&7] # restore X[]
355 movdqa @X[-3&7],16(%rsp)
356 psubd @Tx[1],@X[-3&7]
357 movdqa @X[-2&7],32(%rsp)
358 psubd @Tx[1],@X[-2&7]
359 jmp .Loop_ssse3
360___
361
362sub AUTOLOAD() # thunk [simplified] 32-bit style perlasm
363{ my $opcode = $AUTOLOAD; $opcode =~ s/.*:://;
364 my $arg = pop;
365 $arg = "\$$arg" if ($arg*1 eq $arg);
366 $code .= "\t$opcode\t".join(',',$arg,reverse @_)."\n";
367}
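(A concrete illustration of what this thunk emits, since &movdqa, &pxor and friends are never defined explicitly: on the first invocation of the sub below, &psrldq(@Tx[0],4) is caught by AUTOLOAD, the trailing 4 is popped and turned into the immediate "$4", the remaining operands are reversed into AT&T order, and the line
	psrldq	$4,%xmm8
is appended to $code.)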
368
369 sub Xupdate_ssse3_16_31() # recall that $Xi starts with 4
370{ use integer;
371 my $body = shift;
372 my @insns = (&$body,&$body,&$body,&$body); # 40 instructions
373 my ($a,$b,$c,$d,$e);
374
375 &movdqa (@X[0],@X[-3&7]);
376 eval(shift(@insns));
377 eval(shift(@insns));
378 &movdqa (@Tx[0],@X[-1&7]);
379 &palignr(@X[0],@X[-4&7],8); # compose "X[-14]" in "X[0]"
380 eval(shift(@insns));
381 eval(shift(@insns));
382
383 &paddd (@Tx[1],@X[-1&7]);
384 eval(shift(@insns));
385 eval(shift(@insns));
386 &psrldq (@Tx[0],4); # "X[-3]", 3 dwords
387 eval(shift(@insns));
388 eval(shift(@insns));
389 &pxor (@X[0],@X[-4&7]); # "X[0]"^="X[-16]"
390 eval(shift(@insns));
391 eval(shift(@insns));
392
393 &pxor (@Tx[0],@X[-2&7]); # "X[-3]"^"X[-8]"
394 eval(shift(@insns));
395 eval(shift(@insns));
396 eval(shift(@insns));
397 eval(shift(@insns));
398
399 &pxor (@X[0],@Tx[0]); # "X[0]"^="X[-3]"^"X[-8]"
400 eval(shift(@insns));
401 eval(shift(@insns));
402 &movdqa (eval(16*(($Xi-1)&3))."(%rsp)",@Tx[1]); # X[]+K xfer to IALU
403 eval(shift(@insns));
404 eval(shift(@insns));
405
406 &movdqa (@Tx[2],@X[0]);
407 &movdqa (@Tx[0],@X[0]);
408 eval(shift(@insns));
409 eval(shift(@insns));
410 eval(shift(@insns));
411 eval(shift(@insns));
412
413 &pslldq (@Tx[2],12); # "X[0]"<<96, extract one dword
414 &paddd (@X[0],@X[0]);
415 eval(shift(@insns));
416 eval(shift(@insns));
417 eval(shift(@insns));
418 eval(shift(@insns));
419
420 &psrld (@Tx[0],31);
421 eval(shift(@insns));
422 eval(shift(@insns));
423 &movdqa (@Tx[1],@Tx[2]);
424 eval(shift(@insns));
425 eval(shift(@insns));
426
427 &psrld (@Tx[2],30);
428 &por (@X[0],@Tx[0]); # "X[0]"<<<=1
429 eval(shift(@insns));
430 eval(shift(@insns));
431 eval(shift(@insns));
432 eval(shift(@insns));
433
434 &pslld (@Tx[1],2);
435 &pxor (@X[0],@Tx[2]);
436 eval(shift(@insns));
437 eval(shift(@insns));
438 &movdqa (@Tx[2],eval(16*(($Xi)/5))."($K_XX_XX)"); # K_XX_XX
439 eval(shift(@insns));
440 eval(shift(@insns));
441
442 &pxor (@X[0],@Tx[1]); # "X[0]"^=("X[0]">>96)<<<2
443
444 foreach (@insns) { eval; } # remaining instructions [if any]
445
446 $Xi++; push(@X,shift(@X)); # "rotate" X[]
447 push(@Tx,shift(@Tx));
448}
449
450sub Xupdate_ssse3_32_79()
451{ use integer;
452 my $body = shift;
453 my @insns = (&$body,&$body,&$body,&$body); # 32 to 48 instructions
454 my ($a,$b,$c,$d,$e);
455
456 &movdqa (@Tx[0],@X[-1&7]) if ($Xi==8);
457 eval(shift(@insns)); # body_20_39
458 &pxor (@X[0],@X[-4&7]); # "X[0]"="X[-32]"^"X[-16]"
459 &palignr(@Tx[0],@X[-2&7],8); # compose "X[-6]"
460 eval(shift(@insns));
461 eval(shift(@insns));
462 eval(shift(@insns)); # rol
463
464 &pxor (@X[0],@X[-7&7]); # "X[0]"^="X[-28]"
465 eval(shift(@insns));
466 eval(shift(@insns)) if (@insns[0] !~ /&ro[rl]/);
467 if ($Xi%5) {
468 &movdqa (@Tx[2],@Tx[1]);# "perpetuate" K_XX_XX...
469 } else { # ... or load next one
470 &movdqa (@Tx[2],eval(16*($Xi/5))."($K_XX_XX)");
471 }
472 &paddd (@Tx[1],@X[-1&7]);
473 eval(shift(@insns)); # ror
474 eval(shift(@insns));
475
476 &pxor (@X[0],@Tx[0]); # "X[0]"^="X[-6]"
477 eval(shift(@insns)); # body_20_39
478 eval(shift(@insns));
479 eval(shift(@insns));
480 eval(shift(@insns)); # rol
481
482 &movdqa (@Tx[0],@X[0]);
483 &movdqa (eval(16*(($Xi-1)&3))."(%rsp)",@Tx[1]); # X[]+K xfer to IALU
484 eval(shift(@insns));
485 eval(shift(@insns));
486 eval(shift(@insns)); # ror
487 eval(shift(@insns));
488
489 &pslld (@X[0],2);
490 eval(shift(@insns)); # body_20_39
491 eval(shift(@insns));
492 &psrld (@Tx[0],30);
493 eval(shift(@insns));
494 eval(shift(@insns)); # rol
495 eval(shift(@insns));
496 eval(shift(@insns));
497 eval(shift(@insns)); # ror
498 eval(shift(@insns));
499
500 &por (@X[0],@Tx[0]); # "X[0]"<<<=2
501 eval(shift(@insns)); # body_20_39
502 eval(shift(@insns));
503 &movdqa (@Tx[1],@X[0]) if ($Xi<19);
504 eval(shift(@insns));
505 eval(shift(@insns)); # rol
506 eval(shift(@insns));
507 eval(shift(@insns));
508 eval(shift(@insns)); # rol
509 eval(shift(@insns));
510
511 foreach (@insns) { eval; } # remaining instructions
512
513 $Xi++; push(@X,shift(@X)); # "rotate" X[]
514 push(@Tx,shift(@Tx));
515}
516
517sub Xuplast_ssse3_80()
518{ use integer;
519 my $body = shift;
520 my @insns = (&$body,&$body,&$body,&$body); # 32 instructions
521 my ($a,$b,$c,$d,$e);
522
523 eval(shift(@insns));
524 &paddd (@Tx[1],@X[-1&7]);
525 eval(shift(@insns));
526 eval(shift(@insns));
527 eval(shift(@insns));
528 eval(shift(@insns));
529
530 &movdqa (eval(16*(($Xi-1)&3))."(%rsp)",@Tx[1]); # X[]+K xfer IALU
531
532 foreach (@insns) { eval; } # remaining instructions
533
534 &cmp ($inp,$num);
535 &je (".Ldone_ssse3");
536
537 unshift(@Tx,pop(@Tx));
538
539 &movdqa (@X[2],"64($K_XX_XX)"); # pbswap mask
540 &movdqa (@Tx[1],"0($K_XX_XX)"); # K_00_19
541 &movdqu (@X[-4&7],"0($inp)"); # load input
542 &movdqu (@X[-3&7],"16($inp)");
543 &movdqu (@X[-2&7],"32($inp)");
544 &movdqu (@X[-1&7],"48($inp)");
545 &pshufb (@X[-4&7],@X[2]); # byte swap
546 &add ($inp,64);
547
548 $Xi=0;
549}
550
551sub Xloop_ssse3()
552{ use integer;
553 my $body = shift;
554 my @insns = (&$body,&$body,&$body,&$body); # 32 instructions
555 my ($a,$b,$c,$d,$e);
556
557 eval(shift(@insns));
558 eval(shift(@insns));
559 &pshufb (@X[($Xi-3)&7],@X[2]);
560 eval(shift(@insns));
561 eval(shift(@insns));
562 &paddd (@X[($Xi-4)&7],@Tx[1]);
563 eval(shift(@insns));
564 eval(shift(@insns));
565 eval(shift(@insns));
566 eval(shift(@insns));
567 &movdqa (eval(16*$Xi)."(%rsp)",@X[($Xi-4)&7]); # X[]+K xfer to IALU
568 eval(shift(@insns));
569 eval(shift(@insns));
570 &psubd (@X[($Xi-4)&7],@Tx[1]);
571
572 foreach (@insns) { eval; }
573 $Xi++;
574}
575
576sub Xtail_ssse3()
577{ use integer;
578 my $body = shift;
579 my @insns = (&$body,&$body,&$body,&$body); # 32 instructions
580 my ($a,$b,$c,$d,$e);
581
582 foreach (@insns) { eval; }
583}
584
585sub body_00_19 () {
586 (
587 '($a,$b,$c,$d,$e)=@V;'.
588 '&add ($e,eval(4*($j&15))."(%rsp)");', # X[]+K xfer
589 '&xor ($c,$d);',
590 '&mov (@T[1],$a);', # $b in next round
591 '&$_rol ($a,5);',
592 '&and (@T[0],$c);', # ($b&($c^$d))
593 '&xor ($c,$d);', # restore $c
594 '&xor (@T[0],$d);',
595 '&add ($e,$a);',
596 '&$_ror ($b,$j?7:2);', # $b>>>2
597 '&add ($e,@T[0]);' .'$j++; unshift(@V,pop(@V)); unshift(@T,pop(@T));'
598 );
599}
600
601sub body_20_39 () {
602 (
603 '($a,$b,$c,$d,$e)=@V;'.
604 '&add ($e,eval(4*($j++&15))."(%rsp)");', # X[]+K xfer
605 '&xor (@T[0],$d);', # ($b^$d)
606 '&mov (@T[1],$a);', # $b in next round
607 '&$_rol ($a,5);',
608 '&xor (@T[0],$c);', # ($b^$d^$c)
609 '&add ($e,$a);',
610 '&$_ror ($b,7);', # $b>>>2
611 '&add ($e,@T[0]);' .'unshift(@V,pop(@V)); unshift(@T,pop(@T));'
612 );
613}
614
615sub body_40_59 () {
616 (
617 '($a,$b,$c,$d,$e)=@V;'.
618 '&mov (@T[1],$c);',
619 '&xor ($c,$d);',
620 '&add ($e,eval(4*($j++&15))."(%rsp)");', # X[]+K xfer
621 '&and (@T[1],$d);',
622 '&and (@T[0],$c);', # ($b&($c^$d))
623 '&$_ror ($b,7);', # $b>>>2
624 '&add ($e,@T[1]);',
625 '&mov (@T[1],$a);', # $b in next round
626 '&$_rol ($a,5);',
627 '&add ($e,@T[0]);',
628 '&xor ($c,$d);', # restore $c
629 '&add ($e,$a);' .'unshift(@V,pop(@V)); unshift(@T,pop(@T));'
630 );
631}
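(The three bodies above are the usual SHA-1 round functions computed incrementally with xor/and tricks; a plain-Perl reference, illustrative only. Note that body_40_59 may add the two halves of Maj into $e separately because (c&d) and (b&(c^d)) never set the same bit:)

sub F_00_19 { my ($b,$c,$d) = @_; return ($b & ($c ^ $d)) ^ $d; }		# Ch
sub F_20_39 { my ($b,$c,$d) = @_; return $b ^ $c ^ $d; }			# Parity
sub F_40_59 { my ($b,$c,$d) = @_; return ($c & $d) + ($b & ($c ^ $d)); }	# Maj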
241$code.=<<___; 632$code.=<<___;
242.asciz "SHA1 block transform for x86_64, CRYPTOGAMS by <appro\@openssl.org>"
243.align 16 633.align 16
634.Loop_ssse3:
635___
636 &Xupdate_ssse3_16_31(\&body_00_19);
637 &Xupdate_ssse3_16_31(\&body_00_19);
638 &Xupdate_ssse3_16_31(\&body_00_19);
639 &Xupdate_ssse3_16_31(\&body_00_19);
640 &Xupdate_ssse3_32_79(\&body_00_19);
641 &Xupdate_ssse3_32_79(\&body_20_39);
642 &Xupdate_ssse3_32_79(\&body_20_39);
643 &Xupdate_ssse3_32_79(\&body_20_39);
644 &Xupdate_ssse3_32_79(\&body_20_39);
645 &Xupdate_ssse3_32_79(\&body_20_39);
646 &Xupdate_ssse3_32_79(\&body_40_59);
647 &Xupdate_ssse3_32_79(\&body_40_59);
648 &Xupdate_ssse3_32_79(\&body_40_59);
649 &Xupdate_ssse3_32_79(\&body_40_59);
650 &Xupdate_ssse3_32_79(\&body_40_59);
651 &Xupdate_ssse3_32_79(\&body_20_39);
652 &Xuplast_ssse3_80(\&body_20_39); # can jump to "done"
653
654 $saved_j=$j; @saved_V=@V;
655
656 &Xloop_ssse3(\&body_20_39);
657 &Xloop_ssse3(\&body_20_39);
658 &Xloop_ssse3(\&body_20_39);
659
660$code.=<<___;
661 add 0($ctx),$A # update context
662 add 4($ctx),@T[0]
663 add 8($ctx),$C
664 add 12($ctx),$D
665 mov $A,0($ctx)
666 add 16($ctx),$E
667 mov @T[0],4($ctx)
668 mov @T[0],$B # magic seed
669 mov $C,8($ctx)
670 mov $D,12($ctx)
671 mov $E,16($ctx)
672 jmp .Loop_ssse3
673
674.align 16
675.Ldone_ssse3:
676___
677 $j=$saved_j; @V=@saved_V;
678
679 &Xtail_ssse3(\&body_20_39);
680 &Xtail_ssse3(\&body_20_39);
681 &Xtail_ssse3(\&body_20_39);
682
683$code.=<<___;
684 add 0($ctx),$A # update context
685 add 4($ctx),@T[0]
686 add 8($ctx),$C
687 mov $A,0($ctx)
688 add 12($ctx),$D
689 mov @T[0],4($ctx)
690 add 16($ctx),$E
691 mov $C,8($ctx)
692 mov $D,12($ctx)
693 mov $E,16($ctx)
694___
695$code.=<<___ if ($win64);
696 movaps 64+0(%rsp),%xmm6
697 movaps 64+16(%rsp),%xmm7
698 movaps 64+32(%rsp),%xmm8
699 movaps 64+48(%rsp),%xmm9
700 movaps 64+64(%rsp),%xmm10
701___
702$code.=<<___;
703 lea `64+($win64?5*16:0)`(%rsp),%rsi
704 mov 0(%rsi),%r12
705 mov 8(%rsi),%rbp
706 mov 16(%rsi),%rbx
707 lea 24(%rsi),%rsp
708.Lepilogue_ssse3:
709 ret
710.size sha1_block_data_order_ssse3,.-sha1_block_data_order_ssse3
711___
712
713if ($avx) {
714my $Xi=4;
715my @X=map("%xmm$_",(4..7,0..3));
716my @Tx=map("%xmm$_",(8..10));
717my @V=($A,$B,$C,$D,$E)=("%eax","%ebx","%ecx","%edx","%ebp"); # size optimization
718my @T=("%esi","%edi");
719my $j=0;
720my $K_XX_XX="%r11";
721
722my $_rol=sub { &shld(@_[0],@_) };
723my $_ror=sub { &shrd(@_[0],@_) };
724
725$code.=<<___;
726.type sha1_block_data_order_avx,\@function,3
727.align 16
728sha1_block_data_order_avx:
729_avx_shortcut:
730 push %rbx
731 push %rbp
732 push %r12
733 lea `-64-($win64?5*16:0)`(%rsp),%rsp
734___
735$code.=<<___ if ($win64);
736 movaps %xmm6,64+0(%rsp)
737 movaps %xmm7,64+16(%rsp)
738 movaps %xmm8,64+32(%rsp)
739 movaps %xmm9,64+48(%rsp)
740 movaps %xmm10,64+64(%rsp)
741.Lprologue_avx:
742___
743$code.=<<___;
744 mov %rdi,$ctx # reassigned argument
745 mov %rsi,$inp # reassigned argument
746 mov %rdx,$num # reassigned argument
747 vzeroall
748
749 shl \$6,$num
750 add $inp,$num
751 lea K_XX_XX(%rip),$K_XX_XX
752
753 mov 0($ctx),$A # load context
754 mov 4($ctx),$B
755 mov 8($ctx),$C
756 mov 12($ctx),$D
757 mov $B,@T[0] # magic seed
758 mov 16($ctx),$E
759
760 vmovdqa 64($K_XX_XX),@X[2] # pbswap mask
761 vmovdqa 0($K_XX_XX),@Tx[1] # K_00_19
762 vmovdqu 0($inp),@X[-4&7] # load input to %xmm[0-3]
763 vmovdqu 16($inp),@X[-3&7]
764 vmovdqu 32($inp),@X[-2&7]
765 vmovdqu 48($inp),@X[-1&7]
766 vpshufb @X[2],@X[-4&7],@X[-4&7] # byte swap
767 add \$64,$inp
768 vpshufb @X[2],@X[-3&7],@X[-3&7]
769 vpshufb @X[2],@X[-2&7],@X[-2&7]
770 vpshufb @X[2],@X[-1&7],@X[-1&7]
771 vpaddd @Tx[1],@X[-4&7],@X[0] # add K_00_19
772 vpaddd @Tx[1],@X[-3&7],@X[1]
773 vpaddd @Tx[1],@X[-2&7],@X[2]
774 vmovdqa @X[0],0(%rsp) # X[]+K xfer to IALU
775 vmovdqa @X[1],16(%rsp)
776 vmovdqa @X[2],32(%rsp)
777 jmp .Loop_avx
778___
779
780 sub Xupdate_avx_16_31() # recall that $Xi starts with 4
781{ use integer;
782 my $body = shift;
783 my @insns = (&$body,&$body,&$body,&$body); # 40 instructions
784 my ($a,$b,$c,$d,$e);
785
786 eval(shift(@insns));
787 eval(shift(@insns));
788 &vpalignr(@X[0],@X[-3&7],@X[-4&7],8); # compose "X[-14]" in "X[0]"
789 eval(shift(@insns));
790 eval(shift(@insns));
791
792 &vpaddd (@Tx[1],@Tx[1],@X[-1&7]);
793 eval(shift(@insns));
794 eval(shift(@insns));
795 &vpsrldq(@Tx[0],@X[-1&7],4); # "X[-3]", 3 dwords
796 eval(shift(@insns));
797 eval(shift(@insns));
798 &vpxor (@X[0],@X[0],@X[-4&7]); # "X[0]"^="X[-16]"
799 eval(shift(@insns));
800 eval(shift(@insns));
801
802 &vpxor (@Tx[0],@Tx[0],@X[-2&7]); # "X[-3]"^"X[-8]"
803 eval(shift(@insns));
804 eval(shift(@insns));
805 eval(shift(@insns));
806 eval(shift(@insns));
807
808 &vpxor (@X[0],@X[0],@Tx[0]); # "X[0]"^="X[-3]"^"X[-8]"
809 eval(shift(@insns));
810 eval(shift(@insns));
811 &vmovdqa (eval(16*(($Xi-1)&3))."(%rsp)",@Tx[1]); # X[]+K xfer to IALU
812 eval(shift(@insns));
813 eval(shift(@insns));
814
815 &vpsrld (@Tx[0],@X[0],31);
816 eval(shift(@insns));
817 eval(shift(@insns));
818 eval(shift(@insns));
819 eval(shift(@insns));
820
821 &vpslldq(@Tx[2],@X[0],12); # "X[0]"<<96, extract one dword
822 &vpaddd (@X[0],@X[0],@X[0]);
823 eval(shift(@insns));
824 eval(shift(@insns));
825 eval(shift(@insns));
826 eval(shift(@insns));
827
828 &vpsrld (@Tx[1],@Tx[2],30);
829 &vpor (@X[0],@X[0],@Tx[0]); # "X[0]"<<<=1
830 eval(shift(@insns));
831 eval(shift(@insns));
832 eval(shift(@insns));
833 eval(shift(@insns));
834
835 &vpslld (@Tx[2],@Tx[2],2);
836 &vpxor (@X[0],@X[0],@Tx[1]);
837 eval(shift(@insns));
838 eval(shift(@insns));
839 eval(shift(@insns));
840 eval(shift(@insns));
841
842 &vpxor (@X[0],@X[0],@Tx[2]); # "X[0]"^=("X[0]">>96)<<<2
843 eval(shift(@insns));
844 eval(shift(@insns));
845 &vmovdqa (@Tx[2],eval(16*(($Xi)/5))."($K_XX_XX)"); # K_XX_XX
846 eval(shift(@insns));
847 eval(shift(@insns));
848
849
850 foreach (@insns) { eval; } # remaining instructions [if any]
851
852 $Xi++; push(@X,shift(@X)); # "rotate" X[]
853 push(@Tx,shift(@Tx));
854}
855
856sub Xupdate_avx_32_79()
857{ use integer;
858 my $body = shift;
859 my @insns = (&$body,&$body,&$body,&$body); # 32 to 48 instructions
860 my ($a,$b,$c,$d,$e);
861
862 &vpalignr(@Tx[0],@X[-1&7],@X[-2&7],8); # compose "X[-6]"
863 &vpxor (@X[0],@X[0],@X[-4&7]); # "X[0]"="X[-32]"^"X[-16]"
864 eval(shift(@insns)); # body_20_39
865 eval(shift(@insns));
866 eval(shift(@insns));
867 eval(shift(@insns)); # rol
868
869 &vpxor (@X[0],@X[0],@X[-7&7]); # "X[0]"^="X[-28]"
870 eval(shift(@insns));
871 eval(shift(@insns)) if (@insns[0] !~ /&ro[rl]/);
872 if ($Xi%5) {
873 &vmovdqa (@Tx[2],@Tx[1]);# "perpetuate" K_XX_XX...
874 } else { # ... or load next one
875 &vmovdqa (@Tx[2],eval(16*($Xi/5))."($K_XX_XX)");
876 }
877 &vpaddd (@Tx[1],@Tx[1],@X[-1&7]);
878 eval(shift(@insns)); # ror
879 eval(shift(@insns));
880
881 &vpxor (@X[0],@X[0],@Tx[0]); # "X[0]"^="X[-6]"
882 eval(shift(@insns)); # body_20_39
883 eval(shift(@insns));
884 eval(shift(@insns));
885 eval(shift(@insns)); # rol
886
887 &vpsrld (@Tx[0],@X[0],30);
888 &vmovdqa (eval(16*(($Xi-1)&3))."(%rsp)",@Tx[1]); # X[]+K xfer to IALU
889 eval(shift(@insns));
890 eval(shift(@insns));
891 eval(shift(@insns)); # ror
892 eval(shift(@insns));
893
894 &vpslld (@X[0],@X[0],2);
895 eval(shift(@insns)); # body_20_39
896 eval(shift(@insns));
897 eval(shift(@insns));
898 eval(shift(@insns)); # rol
899 eval(shift(@insns));
900 eval(shift(@insns));
901 eval(shift(@insns)); # ror
902 eval(shift(@insns));
903
904 &vpor (@X[0],@X[0],@Tx[0]); # "X[0]"<<<=2
905 eval(shift(@insns)); # body_20_39
906 eval(shift(@insns));
907 &vmovdqa (@Tx[1],@X[0]) if ($Xi<19);
908 eval(shift(@insns));
909 eval(shift(@insns)); # rol
910 eval(shift(@insns));
911 eval(shift(@insns));
912 eval(shift(@insns)); # rol
913 eval(shift(@insns));
914
915 foreach (@insns) { eval; } # remaining instructions
916
917 $Xi++; push(@X,shift(@X)); # "rotate" X[]
918 push(@Tx,shift(@Tx));
919}
920
921sub Xuplast_avx_80()
922{ use integer;
923 my $body = shift;
924 my @insns = (&$body,&$body,&$body,&$body); # 32 instructions
925 my ($a,$b,$c,$d,$e);
926
927 eval(shift(@insns));
928 &vpaddd (@Tx[1],@Tx[1],@X[-1&7]);
929 eval(shift(@insns));
930 eval(shift(@insns));
931 eval(shift(@insns));
932 eval(shift(@insns));
933
934 &movdqa (eval(16*(($Xi-1)&3))."(%rsp)",@Tx[1]); # X[]+K xfer IALU
935
936 foreach (@insns) { eval; } # remaining instructions
937
938 &cmp ($inp,$num);
939 &je (".Ldone_avx");
940
941 unshift(@Tx,pop(@Tx));
942
943 &vmovdqa(@X[2],"64($K_XX_XX)"); # pbswap mask
944 &vmovdqa(@Tx[1],"0($K_XX_XX)"); # K_00_19
945 &vmovdqu(@X[-4&7],"0($inp)"); # load input
946 &vmovdqu(@X[-3&7],"16($inp)");
947 &vmovdqu(@X[-2&7],"32($inp)");
948 &vmovdqu(@X[-1&7],"48($inp)");
949 &vpshufb(@X[-4&7],@X[-4&7],@X[2]); # byte swap
950 &add ($inp,64);
951
952 $Xi=0;
953}
954
955sub Xloop_avx()
956{ use integer;
957 my $body = shift;
958 my @insns = (&$body,&$body,&$body,&$body); # 32 instructions
959 my ($a,$b,$c,$d,$e);
960
961 eval(shift(@insns));
962 eval(shift(@insns));
963 &vpshufb(@X[($Xi-3)&7],@X[($Xi-3)&7],@X[2]);
964 eval(shift(@insns));
965 eval(shift(@insns));
966 &vpaddd (@X[$Xi&7],@X[($Xi-4)&7],@Tx[1]);
967 eval(shift(@insns));
968 eval(shift(@insns));
969 eval(shift(@insns));
970 eval(shift(@insns));
971 &vmovdqa(eval(16*$Xi)."(%rsp)",@X[$Xi&7]); # X[]+K xfer to IALU
972 eval(shift(@insns));
973 eval(shift(@insns));
974
975 foreach (@insns) { eval; }
976 $Xi++;
977}
978
979sub Xtail_avx()
980{ use integer;
981 my $body = shift;
982 my @insns = (&$body,&$body,&$body,&$body); # 32 instructions
983 my ($a,$b,$c,$d,$e);
984
985 foreach (@insns) { eval; }
986}
987
988$code.=<<___;
989.align 16
990.Loop_avx:
991___
992 &Xupdate_avx_16_31(\&body_00_19);
993 &Xupdate_avx_16_31(\&body_00_19);
994 &Xupdate_avx_16_31(\&body_00_19);
995 &Xupdate_avx_16_31(\&body_00_19);
996 &Xupdate_avx_32_79(\&body_00_19);
997 &Xupdate_avx_32_79(\&body_20_39);
998 &Xupdate_avx_32_79(\&body_20_39);
999 &Xupdate_avx_32_79(\&body_20_39);
1000 &Xupdate_avx_32_79(\&body_20_39);
1001 &Xupdate_avx_32_79(\&body_20_39);
1002 &Xupdate_avx_32_79(\&body_40_59);
1003 &Xupdate_avx_32_79(\&body_40_59);
1004 &Xupdate_avx_32_79(\&body_40_59);
1005 &Xupdate_avx_32_79(\&body_40_59);
1006 &Xupdate_avx_32_79(\&body_40_59);
1007 &Xupdate_avx_32_79(\&body_20_39);
1008 &Xuplast_avx_80(\&body_20_39); # can jump to "done"
1009
1010 $saved_j=$j; @saved_V=@V;
1011
1012 &Xloop_avx(\&body_20_39);
1013 &Xloop_avx(\&body_20_39);
1014 &Xloop_avx(\&body_20_39);
1015
1016$code.=<<___;
1017 add 0($ctx),$A # update context
1018 add 4($ctx),@T[0]
1019 add 8($ctx),$C
1020 add 12($ctx),$D
1021 mov $A,0($ctx)
1022 add 16($ctx),$E
1023 mov @T[0],4($ctx)
1024 mov @T[0],$B # magic seed
1025 mov $C,8($ctx)
1026 mov $D,12($ctx)
1027 mov $E,16($ctx)
1028 jmp .Loop_avx
1029
1030.align 16
1031.Ldone_avx:
1032___
1033 $j=$saved_j; @V=@saved_V;
1034
1035 &Xtail_avx(\&body_20_39);
1036 &Xtail_avx(\&body_20_39);
1037 &Xtail_avx(\&body_20_39);
1038
1039$code.=<<___;
1040 vzeroall
1041
1042 add 0($ctx),$A # update context
1043 add 4($ctx),@T[0]
1044 add 8($ctx),$C
1045 mov $A,0($ctx)
1046 add 12($ctx),$D
1047 mov @T[0],4($ctx)
1048 add 16($ctx),$E
1049 mov $C,8($ctx)
1050 mov $D,12($ctx)
1051 mov $E,16($ctx)
1052___
1053$code.=<<___ if ($win64);
1054 movaps 64+0(%rsp),%xmm6
1055 movaps 64+16(%rsp),%xmm7
1056 movaps 64+32(%rsp),%xmm8
1057 movaps 64+48(%rsp),%xmm9
1058 movaps 64+64(%rsp),%xmm10
1059___
1060$code.=<<___;
1061 lea `64+($win64?5*16:0)`(%rsp),%rsi
1062 mov 0(%rsi),%r12
1063 mov 8(%rsi),%rbp
1064 mov 16(%rsi),%rbx
1065 lea 24(%rsi),%rsp
1066.Lepilogue_avx:
1067 ret
1068.size sha1_block_data_order_avx,.-sha1_block_data_order_avx
1069___
1070}
1071$code.=<<___;
1072.align 64
1073K_XX_XX:
1074.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 # K_00_19
1075.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 # K_20_39
1076.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc # K_40_59
1077.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 # K_60_79
1078.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f # pbswap mask
1079___
1080}}}
1081$code.=<<___;
1082.asciz "SHA1 block transform for x86_64, CRYPTOGAMS by <appro\@openssl.org>"
1083.align 64
244___ 1084___
245 1085
246# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame, 1086# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame,
@@ -272,25 +1112,75 @@ se_handler:
272 1112
273 lea .Lprologue(%rip),%r10 1113 lea .Lprologue(%rip),%r10
274 cmp %r10,%rbx # context->Rip<.Lprologue 1114 cmp %r10,%rbx # context->Rip<.Lprologue
275 jb .Lin_prologue 1115 jb .Lcommon_seh_tail
276 1116
277 mov 152($context),%rax # pull context->Rsp 1117 mov 152($context),%rax # pull context->Rsp
278 1118
279 lea .Lepilogue(%rip),%r10 1119 lea .Lepilogue(%rip),%r10
280 cmp %r10,%rbx # context->Rip>=.Lepilogue 1120 cmp %r10,%rbx # context->Rip>=.Lepilogue
281 jae .Lin_prologue 1121 jae .Lcommon_seh_tail
282 1122
283 mov `16*4`(%rax),%rax # pull saved stack pointer 1123 mov `16*4`(%rax),%rax # pull saved stack pointer
284 lea 24(%rax),%rax 1124 lea 32(%rax),%rax
285 1125
286 mov -8(%rax),%rbx 1126 mov -8(%rax),%rbx
287 mov -16(%rax),%rbp 1127 mov -16(%rax),%rbp
288 mov -24(%rax),%r12 1128 mov -24(%rax),%r12
1129 mov -32(%rax),%r13
289 mov %rbx,144($context) # restore context->Rbx 1130 mov %rbx,144($context) # restore context->Rbx
290 mov %rbp,160($context) # restore context->Rbp 1131 mov %rbp,160($context) # restore context->Rbp
291 mov %r12,216($context) # restore context->R12 1132 mov %r12,216($context) # restore context->R12
1133 mov %r13,224($context) # restore context->R13
1134
1135 jmp .Lcommon_seh_tail
1136.size se_handler,.-se_handler
292 1137
293.Lin_prologue: 1138.type ssse3_handler,\@abi-omnipotent
1139.align 16
1140ssse3_handler:
1141 push %rsi
1142 push %rdi
1143 push %rbx
1144 push %rbp
1145 push %r12
1146 push %r13
1147 push %r14
1148 push %r15
1149 pushfq
1150 sub \$64,%rsp
1151
1152 mov 120($context),%rax # pull context->Rax
1153 mov 248($context),%rbx # pull context->Rip
1154
1155 mov 8($disp),%rsi # disp->ImageBase
1156 mov 56($disp),%r11 # disp->HandlerData
1157
1158 mov 0(%r11),%r10d # HandlerData[0]
1159 lea (%rsi,%r10),%r10 # prologue label
1160 cmp %r10,%rbx # context->Rip<prologue label
1161 jb .Lcommon_seh_tail
1162
1163 mov 152($context),%rax # pull context->Rsp
1164
1165 mov 4(%r11),%r10d # HandlerData[1]
1166 lea (%rsi,%r10),%r10 # epilogue label
1167 cmp %r10,%rbx # context->Rip>=epilogue label
1168 jae .Lcommon_seh_tail
1169
1170 lea 64(%rax),%rsi
1171 lea 512($context),%rdi # &context.Xmm6
1172 mov \$10,%ecx
1173 .long 0xa548f3fc # cld; rep movsq
1174 lea `24+64+5*16`(%rax),%rax # adjust stack pointer
1175
1176 mov -8(%rax),%rbx
1177 mov -16(%rax),%rbp
1178 mov -24(%rax),%r12
1179 mov %rbx,144($context) # restore context->Rbx
1180 mov %rbp,160($context) # restore context->Rbp
1181 mov %r12,216($context) # restore context->R12
1182
1183.Lcommon_seh_tail:
294 mov 8(%rax),%rdi 1184 mov 8(%rax),%rdi
295 mov 16(%rax),%rsi 1185 mov 16(%rax),%rsi
296 mov %rax,152($context) # restore context->Rsp 1186 mov %rax,152($context) # restore context->Rsp
@@ -328,19 +1218,38 @@ se_handler:
328 pop %rdi 1218 pop %rdi
329 pop %rsi 1219 pop %rsi
330 ret 1220 ret
331.size se_handler,.-se_handler 1221.size ssse3_handler,.-ssse3_handler
332 1222
333.section .pdata 1223.section .pdata
334.align 4 1224.align 4
335 .rva .LSEH_begin_sha1_block_data_order 1225 .rva .LSEH_begin_sha1_block_data_order
336 .rva .LSEH_end_sha1_block_data_order 1226 .rva .LSEH_end_sha1_block_data_order
337 .rva .LSEH_info_sha1_block_data_order 1227 .rva .LSEH_info_sha1_block_data_order
338 1228 .rva .LSEH_begin_sha1_block_data_order_ssse3
1229 .rva .LSEH_end_sha1_block_data_order_ssse3
1230 .rva .LSEH_info_sha1_block_data_order_ssse3
1231___
1232$code.=<<___ if ($avx);
1233 .rva .LSEH_begin_sha1_block_data_order_avx
1234 .rva .LSEH_end_sha1_block_data_order_avx
1235 .rva .LSEH_info_sha1_block_data_order_avx
1236___
1237$code.=<<___;
339.section .xdata 1238.section .xdata
340.align 8 1239.align 8
341.LSEH_info_sha1_block_data_order: 1240.LSEH_info_sha1_block_data_order:
342 .byte 9,0,0,0 1241 .byte 9,0,0,0
343 .rva se_handler 1242 .rva se_handler
1243.LSEH_info_sha1_block_data_order_ssse3:
1244 .byte 9,0,0,0
1245 .rva ssse3_handler
1246 .rva .Lprologue_ssse3,.Lepilogue_ssse3 # HandlerData[]
1247___
1248$code.=<<___ if ($avx);
1249.LSEH_info_sha1_block_data_order_avx:
1250 .byte 9,0,0,0
1251 .rva ssse3_handler
1252 .rva .Lprologue_avx,.Lepilogue_avx # HandlerData[]
344___ 1253___
345} 1254}
346 1255
diff --git a/src/lib/libssl/src/crypto/sha/asm/sha256-586.pl b/src/lib/libssl/src/crypto/sha/asm/sha256-586.pl
index ecc8b69c75..928ec53123 100644
--- a/src/lib/libssl/src/crypto/sha/asm/sha256-586.pl
+++ b/src/lib/libssl/src/crypto/sha/asm/sha256-586.pl
@@ -14,8 +14,8 @@
14# Pentium PIII P4 AMD K8 Core2 14# Pentium PIII P4 AMD K8 Core2
15# gcc 46 36 41 27 26 15# gcc 46 36 41 27 26
16# icc 57 33 38 25 23 16# icc 57 33 38 25 23
17# x86 asm 40 30 35 20 20 17# x86 asm 40 30 33 20 18
18# x86_64 asm(*) - - 21 15.8 16.5 18# x86_64 asm(*) - - 21 16 16
19# 19#
20# (*) x86_64 assembler performance is presented for reference 20# (*) x86_64 assembler performance is presented for reference
21# purposes. 21# purposes.
@@ -48,20 +48,19 @@ sub BODY_00_15() {
48 my $in_16_63=shift; 48 my $in_16_63=shift;
49 49
50 &mov ("ecx",$E); 50 &mov ("ecx",$E);
51 &add ($T,&DWP(4*(8+15+16-9),"esp")) if ($in_16_63); # T += X[-7] 51 &add ($T,"edi") if ($in_16_63); # T += sigma1(X[-2])
52 &ror ("ecx",6); 52 &ror ("ecx",25-11);
53 &mov ("edi",$E);
54 &ror ("edi",11);
55 &mov ("esi",$Foff); 53 &mov ("esi",$Foff);
56 &xor ("ecx","edi"); 54 &xor ("ecx",$E);
57 &ror ("edi",25-11); 55 &ror ("ecx",11-6);
58 &mov (&DWP(4*(8+15),"esp"),$T) if ($in_16_63); # save X[0] 56 &mov (&DWP(4*(8+15),"esp"),$T) if ($in_16_63); # save X[0]
59 &xor ("ecx","edi"); # Sigma1(e) 57 &xor ("ecx",$E);
58 &ror ("ecx",6); # Sigma1(e)
60 &mov ("edi",$Goff); 59 &mov ("edi",$Goff);
61 &add ($T,"ecx"); # T += Sigma1(e) 60 &add ($T,"ecx"); # T += Sigma1(e)
62 &mov ($Eoff,$E); # modulo-scheduled
63 61
64 &xor ("esi","edi"); 62 &xor ("esi","edi");
63 &mov ($Eoff,$E); # modulo-scheduled
65 &mov ("ecx",$A); 64 &mov ("ecx",$A);
66 &and ("esi",$E); 65 &and ("esi",$E);
67 &mov ($E,$Doff); # e becomes d, which is e in next iteration 66 &mov ($E,$Doff); # e becomes d, which is e in next iteration
@@ -69,14 +68,14 @@ sub BODY_00_15() {
69 &mov ("edi",$A); 68 &mov ("edi",$A);
70 &add ($T,"esi"); # T += Ch(e,f,g) 69 &add ($T,"esi"); # T += Ch(e,f,g)
71 70
72 &ror ("ecx",2); 71 &ror ("ecx",22-13);
73 &add ($T,$Hoff); # T += h 72 &add ($T,$Hoff); # T += h
74 &ror ("edi",13); 73 &xor ("ecx",$A);
74 &ror ("ecx",13-2);
75 &mov ("esi",$Boff); 75 &mov ("esi",$Boff);
76 &xor ("ecx","edi"); 76 &xor ("ecx",$A);
77 &ror ("edi",22-13); 77 &ror ("ecx",2); # Sigma0(a)
78 &add ($E,$T); # d += T 78 &add ($E,$T); # d += T
79 &xor ("ecx","edi"); # Sigma0(a)
80 &mov ("edi",$Coff); 79 &mov ("edi",$Coff);
81 80
82 &add ($T,"ecx"); # T += Sigma0(a) 81 &add ($T,"ecx"); # T += Sigma0(a)
@@ -168,23 +167,22 @@ sub BODY_00_15() {
168&set_label("16_63",16); 167&set_label("16_63",16);
169 &mov ("esi",$T); 168 &mov ("esi",$T);
170 &mov ("ecx",&DWP(4*(8+15+16-14),"esp")); 169 &mov ("ecx",&DWP(4*(8+15+16-14),"esp"));
171 &shr ($T,3);
172 &ror ("esi",7);
173 &xor ($T,"esi");
174 &ror ("esi",18-7); 170 &ror ("esi",18-7);
175 &mov ("edi","ecx"); 171 &mov ("edi","ecx");
176 &xor ($T,"esi"); # T = sigma0(X[-15]) 172 &xor ("esi",$T);
173 &ror ("esi",7);
174 &shr ($T,3);
177 175
178 &shr ("ecx",10);
179 &mov ("esi",&DWP(4*(8+15+16),"esp"));
180 &ror ("edi",17);
181 &xor ("ecx","edi");
182 &ror ("edi",19-17); 176 &ror ("edi",19-17);
183 &add ($T,"esi"); # T += X[-16] 177 &xor ($T,"esi"); # T = sigma0(X[-15])
184 &xor ("edi","ecx") # sigma1(X[-2]) 178 &xor ("edi","ecx");
179 &ror ("edi",17);
180 &shr ("ecx",10);
181 &add ($T,&DWP(4*(8+15+16),"esp")); # T += X[-16]
182 &xor ("edi","ecx"); # sigma1(X[-2])
185 183
186 &add ($T,"edi"); # T += sigma1(X[-2]) 184 &add ($T,&DWP(4*(8+15+16-9),"esp")); # T += X[-7]
187 # &add ($T,&DWP(4*(8+15+16-9),"esp")); # T += X[-7], moved to BODY_00_15(1) 185 # &add ($T,"edi"); # T += sigma1(X[-2])
188 # &mov (&DWP(4*(8+15),"esp"),$T); # save X[0] 186 # &mov (&DWP(4*(8+15),"esp"),$T); # save X[0]
189 187
190 &BODY_00_15(1); 188 &BODY_00_15(1);
diff --git a/src/lib/libssl/src/crypto/sha/asm/sha256-armv4.pl b/src/lib/libssl/src/crypto/sha/asm/sha256-armv4.pl
index 492cb62bc0..9c84e8d93c 100644
--- a/src/lib/libssl/src/crypto/sha/asm/sha256-armv4.pl
+++ b/src/lib/libssl/src/crypto/sha/asm/sha256-armv4.pl
@@ -18,11 +18,16 @@
18# Rescheduling for dual-issue pipeline resulted in 22% improvement on 18# Rescheduling for dual-issue pipeline resulted in 22% improvement on
19# Cortex A8 core and ~20 cycles per processed byte. 19# Cortex A8 core and ~20 cycles per processed byte.
20 20
21# February 2011.
22#
23# Profiler-assisted and platform-specific optimization resulted in 16%
24# improvement on Cortex A8 core and ~17 cycles per processed byte.
25
21while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} 26while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
22open STDOUT,">$output"; 27open STDOUT,">$output";
23 28
24$ctx="r0"; $t0="r0"; 29$ctx="r0"; $t0="r0";
25$inp="r1"; 30$inp="r1"; $t3="r1";
26$len="r2"; $t1="r2"; 31$len="r2"; $t1="r2";
27$T1="r3"; 32$T1="r3";
28$A="r4"; 33$A="r4";
@@ -46,6 +51,9 @@ sub BODY_00_15 {
46my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_; 51my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_;
47 52
48$code.=<<___ if ($i<16); 53$code.=<<___ if ($i<16);
54#if __ARM_ARCH__>=7
55 ldr $T1,[$inp],#4
56#else
49 ldrb $T1,[$inp,#3] @ $i 57 ldrb $T1,[$inp,#3] @ $i
50 ldrb $t2,[$inp,#2] 58 ldrb $t2,[$inp,#2]
51 ldrb $t1,[$inp,#1] 59 ldrb $t1,[$inp,#1]
@@ -53,16 +61,24 @@ $code.=<<___ if ($i<16);
53 orr $T1,$T1,$t2,lsl#8 61 orr $T1,$T1,$t2,lsl#8
54 orr $T1,$T1,$t1,lsl#16 62 orr $T1,$T1,$t1,lsl#16
55 orr $T1,$T1,$t0,lsl#24 63 orr $T1,$T1,$t0,lsl#24
56 `"str $inp,[sp,#17*4]" if ($i==15)` 64#endif
57___ 65___
58$code.=<<___; 66$code.=<<___;
59 ldr $t2,[$Ktbl],#4 @ *K256++
60 mov $t0,$e,ror#$Sigma1[0] 67 mov $t0,$e,ror#$Sigma1[0]
61 str $T1,[sp,#`$i%16`*4] 68 ldr $t2,[$Ktbl],#4 @ *K256++
62 eor $t0,$t0,$e,ror#$Sigma1[1] 69 eor $t0,$t0,$e,ror#$Sigma1[1]
63 eor $t1,$f,$g 70 eor $t1,$f,$g
71#if $i>=16
72 add $T1,$T1,$t3 @ from BODY_16_xx
73#elif __ARM_ARCH__>=7 && defined(__ARMEL__)
74 rev $T1,$T1
75#endif
76#if $i==15
77 str $inp,[sp,#17*4] @ leave room for $t3
78#endif
64 eor $t0,$t0,$e,ror#$Sigma1[2] @ Sigma1(e) 79 eor $t0,$t0,$e,ror#$Sigma1[2] @ Sigma1(e)
65 and $t1,$t1,$e 80 and $t1,$t1,$e
81 str $T1,[sp,#`$i%16`*4]
66 add $T1,$T1,$t0 82 add $T1,$T1,$t0
67 eor $t1,$t1,$g @ Ch(e,f,g) 83 eor $t1,$t1,$g @ Ch(e,f,g)
68 add $T1,$T1,$h 84 add $T1,$T1,$h
@@ -71,6 +87,9 @@ $code.=<<___;
71 eor $h,$h,$a,ror#$Sigma0[1] 87 eor $h,$h,$a,ror#$Sigma0[1]
72 add $T1,$T1,$t2 88 add $T1,$T1,$t2
73 eor $h,$h,$a,ror#$Sigma0[2] @ Sigma0(a) 89 eor $h,$h,$a,ror#$Sigma0[2] @ Sigma0(a)
90#if $i>=15
91 ldr $t3,[sp,#`($i+2)%16`*4] @ from BODY_16_xx
92#endif
74 orr $t0,$a,$b 93 orr $t0,$a,$b
75 and $t1,$a,$b 94 and $t1,$a,$b
76 and $t0,$t0,$c 95 and $t0,$t0,$c
@@ -85,24 +104,26 @@ sub BODY_16_XX {
85my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_; 104my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_;
86 105
87$code.=<<___; 106$code.=<<___;
88 ldr $t1,[sp,#`($i+1)%16`*4] @ $i 107 @ ldr $t3,[sp,#`($i+1)%16`*4] @ $i
89 ldr $t2,[sp,#`($i+14)%16`*4] 108 ldr $t2,[sp,#`($i+14)%16`*4]
109 mov $t0,$t3,ror#$sigma0[0]
90 ldr $T1,[sp,#`($i+0)%16`*4] 110 ldr $T1,[sp,#`($i+0)%16`*4]
91 mov $t0,$t1,ror#$sigma0[0] 111 eor $t0,$t0,$t3,ror#$sigma0[1]
92 ldr $inp,[sp,#`($i+9)%16`*4] 112 ldr $t1,[sp,#`($i+9)%16`*4]
93 eor $t0,$t0,$t1,ror#$sigma0[1] 113 eor $t0,$t0,$t3,lsr#$sigma0[2] @ sigma0(X[i+1])
94 eor $t0,$t0,$t1,lsr#$sigma0[2] @ sigma0(X[i+1]) 114 mov $t3,$t2,ror#$sigma1[0]
95 mov $t1,$t2,ror#$sigma1[0]
96 add $T1,$T1,$t0 115 add $T1,$T1,$t0
97 eor $t1,$t1,$t2,ror#$sigma1[1] 116 eor $t3,$t3,$t2,ror#$sigma1[1]
98 add $T1,$T1,$inp
99 eor $t1,$t1,$t2,lsr#$sigma1[2] @ sigma1(X[i+14])
100 add $T1,$T1,$t1 117 add $T1,$T1,$t1
118 eor $t3,$t3,$t2,lsr#$sigma1[2] @ sigma1(X[i+14])
119 @ add $T1,$T1,$t3
101___ 120___
102 &BODY_00_15(@_); 121 &BODY_00_15(@_);
103} 122}
104 123
105$code=<<___; 124$code=<<___;
125#include "arm_arch.h"
126
106.text 127.text
107.code 32 128.code 32
108 129
@@ -132,7 +153,7 @@ K256:
132sha256_block_data_order: 153sha256_block_data_order:
133 sub r3,pc,#8 @ sha256_block_data_order 154 sub r3,pc,#8 @ sha256_block_data_order
134 add $len,$inp,$len,lsl#6 @ len to point at the end of inp 155 add $len,$inp,$len,lsl#6 @ len to point at the end of inp
135 stmdb sp!,{$ctx,$inp,$len,r4-r12,lr} 156 stmdb sp!,{$ctx,$inp,$len,r4-r11,lr}
136 ldmia $ctx,{$A,$B,$C,$D,$E,$F,$G,$H} 157 ldmia $ctx,{$A,$B,$C,$D,$E,$F,$G,$H}
137 sub $Ktbl,r3,#256 @ K256 158 sub $Ktbl,r3,#256 @ K256
138 sub sp,sp,#16*4 @ alloca(X[16]) 159 sub sp,sp,#16*4 @ alloca(X[16])
@@ -171,10 +192,14 @@ $code.=<<___;
171 bne .Loop 192 bne .Loop
172 193
173 add sp,sp,#`16+3`*4 @ destroy frame 194 add sp,sp,#`16+3`*4 @ destroy frame
174 ldmia sp!,{r4-r12,lr} 195#if __ARM_ARCH__>=5
196 ldmia sp!,{r4-r11,pc}
197#else
198 ldmia sp!,{r4-r11,lr}
175 tst lr,#1 199 tst lr,#1
176 moveq pc,lr @ be binary compatible with V4, yet 200 moveq pc,lr @ be binary compatible with V4, yet
177 bx lr @ interoperable with Thumb ISA:-) 201 bx lr @ interoperable with Thumb ISA:-)
202#endif
178.size sha256_block_data_order,.-sha256_block_data_order 203.size sha256_block_data_order,.-sha256_block_data_order
179.asciz "SHA256 block transform for ARMv4, CRYPTOGAMS by <appro\@openssl.org>" 204.asciz "SHA256 block transform for ARMv4, CRYPTOGAMS by <appro\@openssl.org>"
180.align 2 205.align 2
diff --git a/src/lib/libssl/src/crypto/sha/asm/sha512-armv4.pl b/src/lib/libssl/src/crypto/sha/asm/sha512-armv4.pl
index 3a35861ac6..7faf37b147 100644
--- a/src/lib/libssl/src/crypto/sha/asm/sha512-armv4.pl
+++ b/src/lib/libssl/src/crypto/sha/asm/sha512-armv4.pl
@@ -18,22 +18,33 @@
18# Rescheduling for dual-issue pipeline resulted in 6% improvement on 18# Rescheduling for dual-issue pipeline resulted in 6% improvement on
19# Cortex A8 core and ~40 cycles per processed byte. 19# Cortex A8 core and ~40 cycles per processed byte.
20 20
21# February 2011.
22#
23# Profiler-assisted and platform-specific optimization resulted in 7%
24# improvement on Cortex A8 core and ~38 cycles per byte.
25
26# March 2011.
27#
28# Add NEON implementation. On Cortex A8 it was measured to process
29# one byte in 25.5 cycles or 47% faster than integer-only code.
30
21# Byte order [in]dependence. ========================================= 31# Byte order [in]dependence. =========================================
22# 32#
23# Caller is expected to maintain specific *dword* order in h[0-7], 33# Originally caller was expected to maintain specific *dword* order in
24# namely with most significant dword at *lower* address, which is 34# h[0-7], namely with most significant dword at *lower* address, which
25# reflected in below two parameters. *Byte* order within these dwords 35# was reflected in below two parameters as 0 and 4. Now caller is
26# in turn is whatever *native* byte order on current platform. 36# expected to maintain native byte order for whole 64-bit values.
27$hi=0; 37$hi="HI";
28$lo=4; 38$lo="LO";
29# ==================================================================== 39# ====================================================================
30 40
31while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} 41while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
32open STDOUT,">$output"; 42open STDOUT,">$output";
33 43
34$ctx="r0"; 44$ctx="r0"; # parameter block
35$inp="r1"; 45$inp="r1";
36$len="r2"; 46$len="r2";
47
37$Tlo="r3"; 48$Tlo="r3";
38$Thi="r4"; 49$Thi="r4";
39$Alo="r5"; 50$Alo="r5";
@@ -61,15 +72,17 @@ $Xoff=8*8;
61sub BODY_00_15() { 72sub BODY_00_15() {
62my $magic = shift; 73my $magic = shift;
63$code.=<<___; 74$code.=<<___;
64 ldr $t2,[sp,#$Hoff+0] @ h.lo
65 ldr $t3,[sp,#$Hoff+4] @ h.hi
66 @ Sigma1(x) (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41)) 75 @ Sigma1(x) (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41))
67 @ LO lo>>14^hi<<18 ^ lo>>18^hi<<14 ^ hi>>9^lo<<23 76 @ LO lo>>14^hi<<18 ^ lo>>18^hi<<14 ^ hi>>9^lo<<23
68 @ HI hi>>14^lo<<18 ^ hi>>18^lo<<14 ^ lo>>9^hi<<23 77 @ HI hi>>14^lo<<18 ^ hi>>18^lo<<14 ^ lo>>9^hi<<23
69 mov $t0,$Elo,lsr#14 78 mov $t0,$Elo,lsr#14
79 str $Tlo,[sp,#$Xoff+0]
70 mov $t1,$Ehi,lsr#14 80 mov $t1,$Ehi,lsr#14
81 str $Thi,[sp,#$Xoff+4]
71 eor $t0,$t0,$Ehi,lsl#18 82 eor $t0,$t0,$Ehi,lsl#18
83 ldr $t2,[sp,#$Hoff+0] @ h.lo
72 eor $t1,$t1,$Elo,lsl#18 84 eor $t1,$t1,$Elo,lsl#18
85 ldr $t3,[sp,#$Hoff+4] @ h.hi
73 eor $t0,$t0,$Elo,lsr#18 86 eor $t0,$t0,$Elo,lsr#18
74 eor $t1,$t1,$Ehi,lsr#18 87 eor $t1,$t1,$Ehi,lsr#18
75 eor $t0,$t0,$Ehi,lsl#14 88 eor $t0,$t0,$Ehi,lsl#14
@@ -96,25 +109,24 @@ $code.=<<___;
96 and $t1,$t1,$Ehi 109 and $t1,$t1,$Ehi
97 str $Ahi,[sp,#$Aoff+4] 110 str $Ahi,[sp,#$Aoff+4]
98 eor $t0,$t0,$t2 111 eor $t0,$t0,$t2
99 ldr $t2,[$Ktbl,#4] @ K[i].lo 112 ldr $t2,[$Ktbl,#$lo] @ K[i].lo
100 eor $t1,$t1,$t3 @ Ch(e,f,g) 113 eor $t1,$t1,$t3 @ Ch(e,f,g)
101 ldr $t3,[$Ktbl,#0] @ K[i].hi 114 ldr $t3,[$Ktbl,#$hi] @ K[i].hi
102 115
103 adds $Tlo,$Tlo,$t0 116 adds $Tlo,$Tlo,$t0
104 ldr $Elo,[sp,#$Doff+0] @ d.lo 117 ldr $Elo,[sp,#$Doff+0] @ d.lo
105 adc $Thi,$Thi,$t1 @ T += Ch(e,f,g) 118 adc $Thi,$Thi,$t1 @ T += Ch(e,f,g)
106 ldr $Ehi,[sp,#$Doff+4] @ d.hi 119 ldr $Ehi,[sp,#$Doff+4] @ d.hi
107 adds $Tlo,$Tlo,$t2 120 adds $Tlo,$Tlo,$t2
121 and $t0,$t2,#0xff
108 adc $Thi,$Thi,$t3 @ T += K[i] 122 adc $Thi,$Thi,$t3 @ T += K[i]
109 adds $Elo,$Elo,$Tlo 123 adds $Elo,$Elo,$Tlo
124 ldr $t2,[sp,#$Boff+0] @ b.lo
110 adc $Ehi,$Ehi,$Thi @ d += T 125 adc $Ehi,$Ehi,$Thi @ d += T
111
112 and $t0,$t2,#0xff
113 teq $t0,#$magic 126 teq $t0,#$magic
114 orreq $Ktbl,$Ktbl,#1
115 127
116 ldr $t2,[sp,#$Boff+0] @ b.lo
117 ldr $t3,[sp,#$Coff+0] @ c.lo 128 ldr $t3,[sp,#$Coff+0] @ c.lo
129 orreq $Ktbl,$Ktbl,#1
118 @ Sigma0(x) (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39)) 130 @ Sigma0(x) (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
119 @ LO lo>>28^hi<<4 ^ hi>>2^lo<<30 ^ hi>>7^lo<<25 131 @ LO lo>>28^hi<<4 ^ hi>>2^lo<<30 ^ hi>>7^lo<<25
120 @ HI hi>>28^lo<<4 ^ lo>>2^hi<<30 ^ lo>>7^hi<<25 132 @ HI hi>>28^lo<<4 ^ lo>>2^hi<<30 ^ lo>>7^hi<<25
@@ -131,80 +143,100 @@ $code.=<<___;
131 eor $t0,$t0,$Alo,lsl#25 143 eor $t0,$t0,$Alo,lsl#25
132 eor $t1,$t1,$Ahi,lsl#25 @ Sigma0(a) 144 eor $t1,$t1,$Ahi,lsl#25 @ Sigma0(a)
133 adds $Tlo,$Tlo,$t0 145 adds $Tlo,$Tlo,$t0
146 and $t0,$Alo,$t2
134 adc $Thi,$Thi,$t1 @ T += Sigma0(a) 147 adc $Thi,$Thi,$t1 @ T += Sigma0(a)
135 148
136 and $t0,$Alo,$t2
137 orr $Alo,$Alo,$t2
138 ldr $t1,[sp,#$Boff+4] @ b.hi 149 ldr $t1,[sp,#$Boff+4] @ b.hi
150 orr $Alo,$Alo,$t2
139 ldr $t2,[sp,#$Coff+4] @ c.hi 151 ldr $t2,[sp,#$Coff+4] @ c.hi
140 and $Alo,$Alo,$t3 152 and $Alo,$Alo,$t3
141 orr $Alo,$Alo,$t0 @ Maj(a,b,c).lo
142 and $t3,$Ahi,$t1 153 and $t3,$Ahi,$t1
143 orr $Ahi,$Ahi,$t1 154 orr $Ahi,$Ahi,$t1
155 orr $Alo,$Alo,$t0 @ Maj(a,b,c).lo
144 and $Ahi,$Ahi,$t2 156 and $Ahi,$Ahi,$t2
145 orr $Ahi,$Ahi,$t3 @ Maj(a,b,c).hi
146 adds $Alo,$Alo,$Tlo 157 adds $Alo,$Alo,$Tlo
147 adc $Ahi,$Ahi,$Thi @ h += T 158 orr $Ahi,$Ahi,$t3 @ Maj(a,b,c).hi
148
149 sub sp,sp,#8 159 sub sp,sp,#8
160 adc $Ahi,$Ahi,$Thi @ h += T
161 tst $Ktbl,#1
150 add $Ktbl,$Ktbl,#8 162 add $Ktbl,$Ktbl,#8
151___ 163___
152} 164}
153$code=<<___; 165$code=<<___;
166#include "arm_arch.h"
167#ifdef __ARMEL__
168# define LO 0
169# define HI 4
170# define WORD64(hi0,lo0,hi1,lo1) .word lo0,hi0, lo1,hi1
171#else
172# define HI 0
173# define LO 4
174# define WORD64(hi0,lo0,hi1,lo1) .word hi0,lo0, hi1,lo1
175#endif
176
154.text 177.text
155.code 32 178.code 32
156.type K512,%object 179.type K512,%object
157.align 5 180.align 5
158K512: 181K512:
159.word 0x428a2f98,0xd728ae22, 0x71374491,0x23ef65cd 182WORD64(0x428a2f98,0xd728ae22, 0x71374491,0x23ef65cd)
160.word 0xb5c0fbcf,0xec4d3b2f, 0xe9b5dba5,0x8189dbbc 183WORD64(0xb5c0fbcf,0xec4d3b2f, 0xe9b5dba5,0x8189dbbc)
161.word 0x3956c25b,0xf348b538, 0x59f111f1,0xb605d019 184WORD64(0x3956c25b,0xf348b538, 0x59f111f1,0xb605d019)
162.word 0x923f82a4,0xaf194f9b, 0xab1c5ed5,0xda6d8118 185WORD64(0x923f82a4,0xaf194f9b, 0xab1c5ed5,0xda6d8118)
163.word 0xd807aa98,0xa3030242, 0x12835b01,0x45706fbe 186WORD64(0xd807aa98,0xa3030242, 0x12835b01,0x45706fbe)
164.word 0x243185be,0x4ee4b28c, 0x550c7dc3,0xd5ffb4e2 187WORD64(0x243185be,0x4ee4b28c, 0x550c7dc3,0xd5ffb4e2)
165.word 0x72be5d74,0xf27b896f, 0x80deb1fe,0x3b1696b1 188WORD64(0x72be5d74,0xf27b896f, 0x80deb1fe,0x3b1696b1)
166.word 0x9bdc06a7,0x25c71235, 0xc19bf174,0xcf692694 189WORD64(0x9bdc06a7,0x25c71235, 0xc19bf174,0xcf692694)
167.word 0xe49b69c1,0x9ef14ad2, 0xefbe4786,0x384f25e3 190WORD64(0xe49b69c1,0x9ef14ad2, 0xefbe4786,0x384f25e3)
168.word 0x0fc19dc6,0x8b8cd5b5, 0x240ca1cc,0x77ac9c65 191WORD64(0x0fc19dc6,0x8b8cd5b5, 0x240ca1cc,0x77ac9c65)
169.word 0x2de92c6f,0x592b0275, 0x4a7484aa,0x6ea6e483 192WORD64(0x2de92c6f,0x592b0275, 0x4a7484aa,0x6ea6e483)
170.word 0x5cb0a9dc,0xbd41fbd4, 0x76f988da,0x831153b5 193WORD64(0x5cb0a9dc,0xbd41fbd4, 0x76f988da,0x831153b5)
171.word 0x983e5152,0xee66dfab, 0xa831c66d,0x2db43210 194WORD64(0x983e5152,0xee66dfab, 0xa831c66d,0x2db43210)
172.word 0xb00327c8,0x98fb213f, 0xbf597fc7,0xbeef0ee4 195WORD64(0xb00327c8,0x98fb213f, 0xbf597fc7,0xbeef0ee4)
173.word 0xc6e00bf3,0x3da88fc2, 0xd5a79147,0x930aa725 196WORD64(0xc6e00bf3,0x3da88fc2, 0xd5a79147,0x930aa725)
174.word 0x06ca6351,0xe003826f, 0x14292967,0x0a0e6e70 197WORD64(0x06ca6351,0xe003826f, 0x14292967,0x0a0e6e70)
175.word 0x27b70a85,0x46d22ffc, 0x2e1b2138,0x5c26c926 198WORD64(0x27b70a85,0x46d22ffc, 0x2e1b2138,0x5c26c926)
176.word 0x4d2c6dfc,0x5ac42aed, 0x53380d13,0x9d95b3df 199WORD64(0x4d2c6dfc,0x5ac42aed, 0x53380d13,0x9d95b3df)
177.word 0x650a7354,0x8baf63de, 0x766a0abb,0x3c77b2a8 200WORD64(0x650a7354,0x8baf63de, 0x766a0abb,0x3c77b2a8)
178.word 0x81c2c92e,0x47edaee6, 0x92722c85,0x1482353b 201WORD64(0x81c2c92e,0x47edaee6, 0x92722c85,0x1482353b)
179.word 0xa2bfe8a1,0x4cf10364, 0xa81a664b,0xbc423001 202WORD64(0xa2bfe8a1,0x4cf10364, 0xa81a664b,0xbc423001)
180.word 0xc24b8b70,0xd0f89791, 0xc76c51a3,0x0654be30 203WORD64(0xc24b8b70,0xd0f89791, 0xc76c51a3,0x0654be30)
181.word 0xd192e819,0xd6ef5218, 0xd6990624,0x5565a910 204WORD64(0xd192e819,0xd6ef5218, 0xd6990624,0x5565a910)
182.word 0xf40e3585,0x5771202a, 0x106aa070,0x32bbd1b8 205WORD64(0xf40e3585,0x5771202a, 0x106aa070,0x32bbd1b8)
183.word 0x19a4c116,0xb8d2d0c8, 0x1e376c08,0x5141ab53 206WORD64(0x19a4c116,0xb8d2d0c8, 0x1e376c08,0x5141ab53)
184.word 0x2748774c,0xdf8eeb99, 0x34b0bcb5,0xe19b48a8 207WORD64(0x2748774c,0xdf8eeb99, 0x34b0bcb5,0xe19b48a8)
185.word 0x391c0cb3,0xc5c95a63, 0x4ed8aa4a,0xe3418acb 208WORD64(0x391c0cb3,0xc5c95a63, 0x4ed8aa4a,0xe3418acb)
186.word 0x5b9cca4f,0x7763e373, 0x682e6ff3,0xd6b2b8a3 209WORD64(0x5b9cca4f,0x7763e373, 0x682e6ff3,0xd6b2b8a3)
187.word 0x748f82ee,0x5defb2fc, 0x78a5636f,0x43172f60 210WORD64(0x748f82ee,0x5defb2fc, 0x78a5636f,0x43172f60)
188.word 0x84c87814,0xa1f0ab72, 0x8cc70208,0x1a6439ec 211WORD64(0x84c87814,0xa1f0ab72, 0x8cc70208,0x1a6439ec)
189.word 0x90befffa,0x23631e28, 0xa4506ceb,0xde82bde9 212WORD64(0x90befffa,0x23631e28, 0xa4506ceb,0xde82bde9)
190.word 0xbef9a3f7,0xb2c67915, 0xc67178f2,0xe372532b 213WORD64(0xbef9a3f7,0xb2c67915, 0xc67178f2,0xe372532b)
191.word 0xca273ece,0xea26619c, 0xd186b8c7,0x21c0c207 214WORD64(0xca273ece,0xea26619c, 0xd186b8c7,0x21c0c207)
192.word 0xeada7dd6,0xcde0eb1e, 0xf57d4f7f,0xee6ed178 215WORD64(0xeada7dd6,0xcde0eb1e, 0xf57d4f7f,0xee6ed178)
193.word 0x06f067aa,0x72176fba, 0x0a637dc5,0xa2c898a6 216WORD64(0x06f067aa,0x72176fba, 0x0a637dc5,0xa2c898a6)
194.word 0x113f9804,0xbef90dae, 0x1b710b35,0x131c471b 217WORD64(0x113f9804,0xbef90dae, 0x1b710b35,0x131c471b)
195.word 0x28db77f5,0x23047d84, 0x32caab7b,0x40c72493 218WORD64(0x28db77f5,0x23047d84, 0x32caab7b,0x40c72493)
196.word 0x3c9ebe0a,0x15c9bebc, 0x431d67c4,0x9c100d4c 219WORD64(0x3c9ebe0a,0x15c9bebc, 0x431d67c4,0x9c100d4c)
197.word 0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a 220WORD64(0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a)
198.word 0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817 221WORD64(0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817)
199.size K512,.-K512 222.size K512,.-K512
223.LOPENSSL_armcap:
224.word OPENSSL_armcap_P-sha512_block_data_order
225.skip 32-4
200 226
201.global sha512_block_data_order 227.global sha512_block_data_order
202.type sha512_block_data_order,%function 228.type sha512_block_data_order,%function
203sha512_block_data_order: 229sha512_block_data_order:
204 sub r3,pc,#8 @ sha512_block_data_order 230 sub r3,pc,#8 @ sha512_block_data_order
205 add $len,$inp,$len,lsl#7 @ len to point at the end of inp 231 add $len,$inp,$len,lsl#7 @ len to point at the end of inp
232#if __ARM_ARCH__>=7
233 ldr r12,.LOPENSSL_armcap
234 ldr r12,[r3,r12] @ OPENSSL_armcap_P
235 tst r12,#1
236 bne .LNEON
237#endif
206 stmdb sp!,{r4-r12,lr} 238 stmdb sp!,{r4-r12,lr}
207 sub $Ktbl,r3,#640 @ K512 239 sub $Ktbl,r3,#672 @ K512
208 sub sp,sp,#9*8 240 sub sp,sp,#9*8
209 241
210 ldr $Elo,[$ctx,#$Eoff+$lo] 242 ldr $Elo,[$ctx,#$Eoff+$lo]
@@ -238,6 +270,7 @@ sha512_block_data_order:
238 str $Thi,[sp,#$Foff+4] 270 str $Thi,[sp,#$Foff+4]
239 271
240.L00_15: 272.L00_15:
273#if __ARM_ARCH__<7
241 ldrb $Tlo,[$inp,#7] 274 ldrb $Tlo,[$inp,#7]
242 ldrb $t0, [$inp,#6] 275 ldrb $t0, [$inp,#6]
243 ldrb $t1, [$inp,#5] 276 ldrb $t1, [$inp,#5]
@@ -252,26 +285,30 @@ sha512_block_data_order:
252 orr $Thi,$Thi,$t3,lsl#8 285 orr $Thi,$Thi,$t3,lsl#8
253 orr $Thi,$Thi,$t0,lsl#16 286 orr $Thi,$Thi,$t0,lsl#16
254 orr $Thi,$Thi,$t1,lsl#24 287 orr $Thi,$Thi,$t1,lsl#24
255 str $Tlo,[sp,#$Xoff+0] 288#else
256 str $Thi,[sp,#$Xoff+4] 289 ldr $Tlo,[$inp,#4]
290 ldr $Thi,[$inp],#8
291#ifdef __ARMEL__
292 rev $Tlo,$Tlo
293 rev $Thi,$Thi
294#endif
295#endif
257___ 296___
258 &BODY_00_15(0x94); 297 &BODY_00_15(0x94);
259$code.=<<___; 298$code.=<<___;
260 tst $Ktbl,#1 299 tst $Ktbl,#1
261 beq .L00_15 300 beq .L00_15
262 bic $Ktbl,$Ktbl,#1
263
264.L16_79:
265 ldr $t0,[sp,#`$Xoff+8*(16-1)`+0] 301 ldr $t0,[sp,#`$Xoff+8*(16-1)`+0]
266 ldr $t1,[sp,#`$Xoff+8*(16-1)`+4] 302 ldr $t1,[sp,#`$Xoff+8*(16-1)`+4]
267 ldr $t2,[sp,#`$Xoff+8*(16-14)`+0] 303 bic $Ktbl,$Ktbl,#1
268 ldr $t3,[sp,#`$Xoff+8*(16-14)`+4] 304.L16_79:
269
270 @ sigma0(x) (ROTR((x),1) ^ ROTR((x),8) ^ ((x)>>7)) 305 @ sigma0(x) (ROTR((x),1) ^ ROTR((x),8) ^ ((x)>>7))
271 @ LO lo>>1^hi<<31 ^ lo>>8^hi<<24 ^ lo>>7^hi<<25 306 @ LO lo>>1^hi<<31 ^ lo>>8^hi<<24 ^ lo>>7^hi<<25
272 @ HI hi>>1^lo<<31 ^ hi>>8^lo<<24 ^ hi>>7 307 @ HI hi>>1^lo<<31 ^ hi>>8^lo<<24 ^ hi>>7
273 mov $Tlo,$t0,lsr#1 308 mov $Tlo,$t0,lsr#1
309 ldr $t2,[sp,#`$Xoff+8*(16-14)`+0]
274 mov $Thi,$t1,lsr#1 310 mov $Thi,$t1,lsr#1
311 ldr $t3,[sp,#`$Xoff+8*(16-14)`+4]
275 eor $Tlo,$Tlo,$t1,lsl#31 312 eor $Tlo,$Tlo,$t1,lsl#31
276 eor $Thi,$Thi,$t0,lsl#31 313 eor $Thi,$Thi,$t0,lsl#31
277 eor $Tlo,$Tlo,$t0,lsr#8 314 eor $Tlo,$Tlo,$t0,lsr#8
@@ -295,25 +332,24 @@ $code.=<<___;
295 eor $t1,$t1,$t3,lsl#3 332 eor $t1,$t1,$t3,lsl#3
296 eor $t0,$t0,$t2,lsr#6 333 eor $t0,$t0,$t2,lsr#6
297 eor $t1,$t1,$t3,lsr#6 334 eor $t1,$t1,$t3,lsr#6
335 ldr $t2,[sp,#`$Xoff+8*(16-9)`+0]
298 eor $t0,$t0,$t3,lsl#26 336 eor $t0,$t0,$t3,lsl#26
299 337
300 ldr $t2,[sp,#`$Xoff+8*(16-9)`+0]
301 ldr $t3,[sp,#`$Xoff+8*(16-9)`+4] 338 ldr $t3,[sp,#`$Xoff+8*(16-9)`+4]
302 adds $Tlo,$Tlo,$t0 339 adds $Tlo,$Tlo,$t0
340 ldr $t0,[sp,#`$Xoff+8*16`+0]
303 adc $Thi,$Thi,$t1 341 adc $Thi,$Thi,$t1
304 342
305 ldr $t0,[sp,#`$Xoff+8*16`+0]
306 ldr $t1,[sp,#`$Xoff+8*16`+4] 343 ldr $t1,[sp,#`$Xoff+8*16`+4]
307 adds $Tlo,$Tlo,$t2 344 adds $Tlo,$Tlo,$t2
308 adc $Thi,$Thi,$t3 345 adc $Thi,$Thi,$t3
309 adds $Tlo,$Tlo,$t0 346 adds $Tlo,$Tlo,$t0
310 adc $Thi,$Thi,$t1 347 adc $Thi,$Thi,$t1
311 str $Tlo,[sp,#$Xoff+0]
312 str $Thi,[sp,#$Xoff+4]
313___ 348___
314 &BODY_00_15(0x17); 349 &BODY_00_15(0x17);
315$code.=<<___; 350$code.=<<___;
316 tst $Ktbl,#1 351 ldreq $t0,[sp,#`$Xoff+8*(16-1)`+0]
352 ldreq $t1,[sp,#`$Xoff+8*(16-1)`+4]
317 beq .L16_79 353 beq .L16_79
318 bic $Ktbl,$Ktbl,#1 354 bic $Ktbl,$Ktbl,#1
319 355
@@ -324,12 +360,12 @@ $code.=<<___;
324 ldr $t2, [$ctx,#$Boff+$lo] 360 ldr $t2, [$ctx,#$Boff+$lo]
325 ldr $t3, [$ctx,#$Boff+$hi] 361 ldr $t3, [$ctx,#$Boff+$hi]
326 adds $t0,$Alo,$t0 362 adds $t0,$Alo,$t0
327 adc $t1,$Ahi,$t1
328 adds $t2,$Tlo,$t2
329 adc $t3,$Thi,$t3
330 str $t0, [$ctx,#$Aoff+$lo] 363 str $t0, [$ctx,#$Aoff+$lo]
364 adc $t1,$Ahi,$t1
331 str $t1, [$ctx,#$Aoff+$hi] 365 str $t1, [$ctx,#$Aoff+$hi]
366 adds $t2,$Tlo,$t2
332 str $t2, [$ctx,#$Boff+$lo] 367 str $t2, [$ctx,#$Boff+$lo]
368 adc $t3,$Thi,$t3
333 str $t3, [$ctx,#$Boff+$hi] 369 str $t3, [$ctx,#$Boff+$hi]
334 370
335 ldr $Alo,[sp,#$Coff+0] 371 ldr $Alo,[sp,#$Coff+0]
@@ -341,12 +377,12 @@ $code.=<<___;
341 ldr $t2, [$ctx,#$Doff+$lo] 377 ldr $t2, [$ctx,#$Doff+$lo]
342 ldr $t3, [$ctx,#$Doff+$hi] 378 ldr $t3, [$ctx,#$Doff+$hi]
343 adds $t0,$Alo,$t0 379 adds $t0,$Alo,$t0
344 adc $t1,$Ahi,$t1
345 adds $t2,$Tlo,$t2
346 adc $t3,$Thi,$t3
347 str $t0, [$ctx,#$Coff+$lo] 380 str $t0, [$ctx,#$Coff+$lo]
381 adc $t1,$Ahi,$t1
348 str $t1, [$ctx,#$Coff+$hi] 382 str $t1, [$ctx,#$Coff+$hi]
383 adds $t2,$Tlo,$t2
349 str $t2, [$ctx,#$Doff+$lo] 384 str $t2, [$ctx,#$Doff+$lo]
385 adc $t3,$Thi,$t3
350 str $t3, [$ctx,#$Doff+$hi] 386 str $t3, [$ctx,#$Doff+$hi]
351 387
352 ldr $Tlo,[sp,#$Foff+0] 388 ldr $Tlo,[sp,#$Foff+0]
@@ -356,12 +392,12 @@ $code.=<<___;
356 ldr $t2, [$ctx,#$Foff+$lo] 392 ldr $t2, [$ctx,#$Foff+$lo]
357 ldr $t3, [$ctx,#$Foff+$hi] 393 ldr $t3, [$ctx,#$Foff+$hi]
358 adds $Elo,$Elo,$t0 394 adds $Elo,$Elo,$t0
359 adc $Ehi,$Ehi,$t1
360 adds $t2,$Tlo,$t2
361 adc $t3,$Thi,$t3
362 str $Elo,[$ctx,#$Eoff+$lo] 395 str $Elo,[$ctx,#$Eoff+$lo]
396 adc $Ehi,$Ehi,$t1
363 str $Ehi,[$ctx,#$Eoff+$hi] 397 str $Ehi,[$ctx,#$Eoff+$hi]
398 adds $t2,$Tlo,$t2
364 str $t2, [$ctx,#$Foff+$lo] 399 str $t2, [$ctx,#$Foff+$lo]
400 adc $t3,$Thi,$t3
365 str $t3, [$ctx,#$Foff+$hi] 401 str $t3, [$ctx,#$Foff+$hi]
366 402
367 ldr $Alo,[sp,#$Goff+0] 403 ldr $Alo,[sp,#$Goff+0]
@@ -373,12 +409,12 @@ $code.=<<___;
373 ldr $t2, [$ctx,#$Hoff+$lo] 409 ldr $t2, [$ctx,#$Hoff+$lo]
374 ldr $t3, [$ctx,#$Hoff+$hi] 410 ldr $t3, [$ctx,#$Hoff+$hi]
375 adds $t0,$Alo,$t0 411 adds $t0,$Alo,$t0
376 adc $t1,$Ahi,$t1
377 adds $t2,$Tlo,$t2
378 adc $t3,$Thi,$t3
379 str $t0, [$ctx,#$Goff+$lo] 412 str $t0, [$ctx,#$Goff+$lo]
413 adc $t1,$Ahi,$t1
380 str $t1, [$ctx,#$Goff+$hi] 414 str $t1, [$ctx,#$Goff+$hi]
415 adds $t2,$Tlo,$t2
381 str $t2, [$ctx,#$Hoff+$lo] 416 str $t2, [$ctx,#$Hoff+$lo]
417 adc $t3,$Thi,$t3
382 str $t3, [$ctx,#$Hoff+$hi] 418 str $t3, [$ctx,#$Hoff+$hi]
383 419
384 add sp,sp,#640 420 add sp,sp,#640
@@ -388,13 +424,156 @@ $code.=<<___;
388 bne .Loop 424 bne .Loop
389 425
390 add sp,sp,#8*9 @ destroy frame 426 add sp,sp,#8*9 @ destroy frame
427#if __ARM_ARCH__>=5
428 ldmia sp!,{r4-r12,pc}
429#else
391 ldmia sp!,{r4-r12,lr} 430 ldmia sp!,{r4-r12,lr}
392 tst lr,#1 431 tst lr,#1
393 moveq pc,lr @ be binary compatible with V4, yet 432 moveq pc,lr @ be binary compatible with V4, yet
394 bx lr @ interoperable with Thumb ISA:-) 433 bx lr @ interoperable with Thumb ISA:-)
395.size sha512_block_data_order,.-sha512_block_data_order 434#endif
396.asciz "SHA512 block transform for ARMv4, CRYPTOGAMS by <appro\@openssl.org>" 435___
436
437{
438my @Sigma0=(28,34,39);
439my @Sigma1=(14,18,41);
440my @sigma0=(1, 8, 7);
441my @sigma1=(19,61,6);
442
443my $Ktbl="r3";
444my $cnt="r12"; # volatile register known as ip, intra-procedure-call scratch
445
446my @X=map("d$_",(0..15));
447my @V=($A,$B,$C,$D,$E,$F,$G,$H)=map("d$_",(16..23));
448
449sub NEON_00_15() {
450my $i=shift;
451my ($a,$b,$c,$d,$e,$f,$g,$h)=@_;
452my ($t0,$t1,$t2,$T1,$K,$Ch,$Maj)=map("d$_",(24..31)); # temps
453
454$code.=<<___ if ($i<16 || $i&1);
455 vshr.u64 $t0,$e,#@Sigma1[0] @ $i
456#if $i<16
457 vld1.64 {@X[$i%16]},[$inp]! @ handles unaligned
458#endif
459 vshr.u64 $t1,$e,#@Sigma1[1]
460 vshr.u64 $t2,$e,#@Sigma1[2]
461___
462$code.=<<___;
463 vld1.64 {$K},[$Ktbl,:64]! @ K[i++]
464 vsli.64 $t0,$e,#`64-@Sigma1[0]`
465 vsli.64 $t1,$e,#`64-@Sigma1[1]`
466 vsli.64 $t2,$e,#`64-@Sigma1[2]`
467#if $i<16 && defined(__ARMEL__)
468 vrev64.8 @X[$i],@X[$i]
469#endif
470 vadd.i64 $T1,$K,$h
471 veor $Ch,$f,$g
472 veor $t0,$t1
473 vand $Ch,$e
474 veor $t0,$t2 @ Sigma1(e)
475 veor $Ch,$g @ Ch(e,f,g)
476 vadd.i64 $T1,$t0
477 vshr.u64 $t0,$a,#@Sigma0[0]
478 vadd.i64 $T1,$Ch
479 vshr.u64 $t1,$a,#@Sigma0[1]
480 vshr.u64 $t2,$a,#@Sigma0[2]
481 vsli.64 $t0,$a,#`64-@Sigma0[0]`
482 vsli.64 $t1,$a,#`64-@Sigma0[1]`
483 vsli.64 $t2,$a,#`64-@Sigma0[2]`
484 vadd.i64 $T1,@X[$i%16]
485 vorr $Maj,$a,$c
486 vand $Ch,$a,$c
487 veor $h,$t0,$t1
488 vand $Maj,$b
489 veor $h,$t2 @ Sigma0(a)
490 vorr $Maj,$Ch @ Maj(a,b,c)
491 vadd.i64 $h,$T1
492 vadd.i64 $d,$T1
493 vadd.i64 $h,$Maj
494___
495}
496
497sub NEON_16_79() {
498my $i=shift;
499
500if ($i&1) { &NEON_00_15($i,@_); return; }
501
502# 2x-vectorized, therefore runs every 2nd round
503my @X=map("q$_",(0..7)); # view @X as 128-bit vector
504my ($t0,$t1,$s0,$s1) = map("q$_",(12..15)); # temps
505my ($d0,$d1,$d2) = map("d$_",(24..26)); # temps from NEON_00_15
506my $e=@_[4]; # $e from NEON_00_15
507$i /= 2;
508$code.=<<___;
509 vshr.u64 $t0,@X[($i+7)%8],#@sigma1[0]
510 vshr.u64 $t1,@X[($i+7)%8],#@sigma1[1]
511 vshr.u64 $s1,@X[($i+7)%8],#@sigma1[2]
512 vsli.64 $t0,@X[($i+7)%8],#`64-@sigma1[0]`
513 vext.8 $s0,@X[$i%8],@X[($i+1)%8],#8 @ X[i+1]
514 vsli.64 $t1,@X[($i+7)%8],#`64-@sigma1[1]`
515 veor $s1,$t0
516 vshr.u64 $t0,$s0,#@sigma0[0]
517 veor $s1,$t1 @ sigma1(X[i+14])
518 vshr.u64 $t1,$s0,#@sigma0[1]
519 vadd.i64 @X[$i%8],$s1
520 vshr.u64 $s1,$s0,#@sigma0[2]
521 vsli.64 $t0,$s0,#`64-@sigma0[0]`
522 vsli.64 $t1,$s0,#`64-@sigma0[1]`
523 vext.8 $s0,@X[($i+4)%8],@X[($i+5)%8],#8 @ X[i+9]
524 veor $s1,$t0
525 vshr.u64 $d0,$e,#@Sigma1[0] @ from NEON_00_15
526 vadd.i64 @X[$i%8],$s0
527 vshr.u64 $d1,$e,#@Sigma1[1] @ from NEON_00_15
528 veor $s1,$t1 @ sigma0(X[i+1])
529 vshr.u64 $d2,$e,#@Sigma1[2] @ from NEON_00_15
530 vadd.i64 @X[$i%8],$s1
531___
532 &NEON_00_15(2*$i,@_);
533}
534
535$code.=<<___;
536#if __ARM_ARCH__>=7
537.fpu neon
538
539.align 4
540.LNEON:
541 dmb @ errata #451034 on early Cortex A8
542 vstmdb sp!,{d8-d15} @ ABI specification says so
543 sub $Ktbl,r3,#672 @ K512
544 vldmia $ctx,{$A-$H} @ load context
545.Loop_neon:
546___
547for($i=0;$i<16;$i++) { &NEON_00_15($i,@V); unshift(@V,pop(@V)); }
548$code.=<<___;
549 mov $cnt,#4
550.L16_79_neon:
551 subs $cnt,#1
552___
553for(;$i<32;$i++) { &NEON_16_79($i,@V); unshift(@V,pop(@V)); }
554$code.=<<___;
555 bne .L16_79_neon
556
557 vldmia $ctx,{d24-d31} @ load context to temp
558 vadd.i64 q8,q12 @ vectorized accumulate
559 vadd.i64 q9,q13
560 vadd.i64 q10,q14
561 vadd.i64 q11,q15
562 vstmia $ctx,{$A-$H} @ save context
563 teq $inp,$len
564 sub $Ktbl,#640 @ rewind K512
565 bne .Loop_neon
566
567 vldmia sp!,{d8-d15} @ epilogue
568 bx lr
569#endif
570___
571}
572$code.=<<___;
573.size sha512_block_data_order,.-sha512_block_data_order
574.asciz "SHA512 block transform for ARMv4/NEON, CRYPTOGAMS by <appro\@openssl.org>"
397.align 2 575.align 2
576.comm OPENSSL_armcap_P,4,4
398___ 577___
399 578
400$code =~ s/\`([^\`]*)\`/eval $1/gem; 579$code =~ s/\`([^\`]*)\`/eval $1/gem;
diff --git a/src/lib/libssl/src/crypto/sha/asm/sha512-mips.pl b/src/lib/libssl/src/crypto/sha/asm/sha512-mips.pl
new file mode 100644
index 0000000000..ba5b250890
--- /dev/null
+++ b/src/lib/libssl/src/crypto/sha/asm/sha512-mips.pl
@@ -0,0 +1,455 @@
1#!/usr/bin/env perl
2
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9
10# SHA2 block procedures for MIPS.
11
12# October 2010.
13#
14# SHA256 performance improvement on MIPS R5000 CPU is ~27% over gcc-
15# generated code in o32 build and ~55% in n32/64 build. SHA512 [which
16# for now can only be compiled for the MIPS64 ISA] improvement is a modest
17# ~17%, but it comes for free, because it's the same instruction sequence.
18# Improvement coefficients are for aligned input.
19
20######################################################################
21# There are a number of MIPS ABIs in use; O32 and N32/64 are the most
22# widely used. Then there is a new contender: NUBI. It appears that if
23# one picks the latter, it's possible to arrange the code in an ABI-neutral
24# manner. Therefore let's stick to the NUBI register layout:
25#
26($zero,$at,$t0,$t1,$t2)=map("\$$_",(0..2,24,25));
27($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
28($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8,$s9,$s10,$s11)=map("\$$_",(12..23));
29($gp,$tp,$sp,$fp,$ra)=map("\$$_",(3,28..31));
30#
31# The return value is placed in $a0. The following coding rules facilitate
32# interoperability:
33#
34# - never ever touch $tp, "thread pointer", former $gp [o32 can be
35# excluded from the rule, because it's specified volatile];
36# - copy return value to $t0, former $v0 [or to $a0 if you're adapting
37# old code];
38# - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary;
39#
40# For reference here is register layout for N32/64 MIPS ABIs:
41#
42# ($zero,$at,$v0,$v1)=map("\$$_",(0..3));
43# ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
44# ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25));
45# ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23));
46# ($gp,$sp,$fp,$ra)=map("\$$_",(28..31));
47#
48$flavour = shift; # supported flavours are o32,n32,64,nubi32,nubi64
49
50if ($flavour =~ /64|n32/i) {
51 $PTR_ADD="dadd"; # incidentally works even on n32
52 $PTR_SUB="dsub"; # incidentally works even on n32
53 $REG_S="sd";
54 $REG_L="ld";
55 $PTR_SLL="dsll"; # incidentally works even on n32
56 $SZREG=8;
57} else {
58 $PTR_ADD="add";
59 $PTR_SUB="sub";
60 $REG_S="sw";
61 $REG_L="lw";
62 $PTR_SLL="sll";
63 $SZREG=4;
64}
65$pf = ($flavour =~ /nubi/i) ? $t0 : $t2;
66#
67# <appro@openssl.org>
68#
69######################################################################
70
71$big_endian=(`echo MIPSEL | $ENV{CC} -E -P -`=~/MIPSEL/)?1:0;
72
73for (@ARGV) { $output=$_ if (/^\w[\w\-]*\.\w+$/); }
74open STDOUT,">$output";
75
76if (!defined($big_endian)) { $big_endian=(unpack('L',pack('N',1))==1); }
77
78if ($output =~ /512/) {
79 $label="512";
80 $SZ=8;
81 $LD="ld"; # load from memory
82 $ST="sd"; # store to memory
83 $SLL="dsll"; # shift left logical
84 $SRL="dsrl"; # shift right logical
85 $ADDU="daddu";
86 @Sigma0=(28,34,39);
87 @Sigma1=(14,18,41);
88 @sigma0=( 7, 1, 8); # right shift first
89 @sigma1=( 6,19,61); # right shift first
90 $lastK=0x817;
91 $rounds=80;
92} else {
93 $label="256";
94 $SZ=4;
95 $LD="lw"; # load from memory
96 $ST="sw"; # store to memory
97 $SLL="sll"; # shift left logical
98 $SRL="srl"; # shift right logical
99 $ADDU="addu";
100 @Sigma0=( 2,13,22);
101 @Sigma1=( 6,11,25);
102 @sigma0=( 3, 7,18); # right shift first
103 @sigma1=(10,17,19); # right shift first
104 $lastK=0x8f2;
105 $rounds=64;
106}
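
A note on driving this generator: the ABI flavour is the first argument, the digest width is inferred from the output file name via the /512/ test above, and the endianness probe shells out to $ENV{CC}. A minimal, hypothetical invocation sketch follows; the file and compiler names are illustrative and the real build rules are not part of this diff.

    # Hypothetical driver sketch; the actual make rules live elsewhere.
    $ENV{CC} ||= "mips-linux-gnu-gcc";    # compiler name is illustrative; used by the endianness probe
    system("perl", "sha512-mips.pl", "o32", "sha256-mips.S") == 0 or die "SHA-256 generation failed";
    system("perl", "sha512-mips.pl", "64",  "sha512-mips.S") == 0 or die "SHA-512 generation failed";
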
107
108$MSB = $big_endian ? 0 : ($SZ-1);
109$LSB = ($SZ-1)&~$MSB;
110
111@V=($A,$B,$C,$D,$E,$F,$G,$H)=map("\$$_",(1,2,3,7,24,25,30,31));
112@X=map("\$$_",(8..23));
113
114$ctx=$a0;
115$inp=$a1;
116$len=$a2; $Ktbl=$len;
117
118sub BODY_00_15 {
119my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_;
120my ($T1,$tmp0,$tmp1,$tmp2)=(@X[4],@X[5],@X[6],@X[7]);
121
122$code.=<<___ if ($i<15);
123 ${LD}l @X[1],`($i+1)*$SZ+$MSB`($inp)
124 ${LD}r @X[1],`($i+1)*$SZ+$LSB`($inp)
125___
126$code.=<<___ if (!$big_endian && $i<16 && $SZ==4);
127 srl $tmp0,@X[0],24 # byte swap($i)
128 srl $tmp1,@X[0],8
129 andi $tmp2,@X[0],0xFF00
130 sll @X[0],@X[0],24
131 andi $tmp1,0xFF00
132 sll $tmp2,$tmp2,8
133 or @X[0],$tmp0
134 or $tmp1,$tmp2
135 or @X[0],$tmp1
136___
137$code.=<<___ if (!$big_endian && $i<16 && $SZ==8);
138 ori $tmp0,$zero,0xFF
139 dsll $tmp2,$tmp0,32
140 or $tmp0,$tmp2 # 0x000000FF000000FF
141 and $tmp1,@X[0],$tmp0 # byte swap($i)
142 dsrl $tmp2,@X[0],24
143 dsll $tmp1,24
144 and $tmp2,$tmp0
145 dsll $tmp0,8 # 0x0000FF000000FF00
146 or $tmp1,$tmp2
147 and $tmp2,@X[0],$tmp0
148 dsrl @X[0],8
149 dsll $tmp2,8
150 and @X[0],$tmp0
151 or $tmp1,$tmp2
152 or @X[0],$tmp1
153 dsrl $tmp1,@X[0],32
154 dsll @X[0],32
155 or @X[0],$tmp1
156___
157$code.=<<___;
158 $ADDU $T1,$X[0],$h # $i
159 $SRL $h,$e,@Sigma1[0]
160 xor $tmp2,$f,$g
161 $SLL $tmp1,$e,`$SZ*8-@Sigma1[2]`
162 and $tmp2,$e
163 $SRL $tmp0,$e,@Sigma1[1]
164 xor $h,$tmp1
165 $SLL $tmp1,$e,`$SZ*8-@Sigma1[1]`
166 xor $h,$tmp0
167 $SRL $tmp0,$e,@Sigma1[2]
168 xor $h,$tmp1
169 $SLL $tmp1,$e,`$SZ*8-@Sigma1[0]`
170 xor $h,$tmp0
171 xor $tmp2,$g # Ch(e,f,g)
172 xor $tmp0,$tmp1,$h # Sigma1(e)
173
174 $SRL $h,$a,@Sigma0[0]
175 $ADDU $T1,$tmp2
176 $LD $tmp2,`$i*$SZ`($Ktbl) # K[$i]
177 $SLL $tmp1,$a,`$SZ*8-@Sigma0[2]`
178 $ADDU $T1,$tmp0
179 $SRL $tmp0,$a,@Sigma0[1]
180 xor $h,$tmp1
181 $SLL $tmp1,$a,`$SZ*8-@Sigma0[1]`
182 xor $h,$tmp0
183 $SRL $tmp0,$a,@Sigma0[2]
184 xor $h,$tmp1
185 $SLL $tmp1,$a,`$SZ*8-@Sigma0[0]`
186 xor $h,$tmp0
187 $ST @X[0],`($i%16)*$SZ`($sp) # offload to ring buffer
188 xor $h,$tmp1 # Sigma0(a)
189
190 or $tmp0,$a,$b
191 and $tmp1,$a,$b
192 and $tmp0,$c
193 or $tmp1,$tmp0 # Maj(a,b,c)
194 $ADDU $T1,$tmp2 # +=K[$i]
195 $ADDU $h,$tmp1
196
197 $ADDU $d,$T1
198 $ADDU $h,$T1
199___
200$code.=<<___ if ($i>=13);
201 $LD @X[3],`(($i+3)%16)*$SZ`($sp) # prefetch from ring buffer
202___
203}
204
205sub BODY_16_XX {
206my $i=@_[0];
207my ($tmp0,$tmp1,$tmp2,$tmp3)=(@X[4],@X[5],@X[6],@X[7]);
208
209$code.=<<___;
210 $SRL $tmp2,@X[1],@sigma0[0] # Xupdate($i)
211 $ADDU @X[0],@X[9] # +=X[i+9]
212 $SLL $tmp1,@X[1],`$SZ*8-@sigma0[2]`
213 $SRL $tmp0,@X[1],@sigma0[1]
214 xor $tmp2,$tmp1
215 $SLL $tmp1,`@sigma0[2]-@sigma0[1]`
216 xor $tmp2,$tmp0
217 $SRL $tmp0,@X[1],@sigma0[2]
218 xor $tmp2,$tmp1
219
220 $SRL $tmp3,@X[14],@sigma1[0]
221 xor $tmp2,$tmp0 # sigma0(X[i+1])
222 $SLL $tmp1,@X[14],`$SZ*8-@sigma1[2]`
223 $ADDU @X[0],$tmp2
224 $SRL $tmp0,@X[14],@sigma1[1]
225 xor $tmp3,$tmp1
226 $SLL $tmp1,`@sigma1[2]-@sigma1[1]`
227 xor $tmp3,$tmp0
228 $SRL $tmp0,@X[14],@sigma1[2]
229 xor $tmp3,$tmp1
230
231 xor $tmp3,$tmp0 # sigma1(X[i+14])
232 $ADDU @X[0],$tmp3
233___
234 &BODY_00_15(@_);
235}
236
237$FRAMESIZE=16*$SZ+16*$SZREG;
238$SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? 0xc0fff008 : 0xc0ff0000;
239
240$code.=<<___;
241#ifdef OPENSSL_FIPSCANISTER
242# include <openssl/fipssyms.h>
243#endif
244
245.text
246.set noat
247#if !defined(__vxworks) || defined(__pic__)
248.option pic2
249#endif
250
251.align 5
252.globl sha${label}_block_data_order
253.ent sha${label}_block_data_order
254sha${label}_block_data_order:
255 .frame $sp,$FRAMESIZE,$ra
256 .mask $SAVED_REGS_MASK,-$SZREG
257 .set noreorder
258___
259$code.=<<___ if ($flavour =~ /o32/i); # o32 PIC-ification
260 .cpload $pf
261___
262$code.=<<___;
263 $PTR_SUB $sp,$FRAMESIZE
264 $REG_S $ra,$FRAMESIZE-1*$SZREG($sp)
265 $REG_S $fp,$FRAMESIZE-2*$SZREG($sp)
266 $REG_S $s11,$FRAMESIZE-3*$SZREG($sp)
267 $REG_S $s10,$FRAMESIZE-4*$SZREG($sp)
268 $REG_S $s9,$FRAMESIZE-5*$SZREG($sp)
269 $REG_S $s8,$FRAMESIZE-6*$SZREG($sp)
270 $REG_S $s7,$FRAMESIZE-7*$SZREG($sp)
271 $REG_S $s6,$FRAMESIZE-8*$SZREG($sp)
272 $REG_S $s5,$FRAMESIZE-9*$SZREG($sp)
273 $REG_S $s4,$FRAMESIZE-10*$SZREG($sp)
274___
275$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue
276 $REG_S $s3,$FRAMESIZE-11*$SZREG($sp)
277 $REG_S $s2,$FRAMESIZE-12*$SZREG($sp)
278 $REG_S $s1,$FRAMESIZE-13*$SZREG($sp)
279 $REG_S $s0,$FRAMESIZE-14*$SZREG($sp)
280 $REG_S $gp,$FRAMESIZE-15*$SZREG($sp)
281___
282$code.=<<___;
283 $PTR_SLL @X[15],$len,`log(16*$SZ)/log(2)`
284___
285$code.=<<___ if ($flavour !~ /o32/i); # non-o32 PIC-ification
286 .cplocal $Ktbl
287 .cpsetup $pf,$zero,sha${label}_block_data_order
288___
289$code.=<<___;
290 .set reorder
291 la $Ktbl,K${label} # PIC-ified 'load address'
292
293 $LD $A,0*$SZ($ctx) # load context
294 $LD $B,1*$SZ($ctx)
295 $LD $C,2*$SZ($ctx)
296 $LD $D,3*$SZ($ctx)
297 $LD $E,4*$SZ($ctx)
298 $LD $F,5*$SZ($ctx)
299 $LD $G,6*$SZ($ctx)
300 $LD $H,7*$SZ($ctx)
301
302 $PTR_ADD @X[15],$inp # pointer to the end of input
303 $REG_S @X[15],16*$SZ($sp)
304 b .Loop
305
306.align 5
307.Loop:
308 ${LD}l @X[0],$MSB($inp)
309 ${LD}r @X[0],$LSB($inp)
310___
311for ($i=0;$i<16;$i++)
312{ &BODY_00_15($i,@V); unshift(@V,pop(@V)); push(@X,shift(@X)); }
313$code.=<<___;
314 b .L16_xx
315.align 4
316.L16_xx:
317___
318for (;$i<32;$i++)
319{ &BODY_16_XX($i,@V); unshift(@V,pop(@V)); push(@X,shift(@X)); }
320$code.=<<___;
321 and @X[6],0xfff
322 li @X[7],$lastK
323 .set noreorder
324 bne @X[6],@X[7],.L16_xx
325 $PTR_ADD $Ktbl,16*$SZ # Ktbl+=16
326
327 $REG_L @X[15],16*$SZ($sp) # restore pointer to the end of input
328 $LD @X[0],0*$SZ($ctx)
329 $LD @X[1],1*$SZ($ctx)
330 $LD @X[2],2*$SZ($ctx)
331 $PTR_ADD $inp,16*$SZ
332 $LD @X[3],3*$SZ($ctx)
333 $ADDU $A,@X[0]
334 $LD @X[4],4*$SZ($ctx)
335 $ADDU $B,@X[1]
336 $LD @X[5],5*$SZ($ctx)
337 $ADDU $C,@X[2]
338 $LD @X[6],6*$SZ($ctx)
339 $ADDU $D,@X[3]
340 $LD @X[7],7*$SZ($ctx)
341 $ADDU $E,@X[4]
342 $ST $A,0*$SZ($ctx)
343 $ADDU $F,@X[5]
344 $ST $B,1*$SZ($ctx)
345 $ADDU $G,@X[6]
346 $ST $C,2*$SZ($ctx)
347 $ADDU $H,@X[7]
348 $ST $D,3*$SZ($ctx)
349 $ST $E,4*$SZ($ctx)
350 $ST $F,5*$SZ($ctx)
351 $ST $G,6*$SZ($ctx)
352 $ST $H,7*$SZ($ctx)
353
354 bnel $inp,@X[15],.Loop
355 $PTR_SUB $Ktbl,`($rounds-16)*$SZ` # rewind $Ktbl
356
357 $REG_L $ra,$FRAMESIZE-1*$SZREG($sp)
358 $REG_L $fp,$FRAMESIZE-2*$SZREG($sp)
359 $REG_L $s11,$FRAMESIZE-3*$SZREG($sp)
360 $REG_L $s10,$FRAMESIZE-4*$SZREG($sp)
361 $REG_L $s9,$FRAMESIZE-5*$SZREG($sp)
362 $REG_L $s8,$FRAMESIZE-6*$SZREG($sp)
363 $REG_L $s7,$FRAMESIZE-7*$SZREG($sp)
364 $REG_L $s6,$FRAMESIZE-8*$SZREG($sp)
365 $REG_L $s5,$FRAMESIZE-9*$SZREG($sp)
366 $REG_L $s4,$FRAMESIZE-10*$SZREG($sp)
367___
368$code.=<<___ if ($flavour =~ /nubi/i);
369 $REG_L $s3,$FRAMESIZE-11*$SZREG($sp)
370 $REG_L $s2,$FRAMESIZE-12*$SZREG($sp)
371 $REG_L $s1,$FRAMESIZE-13*$SZREG($sp)
372 $REG_L $s0,$FRAMESIZE-14*$SZREG($sp)
373 $REG_L $gp,$FRAMESIZE-15*$SZREG($sp)
374___
375$code.=<<___;
376 jr $ra
377 $PTR_ADD $sp,$FRAMESIZE
378.end sha${label}_block_data_order
379
380.rdata
381.align 5
382K${label}:
383___
384if ($SZ==4) {
385$code.=<<___;
386 .word 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5
387 .word 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5
388 .word 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3
389 .word 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174
390 .word 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc
391 .word 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da
392 .word 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7
393 .word 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967
394 .word 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13
395 .word 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85
396 .word 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3
397 .word 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070
398 .word 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5
399 .word 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3
400 .word 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208
401 .word 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
402___
403} else {
404$code.=<<___;
405 .dword 0x428a2f98d728ae22, 0x7137449123ef65cd
406 .dword 0xb5c0fbcfec4d3b2f, 0xe9b5dba58189dbbc
407 .dword 0x3956c25bf348b538, 0x59f111f1b605d019
408 .dword 0x923f82a4af194f9b, 0xab1c5ed5da6d8118
409 .dword 0xd807aa98a3030242, 0x12835b0145706fbe
410 .dword 0x243185be4ee4b28c, 0x550c7dc3d5ffb4e2
411 .dword 0x72be5d74f27b896f, 0x80deb1fe3b1696b1
412 .dword 0x9bdc06a725c71235, 0xc19bf174cf692694
413 .dword 0xe49b69c19ef14ad2, 0xefbe4786384f25e3
414 .dword 0x0fc19dc68b8cd5b5, 0x240ca1cc77ac9c65
415 .dword 0x2de92c6f592b0275, 0x4a7484aa6ea6e483
416 .dword 0x5cb0a9dcbd41fbd4, 0x76f988da831153b5
417 .dword 0x983e5152ee66dfab, 0xa831c66d2db43210
418 .dword 0xb00327c898fb213f, 0xbf597fc7beef0ee4
419 .dword 0xc6e00bf33da88fc2, 0xd5a79147930aa725
420 .dword 0x06ca6351e003826f, 0x142929670a0e6e70
421 .dword 0x27b70a8546d22ffc, 0x2e1b21385c26c926
422 .dword 0x4d2c6dfc5ac42aed, 0x53380d139d95b3df
423 .dword 0x650a73548baf63de, 0x766a0abb3c77b2a8
424 .dword 0x81c2c92e47edaee6, 0x92722c851482353b
425 .dword 0xa2bfe8a14cf10364, 0xa81a664bbc423001
426 .dword 0xc24b8b70d0f89791, 0xc76c51a30654be30
427 .dword 0xd192e819d6ef5218, 0xd69906245565a910
428 .dword 0xf40e35855771202a, 0x106aa07032bbd1b8
429 .dword 0x19a4c116b8d2d0c8, 0x1e376c085141ab53
430 .dword 0x2748774cdf8eeb99, 0x34b0bcb5e19b48a8
431 .dword 0x391c0cb3c5c95a63, 0x4ed8aa4ae3418acb
432 .dword 0x5b9cca4f7763e373, 0x682e6ff3d6b2b8a3
433 .dword 0x748f82ee5defb2fc, 0x78a5636f43172f60
434 .dword 0x84c87814a1f0ab72, 0x8cc702081a6439ec
435 .dword 0x90befffa23631e28, 0xa4506cebde82bde9
436 .dword 0xbef9a3f7b2c67915, 0xc67178f2e372532b
437 .dword 0xca273eceea26619c, 0xd186b8c721c0c207
438 .dword 0xeada7dd6cde0eb1e, 0xf57d4f7fee6ed178
439 .dword 0x06f067aa72176fba, 0x0a637dc5a2c898a6
440 .dword 0x113f9804bef90dae, 0x1b710b35131c471b
441 .dword 0x28db77f523047d84, 0x32caab7b40c72493
442 .dword 0x3c9ebe0a15c9bebc, 0x431d67c49c100d4c
443 .dword 0x4cc5d4becb3e42b6, 0x597f299cfc657e2a
444 .dword 0x5fcb6fab3ad6faec, 0x6c44198c4a475817
445___
446}
447$code.=<<___;
448.asciiz "SHA${label} for MIPS, CRYPTOGAMS by <appro\@openssl.org>"
449.align 5
450
451___
452
453$code =~ s/\`([^\`]*)\`/eval $1/gem;
454print $code;
455close STDOUT;
diff --git a/src/lib/libssl/src/crypto/sha/asm/sha512-parisc.pl b/src/lib/libssl/src/crypto/sha/asm/sha512-parisc.pl
new file mode 100755
index 0000000000..e24ee58ae9
--- /dev/null
+++ b/src/lib/libssl/src/crypto/sha/asm/sha512-parisc.pl
@@ -0,0 +1,791 @@
1#!/usr/bin/env perl
2
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9
10# SHA256/512 block procedure for PA-RISC.
11
12# June 2009.
13#
14# SHA256 performance is >75% better than gcc 3.2 generated code on
15# PA-7100LC. Compared to code generated by the vendor compiler, this
16# implementation is almost 70% faster in a 64-bit build, but delivers
17# virtually the same performance in a 32-bit build on PA-8600.
18#
19# SHA512 performance is >2.9x better than gcc 3.2 generated code on
20# PA-7100LC, a PA-RISC 1.1 processor. The implementation then detects if the
21# code is executed on a PA-RISC 2.0 processor and switches to the 64-bit
22# code path, delivering adequate performance even in "blended" 32-bit
23# build. Though 64-bit code is not any faster than code generated by
24# vendor compiler on PA-8600...
25#
26# Special thanks to polarhome.com for providing an HP-UX account.
27
28$flavour = shift;
29$output = shift;
30open STDOUT,">$output";
31
32if ($flavour =~ /64/) {
33 $LEVEL ="2.0W";
34 $SIZE_T =8;
35 $FRAME_MARKER =80;
36 $SAVED_RP =16;
37 $PUSH ="std";
38 $PUSHMA ="std,ma";
39 $POP ="ldd";
40 $POPMB ="ldd,mb";
41} else {
42 $LEVEL ="1.0";
43 $SIZE_T =4;
44 $FRAME_MARKER =48;
45 $SAVED_RP =20;
46 $PUSH ="stw";
47 $PUSHMA ="stwm";
48 $POP ="ldw";
49 $POPMB ="ldwm";
50}
51
52if ($output =~ /512/) {
53 $func="sha512_block_data_order";
54 $SZ=8;
55 @Sigma0=(28,34,39);
56 @Sigma1=(14,18,41);
57 @sigma0=(1, 8, 7);
58 @sigma1=(19,61, 6);
59 $rounds=80;
60 $LAST10BITS=0x017;
61 $LD="ldd";
62 $LDM="ldd,ma";
63 $ST="std";
64} else {
65 $func="sha256_block_data_order";
66 $SZ=4;
67 @Sigma0=( 2,13,22);
68 @Sigma1=( 6,11,25);
69 @sigma0=( 7,18, 3);
70 @sigma1=(17,19,10);
71 $rounds=64;
72 $LAST10BITS=0x0f2;
73 $LD="ldw";
74 $LDM="ldwm";
75 $ST="stw";
76}
77
78$FRAME=16*$SIZE_T+$FRAME_MARKER;# 16 saved regs + frame marker
79 # [+ argument transfer]
80$XOFF=16*$SZ+32; # local variables
81$FRAME+=$XOFF;
82$XOFF+=$FRAME_MARKER; # distance between %sp and local variables
83
84$ctx="%r26"; # zapped by $a0
85$inp="%r25"; # zapped by $a1
86$num="%r24"; # zapped by $t0
87
88$a0 ="%r26";
89$a1 ="%r25";
90$t0 ="%r24";
91$t1 ="%r29";
92$Tbl="%r31";
93
94@V=($A,$B,$C,$D,$E,$F,$G,$H)=("%r17","%r18","%r19","%r20","%r21","%r22","%r23","%r28");
95
96@X=("%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7", "%r8",
97 "%r9", "%r10","%r11","%r12","%r13","%r14","%r15","%r16",$inp);
98
99sub ROUND_00_15 {
100my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_;
101$code.=<<___;
102 _ror $e,$Sigma1[0],$a0
103 and $f,$e,$t0
104 _ror $e,$Sigma1[1],$a1
105 addl $t1,$h,$h
106 andcm $g,$e,$t1
107 xor $a1,$a0,$a0
108 _ror $a1,`$Sigma1[2]-$Sigma1[1]`,$a1
109 or $t0,$t1,$t1 ; Ch(e,f,g)
110 addl @X[$i%16],$h,$h
111 xor $a0,$a1,$a1 ; Sigma1(e)
112 addl $t1,$h,$h
113 _ror $a,$Sigma0[0],$a0
114 addl $a1,$h,$h
115
116 _ror $a,$Sigma0[1],$a1
117 and $a,$b,$t0
118 and $a,$c,$t1
119 xor $a1,$a0,$a0
120 _ror $a1,`$Sigma0[2]-$Sigma0[1]`,$a1
121 xor $t1,$t0,$t0
122 and $b,$c,$t1
123 xor $a0,$a1,$a1 ; Sigma0(a)
124 addl $h,$d,$d
125 xor $t1,$t0,$t0 ; Maj(a,b,c)
126 `"$LDM $SZ($Tbl),$t1" if ($i<15)`
127 addl $a1,$h,$h
128 addl $t0,$h,$h
129
130___
131}
132
133sub ROUND_16_xx {
134my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_;
135$i-=16;
136$code.=<<___;
137 _ror @X[($i+1)%16],$sigma0[0],$a0
138 _ror @X[($i+1)%16],$sigma0[1],$a1
139 addl @X[($i+9)%16],@X[$i],@X[$i]
140 _ror @X[($i+14)%16],$sigma1[0],$t0
141 _ror @X[($i+14)%16],$sigma1[1],$t1
142 xor $a1,$a0,$a0
143 _shr @X[($i+1)%16],$sigma0[2],$a1
144 xor $t1,$t0,$t0
145 _shr @X[($i+14)%16],$sigma1[2],$t1
146 xor $a1,$a0,$a0 ; sigma0(X[(i+1)&0x0f])
147 xor $t1,$t0,$t0 ; sigma1(X[(i+14)&0x0f])
148 $LDM $SZ($Tbl),$t1
149 addl $a0,@X[$i],@X[$i]
150 addl $t0,@X[$i],@X[$i]
151___
152$code.=<<___ if ($i==15);
153 extru $t1,31,10,$a1
154 comiclr,<> $LAST10BITS,$a1,%r0
155 ldo 1($Tbl),$Tbl ; signal end of $Tbl
156___
157&ROUND_00_15($i+16,$a,$b,$c,$d,$e,$f,$g,$h);
158}
159
160$code=<<___;
161 .LEVEL $LEVEL
162 .SPACE \$TEXT\$
163 .SUBSPA \$CODE\$,QUAD=0,ALIGN=8,ACCESS=0x2C,CODE_ONLY
164
165 .ALIGN 64
166L\$table
167___
168$code.=<<___ if ($SZ==8);
169 .WORD 0x428a2f98,0xd728ae22,0x71374491,0x23ef65cd
170 .WORD 0xb5c0fbcf,0xec4d3b2f,0xe9b5dba5,0x8189dbbc
171 .WORD 0x3956c25b,0xf348b538,0x59f111f1,0xb605d019
172 .WORD 0x923f82a4,0xaf194f9b,0xab1c5ed5,0xda6d8118
173 .WORD 0xd807aa98,0xa3030242,0x12835b01,0x45706fbe
174 .WORD 0x243185be,0x4ee4b28c,0x550c7dc3,0xd5ffb4e2
175 .WORD 0x72be5d74,0xf27b896f,0x80deb1fe,0x3b1696b1
176 .WORD 0x9bdc06a7,0x25c71235,0xc19bf174,0xcf692694
177 .WORD 0xe49b69c1,0x9ef14ad2,0xefbe4786,0x384f25e3
178 .WORD 0x0fc19dc6,0x8b8cd5b5,0x240ca1cc,0x77ac9c65
179 .WORD 0x2de92c6f,0x592b0275,0x4a7484aa,0x6ea6e483
180 .WORD 0x5cb0a9dc,0xbd41fbd4,0x76f988da,0x831153b5
181 .WORD 0x983e5152,0xee66dfab,0xa831c66d,0x2db43210
182 .WORD 0xb00327c8,0x98fb213f,0xbf597fc7,0xbeef0ee4
183 .WORD 0xc6e00bf3,0x3da88fc2,0xd5a79147,0x930aa725
184 .WORD 0x06ca6351,0xe003826f,0x14292967,0x0a0e6e70
185 .WORD 0x27b70a85,0x46d22ffc,0x2e1b2138,0x5c26c926
186 .WORD 0x4d2c6dfc,0x5ac42aed,0x53380d13,0x9d95b3df
187 .WORD 0x650a7354,0x8baf63de,0x766a0abb,0x3c77b2a8
188 .WORD 0x81c2c92e,0x47edaee6,0x92722c85,0x1482353b
189 .WORD 0xa2bfe8a1,0x4cf10364,0xa81a664b,0xbc423001
190 .WORD 0xc24b8b70,0xd0f89791,0xc76c51a3,0x0654be30
191 .WORD 0xd192e819,0xd6ef5218,0xd6990624,0x5565a910
192 .WORD 0xf40e3585,0x5771202a,0x106aa070,0x32bbd1b8
193 .WORD 0x19a4c116,0xb8d2d0c8,0x1e376c08,0x5141ab53
194 .WORD 0x2748774c,0xdf8eeb99,0x34b0bcb5,0xe19b48a8
195 .WORD 0x391c0cb3,0xc5c95a63,0x4ed8aa4a,0xe3418acb
196 .WORD 0x5b9cca4f,0x7763e373,0x682e6ff3,0xd6b2b8a3
197 .WORD 0x748f82ee,0x5defb2fc,0x78a5636f,0x43172f60
198 .WORD 0x84c87814,0xa1f0ab72,0x8cc70208,0x1a6439ec
199 .WORD 0x90befffa,0x23631e28,0xa4506ceb,0xde82bde9
200 .WORD 0xbef9a3f7,0xb2c67915,0xc67178f2,0xe372532b
201 .WORD 0xca273ece,0xea26619c,0xd186b8c7,0x21c0c207
202 .WORD 0xeada7dd6,0xcde0eb1e,0xf57d4f7f,0xee6ed178
203 .WORD 0x06f067aa,0x72176fba,0x0a637dc5,0xa2c898a6
204 .WORD 0x113f9804,0xbef90dae,0x1b710b35,0x131c471b
205 .WORD 0x28db77f5,0x23047d84,0x32caab7b,0x40c72493
206 .WORD 0x3c9ebe0a,0x15c9bebc,0x431d67c4,0x9c100d4c
207 .WORD 0x4cc5d4be,0xcb3e42b6,0x597f299c,0xfc657e2a
208 .WORD 0x5fcb6fab,0x3ad6faec,0x6c44198c,0x4a475817
209___
210$code.=<<___ if ($SZ==4);
211 .WORD 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
212 .WORD 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
213 .WORD 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
214 .WORD 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
215 .WORD 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
216 .WORD 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
217 .WORD 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
218 .WORD 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
219 .WORD 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
220 .WORD 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
221 .WORD 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
222 .WORD 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
223 .WORD 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
224 .WORD 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
225 .WORD 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
226 .WORD 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
227___
228$code.=<<___;
229
230 .EXPORT $func,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR
231 .ALIGN 64
232$func
233 .PROC
234 .CALLINFO FRAME=`$FRAME-16*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=18
235 .ENTRY
236 $PUSH %r2,-$SAVED_RP(%sp) ; standard prologue
237 $PUSHMA %r3,$FRAME(%sp)
238 $PUSH %r4,`-$FRAME+1*$SIZE_T`(%sp)
239 $PUSH %r5,`-$FRAME+2*$SIZE_T`(%sp)
240 $PUSH %r6,`-$FRAME+3*$SIZE_T`(%sp)
241 $PUSH %r7,`-$FRAME+4*$SIZE_T`(%sp)
242 $PUSH %r8,`-$FRAME+5*$SIZE_T`(%sp)
243 $PUSH %r9,`-$FRAME+6*$SIZE_T`(%sp)
244 $PUSH %r10,`-$FRAME+7*$SIZE_T`(%sp)
245 $PUSH %r11,`-$FRAME+8*$SIZE_T`(%sp)
246 $PUSH %r12,`-$FRAME+9*$SIZE_T`(%sp)
247 $PUSH %r13,`-$FRAME+10*$SIZE_T`(%sp)
248 $PUSH %r14,`-$FRAME+11*$SIZE_T`(%sp)
249 $PUSH %r15,`-$FRAME+12*$SIZE_T`(%sp)
250 $PUSH %r16,`-$FRAME+13*$SIZE_T`(%sp)
251 $PUSH %r17,`-$FRAME+14*$SIZE_T`(%sp)
252 $PUSH %r18,`-$FRAME+15*$SIZE_T`(%sp)
253
254 _shl $num,`log(16*$SZ)/log(2)`,$num
255 addl $inp,$num,$num ; $num to point at the end of $inp
256
257 $PUSH $num,`-$FRAME_MARKER-4*$SIZE_T`(%sp) ; save arguments
258 $PUSH $inp,`-$FRAME_MARKER-3*$SIZE_T`(%sp)
259 $PUSH $ctx,`-$FRAME_MARKER-2*$SIZE_T`(%sp)
260
261 blr %r0,$Tbl
262 ldi 3,$t1
263L\$pic
264 andcm $Tbl,$t1,$Tbl ; wipe privilege level
265 ldo L\$table-L\$pic($Tbl),$Tbl
266___
267$code.=<<___ if ($SZ==8 && $SIZE_T==4);
268 ldi 31,$t1
269 mtctl $t1,%cr11
270 extrd,u,*= $t1,%sar,1,$t1 ; executes on PA-RISC 1.0
271 b L\$parisc1
272 nop
273___
274$code.=<<___;
275 $LD `0*$SZ`($ctx),$A ; load context
276 $LD `1*$SZ`($ctx),$B
277 $LD `2*$SZ`($ctx),$C
278 $LD `3*$SZ`($ctx),$D
279 $LD `4*$SZ`($ctx),$E
280 $LD `5*$SZ`($ctx),$F
281 $LD `6*$SZ`($ctx),$G
282 $LD `7*$SZ`($ctx),$H
283
284 extru $inp,31,`log($SZ)/log(2)`,$t0
285 sh3addl $t0,%r0,$t0
286 subi `8*$SZ`,$t0,$t0
287 mtctl $t0,%cr11 ; load %sar with align factor
288
289L\$oop
290 ldi `$SZ-1`,$t0
291 $LDM $SZ($Tbl),$t1
292 andcm $inp,$t0,$t0 ; align $inp
293___
294 for ($i=0;$i<15;$i++) { # load input block
295 $code.="\t$LD `$SZ*$i`($t0),@X[$i]\n"; }
296$code.=<<___;
297 cmpb,*= $inp,$t0,L\$aligned
298 $LD `$SZ*15`($t0),@X[15]
299 $LD `$SZ*16`($t0),@X[16]
300___
301 for ($i=0;$i<16;$i++) { # align data
302 $code.="\t_align @X[$i],@X[$i+1],@X[$i]\n"; }
303$code.=<<___;
304L\$aligned
305 nop ; otherwise /usr/ccs/bin/as is confused by below .WORD
306___
307
308for($i=0;$i<16;$i++) { &ROUND_00_15($i,@V); unshift(@V,pop(@V)); }
309$code.=<<___;
310L\$rounds
311 nop ; otherwise /usr/ccs/bin/as is confused by below .WORD
312___
313for(;$i<32;$i++) { &ROUND_16_xx($i,@V); unshift(@V,pop(@V)); }
314$code.=<<___;
315 bb,>= $Tbl,31,L\$rounds ; end of $Tbl signalled?
316 nop
317
318 $POP `-$FRAME_MARKER-2*$SIZE_T`(%sp),$ctx ; restore arguments
319 $POP `-$FRAME_MARKER-3*$SIZE_T`(%sp),$inp
320 $POP `-$FRAME_MARKER-4*$SIZE_T`(%sp),$num
321 ldo `-$rounds*$SZ-1`($Tbl),$Tbl ; rewind $Tbl
322
323 $LD `0*$SZ`($ctx),@X[0] ; load context
324 $LD `1*$SZ`($ctx),@X[1]
325 $LD `2*$SZ`($ctx),@X[2]
326 $LD `3*$SZ`($ctx),@X[3]
327 $LD `4*$SZ`($ctx),@X[4]
328 $LD `5*$SZ`($ctx),@X[5]
329 addl @X[0],$A,$A
330 $LD `6*$SZ`($ctx),@X[6]
331 addl @X[1],$B,$B
332 $LD `7*$SZ`($ctx),@X[7]
333 ldo `16*$SZ`($inp),$inp ; advance $inp
334
335 $ST $A,`0*$SZ`($ctx) ; save context
336 addl @X[2],$C,$C
337 $ST $B,`1*$SZ`($ctx)
338 addl @X[3],$D,$D
339 $ST $C,`2*$SZ`($ctx)
340 addl @X[4],$E,$E
341 $ST $D,`3*$SZ`($ctx)
342 addl @X[5],$F,$F
343 $ST $E,`4*$SZ`($ctx)
344 addl @X[6],$G,$G
345 $ST $F,`5*$SZ`($ctx)
346 addl @X[7],$H,$H
347 $ST $G,`6*$SZ`($ctx)
348 $ST $H,`7*$SZ`($ctx)
349
350 cmpb,*<>,n $inp,$num,L\$oop
351 $PUSH $inp,`-$FRAME_MARKER-3*$SIZE_T`(%sp) ; save $inp
352___
353if ($SZ==8 && $SIZE_T==4) # SHA512 for 32-bit PA-RISC 1.0
354{{
355$code.=<<___;
356 b L\$done
357 nop
358
359 .ALIGN 64
360L\$parisc1
361___
362
363@V=( $Ahi, $Alo, $Bhi, $Blo, $Chi, $Clo, $Dhi, $Dlo,
364 $Ehi, $Elo, $Fhi, $Flo, $Ghi, $Glo, $Hhi, $Hlo) =
365 ( "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7", "%r8",
366 "%r9","%r10","%r11","%r12","%r13","%r14","%r15","%r16");
367$a0 ="%r17";
368$a1 ="%r18";
369$a2 ="%r19";
370$a3 ="%r20";
371$t0 ="%r21";
372$t1 ="%r22";
373$t2 ="%r28";
374$t3 ="%r29";
375$Tbl="%r31";
376
377@X=("%r23","%r24","%r25","%r26"); # zaps $num,$inp,$ctx
378
379sub ROUND_00_15_pa1 {
380my ($i,$ahi,$alo,$bhi,$blo,$chi,$clo,$dhi,$dlo,
381 $ehi,$elo,$fhi,$flo,$ghi,$glo,$hhi,$hlo,$flag)=@_;
382my ($Xhi,$Xlo,$Xnhi,$Xnlo) = @X;
383
384$code.=<<___ if (!$flag);
385 ldw `-$XOFF+8*(($i+1)%16)`(%sp),$Xnhi
386 ldw `-$XOFF+8*(($i+1)%16)+4`(%sp),$Xnlo ; load X[i+1]
387___
388$code.=<<___;
389 shd $ehi,$elo,$Sigma1[0],$t0
390 add $Xlo,$hlo,$hlo
391 shd $elo,$ehi,$Sigma1[0],$t1
392 addc $Xhi,$hhi,$hhi ; h += X[i]
393 shd $ehi,$elo,$Sigma1[1],$t2
394 ldwm 8($Tbl),$Xhi
395 shd $elo,$ehi,$Sigma1[1],$t3
396 ldw -4($Tbl),$Xlo ; load K[i]
397 xor $t2,$t0,$t0
398 xor $t3,$t1,$t1
399 and $flo,$elo,$a0
400 and $fhi,$ehi,$a1
401 shd $ehi,$elo,$Sigma1[2],$t2
402 andcm $glo,$elo,$a2
403 shd $elo,$ehi,$Sigma1[2],$t3
404 andcm $ghi,$ehi,$a3
405 xor $t2,$t0,$t0
406 xor $t3,$t1,$t1 ; Sigma1(e)
407 add $Xlo,$hlo,$hlo
408 xor $a2,$a0,$a0
409 addc $Xhi,$hhi,$hhi ; h += K[i]
410 xor $a3,$a1,$a1 ; Ch(e,f,g)
411
412 add $t0,$hlo,$hlo
413 shd $ahi,$alo,$Sigma0[0],$t0
414 addc $t1,$hhi,$hhi ; h += Sigma1(e)
415 shd $alo,$ahi,$Sigma0[0],$t1
416 add $a0,$hlo,$hlo
417 shd $ahi,$alo,$Sigma0[1],$t2
418 addc $a1,$hhi,$hhi ; h += Ch(e,f,g)
419 shd $alo,$ahi,$Sigma0[1],$t3
420
421 xor $t2,$t0,$t0
422 xor $t3,$t1,$t1
423 shd $ahi,$alo,$Sigma0[2],$t2
424 and $alo,$blo,$a0
425 shd $alo,$ahi,$Sigma0[2],$t3
426 and $ahi,$bhi,$a1
427 xor $t2,$t0,$t0
428 xor $t3,$t1,$t1 ; Sigma0(a)
429
430 and $alo,$clo,$a2
431 and $ahi,$chi,$a3
432 xor $a2,$a0,$a0
433 add $hlo,$dlo,$dlo
434 xor $a3,$a1,$a1
435 addc $hhi,$dhi,$dhi ; d += h
436 and $blo,$clo,$a2
437 add $t0,$hlo,$hlo
438 and $bhi,$chi,$a3
439 addc $t1,$hhi,$hhi ; h += Sigma0(a)
440 xor $a2,$a0,$a0
441 add $a0,$hlo,$hlo
442 xor $a3,$a1,$a1 ; Maj(a,b,c)
443 addc $a1,$hhi,$hhi ; h += Maj(a,b,c)
444
445___
446$code.=<<___ if ($i==15 && $flag);
447 extru $Xlo,31,10,$Xlo
448 comiclr,= $LAST10BITS,$Xlo,%r0
449 b L\$rounds_pa1
450 nop
451___
452push(@X,shift(@X)); push(@X,shift(@X));
453}
454
455sub ROUND_16_xx_pa1 {
456my ($Xhi,$Xlo,$Xnhi,$Xnlo) = @X;
457my ($i)=shift;
458$i-=16;
459$code.=<<___;
460 ldw `-$XOFF+8*(($i+1)%16)`(%sp),$Xnhi
461 ldw `-$XOFF+8*(($i+1)%16)+4`(%sp),$Xnlo ; load X[i+1]
462 ldw `-$XOFF+8*(($i+9)%16)`(%sp),$a1
463 ldw `-$XOFF+8*(($i+9)%16)+4`(%sp),$a0 ; load X[i+9]
464 ldw `-$XOFF+8*(($i+14)%16)`(%sp),$a3
465 ldw `-$XOFF+8*(($i+14)%16)+4`(%sp),$a2 ; load X[i+14]
466 shd $Xnhi,$Xnlo,$sigma0[0],$t0
467 shd $Xnlo,$Xnhi,$sigma0[0],$t1
468 add $a0,$Xlo,$Xlo
469 shd $Xnhi,$Xnlo,$sigma0[1],$t2
470 addc $a1,$Xhi,$Xhi
471 shd $Xnlo,$Xnhi,$sigma0[1],$t3
472 xor $t2,$t0,$t0
473 shd $Xnhi,$Xnlo,$sigma0[2],$t2
474 xor $t3,$t1,$t1
475 extru $Xnhi,`31-$sigma0[2]`,`32-$sigma0[2]`,$t3
476 xor $t2,$t0,$t0
477 shd $a3,$a2,$sigma1[0],$a0
478	xor	$t3,$t1,$t1		; sigma0(X[(i+1)&0x0f])
479 shd $a2,$a3,$sigma1[0],$a1
480 add $t0,$Xlo,$Xlo
481 shd $a3,$a2,$sigma1[1],$t2
482 addc $t1,$Xhi,$Xhi
483 shd $a2,$a3,$sigma1[1],$t3
484 xor $t2,$a0,$a0
485 shd $a3,$a2,$sigma1[2],$t2
486 xor $t3,$a1,$a1
487 extru $a3,`31-$sigma1[2]`,`32-$sigma1[2]`,$t3
488 xor $t2,$a0,$a0
489	xor	$t3,$a1,$a1	; sigma1(X[(i+14)&0x0f])
490 add $a0,$Xlo,$Xlo
491 addc $a1,$Xhi,$Xhi
492
493 stw $Xhi,`-$XOFF+8*($i%16)`(%sp)
494 stw $Xlo,`-$XOFF+8*($i%16)+4`(%sp)
495___
496&ROUND_00_15_pa1($i,@_,1);
497}
498$code.=<<___;
499 ldw `0*4`($ctx),$Ahi ; load context
500 ldw `1*4`($ctx),$Alo
501 ldw `2*4`($ctx),$Bhi
502 ldw `3*4`($ctx),$Blo
503 ldw `4*4`($ctx),$Chi
504 ldw `5*4`($ctx),$Clo
505 ldw `6*4`($ctx),$Dhi
506 ldw `7*4`($ctx),$Dlo
507 ldw `8*4`($ctx),$Ehi
508 ldw `9*4`($ctx),$Elo
509 ldw `10*4`($ctx),$Fhi
510 ldw `11*4`($ctx),$Flo
511 ldw `12*4`($ctx),$Ghi
512 ldw `13*4`($ctx),$Glo
513 ldw `14*4`($ctx),$Hhi
514 ldw `15*4`($ctx),$Hlo
515
516 extru $inp,31,2,$t0
517 sh3addl $t0,%r0,$t0
518 subi 32,$t0,$t0
519 mtctl $t0,%cr11 ; load %sar with align factor
520
521L\$oop_pa1
522 extru $inp,31,2,$a3
523 comib,= 0,$a3,L\$aligned_pa1
524 sub $inp,$a3,$inp
525
526 ldw `0*4`($inp),$X[0]
527 ldw `1*4`($inp),$X[1]
528 ldw `2*4`($inp),$t2
529 ldw `3*4`($inp),$t3
530 ldw `4*4`($inp),$a0
531 ldw `5*4`($inp),$a1
532 ldw `6*4`($inp),$a2
533 ldw `7*4`($inp),$a3
534 vshd $X[0],$X[1],$X[0]
535 vshd $X[1],$t2,$X[1]
536 stw $X[0],`-$XOFF+0*4`(%sp)
537 ldw `8*4`($inp),$t0
538 vshd $t2,$t3,$t2
539 stw $X[1],`-$XOFF+1*4`(%sp)
540 ldw `9*4`($inp),$t1
541 vshd $t3,$a0,$t3
542___
543{
544my @t=($t2,$t3,$a0,$a1,$a2,$a3,$t0,$t1);
545for ($i=2;$i<=(128/4-8);$i++) {
546$code.=<<___;
547 stw $t[0],`-$XOFF+$i*4`(%sp)
548 ldw `(8+$i)*4`($inp),$t[0]
549 vshd $t[1],$t[2],$t[1]
550___
551push(@t,shift(@t));
552}
553for (;$i<(128/4-1);$i++) {
554$code.=<<___;
555 stw $t[0],`-$XOFF+$i*4`(%sp)
556 vshd $t[1],$t[2],$t[1]
557___
558push(@t,shift(@t));
559}
560$code.=<<___;
561 b L\$collected_pa1
562 stw $t[0],`-$XOFF+$i*4`(%sp)
563
564___
565}
566$code.=<<___;
567L\$aligned_pa1
568 ldw `0*4`($inp),$X[0]
569 ldw `1*4`($inp),$X[1]
570 ldw `2*4`($inp),$t2
571 ldw `3*4`($inp),$t3
572 ldw `4*4`($inp),$a0
573 ldw `5*4`($inp),$a1
574 ldw `6*4`($inp),$a2
575 ldw `7*4`($inp),$a3
576 stw $X[0],`-$XOFF+0*4`(%sp)
577 ldw `8*4`($inp),$t0
578 stw $X[1],`-$XOFF+1*4`(%sp)
579 ldw `9*4`($inp),$t1
580___
581{
582my @t=($t2,$t3,$a0,$a1,$a2,$a3,$t0,$t1);
583for ($i=2;$i<(128/4-8);$i++) {
584$code.=<<___;
585 stw $t[0],`-$XOFF+$i*4`(%sp)
586 ldw `(8+$i)*4`($inp),$t[0]
587___
588push(@t,shift(@t));
589}
590for (;$i<128/4;$i++) {
591$code.=<<___;
592 stw $t[0],`-$XOFF+$i*4`(%sp)
593___
594push(@t,shift(@t));
595}
596$code.="L\$collected_pa1\n";
597}
598
599for($i=0;$i<16;$i++) { &ROUND_00_15_pa1($i,@V); unshift(@V,pop(@V)); unshift(@V,pop(@V)); }
600$code.="L\$rounds_pa1\n";
601for(;$i<32;$i++) { &ROUND_16_xx_pa1($i,@V); unshift(@V,pop(@V)); unshift(@V,pop(@V)); }
602
603$code.=<<___;
604 $POP `-$FRAME_MARKER-2*$SIZE_T`(%sp),$ctx ; restore arguments
605 $POP `-$FRAME_MARKER-3*$SIZE_T`(%sp),$inp
606 $POP `-$FRAME_MARKER-4*$SIZE_T`(%sp),$num
607 ldo `-$rounds*$SZ`($Tbl),$Tbl ; rewind $Tbl
608
609 ldw `0*4`($ctx),$t1 ; update context
610 ldw `1*4`($ctx),$t0
611 ldw `2*4`($ctx),$t3
612 ldw `3*4`($ctx),$t2
613 ldw `4*4`($ctx),$a1
614 ldw `5*4`($ctx),$a0
615 ldw `6*4`($ctx),$a3
616 add $t0,$Alo,$Alo
617 ldw `7*4`($ctx),$a2
618 addc $t1,$Ahi,$Ahi
619 ldw `8*4`($ctx),$t1
620 add $t2,$Blo,$Blo
621 ldw `9*4`($ctx),$t0
622 addc $t3,$Bhi,$Bhi
623 ldw `10*4`($ctx),$t3
624 add $a0,$Clo,$Clo
625 ldw `11*4`($ctx),$t2
626 addc $a1,$Chi,$Chi
627 ldw `12*4`($ctx),$a1
628 add $a2,$Dlo,$Dlo
629 ldw `13*4`($ctx),$a0
630 addc $a3,$Dhi,$Dhi
631 ldw `14*4`($ctx),$a3
632 add $t0,$Elo,$Elo
633 ldw `15*4`($ctx),$a2
634 addc $t1,$Ehi,$Ehi
635 stw $Ahi,`0*4`($ctx)
636 add $t2,$Flo,$Flo
637 stw $Alo,`1*4`($ctx)
638 addc $t3,$Fhi,$Fhi
639 stw $Bhi,`2*4`($ctx)
640 add $a0,$Glo,$Glo
641 stw $Blo,`3*4`($ctx)
642 addc $a1,$Ghi,$Ghi
643 stw $Chi,`4*4`($ctx)
644 add $a2,$Hlo,$Hlo
645 stw $Clo,`5*4`($ctx)
646 addc $a3,$Hhi,$Hhi
647 stw $Dhi,`6*4`($ctx)
648 ldo `16*$SZ`($inp),$inp ; advance $inp
649 stw $Dlo,`7*4`($ctx)
650 stw $Ehi,`8*4`($ctx)
651 stw $Elo,`9*4`($ctx)
652 stw $Fhi,`10*4`($ctx)
653 stw $Flo,`11*4`($ctx)
654 stw $Ghi,`12*4`($ctx)
655 stw $Glo,`13*4`($ctx)
656 stw $Hhi,`14*4`($ctx)
657 comb,= $inp,$num,L\$done
658 stw $Hlo,`15*4`($ctx)
659 b L\$oop_pa1
660 $PUSH $inp,`-$FRAME_MARKER-3*$SIZE_T`(%sp) ; save $inp
661L\$done
662___
663}}
664$code.=<<___;
665 $POP `-$FRAME-$SAVED_RP`(%sp),%r2 ; standard epilogue
666 $POP `-$FRAME+1*$SIZE_T`(%sp),%r4
667 $POP `-$FRAME+2*$SIZE_T`(%sp),%r5
668 $POP `-$FRAME+3*$SIZE_T`(%sp),%r6
669 $POP `-$FRAME+4*$SIZE_T`(%sp),%r7
670 $POP `-$FRAME+5*$SIZE_T`(%sp),%r8
671 $POP `-$FRAME+6*$SIZE_T`(%sp),%r9
672 $POP `-$FRAME+7*$SIZE_T`(%sp),%r10
673 $POP `-$FRAME+8*$SIZE_T`(%sp),%r11
674 $POP `-$FRAME+9*$SIZE_T`(%sp),%r12
675 $POP `-$FRAME+10*$SIZE_T`(%sp),%r13
676 $POP `-$FRAME+11*$SIZE_T`(%sp),%r14
677 $POP `-$FRAME+12*$SIZE_T`(%sp),%r15
678 $POP `-$FRAME+13*$SIZE_T`(%sp),%r16
679 $POP `-$FRAME+14*$SIZE_T`(%sp),%r17
680 $POP `-$FRAME+15*$SIZE_T`(%sp),%r18
681 bv (%r2)
682 .EXIT
683 $POPMB -$FRAME(%sp),%r3
684 .PROCEND
685 .STRINGZ "SHA`64*$SZ` block transform for PA-RISC, CRYPTOGAMS by <appro\@openssl.org>"
686___
687
688# Explicitly encode PA-RISC 2.0 instructions used in this module, so
689# that it can be compiled with .LEVEL 1.0. It should be noted that I
690# wouldn't have to do this if the GNU assembler understood the .ALLOW 2.0
691# directive...
692
693my $ldd = sub {
694 my ($mod,$args) = @_;
695 my $orig = "ldd$mod\t$args";
696
697 if ($args =~ /(\-?[0-9]+)\(%r([0-9]+)\),%r([0-9]+)/) # format 3 suffices
698 { my $opcode=(0x14<<26)|($2<<21)|($3<<16)|(($1&0x1FF8)<<1)|(($1>>13)&1);
699 $opcode|=(1<<3) if ($mod =~ /^,m/);
700 $opcode|=(1<<2) if ($mod =~ /^,mb/);
701 sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
702 }
703 else { "\t".$orig; }
704};
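
To make the hand-encoding concrete, the following stand-alone sketch evaluates the same format-3 formula the $ldd helper above uses, for one arbitrarily chosen operand set (displacement 8, base %r5, target %r6); the operands are assumptions, the arithmetic is copied from the helper.

    #!/usr/bin/env perl
    # Stand-alone illustration of the .WORD emitted for "ldd 8(%r5),%r6" (sample operands only).
    my ($disp,$base,$t) = (8,5,6);
    my $opcode = (0x14<<26)|($base<<21)|($t<<16)|(($disp&0x1FF8)<<1)|(($disp>>13)&1);
    printf "\t.WORD\t0x%08x\t; ldd\t%d(%%r%d),%%r%d\n", $opcode, $disp, $base, $t;
    # prints: .WORD 0x50a60010 ; ldd 8(%r5),%r6
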
705
706my $std = sub {
707 my ($mod,$args) = @_;
708 my $orig = "std$mod\t$args";
709
710 if ($args =~ /%r([0-9]+),(\-?[0-9]+)\(%r([0-9]+)\)/) # format 3 suffices
711 { my $opcode=(0x1c<<26)|($3<<21)|($1<<16)|(($2&0x1FF8)<<1)|(($2>>13)&1);
712 sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
713 }
714 else { "\t".$orig; }
715};
716
717my $extrd = sub {
718 my ($mod,$args) = @_;
719 my $orig = "extrd$mod\t$args";
720
721 # I only have ",u" completer, it's implicitly encoded...
722 if ($args =~ /%r([0-9]+),([0-9]+),([0-9]+),%r([0-9]+)/) # format 15
723 { my $opcode=(0x36<<26)|($1<<21)|($4<<16);
724 my $len=32-$3;
725 $opcode |= (($2&0x20)<<6)|(($2&0x1f)<<5); # encode pos
726 $opcode |= (($len&0x20)<<7)|($len&0x1f); # encode len
727 sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
728 }
729 elsif ($args =~ /%r([0-9]+),%sar,([0-9]+),%r([0-9]+)/) # format 12
730 { my $opcode=(0x34<<26)|($1<<21)|($3<<16)|(2<<11)|(1<<9);
731 my $len=32-$2;
732 $opcode |= (($len&0x20)<<3)|($len&0x1f); # encode len
733 $opcode |= (1<<13) if ($mod =~ /,\**=/);
734 sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
735 }
736 else { "\t".$orig; }
737};
738
739my $shrpd = sub {
740 my ($mod,$args) = @_;
741 my $orig = "shrpd$mod\t$args";
742
743 if ($args =~ /%r([0-9]+),%r([0-9]+),([0-9]+),%r([0-9]+)/) # format 14
744 { my $opcode=(0x34<<26)|($2<<21)|($1<<16)|(1<<10)|$4;
745 my $cpos=63-$3;
746 $opcode |= (($cpos&0x20)<<6)|(($cpos&0x1f)<<5); # encode sa
747 sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
748 }
749 elsif ($args =~ /%r([0-9]+),%r([0-9]+),%sar,%r([0-9]+)/) # format 11
750 { sprintf "\t.WORD\t0x%08x\t; %s",
751 (0x34<<26)|($2<<21)|($1<<16)|(1<<9)|$3,$orig;
752 }
753 else { "\t".$orig; }
754};
755
756sub assemble {
757 my ($mnemonic,$mod,$args)=@_;
758 my $opcode = eval("\$$mnemonic");
759
760 ref($opcode) eq 'CODE' ? &$opcode($mod,$args) : "\t$mnemonic$mod\t$args";
761}
762
763foreach (split("\n",$code)) {
764 s/\`([^\`]*)\`/eval $1/ge;
765
766 s/shd\s+(%r[0-9]+),(%r[0-9]+),([0-9]+)/
767 $3>31 ? sprintf("shd\t%$2,%$1,%d",$3-32) # rotation for >=32
768 : sprintf("shd\t%$1,%$2,%d",$3)/e or
769	# translate made-up instructions: _ror, _shr, _align, _shl
770 s/_ror(\s+)(%r[0-9]+),/
771 ($SZ==4 ? "shd" : "shrpd")."$1$2,$2,"/e or
772
773 s/_shr(\s+%r[0-9]+),([0-9]+),/
774 $SZ==4 ? sprintf("extru%s,%d,%d,",$1,31-$2,32-$2)
775 : sprintf("extrd,u%s,%d,%d,",$1,63-$2,64-$2)/e or
776
777 s/_align(\s+%r[0-9]+,%r[0-9]+),/
778 ($SZ==4 ? "vshd$1," : "shrpd$1,%sar,")/e or
779
780 s/_shl(\s+%r[0-9]+),([0-9]+),/
781 $SIZE_T==4 ? sprintf("zdep%s,%d,%d,",$1,31-$2,32-$2)
782 : sprintf("depd,z%s,%d,%d,",$1,63-$2,64-$2)/e;
783
784 s/^\s+([a-z]+)([\S]*)\s+([\S]*)/&assemble($1,$2,$3)/e if ($SIZE_T==4);
785
786 s/cmpb,\*/comb,/ if ($SIZE_T==4);
787
788 print $_,"\n";
789}
790
791close STDOUT;
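
The made-up mnemonics handled by the translation loop above are easiest to follow by running one substitution by hand. A minimal sketch, applying the _ror rule to a sample line for both digest widths (register and rotate count are arbitrary):

    #!/usr/bin/env perl
    # Illustration of the _ror translation above; sample operands only.
    for my $SZ (4, 8) {
        my $line = "\t_ror\t%r21,14,%r17";
        $line =~ s/_ror(\s+)(%r[0-9]+),/($SZ==4 ? "shd" : "shrpd")."$1$2,$2,"/e;
        print "SZ=$SZ:$line\n";
    }
    # SZ=4:	shd	%r21,%r21,14,%r17    (32-bit rotate via shift-double of a register with itself)
    # SZ=8:	shrpd	%r21,%r21,14,%r17    (64-bit rotate; .WORD-encoded later when SIZE_T==4)
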
diff --git a/src/lib/libssl/src/crypto/sha/asm/sha512-ppc.pl b/src/lib/libssl/src/crypto/sha/asm/sha512-ppc.pl
index 768a6a6fad..6b44a68e59 100755
--- a/src/lib/libssl/src/crypto/sha/asm/sha512-ppc.pl
+++ b/src/lib/libssl/src/crypto/sha/asm/sha512-ppc.pl
@@ -40,6 +40,7 @@ $output =shift;
40 40
41if ($flavour =~ /64/) { 41if ($flavour =~ /64/) {
42 $SIZE_T=8; 42 $SIZE_T=8;
43 $LRSAVE=2*$SIZE_T;
43 $STU="stdu"; 44 $STU="stdu";
44 $UCMP="cmpld"; 45 $UCMP="cmpld";
45 $SHL="sldi"; 46 $SHL="sldi";
@@ -47,6 +48,7 @@ if ($flavour =~ /64/) {
47 $PUSH="std"; 48 $PUSH="std";
48} elsif ($flavour =~ /32/) { 49} elsif ($flavour =~ /32/) {
49 $SIZE_T=4; 50 $SIZE_T=4;
51 $LRSAVE=$SIZE_T;
50 $STU="stwu"; 52 $STU="stwu";
51 $UCMP="cmplw"; 53 $UCMP="cmplw";
52 $SHL="slwi"; 54 $SHL="slwi";
@@ -87,7 +89,8 @@ if ($output =~ /512/) {
87 $SHR="srwi"; 89 $SHR="srwi";
88} 90}
89 91
90$FRAME=32*$SIZE_T; 92$FRAME=32*$SIZE_T+16*$SZ;
93$LOCALS=6*$SIZE_T;
91 94
92$sp ="r1"; 95$sp ="r1";
93$toc="r2"; 96$toc="r2";
@@ -179,13 +182,12 @@ $code=<<___;
179.globl $func 182.globl $func
180.align 6 183.align 6
181$func: 184$func:
185 $STU $sp,-$FRAME($sp)
182 mflr r0 186 mflr r0
183 $STU $sp,`-($FRAME+16*$SZ)`($sp)
184 $SHL $num,$num,`log(16*$SZ)/log(2)` 187 $SHL $num,$num,`log(16*$SZ)/log(2)`
185 188
186 $PUSH $ctx,`$FRAME-$SIZE_T*22`($sp) 189 $PUSH $ctx,`$FRAME-$SIZE_T*22`($sp)
187 190
188 $PUSH r0,`$FRAME-$SIZE_T*21`($sp)
189 $PUSH $toc,`$FRAME-$SIZE_T*20`($sp) 191 $PUSH $toc,`$FRAME-$SIZE_T*20`($sp)
190 $PUSH r13,`$FRAME-$SIZE_T*19`($sp) 192 $PUSH r13,`$FRAME-$SIZE_T*19`($sp)
191 $PUSH r14,`$FRAME-$SIZE_T*18`($sp) 193 $PUSH r14,`$FRAME-$SIZE_T*18`($sp)
@@ -206,6 +208,7 @@ $func:
206 $PUSH r29,`$FRAME-$SIZE_T*3`($sp) 208 $PUSH r29,`$FRAME-$SIZE_T*3`($sp)
207 $PUSH r30,`$FRAME-$SIZE_T*2`($sp) 209 $PUSH r30,`$FRAME-$SIZE_T*2`($sp)
208 $PUSH r31,`$FRAME-$SIZE_T*1`($sp) 210 $PUSH r31,`$FRAME-$SIZE_T*1`($sp)
211 $PUSH r0,`$FRAME+$LRSAVE`($sp)
209 212
210 $LD $A,`0*$SZ`($ctx) 213 $LD $A,`0*$SZ`($ctx)
211 mr $inp,r4 ; incarnate $inp 214 mr $inp,r4 ; incarnate $inp
@@ -217,7 +220,7 @@ $func:
217 $LD $G,`6*$SZ`($ctx) 220 $LD $G,`6*$SZ`($ctx)
218 $LD $H,`7*$SZ`($ctx) 221 $LD $H,`7*$SZ`($ctx)
219 222
220 b LPICmeup 223 bl LPICmeup
221LPICedup: 224LPICedup:
222 andi. r0,$inp,3 225 andi. r0,$inp,3
223 bne Lunaligned 226 bne Lunaligned
@@ -226,40 +229,14 @@ Laligned:
226 $PUSH $num,`$FRAME-$SIZE_T*24`($sp) ; end pointer 229 $PUSH $num,`$FRAME-$SIZE_T*24`($sp) ; end pointer
227 $PUSH $inp,`$FRAME-$SIZE_T*23`($sp) ; inp pointer 230 $PUSH $inp,`$FRAME-$SIZE_T*23`($sp) ; inp pointer
228 bl Lsha2_block_private 231 bl Lsha2_block_private
229Ldone: 232 b Ldone
230 $POP r0,`$FRAME-$SIZE_T*21`($sp)
231 $POP $toc,`$FRAME-$SIZE_T*20`($sp)
232 $POP r13,`$FRAME-$SIZE_T*19`($sp)
233 $POP r14,`$FRAME-$SIZE_T*18`($sp)
234 $POP r15,`$FRAME-$SIZE_T*17`($sp)
235 $POP r16,`$FRAME-$SIZE_T*16`($sp)
236 $POP r17,`$FRAME-$SIZE_T*15`($sp)
237 $POP r18,`$FRAME-$SIZE_T*14`($sp)
238 $POP r19,`$FRAME-$SIZE_T*13`($sp)
239 $POP r20,`$FRAME-$SIZE_T*12`($sp)
240 $POP r21,`$FRAME-$SIZE_T*11`($sp)
241 $POP r22,`$FRAME-$SIZE_T*10`($sp)
242 $POP r23,`$FRAME-$SIZE_T*9`($sp)
243 $POP r24,`$FRAME-$SIZE_T*8`($sp)
244 $POP r25,`$FRAME-$SIZE_T*7`($sp)
245 $POP r26,`$FRAME-$SIZE_T*6`($sp)
246 $POP r27,`$FRAME-$SIZE_T*5`($sp)
247 $POP r28,`$FRAME-$SIZE_T*4`($sp)
248 $POP r29,`$FRAME-$SIZE_T*3`($sp)
249 $POP r30,`$FRAME-$SIZE_T*2`($sp)
250 $POP r31,`$FRAME-$SIZE_T*1`($sp)
251 mtlr r0
252 addi $sp,$sp,`$FRAME+16*$SZ`
253 blr
254___
255 233
256# PowerPC specification allows an implementation to be ill-behaved 234; PowerPC specification allows an implementation to be ill-behaved
257# upon unaligned access which crosses page boundary. "Better safe 235; upon unaligned access which crosses page boundary. "Better safe
258# than sorry" principle makes me treat it specially. But I don't 236; than sorry" principle makes me treat it specially. But I don't
259# look for particular offending word, but rather for the input 237; look for particular offending word, but rather for the input
260# block which crosses the boundary. Once found that block is aligned 238; block which crosses the boundary. Once found that block is aligned
261# and hashed separately... 239; and hashed separately...
262$code.=<<___;
263.align 4 240.align 4
264Lunaligned: 241Lunaligned:
265 subfic $t1,$inp,4096 242 subfic $t1,$inp,4096
@@ -278,7 +255,7 @@ Lunaligned:
278Lcross_page: 255Lcross_page:
279 li $t1,`16*$SZ/4` 256 li $t1,`16*$SZ/4`
280 mtctr $t1 257 mtctr $t1
281 addi r20,$sp,$FRAME ; aligned spot below the frame 258 addi r20,$sp,$LOCALS ; aligned spot below the frame
282Lmemcpy: 259Lmemcpy:
283 lbz r16,0($inp) 260 lbz r16,0($inp)
284 lbz r17,1($inp) 261 lbz r17,1($inp)
@@ -293,8 +270,8 @@ Lmemcpy:
293 bdnz Lmemcpy 270 bdnz Lmemcpy
294 271
295 $PUSH $inp,`$FRAME-$SIZE_T*26`($sp) ; save real inp 272 $PUSH $inp,`$FRAME-$SIZE_T*26`($sp) ; save real inp
296 addi $t1,$sp,`$FRAME+16*$SZ` ; fictitious end pointer 273 addi $t1,$sp,`$LOCALS+16*$SZ` ; fictitious end pointer
297 addi $inp,$sp,$FRAME ; fictitious inp pointer 274 addi $inp,$sp,$LOCALS ; fictitious inp pointer
298 $PUSH $num,`$FRAME-$SIZE_T*25`($sp) ; save real num 275 $PUSH $num,`$FRAME-$SIZE_T*25`($sp) ; save real num
299 $PUSH $t1,`$FRAME-$SIZE_T*24`($sp) ; end pointer 276 $PUSH $t1,`$FRAME-$SIZE_T*24`($sp) ; end pointer
300 $PUSH $inp,`$FRAME-$SIZE_T*23`($sp) ; inp pointer 277 $PUSH $inp,`$FRAME-$SIZE_T*23`($sp) ; inp pointer
@@ -303,10 +280,36 @@ Lmemcpy:
303 $POP $num,`$FRAME-$SIZE_T*25`($sp) ; restore real num 280 $POP $num,`$FRAME-$SIZE_T*25`($sp) ; restore real num
304 addic. $num,$num,`-16*$SZ` ; num-- 281 addic. $num,$num,`-16*$SZ` ; num--
305 bne- Lunaligned 282 bne- Lunaligned
306 b Ldone
307___
308 283
309$code.=<<___; 284Ldone:
285 $POP r0,`$FRAME+$LRSAVE`($sp)
286 $POP $toc,`$FRAME-$SIZE_T*20`($sp)
287 $POP r13,`$FRAME-$SIZE_T*19`($sp)
288 $POP r14,`$FRAME-$SIZE_T*18`($sp)
289 $POP r15,`$FRAME-$SIZE_T*17`($sp)
290 $POP r16,`$FRAME-$SIZE_T*16`($sp)
291 $POP r17,`$FRAME-$SIZE_T*15`($sp)
292 $POP r18,`$FRAME-$SIZE_T*14`($sp)
293 $POP r19,`$FRAME-$SIZE_T*13`($sp)
294 $POP r20,`$FRAME-$SIZE_T*12`($sp)
295 $POP r21,`$FRAME-$SIZE_T*11`($sp)
296 $POP r22,`$FRAME-$SIZE_T*10`($sp)
297 $POP r23,`$FRAME-$SIZE_T*9`($sp)
298 $POP r24,`$FRAME-$SIZE_T*8`($sp)
299 $POP r25,`$FRAME-$SIZE_T*7`($sp)
300 $POP r26,`$FRAME-$SIZE_T*6`($sp)
301 $POP r27,`$FRAME-$SIZE_T*5`($sp)
302 $POP r28,`$FRAME-$SIZE_T*4`($sp)
303 $POP r29,`$FRAME-$SIZE_T*3`($sp)
304 $POP r30,`$FRAME-$SIZE_T*2`($sp)
305 $POP r31,`$FRAME-$SIZE_T*1`($sp)
306 mtlr r0
307 addi $sp,$sp,$FRAME
308 blr
309 .long 0
310 .byte 0,12,4,1,0x80,18,3,0
311 .long 0
312
310.align 4 313.align 4
311Lsha2_block_private: 314Lsha2_block_private:
312___ 315___
@@ -372,6 +375,8 @@ $code.=<<___;
372 $ST $H,`7*$SZ`($ctx) 375 $ST $H,`7*$SZ`($ctx)
373 bne Lsha2_block_private 376 bne Lsha2_block_private
374 blr 377 blr
378 .long 0
379 .byte 0,12,0x14,0,0,0,0,0
375___ 380___
376 381
377# Ugly hack here, because PPC assembler syntax seem to vary too 382# Ugly hack here, because PPC assembler syntax seem to vary too
@@ -379,22 +384,15 @@ ___
379$code.=<<___; 384$code.=<<___;
380.align 6 385.align 6
381LPICmeup: 386LPICmeup:
382 bl LPIC 387 mflr r0
383 addi $Tbl,$Tbl,`64-4` ; "distance" between . and last nop 388 bcl 20,31,\$+4
384 b LPICedup 389 mflr $Tbl ; vvvvvv "distance" between . and 1st data entry
385 nop 390 addi $Tbl,$Tbl,`64-8`
386 nop 391 mtlr r0
387 nop
388 nop
389 nop
390LPIC: mflr $Tbl
391 blr 392 blr
392 nop 393 .long 0
393 nop 394 .byte 0,12,0x14,0,0,0,0,0
394 nop 395 .space `64-9*4`
395 nop
396 nop
397 nop
398___ 396___
399$code.=<<___ if ($SZ==8); 397$code.=<<___ if ($SZ==8);
400 .long 0x428a2f98,0xd728ae22,0x71374491,0x23ef65cd 398 .long 0x428a2f98,0xd728ae22,0x71374491,0x23ef65cd
diff --git a/src/lib/libssl/src/crypto/sha/asm/sha512-s390x.pl b/src/lib/libssl/src/crypto/sha/asm/sha512-s390x.pl
index e7ef2d5a9f..079a3fc78a 100644
--- a/src/lib/libssl/src/crypto/sha/asm/sha512-s390x.pl
+++ b/src/lib/libssl/src/crypto/sha/asm/sha512-s390x.pl
@@ -26,6 +26,26 @@
26# favour dual-issue z10 pipeline. Hardware SHA256/512 is ~4.7x faster 26# favour dual-issue z10 pipeline. Hardware SHA256/512 is ~4.7x faster
27# than software. 27# than software.
28 28
29# November 2010.
30#
31# Adapt for -m31 build. If the kernel supports what's called the "highgprs"
32# feature on Linux [see /proc/cpuinfo], it's possible to use 64-bit
33# instructions and achieve "64-bit" performance even in a 31-bit legacy
34# application context. The feature is not specific to any particular
35# processor, as long as it's a "z-CPU". The latter implies that the code
36# remains z/Architecture specific. On z900, SHA256 was measured to perform
37# 2.4x better, and SHA512 13x better, than code generated by gcc 4.3.
38
39$flavour = shift;
40
41if ($flavour =~ /3[12]/) {
42 $SIZE_T=4;
43 $g="";
44} else {
45 $SIZE_T=8;
46 $g="g";
47}
48
29$t0="%r0"; 49$t0="%r0";
30$t1="%r1"; 50$t1="%r1";
31$ctx="%r2"; $t2="%r2"; 51$ctx="%r2"; $t2="%r2";
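
The $g suffix chosen by the flavour test above is what lets one source serve both builds: interpolated into mnemonics later in this diff (stm${g}, st${g}, l${g}, cl${g}, lm${g}), it expands to nothing for 31-bit code and to "g" for 64-bit code. A minimal sketch of that mapping, using only the flavour test shown above:

    # Minimal sketch of the flavour-to-mnemonic mapping used in this file.
    for my $flavour ("31", "64") {
        my $g = ($flavour =~ /3[12]/) ? "" : "g";
        print "$flavour-bit: ", join(" ", "stm$g", "st$g", "l$g", "cl$g", "lm$g"), "\n";
    }
    # 31-bit: stm st l cl lm
    # 64-bit: stmg stg lg clg lmg
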
@@ -44,7 +64,7 @@ $tbl="%r13";
44$T1="%r14"; 64$T1="%r14";
45$sp="%r15"; 65$sp="%r15";
46 66
47$output=shift; 67while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
48open STDOUT,">$output"; 68open STDOUT,">$output";
49 69
50if ($output =~ /512/) { 70if ($output =~ /512/) {
@@ -78,7 +98,8 @@ if ($output =~ /512/) {
78} 98}
79$Func="sha${label}_block_data_order"; 99$Func="sha${label}_block_data_order";
80$Table="K${label}"; 100$Table="K${label}";
81$frame=160+16*$SZ; 101$stdframe=16*$SIZE_T+4*8;
102$frame=$stdframe+16*$SZ;
82 103
83sub BODY_00_15 { 104sub BODY_00_15 {
84my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_; 105my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_;
@@ -93,9 +114,9 @@ $code.=<<___;
93 xgr $t0,$t1 114 xgr $t0,$t1
94 $ROT $t1,$t1,`$Sigma1[2]-$Sigma1[1]` 115 $ROT $t1,$t1,`$Sigma1[2]-$Sigma1[1]`
95 xgr $t2,$g 116 xgr $t2,$g
96 $ST $T1,`160+$SZ*($i%16)`($sp) 117 $ST $T1,`$stdframe+$SZ*($i%16)`($sp)
97 xgr $t0,$t1 # Sigma1(e) 118 xgr $t0,$t1 # Sigma1(e)
98 la $T1,0($T1,$h) # T1+=h 119 algr $T1,$h # T1+=h
99 ngr $t2,$e 120 ngr $t2,$e
100 lgr $t1,$a 121 lgr $t1,$a
101 algr $T1,$t0 # T1+=Sigma1(e) 122 algr $T1,$t0 # T1+=Sigma1(e)
@@ -113,7 +134,7 @@ $code.=<<___;
113 ngr $t2,$b 134 ngr $t2,$b
114 algr $h,$T1 # h+=T1 135 algr $h,$T1 # h+=T1
115 ogr $t2,$t1 # Maj(a,b,c) 136 ogr $t2,$t1 # Maj(a,b,c)
116 la $d,0($d,$T1) # d+=T1 137 algr $d,$T1 # d+=T1
117 algr $h,$t2 # h+=Maj(a,b,c) 138 algr $h,$t2 # h+=Maj(a,b,c)
118___ 139___
119} 140}
@@ -122,19 +143,19 @@ sub BODY_16_XX {
122my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_; 143my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_;
123 144
124$code.=<<___; 145$code.=<<___;
125 $LD $T1,`160+$SZ*(($i+1)%16)`($sp) ### $i 146 $LD $T1,`$stdframe+$SZ*(($i+1)%16)`($sp) ### $i
126 $LD $t1,`160+$SZ*(($i+14)%16)`($sp) 147 $LD $t1,`$stdframe+$SZ*(($i+14)%16)`($sp)
127 $ROT $t0,$T1,$sigma0[0] 148 $ROT $t0,$T1,$sigma0[0]
128 $SHR $T1,$sigma0[2] 149 $SHR $T1,$sigma0[2]
129 $ROT $t2,$t0,`$sigma0[1]-$sigma0[0]` 150 $ROT $t2,$t0,`$sigma0[1]-$sigma0[0]`
130 xgr $T1,$t0 151 xgr $T1,$t0
131 $ROT $t0,$t1,$sigma1[0] 152 $ROT $t0,$t1,$sigma1[0]
132 xgr $T1,$t2 # sigma0(X[i+1]) 153 xgr $T1,$t2 # sigma0(X[i+1])
133 $SHR $t1,$sigma1[2] 154 $SHR $t1,$sigma1[2]
134 $ADD $T1,`160+$SZ*($i%16)`($sp) # +=X[i] 155 $ADD $T1,`$stdframe+$SZ*($i%16)`($sp) # +=X[i]
135 xgr $t1,$t0 156 xgr $t1,$t0
136 $ROT $t0,$t0,`$sigma1[1]-$sigma1[0]` 157 $ROT $t0,$t0,`$sigma1[1]-$sigma1[0]`
137 $ADD $T1,`160+$SZ*(($i+9)%16)`($sp) # +=X[i+9] 158 $ADD $T1,`$stdframe+$SZ*(($i+9)%16)`($sp) # +=X[i+9]
138 xgr $t1,$t0 # sigma1(X[i+14]) 159 xgr $t1,$t0 # sigma1(X[i+14])
139 algr $T1,$t1 # +=sigma1(X[i+14]) 160 algr $T1,$t1 # +=sigma1(X[i+14])
140___ 161___
@@ -212,6 +233,7 @@ $code.=<<___;
212.globl $Func 233.globl $Func
213.type $Func,\@function 234.type $Func,\@function
214$Func: 235$Func:
236 sllg $len,$len,`log(16*$SZ)/log(2)`
215___ 237___
216$code.=<<___ if ($kimdfunc); 238$code.=<<___ if ($kimdfunc);
217 larl %r1,OPENSSL_s390xcap_P 239 larl %r1,OPENSSL_s390xcap_P
@@ -219,15 +241,15 @@ $code.=<<___ if ($kimdfunc);
219 tmhl %r0,0x4000 # check for message-security assist 241 tmhl %r0,0x4000 # check for message-security assist
220 jz .Lsoftware 242 jz .Lsoftware
221 lghi %r0,0 243 lghi %r0,0
222 la %r1,16($sp) 244 la %r1,`2*$SIZE_T`($sp)
223 .long 0xb93e0002 # kimd %r0,%r2 245 .long 0xb93e0002 # kimd %r0,%r2
224 lg %r0,16($sp) 246 lg %r0,`2*$SIZE_T`($sp)
225 tmhh %r0,`0x8000>>$kimdfunc` 247 tmhh %r0,`0x8000>>$kimdfunc`
226 jz .Lsoftware 248 jz .Lsoftware
227 lghi %r0,$kimdfunc 249 lghi %r0,$kimdfunc
228 lgr %r1,$ctx 250 lgr %r1,$ctx
229 lgr %r2,$inp 251 lgr %r2,$inp
230 sllg %r3,$len,`log(16*$SZ)/log(2)` 252 lgr %r3,$len
231 .long 0xb93e0002 # kimd %r0,%r2 253 .long 0xb93e0002 # kimd %r0,%r2
232 brc 1,.-4 # pay attention to "partial completion" 254 brc 1,.-4 # pay attention to "partial completion"
233 br %r14 255 br %r14
@@ -235,13 +257,12 @@ $code.=<<___ if ($kimdfunc);
235.Lsoftware: 257.Lsoftware:
236___ 258___
237$code.=<<___; 259$code.=<<___;
238 sllg $len,$len,`log(16*$SZ)/log(2)`
239 lghi %r1,-$frame 260 lghi %r1,-$frame
240 agr $len,$inp 261 la $len,0($len,$inp)
241 stmg $ctx,%r15,16($sp) 262 stm${g} $ctx,%r15,`2*$SIZE_T`($sp)
242 lgr %r0,$sp 263 lgr %r0,$sp
243 la $sp,0(%r1,$sp) 264 la $sp,0(%r1,$sp)
244 stg %r0,0($sp) 265 st${g} %r0,0($sp)
245 266
246 larl $tbl,$Table 267 larl $tbl,$Table
247 $LD $A,`0*$SZ`($ctx) 268 $LD $A,`0*$SZ`($ctx)
@@ -265,7 +286,7 @@ $code.=<<___;
265 clgr $len,$t0 286 clgr $len,$t0
266 jne .Lrounds_16_xx 287 jne .Lrounds_16_xx
267 288
268 lg $ctx,`$frame+16`($sp) 289 l${g} $ctx,`$frame+2*$SIZE_T`($sp)
269 la $inp,`16*$SZ`($inp) 290 la $inp,`16*$SZ`($inp)
270 $ADD $A,`0*$SZ`($ctx) 291 $ADD $A,`0*$SZ`($ctx)
271 $ADD $B,`1*$SZ`($ctx) 292 $ADD $B,`1*$SZ`($ctx)
@@ -283,14 +304,14 @@ $code.=<<___;
283 $ST $F,`5*$SZ`($ctx) 304 $ST $F,`5*$SZ`($ctx)
284 $ST $G,`6*$SZ`($ctx) 305 $ST $G,`6*$SZ`($ctx)
285 $ST $H,`7*$SZ`($ctx) 306 $ST $H,`7*$SZ`($ctx)
286 clg $inp,`$frame+32`($sp) 307 cl${g} $inp,`$frame+4*$SIZE_T`($sp)
287 jne .Lloop 308 jne .Lloop
288 309
289 lmg %r6,%r15,`$frame+48`($sp) 310 lm${g} %r6,%r15,`$frame+6*$SIZE_T`($sp)
290 br %r14 311 br %r14
291.size $Func,.-$Func 312.size $Func,.-$Func
292.string "SHA${label} block transform for s390x, CRYPTOGAMS by <appro\@openssl.org>" 313.string "SHA${label} block transform for s390x, CRYPTOGAMS by <appro\@openssl.org>"
293.comm OPENSSL_s390xcap_P,8,8 314.comm OPENSSL_s390xcap_P,16,8
294___ 315___
295 316
296$code =~ s/\`([^\`]*)\`/eval $1/gem; 317$code =~ s/\`([^\`]*)\`/eval $1/gem;
diff --git a/src/lib/libssl/src/crypto/sha/asm/sha512-sparcv9.pl b/src/lib/libssl/src/crypto/sha/asm/sha512-sparcv9.pl
index ec5d78135e..585740789e 100644
--- a/src/lib/libssl/src/crypto/sha/asm/sha512-sparcv9.pl
+++ b/src/lib/libssl/src/crypto/sha/asm/sha512-sparcv9.pl
@@ -305,9 +305,9 @@ $code.=<<___;
305 srlx @X[(($i+9)/2)%8],32,$tmp1 ! X[i+9] 305 srlx @X[(($i+9)/2)%8],32,$tmp1 ! X[i+9]
306 xor $tmp0,$tmp2,$tmp2 ! sigma1(X[i+14]) 306 xor $tmp0,$tmp2,$tmp2 ! sigma1(X[i+14])
307 srl @X[($i/2)%8],0,$tmp0 307 srl @X[($i/2)%8],0,$tmp0
308 add $tmp2,$tmp1,$tmp1
308 add $xi,$T1,$T1 ! +=X[i] 309 add $xi,$T1,$T1 ! +=X[i]
309 xor $tmp0,@X[($i/2)%8],@X[($i/2)%8] 310 xor $tmp0,@X[($i/2)%8],@X[($i/2)%8]
310 add $tmp2,$T1,$T1
311 add $tmp1,$T1,$T1 311 add $tmp1,$T1,$T1
312 312
313 srl $T1,0,$T1 313 srl $T1,0,$T1
@@ -318,9 +318,9 @@ ___
318$code.=<<___; 318$code.=<<___;
319 srlx @X[($i/2)%8],32,$tmp1 ! X[i] 319 srlx @X[($i/2)%8],32,$tmp1 ! X[i]
320 xor $tmp0,$tmp2,$tmp2 ! sigma1(X[i+14]) 320 xor $tmp0,$tmp2,$tmp2 ! sigma1(X[i+14])
321 srl @X[($i/2)%8],0,@X[($i/2)%8]
322 add $xi,$T1,$T1 ! +=X[i+9] 321 add $xi,$T1,$T1 ! +=X[i+9]
323 add $tmp2,$T1,$T1 322 add $tmp2,$tmp1,$tmp1
323 srl @X[($i/2)%8],0,@X[($i/2)%8]
324 add $tmp1,$T1,$T1 324 add $tmp1,$T1,$T1
325 325
326 sllx $T1,32,$tmp0 326 sllx $T1,32,$tmp0
diff --git a/src/lib/libssl/src/crypto/sha/asm/sha512-x86_64.pl b/src/lib/libssl/src/crypto/sha/asm/sha512-x86_64.pl
index e6643f8cf6..f611a2d898 100755
--- a/src/lib/libssl/src/crypto/sha/asm/sha512-x86_64.pl
+++ b/src/lib/libssl/src/crypto/sha/asm/sha512-x86_64.pl
@@ -95,50 +95,44 @@ sub ROUND_00_15()
95{ my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_; 95{ my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_;
96 96
97$code.=<<___; 97$code.=<<___;
98 mov $e,$a0 98 ror \$`$Sigma1[2]-$Sigma1[1]`,$a0
99 mov $e,$a1
100 mov $f,$a2 99 mov $f,$a2
100 mov $T1,`$SZ*($i&0xf)`(%rsp)
101 101
102 ror \$$Sigma1[0],$a0 102 ror \$`$Sigma0[2]-$Sigma0[1]`,$a1
103 ror \$$Sigma1[1],$a1 103 xor $e,$a0
104 xor $g,$a2 # f^g 104 xor $g,$a2 # f^g
105 105
106 xor $a1,$a0 106 ror \$`$Sigma1[1]-$Sigma1[0]`,$a0
107 ror \$`$Sigma1[2]-$Sigma1[1]`,$a1 107 add $h,$T1 # T1+=h
108 xor $a,$a1
109
110 add ($Tbl,$round,$SZ),$T1 # T1+=K[round]
108 and $e,$a2 # (f^g)&e 111 and $e,$a2 # (f^g)&e
109 mov $T1,`$SZ*($i&0xf)`(%rsp) 112 mov $b,$h
110 113
111 xor $a1,$a0 # Sigma1(e) 114 ror \$`$Sigma0[1]-$Sigma0[0]`,$a1
115 xor $e,$a0
112 xor $g,$a2 # Ch(e,f,g)=((f^g)&e)^g 116 xor $g,$a2 # Ch(e,f,g)=((f^g)&e)^g
113 add $h,$T1 # T1+=h
114
115 mov $a,$h
116 add $a0,$T1 # T1+=Sigma1(e)
117 117
118 xor $c,$h # b^c
119 xor $a,$a1
118 add $a2,$T1 # T1+=Ch(e,f,g) 120 add $a2,$T1 # T1+=Ch(e,f,g)
119 mov $a,$a0 121 mov $b,$a2
120 mov $a,$a1
121 122
122 ror \$$Sigma0[0],$h 123 ror \$$Sigma1[0],$a0 # Sigma1(e)
123 ror \$$Sigma0[1],$a0 124 and $a,$h # h=(b^c)&a
124 mov $a,$a2 125 and $c,$a2 # b&c
125 add ($Tbl,$round,$SZ),$T1 # T1+=K[round]
126 126
127 xor $a0,$h 127 ror \$$Sigma0[0],$a1 # Sigma0(a)
128 ror \$`$Sigma0[2]-$Sigma0[1]`,$a0 128 add $a0,$T1 # T1+=Sigma1(e)
129 or $c,$a1 # a|c 129 add $a2,$h # h+=b&c (completes +=Maj(a,b,c))
130 130
131 xor $a0,$h # h=Sigma0(a)
132 and $c,$a2 # a&c
133 add $T1,$d # d+=T1 131 add $T1,$d # d+=T1
134
135 and $b,$a1 # (a|c)&b
136 add $T1,$h # h+=T1 132 add $T1,$h # h+=T1
137
138 or $a2,$a1 # Maj(a,b,c)=((a|c)&b)|(a&c)
139 lea 1($round),$round # round++ 133 lea 1($round),$round # round++
134 add $a1,$h # h+=Sigma0(a)
140 135
141 add $a1,$h # h+=Maj(a,b,c)
142___ 136___
143} 137}
144 138
@@ -147,32 +141,30 @@ sub ROUND_16_XX()
147 141
148$code.=<<___; 142$code.=<<___;
149 mov `$SZ*(($i+1)&0xf)`(%rsp),$a0 143 mov `$SZ*(($i+1)&0xf)`(%rsp),$a0
150 mov `$SZ*(($i+14)&0xf)`(%rsp),$T1 144 mov `$SZ*(($i+14)&0xf)`(%rsp),$a1
151 145 mov $a0,$T1
152 mov $a0,$a2 146 mov $a1,$a2
153 147
148 ror \$`$sigma0[1]-$sigma0[0]`,$T1
149 xor $a0,$T1
154 shr \$$sigma0[2],$a0 150 shr \$$sigma0[2],$a0
155 ror \$$sigma0[0],$a2
156
157 xor $a2,$a0
158 ror \$`$sigma0[1]-$sigma0[0]`,$a2
159 151
160 xor $a2,$a0 # sigma0(X[(i+1)&0xf]) 152 ror \$$sigma0[0],$T1
161 mov $T1,$a1 153 xor $T1,$a0 # sigma0(X[(i+1)&0xf])
154 mov `$SZ*(($i+9)&0xf)`(%rsp),$T1
162 155
163 shr \$$sigma1[2],$T1 156 ror \$`$sigma1[1]-$sigma1[0]`,$a2
164 ror \$$sigma1[0],$a1 157 xor $a1,$a2
165 158 shr \$$sigma1[2],$a1
166 xor $a1,$T1
167 ror \$`$sigma1[1]-$sigma1[0]`,$a1
168
169 xor $a1,$T1 # sigma1(X[(i+14)&0xf])
170 159
160 ror \$$sigma1[0],$a2
171 add $a0,$T1 161 add $a0,$T1
172 162 xor $a2,$a1 # sigma1(X[(i+14)&0xf])
173 add `$SZ*(($i+9)&0xf)`(%rsp),$T1
174 163
175 add `$SZ*($i&0xf)`(%rsp),$T1 164 add `$SZ*($i&0xf)`(%rsp),$T1
165 mov $e,$a0
166 add $a1,$T1
167 mov $a,$a1
176___ 168___
177 &ROUND_00_15(@_); 169 &ROUND_00_15(@_);
178} 170}
@@ -219,6 +211,8 @@ $func:
219___ 211___
220 for($i=0;$i<16;$i++) { 212 for($i=0;$i<16;$i++) {
221 $code.=" mov $SZ*$i($inp),$T1\n"; 213 $code.=" mov $SZ*$i($inp),$T1\n";
214 $code.=" mov @ROT[4],$a0\n";
215 $code.=" mov @ROT[0],$a1\n";
222 $code.=" bswap $T1\n"; 216 $code.=" bswap $T1\n";
223 &ROUND_00_15($i,@ROT); 217 &ROUND_00_15($i,@ROT);
224 unshift(@ROT,pop(@ROT)); 218 unshift(@ROT,pop(@ROT));
diff --git a/src/lib/libssl/src/crypto/sha/sha256.c b/src/lib/libssl/src/crypto/sha/sha256.c
index 8952d87673..f88d3d6dad 100644
--- a/src/lib/libssl/src/crypto/sha/sha256.c
+++ b/src/lib/libssl/src/crypto/sha/sha256.c
@@ -16,7 +16,7 @@
16 16
17const char SHA256_version[]="SHA-256" OPENSSL_VERSION_PTEXT; 17const char SHA256_version[]="SHA-256" OPENSSL_VERSION_PTEXT;
18 18
19int SHA224_Init (SHA256_CTX *c) 19fips_md_init_ctx(SHA224, SHA256)
20 { 20 {
21 memset (c,0,sizeof(*c)); 21 memset (c,0,sizeof(*c));
22 c->h[0]=0xc1059ed8UL; c->h[1]=0x367cd507UL; 22 c->h[0]=0xc1059ed8UL; c->h[1]=0x367cd507UL;
@@ -27,7 +27,7 @@ int SHA224_Init (SHA256_CTX *c)
27 return 1; 27 return 1;
28 } 28 }
29 29
30int SHA256_Init (SHA256_CTX *c) 30fips_md_init(SHA256)
31 { 31 {
32 memset (c,0,sizeof(*c)); 32 memset (c,0,sizeof(*c));
33 c->h[0]=0x6a09e667UL; c->h[1]=0xbb67ae85UL; 33 c->h[0]=0x6a09e667UL; c->h[1]=0xbb67ae85UL;
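Note on the hunk above (and the matching SHA384/SHA512 hunks in the next file): the SHA224_Init/SHA256_Init definitions are now declared through the fips_md_init/fips_md_init_ctx macros from crypto.h, which are not part of this diff. In a non-FIPS 1.0.1 build they are believed to reduce to a plain _Init declaration, roughly as sketched below; the FIPS variant is understood to additionally route through a private_<alg>_Init function and abort when FIPS_mode() is set. The expansion shown here is an assumption for illustration, not the authoritative header text.

/* assumed non-FIPS expansion, for illustration only */
#define fips_md_init(alg)		fips_md_init_ctx(alg, alg)
#define fips_md_init_ctx(alg, cx)	\
	int alg##_Init(cx##_CTX *c)

/* so "fips_md_init_ctx(SHA224, SHA256)" declares: int SHA224_Init(SHA256_CTX *c) */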
diff --git a/src/lib/libssl/src/crypto/sha/sha512.c b/src/lib/libssl/src/crypto/sha/sha512.c
index cbc0e58c48..50dd7dc744 100644
--- a/src/lib/libssl/src/crypto/sha/sha512.c
+++ b/src/lib/libssl/src/crypto/sha/sha512.c
@@ -59,21 +59,8 @@ const char SHA512_version[]="SHA-512" OPENSSL_VERSION_PTEXT;
59#define SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA 59#define SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
60#endif 60#endif
61 61
62int SHA384_Init (SHA512_CTX *c) 62fips_md_init_ctx(SHA384, SHA512)
63 { 63 {
64#if defined(SHA512_ASM) && (defined(__arm__) || defined(__arm))
65 /* maintain dword order required by assembler module */
66 unsigned int *h = (unsigned int *)c->h;
67
68 h[0] = 0xcbbb9d5d; h[1] = 0xc1059ed8;
69 h[2] = 0x629a292a; h[3] = 0x367cd507;
70 h[4] = 0x9159015a; h[5] = 0x3070dd17;
71 h[6] = 0x152fecd8; h[7] = 0xf70e5939;
72 h[8] = 0x67332667; h[9] = 0xffc00b31;
73 h[10] = 0x8eb44a87; h[11] = 0x68581511;
74 h[12] = 0xdb0c2e0d; h[13] = 0x64f98fa7;
75 h[14] = 0x47b5481d; h[15] = 0xbefa4fa4;
76#else
77 c->h[0]=U64(0xcbbb9d5dc1059ed8); 64 c->h[0]=U64(0xcbbb9d5dc1059ed8);
78 c->h[1]=U64(0x629a292a367cd507); 65 c->h[1]=U64(0x629a292a367cd507);
79 c->h[2]=U64(0x9159015a3070dd17); 66 c->h[2]=U64(0x9159015a3070dd17);
@@ -82,27 +69,14 @@ int SHA384_Init (SHA512_CTX *c)
82 c->h[5]=U64(0x8eb44a8768581511); 69 c->h[5]=U64(0x8eb44a8768581511);
83 c->h[6]=U64(0xdb0c2e0d64f98fa7); 70 c->h[6]=U64(0xdb0c2e0d64f98fa7);
84 c->h[7]=U64(0x47b5481dbefa4fa4); 71 c->h[7]=U64(0x47b5481dbefa4fa4);
85#endif 72
86 c->Nl=0; c->Nh=0; 73 c->Nl=0; c->Nh=0;
87 c->num=0; c->md_len=SHA384_DIGEST_LENGTH; 74 c->num=0; c->md_len=SHA384_DIGEST_LENGTH;
88 return 1; 75 return 1;
89 } 76 }
90 77
91int SHA512_Init (SHA512_CTX *c) 78fips_md_init(SHA512)
92 { 79 {
93#if defined(SHA512_ASM) && (defined(__arm__) || defined(__arm))
94 /* maintain dword order required by assembler module */
95 unsigned int *h = (unsigned int *)c->h;
96
97 h[0] = 0x6a09e667; h[1] = 0xf3bcc908;
98 h[2] = 0xbb67ae85; h[3] = 0x84caa73b;
99 h[4] = 0x3c6ef372; h[5] = 0xfe94f82b;
100 h[6] = 0xa54ff53a; h[7] = 0x5f1d36f1;
101 h[8] = 0x510e527f; h[9] = 0xade682d1;
102 h[10] = 0x9b05688c; h[11] = 0x2b3e6c1f;
103 h[12] = 0x1f83d9ab; h[13] = 0xfb41bd6b;
104 h[14] = 0x5be0cd19; h[15] = 0x137e2179;
105#else
106 c->h[0]=U64(0x6a09e667f3bcc908); 80 c->h[0]=U64(0x6a09e667f3bcc908);
107 c->h[1]=U64(0xbb67ae8584caa73b); 81 c->h[1]=U64(0xbb67ae8584caa73b);
108 c->h[2]=U64(0x3c6ef372fe94f82b); 82 c->h[2]=U64(0x3c6ef372fe94f82b);
@@ -111,7 +85,7 @@ int SHA512_Init (SHA512_CTX *c)
111 c->h[5]=U64(0x9b05688c2b3e6c1f); 85 c->h[5]=U64(0x9b05688c2b3e6c1f);
112 c->h[6]=U64(0x1f83d9abfb41bd6b); 86 c->h[6]=U64(0x1f83d9abfb41bd6b);
113 c->h[7]=U64(0x5be0cd19137e2179); 87 c->h[7]=U64(0x5be0cd19137e2179);
114#endif 88
115 c->Nl=0; c->Nh=0; 89 c->Nl=0; c->Nh=0;
116 c->num=0; c->md_len=SHA512_DIGEST_LENGTH; 90 c->num=0; c->md_len=SHA512_DIGEST_LENGTH;
117 return 1; 91 return 1;
@@ -160,24 +134,6 @@ int SHA512_Final (unsigned char *md, SHA512_CTX *c)
160 134
161 if (md==0) return 0; 135 if (md==0) return 0;
162 136
163#if defined(SHA512_ASM) && (defined(__arm__) || defined(__arm))
164 /* recall assembler dword order... */
165 n = c->md_len;
166 if (n == SHA384_DIGEST_LENGTH || n == SHA512_DIGEST_LENGTH)
167 {
168 unsigned int *h = (unsigned int *)c->h, t;
169
170 for (n/=4;n;n--)
171 {
172 t = *(h++);
173 *(md++) = (unsigned char)(t>>24);
174 *(md++) = (unsigned char)(t>>16);
175 *(md++) = (unsigned char)(t>>8);
176 *(md++) = (unsigned char)(t);
177 }
178 }
179 else return 0;
180#else
181 switch (c->md_len) 137 switch (c->md_len)
182 { 138 {
183 /* Let compiler decide if it's appropriate to unroll... */ 139 /* Let compiler decide if it's appropriate to unroll... */
@@ -214,7 +170,7 @@ int SHA512_Final (unsigned char *md, SHA512_CTX *c)
214 /* ... as well as make sure md_len is not abused. */ 170 /* ... as well as make sure md_len is not abused. */
215 default: return 0; 171 default: return 0;
216 } 172 }
217#endif 173
218 return 1; 174 return 1;
219 } 175 }
220 176
diff --git a/src/lib/libssl/src/crypto/sparcv9cap.c b/src/lib/libssl/src/crypto/sparcv9cap.c
index ed195ab402..43b3ac6f81 100644
--- a/src/lib/libssl/src/crypto/sparcv9cap.c
+++ b/src/lib/libssl/src/crypto/sparcv9cap.c
@@ -19,7 +19,8 @@ int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_U
19 int bn_mul_mont_fpu(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0, int num); 19 int bn_mul_mont_fpu(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0, int num);
20 int bn_mul_mont_int(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0, int num); 20 int bn_mul_mont_int(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0, int num);
21 21
22 if ((OPENSSL_sparcv9cap_P&(SPARCV9_PREFER_FPU|SPARCV9_VIS1)) == 22 if (num>=8 && !(num&1) &&
23 (OPENSSL_sparcv9cap_P&(SPARCV9_PREFER_FPU|SPARCV9_VIS1)) ==
23 (SPARCV9_PREFER_FPU|SPARCV9_VIS1)) 24 (SPARCV9_PREFER_FPU|SPARCV9_VIS1))
24 return bn_mul_mont_fpu(rp,ap,bp,np,n0,num); 25 return bn_mul_mont_fpu(rp,ap,bp,np,n0,num);
25 else 26 else
@@ -169,7 +170,6 @@ void OPENSSL_cpuid_setup(void)
169 char *e; 170 char *e;
170 struct sigaction common_act,ill_oact,bus_oact; 171 struct sigaction common_act,ill_oact,bus_oact;
171 sigset_t all_masked,oset; 172 sigset_t all_masked,oset;
172 int sig;
173 static int trigger=0; 173 static int trigger=0;
174 174
175 if (trigger) return; 175 if (trigger) return;
diff --git a/src/lib/libssl/src/crypto/srp/Makefile b/src/lib/libssl/src/crypto/srp/Makefile
new file mode 100644
index 0000000000..41859d46fa
--- /dev/null
+++ b/src/lib/libssl/src/crypto/srp/Makefile
@@ -0,0 +1,98 @@
1DIR= srp
2TOP= ../..
3CC= cc
4INCLUDES= -I.. -I$(TOP) -I../../include
5CFLAG=-g
6INSTALL_PREFIX=
7OPENSSLDIR= /usr/local/ssl
8INSTALLTOP=/usr/local/ssl
9MAKE= make -f Makefile.ssl
10MAKEDEPPROG= makedepend
11MAKEDEPEND= $(TOP)/util/domd $(TOP) -MD $(MAKEDEPPROG)
12MAKEFILE= Makefile.ssl
13AR= ar r
14
15CFLAGS= $(INCLUDES) $(CFLAG)
16
17GENERAL=Makefile
18TEST=srptest.c
19APPS=
20
21LIB=$(TOP)/libcrypto.a
22LIBSRC=srp_lib.c srp_vfy.c
23LIBOBJ=srp_lib.o srp_vfy.o
24
25SRC= $(LIBSRC)
26
27EXHEADER= srp.h
28HEADER= $(EXHEADER)
29
30top:
31 (cd ../..; $(MAKE) DIRS=crypto SDIRS=$(DIR) sub_all)
32
33all: lib
34
35lib: $(LIBOBJ)
36 $(AR) $(LIB) $(LIBOBJ)
37 $(RANLIB) $(LIB) || echo Never mind.
38 @touch lib
39
40links:
41 @$(PERL) $(TOP)/util/mklink.pl ../../include/openssl $(EXHEADER)
42 @$(PERL) $(TOP)/util/mklink.pl ../../test $(TEST)
43 @$(PERL) $(TOP)/util/mklink.pl ../../apps $(APPS)
44
45install:
46 @[ -n "$(INSTALLTOP)" ] # should be set by top Makefile...
47 @headerlist="$(EXHEADER)"; for i in $$headerlist ; \
48 do \
49 (cp $$i $(INSTALL_PREFIX)$(INSTALLTOP)/include/openssl/$$i; \
50 chmod 644 $(INSTALL_PREFIX)$(INSTALLTOP)/include/openssl/$$i ); \
51 done;
52
53tags:
54 ctags $(SRC)
55
56tests:
57
58srptest: top srptest.c $(LIB)
59 $(CC) $(CFLAGS) -Wall -Werror -g -o srptest srptest.c $(LIB)
60
61lint:
62 lint -DLINT $(INCLUDES) $(SRC)>fluff
63
64depend:
65 $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(LIBSRC)
66
67dclean:
68 $(PERL) -pe 'if (/^# DO NOT DELETE THIS LINE/) {print; exit(0);}' $(MAKEFILE) >Makefile.new
69 mv -f Makefile.new $(MAKEFILE)
70
71clean:
72 rm -f *.o *.obj lib tags core .pure .nfs* *.old *.bak fluff
73
74# DO NOT DELETE THIS LINE -- make depend depends on it.
75
76srp_lib.o: ../../e_os.h ../../include/openssl/asn1.h
77srp_lib.o: ../../include/openssl/bio.h ../../include/openssl/bn.h
78srp_lib.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
79srp_lib.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
80srp_lib.o: ../../include/openssl/evp.h ../../include/openssl/lhash.h
81srp_lib.o: ../../include/openssl/obj_mac.h ../../include/openssl/objects.h
82srp_lib.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h
83srp_lib.o: ../../include/openssl/ossl_typ.h ../../include/openssl/safestack.h
84srp_lib.o: ../../include/openssl/sha.h ../../include/openssl/srp.h
85srp_lib.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
86srp_lib.o: ../cryptlib.h srp_grps.h srp_lcl.h srp_lib.c
87srp_vfy.o: ../../e_os.h ../../include/openssl/asn1.h
88srp_vfy.o: ../../include/openssl/bio.h ../../include/openssl/bn.h
89srp_vfy.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
90srp_vfy.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
91srp_vfy.o: ../../include/openssl/evp.h ../../include/openssl/lhash.h
92srp_vfy.o: ../../include/openssl/obj_mac.h ../../include/openssl/objects.h
93srp_vfy.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h
94srp_vfy.o: ../../include/openssl/ossl_typ.h ../../include/openssl/rand.h
95srp_vfy.o: ../../include/openssl/safestack.h ../../include/openssl/sha.h
96srp_vfy.o: ../../include/openssl/srp.h ../../include/openssl/stack.h
97srp_vfy.o: ../../include/openssl/symhacks.h ../../include/openssl/txt_db.h
98srp_vfy.o: ../cryptlib.h srp_lcl.h srp_vfy.c
diff --git a/src/lib/libssl/src/crypto/srp/srp.h b/src/lib/libssl/src/crypto/srp/srp.h
new file mode 100644
index 0000000000..7ec7825cad
--- /dev/null
+++ b/src/lib/libssl/src/crypto/srp/srp.h
@@ -0,0 +1,172 @@
1/* crypto/srp/srp.h */
2/* Written by Christophe Renou (christophe.renou@edelweb.fr) with
3 * the precious help of Peter Sylvester (peter.sylvester@edelweb.fr)
4 * for the EdelKey project and contributed to the OpenSSL project 2004.
5 */
6/* ====================================================================
7 * Copyright (c) 2004 The OpenSSL Project. All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 *
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 *
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in
18 * the documentation and/or other materials provided with the
19 * distribution.
20 *
21 * 3. All advertising materials mentioning features or use of this
22 * software must display the following acknowledgment:
23 * "This product includes software developed by the OpenSSL Project
24 * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
25 *
26 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
27 * endorse or promote products derived from this software without
28 * prior written permission. For written permission, please contact
29 * licensing@OpenSSL.org.
30 *
31 * 5. Products derived from this software may not be called "OpenSSL"
32 * nor may "OpenSSL" appear in their names without prior written
33 * permission of the OpenSSL Project.
34 *
35 * 6. Redistributions of any form whatsoever must retain the following
36 * acknowledgment:
37 * "This product includes software developed by the OpenSSL Project
38 * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
39 *
40 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
41 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
42 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
43 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
44 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
45 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
46 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
47 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
49 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
50 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
51 * OF THE POSSIBILITY OF SUCH DAMAGE.
52 * ====================================================================
53 *
54 * This product includes cryptographic software written by Eric Young
55 * (eay@cryptsoft.com). This product includes software written by Tim
56 * Hudson (tjh@cryptsoft.com).
57 *
58 */
59#ifndef __SRP_H__
60#define __SRP_H__
61
62#ifndef OPENSSL_NO_SRP
63
64#include <stdio.h>
65#include <string.h>
66
67#ifdef __cplusplus
68extern "C" {
69#endif
70
71#include <openssl/safestack.h>
72#include <openssl/bn.h>
73#include <openssl/crypto.h>
74
75typedef struct SRP_gN_cache_st
76 {
77 char *b64_bn;
78 BIGNUM *bn;
79 } SRP_gN_cache;
80
81
82DECLARE_STACK_OF(SRP_gN_cache)
83
84typedef struct SRP_user_pwd_st
85 {
86 char *id;
87 BIGNUM *s;
88 BIGNUM *v;
89 const BIGNUM *g;
90 const BIGNUM *N;
91 char *info;
92 } SRP_user_pwd;
93
94DECLARE_STACK_OF(SRP_user_pwd)
95
96typedef struct SRP_VBASE_st
97 {
98 STACK_OF(SRP_user_pwd) *users_pwd;
99 STACK_OF(SRP_gN_cache) *gN_cache;
100/* to simulate a user */
101 char *seed_key;
102 BIGNUM *default_g;
103 BIGNUM *default_N;
104 } SRP_VBASE;
105
106
 107/* Internal structure to hold the (N, g) pairs */
108typedef struct SRP_gN_st
109 {
110 char *id;
111 BIGNUM *g;
112 BIGNUM *N;
113 } SRP_gN;
114
115DECLARE_STACK_OF(SRP_gN)
116
117SRP_VBASE *SRP_VBASE_new(char *seed_key);
118int SRP_VBASE_free(SRP_VBASE *vb);
119int SRP_VBASE_init(SRP_VBASE *vb, char * verifier_file);
120SRP_user_pwd *SRP_VBASE_get_by_user(SRP_VBASE *vb, char *username);
121char *SRP_create_verifier(const char *user, const char *pass, char **salt,
122 char **verifier, const char *N, const char *g);
123int SRP_create_verifier_BN(const char *user, const char *pass, BIGNUM **salt, BIGNUM **verifier, BIGNUM *N, BIGNUM *g);
124
125
126#define SRP_NO_ERROR 0
127#define SRP_ERR_VBASE_INCOMPLETE_FILE 1
128#define SRP_ERR_VBASE_BN_LIB 2
129#define SRP_ERR_OPEN_FILE 3
130#define SRP_ERR_MEMORY 4
131
132#define DB_srptype 0
133#define DB_srpverifier 1
134#define DB_srpsalt 2
135#define DB_srpid 3
136#define DB_srpgN 4
137#define DB_srpinfo 5
138#undef DB_NUMBER
139#define DB_NUMBER 6
140
141#define DB_SRP_INDEX 'I'
142#define DB_SRP_VALID 'V'
143#define DB_SRP_REVOKED 'R'
144#define DB_SRP_MODIF 'v'
145
146
147/* see srp.c */
148char * SRP_check_known_gN_param(BIGNUM* g, BIGNUM* N);
149SRP_gN *SRP_get_default_gN(const char * id) ;
150
151/* server side .... */
152BIGNUM *SRP_Calc_server_key(BIGNUM *A, BIGNUM *v, BIGNUM *u, BIGNUM *b, BIGNUM *N);
153BIGNUM *SRP_Calc_B(BIGNUM *b, BIGNUM *N, BIGNUM *g, BIGNUM *v);
154int SRP_Verify_A_mod_N(BIGNUM *A, BIGNUM *N);
155BIGNUM *SRP_Calc_u(BIGNUM *A, BIGNUM *B, BIGNUM *N) ;
156
157
158
159/* client side .... */
160BIGNUM *SRP_Calc_x(BIGNUM *s, const char *user, const char *pass);
161BIGNUM *SRP_Calc_A(BIGNUM *a, BIGNUM *N, BIGNUM *g);
162BIGNUM *SRP_Calc_client_key(BIGNUM *N, BIGNUM *B, BIGNUM *g, BIGNUM *x, BIGNUM *a, BIGNUM *u);
163int SRP_Verify_B_mod_N(BIGNUM *B, BIGNUM *N);
164
165#define SRP_MINIMAL_N 1024
166
167#ifdef __cplusplus
168}
169#endif
170
171#endif
172#endif
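The header above is the complete public SRP interface added by this import. A minimal server-side sketch follows, using only the functions declared in srp.h plus the standard BIGNUM routines from <openssl/bn.h>. The wrapper name srp_server_premaster, the choice of the "1024" group and the 256-bit private exponent are illustrative assumptions, and error handling is trimmed; treat it as a sketch, not the library's intended usage pattern.

#include <openssl/srp.h>
#include <openssl/bn.h>

/* Hypothetical helper, not part of the import: derive the server-side
 * premaster secret S for a user record "up" and a client public value A. */
BIGNUM *srp_server_premaster(SRP_user_pwd *up, BIGNUM *A)
	{
	SRP_gN *gN = SRP_get_default_gN("1024");	/* known (g,N) pair from srp_grps.h */
	BIGNUM *b, *B, *u, *S;

	if (gN == NULL || !SRP_Verify_A_mod_N(A, gN->N))	/* reject A == 0 (mod N) */
		return NULL;
	b = BN_new();
	BN_rand(b, 256, -1, 0);				/* server private value; size is arbitrary here */
	B = SRP_Calc_B(b, gN->N, gN->g, up->v);		/* B = k*v + g^b, sent to the client */
	u = SRP_Calc_u(A, B, gN->N);			/* u = SHA1(PAD(A) || PAD(B)) */
	S = SRP_Calc_server_key(A, up->v, u, b, gN->N);	/* S = (A * v^u)^b mod N */
	BN_clear_free(b);
	BN_free(B);
	BN_free(u);
	return S;					/* premaster secret, NULL on failure */
	}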
diff --git a/src/lib/libssl/src/crypto/srp/srp_grps.h b/src/lib/libssl/src/crypto/srp/srp_grps.h
new file mode 100644
index 0000000000..d77c9fff4b
--- /dev/null
+++ b/src/lib/libssl/src/crypto/srp/srp_grps.h
@@ -0,0 +1,517 @@
1/* start of generated data */
2
3static BN_ULONG bn_group_1024_value[] = {
4 bn_pack4(9FC6,1D2F,C0EB,06E3),
5 bn_pack4(FD51,38FE,8376,435B),
6 bn_pack4(2FD4,CBF4,976E,AA9A),
7 bn_pack4(68ED,BC3C,0572,6CC0),
8 bn_pack4(C529,F566,660E,57EC),
9 bn_pack4(8255,9B29,7BCF,1885),
10 bn_pack4(CE8E,F4AD,69B1,5D49),
11 bn_pack4(5DC7,D7B4,6154,D6B6),
12 bn_pack4(8E49,5C1D,6089,DAD1),
13 bn_pack4(E0D5,D8E2,50B9,8BE4),
14 bn_pack4(383B,4813,D692,C6E0),
15 bn_pack4(D674,DF74,96EA,81D3),
16 bn_pack4(9EA2,314C,9C25,6576),
17 bn_pack4(6072,6187,75FF,3C0B),
18 bn_pack4(9C33,F80A,FA8F,C5E8),
19 bn_pack4(EEAF,0AB9,ADB3,8DD6)
20};
21static BIGNUM bn_group_1024 = {
22 bn_group_1024_value,
23 (sizeof bn_group_1024_value)/sizeof(BN_ULONG),
24 (sizeof bn_group_1024_value)/sizeof(BN_ULONG),
25 0,
26 BN_FLG_STATIC_DATA
27};
28
29static BN_ULONG bn_group_1536_value[] = {
30 bn_pack4(CF76,E3FE,D135,F9BB),
31 bn_pack4(1518,0F93,499A,234D),
32 bn_pack4(8CE7,A28C,2442,C6F3),
33 bn_pack4(5A02,1FFF,5E91,479E),
34 bn_pack4(7F8A,2FE9,B8B5,292E),
35 bn_pack4(837C,264A,E3A9,BEB8),
36 bn_pack4(E442,734A,F7CC,B7AE),
37 bn_pack4(6577,2E43,7D6C,7F8C),
38 bn_pack4(DB2F,D53D,24B7,C486),
39 bn_pack4(6EDF,0195,3934,9627),
40 bn_pack4(158B,FD3E,2B9C,8CF5),
41 bn_pack4(764E,3F4B,53DD,9DA1),
42 bn_pack4(4754,8381,DBC5,B1FC),
43 bn_pack4(9B60,9E0B,E3BA,B63D),
44 bn_pack4(8134,B1C8,B979,8914),
45 bn_pack4(DF02,8A7C,EC67,F0D0),
46 bn_pack4(80B6,55BB,9A22,E8DC),
47 bn_pack4(1558,903B,A0D0,F843),
48 bn_pack4(51C6,A94B,E460,7A29),
49 bn_pack4(5F4F,5F55,6E27,CBDE),
50 bn_pack4(BEEE,A961,4B19,CC4D),
51 bn_pack4(DBA5,1DF4,99AC,4C80),
52 bn_pack4(B1F1,2A86,17A4,7BBB),
53 bn_pack4(9DEF,3CAF,B939,277A)
54};
55static BIGNUM bn_group_1536 = {
56 bn_group_1536_value,
57 (sizeof bn_group_1536_value)/sizeof(BN_ULONG),
58 (sizeof bn_group_1536_value)/sizeof(BN_ULONG),
59 0,
60 BN_FLG_STATIC_DATA
61};
62
63static BN_ULONG bn_group_2048_value[] = {
64 bn_pack4(0FA7,111F,9E4A,FF73),
65 bn_pack4(9B65,E372,FCD6,8EF2),
66 bn_pack4(35DE,236D,525F,5475),
67 bn_pack4(94B5,C803,D89F,7AE4),
68 bn_pack4(71AE,35F8,E9DB,FBB6),
69 bn_pack4(2A56,98F3,A8D0,C382),
70 bn_pack4(9CCC,041C,7BC3,08D8),
71 bn_pack4(AF87,4E73,03CE,5329),
72 bn_pack4(6160,2790,04E5,7AE6),
73 bn_pack4(032C,FBDB,F52F,B378),
74 bn_pack4(5EA7,7A27,75D2,ECFA),
75 bn_pack4(5445,23B5,24B0,D57D),
76 bn_pack4(5B9D,32E6,88F8,7748),
77 bn_pack4(F1D2,B907,8717,461A),
78 bn_pack4(76BD,207A,436C,6481),
79 bn_pack4(CA97,B43A,23FB,8016),
80 bn_pack4(1D28,1E44,6B14,773B),
81 bn_pack4(7359,D041,D5C3,3EA7),
82 bn_pack4(A80D,740A,DBF4,FF74),
83 bn_pack4(55F9,7993,EC97,5EEA),
84 bn_pack4(2918,A996,2F0B,93B8),
85 bn_pack4(661A,05FB,D5FA,AAE8),
86 bn_pack4(CF60,9517,9A16,3AB3),
87 bn_pack4(E808,3969,EDB7,67B0),
88 bn_pack4(CD7F,48A9,DA04,FD50),
89 bn_pack4(D523,12AB,4B03,310D),
90 bn_pack4(8193,E075,7767,A13D),
91 bn_pack4(A373,29CB,B4A0,99ED),
92 bn_pack4(FC31,9294,3DB5,6050),
93 bn_pack4(AF72,B665,1987,EE07),
94 bn_pack4(F166,DE5E,1389,582F),
95 bn_pack4(AC6B,DB41,324A,9A9B)
96};
97static BIGNUM bn_group_2048 = {
98 bn_group_2048_value,
99 (sizeof bn_group_2048_value)/sizeof(BN_ULONG),
100 (sizeof bn_group_2048_value)/sizeof(BN_ULONG),
101 0,
102 BN_FLG_STATIC_DATA
103};
104
105static BN_ULONG bn_group_3072_value[] = {
106 bn_pack4(FFFF,FFFF,FFFF,FFFF),
107 bn_pack4(4B82,D120,A93A,D2CA),
108 bn_pack4(43DB,5BFC,E0FD,108E),
109 bn_pack4(08E2,4FA0,74E5,AB31),
110 bn_pack4(7709,88C0,BAD9,46E2),
111 bn_pack4(BBE1,1757,7A61,5D6C),
112 bn_pack4(521F,2B18,177B,200C),
113 bn_pack4(D876,0273,3EC8,6A64),
114 bn_pack4(F12F,FA06,D98A,0864),
115 bn_pack4(CEE3,D226,1AD2,EE6B),
116 bn_pack4(1E8C,94E0,4A25,619D),
117 bn_pack4(ABF5,AE8C,DB09,33D7),
118 bn_pack4(B397,0F85,A6E1,E4C7),
119 bn_pack4(8AEA,7157,5D06,0C7D),
120 bn_pack4(ECFB,8504,58DB,EF0A),
121 bn_pack4(A855,21AB,DF1C,BA64),
122 bn_pack4(AD33,170D,0450,7A33),
123 bn_pack4(1572,8E5A,8AAA,C42D),
124 bn_pack4(15D2,2618,98FA,0510),
125 bn_pack4(3995,497C,EA95,6AE5),
126 bn_pack4(DE2B,CBF6,9558,1718),
127 bn_pack4(B5C5,5DF0,6F4C,52C9),
128 bn_pack4(9B27,83A2,EC07,A28F),
129 bn_pack4(E39E,772C,180E,8603),
130 bn_pack4(3290,5E46,2E36,CE3B),
131 bn_pack4(F174,6C08,CA18,217C),
132 bn_pack4(670C,354E,4ABC,9804),
133 bn_pack4(9ED5,2907,7096,966D),
134 bn_pack4(1C62,F356,2085,52BB),
135 bn_pack4(8365,5D23,DCA3,AD96),
136 bn_pack4(6916,3FA8,FD24,CF5F),
137 bn_pack4(98DA,4836,1C55,D39A),
138 bn_pack4(C200,7CB8,A163,BF05),
139 bn_pack4(4928,6651,ECE4,5B3D),
140 bn_pack4(AE9F,2411,7C4B,1FE6),
141 bn_pack4(EE38,6BFB,5A89,9FA5),
142 bn_pack4(0BFF,5CB6,F406,B7ED),
143 bn_pack4(F44C,42E9,A637,ED6B),
144 bn_pack4(E485,B576,625E,7EC6),
145 bn_pack4(4FE1,356D,6D51,C245),
146 bn_pack4(302B,0A6D,F25F,1437),
147 bn_pack4(EF95,19B3,CD3A,431B),
148 bn_pack4(514A,0879,8E34,04DD),
149 bn_pack4(020B,BEA6,3B13,9B22),
150 bn_pack4(2902,4E08,8A67,CC74),
151 bn_pack4(C4C6,628B,80DC,1CD1),
152 bn_pack4(C90F,DAA2,2168,C234),
153 bn_pack4(FFFF,FFFF,FFFF,FFFF)
154};
155static BIGNUM bn_group_3072 = {
156 bn_group_3072_value,
157 (sizeof bn_group_3072_value)/sizeof(BN_ULONG),
158 (sizeof bn_group_3072_value)/sizeof(BN_ULONG),
159 0,
160 BN_FLG_STATIC_DATA
161};
162
163static BN_ULONG bn_group_4096_value[] = {
164 bn_pack4(FFFF,FFFF,FFFF,FFFF),
165 bn_pack4(4DF4,35C9,3406,3199),
166 bn_pack4(86FF,B7DC,90A6,C08F),
167 bn_pack4(93B4,EA98,8D8F,DDC1),
168 bn_pack4(D006,9127,D5B0,5AA9),
169 bn_pack4(B81B,DD76,2170,481C),
170 bn_pack4(1F61,2970,CEE2,D7AF),
171 bn_pack4(233B,A186,515B,E7ED),
172 bn_pack4(99B2,964F,A090,C3A2),
173 bn_pack4(287C,5947,4E6B,C05D),
174 bn_pack4(2E8E,FC14,1FBE,CAA6),
175 bn_pack4(DBBB,C2DB,04DE,8EF9),
176 bn_pack4(2583,E9CA,2AD4,4CE8),
177 bn_pack4(1A94,6834,B615,0BDA),
178 bn_pack4(99C3,2718,6AF4,E23C),
179 bn_pack4(8871,9A10,BDBA,5B26),
180 bn_pack4(1A72,3C12,A787,E6D7),
181 bn_pack4(4B82,D120,A921,0801),
182 bn_pack4(43DB,5BFC,E0FD,108E),
183 bn_pack4(08E2,4FA0,74E5,AB31),
184 bn_pack4(7709,88C0,BAD9,46E2),
185 bn_pack4(BBE1,1757,7A61,5D6C),
186 bn_pack4(521F,2B18,177B,200C),
187 bn_pack4(D876,0273,3EC8,6A64),
188 bn_pack4(F12F,FA06,D98A,0864),
189 bn_pack4(CEE3,D226,1AD2,EE6B),
190 bn_pack4(1E8C,94E0,4A25,619D),
191 bn_pack4(ABF5,AE8C,DB09,33D7),
192 bn_pack4(B397,0F85,A6E1,E4C7),
193 bn_pack4(8AEA,7157,5D06,0C7D),
194 bn_pack4(ECFB,8504,58DB,EF0A),
195 bn_pack4(A855,21AB,DF1C,BA64),
196 bn_pack4(AD33,170D,0450,7A33),
197 bn_pack4(1572,8E5A,8AAA,C42D),
198 bn_pack4(15D2,2618,98FA,0510),
199 bn_pack4(3995,497C,EA95,6AE5),
200 bn_pack4(DE2B,CBF6,9558,1718),
201 bn_pack4(B5C5,5DF0,6F4C,52C9),
202 bn_pack4(9B27,83A2,EC07,A28F),
203 bn_pack4(E39E,772C,180E,8603),
204 bn_pack4(3290,5E46,2E36,CE3B),
205 bn_pack4(F174,6C08,CA18,217C),
206 bn_pack4(670C,354E,4ABC,9804),
207 bn_pack4(9ED5,2907,7096,966D),
208 bn_pack4(1C62,F356,2085,52BB),
209 bn_pack4(8365,5D23,DCA3,AD96),
210 bn_pack4(6916,3FA8,FD24,CF5F),
211 bn_pack4(98DA,4836,1C55,D39A),
212 bn_pack4(C200,7CB8,A163,BF05),
213 bn_pack4(4928,6651,ECE4,5B3D),
214 bn_pack4(AE9F,2411,7C4B,1FE6),
215 bn_pack4(EE38,6BFB,5A89,9FA5),
216 bn_pack4(0BFF,5CB6,F406,B7ED),
217 bn_pack4(F44C,42E9,A637,ED6B),
218 bn_pack4(E485,B576,625E,7EC6),
219 bn_pack4(4FE1,356D,6D51,C245),
220 bn_pack4(302B,0A6D,F25F,1437),
221 bn_pack4(EF95,19B3,CD3A,431B),
222 bn_pack4(514A,0879,8E34,04DD),
223 bn_pack4(020B,BEA6,3B13,9B22),
224 bn_pack4(2902,4E08,8A67,CC74),
225 bn_pack4(C4C6,628B,80DC,1CD1),
226 bn_pack4(C90F,DAA2,2168,C234),
227 bn_pack4(FFFF,FFFF,FFFF,FFFF)
228};
229static BIGNUM bn_group_4096 = {
230 bn_group_4096_value,
231 (sizeof bn_group_4096_value)/sizeof(BN_ULONG),
232 (sizeof bn_group_4096_value)/sizeof(BN_ULONG),
233 0,
234 BN_FLG_STATIC_DATA
235};
236
237static BN_ULONG bn_group_6144_value[] = {
238 bn_pack4(FFFF,FFFF,FFFF,FFFF),
239 bn_pack4(E694,F91E,6DCC,4024),
240 bn_pack4(12BF,2D5B,0B74,74D6),
241 bn_pack4(043E,8F66,3F48,60EE),
242 bn_pack4(387F,E8D7,6E3C,0468),
243 bn_pack4(DA56,C9EC,2EF2,9632),
244 bn_pack4(EB19,CCB1,A313,D55C),
245 bn_pack4(F550,AA3D,8A1F,BFF0),
246 bn_pack4(06A1,D58B,B7C5,DA76),
247 bn_pack4(A797,15EE,F29B,E328),
248 bn_pack4(14CC,5ED2,0F80,37E0),
249 bn_pack4(CC8F,6D7E,BF48,E1D8),
250 bn_pack4(4BD4,07B2,2B41,54AA),
251 bn_pack4(0F1D,45B7,FF58,5AC5),
252 bn_pack4(23A9,7A7E,36CC,88BE),
253 bn_pack4(59E7,C97F,BEC7,E8F3),
254 bn_pack4(B5A8,4031,900B,1C9E),
255 bn_pack4(D55E,702F,4698,0C82),
256 bn_pack4(F482,D7CE,6E74,FEF6),
257 bn_pack4(F032,EA15,D172,1D03),
258 bn_pack4(5983,CA01,C64B,92EC),
259 bn_pack4(6FB8,F401,378C,D2BF),
260 bn_pack4(3320,5151,2BD7,AF42),
261 bn_pack4(DB7F,1447,E6CC,254B),
262 bn_pack4(44CE,6CBA,CED4,BB1B),
263 bn_pack4(DA3E,DBEB,CF9B,14ED),
264 bn_pack4(1797,27B0,865A,8918),
265 bn_pack4(B06A,53ED,9027,D831),
266 bn_pack4(E5DB,382F,4130,01AE),
267 bn_pack4(F8FF,9406,AD9E,530E),
268 bn_pack4(C975,1E76,3DBA,37BD),
269 bn_pack4(C1D4,DCB2,6026,46DE),
270 bn_pack4(36C3,FAB4,D27C,7026),
271 bn_pack4(4DF4,35C9,3402,8492),
272 bn_pack4(86FF,B7DC,90A6,C08F),
273 bn_pack4(93B4,EA98,8D8F,DDC1),
274 bn_pack4(D006,9127,D5B0,5AA9),
275 bn_pack4(B81B,DD76,2170,481C),
276 bn_pack4(1F61,2970,CEE2,D7AF),
277 bn_pack4(233B,A186,515B,E7ED),
278 bn_pack4(99B2,964F,A090,C3A2),
279 bn_pack4(287C,5947,4E6B,C05D),
280 bn_pack4(2E8E,FC14,1FBE,CAA6),
281 bn_pack4(DBBB,C2DB,04DE,8EF9),
282 bn_pack4(2583,E9CA,2AD4,4CE8),
283 bn_pack4(1A94,6834,B615,0BDA),
284 bn_pack4(99C3,2718,6AF4,E23C),
285 bn_pack4(8871,9A10,BDBA,5B26),
286 bn_pack4(1A72,3C12,A787,E6D7),
287 bn_pack4(4B82,D120,A921,0801),
288 bn_pack4(43DB,5BFC,E0FD,108E),
289 bn_pack4(08E2,4FA0,74E5,AB31),
290 bn_pack4(7709,88C0,BAD9,46E2),
291 bn_pack4(BBE1,1757,7A61,5D6C),
292 bn_pack4(521F,2B18,177B,200C),
293 bn_pack4(D876,0273,3EC8,6A64),
294 bn_pack4(F12F,FA06,D98A,0864),
295 bn_pack4(CEE3,D226,1AD2,EE6B),
296 bn_pack4(1E8C,94E0,4A25,619D),
297 bn_pack4(ABF5,AE8C,DB09,33D7),
298 bn_pack4(B397,0F85,A6E1,E4C7),
299 bn_pack4(8AEA,7157,5D06,0C7D),
300 bn_pack4(ECFB,8504,58DB,EF0A),
301 bn_pack4(A855,21AB,DF1C,BA64),
302 bn_pack4(AD33,170D,0450,7A33),
303 bn_pack4(1572,8E5A,8AAA,C42D),
304 bn_pack4(15D2,2618,98FA,0510),
305 bn_pack4(3995,497C,EA95,6AE5),
306 bn_pack4(DE2B,CBF6,9558,1718),
307 bn_pack4(B5C5,5DF0,6F4C,52C9),
308 bn_pack4(9B27,83A2,EC07,A28F),
309 bn_pack4(E39E,772C,180E,8603),
310 bn_pack4(3290,5E46,2E36,CE3B),
311 bn_pack4(F174,6C08,CA18,217C),
312 bn_pack4(670C,354E,4ABC,9804),
313 bn_pack4(9ED5,2907,7096,966D),
314 bn_pack4(1C62,F356,2085,52BB),
315 bn_pack4(8365,5D23,DCA3,AD96),
316 bn_pack4(6916,3FA8,FD24,CF5F),
317 bn_pack4(98DA,4836,1C55,D39A),
318 bn_pack4(C200,7CB8,A163,BF05),
319 bn_pack4(4928,6651,ECE4,5B3D),
320 bn_pack4(AE9F,2411,7C4B,1FE6),
321 bn_pack4(EE38,6BFB,5A89,9FA5),
322 bn_pack4(0BFF,5CB6,F406,B7ED),
323 bn_pack4(F44C,42E9,A637,ED6B),
324 bn_pack4(E485,B576,625E,7EC6),
325 bn_pack4(4FE1,356D,6D51,C245),
326 bn_pack4(302B,0A6D,F25F,1437),
327 bn_pack4(EF95,19B3,CD3A,431B),
328 bn_pack4(514A,0879,8E34,04DD),
329 bn_pack4(020B,BEA6,3B13,9B22),
330 bn_pack4(2902,4E08,8A67,CC74),
331 bn_pack4(C4C6,628B,80DC,1CD1),
332 bn_pack4(C90F,DAA2,2168,C234),
333 bn_pack4(FFFF,FFFF,FFFF,FFFF)
334};
335static BIGNUM bn_group_6144 = {
336 bn_group_6144_value,
337 (sizeof bn_group_6144_value)/sizeof(BN_ULONG),
338 (sizeof bn_group_6144_value)/sizeof(BN_ULONG),
339 0,
340 BN_FLG_STATIC_DATA
341};
342
343static BN_ULONG bn_group_8192_value[] = {
344 bn_pack4(FFFF,FFFF,FFFF,FFFF),
345 bn_pack4(60C9,80DD,98ED,D3DF),
346 bn_pack4(C81F,56E8,80B9,6E71),
347 bn_pack4(9E30,50E2,7656,94DF),
348 bn_pack4(9558,E447,5677,E9AA),
349 bn_pack4(C919,0DA6,FC02,6E47),
350 bn_pack4(889A,002E,D5EE,382B),
351 bn_pack4(4009,438B,481C,6CD7),
352 bn_pack4(3590,46F4,EB87,9F92),
353 bn_pack4(FAF3,6BC3,1ECF,A268),
354 bn_pack4(B1D5,10BD,7EE7,4D73),
355 bn_pack4(F9AB,4819,5DED,7EA1),
356 bn_pack4(64F3,1CC5,0846,851D),
357 bn_pack4(4597,E899,A025,5DC1),
358 bn_pack4(DF31,0EE0,74AB,6A36),
359 bn_pack4(6D2A,13F8,3F44,F82D),
360 bn_pack4(062B,3CF5,B3A2,78A6),
361 bn_pack4(7968,3303,ED5B,DD3A),
362 bn_pack4(FA9D,4B7F,A2C0,87E8),
363 bn_pack4(4BCB,C886,2F83,85DD),
364 bn_pack4(3473,FC64,6CEA,306B),
365 bn_pack4(13EB,57A8,1A23,F0C7),
366 bn_pack4(2222,2E04,A403,7C07),
367 bn_pack4(E3FD,B8BE,FC84,8AD9),
368 bn_pack4(238F,16CB,E39D,652D),
369 bn_pack4(3423,B474,2BF1,C978),
370 bn_pack4(3AAB,639C,5AE4,F568),
371 bn_pack4(2576,F693,6BA4,2466),
372 bn_pack4(741F,A7BF,8AFC,47ED),
373 bn_pack4(3BC8,32B6,8D9D,D300),
374 bn_pack4(D8BE,C4D0,73B9,31BA),
375 bn_pack4(3877,7CB6,A932,DF8C),
376 bn_pack4(74A3,926F,12FE,E5E4),
377 bn_pack4(E694,F91E,6DBE,1159),
378 bn_pack4(12BF,2D5B,0B74,74D6),
379 bn_pack4(043E,8F66,3F48,60EE),
380 bn_pack4(387F,E8D7,6E3C,0468),
381 bn_pack4(DA56,C9EC,2EF2,9632),
382 bn_pack4(EB19,CCB1,A313,D55C),
383 bn_pack4(F550,AA3D,8A1F,BFF0),
384 bn_pack4(06A1,D58B,B7C5,DA76),
385 bn_pack4(A797,15EE,F29B,E328),
386 bn_pack4(14CC,5ED2,0F80,37E0),
387 bn_pack4(CC8F,6D7E,BF48,E1D8),
388 bn_pack4(4BD4,07B2,2B41,54AA),
389 bn_pack4(0F1D,45B7,FF58,5AC5),
390 bn_pack4(23A9,7A7E,36CC,88BE),
391 bn_pack4(59E7,C97F,BEC7,E8F3),
392 bn_pack4(B5A8,4031,900B,1C9E),
393 bn_pack4(D55E,702F,4698,0C82),
394 bn_pack4(F482,D7CE,6E74,FEF6),
395 bn_pack4(F032,EA15,D172,1D03),
396 bn_pack4(5983,CA01,C64B,92EC),
397 bn_pack4(6FB8,F401,378C,D2BF),
398 bn_pack4(3320,5151,2BD7,AF42),
399 bn_pack4(DB7F,1447,E6CC,254B),
400 bn_pack4(44CE,6CBA,CED4,BB1B),
401 bn_pack4(DA3E,DBEB,CF9B,14ED),
402 bn_pack4(1797,27B0,865A,8918),
403 bn_pack4(B06A,53ED,9027,D831),
404 bn_pack4(E5DB,382F,4130,01AE),
405 bn_pack4(F8FF,9406,AD9E,530E),
406 bn_pack4(C975,1E76,3DBA,37BD),
407 bn_pack4(C1D4,DCB2,6026,46DE),
408 bn_pack4(36C3,FAB4,D27C,7026),
409 bn_pack4(4DF4,35C9,3402,8492),
410 bn_pack4(86FF,B7DC,90A6,C08F),
411 bn_pack4(93B4,EA98,8D8F,DDC1),
412 bn_pack4(D006,9127,D5B0,5AA9),
413 bn_pack4(B81B,DD76,2170,481C),
414 bn_pack4(1F61,2970,CEE2,D7AF),
415 bn_pack4(233B,A186,515B,E7ED),
416 bn_pack4(99B2,964F,A090,C3A2),
417 bn_pack4(287C,5947,4E6B,C05D),
418 bn_pack4(2E8E,FC14,1FBE,CAA6),
419 bn_pack4(DBBB,C2DB,04DE,8EF9),
420 bn_pack4(2583,E9CA,2AD4,4CE8),
421 bn_pack4(1A94,6834,B615,0BDA),
422 bn_pack4(99C3,2718,6AF4,E23C),
423 bn_pack4(8871,9A10,BDBA,5B26),
424 bn_pack4(1A72,3C12,A787,E6D7),
425 bn_pack4(4B82,D120,A921,0801),
426 bn_pack4(43DB,5BFC,E0FD,108E),
427 bn_pack4(08E2,4FA0,74E5,AB31),
428 bn_pack4(7709,88C0,BAD9,46E2),
429 bn_pack4(BBE1,1757,7A61,5D6C),
430 bn_pack4(521F,2B18,177B,200C),
431 bn_pack4(D876,0273,3EC8,6A64),
432 bn_pack4(F12F,FA06,D98A,0864),
433 bn_pack4(CEE3,D226,1AD2,EE6B),
434 bn_pack4(1E8C,94E0,4A25,619D),
435 bn_pack4(ABF5,AE8C,DB09,33D7),
436 bn_pack4(B397,0F85,A6E1,E4C7),
437 bn_pack4(8AEA,7157,5D06,0C7D),
438 bn_pack4(ECFB,8504,58DB,EF0A),
439 bn_pack4(A855,21AB,DF1C,BA64),
440 bn_pack4(AD33,170D,0450,7A33),
441 bn_pack4(1572,8E5A,8AAA,C42D),
442 bn_pack4(15D2,2618,98FA,0510),
443 bn_pack4(3995,497C,EA95,6AE5),
444 bn_pack4(DE2B,CBF6,9558,1718),
445 bn_pack4(B5C5,5DF0,6F4C,52C9),
446 bn_pack4(9B27,83A2,EC07,A28F),
447 bn_pack4(E39E,772C,180E,8603),
448 bn_pack4(3290,5E46,2E36,CE3B),
449 bn_pack4(F174,6C08,CA18,217C),
450 bn_pack4(670C,354E,4ABC,9804),
451 bn_pack4(9ED5,2907,7096,966D),
452 bn_pack4(1C62,F356,2085,52BB),
453 bn_pack4(8365,5D23,DCA3,AD96),
454 bn_pack4(6916,3FA8,FD24,CF5F),
455 bn_pack4(98DA,4836,1C55,D39A),
456 bn_pack4(C200,7CB8,A163,BF05),
457 bn_pack4(4928,6651,ECE4,5B3D),
458 bn_pack4(AE9F,2411,7C4B,1FE6),
459 bn_pack4(EE38,6BFB,5A89,9FA5),
460 bn_pack4(0BFF,5CB6,F406,B7ED),
461 bn_pack4(F44C,42E9,A637,ED6B),
462 bn_pack4(E485,B576,625E,7EC6),
463 bn_pack4(4FE1,356D,6D51,C245),
464 bn_pack4(302B,0A6D,F25F,1437),
465 bn_pack4(EF95,19B3,CD3A,431B),
466 bn_pack4(514A,0879,8E34,04DD),
467 bn_pack4(020B,BEA6,3B13,9B22),
468 bn_pack4(2902,4E08,8A67,CC74),
469 bn_pack4(C4C6,628B,80DC,1CD1),
470 bn_pack4(C90F,DAA2,2168,C234),
471 bn_pack4(FFFF,FFFF,FFFF,FFFF)
472};
473static BIGNUM bn_group_8192 = {
474 bn_group_8192_value,
475 (sizeof bn_group_8192_value)/sizeof(BN_ULONG),
476 (sizeof bn_group_8192_value)/sizeof(BN_ULONG),
477 0,
478 BN_FLG_STATIC_DATA
479};
480
481static BN_ULONG bn_generator_19_value[] = {19} ;
482static BIGNUM bn_generator_19 = {
483 bn_generator_19_value,
484 1,
485 1,
486 0,
487 BN_FLG_STATIC_DATA
488};
489static BN_ULONG bn_generator_5_value[] = {5} ;
490static BIGNUM bn_generator_5 = {
491 bn_generator_5_value,
492 1,
493 1,
494 0,
495 BN_FLG_STATIC_DATA
496};
497static BN_ULONG bn_generator_2_value[] = {2} ;
498static BIGNUM bn_generator_2 = {
499 bn_generator_2_value,
500 1,
501 1,
502 0,
503 BN_FLG_STATIC_DATA
504};
505
506static SRP_gN knowngN[] = {
507 {"8192",&bn_generator_19 , &bn_group_8192},
508 {"6144",&bn_generator_5 , &bn_group_6144},
509 {"4096",&bn_generator_5 , &bn_group_4096},
510 {"3072",&bn_generator_5 , &bn_group_3072},
511 {"2048",&bn_generator_2 , &bn_group_2048},
512 {"1536",&bn_generator_2 , &bn_group_1536},
513 {"1024",&bn_generator_2 , &bn_group_1024},
514};
515#define KNOWN_GN_NUMBER sizeof(knowngN) / sizeof(SRP_gN)
516
517/* end of generated data */
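The tables above are the SRP group moduli and generators from the tls-srp draft, stored as BIGNUM limb arrays with the least-significant word first; bn_pack4 is defined further down in srp_lib.c and splits each 16-hex-digit chunk according to BN_BYTES. A small illustration of that expansion follows (not part of the import; example_limb is a made-up name):

#if (BN_BYTES == 8)
/* one table entry becomes a single 64-bit limb: */
static const BN_ULONG example_limb[] = { 0xEEAF0AB9ADB38DD6ul };
#elif (BN_BYTES == 4)
/* the same entry becomes two 32-bit limbs, low word first: */
static const BN_ULONG example_limb[] = { 0xADB38DD6ul, 0xEEAF0AB9ul };
#endif
/* Because the arrays are least-significant limb first, the final
 * bn_pack4(EEAF,0AB9,ADB3,8DD6) line of bn_group_1024_value carries the
 * most significant bits of the 1024-bit modulus. */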
diff --git a/src/lib/libssl/src/crypto/srp/srp_lcl.h b/src/lib/libssl/src/crypto/srp/srp_lcl.h
new file mode 100644
index 0000000000..42bda3f148
--- /dev/null
+++ b/src/lib/libssl/src/crypto/srp/srp_lcl.h
@@ -0,0 +1,83 @@
1/* crypto/srp/srp_lcl.h */
2/* Written by Peter Sylvester (peter.sylvester@edelweb.fr)
3 * for the EdelKey project and contributed to the OpenSSL project 2004.
4 */
5/* ====================================================================
6 * Copyright (c) 2004 The OpenSSL Project. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 *
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in
17 * the documentation and/or other materials provided with the
18 * distribution.
19 *
20 * 3. All advertising materials mentioning features or use of this
21 * software must display the following acknowledgment:
22 * "This product includes software developed by the OpenSSL Project
23 * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
24 *
25 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
26 * endorse or promote products derived from this software without
27 * prior written permission. For written permission, please contact
28 * licensing@OpenSSL.org.
29 *
30 * 5. Products derived from this software may not be called "OpenSSL"
31 * nor may "OpenSSL" appear in their names without prior written
32 * permission of the OpenSSL Project.
33 *
34 * 6. Redistributions of any form whatsoever must retain the following
35 * acknowledgment:
36 * "This product includes software developed by the OpenSSL Project
37 * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
38 *
39 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
40 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
41 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
42 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
43 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
44 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
45 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
46 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
47 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
48 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
49 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
50 * OF THE POSSIBILITY OF SUCH DAMAGE.
51 * ====================================================================
52 *
53 * This product includes cryptographic software written by Eric Young
54 * (eay@cryptsoft.com). This product includes software written by Tim
55 * Hudson (tjh@cryptsoft.com).
56 *
57 */
58#ifndef HEADER_SRP_LCL_H
59#define HEADER_SRP_LCL_H
60
61#include <openssl/srp.h>
62#include <openssl/sha.h>
63
64#if 0
65#define srp_bn_print(a) {fprintf(stderr, #a "="); BN_print_fp(stderr,a); \
66 fprintf(stderr,"\n");}
67#else
68#define srp_bn_print(a)
69#endif
70
71
72
73#ifdef __cplusplus
74extern "C" {
75#endif
76
77
78
79#ifdef __cplusplus
80}
81#endif
82
83#endif
diff --git a/src/lib/libssl/src/crypto/srp/srp_lib.c b/src/lib/libssl/src/crypto/srp/srp_lib.c
new file mode 100644
index 0000000000..92cea98dcd
--- /dev/null
+++ b/src/lib/libssl/src/crypto/srp/srp_lib.c
@@ -0,0 +1,357 @@
1/* crypto/srp/srp_lib.c */
2/* Written by Christophe Renou (christophe.renou@edelweb.fr) with
3 * the precious help of Peter Sylvester (peter.sylvester@edelweb.fr)
4 * for the EdelKey project and contributed to the OpenSSL project 2004.
5 */
6/* ====================================================================
7 * Copyright (c) 2004 The OpenSSL Project. All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 *
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 *
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in
18 * the documentation and/or other materials provided with the
19 * distribution.
20 *
21 * 3. All advertising materials mentioning features or use of this
22 * software must display the following acknowledgment:
23 * "This product includes software developed by the OpenSSL Project
24 * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
25 *
26 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
27 * endorse or promote products derived from this software without
28 * prior written permission. For written permission, please contact
29 * licensing@OpenSSL.org.
30 *
31 * 5. Products derived from this software may not be called "OpenSSL"
32 * nor may "OpenSSL" appear in their names without prior written
33 * permission of the OpenSSL Project.
34 *
35 * 6. Redistributions of any form whatsoever must retain the following
36 * acknowledgment:
37 * "This product includes software developed by the OpenSSL Project
38 * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
39 *
40 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
41 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
42 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
43 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
44 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
45 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
46 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
47 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
49 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
50 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
51 * OF THE POSSIBILITY OF SUCH DAMAGE.
52 * ====================================================================
53 *
54 * This product includes cryptographic software written by Eric Young
55 * (eay@cryptsoft.com). This product includes software written by Tim
56 * Hudson (tjh@cryptsoft.com).
57 *
58 */
59#ifndef OPENSSL_NO_SRP
60#include "cryptlib.h"
61#include "srp_lcl.h"
62#include <openssl/srp.h>
63#include <openssl/evp.h>
64
65#if (BN_BYTES == 8)
66#define bn_pack4(a1,a2,a3,a4) 0x##a1##a2##a3##a4##ul
67#endif
68#if (BN_BYTES == 4)
69#define bn_pack4(a1,a2,a3,a4) 0x##a3##a4##ul, 0x##a1##a2##ul
70#endif
71#if (BN_BYTES == 2)
72#define bn_pack4(a1,a2,a3,a4) 0x##a4##u,0x##a3##u,0x##a2##u,0x##a1##u
73#endif
74
75
76#include "srp_grps.h"
77
78static BIGNUM *srp_Calc_k(BIGNUM *N, BIGNUM *g)
79 {
80 /* k = SHA1(N | PAD(g)) -- tls-srp draft 8 */
81
82 unsigned char digest[SHA_DIGEST_LENGTH];
83 unsigned char *tmp;
84 EVP_MD_CTX ctxt;
85 int longg ;
86 int longN = BN_num_bytes(N);
87
88 if ((tmp = OPENSSL_malloc(longN)) == NULL)
89 return NULL;
90 BN_bn2bin(N,tmp) ;
91
92 EVP_MD_CTX_init(&ctxt);
93 EVP_DigestInit_ex(&ctxt, EVP_sha1(), NULL);
94 EVP_DigestUpdate(&ctxt, tmp, longN);
95
96 memset(tmp, 0, longN);
97 longg = BN_bn2bin(g,tmp) ;
98 /* use the zeros behind to pad on left */
99 EVP_DigestUpdate(&ctxt, tmp + longg, longN-longg);
100 EVP_DigestUpdate(&ctxt, tmp, longg);
101 OPENSSL_free(tmp);
102
103 EVP_DigestFinal_ex(&ctxt, digest, NULL);
104 EVP_MD_CTX_cleanup(&ctxt);
105 return BN_bin2bn(digest, sizeof(digest), NULL);
106 }
107
108BIGNUM *SRP_Calc_u(BIGNUM *A, BIGNUM *B, BIGNUM *N)
109 {
 110 /* u = SHA1(PAD(A) || PAD(B)) -- tls-srp draft 8 */
111
112 BIGNUM *u;
113 unsigned char cu[SHA_DIGEST_LENGTH];
114 unsigned char *cAB;
115 EVP_MD_CTX ctxt;
116 int longN;
117 if ((A == NULL) ||(B == NULL) || (N == NULL))
118 return NULL;
119
120 longN= BN_num_bytes(N);
121
122 if ((cAB = OPENSSL_malloc(2*longN)) == NULL)
123 return NULL;
124
125 memset(cAB, 0, longN);
126
127 EVP_MD_CTX_init(&ctxt);
128 EVP_DigestInit_ex(&ctxt, EVP_sha1(), NULL);
129 EVP_DigestUpdate(&ctxt, cAB + BN_bn2bin(A,cAB+longN), longN);
130 EVP_DigestUpdate(&ctxt, cAB + BN_bn2bin(B,cAB+longN), longN);
131 OPENSSL_free(cAB);
132 EVP_DigestFinal_ex(&ctxt, cu, NULL);
133 EVP_MD_CTX_cleanup(&ctxt);
134
135 if (!(u = BN_bin2bn(cu, sizeof(cu), NULL)))
136 return NULL;
137 if (!BN_is_zero(u))
138 return u;
139 BN_free(u);
140 return NULL;
141}
142
143BIGNUM *SRP_Calc_server_key(BIGNUM *A, BIGNUM *v, BIGNUM *u, BIGNUM *b, BIGNUM *N)
144 {
145 BIGNUM *tmp = NULL, *S = NULL;
146 BN_CTX *bn_ctx;
147
148 if (u == NULL || A == NULL || v == NULL || b == NULL || N == NULL)
149 return NULL;
150
151 if ((bn_ctx = BN_CTX_new()) == NULL ||
152 (tmp = BN_new()) == NULL ||
153 (S = BN_new()) == NULL )
154 goto err;
155
156 /* S = (A*v**u) ** b */
157
158 if (!BN_mod_exp(tmp,v,u,N,bn_ctx))
159 goto err;
160 if (!BN_mod_mul(tmp,A,tmp,N,bn_ctx))
161 goto err;
162 if (!BN_mod_exp(S,tmp,b,N,bn_ctx))
163 goto err;
164err:
165 BN_CTX_free(bn_ctx);
166 BN_clear_free(tmp);
167 return S;
168 }
169
170BIGNUM *SRP_Calc_B(BIGNUM *b, BIGNUM *N, BIGNUM *g, BIGNUM *v)
171 {
172 BIGNUM *kv = NULL, *gb = NULL;
173 BIGNUM *B = NULL, *k = NULL;
174 BN_CTX *bn_ctx;
175
176 if (b == NULL || N == NULL || g == NULL || v == NULL ||
177 (bn_ctx = BN_CTX_new()) == NULL)
178 return NULL;
179
180 if ( (kv = BN_new()) == NULL ||
181 (gb = BN_new()) == NULL ||
182 (B = BN_new())== NULL)
183 goto err;
184
185 /* B = g**b + k*v */
186
187 if (!BN_mod_exp(gb,g,b,N,bn_ctx) ||
188 !(k = srp_Calc_k(N,g)) ||
189 !BN_mod_mul(kv,v,k,N,bn_ctx) ||
190 !BN_mod_add(B,gb,kv,N,bn_ctx))
191 {
192 BN_free(B);
193 B = NULL;
194 }
195err:
196 BN_CTX_free(bn_ctx);
197 BN_clear_free(kv);
198 BN_clear_free(gb);
199 BN_free(k);
200 return B;
201 }
202
203BIGNUM *SRP_Calc_x(BIGNUM *s, const char *user, const char *pass)
204 {
205 unsigned char dig[SHA_DIGEST_LENGTH];
206 EVP_MD_CTX ctxt;
207 unsigned char *cs;
208
209 if ((s == NULL) ||
210 (user == NULL) ||
211 (pass == NULL))
212 return NULL;
213
214 if ((cs = OPENSSL_malloc(BN_num_bytes(s))) == NULL)
215 return NULL;
216
217 EVP_MD_CTX_init(&ctxt);
218 EVP_DigestInit_ex(&ctxt, EVP_sha1(), NULL);
219 EVP_DigestUpdate(&ctxt, user, strlen(user));
220 EVP_DigestUpdate(&ctxt, ":", 1);
221 EVP_DigestUpdate(&ctxt, pass, strlen(pass));
222 EVP_DigestFinal_ex(&ctxt, dig, NULL);
223
224 EVP_DigestInit_ex(&ctxt, EVP_sha1(), NULL);
225 BN_bn2bin(s,cs);
226 EVP_DigestUpdate(&ctxt, cs, BN_num_bytes(s));
227 OPENSSL_free(cs);
228 EVP_DigestUpdate(&ctxt, dig, sizeof(dig));
229 EVP_DigestFinal_ex(&ctxt, dig, NULL);
230 EVP_MD_CTX_cleanup(&ctxt);
231
232 return BN_bin2bn(dig, sizeof(dig), NULL);
233 }
234
235BIGNUM *SRP_Calc_A(BIGNUM *a, BIGNUM *N, BIGNUM *g)
236 {
237 BN_CTX *bn_ctx;
238 BIGNUM * A = NULL;
239
240 if (a == NULL || N == NULL || g == NULL ||
241 (bn_ctx = BN_CTX_new()) == NULL)
242 return NULL;
243
244 if ((A = BN_new()) != NULL &&
245 !BN_mod_exp(A,g,a,N,bn_ctx))
246 {
247 BN_free(A);
248 A = NULL;
249 }
250 BN_CTX_free(bn_ctx);
251 return A;
252 }
253
254
255BIGNUM *SRP_Calc_client_key(BIGNUM *N, BIGNUM *B, BIGNUM *g, BIGNUM *x, BIGNUM *a, BIGNUM *u)
256 {
257 BIGNUM *tmp = NULL, *tmp2 = NULL, *tmp3 = NULL , *k = NULL, *K = NULL;
258 BN_CTX *bn_ctx;
259
260 if (u == NULL || B == NULL || N == NULL || g == NULL || x == NULL || a == NULL ||
261 (bn_ctx = BN_CTX_new()) == NULL)
262 return NULL;
263
264 if ((tmp = BN_new()) == NULL ||
265 (tmp2 = BN_new())== NULL ||
266 (tmp3 = BN_new())== NULL ||
267 (K = BN_new()) == NULL)
268 goto err;
269
270 if (!BN_mod_exp(tmp,g,x,N,bn_ctx))
271 goto err;
272 if (!(k = srp_Calc_k(N,g)))
273 goto err;
274 if (!BN_mod_mul(tmp2,tmp,k,N,bn_ctx))
275 goto err;
276 if (!BN_mod_sub(tmp,B,tmp2,N,bn_ctx))
277 goto err;
278
279 if (!BN_mod_mul(tmp3,u,x,N,bn_ctx))
280 goto err;
281 if (!BN_mod_add(tmp2,a,tmp3,N,bn_ctx))
282 goto err;
283 if (!BN_mod_exp(K,tmp,tmp2,N,bn_ctx))
284 goto err;
285
286err :
287 BN_CTX_free(bn_ctx);
288 BN_clear_free(tmp);
289 BN_clear_free(tmp2);
290 BN_clear_free(tmp3);
291 BN_free(k);
292 return K;
293 }
294
295int SRP_Verify_B_mod_N(BIGNUM *B, BIGNUM *N)
296 {
297 BIGNUM *r;
298 BN_CTX *bn_ctx;
299 int ret = 0;
300
301 if (B == NULL || N == NULL ||
302 (bn_ctx = BN_CTX_new()) == NULL)
303 return 0;
304
305 if ((r = BN_new()) == NULL)
306 goto err;
307 /* Checks if B % N == 0 */
308 if (!BN_nnmod(r,B,N,bn_ctx))
309 goto err;
310 ret = !BN_is_zero(r);
311err:
312 BN_CTX_free(bn_ctx);
313 BN_free(r);
314 return ret;
315 }
316
317int SRP_Verify_A_mod_N(BIGNUM *A, BIGNUM *N)
318 {
319 /* Checks if A % N == 0 */
320 return SRP_Verify_B_mod_N(A,N) ;
321 }
322
323
 324/* Check if g and N are known parameters.
325 The values have been generated from the ietf-tls-srp draft version 8
326*/
327char *SRP_check_known_gN_param(BIGNUM *g, BIGNUM *N)
328 {
329 size_t i;
330 if ((g == NULL) || (N == NULL))
331 return 0;
332
333 srp_bn_print(g);
334 srp_bn_print(N);
335
336 for(i = 0; i < KNOWN_GN_NUMBER; i++)
337 {
338 if (BN_cmp(knowngN[i].g, g) == 0 && BN_cmp(knowngN[i].N, N) == 0)
339 return knowngN[i].id;
340 }
341 return NULL;
342 }
343
344SRP_gN *SRP_get_default_gN(const char *id)
345 {
346 size_t i;
347
348 if (id == NULL)
349 return knowngN;
350 for(i = 0; i < KNOWN_GN_NUMBER; i++)
351 {
352 if (strcmp(knowngN[i].id, id)==0)
353 return knowngN + i;
354 }
355 return NULL;
356 }
357#endif
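For reference, the routines above implement the SRP-6a relations from the tls-srp draft; the summary below (not part of the import) shows why SRP_Calc_server_key and SRP_Calc_client_key arrive at the same shared secret, using the same quantities the code computes:

\[
k = \mathrm{SHA1}(N \,\|\, \mathrm{PAD}(g)), \qquad
u = \mathrm{SHA1}(\mathrm{PAD}(A) \,\|\, \mathrm{PAD}(B)), \qquad
x = \mathrm{SHA1}\bigl(s \,\|\, \mathrm{SHA1}(\mathit{user} \,\|\, \texttt{":"} \,\|\, \mathit{pass})\bigr)
\]
\[
v = g^{x}, \qquad A = g^{a}, \qquad B = k\,v + g^{b} \pmod{N}
\]
\[
\text{server: } S = (A \cdot v^{u})^{b} = g^{b(a+ux)}, \qquad
\text{client: } S = (B - k\,g^{x})^{a+ux} = (g^{b})^{a+ux} = g^{b(a+ux)} \pmod{N}
\]

Both sides therefore compute the same S, which is the value returned by SRP_Calc_server_key and SRP_Calc_client_key respectively.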
diff --git a/src/lib/libssl/src/crypto/srp/srp_vfy.c b/src/lib/libssl/src/crypto/srp/srp_vfy.c
new file mode 100644
index 0000000000..c8be907d7f
--- /dev/null
+++ b/src/lib/libssl/src/crypto/srp/srp_vfy.c
@@ -0,0 +1,657 @@
1/* crypto/srp/srp_vfy.c */
2/* Written by Christophe Renou (christophe.renou@edelweb.fr) with
3 * the precious help of Peter Sylvester (peter.sylvester@edelweb.fr)
4 * for the EdelKey project and contributed to the OpenSSL project 2004.
5 */
6/* ====================================================================
7 * Copyright (c) 2004 The OpenSSL Project. All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 *
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 *
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in
18 * the documentation and/or other materials provided with the
19 * distribution.
20 *
21 * 3. All advertising materials mentioning features or use of this
22 * software must display the following acknowledgment:
23 * "This product includes software developed by the OpenSSL Project
24 * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
25 *
26 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
27 * endorse or promote products derived from this software without
28 * prior written permission. For written permission, please contact
29 * licensing@OpenSSL.org.
30 *
31 * 5. Products derived from this software may not be called "OpenSSL"
32 * nor may "OpenSSL" appear in their names without prior written
33 * permission of the OpenSSL Project.
34 *
35 * 6. Redistributions of any form whatsoever must retain the following
36 * acknowledgment:
37 * "This product includes software developed by the OpenSSL Project
38 * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
39 *
40 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
41 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
42 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
43 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
44 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
45 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
46 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
47 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
49 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
50 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
51 * OF THE POSSIBILITY OF SUCH DAMAGE.
52 * ====================================================================
53 *
54 * This product includes cryptographic software written by Eric Young
55 * (eay@cryptsoft.com). This product includes software written by Tim
56 * Hudson (tjh@cryptsoft.com).
57 *
58 */
59#ifndef OPENSSL_NO_SRP
60#include "cryptlib.h"
61#include "srp_lcl.h"
62#include <openssl/srp.h>
63#include <openssl/evp.h>
64#include <openssl/buffer.h>
65#include <openssl/rand.h>
66#include <openssl/txt_db.h>
67
68#define SRP_RANDOM_SALT_LEN 20
69#define MAX_LEN 2500
70
71static char b64table[] =
72 "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz./";
73
74/* the following two conversion routines have been inspired by code from Stanford */
75
76/*
77 * Convert a base64 string into raw byte array representation.
78 */
79static int t_fromb64(unsigned char *a, const char *src)
80 {
81 char *loc;
82 int i, j;
83 int size;
84
85 while(*src && (*src == ' ' || *src == '\t' || *src == '\n'))
86 ++src;
87 size = strlen(src);
88 i = 0;
89 while(i < size)
90 {
91 loc = strchr(b64table, src[i]);
92 if(loc == (char *) 0) break;
93 else a[i] = loc - b64table;
94 ++i;
95 }
96 size = i;
97 i = size - 1;
98 j = size;
99 while(1)
100 {
101 a[j] = a[i];
102 if(--i < 0) break;
103 a[j] |= (a[i] & 3) << 6;
104 --j;
105 a[j] = (unsigned char) ((a[i] & 0x3c) >> 2);
106 if(--i < 0) break;
107 a[j] |= (a[i] & 0xf) << 4;
108 --j;
109 a[j] = (unsigned char) ((a[i] & 0x30) >> 4);
110 if(--i < 0) break;
111 a[j] |= (a[i] << 2);
112
113 a[--j] = 0;
114 if(--i < 0) break;
115 }
116 while(a[j] == 0 && j <= size) ++j;
117 i = 0;
118 while (j <= size) a[i++] = a[j++];
119 return i;
120 }
121
122
123/*
124 * Convert a raw byte string into a null-terminated base64 ASCII string.
125 */
126static char *t_tob64(char *dst, const unsigned char *src, int size)
127 {
128 int c, pos = size % 3;
129 unsigned char b0 = 0, b1 = 0, b2 = 0, notleading = 0;
130 char *olddst = dst;
131
132 switch(pos)
133 {
134 case 1:
135 b2 = src[0];
136 break;
137 case 2:
138 b1 = src[0];
139 b2 = src[1];
140 break;
141 }
142
143 while(1)
144 {
145 c = (b0 & 0xfc) >> 2;
146 if(notleading || c != 0)
147 {
148 *dst++ = b64table[c];
149 notleading = 1;
150 }
151 c = ((b0 & 3) << 4) | ((b1 & 0xf0) >> 4);
152 if(notleading || c != 0)
153 {
154 *dst++ = b64table[c];
155 notleading = 1;
156 }
157 c = ((b1 & 0xf) << 2) | ((b2 & 0xc0) >> 6);
158 if(notleading || c != 0)
159 {
160 *dst++ = b64table[c];
161 notleading = 1;
162 }
163 c = b2 & 0x3f;
164 if(notleading || c != 0)
165 {
166 *dst++ = b64table[c];
167 notleading = 1;
168 }
169 if(pos >= size) break;
170 else
171 {
172 b0 = src[pos++];
173 b1 = src[pos++];
174 b2 = src[pos++];
175 }
176 }
177
178 *dst++ = '\0';
179 return olddst;
180 }
181
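	/*
	 * Editorial sketch, not part of the commit: a round-trip check for the
	 * two static helpers above.  Both use the non-standard alphabet in
	 * b64table and, being file-local, such a check would have to live in
	 * this translation unit.  memcmp() is assumed available via <string.h>.
	 */
	static int t_b64_roundtrip(void)
		{
		unsigned char raw[3] = { 0x01, 0x23, 0x45 };
		unsigned char back[MAX_LEN];
		char enc[8];			/* at most 4 chars plus the NUL */
		int n;

		t_tob64(enc, raw, sizeof(raw));
		n = t_fromb64(back, enc);
		return n == (int)sizeof(raw) && memcmp(raw, back, n) == 0;
		}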
182static void SRP_user_pwd_free(SRP_user_pwd *user_pwd)
183 {
184 if (user_pwd == NULL)
185 return;
186 BN_free(user_pwd->s);
187 BN_clear_free(user_pwd->v);
188 OPENSSL_free(user_pwd->id);
189 OPENSSL_free(user_pwd->info);
190 OPENSSL_free(user_pwd);
191 }
192
193static SRP_user_pwd *SRP_user_pwd_new()
194 {
195 SRP_user_pwd *ret = OPENSSL_malloc(sizeof(SRP_user_pwd));
196 if (ret == NULL)
197 return NULL;
198 ret->N = NULL;
199 ret->g = NULL;
200 ret->s = NULL;
201 ret->v = NULL;
202 ret->id = NULL ;
203 ret->info = NULL;
204 return ret;
205 }
206
207static void SRP_user_pwd_set_gN(SRP_user_pwd *vinfo, const BIGNUM *g,
208 const BIGNUM *N)
209 {
210 vinfo->N = N;
211 vinfo->g = g;
212 }
213
214static int SRP_user_pwd_set_ids(SRP_user_pwd *vinfo, const char *id,
215 const char *info)
216 {
217 if (id != NULL && NULL == (vinfo->id = BUF_strdup(id)))
218 return 0;
219 return (info == NULL || NULL != (vinfo->info = BUF_strdup(info))) ;
220 }
221
222static int SRP_user_pwd_set_sv(SRP_user_pwd *vinfo, const char *s,
223 const char *v)
224 {
225 unsigned char tmp[MAX_LEN];
226 int len;
227
228 if (strlen(s) > MAX_LEN || strlen(v) > MAX_LEN)
229 return 0;
230 len = t_fromb64(tmp, v);
231 if (NULL == (vinfo->v = BN_bin2bn(tmp, len, NULL)) )
232 return 0;
233 len = t_fromb64(tmp, s);
234 return ((vinfo->s = BN_bin2bn(tmp, len, NULL)) != NULL) ;
235 }
236
237static int SRP_user_pwd_set_sv_BN(SRP_user_pwd *vinfo, BIGNUM *s, BIGNUM *v)
238 {
239 vinfo->v = v;
240 vinfo->s = s;
241 return (vinfo->s != NULL && vinfo->v != NULL) ;
242 }
243
244SRP_VBASE *SRP_VBASE_new(char *seed_key)
245 {
246 SRP_VBASE *vb = (SRP_VBASE *) OPENSSL_malloc(sizeof(SRP_VBASE));
247
248 if (vb == NULL)
249 return NULL;
250 if (!(vb->users_pwd = sk_SRP_user_pwd_new_null()) ||
251 !(vb->gN_cache = sk_SRP_gN_cache_new_null()))
252 {
253 OPENSSL_free(vb);
254 return NULL;
255 }
256 vb->default_g = NULL;
257 vb->default_N = NULL;
258 vb->seed_key = NULL;
259 if ((seed_key != NULL) &&
260 (vb->seed_key = BUF_strdup(seed_key)) == NULL)
261 {
262 sk_SRP_user_pwd_free(vb->users_pwd);
263 sk_SRP_gN_cache_free(vb->gN_cache);
264 OPENSSL_free(vb);
265 return NULL;
266 }
267 return vb;
268 }
269
270
271int SRP_VBASE_free(SRP_VBASE *vb)
272 {
273 sk_SRP_user_pwd_pop_free(vb->users_pwd,SRP_user_pwd_free);
274 sk_SRP_gN_cache_free(vb->gN_cache);
275 OPENSSL_free(vb->seed_key);
276 OPENSSL_free(vb);
277 return 0;
278 }
279
280
281static SRP_gN_cache *SRP_gN_new_init(const char *ch)
282 {
283 unsigned char tmp[MAX_LEN];
284 int len;
285
286 SRP_gN_cache *newgN = (SRP_gN_cache *)OPENSSL_malloc(sizeof(SRP_gN_cache));
287 if (newgN == NULL)
288 return NULL;
289
290 if ((newgN->b64_bn = BUF_strdup(ch)) == NULL)
291 goto err;
292
293 len = t_fromb64(tmp, ch);
294 if ((newgN->bn = BN_bin2bn(tmp, len, NULL)))
295 return newgN;
296
297 OPENSSL_free(newgN->b64_bn);
298err:
299 OPENSSL_free(newgN);
300 return NULL;
301 }
302
303
304static void SRP_gN_free(SRP_gN_cache *gN_cache)
305 {
306 if (gN_cache == NULL)
307 return;
308 OPENSSL_free(gN_cache->b64_bn);
309 BN_free(gN_cache->bn);
310 OPENSSL_free(gN_cache);
311 }
312
313static SRP_gN *SRP_get_gN_by_id(const char *id, STACK_OF(SRP_gN) *gN_tab)
314 {
315 int i;
316
317 SRP_gN *gN;
318 if (gN_tab != NULL)
319 for(i = 0; i < sk_SRP_gN_num(gN_tab); i++)
320 {
321 gN = sk_SRP_gN_value(gN_tab, i);
322 if (gN && (id == NULL || strcmp(gN->id,id)==0))
323 return gN;
324 }
325
326 return SRP_get_default_gN(id);
327 }
328
329static BIGNUM *SRP_gN_place_bn(STACK_OF(SRP_gN_cache) *gN_cache, char *ch)
330 {
331 int i;
332 if (gN_cache == NULL)
333 return NULL;
334
 335	/* check whether we already have one... */
336 for(i = 0; i < sk_SRP_gN_cache_num(gN_cache); i++)
337 {
338 SRP_gN_cache *cache = sk_SRP_gN_cache_value(gN_cache, i);
339 if (strcmp(cache->b64_bn,ch)==0)
340 return cache->bn;
341 }
342 { /* it is the first time that we find it */
343 SRP_gN_cache *newgN = SRP_gN_new_init(ch);
344 if (newgN)
345 {
346 if (sk_SRP_gN_cache_insert(gN_cache,newgN,0)>0)
347 return newgN->bn;
348 SRP_gN_free(newgN);
349 }
350 }
351 return NULL;
352 }
353
 354/* this function parses the verifier file. The format is:
355 * string(index):base64(N):base64(g):0
356 * string(username):base64(v):base64(salt):int(index)
357 */
358
359
360int SRP_VBASE_init(SRP_VBASE *vb, char *verifier_file)
361 {
362 int error_code ;
363 STACK_OF(SRP_gN) *SRP_gN_tab = sk_SRP_gN_new_null();
364 char *last_index = NULL;
365 int i;
366 char **pp;
367
368 SRP_gN *gN = NULL;
369 SRP_user_pwd *user_pwd = NULL ;
370
371 TXT_DB *tmpdb = NULL;
372 BIO *in = BIO_new(BIO_s_file());
373
374 error_code = SRP_ERR_OPEN_FILE;
375
376 if (in == NULL || BIO_read_filename(in,verifier_file) <= 0)
377 goto err;
378
379 error_code = SRP_ERR_VBASE_INCOMPLETE_FILE;
380
381 if ((tmpdb =TXT_DB_read(in,DB_NUMBER)) == NULL)
382 goto err;
383
384 error_code = SRP_ERR_MEMORY;
385
386
387 if (vb->seed_key)
388 {
389 last_index = SRP_get_default_gN(NULL)->id;
390 }
391 for (i = 0; i < sk_OPENSSL_PSTRING_num(tmpdb->data); i++)
392 {
393 pp = (char **)sk_OPENSSL_PSTRING_value(tmpdb->data,i);
394 if (pp[DB_srptype][0] == DB_SRP_INDEX)
395 {
 396			/* we add this (g, N) pair to the internal stack */
397
398 if ((gN = (SRP_gN *)OPENSSL_malloc(sizeof(SRP_gN))) == NULL)
399 goto err;
400
401 if (!(gN->id = BUF_strdup(pp[DB_srpid]))
402 || !(gN->N = SRP_gN_place_bn(vb->gN_cache,pp[DB_srpverifier]))
403 || !(gN->g = SRP_gN_place_bn(vb->gN_cache,pp[DB_srpsalt]))
404 || sk_SRP_gN_insert(SRP_gN_tab,gN,0) == 0)
405 goto err;
406
407 gN = NULL;
408
409 if (vb->seed_key != NULL)
410 {
411 last_index = pp[DB_srpid];
412 }
413 }
414 else if (pp[DB_srptype][0] == DB_SRP_VALID)
415 {
416 /* it is a user .... */
417 SRP_gN *lgN;
418 if ((lgN = SRP_get_gN_by_id(pp[DB_srpgN],SRP_gN_tab))!=NULL)
419 {
420 error_code = SRP_ERR_MEMORY;
421 if ((user_pwd = SRP_user_pwd_new()) == NULL)
422 goto err;
423
424 SRP_user_pwd_set_gN(user_pwd,lgN->g,lgN->N);
425 if (!SRP_user_pwd_set_ids(user_pwd, pp[DB_srpid],pp[DB_srpinfo]))
426 goto err;
427
428 error_code = SRP_ERR_VBASE_BN_LIB;
429 if (!SRP_user_pwd_set_sv(user_pwd, pp[DB_srpsalt],pp[DB_srpverifier]))
430 goto err;
431
432 if (sk_SRP_user_pwd_insert(vb->users_pwd, user_pwd, 0) == 0)
433 goto err;
 434				user_pwd = NULL; /* abandon responsibility */
435 }
436 }
437 }
438
439 if (last_index != NULL)
440 {
441 /* this means that we want to simulate a default user */
442
443 if (((gN = SRP_get_gN_by_id(last_index,SRP_gN_tab))==NULL))
444 {
445 error_code = SRP_ERR_VBASE_BN_LIB;
446 goto err;
447 }
448 vb->default_g = gN->g ;
449 vb->default_N = gN->N ;
450 gN = NULL ;
451 }
452 error_code = SRP_NO_ERROR;
453
454 err:
 455	/* there may still be some leaks to fix; if this fails, the application most likely terminates */
456
457 if (gN != NULL)
458 {
459 OPENSSL_free(gN->id);
460 OPENSSL_free(gN);
461 }
462
463 SRP_user_pwd_free(user_pwd);
464
465 if (tmpdb) TXT_DB_free(tmpdb);
466 if (in) BIO_free_all(in);
467
468 sk_SRP_gN_free(SRP_gN_tab);
469
470 return error_code;
471
472 }
473
474
475SRP_user_pwd *SRP_VBASE_get_by_user(SRP_VBASE *vb, char *username)
476 {
477 int i;
478 SRP_user_pwd *user;
479 unsigned char digv[SHA_DIGEST_LENGTH];
480 unsigned char digs[SHA_DIGEST_LENGTH];
481 EVP_MD_CTX ctxt;
482
483 if (vb == NULL)
484 return NULL;
485 for(i = 0; i < sk_SRP_user_pwd_num(vb->users_pwd); i++)
486 {
487 user = sk_SRP_user_pwd_value(vb->users_pwd, i);
488 if (strcmp(user->id,username)==0)
489 return user;
490 }
491 if ((vb->seed_key == NULL) ||
492 (vb->default_g == NULL) ||
493 (vb->default_N == NULL))
494 return NULL;
495
 496/* if the user is unknown we still set up parameters, provided we have a seed_key */
497
498 if ((user = SRP_user_pwd_new()) == NULL)
499 return NULL;
500
501 SRP_user_pwd_set_gN(user,vb->default_g,vb->default_N);
502
503 if (!SRP_user_pwd_set_ids(user,username,NULL))
504 goto err;
505
506 RAND_pseudo_bytes(digv, SHA_DIGEST_LENGTH);
507 EVP_MD_CTX_init(&ctxt);
508 EVP_DigestInit_ex(&ctxt, EVP_sha1(), NULL);
509 EVP_DigestUpdate(&ctxt, vb->seed_key, strlen(vb->seed_key));
510 EVP_DigestUpdate(&ctxt, username, strlen(username));
511 EVP_DigestFinal_ex(&ctxt, digs, NULL);
512 EVP_MD_CTX_cleanup(&ctxt);
513 if (SRP_user_pwd_set_sv_BN(user, BN_bin2bn(digs,SHA_DIGEST_LENGTH,NULL), BN_bin2bn(digv,SHA_DIGEST_LENGTH, NULL)))
514 return user;
515
516err: SRP_user_pwd_free(user);
517 return NULL;
518 }
519
520
521/*
522 create a verifier (*salt,*verifier,g and N are in base64)
523*/
524char *SRP_create_verifier(const char *user, const char *pass, char **salt,
525 char **verifier, const char *N, const char *g)
526 {
527 int len;
528 char * result=NULL;
529 char *vf;
530 BIGNUM *N_bn = NULL, *g_bn = NULL, *s = NULL, *v = NULL;
531 unsigned char tmp[MAX_LEN];
532 unsigned char tmp2[MAX_LEN];
533 char * defgNid = NULL;
534
535 if ((user == NULL)||
536 (pass == NULL)||
537 (salt == NULL)||
538 (verifier == NULL))
539 goto err;
540
541 if (N)
542 {
543 if (!(len = t_fromb64(tmp, N))) goto err;
544 N_bn = BN_bin2bn(tmp, len, NULL);
545 if (!(len = t_fromb64(tmp, g))) goto err;
546 g_bn = BN_bin2bn(tmp, len, NULL);
547 defgNid = "*";
548 }
549 else
550 {
551 SRP_gN * gN = SRP_get_gN_by_id(g, NULL) ;
552 if (gN == NULL)
553 goto err;
554 N_bn = gN->N;
555 g_bn = gN->g;
556 defgNid = gN->id;
557 }
558
559 if (*salt == NULL)
560 {
561 RAND_pseudo_bytes(tmp2, SRP_RANDOM_SALT_LEN);
562
563 s = BN_bin2bn(tmp2, SRP_RANDOM_SALT_LEN, NULL);
564 }
565 else
566 {
567 if (!(len = t_fromb64(tmp2, *salt)))
568 goto err;
569 s = BN_bin2bn(tmp2, len, NULL);
570 }
571
572
573 if(!SRP_create_verifier_BN(user, pass, &s, &v, N_bn, g_bn)) goto err;
574
575 BN_bn2bin(v,tmp);
576 if (((vf = OPENSSL_malloc(BN_num_bytes(v)*2)) == NULL))
577 goto err;
578 t_tob64(vf, tmp, BN_num_bytes(v));
579
580 *verifier = vf;
581 if (*salt == NULL)
582 {
583 char *tmp_salt;
584 if ((tmp_salt = (char *)OPENSSL_malloc(SRP_RANDOM_SALT_LEN * 2)) == NULL)
585 {
586 OPENSSL_free(vf);
587 goto err;
588 }
589 t_tob64(tmp_salt, tmp2, SRP_RANDOM_SALT_LEN);
590 *salt = tmp_salt;
591 }
592
593 result=defgNid;
594
595err:
596 if(N)
597 {
598 BN_free(N_bn);
599 BN_free(g_bn);
600 }
601 return result;
602 }
603
604/*
605 create a verifier (*salt,*verifier,g and N are BIGNUMs)
606*/
607int SRP_create_verifier_BN(const char *user, const char *pass, BIGNUM **salt, BIGNUM **verifier, BIGNUM *N, BIGNUM *g)
608 {
609 int result=0;
610 BIGNUM *x = NULL;
611 BN_CTX *bn_ctx = BN_CTX_new();
612 unsigned char tmp2[MAX_LEN];
613
614 if ((user == NULL)||
615 (pass == NULL)||
616 (salt == NULL)||
617 (verifier == NULL)||
618 (N == NULL)||
619 (g == NULL)||
620 (bn_ctx == NULL))
621 goto err;
622
623 srp_bn_print(N);
624 srp_bn_print(g);
625
626 if (*salt == NULL)
627 {
628 RAND_pseudo_bytes(tmp2, SRP_RANDOM_SALT_LEN);
629
630 *salt = BN_bin2bn(tmp2,SRP_RANDOM_SALT_LEN,NULL);
631 }
632
633 x = SRP_Calc_x(*salt,user,pass);
634
635 *verifier = BN_new();
636 if(*verifier == NULL) goto err;
637
638 if (!BN_mod_exp(*verifier,g,x,N,bn_ctx))
639 {
640 BN_clear_free(*verifier);
641 goto err;
642 }
643
644 srp_bn_print(*verifier);
645
646 result=1;
647
648err:
649
650 BN_clear_free(x);
651 BN_CTX_free(bn_ctx);
652 return result;
653 }
654
655
656
657#endif
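Taken together, srp_vfy.c implements a small verifier database: SRP_VBASE_init() loads a verifier file in the format sketched in the comment above it, and SRP_VBASE_get_by_user() hands back the stored salt and verifier, where the verifier satisfies v = g^x mod N with x derived from salt and password (see SRP_create_verifier_BN() above). A minimal usage sketch, not part of the commit; the file path is a placeholder and error handling is trimmed:

	#include <openssl/srp.h>

	/* Load a verifier file and report whether "alice" has a record. */
	static int have_alice(const char *path)
		{
		SRP_VBASE *vb = SRP_VBASE_new(NULL);	/* no seed_key: unknown users yield NULL */
		char alice[] = "alice";
		int found = 0;

		if (vb == NULL)
			return 0;
		if (SRP_VBASE_init(vb, (char *)path) == SRP_NO_ERROR)
			found = SRP_VBASE_get_by_user(vb, alice) != NULL;	/* record owned by vb */
		SRP_VBASE_free(vb);
		return found;
		}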
diff --git a/src/lib/libssl/src/crypto/srp/srptest.c b/src/lib/libssl/src/crypto/srp/srptest.c
new file mode 100644
index 0000000000..04b66b4544
--- /dev/null
+++ b/src/lib/libssl/src/crypto/srp/srptest.c
@@ -0,0 +1,162 @@
1#include <openssl/opensslconf.h>
2#ifdef OPENSSL_NO_SRP
3
4#include <stdio.h>
5
6int main(int argc, char *argv[])
7 {
8 printf("No SRP support\n");
9 return(0);
10 }
11
12#else
13
14#include <openssl/srp.h>
15#include <openssl/rand.h>
16#include <openssl/err.h>
17
18static void showbn(const char *name, const BIGNUM *bn)
19 {
20 fputs(name, stdout);
21 fputs(" = ", stdout);
22 BN_print_fp(stdout, bn);
23 putc('\n', stdout);
24 }
25
26#define RANDOM_SIZE 32 /* use 256 bits on each side */
27
28static int run_srp(const char *username, const char *client_pass, const char *server_pass)
29 {
30 int ret=-1;
31 BIGNUM *s = NULL;
32 BIGNUM *v = NULL;
33 BIGNUM *a = NULL;
34 BIGNUM *b = NULL;
35 BIGNUM *u = NULL;
36 BIGNUM *x = NULL;
37 BIGNUM *Apub = NULL;
38 BIGNUM *Bpub = NULL;
39 BIGNUM *Kclient = NULL;
40 BIGNUM *Kserver = NULL;
41 unsigned char rand_tmp[RANDOM_SIZE];
42 /* use builtin 1024-bit params */
43 SRP_gN *GN = SRP_get_default_gN("1024");
44
45 if(GN == NULL)
46 {
47 fprintf(stderr, "Failed to get SRP parameters\n");
48 return -1;
49 }
50 /* Set up server's password entry */
51 if(!SRP_create_verifier_BN(username, server_pass, &s, &v, GN->N, GN->g))
52 {
53 fprintf(stderr, "Failed to create SRP verifier\n");
54 return -1;
55 }
56
57 showbn("N", GN->N);
58 showbn("g", GN->g);
59 showbn("Salt", s);
60 showbn("Verifier", v);
61
62 /* Server random */
63 RAND_pseudo_bytes(rand_tmp, sizeof(rand_tmp));
64 b = BN_bin2bn(rand_tmp, sizeof(rand_tmp), NULL);
65 /* TODO - check b != 0 */
66 showbn("b", b);
67
68 /* Server's first message */
69 Bpub = SRP_Calc_B(b, GN->N, GN->g, v);
70 showbn("B", Bpub);
71
72 if(!SRP_Verify_B_mod_N(Bpub, GN->N))
73 {
74 fprintf(stderr, "Invalid B\n");
75 return -1;
76 }
77
78 /* Client random */
79 RAND_pseudo_bytes(rand_tmp, sizeof(rand_tmp));
80 a = BN_bin2bn(rand_tmp, sizeof(rand_tmp), NULL);
81 /* TODO - check a != 0 */
82 showbn("a", a);
83
84 /* Client's response */
85 Apub = SRP_Calc_A(a, GN->N, GN->g);
86 showbn("A", Apub);
87
88 if(!SRP_Verify_A_mod_N(Apub, GN->N))
89 {
90 fprintf(stderr, "Invalid A\n");
91 return -1;
92 }
93
94 /* Both sides calculate u */
95 u = SRP_Calc_u(Apub, Bpub, GN->N);
96
97 /* Client's key */
98 x = SRP_Calc_x(s, username, client_pass);
99 Kclient = SRP_Calc_client_key(GN->N, Bpub, GN->g, x, a, u);
100 showbn("Client's key", Kclient);
101
102 /* Server's key */
103 Kserver = SRP_Calc_server_key(Apub, v, u, b, GN->N);
104 showbn("Server's key", Kserver);
105
106 if(BN_cmp(Kclient, Kserver) == 0)
107 {
108 ret = 0;
109 }
110 else
111 {
112 fprintf(stderr, "Keys mismatch\n");
113 ret = 1;
114 }
115
116 BN_clear_free(Kclient);
117 BN_clear_free(Kserver);
118 BN_clear_free(x);
119 BN_free(u);
120 BN_free(Apub);
121 BN_clear_free(a);
122 BN_free(Bpub);
123 BN_clear_free(b);
124 BN_free(s);
125 BN_clear_free(v);
126
127 return ret;
128 }
129
130int main(int argc, char **argv)
131 {
132 BIO *bio_err;
133 bio_err = BIO_new_fp(stderr, BIO_NOCLOSE);
134
135 CRYPTO_malloc_debug_init();
136 CRYPTO_dbg_set_options(V_CRYPTO_MDEBUG_ALL);
137 CRYPTO_mem_ctrl(CRYPTO_MEM_CHECK_ON);
138
139 ERR_load_crypto_strings();
140
141 /* "Negative" test, expect a mismatch */
142 if(run_srp("alice", "password1", "password2") == 0)
143 {
144 fprintf(stderr, "Mismatched SRP run failed\n");
145 return 1;
146 }
147
148 /* "Positive" test, should pass */
149 if(run_srp("alice", "password", "password") != 0)
150 {
151 fprintf(stderr, "Plain SRP run failed\n");
152 return 1;
153 }
154
155 CRYPTO_cleanup_all_ex_data();
156 ERR_remove_thread_state(NULL);
157 ERR_free_strings();
158 CRYPTO_mem_leaks(bio_err);
159
160 return 0;
161 }
162#endif
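run_srp() above walks the SRP-6a exchange end to end using the library's own primitives. Written out (a sketch of the standard algebra only; the exact hashing and padding details live in SRP_Calc_x(), SRP_Calc_u() and friends and are not restated here), with H a hash, s the salt and k = H(N || g):

	x = H\bigl(s \,\Vert\, H(\mathrm{user} \,\Vert\, \text{``:''} \,\Vert\, \mathrm{pass})\bigr), \qquad v = g^{x} \bmod N,
	A = g^{a} \bmod N, \qquad B = (k\,v + g^{b}) \bmod N, \qquad u = H(A \,\Vert\, B),
	K_{\mathrm{client}} = (B - k\,g^{x})^{\,a + u x} \bmod N, \qquad
	K_{\mathrm{server}} = (A\,v^{u})^{\,b} \bmod N .

When both sides use the same password (hence the same x and v), both expressions reduce to g^{b(a+ux)} mod N, which is exactly what the final BN_cmp() in run_srp() checks; with mismatched passwords the keys differ, which is what the "negative" test in main() expects.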
diff --git a/src/lib/libssl/src/crypto/ts/ts.h b/src/lib/libssl/src/crypto/ts/ts.h
index 190e8a1bf2..c2448e3c3b 100644
--- a/src/lib/libssl/src/crypto/ts/ts.h
+++ b/src/lib/libssl/src/crypto/ts/ts.h
@@ -86,9 +86,6 @@
86#include <openssl/dh.h> 86#include <openssl/dh.h>
87#endif 87#endif
88 88
89#include <openssl/evp.h>
90
91
92#ifdef __cplusplus 89#ifdef __cplusplus
93extern "C" { 90extern "C" {
94#endif 91#endif
diff --git a/src/lib/libssl/src/crypto/ts/ts_rsp_verify.c b/src/lib/libssl/src/crypto/ts/ts_rsp_verify.c
index e1f3b534af..afe16afbe4 100644
--- a/src/lib/libssl/src/crypto/ts/ts_rsp_verify.c
+++ b/src/lib/libssl/src/crypto/ts/ts_rsp_verify.c
@@ -614,12 +614,15 @@ static int TS_compute_imprint(BIO *data, TS_TST_INFO *tst_info,
614 goto err; 614 goto err;
615 } 615 }
616 616
617 EVP_DigestInit(&md_ctx, md); 617 if (!EVP_DigestInit(&md_ctx, md))
618 goto err;
618 while ((length = BIO_read(data, buffer, sizeof(buffer))) > 0) 619 while ((length = BIO_read(data, buffer, sizeof(buffer))) > 0)
619 { 620 {
620 EVP_DigestUpdate(&md_ctx, buffer, length); 621 if (!EVP_DigestUpdate(&md_ctx, buffer, length))
622 goto err;
621 } 623 }
622 EVP_DigestFinal(&md_ctx, *imprint, NULL); 624 if (!EVP_DigestFinal(&md_ctx, *imprint, NULL))
625 goto err;
623 626
624 return 1; 627 return 1;
625 err: 628 err:
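The hunk above turns unchecked EVP_Digest*() calls into checked ones. A standalone sketch of the same defensive pattern, not from the commit, using the 1.0.1-era stack-allocated EVP_MD_CTX:

	#include <openssl/evp.h>

	/* Hash a single buffer, reporting failure instead of assuming success. */
	static int digest_buf(const EVP_MD *md, const void *buf, size_t len,
			      unsigned char *out, unsigned int *outlen)
		{
		EVP_MD_CTX ctx;
		int ok;

		EVP_MD_CTX_init(&ctx);
		ok = EVP_DigestInit_ex(&ctx, md, NULL)
		    && EVP_DigestUpdate(&ctx, buf, len)
		    && EVP_DigestFinal_ex(&ctx, out, outlen);
		EVP_MD_CTX_cleanup(&ctx);
		return ok;
		}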
diff --git a/src/lib/libssl/src/crypto/whrlpool/Makefile b/src/lib/libssl/src/crypto/whrlpool/Makefile
index 566b996290..f4d46e4d17 100644
--- a/src/lib/libssl/src/crypto/whrlpool/Makefile
+++ b/src/lib/libssl/src/crypto/whrlpool/Makefile
@@ -89,5 +89,8 @@ clean:
89 89
90wp_block.o: ../../include/openssl/e_os2.h ../../include/openssl/opensslconf.h 90wp_block.o: ../../include/openssl/e_os2.h ../../include/openssl/opensslconf.h
91wp_block.o: ../../include/openssl/whrlpool.h wp_block.c wp_locl.h 91wp_block.o: ../../include/openssl/whrlpool.h wp_block.c wp_locl.h
92wp_dgst.o: ../../include/openssl/e_os2.h ../../include/openssl/opensslconf.h 92wp_dgst.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h
93wp_dgst.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h
94wp_dgst.o: ../../include/openssl/ossl_typ.h ../../include/openssl/safestack.h
95wp_dgst.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
93wp_dgst.o: ../../include/openssl/whrlpool.h wp_dgst.c wp_locl.h 96wp_dgst.o: ../../include/openssl/whrlpool.h wp_dgst.c wp_locl.h
diff --git a/src/lib/libssl/src/crypto/whrlpool/whrlpool.h b/src/lib/libssl/src/crypto/whrlpool/whrlpool.h
index 03c91da115..9e01f5b076 100644
--- a/src/lib/libssl/src/crypto/whrlpool/whrlpool.h
+++ b/src/lib/libssl/src/crypto/whrlpool/whrlpool.h
@@ -24,6 +24,9 @@ typedef struct {
24 } WHIRLPOOL_CTX; 24 } WHIRLPOOL_CTX;
25 25
26#ifndef OPENSSL_NO_WHIRLPOOL 26#ifndef OPENSSL_NO_WHIRLPOOL
27#ifdef OPENSSL_FIPS
28int private_WHIRLPOOL_Init(WHIRLPOOL_CTX *c);
29#endif
27int WHIRLPOOL_Init (WHIRLPOOL_CTX *c); 30int WHIRLPOOL_Init (WHIRLPOOL_CTX *c);
28int WHIRLPOOL_Update (WHIRLPOOL_CTX *c,const void *inp,size_t bytes); 31int WHIRLPOOL_Update (WHIRLPOOL_CTX *c,const void *inp,size_t bytes);
29void WHIRLPOOL_BitUpdate(WHIRLPOOL_CTX *c,const void *inp,size_t bits); 32void WHIRLPOOL_BitUpdate(WHIRLPOOL_CTX *c,const void *inp,size_t bits);
diff --git a/src/lib/libssl/src/crypto/whrlpool/wp_block.c b/src/lib/libssl/src/crypto/whrlpool/wp_block.c
index 221f6cc59f..824ed1827c 100644
--- a/src/lib/libssl/src/crypto/whrlpool/wp_block.c
+++ b/src/lib/libssl/src/crypto/whrlpool/wp_block.c
@@ -68,9 +68,9 @@ typedef unsigned long long u64;
68 CPUs this is actually faster! */ 68 CPUs this is actually faster! */
69# endif 69# endif
70# define GO_FOR_MMX(ctx,inp,num) do { \ 70# define GO_FOR_MMX(ctx,inp,num) do { \
71 extern unsigned long OPENSSL_ia32cap_P; \ 71 extern unsigned int OPENSSL_ia32cap_P[]; \
72 void whirlpool_block_mmx(void *,const void *,size_t); \ 72 void whirlpool_block_mmx(void *,const void *,size_t); \
73 if (!(OPENSSL_ia32cap_P & (1<<23))) break; \ 73 if (!(OPENSSL_ia32cap_P[0] & (1<<23))) break; \
74 whirlpool_block_mmx(ctx->H.c,inp,num); return; \ 74 whirlpool_block_mmx(ctx->H.c,inp,num); return; \
75 } while (0) 75 } while (0)
76# endif 76# endif
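The hunk above tracks the change of OPENSSL_ia32cap_P from a single unsigned long to a vector of unsigned int words; the Whirlpool MMX path keys off bit 23 of word 0. A trivial sketch of that test in isolation (assumption: same symbol and flag layout as in the hunk, word 0 holding CPUID.1 EDX, whose bit 23 is MMX):

	extern unsigned int OPENSSL_ia32cap_P[];

	static int cpu_has_mmx(void)
		{
		return (OPENSSL_ia32cap_P[0] & (1 << 23)) != 0;
		}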
diff --git a/src/lib/libssl/src/crypto/whrlpool/wp_dgst.c b/src/lib/libssl/src/crypto/whrlpool/wp_dgst.c
index ee5c5c1bf3..7e28bef51d 100644
--- a/src/lib/libssl/src/crypto/whrlpool/wp_dgst.c
+++ b/src/lib/libssl/src/crypto/whrlpool/wp_dgst.c
@@ -52,9 +52,10 @@
52 */ 52 */
53 53
54#include "wp_locl.h" 54#include "wp_locl.h"
55#include <openssl/crypto.h>
55#include <string.h> 56#include <string.h>
56 57
57int WHIRLPOOL_Init (WHIRLPOOL_CTX *c) 58fips_md_init(WHIRLPOOL)
58 { 59 {
59 memset (c,0,sizeof(*c)); 60 memset (c,0,sizeof(*c));
60 return(1); 61 return(1);
diff --git a/src/lib/libssl/src/crypto/x509v3/v3_asid.c b/src/lib/libssl/src/crypto/x509v3/v3_asid.c
index 3f434c0603..1587e8ed72 100644
--- a/src/lib/libssl/src/crypto/x509v3/v3_asid.c
+++ b/src/lib/libssl/src/crypto/x509v3/v3_asid.c
@@ -358,6 +358,20 @@ static int ASIdentifierChoice_is_canonical(ASIdentifierChoice *choice)
358 goto done; 358 goto done;
359 } 359 }
360 360
361 /*
362 * Check for inverted range.
363 */
364 i = sk_ASIdOrRange_num(choice->u.asIdsOrRanges) - 1;
365 {
366 ASIdOrRange *a = sk_ASIdOrRange_value(choice->u.asIdsOrRanges, i);
367 ASN1_INTEGER *a_min, *a_max;
368 if (a != NULL && a->type == ASIdOrRange_range) {
369 extract_min_max(a, &a_min, &a_max);
370 if (ASN1_INTEGER_cmp(a_min, a_max) > 0)
371 goto done;
372 }
373 }
374
361 ret = 1; 375 ret = 1;
362 376
363 done: 377 done:
@@ -392,9 +406,18 @@ static int ASIdentifierChoice_canonize(ASIdentifierChoice *choice)
392 return 1; 406 return 1;
393 407
394 /* 408 /*
395 * We have a list. Sort it. 409 * If not a list, or if empty list, it's broken.
410 */
411 if (choice->type != ASIdentifierChoice_asIdsOrRanges ||
412 sk_ASIdOrRange_num(choice->u.asIdsOrRanges) == 0) {
413 X509V3err(X509V3_F_ASIDENTIFIERCHOICE_CANONIZE,
414 X509V3_R_EXTENSION_VALUE_ERROR);
415 return 0;
416 }
417
418 /*
419 * We have a non-empty list. Sort it.
396 */ 420 */
397 OPENSSL_assert(choice->type == ASIdentifierChoice_asIdsOrRanges);
398 sk_ASIdOrRange_sort(choice->u.asIdsOrRanges); 421 sk_ASIdOrRange_sort(choice->u.asIdsOrRanges);
399 422
400 /* 423 /*
@@ -415,6 +438,13 @@ static int ASIdentifierChoice_canonize(ASIdentifierChoice *choice)
415 OPENSSL_assert(ASN1_INTEGER_cmp(a_min, b_min) <= 0); 438 OPENSSL_assert(ASN1_INTEGER_cmp(a_min, b_min) <= 0);
416 439
417 /* 440 /*
441 * Punt inverted ranges.
442 */
443 if (ASN1_INTEGER_cmp(a_min, a_max) > 0 ||
444 ASN1_INTEGER_cmp(b_min, b_max) > 0)
445 goto done;
446
447 /*
418 * Check for overlaps. 448 * Check for overlaps.
419 */ 449 */
420 if (ASN1_INTEGER_cmp(a_max, b_min) >= 0) { 450 if (ASN1_INTEGER_cmp(a_max, b_min) >= 0) {
@@ -465,12 +495,26 @@ static int ASIdentifierChoice_canonize(ASIdentifierChoice *choice)
465 break; 495 break;
466 } 496 }
467 ASIdOrRange_free(b); 497 ASIdOrRange_free(b);
468 sk_ASIdOrRange_delete(choice->u.asIdsOrRanges, i + 1); 498 (void) sk_ASIdOrRange_delete(choice->u.asIdsOrRanges, i + 1);
469 i--; 499 i--;
470 continue; 500 continue;
471 } 501 }
472 } 502 }
473 503
504 /*
505 * Check for final inverted range.
506 */
507 i = sk_ASIdOrRange_num(choice->u.asIdsOrRanges) - 1;
508 {
509 ASIdOrRange *a = sk_ASIdOrRange_value(choice->u.asIdsOrRanges, i);
510 ASN1_INTEGER *a_min, *a_max;
511 if (a != NULL && a->type == ASIdOrRange_range) {
512 extract_min_max(a, &a_min, &a_max);
513 if (ASN1_INTEGER_cmp(a_min, a_max) > 0)
514 goto done;
515 }
516 }
517
474 OPENSSL_assert(ASIdentifierChoice_is_canonical(choice)); /* Paranoia */ 518 OPENSSL_assert(ASIdentifierChoice_is_canonical(choice)); /* Paranoia */
475 519
476 ret = 1; 520 ret = 1;
@@ -498,6 +542,7 @@ static void *v2i_ASIdentifiers(const struct v3_ext_method *method,
498 struct v3_ext_ctx *ctx, 542 struct v3_ext_ctx *ctx,
499 STACK_OF(CONF_VALUE) *values) 543 STACK_OF(CONF_VALUE) *values)
500{ 544{
545 ASN1_INTEGER *min = NULL, *max = NULL;
501 ASIdentifiers *asid = NULL; 546 ASIdentifiers *asid = NULL;
502 int i; 547 int i;
503 548
@@ -508,7 +553,6 @@ static void *v2i_ASIdentifiers(const struct v3_ext_method *method,
508 553
509 for (i = 0; i < sk_CONF_VALUE_num(values); i++) { 554 for (i = 0; i < sk_CONF_VALUE_num(values); i++) {
510 CONF_VALUE *val = sk_CONF_VALUE_value(values, i); 555 CONF_VALUE *val = sk_CONF_VALUE_value(values, i);
511 ASN1_INTEGER *min = NULL, *max = NULL;
512 int i1, i2, i3, is_range, which; 556 int i1, i2, i3, is_range, which;
513 557
514 /* 558 /*
@@ -578,18 +622,19 @@ static void *v2i_ASIdentifiers(const struct v3_ext_method *method,
578 max = s2i_ASN1_INTEGER(NULL, s + i2); 622 max = s2i_ASN1_INTEGER(NULL, s + i2);
579 OPENSSL_free(s); 623 OPENSSL_free(s);
580 if (min == NULL || max == NULL) { 624 if (min == NULL || max == NULL) {
581 ASN1_INTEGER_free(min);
582 ASN1_INTEGER_free(max);
583 X509V3err(X509V3_F_V2I_ASIDENTIFIERS, ERR_R_MALLOC_FAILURE); 625 X509V3err(X509V3_F_V2I_ASIDENTIFIERS, ERR_R_MALLOC_FAILURE);
584 goto err; 626 goto err;
585 } 627 }
628 if (ASN1_INTEGER_cmp(min, max) > 0) {
629 X509V3err(X509V3_F_V2I_ASIDENTIFIERS, X509V3_R_EXTENSION_VALUE_ERROR);
630 goto err;
631 }
586 } 632 }
587 if (!v3_asid_add_id_or_range(asid, which, min, max)) { 633 if (!v3_asid_add_id_or_range(asid, which, min, max)) {
588 ASN1_INTEGER_free(min);
589 ASN1_INTEGER_free(max);
590 X509V3err(X509V3_F_V2I_ASIDENTIFIERS, ERR_R_MALLOC_FAILURE); 634 X509V3err(X509V3_F_V2I_ASIDENTIFIERS, ERR_R_MALLOC_FAILURE);
591 goto err; 635 goto err;
592 } 636 }
637 min = max = NULL;
593 } 638 }
594 639
595 /* 640 /*
@@ -601,6 +646,8 @@ static void *v2i_ASIdentifiers(const struct v3_ext_method *method,
601 646
602 err: 647 err:
603 ASIdentifiers_free(asid); 648 ASIdentifiers_free(asid);
649 ASN1_INTEGER_free(min);
650 ASN1_INTEGER_free(max);
604 return NULL; 651 return NULL;
605} 652}
606 653
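The additions above all guard against "inverted" ranges, i.e. an ASIdOrRange whose minimum exceeds its maximum, both during canonicalisation and when parsing configuration. A toy illustration of the underlying comparison, not from the commit:

	#include <openssl/asn1.h>

	/* Reject a range whose minimum exceeds its maximum, e.g. (10, 5). */
	static int asid_range_is_sane(long lo, long hi)
		{
		ASN1_INTEGER *min = ASN1_INTEGER_new();
		ASN1_INTEGER *max = ASN1_INTEGER_new();
		int ok = 0;

		if (min != NULL && max != NULL
		    && ASN1_INTEGER_set(min, lo)
		    && ASN1_INTEGER_set(max, hi)
		    && ASN1_INTEGER_cmp(min, max) <= 0)
			ok = 1;
		ASN1_INTEGER_free(min);
		ASN1_INTEGER_free(max);
		return ok;
		}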
diff --git a/src/lib/libssl/src/crypto/x86_64cpuid.pl b/src/lib/libssl/src/crypto/x86_64cpuid.pl
index c96821a3c8..7b7b93b223 100644
--- a/src/lib/libssl/src/crypto/x86_64cpuid.pl
+++ b/src/lib/libssl/src/crypto/x86_64cpuid.pl
@@ -7,15 +7,24 @@ if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
7$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/); 7$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
8 8
9$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; 9$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
10open STDOUT,"| $^X ${dir}perlasm/x86_64-xlate.pl $flavour $output"; 10( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
11( $xlate="${dir}perlasm/x86_64-xlate.pl" and -f $xlate) or
12die "can't locate x86_64-xlate.pl";
13
14open STDOUT,"| $^X $xlate $flavour $output";
15
16($arg1,$arg2,$arg3,$arg4)=$win64?("%rcx","%rdx","%r8", "%r9") : # Win64 order
17 ("%rdi","%rsi","%rdx","%rcx"); # Unix order
11 18
12if ($win64) { $arg1="%rcx"; $arg2="%rdx"; }
13else { $arg1="%rdi"; $arg2="%rsi"; }
14print<<___; 19print<<___;
15.extern OPENSSL_cpuid_setup 20.extern OPENSSL_cpuid_setup
21.hidden OPENSSL_cpuid_setup
16.section .init 22.section .init
17 call OPENSSL_cpuid_setup 23 call OPENSSL_cpuid_setup
18 24
25.hidden OPENSSL_ia32cap_P
26.comm OPENSSL_ia32cap_P,8,4
27
19.text 28.text
20 29
21.globl OPENSSL_atomic_add 30.globl OPENSSL_atomic_add
@@ -46,7 +55,7 @@ OPENSSL_rdtsc:
46.type OPENSSL_ia32_cpuid,\@abi-omnipotent 55.type OPENSSL_ia32_cpuid,\@abi-omnipotent
47.align 16 56.align 16
48OPENSSL_ia32_cpuid: 57OPENSSL_ia32_cpuid:
49 mov %rbx,%r8 58 mov %rbx,%r8 # save %rbx
50 59
51 xor %eax,%eax 60 xor %eax,%eax
52 cpuid 61 cpuid
@@ -78,7 +87,15 @@ OPENSSL_ia32_cpuid:
78 # AMD specific 87 # AMD specific
79 mov \$0x80000000,%eax 88 mov \$0x80000000,%eax
80 cpuid 89 cpuid
81 cmp \$0x80000008,%eax 90 cmp \$0x80000001,%eax
91 jb .Lintel
92 mov %eax,%r10d
93 mov \$0x80000001,%eax
94 cpuid
95 or %ecx,%r9d
96 and \$0x00000801,%r9d # isolate AMD XOP bit, 1<<11
97
98 cmp \$0x80000008,%r10d
82 jb .Lintel 99 jb .Lintel
83 100
84 mov \$0x80000008,%eax 101 mov \$0x80000008,%eax
@@ -89,12 +106,12 @@ OPENSSL_ia32_cpuid:
89 mov \$1,%eax 106 mov \$1,%eax
90 cpuid 107 cpuid
91 bt \$28,%edx # test hyper-threading bit 108 bt \$28,%edx # test hyper-threading bit
92 jnc .Ldone 109 jnc .Lgeneric
93 shr \$16,%ebx # number of logical processors 110 shr \$16,%ebx # number of logical processors
94 cmp %r10b,%bl 111 cmp %r10b,%bl
95 ja .Ldone 112 ja .Lgeneric
96 and \$0xefffffff,%edx # ~(1<<28) 113 and \$0xefffffff,%edx # ~(1<<28)
97 jmp .Ldone 114 jmp .Lgeneric
98 115
99.Lintel: 116.Lintel:
100 cmp \$4,%r11d 117 cmp \$4,%r11d
@@ -111,30 +128,47 @@ OPENSSL_ia32_cpuid:
111.Lnocacheinfo: 128.Lnocacheinfo:
112 mov \$1,%eax 129 mov \$1,%eax
113 cpuid 130 cpuid
131 and \$0xbfefffff,%edx # force reserved bits to 0
114 cmp \$0,%r9d 132 cmp \$0,%r9d
115 jne .Lnotintel 133 jne .Lnotintel
116 or \$0x00100000,%edx # use reserved 20th bit to engage RC4_CHAR 134 or \$0x40000000,%edx # set reserved bit#30 on Intel CPUs
117 and \$15,%ah 135 and \$15,%ah
118 cmp \$15,%ah # examine Family ID 136 cmp \$15,%ah # examine Family ID
119 je .Lnotintel 137 jne .Lnotintel
120 or \$0x40000000,%edx # use reserved bit to skip unrolled loop 138 or \$0x00100000,%edx # set reserved bit#20 to engage RC4_CHAR
121.Lnotintel: 139.Lnotintel:
122 bt \$28,%edx # test hyper-threading bit 140 bt \$28,%edx # test hyper-threading bit
123 jnc .Ldone 141 jnc .Lgeneric
124 and \$0xefffffff,%edx # ~(1<<28) 142 and \$0xefffffff,%edx # ~(1<<28)
125 cmp \$0,%r10d 143 cmp \$0,%r10d
126 je .Ldone 144 je .Lgeneric
127 145
128 or \$0x10000000,%edx # 1<<28 146 or \$0x10000000,%edx # 1<<28
129 shr \$16,%ebx 147 shr \$16,%ebx
130 cmp \$1,%bl # see if cache is shared 148 cmp \$1,%bl # see if cache is shared
131 ja .Ldone 149 ja .Lgeneric
132 and \$0xefffffff,%edx # ~(1<<28) 150 and \$0xefffffff,%edx # ~(1<<28)
151.Lgeneric:
152 and \$0x00000800,%r9d # isolate AMD XOP flag
153 and \$0xfffff7ff,%ecx
154 or %ecx,%r9d # merge AMD XOP flag
155
156 mov %edx,%r10d # %r9d:%r10d is copy of %ecx:%edx
157 bt \$27,%r9d # check OSXSAVE bit
158 jnc .Lclear_avx
159 xor %ecx,%ecx # XCR0
160 .byte 0x0f,0x01,0xd0 # xgetbv
161 and \$6,%eax # isolate XMM and YMM state support
162 cmp \$6,%eax
163 je .Ldone
164.Lclear_avx:
165 mov \$0xefffe7ff,%eax # ~(1<<28|1<<12|1<<11)
166 and %eax,%r9d # clear AVX, FMA and AMD XOP bits
133.Ldone: 167.Ldone:
134 shl \$32,%rcx 168 shl \$32,%r9
135 mov %edx,%eax 169 mov %r10d,%eax
136 mov %r8,%rbx 170 mov %r8,%rbx # restore %rbx
137 or %rcx,%rax 171 or %r9,%rax
138 ret 172 ret
139.size OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid 173.size OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid
140 174
@@ -229,4 +263,21 @@ OPENSSL_wipe_cpu:
229.size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu 263.size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
230___ 264___
231 265
266print<<___;
267.globl OPENSSL_ia32_rdrand
268.type OPENSSL_ia32_rdrand,\@abi-omnipotent
269.align 16
270OPENSSL_ia32_rdrand:
271 mov \$8,%ecx
272.Loop_rdrand:
273 rdrand %rax
274 jc .Lbreak_rdrand
275 loop .Loop_rdrand
276.Lbreak_rdrand:
277 cmp \$0,%rax
278 cmove %rcx,%rax
279 ret
280.size OPENSSL_ia32_rdrand,.-OPENSSL_ia32_rdrand
281___
282
232close STDOUT; # flush 283close STDOUT; # flush
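The new OPENSSL_ia32_rdrand above retries RDRAND up to eight times; if the instruction succeeds but happens to return 0 it substitutes the (non-zero) loop counter, so a zero return always means "no random value available". A rough C rendering of the same loop, not part of the commit and relying on the compiler intrinsic _rdrand64_step from <immintrin.h> (an assumption: it needs -mrdrnd and a toolchain that provides it):

	#include <stdint.h>
	#include <immintrin.h>		/* _rdrand64_step(); assumption, not in the source */

	/* Return a random 64-bit value, or 0 if RDRAND failed eight times. */
	static uint64_t rdrand_retry(void)
		{
		unsigned long long v;
		int i;

		for (i = 8; i > 0; i--)
			if (_rdrand64_step(&v))
				return v != 0 ? (uint64_t)v : (uint64_t)i;	/* never report 0 on success */
		return 0;
		}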
diff --git a/src/lib/libssl/src/crypto/x86cpuid.pl b/src/lib/libssl/src/crypto/x86cpuid.pl
index a7464af19b..39fd8f2293 100644
--- a/src/lib/libssl/src/crypto/x86cpuid.pl
+++ b/src/lib/libssl/src/crypto/x86cpuid.pl
@@ -19,9 +19,9 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
19 &pushf (); 19 &pushf ();
20 &pop ("eax"); 20 &pop ("eax");
21 &xor ("ecx","eax"); 21 &xor ("ecx","eax");
22 &bt ("ecx",21);
23 &jnc (&label("done"));
24 &xor ("eax","eax"); 22 &xor ("eax","eax");
23 &bt ("ecx",21);
24 &jnc (&label("nocpuid"));
25 &cpuid (); 25 &cpuid ();
26 &mov ("edi","eax"); # max value for standard query level 26 &mov ("edi","eax"); # max value for standard query level
27 27
@@ -51,7 +51,14 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
51 # AMD specific 51 # AMD specific
52 &mov ("eax",0x80000000); 52 &mov ("eax",0x80000000);
53 &cpuid (); 53 &cpuid ();
54 &cmp ("eax",0x80000008); 54 &cmp ("eax",0x80000001);
55 &jb (&label("intel"));
56 &mov ("esi","eax");
57 &mov ("eax",0x80000001);
58 &cpuid ();
59 &or ("ebp","ecx");
60 &and ("ebp",1<<11|1); # isolate XOP bit
61 &cmp ("esi",0x80000008);
55 &jb (&label("intel")); 62 &jb (&label("intel"));
56 63
57 &mov ("eax",0x80000008); 64 &mov ("eax",0x80000008);
@@ -62,13 +69,13 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
62 &mov ("eax",1); 69 &mov ("eax",1);
63 &cpuid (); 70 &cpuid ();
64 &bt ("edx",28); 71 &bt ("edx",28);
65 &jnc (&label("done")); 72 &jnc (&label("generic"));
66 &shr ("ebx",16); 73 &shr ("ebx",16);
67 &and ("ebx",0xff); 74 &and ("ebx",0xff);
68 &cmp ("ebx","esi"); 75 &cmp ("ebx","esi");
69 &ja (&label("done")); 76 &ja (&label("generic"));
70 &and ("edx",0xefffffff); # clear hyper-threading bit 77 &and ("edx",0xefffffff); # clear hyper-threading bit
71 &jmp (&label("done")); 78 &jmp (&label("generic"));
72 79
73&set_label("intel"); 80&set_label("intel");
74 &cmp ("edi",4); 81 &cmp ("edi",4);
@@ -85,27 +92,51 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
85&set_label("nocacheinfo"); 92&set_label("nocacheinfo");
86 &mov ("eax",1); 93 &mov ("eax",1);
87 &cpuid (); 94 &cpuid ();
95 &and ("edx",0xbfefffff); # force reserved bits #20, #30 to 0
88 &cmp ("ebp",0); 96 &cmp ("ebp",0);
89 &jne (&label("notP4")); 97 &jne (&label("notintel"));
98 &or ("edx",1<<30); # set reserved bit#30 on Intel CPUs
 90 &and (&HB("eax"),15); # family ID 99 &and (&HB("eax"),15); # family ID
91 &cmp (&HB("eax"),15); # P4? 100 &cmp (&HB("eax"),15); # P4?
92 &jne (&label("notP4")); 101 &jne (&label("notintel"));
93 &or ("edx",1<<20); # use reserved bit to engage RC4_CHAR 102 &or ("edx",1<<20); # set reserved bit#20 to engage RC4_CHAR
94&set_label("notP4"); 103&set_label("notintel");
95 &bt ("edx",28); # test hyper-threading bit 104 &bt ("edx",28); # test hyper-threading bit
96 &jnc (&label("done")); 105 &jnc (&label("generic"));
97 &and ("edx",0xefffffff); 106 &and ("edx",0xefffffff);
98 &cmp ("edi",0); 107 &cmp ("edi",0);
99 &je (&label("done")); 108 &je (&label("generic"));
100 109
101 &or ("edx",0x10000000); 110 &or ("edx",0x10000000);
102 &shr ("ebx",16); 111 &shr ("ebx",16);
103 &cmp (&LB("ebx"),1); 112 &cmp (&LB("ebx"),1);
104 &ja (&label("done")); 113 &ja (&label("generic"));
105 &and ("edx",0xefffffff); # clear hyper-threading bit if not 114 &and ("edx",0xefffffff); # clear hyper-threading bit if not
115
116&set_label("generic");
117 &and ("ebp",1<<11); # isolate AMD XOP flag
118 &and ("ecx",0xfffff7ff); # force 11th bit to 0
119 &mov ("esi","edx");
120 &or ("ebp","ecx"); # merge AMD XOP flag
121
122 &bt ("ecx",27); # check OSXSAVE bit
123 &jnc (&label("clear_avx"));
124 &xor ("ecx","ecx");
125 &data_byte(0x0f,0x01,0xd0); # xgetbv
126 &and ("eax",6);
127 &cmp ("eax",6);
128 &je (&label("done"));
129 &cmp ("eax",2);
130 &je (&label("clear_avx"));
131&set_label("clear_xmm");
132 &and ("ebp",0xfdfffffd); # clear AESNI and PCLMULQDQ bits
133 &and ("esi",0xfeffffff); # clear FXSR
134&set_label("clear_avx");
135 &and ("ebp",0xefffe7ff); # clear AVX, FMA and AMD XOP bits
106&set_label("done"); 136&set_label("done");
107 &mov ("eax","edx"); 137 &mov ("eax","esi");
108 &mov ("edx","ecx"); 138 &mov ("edx","ebp");
139&set_label("nocpuid");
109&function_end("OPENSSL_ia32_cpuid"); 140&function_end("OPENSSL_ia32_cpuid");
110 141
111&external_label("OPENSSL_ia32cap_P"); 142&external_label("OPENSSL_ia32cap_P");
@@ -199,8 +230,9 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
199 &bt (&DWP(0,"ecx"),1); 230 &bt (&DWP(0,"ecx"),1);
200 &jnc (&label("no_x87")); 231 &jnc (&label("no_x87"));
201 if ($sse2) { 232 if ($sse2) {
202 &bt (&DWP(0,"ecx"),26); 233 &and ("ecx",1<<26|1<<24); # check SSE2 and FXSR bits
203 &jnc (&label("no_sse2")); 234 &cmp ("ecx",1<<26|1<<24);
235 &jne (&label("no_sse2"));
204 &pxor ("xmm0","xmm0"); 236 &pxor ("xmm0","xmm0");
205 &pxor ("xmm1","xmm1"); 237 &pxor ("xmm1","xmm1");
206 &pxor ("xmm2","xmm2"); 238 &pxor ("xmm2","xmm2");
@@ -307,6 +339,18 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
307 &ret (); 339 &ret ();
308&function_end_B("OPENSSL_cleanse"); 340&function_end_B("OPENSSL_cleanse");
309 341
342&function_begin_B("OPENSSL_ia32_rdrand");
343 &mov ("ecx",8);
344&set_label("loop");
345 &rdrand ("eax");
346 &jc (&label("break"));
347 &loop (&label("loop"));
348&set_label("break");
349 &cmp ("eax",0);
350 &cmove ("eax","ecx");
351 &ret ();
352&function_end_B("OPENSSL_ia32_rdrand");
353
310&initseg("OPENSSL_cpuid_setup"); 354&initseg("OPENSSL_cpuid_setup");
311 355
312&asm_finish(); 356&asm_finish();
diff --git a/src/lib/libssl/src/doc/apps/genpkey.pod b/src/lib/libssl/src/doc/apps/genpkey.pod
index 1611b5ca78..c74d097fb3 100644
--- a/src/lib/libssl/src/doc/apps/genpkey.pod
+++ b/src/lib/libssl/src/doc/apps/genpkey.pod
@@ -114,6 +114,8 @@ hexadecimal value if preceded by B<0x>. Default value is 65537.
114 114
115The number of bits in the generated parameters. If not specified 1024 is used. 115The number of bits in the generated parameters. If not specified 1024 is used.
116 116
117=back
118
117=head1 DH PARAMETER GENERATION OPTIONS 119=head1 DH PARAMETER GENERATION OPTIONS
118 120
119=over 4 121=over 4
diff --git a/src/lib/libssl/src/doc/crypto/ecdsa.pod b/src/lib/libssl/src/doc/crypto/ecdsa.pod
index 49b10f2249..20edff97ff 100644
--- a/src/lib/libssl/src/doc/crypto/ecdsa.pod
+++ b/src/lib/libssl/src/doc/crypto/ecdsa.pod
@@ -114,7 +114,7 @@ using the public key B<eckey>.
114 114
115ECDSA_size() returns the maximum length signature or 0 on error. 115ECDSA_size() returns the maximum length signature or 0 on error.
116 116
117ECDSA_sign_setup() and ECDSA_sign() return 1 if successful or -1 117ECDSA_sign_setup() and ECDSA_sign() return 1 if successful or 0
118on error. 118on error.
119 119
120ECDSA_verify() and ECDSA_do_verify() return 1 for a valid 120ECDSA_verify() and ECDSA_do_verify() return 1 for a valid
diff --git a/src/lib/libssl/src/engines/ccgost/Makefile b/src/lib/libssl/src/engines/ccgost/Makefile
index dadb5230ec..d661c10828 100644
--- a/src/lib/libssl/src/engines/ccgost/Makefile
+++ b/src/lib/libssl/src/engines/ccgost/Makefile
@@ -142,13 +142,13 @@ gost94_keyx.o: ../../include/openssl/x509_vfy.h e_gost_err.h gost89.h
142gost94_keyx.o: gost94_keyx.c gost_keywrap.h gost_lcl.h gosthash.h 142gost94_keyx.o: gost94_keyx.c gost_keywrap.h gost_lcl.h gosthash.h
143gost_ameth.o: ../../include/openssl/asn1.h ../../include/openssl/asn1t.h 143gost_ameth.o: ../../include/openssl/asn1.h ../../include/openssl/asn1t.h
144gost_ameth.o: ../../include/openssl/bio.h ../../include/openssl/bn.h 144gost_ameth.o: ../../include/openssl/bio.h ../../include/openssl/bn.h
145gost_ameth.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h 145gost_ameth.o: ../../include/openssl/buffer.h ../../include/openssl/cms.h
146gost_ameth.o: ../../include/openssl/dsa.h ../../include/openssl/e_os2.h 146gost_ameth.o: ../../include/openssl/crypto.h ../../include/openssl/dsa.h
147gost_ameth.o: ../../include/openssl/ec.h ../../include/openssl/ecdh.h 147gost_ameth.o: ../../include/openssl/e_os2.h ../../include/openssl/ec.h
148gost_ameth.o: ../../include/openssl/ecdsa.h ../../include/openssl/engine.h 148gost_ameth.o: ../../include/openssl/ecdh.h ../../include/openssl/ecdsa.h
149gost_ameth.o: ../../include/openssl/err.h ../../include/openssl/evp.h 149gost_ameth.o: ../../include/openssl/engine.h ../../include/openssl/err.h
150gost_ameth.o: ../../include/openssl/lhash.h ../../include/openssl/obj_mac.h 150gost_ameth.o: ../../include/openssl/evp.h ../../include/openssl/lhash.h
151gost_ameth.o: ../../include/openssl/objects.h 151gost_ameth.o: ../../include/openssl/obj_mac.h ../../include/openssl/objects.h
152gost_ameth.o: ../../include/openssl/opensslconf.h 152gost_ameth.o: ../../include/openssl/opensslconf.h
153gost_ameth.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h 153gost_ameth.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
154gost_ameth.o: ../../include/openssl/pkcs7.h ../../include/openssl/safestack.h 154gost_ameth.o: ../../include/openssl/pkcs7.h ../../include/openssl/safestack.h
diff --git a/src/lib/libssl/src/engines/ccgost/gost_ameth.c b/src/lib/libssl/src/engines/ccgost/gost_ameth.c
index e6c2839e5f..2cde1fcfd9 100644
--- a/src/lib/libssl/src/engines/ccgost/gost_ameth.c
+++ b/src/lib/libssl/src/engines/ccgost/gost_ameth.c
@@ -13,6 +13,9 @@
13#include <openssl/engine.h> 13#include <openssl/engine.h>
14#include <openssl/evp.h> 14#include <openssl/evp.h>
15#include <openssl/asn1.h> 15#include <openssl/asn1.h>
16#ifndef OPENSSL_NO_CMS
17#include <openssl/cms.h>
18#endif
16#include "gost_params.h" 19#include "gost_params.h"
17#include "gost_lcl.h" 20#include "gost_lcl.h"
18#include "e_gost_err.h" 21#include "e_gost_err.h"
@@ -230,6 +233,24 @@ static int pkey_ctrl_gost(EVP_PKEY *pkey, int op,
230 X509_ALGOR_set0(alg2, OBJ_nid2obj(nid), V_ASN1_NULL, 0); 233 X509_ALGOR_set0(alg2, OBJ_nid2obj(nid), V_ASN1_NULL, 0);
231 } 234 }
232 return 1; 235 return 1;
236#ifndef OPENSSL_NO_CMS
237 case ASN1_PKEY_CTRL_CMS_SIGN:
238 if (arg1 == 0)
239 {
240 X509_ALGOR *alg1 = NULL, *alg2 = NULL;
241 int nid = EVP_PKEY_base_id(pkey);
242 CMS_SignerInfo_get0_algs((CMS_SignerInfo *)arg2,
243 NULL, NULL, &alg1, &alg2);
244 X509_ALGOR_set0(alg1, OBJ_nid2obj(NID_id_GostR3411_94),
245 V_ASN1_NULL, 0);
246 if (nid == NID_undef)
247 {
248 return (-1);
249 }
250 X509_ALGOR_set0(alg2, OBJ_nid2obj(nid), V_ASN1_NULL, 0);
251 }
252 return 1;
253#endif
233 case ASN1_PKEY_CTRL_PKCS7_ENCRYPT: 254 case ASN1_PKEY_CTRL_PKCS7_ENCRYPT:
234 if (arg1 == 0) 255 if (arg1 == 0)
235 { 256 {
@@ -244,6 +265,22 @@ static int pkey_ctrl_gost(EVP_PKEY *pkey, int op,
244 V_ASN1_SEQUENCE, params); 265 V_ASN1_SEQUENCE, params);
245 } 266 }
246 return 1; 267 return 1;
268#ifndef OPENSSL_NO_CMS
269 case ASN1_PKEY_CTRL_CMS_ENVELOPE:
270 if (arg1 == 0)
271 {
272 X509_ALGOR *alg;
273 ASN1_STRING * params = encode_gost_algor_params(pkey);
274 if (!params)
275 {
276 return -1;
277 }
278 CMS_RecipientInfo_ktri_get0_algs((CMS_RecipientInfo *)arg2, NULL, NULL, &alg);
279 X509_ALGOR_set0(alg, OBJ_nid2obj(pkey->type),
280 V_ASN1_SEQUENCE, params);
281 }
282 return 1;
283#endif
247 case ASN1_PKEY_CTRL_DEFAULT_MD_NID: 284 case ASN1_PKEY_CTRL_DEFAULT_MD_NID:
248 *(int *)arg2 = NID_id_GostR3411_94; 285 *(int *)arg2 = NID_id_GostR3411_94;
249 return 2; 286 return 2;
diff --git a/src/lib/libssl/src/engines/ccgost/gost_pmeth.c b/src/lib/libssl/src/engines/ccgost/gost_pmeth.c
index caaea99d36..f91c9b1939 100644
--- a/src/lib/libssl/src/engines/ccgost/gost_pmeth.c
+++ b/src/lib/libssl/src/engines/ccgost/gost_pmeth.c
@@ -89,6 +89,12 @@ static int pkey_gost_ctrl(EVP_PKEY_CTX *ctx, int type, int p1, void *p2)
89 case EVP_PKEY_CTRL_PKCS7_ENCRYPT: 89 case EVP_PKEY_CTRL_PKCS7_ENCRYPT:
90 case EVP_PKEY_CTRL_PKCS7_DECRYPT: 90 case EVP_PKEY_CTRL_PKCS7_DECRYPT:
91 case EVP_PKEY_CTRL_PKCS7_SIGN: 91 case EVP_PKEY_CTRL_PKCS7_SIGN:
92 case EVP_PKEY_CTRL_DIGESTINIT:
93#ifndef OPENSSL_NO_CMS
94 case EVP_PKEY_CTRL_CMS_ENCRYPT:
95 case EVP_PKEY_CTRL_CMS_DECRYPT:
96 case EVP_PKEY_CTRL_CMS_SIGN:
97#endif
92 return 1; 98 return 1;
93 99
94 case EVP_PKEY_CTRL_GOST_PARAMSET: 100 case EVP_PKEY_CTRL_GOST_PARAMSET:
@@ -123,7 +129,7 @@ static int pkey_gost_ctrl94_str(EVP_PKEY_CTX *ctx,
123 } 129 }
124 if (strlen(value) == 1) 130 if (strlen(value) == 1)
125 { 131 {
126 switch(toupper(value[0])) 132 switch(toupper((unsigned char)value[0]))
127 { 133 {
128 case 'A': 134 case 'A':
129 param_nid = NID_id_GostR3410_94_CryptoPro_A_ParamSet; 135 param_nid = NID_id_GostR3410_94_CryptoPro_A_ParamSet;
@@ -142,9 +148,9 @@ static int pkey_gost_ctrl94_str(EVP_PKEY_CTX *ctx,
142 break; 148 break;
143 } 149 }
144 } 150 }
145 else if ((strlen(value) == 2) && (toupper(value[0]) == 'X')) 151 else if ((strlen(value) == 2) && (toupper((unsigned char)value[0]) == 'X'))
146 { 152 {
147 switch (toupper(value[1])) 153 switch (toupper((unsigned char)value[1]))
148 { 154 {
149 case 'A': 155 case 'A':
150 param_nid = NID_id_GostR3410_94_CryptoPro_XchA_ParamSet; 156 param_nid = NID_id_GostR3410_94_CryptoPro_XchA_ParamSet;
@@ -198,7 +204,7 @@ static int pkey_gost_ctrl01_str(EVP_PKEY_CTX *ctx,
198 } 204 }
199 if (strlen(value) == 1) 205 if (strlen(value) == 1)
200 { 206 {
201 switch(toupper(value[0])) 207 switch(toupper((unsigned char)value[0]))
202 { 208 {
203 case 'A': 209 case 'A':
204 param_nid = NID_id_GostR3410_2001_CryptoPro_A_ParamSet; 210 param_nid = NID_id_GostR3410_2001_CryptoPro_A_ParamSet;
@@ -217,9 +223,9 @@ static int pkey_gost_ctrl01_str(EVP_PKEY_CTX *ctx,
217 break; 223 break;
218 } 224 }
219 } 225 }
220 else if ((strlen(value) == 2) && (toupper(value[0]) == 'X')) 226 else if ((strlen(value) == 2) && (toupper((unsigned char)value[0]) == 'X'))
221 { 227 {
222 switch (toupper(value[1])) 228 switch (toupper((unsigned char)value[1]))
223 { 229 {
224 case 'A': 230 case 'A':
225 param_nid = NID_id_GostR3410_2001_CryptoPro_XchA_ParamSet; 231 param_nid = NID_id_GostR3410_2001_CryptoPro_XchA_ParamSet;
@@ -521,6 +527,7 @@ static int pkey_gost_mac_ctrl_str(EVP_PKEY_CTX *ctx,
521 { 527 {
522 GOSTerr(GOST_F_PKEY_GOST_MAC_CTRL_STR, 528 GOSTerr(GOST_F_PKEY_GOST_MAC_CTRL_STR,
523 GOST_R_INVALID_MAC_KEY_LENGTH); 529 GOST_R_INVALID_MAC_KEY_LENGTH);
530 OPENSSL_free(keybuf);
524 return 0; 531 return 0;
525 } 532 }
526 ret= pkey_gost_mac_ctrl(ctx, EVP_PKEY_CTRL_SET_MAC_KEY, 533 ret= pkey_gost_mac_ctrl(ctx, EVP_PKEY_CTRL_SET_MAC_KEY,
diff --git a/src/lib/libssl/src/engines/e_aep.c b/src/lib/libssl/src/engines/e_aep.c
index d7f89e5156..1953f0643c 100644
--- a/src/lib/libssl/src/engines/e_aep.c
+++ b/src/lib/libssl/src/engines/e_aep.c
@@ -85,7 +85,6 @@ extern int GetThreadID(void);
85#ifndef OPENSSL_NO_DH 85#ifndef OPENSSL_NO_DH
86#include <openssl/dh.h> 86#include <openssl/dh.h>
87#endif 87#endif
88#include <openssl/bn.h>
89 88
90#ifndef OPENSSL_NO_HW 89#ifndef OPENSSL_NO_HW
91#ifndef OPENSSL_NO_HW_AEP 90#ifndef OPENSSL_NO_HW_AEP
diff --git a/src/lib/libssl/src/engines/e_capi.c b/src/lib/libssl/src/engines/e_capi.c
index 24b620fc07..bfedde0eb0 100644
--- a/src/lib/libssl/src/engines/e_capi.c
+++ b/src/lib/libssl/src/engines/e_capi.c
@@ -442,28 +442,36 @@ static int capi_init(ENGINE *e)
442 CAPI_CTX *ctx; 442 CAPI_CTX *ctx;
443 const RSA_METHOD *ossl_rsa_meth; 443 const RSA_METHOD *ossl_rsa_meth;
444 const DSA_METHOD *ossl_dsa_meth; 444 const DSA_METHOD *ossl_dsa_meth;
445 capi_idx = ENGINE_get_ex_new_index(0, NULL, NULL, NULL, 0); 445
446 cert_capi_idx = X509_get_ex_new_index(0, NULL, NULL, NULL, 0); 446 if (capi_idx < 0)
447 {
448 capi_idx = ENGINE_get_ex_new_index(0, NULL, NULL, NULL, 0);
449 if (capi_idx < 0)
450 goto memerr;
451
452 cert_capi_idx = X509_get_ex_new_index(0, NULL, NULL, NULL, 0);
453
454 /* Setup RSA_METHOD */
455 rsa_capi_idx = RSA_get_ex_new_index(0, NULL, NULL, NULL, 0);
456 ossl_rsa_meth = RSA_PKCS1_SSLeay();
457 capi_rsa_method.rsa_pub_enc = ossl_rsa_meth->rsa_pub_enc;
458 capi_rsa_method.rsa_pub_dec = ossl_rsa_meth->rsa_pub_dec;
459 capi_rsa_method.rsa_mod_exp = ossl_rsa_meth->rsa_mod_exp;
460 capi_rsa_method.bn_mod_exp = ossl_rsa_meth->bn_mod_exp;
461
462 /* Setup DSA Method */
463 dsa_capi_idx = DSA_get_ex_new_index(0, NULL, NULL, NULL, 0);
464 ossl_dsa_meth = DSA_OpenSSL();
465 capi_dsa_method.dsa_do_verify = ossl_dsa_meth->dsa_do_verify;
466 capi_dsa_method.dsa_mod_exp = ossl_dsa_meth->dsa_mod_exp;
467 capi_dsa_method.bn_mod_exp = ossl_dsa_meth->bn_mod_exp;
468 }
447 469
448 ctx = capi_ctx_new(); 470 ctx = capi_ctx_new();
449 if (!ctx || (capi_idx < 0)) 471 if (!ctx)
450 goto memerr; 472 goto memerr;
451 473
452 ENGINE_set_ex_data(e, capi_idx, ctx); 474 ENGINE_set_ex_data(e, capi_idx, ctx);
453 /* Setup RSA_METHOD */
454 rsa_capi_idx = RSA_get_ex_new_index(0, NULL, NULL, NULL, 0);
455 ossl_rsa_meth = RSA_PKCS1_SSLeay();
456 capi_rsa_method.rsa_pub_enc = ossl_rsa_meth->rsa_pub_enc;
457 capi_rsa_method.rsa_pub_dec = ossl_rsa_meth->rsa_pub_dec;
458 capi_rsa_method.rsa_mod_exp = ossl_rsa_meth->rsa_mod_exp;
459 capi_rsa_method.bn_mod_exp = ossl_rsa_meth->bn_mod_exp;
460
461 /* Setup DSA Method */
462 dsa_capi_idx = DSA_get_ex_new_index(0, NULL, NULL, NULL, 0);
463 ossl_dsa_meth = DSA_OpenSSL();
464 capi_dsa_method.dsa_do_verify = ossl_dsa_meth->dsa_do_verify;
465 capi_dsa_method.dsa_mod_exp = ossl_dsa_meth->dsa_mod_exp;
466 capi_dsa_method.bn_mod_exp = ossl_dsa_meth->bn_mod_exp;
467 475
468#ifdef OPENSSL_CAPIENG_DIALOG 476#ifdef OPENSSL_CAPIENG_DIALOG
469 { 477 {
@@ -522,6 +530,7 @@ static int bind_capi(ENGINE *e)
522 { 530 {
523 if (!ENGINE_set_id(e, engine_capi_id) 531 if (!ENGINE_set_id(e, engine_capi_id)
524 || !ENGINE_set_name(e, engine_capi_name) 532 || !ENGINE_set_name(e, engine_capi_name)
533 || !ENGINE_set_flags(e, ENGINE_FLAGS_NO_REGISTER_ALL)
525 || !ENGINE_set_init_function(e, capi_init) 534 || !ENGINE_set_init_function(e, capi_init)
526 || !ENGINE_set_finish_function(e, capi_finish) 535 || !ENGINE_set_finish_function(e, capi_finish)
527 || !ENGINE_set_destroy_function(e, capi_destroy) 536 || !ENGINE_set_destroy_function(e, capi_destroy)
@@ -1155,6 +1164,7 @@ static int capi_list_containers(CAPI_CTX *ctx, BIO *out)
1155 { 1164 {
1156 CAPIerr(CAPI_F_CAPI_LIST_CONTAINERS, CAPI_R_ENUMCONTAINERS_ERROR); 1165 CAPIerr(CAPI_F_CAPI_LIST_CONTAINERS, CAPI_R_ENUMCONTAINERS_ERROR);
1157 capi_addlasterror(); 1166 capi_addlasterror();
1167 CryptReleaseContext(hprov, 0);
1158 return 0; 1168 return 0;
1159 } 1169 }
1160 CAPI_trace(ctx, "Got max container len %d\n", buflen); 1170 CAPI_trace(ctx, "Got max container len %d\n", buflen);
@@ -1572,6 +1582,8 @@ static int capi_ctx_set_provname(CAPI_CTX *ctx, LPSTR pname, DWORD type, int che
1572 } 1582 }
1573 CryptReleaseContext(hprov, 0); 1583 CryptReleaseContext(hprov, 0);
1574 } 1584 }
1585 if (ctx->cspname)
1586 OPENSSL_free(ctx->cspname);
1575 ctx->cspname = BUF_strdup(pname); 1587 ctx->cspname = BUF_strdup(pname);
1576 ctx->csptype = type; 1588 ctx->csptype = type;
1577 return 1; 1589 return 1;
@@ -1581,9 +1593,12 @@ static int capi_ctx_set_provname_idx(CAPI_CTX *ctx, int idx)
1581 { 1593 {
1582 LPSTR pname; 1594 LPSTR pname;
1583 DWORD type; 1595 DWORD type;
1596 int res;
1584 if (capi_get_provname(ctx, &pname, &type, idx) != 1) 1597 if (capi_get_provname(ctx, &pname, &type, idx) != 1)
1585 return 0; 1598 return 0;
1586 return capi_ctx_set_provname(ctx, pname, type, 0); 1599 res = capi_ctx_set_provname(ctx, pname, type, 0);
1600 OPENSSL_free(pname);
1601 return res;
1587 } 1602 }
1588 1603
1589static int cert_issuer_match(STACK_OF(X509_NAME) *ca_dn, X509 *x) 1604static int cert_issuer_match(STACK_OF(X509_NAME) *ca_dn, X509 *x)
diff --git a/src/lib/libssl/src/engines/e_padlock.c b/src/lib/libssl/src/engines/e_padlock.c
index 7d09419804..9f7a85a8da 100644
--- a/src/lib/libssl/src/engines/e_padlock.c
+++ b/src/lib/libssl/src/engines/e_padlock.c
@@ -104,11 +104,13 @@
104# if (defined(__GNUC__) && (defined(__i386__) || defined(__i386))) || \ 104# if (defined(__GNUC__) && (defined(__i386__) || defined(__i386))) || \
105 (defined(_MSC_VER) && defined(_M_IX86)) 105 (defined(_MSC_VER) && defined(_M_IX86))
106# define COMPILE_HW_PADLOCK 106# define COMPILE_HW_PADLOCK
107static ENGINE *ENGINE_padlock (void);
108# endif 107# endif
109#endif 108#endif
110 109
111#ifdef OPENSSL_NO_DYNAMIC_ENGINE 110#ifdef OPENSSL_NO_DYNAMIC_ENGINE
111#ifdef COMPILE_HW_PADLOCK
112static ENGINE *ENGINE_padlock (void);
113#endif
112 114
113void ENGINE_load_padlock (void) 115void ENGINE_load_padlock (void)
114{ 116{
@@ -197,6 +199,8 @@ padlock_bind_helper(ENGINE *e)
197 return 1; 199 return 1;
198} 200}
199 201
202#ifdef OPENSSL_NO_DYNAMIC_ENGINE
203
200/* Constructor */ 204/* Constructor */
201static ENGINE * 205static ENGINE *
202ENGINE_padlock(void) 206ENGINE_padlock(void)
@@ -215,6 +219,8 @@ ENGINE_padlock(void)
215 return eng; 219 return eng;
216} 220}
217 221
222#endif
223
218/* Check availability of the engine */ 224/* Check availability of the engine */
219static int 225static int
220padlock_init(ENGINE *e) 226padlock_init(ENGINE *e)
diff --git a/src/lib/libssl/src/ssl/d1_both.c b/src/lib/libssl/src/ssl/d1_both.c
index 9f898d6997..de8bab873f 100644
--- a/src/lib/libssl/src/ssl/d1_both.c
+++ b/src/lib/libssl/src/ssl/d1_both.c
@@ -227,14 +227,14 @@ int dtls1_do_write(SSL *s, int type)
227 unsigned int len, frag_off, mac_size, blocksize; 227 unsigned int len, frag_off, mac_size, blocksize;
228 228
229 /* AHA! Figure out the MTU, and stick to the right size */ 229 /* AHA! Figure out the MTU, and stick to the right size */
230 if ( ! (SSL_get_options(s) & SSL_OP_NO_QUERY_MTU)) 230 if (s->d1->mtu < dtls1_min_mtu() && !(SSL_get_options(s) & SSL_OP_NO_QUERY_MTU))
231 { 231 {
232 s->d1->mtu = 232 s->d1->mtu =
233 BIO_ctrl(SSL_get_wbio(s), BIO_CTRL_DGRAM_QUERY_MTU, 0, NULL); 233 BIO_ctrl(SSL_get_wbio(s), BIO_CTRL_DGRAM_QUERY_MTU, 0, NULL);
234 234
235 /* I've seen the kernel return bogus numbers when it doesn't know 235 /* I've seen the kernel return bogus numbers when it doesn't know
236 * (initial write), so just make sure we have a reasonable number */ 236 * (initial write), so just make sure we have a reasonable number */
237 if ( s->d1->mtu < dtls1_min_mtu()) 237 if (s->d1->mtu < dtls1_min_mtu())
238 { 238 {
239 s->d1->mtu = 0; 239 s->d1->mtu = 0;
240 s->d1->mtu = dtls1_guess_mtu(s->d1->mtu); 240 s->d1->mtu = dtls1_guess_mtu(s->d1->mtu);
@@ -1084,7 +1084,11 @@ int dtls1_read_failed(SSL *s, int code)
1084 return code; 1084 return code;
1085 } 1085 }
1086 1086
1087 if ( ! SSL_in_init(s)) /* done, no need to send a retransmit */ 1087#ifndef OPENSSL_NO_HEARTBEATS
1088 if (!SSL_in_init(s) && !s->tlsext_hb_pending) /* done, no need to send a retransmit */
1089#else
1090 if (!SSL_in_init(s)) /* done, no need to send a retransmit */
1091#endif
1088 { 1092 {
1089 BIO_set_flags(SSL_get_rbio(s), BIO_FLAGS_READ); 1093 BIO_set_flags(SSL_get_rbio(s), BIO_FLAGS_READ);
1090 return code; 1094 return code;
@@ -1417,3 +1421,171 @@ dtls1_get_ccs_header(unsigned char *data, struct ccs_header_st *ccs_hdr)
1417 1421
1418 ccs_hdr->type = *(data++); 1422 ccs_hdr->type = *(data++);
1419 } 1423 }
1424
1425int dtls1_shutdown(SSL *s)
1426 {
1427 int ret;
1428#ifndef OPENSSL_NO_SCTP
1429 if (BIO_dgram_is_sctp(SSL_get_wbio(s)) &&
1430 !(s->shutdown & SSL_SENT_SHUTDOWN))
1431 {
1432 ret = BIO_dgram_sctp_wait_for_dry(SSL_get_wbio(s));
1433 if (ret < 0) return -1;
1434
1435 if (ret == 0)
1436 BIO_ctrl(SSL_get_wbio(s), BIO_CTRL_DGRAM_SCTP_SAVE_SHUTDOWN, 1, NULL);
1437 }
1438#endif
1439 ret = ssl3_shutdown(s);
1440#ifndef OPENSSL_NO_SCTP
1441 BIO_ctrl(SSL_get_wbio(s), BIO_CTRL_DGRAM_SCTP_SAVE_SHUTDOWN, 0, NULL);
1442#endif
1443 return ret;
1444 }
1445
1446#ifndef OPENSSL_NO_HEARTBEATS
1447int
1448dtls1_process_heartbeat(SSL *s)
1449 {
1450 unsigned char *p = &s->s3->rrec.data[0], *pl;
1451 unsigned short hbtype;
1452 unsigned int payload;
1453 unsigned int padding = 16; /* Use minimum padding */
1454
1455 /* Read type and payload length first */
1456 hbtype = *p++;
1457 n2s(p, payload);
1458 pl = p;
1459
1460 if (s->msg_callback)
1461 s->msg_callback(0, s->version, TLS1_RT_HEARTBEAT,
1462 &s->s3->rrec.data[0], s->s3->rrec.length,
1463 s, s->msg_callback_arg);
1464
1465 if (hbtype == TLS1_HB_REQUEST)
1466 {
1467 unsigned char *buffer, *bp;
1468 int r;
1469
1470 /* Allocate memory for the response, size is 1 byte
1471 * message type, plus 2 bytes payload length, plus
1472 * payload, plus padding
1473 */
1474 buffer = OPENSSL_malloc(1 + 2 + payload + padding);
1475 bp = buffer;
1476
1477 /* Enter response type, length and copy payload */
1478 *bp++ = TLS1_HB_RESPONSE;
1479 s2n(payload, bp);
1480 memcpy(bp, pl, payload);
1481 bp += payload;
1482 /* Random padding */
1483 RAND_pseudo_bytes(bp, padding);
1484
1485 r = dtls1_write_bytes(s, TLS1_RT_HEARTBEAT, buffer, 3 + payload + padding);
1486
1487 if (r >= 0 && s->msg_callback)
1488 s->msg_callback(1, s->version, TLS1_RT_HEARTBEAT,
1489 buffer, 3 + payload + padding,
1490 s, s->msg_callback_arg);
1491
1492 OPENSSL_free(buffer);
1493
1494 if (r < 0)
1495 return r;
1496 }
1497 else if (hbtype == TLS1_HB_RESPONSE)
1498 {
1499 unsigned int seq;
1500
1501 /* We only send sequence numbers (2 bytes unsigned int),
1502 * and 16 random bytes, so we just try to read the
1503 * sequence number */
1504 n2s(pl, seq);
1505
1506 if (payload == 18 && seq == s->tlsext_hb_seq)
1507 {
1508 dtls1_stop_timer(s);
1509 s->tlsext_hb_seq++;
1510 s->tlsext_hb_pending = 0;
1511 }
1512 }
1513
1514 return 0;
1515 }
1516
1517int
1518dtls1_heartbeat(SSL *s)
1519 {
1520 unsigned char *buf, *p;
1521 int ret;
1522 unsigned int payload = 18; /* Sequence number + random bytes */
1523 unsigned int padding = 16; /* Use minimum padding */
1524
1525 /* Only send if peer supports and accepts HB requests... */
1526 if (!(s->tlsext_heartbeat & SSL_TLSEXT_HB_ENABLED) ||
1527 s->tlsext_heartbeat & SSL_TLSEXT_HB_DONT_SEND_REQUESTS)
1528 {
1529 SSLerr(SSL_F_DTLS1_HEARTBEAT,SSL_R_TLS_HEARTBEAT_PEER_DOESNT_ACCEPT);
1530 return -1;
1531 }
1532
1533 /* ...and there is none in flight yet... */
1534 if (s->tlsext_hb_pending)
1535 {
1536 SSLerr(SSL_F_DTLS1_HEARTBEAT,SSL_R_TLS_HEARTBEAT_PENDING);
1537 return -1;
1538 }
1539
1540 /* ...and no handshake in progress. */
1541 if (SSL_in_init(s) || s->in_handshake)
1542 {
1543 SSLerr(SSL_F_DTLS1_HEARTBEAT,SSL_R_UNEXPECTED_MESSAGE);
1544 return -1;
1545 }
1546
1547 /* Check if padding is too long, payload and padding
1548 * must not exceed 2^14 - 3 = 16381 bytes in total.
1549 */
1550 OPENSSL_assert(payload + padding <= 16381);
1551
1552 /* Create HeartBeat message, we just use a sequence number
1553	 * as payload to distinguish different messages and add
1554 * some random stuff.
1555 * - Message Type, 1 byte
1556 * - Payload Length, 2 bytes (unsigned int)
1557 * - Payload, the sequence number (2 bytes uint)
1558 * - Payload, random bytes (16 bytes uint)
1559 * - Padding
1560 */
1561 buf = OPENSSL_malloc(1 + 2 + payload + padding);
1562 p = buf;
1563 /* Message Type */
1564 *p++ = TLS1_HB_REQUEST;
1565 /* Payload length (18 bytes here) */
1566 s2n(payload, p);
1567 /* Sequence number */
1568 s2n(s->tlsext_hb_seq, p);
1569 /* 16 random bytes */
1570 RAND_pseudo_bytes(p, 16);
1571 p += 16;
1572 /* Random padding */
1573 RAND_pseudo_bytes(p, padding);
1574
1575 ret = dtls1_write_bytes(s, TLS1_RT_HEARTBEAT, buf, 3 + payload + padding);
1576 if (ret >= 0)
1577 {
1578 if (s->msg_callback)
1579 s->msg_callback(1, s->version, TLS1_RT_HEARTBEAT,
1580 buf, 3 + payload + padding,
1581 s, s->msg_callback_arg);
1582
1583 dtls1_start_timer(s);
1584 s->tlsext_hb_pending = 1;
1585 }
1586
1587 OPENSSL_free(buf);
1588
1589 return ret;
1590 }
1591#endif
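
The two functions added above implement the heartbeat extension (RFC 6520) for DTLS: dtls1_process_heartbeat() answers incoming HeartbeatRequests, and dtls1_heartbeat() sends a request whose payload is a 2-byte sequence number plus 16 random bytes, followed by at least 16 bytes of padding. The sketch below is illustrative only and not part of the imported patch; it lays out such a request the same way, with a 0xa5 filler standing in for the RAND_pseudo_bytes() calls used above.

#include <stddef.h>
#include <stdint.h>
#include <string.h>

#define HB_REQUEST	1	/* TLS1_HB_REQUEST */
#define HB_PAYLOAD	18	/* sequence number + 16 random bytes */
#define HB_PADDING	16	/* minimum padding */

/* Fill 'out' (at least 1 + 2 + HB_PAYLOAD + HB_PADDING = 37 bytes) with a
 * HeartbeatRequest laid out as in dtls1_heartbeat() above. */
static size_t build_heartbeat_request(unsigned char *out, uint16_t seq)
	{
	unsigned char *p = out;

	*p++ = HB_REQUEST;			/* message type, 1 byte */
	*p++ = (HB_PAYLOAD >> 8) & 0xff;	/* payload length, 2 bytes, */
	*p++ = HB_PAYLOAD & 0xff;		/* network order (s2n) */
	*p++ = (seq >> 8) & 0xff;		/* sequence number, 2 bytes */
	*p++ = seq & 0xff;
	memset(p, 0xa5, 16 + HB_PADDING);	/* random bytes + padding stand-in */
	p += 16 + HB_PADDING;

	return (size_t)(p - out);		/* 37 bytes total */
	}
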
diff --git a/src/lib/libssl/src/ssl/d1_clnt.c b/src/lib/libssl/src/ssl/d1_clnt.c
index 089fa4c7f8..a6ed09c51d 100644
--- a/src/lib/libssl/src/ssl/d1_clnt.c
+++ b/src/lib/libssl/src/ssl/d1_clnt.c
@@ -150,7 +150,11 @@ int dtls1_connect(SSL *s)
150 unsigned long Time=(unsigned long)time(NULL); 150 unsigned long Time=(unsigned long)time(NULL);
151 void (*cb)(const SSL *ssl,int type,int val)=NULL; 151 void (*cb)(const SSL *ssl,int type,int val)=NULL;
152 int ret= -1; 152 int ret= -1;
153 int new_state,state,skip=0;; 153 int new_state,state,skip=0;
154#ifndef OPENSSL_NO_SCTP
155 unsigned char sctpauthkey[64];
156 char labelbuffer[sizeof(DTLS1_SCTP_AUTH_LABEL)];
157#endif
154 158
155 RAND_add(&Time,sizeof(Time),0); 159 RAND_add(&Time,sizeof(Time),0);
156 ERR_clear_error(); 160 ERR_clear_error();
@@ -164,6 +168,27 @@ int dtls1_connect(SSL *s)
164 s->in_handshake++; 168 s->in_handshake++;
165 if (!SSL_in_init(s) || SSL_in_before(s)) SSL_clear(s); 169 if (!SSL_in_init(s) || SSL_in_before(s)) SSL_clear(s);
166 170
171#ifndef OPENSSL_NO_SCTP
172 /* Notify SCTP BIO socket to enter handshake
173 * mode and prevent stream identifier other
174 * than 0. Will be ignored if no SCTP is used.
175 */
176 BIO_ctrl(SSL_get_wbio(s), BIO_CTRL_DGRAM_SCTP_SET_IN_HANDSHAKE, s->in_handshake, NULL);
177#endif
178
179#ifndef OPENSSL_NO_HEARTBEATS
180 /* If we're awaiting a HeartbeatResponse, pretend we
181	 * already got it and don't await it anymore, because
182 * Heartbeats don't make sense during handshakes anyway.
183 */
184 if (s->tlsext_hb_pending)
185 {
186 dtls1_stop_timer(s);
187 s->tlsext_hb_pending = 0;
188 s->tlsext_hb_seq++;
189 }
190#endif
191
167 for (;;) 192 for (;;)
168 { 193 {
169 state=s->state; 194 state=s->state;
@@ -171,7 +196,7 @@ int dtls1_connect(SSL *s)
171 switch(s->state) 196 switch(s->state)
172 { 197 {
173 case SSL_ST_RENEGOTIATE: 198 case SSL_ST_RENEGOTIATE:
174 s->new_session=1; 199 s->renegotiate=1;
175 s->state=SSL_ST_CONNECT; 200 s->state=SSL_ST_CONNECT;
176 s->ctx->stats.sess_connect_renegotiate++; 201 s->ctx->stats.sess_connect_renegotiate++;
177 /* break */ 202 /* break */
@@ -226,6 +251,42 @@ int dtls1_connect(SSL *s)
226 s->hit = 0; 251 s->hit = 0;
227 break; 252 break;
228 253
254#ifndef OPENSSL_NO_SCTP
255 case DTLS1_SCTP_ST_CR_READ_SOCK:
256
257 if (BIO_dgram_sctp_msg_waiting(SSL_get_rbio(s)))
258 {
259 s->s3->in_read_app_data=2;
260 s->rwstate=SSL_READING;
261 BIO_clear_retry_flags(SSL_get_rbio(s));
262 BIO_set_retry_read(SSL_get_rbio(s));
263 ret = -1;
264 goto end;
265 }
266
267 s->state=s->s3->tmp.next_state;
268 break;
269
270 case DTLS1_SCTP_ST_CW_WRITE_SOCK:
271 /* read app data until dry event */
272
273 ret = BIO_dgram_sctp_wait_for_dry(SSL_get_wbio(s));
274 if (ret < 0) goto end;
275
276 if (ret == 0)
277 {
278 s->s3->in_read_app_data=2;
279 s->rwstate=SSL_READING;
280 BIO_clear_retry_flags(SSL_get_rbio(s));
281 BIO_set_retry_read(SSL_get_rbio(s));
282 ret = -1;
283 goto end;
284 }
285
286 s->state=s->d1->next_state;
287 break;
288#endif
289
229 case SSL3_ST_CW_CLNT_HELLO_A: 290 case SSL3_ST_CW_CLNT_HELLO_A:
230 case SSL3_ST_CW_CLNT_HELLO_B: 291 case SSL3_ST_CW_CLNT_HELLO_B:
231 292
@@ -248,9 +309,17 @@ int dtls1_connect(SSL *s)
248 309
249 s->init_num=0; 310 s->init_num=0;
250 311
251 /* turn on buffering for the next lot of output */ 312#ifndef OPENSSL_NO_SCTP
252 if (s->bbio != s->wbio) 313 /* Disable buffering for SCTP */
253 s->wbio=BIO_push(s->bbio,s->wbio); 314 if (!BIO_dgram_is_sctp(SSL_get_wbio(s)))
315 {
316#endif
317 /* turn on buffering for the next lot of output */
318 if (s->bbio != s->wbio)
319 s->wbio=BIO_push(s->bbio,s->wbio);
320#ifndef OPENSSL_NO_SCTP
321 }
322#endif
254 323
255 break; 324 break;
256 325
@@ -260,9 +329,25 @@ int dtls1_connect(SSL *s)
260 if (ret <= 0) goto end; 329 if (ret <= 0) goto end;
261 else 330 else
262 { 331 {
263 dtls1_stop_timer(s);
264 if (s->hit) 332 if (s->hit)
333 {
334#ifndef OPENSSL_NO_SCTP
335 /* Add new shared key for SCTP-Auth,
336 * will be ignored if no SCTP used.
337 */
338 snprintf((char*) labelbuffer, sizeof(DTLS1_SCTP_AUTH_LABEL),
339 DTLS1_SCTP_AUTH_LABEL);
340
341 SSL_export_keying_material(s, sctpauthkey,
342 sizeof(sctpauthkey), labelbuffer,
343 sizeof(labelbuffer), NULL, 0, 0);
344
345 BIO_ctrl(SSL_get_wbio(s), BIO_CTRL_DGRAM_SCTP_ADD_AUTH_KEY,
346 sizeof(sctpauthkey), sctpauthkey);
347#endif
348
265 s->state=SSL3_ST_CR_FINISHED_A; 349 s->state=SSL3_ST_CR_FINISHED_A;
350 }
266 else 351 else
267 s->state=DTLS1_ST_CR_HELLO_VERIFY_REQUEST_A; 352 s->state=DTLS1_ST_CR_HELLO_VERIFY_REQUEST_A;
268 } 353 }
@@ -354,12 +439,20 @@ int dtls1_connect(SSL *s)
354 case SSL3_ST_CR_SRVR_DONE_B: 439 case SSL3_ST_CR_SRVR_DONE_B:
355 ret=ssl3_get_server_done(s); 440 ret=ssl3_get_server_done(s);
356 if (ret <= 0) goto end; 441 if (ret <= 0) goto end;
442 dtls1_stop_timer(s);
357 if (s->s3->tmp.cert_req) 443 if (s->s3->tmp.cert_req)
358 s->state=SSL3_ST_CW_CERT_A; 444 s->s3->tmp.next_state=SSL3_ST_CW_CERT_A;
359 else 445 else
360 s->state=SSL3_ST_CW_KEY_EXCH_A; 446 s->s3->tmp.next_state=SSL3_ST_CW_KEY_EXCH_A;
361 s->init_num=0; 447 s->init_num=0;
362 448
449#ifndef OPENSSL_NO_SCTP
450 if (BIO_dgram_is_sctp(SSL_get_wbio(s)) &&
451 state == SSL_ST_RENEGOTIATE)
452 s->state=DTLS1_SCTP_ST_CR_READ_SOCK;
453 else
454#endif
455 s->state=s->s3->tmp.next_state;
363 break; 456 break;
364 457
365 case SSL3_ST_CW_CERT_A: 458 case SSL3_ST_CW_CERT_A:
@@ -378,6 +471,22 @@ int dtls1_connect(SSL *s)
378 dtls1_start_timer(s); 471 dtls1_start_timer(s);
379 ret=dtls1_send_client_key_exchange(s); 472 ret=dtls1_send_client_key_exchange(s);
380 if (ret <= 0) goto end; 473 if (ret <= 0) goto end;
474
475#ifndef OPENSSL_NO_SCTP
476 /* Add new shared key for SCTP-Auth,
477 * will be ignored if no SCTP used.
478 */
479 snprintf((char*) labelbuffer, sizeof(DTLS1_SCTP_AUTH_LABEL),
480 DTLS1_SCTP_AUTH_LABEL);
481
482 SSL_export_keying_material(s, sctpauthkey,
483 sizeof(sctpauthkey), labelbuffer,
484 sizeof(labelbuffer), NULL, 0, 0);
485
486 BIO_ctrl(SSL_get_wbio(s), BIO_CTRL_DGRAM_SCTP_ADD_AUTH_KEY,
487 sizeof(sctpauthkey), sctpauthkey);
488#endif
489
381 /* EAY EAY EAY need to check for DH fix cert 490 /* EAY EAY EAY need to check for DH fix cert
382 * sent back */ 491 * sent back */
383 /* For TLS, cert_req is set to 2, so a cert chain 492 /* For TLS, cert_req is set to 2, so a cert chain
@@ -388,7 +497,15 @@ int dtls1_connect(SSL *s)
388 } 497 }
389 else 498 else
390 { 499 {
391 s->state=SSL3_ST_CW_CHANGE_A; 500#ifndef OPENSSL_NO_SCTP
501 if (BIO_dgram_is_sctp(SSL_get_wbio(s)))
502 {
503 s->d1->next_state=SSL3_ST_CW_CHANGE_A;
504 s->state=DTLS1_SCTP_ST_CW_WRITE_SOCK;
505 }
506 else
507#endif
508 s->state=SSL3_ST_CW_CHANGE_A;
392 s->s3->change_cipher_spec=0; 509 s->s3->change_cipher_spec=0;
393 } 510 }
394 511
@@ -400,7 +517,15 @@ int dtls1_connect(SSL *s)
400 dtls1_start_timer(s); 517 dtls1_start_timer(s);
401 ret=dtls1_send_client_verify(s); 518 ret=dtls1_send_client_verify(s);
402 if (ret <= 0) goto end; 519 if (ret <= 0) goto end;
403 s->state=SSL3_ST_CW_CHANGE_A; 520#ifndef OPENSSL_NO_SCTP
521 if (BIO_dgram_is_sctp(SSL_get_wbio(s)))
522 {
523 s->d1->next_state=SSL3_ST_CW_CHANGE_A;
524 s->state=DTLS1_SCTP_ST_CW_WRITE_SOCK;
525 }
526 else
527#endif
528 s->state=SSL3_ST_CW_CHANGE_A;
404 s->init_num=0; 529 s->init_num=0;
405 s->s3->change_cipher_spec=0; 530 s->s3->change_cipher_spec=0;
406 break; 531 break;
@@ -412,6 +537,14 @@ int dtls1_connect(SSL *s)
412 ret=dtls1_send_change_cipher_spec(s, 537 ret=dtls1_send_change_cipher_spec(s,
413 SSL3_ST_CW_CHANGE_A,SSL3_ST_CW_CHANGE_B); 538 SSL3_ST_CW_CHANGE_A,SSL3_ST_CW_CHANGE_B);
414 if (ret <= 0) goto end; 539 if (ret <= 0) goto end;
540
541#ifndef OPENSSL_NO_SCTP
542 /* Change to new shared key of SCTP-Auth,
543 * will be ignored if no SCTP used.
544 */
545 BIO_ctrl(SSL_get_wbio(s), BIO_CTRL_DGRAM_SCTP_NEXT_AUTH_KEY, 0, NULL);
546#endif
547
415 s->state=SSL3_ST_CW_FINISHED_A; 548 s->state=SSL3_ST_CW_FINISHED_A;
416 s->init_num=0; 549 s->init_num=0;
417 550
@@ -457,9 +590,23 @@ int dtls1_connect(SSL *s)
457 if (s->hit) 590 if (s->hit)
458 { 591 {
459 s->s3->tmp.next_state=SSL_ST_OK; 592 s->s3->tmp.next_state=SSL_ST_OK;
593#ifndef OPENSSL_NO_SCTP
594 if (BIO_dgram_is_sctp(SSL_get_wbio(s)))
595 {
596 s->d1->next_state = s->s3->tmp.next_state;
597 s->s3->tmp.next_state=DTLS1_SCTP_ST_CW_WRITE_SOCK;
598 }
599#endif
460 if (s->s3->flags & SSL3_FLAGS_DELAY_CLIENT_FINISHED) 600 if (s->s3->flags & SSL3_FLAGS_DELAY_CLIENT_FINISHED)
461 { 601 {
462 s->state=SSL_ST_OK; 602 s->state=SSL_ST_OK;
603#ifndef OPENSSL_NO_SCTP
604 if (BIO_dgram_is_sctp(SSL_get_wbio(s)))
605 {
606 s->d1->next_state = SSL_ST_OK;
607 s->state=DTLS1_SCTP_ST_CW_WRITE_SOCK;
608 }
609#endif
463 s->s3->flags|=SSL3_FLAGS_POP_BUFFER; 610 s->s3->flags|=SSL3_FLAGS_POP_BUFFER;
464 s->s3->delay_buf_pop_ret=0; 611 s->s3->delay_buf_pop_ret=0;
465 } 612 }
@@ -508,6 +655,16 @@ int dtls1_connect(SSL *s)
508 s->state=SSL3_ST_CW_CHANGE_A; 655 s->state=SSL3_ST_CW_CHANGE_A;
509 else 656 else
510 s->state=SSL_ST_OK; 657 s->state=SSL_ST_OK;
658
659#ifndef OPENSSL_NO_SCTP
660 if (BIO_dgram_is_sctp(SSL_get_wbio(s)) &&
661 state == SSL_ST_RENEGOTIATE)
662 {
663 s->d1->next_state=s->state;
664 s->state=DTLS1_SCTP_ST_CW_WRITE_SOCK;
665 }
666#endif
667
511 s->init_num=0; 668 s->init_num=0;
512 break; 669 break;
513 670
@@ -515,6 +672,13 @@ int dtls1_connect(SSL *s)
515 s->rwstate=SSL_WRITING; 672 s->rwstate=SSL_WRITING;
516 if (BIO_flush(s->wbio) <= 0) 673 if (BIO_flush(s->wbio) <= 0)
517 { 674 {
675 /* If the write error was fatal, stop trying */
676 if (!BIO_should_retry(s->wbio))
677 {
678 s->rwstate=SSL_NOTHING;
679 s->state=s->s3->tmp.next_state;
680 }
681
518 ret= -1; 682 ret= -1;
519 goto end; 683 goto end;
520 } 684 }
@@ -541,6 +705,7 @@ int dtls1_connect(SSL *s)
541 /* else do it later in ssl3_write */ 705 /* else do it later in ssl3_write */
542 706
543 s->init_num=0; 707 s->init_num=0;
708 s->renegotiate=0;
544 s->new_session=0; 709 s->new_session=0;
545 710
546 ssl_update_cache(s,SSL_SESS_CACHE_CLIENT); 711 ssl_update_cache(s,SSL_SESS_CACHE_CLIENT);
@@ -587,6 +752,15 @@ int dtls1_connect(SSL *s)
587 } 752 }
588end: 753end:
589 s->in_handshake--; 754 s->in_handshake--;
755
756#ifndef OPENSSL_NO_SCTP
757 /* Notify SCTP BIO socket to leave handshake
758 * mode and allow stream identifier other
759 * than 0. Will be ignored if no SCTP is used.
760 */
761 BIO_ctrl(SSL_get_wbio(s), BIO_CTRL_DGRAM_SCTP_SET_IN_HANDSHAKE, s->in_handshake, NULL);
762#endif
763
590 if (buf != NULL) 764 if (buf != NULL)
591 BUF_MEM_free(buf); 765 BUF_MEM_free(buf);
592 if (cb != NULL) 766 if (cb != NULL)
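
Several hunks above repeat the same DTLS-over-SCTP step: the exporter label DTLS1_SCTP_AUTH_LABEL is copied into labelbuffer, SSL_export_keying_material() derives a 64-byte key from it, and the key is handed to the datagram BIO via BIO_CTRL_DGRAM_SCTP_ADD_AUTH_KEY. Below is a minimal sketch of just that step, assuming a build with SCTP support (OPENSSL_NO_SCTP undefined) and an SSL handle sitting on an SCTP datagram BIO; it is not part of the patch.

#include <openssl/bio.h>
#include <openssl/ssl.h>

static void add_sctp_auth_key(SSL *s)
	{
	unsigned char sctpauthkey[64];
	char label[] = "EXPORTER_DTLS_OVER_SCTP";	/* DTLS1_SCTP_AUTH_LABEL */

	/* Derive the key from the DTLS master secret (the return value is
	 * ignored here, as in the hunks above). */
	SSL_export_keying_material(s, sctpauthkey, sizeof(sctpauthkey),
	    label, sizeof(label), NULL, 0, 0);

	/* Register it with the SCTP BIO; a non-SCTP BIO ignores this ctrl. */
	BIO_ctrl(SSL_get_wbio(s), BIO_CTRL_DGRAM_SCTP_ADD_AUTH_KEY,
	    sizeof(sctpauthkey), sctpauthkey);
	}
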
diff --git a/src/lib/libssl/src/ssl/d1_enc.c b/src/lib/libssl/src/ssl/d1_enc.c
index becbab91c2..07a5e97ce5 100644
--- a/src/lib/libssl/src/ssl/d1_enc.c
+++ b/src/lib/libssl/src/ssl/d1_enc.c
@@ -260,7 +260,7 @@ int dtls1_enc(SSL *s, int send)
260 } 260 }
261 /* TLS 1.0 does not bound the number of padding bytes by the block size. 261 /* TLS 1.0 does not bound the number of padding bytes by the block size.
262 * All of them must have value 'padding_length'. */ 262 * All of them must have value 'padding_length'. */
263 if (i > (int)rec->length) 263 if (i + bs > (int)rec->length)
264 { 264 {
265 /* Incorrect padding. SSLerr() and ssl3_alert are done 265 /* Incorrect padding. SSLerr() and ssl3_alert are done
266 * by caller: we don't want to reveal whether this is 266 * by caller: we don't want to reveal whether this is
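
The one-line change above tightens the CBC padding check in dtls1_enc(): the padding byte count must now leave room for a full cipher block in the record, not merely fit inside it, so a record whose claimed padding would overlap the explicit IV block is rejected. For example, with a 16-byte block cipher a 32-byte record claiming 21 bytes of padding passed the old test (21 <= 32) but fails the new one (21 + 16 > 32). A standalone restatement of the check, for illustration only:

/* Illustration only: the padding test as changed above.  'pad' is the
 * padding byte count checked in dtls1_enc() ('i'), 'bs' the cipher block
 * size, 'len' the decrypted record length. */
static int padding_length_ok(unsigned int pad, unsigned int bs,
    unsigned int len)
	{
	/* old check: pad <= len
	 * new check: the record must also contain one full block
	 * (the explicit IV) in addition to the padding */
	return pad + bs <= len;
	}
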
diff --git a/src/lib/libssl/src/ssl/d1_lib.c b/src/lib/libssl/src/ssl/d1_lib.c
index c3b77c889b..f61f718183 100644
--- a/src/lib/libssl/src/ssl/d1_lib.c
+++ b/src/lib/libssl/src/ssl/d1_lib.c
@@ -82,6 +82,7 @@ SSL3_ENC_METHOD DTLSv1_enc_data={
82 TLS_MD_CLIENT_FINISH_CONST,TLS_MD_CLIENT_FINISH_CONST_SIZE, 82 TLS_MD_CLIENT_FINISH_CONST,TLS_MD_CLIENT_FINISH_CONST_SIZE,
83 TLS_MD_SERVER_FINISH_CONST,TLS_MD_SERVER_FINISH_CONST_SIZE, 83 TLS_MD_SERVER_FINISH_CONST,TLS_MD_SERVER_FINISH_CONST_SIZE,
84 tls1_alert_code, 84 tls1_alert_code,
85 tls1_export_keying_material,
85 }; 86 };
86 87
87long dtls1_default_timeout(void) 88long dtls1_default_timeout(void)
@@ -291,6 +292,15 @@ const SSL_CIPHER *dtls1_get_cipher(unsigned int u)
291 292
292void dtls1_start_timer(SSL *s) 293void dtls1_start_timer(SSL *s)
293 { 294 {
295#ifndef OPENSSL_NO_SCTP
296 /* Disable timer for SCTP */
297 if (BIO_dgram_is_sctp(SSL_get_wbio(s)))
298 {
299 memset(&(s->d1->next_timeout), 0, sizeof(struct timeval));
300 return;
301 }
302#endif
303
294 /* If timer is not set, initialize duration with 1 second */ 304 /* If timer is not set, initialize duration with 1 second */
295 if (s->d1->next_timeout.tv_sec == 0 && s->d1->next_timeout.tv_usec == 0) 305 if (s->d1->next_timeout.tv_sec == 0 && s->d1->next_timeout.tv_usec == 0)
296 { 306 {
@@ -381,6 +391,7 @@ void dtls1_double_timeout(SSL *s)
381void dtls1_stop_timer(SSL *s) 391void dtls1_stop_timer(SSL *s)
382 { 392 {
383 /* Reset everything */ 393 /* Reset everything */
394 memset(&(s->d1->timeout), 0, sizeof(struct dtls1_timeout_st));
384 memset(&(s->d1->next_timeout), 0, sizeof(struct timeval)); 395 memset(&(s->d1->next_timeout), 0, sizeof(struct timeval));
385 s->d1->timeout_duration = 1; 396 s->d1->timeout_duration = 1;
386 BIO_ctrl(SSL_get_rbio(s), BIO_CTRL_DGRAM_SET_NEXT_TIMEOUT, 0, &(s->d1->next_timeout)); 397 BIO_ctrl(SSL_get_rbio(s), BIO_CTRL_DGRAM_SET_NEXT_TIMEOUT, 0, &(s->d1->next_timeout));
@@ -388,10 +399,28 @@ void dtls1_stop_timer(SSL *s)
388 dtls1_clear_record_buffer(s); 399 dtls1_clear_record_buffer(s);
389 } 400 }
390 401
391int dtls1_handle_timeout(SSL *s) 402int dtls1_check_timeout_num(SSL *s)
392 { 403 {
393 DTLS1_STATE *state; 404 s->d1->timeout.num_alerts++;
405
406 /* Reduce MTU after 2 unsuccessful retransmissions */
407 if (s->d1->timeout.num_alerts > 2)
408 {
409 s->d1->mtu = BIO_ctrl(SSL_get_wbio(s), BIO_CTRL_DGRAM_GET_FALLBACK_MTU, 0, NULL);
410 }
394 411
412 if (s->d1->timeout.num_alerts > DTLS1_TMO_ALERT_COUNT)
413 {
414 /* fail the connection, enough alerts have been sent */
415 SSLerr(SSL_F_DTLS1_CHECK_TIMEOUT_NUM,SSL_R_READ_TIMEOUT_EXPIRED);
416 return -1;
417 }
418
419 return 0;
420 }
421
422int dtls1_handle_timeout(SSL *s)
423 {
395 /* if no timer is expired, don't do anything */ 424 /* if no timer is expired, don't do anything */
396 if (!dtls1_is_timer_expired(s)) 425 if (!dtls1_is_timer_expired(s))
397 { 426 {
@@ -399,20 +428,23 @@ int dtls1_handle_timeout(SSL *s)
399 } 428 }
400 429
401 dtls1_double_timeout(s); 430 dtls1_double_timeout(s);
402 state = s->d1; 431
403 state->timeout.num_alerts++; 432 if (dtls1_check_timeout_num(s) < 0)
404 if ( state->timeout.num_alerts > DTLS1_TMO_ALERT_COUNT)
405 {
406 /* fail the connection, enough alerts have been sent */
407 SSLerr(SSL_F_DTLS1_HANDLE_TIMEOUT,SSL_R_READ_TIMEOUT_EXPIRED);
408 return -1; 433 return -1;
434
435 s->d1->timeout.read_timeouts++;
436 if (s->d1->timeout.read_timeouts > DTLS1_TMO_READ_COUNT)
437 {
438 s->d1->timeout.read_timeouts = 1;
409 } 439 }
410 440
411 state->timeout.read_timeouts++; 441#ifndef OPENSSL_NO_HEARTBEATS
412 if ( state->timeout.read_timeouts > DTLS1_TMO_READ_COUNT) 442 if (s->tlsext_hb_pending)
413 { 443 {
414 state->timeout.read_timeouts = 1; 444 s->tlsext_hb_pending = 0;
445 return dtls1_heartbeat(s);
415 } 446 }
447#endif
416 448
417 dtls1_start_timer(s); 449 dtls1_start_timer(s);
418 return dtls1_retransmit_buffered_messages(s); 450 return dtls1_retransmit_buffered_messages(s);
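
The reworked timeout handling above splits the alert accounting into dtls1_check_timeout_num(): every expiry bumps num_alerts, after two unsuccessful retransmissions the write MTU drops to the BIO's fallback MTU, and once num_alerts exceeds DTLS1_TMO_ALERT_COUNT the handshake is failed; dtls1_handle_timeout() additionally re-sends a pending heartbeat instead of retransmitting handshake messages. The following is only a rough sketch of that policy; the constant, the backoff cap and the struct are placeholders, not the real OpenSSL state.

#define TMO_ALERT_COUNT	12	/* stands in for DTLS1_TMO_ALERT_COUNT */

struct rtx_state
	{
	unsigned int num_alerts;	/* expiries seen so far */
	unsigned int timeout_sec;	/* doubled on every expiry */
	unsigned int mtu;
	};

/* Returns -1 to give up, 0 when the caller should retransmit. */
static int on_retransmit_timer(struct rtx_state *st, unsigned int fallback_mtu)
	{
	if (st->timeout_sec < 60)
		st->timeout_sec *= 2;		/* exponential backoff */

	if (++st->num_alerts > 2)
		st->mtu = fallback_mtu;		/* assume an MTU problem */

	if (st->num_alerts > TMO_ALERT_COUNT)
		return -1;			/* enough alerts sent, fail */

	return 0;
	}
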
diff --git a/src/lib/libssl/src/ssl/d1_pkt.c b/src/lib/libssl/src/ssl/d1_pkt.c
index e0c0f0cc9a..987af60835 100644
--- a/src/lib/libssl/src/ssl/d1_pkt.c
+++ b/src/lib/libssl/src/ssl/d1_pkt.c
@@ -179,7 +179,6 @@ static int dtls1_record_needs_buffering(SSL *s, SSL3_RECORD *rr,
179static int dtls1_buffer_record(SSL *s, record_pqueue *q, 179static int dtls1_buffer_record(SSL *s, record_pqueue *q,
180 unsigned char *priority); 180 unsigned char *priority);
181static int dtls1_process_record(SSL *s); 181static int dtls1_process_record(SSL *s);
182static void dtls1_clear_timeouts(SSL *s);
183 182
184/* copy buffered record into SSL structure */ 183/* copy buffered record into SSL structure */
185static int 184static int
@@ -232,6 +231,14 @@ dtls1_buffer_record(SSL *s, record_pqueue *queue, unsigned char *priority)
232 231
233 item->data = rdata; 232 item->data = rdata;
234 233
234#ifndef OPENSSL_NO_SCTP
235 /* Store bio_dgram_sctp_rcvinfo struct */
236 if (BIO_dgram_is_sctp(SSL_get_rbio(s)) &&
237 (s->state == SSL3_ST_SR_FINISHED_A || s->state == SSL3_ST_CR_FINISHED_A)) {
238 BIO_ctrl(SSL_get_rbio(s), BIO_CTRL_DGRAM_SCTP_GET_RCVINFO, sizeof(rdata->recordinfo), &rdata->recordinfo);
239 }
240#endif
241
235 /* insert should not fail, since duplicates are dropped */ 242 /* insert should not fail, since duplicates are dropped */
236 if (pqueue_insert(queue->q, item) == NULL) 243 if (pqueue_insert(queue->q, item) == NULL)
237 { 244 {
@@ -376,6 +383,7 @@ dtls1_process_record(SSL *s)
376 unsigned int mac_size; 383 unsigned int mac_size;
377 unsigned char md[EVP_MAX_MD_SIZE]; 384 unsigned char md[EVP_MAX_MD_SIZE];
378 int decryption_failed_or_bad_record_mac = 0; 385 int decryption_failed_or_bad_record_mac = 0;
386 unsigned char *mac = NULL;
379 387
380 388
381 rr= &(s->s3->rrec); 389 rr= &(s->s3->rrec);
@@ -447,19 +455,15 @@ printf("\n");
447#endif 455#endif
448 } 456 }
449 /* check the MAC for rr->input (it's in mac_size bytes at the tail) */ 457 /* check the MAC for rr->input (it's in mac_size bytes at the tail) */
450 if (rr->length < mac_size) 458 if (rr->length >= mac_size)
451 { 459 {
452#if 0 /* OK only for stream ciphers */ 460 rr->length -= mac_size;
453 al=SSL_AD_DECODE_ERROR; 461 mac = &rr->data[rr->length];
454 SSLerr(SSL_F_DTLS1_PROCESS_RECORD,SSL_R_LENGTH_TOO_SHORT);
455 goto f_err;
456#else
457 decryption_failed_or_bad_record_mac = 1;
458#endif
459 } 462 }
460 rr->length-=mac_size; 463 else
464 rr->length = 0;
461 i=s->method->ssl3_enc->mac(s,md,0); 465 i=s->method->ssl3_enc->mac(s,md,0);
462 if (i < 0 || memcmp(md,&(rr->data[rr->length]),mac_size) != 0) 466 if (i < 0 || mac == NULL || memcmp(md, mac, mac_size) != 0)
463 { 467 {
464 decryption_failed_or_bad_record_mac = 1; 468 decryption_failed_or_bad_record_mac = 1;
465 } 469 }
@@ -644,20 +648,28 @@ again:
644 goto again; /* get another record */ 648 goto again; /* get another record */
645 } 649 }
646 650
647 /* Check whether this is a repeat, or aged record. 651#ifndef OPENSSL_NO_SCTP
648 * Don't check if we're listening and this message is 652 /* Only do replay check if no SCTP bio */
649 * a ClientHello. They can look as if they're replayed, 653 if (!BIO_dgram_is_sctp(SSL_get_rbio(s)))
650 * since they arrive from different connections and 654 {
651 * would be dropped unnecessarily. 655#endif
652 */ 656 /* Check whether this is a repeat, or aged record.
653 if (!(s->d1->listen && rr->type == SSL3_RT_HANDSHAKE && 657 * Don't check if we're listening and this message is
654 *p == SSL3_MT_CLIENT_HELLO) && 658 * a ClientHello. They can look as if they're replayed,
655 !dtls1_record_replay_check(s, bitmap)) 659 * since they arrive from different connections and
656 { 660 * would be dropped unnecessarily.
657 rr->length = 0; 661 */
658 s->packet_length=0; /* dump this record */ 662 if (!(s->d1->listen && rr->type == SSL3_RT_HANDSHAKE &&
659 goto again; /* get another record */ 663 *p == SSL3_MT_CLIENT_HELLO) &&
660 } 664 !dtls1_record_replay_check(s, bitmap))
665 {
666 rr->length = 0;
667 s->packet_length=0; /* dump this record */
668 goto again; /* get another record */
669 }
670#ifndef OPENSSL_NO_SCTP
671 }
672#endif
661 673
662 /* just read a 0 length packet */ 674 /* just read a 0 length packet */
663 if (rr->length == 0) goto again; 675 if (rr->length == 0) goto again;
@@ -685,7 +697,6 @@ again:
685 goto again; /* get another record */ 697 goto again; /* get another record */
686 } 698 }
687 699
688 dtls1_clear_timeouts(s); /* done waiting */
689 return(1); 700 return(1);
690 701
691 } 702 }
@@ -743,7 +754,17 @@ int dtls1_read_bytes(SSL *s, int type, unsigned char *buf, int len, int peek)
743 754
744 /* Now s->d1->handshake_fragment_len == 0 if type == SSL3_RT_HANDSHAKE. */ 755 /* Now s->d1->handshake_fragment_len == 0 if type == SSL3_RT_HANDSHAKE. */
745 756
757#ifndef OPENSSL_NO_SCTP
758 /* Continue handshake if it had to be interrupted to read
759 * app data with SCTP.
760 */
761 if ((!s->in_handshake && SSL_in_init(s)) ||
762 (BIO_dgram_is_sctp(SSL_get_rbio(s)) &&
763 (s->state == DTLS1_SCTP_ST_SR_READ_SOCK || s->state == DTLS1_SCTP_ST_CR_READ_SOCK) &&
764 s->s3->in_read_app_data != 2))
765#else
746 if (!s->in_handshake && SSL_in_init(s)) 766 if (!s->in_handshake && SSL_in_init(s))
767#endif
747 { 768 {
748 /* type == SSL3_RT_APPLICATION_DATA */ 769 /* type == SSL3_RT_APPLICATION_DATA */
749 i=s->handshake_func(s); 770 i=s->handshake_func(s);
@@ -774,6 +795,15 @@ start:
774 item = pqueue_pop(s->d1->buffered_app_data.q); 795 item = pqueue_pop(s->d1->buffered_app_data.q);
775 if (item) 796 if (item)
776 { 797 {
798#ifndef OPENSSL_NO_SCTP
799 /* Restore bio_dgram_sctp_rcvinfo struct */
800 if (BIO_dgram_is_sctp(SSL_get_rbio(s)))
801 {
802 DTLS1_RECORD_DATA *rdata = (DTLS1_RECORD_DATA *) item->data;
803 BIO_ctrl(SSL_get_rbio(s), BIO_CTRL_DGRAM_SCTP_SET_RCVINFO, sizeof(rdata->recordinfo), &rdata->recordinfo);
804 }
805#endif
806
777 dtls1_copy_record(s, item); 807 dtls1_copy_record(s, item);
778 808
779 OPENSSL_free(item->data); 809 OPENSSL_free(item->data);
@@ -856,6 +886,31 @@ start:
856 rr->off=0; 886 rr->off=0;
857 } 887 }
858 } 888 }
889
890#ifndef OPENSSL_NO_SCTP
891 /* We were about to renegotiate but had to read
892 * belated application data first, so retry.
893 */
894 if (BIO_dgram_is_sctp(SSL_get_rbio(s)) &&
895 rr->type == SSL3_RT_APPLICATION_DATA &&
896 (s->state == DTLS1_SCTP_ST_SR_READ_SOCK || s->state == DTLS1_SCTP_ST_CR_READ_SOCK))
897 {
898 s->rwstate=SSL_READING;
899 BIO_clear_retry_flags(SSL_get_rbio(s));
900 BIO_set_retry_read(SSL_get_rbio(s));
901 }
902
903	 /* We might have had to delay a close_notify alert because
904 * of reordered app data. If there was an alert and there
905 * is no message to read anymore, finally set shutdown.
906 */
907 if (BIO_dgram_is_sctp(SSL_get_rbio(s)) &&
908 s->d1->shutdown_received && !BIO_dgram_sctp_msg_waiting(SSL_get_rbio(s)))
909 {
910 s->shutdown |= SSL_RECEIVED_SHUTDOWN;
911 return(0);
912 }
913#endif
859 return(n); 914 return(n);
860 } 915 }
861 916
@@ -883,6 +938,19 @@ start:
883 dest = s->d1->alert_fragment; 938 dest = s->d1->alert_fragment;
884 dest_len = &s->d1->alert_fragment_len; 939 dest_len = &s->d1->alert_fragment_len;
885 } 940 }
941#ifndef OPENSSL_NO_HEARTBEATS
942 else if (rr->type == TLS1_RT_HEARTBEAT)
943 {
944 dtls1_process_heartbeat(s);
945
946 /* Exit and notify application to read again */
947 rr->length = 0;
948 s->rwstate=SSL_READING;
949 BIO_clear_retry_flags(SSL_get_rbio(s));
950 BIO_set_retry_read(SSL_get_rbio(s));
951 return(-1);
952 }
953#endif
886 /* else it's a CCS message, or application data or wrong */ 954 /* else it's a CCS message, or application data or wrong */
887 else if (rr->type != SSL3_RT_CHANGE_CIPHER_SPEC) 955 else if (rr->type != SSL3_RT_CHANGE_CIPHER_SPEC)
888 { 956 {
@@ -966,6 +1034,7 @@ start:
966 !(s->s3->flags & SSL3_FLAGS_NO_RENEGOTIATE_CIPHERS) && 1034 !(s->s3->flags & SSL3_FLAGS_NO_RENEGOTIATE_CIPHERS) &&
967 !s->s3->renegotiate) 1035 !s->s3->renegotiate)
968 { 1036 {
1037 s->new_session = 1;
969 ssl3_renegotiate(s); 1038 ssl3_renegotiate(s);
970 if (ssl3_renegotiate_check(s)) 1039 if (ssl3_renegotiate_check(s))
971 { 1040 {
@@ -1027,6 +1096,21 @@ start:
1027 s->s3->warn_alert = alert_descr; 1096 s->s3->warn_alert = alert_descr;
1028 if (alert_descr == SSL_AD_CLOSE_NOTIFY) 1097 if (alert_descr == SSL_AD_CLOSE_NOTIFY)
1029 { 1098 {
1099#ifndef OPENSSL_NO_SCTP
1100 /* With SCTP and streams the socket may deliver app data
1101 * after a close_notify alert. We have to check this
1102 * first so that nothing gets discarded.
1103 */
1104 if (BIO_dgram_is_sctp(SSL_get_rbio(s)) &&
1105 BIO_dgram_sctp_msg_waiting(SSL_get_rbio(s)))
1106 {
1107 s->d1->shutdown_received = 1;
1108 s->rwstate=SSL_READING;
1109 BIO_clear_retry_flags(SSL_get_rbio(s));
1110 BIO_set_retry_read(SSL_get_rbio(s));
1111 return -1;
1112 }
1113#endif
1030 s->shutdown |= SSL_RECEIVED_SHUTDOWN; 1114 s->shutdown |= SSL_RECEIVED_SHUTDOWN;
1031 return(0); 1115 return(0);
1032 } 1116 }
@@ -1133,6 +1217,15 @@ start:
1133 if (s->version == DTLS1_BAD_VER) 1217 if (s->version == DTLS1_BAD_VER)
1134 s->d1->handshake_read_seq++; 1218 s->d1->handshake_read_seq++;
1135 1219
1220#ifndef OPENSSL_NO_SCTP
1221 /* Remember that a CCS has been received,
1222 * so that an old key of SCTP-Auth can be
1223 * deleted when a CCS is sent. Will be ignored
1224 * if no SCTP is used
1225 */
1226 BIO_ctrl(SSL_get_wbio(s), BIO_CTRL_DGRAM_SCTP_AUTH_CCS_RCVD, 1, NULL);
1227#endif
1228
1136 goto start; 1229 goto start;
1137 } 1230 }
1138 1231
@@ -1155,6 +1248,9 @@ start:
1155 */ 1248 */
1156 if (msg_hdr.type == SSL3_MT_FINISHED) 1249 if (msg_hdr.type == SSL3_MT_FINISHED)
1157 { 1250 {
1251 if (dtls1_check_timeout_num(s) < 0)
1252 return -1;
1253
1158 dtls1_retransmit_buffered_messages(s); 1254 dtls1_retransmit_buffered_messages(s);
1159 rr->length = 0; 1255 rr->length = 0;
1160 goto start; 1256 goto start;
@@ -1172,6 +1268,7 @@ start:
1172#else 1268#else
1173 s->state = s->server ? SSL_ST_ACCEPT : SSL_ST_CONNECT; 1269 s->state = s->server ? SSL_ST_ACCEPT : SSL_ST_CONNECT;
1174#endif 1270#endif
1271 s->renegotiate=1;
1175 s->new_session=1; 1272 s->new_session=1;
1176 } 1273 }
1177 i=s->handshake_func(s); 1274 i=s->handshake_func(s);
@@ -1268,7 +1365,16 @@ dtls1_write_app_data_bytes(SSL *s, int type, const void *buf_, int len)
1268 { 1365 {
1269 int i; 1366 int i;
1270 1367
1271 if (SSL_in_init(s) && !s->in_handshake) 1368#ifndef OPENSSL_NO_SCTP
1369 /* Check if we have to continue an interrupted handshake
1370 * for reading belated app data with SCTP.
1371 */
1372 if ((SSL_in_init(s) && !s->in_handshake) ||
1373 (BIO_dgram_is_sctp(SSL_get_wbio(s)) &&
1374 (s->state == DTLS1_SCTP_ST_SR_READ_SOCK || s->state == DTLS1_SCTP_ST_CR_READ_SOCK)))
1375#else
1376 if (SSL_in_init(s) && !s->in_handshake)
1377#endif
1272 { 1378 {
1273 i=s->handshake_func(s); 1379 i=s->handshake_func(s);
1274 if (i < 0) return(i); 1380 if (i < 0) return(i);
@@ -1768,10 +1874,3 @@ dtls1_reset_seq_numbers(SSL *s, int rw)
1768 1874
1769 memset(seq, 0x00, seq_bytes); 1875 memset(seq, 0x00, seq_bytes);
1770 } 1876 }
1771
1772
1773static void
1774dtls1_clear_timeouts(SSL *s)
1775 {
1776 memset(&(s->d1->timeout), 0x00, sizeof(struct dtls1_timeout_st));
1777 }
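
Among the d1_pkt.c changes above, dtls1_process_record() no longer subtracts mac_size from a record that is too short to contain a MAC (which could underflow rr->length); it only strips the MAC when rr->length >= mac_size, zeroes the length otherwise, and lets the final comparison fail when no MAC pointer was set. A self-contained restatement of that logic, for illustration only; the names are not OpenSSL's.

#include <string.h>

/* Strip the trailing MAC from rec/len and compare it against 'expected';
 * a record shorter than mac_size is treated as a MAC failure instead of
 * letting the length underflow. */
static int strip_and_check_mac(unsigned char *rec, unsigned int *len,
    const unsigned char *expected, unsigned int mac_size)
	{
	const unsigned char *mac = NULL;

	if (*len >= mac_size)
		{
		*len -= mac_size;
		mac = rec + *len;
		}
	else
		*len = 0;

	return mac != NULL && memcmp(expected, mac, mac_size) == 0;
	}
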
diff --git a/src/lib/libssl/src/ssl/d1_srtp.c b/src/lib/libssl/src/ssl/d1_srtp.c
new file mode 100644
index 0000000000..928935bd8b
--- /dev/null
+++ b/src/lib/libssl/src/ssl/d1_srtp.c
@@ -0,0 +1,493 @@
1/* ssl/t1_lib.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58/* ====================================================================
59 * Copyright (c) 1998-2006 The OpenSSL Project. All rights reserved.
60 *
61 * Redistribution and use in source and binary forms, with or without
62 * modification, are permitted provided that the following conditions
63 * are met:
64 *
65 * 1. Redistributions of source code must retain the above copyright
66 * notice, this list of conditions and the following disclaimer.
67 *
68 * 2. Redistributions in binary form must reproduce the above copyright
69 * notice, this list of conditions and the following disclaimer in
70 * the documentation and/or other materials provided with the
71 * distribution.
72 *
73 * 3. All advertising materials mentioning features or use of this
74 * software must display the following acknowledgment:
75 * "This product includes software developed by the OpenSSL Project
76 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
77 *
78 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
79 * endorse or promote products derived from this software without
80 * prior written permission. For written permission, please contact
81 * openssl-core@openssl.org.
82 *
83 * 5. Products derived from this software may not be called "OpenSSL"
84 * nor may "OpenSSL" appear in their names without prior written
85 * permission of the OpenSSL Project.
86 *
87 * 6. Redistributions of any form whatsoever must retain the following
88 * acknowledgment:
89 * "This product includes software developed by the OpenSSL Project
90 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
91 *
92 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
93 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
94 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
95 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
96 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
97 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
98 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
99 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
100 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
101 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
102 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
103 * OF THE POSSIBILITY OF SUCH DAMAGE.
104 * ====================================================================
105 *
106 * This product includes cryptographic software written by Eric Young
107 * (eay@cryptsoft.com). This product includes software written by Tim
108 * Hudson (tjh@cryptsoft.com).
109 *
110 */
111/*
112 DTLS code by Eric Rescorla <ekr@rtfm.com>
113
114 Copyright (C) 2006, Network Resonance, Inc.
115 Copyright (C) 2011, RTFM, Inc.
116*/
117
118#ifndef OPENSSL_NO_SRTP
119
120#include <stdio.h>
121#include <openssl/objects.h>
122#include "ssl_locl.h"
123#include "srtp.h"
124
125
126static SRTP_PROTECTION_PROFILE srtp_known_profiles[]=
127 {
128 {
129 "SRTP_AES128_CM_SHA1_80",
130 SRTP_AES128_CM_SHA1_80,
131 },
132 {
133 "SRTP_AES128_CM_SHA1_32",
134 SRTP_AES128_CM_SHA1_32,
135 },
136#if 0
137 {
138 "SRTP_NULL_SHA1_80",
139 SRTP_NULL_SHA1_80,
140 },
141 {
142 "SRTP_NULL_SHA1_32",
143 SRTP_NULL_SHA1_32,
144 },
145#endif
146 {0}
147 };
148
149static int find_profile_by_name(char *profile_name,
150 SRTP_PROTECTION_PROFILE **pptr,unsigned len)
151 {
152 SRTP_PROTECTION_PROFILE *p;
153
154 p=srtp_known_profiles;
155 while(p->name)
156 {
157 if((len == strlen(p->name)) && !strncmp(p->name,profile_name,
158 len))
159 {
160 *pptr=p;
161 return 0;
162 }
163
164 p++;
165 }
166
167 return 1;
168 }
169
170static int find_profile_by_num(unsigned profile_num,
171 SRTP_PROTECTION_PROFILE **pptr)
172 {
173 SRTP_PROTECTION_PROFILE *p;
174
175 p=srtp_known_profiles;
176 while(p->name)
177 {
178 if(p->id == profile_num)
179 {
180 *pptr=p;
181 return 0;
182 }
183 p++;
184 }
185
186 return 1;
187 }
188
189static int ssl_ctx_make_profiles(const char *profiles_string,STACK_OF(SRTP_PROTECTION_PROFILE) **out)
190 {
191 STACK_OF(SRTP_PROTECTION_PROFILE) *profiles;
192
193 char *col;
194 char *ptr=(char *)profiles_string;
195
196 SRTP_PROTECTION_PROFILE *p;
197
198 if(!(profiles=sk_SRTP_PROTECTION_PROFILE_new_null()))
199 {
200 SSLerr(SSL_F_SSL_CTX_MAKE_PROFILES, SSL_R_SRTP_COULD_NOT_ALLOCATE_PROFILES);
201 return 1;
202 }
203
204 do
205 {
206 col=strchr(ptr,':');
207
208 if(!find_profile_by_name(ptr,&p,
209 col ? col-ptr : (int)strlen(ptr)))
210 {
211 sk_SRTP_PROTECTION_PROFILE_push(profiles,p);
212 }
213 else
214 {
215 SSLerr(SSL_F_SSL_CTX_MAKE_PROFILES,SSL_R_SRTP_UNKNOWN_PROTECTION_PROFILE);
216 return 1;
217 }
218
219 if(col) ptr=col+1;
220 } while (col);
221
222 *out=profiles;
223
224 return 0;
225 }
226
227int SSL_CTX_set_tlsext_use_srtp(SSL_CTX *ctx,const char *profiles)
228 {
229 return ssl_ctx_make_profiles(profiles,&ctx->srtp_profiles);
230 }
231
232int SSL_set_tlsext_use_srtp(SSL *s,const char *profiles)
233 {
234 return ssl_ctx_make_profiles(profiles,&s->srtp_profiles);
235 }
236
237
238STACK_OF(SRTP_PROTECTION_PROFILE) *SSL_get_srtp_profiles(SSL *s)
239 {
240 if(s != NULL)
241 {
242 if(s->srtp_profiles != NULL)
243 {
244 return s->srtp_profiles;
245 }
246 else if((s->ctx != NULL) &&
247 (s->ctx->srtp_profiles != NULL))
248 {
249 return s->ctx->srtp_profiles;
250 }
251 }
252
253 return NULL;
254 }
255
256SRTP_PROTECTION_PROFILE *SSL_get_selected_srtp_profile(SSL *s)
257 {
258 return s->srtp_profile;
259 }
260
261/* Note: this function returns 0 length if there are no
262 profiles specified */
263int ssl_add_clienthello_use_srtp_ext(SSL *s, unsigned char *p, int *len, int maxlen)
264 {
265 int ct=0;
266 int i;
267 STACK_OF(SRTP_PROTECTION_PROFILE) *clnt=0;
268 SRTP_PROTECTION_PROFILE *prof;
269
270 clnt=SSL_get_srtp_profiles(s);
271 ct=sk_SRTP_PROTECTION_PROFILE_num(clnt); /* -1 if clnt == 0 */
272
273 if(p)
274 {
275 if(ct==0)
276 {
277 SSLerr(SSL_F_SSL_ADD_CLIENTHELLO_USE_SRTP_EXT,SSL_R_EMPTY_SRTP_PROTECTION_PROFILE_LIST);
278 return 1;
279 }
280
281 if((2 + ct*2 + 1) > maxlen)
282 {
283 SSLerr(SSL_F_SSL_ADD_CLIENTHELLO_USE_SRTP_EXT,SSL_R_SRTP_PROTECTION_PROFILE_LIST_TOO_LONG);
284 return 1;
285 }
286
287 /* Add the length */
288 s2n(ct * 2, p);
289 for(i=0;i<ct;i++)
290 {
291 prof=sk_SRTP_PROTECTION_PROFILE_value(clnt,i);
292 s2n(prof->id,p);
293 }
294
295 /* Add an empty use_mki value */
296 *p++ = 0;
297 }
298
299 *len=2 + ct*2 + 1;
300
301 return 0;
302 }
303
304
305int ssl_parse_clienthello_use_srtp_ext(SSL *s, unsigned char *d, int len,int *al)
306 {
307 SRTP_PROTECTION_PROFILE *cprof,*sprof;
308 STACK_OF(SRTP_PROTECTION_PROFILE) *clnt=0,*srvr;
309 int ct;
310 int mki_len;
311 int i,j;
312 int id;
313 int ret;
314
315 /* Length value + the MKI length */
316 if(len < 3)
317 {
318 SSLerr(SSL_F_SSL_PARSE_CLIENTHELLO_USE_SRTP_EXT,SSL_R_BAD_SRTP_PROTECTION_PROFILE_LIST);
319 *al=SSL_AD_DECODE_ERROR;
320 return 1;
321 }
322
323 /* Pull off the length of the cipher suite list */
324 n2s(d, ct);
325 len -= 2;
326
327 /* Check that it is even */
328 if(ct%2)
329 {
330 SSLerr(SSL_F_SSL_PARSE_CLIENTHELLO_USE_SRTP_EXT,SSL_R_BAD_SRTP_PROTECTION_PROFILE_LIST);
331 *al=SSL_AD_DECODE_ERROR;
332 return 1;
333 }
334
335 /* Check that lengths are consistent */
336 if(len < (ct + 1))
337 {
338 SSLerr(SSL_F_SSL_PARSE_CLIENTHELLO_USE_SRTP_EXT,SSL_R_BAD_SRTP_PROTECTION_PROFILE_LIST);
339 *al=SSL_AD_DECODE_ERROR;
340 return 1;
341 }
342
343
344 clnt=sk_SRTP_PROTECTION_PROFILE_new_null();
345
346 while(ct)
347 {
348 n2s(d,id);
349 ct-=2;
350 len-=2;
351
352 if(!find_profile_by_num(id,&cprof))
353 {
354 sk_SRTP_PROTECTION_PROFILE_push(clnt,cprof);
355 }
356 else
357 {
358 ; /* Ignore */
359 }
360 }
361
362 /* Now extract the MKI value as a sanity check, but discard it for now */
363 mki_len = *d;
364 d++; len--;
365
366 if (mki_len != len)
367 {
368 SSLerr(SSL_F_SSL_PARSE_CLIENTHELLO_USE_SRTP_EXT,SSL_R_BAD_SRTP_MKI_VALUE);
369 *al=SSL_AD_DECODE_ERROR;
370 return 1;
371 }
372
373 srvr=SSL_get_srtp_profiles(s);
374
375 /* Pick our most preferred profile. If no profiles have been
376 configured then the outer loop doesn't run
377 (sk_SRTP_PROTECTION_PROFILE_num() = -1)
378 and so we just return without doing anything */
379 for(i=0;i<sk_SRTP_PROTECTION_PROFILE_num(srvr);i++)
380 {
381 sprof=sk_SRTP_PROTECTION_PROFILE_value(srvr,i);
382
383 for(j=0;j<sk_SRTP_PROTECTION_PROFILE_num(clnt);j++)
384 {
385 cprof=sk_SRTP_PROTECTION_PROFILE_value(clnt,j);
386
387 if(cprof->id==sprof->id)
388 {
389 s->srtp_profile=sprof;
390 *al=0;
391 ret=0;
392 goto done;
393 }
394 }
395 }
396
397 ret=0;
398
399done:
400 if(clnt) sk_SRTP_PROTECTION_PROFILE_free(clnt);
401
402 return ret;
403 }
404
405int ssl_add_serverhello_use_srtp_ext(SSL *s, unsigned char *p, int *len, int maxlen)
406 {
407 if(p)
408 {
409 if(maxlen < 5)
410 {
411 SSLerr(SSL_F_SSL_ADD_SERVERHELLO_USE_SRTP_EXT,SSL_R_SRTP_PROTECTION_PROFILE_LIST_TOO_LONG);
412 return 1;
413 }
414
415 if(s->srtp_profile==0)
416 {
417 SSLerr(SSL_F_SSL_ADD_SERVERHELLO_USE_SRTP_EXT,SSL_R_USE_SRTP_NOT_NEGOTIATED);
418 return 1;
419 }
420 s2n(2, p);
421 s2n(s->srtp_profile->id,p);
422 *p++ = 0;
423 }
424 *len=5;
425
426 return 0;
427 }
428
429
430int ssl_parse_serverhello_use_srtp_ext(SSL *s, unsigned char *d, int len,int *al)
431 {
432 unsigned id;
433 int i;
434 int ct;
435
436 STACK_OF(SRTP_PROTECTION_PROFILE) *clnt;
437 SRTP_PROTECTION_PROFILE *prof;
438
439 if(len!=5)
440 {
441 SSLerr(SSL_F_SSL_PARSE_SERVERHELLO_USE_SRTP_EXT,SSL_R_BAD_SRTP_PROTECTION_PROFILE_LIST);
442 *al=SSL_AD_DECODE_ERROR;
443 return 1;
444 }
445
446 n2s(d, ct);
447 if(ct!=2)
448 {
449 SSLerr(SSL_F_SSL_PARSE_SERVERHELLO_USE_SRTP_EXT,SSL_R_BAD_SRTP_PROTECTION_PROFILE_LIST);
450 *al=SSL_AD_DECODE_ERROR;
451 return 1;
452 }
453
454 n2s(d,id);
455 if (*d) /* Must be no MKI, since we never offer one */
456 {
457 SSLerr(SSL_F_SSL_PARSE_SERVERHELLO_USE_SRTP_EXT,SSL_R_BAD_SRTP_MKI_VALUE);
458 *al=SSL_AD_ILLEGAL_PARAMETER;
459 return 1;
460 }
461
462 clnt=SSL_get_srtp_profiles(s);
463
464 /* Throw an error if the server gave us an unsolicited extension */
465 if (clnt == NULL)
466 {
467 SSLerr(SSL_F_SSL_PARSE_SERVERHELLO_USE_SRTP_EXT,SSL_R_NO_SRTP_PROFILES);
468 *al=SSL_AD_DECODE_ERROR;
469 return 1;
470 }
471
472 /* Check to see if the server gave us something we support
473 (and presumably offered)
474 */
475 for(i=0;i<sk_SRTP_PROTECTION_PROFILE_num(clnt);i++)
476 {
477 prof=sk_SRTP_PROTECTION_PROFILE_value(clnt,i);
478
479 if(prof->id == id)
480 {
481 s->srtp_profile=prof;
482 *al=0;
483 return 0;
484 }
485 }
486
487 SSLerr(SSL_F_SSL_PARSE_SERVERHELLO_USE_SRTP_EXT,SSL_R_BAD_SRTP_PROTECTION_PROFILE_LIST);
488 *al=SSL_AD_DECODE_ERROR;
489 return 1;
490 }
491
492
493#endif
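
d1_srtp.c above adds the DTLS-SRTP (use_srtp, RFC 5764) extension: SSL_CTX_set_tlsext_use_srtp()/SSL_set_tlsext_use_srtp() take a colon-separated list of profile names, the ClientHello/ServerHello extension carries the corresponding profile ids plus an empty MKI, and SSL_get_selected_srtp_profile() reports the negotiated profile afterwards. A short usage sketch of that API (not part of the patch; error handling trimmed):

#include <openssl/srtp.h>
#include <openssl/ssl.h>

static int offer_srtp(SSL_CTX *ctx)
	{
	/* Offer both profiles known to d1_srtp.c, most preferred first.
	 * Note the inverted convention: 0 means success. */
	return SSL_CTX_set_tlsext_use_srtp(ctx,
	    "SRTP_AES128_CM_SHA1_80:SRTP_AES128_CM_SHA1_32") == 0;
	}

static SRTP_PROTECTION_PROFILE *negotiated_srtp(SSL *s)
	{
	/* NULL unless use_srtp was negotiated during the handshake. */
	return SSL_get_selected_srtp_profile(s);
	}
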
diff --git a/src/lib/libssl/src/ssl/d1_srvr.c b/src/lib/libssl/src/ssl/d1_srvr.c
index 149983be30..29421da9aa 100644
--- a/src/lib/libssl/src/ssl/d1_srvr.c
+++ b/src/lib/libssl/src/ssl/d1_srvr.c
@@ -151,6 +151,10 @@ int dtls1_accept(SSL *s)
151 int ret= -1; 151 int ret= -1;
152 int new_state,state,skip=0; 152 int new_state,state,skip=0;
153 int listen; 153 int listen;
154#ifndef OPENSSL_NO_SCTP
155 unsigned char sctpauthkey[64];
156 char labelbuffer[sizeof(DTLS1_SCTP_AUTH_LABEL)];
157#endif
154 158
155 RAND_add(&Time,sizeof(Time),0); 159 RAND_add(&Time,sizeof(Time),0);
156 ERR_clear_error(); 160 ERR_clear_error();
@@ -168,6 +172,13 @@ int dtls1_accept(SSL *s)
168 if (!SSL_in_init(s) || SSL_in_before(s)) SSL_clear(s); 172 if (!SSL_in_init(s) || SSL_in_before(s)) SSL_clear(s);
169 173
170 s->d1->listen = listen; 174 s->d1->listen = listen;
175#ifndef OPENSSL_NO_SCTP
176 /* Notify SCTP BIO socket to enter handshake
177 * mode and prevent stream identifier other
178 * than 0. Will be ignored if no SCTP is used.
179 */
180 BIO_ctrl(SSL_get_wbio(s), BIO_CTRL_DGRAM_SCTP_SET_IN_HANDSHAKE, s->in_handshake, NULL);
181#endif
171 182
172 if (s->cert == NULL) 183 if (s->cert == NULL)
173 { 184 {
@@ -175,6 +186,19 @@ int dtls1_accept(SSL *s)
175 return(-1); 186 return(-1);
176 } 187 }
177 188
189#ifndef OPENSSL_NO_HEARTBEATS
190 /* If we're awaiting a HeartbeatResponse, pretend we
191 * already got and don't await it anymore, because
192 * Heartbeats don't make sense during handshakes anyway.
193 */
194 if (s->tlsext_hb_pending)
195 {
196 dtls1_stop_timer(s);
197 s->tlsext_hb_pending = 0;
198 s->tlsext_hb_seq++;
199 }
200#endif
201
178 for (;;) 202 for (;;)
179 { 203 {
180 state=s->state; 204 state=s->state;
@@ -182,7 +206,7 @@ int dtls1_accept(SSL *s)
182 switch (s->state) 206 switch (s->state)
183 { 207 {
184 case SSL_ST_RENEGOTIATE: 208 case SSL_ST_RENEGOTIATE:
185 s->new_session=1; 209 s->renegotiate=1;
186 /* s->state=SSL_ST_ACCEPT; */ 210 /* s->state=SSL_ST_ACCEPT; */
187 211
188 case SSL_ST_BEFORE: 212 case SSL_ST_BEFORE:
@@ -227,8 +251,12 @@ int dtls1_accept(SSL *s)
227 { 251 {
228 /* Ok, we now need to push on a buffering BIO so that 252 /* Ok, we now need to push on a buffering BIO so that
229 * the output is sent in a way that TCP likes :-) 253 * the output is sent in a way that TCP likes :-)
254 * ...but not with SCTP :-)
230 */ 255 */
231 if (!ssl_init_wbio_buffer(s,1)) { ret= -1; goto end; } 256#ifndef OPENSSL_NO_SCTP
257 if (!BIO_dgram_is_sctp(SSL_get_wbio(s)))
258#endif
259 if (!ssl_init_wbio_buffer(s,1)) { ret= -1; goto end; }
232 260
233 ssl3_init_finished_mac(s); 261 ssl3_init_finished_mac(s);
234 s->state=SSL3_ST_SR_CLNT_HELLO_A; 262 s->state=SSL3_ST_SR_CLNT_HELLO_A;
@@ -313,25 +341,75 @@ int dtls1_accept(SSL *s)
313 ssl3_init_finished_mac(s); 341 ssl3_init_finished_mac(s);
314 break; 342 break;
315 343
344#ifndef OPENSSL_NO_SCTP
345 case DTLS1_SCTP_ST_SR_READ_SOCK:
346
347 if (BIO_dgram_sctp_msg_waiting(SSL_get_rbio(s)))
348 {
349 s->s3->in_read_app_data=2;
350 s->rwstate=SSL_READING;
351 BIO_clear_retry_flags(SSL_get_rbio(s));
352 BIO_set_retry_read(SSL_get_rbio(s));
353 ret = -1;
354 goto end;
355 }
356
357 s->state=SSL3_ST_SR_FINISHED_A;
358 break;
359
360 case DTLS1_SCTP_ST_SW_WRITE_SOCK:
361 ret = BIO_dgram_sctp_wait_for_dry(SSL_get_wbio(s));
362 if (ret < 0) goto end;
363
364 if (ret == 0)
365 {
366 if (s->d1->next_state != SSL_ST_OK)
367 {
368 s->s3->in_read_app_data=2;
369 s->rwstate=SSL_READING;
370 BIO_clear_retry_flags(SSL_get_rbio(s));
371 BIO_set_retry_read(SSL_get_rbio(s));
372 ret = -1;
373 goto end;
374 }
375 }
376
377 s->state=s->d1->next_state;
378 break;
379#endif
380
316 case SSL3_ST_SW_SRVR_HELLO_A: 381 case SSL3_ST_SW_SRVR_HELLO_A:
317 case SSL3_ST_SW_SRVR_HELLO_B: 382 case SSL3_ST_SW_SRVR_HELLO_B:
318 s->new_session = 2; 383 s->renegotiate = 2;
319 dtls1_start_timer(s); 384 dtls1_start_timer(s);
320 ret=dtls1_send_server_hello(s); 385 ret=dtls1_send_server_hello(s);
321 if (ret <= 0) goto end; 386 if (ret <= 0) goto end;
322 387
323#ifndef OPENSSL_NO_TLSEXT
324 if (s->hit) 388 if (s->hit)
325 { 389 {
390#ifndef OPENSSL_NO_SCTP
391 /* Add new shared key for SCTP-Auth,
392 * will be ignored if no SCTP used.
393 */
394 snprintf((char*) labelbuffer, sizeof(DTLS1_SCTP_AUTH_LABEL),
395 DTLS1_SCTP_AUTH_LABEL);
396
397 SSL_export_keying_material(s, sctpauthkey,
398 sizeof(sctpauthkey), labelbuffer,
399 sizeof(labelbuffer), NULL, 0, 0);
400
401 BIO_ctrl(SSL_get_wbio(s), BIO_CTRL_DGRAM_SCTP_ADD_AUTH_KEY,
402 sizeof(sctpauthkey), sctpauthkey);
403#endif
404#ifndef OPENSSL_NO_TLSEXT
326 if (s->tlsext_ticket_expected) 405 if (s->tlsext_ticket_expected)
327 s->state=SSL3_ST_SW_SESSION_TICKET_A; 406 s->state=SSL3_ST_SW_SESSION_TICKET_A;
328 else 407 else
329 s->state=SSL3_ST_SW_CHANGE_A; 408 s->state=SSL3_ST_SW_CHANGE_A;
330 }
331#else 409#else
332 if (s->hit) 410 s->state=SSL3_ST_SW_CHANGE_A;
333 s->state=SSL3_ST_SW_CHANGE_A;
334#endif 411#endif
412 }
335 else 413 else
336 s->state=SSL3_ST_SW_CERT_A; 414 s->state=SSL3_ST_SW_CERT_A;
337 s->init_num=0; 415 s->init_num=0;
@@ -441,6 +519,13 @@ int dtls1_accept(SSL *s)
441 skip=1; 519 skip=1;
442 s->s3->tmp.cert_request=0; 520 s->s3->tmp.cert_request=0;
443 s->state=SSL3_ST_SW_SRVR_DONE_A; 521 s->state=SSL3_ST_SW_SRVR_DONE_A;
522#ifndef OPENSSL_NO_SCTP
523 if (BIO_dgram_is_sctp(SSL_get_wbio(s)))
524 {
525 s->d1->next_state = SSL3_ST_SW_SRVR_DONE_A;
526 s->state = DTLS1_SCTP_ST_SW_WRITE_SOCK;
527 }
528#endif
444 } 529 }
445 else 530 else
446 { 531 {
@@ -450,9 +535,23 @@ int dtls1_accept(SSL *s)
450 if (ret <= 0) goto end; 535 if (ret <= 0) goto end;
451#ifndef NETSCAPE_HANG_BUG 536#ifndef NETSCAPE_HANG_BUG
452 s->state=SSL3_ST_SW_SRVR_DONE_A; 537 s->state=SSL3_ST_SW_SRVR_DONE_A;
538#ifndef OPENSSL_NO_SCTP
539 if (BIO_dgram_is_sctp(SSL_get_wbio(s)))
540 {
541 s->d1->next_state = SSL3_ST_SW_SRVR_DONE_A;
542 s->state = DTLS1_SCTP_ST_SW_WRITE_SOCK;
543 }
544#endif
453#else 545#else
454 s->state=SSL3_ST_SW_FLUSH; 546 s->state=SSL3_ST_SW_FLUSH;
455 s->s3->tmp.next_state=SSL3_ST_SR_CERT_A; 547 s->s3->tmp.next_state=SSL3_ST_SR_CERT_A;
548#ifndef OPENSSL_NO_SCTP
549 if (BIO_dgram_is_sctp(SSL_get_wbio(s)))
550 {
551 s->d1->next_state = s->s3->tmp.next_state;
552 s->s3->tmp.next_state=DTLS1_SCTP_ST_SW_WRITE_SOCK;
553 }
554#endif
456#endif 555#endif
457 s->init_num=0; 556 s->init_num=0;
458 } 557 }
@@ -472,6 +571,13 @@ int dtls1_accept(SSL *s)
472 s->rwstate=SSL_WRITING; 571 s->rwstate=SSL_WRITING;
473 if (BIO_flush(s->wbio) <= 0) 572 if (BIO_flush(s->wbio) <= 0)
474 { 573 {
574 /* If the write error was fatal, stop trying */
575 if (!BIO_should_retry(s->wbio))
576 {
577 s->rwstate=SSL_NOTHING;
578 s->state=s->s3->tmp.next_state;
579 }
580
475 ret= -1; 581 ret= -1;
476 goto end; 582 goto end;
477 } 583 }
@@ -485,15 +591,16 @@ int dtls1_accept(SSL *s)
485 ret = ssl3_check_client_hello(s); 591 ret = ssl3_check_client_hello(s);
486 if (ret <= 0) 592 if (ret <= 0)
487 goto end; 593 goto end;
488 dtls1_stop_timer(s);
489 if (ret == 2) 594 if (ret == 2)
595 {
596 dtls1_stop_timer(s);
490 s->state = SSL3_ST_SR_CLNT_HELLO_C; 597 s->state = SSL3_ST_SR_CLNT_HELLO_C;
598 }
491 else { 599 else {
492 /* could be sent for a DH cert, even if we 600 /* could be sent for a DH cert, even if we
493 * have not asked for it :-) */ 601 * have not asked for it :-) */
494 ret=ssl3_get_client_certificate(s); 602 ret=ssl3_get_client_certificate(s);
495 if (ret <= 0) goto end; 603 if (ret <= 0) goto end;
496 dtls1_stop_timer(s);
497 s->init_num=0; 604 s->init_num=0;
498 s->state=SSL3_ST_SR_KEY_EXCH_A; 605 s->state=SSL3_ST_SR_KEY_EXCH_A;
499 } 606 }
@@ -503,7 +610,21 @@ int dtls1_accept(SSL *s)
503 case SSL3_ST_SR_KEY_EXCH_B: 610 case SSL3_ST_SR_KEY_EXCH_B:
504 ret=ssl3_get_client_key_exchange(s); 611 ret=ssl3_get_client_key_exchange(s);
505 if (ret <= 0) goto end; 612 if (ret <= 0) goto end;
506 dtls1_stop_timer(s); 613#ifndef OPENSSL_NO_SCTP
614 /* Add new shared key for SCTP-Auth,
615 * will be ignored if no SCTP used.
616 */
617 snprintf((char *) labelbuffer, sizeof(DTLS1_SCTP_AUTH_LABEL),
618 DTLS1_SCTP_AUTH_LABEL);
619
620 SSL_export_keying_material(s, sctpauthkey,
621 sizeof(sctpauthkey), labelbuffer,
622 sizeof(labelbuffer), NULL, 0, 0);
623
624 BIO_ctrl(SSL_get_wbio(s), BIO_CTRL_DGRAM_SCTP_ADD_AUTH_KEY,
625 sizeof(sctpauthkey), sctpauthkey);
626#endif
627
507 s->state=SSL3_ST_SR_CERT_VRFY_A; 628 s->state=SSL3_ST_SR_CERT_VRFY_A;
508 s->init_num=0; 629 s->init_num=0;
509 630
@@ -540,9 +661,13 @@ int dtls1_accept(SSL *s)
540 /* we should decide if we expected this one */ 661 /* we should decide if we expected this one */
541 ret=ssl3_get_cert_verify(s); 662 ret=ssl3_get_cert_verify(s);
542 if (ret <= 0) goto end; 663 if (ret <= 0) goto end;
543 dtls1_stop_timer(s); 664#ifndef OPENSSL_NO_SCTP
544 665 if (BIO_dgram_is_sctp(SSL_get_wbio(s)) &&
545 s->state=SSL3_ST_SR_FINISHED_A; 666 state == SSL_ST_RENEGOTIATE)
667 s->state=DTLS1_SCTP_ST_SR_READ_SOCK;
668 else
669#endif
670 s->state=SSL3_ST_SR_FINISHED_A;
546 s->init_num=0; 671 s->init_num=0;
547 break; 672 break;
548 673
@@ -594,6 +719,14 @@ int dtls1_accept(SSL *s)
594 SSL3_ST_SW_CHANGE_A,SSL3_ST_SW_CHANGE_B); 719 SSL3_ST_SW_CHANGE_A,SSL3_ST_SW_CHANGE_B);
595 720
596 if (ret <= 0) goto end; 721 if (ret <= 0) goto end;
722
723#ifndef OPENSSL_NO_SCTP
724 /* Change to new shared key of SCTP-Auth,
725 * will be ignored if no SCTP used.
726 */
727 BIO_ctrl(SSL_get_wbio(s), BIO_CTRL_DGRAM_SCTP_NEXT_AUTH_KEY, 0, NULL);
728#endif
729
597 s->state=SSL3_ST_SW_FINISHED_A; 730 s->state=SSL3_ST_SW_FINISHED_A;
598 s->init_num=0; 731 s->init_num=0;
599 732
@@ -618,7 +751,16 @@ int dtls1_accept(SSL *s)
618 if (s->hit) 751 if (s->hit)
619 s->s3->tmp.next_state=SSL3_ST_SR_FINISHED_A; 752 s->s3->tmp.next_state=SSL3_ST_SR_FINISHED_A;
620 else 753 else
754 {
621 s->s3->tmp.next_state=SSL_ST_OK; 755 s->s3->tmp.next_state=SSL_ST_OK;
756#ifndef OPENSSL_NO_SCTP
757 if (BIO_dgram_is_sctp(SSL_get_wbio(s)))
758 {
759 s->d1->next_state = s->s3->tmp.next_state;
760 s->s3->tmp.next_state=DTLS1_SCTP_ST_SW_WRITE_SOCK;
761 }
762#endif
763 }
622 s->init_num=0; 764 s->init_num=0;
623 break; 765 break;
624 766
@@ -636,11 +778,9 @@ int dtls1_accept(SSL *s)
636 778
637 s->init_num=0; 779 s->init_num=0;
638 780
639 if (s->new_session == 2) /* skipped if we just sent a HelloRequest */ 781 if (s->renegotiate == 2) /* skipped if we just sent a HelloRequest */
640 { 782 {
641 /* actually not necessarily a 'new' session unless 783 s->renegotiate=0;
642 * SSL_OP_NO_SESSION_RESUMPTION_ON_RENEGOTIATION is set */
643
644 s->new_session=0; 784 s->new_session=0;
645 785
646 ssl_update_cache(s,SSL_SESS_CACHE_SERVER); 786 ssl_update_cache(s,SSL_SESS_CACHE_SERVER);
@@ -692,6 +832,14 @@ end:
692 /* BIO_flush(s->wbio); */ 832 /* BIO_flush(s->wbio); */
693 833
694 s->in_handshake--; 834 s->in_handshake--;
835#ifndef OPENSSL_NO_SCTP
836 /* Notify SCTP BIO socket to leave handshake
837	 * mode and allow stream identifier other
838 * than 0. Will be ignored if no SCTP is used.
839 */
840 BIO_ctrl(SSL_get_wbio(s), BIO_CTRL_DGRAM_SCTP_SET_IN_HANDSHAKE, s->in_handshake, NULL);
841#endif
842
695 if (cb != NULL) 843 if (cb != NULL)
696 cb(s,SSL_CB_ACCEPT_EXIT,ret); 844 cb(s,SSL_CB_ACCEPT_EXIT,ret);
697 return(ret); 845 return(ret);
@@ -772,7 +920,7 @@ int dtls1_send_server_hello(SSL *s)
772 p=s->s3->server_random; 920 p=s->s3->server_random;
773 Time=(unsigned long)time(NULL); /* Time */ 921 Time=(unsigned long)time(NULL); /* Time */
774 l2n(Time,p); 922 l2n(Time,p);
775 RAND_pseudo_bytes(p,SSL3_RANDOM_SIZE-sizeof(Time)); 923 RAND_pseudo_bytes(p,SSL3_RANDOM_SIZE-4);
776 /* Do the message type and length last */ 924 /* Do the message type and length last */
777 d=p= &(buf[DTLS1_HM_HEADER_LENGTH]); 925 d=p= &(buf[DTLS1_HM_HEADER_LENGTH]);
778 926
@@ -1147,7 +1295,7 @@ int dtls1_send_server_key_exchange(SSL *s)
1147 if (!(s->s3->tmp.new_cipher->algorithm_auth & SSL_aNULL) 1295 if (!(s->s3->tmp.new_cipher->algorithm_auth & SSL_aNULL)
1148 && !(s->s3->tmp.new_cipher->algorithm_mkey & SSL_kPSK)) 1296 && !(s->s3->tmp.new_cipher->algorithm_mkey & SSL_kPSK))
1149 { 1297 {
1150 if ((pkey=ssl_get_sign_pkey(s,s->s3->tmp.new_cipher)) 1298 if ((pkey=ssl_get_sign_pkey(s,s->s3->tmp.new_cipher, NULL))
1151 == NULL) 1299 == NULL)
1152 { 1300 {
1153 al=SSL_AD_DECODE_ERROR; 1301 al=SSL_AD_DECODE_ERROR;
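
As context for the SCTP branches added to dtls1_accept() above: they only fire when the SSL object sits on an SCTP datagram BIO. A rough sketch (not part of this import) of how a server might drive the handshake over such a BIO; it assumes an SCTP-enabled build (OPENSSL_NO_SCTP undefined), a hypothetical connected one-to-one SCTP socket "sd", and a hypothetical "ctx" created from DTLSv1_server_method() with certificate and key already loaded:

    #include <openssl/ssl.h>
    #include <openssl/bio.h>

    /* Sketch only: accept one DTLS-over-SCTP connection on socket sd. */
    static int dtls_sctp_accept(SSL_CTX *ctx, int sd)
    {
        BIO *bio = BIO_new_dgram_sctp(sd, BIO_NOCLOSE);
        SSL *ssl = SSL_new(ctx);

        if (bio == NULL || ssl == NULL) {
            BIO_free(bio);
            SSL_free(ssl);
            return -1;
        }
        SSL_set_bio(ssl, bio, bio);     /* same SCTP BIO for read and write */
        if (SSL_accept(ssl) <= 0) {     /* runs dtls1_accept(); the SCTP    */
            SSL_free(ssl);              /* branches above are taken because */
            return -1;                  /* BIO_dgram_is_sctp() is true here */
        }
        /* ... SSL_read()/SSL_write() as usual ... */
        SSL_free(ssl);                  /* also frees the BIO */
        return 0;
    }
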
diff --git a/src/lib/libssl/src/ssl/dtls1.h b/src/lib/libssl/src/ssl/dtls1.h
index 2900d1d8ae..5008bf6081 100644
--- a/src/lib/libssl/src/ssl/dtls1.h
+++ b/src/lib/libssl/src/ssl/dtls1.h
@@ -105,6 +105,11 @@ extern "C" {
105#define DTLS1_AL_HEADER_LENGTH 2 105#define DTLS1_AL_HEADER_LENGTH 2
106#endif 106#endif
107 107
108#ifndef OPENSSL_NO_SSL_INTERN
109
110#ifndef OPENSSL_NO_SCTP
111#define DTLS1_SCTP_AUTH_LABEL "EXPORTER_DTLS_OVER_SCTP"
112#endif
108 113
109typedef struct dtls1_bitmap_st 114typedef struct dtls1_bitmap_st
110 { 115 {
@@ -227,7 +232,7 @@ typedef struct dtls1_state_st
227 232
228 struct dtls1_timeout_st timeout; 233 struct dtls1_timeout_st timeout;
229 234
230 /* Indicates when the last handshake msg sent will timeout */ 235 /* Indicates when the last handshake msg or heartbeat sent will timeout */
231 struct timeval next_timeout; 236 struct timeval next_timeout;
232 237
233 /* Timeout duration */ 238 /* Timeout duration */
@@ -243,6 +248,13 @@ typedef struct dtls1_state_st
243 unsigned int retransmitting; 248 unsigned int retransmitting;
244 unsigned int change_cipher_spec_ok; 249 unsigned int change_cipher_spec_ok;
245 250
251#ifndef OPENSSL_NO_SCTP
252 /* used when SSL_ST_XX_FLUSH is entered */
253 int next_state;
254
255 int shutdown_received;
256#endif
257
246 } DTLS1_STATE; 258 } DTLS1_STATE;
247 259
248typedef struct dtls1_record_data_st 260typedef struct dtls1_record_data_st
@@ -251,8 +263,12 @@ typedef struct dtls1_record_data_st
251 unsigned int packet_length; 263 unsigned int packet_length;
252 SSL3_BUFFER rbuf; 264 SSL3_BUFFER rbuf;
253 SSL3_RECORD rrec; 265 SSL3_RECORD rrec;
266#ifndef OPENSSL_NO_SCTP
267 struct bio_dgram_sctp_rcvinfo recordinfo;
268#endif
254 } DTLS1_RECORD_DATA; 269 } DTLS1_RECORD_DATA;
255 270
271#endif
256 272
257/* Timeout multipliers (timeout slice is defined in apps/timeouts.h */ 273/* Timeout multipliers (timeout slice is defined in apps/timeouts.h */
258#define DTLS1_TMO_READ_COUNT 2 274#define DTLS1_TMO_READ_COUNT 2
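
DTLS1_SCTP_AUTH_LABEL above is the keying-material exporter label the DTLS/SCTP code uses internally when it derives the SCTP-AUTH shared key that it hands to the SCTP BIO. Purely as an illustration (not part of the diff), an equivalent stand-alone export from an established connection would look roughly like this; the 64-byte key size and passing the label length including its trailing NUL mirror the library's internal usage, and an SCTP-enabled build with OPENSSL_NO_SSL_INTERN undefined is assumed (the define sits inside that block):

    #include <openssl/ssl.h>

    /* Sketch only: export the same bytes the DTLS/SCTP code derives
     * internally for SCTP-AUTH.  Returns 1 on success, 0 on failure. */
    static int export_sctp_auth_key(SSL *ssl, unsigned char key[64])
    {
        return SSL_export_keying_material(ssl, key, 64,
                                          DTLS1_SCTP_AUTH_LABEL,
                                          sizeof(DTLS1_SCTP_AUTH_LABEL),
                                          NULL, 0, 0);
    }
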
diff --git a/src/lib/libssl/src/ssl/srtp.h b/src/lib/libssl/src/ssl/srtp.h
new file mode 100644
index 0000000000..c0cf33ef28
--- /dev/null
+++ b/src/lib/libssl/src/ssl/srtp.h
@@ -0,0 +1,145 @@
 1/* ssl/srtp.h */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58/* ====================================================================
59 * Copyright (c) 1998-2006 The OpenSSL Project. All rights reserved.
60 *
61 * Redistribution and use in source and binary forms, with or without
62 * modification, are permitted provided that the following conditions
63 * are met:
64 *
65 * 1. Redistributions of source code must retain the above copyright
66 * notice, this list of conditions and the following disclaimer.
67 *
68 * 2. Redistributions in binary form must reproduce the above copyright
69 * notice, this list of conditions and the following disclaimer in
70 * the documentation and/or other materials provided with the
71 * distribution.
72 *
73 * 3. All advertising materials mentioning features or use of this
74 * software must display the following acknowledgment:
75 * "This product includes software developed by the OpenSSL Project
76 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
77 *
78 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
79 * endorse or promote products derived from this software without
80 * prior written permission. For written permission, please contact
81 * openssl-core@openssl.org.
82 *
83 * 5. Products derived from this software may not be called "OpenSSL"
84 * nor may "OpenSSL" appear in their names without prior written
85 * permission of the OpenSSL Project.
86 *
87 * 6. Redistributions of any form whatsoever must retain the following
88 * acknowledgment:
89 * "This product includes software developed by the OpenSSL Project
90 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
91 *
92 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
93 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
94 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
95 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
96 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
97 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
98 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
99 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
100 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
101 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
102 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
103 * OF THE POSSIBILITY OF SUCH DAMAGE.
104 * ====================================================================
105 *
106 * This product includes cryptographic software written by Eric Young
107 * (eay@cryptsoft.com). This product includes software written by Tim
108 * Hudson (tjh@cryptsoft.com).
109 *
110 */
111/*
112 DTLS code by Eric Rescorla <ekr@rtfm.com>
113
114 Copyright (C) 2006, Network Resonance, Inc.
115 Copyright (C) 2011, RTFM, Inc.
116*/
117
118#ifndef HEADER_D1_SRTP_H
119#define HEADER_D1_SRTP_H
120
121#ifdef __cplusplus
122extern "C" {
123#endif
124
125
126#define SRTP_AES128_CM_SHA1_80 0x0001
127#define SRTP_AES128_CM_SHA1_32 0x0002
128#define SRTP_AES128_F8_SHA1_80 0x0003
129#define SRTP_AES128_F8_SHA1_32 0x0004
130#define SRTP_NULL_SHA1_80 0x0005
131#define SRTP_NULL_SHA1_32 0x0006
132
133int SSL_CTX_set_tlsext_use_srtp(SSL_CTX *ctx, const char *profiles);
134int SSL_set_tlsext_use_srtp(SSL *ctx, const char *profiles);
135SRTP_PROTECTION_PROFILE *SSL_get_selected_srtp_profile(SSL *s);
136
137STACK_OF(SRTP_PROTECTION_PROFILE) *SSL_get_srtp_profiles(SSL *ssl);
138SRTP_PROTECTION_PROFILE *SSL_get_selected_srtp_profile(SSL *s);
139
140#ifdef __cplusplus
141}
142#endif
143
144#endif
145
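
The constants and prototypes above are the whole public surface for SRTP (use_srtp) negotiation. A minimal usage sketch, not part of the import, with error handling trimmed; note that the use_srtp setters return 0 on success:

    #include <stdio.h>
    #include <openssl/ssl.h>
    #include <openssl/srtp.h>

    /* Offer two SRTP protection profiles, in preference order. */
    static int offer_srtp(SSL_CTX *ctx)
    {
        /* colon-separated profile names; returns 0 on success */
        return SSL_CTX_set_tlsext_use_srtp(ctx,
                   "SRTP_AES128_CM_SHA1_80:SRTP_AES128_CM_SHA1_32");
    }

    /* After the DTLS handshake on an SSL created from that context: */
    static void print_srtp_profile(SSL *ssl)
    {
        SRTP_PROTECTION_PROFILE *prof = SSL_get_selected_srtp_profile(ssl);

        if (prof != NULL)
            printf("SRTP profile: %s (0x%04lx)\n", prof->name, prof->id);
        else
            printf("use_srtp not negotiated\n");
    }
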
diff --git a/src/lib/libssl/src/ssl/tls_srp.c b/src/lib/libssl/src/ssl/tls_srp.c
new file mode 100644
index 0000000000..8512c4daf6
--- /dev/null
+++ b/src/lib/libssl/src/ssl/tls_srp.c
@@ -0,0 +1,506 @@
1/* ssl/tls_srp.c */
2/* Written by Christophe Renou (christophe.renou@edelweb.fr) with
3 * the precious help of Peter Sylvester (peter.sylvester@edelweb.fr)
4 * for the EdelKey project and contributed to the OpenSSL project 2004.
5 */
6/* ====================================================================
7 * Copyright (c) 2004-2011 The OpenSSL Project. All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 *
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 *
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in
18 * the documentation and/or other materials provided with the
19 * distribution.
20 *
21 * 3. All advertising materials mentioning features or use of this
22 * software must display the following acknowledgment:
23 * "This product includes software developed by the OpenSSL Project
24 * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
25 *
26 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
27 * endorse or promote products derived from this software without
28 * prior written permission. For written permission, please contact
29 * licensing@OpenSSL.org.
30 *
31 * 5. Products derived from this software may not be called "OpenSSL"
32 * nor may "OpenSSL" appear in their names without prior written
33 * permission of the OpenSSL Project.
34 *
35 * 6. Redistributions of any form whatsoever must retain the following
36 * acknowledgment:
37 * "This product includes software developed by the OpenSSL Project
38 * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
39 *
40 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
41 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
42 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
43 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
44 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
45 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
46 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
47 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
49 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
50 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
51 * OF THE POSSIBILITY OF SUCH DAMAGE.
52 * ====================================================================
53 *
54 * This product includes cryptographic software written by Eric Young
55 * (eay@cryptsoft.com). This product includes software written by Tim
56 * Hudson (tjh@cryptsoft.com).
57 *
58 */
59#include "ssl_locl.h"
60#ifndef OPENSSL_NO_SRP
61
62#include <openssl/rand.h>
63#include <openssl/srp.h>
64#include <openssl/err.h>
65
66int SSL_CTX_SRP_CTX_free(struct ssl_ctx_st *ctx)
67 {
68 if (ctx == NULL)
69 return 0;
70 OPENSSL_free(ctx->srp_ctx.login);
71 BN_free(ctx->srp_ctx.N);
72 BN_free(ctx->srp_ctx.g);
73 BN_free(ctx->srp_ctx.s);
74 BN_free(ctx->srp_ctx.B);
75 BN_free(ctx->srp_ctx.A);
76 BN_free(ctx->srp_ctx.a);
77 BN_free(ctx->srp_ctx.b);
78 BN_free(ctx->srp_ctx.v);
79 ctx->srp_ctx.TLS_ext_srp_username_callback = NULL;
80 ctx->srp_ctx.SRP_cb_arg = NULL;
81 ctx->srp_ctx.SRP_verify_param_callback = NULL;
82 ctx->srp_ctx.SRP_give_srp_client_pwd_callback = NULL;
83 ctx->srp_ctx.N = NULL;
84 ctx->srp_ctx.g = NULL;
85 ctx->srp_ctx.s = NULL;
86 ctx->srp_ctx.B = NULL;
87 ctx->srp_ctx.A = NULL;
88 ctx->srp_ctx.a = NULL;
89 ctx->srp_ctx.b = NULL;
90 ctx->srp_ctx.v = NULL;
91 ctx->srp_ctx.login = NULL;
92 ctx->srp_ctx.info = NULL;
93 ctx->srp_ctx.strength = SRP_MINIMAL_N;
94 ctx->srp_ctx.srp_Mask = 0;
95 return (1);
96 }
97
98int SSL_SRP_CTX_free(struct ssl_st *s)
99 {
100 if (s == NULL)
101 return 0;
102 OPENSSL_free(s->srp_ctx.login);
103 BN_free(s->srp_ctx.N);
104 BN_free(s->srp_ctx.g);
105 BN_free(s->srp_ctx.s);
106 BN_free(s->srp_ctx.B);
107 BN_free(s->srp_ctx.A);
108 BN_free(s->srp_ctx.a);
109 BN_free(s->srp_ctx.b);
110 BN_free(s->srp_ctx.v);
111 s->srp_ctx.TLS_ext_srp_username_callback = NULL;
112 s->srp_ctx.SRP_cb_arg = NULL;
113 s->srp_ctx.SRP_verify_param_callback = NULL;
114 s->srp_ctx.SRP_give_srp_client_pwd_callback = NULL;
115 s->srp_ctx.N = NULL;
116 s->srp_ctx.g = NULL;
117 s->srp_ctx.s = NULL;
118 s->srp_ctx.B = NULL;
119 s->srp_ctx.A = NULL;
120 s->srp_ctx.a = NULL;
121 s->srp_ctx.b = NULL;
122 s->srp_ctx.v = NULL;
123 s->srp_ctx.login = NULL;
124 s->srp_ctx.info = NULL;
125 s->srp_ctx.strength = SRP_MINIMAL_N;
126 s->srp_ctx.srp_Mask = 0;
127 return (1);
128 }
129
130int SSL_SRP_CTX_init(struct ssl_st *s)
131 {
132 SSL_CTX *ctx;
133
134 if ((s == NULL) || ((ctx = s->ctx) == NULL))
135 return 0;
136 s->srp_ctx.SRP_cb_arg = ctx->srp_ctx.SRP_cb_arg;
137 /* set client Hello login callback */
138 s->srp_ctx.TLS_ext_srp_username_callback = ctx->srp_ctx.TLS_ext_srp_username_callback;
139 /* set SRP N/g param callback for verification */
140 s->srp_ctx.SRP_verify_param_callback = ctx->srp_ctx.SRP_verify_param_callback;
141 /* set SRP client passwd callback */
142 s->srp_ctx.SRP_give_srp_client_pwd_callback = ctx->srp_ctx.SRP_give_srp_client_pwd_callback;
143
144 s->srp_ctx.N = NULL;
145 s->srp_ctx.g = NULL;
146 s->srp_ctx.s = NULL;
147 s->srp_ctx.B = NULL;
148 s->srp_ctx.A = NULL;
149 s->srp_ctx.a = NULL;
150 s->srp_ctx.b = NULL;
151 s->srp_ctx.v = NULL;
152 s->srp_ctx.login = NULL;
153 s->srp_ctx.info = ctx->srp_ctx.info;
154 s->srp_ctx.strength = ctx->srp_ctx.strength;
155
156 if (((ctx->srp_ctx.N != NULL) &&
157 ((s->srp_ctx.N = BN_dup(ctx->srp_ctx.N)) == NULL)) ||
158 ((ctx->srp_ctx.g != NULL) &&
159 ((s->srp_ctx.g = BN_dup(ctx->srp_ctx.g)) == NULL)) ||
160 ((ctx->srp_ctx.s != NULL) &&
161 ((s->srp_ctx.s = BN_dup(ctx->srp_ctx.s)) == NULL)) ||
162 ((ctx->srp_ctx.B != NULL) &&
163 ((s->srp_ctx.B = BN_dup(ctx->srp_ctx.B)) == NULL)) ||
164 ((ctx->srp_ctx.A != NULL) &&
165 ((s->srp_ctx.A = BN_dup(ctx->srp_ctx.A)) == NULL)) ||
166 ((ctx->srp_ctx.a != NULL) &&
167 ((s->srp_ctx.a = BN_dup(ctx->srp_ctx.a)) == NULL)) ||
168 ((ctx->srp_ctx.v != NULL) &&
169 ((s->srp_ctx.v = BN_dup(ctx->srp_ctx.v)) == NULL)) ||
170 ((ctx->srp_ctx.b != NULL) &&
171 ((s->srp_ctx.b = BN_dup(ctx->srp_ctx.b)) == NULL)))
172 {
173 SSLerr(SSL_F_SSL_SRP_CTX_INIT,ERR_R_BN_LIB);
174 goto err;
175 }
176 if ((ctx->srp_ctx.login != NULL) &&
177 ((s->srp_ctx.login = BUF_strdup(ctx->srp_ctx.login)) == NULL))
178 {
179 SSLerr(SSL_F_SSL_SRP_CTX_INIT,ERR_R_INTERNAL_ERROR);
180 goto err;
181 }
182 s->srp_ctx.srp_Mask = ctx->srp_ctx.srp_Mask;
183
184 return (1);
185err:
186 OPENSSL_free(s->srp_ctx.login);
187 BN_free(s->srp_ctx.N);
188 BN_free(s->srp_ctx.g);
189 BN_free(s->srp_ctx.s);
190 BN_free(s->srp_ctx.B);
191 BN_free(s->srp_ctx.A);
192 BN_free(s->srp_ctx.a);
193 BN_free(s->srp_ctx.b);
194 BN_free(s->srp_ctx.v);
195 return (0);
196 }
197
198int SSL_CTX_SRP_CTX_init(struct ssl_ctx_st *ctx)
199 {
200 if (ctx == NULL)
201 return 0;
202
203 ctx->srp_ctx.SRP_cb_arg = NULL;
204 /* set client Hello login callback */
205 ctx->srp_ctx.TLS_ext_srp_username_callback = NULL;
206 /* set SRP N/g param callback for verification */
207 ctx->srp_ctx.SRP_verify_param_callback = NULL;
208 /* set SRP client passwd callback */
209 ctx->srp_ctx.SRP_give_srp_client_pwd_callback = NULL;
210
211 ctx->srp_ctx.N = NULL;
212 ctx->srp_ctx.g = NULL;
213 ctx->srp_ctx.s = NULL;
214 ctx->srp_ctx.B = NULL;
215 ctx->srp_ctx.A = NULL;
216 ctx->srp_ctx.a = NULL;
217 ctx->srp_ctx.b = NULL;
218 ctx->srp_ctx.v = NULL;
219 ctx->srp_ctx.login = NULL;
220 ctx->srp_ctx.srp_Mask = 0;
221 ctx->srp_ctx.info = NULL;
222 ctx->srp_ctx.strength = SRP_MINIMAL_N;
223
224 return (1);
225 }
226
227/* server side */
228int SSL_srp_server_param_with_username(SSL *s, int *ad)
229 {
230 unsigned char b[SSL_MAX_MASTER_KEY_LENGTH];
231 int al;
232
233 *ad = SSL_AD_UNKNOWN_PSK_IDENTITY;
234 if ((s->srp_ctx.TLS_ext_srp_username_callback !=NULL) &&
235 ((al = s->srp_ctx.TLS_ext_srp_username_callback(s, ad, s->srp_ctx.SRP_cb_arg))!=SSL_ERROR_NONE))
236 return al;
237
238 *ad = SSL_AD_INTERNAL_ERROR;
239 if ((s->srp_ctx.N == NULL) ||
240 (s->srp_ctx.g == NULL) ||
241 (s->srp_ctx.s == NULL) ||
242 (s->srp_ctx.v == NULL))
243 return SSL3_AL_FATAL;
244
245 RAND_bytes(b, sizeof(b));
246 s->srp_ctx.b = BN_bin2bn(b,sizeof(b),NULL);
247 OPENSSL_cleanse(b,sizeof(b));
248
249 /* Calculate: B = (kv + g^b) % N */
250
251 return ((s->srp_ctx.B = SRP_Calc_B(s->srp_ctx.b, s->srp_ctx.N, s->srp_ctx.g, s->srp_ctx.v)) != NULL)?
252 SSL_ERROR_NONE:SSL3_AL_FATAL;
253 }
254
255/* If the server just has the raw password, make up a verifier entry on the fly */
256int SSL_set_srp_server_param_pw(SSL *s, const char *user, const char *pass, const char *grp)
257 {
258 SRP_gN *GN = SRP_get_default_gN(grp);
259 if(GN == NULL) return -1;
260 s->srp_ctx.N = BN_dup(GN->N);
261 s->srp_ctx.g = BN_dup(GN->g);
262 if(s->srp_ctx.v != NULL)
263 {
264 BN_clear_free(s->srp_ctx.v);
265 s->srp_ctx.v = NULL;
266 }
267 if(s->srp_ctx.s != NULL)
268 {
269 BN_clear_free(s->srp_ctx.s);
270 s->srp_ctx.s = NULL;
271 }
272 if(!SRP_create_verifier_BN(user, pass, &s->srp_ctx.s, &s->srp_ctx.v, GN->N, GN->g)) return -1;
273
274 return 1;
275 }
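
SSL_set_srp_server_param_pw() above is what lets a server skip stored verifiers entirely. A hedged sketch (not part of the import) of wiring it into the username callback that SSL_srp_server_param_with_username() invokes; lookup_password() is a hypothetical application helper, "1024" names the RFC 5054 1024-bit group known to SRP_get_default_gN(), and the callback is installed with SSL_CTX_set_srp_username_callback(), defined further down in this file:

    #include <openssl/ssl.h>

    /* Hypothetical application helper: map a username to its password. */
    extern const char *lookup_password(const char *user);

    static int srp_username_cb(SSL *s, int *ad, void *arg)
    {
        const char *user = SSL_get_srp_username(s);
        const char *pass = (user != NULL) ? lookup_password(user) : NULL;

        (void)ad; (void)arg;
        if (pass == NULL)
            return SSL3_AL_FATAL;           /* unknown user: fatal alert */
        /* build salt and verifier on the fly from the raw password */
        if (SSL_set_srp_server_param_pw(s, user, pass, "1024") < 0)
            return SSL3_AL_FATAL;
        return SSL_ERROR_NONE;              /* proceed with the handshake */
    }

    /* at setup time:  SSL_CTX_set_srp_username_callback(ctx, srp_username_cb); */
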
276
277int SSL_set_srp_server_param(SSL *s, const BIGNUM *N, const BIGNUM *g,
278 BIGNUM *sa, BIGNUM *v, char *info)
279 {
280 if (N!= NULL)
281 {
282 if (s->srp_ctx.N != NULL)
283 {
284 if (!BN_copy(s->srp_ctx.N,N))
285 {
286 BN_free(s->srp_ctx.N);
287 s->srp_ctx.N = NULL;
288 }
289 }
290 else
291 s->srp_ctx.N = BN_dup(N);
292 }
293 if (g!= NULL)
294 {
295 if (s->srp_ctx.g != NULL)
296 {
297 if (!BN_copy(s->srp_ctx.g,g))
298 {
299 BN_free(s->srp_ctx.g);
300 s->srp_ctx.g = NULL;
301 }
302 }
303 else
304 s->srp_ctx.g = BN_dup(g);
305 }
306 if (sa!= NULL)
307 {
308 if (s->srp_ctx.s != NULL)
309 {
310 if (!BN_copy(s->srp_ctx.s,sa))
311 {
312 BN_free(s->srp_ctx.s);
313 s->srp_ctx.s = NULL;
314 }
315 }
316 else
317 s->srp_ctx.s = BN_dup(sa);
318 }
319 if (v!= NULL)
320 {
321 if (s->srp_ctx.v != NULL)
322 {
323 if (!BN_copy(s->srp_ctx.v,v))
324 {
325 BN_free(s->srp_ctx.v);
326 s->srp_ctx.v = NULL;
327 }
328 }
329 else
330 s->srp_ctx.v = BN_dup(v);
331 }
332 s->srp_ctx.info = info;
333
334 if (!(s->srp_ctx.N) ||
335 !(s->srp_ctx.g) ||
336 !(s->srp_ctx.s) ||
337 !(s->srp_ctx.v))
338 return -1;
339
340 return 1;
341 }
342
343int SRP_generate_server_master_secret(SSL *s,unsigned char *master_key)
344 {
345 BIGNUM *K = NULL, *u = NULL;
346 int ret = -1, tmp_len;
347 unsigned char *tmp = NULL;
348
349 if (!SRP_Verify_A_mod_N(s->srp_ctx.A,s->srp_ctx.N))
350 goto err;
351 if (!(u = SRP_Calc_u(s->srp_ctx.A,s->srp_ctx.B,s->srp_ctx.N)))
352 goto err;
353 if (!(K = SRP_Calc_server_key(s->srp_ctx.A, s->srp_ctx.v, u, s->srp_ctx.b, s->srp_ctx.N)))
354 goto err;
355
356 tmp_len = BN_num_bytes(K);
357 if ((tmp = OPENSSL_malloc(tmp_len)) == NULL)
358 goto err;
359 BN_bn2bin(K, tmp);
360 ret = s->method->ssl3_enc->generate_master_secret(s,master_key,tmp,tmp_len);
361err:
362 if (tmp)
363 {
364 OPENSSL_cleanse(tmp,tmp_len) ;
365 OPENSSL_free(tmp);
366 }
367 BN_clear_free(K);
368 BN_clear_free(u);
369 return ret;
370 }
371
372/* client side */
373int SRP_generate_client_master_secret(SSL *s,unsigned char *master_key)
374 {
375 BIGNUM *x = NULL, *u = NULL, *K = NULL;
376 int ret = -1, tmp_len;
377 char *passwd = NULL;
378 unsigned char *tmp = NULL;
379
 380		/* Reject B when B % N == 0 (SRP_Verify_B_mod_N() returns 0 in that case)
 381		 */
382 if (SRP_Verify_B_mod_N(s->srp_ctx.B,s->srp_ctx.N)==0) goto err;
383 if (!(u = SRP_Calc_u(s->srp_ctx.A,s->srp_ctx.B,s->srp_ctx.N))) goto err;
384 if (s->srp_ctx.SRP_give_srp_client_pwd_callback == NULL) goto err;
385 if (!(passwd = s->srp_ctx.SRP_give_srp_client_pwd_callback(s, s->srp_ctx.SRP_cb_arg))) goto err;
386 if (!(x = SRP_Calc_x(s->srp_ctx.s,s->srp_ctx.login,passwd))) goto err;
387 if (!(K = SRP_Calc_client_key(s->srp_ctx.N, s->srp_ctx.B, s->srp_ctx.g, x, s->srp_ctx.a, u))) goto err;
388
389 tmp_len = BN_num_bytes(K);
390 if ((tmp = OPENSSL_malloc(tmp_len)) == NULL) goto err;
391 BN_bn2bin(K, tmp);
392 ret = s->method->ssl3_enc->generate_master_secret(s,master_key,tmp,tmp_len);
393err:
394 if (tmp)
395 {
396 OPENSSL_cleanse(tmp,tmp_len) ;
397 OPENSSL_free(tmp);
398 }
399 BN_clear_free(K);
400 BN_clear_free(x);
401 if (passwd)
402 {
403 OPENSSL_cleanse(passwd,strlen(passwd)) ;
404 OPENSSL_free(passwd);
405 }
406 BN_clear_free(u);
407 return ret;
408 }
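
For reference (a summary added here, not part of the import), the two premaster computations above are the standard SRP-6a values from RFC 5054, with H = SHA-1, salt s, login I and password P:

\[
k = H(N \,\|\, g), \qquad x = H\bigl(s \,\|\, H(I \,\|\, \text{``:''} \,\|\, P)\bigr), \qquad u = H(A \,\|\, B)
\]
\[
S_{\text{server}} = (A \, v^{u})^{b} \bmod N, \qquad
S_{\text{client}} = (B - k \, g^{x})^{\,a + u x} \bmod N
\]

Both sides then feed the big-endian encoding of S (the BN_bn2bin() output above) into generate_master_secret().
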
409
410int SRP_Calc_A_param(SSL *s)
411 {
412 unsigned char rnd[SSL_MAX_MASTER_KEY_LENGTH];
413
414 if (BN_num_bits(s->srp_ctx.N) < s->srp_ctx.strength)
415 return -1;
416
417 if (s->srp_ctx.SRP_verify_param_callback ==NULL &&
418 !SRP_check_known_gN_param(s->srp_ctx.g,s->srp_ctx.N))
419 return -1 ;
420
421 RAND_bytes(rnd, sizeof(rnd));
422 s->srp_ctx.a = BN_bin2bn(rnd, sizeof(rnd), s->srp_ctx.a);
423 OPENSSL_cleanse(rnd, sizeof(rnd));
424
425 if (!(s->srp_ctx.A = SRP_Calc_A(s->srp_ctx.a,s->srp_ctx.N,s->srp_ctx.g)))
426 return -1;
427
428 /* We can have a callback to verify SRP param!! */
429 if (s->srp_ctx.SRP_verify_param_callback !=NULL)
430 return s->srp_ctx.SRP_verify_param_callback(s,s->srp_ctx.SRP_cb_arg);
431
432 return 1;
433 }
434
435BIGNUM *SSL_get_srp_g(SSL *s)
436 {
437 if (s->srp_ctx.g != NULL)
438 return s->srp_ctx.g;
439 return s->ctx->srp_ctx.g;
440 }
441
442BIGNUM *SSL_get_srp_N(SSL *s)
443 {
444 if (s->srp_ctx.N != NULL)
445 return s->srp_ctx.N;
446 return s->ctx->srp_ctx.N;
447 }
448
449char *SSL_get_srp_username(SSL *s)
450 {
451 if (s->srp_ctx.login != NULL)
452 return s->srp_ctx.login;
453 return s->ctx->srp_ctx.login;
454 }
455
456char *SSL_get_srp_userinfo(SSL *s)
457 {
458 if (s->srp_ctx.info != NULL)
459 return s->srp_ctx.info;
460 return s->ctx->srp_ctx.info;
461 }
462
463#define tls1_ctx_ctrl ssl3_ctx_ctrl
464#define tls1_ctx_callback_ctrl ssl3_ctx_callback_ctrl
465
466int SSL_CTX_set_srp_username(SSL_CTX *ctx,char *name)
467 {
468 return tls1_ctx_ctrl(ctx,SSL_CTRL_SET_TLS_EXT_SRP_USERNAME,0,name);
469 }
470
471int SSL_CTX_set_srp_password(SSL_CTX *ctx,char *password)
472 {
473 return tls1_ctx_ctrl(ctx,SSL_CTRL_SET_TLS_EXT_SRP_PASSWORD,0,password);
474 }
475
476int SSL_CTX_set_srp_strength(SSL_CTX *ctx, int strength)
477 {
478 return tls1_ctx_ctrl(ctx, SSL_CTRL_SET_TLS_EXT_SRP_STRENGTH, strength,
479 NULL);
480 }
481
482int SSL_CTX_set_srp_verify_param_callback(SSL_CTX *ctx, int (*cb)(SSL *,void *))
483 {
484 return tls1_ctx_callback_ctrl(ctx,SSL_CTRL_SET_SRP_VERIFY_PARAM_CB,
485 (void (*)(void))cb);
486 }
487
488int SSL_CTX_set_srp_cb_arg(SSL_CTX *ctx, void *arg)
489 {
490 return tls1_ctx_ctrl(ctx,SSL_CTRL_SET_SRP_ARG,0,arg);
491 }
492
493int SSL_CTX_set_srp_username_callback(SSL_CTX *ctx,
494 int (*cb)(SSL *,int *,void *))
495 {
496 return tls1_ctx_callback_ctrl(ctx,SSL_CTRL_SET_TLS_EXT_SRP_USERNAME_CB,
497 (void (*)(void))cb);
498 }
499
500int SSL_CTX_set_srp_client_pwd_callback(SSL_CTX *ctx, char *(*cb)(SSL *,void *))
501 {
502 return tls1_ctx_callback_ctrl(ctx,SSL_CTRL_SET_SRP_GIVE_CLIENT_PWD_CB,
503 (void (*)(void))cb);
504 }
505
506#endif
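
Taken together with the ctrl wrappers just above, a minimal client-side configuration looks roughly like the sketch below (not part of the import; error handling trimmed, function name hypothetical, and restricting the cipher list to the SRP ciphersuites is optional):

    #include <openssl/ssl.h>

    /* Sketch only: an SSL_CTX configured for TLS-SRP as a client. */
    static SSL_CTX *srp_client_ctx(char *user, char *pass)
    {
        SSL_CTX *ctx = SSL_CTX_new(TLSv1_client_method());

        if (ctx == NULL)
            return NULL;
        SSL_CTX_set_srp_username(ctx, user);  /* login sent in the SRP extension */
        SSL_CTX_set_srp_password(ctx, pass);  /* password used for the key exchange */
        SSL_CTX_set_cipher_list(ctx, "SRP");  /* offer only SRP ciphersuites */
        return ctx;
    }
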
diff --git a/src/lib/libssl/src/test/pkits-test.pl b/src/lib/libssl/src/test/pkits-test.pl
index 69dffa16f9..5c6b89fcdb 100644
--- a/src/lib/libssl/src/test/pkits-test.pl
+++ b/src/lib/libssl/src/test/pkits-test.pl
@@ -784,6 +784,15 @@ my $ossl = "ossl/apps/openssl";
784 784
785my $ossl_cmd = "$ossl_path cms -verify -verify_retcode "; 785my $ossl_cmd = "$ossl_path cms -verify -verify_retcode ";
786$ossl_cmd .= "-CAfile pkitsta.pem -crl_check_all -x509_strict "; 786$ossl_cmd .= "-CAfile pkitsta.pem -crl_check_all -x509_strict ";
787
788# Check for expiry of trust anchor
789system "$ossl_path x509 -inform DER -in $pkitsta -checkend 0";
790if ($? == 256)
791 {
792 print STDERR "WARNING: using older expired data\n";
793 $ossl_cmd .= "-attime 1291940972 ";
794 }
795
787$ossl_cmd .= "-policy_check -extended_crl -use_deltas -out /dev/null 2>&1 "; 796$ossl_cmd .= "-policy_check -extended_crl -use_deltas -out /dev/null 2>&1 ";
788 797
789system "$ossl_path x509 -inform DER -in $pkitsta -out pkitsta.pem"; 798system "$ossl_path x509 -inform DER -in $pkitsta -out pkitsta.pem";
diff --git a/src/lib/libssl/src/util/copy.pl b/src/lib/libssl/src/util/copy.pl
index e20b45530a..eba6d5815e 100644
--- a/src/lib/libssl/src/util/copy.pl
+++ b/src/lib/libssl/src/util/copy.pl
@@ -8,9 +8,16 @@ use Fcntl;
8# Perl script 'copy' comment. On Windows the built in "copy" command also 8# Perl script 'copy' comment. On Windows the built in "copy" command also
9# copies timestamps: this messes up Makefile dependencies. 9# copies timestamps: this messes up Makefile dependencies.
10 10
11my $stripcr = 0;
12
11my $arg; 13my $arg;
12 14
13foreach $arg (@ARGV) { 15foreach $arg (@ARGV) {
16 if ($arg eq "-stripcr")
17 {
18 $stripcr = 1;
19 next;
20 }
14 $arg =~ s|\\|/|g; # compensate for bug/feature in cygwin glob... 21 $arg =~ s|\\|/|g; # compensate for bug/feature in cygwin glob...
15 foreach (glob $arg) 22 foreach (glob $arg)
16 { 23 {
@@ -49,6 +56,10 @@ foreach (@filelist)
49 || die "Can't Open $dfile"; 56 || die "Can't Open $dfile";
50 while (sysread IN, $buf, 10240) 57 while (sysread IN, $buf, 10240)
51 { 58 {
59 if ($stripcr)
60 {
61 $buf =~ tr/\015//d;
62 }
52 syswrite(OUT, $buf, length($buf)); 63 syswrite(OUT, $buf, length($buf));
53 } 64 }
54 close(IN); 65 close(IN);