summaryrefslogtreecommitdiff
path: root/src/lib
diff options
context:
space:
mode:
authorbeck <>2024-12-13 00:03:57 +0000
committerbeck <>2024-12-13 00:03:57 +0000
commited3c5d3a4797d4b1d8f9769cfc43f8e686a59621 (patch)
treea9ff1c725db56dbeb46224505b3dd6fd05a21777 /src/lib
parentfd906c7b27573203602764309c3cf5faaefdf573 (diff)
downloadopenbsd-ed3c5d3a4797d4b1d8f9769cfc43f8e686a59621.tar.gz
openbsd-ed3c5d3a4797d4b1d8f9769cfc43f8e686a59621.tar.bz2
openbsd-ed3c5d3a4797d4b1d8f9769cfc43f8e686a59621.zip
Add ML-KEM 768 from BoringSSL
Changes include conversion from C++, basic KNF, then adaptation to use our sha3 functions for sha3 and shake instead of the BorinSSL version. This Adds units tests to run against BoringSSL and NIST test vectors. The future public API is the same as Boring's - but is not yet exposed pending making bytesring.h public (which will happen separately) and a minor bump Currently this will just ensure we build and run regress. ok tb@ to get it into the tree and massage from there.
Diffstat (limited to 'src/lib')
-rw-r--r--src/lib/libcrypto/Makefile9
-rw-r--r--src/lib/libcrypto/hidden/openssl/mlkem.h40
-rw-r--r--src/lib/libcrypto/mlkem/mlkem.h168
-rw-r--r--src/lib/libcrypto/mlkem/mlkem768.c1118
-rw-r--r--src/lib/libcrypto/mlkem/mlkem_internal.h78
5 files changed, 1412 insertions, 1 deletions
diff --git a/src/lib/libcrypto/Makefile b/src/lib/libcrypto/Makefile
index c981a4189f..f43b09d176 100644
--- a/src/lib/libcrypto/Makefile
+++ b/src/lib/libcrypto/Makefile
@@ -1,4 +1,4 @@
1# $OpenBSD: Makefile,v 1.228 2024/11/16 10:38:10 tb Exp $ 1# $OpenBSD: Makefile,v 1.229 2024/12/13 00:03:57 beck Exp $
2 2
3LIB= crypto 3LIB= crypto
4LIBREBUILD=y 4LIBREBUILD=y
@@ -43,6 +43,7 @@ CFLAGS+= -I${LCRYPTO_SRC}/hidden
43CFLAGS+= -I${LCRYPTO_SRC}/hmac 43CFLAGS+= -I${LCRYPTO_SRC}/hmac
44CFLAGS+= -I${LCRYPTO_SRC}/kdf 44CFLAGS+= -I${LCRYPTO_SRC}/kdf
45CFLAGS+= -I${LCRYPTO_SRC}/lhash 45CFLAGS+= -I${LCRYPTO_SRC}/lhash
46CFLAGS+= -I${LCRYPTO_SRC}/mlkem
46CFLAGS+= -I${LCRYPTO_SRC}/modes 47CFLAGS+= -I${LCRYPTO_SRC}/modes
47CFLAGS+= -I${LCRYPTO_SRC}/ocsp 48CFLAGS+= -I${LCRYPTO_SRC}/ocsp
48CFLAGS+= -I${LCRYPTO_SRC}/pkcs12 49CFLAGS+= -I${LCRYPTO_SRC}/pkcs12
@@ -371,6 +372,9 @@ SRCS+= md4.c
371# md5/ 372# md5/
372SRCS+= md5.c 373SRCS+= md5.c
373 374
375# mlkem/
376SRCS+= mlkem768.c
377
374# modes/ 378# modes/
375SRCS+= cbc128.c 379SRCS+= cbc128.c
376SRCS+= ccm128.c 380SRCS+= ccm128.c
@@ -607,6 +611,7 @@ SRCS+= x_all.c
607 ${LCRYPTO_SRC}/lhash \ 611 ${LCRYPTO_SRC}/lhash \
608 ${LCRYPTO_SRC}/md4 \ 612 ${LCRYPTO_SRC}/md4 \
609 ${LCRYPTO_SRC}/md5 \ 613 ${LCRYPTO_SRC}/md5 \
614 ${LCRYPTO_SRC}/mlkem \
610 ${LCRYPTO_SRC}/modes \ 615 ${LCRYPTO_SRC}/modes \
611 ${LCRYPTO_SRC}/objects \ 616 ${LCRYPTO_SRC}/objects \
612 ${LCRYPTO_SRC}/ocsp \ 617 ${LCRYPTO_SRC}/ocsp \
@@ -639,6 +644,7 @@ HDRS=\
639 ${LCRYPTO_SRC}/bio/bio.h \ 644 ${LCRYPTO_SRC}/bio/bio.h \
640 ${LCRYPTO_SRC}/bn/bn.h \ 645 ${LCRYPTO_SRC}/bn/bn.h \
641 ${LCRYPTO_SRC}/buffer/buffer.h \ 646 ${LCRYPTO_SRC}/buffer/buffer.h \
647 ${LCRYPTO_SRC}/bytestring/bytestring.h \
642 ${LCRYPTO_SRC}/camellia/camellia.h \ 648 ${LCRYPTO_SRC}/camellia/camellia.h \
643 ${LCRYPTO_SRC}/cast/cast.h \ 649 ${LCRYPTO_SRC}/cast/cast.h \
644 ${LCRYPTO_SRC}/chacha/chacha.h \ 650 ${LCRYPTO_SRC}/chacha/chacha.h \
@@ -665,6 +671,7 @@ HDRS=\
665 ${LCRYPTO_SRC}/lhash/lhash.h \ 671 ${LCRYPTO_SRC}/lhash/lhash.h \
666 ${LCRYPTO_SRC}/md4/md4.h \ 672 ${LCRYPTO_SRC}/md4/md4.h \
667 ${LCRYPTO_SRC}/md5/md5.h \ 673 ${LCRYPTO_SRC}/md5/md5.h \
674 ${LCRYPTO_SRC}/mlkem/mlkem.h \
668 ${LCRYPTO_SRC}/modes/modes.h \ 675 ${LCRYPTO_SRC}/modes/modes.h \
669 ${LCRYPTO_SRC}/objects/objects.h \ 676 ${LCRYPTO_SRC}/objects/objects.h \
670 ${LCRYPTO_SRC}/ocsp/ocsp.h \ 677 ${LCRYPTO_SRC}/ocsp/ocsp.h \
diff --git a/src/lib/libcrypto/hidden/openssl/mlkem.h b/src/lib/libcrypto/hidden/openssl/mlkem.h
new file mode 100644
index 0000000000..01ac28cffd
--- /dev/null
+++ b/src/lib/libcrypto/hidden/openssl/mlkem.h
@@ -0,0 +1,40 @@
1/* $OpenBSD: mlkem.h,v 1.1 2024/12/13 00:03:57 beck Exp $ */
2/*
3 * Copyright (c) 2024 Bob Beck <beck@obtuse.com>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17
18#ifndef _LIBCRYPTO_MLKEM_H
19#define _LIBCRYPTO_MLKEM_H
20
21#ifndef _MSC_VER
22#include_next <openssl/mlkem.h>
23#else
24#include "../include/openssl/mlkem.h"
25#endif
26#include "crypto_namespace.h"
27
28/* Undo when making public */
29#ifdef LIBRESSL_HAS_MLKEM
30LCRYPTO_USED(MLKEM768_generate_key);
31LCRYPTO_USED(MLKEM768_public_from_private);
32LCRYPTO_USED(MLKEM768_encap);
33LCRYPTO_USED(MLKEM768_decap);
34LCRYPTO_USED(MLKEM768_marshal_public_key);
35LCRYPTO_USED(MLKEM768_parse_public_key);
36LCRYPTO_USED(MLKEM768_private_key_from_seed);
37LCRYPTO_USED(MLKEM768_parse_private_key);
38#endif
39
40#endif /* _LIBCRYPTO_MLKEM_H */
diff --git a/src/lib/libcrypto/mlkem/mlkem.h b/src/lib/libcrypto/mlkem/mlkem.h
new file mode 100644
index 0000000000..8040f4844b
--- /dev/null
+++ b/src/lib/libcrypto/mlkem/mlkem.h
@@ -0,0 +1,168 @@
1/* Copyright (c) 2024, Google Inc.
2 *
3 * Permission to use, copy, modify, and/or distribute this software for any
4 * purpose with or without fee is hereby granted, provided that the above
5 * copyright notice and this permission notice appear in all copies.
6 *
7 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
8 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
9 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
10 * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
11 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
12 * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
13 * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
14
15#ifndef OPENSSL_HEADER_MLKEM_H
16#define OPENSSL_HEADER_MLKEM_H
17
18#include <sys/types.h>
19#include <stdint.h>
20
21#ifdef LIBRESSL_HAS_MLKEM
22/* This needs to become public */
23#include <openssl/bytestring.h>
24#else
25/* Hack for now */
26struct cbs_st;
27struct cbb_st;
28#endif
29
30#if defined(__cplusplus)
31extern "C" {
32#endif
33
34/*
35 * ML-KEM-768
36 *
37 * This implements the Module-Lattice-Based Key-Encapsulation Mechanism from
38 * https://csrc.nist.gov/pubs/fips/204/final
39 */
40
41/*
42 * MLKEM768_public_key contains a ML-KEM-768 public key. The contents of this
43 * object should never leave the address space since the format is unstable.
44 */
45struct MLKEM768_public_key {
46 union {
47 uint8_t bytes[512 * (3 + 9) + 32 + 32];
48 uint16_t alignment;
49 } opaque;
50};
51
52/*
53 * MLKEM768_private_key contains a ML-KEM-768 private key. The contents of this
54 * object should never leave the address space since the format is unstable.
55 */
56struct MLKEM768_private_key {
57 union {
58 uint8_t bytes[512 * (3 + 3 + 9) + 32 + 32 + 32];
59 uint16_t alignment;
60 } opaque;
61};
62
63/*
64 * MLKEM768_PUBLIC_KEY_BYTES is the number of bytes in an encoded ML-KEM768 public
65 * key.
66 */
67#define MLKEM768_PUBLIC_KEY_BYTES 1184
68
69/* MLKEM_SEED_BYTES is the number of bytes in an ML-KEM seed. */
70#define MLKEM_SEED_BYTES 64
71
72/*
73 * MLKEM_SHARED_SECRET_BYTES is the number of bytes in the ML-KEM768 shared
74 * secret. Although the round-3 specification has a variable-length output, the
75 * final ML-KEM construction is expected to use a fixed 32-byte output. To
76 * simplify the future transition, we apply the same restriction.
77 */
78#define MLKEM_SHARED_SECRET_BYTES 32
79
80/*
81 * MLKEM_generate_key generates a random public/private key pair, writes the
82 * encoded public key to |out_encoded_public_key| and sets |out_private_key| to
83 * the private key. If |optional_out_seed| us not NULL then te seed used to
84 * generate te private key is written to it.
85 */
86void MLKEM768_generate_key(
87 uint8_t out_encoded_public_key[MLKEM768_PUBLIC_KEY_BYTES],
88 uint8_t optional_out_seed[MLKEM_SEED_BYTES],
89 struct MLKEM768_private_key *out_private_key);
90
91/*
92 * MLKEM768_private_key_from_seed derives a private key from a seed that was
93 * generated by |MLKEM768_generate_key|. It fails and returns 0 if |seed_len| is
94 * incorrect, otherwise it writes |*out_private_key| and returns 1.
95 */
96int MLKEM768_private_key_from_seed(struct MLKEM768_private_key *out_private_key,
97 const uint8_t *seed, size_t seed_len);
98
99/*
100 * MLKEM_public_from_private sets |*out_public_key| to the public key that
101 * corresponds to |private_key|. (This is faster than parsing the output of
102 * |MLKEM_generate_key| if, for some reason, you need to encapsulate to a key
103 * that was just generated.)
104 */
105void MLKEM768_public_from_private(struct MLKEM768_public_key *out_public_key,
106 const struct MLKEM768_private_key *private_key);
107
108/* MLKEM768_CIPHERTEXT_BYTES is number of bytes in the ML-KEM768 ciphertext. */
109#define MLKEM768_CIPHERTEXT_BYTES 1088
110
111/*
112 * MLKEM768_encap encrypts a random shared secret for |public_key|, writes the
113 * ciphertext to |out_ciphertext|, and writes the random shared secret to
114 * |out_shared_secret|.
115 */
116void MLKEM768_encap(uint8_t out_ciphertext[MLKEM768_CIPHERTEXT_BYTES],
117 uint8_t out_shared_secret[MLKEM_SHARED_SECRET_BYTES],
118 const struct MLKEM768_public_key *public_key);
119
120/*
121 * MLKEM768_decap decrypts a shared secret from |ciphertext| using |private_key|
122 * and writes it to |out_shared_secret|. If |ciphertext_len| is incorrect it
123 * returns 0, otherwise it rreturns 1. If |ciphertext| is invalid,
124 * |out_shared_secret| is filled with a key that will always be the same for the
125 * same |ciphertext| and |private_key|, but which appears to be random unless
126 * one has access to |private_key|. These alternatives occur in constant time.
127 * Any subsequent symmetric encryption using |out_shared_secret| must use an
128 * authenticated encryption scheme in order to discover the decapsulation
129 * failure.
130 */
131int MLKEM768_decap(uint8_t out_shared_secret[MLKEM_SHARED_SECRET_BYTES],
132 const uint8_t *ciphertext, size_t ciphertext_len,
133 const struct MLKEM768_private_key *private_key);
134
135/* Serialisation of keys. */
136
137/*
138 * MLKEM768_marshal_public_key serializes |public_key| to |out| in the standard
139 * format for ML-KEM public keys. It returns one on success or zero on allocation
140 * error.
141 */
142int MLKEM768_marshal_public_key(struct cbb_st *out,
143 const struct MLKEM768_public_key *public_key);
144
145/*
146 * MLKEM768_parse_public_key parses a public key, in the format generated by
147 * |MLKEM_marshal_public_key|, from |in| and writes the result to
148 * |out_public_key|. It returns one on success or zero on parse error or if
149 * there are trailing bytes in |in|.
150 */
151int MLKEM768_parse_public_key(struct MLKEM768_public_key *out_public_key,
152 struct cbs_st *in);
153
154/*
155 * MLKEM_parse_private_key parses a private key, in the format generated by
156 * |MLKEM_marshal_private_key|, from |in| and writes the result to
157 * |out_private_key|. It returns one on success or zero on parse error or if
158 * there are trailing bytes in |in|. This formate is verbose and should be avoided.
159 * Private keys should be stored as seeds and parsed using |MLKEM768_private_key_from_seed|.
160 */
161int MLKEM768_parse_private_key(struct MLKEM768_private_key *out_private_key,
162 struct cbs_st *in);
163
164#if defined(__cplusplus)
165}
166#endif
167
168#endif /* OPENSSL_HEADER_MLKEM_H */
diff --git a/src/lib/libcrypto/mlkem/mlkem768.c b/src/lib/libcrypto/mlkem/mlkem768.c
new file mode 100644
index 0000000000..2ab1f5b0d9
--- /dev/null
+++ b/src/lib/libcrypto/mlkem/mlkem768.c
@@ -0,0 +1,1118 @@
1/* $OpenBSD: mlkem768.c,v 1.1 2024/12/13 00:03:57 beck Exp $ */
2/*
3 * Copyright (c) 2024, Google Inc.
4 * Copyright (c) 2024, Bob Beck <beck@obtuse.com>
5 *
6 * Permission to use, copy, modify, and/or distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
13 * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
15 * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
16 * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18
19#include <openssl/mlkem.h>
20
21#include <assert.h>
22#include <stdlib.h>
23#include <string.h>
24
25#include "bytestring.h"
26
27#include "sha3_internal.h"
28#include "mlkem_internal.h"
29#include "constant_time.h"
30#include "crypto_internal.h"
31
32/* Remove later */
33#undef LCRYPTO_ALIAS
34#define LCRYPTO_ALIAS(A)
35
36/*
37 * See
38 * https://csrc.nist.gov/pubs/fips/203/final
39 */
40
41static void
42prf(uint8_t *out, size_t out_len, const uint8_t in[33])
43{
44 sha3_ctx ctx;
45 shake256_init(&ctx);
46 shake_update(&ctx, in, 33);
47 shake_xof(&ctx);
48 shake_out(&ctx, out, out_len);
49}
50
51/* Section 4.1 */
52static void
53hash_h(uint8_t out[32], const uint8_t *in, size_t len)
54{
55 sha3_ctx ctx;
56 sha3_init(&ctx, 32);
57 sha3_update(&ctx, in, len);
58 sha3_final(out, &ctx);
59}
60
61static void
62hash_g(uint8_t out[64], const uint8_t *in, size_t len)
63{
64 sha3_ctx ctx;
65 sha3_init(&ctx, 64);
66 sha3_update(&ctx, in, len);
67 sha3_final(out, &ctx);
68}
69
70/* this is called 'J' in the spec */
71static void
72kdf(uint8_t out[MLKEM_SHARED_SECRET_BYTES], const uint8_t failure_secret[32],
73 const uint8_t *in, size_t len)
74{
75 sha3_ctx ctx;
76 shake256_init(&ctx);
77 shake_update(&ctx, failure_secret, 32);
78 shake_update(&ctx, in, len);
79 shake_xof(&ctx);
80 shake_out(&ctx, out, MLKEM_SHARED_SECRET_BYTES);
81}
82
83#define DEGREE 256
84#define RANK768 3
85
86static const size_t kBarrettMultiplier = 5039;
87static const unsigned kBarrettShift = 24;
88static const uint16_t kPrime = 3329;
89static const int kLog2Prime = 12;
90static const uint16_t kHalfPrime = (/*kPrime=*/3329 - 1) / 2;
91static const int kDU768 = 10;
92static const int kDV768 = 4;
93/*
94 * kInverseDegree is 128^-1 mod 3329; 128 because kPrime does not have a 512th
95 * root of unity.
96 */
97static const uint16_t kInverseDegree = 3303;
98static const size_t kEncodedVectorSize =
99 (/*kLog2Prime=*/12 * DEGREE / 8) * RANK768;
100static const size_t kCompressedVectorSize = /*kDU768=*/ 10 * RANK768 * DEGREE /
101 8;
102
103typedef struct scalar {
104 /* On every function entry and exit, 0 <= c < kPrime. */
105 uint16_t c[DEGREE];
106} scalar;
107
108typedef struct vector {
109 scalar v[RANK768];
110} vector;
111
112typedef struct matrix {
113 scalar v[RANK768][RANK768];
114} matrix;
115
116/*
117 * This bit of Python will be referenced in some of the following comments:
118 *
119 * p = 3329
120 *
121 * def bitreverse(i):
122 * ret = 0
123 * for n in range(7):
124 * bit = i & 1
125 * ret <<= 1
126 * ret |= bit
127 * i >>= 1
128 * return ret
129 */
130
131/* kNTTRoots = [pow(17, bitreverse(i), p) for i in range(128)] */
132static const uint16_t kNTTRoots[128] = {
133 1, 1729, 2580, 3289, 2642, 630, 1897, 848, 1062, 1919, 193, 797,
134 2786, 3260, 569, 1746, 296, 2447, 1339, 1476, 3046, 56, 2240, 1333,
135 1426, 2094, 535, 2882, 2393, 2879, 1974, 821, 289, 331, 3253, 1756,
136 1197, 2304, 2277, 2055, 650, 1977, 2513, 632, 2865, 33, 1320, 1915,
137 2319, 1435, 807, 452, 1438, 2868, 1534, 2402, 2647, 2617, 1481, 648,
138 2474, 3110, 1227, 910, 17, 2761, 583, 2649, 1637, 723, 2288, 1100,
139 1409, 2662, 3281, 233, 756, 2156, 3015, 3050, 1703, 1651, 2789, 1789,
140 1847, 952, 1461, 2687, 939, 2308, 2437, 2388, 733, 2337, 268, 641,
141 1584, 2298, 2037, 3220, 375, 2549, 2090, 1645, 1063, 319, 2773, 757,
142 2099, 561, 2466, 2594, 2804, 1092, 403, 1026, 1143, 2150, 2775, 886,
143 1722, 1212, 1874, 1029, 2110, 2935, 885, 2154,
144};
145
146/* kInverseNTTRoots = [pow(17, -bitreverse(i), p) for i in range(128)] */
147static const uint16_t kInverseNTTRoots[128] = {
148 1, 1600, 40, 749, 2481, 1432, 2699, 687, 1583, 2760, 69, 543,
149 2532, 3136, 1410, 2267, 2508, 1355, 450, 936, 447, 2794, 1235, 1903,
150 1996, 1089, 3273, 283, 1853, 1990, 882, 3033, 2419, 2102, 219, 855,
151 2681, 1848, 712, 682, 927, 1795, 461, 1891, 2877, 2522, 1894, 1010,
152 1414, 2009, 3296, 464, 2697, 816, 1352, 2679, 1274, 1052, 1025, 2132,
153 1573, 76, 2998, 3040, 1175, 2444, 394, 1219, 2300, 1455, 2117, 1607,
154 2443, 554, 1179, 2186, 2303, 2926, 2237, 525, 735, 863, 2768, 1230,
155 2572, 556, 3010, 2266, 1684, 1239, 780, 2954, 109, 1292, 1031, 1745,
156 2688, 3061, 992, 2596, 941, 892, 1021, 2390, 642, 1868, 2377, 1482,
157 1540, 540, 1678, 1626, 279, 314, 1173, 2573, 3096, 48, 667, 1920,
158 2229, 1041, 2606, 1692, 680, 2746, 568, 3312,
159};
160
161/* kModRoots = [pow(17, 2*bitreverse(i) + 1, p) for i in range(128)] */
162static const uint16_t kModRoots[128] = {
163 17, 3312, 2761, 568, 583, 2746, 2649, 680, 1637, 1692, 723, 2606,
164 2288, 1041, 1100, 2229, 1409, 1920, 2662, 667, 3281, 48, 233, 3096,
165 756, 2573, 2156, 1173, 3015, 314, 3050, 279, 1703, 1626, 1651, 1678,
166 2789, 540, 1789, 1540, 1847, 1482, 952, 2377, 1461, 1868, 2687, 642,
167 939, 2390, 2308, 1021, 2437, 892, 2388, 941, 733, 2596, 2337, 992,
168 268, 3061, 641, 2688, 1584, 1745, 2298, 1031, 2037, 1292, 3220, 109,
169 375, 2954, 2549, 780, 2090, 1239, 1645, 1684, 1063, 2266, 319, 3010,
170 2773, 556, 757, 2572, 2099, 1230, 561, 2768, 2466, 863, 2594, 735,
171 2804, 525, 1092, 2237, 403, 2926, 1026, 2303, 1143, 2186, 2150, 1179,
172 2775, 554, 886, 2443, 1722, 1607, 1212, 2117, 1874, 1455, 1029, 2300,
173 2110, 1219, 2935, 394, 885, 2444, 2154, 1175,
174};
175
176/* reduce_once reduces 0 <= x < 2*kPrime, mod kPrime. */
177static uint16_t
178reduce_once(uint16_t x)
179{
180 assert(x < 2 * kPrime);
181 const uint16_t subtracted = x - kPrime;
182 uint16_t mask = 0u - (subtracted >> 15);
183 /*
184 * On Aarch64, omitting a |value_barrier_u16| results in a 2x speedup of
185 * ML-KEM overall and Clang still produces constant-time code using
186 * `csel`. On other platforms & compilers on godbolt that we care about,
187 * this code also produces constant-time output.
188 */
189 return (mask & x) | (~mask & subtracted);
190}
191
192/*
193 * constant time reduce x mod kPrime using Barrett reduction. x must be less
194 * than kPrime + 2×kPrime².
195 */
196static uint16_t
197reduce(uint32_t x)
198{
199 uint64_t product = (uint64_t)x * kBarrettMultiplier;
200 uint32_t quotient = (uint32_t)(product >> kBarrettShift);
201 uint32_t remainder = x - quotient * kPrime;
202
203 assert(x < kPrime + 2u * kPrime * kPrime);
204 return reduce_once(remainder);
205}
206
207static void
208scalar_zero(scalar *out)
209{
210 memset(out, 0, sizeof(*out));
211}
212
213static void
214vector_zero(vector *out)
215{
216 memset(out, 0, sizeof(*out));
217}
218
219/*
220 * In place number theoretic transform of a given scalar.
221 * Note that MLKEM's kPrime 3329 does not have a 512th root of unity, so this
222 * transform leaves off the last iteration of the usual FFT code, with the 128
223 * relevant roots of unity being stored in |kNTTRoots|. This means the output
224 * should be seen as 128 elements in GF(3329^2), with the coefficients of the
225 * elements being consecutive entries in |s->c|.
226 */
227static void
228scalar_ntt(scalar *s)
229{
230 int offset = DEGREE;
231 int step;
232 /*
233 * `int` is used here because using `size_t` throughout caused a ~5% slowdown
234 * with Clang 14 on Aarch64.
235 */
236 for (step = 1; step < DEGREE / 2; step <<= 1) {
237 int i, j, k = 0;
238
239 offset >>= 1;
240 for (i = 0; i < step; i++) {
241 const uint32_t step_root = kNTTRoots[i + step];
242
243 for (j = k; j < k + offset; j++) {
244 uint16_t odd, even;
245
246 odd = reduce(step_root * s->c[j + offset]);
247 even = s->c[j];
248 s->c[j] = reduce_once(odd + even);
249 s->c[j + offset] = reduce_once(even - odd +
250 kPrime);
251 }
252 k += 2 * offset;
253 }
254 }
255}
256
257static void
258vector_ntt(vector *a)
259{
260 int i;
261
262 for (i = 0; i < RANK768; i++) {
263 scalar_ntt(&a->v[i]);
264 }
265}
266
267/*
268 * In place inverse number theoretic transform of a given scalar, with pairs of
269 * entries of s->v being interpreted as elements of GF(3329^2). Just as with the
270 * number theoretic transform, this leaves off the first step of the normal iFFT
271 * to account for the fact that 3329 does not have a 512th root of unity, using
272 * the precomputed 128 roots of unity stored in |kInverseNTTRoots|.
273 */
274static void
275scalar_inverse_ntt(scalar *s)
276{
277 int i, j, k, offset, step = DEGREE / 2;
278
279 /*
280 * `int` is used here because using `size_t` throughout caused a ~5% slowdown
281 * with Clang 14 on Aarch64.
282 */
283 for (offset = 2; offset < DEGREE; offset <<= 1) {
284 step >>= 1;
285 k = 0;
286 for (i = 0; i < step; i++) {
287 uint32_t step_root = kInverseNTTRoots[i + step];
288 for (j = k; j < k + offset; j++) {
289 uint16_t odd, even;
290 odd = s->c[j + offset];
291 even = s->c[j];
292 s->c[j] = reduce_once(odd + even);
293 s->c[j + offset] = reduce(step_root *
294 (even - odd + kPrime));
295 }
296 k += 2 * offset;
297 }
298 }
299 for (i = 0; i < DEGREE; i++) {
300 s->c[i] = reduce(s->c[i] * kInverseDegree);
301 }
302}
303
304static void
305vector_inverse_ntt(vector *a)
306{
307 int i;
308
309 for (i = 0; i < RANK768; i++) {
310 scalar_inverse_ntt(&a->v[i]);
311 }
312}
313
314static void
315scalar_add(scalar *lhs, const scalar *rhs)
316{
317 int i;
318
319 for (i = 0; i < DEGREE; i++) {
320 lhs->c[i] = reduce_once(lhs->c[i] + rhs->c[i]);
321 }
322}
323
324static void
325scalar_sub(scalar *lhs, const scalar *rhs)
326{
327 int i;
328
329 for (i = 0; i < DEGREE; i++) {
330 lhs->c[i] = reduce_once(lhs->c[i] - rhs->c[i] + kPrime);
331 }
332}
333
334/*
335 * Multiplying two scalars in the number theoretically transformed state. Since
336 * 3329 does not have a 512th root of unity, this means we have to interpret
337 * the 2*ith and (2*i+1)th entries of the scalar as elements of GF(3329)[X]/(X^2
338 * - 17^(2*bitreverse(i)+1)) The value of 17^(2*bitreverse(i)+1) mod 3329 is
339 * stored in the precomputed |kModRoots| table. Note that our Barrett transform
340 * only allows us to multipy two reduced numbers together, so we need some
341 * intermediate reduction steps, even if an uint64_t could hold 3 multiplied
342 * numbers.
343 */
344static void
345scalar_mult(scalar *out, const scalar *lhs, const scalar *rhs)
346{
347 int i;
348
349 for (i = 0; i < DEGREE / 2; i++) {
350 uint32_t real_real = (uint32_t)lhs->c[2 * i] * rhs->c[2 * i];
351 uint32_t img_img = (uint32_t)lhs->c[2 * i + 1] *
352 rhs->c[2 * i + 1];
353 uint32_t real_img = (uint32_t)lhs->c[2 * i] * rhs->c[2 * i + 1];
354 uint32_t img_real = (uint32_t)lhs->c[2 * i + 1] * rhs->c[2 * i];
355
356 out->c[2 * i] =
357 reduce(real_real +
358 (uint32_t)reduce(img_img) * kModRoots[i]);
359 out->c[2 * i + 1] = reduce(img_real + real_img);
360 }
361}
362
363static void
364vector_add(vector *lhs, const vector *rhs)
365{
366 int i;
367
368 for (i = 0; i < RANK768; i++) {
369 scalar_add(&lhs->v[i], &rhs->v[i]);
370 }
371}
372
373static void
374matrix_mult(vector *out, const matrix *m, const vector *a)
375{
376 int i, j;
377
378 vector_zero(out);
379 for (i = 0; i < RANK768; i++) {
380 for (j = 0; j < RANK768; j++) {
381 scalar product;
382
383 scalar_mult(&product, &m->v[i][j], &a->v[j]);
384 scalar_add(&out->v[i], &product);
385 }
386 }
387}
388
389static void
390matrix_mult_transpose(vector *out, const matrix *m,
391 const vector *a)
392{
393 int i, j;
394
395 vector_zero(out);
396 for (i = 0; i < RANK768; i++) {
397 for (j = 0; j < RANK768; j++) {
398 scalar product;
399
400 scalar_mult(&product, &m->v[j][i], &a->v[j]);
401 scalar_add(&out->v[i], &product);
402 }
403 }
404}
405
406static void
407scalar_inner_product(scalar *out, const vector *lhs,
408 const vector *rhs)
409{
410 int i;
411 scalar_zero(out);
412 for (i = 0; i < RANK768; i++) {
413 scalar product;
414
415 scalar_mult(&product, &lhs->v[i], &rhs->v[i]);
416 scalar_add(out, &product);
417 }
418}
419
420/*
421 * Algorithm 6 of spec. Rejection samples a Keccak stream to get uniformly
422 * distributed elements. This is used for matrix expansion and only operates on
423 * public inputs.
424 */
425static void
426scalar_from_keccak_vartime(scalar *out, sha3_ctx *keccak_ctx)
427{
428 int i, done = 0;
429
430 while (done < DEGREE) {
431 uint8_t block[168];
432
433 shake_out(keccak_ctx, block, sizeof(block));
434 for (i = 0; i < sizeof(block) && done < DEGREE; i += 3) {
435 uint16_t d1 = block[i] + 256 * (block[i + 1] % 16);
436 uint16_t d2 = block[i + 1] / 16 + 16 * block[i + 2];
437
438 if (d1 < kPrime) {
439 out->c[done++] = d1;
440 }
441 if (d2 < kPrime && done < DEGREE) {
442 out->c[done++] = d2;
443 }
444 }
445 }
446}
447
448/*
449 * Algorithm 7 of the spec, with eta fixed to two and the PRF call
450 * included. Creates binominally distributed elements by sampling 2*|eta| bits,
451 * and setting the coefficient to the count of the first bits minus the count of
452 * the second bits, resulting in a centered binomial distribution. Since eta is
453 * two this gives -2/2 with a probability of 1/16, -1/1 with probability 1/4,
454 * and 0 with probability 3/8.
455 */
456static void
457scalar_centered_binomial_distribution_eta_2_with_prf(scalar *out,
458 const uint8_t input[33])
459{
460 uint8_t entropy[128];
461 int i;
462
463 CTASSERT(sizeof(entropy) == 2 * /*kEta=*/ 2 * DEGREE / 8);
464 prf(entropy, sizeof(entropy), input);
465
466 for (i = 0; i < DEGREE; i += 2) {
467 uint8_t byte = entropy[i / 2];
468 uint16_t value = kPrime;
469
470 value += (byte & 1) + ((byte >> 1) & 1);
471 value -= ((byte >> 2) & 1) + ((byte >> 3) & 1);
472 out->c[i] = reduce_once(value);
473
474 byte >>= 4;
475 value = kPrime;
476 value += (byte & 1) + ((byte >> 1) & 1);
477 value -= ((byte >> 2) & 1) + ((byte >> 3) & 1);
478 out->c[i + 1] = reduce_once(value);
479 }
480}
481
482/*
483 * Generates a secret vector by using
484 * |scalar_centered_binomial_distribution_eta_2_with_prf|, using the given seed
485 * appending and incrementing |counter| for entry of the vector.
486 */
487static void
488vector_generate_secret_eta_2(vector *out, uint8_t *counter,
489 const uint8_t seed[32])
490{
491 uint8_t input[33];
492 int i;
493
494 memcpy(input, seed, 32);
495 for (i = 0; i < RANK768; i++) {
496 input[32] = (*counter)++;
497 scalar_centered_binomial_distribution_eta_2_with_prf(&out->v[i],
498 input);
499 }
500}
501
502/* Expands the matrix of a seed for key generation and for encaps-CPA. */
503static void
504matrix_expand(matrix *out, const uint8_t rho[32])
505{
506 uint8_t input[34];
507 int i, j;
508
509 memcpy(input, rho, 32);
510 for (i = 0; i < RANK768; i++) {
511 for (j = 0; j < RANK768; j++) {
512 sha3_ctx keccak_ctx;
513
514 input[32] = i;
515 input[33] = j;
516 shake128_init(&keccak_ctx);
517 shake_update(&keccak_ctx, input, sizeof(input));
518 shake_xof(&keccak_ctx);
519 scalar_from_keccak_vartime(&out->v[i][j], &keccak_ctx);
520 }
521 }
522}
523
524static const uint8_t kMasks[8] = {0x01, 0x03, 0x07, 0x0f,
525 0x1f, 0x3f, 0x7f, 0xff};
526
527static void
528scalar_encode(uint8_t *out, const scalar *s, int bits)
529{
530 uint8_t out_byte = 0;
531 int i, out_byte_bits = 0;
532
533 assert(bits <= (int)sizeof(*s->c) * 8 && bits != 1);
534 for (i = 0; i < DEGREE; i++) {
535 uint16_t element = s->c[i];
536 int element_bits_done = 0;
537
538 while (element_bits_done < bits) {
539 int chunk_bits = bits - element_bits_done;
540 int out_bits_remaining = 8 - out_byte_bits;
541
542 if (chunk_bits >= out_bits_remaining) {
543 chunk_bits = out_bits_remaining;
544 out_byte |= (element &
545 kMasks[chunk_bits - 1]) << out_byte_bits;
546 *out = out_byte;
547 out++;
548 out_byte_bits = 0;
549 out_byte = 0;
550 } else {
551 out_byte |= (element &
552 kMasks[chunk_bits - 1]) << out_byte_bits;
553 out_byte_bits += chunk_bits;
554 }
555
556 element_bits_done += chunk_bits;
557 element >>= chunk_bits;
558 }
559 }
560
561 if (out_byte_bits > 0) {
562 *out = out_byte;
563 }
564}
565
566/* scalar_encode_1 is |scalar_encode| specialised for |bits| == 1. */
567static void
568scalar_encode_1(uint8_t out[32], const scalar *s)
569{
570 int i, j;
571
572 for (i = 0; i < DEGREE; i += 8) {
573 uint8_t out_byte = 0;
574
575 for (j = 0; j < 8; j++) {
576 out_byte |= (s->c[i + j] & 1) << j;
577 }
578 *out = out_byte;
579 out++;
580 }
581}
582
583/*
584 * Encodes an entire vector into 32*|RANK768|*|bits| bytes. Note that since 256
585 * (DEGREE) is divisible by 8, the individual vector entries will always fill a
586 * whole number of bytes, so we do not need to worry about bit packing here.
587 */
588static void
589vector_encode(uint8_t *out, const vector *a, int bits)
590{
591 int i;
592
593 for (i = 0; i < RANK768; i++) {
594 scalar_encode(out + i * bits * DEGREE / 8, &a->v[i], bits);
595 }
596}
597
598/*
599 * scalar_decode parses |DEGREE * bits| bits from |in| into |DEGREE| values in
600 * |out|. It returns one on success and zero if any parsed value is >=
601 * |kPrime|.
602 */
603static int
604scalar_decode(scalar *out, const uint8_t *in, int bits)
605{
606 uint8_t in_byte = 0;
607 int i, in_byte_bits_left = 0;
608
609 assert(bits <= (int)sizeof(*out->c) * 8 && bits != 1);
610
611 for (i = 0; i < DEGREE; i++) {
612 uint16_t element = 0;
613 int element_bits_done = 0;
614
615 while (element_bits_done < bits) {
616 int chunk_bits = bits - element_bits_done;
617
618 if (in_byte_bits_left == 0) {
619 in_byte = *in;
620 in++;
621 in_byte_bits_left = 8;
622 }
623
624 if (chunk_bits > in_byte_bits_left) {
625 chunk_bits = in_byte_bits_left;
626 }
627
628 element |= (in_byte & kMasks[chunk_bits - 1]) <<
629 element_bits_done;
630 in_byte_bits_left -= chunk_bits;
631 in_byte >>= chunk_bits;
632
633 element_bits_done += chunk_bits;
634 }
635
636 if (element >= kPrime) {
637 return 0;
638 }
639 out->c[i] = element;
640 }
641
642 return 1;
643}
644
645/* scalar_decode_1 is |scalar_decode| specialised for |bits| == 1. */
646static void
647scalar_decode_1(scalar *out, const uint8_t in[32])
648{
649 int i, j;
650
651 for (i = 0; i < DEGREE; i += 8) {
652 uint8_t in_byte = *in;
653
654 in++;
655 for (j = 0; j < 8; j++) {
656 out->c[i + j] = in_byte & 1;
657 in_byte >>= 1;
658 }
659 }
660}
661
662/*
663 * Decodes 32*|RANK768|*|bits| bytes from |in| into |out|. It returns one on
664 * success or zero if any parsed value is >= |kPrime|.
665 */
666static int
667vector_decode(vector *out, const uint8_t *in, int bits)
668{
669 int i;
670
671 for (i = 0; i < RANK768; i++) {
672 if (!scalar_decode(&out->v[i], in + i * bits * DEGREE / 8,
673 bits)) {
674 return 0;
675 }
676 }
677 return 1;
678}
679
680/*
681 * Compresses (lossily) an input |x| mod 3329 into |bits| many bits by grouping
682 * numbers close to each other together. The formula used is
683 * round(2^|bits|/kPrime*x) mod 2^|bits|.
684 * Uses Barrett reduction to achieve constant time. Since we need both the
685 * remainder (for rounding) and the quotient (as the result), we cannot use
686 * |reduce| here, but need to do the Barrett reduction directly.
687 */
688static uint16_t
689compress(uint16_t x, int bits)
690{
691 uint32_t shifted = (uint32_t)x << bits;
692 uint64_t product = (uint64_t)shifted * kBarrettMultiplier;
693 uint32_t quotient = (uint32_t)(product >> kBarrettShift);
694 uint32_t remainder = shifted - quotient * kPrime;
695
696 /*
697 * Adjust the quotient to round correctly:
698 * 0 <= remainder <= kHalfPrime round to 0
699 * kHalfPrime < remainder <= kPrime + kHalfPrime round to 1
700 * kPrime + kHalfPrime < remainder < 2 * kPrime round to 2
701 */
702 assert(remainder < 2u * kPrime);
703 quotient += 1 & constant_time_lt(kHalfPrime, remainder);
704 quotient += 1 & constant_time_lt(kPrime + kHalfPrime, remainder);
705 return quotient & ((1 << bits) - 1);
706}
707
708/*
709 * Decompresses |x| by using an equi-distant representative. The formula is
710 * round(kPrime/2^|bits|*x). Note that 2^|bits| being the divisor allows us to
711 * implement this logic using only bit operations.
712 */
713static uint16_t
714decompress(uint16_t x, int bits)
715{
716 uint32_t product = (uint32_t)x * kPrime;
717 uint32_t power = 1 << bits;
718 /* This is |product| % power, since |power| is a power of 2. */
719 uint32_t remainder = product & (power - 1);
720 /* This is |product| / power, since |power| is a power of 2. */
721 uint32_t lower = product >> bits;
722
723 /*
724 * The rounding logic works since the first half of numbers mod |power| have a
725 * 0 as first bit, and the second half has a 1 as first bit, since |power| is
726 * a power of 2. As a 12 bit number, |remainder| is always positive, so we
727 * will shift in 0s for a right shift.
728 */
729 return lower + (remainder >> (bits - 1));
730}
731
732static void
733scalar_compress(scalar *s, int bits)
734{
735 int i;
736
737 for (i = 0; i < DEGREE; i++) {
738 s->c[i] = compress(s->c[i], bits);
739 }
740}
741
742static void
743scalar_decompress(scalar *s, int bits)
744{
745 int i;
746
747 for (i = 0; i < DEGREE; i++) {
748 s->c[i] = decompress(s->c[i], bits);
749 }
750}
751
752static void
753vector_compress(vector *a, int bits)
754{
755 int i;
756
757 for (i = 0; i < RANK768; i++) {
758 scalar_compress(&a->v[i], bits);
759 }
760}
761
762static void
763vector_decompress(vector *a, int bits)
764{
765 int i;
766
767 for (i = 0; i < RANK768; i++) {
768 scalar_decompress(&a->v[i], bits);
769 }
770}
771
772struct public_key {
773 vector t;
774 uint8_t rho[32];
775 uint8_t public_key_hash[32];
776 matrix m;
777};
778
779static struct public_key *
780public_key_768_from_external(const struct MLKEM768_public_key *external)
781{
782 return (struct public_key *)external;
783}
784
785struct private_key {
786 struct public_key pub;
787 vector s;
788 uint8_t fo_failure_secret[32];
789};
790
791static struct private_key *
792private_key_768_from_external(const struct MLKEM768_private_key *external)
793{
794 return (struct private_key *)external;
795}
796
797/*
798 * Calls |MLKEM768_generate_key_external_entropy| with random bytes from
799 * |RAND_bytes|.
800 */
801void
802MLKEM768_generate_key(uint8_t out_encoded_public_key[MLKEM768_PUBLIC_KEY_BYTES],
803 uint8_t optional_out_seed[MLKEM_SEED_BYTES],
804 struct MLKEM768_private_key *out_private_key)
805{
806 uint8_t entropy_buf[MLKEM_SEED_BYTES];
807 uint8_t *entropy = optional_out_seed != NULL ? optional_out_seed :
808 entropy_buf;
809
810 arc4random_buf(entropy, MLKEM_SEED_BYTES);
811 MLKEM768_generate_key_external_entropy(out_encoded_public_key,
812 out_private_key, entropy);
813}
814LCRYPTO_ALIAS(MLKEM768_generate_key);
815
816int
817MLKEM768_private_key_from_seed(struct MLKEM768_private_key *out_private_key,
818 const uint8_t *seed, size_t seed_len)
819{
820 if (seed_len != MLKEM_SEED_BYTES) {
821 return 0;
822 }
823 uint8_t public_key_bytes[MLKEM768_PUBLIC_KEY_BYTES];
824 MLKEM768_generate_key_external_entropy(public_key_bytes,
825 out_private_key, seed);
826 return 1;
827}
828LCRYPTO_ALIAS(MLKEM768_private_key_from_seed);
829
830static int
831mlkem_marshal_public_key(CBB *out, const struct public_key *pub)
832{
833 uint8_t *vector_output;
834
835 if (!CBB_add_space(out, &vector_output, kEncodedVectorSize)) {
836 return 0;
837 }
838 vector_encode(vector_output, &pub->t, kLog2Prime);
839 if (!CBB_add_bytes(out, pub->rho, sizeof(pub->rho))) {
840 return 0;
841 }
842 return 1;
843}
844
845void
846MLKEM768_generate_key_external_entropy(
847 uint8_t out_encoded_public_key[MLKEM768_PUBLIC_KEY_BYTES],
848 struct MLKEM768_private_key *out_private_key,
849 const uint8_t entropy[MLKEM_SEED_BYTES])
850{
851 struct private_key *priv = private_key_768_from_external(
852 out_private_key);
853 uint8_t augmented_seed[33];
854 uint8_t *rho, *sigma;
855 uint8_t counter = 0;
856 uint8_t hashed[64];
857 vector error;
858 CBB cbb;
859
860 memcpy(augmented_seed, entropy, 32);
861 augmented_seed[32] = RANK768;
862 hash_g(hashed, augmented_seed, 33);
863 rho = hashed;
864 sigma = hashed + 32;
865 memcpy(priv->pub.rho, hashed, sizeof(priv->pub.rho));
866 matrix_expand(&priv->pub.m, rho);
867 vector_generate_secret_eta_2(&priv->s, &counter, sigma);
868 vector_ntt(&priv->s);
869 vector_generate_secret_eta_2(&error, &counter, sigma);
870 vector_ntt(&error);
871 matrix_mult_transpose(&priv->pub.t, &priv->pub.m, &priv->s);
872 vector_add(&priv->pub.t, &error);
873
874 CBB_init_fixed(&cbb, out_encoded_public_key, MLKEM768_PUBLIC_KEY_BYTES);
875 if (!mlkem_marshal_public_key(&cbb, &priv->pub)) {
876 abort();
877 }
878
879 hash_h(priv->pub.public_key_hash, out_encoded_public_key,
880 MLKEM768_PUBLIC_KEY_BYTES);
881 memcpy(priv->fo_failure_secret, entropy + 32, 32);
882}
883
884void
885MLKEM768_public_from_private(struct MLKEM768_public_key *out_public_key,
886 const struct MLKEM768_private_key *private_key)
887{
888 struct public_key *const pub = public_key_768_from_external(
889 out_public_key);
890 const struct private_key *const priv = private_key_768_from_external(
891 private_key);
892
893 *pub = priv->pub;
894}
895LCRYPTO_ALIAS(MLKEM768_public_from_private);
896
897/*
898 * Encrypts a message with given randomness to the ciphertext in |out|. Without
899 * applying the Fujisaki-Okamoto transform this would not result in a CCA secure
900 * scheme, since lattice schemes are vulnerable to decryption failure oracles.
901 */
902static void
903encrypt_cpa(uint8_t out[MLKEM768_CIPHERTEXT_BYTES],
904 const struct public_key *pub, const uint8_t message[32],
905 const uint8_t randomness[32])
906{
907 scalar expanded_message, scalar_error;
908 vector secret, error, u;
909 uint8_t counter = 0;
910 uint8_t input[33];
911 scalar v;
912
913 vector_generate_secret_eta_2(&secret, &counter, randomness);
914 vector_ntt(&secret);
915 vector_generate_secret_eta_2(&error, &counter, randomness);
916 memcpy(input, randomness, 32);
917 input[32] = counter;
918 scalar_centered_binomial_distribution_eta_2_with_prf(&scalar_error,
919 input);
920 matrix_mult(&u, &pub->m, &secret);
921 vector_inverse_ntt(&u);
922 vector_add(&u, &error);
923 scalar_inner_product(&v, &pub->t, &secret);
924 scalar_inverse_ntt(&v);
925 scalar_add(&v, &scalar_error);
926 scalar_decode_1(&expanded_message, message);
927 scalar_decompress(&expanded_message, 1);
928 scalar_add(&v, &expanded_message);
929 vector_compress(&u, kDU768);
930 vector_encode(out, &u, kDU768);
931 scalar_compress(&v, kDV768);
932 scalar_encode(out + kCompressedVectorSize, &v, kDV768);
933}
934
935/* Calls MLKEM768_encap_external_entropy| with random bytes */
936void
937MLKEM768_encap(uint8_t out_ciphertext[MLKEM768_CIPHERTEXT_BYTES],
938 uint8_t out_shared_secret[MLKEM_SHARED_SECRET_BYTES],
939 const struct MLKEM768_public_key *public_key)
940{
941 uint8_t entropy[MLKEM_ENCAP_ENTROPY];
942
943 arc4random_buf(entropy, MLKEM_ENCAP_ENTROPY);
944 MLKEM768_encap_external_entropy(out_ciphertext, out_shared_secret,
945 public_key, entropy);
946}
947LCRYPTO_ALIAS(MLKEM768_encap);
948
949/* See section 6.2 of the spec. */
950void
951MLKEM768_encap_external_entropy(
952 uint8_t out_ciphertext[MLKEM768_CIPHERTEXT_BYTES],
953 uint8_t out_shared_secret[MLKEM_SHARED_SECRET_BYTES],
954 const struct MLKEM768_public_key *public_key,
955 const uint8_t entropy[MLKEM_ENCAP_ENTROPY])
956{
957 const struct public_key *pub = public_key_768_from_external(public_key);
958 uint8_t key_and_randomness[64];
959 uint8_t input[64];
960
961 memcpy(input, entropy, MLKEM_ENCAP_ENTROPY);
962 memcpy(input + MLKEM_ENCAP_ENTROPY, pub->public_key_hash,
963 sizeof(input) - MLKEM_ENCAP_ENTROPY);
964 hash_g(key_and_randomness, input, sizeof(input));
965 encrypt_cpa(out_ciphertext, pub, entropy, key_and_randomness + 32);
966 memcpy(out_shared_secret, key_and_randomness, 32);
967}
968
969static void
970decrypt_cpa(uint8_t out[32], const struct private_key *priv,
971 const uint8_t ciphertext[MLKEM768_CIPHERTEXT_BYTES])
972{
973 scalar mask, v;
974 vector u;
975
976 vector_decode(&u, ciphertext, kDU768);
977 vector_decompress(&u, kDU768);
978 vector_ntt(&u);
979 scalar_decode(&v, ciphertext + kCompressedVectorSize, kDV768);
980 scalar_decompress(&v, kDV768);
981 scalar_inner_product(&mask, &priv->s, &u);
982 scalar_inverse_ntt(&mask);
983 scalar_sub(&v, &mask);
984 scalar_compress(&v, 1);
985 scalar_encode_1(out, &v);
986}
987
988/* See section 6.3 */
989int
990MLKEM768_decap(uint8_t out_shared_secret[MLKEM_SHARED_SECRET_BYTES],
991 const uint8_t *ciphertext, size_t ciphertext_len,
992 const struct MLKEM768_private_key *private_key)
993{
994 const struct private_key *priv = private_key_768_from_external(
995 private_key);
996 uint8_t expected_ciphertext[MLKEM768_CIPHERTEXT_BYTES];
997 uint8_t key_and_randomness[64];
998 uint8_t failure_key[32];
999 uint8_t decrypted[64];
1000 uint8_t mask;
1001 int i;
1002
1003 if (ciphertext_len != MLKEM768_CIPHERTEXT_BYTES) {
1004 arc4random_buf(out_shared_secret, MLKEM_SHARED_SECRET_BYTES);
1005 return 0;
1006 }
1007
1008 decrypt_cpa(decrypted, priv, ciphertext);
1009 memcpy(decrypted + 32, priv->pub.public_key_hash,
1010 sizeof(decrypted) - 32);
1011 hash_g(key_and_randomness, decrypted, sizeof(decrypted));
1012 encrypt_cpa(expected_ciphertext, &priv->pub, decrypted,
1013 key_and_randomness + 32);
1014 kdf(failure_key, priv->fo_failure_secret, ciphertext, ciphertext_len);
1015 mask = constant_time_eq_int_8(memcmp(ciphertext, expected_ciphertext,
1016 sizeof(expected_ciphertext)), 0);
1017 for (i = 0; i < MLKEM_SHARED_SECRET_BYTES; i++) {
1018 out_shared_secret[i] = constant_time_select_8(mask,
1019 key_and_randomness[i], failure_key[i]);
1020 }
1021
1022 return 1;
1023}
1024LCRYPTO_ALIAS(MLKEM768_decap);
1025
1026int
1027MLKEM768_marshal_public_key(CBB *out,
1028 const struct MLKEM768_public_key *public_key)
1029{
1030 return mlkem_marshal_public_key(out,
1031 public_key_768_from_external(public_key));
1032}
1033LCRYPTO_ALIAS(MLKEM768_marshal_public_key);
1034
1035/*
1036 * mlkem_parse_public_key_no_hash parses |in| into |pub| but doesn't calculate
1037 * the value of |pub->public_key_hash|.
1038 */
1039static int
1040mlkem_parse_public_key_no_hash(struct public_key *pub, CBS *in)
1041{
1042 CBS t_bytes;
1043
1044 if (!CBS_get_bytes(in, &t_bytes, kEncodedVectorSize) ||
1045 !vector_decode(&pub->t, CBS_data(&t_bytes), kLog2Prime)) {
1046 return 0;
1047 }
1048 memcpy(pub->rho, CBS_data(in), sizeof(pub->rho));
1049 if (!CBS_skip(in, sizeof(pub->rho)))
1050 return 0;
1051 matrix_expand(&pub->m, pub->rho);
1052 return 1;
1053}
1054
1055int
1056MLKEM768_parse_public_key(struct MLKEM768_public_key *public_key, CBS *in)
1057{
1058 struct public_key *pub = public_key_768_from_external(public_key);
1059 CBS orig_in = *in;
1060
1061 if (!mlkem_parse_public_key_no_hash(pub, in) ||
1062 CBS_len(in) != 0) {
1063 return 0;
1064 }
1065 hash_h(pub->public_key_hash, CBS_data(&orig_in), CBS_len(&orig_in));
1066 return 1;
1067}
1068LCRYPTO_ALIAS(MLKEM768_parse_public_key);
1069
1070int
1071MLKEM768_marshal_private_key(CBB *out,
1072 const struct MLKEM768_private_key *private_key)
1073{
1074 const struct private_key *const priv = private_key_768_from_external(
1075 private_key);
1076 uint8_t *s_output;
1077
1078 if (!CBB_add_space(out, &s_output, kEncodedVectorSize)) {
1079 return 0;
1080 }
1081 vector_encode(s_output, &priv->s, kLog2Prime);
1082 if (!mlkem_marshal_public_key(out, &priv->pub) ||
1083 !CBB_add_bytes(out, priv->pub.public_key_hash,
1084 sizeof(priv->pub.public_key_hash)) ||
1085 !CBB_add_bytes(out, priv->fo_failure_secret,
1086 sizeof(priv->fo_failure_secret))) {
1087 return 0;
1088 }
1089 return 1;
1090}
1091
1092int
1093MLKEM768_parse_private_key(struct MLKEM768_private_key *out_private_key,
1094 CBS *in)
1095{
1096 struct private_key *const priv = private_key_768_from_external(
1097 out_private_key);
1098 CBS s_bytes;
1099
1100 if (!CBS_get_bytes(in, &s_bytes, kEncodedVectorSize) ||
1101 !vector_decode(&priv->s, CBS_data(&s_bytes), kLog2Prime) ||
1102 !mlkem_parse_public_key_no_hash(&priv->pub, in)) {
1103 return 0;
1104 }
1105 memcpy(priv->pub.public_key_hash, CBS_data(in),
1106 sizeof(priv->pub.public_key_hash));
1107 if (!CBS_skip(in, sizeof(priv->pub.public_key_hash)))
1108 return 0;
1109 memcpy(priv->fo_failure_secret, CBS_data(in),
1110 sizeof(priv->fo_failure_secret));
1111 if (!CBS_skip(in, sizeof(priv->fo_failure_secret)))
1112 return 0;
1113 if (CBS_len(in) != 0)
1114 return 0;
1115
1116 return 1;
1117}
1118LCRYPTO_ALIAS(MLKEM768_parse_private_key);
diff --git a/src/lib/libcrypto/mlkem/mlkem_internal.h b/src/lib/libcrypto/mlkem/mlkem_internal.h
new file mode 100644
index 0000000000..3ef877f6d1
--- /dev/null
+++ b/src/lib/libcrypto/mlkem/mlkem_internal.h
@@ -0,0 +1,78 @@
1/* Copyright (c) 2023, Google Inc.
2 *
3 * Permission to use, copy, modify, and/or distribute this software for any
4 * purpose with or without fee is hereby granted, provided that the above
5 * copyright notice and this permission notice appear in all copies.
6 *
7 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
8 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
9 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
10 * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
11 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
12 * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
13 * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
14
15#ifndef OPENSSL_HEADER_CRYPTO_MLKEM_INTERNAL_H
16#define OPENSSL_HEADER_CRYPTO_MLKEM_INTERNAL_H
17
18#include <openssl/mlkem.h>
19
20#if defined(__cplusplus)
21extern "C" {
22#endif
23
24__BEGIN_HIDDEN_DECLS
25
26/*
27 * MLKEM_ENCAP_ENTROPY is the number of bytes of uniformly random entropy
28 * necessary to encapsulate a secret. The entropy will be leaked to the
29 * decapsulating party.
30 */
31#define MLKEM_ENCAP_ENTROPY 32
32
33/*
34 * MLKEM768_generate_key_external_entropy is a deterministic function to create a
35 * pair of ML-KEM 768 keys, using the supplied entropy. The entropy needs to be
36 * uniformly random generated. This function is should only be used for tests,
37 * regular callers should use the non-deterministic |MLKEM_generate_key|
38 * directly.
39 */
40void MLKEM768_generate_key_external_entropy(
41 uint8_t out_encoded_public_key[MLKEM768_PUBLIC_KEY_BYTES],
42 struct MLKEM768_private_key *out_private_key,
43 const uint8_t entropy[MLKEM_SEED_BYTES]);
44
45/*
46 * MLKEM768_PRIVATE_KEY_BYTES is the length of the data produced by
47 * |MLKEM768_marshal_private_key|.
48 */
49#define MLKEM768_PRIVATE_KEY_BYTES 2400
50
51/*
52 * MLKEM768_marshal_private_key serializes |private_key| to |out| in the standard
53 * format for ML-KEM private keys. It returns one on success or zero on
54 * allocation error.
55 */
56int MLKEM768_marshal_private_key(CBB *out,
57 const struct MLKEM768_private_key *private_key);
58
59/*
60 * MLKEM_encap_external_entropy behaves like |MLKEM_encap|, but uses
61 * |MLKEM_ENCAP_ENTROPY| bytes of |entropy| for randomization. The decapsulating
62 * side will be able to recover |entropy| in full. This function should only be
63 * used for tests, regular callers should use the non-deterministic
64 * |MLKEM_encap| directly.
65 */
66void MLKEM768_encap_external_entropy(
67 uint8_t out_ciphertext[MLKEM768_CIPHERTEXT_BYTES],
68 uint8_t out_shared_secret[MLKEM_SHARED_SECRET_BYTES],
69 const struct MLKEM768_public_key *public_key,
70 const uint8_t entropy[MLKEM_ENCAP_ENTROPY]);
71
72__END_HIDDEN_DECLS
73
74#if defined(__cplusplus)
75}
76#endif
77
78#endif /* OPENSSL_HEADER_CRYPTO_MLKEM_INTERNAL_H */