summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorjsing <>2024-03-27 08:24:13 +0000
committerjsing <>2024-03-27 08:24:13 +0000
commitdcbf0a4f999b6c933d8a3b910293af515367fae3 (patch)
treed2d10a0d0b90a02caab0f6dcd2d81c8eac19605a
parentddd9b2994f4684a973bc70f1ebcfc1cb05c2c215 (diff)
downloadopenbsd-dcbf0a4f999b6c933d8a3b910293af515367fae3.tar.gz
openbsd-dcbf0a4f999b6c933d8a3b910293af515367fae3.tar.bz2
openbsd-dcbf0a4f999b6c933d8a3b910293af515367fae3.zip
Remove near duplicate AES_set_{encrypt,decrypt}_key() functions.
There are currently three ways in which AES is implemented: all in assembly (amd64 et al), all in C (aarch64 et al), and half in C and half in assembly (hppa and sparc64). The last of these cases currently makes use of a near duplicate AES_set_{encrypt,decrypt}_key() implementation that avoids using the AES tables. Remove the near duplicate version and, if only a half-assembly version is implemented, use the same C version of AES_set_{encrypt,decrypt}_key() as everyone else. This adds around 8KB of rodata to libcrypto on these two platforms. Discussed with beck and tb.
-rw-r--r--src/lib/libcrypto/aes/aes_core.c210
1 files changed, 2 insertions, 208 deletions
diff --git a/src/lib/libcrypto/aes/aes_core.c b/src/lib/libcrypto/aes/aes_core.c
index 9ec84a5c82..6449ca7cfa 100644
--- a/src/lib/libcrypto/aes/aes_core.c
+++ b/src/lib/libcrypto/aes/aes_core.c
@@ -1,4 +1,4 @@
1/* $OpenBSD: aes_core.c,v 1.17 2024/03/27 06:51:59 jsing Exp $ */ 1/* $OpenBSD: aes_core.c,v 1.18 2024/03/27 08:24:13 jsing Exp $ */
2/** 2/**
3 * rijndael-alg-fst.c 3 * rijndael-alg-fst.c
4 * 4 *
@@ -37,7 +37,6 @@
37#include "aes_local.h" 37#include "aes_local.h"
38#include "crypto_internal.h" 38#include "crypto_internal.h"
39 39
40#ifndef AES_ASM
41/* 40/*
42Te0[x] = S [x].[02, 01, 01, 03]; 41Te0[x] = S [x].[02, 01, 01, 03];
43Te1[x] = S [x].[03, 02, 01, 01]; 42Te1[x] = S [x].[03, 02, 01, 01];
@@ -780,6 +779,7 @@ AES_set_decrypt_key(const unsigned char *userKey, const int bits, AES_KEY *key)
780 return 0; 779 return 0;
781} 780}
782 781
782#ifndef AES_ASM
783/* 783/*
784 * Encrypt a single block 784 * Encrypt a single block
785 * in and out can overlap 785 * in and out can overlap
@@ -1159,210 +1159,4 @@ AES_decrypt(const unsigned char *in, unsigned char *out, const AES_KEY *key)
1159 rk[3]; 1159 rk[3];
1160 PUTU32(out + 12, s3); 1160 PUTU32(out + 12, s3);
1161} 1161}
1162
1163#else /* AES_ASM */
1164
/*
 * Te4: 256-entry byte substitution table.
 *
 * AES_set_encrypt_key() in this section indexes it with each byte of a
 * key-schedule word and reassembles the four results into a new word.
 * NOTE(review): the values appear to be the standard AES S-box - confirm
 * against FIPS 197 before relying on that.
 */
1165static const u8 Te4[256] = {
1166 0x63U, 0x7cU, 0x77U, 0x7bU, 0xf2U, 0x6bU, 0x6fU, 0xc5U,
1167 0x30U, 0x01U, 0x67U, 0x2bU, 0xfeU, 0xd7U, 0xabU, 0x76U,
1168 0xcaU, 0x82U, 0xc9U, 0x7dU, 0xfaU, 0x59U, 0x47U, 0xf0U,
1169 0xadU, 0xd4U, 0xa2U, 0xafU, 0x9cU, 0xa4U, 0x72U, 0xc0U,
1170 0xb7U, 0xfdU, 0x93U, 0x26U, 0x36U, 0x3fU, 0xf7U, 0xccU,
1171 0x34U, 0xa5U, 0xe5U, 0xf1U, 0x71U, 0xd8U, 0x31U, 0x15U,
1172 0x04U, 0xc7U, 0x23U, 0xc3U, 0x18U, 0x96U, 0x05U, 0x9aU,
1173 0x07U, 0x12U, 0x80U, 0xe2U, 0xebU, 0x27U, 0xb2U, 0x75U,
1174 0x09U, 0x83U, 0x2cU, 0x1aU, 0x1bU, 0x6eU, 0x5aU, 0xa0U,
1175 0x52U, 0x3bU, 0xd6U, 0xb3U, 0x29U, 0xe3U, 0x2fU, 0x84U,
1176 0x53U, 0xd1U, 0x00U, 0xedU, 0x20U, 0xfcU, 0xb1U, 0x5bU,
1177 0x6aU, 0xcbU, 0xbeU, 0x39U, 0x4aU, 0x4cU, 0x58U, 0xcfU,
1178 0xd0U, 0xefU, 0xaaU, 0xfbU, 0x43U, 0x4dU, 0x33U, 0x85U,
1179 0x45U, 0xf9U, 0x02U, 0x7fU, 0x50U, 0x3cU, 0x9fU, 0xa8U,
1180 0x51U, 0xa3U, 0x40U, 0x8fU, 0x92U, 0x9dU, 0x38U, 0xf5U,
1181 0xbcU, 0xb6U, 0xdaU, 0x21U, 0x10U, 0xffU, 0xf3U, 0xd2U,
1182 0xcdU, 0x0cU, 0x13U, 0xecU, 0x5fU, 0x97U, 0x44U, 0x17U,
1183 0xc4U, 0xa7U, 0x7eU, 0x3dU, 0x64U, 0x5dU, 0x19U, 0x73U,
1184 0x60U, 0x81U, 0x4fU, 0xdcU, 0x22U, 0x2aU, 0x90U, 0x88U,
1185 0x46U, 0xeeU, 0xb8U, 0x14U, 0xdeU, 0x5eU, 0x0bU, 0xdbU,
1186 0xe0U, 0x32U, 0x3aU, 0x0aU, 0x49U, 0x06U, 0x24U, 0x5cU,
1187 0xc2U, 0xd3U, 0xacU, 0x62U, 0x91U, 0x95U, 0xe4U, 0x79U,
1188 0xe7U, 0xc8U, 0x37U, 0x6dU, 0x8dU, 0xd5U, 0x4eU, 0xa9U,
1189 0x6cU, 0x56U, 0xf4U, 0xeaU, 0x65U, 0x7aU, 0xaeU, 0x08U,
1190 0xbaU, 0x78U, 0x25U, 0x2eU, 0x1cU, 0xa6U, 0xb4U, 0xc6U,
1191 0xe8U, 0xddU, 0x74U, 0x1fU, 0x4bU, 0xbdU, 0x8bU, 0x8aU,
1192 0x70U, 0x3eU, 0xb5U, 0x66U, 0x48U, 0x03U, 0xf6U, 0x0eU,
1193 0x61U, 0x35U, 0x57U, 0xb9U, 0x86U, 0xc1U, 0x1dU, 0x9eU,
1194 0xe1U, 0xf8U, 0x98U, 0x11U, 0x69U, 0xd9U, 0x8eU, 0x94U,
1195 0x9bU, 0x1eU, 0x87U, 0xe9U, 0xceU, 0x55U, 0x28U, 0xdfU,
1196 0x8cU, 0xa1U, 0x89U, 0x0dU, 0xbfU, 0xe6U, 0x42U, 0x68U,
1197 0x41U, 0x99U, 0x2dU, 0x0fU, 0xb0U, 0x54U, 0xbbU, 0x16U
1198};
/*
 * rcon: per-iteration constants for the key expansion.
 *
 * AES_set_encrypt_key() XORs rcon[i] into the first word it derives on each
 * loop iteration; every constant occupies only the most significant byte of
 * the 32-bit word. Ten entries are provided.
 */
1199static const u32 rcon[] = {
1200 0x01000000, 0x02000000, 0x04000000, 0x08000000,
1201 0x10000000, 0x20000000, 0x40000000, 0x80000000,
1202 0x1B000000, 0x36000000,
1203 /* for 128-bit blocks, Rijndael never uses more than 10 rcon values */
1204};
1205
1206/**
1207 * Expand the cipher key into the encryption key schedule.
1208 */
1209int
1210AES_set_encrypt_key(const unsigned char *userKey, const int bits, AES_KEY *key)
1211{
1212 u32 *rk;
1213 int i = 0;
1214 u32 temp;
1215
1216 if (!userKey || !key)
1217 return -1;
1218 if (bits != 128 && bits != 192 && bits != 256)
1219 return -2;
1220
1221 rk = key->rd_key;
1222
1223 if (bits == 128)
1224 key->rounds = 10;
1225 else if (bits == 192)
1226 key->rounds = 12;
1227 else
1228 key->rounds = 14;
1229
1230 rk[0] = GETU32(userKey);
1231 rk[1] = GETU32(userKey + 4);
1232 rk[2] = GETU32(userKey + 8);
1233 rk[3] = GETU32(userKey + 12);
1234 if (bits == 128) {
1235 while (1) {
1236 temp = rk[3];
1237 rk[4] = rk[0] ^
1238 (Te4[(temp >> 16) & 0xff] << 24) ^
1239 (Te4[(temp >> 8) & 0xff] << 16) ^
1240 (Te4[(temp) & 0xff] << 8) ^
1241 (Te4[(temp >> 24)]) ^
1242 rcon[i];
1243 rk[5] = rk[1] ^ rk[4];
1244 rk[6] = rk[2] ^ rk[5];
1245 rk[7] = rk[3] ^ rk[6];
1246 if (++i == 10) {
1247 return 0;
1248 }
1249 rk += 4;
1250 }
1251 }
1252 rk[4] = GETU32(userKey + 16);
1253 rk[5] = GETU32(userKey + 20);
1254 if (bits == 192) {
1255 while (1) {
1256 temp = rk[5];
1257 rk[6] = rk[0] ^
1258 (Te4[(temp >> 16) & 0xff] << 24) ^
1259 (Te4[(temp >> 8) & 0xff] << 16) ^
1260 (Te4[(temp) & 0xff] << 8) ^
1261 (Te4[(temp >> 24)]) ^
1262 rcon[i];
1263 rk[7] = rk[1] ^ rk[6];
1264 rk[8] = rk[2] ^ rk[7];
1265 rk[9] = rk[3] ^ rk[8];
1266 if (++i == 8) {
1267 return 0;
1268 }
1269 rk[10] = rk[4] ^ rk[9];
1270 rk[11] = rk[5] ^ rk[10];
1271 rk += 6;
1272 }
1273 }
1274 rk[6] = GETU32(userKey + 24);
1275 rk[7] = GETU32(userKey + 28);
1276 if (bits == 256) {
1277 while (1) {
1278 temp = rk[7];
1279 rk[8] = rk[0] ^
1280 (Te4[(temp >> 16) & 0xff] << 24) ^
1281 (Te4[(temp >> 8) & 0xff] << 16) ^
1282 (Te4[(temp) & 0xff] << 8) ^
1283 (Te4[(temp >> 24)]) ^
1284 rcon[i];
1285 rk[9] = rk[1] ^ rk[8];
1286 rk[10] = rk[2] ^ rk[9];
1287 rk[11] = rk[3] ^ rk[10];
1288 if (++i == 7) {
1289 return 0;
1290 }
1291 temp = rk[11];
1292 rk[12] = rk[4] ^
1293 (Te4[(temp >> 24)] << 24) ^
1294 (Te4[(temp >> 16) & 0xff] << 16) ^
1295 (Te4[(temp >> 8) & 0xff] << 8) ^
1296 (Te4[(temp) & 0xff]);
1297 rk[13] = rk[5] ^ rk[12];
1298 rk[14] = rk[6] ^ rk[13];
1299 rk[15] = rk[7] ^ rk[14];
1300
1301 rk += 8;
1302 }
1303 }
1304 return 0;
1305}
1306
1307/**
1308 * Expand the cipher key into the decryption key schedule.
1309 */
1310int
1311AES_set_decrypt_key(const unsigned char *userKey, const int bits,
1312 AES_KEY *key)
1313{
1314 u32 *rk;
1315 int i, j, status;
1316 u32 temp;
1317
1318 /* first, start with an encryption schedule */
1319 status = AES_set_encrypt_key(userKey, bits, key);
1320 if (status < 0)
1321 return status;
1322
1323 rk = key->rd_key;
1324
1325 /* invert the order of the round keys: */
1326 for (i = 0, j = 4*(key->rounds); i < j; i += 4, j -= 4) {
1327 temp = rk[i];
1328 rk[i] = rk[j];
1329 rk[j] = temp;
1330 temp = rk[i + 1];
1331 rk[i + 1] = rk[j + 1];
1332 rk[j + 1] = temp;
1333 temp = rk[i + 2];
1334 rk[i + 2] = rk[j + 2];
1335 rk[j + 2] = temp;
1336 temp = rk[i + 3];
1337 rk[i + 3] = rk[j + 3];
1338 rk[j + 3] = temp;
1339 }
1340 /* apply the inverse MixColumn transform to all round keys but the first and the last: */
1341 for (i = 1; i < (key->rounds); i++) {
1342 rk += 4;
1343 for (j = 0; j < 4; j++) {
1344 u32 tp1, tp2, tp4, tp8, tp9, tpb, tpd, tpe, m;
1345
1346 tp1 = rk[j];
1347 m = tp1 & 0x80808080;
1348 tp2 = ((tp1 & 0x7f7f7f7f) << 1) ^
1349 ((m - (m >> 7)) & 0x1b1b1b1b);
1350 m = tp2 & 0x80808080;
1351 tp4 = ((tp2 & 0x7f7f7f7f) << 1) ^
1352 ((m - (m >> 7)) & 0x1b1b1b1b);
1353 m = tp4 & 0x80808080;
1354 tp8 = ((tp4 & 0x7f7f7f7f) << 1) ^
1355 ((m - (m >> 7)) & 0x1b1b1b1b);
1356 tp9 = tp8 ^ tp1;
1357 tpb = tp9 ^ tp2;
1358 tpd = tp9 ^ tp4;
1359 tpe = tp8 ^ tp4 ^ tp2;
1360
1361 rk[j] = tpe ^ crypto_rol_u32(tpd, 16) ^
1362 crypto_rol_u32(tp9, 24) ^ crypto_rol_u32(tpb, 8);
1363 }
1364 }
1365 return 0;
1366}
1367
1368#endif /* AES_ASM */ 1162#endif /* AES_ASM */