Diffstat (limited to 'src/lib/libcrypto/engine/eng_padlock.c')
-rw-r--r--   src/lib/libcrypto/engine/eng_padlock.c   1219
1 file changed, 1219 insertions, 0 deletions
diff --git a/src/lib/libcrypto/engine/eng_padlock.c b/src/lib/libcrypto/engine/eng_padlock.c
new file mode 100644
index 0000000000..743558ab33
--- /dev/null
+++ b/src/lib/libcrypto/engine/eng_padlock.c
@@ -0,0 +1,1219 @@
| 1 | /* | ||
| 2 | * Support for VIA PadLock Advanced Cryptography Engine (ACE) | ||
| 3 | * Written by Michal Ludvig <michal@logix.cz> | ||
| 4 | * http://www.logix.cz/michal | ||
| 5 | * | ||
| 6 | * Big thanks to Andy Polyakov for his help with optimization, | ||
| 7 | * assembler fixes, port to MS Windows and a lot of other | ||
| 8 | * valuable work on this engine! | ||
| 9 | */ | ||
| 10 | |||
| 11 | /* ==================================================================== | ||
| 12 | * Copyright (c) 1999-2001 The OpenSSL Project. All rights reserved. | ||
| 13 | * | ||
| 14 | * Redistribution and use in source and binary forms, with or without | ||
| 15 | * modification, are permitted provided that the following conditions | ||
| 16 | * are met: | ||
| 17 | * | ||
| 18 | * 1. Redistributions of source code must retain the above copyright | ||
| 19 | * notice, this list of conditions and the following disclaimer. | ||
| 20 | * | ||
| 21 | * 2. Redistributions in binary form must reproduce the above copyright | ||
| 22 | * notice, this list of conditions and the following disclaimer in | ||
| 23 | * the documentation and/or other materials provided with the | ||
| 24 | * distribution. | ||
| 25 | * | ||
| 26 | * 3. All advertising materials mentioning features or use of this | ||
| 27 | * software must display the following acknowledgment: | ||
| 28 | * "This product includes software developed by the OpenSSL Project | ||
| 29 | * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" | ||
| 30 | * | ||
| 31 | * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to | ||
| 32 | * endorse or promote products derived from this software without | ||
| 33 | * prior written permission. For written permission, please contact | ||
| 34 | * licensing@OpenSSL.org. | ||
| 35 | * | ||
| 36 | * 5. Products derived from this software may not be called "OpenSSL" | ||
| 37 | * nor may "OpenSSL" appear in their names without prior written | ||
| 38 | * permission of the OpenSSL Project. | ||
| 39 | * | ||
| 40 | * 6. Redistributions of any form whatsoever must retain the following | ||
| 41 | * acknowledgment: | ||
| 42 | * "This product includes software developed by the OpenSSL Project | ||
| 43 | * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" | ||
| 44 | * | ||
| 45 | * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY | ||
| 46 | * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
| 47 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | ||
| 48 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR | ||
| 49 | * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
| 50 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT | ||
| 51 | * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | ||
| 52 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | ||
| 53 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, | ||
| 54 | * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | ||
| 55 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED | ||
| 56 | * OF THE POSSIBILITY OF SUCH DAMAGE. | ||
| 57 | * ==================================================================== | ||
| 58 | * | ||
| 59 | * This product includes cryptographic software written by Eric Young | ||
| 60 | * (eay@cryptsoft.com). This product includes software written by Tim | ||
| 61 | * Hudson (tjh@cryptsoft.com). | ||
| 62 | * | ||
| 63 | */ | ||
| 64 | |||
| 65 | |||
| 66 | #include <stdio.h> | ||
| 67 | #include <string.h> | ||
| 68 | |||
| 69 | #include <openssl/opensslconf.h> | ||
| 70 | #include <openssl/crypto.h> | ||
| 71 | #include <openssl/dso.h> | ||
| 72 | #include <openssl/engine.h> | ||
| 73 | #include <openssl/evp.h> | ||
| 74 | #ifndef OPENSSL_NO_AES | ||
| 75 | #include <openssl/aes.h> | ||
| 76 | #endif | ||
| 77 | #include <openssl/rand.h> | ||
| 78 | #include <openssl/err.h> | ||
| 79 | |||
| 80 | #ifndef OPENSSL_NO_HW | ||
| 81 | #ifndef OPENSSL_NO_HW_PADLOCK | ||
| 82 | |||
| 83 | /* Attempt to have a single source for both 0.9.7 and 0.9.8 :-) */ | ||
| 84 | #if (OPENSSL_VERSION_NUMBER >= 0x00908000L) | ||
| 85 | # ifndef OPENSSL_NO_DYNAMIC_ENGINE | ||
| 86 | # define DYNAMIC_ENGINE | ||
| 87 | # endif | ||
| 88 | #elif (OPENSSL_VERSION_NUMBER >= 0x00907000L) | ||
| 89 | # ifdef ENGINE_DYNAMIC_SUPPORT | ||
| 90 | # define DYNAMIC_ENGINE | ||
| 91 | # endif | ||
| 92 | #else | ||
| 93 | # error "Only OpenSSL >= 0.9.7 is supported" | ||
| 94 | #endif | ||
| 95 | |||
| 96 | /* VIA PadLock AES is available *ONLY* on some x86 CPUs. | ||
| 97 | Not only does it not exist elsewhere, it cannot | ||
| 98 | even be compiled on other platforms! | ||
| 99 | |||
| 100 | In addition, because of the heavy use of inline assembler, | ||
| 101 | compiler choice is limited to GCC and Microsoft C. */ | ||
| 102 | #undef COMPILE_HW_PADLOCK | ||
| 103 | #if !defined(I386_ONLY) && !defined(OPENSSL_NO_INLINE_ASM) | ||
| 104 | # if (defined(__GNUC__) && (defined(__i386__) || defined(__i386))) || \ | ||
| 105 | (defined(_MSC_VER) && defined(_M_IX86)) | ||
| 106 | # define COMPILE_HW_PADLOCK | ||
| 107 | static ENGINE *ENGINE_padlock (void); | ||
| 108 | # endif | ||
| 109 | #endif | ||
| 110 | |||
| 111 | void ENGINE_load_padlock (void) | ||
| 112 | { | ||
| 113 | /* On non-x86 CPUs it just returns. */ | ||
| 114 | #ifdef COMPILE_HW_PADLOCK | ||
| 115 | ENGINE *toadd = ENGINE_padlock (); | ||
| 116 | if (!toadd) return; | ||
| 117 | ENGINE_add (toadd); | ||
| 118 | ENGINE_free (toadd); | ||
| 119 | ERR_clear_error (); | ||
| 120 | #endif | ||
| 121 | } | ||
| 122 | |||
| 123 | #ifdef COMPILE_HW_PADLOCK | ||
| 124 | /* We do these includes here to avoid header problems on platforms that | ||
| 125 | do not have the VIA padlock anyway... */ | ||
| 126 | #ifdef _MSC_VER | ||
| 127 | # include <malloc.h> | ||
| 128 | # define alloca _alloca | ||
| 129 | #elif defined(NETWARE_CLIB) && defined(__GNUC__) | ||
| 130 | void *alloca(size_t); | ||
| 131 | # define alloca(s) __builtin_alloca(s) | ||
| 132 | #else | ||
| 133 | # include <stdlib.h> | ||
| 134 | #endif | ||
| 135 | |||
| 136 | /* Function for ENGINE detection and control */ | ||
| 137 | static int padlock_available(void); | ||
| 138 | static int padlock_init(ENGINE *e); | ||
| 139 | |||
| 140 | /* RNG Stuff */ | ||
| 141 | static RAND_METHOD padlock_rand; | ||
| 142 | |||
| 143 | /* Cipher Stuff */ | ||
| 144 | #ifndef OPENSSL_NO_AES | ||
| 145 | static int padlock_ciphers(ENGINE *e, const EVP_CIPHER **cipher, const int **nids, int nid); | ||
| 146 | #endif | ||
| 147 | |||
| 148 | /* Engine names */ | ||
| 149 | static const char *padlock_id = "padlock"; | ||
| 150 | static char padlock_name[100]; | ||
| 151 | |||
| 152 | /* Available features */ | ||
| 153 | static int padlock_use_ace = 0; /* Advanced Cryptography Engine */ | ||
| 154 | static int padlock_use_rng = 0; /* Random Number Generator */ | ||
| 155 | #ifndef OPENSSL_NO_AES | ||
| 156 | static int padlock_aes_align_required = 1; | ||
| 157 | #endif | ||
| 158 | |||
| 159 | /* ===== Engine "management" functions ===== */ | ||
| 160 | |||
| 161 | /* Prepare the ENGINE structure for registration */ | ||
| 162 | static int | ||
| 163 | padlock_bind_helper(ENGINE *e) | ||
| 164 | { | ||
| 165 | /* Check available features */ | ||
| 166 | padlock_available(); | ||
| 167 | |||
| 168 | #if 1 /* disable RNG for now, see commentary in vicinity of RNG code */ | ||
| 169 | padlock_use_rng=0; | ||
| 170 | #endif | ||
| 171 | |||
| 172 | /* Generate a nice engine name with available features */ | ||
| 173 | BIO_snprintf(padlock_name, sizeof(padlock_name), | ||
| 174 | "VIA PadLock (%s, %s)", | ||
| 175 | padlock_use_rng ? "RNG" : "no-RNG", | ||
| 176 | padlock_use_ace ? "ACE" : "no-ACE"); | ||
| 177 | |||
| 178 | /* Register everything or return with an error */ | ||
| 179 | if (!ENGINE_set_id(e, padlock_id) || | ||
| 180 | !ENGINE_set_name(e, padlock_name) || | ||
| 181 | |||
| 182 | !ENGINE_set_init_function(e, padlock_init) || | ||
| 183 | #ifndef OPENSSL_NO_AES | ||
| 184 | (padlock_use_ace && !ENGINE_set_ciphers (e, padlock_ciphers)) || | ||
| 185 | #endif | ||
| 186 | (padlock_use_rng && !ENGINE_set_RAND (e, &padlock_rand))) { | ||
| 187 | return 0; | ||
| 188 | } | ||
| 189 | |||
| 190 | /* Everything looks good */ | ||
| 191 | return 1; | ||
| 192 | } | ||
| 193 | |||
| 194 | /* Constructor */ | ||
| 195 | static ENGINE * | ||
| 196 | ENGINE_padlock(void) | ||
| 197 | { | ||
| 198 | ENGINE *eng = ENGINE_new(); | ||
| 199 | |||
| 200 | if (!eng) { | ||
| 201 | return NULL; | ||
| 202 | } | ||
| 203 | |||
| 204 | if (!padlock_bind_helper(eng)) { | ||
| 205 | ENGINE_free(eng); | ||
| 206 | return NULL; | ||
| 207 | } | ||
| 208 | |||
| 209 | return eng; | ||
| 210 | } | ||
| 211 | |||
| 212 | /* Check availability of the engine */ | ||
| 213 | static int | ||
| 214 | padlock_init(ENGINE *e) | ||
| 215 | { | ||
| 216 | return (padlock_use_rng || padlock_use_ace); | ||
| 217 | } | ||
| 218 | |||
| 219 | /* This stuff is needed if this ENGINE is being compiled into a self-contained | ||
| 220 | * shared-library. | ||
| 221 | */ | ||
| 222 | #ifdef DYNAMIC_ENGINE | ||
| 223 | static int | ||
| 224 | padlock_bind_fn(ENGINE *e, const char *id) | ||
| 225 | { | ||
| 226 | if (id && (strcmp(id, padlock_id) != 0)) { | ||
| 227 | return 0; | ||
| 228 | } | ||
| 229 | |||
| 230 | if (!padlock_bind_helper(e)) { | ||
| 231 | return 0; | ||
| 232 | } | ||
| 233 | |||
| 234 | return 1; | ||
| 235 | } | ||
| 236 | |||
| 237 | IMPLEMENT_DYNAMIC_CHECK_FN () | ||
| 238 | IMPLEMENT_DYNAMIC_BIND_FN (padlock_bind_fn) | ||
| 239 | #endif /* DYNAMIC_ENGINE */ | ||
| 240 | |||
| 241 | /* ===== Here comes the "real" engine ===== */ | ||
| 242 | |||
| 243 | #ifndef OPENSSL_NO_AES | ||
| 244 | /* Some AES-related constants */ | ||
| 245 | #define AES_BLOCK_SIZE 16 | ||
| 246 | #define AES_KEY_SIZE_128 16 | ||
| 247 | #define AES_KEY_SIZE_192 24 | ||
| 248 | #define AES_KEY_SIZE_256 32 | ||
| 249 | |||
| 250 | /* Here we store the status information relevant to the | ||
| 251 | current context. */ | ||
| 252 | /* BIG FAT WARNING: | ||
| 253 | * Inline assembler in PADLOCK_XCRYPT_ASM() | ||
| 254 | * depends on the order of items in this structure. | ||
| 255 | * Don't blindly modify, reorder, etc! | ||
| 256 | */ | ||
| 257 | struct padlock_cipher_data | ||
| 258 | { | ||
| 259 | unsigned char iv[AES_BLOCK_SIZE]; /* Initialization vector */ | ||
| 260 | union { unsigned int pad[4]; | ||
| 261 | struct { | ||
| 262 | int rounds:4; | ||
| 263 | int dgst:1; /* n/a in C3 */ | ||
| 264 | int align:1; /* n/a in C3 */ | ||
| 265 | int ciphr:1; /* n/a in C3 */ | ||
| 266 | unsigned int keygen:1; | ||
| 267 | int interm:1; | ||
| 268 | unsigned int encdec:1; | ||
| 269 | int ksize:2; | ||
| 270 | } b; | ||
| 271 | } cword; /* Control word */ | ||
| 272 | AES_KEY ks; /* Encryption key */ | ||
| 273 | }; | ||
| 274 | |||
| 275 | /* | ||
| 276 | * Essentially this variable belongs in thread local storage. | ||
| 277 | * Having this variable global on the other hand can only cause | ||
| 278 | * a few bogus key reloads [if any at all on a single-CPU system], | ||
| 279 | * so we accept the penalty... | ||
| 280 | */ | ||
| 281 | static volatile struct padlock_cipher_data *padlock_saved_context; | ||
| 282 | #endif | ||
| 283 | |||
| 284 | /* | ||
| 285 | * ======================================================= | ||
| 286 | * Inline assembler section(s). | ||
| 287 | * ======================================================= | ||
| 288 | * Order of arguments is chosen to facilitate Windows port | ||
| 289 | * using __fastcall calling convention. If you wish to add | ||
| 290 | * more routines, keep in mind that first __fastcall | ||
| 291 | * argument is passed in %ecx and second - in %edx. | ||
| 292 | * ======================================================= | ||
| 293 | */ | ||
| 294 | #if defined(__GNUC__) && __GNUC__>=2 | ||
| 295 | /* | ||
| 296 | * As for the excessive "push %ebx"/"pop %ebx" found all over: | ||
| 297 | * When generating position-independent code GCC won't let | ||
| 298 | * us use "b" in assembler templates nor even respect "ebx" | ||
| 299 | * in "clobber description." Therefore the trouble... | ||
| 300 | */ | ||
| 301 | |||
| 302 | /* Helper function - check if a CPUID instruction | ||
| 303 | is available on this CPU */ | ||
| 304 | static int | ||
| 305 | padlock_insn_cpuid_available(void) | ||
| 306 | { | ||
| 307 | int result = -1; | ||
| 308 | |||
| 309 | /* We're checking whether bit #21 of EFLAGS | ||
| 310 | can be toggled. If it can, CPUID is available. */ | ||
| 311 | asm volatile ( | ||
| 312 | "pushf\n" | ||
| 313 | "popl %%eax\n" | ||
| 314 | "xorl $0x200000, %%eax\n" | ||
| 315 | "movl %%eax, %%ecx\n" | ||
| 316 | "andl $0x200000, %%ecx\n" | ||
| 317 | "pushl %%eax\n" | ||
| 318 | "popf\n" | ||
| 319 | "pushf\n" | ||
| 320 | "popl %%eax\n" | ||
| 321 | "andl $0x200000, %%eax\n" | ||
| 322 | "xorl %%eax, %%ecx\n" | ||
| 323 | "movl %%ecx, %0\n" | ||
| 324 | : "=r" (result) : : "eax", "ecx"); | ||
| 325 | |||
| 326 | return (result == 0); | ||
| 327 | } | ||
| 328 | |||
| 329 | /* Load supported features of the CPU to see if | ||
| 330 | the PadLock is available. */ | ||
| 331 | static int | ||
| 332 | padlock_available(void) | ||
| 333 | { | ||
| 334 | char vendor_string[16]; | ||
| 335 | unsigned int eax, edx; | ||
| 336 | |||
| 337 | /* First check if the CPUID instruction is available at all... */ | ||
| 338 | if (! padlock_insn_cpuid_available()) | ||
| 339 | return 0; | ||
| 340 | |||
| 341 | /* Are we running on the Centaur (VIA) CPU? */ | ||
| 342 | eax = 0x00000000; | ||
| 343 | vendor_string[12] = 0; | ||
| 344 | asm volatile ( | ||
| 345 | "pushl %%ebx\n" | ||
| 346 | "cpuid\n" | ||
| 347 | "movl %%ebx,(%%edi)\n" | ||
| 348 | "movl %%edx,4(%%edi)\n" | ||
| 349 | "movl %%ecx,8(%%edi)\n" | ||
| 350 | "popl %%ebx" | ||
| 351 | : "+a"(eax) : "D"(vendor_string) : "ecx", "edx"); | ||
| 352 | if (strcmp(vendor_string, "CentaurHauls") != 0) | ||
| 353 | return 0; | ||
| 354 | |||
| 355 | /* Check for Centaur Extended Feature Flags presence */ | ||
| 356 | eax = 0xC0000000; | ||
| 357 | asm volatile ("pushl %%ebx; cpuid; popl %%ebx" | ||
| 358 | : "+a"(eax) : : "ecx", "edx"); | ||
| 359 | if (eax < 0xC0000001) | ||
| 360 | return 0; | ||
| 361 | |||
| 362 | /* Read the Centaur Extended Feature Flags */ | ||
| 363 | eax = 0xC0000001; | ||
| 364 | asm volatile ("pushl %%ebx; cpuid; popl %%ebx" | ||
| 365 | : "+a"(eax), "=d"(edx) : : "ecx"); | ||
| 366 | |||
| 367 | /* Fill up some flags */ | ||
| 368 | padlock_use_ace = ((edx & (0x3<<6)) == (0x3<<6)); | ||
| 369 | padlock_use_rng = ((edx & (0x3<<2)) == (0x3<<2)); | ||
| 370 | |||
| 371 | return padlock_use_ace + padlock_use_rng; | ||
| 372 | } | ||
| 373 | |||
| 374 | #ifndef OPENSSL_NO_AES | ||
| 375 | /* Our own htonl()/ntohl() */ | ||
| 376 | static inline void | ||
| 377 | padlock_bswapl(AES_KEY *ks) | ||
| 378 | { | ||
| 379 | size_t i = sizeof(ks->rd_key)/sizeof(ks->rd_key[0]); | ||
| 380 | unsigned int *key = ks->rd_key; | ||
| 381 | |||
| 382 | while (i--) { | ||
| 383 | asm volatile ("bswapl %0" : "+r"(*key)); | ||
| 384 | key++; | ||
| 385 | } | ||
| 386 | } | ||
| 387 | #endif | ||
| 388 | |||
| 389 | /* Force key reload from memory to the CPU microcode. | ||
| 390 | Loading EFLAGS from the stack clears EFLAGS[30] | ||
| 391 | which does the trick. */ | ||
| 392 | static inline void | ||
| 393 | padlock_reload_key(void) | ||
| 394 | { | ||
| 395 | asm volatile ("pushfl; popfl"); | ||
| 396 | } | ||
| 397 | |||
| 398 | #ifndef OPENSSL_NO_AES | ||
| 399 | /* | ||
| 400 | * This is heuristic key context tracing. At first one | ||
| 401 | * believes that one should use atomic swap instructions, | ||
| 402 | * but it's not actually necessary. The point is that if | ||
| 403 | * padlock_saved_context was changed by another thread | ||
| 404 | * after we've read it and before we compare it with cdata, | ||
| 405 | * our key *shall* be reloaded upon thread context switch | ||
| 406 | * and we are therefore set in either case... | ||
| 407 | */ | ||
| 408 | static inline void | ||
| 409 | padlock_verify_context(struct padlock_cipher_data *cdata) | ||
| 410 | { | ||
| 411 | asm volatile ( | ||
| 412 | "pushfl\n" | ||
| 413 | " btl $30,(%%esp)\n" | ||
| 414 | " jnc 1f\n" | ||
| 415 | " cmpl %2,%1\n" | ||
| 416 | " je 1f\n" | ||
| 417 | " popfl\n" | ||
| 418 | " subl $4,%%esp\n" | ||
| 419 | "1: addl $4,%%esp\n" | ||
| 420 | " movl %2,%0" | ||
| 421 | :"+m"(padlock_saved_context) | ||
| 422 | : "r"(padlock_saved_context), "r"(cdata) : "cc"); | ||
| 423 | } | ||
| 424 | |||
| 425 | /* Template for padlock_xcrypt_* modes */ | ||
| 426 | /* BIG FAT WARNING: | ||
| 427 | * The offsets used with 'leal' instructions | ||
| 428 | * describe items of the 'padlock_cipher_data' | ||
| 429 | * structure. | ||
| 430 | */ | ||
| 431 | #define PADLOCK_XCRYPT_ASM(name,rep_xcrypt) \ | ||
| 432 | static inline void *name(size_t cnt, \ | ||
| 433 | struct padlock_cipher_data *cdata, \ | ||
| 434 | void *out, const void *inp) \ | ||
| 435 | { void *iv; \ | ||
| 436 | asm volatile ( "pushl %%ebx\n" \ | ||
| 437 | " leal 16(%0),%%edx\n" \ | ||
| 438 | " leal 32(%0),%%ebx\n" \ | ||
| 439 | rep_xcrypt "\n" \ | ||
| 440 | " popl %%ebx" \ | ||
| 441 | : "=a"(iv), "=c"(cnt), "=D"(out), "=S"(inp) \ | ||
| 442 | : "0"(cdata), "1"(cnt), "2"(out), "3"(inp) \ | ||
| 443 | : "edx", "cc", "memory"); \ | ||
| 444 | return iv; \ | ||
| 445 | } | ||
| 446 | |||
| 447 | /* Generate all functions with appropriate opcodes */ | ||
| 448 | PADLOCK_XCRYPT_ASM(padlock_xcrypt_ecb, ".byte 0xf3,0x0f,0xa7,0xc8") /* rep xcryptecb */ | ||
| 449 | PADLOCK_XCRYPT_ASM(padlock_xcrypt_cbc, ".byte 0xf3,0x0f,0xa7,0xd0") /* rep xcryptcbc */ | ||
| 450 | PADLOCK_XCRYPT_ASM(padlock_xcrypt_cfb, ".byte 0xf3,0x0f,0xa7,0xe0") /* rep xcryptcfb */ | ||
| 451 | PADLOCK_XCRYPT_ASM(padlock_xcrypt_ofb, ".byte 0xf3,0x0f,0xa7,0xe8") /* rep xcryptofb */ | ||
| 452 | #endif | ||
| 453 | |||
| 454 | /* The RNG call itself */ | ||
| 455 | static inline unsigned int | ||
| 456 | padlock_xstore(void *addr, unsigned int edx_in) | ||
| 457 | { | ||
| 458 | unsigned int eax_out; | ||
| 459 | |||
| 460 | asm volatile (".byte 0x0f,0xa7,0xc0" /* xstore */ | ||
| 461 | : "=a"(eax_out),"=m"(*(unsigned *)addr) | ||
| 462 | : "D"(addr), "d" (edx_in) | ||
| 463 | ); | ||
| 464 | |||
| 465 | return eax_out; | ||
| 466 | } | ||
| 467 | |||
| 468 | /* Why not inline 'rep movsd'? I failed to find information on what | ||
| 469 | * value of the Direction Flag one can expect and consequently have to | ||
| 470 | * apply the "better-safe-than-sorry" approach and assume "undefined." | ||
| 471 | * I could explicitly clear it and restore the original value upon | ||
| 472 | * return from padlock_aes_cipher, but it's presumably too much | ||
| 473 | * trouble for too little gain... | ||
| 474 | * | ||
| 475 | * In case you wonder 'rep xcrypt*' instructions above are *not* | ||
| 476 | * affected by the Direction Flag and pointers advance toward | ||
| 477 | * larger addresses unconditionally. | ||
| 478 | */ | ||
| 479 | static inline unsigned char * | ||
| 480 | padlock_memcpy(void *dst,const void *src,size_t n) | ||
| 481 | { | ||
| 482 | long *d=dst; | ||
| 483 | const long *s=src; | ||
| 484 | |||
| 485 | n /= sizeof(*d); | ||
| 486 | do { *d++ = *s++; } while (--n); | ||
| 487 | |||
| 488 | return dst; | ||
| 489 | } | ||
| 490 | |||
| 491 | #elif defined(_MSC_VER) | ||
| 492 | /* | ||
| 493 | * Unlike GCC these are real functions. To minimize the impact | ||
| 494 | * on performance we adhere to the __fastcall calling convention | ||
| 495 | * so that the first two arguments are passed in %ecx and %edx. | ||
| 496 | * Which suits very well, as the instructions in question use | ||
| 497 | * both %ecx and %edx as input:-) | ||
| 498 | */ | ||
| 499 | #define REP_XCRYPT(code) \ | ||
| 500 | _asm _emit 0xf3 \ | ||
| 501 | _asm _emit 0x0f _asm _emit 0xa7 \ | ||
| 502 | _asm _emit code | ||
| 503 | |||
| 504 | /* BIG FAT WARNING: | ||
| 505 | * The offsets used with 'lea' instructions | ||
| 506 | * describe items of the 'padlock_cipher_data' | ||
| 507 | * structure. | ||
| 508 | */ | ||
| 509 | #define PADLOCK_XCRYPT_ASM(name,code) \ | ||
| 510 | static void * __fastcall \ | ||
| 511 | name (size_t cnt, void *cdata, \ | ||
| 512 | void *outp, const void *inp) \ | ||
| 513 | { _asm mov eax,edx \ | ||
| 514 | _asm lea edx,[eax+16] \ | ||
| 515 | _asm lea ebx,[eax+32] \ | ||
| 516 | _asm mov edi,outp \ | ||
| 517 | _asm mov esi,inp \ | ||
| 518 | REP_XCRYPT(code) \ | ||
| 519 | } | ||
| 520 | |||
| 521 | PADLOCK_XCRYPT_ASM(padlock_xcrypt_ecb,0xc8) | ||
| 522 | PADLOCK_XCRYPT_ASM(padlock_xcrypt_cbc,0xd0) | ||
| 523 | PADLOCK_XCRYPT_ASM(padlock_xcrypt_cfb,0xe0) | ||
| 524 | PADLOCK_XCRYPT_ASM(padlock_xcrypt_ofb,0xe8) | ||
| 525 | |||
| 526 | static int __fastcall | ||
| 527 | padlock_xstore(void *outp,unsigned int code) | ||
| 528 | { _asm mov edi,ecx | ||
| 529 | _asm _emit 0x0f _asm _emit 0xa7 _asm _emit 0xc0 | ||
| 530 | } | ||
| 531 | |||
| 532 | static void __fastcall | ||
| 533 | padlock_reload_key(void) | ||
| 534 | { _asm pushfd _asm popfd } | ||
| 535 | |||
| 536 | static void __fastcall | ||
| 537 | padlock_verify_context(void *cdata) | ||
| 538 | { _asm { | ||
| 539 | pushfd | ||
| 540 | bt DWORD PTR[esp],30 | ||
| 541 | jnc skip | ||
| 542 | cmp ecx,padlock_saved_context | ||
| 543 | je skip | ||
| 544 | popfd | ||
| 545 | sub esp,4 | ||
| 546 | skip: add esp,4 | ||
| 547 | mov padlock_saved_context,ecx | ||
| 548 | } | ||
| 549 | } | ||
| 550 | |||
| 551 | static int | ||
| 552 | padlock_available(void) | ||
| 553 | { _asm { | ||
| 554 | pushfd | ||
| 555 | pop eax | ||
| 556 | mov ecx,eax | ||
| 557 | xor eax,1<<21 | ||
| 558 | push eax | ||
| 559 | popfd | ||
| 560 | pushfd | ||
| 561 | pop eax | ||
| 562 | xor eax,ecx | ||
| 563 | bt eax,21 | ||
| 564 | jnc noluck | ||
| 565 | mov eax,0 | ||
| 566 | cpuid | ||
| 567 | xor eax,eax | ||
| 568 | cmp ebx,'tneC' | ||
| 569 | jne noluck | ||
| 570 | cmp edx,'Hrua' | ||
| 571 | jne noluck | ||
| 572 | cmp ecx,'slua' | ||
| 573 | jne noluck | ||
| 574 | mov eax,0xC0000000 | ||
| 575 | cpuid | ||
| 576 | mov edx,eax | ||
| 577 | xor eax,eax | ||
| 578 | cmp edx,0xC0000001 | ||
| 579 | jb noluck | ||
| 580 | mov eax,0xC0000001 | ||
| 581 | cpuid | ||
| 582 | xor eax,eax | ||
| 583 | bt edx,6 | ||
| 584 | jnc skip_a | ||
| 585 | bt edx,7 | ||
| 586 | jnc skip_a | ||
| 587 | mov padlock_use_ace,1 | ||
| 588 | inc eax | ||
| 589 | skip_a: bt edx,2 | ||
| 590 | jnc skip_r | ||
| 591 | bt edx,3 | ||
| 592 | jnc skip_r | ||
| 593 | mov padlock_use_rng,1 | ||
| 594 | inc eax | ||
| 595 | skip_r: | ||
| 596 | noluck: | ||
| 597 | } | ||
| 598 | } | ||
| 599 | |||
| 600 | static void __fastcall | ||
| 601 | padlock_bswapl(void *key) | ||
| 602 | { _asm { | ||
| 603 | pushfd | ||
| 604 | cld | ||
| 605 | mov esi,ecx | ||
| 606 | mov edi,ecx | ||
| 607 | mov ecx,60 | ||
| 608 | up: lodsd | ||
| 609 | bswap eax | ||
| 610 | stosd | ||
| 611 | loop up | ||
| 612 | popfd | ||
| 613 | } | ||
| 614 | } | ||
| 615 | |||
| 616 | /* MS actually specifies the status of the Direction Flag and the compiler | ||
| 617 | * even manages to compile the following as 'rep movsd' all by itself... | ||
| 618 | */ | ||
| 619 | #define padlock_memcpy(o,i,n) ((unsigned char *)memcpy((o),(i),(n)&~3U)) | ||
| 620 | #endif | ||
| 621 | |||
| 622 | /* ===== AES encryption/decryption ===== */ | ||
| 623 | #ifndef OPENSSL_NO_AES | ||
| 624 | |||
| 625 | #if defined(NID_aes_128_cfb128) && ! defined (NID_aes_128_cfb) | ||
| 626 | #define NID_aes_128_cfb NID_aes_128_cfb128 | ||
| 627 | #endif | ||
| 628 | |||
| 629 | #if defined(NID_aes_128_ofb128) && ! defined (NID_aes_128_ofb) | ||
| 630 | #define NID_aes_128_ofb NID_aes_128_ofb128 | ||
| 631 | #endif | ||
| 632 | |||
| 633 | #if defined(NID_aes_192_cfb128) && ! defined (NID_aes_192_cfb) | ||
| 634 | #define NID_aes_192_cfb NID_aes_192_cfb128 | ||
| 635 | #endif | ||
| 636 | |||
| 637 | #if defined(NID_aes_192_ofb128) && ! defined (NID_aes_192_ofb) | ||
| 638 | #define NID_aes_192_ofb NID_aes_192_ofb128 | ||
| 639 | #endif | ||
| 640 | |||
| 641 | #if defined(NID_aes_256_cfb128) && ! defined (NID_aes_256_cfb) | ||
| 642 | #define NID_aes_256_cfb NID_aes_256_cfb128 | ||
| 643 | #endif | ||
| 644 | |||
| 645 | #if defined(NID_aes_256_ofb128) && ! defined (NID_aes_256_ofb) | ||
| 646 | #define NID_aes_256_ofb NID_aes_256_ofb128 | ||
| 647 | #endif | ||
| 648 | |||
| 649 | /* List of supported ciphers. */ | ||
| 650 | static int padlock_cipher_nids[] = { | ||
| 651 | NID_aes_128_ecb, | ||
| 652 | NID_aes_128_cbc, | ||
| 653 | NID_aes_128_cfb, | ||
| 654 | NID_aes_128_ofb, | ||
| 655 | |||
| 656 | NID_aes_192_ecb, | ||
| 657 | NID_aes_192_cbc, | ||
| 658 | NID_aes_192_cfb, | ||
| 659 | NID_aes_192_ofb, | ||
| 660 | |||
| 661 | NID_aes_256_ecb, | ||
| 662 | NID_aes_256_cbc, | ||
| 663 | NID_aes_256_cfb, | ||
| 664 | NID_aes_256_ofb, | ||
| 665 | }; | ||
| 666 | static int padlock_cipher_nids_num = (sizeof(padlock_cipher_nids)/ | ||
| 667 | sizeof(padlock_cipher_nids[0])); | ||
| 668 | |||
| 669 | /* Function prototypes ... */ | ||
| 670 | static int padlock_aes_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, | ||
| 671 | const unsigned char *iv, int enc); | ||
| 672 | static int padlock_aes_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, | ||
| 673 | const unsigned char *in, size_t nbytes); | ||
| 674 | |||
| 675 | #define NEAREST_ALIGNED(ptr) ( (unsigned char *)(ptr) + \ | ||
| 676 | ( (0x10 - ((size_t)(ptr) & 0x0F)) & 0x0F ) ) | ||
| 677 | #define ALIGNED_CIPHER_DATA(ctx) ((struct padlock_cipher_data *)\ | ||
| 678 | NEAREST_ALIGNED(ctx->cipher_data)) | ||
| 679 | |||
| 680 | #define EVP_CIPHER_block_size_ECB AES_BLOCK_SIZE | ||
| 681 | #define EVP_CIPHER_block_size_CBC AES_BLOCK_SIZE | ||
| 682 | #define EVP_CIPHER_block_size_OFB 1 | ||
| 683 | #define EVP_CIPHER_block_size_CFB 1 | ||
| 684 | |||
| 685 | /* Declaring so many ciphers by hand would be a pain. | ||
| 686 | Instead introduce a bit of preprocessor magic :-) */ | ||
| 687 | #define DECLARE_AES_EVP(ksize,lmode,umode) \ | ||
| 688 | static const EVP_CIPHER padlock_aes_##ksize##_##lmode = { \ | ||
| 689 | NID_aes_##ksize##_##lmode, \ | ||
| 690 | EVP_CIPHER_block_size_##umode, \ | ||
| 691 | AES_KEY_SIZE_##ksize, \ | ||
| 692 | AES_BLOCK_SIZE, \ | ||
| 693 | 0 | EVP_CIPH_##umode##_MODE, \ | ||
| 694 | padlock_aes_init_key, \ | ||
| 695 | padlock_aes_cipher, \ | ||
| 696 | NULL, \ | ||
| 697 | sizeof(struct padlock_cipher_data) + 16, \ | ||
| 698 | EVP_CIPHER_set_asn1_iv, \ | ||
| 699 | EVP_CIPHER_get_asn1_iv, \ | ||
| 700 | NULL, \ | ||
| 701 | NULL \ | ||
| 702 | } | ||
| 703 | |||
| 704 | DECLARE_AES_EVP(128,ecb,ECB); | ||
| 705 | DECLARE_AES_EVP(128,cbc,CBC); | ||
| 706 | DECLARE_AES_EVP(128,cfb,CFB); | ||
| 707 | DECLARE_AES_EVP(128,ofb,OFB); | ||
| 708 | |||
| 709 | DECLARE_AES_EVP(192,ecb,ECB); | ||
| 710 | DECLARE_AES_EVP(192,cbc,CBC); | ||
| 711 | DECLARE_AES_EVP(192,cfb,CFB); | ||
| 712 | DECLARE_AES_EVP(192,ofb,OFB); | ||
| 713 | |||
| 714 | DECLARE_AES_EVP(256,ecb,ECB); | ||
| 715 | DECLARE_AES_EVP(256,cbc,CBC); | ||
| 716 | DECLARE_AES_EVP(256,cfb,CFB); | ||
| 717 | DECLARE_AES_EVP(256,ofb,OFB); | ||
| 718 | |||
| 719 | static int | ||
| 720 | padlock_ciphers (ENGINE *e, const EVP_CIPHER **cipher, const int **nids, int nid) | ||
| 721 | { | ||
| 722 | /* No specific cipher => return a list of supported nids ... */ | ||
| 723 | if (!cipher) { | ||
| 724 | *nids = padlock_cipher_nids; | ||
| 725 | return padlock_cipher_nids_num; | ||
| 726 | } | ||
| 727 | |||
| 728 | /* ... or the requested "cipher" otherwise */ | ||
| 729 | switch (nid) { | ||
| 730 | case NID_aes_128_ecb: | ||
| 731 | *cipher = &padlock_aes_128_ecb; | ||
| 732 | break; | ||
| 733 | case NID_aes_128_cbc: | ||
| 734 | *cipher = &padlock_aes_128_cbc; | ||
| 735 | break; | ||
| 736 | case NID_aes_128_cfb: | ||
| 737 | *cipher = &padlock_aes_128_cfb; | ||
| 738 | break; | ||
| 739 | case NID_aes_128_ofb: | ||
| 740 | *cipher = &padlock_aes_128_ofb; | ||
| 741 | break; | ||
| 742 | |||
| 743 | case NID_aes_192_ecb: | ||
| 744 | *cipher = &padlock_aes_192_ecb; | ||
| 745 | break; | ||
| 746 | case NID_aes_192_cbc: | ||
| 747 | *cipher = &padlock_aes_192_cbc; | ||
| 748 | break; | ||
| 749 | case NID_aes_192_cfb: | ||
| 750 | *cipher = &padlock_aes_192_cfb; | ||
| 751 | break; | ||
| 752 | case NID_aes_192_ofb: | ||
| 753 | *cipher = &padlock_aes_192_ofb; | ||
| 754 | break; | ||
| 755 | |||
| 756 | case NID_aes_256_ecb: | ||
| 757 | *cipher = &padlock_aes_256_ecb; | ||
| 758 | break; | ||
| 759 | case NID_aes_256_cbc: | ||
| 760 | *cipher = &padlock_aes_256_cbc; | ||
| 761 | break; | ||
| 762 | case NID_aes_256_cfb: | ||
| 763 | *cipher = &padlock_aes_256_cfb; | ||
| 764 | break; | ||
| 765 | case NID_aes_256_ofb: | ||
| 766 | *cipher = &padlock_aes_256_ofb; | ||
| 767 | break; | ||
| 768 | |||
| 769 | default: | ||
| 770 | /* Sorry, we don't support this NID */ | ||
| 771 | *cipher = NULL; | ||
| 772 | return 0; | ||
| 773 | } | ||
| 774 | |||
| 775 | return 1; | ||
| 776 | } | ||
| 777 | |||
| 778 | /* Prepare the encryption key for PadLock usage */ | ||
| 779 | static int | ||
| 780 | padlock_aes_init_key (EVP_CIPHER_CTX *ctx, const unsigned char *key, | ||
| 781 | const unsigned char *iv, int enc) | ||
| 782 | { | ||
| 783 | struct padlock_cipher_data *cdata; | ||
| 784 | int key_len = EVP_CIPHER_CTX_key_length(ctx) * 8; | ||
| 785 | |||
| 786 | if (key==NULL) return 0; /* ERROR */ | ||
| 787 | |||
| 788 | cdata = ALIGNED_CIPHER_DATA(ctx); | ||
| 789 | memset(cdata, 0, sizeof(struct padlock_cipher_data)); | ||
| 790 | |||
| 791 | /* Prepare Control word. */ | ||
| 792 | if (EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_OFB_MODE) | ||
| 793 | cdata->cword.b.encdec = 0; | ||
| 794 | else | ||
| 795 | cdata->cword.b.encdec = (ctx->encrypt == 0); | ||
| 796 | cdata->cword.b.rounds = 10 + (key_len - 128) / 32; | ||
| 797 | cdata->cword.b.ksize = (key_len - 128) / 64; | ||
| 798 | |||
| 799 | switch(key_len) { | ||
| 800 | case 128: | ||
| 801 | /* PadLock can generate an extended key for | ||
| 802 | AES128 in hardware */ | ||
| 803 | memcpy(cdata->ks.rd_key, key, AES_KEY_SIZE_128); | ||
| 804 | cdata->cword.b.keygen = 0; | ||
| 805 | break; | ||
| 806 | |||
| 807 | case 192: | ||
| 808 | case 256: | ||
| 809 | /* Generate an extended AES key in software. | ||
| 810 | Needed for AES192/AES256 */ | ||
| 811 | /* Well, the above applies to Stepping 8 CPUs | ||
| 812 | and is listed as hardware errata. They most | ||
| 813 | likely will fix it at some point and then | ||
| 814 | a check for stepping would be due here. */ | ||
| 815 | if (EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_CFB_MODE || | ||
| 816 | EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_OFB_MODE || | ||
| 817 | enc) | ||
| 818 | AES_set_encrypt_key(key, key_len, &cdata->ks); | ||
| 819 | else | ||
| 820 | AES_set_decrypt_key(key, key_len, &cdata->ks); | ||
| 821 | #ifndef AES_ASM | ||
| 822 | /* OpenSSL C functions use byte-swapped extended key. */ | ||
| 823 | padlock_bswapl(&cdata->ks); | ||
| 824 | #endif | ||
| 825 | cdata->cword.b.keygen = 1; | ||
| 826 | break; | ||
| 827 | |||
| 828 | default: | ||
| 829 | /* ERROR */ | ||
| 830 | return 0; | ||
| 831 | } | ||
| 832 | |||
| 833 | /* | ||
| 834 | * This is done to cover cases where the user reuses the | ||
| 835 | * context for a new key. The catch is that if we don't do | ||
| 836 | * this, padlock_aes_cipher might proceed with the old key... | ||
| 837 | */ | ||
| 838 | padlock_reload_key (); | ||
| 839 | |||
| 840 | return 1; | ||
| 841 | } | ||
| 842 | |||
| 843 | /* | ||
| 844 | * Simplified version of padlock_aes_cipher() used when | ||
| 845 | * 1) both input and output buffers are at aligned addresses, | ||
| 846 | * or when | ||
| 847 | * 2) running on a newer CPU that doesn't require aligned buffers. | ||
| 848 | */ | ||
| 849 | static int | ||
| 850 | padlock_aes_cipher_omnivorous(EVP_CIPHER_CTX *ctx, unsigned char *out_arg, | ||
| 851 | const unsigned char *in_arg, size_t nbytes) | ||
| 852 | { | ||
| 853 | struct padlock_cipher_data *cdata; | ||
| 854 | void *iv; | ||
| 855 | |||
| 856 | cdata = ALIGNED_CIPHER_DATA(ctx); | ||
| 857 | padlock_verify_context(cdata); | ||
| 858 | |||
| 859 | switch (EVP_CIPHER_CTX_mode(ctx)) { | ||
| 860 | case EVP_CIPH_ECB_MODE: | ||
| 861 | padlock_xcrypt_ecb(nbytes/AES_BLOCK_SIZE, cdata, out_arg, in_arg); | ||
| 862 | break; | ||
| 863 | |||
| 864 | case EVP_CIPH_CBC_MODE: | ||
| 865 | memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE); | ||
| 866 | iv = padlock_xcrypt_cbc(nbytes/AES_BLOCK_SIZE, cdata, out_arg, in_arg); | ||
| 867 | memcpy(ctx->iv, iv, AES_BLOCK_SIZE); | ||
| 868 | break; | ||
| 869 | |||
| 870 | case EVP_CIPH_CFB_MODE: | ||
| 871 | memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE); | ||
| 872 | iv = padlock_xcrypt_cfb(nbytes/AES_BLOCK_SIZE, cdata, out_arg, in_arg); | ||
| 873 | memcpy(ctx->iv, iv, AES_BLOCK_SIZE); | ||
| 874 | break; | ||
| 875 | |||
| 876 | case EVP_CIPH_OFB_MODE: | ||
| 877 | memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE); | ||
| 878 | padlock_xcrypt_ofb(nbytes/AES_BLOCK_SIZE, cdata, out_arg, in_arg); | ||
| 879 | memcpy(ctx->iv, cdata->iv, AES_BLOCK_SIZE); | ||
| 880 | break; | ||
| 881 | |||
| 882 | default: | ||
| 883 | return 0; | ||
| 884 | } | ||
| 885 | |||
| 886 | memset(cdata->iv, 0, AES_BLOCK_SIZE); | ||
| 887 | |||
| 888 | return 1; | ||
| 889 | } | ||
| 890 | |||
| 891 | #ifndef PADLOCK_CHUNK | ||
| 892 | # define PADLOCK_CHUNK 512 /* Must be a power of 2 no smaller than 16 */ | ||
| 893 | #endif | ||
| 894 | #if PADLOCK_CHUNK<16 || PADLOCK_CHUNK&(PADLOCK_CHUNK-1) | ||
| 895 | # error "insane PADLOCK_CHUNK..." | ||
| 896 | #endif | ||
| 897 | |||
| 898 | /* Re-align the arguments to 16-byte boundaries and run the | ||
| 899 | encryption function itself. This function is not AES-specific. */ | ||
| 900 | static int | ||
| 901 | padlock_aes_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out_arg, | ||
| 902 | const unsigned char *in_arg, size_t nbytes) | ||
| 903 | { | ||
| 904 | struct padlock_cipher_data *cdata; | ||
| 905 | const void *inp; | ||
| 906 | unsigned char *out; | ||
| 907 | void *iv; | ||
| 908 | int inp_misaligned, out_misaligned, realign_in_loop; | ||
| 909 | size_t chunk, allocated=0; | ||
| 910 | |||
| 911 | /* ctx->num is maintained in byte-oriented modes, | ||
| 912 | such as CFB and OFB... */ | ||
| 913 | if ((chunk = ctx->num)) { /* borrow chunk variable */ | ||
| 914 | unsigned char *ivp=ctx->iv; | ||
| 915 | |||
| 916 | switch (EVP_CIPHER_CTX_mode(ctx)) { | ||
| 917 | case EVP_CIPH_CFB_MODE: | ||
| 918 | if (chunk >= AES_BLOCK_SIZE) | ||
| 919 | return 0; /* bogus value */ | ||
| 920 | |||
| 921 | if (ctx->encrypt) | ||
| 922 | while (chunk<AES_BLOCK_SIZE && nbytes!=0) { | ||
| 923 | ivp[chunk] = *(out_arg++) = *(in_arg++) ^ ivp[chunk]; | ||
| 924 | chunk++, nbytes--; | ||
| 925 | } | ||
| 926 | else while (chunk<AES_BLOCK_SIZE && nbytes!=0) { | ||
| 927 | unsigned char c = *(in_arg++); | ||
| 928 | *(out_arg++) = c ^ ivp[chunk]; | ||
| 929 | ivp[chunk++] = c, nbytes--; | ||
| 930 | } | ||
| 931 | |||
| 932 | ctx->num = chunk%AES_BLOCK_SIZE; | ||
| 933 | break; | ||
| 934 | case EVP_CIPH_OFB_MODE: | ||
| 935 | if (chunk >= AES_BLOCK_SIZE) | ||
| 936 | return 0; /* bogus value */ | ||
| 937 | |||
| 938 | while (chunk<AES_BLOCK_SIZE && nbytes!=0) { | ||
| 939 | *(out_arg++) = *(in_arg++) ^ ivp[chunk]; | ||
| 940 | chunk++, nbytes--; | ||
| 941 | } | ||
| 942 | |||
| 943 | ctx->num = chunk%AES_BLOCK_SIZE; | ||
| 944 | break; | ||
| 945 | } | ||
| 946 | } | ||
| 947 | |||
| 948 | if (nbytes == 0) | ||
| 949 | return 1; | ||
| 950 | #if 0 | ||
| 951 | if (nbytes % AES_BLOCK_SIZE) | ||
| 952 | return 0; /* are we expected to do tail processing? */ | ||
| 953 | #else | ||
| 954 | /* nbytes is always multiple of AES_BLOCK_SIZE in ECB and CBC | ||
| 955 | modes and arbitrary value in byte-oriented modes, such as | ||
| 956 | CFB and OFB... */ | ||
| 957 | #endif | ||
| 958 | |||
| 959 | /* VIA promises CPUs that won't require alignment in the future. | ||
| 960 | For now padlock_aes_align_required is initialized to 1 and | ||
| 961 | the condition is never met... */ | ||
| 962 | /* The C7 core is capable of managing unaligned input in non-ECB[!] | ||
| 963 | mode, but the performance penalty appears to be approximately the | ||
| 964 | same as for the software alignment below, or ~3x. They promise to | ||
| 965 | improve it in the future, but for now we can just as well | ||
| 966 | pretend that it can only handle aligned input... */ | ||
| 967 | if (!padlock_aes_align_required && (nbytes%AES_BLOCK_SIZE)==0) | ||
| 968 | return padlock_aes_cipher_omnivorous(ctx, out_arg, in_arg, nbytes); | ||
| 969 | |||
| 970 | inp_misaligned = (((size_t)in_arg) & 0x0F); | ||
| 971 | out_misaligned = (((size_t)out_arg) & 0x0F); | ||
| 972 | |||
| 973 | /* Note that even if the output is aligned and the input is not, | ||
| 974 | * I still prefer to loop instead of copying the whole | ||
| 975 | * input and then encrypting in one stroke. This is done | ||
| 976 | * in order to improve L1 cache utilization... */ | ||
| 977 | realign_in_loop = out_misaligned|inp_misaligned; | ||
| 978 | |||
| 979 | if (!realign_in_loop && (nbytes%AES_BLOCK_SIZE)==0) | ||
| 980 | return padlock_aes_cipher_omnivorous(ctx, out_arg, in_arg, nbytes); | ||
| 981 | |||
| 982 | /* this takes one "if" out of the loops */ | ||
| 983 | chunk = nbytes; | ||
| 984 | chunk %= PADLOCK_CHUNK; | ||
| 985 | if (chunk==0) chunk = PADLOCK_CHUNK; | ||
| 986 | |||
| 987 | if (out_misaligned) { | ||
| 988 | /* optimize for small input */ | ||
| 989 | allocated = (chunk<nbytes?PADLOCK_CHUNK:nbytes); | ||
| 990 | out = alloca(0x10 + allocated); | ||
| 991 | out = NEAREST_ALIGNED(out); | ||
| 992 | } | ||
| 993 | else | ||
| 994 | out = out_arg; | ||
| 995 | |||
| 996 | cdata = ALIGNED_CIPHER_DATA(ctx); | ||
| 997 | padlock_verify_context(cdata); | ||
| 998 | |||
| 999 | switch (EVP_CIPHER_CTX_mode(ctx)) { | ||
| 1000 | case EVP_CIPH_ECB_MODE: | ||
| 1001 | do { | ||
| 1002 | if (inp_misaligned) | ||
| 1003 | inp = padlock_memcpy(out, in_arg, chunk); | ||
| 1004 | else | ||
| 1005 | inp = in_arg; | ||
| 1006 | in_arg += chunk; | ||
| 1007 | |||
| 1008 | padlock_xcrypt_ecb(chunk/AES_BLOCK_SIZE, cdata, out, inp); | ||
| 1009 | |||
| 1010 | if (out_misaligned) | ||
| 1011 | out_arg = padlock_memcpy(out_arg, out, chunk) + chunk; | ||
| 1012 | else | ||
| 1013 | out = out_arg+=chunk; | ||
| 1014 | |||
| 1015 | nbytes -= chunk; | ||
| 1016 | chunk = PADLOCK_CHUNK; | ||
| 1017 | } while (nbytes); | ||
| 1018 | break; | ||
| 1019 | |||
| 1020 | case EVP_CIPH_CBC_MODE: | ||
| 1021 | memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE); | ||
| 1022 | goto cbc_shortcut; | ||
| 1023 | do { | ||
| 1024 | if (iv != cdata->iv) | ||
| 1025 | memcpy(cdata->iv, iv, AES_BLOCK_SIZE); | ||
| 1026 | chunk = PADLOCK_CHUNK; | ||
| 1027 | cbc_shortcut: /* optimize for small input */ | ||
| 1028 | if (inp_misaligned) | ||
| 1029 | inp = padlock_memcpy(out, in_arg, chunk); | ||
| 1030 | else | ||
| 1031 | inp = in_arg; | ||
| 1032 | in_arg += chunk; | ||
| 1033 | |||
| 1034 | iv = padlock_xcrypt_cbc(chunk/AES_BLOCK_SIZE, cdata, out, inp); | ||
| 1035 | |||
| 1036 | if (out_misaligned) | ||
| 1037 | out_arg = padlock_memcpy(out_arg, out, chunk) + chunk; | ||
| 1038 | else | ||
| 1039 | out = out_arg+=chunk; | ||
| 1040 | |||
| 1041 | } while (nbytes -= chunk); | ||
| 1042 | memcpy(ctx->iv, iv, AES_BLOCK_SIZE); | ||
| 1043 | break; | ||
| 1044 | |||
| 1045 | case EVP_CIPH_CFB_MODE: | ||
| 1046 | memcpy (iv = cdata->iv, ctx->iv, AES_BLOCK_SIZE); | ||
| 1047 | chunk &= ~(AES_BLOCK_SIZE-1); | ||
| 1048 | if (chunk) goto cfb_shortcut; | ||
| 1049 | else goto cfb_skiploop; | ||
| 1050 | do { | ||
| 1051 | if (iv != cdata->iv) | ||
| 1052 | memcpy(cdata->iv, iv, AES_BLOCK_SIZE); | ||
| 1053 | chunk = PADLOCK_CHUNK; | ||
| 1054 | cfb_shortcut: /* optimize for small input */ | ||
| 1055 | if (inp_misaligned) | ||
| 1056 | inp = padlock_memcpy(out, in_arg, chunk); | ||
| 1057 | else | ||
| 1058 | inp = in_arg; | ||
| 1059 | in_arg += chunk; | ||
| 1060 | |||
| 1061 | iv = padlock_xcrypt_cfb(chunk/AES_BLOCK_SIZE, cdata, out, inp); | ||
| 1062 | |||
| 1063 | if (out_misaligned) | ||
| 1064 | out_arg = padlock_memcpy(out_arg, out, chunk) + chunk; | ||
| 1065 | else | ||
| 1066 | out = out_arg+=chunk; | ||
| 1067 | |||
| 1068 | nbytes -= chunk; | ||
| 1069 | } while (nbytes >= AES_BLOCK_SIZE); | ||
| 1070 | |||
| 1071 | cfb_skiploop: | ||
| 1072 | if (nbytes) { | ||
| 1073 | unsigned char *ivp = cdata->iv; | ||
| 1074 | |||
| 1075 | if (iv != ivp) { | ||
| 1076 | memcpy(ivp, iv, AES_BLOCK_SIZE); | ||
| 1077 | iv = ivp; | ||
| 1078 | } | ||
| 1079 | ctx->num = nbytes; | ||
| 1080 | if (cdata->cword.b.encdec) { | ||
| 1081 | cdata->cword.b.encdec=0; | ||
| 1082 | padlock_reload_key(); | ||
| 1083 | padlock_xcrypt_ecb(1,cdata,ivp,ivp); | ||
| 1084 | cdata->cword.b.encdec=1; | ||
| 1085 | padlock_reload_key(); | ||
| 1086 | while(nbytes) { | ||
| 1087 | unsigned char c = *(in_arg++); | ||
| 1088 | *(out_arg++) = c ^ *ivp; | ||
| 1089 | *(ivp++) = c, nbytes--; | ||
| 1090 | } | ||
| 1091 | } | ||
| 1092 | else { padlock_reload_key(); | ||
| 1093 | padlock_xcrypt_ecb(1,cdata,ivp,ivp); | ||
| 1094 | padlock_reload_key(); | ||
| 1095 | while (nbytes) { | ||
| 1096 | *ivp = *(out_arg++) = *(in_arg++) ^ *ivp; | ||
| 1097 | ivp++, nbytes--; | ||
| 1098 | } | ||
| 1099 | } | ||
| 1100 | } | ||
| 1101 | |||
| 1102 | memcpy(ctx->iv, iv, AES_BLOCK_SIZE); | ||
| 1103 | break; | ||
| 1104 | |||
| 1105 | case EVP_CIPH_OFB_MODE: | ||
| 1106 | memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE); | ||
| 1107 | chunk &= ~(AES_BLOCK_SIZE-1); | ||
| 1108 | if (chunk) do { | ||
| 1109 | if (inp_misaligned) | ||
| 1110 | inp = padlock_memcpy(out, in_arg, chunk); | ||
| 1111 | else | ||
| 1112 | inp = in_arg; | ||
| 1113 | in_arg += chunk; | ||
| 1114 | |||
| 1115 | padlock_xcrypt_ofb(chunk/AES_BLOCK_SIZE, cdata, out, inp); | ||
| 1116 | |||
| 1117 | if (out_misaligned) | ||
| 1118 | out_arg = padlock_memcpy(out_arg, out, chunk) + chunk; | ||
| 1119 | else | ||
| 1120 | out = out_arg+=chunk; | ||
| 1121 | |||
| 1122 | nbytes -= chunk; | ||
| 1123 | chunk = PADLOCK_CHUNK; | ||
| 1124 | } while (nbytes >= AES_BLOCK_SIZE); | ||
| 1125 | |||
| 1126 | if (nbytes) { | ||
| 1127 | unsigned char *ivp = cdata->iv; | ||
| 1128 | |||
| 1129 | ctx->num = nbytes; | ||
| 1130 | padlock_reload_key(); /* empirically found */ | ||
| 1131 | padlock_xcrypt_ecb(1,cdata,ivp,ivp); | ||
| 1132 | padlock_reload_key(); /* empirically found */ | ||
| 1133 | while (nbytes) { | ||
| 1134 | *(out_arg++) = *(in_arg++) ^ *ivp; | ||
| 1135 | ivp++, nbytes--; | ||
| 1136 | } | ||
| 1137 | } | ||
| 1138 | |||
| 1139 | memcpy(ctx->iv, cdata->iv, AES_BLOCK_SIZE); | ||
| 1140 | break; | ||
| 1141 | |||
| 1142 | default: | ||
| 1143 | return 0; | ||
| 1144 | } | ||
| 1145 | |||
| 1146 | /* Clean the realign buffer if it was used */ | ||
| 1147 | if (out_misaligned) { | ||
| 1148 | volatile unsigned long *p=(void *)out; | ||
| 1149 | size_t n = allocated/sizeof(*p); | ||
| 1150 | while (n--) *p++=0; | ||
| 1151 | } | ||
| 1152 | |||
| 1153 | memset(cdata->iv, 0, AES_BLOCK_SIZE); | ||
| 1154 | |||
| 1155 | return 1; | ||
| 1156 | } | ||
| 1157 | |||
| 1158 | #endif /* OPENSSL_NO_AES */ | ||
| 1159 | |||
| 1160 | /* ===== Random Number Generator ===== */ | ||
| 1161 | /* | ||
| 1162 | * This code is not engaged. The reason is that it does not comply | ||
| 1163 | * with recommendations for VIA RNG usage for secure applications | ||
| 1164 | * (posted at http://www.via.com.tw/en/viac3/c3.jsp) nor does it | ||
| 1165 | * provide meaningful error control... | ||
| 1166 | */ | ||
| 1167 | /* Wrapper that provides an interface between the API and | ||
| 1168 | the raw PadLock RNG */ | ||
| 1169 | static int | ||
| 1170 | padlock_rand_bytes(unsigned char *output, int count) | ||
| 1171 | { | ||
| 1172 | unsigned int eax, buf; | ||
| 1173 | |||
| 1174 | while (count >= 8) { | ||
| 1175 | eax = padlock_xstore(output, 0); | ||
| 1176 | if (!(eax&(1<<6))) return 0; /* RNG disabled */ | ||
| 1177 | /* this ---vv--- covers DC bias, Raw Bits and String Filter */ | ||
| 1178 | if (eax&(0x1F<<10)) return 0; | ||
| 1179 | if ((eax&0x1F)==0) continue; /* no data, retry... */ | ||
| 1180 | if ((eax&0x1F)!=8) return 0; /* fatal failure... */ | ||
| 1181 | output += 8; | ||
| 1182 | count -= 8; | ||
| 1183 | } | ||
| 1184 | while (count > 0) { | ||
| 1185 | eax = padlock_xstore(&buf, 3); | ||
| 1186 | if (!(eax&(1<<6))) return 0; /* RNG disabled */ | ||
| 1187 | /* this ---vv--- covers DC bias, Raw Bits and String Filter */ | ||
| 1188 | if (eax&(0x1F<<10)) return 0; | ||
| 1189 | if ((eax&0x1F)==0) continue; /* no data, retry... */ | ||
| 1190 | if ((eax&0x1F)!=1) return 0; /* fatal failure... */ | ||
| 1191 | *output++ = (unsigned char)buf; | ||
| 1192 | count--; | ||
| 1193 | } | ||
| 1194 | *(volatile unsigned int *)&buf=0; | ||
| 1195 | |||
| 1196 | return 1; | ||
| 1197 | } | ||
| 1198 | |||
| 1199 | /* Dummy but necessary function */ | ||
| 1200 | static int | ||
| 1201 | padlock_rand_status(void) | ||
| 1202 | { | ||
| 1203 | return 1; | ||
| 1204 | } | ||
| 1205 | |||
| 1206 | /* Prepare structure for registration */ | ||
| 1207 | static RAND_METHOD padlock_rand = { | ||
| 1208 | NULL, /* seed */ | ||
| 1209 | padlock_rand_bytes, /* bytes */ | ||
| 1210 | NULL, /* cleanup */ | ||
| 1211 | NULL, /* add */ | ||
| 1212 | padlock_rand_bytes, /* pseudorand */ | ||
| 1213 | padlock_rand_status, /* rand status */ | ||
| 1214 | }; | ||
| 1215 | |||
| 1216 | #endif /* COMPILE_HW_PADLOCK */ | ||
| 1217 | |||
| 1218 | #endif /* !OPENSSL_NO_HW_PADLOCK */ | ||
| 1219 | #endif /* !OPENSSL_NO_HW */ | ||
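
For illustration, here is a minimal sketch of how an application might select the engine above and route AES-128-CBC through it, assuming a libcrypto build that compiles the engine in. The helper name encrypt_with_padlock is hypothetical; the calls are the standard 0.9.7/0.9.8-era ENGINE/EVP API, and out is assumed to have room for inlen + 16 bytes.

#include <openssl/engine.h>
#include <openssl/evp.h>

/* Hypothetical helper, illustration only -- not part of the diff above. */
int
encrypt_with_padlock(const unsigned char key[16], const unsigned char iv[16],
    const unsigned char *in, int inlen, unsigned char *out, int *outlen)
{
	EVP_CIPHER_CTX ctx;
	ENGINE *e;
	int len = 0, tmp = 0, ok = 0;

	ENGINE_load_padlock();			/* registers the engine defined above */
	if ((e = ENGINE_by_id("padlock")) == NULL)
		return 0;			/* engine absent or CPU lacks PadLock */
	if (!ENGINE_init(e)) {			/* padlock_init(): needs ACE or RNG */
		ENGINE_free(e);
		return 0;
	}

	EVP_CIPHER_CTX_init(&ctx);
	/* Passing "e" makes EVP call padlock_aes_init_key()/padlock_aes_cipher(). */
	if (EVP_EncryptInit_ex(&ctx, EVP_aes_128_cbc(), e, key, iv) &&
	    EVP_EncryptUpdate(&ctx, out, &len, in, inlen) &&
	    EVP_EncryptFinal_ex(&ctx, out + len, &tmp)) {
		*outlen = len + tmp;
		ok = 1;
	}

	EVP_CIPHER_CTX_cleanup(&ctx);
	ENGINE_finish(e);
	ENGINE_free(e);
	return ok;
}

Instead of passing the ENGINE per context, ENGINE_set_default(e, ENGINE_METHOD_CIPHERS) would make the selection globally for all EVP cipher users.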

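The over-allocation trick behind ALIGNED_CIPHER_DATA() is easy to miss: DECLARE_AES_EVP() reserves sizeof(struct padlock_cipher_data) + 16 bytes of cipher_data, and NEAREST_ALIGNED() rounds the pointer up to the next 16-byte boundary so the control word, key and IV always satisfy the alignment the xcrypt instructions expect. Below is a standalone sketch of that arithmetic under the same assumptions, with ALIGN16 as a hypothetical stand-in for NEAREST_ALIGNED.

#include <assert.h>
#include <stddef.h>
#include <stdlib.h>

/* Same rounding as NEAREST_ALIGNED(): advance by (16 - p % 16) % 16 bytes. */
#define ALIGN16(p) ((unsigned char *)(p) + ((0x10 - ((size_t)(p) & 0x0F)) & 0x0F))

int
main(void)
{
	size_t want = 64;	/* stand-in for sizeof(struct padlock_cipher_data) */
	unsigned char *raw, *aligned;

	if ((raw = malloc(want + 16)) == NULL)	/* mirrors "sizeof(...) + 16" above */
		return 1;
	aligned = ALIGN16(raw);

	assert(((size_t)aligned & 0x0F) == 0);		/* 16-byte aligned, as xcrypt requires */
	assert(aligned + want <= raw + want + 16);	/* the aligned struct still fits */

	free(raw);
	return 0;
}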