diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/lib/libcrypto/rc4/rc4.c | 162 |
1 file changed, 45 insertions, 117 deletions
diff --git a/src/lib/libcrypto/rc4/rc4.c b/src/lib/libcrypto/rc4/rc4.c index 9c0a61162d..69b7d0a815 100644 --- a/src/lib/libcrypto/rc4/rc4.c +++ b/src/lib/libcrypto/rc4/rc4.c | |||
@@ -1,4 +1,4 @@ | |||
1 | /* $OpenBSD: rc4.c,v 1.14 2025/08/14 14:55:43 jsing Exp $ */ | 1 | /* $OpenBSD: rc4.c,v 1.15 2025/08/17 08:04:25 jsing Exp $ */ |
2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) | 2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) |
3 | * All rights reserved. | 3 | * All rights reserved. |
4 | * | 4 | * |
@@ -57,22 +57,15 @@ | |||
57 | */ | 57 | */ |
58 | 58 | ||
59 | #include <endian.h> | 59 | #include <endian.h> |
60 | #include <stdint.h> | ||
60 | 61 | ||
61 | #include <openssl/rc4.h> | 62 | #include <openssl/rc4.h> |
62 | 63 | ||
63 | #include "crypto_arch.h" | 64 | #include "crypto_arch.h" |
64 | 65 | ||
65 | /* RC4 as implemented from a posting from | ||
66 | * Newsgroups: sci.crypt | ||
67 | * From: sterndark@netcom.com (David Sterndark) | ||
68 | * Subject: RC4 Algorithm revealed. | ||
69 | * Message-ID: <sternCvKL4B.Hyy@netcom.com> | ||
70 | * Date: Wed, 14 Sep 1994 06:35:31 GMT | ||
71 | */ | ||
72 | |||
73 | #ifdef HAVE_RC4_INTERNAL | 66 | #ifdef HAVE_RC4_INTERNAL |
74 | void rc4_internal(RC4_KEY *key, size_t len, const unsigned char *indata, | 67 | void rc4_internal(RC4_KEY *key, size_t len, const uint8_t *in, |
75 | unsigned char *outdata); | 68 | uint8_t *out); |
76 | 69 | ||
77 | #else | 70 | #else |
78 | static inline RC4_INT | 71 | static inline RC4_INT |
@@ -89,9 +82,35 @@ rc4_step(RC4_INT *d, RC4_INT *x, RC4_INT *y) | |||
89 | return d[(tx + ty) & 0xff]; | 82 | return d[(tx + ty) & 0xff]; |
90 | } | 83 | } |
91 | 84 | ||
85 | #if BYTE_ORDER == BIG_ENDIAN | ||
86 | static inline uint64_t | ||
87 | rc4_chunk(RC4_INT *d, RC4_INT *x, RC4_INT *y) | ||
88 | { | ||
89 | uint64_t chunk = 0; | ||
90 | size_t i; | ||
91 | |||
92 | for (i = 0; i < 8; i++) | ||
93 | chunk = chunk << 8 | (uint64_t)rc4_step(d, x, y); | ||
94 | |||
95 | return chunk; | ||
96 | } | ||
97 | |||
98 | #else | ||
99 | static inline uint64_t | ||
100 | rc4_chunk(RC4_INT *d, RC4_INT *x, RC4_INT *y) | ||
101 | { | ||
102 | uint64_t chunk = 0; | ||
103 | size_t i; | ||
104 | |||
105 | for (i = 0; i < 8; i++) | ||
106 | chunk |= (uint64_t)rc4_step(d, x, y) << (i * 8); | ||
107 | |||
108 | return chunk; | ||
109 | } | ||
110 | #endif | ||
111 | |||
92 | static void | 112 | static void |
93 | rc4_internal(RC4_KEY *key, size_t len, const unsigned char *indata, | 113 | rc4_internal(RC4_KEY *key, size_t len, const uint8_t *in, uint8_t *out) |
94 | unsigned char *outdata) | ||
95 | { | 114 | { |
96 | RC4_INT *d, x, y; | 115 | RC4_INT *d, x, y; |
97 | size_t i; | 116 | size_t i; |
@@ -100,118 +119,27 @@ rc4_internal(RC4_KEY *key, size_t len, const unsigned char *indata, | |||
100 | y = key->y; | 119 | y = key->y; |
101 | d = key->data; | 120 | d = key->data; |
102 | 121 | ||
103 | #if defined(RC4_CHUNK) | 122 | /* Process uint64_t chunks if 8 byte aligned. */ |
104 | /* | 123 | if ((((size_t)in | (size_t)out) % 8) == 0) { |
105 | * The original reason for implementing this(*) was the fact that | 124 | while (len >= 8) { |
106 | * pre-21164a Alpha CPUs don't have byte load/store instructions | 125 | *(uint64_t *)out = *(const uint64_t *)in ^ rc4_chunk(d, &x, &y); |
107 | * and e.g. a byte store has to be done with 64-bit load, shift, | ||
108 | * and, or and finally 64-bit store. Peaking data and operating | ||
109 | * at natural word size made it possible to reduce amount of | ||
110 | * instructions as well as to perform early read-ahead without | ||
111 | * suffering from RAW (read-after-write) hazard. This resulted | ||
112 | * in ~40%(**) performance improvement on 21064 box with gcc. | ||
113 | * But it's not only Alpha users who win here:-) Thanks to the | ||
114 | * early-n-wide read-ahead this implementation also exhibits | ||
115 | * >40% speed-up on SPARC and 20-30% on 64-bit MIPS (depending | ||
116 | * on sizeof(RC4_INT)). | ||
117 | * | ||
118 | * (*) "this" means code which recognizes the case when input | ||
119 | * and output pointers appear to be aligned at natural CPU | ||
120 | * word boundary | ||
121 | * (**) i.e. according to 'apps/openssl speed rc4' benchmark, | ||
122 | * crypto/rc4/rc4speed.c exhibits almost 70% speed-up... | ||
123 | * | ||
124 | * Caveats. | ||
125 | * | ||
126 | * - RC4_CHUNK="unsigned long long" should be a #1 choice for | ||
127 | * UltraSPARC. Unfortunately gcc generates very slow code | ||
128 | * (2.5-3 times slower than one generated by Sun's WorkShop | ||
129 | * C) and therefore gcc (at least 2.95 and earlier) should | ||
130 | * always be told that RC4_CHUNK="unsigned long". | ||
131 | * | ||
132 | * <appro@fy.chalmers.se> | ||
133 | */ | ||
134 | |||
135 | # define RC4_STEP ((RC4_CHUNK)rc4_step(d, &x, &y)) | ||
136 | 126 | ||
137 | if ((((size_t)indata & (sizeof(RC4_CHUNK) - 1)) | | 127 | in += 8; |
138 | ((size_t)outdata & (sizeof(RC4_CHUNK) - 1))) == 0 ) { | 128 | out += 8; |
139 | RC4_CHUNK ichunk, otp; | 129 | len -= 8; |
140 | |||
141 | /* | ||
142 | * I reckon we can afford to implement both endian | ||
143 | * cases and to decide which way to take at run-time | ||
144 | * because the machine code appears to be very compact | ||
145 | * and redundant 1-2KB is perfectly tolerable (i.e. | ||
146 | * in case the compiler fails to eliminate it:-). By | ||
147 | * suggestion from Terrel Larson <terr@terralogic.net>. | ||
148 | * | ||
149 | * Special notes. | ||
150 | * | ||
151 | * - compilers (those I've tried) don't seem to have | ||
152 | * problems eliminating either the operators guarded | ||
153 | * by "if (sizeof(RC4_CHUNK)==8)" or the condition | ||
154 | * expressions themselves so I've got 'em to replace | ||
155 | * corresponding #ifdefs from the previous version; | ||
156 | * - I chose to let the redundant switch cases when | ||
157 | * sizeof(RC4_CHUNK)!=8 be (were also #ifdefed | ||
158 | * before); | ||
159 | * - in case you wonder "&(sizeof(RC4_CHUNK)*8-1)" in | ||
160 | * [LB]ESHFT guards against "shift is out of range" | ||
161 | * warnings when sizeof(RC4_CHUNK)!=8 | ||
162 | * | ||
163 | * <appro@fy.chalmers.se> | ||
164 | */ | ||
165 | #if BYTE_ORDER == BIG_ENDIAN | ||
166 | # define BESHFT(c) (((sizeof(RC4_CHUNK)-(c)-1)*8)&(sizeof(RC4_CHUNK)*8-1)) | ||
167 | for (; len & (0 - sizeof(RC4_CHUNK)); len -= sizeof(RC4_CHUNK)) { | ||
168 | ichunk = *(RC4_CHUNK *)indata; | ||
169 | otp = RC4_STEP << BESHFT(0); | ||
170 | otp |= RC4_STEP << BESHFT(1); | ||
171 | otp |= RC4_STEP << BESHFT(2); | ||
172 | otp |= RC4_STEP << BESHFT(3); | ||
173 | if (sizeof(RC4_CHUNK) == 8) { | ||
174 | otp |= RC4_STEP << BESHFT(4); | ||
175 | otp |= RC4_STEP << BESHFT(5); | ||
176 | otp |= RC4_STEP << BESHFT(6); | ||
177 | otp |= RC4_STEP << BESHFT(7); | ||
178 | } | ||
179 | *(RC4_CHUNK *)outdata = otp^ichunk; | ||
180 | indata += sizeof(RC4_CHUNK); | ||
181 | outdata += sizeof(RC4_CHUNK); | ||
182 | } | ||
183 | #else | ||
184 | # define LESHFT(c) (((c)*8)&(sizeof(RC4_CHUNK)*8-1)) | ||
185 | for (; len & (0 - sizeof(RC4_CHUNK)); len -= sizeof(RC4_CHUNK)) { | ||
186 | ichunk = *(RC4_CHUNK *)indata; | ||
187 | otp = RC4_STEP; | ||
188 | otp |= RC4_STEP << 8; | ||
189 | otp |= RC4_STEP << 16; | ||
190 | otp |= RC4_STEP << 24; | ||
191 | if (sizeof(RC4_CHUNK) == 8) { | ||
192 | otp |= RC4_STEP << LESHFT(4); | ||
193 | otp |= RC4_STEP << LESHFT(5); | ||
194 | otp |= RC4_STEP << LESHFT(6); | ||
195 | otp |= RC4_STEP << LESHFT(7); | ||
196 | } | ||
197 | *(RC4_CHUNK *)outdata = otp ^ ichunk; | ||
198 | indata += sizeof(RC4_CHUNK); | ||
199 | outdata += sizeof(RC4_CHUNK); | ||
200 | } | 130 | } |
201 | #endif | ||
202 | } | 131 | } |
203 | #endif | ||
204 | 132 | ||
205 | while (len >= 8) { | 133 | while (len >= 8) { |
206 | for (i = 0; i < 8; i++) | 134 | for (i = 0; i < 8; i++) |
207 | outdata[i] = rc4_step(d, &x, &y) ^ indata[i]; | 135 | out[i] = rc4_step(d, &x, &y) ^ in[i]; |
208 | 136 | ||
209 | indata += 8; | 137 | in += 8; |
210 | outdata += 8; | 138 | out += 8; |
211 | len -= 8; | 139 | len -= 8; |
212 | } | 140 | } |
213 | for (i = 0; i < len; i++) | 141 | for (i = 0; i < len; i++) |
214 | outdata[i] = rc4_step(d, &x, &y) ^ indata[i]; | 142 | out[i] = rc4_step(d, &x, &y) ^ in[i]; |
215 | 143 | ||
216 | key->x = x; | 144 | key->x = x; |
217 | key->y = y; | 145 | key->y = y; |
@@ -219,11 +147,11 @@ rc4_internal(RC4_KEY *key, size_t len, const unsigned char *indata, | |||
219 | #endif | 147 | #endif |
220 | 148 | ||
221 | #ifdef HAVE_RC4_SET_KEY_INTERNAL | 149 | #ifdef HAVE_RC4_SET_KEY_INTERNAL |
222 | void rc4_set_key_internal(RC4_KEY *key, int len, const unsigned char *data); | 150 | void rc4_set_key_internal(RC4_KEY *key, int len, const uint8_t *data); |
223 | 151 | ||
224 | #else | 152 | #else |
225 | static inline void | 153 | static inline void |
226 | rc4_set_key_internal(RC4_KEY *key, int len, const unsigned char *data) | 154 | rc4_set_key_internal(RC4_KEY *key, int len, const uint8_t *data) |
227 | { | 155 | { |
228 | RC4_INT *d, tmp; | 156 | RC4_INT *d, tmp; |
229 | int idx1, idx2; | 157 | int idx1, idx2; |