summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/lib/libcrypto/rc4/rc4.c 162
1 files changed, 45 insertions, 117 deletions
diff --git a/src/lib/libcrypto/rc4/rc4.c b/src/lib/libcrypto/rc4/rc4.c
index 9c0a61162d..69b7d0a815 100644
--- a/src/lib/libcrypto/rc4/rc4.c
+++ b/src/lib/libcrypto/rc4/rc4.c
@@ -1,4 +1,4 @@
1/* $OpenBSD: rc4.c,v 1.14 2025/08/14 14:55:43 jsing Exp $ */ 1/* $OpenBSD: rc4.c,v 1.15 2025/08/17 08:04:25 jsing Exp $ */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) 2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved. 3 * All rights reserved.
4 * 4 *
@@ -57,22 +57,15 @@
57 */ 57 */
58 58
59#include <endian.h> 59#include <endian.h>
60#include <stdint.h>
60 61
61#include <openssl/rc4.h> 62#include <openssl/rc4.h>
62 63
63#include "crypto_arch.h" 64#include "crypto_arch.h"
64 65
65/* RC4 as implemented from a posting from
66 * Newsgroups: sci.crypt
67 * From: sterndark@netcom.com (David Sterndark)
68 * Subject: RC4 Algorithm revealed.
69 * Message-ID: <sternCvKL4B.Hyy@netcom.com>
70 * Date: Wed, 14 Sep 1994 06:35:31 GMT
71 */
72
73#ifdef HAVE_RC4_INTERNAL 66#ifdef HAVE_RC4_INTERNAL
74void rc4_internal(RC4_KEY *key, size_t len, const unsigned char *indata, 67void rc4_internal(RC4_KEY *key, size_t len, const uint8_t *in,
75 unsigned char *outdata); 68 uint8_t *out);
76 69
77#else 70#else
78static inline RC4_INT 71static inline RC4_INT
@@ -89,9 +82,35 @@ rc4_step(RC4_INT *d, RC4_INT *x, RC4_INT *y)
89 return d[(tx + ty) & 0xff]; 82 return d[(tx + ty) & 0xff];
90} 83}
91 84
85#if BYTE_ORDER == BIG_ENDIAN
86static inline uint64_t
87rc4_chunk(RC4_INT *d, RC4_INT *x, RC4_INT *y)
88{
89 uint64_t chunk = 0;
90 size_t i;
91
92 for (i = 0; i < 8; i++)
93 chunk = chunk << 8 | (uint64_t)rc4_step(d, x, y);
94
95 return chunk;
96}
97
98#else
99static inline uint64_t
100rc4_chunk(RC4_INT *d, RC4_INT *x, RC4_INT *y)
101{
102 uint64_t chunk = 0;
103 size_t i;
104
105 for (i = 0; i < 8; i++)
106 chunk |= (uint64_t)rc4_step(d, x, y) << (i * 8);
107
108 return chunk;
109}
110#endif
111
92static void 112static void
93rc4_internal(RC4_KEY *key, size_t len, const unsigned char *indata, 113rc4_internal(RC4_KEY *key, size_t len, const uint8_t *in, uint8_t *out)
94 unsigned char *outdata)
95{ 114{
96 RC4_INT *d, x, y; 115 RC4_INT *d, x, y;
97 size_t i; 116 size_t i;
@@ -100,118 +119,27 @@ rc4_internal(RC4_KEY *key, size_t len, const unsigned char *indata,
100 y = key->y; 119 y = key->y;
101 d = key->data; 120 d = key->data;
102 121
103#if defined(RC4_CHUNK) 122 /* Process uint64_t chunks if 8 byte aligned. */
104 /* 123 if ((((size_t)in | (size_t)out) % 8) == 0) {
105 * The original reason for implementing this(*) was the fact that 124 while (len >= 8) {
106 * pre-21164a Alpha CPUs don't have byte load/store instructions 125 *(uint64_t *)out = *(const uint64_t *)in ^ rc4_chunk(d, &x, &y);
107 * and e.g. a byte store has to be done with 64-bit load, shift,
108 * and, or and finally 64-bit store. Peaking data and operating
109 * at natural word size made it possible to reduce amount of
110 * instructions as well as to perform early read-ahead without
111 * suffering from RAW (read-after-write) hazard. This resulted
112 * in ~40%(**) performance improvement on 21064 box with gcc.
113 * But it's not only Alpha users who win here:-) Thanks to the
114 * early-n-wide read-ahead this implementation also exhibits
115 * >40% speed-up on SPARC and 20-30% on 64-bit MIPS (depending
116 * on sizeof(RC4_INT)).
117 *
118 * (*) "this" means code which recognizes the case when input
119 * and output pointers appear to be aligned at natural CPU
120 * word boundary
121 * (**) i.e. according to 'apps/openssl speed rc4' benchmark,
122 * crypto/rc4/rc4speed.c exhibits almost 70% speed-up...
123 *
124 * Caveats.
125 *
126 * - RC4_CHUNK="unsigned long long" should be a #1 choice for
127 * UltraSPARC. Unfortunately gcc generates very slow code
128 * (2.5-3 times slower than one generated by Sun's WorkShop
129 * C) and therefore gcc (at least 2.95 and earlier) should
130 * always be told that RC4_CHUNK="unsigned long".
131 *
132 * <appro@fy.chalmers.se>
133 */
134
135# define RC4_STEP ((RC4_CHUNK)rc4_step(d, &x, &y))
136 126
137 if ((((size_t)indata & (sizeof(RC4_CHUNK) - 1)) | 127 in += 8;
138 ((size_t)outdata & (sizeof(RC4_CHUNK) - 1))) == 0 ) { 128 out += 8;
139 RC4_CHUNK ichunk, otp; 129 len -= 8;
140
141 /*
142 * I reckon we can afford to implement both endian
143 * cases and to decide which way to take at run-time
144 * because the machine code appears to be very compact
145 * and redundant 1-2KB is perfectly tolerable (i.e.
146 * in case the compiler fails to eliminate it:-). By
147 * suggestion from Terrel Larson <terr@terralogic.net>.
148 *
149 * Special notes.
150 *
151 * - compilers (those I've tried) don't seem to have
152 * problems eliminating either the operators guarded
153 * by "if (sizeof(RC4_CHUNK)==8)" or the condition
154 * expressions themselves so I've got 'em to replace
155 * corresponding #ifdefs from the previous version;
156 * - I chose to let the redundant switch cases when
157 * sizeof(RC4_CHUNK)!=8 be (were also #ifdefed
158 * before);
159 * - in case you wonder "&(sizeof(RC4_CHUNK)*8-1)" in
160 * [LB]ESHFT guards against "shift is out of range"
161 * warnings when sizeof(RC4_CHUNK)!=8
162 *
163 * <appro@fy.chalmers.se>
164 */
165#if BYTE_ORDER == BIG_ENDIAN
166# define BESHFT(c) (((sizeof(RC4_CHUNK)-(c)-1)*8)&(sizeof(RC4_CHUNK)*8-1))
167 for (; len & (0 - sizeof(RC4_CHUNK)); len -= sizeof(RC4_CHUNK)) {
168 ichunk = *(RC4_CHUNK *)indata;
169 otp = RC4_STEP << BESHFT(0);
170 otp |= RC4_STEP << BESHFT(1);
171 otp |= RC4_STEP << BESHFT(2);
172 otp |= RC4_STEP << BESHFT(3);
173 if (sizeof(RC4_CHUNK) == 8) {
174 otp |= RC4_STEP << BESHFT(4);
175 otp |= RC4_STEP << BESHFT(5);
176 otp |= RC4_STEP << BESHFT(6);
177 otp |= RC4_STEP << BESHFT(7);
178 }
179 *(RC4_CHUNK *)outdata = otp^ichunk;
180 indata += sizeof(RC4_CHUNK);
181 outdata += sizeof(RC4_CHUNK);
182 }
183#else
184# define LESHFT(c) (((c)*8)&(sizeof(RC4_CHUNK)*8-1))
185 for (; len & (0 - sizeof(RC4_CHUNK)); len -= sizeof(RC4_CHUNK)) {
186 ichunk = *(RC4_CHUNK *)indata;
187 otp = RC4_STEP;
188 otp |= RC4_STEP << 8;
189 otp |= RC4_STEP << 16;
190 otp |= RC4_STEP << 24;
191 if (sizeof(RC4_CHUNK) == 8) {
192 otp |= RC4_STEP << LESHFT(4);
193 otp |= RC4_STEP << LESHFT(5);
194 otp |= RC4_STEP << LESHFT(6);
195 otp |= RC4_STEP << LESHFT(7);
196 }
197 *(RC4_CHUNK *)outdata = otp ^ ichunk;
198 indata += sizeof(RC4_CHUNK);
199 outdata += sizeof(RC4_CHUNK);
200 } 130 }
201#endif
202 } 131 }
203#endif
204 132
205 while (len >= 8) { 133 while (len >= 8) {
206 for (i = 0; i < 8; i++) 134 for (i = 0; i < 8; i++)
207 outdata[i] = rc4_step(d, &x, &y) ^ indata[i]; 135 out[i] = rc4_step(d, &x, &y) ^ in[i];
208 136
209 indata += 8; 137 in += 8;
210 outdata += 8; 138 out += 8;
211 len -= 8; 139 len -= 8;
212 } 140 }
213 for (i = 0; i < len; i++) 141 for (i = 0; i < len; i++)
214 outdata[i] = rc4_step(d, &x, &y) ^ indata[i]; 142 out[i] = rc4_step(d, &x, &y) ^ in[i];
215 143
216 key->x = x; 144 key->x = x;
217 key->y = y; 145 key->y = y;
@@ -219,11 +147,11 @@ rc4_internal(RC4_KEY *key, size_t len, const unsigned char *indata,
219#endif 147#endif
220 148
221#ifdef HAVE_RC4_SET_KEY_INTERNAL 149#ifdef HAVE_RC4_SET_KEY_INTERNAL
222void rc4_set_key_internal(RC4_KEY *key, int len, const unsigned char *data); 150void rc4_set_key_internal(RC4_KEY *key, int len, const uint8_t *data);
223 151
224#else 152#else
225static inline void 153static inline void
226rc4_set_key_internal(RC4_KEY *key, int len, const unsigned char *data) 154rc4_set_key_internal(RC4_KEY *key, int len, const uint8_t *data)
227{ 155{
228 RC4_INT *d, tmp; 156 RC4_INT *d, tmp;
229 int idx1, idx2; 157 int idx1, idx2;