summaryrefslogtreecommitdiff
path: root/src/lib
diff options
context:
space:
mode:
authorguenther <>2014-05-20 01:21:52 +0000
committerguenther <>2014-05-20 01:21:52 +0000
commitb19e47792ae015b7179921c106605e9b7861cb0c (patch)
treed009a3ee2d810a28ee368be1fe06aaab046f09b1 /src/lib
parent4dc1407574fb45b0e8985f2b8149b94dd4704437 (diff)
downloadopenbsd-b19e47792ae015b7179921c106605e9b7861cb0c.tar.gz
openbsd-b19e47792ae015b7179921c106605e9b7861cb0c.tar.bz2
openbsd-b19e47792ae015b7179921c106605e9b7861cb0c.zip
Bring UTF8_{getc,putc} up-to-date: it's been a decade since 5- and 6-byte
encodings and encoding of surrogate pair code points were banned. Add checks for those, both to those functions and to the code decoding the BMP and UNIV encodings. ok miod@
Diffstat (limited to '')
-rw-r--r--src/lib/libcrypto/asn1/a_mbstr.c18
-rw-r--r--src/lib/libcrypto/asn1/a_strex.c8
-rw-r--r--src/lib/libcrypto/asn1/a_utf8.c144
-rw-r--r--src/lib/libcrypto/asn1/asn1_locl.h11
-rw-r--r--src/lib/libssl/src/crypto/asn1/a_mbstr.c18
-rw-r--r--src/lib/libssl/src/crypto/asn1/a_strex.c8
-rw-r--r--src/lib/libssl/src/crypto/asn1/a_utf8.c144
-rw-r--r--src/lib/libssl/src/crypto/asn1/asn1_locl.h11
8 files changed, 176 insertions, 186 deletions
diff --git a/src/lib/libcrypto/asn1/a_mbstr.c b/src/lib/libcrypto/asn1/a_mbstr.c
index 9945ede2ac..ebc7f2681c 100644
--- a/src/lib/libcrypto/asn1/a_mbstr.c
+++ b/src/lib/libcrypto/asn1/a_mbstr.c
@@ -60,6 +60,7 @@
60#include <ctype.h> 60#include <ctype.h>
61#include "cryptlib.h" 61#include "cryptlib.h"
62#include <openssl/asn1.h> 62#include <openssl/asn1.h>
63#include "asn1_locl.h"
63 64
64static int traverse_string(const unsigned char *p, int len, int inform, 65static int traverse_string(const unsigned char *p, int len, int inform,
65 int (*rfunc)(unsigned long value, void *in), void *arg); 66 int (*rfunc)(unsigned long value, void *in), void *arg);
@@ -232,7 +233,11 @@ ASN1_mbstring_ncopy(ASN1_STRING **out, const unsigned char *in, int len,
232 233
233 case MBSTRING_UTF8: 234 case MBSTRING_UTF8:
234 outlen = 0; 235 outlen = 0;
235 traverse_string(in, len, inform, out_utf8, &outlen); 236 if (traverse_string(in, len, inform, out_utf8, &outlen) < 0) {
237 ASN1err(ASN1_F_ASN1_MBSTRING_NCOPY,
238 ASN1_R_ILLEGAL_CHARACTERS);
239 return -1;
240 }
236 cpyfunc = cpy_utf8; 241 cpyfunc = cpy_utf8;
237 break; 242 break;
238 } 243 }
@@ -267,12 +272,17 @@ traverse_string(const unsigned char *p, int len, int inform,
267 } else if (inform == MBSTRING_BMP) { 272 } else if (inform == MBSTRING_BMP) {
268 value = *p++ << 8; 273 value = *p++ << 8;
269 value |= *p++; 274 value |= *p++;
275 /* BMP is explicitly defined to not support surrogates */
276 if (UNICODE_IS_SURROGATE(value))
277 return -1;
270 len -= 2; 278 len -= 2;
271 } else if (inform == MBSTRING_UNIV) { 279 } else if (inform == MBSTRING_UNIV) {
272 value = ((unsigned long)*p++) << 24; 280 value = ((unsigned long)*p++) << 24;
273 value |= ((unsigned long)*p++) << 16; 281 value |= ((unsigned long)*p++) << 16;
274 value |= *p++ << 8; 282 value |= *p++ << 8;
275 value |= *p++; 283 value |= *p++;
284 if (value > UNICODE_MAX || UNICODE_IS_SURROGATE(value))
285 return -1;
276 len -= 4; 286 len -= 4;
277 } else { 287 } else {
278 ret = UTF8_getc(p, len, &value); 288 ret = UTF8_getc(p, len, &value);
@@ -310,9 +320,13 @@ static int
310out_utf8(unsigned long value, void *arg) 320out_utf8(unsigned long value, void *arg)
311{ 321{
312 int *outlen; 322 int *outlen;
323 int ret;
313 324
314 outlen = arg; 325 outlen = arg;
315 *outlen += UTF8_putc(NULL, -1, value); 326 ret = UTF8_putc(NULL, -1, value);
327 if (ret < 0)
328 return ret;
329 *outlen += ret;
316 return 1; 330 return 1;
317} 331}
318 332
diff --git a/src/lib/libcrypto/asn1/a_strex.c b/src/lib/libcrypto/asn1/a_strex.c
index 462a4059be..684e933c4f 100644
--- a/src/lib/libcrypto/asn1/a_strex.c
+++ b/src/lib/libcrypto/asn1/a_strex.c
@@ -62,6 +62,7 @@
62#include <openssl/crypto.h> 62#include <openssl/crypto.h>
63#include <openssl/x509.h> 63#include <openssl/x509.h>
64#include <openssl/asn1.h> 64#include <openssl/asn1.h>
65#include "asn1_locl.h"
65 66
66#include "charmap.h" 67#include "charmap.h"
67 68
@@ -215,11 +216,15 @@ do_buf(unsigned char *buf, int buflen, int type, unsigned char flags,
215 c |= ((unsigned long)*p++) << 16; 216 c |= ((unsigned long)*p++) << 16;
216 c |= ((unsigned long)*p++) << 8; 217 c |= ((unsigned long)*p++) << 8;
217 c |= *p++; 218 c |= *p++;
219 if (c > UNICODE_MAX || UNICODE_IS_SURROGATE(c))
220 return -1;
218 break; 221 break;
219 222
220 case 2: 223 case 2:
221 c = ((unsigned long)*p++) << 8; 224 c = ((unsigned long)*p++) << 8;
222 c |= *p++; 225 c |= *p++;
226 if (UNICODE_IS_SURROGATE(c))
227 return -1;
223 break; 228 break;
224 229
225 case 1: 230 case 1:
@@ -240,7 +245,10 @@ do_buf(unsigned char *buf, int buflen, int type, unsigned char flags,
240 if (type & BUF_TYPE_CONVUTF8) { 245 if (type & BUF_TYPE_CONVUTF8) {
241 unsigned char utfbuf[6]; 246 unsigned char utfbuf[6];
242 int utflen; 247 int utflen;
248
243 utflen = UTF8_putc(utfbuf, sizeof utfbuf, c); 249 utflen = UTF8_putc(utfbuf, sizeof utfbuf, c);
250 if (utflen < 0)
251 return -1;
244 for (i = 0; i < utflen; i++) { 252 for (i = 0; i < utflen; i++) {
245 /* We don't need to worry about setting orflags correctly 253 /* We don't need to worry about setting orflags correctly
246 * because if utflen==1 its value will be correct anyway 254 * because if utflen==1 its value will be correct anyway
diff --git a/src/lib/libcrypto/asn1/a_utf8.c b/src/lib/libcrypto/asn1/a_utf8.c
index c224db4c12..f5e4bec7e0 100644
--- a/src/lib/libcrypto/asn1/a_utf8.c
+++ b/src/lib/libcrypto/asn1/a_utf8.c
@@ -59,11 +59,13 @@
59#include <stdio.h> 59#include <stdio.h>
60#include "cryptlib.h" 60#include "cryptlib.h"
61#include <openssl/asn1.h> 61#include <openssl/asn1.h>
62#include "asn1_locl.h"
62 63
63 64
64/* UTF8 utilities */ 65/* UTF8 utilities */
65 66
66/* This parses a UTF8 string one character at a time. It is passed a pointer 67/*
68 * This parses a UTF8 string one character at a time. It is passed a pointer
67 * to the string and the length of the string. It sets 'value' to the value of 69 * to the string and the length of the string. It sets 'value' to the value of
68 * the current character. It returns the number of characters read or a 70 * the current character. It returns the number of characters read or a
69 * negative error code: 71 * negative error code:
@@ -88,6 +90,8 @@ UTF8_getc(const unsigned char *str, int len, unsigned long *val)
88 value = *p++ & 0x7f; 90 value = *p++ & 0x7f;
89 ret = 1; 91 ret = 1;
90 } else if ((*p & 0xe0) == 0xc0) { 92 } else if ((*p & 0xe0) == 0xc0) {
93 if (*p < 0xc2)
94 return -2;
91 if (len < 2) 95 if (len < 2)
92 return -1; 96 return -1;
93 if ((p[1] & 0xc0) != 0x80) 97 if ((p[1] & 0xc0) != 0x80)
@@ -108,8 +112,11 @@ UTF8_getc(const unsigned char *str, int len, unsigned long *val)
108 value |= *p++ & 0x3f; 112 value |= *p++ & 0x3f;
109 if (value < 0x800) 113 if (value < 0x800)
110 return -4; 114 return -4;
115 /* surrogate pair code points are not valid */
116 if (value >= 0xd800 && value < 0xe000)
117 return -2;
111 ret = 3; 118 ret = 3;
112 } else if ((*p & 0xf8) == 0xf0) { 119 } else if ((*p & 0xf8) == 0xf0 && (*p < 0xf5)) {
113 if (len < 4) 120 if (len < 4)
114 return -1; 121 return -1;
115 if (((p[1] & 0xc0) != 0x80) || 122 if (((p[1] & 0xc0) != 0x80) ||
@@ -122,116 +129,71 @@ UTF8_getc(const unsigned char *str, int len, unsigned long *val)
122 value |= *p++ & 0x3f; 129 value |= *p++ & 0x3f;
123 if (value < 0x10000) 130 if (value < 0x10000)
124 return -4; 131 return -4;
132 if (value > UNICODE_MAX)
133 return -2;
125 ret = 4; 134 ret = 4;
126 } else if ((*p & 0xfc) == 0xf8) { 135 } else
127 if (len < 5) 136 return -2;
128 return -1; 137 *val = value;
129 if (((p[1] & 0xc0) != 0x80) ||
130 ((p[2] & 0xc0) != 0x80) ||
131 ((p[3] & 0xc0) != 0x80) ||
132 ((p[4] & 0xc0) != 0x80))
133 return -3;
134 value = ((unsigned long)(*p++ & 0x3)) << 24;
135 value |= ((unsigned long)(*p++ & 0x3f)) << 18;
136 value |= ((unsigned long)(*p++ & 0x3f)) << 12;
137 value |= (*p++ & 0x3f) << 6;
138 value |= *p++ & 0x3f;
139 if (value < 0x200000)
140 return -4;
141 ret = 5;
142 } else if ((*p & 0xfe) == 0xfc) {
143 if (len < 6)
144 return -1;
145 if (((p[1] & 0xc0) != 0x80) ||
146 ((p[2] & 0xc0) != 0x80) ||
147 ((p[3] & 0xc0) != 0x80) ||
148 ((p[4] & 0xc0) != 0x80) ||
149 ((p[5] & 0xc0) != 0x80))
150 return -3;
151 value = ((unsigned long)(*p++ & 0x1)) << 30;
152 value |= ((unsigned long)(*p++ & 0x3f)) << 24;
153 value |= ((unsigned long)(*p++ & 0x3f)) << 18;
154 value |= ((unsigned long)(*p++ & 0x3f)) << 12;
155 value |= (*p++ & 0x3f) << 6;
156 value |= *p++ & 0x3f;
157 if (value < 0x4000000)
158 return -4;
159 ret = 6;
160 } else return -2;
161 *val = value;
162 return ret; 138 return ret;
163} 139}
164 140
165/* This takes a character 'value' and writes the UTF8 encoded value in 141/* This takes a Unicode code point 'value' and writes its UTF-8 encoded form
166 * 'str' where 'str' is a buffer containing 'len' characters. Returns 142 * in 'str' where 'str' is a buffer of at least length 'len'. If 'str'
167 * the number of characters written or -1 if 'len' is too small. 'str' can 143 * is NULL, then nothing is written and just the return code is determined.
168 * be set to NULL in which case it just returns the number of characters. 144
169 * It will need at most 6 characters. 145 * Returns less than zero on error:
146 * -1 if 'str' is not NULL and 'len' is too small
147 * -2 if 'value' is an invalid character (surrogate or out-of-range)
148 *
149 * Otherwise, returns the number of bytes in 'value's encoded form
150 * (i.e., the number of bytes written to 'str' when it's not NULL).
151 *
152 * It will need at most 4 characters.
170 */ 153 */
171 154
172int 155int
173UTF8_putc(unsigned char *str, int len, unsigned long value) 156UTF8_putc(unsigned char *str, int len, unsigned long value)
174{ 157{
175 if (!str)
176 len = 6; /* Maximum we will need */
177 else if (len <= 0)
178 return -1;
179 if (value < 0x80) { 158 if (value < 0x80) {
180 if (str) 159 if (str != NULL) {
181 *str = (unsigned char)value; 160 if (len < 1)
161 return -1;
162 str[0] = (unsigned char)value;
163 }
182 return 1; 164 return 1;
183 } 165 }
184 if (value < 0x800) { 166 if (value < 0x800) {
185 if (len < 2) 167 if (str != NULL) {
186 return -1; 168 if (len < 2)
187 if (str) { 169 return -1;
188 *str++ = (unsigned char)(((value >> 6) & 0x1f) | 0xc0); 170 str[0] = (unsigned char)(((value >> 6) & 0x1f) | 0xc0);
189 *str = (unsigned char)((value & 0x3f) | 0x80); 171 str[1] = (unsigned char)((value & 0x3f) | 0x80);
190 } 172 }
191 return 2; 173 return 2;
192 } 174 }
193 if (value < 0x10000) { 175 if (value < 0x10000) {
194 if (len < 3) 176 if (UNICODE_IS_SURROGATE(value))
195 return -1; 177 return -2;
196 if (str) { 178 if (str != NULL) {
197 *str++ = (unsigned char)(((value >> 12) & 0xf) | 0xe0); 179 if (len < 3)
198 *str++ = (unsigned char)(((value >> 6) & 0x3f) | 0x80); 180 return -1;
199 *str = (unsigned char)((value & 0x3f) | 0x80); 181 str[0] = (unsigned char)(((value >> 12) & 0xf) | 0xe0);
182 str[1] = (unsigned char)(((value >> 6) & 0x3f) | 0x80);
183 str[2] = (unsigned char)((value & 0x3f) | 0x80);
200 } 184 }
201 return 3; 185 return 3;
202 } 186 }
203 if (value < 0x200000) { 187 if (value <= UNICODE_MAX) {
204 if (len < 4) 188 if (str != NULL) {
205 return -1; 189 if (len < 4)
206 if (str) { 190 return -1;
207 *str++ = (unsigned char)(((value >> 18) & 0x7) | 0xf0); 191 str[0] = (unsigned char)(((value >> 18) & 0x7) | 0xf0);
208 *str++ = (unsigned char)(((value >> 12) & 0x3f) | 0x80); 192 str[1] = (unsigned char)(((value >> 12) & 0x3f) | 0x80);
209 *str++ = (unsigned char)(((value >> 6) & 0x3f) | 0x80); 193 str[2] = (unsigned char)(((value >> 6) & 0x3f) | 0x80);
210 *str = (unsigned char)((value & 0x3f) | 0x80); 194 str[3] = (unsigned char)((value & 0x3f) | 0x80);
211 } 195 }
212 return 4; 196 return 4;
213 } 197 }
214 if (value < 0x4000000) { 198 return -2;
215 if (len < 5)
216 return -1;
217 if (str) {
218 *str++ = (unsigned char)(((value >> 24) & 0x3) | 0xf8);
219 *str++ = (unsigned char)(((value >> 18) & 0x3f) | 0x80);
220 *str++ = (unsigned char)(((value >> 12) & 0x3f) | 0x80);
221 *str++ = (unsigned char)(((value >> 6) & 0x3f) | 0x80);
222 *str = (unsigned char)((value & 0x3f) | 0x80);
223 }
224 return 5;
225 }
226 if (len < 6)
227 return -1;
228 if (str) {
229 *str++ = (unsigned char)(((value >> 30) & 0x1) | 0xfc);
230 *str++ = (unsigned char)(((value >> 24) & 0x3f) | 0x80);
231 *str++ = (unsigned char)(((value >> 18) & 0x3f) | 0x80);
232 *str++ = (unsigned char)(((value >> 12) & 0x3f) | 0x80);
233 *str++ = (unsigned char)(((value >> 6) & 0x3f) | 0x80);
234 *str = (unsigned char)((value & 0x3f) | 0x80);
235 }
236 return 6;
237} 199}
diff --git a/src/lib/libcrypto/asn1/asn1_locl.h b/src/lib/libcrypto/asn1/asn1_locl.h
index 9fcf0d9530..2c6087bf02 100644
--- a/src/lib/libcrypto/asn1/asn1_locl.h
+++ b/src/lib/libcrypto/asn1/asn1_locl.h
@@ -143,3 +143,14 @@ struct x509_crl_method_st
143 ASN1_INTEGER *ser, X509_NAME *issuer); 143 ASN1_INTEGER *ser, X509_NAME *issuer);
144 int (*crl_verify)(X509_CRL *crl, EVP_PKEY *pk); 144 int (*crl_verify)(X509_CRL *crl, EVP_PKEY *pk);
145 }; 145 };
146
147/*
148 * Unicode codepoint constants
149 */
150#define UNICODE_MAX 0x10FFFF
151#define UNICODE_SURROGATE_MIN 0x00D800
152#define UNICODE_SURROGATE_MAX 0x00DFFF
153
154#define UNICODE_IS_SURROGATE(x) \
155 ((x) >= UNICODE_SURROGATE_MIN && (x) <= UNICODE_SURROGATE_MAX)
156
diff --git a/src/lib/libssl/src/crypto/asn1/a_mbstr.c b/src/lib/libssl/src/crypto/asn1/a_mbstr.c
index 9945ede2ac..ebc7f2681c 100644
--- a/src/lib/libssl/src/crypto/asn1/a_mbstr.c
+++ b/src/lib/libssl/src/crypto/asn1/a_mbstr.c
@@ -60,6 +60,7 @@
60#include <ctype.h> 60#include <ctype.h>
61#include "cryptlib.h" 61#include "cryptlib.h"
62#include <openssl/asn1.h> 62#include <openssl/asn1.h>
63#include "asn1_locl.h"
63 64
64static int traverse_string(const unsigned char *p, int len, int inform, 65static int traverse_string(const unsigned char *p, int len, int inform,
65 int (*rfunc)(unsigned long value, void *in), void *arg); 66 int (*rfunc)(unsigned long value, void *in), void *arg);
@@ -232,7 +233,11 @@ ASN1_mbstring_ncopy(ASN1_STRING **out, const unsigned char *in, int len,
232 233
233 case MBSTRING_UTF8: 234 case MBSTRING_UTF8:
234 outlen = 0; 235 outlen = 0;
235 traverse_string(in, len, inform, out_utf8, &outlen); 236 if (traverse_string(in, len, inform, out_utf8, &outlen) < 0) {
237 ASN1err(ASN1_F_ASN1_MBSTRING_NCOPY,
238 ASN1_R_ILLEGAL_CHARACTERS);
239 return -1;
240 }
236 cpyfunc = cpy_utf8; 241 cpyfunc = cpy_utf8;
237 break; 242 break;
238 } 243 }
@@ -267,12 +272,17 @@ traverse_string(const unsigned char *p, int len, int inform,
267 } else if (inform == MBSTRING_BMP) { 272 } else if (inform == MBSTRING_BMP) {
268 value = *p++ << 8; 273 value = *p++ << 8;
269 value |= *p++; 274 value |= *p++;
275 /* BMP is explicitly defined to not support surrogates */
276 if (UNICODE_IS_SURROGATE(value))
277 return -1;
270 len -= 2; 278 len -= 2;
271 } else if (inform == MBSTRING_UNIV) { 279 } else if (inform == MBSTRING_UNIV) {
272 value = ((unsigned long)*p++) << 24; 280 value = ((unsigned long)*p++) << 24;
273 value |= ((unsigned long)*p++) << 16; 281 value |= ((unsigned long)*p++) << 16;
274 value |= *p++ << 8; 282 value |= *p++ << 8;
275 value |= *p++; 283 value |= *p++;
284 if (value > UNICODE_MAX || UNICODE_IS_SURROGATE(value))
285 return -1;
276 len -= 4; 286 len -= 4;
277 } else { 287 } else {
278 ret = UTF8_getc(p, len, &value); 288 ret = UTF8_getc(p, len, &value);
@@ -310,9 +320,13 @@ static int
310out_utf8(unsigned long value, void *arg) 320out_utf8(unsigned long value, void *arg)
311{ 321{
312 int *outlen; 322 int *outlen;
323 int ret;
313 324
314 outlen = arg; 325 outlen = arg;
315 *outlen += UTF8_putc(NULL, -1, value); 326 ret = UTF8_putc(NULL, -1, value);
327 if (ret < 0)
328 return ret;
329 *outlen += ret;
316 return 1; 330 return 1;
317} 331}
318 332
diff --git a/src/lib/libssl/src/crypto/asn1/a_strex.c b/src/lib/libssl/src/crypto/asn1/a_strex.c
index 462a4059be..684e933c4f 100644
--- a/src/lib/libssl/src/crypto/asn1/a_strex.c
+++ b/src/lib/libssl/src/crypto/asn1/a_strex.c
@@ -62,6 +62,7 @@
62#include <openssl/crypto.h> 62#include <openssl/crypto.h>
63#include <openssl/x509.h> 63#include <openssl/x509.h>
64#include <openssl/asn1.h> 64#include <openssl/asn1.h>
65#include "asn1_locl.h"
65 66
66#include "charmap.h" 67#include "charmap.h"
67 68
@@ -215,11 +216,15 @@ do_buf(unsigned char *buf, int buflen, int type, unsigned char flags,
215 c |= ((unsigned long)*p++) << 16; 216 c |= ((unsigned long)*p++) << 16;
216 c |= ((unsigned long)*p++) << 8; 217 c |= ((unsigned long)*p++) << 8;
217 c |= *p++; 218 c |= *p++;
219 if (c > UNICODE_MAX || UNICODE_IS_SURROGATE(c))
220 return -1;
218 break; 221 break;
219 222
220 case 2: 223 case 2:
221 c = ((unsigned long)*p++) << 8; 224 c = ((unsigned long)*p++) << 8;
222 c |= *p++; 225 c |= *p++;
226 if (UNICODE_IS_SURROGATE(c))
227 return -1;
223 break; 228 break;
224 229
225 case 1: 230 case 1:
@@ -240,7 +245,10 @@ do_buf(unsigned char *buf, int buflen, int type, unsigned char flags,
240 if (type & BUF_TYPE_CONVUTF8) { 245 if (type & BUF_TYPE_CONVUTF8) {
241 unsigned char utfbuf[6]; 246 unsigned char utfbuf[6];
242 int utflen; 247 int utflen;
248
243 utflen = UTF8_putc(utfbuf, sizeof utfbuf, c); 249 utflen = UTF8_putc(utfbuf, sizeof utfbuf, c);
250 if (utflen < 0)
251 return -1;
244 for (i = 0; i < utflen; i++) { 252 for (i = 0; i < utflen; i++) {
245 /* We don't need to worry about setting orflags correctly 253 /* We don't need to worry about setting orflags correctly
246 * because if utflen==1 its value will be correct anyway 254 * because if utflen==1 its value will be correct anyway
diff --git a/src/lib/libssl/src/crypto/asn1/a_utf8.c b/src/lib/libssl/src/crypto/asn1/a_utf8.c
index c224db4c12..f5e4bec7e0 100644
--- a/src/lib/libssl/src/crypto/asn1/a_utf8.c
+++ b/src/lib/libssl/src/crypto/asn1/a_utf8.c
@@ -59,11 +59,13 @@
59#include <stdio.h> 59#include <stdio.h>
60#include "cryptlib.h" 60#include "cryptlib.h"
61#include <openssl/asn1.h> 61#include <openssl/asn1.h>
62#include "asn1_locl.h"
62 63
63 64
64/* UTF8 utilities */ 65/* UTF8 utilities */
65 66
66/* This parses a UTF8 string one character at a time. It is passed a pointer 67/*
68 * This parses a UTF8 string one character at a time. It is passed a pointer
67 * to the string and the length of the string. It sets 'value' to the value of 69 * to the string and the length of the string. It sets 'value' to the value of
68 * the current character. It returns the number of characters read or a 70 * the current character. It returns the number of characters read or a
69 * negative error code: 71 * negative error code:
@@ -88,6 +90,8 @@ UTF8_getc(const unsigned char *str, int len, unsigned long *val)
88 value = *p++ & 0x7f; 90 value = *p++ & 0x7f;
89 ret = 1; 91 ret = 1;
90 } else if ((*p & 0xe0) == 0xc0) { 92 } else if ((*p & 0xe0) == 0xc0) {
93 if (*p < 0xc2)
94 return -2;
91 if (len < 2) 95 if (len < 2)
92 return -1; 96 return -1;
93 if ((p[1] & 0xc0) != 0x80) 97 if ((p[1] & 0xc0) != 0x80)
@@ -108,8 +112,11 @@ UTF8_getc(const unsigned char *str, int len, unsigned long *val)
108 value |= *p++ & 0x3f; 112 value |= *p++ & 0x3f;
109 if (value < 0x800) 113 if (value < 0x800)
110 return -4; 114 return -4;
115 /* surrogate pair code points are not valid */
116 if (value >= 0xd800 && value < 0xe000)
117 return -2;
111 ret = 3; 118 ret = 3;
112 } else if ((*p & 0xf8) == 0xf0) { 119 } else if ((*p & 0xf8) == 0xf0 && (*p < 0xf5)) {
113 if (len < 4) 120 if (len < 4)
114 return -1; 121 return -1;
115 if (((p[1] & 0xc0) != 0x80) || 122 if (((p[1] & 0xc0) != 0x80) ||
@@ -122,116 +129,71 @@ UTF8_getc(const unsigned char *str, int len, unsigned long *val)
122 value |= *p++ & 0x3f; 129 value |= *p++ & 0x3f;
123 if (value < 0x10000) 130 if (value < 0x10000)
124 return -4; 131 return -4;
132 if (value > UNICODE_MAX)
133 return -2;
125 ret = 4; 134 ret = 4;
126 } else if ((*p & 0xfc) == 0xf8) { 135 } else
127 if (len < 5) 136 return -2;
128 return -1; 137 *val = value;
129 if (((p[1] & 0xc0) != 0x80) ||
130 ((p[2] & 0xc0) != 0x80) ||
131 ((p[3] & 0xc0) != 0x80) ||
132 ((p[4] & 0xc0) != 0x80))
133 return -3;
134 value = ((unsigned long)(*p++ & 0x3)) << 24;
135 value |= ((unsigned long)(*p++ & 0x3f)) << 18;
136 value |= ((unsigned long)(*p++ & 0x3f)) << 12;
137 value |= (*p++ & 0x3f) << 6;
138 value |= *p++ & 0x3f;
139 if (value < 0x200000)
140 return -4;
141 ret = 5;
142 } else if ((*p & 0xfe) == 0xfc) {
143 if (len < 6)
144 return -1;
145 if (((p[1] & 0xc0) != 0x80) ||
146 ((p[2] & 0xc0) != 0x80) ||
147 ((p[3] & 0xc0) != 0x80) ||
148 ((p[4] & 0xc0) != 0x80) ||
149 ((p[5] & 0xc0) != 0x80))
150 return -3;
151 value = ((unsigned long)(*p++ & 0x1)) << 30;
152 value |= ((unsigned long)(*p++ & 0x3f)) << 24;
153 value |= ((unsigned long)(*p++ & 0x3f)) << 18;
154 value |= ((unsigned long)(*p++ & 0x3f)) << 12;
155 value |= (*p++ & 0x3f) << 6;
156 value |= *p++ & 0x3f;
157 if (value < 0x4000000)
158 return -4;
159 ret = 6;
160 } else return -2;
161 *val = value;
162 return ret; 138 return ret;
163} 139}
164 140
165/* This takes a character 'value' and writes the UTF8 encoded value in 141/* This takes a Unicode code point 'value' and writes its UTF-8 encoded form
166 * 'str' where 'str' is a buffer containing 'len' characters. Returns 142 * in 'str' where 'str' is a buffer of at least length 'len'. If 'str'
167 * the number of characters written or -1 if 'len' is too small. 'str' can 143 * is NULL, then nothing is written and just the return code is determined.
168 * be set to NULL in which case it just returns the number of characters. 144
169 * It will need at most 6 characters. 145 * Returns less than zero on error:
146 * -1 if 'str' is not NULL and 'len' is too small
147 * -2 if 'value' is an invalid character (surrogate or out-of-range)
148 *
149 * Otherwise, returns the number of bytes in 'value's encoded form
150 * (i.e., the number of bytes written to 'str' when it's not NULL).
151 *
152 * It will need at most 4 characters.
170 */ 153 */
171 154
172int 155int
173UTF8_putc(unsigned char *str, int len, unsigned long value) 156UTF8_putc(unsigned char *str, int len, unsigned long value)
174{ 157{
175 if (!str)
176 len = 6; /* Maximum we will need */
177 else if (len <= 0)
178 return -1;
179 if (value < 0x80) { 158 if (value < 0x80) {
180 if (str) 159 if (str != NULL) {
181 *str = (unsigned char)value; 160 if (len < 1)
161 return -1;
162 str[0] = (unsigned char)value;
163 }
182 return 1; 164 return 1;
183 } 165 }
184 if (value < 0x800) { 166 if (value < 0x800) {
185 if (len < 2) 167 if (str != NULL) {
186 return -1; 168 if (len < 2)
187 if (str) { 169 return -1;
188 *str++ = (unsigned char)(((value >> 6) & 0x1f) | 0xc0); 170 str[0] = (unsigned char)(((value >> 6) & 0x1f) | 0xc0);
189 *str = (unsigned char)((value & 0x3f) | 0x80); 171 str[1] = (unsigned char)((value & 0x3f) | 0x80);
190 } 172 }
191 return 2; 173 return 2;
192 } 174 }
193 if (value < 0x10000) { 175 if (value < 0x10000) {
194 if (len < 3) 176 if (UNICODE_IS_SURROGATE(value))
195 return -1; 177 return -2;
196 if (str) { 178 if (str != NULL) {
197 *str++ = (unsigned char)(((value >> 12) & 0xf) | 0xe0); 179 if (len < 3)
198 *str++ = (unsigned char)(((value >> 6) & 0x3f) | 0x80); 180 return -1;
199 *str = (unsigned char)((value & 0x3f) | 0x80); 181 str[0] = (unsigned char)(((value >> 12) & 0xf) | 0xe0);
182 str[1] = (unsigned char)(((value >> 6) & 0x3f) | 0x80);
183 str[2] = (unsigned char)((value & 0x3f) | 0x80);
200 } 184 }
201 return 3; 185 return 3;
202 } 186 }
203 if (value < 0x200000) { 187 if (value <= UNICODE_MAX) {
204 if (len < 4) 188 if (str != NULL) {
205 return -1; 189 if (len < 4)
206 if (str) { 190 return -1;
207 *str++ = (unsigned char)(((value >> 18) & 0x7) | 0xf0); 191 str[0] = (unsigned char)(((value >> 18) & 0x7) | 0xf0);
208 *str++ = (unsigned char)(((value >> 12) & 0x3f) | 0x80); 192 str[1] = (unsigned char)(((value >> 12) & 0x3f) | 0x80);
209 *str++ = (unsigned char)(((value >> 6) & 0x3f) | 0x80); 193 str[2] = (unsigned char)(((value >> 6) & 0x3f) | 0x80);
210 *str = (unsigned char)((value & 0x3f) | 0x80); 194 str[3] = (unsigned char)((value & 0x3f) | 0x80);
211 } 195 }
212 return 4; 196 return 4;
213 } 197 }
214 if (value < 0x4000000) { 198 return -2;
215 if (len < 5)
216 return -1;
217 if (str) {
218 *str++ = (unsigned char)(((value >> 24) & 0x3) | 0xf8);
219 *str++ = (unsigned char)(((value >> 18) & 0x3f) | 0x80);
220 *str++ = (unsigned char)(((value >> 12) & 0x3f) | 0x80);
221 *str++ = (unsigned char)(((value >> 6) & 0x3f) | 0x80);
222 *str = (unsigned char)((value & 0x3f) | 0x80);
223 }
224 return 5;
225 }
226 if (len < 6)
227 return -1;
228 if (str) {
229 *str++ = (unsigned char)(((value >> 30) & 0x1) | 0xfc);
230 *str++ = (unsigned char)(((value >> 24) & 0x3f) | 0x80);
231 *str++ = (unsigned char)(((value >> 18) & 0x3f) | 0x80);
232 *str++ = (unsigned char)(((value >> 12) & 0x3f) | 0x80);
233 *str++ = (unsigned char)(((value >> 6) & 0x3f) | 0x80);
234 *str = (unsigned char)((value & 0x3f) | 0x80);
235 }
236 return 6;
237} 199}
diff --git a/src/lib/libssl/src/crypto/asn1/asn1_locl.h b/src/lib/libssl/src/crypto/asn1/asn1_locl.h
index 9fcf0d9530..2c6087bf02 100644
--- a/src/lib/libssl/src/crypto/asn1/asn1_locl.h
+++ b/src/lib/libssl/src/crypto/asn1/asn1_locl.h
@@ -143,3 +143,14 @@ struct x509_crl_method_st
143 ASN1_INTEGER *ser, X509_NAME *issuer); 143 ASN1_INTEGER *ser, X509_NAME *issuer);
144 int (*crl_verify)(X509_CRL *crl, EVP_PKEY *pk); 144 int (*crl_verify)(X509_CRL *crl, EVP_PKEY *pk);
145 }; 145 };
146
147/*
148 * Unicode codepoint constants
149 */
150#define UNICODE_MAX 0x10FFFF
151#define UNICODE_SURROGATE_MIN 0x00D800
152#define UNICODE_SURROGATE_MAX 0x00DFFF
153
154#define UNICODE_IS_SURROGATE(x) \
155 ((x) >= UNICODE_SURROGATE_MIN && (x) <= UNICODE_SURROGATE_MAX)
156