summary | refs | log | tree | commit | diff
path: root/src/lib/libcrypto/asn1/a_utf8.c
diff options
context:
space:
mode:
author guenther <> 2014-05-20 01:21:52 +0000
committer guenther <> 2014-05-20 01:21:52 +0000
commit 3c12c2aea2c1cdfaab42054ed8643b2bd31e4fdd (patch)
tree d009a3ee2d810a28ee368be1fe06aaab046f09b1 /src/lib/libcrypto/asn1/a_utf8.c
parent 71e22b3fafe9cd4167c81a927e41cd9a06077d02 (diff)
download openbsd-3c12c2aea2c1cdfaab42054ed8643b2bd31e4fdd.tar.gz
openbsd-3c12c2aea2c1cdfaab42054ed8643b2bd31e4fdd.tar.bz2
openbsd-3c12c2aea2c1cdfaab42054ed8643b2bd31e4fdd.zip
Bring UTF8_{getc,putc} up-to-date: it's been a decade since 5- and 6-byte
encodings and encoding of surrogate pair code points were banned. Add checks for those, both to those functions and to the code decoding the BMP and UNIV encodings. ok miod@
Diffstat (limited to 'src/lib/libcrypto/asn1/a_utf8.c')
-rw-r--r-- src/lib/libcrypto/asn1/a_utf8.c 144
1 files changed, 53 insertions, 91 deletions
diff --git a/src/lib/libcrypto/asn1/a_utf8.c b/src/lib/libcrypto/asn1/a_utf8.c
index c224db4c12..f5e4bec7e0 100644
--- a/src/lib/libcrypto/asn1/a_utf8.c
+++ b/src/lib/libcrypto/asn1/a_utf8.c
@@ -59,11 +59,13 @@
59#include <stdio.h> 59#include <stdio.h>
60#include "cryptlib.h" 60#include "cryptlib.h"
61#include <openssl/asn1.h> 61#include <openssl/asn1.h>
62#include "asn1_locl.h"
62 63
63 64
64/* UTF8 utilities */ 65/* UTF8 utilities */
65 66
66/* This parses a UTF8 string one character at a time. It is passed a pointer 67/*
68 * This parses a UTF8 string one character at a time. It is passed a pointer
67 * to the string and the length of the string. It sets 'value' to the value of 69 * to the string and the length of the string. It sets 'value' to the value of
68 * the current character. It returns the number of characters read or a 70 * the current character. It returns the number of characters read or a
69 * negative error code: 71 * negative error code:
@@ -88,6 +90,8 @@ UTF8_getc(const unsigned char *str, int len, unsigned long *val)
88 value = *p++ & 0x7f; 90 value = *p++ & 0x7f;
89 ret = 1; 91 ret = 1;
90 } else if ((*p & 0xe0) == 0xc0) { 92 } else if ((*p & 0xe0) == 0xc0) {
93 if (*p < 0xc2)
94 return -2;
91 if (len < 2) 95 if (len < 2)
92 return -1; 96 return -1;
93 if ((p[1] & 0xc0) != 0x80) 97 if ((p[1] & 0xc0) != 0x80)
@@ -108,8 +112,11 @@ UTF8_getc(const unsigned char *str, int len, unsigned long *val)
108 value |= *p++ & 0x3f; 112 value |= *p++ & 0x3f;
109 if (value < 0x800) 113 if (value < 0x800)
110 return -4; 114 return -4;
115 /* surrogate pair code points are not valid */
116 if (value >= 0xd800 && value < 0xe000)
117 return -2;
111 ret = 3; 118 ret = 3;
112 } else if ((*p & 0xf8) == 0xf0) { 119 } else if ((*p & 0xf8) == 0xf0 && (*p < 0xf5)) {
113 if (len < 4) 120 if (len < 4)
114 return -1; 121 return -1;
115 if (((p[1] & 0xc0) != 0x80) || 122 if (((p[1] & 0xc0) != 0x80) ||
@@ -122,116 +129,71 @@ UTF8_getc(const unsigned char *str, int len, unsigned long *val)
122 value |= *p++ & 0x3f; 129 value |= *p++ & 0x3f;
123 if (value < 0x10000) 130 if (value < 0x10000)
124 return -4; 131 return -4;
132 if (value > UNICODE_MAX)
133 return -2;
125 ret = 4; 134 ret = 4;
126 } else if ((*p & 0xfc) == 0xf8) { 135 } else
127 if (len < 5) 136 return -2;
128 return -1; 137 *val = value;
129 if (((p[1] & 0xc0) != 0x80) ||
130 ((p[2] & 0xc0) != 0x80) ||
131 ((p[3] & 0xc0) != 0x80) ||
132 ((p[4] & 0xc0) != 0x80))
133 return -3;
134 value = ((unsigned long)(*p++ & 0x3)) << 24;
135 value |= ((unsigned long)(*p++ & 0x3f)) << 18;
136 value |= ((unsigned long)(*p++ & 0x3f)) << 12;
137 value |= (*p++ & 0x3f) << 6;
138 value |= *p++ & 0x3f;
139 if (value < 0x200000)
140 return -4;
141 ret = 5;
142 } else if ((*p & 0xfe) == 0xfc) {
143 if (len < 6)
144 return -1;
145 if (((p[1] & 0xc0) != 0x80) ||
146 ((p[2] & 0xc0) != 0x80) ||
147 ((p[3] & 0xc0) != 0x80) ||
148 ((p[4] & 0xc0) != 0x80) ||
149 ((p[5] & 0xc0) != 0x80))
150 return -3;
151 value = ((unsigned long)(*p++ & 0x1)) << 30;
152 value |= ((unsigned long)(*p++ & 0x3f)) << 24;
153 value |= ((unsigned long)(*p++ & 0x3f)) << 18;
154 value |= ((unsigned long)(*p++ & 0x3f)) << 12;
155 value |= (*p++ & 0x3f) << 6;
156 value |= *p++ & 0x3f;
157 if (value < 0x4000000)
158 return -4;
159 ret = 6;
160 } else return -2;
161 *val = value;
162 return ret; 138 return ret;
163} 139}
164 140
165/* This takes a character 'value' and writes the UTF8 encoded value in 141/* This takes a Unicode code point 'value' and writes its UTF-8 encoded form
166 * 'str' where 'str' is a buffer containing 'len' characters. Returns 142 * in 'str' where 'str' is a buffer of at least length 'len'. If 'str'
167 * the number of characters written or -1 if 'len' is too small. 'str' can 143 * is NULL, then nothing is written and just the return code is determined.
168 * be set to NULL in which case it just returns the number of characters. 144
169 * It will need at most 6 characters. 145 * Returns less than zero on error:
146 * -1 if 'str' is not NULL and 'len' is too small
147 * -2 if 'value' is an invalid character (surrogate or out-of-range)
148 *
149 * Otherwise, returns the number of bytes in 'value's encoded form
150 * (i.e., the number of bytes written to 'str' when it's not NULL).
151 *
152 * It will need at most 4 characters.
170 */ 153 */
171 154
172int 155int
173UTF8_putc(unsigned char *str, int len, unsigned long value) 156UTF8_putc(unsigned char *str, int len, unsigned long value)
174{ 157{
175 if (!str)
176 len = 6; /* Maximum we will need */
177 else if (len <= 0)
178 return -1;
179 if (value < 0x80) { 158 if (value < 0x80) {
180 if (str) 159 if (str != NULL) {
181 *str = (unsigned char)value; 160 if (len < 1)
161 return -1;
162 str[0] = (unsigned char)value;
163 }
182 return 1; 164 return 1;
183 } 165 }
184 if (value < 0x800) { 166 if (value < 0x800) {
185 if (len < 2) 167 if (str != NULL) {
186 return -1; 168 if (len < 2)
187 if (str) { 169 return -1;
188 *str++ = (unsigned char)(((value >> 6) & 0x1f) | 0xc0); 170 str[0] = (unsigned char)(((value >> 6) & 0x1f) | 0xc0);
189 *str = (unsigned char)((value & 0x3f) | 0x80); 171 str[1] = (unsigned char)((value & 0x3f) | 0x80);
190 } 172 }
191 return 2; 173 return 2;
192 } 174 }
193 if (value < 0x10000) { 175 if (value < 0x10000) {
194 if (len < 3) 176 if (UNICODE_IS_SURROGATE(value))
195 return -1; 177 return -2;
196 if (str) { 178 if (str != NULL) {
197 *str++ = (unsigned char)(((value >> 12) & 0xf) | 0xe0); 179 if (len < 3)
198 *str++ = (unsigned char)(((value >> 6) & 0x3f) | 0x80); 180 return -1;
199 *str = (unsigned char)((value & 0x3f) | 0x80); 181 str[0] = (unsigned char)(((value >> 12) & 0xf) | 0xe0);
182 str[1] = (unsigned char)(((value >> 6) & 0x3f) | 0x80);
183 str[2] = (unsigned char)((value & 0x3f) | 0x80);
200 } 184 }
201 return 3; 185 return 3;
202 } 186 }
203 if (value < 0x200000) { 187 if (value <= UNICODE_MAX) {
204 if (len < 4) 188 if (str != NULL) {
205 return -1; 189 if (len < 4)
206 if (str) { 190 return -1;
207 *str++ = (unsigned char)(((value >> 18) & 0x7) | 0xf0); 191 str[0] = (unsigned char)(((value >> 18) & 0x7) | 0xf0);
208 *str++ = (unsigned char)(((value >> 12) & 0x3f) | 0x80); 192 str[1] = (unsigned char)(((value >> 12) & 0x3f) | 0x80);
209 *str++ = (unsigned char)(((value >> 6) & 0x3f) | 0x80); 193 str[2] = (unsigned char)(((value >> 6) & 0x3f) | 0x80);
210 *str = (unsigned char)((value & 0x3f) | 0x80); 194 str[3] = (unsigned char)((value & 0x3f) | 0x80);
211 } 195 }
212 return 4; 196 return 4;
213 } 197 }
214 if (value < 0x4000000) { 198 return -2;
215 if (len < 5)
216 return -1;
217 if (str) {
218 *str++ = (unsigned char)(((value >> 24) & 0x3) | 0xf8);
219 *str++ = (unsigned char)(((value >> 18) & 0x3f) | 0x80);
220 *str++ = (unsigned char)(((value >> 12) & 0x3f) | 0x80);
221 *str++ = (unsigned char)(((value >> 6) & 0x3f) | 0x80);
222 *str = (unsigned char)((value & 0x3f) | 0x80);
223 }
224 return 5;
225 }
226 if (len < 6)
227 return -1;
228 if (str) {
229 *str++ = (unsigned char)(((value >> 30) & 0x1) | 0xfc);
230 *str++ = (unsigned char)(((value >> 24) & 0x3f) | 0x80);
231 *str++ = (unsigned char)(((value >> 18) & 0x3f) | 0x80);
232 *str++ = (unsigned char)(((value >> 12) & 0x3f) | 0x80);
233 *str++ = (unsigned char)(((value >> 6) & 0x3f) | 0x80);
234 *str = (unsigned char)((value & 0x3f) | 0x80);
235 }
236 return 6;
237} 199}