diff options
author | guenther <> | 2014-05-20 01:21:52 +0000 |
---|---|---|
committer | guenther <> | 2014-05-20 01:21:52 +0000 |
commit | 3c12c2aea2c1cdfaab42054ed8643b2bd31e4fdd (patch) | |
tree | d009a3ee2d810a28ee368be1fe06aaab046f09b1 /src/lib/libcrypto/asn1/a_utf8.c | |
parent | 71e22b3fafe9cd4167c81a927e41cd9a06077d02 (diff) | |
download | openbsd-3c12c2aea2c1cdfaab42054ed8643b2bd31e4fdd.tar.gz openbsd-3c12c2aea2c1cdfaab42054ed8643b2bd31e4fdd.tar.bz2 openbsd-3c12c2aea2c1cdfaab42054ed8643b2bd31e4fdd.zip |
Bring UTF8_{getc,putc} up-to-date: it's been a decade since 5- and 6-byte
encodings and encoding of surrogate pair code points were banned. Add
checks for those, both to those functions and to the code decoding the
BMP and UNIV encodings.
ok miod@
Diffstat (limited to 'src/lib/libcrypto/asn1/a_utf8.c')
-rw-r--r-- | src/lib/libcrypto/asn1/a_utf8.c | 144 |
1 files changed, 53 insertions, 91 deletions
diff --git a/src/lib/libcrypto/asn1/a_utf8.c b/src/lib/libcrypto/asn1/a_utf8.c index c224db4c12..f5e4bec7e0 100644 --- a/src/lib/libcrypto/asn1/a_utf8.c +++ b/src/lib/libcrypto/asn1/a_utf8.c | |||
@@ -59,11 +59,13 @@ | |||
59 | #include <stdio.h> | 59 | #include <stdio.h> |
60 | #include "cryptlib.h" | 60 | #include "cryptlib.h" |
61 | #include <openssl/asn1.h> | 61 | #include <openssl/asn1.h> |
62 | #include "asn1_locl.h" | ||
62 | 63 | ||
63 | 64 | ||
64 | /* UTF8 utilities */ | 65 | /* UTF8 utilities */ |
65 | 66 | ||
66 | /* This parses a UTF8 string one character at a time. It is passed a pointer | 67 | /* |
68 | * This parses a UTF8 string one character at a time. It is passed a pointer | ||
67 | * to the string and the length of the string. It sets 'value' to the value of | 69 | * to the string and the length of the string. It sets 'value' to the value of |
68 | * the current character. It returns the number of characters read or a | 70 | * the current character. It returns the number of characters read or a |
69 | * negative error code: | 71 | * negative error code: |
@@ -88,6 +90,8 @@ UTF8_getc(const unsigned char *str, int len, unsigned long *val) | |||
88 | value = *p++ & 0x7f; | 90 | value = *p++ & 0x7f; |
89 | ret = 1; | 91 | ret = 1; |
90 | } else if ((*p & 0xe0) == 0xc0) { | 92 | } else if ((*p & 0xe0) == 0xc0) { |
93 | if (*p < 0xc2) | ||
94 | return -2; | ||
91 | if (len < 2) | 95 | if (len < 2) |
92 | return -1; | 96 | return -1; |
93 | if ((p[1] & 0xc0) != 0x80) | 97 | if ((p[1] & 0xc0) != 0x80) |
@@ -108,8 +112,11 @@ UTF8_getc(const unsigned char *str, int len, unsigned long *val) | |||
108 | value |= *p++ & 0x3f; | 112 | value |= *p++ & 0x3f; |
109 | if (value < 0x800) | 113 | if (value < 0x800) |
110 | return -4; | 114 | return -4; |
115 | /* surrogate pair code points are not valid */ | ||
116 | if (value >= 0xd800 && value < 0xe000) | ||
117 | return -2; | ||
111 | ret = 3; | 118 | ret = 3; |
112 | } else if ((*p & 0xf8) == 0xf0) { | 119 | } else if ((*p & 0xf8) == 0xf0 && (*p < 0xf5)) { |
113 | if (len < 4) | 120 | if (len < 4) |
114 | return -1; | 121 | return -1; |
115 | if (((p[1] & 0xc0) != 0x80) || | 122 | if (((p[1] & 0xc0) != 0x80) || |
@@ -122,116 +129,71 @@ UTF8_getc(const unsigned char *str, int len, unsigned long *val) | |||
122 | value |= *p++ & 0x3f; | 129 | value |= *p++ & 0x3f; |
123 | if (value < 0x10000) | 130 | if (value < 0x10000) |
124 | return -4; | 131 | return -4; |
132 | if (value > UNICODE_MAX) | ||
133 | return -2; | ||
125 | ret = 4; | 134 | ret = 4; |
126 | } else if ((*p & 0xfc) == 0xf8) { | 135 | } else |
127 | if (len < 5) | 136 | return -2; |
128 | return -1; | 137 | *val = value; |
129 | if (((p[1] & 0xc0) != 0x80) || | ||
130 | ((p[2] & 0xc0) != 0x80) || | ||
131 | ((p[3] & 0xc0) != 0x80) || | ||
132 | ((p[4] & 0xc0) != 0x80)) | ||
133 | return -3; | ||
134 | value = ((unsigned long)(*p++ & 0x3)) << 24; | ||
135 | value |= ((unsigned long)(*p++ & 0x3f)) << 18; | ||
136 | value |= ((unsigned long)(*p++ & 0x3f)) << 12; | ||
137 | value |= (*p++ & 0x3f) << 6; | ||
138 | value |= *p++ & 0x3f; | ||
139 | if (value < 0x200000) | ||
140 | return -4; | ||
141 | ret = 5; | ||
142 | } else if ((*p & 0xfe) == 0xfc) { | ||
143 | if (len < 6) | ||
144 | return -1; | ||
145 | if (((p[1] & 0xc0) != 0x80) || | ||
146 | ((p[2] & 0xc0) != 0x80) || | ||
147 | ((p[3] & 0xc0) != 0x80) || | ||
148 | ((p[4] & 0xc0) != 0x80) || | ||
149 | ((p[5] & 0xc0) != 0x80)) | ||
150 | return -3; | ||
151 | value = ((unsigned long)(*p++ & 0x1)) << 30; | ||
152 | value |= ((unsigned long)(*p++ & 0x3f)) << 24; | ||
153 | value |= ((unsigned long)(*p++ & 0x3f)) << 18; | ||
154 | value |= ((unsigned long)(*p++ & 0x3f)) << 12; | ||
155 | value |= (*p++ & 0x3f) << 6; | ||
156 | value |= *p++ & 0x3f; | ||
157 | if (value < 0x4000000) | ||
158 | return -4; | ||
159 | ret = 6; | ||
160 | } else return -2; | ||
161 | *val = value; | ||
162 | return ret; | 138 | return ret; |
163 | } | 139 | } |
164 | 140 | ||
165 | /* This takes a character 'value' and writes the UTF8 encoded value in | 141 | /* This takes a Unicode code point 'value' and writes its UTF-8 encoded form |
166 | * 'str' where 'str' is a buffer containing 'len' characters. Returns | 142 | * in 'str' where 'str' is a buffer of at least length 'len'. If 'str' |
167 | * the number of characters written or -1 if 'len' is too small. 'str' can | 143 | * is NULL, then nothing is written and just the return code is determined. |
168 | * be set to NULL in which case it just returns the number of characters. | 144 | |
169 | * It will need at most 6 characters. | 145 | * Returns less than zero on error: |
146 | * -1 if 'str' is not NULL and 'len' is too small | ||
147 | * -2 if 'value' is an invalid character (surrogate or out-of-range) | ||
148 | * | ||
149 | * Otherwise, returns the number of bytes in 'value's encoded form | ||
150 | * (i.e., the number of bytes written to 'str' when it's not NULL). | ||
151 | * | ||
152 | * It will need at most 4 characters. | ||
170 | */ | 153 | */ |
171 | 154 | ||
172 | int | 155 | int |
173 | UTF8_putc(unsigned char *str, int len, unsigned long value) | 156 | UTF8_putc(unsigned char *str, int len, unsigned long value) |
174 | { | 157 | { |
175 | if (!str) | ||
176 | len = 6; /* Maximum we will need */ | ||
177 | else if (len <= 0) | ||
178 | return -1; | ||
179 | if (value < 0x80) { | 158 | if (value < 0x80) { |
180 | if (str) | 159 | if (str != NULL) { |
181 | *str = (unsigned char)value; | 160 | if (len < 1) |
161 | return -1; | ||
162 | str[0] = (unsigned char)value; | ||
163 | } | ||
182 | return 1; | 164 | return 1; |
183 | } | 165 | } |
184 | if (value < 0x800) { | 166 | if (value < 0x800) { |
185 | if (len < 2) | 167 | if (str != NULL) { |
186 | return -1; | 168 | if (len < 2) |
187 | if (str) { | 169 | return -1; |
188 | *str++ = (unsigned char)(((value >> 6) & 0x1f) | 0xc0); | 170 | str[0] = (unsigned char)(((value >> 6) & 0x1f) | 0xc0); |
189 | *str = (unsigned char)((value & 0x3f) | 0x80); | 171 | str[1] = (unsigned char)((value & 0x3f) | 0x80); |
190 | } | 172 | } |
191 | return 2; | 173 | return 2; |
192 | } | 174 | } |
193 | if (value < 0x10000) { | 175 | if (value < 0x10000) { |
194 | if (len < 3) | 176 | if (UNICODE_IS_SURROGATE(value)) |
195 | return -1; | 177 | return -2; |
196 | if (str) { | 178 | if (str != NULL) { |
197 | *str++ = (unsigned char)(((value >> 12) & 0xf) | 0xe0); | 179 | if (len < 3) |
198 | *str++ = (unsigned char)(((value >> 6) & 0x3f) | 0x80); | 180 | return -1; |
199 | *str = (unsigned char)((value & 0x3f) | 0x80); | 181 | str[0] = (unsigned char)(((value >> 12) & 0xf) | 0xe0); |
182 | str[1] = (unsigned char)(((value >> 6) & 0x3f) | 0x80); | ||
183 | str[2] = (unsigned char)((value & 0x3f) | 0x80); | ||
200 | } | 184 | } |
201 | return 3; | 185 | return 3; |
202 | } | 186 | } |
203 | if (value < 0x200000) { | 187 | if (value <= UNICODE_MAX) { |
204 | if (len < 4) | 188 | if (str != NULL) { |
205 | return -1; | 189 | if (len < 4) |
206 | if (str) { | 190 | return -1; |
207 | *str++ = (unsigned char)(((value >> 18) & 0x7) | 0xf0); | 191 | str[0] = (unsigned char)(((value >> 18) & 0x7) | 0xf0); |
208 | *str++ = (unsigned char)(((value >> 12) & 0x3f) | 0x80); | 192 | str[1] = (unsigned char)(((value >> 12) & 0x3f) | 0x80); |
209 | *str++ = (unsigned char)(((value >> 6) & 0x3f) | 0x80); | 193 | str[2] = (unsigned char)(((value >> 6) & 0x3f) | 0x80); |
210 | *str = (unsigned char)((value & 0x3f) | 0x80); | 194 | str[3] = (unsigned char)((value & 0x3f) | 0x80); |
211 | } | 195 | } |
212 | return 4; | 196 | return 4; |
213 | } | 197 | } |
214 | if (value < 0x4000000) { | 198 | return -2; |
215 | if (len < 5) | ||
216 | return -1; | ||
217 | if (str) { | ||
218 | *str++ = (unsigned char)(((value >> 24) & 0x3) | 0xf8); | ||
219 | *str++ = (unsigned char)(((value >> 18) & 0x3f) | 0x80); | ||
220 | *str++ = (unsigned char)(((value >> 12) & 0x3f) | 0x80); | ||
221 | *str++ = (unsigned char)(((value >> 6) & 0x3f) | 0x80); | ||
222 | *str = (unsigned char)((value & 0x3f) | 0x80); | ||
223 | } | ||
224 | return 5; | ||
225 | } | ||
226 | if (len < 6) | ||
227 | return -1; | ||
228 | if (str) { | ||
229 | *str++ = (unsigned char)(((value >> 30) & 0x1) | 0xfc); | ||
230 | *str++ = (unsigned char)(((value >> 24) & 0x3f) | 0x80); | ||
231 | *str++ = (unsigned char)(((value >> 18) & 0x3f) | 0x80); | ||
232 | *str++ = (unsigned char)(((value >> 12) & 0x3f) | 0x80); | ||
233 | *str++ = (unsigned char)(((value >> 6) & 0x3f) | 0x80); | ||
234 | *str = (unsigned char)((value & 0x3f) | 0x80); | ||
235 | } | ||
236 | return 6; | ||
237 | } | 199 | } |