diff options
author | guenther <> | 2014-05-18 22:04:14 +0000 |
---|---|---|
committer | guenther <> | 2014-05-18 22:04:14 +0000 |
commit | bf82f8039d52950f75f37cccfd9ea3a07a98a9bc (patch) | |
tree | e81c9ec1edaa5ea8b924ace9921c2fa9c389c5a2 /src | |
parent | 68e33037c2e3ee88c2f07e6259cb0ec3c5a5e54f (diff) | |
download | openbsd-bf82f8039d52950f75f37cccfd9ea3a07a98a9bc.tar.gz openbsd-bf82f8039d52950f75f37cccfd9ea3a07a98a9bc.tar.bz2 openbsd-bf82f8039d52950f75f37cccfd9ea3a07a98a9bc.zip |
Add regression test for UTF8_{getc,putc}()
Diffstat (limited to 'src')
-rw-r--r-- | src/regress/lib/libcrypto/Makefile | 5 | ||||
-rw-r--r-- | src/regress/lib/libcrypto/utf8/Makefile | 7 | ||||
-rw-r--r-- | src/regress/lib/libcrypto/utf8/utf8test.c | 307 |
3 files changed, 317 insertions, 2 deletions
diff --git a/src/regress/lib/libcrypto/Makefile b/src/regress/lib/libcrypto/Makefile index 6cf7191cd7..54fcae7c91 100644 --- a/src/regress/lib/libcrypto/Makefile +++ b/src/regress/lib/libcrypto/Makefile | |||
@@ -1,4 +1,4 @@ | |||
1 | # $OpenBSD: Makefile,v 1.9 2014/05/14 14:46:35 jsing Exp $ | 1 | # $OpenBSD: Makefile,v 1.10 2014/05/18 22:04:14 guenther Exp $ |
2 | 2 | ||
3 | SUBDIR= \ | 3 | SUBDIR= \ |
4 | aeswrap \ | 4 | aeswrap \ |
@@ -31,7 +31,8 @@ SUBDIR= \ | |||
31 | rmd \ | 31 | rmd \ |
32 | sha \ | 32 | sha \ |
33 | sha1 \ | 33 | sha1 \ |
34 | sha2 | 34 | sha2 \ |
35 | utf8 | ||
35 | 36 | ||
36 | install: | 37 | install: |
37 | 38 | ||
diff --git a/src/regress/lib/libcrypto/utf8/Makefile b/src/regress/lib/libcrypto/utf8/Makefile new file mode 100644 index 0000000000..4940e60050 --- /dev/null +++ b/src/regress/lib/libcrypto/utf8/Makefile | |||
@@ -0,0 +1,7 @@ | |||
1 | # $OpenBSD: Makefile,v 1.1 2014/05/18 22:04:14 guenther Exp $ | ||
2 | |||
3 | PROG= utf8test | ||
4 | LDADD= -lcrypto | ||
5 | DPADD= ${LIBCRYPTO} | ||
6 | |||
7 | .include <bsd.regress.mk> | ||
diff --git a/src/regress/lib/libcrypto/utf8/utf8test.c b/src/regress/lib/libcrypto/utf8/utf8test.c new file mode 100644 index 0000000000..5b737a5201 --- /dev/null +++ b/src/regress/lib/libcrypto/utf8/utf8test.c | |||
@@ -0,0 +1,307 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2014 Philip Guenther <guenther@openbsd.org> | ||
3 | * | ||
4 | * Permission to use, copy, modify, and distribute this software for any | ||
5 | * purpose with or without fee is hereby granted, provided that the above | ||
6 | * copyright notice and this permission notice appear in all copies. | ||
7 | * | ||
8 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | ||
9 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | ||
10 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | ||
11 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | ||
12 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | ||
13 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | ||
14 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | ||
15 | */ | ||
16 | |||
17 | /* | ||
18 | * A mostly exhaustive test of the UTF-8 decoder, UTF8_getc(), and encoder, UTF8_putc(); the three- and four-byte decoder checks are currently under #if 0 | ||
19 | */ | ||
20 | |||
21 | #include <stdio.h> | ||
22 | #include <string.h> | ||
23 | #include <err.h> | ||
24 | |||
25 | #include <openssl/asn1.h> | ||
26 | |||
27 | #define UNCHANGED 0xfedcba98 /* sentinel stored in value before a call so the test can tell whether the call wrote to it */ | ||
28 | |||
29 | #define ASSERT(x) /* if x is false, call errx(1, ...) with the source line number and the stringified expression */ \ | ||
30 | do { \ | ||
31 | if (!(x)) \ | ||
32 | errx(1, "test failed at line %d: %s", \ | ||
33 | __LINE__, #x); \ | ||
34 | } while (0) | ||
35 | |||
36 | int | ||
37 | main(void) | ||
38 | { | ||
39 | unsigned char testbuf[] = "012345"; /* scratch buffer for encoded bytes (7 bytes counting the NUL) */ | ||
40 | const unsigned char zerobuf[sizeof testbuf] = { 0 }; /* all-zero reference for the memcmp() checks */ | ||
41 | unsigned long value; | ||
42 | int i, j, k, l, ret; /* k and l are only used inside the #if 0 section below */ | ||
43 | |||
44 | /* | ||
45 | * First, verify UTF8_getc(): zero-length input must return 0 and leave value untouched | ||
46 | */ | ||
47 | value = UNCHANGED; | ||
48 | ret = UTF8_getc(testbuf, 0, &value); | ||
49 | ASSERT(ret == 0); | ||
50 | ASSERT(value == UNCHANGED); | ||
51 | |||
52 | /* check all valid single-byte chars (0x00 - 0x7F), with and without a spare input byte */ | ||
53 | for (i = 0; i < 0x80; i++) { | ||
54 | testbuf[0] = i; | ||
55 | ret = UTF8_getc(testbuf, 1, &value); | ||
56 | ASSERT(ret == 1); | ||
57 | ASSERT(value == i); | ||
58 | |||
59 | ret = UTF8_getc(testbuf, 2, &value); | ||
60 | ASSERT(ret == 1); | ||
61 | ASSERT(value == i); | ||
62 | } | ||
63 | |||
64 | /* | ||
65 | * Verify failure (expect -2 with value untouched) on all invalid initial bytes: | ||
66 | * 0x80 - 0xBF following bytes only | ||
67 | * 0xC0 - 0xC1 used to be in non-shortest forms | ||
68 | * 0xF5 - 0xFD used to be initial for 5 and 6 byte sequences | ||
69 | * 0xFE - 0xFF have never been valid in utf-8 | ||
70 | */ | ||
71 | for (i = 0x80; i < 0xC2; i++) { | ||
72 | value = UNCHANGED; | ||
73 | testbuf[0] = i; | ||
74 | ret = UTF8_getc(testbuf, 1, &value); | ||
75 | ASSERT(ret == -2); | ||
76 | ASSERT(value == UNCHANGED); | ||
77 | } | ||
78 | for (i = 0xF5; i < 0x100; i++) { | ||
79 | value = UNCHANGED; | ||
80 | testbuf[0] = i; | ||
81 | ret = UTF8_getc(testbuf, 1, &value); | ||
82 | ASSERT(ret == -2); | ||
83 | ASSERT(value == UNCHANGED); | ||
84 | } | ||
85 | |||
86 | /* | ||
87 | * Verify handling of all two-byte sequences (lead bytes 0xC2 - 0xDF, every possible second byte) | ||
88 | */ | ||
89 | for (i = 0xC2; i < 0xE0; i++) { | ||
90 | testbuf[0] = i; | ||
91 | |||
92 | for (j = 0; j < 0x100; j++) { | ||
93 | testbuf[1] = j; | ||
94 | |||
95 | value = UNCHANGED; | ||
96 | ret = UTF8_getc(testbuf, 1, &value); | ||
97 | ASSERT(ret == -1); /* only one of the two bytes was supplied */ | ||
98 | ASSERT(value == UNCHANGED); | ||
99 | |||
100 | ret = UTF8_getc(testbuf, 2, &value); | ||
101 | |||
102 | /* second byte outside the trailing-byte range 0x80 - 0xBF: expect -3 */ | ||
103 | if (j < 0x80 || j > 0xBF) { | ||
104 | ASSERT(ret == -3); | ||
105 | ASSERT(value == UNCHANGED); | ||
106 | continue; | ||
107 | } | ||
108 | |||
109 | /* valid: low 6 bits come from the trailing byte, the remaining bits from the lead byte */ | ||
110 | ASSERT(ret == 2); | ||
111 | ASSERT((value & 0x3F) == (j & 0x3F)); | ||
112 | ASSERT(value >> 6 == (i & 0x1F)); | ||
113 | } | ||
114 | } | ||
115 | |||
116 | #if 0 /* NOTE(review): the three- and four-byte UTF8_getc() checks below are compiled out; the reason is not recorded here */ | ||
117 | /* | ||
118 | * Verify handling of all three-byte sequences (lead bytes 0xE0 - 0xEF) | ||
119 | */ | ||
120 | for (i = 0xE0; i < 0xF0; i++) { | ||
121 | testbuf[0] = i; | ||
122 | |||
123 | for (j = 0; j < 0x100; j++) { | ||
124 | testbuf[1] = j; | ||
125 | |||
126 | for (k = 0; k < 0x100; k++) { | ||
127 | testbuf[2] = k; | ||
128 | |||
129 | value = UNCHANGED; | ||
130 | ret = UTF8_getc(testbuf, 2, &value); | ||
131 | ASSERT(ret == -1); | ||
132 | ASSERT(value == UNCHANGED); | ||
133 | |||
134 | ret = UTF8_getc(testbuf, 3, &value); | ||
135 | |||
136 | /* either trailing byte outside 0x80 - 0xBF: expect -3 */ | ||
137 | if (j < 0x80 || j > 0xBF || | ||
138 | k < 0x80 || k > 0xBF) { | ||
139 | ASSERT(ret == -3); | ||
140 | ASSERT(value == UNCHANGED); | ||
141 | continue; | ||
142 | } | ||
143 | |||
144 | /* non-shortest form: expect -4 */ | ||
145 | if (i == 0xE0 && j < 0xA0) { | ||
146 | ASSERT(ret == -4); | ||
147 | ASSERT(value == UNCHANGED); | ||
148 | continue; | ||
149 | } | ||
150 | |||
151 | /* surrogate pair code point (0xD800 - 0xDFFF): expect -2 */ | ||
152 | if (i == 0xED && j > 0x9F) { | ||
153 | ASSERT(ret == -2); | ||
154 | ASSERT(value == UNCHANGED); | ||
155 | continue; | ||
156 | } | ||
157 | |||
158 | ASSERT(ret == 3); | ||
159 | ASSERT((value & 0x3F) == (k & 0x3F)); | ||
160 | ASSERT(((value >> 6) & 0x3F) == (j & 0x3F)); | ||
161 | ASSERT(value >> 12 == (i & 0x0F)); | ||
162 | } | ||
163 | } | ||
164 | } | ||
165 | |||
166 | /* | ||
167 | * Verify handling of all four-byte sequences (lead bytes 0xF0 - 0xF4) | ||
168 | */ | ||
169 | for (i = 0xF0; i < 0xF5; i++) { | ||
170 | testbuf[0] = i; | ||
171 | |||
172 | for (j = 0; j < 0x100; j++) { | ||
173 | testbuf[1] = j; | ||
174 | |||
175 | for (k = 0; k < 0x100; k++) { | ||
176 | testbuf[2] = k; | ||
177 | |||
178 | for (l = 0; l < 0x100; l++) { | ||
179 | testbuf[3] = l; | ||
180 | |||
181 | value = UNCHANGED; | ||
182 | ret = UTF8_getc(testbuf, 3, &value); | ||
183 | ASSERT(ret == -1); | ||
184 | ASSERT(value == UNCHANGED); | ||
185 | |||
186 | ret = UTF8_getc(testbuf, 4, &value); | ||
187 | |||
188 | /* any trailing byte outside 0x80 - 0xBF: expect -3 */ | ||
189 | if (j < 0x80 || j > 0xBF || | ||
190 | k < 0x80 || k > 0xBF || | ||
191 | l < 0x80 || l > 0xBF) { | ||
192 | ASSERT(ret == -3); | ||
193 | ASSERT(value == UNCHANGED); | ||
194 | continue; | ||
195 | } | ||
196 | |||
197 | /* non-shortest form: expect -4 */ | ||
198 | if (i == 0xF0 && j < 0x90) { | ||
199 | ASSERT(ret == -4); | ||
200 | ASSERT(value == UNCHANGED); | ||
201 | continue; | ||
202 | } | ||
203 | |||
204 | /* beyond end of UCS range (above 0x10FFFF): expect -2 */ | ||
205 | if (i == 0xF4 && j > 0x8F) { | ||
206 | ASSERT(ret == -2); | ||
207 | ASSERT(value == UNCHANGED); | ||
208 | continue; | ||
209 | } | ||
210 | |||
211 | ASSERT(ret == 4); | ||
212 | ASSERT((value & 0x3F) == (l & 0x3F)); | ||
213 | ASSERT(((value >> 6) & 0x3F) == | ||
214 | (k & 0x3F)); | ||
215 | ASSERT(((value >> 12) & 0x3F) == | ||
216 | (j & 0x3F)); | ||
217 | ASSERT(value >> 18 == (i & 0x07)); | ||
218 | } | ||
219 | } | ||
220 | } | ||
221 | } | ||
222 | #endif | ||
223 | |||
224 | |||
225 | /* | ||
226 | * Next, verify UTF8_putc(); the buffer starts all-zero so any stray write shows up in the memcmp() checks | ||
227 | */ | ||
228 | memset(testbuf, 0, sizeof testbuf); | ||
229 | |||
230 | /* single-byte sequences: NULL buffer reports the length, a too-short buffer must give -1 and stay untouched */ | ||
231 | for (i = 0; i < 0x80; i++) { | ||
232 | ret = UTF8_putc(NULL, 0, i); | ||
233 | ASSERT(ret == 1); | ||
234 | |||
235 | testbuf[0] = 0; /* clear the byte written by the previous iteration */ | ||
236 | ret = UTF8_putc(testbuf, 0, i); | ||
237 | ASSERT(ret == -1); | ||
238 | ASSERT(memcmp(testbuf, zerobuf, sizeof testbuf) == 0); | ||
239 | |||
240 | ret = UTF8_putc(testbuf, 1, i); | ||
241 | ASSERT(ret == 1); | ||
242 | ASSERT(testbuf[0] == i); | ||
243 | ASSERT(memcmp(testbuf+1, zerobuf, sizeof(testbuf)-1) == 0); /* nothing written past the first byte */ | ||
244 | } | ||
245 | |||
246 | /* two-byte sequences: same length and short-buffer checks, then round-trip through UTF8_getc() */ | ||
247 | for (i = 0x80; i < 0x800; i++) { | ||
248 | ret = UTF8_putc(NULL, 0, i); | ||
249 | ASSERT(ret == 2); | ||
250 | |||
251 | testbuf[0] = testbuf[1] = 0; | ||
252 | ret = UTF8_putc(testbuf, 1, i); | ||
253 | ASSERT(ret == -1); | ||
254 | ASSERT(memcmp(testbuf, zerobuf, sizeof testbuf) == 0); | ||
255 | |||
256 | ret = UTF8_putc(testbuf, 2, i); | ||
257 | ASSERT(ret == 2); | ||
258 | ASSERT(memcmp(testbuf+2, zerobuf, sizeof(testbuf)-2) == 0); | ||
259 | ret = UTF8_getc(testbuf, 2, &value); | ||
260 | ASSERT(ret == 2); | ||
261 | ASSERT(value == i); | ||
262 | } | ||
263 | |||
264 | /* three-byte sequences: same checks, with a round-trip through UTF8_getc() */ | ||
265 | for (i = 0x800; i < 0x10000; i++) { | ||
266 | /* XXX skip surrogate pair code points */ | ||
267 | if (i >= 0xD800 && i < 0xE000) | ||
268 | continue; | ||
269 | |||
270 | ret = UTF8_putc(NULL, 0, i); | ||
271 | ASSERT(ret == 3); | ||
272 | |||
273 | testbuf[0] = testbuf[1] = testbuf[2] = 0; | ||
274 | ret = UTF8_putc(testbuf, 2, i); | ||
275 | ASSERT(ret == -1); | ||
276 | ASSERT(memcmp(testbuf, zerobuf, sizeof testbuf) == 0); | ||
277 | |||
278 | ret = UTF8_putc(testbuf, 3, i); | ||
279 | ASSERT(ret == 3); | ||
280 | ASSERT(memcmp(testbuf+3, zerobuf, sizeof(testbuf)-3) == 0); | ||
281 | ret = UTF8_getc(testbuf, 3, &value); | ||
282 | ASSERT(ret == 3); | ||
283 | ASSERT(value == i); | ||
284 | } | ||
285 | |||
286 | /* four-byte sequences (0x10000 - 0x10FFFF): same checks, with a round-trip through UTF8_getc() */ | ||
287 | for (i = 0x10000; i < 0x110000; i++) { | ||
288 | ret = UTF8_putc(NULL, 0, i); | ||
289 | ASSERT(ret == 4); | ||
290 | |||
291 | testbuf[0] = testbuf[1] = testbuf[2] = testbuf[3] = 0; | ||
292 | ret = UTF8_putc(testbuf, 3, i); | ||
293 | ASSERT(ret == -1); | ||
294 | ASSERT(memcmp(testbuf, zerobuf, sizeof testbuf) == 0); | ||
295 | |||
296 | ret = UTF8_putc(testbuf, 4, i); | ||
297 | ASSERT(ret == 4); | ||
298 | ASSERT(memcmp(testbuf+4, zerobuf, sizeof(testbuf)-4) == 0); | ||
299 | ret = UTF8_getc(testbuf, 4, &value); | ||
300 | ASSERT(ret == 4); | ||
301 | ASSERT(value == i); | ||
302 | } | ||
303 | |||
304 | /* XXX What should UTF8_putc() do with values > 0x10FFFF? (not tested here) */ | ||
305 | |||
306 | return 0; | ||
307 | } | ||