summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author guenther <> 2014-05-18 22:04:14 +0000
committer guenther <> 2014-05-18 22:04:14 +0000
commit bf82f8039d52950f75f37cccfd9ea3a07a98a9bc (patch)
tree e81c9ec1edaa5ea8b924ace9921c2fa9c389c5a2 /src
parent 68e33037c2e3ee88c2f07e6259cb0ec3c5a5e54f (diff)
download openbsd-bf82f8039d52950f75f37cccfd9ea3a07a98a9bc.tar.gz
openbsd-bf82f8039d52950f75f37cccfd9ea3a07a98a9bc.tar.bz2
openbsd-bf82f8039d52950f75f37cccfd9ea3a07a98a9bc.zip
Add regression test for UTF8_{getc,putc}()
Diffstat (limited to 'src')
-rw-r--r-- src/regress/lib/libcrypto/Makefile 5
-rw-r--r-- src/regress/lib/libcrypto/utf8/Makefile 7
-rw-r--r-- src/regress/lib/libcrypto/utf8/utf8test.c 307
3 files changed, 317 insertions, 2 deletions
diff --git a/src/regress/lib/libcrypto/Makefile b/src/regress/lib/libcrypto/Makefile
index 6cf7191cd7..54fcae7c91 100644
--- a/src/regress/lib/libcrypto/Makefile
+++ b/src/regress/lib/libcrypto/Makefile
@@ -1,4 +1,4 @@
1# $OpenBSD: Makefile,v 1.9 2014/05/14 14:46:35 jsing Exp $ 1# $OpenBSD: Makefile,v 1.10 2014/05/18 22:04:14 guenther Exp $
2 2
3SUBDIR= \ 3SUBDIR= \
4 aeswrap \ 4 aeswrap \
@@ -31,7 +31,8 @@ SUBDIR= \
31 rmd \ 31 rmd \
32 sha \ 32 sha \
33 sha1 \ 33 sha1 \
34 sha2 34 sha2 \
35 utf8
35 36
36install: 37install:
37 38
diff --git a/src/regress/lib/libcrypto/utf8/Makefile b/src/regress/lib/libcrypto/utf8/Makefile
new file mode 100644
index 0000000000..4940e60050
--- /dev/null
+++ b/src/regress/lib/libcrypto/utf8/Makefile
@@ -0,0 +1,7 @@
1# $OpenBSD: Makefile,v 1.1 2014/05/18 22:04:14 guenther Exp $
2
3PROG= utf8test
4LDADD= -lcrypto
5DPADD= ${LIBCRYPTO}
6
7.include <bsd.regress.mk>
diff --git a/src/regress/lib/libcrypto/utf8/utf8test.c b/src/regress/lib/libcrypto/utf8/utf8test.c
new file mode 100644
index 0000000000..5b737a5201
--- /dev/null
+++ b/src/regress/lib/libcrypto/utf8/utf8test.c
@@ -0,0 +1,307 @@
1/*
2 * Copyright (c) 2014 Philip Guenther <guenther@openbsd.org>
3 *
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
7 *
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 */
16
17/*
18 * A mostly exhaustive test of UTF-8 decoder and encoder
19 */
20
21#include <stdio.h>
22#include <string.h>
23#include <err.h>
24
25#include <openssl/asn1.h>
26
/* Sentinel preloaded into the decoder's output to detect unwanted writes. */
#define UNCHANGED	0xfedcba98

/*
 * Check a test condition.  When it does not hold, terminate through errx()
 * with exit status 1, reporting the source line and the expression text.
 */
#define ASSERT(x)							\
	do {								\
		if (x)							\
			break;						\
		errx(1, "test failed at line %d: %s",			\
		    __LINE__, #x);					\
	} while (0)
35
36int
37main(void)
38{
39 unsigned char testbuf[] = "012345";
40 const unsigned char zerobuf[sizeof testbuf] = { 0 };
41 unsigned long value;
42 int i, j, k, l, ret;
43
44 /*
45 * First, verify UTF8_getc()
46 */
47 value = UNCHANGED;
48 ret = UTF8_getc(testbuf, 0, &value);
49 ASSERT(ret == 0);
50 ASSERT(value == UNCHANGED);
51
52 /* check all valid single-byte chars */
53 for (i = 0; i < 0x80; i++) {
54 testbuf[0] = i;
55 ret = UTF8_getc(testbuf, 1, &value);
56 ASSERT(ret == 1);
57 ASSERT(value == i);
58
59 ret = UTF8_getc(testbuf, 2, &value);
60 ASSERT(ret == 1);
61 ASSERT(value == i);
62 }
63
64 /*
65 * Verify failure on all invalid initial bytes:
66 * 0x80 - 0xBF following bytes only
67 * 0xC0 - 0xC1 used to be in non-shortest forms
68 * 0xF5 - 0xFD used to be initial for 5 and 6 byte sequences
69 * 0xFE - 0xFF have never been valid in utf-8
70 */
71 for (i = 0x80; i < 0xC2; i++) {
72 value = UNCHANGED;
73 testbuf[0] = i;
74 ret = UTF8_getc(testbuf, 1, &value);
75 ASSERT(ret == -2);
76 ASSERT(value == UNCHANGED);
77 }
78 for (i = 0xF5; i < 0x100; i++) {
79 value = UNCHANGED;
80 testbuf[0] = i;
81 ret = UTF8_getc(testbuf, 1, &value);
82 ASSERT(ret == -2);
83 ASSERT(value == UNCHANGED);
84 }
85
86 /*
87 * Verify handling of all two-byte sequences
88 */
89 for (i = 0xC2; i < 0xE0; i++) {
90 testbuf[0] = i;
91
92 for (j = 0; j < 0x100; j++) {
93 testbuf[1] = j;
94
95 value = UNCHANGED;
96 ret = UTF8_getc(testbuf, 1, &value);
97 ASSERT(ret == -1);
98 ASSERT(value == UNCHANGED);
99
100 ret = UTF8_getc(testbuf, 2, &value);
101
102 /* outside range of trailing bytes */
103 if (j < 0x80 || j > 0xBF) {
104 ASSERT(ret == -3);
105 ASSERT(value == UNCHANGED);
106 continue;
107 }
108
109 /* valid */
110 ASSERT(ret == 2);
111 ASSERT((value & 0x3F) == (j & 0x3F));
112 ASSERT(value >> 6 == (i & 0x1F));
113 }
114 }
115
116#if 0
117 /*
118 * Verify handling of all three-byte sequences
119 */
120 for (i = 0xE0; i < 0xF0; i++) {
121 testbuf[0] = i;
122
123 for (j = 0; j < 0x100; j++) {
124 testbuf[1] = j;
125
126 for (k = 0; k < 0x100; k++) {
127 testbuf[2] = k;
128
129 value = UNCHANGED;
130 ret = UTF8_getc(testbuf, 2, &value);
131 ASSERT(ret == -1);
132 ASSERT(value == UNCHANGED);
133
134 ret = UTF8_getc(testbuf, 3, &value);
135
136 /* outside range of trailing bytes */
137 if (j < 0x80 || j > 0xBF ||
138 k < 0x80 || k > 0xBF) {
139 ASSERT(ret == -3);
140 ASSERT(value == UNCHANGED);
141 continue;
142 }
143
144 /* non-shortest form */
145 if (i == 0xE0 && j < 0xA0) {
146 ASSERT(ret == -4);
147 ASSERT(value == UNCHANGED);
148 continue;
149 }
150
151 /* surrogate pair code point */
152 if (i == 0xED && j > 0x9F) {
153 ASSERT(ret == -2);
154 ASSERT(value == UNCHANGED);
155 continue;
156 }
157
158 ASSERT(ret == 3);
159 ASSERT((value & 0x3F) == (k & 0x3F));
160 ASSERT(((value >> 6) & 0x3F) == (j & 0x3F));
161 ASSERT(value >> 12 == (i & 0x0F));
162 }
163 }
164 }
165
166 /*
167 * Verify handling of all four-byte sequences
168 */
169 for (i = 0xF0; i < 0xF5; i++) {
170 testbuf[0] = i;
171
172 for (j = 0; j < 0x100; j++) {
173 testbuf[1] = j;
174
175 for (k = 0; k < 0x100; k++) {
176 testbuf[2] = k;
177
178 for (l = 0; l < 0x100; l++) {
179 testbuf[3] = l;
180
181 value = UNCHANGED;
182 ret = UTF8_getc(testbuf, 3, &value);
183 ASSERT(ret == -1);
184 ASSERT(value == UNCHANGED);
185
186 ret = UTF8_getc(testbuf, 4, &value);
187
188 /* outside range of trailing bytes */
189 if (j < 0x80 || j > 0xBF ||
190 k < 0x80 || k > 0xBF ||
191 l < 0x80 || l > 0xBF) {
192 ASSERT(ret == -3);
193 ASSERT(value == UNCHANGED);
194 continue;
195 }
196
197 /* non-shortest form */
198 if (i == 0xF0 && j < 0x90) {
199 ASSERT(ret == -4);
200 ASSERT(value == UNCHANGED);
201 continue;
202 }
203
204 /* beyond end of UCS range */
205 if (i == 0xF4 && j > 0x8F) {
206 ASSERT(ret == -2);
207 ASSERT(value == UNCHANGED);
208 continue;
209 }
210
211 ASSERT(ret == 4);
212 ASSERT((value & 0x3F) == (l & 0x3F));
213 ASSERT(((value >> 6) & 0x3F) ==
214 (k & 0x3F));
215 ASSERT(((value >> 12) & 0x3F) ==
216 (j & 0x3F));
217 ASSERT(value >> 18 == (i & 0x07));
218 }
219 }
220 }
221 }
222#endif
223
224
225 /*
226 * Next, verify UTF8_putc()
227 */
228 memset(testbuf, 0, sizeof testbuf);
229
230 /* single-byte sequences */
231 for (i = 0; i < 0x80; i++) {
232 ret = UTF8_putc(NULL, 0, i);
233 ASSERT(ret == 1);
234
235 testbuf[0] = 0;
236 ret = UTF8_putc(testbuf, 0, i);
237 ASSERT(ret == -1);
238 ASSERT(memcmp(testbuf, zerobuf, sizeof testbuf) == 0);
239
240 ret = UTF8_putc(testbuf, 1, i);
241 ASSERT(ret == 1);
242 ASSERT(testbuf[0] == i);
243 ASSERT(memcmp(testbuf+1, zerobuf, sizeof(testbuf)-1) == 0);
244 }
245
246 /* two-byte sequences */
247 for (i = 0x80; i < 0x800; i++) {
248 ret = UTF8_putc(NULL, 0, i);
249 ASSERT(ret == 2);
250
251 testbuf[0] = testbuf[1] = 0;
252 ret = UTF8_putc(testbuf, 1, i);
253 ASSERT(ret == -1);
254 ASSERT(memcmp(testbuf, zerobuf, sizeof testbuf) == 0);
255
256 ret = UTF8_putc(testbuf, 2, i);
257 ASSERT(ret == 2);
258 ASSERT(memcmp(testbuf+2, zerobuf, sizeof(testbuf)-2) == 0);
259 ret = UTF8_getc(testbuf, 2, &value);
260 ASSERT(ret == 2);
261 ASSERT(value == i);
262 }
263
264 /* three-byte sequences */
265 for (i = 0x800; i < 0x10000; i++) {
266 /* XXX skip surrogate pair code points */
267 if (i >= 0xD800 && i < 0xE000)
268 continue;
269
270 ret = UTF8_putc(NULL, 0, i);
271 ASSERT(ret == 3);
272
273 testbuf[0] = testbuf[1] = testbuf[2] = 0;
274 ret = UTF8_putc(testbuf, 2, i);
275 ASSERT(ret == -1);
276 ASSERT(memcmp(testbuf, zerobuf, sizeof testbuf) == 0);
277
278 ret = UTF8_putc(testbuf, 3, i);
279 ASSERT(ret == 3);
280 ASSERT(memcmp(testbuf+3, zerobuf, sizeof(testbuf)-3) == 0);
281 ret = UTF8_getc(testbuf, 3, &value);
282 ASSERT(ret == 3);
283 ASSERT(value == i);
284 }
285
286 /* four-byte sequences */
287 for (i = 0x10000; i < 0x110000; i++) {
288 ret = UTF8_putc(NULL, 0, i);
289 ASSERT(ret == 4);
290
291 testbuf[0] = testbuf[1] = testbuf[2] = testbuf[3] = 0;
292 ret = UTF8_putc(testbuf, 3, i);
293 ASSERT(ret == -1);
294 ASSERT(memcmp(testbuf, zerobuf, sizeof testbuf) == 0);
295
296 ret = UTF8_putc(testbuf, 4, i);
297 ASSERT(ret == 4);
298 ASSERT(memcmp(testbuf+4, zerobuf, sizeof(testbuf)-4) == 0);
299 ret = UTF8_getc(testbuf, 4, &value);
300 ASSERT(ret == 4);
301 ASSERT(value == i);
302 }
303
304 /* XXX What should UTF8_putc() do with values > 0x10FFFF */
305
306 return 0;
307}