7 files changed, 1615 insertions, 0 deletions
diff --git a/src/regress/lib/libc/regex/Makefile b/src/regress/lib/libc/regex/Makefile
new file mode 100644
index 0000000000..a29686bca4
--- /dev/null
+++ b/src/regress/lib/libc/regex/Makefile
@@ -0,0 +1,20 @@
+#       $OpenBSD: Makefile,v 1.5 2002/09/02 20:01:43 avsm Exp $
+#       $NetBSD: Makefile,v 1.2 1995/02/16 19:38:45 cgd Exp $
+#
+# Builds the "re" test driver from main.c, split.c and debug.c and runs it
+# three times over the test table, once per regress target below.
+PROG=   re
+SRCS=   main.c split.c debug.c
+# debug.c includes "utils.h" and "regex2.h"; this include path points at the
+# libc regex source directory (presumably where those headers live).
+CFLAGS+= -I${.CURDIR}/../../../../lib/libc/regex
+# Test table, fed to the driver on standard input.
+TESTS=  ${.CURDIR}/tests
+REGRESS_TARGETS=do-reg do-reg-long do-reg-backref
+# Plain run: no extra execute options.
+do-reg: ${PROG}
+        ./re < ${TESTS}
+# -el passes execute option 'l' (force long representation, see options()).
+do-reg-long: ${PROG}
+        ./re -el < ${TESTS}
+# -er passes execute option 'r' (force backref use, see options()).
+do-reg-backref: ${PROG}
+        ./re -er < ${TESTS}
+.include <bsd.regress.mk>
diff --git a/src/regress/lib/libc/regex/debug.c b/src/regress/lib/libc/regex/debug.c
new file mode 100644
index 0000000000..11129e7249
--- /dev/null
+++ b/src/regress/lib/libc/regex/debug.c
@@ -0,0 +1,245 @@
+/*      $OpenBSD: debug.c,v 1.4 2003/07/31 21:48:03 deraadt Exp $       */
+/*      $NetBSD: debug.c,v 1.2 1995/04/20 22:39:42 cgd Exp $    */
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <regex.h>
+#include "utils.h"
+#include "regex2.h"
+#include "debug.ih"
+/*
+ - regprint - print a regexp for debugging
+ == void regprint(regex_t *r, FILE *d);
+ *
+ * Writes to d, in order:
+ *   - one summary line (state/category counts, first/last state, flags,
+ *     nsub, the "must" string, backrefs, nplus);
+ *   - the compiled strip, via s_print();
+ *   - a "cc0#N" line giving the size of category 0 followed by every
+ *     category that holds exactly one character;
+ *   - one "ccN" line per remaining category listing its characters, with
+ *     consecutive characters collapsed into "first-last" ranges.
+ */
+void
+regprint(regex_t *r, FILE *d)
+{
+        struct re_guts *g = r->re_g;
+        int i;
+        int c;
+        int start = 0;          /* first character of the run being printed */
+        int inrun;              /* nonzero while a run is open */
+        int nincat[NC];
+        fprintf(d, "%ld states, %d categories", (long)g->nstates,
+                                                        g->ncategories);
+        fprintf(d, ", first %ld last %ld", (long)g->firststate,
+                                                (long)g->laststate);
+        if (g->iflags&USEBOL)
+                fprintf(d, ", USEBOL");
+        if (g->iflags&USEEOL)
+                fprintf(d, ", USEEOL");
+        if (g->iflags&BAD)
+                fprintf(d, ", BAD");
+        if (g->nsub > 0)
+                fprintf(d, ", nsub=%ld", (long)g->nsub);
+        if (g->must != NULL)
+                fprintf(d, ", must(%ld) `%*s'", (long)g->mlen, (int)g->mlen,
+                                                                g->must);
+        if (g->backrefs)
+                fprintf(d, ", backrefs");
+        if (g->nplus > 0)
+                fprintf(d, ", nplus %ld", (long)g->nplus);
+        fprintf(d, "\n");
+        s_print(g, d);
+        /* count how many characters fall into each category */
+        for (i = 0; i < g->ncategories; i++) {
+                nincat[i] = 0;
+                for (c = CHAR_MIN; c <= CHAR_MAX; c++)
+                        if (g->categories[c] == i)
+                                nincat[i]++;
+        }
+        fprintf(d, "cc0#%d", nincat[0]);
+        /* single-character categories go on the cc0 line */
+        for (i = 1; i < g->ncategories; i++)
+                if (nincat[i] == 1) {
+                        for (c = CHAR_MIN; c <= CHAR_MAX; c++)
+                                if (g->categories[c] == i)
+                                        break;
+                        fprintf(d, ", %d=%s", i, regchar(c));
+                }
+        fprintf(d, "\n");
+        /*
+         * Remaining categories, one per line.  A separate in-run flag is
+         * used instead of a negative sentinel because c itself is negative
+         * for part of the range when CHAR_MIN < 0.
+         */
+        for (i = 1; i < g->ncategories; i++)
+                if (nincat[i] != 1) {
+                        fprintf(d, "cc%d\t", i);
+                        inrun = 0;
+                        for (c = CHAR_MIN; c <= CHAR_MAX+1; c++)        /* +1 does flush */
+                                if (c <= CHAR_MAX && g->categories[c] == i) {
+                                        if (!inrun) {
+                                                fprintf(d, "%s", regchar(c));
+                                                start = c;
+                                                inrun = 1;
+                                        }
+                                } else if (inrun) {
+                                        /* run ended at c-1; print its tail if longer than one */
+                                        if (start != c-1)
+                                                fprintf(d, "-%s",
+                                                        regchar(c-1));
+                                        inrun = 0;
+                                }
+                        fprintf(d, "\n");
+                }
+}
+/*
+ - s_print - print the strip for debugging
+ == static void s_print(register struct re_guts *g, FILE *d);
+ *
+ * Walks g->strip from index 1 until an OEND operator is seen, writing a
+ * textual token for every operator to d.  strip[0] is expected to be an
+ * OEND; a complaint is printed if it is not.  For paired operators the
+ * operand is printed as "<n>" only when the operator found at that offset
+ * is not the expected partner.  Unknown operators are printed as
+ * "!op(operand)!".
+ */
+static void
+s_print(g, d)
+register struct re_guts *g;
+FILE *d;
+{
+        register sop *s;
+        register cset *cs;
+        register int i;
+        register int done = 0;
+        register sop opnd;
+        register int col = 0;
+        register int last;
+        register sopno offset = 2;
+        /*
+         * GAP() runs after every operator except the final OEND.  It always
+         * advances offset.  When offset is a multiple of 5 it emits a
+         * separator: newline+tab (resetting col) once col exceeds 40,
+         * otherwise a single space.  In all non-wrapping cases col grows by 1.
+         */
+#       define  GAP()   {       if (offset % 5 == 0) { \
+                                        if (col > 40) { \
+                                                fprintf(d, "\n\t"); \
+                                                col = 0; \
+                                        } else { \
+                                                fprintf(d, " "); \
+                                                col++; \
+                                        } \
+                                } else \
+                                        col++; \
+                                offset++; \
+                        }
+        if (OP(g->strip[0]) != OEND)
+                fprintf(d, "missing initial OEND!\n");
+        for (s = &g->strip[1]; !done; s++) {
+                opnd = OPND(*s);
+                switch (OP(*s)) {
+                case OEND:
+                        /* terminator: finish the line and stop the walk */
+                        fprintf(d, "\n");
+                        done = 1;
+                        break;
+                case OCHAR:
+                        /* characters in this list are printed with a backslash prefix */
+                        if (strchr("\\|()^$.[+*?{}!<> ", (char)opnd) != NULL)
+                                fprintf(d, "\\%c", (char)opnd);
+                        else
+                                fprintf(d, "%s", regchar((char)opnd));
+                        break;
+                case OBOL:
+                        fprintf(d, "^");
+                        break;
+                case OEOL:
+                        fprintf(d, "$");
+                        break;
+                case OBOW:
+                        fprintf(d, "\\{");
+                        break;
+                case OEOW:
+                        fprintf(d, "\\}");
+                        break;
+                case OANY:
+                        fprintf(d, ".");
+                        break;
+                case OANYOF:
+                        /* print the set number, then its members as ranges */
+                        fprintf(d, "[(%ld)", (long)opnd);
+                        cs = &g->sets[opnd];
+                        last = -1;      /* -1 means no range is currently open */
+                        for (i = 0; i < g->csetsize+1; i++)     /* +1 flushes */
+                                if (CHIN(cs, i) && i < g->csetsize) {
+                                        if (last < 0) {
+                                                fprintf(d, "%s", regchar(i));
+                                                last = i;
+                                        }
+                                } else {
+                                        if (last >= 0) {
+                                                /* range ended at i-1; print its end if longer than one */
+                                                if (last != i-1)
+                                                        fprintf(d, "-%s",
+                                                                regchar(i-1));
+                                                last = -1;
+                                        }
+                                }
+                        fprintf(d, "]");
+                        break;
+                case OBACK_:
+                        fprintf(d, "(\\<%ld>", (long)opnd);
+                        break;
+                case O_BACK:
+                        fprintf(d, "<%ld>\\)", (long)opnd);
+                        break;
+                case OPLUS_:
+                        fprintf(d, "(+");
+                        /* show the offset only if it does not land on the matching O_PLUS */
+                        if (OP(*(s+opnd)) != O_PLUS)
+                                fprintf(d, "<%ld>", (long)opnd);
+                        break;
+                case O_PLUS:
+                        if (OP(*(s-opnd)) != OPLUS_)
+                                fprintf(d, "<%ld>", (long)opnd);
+                        fprintf(d, "+)");
+                        break;
+                case OQUEST_:
+                        fprintf(d, "(?");
+                        if (OP(*(s+opnd)) != O_QUEST)
+                                fprintf(d, "<%ld>", (long)opnd);
+                        break;
+                case O_QUEST:
+                        if (OP(*(s-opnd)) != OQUEST_)
+                                fprintf(d, "<%ld>", (long)opnd);
+                        fprintf(d, "?)");
+                        break;
+                case OLPAREN:
+                        fprintf(d, "((<%ld>", (long)opnd);
+                        break;
+                case ORPAREN:
+                        fprintf(d, "<%ld>))", (long)opnd);
+                        break;
+                case OCH_:
+                        fprintf(d, "<");
+                        if (OP(*(s+opnd)) != OOR2)
+                                fprintf(d, "<%ld>", (long)opnd);
+                        break;
+                case OOR1:
+                        /* backward link may point at a previous OOR1 or the opening OCH_ */
+                        if (OP(*(s-opnd)) != OOR1 && OP(*(s-opnd)) != OCH_)
+                                fprintf(d, "<%ld>", (long)opnd);
+                        fprintf(d, "|");
+                        break;
+                case OOR2:
+                        fprintf(d, "|");
+                        /* forward link may point at the next OOR2 or the closing O_CH */
+                        if (OP(*(s+opnd)) != OOR2 && OP(*(s+opnd)) != O_CH)
+                                fprintf(d, "<%ld>", (long)opnd);
+                        break;
+                case O_CH:
+                        if (OP(*(s-opnd)) != OOR1)
+                                fprintf(d, "<%ld>", (long)opnd);
+                        fprintf(d, ">");
+                        break;
+                default:
+                        /* unrecognized operator: dump raw opcode and operand */
+                        fprintf(d, "!%ld(%ld)!", (long)OP(*s), (long)opnd);
+                        break;
+                }
+                if (!done)
+                        GAP();
+        }
+}
+/*
+ - regchar - make a character printable
+ == static char *regchar(int ch);
+ *
+ * Returns a pointer to a static buffer holding either the character itself
+ * (when printable) or a backslash followed by its octal value.  The buffer
+ * is overwritten by the next call.
+ *
+ * ch may be a negative plain-char value (callers iterate from CHAR_MIN), so
+ * it is reduced to unsigned char first: isprint() is only defined for
+ * unsigned char values and EOF, and a negative int printed with %o would
+ * not fit in the buffer.
+ */
+static char *                   /* -> representation */
+regchar(int ch)
+{
+        static char buf[10];
+        unsigned char uc = (unsigned char)ch;
+        if (isprint(uc) || uc == ' ')
+                snprintf(buf, sizeof buf, "%c", uc);
+        else
+                snprintf(buf, sizeof buf, "\\%o", (unsigned int)uc);
+        return(buf);
+}
diff --git a/src/regress/lib/libc/regex/debug.ih b/src/regress/lib/libc/regex/debug.ih
new file mode 100644
index 0000000000..9eb313af23
--- /dev/null
+++ b/src/regress/lib/libc/regex/debug.ih
@@ -0,0 +1,17 @@
+/*      $OpenBSD: debug.ih,v 1.3 2002/02/16 21:27:32 millert Exp $      */
+/*      $NetBSD: debug.ih,v 1.2 1995/04/20 22:39:47 cgd Exp $   */
+/* ========= begin header generated by ./mkh ========= */
+#ifdef __cplusplus
+extern "C" {
+#endif
+/* === debug.c === */
+void regprint(regex_t *r, FILE *d);
+static void s_print(register struct re_guts *g, FILE *d);
+static char *regchar(int ch);
+#ifdef __cplusplus
+}
+#endif
+/* ========= end header generated by ./mkh ========= */
diff --git a/src/regress/lib/libc/regex/main.c b/src/regress/lib/libc/regex/main.c
new file mode 100644
index 0000000000..02f4bd7cf2
--- /dev/null
+++ b/src/regress/lib/libc/regex/main.c
@@ -0,0 +1,516 @@
+/*      $OpenBSD: main.c,v 1.4 2003/07/31 21:48:03 deraadt Exp $        */
+/*      $NetBSD: main.c,v 1.2 1995/04/20 22:39:51 cgd Exp $     */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <regex.h>
+#include <assert.h>
+#include <unistd.h>
+#include "main.ih"
+char *progname;                 /* argv[0]; used in the usage message */
+int debug = 0;                  /* bumped by each -x; makes regress() echo line numbers */
+int line = 0;                   /* number of the input line regress() is working on */
+int status = 0;                 /* process exit status; set to 1 when a check fails */
+int copts = REG_EXTENDED;       /* base compile options; options('c', ...) starts from these */
+int eopts = 0;                  /* base execute options; options('e', ...) starts from these */
+regoff_t startoff = 0;          /* -S value: start offset used with REG_STARTEND in main() */
+regoff_t endoff = 0;            /* -E value: subtracted from the string length for the end offset */
+/* helpers defined in split.c and debug.c */
+extern int split(char *, char *[], int, char *);
+extern void regprint(regex_t *, FILE *);
+/*
+ - main - do the simple case, hand off to regress() for regression
+ *
+ * Options: -c copts (compile option letters), -e eopts (execute option
+ * letters), -S n (start offset), -E n (end offset), -x (debug).
+ *
+ * With no operands, runs regress() on stdin and exits with the global
+ * status.  With one operand, compiles it as an RE and prints it with
+ * regprint().  With a second operand, also executes the RE against that
+ * string and prints the overall match and any submatches.
+ */
+int
+main(int argc, char *argv[])
+{
+        regex_t re;
+#       define  NS      10
+        regmatch_t subs[NS];
+        char erbuf[100];
+        int err;
+        size_t len;
+        int c;
+        int errflg = 0;
+        register int i;
+        extern int optind;
+        extern char *optarg;
+        progname = argv[0];
+        while ((c = getopt(argc, argv, "c:e:S:E:x")) != -1)
+                switch (c) {
+                case 'c':       /* compile options */
+                        copts = options('c', optarg);
+                        break;
+                case 'e':       /* execute options */
+                        eopts = options('e', optarg);
+                        break;
+                case 'S':       /* start offset */
+                        startoff = (regoff_t)atoi(optarg);
+                        break;
+                case 'E':       /* end offset */
+                        endoff = (regoff_t)atoi(optarg);
+                        break;
+                case 'x':       /* Debugging. */
+                        debug++;
+                        break;
+                case '?':
+                default:
+                        errflg++;
+                        break;
+                }
+        if (errflg) {
+                /* keep this in step with the getopt() string above */
+                fprintf(stderr, "usage: %s ", progname);
+                fprintf(stderr, "[-x] [-c copts] [-e eopts] [-S startoff] [-E endoff] [re [string]]\n");
+                exit(2);
+        }
+        if (optind >= argc) {
+                regress(stdin);
+                exit(status);
+        }
+        err = regcomp(&re, argv[optind++], copts);
+        if (err) {
+                len = regerror(err, &re, erbuf, sizeof(erbuf));
+                /* size_t values are cast so they match the conversion specifier */
+                fprintf(stderr, "error %s, %lu/%lu `%s'\n",
+                        eprint(err), (unsigned long)len,
+                        (unsigned long)sizeof(erbuf), erbuf);
+                exit(status);
+        }
+        regprint(&re, stdout);
+        if (optind >= argc) {
+                regfree(&re);
+                exit(status);
+        }
+        if (eopts&REG_STARTEND) {
+                /* the range to search is passed in through subs[0] */
+                subs[0].rm_so = startoff;
+                subs[0].rm_eo = strlen(argv[optind]) - endoff;
+        }
+        err = regexec(&re, argv[optind], (size_t)NS, subs, eopts);
+        if (err) {
+                len = regerror(err, &re, erbuf, sizeof(erbuf));
+                fprintf(stderr, "error %s, %lu/%lu `%s'\n",
+                        eprint(err), (unsigned long)len,
+                        (unsigned long)sizeof(erbuf), erbuf);
+                exit(status);
+        }
+        if (!(copts&REG_NOSUB)) {
+                len = (size_t)(subs[0].rm_eo - subs[0].rm_so);
+                if (subs[0].rm_so != -1) {
+                        if (len != 0)
+                                printf("match `%.*s'\n", (int)len,
+                                        argv[optind] + subs[0].rm_so);
+                        else
+                                /* empty match: show the character it sits at */
+                                printf("match `'@%.1s\n",
+                                        argv[optind] + subs[0].rm_so);
+                }
+                for (i = 1; i < NS; i++)
+                        if (subs[i].rm_so != -1)
+                                printf("(%d) `%.*s'\n", i,
+                                        (int)(subs[i].rm_eo - subs[i].rm_so),
+                                        argv[optind] + subs[i].rm_so);
+        }
+        exit(status);
+}
+/*
+ - regress - main loop of regression test
+ == void regress(FILE *in);
+ *
+ * Reads test lines from in.  Lines starting with '#' and empty lines are
+ * skipped.  Every other line is split into fields with split() and handed
+ * to try(): f[0] pattern, f[1] option letters, f[2] subject string (or the
+ * expected error name), f[3] and f[4] optional expectations.  A literal ""
+ * field stands for the empty string.  Lines whose options contain '&' are
+ * tried a second time without REG_EXTENDED.  A line with fewer than three
+ * fields terminates the program with exit status 1.
+ *
+ * After the input is exhausted, a few fixed regerror() checks are run.
+ * Failures are reported on stderr and recorded in the global status.
+ */
+void
+regress(FILE *in)
+{
+        char inbuf[1000];
+#       define  MAXF    10
+        char *f[MAXF];
+        int nf;
+        int i;
+        char erbuf[100];
+        size_t ne;
+        char *badpat = "invalid regular expression";
+#       define  SHORT   10
+        char *bpname = "REG_BADPAT";
+        regex_t re;
+        while (fgets(inbuf, sizeof(inbuf), in) != NULL) {
+                line++;
+                if (inbuf[0] == '#' || inbuf[0] == '\n')
+                        continue;                       /* NOTE CONTINUE */
+                /*
+                 * Drop the trailing newline if there is one.  A final line
+                 * without a newline keeps its last character.
+                 */
+                inbuf[strcspn(inbuf, "\n")] = '\0';
+                if (debug)
+                        fprintf(stdout, "%d:\n", line);
+                nf = split(inbuf, f, MAXF, "\t\t");
+                if (nf < 3) {
+                        fprintf(stderr, "bad input, line %d\n", line);
+                        exit(1);
+                }
+                for (i = 0; i < nf; i++)
+                        if (strcmp(f[i], "\"\"") == 0)
+                                f[i] = "";
+                /* absent optional fields are passed to try() as NULL */
+                if (nf <= 3)
+                        f[3] = NULL;
+                if (nf <= 4)
+                        f[4] = NULL;
+                try(f[0], f[1], f[2], f[3], f[4], options('c', f[1]));
+                if (opt('&', f[1]))     /* try with either type of RE */
+                        try(f[0], f[1], f[2], f[3], f[4],
+                                        options('c', f[1]) &~ REG_EXTENDED);
+        }
+        /* full-size buffer: message text and reported length must both match */
+        ne = regerror(REG_BADPAT, (regex_t *)NULL, erbuf, sizeof(erbuf));
+        if (strcmp(erbuf, badpat) != 0 || ne != strlen(badpat)+1) {
+                fprintf(stderr, "end: regerror() test gave `%s' not `%s'\n",
+                                                        erbuf, badpat);
+                status = 1;
+        }
+        /* short buffer: text is truncated and terminated, length is still the full one */
+        ne = regerror(REG_BADPAT, (regex_t *)NULL, erbuf, (size_t)SHORT);
+        if (strncmp(erbuf, badpat, SHORT-1) != 0 || erbuf[SHORT-1] != '\0' ||
+                                                ne != strlen(badpat)+1) {
+                fprintf(stderr, "end: regerror() short test gave `%s' not `%.*s'\n",
+                                                erbuf, SHORT-1, badpat);
+                status = 1;
+        }
+        /* REG_ITOA: error number to symbolic name */
+        ne = regerror(REG_ITOA|REG_BADPAT, (regex_t *)NULL, erbuf, sizeof(erbuf));
+        if (strcmp(erbuf, bpname) != 0 || ne != strlen(bpname)+1) {
+                fprintf(stderr, "end: regerror() ITOA test gave `%s' not `%s'\n",
+                                                erbuf, bpname);
+                status = 1;
+        }
+        /* REG_ATOI: symbolic name (passed through re_endp) to error number */
+        re.re_endp = bpname;
+        ne = regerror(REG_ATOI, &re, erbuf, sizeof(erbuf));
+        if (atoi(erbuf) != (int)REG_BADPAT) {
+                fprintf(stderr, "end: regerror() ATOI test gave `%s' not `%ld'\n",
+                                                erbuf, (long)REG_BADPAT);
+                status = 1;
+        } else if (ne != strlen(erbuf)+1) {
+                /* report the length that was actually returned */
+                fprintf(stderr, "end: regerror() ATOI test len(`%s') = %ld\n",
+                                                erbuf, (long)ne);
+                status = 1;
+        }
+}
+/*
+ - try - try it, and report on problems
+ == void try(char *f0, char *f1, char *f2, char *f3, char *f4, int opts);
+ *
+ * f0   pattern (magic characters are translated by fixstr())
+ * f1   option letters; 'C' means compilation is expected to fail
+ * f2   subject string, or the expected error name (without "REG_") with 'C'
+ * f3   expected overall match, or NULL if the match should fail
+ * f4   comma-separated expected submatches, or NULL to skip that check
+ * opts compile flags to use (may differ from what f1 implies)
+ *
+ * Problems are reported on stderr, prefixed with the current input line
+ * number and the RE type, and recorded by setting the global status to 1.
+ */
+void
+try(char *f0, char *f1, char *f2, char *f3, char *f4, int opts)
+{
+        regex_t re;
+#       define  NSUBS   10
+        regmatch_t subs[NSUBS];
+#       define  NSHOULD 15
+        char *should[NSHOULD];
+        int nshould;
+        char erbuf[100];
+        int err;
+        size_t len;
+        char *type = (opts & REG_EXTENDED) ? "ERE" : "BRE";
+        register int i;
+        char *grump;
+        char *lparen;
+        char *rparen;
+        char f0copy[1000];
+        char f2copy[1000];
+        snprintf(f0copy, sizeof f0copy, "%s", f0);
+        /* with REG_PEND the end pointer is taken before fixstr() can insert NULs */
+        re.re_endp = (opts&REG_PEND) ? f0copy + strlen(f0copy) : NULL;
+        fixstr(f0copy);
+        err = regcomp(&re, f0copy, opts);
+        if (err != 0 && (!opt('C', f1) || err != efind(f2))) {
+                /* unexpected error or wrong error */
+                len = regerror(err, &re, erbuf, sizeof(erbuf));
+                fprintf(stderr, "%d: %s error %s, %lu/%lu `%s'\n",
+                                        line, type, eprint(err),
+                                        (unsigned long)len,
+                                        (unsigned long)sizeof(erbuf), erbuf);
+                status = 1;
+        } else if (err == 0 && opt('C', f1)) {
+                /* unexpected success */
+                fprintf(stderr, "%d: %s should have given REG_%s\n",
+                                                line, type, f2);
+                status = 1;
+                regfree(&re);
+                return;         /* so we won't try regexec */
+        }
+        if (err != 0)
+                return;         /* regcomp failed, so there is nothing to free */
+        snprintf(f2copy, sizeof f2copy, "%s", f2);
+        fixstr(f2copy);
+        if (options('e', f1)&REG_STARTEND) {
+                /* the range to search is marked by ( and ) in the subject */
+                lparen = strchr(f2, '(');
+                rparen = strchr(f2, ')');
+                if (lparen == NULL || rparen == NULL) {
+                        fprintf(stderr, "%d: bad STARTEND syntax\n", line);
+                        status = 1;
+                        regfree(&re);
+                        return;
+                }
+                subs[0].rm_so = lparen - f2 + 1;
+                subs[0].rm_eo = rparen - f2;
+        }
+        err = regexec(&re, f2copy, NSUBS, subs, options('e', f1));
+        if (err != 0 && (f3 != NULL || err != REG_NOMATCH)) {
+                /* unexpected error or wrong error */
+                len = regerror(err, &re, erbuf, sizeof(erbuf));
+                fprintf(stderr, "%d: %s exec error %s, %lu/%lu `%s'\n",
+                                        line, type, eprint(err),
+                                        (unsigned long)len,
+                                        (unsigned long)sizeof(erbuf), erbuf);
+                status = 1;
+        } else if (err != 0) {
+                /* nothing more to check */
+        } else if (f3 == NULL) {
+                /* unexpected success */
+                fprintf(stderr, "%d: %s exec should have failed\n",
+                                                line, type);
+                status = 1;
+                err = 1;                /* just on principle */
+        } else if (opts&REG_NOSUB) {
+                /* nothing more to check */
+        } else if ((grump = check(f2, subs[0], f3)) != NULL) {
+                fprintf(stderr, "%d: %s %s\n", line, type, grump);
+                status = 1;
+                err = 1;
+        }
+        if (err != 0 || f4 == NULL) {
+                regfree(&re);
+                return;
+        }
+        /* should[i] lines up with subs[i]; slot 0 is unused */
+        for (i = 1; i < NSHOULD; i++)
+                should[i] = NULL;
+        nshould = split(f4, should+1, NSHOULD-1, ",");
+        if (nshould == 0) {
+                nshould = 1;
+                should[1] = "";
+        }
+        for (i = 1; i < NSUBS; i++) {
+                grump = check(f2, subs[i], should[i]);
+                if (grump != NULL) {
+                        fprintf(stderr, "%d: %s $%d %s\n", line,
+                                                        type, i, grump);
+                        status = 1;
+                        err = 1;
+                }
+        }
+        regfree(&re);
+}
+/*
+ - options - pick options out of a regression-test string
+ == int options(int type, char *s);
+ *
+ * Starts from the global copts (type 'c') or eopts (any other type) and
+ * applies every letter of s that is legal for that type; other characters
+ * are ignored.  Returns the resulting flag word.
+ *
+ * Compile letters: b i s n m p.  Execute letters: ^ $ # t l r.
+ * 'r' is part of the execute set so that its case below is reachable.
+ */
+int
+options(int type, char *s)
+{
+        register char *p;
+        register int o = (type == 'c') ? copts : eopts;
+        register char *legal = (type == 'c') ? "bisnmp" : "^$#tlr";
+        for (p = s; *p != '\0'; p++)
+                if (strchr(legal, *p) != NULL)
+                        switch (*p) {
+                        case 'b':
+                                o &= ~REG_EXTENDED;
+                                break;
+                        case 'i':
+                                o |= REG_ICASE;
+                                break;
+                        case 's':
+                                o |= REG_NOSUB;
+                                break;
+                        case 'n':
+                                o |= REG_NEWLINE;
+                                break;
+                        case 'm':
+                                o &= ~REG_EXTENDED;
+                                o |= REG_NOSPEC;
+                                break;
+                        case 'p':
+                                o |= REG_PEND;
+                                break;
+                        case '^':
+                                o |= REG_NOTBOL;
+                                break;
+                        case '$':
+                                o |= REG_NOTEOL;
+                                break;
+                        case '#':
+                                o |= REG_STARTEND;
+                                break;
+                        case 't':       /* trace */
+                                o |= REG_TRACE;
+                                break;
+                        case 'l':       /* force long representation */
+                                o |= REG_LARGE;
+                                break;
+                        case 'r':       /* force backref use */
+                                o |= REG_BACKR;
+                                break;
+                        }
+        return(o);
+}
+/*
+ - opt - is a particular option in a regression string?
+ == int opt(int c, char *s);
+ *
+ * Returns 1 when strchr() finds c in s, 0 otherwise.
+ */
+int                             /* predicate */
+opt(int c, char *s)
+{
+        char *found = strchr(s, c);
+        if (found == NULL)
+                return(0);
+        return(1);
+}
+/*
+ - fixstr - transform magic characters in strings
+ == void fixstr(register char *p);
+ *
+ * Rewrites the string in place: N becomes newline, T becomes tab,
+ * S becomes space and Z becomes a NUL byte.  A NULL pointer is ignored.
+ * The scan steps past a NUL it has just written and stops at the first
+ * NUL it meets afterwards.
+ */
+void
+fixstr(register char *p)
+{
+        char *cur;
+        if (p == NULL)
+                return;
+        cur = p;
+        while (*cur != '\0') {
+                switch (*cur) {
+                case 'N':
+                        *cur = '\n';
+                        break;
+                case 'T':
+                        *cur = '\t';
+                        break;
+                case 'S':
+                        *cur = ' ';
+                        break;
+                case 'Z':
+                        *cur = '\0';
+                        break;
+                default:
+                        break;
+                }
+                cur++;          /* advance even after writing a NUL */
+        }
+}
+/*
+ - check - check a substring match
+ == char *check(char *str, regmatch_t sub, char *should);
+ *
+ * str     the subject string the offsets in sub refer to
+ * sub     the (sub)match reported by regexec()
+ * should  the expected text; NULL or "-" means no match is expected, and
+ *         "@text" means an empty match located where text begins
+ *         ("@" alone means at the end of the string)
+ *
+ * Returns NULL when sub agrees with should, otherwise a complaint.  The
+ * complaint is either a string literal or a static buffer that the next
+ * call overwrites.
+ */
+char *                          /* NULL or complaint */
+check(char *str, regmatch_t sub, char *should)
+{
+        register int len;
+        register int shlen;
+        register char *p;
+        static char grump[500];
+        register char *at = NULL;
+        if (should != NULL && strcmp(should, "-") == 0)
+                should = NULL;
+        if (should != NULL && should[0] == '@') {
+                at = should + 1;
+                should = "";
+        }
+        /* check rm_so and rm_eo for consistency */
+        if (sub.rm_so > sub.rm_eo || (sub.rm_so == -1 && sub.rm_eo != -1) ||
+                                (sub.rm_so != -1 && sub.rm_eo == -1) ||
+                                (sub.rm_so != -1 && sub.rm_so < 0) ||
+                                (sub.rm_eo != -1 && sub.rm_eo < 0) ) {
+                snprintf(grump, sizeof grump,
+                    "start %ld end %ld", (long)sub.rm_so,
+                    (long)sub.rm_eo);
+                return(grump);
+        }
+        /* check for no match */
+        if (sub.rm_so == -1 && should == NULL)
+                return(NULL);
+        if (sub.rm_so == -1)
+                return("did not match");
+        /* check for in range; rm_eo is known to be non-negative here */
+        if ((size_t)sub.rm_eo > strlen(str)) {
+                snprintf(grump, sizeof grump,
+                        "start %ld end %ld, past end of string",
+                        (long)sub.rm_so, (long)sub.rm_eo);
+                return(grump);
+        }
+        len = (int)(sub.rm_eo - sub.rm_so);
+        p = str + sub.rm_so;
+        /* check for not supposed to match; must precede strlen(should) */
+        if (should == NULL) {
+                snprintf(grump, sizeof grump, "matched `%.*s'", len, p);
+                return(grump);
+        }
+        shlen = (int)strlen(should);
+        /* check for wrong match */
+        if (len != shlen || strncmp(p, should, (size_t)shlen) != 0) {
+                snprintf(grump, sizeof grump, "matched `%.*s' instead", len, p);
+                return(grump);
+        }
+        if (shlen > 0)
+                return(NULL);
+        /* check null match in right place */
+        if (at == NULL)
+                return(NULL);
+        shlen = strlen(at);
+        if (shlen == 0)
+                shlen = 1;      /* force check for end-of-string */
+        if (strncmp(p, at, shlen) != 0) {
+                snprintf(grump, sizeof grump, "matched null at `%.20s'", p);
+                return(grump);
+        }
+        return(NULL);
+}
+/*
+ - eprint - convert error number to name
+ == static char *eprint(int err);
+ *
+ * Asks regerror() with REG_ITOA for the name of err.  The result lives in
+ * a static buffer that the next call overwrites.
+ */
+static char *
+eprint(int err)
+{
+        static char name[100];
+        size_t need = regerror(REG_ITOA|err, (regex_t *)NULL, name,
+                                                        sizeof(name));
+        /* the name must have fit, terminator included */
+        assert(need <= sizeof(name));
+        return(name);
+}
+/*
+ - efind - convert error name to number
+ == static int efind(char *name);
+ *
+ * Builds "REG_<name>", hands it to regerror() with REG_ATOI through the
+ * re_endp member of a scratch regex_t, and returns the decimal text that
+ * regerror() writes back, converted with atoi().
+ */
+static int
+efind(char *name)
+{
+        static char scratch[100];
+        regex_t lookup;
+        snprintf(scratch, sizeof scratch, "REG_%s", name);
+        assert(strlen(scratch) < sizeof(scratch));
+        /* the same buffer carries the name in and the number out */
+        lookup.re_endp = scratch;
+        (void) regerror(REG_ATOI, &lookup, scratch, sizeof(scratch));
+        return(atoi(scratch));
+}
diff --git a/src/regress/lib/libc/regex/main.ih b/src/regress/lib/libc/regex/main.ih
new file mode 100644
index 0000000000..0860e26333
--- /dev/null
+++ b/src/regress/lib/libc/regex/main.ih
@@ -0,0 +1,22 @@
+/*      $OpenBSD: main.ih,v 1.3 2002/02/16 21:27:32 millert Exp $       */
+/*      $NetBSD: main.ih,v 1.2 1995/04/20 22:39:55 cgd Exp $    */
+/* ========= begin header generated by ./mkh ========= */
+#ifdef __cplusplus
+extern "C" {
+#endif
+/* === main.c === */
+void regress(FILE *in);
+void try(char *f0, char *f1, char *f2, char *f3, char *f4, int opts);
+int options(int type, char *s);
+int opt(int c, char *s);
+void fixstr(register char *p);
+char *check(char *str, regmatch_t sub, char *should);
+static char *eprint(int err);
+static int efind(char *name);
+#ifdef __cplusplus
+}
+#endif
+/* ========= end header generated by ./mkh ========= */
diff --git a/src/regress/lib/libc/regex/split.c b/src/regress/lib/libc/regex/split.c
new file mode 100644
index 0000000000..1e17a55b89
--- /dev/null
+++ b/src/regress/lib/libc/regex/split.c
@@ -0,0 +1,317 @@
+/*      $OpenBSD: split.c,v 1.3 2003/07/31 21:48:03 deraadt Exp $       */
+/*      $NetBSD: split.c,v 1.2 1995/04/20 22:39:57 cgd Exp $    */
+#include <stdio.h>
+#include <string.h>
+int split(char *string, char *fields[], int nfields, char *sep);
+/*
+ - split - divide a string into fields, like awk split()
+ = int split(char *string, char *fields[], int nfields, char *sep);
+ */
+int                             /* number of fields, including overflow */
+split(char *string, char *fields[], int nfields, char *sep)
+{
+        /*
+         * Cuts `string' up in place: a separator that ends a stored field is
+         * overwritten with NUL and fields[] receives pointers into `string'.
+         * At most nfields pointers are stored; further fields are only
+         * counted, and the last stored field then keeps the unsplit rest.
+         * The length of `sep' picks the strategy:
+         *   ""        white space (space/tab); leading blanks are skipped
+         *             and a trailing empty field is not counted
+         *   1 char    every occurrence delimits, so empty fields can appear
+         *   2 chars   a run of either character counts as one delimiter
+         *   3+ chars  same run-collapsing rule, via a general membership scan
+         * NOTE(review): the 1- and 2-character paths store the first pointer
+         * before looking at nfields, so callers presumably pass nfields >= 1.
+         */
+        register char *p = string;
+        register char c;                        /* latest character */
+        register char sepc = sep[0];
+        register char sepc2;
+        register int fn;
+        register char **fp = fields;
+        register char *sepp;
+        register int trimtrail;
+        /* white space */
+        if (sepc == '\0') {
+                /* skip leading blanks, then back up onto the first non-blank */
+                while ((c = *p++) == ' ' || c == '\t')
+                        continue;
+                p--;
+                trimtrail = 1;
+                sep = " \t";    /* note, code below knows this is 2 long */
+                sepc = ' ';
+        } else
+                trimtrail = 0;
+        sepc2 = sep[1];         /* now we can safely pick this up */
+        /* catch empties */
+        if (*p == '\0')
+                return(0);
+        /* single separator */
+        if (sepc2 == '\0') {
+                fn = nfields;   /* counts down the free slots in fields[] */
+                for (;;) {
+                        *fp++ = p;
+                        fn--;
+                        if (fn == 0)
+                                break;  /* last slot: it keeps the rest of the string */
+                        while ((c = *p++) != sepc)
+                                if (c == '\0')
+                                        return(nfields - fn);
+                        *(p-1) = '\0';  /* end the field where its separator was */
+                }
+                /* we have overflowed the fields vector -- just count them */
+                fn = nfields;   /* from here on fn counts upward */
+                for (;;) {
+                        while ((c = *p++) != sepc)
+                                if (c == '\0')
+                                        return(fn);
+                        fn++;
+                }
+                /* not reached */
+        }
+        /* two separators */
+        if (sep[2] == '\0') {
+                fn = nfields;   /* counts down the free slots in fields[] */
+                for (;;) {
+                        *fp++ = p;
+                        fn--;
+                        while ((c = *p++) != sepc && c != sepc2)
+                                if (c == '\0') {
+                                        /* white-space mode: an empty final field does not count */
+                                        if (trimtrail && **(fp-1) == '\0')
+                                                fn++;
+                                        return(nfields - fn);
+                                }
+                        if (fn == 0)
+                                break;  /* vector full; c still holds the separator just seen */
+                        *(p-1) = '\0';
+                        /* swallow the rest of this run of separators */
+                        while ((c = *p++) == sepc || c == sepc2)
+                                continue;
+                        p--;
+                }
+                /* we have overflowed the fields vector -- just count them */
+                fn = nfields;
+                while (c != '\0') {
+                        /* skip a separator run, count one field, skip over that field */
+                        while ((c = *p++) == sepc || c == sepc2)
+                                continue;
+                        p--;
+                        fn++;
+                        while ((c = *p++) != '\0' && c != sepc && c != sepc2)
+                                continue;
+                }
+                /* might have to trim trailing white space */
+                if (trimtrail) {
+                        /* step back over the terminator and any trailing separators */
+                        p--;
+                        while ((c = *--p) == sepc || c == sepc2)
+                                continue;
+                        p++;
+                        if (*p != '\0') {
+                                /*
+                                 * There was trailing white space, so the last
+                                 * field counted above was empty: uncount it.
+                                 * The white space is chopped off only when that
+                                 * empty field was the sole overflow.
+                                 */
+                                if (fn == nfields+1)
+                                        *p = '\0';
+                                fn--;
+                        }
+                }
+                return(fn);
+        }
+        /* n separators */
+        fn = 0;         /* this path counts upward from the start */
+        for (;;) {
+                if (fn < nfields)
+                        *fp++ = p;
+                fn++;
+                /* scan to the end of the field; sepc is reused as scratch here */
+                for (;;) {
+                        c = *p++;
+                        if (c == '\0')
+                                return(fn);
+                        sepp = sep;
+                        while ((sepc = *sepp++) != '\0' && sepc != c)
+                                continue;
+                        if (sepc != '\0')       /* it was a separator */
+                                break;
+                }
+                /* only fields before the last slot get terminated */
+                if (fn < nfields)
+                        *(p-1) = '\0';
+                /* skip the remainder of the separator run (a NUL also stops it) */
+                for (;;) {
+                        c = *p++;
+                        sepp = sep;
+                        while ((sepc = *sepp++) != '\0' && sepc != c)
+                                continue;
+                        if (sepc == '\0')       /* it wasn't a separator */
+                                break;
+                }
+                p--;
+        }
+        /* not reached */
+}
+#ifdef TEST_SPLIT
+/*
+ * test program
+ * pgm          runs regression
+ * pgm sep      splits stdin lines by sep
+ * pgm str sep  splits str by sep
+ * pgm str sep n        splits str by sep n times
+ */
+int
+main(argc, argv)
+int argc;
+char *argv[];
+{
+        /*
+         * Test driver for split(); the argument count selects the mode:
+         * none runs the regression table, one splits stdin lines, two split
+         * a single string, three repeat the split argv[3] times.
+         */
+        char buf[512];
+        register int n;
+#       define  MNF     10
+        char *fields[MNF];
+        /* both repeat loops copy argv[1] into buf, so refuse what cannot fit */
+        if (argc > 3 && strlen(argv[1]) >= sizeof(buf)) {
+                fprintf(stderr, "string too long (max %d)\n", (int)sizeof(buf) - 1);
+                exit(1);
+        }
+        if (argc > 4)
+                /* same loop minus split() -- presumably a baseline for timing */
+                for (n = atoi(argv[3]); n > 0; n--) {
+                        (void) strcpy(buf, argv[1]);
+                }
+        else if (argc > 3)
+                for (n = atoi(argv[3]); n > 0; n--) {
+                        /* split() writes into its input, so use a fresh copy each time */
+                        (void) strcpy(buf, argv[1]);
+                        (void) split(buf, fields, MNF, argv[2]);
+                }
+        else if (argc > 2)
+                dosplit(argv[1], argv[2]);
+        else if (argc > 1)
+                while (fgets(buf, sizeof(buf), stdin) != NULL) {
+                        /* cut at the newline if present; a line without one stays whole */
+                        buf[strcspn(buf, "\n")] = '\0';
+                        dosplit(buf, argv[1]);
+                }
+        else
+                regress();
+        exit(0);
+}
+dosplit(string, seps)
+char *string;
+char *seps;
+{
+#       define  NF      5
+        /* room for NF field pointers; split() reports how many fields it saw */
+        char *fields[NF];
+        /* split in place, then hand the count and the pointers to print() */
+        print(split(string, fields, NF, seps), NF, fields);
+}
+print(nf, nfp, fields)
+int nf;
+int nfp;
+char *fields[];
+{
+        /*
+         * Writes "<nf>:<TAB>" followed by the quoted fields, comma separated,
+         * to stdout.  nf is the count split() reported and nfp the number of
+         * slots in fields[]; only the smaller of the two can be printed.
+         */
+        register int fn;
+        register int bound;
+        bound = (nf > nfp) ? nfp : nf;
+        printf("%d:\t", nf);
+        for (fn = 0; fn < bound; fn++)
+                printf("%s\"%s\"", (fn > 0) ? ", " : "", fields[fn]);
+        /* a dangling ", " still marks overflow, but the line always ends */
+        printf("%s\n", (nf > bound) ? ", " : "");
+}
+#define RNF     5               /* some table entries know this */
+/*
+ * Regression table for split(); regress() walks it until the NULL str entry.
+ *   str   string to split (regress() copies it into a scratch buffer)
+ *   seps  separator argument handed to split()
+ *   nf    expected return value; it may exceed RNF when fields overflow
+ *   fi    expected text of the stored fields, at most RNF of them
+ */
+struct {
+        char *str;
+        char *seps;
+        int nf;
+        char *fi[RNF];
+} tests[] = {
+        /* one separator character */
+        "",             " ",    0,      { "" },
+        " ",            " ",    2,      { "", "" },
+        "x",            " ",    1,      { "x" },
+        "xy",           " ",    1,      { "xy" },
+        "x y",          " ",    2,      { "x", "y" },
+        "abc def  g ",  " ",    5,      { "abc", "def", "", "g", "" },
+        "  a bcd",      " ",    4,      { "", "", "a", "bcd" },
+        "a b c d e f",  " ",    6,      { "a", "b", "c", "d", "e f" },
+        " a b c d ",    " ",    6,      { "", "a", "b", "c", "d " },
+        /* two separator characters */
+        "",             " _",   0,      { "" },
+        " ",            " _",   2,      { "", "" },
+        "x",            " _",   1,      { "x" },
+        "x y",          " _",   2,      { "x", "y" },
+        "ab _ cd",      " _",   2,      { "ab", "cd" },
+        " a_b  c ",     " _",   5,      { "", "a", "b", "c", "" },
+        "a b c_d e f",  " _",   6,      { "a", "b", "c", "d", "e f" },
+        " a b c d ",    " _",   6,      { "", "a", "b", "c", "d " },
+        /* three separator characters */
+        "",             " _~",  0,      { "" },
+        " ",            " _~",  2,      { "", "" },
+        "x",            " _~",  1,      { "x" },
+        "x y",          " _~",  2,      { "x", "y" },
+        "ab _~ cd",     " _~",  2,      { "ab", "cd" },
+        " a_b  c~",     " _~",  5,      { "", "a", "b", "c", "" },
+        "a b_c d~e f",  " _~",  6,      { "a", "b", "c", "d", "e f" },
+        "~a b c d ",    " _~",  6,      { "", "a", "b", "c", "d " },
+        /* four separator characters */
+        "",             " _~-", 0,      { "" },
+        " ",            " _~-", 2,      { "", "" },
+        "x",            " _~-", 1,      { "x" },
+        "x y",          " _~-", 2,      { "x", "y" },
+        "ab _~- cd",    " _~-", 2,      { "ab", "cd" },
+        " a_b  c~",     " _~-", 5,      { "", "a", "b", "c", "" },
+        "a b_c-d~e f",  " _~-", 6,      { "a", "b", "c", "d", "e f" },
+        "~a-b c d ",    " _~-", 6,      { "", "a", "b", "c", "d " },
+        /* two separator characters, both of them a space */
+        "",             "  ",   0,      { "" },
+        " ",            "  ",   2,      { "", "" },
+        "x",            "  ",   1,      { "x" },
+        "xy",           "  ",   1,      { "xy" },
+        "x y",          "  ",   2,      { "x", "y" },
+        "abc def  g ",  "  ",   4,      { "abc", "def", "g", "" },
+        "  a bcd",      "  ",   3,      { "", "a", "bcd" },
+        "a b c d e f",  "  ",   6,      { "a", "b", "c", "d", "e f" },
+        " a b c d ",    "  ",   6,      { "", "a", "b", "c", "d " },
+        /* empty separator string: white-space splitting */
+        "",             "",     0,      { "" },
+        " ",            "",     0,      { "" },
+        "x",            "",     1,      { "x" },
+        "xy",           "",     1,      { "xy" },
+        "x y",          "",     2,      { "x", "y" },
+        "abc def  g ",  "",     3,      { "abc", "def", "g" },
+        "\t a bcd",     "",     2,      { "a", "bcd" },
+        "  a \tb\t c ", "",     3,      { "a", "b", "c" },
+        "a b c d e ",   "",     5,      { "a", "b", "c", "d", "e" },
+        "a b\tc d e f", "",     6,      { "a", "b", "c", "d", "e f" },
+        " a b c d e f ",        "",     6,      { "a", "b", "c", "d", "e f " },
+        /* end marker: regress() stops at the NULL str */
+        NULL,           NULL,   0,      { NULL },
+};
+regress()
+{
+        /*
+         * Runs split() over every row of tests[] and reports, on stdout, a
+         * wrong field count, a write past the end of the field vector, or a
+         * field whose text differs from the table.  Any complaint is followed
+         * by a dump of what split() actually produced.
+         */
+        char buf[512];
+        register int n;
+        char *fields[RNF+1];
+        register int nf;
+        register int i;
+        register int printit;
+        register char *f;
+        for (n = 0; tests[n].str != NULL; n++) {
+                /* split() modifies its input, so work on a copy of the literal */
+                (void) strcpy(buf, tests[n].str);
+                /* guard slot just past the RNF entries split() may fill */
+                fields[RNF] = NULL;
+                nf = split(buf, fields, RNF, tests[n].seps);
+                printit = 0;
+                if (nf != tests[n].nf) {
+                        printf("split `%s' by `%s' gave %d fields, not %d\n",
+                                tests[n].str, tests[n].seps, nf, tests[n].nf);
+                        printit = 1;
+                } else if (fields[RNF] != NULL) {
+                        printf("split() went beyond array end\n");
+                        printit = 1;
+                } else {
+                        for (i = 0; i < nf && i < RNF; i++) {
+                                f = fields[i];
+                                if (f == NULL)
+                                        f = "(NULL)";
+                                if (strcmp(f, tests[n].fi[i]) != 0) {
+                                        /* print f, not fields[i]: %s must never be given NULL */
+                                        printf("split `%s' by `%s', field %d is `%s', not `%s'\n",
+                                                tests[n].str, tests[n].seps,
+                                                i, f, tests[n].fi[i]);
+                                        printit = 1;
+                                }
+                        }
+                }
+                if (printit)
+                        print(nf, RNF, fields);
+        }
+}
+#endif
diff --git a/src/regress/lib/libc/regex/tests b/src/regress/lib/libc/regex/tests
new file mode 100644
index 0000000000..c89b9ec164
--- /dev/null
+++ b/src/regress/lib/libc/regex/tests
@@ -0,0 +1,478 @@
+#       $OpenBSD: tests,v 1.2 2001/01/29 02:05:44 niklas Exp $
+#       $NetBSD: tests,v 1.5 1995/04/20 22:40:00 cgd Exp $
+# regular expression test set
+# Lines are at least three fields, separated by one or more tabs.  "" stands
+# for an empty field.  First field is an RE.  Second field is flags.  If
+# C flag given, regcomp() is expected to fail, and the third field is the
+# error name (minus the leading REG_).
+#
+# Otherwise it is expected to succeed, and the third field is the string to
+# try matching it against.  If there is no fourth field, the match is
+# expected to fail.  If there is a fourth field, it is the substring that
+# the RE is expected to match.  If there is a fifth field, it is a comma-
+# separated list of what the subexpressions should match, with - indicating
+# no match for that one.  In both the fourth and fifth fields, a (sub)field
+# starting with @ indicates that the (sub)expression is expected to match
+# a null string followed by the stuff after the @; this provides a way to
+# test where null strings match.  The character `N' in REs and strings
+# is newline, `S' is space, `T' is tab, `Z' is NUL.
+#
+# The full list of flags:
+#       -       placeholder, does nothing
+#       b       RE is a BRE, not an ERE
+#       &       try it as both an ERE and a BRE
+#       C       regcomp() error expected, third field is error name
+#       i       REG_ICASE
+#       m       ("mundane") REG_NOSPEC
+#       s       REG_NOSUB (not really testable)
+#       n       REG_NEWLINE
+#       ^       REG_NOTBOL
+#       $       REG_NOTEOL
+#       #       REG_STARTEND (see below)
+#       p       REG_PEND
+#
+# For REG_STARTEND, the start/end offsets are those of the substring
+# enclosed in ().
+# basics
+a               &       a       a
+abc             &       abc     abc
+abc|de          -       abc     abc
+a|b|c           -       abc     a
+# parentheses and perversions thereof
+a(b)c           -       abc     abc
+a\(b\)c         b       abc     abc
+a(              C       EPAREN
+a(              b       a(      a(
+a\(             -       a(      a(
+a\(             bC      EPAREN
+a\(b            bC      EPAREN
+a(b             C       EPAREN
+a(b             b       a(b     a(b
+# gag me with a right parenthesis -- 1003.2 goofed here (my fault, partly)
+a)              -       a)      a)
+)               -       )       )
+# end gagging (in a just world, those *should* give EPAREN)
+a)              b       a)      a)
+a\)             bC      EPAREN
+\)              bC      EPAREN
+a()b            -       ab      ab
+a\(\)b          b       ab      ab
+# anchoring and REG_NEWLINE
+^abc$           &       abc     abc
+a^b             -       a^b
+a^b             b       a^b     a^b
+a$b             -       a$b
+a$b             b       a$b     a$b
+^               &       abc     @abc
+$               &       abc     @
+^$              &       ""      @
+$^              -       ""      @
+\($\)\(^\)      b       ""      @
+# stop retching, those are legitimate (although disgusting)
+^^              -       ""      @
+$$              -       ""      @
+b$              &       abNc
+b$              &n      abNc    b
+^b$             &       aNbNc
+^b$             &n      aNbNc   b
+^$              &n      aNNb    @Nb
+^$              n       abc
+^$              n       abcN    @
+$^              n       aNNb    @Nb
+\($\)\(^\)      bn      aNNb    @Nb
+^^              n^      aNNb    @Nb
+$$              n       aNNb    @NN
+^a              ^       a
+a$              $       a
+^a              ^n      aNb
+^b              ^n      aNb     b
+a$              $n      bNa
+b$              $n      bNa     b
+a*(^b$)c*       -       b       b
+a*\(^b$\)c*     b       b       b
+# certain syntax errors and non-errors
+|               C       EMPTY
+|               b       |       |
+*               C       BADRPT
+*               b       *       *
++               C       BADRPT
+?               C       BADRPT
+""              &C      EMPTY
+()              -       abc     @abc
+\(\)            b       abc     @abc
+a||b            C       EMPTY
+|ab             C       EMPTY
+ab|             C       EMPTY
+(|a)b           C       EMPTY
+(a|)b           C       EMPTY
+(*a)            C       BADRPT
+(+a)            C       BADRPT
+(?a)            C       BADRPT
+({1}a)          C       BADRPT
+\(\{1\}a\)      bC      BADRPT
+(a|*b)          C       BADRPT
+(a|+b)          C       BADRPT
+(a|?b)          C       BADRPT
+(a|{1}b)        C       BADRPT
+^*              C       BADRPT
+^*              b       *       *
+^+              C       BADRPT
+^?              C       BADRPT
+^{1}            C       BADRPT
+^\{1\}          bC      BADRPT
+# metacharacters, backslashes
+a.c             &       abc     abc
+a[bc]d          &       abd     abd
+a\*c            &       a*c     a*c
+a\\b            &       a\b     a\b
+a\\\*b          &       a\*b    a\*b
+a\bc            &       abc     abc
+a\              &C      EESCAPE
+a\\bc           &       a\bc    a\bc
+\{              bC      BADRPT
+a\[b            &       a[b     a[b
+a[b             &C      EBRACK
+# trailing $ is a peculiar special case for the BRE code
+a$              &       a       a
+a$              &       a$
+a\$             &       a
+a\$             &       a$      a$
+a\\$            &       a
+a\\$            &       a$
+a\\$            &       a\$
+a\\$            &       a\      a\
+# back references, ugh
+a\(b\)\2c       bC      ESUBREG
+a\(b\1\)c       bC      ESUBREG
+a\(b*\)c\1d     b       abbcbbd abbcbbd bb
+a\(b*\)c\1d     b       abbcbd
+a\(b*\)c\1d     b       abbcbbbd
+^\(.\)\1        b       abc
+a\([bc]\)\1d    b       abcdabbd        abbd    b
+a\(\([bc]\)\2\)*d       b       abbccd  abbccd
+a\(\([bc]\)\2\)*d       b       abbcbd
+# actually, this next one probably ought to fail, but the spec is unclear
+a\(\(b\)*\2\)*d         b       abbbd   abbbd
+# here is a case that no NFA implementation does right
+\(ab*\)[ab]*\1  b       ababaaa ababaaa a
+# check out normal matching in the presence of back refs
+\(a\)\1bcd      b       aabcd   aabcd
+\(a\)\1bc*d     b       aabcd   aabcd
+\(a\)\1bc*d     b       aabd    aabd
+\(a\)\1bc*d     b       aabcccd aabcccd
+\(a\)\1bc*[ce]d b       aabcccd aabcccd
+^\(a\)\1b\(c\)*cd$      b       aabcccd aabcccd
+# ordinary repetitions
+ab*c            &       abc     abc
+ab+c            -       abc     abc
+ab?c            -       abc     abc
+a\(*\)b         b       a*b     a*b
+a\(**\)b        b       ab      ab
+a\(***\)b       bC      BADRPT
+*a              b       *a      *a
+**a             b       a       a
+***a            bC      BADRPT
+# the dreaded bounded repetitions
+{               &       {       {
+{abc            &       {abc    {abc
+{1              C       BADRPT
+{1}             C       BADRPT
+a{b             &       a{b     a{b
+a{1}b           -       ab      ab
+a\{1\}b         b       ab      ab
+a{1,}b          -       ab      ab
+a\{1,\}b        b       ab      ab
+a{1,2}b         -       aab     aab
+a\{1,2\}b       b       aab     aab
+a{1             C       EBRACE
+a\{1            bC      EBRACE
+a{1a            C       EBRACE
+a\{1a           bC      EBRACE
+a{1a}           C       BADBR
+a\{1a\}         bC      BADBR
+a{,2}           -       a{,2}   a{,2}
+a\{,2\}         bC      BADBR
+a{,}            -       a{,}    a{,}
+a\{,\}          bC      BADBR
+a{1,x}          C       BADBR
+a\{1,x\}        bC      BADBR
+a{1,x           C       EBRACE
+a\{1,x          bC      EBRACE
+a{300}          C       BADBR
+a\{300\}        bC      BADBR
+a{1,0}          C       BADBR
+a\{1,0\}        bC      BADBR
+ab{0,0}c        -       abcac   ac
+ab\{0,0\}c      b       abcac   ac
+ab{0,1}c        -       abcac   abc
+ab\{0,1\}c      b       abcac   abc
+ab{0,3}c        -       abbcac  abbc
+ab\{0,3\}c      b       abbcac  abbc
+ab{1,1}c        -       acabc   abc
+ab\{1,1\}c      b       acabc   abc
+ab{1,3}c        -       acabc   abc
+ab\{1,3\}c      b       acabc   abc
+ab{2,2}c        -       abcabbc abbc
+ab\{2,2\}c      b       abcabbc abbc
+ab{2,4}c        -       abcabbc abbc
+ab\{2,4\}c      b       abcabbc abbc
+((a{1,10}){1,10}){1,10} -       a       a       a,a
+# multiple repetitions
+a**             &C      BADRPT
+a++             C       BADRPT
+a??             C       BADRPT
+a*+             C       BADRPT
+a*?             C       BADRPT
+a+*             C       BADRPT
+a+?             C       BADRPT
+a?*             C       BADRPT
+a?+             C       BADRPT
+a{1}{1}         C       BADRPT
+a*{1}           C       BADRPT
+a+{1}           C       BADRPT
+a?{1}           C       BADRPT
+a{1}*           C       BADRPT
+a{1}+           C       BADRPT
+a{1}?           C       BADRPT
+a*{b}           -       a{b}    a{b}
+a\{1\}\{1\}     bC      BADRPT
+a*\{1\}         bC      BADRPT
+a\{1\}*         bC      BADRPT
+# brackets, and numerous perversions thereof
+a[b]c           &       abc     abc
+a[ab]c          &       abc     abc
+a[^ab]c         &       adc     adc
+a[]b]c          &       a]c     a]c
+a[[b]c          &       a[c     a[c
+a[-b]c          &       a-c     a-c
+a[^]b]c         &       adc     adc
+a[^-b]c         &       adc     adc
+a[b-]c          &       a-c     a-c
+a[b             &C      EBRACK
+a[]             &C      EBRACK
+a[1-3]c         &       a2c     a2c
+a[3-1]c         &C      ERANGE
+a[1-3-5]c       &C      ERANGE
+a[[.-.]--]c     &       a-c     a-c
+a[1-            &C      ERANGE
+a[[.            &C      EBRACK
+a[[.x           &C      EBRACK
+a[[.x.          &C      EBRACK
+a[[.x.]         &C      EBRACK
+a[[.x.]]        &       ax      ax
+a[[.x,.]]       &C      ECOLLATE
+a[[.one.]]b     &       a1b     a1b
+a[[.notdef.]]b  &C      ECOLLATE
+a[[.].]]b       &       a]b     a]b
+a[[:alpha:]]c   &       abc     abc
+a[[:notdef:]]c  &C      ECTYPE
+a[[:            &C      EBRACK
+a[[:alpha       &C      EBRACK
+a[[:alpha:]     &C      EBRACK
+a[[:alpha,:]    &C      ECTYPE
+a[[:]:]]b       &C      ECTYPE
+a[[:-:]]b       &C      ECTYPE
+a[[:alph:]]     &C      ECTYPE
+a[[:alphabet:]] &C      ECTYPE
+[[:alnum:]]+    -       -%@a0X- a0X
+[[:alpha:]]+    -       -%@aX0- aX
+[[:blank:]]+    -       aSSTb   SST
+[[:cntrl:]]+    -       aNTb    NT
+[[:digit:]]+    -       a019b   019
+[[:graph:]]+    -       Sa%bS   a%b
+[[:lower:]]+    -       AabC    ab
+[[:print:]]+    -       NaSbN   aSb
+[[:punct:]]+    -       S%-&T   %-&
+[[:space:]]+    -       aSNTb   SNT
+[[:upper:]]+    -       aBCd    BC
+[[:xdigit:]]+   -       p0f3Cq  0f3C
+a[[=b=]]c       &       abc     abc
+a[[=            &C      EBRACK
+a[[=b           &C      EBRACK
+a[[=b=          &C      EBRACK
+a[[=b=]         &C      EBRACK
+a[[=b,=]]       &C      ECOLLATE
+a[[=one=]]b     &       a1b     a1b
+# complexities
+a(((b)))c       -       abc     abc
+a(b|(c))d       -       abd     abd
+a(b*|c)d        -       abbd    abbd
+# just gotta have one DFA-buster, of course
+a[ab]{20}       -       aaaaabaaaabaaaabaaaab   aaaaabaaaabaaaabaaaab
+# and an inline expansion in case somebody gets tricky
+a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab]       -       aaaaabaaaabaaaabaaaab   aaaaabaaaabaaaabaaaab
+# and in case somebody just slips in an NFA...
+a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab](wee|week)(knights|night)      -       aaaaabaaaabaaaabaaaabweeknights aaaaabaaaabaaaabaaaabweeknights
+# fish for anomalies as the number of states passes 32
+12345678901234567890123456789   -       a12345678901234567890123456789b 12345678901234567890123456789
+123456789012345678901234567890  -       a123456789012345678901234567890b        123456789012345678901234567890
+1234567890123456789012345678901 -       a1234567890123456789012345678901b       1234567890123456789012345678901
+12345678901234567890123456789012        -       a12345678901234567890123456789012b      12345678901234567890123456789012
+123456789012345678901234567890123       -       a123456789012345678901234567890123b     123456789012345678901234567890123
+# and one really big one, beyond any plausible word width
+1234567890123456789012345678901234567890123456789012345678901234567890  -       a1234567890123456789012345678901234567890123456789012345678901234567890b        1234567890123456789012345678901234567890123456789012345678901234567890
+# fish for problems as brackets go past 8
+[ab][cd][ef][gh][ij][kl][mn]    -       xacegikmoq      acegikm
+[ab][cd][ef][gh][ij][kl][mn][op]        -       xacegikmoq      acegikmo
+[ab][cd][ef][gh][ij][kl][mn][op][qr]    -       xacegikmoqy     acegikmoq
+[ab][cd][ef][gh][ij][kl][mn][op][q]     -       xacegikmoqy     acegikmoq
+# subtleties of matching
+abc             &       xabcy   abc
+a\(b\)?c\1d     b       acd
+aBc             i       Abc     Abc
+a[Bc]*d         i       abBCcd  abBCcd
+0[[:upper:]]1   &i      0a1     0a1
+0[[:lower:]]1   &i      0A1     0A1
+a[^b]c          &i      abc
+a[^b]c          &i      aBc
+a[^b]c          &i      adc     adc
+[a]b[c]         -       abc     abc
+[a]b[a]         -       aba     aba
+[abc]b[abc]     -       abc     abc
+[abc]b[abd]     -       abd     abd
+a(b?c)+d        -       accd    accd
+(wee|week)(knights|night)       -       weeknights      weeknights
+(we|wee|week|frob)(knights|night|day)   -       weeknights      weeknights
+a[bc]d          -       xyzaaabcaababdacd       abd
+a[ab]c          -       aaabc   abc
+abc             s       abc     abc
+a*              &       b       @b
+# Let's have some fun -- try to match a C comment.
+# first the obvious, which looks okay at first glance...
+/\*.*\*/        -       /*x*/   /*x*/
+# but...
+/\*.*\*/        -       /*x*/y/*z*/     /*x*/y/*z*/
+# okay, we must not match */ inside; try to do that...
+/\*([^*]|\*[^/])*\*/    -       /*x*/   /*x*/
+/\*([^*]|\*[^/])*\*/    -       /*x*/y/*z*/     /*x*/
+# but...
+/\*([^*]|\*[^/])*\*/    -       /*x**/y/*z*/    /*x**/y/*z*/
+# and a still fancier version, which does it right (I think)...
+/\*([^*]|\*+[^*/])*\*+/ -       /*x*/   /*x*/
+/\*([^*]|\*+[^*/])*\*+/ -       /*x*/y/*z*/     /*x*/
+/\*([^*]|\*+[^*/])*\*+/ -       /*x**/y/*z*/    /*x**/
+/\*([^*]|\*+[^*/])*\*+/ -       /*x****/y/*z*/  /*x****/
+/\*([^*]|\*+[^*/])*\*+/ -       /*x**x*/y/*z*/  /*x**x*/
+/\*([^*]|\*+[^*/])*\*+/ -       /*x***x/y/*z*/  /*x***x/y/*z*/
+# subexpressions
+a(b)(c)d        -       abcd    abcd    b,c
+a(((b)))c       -       abc     abc     b,b,b
+a(b|(c))d       -       abd     abd     b,-
+a(b*|c|e)d      -       abbd    abbd    bb
+a(b*|c|e)d      -       acd     acd     c
+a(b*|c|e)d      -       ad      ad      @d
+a(b?)c          -       abc     abc     b
+a(b?)c          -       ac      ac      @c
+a(b+)c          -       abc     abc     b
+a(b+)c          -       abbbc   abbbc   bbb
+a(b*)c          -       ac      ac      @c
+(a|ab)(bc([de]+)f|cde)  -       abcdef  abcdef  a,bcdef,de
+# the regression tester only asks for 9 subexpressions
+a(b)(c)(d)(e)(f)(g)(h)(i)(j)k   -       abcdefghijk     abcdefghijk     b,c,d,e,f,g,h,i,j
+a(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)l        -       abcdefghijkl    abcdefghijkl    b,c,d,e,f,g,h,i,j,k
+a([bc]?)c       -       abc     abc     b
+a([bc]?)c       -       ac      ac      @c
+a([bc]+)c       -       abc     abc     b
+a([bc]+)c       -       abcc    abcc    bc
+a([bc]+)bc      -       abcbc   abcbc   bc
+a(bb+|b)b       -       abb     abb     b
+a(bbb+|bb+|b)b  -       abb     abb     b
+a(bbb+|bb+|b)b  -       abbb    abbb    bb
+a(bbb+|bb+|b)bb -       abbb    abbb    b
+(.*).*          -       abcdef  abcdef  abcdef
+(a*)*           -       bc      @b      @b
+# do we get the right subexpression when it is used more than once?
+a(b|c)*d        -       ad      ad      -
+a(b|c)*d        -       abcd    abcd    c
+a(b|c)+d        -       abd     abd     b
+a(b|c)+d        -       abcd    abcd    c
+a(b|c?)+d       -       ad      ad      @d
+a(b|c?)+d       -       abcd    abcd    @d
+a(b|c){0,0}d    -       ad      ad      -
+a(b|c){0,1}d    -       ad      ad      -
+a(b|c){0,1}d    -       abd     abd     b
+a(b|c){0,2}d    -       ad      ad      -
+a(b|c){0,2}d    -       abcd    abcd    c
+a(b|c){0,}d     -       ad      ad      -
+a(b|c){0,}d     -       abcd    abcd    c
+a(b|c){1,1}d    -       abd     abd     b
+a(b|c){1,1}d    -       acd     acd     c
+a(b|c){1,2}d    -       abd     abd     b
+a(b|c){1,2}d    -       abcd    abcd    c
+a(b|c){1,}d     -       abd     abd     b
+a(b|c){1,}d     -       abcd    abcd    c
+a(b|c){2,2}d    -       acbd    acbd    b
+a(b|c){2,2}d    -       abcd    abcd    c
+a(b|c){2,4}d    -       abcd    abcd    c
+a(b|c){2,4}d    -       abcbd   abcbd   b
+a(b|c){2,4}d    -       abcbcd  abcbcd  c
+a(b|c){2,}d     -       abcd    abcd    c
+a(b|c){2,}d     -       abcbd   abcbd   b
+a(b+|((c)*))+d  -       abd     abd     @d,@d,-
+a(b+|((c)*))+d  -       abcd    abcd    @d,@d,-
+# check out the STARTEND option
+[abc]           &#      a(b)c   b
+[abc]           &#      a(d)c
+[abc]           &#      a(bc)d  b
+[abc]           &#      a(dc)d  c
+.               &#      a()c
+b.*c            &#      b(bc)c  bc
+b.*             &#      b(bc)c  bc
+.*c             &#      b(bc)c  bc
+# plain strings, with the NOSPEC flag
+abc             m       abc     abc
+abc             m       xabcy   abc
+abc             m       xyz
+a*b             m       aba*b   a*b
+a*b             m       ab
+""              mC      EMPTY
+# cases involving NULs
+aZb             &       a       a
+aZb             &p      a
+aZb             &p#     (aZb)   aZb
+aZ*b            &p#     (ab)    ab
+a.b             &#      (aZb)   aZb
+a.*             &#      (aZb)c  aZb
+# word boundaries (ick)
+[[:<:]]a        &       a       a
+[[:<:]]a        &       ba
+[[:<:]]a        &       -a      a
+a[[:>:]]        &       a       a
+a[[:>:]]        &       ab
+a[[:>:]]        &       a-      a
+[[:<:]]a.c[[:>:]]       &       axcd-dayc-dazce-abc     abc
+[[:<:]]a.c[[:>:]]       &       axcd-dayc-dazce-abc-q   abc
+[[:<:]]a.c[[:>:]]       &       axc-dayc-dazce-abc      axc
+[[:<:]]b.c[[:>:]]       &       a_bxc-byc_d-bzc-q       bzc
+[[:<:]].x..[[:>:]]      &       y_xa_-_xb_y-_xc_-axdc   _xc_
+[[:<:]]a_b[[:>:]]       &       x_a_b
+# past problems, and suspected problems
+(A[1])|(A[2])|(A[3])|(A[4])|(A[5])|(A[6])|(A[7])|(A[8])|(A[9])|(A[A])   -       A1      A1
+abcdefghijklmnop        i       abcdefghijklmnop        abcdefghijklmnop
+abcdefghijklmnopqrstuv  i       abcdefghijklmnopqrstuv  abcdefghijklmnopqrstuv
+(ALAK)|(ALT[AB])|(CC[123]1)|(CM[123]1)|(GAMC)|(LC[23][EO ])|(SEM[1234])|(SL[ES][12])|(SLWW)|(SLF )|(SLDT)|(VWH[12])|(WH[34][EW])|(WP1[ESN])     -       CC11    CC11
+CC[13]1|a{21}[23][EO][123][Es][12]a{15}aa[34][EW]aaaaaaa[X]a    -       CC11    CC11
+Char \([a-z0-9_]*\)\[.* b       Char xyz[k      Char xyz[k      xyz
+a?b     -       ab      ab
+-\{0,1\}[0-9]*$ b       -5      -5