diff options
Diffstat (limited to 'src/lib/libc/string')
| -rw-r--r-- | src/lib/libc/string/Makefile.inc | 7 | ||||
| -rw-r--r-- | src/lib/libc/string/bm.3 | 120 | ||||
| -rw-r--r-- | src/lib/libc/string/bm.c | 205 |
3 files changed, 3 insertions, 329 deletions
diff --git a/src/lib/libc/string/Makefile.inc b/src/lib/libc/string/Makefile.inc index 1cbb54b3b5..1f6d756182 100644 --- a/src/lib/libc/string/Makefile.inc +++ b/src/lib/libc/string/Makefile.inc | |||
| @@ -1,9 +1,9 @@ | |||
| 1 | # $OpenBSD: Makefile.inc,v 1.33 2014/01/22 21:06:45 tedu Exp $ | 1 | # $OpenBSD: Makefile.inc,v 1.34 2014/03/23 23:16:48 tedu Exp $ |
| 2 | 2 | ||
| 3 | # string sources | 3 | # string sources |
| 4 | .PATH: ${LIBCSRCDIR}/arch/${MACHINE_CPU}/string ${LIBCSRCDIR}/string | 4 | .PATH: ${LIBCSRCDIR}/arch/${MACHINE_CPU}/string ${LIBCSRCDIR}/string |
| 5 | 5 | ||
| 6 | SRCS+= bm.c explicit_bzero.c memccpy.c memmem.c memrchr.c stpcpy.c stpncpy.c \ | 6 | SRCS+= explicit_bzero.c memccpy.c memmem.c memrchr.c stpcpy.c stpncpy.c \ |
| 7 | strcasecmp.c strcasestr.c strcoll.c strdup.c \ | 7 | strcasecmp.c strcasestr.c strcoll.c strdup.c \ |
| 8 | strerror.c strerror_r.c strlcat.c strmode.c strndup.c strnlen.c \ | 8 | strerror.c strerror_r.c strlcat.c strmode.c strndup.c strnlen.c \ |
| 9 | strsignal.c strtok.c strxfrm.c \ | 9 | strsignal.c strtok.c strxfrm.c \ |
| @@ -144,7 +144,7 @@ strrchr.do: rindex.c | |||
| 144 | @mv ${.TARGET}.tmp ${.TARGET} | 144 | @mv ${.TARGET}.tmp ${.TARGET} |
| 145 | .endif | 145 | .endif |
| 146 | 146 | ||
| 147 | MAN+= bm.3 bcmp.3 bcopy.3 bstring.3 bzero.3 ffs.3 memccpy.3 memchr.3 \ | 147 | MAN+= bcmp.3 bcopy.3 bstring.3 bzero.3 ffs.3 memccpy.3 memchr.3 \ |
| 148 | memcmp.3 memcpy.3 memmem.3 memmove.3 memset.3 stpcpy.3 strcasecmp.3 \ | 148 | memcmp.3 memcpy.3 memmem.3 memmove.3 memset.3 stpcpy.3 strcasecmp.3 \ |
| 149 | strcat.3 strchr.3 strcmp.3 strcoll.3 strcpy.3 strcspn.3 strdup.3 \ | 149 | strcat.3 strchr.3 strcmp.3 strcoll.3 strcpy.3 strcspn.3 strdup.3 \ |
| 150 | strerror.3 string.3 strlen.3 strmode.3 strncat.3 strncpy.3 strpbrk.3 \ | 150 | strerror.3 string.3 strlen.3 strmode.3 strncat.3 strncpy.3 strpbrk.3 \ |
| @@ -154,7 +154,6 @@ MAN+= bm.3 bcmp.3 bcopy.3 bstring.3 bzero.3 ffs.3 memccpy.3 memchr.3 \ | |||
| 154 | wcsstr.3 wcstok.3 wcswidth.3 wmemchr.3 wmemcmp.3 wmemcpy.3 wmemmove.3 \ | 154 | wcsstr.3 wcstok.3 wcswidth.3 wmemchr.3 wmemcmp.3 wmemcpy.3 wmemmove.3 \ |
| 155 | wmemset.3 | 155 | wmemset.3 |
| 156 | 156 | ||
| 157 | MLINKS+=bm.3 bm_comp.3 bm.3 bm_exec.3 bm.3 bm_free.3 | ||
| 158 | MLINKS+=bzero.3 explicit_bzero.3 | 157 | MLINKS+=bzero.3 explicit_bzero.3 |
| 159 | MLINKS+=memchr.3 memrchr.3 | 158 | MLINKS+=memchr.3 memrchr.3 |
| 160 | MLINKS+=stpcpy.3 stpncpy.3 | 159 | MLINKS+=stpcpy.3 stpncpy.3 |
diff --git a/src/lib/libc/string/bm.3 b/src/lib/libc/string/bm.3 deleted file mode 100644 index 82f5917594..0000000000 --- a/src/lib/libc/string/bm.3 +++ /dev/null | |||
| @@ -1,120 +0,0 @@ | |||
| 1 | .\" Copyright (c) 1994 | ||
| 2 | .\" The Regents of the University of California. All rights reserved. | ||
| 3 | .\" | ||
| 4 | .\" This code is derived from software contributed to Berkeley by | ||
| 5 | .\" Andrew Hume of AT&T Bell Laboratories. | ||
| 6 | .\" | ||
| 7 | .\" Redistribution and use in source and binary forms, with or without | ||
| 8 | .\" modification, are permitted provided that the following conditions | ||
| 9 | .\" are met: | ||
| 10 | .\" 1. Redistributions of source code must retain the above copyright | ||
| 11 | .\" notice, this list of conditions and the following disclaimer. | ||
| 12 | .\" 2. Redistributions in binary form must reproduce the above copyright | ||
| 13 | .\" notice, this list of conditions and the following disclaimer in the | ||
| 14 | .\" documentation and/or other materials provided with the distribution. | ||
| 15 | .\" 3. Neither the name of the University nor the names of its contributors | ||
| 16 | .\" may be used to endorse or promote products derived from this software | ||
| 17 | .\" without specific prior written permission. | ||
| 18 | .\" | ||
| 19 | .\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | ||
| 20 | .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
| 21 | .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | ||
| 22 | .\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | ||
| 23 | .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | ||
| 24 | .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | ||
| 25 | .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | ||
| 26 | .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | ||
| 27 | .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | ||
| 28 | .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | ||
| 29 | .\" SUCH DAMAGE. | ||
| 30 | .\" | ||
| 31 | .\" $OpenBSD: bm.3,v 1.12 2013/12/05 07:43:20 jmc Exp $ | ||
| 32 | .\" | ||
| 33 | .Dd $Mdocdate: December 5 2013 $ | ||
| 34 | .Dt BM 3 | ||
| 35 | .Os | ||
| 36 | .Sh NAME | ||
| 37 | .Nm bm_comp , | ||
| 38 | .Nm bm_exec , | ||
| 39 | .Nm bm_free | ||
| 40 | .Nd Boyer-Moore string search | ||
| 41 | .Sh SYNOPSIS | ||
| 42 | .In sys/types.h | ||
| 43 | .In bm.h | ||
| 44 | .Ft bm_pat * | ||
| 45 | .Fn bm_comp "const unsigned char *pattern" "size_t patlen" \ | ||
| 46 | "const unsigned char freq[256]" | ||
| 47 | .Ft unsigned char * | ||
| 48 | .Fn bm_exec "bm_pat *pdesc" "unsigned char *text" "size_t len" | ||
| 49 | .Ft void | ||
| 50 | .Fn bm_free "bm_pat *pdesc" | ||
| 51 | .Sh DESCRIPTION | ||
| 52 | These routines implement an efficient mechanism to find an | ||
| 53 | occurrence of a byte string within another byte string. | ||
| 54 | .Pp | ||
| 55 | .Fn bm_comp | ||
| 56 | evaluates | ||
| 57 | .Fa patlen | ||
| 58 | bytes starting at | ||
| 59 | .Fa pattern | ||
| 60 | and returns a pointer to a structure describing them. | ||
| 61 | The bytes referenced by | ||
| 62 | .Fa pattern | ||
| 63 | may be of any value. | ||
| 64 | .Pp | ||
| 65 | The search takes advantage of the frequency distribution of the | ||
| 66 | bytes in the text to be searched. | ||
| 67 | If specified, | ||
| 68 | .Ar freq | ||
| 69 | should be an array of 256 values, | ||
| 70 | with higher values indicating that the corresponding character occurs | ||
| 71 | more frequently. | ||
| 72 | (A less than optimal frequency distribution can only result in less | ||
| 73 | than optimal performance, not incorrect results.) | ||
| 74 | If | ||
| 75 | .Ar freq | ||
| 76 | is | ||
| 77 | .Dv NULL , | ||
| 78 | a system default table is used. | ||
| 79 | .Pp | ||
| 80 | .Fn bm_exec | ||
| 81 | returns a pointer to the leftmost occurrence of the string given to | ||
| 82 | .Fn bm_comp | ||
| 83 | within | ||
| 84 | .Ar text , | ||
| 85 | or | ||
| 86 | .Dv NULL | ||
| 87 | if none occurs. | ||
| 88 | The number of bytes in | ||
| 89 | .Ar text | ||
| 90 | must be specified by | ||
| 91 | .Ar len . | ||
| 92 | .Pp | ||
| 93 | Space allocated for the returned description is discarded | ||
| 94 | by calling | ||
| 95 | .Fn bm_free | ||
| 96 | with the returned description as an argument. | ||
| 97 | .Pp | ||
| 98 | The asymptotic speed of | ||
| 99 | .Fn bm_exec | ||
| 100 | is | ||
| 101 | .Pf O Ns Pq len / patlen . | ||
| 102 | .Sh SEE ALSO | ||
| 103 | .Xr regex 3 , | ||
| 104 | .Xr strstr 3 | ||
| 105 | .Rs | ||
| 106 | .%R "Fast String Searching" | ||
| 107 | .%A Andrew Hume | ||
| 108 | .%A Daniel Sunday | ||
| 109 | .%J "Software Practice and Experience" | ||
| 110 | .%V Volume 21, 11 (November 1991) | ||
| 111 | .%P 1221-48 | ||
| 112 | .Re | ||
| 113 | .Sh HISTORY | ||
| 114 | The | ||
| 115 | .Fn bm_comp , | ||
| 116 | .Fn bm_exec , | ||
| 117 | and | ||
| 118 | .Fn bm_free | ||
| 119 | functions first appeared in | ||
| 120 | .Nx 1.0 . | ||
diff --git a/src/lib/libc/string/bm.c b/src/lib/libc/string/bm.c deleted file mode 100644 index 2c4c6ca720..0000000000 --- a/src/lib/libc/string/bm.c +++ /dev/null | |||
| @@ -1,205 +0,0 @@ | |||
| 1 | /* $OpenBSD: bm.c,v 1.7 2007/09/02 15:19:18 deraadt Exp $ */ | ||
| 2 | /*- | ||
| 3 | * Copyright (c) 1994 | ||
| 4 | * The Regents of the University of California. All rights reserved. | ||
| 5 | * | ||
| 6 | * This code is derived from software contributed to Berkeley by | ||
| 7 | * Andrew Hume of AT&T Bell Laboratories. | ||
| 8 | * | ||
| 9 | * Redistribution and use in source and binary forms, with or without | ||
| 10 | * modification, are permitted provided that the following conditions | ||
| 11 | * are met: | ||
| 12 | * 1. Redistributions of source code must retain the above copyright | ||
| 13 | * notice, this list of conditions and the following disclaimer. | ||
| 14 | * 2. Redistributions in binary form must reproduce the above copyright | ||
| 15 | * notice, this list of conditions and the following disclaimer in the | ||
| 16 | * documentation and/or other materials provided with the distribution. | ||
| 17 | * 3. Neither the name of the University nor the names of its contributors | ||
| 18 | * may be used to endorse or promote products derived from this software | ||
| 19 | * without specific prior written permission. | ||
| 20 | * | ||
| 21 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | ||
| 22 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
| 23 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | ||
| 24 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | ||
| 25 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | ||
| 26 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | ||
| 27 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | ||
| 28 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | ||
| 29 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | ||
| 30 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | ||
| 31 | * SUCH DAMAGE. | ||
| 32 | */ | ||
| 33 | |||
| 34 | #include <sys/types.h> | ||
| 35 | |||
| 36 | #include <bm.h> | ||
| 37 | #include <errno.h> | ||
| 38 | #include <stdlib.h> | ||
| 39 | #include <string.h> | ||
| 40 | |||
| 41 | /* | ||
| 42 | * XXX | ||
| 43 | * The default frequency table starts at 99 and counts down. The default | ||
| 44 | * table should probably be oriented toward text, and will necessarily be | ||
| 45 | * locale specific. This one is for English. It was derived from the | ||
| 46 | * OSF/1 and 4.4BSD formatted and unformatted manual pages, and about 100Mb | ||
| 47 | * of email and random text. Change it if you can find something better. | ||
| 48 | */ | ||
| 49 | static u_char const freq_def[256] = { | ||
| 50 | 0, 0, 0, 0, 0, 0, 0, 0, | ||
| 51 | 0, 77, 90, 0, 0, 0, 0, 0, | ||
| 52 | 0, 0, 0, 0, 0, 0, 0, 0, | ||
| 53 | 0, 0, 0, 0, 0, 0, 0, 0, | ||
| 54 | 99, 28, 42, 27, 16, 14, 20, 51, | ||
| 55 | 66, 65, 59, 24, 75, 76, 84, 56, | ||
| 56 | 72, 74, 64, 55, 54, 47, 41, 37, | ||
| 57 | 44, 61, 70, 43, 23, 53, 49, 22, | ||
| 58 | 33, 58, 40, 46, 45, 57, 60, 26, | ||
| 59 | 30, 63, 21, 12, 32, 50, 38, 39, | ||
| 60 | 34, 11, 48, 67, 62, 35, 15, 29, | ||
| 61 | 71, 18, 9, 17, 25, 13, 10, 52, | ||
| 62 | 36, 95, 78, 86, 87, 98, 82, 80, | ||
| 63 | 88, 94, 19, 68, 89, 83, 93, 96, | ||
| 64 | 81, 7, 91, 92, 97, 85, 69, 73, | ||
| 65 | 31, 79, 8, 5, 4, 6, 3, 0, | ||
| 66 | 0, 0, 0, 0, 0, 0, 0, 0, | ||
| 67 | 0, 0, 0, 0, 0, 0, 0, 0, | ||
| 68 | 0, 0, 0, 0, 0, 0, 0, 0, | ||
| 69 | 0, 0, 0, 0, 0, 0, 0, 0, | ||
| 70 | 0, 0, 0, 0, 0, 0, 0, 0, | ||
| 71 | 0, 0, 0, 0, 0, 0, 0, 0, | ||
| 72 | 0, 0, 0, 0, 0, 0, 0, 0, | ||
| 73 | 0, 0, 0, 0, 0, 0, 0, 0, | ||
| 74 | 0, 0, 0, 0, 0, 0, 0, 0, | ||
| 75 | 0, 0, 0, 0, 0, 0, 0, 0, | ||
| 76 | 0, 0, 0, 0, 0, 0, 0, 0, | ||
| 77 | 0, 0, 0, 0, 0, 0, 0, 0, | ||
| 78 | 0, 0, 0, 0, 0, 0, 0, 0, | ||
| 79 | 0, 0, 0, 0, 0, 0, 0, 0, | ||
| 80 | 0, 0, 0, 0, 0, 0, 0, 0, | ||
| 81 | 0, 0, 0, 0, 0, 0, 0, 0, | ||
| 82 | }; | ||
| 83 | |||
| 84 | bm_pat * | ||
| 85 | bm_comp(u_char const *pb, size_t len, u_char const *freq) | ||
| 86 | { | ||
| 87 | u_char const *pe, *p; | ||
| 88 | size_t *d, r; | ||
| 89 | int j; | ||
| 90 | int sv_errno; | ||
| 91 | bm_pat *pat; | ||
| 92 | |||
| 93 | if (len == 0) { | ||
| 94 | errno = EINVAL; | ||
| 95 | return (NULL); | ||
| 96 | } | ||
| 97 | if ((pat = malloc(sizeof(*pat))) == NULL) | ||
| 98 | return (NULL); | ||
| 99 | pat->pat = NULL; | ||
| 100 | pat->delta = NULL; | ||
| 101 | |||
| 102 | pat->patlen = len; /* copy pattern */ | ||
| 103 | if ((pat->pat = malloc(pat->patlen)) == NULL) | ||
| 104 | goto mem; | ||
| 105 | memcpy(pat->pat, pb, pat->patlen); | ||
| 106 | /* get skip delta */ | ||
| 107 | if ((pat->delta = calloc(256, sizeof(*d))) == NULL) | ||
| 108 | goto mem; | ||
| 109 | for (j = 0, d = pat->delta; j < 256; j++) | ||
| 110 | d[j] = pat->patlen; | ||
| 111 | for (pe = pb + pat->patlen - 1; pb <= pe; pb++) | ||
| 112 | d[*pb] = pe - pb; | ||
| 113 | |||
| 114 | if (freq == NULL) /* default freq table */ | ||
| 115 | freq = freq_def; | ||
| 116 | r = 0; /* get guard */ | ||
| 117 | for (pb = pat->pat, pe = pb + pat->patlen - 1; pb < pe; pb++) | ||
| 118 | if (freq[*pb] < freq[pat->pat[r]]) | ||
| 119 | r = pb - pat->pat; | ||
| 120 | pat->rarec = pat->pat[r]; | ||
| 121 | pat->rareoff = r - (pat->patlen - 1); | ||
| 122 | |||
| 123 | /* get md2 shift */ | ||
| 124 | for (pe = pat->pat + pat->patlen - 1, p = pe - 1; p >= pat->pat; p--) | ||
| 125 | if (*p == *pe) | ||
| 126 | break; | ||
| 127 | |||
| 128 | /* *p is first leftward reoccurrence of *pe */ | ||
| 129 | pat->md2 = pe - p; | ||
| 130 | return (pat); | ||
| 131 | |||
| 132 | mem: sv_errno = errno; | ||
| 133 | bm_free(pat); | ||
| 134 | errno = sv_errno; | ||
| 135 | return (NULL); | ||
| 136 | } | ||
| 137 | |||
| 138 | void | ||
| 139 | bm_free(bm_pat *pat) | ||
| 140 | { | ||
| 141 | if (pat->pat != NULL) | ||
| 142 | free(pat->pat); | ||
| 143 | if (pat->delta != NULL) | ||
| 144 | free(pat->delta); | ||
| 145 | free(pat); | ||
| 146 | } | ||
| 147 | |||
| 148 | u_char * | ||
| 149 | bm_exec(bm_pat *pat, u_char *base, size_t n) | ||
| 150 | { | ||
| 151 | u_char *e, *ep, *p, *q, *s; | ||
| 152 | size_t *d0, k, md2, n1, ro; | ||
| 153 | int rc; | ||
| 154 | |||
| 155 | if (n == 0) | ||
| 156 | return (NULL); | ||
| 157 | |||
| 158 | d0 = pat->delta; | ||
| 159 | n1 = pat->patlen - 1; | ||
| 160 | md2 = pat->md2; | ||
| 161 | ro = pat->rareoff; | ||
| 162 | rc = pat->rarec; | ||
| 163 | ep = pat->pat + pat->patlen - 1; | ||
| 164 | s = base + (pat->patlen - 1); | ||
| 165 | |||
| 166 | /* fast loop up to n - 3 * patlen */ | ||
| 167 | e = base + n - 3 * pat->patlen; | ||
| 168 | while (s < e) { | ||
| 169 | k = d0[*s]; /* ufast skip loop */ | ||
| 170 | while (k) { | ||
| 171 | k = d0[*(s += k)]; | ||
| 172 | k = d0[*(s += k)]; | ||
| 173 | } | ||
| 174 | if (s >= e) | ||
| 175 | break; | ||
| 176 | if (s[ro] != rc) /* guard test */ | ||
| 177 | goto mismatch1; | ||
| 178 | /* fwd match */ | ||
| 179 | for (p = pat->pat, q = s - n1; p < ep;) | ||
| 180 | if (*q++ != *p++) | ||
| 181 | goto mismatch1; | ||
| 182 | return (s - n1); | ||
| 183 | |||
| 184 | mismatch1: s += md2; /* md2 shift */ | ||
| 185 | } | ||
| 186 | |||
| 187 | /* slow loop up to end */ | ||
| 188 | e = base + n; | ||
| 189 | while (s < e) { | ||
| 190 | s += d0[*s]; /* step */ | ||
| 191 | if (s >= e) | ||
| 192 | break; | ||
| 193 | if (s[ro] != rc) /* guard test */ | ||
| 194 | goto mismatch2; | ||
| 195 | /* fwd match */ | ||
| 196 | for (p = pat->pat, q = s - n1; p <= ep;) | ||
| 197 | if (*q++ != *p++) | ||
| 198 | goto mismatch2; | ||
| 199 | return (s - n1); | ||
| 200 | |||
| 201 | mismatch2: s += md2; /* md2 shift */ | ||
| 202 | } | ||
| 203 | |||
| 204 | return (NULL); | ||
| 205 | } | ||
