diff options
Diffstat (limited to 'src/lib/libc/string/bm.c')
-rw-r--r-- | src/lib/libc/string/bm.c | 219 |
1 files changed, 219 insertions, 0 deletions
diff --git a/src/lib/libc/string/bm.c b/src/lib/libc/string/bm.c new file mode 100644 index 0000000000..b191d340f6 --- /dev/null +++ b/src/lib/libc/string/bm.c | |||
@@ -0,0 +1,219 @@ | |||
1 | /*- | ||
2 | * Copyright (c) 1994 | ||
3 | * The Regents of the University of California. All rights reserved. | ||
4 | * | ||
5 | * This code is derived from software contributed to Berkeley by | ||
6 | * Andrew Hume of AT&T Bell Laboratories. | ||
7 | * | ||
8 | * Redistribution and use in source and binary forms, with or without | ||
9 | * modification, are permitted provided that the following conditions | ||
10 | * are met: | ||
11 | * 1. Redistributions of source code must retain the above copyright | ||
12 | * notice, this list of conditions and the following disclaimer. | ||
13 | * 2. Redistributions in binary form must reproduce the above copyright | ||
14 | * notice, this list of conditions and the following disclaimer in the | ||
15 | * documentation and/or other materials provided with the distribution. | ||
16 | * 3. All advertising materials mentioning features or use of this software | ||
17 | * must display the following acknowledgement: | ||
18 | * This product includes software developed by the University of | ||
19 | * California, Berkeley and its contributors. | ||
20 | * 4. Neither the name of the University nor the names of its contributors | ||
21 | * may be used to endorse or promote products derived from this software | ||
22 | * without specific prior written permission. | ||
23 | * | ||
24 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | ||
25 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
26 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | ||
27 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | ||
28 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | ||
29 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | ||
30 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | ||
31 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | ||
32 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | ||
33 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | ||
34 | * SUCH DAMAGE. | ||
35 | */ | ||
36 | |||
37 | #if defined(LIBC_SCCS) && !defined(lint) | ||
38 | static char *rcsid = "$OpenBSD: bm.c,v 1.3 1996/08/19 08:33:59 tholo Exp $"; | ||
39 | #endif /* LIBC_SCCS and not lint */ | ||
40 | |||
41 | #include <sys/types.h> | ||
42 | |||
43 | #include <bm.h> | ||
44 | #include <errno.h> | ||
45 | #include <stdlib.h> | ||
46 | #include <string.h> | ||
47 | |||
48 | /* | ||
49 | * XXX | ||
50 | * The default frequency table starts at 99 and counts down. The default | ||
51 | * table should probably be oriented toward text, and will necessarily be | ||
52 | * locale specific. This one is for English. It was derived from the | ||
53 | * OSF/1 and 4.4BSD formatted and unformatted manual pages, and about 100Mb | ||
54 | * of email and random text. Change it if you can find something better. | ||
55 | */ | ||
56 | static u_char const freq_def[256] = { | ||
57 | 0, 0, 0, 0, 0, 0, 0, 0, | ||
58 | 0, 77, 90, 0, 0, 0, 0, 0, | ||
59 | 0, 0, 0, 0, 0, 0, 0, 0, | ||
60 | 0, 0, 0, 0, 0, 0, 0, 0, | ||
61 | 99, 28, 42, 27, 16, 14, 20, 51, | ||
62 | 66, 65, 59, 24, 75, 76, 84, 56, | ||
63 | 72, 74, 64, 55, 54, 47, 41, 37, | ||
64 | 44, 61, 70, 43, 23, 53, 49, 22, | ||
65 | 33, 58, 40, 46, 45, 57, 60, 26, | ||
66 | 30, 63, 21, 12, 32, 50, 38, 39, | ||
67 | 34, 11, 48, 67, 62, 35, 15, 29, | ||
68 | 71, 18, 9, 17, 25, 13, 10, 52, | ||
69 | 36, 95, 78, 86, 87, 98, 82, 80, | ||
70 | 88, 94, 19, 68, 89, 83, 93, 96, | ||
71 | 81, 7, 91, 92, 97, 85, 69, 73, | ||
72 | 31, 79, 8, 5, 4, 6, 3, 0, | ||
73 | 0, 0, 0, 0, 0, 0, 0, 0, | ||
74 | 0, 0, 0, 0, 0, 0, 0, 0, | ||
75 | 0, 0, 0, 0, 0, 0, 0, 0, | ||
76 | 0, 0, 0, 0, 0, 0, 0, 0, | ||
77 | 0, 0, 0, 0, 0, 0, 0, 0, | ||
78 | 0, 0, 0, 0, 0, 0, 0, 0, | ||
79 | 0, 0, 0, 0, 0, 0, 0, 0, | ||
80 | 0, 0, 0, 0, 0, 0, 0, 0, | ||
81 | 0, 0, 0, 0, 0, 0, 0, 0, | ||
82 | 0, 0, 0, 0, 0, 0, 0, 0, | ||
83 | 0, 0, 0, 0, 0, 0, 0, 0, | ||
84 | 0, 0, 0, 0, 0, 0, 0, 0, | ||
85 | 0, 0, 0, 0, 0, 0, 0, 0, | ||
86 | 0, 0, 0, 0, 0, 0, 0, 0, | ||
87 | 0, 0, 0, 0, 0, 0, 0, 0, | ||
88 | 0, 0, 0, 0, 0, 0, 0, 0, | ||
89 | }; | ||
90 | |||
91 | bm_pat * | ||
92 | bm_comp(pb, len, freq) | ||
93 | u_char const *pb; | ||
94 | size_t len; | ||
95 | u_char const *freq; | ||
96 | { | ||
97 | register u_char const *pe, *p; | ||
98 | register size_t *d, r; | ||
99 | register int j; | ||
100 | int sv_errno; | ||
101 | bm_pat *pat; | ||
102 | |||
103 | if (len == 0) { | ||
104 | errno = EINVAL; | ||
105 | return (NULL); | ||
106 | } | ||
107 | if ((pat = malloc(sizeof(*pat))) == NULL) | ||
108 | return (NULL); | ||
109 | pat->pat = NULL; | ||
110 | pat->delta = NULL; | ||
111 | |||
112 | pat->patlen = len; /* copy pattern */ | ||
113 | if ((pat->pat = malloc(pat->patlen)) == NULL) | ||
114 | goto mem; | ||
115 | memcpy(pat->pat, pb, pat->patlen); | ||
116 | /* get skip delta */ | ||
117 | if ((pat->delta = malloc(256 * sizeof(*d))) == NULL) | ||
118 | goto mem; | ||
119 | for (j = 0, d = pat->delta; j < 256; j++) | ||
120 | d[j] = pat->patlen; | ||
121 | for (pe = pb + pat->patlen - 1; pb <= pe; pb++) | ||
122 | d[*pb] = pe - pb; | ||
123 | |||
124 | if (freq == NULL) /* default freq table */ | ||
125 | freq = freq_def; | ||
126 | r = 0; /* get guard */ | ||
127 | for (pb = pat->pat, pe = pb + pat->patlen - 1; pb < pe; pb++) | ||
128 | if (freq[*pb] < freq[pat->pat[r]]) | ||
129 | r = pb - pat->pat; | ||
130 | pat->rarec = pat->pat[r]; | ||
131 | pat->rareoff = r - (pat->patlen - 1); | ||
132 | |||
133 | /* get md2 shift */ | ||
134 | for (pe = pat->pat + pat->patlen - 1, p = pe - 1; p >= pat->pat; p--) | ||
135 | if (*p == *pe) | ||
136 | break; | ||
137 | |||
138 | /* *p is first leftward reoccurrence of *pe */ | ||
139 | pat->md2 = pe - p; | ||
140 | return (pat); | ||
141 | |||
142 | mem: sv_errno = errno; | ||
143 | bm_free(pat); | ||
144 | errno = sv_errno; | ||
145 | return (NULL); | ||
146 | } | ||
147 | |||
148 | void | ||
149 | bm_free(pat) | ||
150 | bm_pat *pat; | ||
151 | { | ||
152 | if (pat->pat != NULL) | ||
153 | free(pat->pat); | ||
154 | if (pat->delta != NULL) | ||
155 | free(pat->delta); | ||
156 | free(pat); | ||
157 | } | ||
158 | |||
159 | u_char * | ||
160 | bm_exec(pat, base, n) | ||
161 | bm_pat *pat; | ||
162 | u_char *base; | ||
163 | size_t n; | ||
164 | { | ||
165 | register u_char *e, *ep, *p, *q, *s; | ||
166 | register size_t *d0, k, md2, n1, ro; | ||
167 | register int rc; | ||
168 | |||
169 | if (n == 0) | ||
170 | return (NULL); | ||
171 | |||
172 | d0 = pat->delta; | ||
173 | n1 = pat->patlen - 1; | ||
174 | md2 = pat->md2; | ||
175 | ro = pat->rareoff; | ||
176 | rc = pat->rarec; | ||
177 | ep = pat->pat + pat->patlen - 1; | ||
178 | s = base + (pat->patlen - 1); | ||
179 | |||
180 | /* fast loop up to n - 3 * patlen */ | ||
181 | e = base + n - 3 * pat->patlen; | ||
182 | while (s < e) { | ||
183 | k = d0[*s]; /* ufast skip loop */ | ||
184 | while (k) { | ||
185 | k = d0[*(s += k)]; | ||
186 | k = d0[*(s += k)]; | ||
187 | } | ||
188 | if (s >= e) | ||
189 | break; | ||
190 | if (s[ro] != rc) /* guard test */ | ||
191 | goto mismatch1; | ||
192 | /* fwd match */ | ||
193 | for (p = pat->pat, q = s - n1; p < ep;) | ||
194 | if (*q++ != *p++) | ||
195 | goto mismatch1; | ||
196 | return (s - n1); | ||
197 | |||
198 | mismatch1: s += md2; /* md2 shift */ | ||
199 | } | ||
200 | |||
201 | /* slow loop up to end */ | ||
202 | e = base + n; | ||
203 | while (s < e) { | ||
204 | s += d0[*s]; /* step */ | ||
205 | if (s >= e) | ||
206 | break; | ||
207 | if (s[ro] != rc) /* guard test */ | ||
208 | goto mismatch2; | ||
209 | /* fwd match */ | ||
210 | for (p = pat->pat, q = s - n1; p <= ep;) | ||
211 | if (*q++ != *p++) | ||
212 | goto mismatch2; | ||
213 | return (s - n1); | ||
214 | |||
215 | mismatch2: s += md2; /* md2 shift */ | ||
216 | } | ||
217 | |||
218 | return (NULL); | ||
219 | } | ||