summaryrefslogtreecommitdiff
path: root/src/lib
diff options
context:
space:
mode:
Diffstat (limited to 'src/lib')
-rw-r--r--src/lib/libc/string/strstr.c224
1 files changed, 180 insertions, 44 deletions
diff --git a/src/lib/libc/string/strstr.c b/src/lib/libc/string/strstr.c
index ccd0871700..167d609c39 100644
--- a/src/lib/libc/string/strstr.c
+++ b/src/lib/libc/string/strstr.c
@@ -1,57 +1,193 @@
1/* $OpenBSD: strstr.c,v 1.6 2015/08/31 02:53:57 guenther Exp $ */ 1/* $OpenBSD: strstr.c,v 1.7 2017/04/12 16:06:12 millert Exp $ */
2/*- 2
3 * Copyright (c) 1990 The Regents of the University of California. 3/*
4 * All rights reserved. 4 * Copyright (c) 2005-2014 Rich Felker
5 * 5 *
6 * This code is derived from software contributed to Berkeley by 6 * Permission is hereby granted, free of charge, to any person obtaining
7 * Chris Torek. 7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
8 * 13 *
9 * Redistribution and use in source and binary forms, with or without 14 * The above copyright notice and this permission notice shall be
10 * modification, are permitted provided that the following conditions 15 * included in all copies or substantial portions of the Software.
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 * 16 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 20 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 */ 24 */
33 25
34#include <string.h> 26#include <string.h>
27#include <stdlib.h>
28#include <stdint.h>
29
30static char *
31twobyte_strstr(const unsigned char *h, const unsigned char *n)
32{
33 uint16_t nw = n[0]<<8 | n[1], hw = h[0]<<8 | h[1];
34 for (h++; *h && hw != nw; hw = hw<<8 | *++h);
35 return *h ? (char *)h-1 : 0;
36}
37
38static char *
39threebyte_strstr(const unsigned char *h, const unsigned char *n)
40{
41 uint32_t nw = n[0]<<24 | n[1]<<16 | n[2]<<8;
42 uint32_t hw = h[0]<<24 | h[1]<<16 | h[2]<<8;
43 for (h+=2; *h && hw != nw; hw = (hw|*++h)<<8);
44 return *h ? (char *)h-2 : 0;
45}
46
47static char *
48fourbyte_strstr(const unsigned char *h, const unsigned char *n)
49{
50 uint32_t nw = n[0]<<24 | n[1]<<16 | n[2]<<8 | n[3];
51 uint32_t hw = h[0]<<24 | h[1]<<16 | h[2]<<8 | h[3];
52 for (h+=3; *h && hw != nw; hw = hw<<8 | *++h);
53 return *h ? (char *)h-3 : 0;
54}
55
56#define MAX(a,b) ((a)>(b)?(a):(b))
57#define MIN(a,b) ((a)<(b)?(a):(b))
58
59#define BITOP(a,b,op) \
60 ((a)[(size_t)(b)/(8*sizeof *(a))] op (size_t)1<<((size_t)(b)%(8*sizeof *(a))))
35 61
36/* 62/*
37 * Find the first occurrence of find in s. 63 * Maxime Crochemore and Dominique Perrin, Two-way string-matching,
64 * Journal of the ACM, 38(3):651-675, July 1991.
38 */ 65 */
39char * 66static char *
40strstr(const char *s, const char *find) 67twoway_strstr(const unsigned char *h, const unsigned char *n)
41{ 68{
42 char c, sc; 69 const unsigned char *z;
43 size_t len; 70 size_t l, ip, jp, k, p, ms, p0, mem, mem0;
44 71 size_t byteset[32 / sizeof(size_t)] = { 0 };
45 if ((c = *find++) != 0) { 72 size_t shift[256];
46 len = strlen(find); 73
47 do { 74 /* Computing length of needle and fill shift table */
48 do { 75 for (l=0; n[l] && h[l]; l++)
49 if ((sc = *s++) == 0) 76 BITOP(byteset, n[l], |=), shift[n[l]] = l+1;
50 return (NULL); 77 if (n[l]) return 0; /* hit the end of h */
51 } while (sc != c); 78
52 } while (strncmp(s, find, len) != 0); 79 /* Compute maximal suffix */
53 s--; 80 ip = -1; jp = 0; k = p = 1;
81 while (jp+k<l) {
82 if (n[ip+k] == n[jp+k]) {
83 if (k == p) {
84 jp += p;
85 k = 1;
86 } else k++;
87 } else if (n[ip+k] > n[jp+k]) {
88 jp += k;
89 k = 1;
90 p = jp - ip;
91 } else {
92 ip = jp++;
93 k = p = 1;
94 }
95 }
96 ms = ip;
97 p0 = p;
98
99 /* And with the opposite comparison */
100 ip = -1; jp = 0; k = p = 1;
101 while (jp+k<l) {
102 if (n[ip+k] == n[jp+k]) {
103 if (k == p) {
104 jp += p;
105 k = 1;
106 } else k++;
107 } else if (n[ip+k] < n[jp+k]) {
108 jp += k;
109 k = 1;
110 p = jp - ip;
111 } else {
112 ip = jp++;
113 k = p = 1;
114 }
54 } 115 }
55 return ((char *)s); 116 if (ip+1 > ms+1) ms = ip;
117 else p = p0;
118
119 /* Periodic needle? */
120 if (memcmp(n, n+p, ms+1)) {
121 mem0 = 0;
122 p = MAX(ms, l-ms-1) + 1;
123 } else mem0 = l-p;
124 mem = 0;
125
126 /* Initialize incremental end-of-haystack pointer */
127 z = h;
128
129 /* Search loop */
130 for (;;) {
131 /* Update incremental end-of-haystack pointer */
132 if (z-h < l) {
133 /* Fast estimate for MIN(l,63) */
134 size_t grow = l | 63;
135 const unsigned char *z2 = memchr(z, 0, grow);
136 if (z2) {
137 z = z2;
138 if (z-h < l) return 0;
139 } else z += grow;
140 }
141
142 /* Check last byte first; advance by shift on mismatch */
143 if (BITOP(byteset, h[l-1], &)) {
144 k = l-shift[h[l-1]];
145#ifdef DEBUG
146 printf("adv by %zu (on %c) at [%s] (%zu;l=%zu)\n", k, h[l-1], h, shift[h[l-1]], l);
147#endif
148 if (k) {
149 if (mem0 && mem && k < p) k = l-p;
150 h += k;
151 mem = 0;
152 continue;
153 }
154 } else {
155 h += l;
156 mem = 0;
157 continue;
158 }
159
160 /* Compare right half */
161 for (k=MAX(ms+1,mem); n[k] && n[k] == h[k]; k++);
162 if (n[k]) {
163 h += k-ms;
164 mem = 0;
165 continue;
166 }
167 /* Compare left half */
168 for (k=ms+1; k>mem && n[k-1] == h[k-1]; k--);
169 if (k <= mem) return (char *)h;
170 h += p;
171 mem = mem0;
172 }
173}
174
175char *
176strstr(const char *h, const char *n)
177{
178 /* Return immediately on empty needle */
179 if (!n[0]) return (char *)h;
180
181 /* Use faster algorithms for short needles */
182 h = strchr(h, *n);
183 if (!h || !n[1]) return (char *)h;
184 if (!h[1]) return 0;
185 if (!n[2]) return twobyte_strstr((void *)h, (void *)n);
186 if (!h[2]) return 0;
187 if (!n[3]) return threebyte_strstr((void *)h, (void *)n);
188 if (!h[3]) return 0;
189 if (!n[4]) return fourbyte_strstr((void *)h, (void *)n);
190
191 return twoway_strstr((void *)h, (void *)n);
56} 192}
57DEF_STRONG(strstr); 193DEF_STRONG(strstr);