summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authormillert <>2017-04-12 16:06:12 +0000
committermillert <>2017-04-12 16:06:12 +0000
commit0f3f0734ef5aa0887305bde6762a8cbeb7d54695 (patch)
tree7c0d80375d5f5c3cb1076dc4669534c881157a6d
parent97f100b6ccb83dadea1de6b0bf12416a8ed1c845 (diff)
downloadopenbsd-0f3f0734ef5aa0887305bde6762a8cbeb7d54695.tar.gz
openbsd-0f3f0734ef5aa0887305bde6762a8cbeb7d54695.tar.bz2
openbsd-0f3f0734ef5aa0887305bde6762a8cbeb7d54695.zip
New strstr() implementation from musl libc by Rich Felker. This
version uses the two-way string matching algorithm and is faster than the old implementation. With this change, ports that check for strstr having linear complexity time strstr will no longer replace the libc strstr with a private version. OK deraadt@ espie@
-rw-r--r--src/lib/libc/string/strstr.c224
1 files changed, 180 insertions, 44 deletions
diff --git a/src/lib/libc/string/strstr.c b/src/lib/libc/string/strstr.c
index ccd0871700..167d609c39 100644
--- a/src/lib/libc/string/strstr.c
+++ b/src/lib/libc/string/strstr.c
@@ -1,57 +1,193 @@
1/* $OpenBSD: strstr.c,v 1.6 2015/08/31 02:53:57 guenther Exp $ */ 1/* $OpenBSD: strstr.c,v 1.7 2017/04/12 16:06:12 millert Exp $ */
2/*- 2
3 * Copyright (c) 1990 The Regents of the University of California. 3/*
4 * All rights reserved. 4 * Copyright (c) 2005-2014 Rich Felker
5 * 5 *
6 * This code is derived from software contributed to Berkeley by 6 * Permission is hereby granted, free of charge, to any person obtaining
7 * Chris Torek. 7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
8 * 13 *
9 * Redistribution and use in source and binary forms, with or without 14 * The above copyright notice and this permission notice shall be
10 * modification, are permitted provided that the following conditions 15 * included in all copies or substantial portions of the Software.
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 * 16 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 20 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 */ 24 */
33 25
34#include <string.h> 26#include <string.h>
27#include <stdlib.h>
28#include <stdint.h>
29
30static char *
31twobyte_strstr(const unsigned char *h, const unsigned char *n)
32{
33 uint16_t nw = n[0]<<8 | n[1], hw = h[0]<<8 | h[1];
34 for (h++; *h && hw != nw; hw = hw<<8 | *++h);
35 return *h ? (char *)h-1 : 0;
36}
37
38static char *
39threebyte_strstr(const unsigned char *h, const unsigned char *n)
40{
41 uint32_t nw = n[0]<<24 | n[1]<<16 | n[2]<<8;
42 uint32_t hw = h[0]<<24 | h[1]<<16 | h[2]<<8;
43 for (h+=2; *h && hw != nw; hw = (hw|*++h)<<8);
44 return *h ? (char *)h-2 : 0;
45}
46
47static char *
48fourbyte_strstr(const unsigned char *h, const unsigned char *n)
49{
50 uint32_t nw = n[0]<<24 | n[1]<<16 | n[2]<<8 | n[3];
51 uint32_t hw = h[0]<<24 | h[1]<<16 | h[2]<<8 | h[3];
52 for (h+=3; *h && hw != nw; hw = hw<<8 | *++h);
53 return *h ? (char *)h-3 : 0;
54}
55
56#define MAX(a,b) ((a)>(b)?(a):(b))
57#define MIN(a,b) ((a)<(b)?(a):(b))
58
59#define BITOP(a,b,op) \
60 ((a)[(size_t)(b)/(8*sizeof *(a))] op (size_t)1<<((size_t)(b)%(8*sizeof *(a))))
35 61
36/* 62/*
37 * Find the first occurrence of find in s. 63 * Maxime Crochemore and Dominique Perrin, Two-way string-matching,
64 * Journal of the ACM, 38(3):651-675, July 1991.
38 */ 65 */
39char * 66static char *
40strstr(const char *s, const char *find) 67twoway_strstr(const unsigned char *h, const unsigned char *n)
41{ 68{
42 char c, sc; 69 const unsigned char *z;
43 size_t len; 70 size_t l, ip, jp, k, p, ms, p0, mem, mem0;
44 71 size_t byteset[32 / sizeof(size_t)] = { 0 };
45 if ((c = *find++) != 0) { 72 size_t shift[256];
46 len = strlen(find); 73
47 do { 74 /* Computing length of needle and fill shift table */
48 do { 75 for (l=0; n[l] && h[l]; l++)
49 if ((sc = *s++) == 0) 76 BITOP(byteset, n[l], |=), shift[n[l]] = l+1;
50 return (NULL); 77 if (n[l]) return 0; /* hit the end of h */
51 } while (sc != c); 78
52 } while (strncmp(s, find, len) != 0); 79 /* Compute maximal suffix */
53 s--; 80 ip = -1; jp = 0; k = p = 1;
81 while (jp+k<l) {
82 if (n[ip+k] == n[jp+k]) {
83 if (k == p) {
84 jp += p;
85 k = 1;
86 } else k++;
87 } else if (n[ip+k] > n[jp+k]) {
88 jp += k;
89 k = 1;
90 p = jp - ip;
91 } else {
92 ip = jp++;
93 k = p = 1;
94 }
95 }
96 ms = ip;
97 p0 = p;
98
99 /* And with the opposite comparison */
100 ip = -1; jp = 0; k = p = 1;
101 while (jp+k<l) {
102 if (n[ip+k] == n[jp+k]) {
103 if (k == p) {
104 jp += p;
105 k = 1;
106 } else k++;
107 } else if (n[ip+k] < n[jp+k]) {
108 jp += k;
109 k = 1;
110 p = jp - ip;
111 } else {
112 ip = jp++;
113 k = p = 1;
114 }
54 } 115 }
55 return ((char *)s); 116 if (ip+1 > ms+1) ms = ip;
117 else p = p0;
118
119 /* Periodic needle? */
120 if (memcmp(n, n+p, ms+1)) {
121 mem0 = 0;
122 p = MAX(ms, l-ms-1) + 1;
123 } else mem0 = l-p;
124 mem = 0;
125
126 /* Initialize incremental end-of-haystack pointer */
127 z = h;
128
129 /* Search loop */
130 for (;;) {
131 /* Update incremental end-of-haystack pointer */
132 if (z-h < l) {
133 /* Fast estimate for MIN(l,63) */
134 size_t grow = l | 63;
135 const unsigned char *z2 = memchr(z, 0, grow);
136 if (z2) {
137 z = z2;
138 if (z-h < l) return 0;
139 } else z += grow;
140 }
141
142 /* Check last byte first; advance by shift on mismatch */
143 if (BITOP(byteset, h[l-1], &)) {
144 k = l-shift[h[l-1]];
145#ifdef DEBUG
146 printf("adv by %zu (on %c) at [%s] (%zu;l=%zu)\n", k, h[l-1], h, shift[h[l-1]], l);
147#endif
148 if (k) {
149 if (mem0 && mem && k < p) k = l-p;
150 h += k;
151 mem = 0;
152 continue;
153 }
154 } else {
155 h += l;
156 mem = 0;
157 continue;
158 }
159
160 /* Compare right half */
161 for (k=MAX(ms+1,mem); n[k] && n[k] == h[k]; k++);
162 if (n[k]) {
163 h += k-ms;
164 mem = 0;
165 continue;
166 }
167 /* Compare left half */
168 for (k=ms+1; k>mem && n[k-1] == h[k-1]; k--);
169 if (k <= mem) return (char *)h;
170 h += p;
171 mem = mem0;
172 }
173}
174
175char *
176strstr(const char *h, const char *n)
177{
178 /* Return immediately on empty needle */
179 if (!n[0]) return (char *)h;
180
181 /* Use faster algorithms for short needles */
182 h = strchr(h, *n);
183 if (!h || !n[1]) return (char *)h;
184 if (!h[1]) return 0;
185 if (!n[2]) return twobyte_strstr((void *)h, (void *)n);
186 if (!h[2]) return 0;
187 if (!n[3]) return threebyte_strstr((void *)h, (void *)n);
188 if (!h[3]) return 0;
189 if (!n[4]) return fourbyte_strstr((void *)h, (void *)n);
190
191 return twoway_strstr((void *)h, (void *)n);
56} 192}
57DEF_STRONG(strstr); 193DEF_STRONG(strstr);