diff options
author | millert <> | 2017-05-24 21:18:25 +0000 |
---|---|---|
committer | millert <> | 2017-05-24 21:18:25 +0000 |
commit | 228cae985cb23cf92bfe09e2e620b9fb68f8129d (patch) | |
tree | f0bf52388f83831ca81a49f345135df882cf9662 /src | |
parent | 9329cce20278a230c46f465b58dcf3f3d03f088e (diff) | |
download | openbsd-228cae985cb23cf92bfe09e2e620b9fb68f8129d.tar.gz openbsd-228cae985cb23cf92bfe09e2e620b9fb68f8129d.tar.bz2 openbsd-228cae985cb23cf92bfe09e2e620b9fb68f8129d.zip |
Support swapping 32-bit aligned elements on 64-bit platforms.
Previously they would be swapped a byte at a time when sizeof(int)
!= sizeof(long). Idea from FreeBSD.
Diffstat (limited to 'src')
-rw-r--r-- | src/lib/libc/stdlib/qsort.c | 85 |
1 files changed, 63 insertions, 22 deletions
diff --git a/src/lib/libc/stdlib/qsort.c b/src/lib/libc/stdlib/qsort.c index 4fe54eb5ff..58f2ec4709 100644 --- a/src/lib/libc/stdlib/qsort.c +++ b/src/lib/libc/stdlib/qsort.c | |||
@@ -1,4 +1,4 @@ | |||
1 | /* $OpenBSD: qsort.c,v 1.16 2017/05/20 12:48:56 millert Exp $ */ | 1 | /* $OpenBSD: qsort.c,v 1.17 2017/05/24 21:18:25 millert Exp $ */ |
2 | /*- | 2 | /*- |
3 | * Copyright (c) 1992, 1993 | 3 | * Copyright (c) 1992, 1993 |
4 | * The Regents of the University of California. All rights reserved. | 4 | * The Regents of the University of California. All rights reserved. |
@@ -43,14 +43,26 @@ static __inline void swapfunc(char *, char *, size_t, int); | |||
43 | * 1. The partition value is swapped into a[0] instead of being | 43 | * 1. The partition value is swapped into a[0] instead of being |
44 | * stored out of line. | 44 | * stored out of line. |
45 | * | 45 | * |
46 | * 2. It uses David Musser's introsort algorithm to fall back to | 46 | * 2. The swap function can swap 32-bit aligned elements on 64-bit |
47 | * platforms instead of swapping them as byte-aligned. | ||
48 | * | ||
49 | * 3. It uses David Musser's introsort algorithm to fall back to | ||
47 | * heapsort(3) when the recursion depth reaches 2*lg(n + 1). | 50 | * heapsort(3) when the recursion depth reaches 2*lg(n + 1). |
48 | * This avoids quicksort's quadratic behavior for pathological | 51 | * This avoids quicksort's quadratic behavior for pathological |
49 | * input without appreciably changing the average run time. | 52 | * input without appreciably changing the average run time. |
50 | * | 53 | * |
51 | * 3. Tail recursion is eliminated when sorting the larger of two | 54 | * 4. Tail recursion is eliminated when sorting the larger of two |
52 | * subpartitions to save stack space. | 55 | * subpartitions to save stack space. |
53 | */ | 56 | */ |
57 | #define SWAPTYPE_BYTEV 1 | ||
58 | #define SWAPTYPE_INTV 2 | ||
59 | #define SWAPTYPE_LONGV 3 | ||
60 | #define SWAPTYPE_INT 4 | ||
61 | #define SWAPTYPE_LONG 5 | ||
62 | |||
63 | #define TYPE_ALIGNED(TYPE, a, es) \ | ||
64 | (((char *)a - (char *)0) % sizeof(TYPE) == 0 && es % sizeof(TYPE) == 0) | ||
65 | |||
54 | #define swapcode(TYPE, parmi, parmj, n) { \ | 66 | #define swapcode(TYPE, parmi, parmj, n) { \ |
55 | size_t i = (n) / sizeof (TYPE); \ | 67 | size_t i = (n) / sizeof (TYPE); \ |
56 | TYPE *pi = (TYPE *) (parmi); \ | 68 | TYPE *pi = (TYPE *) (parmi); \ |
@@ -62,25 +74,42 @@ static __inline void swapfunc(char *, char *, size_t, int); | |||
62 | } while (--i > 0); \ | 74 | } while (--i > 0); \ |
63 | } | 75 | } |
64 | 76 | ||
65 | #define SWAPINIT(a, es) swaptype = ((char *)a - (char *)0) % sizeof(long) || \ | ||
66 | es % sizeof(long) ? 2 : es == sizeof(long)? 0 : 1; | ||
67 | |||
68 | static __inline void | 77 | static __inline void |
69 | swapfunc(char *a, char *b, size_t n, int swaptype) | 78 | swapfunc(char *a, char *b, size_t n, int swaptype) |
70 | { | 79 | { |
71 | if (swaptype <= 1) | 80 | switch (swaptype) { |
72 | swapcode(long, a, b, n) | 81 | case SWAPTYPE_INT: |
73 | else | 82 | case SWAPTYPE_INTV: |
74 | swapcode(char, a, b, n) | 83 | swapcode(int, a, b, n); |
84 | break; | ||
85 | case SWAPTYPE_LONG: | ||
86 | case SWAPTYPE_LONGV: | ||
87 | swapcode(long, a, b, n); | ||
88 | break; | ||
89 | default: | ||
90 | swapcode(char, a, b, n); | ||
91 | break; | ||
92 | } | ||
75 | } | 93 | } |
76 | 94 | ||
77 | #define swap(a, b) \ | 95 | #define swap(a, b) do { \ |
78 | if (swaptype == 0) { \ | 96 | switch (swaptype) { \ |
97 | case SWAPTYPE_INT: { \ | ||
98 | int t = *(int *)(a); \ | ||
99 | *(int *)(a) = *(int *)(b); \ | ||
100 | *(int *)(b) = t; \ | ||
101 | break; \ | ||
102 | } \ | ||
103 | case SWAPTYPE_LONG: { \ | ||
79 | long t = *(long *)(a); \ | 104 | long t = *(long *)(a); \ |
80 | *(long *)(a) = *(long *)(b); \ | 105 | *(long *)(a) = *(long *)(b); \ |
81 | *(long *)(b) = t; \ | 106 | *(long *)(b) = t; \ |
82 | } else \ | 107 | break; \ |
83 | swapfunc(a, b, es, swaptype) | 108 | } \ |
109 | default: \ | ||
110 | swapfunc(a, b, es, swaptype); \ | ||
111 | } \ | ||
112 | } while (0) | ||
84 | 113 | ||
85 | #define vecswap(a, b, n) if ((n) > 0) swapfunc(a, b, n, swaptype) | 114 | #define vecswap(a, b, n) if ((n) > 0) swapfunc(a, b, n, swaptype) |
86 | 115 | ||
@@ -93,11 +122,11 @@ med3(char *a, char *b, char *c, int (*cmp)(const void *, const void *)) | |||
93 | } | 122 | } |
94 | 123 | ||
95 | static void | 124 | static void |
96 | introsort(char *a, size_t n, size_t es, size_t maxdepth, | 125 | introsort(char *a, size_t n, size_t es, size_t maxdepth, int swaptype, |
97 | int (*cmp)(const void *, const void *)) | 126 | int (*cmp)(const void *, const void *)) |
98 | { | 127 | { |
99 | char *pa, *pb, *pc, *pd, *pl, *pm, *pn; | 128 | char *pa, *pb, *pc, *pd, *pl, *pm, *pn; |
100 | int cmp_result, swaptype; | 129 | int cmp_result; |
101 | size_t r, s; | 130 | size_t r, s; |
102 | 131 | ||
103 | loop: if (maxdepth == 0) { | 132 | loop: if (maxdepth == 0) { |
@@ -105,7 +134,6 @@ loop: if (maxdepth == 0) { | |||
105 | return; | 134 | return; |
106 | } | 135 | } |
107 | maxdepth--; | 136 | maxdepth--; |
108 | SWAPINIT(a, es); | ||
109 | if (n < 7) { | 137 | if (n < 7) { |
110 | for (pm = a + es; pm < a + n * es; pm += es) | 138 | for (pm = a + es; pm < a + n * es; pm += es) |
111 | for (pl = pm; pl > a && cmp(pl - es, pl) > 0; | 139 | for (pl = pm; pl > a && cmp(pl - es, pl) > 0; |
@@ -164,8 +192,10 @@ loop: if (maxdepth == 0) { | |||
164 | if (r < s) { | 192 | if (r < s) { |
165 | /* Recurse for 1st side, iterate for 2nd side. */ | 193 | /* Recurse for 1st side, iterate for 2nd side. */ |
166 | if (s > es) { | 194 | if (s > es) { |
167 | if (r > es) | 195 | if (r > es) { |
168 | introsort(a, r / es, es, maxdepth, cmp); | 196 | introsort(a, r / es, es, maxdepth, |
197 | swaptype, cmp); | ||
198 | } | ||
169 | a = pn - s; | 199 | a = pn - s; |
170 | n = s / es; | 200 | n = s / es; |
171 | goto loop; | 201 | goto loop; |
@@ -173,8 +203,10 @@ loop: if (maxdepth == 0) { | |||
173 | } else { | 203 | } else { |
174 | /* Recurse for 2nd side, iterate for 1st side. */ | 204 | /* Recurse for 2nd side, iterate for 1st side. */ |
175 | if (r > es) { | 205 | if (r > es) { |
176 | if (s > es) | 206 | if (s > es) { |
177 | introsort(pn - s, s / es, es, maxdepth, cmp); | 207 | introsort(pn - s, s / es, es, maxdepth, |
208 | swaptype, cmp); | ||
209 | } | ||
178 | n = r / es; | 210 | n = r / es; |
179 | goto loop; | 211 | goto loop; |
180 | } | 212 | } |
@@ -185,13 +217,22 @@ void | |||
185 | qsort(void *a, size_t n, size_t es, int (*cmp)(const void *, const void *)) | 217 | qsort(void *a, size_t n, size_t es, int (*cmp)(const void *, const void *)) |
186 | { | 218 | { |
187 | size_t i, maxdepth = 0; | 219 | size_t i, maxdepth = 0; |
220 | int swaptype; | ||
188 | 221 | ||
189 | /* Approximate 2*ceil(lg(n + 1)) */ | 222 | /* Approximate 2*ceil(lg(n + 1)) */ |
190 | for (i = n; i > 0; i >>= 1) | 223 | for (i = n; i > 0; i >>= 1) |
191 | maxdepth++; | 224 | maxdepth++; |
192 | maxdepth *= 2; | 225 | maxdepth *= 2; |
193 | 226 | ||
194 | introsort(a, n, es, maxdepth, cmp); | 227 | if (TYPE_ALIGNED(long, a, es)) |
228 | swaptype = es == sizeof(long) ? SWAPTYPE_LONG : SWAPTYPE_LONGV; | ||
229 | else if (sizeof(int) != sizeof(long) && TYPE_ALIGNED(int, a, es)) | ||
230 | swaptype = es == sizeof(int) ? SWAPTYPE_INT : SWAPTYPE_INTV; | ||
231 | else | ||
232 | swaptype = SWAPTYPE_BYTEV; | ||
233 | |||
234 | introsort(a, n, es, maxdepth, swaptype, cmp); | ||
235 | |||
195 | } | 236 | } |
196 | 237 | ||
197 | DEF_STRONG(qsort); | 238 | DEF_STRONG(qsort); |