aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRon Yorston <rmy@pobox.com>2024-01-30 14:41:17 +0000
committerRon Yorston <rmy@pobox.com>2024-01-30 14:50:08 +0000
commita750640a87ff0bad6e59b534264dddeaf8c6923b (patch)
tree1aa154cac12650b486bf9ba469910bb6f1e873b8
parent59783a1ce07482493e20b6a8b0fc1bfc06ee047c (diff)
downloadbusybox-w32-a750640a87ff0bad6e59b534264dddeaf8c6923b.tar.gz
busybox-w32-a750640a87ff0bad6e59b534264dddeaf8c6923b.tar.bz2
busybox-w32-a750640a87ff0bad6e59b534264dddeaf8c6923b.zip
win32: import dirname(3) from mingw-w64
The mingw-w64 project has updated its implementation of dirname(3). In some circumstances the new version doesn't preserve the type of the user-supplied top-level directory separator. As a result of this the dirname-handles-root test case failed. Import the new implementation and tweak it to preserve the type of the separator. This only affects mingw-w64 versions 12 and above. Currently only the aarch64 build using llvm-mingw is affected.
-rw-r--r--win32/Kbuild1
-rw-r--r--win32/dirname.c287
2 files changed, 288 insertions, 0 deletions
diff --git a/win32/Kbuild b/win32/Kbuild
index e705dae66..1bb79bfd3 100644
--- a/win32/Kbuild
+++ b/win32/Kbuild
@@ -5,6 +5,7 @@
5lib-y:= 5lib-y:=
6 6
7lib-$(CONFIG_PLATFORM_MINGW32) += dirent.o 7lib-$(CONFIG_PLATFORM_MINGW32) += dirent.o
8lib-$(CONFIG_PLATFORM_MINGW32) += dirname.o
8lib-$(CONFIG_PLATFORM_MINGW32) += env.o 9lib-$(CONFIG_PLATFORM_MINGW32) += env.o
9lib-$(CONFIG_PLATFORM_MINGW32) += fnmatch.o 10lib-$(CONFIG_PLATFORM_MINGW32) += fnmatch.o
10lib-$(CONFIG_PLATFORM_MINGW32) += fsync.o 11lib-$(CONFIG_PLATFORM_MINGW32) += fsync.o
diff --git a/win32/dirname.c b/win32/dirname.c
new file mode 100644
index 000000000..dd62b8b08
--- /dev/null
+++ b/win32/dirname.c
@@ -0,0 +1,287 @@
/**
 * This file has no copyright assigned and is placed in the Public Domain.
 * This file is part of the mingw-w64 runtime package.
 * No warranty is given; refer to the file DISCLAIMER.PD within this package.
 */
6#ifndef WIN32_LEAN_AND_MEAN
7#define WIN32_LEAN_AND_MEAN
8#endif
#include <stdlib.h>
#include <string.h>
#include <libgen.h>
#include <windows.h>
12
13#if defined(__MINGW64_VERSION_MAJOR) && __MINGW64_VERSION_MAJOR > 11
14
/* A 'directory separator' is a byte that equals 0x2F ('solidus' or more
 * commonly 'forward slash') or 0x5C ('reverse solidus' or more commonly
 * 'backward slash'). The byte 0x5C may look different from a backward slash
 * in some locales; for example, it looks the same as a Yen sign in Japanese
 * locales and a Won sign in Korean locales. Despite its appearance, it still
 * functions as a directory separator.
 *
 * A 'path' comprises an optional DOS drive letter with a colon, and then an
 * arbitrary number of possibly empty components, separated by non-empty
 * sequences of directory separators (in other words, consecutive directory
 * separators are treated as a single one). A path that comprises an empty
 * component denotes the current working directory.
 *
 * An 'absolute path' comprises at least two components, the first of which
 * is empty.
 *
 * A 'relative path' is a path that is not an absolute path. In other words,
 * it either comprises an empty component, or begins with a non-empty
 * component.
 *
 * POSIX has no concept of DOS drives. A path that does not have a drive
 * letter starts from the same drive as the current working directory.
 *
 * For example:
 * (Examples without drive letters match POSIX.)
 *
 *   Argument                 dirname() returns        basename() returns
 *   --------                 -----------------        ------------------
 *   `` or NULL               `.`                      `.`
 *   `usr`                    `.`                      `usr`
 *   `usr\`                   `.`                      `usr`
 *   `\`                      `\`                      `\`
 *   `\usr`                   `\`                      `usr`
 *   `\usr\lib`               `\usr`                   `lib`
 *   `\home\\dwc\\test`       `\home\\dwc`             `test`
 *   `\\host\usr`             `\\host\.`               `usr`
 *   `\\host\usr\lib`         `\\host\usr`             `lib`
 *   `\\host\\usr`            `\\host\\`               `usr`
 *   `\\host\\usr\lib`        `\\host\\usr`            `lib`
 *   `C:`                     `C:.`                    `.`
 *   `C:usr`                  `C:.`                    `usr`
 *   `C:usr\`                 `C:.`                    `usr`
 *   `C:\`                    `C:\`                    `\`
 *   `C:\\`                   `C:\`                    `\`
 *   `C:\\\`                  `C:\`                    `\`
 *   `C:\usr`                 `C:\`                    `usr`
 *   `C:\usr\lib`             `C:\usr`                 `lib`
 *   `C:\\usr\\lib\\`         `C:\\usr`                `lib`
 *   `C:\home\\dwc\\test`     `C:\home\\dwc`           `test`
 */
65
/* Landmarks located inside a path string by do_get_path_info().  Every
 * member either points into the scanned string or is NULL when the
 * corresponding landmark does not exist. */
struct path_info
{
  /* End of the UNC prefix or drive letter; NULL when the path has
   * neither. */
  char *prefix_end;

  /* Start and end of the directory separator run in front of the last
   * non-empty component; both NULL when there is no such run. */
  char *base_sep_begin;
  char *base_sep_end;

  /* Start of the last directory separator run, set only when no
   * non-separator characters follow it; NULL otherwise. */
  char *term_sep_begin;

  /* End of the string (its terminating null byte). */
  char *path_end;
};
83
/* Evaluates to non-zero when the byte `c` is a directory separator, i.e.
 * a forward slash or a backward slash.  The argument is expanded twice, so
 * only pass expressions without side effects. */
#define IS_DIR_SEP(c) ((c) == '/' || (c) == '\\')
85
86static
87void
88do_get_path_info(struct path_info* info, char* path)
89 {
90 char* pos = path;
91 int unc_ncoms = 0;
92 DWORD cp;
93 int dbcs_tb, prev_dir_sep, dir_sep;
94
95 /* Get the code page for paths in the same way as `fopen()`. */
96 cp = AreFileApisANSI() ? CP_ACP : CP_OEMCP;
97
98 /* Set the structure to 'no data'. */
99 info->prefix_end = NULL;
100 info->base_sep_begin = NULL;
101 info->base_sep_end = NULL;
102 info->term_sep_begin = NULL;
103
104 if(IS_DIR_SEP(pos[0]) && IS_DIR_SEP(pos[1])) {
105 /* The path is UNC. */
106 pos += 2;
107
108 /* Seek to the end of the share/device name. */
109 dbcs_tb = 0;
110 prev_dir_sep = 0;
111
112 while(*pos != 0) {
113 dir_sep = 0;
114
115 if(dbcs_tb)
116 dbcs_tb = 0;
117 else if(IsDBCSLeadByteEx(cp, *pos))
118 dbcs_tb = 1;
119 else
120 dir_sep = IS_DIR_SEP(*pos);
121
122 /* If a separator has been encountered and the previous character
123 * was not, mark this as the end of the current component. */
124 if(dir_sep && !prev_dir_sep) {
125 unc_ncoms ++;
126
127 /* The first component is the host name, and the second is the
128 * share name. So we stop at the end of the second component. */
129 if(unc_ncoms == 2)
130 break;
131 }
132
133 prev_dir_sep = dir_sep;
134 pos ++;
135 }
136
137 /* The UNC prefix terminates here. The terminating directory separator
138 * is not part of the prefix, and initiates a new absolute path. */
139 info->prefix_end = pos;
140 }
141 else if((pos[0] >= 'A' && pos[0] <= 'Z' && pos[1] == ':')
142 || (pos[0] >= 'a' && pos[0] <= 'z' && pos[1] == ':')) {
143 /* The path contains a DOS drive letter in the beginning. */
144 pos += 2;
145
146 /* The DOS drive prefix terminates here. Unlike UNC paths, the remaing
147 * part can be relative. For example, `C:foo` denotes `foo` in the
148 * working directory of drive `C:`. */
149 info->prefix_end = pos;
150 }
151
152 /* The remaining part of the path is almost the same as POSIX. */
153 dbcs_tb = 0;
154 prev_dir_sep = 0;
155
156 while(*pos != 0) {
157 dir_sep = 0;
158
159 if(dbcs_tb)
160 dbcs_tb = 0;
161 else if(IsDBCSLeadByteEx(cp, *pos))
162 dbcs_tb = 1;
163 else
164 dir_sep = IS_DIR_SEP(*pos);
165
166 /* If a separator has been encountered and the previous character
167 * was not, mark this as the beginning of the terminating separator
168 * sequence. */
169 if(dir_sep && !prev_dir_sep)
170 info->term_sep_begin = pos;
171
172 /* If a non-separator character has been encountered and a previous
173 * terminating separator sequence exists, start a new component. */
174 if(!dir_sep && prev_dir_sep) {
175 info->base_sep_begin = info->term_sep_begin;
176 info->base_sep_end = pos;
177 info->term_sep_begin = NULL;
178 }
179
180 prev_dir_sep = dir_sep;
181 pos ++;
182 }
183
184 /* Store the end of the path for convenience. */
185 info->path_end = pos;
186 }
187
188char*
189dirname(char* path)
190 {
191 struct path_info info;
192 char* upath;
193 const char* top;
194 static char* static_path_copy;
195
196 if(path == NULL || path[0] == 0)
197 return (char*) ".";
198
199 do_get_path_info(&info, path);
200 upath = info.prefix_end ? info.prefix_end : path;
201 /* Preserve type of top-level separator */
202 if (IS_DIR_SEP(path[0]))
203 top = path[0] == '/' ? "/" : "\\";
204 else if (IS_DIR_SEP(upath[0]))
205 top = upath[0] == '/' ? "/" : "\\";
206 else
207 top = ".";
208
209 /* If a non-terminating directory separator exists, it terminates the
210 * dirname. Truncate the path there. */
211 if(info.base_sep_begin) {
212 info.base_sep_begin[0] = 0;
213
214 /* If the unprefixed path has not been truncated to empty, it is now
215 * the dirname, so return it. */
216 if(upath[0])
217 return path;
218 }
219
220 /* The dirname is empty. In principle we return `<prefix>.` if the
221 * path is relative and `<prefix>\` if it is absolute. This can be
222 * optimized if there is no prefix. */
223 if(upath == path)
224 return (char*) top;
225
226 /* When there is a prefix, we must append a character to the prefix.
227 * If there is enough room in the original path, we just reuse its
228 * storage. */
229 if(upath != info.path_end) {
230 upath[0] = *top;
231 upath[1] = 0;
232 return path;
233 }
234
235 /* This is only the last resort. If there is no room, we have to copy
236 * the prefix elsewhere. */
237 upath = realloc(static_path_copy, info.prefix_end - path + 2);
238 if(!upath)
239 return (char*) top;
240
241 static_path_copy = upath;
242 memcpy(upath, path, info.prefix_end - path);
243 upath += info.prefix_end - path;
244 upath[0] = *top;
245 upath[1] = 0;
246 return static_path_copy;
247 }
248
249char*
250basename(char* path)
251 {
252 struct path_info info;
253 char* upath;
254
255 if(path == NULL || path[0] == 0)
256 return (char*) ".";
257
258 do_get_path_info(&info, path);
259 upath = info.prefix_end ? info.prefix_end : path;
260
261 /* If the path is non-UNC and empty, then it's relative. POSIX says '.'
262 * shall be returned. */
263 if(IS_DIR_SEP(path[0]) == 0 && upath[0] == 0)
264 return (char*) ".";
265
266 /* If a terminating separator sequence exists, it is not part of the
267 * name and shall be truncated. */
268 if(info.term_sep_begin)
269 info.term_sep_begin[0] = 0;
270
271 /* If some other separator sequence has been found, the basename
272 * immediately follows it. */
273 if(info.base_sep_end)
274 return info.base_sep_end;
275
276 /* If removal of the terminating separator sequence has caused the
277 * unprefixed path to become empty, it must have comprised only
278 * separators. POSIX says `/` shall be returned, but on Windows, we
279 * return `\` instead. */
280 if(upath[0] == 0)
281 return (char*) "\\";
282
283 /* Return the unprefixed path. */
284 return upath;
285 }
286
287#endif /* __MINGW64_VERSION_MAJOR */