diff options
author | Ron Yorston <rmy@pobox.com> | 2024-01-30 14:41:17 +0000 |
---|---|---|
committer | Ron Yorston <rmy@pobox.com> | 2024-01-30 14:50:08 +0000 |
commit | a750640a87ff0bad6e59b534264dddeaf8c6923b (patch) | |
tree | 1aa154cac12650b486bf9ba469910bb6f1e873b8 | |
parent | 59783a1ce07482493e20b6a8b0fc1bfc06ee047c (diff) | |
download | busybox-w32-a750640a87ff0bad6e59b534264dddeaf8c6923b.tar.gz busybox-w32-a750640a87ff0bad6e59b534264dddeaf8c6923b.tar.bz2 busybox-w32-a750640a87ff0bad6e59b534264dddeaf8c6923b.zip |
win32: import dirname(3) from mingw-w64
The mingw-w64 project has updated its implementation of dirname(3).
In some circumstances the new version doesn't preserve the type of
the user-supplied top-level directory separator. As a result of
this the dirname-handles-root test case failed.
Import the new implementation and tweak it to preserve the type of
the separator.
This only affects mingw-w64 versions 12 and above. Currently only
the aarch64 build using llvm-mingw is affected.
-rw-r--r-- | win32/Kbuild | 1 | ||||
-rw-r--r-- | win32/dirname.c | 287 |
2 files changed, 288 insertions, 0 deletions
diff --git a/win32/Kbuild b/win32/Kbuild index e705dae66..1bb79bfd3 100644 --- a/win32/Kbuild +++ b/win32/Kbuild | |||
@@ -5,6 +5,7 @@ | |||
5 | lib-y:= | 5 | lib-y:= |
6 | 6 | ||
7 | lib-$(CONFIG_PLATFORM_MINGW32) += dirent.o | 7 | lib-$(CONFIG_PLATFORM_MINGW32) += dirent.o |
8 | lib-$(CONFIG_PLATFORM_MINGW32) += dirname.o | ||
8 | lib-$(CONFIG_PLATFORM_MINGW32) += env.o | 9 | lib-$(CONFIG_PLATFORM_MINGW32) += env.o |
9 | lib-$(CONFIG_PLATFORM_MINGW32) += fnmatch.o | 10 | lib-$(CONFIG_PLATFORM_MINGW32) += fnmatch.o |
10 | lib-$(CONFIG_PLATFORM_MINGW32) += fsync.o | 11 | lib-$(CONFIG_PLATFORM_MINGW32) += fsync.o |
diff --git a/win32/dirname.c b/win32/dirname.c new file mode 100644 index 000000000..dd62b8b08 --- /dev/null +++ b/win32/dirname.c | |||
@@ -0,0 +1,287 @@ | |||
1 | /** | ||
2 | * This file has no copyright assigned and is placed in the Public Domain. | ||
3 | * This file is part of the mingw-w64 runtime package. | ||
4 | * No warranty is given; refer to the file DISCLAIMER.PD within this package. | ||
5 | */ | ||
6 | #ifndef WIN32_LEAN_AND_MEAN | ||
7 | #define WIN32_LEAN_AND_MEAN | ||
8 | #endif | ||
9 | #include <stdlib.h> | ||
10 | #include <libgen.h> | ||
11 | #include <windows.h> | ||
12 | |||
13 | #if defined(__MINGW64_VERSION_MAJOR) && __MINGW64_VERSION_MAJOR > 11 | ||
14 | |||
15 | /* A 'directory separator' is a byte that equals 0x2F ('solidus' or more | ||
16 | * commonly 'forward slash') or 0x5C ('reverse solidus' or more commonly | ||
17 | * 'backward slash'). The byte 0x5C may look different from a backward slash | ||
18 | * in some locales; for example, it looks the same as a Yen sign in Japanese | ||
19 | * locales and a Won sign in Korean locales. Despite its appearance, it still | ||
20 | * functions as a directory separator. | ||
21 | * | ||
22 | * A 'path' comprises an optional DOS drive letter with a colon, and then an | ||
23 | * arbitrary number of possibly empty components, separated by non-empty | |
24 | * sequences of directory separators (in other words, consecutive directory | ||
25 | * separators are treated as a single one). A path that comprises an empty | ||
26 | * component denotes the current working directory. | ||
27 | * | ||
28 | * An 'absolute path' comprises at least two components, the first of which | ||
29 | * is empty. | ||
30 | * | ||
31 | * A 'relative path' is a path that is not an absolute path. In other words, | ||
32 | * it either comprises an empty component, or begins with a non-empty | ||
33 | * component. | ||
34 | * | ||
35 | * POSIX has no concept of DOS drives. A path that does not have a | |
36 | * drive letter starts from the same drive as the current working directory. | ||
37 | * | ||
38 | * For example: | ||
39 | * (Examples without drive letters match POSIX.) | ||
40 | * | ||
41 | * Argument dirname() returns basename() returns | ||
42 | * -------- ----------------- ------------------ | ||
43 | * `` or NULL `.` `.` | ||
44 | * `usr` `.` `usr` | ||
45 | * `usr\` `.` `usr` | ||
46 | * `\` `\` `\` | ||
47 | * `\usr` `\` `usr` | ||
48 | * `\usr\lib` `\usr` `lib` | ||
49 | * `\home\\dwc\\test` `\home\\dwc` `test` | ||
50 | * `\\host\usr` `\\host\usr\` `\` | |
51 | * `\\host\usr\lib` `\\host\usr\` `lib` | |
52 | * `\\host\\usr` `\\host\\usr\` `\` | |
53 | * `\\host\\usr\lib` `\\host\\usr\` `lib` | |
54 | * `C:` `C:.` `.` | ||
55 | * `C:usr` `C:.` `usr` | ||
56 | * `C:usr\` `C:.` `usr` | ||
57 | * `C:\` `C:\` `\` | ||
58 | * `C:\\` `C:\` `\` | ||
59 | * `C:\\\` `C:\` `\` | ||
60 | * `C:\usr` `C:\` `usr` | ||
61 | * `C:\usr\lib` `C:\usr` `lib` | ||
62 | * `C:\\usr\\lib\\` `C:\\usr` `lib` | ||
63 | * `C:\home\\dwc\\test` `C:\home\\dwc` `test` | ||
64 | */ | ||
65 | |||
/* Landmarks inside a path string, as recorded by do_get_path_info().
 * Every member points into the caller's path buffer (or is NULL where
 * noted); nothing here owns memory. */
struct path_info
{
    /* One past the UNC prefix or the `X:` drive prefix.  NULL when the
     * path has neither. */
    char *prefix_end;

    /* The separator run immediately in front of the last non-empty
     * component of the unprefixed part: first separator byte, and the
     * first byte after the run (i.e. the start of that component).
     * Both are NULL when no such run exists. */
    char *base_sep_begin;
    char *base_sep_end;

    /* Start of the separator run that ends the unprefixed part, or NULL
     * when a non-separator character follows the last separator. */
    char *term_sep_begin;

    /* The terminating NUL of the path. */
    char *path_end;
};
83 | |||
/* Evaluates to nonzero when `c` is a forward slash or a backslash.
 * `c` may be evaluated twice, so the argument must have no side effects. */
#define IS_DIR_SEP(c) ((c) == '/' || (c) == '\\')
85 | |||
86 | static | ||
87 | void | ||
88 | do_get_path_info(struct path_info* info, char* path) | ||
89 | { | ||
90 | char* pos = path; | ||
91 | int unc_ncoms = 0; | ||
92 | DWORD cp; | ||
93 | int dbcs_tb, prev_dir_sep, dir_sep; | ||
94 | |||
95 | /* Get the code page for paths in the same way as `fopen()`. */ | ||
96 | cp = AreFileApisANSI() ? CP_ACP : CP_OEMCP; | ||
97 | |||
98 | /* Set the structure to 'no data'. */ | ||
99 | info->prefix_end = NULL; | ||
100 | info->base_sep_begin = NULL; | ||
101 | info->base_sep_end = NULL; | ||
102 | info->term_sep_begin = NULL; | ||
103 | |||
104 | if(IS_DIR_SEP(pos[0]) && IS_DIR_SEP(pos[1])) { | ||
105 | /* The path is UNC. */ | ||
106 | pos += 2; | ||
107 | |||
108 | /* Seek to the end of the share/device name. */ | ||
109 | dbcs_tb = 0; | ||
110 | prev_dir_sep = 0; | ||
111 | |||
112 | while(*pos != 0) { | ||
113 | dir_sep = 0; | ||
114 | |||
115 | if(dbcs_tb) | ||
116 | dbcs_tb = 0; | ||
117 | else if(IsDBCSLeadByteEx(cp, *pos)) | ||
118 | dbcs_tb = 1; | ||
119 | else | ||
120 | dir_sep = IS_DIR_SEP(*pos); | ||
121 | |||
122 | /* If a separator has been encountered and the previous character | ||
123 | * was not, mark this as the end of the current component. */ | ||
124 | if(dir_sep && !prev_dir_sep) { | ||
125 | unc_ncoms ++; | ||
126 | |||
127 | /* The first component is the host name, and the second is the | ||
128 | * share name. So we stop at the end of the second component. */ | ||
129 | if(unc_ncoms == 2) | ||
130 | break; | ||
131 | } | ||
132 | |||
133 | prev_dir_sep = dir_sep; | ||
134 | pos ++; | ||
135 | } | ||
136 | |||
137 | /* The UNC prefix terminates here. The terminating directory separator | ||
138 | * is not part of the prefix, and initiates a new absolute path. */ | ||
139 | info->prefix_end = pos; | ||
140 | } | ||
141 | else if((pos[0] >= 'A' && pos[0] <= 'Z' && pos[1] == ':') | ||
142 | || (pos[0] >= 'a' && pos[0] <= 'z' && pos[1] == ':')) { | ||
143 | /* The path contains a DOS drive letter in the beginning. */ | ||
144 | pos += 2; | ||
145 | |||
146 | /* The DOS drive prefix terminates here. Unlike UNC paths, the remaing | ||
147 | * part can be relative. For example, `C:foo` denotes `foo` in the | ||
148 | * working directory of drive `C:`. */ | ||
149 | info->prefix_end = pos; | ||
150 | } | ||
151 | |||
152 | /* The remaining part of the path is almost the same as POSIX. */ | ||
153 | dbcs_tb = 0; | ||
154 | prev_dir_sep = 0; | ||
155 | |||
156 | while(*pos != 0) { | ||
157 | dir_sep = 0; | ||
158 | |||
159 | if(dbcs_tb) | ||
160 | dbcs_tb = 0; | ||
161 | else if(IsDBCSLeadByteEx(cp, *pos)) | ||
162 | dbcs_tb = 1; | ||
163 | else | ||
164 | dir_sep = IS_DIR_SEP(*pos); | ||
165 | |||
166 | /* If a separator has been encountered and the previous character | ||
167 | * was not, mark this as the beginning of the terminating separator | ||
168 | * sequence. */ | ||
169 | if(dir_sep && !prev_dir_sep) | ||
170 | info->term_sep_begin = pos; | ||
171 | |||
172 | /* If a non-separator character has been encountered and a previous | ||
173 | * terminating separator sequence exists, start a new component. */ | ||
174 | if(!dir_sep && prev_dir_sep) { | ||
175 | info->base_sep_begin = info->term_sep_begin; | ||
176 | info->base_sep_end = pos; | ||
177 | info->term_sep_begin = NULL; | ||
178 | } | ||
179 | |||
180 | prev_dir_sep = dir_sep; | ||
181 | pos ++; | ||
182 | } | ||
183 | |||
184 | /* Store the end of the path for convenience. */ | ||
185 | info->path_end = pos; | ||
186 | } | ||
187 | |||
188 | char* | ||
189 | dirname(char* path) | ||
190 | { | ||
191 | struct path_info info; | ||
192 | char* upath; | ||
193 | const char* top; | ||
194 | static char* static_path_copy; | ||
195 | |||
196 | if(path == NULL || path[0] == 0) | ||
197 | return (char*) "."; | ||
198 | |||
199 | do_get_path_info(&info, path); | ||
200 | upath = info.prefix_end ? info.prefix_end : path; | ||
201 | /* Preserve type of top-level separator */ | ||
202 | if (IS_DIR_SEP(path[0])) | ||
203 | top = path[0] == '/' ? "/" : "\\"; | ||
204 | else if (IS_DIR_SEP(upath[0])) | ||
205 | top = upath[0] == '/' ? "/" : "\\"; | ||
206 | else | ||
207 | top = "."; | ||
208 | |||
209 | /* If a non-terminating directory separator exists, it terminates the | ||
210 | * dirname. Truncate the path there. */ | ||
211 | if(info.base_sep_begin) { | ||
212 | info.base_sep_begin[0] = 0; | ||
213 | |||
214 | /* If the unprefixed path has not been truncated to empty, it is now | ||
215 | * the dirname, so return it. */ | ||
216 | if(upath[0]) | ||
217 | return path; | ||
218 | } | ||
219 | |||
220 | /* The dirname is empty. In principle we return `<prefix>.` if the | ||
221 | * path is relative and `<prefix>\` if it is absolute. This can be | ||
222 | * optimized if there is no prefix. */ | ||
223 | if(upath == path) | ||
224 | return (char*) top; | ||
225 | |||
226 | /* When there is a prefix, we must append a character to the prefix. | ||
227 | * If there is enough room in the original path, we just reuse its | ||
228 | * storage. */ | ||
229 | if(upath != info.path_end) { | ||
230 | upath[0] = *top; | ||
231 | upath[1] = 0; | ||
232 | return path; | ||
233 | } | ||
234 | |||
235 | /* This is only the last resort. If there is no room, we have to copy | ||
236 | * the prefix elsewhere. */ | ||
237 | upath = realloc(static_path_copy, info.prefix_end - path + 2); | ||
238 | if(!upath) | ||
239 | return (char*) top; | ||
240 | |||
241 | static_path_copy = upath; | ||
242 | memcpy(upath, path, info.prefix_end - path); | ||
243 | upath += info.prefix_end - path; | ||
244 | upath[0] = *top; | ||
245 | upath[1] = 0; | ||
246 | return static_path_copy; | ||
247 | } | ||
248 | |||
249 | char* | ||
250 | basename(char* path) | ||
251 | { | ||
252 | struct path_info info; | ||
253 | char* upath; | ||
254 | |||
255 | if(path == NULL || path[0] == 0) | ||
256 | return (char*) "."; | ||
257 | |||
258 | do_get_path_info(&info, path); | ||
259 | upath = info.prefix_end ? info.prefix_end : path; | ||
260 | |||
261 | /* If the path is non-UNC and empty, then it's relative. POSIX says '.' | ||
262 | * shall be returned. */ | ||
263 | if(IS_DIR_SEP(path[0]) == 0 && upath[0] == 0) | ||
264 | return (char*) "."; | ||
265 | |||
266 | /* If a terminating separator sequence exists, it is not part of the | ||
267 | * name and shall be truncated. */ | ||
268 | if(info.term_sep_begin) | ||
269 | info.term_sep_begin[0] = 0; | ||
270 | |||
271 | /* If some other separator sequence has been found, the basename | ||
272 | * immediately follows it. */ | ||
273 | if(info.base_sep_end) | ||
274 | return info.base_sep_end; | ||
275 | |||
276 | /* If removal of the terminating separator sequence has caused the | ||
277 | * unprefixed path to become empty, it must have comprised only | ||
278 | * separators. POSIX says `/` shall be returned, but on Windows, we | ||
279 | * return `\` instead. */ | ||
280 | if(upath[0] == 0) | ||
281 | return (char*) "\\"; | ||
282 | |||
283 | /* Return the unprefixed path. */ | ||
284 | return upath; | ||
285 | } | ||
286 | |||
287 | #endif /* __MINGW64_VERSION_MAJOR */ | ||