diff options
Diffstat (limited to 'win32/dirname.c')
| -rw-r--r-- | win32/dirname.c | 287 |
1 files changed, 287 insertions, 0 deletions
diff --git a/win32/dirname.c b/win32/dirname.c new file mode 100644 index 000000000..dd62b8b08 --- /dev/null +++ b/win32/dirname.c | |||
| @@ -0,0 +1,287 @@ | |||
| 1 | /** | ||
| 2 | * This file has no copyright assigned and is placed in the Public Domain. | ||
| 3 | * This file is part of the mingw-w64 runtime package. | ||
| 4 | * No warranty is given; refer to the file DISCLAIMER.PD within this package. | ||
| 5 | */ | ||
| 6 | #ifndef WIN32_LEAN_AND_MEAN | ||
| 7 | #define WIN32_LEAN_AND_MEAN | ||
| 8 | #endif | ||
| 9 | #include <stdlib.h> | ||
| 10 | #include <libgen.h> | ||
| 11 | #include <windows.h> | ||
| 12 | |||
| 13 | #if defined(__MINGW64_VERSION_MAJOR) && __MINGW64_VERSION_MAJOR > 11 | ||
| 14 | |||
| 15 | /* A 'directory separator' is a byte that equals 0x2F ('solidus' or more | ||
| 16 | * commonly 'forward slash') or 0x5C ('reverse solidus' or more commonly | ||
| 17 | * 'backward slash'). The byte 0x5C may look different from a backward slash | ||
| 18 | * in some locales; for example, it looks the same as a Yen sign in Japanese | ||
| 19 | * locales and a Won sign in Korean locales. Despite its appearance, it still | ||
| 20 | * functions as a directory separator. | ||
| 21 | * | ||
| 22 | * A 'path' comprises an optional DOS drive letter with a colon, and then an | ||
| 23 | * arbitrary number of possibly empty components, separated by non-empty | |
| 24 | * sequences of directory separators (in other words, consecutive directory | ||
| 25 | * separators are treated as a single one). A path that comprises an empty | ||
| 26 | * component denotes the current working directory. | ||
| 27 | * | ||
| 28 | * An 'absolute path' comprises at least two components, the first of which | ||
| 29 | * is empty. | ||
| 30 | * | ||
| 31 | * A 'relative path' is a path that is not an absolute path. In other words, | ||
| 32 | * it either comprises an empty component, or begins with a non-empty | ||
| 33 | * component. | ||
| 34 | * | ||
| 35 | * POSIX has no concept of DOS drives. A path that does not have a | |
| 36 | * drive letter starts from the same drive as the current working directory. | ||
| 37 | * | ||
| 38 | * For example: | ||
| 39 | * (Examples without drive letters match POSIX.) | ||
| 40 | * | ||
| 41 | * Argument dirname() returns basename() returns | ||
| 42 | * -------- ----------------- ------------------ | ||
| 43 | * `` or NULL `.` `.` | ||
| 44 | * `usr` `.` `usr` | ||
| 45 | * `usr\` `.` `usr` | ||
| 46 | * `\` `\` `\` | ||
| 47 | * `\usr` `\` `usr` | ||
| 48 | * `\usr\lib` `\usr` `lib` | ||
| 49 | * `\home\\dwc\\test` `\home\\dwc` `test` | ||
| 50 | * `\\host\usr` `\\host\usr\` `\` | |
| 51 | * `\\host\usr\lib` `\\host\usr\` `lib` | |
| 52 | * `\\host\\usr` `\\host\\usr\` `\` | |
| 53 | * `\\host\\usr\lib` `\\host\\usr\` `lib` | |
| 54 | * `C:` `C:.` `.` | ||
| 55 | * `C:usr` `C:.` `usr` | ||
| 56 | * `C:usr\` `C:.` `usr` | ||
| 57 | * `C:\` `C:\` `\` | ||
| 58 | * `C:\\` `C:\` `\` | ||
| 59 | * `C:\\\` `C:\` `\` | ||
| 60 | * `C:\usr` `C:\` `usr` | ||
| 61 | * `C:\usr\lib` `C:\usr` `lib` | ||
| 62 | * `C:\\usr\\lib\\` `C:\\usr` `lib` | ||
| 63 | * `C:\home\\dwc\\test` `C:\home\\dwc` `test` | ||
| 64 | */ | ||
| 65 | |||
/* Landmarks located inside a path string by a single scan. Each member is
 * either NULL or a pointer into the scanned string. */
struct path_info
{
  /* One past the UNC prefix, or one past the drive letter and its colon.
   * NULL when the path has neither. */
  char *prefix_end;

  /* First byte of, and first byte after, the separator run that sits
   * directly in front of the last non-empty component. Both are NULL when
   * no such run exists. */
  char *base_sep_begin;
  char *base_sep_end;

  /* First byte of the trailing separator run, i.e. a run that is followed
   * by nothing but the end of the string. NULL when the path does not end
   * in a separator. */
  char *term_sep_begin;

  /* The terminating null byte of the string. */
  char *path_end;
};
| 83 | |||
/* Non-zero when the byte `ch` is a forward or backward slash. The argument
 * is evaluated up to twice, so it must be free of side effects. */
#define IS_DIR_SEP(ch) ((ch) == '/' || (ch) == '\\')
| 85 | |||
| 86 | static | ||
| 87 | void | ||
| 88 | do_get_path_info(struct path_info* info, char* path) | ||
| 89 | { | ||
| 90 | char* pos = path; | ||
| 91 | int unc_ncoms = 0; | ||
| 92 | DWORD cp; | ||
| 93 | int dbcs_tb, prev_dir_sep, dir_sep; | ||
| 94 | |||
| 95 | /* Get the code page for paths in the same way as `fopen()`. */ | ||
| 96 | cp = AreFileApisANSI() ? CP_ACP : CP_OEMCP; | ||
| 97 | |||
| 98 | /* Set the structure to 'no data'. */ | ||
| 99 | info->prefix_end = NULL; | ||
| 100 | info->base_sep_begin = NULL; | ||
| 101 | info->base_sep_end = NULL; | ||
| 102 | info->term_sep_begin = NULL; | ||
| 103 | |||
| 104 | if(IS_DIR_SEP(pos[0]) && IS_DIR_SEP(pos[1])) { | ||
| 105 | /* The path is UNC. */ | ||
| 106 | pos += 2; | ||
| 107 | |||
| 108 | /* Seek to the end of the share/device name. */ | ||
| 109 | dbcs_tb = 0; | ||
| 110 | prev_dir_sep = 0; | ||
| 111 | |||
| 112 | while(*pos != 0) { | ||
| 113 | dir_sep = 0; | ||
| 114 | |||
| 115 | if(dbcs_tb) | ||
| 116 | dbcs_tb = 0; | ||
| 117 | else if(IsDBCSLeadByteEx(cp, *pos)) | ||
| 118 | dbcs_tb = 1; | ||
| 119 | else | ||
| 120 | dir_sep = IS_DIR_SEP(*pos); | ||
| 121 | |||
| 122 | /* If a separator has been encountered and the previous character | ||
| 123 | * was not, mark this as the end of the current component. */ | ||
| 124 | if(dir_sep && !prev_dir_sep) { | ||
| 125 | unc_ncoms ++; | ||
| 126 | |||
| 127 | /* The first component is the host name, and the second is the | ||
| 128 | * share name. So we stop at the end of the second component. */ | ||
| 129 | if(unc_ncoms == 2) | ||
| 130 | break; | ||
| 131 | } | ||
| 132 | |||
| 133 | prev_dir_sep = dir_sep; | ||
| 134 | pos ++; | ||
| 135 | } | ||
| 136 | |||
| 137 | /* The UNC prefix terminates here. The terminating directory separator | ||
| 138 | * is not part of the prefix, and initiates a new absolute path. */ | ||
| 139 | info->prefix_end = pos; | ||
| 140 | } | ||
| 141 | else if((pos[0] >= 'A' && pos[0] <= 'Z' && pos[1] == ':') | ||
| 142 | || (pos[0] >= 'a' && pos[0] <= 'z' && pos[1] == ':')) { | ||
| 143 | /* The path contains a DOS drive letter in the beginning. */ | ||
| 144 | pos += 2; | ||
| 145 | |||
| 146 | /* The DOS drive prefix terminates here. Unlike UNC paths, the remaing | ||
| 147 | * part can be relative. For example, `C:foo` denotes `foo` in the | ||
| 148 | * working directory of drive `C:`. */ | ||
| 149 | info->prefix_end = pos; | ||
| 150 | } | ||
| 151 | |||
| 152 | /* The remaining part of the path is almost the same as POSIX. */ | ||
| 153 | dbcs_tb = 0; | ||
| 154 | prev_dir_sep = 0; | ||
| 155 | |||
| 156 | while(*pos != 0) { | ||
| 157 | dir_sep = 0; | ||
| 158 | |||
| 159 | if(dbcs_tb) | ||
| 160 | dbcs_tb = 0; | ||
| 161 | else if(IsDBCSLeadByteEx(cp, *pos)) | ||
| 162 | dbcs_tb = 1; | ||
| 163 | else | ||
| 164 | dir_sep = IS_DIR_SEP(*pos); | ||
| 165 | |||
| 166 | /* If a separator has been encountered and the previous character | ||
| 167 | * was not, mark this as the beginning of the terminating separator | ||
| 168 | * sequence. */ | ||
| 169 | if(dir_sep && !prev_dir_sep) | ||
| 170 | info->term_sep_begin = pos; | ||
| 171 | |||
| 172 | /* If a non-separator character has been encountered and a previous | ||
| 173 | * terminating separator sequence exists, start a new component. */ | ||
| 174 | if(!dir_sep && prev_dir_sep) { | ||
| 175 | info->base_sep_begin = info->term_sep_begin; | ||
| 176 | info->base_sep_end = pos; | ||
| 177 | info->term_sep_begin = NULL; | ||
| 178 | } | ||
| 179 | |||
| 180 | prev_dir_sep = dir_sep; | ||
| 181 | pos ++; | ||
| 182 | } | ||
| 183 | |||
| 184 | /* Store the end of the path for convenience. */ | ||
| 185 | info->path_end = pos; | ||
| 186 | } | ||
| 187 | |||
| 188 | char* | ||
| 189 | dirname(char* path) | ||
| 190 | { | ||
| 191 | struct path_info info; | ||
| 192 | char* upath; | ||
| 193 | const char* top; | ||
| 194 | static char* static_path_copy; | ||
| 195 | |||
| 196 | if(path == NULL || path[0] == 0) | ||
| 197 | return (char*) "."; | ||
| 198 | |||
| 199 | do_get_path_info(&info, path); | ||
| 200 | upath = info.prefix_end ? info.prefix_end : path; | ||
| 201 | /* Preserve type of top-level separator */ | ||
| 202 | if (IS_DIR_SEP(path[0])) | ||
| 203 | top = path[0] == '/' ? "/" : "\\"; | ||
| 204 | else if (IS_DIR_SEP(upath[0])) | ||
| 205 | top = upath[0] == '/' ? "/" : "\\"; | ||
| 206 | else | ||
| 207 | top = "."; | ||
| 208 | |||
| 209 | /* If a non-terminating directory separator exists, it terminates the | ||
| 210 | * dirname. Truncate the path there. */ | ||
| 211 | if(info.base_sep_begin) { | ||
| 212 | info.base_sep_begin[0] = 0; | ||
| 213 | |||
| 214 | /* If the unprefixed path has not been truncated to empty, it is now | ||
| 215 | * the dirname, so return it. */ | ||
| 216 | if(upath[0]) | ||
| 217 | return path; | ||
| 218 | } | ||
| 219 | |||
| 220 | /* The dirname is empty. In principle we return `<prefix>.` if the | ||
| 221 | * path is relative and `<prefix>\` if it is absolute. This can be | ||
| 222 | * optimized if there is no prefix. */ | ||
| 223 | if(upath == path) | ||
| 224 | return (char*) top; | ||
| 225 | |||
| 226 | /* When there is a prefix, we must append a character to the prefix. | ||
| 227 | * If there is enough room in the original path, we just reuse its | ||
| 228 | * storage. */ | ||
| 229 | if(upath != info.path_end) { | ||
| 230 | upath[0] = *top; | ||
| 231 | upath[1] = 0; | ||
| 232 | return path; | ||
| 233 | } | ||
| 234 | |||
| 235 | /* This is only the last resort. If there is no room, we have to copy | ||
| 236 | * the prefix elsewhere. */ | ||
| 237 | upath = realloc(static_path_copy, info.prefix_end - path + 2); | ||
| 238 | if(!upath) | ||
| 239 | return (char*) top; | ||
| 240 | |||
| 241 | static_path_copy = upath; | ||
| 242 | memcpy(upath, path, info.prefix_end - path); | ||
| 243 | upath += info.prefix_end - path; | ||
| 244 | upath[0] = *top; | ||
| 245 | upath[1] = 0; | ||
| 246 | return static_path_copy; | ||
| 247 | } | ||
| 248 | |||
| 249 | char* | ||
| 250 | basename(char* path) | ||
| 251 | { | ||
| 252 | struct path_info info; | ||
| 253 | char* upath; | ||
| 254 | |||
| 255 | if(path == NULL || path[0] == 0) | ||
| 256 | return (char*) "."; | ||
| 257 | |||
| 258 | do_get_path_info(&info, path); | ||
| 259 | upath = info.prefix_end ? info.prefix_end : path; | ||
| 260 | |||
| 261 | /* If the path is non-UNC and empty, then it's relative. POSIX says '.' | ||
| 262 | * shall be returned. */ | ||
| 263 | if(IS_DIR_SEP(path[0]) == 0 && upath[0] == 0) | ||
| 264 | return (char*) "."; | ||
| 265 | |||
| 266 | /* If a terminating separator sequence exists, it is not part of the | ||
| 267 | * name and shall be truncated. */ | ||
| 268 | if(info.term_sep_begin) | ||
| 269 | info.term_sep_begin[0] = 0; | ||
| 270 | |||
| 271 | /* If some other separator sequence has been found, the basename | ||
| 272 | * immediately follows it. */ | ||
| 273 | if(info.base_sep_end) | ||
| 274 | return info.base_sep_end; | ||
| 275 | |||
| 276 | /* If removal of the terminating separator sequence has caused the | ||
| 277 | * unprefixed path to become empty, it must have comprised only | ||
| 278 | * separators. POSIX says `/` shall be returned, but on Windows, we | ||
| 279 | * return `\` instead. */ | ||
| 280 | if(upath[0] == 0) | ||
| 281 | return (char*) "\\"; | ||
| 282 | |||
| 283 | /* Return the unprefixed path. */ | ||
| 284 | return upath; | ||
| 285 | } | ||
| 286 | |||
| 287 | #endif /* __MINGW64_VERSION_MAJOR */ | ||
