diff options
| author | Igor Pavlov <87184205+ip7z@users.noreply.github.com> | 2024-05-14 00:00:00 +0000 |
|---|---|---|
| committer | Igor Pavlov <87184205+ip7z@users.noreply.github.com> | 2024-05-15 23:55:04 +0500 |
| commit | fc662341e6f85da78ada0e443f6116b978f79f22 (patch) | |
| tree | 1be1cc402a7a9cbc18d4eeea6b141354c2d559e3 /CPP/Common/StringConvert.cpp | |
| parent | 5b39dc76f1bc82f941d5c800ab9f34407a06b53a (diff) | |
| download | 7zip-fc662341e6f85da78ada0e443f6116b978f79f22.tar.gz 7zip-fc662341e6f85da78ada0e443f6116b978f79f22.tar.bz2 7zip-fc662341e6f85da78ada0e443f6116b978f79f22.zip | |
24.05
Diffstat (limited to 'CPP/Common/StringConvert.cpp')
| -rw-r--r-- | CPP/Common/StringConvert.cpp | 52 |
1 file changed, 29 insertions, 23 deletions
diff --git a/CPP/Common/StringConvert.cpp b/CPP/Common/StringConvert.cpp index f25396a..79ff9e0 100644 --- a/CPP/Common/StringConvert.cpp +++ b/CPP/Common/StringConvert.cpp | |||
| @@ -267,8 +267,10 @@ void MultiByteToUnicodeString2(UString &dest, const AString &src, UINT codePage) | |||
| 267 | 267 | ||
| 268 | if (codePage == CP_UTF8 || g_ForceToUTF8) | 268 | if (codePage == CP_UTF8 || g_ForceToUTF8) |
| 269 | { | 269 | { |
| 270 | #if 1 | ||
| 270 | ConvertUTF8ToUnicode(src, dest); | 271 | ConvertUTF8ToUnicode(src, dest); |
| 271 | return; | 272 | return; |
| 273 | #endif | ||
| 272 | } | 274 | } |
| 273 | 275 | ||
| 274 | const size_t limit = ((size_t)src.Len() + 1) * 2; | 276 | const size_t limit = ((size_t)src.Len() + 1) * 2; |
| @@ -278,48 +280,47 @@ void MultiByteToUnicodeString2(UString &dest, const AString &src, UINT codePage) | |||
| 278 | { | 280 | { |
| 279 | dest.ReleaseBuf_SetEnd((unsigned)len); | 281 | dest.ReleaseBuf_SetEnd((unsigned)len); |
| 280 | 282 | ||
| 281 | #if WCHAR_MAX > 0xffff | 283 | #if WCHAR_MAX > 0xffff |
| 282 | d = dest.GetBuf(); | 284 | d = dest.GetBuf(); |
| 283 | for (size_t i = 0;; i++) | 285 | for (size_t i = 0;; i++) |
| 284 | { | 286 | { |
| 285 | // wchar_t c = dest[i]; | ||
| 286 | wchar_t c = d[i]; | 287 | wchar_t c = d[i]; |
| 288 | // printf("\ni=%2d c = %4x\n", (unsigned)i, (unsigned)c); | ||
| 287 | if (c == 0) | 289 | if (c == 0) |
| 288 | break; | 290 | break; |
| 289 | if (c >= 0x10000 && c < 0x110000) | 291 | if (c >= 0x10000 && c < 0x110000) |
| 290 | { | 292 | { |
| 291 | /* | 293 | UString tempString = d + i; |
| 292 | c -= 0x10000; | 294 | const wchar_t *t = tempString.Ptr(); |
| 293 | unsigned c0 = 0xd800 + ((c >> 10) & 0x3FF); | ||
| 294 | dest.ReplaceOneCharAtPos(i, c0); | ||
| 295 | i++; | ||
| 296 | c = 0xdc00 + (c & 0x3FF); | ||
| 297 | dest.Insert_wchar_t(i, c); | ||
| 298 | */ | ||
| 299 | UString temp = d + i; | ||
| 300 | 295 | ||
| 301 | for (size_t t = 0;; t++) | 296 | for (;;) |
| 302 | { | 297 | { |
| 303 | wchar_t w = temp[t]; | 298 | wchar_t w = *t++; |
| 299 | // printf("\nchar=%x\n", w); | ||
| 304 | if (w == 0) | 300 | if (w == 0) |
| 305 | break; | 301 | break; |
| 306 | if (i == limit) | 302 | if (i == limit) |
| 307 | break; // unexpected error | 303 | break; // unexpected error |
| 308 | if (w >= 0x10000 && w < 0x110000) | 304 | if (w >= 0x10000 && w < 0x110000) |
| 309 | { | 305 | { |
| 306 | #if 1 | ||
| 310 | if (i + 1 == limit) | 307 | if (i + 1 == limit) |
| 311 | break; // unexpected error | 308 | break; // unexpected error |
| 312 | w -= 0x10000; | 309 | w -= 0x10000; |
| 313 | d[i++] = (unsigned)0xd800 + (((unsigned)w >> 10) & 0x3FF); | 310 | d[i++] = (unsigned)0xd800 + (((unsigned)w >> 10) & 0x3ff); |
| 314 | w = 0xdc00 + (w & 0x3FF); | 311 | w = 0xdc00 + (w & 0x3ff); |
| 312 | #else | ||
| 313 | // w = '_'; // for debug | ||
| 314 | #endif | ||
| 315 | } | 315 | } |
| 316 | d[i++] = w; | 316 | d[i++] = w; |
| 317 | } | 317 | } |
| 318 | dest.ReleaseBuf_SetEnd((unsigned)i); | 318 | dest.ReleaseBuf_SetEnd((unsigned)i); |
| 319 | break; | ||
| 319 | } | 320 | } |
| 320 | } | 321 | } |
| 321 | 322 | ||
| 322 | #endif | 323 | #endif |
| 323 | 324 | ||
| 324 | /* | 325 | /* |
| 325 | printf("\nMultiByteToUnicodeString2 (%d) %s\n", (int)src.Len(), src.Ptr()); | 326 | printf("\nMultiByteToUnicodeString2 (%d) %s\n", (int)src.Len(), src.Ptr()); |
| @@ -395,34 +396,39 @@ static void UnicodeStringToMultiByte2(AString &dest, const UString &src2, UINT c | |||
| 395 | // if (codePage == 1234567) // for debug purposes | 396 | // if (codePage == 1234567) // for debug purposes |
| 396 | if (codePage == CP_UTF8 || g_ForceToUTF8) | 397 | if (codePage == CP_UTF8 || g_ForceToUTF8) |
| 397 | { | 398 | { |
| 399 | #if 1 | ||
| 398 | defaultCharWasUsed = false; | 400 | defaultCharWasUsed = false; |
| 399 | ConvertUnicodeToUTF8(src2, dest); | 401 | ConvertUnicodeToUTF8(src2, dest); |
| 400 | return; | 402 | return; |
| 403 | #endif | ||
| 401 | } | 404 | } |
| 402 | 405 | ||
| 403 | UString src = src2; | 406 | UString src = src2; |
| 404 | #if WCHAR_MAX > 0xffff | 407 | #if WCHAR_MAX > 0xffff |
| 405 | { | 408 | { |
| 406 | src.Empty(); | 409 | src.Empty(); |
| 407 | for (unsigned i = 0; i < src2.Len();) | 410 | for (unsigned i = 0; i < src2.Len();) |
| 408 | { | 411 | { |
| 409 | wchar_t c = src2[i]; | 412 | wchar_t c = src2[i++]; |
| 410 | if (c >= 0xd800 && c < 0xdc00 && i + 1 != src2.Len()) | 413 | if (c >= 0xd800 && c < 0xdc00 && i != src2.Len()) |
| 411 | { | 414 | { |
| 412 | const wchar_t c2 = src2[i + 1]; | 415 | const wchar_t c2 = src2[i]; |
| 413 | if (c2 >= 0xdc00 && c2 < 0x10000) | 416 | if (c2 >= 0xdc00 && c2 < 0xe000) |
| 414 | { | 417 | { |
| 418 | #if 1 | ||
| 415 | // printf("\nSurragate [%d]: %4x %4x -> ", i, (int)c, (int)c2); | 419 | // printf("\nSurragate [%d]: %4x %4x -> ", i, (int)c, (int)c2); |
| 416 | c = 0x10000 + ((c & 0x3ff) << 10) + (c2 & 0x3ff); | 420 | c = 0x10000 + ((c & 0x3ff) << 10) + (c2 & 0x3ff); |
| 417 | // printf("%4x\n", (int)c); | 421 | // printf("%4x\n", (int)c); |
| 418 | i++; | 422 | i++; |
| 423 | #else | ||
| 424 | // c = '_'; // for debug | ||
| 425 | #endif | ||
| 419 | } | 426 | } |
| 420 | } | 427 | } |
| 421 | src += c; | 428 | src += c; |
| 422 | i++; | ||
| 423 | } | 429 | } |
| 424 | } | 430 | } |
| 425 | #endif | 431 | #endif |
| 426 | 432 | ||
| 427 | dest.Empty(); | 433 | dest.Empty(); |
| 428 | defaultCharWasUsed = false; | 434 | defaultCharWasUsed = false; |
