diff options
author | Igor Pavlov <87184205+ip7z@users.noreply.github.com> | 2024-05-14 00:00:00 +0000 |
---|---|---|
committer | Igor Pavlov <87184205+ip7z@users.noreply.github.com> | 2024-05-15 23:55:04 +0500 |
commit | fc662341e6f85da78ada0e443f6116b978f79f22 (patch) | |
tree | 1be1cc402a7a9cbc18d4eeea6b141354c2d559e3 /CPP/Common/StringConvert.cpp | |
parent | 5b39dc76f1bc82f941d5c800ab9f34407a06b53a (diff) | |
download | 7zip-24.05.tar.gz 7zip-24.05.tar.bz2 7zip-24.05.zip |
24.05
24.05
Diffstat (limited to 'CPP/Common/StringConvert.cpp')
-rw-r--r-- | CPP/Common/StringConvert.cpp | 52 |
1 files changed, 29 insertions, 23 deletions
diff --git a/CPP/Common/StringConvert.cpp b/CPP/Common/StringConvert.cpp index f25396a..79ff9e0 100644 --- a/CPP/Common/StringConvert.cpp +++ b/CPP/Common/StringConvert.cpp | |||
@@ -267,8 +267,10 @@ void MultiByteToUnicodeString2(UString &dest, const AString &src, UINT codePage) | |||
267 | 267 | ||
268 | if (codePage == CP_UTF8 || g_ForceToUTF8) | 268 | if (codePage == CP_UTF8 || g_ForceToUTF8) |
269 | { | 269 | { |
270 | #if 1 | ||
270 | ConvertUTF8ToUnicode(src, dest); | 271 | ConvertUTF8ToUnicode(src, dest); |
271 | return; | 272 | return; |
273 | #endif | ||
272 | } | 274 | } |
273 | 275 | ||
274 | const size_t limit = ((size_t)src.Len() + 1) * 2; | 276 | const size_t limit = ((size_t)src.Len() + 1) * 2; |
@@ -278,48 +280,47 @@ void MultiByteToUnicodeString2(UString &dest, const AString &src, UINT codePage) | |||
278 | { | 280 | { |
279 | dest.ReleaseBuf_SetEnd((unsigned)len); | 281 | dest.ReleaseBuf_SetEnd((unsigned)len); |
280 | 282 | ||
281 | #if WCHAR_MAX > 0xffff | 283 | #if WCHAR_MAX > 0xffff |
282 | d = dest.GetBuf(); | 284 | d = dest.GetBuf(); |
283 | for (size_t i = 0;; i++) | 285 | for (size_t i = 0;; i++) |
284 | { | 286 | { |
285 | // wchar_t c = dest[i]; | ||
286 | wchar_t c = d[i]; | 287 | wchar_t c = d[i]; |
288 | // printf("\ni=%2d c = %4x\n", (unsigned)i, (unsigned)c); | ||
287 | if (c == 0) | 289 | if (c == 0) |
288 | break; | 290 | break; |
289 | if (c >= 0x10000 && c < 0x110000) | 291 | if (c >= 0x10000 && c < 0x110000) |
290 | { | 292 | { |
291 | /* | 293 | UString tempString = d + i; |
292 | c -= 0x10000; | 294 | const wchar_t *t = tempString.Ptr(); |
293 | unsigned c0 = 0xd800 + ((c >> 10) & 0x3FF); | ||
294 | dest.ReplaceOneCharAtPos(i, c0); | ||
295 | i++; | ||
296 | c = 0xdc00 + (c & 0x3FF); | ||
297 | dest.Insert_wchar_t(i, c); | ||
298 | */ | ||
299 | UString temp = d + i; | ||
300 | 295 | ||
301 | for (size_t t = 0;; t++) | 296 | for (;;) |
302 | { | 297 | { |
303 | wchar_t w = temp[t]; | 298 | wchar_t w = *t++; |
299 | // printf("\nchar=%x\n", w); | ||
304 | if (w == 0) | 300 | if (w == 0) |
305 | break; | 301 | break; |
306 | if (i == limit) | 302 | if (i == limit) |
307 | break; // unexpected error | 303 | break; // unexpected error |
308 | if (w >= 0x10000 && w < 0x110000) | 304 | if (w >= 0x10000 && w < 0x110000) |
309 | { | 305 | { |
306 | #if 1 | ||
310 | if (i + 1 == limit) | 307 | if (i + 1 == limit) |
311 | break; // unexpected error | 308 | break; // unexpected error |
312 | w -= 0x10000; | 309 | w -= 0x10000; |
313 | d[i++] = (unsigned)0xd800 + (((unsigned)w >> 10) & 0x3FF); | 310 | d[i++] = (unsigned)0xd800 + (((unsigned)w >> 10) & 0x3ff); |
314 | w = 0xdc00 + (w & 0x3FF); | 311 | w = 0xdc00 + (w & 0x3ff); |
312 | #else | ||
313 | // w = '_'; // for debug | ||
314 | #endif | ||
315 | } | 315 | } |
316 | d[i++] = w; | 316 | d[i++] = w; |
317 | } | 317 | } |
318 | dest.ReleaseBuf_SetEnd((unsigned)i); | 318 | dest.ReleaseBuf_SetEnd((unsigned)i); |
319 | break; | ||
319 | } | 320 | } |
320 | } | 321 | } |
321 | 322 | ||
322 | #endif | 323 | #endif |
323 | 324 | ||
324 | /* | 325 | /* |
325 | printf("\nMultiByteToUnicodeString2 (%d) %s\n", (int)src.Len(), src.Ptr()); | 326 | printf("\nMultiByteToUnicodeString2 (%d) %s\n", (int)src.Len(), src.Ptr()); |
@@ -395,34 +396,39 @@ static void UnicodeStringToMultiByte2(AString &dest, const UString &src2, UINT c | |||
395 | // if (codePage == 1234567) // for debug purposes | 396 | // if (codePage == 1234567) // for debug purposes |
396 | if (codePage == CP_UTF8 || g_ForceToUTF8) | 397 | if (codePage == CP_UTF8 || g_ForceToUTF8) |
397 | { | 398 | { |
399 | #if 1 | ||
398 | defaultCharWasUsed = false; | 400 | defaultCharWasUsed = false; |
399 | ConvertUnicodeToUTF8(src2, dest); | 401 | ConvertUnicodeToUTF8(src2, dest); |
400 | return; | 402 | return; |
403 | #endif | ||
401 | } | 404 | } |
402 | 405 | ||
403 | UString src = src2; | 406 | UString src = src2; |
404 | #if WCHAR_MAX > 0xffff | 407 | #if WCHAR_MAX > 0xffff |
405 | { | 408 | { |
406 | src.Empty(); | 409 | src.Empty(); |
407 | for (unsigned i = 0; i < src2.Len();) | 410 | for (unsigned i = 0; i < src2.Len();) |
408 | { | 411 | { |
409 | wchar_t c = src2[i]; | 412 | wchar_t c = src2[i++]; |
410 | if (c >= 0xd800 && c < 0xdc00 && i + 1 != src2.Len()) | 413 | if (c >= 0xd800 && c < 0xdc00 && i != src2.Len()) |
411 | { | 414 | { |
412 | const wchar_t c2 = src2[i + 1]; | 415 | const wchar_t c2 = src2[i]; |
413 | if (c2 >= 0xdc00 && c2 < 0x10000) | 416 | if (c2 >= 0xdc00 && c2 < 0xe000) |
414 | { | 417 | { |
418 | #if 1 | ||
415 | // printf("\nSurragate [%d]: %4x %4x -> ", i, (int)c, (int)c2); | 419 | // printf("\nSurragate [%d]: %4x %4x -> ", i, (int)c, (int)c2); |
416 | c = 0x10000 + ((c & 0x3ff) << 10) + (c2 & 0x3ff); | 420 | c = 0x10000 + ((c & 0x3ff) << 10) + (c2 & 0x3ff); |
417 | // printf("%4x\n", (int)c); | 421 | // printf("%4x\n", (int)c); |
418 | i++; | 422 | i++; |
423 | #else | ||
424 | // c = '_'; // for debug | ||
425 | #endif | ||
419 | } | 426 | } |
420 | } | 427 | } |
421 | src += c; | 428 | src += c; |
422 | i++; | ||
423 | } | 429 | } |
424 | } | 430 | } |
425 | #endif | 431 | #endif |
426 | 432 | ||
427 | dest.Empty(); | 433 | dest.Empty(); |
428 | defaultCharWasUsed = false; | 434 | defaultCharWasUsed = false; |