aboutsummaryrefslogtreecommitdiff
path: root/CPP/Common/StringConvert.cpp
diff options
context:
space:
mode:
author Igor Pavlov <87184205+ip7z@users.noreply.github.com> 2024-05-14 00:00:00 +0000
committer Igor Pavlov <87184205+ip7z@users.noreply.github.com> 2024-05-15 23:55:04 +0500
commit fc662341e6f85da78ada0e443f6116b978f79f22 (patch)
tree 1be1cc402a7a9cbc18d4eeea6b141354c2d559e3 /CPP/Common/StringConvert.cpp
parent 5b39dc76f1bc82f941d5c800ab9f34407a06b53a (diff)
download 7zip-24.05.tar.gz
7zip-24.05.tar.bz2
7zip-24.05.zip
24.0524.05
Diffstat (limited to 'CPP/Common/StringConvert.cpp')
-rw-r--r-- CPP/Common/StringConvert.cpp 52
1 files changed, 29 insertions, 23 deletions
diff --git a/CPP/Common/StringConvert.cpp b/CPP/Common/StringConvert.cpp
index f25396a..79ff9e0 100644
--- a/CPP/Common/StringConvert.cpp
+++ b/CPP/Common/StringConvert.cpp
@@ -267,8 +267,10 @@ void MultiByteToUnicodeString2(UString &dest, const AString &src, UINT codePage)
267 267
268 if (codePage == CP_UTF8 || g_ForceToUTF8) 268 if (codePage == CP_UTF8 || g_ForceToUTF8)
269 { 269 {
270#if 1
270 ConvertUTF8ToUnicode(src, dest); 271 ConvertUTF8ToUnicode(src, dest);
271 return; 272 return;
273#endif
272 } 274 }
273 275
274 const size_t limit = ((size_t)src.Len() + 1) * 2; 276 const size_t limit = ((size_t)src.Len() + 1) * 2;
@@ -278,48 +280,47 @@ void MultiByteToUnicodeString2(UString &dest, const AString &src, UINT codePage)
278 { 280 {
279 dest.ReleaseBuf_SetEnd((unsigned)len); 281 dest.ReleaseBuf_SetEnd((unsigned)len);
280 282
281 #if WCHAR_MAX > 0xffff 283#if WCHAR_MAX > 0xffff
282 d = dest.GetBuf(); 284 d = dest.GetBuf();
283 for (size_t i = 0;; i++) 285 for (size_t i = 0;; i++)
284 { 286 {
285 // wchar_t c = dest[i];
286 wchar_t c = d[i]; 287 wchar_t c = d[i];
288 // printf("\ni=%2d c = %4x\n", (unsigned)i, (unsigned)c);
287 if (c == 0) 289 if (c == 0)
288 break; 290 break;
289 if (c >= 0x10000 && c < 0x110000) 291 if (c >= 0x10000 && c < 0x110000)
290 { 292 {
291 /* 293 UString tempString = d + i;
292 c -= 0x10000; 294 const wchar_t *t = tempString.Ptr();
293 unsigned c0 = 0xd800 + ((c >> 10) & 0x3FF);
294 dest.ReplaceOneCharAtPos(i, c0);
295 i++;
296 c = 0xdc00 + (c & 0x3FF);
297 dest.Insert_wchar_t(i, c);
298 */
299 UString temp = d + i;
300 295
301 for (size_t t = 0;; t++) 296 for (;;)
302 { 297 {
303 wchar_t w = temp[t]; 298 wchar_t w = *t++;
299 // printf("\nchar=%x\n", w);
304 if (w == 0) 300 if (w == 0)
305 break; 301 break;
306 if (i == limit) 302 if (i == limit)
307 break; // unexpected error 303 break; // unexpected error
308 if (w >= 0x10000 && w < 0x110000) 304 if (w >= 0x10000 && w < 0x110000)
309 { 305 {
306#if 1
310 if (i + 1 == limit) 307 if (i + 1 == limit)
311 break; // unexpected error 308 break; // unexpected error
312 w -= 0x10000; 309 w -= 0x10000;
313 d[i++] = (unsigned)0xd800 + (((unsigned)w >> 10) & 0x3FF); 310 d[i++] = (unsigned)0xd800 + (((unsigned)w >> 10) & 0x3ff);
314 w = 0xdc00 + (w & 0x3FF); 311 w = 0xdc00 + (w & 0x3ff);
312#else
313 // w = '_'; // for debug
314#endif
315 } 315 }
316 d[i++] = w; 316 d[i++] = w;
317 } 317 }
318 dest.ReleaseBuf_SetEnd((unsigned)i); 318 dest.ReleaseBuf_SetEnd((unsigned)i);
319 break;
319 } 320 }
320 } 321 }
321 322
322 #endif 323#endif
323 324
324 /* 325 /*
325 printf("\nMultiByteToUnicodeString2 (%d) %s\n", (int)src.Len(), src.Ptr()); 326 printf("\nMultiByteToUnicodeString2 (%d) %s\n", (int)src.Len(), src.Ptr());
@@ -395,34 +396,39 @@ static void UnicodeStringToMultiByte2(AString &dest, const UString &src2, UINT c
395 // if (codePage == 1234567) // for debug purposes 396 // if (codePage == 1234567) // for debug purposes
396 if (codePage == CP_UTF8 || g_ForceToUTF8) 397 if (codePage == CP_UTF8 || g_ForceToUTF8)
397 { 398 {
399#if 1
398 defaultCharWasUsed = false; 400 defaultCharWasUsed = false;
399 ConvertUnicodeToUTF8(src2, dest); 401 ConvertUnicodeToUTF8(src2, dest);
400 return; 402 return;
403#endif
401 } 404 }
402 405
403 UString src = src2; 406 UString src = src2;
404 #if WCHAR_MAX > 0xffff 407#if WCHAR_MAX > 0xffff
405 { 408 {
406 src.Empty(); 409 src.Empty();
407 for (unsigned i = 0; i < src2.Len();) 410 for (unsigned i = 0; i < src2.Len();)
408 { 411 {
409 wchar_t c = src2[i]; 412 wchar_t c = src2[i++];
410 if (c >= 0xd800 && c < 0xdc00 && i + 1 != src2.Len()) 413 if (c >= 0xd800 && c < 0xdc00 && i != src2.Len())
411 { 414 {
412 const wchar_t c2 = src2[i + 1]; 415 const wchar_t c2 = src2[i];
413 if (c2 >= 0xdc00 && c2 < 0x10000) 416 if (c2 >= 0xdc00 && c2 < 0xe000)
414 { 417 {
418#if 1
415 // printf("\nSurragate [%d]: %4x %4x -> ", i, (int)c, (int)c2); 419 // printf("\nSurragate [%d]: %4x %4x -> ", i, (int)c, (int)c2);
416 c = 0x10000 + ((c & 0x3ff) << 10) + (c2 & 0x3ff); 420 c = 0x10000 + ((c & 0x3ff) << 10) + (c2 & 0x3ff);
417 // printf("%4x\n", (int)c); 421 // printf("%4x\n", (int)c);
418 i++; 422 i++;
423#else
424 // c = '_'; // for debug
425#endif
419 } 426 }
420 } 427 }
421 src += c; 428 src += c;
422 i++;
423 } 429 }
424 } 430 }
425 #endif 431#endif
426 432
427 dest.Empty(); 433 dest.Empty();
428 defaultCharWasUsed = false; 434 defaultCharWasUsed = false;