diff options
Diffstat (limited to '')
-rw-r--r-- | CPP/Common/UTFConvert.cpp | 196 |
1 files changed, 98 insertions, 98 deletions
diff --git a/CPP/Common/UTFConvert.cpp b/CPP/Common/UTFConvert.cpp index ac069db..fb166b7 100644 --- a/CPP/Common/UTFConvert.cpp +++ b/CPP/Common/UTFConvert.cpp | |||
@@ -8,17 +8,17 @@ | |||
8 | #include "UTFConvert.h" | 8 | #include "UTFConvert.h" |
9 | 9 | ||
10 | 10 | ||
11 | #ifndef _WCHART_IS_16BIT | 11 | #ifndef Z7_WCHART_IS_16BIT |
12 | #ifndef __APPLE__ | 12 | #ifndef __APPLE__ |
13 | // we define it if the system supports files with non-utf8 symbols: | 13 | // we define it if the system supports files with non-utf8 symbols: |
14 | #define _UTF8_RAW_NON_UTF8_SUPPORTED | 14 | #define MY_UTF8_RAW_NON_UTF8_SUPPORTED |
15 | #endif | 15 | #endif |
16 | #endif | 16 | #endif |
17 | 17 | ||
18 | /* | 18 | /* |
19 | _UTF8_START(n) - is a base value for start byte (head), if there are (n) additional bytes after start byte | 19 | MY_UTF8_START(n) - is a base value for start byte (head), if there are (n) additional bytes after start byte |
20 | 20 | ||
21 | n : _UTF8_START(n) : Bits of code point | 21 | n : MY_UTF8_START(n) : Bits of code point |
22 | 22 | ||
23 | 0 : 0x80 : : unused | 23 | 0 : 0x80 : : unused |
24 | 1 : 0xC0 : 11 : | 24 | 1 : 0xC0 : 11 : |
@@ -30,13 +30,13 @@ | |||
30 | 7 : 0xFF : | 30 | 7 : 0xFF : |
31 | */ | 31 | */ |
32 | 32 | ||
33 | #define _UTF8_START(n) (0x100 - (1 << (7 - (n)))) | 33 | #define MY_UTF8_START(n) (0x100 - (1 << (7 - (n)))) |
34 | 34 | ||
35 | #define _UTF8_HEAD_PARSE2(n) \ | 35 | #define MY_UTF8_HEAD_PARSE2(n) \ |
36 | if (c < _UTF8_START((n) + 1)) \ | 36 | if (c < MY_UTF8_START((n) + 1)) \ |
37 | { numBytes = (n); val -= _UTF8_START(n); } | 37 | { numBytes = (n); val -= MY_UTF8_START(n); } |
38 | 38 | ||
39 | #ifndef _WCHART_IS_16BIT | 39 | #ifndef Z7_WCHART_IS_16BIT |
40 | 40 | ||
41 | /* | 41 | /* |
42 | if (wchar_t is 32-bit), we can support large points in long UTF-8 sequence, | 42 | if (wchar_t is 32-bit), we can support large points in long UTF-8 sequence, |
@@ -46,30 +46,30 @@ | |||
46 | (_UTF8_NUM_TAIL_BYTES_MAX == 6) : (36-bit hack) | 46 | (_UTF8_NUM_TAIL_BYTES_MAX == 6) : (36-bit hack) |
47 | */ | 47 | */ |
48 | 48 | ||
49 | #define _UTF8_NUM_TAIL_BYTES_MAX 5 | 49 | #define MY_UTF8_NUM_TAIL_BYTES_MAX 5 |
50 | #endif | 50 | #endif |
51 | 51 | ||
52 | /* | 52 | /* |
53 | #define _UTF8_HEAD_PARSE \ | 53 | #define MY_UTF8_HEAD_PARSE \ |
54 | UInt32 val = c; \ | 54 | UInt32 val = c; \ |
55 | _UTF8_HEAD_PARSE2(1) \ | 55 | MY_UTF8_HEAD_PARSE2(1) \ |
56 | else _UTF8_HEAD_PARSE2(2) \ | 56 | else MY_UTF8_HEAD_PARSE2(2) \ |
57 | else _UTF8_HEAD_PARSE2(3) \ | 57 | else MY_UTF8_HEAD_PARSE2(3) \ |
58 | else _UTF8_HEAD_PARSE2(4) \ | 58 | else MY_UTF8_HEAD_PARSE2(4) \ |
59 | else _UTF8_HEAD_PARSE2(5) \ | 59 | else MY_UTF8_HEAD_PARSE2(5) \ |
60 | #if _UTF8_NUM_TAIL_BYTES_MAX >= 6 | 60 | #if MY_UTF8_NUM_TAIL_BYTES_MAX >= 6 |
61 | else _UTF8_HEAD_PARSE2(6) | 61 | else MY_UTF8_HEAD_PARSE2(6) |
62 | #endif | 62 | #endif |
63 | */ | 63 | */ |
64 | 64 | ||
65 | #define _UTF8_HEAD_PARSE_MAX_3_BYTES \ | 65 | #define MY_UTF8_HEAD_PARSE_MAX_3_BYTES \ |
66 | UInt32 val = c; \ | 66 | UInt32 val = c; \ |
67 | _UTF8_HEAD_PARSE2(1) \ | 67 | MY_UTF8_HEAD_PARSE2(1) \ |
68 | else _UTF8_HEAD_PARSE2(2) \ | 68 | else MY_UTF8_HEAD_PARSE2(2) \ |
69 | else { numBytes = 3; val -= _UTF8_START(3); } | 69 | else { numBytes = 3; val -= MY_UTF8_START(3); } |
70 | 70 | ||
71 | 71 | ||
72 | #define _UTF8_RANGE(n) (((UInt32)1) << ((n) * 5 + 6)) | 72 | #define MY_UTF8_RANGE(n) (((UInt32)1) << ((n) * 5 + 6)) |
73 | 73 | ||
74 | 74 | ||
75 | #define START_POINT_FOR_SURROGATE 0x10000 | 75 | #define START_POINT_FOR_SURROGATE 0x10000 |
@@ -82,7 +82,7 @@ | |||
82 | */ | 82 | */ |
83 | 83 | ||
84 | 84 | ||
85 | #if defined(_WCHART_IS_16BIT) | 85 | #if defined(Z7_WCHART_IS_16BIT) |
86 | 86 | ||
87 | #define UTF_ESCAPE_PLANE 0 | 87 | #define UTF_ESCAPE_PLANE 0 |
88 | 88 | ||
@@ -102,7 +102,7 @@ we can place 128 ESCAPE chars to | |||
102 | #define UTF_ESCAPE_PLANE 0 | 102 | #define UTF_ESCAPE_PLANE 0 |
103 | 103 | ||
104 | /* | 104 | /* |
105 | if (UTF_FLAG__FROM_UTF8__USE_ESCAPE is set) | 105 | if (Z7_UTF_FLAG_FROM_UTF8_USE_ESCAPE is set) |
106 | { | 106 | { |
107 | if (UTF_ESCAPE_PLANE is UTF_ESCAPE_PLANE_HIGH) | 107 | if (UTF_ESCAPE_PLANE is UTF_ESCAPE_PLANE_HIGH) |
108 | { | 108 | { |
@@ -111,13 +111,13 @@ we can place 128 ESCAPE chars to | |||
111 | So we still need a way to extract 8-bit Escapes and BMP-Escapes-8 | 111 | So we still need a way to extract 8-bit Escapes and BMP-Escapes-8 |
112 | from same BMP-Escapes-16 stored in 7z. | 112 | from same BMP-Escapes-16 stored in 7z. |
113 | And if we want to restore any 8-bit from 7z archive, | 113 | And if we want to restore any 8-bit from 7z archive, |
114 | we still must use UTF_FLAG__FROM_UTF8__BMP_ESCAPE_CONVERT for (utf-8 -> utf-16) | 114 | we still must use Z7_UTF_FLAG_FROM_UTF8_BMP_ESCAPE_CONVERT for (utf-8 -> utf-16) |
115 | Also we need additional Conversions to transform from utf-16 to utf-16-With-Escapes-21 | 115 | Also we need additional Conversions to transform from utf-16 to utf-16-With-Escapes-21 |
116 | } | 116 | } |
117 | else (UTF_ESCAPE_PLANE == 0) | 117 | else (UTF_ESCAPE_PLANE == 0) |
118 | { | 118 | { |
119 | we must convert original 3-bytes utf-8 BMP-Escape point to sequence | 119 | we must convert original 3-bytes utf-8 BMP-Escape point to sequence |
120 | of 3 BMP-Escape-16 points with UTF_FLAG__FROM_UTF8__BMP_ESCAPE_CONVERT | 120 | of 3 BMP-Escape-16 points with Z7_UTF_FLAG_FROM_UTF8_BMP_ESCAPE_CONVERT |
121 | so we can extract original RAW-UTF-8 from UTF-16 later. | 121 | so we can extract original RAW-UTF-8 from UTF-16 later. |
122 | } | 122 | } |
123 | } | 123 | } |
@@ -138,7 +138,7 @@ we can place 128 ESCAPE chars to | |||
138 | #define IS_LOW_SURROGATE_POINT(v) (((v) & (UInt32)0xfffffC00) == 0xdc00) | 138 | #define IS_LOW_SURROGATE_POINT(v) (((v) & (UInt32)0xfffffC00) == 0xdc00) |
139 | 139 | ||
140 | 140 | ||
141 | #define _ERROR_UTF8_CHECK \ | 141 | #define UTF_ERROR_UTF8_CHECK \ |
142 | { NonUtf = true; continue; } | 142 | { NonUtf = true; continue; } |
143 | 143 | ||
144 | void CUtf8Check::Check_Buf(const char *src, size_t size) throw() | 144 | void CUtf8Check::Check_Buf(const char *src, size_t size) throw() |
@@ -168,19 +168,19 @@ void CUtf8Check::Check_Buf(const char *src, size_t size) throw() | |||
168 | if (c < 0x80) | 168 | if (c < 0x80) |
169 | continue; | 169 | continue; |
170 | 170 | ||
171 | if (c < 0xc0 + 2)// it's limit for 0x140000 unicode codes : win32 compatibility | 171 | if (c < 0xc0 + 2) // it's limit for 0x140000 unicode codes : win32 compatibility |
172 | _ERROR_UTF8_CHECK | 172 | UTF_ERROR_UTF8_CHECK |
173 | 173 | ||
174 | unsigned numBytes; | 174 | unsigned numBytes; |
175 | 175 | ||
176 | UInt32 val = c; | 176 | UInt32 val = c; |
177 | _UTF8_HEAD_PARSE2(1) | 177 | MY_UTF8_HEAD_PARSE2(1) |
178 | else _UTF8_HEAD_PARSE2(2) | 178 | else MY_UTF8_HEAD_PARSE2(2) |
179 | else _UTF8_HEAD_PARSE2(4) | 179 | else MY_UTF8_HEAD_PARSE2(4) |
180 | else _UTF8_HEAD_PARSE2(5) | 180 | else MY_UTF8_HEAD_PARSE2(5) |
181 | else | 181 | else |
182 | { | 182 | { |
183 | _ERROR_UTF8_CHECK | 183 | UTF_ERROR_UTF8_CHECK |
184 | } | 184 | } |
185 | 185 | ||
186 | unsigned pos = 0; | 186 | unsigned pos = 0; |
@@ -206,7 +206,7 @@ void CUtf8Check::Check_Buf(const char *src, size_t size) throw() | |||
206 | if (pos == size) | 206 | if (pos == size) |
207 | Truncated = true; | 207 | Truncated = true; |
208 | else | 208 | else |
209 | _ERROR_UTF8_CHECK | 209 | UTF_ERROR_UTF8_CHECK |
210 | } | 210 | } |
211 | 211 | ||
212 | #ifdef UTF_ESCAPE_BASE | 212 | #ifdef UTF_ESCAPE_BASE |
@@ -268,7 +268,7 @@ bool CheckUTF8(const char *src, bool allowReduced) throw() | |||
268 | return false; | 268 | return false; |
269 | 269 | ||
270 | unsigned numBytes; | 270 | unsigned numBytes; |
271 | _UTF8_HEAD_PARSE | 271 | MY_UTF8_HEAD_PARSE |
272 | else | 272 | else |
273 | return false; | 273 | return false; |
274 | 274 | ||
@@ -285,7 +285,7 @@ bool CheckUTF8(const char *src, bool allowReduced) throw() | |||
285 | } | 285 | } |
286 | while (--numBytes); | 286 | while (--numBytes); |
287 | 287 | ||
288 | if (val < _UTF8_RANGE(pos - 1)) | 288 | if (val < MY_UTF8_RANGE(pos - 1)) |
289 | return false; | 289 | return false; |
290 | 290 | ||
291 | if (val >= 0x110000) | 291 | if (val >= 0x110000) |
@@ -303,18 +303,18 @@ bool CheckUTF8(const char *src, bool allowReduced) throw() | |||
303 | 303 | ||
304 | 304 | ||
305 | #define UTF_ESCAPE(c) \ | 305 | #define UTF_ESCAPE(c) \ |
306 | ((flags & UTF_FLAG__FROM_UTF8__USE_ESCAPE) ? \ | 306 | ((flags & Z7_UTF_FLAG_FROM_UTF8_USE_ESCAPE) ? \ |
307 | UTF_ESCAPE_PLANE + UTF_ESCAPE_BASE + (c) : UTF_REPLACEMENT_CHAR) | 307 | UTF_ESCAPE_PLANE + UTF_ESCAPE_BASE + (c) : UTF_REPLACEMENT_CHAR) |
308 | 308 | ||
309 | /* | 309 | /* |
310 | #define _HARD_ERROR_UTF8 | 310 | #define UTF_HARD_ERROR_UTF8 |
311 | { if (dest) dest[destPos] = (wchar_t)UTF_ESCAPE(c); \ | 311 | { if (dest) dest[destPos] = (wchar_t)UTF_ESCAPE(c); \ |
312 | destPos++; ok = false; continue; } | 312 | destPos++; ok = false; continue; } |
313 | */ | 313 | */ |
314 | 314 | ||
315 | // we ignore utf errors, and don't change (ok) variable! | 315 | // we ignore utf errors, and don't change (ok) variable! |
316 | 316 | ||
317 | #define _ERROR_UTF8 \ | 317 | #define UTF_ERROR_UTF8 \ |
318 | { if (dest) dest[destPos] = (wchar_t)UTF_ESCAPE(c); \ | 318 | { if (dest) dest[destPos] = (wchar_t)UTF_ESCAPE(c); \ |
319 | destPos++; continue; } | 319 | destPos++; continue; } |
320 | 320 | ||
@@ -362,12 +362,12 @@ static bool Utf8_To_Utf16(wchar_t *dest, size_t *destLen, const char *src, const | |||
362 | if (c < 0xc0 + 2 | 362 | if (c < 0xc0 + 2 |
363 | || c >= 0xf5) // it's limit for 0x140000 unicode codes : win32 compatibility | 363 | || c >= 0xf5) // it's limit for 0x140000 unicode codes : win32 compatibility |
364 | { | 364 | { |
365 | _ERROR_UTF8 | 365 | UTF_ERROR_UTF8 |
366 | } | 366 | } |
367 | 367 | ||
368 | unsigned numBytes; | 368 | unsigned numBytes; |
369 | 369 | ||
370 | _UTF8_HEAD_PARSE_MAX_3_BYTES | 370 | MY_UTF8_HEAD_PARSE_MAX_3_BYTES |
371 | 371 | ||
372 | unsigned pos = 0; | 372 | unsigned pos = 0; |
373 | do | 373 | do |
@@ -387,7 +387,7 @@ static bool Utf8_To_Utf16(wchar_t *dest, size_t *destLen, const char *src, const | |||
387 | break; | 387 | break; |
388 | if (numBytes == 2) | 388 | if (numBytes == 2) |
389 | { | 389 | { |
390 | if (flags & UTF_FLAG__FROM_UTF8__SURROGATE_ERROR) | 390 | if (flags & Z7_UTF_FLAG_FROM_UTF8_SURROGATE_ERROR) |
391 | if ((val & (0xF800 >> 6)) == (0xd800 >> 6)) | 391 | if ((val & (0xF800 >> 6)) == (0xd800 >> 6)) |
392 | break; | 392 | break; |
393 | } | 393 | } |
@@ -399,27 +399,27 @@ static bool Utf8_To_Utf16(wchar_t *dest, size_t *destLen, const char *src, const | |||
399 | 399 | ||
400 | if (numBytes != 0) | 400 | if (numBytes != 0) |
401 | { | 401 | { |
402 | if ((flags & UTF_FLAG__FROM_UTF8__USE_ESCAPE) == 0) | 402 | if ((flags & Z7_UTF_FLAG_FROM_UTF8_USE_ESCAPE) == 0) |
403 | { | 403 | { |
404 | // the following code to emit the 0xfffd chars as win32 Utf8 function. | 404 | // the following code to emit the 0xfffd chars as win32 Utf8 function. |
405 | // disable the following line, if you need 0xfffd for each incorrect byte as in Escape mode | 405 | // disable the following line, if you need 0xfffd for each incorrect byte as in Escape mode |
406 | src += pos; | 406 | src += pos; |
407 | } | 407 | } |
408 | _ERROR_UTF8 | 408 | UTF_ERROR_UTF8 |
409 | } | 409 | } |
410 | 410 | ||
411 | /* | 411 | /* |
412 | if (val < _UTF8_RANGE(pos - 1)) | 412 | if (val < MY_UTF8_RANGE(pos - 1)) |
413 | _ERROR_UTF8 | 413 | UTF_ERROR_UTF8 |
414 | */ | 414 | */ |
415 | 415 | ||
416 | #ifdef UTF_ESCAPE_BASE | 416 | #ifdef UTF_ESCAPE_BASE |
417 | 417 | ||
418 | if ((flags & UTF_FLAG__FROM_UTF8__BMP_ESCAPE_CONVERT) | 418 | if ((flags & Z7_UTF_FLAG_FROM_UTF8_BMP_ESCAPE_CONVERT) |
419 | && IS_ESCAPE_POINT(val, 0)) | 419 | && IS_ESCAPE_POINT(val, 0)) |
420 | { | 420 | { |
421 | // We will emit 3 utf16-Escape-16-21 points from one Escape-16 point (3 bytes) | 421 | // We will emit 3 utf16-Escape-16-21 points from one Escape-16 point (3 bytes) |
422 | _ERROR_UTF8 | 422 | UTF_ERROR_UTF8 |
423 | } | 423 | } |
424 | 424 | ||
425 | #endif | 425 | #endif |
@@ -434,11 +434,11 @@ static bool Utf8_To_Utf16(wchar_t *dest, size_t *destLen, const char *src, const | |||
434 | if (val < START_POINT_FOR_SURROGATE) | 434 | if (val < START_POINT_FOR_SURROGATE) |
435 | { | 435 | { |
436 | /* | 436 | /* |
437 | if ((flags & UTF_FLAG__FROM_UTF8__SURROGATE_ERROR) | 437 | if ((flags & Z7_UTF_FLAG_FROM_UTF8_SURROGATE_ERROR) |
438 | && IS_SURROGATE_POINT(val)) | 438 | && IS_SURROGATE_POINT(val)) |
439 | { | 439 | { |
440 | // We will emit 3 utf16-Escape-16-21 points from one Surrogate-16 point (3 bytes) | 440 | // We will emit 3 utf16-Escape-16-21 points from one Surrogate-16 point (3 bytes) |
441 | _ERROR_UTF8 | 441 | UTF_ERROR_UTF8 |
442 | } | 442 | } |
443 | */ | 443 | */ |
444 | if (dest) | 444 | if (dest) |
@@ -451,7 +451,7 @@ static bool Utf8_To_Utf16(wchar_t *dest, size_t *destLen, const char *src, const | |||
451 | if (val >= 0x110000) | 451 | if (val >= 0x110000) |
452 | { | 452 | { |
453 | // We will emit utf16-Escape-16-21 point from each source byte | 453 | // We will emit utf16-Escape-16-21 point from each source byte |
454 | _ERROR_UTF8 | 454 | UTF_ERROR_UTF8 |
455 | } | 455 | } |
456 | */ | 456 | */ |
457 | if (dest) | 457 | if (dest) |
@@ -467,8 +467,8 @@ static bool Utf8_To_Utf16(wchar_t *dest, size_t *destLen, const char *src, const | |||
467 | 467 | ||
468 | 468 | ||
469 | 469 | ||
470 | #define _UTF8_HEAD(n, val) ((char)(_UTF8_START(n) + (val >> (6 * (n))))) | 470 | #define MY_UTF8_HEAD(n, val) ((char)(MY_UTF8_START(n) + (val >> (6 * (n))))) |
471 | #define _UTF8_CHAR(n, val) ((char)(0x80 + (((val) >> (6 * (n))) & 0x3F))) | 471 | #define MY_UTF8_CHAR(n, val) ((char)(0x80 + (((val) >> (6 * (n))) & 0x3F))) |
472 | 472 | ||
473 | static size_t Utf16_To_Utf8_Calc(const wchar_t *src, const wchar_t *srcLim, unsigned flags) | 473 | static size_t Utf16_To_Utf8_Calc(const wchar_t *src, const wchar_t *srcLim, unsigned flags) |
474 | { | 474 | { |
@@ -483,7 +483,7 @@ static size_t Utf16_To_Utf8_Calc(const wchar_t *src, const wchar_t *srcLim, unsi | |||
483 | if (val < 0x80) | 483 | if (val < 0x80) |
484 | continue; | 484 | continue; |
485 | 485 | ||
486 | if (val < _UTF8_RANGE(1)) | 486 | if (val < MY_UTF8_RANGE(1)) |
487 | { | 487 | { |
488 | size++; | 488 | size++; |
489 | continue; | 489 | continue; |
@@ -492,12 +492,12 @@ static size_t Utf16_To_Utf8_Calc(const wchar_t *src, const wchar_t *srcLim, unsi | |||
492 | #ifdef UTF_ESCAPE_BASE | 492 | #ifdef UTF_ESCAPE_BASE |
493 | 493 | ||
494 | #if UTF_ESCAPE_PLANE != 0 | 494 | #if UTF_ESCAPE_PLANE != 0 |
495 | if (flags & UTF_FLAG__TO_UTF8__PARSE_HIGH_ESCAPE) | 495 | if (flags & Z7_UTF_FLAG_TO_UTF8_PARSE_HIGH_ESCAPE) |
496 | if (IS_ESCAPE_POINT(val, UTF_ESCAPE_PLANE)) | 496 | if (IS_ESCAPE_POINT(val, UTF_ESCAPE_PLANE)) |
497 | continue; | 497 | continue; |
498 | #endif | 498 | #endif |
499 | 499 | ||
500 | if (flags & UTF_FLAG__TO_UTF8__EXTRACT_BMP_ESCAPE) | 500 | if (flags & Z7_UTF_FLAG_TO_UTF8_EXTRACT_BMP_ESCAPE) |
501 | if (IS_ESCAPE_POINT(val, 0)) | 501 | if (IS_ESCAPE_POINT(val, 0)) |
502 | continue; | 502 | continue; |
503 | 503 | ||
@@ -517,18 +517,18 @@ static size_t Utf16_To_Utf8_Calc(const wchar_t *src, const wchar_t *srcLim, unsi | |||
517 | continue; | 517 | continue; |
518 | } | 518 | } |
519 | 519 | ||
520 | #ifdef _WCHART_IS_16BIT | 520 | #ifdef Z7_WCHART_IS_16BIT |
521 | 521 | ||
522 | size += 2; | 522 | size += 2; |
523 | 523 | ||
524 | #else | 524 | #else |
525 | 525 | ||
526 | if (val < _UTF8_RANGE(2)) size += 2; | 526 | if (val < MY_UTF8_RANGE(2)) size += 2; |
527 | else if (val < _UTF8_RANGE(3)) size += 3; | 527 | else if (val < MY_UTF8_RANGE(3)) size += 3; |
528 | else if (val < _UTF8_RANGE(4)) size += 4; | 528 | else if (val < MY_UTF8_RANGE(4)) size += 4; |
529 | else if (val < _UTF8_RANGE(5)) size += 5; | 529 | else if (val < MY_UTF8_RANGE(5)) size += 5; |
530 | else | 530 | else |
531 | #if _UTF8_NUM_TAIL_BYTES_MAX >= 6 | 531 | #if MY_UTF8_NUM_TAIL_BYTES_MAX >= 6 |
532 | size += 6; | 532 | size += 6; |
533 | #else | 533 | #else |
534 | size += 3; | 534 | size += 3; |
@@ -554,10 +554,10 @@ static char *Utf16_To_Utf8(char *dest, const wchar_t *src, const wchar_t *srcLim | |||
554 | continue; | 554 | continue; |
555 | } | 555 | } |
556 | 556 | ||
557 | if (val < _UTF8_RANGE(1)) | 557 | if (val < MY_UTF8_RANGE(1)) |
558 | { | 558 | { |
559 | dest[0] = _UTF8_HEAD(1, val); | 559 | dest[0] = MY_UTF8_HEAD(1, val); |
560 | dest[1] = _UTF8_CHAR(0, val); | 560 | dest[1] = MY_UTF8_CHAR(0, val); |
561 | dest += 2; | 561 | dest += 2; |
562 | continue; | 562 | continue; |
563 | } | 563 | } |
@@ -567,11 +567,11 @@ static char *Utf16_To_Utf8(char *dest, const wchar_t *src, const wchar_t *srcLim | |||
567 | #if UTF_ESCAPE_PLANE != 0 | 567 | #if UTF_ESCAPE_PLANE != 0 |
568 | /* | 568 | /* |
569 | if (wchar_t is 32-bit) | 569 | if (wchar_t is 32-bit) |
570 | && (UTF_FLAG__TO_UTF8__PARSE_HIGH_ESCAPE is set) | 570 | && (Z7_UTF_FLAG_TO_UTF8_PARSE_HIGH_ESCAPE is set) |
571 | && (point is virtual escape plane) | 571 | && (point is virtual escape plane) |
572 | we extract 8-bit byte from virtual HIGH-ESCAPE PLANE. | 572 | we extract 8-bit byte from virtual HIGH-ESCAPE PLANE. |
573 | */ | 573 | */ |
574 | if (flags & UTF_FLAG__TO_UTF8__PARSE_HIGH_ESCAPE) | 574 | if (flags & Z7_UTF_FLAG_TO_UTF8_PARSE_HIGH_ESCAPE) |
575 | if (IS_ESCAPE_POINT(val, UTF_ESCAPE_PLANE)) | 575 | if (IS_ESCAPE_POINT(val, UTF_ESCAPE_PLANE)) |
576 | { | 576 | { |
577 | *dest++ = (char)(val); | 577 | *dest++ = (char)(val); |
@@ -579,10 +579,10 @@ static char *Utf16_To_Utf8(char *dest, const wchar_t *src, const wchar_t *srcLim | |||
579 | } | 579 | } |
580 | #endif // UTF_ESCAPE_PLANE != 0 | 580 | #endif // UTF_ESCAPE_PLANE != 0 |
581 | 581 | ||
582 | /* if (UTF_FLAG__TO_UTF8__EXTRACT_BMP_ESCAPE is defined) | 582 | /* if (Z7_UTF_FLAG_TO_UTF8_EXTRACT_BMP_ESCAPE is defined) |
583 | we extract 8-bit byte from BMP-ESCAPE PLANE. */ | 583 | we extract 8-bit byte from BMP-ESCAPE PLANE. */ |
584 | 584 | ||
585 | if (flags & UTF_FLAG__TO_UTF8__EXTRACT_BMP_ESCAPE) | 585 | if (flags & Z7_UTF_FLAG_TO_UTF8_EXTRACT_BMP_ESCAPE) |
586 | if (IS_ESCAPE_POINT(val, 0)) | 586 | if (IS_ESCAPE_POINT(val, 0)) |
587 | { | 587 | { |
588 | *dest++ = (char)(val); | 588 | *dest++ = (char)(val); |
@@ -601,46 +601,46 @@ static char *Utf16_To_Utf8(char *dest, const wchar_t *src, const wchar_t *srcLim | |||
601 | { | 601 | { |
602 | src++; | 602 | src++; |
603 | val = (((val - 0xd800) << 10) | (c2 - 0xdc00)) + 0x10000; | 603 | val = (((val - 0xd800) << 10) | (c2 - 0xdc00)) + 0x10000; |
604 | dest[0] = _UTF8_HEAD(3, val); | 604 | dest[0] = MY_UTF8_HEAD(3, val); |
605 | dest[1] = _UTF8_CHAR(2, val); | 605 | dest[1] = MY_UTF8_CHAR(2, val); |
606 | dest[2] = _UTF8_CHAR(1, val); | 606 | dest[2] = MY_UTF8_CHAR(1, val); |
607 | dest[3] = _UTF8_CHAR(0, val); | 607 | dest[3] = MY_UTF8_CHAR(0, val); |
608 | dest += 4; | 608 | dest += 4; |
609 | continue; | 609 | continue; |
610 | } | 610 | } |
611 | } | 611 | } |
612 | if (flags & UTF_FLAG__TO_UTF8__SURROGATE_ERROR) | 612 | if (flags & Z7_UTF_FLAG_TO_UTF8_SURROGATE_ERROR) |
613 | val = UTF_REPLACEMENT_CHAR; // WIN32 function does it | 613 | val = UTF_REPLACEMENT_CHAR; // WIN32 function does it |
614 | } | 614 | } |
615 | 615 | ||
616 | #ifndef _WCHART_IS_16BIT | 616 | #ifndef Z7_WCHART_IS_16BIT |
617 | if (val < _UTF8_RANGE(2)) | 617 | if (val < MY_UTF8_RANGE(2)) |
618 | #endif | 618 | #endif |
619 | { | 619 | { |
620 | dest[0] = _UTF8_HEAD(2, val); | 620 | dest[0] = MY_UTF8_HEAD(2, val); |
621 | dest[1] = _UTF8_CHAR(1, val); | 621 | dest[1] = MY_UTF8_CHAR(1, val); |
622 | dest[2] = _UTF8_CHAR(0, val); | 622 | dest[2] = MY_UTF8_CHAR(0, val); |
623 | dest += 3; | 623 | dest += 3; |
624 | continue; | 624 | continue; |
625 | } | 625 | } |
626 | 626 | ||
627 | #ifndef _WCHART_IS_16BIT | 627 | #ifndef Z7_WCHART_IS_16BIT |
628 | 628 | ||
629 | // we don't expect this case. so we can throw exception | 629 | // we don't expect this case. so we can throw exception |
630 | // throw 20210407; | 630 | // throw 20210407; |
631 | 631 | ||
632 | char b; | 632 | char b; |
633 | unsigned numBits; | 633 | unsigned numBits; |
634 | if (val < _UTF8_RANGE(3)) { numBits = 6 * 3; b = _UTF8_HEAD(3, val); } | 634 | if (val < MY_UTF8_RANGE(3)) { numBits = 6 * 3; b = MY_UTF8_HEAD(3, val); } |
635 | else if (val < _UTF8_RANGE(4)) { numBits = 6 * 4; b = _UTF8_HEAD(4, val); } | 635 | else if (val < MY_UTF8_RANGE(4)) { numBits = 6 * 4; b = MY_UTF8_HEAD(4, val); } |
636 | else if (val < _UTF8_RANGE(5)) { numBits = 6 * 5; b = _UTF8_HEAD(5, val); } | 636 | else if (val < MY_UTF8_RANGE(5)) { numBits = 6 * 5; b = MY_UTF8_HEAD(5, val); } |
637 | #if _UTF8_NUM_TAIL_BYTES_MAX >= 6 | 637 | #if MY_UTF8_NUM_TAIL_BYTES_MAX >= 6 |
638 | else { numBits = 6 * 6; b = (char)_UTF8_START(6); } | 638 | else { numBits = 6 * 6; b = (char)MY_UTF8_START(6); } |
639 | #else | 639 | #else |
640 | else | 640 | else |
641 | { | 641 | { |
642 | val = UTF_REPLACEMENT_CHAR; | 642 | val = UTF_REPLACEMENT_CHAR; |
643 | { numBits = 6 * 3; b = _UTF8_HEAD(3, val); } | 643 | { numBits = 6 * 3; b = MY_UTF8_HEAD(3, val); } |
644 | } | 644 | } |
645 | #endif | 645 | #endif |
646 | 646 | ||
@@ -675,11 +675,11 @@ bool ConvertUTF8ToUnicode_Flags(const AString &src, UString &dest, unsigned flag | |||
675 | 675 | ||
676 | static | 676 | static |
677 | unsigned g_UTF8_To_Unicode_Flags = | 677 | unsigned g_UTF8_To_Unicode_Flags = |
678 | UTF_FLAG__FROM_UTF8__USE_ESCAPE | 678 | Z7_UTF_FLAG_FROM_UTF8_USE_ESCAPE |
679 | #ifndef _WCHART_IS_16BIT | 679 | #ifndef Z7_WCHART_IS_16BIT |
680 | | UTF_FLAG__FROM_UTF8__SURROGATE_ERROR | 680 | | Z7_UTF_FLAG_FROM_UTF8_SURROGATE_ERROR |
681 | #ifdef _UTF8_RAW_NON_UTF8_SUPPORTED | 681 | #ifdef MY_UTF8_RAW_NON_UTF8_SUPPORTED |
682 | | UTF_FLAG__FROM_UTF8__BMP_ESCAPE_CONVERT | 682 | | Z7_UTF_FLAG_FROM_UTF8_BMP_ESCAPE_CONVERT |
683 | #endif | 683 | #endif |
684 | #endif | 684 | #endif |
685 | ; | 685 | ; |
@@ -729,13 +729,13 @@ void ConvertUnicodeToUTF8_Flags(const UString &src, AString &dest, unsigned flag | |||
729 | 729 | ||
730 | 730 | ||
731 | unsigned g_Unicode_To_UTF8_Flags = | 731 | unsigned g_Unicode_To_UTF8_Flags = |
732 | // UTF_FLAG__TO_UTF8__PARSE_HIGH_ESCAPE | 732 | // Z7_UTF_FLAG_TO_UTF8_PARSE_HIGH_ESCAPE |
733 | 0 | 733 | 0 |
734 | #ifndef _WIN32 | 734 | #ifndef _WIN32 |
735 | #ifdef _UTF8_RAW_NON_UTF8_SUPPORTED | 735 | #ifdef MY_UTF8_RAW_NON_UTF8_SUPPORTED |
736 | | UTF_FLAG__TO_UTF8__EXTRACT_BMP_ESCAPE | 736 | | Z7_UTF_FLAG_TO_UTF8_EXTRACT_BMP_ESCAPE |
737 | #else | 737 | #else |
738 | | UTF_FLAG__TO_UTF8__SURROGATE_ERROR; | 738 | | Z7_UTF_FLAG_TO_UTF8_SURROGATE_ERROR |
739 | #endif | 739 | #endif |
740 | #endif | 740 | #endif |
741 | ; | 741 | ; |
@@ -840,7 +840,7 @@ bool Unicode_IsThere_Utf16SurrogateError(const UString &src) | |||
840 | } | 840 | } |
841 | */ | 841 | */ |
842 | 842 | ||
843 | #ifndef _WCHART_IS_16BIT | 843 | #ifndef Z7_WCHART_IS_16BIT |
844 | 844 | ||
845 | void Convert_UnicodeEsc16_To_UnicodeEscHigh | 845 | void Convert_UnicodeEsc16_To_UnicodeEscHigh |
846 | #if UTF_ESCAPE_PLANE == 0 | 846 | #if UTF_ESCAPE_PLANE == 0 |