diff options
| author | Li Jin <dragon-fly@qq.com> | 2022-11-15 17:23:46 +0800 |
|---|---|---|
| committer | Li Jin <dragon-fly@qq.com> | 2022-11-15 17:52:09 +0800 |
| commit | 94f8330613877b3582d32bd11abd83a97b4399ad (patch) | |
| tree | 5359de314be1ebde17f8d1e48632a97d18f9e50f /src/3rdParty/efsw/Utf.inl | |
| parent | 60f8f00a022ac08701792b2897b72d8c99b50f52 (diff) | |
| download | yuescript-94f8330613877b3582d32bd11abd83a97b4399ad.tar.gz yuescript-94f8330613877b3582d32bd11abd83a97b4399ad.tar.bz2 yuescript-94f8330613877b3582d32bd11abd83a97b4399ad.zip | |
adding -w option to Yuescript tool.
Diffstat (limited to 'src/3rdParty/efsw/Utf.inl')
| -rwxr-xr-x | src/3rdParty/efsw/Utf.inl | 576 |
1 files changed, 576 insertions, 0 deletions
diff --git a/src/3rdParty/efsw/Utf.inl b/src/3rdParty/efsw/Utf.inl new file mode 100755 index 0000000..7e3e9d6 --- /dev/null +++ b/src/3rdParty/efsw/Utf.inl | |||
| @@ -0,0 +1,576 @@ | |||
| 1 | // References : | ||
| 2 | // http://www.unicode.org/ | ||
| 3 | // http://www.unicode.org/Public/PROGRAMS/CVTUTF/ConvertUTF.c | ||
| 4 | // http://www.unicode.org/Public/PROGRAMS/CVTUTF/ConvertUTF.h | ||
| 5 | // http://people.w3.org/rishida/scripts/uniview/conversion | ||
| 6 | //////////////////////////////////////////////////////////// | ||
| 7 | |||
| 8 | template <typename In> In Utf<8>::Decode( In begin, In end, Uint32& output, Uint32 replacement ) { | ||
| 9 | // Some useful precomputed data | ||
| 10 | static const int trailing[256] = { | ||
| 11 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | ||
| 12 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | ||
| 13 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | ||
| 14 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | ||
| 15 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | ||
| 16 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | ||
| 17 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | ||
| 18 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, | ||
| 19 | 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5 }; | ||
| 20 | static const Uint32 offsets[6] = { 0x00000000, 0x00003080, 0x000E2080, | ||
| 21 | 0x03C82080, 0xFA082080, 0x82082080 }; | ||
| 22 | |||
| 23 | // Decode the character | ||
| 24 | int trailingBytes = trailing[static_cast<Uint8>( *begin )]; | ||
| 25 | if ( begin + trailingBytes < end ) { | ||
| 26 | output = 0; | ||
| 27 | switch ( trailingBytes ) { | ||
| 28 | case 5: | ||
| 29 | output += static_cast<Uint8>( *begin++ ); | ||
| 30 | output <<= 6; | ||
| 31 | case 4: | ||
| 32 | output += static_cast<Uint8>( *begin++ ); | ||
| 33 | output <<= 6; | ||
| 34 | case 3: | ||
| 35 | output += static_cast<Uint8>( *begin++ ); | ||
| 36 | output <<= 6; | ||
| 37 | case 2: | ||
| 38 | output += static_cast<Uint8>( *begin++ ); | ||
| 39 | output <<= 6; | ||
| 40 | case 1: | ||
| 41 | output += static_cast<Uint8>( *begin++ ); | ||
| 42 | output <<= 6; | ||
| 43 | case 0: | ||
| 44 | output += static_cast<Uint8>( *begin++ ); | ||
| 45 | } | ||
| 46 | output -= offsets[trailingBytes]; | ||
| 47 | } else { | ||
| 48 | // Incomplete character | ||
| 49 | begin = end; | ||
| 50 | output = replacement; | ||
| 51 | } | ||
| 52 | |||
| 53 | return begin; | ||
| 54 | } | ||
| 55 | |||
| 56 | template <typename Out> Out Utf<8>::Encode( Uint32 input, Out output, Uint8 replacement ) { | ||
| 57 | // Some useful precomputed data | ||
| 58 | static const Uint8 firstBytes[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC }; | ||
| 59 | |||
| 60 | // Encode the character | ||
| 61 | if ( ( input > 0x0010FFFF ) || ( ( input >= 0xD800 ) && ( input <= 0xDBFF ) ) ) { | ||
| 62 | // Invalid character | ||
| 63 | if ( replacement ) | ||
| 64 | *output++ = replacement; | ||
| 65 | } else { | ||
| 66 | // Valid character | ||
| 67 | |||
| 68 | // Get the number of bytes to write | ||
| 69 | int bytesToWrite = 1; | ||
| 70 | if ( input < 0x80 ) | ||
| 71 | bytesToWrite = 1; | ||
| 72 | else if ( input < 0x800 ) | ||
| 73 | bytesToWrite = 2; | ||
| 74 | else if ( input < 0x10000 ) | ||
| 75 | bytesToWrite = 3; | ||
| 76 | else if ( input <= 0x0010FFFF ) | ||
| 77 | bytesToWrite = 4; | ||
| 78 | |||
| 79 | // Extract the bytes to write | ||
| 80 | Uint8 bytes[4]; | ||
| 81 | switch ( bytesToWrite ) { | ||
| 82 | case 4: | ||
| 83 | bytes[3] = static_cast<Uint8>( ( input | 0x80 ) & 0xBF ); | ||
| 84 | input >>= 6; | ||
| 85 | case 3: | ||
| 86 | bytes[2] = static_cast<Uint8>( ( input | 0x80 ) & 0xBF ); | ||
| 87 | input >>= 6; | ||
| 88 | case 2: | ||
| 89 | bytes[1] = static_cast<Uint8>( ( input | 0x80 ) & 0xBF ); | ||
| 90 | input >>= 6; | ||
| 91 | case 1: | ||
| 92 | bytes[0] = static_cast<Uint8>( input | firstBytes[bytesToWrite] ); | ||
| 93 | } | ||
| 94 | |||
| 95 | // Add them to the output | ||
| 96 | const Uint8* currentByte = bytes; | ||
| 97 | switch ( bytesToWrite ) { | ||
| 98 | case 4: | ||
| 99 | *output++ = *currentByte++; | ||
| 100 | case 3: | ||
| 101 | *output++ = *currentByte++; | ||
| 102 | case 2: | ||
| 103 | *output++ = *currentByte++; | ||
| 104 | case 1: | ||
| 105 | *output++ = *currentByte++; | ||
| 106 | } | ||
| 107 | } | ||
| 108 | |||
| 109 | return output; | ||
| 110 | } | ||
| 111 | |||
| 112 | template <typename In> In Utf<8>::Next( In begin, In end ) { | ||
| 113 | Uint32 codepoint; | ||
| 114 | return Decode( begin, end, codepoint ); | ||
| 115 | } | ||
| 116 | |||
| 117 | template <typename In> std::size_t Utf<8>::Count( In begin, In end ) { | ||
| 118 | std::size_t length = 0; | ||
| 119 | while ( begin < end ) { | ||
| 120 | begin = Next( begin, end ); | ||
| 121 | ++length; | ||
| 122 | } | ||
| 123 | |||
| 124 | return length; | ||
| 125 | } | ||
| 126 | |||
| 127 | template <typename In, typename Out> | ||
| 128 | Out Utf<8>::FromAnsi( In begin, In end, Out output, const std::locale& locale ) { | ||
| 129 | while ( begin < end ) { | ||
| 130 | Uint32 codepoint = Utf<32>::DecodeAnsi( *begin++, locale ); | ||
| 131 | output = Encode( codepoint, output ); | ||
| 132 | } | ||
| 133 | |||
| 134 | return output; | ||
| 135 | } | ||
| 136 | |||
| 137 | template <typename In, typename Out> Out Utf<8>::FromWide( In begin, In end, Out output ) { | ||
| 138 | while ( begin < end ) { | ||
| 139 | Uint32 codepoint = Utf<32>::DecodeWide( *begin++ ); | ||
| 140 | output = Encode( codepoint, output ); | ||
| 141 | } | ||
| 142 | |||
| 143 | return output; | ||
| 144 | } | ||
| 145 | |||
| 146 | template <typename In, typename Out> Out Utf<8>::FromLatin1( In begin, In end, Out output ) { | ||
| 147 | // Latin-1 is directly compatible with Unicode encodings, | ||
| 148 | // and can thus be treated as (a sub-range of) UTF-32 | ||
| 149 | while ( begin < end ) | ||
| 150 | output = Encode( *begin++, output ); | ||
| 151 | |||
| 152 | return output; | ||
| 153 | } | ||
| 154 | |||
| 155 | template <typename In, typename Out> | ||
| 156 | Out Utf<8>::ToAnsi( In begin, In end, Out output, char replacement, const std::locale& locale ) { | ||
| 157 | while ( begin < end ) { | ||
| 158 | Uint32 codepoint; | ||
| 159 | begin = Decode( begin, end, codepoint ); | ||
| 160 | output = Utf<32>::EncodeAnsi( codepoint, output, replacement, locale ); | ||
| 161 | } | ||
| 162 | |||
| 163 | return output; | ||
| 164 | } | ||
| 165 | |||
| 166 | #ifndef EFSW_NO_WIDECHAR | ||
| 167 | template <typename In, typename Out> | ||
| 168 | Out Utf<8>::ToWide( In begin, In end, Out output, wchar_t replacement ) { | ||
| 169 | while ( begin < end ) { | ||
| 170 | Uint32 codepoint; | ||
| 171 | begin = Decode( begin, end, codepoint ); | ||
| 172 | output = Utf<32>::EncodeWide( codepoint, output, replacement ); | ||
| 173 | } | ||
| 174 | |||
| 175 | return output; | ||
| 176 | } | ||
| 177 | #endif | ||
| 178 | |||
| 179 | template <typename In, typename Out> | ||
| 180 | Out Utf<8>::ToLatin1( In begin, In end, Out output, char replacement ) { | ||
| 181 | // Latin-1 is directly compatible with Unicode encodings, | ||
| 182 | // and can thus be treated as (a sub-range of) UTF-32 | ||
| 183 | while ( begin < end ) { | ||
| 184 | Uint32 codepoint; | ||
| 185 | begin = Decode( begin, end, codepoint ); | ||
| 186 | *output++ = codepoint < 256 ? static_cast<char>( codepoint ) : replacement; | ||
| 187 | } | ||
| 188 | |||
| 189 | return output; | ||
| 190 | } | ||
| 191 | |||
| 192 | template <typename In, typename Out> Out Utf<8>::toUtf8( In begin, In end, Out output ) { | ||
| 193 | while ( begin < end ) | ||
| 194 | *output++ = *begin++; | ||
| 195 | |||
| 196 | return output; | ||
| 197 | } | ||
| 198 | |||
| 199 | template <typename In, typename Out> Out Utf<8>::ToUtf16( In begin, In end, Out output ) { | ||
| 200 | while ( begin < end ) { | ||
| 201 | Uint32 codepoint; | ||
| 202 | begin = Decode( begin, end, codepoint ); | ||
| 203 | output = Utf<16>::Encode( codepoint, output ); | ||
| 204 | } | ||
| 205 | |||
| 206 | return output; | ||
| 207 | } | ||
| 208 | |||
| 209 | template <typename In, typename Out> Out Utf<8>::ToUtf32( In begin, In end, Out output ) { | ||
| 210 | while ( begin < end ) { | ||
| 211 | Uint32 codepoint; | ||
| 212 | begin = Decode( begin, end, codepoint ); | ||
| 213 | *output++ = codepoint; | ||
| 214 | } | ||
| 215 | |||
| 216 | return output; | ||
| 217 | } | ||
| 218 | |||
| 219 | template <typename In> In Utf<16>::Decode( In begin, In end, Uint32& output, Uint32 replacement ) { | ||
| 220 | Uint16 first = *begin++; | ||
| 221 | |||
| 222 | // If it's a surrogate pair, first convert to a single UTF-32 character | ||
| 223 | if ( ( first >= 0xD800 ) && ( first <= 0xDBFF ) ) { | ||
| 224 | if ( begin < end ) { | ||
| 225 | Uint32 second = *begin++; | ||
| 226 | if ( ( second >= 0xDC00 ) && ( second <= 0xDFFF ) ) { | ||
| 227 | // The second element is valid: convert the two elements to a UTF-32 character | ||
| 228 | output = static_cast<Uint32>( ( ( first - 0xD800 ) << 10 ) + ( second - 0xDC00 ) + | ||
| 229 | 0x0010000 ); | ||
| 230 | } else { | ||
| 231 | // Invalid character | ||
| 232 | output = replacement; | ||
| 233 | } | ||
| 234 | } else { | ||
| 235 | // Invalid character | ||
| 236 | begin = end; | ||
| 237 | output = replacement; | ||
| 238 | } | ||
| 239 | } else { | ||
| 240 | // We can make a direct copy | ||
| 241 | output = first; | ||
| 242 | } | ||
| 243 | |||
| 244 | return begin; | ||
| 245 | } | ||
| 246 | |||
| 247 | template <typename Out> Out Utf<16>::Encode( Uint32 input, Out output, Uint16 replacement ) { | ||
| 248 | if ( input < 0xFFFF ) { | ||
| 249 | // The character can be copied directly, we just need to check if it's in the valid range | ||
| 250 | if ( ( input >= 0xD800 ) && ( input <= 0xDFFF ) ) { | ||
| 251 | // Invalid character (this range is reserved) | ||
| 252 | if ( replacement ) | ||
| 253 | *output++ = replacement; | ||
| 254 | } else { | ||
| 255 | // Valid character directly convertible to a single UTF-16 character | ||
| 256 | *output++ = static_cast<Uint16>( input ); | ||
| 257 | } | ||
| 258 | } else if ( input > 0x0010FFFF ) { | ||
| 259 | // Invalid character (greater than the maximum unicode value) | ||
| 260 | if ( replacement ) | ||
| 261 | *output++ = replacement; | ||
| 262 | } else { | ||
| 263 | // The input character will be converted to two UTF-16 elements | ||
| 264 | input -= 0x0010000; | ||
| 265 | *output++ = static_cast<Uint16>( ( input >> 10 ) + 0xD800 ); | ||
| 266 | *output++ = static_cast<Uint16>( ( input & 0x3FFUL ) + 0xDC00 ); | ||
| 267 | } | ||
| 268 | |||
| 269 | return output; | ||
| 270 | } | ||
| 271 | |||
| 272 | template <typename In> In Utf<16>::Next( In begin, In end ) { | ||
| 273 | Uint32 codepoint; | ||
| 274 | return Decode( begin, end, codepoint ); | ||
| 275 | } | ||
| 276 | |||
| 277 | template <typename In> std::size_t Utf<16>::Count( In begin, In end ) { | ||
| 278 | std::size_t length = 0; | ||
| 279 | while ( begin < end ) { | ||
| 280 | begin = Next( begin, end ); | ||
| 281 | ++length; | ||
| 282 | } | ||
| 283 | |||
| 284 | return length; | ||
| 285 | } | ||
| 286 | |||
| 287 | template <typename In, typename Out> | ||
| 288 | Out Utf<16>::FromAnsi( In begin, In end, Out output, const std::locale& locale ) { | ||
| 289 | while ( begin < end ) { | ||
| 290 | Uint32 codepoint = Utf<32>::DecodeAnsi( *begin++, locale ); | ||
| 291 | output = Encode( codepoint, output ); | ||
| 292 | } | ||
| 293 | |||
| 294 | return output; | ||
| 295 | } | ||
| 296 | |||
| 297 | template <typename In, typename Out> Out Utf<16>::FromWide( In begin, In end, Out output ) { | ||
| 298 | while ( begin < end ) { | ||
| 299 | Uint32 codepoint = Utf<32>::DecodeWide( *begin++ ); | ||
| 300 | output = Encode( codepoint, output ); | ||
| 301 | } | ||
| 302 | |||
| 303 | return output; | ||
| 304 | } | ||
| 305 | |||
| 306 | template <typename In, typename Out> Out Utf<16>::FromLatin1( In begin, In end, Out output ) { | ||
| 307 | // Latin-1 is directly compatible with Unicode encodings, | ||
| 308 | // and can thus be treated as (a sub-range of) UTF-32 | ||
| 309 | while ( begin < end ) | ||
| 310 | *output++ = *begin++; | ||
| 311 | |||
| 312 | return output; | ||
| 313 | } | ||
| 314 | |||
| 315 | template <typename In, typename Out> | ||
| 316 | Out Utf<16>::ToAnsi( In begin, In end, Out output, char replacement, const std::locale& locale ) { | ||
| 317 | while ( begin < end ) { | ||
| 318 | Uint32 codepoint; | ||
| 319 | begin = Decode( begin, end, codepoint ); | ||
| 320 | output = Utf<32>::EncodeAnsi( codepoint, output, replacement, locale ); | ||
| 321 | } | ||
| 322 | |||
| 323 | return output; | ||
| 324 | } | ||
| 325 | |||
| 326 | #ifndef EFSW_NO_WIDECHAR | ||
| 327 | template <typename In, typename Out> | ||
| 328 | Out Utf<16>::ToWide( In begin, In end, Out output, wchar_t replacement ) { | ||
| 329 | while ( begin < end ) { | ||
| 330 | Uint32 codepoint; | ||
| 331 | begin = Decode( begin, end, codepoint ); | ||
| 332 | output = Utf<32>::EncodeWide( codepoint, output, replacement ); | ||
| 333 | } | ||
| 334 | |||
| 335 | return output; | ||
| 336 | } | ||
| 337 | #endif | ||
| 338 | |||
| 339 | template <typename In, typename Out> | ||
| 340 | Out Utf<16>::ToLatin1( In begin, In end, Out output, char replacement ) { | ||
| 341 | // Latin-1 is directly compatible with Unicode encodings, | ||
| 342 | // and can thus be treated as (a sub-range of) UTF-32 | ||
| 343 | while ( begin < end ) { | ||
| 344 | *output++ = *begin < 256 ? static_cast<char>( *begin ) : replacement; | ||
| 345 | begin++; | ||
| 346 | } | ||
| 347 | |||
| 348 | return output; | ||
| 349 | } | ||
| 350 | |||
| 351 | template <typename In, typename Out> Out Utf<16>::toUtf8( In begin, In end, Out output ) { | ||
| 352 | while ( begin < end ) { | ||
| 353 | Uint32 codepoint; | ||
| 354 | begin = Decode( begin, end, codepoint ); | ||
| 355 | output = Utf<8>::Encode( codepoint, output ); | ||
| 356 | } | ||
| 357 | |||
| 358 | return output; | ||
| 359 | } | ||
| 360 | |||
| 361 | template <typename In, typename Out> Out Utf<16>::ToUtf16( In begin, In end, Out output ) { | ||
| 362 | while ( begin < end ) | ||
| 363 | *output++ = *begin++; | ||
| 364 | |||
| 365 | return output; | ||
| 366 | } | ||
| 367 | |||
| 368 | template <typename In, typename Out> Out Utf<16>::ToUtf32( In begin, In end, Out output ) { | ||
| 369 | while ( begin < end ) { | ||
| 370 | Uint32 codepoint; | ||
| 371 | begin = Decode( begin, end, codepoint ); | ||
| 372 | *output++ = codepoint; | ||
| 373 | } | ||
| 374 | |||
| 375 | return output; | ||
| 376 | } | ||
| 377 | |||
| 378 | template <typename In> In Utf<32>::Decode( In begin, In end, Uint32& output, Uint32 ) { | ||
| 379 | output = *begin++; | ||
| 380 | return begin; | ||
| 381 | } | ||
| 382 | |||
| 383 | template <typename Out> Out Utf<32>::Encode( Uint32 input, Out output, Uint32 replacement ) { | ||
| 384 | *output++ = input; | ||
| 385 | return output; | ||
| 386 | } | ||
| 387 | |||
| 388 | template <typename In> In Utf<32>::Next( In begin, In end ) { | ||
| 389 | return ++begin; | ||
| 390 | } | ||
| 391 | |||
| 392 | template <typename In> std::size_t Utf<32>::Count( In begin, In end ) { | ||
| 393 | return begin - end; | ||
| 394 | } | ||
| 395 | |||
| 396 | template <typename In, typename Out> | ||
| 397 | Out Utf<32>::FromAnsi( In begin, In end, Out output, const std::locale& locale ) { | ||
| 398 | while ( begin < end ) | ||
| 399 | *output++ = DecodeAnsi( *begin++, locale ); | ||
| 400 | |||
| 401 | return output; | ||
| 402 | } | ||
| 403 | |||
| 404 | template <typename In, typename Out> Out Utf<32>::FromWide( In begin, In end, Out output ) { | ||
| 405 | while ( begin < end ) | ||
| 406 | *output++ = DecodeWide( *begin++ ); | ||
| 407 | |||
| 408 | return output; | ||
| 409 | } | ||
| 410 | |||
| 411 | template <typename In, typename Out> Out Utf<32>::FromLatin1( In begin, In end, Out output ) { | ||
| 412 | // Latin-1 is directly compatible with Unicode encodings, | ||
| 413 | // and can thus be treated as (a sub-range of) UTF-32 | ||
| 414 | while ( begin < end ) | ||
| 415 | *output++ = *begin++; | ||
| 416 | |||
| 417 | return output; | ||
| 418 | } | ||
| 419 | |||
| 420 | template <typename In, typename Out> | ||
| 421 | Out Utf<32>::ToAnsi( In begin, In end, Out output, char replacement, const std::locale& locale ) { | ||
| 422 | while ( begin < end ) | ||
| 423 | output = EncodeAnsi( *begin++, output, replacement, locale ); | ||
| 424 | |||
| 425 | return output; | ||
| 426 | } | ||
| 427 | |||
| 428 | #ifndef EFSW_NO_WIDECHAR | ||
| 429 | template <typename In, typename Out> | ||
| 430 | Out Utf<32>::ToWide( In begin, In end, Out output, wchar_t replacement ) { | ||
| 431 | while ( begin < end ) | ||
| 432 | output = EncodeWide( *begin++, output, replacement ); | ||
| 433 | |||
| 434 | return output; | ||
| 435 | } | ||
| 436 | #endif | ||
| 437 | |||
| 438 | template <typename In, typename Out> | ||
| 439 | Out Utf<32>::ToLatin1( In begin, In end, Out output, char replacement ) { | ||
| 440 | // Latin-1 is directly compatible with Unicode encodings, | ||
| 441 | // and can thus be treated as (a sub-range of) UTF-32 | ||
| 442 | while ( begin < end ) { | ||
| 443 | *output++ = *begin < 256 ? static_cast<char>( *begin ) : replacement; | ||
| 444 | begin++; | ||
| 445 | } | ||
| 446 | |||
| 447 | return output; | ||
| 448 | } | ||
| 449 | |||
| 450 | template <typename In, typename Out> Out Utf<32>::toUtf8( In begin, In end, Out output ) { | ||
| 451 | while ( begin < end ) | ||
| 452 | output = Utf<8>::Encode( *begin++, output ); | ||
| 453 | |||
| 454 | return output; | ||
| 455 | } | ||
| 456 | |||
| 457 | template <typename In, typename Out> Out Utf<32>::ToUtf16( In begin, In end, Out output ) { | ||
| 458 | while ( begin < end ) | ||
| 459 | output = Utf<16>::Encode( *begin++, output ); | ||
| 460 | |||
| 461 | return output; | ||
| 462 | } | ||
| 463 | |||
| 464 | template <typename In, typename Out> Out Utf<32>::ToUtf32( In begin, In end, Out output ) { | ||
| 465 | while ( begin < end ) | ||
| 466 | *output++ = *begin++; | ||
| 467 | |||
| 468 | return output; | ||
| 469 | } | ||
| 470 | |||
| 471 | template <typename In> Uint32 Utf<32>::DecodeAnsi( In input, const std::locale& locale ) { | ||
| 472 | // On Windows, gcc's standard library (glibc++) has almost | ||
| 473 | // no support for Unicode stuff. As a consequence, in this | ||
| 474 | // context we can only use the default locale and ignore | ||
| 475 | // the one passed as parameter. | ||
| 476 | |||
| 477 | #if EFSW_PLATFORM == EFSW_PLATFORM_WIN && /* if Windows ... */ \ | ||
| 478 | ( defined( __GLIBCPP__ ) || \ | ||
| 479 | defined( __GLIBCXX__ ) ) && /* ... and standard library is glibc++ ... */ \ | ||
| 480 | !( defined( __SGI_STL_PORT ) || \ | ||
| 481 | defined( _STLPORT_VERSION ) ) /* ... and STLPort is not used on top of it */ | ||
| 482 | |||
| 483 | wchar_t character = 0; | ||
| 484 | mbtowc( &character, &input, 1 ); | ||
| 485 | return static_cast<Uint32>( character ); | ||
| 486 | |||
| 487 | #else | ||
| 488 | // Get the facet of the locale which deals with character conversion | ||
| 489 | #ifndef EFSW_NO_WIDECHAR | ||
| 490 | const std::ctype<wchar_t>& facet = std::use_facet<std::ctype<wchar_t>>( locale ); | ||
| 491 | #else | ||
| 492 | const std::ctype<char>& facet = std::use_facet<std::ctype<char>>( locale ); | ||
| 493 | #endif | ||
| 494 | |||
| 495 | // Use the facet to convert each character of the input string | ||
| 496 | return static_cast<Uint32>( facet.widen( input ) ); | ||
| 497 | |||
| 498 | #endif | ||
| 499 | } | ||
| 500 | |||
| 501 | template <typename In> Uint32 Utf<32>::DecodeWide( In input ) { | ||
| 502 | // The encoding of wide characters is not well defined and is left to the system; | ||
| 503 | // however we can safely assume that it is UCS-2 on Windows and | ||
| 504 | // UCS-4 on Unix systems. | ||
| 505 | // In both cases, a simple copy is enough (UCS-2 is a subset of UCS-4, | ||
| 506 | // and UCS-4 *is* UTF-32). | ||
| 507 | |||
| 508 | return input; | ||
| 509 | } | ||
| 510 | |||
| 511 | template <typename Out> | ||
| 512 | Out Utf<32>::EncodeAnsi( Uint32 codepoint, Out output, char replacement, | ||
| 513 | const std::locale& locale ) { | ||
| 514 | // On Windows, gcc's standard library (glibc++) has almost | ||
| 515 | // no support for Unicode stuff. As a consequence, in this | ||
| 516 | // context we can only use the default locale and ignore | ||
| 517 | // the one passed as parameter. | ||
| 518 | |||
| 519 | #if EFSW_PLATFORM == EFSW_PLATFORM_WIN && /* if Windows ... */ \ | ||
| 520 | ( defined( __GLIBCPP__ ) || \ | ||
| 521 | defined( __GLIBCXX__ ) ) && /* ... and standard library is glibc++ ... */ \ | ||
| 522 | !( defined( __SGI_STL_PORT ) || \ | ||
| 523 | defined( _STLPORT_VERSION ) ) /* ... and STLPort is not used on top of it */ | ||
| 524 | |||
| 525 | char character = 0; | ||
| 526 | if ( wctomb( &character, static_cast<wchar_t>( codepoint ) ) >= 0 ) | ||
| 527 | *output++ = character; | ||
| 528 | else if ( replacement ) | ||
| 529 | *output++ = replacement; | ||
| 530 | |||
| 531 | return output; | ||
| 532 | |||
| 533 | #else | ||
| 534 | // Get the facet of the locale which deals with character conversion | ||
| 535 | #ifndef EFSW_NO_WIDECHAR | ||
| 536 | const std::ctype<wchar_t>& facet = std::use_facet<std::ctype<wchar_t>>( locale ); | ||
| 537 | #else | ||
| 538 | const std::ctype<char>& facet = std::use_facet<std::ctype<char>>( locale ); | ||
| 539 | #endif | ||
| 540 | |||
| 541 | // Use the facet to convert each character of the input string | ||
| 542 | *output++ = facet.narrow( static_cast<wchar_t>( codepoint ), replacement ); | ||
| 543 | |||
| 544 | return output; | ||
| 545 | |||
| 546 | #endif | ||
| 547 | } | ||
| 548 | |||
| 549 | #ifndef EFSW_NO_WIDECHAR | ||
| 550 | template <typename Out> | ||
| 551 | Out Utf<32>::EncodeWide( Uint32 codepoint, Out output, wchar_t replacement ) { | ||
| 552 | // The encoding of wide characters is not well defined and is left to the system; | ||
| 553 | // however we can safely assume that it is UCS-2 on Windows and | ||
| 554 | // UCS-4 on Unix systems. | ||
| 555 | // For UCS-2 we need to check if the source characters fits in (UCS-2 is a subset of UCS-4). | ||
| 556 | // For UCS-4 we can do a direct copy (UCS-4 *is* UTF-32). | ||
| 557 | |||
| 558 | switch ( sizeof( wchar_t ) ) { | ||
| 559 | case 4: { | ||
| 560 | *output++ = static_cast<wchar_t>( codepoint ); | ||
| 561 | break; | ||
| 562 | } | ||
| 563 | |||
| 564 | default: { | ||
| 565 | if ( ( codepoint <= 0xFFFF ) && ( ( codepoint < 0xD800 ) || ( codepoint > 0xDFFF ) ) ) { | ||
| 566 | *output++ = static_cast<wchar_t>( codepoint ); | ||
| 567 | } else if ( replacement ) { | ||
| 568 | *output++ = replacement; | ||
| 569 | } | ||
| 570 | break; | ||
| 571 | } | ||
| 572 | } | ||
| 573 | |||
| 574 | return output; | ||
| 575 | } | ||
| 576 | #endif | ||
