diff options
Diffstat (limited to 'src/3rdParty/efsw/Utf.inl')
-rwxr-xr-x | src/3rdParty/efsw/Utf.inl | 576 |
1 files changed, 576 insertions, 0 deletions
diff --git a/src/3rdParty/efsw/Utf.inl b/src/3rdParty/efsw/Utf.inl new file mode 100755 index 0000000..7e3e9d6 --- /dev/null +++ b/src/3rdParty/efsw/Utf.inl | |||
@@ -0,0 +1,576 @@ | |||
1 | // References : | ||
2 | // http://www.unicode.org/ | ||
3 | // http://www.unicode.org/Public/PROGRAMS/CVTUTF/ConvertUTF.c | ||
4 | // http://www.unicode.org/Public/PROGRAMS/CVTUTF/ConvertUTF.h | ||
5 | // http://people.w3.org/rishida/scripts/uniview/conversion | ||
6 | //////////////////////////////////////////////////////////// | ||
7 | |||
8 | template <typename In> In Utf<8>::Decode( In begin, In end, Uint32& output, Uint32 replacement ) { | ||
9 | // Some useful precomputed data | ||
10 | static const int trailing[256] = { | ||
11 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | ||
12 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | ||
13 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | ||
14 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | ||
15 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | ||
16 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | ||
17 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | ||
18 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, | ||
19 | 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5 }; | ||
20 | static const Uint32 offsets[6] = { 0x00000000, 0x00003080, 0x000E2080, | ||
21 | 0x03C82080, 0xFA082080, 0x82082080 }; | ||
22 | |||
23 | // Decode the character | ||
24 | int trailingBytes = trailing[static_cast<Uint8>( *begin )]; | ||
25 | if ( begin + trailingBytes < end ) { | ||
26 | output = 0; | ||
27 | switch ( trailingBytes ) { | ||
28 | case 5: | ||
29 | output += static_cast<Uint8>( *begin++ ); | ||
30 | output <<= 6; | ||
31 | case 4: | ||
32 | output += static_cast<Uint8>( *begin++ ); | ||
33 | output <<= 6; | ||
34 | case 3: | ||
35 | output += static_cast<Uint8>( *begin++ ); | ||
36 | output <<= 6; | ||
37 | case 2: | ||
38 | output += static_cast<Uint8>( *begin++ ); | ||
39 | output <<= 6; | ||
40 | case 1: | ||
41 | output += static_cast<Uint8>( *begin++ ); | ||
42 | output <<= 6; | ||
43 | case 0: | ||
44 | output += static_cast<Uint8>( *begin++ ); | ||
45 | } | ||
46 | output -= offsets[trailingBytes]; | ||
47 | } else { | ||
48 | // Incomplete character | ||
49 | begin = end; | ||
50 | output = replacement; | ||
51 | } | ||
52 | |||
53 | return begin; | ||
54 | } | ||
55 | |||
56 | template <typename Out> Out Utf<8>::Encode( Uint32 input, Out output, Uint8 replacement ) { | ||
57 | // Some useful precomputed data | ||
58 | static const Uint8 firstBytes[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC }; | ||
59 | |||
60 | // Encode the character | ||
61 | if ( ( input > 0x0010FFFF ) || ( ( input >= 0xD800 ) && ( input <= 0xDBFF ) ) ) { | ||
62 | // Invalid character | ||
63 | if ( replacement ) | ||
64 | *output++ = replacement; | ||
65 | } else { | ||
66 | // Valid character | ||
67 | |||
68 | // Get the number of bytes to write | ||
69 | int bytesToWrite = 1; | ||
70 | if ( input < 0x80 ) | ||
71 | bytesToWrite = 1; | ||
72 | else if ( input < 0x800 ) | ||
73 | bytesToWrite = 2; | ||
74 | else if ( input < 0x10000 ) | ||
75 | bytesToWrite = 3; | ||
76 | else if ( input <= 0x0010FFFF ) | ||
77 | bytesToWrite = 4; | ||
78 | |||
79 | // Extract the bytes to write | ||
80 | Uint8 bytes[4]; | ||
81 | switch ( bytesToWrite ) { | ||
82 | case 4: | ||
83 | bytes[3] = static_cast<Uint8>( ( input | 0x80 ) & 0xBF ); | ||
84 | input >>= 6; | ||
85 | case 3: | ||
86 | bytes[2] = static_cast<Uint8>( ( input | 0x80 ) & 0xBF ); | ||
87 | input >>= 6; | ||
88 | case 2: | ||
89 | bytes[1] = static_cast<Uint8>( ( input | 0x80 ) & 0xBF ); | ||
90 | input >>= 6; | ||
91 | case 1: | ||
92 | bytes[0] = static_cast<Uint8>( input | firstBytes[bytesToWrite] ); | ||
93 | } | ||
94 | |||
95 | // Add them to the output | ||
96 | const Uint8* currentByte = bytes; | ||
97 | switch ( bytesToWrite ) { | ||
98 | case 4: | ||
99 | *output++ = *currentByte++; | ||
100 | case 3: | ||
101 | *output++ = *currentByte++; | ||
102 | case 2: | ||
103 | *output++ = *currentByte++; | ||
104 | case 1: | ||
105 | *output++ = *currentByte++; | ||
106 | } | ||
107 | } | ||
108 | |||
109 | return output; | ||
110 | } | ||
111 | |||
112 | template <typename In> In Utf<8>::Next( In begin, In end ) { | ||
113 | Uint32 codepoint; | ||
114 | return Decode( begin, end, codepoint ); | ||
115 | } | ||
116 | |||
117 | template <typename In> std::size_t Utf<8>::Count( In begin, In end ) { | ||
118 | std::size_t length = 0; | ||
119 | while ( begin < end ) { | ||
120 | begin = Next( begin, end ); | ||
121 | ++length; | ||
122 | } | ||
123 | |||
124 | return length; | ||
125 | } | ||
126 | |||
127 | template <typename In, typename Out> | ||
128 | Out Utf<8>::FromAnsi( In begin, In end, Out output, const std::locale& locale ) { | ||
129 | while ( begin < end ) { | ||
130 | Uint32 codepoint = Utf<32>::DecodeAnsi( *begin++, locale ); | ||
131 | output = Encode( codepoint, output ); | ||
132 | } | ||
133 | |||
134 | return output; | ||
135 | } | ||
136 | |||
137 | template <typename In, typename Out> Out Utf<8>::FromWide( In begin, In end, Out output ) { | ||
138 | while ( begin < end ) { | ||
139 | Uint32 codepoint = Utf<32>::DecodeWide( *begin++ ); | ||
140 | output = Encode( codepoint, output ); | ||
141 | } | ||
142 | |||
143 | return output; | ||
144 | } | ||
145 | |||
146 | template <typename In, typename Out> Out Utf<8>::FromLatin1( In begin, In end, Out output ) { | ||
147 | // Latin-1 is directly compatible with Unicode encodings, | ||
148 | // and can thus be treated as (a sub-range of) UTF-32 | ||
149 | while ( begin < end ) | ||
150 | output = Encode( *begin++, output ); | ||
151 | |||
152 | return output; | ||
153 | } | ||
154 | |||
155 | template <typename In, typename Out> | ||
156 | Out Utf<8>::ToAnsi( In begin, In end, Out output, char replacement, const std::locale& locale ) { | ||
157 | while ( begin < end ) { | ||
158 | Uint32 codepoint; | ||
159 | begin = Decode( begin, end, codepoint ); | ||
160 | output = Utf<32>::EncodeAnsi( codepoint, output, replacement, locale ); | ||
161 | } | ||
162 | |||
163 | return output; | ||
164 | } | ||
165 | |||
166 | #ifndef EFSW_NO_WIDECHAR | ||
167 | template <typename In, typename Out> | ||
168 | Out Utf<8>::ToWide( In begin, In end, Out output, wchar_t replacement ) { | ||
169 | while ( begin < end ) { | ||
170 | Uint32 codepoint; | ||
171 | begin = Decode( begin, end, codepoint ); | ||
172 | output = Utf<32>::EncodeWide( codepoint, output, replacement ); | ||
173 | } | ||
174 | |||
175 | return output; | ||
176 | } | ||
177 | #endif | ||
178 | |||
179 | template <typename In, typename Out> | ||
180 | Out Utf<8>::ToLatin1( In begin, In end, Out output, char replacement ) { | ||
181 | // Latin-1 is directly compatible with Unicode encodings, | ||
182 | // and can thus be treated as (a sub-range of) UTF-32 | ||
183 | while ( begin < end ) { | ||
184 | Uint32 codepoint; | ||
185 | begin = Decode( begin, end, codepoint ); | ||
186 | *output++ = codepoint < 256 ? static_cast<char>( codepoint ) : replacement; | ||
187 | } | ||
188 | |||
189 | return output; | ||
190 | } | ||
191 | |||
192 | template <typename In, typename Out> Out Utf<8>::toUtf8( In begin, In end, Out output ) { | ||
193 | while ( begin < end ) | ||
194 | *output++ = *begin++; | ||
195 | |||
196 | return output; | ||
197 | } | ||
198 | |||
199 | template <typename In, typename Out> Out Utf<8>::ToUtf16( In begin, In end, Out output ) { | ||
200 | while ( begin < end ) { | ||
201 | Uint32 codepoint; | ||
202 | begin = Decode( begin, end, codepoint ); | ||
203 | output = Utf<16>::Encode( codepoint, output ); | ||
204 | } | ||
205 | |||
206 | return output; | ||
207 | } | ||
208 | |||
209 | template <typename In, typename Out> Out Utf<8>::ToUtf32( In begin, In end, Out output ) { | ||
210 | while ( begin < end ) { | ||
211 | Uint32 codepoint; | ||
212 | begin = Decode( begin, end, codepoint ); | ||
213 | *output++ = codepoint; | ||
214 | } | ||
215 | |||
216 | return output; | ||
217 | } | ||
218 | |||
219 | template <typename In> In Utf<16>::Decode( In begin, In end, Uint32& output, Uint32 replacement ) { | ||
220 | Uint16 first = *begin++; | ||
221 | |||
222 | // If it's a surrogate pair, first convert to a single UTF-32 character | ||
223 | if ( ( first >= 0xD800 ) && ( first <= 0xDBFF ) ) { | ||
224 | if ( begin < end ) { | ||
225 | Uint32 second = *begin++; | ||
226 | if ( ( second >= 0xDC00 ) && ( second <= 0xDFFF ) ) { | ||
227 | // The second element is valid: convert the two elements to a UTF-32 character | ||
228 | output = static_cast<Uint32>( ( ( first - 0xD800 ) << 10 ) + ( second - 0xDC00 ) + | ||
229 | 0x0010000 ); | ||
230 | } else { | ||
231 | // Invalid character | ||
232 | output = replacement; | ||
233 | } | ||
234 | } else { | ||
235 | // Invalid character | ||
236 | begin = end; | ||
237 | output = replacement; | ||
238 | } | ||
239 | } else { | ||
240 | // We can make a direct copy | ||
241 | output = first; | ||
242 | } | ||
243 | |||
244 | return begin; | ||
245 | } | ||
246 | |||
247 | template <typename Out> Out Utf<16>::Encode( Uint32 input, Out output, Uint16 replacement ) { | ||
248 | if ( input < 0xFFFF ) { | ||
249 | // The character can be copied directly, we just need to check if it's in the valid range | ||
250 | if ( ( input >= 0xD800 ) && ( input <= 0xDFFF ) ) { | ||
251 | // Invalid character (this range is reserved) | ||
252 | if ( replacement ) | ||
253 | *output++ = replacement; | ||
254 | } else { | ||
255 | // Valid character directly convertible to a single UTF-16 character | ||
256 | *output++ = static_cast<Uint16>( input ); | ||
257 | } | ||
258 | } else if ( input > 0x0010FFFF ) { | ||
259 | // Invalid character (greater than the maximum unicode value) | ||
260 | if ( replacement ) | ||
261 | *output++ = replacement; | ||
262 | } else { | ||
263 | // The input character will be converted to two UTF-16 elements | ||
264 | input -= 0x0010000; | ||
265 | *output++ = static_cast<Uint16>( ( input >> 10 ) + 0xD800 ); | ||
266 | *output++ = static_cast<Uint16>( ( input & 0x3FFUL ) + 0xDC00 ); | ||
267 | } | ||
268 | |||
269 | return output; | ||
270 | } | ||
271 | |||
272 | template <typename In> In Utf<16>::Next( In begin, In end ) { | ||
273 | Uint32 codepoint; | ||
274 | return Decode( begin, end, codepoint ); | ||
275 | } | ||
276 | |||
277 | template <typename In> std::size_t Utf<16>::Count( In begin, In end ) { | ||
278 | std::size_t length = 0; | ||
279 | while ( begin < end ) { | ||
280 | begin = Next( begin, end ); | ||
281 | ++length; | ||
282 | } | ||
283 | |||
284 | return length; | ||
285 | } | ||
286 | |||
287 | template <typename In, typename Out> | ||
288 | Out Utf<16>::FromAnsi( In begin, In end, Out output, const std::locale& locale ) { | ||
289 | while ( begin < end ) { | ||
290 | Uint32 codepoint = Utf<32>::DecodeAnsi( *begin++, locale ); | ||
291 | output = Encode( codepoint, output ); | ||
292 | } | ||
293 | |||
294 | return output; | ||
295 | } | ||
296 | |||
297 | template <typename In, typename Out> Out Utf<16>::FromWide( In begin, In end, Out output ) { | ||
298 | while ( begin < end ) { | ||
299 | Uint32 codepoint = Utf<32>::DecodeWide( *begin++ ); | ||
300 | output = Encode( codepoint, output ); | ||
301 | } | ||
302 | |||
303 | return output; | ||
304 | } | ||
305 | |||
306 | template <typename In, typename Out> Out Utf<16>::FromLatin1( In begin, In end, Out output ) { | ||
307 | // Latin-1 is directly compatible with Unicode encodings, | ||
308 | // and can thus be treated as (a sub-range of) UTF-32 | ||
309 | while ( begin < end ) | ||
310 | *output++ = *begin++; | ||
311 | |||
312 | return output; | ||
313 | } | ||
314 | |||
315 | template <typename In, typename Out> | ||
316 | Out Utf<16>::ToAnsi( In begin, In end, Out output, char replacement, const std::locale& locale ) { | ||
317 | while ( begin < end ) { | ||
318 | Uint32 codepoint; | ||
319 | begin = Decode( begin, end, codepoint ); | ||
320 | output = Utf<32>::EncodeAnsi( codepoint, output, replacement, locale ); | ||
321 | } | ||
322 | |||
323 | return output; | ||
324 | } | ||
325 | |||
326 | #ifndef EFSW_NO_WIDECHAR | ||
327 | template <typename In, typename Out> | ||
328 | Out Utf<16>::ToWide( In begin, In end, Out output, wchar_t replacement ) { | ||
329 | while ( begin < end ) { | ||
330 | Uint32 codepoint; | ||
331 | begin = Decode( begin, end, codepoint ); | ||
332 | output = Utf<32>::EncodeWide( codepoint, output, replacement ); | ||
333 | } | ||
334 | |||
335 | return output; | ||
336 | } | ||
337 | #endif | ||
338 | |||
339 | template <typename In, typename Out> | ||
340 | Out Utf<16>::ToLatin1( In begin, In end, Out output, char replacement ) { | ||
341 | // Latin-1 is directly compatible with Unicode encodings, | ||
342 | // and can thus be treated as (a sub-range of) UTF-32 | ||
343 | while ( begin < end ) { | ||
344 | *output++ = *begin < 256 ? static_cast<char>( *begin ) : replacement; | ||
345 | begin++; | ||
346 | } | ||
347 | |||
348 | return output; | ||
349 | } | ||
350 | |||
351 | template <typename In, typename Out> Out Utf<16>::toUtf8( In begin, In end, Out output ) { | ||
352 | while ( begin < end ) { | ||
353 | Uint32 codepoint; | ||
354 | begin = Decode( begin, end, codepoint ); | ||
355 | output = Utf<8>::Encode( codepoint, output ); | ||
356 | } | ||
357 | |||
358 | return output; | ||
359 | } | ||
360 | |||
361 | template <typename In, typename Out> Out Utf<16>::ToUtf16( In begin, In end, Out output ) { | ||
362 | while ( begin < end ) | ||
363 | *output++ = *begin++; | ||
364 | |||
365 | return output; | ||
366 | } | ||
367 | |||
368 | template <typename In, typename Out> Out Utf<16>::ToUtf32( In begin, In end, Out output ) { | ||
369 | while ( begin < end ) { | ||
370 | Uint32 codepoint; | ||
371 | begin = Decode( begin, end, codepoint ); | ||
372 | *output++ = codepoint; | ||
373 | } | ||
374 | |||
375 | return output; | ||
376 | } | ||
377 | |||
378 | template <typename In> In Utf<32>::Decode( In begin, In end, Uint32& output, Uint32 ) { | ||
379 | output = *begin++; | ||
380 | return begin; | ||
381 | } | ||
382 | |||
383 | template <typename Out> Out Utf<32>::Encode( Uint32 input, Out output, Uint32 replacement ) { | ||
384 | *output++ = input; | ||
385 | return output; | ||
386 | } | ||
387 | |||
388 | template <typename In> In Utf<32>::Next( In begin, In end ) { | ||
389 | return ++begin; | ||
390 | } | ||
391 | |||
392 | template <typename In> std::size_t Utf<32>::Count( In begin, In end ) { | ||
393 | return begin - end; | ||
394 | } | ||
395 | |||
396 | template <typename In, typename Out> | ||
397 | Out Utf<32>::FromAnsi( In begin, In end, Out output, const std::locale& locale ) { | ||
398 | while ( begin < end ) | ||
399 | *output++ = DecodeAnsi( *begin++, locale ); | ||
400 | |||
401 | return output; | ||
402 | } | ||
403 | |||
404 | template <typename In, typename Out> Out Utf<32>::FromWide( In begin, In end, Out output ) { | ||
405 | while ( begin < end ) | ||
406 | *output++ = DecodeWide( *begin++ ); | ||
407 | |||
408 | return output; | ||
409 | } | ||
410 | |||
411 | template <typename In, typename Out> Out Utf<32>::FromLatin1( In begin, In end, Out output ) { | ||
412 | // Latin-1 is directly compatible with Unicode encodings, | ||
413 | // and can thus be treated as (a sub-range of) UTF-32 | ||
414 | while ( begin < end ) | ||
415 | *output++ = *begin++; | ||
416 | |||
417 | return output; | ||
418 | } | ||
419 | |||
420 | template <typename In, typename Out> | ||
421 | Out Utf<32>::ToAnsi( In begin, In end, Out output, char replacement, const std::locale& locale ) { | ||
422 | while ( begin < end ) | ||
423 | output = EncodeAnsi( *begin++, output, replacement, locale ); | ||
424 | |||
425 | return output; | ||
426 | } | ||
427 | |||
428 | #ifndef EFSW_NO_WIDECHAR | ||
429 | template <typename In, typename Out> | ||
430 | Out Utf<32>::ToWide( In begin, In end, Out output, wchar_t replacement ) { | ||
431 | while ( begin < end ) | ||
432 | output = EncodeWide( *begin++, output, replacement ); | ||
433 | |||
434 | return output; | ||
435 | } | ||
436 | #endif | ||
437 | |||
438 | template <typename In, typename Out> | ||
439 | Out Utf<32>::ToLatin1( In begin, In end, Out output, char replacement ) { | ||
440 | // Latin-1 is directly compatible with Unicode encodings, | ||
441 | // and can thus be treated as (a sub-range of) UTF-32 | ||
442 | while ( begin < end ) { | ||
443 | *output++ = *begin < 256 ? static_cast<char>( *begin ) : replacement; | ||
444 | begin++; | ||
445 | } | ||
446 | |||
447 | return output; | ||
448 | } | ||
449 | |||
450 | template <typename In, typename Out> Out Utf<32>::toUtf8( In begin, In end, Out output ) { | ||
451 | while ( begin < end ) | ||
452 | output = Utf<8>::Encode( *begin++, output ); | ||
453 | |||
454 | return output; | ||
455 | } | ||
456 | |||
457 | template <typename In, typename Out> Out Utf<32>::ToUtf16( In begin, In end, Out output ) { | ||
458 | while ( begin < end ) | ||
459 | output = Utf<16>::Encode( *begin++, output ); | ||
460 | |||
461 | return output; | ||
462 | } | ||
463 | |||
464 | template <typename In, typename Out> Out Utf<32>::ToUtf32( In begin, In end, Out output ) { | ||
465 | while ( begin < end ) | ||
466 | *output++ = *begin++; | ||
467 | |||
468 | return output; | ||
469 | } | ||
470 | |||
471 | template <typename In> Uint32 Utf<32>::DecodeAnsi( In input, const std::locale& locale ) { | ||
472 | // On Windows, gcc's standard library (glibc++) has almost | ||
473 | // no support for Unicode stuff. As a consequence, in this | ||
474 | // context we can only use the default locale and ignore | ||
475 | // the one passed as parameter. | ||
476 | |||
477 | #if EFSW_PLATFORM == EFSW_PLATFORM_WIN && /* if Windows ... */ \ | ||
478 | ( defined( __GLIBCPP__ ) || \ | ||
479 | defined( __GLIBCXX__ ) ) && /* ... and standard library is glibc++ ... */ \ | ||
480 | !( defined( __SGI_STL_PORT ) || \ | ||
481 | defined( _STLPORT_VERSION ) ) /* ... and STLPort is not used on top of it */ | ||
482 | |||
483 | wchar_t character = 0; | ||
484 | mbtowc( &character, &input, 1 ); | ||
485 | return static_cast<Uint32>( character ); | ||
486 | |||
487 | #else | ||
488 | // Get the facet of the locale which deals with character conversion | ||
489 | #ifndef EFSW_NO_WIDECHAR | ||
490 | const std::ctype<wchar_t>& facet = std::use_facet<std::ctype<wchar_t>>( locale ); | ||
491 | #else | ||
492 | const std::ctype<char>& facet = std::use_facet<std::ctype<char>>( locale ); | ||
493 | #endif | ||
494 | |||
495 | // Use the facet to convert each character of the input string | ||
496 | return static_cast<Uint32>( facet.widen( input ) ); | ||
497 | |||
498 | #endif | ||
499 | } | ||
500 | |||
501 | template <typename In> Uint32 Utf<32>::DecodeWide( In input ) { | ||
502 | // The encoding of wide characters is not well defined and is left to the system; | ||
503 | // however we can safely assume that it is UCS-2 on Windows and | ||
504 | // UCS-4 on Unix systems. | ||
505 | // In both cases, a simple copy is enough (UCS-2 is a subset of UCS-4, | ||
506 | // and UCS-4 *is* UTF-32). | ||
507 | |||
508 | return input; | ||
509 | } | ||
510 | |||
511 | template <typename Out> | ||
512 | Out Utf<32>::EncodeAnsi( Uint32 codepoint, Out output, char replacement, | ||
513 | const std::locale& locale ) { | ||
514 | // On Windows, gcc's standard library (glibc++) has almost | ||
515 | // no support for Unicode stuff. As a consequence, in this | ||
516 | // context we can only use the default locale and ignore | ||
517 | // the one passed as parameter. | ||
518 | |||
519 | #if EFSW_PLATFORM == EFSW_PLATFORM_WIN && /* if Windows ... */ \ | ||
520 | ( defined( __GLIBCPP__ ) || \ | ||
521 | defined( __GLIBCXX__ ) ) && /* ... and standard library is glibc++ ... */ \ | ||
522 | !( defined( __SGI_STL_PORT ) || \ | ||
523 | defined( _STLPORT_VERSION ) ) /* ... and STLPort is not used on top of it */ | ||
524 | |||
525 | char character = 0; | ||
526 | if ( wctomb( &character, static_cast<wchar_t>( codepoint ) ) >= 0 ) | ||
527 | *output++ = character; | ||
528 | else if ( replacement ) | ||
529 | *output++ = replacement; | ||
530 | |||
531 | return output; | ||
532 | |||
533 | #else | ||
534 | // Get the facet of the locale which deals with character conversion | ||
535 | #ifndef EFSW_NO_WIDECHAR | ||
536 | const std::ctype<wchar_t>& facet = std::use_facet<std::ctype<wchar_t>>( locale ); | ||
537 | #else | ||
538 | const std::ctype<char>& facet = std::use_facet<std::ctype<char>>( locale ); | ||
539 | #endif | ||
540 | |||
541 | // Use the facet to convert each character of the input string | ||
542 | *output++ = facet.narrow( static_cast<wchar_t>( codepoint ), replacement ); | ||
543 | |||
544 | return output; | ||
545 | |||
546 | #endif | ||
547 | } | ||
548 | |||
549 | #ifndef EFSW_NO_WIDECHAR | ||
550 | template <typename Out> | ||
551 | Out Utf<32>::EncodeWide( Uint32 codepoint, Out output, wchar_t replacement ) { | ||
552 | // The encoding of wide characters is not well defined and is left to the system; | ||
553 | // however we can safely assume that it is UCS-2 on Windows and | ||
554 | // UCS-4 on Unix systems. | ||
555 | // For UCS-2 we need to check if the source characters fits in (UCS-2 is a subset of UCS-4). | ||
556 | // For UCS-4 we can do a direct copy (UCS-4 *is* UTF-32). | ||
557 | |||
558 | switch ( sizeof( wchar_t ) ) { | ||
559 | case 4: { | ||
560 | *output++ = static_cast<wchar_t>( codepoint ); | ||
561 | break; | ||
562 | } | ||
563 | |||
564 | default: { | ||
565 | if ( ( codepoint <= 0xFFFF ) && ( ( codepoint < 0xD800 ) || ( codepoint > 0xDFFF ) ) ) { | ||
566 | *output++ = static_cast<wchar_t>( codepoint ); | ||
567 | } else if ( replacement ) { | ||
568 | *output++ = replacement; | ||
569 | } | ||
570 | break; | ||
571 | } | ||
572 | } | ||
573 | |||
574 | return output; | ||
575 | } | ||
576 | #endif | ||