aboutsummaryrefslogtreecommitdiff
path: root/src/3rdParty/efsw/Utf.inl
diff options
context:
space:
mode:
Diffstat (limited to 'src/3rdParty/efsw/Utf.inl')
-rw-r--r--[-rwxr-xr-x]src/3rdParty/efsw/Utf.inl1152
1 files changed, 576 insertions, 576 deletions
diff --git a/src/3rdParty/efsw/Utf.inl b/src/3rdParty/efsw/Utf.inl
index 5b6c2e0..5c9d7a3 100755..100644
--- a/src/3rdParty/efsw/Utf.inl
+++ b/src/3rdParty/efsw/Utf.inl
@@ -1,576 +1,576 @@
// References :
// http://www.unicode.org/
// http://www.unicode.org/Public/PROGRAMS/CVTUTF/ConvertUTF.c
// http://www.unicode.org/Public/PROGRAMS/CVTUTF/ConvertUTF.h
// http://people.w3.org/rishida/scripts/uniview/conversion
////////////////////////////////////////////////////////////
7 7
8template <typename In> In Utf<8>::Decode( In begin, In end, Uint32& output, Uint32 replacement ) { 8template <typename In> In Utf<8>::Decode( In begin, In end, Uint32& output, Uint32 replacement ) {
9 // Some useful precomputed data 9 // Some useful precomputed data
10 static const int trailing[256] = { 10 static const int trailing[256] = {
11 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
12 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
13 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 13 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
14 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
15 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
16 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
17 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 17 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
18 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 18 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2,
19 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5 }; 19 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5 };
20 static const Uint32 offsets[6] = { 0x00000000, 0x00003080, 0x000E2080, 20 static const Uint32 offsets[6] = { 0x00000000, 0x00003080, 0x000E2080,
21 0x03C82080, 0xFA082080, 0x82082080 }; 21 0x03C82080, 0xFA082080, 0x82082080 };
22 22
23 // Decode the character 23 // Decode the character
24 int trailingBytes = trailing[static_cast<Uint8>( *begin )]; 24 int trailingBytes = trailing[static_cast<Uint8>( *begin )];
25 if ( begin + trailingBytes < end ) { 25 if ( begin + trailingBytes < end ) {
26 output = 0; 26 output = 0;
27 switch ( trailingBytes ) { 27 switch ( trailingBytes ) {
28 case 5: 28 case 5:
29 output += static_cast<Uint8>( *begin++ ); 29 output += static_cast<Uint8>( *begin++ );
30 output <<= 6; 30 output <<= 6;
31 case 4: 31 case 4:
32 output += static_cast<Uint8>( *begin++ ); 32 output += static_cast<Uint8>( *begin++ );
33 output <<= 6; 33 output <<= 6;
34 case 3: 34 case 3:
35 output += static_cast<Uint8>( *begin++ ); 35 output += static_cast<Uint8>( *begin++ );
36 output <<= 6; 36 output <<= 6;
37 case 2: 37 case 2:
38 output += static_cast<Uint8>( *begin++ ); 38 output += static_cast<Uint8>( *begin++ );
39 output <<= 6; 39 output <<= 6;
40 case 1: 40 case 1:
41 output += static_cast<Uint8>( *begin++ ); 41 output += static_cast<Uint8>( *begin++ );
42 output <<= 6; 42 output <<= 6;
43 case 0: 43 case 0:
44 output += static_cast<Uint8>( *begin++ ); 44 output += static_cast<Uint8>( *begin++ );
45 } 45 }
46 output -= offsets[trailingBytes]; 46 output -= offsets[trailingBytes];
47 } else { 47 } else {
48 // Incomplete character 48 // Incomplete character
49 begin = end; 49 begin = end;
50 output = replacement; 50 output = replacement;
51 } 51 }
52 52
53 return begin; 53 return begin;
54} 54}
55 55
56template <typename Out> Out Utf<8>::Encode( Uint32 input, Out output, Uint8 replacement ) { 56template <typename Out> Out Utf<8>::Encode( Uint32 input, Out output, Uint8 replacement ) {
57 // Some useful precomputed data 57 // Some useful precomputed data
58 static const Uint8 firstBytes[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC }; 58 static const Uint8 firstBytes[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
59 59
60 // Encode the character 60 // Encode the character
61 if ( ( input > 0x0010FFFF ) || ( ( input >= 0xD800 ) && ( input <= 0xDBFF ) ) ) { 61 if ( ( input > 0x0010FFFF ) || ( ( input >= 0xD800 ) && ( input <= 0xDBFF ) ) ) {
62 // Invalid character 62 // Invalid character
63 if ( replacement ) 63 if ( replacement )
64 *output++ = replacement; 64 *output++ = replacement;
65 } else { 65 } else {
66 // Valid character 66 // Valid character
67 67
68 // Get the number of bytes to write 68 // Get the number of bytes to write
69 int bytesToWrite = 1; 69 int bytesToWrite = 1;
70 if ( input < 0x80 ) 70 if ( input < 0x80 )
71 bytesToWrite = 1; 71 bytesToWrite = 1;
72 else if ( input < 0x800 ) 72 else if ( input < 0x800 )
73 bytesToWrite = 2; 73 bytesToWrite = 2;
74 else if ( input < 0x10000 ) 74 else if ( input < 0x10000 )
75 bytesToWrite = 3; 75 bytesToWrite = 3;
76 else if ( input <= 0x0010FFFF ) 76 else if ( input <= 0x0010FFFF )
77 bytesToWrite = 4; 77 bytesToWrite = 4;
78 78
79 // Extract the bytes to write 79 // Extract the bytes to write
80 Uint8 bytes[4]; 80 Uint8 bytes[4];
81 switch ( bytesToWrite ) { 81 switch ( bytesToWrite ) {
82 case 4: 82 case 4:
83 bytes[3] = static_cast<Uint8>( ( input | 0x80 ) & 0xBF ); 83 bytes[3] = static_cast<Uint8>( ( input | 0x80 ) & 0xBF );
84 input >>= 6; 84 input >>= 6;
85 case 3: 85 case 3:
86 bytes[2] = static_cast<Uint8>( ( input | 0x80 ) & 0xBF ); 86 bytes[2] = static_cast<Uint8>( ( input | 0x80 ) & 0xBF );
87 input >>= 6; 87 input >>= 6;
88 case 2: 88 case 2:
89 bytes[1] = static_cast<Uint8>( ( input | 0x80 ) & 0xBF ); 89 bytes[1] = static_cast<Uint8>( ( input | 0x80 ) & 0xBF );
90 input >>= 6; 90 input >>= 6;
91 case 1: 91 case 1:
92 bytes[0] = static_cast<Uint8>( input | firstBytes[bytesToWrite] ); 92 bytes[0] = static_cast<Uint8>( input | firstBytes[bytesToWrite] );
93 } 93 }
94 94
95 // Add them to the output 95 // Add them to the output
96 const Uint8* currentByte = bytes; 96 const Uint8* currentByte = bytes;
97 switch ( bytesToWrite ) { 97 switch ( bytesToWrite ) {
98 case 4: 98 case 4:
99 *output++ = *currentByte++; 99 *output++ = *currentByte++;
100 case 3: 100 case 3:
101 *output++ = *currentByte++; 101 *output++ = *currentByte++;
102 case 2: 102 case 2:
103 *output++ = *currentByte++; 103 *output++ = *currentByte++;
104 case 1: 104 case 1:
105 *output++ = *currentByte++; 105 *output++ = *currentByte++;
106 } 106 }
107 } 107 }
108 108
109 return output; 109 return output;
110} 110}
111 111
112template <typename In> In Utf<8>::Next( In begin, In end ) { 112template <typename In> In Utf<8>::Next( In begin, In end ) {
113 Uint32 codepoint; 113 Uint32 codepoint;
114 return Decode( begin, end, codepoint ); 114 return Decode( begin, end, codepoint );
115} 115}
116 116
117template <typename In> std::size_t Utf<8>::Count( In begin, In end ) { 117template <typename In> std::size_t Utf<8>::Count( In begin, In end ) {
118 std::size_t length = 0; 118 std::size_t length = 0;
119 while ( begin < end ) { 119 while ( begin < end ) {
120 begin = Next( begin, end ); 120 begin = Next( begin, end );
121 ++length; 121 ++length;
122 } 122 }
123 123
124 return length; 124 return length;
125} 125}
126 126
127template <typename In, typename Out> 127template <typename In, typename Out>
128Out Utf<8>::FromAnsi( In begin, In end, Out output, const std::locale& locale ) { 128Out Utf<8>::FromAnsi( In begin, In end, Out output, const std::locale& locale ) {
129 while ( begin < end ) { 129 while ( begin < end ) {
130 Uint32 codepoint = Utf<32>::DecodeAnsi( *begin++, locale ); 130 Uint32 codepoint = Utf<32>::DecodeAnsi( *begin++, locale );
131 output = Encode( codepoint, output ); 131 output = Encode( codepoint, output );
132 } 132 }
133 133
134 return output; 134 return output;
135} 135}
136 136
137template <typename In, typename Out> Out Utf<8>::FromWide( In begin, In end, Out output ) { 137template <typename In, typename Out> Out Utf<8>::FromWide( In begin, In end, Out output ) {
138 while ( begin < end ) { 138 while ( begin < end ) {
139 Uint32 codepoint = Utf<32>::DecodeWide( *begin++ ); 139 Uint32 codepoint = Utf<32>::DecodeWide( *begin++ );
140 output = Encode( codepoint, output ); 140 output = Encode( codepoint, output );
141 } 141 }
142 142
143 return output; 143 return output;
144} 144}
145 145
146template <typename In, typename Out> Out Utf<8>::FromLatin1( In begin, In end, Out output ) { 146template <typename In, typename Out> Out Utf<8>::FromLatin1( In begin, In end, Out output ) {
147 // Latin-1 is directly compatible with Unicode encodings, 147 // Latin-1 is directly compatible with Unicode encodings,
148 // and can thus be treated as (a sub-range of) UTF-32 148 // and can thus be treated as (a sub-range of) UTF-32
149 while ( begin < end ) 149 while ( begin < end )
150 output = Encode( *begin++, output ); 150 output = Encode( *begin++, output );
151 151
152 return output; 152 return output;
153} 153}
154 154
155template <typename In, typename Out> 155template <typename In, typename Out>
156Out Utf<8>::ToAnsi( In begin, In end, Out output, char replacement, const std::locale& locale ) { 156Out Utf<8>::ToAnsi( In begin, In end, Out output, char replacement, const std::locale& locale ) {
157 while ( begin < end ) { 157 while ( begin < end ) {
158 Uint32 codepoint; 158 Uint32 codepoint;
159 begin = Decode( begin, end, codepoint ); 159 begin = Decode( begin, end, codepoint );
160 output = Utf<32>::EncodeAnsi( codepoint, output, replacement, locale ); 160 output = Utf<32>::EncodeAnsi( codepoint, output, replacement, locale );
161 } 161 }
162 162
163 return output; 163 return output;
164} 164}
165 165
166#ifndef EFSW_NO_WIDECHAR 166#ifndef EFSW_NO_WIDECHAR
167template <typename In, typename Out> 167template <typename In, typename Out>
168Out Utf<8>::ToWide( In begin, In end, Out output, wchar_t replacement ) { 168Out Utf<8>::ToWide( In begin, In end, Out output, wchar_t replacement ) {
169 while ( begin < end ) { 169 while ( begin < end ) {
170 Uint32 codepoint; 170 Uint32 codepoint;
171 begin = Decode( begin, end, codepoint ); 171 begin = Decode( begin, end, codepoint );
172 output = Utf<32>::EncodeWide( codepoint, output, replacement ); 172 output = Utf<32>::EncodeWide( codepoint, output, replacement );
173 } 173 }
174 174
175 return output; 175 return output;
176} 176}
177#endif 177#endif
178 178
179template <typename In, typename Out> 179template <typename In, typename Out>
180Out Utf<8>::ToLatin1( In begin, In end, Out output, char replacement ) { 180Out Utf<8>::ToLatin1( In begin, In end, Out output, char replacement ) {
181 // Latin-1 is directly compatible with Unicode encodings, 181 // Latin-1 is directly compatible with Unicode encodings,
182 // and can thus be treated as (a sub-range of) UTF-32 182 // and can thus be treated as (a sub-range of) UTF-32
183 while ( begin < end ) { 183 while ( begin < end ) {
184 Uint32 codepoint; 184 Uint32 codepoint;
185 begin = Decode( begin, end, codepoint ); 185 begin = Decode( begin, end, codepoint );
186 *output++ = codepoint < 256 ? static_cast<char>( codepoint ) : replacement; 186 *output++ = codepoint < 256 ? static_cast<char>( codepoint ) : replacement;
187 } 187 }
188 188
189 return output; 189 return output;
190} 190}
191 191
192template <typename In, typename Out> Out Utf<8>::toUtf8( In begin, In end, Out output ) { 192template <typename In, typename Out> Out Utf<8>::toUtf8( In begin, In end, Out output ) {
193 while ( begin < end ) 193 while ( begin < end )
194 *output++ = *begin++; 194 *output++ = *begin++;
195 195
196 return output; 196 return output;
197} 197}
198 198
199template <typename In, typename Out> Out Utf<8>::ToUtf16( In begin, In end, Out output ) { 199template <typename In, typename Out> Out Utf<8>::ToUtf16( In begin, In end, Out output ) {
200 while ( begin < end ) { 200 while ( begin < end ) {
201 Uint32 codepoint; 201 Uint32 codepoint;
202 begin = Decode( begin, end, codepoint ); 202 begin = Decode( begin, end, codepoint );
203 output = Utf<16>::Encode( codepoint, output ); 203 output = Utf<16>::Encode( codepoint, output );
204 } 204 }
205 205
206 return output; 206 return output;
207} 207}
208 208
209template <typename In, typename Out> Out Utf<8>::ToUtf32( In begin, In end, Out output ) { 209template <typename In, typename Out> Out Utf<8>::ToUtf32( In begin, In end, Out output ) {
210 while ( begin < end ) { 210 while ( begin < end ) {
211 Uint32 codepoint; 211 Uint32 codepoint;
212 begin = Decode( begin, end, codepoint ); 212 begin = Decode( begin, end, codepoint );
213 *output++ = codepoint; 213 *output++ = codepoint;
214 } 214 }
215 215
216 return output; 216 return output;
217} 217}
218 218
219template <typename In> In Utf<16>::Decode( In begin, In end, Uint32& output, Uint32 replacement ) { 219template <typename In> In Utf<16>::Decode( In begin, In end, Uint32& output, Uint32 replacement ) {
220 Uint16 first = *begin++; 220 Uint16 first = *begin++;
221 221
222 // If it's a surrogate pair, first convert to a single UTF-32 character 222 // If it's a surrogate pair, first convert to a single UTF-32 character
223 if ( ( first >= 0xD800 ) && ( first <= 0xDBFF ) ) { 223 if ( ( first >= 0xD800 ) && ( first <= 0xDBFF ) ) {
224 if ( begin < end ) { 224 if ( begin < end ) {
225 Uint32 second = *begin++; 225 Uint32 second = *begin++;
226 if ( ( second >= 0xDC00 ) && ( second <= 0xDFFF ) ) { 226 if ( ( second >= 0xDC00 ) && ( second <= 0xDFFF ) ) {
227 // The second element is valid: convert the two elements to a UTF-32 character 227 // The second element is valid: convert the two elements to a UTF-32 character
228 output = static_cast<Uint32>( ( ( first - 0xD800 ) << 10 ) + ( second - 0xDC00 ) + 228 output = static_cast<Uint32>( ( ( first - 0xD800 ) << 10 ) + ( second - 0xDC00 ) +
229 0x0010000 ); 229 0x0010000 );
230 } else { 230 } else {
231 // Invalid character 231 // Invalid character
232 output = replacement; 232 output = replacement;
233 } 233 }
234 } else { 234 } else {
235 // Invalid character 235 // Invalid character
236 begin = end; 236 begin = end;
237 output = replacement; 237 output = replacement;
238 } 238 }
239 } else { 239 } else {
240 // We can make a direct copy 240 // We can make a direct copy
241 output = first; 241 output = first;
242 } 242 }
243 243
244 return begin; 244 return begin;
245} 245}
246 246
247template <typename Out> Out Utf<16>::Encode( Uint32 input, Out output, Uint16 replacement ) { 247template <typename Out> Out Utf<16>::Encode( Uint32 input, Out output, Uint16 replacement ) {
248 if ( input < 0xFFFF ) { 248 if ( input < 0xFFFF ) {
249 // The character can be copied directly, we just need to check if it's in the valid range 249 // The character can be copied directly, we just need to check if it's in the valid range
250 if ( ( input >= 0xD800 ) && ( input <= 0xDFFF ) ) { 250 if ( ( input >= 0xD800 ) && ( input <= 0xDFFF ) ) {
251 // Invalid character (this range is reserved) 251 // Invalid character (this range is reserved)
252 if ( replacement ) 252 if ( replacement )
253 *output++ = replacement; 253 *output++ = replacement;
254 } else { 254 } else {
255 // Valid character directly convertible to a single UTF-16 character 255 // Valid character directly convertible to a single UTF-16 character
256 *output++ = static_cast<Uint16>( input ); 256 *output++ = static_cast<Uint16>( input );
257 } 257 }
258 } else if ( input > 0x0010FFFF ) { 258 } else if ( input > 0x0010FFFF ) {
259 // Invalid character (greater than the maximum unicode value) 259 // Invalid character (greater than the maximum unicode value)
260 if ( replacement ) 260 if ( replacement )
261 *output++ = replacement; 261 *output++ = replacement;
262 } else { 262 } else {
263 // The input character will be converted to two UTF-16 elements 263 // The input character will be converted to two UTF-16 elements
264 input -= 0x0010000; 264 input -= 0x0010000;
265 *output++ = static_cast<Uint16>( ( input >> 10 ) + 0xD800 ); 265 *output++ = static_cast<Uint16>( ( input >> 10 ) + 0xD800 );
266 *output++ = static_cast<Uint16>( ( input & 0x3FFUL ) + 0xDC00 ); 266 *output++ = static_cast<Uint16>( ( input & 0x3FFUL ) + 0xDC00 );
267 } 267 }
268 268
269 return output; 269 return output;
270} 270}
271 271
272template <typename In> In Utf<16>::Next( In begin, In end ) { 272template <typename In> In Utf<16>::Next( In begin, In end ) {
273 Uint32 codepoint; 273 Uint32 codepoint;
274 return Decode( begin, end, codepoint ); 274 return Decode( begin, end, codepoint );
275} 275}
276 276
277template <typename In> std::size_t Utf<16>::Count( In begin, In end ) { 277template <typename In> std::size_t Utf<16>::Count( In begin, In end ) {
278 std::size_t length = 0; 278 std::size_t length = 0;
279 while ( begin < end ) { 279 while ( begin < end ) {
280 begin = Next( begin, end ); 280 begin = Next( begin, end );
281 ++length; 281 ++length;
282 } 282 }
283 283
284 return length; 284 return length;
285} 285}
286 286
287template <typename In, typename Out> 287template <typename In, typename Out>
288Out Utf<16>::FromAnsi( In begin, In end, Out output, const std::locale& locale ) { 288Out Utf<16>::FromAnsi( In begin, In end, Out output, const std::locale& locale ) {
289 while ( begin < end ) { 289 while ( begin < end ) {
290 Uint32 codepoint = Utf<32>::DecodeAnsi( *begin++, locale ); 290 Uint32 codepoint = Utf<32>::DecodeAnsi( *begin++, locale );
291 output = Encode( codepoint, output ); 291 output = Encode( codepoint, output );
292 } 292 }
293 293
294 return output; 294 return output;
295} 295}
296 296
297template <typename In, typename Out> Out Utf<16>::FromWide( In begin, In end, Out output ) { 297template <typename In, typename Out> Out Utf<16>::FromWide( In begin, In end, Out output ) {
298 while ( begin < end ) { 298 while ( begin < end ) {
299 Uint32 codepoint = Utf<32>::DecodeWide( *begin++ ); 299 Uint32 codepoint = Utf<32>::DecodeWide( *begin++ );
300 output = Encode( codepoint, output ); 300 output = Encode( codepoint, output );
301 } 301 }
302 302
303 return output; 303 return output;
304} 304}
305 305
306template <typename In, typename Out> Out Utf<16>::FromLatin1( In begin, In end, Out output ) { 306template <typename In, typename Out> Out Utf<16>::FromLatin1( In begin, In end, Out output ) {
307 // Latin-1 is directly compatible with Unicode encodings, 307 // Latin-1 is directly compatible with Unicode encodings,
308 // and can thus be treated as (a sub-range of) UTF-32 308 // and can thus be treated as (a sub-range of) UTF-32
309 while ( begin < end ) 309 while ( begin < end )
310 *output++ = *begin++; 310 *output++ = *begin++;
311 311
312 return output; 312 return output;
313} 313}
314 314
315template <typename In, typename Out> 315template <typename In, typename Out>
316Out Utf<16>::ToAnsi( In begin, In end, Out output, char replacement, const std::locale& locale ) { 316Out Utf<16>::ToAnsi( In begin, In end, Out output, char replacement, const std::locale& locale ) {
317 while ( begin < end ) { 317 while ( begin < end ) {
318 Uint32 codepoint; 318 Uint32 codepoint;
319 begin = Decode( begin, end, codepoint ); 319 begin = Decode( begin, end, codepoint );
320 output = Utf<32>::EncodeAnsi( codepoint, output, replacement, locale ); 320 output = Utf<32>::EncodeAnsi( codepoint, output, replacement, locale );
321 } 321 }
322 322
323 return output; 323 return output;
324} 324}
325 325
326#ifndef EFSW_NO_WIDECHAR 326#ifndef EFSW_NO_WIDECHAR
327template <typename In, typename Out> 327template <typename In, typename Out>
328Out Utf<16>::ToWide( In begin, In end, Out output, wchar_t replacement ) { 328Out Utf<16>::ToWide( In begin, In end, Out output, wchar_t replacement ) {
329 while ( begin < end ) { 329 while ( begin < end ) {
330 Uint32 codepoint; 330 Uint32 codepoint;
331 begin = Decode( begin, end, codepoint ); 331 begin = Decode( begin, end, codepoint );
332 output = Utf<32>::EncodeWide( codepoint, output, replacement ); 332 output = Utf<32>::EncodeWide( codepoint, output, replacement );
333 } 333 }
334 334
335 return output; 335 return output;
336} 336}
337#endif 337#endif
338 338
339template <typename In, typename Out> 339template <typename In, typename Out>
340Out Utf<16>::ToLatin1( In begin, In end, Out output, char replacement ) { 340Out Utf<16>::ToLatin1( In begin, In end, Out output, char replacement ) {
341 // Latin-1 is directly compatible with Unicode encodings, 341 // Latin-1 is directly compatible with Unicode encodings,
342 // and can thus be treated as (a sub-range of) UTF-32 342 // and can thus be treated as (a sub-range of) UTF-32
343 while ( begin < end ) { 343 while ( begin < end ) {
344 *output++ = *begin < 256 ? static_cast<char>( *begin ) : replacement; 344 *output++ = *begin < 256 ? static_cast<char>( *begin ) : replacement;
345 begin++; 345 begin++;
346 } 346 }
347 347
348 return output; 348 return output;
349} 349}
350 350
351template <typename In, typename Out> Out Utf<16>::toUtf8( In begin, In end, Out output ) { 351template <typename In, typename Out> Out Utf<16>::toUtf8( In begin, In end, Out output ) {
352 while ( begin < end ) { 352 while ( begin < end ) {
353 Uint32 codepoint; 353 Uint32 codepoint;
354 begin = Decode( begin, end, codepoint ); 354 begin = Decode( begin, end, codepoint );
355 output = Utf<8>::Encode( codepoint, output ); 355 output = Utf<8>::Encode( codepoint, output );
356 } 356 }
357 357
358 return output; 358 return output;
359} 359}
360 360
361template <typename In, typename Out> Out Utf<16>::ToUtf16( In begin, In end, Out output ) { 361template <typename In, typename Out> Out Utf<16>::ToUtf16( In begin, In end, Out output ) {
362 while ( begin < end ) 362 while ( begin < end )
363 *output++ = *begin++; 363 *output++ = *begin++;
364 364
365 return output; 365 return output;
366} 366}
367 367
368template <typename In, typename Out> Out Utf<16>::ToUtf32( In begin, In end, Out output ) { 368template <typename In, typename Out> Out Utf<16>::ToUtf32( In begin, In end, Out output ) {
369 while ( begin < end ) { 369 while ( begin < end ) {
370 Uint32 codepoint; 370 Uint32 codepoint;
371 begin = Decode( begin, end, codepoint ); 371 begin = Decode( begin, end, codepoint );
372 *output++ = codepoint; 372 *output++ = codepoint;
373 } 373 }
374 374
375 return output; 375 return output;
376} 376}
377 377
378template <typename In> In Utf<32>::Decode( In begin, In /*end*/, Uint32& output, Uint32 ) { 378template <typename In> In Utf<32>::Decode( In begin, In /*end*/, Uint32& output, Uint32 ) {
379 output = *begin++; 379 output = *begin++;
380 return begin; 380 return begin;
381} 381}
382 382
383template <typename Out> Out Utf<32>::Encode( Uint32 input, Out output, Uint32 /*replacement*/ ) { 383template <typename Out> Out Utf<32>::Encode( Uint32 input, Out output, Uint32 /*replacement*/ ) {
384 *output++ = input; 384 *output++ = input;
385 return output; 385 return output;
386} 386}
387 387
388template <typename In> In Utf<32>::Next( In begin, In /*end*/ ) { 388template <typename In> In Utf<32>::Next( In begin, In /*end*/ ) {
389 return ++begin; 389 return ++begin;
390} 390}
391 391
392template <typename In> std::size_t Utf<32>::Count( In begin, In end ) { 392template <typename In> std::size_t Utf<32>::Count( In begin, In end ) {
393 return begin - end; 393 return begin - end;
394} 394}
395 395
396template <typename In, typename Out> 396template <typename In, typename Out>
397Out Utf<32>::FromAnsi( In begin, In end, Out output, const std::locale& locale ) { 397Out Utf<32>::FromAnsi( In begin, In end, Out output, const std::locale& locale ) {
398 while ( begin < end ) 398 while ( begin < end )
399 *output++ = DecodeAnsi( *begin++, locale ); 399 *output++ = DecodeAnsi( *begin++, locale );
400 400
401 return output; 401 return output;
402} 402}
403 403
404template <typename In, typename Out> Out Utf<32>::FromWide( In begin, In end, Out output ) { 404template <typename In, typename Out> Out Utf<32>::FromWide( In begin, In end, Out output ) {
405 while ( begin < end ) 405 while ( begin < end )
406 *output++ = DecodeWide( *begin++ ); 406 *output++ = DecodeWide( *begin++ );
407 407
408 return output; 408 return output;
409} 409}
410 410
411template <typename In, typename Out> Out Utf<32>::FromLatin1( In begin, In end, Out output ) { 411template <typename In, typename Out> Out Utf<32>::FromLatin1( In begin, In end, Out output ) {
412 // Latin-1 is directly compatible with Unicode encodings, 412 // Latin-1 is directly compatible with Unicode encodings,
413 // and can thus be treated as (a sub-range of) UTF-32 413 // and can thus be treated as (a sub-range of) UTF-32
414 while ( begin < end ) 414 while ( begin < end )
415 *output++ = *begin++; 415 *output++ = *begin++;
416 416
417 return output; 417 return output;
418} 418}
419 419
420template <typename In, typename Out> 420template <typename In, typename Out>
421Out Utf<32>::ToAnsi( In begin, In end, Out output, char replacement, const std::locale& locale ) { 421Out Utf<32>::ToAnsi( In begin, In end, Out output, char replacement, const std::locale& locale ) {
422 while ( begin < end ) 422 while ( begin < end )
423 output = EncodeAnsi( *begin++, output, replacement, locale ); 423 output = EncodeAnsi( *begin++, output, replacement, locale );
424 424
425 return output; 425 return output;
426} 426}
427 427
428#ifndef EFSW_NO_WIDECHAR 428#ifndef EFSW_NO_WIDECHAR
429template <typename In, typename Out> 429template <typename In, typename Out>
430Out Utf<32>::ToWide( In begin, In end, Out output, wchar_t replacement ) { 430Out Utf<32>::ToWide( In begin, In end, Out output, wchar_t replacement ) {
431 while ( begin < end ) 431 while ( begin < end )
432 output = EncodeWide( *begin++, output, replacement ); 432 output = EncodeWide( *begin++, output, replacement );
433 433
434 return output; 434 return output;
435} 435}
436#endif 436#endif
437 437
438template <typename In, typename Out> 438template <typename In, typename Out>
439Out Utf<32>::ToLatin1( In begin, In end, Out output, char replacement ) { 439Out Utf<32>::ToLatin1( In begin, In end, Out output, char replacement ) {
440 // Latin-1 is directly compatible with Unicode encodings, 440 // Latin-1 is directly compatible with Unicode encodings,
441 // and can thus be treated as (a sub-range of) UTF-32 441 // and can thus be treated as (a sub-range of) UTF-32
442 while ( begin < end ) { 442 while ( begin < end ) {
443 *output++ = *begin < 256 ? static_cast<char>( *begin ) : replacement; 443 *output++ = *begin < 256 ? static_cast<char>( *begin ) : replacement;
444 begin++; 444 begin++;
445 } 445 }
446 446
447 return output; 447 return output;
448} 448}
449 449
450template <typename In, typename Out> Out Utf<32>::toUtf8( In begin, In end, Out output ) { 450template <typename In, typename Out> Out Utf<32>::toUtf8( In begin, In end, Out output ) {
451 while ( begin < end ) 451 while ( begin < end )
452 output = Utf<8>::Encode( *begin++, output ); 452 output = Utf<8>::Encode( *begin++, output );
453 453
454 return output; 454 return output;
455} 455}
456 456
457template <typename In, typename Out> Out Utf<32>::ToUtf16( In begin, In end, Out output ) { 457template <typename In, typename Out> Out Utf<32>::ToUtf16( In begin, In end, Out output ) {
458 while ( begin < end ) 458 while ( begin < end )
459 output = Utf<16>::Encode( *begin++, output ); 459 output = Utf<16>::Encode( *begin++, output );
460 460
461 return output; 461 return output;
462} 462}
463 463
464template <typename In, typename Out> Out Utf<32>::ToUtf32( In begin, In end, Out output ) { 464template <typename In, typename Out> Out Utf<32>::ToUtf32( In begin, In end, Out output ) {
465 while ( begin < end ) 465 while ( begin < end )
466 *output++ = *begin++; 466 *output++ = *begin++;
467 467
468 return output; 468 return output;
469} 469}
470 470
471template <typename In> Uint32 Utf<32>::DecodeAnsi( In input, const std::locale& locale ) { 471template <typename In> Uint32 Utf<32>::DecodeAnsi( In input, const std::locale& locale ) {
472 // On Windows, gcc's standard library (glibc++) has almost 472 // On Windows, gcc's standard library (glibc++) has almost
473 // no support for Unicode stuff. As a consequence, in this 473 // no support for Unicode stuff. As a consequence, in this
474 // context we can only use the default locale and ignore 474 // context we can only use the default locale and ignore
475 // the one passed as parameter. 475 // the one passed as parameter.
476 476
477#if EFSW_PLATFORM == EFSW_PLATFORM_WIN && /* if Windows ... */ \ 477#if EFSW_PLATFORM == EFSW_PLATFORM_WIN && /* if Windows ... */ \
478 ( defined( __GLIBCPP__ ) || \ 478 ( defined( __GLIBCPP__ ) || \
479 defined( __GLIBCXX__ ) ) && /* ... and standard library is glibc++ ... */ \ 479 defined( __GLIBCXX__ ) ) && /* ... and standard library is glibc++ ... */ \
480 !( defined( __SGI_STL_PORT ) || \ 480 !( defined( __SGI_STL_PORT ) || \
481 defined( _STLPORT_VERSION ) ) /* ... and STLPort is not used on top of it */ 481 defined( _STLPORT_VERSION ) ) /* ... and STLPort is not used on top of it */
482 482
483 wchar_t character = 0; 483 wchar_t character = 0;
484 mbtowc( &character, &input, 1 ); 484 mbtowc( &character, &input, 1 );
485 return static_cast<Uint32>( character ); 485 return static_cast<Uint32>( character );
486 486
487#else 487#else
488// Get the facet of the locale which deals with character conversion 488// Get the facet of the locale which deals with character conversion
489#ifndef EFSW_NO_WIDECHAR 489#ifndef EFSW_NO_WIDECHAR
490 const std::ctype<wchar_t>& facet = std::use_facet<std::ctype<wchar_t>>( locale ); 490 const std::ctype<wchar_t>& facet = std::use_facet<std::ctype<wchar_t>>( locale );
491#else 491#else
492 const std::ctype<char>& facet = std::use_facet<std::ctype<char>>( locale ); 492 const std::ctype<char>& facet = std::use_facet<std::ctype<char>>( locale );
493#endif 493#endif
494 494
495 // Use the facet to convert each character of the input string 495 // Use the facet to convert each character of the input string
496 return static_cast<Uint32>( facet.widen( input ) ); 496 return static_cast<Uint32>( facet.widen( input ) );
497 497
498#endif 498#endif
499} 499}
500 500
501template <typename In> Uint32 Utf<32>::DecodeWide( In input ) { 501template <typename In> Uint32 Utf<32>::DecodeWide( In input ) {
502 // The encoding of wide characters is not well defined and is left to the system; 502 // The encoding of wide characters is not well defined and is left to the system;
503 // however we can safely assume that it is UCS-2 on Windows and 503 // however we can safely assume that it is UCS-2 on Windows and
504 // UCS-4 on Unix systems. 504 // UCS-4 on Unix systems.
505 // In both cases, a simple copy is enough (UCS-2 is a subset of UCS-4, 505 // In both cases, a simple copy is enough (UCS-2 is a subset of UCS-4,
506 // and UCS-4 *is* UTF-32). 506 // and UCS-4 *is* UTF-32).
507 507
508 return input; 508 return input;
509} 509}
510 510
511template <typename Out> 511template <typename Out>
512Out Utf<32>::EncodeAnsi( Uint32 codepoint, Out output, char replacement, 512Out Utf<32>::EncodeAnsi( Uint32 codepoint, Out output, char replacement,
513 const std::locale& locale ) { 513 const std::locale& locale ) {
514 // On Windows, gcc's standard library (glibc++) has almost 514 // On Windows, gcc's standard library (glibc++) has almost
515 // no support for Unicode stuff. As a consequence, in this 515 // no support for Unicode stuff. As a consequence, in this
516 // context we can only use the default locale and ignore 516 // context we can only use the default locale and ignore
517 // the one passed as parameter. 517 // the one passed as parameter.
518 518
519#if EFSW_PLATFORM == EFSW_PLATFORM_WIN && /* if Windows ... */ \ 519#if EFSW_PLATFORM == EFSW_PLATFORM_WIN && /* if Windows ... */ \
520 ( defined( __GLIBCPP__ ) || \ 520 ( defined( __GLIBCPP__ ) || \
521 defined( __GLIBCXX__ ) ) && /* ... and standard library is glibc++ ... */ \ 521 defined( __GLIBCXX__ ) ) && /* ... and standard library is glibc++ ... */ \
522 !( defined( __SGI_STL_PORT ) || \ 522 !( defined( __SGI_STL_PORT ) || \
523 defined( _STLPORT_VERSION ) ) /* ... and STLPort is not used on top of it */ 523 defined( _STLPORT_VERSION ) ) /* ... and STLPort is not used on top of it */
524 524
525 char character = 0; 525 char character = 0;
526 if ( wctomb( &character, static_cast<wchar_t>( codepoint ) ) >= 0 ) 526 if ( wctomb( &character, static_cast<wchar_t>( codepoint ) ) >= 0 )
527 *output++ = character; 527 *output++ = character;
528 else if ( replacement ) 528 else if ( replacement )
529 *output++ = replacement; 529 *output++ = replacement;
530 530
531 return output; 531 return output;
532 532
533#else 533#else
534// Get the facet of the locale which deals with character conversion 534// Get the facet of the locale which deals with character conversion
535#ifndef EFSW_NO_WIDECHAR 535#ifndef EFSW_NO_WIDECHAR
536 const std::ctype<wchar_t>& facet = std::use_facet<std::ctype<wchar_t>>( locale ); 536 const std::ctype<wchar_t>& facet = std::use_facet<std::ctype<wchar_t>>( locale );
537#else 537#else
538 const std::ctype<char>& facet = std::use_facet<std::ctype<char>>( locale ); 538 const std::ctype<char>& facet = std::use_facet<std::ctype<char>>( locale );
539#endif 539#endif
540 540
541 // Use the facet to convert each character of the input string 541 // Use the facet to convert each character of the input string
542 *output++ = facet.narrow( static_cast<wchar_t>( codepoint ), replacement ); 542 *output++ = facet.narrow( static_cast<wchar_t>( codepoint ), replacement );
543 543
544 return output; 544 return output;
545 545
546#endif 546#endif
547} 547}
548 548
549#ifndef EFSW_NO_WIDECHAR 549#ifndef EFSW_NO_WIDECHAR
550template <typename Out> 550template <typename Out>
551Out Utf<32>::EncodeWide( Uint32 codepoint, Out output, wchar_t replacement ) { 551Out Utf<32>::EncodeWide( Uint32 codepoint, Out output, wchar_t replacement ) {
552 // The encoding of wide characters is not well defined and is left to the system; 552 // The encoding of wide characters is not well defined and is left to the system;
553 // however we can safely assume that it is UCS-2 on Windows and 553 // however we can safely assume that it is UCS-2 on Windows and
554 // UCS-4 on Unix systems. 554 // UCS-4 on Unix systems.
555 // For UCS-2 we need to check if the source characters fits in (UCS-2 is a subset of UCS-4). 555 // For UCS-2 we need to check if the source characters fits in (UCS-2 is a subset of UCS-4).
556 // For UCS-4 we can do a direct copy (UCS-4 *is* UTF-32). 556 // For UCS-4 we can do a direct copy (UCS-4 *is* UTF-32).
557 557
558 switch ( sizeof( wchar_t ) ) { 558 switch ( sizeof( wchar_t ) ) {
559 case 4: { 559 case 4: {
560 *output++ = static_cast<wchar_t>( codepoint ); 560 *output++ = static_cast<wchar_t>( codepoint );
561 break; 561 break;
562 } 562 }
563 563
564 default: { 564 default: {
565 if ( ( codepoint <= 0xFFFF ) && ( ( codepoint < 0xD800 ) || ( codepoint > 0xDFFF ) ) ) { 565 if ( ( codepoint <= 0xFFFF ) && ( ( codepoint < 0xD800 ) || ( codepoint > 0xDFFF ) ) ) {
566 *output++ = static_cast<wchar_t>( codepoint ); 566 *output++ = static_cast<wchar_t>( codepoint );
567 } else if ( replacement ) { 567 } else if ( replacement ) {
568 *output++ = replacement; 568 *output++ = replacement;
569 } 569 }
570 break; 570 break;
571 } 571 }
572 } 572 }
573 573
574 return output; 574 return output;
575} 575}
576#endif 576#endif