diff options
author | Li Jin <dragon-fly@qq.com> | 2022-11-15 17:23:46 +0800 |
---|---|---|
committer | Li Jin <dragon-fly@qq.com> | 2022-11-15 17:52:09 +0800 |
commit | 94f8330613877b3582d32bd11abd83a97b4399ad (patch) | |
tree | 5359de314be1ebde17f8d1e48632a97d18f9e50f /src/3rdParty/efsw/Utf.hpp | |
parent | 60f8f00a022ac08701792b2897b72d8c99b50f52 (diff) | |
download | yuescript-94f8330613877b3582d32bd11abd83a97b4399ad.tar.gz yuescript-94f8330613877b3582d32bd11abd83a97b4399ad.tar.bz2 yuescript-94f8330613877b3582d32bd11abd83a97b4399ad.zip |
adding -w option to Yuescript tool.
Diffstat (limited to 'src/3rdParty/efsw/Utf.hpp')
-rwxr-xr-x | src/3rdParty/efsw/Utf.hpp | 721 |
1 files changed, 721 insertions, 0 deletions
diff --git a/src/3rdParty/efsw/Utf.hpp b/src/3rdParty/efsw/Utf.hpp new file mode 100755 index 0000000..6e9ea71 --- /dev/null +++ b/src/3rdParty/efsw/Utf.hpp | |||
@@ -0,0 +1,721 @@ | |||
1 | /** NOTE: | ||
2 | * This code is based on the Utf implementation from SFML2. License zlib/png ( | ||
3 | *http://www.sfml-dev.org/license.php ) The class was modified to fit efsw's own needs. This is not | |
4 | *the original implementation from SFML2. | ||
5 | * */ | ||
6 | |||
7 | #ifndef EFSW_UTF_HPP | ||
8 | #define EFSW_UTF_HPP | ||
9 | |||
10 | //////////////////////////////////////////////////////////// | ||
11 | // Headers | ||
12 | //////////////////////////////////////////////////////////// | ||
13 | #include <cstdlib> | ||
14 | #include <efsw/base.hpp> | ||
15 | #include <locale> | ||
16 | #include <string> | ||
17 | |||
18 | namespace efsw { | ||
19 | |||
20 | template <unsigned int N> class Utf; | ||
21 | |||
22 | //////////////////////////////////////////////////////////// | ||
23 | /// \brief Specialization of the Utf template for UTF-8 | ||
24 | /// | ||
25 | //////////////////////////////////////////////////////////// | ||
26 | template <> class Utf<8> { | |
27 | public: | |
28 | //////////////////////////////////////////////////////////// | |
29 | /// \brief Decode a single UTF-8 character | |
30 | /// | |
31 | /// Decoding a character means finding its unique 32-bit | |
32 | /// code (called the codepoint) in the Unicode standard. | |
33 | /// | |
34 | /// \param begin Iterator pointing to the beginning of the input sequence | |
35 | /// \param end Iterator pointing to the end of the input sequence | |
36 | /// \param output Codepoint of the decoded UTF-8 character | |
37 | /// \param replacement Replacement character to use in case the UTF-8 sequence is invalid | |
38 | /// | |
39 | /// \return Iterator pointing to one past the last read element of the input sequence | |
40 | /// | |
41 | //////////////////////////////////////////////////////////// | |
42 | template <typename In> | |
43 | static In Decode( In begin, In end, Uint32& output, Uint32 replacement = 0 ); | |
44 | |||
45 | //////////////////////////////////////////////////////////// | |
46 | /// \brief Encode a single UTF-8 character | |
47 | /// | |
48 | /// Encoding a character means converting a unique 32-bit | |
49 | /// code (called the codepoint) into the target encoding, UTF-8. | |
50 | /// | |
51 | /// \param input Codepoint to encode as UTF-8 | |
52 | /// \param output Iterator pointing to the beginning of the output sequence | |
53 | /// \param replacement Replacement for characters not convertible to UTF-8 (use 0 to skip them) | |
54 | /// | |
55 | /// \return Iterator to the end of the output sequence which has been written | |
56 | /// | |
57 | //////////////////////////////////////////////////////////// | |
58 | template <typename Out> static Out Encode( Uint32 input, Out output, Uint8 replacement = 0 ); | |
59 | |||
60 | //////////////////////////////////////////////////////////// | |
61 | /// \brief Advance to the next UTF-8 character | |
62 | /// | |
63 | /// This function is necessary for multi-element encodings, as | |
64 | /// a single character may use more than 1 storage element. | |
65 | /// | |
66 | /// \param begin Iterator pointing to the beginning of the input sequence | |
67 | /// \param end Iterator pointing to the end of the input sequence | |
68 | /// | |
69 | /// \return Iterator pointing to one past the last read element of the input sequence | |
70 | /// | |
71 | //////////////////////////////////////////////////////////// | |
72 | template <typename In> static In Next( In begin, In end ); | |
73 | |||
74 | //////////////////////////////////////////////////////////// | |
75 | /// \brief Count the number of characters of a UTF-8 sequence | |
76 | /// | |
77 | /// This function is necessary for multi-element encodings, as | |
78 | /// a single character may use more than 1 storage element, thus the | |
79 | /// total size can be different from (end - begin). | |
80 | /// | |
81 | /// \param begin Iterator pointing to the beginning of the input sequence | |
82 | /// \param end Iterator pointing to the end of the input sequence | |
83 | /// | |
84 | /// \return Number of characters in the sequence [begin, end) | |
85 | /// | |
86 | //////////////////////////////////////////////////////////// | |
87 | template <typename In> static std::size_t Count( In begin, In end ); | |
88 | |||
89 | //////////////////////////////////////////////////////////// | |
90 | /// \brief Convert an ANSI characters range to UTF-8 | |
91 | /// | |
92 | /// The current global locale will be used by default, unless you | |
93 | /// pass a custom one in the \a locale parameter. | |
94 | /// | |
95 | /// \param begin Iterator pointing to the beginning of the input sequence | |
96 | /// \param end Iterator pointing to the end of the input sequence | |
97 | /// \param output Iterator pointing to the beginning of the output sequence | |
98 | /// \param locale Locale to use for conversion | |
99 | /// | |
100 | /// \return Iterator to the end of the output sequence which has been written | |
101 | /// | |
102 | //////////////////////////////////////////////////////////// | |
103 | template <typename In, typename Out> | |
104 | static Out FromAnsi( In begin, In end, Out output, const std::locale& locale = std::locale() ); | |
105 | |||
106 | //////////////////////////////////////////////////////////// | |
107 | /// \brief Convert a wide characters range to UTF-8 | |
108 | /// | |
109 | /// \param begin Iterator pointing to the beginning of the input sequence | |
110 | /// \param end Iterator pointing to the end of the input sequence | |
111 | /// \param output Iterator pointing to the beginning of the output sequence | |
112 | /// | |
113 | /// \return Iterator to the end of the output sequence which has been written | |
114 | /// | |
115 | //////////////////////////////////////////////////////////// | |
116 | template <typename In, typename Out> static Out FromWide( In begin, In end, Out output ); | |
117 | |||
118 | //////////////////////////////////////////////////////////// | |
119 | /// \brief Convert a latin-1 (ISO-8859-1) characters range to UTF-8 | |
120 | /// | |
121 | /// \param begin Iterator pointing to the beginning of the input sequence | |
122 | /// \param end Iterator pointing to the end of the input sequence | |
123 | /// \param output Iterator pointing to the beginning of the output sequence | |
124 | /// \note No locale is involved: unlike FromAnsi, this function takes no locale parameter | |
125 | /// | |
126 | /// \return Iterator to the end of the output sequence which has been written | |
127 | /// | |
128 | //////////////////////////////////////////////////////////// | |
129 | template <typename In, typename Out> static Out FromLatin1( In begin, In end, Out output ); | |
130 | |||
131 | //////////////////////////////////////////////////////////// | |
132 | /// \brief Convert a UTF-8 characters range to ANSI characters | |
133 | /// | |
134 | /// The current global locale will be used by default, unless you | |
135 | /// pass a custom one in the \a locale parameter. | |
136 | /// | |
137 | /// \param begin Iterator pointing to the beginning of the input sequence | |
138 | /// \param end Iterator pointing to the end of the input sequence | |
139 | /// \param output Iterator pointing to the beginning of the output sequence | |
140 | /// \param replacement Replacement for characters not convertible to ANSI (use 0 to skip them) | |
141 | /// \param locale Locale to use for conversion | |
142 | /// | |
143 | /// \return Iterator to the end of the output sequence which has been written | |
144 | /// | |
145 | //////////////////////////////////////////////////////////// | |
146 | template <typename In, typename Out> | |
147 | static Out ToAnsi( In begin, In end, Out output, char replacement = 0, | |
148 | const std::locale& locale = std::locale() ); | |
149 | |||
150 | #ifndef EFSW_NO_WIDECHAR | |
151 | //////////////////////////////////////////////////////////// | |
152 | /// \brief Convert a UTF-8 characters range to wide characters | |
153 | /// | |
154 | /// \param begin Iterator pointing to the beginning of the input sequence | |
155 | /// \param end Iterator pointing to the end of the input sequence | |
156 | /// \param output Iterator pointing to the beginning of the output sequence | |
157 | /// \param replacement Replacement for characters not convertible to wide (use 0 to skip them) | |
158 | /// | |
159 | /// \return Iterator to the end of the output sequence which has been written | |
160 | /// | |
161 | //////////////////////////////////////////////////////////// | |
162 | template <typename In, typename Out> | |
163 | static Out ToWide( In begin, In end, Out output, wchar_t replacement = 0 ); | |
164 | #endif | |
165 | |||
166 | //////////////////////////////////////////////////////////// | |
167 | /// \brief Convert a UTF-8 characters range to latin-1 (ISO-8859-1) characters | |
168 | /// | |
169 | /// \param begin Iterator pointing to the beginning of the input sequence | |
170 | /// \param end Iterator pointing to the end of the input sequence | |
171 | /// \param output Iterator pointing to the beginning of the output sequence | |
172 | /// \param replacement Replacement for characters not convertible to latin-1 (use 0 to skip them) | |
173 | /// | |
174 | /// \return Iterator to the end of the output sequence which has been written | |
175 | /// | |
176 | //////////////////////////////////////////////////////////// | |
177 | template <typename In, typename Out> | |
178 | static Out ToLatin1( In begin, In end, Out output, char replacement = 0 ); | |
179 | |||
180 | //////////////////////////////////////////////////////////// | |
181 | /// \brief Convert a UTF-8 characters range to UTF-8 | |
182 | /// | |
183 | /// This function does nothing more than a direct copy; | |
184 | /// it is defined only to provide the same interface as other | |
185 | /// specializations of the efsw::Utf<> template, and allow | |
186 | /// generic code to be written on top of it. | |
187 | /// | |
188 | /// \param begin Iterator pointing to the beginning of the input sequence | |
189 | /// \param end Iterator pointing to the end of the input sequence | |
190 | /// \param output Iterator pointing to the beginning of the output sequence | |
191 | /// | |
192 | /// \return Iterator to the end of the output sequence which has been written | |
193 | /// | |
194 | //////////////////////////////////////////////////////////// | |
195 | template <typename In, typename Out> static Out toUtf8( In begin, In end, Out output ); | |
196 | |||
197 | //////////////////////////////////////////////////////////// | |
198 | /// \brief Convert a UTF-8 characters range to UTF-16 | |
199 | /// | |
200 | /// \param begin Iterator pointing to the beginning of the input sequence | |
201 | /// \param end Iterator pointing to the end of the input sequence | |
202 | /// \param output Iterator pointing to the beginning of the output sequence | |
203 | /// | |
204 | /// \return Iterator to the end of the output sequence which has been written | |
205 | /// | |
206 | //////////////////////////////////////////////////////////// | |
207 | template <typename In, typename Out> static Out ToUtf16( In begin, In end, Out output ); | |
208 | |||
209 | //////////////////////////////////////////////////////////// | |
210 | /// \brief Convert a UTF-8 characters range to UTF-32 | |
211 | /// | |
212 | /// \param begin Iterator pointing to the beginning of the input sequence | |
213 | /// \param end Iterator pointing to the end of the input sequence | |
214 | /// \param output Iterator pointing to the beginning of the output sequence | |
215 | /// | |
216 | /// \return Iterator to the end of the output sequence which has been written | |
217 | /// | |
218 | //////////////////////////////////////////////////////////// | |
219 | template <typename In, typename Out> static Out ToUtf32( In begin, In end, Out output ); | |
220 | }; | |
221 | |||
222 | //////////////////////////////////////////////////////////// | ||
223 | /// \brief Specialization of the Utf template for UTF-16 | ||
224 | /// | ||
225 | //////////////////////////////////////////////////////////// | ||
226 | template <> class Utf<16> { | |
227 | public: | |
228 | //////////////////////////////////////////////////////////// | |
229 | /// \brief Decode a single UTF-16 character | |
230 | /// | |
231 | /// Decoding a character means finding its unique 32-bit | |
232 | /// code (called the codepoint) in the Unicode standard. | |
233 | /// | |
234 | /// \param begin Iterator pointing to the beginning of the input sequence | |
235 | /// \param end Iterator pointing to the end of the input sequence | |
236 | /// \param output Codepoint of the decoded UTF-16 character | |
237 | /// \param replacement Replacement character to use in case the UTF-16 sequence is invalid | |
238 | /// | |
239 | /// \return Iterator pointing to one past the last read element of the input sequence | |
240 | /// | |
241 | //////////////////////////////////////////////////////////// | |
242 | template <typename In> | |
243 | static In Decode( In begin, In end, Uint32& output, Uint32 replacement = 0 ); | |
244 | |||
245 | //////////////////////////////////////////////////////////// | |
246 | /// \brief Encode a single UTF-16 character | |
247 | /// | |
248 | /// Encoding a character means converting a unique 32-bit | |
249 | /// code (called the codepoint) into the target encoding, UTF-16. | |
250 | /// | |
251 | /// \param input Codepoint to encode as UTF-16 | |
252 | /// \param output Iterator pointing to the beginning of the output sequence | |
253 | /// \param replacement Replacement for characters not convertible to UTF-16 (use 0 to skip them) | |
254 | /// | |
255 | /// \return Iterator to the end of the output sequence which has been written | |
256 | /// | |
257 | //////////////////////////////////////////////////////////// | |
258 | template <typename Out> static Out Encode( Uint32 input, Out output, Uint16 replacement = 0 ); | |
259 | |||
260 | //////////////////////////////////////////////////////////// | |
261 | /// \brief Advance to the next UTF-16 character | |
262 | /// | |
263 | /// This function is necessary for multi-element encodings, as | |
264 | /// a single character may use more than 1 storage element. | |
265 | /// | |
266 | /// \param begin Iterator pointing to the beginning of the input sequence | |
267 | /// \param end Iterator pointing to the end of the input sequence | |
268 | /// | |
269 | /// \return Iterator pointing to one past the last read element of the input sequence | |
270 | /// | |
271 | //////////////////////////////////////////////////////////// | |
272 | template <typename In> static In Next( In begin, In end ); | |
273 | |||
274 | //////////////////////////////////////////////////////////// | |
275 | /// \brief Count the number of characters of a UTF-16 sequence | |
276 | /// | |
277 | /// This function is necessary for multi-element encodings, as | |
278 | /// a single character may use more than 1 storage element, thus the | |
279 | /// total size can be different from (end - begin). | |
280 | /// | |
281 | /// \param begin Iterator pointing to the beginning of the input sequence | |
282 | /// \param end Iterator pointing to the end of the input sequence | |
283 | /// | |
284 | /// \return Number of characters in the sequence [begin, end) | |
285 | /// | |
286 | //////////////////////////////////////////////////////////// | |
287 | template <typename In> static std::size_t Count( In begin, In end ); | |
288 | |||
289 | //////////////////////////////////////////////////////////// | |
290 | /// \brief Convert an ANSI characters range to UTF-16 | |
291 | /// | |
292 | /// The current global locale will be used by default, unless you | |
293 | /// pass a custom one in the \a locale parameter. | |
294 | /// | |
295 | /// \param begin Iterator pointing to the beginning of the input sequence | |
296 | /// \param end Iterator pointing to the end of the input sequence | |
297 | /// \param output Iterator pointing to the beginning of the output sequence | |
298 | /// \param locale Locale to use for conversion | |
299 | /// | |
300 | /// \return Iterator to the end of the output sequence which has been written | |
301 | /// | |
302 | //////////////////////////////////////////////////////////// | |
303 | template <typename In, typename Out> | |
304 | static Out FromAnsi( In begin, In end, Out output, const std::locale& locale = std::locale() ); | |
305 | |||
306 | //////////////////////////////////////////////////////////// | |
307 | /// \brief Convert a wide characters range to UTF-16 | |
308 | /// | |
309 | /// \param begin Iterator pointing to the beginning of the input sequence | |
310 | /// \param end Iterator pointing to the end of the input sequence | |
311 | /// \param output Iterator pointing to the beginning of the output sequence | |
312 | /// | |
313 | /// \return Iterator to the end of the output sequence which has been written | |
314 | /// | |
315 | //////////////////////////////////////////////////////////// | |
316 | template <typename In, typename Out> static Out FromWide( In begin, In end, Out output ); | |
317 | |||
318 | //////////////////////////////////////////////////////////// | |
319 | /// \brief Convert a latin-1 (ISO-8859-1) characters range to UTF-16 | |
320 | /// | |
321 | /// \param begin Iterator pointing to the beginning of the input sequence | |
322 | /// \param end Iterator pointing to the end of the input sequence | |
323 | /// \param output Iterator pointing to the beginning of the output sequence | |
324 | /// \note No locale is involved: unlike FromAnsi, this function takes no locale parameter | |
325 | /// | |
326 | /// \return Iterator to the end of the output sequence which has been written | |
327 | /// | |
328 | //////////////////////////////////////////////////////////// | |
329 | template <typename In, typename Out> static Out FromLatin1( In begin, In end, Out output ); | |
330 | |||
331 | //////////////////////////////////////////////////////////// | |
332 | /// \brief Convert a UTF-16 characters range to ANSI characters | |
333 | /// | |
334 | /// The current global locale will be used by default, unless you | |
335 | /// pass a custom one in the \a locale parameter. | |
336 | /// | |
337 | /// \param begin Iterator pointing to the beginning of the input sequence | |
338 | /// \param end Iterator pointing to the end of the input sequence | |
339 | /// \param output Iterator pointing to the beginning of the output sequence | |
340 | /// \param replacement Replacement for characters not convertible to ANSI (use 0 to skip them) | |
341 | /// \param locale Locale to use for conversion | |
342 | /// | |
343 | /// \return Iterator to the end of the output sequence which has been written | |
344 | /// | |
345 | //////////////////////////////////////////////////////////// | |
346 | template <typename In, typename Out> | |
347 | static Out ToAnsi( In begin, In end, Out output, char replacement = 0, | |
348 | const std::locale& locale = std::locale() ); | |
349 | |||
350 | #ifndef EFSW_NO_WIDECHAR | |
351 | //////////////////////////////////////////////////////////// | |
352 | /// \brief Convert a UTF-16 characters range to wide characters | |
353 | /// | |
354 | /// \param begin Iterator pointing to the beginning of the input sequence | |
355 | /// \param end Iterator pointing to the end of the input sequence | |
356 | /// \param output Iterator pointing to the beginning of the output sequence | |
357 | /// \param replacement Replacement for characters not convertible to wide (use 0 to skip them) | |
358 | /// | |
359 | /// \return Iterator to the end of the output sequence which has been written | |
360 | /// | |
361 | //////////////////////////////////////////////////////////// | |
362 | template <typename In, typename Out> | |
363 | static Out ToWide( In begin, In end, Out output, wchar_t replacement = 0 ); | |
364 | #endif | |
365 | |||
366 | //////////////////////////////////////////////////////////// | |
367 | /// \brief Convert a UTF-16 characters range to latin-1 (ISO-8859-1) characters | |
368 | /// | |
369 | /// \param begin Iterator pointing to the beginning of the input sequence | |
370 | /// \param end Iterator pointing to the end of the input sequence | |
371 | /// \param output Iterator pointing to the beginning of the output sequence | |
372 | /// \param replacement Replacement for characters not convertible to latin-1 (use 0 to skip them) | |
373 | /// | |
374 | /// \return Iterator to the end of the output sequence which has been written | |
375 | /// | |
376 | //////////////////////////////////////////////////////////// | |
377 | template <typename In, typename Out> | |
378 | static Out ToLatin1( In begin, In end, Out output, char replacement = 0 ); | |
379 | |||
380 | //////////////////////////////////////////////////////////// | |
381 | /// \brief Convert a UTF-16 characters range to UTF-8 | |
382 | /// | |
383 | /// \param begin Iterator pointing to the beginning of the input sequence | |
384 | /// \param end Iterator pointing to the end of the input sequence | |
385 | /// \param output Iterator pointing to the beginning of the output sequence | |
386 | /// | |
387 | /// \return Iterator to the end of the output sequence which has been written | |
388 | /// | |
389 | //////////////////////////////////////////////////////////// | |
390 | template <typename In, typename Out> static Out toUtf8( In begin, In end, Out output ); | |
391 | |||
392 | //////////////////////////////////////////////////////////// | |
393 | /// \brief Convert a UTF-16 characters range to UTF-16 | |
394 | /// | |
395 | /// This function does nothing more than a direct copy; | |
396 | /// it is defined only to provide the same interface as other | |
397 | /// specializations of the efsw::Utf<> template, and allow | |
398 | /// generic code to be written on top of it. | |
399 | /// | |
400 | /// \param begin Iterator pointing to the beginning of the input sequence | |
401 | /// \param end Iterator pointing to the end of the input sequence | |
402 | /// \param output Iterator pointing to the beginning of the output sequence | |
403 | /// | |
404 | /// \return Iterator to the end of the output sequence which has been written | |
405 | /// | |
406 | //////////////////////////////////////////////////////////// | |
407 | template <typename In, typename Out> static Out ToUtf16( In begin, In end, Out output ); | |
408 | |||
409 | //////////////////////////////////////////////////////////// | |
410 | /// \brief Convert a UTF-16 characters range to UTF-32 | |
411 | /// | |
412 | /// \param begin Iterator pointing to the beginning of the input sequence | |
413 | /// \param end Iterator pointing to the end of the input sequence | |
414 | /// \param output Iterator pointing to the beginning of the output sequence | |
415 | /// | |
416 | /// \return Iterator to the end of the output sequence which has been written | |
417 | /// | |
418 | //////////////////////////////////////////////////////////// | |
419 | template <typename In, typename Out> static Out ToUtf32( In begin, In end, Out output ); | |
420 | }; | |
421 | |||
422 | //////////////////////////////////////////////////////////// | ||
423 | /// \brief Specialization of the Utf template for UTF-32 | ||
424 | /// | ||
425 | //////////////////////////////////////////////////////////// | ||
426 | template <> class Utf<32> { | ||
427 | public: | ||
428 | //////////////////////////////////////////////////////////// | ||
429 | /// \brief Decode a single UTF-32 character | ||
430 | /// | ||
431 | /// Decoding a character means finding its unique 32-bits | ||
432 | /// code (called the codepoint) in the Unicode standard. | ||
433 | /// For UTF-32, the character value is the same as the codepoint. | ||
434 | /// | ||
435 | /// \param begin Iterator pointing to the beginning of the input sequence | ||
436 | /// \param end Iterator pointing to the end of the input sequence | ||
437 | /// \param output Codepoint of the decoded UTF-32 character | ||
438 | /// \param replacement Replacement character to use in case the UTF-32 sequence is invalid | |
439 | /// | ||
440 | /// \return Iterator pointing to one past the last read element of the input sequence | ||
441 | /// | ||
442 | //////////////////////////////////////////////////////////// | ||
443 | template <typename In> | ||
444 | static In Decode( In begin, In end, Uint32& output, Uint32 replacement = 0 ); | ||
445 | |||
446 | //////////////////////////////////////////////////////////// | ||
447 | /// \brief Encode a single UTF-32 character | ||
448 | /// | ||
449 | /// Encoding a character means converting a unique 32-bits | ||
450 | /// code (called the codepoint) in the target encoding, UTF-32. | ||
451 | /// For UTF-32, the codepoint is the same as the character value. | ||
452 | /// | ||
453 | /// \param input Codepoint to encode as UTF-32 | ||
454 | /// \param output Iterator pointing to the beginning of the output sequence | ||
455 | /// \param replacement Replacement for characters not convertible to UTF-32 (use 0 to skip them) | ||
456 | /// | ||
457 | /// \return Iterator to the end of the output sequence which has been written | ||
458 | /// | ||
459 | //////////////////////////////////////////////////////////// | ||
460 | template <typename Out> static Out Encode( Uint32 input, Out output, Uint32 replacement = 0 ); | ||
461 | |||
462 | //////////////////////////////////////////////////////////// | ||
463 | /// \brief Advance to the next UTF-32 character | ||
464 | /// | ||
465 | /// This function is trivial for UTF-32, which can store | ||
466 | /// every character in a single storage element. | ||
467 | /// | ||
468 | /// \param begin Iterator pointing to the beginning of the input sequence | ||
469 | /// \param end Iterator pointing to the end of the input sequence | ||
470 | /// | ||
471 | /// \return Iterator pointing to one past the last read element of the input sequence | ||
472 | /// | ||
473 | //////////////////////////////////////////////////////////// | ||
474 | template <typename In> static In Next( In begin, In end ); | ||
475 | |||
476 | //////////////////////////////////////////////////////////// | ||
477 | /// \brief Count the number of characters of a UTF-32 sequence | ||
478 | /// | ||
479 | /// This function is trivial for UTF-32, which can store | ||
480 | /// every character in a single storage element. | ||
481 | /// | ||
482 | /// \param begin Iterator pointing to the beginning of the input sequence | ||
483 | /// \param end Iterator pointing to the end of the input sequence | ||
484 | /// | ||
485 | /// \return Number of characters in the sequence [begin, end) | |
486 | /// | ||
487 | //////////////////////////////////////////////////////////// | ||
488 | template <typename In> static std::size_t Count( In begin, In end ); | ||
489 | |||
490 | //////////////////////////////////////////////////////////// | ||
491 | /// \brief Convert an ANSI characters range to UTF-32 | ||
492 | /// | ||
493 | /// The current global locale will be used by default, unless you | ||
494 | /// pass a custom one in the \a locale parameter. | ||
495 | /// | ||
496 | /// \param begin Iterator pointing to the beginning of the input sequence | ||
497 | /// \param end Iterator pointing to the end of the input sequence | ||
498 | /// \param output Iterator pointing to the beginning of the output sequence | ||
499 | /// \param locale Locale to use for conversion | ||
500 | /// | ||
501 | /// \return Iterator to the end of the output sequence which has been written | ||
502 | /// | ||
503 | //////////////////////////////////////////////////////////// | ||
504 | template <typename In, typename Out> | ||
505 | static Out FromAnsi( In begin, In end, Out output, const std::locale& locale = std::locale() ); | ||
506 | |||
507 | //////////////////////////////////////////////////////////// | ||
508 | /// \brief Convert a wide characters range to UTF-32 | ||
509 | /// | ||
510 | /// \param begin Iterator pointing to the beginning of the input sequence | ||
511 | /// \param end Iterator pointing to the end of the input sequence | ||
512 | /// \param output Iterator pointing to the beginning of the output sequence | ||
513 | /// | ||
514 | /// \return Iterator to the end of the output sequence which has been written | ||
515 | /// | ||
516 | //////////////////////////////////////////////////////////// | ||
517 | template <typename In, typename Out> static Out FromWide( In begin, In end, Out output ); | ||
518 | |||
519 | //////////////////////////////////////////////////////////// | ||
520 | /// \brief Convert a latin-1 (ISO-8859-1) characters range to UTF-32 | |
521 | /// | ||
522 | /// \param begin Iterator pointing to the beginning of the input sequence | ||
523 | /// \param end Iterator pointing to the end of the input sequence | ||
524 | /// \param output Iterator pointing to the beginning of the output sequence | ||
525 | /// \note No locale is involved: unlike FromAnsi, this function takes no locale parameter | |
526 | /// | ||
527 | /// \return Iterator to the end of the output sequence which has been written | ||
528 | /// | ||
529 | //////////////////////////////////////////////////////////// | ||
530 | template <typename In, typename Out> static Out FromLatin1( In begin, In end, Out output ); | ||
531 | |||
532 | //////////////////////////////////////////////////////////// | ||
533 | /// \brief Convert an UTF-32 characters range to ANSI characters | ||
534 | /// | ||
535 | /// The current global locale will be used by default, unless you | ||
536 | /// pass a custom one in the \a locale parameter. | ||
537 | /// | ||
538 | /// \param begin Iterator pointing to the beginning of the input sequence | ||
539 | /// \param end Iterator pointing to the end of the input sequence | ||
540 | /// \param output Iterator pointing to the beginning of the output sequence | ||
541 | /// \param replacement Replacement for characters not convertible to ANSI (use 0 to skip them) | ||
542 | /// \param locale Locale to use for conversion | ||
543 | /// | ||
544 | /// \return Iterator to the end of the output sequence which has been written | ||
545 | /// | ||
546 | //////////////////////////////////////////////////////////// | ||
547 | template <typename In, typename Out> | ||
548 | static Out ToAnsi( In begin, In end, Out output, char replacement = 0, | ||
549 | const std::locale& locale = std::locale() ); | ||
550 | |||
551 | #ifndef EFSW_NO_WIDECHAR | ||
552 | //////////////////////////////////////////////////////////// | ||
553 | /// \brief Convert a range of UTF-32 characters to wide characters | ||
554 | /// | ||
555 | /// \param begin Iterator pointing to the beginning of the input sequence | ||
556 | /// \param end Iterator pointing to the end of the input sequence | ||
557 | /// \param output Iterator pointing to the beginning of the output sequence | ||
558 | /// \param replacement Replacement for characters not convertible to wide (use 0 to skip them) | ||
559 | /// | ||
560 | /// \return Iterator to the end of the output sequence which has been written | ||
561 | /// | ||
562 | //////////////////////////////////////////////////////////// | ||
563 | template <typename In, typename Out> | ||
564 | static Out ToWide( In begin, In end, Out output, wchar_t replacement = 0 ); | ||
565 | #endif | ||
566 | |||
567 | //////////////////////////////////////////////////////////// | ||
568 | /// \brief Convert a range of UTF-32 characters to latin-1 (ISO-8859-1) characters | ||
569 | /// | ||
570 | /// \param begin Iterator pointing to the beginning of the input sequence | ||
571 | /// \param end Iterator pointing to the end of the input sequence | ||
572 | /// \param output Iterator pointing to the beginning of the output sequence | ||
573 | /// \param replacement Replacement for characters not convertible to latin-1 (use 0 to skip them) | ||
574 | /// | ||
575 | /// \return Iterator to the end of the output sequence which has been written | ||
576 | /// | ||
577 | //////////////////////////////////////////////////////////// | ||
578 | template <typename In, typename Out> | ||
579 | static Out ToLatin1( In begin, In end, Out output, char replacement = 0 ); | ||
580 | |||
581 | //////////////////////////////////////////////////////////// | ||
582 | /// \brief Convert a range of UTF-32 characters to UTF-8 | ||
583 | /// | ||
584 | /// \param begin Iterator pointing to the beginning of the input sequence | ||
585 | /// \param end Iterator pointing to the end of the input sequence | ||
586 | /// \param output Iterator pointing to the beginning of the output sequence | ||
587 | /// | ||
588 | /// \return Iterator to the end of the output sequence which has been written | ||
589 | /// | ||
590 | //////////////////////////////////////////////////////////// | ||
591 | template <typename In, typename Out> static Out toUtf8( In begin, In end, Out output ); | ||
592 | |||
593 | //////////////////////////////////////////////////////////// | ||
594 | /// \brief Convert a range of UTF-32 characters to UTF-16 | ||
595 | /// | ||
596 | /// \param begin Iterator pointing to the beginning of the input sequence | ||
597 | /// \param end Iterator pointing to the end of the input sequence | ||
598 | /// \param output Iterator pointing to the beginning of the output sequence | ||
599 | /// | ||
600 | /// \return Iterator to the end of the output sequence which has been written | ||
601 | /// | ||
602 | //////////////////////////////////////////////////////////// | ||
603 | template <typename In, typename Out> static Out ToUtf16( In begin, In end, Out output ); | ||
604 | |||
605 | //////////////////////////////////////////////////////////// | ||
606 | /// \brief Convert a range of UTF-32 characters to UTF-32 | ||
607 | /// | ||
608 | /// This function does nothing more than a direct copy; | ||
609 | /// it is defined only to provide the same interface as other | ||
610 | /// specializations of the efsw::Utf<> template, and allow | ||
611 | /// generic code to be written on top of it. | ||
612 | /// | ||
613 | /// \param begin Iterator pointing to the beginning of the input sequence | ||
614 | /// \param end Iterator pointing to the end of the input sequence | ||
615 | /// \param output Iterator pointing to the beginning of the output sequence | ||
616 | /// | ||
617 | /// \return Iterator to the end of the output sequence which has been written | ||
618 | /// | ||
619 | //////////////////////////////////////////////////////////// | ||
620 | template <typename In, typename Out> static Out ToUtf32( In begin, In end, Out output ); | ||
621 | |||
622 | //////////////////////////////////////////////////////////// | ||
623 | /// \brief Decode a single ANSI character to UTF-32 | ||
624 | /// | ||
625 | /// This function does not exist in other specializations | ||
626 | /// of efsw::Utf<>, it is defined for convenience (it is used by | ||
627 | /// several other conversion functions). | ||
628 | /// | ||
629 | /// \param input Input ANSI character | ||
630 | /// \param locale Locale to use for conversion (defaults to the current global locale) | ||
631 | /// | ||
632 | /// \return Converted character, as a UTF-32 value | ||
633 | /// | ||
634 | //////////////////////////////////////////////////////////// | ||
635 | template <typename In> | ||
636 | static Uint32 DecodeAnsi( In input, const std::locale& locale = std::locale() ); | ||
637 | |||
638 | //////////////////////////////////////////////////////////// | ||
639 | /// \brief Decode a single wide character to UTF-32 | ||
640 | /// | ||
641 | /// This function does not exist in other specializations | ||
642 | /// of efsw::Utf<>, it is defined for convenience (it is used by | ||
643 | /// several other conversion functions). | ||
644 | /// | ||
645 | /// \param input Input wide character | ||
646 | /// | ||
647 | /// \return Converted character, as a UTF-32 value | ||
648 | /// | ||
649 | //////////////////////////////////////////////////////////// | ||
650 | template <typename In> static Uint32 DecodeWide( In input ); | ||
651 | |||
652 | //////////////////////////////////////////////////////////// | ||
653 | /// \brief Encode a single UTF-32 character to ANSI | ||
654 | /// | ||
655 | /// This function does not exist in other specializations | ||
656 | /// of efsw::Utf<>, it is defined for convenience (it is used by | ||
657 | /// several other conversion functions). | ||
658 | /// | ||
659 | /// \param codepoint UTF-32 character (code point value) to encode | ||
660 | /// \param output Iterator pointing to the beginning of the output sequence | ||
661 | /// \param replacement Replacement if the input character is not convertible to ANSI (use 0 to skip it) | ||
662 | /// \param locale Locale to use for conversion | ||
663 | /// | ||
664 | /// \return Iterator to the end of the output sequence which has been written | ||
665 | /// | ||
666 | //////////////////////////////////////////////////////////// | ||
667 | template <typename Out> | ||
668 | static Out EncodeAnsi( Uint32 codepoint, Out output, char replacement = 0, | ||
669 | const std::locale& locale = std::locale() ); | ||
670 | |||
671 | #ifndef EFSW_NO_WIDECHAR | ||
672 | //////////////////////////////////////////////////////////// | ||
673 | /// \brief Encode a single UTF-32 character to wide | ||
674 | /// | ||
675 | /// This function does not exist in other specializations | ||
676 | /// of efsw::Utf<>, it is defined for convenience (it is used by | ||
677 | /// several other conversion functions). | ||
678 | /// | ||
679 | /// \param codepoint UTF-32 character (code point value) to encode | ||
680 | /// \param output Iterator pointing to the beginning of the output sequence | ||
681 | /// \param replacement Replacement if the input character is not convertible to wide (use 0 to | ||
682 | /// skip it) | ||
683 | /// | ||
684 | /// \return Iterator to the end of the output sequence which has been written | ||
685 | /// | ||
686 | //////////////////////////////////////////////////////////// | ||
687 | template <typename Out> | ||
688 | static Out EncodeWide( Uint32 codepoint, Out output, wchar_t replacement = 0 ); | ||
689 | #endif | ||
690 | }; | ||
691 | |||
692 | #include "Utf.inl" | ||
693 | |||
694 | // Convenience typedefs so callers can write efsw::Utf8 / Utf16 / Utf32 instead of Utf<N> | ||
695 | typedef Utf<8> Utf8; | ||
696 | typedef Utf<16> Utf16; | ||
697 | typedef Utf<32> Utf32; | ||
698 | |||
699 | } // namespace efsw | ||
700 | #endif | ||
701 | |||
702 | //////////////////////////////////////////////////////////// | ||
703 | /// \class efsw::Utf | ||
704 | /// \ingroup system | ||
705 | /// | ||
706 | /// Utility class providing generic functions for UTF conversions. | ||
707 | /// | ||
708 | /// efsw::Utf is a low-level, generic interface for counting, iterating, | ||
709 | /// encoding and decoding Unicode characters and strings. It is able | ||
710 | /// to handle ANSI, wide, UTF-8, UTF-16 and UTF-32 encodings. | ||
711 | /// | ||
712 | /// efsw::Utf<X> functions are all static, these classes are not meant to | ||
713 | /// be instantiated. All the functions are templates, so that you | ||
714 | /// can use any character / string type for a given encoding. | ||
715 | /// | ||
716 | /// It has 3 specializations: | ||
717 | /// \li efsw::Utf<8> (typedef'd to efsw::Utf8) | ||
718 | /// \li efsw::Utf<16> (typedef'd to efsw::Utf16) | ||
719 | /// \li efsw::Utf<32> (typedef'd to efsw::Utf32) | ||
720 | /// | ||
721 | //////////////////////////////////////////////////////////// | ||