aboutsummaryrefslogtreecommitdiff
path: root/src/3rdParty/efsw/Utf.hpp
diff options
context:
space:
mode:
authorLi Jin <dragon-fly@qq.com>2022-11-15 17:23:46 +0800
committerLi Jin <dragon-fly@qq.com>2022-11-15 17:52:09 +0800
commit94f8330613877b3582d32bd11abd83a97b4399ad (patch)
tree5359de314be1ebde17f8d1e48632a97d18f9e50f /src/3rdParty/efsw/Utf.hpp
parent60f8f00a022ac08701792b2897b72d8c99b50f52 (diff)
downloadyuescript-94f8330613877b3582d32bd11abd83a97b4399ad.tar.gz
yuescript-94f8330613877b3582d32bd11abd83a97b4399ad.tar.bz2
yuescript-94f8330613877b3582d32bd11abd83a97b4399ad.zip
adding -w option to Yuescript tool.
Diffstat (limited to 'src/3rdParty/efsw/Utf.hpp')
-rwxr-xr-xsrc/3rdParty/efsw/Utf.hpp721
1 files changed, 721 insertions, 0 deletions
diff --git a/src/3rdParty/efsw/Utf.hpp b/src/3rdParty/efsw/Utf.hpp
new file mode 100755
index 0000000..6e9ea71
--- /dev/null
+++ b/src/3rdParty/efsw/Utf.hpp
@@ -0,0 +1,721 @@
1/** NOTE:
2 * This code is based on the Utf implementation from SFML2. License zlib/png (
3 *http://www.sfml-dev.org/license.php ) The class was modified to fit efsw own needs. This is not
4 *the original implementation from SFML2.
5 * */
6
7#ifndef EFSW_UTF_HPP
8#define EFSW_UTF_HPP
9
10////////////////////////////////////////////////////////////
11// Headers
12////////////////////////////////////////////////////////////
13#include <cstdlib>
14#include <efsw/base.hpp>
15#include <locale>
16#include <string>
17
18namespace efsw {
19
20template <unsigned int N> class Utf; // Specialized below for N = 8, 16 and 32
21
22////////////////////////////////////////////////////////////
23/// \brief Specialization of the Utf template for UTF-8
24///
25////////////////////////////////////////////////////////////
26template <> class Utf<8> {
27 public:
28 ////////////////////////////////////////////////////////////
29 /// \brief Decode a single UTF-8 character
30 ///
31 /// Decoding a character means finding its unique 32-bit
32 /// code (called the codepoint) in the Unicode standard.
33 ///
34 /// \param begin Iterator pointing to the beginning of the input sequence
35 /// \param end Iterator pointing to the end of the input sequence
36 /// \param output Codepoint of the decoded UTF-8 character
37 /// \param replacement Replacement character to use in case the UTF-8 sequence is invalid
38 ///
39 /// \return Iterator pointing to one past the last read element of the input sequence
40 ///
41 ////////////////////////////////////////////////////////////
42 template <typename In>
43 static In Decode( In begin, In end, Uint32& output, Uint32 replacement = 0 );
44
45 ////////////////////////////////////////////////////////////
46 /// \brief Encode a single UTF-8 character
47 ///
48 /// Encoding a character means converting a unique 32-bit
49 /// code (called the codepoint) to the target encoding, UTF-8.
50 ///
51 /// \param input Codepoint to encode as UTF-8
52 /// \param output Iterator pointing to the beginning of the output sequence
53 /// \param replacement Replacement for characters not convertible to UTF-8 (use 0 to skip them)
54 ///
55 /// \return Iterator to the end of the output sequence which has been written
56 ///
57 ////////////////////////////////////////////////////////////
58 template <typename Out> static Out Encode( Uint32 input, Out output, Uint8 replacement = 0 );
59
60 ////////////////////////////////////////////////////////////
61 /// \brief Advance to the next UTF-8 character
62 ///
63 /// This function is necessary for multi-element encodings, as
64 /// a single character may use more than 1 storage element.
65 ///
66 /// \param begin Iterator pointing to the beginning of the input sequence
67 /// \param end Iterator pointing to the end of the input sequence
68 ///
69 /// \return Iterator pointing to one past the last read element of the input sequence
70 ///
71 ////////////////////////////////////////////////////////////
72 template <typename In> static In Next( In begin, In end );
73
74 ////////////////////////////////////////////////////////////
75 /// \brief Count the number of characters of a UTF-8 sequence
76 ///
77 /// This function is necessary for multi-element encodings, as
78 /// a single character may use more than 1 storage element, thus the
79 /// total size can be different from (end - begin).
80 ///
81 /// \param begin Iterator pointing to the beginning of the input sequence
82 /// \param end Iterator pointing to the end of the input sequence
83 ///
84 /// \return Number of characters in the input sequence
85 ///
86 ////////////////////////////////////////////////////////////
87 template <typename In> static std::size_t Count( In begin, In end );
88
89 ////////////////////////////////////////////////////////////
90 /// \brief Convert an ANSI characters range to UTF-8
91 ///
92 /// The current global locale will be used by default, unless you
93 /// pass a custom one in the \a locale parameter.
94 ///
95 /// \param begin Iterator pointing to the beginning of the input sequence
96 /// \param end Iterator pointing to the end of the input sequence
97 /// \param output Iterator pointing to the beginning of the output sequence
98 /// \param locale Locale to use for conversion
99 ///
100 /// \return Iterator to the end of the output sequence which has been written
101 ///
102 ////////////////////////////////////////////////////////////
103 template <typename In, typename Out>
104 static Out FromAnsi( In begin, In end, Out output, const std::locale& locale = std::locale() );
105
106 ////////////////////////////////////////////////////////////
107 /// \brief Convert a wide characters range to UTF-8
108 ///
109 /// \param begin Iterator pointing to the beginning of the input sequence
110 /// \param end Iterator pointing to the end of the input sequence
111 /// \param output Iterator pointing to the beginning of the output sequence
112 ///
113 /// \return Iterator to the end of the output sequence which has been written
114 ///
115 ////////////////////////////////////////////////////////////
116 template <typename In, typename Out> static Out FromWide( In begin, In end, Out output );
117
118 ////////////////////////////////////////////////////////////
119 /// \brief Convert a latin-1 (ISO-8859-1) characters range to UTF-8
120 ///
121 /// \param begin Iterator pointing to the beginning of the input sequence
122 /// \param end Iterator pointing to the end of the input sequence
123 /// \param output Iterator pointing to the beginning of the output sequence
124 /// \note Unlike FromAnsi, this function takes no locale parameter
125 ///
126 /// \return Iterator to the end of the output sequence which has been written
127 ///
128 ////////////////////////////////////////////////////////////
129 template <typename In, typename Out> static Out FromLatin1( In begin, In end, Out output );
130
131 ////////////////////////////////////////////////////////////
132 /// \brief Convert a UTF-8 characters range to ANSI characters
133 ///
134 /// The current global locale will be used by default, unless you
135 /// pass a custom one in the \a locale parameter.
136 ///
137 /// \param begin Iterator pointing to the beginning of the input sequence
138 /// \param end Iterator pointing to the end of the input sequence
139 /// \param output Iterator pointing to the beginning of the output sequence
140 /// \param replacement Replacement for characters not convertible to ANSI (use 0 to skip them)
141 /// \param locale Locale to use for conversion
142 ///
143 /// \return Iterator to the end of the output sequence which has been written
144 ///
145 ////////////////////////////////////////////////////////////
146 template <typename In, typename Out>
147 static Out ToAnsi( In begin, In end, Out output, char replacement = 0,
148 const std::locale& locale = std::locale() );
149
150#ifndef EFSW_NO_WIDECHAR
151 ////////////////////////////////////////////////////////////
152 /// \brief Convert a UTF-8 characters range to wide characters
153 ///
154 /// \param begin Iterator pointing to the beginning of the input sequence
155 /// \param end Iterator pointing to the end of the input sequence
156 /// \param output Iterator pointing to the beginning of the output sequence
157 /// \param replacement Replacement for characters not convertible to wide (use 0 to skip them)
158 ///
159 /// \return Iterator to the end of the output sequence which has been written
160 ///
161 ////////////////////////////////////////////////////////////
162 template <typename In, typename Out>
163 static Out ToWide( In begin, In end, Out output, wchar_t replacement = 0 );
164#endif
165
166 ////////////////////////////////////////////////////////////
167 /// \brief Convert a UTF-8 characters range to latin-1 (ISO-8859-1) characters
168 ///
169 /// \param begin Iterator pointing to the beginning of the input sequence
170 /// \param end Iterator pointing to the end of the input sequence
171 /// \param output Iterator pointing to the beginning of the output sequence
172 /// \param replacement Replacement for characters not convertible to latin-1 (use 0 to skip them)
173 ///
174 /// \return Iterator to the end of the output sequence which has been written
175 ///
176 ////////////////////////////////////////////////////////////
177 template <typename In, typename Out>
178 static Out ToLatin1( In begin, In end, Out output, char replacement = 0 );
179
180 ////////////////////////////////////////////////////////////
181 /// \brief Convert a UTF-8 characters range to UTF-8
182 ///
183 /// This function does nothing more than a direct copy;
184 /// it is defined only to provide the same interface as other
185 /// specializations of the efsw::Utf<> template, and allow
186 /// generic code to be written on top of it.
187 ///
188 /// \param begin Iterator pointing to the beginning of the input sequence
189 /// \param end Iterator pointing to the end of the input sequence
190 /// \param output Iterator pointing to the beginning of the output sequence
191 ///
192 /// \return Iterator to the end of the output sequence which has been written
193 ///
194 ////////////////////////////////////////////////////////////
195 template <typename In, typename Out> static Out toUtf8( In begin, In end, Out output );
196
197 ////////////////////////////////////////////////////////////
198 /// \brief Convert a UTF-8 characters range to UTF-16
199 ///
200 /// \param begin Iterator pointing to the beginning of the input sequence
201 /// \param end Iterator pointing to the end of the input sequence
202 /// \param output Iterator pointing to the beginning of the output sequence
203 ///
204 /// \return Iterator to the end of the output sequence which has been written
205 ///
206 ////////////////////////////////////////////////////////////
207 template <typename In, typename Out> static Out ToUtf16( In begin, In end, Out output );
208
209 ////////////////////////////////////////////////////////////
210 /// \brief Convert a UTF-8 characters range to UTF-32
211 ///
212 /// \param begin Iterator pointing to the beginning of the input sequence
213 /// \param end Iterator pointing to the end of the input sequence
214 /// \param output Iterator pointing to the beginning of the output sequence
215 ///
216 /// \return Iterator to the end of the output sequence which has been written
217 ///
218 ////////////////////////////////////////////////////////////
219 template <typename In, typename Out> static Out ToUtf32( In begin, In end, Out output );
220};
221
222////////////////////////////////////////////////////////////
223/// \brief Specialization of the Utf template for UTF-16
224///
225////////////////////////////////////////////////////////////
226template <> class Utf<16> {
227 public:
228 ////////////////////////////////////////////////////////////
229 /// \brief Decode a single UTF-16 character
230 ///
231 /// Decoding a character means finding its unique 32-bit
232 /// code (called the codepoint) in the Unicode standard.
233 ///
234 /// \param begin Iterator pointing to the beginning of the input sequence
235 /// \param end Iterator pointing to the end of the input sequence
236 /// \param output Codepoint of the decoded UTF-16 character
237 /// \param replacement Replacement character to use in case the UTF-16 sequence is invalid
238 ///
239 /// \return Iterator pointing to one past the last read element of the input sequence
240 ///
241 ////////////////////////////////////////////////////////////
242 template <typename In>
243 static In Decode( In begin, In end, Uint32& output, Uint32 replacement = 0 );
244
245 ////////////////////////////////////////////////////////////
246 /// \brief Encode a single UTF-16 character
247 ///
248 /// Encoding a character means converting a unique 32-bit
249 /// code (called the codepoint) to the target encoding, UTF-16.
250 ///
251 /// \param input Codepoint to encode as UTF-16
252 /// \param output Iterator pointing to the beginning of the output sequence
253 /// \param replacement Replacement for characters not convertible to UTF-16 (use 0 to skip them)
254 ///
255 /// \return Iterator to the end of the output sequence which has been written
256 ///
257 ////////////////////////////////////////////////////////////
258 template <typename Out> static Out Encode( Uint32 input, Out output, Uint16 replacement = 0 );
259
260 ////////////////////////////////////////////////////////////
261 /// \brief Advance to the next UTF-16 character
262 ///
263 /// This function is necessary for multi-element encodings, as
264 /// a single character may use more than 1 storage element.
265 ///
266 /// \param begin Iterator pointing to the beginning of the input sequence
267 /// \param end Iterator pointing to the end of the input sequence
268 ///
269 /// \return Iterator pointing to one past the last read element of the input sequence
270 ///
271 ////////////////////////////////////////////////////////////
272 template <typename In> static In Next( In begin, In end );
273
274 ////////////////////////////////////////////////////////////
275 /// \brief Count the number of characters of a UTF-16 sequence
276 ///
277 /// This function is necessary for multi-element encodings, as
278 /// a single character may use more than 1 storage element, thus the
279 /// total size can be different from (end - begin).
280 ///
281 /// \param begin Iterator pointing to the beginning of the input sequence
282 /// \param end Iterator pointing to the end of the input sequence
283 ///
284 /// \return Number of characters in the input sequence
285 ///
286 ////////////////////////////////////////////////////////////
287 template <typename In> static std::size_t Count( In begin, In end );
288
289 ////////////////////////////////////////////////////////////
290 /// \brief Convert an ANSI characters range to UTF-16
291 ///
292 /// The current global locale will be used by default, unless you
293 /// pass a custom one in the \a locale parameter.
294 ///
295 /// \param begin Iterator pointing to the beginning of the input sequence
296 /// \param end Iterator pointing to the end of the input sequence
297 /// \param output Iterator pointing to the beginning of the output sequence
298 /// \param locale Locale to use for conversion
299 ///
300 /// \return Iterator to the end of the output sequence which has been written
301 ///
302 ////////////////////////////////////////////////////////////
303 template <typename In, typename Out>
304 static Out FromAnsi( In begin, In end, Out output, const std::locale& locale = std::locale() );
305
306 ////////////////////////////////////////////////////////////
307 /// \brief Convert a wide characters range to UTF-16
308 ///
309 /// \param begin Iterator pointing to the beginning of the input sequence
310 /// \param end Iterator pointing to the end of the input sequence
311 /// \param output Iterator pointing to the beginning of the output sequence
312 ///
313 /// \return Iterator to the end of the output sequence which has been written
314 ///
315 ////////////////////////////////////////////////////////////
316 template <typename In, typename Out> static Out FromWide( In begin, In end, Out output );
317
318 ////////////////////////////////////////////////////////////
319 /// \brief Convert a latin-1 (ISO-8859-1) characters range to UTF-16
320 ///
321 /// \param begin Iterator pointing to the beginning of the input sequence
322 /// \param end Iterator pointing to the end of the input sequence
323 /// \param output Iterator pointing to the beginning of the output sequence
324 /// \note Unlike FromAnsi, this function takes no locale parameter
325 ///
326 /// \return Iterator to the end of the output sequence which has been written
327 ///
328 ////////////////////////////////////////////////////////////
329 template <typename In, typename Out> static Out FromLatin1( In begin, In end, Out output );
330
331 ////////////////////////////////////////////////////////////
332 /// \brief Convert a UTF-16 characters range to ANSI characters
333 ///
334 /// The current global locale will be used by default, unless you
335 /// pass a custom one in the \a locale parameter.
336 ///
337 /// \param begin Iterator pointing to the beginning of the input sequence
338 /// \param end Iterator pointing to the end of the input sequence
339 /// \param output Iterator pointing to the beginning of the output sequence
340 /// \param replacement Replacement for characters not convertible to ANSI (use 0 to skip them)
341 /// \param locale Locale to use for conversion
342 ///
343 /// \return Iterator to the end of the output sequence which has been written
344 ///
345 ////////////////////////////////////////////////////////////
346 template <typename In, typename Out>
347 static Out ToAnsi( In begin, In end, Out output, char replacement = 0,
348 const std::locale& locale = std::locale() );
349
350#ifndef EFSW_NO_WIDECHAR
351 ////////////////////////////////////////////////////////////
352 /// \brief Convert a UTF-16 characters range to wide characters
353 ///
354 /// \param begin Iterator pointing to the beginning of the input sequence
355 /// \param end Iterator pointing to the end of the input sequence
356 /// \param output Iterator pointing to the beginning of the output sequence
357 /// \param replacement Replacement for characters not convertible to wide (use 0 to skip them)
358 ///
359 /// \return Iterator to the end of the output sequence which has been written
360 ///
361 ////////////////////////////////////////////////////////////
362 template <typename In, typename Out>
363 static Out ToWide( In begin, In end, Out output, wchar_t replacement = 0 );
364#endif
365
366 ////////////////////////////////////////////////////////////
367 /// \brief Convert a UTF-16 characters range to latin-1 (ISO-8859-1) characters
368 ///
369 /// \param begin Iterator pointing to the beginning of the input sequence
370 /// \param end Iterator pointing to the end of the input sequence
371 /// \param output Iterator pointing to the beginning of the output sequence
372 /// \param replacement Replacement for characters not convertible to latin-1 (use 0 to skip them)
373 ///
374 /// \return Iterator to the end of the output sequence which has been written
375 ///
376 ////////////////////////////////////////////////////////////
377 template <typename In, typename Out>
378 static Out ToLatin1( In begin, In end, Out output, char replacement = 0 );
379
380 ////////////////////////////////////////////////////////////
381 /// \brief Convert a UTF-16 characters range to UTF-8
382 ///
383 /// \param begin Iterator pointing to the beginning of the input sequence
384 /// \param end Iterator pointing to the end of the input sequence
385 /// \param output Iterator pointing to the beginning of the output sequence
386 ///
387 /// \return Iterator to the end of the output sequence which has been written
388 ///
389 ////////////////////////////////////////////////////////////
390 template <typename In, typename Out> static Out toUtf8( In begin, In end, Out output );
391
392 ////////////////////////////////////////////////////////////
393 /// \brief Convert a UTF-16 characters range to UTF-16
394 ///
395 /// This function does nothing more than a direct copy;
396 /// it is defined only to provide the same interface as other
397 /// specializations of the efsw::Utf<> template, and allow
398 /// generic code to be written on top of it.
399 ///
400 /// \param begin Iterator pointing to the beginning of the input sequence
401 /// \param end Iterator pointing to the end of the input sequence
402 /// \param output Iterator pointing to the beginning of the output sequence
403 ///
404 /// \return Iterator to the end of the output sequence which has been written
405 ///
406 ////////////////////////////////////////////////////////////
407 template <typename In, typename Out> static Out ToUtf16( In begin, In end, Out output );
408
409 ////////////////////////////////////////////////////////////
410 /// \brief Convert a UTF-16 characters range to UTF-32
411 ///
412 /// \param begin Iterator pointing to the beginning of the input sequence
413 /// \param end Iterator pointing to the end of the input sequence
414 /// \param output Iterator pointing to the beginning of the output sequence
415 ///
416 /// \return Iterator to the end of the output sequence which has been written
417 ///
418 ////////////////////////////////////////////////////////////
419 template <typename In, typename Out> static Out ToUtf32( In begin, In end, Out output );
420};
421
422////////////////////////////////////////////////////////////
423/// \brief Specialization of the Utf template for UTF-32
424///
425////////////////////////////////////////////////////////////
426template <> class Utf<32> {
427 public:
428 ////////////////////////////////////////////////////////////
429 /// \brief Decode a single UTF-32 character
430 ///
431 /// Decoding a character means finding its unique 32-bit
432 /// code (called the codepoint) in the Unicode standard.
433 /// For UTF-32, the character value is the same as the codepoint.
434 ///
435 /// \param begin Iterator pointing to the beginning of the input sequence
436 /// \param end Iterator pointing to the end of the input sequence
437 /// \param output Codepoint of the decoded UTF-32 character
438 /// \param replacement Replacement character to use in case the UTF-32 sequence is invalid
439 ///
440 /// \return Iterator pointing to one past the last read element of the input sequence
441 ///
442 ////////////////////////////////////////////////////////////
443 template <typename In>
444 static In Decode( In begin, In end, Uint32& output, Uint32 replacement = 0 );
445
446 ////////////////////////////////////////////////////////////
447 /// \brief Encode a single UTF-32 character
448 ///
449 /// Encoding a character means converting a unique 32-bit
450 /// code (called the codepoint) to the target encoding, UTF-32.
451 /// For UTF-32, the codepoint is the same as the character value.
452 ///
453 /// \param input Codepoint to encode as UTF-32
454 /// \param output Iterator pointing to the beginning of the output sequence
455 /// \param replacement Replacement for characters not convertible to UTF-32 (use 0 to skip them)
456 ///
457 /// \return Iterator to the end of the output sequence which has been written
458 ///
459 ////////////////////////////////////////////////////////////
460 template <typename Out> static Out Encode( Uint32 input, Out output, Uint32 replacement = 0 );
461
462 ////////////////////////////////////////////////////////////
463 /// \brief Advance to the next UTF-32 character
464 ///
465 /// This function is trivial for UTF-32, which can store
466 /// every character in a single storage element.
467 ///
468 /// \param begin Iterator pointing to the beginning of the input sequence
469 /// \param end Iterator pointing to the end of the input sequence
470 ///
471 /// \return Iterator pointing to one past the last read element of the input sequence
472 ///
473 ////////////////////////////////////////////////////////////
474 template <typename In> static In Next( In begin, In end );
475
476 ////////////////////////////////////////////////////////////
477 /// \brief Count the number of characters of a UTF-32 sequence
478 ///
479 /// This function is trivial for UTF-32, which can store
480 /// every character in a single storage element.
481 ///
482 /// \param begin Iterator pointing to the beginning of the input sequence
483 /// \param end Iterator pointing to the end of the input sequence
484 ///
485 /// \return Number of characters in the input sequence
486 ///
487 ////////////////////////////////////////////////////////////
488 template <typename In> static std::size_t Count( In begin, In end );
489
490 ////////////////////////////////////////////////////////////
491 /// \brief Convert an ANSI characters range to UTF-32
492 ///
493 /// The current global locale will be used by default, unless you
494 /// pass a custom one in the \a locale parameter.
495 ///
496 /// \param begin Iterator pointing to the beginning of the input sequence
497 /// \param end Iterator pointing to the end of the input sequence
498 /// \param output Iterator pointing to the beginning of the output sequence
499 /// \param locale Locale to use for conversion
500 ///
501 /// \return Iterator to the end of the output sequence which has been written
502 ///
503 ////////////////////////////////////////////////////////////
504 template <typename In, typename Out>
505 static Out FromAnsi( In begin, In end, Out output, const std::locale& locale = std::locale() );
506
507 ////////////////////////////////////////////////////////////
508 /// \brief Convert a wide characters range to UTF-32
509 ///
510 /// \param begin Iterator pointing to the beginning of the input sequence
511 /// \param end Iterator pointing to the end of the input sequence
512 /// \param output Iterator pointing to the beginning of the output sequence
513 ///
514 /// \return Iterator to the end of the output sequence which has been written
515 ///
516 ////////////////////////////////////////////////////////////
517 template <typename In, typename Out> static Out FromWide( In begin, In end, Out output );
518
519 ////////////////////////////////////////////////////////////
520 /// \brief Convert a latin-1 (ISO-8859-1) characters range to UTF-32
521 ///
522 /// \param begin Iterator pointing to the beginning of the input sequence
523 /// \param end Iterator pointing to the end of the input sequence
524 /// \param output Iterator pointing to the beginning of the output sequence
525 /// \note Unlike FromAnsi, this function takes no locale parameter
526 ///
527 /// \return Iterator to the end of the output sequence which has been written
528 ///
529 ////////////////////////////////////////////////////////////
530 template <typename In, typename Out> static Out FromLatin1( In begin, In end, Out output );
531
532 ////////////////////////////////////////////////////////////
533 /// \brief Convert a UTF-32 characters range to ANSI characters
534 ///
535 /// The current global locale will be used by default, unless you
536 /// pass a custom one in the \a locale parameter.
537 ///
538 /// \param begin Iterator pointing to the beginning of the input sequence
539 /// \param end Iterator pointing to the end of the input sequence
540 /// \param output Iterator pointing to the beginning of the output sequence
541 /// \param replacement Replacement for characters not convertible to ANSI (use 0 to skip them)
542 /// \param locale Locale to use for conversion
543 ///
544 /// \return Iterator to the end of the output sequence which has been written
545 ///
546 ////////////////////////////////////////////////////////////
547 template <typename In, typename Out>
548 static Out ToAnsi( In begin, In end, Out output, char replacement = 0,
549 const std::locale& locale = std::locale() );
550
551#ifndef EFSW_NO_WIDECHAR
552 ////////////////////////////////////////////////////////////
553 /// \brief Convert a UTF-32 characters range to wide characters
554 ///
555 /// \param begin Iterator pointing to the beginning of the input sequence
556 /// \param end Iterator pointing to the end of the input sequence
557 /// \param output Iterator pointing to the beginning of the output sequence
558 /// \param replacement Replacement for characters not convertible to wide (use 0 to skip them)
559 ///
560 /// \return Iterator to the end of the output sequence which has been written
561 ///
562 ////////////////////////////////////////////////////////////
563 template <typename In, typename Out>
564 static Out ToWide( In begin, In end, Out output, wchar_t replacement = 0 );
565#endif
566
567 ////////////////////////////////////////////////////////////
568 /// \brief Convert a UTF-32 characters range to latin-1 (ISO-8859-1) characters
569 ///
570 /// \param begin Iterator pointing to the beginning of the input sequence
571 /// \param end Iterator pointing to the end of the input sequence
572 /// \param output Iterator pointing to the beginning of the output sequence
573 /// \param replacement Replacement for characters not convertible to latin-1 (use 0 to skip them)
574 ///
575 /// \return Iterator to the end of the output sequence which has been written
576 ///
577 ////////////////////////////////////////////////////////////
578 template <typename In, typename Out>
579 static Out ToLatin1( In begin, In end, Out output, char replacement = 0 );
580
581 ////////////////////////////////////////////////////////////
582 /// \brief Convert a UTF-32 characters range to UTF-8
583 ///
584 /// \param begin Iterator pointing to the beginning of the input sequence
585 /// \param end Iterator pointing to the end of the input sequence
586 /// \param output Iterator pointing to the beginning of the output sequence
587 ///
588 /// \return Iterator to the end of the output sequence which has been written
589 ///
590 ////////////////////////////////////////////////////////////
591 template <typename In, typename Out> static Out toUtf8( In begin, In end, Out output );
592
593 ////////////////////////////////////////////////////////////
594 /// \brief Convert a UTF-32 characters range to UTF-16
595 ///
596 /// \param begin Iterator pointing to the beginning of the input sequence
597 /// \param end Iterator pointing to the end of the input sequence
598 /// \param output Iterator pointing to the beginning of the output sequence
599 ///
600 /// \return Iterator to the end of the output sequence which has been written
601 ///
602 ////////////////////////////////////////////////////////////
603 template <typename In, typename Out> static Out ToUtf16( In begin, In end, Out output );
604
605 ////////////////////////////////////////////////////////////
606 /// \brief Convert a UTF-32 characters range to UTF-32
607 ///
608 /// This function does nothing more than a direct copy;
609 /// it is defined only to provide the same interface as other
610 /// specializations of the efsw::Utf<> template, and allow
611 /// generic code to be written on top of it.
612 ///
613 /// \param begin Iterator pointing to the beginning of the input sequence
614 /// \param end Iterator pointing to the end of the input sequence
615 /// \param output Iterator pointing to the beginning of the output sequence
616 ///
617 /// \return Iterator to the end of the output sequence which has been written
618 ///
619 ////////////////////////////////////////////////////////////
620 template <typename In, typename Out> static Out ToUtf32( In begin, In end, Out output );
621
622 ////////////////////////////////////////////////////////////
623 /// \brief Decode a single ANSI character to UTF-32
624 ///
625 /// This function does not exist in other specializations
626 /// of efsw::Utf<>, it is defined for convenience (it is used by
627 /// several other conversion functions).
628 ///
629 /// \param input Input ANSI character
630 /// \param locale Locale to use for conversion
631 ///
632 /// \return Converted character
633 ///
634 ////////////////////////////////////////////////////////////
635 template <typename In>
636 static Uint32 DecodeAnsi( In input, const std::locale& locale = std::locale() );
637
638 ////////////////////////////////////////////////////////////
639 /// \brief Decode a single wide character to UTF-32
640 ///
641 /// This function does not exist in other specializations
642 /// of efsw::Utf<>, it is defined for convenience (it is used by
643 /// several other conversion functions).
644 ///
645 /// \param input Input wide character
646 ///
647 /// \return Converted character
648 ///
649 ////////////////////////////////////////////////////////////
650 template <typename In> static Uint32 DecodeWide( In input );
651
652 ////////////////////////////////////////////////////////////
653 /// \brief Encode a single UTF-32 character to ANSI
654 ///
655 /// This function does not exist in other specializations
656 /// of efsw::Utf<>, it is defined for convenience (it is used by
657 /// several other conversion functions).
658 ///
659 /// \param codepoint UTF-32 codepoint to encode
660 /// \param output Iterator pointing to the beginning of the output sequence
661 /// \param replacement Replacement if the input character is not convertible to ANSI (use 0 to skip it)
662 /// \param locale Locale to use for conversion
663 ///
664 /// \return Iterator to the end of the output sequence which has been written
665 ///
666 ////////////////////////////////////////////////////////////
667 template <typename Out>
668 static Out EncodeAnsi( Uint32 codepoint, Out output, char replacement = 0,
669 const std::locale& locale = std::locale() );
670
671#ifndef EFSW_NO_WIDECHAR
672 ////////////////////////////////////////////////////////////
673 /// \brief Encode a single UTF-32 character to wide
674 ///
675 /// This function does not exist in other specializations
676 /// of efsw::Utf<>, it is defined for convenience (it is used by
677 /// several other conversion functions).
678 ///
679 /// \param codepoint UTF-32 codepoint to encode
680 /// \param output Iterator pointing to the beginning of the output sequence
681 /// \param replacement Replacement if the input character is not convertible to wide (use 0 to
682 /// skip it)
683 ///
684 /// \return Iterator to the end of the output sequence which has been written
685 ///
686 ////////////////////////////////////////////////////////////
687 template <typename Out>
688 static Out EncodeWide( Uint32 codepoint, Out output, wchar_t replacement = 0 );
689#endif
690};
691
692#include "Utf.inl"
693
694// Convenience typedefs so callers can write Utf8 / Utf16 / Utf32 instead of the template syntax
695typedef Utf<8> Utf8;
696typedef Utf<16> Utf16;
697typedef Utf<32> Utf32;
698
699} // namespace efsw
700#endif
701
702////////////////////////////////////////////////////////////
703/// \class efsw::Utf
704/// \ingroup system
705///
706/// Utility class providing generic functions for UTF conversions.
707///
708/// efsw::Utf is a low-level, generic interface for counting, iterating,
709/// encoding and decoding Unicode characters and strings. It is able
710/// to handle ANSI, wide, UTF-8, UTF-16 and UTF-32 encodings.
711///
712/// efsw::Utf<X> functions are all static, these classes are not meant to
713/// be instantiated. All the functions are templates, so that you
714/// can use any character / string type for a given encoding.
715///
716/// It has 3 specializations:
717/// \li efsw::Utf<8> (typedef'd to efsw::Utf8)
718/// \li efsw::Utf<16> (typedef'd to efsw::Utf16)
719/// \li efsw::Utf<32> (typedef'd to efsw::Utf32)
720///
721////////////////////////////////////////////////////////////