diff options
Diffstat (limited to 'src/3rdParty/efsw/Utf.hpp')
-rw-r--r--[-rwxr-xr-x] | src/3rdParty/efsw/Utf.hpp | 1442 |
1 files changed, 721 insertions, 721 deletions
diff --git a/src/3rdParty/efsw/Utf.hpp b/src/3rdParty/efsw/Utf.hpp index 6e9ea71..1b042cd 100755..100644 --- a/src/3rdParty/efsw/Utf.hpp +++ b/src/3rdParty/efsw/Utf.hpp | |||
@@ -1,721 +1,721 @@ | |||
1 | /** NOTE: | 1 | /** NOTE: |
2 | * This code is based on the Utf implementation from SFML2. License zlib/png ( | 2 | * This code is based on the Utf implementation from SFML2. License zlib/png ( |
3 | *http://www.sfml-dev.org/license.php ) The class was modified to fit efsw own needs. This is not | 3 | *http://www.sfml-dev.org/license.php ) The class was modified to fit efsw own needs. This is not |
4 | *the original implementation from SFML2. | 4 | *the original implementation from SFML2. |
5 | * */ | 5 | * */ |
6 | 6 | ||
7 | #ifndef EFSW_UTF_HPP | 7 | #ifndef EFSW_UTF_HPP |
8 | #define EFSW_UTF_HPP | 8 | #define EFSW_UTF_HPP |
9 | 9 | ||
10 | //////////////////////////////////////////////////////////// | 10 | //////////////////////////////////////////////////////////// |
11 | // Headers | 11 | // Headers |
12 | //////////////////////////////////////////////////////////// | 12 | //////////////////////////////////////////////////////////// |
13 | #include <cstdlib> | 13 | #include <cstdlib> |
14 | #include <efsw/base.hpp> | 14 | #include <efsw/base.hpp> |
15 | #include <locale> | 15 | #include <locale> |
16 | #include <string> | 16 | #include <string> |
17 | 17 | ||
18 | namespace efsw { | 18 | namespace efsw { |
19 | 19 | ||
20 | template <unsigned int N> class Utf; | 20 | template <unsigned int N> class Utf; |
21 | 21 | ||
22 | //////////////////////////////////////////////////////////// | 22 | //////////////////////////////////////////////////////////// |
23 | /// \brief Specialization of the Utf template for UTF-8 | 23 | /// \brief Specialization of the Utf template for UTF-8 |
24 | /// | 24 | /// |
25 | //////////////////////////////////////////////////////////// | 25 | //////////////////////////////////////////////////////////// |
26 | template <> class Utf<8> { | 26 | template <> class Utf<8> { |
27 | public: | 27 | public: |
28 | //////////////////////////////////////////////////////////// | 28 | //////////////////////////////////////////////////////////// |
29 | /// \brief Decode a single UTF-8 character | 29 | /// \brief Decode a single UTF-8 character |
30 | /// | 30 | /// |
31 | /// Decoding a character means finding its unique 32-bits | 31 | /// Decoding a character means finding its unique 32-bits |
32 | /// code (called the codepoint) in the Unicode standard. | 32 | /// code (called the codepoint) in the Unicode standard. |
33 | /// | 33 | /// |
34 | /// \param begin Iterator pointing to the beginning of the input sequence | 34 | /// \param begin Iterator pointing to the beginning of the input sequence |
35 | /// \param end Iterator pointing to the end of the input sequence | 35 | /// \param end Iterator pointing to the end of the input sequence |
36 | /// \param output Codepoint of the decoded UTF-8 character | 36 | /// \param output Codepoint of the decoded UTF-8 character |
37 | /// \param replacement Replacement character to use in case the UTF-8 sequence is invalid | 37 | /// \param replacement Replacement character to use in case the UTF-8 sequence is invalid |
38 | /// | 38 | /// |
39 | /// \return Iterator pointing to one past the last read element of the input sequence | 39 | /// \return Iterator pointing to one past the last read element of the input sequence |
40 | /// | 40 | /// |
41 | //////////////////////////////////////////////////////////// | 41 | //////////////////////////////////////////////////////////// |
42 | template <typename In> | 42 | template <typename In> |
43 | static In Decode( In begin, In end, Uint32& output, Uint32 replacement = 0 ); | 43 | static In Decode( In begin, In end, Uint32& output, Uint32 replacement = 0 ); |
44 | 44 | ||
45 | //////////////////////////////////////////////////////////// | 45 | //////////////////////////////////////////////////////////// |
46 | /// \brief Encode a single UTF-8 character | 46 | /// \brief Encode a single UTF-8 character |
47 | /// | 47 | /// |
48 | /// Encoding a character means converting a unique 32-bits | 48 | /// Encoding a character means converting a unique 32-bits |
49 | /// code (called the codepoint) in the target encoding, UTF-8. | 49 | /// code (called the codepoint) in the target encoding, UTF-8. |
50 | /// | 50 | /// |
51 | /// \param input Codepoint to encode as UTF-8 | 51 | /// \param input Codepoint to encode as UTF-8 |
52 | /// \param output Iterator pointing to the beginning of the output sequence | 52 | /// \param output Iterator pointing to the beginning of the output sequence |
53 | /// \param replacement Replacement for characters not convertible to UTF-8 (use 0 to skip them) | 53 | /// \param replacement Replacement for characters not convertible to UTF-8 (use 0 to skip them) |
54 | /// | 54 | /// |
55 | /// \return Iterator to the end of the output sequence which has been written | 55 | /// \return Iterator to the end of the output sequence which has been written |
56 | /// | 56 | /// |
57 | //////////////////////////////////////////////////////////// | 57 | //////////////////////////////////////////////////////////// |
58 | template <typename Out> static Out Encode( Uint32 input, Out output, Uint8 replacement = 0 ); | 58 | template <typename Out> static Out Encode( Uint32 input, Out output, Uint8 replacement = 0 ); |
59 | 59 | ||
60 | //////////////////////////////////////////////////////////// | 60 | //////////////////////////////////////////////////////////// |
61 | /// \brief Advance to the next UTF-8 character | 61 | /// \brief Advance to the next UTF-8 character |
62 | /// | 62 | /// |
63 | /// This function is necessary for multi-elements encodings, as | 63 | /// This function is necessary for multi-elements encodings, as |
64 | /// a single character may use more than 1 storage element. | 64 | /// a single character may use more than 1 storage element. |
65 | /// | 65 | /// |
66 | /// \param begin Iterator pointing to the beginning of the input sequence | 66 | /// \param begin Iterator pointing to the beginning of the input sequence |
67 | /// \param end Iterator pointing to the end of the input sequence | 67 | /// \param end Iterator pointing to the end of the input sequence |
68 | /// | 68 | /// |
69 | /// \return Iterator pointing to one past the last read element of the input sequence | 69 | /// \return Iterator pointing to one past the last read element of the input sequence |
70 | /// | 70 | /// |
71 | //////////////////////////////////////////////////////////// | 71 | //////////////////////////////////////////////////////////// |
72 | template <typename In> static In Next( In begin, In end ); | 72 | template <typename In> static In Next( In begin, In end ); |
73 | 73 | ||
74 | //////////////////////////////////////////////////////////// | 74 | //////////////////////////////////////////////////////////// |
75 | /// \brief Count the number of characters of a UTF-8 sequence | 75 | /// \brief Count the number of characters of a UTF-8 sequence |
76 | /// | 76 | /// |
77 | /// This function is necessary for multi-elements encodings, as | 77 | /// This function is necessary for multi-elements encodings, as |
78 | /// a single character may use more than 1 storage element, thus the | 78 | /// a single character may use more than 1 storage element, thus the |
79 | /// total size can be different from (end - begin). | 79 | /// total size can be different from (end - begin). |
80 | /// | 80 | /// |
81 | /// \param begin Iterator pointing to the beginning of the input sequence | 81 | /// \param begin Iterator pointing to the beginning of the input sequence |
82 | /// \param end Iterator pointing to the end of the input sequence | 82 | /// \param end Iterator pointing to the end of the input sequence |
83 | /// | 83 | /// |
84 | /// \return Number of characters in the input sequence | 84 | /// \return Number of characters in the input sequence |
85 | /// | 85 | /// |
86 | //////////////////////////////////////////////////////////// | 86 | //////////////////////////////////////////////////////////// |
87 | template <typename In> static std::size_t Count( In begin, In end ); | 87 | template <typename In> static std::size_t Count( In begin, In end ); |
88 | 88 | ||
89 | //////////////////////////////////////////////////////////// | 89 | //////////////////////////////////////////////////////////// |
90 | /// \brief Convert an ANSI characters range to UTF-8 | 90 | /// \brief Convert an ANSI characters range to UTF-8 |
91 | /// | 91 | /// |
92 | /// The current global locale will be used by default, unless you | 92 | /// The current global locale will be used by default, unless you |
93 | /// pass a custom one in the \a locale parameter. | 93 | /// pass a custom one in the \a locale parameter. |
94 | /// | 94 | /// |
95 | /// \param begin Iterator pointing to the beginning of the input sequence | 95 | /// \param begin Iterator pointing to the beginning of the input sequence |
96 | /// \param end Iterator pointing to the end of the input sequence | 96 | /// \param end Iterator pointing to the end of the input sequence |
97 | /// \param output Iterator pointing to the beginning of the output sequence | 97 | /// \param output Iterator pointing to the beginning of the output sequence |
98 | /// \param locale Locale to use for conversion | 98 | /// \param locale Locale to use for conversion |
99 | /// | 99 | /// |
100 | /// \return Iterator to the end of the output sequence which has been written | 100 | /// \return Iterator to the end of the output sequence which has been written |
101 | /// | 101 | /// |
102 | //////////////////////////////////////////////////////////// | 102 | //////////////////////////////////////////////////////////// |
103 | template <typename In, typename Out> | 103 | template <typename In, typename Out> |
104 | static Out FromAnsi( In begin, In end, Out output, const std::locale& locale = std::locale() ); | 104 | static Out FromAnsi( In begin, In end, Out output, const std::locale& locale = std::locale() ); |
105 | 105 | ||
106 | //////////////////////////////////////////////////////////// | 106 | //////////////////////////////////////////////////////////// |
107 | /// \brief Convert a wide characters range to UTF-8 | 107 | /// \brief Convert a wide characters range to UTF-8 |
108 | /// | 108 | /// |
109 | /// \param begin Iterator pointing to the beginning of the input sequence | 109 | /// \param begin Iterator pointing to the beginning of the input sequence |
110 | /// \param end Iterator pointing to the end of the input sequence | 110 | /// \param end Iterator pointing to the end of the input sequence |
111 | /// \param output Iterator pointing to the beginning of the output sequence | 111 | /// \param output Iterator pointing to the beginning of the output sequence |
112 | /// | 112 | /// |
113 | /// \return Iterator to the end of the output sequence which has been written | 113 | /// \return Iterator to the end of the output sequence which has been written |
114 | /// | 114 | /// |
115 | //////////////////////////////////////////////////////////// | 115 | //////////////////////////////////////////////////////////// |
116 | template <typename In, typename Out> static Out FromWide( In begin, In end, Out output ); | 116 | template <typename In, typename Out> static Out FromWide( In begin, In end, Out output ); |
117 | 117 | ||
118 | //////////////////////////////////////////////////////////// | 118 | //////////////////////////////////////////////////////////// |
119 | /// \brief Convert a latin-1 (ISO-8859-1) characters range to UTF-8 | 119 | /// \brief Convert a latin-1 (ISO-8859-1) characters range to UTF-8 |
120 | /// | 120 | /// |
121 | /// \param begin Iterator pointing to the beginning of the input sequence | 121 | /// \param begin Iterator pointing to the beginning of the input sequence |
122 | /// \param end Iterator pointing to the end of the input sequence | 122 | /// \param end Iterator pointing to the end of the input sequence |
123 | /// \param output Iterator pointing to the beginning of the output sequence | 123 | /// \param output Iterator pointing to the beginning of the output sequence |
124 | /// \param locale Locale to use for conversion | 124 | /// \param locale Locale to use for conversion |
125 | /// | 125 | /// |
126 | /// \return Iterator to the end of the output sequence which has been written | 126 | /// \return Iterator to the end of the output sequence which has been written |
127 | /// | 127 | /// |
128 | //////////////////////////////////////////////////////////// | 128 | //////////////////////////////////////////////////////////// |
129 | template <typename In, typename Out> static Out FromLatin1( In begin, In end, Out output ); | 129 | template <typename In, typename Out> static Out FromLatin1( In begin, In end, Out output ); |
130 | 130 | ||
131 | //////////////////////////////////////////////////////////// | 131 | //////////////////////////////////////////////////////////// |
132 | /// \brief Convert an UTF-8 characters range to ANSI characters | 132 | /// \brief Convert an UTF-8 characters range to ANSI characters |
133 | /// | 133 | /// |
134 | /// The current global locale will be used by default, unless you | 134 | /// The current global locale will be used by default, unless you |
135 | /// pass a custom one in the \a locale parameter. | 135 | /// pass a custom one in the \a locale parameter. |
136 | /// | 136 | /// |
137 | /// \param begin Iterator pointing to the beginning of the input sequence | 137 | /// \param begin Iterator pointing to the beginning of the input sequence |
138 | /// \param end Iterator pointing to the end of the input sequence | 138 | /// \param end Iterator pointing to the end of the input sequence |
139 | /// \param output Iterator pointing to the beginning of the output sequence | 139 | /// \param output Iterator pointing to the beginning of the output sequence |
140 | /// \param replacement Replacement for characters not convertible to ANSI (use 0 to skip them) | 140 | /// \param replacement Replacement for characters not convertible to ANSI (use 0 to skip them) |
141 | /// \param locale Locale to use for conversion | 141 | /// \param locale Locale to use for conversion |
142 | /// | 142 | /// |
143 | /// \return Iterator to the end of the output sequence which has been written | 143 | /// \return Iterator to the end of the output sequence which has been written |
144 | /// | 144 | /// |
145 | //////////////////////////////////////////////////////////// | 145 | //////////////////////////////////////////////////////////// |
146 | template <typename In, typename Out> | 146 | template <typename In, typename Out> |
147 | static Out ToAnsi( In begin, In end, Out output, char replacement = 0, | 147 | static Out ToAnsi( In begin, In end, Out output, char replacement = 0, |
148 | const std::locale& locale = std::locale() ); | 148 | const std::locale& locale = std::locale() ); |
149 | 149 | ||
150 | #ifndef EFSW_NO_WIDECHAR | 150 | #ifndef EFSW_NO_WIDECHAR |
151 | //////////////////////////////////////////////////////////// | 151 | //////////////////////////////////////////////////////////// |
152 | /// \brief Convert an UTF-8 characters range to wide characters | 152 | /// \brief Convert an UTF-8 characters range to wide characters |
153 | /// | 153 | /// |
154 | /// \param begin Iterator pointing to the beginning of the input sequence | 154 | /// \param begin Iterator pointing to the beginning of the input sequence |
155 | /// \param end Iterator pointing to the end of the input sequence | 155 | /// \param end Iterator pointing to the end of the input sequence |
156 | /// \param output Iterator pointing to the beginning of the output sequence | 156 | /// \param output Iterator pointing to the beginning of the output sequence |
157 | /// \param replacement Replacement for characters not convertible to wide (use 0 to skip them) | 157 | /// \param replacement Replacement for characters not convertible to wide (use 0 to skip them) |
158 | /// | 158 | /// |
159 | /// \return Iterator to the end of the output sequence which has been written | 159 | /// \return Iterator to the end of the output sequence which has been written |
160 | /// | 160 | /// |
161 | //////////////////////////////////////////////////////////// | 161 | //////////////////////////////////////////////////////////// |
162 | template <typename In, typename Out> | 162 | template <typename In, typename Out> |
163 | static Out ToWide( In begin, In end, Out output, wchar_t replacement = 0 ); | 163 | static Out ToWide( In begin, In end, Out output, wchar_t replacement = 0 ); |
164 | #endif | 164 | #endif |
165 | 165 | ||
166 | //////////////////////////////////////////////////////////// | 166 | //////////////////////////////////////////////////////////// |
167 | /// \brief Convert an UTF-8 characters range to latin-1 (ISO-8859-1) characters | 167 | /// \brief Convert an UTF-8 characters range to latin-1 (ISO-8859-1) characters |
168 | /// | 168 | /// |
169 | /// \param begin Iterator pointing to the beginning of the input sequence | 169 | /// \param begin Iterator pointing to the beginning of the input sequence |
170 | /// \param end Iterator pointing to the end of the input sequence | 170 | /// \param end Iterator pointing to the end of the input sequence |
171 | /// \param output Iterator pointing to the beginning of the output sequence | 171 | /// \param output Iterator pointing to the beginning of the output sequence |
172 | /// \param replacement Replacement for characters not convertible to latin-1 (use 0 to skip them) | 172 | /// \param replacement Replacement for characters not convertible to latin-1 (use 0 to skip them) |
173 | /// | 173 | /// |
174 | /// \return Iterator to the end of the output sequence which has been written | 174 | /// \return Iterator to the end of the output sequence which has been written |
175 | /// | 175 | /// |
176 | //////////////////////////////////////////////////////////// | 176 | //////////////////////////////////////////////////////////// |
177 | template <typename In, typename Out> | 177 | template <typename In, typename Out> |
178 | static Out ToLatin1( In begin, In end, Out output, char replacement = 0 ); | 178 | static Out ToLatin1( In begin, In end, Out output, char replacement = 0 ); |
179 | 179 | ||
180 | //////////////////////////////////////////////////////////// | 180 | //////////////////////////////////////////////////////////// |
181 | /// \brief Convert a UTF-8 characters range to UTF-8 | 181 | /// \brief Convert a UTF-8 characters range to UTF-8 |
182 | /// | 182 | /// |
183 | /// This function does nothing more than a direct copy; | 183 | /// This function does nothing more than a direct copy; |
184 | /// it is defined only to provide the same interface as other | 184 | /// it is defined only to provide the same interface as other |
185 | /// specializations of the efsw::Utf<> template, and allow | 185 | /// specializations of the efsw::Utf<> template, and allow |
186 | /// generic code to be written on top of it. | 186 | /// generic code to be written on top of it. |
187 | /// | 187 | /// |
188 | /// \param begin Iterator pointing to the beginning of the input sequence | 188 | /// \param begin Iterator pointing to the beginning of the input sequence |
189 | /// \param end Iterator pointing to the end of the input sequence | 189 | /// \param end Iterator pointing to the end of the input sequence |
190 | /// \param output Iterator pointing to the beginning of the output sequence | 190 | /// \param output Iterator pointing to the beginning of the output sequence |
191 | /// | 191 | /// |
192 | /// \return Iterator to the end of the output sequence which has been written | 192 | /// \return Iterator to the end of the output sequence which has been written |
193 | /// | 193 | /// |
194 | //////////////////////////////////////////////////////////// | 194 | //////////////////////////////////////////////////////////// |
195 | template <typename In, typename Out> static Out toUtf8( In begin, In end, Out output ); | 195 | template <typename In, typename Out> static Out toUtf8( In begin, In end, Out output ); |
196 | 196 | ||
197 | //////////////////////////////////////////////////////////// | 197 | //////////////////////////////////////////////////////////// |
198 | /// \brief Convert a UTF-8 characters range to UTF-16 | 198 | /// \brief Convert a UTF-8 characters range to UTF-16 |
199 | /// | 199 | /// |
200 | /// \param begin Iterator pointing to the beginning of the input sequence | 200 | /// \param begin Iterator pointing to the beginning of the input sequence |
201 | /// \param end Iterator pointing to the end of the input sequence | 201 | /// \param end Iterator pointing to the end of the input sequence |
202 | /// \param output Iterator pointing to the beginning of the output sequence | 202 | /// \param output Iterator pointing to the beginning of the output sequence |
203 | /// | 203 | /// |
204 | /// \return Iterator to the end of the output sequence which has been written | 204 | /// \return Iterator to the end of the output sequence which has been written |
205 | /// | 205 | /// |
206 | //////////////////////////////////////////////////////////// | 206 | //////////////////////////////////////////////////////////// |
207 | template <typename In, typename Out> static Out ToUtf16( In begin, In end, Out output ); | 207 | template <typename In, typename Out> static Out ToUtf16( In begin, In end, Out output ); |
208 | 208 | ||
209 | //////////////////////////////////////////////////////////// | 209 | //////////////////////////////////////////////////////////// |
210 | /// \brief Convert a UTF-8 characters range to UTF-32 | 210 | /// \brief Convert a UTF-8 characters range to UTF-32 |
211 | /// | 211 | /// |
212 | /// \param begin Iterator pointing to the beginning of the input sequence | 212 | /// \param begin Iterator pointing to the beginning of the input sequence |
213 | /// \param end Iterator pointing to the end of the input sequence | 213 | /// \param end Iterator pointing to the end of the input sequence |
214 | /// \param output Iterator pointing to the beginning of the output sequence | 214 | /// \param output Iterator pointing to the beginning of the output sequence |
215 | /// | 215 | /// |
216 | /// \return Iterator to the end of the output sequence which has been written | 216 | /// \return Iterator to the end of the output sequence which has been written |
217 | /// | 217 | /// |
218 | //////////////////////////////////////////////////////////// | 218 | //////////////////////////////////////////////////////////// |
219 | template <typename In, typename Out> static Out ToUtf32( In begin, In end, Out output ); | 219 | template <typename In, typename Out> static Out ToUtf32( In begin, In end, Out output ); |
220 | }; | 220 | }; |
221 | 221 | ||
222 | //////////////////////////////////////////////////////////// | 222 | //////////////////////////////////////////////////////////// |
223 | /// \brief Specialization of the Utf template for UTF-16 | 223 | /// \brief Specialization of the Utf template for UTF-16 |
224 | /// | 224 | /// |
225 | //////////////////////////////////////////////////////////// | 225 | //////////////////////////////////////////////////////////// |
226 | template <> class Utf<16> { | 226 | template <> class Utf<16> { |
227 | public: | 227 | public: |
228 | //////////////////////////////////////////////////////////// | 228 | //////////////////////////////////////////////////////////// |
229 | /// \brief Decode a single UTF-16 character | 229 | /// \brief Decode a single UTF-16 character |
230 | /// | 230 | /// |
231 | /// Decoding a character means finding its unique 32-bits | 231 | /// Decoding a character means finding its unique 32-bits |
232 | /// code (called the codepoint) in the Unicode standard. | 232 | /// code (called the codepoint) in the Unicode standard. |
233 | /// | 233 | /// |
234 | /// \param begin Iterator pointing to the beginning of the input sequence | 234 | /// \param begin Iterator pointing to the beginning of the input sequence |
235 | /// \param end Iterator pointing to the end of the input sequence | 235 | /// \param end Iterator pointing to the end of the input sequence |
236 | /// \param output Codepoint of the decoded UTF-16 character | 236 | /// \param output Codepoint of the decoded UTF-16 character |
237 | /// \param replacement Replacement character to use in case the UTF-16 sequence is invalid | 237 | /// \param replacement Replacement character to use in case the UTF-16 sequence is invalid |
238 | /// | 238 | /// |
239 | /// \return Iterator pointing to one past the last read element of the input sequence | 239 | /// \return Iterator pointing to one past the last read element of the input sequence |
240 | /// | 240 | /// |
241 | //////////////////////////////////////////////////////////// | 241 | //////////////////////////////////////////////////////////// |
242 | template <typename In> | 242 | template <typename In> |
243 | static In Decode( In begin, In end, Uint32& output, Uint32 replacement = 0 ); | 243 | static In Decode( In begin, In end, Uint32& output, Uint32 replacement = 0 ); |
244 | 244 | ||
245 | //////////////////////////////////////////////////////////// | 245 | //////////////////////////////////////////////////////////// |
246 | /// \brief Encode a single UTF-16 character | 246 | /// \brief Encode a single UTF-16 character |
247 | /// | 247 | /// |
248 | /// Encoding a character means converting a unique 32-bits | 248 | /// Encoding a character means converting a unique 32-bits |
249 | /// code (called the codepoint) in the target encoding, UTF-16. | 249 | /// code (called the codepoint) in the target encoding, UTF-16. |
250 | /// | 250 | /// |
251 | /// \param input Codepoint to encode as UTF-16 | 251 | /// \param input Codepoint to encode as UTF-16 |
252 | /// \param output Iterator pointing to the beginning of the output sequence | 252 | /// \param output Iterator pointing to the beginning of the output sequence |
253 | /// \param replacement Replacement for characters not convertible to UTF-16 (use 0 to skip them) | 253 | /// \param replacement Replacement for characters not convertible to UTF-16 (use 0 to skip them) |
254 | /// | 254 | /// |
255 | /// \return Iterator to the end of the output sequence which has been written | 255 | /// \return Iterator to the end of the output sequence which has been written |
256 | /// | 256 | /// |
257 | //////////////////////////////////////////////////////////// | 257 | //////////////////////////////////////////////////////////// |
258 | template <typename Out> static Out Encode( Uint32 input, Out output, Uint16 replacement = 0 ); | 258 | template <typename Out> static Out Encode( Uint32 input, Out output, Uint16 replacement = 0 ); |
259 | 259 | ||
260 | //////////////////////////////////////////////////////////// | 260 | //////////////////////////////////////////////////////////// |
261 | /// \brief Advance to the next UTF-16 character | 261 | /// \brief Advance to the next UTF-16 character |
262 | /// | 262 | /// |
263 | /// This function is necessary for multi-elements encodings, as | 263 | /// This function is necessary for multi-elements encodings, as |
264 | /// a single character may use more than 1 storage element. | 264 | /// a single character may use more than 1 storage element. |
265 | /// | 265 | /// |
266 | /// \param begin Iterator pointing to the beginning of the input sequence | 266 | /// \param begin Iterator pointing to the beginning of the input sequence |
267 | /// \param end Iterator pointing to the end of the input sequence | 267 | /// \param end Iterator pointing to the end of the input sequence |
268 | /// | 268 | /// |
269 | /// \return Iterator pointing to one past the last read element of the input sequence | 269 | /// \return Iterator pointing to one past the last read element of the input sequence |
270 | /// | 270 | /// |
271 | //////////////////////////////////////////////////////////// | 271 | //////////////////////////////////////////////////////////// |
272 | template <typename In> static In Next( In begin, In end ); | 272 | template <typename In> static In Next( In begin, In end ); |
273 | 273 | ||
274 | //////////////////////////////////////////////////////////// | 274 | //////////////////////////////////////////////////////////// |
275 | /// \brief Count the number of characters of a UTF-16 sequence | 275 | /// \brief Count the number of characters of a UTF-16 sequence |
276 | /// | 276 | /// |
277 | /// This function is necessary for multi-elements encodings, as | 277 | /// This function is necessary for multi-elements encodings, as |
278 | /// a single character may use more than 1 storage element, thus the | 278 | /// a single character may use more than 1 storage element, thus the |
279 | /// total size can be different from (end - begin). | 279 | /// total size can be different from (end - begin). |
280 | /// | 280 | /// |
281 | /// \param begin Iterator pointing to the beginning of the input sequence | 281 | /// \param begin Iterator pointing to the beginning of the input sequence |
282 | /// \param end Iterator pointing to the end of the input sequence | 282 | /// \param end Iterator pointing to the end of the input sequence |
283 | /// | 283 | /// |
284 | /// \return Number of characters in the input sequence | 284 | /// \return Number of characters in the input sequence |
285 | /// | 285 | /// |
286 | //////////////////////////////////////////////////////////// | 286 | //////////////////////////////////////////////////////////// |
287 | template <typename In> static std::size_t Count( In begin, In end ); | 287 | template <typename In> static std::size_t Count( In begin, In end ); |
288 | 288 | ||
289 | //////////////////////////////////////////////////////////// | 289 | //////////////////////////////////////////////////////////// |
290 | /// \brief Convert an ANSI characters range to UTF-16 | 290 | /// \brief Convert an ANSI characters range to UTF-16 |
291 | /// | 291 | /// |
292 | /// The current global locale will be used by default, unless you | 292 | /// The current global locale will be used by default, unless you |
293 | /// pass a custom one in the \a locale parameter. | 293 | /// pass a custom one in the \a locale parameter. |
294 | /// | 294 | /// |
295 | /// \param begin Iterator pointing to the beginning of the input sequence | 295 | /// \param begin Iterator pointing to the beginning of the input sequence |
296 | /// \param end Iterator pointing to the end of the input sequence | 296 | /// \param end Iterator pointing to the end of the input sequence |
297 | /// \param output Iterator pointing to the beginning of the output sequence | 297 | /// \param output Iterator pointing to the beginning of the output sequence |
298 | /// \param locale Locale to use for conversion | 298 | /// \param locale Locale to use for conversion |
299 | /// | 299 | /// |
300 | /// \return Iterator to the end of the output sequence which has been written | 300 | /// \return Iterator to the end of the output sequence which has been written |
301 | /// | 301 | /// |
302 | //////////////////////////////////////////////////////////// | 302 | //////////////////////////////////////////////////////////// |
303 | template <typename In, typename Out> | 303 | template <typename In, typename Out> |
304 | static Out FromAnsi( In begin, In end, Out output, const std::locale& locale = std::locale() ); | 304 | static Out FromAnsi( In begin, In end, Out output, const std::locale& locale = std::locale() ); |
305 | 305 | ||
306 | //////////////////////////////////////////////////////////// | 306 | //////////////////////////////////////////////////////////// |
307 | /// \brief Convert a wide characters range to UTF-16 | 307 | /// \brief Convert a wide characters range to UTF-16 |
308 | /// | 308 | /// |
309 | /// \param begin Iterator pointing to the beginning of the input sequence | 309 | /// \param begin Iterator pointing to the beginning of the input sequence |
310 | /// \param end Iterator pointing to the end of the input sequence | 310 | /// \param end Iterator pointing to the end of the input sequence |
311 | /// \param output Iterator pointing to the beginning of the output sequence | 311 | /// \param output Iterator pointing to the beginning of the output sequence |
312 | /// | 312 | /// |
313 | /// \return Iterator to the end of the output sequence which has been written | 313 | /// \return Iterator to the end of the output sequence which has been written |
314 | /// | 314 | /// |
315 | //////////////////////////////////////////////////////////// | 315 | //////////////////////////////////////////////////////////// |
316 | template <typename In, typename Out> static Out FromWide( In begin, In end, Out output ); | 316 | template <typename In, typename Out> static Out FromWide( In begin, In end, Out output ); |
317 | 317 | ||
318 | //////////////////////////////////////////////////////////// | 318 | //////////////////////////////////////////////////////////// |
319 | /// \brief Convert a latin-1 (ISO-8859-1) characters range to UTF-16 | 319 | /// \brief Convert a latin-1 (ISO-8859-1) characters range to UTF-16 |
320 | /// | 320 | /// |
321 | /// \param begin Iterator pointing to the beginning of the input sequence | 321 | /// \param begin Iterator pointing to the beginning of the input sequence |
322 | /// \param end Iterator pointing to the end of the input sequence | 322 | /// \param end Iterator pointing to the end of the input sequence |
323 | /// \param output Iterator pointing to the beginning of the output sequence | 323 | /// \param output Iterator pointing to the beginning of the output sequence |
324 | /// \param locale Locale to use for conversion | 324 | /// \param locale Locale to use for conversion |
325 | /// | 325 | /// |
326 | /// \return Iterator to the end of the output sequence which has been written | 326 | /// \return Iterator to the end of the output sequence which has been written |
327 | /// | 327 | /// |
328 | //////////////////////////////////////////////////////////// | 328 | //////////////////////////////////////////////////////////// |
329 | template <typename In, typename Out> static Out FromLatin1( In begin, In end, Out output ); | 329 | template <typename In, typename Out> static Out FromLatin1( In begin, In end, Out output ); |
330 | 330 | ||
331 | //////////////////////////////////////////////////////////// | 331 | //////////////////////////////////////////////////////////// |
332 | /// \brief Convert a UTF-16 characters range to ANSI characters | 332 | /// \brief Convert a UTF-16 characters range to ANSI characters |
333 | /// | 333 | /// |
334 | /// The current global locale will be used by default, unless you | 334 | /// The current global locale will be used by default, unless you |
335 | /// pass a custom one in the \a locale parameter. | 335 | /// pass a custom one in the \a locale parameter. |
336 | /// | 336 | /// |
337 | /// \param begin Iterator pointing to the beginning of the input sequence | 337 | /// \param begin Iterator pointing to the beginning of the input sequence |
338 | /// \param end Iterator pointing to the end of the input sequence | 338 | /// \param end Iterator pointing to the end of the input sequence |
339 | /// \param output Iterator pointing to the beginning of the output sequence | 339 | /// \param output Iterator pointing to the beginning of the output sequence |
340 | /// \param replacement Replacement for characters not convertible to ANSI (use 0 to skip them) | 340 | /// \param replacement Replacement for characters not convertible to ANSI (use 0 to skip them) |
341 | /// \param locale Locale to use for conversion | 341 | /// \param locale Locale to use for conversion |
342 | /// | 342 | /// |
343 | /// \return Iterator to the end of the output sequence which has been written | 343 | /// \return Iterator to the end of the output sequence which has been written |
344 | /// | 344 | /// |
345 | //////////////////////////////////////////////////////////// | 345 | //////////////////////////////////////////////////////////// |
346 | template <typename In, typename Out> | 346 | template <typename In, typename Out> |
347 | static Out ToAnsi( In begin, In end, Out output, char replacement = 0, | 347 | static Out ToAnsi( In begin, In end, Out output, char replacement = 0, |
348 | const std::locale& locale = std::locale() ); | 348 | const std::locale& locale = std::locale() ); |
349 | 349 | ||
350 | #ifndef EFSW_NO_WIDECHAR | 350 | #ifndef EFSW_NO_WIDECHAR |
351 | //////////////////////////////////////////////////////////// | 351 | //////////////////////////////////////////////////////////// |
352 | /// \brief Convert a UTF-16 characters range to wide characters | 352 | /// \brief Convert a UTF-16 characters range to wide characters |
353 | /// | 353 | /// |
354 | /// \param begin Iterator pointing to the beginning of the input sequence | 354 | /// \param begin Iterator pointing to the beginning of the input sequence |
355 | /// \param end Iterator pointing to the end of the input sequence | 355 | /// \param end Iterator pointing to the end of the input sequence |
356 | /// \param output Iterator pointing to the beginning of the output sequence | 356 | /// \param output Iterator pointing to the beginning of the output sequence |
357 | /// \param replacement Replacement for characters not convertible to wide (use 0 to skip them) | 357 | /// \param replacement Replacement for characters not convertible to wide (use 0 to skip them) |
358 | /// | 358 | /// |
359 | /// \return Iterator to the end of the output sequence which has been written | 359 | /// \return Iterator to the end of the output sequence which has been written |
360 | /// | 360 | /// |
361 | //////////////////////////////////////////////////////////// | 361 | //////////////////////////////////////////////////////////// |
362 | template <typename In, typename Out> | 362 | template <typename In, typename Out> |
363 | static Out ToWide( In begin, In end, Out output, wchar_t replacement = 0 ); | 363 | static Out ToWide( In begin, In end, Out output, wchar_t replacement = 0 ); |
364 | #endif | 364 | #endif |
365 | 365 | ||
366 | //////////////////////////////////////////////////////////// | 366 | //////////////////////////////////////////////////////////// |
367 | /// \brief Convert a UTF-16 characters range to latin-1 (ISO-8859-1) characters | 367 | /// \brief Convert a UTF-16 characters range to latin-1 (ISO-8859-1) characters |
368 | /// | 368 | /// |
369 | /// \param begin Iterator pointing to the beginning of the input sequence | 369 | /// \param begin Iterator pointing to the beginning of the input sequence |
370 | /// \param end Iterator pointing to the end of the input sequence | 370 | /// \param end Iterator pointing to the end of the input sequence |
371 | /// \param output Iterator pointing to the beginning of the output sequence | 371 | /// \param output Iterator pointing to the beginning of the output sequence |
372 | /// \param replacement Replacement for characters not convertible to latin-1 (use 0 to skip them) | 372 | /// \param replacement Replacement for characters not convertible to latin-1 (use 0 to skip them) |
373 | /// | 373 | /// |
374 | /// \return Iterator to the end of the output sequence which has been written | 374 | /// \return Iterator to the end of the output sequence which has been written |
375 | /// | 375 | /// |
376 | //////////////////////////////////////////////////////////// | 376 | //////////////////////////////////////////////////////////// |
377 | template <typename In, typename Out> | 377 | template <typename In, typename Out> |
378 | static Out ToLatin1( In begin, In end, Out output, char replacement = 0 ); | 378 | static Out ToLatin1( In begin, In end, Out output, char replacement = 0 ); |
379 | 379 | ||
380 | //////////////////////////////////////////////////////////// | 380 | //////////////////////////////////////////////////////////// |
381 | /// \brief Convert a UTF-16 characters range to UTF-8 | 381 | /// \brief Convert a UTF-16 characters range to UTF-8 |
382 | /// | 382 | /// |
383 | /// \param begin Iterator pointing to the beginning of the input sequence | 383 | /// \param begin Iterator pointing to the beginning of the input sequence |
384 | /// \param end Iterator pointing to the end of the input sequence | 384 | /// \param end Iterator pointing to the end of the input sequence |
385 | /// \param output Iterator pointing to the beginning of the output sequence | 385 | /// \param output Iterator pointing to the beginning of the output sequence |
386 | /// | 386 | /// |
387 | /// \return Iterator to the end of the output sequence which has been written | 387 | /// \return Iterator to the end of the output sequence which has been written |
388 | /// | 388 | /// |
389 | //////////////////////////////////////////////////////////// | 389 | //////////////////////////////////////////////////////////// |
390 | template <typename In, typename Out> static Out toUtf8( In begin, In end, Out output ); | 390 | template <typename In, typename Out> static Out toUtf8( In begin, In end, Out output ); |
391 | 391 | ||
392 | //////////////////////////////////////////////////////////// | 392 | //////////////////////////////////////////////////////////// |
393 | /// \brief Convert a UTF-16 characters range to UTF-16 | 393 | /// \brief Convert a UTF-16 characters range to UTF-16 |
394 | /// | 394 | /// |
395 | /// This function does nothing more than a direct copy; | 395 | /// This function does nothing more than a direct copy; |
396 | /// it is defined only to provide the same interface as other | 396 | /// it is defined only to provide the same interface as other |
397 | /// specializations of the efsw::Utf<> template, and allow | 397 | /// specializations of the efsw::Utf<> template, and allow |
398 | /// generic code to be written on top of it. | 398 | /// generic code to be written on top of it. |
399 | /// | 399 | /// |
400 | /// \param begin Iterator pointing to the beginning of the input sequence | 400 | /// \param begin Iterator pointing to the beginning of the input sequence |
401 | /// \param end Iterator pointing to the end of the input sequence | 401 | /// \param end Iterator pointing to the end of the input sequence |
402 | /// \param output Iterator pointing to the beginning of the output sequence | 402 | /// \param output Iterator pointing to the beginning of the output sequence |
403 | /// | 403 | /// |
404 | /// \return Iterator to the end of the output sequence which has been written | 404 | /// \return Iterator to the end of the output sequence which has been written |
405 | /// | 405 | /// |
406 | //////////////////////////////////////////////////////////// | 406 | //////////////////////////////////////////////////////////// |
407 | template <typename In, typename Out> static Out ToUtf16( In begin, In end, Out output ); | 407 | template <typename In, typename Out> static Out ToUtf16( In begin, In end, Out output ); |
408 | 408 | ||
409 | //////////////////////////////////////////////////////////// | 409 | //////////////////////////////////////////////////////////// |
410 | /// \brief Convert a UTF-16 characters range to UTF-32 | 410 | /// \brief Convert a UTF-16 characters range to UTF-32 |
411 | /// | 411 | /// |
412 | /// \param begin Iterator pointing to the beginning of the input sequence | 412 | /// \param begin Iterator pointing to the beginning of the input sequence |
413 | /// \param end Iterator pointing to the end of the input sequence | 413 | /// \param end Iterator pointing to the end of the input sequence |
414 | /// \param output Iterator pointing to the beginning of the output sequence | 414 | /// \param output Iterator pointing to the beginning of the output sequence |
415 | /// | 415 | /// |
416 | /// \return Iterator to the end of the output sequence which has been written | 416 | /// \return Iterator to the end of the output sequence which has been written |
417 | /// | 417 | /// |
418 | //////////////////////////////////////////////////////////// | 418 | //////////////////////////////////////////////////////////// |
419 | template <typename In, typename Out> static Out ToUtf32( In begin, In end, Out output ); | 419 | template <typename In, typename Out> static Out ToUtf32( In begin, In end, Out output ); |
420 | }; | 420 | }; |
421 | 421 | ||
422 | //////////////////////////////////////////////////////////// | 422 | //////////////////////////////////////////////////////////// |
423 | /// \brief Specialization of the Utf template for UTF-32 | 423 | /// \brief Specialization of the Utf template for UTF-32 |
424 | /// | 424 | /// |
425 | //////////////////////////////////////////////////////////// | 425 | //////////////////////////////////////////////////////////// |
426 | template <> class Utf<32> { | 426 | template <> class Utf<32> { |
427 | public: | 427 | public: |
428 | //////////////////////////////////////////////////////////// | 428 | //////////////////////////////////////////////////////////// |
429 | /// \brief Decode a single UTF-32 character | 429 | /// \brief Decode a single UTF-32 character |
430 | /// | 430 | /// |
431 | /// Decoding a character means finding its unique 32-bit | 431 | /// Decoding a character means finding its unique 32-bit |
432 | /// code (called the codepoint) in the Unicode standard. | 432 | /// code (called the codepoint) in the Unicode standard. |
433 | /// For UTF-32, the character value is the same as the codepoint. | 433 | /// For UTF-32, the character value is the same as the codepoint. |
434 | /// | 434 | /// |
435 | /// \param begin Iterator pointing to the beginning of the input sequence | 435 | /// \param begin Iterator pointing to the beginning of the input sequence |
436 | /// \param end Iterator pointing to the end of the input sequence | 436 | /// \param end Iterator pointing to the end of the input sequence |
437 | /// \param output Codepoint of the decoded UTF-32 character | 437 | /// \param output Codepoint of the decoded UTF-32 character |
438 | /// \param replacement Replacement character to use in case the UTF-32 sequence is invalid | 438 | /// \param replacement Replacement character to use in case the UTF-32 sequence is invalid |
439 | /// | 439 | /// |
440 | /// \return Iterator pointing to one past the last read element of the input sequence | 440 | /// \return Iterator pointing to one past the last read element of the input sequence |
441 | /// | 441 | /// |
442 | //////////////////////////////////////////////////////////// | 442 | //////////////////////////////////////////////////////////// |
443 | template <typename In> | 443 | template <typename In> |
444 | static In Decode( In begin, In end, Uint32& output, Uint32 replacement = 0 ); | 444 | static In Decode( In begin, In end, Uint32& output, Uint32 replacement = 0 ); |
445 | 445 | ||
446 | //////////////////////////////////////////////////////////// | 446 | //////////////////////////////////////////////////////////// |
447 | /// \brief Encode a single UTF-32 character | 447 | /// \brief Encode a single UTF-32 character |
448 | /// | 448 | /// |
449 | /// Encoding a character means converting a unique 32-bit | 449 | /// Encoding a character means converting a unique 32-bit |
450 | /// code (called the codepoint) into the target encoding, UTF-32. | 450 | /// code (called the codepoint) into the target encoding, UTF-32. |
451 | /// For UTF-32, the codepoint is the same as the character value. | 451 | /// For UTF-32, the codepoint is the same as the character value. |
452 | /// | 452 | /// |
453 | /// \param input Codepoint to encode as UTF-32 | 453 | /// \param input Codepoint to encode as UTF-32 |
454 | /// \param output Iterator pointing to the beginning of the output sequence | 454 | /// \param output Iterator pointing to the beginning of the output sequence |
455 | /// \param replacement Replacement for characters not convertible to UTF-32 (use 0 to skip them) | 455 | /// \param replacement Replacement for characters not convertible to UTF-32 (use 0 to skip them) |
456 | /// | 456 | /// |
457 | /// \return Iterator to the end of the output sequence which has been written | 457 | /// \return Iterator to the end of the output sequence which has been written |
458 | /// | 458 | /// |
459 | //////////////////////////////////////////////////////////// | 459 | //////////////////////////////////////////////////////////// |
460 | template <typename Out> static Out Encode( Uint32 input, Out output, Uint32 replacement = 0 ); | 460 | template <typename Out> static Out Encode( Uint32 input, Out output, Uint32 replacement = 0 ); |
461 | 461 | ||
462 | //////////////////////////////////////////////////////////// | 462 | //////////////////////////////////////////////////////////// |
463 | /// \brief Advance to the next UTF-32 character | 463 | /// \brief Advance to the next UTF-32 character |
464 | /// | 464 | /// |
465 | /// This function is trivial for UTF-32, which can store | 465 | /// This function is trivial for UTF-32, which can store |
466 | /// every character in a single storage element. | 466 | /// every character in a single storage element. |
467 | /// | 467 | /// |
468 | /// \param begin Iterator pointing to the beginning of the input sequence | 468 | /// \param begin Iterator pointing to the beginning of the input sequence |
469 | /// \param end Iterator pointing to the end of the input sequence | 469 | /// \param end Iterator pointing to the end of the input sequence |
470 | /// | 470 | /// |
471 | /// \return Iterator pointing to one past the last read element of the input sequence | 471 | /// \return Iterator pointing to one past the last read element of the input sequence |
472 | /// | 472 | /// |
473 | //////////////////////////////////////////////////////////// | 473 | //////////////////////////////////////////////////////////// |
474 | template <typename In> static In Next( In begin, In end ); | 474 | template <typename In> static In Next( In begin, In end ); |
475 | 475 | ||
476 | //////////////////////////////////////////////////////////// | 476 | //////////////////////////////////////////////////////////// |
477 | /// \brief Count the number of characters of a UTF-32 sequence | 477 | /// \brief Count the number of characters of a UTF-32 sequence |
478 | /// | 478 | /// |
479 | /// This function is trivial for UTF-32, which can store | 479 | /// This function is trivial for UTF-32, which can store |
480 | /// every character in a single storage element. | 480 | /// every character in a single storage element. |
481 | /// | 481 | /// |
482 | /// \param begin Iterator pointing to the beginning of the input sequence | 482 | /// \param begin Iterator pointing to the beginning of the input sequence |
483 | /// \param end Iterator pointing to the end of the input sequence | 483 | /// \param end Iterator pointing to the end of the input sequence |
484 | /// | 484 | /// |
485 | /// \return Number of characters in the input sequence | 485 | /// \return Number of characters in the input sequence |
486 | /// | 486 | /// |
487 | //////////////////////////////////////////////////////////// | 487 | //////////////////////////////////////////////////////////// |
488 | template <typename In> static std::size_t Count( In begin, In end ); | 488 | template <typename In> static std::size_t Count( In begin, In end ); |
489 | 489 | ||
490 | //////////////////////////////////////////////////////////// | 490 | //////////////////////////////////////////////////////////// |
491 | /// \brief Convert an ANSI characters range to UTF-32 | 491 | /// \brief Convert an ANSI characters range to UTF-32 |
492 | /// | 492 | /// |
493 | /// The current global locale will be used by default, unless you | 493 | /// The current global locale will be used by default, unless you |
494 | /// pass a custom one in the \a locale parameter. | 494 | /// pass a custom one in the \a locale parameter. |
495 | /// | 495 | /// |
496 | /// \param begin Iterator pointing to the beginning of the input sequence | 496 | /// \param begin Iterator pointing to the beginning of the input sequence |
497 | /// \param end Iterator pointing to the end of the input sequence | 497 | /// \param end Iterator pointing to the end of the input sequence |
498 | /// \param output Iterator pointing to the beginning of the output sequence | 498 | /// \param output Iterator pointing to the beginning of the output sequence |
499 | /// \param locale Locale to use for conversion | 499 | /// \param locale Locale to use for conversion |
500 | /// | 500 | /// |
501 | /// \return Iterator to the end of the output sequence which has been written | 501 | /// \return Iterator to the end of the output sequence which has been written |
502 | /// | 502 | /// |
503 | //////////////////////////////////////////////////////////// | 503 | //////////////////////////////////////////////////////////// |
504 | template <typename In, typename Out> | 504 | template <typename In, typename Out> |
505 | static Out FromAnsi( In begin, In end, Out output, const std::locale& locale = std::locale() ); | 505 | static Out FromAnsi( In begin, In end, Out output, const std::locale& locale = std::locale() ); |
506 | 506 | ||
507 | //////////////////////////////////////////////////////////// | 507 | //////////////////////////////////////////////////////////// |
508 | /// \brief Convert a wide characters range to UTF-32 | 508 | /// \brief Convert a wide characters range to UTF-32 |
509 | /// | 509 | /// |
510 | /// \param begin Iterator pointing to the beginning of the input sequence | 510 | /// \param begin Iterator pointing to the beginning of the input sequence |
511 | /// \param end Iterator pointing to the end of the input sequence | 511 | /// \param end Iterator pointing to the end of the input sequence |
512 | /// \param output Iterator pointing to the beginning of the output sequence | 512 | /// \param output Iterator pointing to the beginning of the output sequence |
513 | /// | 513 | /// |
514 | /// \return Iterator to the end of the output sequence which has been written | 514 | /// \return Iterator to the end of the output sequence which has been written |
515 | /// | 515 | /// |
516 | //////////////////////////////////////////////////////////// | 516 | //////////////////////////////////////////////////////////// |
517 | template <typename In, typename Out> static Out FromWide( In begin, In end, Out output ); | 517 | template <typename In, typename Out> static Out FromWide( In begin, In end, Out output ); |
518 | 518 | ||
519 | //////////////////////////////////////////////////////////// | 519 | //////////////////////////////////////////////////////////// |
520 | /// \brief Convert a latin-1 (ISO-8859-1) characters range to UTF-32 | 520 | /// \brief Convert a latin-1 (ISO-8859-1) characters range to UTF-32 |
521 | /// | 521 | /// |
522 | /// \param begin Iterator pointing to the beginning of the input sequence | 522 | /// \param begin Iterator pointing to the beginning of the input sequence |
523 | /// \param end Iterator pointing to the end of the input sequence | 523 | /// \param end Iterator pointing to the end of the input sequence |
524 | /// \param output Iterator pointing to the beginning of the output sequence | 524 | /// \param output Iterator pointing to the beginning of the output sequence |
525 | /// \param locale Locale to use for conversion | 525 | /// \param locale Locale to use for conversion |
526 | /// | 526 | /// |
527 | /// \return Iterator to the end of the output sequence which has been written | 527 | /// \return Iterator to the end of the output sequence which has been written |
528 | /// | 528 | /// |
529 | //////////////////////////////////////////////////////////// | 529 | //////////////////////////////////////////////////////////// |
530 | template <typename In, typename Out> static Out FromLatin1( In begin, In end, Out output ); | 530 | template <typename In, typename Out> static Out FromLatin1( In begin, In end, Out output ); |
531 | 531 | ||
532 | //////////////////////////////////////////////////////////// | 532 | //////////////////////////////////////////////////////////// |
533 | /// \brief Convert a UTF-32 characters range to ANSI characters | 533 | /// \brief Convert a UTF-32 characters range to ANSI characters |
534 | /// | 534 | /// |
535 | /// The current global locale will be used by default, unless you | 535 | /// The current global locale will be used by default, unless you |
536 | /// pass a custom one in the \a locale parameter. | 536 | /// pass a custom one in the \a locale parameter. |
537 | /// | 537 | /// |
538 | /// \param begin Iterator pointing to the beginning of the input sequence | 538 | /// \param begin Iterator pointing to the beginning of the input sequence |
539 | /// \param end Iterator pointing to the end of the input sequence | 539 | /// \param end Iterator pointing to the end of the input sequence |
540 | /// \param output Iterator pointing to the beginning of the output sequence | 540 | /// \param output Iterator pointing to the beginning of the output sequence |
541 | /// \param replacement Replacement for characters not convertible to ANSI (use 0 to skip them) | 541 | /// \param replacement Replacement for characters not convertible to ANSI (use 0 to skip them) |
542 | /// \param locale Locale to use for conversion | 542 | /// \param locale Locale to use for conversion |
543 | /// | 543 | /// |
544 | /// \return Iterator to the end of the output sequence which has been written | 544 | /// \return Iterator to the end of the output sequence which has been written |
545 | /// | 545 | /// |
546 | //////////////////////////////////////////////////////////// | 546 | //////////////////////////////////////////////////////////// |
547 | template <typename In, typename Out> | 547 | template <typename In, typename Out> |
548 | static Out ToAnsi( In begin, In end, Out output, char replacement = 0, | 548 | static Out ToAnsi( In begin, In end, Out output, char replacement = 0, |
549 | const std::locale& locale = std::locale() ); | 549 | const std::locale& locale = std::locale() ); |
550 | 550 | ||
551 | #ifndef EFSW_NO_WIDECHAR | 551 | #ifndef EFSW_NO_WIDECHAR |
552 | //////////////////////////////////////////////////////////// | 552 | //////////////////////////////////////////////////////////// |
553 | /// \brief Convert a UTF-32 characters range to wide characters | 553 | /// \brief Convert a UTF-32 characters range to wide characters |
554 | /// | 554 | /// |
555 | /// \param begin Iterator pointing to the beginning of the input sequence | 555 | /// \param begin Iterator pointing to the beginning of the input sequence |
556 | /// \param end Iterator pointing to the end of the input sequence | 556 | /// \param end Iterator pointing to the end of the input sequence |
557 | /// \param output Iterator pointing to the beginning of the output sequence | 557 | /// \param output Iterator pointing to the beginning of the output sequence |
558 | /// \param replacement Replacement for characters not convertible to wide (use 0 to skip them) | 558 | /// \param replacement Replacement for characters not convertible to wide (use 0 to skip them) |
559 | /// | 559 | /// |
560 | /// \return Iterator to the end of the output sequence which has been written | 560 | /// \return Iterator to the end of the output sequence which has been written |
561 | /// | 561 | /// |
562 | //////////////////////////////////////////////////////////// | 562 | //////////////////////////////////////////////////////////// |
563 | template <typename In, typename Out> | 563 | template <typename In, typename Out> |
564 | static Out ToWide( In begin, In end, Out output, wchar_t replacement = 0 ); | 564 | static Out ToWide( In begin, In end, Out output, wchar_t replacement = 0 ); |
565 | #endif | 565 | #endif |
566 | 566 | ||
567 | //////////////////////////////////////////////////////////// | 567 | //////////////////////////////////////////////////////////// |
568 | /// \brief Convert a UTF-32 characters range to latin-1 (ISO-8859-1) characters | 568 | /// \brief Convert a UTF-32 characters range to latin-1 (ISO-8859-1) characters |
569 | /// | 569 | /// |
570 | /// \param begin Iterator pointing to the beginning of the input sequence | 570 | /// \param begin Iterator pointing to the beginning of the input sequence |
571 | /// \param end Iterator pointing to the end of the input sequence | 571 | /// \param end Iterator pointing to the end of the input sequence |
572 | /// \param output Iterator pointing to the beginning of the output sequence | 572 | /// \param output Iterator pointing to the beginning of the output sequence |
573 | /// \param replacement Replacement for characters not convertible to latin-1 (use 0 to skip them) | 573 | /// \param replacement Replacement for characters not convertible to latin-1 (use 0 to skip them) |
574 | /// | 574 | /// |
575 | /// \return Iterator to the end of the output sequence which has been written | 575 | /// \return Iterator to the end of the output sequence which has been written |
576 | /// | 576 | /// |
577 | //////////////////////////////////////////////////////////// | 577 | //////////////////////////////////////////////////////////// |
578 | template <typename In, typename Out> | 578 | template <typename In, typename Out> |
579 | static Out ToLatin1( In begin, In end, Out output, char replacement = 0 ); | 579 | static Out ToLatin1( In begin, In end, Out output, char replacement = 0 ); |
580 | 580 | ||
581 | //////////////////////////////////////////////////////////// | 581 | //////////////////////////////////////////////////////////// |
582 | /// \brief Convert a UTF-32 characters range to UTF-8 | 582 | /// \brief Convert a UTF-32 characters range to UTF-8 |
583 | /// | 583 | /// |
584 | /// \param begin Iterator pointing to the beginning of the input sequence | 584 | /// \param begin Iterator pointing to the beginning of the input sequence |
585 | /// \param end Iterator pointing to the end of the input sequence | 585 | /// \param end Iterator pointing to the end of the input sequence |
586 | /// \param output Iterator pointing to the beginning of the output sequence | 586 | /// \param output Iterator pointing to the beginning of the output sequence |
587 | /// | 587 | /// |
588 | /// \return Iterator to the end of the output sequence which has been written | 588 | /// \return Iterator to the end of the output sequence which has been written |
589 | /// | 589 | /// |
590 | //////////////////////////////////////////////////////////// | 590 | //////////////////////////////////////////////////////////// |
591 | template <typename In, typename Out> static Out toUtf8( In begin, In end, Out output ); | 591 | template <typename In, typename Out> static Out toUtf8( In begin, In end, Out output ); |
592 | 592 | ||
593 | //////////////////////////////////////////////////////////// | 593 | //////////////////////////////////////////////////////////// |
594 | /// \brief Convert a range of UTF-32 characters to UTF-16 | 594 | /// \brief Convert a range of UTF-32 characters to UTF-16 |
595 | /// | 595 | /// |
596 | /// \param begin Iterator pointing to the beginning of the input sequence | 596 | /// \param begin Iterator pointing to the beginning of the input sequence |
597 | /// \param end Iterator pointing to the end of the input sequence | 597 | /// \param end Iterator pointing to the end of the input sequence |
598 | /// \param output Iterator pointing to the beginning of the output sequence | 598 | /// \param output Iterator pointing to the beginning of the output sequence |
599 | /// | 599 | /// |
600 | /// \return Iterator to the end of the output sequence which has been written | 600 | /// \return Iterator to the end of the output sequence which has been written |
601 | /// | 601 | /// |
602 | //////////////////////////////////////////////////////////// | 602 | //////////////////////////////////////////////////////////// |
603 | template <typename In, typename Out> static Out ToUtf16( In begin, In end, Out output ); | 603 | template <typename In, typename Out> static Out ToUtf16( In begin, In end, Out output ); |
604 | 604 | ||
605 | //////////////////////////////////////////////////////////// | 605 | //////////////////////////////////////////////////////////// |
606 | /// \brief Convert a range of UTF-32 characters to UTF-32 | 606 | /// \brief Convert a range of UTF-32 characters to UTF-32 |
607 | /// | 607 | /// |
608 | /// This function does nothing more than a direct copy; | 608 | /// This function does nothing more than a direct copy; |
609 | /// it is defined only to provide the same interface as other | 609 | /// it is defined only to provide the same interface as other |
610 | /// specializations of the efsw::Utf<> template, and allow | 610 | /// specializations of the efsw::Utf<> template, and allow |
611 | /// generic code to be written on top of it. | 611 | /// generic code to be written on top of it. |
612 | /// | 612 | /// |
613 | /// \param begin Iterator pointing to the beginning of the input sequence | 613 | /// \param begin Iterator pointing to the beginning of the input sequence |
614 | /// \param end Iterator pointing to the end of the input sequence | 614 | /// \param end Iterator pointing to the end of the input sequence |
615 | /// \param output Iterator pointing to the beginning of the output sequence | 615 | /// \param output Iterator pointing to the beginning of the output sequence |
616 | /// | 616 | /// |
617 | /// \return Iterator to the end of the output sequence which has been written | 617 | /// \return Iterator to the end of the output sequence which has been written |
618 | /// | 618 | /// |
619 | //////////////////////////////////////////////////////////// | 619 | //////////////////////////////////////////////////////////// |
620 | template <typename In, typename Out> static Out ToUtf32( In begin, In end, Out output ); | 620 | template <typename In, typename Out> static Out ToUtf32( In begin, In end, Out output ); |
621 | 621 | ||
622 | //////////////////////////////////////////////////////////// | 622 | //////////////////////////////////////////////////////////// |
623 | /// \brief Decode a single ANSI character to UTF-32 | 623 | /// \brief Decode a single ANSI character to UTF-32 |
624 | /// | 624 | /// |
625 | /// This function does not exist in other specializations | 625 | /// This function does not exist in other specializations |
626 | /// of efsw::Utf<>, it is defined for convenience (it is used by | 626 | /// of efsw::Utf<>, it is defined for convenience (it is used by |
627 | /// several other conversion functions). | 627 | /// several other conversion functions). |
628 | /// | 628 | /// |
629 | /// \param input Input ANSI character | 629 | /// \param input Input ANSI character |
630 | /// \param locale Locale to use for conversion | 630 | /// \param locale Locale to use for conversion |
631 | /// | 631 | /// |
632 | /// \return Decoded UTF-32 code point | 632 | /// \return Decoded UTF-32 code point |
633 | /// | 633 | /// |
634 | //////////////////////////////////////////////////////////// | 634 | //////////////////////////////////////////////////////////// |
635 | template <typename In> | 635 | template <typename In> |
636 | static Uint32 DecodeAnsi( In input, const std::locale& locale = std::locale() ); | 636 | static Uint32 DecodeAnsi( In input, const std::locale& locale = std::locale() ); |
637 | 637 | ||
638 | //////////////////////////////////////////////////////////// | 638 | //////////////////////////////////////////////////////////// |
639 | /// \brief Decode a single wide character to UTF-32 | 639 | /// \brief Decode a single wide character to UTF-32 |
640 | /// | 640 | /// |
641 | /// This function does not exist in other specializations | 641 | /// This function does not exist in other specializations |
642 | /// of efsw::Utf<>, it is defined for convenience (it is used by | 642 | /// of efsw::Utf<>, it is defined for convenience (it is used by |
643 | /// several other conversion functions). | 643 | /// several other conversion functions). |
644 | /// | 644 | /// |
645 | /// \param input Input wide character | 645 | /// \param input Input wide character |
646 | /// | 646 | /// |
647 | /// \return Decoded UTF-32 code point | 647 | /// \return Decoded UTF-32 code point |
648 | /// | 648 | /// |
649 | //////////////////////////////////////////////////////////// | 649 | //////////////////////////////////////////////////////////// |
650 | template <typename In> static Uint32 DecodeWide( In input ); | 650 | template <typename In> static Uint32 DecodeWide( In input ); |
651 | 651 | ||
652 | //////////////////////////////////////////////////////////// | 652 | //////////////////////////////////////////////////////////// |
653 | /// \brief Encode a single UTF-32 character to ANSI | 653 | /// \brief Encode a single UTF-32 character to ANSI |
654 | /// | 654 | /// |
655 | /// This function does not exist in other specializations | 655 | /// This function does not exist in other specializations |
656 | /// of efsw::Utf<>, it is defined for convenience (it is used by | 656 | /// of efsw::Utf<>, it is defined for convenience (it is used by |
657 | /// several other conversion functions). | 657 | /// several other conversion functions). |
658 | /// | 658 | /// |
659 | /// \param codepoint UTF-32 code point to encode | 659 | /// \param codepoint UTF-32 code point to encode |
660 | /// \param output Iterator pointing to the beginning of the output sequence | 660 | /// \param output Iterator pointing to the beginning of the output sequence |
661 | /// \param replacement Replacement if the input character is not convertible to ANSI (use 0 to skip it) | 661 | /// \param replacement Replacement if the input character is not convertible to ANSI (use 0 to skip it) |
662 | /// \param locale Locale to use for conversion | 662 | /// \param locale Locale to use for conversion |
663 | /// | 663 | /// |
664 | /// \return Iterator to the end of the output sequence which has been written | 664 | /// \return Iterator to the end of the output sequence which has been written |
665 | /// | 665 | /// |
666 | //////////////////////////////////////////////////////////// | 666 | //////////////////////////////////////////////////////////// |
667 | template <typename Out> | 667 | template <typename Out> |
668 | static Out EncodeAnsi( Uint32 codepoint, Out output, char replacement = 0, | 668 | static Out EncodeAnsi( Uint32 codepoint, Out output, char replacement = 0, |
669 | const std::locale& locale = std::locale() ); | 669 | const std::locale& locale = std::locale() ); |
670 | 670 | ||
671 | #ifndef EFSW_NO_WIDECHAR | 671 | #ifndef EFSW_NO_WIDECHAR |
672 | //////////////////////////////////////////////////////////// | 672 | //////////////////////////////////////////////////////////// |
673 | /// \brief Encode a single UTF-32 character to wide | 673 | /// \brief Encode a single UTF-32 character to wide |
674 | /// | 674 | /// |
675 | /// This function does not exist in other specializations | 675 | /// This function does not exist in other specializations |
676 | /// of efsw::Utf<>, it is defined for convenience (it is used by | 676 | /// of efsw::Utf<>, it is defined for convenience (it is used by |
677 | /// several other conversion functions). | 677 | /// several other conversion functions). |
678 | /// | 678 | /// |
679 | /// \param codepoint UTF-32 code point to encode | 679 | /// \param codepoint UTF-32 code point to encode |
680 | /// \param output Iterator pointing to the beginning of the output sequence | 680 | /// \param output Iterator pointing to the beginning of the output sequence |
681 | /// \param replacement Replacement if the input character is not convertible to wide (use 0 to | 681 | /// \param replacement Replacement if the input character is not convertible to wide (use 0 to |
682 | /// skip it) | 682 | /// skip it) |
683 | /// | 683 | /// |
684 | /// \return Iterator to the end of the output sequence which has been written | 684 | /// \return Iterator to the end of the output sequence which has been written |
685 | /// | 685 | /// |
686 | //////////////////////////////////////////////////////////// | 686 | //////////////////////////////////////////////////////////// |
687 | template <typename Out> | 687 | template <typename Out> |
688 | static Out EncodeWide( Uint32 codepoint, Out output, wchar_t replacement = 0 ); | 688 | static Out EncodeWide( Uint32 codepoint, Out output, wchar_t replacement = 0 ); |
689 | #endif | 689 | #endif |
690 | }; | 690 | }; |
691 | 691 | ||
692 | #include "Utf.inl" | 692 | #include "Utf.inl" |
693 | 693 | ||
694 | // Convenience typedefs so callers can name the specializations without the template syntax | 694 | // Convenience typedefs so callers can name the specializations without the template syntax |
695 | typedef Utf<8> Utf8; | 695 | typedef Utf<8> Utf8; |
696 | typedef Utf<16> Utf16; | 696 | typedef Utf<16> Utf16; |
697 | typedef Utf<32> Utf32; | 697 | typedef Utf<32> Utf32; |
698 | 698 | ||
699 | } // namespace efsw | 699 | } // namespace efsw |
700 | #endif | 700 | #endif |
701 | 701 | ||
702 | //////////////////////////////////////////////////////////// | 702 | //////////////////////////////////////////////////////////// |
703 | /// \class efsw::Utf | 703 | /// \class efsw::Utf |
704 | /// \ingroup system | 704 | /// \ingroup system |
705 | /// | 705 | /// |
706 | /// Utility class providing generic functions for UTF conversions. | 706 | /// Utility class providing generic functions for UTF conversions. |
707 | /// | 707 | /// |
708 | /// efsw::Utf is a low-level, generic interface for counting, iterating, | 708 | /// efsw::Utf is a low-level, generic interface for counting, iterating, |
709 | /// encoding and decoding Unicode characters and strings. It is able | 709 | /// encoding and decoding Unicode characters and strings. It is able |
710 | /// to handle ANSI, wide, UTF-8, UTF-16 and UTF-32 encodings. | 710 | /// to handle ANSI, wide, UTF-8, UTF-16 and UTF-32 encodings. |
711 | /// | 711 | /// |
712 | /// efsw::Utf<X> functions are all static, these classes are not meant to | 712 | /// efsw::Utf<X> functions are all static, these classes are not meant to |
713 | /// be instantiated. All the functions are templates, so that you | 713 | /// be instantiated. All the functions are templates, so that you |
714 | /// can use any character / string type for a given encoding. | 714 | /// can use any character / string type for a given encoding. |
715 | /// | 715 | /// |
716 | /// It has 3 specializations: | 716 | /// It has 3 specializations: |
717 | /// \li efsw::Utf<8> (typedef'd to efsw::Utf8) | 717 | /// \li efsw::Utf<8> (typedef'd to efsw::Utf8) |
718 | /// \li efsw::Utf<16> (typedef'd to efsw::Utf16) | 718 | /// \li efsw::Utf<16> (typedef'd to efsw::Utf16) |
719 | /// \li efsw::Utf<32> (typedef'd to efsw::Utf32) | 719 | /// \li efsw::Utf<32> (typedef'd to efsw::Utf32) |
720 | /// | 720 | /// |
721 | //////////////////////////////////////////////////////////// | 721 | //////////////////////////////////////////////////////////// |