diff options
Diffstat (limited to 'miscutils/iconv.c')
-rw-r--r-- | miscutils/iconv.c | 1771 |
1 files changed, 1771 insertions, 0 deletions
diff --git a/miscutils/iconv.c b/miscutils/iconv.c new file mode 100644 index 000000000..bedbb718d --- /dev/null +++ b/miscutils/iconv.c | |||
@@ -0,0 +1,1771 @@ | |||
1 | /* | ||
2 | * iconv implementation using Win32 API to convert. | ||
3 | * | ||
4 | * This file is placed in the public domain. | ||
5 | */ | ||
6 | |||
7 | /* | ||
8 | * This code was obtained from: | ||
9 | * | ||
10 | * https://github.com/win-iconv/win-iconv | ||
11 | * | ||
12 | * Modified for busybox-w32 by Ronald M Yorston. These modifications | ||
13 | * are also dedicated to the public domain. | ||
14 | */ | ||
15 | |||
16 | //config:config ICONV | ||
17 | //config: bool "iconv (11.4 kb)" | ||
18 | //config: default y | ||
19 | //config: depends on PLATFORM_MINGW32 | ||
20 | //config: help | ||
21 | //config: 'iconv' converts text between character encodings. | ||
22 | |||
23 | //applet:IF_ICONV(APPLET(iconv, BB_DIR_USR_BIN, BB_SUID_DROP)) | ||
24 | |||
25 | //kbuild:lib-$(CONFIG_ICONV) += iconv.o | ||
26 | |||
27 | //usage:#define iconv_trivial_usage | ||
28 | //usage: "[-lc] [-o outfile] [-f from-enc] [-t to-enc] [FILE]..." | ||
29 | //usage:#define iconv_full_usage "\n\n" | ||
30 | //usage: "Convert text between character encodings\n" | ||
31 | //usage: "\n -l List all known character encodings" | ||
32 | //usage: "\n -c Silently discard characters that cannot be converted" | ||
33 | //usage: "\n -o Use outfile for output" | ||
34 | //usage: "\n -f Use from-enc for input characters" | ||
35 | //usage: "\n -t Use to-enc for output characters" | ||
36 | |||
37 | #include "libbb.h" | ||
38 | |||
39 | /* WORKAROUND: */ | ||
40 | #define GetProcAddressA GetProcAddress | ||
41 | |||
42 | #define MB_CHAR_MAX 16 | ||
43 | |||
44 | #define UNICODE_MODE_BOM_DONE 1 | ||
45 | #define UNICODE_MODE_SWAPPED 2 | ||
46 | |||
47 | #define FLAG_USE_BOM 1 | ||
48 | #define FLAG_TRANSLIT 2 /* //TRANSLIT */ | ||
49 | #define FLAG_IGNORE 4 /* //IGNORE */ | ||
50 | |||
51 | typedef unsigned char uchar; | ||
52 | typedef unsigned short ushort; | ||
53 | typedef unsigned int uint; | ||
54 | |||
55 | typedef void* iconv_t; | ||
56 | |||
57 | static iconv_t iconv_open(const char *tocode, const char *fromcode); | ||
58 | static int iconv_close(iconv_t cd); | ||
59 | static size_t iconv(iconv_t cd, const char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft); | ||
60 | |||
61 | typedef struct compat_t compat_t; | ||
62 | typedef struct csconv_t csconv_t; | ||
63 | typedef struct rec_iconv_t rec_iconv_t; | ||
64 | |||
65 | typedef int (*f_mbtowc)(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize); | ||
66 | typedef int (*f_wctomb)(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize); | ||
67 | typedef int (*f_mblen)(csconv_t *cv, const uchar *buf, int bufsize); | ||
68 | typedef int (*f_flush)(csconv_t *cv, uchar *buf, int bufsize); | ||
69 | |||
70 | #define COMPAT_IN 1 | ||
71 | #define COMPAT_OUT 2 | ||
72 | |||
73 | /* Unicode mapping for compatibility with other conversion tables. */ | |
/*
 * One entry of a compatibility table: a pair of Unicode code points plus
 * COMPAT_IN / COMPAT_OUT bits. The tables in this file (cp932_compat,
 * cp20932_compat) are arrays of these terminated by an all-zero entry.
 */
74 | struct compat_t { | |
/* first code point of the pair (first initializer column) */
75 | uint in; | |
/* second code point of the pair (second initializer column) */
/* NOTE(review): presumably `in` is rewritten to `out` on one side of the
 * conversion and back on the other - confirm against iconv(). */
76 | uint out; | |
/* bitwise OR of COMPAT_IN and/or COMPAT_OUT; 0 only in the terminator */
77 | uint flag; | |
78 | }; | |
79 | |||
/*
 * State and method table for one side (source or destination) of a
 * conversion. A rec_iconv_t holds two of these; make_csconv() fills one
 * in from an encoding name.
 */
80 | struct csconv_t { | |
/* code page number for this encoding */
/* NOTE(review): presumably the value produced by name_to_codepage() -
 * the assignment is outside this view. */
81 | int codepage; | |
/* FLAG_* bits (FLAG_USE_BOM, FLAG_TRANSLIT, FLAG_IGNORE); iconv() tests
 * FLAG_IGNORE on the destination side to decide whether to skip errors */
82 | int flags; | |
/* decodes input bytes into UTF-16 units; iconv() treats a return value
 * of -1 as failure, otherwise as the number of input bytes consumed */
83 | f_mbtowc mbtowc; | |
/* encodes UTF-16 units into output bytes (see f_wctomb signature) */
84 | f_wctomb wctomb; | |
/* length query for a multibyte sequence (see f_mblen signature) */
85 | f_mblen mblen; | |
/* may be NULL; when set, iconv() calls it on a NULL-input request to
 * write pending output, and treats -1 as failure */
86 | f_flush flush; | |
/* per-converter state word; iconv() snapshots it before each step,
 * restores it on failure and resets it to 0 on a NULL-input request */
87 | DWORD mode; | |
/* compatibility table for this converter (array of compat_t) */
/* NOTE(review): whether this may be NULL is decided outside this view. */
88 | compat_t *compat; | |
89 | }; | |
90 | |||
/*
 * The object behind an iconv_t handle: iconv_open() allocates one,
 * iconv() casts the handle back to it, iconv_close() frees it.
 */
91 | struct rec_iconv_t { | |
/* set by iconv_open() to point back at this record itself */
92 | iconv_t cd; | |
/* converter for the source encoding (built from `fromcode`) */
93 | csconv_t from; | |
/* converter for the destination encoding (built from `tocode`) */
94 | csconv_t to; | |
95 | }; | |
96 | |||
97 | static int load_mlang(void); | ||
98 | static int make_csconv(const char *name, csconv_t *cv); | ||
99 | static int name_to_codepage(const char *name); | ||
100 | static uint utf16_to_ucs4(const ushort *wbuf); | ||
101 | static void ucs4_to_utf16(uint wc, ushort *wbuf, int *wbufsize); | ||
102 | static int mbtowc_flags(int codepage); | ||
103 | static int must_use_null_useddefaultchar(int codepage); | ||
104 | static int seterror(int err); | ||
105 | |||
106 | static int sbcs_mblen(csconv_t *cv, const uchar *buf, int bufsize); | ||
107 | static int dbcs_mblen(csconv_t *cv, const uchar *buf, int bufsize); | ||
108 | static int mbcs_mblen(csconv_t *cv, const uchar *buf, int bufsize); | ||
109 | static int utf8_mblen(csconv_t *cv, const uchar *buf, int bufsize); | ||
110 | static int eucjp_mblen(csconv_t *cv, const uchar *buf, int bufsize); | ||
111 | |||
112 | static int kernel_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize); | ||
113 | static int kernel_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize); | ||
114 | static int mlang_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize); | ||
115 | static int mlang_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize); | ||
116 | static int utf16_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize); | ||
117 | static int utf16_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize); | ||
118 | static int utf32_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize); | ||
119 | static int utf32_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize); | ||
120 | static int iso2022jp_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize); | ||
121 | static int iso2022jp_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize); | ||
122 | static int iso2022jp_flush(csconv_t *cv, uchar *buf, int bufsize); | ||
123 | |||
124 | #define CP_ALIAS_LIST \ | ||
125 | CP_ALIAS(65001, "CP65001") \ | ||
126 | CP_ALIAS(65001, "UTF8") \ | ||
127 | CP_ALIAS(65001, "UTF-8") \ | ||
128 | \ | ||
129 | CP_ALIAS(1200, "CP1200") \ | ||
130 | CP_ALIAS(1200, "UTF16LE") \ | ||
131 | CP_ALIAS(1200, "UTF-16LE") \ | ||
132 | CP_ALIAS(1200, "UCS2LE") \ | ||
133 | CP_ALIAS(1200, "UCS-2LE") \ | ||
134 | CP_ALIAS(1200, "UCS-2-INTERNAL") \ | ||
135 | \ | ||
136 | CP_ALIAS(1201, "CP1201") \ | ||
137 | CP_ALIAS(1201, "UTF16BE") \ | ||
138 | CP_ALIAS(1201, "UTF-16BE") \ | ||
139 | CP_ALIAS(1201, "UCS2BE") \ | ||
140 | CP_ALIAS(1201, "UCS-2BE") \ | ||
141 | CP_ALIAS(1201, "unicodeFFFE") \ | ||
142 | \ | ||
143 | CP_ALIAS(12000, "CP12000") \ | ||
144 | CP_ALIAS(12000, "UTF32LE") \ | ||
145 | CP_ALIAS(12000, "UTF-32LE") \ | ||
146 | CP_ALIAS(12000, "UCS4LE") \ | ||
147 | CP_ALIAS(12000, "UCS-4LE") \ | ||
148 | \ | ||
149 | CP_ALIAS(12001, "CP12001") \ | ||
150 | CP_ALIAS(12001, "UTF32BE") \ | ||
151 | CP_ALIAS(12001, "UTF-32BE") \ | ||
152 | CP_ALIAS(12001, "UCS4BE") \ | ||
153 | CP_ALIAS(12001, "UCS-4BE") \ | ||
154 | \ | ||
155 | /* Default is little endian, because the platform is */ \ | ||
156 | CP_ALIAS(1200, "UTF16") \ | ||
157 | CP_ALIAS(1200, "UTF-16") \ | ||
158 | CP_ALIAS(1200, "UCS2") \ | ||
159 | CP_ALIAS(1200, "UCS-2") \ | ||
160 | CP_ALIAS(12000, "UTF32") \ | ||
161 | CP_ALIAS(12000, "UTF-32") \ | ||
162 | CP_ALIAS(12000, "UCS4") \ | ||
163 | CP_ALIAS(12000, "UCS-4") \ | ||
164 | \ | ||
165 | /* copy from libiconv `iconv -l` */ \ | ||
166 | /* !IsValidCodePage(367) */ \ | ||
167 | CP_ALIAS(20127, "ANSI_X3.4-1968") \ | ||
168 | CP_ALIAS(20127, "ANSI_X3.4-1986") \ | ||
169 | CP_ALIAS(20127, "ASCII") \ | ||
170 | CP_ALIAS(20127, "CP367") \ | ||
171 | CP_ALIAS(20127, "IBM367") \ | ||
172 | CP_ALIAS(20127, "ISO-IR-6") \ | ||
173 | CP_ALIAS(20127, "ISO646-US") \ | ||
174 | CP_ALIAS(20127, "ISO_646.IRV:1991") \ | ||
175 | CP_ALIAS(20127, "US") \ | ||
176 | CP_ALIAS(20127, "US-ASCII") \ | ||
177 | CP_ALIAS(20127, "CSASCII") \ | ||
178 | \ | ||
179 | /* !IsValidCodePage(819) */ \ | ||
180 | CP_ALIAS(1252, "CP819") \ | ||
181 | CP_ALIAS(1252, "IBM819") \ | ||
182 | CP_ALIAS(28591, "ISO-8859-1") \ | ||
183 | CP_ALIAS(28591, "ISO-IR-100") \ | ||
184 | CP_ALIAS(28591, "ISO8859-1") \ | ||
185 | CP_ALIAS(28591, "ISO_8859-1") \ | ||
186 | CP_ALIAS(28591, "ISO_8859-1:1987") \ | ||
187 | CP_ALIAS(28591, "L1") \ | ||
188 | CP_ALIAS(28591, "LATIN1") \ | ||
189 | CP_ALIAS(28591, "CSISOLATIN1") \ | ||
190 | \ | ||
191 | CP_ALIAS(1250, "CP1250") \ | ||
192 | CP_ALIAS(1250, "MS-EE") \ | ||
193 | CP_ALIAS(1250, "WINDOWS-1250") \ | ||
194 | \ | ||
195 | CP_ALIAS(1251, "CP1251") \ | ||
196 | CP_ALIAS(1251, "MS-CYRL") \ | ||
197 | CP_ALIAS(1251, "WINDOWS-1251") \ | ||
198 | \ | ||
199 | CP_ALIAS(1252, "CP1252") \ | ||
200 | CP_ALIAS(1252, "MS-ANSI") \ | ||
201 | CP_ALIAS(1252, "WINDOWS-1252") \ | ||
202 | \ | ||
203 | CP_ALIAS(1253, "CP1253") \ | ||
204 | CP_ALIAS(1253, "MS-GREEK") \ | ||
205 | CP_ALIAS(1253, "WINDOWS-1253") \ | ||
206 | \ | ||
207 | CP_ALIAS(1254, "CP1254") \ | ||
208 | CP_ALIAS(1254, "MS-TURK") \ | ||
209 | CP_ALIAS(1254, "WINDOWS-1254") \ | ||
210 | \ | ||
211 | CP_ALIAS(1255, "CP1255") \ | ||
212 | CP_ALIAS(1255, "MS-HEBR") \ | ||
213 | CP_ALIAS(1255, "WINDOWS-1255") \ | ||
214 | \ | ||
215 | CP_ALIAS(1256, "CP1256") \ | ||
216 | CP_ALIAS(1256, "MS-ARAB") \ | ||
217 | CP_ALIAS(1256, "WINDOWS-1256") \ | ||
218 | \ | ||
219 | CP_ALIAS(1257, "CP1257") \ | ||
220 | CP_ALIAS(1257, "WINBALTRIM") \ | ||
221 | CP_ALIAS(1257, "WINDOWS-1257") \ | ||
222 | \ | ||
223 | CP_ALIAS(1258, "CP1258") \ | ||
224 | CP_ALIAS(1258, "WINDOWS-1258") \ | ||
225 | \ | ||
226 | CP_ALIAS(850, "850") \ | ||
227 | CP_ALIAS(850, "CP850") \ | ||
228 | CP_ALIAS(850, "IBM850") \ | ||
229 | CP_ALIAS(850, "CSPC850MULTILINGUAL") \ | ||
230 | \ | ||
231 | /* !IsValidCodePage(862) */ \ | ||
232 | CP_ALIAS(862, "862") \ | ||
233 | CP_ALIAS(862, "CP862") \ | ||
234 | CP_ALIAS(862, "IBM862") \ | ||
235 | CP_ALIAS(862, "CSPC862LATINHEBREW") \ | ||
236 | \ | ||
237 | CP_ALIAS(866, "866") \ | ||
238 | CP_ALIAS(866, "CP866") \ | ||
239 | CP_ALIAS(866, "IBM866") \ | ||
240 | CP_ALIAS(866, "CSIBM866") \ | ||
241 | \ | ||
242 | /* !IsValidCodePage(154) */ \ | ||
243 | CP_ALIAS(154, "CP154") \ | ||
244 | CP_ALIAS(154, "CYRILLIC-ASIAN") \ | ||
245 | CP_ALIAS(154, "PT154") \ | ||
246 | CP_ALIAS(154, "PTCP154") \ | ||
247 | CP_ALIAS(154, "CSPTCP154") \ | ||
248 | \ | ||
249 | /* !IsValidCodePage(1133) */ \ | ||
250 | CP_ALIAS(1133, "CP1133") \ | ||
251 | CP_ALIAS(1133, "IBM-CP1133") \ | ||
252 | \ | ||
253 | CP_ALIAS(874, "CP874") \ | ||
254 | CP_ALIAS(874, "WINDOWS-874") \ | ||
255 | \ | ||
256 | /* !IsValidCodePage(51932) */ \ | ||
257 | CP_ALIAS(51932, "CP51932") \ | ||
258 | CP_ALIAS(51932, "MS51932") \ | ||
259 | CP_ALIAS(51932, "WINDOWS-51932") \ | ||
260 | CP_ALIAS(51932, "EUC-JP") \ | ||
261 | \ | ||
262 | CP_ALIAS(932, "CP932") \ | ||
263 | CP_ALIAS(932, "MS932") \ | ||
264 | CP_ALIAS(932, "SHIFFT_JIS") \ | ||
265 | CP_ALIAS(932, "SHIFFT_JIS-MS") \ | ||
266 | CP_ALIAS(932, "SJIS") \ | ||
267 | CP_ALIAS(932, "SJIS-MS") \ | ||
268 | CP_ALIAS(932, "SJIS-OPEN") \ | ||
269 | CP_ALIAS(932, "SJIS-WIN") \ | ||
270 | CP_ALIAS(932, "WINDOWS-31J") \ | ||
271 | CP_ALIAS(932, "WINDOWS-932") \ | ||
272 | CP_ALIAS(932, "CSWINDOWS31J") \ | ||
273 | \ | ||
274 | CP_ALIAS(50221, "CP50221") \ | ||
275 | CP_ALIAS(50221, "ISO-2022-JP") \ | ||
276 | CP_ALIAS(50221, "ISO-2022-JP-MS") \ | ||
277 | CP_ALIAS(50221, "ISO2022-JP") \ | ||
278 | CP_ALIAS(50221, "ISO2022-JP-MS") \ | ||
279 | CP_ALIAS(50221, "MS50221") \ | ||
280 | CP_ALIAS(50221, "WINDOWS-50221") \ | ||
281 | \ | ||
282 | CP_ALIAS(936, "CP936") \ | ||
283 | CP_ALIAS(936, "GBK") \ | ||
284 | CP_ALIAS(936, "MS936") \ | ||
285 | CP_ALIAS(936, "WINDOWS-936") \ | ||
286 | \ | ||
287 | CP_ALIAS(950, "CP950") \ | ||
288 | CP_ALIAS(950, "BIG5") \ | ||
289 | CP_ALIAS(950, "BIG5HKSCS") \ | ||
290 | CP_ALIAS(950, "BIG5-HKSCS") \ | ||
291 | \ | ||
292 | CP_ALIAS(949, "CP949") \ | ||
293 | CP_ALIAS(949, "UHC") \ | ||
294 | CP_ALIAS(949, "EUC-KR") \ | ||
295 | \ | ||
296 | CP_ALIAS(1361, "CP1361") \ | ||
297 | CP_ALIAS(1361, "JOHAB") \ | ||
298 | \ | ||
299 | CP_ALIAS(437, "437") \ | ||
300 | CP_ALIAS(437, "CP437") \ | ||
301 | CP_ALIAS(437, "IBM437") \ | ||
302 | CP_ALIAS(437, "CSPC8CODEPAGE437") \ | ||
303 | \ | ||
304 | CP_ALIAS(737, "CP737") \ | ||
305 | \ | ||
306 | CP_ALIAS(775, "CP775") \ | ||
307 | CP_ALIAS(775, "IBM775") \ | ||
308 | CP_ALIAS(775, "CSPC775BALTIC") \ | ||
309 | \ | ||
310 | CP_ALIAS(852, "852") \ | ||
311 | CP_ALIAS(852, "CP852") \ | ||
312 | CP_ALIAS(852, "IBM852") \ | ||
313 | CP_ALIAS(852, "CSPCP852") \ | ||
314 | \ | ||
315 | /* !IsValidCodePage(853) */ \ | ||
316 | CP_ALIAS(853, "CP853") \ | ||
317 | \ | ||
318 | CP_ALIAS(855, "855") \ | ||
319 | CP_ALIAS(855, "CP855") \ | ||
320 | CP_ALIAS(855, "IBM855") \ | ||
321 | CP_ALIAS(855, "CSIBM855") \ | ||
322 | \ | ||
323 | CP_ALIAS(857, "857") \ | ||
324 | CP_ALIAS(857, "CP857") \ | ||
325 | CP_ALIAS(857, "IBM857") \ | ||
326 | CP_ALIAS(857, "CSIBM857") \ | ||
327 | \ | ||
328 | /* !IsValidCodePage(858) */ \ | ||
329 | CP_ALIAS(858, "CP858") \ | ||
330 | \ | ||
331 | CP_ALIAS(860, "860") \ | ||
332 | CP_ALIAS(860, "CP860") \ | ||
333 | CP_ALIAS(860, "IBM860") \ | ||
334 | CP_ALIAS(860, "CSIBM860") \ | ||
335 | \ | ||
336 | CP_ALIAS(861, "861") \ | ||
337 | CP_ALIAS(861, "CP-IS") \ | ||
338 | CP_ALIAS(861, "CP861") \ | ||
339 | CP_ALIAS(861, "IBM861") \ | ||
340 | CP_ALIAS(861, "CSIBM861") \ | ||
341 | \ | ||
342 | CP_ALIAS(863, "863") \ | ||
343 | CP_ALIAS(863, "CP863") \ | ||
344 | CP_ALIAS(863, "IBM863") \ | ||
345 | CP_ALIAS(863, "CSIBM863") \ | ||
346 | \ | ||
347 | CP_ALIAS(864, "CP864") \ | ||
348 | CP_ALIAS(864, "IBM864") \ | ||
349 | CP_ALIAS(864, "CSIBM864") \ | ||
350 | \ | ||
351 | CP_ALIAS(865, "865") \ | ||
352 | CP_ALIAS(865, "CP865") \ | ||
353 | CP_ALIAS(865, "IBM865") \ | ||
354 | CP_ALIAS(865, "CSIBM865") \ | ||
355 | \ | ||
356 | CP_ALIAS(869, "869") \ | ||
357 | CP_ALIAS(869, "CP-GR") \ | ||
358 | CP_ALIAS(869, "CP869") \ | ||
359 | CP_ALIAS(869, "IBM869") \ | ||
360 | CP_ALIAS(869, "CSIBM869") \ | ||
361 | \ | ||
362 | /* !IsValidCodePage(1125) */ \ | |
363 | CP_ALIAS(1125, "CP1125") \ | ||
364 | \ | ||
365 | /* \ | ||
366 | * Code Page Identifiers \ | ||
367 | * http://msdn2.microsoft.com/en-us/library/ms776446.aspx \ | ||
368 | */ \ | ||
369 | CP_ALIAS(37, "IBM037") /* IBM EBCDIC US-Canada */ \ | ||
370 | CP_ALIAS(437, "IBM437") /* OEM United States */ \ | ||
371 | CP_ALIAS(500, "IBM500") /* IBM EBCDIC International */ \ | ||
372 | CP_ALIAS(708, "ASMO-708") /* Arabic (ASMO 708) */ \ | ||
373 | /* 709 Arabic (ASMO-449+, BCON V4) */ \ | ||
374 | /* 710 Arabic - Transparent Arabic */ \ | ||
375 | CP_ALIAS(720, "DOS-720") /* Arabic (Transparent ASMO); Arabic (DOS) */ \ | ||
376 | CP_ALIAS(737, "ibm737") /* OEM Greek (formerly 437G); Greek (DOS) */ \ | ||
377 | CP_ALIAS(775, "ibm775") /* OEM Baltic; Baltic (DOS) */ \ | ||
378 | CP_ALIAS(850, "ibm850") /* OEM Multilingual Latin 1; Western European (DOS) */ \ | ||
379 | CP_ALIAS(852, "ibm852") /* OEM Latin 2; Central European (DOS) */ \ | ||
380 | CP_ALIAS(855, "IBM855") /* OEM Cyrillic (primarily Russian) */ \ | ||
381 | CP_ALIAS(857, "ibm857") /* OEM Turkish; Turkish (DOS) */ \ | ||
382 | CP_ALIAS(858, "IBM00858") /* OEM Multilingual Latin 1 + Euro symbol */ \ | ||
383 | CP_ALIAS(860, "IBM860") /* OEM Portuguese; Portuguese (DOS) */ \ | ||
384 | CP_ALIAS(861, "ibm861") /* OEM Icelandic; Icelandic (DOS) */ \ | ||
385 | CP_ALIAS(862, "DOS-862") /* OEM Hebrew; Hebrew (DOS) */ \ | ||
386 | CP_ALIAS(863, "IBM863") /* OEM French Canadian; French Canadian (DOS) */ \ | ||
387 | CP_ALIAS(864, "IBM864") /* OEM Arabic; Arabic (864) */ \ | ||
388 | CP_ALIAS(865, "IBM865") /* OEM Nordic; Nordic (DOS) */ \ | ||
389 | CP_ALIAS(866, "cp866") /* OEM Russian; Cyrillic (DOS) */ \ | ||
390 | CP_ALIAS(869, "ibm869") /* OEM Modern Greek; Greek, Modern (DOS) */ \ | ||
391 | CP_ALIAS(870, "IBM870") /* IBM EBCDIC Multilingual/ROECE (Latin 2); IBM EBCDIC Multilingual Latin 2 */ \ | ||
392 | CP_ALIAS(874, "windows-874") /* ANSI/OEM Thai (same as 28605, ISO 8859-15); Thai (Windows) */ \ | ||
393 | CP_ALIAS(875, "cp875") /* IBM EBCDIC Greek Modern */ \ | ||
394 | CP_ALIAS(932, "shift_jis") /* ANSI/OEM Japanese; Japanese (Shift-JIS) */ \ | ||
395 | CP_ALIAS(932, "shift-jis") /* alternative name for it */ \ | ||
396 | CP_ALIAS(936, "gb2312") /* ANSI/OEM Simplified Chinese (PRC, Singapore); Chinese Simplified (GB2312) */ \ | ||
397 | CP_ALIAS(949, "ks_c_5601-1987") /* ANSI/OEM Korean (Unified Hangul Code) */ \ | ||
398 | CP_ALIAS(950, "big5") /* ANSI/OEM Traditional Chinese (Taiwan; Hong Kong SAR, PRC); Chinese Traditional (Big5) */ \ | ||
399 | CP_ALIAS(950, "big5hkscs") /* ANSI/OEM Traditional Chinese (Hong Kong SAR); Chinese Traditional (Big5-HKSCS) */ \ | ||
400 | CP_ALIAS(950, "big5-hkscs") /* alternative name for it */ \ | ||
401 | CP_ALIAS(1026, "IBM1026") /* IBM EBCDIC Turkish (Latin 5) */ \ | ||
402 | CP_ALIAS(1047, "IBM01047") /* IBM EBCDIC Latin 1/Open System */ \ | ||
403 | CP_ALIAS(1140, "IBM01140") /* IBM EBCDIC US-Canada (037 + Euro symbol); IBM EBCDIC (US-Canada-Euro) */ \ | ||
404 | CP_ALIAS(1141, "IBM01141") /* IBM EBCDIC Germany (20273 + Euro symbol); IBM EBCDIC (Germany-Euro) */ \ | ||
405 | CP_ALIAS(1142, "IBM01142") /* IBM EBCDIC Denmark-Norway (20277 + Euro symbol); IBM EBCDIC (Denmark-Norway-Euro) */ \ | ||
406 | CP_ALIAS(1143, "IBM01143") /* IBM EBCDIC Finland-Sweden (20278 + Euro symbol); IBM EBCDIC (Finland-Sweden-Euro) */ \ | ||
407 | CP_ALIAS(1144, "IBM01144") /* IBM EBCDIC Italy (20280 + Euro symbol); IBM EBCDIC (Italy-Euro) */ \ | ||
408 | CP_ALIAS(1145, "IBM01145") /* IBM EBCDIC Latin America-Spain (20284 + Euro symbol); IBM EBCDIC (Spain-Euro) */ \ | ||
409 | CP_ALIAS(1146, "IBM01146") /* IBM EBCDIC United Kingdom (20285 + Euro symbol); IBM EBCDIC (UK-Euro) */ \ | ||
410 | CP_ALIAS(1147, "IBM01147") /* IBM EBCDIC France (20297 + Euro symbol); IBM EBCDIC (France-Euro) */ \ | ||
411 | CP_ALIAS(1148, "IBM01148") /* IBM EBCDIC International (500 + Euro symbol); IBM EBCDIC (International-Euro) */ \ | ||
412 | CP_ALIAS(1149, "IBM01149") /* IBM EBCDIC Icelandic (20871 + Euro symbol); IBM EBCDIC (Icelandic-Euro) */ \ | ||
413 | CP_ALIAS(1250, "windows-1250") /* ANSI Central European; Central European (Windows) */ \ | ||
414 | CP_ALIAS(1251, "windows-1251") /* ANSI Cyrillic; Cyrillic (Windows) */ \ | ||
415 | CP_ALIAS(1252, "windows-1252") /* ANSI Latin 1; Western European (Windows) */ \ | ||
416 | CP_ALIAS(1253, "windows-1253") /* ANSI Greek; Greek (Windows) */ \ | ||
417 | CP_ALIAS(1254, "windows-1254") /* ANSI Turkish; Turkish (Windows) */ \ | ||
418 | CP_ALIAS(1255, "windows-1255") /* ANSI Hebrew; Hebrew (Windows) */ \ | ||
419 | CP_ALIAS(1256, "windows-1256") /* ANSI Arabic; Arabic (Windows) */ \ | ||
420 | CP_ALIAS(1257, "windows-1257") /* ANSI Baltic; Baltic (Windows) */ \ | ||
421 | CP_ALIAS(1258, "windows-1258") /* ANSI/OEM Vietnamese; Vietnamese (Windows) */ \ | ||
422 | CP_ALIAS(1361, "Johab") /* Korean (Johab) */ \ | ||
423 | CP_ALIAS(10000, "macintosh") /* MAC Roman; Western European (Mac) */ \ | ||
424 | CP_ALIAS(10001, "x-mac-japanese") /* Japanese (Mac) */ \ | ||
425 | CP_ALIAS(10002, "x-mac-chinesetrad") /* MAC Traditional Chinese (Big5); Chinese Traditional (Mac) */ \ | ||
426 | CP_ALIAS(10003, "x-mac-korean") /* Korean (Mac) */ \ | ||
427 | CP_ALIAS(10004, "x-mac-arabic") /* Arabic (Mac) */ \ | ||
428 | CP_ALIAS(10005, "x-mac-hebrew") /* Hebrew (Mac) */ \ | ||
429 | CP_ALIAS(10006, "x-mac-greek") /* Greek (Mac) */ \ | ||
430 | CP_ALIAS(10007, "x-mac-cyrillic") /* Cyrillic (Mac) */ \ | ||
431 | CP_ALIAS(10008, "x-mac-chinesesimp") /* MAC Simplified Chinese (GB 2312); Chinese Simplified (Mac) */ \ | ||
432 | CP_ALIAS(10010, "x-mac-romanian") /* Romanian (Mac) */ \ | ||
433 | CP_ALIAS(10017, "x-mac-ukrainian") /* Ukrainian (Mac) */ \ | ||
434 | CP_ALIAS(10021, "x-mac-thai") /* Thai (Mac) */ \ | ||
435 | CP_ALIAS(10029, "x-mac-ce") /* MAC Latin 2; Central European (Mac) */ \ | ||
436 | CP_ALIAS(10079, "x-mac-icelandic") /* Icelandic (Mac) */ \ | ||
437 | CP_ALIAS(10081, "x-mac-turkish") /* Turkish (Mac) */ \ | ||
438 | CP_ALIAS(10082, "x-mac-croatian") /* Croatian (Mac) */ \ | ||
439 | CP_ALIAS(20000, "x-Chinese_CNS") /* CNS Taiwan; Chinese Traditional (CNS) */ \ | ||
440 | CP_ALIAS(20001, "x-cp20001") /* TCA Taiwan */ \ | ||
441 | CP_ALIAS(20002, "x_Chinese-Eten") /* Eten Taiwan; Chinese Traditional (Eten) */ \ | ||
442 | CP_ALIAS(20003, "x-cp20003") /* IBM5550 Taiwan */ \ | ||
443 | CP_ALIAS(20004, "x-cp20004") /* TeleText Taiwan */ \ | ||
444 | CP_ALIAS(20005, "x-cp20005") /* Wang Taiwan */ \ | ||
445 | CP_ALIAS(20105, "x-IA5") /* IA5 (IRV International Alphabet No. 5, 7-bit); Western European (IA5) */ \ | ||
446 | CP_ALIAS(20106, "x-IA5-German") /* IA5 German (7-bit) */ \ | ||
447 | CP_ALIAS(20107, "x-IA5-Swedish") /* IA5 Swedish (7-bit) */ \ | ||
448 | CP_ALIAS(20108, "x-IA5-Norwegian") /* IA5 Norwegian (7-bit) */ \ | ||
449 | CP_ALIAS(20127, "us-ascii") /* US-ASCII (7-bit) */ \ | ||
450 | CP_ALIAS(20261, "x-cp20261") /* T.61 */ \ | ||
451 | CP_ALIAS(20269, "x-cp20269") /* ISO 6937 Non-Spacing Accent */ \ | ||
452 | CP_ALIAS(20273, "IBM273") /* IBM EBCDIC Germany */ \ | ||
453 | CP_ALIAS(20277, "IBM277") /* IBM EBCDIC Denmark-Norway */ \ | ||
454 | CP_ALIAS(20278, "IBM278") /* IBM EBCDIC Finland-Sweden */ \ | ||
455 | CP_ALIAS(20280, "IBM280") /* IBM EBCDIC Italy */ \ | ||
456 | CP_ALIAS(20284, "IBM284") /* IBM EBCDIC Latin America-Spain */ \ | ||
457 | CP_ALIAS(20285, "IBM285") /* IBM EBCDIC United Kingdom */ \ | ||
458 | CP_ALIAS(20290, "IBM290") /* IBM EBCDIC Japanese Katakana Extended */ \ | ||
459 | CP_ALIAS(20297, "IBM297") /* IBM EBCDIC France */ \ | ||
460 | CP_ALIAS(20420, "IBM420") /* IBM EBCDIC Arabic */ \ | ||
461 | CP_ALIAS(20423, "IBM423") /* IBM EBCDIC Greek */ \ | ||
462 | CP_ALIAS(20424, "IBM424") /* IBM EBCDIC Hebrew */ \ | ||
463 | CP_ALIAS(20833, "x-EBCDIC-KoreanExtended") /* IBM EBCDIC Korean Extended */ \ | ||
464 | CP_ALIAS(20838, "IBM-Thai") /* IBM EBCDIC Thai */ \ | ||
465 | CP_ALIAS(20866, "koi8-r") /* Russian (KOI8-R); Cyrillic (KOI8-R) */ \ | ||
466 | CP_ALIAS(20871, "IBM871") /* IBM EBCDIC Icelandic */ \ | ||
467 | CP_ALIAS(20880, "IBM880") /* IBM EBCDIC Cyrillic Russian */ \ | ||
468 | CP_ALIAS(20905, "IBM905") /* IBM EBCDIC Turkish */ \ | ||
469 | CP_ALIAS(20924, "IBM00924") /* IBM EBCDIC Latin 1/Open System (1047 + Euro symbol) */ \ | ||
470 | CP_ALIAS(20932, "EUC-JP") /* Japanese (JIS 0208-1990 and 0121-1990) */ \ | ||
471 | CP_ALIAS(20936, "x-cp20936") /* Simplified Chinese (GB2312); Chinese Simplified (GB2312-80) */ \ | ||
472 | CP_ALIAS(20949, "x-cp20949") /* Korean Wansung */ \ | ||
473 | CP_ALIAS(21025, "cp1025") /* IBM EBCDIC Cyrillic Serbian-Bulgarian */ \ | ||
474 | /* 21027 (deprecated) */ \ | ||
475 | CP_ALIAS(21866, "koi8-u") /* Ukrainian (KOI8-U); Cyrillic (KOI8-U) */ \ | ||
476 | CP_ALIAS(28591, "iso-8859-1") /* ISO 8859-1 Latin 1; Western European (ISO) */ \ | ||
477 | CP_ALIAS(28591, "iso8859-1") /* ISO 8859-1 Latin 1; Western European (ISO) */ \ | ||
478 | CP_ALIAS(28591, "iso_8859-1") \ | ||
479 | CP_ALIAS(28591, "iso_8859_1") \ | ||
480 | CP_ALIAS(28592, "iso-8859-2") /* ISO 8859-2 Central European; Central European (ISO) */ \ | ||
481 | CP_ALIAS(28592, "iso8859-2") /* ISO 8859-2 Central European; Central European (ISO) */ \ | ||
482 | CP_ALIAS(28592, "iso_8859-2") \ | ||
483 | CP_ALIAS(28592, "iso_8859_2") \ | ||
484 | CP_ALIAS(28593, "iso-8859-3") /* ISO 8859-3 Latin 3 */ \ | ||
485 | CP_ALIAS(28593, "iso8859-3") /* ISO 8859-3 Latin 3 */ \ | ||
486 | CP_ALIAS(28593, "iso_8859-3") \ | ||
487 | CP_ALIAS(28593, "iso_8859_3") \ | ||
488 | CP_ALIAS(28594, "iso-8859-4") /* ISO 8859-4 Baltic */ \ | ||
489 | CP_ALIAS(28594, "iso8859-4") /* ISO 8859-4 Baltic */ \ | ||
490 | CP_ALIAS(28594, "iso_8859-4") \ | ||
491 | CP_ALIAS(28594, "iso_8859_4") \ | ||
492 | CP_ALIAS(28595, "iso-8859-5") /* ISO 8859-5 Cyrillic */ \ | ||
493 | CP_ALIAS(28595, "iso8859-5") /* ISO 8859-5 Cyrillic */ \ | ||
494 | CP_ALIAS(28595, "iso_8859-5") \ | ||
495 | CP_ALIAS(28595, "iso_8859_5") \ | ||
496 | CP_ALIAS(28596, "iso-8859-6") /* ISO 8859-6 Arabic */ \ | ||
497 | CP_ALIAS(28596, "iso8859-6") /* ISO 8859-6 Arabic */ \ | ||
498 | CP_ALIAS(28596, "iso_8859-6") \ | ||
499 | CP_ALIAS(28596, "iso_8859_6") \ | ||
500 | CP_ALIAS(28597, "iso-8859-7") /* ISO 8859-7 Greek */ \ | ||
501 | CP_ALIAS(28597, "iso8859-7") /* ISO 8859-7 Greek */ \ | ||
502 | CP_ALIAS(28597, "iso_8859-7") \ | ||
503 | CP_ALIAS(28597, "iso_8859_7") \ | ||
504 | CP_ALIAS(28598, "iso-8859-8") /* ISO 8859-8 Hebrew; Hebrew (ISO-Visual) */ \ | ||
505 | CP_ALIAS(28598, "iso8859-8") /* ISO 8859-8 Hebrew; Hebrew (ISO-Visual) */ \ | ||
506 | CP_ALIAS(28598, "iso_8859-8") \ | ||
507 | CP_ALIAS(28598, "iso_8859_8") \ | ||
508 | CP_ALIAS(28599, "iso-8859-9") /* ISO 8859-9 Turkish */ \ | ||
509 | CP_ALIAS(28599, "iso8859-9") /* ISO 8859-9 Turkish */ \ | ||
510 | CP_ALIAS(28599, "iso_8859-9") \ | ||
511 | CP_ALIAS(28599, "iso_8859_9") \ | ||
512 | CP_ALIAS(28603, "iso-8859-13") /* ISO 8859-13 Estonian */ \ | ||
513 | CP_ALIAS(28603, "iso8859-13") /* ISO 8859-13 Estonian */ \ | ||
514 | CP_ALIAS(28603, "iso_8859-13") \ | ||
515 | CP_ALIAS(28603, "iso_8859_13") \ | ||
516 | CP_ALIAS(28605, "iso-8859-15") /* ISO 8859-15 Latin 9 */ \ | ||
517 | CP_ALIAS(28605, "iso8859-15") /* ISO 8859-15 Latin 9 */ \ | ||
518 | CP_ALIAS(28605, "iso_8859-15") \ | ||
519 | CP_ALIAS(28605, "iso_8859_15") \ | ||
520 | CP_ALIAS(29001, "x-Europa") /* Europa 3 */ \ | ||
521 | CP_ALIAS(38598, "iso-8859-8-i") /* ISO 8859-8 Hebrew; Hebrew (ISO-Logical) */ \ | ||
522 | CP_ALIAS(38598, "iso8859-8-i") /* ISO 8859-8 Hebrew; Hebrew (ISO-Logical) */ \ | ||
523 | CP_ALIAS(38598, "iso_8859-8-i") \ | ||
524 | CP_ALIAS(38598, "iso_8859_8-i") \ | ||
525 | CP_ALIAS(50220, "iso-2022-jp") /* ISO 2022 Japanese with no halfwidth Katakana; Japanese (JIS) */ \ | ||
526 | CP_ALIAS(50221, "csISO2022JP") /* ISO 2022 Japanese with halfwidth Katakana; Japanese (JIS-Allow 1 byte Kana) */ \ | ||
527 | CP_ALIAS(50222, "iso-2022-jp") /* ISO 2022 Japanese JIS X 0201-1989; Japanese (JIS-Allow 1 byte Kana - SO/SI) */ \ | ||
528 | CP_ALIAS(50225, "iso-2022-kr") /* ISO 2022 Korean */ \ | ||
529 | CP_ALIAS(50225, "iso2022-kr") /* ISO 2022 Korean */ \ | ||
530 | CP_ALIAS(50227, "x-cp50227") /* ISO 2022 Simplified Chinese; Chinese Simplified (ISO 2022) */ \ | ||
531 | /* 50229 ISO 2022 Traditional Chinese */ \ | ||
532 | /* 50930 EBCDIC Japanese (Katakana) Extended */ \ | ||
533 | /* 50931 EBCDIC US-Canada and Japanese */ \ | ||
534 | /* 50933 EBCDIC Korean Extended and Korean */ \ | ||
535 | /* 50935 EBCDIC Simplified Chinese Extended and Simplified Chinese */ \ | ||
536 | /* 50936 EBCDIC Simplified Chinese */ \ | ||
537 | /* 50937 EBCDIC US-Canada and Traditional Chinese */ \ | ||
538 | /* 50939 EBCDIC Japanese (Latin) Extended and Japanese */ \ | ||
539 | CP_ALIAS(51932, "euc-jp") /* EUC Japanese */ \ | ||
540 | CP_ALIAS(51936, "EUC-CN") /* EUC Simplified Chinese; Chinese Simplified (EUC) */ \ | ||
541 | CP_ALIAS(51949, "euc-kr") /* EUC Korean */ \ | ||
542 | /* 51950 EUC Traditional Chinese */ \ | ||
543 | CP_ALIAS(52936, "hz-gb-2312") /* HZ-GB2312 Simplified Chinese; Chinese Simplified (HZ) */ \ | ||
544 | CP_ALIAS(54936, "GB18030") /* Windows XP and later: GB18030 Simplified Chinese (4 byte); Chinese Simplified (GB18030) */ \ | ||
545 | CP_ALIAS(57002, "x-iscii-de") /* ISCII Devanagari */ \ | ||
546 | CP_ALIAS(57003, "x-iscii-be") /* ISCII Bengali */ \ | ||
547 | CP_ALIAS(57004, "x-iscii-ta") /* ISCII Tamil */ \ | ||
548 | CP_ALIAS(57005, "x-iscii-te") /* ISCII Telugu */ \ | ||
549 | CP_ALIAS(57006, "x-iscii-as") /* ISCII Assamese */ \ | ||
550 | CP_ALIAS(57007, "x-iscii-or") /* ISCII Oriya */ \ | ||
551 | CP_ALIAS(57008, "x-iscii-ka") /* ISCII Kannada */ \ | ||
552 | CP_ALIAS(57009, "x-iscii-ma") /* ISCII Malayalam */ \ | ||
553 | CP_ALIAS(57010, "x-iscii-gu") /* ISCII Gujarati */ \ | ||
554 | CP_ALIAS(57011, "x-iscii-pa") /* ISCII Punjabi */ | ||
555 | |||
/*
 * First expansion of CP_ALIAS_LIST: keep only the code page number of
 * every entry. Element i of cp_codepage therefore belongs to the i-th
 * name stored in cp_alias, which is generated from the same list.
 */
556 | #define CP_ALIAS(codepage, alias) codepage, | |
557 | static const int cp_codepage[] = { | |
558 | CP_ALIAS_LIST | |
559 | }; | |
/* CP_ALIAS is redefined for the next expansion of the list */
560 | #undef CP_ALIAS | |
561 | |||
/*
 * Second expansion of CP_ALIAS_LIST: keep only the alias strings. Each
 * alias is followed by an explicit "\0" and adjacent literals are
 * concatenated, so cp_alias is one char array holding all names back to
 * back, in the same order as cp_codepage. The literal's own terminating
 * NUL leaves an empty string after the last name.
 */
562 | #define CP_ALIAS(codepage, alias) alias"\0" | |
563 | static const char cp_alias[] ALIGN1 = | |
564 | CP_ALIAS_LIST; | |
565 | #undef CP_ALIAS | |
566 | |||
567 | /* | ||
568 | * SJIS SHIFTJIS table CP932 table | ||
569 | * ---- --------------------------- -------------------------------- | ||
570 | * 5C U+00A5 YEN SIGN U+005C REVERSE SOLIDUS | ||
571 | * 7E U+203E OVERLINE U+007E TILDE | ||
572 | * 815C U+2014 EM DASH U+2015 HORIZONTAL BAR | ||
573 | * 815F U+005C REVERSE SOLIDUS U+FF3C FULLWIDTH REVERSE SOLIDUS | ||
574 | * 8160 U+301C WAVE DASH U+FF5E FULLWIDTH TILDE | ||
575 | * 8161 U+2016 DOUBLE VERTICAL LINE U+2225 PARALLEL TO | ||
576 | * 817C U+2212 MINUS SIGN U+FF0D FULLWIDTH HYPHEN-MINUS | ||
577 | * 8191 U+00A2 CENT SIGN U+FFE0 FULLWIDTH CENT SIGN | ||
578 | * 8192 U+00A3 POUND SIGN U+FFE1 FULLWIDTH POUND SIGN | ||
579 | * 81CA U+00AC NOT SIGN U+FFE2 FULLWIDTH NOT SIGN | ||
580 | * | ||
581 | * EUC-JP and ISO-2022-JP should be compatible with CP932. | ||
582 | * | ||
583 | * Kernel and MLang use different Unicode mapping tables, so make | |
584 | * sure you know which API is in use. | |
585 | */ | ||
/*
 * Compatibility pairs for code page 932. Each row is {in, out, flag};
 * every row here carries COMPAT_OUT only. The pairs correspond to the
 * SHIFTJIS-table / CP932-table columns of the comment preceding this
 * table, except that the 815F row (U+005C / U+FF3C) has no entry here.
 * cp51932_compat and cp5022x_compat both point at this table.
 */
586 | static compat_t cp932_compat[] = { | |
587 | {0x00A5, 0x005C, COMPAT_OUT}, | |
588 | {0x203E, 0x007E, COMPAT_OUT}, | |
589 | {0x2014, 0x2015, COMPAT_OUT}, | |
590 | {0x301C, 0xFF5E, COMPAT_OUT}, | |
591 | {0x2016, 0x2225, COMPAT_OUT}, | |
592 | {0x2212, 0xFF0D, COMPAT_OUT}, | |
593 | {0x00A2, 0xFFE0, COMPAT_OUT}, | |
594 | {0x00A3, 0xFFE1, COMPAT_OUT}, | |
595 | {0x00AC, 0xFFE2, COMPAT_OUT}, | |
/* all-zero terminator */
596 | {0, 0, 0} | |
597 | }; | |
598 | |||
/*
 * Compatibility pairs for code page 20932. The first three rows are the
 * same as in cp932_compat (COMPAT_OUT only). The remaining six rows list
 * the same code points as cp932_compat but with `in` and `out` swapped,
 * and are flagged for both COMPAT_OUT and COMPAT_IN.
 */
599 | static compat_t cp20932_compat[] = { | |
600 | {0x00A5, 0x005C, COMPAT_OUT}, | |
601 | {0x203E, 0x007E, COMPAT_OUT}, | |
602 | {0x2014, 0x2015, COMPAT_OUT}, | |
603 | {0xFF5E, 0x301C, COMPAT_OUT|COMPAT_IN}, | |
604 | {0x2225, 0x2016, COMPAT_OUT|COMPAT_IN}, | |
605 | {0xFF0D, 0x2212, COMPAT_OUT|COMPAT_IN}, | |
606 | {0xFFE0, 0x00A2, COMPAT_OUT|COMPAT_IN}, | |
607 | {0xFFE1, 0x00A3, COMPAT_OUT|COMPAT_IN}, | |
608 | {0xFFE2, 0x00AC, COMPAT_OUT|COMPAT_IN}, | |
/* all-zero terminator */
609 | {0, 0, 0} | |
610 | }; | |
611 | |||
612 | static compat_t *cp51932_compat = cp932_compat; | ||
613 | |||
614 | /* cp20932_compat for kernel. cp932_compat for mlang. */ | ||
615 | static compat_t *cp5022x_compat = cp932_compat; | ||
616 | |||
617 | typedef HRESULT (WINAPI *CONVERTINETMULTIBYTETOUNICODE)( | ||
618 | LPDWORD lpdwMode, | ||
619 | DWORD dwSrcEncoding, | ||
620 | LPCSTR lpSrcStr, | ||
621 | LPINT lpnMultiCharCount, | ||
622 | LPWSTR lpDstStr, | ||
623 | LPINT lpnWideCharCount | ||
624 | ); | ||
625 | |||
626 | typedef HRESULT (WINAPI *CONVERTINETUNICODETOMULTIBYTE)( | ||
627 | LPDWORD lpdwMode, | ||
628 | DWORD dwEncoding, | ||
629 | LPCWSTR lpSrcStr, | ||
630 | LPINT lpnWideCharCount, | ||
631 | LPSTR lpDstStr, | ||
632 | LPINT lpnMultiCharCount | ||
633 | ); | ||
634 | |||
635 | static CONVERTINETMULTIBYTETOUNICODE ConvertINetMultiByteToUnicode; | ||
636 | static CONVERTINETUNICODETOMULTIBYTE ConvertINetUnicodeToMultiByte; | ||
637 | |||
638 | static int | ||
639 | load_mlang(void) | ||
640 | { | ||
641 | HMODULE h; | ||
642 | if (ConvertINetMultiByteToUnicode != NULL) | ||
643 | return TRUE; | ||
644 | h = LoadLibrary(TEXT("mlang.dll")); | ||
645 | if (!h) | ||
646 | return FALSE; | ||
647 | ConvertINetMultiByteToUnicode = (CONVERTINETMULTIBYTETOUNICODE)GetProcAddressA(h, "ConvertINetMultiByteToUnicode"); | ||
648 | ConvertINetUnicodeToMultiByte = (CONVERTINETUNICODETOMULTIBYTE)GetProcAddressA(h, "ConvertINetUnicodeToMultiByte"); | ||
649 | return TRUE; | ||
650 | } | ||
651 | |||
652 | static iconv_t | ||
653 | iconv_open(const char *tocode, const char *fromcode) | ||
654 | { | ||
655 | rec_iconv_t *cd; | ||
656 | |||
657 | cd = (rec_iconv_t *)xzalloc(sizeof(rec_iconv_t)); | ||
658 | |||
659 | /* reset the errno to prevent reporting wrong error code. | ||
660 | * 0 for unsorted error. */ | ||
661 | errno = 0; | ||
662 | if (make_csconv(fromcode, &cd->from) && make_csconv(tocode, &cd->to)) { | ||
663 | cd->cd = (iconv_t)cd; | ||
664 | return (iconv_t)cd; | ||
665 | } | ||
666 | |||
667 | free(cd); | ||
668 | return (iconv_t)(-1); | ||
669 | } | ||
670 | |||
671 | static int | ||
672 | iconv_close(iconv_t _cd) | ||
673 | { | ||
674 | free(_cd); | ||
675 | return 0; | ||
676 | } | ||
677 | |||
678 | static size_t | ||
679 | iconv(iconv_t _cd, const char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft) | ||
680 | { | ||
681 | rec_iconv_t *cd = (rec_iconv_t *)_cd; | ||
682 | ushort wbuf[MB_CHAR_MAX]; /* enough room for one character */ | ||
683 | int insize; | ||
684 | int outsize; | ||
685 | int wsize; | ||
686 | DWORD frommode; | ||
687 | DWORD tomode; | ||
688 | uint wc; | ||
689 | compat_t *cp; | ||
690 | int i; | ||
691 | |||
692 | if (inbuf == NULL || *inbuf == NULL) | ||
693 | { | ||
694 | if (outbuf != NULL && *outbuf != NULL && cd->to.flush != NULL) | ||
695 | { | ||
696 | tomode = cd->to.mode; | ||
697 | outsize = cd->to.flush(&cd->to, (uchar *)*outbuf, *outbytesleft); | ||
698 | if (outsize == -1) | ||
699 | { | ||
700 | if ((cd->to.flags & FLAG_IGNORE) && errno != E2BIG) | ||
701 | { | ||
702 | outsize = 0; | ||
703 | } | ||
704 | else | ||
705 | { | ||
706 | cd->to.mode = tomode; | ||
707 | return (size_t)(-1); | ||
708 | } | ||
709 | } | ||
710 | *outbuf += outsize; | ||
711 | *outbytesleft -= outsize; | ||
712 | } | ||
713 | cd->from.mode = 0; | ||
714 | cd->to.mode = 0; | ||
715 | return 0; | ||
716 | } | ||
717 | |||
718 | while (*inbytesleft != 0) | ||
719 | { | ||
720 | frommode = cd->from.mode; | ||
721 | tomode = cd->to.mode; | ||
722 | wsize = MB_CHAR_MAX; | ||
723 | |||
724 | insize = cd->from.mbtowc(&cd->from, (const uchar *)*inbuf, *inbytesleft, wbuf, &wsize); | ||
725 | if (insize == -1) | ||
726 | { | ||
727 | if (cd->to.flags & FLAG_IGNORE) | ||
728 | { | ||
729 | cd->from.mode = frommode; | ||
730 | insize = 1; | ||
731 | wsize = 0; | ||
732 | } | ||
733 | else | ||
734 | { | ||
735 | cd->from.mode = frommode; | ||
736 | return (size_t)(-1); | ||
737 | } | ||
738 | } | ||
739 | |||
740 | if (wsize == 0) | ||
741 | { | ||
742 | *inbuf += insize; | ||
743 | *inbytesleft -= insize; | ||
744 | continue; | ||
745 | } | ||
746 | |||
747 | if (cd->from.compat != NULL) | ||
748 | { | ||
749 | wc = utf16_to_ucs4(wbuf); | ||
750 | cp = cd->from.compat; | ||
751 | for (i = 0; cp[i].in != 0; ++i) | ||
752 | { | ||
753 | if ((cp[i].flag & COMPAT_IN) && cp[i].out == wc) | ||
754 | { | ||
755 | ucs4_to_utf16(cp[i].in, wbuf, &wsize); | ||
756 | break; | ||
757 | } | ||
758 | } | ||
759 | } | ||
760 | |||
761 | if (cd->to.compat != NULL) | ||
762 | { | ||
763 | wc = utf16_to_ucs4(wbuf); | ||
764 | cp = cd->to.compat; | ||
765 | for (i = 0; cp[i].in != 0; ++i) | ||
766 | { | ||
767 | if ((cp[i].flag & COMPAT_OUT) && cp[i].in == wc) | ||
768 | { | ||
769 | ucs4_to_utf16(cp[i].out, wbuf, &wsize); | ||
770 | break; | ||
771 | } | ||
772 | } | ||
773 | } | ||
774 | |||
775 | outsize = cd->to.wctomb(&cd->to, wbuf, wsize, (uchar *)*outbuf, *outbytesleft); | ||
776 | if (outsize == -1) | ||
777 | { | ||
778 | if ((cd->to.flags & FLAG_IGNORE) && errno != E2BIG) | ||
779 | { | ||
780 | cd->to.mode = tomode; | ||
781 | outsize = 0; | ||
782 | } | ||
783 | else | ||
784 | { | ||
785 | cd->from.mode = frommode; | ||
786 | cd->to.mode = tomode; | ||
787 | return (size_t)(-1); | ||
788 | } | ||
789 | } | ||
790 | |||
791 | *inbuf += insize; | ||
792 | *outbuf += outsize; | ||
793 | *inbytesleft -= insize; | ||
794 | *outbytesleft -= outsize; | ||
795 | } | ||
796 | |||
797 | return 0; | ||
798 | } | ||
799 | |||
800 | static int | ||
801 | make_csconv(const char *_name, csconv_t *cv) | ||
802 | { | ||
803 | CPINFO cpinfo; | ||
804 | int use_compat = TRUE; | ||
805 | int flag = 0; | ||
806 | char *name; | ||
807 | char *p, *s; | ||
808 | |||
809 | name = xstrdup(_name); | ||
810 | |||
811 | /* check for option "enc_name//opt1//opt2" */ | ||
812 | while ((p = strrstr(name, "//")) != NULL) | ||
813 | { | ||
814 | for (s = p + 2; *s; ++s) | ||
815 | *s = tolower(*s); | ||
816 | switch (index_in_strings("nocompat\0translit\0ignore\0", p + 2)) { | ||
817 | case 0: | ||
818 | use_compat = FALSE; | ||
819 | break; | ||
820 | case 1: | ||
821 | flag |= FLAG_TRANSLIT; | ||
822 | break; | ||
823 | case 2: | ||
824 | flag |= FLAG_IGNORE; | ||
825 | break; | ||
826 | } | ||
827 | *p = 0; | ||
828 | } | ||
829 | |||
830 | cv->mode = 0; | ||
831 | cv->flags = flag; | ||
832 | cv->mblen = NULL; | ||
833 | cv->flush = NULL; | ||
834 | cv->compat = NULL; | ||
835 | cv->codepage = name_to_codepage(name); | ||
836 | if (cv->codepage == 1200 || cv->codepage == 1201) | ||
837 | { | ||
838 | cv->mbtowc = utf16_mbtowc; | ||
839 | cv->wctomb = utf16_wctomb; | ||
840 | if (_stricmp(name, "UTF-16") == 0 || _stricmp(name, "UTF16") == 0 || | ||
841 | _stricmp(name, "UCS-2") == 0 || _stricmp(name, "UCS2") == 0 || | ||
842 | _stricmp(name,"UCS-2-INTERNAL") == 0) | ||
843 | cv->flags |= FLAG_USE_BOM; | ||
844 | } | ||
845 | else if (cv->codepage == 12000 || cv->codepage == 12001) | ||
846 | { | ||
847 | cv->mbtowc = utf32_mbtowc; | ||
848 | cv->wctomb = utf32_wctomb; | ||
849 | if (_stricmp(name, "UTF-32") == 0 || _stricmp(name, "UTF32") == 0 || | ||
850 | _stricmp(name, "UCS-4") == 0 || _stricmp(name, "UCS4") == 0) | ||
851 | cv->flags |= FLAG_USE_BOM; | ||
852 | } | ||
853 | else if (cv->codepage == 65001) | ||
854 | { | ||
855 | cv->mbtowc = kernel_mbtowc; | ||
856 | cv->wctomb = kernel_wctomb; | ||
857 | cv->mblen = utf8_mblen; | ||
858 | } | ||
859 | else if ((cv->codepage == 50220 || cv->codepage == 50221 || cv->codepage == 50222) && load_mlang()) | ||
860 | { | ||
861 | cv->mbtowc = iso2022jp_mbtowc; | ||
862 | cv->wctomb = iso2022jp_wctomb; | ||
863 | cv->flush = iso2022jp_flush; | ||
864 | } | ||
865 | else if (cv->codepage == 51932 && load_mlang()) | ||
866 | { | ||
867 | cv->mbtowc = mlang_mbtowc; | ||
868 | cv->wctomb = mlang_wctomb; | ||
869 | cv->mblen = eucjp_mblen; | ||
870 | } | ||
871 | else if (IsValidCodePage(cv->codepage) | ||
872 | && GetCPInfo(cv->codepage, &cpinfo) != 0) | ||
873 | { | ||
874 | cv->mbtowc = kernel_mbtowc; | ||
875 | cv->wctomb = kernel_wctomb; | ||
876 | if (cpinfo.MaxCharSize == 1) | ||
877 | cv->mblen = sbcs_mblen; | ||
878 | else if (cpinfo.MaxCharSize == 2) | ||
879 | cv->mblen = dbcs_mblen; | ||
880 | else | ||
881 | cv->mblen = mbcs_mblen; | ||
882 | } | ||
883 | else | ||
884 | { | ||
885 | /* not supported */ | ||
886 | free(name); | ||
887 | errno = EINVAL; | ||
888 | return FALSE; | ||
889 | } | ||
890 | |||
891 | if (use_compat) | ||
892 | { | ||
893 | switch (cv->codepage) | ||
894 | { | ||
895 | case 932: cv->compat = cp932_compat; break; | ||
896 | case 20932: cv->compat = cp20932_compat; break; | ||
897 | case 51932: cv->compat = cp51932_compat; break; | ||
898 | case 50220: case 50221: case 50222: cv->compat = cp5022x_compat; break; | ||
899 | } | ||
900 | } | ||
901 | |||
902 | free(name); | ||
903 | |||
904 | return TRUE; | ||
905 | } | ||
906 | |||
907 | static int | ||
908 | name_to_codepage(const char *name) | ||
909 | { | ||
910 | int i; | ||
911 | const char *alias; | ||
912 | |||
913 | if (*name == '\0' || strcmp(name, "char") == 0) | ||
914 | return GetACP(); | ||
915 | else if (strcmp(name, "wchar_t") == 0) | ||
916 | return 1200; | ||
917 | else if (_strnicmp(name, "cp", 2) == 0) | ||
918 | return atoi(name + 2); /* CP123 */ | ||
919 | else if ('0' <= name[0] && name[0] <= '9') | ||
920 | return atoi(name); /* 123 */ | ||
921 | else if (_strnicmp(name, "xx", 2) == 0) | ||
922 | return atoi(name + 2); /* XX123 for debug */ | ||
923 | |||
924 | i = 0; | ||
925 | alias = cp_alias; | ||
926 | while (*alias) { | ||
927 | if (_stricmp(alias, name) == 0) { | ||
928 | return cp_codepage[i]; | ||
929 | } | ||
930 | alias += strlen(alias) + 1; | ||
931 | ++i; | ||
932 | } | ||
933 | return -1; | ||
934 | } | ||
935 | |||
936 | /* | ||
937 | * http://www.faqs.org/rfcs/rfc2781.html | ||
938 | */ | ||
939 | static uint | ||
940 | utf16_to_ucs4(const ushort *wbuf) | ||
941 | { | ||
942 | uint wc = wbuf[0]; | ||
943 | if (0xD800 <= wbuf[0] && wbuf[0] <= 0xDBFF) | ||
944 | wc = ((wbuf[0] & 0x3FF) << 10) + (wbuf[1] & 0x3FF) + 0x10000; | ||
945 | return wc; | ||
946 | } | ||
947 | |||
948 | static void | ||
949 | ucs4_to_utf16(uint wc, ushort *wbuf, int *wbufsize) | ||
950 | { | ||
951 | if (wc < 0x10000) | ||
952 | { | ||
953 | wbuf[0] = wc; | ||
954 | *wbufsize = 1; | ||
955 | } | ||
956 | else | ||
957 | { | ||
958 | wc -= 0x10000; | ||
959 | wbuf[0] = 0xD800 | ((wc >> 10) & 0x3FF); | ||
960 | wbuf[1] = 0xDC00 | (wc & 0x3FF); | ||
961 | *wbufsize = 2; | ||
962 | } | ||
963 | } | ||
964 | |||
965 | /* | ||
966 | * Check if codepage is one of those for which the dwFlags parameter | ||
967 | * to MultiByteToWideChar() must be zero. Return zero or | ||
968 | * MB_ERR_INVALID_CHARS. The docs in Platform SDK for Windows | ||
969 | * Server 2003 R2 claims that also codepage 65001 is one of these, but | ||
970 | * that doesn't seem to be the case. The MSDN docs for MSVS2008 leave | ||
971 | * out 65001 (UTF-8), and that indeed seems to be the case on XP, it | ||
972 | * works fine to pass MB_ERR_INVALID_CHARS in dwFlags when converting | ||
973 | * from UTF-8. | ||
974 | */ | ||
975 | static int | ||
976 | mbtowc_flags(int codepage) | ||
977 | { | ||
978 | return (codepage == 50220 || codepage == 50221 || | ||
979 | codepage == 50222 || codepage == 50225 || | ||
980 | codepage == 50227 || codepage == 50229 || | ||
981 | codepage == 52936 || codepage == 54936 || | ||
982 | (codepage >= 57002 && codepage <= 57011) || | ||
983 | codepage == 65000 || codepage == 42) ? 0 : MB_ERR_INVALID_CHARS; | ||
984 | } | ||
985 | |||
/*
 * Tell whether 'codepage' is one of those for which the
 * lpUsedDefaultChar argument of WideCharToMultiByte() has to be NULL.
 * Returns 1 if so, 0 otherwise.
 *
 * The Platform SDK documentation for Windows Server 2003 R2 gives the
 * list used below, whereas the MSDN documentation for MSVS2008 names
 * only 65000 (UTF-7) and 65001 (UTF-8).  Here the older Platform SDK
 * seems to be the correct one, at least on XP.
 */
static int
must_use_null_useddefaultchar(int codepage)
{
    if (codepage >= 57002 && codepage <= 57011)
        return 1;

    switch (codepage) {
    case 42:
    case 50220:
    case 50221:
    case 50222:
    case 50225:
    case 50227:
    case 50229:
    case 52936:
    case 54936:
    case 65000:
    case 65001:
        return 1;
    default:
        return 0;
    }
}
1005 | |||
/*
 * Store 'err' in errno and return -1, so that a failure can be reported
 * with a single "return seterror(...)".
 */
static int
seterror(int err)
{
    const int failure = -1;

    errno = err;
    return failure;
}
1012 | |||
1013 | static int | ||
1014 | sbcs_mblen(csconv_t *cv UNUSED_PARAM, const uchar *buf UNUSED_PARAM, | ||
1015 | int bufsize UNUSED_PARAM) | ||
1016 | { | ||
1017 | return 1; | ||
1018 | } | ||
1019 | |||
1020 | static int | ||
1021 | dbcs_mblen(csconv_t *cv, const uchar *buf, int bufsize) | ||
1022 | { | ||
1023 | int len = IsDBCSLeadByteEx(cv->codepage, buf[0]) ? 2 : 1; | ||
1024 | if (bufsize < len) | ||
1025 | return seterror(EINVAL); | ||
1026 | return len; | ||
1027 | } | ||
1028 | |||
1029 | static int | ||
1030 | mbcs_mblen(csconv_t *cv, const uchar *buf, int bufsize) | ||
1031 | { | ||
1032 | int len = 0; | ||
1033 | |||
1034 | if (cv->codepage == 54936) { | ||
1035 | if (buf[0] <= 0x7F) | ||
1036 | len = 1; | ||
1037 | else if (buf[0] >= 0x81 && buf[0] <= 0xFE && | ||
1038 | bufsize >= 2 && | ||
1039 | ((buf[1] >= 0x40 && buf[1] <= 0x7E) || | ||
1040 | (buf[1] >= 0x80 && buf[1] <= 0xFE))) | ||
1041 | len = 2; | ||
1042 | else if (buf[0] >= 0x81 && buf[0] <= 0xFE && | ||
1043 | bufsize >= 4 && | ||
1044 | buf[1] >= 0x30 && buf[1] <= 0x39) | ||
1045 | len = 4; | ||
1046 | else | ||
1047 | return seterror(EINVAL); | ||
1048 | return len; | ||
1049 | } | ||
1050 | else | ||
1051 | return seterror(EINVAL); | ||
1052 | } | ||
1053 | |||
1054 | static int | ||
1055 | utf8_mblen(csconv_t *cv UNUSED_PARAM, const uchar *buf, int bufsize) | ||
1056 | { | ||
1057 | int len = 0; | ||
1058 | |||
1059 | if (buf[0] < 0x80) len = 1; | ||
1060 | else if ((buf[0] & 0xE0) == 0xC0) len = 2; | ||
1061 | else if ((buf[0] & 0xF0) == 0xE0) len = 3; | ||
1062 | else if ((buf[0] & 0xF8) == 0xF0) len = 4; | ||
1063 | else if ((buf[0] & 0xFC) == 0xF8) len = 5; | ||
1064 | else if ((buf[0] & 0xFE) == 0xFC) len = 6; | ||
1065 | |||
1066 | if (len == 0) | ||
1067 | return seterror(EILSEQ); | ||
1068 | else if (bufsize < len) | ||
1069 | return seterror(EINVAL); | ||
1070 | return len; | ||
1071 | } | ||
1072 | |||
1073 | static int | ||
1074 | eucjp_mblen(csconv_t *cv UNUSED_PARAM, const uchar *buf, int bufsize) | ||
1075 | { | ||
1076 | if (buf[0] < 0x80) /* ASCII */ | ||
1077 | return 1; | ||
1078 | else if (buf[0] == 0x8E) /* JIS X 0201 */ | ||
1079 | { | ||
1080 | if (bufsize < 2) | ||
1081 | return seterror(EINVAL); | ||
1082 | else if (!(0xA1 <= buf[1] && buf[1] <= 0xDF)) | ||
1083 | return seterror(EILSEQ); | ||
1084 | return 2; | ||
1085 | } | ||
1086 | else if (buf[0] == 0x8F) /* JIS X 0212 */ | ||
1087 | { | ||
1088 | if (bufsize < 3) | ||
1089 | return seterror(EINVAL); | ||
1090 | else if (!(0xA1 <= buf[1] && buf[1] <= 0xFE) | ||
1091 | || !(0xA1 <= buf[2] && buf[2] <= 0xFE)) | ||
1092 | return seterror(EILSEQ); | ||
1093 | return 3; | ||
1094 | } | ||
1095 | else /* JIS X 0208 */ | ||
1096 | { | ||
1097 | if (bufsize < 2) | ||
1098 | return seterror(EINVAL); | ||
1099 | else if (!(0xA1 <= buf[0] && buf[0] <= 0xFE) | ||
1100 | || !(0xA1 <= buf[1] && buf[1] <= 0xFE)) | ||
1101 | return seterror(EILSEQ); | ||
1102 | return 2; | ||
1103 | } | ||
1104 | } | ||
1105 | |||
1106 | static int | ||
1107 | kernel_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize) | ||
1108 | { | ||
1109 | int len; | ||
1110 | |||
1111 | len = cv->mblen(cv, buf, bufsize); | ||
1112 | if (len == -1) | ||
1113 | return -1; | ||
1114 | /* If converting from ASCII, reject 8bit | ||
1115 | * chars. MultiByteToWideChar() doesn't. Note that for ASCII we | ||
1116 | * know that the mblen function is sbcs_mblen() so len is 1. | ||
1117 | */ | ||
1118 | if (cv->codepage == 20127 && buf[0] >= 0x80) | ||
1119 | return seterror(EILSEQ); | ||
1120 | *wbufsize = MultiByteToWideChar(cv->codepage, mbtowc_flags (cv->codepage), | ||
1121 | (const char *)buf, len, (wchar_t *)wbuf, *wbufsize); | ||
1122 | if (*wbufsize == 0) | ||
1123 | return seterror(EILSEQ); | ||
1124 | return len; | ||
1125 | } | ||
1126 | |||
1127 | static int | ||
1128 | kernel_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize) | ||
1129 | { | ||
1130 | BOOL usedDefaultChar = 0; | ||
1131 | BOOL *p = NULL; | ||
1132 | int flags = 0; | ||
1133 | int len; | ||
1134 | |||
1135 | if (bufsize == 0) | ||
1136 | return seterror(E2BIG); | ||
1137 | if (!must_use_null_useddefaultchar(cv->codepage)) | ||
1138 | { | ||
1139 | p = &usedDefaultChar; | ||
1140 | #ifdef WC_NO_BEST_FIT_CHARS | ||
1141 | if (!(cv->flags & FLAG_TRANSLIT)) | ||
1142 | flags |= WC_NO_BEST_FIT_CHARS; | ||
1143 | #endif | ||
1144 | } | ||
1145 | len = WideCharToMultiByte(cv->codepage, flags, | ||
1146 | (const wchar_t *)wbuf, wbufsize, (char *)buf, bufsize, NULL, p); | ||
1147 | if (len == 0) | ||
1148 | { | ||
1149 | if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) | ||
1150 | return seterror(E2BIG); | ||
1151 | return seterror(EILSEQ); | ||
1152 | } | ||
1153 | else if (usedDefaultChar && !(cv->flags & FLAG_TRANSLIT)) | ||
1154 | return seterror(EILSEQ); | ||
1155 | else if (cv->mblen(cv, buf, len) != len) /* validate result */ | ||
1156 | return seterror(EILSEQ); | ||
1157 | return len; | ||
1158 | } | ||
1159 | |||
1160 | /* | ||
1161 | * It seems that the mode (cv->mode) is fixnum. | ||
1162 | * For example, when converting iso-2022-jp(cp50221) to unicode: | ||
1163 | * in ascii sequence: mode=0xC42C0000 | ||
1164 | * in jisx0208 sequence: mode=0xC42C0001 | ||
1165 | * "C42C" is same for each convert session. | ||
1166 | * It should be: ((codepage-1)<<16)|state | ||
1167 | */ | ||
1168 | static int | ||
1169 | mlang_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize) | ||
1170 | { | ||
1171 | int len; | ||
1172 | int insize; | ||
1173 | HRESULT hr; | ||
1174 | |||
1175 | len = cv->mblen(cv, buf, bufsize); | ||
1176 | if (len == -1) | ||
1177 | return -1; | ||
1178 | insize = len; | ||
1179 | hr = ConvertINetMultiByteToUnicode(&cv->mode, cv->codepage, | ||
1180 | (const char *)buf, &insize, (wchar_t *)wbuf, wbufsize); | ||
1181 | if (hr != S_OK || insize != len) | ||
1182 | return seterror(EILSEQ); | ||
1183 | return len; | ||
1184 | } | ||
1185 | |||
1186 | static int | ||
1187 | mlang_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize) | ||
1188 | { | ||
1189 | char tmpbuf[MB_CHAR_MAX]; /* enough room for one character */ | ||
1190 | int tmpsize = MB_CHAR_MAX; | ||
1191 | int insize = wbufsize; | ||
1192 | HRESULT hr; | ||
1193 | |||
1194 | hr = ConvertINetUnicodeToMultiByte(&cv->mode, cv->codepage, | ||
1195 | (const wchar_t *)wbuf, &wbufsize, tmpbuf, &tmpsize); | ||
1196 | if (hr != S_OK || insize != wbufsize) | ||
1197 | return seterror(EILSEQ); | ||
1198 | else if (bufsize < tmpsize) | ||
1199 | return seterror(E2BIG); | ||
1200 | else if (cv->mblen(cv, (uchar *)tmpbuf, tmpsize) != tmpsize) | ||
1201 | return seterror(EILSEQ); | ||
1202 | memcpy(buf, tmpbuf, tmpsize); | ||
1203 | return tmpsize; | ||
1204 | } | ||
1205 | |||
1206 | static int | ||
1207 | utf16_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize) | ||
1208 | { | ||
1209 | int codepage = cv->codepage; | ||
1210 | |||
1211 | /* swap endian: 1200 <-> 1201 */ | ||
1212 | if (cv->mode & UNICODE_MODE_SWAPPED) | ||
1213 | codepage ^= 1; | ||
1214 | |||
1215 | if (bufsize < 2) | ||
1216 | return seterror(EINVAL); | ||
1217 | if (codepage == 1200) /* little endian */ | ||
1218 | wbuf[0] = (buf[1] << 8) | buf[0]; | ||
1219 | else if (codepage == 1201) /* big endian */ | ||
1220 | wbuf[0] = (buf[0] << 8) | buf[1]; | ||
1221 | |||
1222 | if ((cv->flags & FLAG_USE_BOM) && !(cv->mode & UNICODE_MODE_BOM_DONE)) | ||
1223 | { | ||
1224 | cv->mode |= UNICODE_MODE_BOM_DONE; | ||
1225 | if (wbuf[0] == 0xFFFE) | ||
1226 | { | ||
1227 | cv->mode |= UNICODE_MODE_SWAPPED; | ||
1228 | *wbufsize = 0; | ||
1229 | return 2; | ||
1230 | } | ||
1231 | else if (wbuf[0] == 0xFEFF) | ||
1232 | { | ||
1233 | *wbufsize = 0; | ||
1234 | return 2; | ||
1235 | } | ||
1236 | } | ||
1237 | |||
1238 | if (0xDC00 <= wbuf[0] && wbuf[0] <= 0xDFFF) | ||
1239 | return seterror(EILSEQ); | ||
1240 | if (0xD800 <= wbuf[0] && wbuf[0] <= 0xDBFF) | ||
1241 | { | ||
1242 | if (bufsize < 4) | ||
1243 | return seterror(EINVAL); | ||
1244 | if (codepage == 1200) /* little endian */ | ||
1245 | wbuf[1] = (buf[3] << 8) | buf[2]; | ||
1246 | else if (codepage == 1201) /* big endian */ | ||
1247 | wbuf[1] = (buf[2] << 8) | buf[3]; | ||
1248 | if (!(0xDC00 <= wbuf[1] && wbuf[1] <= 0xDFFF)) | ||
1249 | return seterror(EILSEQ); | ||
1250 | *wbufsize = 2; | ||
1251 | return 4; | ||
1252 | } | ||
1253 | *wbufsize = 1; | ||
1254 | return 2; | ||
1255 | } | ||
1256 | |||
1257 | static int | ||
1258 | utf16_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize) | ||
1259 | { | ||
1260 | if ((cv->flags & FLAG_USE_BOM) && !(cv->mode & UNICODE_MODE_BOM_DONE)) | ||
1261 | { | ||
1262 | int r; | ||
1263 | |||
1264 | cv->mode |= UNICODE_MODE_BOM_DONE; | ||
1265 | if (bufsize < 2) | ||
1266 | return seterror(E2BIG); | ||
1267 | if (cv->codepage == 1200) /* little endian */ | ||
1268 | memcpy(buf, "\xFF\xFE", 2); | ||
1269 | else if (cv->codepage == 1201) /* big endian */ | ||
1270 | memcpy(buf, "\xFE\xFF", 2); | ||
1271 | |||
1272 | r = utf16_wctomb(cv, wbuf, wbufsize, buf + 2, bufsize - 2); | ||
1273 | if (r == -1) | ||
1274 | return -1; | ||
1275 | return r + 2; | ||
1276 | } | ||
1277 | |||
1278 | if (bufsize < 2) | ||
1279 | return seterror(E2BIG); | ||
1280 | if (cv->codepage == 1200) /* little endian */ | ||
1281 | { | ||
1282 | buf[0] = (wbuf[0] & 0x00FF); | ||
1283 | buf[1] = (wbuf[0] & 0xFF00) >> 8; | ||
1284 | } | ||
1285 | else if (cv->codepage == 1201) /* big endian */ | ||
1286 | { | ||
1287 | buf[0] = (wbuf[0] & 0xFF00) >> 8; | ||
1288 | buf[1] = (wbuf[0] & 0x00FF); | ||
1289 | } | ||
1290 | if (0xD800 <= wbuf[0] && wbuf[0] <= 0xDBFF) | ||
1291 | { | ||
1292 | if (bufsize < 4) | ||
1293 | return seterror(E2BIG); | ||
1294 | if (cv->codepage == 1200) /* little endian */ | ||
1295 | { | ||
1296 | buf[2] = (wbuf[1] & 0x00FF); | ||
1297 | buf[3] = (wbuf[1] & 0xFF00) >> 8; | ||
1298 | } | ||
1299 | else if (cv->codepage == 1201) /* big endian */ | ||
1300 | { | ||
1301 | buf[2] = (wbuf[1] & 0xFF00) >> 8; | ||
1302 | buf[3] = (wbuf[1] & 0x00FF); | ||
1303 | } | ||
1304 | return 4; | ||
1305 | } | ||
1306 | return 2; | ||
1307 | } | ||
1308 | |||
1309 | static int | ||
1310 | utf32_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize) | ||
1311 | { | ||
1312 | int codepage = cv->codepage; | ||
1313 | uint wc = 0xD800; | ||
1314 | |||
1315 | /* swap endian: 12000 <-> 12001 */ | ||
1316 | if (cv->mode & UNICODE_MODE_SWAPPED) | ||
1317 | codepage ^= 1; | ||
1318 | |||
1319 | if (bufsize < 4) | ||
1320 | return seterror(EINVAL); | ||
1321 | if (codepage == 12000) /* little endian */ | ||
1322 | wc = (buf[3] << 24) | (buf[2] << 16) | (buf[1] << 8) | buf[0]; | ||
1323 | else if (codepage == 12001) /* big endian */ | ||
1324 | wc = (buf[0] << 24) | (buf[1] << 16) | (buf[2] << 8) | buf[3]; | ||
1325 | |||
1326 | if ((cv->flags & FLAG_USE_BOM) && !(cv->mode & UNICODE_MODE_BOM_DONE)) | ||
1327 | { | ||
1328 | cv->mode |= UNICODE_MODE_BOM_DONE; | ||
1329 | if (wc == 0xFFFE0000) | ||
1330 | { | ||
1331 | cv->mode |= UNICODE_MODE_SWAPPED; | ||
1332 | *wbufsize = 0; | ||
1333 | return 4; | ||
1334 | } | ||
1335 | else if (wc == 0x0000FEFF) | ||
1336 | { | ||
1337 | *wbufsize = 0; | ||
1338 | return 4; | ||
1339 | } | ||
1340 | } | ||
1341 | |||
1342 | if ((0xD800 <= wc && wc <= 0xDFFF) || 0x10FFFF < wc) | ||
1343 | return seterror(EILSEQ); | ||
1344 | ucs4_to_utf16(wc, wbuf, wbufsize); | ||
1345 | return 4; | ||
1346 | } | ||
1347 | |||
1348 | static int | ||
1349 | utf32_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize) | ||
1350 | { | ||
1351 | uint wc; | ||
1352 | |||
1353 | if ((cv->flags & FLAG_USE_BOM) && !(cv->mode & UNICODE_MODE_BOM_DONE)) | ||
1354 | { | ||
1355 | int r; | ||
1356 | |||
1357 | cv->mode |= UNICODE_MODE_BOM_DONE; | ||
1358 | if (bufsize < 4) | ||
1359 | return seterror(E2BIG); | ||
1360 | if (cv->codepage == 12000) /* little endian */ | ||
1361 | memcpy(buf, "\xFF\xFE\x00\x00", 4); | ||
1362 | else if (cv->codepage == 12001) /* big endian */ | ||
1363 | memcpy(buf, "\x00\x00\xFE\xFF", 4); | ||
1364 | |||
1365 | r = utf32_wctomb(cv, wbuf, wbufsize, buf + 4, bufsize - 4); | ||
1366 | if (r == -1) | ||
1367 | return -1; | ||
1368 | return r + 4; | ||
1369 | } | ||
1370 | |||
1371 | if (bufsize < 4) | ||
1372 | return seterror(E2BIG); | ||
1373 | wc = utf16_to_ucs4(wbuf); | ||
1374 | if (cv->codepage == 12000) /* little endian */ | ||
1375 | { | ||
1376 | buf[0] = wc & 0x000000FF; | ||
1377 | buf[1] = (wc & 0x0000FF00) >> 8; | ||
1378 | buf[2] = (wc & 0x00FF0000) >> 16; | ||
1379 | buf[3] = (wc & 0xFF000000) >> 24; | ||
1380 | } | ||
1381 | else if (cv->codepage == 12001) /* big endian */ | ||
1382 | { | ||
1383 | buf[0] = (wc & 0xFF000000) >> 24; | ||
1384 | buf[1] = (wc & 0x00FF0000) >> 16; | ||
1385 | buf[2] = (wc & 0x0000FF00) >> 8; | ||
1386 | buf[3] = wc & 0x000000FF; | ||
1387 | } | ||
1388 | return 4; | ||
1389 | } | ||
1390 | |||
/*
 * ISO 2022 Japanese code pages:
 *   50220: no halfwidth Katakana; Japanese (JIS)
 *   50221: with halfwidth Katakana; Japanese (JIS-Allow 1 byte Kana)
 *   50222: JIS X 0201-1989; Japanese (JIS-Allow 1 byte Kana - SO/SI)
 *
 * MultiByteToWideChar() and WideCharToMultiByte() behave differently
 * depending on the Windows version.  On XP, WideCharToMultiByte() doesn't
 * terminate the result sequence with an ascii escape, but Vista does.
 * MLang is used instead.
 */

/* shift states */
#define ISO2022_SI  0
#define ISO2022_SO  1

/* A conversion mode packs the character set into bits 8..15 and the
 * shift state into bits 0..7. */
#define ISO2022_MODE(cs, shift) (((cs) << 8) | (shift))
#define ISO2022_MODE_CS(mode) (((mode) >> 8) & 0xFF)
#define ISO2022_MODE_SHIFT(mode) ((mode) & 0xFF)

/* control characters switching the shift state: shift in, shift out */
static const char iso2022_SI_seq[] = "\x0F";
static const char iso2022_SO_seq[] = "\x0E";
1415 | |||
/* One escape sequence that designates a character set. */
typedef struct iso2022_esc_t {
    const char *esc;    /* the escape sequence itself */
    int esc_len;        /* number of bytes in esc */
    int len;            /* bytes per character in this character set */
    int cs;             /* character set selected (ISO2022JP_CS_*) */
} iso2022_esc_t;

/* Character set numbers; they also serve as indices into iso2022jp_esc. */
#define ISO2022JP_CS_ASCII            0
#define ISO2022JP_CS_JISX0201_ROMAN   1
#define ISO2022JP_CS_JISX0201_KANA    2
#define ISO2022JP_CS_JISX0208_1978    3
#define ISO2022JP_CS_JISX0208_1983    4
#define ISO2022JP_CS_JISX0212         5

/* Known escape sequences, terminated by an entry whose esc is NULL. */
static iso2022_esc_t iso2022jp_esc[] = {
    { .esc = "\x1B\x28\x42",     .esc_len = 3, .len = 1, .cs = ISO2022JP_CS_ASCII },
    { .esc = "\x1B\x28\x4A",     .esc_len = 3, .len = 1, .cs = ISO2022JP_CS_JISX0201_ROMAN },
    { .esc = "\x1B\x28\x49",     .esc_len = 3, .len = 1, .cs = ISO2022JP_CS_JISX0201_KANA },
    { .esc = "\x1B\x24\x40",     .esc_len = 3, .len = 2, .cs = ISO2022JP_CS_JISX0208_1983 }, /* unify 1978 with 1983 */
    { .esc = "\x1B\x24\x42",     .esc_len = 3, .len = 2, .cs = ISO2022JP_CS_JISX0208_1983 },
    { .esc = "\x1B\x24\x28\x44", .esc_len = 4, .len = 2, .cs = ISO2022JP_CS_JISX0212 },
    { .esc = NULL,               .esc_len = 0, .len = 0, .cs = 0 }
};
1440 | |||
1441 | static int | ||
1442 | iso2022jp_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize) | ||
1443 | { | ||
1444 | iso2022_esc_t *iesc = iso2022jp_esc; | ||
1445 | char tmp[MB_CHAR_MAX]; | ||
1446 | int insize; | ||
1447 | HRESULT hr; | ||
1448 | DWORD dummy = 0; | ||
1449 | int len; | ||
1450 | int esc_len; | ||
1451 | int cs; | ||
1452 | int shift; | ||
1453 | int i; | ||
1454 | |||
1455 | if (buf[0] == 0x1B) | ||
1456 | { | ||
1457 | for (i = 0; iesc[i].esc != NULL; ++i) | ||
1458 | { | ||
1459 | esc_len = iesc[i].esc_len; | ||
1460 | if (bufsize < esc_len) | ||
1461 | { | ||
1462 | if (strncmp((char *)buf, iesc[i].esc, bufsize) == 0) | ||
1463 | return seterror(EINVAL); | ||
1464 | } | ||
1465 | else | ||
1466 | { | ||
1467 | if (strncmp((char *)buf, iesc[i].esc, esc_len) == 0) | ||
1468 | { | ||
1469 | cv->mode = ISO2022_MODE(iesc[i].cs, ISO2022_SI); | ||
1470 | *wbufsize = 0; | ||
1471 | return esc_len; | ||
1472 | } | ||
1473 | } | ||
1474 | } | ||
1475 | /* not supported escape sequence */ | ||
1476 | return seterror(EILSEQ); | ||
1477 | } | ||
1478 | else if (buf[0] == iso2022_SO_seq[0]) | ||
1479 | { | ||
1480 | cv->mode = ISO2022_MODE(ISO2022_MODE_CS(cv->mode), ISO2022_SO); | ||
1481 | *wbufsize = 0; | ||
1482 | return 1; | ||
1483 | } | ||
1484 | else if (buf[0] == iso2022_SI_seq[0]) | ||
1485 | { | ||
1486 | cv->mode = ISO2022_MODE(ISO2022_MODE_CS(cv->mode), ISO2022_SI); | ||
1487 | *wbufsize = 0; | ||
1488 | return 1; | ||
1489 | } | ||
1490 | |||
1491 | cs = ISO2022_MODE_CS(cv->mode); | ||
1492 | shift = ISO2022_MODE_SHIFT(cv->mode); | ||
1493 | |||
1494 | /* reset the mode for informal sequence */ | ||
1495 | if (buf[0] < 0x20) | ||
1496 | { | ||
1497 | cs = ISO2022JP_CS_ASCII; | ||
1498 | shift = ISO2022_SI; | ||
1499 | } | ||
1500 | |||
1501 | len = iesc[cs].len; | ||
1502 | if (bufsize < len) | ||
1503 | return seterror(EINVAL); | ||
1504 | for (i = 0; i < len; ++i) | ||
1505 | if (!(buf[i] < 0x80)) | ||
1506 | return seterror(EILSEQ); | ||
1507 | esc_len = iesc[cs].esc_len; | ||
1508 | memcpy(tmp, iesc[cs].esc, esc_len); | ||
1509 | if (shift == ISO2022_SO) | ||
1510 | { | ||
1511 | memcpy(tmp + esc_len, iso2022_SO_seq, 1); | ||
1512 | esc_len += 1; | ||
1513 | } | ||
1514 | memcpy(tmp + esc_len, buf, len); | ||
1515 | |||
1516 | if ((cv->codepage == 50220 || cv->codepage == 50221 | ||
1517 | || cv->codepage == 50222) && shift == ISO2022_SO) | ||
1518 | { | ||
1519 | /* XXX: shift-out cannot be used for mbtowc (both kernel and | ||
1520 | * mlang) */ | ||
1521 | esc_len = iesc[ISO2022JP_CS_JISX0201_KANA].esc_len; | ||
1522 | memcpy(tmp, iesc[ISO2022JP_CS_JISX0201_KANA].esc, esc_len); | ||
1523 | memcpy(tmp + esc_len, buf, len); | ||
1524 | } | ||
1525 | |||
1526 | insize = len + esc_len; | ||
1527 | hr = ConvertINetMultiByteToUnicode(&dummy, cv->codepage, | ||
1528 | (const char *)tmp, &insize, (wchar_t *)wbuf, wbufsize); | ||
1529 | if (hr != S_OK || insize != len + esc_len) | ||
1530 | return seterror(EILSEQ); | ||
1531 | |||
1532 | /* Check for conversion error. Assuming defaultChar is 0x3F. */ | ||
1533 | /* ascii should be converted from ascii */ | ||
1534 | if (wbuf[0] == buf[0] | ||
1535 | && cv->mode != ISO2022_MODE(ISO2022JP_CS_ASCII, ISO2022_SI)) | ||
1536 | return seterror(EILSEQ); | ||
1537 | |||
1538 | /* reset the mode for informal sequence */ | ||
1539 | if (cv->mode != ISO2022_MODE(cs, shift)) | ||
1540 | cv->mode = ISO2022_MODE(cs, shift); | ||
1541 | |||
1542 | return len; | ||
1543 | } | ||
1544 | |||
1545 | static int | ||
1546 | iso2022jp_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize) | ||
1547 | { | ||
1548 | iso2022_esc_t *iesc = iso2022jp_esc; | ||
1549 | char tmp[MB_CHAR_MAX]; | ||
1550 | int tmpsize = MB_CHAR_MAX; | ||
1551 | int insize = wbufsize; | ||
1552 | HRESULT hr; | ||
1553 | DWORD dummy = 0; | ||
1554 | int len; | ||
1555 | int esc_len; | ||
1556 | int cs; | ||
1557 | int shift; | ||
1558 | int i; | ||
1559 | |||
1560 | /* | ||
1561 | * MultiByte = [escape sequence] + character + [escape sequence] | ||
1562 | * | ||
1563 | * Whether trailing escape sequence is added depends on which API is | ||
1564 | * used (kernel or MLang, and its version). | ||
1565 | */ | ||
1566 | hr = ConvertINetUnicodeToMultiByte(&dummy, cv->codepage, | ||
1567 | (const wchar_t *)wbuf, &wbufsize, tmp, &tmpsize); | ||
1568 | if (hr != S_OK || insize != wbufsize) | ||
1569 | return seterror(EILSEQ); | ||
1570 | else if (bufsize < tmpsize) | ||
1571 | return seterror(E2BIG); | ||
1572 | |||
1573 | if (tmpsize == 1) | ||
1574 | { | ||
1575 | cs = ISO2022JP_CS_ASCII; | ||
1576 | esc_len = 0; | ||
1577 | } | ||
1578 | else | ||
1579 | { | ||
1580 | for (i = 1; iesc[i].esc != NULL; ++i) | ||
1581 | { | ||
1582 | esc_len = iesc[i].esc_len; | ||
1583 | if (strncmp(tmp, iesc[i].esc, esc_len) == 0) | ||
1584 | { | ||
1585 | cs = iesc[i].cs; | ||
1586 | break; | ||
1587 | } | ||
1588 | } | ||
1589 | if (iesc[i].esc == NULL) | ||
1590 | /* not supported escape sequence */ | ||
1591 | return seterror(EILSEQ); | ||
1592 | } | ||
1593 | |||
1594 | shift = ISO2022_SI; | ||
1595 | if (tmp[esc_len] == iso2022_SO_seq[0]) | ||
1596 | { | ||
1597 | shift = ISO2022_SO; | ||
1598 | esc_len += 1; | ||
1599 | } | ||
1600 | |||
1601 | len = iesc[cs].len; | ||
1602 | |||
1603 | /* Check for converting error. Assuming defaultChar is 0x3F. */ | ||
1604 | /* ascii should be converted from ascii */ | ||
1605 | if (cs == ISO2022JP_CS_ASCII && !(wbuf[0] < 0x80)) | ||
1606 | return seterror(EILSEQ); | ||
1607 | else if (tmpsize < esc_len + len) | ||
1608 | return seterror(EILSEQ); | ||
1609 | |||
1610 | if (cv->mode == ISO2022_MODE(cs, shift)) | ||
1611 | { | ||
1612 | /* remove escape sequence */ | ||
1613 | if (esc_len != 0) | ||
1614 | memmove(tmp, tmp + esc_len, len); | ||
1615 | esc_len = 0; | ||
1616 | } | ||
1617 | else | ||
1618 | { | ||
1619 | if (cs == ISO2022JP_CS_ASCII) | ||
1620 | { | ||
1621 | esc_len = iesc[ISO2022JP_CS_ASCII].esc_len; | ||
1622 | memmove(tmp + esc_len, tmp, len); | ||
1623 | memcpy(tmp, iesc[ISO2022JP_CS_ASCII].esc, esc_len); | ||
1624 | } | ||
1625 | if (ISO2022_MODE_SHIFT(cv->mode) == ISO2022_SO) | ||
1626 | { | ||
1627 | /* shift-in before changing to other mode */ | ||
1628 | memmove(tmp + 1, tmp, len + esc_len); | ||
1629 | memcpy(tmp, iso2022_SI_seq, 1); | ||
1630 | esc_len += 1; | ||
1631 | } | ||
1632 | } | ||
1633 | |||
1634 | if (bufsize < len + esc_len) | ||
1635 | return seterror(E2BIG); | ||
1636 | memcpy(buf, tmp, len + esc_len); | ||
1637 | cv->mode = ISO2022_MODE(cs, shift); | ||
1638 | return len + esc_len; | ||
1639 | } | ||
1640 | |||
1641 | static int | ||
1642 | iso2022jp_flush(csconv_t *cv, uchar *buf, int bufsize) | ||
1643 | { | ||
1644 | iso2022_esc_t *iesc = iso2022jp_esc; | ||
1645 | int esc_len; | ||
1646 | |||
1647 | if (cv->mode != ISO2022_MODE(ISO2022JP_CS_ASCII, ISO2022_SI)) | ||
1648 | { | ||
1649 | esc_len = 0; | ||
1650 | if (ISO2022_MODE_SHIFT(cv->mode) != ISO2022_SI) | ||
1651 | esc_len += 1; | ||
1652 | if (ISO2022_MODE_CS(cv->mode) != ISO2022JP_CS_ASCII) | ||
1653 | esc_len += iesc[ISO2022JP_CS_ASCII].esc_len; | ||
1654 | if (bufsize < esc_len) | ||
1655 | return seterror(E2BIG); | ||
1656 | |||
1657 | esc_len = 0; | ||
1658 | if (ISO2022_MODE_SHIFT(cv->mode) != ISO2022_SI) | ||
1659 | { | ||
1660 | memcpy(buf, iso2022_SI_seq, 1); | ||
1661 | esc_len += 1; | ||
1662 | } | ||
1663 | if (ISO2022_MODE_CS(cv->mode) != ISO2022JP_CS_ASCII) | ||
1664 | { | ||
1665 | memcpy(buf + esc_len, iesc[ISO2022JP_CS_ASCII].esc, | ||
1666 | iesc[ISO2022JP_CS_ASCII].esc_len); | ||
1667 | esc_len += iesc[ISO2022JP_CS_ASCII].esc_len; | ||
1668 | } | ||
1669 | return esc_len; | ||
1670 | } | ||
1671 | return 0; | ||
1672 | } | ||
1673 | |||
1674 | static void process_file(iconv_t cd, FILE *in, FILE *out) | ||
1675 | { | ||
1676 | char inbuf[BUFSIZ]; | ||
1677 | char outbuf[BUFSIZ]; | ||
1678 | const char *pin; | ||
1679 | char *pout; | ||
1680 | size_t inbytesleft; | ||
1681 | size_t outbytesleft; | ||
1682 | size_t rest = 0; | ||
1683 | size_t r; | ||
1684 | |||
1685 | while ((inbytesleft=fread(inbuf+rest, 1, sizeof(inbuf)-rest, in)) != 0 | ||
1686 | || rest != 0) { | ||
1687 | inbytesleft += rest; | ||
1688 | pin = inbuf; | ||
1689 | pout = outbuf; | ||
1690 | outbytesleft = sizeof(outbuf); | ||
1691 | r = iconv(cd, &pin, &inbytesleft, &pout, &outbytesleft); | ||
1692 | fwrite(outbuf, 1, sizeof(outbuf) - outbytesleft, out); | ||
1693 | if (r == (size_t)(-1) && errno != E2BIG && | ||
1694 | (errno != EINVAL || feof(in))) | ||
1695 | bb_perror_msg_and_die("conversion error"); | ||
1696 | memmove(inbuf, pin, inbytesleft); | ||
1697 | rest = inbytesleft; | ||
1698 | if (rest == 0 && feof(in)) | ||
1699 | break; | ||
1700 | } | ||
1701 | pout = outbuf; | ||
1702 | outbytesleft = sizeof(outbuf); | ||
1703 | r = iconv(cd, NULL, NULL, &pout, &outbytesleft); | ||
1704 | fwrite(outbuf, 1, sizeof(outbuf) - outbytesleft, out); | ||
1705 | if (r == (size_t)(-1)) | ||
1706 | bb_perror_msg_and_die("conversion error"); | ||
1707 | } | ||
1708 | |||
/*
 * Bits tested against the getopt32() result in iconv_main(); the bit
 * positions follow the order of the letters in its "f:t:lco:" string.
 */
enum {
	OPT_f = 0x01,	/* -f from-enc */
	OPT_t = 0x02,	/* -t to-enc */
	OPT_l = 0x04,	/* -l: list known encodings */
	OPT_c = 0x08,	/* -c: discard unconvertible characters */
	OPT_o = 0x10,	/* -o outfile */
};
1716 | |||
1717 | int iconv_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; | ||
1718 | int iconv_main(int argc, char **argv) | ||
1719 | { | ||
1720 | const char *fromcode = "", *tocode = "", *outfile; | ||
1721 | char *tmpname = NULL; | ||
1722 | int i, opt; | ||
1723 | iconv_t cd; | ||
1724 | FILE *in; | ||
1725 | FILE *out = stdout; | ||
1726 | |||
1727 | opt = getopt32(argv, "f:t:lco:", &fromcode, &tocode, &outfile); | ||
1728 | |||
1729 | if (opt & OPT_l) { | ||
1730 | const char *alias = cp_alias; | ||
1731 | while (*alias) { | ||
1732 | printf("%s\n", alias); | ||
1733 | alias += strlen(alias) + 1; | ||
1734 | } | ||
1735 | return 0; | ||
1736 | } | ||
1737 | |||
1738 | if (opt & OPT_o) { | ||
1739 | tmpname = xasprintf("%sXXXXXX", outfile); | ||
1740 | mktemp(tmpname); | ||
1741 | out = xfopen(tmpname, "wb"); | ||
1742 | } | ||
1743 | |||
1744 | if (opt & OPT_c) | ||
1745 | tocode = xasprintf("%s//IGNORE", tocode); | ||
1746 | |||
1747 | cd = iconv_open(tocode, fromcode); | ||
1748 | if (cd == (iconv_t)(-1)) | ||
1749 | bb_perror_msg_and_die("iconv_open error"); | ||
1750 | |||
1751 | if (optind == argc) | ||
1752 | argv[argc++] = (char *)"-"; | ||
1753 | |||
1754 | for (i=optind; i<argc; ++i) { | ||
1755 | if (argv[i][0] == '-' && argv[i][1] == '\0') | ||
1756 | in = stdin; | ||
1757 | else | ||
1758 | in = xfopen(argv[optind], "rb"); | ||
1759 | process_file(cd, in, out); | ||
1760 | fclose(in); | ||
1761 | } | ||
1762 | |||
1763 | if (tmpname) { | ||
1764 | fclose(out); | ||
1765 | xrename(tmpname, outfile); | ||
1766 | } | ||
1767 | |||
1768 | if (ENABLE_FEATURE_CLEAN_UP) | ||
1769 | iconv_close(cd); | ||
1770 | return 0; | ||
1771 | } | ||