diff options
author | Ron Yorston <rmy@pobox.com> | 2019-01-06 13:17:09 +0000 |
---|---|---|
committer | Ron Yorston <rmy@pobox.com> | 2019-01-06 13:26:19 +0000 |
commit | 1fec4ebbdb930f6b8989be2e10c0f673803ac830 (patch) | |
tree | b66e26698acde618b64b219c5709d59feaaea72b | |
parent | f192e653963344fdffa88d47bb16d40d6e63081f (diff) | |
download | busybox-w32-1fec4ebbdb930f6b8989be2e10c0f673803ac830.tar.gz busybox-w32-1fec4ebbdb930f6b8989be2e10c0f673803ac830.tar.bz2 busybox-w32-1fec4ebbdb930f6b8989be2e10c0f673803ac830.zip |
iconv: import from win-iconv
Source imported from https://github.com/win-iconv/win-iconv and
modified to build in busybox-w32.
-rw-r--r-- | configs/mingw32_defconfig | 1 | ||||
-rw-r--r-- | configs/mingw64_defconfig | 1 | ||||
-rw-r--r-- | libbb/Kbuild.src | 2 | ||||
-rw-r--r-- | miscutils/iconv.c | 1846 |
4 files changed, 1849 insertions, 1 deletions
diff --git a/configs/mingw32_defconfig b/configs/mingw32_defconfig index 6ef09c7b9..0b8cf83ce 100644 --- a/configs/mingw32_defconfig +++ b/configs/mingw32_defconfig | |||
@@ -782,6 +782,7 @@ CONFIG_FEATURE_DC_LIBM=y | |||
782 | # CONFIG_I2CSET is not set | 782 | # CONFIG_I2CSET is not set |
783 | # CONFIG_I2CDUMP is not set | 783 | # CONFIG_I2CDUMP is not set |
784 | # CONFIG_I2CDETECT is not set | 784 | # CONFIG_I2CDETECT is not set |
785 | CONFIG_ICONV=y | ||
785 | # CONFIG_INOTIFYD is not set | 786 | # CONFIG_INOTIFYD is not set |
786 | CONFIG_LESS=y | 787 | CONFIG_LESS=y |
787 | CONFIG_FEATURE_LESS_MAXLINES=9999999 | 788 | CONFIG_FEATURE_LESS_MAXLINES=9999999 |
diff --git a/configs/mingw64_defconfig b/configs/mingw64_defconfig index 8cc6b419a..46ebfbbfc 100644 --- a/configs/mingw64_defconfig +++ b/configs/mingw64_defconfig | |||
@@ -782,6 +782,7 @@ CONFIG_FEATURE_DC_LIBM=y | |||
782 | # CONFIG_I2CSET is not set | 782 | # CONFIG_I2CSET is not set |
783 | # CONFIG_I2CDUMP is not set | 783 | # CONFIG_I2CDUMP is not set |
784 | # CONFIG_I2CDETECT is not set | 784 | # CONFIG_I2CDETECT is not set |
785 | CONFIG_ICONV=y | ||
785 | # CONFIG_INOTIFYD is not set | 786 | # CONFIG_INOTIFYD is not set |
786 | CONFIG_LESS=y | 787 | CONFIG_LESS=y |
787 | CONFIG_FEATURE_LESS_MAXLINES=9999999 | 788 | CONFIG_FEATURE_LESS_MAXLINES=9999999 |
diff --git a/libbb/Kbuild.src b/libbb/Kbuild.src index 43ededbea..b298040ac 100644 --- a/libbb/Kbuild.src +++ b/libbb/Kbuild.src | |||
@@ -69,6 +69,7 @@ lib-y += securetty.o | |||
69 | lib-y += single_argv.o | 69 | lib-y += single_argv.o |
70 | lib-y += skip_whitespace.o | 70 | lib-y += skip_whitespace.o |
71 | lib-y += str_tolower.o | 71 | lib-y += str_tolower.o |
72 | lib-y += strrstr.o | ||
72 | lib-y += sysconf.o | 73 | lib-y += sysconf.o |
73 | lib-y += time.o | 74 | lib-y += time.o |
74 | lib-y += trim.o | 75 | lib-y += trim.o |
@@ -109,7 +110,6 @@ lib-$(CONFIG_PLATFORM_POSIX) += setup_environment.o | |||
109 | lib-$(CONFIG_PLATFORM_POSIX) += signals.o | 110 | lib-$(CONFIG_PLATFORM_POSIX) += signals.o |
110 | lib-$(CONFIG_PLATFORM_POSIX) += simplify_path.o | 111 | lib-$(CONFIG_PLATFORM_POSIX) += simplify_path.o |
111 | lib-$(CONFIG_PLATFORM_POSIX) += speed_table.o | 112 | lib-$(CONFIG_PLATFORM_POSIX) += speed_table.o |
112 | lib-$(CONFIG_PLATFORM_POSIX) += strrstr.o | ||
113 | lib-$(CONFIG_PLATFORM_POSIX) += udp_io.o | 113 | lib-$(CONFIG_PLATFORM_POSIX) += udp_io.o |
114 | lib-$(CONFIG_PLATFORM_POSIX) += warn_ignoring_args.o | 114 | lib-$(CONFIG_PLATFORM_POSIX) += warn_ignoring_args.o |
115 | lib-$(CONFIG_PLATFORM_POSIX) += write.o | 115 | lib-$(CONFIG_PLATFORM_POSIX) += write.o |
diff --git a/miscutils/iconv.c b/miscutils/iconv.c new file mode 100644 index 000000000..828c38213 --- /dev/null +++ b/miscutils/iconv.c | |||
@@ -0,0 +1,1846 @@ | |||
1 | /* | ||
2 | * iconv implementation using Win32 API to convert. | ||
3 | * | ||
4 | * This file is placed in the public domain. | ||
5 | */ | ||
6 | |||
7 | /* | ||
8 | * This code was obtained from: | ||
9 | * | ||
10 | * https://github.com/win-iconv/win-iconv | ||
11 | * | ||
12 | * Modified for busybox-w32 by Ronald M Yorston. These modifications | ||
13 | * are also dedicated to the public domain. | ||
14 | */ | ||
15 | |||
16 | //config:config ICONV | ||
17 | //config: bool "iconv" | ||
18 | //config: default y | ||
19 | //config: help | ||
20 | //config: 'iconv' converts text between character encodings. | ||
21 | |||
22 | //applet:IF_ICONV(APPLET(iconv, BB_DIR_USR_BIN, BB_SUID_DROP)) | ||
23 | |||
24 | //kbuild:lib-$(CONFIG_ICONV) += iconv.o | ||
25 | |||
26 | //usage:#define iconv_trivial_usage | ||
27 | //usage: "[-lc] [-o outfile] -f from-enc -t to-enc [FILE]..." | ||
28 | //usage:#define iconv_full_usage "\n\n" | ||
29 | //usage: "Convert text between character encodings\n" | ||
30 | //usage: "\n -l List all known character encodings" | ||
31 | //usage: "\n -c Silently discard characters that cannot be converted" | ||
32 | //usage: "\n -o Use outfile for output" | ||
33 | //usage: "\n -f Use from-enc for input characters" | ||
34 | //usage: "\n -t Use to-enc for output characters" | ||
35 | |||
36 | #include "libbb.h" | ||
37 | |||
38 | /* WORKAROUND: */ | ||
39 | #define GetProcAddressA GetProcAddress | ||
40 | |||
41 | #define MB_CHAR_MAX 16 | ||
42 | |||
43 | #define UNICODE_MODE_BOM_DONE 1 | ||
44 | #define UNICODE_MODE_SWAPPED 2 | ||
45 | |||
46 | #define FLAG_USE_BOM 1 | ||
47 | #define FLAG_TRANSLIT 2 /* //TRANSLIT */ | ||
48 | #define FLAG_IGNORE 4 /* //IGNORE */ | ||
49 | |||
50 | typedef unsigned char uchar; | ||
51 | typedef unsigned short ushort; | ||
52 | typedef unsigned int uint; | ||
53 | |||
54 | typedef void* iconv_t; | ||
55 | |||
56 | iconv_t iconv_open(const char *tocode, const char *fromcode); | ||
57 | int iconv_close(iconv_t cd); | ||
58 | size_t iconv(iconv_t cd, const char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft); | ||
59 | |||
60 | typedef struct compat_t compat_t; | ||
61 | typedef struct csconv_t csconv_t; | ||
62 | typedef struct rec_iconv_t rec_iconv_t; | ||
63 | |||
64 | typedef iconv_t (*f_iconv_open)(const char *tocode, const char *fromcode); | ||
65 | typedef int (*f_iconv_close)(iconv_t cd); | ||
66 | typedef size_t (*f_iconv)(iconv_t cd, const char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft); | ||
67 | typedef int* (*f_errno)(void); | ||
68 | typedef int (*f_mbtowc)(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize); | ||
69 | typedef int (*f_wctomb)(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize); | ||
70 | typedef int (*f_mblen)(csconv_t *cv, const uchar *buf, int bufsize); | ||
71 | typedef int (*f_flush)(csconv_t *cv, uchar *buf, int bufsize); | ||
72 | |||
73 | #define COMPAT_IN 1 | ||
74 | #define COMPAT_OUT 2 | ||
75 | |||
/*
 * One entry of a Unicode compatibility mapping table, used to stay
 * compatible with other conversion tables.  Tables of these entries
 * end with an all-zero entry.
 */
struct compat_t {
    uint in;    /* first code point of the pair */
    uint out;   /* second code point of the pair */
    /* NOTE(review): which of in/out is the foreign and which the Windows
     * code point is decided by the lookup code, not visible here. */
    uint flag;  /* COMPAT_IN, COMPAT_OUT or both */
};
82 | |||
/*
 * Converter for a single character set: the code page together with
 * the callbacks that translate between its byte encoding and the
 * ushort (wide character) intermediate buffer.
 */
struct csconv_t {
    int codepage;       /* Windows code page identifier */
    int flags;          /* presumably a FLAG_* bit mask -- confirm in make_csconv() */
    f_mbtowc mbtowc;    /* bytes -> wide characters */
    f_wctomb wctomb;    /* wide characters -> bytes */
    f_mblen mblen;      /* length query on a byte buffer */
    f_flush flush;      /* flush callback writing into a byte buffer */
    DWORD mode;         /* conversion state word; meaning depends on the callbacks */
    compat_t *compat;   /* compatibility mapping table for this code page */
};
93 | |||
/*
 * Conversion descriptor record: the underlying handle, the functions
 * used to drive it, and one csconv_t for each side of the conversion.
 */
struct rec_iconv_t {
    iconv_t cd;                 /* handle passed to the function pointers below */
    f_iconv_close iconv_close;  /* closes cd */
    f_iconv iconv;              /* performs the conversion on cd */
    f_errno _errno;             /* returns a pointer to an int error value */
    csconv_t from;              /* source character set */
    csconv_t to;                /* destination character set */
};
102 | |||
103 | static int win_iconv_open(rec_iconv_t *cd, const char *tocode, const char *fromcode); | ||
104 | static int win_iconv_close(iconv_t cd); | ||
105 | static size_t win_iconv(iconv_t cd, const char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft); | ||
106 | |||
107 | static int load_mlang(void); | ||
108 | static int make_csconv(const char *name, csconv_t *cv); | ||
109 | static int name_to_codepage(const char *name); | ||
110 | static uint utf16_to_ucs4(const ushort *wbuf); | ||
111 | static void ucs4_to_utf16(uint wc, ushort *wbuf, int *wbufsize); | ||
112 | static int mbtowc_flags(int codepage); | ||
113 | static int must_use_null_useddefaultchar(int codepage); | ||
114 | static int seterror(int err); | ||
115 | |||
116 | static int sbcs_mblen(csconv_t *cv, const uchar *buf, int bufsize); | ||
117 | static int dbcs_mblen(csconv_t *cv, const uchar *buf, int bufsize); | ||
118 | static int mbcs_mblen(csconv_t *cv, const uchar *buf, int bufsize); | ||
119 | static int utf8_mblen(csconv_t *cv, const uchar *buf, int bufsize); | ||
120 | static int eucjp_mblen(csconv_t *cv, const uchar *buf, int bufsize); | ||
121 | |||
122 | static int kernel_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize); | ||
123 | static int kernel_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize); | ||
124 | static int mlang_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize); | ||
125 | static int mlang_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize); | ||
126 | static int utf16_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize); | ||
127 | static int utf16_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize); | ||
128 | static int utf32_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize); | ||
129 | static int utf32_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize); | ||
130 | static int iso2022jp_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize); | ||
131 | static int iso2022jp_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize); | ||
132 | static int iso2022jp_flush(csconv_t *cv, uchar *buf, int bufsize); | ||
133 | |||
134 | static struct { | ||
135 | int codepage; | ||
136 | const char *name; | ||
137 | } codepage_alias[] = { | ||
138 | {65001, "CP65001"}, | ||
139 | {65001, "UTF8"}, | ||
140 | {65001, "UTF-8"}, | ||
141 | |||
142 | {1200, "CP1200"}, | ||
143 | {1200, "UTF16LE"}, | ||
144 | {1200, "UTF-16LE"}, | ||
145 | {1200, "UCS2LE"}, | ||
146 | {1200, "UCS-2LE"}, | ||
147 | {1200, "UCS-2-INTERNAL"}, | ||
148 | |||
149 | {1201, "CP1201"}, | ||
150 | {1201, "UTF16BE"}, | ||
151 | {1201, "UTF-16BE"}, | ||
152 | {1201, "UCS2BE"}, | ||
153 | {1201, "UCS-2BE"}, | ||
154 | {1201, "unicodeFFFE"}, | ||
155 | |||
156 | {12000, "CP12000"}, | ||
157 | {12000, "UTF32LE"}, | ||
158 | {12000, "UTF-32LE"}, | ||
159 | {12000, "UCS4LE"}, | ||
160 | {12000, "UCS-4LE"}, | ||
161 | |||
162 | {12001, "CP12001"}, | ||
163 | {12001, "UTF32BE"}, | ||
164 | {12001, "UTF-32BE"}, | ||
165 | {12001, "UCS4BE"}, | ||
166 | {12001, "UCS-4BE"}, | ||
167 | |||
168 | #ifndef GLIB_COMPILATION | ||
169 | /* | ||
170 | * Default is big endian. | ||
171 | * See rfc2781 4.3 Interpreting text labelled as UTF-16. | ||
172 | */ | ||
173 | {1201, "UTF16"}, | ||
174 | {1201, "UTF-16"}, | ||
175 | {1201, "UCS2"}, | ||
176 | {1201, "UCS-2"}, | ||
177 | {12001, "UTF32"}, | ||
178 | {12001, "UTF-32"}, | ||
179 | {12001, "UCS-4"}, | ||
180 | {12001, "UCS4"}, | ||
181 | #else | ||
182 | /* Default is little endian, because the platform is */ | ||
183 | {1200, "UTF16"}, | ||
184 | {1200, "UTF-16"}, | ||
185 | {1200, "UCS2"}, | ||
186 | {1200, "UCS-2"}, | ||
187 | {12000, "UTF32"}, | ||
188 | {12000, "UTF-32"}, | ||
189 | {12000, "UCS4"}, | ||
190 | {12000, "UCS-4"}, | ||
191 | #endif | ||
192 | |||
193 | /* copy from libiconv `iconv -l` */ | ||
194 | /* !IsValidCodePage(367) */ | ||
195 | {20127, "ANSI_X3.4-1968"}, | ||
196 | {20127, "ANSI_X3.4-1986"}, | ||
197 | {20127, "ASCII"}, | ||
198 | {20127, "CP367"}, | ||
199 | {20127, "IBM367"}, | ||
200 | {20127, "ISO-IR-6"}, | ||
201 | {20127, "ISO646-US"}, | ||
202 | {20127, "ISO_646.IRV:1991"}, | ||
203 | {20127, "US"}, | ||
204 | {20127, "US-ASCII"}, | ||
205 | {20127, "CSASCII"}, | ||
206 | |||
207 | /* !IsValidCodePage(819) */ | ||
208 | {1252, "CP819"}, | ||
209 | {1252, "IBM819"}, | ||
210 | {28591, "ISO-8859-1"}, | ||
211 | {28591, "ISO-IR-100"}, | ||
212 | {28591, "ISO8859-1"}, | ||
213 | {28591, "ISO_8859-1"}, | ||
214 | {28591, "ISO_8859-1:1987"}, | ||
215 | {28591, "L1"}, | ||
216 | {28591, "LATIN1"}, | ||
217 | {28591, "CSISOLATIN1"}, | ||
218 | |||
219 | {1250, "CP1250"}, | ||
220 | {1250, "MS-EE"}, | ||
221 | {1250, "WINDOWS-1250"}, | ||
222 | |||
223 | {1251, "CP1251"}, | ||
224 | {1251, "MS-CYRL"}, | ||
225 | {1251, "WINDOWS-1251"}, | ||
226 | |||
227 | {1252, "CP1252"}, | ||
228 | {1252, "MS-ANSI"}, | ||
229 | {1252, "WINDOWS-1252"}, | ||
230 | |||
231 | {1253, "CP1253"}, | ||
232 | {1253, "MS-GREEK"}, | ||
233 | {1253, "WINDOWS-1253"}, | ||
234 | |||
235 | {1254, "CP1254"}, | ||
236 | {1254, "MS-TURK"}, | ||
237 | {1254, "WINDOWS-1254"}, | ||
238 | |||
239 | {1255, "CP1255"}, | ||
240 | {1255, "MS-HEBR"}, | ||
241 | {1255, "WINDOWS-1255"}, | ||
242 | |||
243 | {1256, "CP1256"}, | ||
244 | {1256, "MS-ARAB"}, | ||
245 | {1256, "WINDOWS-1256"}, | ||
246 | |||
247 | {1257, "CP1257"}, | ||
248 | {1257, "WINBALTRIM"}, | ||
249 | {1257, "WINDOWS-1257"}, | ||
250 | |||
251 | {1258, "CP1258"}, | ||
252 | {1258, "WINDOWS-1258"}, | ||
253 | |||
254 | {850, "850"}, | ||
255 | {850, "CP850"}, | ||
256 | {850, "IBM850"}, | ||
257 | {850, "CSPC850MULTILINGUAL"}, | ||
258 | |||
259 | /* !IsValidCodePage(862) */ | ||
260 | {862, "862"}, | ||
261 | {862, "CP862"}, | ||
262 | {862, "IBM862"}, | ||
263 | {862, "CSPC862LATINHEBREW"}, | ||
264 | |||
265 | {866, "866"}, | ||
266 | {866, "CP866"}, | ||
267 | {866, "IBM866"}, | ||
268 | {866, "CSIBM866"}, | ||
269 | |||
270 | /* !IsValidCodePage(154) */ | ||
271 | {154, "CP154"}, | ||
272 | {154, "CYRILLIC-ASIAN"}, | ||
273 | {154, "PT154"}, | ||
274 | {154, "PTCP154"}, | ||
275 | {154, "CSPTCP154"}, | ||
276 | |||
277 | /* !IsValidCodePage(1133) */ | ||
278 | {1133, "CP1133"}, | ||
279 | {1133, "IBM-CP1133"}, | ||
280 | |||
281 | {874, "CP874"}, | ||
282 | {874, "WINDOWS-874"}, | ||
283 | |||
284 | /* !IsValidCodePage(51932) */ | ||
285 | {51932, "CP51932"}, | ||
286 | {51932, "MS51932"}, | ||
287 | {51932, "WINDOWS-51932"}, | ||
288 | {51932, "EUC-JP"}, | ||
289 | |||
290 | {932, "CP932"}, | ||
291 | {932, "MS932"}, | ||
292 | {932, "SHIFFT_JIS"}, | ||
293 | {932, "SHIFFT_JIS-MS"}, | ||
294 | {932, "SJIS"}, | ||
295 | {932, "SJIS-MS"}, | ||
296 | {932, "SJIS-OPEN"}, | ||
297 | {932, "SJIS-WIN"}, | ||
298 | {932, "WINDOWS-31J"}, | ||
299 | {932, "WINDOWS-932"}, | ||
300 | {932, "CSWINDOWS31J"}, | ||
301 | |||
302 | {50221, "CP50221"}, | ||
303 | {50221, "ISO-2022-JP"}, | ||
304 | {50221, "ISO-2022-JP-MS"}, | ||
305 | {50221, "ISO2022-JP"}, | ||
306 | {50221, "ISO2022-JP-MS"}, | ||
307 | {50221, "MS50221"}, | ||
308 | {50221, "WINDOWS-50221"}, | ||
309 | |||
310 | {936, "CP936"}, | ||
311 | {936, "GBK"}, | ||
312 | {936, "MS936"}, | ||
313 | {936, "WINDOWS-936"}, | ||
314 | |||
315 | {950, "CP950"}, | ||
316 | {950, "BIG5"}, | ||
317 | {950, "BIG5HKSCS"}, | ||
318 | {950, "BIG5-HKSCS"}, | ||
319 | |||
320 | {949, "CP949"}, | ||
321 | {949, "UHC"}, | ||
322 | {949, "EUC-KR"}, | ||
323 | |||
324 | {1361, "CP1361"}, | ||
325 | {1361, "JOHAB"}, | ||
326 | |||
327 | {437, "437"}, | ||
328 | {437, "CP437"}, | ||
329 | {437, "IBM437"}, | ||
330 | {437, "CSPC8CODEPAGE437"}, | ||
331 | |||
332 | {737, "CP737"}, | ||
333 | |||
334 | {775, "CP775"}, | ||
335 | {775, "IBM775"}, | ||
336 | {775, "CSPC775BALTIC"}, | ||
337 | |||
338 | {852, "852"}, | ||
339 | {852, "CP852"}, | ||
340 | {852, "IBM852"}, | ||
341 | {852, "CSPCP852"}, | ||
342 | |||
343 | /* !IsValidCodePage(853) */ | ||
344 | {853, "CP853"}, | ||
345 | |||
346 | {855, "855"}, | ||
347 | {855, "CP855"}, | ||
348 | {855, "IBM855"}, | ||
349 | {855, "CSIBM855"}, | ||
350 | |||
351 | {857, "857"}, | ||
352 | {857, "CP857"}, | ||
353 | {857, "IBM857"}, | ||
354 | {857, "CSIBM857"}, | ||
355 | |||
356 | /* !IsValidCodePage(858) */ | ||
357 | {858, "CP858"}, | ||
358 | |||
359 | {860, "860"}, | ||
360 | {860, "CP860"}, | ||
361 | {860, "IBM860"}, | ||
362 | {860, "CSIBM860"}, | ||
363 | |||
364 | {861, "861"}, | ||
365 | {861, "CP-IS"}, | ||
366 | {861, "CP861"}, | ||
367 | {861, "IBM861"}, | ||
368 | {861, "CSIBM861"}, | ||
369 | |||
370 | {863, "863"}, | ||
371 | {863, "CP863"}, | ||
372 | {863, "IBM863"}, | ||
373 | {863, "CSIBM863"}, | ||
374 | |||
375 | {864, "CP864"}, | ||
376 | {864, "IBM864"}, | ||
377 | {864, "CSIBM864"}, | ||
378 | |||
379 | {865, "865"}, | ||
380 | {865, "CP865"}, | ||
381 | {865, "IBM865"}, | ||
382 | {865, "CSIBM865"}, | ||
383 | |||
384 | {869, "869"}, | ||
385 | {869, "CP-GR"}, | ||
386 | {869, "CP869"}, | ||
387 | {869, "IBM869"}, | ||
388 | {869, "CSIBM869"}, | ||
389 | |||
390 | /* !IsValidCodePage(1125) */ | ||
391 | {1125, "CP1125"}, | ||
392 | |||
393 | /* | ||
394 | * Code Page Identifiers | ||
395 | * http://msdn2.microsoft.com/en-us/library/ms776446.aspx | ||
396 | */ | ||
397 | {37, "IBM037"}, /* IBM EBCDIC US-Canada */ | ||
398 | {437, "IBM437"}, /* OEM United States */ | ||
399 | {500, "IBM500"}, /* IBM EBCDIC International */ | ||
400 | {708, "ASMO-708"}, /* Arabic (ASMO 708) */ | ||
401 | /* 709 Arabic (ASMO-449+, BCON V4) */ | ||
402 | /* 710 Arabic - Transparent Arabic */ | ||
403 | {720, "DOS-720"}, /* Arabic (Transparent ASMO); Arabic (DOS) */ | ||
404 | {737, "ibm737"}, /* OEM Greek (formerly 437G); Greek (DOS) */ | ||
405 | {775, "ibm775"}, /* OEM Baltic; Baltic (DOS) */ | ||
406 | {850, "ibm850"}, /* OEM Multilingual Latin 1; Western European (DOS) */ | ||
407 | {852, "ibm852"}, /* OEM Latin 2; Central European (DOS) */ | ||
408 | {855, "IBM855"}, /* OEM Cyrillic (primarily Russian) */ | ||
409 | {857, "ibm857"}, /* OEM Turkish; Turkish (DOS) */ | ||
410 | {858, "IBM00858"}, /* OEM Multilingual Latin 1 + Euro symbol */ | ||
411 | {860, "IBM860"}, /* OEM Portuguese; Portuguese (DOS) */ | ||
412 | {861, "ibm861"}, /* OEM Icelandic; Icelandic (DOS) */ | ||
413 | {862, "DOS-862"}, /* OEM Hebrew; Hebrew (DOS) */ | ||
414 | {863, "IBM863"}, /* OEM French Canadian; French Canadian (DOS) */ | ||
415 | {864, "IBM864"}, /* OEM Arabic; Arabic (864) */ | ||
416 | {865, "IBM865"}, /* OEM Nordic; Nordic (DOS) */ | ||
417 | {866, "cp866"}, /* OEM Russian; Cyrillic (DOS) */ | ||
418 | {869, "ibm869"}, /* OEM Modern Greek; Greek, Modern (DOS) */ | ||
419 | {870, "IBM870"}, /* IBM EBCDIC Multilingual/ROECE (Latin 2); IBM EBCDIC Multilingual Latin 2 */ | ||
420 | {874, "windows-874"}, /* ANSI/OEM Thai (same as 28605, ISO 8859-15); Thai (Windows) */ | ||
421 | {875, "cp875"}, /* IBM EBCDIC Greek Modern */ | ||
422 | {932, "shift_jis"}, /* ANSI/OEM Japanese; Japanese (Shift-JIS) */ | ||
423 | {932, "shift-jis"}, /* alternative name for it */ | ||
424 | {936, "gb2312"}, /* ANSI/OEM Simplified Chinese (PRC, Singapore); Chinese Simplified (GB2312) */ | ||
425 | {949, "ks_c_5601-1987"}, /* ANSI/OEM Korean (Unified Hangul Code) */ | ||
426 | {950, "big5"}, /* ANSI/OEM Traditional Chinese (Taiwan; Hong Kong SAR, PRC); Chinese Traditional (Big5) */ | ||
427 | {950, "big5hkscs"}, /* ANSI/OEM Traditional Chinese (Hong Kong SAR); Chinese Traditional (Big5-HKSCS) */ | ||
428 | {950, "big5-hkscs"}, /* alternative name for it */ | ||
429 | {1026, "IBM1026"}, /* IBM EBCDIC Turkish (Latin 5) */ | ||
430 | {1047, "IBM01047"}, /* IBM EBCDIC Latin 1/Open System */ | ||
431 | {1140, "IBM01140"}, /* IBM EBCDIC US-Canada (037 + Euro symbol); IBM EBCDIC (US-Canada-Euro) */ | ||
432 | {1141, "IBM01141"}, /* IBM EBCDIC Germany (20273 + Euro symbol); IBM EBCDIC (Germany-Euro) */ | ||
433 | {1142, "IBM01142"}, /* IBM EBCDIC Denmark-Norway (20277 + Euro symbol); IBM EBCDIC (Denmark-Norway-Euro) */ | ||
434 | {1143, "IBM01143"}, /* IBM EBCDIC Finland-Sweden (20278 + Euro symbol); IBM EBCDIC (Finland-Sweden-Euro) */ | ||
435 | {1144, "IBM01144"}, /* IBM EBCDIC Italy (20280 + Euro symbol); IBM EBCDIC (Italy-Euro) */ | ||
436 | {1145, "IBM01145"}, /* IBM EBCDIC Latin America-Spain (20284 + Euro symbol); IBM EBCDIC (Spain-Euro) */ | ||
437 | {1146, "IBM01146"}, /* IBM EBCDIC United Kingdom (20285 + Euro symbol); IBM EBCDIC (UK-Euro) */ | ||
438 | {1147, "IBM01147"}, /* IBM EBCDIC France (20297 + Euro symbol); IBM EBCDIC (France-Euro) */ | ||
439 | {1148, "IBM01148"}, /* IBM EBCDIC International (500 + Euro symbol); IBM EBCDIC (International-Euro) */ | ||
440 | {1149, "IBM01149"}, /* IBM EBCDIC Icelandic (20871 + Euro symbol); IBM EBCDIC (Icelandic-Euro) */ | ||
441 | {1250, "windows-1250"}, /* ANSI Central European; Central European (Windows) */ | ||
442 | {1251, "windows-1251"}, /* ANSI Cyrillic; Cyrillic (Windows) */ | ||
443 | {1252, "windows-1252"}, /* ANSI Latin 1; Western European (Windows) */ | ||
444 | {1253, "windows-1253"}, /* ANSI Greek; Greek (Windows) */ | ||
445 | {1254, "windows-1254"}, /* ANSI Turkish; Turkish (Windows) */ | ||
446 | {1255, "windows-1255"}, /* ANSI Hebrew; Hebrew (Windows) */ | ||
447 | {1256, "windows-1256"}, /* ANSI Arabic; Arabic (Windows) */ | ||
448 | {1257, "windows-1257"}, /* ANSI Baltic; Baltic (Windows) */ | ||
449 | {1258, "windows-1258"}, /* ANSI/OEM Vietnamese; Vietnamese (Windows) */ | ||
450 | {1361, "Johab"}, /* Korean (Johab) */ | ||
451 | {10000, "macintosh"}, /* MAC Roman; Western European (Mac) */ | ||
452 | {10001, "x-mac-japanese"}, /* Japanese (Mac) */ | ||
453 | {10002, "x-mac-chinesetrad"}, /* MAC Traditional Chinese (Big5); Chinese Traditional (Mac) */ | ||
454 | {10003, "x-mac-korean"}, /* Korean (Mac) */ | ||
455 | {10004, "x-mac-arabic"}, /* Arabic (Mac) */ | ||
456 | {10005, "x-mac-hebrew"}, /* Hebrew (Mac) */ | ||
457 | {10006, "x-mac-greek"}, /* Greek (Mac) */ | ||
458 | {10007, "x-mac-cyrillic"}, /* Cyrillic (Mac) */ | ||
459 | {10008, "x-mac-chinesesimp"}, /* MAC Simplified Chinese (GB 2312); Chinese Simplified (Mac) */ | ||
460 | {10010, "x-mac-romanian"}, /* Romanian (Mac) */ | ||
461 | {10017, "x-mac-ukrainian"}, /* Ukrainian (Mac) */ | ||
462 | {10021, "x-mac-thai"}, /* Thai (Mac) */ | ||
463 | {10029, "x-mac-ce"}, /* MAC Latin 2; Central European (Mac) */ | ||
464 | {10079, "x-mac-icelandic"}, /* Icelandic (Mac) */ | ||
465 | {10081, "x-mac-turkish"}, /* Turkish (Mac) */ | ||
466 | {10082, "x-mac-croatian"}, /* Croatian (Mac) */ | ||
467 | {20000, "x-Chinese_CNS"}, /* CNS Taiwan; Chinese Traditional (CNS) */ | ||
468 | {20001, "x-cp20001"}, /* TCA Taiwan */ | ||
469 | {20002, "x_Chinese-Eten"}, /* Eten Taiwan; Chinese Traditional (Eten) */ | ||
470 | {20003, "x-cp20003"}, /* IBM5550 Taiwan */ | ||
471 | {20004, "x-cp20004"}, /* TeleText Taiwan */ | ||
472 | {20005, "x-cp20005"}, /* Wang Taiwan */ | ||
473 | {20105, "x-IA5"}, /* IA5 (IRV International Alphabet No. 5, 7-bit); Western European (IA5) */ | ||
474 | {20106, "x-IA5-German"}, /* IA5 German (7-bit) */ | ||
475 | {20107, "x-IA5-Swedish"}, /* IA5 Swedish (7-bit) */ | ||
476 | {20108, "x-IA5-Norwegian"}, /* IA5 Norwegian (7-bit) */ | ||
477 | {20127, "us-ascii"}, /* US-ASCII (7-bit) */ | ||
478 | {20261, "x-cp20261"}, /* T.61 */ | ||
479 | {20269, "x-cp20269"}, /* ISO 6937 Non-Spacing Accent */ | ||
480 | {20273, "IBM273"}, /* IBM EBCDIC Germany */ | ||
481 | {20277, "IBM277"}, /* IBM EBCDIC Denmark-Norway */ | ||
482 | {20278, "IBM278"}, /* IBM EBCDIC Finland-Sweden */ | ||
483 | {20280, "IBM280"}, /* IBM EBCDIC Italy */ | ||
484 | {20284, "IBM284"}, /* IBM EBCDIC Latin America-Spain */ | ||
485 | {20285, "IBM285"}, /* IBM EBCDIC United Kingdom */ | ||
486 | {20290, "IBM290"}, /* IBM EBCDIC Japanese Katakana Extended */ | ||
487 | {20297, "IBM297"}, /* IBM EBCDIC France */ | ||
488 | {20420, "IBM420"}, /* IBM EBCDIC Arabic */ | ||
489 | {20423, "IBM423"}, /* IBM EBCDIC Greek */ | ||
490 | {20424, "IBM424"}, /* IBM EBCDIC Hebrew */ | ||
491 | {20833, "x-EBCDIC-KoreanExtended"}, /* IBM EBCDIC Korean Extended */ | ||
492 | {20838, "IBM-Thai"}, /* IBM EBCDIC Thai */ | ||
493 | {20866, "koi8-r"}, /* Russian (KOI8-R); Cyrillic (KOI8-R) */ | ||
494 | {20871, "IBM871"}, /* IBM EBCDIC Icelandic */ | ||
495 | {20880, "IBM880"}, /* IBM EBCDIC Cyrillic Russian */ | ||
496 | {20905, "IBM905"}, /* IBM EBCDIC Turkish */ | ||
497 | {20924, "IBM00924"}, /* IBM EBCDIC Latin 1/Open System (1047 + Euro symbol) */ | ||
498 | {20932, "EUC-JP"}, /* Japanese (JIS 0208-1990 and 0121-1990) */ | ||
499 | {20936, "x-cp20936"}, /* Simplified Chinese (GB2312); Chinese Simplified (GB2312-80) */ | ||
500 | {20949, "x-cp20949"}, /* Korean Wansung */ | ||
501 | {21025, "cp1025"}, /* IBM EBCDIC Cyrillic Serbian-Bulgarian */ | ||
502 | /* 21027 (deprecated) */ | ||
503 | {21866, "koi8-u"}, /* Ukrainian (KOI8-U); Cyrillic (KOI8-U) */ | ||
504 | {28591, "iso-8859-1"}, /* ISO 8859-1 Latin 1; Western European (ISO) */ | ||
505 | {28591, "iso8859-1"}, /* ISO 8859-1 Latin 1; Western European (ISO) */ | ||
506 | {28591, "iso_8859-1"}, | ||
507 | {28591, "iso_8859_1"}, | ||
508 | {28592, "iso-8859-2"}, /* ISO 8859-2 Central European; Central European (ISO) */ | ||
509 | {28592, "iso8859-2"}, /* ISO 8859-2 Central European; Central European (ISO) */ | ||
510 | {28592, "iso_8859-2"}, | ||
511 | {28592, "iso_8859_2"}, | ||
512 | {28593, "iso-8859-3"}, /* ISO 8859-3 Latin 3 */ | ||
513 | {28593, "iso8859-3"}, /* ISO 8859-3 Latin 3 */ | ||
514 | {28593, "iso_8859-3"}, | ||
515 | {28593, "iso_8859_3"}, | ||
516 | {28594, "iso-8859-4"}, /* ISO 8859-4 Baltic */ | ||
517 | {28594, "iso8859-4"}, /* ISO 8859-4 Baltic */ | ||
518 | {28594, "iso_8859-4"}, | ||
519 | {28594, "iso_8859_4"}, | ||
520 | {28595, "iso-8859-5"}, /* ISO 8859-5 Cyrillic */ | ||
521 | {28595, "iso8859-5"}, /* ISO 8859-5 Cyrillic */ | ||
522 | {28595, "iso_8859-5"}, | ||
523 | {28595, "iso_8859_5"}, | ||
524 | {28596, "iso-8859-6"}, /* ISO 8859-6 Arabic */ | ||
525 | {28596, "iso8859-6"}, /* ISO 8859-6 Arabic */ | ||
526 | {28596, "iso_8859-6"}, | ||
527 | {28596, "iso_8859_6"}, | ||
528 | {28597, "iso-8859-7"}, /* ISO 8859-7 Greek */ | ||
529 | {28597, "iso8859-7"}, /* ISO 8859-7 Greek */ | ||
530 | {28597, "iso_8859-7"}, | ||
531 | {28597, "iso_8859_7"}, | ||
532 | {28598, "iso-8859-8"}, /* ISO 8859-8 Hebrew; Hebrew (ISO-Visual) */ | ||
533 | {28598, "iso8859-8"}, /* ISO 8859-8 Hebrew; Hebrew (ISO-Visual) */ | ||
534 | {28598, "iso_8859-8"}, | ||
535 | {28598, "iso_8859_8"}, | ||
536 | {28599, "iso-8859-9"}, /* ISO 8859-9 Turkish */ | ||
537 | {28599, "iso8859-9"}, /* ISO 8859-9 Turkish */ | ||
538 | {28599, "iso_8859-9"}, | ||
539 | {28599, "iso_8859_9"}, | ||
540 | {28603, "iso-8859-13"}, /* ISO 8859-13 Estonian */ | ||
541 | {28603, "iso8859-13"}, /* ISO 8859-13 Estonian */ | ||
542 | {28603, "iso_8859-13"}, | ||
543 | {28603, "iso_8859_13"}, | ||
544 | {28605, "iso-8859-15"}, /* ISO 8859-15 Latin 9 */ | ||
545 | {28605, "iso8859-15"}, /* ISO 8859-15 Latin 9 */ | ||
546 | {28605, "iso_8859-15"}, | ||
547 | {28605, "iso_8859_15"}, | ||
548 | {29001, "x-Europa"}, /* Europa 3 */ | ||
549 | {38598, "iso-8859-8-i"}, /* ISO 8859-8 Hebrew; Hebrew (ISO-Logical) */ | ||
550 | {38598, "iso8859-8-i"}, /* ISO 8859-8 Hebrew; Hebrew (ISO-Logical) */ | ||
551 | {38598, "iso_8859-8-i"}, | ||
552 | {38598, "iso_8859_8-i"}, | ||
553 | {50220, "iso-2022-jp"}, /* ISO 2022 Japanese with no halfwidth Katakana; Japanese (JIS) */ | ||
554 | {50221, "csISO2022JP"}, /* ISO 2022 Japanese with halfwidth Katakana; Japanese (JIS-Allow 1 byte Kana) */ | ||
555 | {50222, "iso-2022-jp"}, /* ISO 2022 Japanese JIS X 0201-1989; Japanese (JIS-Allow 1 byte Kana - SO/SI) */ | ||
556 | {50225, "iso-2022-kr"}, /* ISO 2022 Korean */ | ||
557 | {50225, "iso2022-kr"}, /* ISO 2022 Korean */ | ||
558 | {50227, "x-cp50227"}, /* ISO 2022 Simplified Chinese; Chinese Simplified (ISO 2022) */ | ||
559 | /* 50229 ISO 2022 Traditional Chinese */ | ||
560 | /* 50930 EBCDIC Japanese (Katakana) Extended */ | ||
561 | /* 50931 EBCDIC US-Canada and Japanese */ | ||
562 | /* 50933 EBCDIC Korean Extended and Korean */ | ||
563 | /* 50935 EBCDIC Simplified Chinese Extended and Simplified Chinese */ | ||
564 | /* 50936 EBCDIC Simplified Chinese */ | ||
565 | /* 50937 EBCDIC US-Canada and Traditional Chinese */ | ||
566 | /* 50939 EBCDIC Japanese (Latin) Extended and Japanese */ | ||
567 | {51932, "euc-jp"}, /* EUC Japanese */ | ||
568 | {51936, "EUC-CN"}, /* EUC Simplified Chinese; Chinese Simplified (EUC) */ | ||
569 | {51949, "euc-kr"}, /* EUC Korean */ | ||
570 | /* 51950 EUC Traditional Chinese */ | ||
571 | {52936, "hz-gb-2312"}, /* HZ-GB2312 Simplified Chinese; Chinese Simplified (HZ) */ | ||
572 | {54936, "GB18030"}, /* Windows XP and later: GB18030 Simplified Chinese (4 byte); Chinese Simplified (GB18030) */ | ||
573 | {57002, "x-iscii-de"}, /* ISCII Devanagari */ | ||
574 | {57003, "x-iscii-be"}, /* ISCII Bengali */ | ||
575 | {57004, "x-iscii-ta"}, /* ISCII Tamil */ | ||
576 | {57005, "x-iscii-te"}, /* ISCII Telugu */ | ||
577 | {57006, "x-iscii-as"}, /* ISCII Assamese */ | ||
578 | {57007, "x-iscii-or"}, /* ISCII Oriya */ | ||
579 | {57008, "x-iscii-ka"}, /* ISCII Kannada */ | ||
580 | {57009, "x-iscii-ma"}, /* ISCII Malayalam */ | ||
581 | {57010, "x-iscii-gu"}, /* ISCII Gujarati */ | ||
582 | {57011, "x-iscii-pa"}, /* ISCII Punjabi */ | ||
583 | |||
584 | {0, NULL} | ||
585 | }; | ||
586 | |||
587 | /* | ||
588 | * SJIS SHIFTJIS table CP932 table | ||
589 | * ---- --------------------------- -------------------------------- | ||
590 | * 5C U+00A5 YEN SIGN U+005C REVERSE SOLIDUS | ||
591 | * 7E U+203E OVERLINE U+007E TILDE | ||
592 | * 815C U+2014 EM DASH U+2015 HORIZONTAL BAR | ||
593 | * 815F U+005C REVERSE SOLIDUS U+FF3C FULLWIDTH REVERSE SOLIDUS | ||
594 | * 8160 U+301C WAVE DASH U+FF5E FULLWIDTH TILDE | ||
595 | * 8161 U+2016 DOUBLE VERTICAL LINE U+2225 PARALLEL TO | ||
596 | * 817C U+2212 MINUS SIGN U+FF0D FULLWIDTH HYPHEN-MINUS | ||
597 | * 8191 U+00A2 CENT SIGN U+FFE0 FULLWIDTH CENT SIGN | ||
598 | * 8192 U+00A3 POUND SIGN U+FFE1 FULLWIDTH POUND SIGN | ||
599 | * 81CA U+00AC NOT SIGN U+FFE2 FULLWIDTH NOT SIGN | ||
600 | * | ||
601 | * EUC-JP and ISO-2022-JP should be compatible with CP932. | ||
602 | * | ||
603 | * Kernel and MLang use different Unicode mapping tables. Make sure | ||
604 | * you know which API is used. | ||
605 | */ | ||
/*
 * Compatibility pairs for CP932: SHIFTJIS-table code point first,
 * CP932-table code point second.  Terminated by an all-zero entry.
 */
static compat_t cp932_compat[] = {
    {0x00A5, 0x005C, COMPAT_OUT},   /* YEN SIGN / REVERSE SOLIDUS */
    {0x203E, 0x007E, COMPAT_OUT},   /* OVERLINE / TILDE */
    {0x2014, 0x2015, COMPAT_OUT},   /* EM DASH / HORIZONTAL BAR */
    {0x301C, 0xFF5E, COMPAT_OUT},   /* WAVE DASH / FULLWIDTH TILDE */
    {0x2016, 0x2225, COMPAT_OUT},   /* DOUBLE VERTICAL LINE / PARALLEL TO */
    {0x2212, 0xFF0D, COMPAT_OUT},   /* MINUS SIGN / FULLWIDTH HYPHEN-MINUS */
    {0x00A2, 0xFFE0, COMPAT_OUT},   /* CENT SIGN / FULLWIDTH CENT SIGN */
    {0x00A3, 0xFFE1, COMPAT_OUT},   /* POUND SIGN / FULLWIDTH POUND SIGN */
    {0x00AC, 0xFFE2, COMPAT_OUT},   /* NOT SIGN / FULLWIDTH NOT SIGN */
    {0, 0, 0}
};
618 | |||
/*
 * Compatibility pairs for code page 20932.  The first three entries
 * match cp932_compat; the remaining ones are reversed relative to it
 * and apply in both directions (COMPAT_OUT|COMPAT_IN).  Terminated by
 * an all-zero entry.
 */
static compat_t cp20932_compat[] = {
    {0x00A5, 0x005C, COMPAT_OUT},             /* YEN SIGN / REVERSE SOLIDUS */
    {0x203E, 0x007E, COMPAT_OUT},             /* OVERLINE / TILDE */
    {0x2014, 0x2015, COMPAT_OUT},             /* EM DASH / HORIZONTAL BAR */
    {0xFF5E, 0x301C, COMPAT_OUT|COMPAT_IN},   /* FULLWIDTH TILDE / WAVE DASH */
    {0x2225, 0x2016, COMPAT_OUT|COMPAT_IN},   /* PARALLEL TO / DOUBLE VERTICAL LINE */
    {0xFF0D, 0x2212, COMPAT_OUT|COMPAT_IN},   /* FULLWIDTH HYPHEN-MINUS / MINUS SIGN */
    {0xFFE0, 0x00A2, COMPAT_OUT|COMPAT_IN},   /* FULLWIDTH CENT SIGN / CENT SIGN */
    {0xFFE1, 0x00A3, COMPAT_OUT|COMPAT_IN},   /* FULLWIDTH POUND SIGN / POUND SIGN */
    {0xFFE2, 0x00AC, COMPAT_OUT|COMPAT_IN},   /* FULLWIDTH NOT SIGN / NOT SIGN */
    {0, 0, 0}
};
631 | |||
/* Code page 51932 uses the same compatibility table as CP932. */
static compat_t *cp51932_compat = cp932_compat;

/*
 * Table for the 5022x code pages.
 * cp20932_compat for kernel. cp932_compat for mlang.
 */
static compat_t *cp5022x_compat = cp932_compat;
636 | |||
/*
 * Signatures of the mlang.dll entry points.  The library is not linked
 * at build time; load_mlang() resolves the functions with
 * GetProcAddress() and stores them in the static pointers below.
 */
typedef HRESULT (WINAPI *CONVERTINETSTRING)(
    LPDWORD lpdwMode,
    DWORD dwSrcEncoding,
    DWORD dwDstEncoding,
    LPCSTR lpSrcStr,
    LPINT lpnSrcSize,
    LPBYTE lpDstStr,
    LPINT lpnDstSize
);
typedef HRESULT (WINAPI *CONVERTINETMULTIBYTETOUNICODE)(
    LPDWORD lpdwMode,
    DWORD dwSrcEncoding,
    LPCSTR lpSrcStr,
    LPINT lpnMultiCharCount,
    LPWSTR lpDstStr,
    LPINT lpnWideCharCount
);
typedef HRESULT (WINAPI *CONVERTINETUNICODETOMULTIBYTE)(
    LPDWORD lpdwMode,
    DWORD dwEncoding,
    LPCWSTR lpSrcStr,
    LPINT lpnWideCharCount,
    LPSTR lpDstStr,
    LPINT lpnMultiCharCount
);
typedef HRESULT (WINAPI *ISCONVERTINETSTRINGAVAILABLE)(
    DWORD dwSrcEncoding,
    DWORD dwDstEncoding
);
typedef HRESULT (WINAPI *LCIDTORFC1766A)(
    LCID Locale,
    LPSTR pszRfc1766,
    int nChar
);
/* No pointer of this type is declared in this part of the file. */
typedef HRESULT (WINAPI *LCIDTORFC1766W)(
    LCID Locale,
    LPWSTR pszRfc1766,
    int nChar
);
typedef HRESULT (WINAPI *RFC1766TOLCIDA)(
    LCID *pLocale,
    LPSTR pszRfc1766
);
/* No pointer of this type is declared in this part of the file. */
typedef HRESULT (WINAPI *RFC1766TOLCIDW)(
    LCID *pLocale,
    LPWSTR pszRfc1766
);

/* NULL (static storage) until load_mlang() has filled them in. */
static CONVERTINETSTRING ConvertINetString;
static CONVERTINETMULTIBYTETOUNICODE ConvertINetMultiByteToUnicode;
static CONVERTINETUNICODETOMULTIBYTE ConvertINetUnicodeToMultiByte;
static ISCONVERTINETSTRINGAVAILABLE IsConvertINetStringAvailable;
static LCIDTORFC1766A LcidToRfc1766A;
static RFC1766TOLCIDA Rfc1766ToLcidA;
690 | |||
691 | static int | ||
692 | load_mlang(void) | ||
693 | { | ||
694 | HMODULE h; | ||
695 | if (ConvertINetString != NULL) | ||
696 | return TRUE; | ||
697 | h = LoadLibrary(TEXT("mlang.dll")); | ||
698 | if (!h) | ||
699 | return FALSE; | ||
700 | ConvertINetString = (CONVERTINETSTRING)GetProcAddressA(h, "ConvertINetString"); | ||
701 | ConvertINetMultiByteToUnicode = (CONVERTINETMULTIBYTETOUNICODE)GetProcAddressA(h, "ConvertINetMultiByteToUnicode"); | ||
702 | ConvertINetUnicodeToMultiByte = (CONVERTINETUNICODETOMULTIBYTE)GetProcAddressA(h, "ConvertINetUnicodeToMultiByte"); | ||
703 | IsConvertINetStringAvailable = (ISCONVERTINETSTRINGAVAILABLE)GetProcAddressA(h, "IsConvertINetStringAvailable"); | ||
704 | LcidToRfc1766A = (LCIDTORFC1766A)GetProcAddressA(h, "LcidToRfc1766A"); | ||
705 | Rfc1766ToLcidA = (RFC1766TOLCIDA)GetProcAddressA(h, "Rfc1766ToLcidA"); | ||
706 | return TRUE; | ||
707 | } | ||
708 | |||
709 | iconv_t | ||
710 | iconv_open(const char *tocode, const char *fromcode) | ||
711 | { | ||
712 | rec_iconv_t *cd; | ||
713 | |||
714 | cd = (rec_iconv_t *)xzalloc(sizeof(rec_iconv_t)); | ||
715 | |||
716 | /* reset the errno to prevent reporting wrong error code. | ||
717 | * 0 for unsorted error. */ | ||
718 | errno = 0; | ||
719 | if (win_iconv_open(cd, tocode, fromcode)) | ||
720 | return (iconv_t)cd; | ||
721 | |||
722 | free(cd); | ||
723 | |||
724 | return (iconv_t)(-1); | ||
725 | } | ||
726 | |||
727 | int | ||
728 | iconv_close(iconv_t _cd) | ||
729 | { | ||
730 | rec_iconv_t *cd = (rec_iconv_t *)_cd; | ||
731 | int r = cd->iconv_close(cd->cd); | ||
732 | int e = *(cd->_errno()); | ||
733 | free(cd); | ||
734 | errno = e; | ||
735 | return r; | ||
736 | } | ||
737 | |||
738 | size_t | ||
739 | iconv(iconv_t _cd, const char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft) | ||
740 | { | ||
741 | rec_iconv_t *cd = (rec_iconv_t *)_cd; | ||
742 | size_t r = cd->iconv(cd->cd, inbuf, inbytesleft, outbuf, outbytesleft); | ||
743 | errno = *(cd->_errno()); | ||
744 | return r; | ||
745 | } | ||
746 | |||
747 | static int | ||
748 | win_iconv_open(rec_iconv_t *cd, const char *tocode, const char *fromcode) | ||
749 | { | ||
750 | if (!make_csconv(fromcode, &cd->from) || !make_csconv(tocode, &cd->to)) | ||
751 | return FALSE; | ||
752 | cd->iconv_close = win_iconv_close; | ||
753 | cd->iconv = win_iconv; | ||
754 | cd->_errno = _errno; | ||
755 | cd->cd = (iconv_t)cd; | ||
756 | return TRUE; | ||
757 | } | ||
758 | |||
759 | static int | ||
760 | win_iconv_close(iconv_t cd UNUSED_PARAM) | ||
761 | { | ||
762 | return 0; | ||
763 | } | ||
764 | |||
765 | static size_t | ||
766 | win_iconv(iconv_t _cd, const char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft) | ||
767 | { | ||
768 | rec_iconv_t *cd = (rec_iconv_t *)_cd; | ||
769 | ushort wbuf[MB_CHAR_MAX]; /* enough room for one character */ | ||
770 | int insize; | ||
771 | int outsize; | ||
772 | int wsize; | ||
773 | DWORD frommode; | ||
774 | DWORD tomode; | ||
775 | uint wc; | ||
776 | compat_t *cp; | ||
777 | int i; | ||
778 | |||
779 | if (inbuf == NULL || *inbuf == NULL) | ||
780 | { | ||
781 | if (outbuf != NULL && *outbuf != NULL && cd->to.flush != NULL) | ||
782 | { | ||
783 | tomode = cd->to.mode; | ||
784 | outsize = cd->to.flush(&cd->to, (uchar *)*outbuf, *outbytesleft); | ||
785 | if (outsize == -1) | ||
786 | { | ||
787 | if ((cd->to.flags & FLAG_IGNORE) && errno != E2BIG) | ||
788 | { | ||
789 | outsize = 0; | ||
790 | } | ||
791 | else | ||
792 | { | ||
793 | cd->to.mode = tomode; | ||
794 | return (size_t)(-1); | ||
795 | } | ||
796 | } | ||
797 | *outbuf += outsize; | ||
798 | *outbytesleft -= outsize; | ||
799 | } | ||
800 | cd->from.mode = 0; | ||
801 | cd->to.mode = 0; | ||
802 | return 0; | ||
803 | } | ||
804 | |||
805 | while (*inbytesleft != 0) | ||
806 | { | ||
807 | frommode = cd->from.mode; | ||
808 | tomode = cd->to.mode; | ||
809 | wsize = MB_CHAR_MAX; | ||
810 | |||
811 | insize = cd->from.mbtowc(&cd->from, (const uchar *)*inbuf, *inbytesleft, wbuf, &wsize); | ||
812 | if (insize == -1) | ||
813 | { | ||
814 | if (cd->to.flags & FLAG_IGNORE) | ||
815 | { | ||
816 | cd->from.mode = frommode; | ||
817 | insize = 1; | ||
818 | wsize = 0; | ||
819 | } | ||
820 | else | ||
821 | { | ||
822 | cd->from.mode = frommode; | ||
823 | return (size_t)(-1); | ||
824 | } | ||
825 | } | ||
826 | |||
827 | if (wsize == 0) | ||
828 | { | ||
829 | *inbuf += insize; | ||
830 | *inbytesleft -= insize; | ||
831 | continue; | ||
832 | } | ||
833 | |||
834 | if (cd->from.compat != NULL) | ||
835 | { | ||
836 | wc = utf16_to_ucs4(wbuf); | ||
837 | cp = cd->from.compat; | ||
838 | for (i = 0; cp[i].in != 0; ++i) | ||
839 | { | ||
840 | if ((cp[i].flag & COMPAT_IN) && cp[i].out == wc) | ||
841 | { | ||
842 | ucs4_to_utf16(cp[i].in, wbuf, &wsize); | ||
843 | break; | ||
844 | } | ||
845 | } | ||
846 | } | ||
847 | |||
848 | if (cd->to.compat != NULL) | ||
849 | { | ||
850 | wc = utf16_to_ucs4(wbuf); | ||
851 | cp = cd->to.compat; | ||
852 | for (i = 0; cp[i].in != 0; ++i) | ||
853 | { | ||
854 | if ((cp[i].flag & COMPAT_OUT) && cp[i].in == wc) | ||
855 | { | ||
856 | ucs4_to_utf16(cp[i].out, wbuf, &wsize); | ||
857 | break; | ||
858 | } | ||
859 | } | ||
860 | } | ||
861 | |||
862 | outsize = cd->to.wctomb(&cd->to, wbuf, wsize, (uchar *)*outbuf, *outbytesleft); | ||
863 | if (outsize == -1) | ||
864 | { | ||
865 | if ((cd->to.flags & FLAG_IGNORE) && errno != E2BIG) | ||
866 | { | ||
867 | cd->to.mode = tomode; | ||
868 | outsize = 0; | ||
869 | } | ||
870 | else | ||
871 | { | ||
872 | cd->from.mode = frommode; | ||
873 | cd->to.mode = tomode; | ||
874 | return (size_t)(-1); | ||
875 | } | ||
876 | } | ||
877 | |||
878 | *inbuf += insize; | ||
879 | *outbuf += outsize; | ||
880 | *inbytesleft -= insize; | ||
881 | *outbytesleft -= outsize; | ||
882 | } | ||
883 | |||
884 | return 0; | ||
885 | } | ||
886 | |||
887 | static int | ||
888 | make_csconv(const char *_name, csconv_t *cv) | ||
889 | { | ||
890 | CPINFO cpinfo; | ||
891 | int use_compat = TRUE; | ||
892 | int flag = 0; | ||
893 | char *name; | ||
894 | char *p; | ||
895 | |||
896 | name = xstrndup(_name, strlen(_name)); | ||
897 | if (name == NULL) | ||
898 | return FALSE; | ||
899 | |||
900 | /* check for option "enc_name//opt1//opt2" */ | ||
901 | while ((p = strrstr(name, "//")) != NULL) | ||
902 | { | ||
903 | if (_stricmp(p + 2, "nocompat") == 0) | ||
904 | use_compat = FALSE; | ||
905 | else if (_stricmp(p + 2, "translit") == 0) | ||
906 | flag |= FLAG_TRANSLIT; | ||
907 | else if (_stricmp(p + 2, "ignore") == 0) | ||
908 | flag |= FLAG_IGNORE; | ||
909 | *p = 0; | ||
910 | } | ||
911 | |||
912 | cv->mode = 0; | ||
913 | cv->flags = flag; | ||
914 | cv->mblen = NULL; | ||
915 | cv->flush = NULL; | ||
916 | cv->compat = NULL; | ||
917 | cv->codepage = name_to_codepage(name); | ||
918 | if (cv->codepage == 1200 || cv->codepage == 1201) | ||
919 | { | ||
920 | cv->mbtowc = utf16_mbtowc; | ||
921 | cv->wctomb = utf16_wctomb; | ||
922 | if (_stricmp(name, "UTF-16") == 0 || _stricmp(name, "UTF16") == 0 || | ||
923 | _stricmp(name, "UCS-2") == 0 || _stricmp(name, "UCS2") == 0 || | ||
924 | _stricmp(name,"UCS-2-INTERNAL") == 0) | ||
925 | cv->flags |= FLAG_USE_BOM; | ||
926 | } | ||
927 | else if (cv->codepage == 12000 || cv->codepage == 12001) | ||
928 | { | ||
929 | cv->mbtowc = utf32_mbtowc; | ||
930 | cv->wctomb = utf32_wctomb; | ||
931 | if (_stricmp(name, "UTF-32") == 0 || _stricmp(name, "UTF32") == 0 || | ||
932 | _stricmp(name, "UCS-4") == 0 || _stricmp(name, "UCS4") == 0) | ||
933 | cv->flags |= FLAG_USE_BOM; | ||
934 | } | ||
935 | else if (cv->codepage == 65001) | ||
936 | { | ||
937 | cv->mbtowc = kernel_mbtowc; | ||
938 | cv->wctomb = kernel_wctomb; | ||
939 | cv->mblen = utf8_mblen; | ||
940 | } | ||
941 | else if ((cv->codepage == 50220 || cv->codepage == 50221 || cv->codepage == 50222) && load_mlang()) | ||
942 | { | ||
943 | cv->mbtowc = iso2022jp_mbtowc; | ||
944 | cv->wctomb = iso2022jp_wctomb; | ||
945 | cv->flush = iso2022jp_flush; | ||
946 | } | ||
947 | else if (cv->codepage == 51932 && load_mlang()) | ||
948 | { | ||
949 | cv->mbtowc = mlang_mbtowc; | ||
950 | cv->wctomb = mlang_wctomb; | ||
951 | cv->mblen = eucjp_mblen; | ||
952 | } | ||
953 | else if (IsValidCodePage(cv->codepage) | ||
954 | && GetCPInfo(cv->codepage, &cpinfo) != 0) | ||
955 | { | ||
956 | cv->mbtowc = kernel_mbtowc; | ||
957 | cv->wctomb = kernel_wctomb; | ||
958 | if (cpinfo.MaxCharSize == 1) | ||
959 | cv->mblen = sbcs_mblen; | ||
960 | else if (cpinfo.MaxCharSize == 2) | ||
961 | cv->mblen = dbcs_mblen; | ||
962 | else | ||
963 | cv->mblen = mbcs_mblen; | ||
964 | } | ||
965 | else | ||
966 | { | ||
967 | /* not supported */ | ||
968 | free(name); | ||
969 | errno = EINVAL; | ||
970 | return FALSE; | ||
971 | } | ||
972 | |||
973 | if (use_compat) | ||
974 | { | ||
975 | switch (cv->codepage) | ||
976 | { | ||
977 | case 932: cv->compat = cp932_compat; break; | ||
978 | case 20932: cv->compat = cp20932_compat; break; | ||
979 | case 51932: cv->compat = cp51932_compat; break; | ||
980 | case 50220: case 50221: case 50222: cv->compat = cp5022x_compat; break; | ||
981 | } | ||
982 | } | ||
983 | |||
984 | free(name); | ||
985 | |||
986 | return TRUE; | ||
987 | } | ||
988 | |||
989 | static int | ||
990 | name_to_codepage(const char *name) | ||
991 | { | ||
992 | int i; | ||
993 | |||
994 | if (*name == '\0' || | ||
995 | strcmp(name, "char") == 0) | ||
996 | return GetACP(); | ||
997 | else if (strcmp(name, "wchar_t") == 0) | ||
998 | return 1200; | ||
999 | else if (_strnicmp(name, "cp", 2) == 0) | ||
1000 | return atoi(name + 2); /* CP123 */ | ||
1001 | else if ('0' <= name[0] && name[0] <= '9') | ||
1002 | return atoi(name); /* 123 */ | ||
1003 | else if (_strnicmp(name, "xx", 2) == 0) | ||
1004 | return atoi(name + 2); /* XX123 for debug */ | ||
1005 | |||
1006 | for (i = 0; codepage_alias[i].name != NULL; ++i) | ||
1007 | if (_stricmp(name, codepage_alias[i].name) == 0) | ||
1008 | return codepage_alias[i].codepage; | ||
1009 | return -1; | ||
1010 | } | ||
1011 | |||
/*
 * Decode the first character of a UTF-16 buffer into a code point.
 * See http://www.faqs.org/rfcs/rfc2781.html
 *
 * When wbuf[0] is a high surrogate (0xD800..0xDBFF) it is combined with
 * wbuf[1], which is not validated.  Any other unit is returned unchanged.
 */
static uint
utf16_to_ucs4(const ushort *wbuf)
{
    const ushort lead = wbuf[0];

    if (lead < 0xD800 || lead > 0xDBFF)
        return lead;
    return ((lead & 0x3FF) << 10) + (wbuf[1] & 0x3FF) + 0x10000;
}
1023 | |||
/*
 * Encode a code point as UTF-16.
 *
 * Values below 0x10000 are stored as a single unit; anything else is stored
 * as a surrogate pair.  *wbufsize receives the number of units written
 * (1 or 2); its previous value is not consulted.
 */
static void
ucs4_to_utf16(uint wc, ushort *wbuf, int *wbufsize)
{
    if (wc >= 0x10000)
    {
        const uint offset = wc - 0x10000;

        wbuf[0] = 0xD800 | ((offset >> 10) & 0x3FF);
        wbuf[1] = 0xDC00 | (offset & 0x3FF);
        *wbufsize = 2;
        return;
    }

    wbuf[0] = wc;
    *wbufsize = 1;
}
1040 | |||
1041 | /* | ||
1042 | * Check if codepage is one of those for which the dwFlags parameter | ||
1043 | * to MultiByteToWideChar() must be zero. Return zero or | ||
1044 | * MB_ERR_INVALID_CHARS. The docs in Platform SDK for Windows | ||
1045 | * Server 2003 R2 claims that also codepage 65001 is one of these, but | ||
1046 | * that doesn't seem to be the case. The MSDN docs for MSVS2008 leave | ||
1047 | * out 65001 (UTF-8), and that indeed seems to be the case on XP, it | ||
1048 | * works fine to pass MB_ERR_INVALID_CHARS in dwFlags when converting | ||
1049 | * from UTF-8. | ||
1050 | */ | ||
1051 | static int | ||
1052 | mbtowc_flags(int codepage) | ||
1053 | { | ||
1054 | return (codepage == 50220 || codepage == 50221 || | ||
1055 | codepage == 50222 || codepage == 50225 || | ||
1056 | codepage == 50227 || codepage == 50229 || | ||
1057 | codepage == 52936 || codepage == 54936 || | ||
1058 | (codepage >= 57002 && codepage <= 57011) || | ||
1059 | codepage == 65000 || codepage == 42) ? 0 : MB_ERR_INVALID_CHARS; | ||
1060 | } | ||
1061 | |||
/*
 * Check whether codepage is one of those for which the lpUsedDefaultChar
 * parameter to WideCharToMultiByte() must be NULL.  Returns 1 if so and
 * 0 otherwise.
 *
 * The docs in the Platform SDK for Windows Server 2003 R2 claim that this
 * is the list below, while the MSDN docs for MSVS2008 claim that it is only
 * 65000 (UTF-7) and 65001 (UTF-8).  This time the earlier Platform SDK
 * seems to be correct, at least for XP.
 */
static int
must_use_null_useddefaultchar(int codepage)
{
    if (codepage >= 57002 && codepage <= 57011)
        return 1;

    switch (codepage)
    {
    case 42:
    case 50220:
    case 50221:
    case 50222:
    case 50225:
    case 50227:
    case 50229:
    case 52936:
    case 54936:
    case 65000:
    case 65001:
        return 1;
    default:
        return 0;
    }
}
1081 | |||
/*
 * Store err in errno and return -1, so that a failure can be reported with
 * a single "return seterror(code);".
 */
static int seterror(int err)
{
    errno = err;
    return -1;
}
1088 | |||
1089 | static int | ||
1090 | sbcs_mblen(csconv_t *cv UNUSED_PARAM, const uchar *buf UNUSED_PARAM, | ||
1091 | int bufsize UNUSED_PARAM) | ||
1092 | { | ||
1093 | return 1; | ||
1094 | } | ||
1095 | |||
1096 | static int | ||
1097 | dbcs_mblen(csconv_t *cv, const uchar *buf, int bufsize) | ||
1098 | { | ||
1099 | int len = IsDBCSLeadByteEx(cv->codepage, buf[0]) ? 2 : 1; | ||
1100 | if (bufsize < len) | ||
1101 | return seterror(EINVAL); | ||
1102 | return len; | ||
1103 | } | ||
1104 | |||
1105 | static int | ||
1106 | mbcs_mblen(csconv_t *cv, const uchar *buf, int bufsize) | ||
1107 | { | ||
1108 | int len = 0; | ||
1109 | |||
1110 | if (cv->codepage == 54936) { | ||
1111 | if (buf[0] <= 0x7F) | ||
1112 | len = 1; | ||
1113 | else if (buf[0] >= 0x81 && buf[0] <= 0xFE && | ||
1114 | bufsize >= 2 && | ||
1115 | ((buf[1] >= 0x40 && buf[1] <= 0x7E) || | ||
1116 | (buf[1] >= 0x80 && buf[1] <= 0xFE))) | ||
1117 | len = 2; | ||
1118 | else if (buf[0] >= 0x81 && buf[0] <= 0xFE && | ||
1119 | bufsize >= 4 && | ||
1120 | buf[1] >= 0x30 && buf[1] <= 0x39) | ||
1121 | len = 4; | ||
1122 | else | ||
1123 | return seterror(EINVAL); | ||
1124 | return len; | ||
1125 | } | ||
1126 | else | ||
1127 | return seterror(EINVAL); | ||
1128 | } | ||
1129 | |||
1130 | static int | ||
1131 | utf8_mblen(csconv_t *cv UNUSED_PARAM, const uchar *buf, int bufsize) | ||
1132 | { | ||
1133 | int len = 0; | ||
1134 | |||
1135 | if (buf[0] < 0x80) len = 1; | ||
1136 | else if ((buf[0] & 0xE0) == 0xC0) len = 2; | ||
1137 | else if ((buf[0] & 0xF0) == 0xE0) len = 3; | ||
1138 | else if ((buf[0] & 0xF8) == 0xF0) len = 4; | ||
1139 | else if ((buf[0] & 0xFC) == 0xF8) len = 5; | ||
1140 | else if ((buf[0] & 0xFE) == 0xFC) len = 6; | ||
1141 | |||
1142 | if (len == 0) | ||
1143 | return seterror(EILSEQ); | ||
1144 | else if (bufsize < len) | ||
1145 | return seterror(EINVAL); | ||
1146 | return len; | ||
1147 | } | ||
1148 | |||
1149 | static int | ||
1150 | eucjp_mblen(csconv_t *cv UNUSED_PARAM, const uchar *buf, int bufsize) | ||
1151 | { | ||
1152 | if (buf[0] < 0x80) /* ASCII */ | ||
1153 | return 1; | ||
1154 | else if (buf[0] == 0x8E) /* JIS X 0201 */ | ||
1155 | { | ||
1156 | if (bufsize < 2) | ||
1157 | return seterror(EINVAL); | ||
1158 | else if (!(0xA1 <= buf[1] && buf[1] <= 0xDF)) | ||
1159 | return seterror(EILSEQ); | ||
1160 | return 2; | ||
1161 | } | ||
1162 | else if (buf[0] == 0x8F) /* JIS X 0212 */ | ||
1163 | { | ||
1164 | if (bufsize < 3) | ||
1165 | return seterror(EINVAL); | ||
1166 | else if (!(0xA1 <= buf[1] && buf[1] <= 0xFE) | ||
1167 | || !(0xA1 <= buf[2] && buf[2] <= 0xFE)) | ||
1168 | return seterror(EILSEQ); | ||
1169 | return 3; | ||
1170 | } | ||
1171 | else /* JIS X 0208 */ | ||
1172 | { | ||
1173 | if (bufsize < 2) | ||
1174 | return seterror(EINVAL); | ||
1175 | else if (!(0xA1 <= buf[0] && buf[0] <= 0xFE) | ||
1176 | || !(0xA1 <= buf[1] && buf[1] <= 0xFE)) | ||
1177 | return seterror(EILSEQ); | ||
1178 | return 2; | ||
1179 | } | ||
1180 | } | ||
1181 | |||
1182 | static int | ||
1183 | kernel_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize) | ||
1184 | { | ||
1185 | int len; | ||
1186 | |||
1187 | len = cv->mblen(cv, buf, bufsize); | ||
1188 | if (len == -1) | ||
1189 | return -1; | ||
1190 | /* If converting from ASCII, reject 8bit | ||
1191 | * chars. MultiByteToWideChar() doesn't. Note that for ASCII we | ||
1192 | * know that the mblen function is sbcs_mblen() so len is 1. | ||
1193 | */ | ||
1194 | if (cv->codepage == 20127 && buf[0] >= 0x80) | ||
1195 | return seterror(EILSEQ); | ||
1196 | *wbufsize = MultiByteToWideChar(cv->codepage, mbtowc_flags (cv->codepage), | ||
1197 | (const char *)buf, len, (wchar_t *)wbuf, *wbufsize); | ||
1198 | if (*wbufsize == 0) | ||
1199 | return seterror(EILSEQ); | ||
1200 | return len; | ||
1201 | } | ||
1202 | |||
1203 | static int | ||
1204 | kernel_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize) | ||
1205 | { | ||
1206 | BOOL usedDefaultChar = 0; | ||
1207 | BOOL *p = NULL; | ||
1208 | int flags = 0; | ||
1209 | int len; | ||
1210 | |||
1211 | if (bufsize == 0) | ||
1212 | return seterror(E2BIG); | ||
1213 | if (!must_use_null_useddefaultchar(cv->codepage)) | ||
1214 | { | ||
1215 | p = &usedDefaultChar; | ||
1216 | #ifdef WC_NO_BEST_FIT_CHARS | ||
1217 | if (!(cv->flags & FLAG_TRANSLIT)) | ||
1218 | flags |= WC_NO_BEST_FIT_CHARS; | ||
1219 | #endif | ||
1220 | } | ||
1221 | len = WideCharToMultiByte(cv->codepage, flags, | ||
1222 | (const wchar_t *)wbuf, wbufsize, (char *)buf, bufsize, NULL, p); | ||
1223 | if (len == 0) | ||
1224 | { | ||
1225 | if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) | ||
1226 | return seterror(E2BIG); | ||
1227 | return seterror(EILSEQ); | ||
1228 | } | ||
1229 | else if (usedDefaultChar && !(cv->flags & FLAG_TRANSLIT)) | ||
1230 | return seterror(EILSEQ); | ||
1231 | else if (cv->mblen(cv, buf, len) != len) /* validate result */ | ||
1232 | return seterror(EILSEQ); | ||
1233 | return len; | ||
1234 | } | ||
1235 | |||
1236 | /* | ||
1237 | * It seems that the mode (cv->mode) is fixnum. | ||
1238 | * For example, when converting iso-2022-jp(cp50221) to unicode: | ||
1239 | * in ascii sequence: mode=0xC42C0000 | ||
1240 | * in jisx0208 sequence: mode=0xC42C0001 | ||
1241 | * "C42C" is same for each convert session. | ||
1242 | * It should be: ((codepage-1)<<16)|state | ||
1243 | */ | ||
1244 | static int | ||
1245 | mlang_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize) | ||
1246 | { | ||
1247 | int len; | ||
1248 | int insize; | ||
1249 | HRESULT hr; | ||
1250 | |||
1251 | len = cv->mblen(cv, buf, bufsize); | ||
1252 | if (len == -1) | ||
1253 | return -1; | ||
1254 | insize = len; | ||
1255 | hr = ConvertINetMultiByteToUnicode(&cv->mode, cv->codepage, | ||
1256 | (const char *)buf, &insize, (wchar_t *)wbuf, wbufsize); | ||
1257 | if (hr != S_OK || insize != len) | ||
1258 | return seterror(EILSEQ); | ||
1259 | return len; | ||
1260 | } | ||
1261 | |||
1262 | static int | ||
1263 | mlang_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize) | ||
1264 | { | ||
1265 | char tmpbuf[MB_CHAR_MAX]; /* enough room for one character */ | ||
1266 | int tmpsize = MB_CHAR_MAX; | ||
1267 | int insize = wbufsize; | ||
1268 | HRESULT hr; | ||
1269 | |||
1270 | hr = ConvertINetUnicodeToMultiByte(&cv->mode, cv->codepage, | ||
1271 | (const wchar_t *)wbuf, &wbufsize, tmpbuf, &tmpsize); | ||
1272 | if (hr != S_OK || insize != wbufsize) | ||
1273 | return seterror(EILSEQ); | ||
1274 | else if (bufsize < tmpsize) | ||
1275 | return seterror(E2BIG); | ||
1276 | else if (cv->mblen(cv, (uchar *)tmpbuf, tmpsize) != tmpsize) | ||
1277 | return seterror(EILSEQ); | ||
1278 | memcpy(buf, tmpbuf, tmpsize); | ||
1279 | return tmpsize; | ||
1280 | } | ||
1281 | |||
1282 | static int | ||
1283 | utf16_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize) | ||
1284 | { | ||
1285 | int codepage = cv->codepage; | ||
1286 | |||
1287 | /* swap endian: 1200 <-> 1201 */ | ||
1288 | if (cv->mode & UNICODE_MODE_SWAPPED) | ||
1289 | codepage ^= 1; | ||
1290 | |||
1291 | if (bufsize < 2) | ||
1292 | return seterror(EINVAL); | ||
1293 | if (codepage == 1200) /* little endian */ | ||
1294 | wbuf[0] = (buf[1] << 8) | buf[0]; | ||
1295 | else if (codepage == 1201) /* big endian */ | ||
1296 | wbuf[0] = (buf[0] << 8) | buf[1]; | ||
1297 | |||
1298 | if ((cv->flags & FLAG_USE_BOM) && !(cv->mode & UNICODE_MODE_BOM_DONE)) | ||
1299 | { | ||
1300 | cv->mode |= UNICODE_MODE_BOM_DONE; | ||
1301 | if (wbuf[0] == 0xFFFE) | ||
1302 | { | ||
1303 | cv->mode |= UNICODE_MODE_SWAPPED; | ||
1304 | *wbufsize = 0; | ||
1305 | return 2; | ||
1306 | } | ||
1307 | else if (wbuf[0] == 0xFEFF) | ||
1308 | { | ||
1309 | *wbufsize = 0; | ||
1310 | return 2; | ||
1311 | } | ||
1312 | } | ||
1313 | |||
1314 | if (0xDC00 <= wbuf[0] && wbuf[0] <= 0xDFFF) | ||
1315 | return seterror(EILSEQ); | ||
1316 | if (0xD800 <= wbuf[0] && wbuf[0] <= 0xDBFF) | ||
1317 | { | ||
1318 | if (bufsize < 4) | ||
1319 | return seterror(EINVAL); | ||
1320 | if (codepage == 1200) /* little endian */ | ||
1321 | wbuf[1] = (buf[3] << 8) | buf[2]; | ||
1322 | else if (codepage == 1201) /* big endian */ | ||
1323 | wbuf[1] = (buf[2] << 8) | buf[3]; | ||
1324 | if (!(0xDC00 <= wbuf[1] && wbuf[1] <= 0xDFFF)) | ||
1325 | return seterror(EILSEQ); | ||
1326 | *wbufsize = 2; | ||
1327 | return 4; | ||
1328 | } | ||
1329 | *wbufsize = 1; | ||
1330 | return 2; | ||
1331 | } | ||
1332 | |||
1333 | static int | ||
1334 | utf16_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize) | ||
1335 | { | ||
1336 | if ((cv->flags & FLAG_USE_BOM) && !(cv->mode & UNICODE_MODE_BOM_DONE)) | ||
1337 | { | ||
1338 | int r; | ||
1339 | |||
1340 | cv->mode |= UNICODE_MODE_BOM_DONE; | ||
1341 | if (bufsize < 2) | ||
1342 | return seterror(E2BIG); | ||
1343 | if (cv->codepage == 1200) /* little endian */ | ||
1344 | memcpy(buf, "\xFF\xFE", 2); | ||
1345 | else if (cv->codepage == 1201) /* big endian */ | ||
1346 | memcpy(buf, "\xFE\xFF", 2); | ||
1347 | |||
1348 | r = utf16_wctomb(cv, wbuf, wbufsize, buf + 2, bufsize - 2); | ||
1349 | if (r == -1) | ||
1350 | return -1; | ||
1351 | return r + 2; | ||
1352 | } | ||
1353 | |||
1354 | if (bufsize < 2) | ||
1355 | return seterror(E2BIG); | ||
1356 | if (cv->codepage == 1200) /* little endian */ | ||
1357 | { | ||
1358 | buf[0] = (wbuf[0] & 0x00FF); | ||
1359 | buf[1] = (wbuf[0] & 0xFF00) >> 8; | ||
1360 | } | ||
1361 | else if (cv->codepage == 1201) /* big endian */ | ||
1362 | { | ||
1363 | buf[0] = (wbuf[0] & 0xFF00) >> 8; | ||
1364 | buf[1] = (wbuf[0] & 0x00FF); | ||
1365 | } | ||
1366 | if (0xD800 <= wbuf[0] && wbuf[0] <= 0xDBFF) | ||
1367 | { | ||
1368 | if (bufsize < 4) | ||
1369 | return seterror(E2BIG); | ||
1370 | if (cv->codepage == 1200) /* little endian */ | ||
1371 | { | ||
1372 | buf[2] = (wbuf[1] & 0x00FF); | ||
1373 | buf[3] = (wbuf[1] & 0xFF00) >> 8; | ||
1374 | } | ||
1375 | else if (cv->codepage == 1201) /* big endian */ | ||
1376 | { | ||
1377 | buf[2] = (wbuf[1] & 0xFF00) >> 8; | ||
1378 | buf[3] = (wbuf[1] & 0x00FF); | ||
1379 | } | ||
1380 | return 4; | ||
1381 | } | ||
1382 | return 2; | ||
1383 | } | ||
1384 | |||
1385 | static int | ||
1386 | utf32_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize) | ||
1387 | { | ||
1388 | int codepage = cv->codepage; | ||
1389 | uint wc = 0xD800; | ||
1390 | |||
1391 | /* swap endian: 12000 <-> 12001 */ | ||
1392 | if (cv->mode & UNICODE_MODE_SWAPPED) | ||
1393 | codepage ^= 1; | ||
1394 | |||
1395 | if (bufsize < 4) | ||
1396 | return seterror(EINVAL); | ||
1397 | if (codepage == 12000) /* little endian */ | ||
1398 | wc = (buf[3] << 24) | (buf[2] << 16) | (buf[1] << 8) | buf[0]; | ||
1399 | else if (codepage == 12001) /* big endian */ | ||
1400 | wc = (buf[0] << 24) | (buf[1] << 16) | (buf[2] << 8) | buf[3]; | ||
1401 | |||
1402 | if ((cv->flags & FLAG_USE_BOM) && !(cv->mode & UNICODE_MODE_BOM_DONE)) | ||
1403 | { | ||
1404 | cv->mode |= UNICODE_MODE_BOM_DONE; | ||
1405 | if (wc == 0xFFFE0000) | ||
1406 | { | ||
1407 | cv->mode |= UNICODE_MODE_SWAPPED; | ||
1408 | *wbufsize = 0; | ||
1409 | return 4; | ||
1410 | } | ||
1411 | else if (wc == 0x0000FEFF) | ||
1412 | { | ||
1413 | *wbufsize = 0; | ||
1414 | return 4; | ||
1415 | } | ||
1416 | } | ||
1417 | |||
1418 | if ((0xD800 <= wc && wc <= 0xDFFF) || 0x10FFFF < wc) | ||
1419 | return seterror(EILSEQ); | ||
1420 | ucs4_to_utf16(wc, wbuf, wbufsize); | ||
1421 | return 4; | ||
1422 | } | ||
1423 | |||
1424 | static int | ||
1425 | utf32_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize) | ||
1426 | { | ||
1427 | uint wc; | ||
1428 | |||
1429 | if ((cv->flags & FLAG_USE_BOM) && !(cv->mode & UNICODE_MODE_BOM_DONE)) | ||
1430 | { | ||
1431 | int r; | ||
1432 | |||
1433 | cv->mode |= UNICODE_MODE_BOM_DONE; | ||
1434 | if (bufsize < 4) | ||
1435 | return seterror(E2BIG); | ||
1436 | if (cv->codepage == 12000) /* little endian */ | ||
1437 | memcpy(buf, "\xFF\xFE\x00\x00", 4); | ||
1438 | else if (cv->codepage == 12001) /* big endian */ | ||
1439 | memcpy(buf, "\x00\x00\xFE\xFF", 4); | ||
1440 | |||
1441 | r = utf32_wctomb(cv, wbuf, wbufsize, buf + 4, bufsize - 4); | ||
1442 | if (r == -1) | ||
1443 | return -1; | ||
1444 | return r + 4; | ||
1445 | } | ||
1446 | |||
1447 | if (bufsize < 4) | ||
1448 | return seterror(E2BIG); | ||
1449 | wc = utf16_to_ucs4(wbuf); | ||
1450 | if (cv->codepage == 12000) /* little endian */ | ||
1451 | { | ||
1452 | buf[0] = wc & 0x000000FF; | ||
1453 | buf[1] = (wc & 0x0000FF00) >> 8; | ||
1454 | buf[2] = (wc & 0x00FF0000) >> 16; | ||
1455 | buf[3] = (wc & 0xFF000000) >> 24; | ||
1456 | } | ||
1457 | else if (cv->codepage == 12001) /* big endian */ | ||
1458 | { | ||
1459 | buf[0] = (wc & 0xFF000000) >> 24; | ||
1460 | buf[1] = (wc & 0x00FF0000) >> 16; | ||
1461 | buf[2] = (wc & 0x0000FF00) >> 8; | ||
1462 | buf[3] = wc & 0x000000FF; | ||
1463 | } | ||
1464 | return 4; | ||
1465 | } | ||
1466 | |||
/*
 * 50220: ISO 2022 Japanese with no halfwidth Katakana; Japanese (JIS)
 * 50221: ISO 2022 Japanese with halfwidth Katakana; Japanese (JIS-Allow
 *        1 byte Kana)
 * 50222: ISO 2022 Japanese JIS X 0201-1989; Japanese (JIS-Allow 1 byte
 *        Kana - SO/SI)
 *
 * MultiByteToWideChar() and WideCharToMultiByte() behave differently
 * depending on the Windows version.  On XP, WideCharToMultiByte() doesn't
 * terminate the result sequence with the ascii escape, but Vista does.
 * Use MLang instead.
 */

/* The conversion mode packs the character set into bits 8..15 and the
 * shift state into bits 0..7. */
#define ISO2022_MODE(cs, shift) (((cs) << 8) | (shift))
#define ISO2022_MODE_CS(mode) (((mode) >> 8) & 0xFF)
#define ISO2022_MODE_SHIFT(mode) ((mode) & 0xFF)

/* shift states */
enum {
    ISO2022_SI = 0,
    ISO2022_SO = 1
};

/* shift in */
static const char iso2022_SI_seq[] = "\x0F";
/* shift out */
static const char iso2022_SO_seq[] = "\x0E";

/* One escape sequence and the character set it selects. */
typedef struct iso2022_esc_t iso2022_esc_t;
struct iso2022_esc_t {
    const char *esc; /* the escape sequence itself */
    int esc_len;     /* length of esc in bytes */
    int len;         /* bytes per character in this character set */
    int cs;          /* character set selected by esc */
};

/* character set ids; they double as indexes into iso2022jp_esc[] */
enum {
    ISO2022JP_CS_ASCII = 0,
    ISO2022JP_CS_JISX0201_ROMAN = 1,
    ISO2022JP_CS_JISX0201_KANA = 2,
    ISO2022JP_CS_JISX0208_1978 = 3,
    ISO2022JP_CS_JISX0208_1983 = 4,
    ISO2022JP_CS_JISX0212 = 5
};

static iso2022_esc_t iso2022jp_esc[] = {
    { .esc = "\x1B\x28\x42", .esc_len = 3, .len = 1, .cs = ISO2022JP_CS_ASCII },
    { .esc = "\x1B\x28\x4A", .esc_len = 3, .len = 1, .cs = ISO2022JP_CS_JISX0201_ROMAN },
    { .esc = "\x1B\x28\x49", .esc_len = 3, .len = 1, .cs = ISO2022JP_CS_JISX0201_KANA },
    { .esc = "\x1B\x24\x40", .esc_len = 3, .len = 2, .cs = ISO2022JP_CS_JISX0208_1983 }, /* unify 1978 with 1983 */
    { .esc = "\x1B\x24\x42", .esc_len = 3, .len = 2, .cs = ISO2022JP_CS_JISX0208_1983 },
    { .esc = "\x1B\x24\x28\x44", .esc_len = 4, .len = 2, .cs = ISO2022JP_CS_JISX0212 },
    { .esc = NULL, .esc_len = 0, .len = 0, .cs = 0 }
};
1516 | |||
1517 | static int | ||
1518 | iso2022jp_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize) | ||
1519 | { | ||
1520 | iso2022_esc_t *iesc = iso2022jp_esc; | ||
1521 | char tmp[MB_CHAR_MAX]; | ||
1522 | int insize; | ||
1523 | HRESULT hr; | ||
1524 | DWORD dummy = 0; | ||
1525 | int len; | ||
1526 | int esc_len; | ||
1527 | int cs; | ||
1528 | int shift; | ||
1529 | int i; | ||
1530 | |||
1531 | if (buf[0] == 0x1B) | ||
1532 | { | ||
1533 | for (i = 0; iesc[i].esc != NULL; ++i) | ||
1534 | { | ||
1535 | esc_len = iesc[i].esc_len; | ||
1536 | if (bufsize < esc_len) | ||
1537 | { | ||
1538 | if (strncmp((char *)buf, iesc[i].esc, bufsize) == 0) | ||
1539 | return seterror(EINVAL); | ||
1540 | } | ||
1541 | else | ||
1542 | { | ||
1543 | if (strncmp((char *)buf, iesc[i].esc, esc_len) == 0) | ||
1544 | { | ||
1545 | cv->mode = ISO2022_MODE(iesc[i].cs, ISO2022_SI); | ||
1546 | *wbufsize = 0; | ||
1547 | return esc_len; | ||
1548 | } | ||
1549 | } | ||
1550 | } | ||
1551 | /* not supported escape sequence */ | ||
1552 | return seterror(EILSEQ); | ||
1553 | } | ||
1554 | else if (buf[0] == iso2022_SO_seq[0]) | ||
1555 | { | ||
1556 | cv->mode = ISO2022_MODE(ISO2022_MODE_CS(cv->mode), ISO2022_SO); | ||
1557 | *wbufsize = 0; | ||
1558 | return 1; | ||
1559 | } | ||
1560 | else if (buf[0] == iso2022_SI_seq[0]) | ||
1561 | { | ||
1562 | cv->mode = ISO2022_MODE(ISO2022_MODE_CS(cv->mode), ISO2022_SI); | ||
1563 | *wbufsize = 0; | ||
1564 | return 1; | ||
1565 | } | ||
1566 | |||
1567 | cs = ISO2022_MODE_CS(cv->mode); | ||
1568 | shift = ISO2022_MODE_SHIFT(cv->mode); | ||
1569 | |||
1570 | /* reset the mode for informal sequence */ | ||
1571 | if (buf[0] < 0x20) | ||
1572 | { | ||
1573 | cs = ISO2022JP_CS_ASCII; | ||
1574 | shift = ISO2022_SI; | ||
1575 | } | ||
1576 | |||
1577 | len = iesc[cs].len; | ||
1578 | if (bufsize < len) | ||
1579 | return seterror(EINVAL); | ||
1580 | for (i = 0; i < len; ++i) | ||
1581 | if (!(buf[i] < 0x80)) | ||
1582 | return seterror(EILSEQ); | ||
1583 | esc_len = iesc[cs].esc_len; | ||
1584 | memcpy(tmp, iesc[cs].esc, esc_len); | ||
1585 | if (shift == ISO2022_SO) | ||
1586 | { | ||
1587 | memcpy(tmp + esc_len, iso2022_SO_seq, 1); | ||
1588 | esc_len += 1; | ||
1589 | } | ||
1590 | memcpy(tmp + esc_len, buf, len); | ||
1591 | |||
1592 | if ((cv->codepage == 50220 || cv->codepage == 50221 | ||
1593 | || cv->codepage == 50222) && shift == ISO2022_SO) | ||
1594 | { | ||
1595 | /* XXX: shift-out cannot be used for mbtowc (both kernel and | ||
1596 | * mlang) */ | ||
1597 | esc_len = iesc[ISO2022JP_CS_JISX0201_KANA].esc_len; | ||
1598 | memcpy(tmp, iesc[ISO2022JP_CS_JISX0201_KANA].esc, esc_len); | ||
1599 | memcpy(tmp + esc_len, buf, len); | ||
1600 | } | ||
1601 | |||
1602 | insize = len + esc_len; | ||
1603 | hr = ConvertINetMultiByteToUnicode(&dummy, cv->codepage, | ||
1604 | (const char *)tmp, &insize, (wchar_t *)wbuf, wbufsize); | ||
1605 | if (hr != S_OK || insize != len + esc_len) | ||
1606 | return seterror(EILSEQ); | ||
1607 | |||
1608 | /* Check for conversion error. Assuming defaultChar is 0x3F. */ | ||
1609 | /* ascii should be converted from ascii */ | ||
1610 | if (wbuf[0] == buf[0] | ||
1611 | && cv->mode != ISO2022_MODE(ISO2022JP_CS_ASCII, ISO2022_SI)) | ||
1612 | return seterror(EILSEQ); | ||
1613 | |||
1614 | /* reset the mode for informal sequence */ | ||
1615 | if (cv->mode != ISO2022_MODE(cs, shift)) | ||
1616 | cv->mode = ISO2022_MODE(cs, shift); | ||
1617 | |||
1618 | return len; | ||
1619 | } | ||
1620 | |||
1621 | static int | ||
1622 | iso2022jp_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize) | ||
1623 | { | ||
1624 | iso2022_esc_t *iesc = iso2022jp_esc; | ||
1625 | char tmp[MB_CHAR_MAX]; | ||
1626 | int tmpsize = MB_CHAR_MAX; | ||
1627 | int insize = wbufsize; | ||
1628 | HRESULT hr; | ||
1629 | DWORD dummy = 0; | ||
1630 | int len; | ||
1631 | int esc_len; | ||
1632 | int cs; | ||
1633 | int shift; | ||
1634 | int i; | ||
1635 | |||
1636 | /* | ||
1637 | * MultiByte = [escape sequence] + character + [escape sequence] | ||
1638 | * | ||
1639 | * Whether trailing escape sequence is added depends on which API is | ||
1640 | * used (kernel or MLang, and its version). | ||
1641 | */ | ||
1642 | hr = ConvertINetUnicodeToMultiByte(&dummy, cv->codepage, | ||
1643 | (const wchar_t *)wbuf, &wbufsize, tmp, &tmpsize); | ||
1644 | if (hr != S_OK || insize != wbufsize) | ||
1645 | return seterror(EILSEQ); | ||
1646 | else if (bufsize < tmpsize) | ||
1647 | return seterror(E2BIG); | ||
1648 | |||
1649 | if (tmpsize == 1) | ||
1650 | { | ||
1651 | cs = ISO2022JP_CS_ASCII; | ||
1652 | esc_len = 0; | ||
1653 | } | ||
1654 | else | ||
1655 | { | ||
1656 | for (i = 1; iesc[i].esc != NULL; ++i) | ||
1657 | { | ||
1658 | esc_len = iesc[i].esc_len; | ||
1659 | if (strncmp(tmp, iesc[i].esc, esc_len) == 0) | ||
1660 | { | ||
1661 | cs = iesc[i].cs; | ||
1662 | break; | ||
1663 | } | ||
1664 | } | ||
1665 | if (iesc[i].esc == NULL) | ||
1666 | /* not supported escape sequence */ | ||
1667 | return seterror(EILSEQ); | ||
1668 | } | ||
1669 | |||
1670 | shift = ISO2022_SI; | ||
1671 | if (tmp[esc_len] == iso2022_SO_seq[0]) | ||
1672 | { | ||
1673 | shift = ISO2022_SO; | ||
1674 | esc_len += 1; | ||
1675 | } | ||
1676 | |||
1677 | len = iesc[cs].len; | ||
1678 | |||
1679 | /* Check for converting error. Assuming defaultChar is 0x3F. */ | ||
1680 | /* ascii should be converted from ascii */ | ||
1681 | if (cs == ISO2022JP_CS_ASCII && !(wbuf[0] < 0x80)) | ||
1682 | return seterror(EILSEQ); | ||
1683 | else if (tmpsize < esc_len + len) | ||
1684 | return seterror(EILSEQ); | ||
1685 | |||
1686 | if (cv->mode == ISO2022_MODE(cs, shift)) | ||
1687 | { | ||
1688 | /* remove escape sequence */ | ||
1689 | if (esc_len != 0) | ||
1690 | memmove(tmp, tmp + esc_len, len); | ||
1691 | esc_len = 0; | ||
1692 | } | ||
1693 | else | ||
1694 | { | ||
1695 | if (cs == ISO2022JP_CS_ASCII) | ||
1696 | { | ||
1697 | esc_len = iesc[ISO2022JP_CS_ASCII].esc_len; | ||
1698 | memmove(tmp + esc_len, tmp, len); | ||
1699 | memcpy(tmp, iesc[ISO2022JP_CS_ASCII].esc, esc_len); | ||
1700 | } | ||
1701 | if (ISO2022_MODE_SHIFT(cv->mode) == ISO2022_SO) | ||
1702 | { | ||
1703 | /* shift-in before changing to other mode */ | ||
1704 | memmove(tmp + 1, tmp, len + esc_len); | ||
1705 | memcpy(tmp, iso2022_SI_seq, 1); | ||
1706 | esc_len += 1; | ||
1707 | } | ||
1708 | } | ||
1709 | |||
1710 | if (bufsize < len + esc_len) | ||
1711 | return seterror(E2BIG); | ||
1712 | memcpy(buf, tmp, len + esc_len); | ||
1713 | cv->mode = ISO2022_MODE(cs, shift); | ||
1714 | return len + esc_len; | ||
1715 | } | ||
1716 | |||
1717 | static int | ||
1718 | iso2022jp_flush(csconv_t *cv, uchar *buf, int bufsize) | ||
1719 | { | ||
1720 | iso2022_esc_t *iesc = iso2022jp_esc; | ||
1721 | int esc_len; | ||
1722 | |||
1723 | if (cv->mode != ISO2022_MODE(ISO2022JP_CS_ASCII, ISO2022_SI)) | ||
1724 | { | ||
1725 | esc_len = 0; | ||
1726 | if (ISO2022_MODE_SHIFT(cv->mode) != ISO2022_SI) | ||
1727 | esc_len += 1; | ||
1728 | if (ISO2022_MODE_CS(cv->mode) != ISO2022JP_CS_ASCII) | ||
1729 | esc_len += iesc[ISO2022JP_CS_ASCII].esc_len; | ||
1730 | if (bufsize < esc_len) | ||
1731 | return seterror(E2BIG); | ||
1732 | |||
1733 | esc_len = 0; | ||
1734 | if (ISO2022_MODE_SHIFT(cv->mode) != ISO2022_SI) | ||
1735 | { | ||
1736 | memcpy(buf, iso2022_SI_seq, 1); | ||
1737 | esc_len += 1; | ||
1738 | } | ||
1739 | if (ISO2022_MODE_CS(cv->mode) != ISO2022JP_CS_ASCII) | ||
1740 | { | ||
1741 | memcpy(buf + esc_len, iesc[ISO2022JP_CS_ASCII].esc, | ||
1742 | iesc[ISO2022JP_CS_ASCII].esc_len); | ||
1743 | esc_len += iesc[ISO2022JP_CS_ASCII].esc_len; | ||
1744 | } | ||
1745 | return esc_len; | ||
1746 | } | ||
1747 | return 0; | ||
1748 | } | ||
1749 | |||
1750 | static void process_file(iconv_t cd, FILE *in, FILE *out) | ||
1751 | { | ||
1752 | char inbuf[BUFSIZ]; | ||
1753 | char outbuf[BUFSIZ]; | ||
1754 | const char *pin; | ||
1755 | char *pout; | ||
1756 | size_t inbytesleft; | ||
1757 | size_t outbytesleft; | ||
1758 | size_t rest = 0; | ||
1759 | size_t r; | ||
1760 | |||
1761 | while ((inbytesleft=fread(inbuf+rest, 1, sizeof(inbuf)-rest, in)) != 0 | ||
1762 | || rest != 0) { | ||
1763 | inbytesleft += rest; | ||
1764 | pin = inbuf; | ||
1765 | pout = outbuf; | ||
1766 | outbytesleft = sizeof(outbuf); | ||
1767 | r = iconv(cd, &pin, &inbytesleft, &pout, &outbytesleft); | ||
1768 | fwrite(outbuf, 1, sizeof(outbuf) - outbytesleft, out); | ||
1769 | if (r == (size_t)(-1) && errno != E2BIG && | ||
1770 | (errno != EINVAL || feof(in))) | ||
1771 | bb_perror_msg_and_die("conversion error"); | ||
1772 | memmove(inbuf, pin, inbytesleft); | ||
1773 | rest = inbytesleft; | ||
1774 | } | ||
1775 | pout = outbuf; | ||
1776 | outbytesleft = sizeof(outbuf); | ||
1777 | r = iconv(cd, NULL, NULL, &pout, &outbytesleft); | ||
1778 | fwrite(outbuf, 1, sizeof(outbuf) - outbytesleft, out); | ||
1779 | if (r == (size_t)(-1)) | ||
1780 | bb_perror_msg_and_die("conversion error"); | ||
1781 | } | ||
1782 | |||
1783 | int iconv_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; | ||
1784 | int iconv_main(int argc, char **argv) | ||
1785 | { | ||
1786 | char *fromcode = NULL; | ||
1787 | char *tocode = NULL; | ||
1788 | int i; | ||
1789 | iconv_t cd; | ||
1790 | FILE *in = stdin; | ||
1791 | FILE *out = stdout; | ||
1792 | int ignore = 0; | ||
1793 | |||
1794 | while ((i = getopt(argc, argv, "f:t:lco:")) != -1) { | ||
1795 | switch (i) { | ||
1796 | case 'l': | ||
1797 | for (i = 0; codepage_alias[i].name != NULL; ++i) | ||
1798 | printf("%s\n", codepage_alias[i].name); | ||
1799 | return 0; | ||
1800 | |||
1801 | case 'f': | ||
1802 | fromcode = optarg; | ||
1803 | break; | ||
1804 | |||
1805 | case 't': | ||
1806 | tocode = optarg; | ||
1807 | break; | ||
1808 | |||
1809 | case 'c': | ||
1810 | ignore = 1; | ||
1811 | break; | ||
1812 | |||
1813 | case 'o': | ||
1814 | out = xfopen(optarg, "wb"); | ||
1815 | break; | ||
1816 | |||
1817 | default: | ||
1818 | bb_show_usage(); | ||
1819 | } | ||
1820 | } | ||
1821 | |||
1822 | if (fromcode == NULL || tocode == NULL) | ||
1823 | bb_show_usage(); | ||
1824 | |||
1825 | if (ignore) | ||
1826 | tocode = xasprintf("%s//IGNORE", tocode); | ||
1827 | |||
1828 | cd = iconv_open(tocode, fromcode); | ||
1829 | if (cd == (iconv_t)(-1)) | ||
1830 | bb_perror_msg_and_die("iconv_open error"); | ||
1831 | |||
1832 | if (optind == argc || | ||
1833 | (optind == argc-1 && strcmp(argv[optind], "-") == 0)) { | ||
1834 | process_file(cd, in, out); | ||
1835 | } | ||
1836 | else { | ||
1837 | for (i=optind; i<argc; ++i) { | ||
1838 | in = xfopen(argv[i], "rb"); | ||
1839 | process_file(cd, in, out); | ||
1840 | fclose(in); | ||
1841 | } | ||
1842 | } | ||
1843 | |||
1844 | iconv_close(cd); | ||
1845 | return 0; | ||
1846 | } | ||