diff options
Diffstat (limited to 'miscutils')
-rw-r--r-- | miscutils/bbconfig.c | 1 | ||||
-rw-r--r-- | miscutils/dc.c | 2 | ||||
-rw-r--r-- | miscutils/iconv.c | 1847 | ||||
-rw-r--r-- | miscutils/less.c | 90 | ||||
-rw-r--r-- | miscutils/man.c | 32 |
5 files changed, 1968 insertions, 4 deletions
diff --git a/miscutils/bbconfig.c b/miscutils/bbconfig.c index fe02516a8..077e03c5d 100644 --- a/miscutils/bbconfig.c +++ b/miscutils/bbconfig.c | |||
@@ -35,6 +35,7 @@ | |||
35 | #include "libbb.h" | 35 | #include "libbb.h" |
36 | #include "bbconfigopts.h" | 36 | #include "bbconfigopts.h" |
37 | #if ENABLE_FEATURE_COMPRESS_BBCONFIG | 37 | #if ENABLE_FEATURE_COMPRESS_BBCONFIG |
38 | #define BB_ARCHIVE_PUBLIC | ||
38 | # include "bb_archive.h" | 39 | # include "bb_archive.h" |
39 | # include "bbconfigopts_bz2.h" | 40 | # include "bbconfigopts_bz2.h" |
40 | #endif | 41 | #endif |
diff --git a/miscutils/dc.c b/miscutils/dc.c index 5aef64b60..c7ce2be0b 100644 --- a/miscutils/dc.c +++ b/miscutils/dc.c | |||
@@ -17,7 +17,7 @@ typedef unsigned long data_t; | |||
17 | #define DATA_FMT "l" | 17 | #define DATA_FMT "l" |
18 | #else | 18 | #else |
19 | typedef unsigned long long data_t; | 19 | typedef unsigned long long data_t; |
20 | #define DATA_FMT "ll" | 20 | #define DATA_FMT LL_FMT |
21 | #endif | 21 | #endif |
22 | 22 | ||
23 | struct globals { | 23 | struct globals { |
diff --git a/miscutils/iconv.c b/miscutils/iconv.c new file mode 100644 index 000000000..c3289982d --- /dev/null +++ b/miscutils/iconv.c | |||
@@ -0,0 +1,1847 @@ | |||
1 | /* | ||
2 | * iconv implementation using Win32 API to convert. | ||
3 | * | ||
4 | * This file is placed in the public domain. | ||
5 | */ | ||
6 | |||
7 | /* | ||
8 | * This code was obtained from: | ||
9 | * | ||
10 | * https://github.com/win-iconv/win-iconv | ||
11 | * | ||
12 | * Modified for busybox-w32 by Ronald M Yorston. These modifications | ||
13 | * are also dedicated to the public domain. | ||
14 | */ | ||
15 | |||
16 | //config:config ICONV | ||
17 | //config: bool "iconv" | ||
18 | //config: default y | ||
19 | //config: depends on PLATFORM_MINGW32 | ||
20 | //config: help | ||
21 | //config: 'iconv' converts text between character encodings. | ||
22 | |||
23 | //applet:IF_ICONV(APPLET(iconv, BB_DIR_USR_BIN, BB_SUID_DROP)) | ||
24 | |||
25 | //kbuild:lib-$(CONFIG_ICONV) += iconv.o | ||
26 | |||
27 | //usage:#define iconv_trivial_usage | ||
28 | //usage: "[-lc] [-o outfile] -f from-enc -t to-enc [FILE]..." | ||
29 | //usage:#define iconv_full_usage "\n\n" | ||
30 | //usage: "Convert text between character encodings\n" | ||
31 | //usage: "\n -l List all known character encodings" | ||
32 | //usage: "\n -c Silently discard characters that cannot be converted" | ||
33 | //usage: "\n -o Use outfile for output" | ||
34 | //usage: "\n -f Use from-enc for input characters" | ||
35 | //usage: "\n -t Use to-enc for output characters" | ||
36 | |||
37 | #include "libbb.h" | ||
38 | |||
39 | /* WORKAROUND: */ | ||
40 | #define GetProcAddressA GetProcAddress | ||
41 | |||
42 | #define MB_CHAR_MAX 16 | ||
43 | |||
44 | #define UNICODE_MODE_BOM_DONE 1 | ||
45 | #define UNICODE_MODE_SWAPPED 2 | ||
46 | |||
47 | #define FLAG_USE_BOM 1 | ||
48 | #define FLAG_TRANSLIT 2 /* //TRANSLIT */ | ||
49 | #define FLAG_IGNORE 4 /* //IGNORE */ | ||
50 | |||
51 | typedef unsigned char uchar; | ||
52 | typedef unsigned short ushort; | ||
53 | typedef unsigned int uint; | ||
54 | |||
55 | typedef void* iconv_t; | ||
56 | |||
57 | iconv_t iconv_open(const char *tocode, const char *fromcode); | ||
58 | int iconv_close(iconv_t cd); | ||
59 | size_t iconv(iconv_t cd, const char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft); | ||
60 | |||
61 | typedef struct compat_t compat_t; | ||
62 | typedef struct csconv_t csconv_t; | ||
63 | typedef struct rec_iconv_t rec_iconv_t; | ||
64 | |||
65 | typedef iconv_t (*f_iconv_open)(const char *tocode, const char *fromcode); | ||
66 | typedef int (*f_iconv_close)(iconv_t cd); | ||
67 | typedef size_t (*f_iconv)(iconv_t cd, const char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft); | ||
68 | typedef int* (*f_errno)(void); | ||
69 | typedef int (*f_mbtowc)(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize); | ||
70 | typedef int (*f_wctomb)(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize); | ||
71 | typedef int (*f_mblen)(csconv_t *cv, const uchar *buf, int bufsize); | ||
72 | typedef int (*f_flush)(csconv_t *cv, uchar *buf, int bufsize); | ||
73 | |||
74 | #define COMPAT_IN 1 | ||
75 | #define COMPAT_OUT 2 | ||
76 | |||
77 | /* Unicode mappings for compatibility with other conversion tables. */ | ||
78 | struct compat_t { | ||
79 | uint in; | ||
80 | uint out; | ||
81 | uint flag; | ||
82 | }; | ||
83 | |||
84 | struct csconv_t { | ||
85 | int codepage; | ||
86 | int flags; | ||
87 | f_mbtowc mbtowc; | ||
88 | f_wctomb wctomb; | ||
89 | f_mblen mblen; | ||
90 | f_flush flush; | ||
91 | DWORD mode; | ||
92 | compat_t *compat; | ||
93 | }; | ||
94 | |||
95 | struct rec_iconv_t { | ||
96 | iconv_t cd; | ||
97 | f_iconv_close iconv_close; | ||
98 | f_iconv iconv; | ||
99 | f_errno _errno; | ||
100 | csconv_t from; | ||
101 | csconv_t to; | ||
102 | }; | ||
103 | |||
104 | static int win_iconv_open(rec_iconv_t *cd, const char *tocode, const char *fromcode); | ||
105 | static int win_iconv_close(iconv_t cd); | ||
106 | static size_t win_iconv(iconv_t cd, const char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft); | ||
107 | |||
108 | static int load_mlang(void); | ||
109 | static int make_csconv(const char *name, csconv_t *cv); | ||
110 | static int name_to_codepage(const char *name); | ||
111 | static uint utf16_to_ucs4(const ushort *wbuf); | ||
112 | static void ucs4_to_utf16(uint wc, ushort *wbuf, int *wbufsize); | ||
113 | static int mbtowc_flags(int codepage); | ||
114 | static int must_use_null_useddefaultchar(int codepage); | ||
115 | static int seterror(int err); | ||
116 | |||
117 | static int sbcs_mblen(csconv_t *cv, const uchar *buf, int bufsize); | ||
118 | static int dbcs_mblen(csconv_t *cv, const uchar *buf, int bufsize); | ||
119 | static int mbcs_mblen(csconv_t *cv, const uchar *buf, int bufsize); | ||
120 | static int utf8_mblen(csconv_t *cv, const uchar *buf, int bufsize); | ||
121 | static int eucjp_mblen(csconv_t *cv, const uchar *buf, int bufsize); | ||
122 | |||
123 | static int kernel_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize); | ||
124 | static int kernel_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize); | ||
125 | static int mlang_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize); | ||
126 | static int mlang_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize); | ||
127 | static int utf16_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize); | ||
128 | static int utf16_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize); | ||
129 | static int utf32_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize); | ||
130 | static int utf32_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize); | ||
131 | static int iso2022jp_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize); | ||
132 | static int iso2022jp_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize); | ||
133 | static int iso2022jp_flush(csconv_t *cv, uchar *buf, int bufsize); | ||
134 | |||
135 | static struct { | ||
136 | int codepage; | ||
137 | const char *name; | ||
138 | } codepage_alias[] = { | ||
139 | {65001, "CP65001"}, | ||
140 | {65001, "UTF8"}, | ||
141 | {65001, "UTF-8"}, | ||
142 | |||
143 | {1200, "CP1200"}, | ||
144 | {1200, "UTF16LE"}, | ||
145 | {1200, "UTF-16LE"}, | ||
146 | {1200, "UCS2LE"}, | ||
147 | {1200, "UCS-2LE"}, | ||
148 | {1200, "UCS-2-INTERNAL"}, | ||
149 | |||
150 | {1201, "CP1201"}, | ||
151 | {1201, "UTF16BE"}, | ||
152 | {1201, "UTF-16BE"}, | ||
153 | {1201, "UCS2BE"}, | ||
154 | {1201, "UCS-2BE"}, | ||
155 | {1201, "unicodeFFFE"}, | ||
156 | |||
157 | {12000, "CP12000"}, | ||
158 | {12000, "UTF32LE"}, | ||
159 | {12000, "UTF-32LE"}, | ||
160 | {12000, "UCS4LE"}, | ||
161 | {12000, "UCS-4LE"}, | ||
162 | |||
163 | {12001, "CP12001"}, | ||
164 | {12001, "UTF32BE"}, | ||
165 | {12001, "UTF-32BE"}, | ||
166 | {12001, "UCS4BE"}, | ||
167 | {12001, "UCS-4BE"}, | ||
168 | |||
169 | #ifndef GLIB_COMPILATION | ||
170 | /* | ||
171 | * Default is big endian. | ||
172 | * See rfc2781 4.3 Interpreting text labelled as UTF-16. | ||
173 | */ | ||
174 | {1201, "UTF16"}, | ||
175 | {1201, "UTF-16"}, | ||
176 | {1201, "UCS2"}, | ||
177 | {1201, "UCS-2"}, | ||
178 | {12001, "UTF32"}, | ||
179 | {12001, "UTF-32"}, | ||
180 | {12001, "UCS-4"}, | ||
181 | {12001, "UCS4"}, | ||
182 | #else | ||
183 | /* Default is little endian, because the platform is */ | ||
184 | {1200, "UTF16"}, | ||
185 | {1200, "UTF-16"}, | ||
186 | {1200, "UCS2"}, | ||
187 | {1200, "UCS-2"}, | ||
188 | {12000, "UTF32"}, | ||
189 | {12000, "UTF-32"}, | ||
190 | {12000, "UCS4"}, | ||
191 | {12000, "UCS-4"}, | ||
192 | #endif | ||
193 | |||
194 | /* copy from libiconv `iconv -l` */ | ||
195 | /* !IsValidCodePage(367) */ | ||
196 | {20127, "ANSI_X3.4-1968"}, | ||
197 | {20127, "ANSI_X3.4-1986"}, | ||
198 | {20127, "ASCII"}, | ||
199 | {20127, "CP367"}, | ||
200 | {20127, "IBM367"}, | ||
201 | {20127, "ISO-IR-6"}, | ||
202 | {20127, "ISO646-US"}, | ||
203 | {20127, "ISO_646.IRV:1991"}, | ||
204 | {20127, "US"}, | ||
205 | {20127, "US-ASCII"}, | ||
206 | {20127, "CSASCII"}, | ||
207 | |||
208 | /* !IsValidCodePage(819) */ | ||
209 | {1252, "CP819"}, | ||
210 | {1252, "IBM819"}, | ||
211 | {28591, "ISO-8859-1"}, | ||
212 | {28591, "ISO-IR-100"}, | ||
213 | {28591, "ISO8859-1"}, | ||
214 | {28591, "ISO_8859-1"}, | ||
215 | {28591, "ISO_8859-1:1987"}, | ||
216 | {28591, "L1"}, | ||
217 | {28591, "LATIN1"}, | ||
218 | {28591, "CSISOLATIN1"}, | ||
219 | |||
220 | {1250, "CP1250"}, | ||
221 | {1250, "MS-EE"}, | ||
222 | {1250, "WINDOWS-1250"}, | ||
223 | |||
224 | {1251, "CP1251"}, | ||
225 | {1251, "MS-CYRL"}, | ||
226 | {1251, "WINDOWS-1251"}, | ||
227 | |||
228 | {1252, "CP1252"}, | ||
229 | {1252, "MS-ANSI"}, | ||
230 | {1252, "WINDOWS-1252"}, | ||
231 | |||
232 | {1253, "CP1253"}, | ||
233 | {1253, "MS-GREEK"}, | ||
234 | {1253, "WINDOWS-1253"}, | ||
235 | |||
236 | {1254, "CP1254"}, | ||
237 | {1254, "MS-TURK"}, | ||
238 | {1254, "WINDOWS-1254"}, | ||
239 | |||
240 | {1255, "CP1255"}, | ||
241 | {1255, "MS-HEBR"}, | ||
242 | {1255, "WINDOWS-1255"}, | ||
243 | |||
244 | {1256, "CP1256"}, | ||
245 | {1256, "MS-ARAB"}, | ||
246 | {1256, "WINDOWS-1256"}, | ||
247 | |||
248 | {1257, "CP1257"}, | ||
249 | {1257, "WINBALTRIM"}, | ||
250 | {1257, "WINDOWS-1257"}, | ||
251 | |||
252 | {1258, "CP1258"}, | ||
253 | {1258, "WINDOWS-1258"}, | ||
254 | |||
255 | {850, "850"}, | ||
256 | {850, "CP850"}, | ||
257 | {850, "IBM850"}, | ||
258 | {850, "CSPC850MULTILINGUAL"}, | ||
259 | |||
260 | /* !IsValidCodePage(862) */ | ||
261 | {862, "862"}, | ||
262 | {862, "CP862"}, | ||
263 | {862, "IBM862"}, | ||
264 | {862, "CSPC862LATINHEBREW"}, | ||
265 | |||
266 | {866, "866"}, | ||
267 | {866, "CP866"}, | ||
268 | {866, "IBM866"}, | ||
269 | {866, "CSIBM866"}, | ||
270 | |||
271 | /* !IsValidCodePage(154) */ | ||
272 | {154, "CP154"}, | ||
273 | {154, "CYRILLIC-ASIAN"}, | ||
274 | {154, "PT154"}, | ||
275 | {154, "PTCP154"}, | ||
276 | {154, "CSPTCP154"}, | ||
277 | |||
278 | /* !IsValidCodePage(1133) */ | ||
279 | {1133, "CP1133"}, | ||
280 | {1133, "IBM-CP1133"}, | ||
281 | |||
282 | {874, "CP874"}, | ||
283 | {874, "WINDOWS-874"}, | ||
284 | |||
285 | /* !IsValidCodePage(51932) */ | ||
286 | {51932, "CP51932"}, | ||
287 | {51932, "MS51932"}, | ||
288 | {51932, "WINDOWS-51932"}, | ||
289 | {51932, "EUC-JP"}, | ||
290 | |||
291 | {932, "CP932"}, | ||
292 | {932, "MS932"}, | ||
293 | {932, "SHIFFT_JIS"}, | ||
294 | {932, "SHIFFT_JIS-MS"}, | ||
295 | {932, "SJIS"}, | ||
296 | {932, "SJIS-MS"}, | ||
297 | {932, "SJIS-OPEN"}, | ||
298 | {932, "SJIS-WIN"}, | ||
299 | {932, "WINDOWS-31J"}, | ||
300 | {932, "WINDOWS-932"}, | ||
301 | {932, "CSWINDOWS31J"}, | ||
302 | |||
303 | {50221, "CP50221"}, | ||
304 | {50221, "ISO-2022-JP"}, | ||
305 | {50221, "ISO-2022-JP-MS"}, | ||
306 | {50221, "ISO2022-JP"}, | ||
307 | {50221, "ISO2022-JP-MS"}, | ||
308 | {50221, "MS50221"}, | ||
309 | {50221, "WINDOWS-50221"}, | ||
310 | |||
311 | {936, "CP936"}, | ||
312 | {936, "GBK"}, | ||
313 | {936, "MS936"}, | ||
314 | {936, "WINDOWS-936"}, | ||
315 | |||
316 | {950, "CP950"}, | ||
317 | {950, "BIG5"}, | ||
318 | {950, "BIG5HKSCS"}, | ||
319 | {950, "BIG5-HKSCS"}, | ||
320 | |||
321 | {949, "CP949"}, | ||
322 | {949, "UHC"}, | ||
323 | {949, "EUC-KR"}, | ||
324 | |||
325 | {1361, "CP1361"}, | ||
326 | {1361, "JOHAB"}, | ||
327 | |||
328 | {437, "437"}, | ||
329 | {437, "CP437"}, | ||
330 | {437, "IBM437"}, | ||
331 | {437, "CSPC8CODEPAGE437"}, | ||
332 | |||
333 | {737, "CP737"}, | ||
334 | |||
335 | {775, "CP775"}, | ||
336 | {775, "IBM775"}, | ||
337 | {775, "CSPC775BALTIC"}, | ||
338 | |||
339 | {852, "852"}, | ||
340 | {852, "CP852"}, | ||
341 | {852, "IBM852"}, | ||
342 | {852, "CSPCP852"}, | ||
343 | |||
344 | /* !IsValidCodePage(853) */ | ||
345 | {853, "CP853"}, | ||
346 | |||
347 | {855, "855"}, | ||
348 | {855, "CP855"}, | ||
349 | {855, "IBM855"}, | ||
350 | {855, "CSIBM855"}, | ||
351 | |||
352 | {857, "857"}, | ||
353 | {857, "CP857"}, | ||
354 | {857, "IBM857"}, | ||
355 | {857, "CSIBM857"}, | ||
356 | |||
357 | /* !IsValidCodePage(858) */ | ||
358 | {858, "CP858"}, | ||
359 | |||
360 | {860, "860"}, | ||
361 | {860, "CP860"}, | ||
362 | {860, "IBM860"}, | ||
363 | {860, "CSIBM860"}, | ||
364 | |||
365 | {861, "861"}, | ||
366 | {861, "CP-IS"}, | ||
367 | {861, "CP861"}, | ||
368 | {861, "IBM861"}, | ||
369 | {861, "CSIBM861"}, | ||
370 | |||
371 | {863, "863"}, | ||
372 | {863, "CP863"}, | ||
373 | {863, "IBM863"}, | ||
374 | {863, "CSIBM863"}, | ||
375 | |||
376 | {864, "CP864"}, | ||
377 | {864, "IBM864"}, | ||
378 | {864, "CSIBM864"}, | ||
379 | |||
380 | {865, "865"}, | ||
381 | {865, "CP865"}, | ||
382 | {865, "IBM865"}, | ||
383 | {865, "CSIBM865"}, | ||
384 | |||
385 | {869, "869"}, | ||
386 | {869, "CP-GR"}, | ||
387 | {869, "CP869"}, | ||
388 | {869, "IBM869"}, | ||
389 | {869, "CSIBM869"}, | ||
390 | |||
391 | /* !IsValidCodePage(1125) */ | ||
392 | {1125, "CP1125"}, | ||
393 | |||
394 | /* | ||
395 | * Code Page Identifiers | ||
396 | * http://msdn2.microsoft.com/en-us/library/ms776446.aspx | ||
397 | */ | ||
398 | {37, "IBM037"}, /* IBM EBCDIC US-Canada */ | ||
399 | {437, "IBM437"}, /* OEM United States */ | ||
400 | {500, "IBM500"}, /* IBM EBCDIC International */ | ||
401 | {708, "ASMO-708"}, /* Arabic (ASMO 708) */ | ||
402 | /* 709 Arabic (ASMO-449+, BCON V4) */ | ||
403 | /* 710 Arabic - Transparent Arabic */ | ||
404 | {720, "DOS-720"}, /* Arabic (Transparent ASMO); Arabic (DOS) */ | ||
405 | {737, "ibm737"}, /* OEM Greek (formerly 437G); Greek (DOS) */ | ||
406 | {775, "ibm775"}, /* OEM Baltic; Baltic (DOS) */ | ||
407 | {850, "ibm850"}, /* OEM Multilingual Latin 1; Western European (DOS) */ | ||
408 | {852, "ibm852"}, /* OEM Latin 2; Central European (DOS) */ | ||
409 | {855, "IBM855"}, /* OEM Cyrillic (primarily Russian) */ | ||
410 | {857, "ibm857"}, /* OEM Turkish; Turkish (DOS) */ | ||
411 | {858, "IBM00858"}, /* OEM Multilingual Latin 1 + Euro symbol */ | ||
412 | {860, "IBM860"}, /* OEM Portuguese; Portuguese (DOS) */ | ||
413 | {861, "ibm861"}, /* OEM Icelandic; Icelandic (DOS) */ | ||
414 | {862, "DOS-862"}, /* OEM Hebrew; Hebrew (DOS) */ | ||
415 | {863, "IBM863"}, /* OEM French Canadian; French Canadian (DOS) */ | ||
416 | {864, "IBM864"}, /* OEM Arabic; Arabic (864) */ | ||
417 | {865, "IBM865"}, /* OEM Nordic; Nordic (DOS) */ | ||
418 | {866, "cp866"}, /* OEM Russian; Cyrillic (DOS) */ | ||
419 | {869, "ibm869"}, /* OEM Modern Greek; Greek, Modern (DOS) */ | ||
420 | {870, "IBM870"}, /* IBM EBCDIC Multilingual/ROECE (Latin 2); IBM EBCDIC Multilingual Latin 2 */ | ||
421 | {874, "windows-874"}, /* ANSI/OEM Thai (same as 28605, ISO 8859-15); Thai (Windows) */ | ||
422 | {875, "cp875"}, /* IBM EBCDIC Greek Modern */ | ||
423 | {932, "shift_jis"}, /* ANSI/OEM Japanese; Japanese (Shift-JIS) */ | ||
424 | {932, "shift-jis"}, /* alternative name for it */ | ||
425 | {936, "gb2312"}, /* ANSI/OEM Simplified Chinese (PRC, Singapore); Chinese Simplified (GB2312) */ | ||
426 | {949, "ks_c_5601-1987"}, /* ANSI/OEM Korean (Unified Hangul Code) */ | ||
427 | {950, "big5"}, /* ANSI/OEM Traditional Chinese (Taiwan; Hong Kong SAR, PRC); Chinese Traditional (Big5) */ | ||
428 | {950, "big5hkscs"}, /* ANSI/OEM Traditional Chinese (Hong Kong SAR); Chinese Traditional (Big5-HKSCS) */ | ||
429 | {950, "big5-hkscs"}, /* alternative name for it */ | ||
430 | {1026, "IBM1026"}, /* IBM EBCDIC Turkish (Latin 5) */ | ||
431 | {1047, "IBM01047"}, /* IBM EBCDIC Latin 1/Open System */ | ||
432 | {1140, "IBM01140"}, /* IBM EBCDIC US-Canada (037 + Euro symbol); IBM EBCDIC (US-Canada-Euro) */ | ||
433 | {1141, "IBM01141"}, /* IBM EBCDIC Germany (20273 + Euro symbol); IBM EBCDIC (Germany-Euro) */ | ||
434 | {1142, "IBM01142"}, /* IBM EBCDIC Denmark-Norway (20277 + Euro symbol); IBM EBCDIC (Denmark-Norway-Euro) */ | ||
435 | {1143, "IBM01143"}, /* IBM EBCDIC Finland-Sweden (20278 + Euro symbol); IBM EBCDIC (Finland-Sweden-Euro) */ | ||
436 | {1144, "IBM01144"}, /* IBM EBCDIC Italy (20280 + Euro symbol); IBM EBCDIC (Italy-Euro) */ | ||
437 | {1145, "IBM01145"}, /* IBM EBCDIC Latin America-Spain (20284 + Euro symbol); IBM EBCDIC (Spain-Euro) */ | ||
438 | {1146, "IBM01146"}, /* IBM EBCDIC United Kingdom (20285 + Euro symbol); IBM EBCDIC (UK-Euro) */ | ||
439 | {1147, "IBM01147"}, /* IBM EBCDIC France (20297 + Euro symbol); IBM EBCDIC (France-Euro) */ | ||
440 | {1148, "IBM01148"}, /* IBM EBCDIC International (500 + Euro symbol); IBM EBCDIC (International-Euro) */ | ||
441 | {1149, "IBM01149"}, /* IBM EBCDIC Icelandic (20871 + Euro symbol); IBM EBCDIC (Icelandic-Euro) */ | ||
442 | {1250, "windows-1250"}, /* ANSI Central European; Central European (Windows) */ | ||
443 | {1251, "windows-1251"}, /* ANSI Cyrillic; Cyrillic (Windows) */ | ||
444 | {1252, "windows-1252"}, /* ANSI Latin 1; Western European (Windows) */ | ||
445 | {1253, "windows-1253"}, /* ANSI Greek; Greek (Windows) */ | ||
446 | {1254, "windows-1254"}, /* ANSI Turkish; Turkish (Windows) */ | ||
447 | {1255, "windows-1255"}, /* ANSI Hebrew; Hebrew (Windows) */ | ||
448 | {1256, "windows-1256"}, /* ANSI Arabic; Arabic (Windows) */ | ||
449 | {1257, "windows-1257"}, /* ANSI Baltic; Baltic (Windows) */ | ||
450 | {1258, "windows-1258"}, /* ANSI/OEM Vietnamese; Vietnamese (Windows) */ | ||
451 | {1361, "Johab"}, /* Korean (Johab) */ | ||
452 | {10000, "macintosh"}, /* MAC Roman; Western European (Mac) */ | ||
453 | {10001, "x-mac-japanese"}, /* Japanese (Mac) */ | ||
454 | {10002, "x-mac-chinesetrad"}, /* MAC Traditional Chinese (Big5); Chinese Traditional (Mac) */ | ||
455 | {10003, "x-mac-korean"}, /* Korean (Mac) */ | ||
456 | {10004, "x-mac-arabic"}, /* Arabic (Mac) */ | ||
457 | {10005, "x-mac-hebrew"}, /* Hebrew (Mac) */ | ||
458 | {10006, "x-mac-greek"}, /* Greek (Mac) */ | ||
459 | {10007, "x-mac-cyrillic"}, /* Cyrillic (Mac) */ | ||
460 | {10008, "x-mac-chinesesimp"}, /* MAC Simplified Chinese (GB 2312); Chinese Simplified (Mac) */ | ||
461 | {10010, "x-mac-romanian"}, /* Romanian (Mac) */ | ||
462 | {10017, "x-mac-ukrainian"}, /* Ukrainian (Mac) */ | ||
463 | {10021, "x-mac-thai"}, /* Thai (Mac) */ | ||
464 | {10029, "x-mac-ce"}, /* MAC Latin 2; Central European (Mac) */ | ||
465 | {10079, "x-mac-icelandic"}, /* Icelandic (Mac) */ | ||
466 | {10081, "x-mac-turkish"}, /* Turkish (Mac) */ | ||
467 | {10082, "x-mac-croatian"}, /* Croatian (Mac) */ | ||
468 | {20000, "x-Chinese_CNS"}, /* CNS Taiwan; Chinese Traditional (CNS) */ | ||
469 | {20001, "x-cp20001"}, /* TCA Taiwan */ | ||
470 | {20002, "x_Chinese-Eten"}, /* Eten Taiwan; Chinese Traditional (Eten) */ | ||
471 | {20003, "x-cp20003"}, /* IBM5550 Taiwan */ | ||
472 | {20004, "x-cp20004"}, /* TeleText Taiwan */ | ||
473 | {20005, "x-cp20005"}, /* Wang Taiwan */ | ||
474 | {20105, "x-IA5"}, /* IA5 (IRV International Alphabet No. 5, 7-bit); Western European (IA5) */ | ||
475 | {20106, "x-IA5-German"}, /* IA5 German (7-bit) */ | ||
476 | {20107, "x-IA5-Swedish"}, /* IA5 Swedish (7-bit) */ | ||
477 | {20108, "x-IA5-Norwegian"}, /* IA5 Norwegian (7-bit) */ | ||
478 | {20127, "us-ascii"}, /* US-ASCII (7-bit) */ | ||
479 | {20261, "x-cp20261"}, /* T.61 */ | ||
480 | {20269, "x-cp20269"}, /* ISO 6937 Non-Spacing Accent */ | ||
481 | {20273, "IBM273"}, /* IBM EBCDIC Germany */ | ||
482 | {20277, "IBM277"}, /* IBM EBCDIC Denmark-Norway */ | ||
483 | {20278, "IBM278"}, /* IBM EBCDIC Finland-Sweden */ | ||
484 | {20280, "IBM280"}, /* IBM EBCDIC Italy */ | ||
485 | {20284, "IBM284"}, /* IBM EBCDIC Latin America-Spain */ | ||
486 | {20285, "IBM285"}, /* IBM EBCDIC United Kingdom */ | ||
487 | {20290, "IBM290"}, /* IBM EBCDIC Japanese Katakana Extended */ | ||
488 | {20297, "IBM297"}, /* IBM EBCDIC France */ | ||
489 | {20420, "IBM420"}, /* IBM EBCDIC Arabic */ | ||
490 | {20423, "IBM423"}, /* IBM EBCDIC Greek */ | ||
491 | {20424, "IBM424"}, /* IBM EBCDIC Hebrew */ | ||
492 | {20833, "x-EBCDIC-KoreanExtended"}, /* IBM EBCDIC Korean Extended */ | ||
493 | {20838, "IBM-Thai"}, /* IBM EBCDIC Thai */ | ||
494 | {20866, "koi8-r"}, /* Russian (KOI8-R); Cyrillic (KOI8-R) */ | ||
495 | {20871, "IBM871"}, /* IBM EBCDIC Icelandic */ | ||
496 | {20880, "IBM880"}, /* IBM EBCDIC Cyrillic Russian */ | ||
497 | {20905, "IBM905"}, /* IBM EBCDIC Turkish */ | ||
498 | {20924, "IBM00924"}, /* IBM EBCDIC Latin 1/Open System (1047 + Euro symbol) */ | ||
499 | {20932, "EUC-JP"}, /* Japanese (JIS 0208-1990 and 0121-1990) */ | ||
500 | {20936, "x-cp20936"}, /* Simplified Chinese (GB2312); Chinese Simplified (GB2312-80) */ | ||
501 | {20949, "x-cp20949"}, /* Korean Wansung */ | ||
502 | {21025, "cp1025"}, /* IBM EBCDIC Cyrillic Serbian-Bulgarian */ | ||
503 | /* 21027 (deprecated) */ | ||
504 | {21866, "koi8-u"}, /* Ukrainian (KOI8-U); Cyrillic (KOI8-U) */ | ||
505 | {28591, "iso-8859-1"}, /* ISO 8859-1 Latin 1; Western European (ISO) */ | ||
506 | {28591, "iso8859-1"}, /* ISO 8859-1 Latin 1; Western European (ISO) */ | ||
507 | {28591, "iso_8859-1"}, | ||
508 | {28591, "iso_8859_1"}, | ||
509 | {28592, "iso-8859-2"}, /* ISO 8859-2 Central European; Central European (ISO) */ | ||
510 | {28592, "iso8859-2"}, /* ISO 8859-2 Central European; Central European (ISO) */ | ||
511 | {28592, "iso_8859-2"}, | ||
512 | {28592, "iso_8859_2"}, | ||
513 | {28593, "iso-8859-3"}, /* ISO 8859-3 Latin 3 */ | ||
514 | {28593, "iso8859-3"}, /* ISO 8859-3 Latin 3 */ | ||
515 | {28593, "iso_8859-3"}, | ||
516 | {28593, "iso_8859_3"}, | ||
517 | {28594, "iso-8859-4"}, /* ISO 8859-4 Baltic */ | ||
518 | {28594, "iso8859-4"}, /* ISO 8859-4 Baltic */ | ||
519 | {28594, "iso_8859-4"}, | ||
520 | {28594, "iso_8859_4"}, | ||
521 | {28595, "iso-8859-5"}, /* ISO 8859-5 Cyrillic */ | ||
522 | {28595, "iso8859-5"}, /* ISO 8859-5 Cyrillic */ | ||
523 | {28595, "iso_8859-5"}, | ||
524 | {28595, "iso_8859_5"}, | ||
525 | {28596, "iso-8859-6"}, /* ISO 8859-6 Arabic */ | ||
526 | {28596, "iso8859-6"}, /* ISO 8859-6 Arabic */ | ||
527 | {28596, "iso_8859-6"}, | ||
528 | {28596, "iso_8859_6"}, | ||
529 | {28597, "iso-8859-7"}, /* ISO 8859-7 Greek */ | ||
530 | {28597, "iso8859-7"}, /* ISO 8859-7 Greek */ | ||
531 | {28597, "iso_8859-7"}, | ||
532 | {28597, "iso_8859_7"}, | ||
533 | {28598, "iso-8859-8"}, /* ISO 8859-8 Hebrew; Hebrew (ISO-Visual) */ | ||
534 | {28598, "iso8859-8"}, /* ISO 8859-8 Hebrew; Hebrew (ISO-Visual) */ | ||
535 | {28598, "iso_8859-8"}, | ||
536 | {28598, "iso_8859_8"}, | ||
537 | {28599, "iso-8859-9"}, /* ISO 8859-9 Turkish */ | ||
538 | {28599, "iso8859-9"}, /* ISO 8859-9 Turkish */ | ||
539 | {28599, "iso_8859-9"}, | ||
540 | {28599, "iso_8859_9"}, | ||
541 | {28603, "iso-8859-13"}, /* ISO 8859-13 Estonian */ | ||
542 | {28603, "iso8859-13"}, /* ISO 8859-13 Estonian */ | ||
543 | {28603, "iso_8859-13"}, | ||
544 | {28603, "iso_8859_13"}, | ||
545 | {28605, "iso-8859-15"}, /* ISO 8859-15 Latin 9 */ | ||
546 | {28605, "iso8859-15"}, /* ISO 8859-15 Latin 9 */ | ||
547 | {28605, "iso_8859-15"}, | ||
548 | {28605, "iso_8859_15"}, | ||
549 | {29001, "x-Europa"}, /* Europa 3 */ | ||
550 | {38598, "iso-8859-8-i"}, /* ISO 8859-8 Hebrew; Hebrew (ISO-Logical) */ | ||
551 | {38598, "iso8859-8-i"}, /* ISO 8859-8 Hebrew; Hebrew (ISO-Logical) */ | ||
552 | {38598, "iso_8859-8-i"}, | ||
553 | {38598, "iso_8859_8-i"}, | ||
554 | {50220, "iso-2022-jp"}, /* ISO 2022 Japanese with no halfwidth Katakana; Japanese (JIS) */ | ||
555 | {50221, "csISO2022JP"}, /* ISO 2022 Japanese with halfwidth Katakana; Japanese (JIS-Allow 1 byte Kana) */ | ||
556 | {50222, "iso-2022-jp"}, /* ISO 2022 Japanese JIS X 0201-1989; Japanese (JIS-Allow 1 byte Kana - SO/SI) */ | ||
557 | {50225, "iso-2022-kr"}, /* ISO 2022 Korean */ | ||
558 | {50225, "iso2022-kr"}, /* ISO 2022 Korean */ | ||
559 | {50227, "x-cp50227"}, /* ISO 2022 Simplified Chinese; Chinese Simplified (ISO 2022) */ | ||
560 | /* 50229 ISO 2022 Traditional Chinese */ | ||
561 | /* 50930 EBCDIC Japanese (Katakana) Extended */ | ||
562 | /* 50931 EBCDIC US-Canada and Japanese */ | ||
563 | /* 50933 EBCDIC Korean Extended and Korean */ | ||
564 | /* 50935 EBCDIC Simplified Chinese Extended and Simplified Chinese */ | ||
565 | /* 50936 EBCDIC Simplified Chinese */ | ||
566 | /* 50937 EBCDIC US-Canada and Traditional Chinese */ | ||
567 | /* 50939 EBCDIC Japanese (Latin) Extended and Japanese */ | ||
568 | {51932, "euc-jp"}, /* EUC Japanese */ | ||
569 | {51936, "EUC-CN"}, /* EUC Simplified Chinese; Chinese Simplified (EUC) */ | ||
570 | {51949, "euc-kr"}, /* EUC Korean */ | ||
571 | /* 51950 EUC Traditional Chinese */ | ||
572 | {52936, "hz-gb-2312"}, /* HZ-GB2312 Simplified Chinese; Chinese Simplified (HZ) */ | ||
573 | {54936, "GB18030"}, /* Windows XP and later: GB18030 Simplified Chinese (4 byte); Chinese Simplified (GB18030) */ | ||
574 | {57002, "x-iscii-de"}, /* ISCII Devanagari */ | ||
575 | {57003, "x-iscii-be"}, /* ISCII Bengali */ | ||
576 | {57004, "x-iscii-ta"}, /* ISCII Tamil */ | ||
577 | {57005, "x-iscii-te"}, /* ISCII Telugu */ | ||
578 | {57006, "x-iscii-as"}, /* ISCII Assamese */ | ||
579 | {57007, "x-iscii-or"}, /* ISCII Oriya */ | ||
580 | {57008, "x-iscii-ka"}, /* ISCII Kannada */ | ||
581 | {57009, "x-iscii-ma"}, /* ISCII Malayalam */ | ||
582 | {57010, "x-iscii-gu"}, /* ISCII Gujarati */ | ||
583 | {57011, "x-iscii-pa"}, /* ISCII Punjabi */ | ||
584 | |||
585 | {0, NULL} | ||
586 | }; | ||
587 | |||
588 | /* | ||
589 | * SJIS SHIFTJIS table CP932 table | ||
590 | * ---- --------------------------- -------------------------------- | ||
591 | * 5C U+00A5 YEN SIGN U+005C REVERSE SOLIDUS | ||
592 | * 7E U+203E OVERLINE U+007E TILDE | ||
593 | * 815C U+2014 EM DASH U+2015 HORIZONTAL BAR | ||
594 | * 815F U+005C REVERSE SOLIDUS U+FF3C FULLWIDTH REVERSE SOLIDUS | ||
595 | * 8160 U+301C WAVE DASH U+FF5E FULLWIDTH TILDE | ||
596 | * 8161 U+2016 DOUBLE VERTICAL LINE U+2225 PARALLEL TO | ||
597 | * 817C U+2212 MINUS SIGN U+FF0D FULLWIDTH HYPHEN-MINUS | ||
598 | * 8191 U+00A2 CENT SIGN U+FFE0 FULLWIDTH CENT SIGN | ||
599 | * 8192 U+00A3 POUND SIGN U+FFE1 FULLWIDTH POUND SIGN | ||
600 | * 81CA U+00AC NOT SIGN U+FFE2 FULLWIDTH NOT SIGN | ||
601 | * | ||
602 | * EUC-JP and ISO-2022-JP should be compatible with CP932. | ||
603 | * | ||
604 | * Kernel and MLang have different Unicode mapping tables. Make sure | ||
605 | * you know which API is used. | ||
606 | */ | ||
607 | static compat_t cp932_compat[] = { | ||
608 | {0x00A5, 0x005C, COMPAT_OUT}, | ||
609 | {0x203E, 0x007E, COMPAT_OUT}, | ||
610 | {0x2014, 0x2015, COMPAT_OUT}, | ||
611 | {0x301C, 0xFF5E, COMPAT_OUT}, | ||
612 | {0x2016, 0x2225, COMPAT_OUT}, | ||
613 | {0x2212, 0xFF0D, COMPAT_OUT}, | ||
614 | {0x00A2, 0xFFE0, COMPAT_OUT}, | ||
615 | {0x00A3, 0xFFE1, COMPAT_OUT}, | ||
616 | {0x00AC, 0xFFE2, COMPAT_OUT}, | ||
617 | {0, 0, 0} | ||
618 | }; | ||
619 | |||
620 | static compat_t cp20932_compat[] = { | ||
621 | {0x00A5, 0x005C, COMPAT_OUT}, | ||
622 | {0x203E, 0x007E, COMPAT_OUT}, | ||
623 | {0x2014, 0x2015, COMPAT_OUT}, | ||
624 | {0xFF5E, 0x301C, COMPAT_OUT|COMPAT_IN}, | ||
625 | {0x2225, 0x2016, COMPAT_OUT|COMPAT_IN}, | ||
626 | {0xFF0D, 0x2212, COMPAT_OUT|COMPAT_IN}, | ||
627 | {0xFFE0, 0x00A2, COMPAT_OUT|COMPAT_IN}, | ||
628 | {0xFFE1, 0x00A3, COMPAT_OUT|COMPAT_IN}, | ||
629 | {0xFFE2, 0x00AC, COMPAT_OUT|COMPAT_IN}, | ||
630 | {0, 0, 0} | ||
631 | }; | ||
632 | |||
633 | static compat_t *cp51932_compat = cp932_compat; | ||
634 | |||
635 | /* cp20932_compat for kernel. cp932_compat for mlang. */ | ||
636 | static compat_t *cp5022x_compat = cp932_compat; | ||
637 | |||
638 | typedef HRESULT (WINAPI *CONVERTINETSTRING)( | ||
639 | LPDWORD lpdwMode, | ||
640 | DWORD dwSrcEncoding, | ||
641 | DWORD dwDstEncoding, | ||
642 | LPCSTR lpSrcStr, | ||
643 | LPINT lpnSrcSize, | ||
644 | LPBYTE lpDstStr, | ||
645 | LPINT lpnDstSize | ||
646 | ); | ||
647 | typedef HRESULT (WINAPI *CONVERTINETMULTIBYTETOUNICODE)( | ||
648 | LPDWORD lpdwMode, | ||
649 | DWORD dwSrcEncoding, | ||
650 | LPCSTR lpSrcStr, | ||
651 | LPINT lpnMultiCharCount, | ||
652 | LPWSTR lpDstStr, | ||
653 | LPINT lpnWideCharCount | ||
654 | ); | ||
655 | typedef HRESULT (WINAPI *CONVERTINETUNICODETOMULTIBYTE)( | ||
656 | LPDWORD lpdwMode, | ||
657 | DWORD dwEncoding, | ||
658 | LPCWSTR lpSrcStr, | ||
659 | LPINT lpnWideCharCount, | ||
660 | LPSTR lpDstStr, | ||
661 | LPINT lpnMultiCharCount | ||
662 | ); | ||
663 | typedef HRESULT (WINAPI *ISCONVERTINETSTRINGAVAILABLE)( | ||
664 | DWORD dwSrcEncoding, | ||
665 | DWORD dwDstEncoding | ||
666 | ); | ||
667 | typedef HRESULT (WINAPI *LCIDTORFC1766A)( | ||
668 | LCID Locale, | ||
669 | LPSTR pszRfc1766, | ||
670 | int nChar | ||
671 | ); | ||
672 | typedef HRESULT (WINAPI *LCIDTORFC1766W)( | ||
673 | LCID Locale, | ||
674 | LPWSTR pszRfc1766, | ||
675 | int nChar | ||
676 | ); | ||
677 | typedef HRESULT (WINAPI *RFC1766TOLCIDA)( | ||
678 | LCID *pLocale, | ||
679 | LPSTR pszRfc1766 | ||
680 | ); | ||
681 | typedef HRESULT (WINAPI *RFC1766TOLCIDW)( | ||
682 | LCID *pLocale, | ||
683 | LPWSTR pszRfc1766 | ||
684 | ); | ||
685 | static CONVERTINETSTRING ConvertINetString; | ||
686 | static CONVERTINETMULTIBYTETOUNICODE ConvertINetMultiByteToUnicode; | ||
687 | static CONVERTINETUNICODETOMULTIBYTE ConvertINetUnicodeToMultiByte; | ||
688 | static ISCONVERTINETSTRINGAVAILABLE IsConvertINetStringAvailable; | ||
689 | static LCIDTORFC1766A LcidToRfc1766A; | ||
690 | static RFC1766TOLCIDA Rfc1766ToLcidA; | ||
691 | |||
692 | static int | ||
693 | load_mlang(void) | ||
694 | { | ||
695 | HMODULE h; | ||
696 | if (ConvertINetString != NULL) | ||
697 | return TRUE; | ||
698 | h = LoadLibrary(TEXT("mlang.dll")); | ||
699 | if (!h) | ||
700 | return FALSE; | ||
701 | ConvertINetString = (CONVERTINETSTRING)GetProcAddressA(h, "ConvertINetString"); | ||
702 | ConvertINetMultiByteToUnicode = (CONVERTINETMULTIBYTETOUNICODE)GetProcAddressA(h, "ConvertINetMultiByteToUnicode"); | ||
703 | ConvertINetUnicodeToMultiByte = (CONVERTINETUNICODETOMULTIBYTE)GetProcAddressA(h, "ConvertINetUnicodeToMultiByte"); | ||
704 | IsConvertINetStringAvailable = (ISCONVERTINETSTRINGAVAILABLE)GetProcAddressA(h, "IsConvertINetStringAvailable"); | ||
705 | LcidToRfc1766A = (LCIDTORFC1766A)GetProcAddressA(h, "LcidToRfc1766A"); | ||
706 | Rfc1766ToLcidA = (RFC1766TOLCIDA)GetProcAddressA(h, "Rfc1766ToLcidA"); | ||
707 | return TRUE; | ||
708 | } | ||
709 | |||
710 | iconv_t | ||
711 | iconv_open(const char *tocode, const char *fromcode) | ||
712 | { | ||
713 | rec_iconv_t *cd; | ||
714 | |||
715 | cd = (rec_iconv_t *)xzalloc(sizeof(rec_iconv_t)); | ||
716 | |||
717 | /* reset the errno to prevent reporting wrong error code. | ||
718 | * 0 for unsorted error. */ | ||
719 | errno = 0; | ||
720 | if (win_iconv_open(cd, tocode, fromcode)) | ||
721 | return (iconv_t)cd; | ||
722 | |||
723 | free(cd); | ||
724 | |||
725 | return (iconv_t)(-1); | ||
726 | } | ||
727 | |||
728 | int | ||
729 | iconv_close(iconv_t _cd) | ||
730 | { | ||
731 | rec_iconv_t *cd = (rec_iconv_t *)_cd; | ||
732 | int r = cd->iconv_close(cd->cd); | ||
733 | int e = *(cd->_errno()); | ||
734 | free(cd); | ||
735 | errno = e; | ||
736 | return r; | ||
737 | } | ||
738 | |||
739 | size_t | ||
740 | iconv(iconv_t _cd, const char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft) | ||
741 | { | ||
742 | rec_iconv_t *cd = (rec_iconv_t *)_cd; | ||
743 | size_t r = cd->iconv(cd->cd, inbuf, inbytesleft, outbuf, outbytesleft); | ||
744 | errno = *(cd->_errno()); | ||
745 | return r; | ||
746 | } | ||
747 | |||
748 | static int | ||
749 | win_iconv_open(rec_iconv_t *cd, const char *tocode, const char *fromcode) | ||
750 | { | ||
751 | if (!make_csconv(fromcode, &cd->from) || !make_csconv(tocode, &cd->to)) | ||
752 | return FALSE; | ||
753 | cd->iconv_close = win_iconv_close; | ||
754 | cd->iconv = win_iconv; | ||
755 | cd->_errno = _errno; | ||
756 | cd->cd = (iconv_t)cd; | ||
757 | return TRUE; | ||
758 | } | ||
759 | |||
760 | static int | ||
761 | win_iconv_close(iconv_t cd UNUSED_PARAM) | ||
762 | { | ||
763 | return 0; | ||
764 | } | ||
765 | |||
766 | static size_t | ||
767 | win_iconv(iconv_t _cd, const char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft) | ||
768 | { | ||
769 | rec_iconv_t *cd = (rec_iconv_t *)_cd; | ||
770 | ushort wbuf[MB_CHAR_MAX]; /* enough room for one character */ | ||
771 | int insize; | ||
772 | int outsize; | ||
773 | int wsize; | ||
774 | DWORD frommode; | ||
775 | DWORD tomode; | ||
776 | uint wc; | ||
777 | compat_t *cp; | ||
778 | int i; | ||
779 | |||
780 | if (inbuf == NULL || *inbuf == NULL) | ||
781 | { | ||
782 | if (outbuf != NULL && *outbuf != NULL && cd->to.flush != NULL) | ||
783 | { | ||
784 | tomode = cd->to.mode; | ||
785 | outsize = cd->to.flush(&cd->to, (uchar *)*outbuf, *outbytesleft); | ||
786 | if (outsize == -1) | ||
787 | { | ||
788 | if ((cd->to.flags & FLAG_IGNORE) && errno != E2BIG) | ||
789 | { | ||
790 | outsize = 0; | ||
791 | } | ||
792 | else | ||
793 | { | ||
794 | cd->to.mode = tomode; | ||
795 | return (size_t)(-1); | ||
796 | } | ||
797 | } | ||
798 | *outbuf += outsize; | ||
799 | *outbytesleft -= outsize; | ||
800 | } | ||
801 | cd->from.mode = 0; | ||
802 | cd->to.mode = 0; | ||
803 | return 0; | ||
804 | } | ||
805 | |||
806 | while (*inbytesleft != 0) | ||
807 | { | ||
808 | frommode = cd->from.mode; | ||
809 | tomode = cd->to.mode; | ||
810 | wsize = MB_CHAR_MAX; | ||
811 | |||
812 | insize = cd->from.mbtowc(&cd->from, (const uchar *)*inbuf, *inbytesleft, wbuf, &wsize); | ||
813 | if (insize == -1) | ||
814 | { | ||
815 | if (cd->to.flags & FLAG_IGNORE) | ||
816 | { | ||
817 | cd->from.mode = frommode; | ||
818 | insize = 1; | ||
819 | wsize = 0; | ||
820 | } | ||
821 | else | ||
822 | { | ||
823 | cd->from.mode = frommode; | ||
824 | return (size_t)(-1); | ||
825 | } | ||
826 | } | ||
827 | |||
828 | if (wsize == 0) | ||
829 | { | ||
830 | *inbuf += insize; | ||
831 | *inbytesleft -= insize; | ||
832 | continue; | ||
833 | } | ||
834 | |||
835 | if (cd->from.compat != NULL) | ||
836 | { | ||
837 | wc = utf16_to_ucs4(wbuf); | ||
838 | cp = cd->from.compat; | ||
839 | for (i = 0; cp[i].in != 0; ++i) | ||
840 | { | ||
841 | if ((cp[i].flag & COMPAT_IN) && cp[i].out == wc) | ||
842 | { | ||
843 | ucs4_to_utf16(cp[i].in, wbuf, &wsize); | ||
844 | break; | ||
845 | } | ||
846 | } | ||
847 | } | ||
848 | |||
849 | if (cd->to.compat != NULL) | ||
850 | { | ||
851 | wc = utf16_to_ucs4(wbuf); | ||
852 | cp = cd->to.compat; | ||
853 | for (i = 0; cp[i].in != 0; ++i) | ||
854 | { | ||
855 | if ((cp[i].flag & COMPAT_OUT) && cp[i].in == wc) | ||
856 | { | ||
857 | ucs4_to_utf16(cp[i].out, wbuf, &wsize); | ||
858 | break; | ||
859 | } | ||
860 | } | ||
861 | } | ||
862 | |||
863 | outsize = cd->to.wctomb(&cd->to, wbuf, wsize, (uchar *)*outbuf, *outbytesleft); | ||
864 | if (outsize == -1) | ||
865 | { | ||
866 | if ((cd->to.flags & FLAG_IGNORE) && errno != E2BIG) | ||
867 | { | ||
868 | cd->to.mode = tomode; | ||
869 | outsize = 0; | ||
870 | } | ||
871 | else | ||
872 | { | ||
873 | cd->from.mode = frommode; | ||
874 | cd->to.mode = tomode; | ||
875 | return (size_t)(-1); | ||
876 | } | ||
877 | } | ||
878 | |||
879 | *inbuf += insize; | ||
880 | *outbuf += outsize; | ||
881 | *inbytesleft -= insize; | ||
882 | *outbytesleft -= outsize; | ||
883 | } | ||
884 | |||
885 | return 0; | ||
886 | } | ||
887 | |||
888 | static int | ||
889 | make_csconv(const char *_name, csconv_t *cv) | ||
890 | { | ||
891 | CPINFO cpinfo; | ||
892 | int use_compat = TRUE; | ||
893 | int flag = 0; | ||
894 | char *name; | ||
895 | char *p; | ||
896 | |||
897 | name = xstrndup(_name, strlen(_name)); | ||
898 | if (name == NULL) | ||
899 | return FALSE; | ||
900 | |||
901 | /* check for option "enc_name//opt1//opt2" */ | ||
902 | while ((p = strrstr(name, "//")) != NULL) | ||
903 | { | ||
904 | if (_stricmp(p + 2, "nocompat") == 0) | ||
905 | use_compat = FALSE; | ||
906 | else if (_stricmp(p + 2, "translit") == 0) | ||
907 | flag |= FLAG_TRANSLIT; | ||
908 | else if (_stricmp(p + 2, "ignore") == 0) | ||
909 | flag |= FLAG_IGNORE; | ||
910 | *p = 0; | ||
911 | } | ||
912 | |||
913 | cv->mode = 0; | ||
914 | cv->flags = flag; | ||
915 | cv->mblen = NULL; | ||
916 | cv->flush = NULL; | ||
917 | cv->compat = NULL; | ||
918 | cv->codepage = name_to_codepage(name); | ||
919 | if (cv->codepage == 1200 || cv->codepage == 1201) | ||
920 | { | ||
921 | cv->mbtowc = utf16_mbtowc; | ||
922 | cv->wctomb = utf16_wctomb; | ||
923 | if (_stricmp(name, "UTF-16") == 0 || _stricmp(name, "UTF16") == 0 || | ||
924 | _stricmp(name, "UCS-2") == 0 || _stricmp(name, "UCS2") == 0 || | ||
925 | _stricmp(name,"UCS-2-INTERNAL") == 0) | ||
926 | cv->flags |= FLAG_USE_BOM; | ||
927 | } | ||
928 | else if (cv->codepage == 12000 || cv->codepage == 12001) | ||
929 | { | ||
930 | cv->mbtowc = utf32_mbtowc; | ||
931 | cv->wctomb = utf32_wctomb; | ||
932 | if (_stricmp(name, "UTF-32") == 0 || _stricmp(name, "UTF32") == 0 || | ||
933 | _stricmp(name, "UCS-4") == 0 || _stricmp(name, "UCS4") == 0) | ||
934 | cv->flags |= FLAG_USE_BOM; | ||
935 | } | ||
936 | else if (cv->codepage == 65001) | ||
937 | { | ||
938 | cv->mbtowc = kernel_mbtowc; | ||
939 | cv->wctomb = kernel_wctomb; | ||
940 | cv->mblen = utf8_mblen; | ||
941 | } | ||
942 | else if ((cv->codepage == 50220 || cv->codepage == 50221 || cv->codepage == 50222) && load_mlang()) | ||
943 | { | ||
944 | cv->mbtowc = iso2022jp_mbtowc; | ||
945 | cv->wctomb = iso2022jp_wctomb; | ||
946 | cv->flush = iso2022jp_flush; | ||
947 | } | ||
948 | else if (cv->codepage == 51932 && load_mlang()) | ||
949 | { | ||
950 | cv->mbtowc = mlang_mbtowc; | ||
951 | cv->wctomb = mlang_wctomb; | ||
952 | cv->mblen = eucjp_mblen; | ||
953 | } | ||
954 | else if (IsValidCodePage(cv->codepage) | ||
955 | && GetCPInfo(cv->codepage, &cpinfo) != 0) | ||
956 | { | ||
957 | cv->mbtowc = kernel_mbtowc; | ||
958 | cv->wctomb = kernel_wctomb; | ||
959 | if (cpinfo.MaxCharSize == 1) | ||
960 | cv->mblen = sbcs_mblen; | ||
961 | else if (cpinfo.MaxCharSize == 2) | ||
962 | cv->mblen = dbcs_mblen; | ||
963 | else | ||
964 | cv->mblen = mbcs_mblen; | ||
965 | } | ||
966 | else | ||
967 | { | ||
968 | /* not supported */ | ||
969 | free(name); | ||
970 | errno = EINVAL; | ||
971 | return FALSE; | ||
972 | } | ||
973 | |||
974 | if (use_compat) | ||
975 | { | ||
976 | switch (cv->codepage) | ||
977 | { | ||
978 | case 932: cv->compat = cp932_compat; break; | ||
979 | case 20932: cv->compat = cp20932_compat; break; | ||
980 | case 51932: cv->compat = cp51932_compat; break; | ||
981 | case 50220: case 50221: case 50222: cv->compat = cp5022x_compat; break; | ||
982 | } | ||
983 | } | ||
984 | |||
985 | free(name); | ||
986 | |||
987 | return TRUE; | ||
988 | } | ||
989 | |||
990 | static int | ||
991 | name_to_codepage(const char *name) | ||
992 | { | ||
993 | int i; | ||
994 | |||
995 | if (*name == '\0' || | ||
996 | strcmp(name, "char") == 0) | ||
997 | return GetACP(); | ||
998 | else if (strcmp(name, "wchar_t") == 0) | ||
999 | return 1200; | ||
1000 | else if (_strnicmp(name, "cp", 2) == 0) | ||
1001 | return atoi(name + 2); /* CP123 */ | ||
1002 | else if ('0' <= name[0] && name[0] <= '9') | ||
1003 | return atoi(name); /* 123 */ | ||
1004 | else if (_strnicmp(name, "xx", 2) == 0) | ||
1005 | return atoi(name + 2); /* XX123 for debug */ | ||
1006 | |||
1007 | for (i = 0; codepage_alias[i].name != NULL; ++i) | ||
1008 | if (_stricmp(name, codepage_alias[i].name) == 0) | ||
1009 | return codepage_alias[i].codepage; | ||
1010 | return -1; | ||
1011 | } | ||
1012 | |||
1013 | /* | ||
1014 | * http://www.faqs.org/rfcs/rfc2781.html | ||
1015 | */ | ||
1016 | static uint | ||
1017 | utf16_to_ucs4(const ushort *wbuf) | ||
1018 | { | ||
1019 | uint wc = wbuf[0]; | ||
1020 | if (0xD800 <= wbuf[0] && wbuf[0] <= 0xDBFF) | ||
1021 | wc = ((wbuf[0] & 0x3FF) << 10) + (wbuf[1] & 0x3FF) + 0x10000; | ||
1022 | return wc; | ||
1023 | } | ||
1024 | |||
1025 | static void | ||
1026 | ucs4_to_utf16(uint wc, ushort *wbuf, int *wbufsize) | ||
1027 | { | ||
1028 | if (wc < 0x10000) | ||
1029 | { | ||
1030 | wbuf[0] = wc; | ||
1031 | *wbufsize = 1; | ||
1032 | } | ||
1033 | else | ||
1034 | { | ||
1035 | wc -= 0x10000; | ||
1036 | wbuf[0] = 0xD800 | ((wc >> 10) & 0x3FF); | ||
1037 | wbuf[1] = 0xDC00 | (wc & 0x3FF); | ||
1038 | *wbufsize = 2; | ||
1039 | } | ||
1040 | } | ||
1041 | |||
1042 | /* | ||
1043 | * Check if codepage is one of those for which the dwFlags parameter | ||
1044 | * to MultiByteToWideChar() must be zero. Return zero or | ||
1045 | * MB_ERR_INVALID_CHARS. The docs in Platform SDK for Windows | ||
1046 | * Server 2003 R2 claims that also codepage 65001 is one of these, but | ||
1047 | * that doesn't seem to be the case. The MSDN docs for MSVS2008 leave | ||
1048 | * out 65001 (UTF-8), and that indeed seems to be the case on XP, it | ||
1049 | * works fine to pass MB_ERR_INVALID_CHARS in dwFlags when converting | ||
1050 | * from UTF-8. | ||
1051 | */ | ||
1052 | static int | ||
1053 | mbtowc_flags(int codepage) | ||
1054 | { | ||
1055 | return (codepage == 50220 || codepage == 50221 || | ||
1056 | codepage == 50222 || codepage == 50225 || | ||
1057 | codepage == 50227 || codepage == 50229 || | ||
1058 | codepage == 52936 || codepage == 54936 || | ||
1059 | (codepage >= 57002 && codepage <= 57011) || | ||
1060 | codepage == 65000 || codepage == 42) ? 0 : MB_ERR_INVALID_CHARS; | ||
1061 | } | ||
1062 | |||
/*
 * Check if codepage is one of those for which the lpUsedDefaultChar
 * parameter to WideCharToMultiByte() must be NULL.  The docs in the
 * Platform SDK for Windows Server 2003 R2 claim that this is the
 * list below, while the MSDN docs for MSVS2008 claim that it is only
 * for 65000 (UTF-7) and 65001 (UTF-8).  This time the earlier Platform
 * SDK seems to be correct, at least for XP.
 *
 * Returns 1 for such codepages and 0 for all others.
 */
static int
must_use_null_useddefaultchar(int codepage)
{
    switch (codepage)
    {
    case 42:
    case 50220: case 50221: case 50222:
    case 50225: case 50227: case 50229:
    case 52936: case 54936:
    case 65000: case 65001:
        return 1;
    }
    if (codepage >= 57002 && codepage <= 57011)
        return 1;
    return 0;
}
1082 | |||
/* Store err in errno and return -1, so callers can write
 * "return seterror(E...);". */
static int
seterror(int err)
{
    const int failure = -1;

    errno = err;
    return failure;
}
1089 | |||
1090 | static int | ||
1091 | sbcs_mblen(csconv_t *cv UNUSED_PARAM, const uchar *buf UNUSED_PARAM, | ||
1092 | int bufsize UNUSED_PARAM) | ||
1093 | { | ||
1094 | return 1; | ||
1095 | } | ||
1096 | |||
1097 | static int | ||
1098 | dbcs_mblen(csconv_t *cv, const uchar *buf, int bufsize) | ||
1099 | { | ||
1100 | int len = IsDBCSLeadByteEx(cv->codepage, buf[0]) ? 2 : 1; | ||
1101 | if (bufsize < len) | ||
1102 | return seterror(EINVAL); | ||
1103 | return len; | ||
1104 | } | ||
1105 | |||
1106 | static int | ||
1107 | mbcs_mblen(csconv_t *cv, const uchar *buf, int bufsize) | ||
1108 | { | ||
1109 | int len = 0; | ||
1110 | |||
1111 | if (cv->codepage == 54936) { | ||
1112 | if (buf[0] <= 0x7F) | ||
1113 | len = 1; | ||
1114 | else if (buf[0] >= 0x81 && buf[0] <= 0xFE && | ||
1115 | bufsize >= 2 && | ||
1116 | ((buf[1] >= 0x40 && buf[1] <= 0x7E) || | ||
1117 | (buf[1] >= 0x80 && buf[1] <= 0xFE))) | ||
1118 | len = 2; | ||
1119 | else if (buf[0] >= 0x81 && buf[0] <= 0xFE && | ||
1120 | bufsize >= 4 && | ||
1121 | buf[1] >= 0x30 && buf[1] <= 0x39) | ||
1122 | len = 4; | ||
1123 | else | ||
1124 | return seterror(EINVAL); | ||
1125 | return len; | ||
1126 | } | ||
1127 | else | ||
1128 | return seterror(EINVAL); | ||
1129 | } | ||
1130 | |||
1131 | static int | ||
1132 | utf8_mblen(csconv_t *cv UNUSED_PARAM, const uchar *buf, int bufsize) | ||
1133 | { | ||
1134 | int len = 0; | ||
1135 | |||
1136 | if (buf[0] < 0x80) len = 1; | ||
1137 | else if ((buf[0] & 0xE0) == 0xC0) len = 2; | ||
1138 | else if ((buf[0] & 0xF0) == 0xE0) len = 3; | ||
1139 | else if ((buf[0] & 0xF8) == 0xF0) len = 4; | ||
1140 | else if ((buf[0] & 0xFC) == 0xF8) len = 5; | ||
1141 | else if ((buf[0] & 0xFE) == 0xFC) len = 6; | ||
1142 | |||
1143 | if (len == 0) | ||
1144 | return seterror(EILSEQ); | ||
1145 | else if (bufsize < len) | ||
1146 | return seterror(EINVAL); | ||
1147 | return len; | ||
1148 | } | ||
1149 | |||
1150 | static int | ||
1151 | eucjp_mblen(csconv_t *cv UNUSED_PARAM, const uchar *buf, int bufsize) | ||
1152 | { | ||
1153 | if (buf[0] < 0x80) /* ASCII */ | ||
1154 | return 1; | ||
1155 | else if (buf[0] == 0x8E) /* JIS X 0201 */ | ||
1156 | { | ||
1157 | if (bufsize < 2) | ||
1158 | return seterror(EINVAL); | ||
1159 | else if (!(0xA1 <= buf[1] && buf[1] <= 0xDF)) | ||
1160 | return seterror(EILSEQ); | ||
1161 | return 2; | ||
1162 | } | ||
1163 | else if (buf[0] == 0x8F) /* JIS X 0212 */ | ||
1164 | { | ||
1165 | if (bufsize < 3) | ||
1166 | return seterror(EINVAL); | ||
1167 | else if (!(0xA1 <= buf[1] && buf[1] <= 0xFE) | ||
1168 | || !(0xA1 <= buf[2] && buf[2] <= 0xFE)) | ||
1169 | return seterror(EILSEQ); | ||
1170 | return 3; | ||
1171 | } | ||
1172 | else /* JIS X 0208 */ | ||
1173 | { | ||
1174 | if (bufsize < 2) | ||
1175 | return seterror(EINVAL); | ||
1176 | else if (!(0xA1 <= buf[0] && buf[0] <= 0xFE) | ||
1177 | || !(0xA1 <= buf[1] && buf[1] <= 0xFE)) | ||
1178 | return seterror(EILSEQ); | ||
1179 | return 2; | ||
1180 | } | ||
1181 | } | ||
1182 | |||
1183 | static int | ||
1184 | kernel_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize) | ||
1185 | { | ||
1186 | int len; | ||
1187 | |||
1188 | len = cv->mblen(cv, buf, bufsize); | ||
1189 | if (len == -1) | ||
1190 | return -1; | ||
1191 | /* If converting from ASCII, reject 8bit | ||
1192 | * chars. MultiByteToWideChar() doesn't. Note that for ASCII we | ||
1193 | * know that the mblen function is sbcs_mblen() so len is 1. | ||
1194 | */ | ||
1195 | if (cv->codepage == 20127 && buf[0] >= 0x80) | ||
1196 | return seterror(EILSEQ); | ||
1197 | *wbufsize = MultiByteToWideChar(cv->codepage, mbtowc_flags (cv->codepage), | ||
1198 | (const char *)buf, len, (wchar_t *)wbuf, *wbufsize); | ||
1199 | if (*wbufsize == 0) | ||
1200 | return seterror(EILSEQ); | ||
1201 | return len; | ||
1202 | } | ||
1203 | |||
1204 | static int | ||
1205 | kernel_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize) | ||
1206 | { | ||
1207 | BOOL usedDefaultChar = 0; | ||
1208 | BOOL *p = NULL; | ||
1209 | int flags = 0; | ||
1210 | int len; | ||
1211 | |||
1212 | if (bufsize == 0) | ||
1213 | return seterror(E2BIG); | ||
1214 | if (!must_use_null_useddefaultchar(cv->codepage)) | ||
1215 | { | ||
1216 | p = &usedDefaultChar; | ||
1217 | #ifdef WC_NO_BEST_FIT_CHARS | ||
1218 | if (!(cv->flags & FLAG_TRANSLIT)) | ||
1219 | flags |= WC_NO_BEST_FIT_CHARS; | ||
1220 | #endif | ||
1221 | } | ||
1222 | len = WideCharToMultiByte(cv->codepage, flags, | ||
1223 | (const wchar_t *)wbuf, wbufsize, (char *)buf, bufsize, NULL, p); | ||
1224 | if (len == 0) | ||
1225 | { | ||
1226 | if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) | ||
1227 | return seterror(E2BIG); | ||
1228 | return seterror(EILSEQ); | ||
1229 | } | ||
1230 | else if (usedDefaultChar && !(cv->flags & FLAG_TRANSLIT)) | ||
1231 | return seterror(EILSEQ); | ||
1232 | else if (cv->mblen(cv, buf, len) != len) /* validate result */ | ||
1233 | return seterror(EILSEQ); | ||
1234 | return len; | ||
1235 | } | ||
1236 | |||
1237 | /* | ||
1238 | * It seems that the mode (cv->mode) is fixnum. | ||
1239 | * For example, when converting iso-2022-jp(cp50221) to unicode: | ||
1240 | * in ascii sequence: mode=0xC42C0000 | ||
1241 | * in jisx0208 sequence: mode=0xC42C0001 | ||
1242 | * "C42C" is same for each convert session. | ||
1243 | * It should be: ((codepage-1)<<16)|state | ||
1244 | */ | ||
1245 | static int | ||
1246 | mlang_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize) | ||
1247 | { | ||
1248 | int len; | ||
1249 | int insize; | ||
1250 | HRESULT hr; | ||
1251 | |||
1252 | len = cv->mblen(cv, buf, bufsize); | ||
1253 | if (len == -1) | ||
1254 | return -1; | ||
1255 | insize = len; | ||
1256 | hr = ConvertINetMultiByteToUnicode(&cv->mode, cv->codepage, | ||
1257 | (const char *)buf, &insize, (wchar_t *)wbuf, wbufsize); | ||
1258 | if (hr != S_OK || insize != len) | ||
1259 | return seterror(EILSEQ); | ||
1260 | return len; | ||
1261 | } | ||
1262 | |||
1263 | static int | ||
1264 | mlang_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize) | ||
1265 | { | ||
1266 | char tmpbuf[MB_CHAR_MAX]; /* enough room for one character */ | ||
1267 | int tmpsize = MB_CHAR_MAX; | ||
1268 | int insize = wbufsize; | ||
1269 | HRESULT hr; | ||
1270 | |||
1271 | hr = ConvertINetUnicodeToMultiByte(&cv->mode, cv->codepage, | ||
1272 | (const wchar_t *)wbuf, &wbufsize, tmpbuf, &tmpsize); | ||
1273 | if (hr != S_OK || insize != wbufsize) | ||
1274 | return seterror(EILSEQ); | ||
1275 | else if (bufsize < tmpsize) | ||
1276 | return seterror(E2BIG); | ||
1277 | else if (cv->mblen(cv, (uchar *)tmpbuf, tmpsize) != tmpsize) | ||
1278 | return seterror(EILSEQ); | ||
1279 | memcpy(buf, tmpbuf, tmpsize); | ||
1280 | return tmpsize; | ||
1281 | } | ||
1282 | |||
1283 | static int | ||
1284 | utf16_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize) | ||
1285 | { | ||
1286 | int codepage = cv->codepage; | ||
1287 | |||
1288 | /* swap endian: 1200 <-> 1201 */ | ||
1289 | if (cv->mode & UNICODE_MODE_SWAPPED) | ||
1290 | codepage ^= 1; | ||
1291 | |||
1292 | if (bufsize < 2) | ||
1293 | return seterror(EINVAL); | ||
1294 | if (codepage == 1200) /* little endian */ | ||
1295 | wbuf[0] = (buf[1] << 8) | buf[0]; | ||
1296 | else if (codepage == 1201) /* big endian */ | ||
1297 | wbuf[0] = (buf[0] << 8) | buf[1]; | ||
1298 | |||
1299 | if ((cv->flags & FLAG_USE_BOM) && !(cv->mode & UNICODE_MODE_BOM_DONE)) | ||
1300 | { | ||
1301 | cv->mode |= UNICODE_MODE_BOM_DONE; | ||
1302 | if (wbuf[0] == 0xFFFE) | ||
1303 | { | ||
1304 | cv->mode |= UNICODE_MODE_SWAPPED; | ||
1305 | *wbufsize = 0; | ||
1306 | return 2; | ||
1307 | } | ||
1308 | else if (wbuf[0] == 0xFEFF) | ||
1309 | { | ||
1310 | *wbufsize = 0; | ||
1311 | return 2; | ||
1312 | } | ||
1313 | } | ||
1314 | |||
1315 | if (0xDC00 <= wbuf[0] && wbuf[0] <= 0xDFFF) | ||
1316 | return seterror(EILSEQ); | ||
1317 | if (0xD800 <= wbuf[0] && wbuf[0] <= 0xDBFF) | ||
1318 | { | ||
1319 | if (bufsize < 4) | ||
1320 | return seterror(EINVAL); | ||
1321 | if (codepage == 1200) /* little endian */ | ||
1322 | wbuf[1] = (buf[3] << 8) | buf[2]; | ||
1323 | else if (codepage == 1201) /* big endian */ | ||
1324 | wbuf[1] = (buf[2] << 8) | buf[3]; | ||
1325 | if (!(0xDC00 <= wbuf[1] && wbuf[1] <= 0xDFFF)) | ||
1326 | return seterror(EILSEQ); | ||
1327 | *wbufsize = 2; | ||
1328 | return 4; | ||
1329 | } | ||
1330 | *wbufsize = 1; | ||
1331 | return 2; | ||
1332 | } | ||
1333 | |||
1334 | static int | ||
1335 | utf16_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize) | ||
1336 | { | ||
1337 | if ((cv->flags & FLAG_USE_BOM) && !(cv->mode & UNICODE_MODE_BOM_DONE)) | ||
1338 | { | ||
1339 | int r; | ||
1340 | |||
1341 | cv->mode |= UNICODE_MODE_BOM_DONE; | ||
1342 | if (bufsize < 2) | ||
1343 | return seterror(E2BIG); | ||
1344 | if (cv->codepage == 1200) /* little endian */ | ||
1345 | memcpy(buf, "\xFF\xFE", 2); | ||
1346 | else if (cv->codepage == 1201) /* big endian */ | ||
1347 | memcpy(buf, "\xFE\xFF", 2); | ||
1348 | |||
1349 | r = utf16_wctomb(cv, wbuf, wbufsize, buf + 2, bufsize - 2); | ||
1350 | if (r == -1) | ||
1351 | return -1; | ||
1352 | return r + 2; | ||
1353 | } | ||
1354 | |||
1355 | if (bufsize < 2) | ||
1356 | return seterror(E2BIG); | ||
1357 | if (cv->codepage == 1200) /* little endian */ | ||
1358 | { | ||
1359 | buf[0] = (wbuf[0] & 0x00FF); | ||
1360 | buf[1] = (wbuf[0] & 0xFF00) >> 8; | ||
1361 | } | ||
1362 | else if (cv->codepage == 1201) /* big endian */ | ||
1363 | { | ||
1364 | buf[0] = (wbuf[0] & 0xFF00) >> 8; | ||
1365 | buf[1] = (wbuf[0] & 0x00FF); | ||
1366 | } | ||
1367 | if (0xD800 <= wbuf[0] && wbuf[0] <= 0xDBFF) | ||
1368 | { | ||
1369 | if (bufsize < 4) | ||
1370 | return seterror(E2BIG); | ||
1371 | if (cv->codepage == 1200) /* little endian */ | ||
1372 | { | ||
1373 | buf[2] = (wbuf[1] & 0x00FF); | ||
1374 | buf[3] = (wbuf[1] & 0xFF00) >> 8; | ||
1375 | } | ||
1376 | else if (cv->codepage == 1201) /* big endian */ | ||
1377 | { | ||
1378 | buf[2] = (wbuf[1] & 0xFF00) >> 8; | ||
1379 | buf[3] = (wbuf[1] & 0x00FF); | ||
1380 | } | ||
1381 | return 4; | ||
1382 | } | ||
1383 | return 2; | ||
1384 | } | ||
1385 | |||
1386 | static int | ||
1387 | utf32_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize) | ||
1388 | { | ||
1389 | int codepage = cv->codepage; | ||
1390 | uint wc = 0xD800; | ||
1391 | |||
1392 | /* swap endian: 12000 <-> 12001 */ | ||
1393 | if (cv->mode & UNICODE_MODE_SWAPPED) | ||
1394 | codepage ^= 1; | ||
1395 | |||
1396 | if (bufsize < 4) | ||
1397 | return seterror(EINVAL); | ||
1398 | if (codepage == 12000) /* little endian */ | ||
1399 | wc = (buf[3] << 24) | (buf[2] << 16) | (buf[1] << 8) | buf[0]; | ||
1400 | else if (codepage == 12001) /* big endian */ | ||
1401 | wc = (buf[0] << 24) | (buf[1] << 16) | (buf[2] << 8) | buf[3]; | ||
1402 | |||
1403 | if ((cv->flags & FLAG_USE_BOM) && !(cv->mode & UNICODE_MODE_BOM_DONE)) | ||
1404 | { | ||
1405 | cv->mode |= UNICODE_MODE_BOM_DONE; | ||
1406 | if (wc == 0xFFFE0000) | ||
1407 | { | ||
1408 | cv->mode |= UNICODE_MODE_SWAPPED; | ||
1409 | *wbufsize = 0; | ||
1410 | return 4; | ||
1411 | } | ||
1412 | else if (wc == 0x0000FEFF) | ||
1413 | { | ||
1414 | *wbufsize = 0; | ||
1415 | return 4; | ||
1416 | } | ||
1417 | } | ||
1418 | |||
1419 | if ((0xD800 <= wc && wc <= 0xDFFF) || 0x10FFFF < wc) | ||
1420 | return seterror(EILSEQ); | ||
1421 | ucs4_to_utf16(wc, wbuf, wbufsize); | ||
1422 | return 4; | ||
1423 | } | ||
1424 | |||
1425 | static int | ||
1426 | utf32_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize) | ||
1427 | { | ||
1428 | uint wc; | ||
1429 | |||
1430 | if ((cv->flags & FLAG_USE_BOM) && !(cv->mode & UNICODE_MODE_BOM_DONE)) | ||
1431 | { | ||
1432 | int r; | ||
1433 | |||
1434 | cv->mode |= UNICODE_MODE_BOM_DONE; | ||
1435 | if (bufsize < 4) | ||
1436 | return seterror(E2BIG); | ||
1437 | if (cv->codepage == 12000) /* little endian */ | ||
1438 | memcpy(buf, "\xFF\xFE\x00\x00", 4); | ||
1439 | else if (cv->codepage == 12001) /* big endian */ | ||
1440 | memcpy(buf, "\x00\x00\xFE\xFF", 4); | ||
1441 | |||
1442 | r = utf32_wctomb(cv, wbuf, wbufsize, buf + 4, bufsize - 4); | ||
1443 | if (r == -1) | ||
1444 | return -1; | ||
1445 | return r + 4; | ||
1446 | } | ||
1447 | |||
1448 | if (bufsize < 4) | ||
1449 | return seterror(E2BIG); | ||
1450 | wc = utf16_to_ucs4(wbuf); | ||
1451 | if (cv->codepage == 12000) /* little endian */ | ||
1452 | { | ||
1453 | buf[0] = wc & 0x000000FF; | ||
1454 | buf[1] = (wc & 0x0000FF00) >> 8; | ||
1455 | buf[2] = (wc & 0x00FF0000) >> 16; | ||
1456 | buf[3] = (wc & 0xFF000000) >> 24; | ||
1457 | } | ||
1458 | else if (cv->codepage == 12001) /* big endian */ | ||
1459 | { | ||
1460 | buf[0] = (wc & 0xFF000000) >> 24; | ||
1461 | buf[1] = (wc & 0x00FF0000) >> 16; | ||
1462 | buf[2] = (wc & 0x0000FF00) >> 8; | ||
1463 | buf[3] = wc & 0x000000FF; | ||
1464 | } | ||
1465 | return 4; | ||
1466 | } | ||
1467 | |||
1468 | /* | ||
1469 | * 50220: ISO 2022 Japanese with no halfwidth Katakana; Japanese (JIS) | ||
1470 | * 50221: ISO 2022 Japanese with halfwidth Katakana; Japanese (JIS-Allow | ||
1471 | * 1 byte Kana) | ||
1472 | * 50222: ISO 2022 Japanese JIS X 0201-1989; Japanese (JIS-Allow 1 byte | ||
1473 | * Kana - SO/SI) | ||
1474 | * | ||
1475 | * MultiByteToWideChar() and WideCharToMultiByte() behave differently | ||
1476 | * depending on Windows version. On XP, WideCharToMultiByte() doesn't | ||
1477 | * terminate result sequence with ascii escape. But Vista does. | ||
1478 | * Use MLang instead. | ||
1479 | */ | ||
1480 | |||
/* The ISO-2022 state is packed into one value: the character set id
 * in bits 8..15 and the shift state in bits 0..7. */
#define ISO2022_MODE(cs, shift) (((cs) << 8) | (shift))
#define ISO2022_MODE_CS(mode) (((mode) >> 8) & 0xFF)
#define ISO2022_MODE_SHIFT(mode) ((mode) & 0xFF)

/* shift states */
#define ISO2022_SI  0
#define ISO2022_SO  1

/* shift in */
static const char iso2022_SI_seq[] = "\x0F";
/* shift out */
static const char iso2022_SO_seq[] = "\x0E";

/* One designation escape sequence and the character set it selects. */
typedef struct iso2022_esc_t iso2022_esc_t;
struct iso2022_esc_t {
    const char *esc;    /* the escape sequence bytes */
    int esc_len;        /* number of bytes in esc */
    int len;            /* bytes per character in this set */
    int cs;             /* ISO2022JP_CS_* id stored in the mode */
};

/* character set ids */
#define ISO2022JP_CS_ASCII            0
#define ISO2022JP_CS_JISX0201_ROMAN   1
#define ISO2022JP_CS_JISX0201_KANA    2
#define ISO2022JP_CS_JISX0208_1978    3
#define ISO2022JP_CS_JISX0208_1983    4
#define ISO2022JP_CS_JISX0212         5

/* The converters index this table by character set id as well as
 * scanning it, so the row order matters. */
static iso2022_esc_t iso2022jp_esc[] = {
    /* esc, esc_len, len, cs */
    {"\x1B\x28\x42", 3, 1, ISO2022JP_CS_ASCII},
    {"\x1B\x28\x4A", 3, 1, ISO2022JP_CS_JISX0201_ROMAN},
    {"\x1B\x28\x49", 3, 1, ISO2022JP_CS_JISX0201_KANA},
    {"\x1B\x24\x40", 3, 2, ISO2022JP_CS_JISX0208_1983}, /* unify 1978 with 1983 */
    {"\x1B\x24\x42", 3, 2, ISO2022JP_CS_JISX0208_1983},
    {"\x1B\x24\x28\x44", 4, 2, ISO2022JP_CS_JISX0212},
    {NULL, 0, 0, 0}
};
1517 | |||
1518 | static int | ||
1519 | iso2022jp_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize) | ||
1520 | { | ||
1521 | iso2022_esc_t *iesc = iso2022jp_esc; | ||
1522 | char tmp[MB_CHAR_MAX]; | ||
1523 | int insize; | ||
1524 | HRESULT hr; | ||
1525 | DWORD dummy = 0; | ||
1526 | int len; | ||
1527 | int esc_len; | ||
1528 | int cs; | ||
1529 | int shift; | ||
1530 | int i; | ||
1531 | |||
1532 | if (buf[0] == 0x1B) | ||
1533 | { | ||
1534 | for (i = 0; iesc[i].esc != NULL; ++i) | ||
1535 | { | ||
1536 | esc_len = iesc[i].esc_len; | ||
1537 | if (bufsize < esc_len) | ||
1538 | { | ||
1539 | if (strncmp((char *)buf, iesc[i].esc, bufsize) == 0) | ||
1540 | return seterror(EINVAL); | ||
1541 | } | ||
1542 | else | ||
1543 | { | ||
1544 | if (strncmp((char *)buf, iesc[i].esc, esc_len) == 0) | ||
1545 | { | ||
1546 | cv->mode = ISO2022_MODE(iesc[i].cs, ISO2022_SI); | ||
1547 | *wbufsize = 0; | ||
1548 | return esc_len; | ||
1549 | } | ||
1550 | } | ||
1551 | } | ||
1552 | /* not supported escape sequence */ | ||
1553 | return seterror(EILSEQ); | ||
1554 | } | ||
1555 | else if (buf[0] == iso2022_SO_seq[0]) | ||
1556 | { | ||
1557 | cv->mode = ISO2022_MODE(ISO2022_MODE_CS(cv->mode), ISO2022_SO); | ||
1558 | *wbufsize = 0; | ||
1559 | return 1; | ||
1560 | } | ||
1561 | else if (buf[0] == iso2022_SI_seq[0]) | ||
1562 | { | ||
1563 | cv->mode = ISO2022_MODE(ISO2022_MODE_CS(cv->mode), ISO2022_SI); | ||
1564 | *wbufsize = 0; | ||
1565 | return 1; | ||
1566 | } | ||
1567 | |||
1568 | cs = ISO2022_MODE_CS(cv->mode); | ||
1569 | shift = ISO2022_MODE_SHIFT(cv->mode); | ||
1570 | |||
1571 | /* reset the mode for informal sequence */ | ||
1572 | if (buf[0] < 0x20) | ||
1573 | { | ||
1574 | cs = ISO2022JP_CS_ASCII; | ||
1575 | shift = ISO2022_SI; | ||
1576 | } | ||
1577 | |||
1578 | len = iesc[cs].len; | ||
1579 | if (bufsize < len) | ||
1580 | return seterror(EINVAL); | ||
1581 | for (i = 0; i < len; ++i) | ||
1582 | if (!(buf[i] < 0x80)) | ||
1583 | return seterror(EILSEQ); | ||
1584 | esc_len = iesc[cs].esc_len; | ||
1585 | memcpy(tmp, iesc[cs].esc, esc_len); | ||
1586 | if (shift == ISO2022_SO) | ||
1587 | { | ||
1588 | memcpy(tmp + esc_len, iso2022_SO_seq, 1); | ||
1589 | esc_len += 1; | ||
1590 | } | ||
1591 | memcpy(tmp + esc_len, buf, len); | ||
1592 | |||
1593 | if ((cv->codepage == 50220 || cv->codepage == 50221 | ||
1594 | || cv->codepage == 50222) && shift == ISO2022_SO) | ||
1595 | { | ||
1596 | /* XXX: shift-out cannot be used for mbtowc (both kernel and | ||
1597 | * mlang) */ | ||
1598 | esc_len = iesc[ISO2022JP_CS_JISX0201_KANA].esc_len; | ||
1599 | memcpy(tmp, iesc[ISO2022JP_CS_JISX0201_KANA].esc, esc_len); | ||
1600 | memcpy(tmp + esc_len, buf, len); | ||
1601 | } | ||
1602 | |||
1603 | insize = len + esc_len; | ||
1604 | hr = ConvertINetMultiByteToUnicode(&dummy, cv->codepage, | ||
1605 | (const char *)tmp, &insize, (wchar_t *)wbuf, wbufsize); | ||
1606 | if (hr != S_OK || insize != len + esc_len) | ||
1607 | return seterror(EILSEQ); | ||
1608 | |||
1609 | /* Check for conversion error. Assuming defaultChar is 0x3F. */ | ||
1610 | /* ascii should be converted from ascii */ | ||
1611 | if (wbuf[0] == buf[0] | ||
1612 | && cv->mode != ISO2022_MODE(ISO2022JP_CS_ASCII, ISO2022_SI)) | ||
1613 | return seterror(EILSEQ); | ||
1614 | |||
1615 | /* reset the mode for informal sequence */ | ||
1616 | if (cv->mode != ISO2022_MODE(cs, shift)) | ||
1617 | cv->mode = ISO2022_MODE(cs, shift); | ||
1618 | |||
1619 | return len; | ||
1620 | } | ||
1621 | |||
1622 | static int | ||
1623 | iso2022jp_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize) | ||
1624 | { | ||
1625 | iso2022_esc_t *iesc = iso2022jp_esc; | ||
1626 | char tmp[MB_CHAR_MAX]; | ||
1627 | int tmpsize = MB_CHAR_MAX; | ||
1628 | int insize = wbufsize; | ||
1629 | HRESULT hr; | ||
1630 | DWORD dummy = 0; | ||
1631 | int len; | ||
1632 | int esc_len; | ||
1633 | int cs; | ||
1634 | int shift; | ||
1635 | int i; | ||
1636 | |||
1637 | /* | ||
1638 | * MultiByte = [escape sequence] + character + [escape sequence] | ||
1639 | * | ||
1640 | * Whether trailing escape sequence is added depends on which API is | ||
1641 | * used (kernel or MLang, and its version). | ||
1642 | */ | ||
1643 | hr = ConvertINetUnicodeToMultiByte(&dummy, cv->codepage, | ||
1644 | (const wchar_t *)wbuf, &wbufsize, tmp, &tmpsize); | ||
1645 | if (hr != S_OK || insize != wbufsize) | ||
1646 | return seterror(EILSEQ); | ||
1647 | else if (bufsize < tmpsize) | ||
1648 | return seterror(E2BIG); | ||
1649 | |||
1650 | if (tmpsize == 1) | ||
1651 | { | ||
1652 | cs = ISO2022JP_CS_ASCII; | ||
1653 | esc_len = 0; | ||
1654 | } | ||
1655 | else | ||
1656 | { | ||
1657 | for (i = 1; iesc[i].esc != NULL; ++i) | ||
1658 | { | ||
1659 | esc_len = iesc[i].esc_len; | ||
1660 | if (strncmp(tmp, iesc[i].esc, esc_len) == 0) | ||
1661 | { | ||
1662 | cs = iesc[i].cs; | ||
1663 | break; | ||
1664 | } | ||
1665 | } | ||
1666 | if (iesc[i].esc == NULL) | ||
1667 | /* not supported escape sequence */ | ||
1668 | return seterror(EILSEQ); | ||
1669 | } | ||
1670 | |||
1671 | shift = ISO2022_SI; | ||
1672 | if (tmp[esc_len] == iso2022_SO_seq[0]) | ||
1673 | { | ||
1674 | shift = ISO2022_SO; | ||
1675 | esc_len += 1; | ||
1676 | } | ||
1677 | |||
1678 | len = iesc[cs].len; | ||
1679 | |||
1680 | /* Check for converting error. Assuming defaultChar is 0x3F. */ | ||
1681 | /* ascii should be converted from ascii */ | ||
1682 | if (cs == ISO2022JP_CS_ASCII && !(wbuf[0] < 0x80)) | ||
1683 | return seterror(EILSEQ); | ||
1684 | else if (tmpsize < esc_len + len) | ||
1685 | return seterror(EILSEQ); | ||
1686 | |||
1687 | if (cv->mode == ISO2022_MODE(cs, shift)) | ||
1688 | { | ||
1689 | /* remove escape sequence */ | ||
1690 | if (esc_len != 0) | ||
1691 | memmove(tmp, tmp + esc_len, len); | ||
1692 | esc_len = 0; | ||
1693 | } | ||
1694 | else | ||
1695 | { | ||
1696 | if (cs == ISO2022JP_CS_ASCII) | ||
1697 | { | ||
1698 | esc_len = iesc[ISO2022JP_CS_ASCII].esc_len; | ||
1699 | memmove(tmp + esc_len, tmp, len); | ||
1700 | memcpy(tmp, iesc[ISO2022JP_CS_ASCII].esc, esc_len); | ||
1701 | } | ||
1702 | if (ISO2022_MODE_SHIFT(cv->mode) == ISO2022_SO) | ||
1703 | { | ||
1704 | /* shift-in before changing to other mode */ | ||
1705 | memmove(tmp + 1, tmp, len + esc_len); | ||
1706 | memcpy(tmp, iso2022_SI_seq, 1); | ||
1707 | esc_len += 1; | ||
1708 | } | ||
1709 | } | ||
1710 | |||
1711 | if (bufsize < len + esc_len) | ||
1712 | return seterror(E2BIG); | ||
1713 | memcpy(buf, tmp, len + esc_len); | ||
1714 | cv->mode = ISO2022_MODE(cs, shift); | ||
1715 | return len + esc_len; | ||
1716 | } | ||
1717 | |||
1718 | static int | ||
1719 | iso2022jp_flush(csconv_t *cv, uchar *buf, int bufsize) | ||
1720 | { | ||
1721 | iso2022_esc_t *iesc = iso2022jp_esc; | ||
1722 | int esc_len; | ||
1723 | |||
1724 | if (cv->mode != ISO2022_MODE(ISO2022JP_CS_ASCII, ISO2022_SI)) | ||
1725 | { | ||
1726 | esc_len = 0; | ||
1727 | if (ISO2022_MODE_SHIFT(cv->mode) != ISO2022_SI) | ||
1728 | esc_len += 1; | ||
1729 | if (ISO2022_MODE_CS(cv->mode) != ISO2022JP_CS_ASCII) | ||
1730 | esc_len += iesc[ISO2022JP_CS_ASCII].esc_len; | ||
1731 | if (bufsize < esc_len) | ||
1732 | return seterror(E2BIG); | ||
1733 | |||
1734 | esc_len = 0; | ||
1735 | if (ISO2022_MODE_SHIFT(cv->mode) != ISO2022_SI) | ||
1736 | { | ||
1737 | memcpy(buf, iso2022_SI_seq, 1); | ||
1738 | esc_len += 1; | ||
1739 | } | ||
1740 | if (ISO2022_MODE_CS(cv->mode) != ISO2022JP_CS_ASCII) | ||
1741 | { | ||
1742 | memcpy(buf + esc_len, iesc[ISO2022JP_CS_ASCII].esc, | ||
1743 | iesc[ISO2022JP_CS_ASCII].esc_len); | ||
1744 | esc_len += iesc[ISO2022JP_CS_ASCII].esc_len; | ||
1745 | } | ||
1746 | return esc_len; | ||
1747 | } | ||
1748 | return 0; | ||
1749 | } | ||
1750 | |||
1751 | static void process_file(iconv_t cd, FILE *in, FILE *out) | ||
1752 | { | ||
1753 | char inbuf[BUFSIZ]; | ||
1754 | char outbuf[BUFSIZ]; | ||
1755 | const char *pin; | ||
1756 | char *pout; | ||
1757 | size_t inbytesleft; | ||
1758 | size_t outbytesleft; | ||
1759 | size_t rest = 0; | ||
1760 | size_t r; | ||
1761 | |||
1762 | while ((inbytesleft=fread(inbuf+rest, 1, sizeof(inbuf)-rest, in)) != 0 | ||
1763 | || rest != 0) { | ||
1764 | inbytesleft += rest; | ||
1765 | pin = inbuf; | ||
1766 | pout = outbuf; | ||
1767 | outbytesleft = sizeof(outbuf); | ||
1768 | r = iconv(cd, &pin, &inbytesleft, &pout, &outbytesleft); | ||
1769 | fwrite(outbuf, 1, sizeof(outbuf) - outbytesleft, out); | ||
1770 | if (r == (size_t)(-1) && errno != E2BIG && | ||
1771 | (errno != EINVAL || feof(in))) | ||
1772 | bb_perror_msg_and_die("conversion error"); | ||
1773 | memmove(inbuf, pin, inbytesleft); | ||
1774 | rest = inbytesleft; | ||
1775 | } | ||
1776 | pout = outbuf; | ||
1777 | outbytesleft = sizeof(outbuf); | ||
1778 | r = iconv(cd, NULL, NULL, &pout, &outbytesleft); | ||
1779 | fwrite(outbuf, 1, sizeof(outbuf) - outbytesleft, out); | ||
1780 | if (r == (size_t)(-1)) | ||
1781 | bb_perror_msg_and_die("conversion error"); | ||
1782 | } | ||
1783 | |||
1784 | int iconv_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; | ||
1785 | int iconv_main(int argc, char **argv) | ||
1786 | { | ||
1787 | char *fromcode = NULL; | ||
1788 | char *tocode = NULL; | ||
1789 | int i; | ||
1790 | iconv_t cd; | ||
1791 | FILE *in = stdin; | ||
1792 | FILE *out = stdout; | ||
1793 | int ignore = 0; | ||
1794 | |||
1795 | while ((i = getopt(argc, argv, "f:t:lco:")) != -1) { | ||
1796 | switch (i) { | ||
1797 | case 'l': | ||
1798 | for (i = 0; codepage_alias[i].name != NULL; ++i) | ||
1799 | printf("%s\n", codepage_alias[i].name); | ||
1800 | return 0; | ||
1801 | |||
1802 | case 'f': | ||
1803 | fromcode = optarg; | ||
1804 | break; | ||
1805 | |||
1806 | case 't': | ||
1807 | tocode = optarg; | ||
1808 | break; | ||
1809 | |||
1810 | case 'c': | ||
1811 | ignore = 1; | ||
1812 | break; | ||
1813 | |||
1814 | case 'o': | ||
1815 | out = xfopen(optarg, "wb"); | ||
1816 | break; | ||
1817 | |||
1818 | default: | ||
1819 | bb_show_usage(); | ||
1820 | } | ||
1821 | } | ||
1822 | |||
1823 | if (fromcode == NULL || tocode == NULL) | ||
1824 | bb_show_usage(); | ||
1825 | |||
1826 | if (ignore) | ||
1827 | tocode = xasprintf("%s//IGNORE", tocode); | ||
1828 | |||
1829 | cd = iconv_open(tocode, fromcode); | ||
1830 | if (cd == (iconv_t)(-1)) | ||
1831 | bb_perror_msg_and_die("iconv_open error"); | ||
1832 | |||
1833 | if (optind == argc || | ||
1834 | (optind == argc-1 && strcmp(argv[optind], "-") == 0)) { | ||
1835 | process_file(cd, in, out); | ||
1836 | } | ||
1837 | else { | ||
1838 | for (i=optind; i<argc; ++i) { | ||
1839 | in = xfopen(argv[i], "rb"); | ||
1840 | process_file(cd, in, out); | ||
1841 | fclose(in); | ||
1842 | } | ||
1843 | } | ||
1844 | |||
1845 | iconv_close(cd); | ||
1846 | return 0; | ||
1847 | } | ||
diff --git a/miscutils/less.c b/miscutils/less.c index 223c2558d..a5ce14c91 100644 --- a/miscutils/less.c +++ b/miscutils/less.c | |||
@@ -145,6 +145,10 @@ | |||
145 | 145 | ||
146 | #include <sched.h> /* sched_yield() */ | 146 | #include <sched.h> /* sched_yield() */ |
147 | 147 | ||
148 | #if ENABLE_PLATFORM_MINGW32 | ||
149 | #include <conio.h> | ||
150 | #endif | ||
151 | |||
148 | #include "libbb.h" | 152 | #include "libbb.h" |
149 | #include "common_bufsiz.h" | 153 | #include "common_bufsiz.h" |
150 | #if ENABLE_FEATURE_LESS_REGEXP | 154 | #if ENABLE_FEATURE_LESS_REGEXP |
@@ -236,7 +240,9 @@ struct globals { | |||
236 | smallint winsize_err; | 240 | smallint winsize_err; |
237 | #endif | 241 | #endif |
238 | smallint terminated; | 242 | smallint terminated; |
243 | #if !ENABLE_PLATFORM_MINGW32 | ||
239 | struct termios term_orig, term_less; | 244 | struct termios term_orig, term_less; |
245 | #endif | ||
240 | char kbd_input[KEYCODE_BUFFER_SIZE]; | 246 | char kbd_input[KEYCODE_BUFFER_SIZE]; |
241 | }; | 247 | }; |
242 | #define G (*ptr_to_globals) | 248 | #define G (*ptr_to_globals) |
@@ -298,7 +304,9 @@ struct globals { | |||
298 | static void set_tty_cooked(void) | 304 | static void set_tty_cooked(void) |
299 | { | 305 | { |
300 | fflush_all(); | 306 | fflush_all(); |
307 | #if !ENABLE_PLATFORM_MINGW32 | ||
301 | tcsetattr(kbd_fd, TCSANOW, &term_orig); | 308 | tcsetattr(kbd_fd, TCSANOW, &term_orig); |
309 | #endif | ||
302 | } | 310 | } |
303 | 311 | ||
304 | /* Move the cursor to a position (x,y), where (0,0) is the | 312 | /* Move the cursor to a position (x,y), where (0,0) is the |
@@ -330,7 +338,11 @@ static void less_exit(int code) | |||
330 | set_tty_cooked(); | 338 | set_tty_cooked(); |
331 | if (!(G.kbd_fd_orig_flags & O_NONBLOCK)) | 339 | if (!(G.kbd_fd_orig_flags & O_NONBLOCK)) |
332 | ndelay_off(kbd_fd); | 340 | ndelay_off(kbd_fd); |
341 | #if !ENABLE_PLATFORM_MINGW32 | ||
333 | clear_line(); | 342 | clear_line(); |
343 | #else | ||
344 | printf(ESC"[?1049l"); | ||
345 | #endif | ||
334 | if (code < 0) | 346 | if (code < 0) |
335 | kill_myself_with_sig(- code); /* does not return */ | 347 | kill_myself_with_sig(- code); /* does not return */ |
336 | exit(code); | 348 | exit(code); |
@@ -575,6 +587,11 @@ static void read_lines(void) | |||
575 | last_line_pos = 0; | 587 | last_line_pos = 0; |
576 | break; | 588 | break; |
577 | } | 589 | } |
590 | #if ENABLE_PLATFORM_MINGW32 | ||
591 | if (c == '\r') { | ||
592 | continue; | ||
593 | } | ||
594 | #endif | ||
578 | /* NUL is substituted by '\n'! */ | 595 | /* NUL is substituted by '\n'! */ |
579 | if (c == '\0') c = '\n'; | 596 | if (c == '\0') c = '\n'; |
580 | *p++ = c; | 597 | *p++ = c; |
@@ -671,7 +688,12 @@ static void update_num_lines(void) | |||
671 | /* only do this for regular files */ | 688 | /* only do this for regular files */ |
672 | if (num_lines == REOPEN_AND_COUNT || num_lines == REOPEN_STDIN) { | 689 | if (num_lines == REOPEN_AND_COUNT || num_lines == REOPEN_STDIN) { |
673 | count = 0; | 690 | count = 0; |
691 | #if !ENABLE_PLATFORM_MINGW32 | ||
674 | fd = open("/proc/self/fd/0", O_RDONLY); | 692 | fd = open("/proc/self/fd/0", O_RDONLY); |
693 | #else | ||
694 | /* don't even try to access /proc on WIN32 */ | ||
695 | fd = -1; | ||
696 | #endif | ||
675 | if (fd < 0 && num_lines == REOPEN_AND_COUNT) { | 697 | if (fd < 0 && num_lines == REOPEN_AND_COUNT) { |
676 | /* "filename" is valid only if REOPEN_AND_COUNT */ | 698 | /* "filename" is valid only if REOPEN_AND_COUNT */ |
677 | fd = open(filename, O_RDONLY); | 699 | fd = open(filename, O_RDONLY); |
@@ -854,7 +876,12 @@ static void print_found(const char *line) | |||
854 | match_status = 1; | 876 | match_status = 1; |
855 | } | 877 | } |
856 | 878 | ||
879 | #if !ENABLE_PLATFORM_MINGW32 | ||
857 | printf("%s%s\n", growline ? growline : "", str); | 880 | printf("%s%s\n", growline ? growline : "", str); |
881 | #else | ||
882 | /* skip newline, we use explicit positioning on WIN32 */ | ||
883 | printf("%s%s", growline ? growline : "", str); | ||
884 | #endif | ||
858 | free(growline); | 885 | free(growline); |
859 | } | 886 | } |
860 | #else | 887 | #else |
@@ -890,7 +917,12 @@ static void print_ascii(const char *str) | |||
890 | *p = '\0'; | 917 | *p = '\0'; |
891 | print_hilite(buf); | 918 | print_hilite(buf); |
892 | } | 919 | } |
920 | #if !ENABLE_PLATFORM_MINGW32 | ||
893 | puts(str); | 921 | puts(str); |
922 | #else | ||
923 | /* skip newline, we use explicit positioning on WIN32 */ | ||
924 | printf("%s", str); | ||
925 | #endif | ||
894 | } | 926 | } |
895 | 927 | ||
896 | /* Print the buffer */ | 928 | /* Print the buffer */ |
@@ -900,6 +932,10 @@ static void buffer_print(void) | |||
900 | 932 | ||
901 | move_cursor(0, 0); | 933 | move_cursor(0, 0); |
902 | for (i = 0; i <= max_displayed_line; i++) { | 934 | for (i = 0; i <= max_displayed_line; i++) { |
935 | #if ENABLE_PLATFORM_MINGW32 | ||
936 | /* make sure we're on the right line */ | ||
937 | move_cursor(i+1, 0); | ||
938 | #endif | ||
903 | printf(CLEAR_2_EOL); | 939 | printf(CLEAR_2_EOL); |
904 | if (option_mask32 & FLAG_N) | 940 | if (option_mask32 & FLAG_N) |
905 | print_lineno(buffer[i]); | 941 | print_lineno(buffer[i]); |
@@ -1087,9 +1123,13 @@ static void reinitialize(void) | |||
1087 | if (G.winsize_err) | 1123 | if (G.winsize_err) |
1088 | printf(ESC"[999;999H" ESC"[6n"); | 1124 | printf(ESC"[999;999H" ESC"[6n"); |
1089 | #endif | 1125 | #endif |
1126 | #if ENABLE_PLATFORM_MINGW32 | ||
1127 | printf(ESC"[?1049h"); | ||
1128 | #endif | ||
1090 | buffer_fill_and_print(); | 1129 | buffer_fill_and_print(); |
1091 | } | 1130 | } |
1092 | 1131 | ||
1132 | #if !ENABLE_PLATFORM_MINGW32 | ||
1093 | static int64_t getch_nowait(void) | 1133 | static int64_t getch_nowait(void) |
1094 | { | 1134 | { |
1095 | int rd; | 1135 | int rd; |
@@ -1151,6 +1191,46 @@ static int64_t getch_nowait(void) | |||
1151 | set_tty_cooked(); | 1191 | set_tty_cooked(); |
1152 | return key64; | 1192 | return key64; |
1153 | } | 1193 | } |
1194 | #else | ||
1195 | static int64_t getch_nowait(void) | ||
1196 | { | ||
1197 | int64_t c; | ||
1198 | |||
1199 | retry: | ||
1200 | c = _getch(); | ||
1201 | if (c == 0 || c == 0xe0) { | ||
1202 | switch (_getch()) { | ||
1203 | case 0x48: | ||
1204 | c = KEYCODE_UP; | ||
1205 | break; | ||
1206 | case 0x50: | ||
1207 | c = KEYCODE_DOWN; | ||
1208 | break; | ||
1209 | case 0x49: | ||
1210 | c = KEYCODE_PAGEUP; | ||
1211 | break; | ||
1212 | case 0x51: | ||
1213 | c = KEYCODE_PAGEDOWN; | ||
1214 | break; | ||
1215 | case 0x47: | ||
1216 | c = KEYCODE_HOME; | ||
1217 | break; | ||
1218 | case 0x4f: | ||
1219 | c = KEYCODE_END; | ||
1220 | break; | ||
1221 | default: | ||
1222 | goto retry; | ||
1223 | } | ||
1224 | } | ||
1225 | |||
1226 | /* Position cursor if line input is done */ | ||
1227 | if (less_gets_pos >= 0) | ||
1228 | move_cursor(max_displayed_line + 2, less_gets_pos + 1); | ||
1229 | fflush_all(); | ||
1230 | |||
1231 | return c; | ||
1232 | } | ||
1233 | #endif | ||
1154 | 1234 | ||
1155 | /* Grab a character from input without requiring the return key. | 1235 | /* Grab a character from input without requiring the return key. |
1156 | * May return KEYCODE_xxx values. | 1236 | * May return KEYCODE_xxx values. |
@@ -1791,10 +1871,12 @@ static void keypress_process(int keypress) | |||
1791 | number_process(keypress); | 1871 | number_process(keypress); |
1792 | } | 1872 | } |
1793 | 1873 | ||
1874 | #if !ENABLE_PLATFORM_MINGW32 | ||
1794 | static void sig_catcher(int sig) | 1875 | static void sig_catcher(int sig) |
1795 | { | 1876 | { |
1796 | less_exit(- sig); | 1877 | less_exit(- sig); |
1797 | } | 1878 | } |
1879 | #endif | ||
1798 | 1880 | ||
1799 | #if ENABLE_FEATURE_LESS_WINCH | 1881 | #if ENABLE_FEATURE_LESS_WINCH |
1800 | static void sigwinch_handler(int sig UNUSED_PARAM) | 1882 | static void sigwinch_handler(int sig UNUSED_PARAM) |
@@ -1806,7 +1888,9 @@ static void sigwinch_handler(int sig UNUSED_PARAM) | |||
1806 | int less_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; | 1888 | int less_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; |
1807 | int less_main(int argc, char **argv) | 1889 | int less_main(int argc, char **argv) |
1808 | { | 1890 | { |
1891 | #if !ENABLE_PLATFORM_MINGW32 | ||
1809 | char *tty_name; | 1892 | char *tty_name; |
1893 | #endif | ||
1810 | int tty_fd; | 1894 | int tty_fd; |
1811 | 1895 | ||
1812 | INIT_G(); | 1896 | INIT_G(); |
@@ -1865,6 +1949,7 @@ int less_main(int argc, char **argv) | |||
1865 | if (option_mask32 & FLAG_TILDE) | 1949 | if (option_mask32 & FLAG_TILDE) |
1866 | empty_line_marker = ""; | 1950 | empty_line_marker = ""; |
1867 | 1951 | ||
1952 | #if !ENABLE_PLATFORM_MINGW32 | ||
1868 | /* Some versions of less can survive w/o controlling tty, | 1953 | /* Some versions of less can survive w/o controlling tty, |
1869 | * try to do the same. This also allows to specify an alternative | 1954 | * try to do the same. This also allows to specify an alternative |
1870 | * tty via "less 1<>TTY". | 1955 | * tty via "less 1<>TTY". |
@@ -1890,8 +1975,13 @@ int less_main(int argc, char **argv) | |||
1890 | } | 1975 | } |
1891 | G.kbd_fd_orig_flags = ndelay_on(tty_fd); | 1976 | G.kbd_fd_orig_flags = ndelay_on(tty_fd); |
1892 | kbd_fd = tty_fd; /* save in a global */ | 1977 | kbd_fd = tty_fd; /* save in a global */ |
1978 | #else | ||
1979 | kbd_fd = tty_fd = 0; | ||
1980 | #endif | ||
1893 | 1981 | ||
1982 | #if !ENABLE_PLATFORM_MINGW32 | ||
1894 | get_termios_and_make_raw(tty_fd, &term_less, &term_orig, TERMIOS_RAW_CRNL_INPUT); | 1983 | get_termios_and_make_raw(tty_fd, &term_less, &term_orig, TERMIOS_RAW_CRNL_INPUT); |
1984 | #endif | ||
1895 | 1985 | ||
1896 | IF_FEATURE_LESS_ASK_TERMINAL(G.winsize_err =) get_terminal_width_height(tty_fd, &width, &max_displayed_line); | 1986 | IF_FEATURE_LESS_ASK_TERMINAL(G.winsize_err =) get_terminal_width_height(tty_fd, &width, &max_displayed_line); |
1897 | /* 20: two tabstops + 4 */ | 1987 | /* 20: two tabstops + 4 */ |
diff --git a/miscutils/man.c b/miscutils/man.c index 61086612a..6724b4b5d 100644 --- a/miscutils/man.c +++ b/miscutils/man.c | |||
@@ -199,8 +199,7 @@ static char **add_MANPATH(char **man_path_list, int *count_mp, char *path) | |||
199 | if (path) while (*path) { | 199 | if (path) while (*path) { |
200 | char *next_path; | 200 | char *next_path; |
201 | char **path_element; | 201 | char **path_element; |
202 | 202 | next_path = strchr(path, PATH_SEP); | |
203 | next_path = strchr(path, ':'); | ||
204 | if (next_path) { | 203 | if (next_path) { |
205 | if (next_path == path) /* "::"? */ | 204 | if (next_path == path) /* "::"? */ |
206 | goto next; | 205 | goto next; |
@@ -223,7 +222,7 @@ static char **add_MANPATH(char **man_path_list, int *count_mp, char *path) | |||
223 | if (!next_path) | 222 | if (!next_path) |
224 | break; | 223 | break; |
225 | /* "path" may be a result of getenv(), be nice and don't mangle it */ | 224 | /* "path" may be a result of getenv(), be nice and don't mangle it */ |
226 | *next_path = ':'; | 225 | *next_path = PATH_SEP; |
227 | next: | 226 | next: |
228 | path = next_path + 1; | 227 | path = next_path + 1; |
229 | } | 228 | } |
@@ -249,11 +248,24 @@ int man_main(int argc UNUSED_PARAM, char **argv) | |||
249 | int count_mp; | 248 | int count_mp; |
250 | int opt, not_found; | 249 | int opt, not_found; |
251 | char *token[2]; | 250 | char *token[2]; |
251 | #if ENABLE_PLATFORM_MINGW32 | ||
252 | char **ptr; | ||
253 | char *exepath, *relpath; | ||
254 | const char *mpl[] = { "/usr/man", "/usr/share/man", NULL, NULL }; | ||
255 | #endif | ||
252 | 256 | ||
253 | INIT_G(); | 257 | INIT_G(); |
254 | 258 | ||
255 | opt = getopt32(argv, "^+" "aw" "\0" "-1"/*at least one arg*/); | 259 | opt = getopt32(argv, "^+" "aw" "\0" "-1"/*at least one arg*/); |
256 | argv += optind; | 260 | argv += optind; |
261 | #if ENABLE_PLATFORM_MINGW32 | ||
262 | /* add system drive prefix to filenames, if necessary */ | ||
263 | for (ptr = argv; *ptr; ++ptr) { | ||
264 | if (strchr(*ptr, '/') || strchr(*ptr, '\\')) | ||
265 | *ptr = xabsolute_path(*ptr); | ||
266 | } | ||
267 | chdir_system_drive(); | ||
268 | #endif | ||
257 | 269 | ||
258 | sec_list = xstrdup("0p:1:1p:2:3:3p:4:5:6:7:8:9"); | 270 | sec_list = xstrdup("0p:1:1p:2:3:3p:4:5:6:7:8:9"); |
259 | 271 | ||
@@ -291,11 +303,25 @@ int man_main(int argc UNUSED_PARAM, char **argv) | |||
291 | } | 303 | } |
292 | config_close(parser); | 304 | config_close(parser); |
293 | 305 | ||
306 | #if ENABLE_PLATFORM_MINGW32 | ||
307 | /* allow man pages to be stored relative to the executable */ | ||
308 | exepath = xstrdup(bb_busybox_exec_path); | ||
309 | relpath = concat_path_file(dirname(exepath), "man"); | ||
310 | |||
311 | if (!man_path_list) { | ||
312 | mpl[2] = relpath; | ||
313 | man_path_list = (char**)mpl; | ||
314 | } | ||
315 | else { | ||
316 | man_path_list = add_MANPATH(man_path_list, &count_mp, relpath); | ||
317 | } | ||
318 | #else | ||
294 | if (!man_path_list) { | 319 | if (!man_path_list) { |
295 | static const char *const mpl[] = { "/usr/man", "/usr/share/man", NULL }; | 320 | static const char *const mpl[] = { "/usr/man", "/usr/share/man", NULL }; |
296 | man_path_list = (char**)mpl; | 321 | man_path_list = (char**)mpl; |
297 | /*count_mp = 2; - not used below anyway */ | 322 | /*count_mp = 2; - not used below anyway */ |
298 | } | 323 | } |
324 | #endif | ||
299 | 325 | ||
300 | { | 326 | { |
301 | /* environment overrides setting from man.config */ | 327 | /* environment overrides setting from man.config */ |