aboutsummaryrefslogtreecommitdiff
path: root/miscutils
diff options
context:
space:
mode:
Diffstat (limited to 'miscutils')
-rw-r--r--miscutils/bbconfig.c1
-rw-r--r--miscutils/dc.c2
-rw-r--r--miscutils/iconv.c1847
-rw-r--r--miscutils/less.c90
-rw-r--r--miscutils/man.c32
5 files changed, 1968 insertions, 4 deletions
diff --git a/miscutils/bbconfig.c b/miscutils/bbconfig.c
index fe02516a8..077e03c5d 100644
--- a/miscutils/bbconfig.c
+++ b/miscutils/bbconfig.c
@@ -35,6 +35,7 @@
35#include "libbb.h" 35#include "libbb.h"
36#include "bbconfigopts.h" 36#include "bbconfigopts.h"
37#if ENABLE_FEATURE_COMPRESS_BBCONFIG 37#if ENABLE_FEATURE_COMPRESS_BBCONFIG
38#define BB_ARCHIVE_PUBLIC
38# include "bb_archive.h" 39# include "bb_archive.h"
39# include "bbconfigopts_bz2.h" 40# include "bbconfigopts_bz2.h"
40#endif 41#endif
diff --git a/miscutils/dc.c b/miscutils/dc.c
index 5aef64b60..c7ce2be0b 100644
--- a/miscutils/dc.c
+++ b/miscutils/dc.c
@@ -17,7 +17,7 @@ typedef unsigned long data_t;
17#define DATA_FMT "l" 17#define DATA_FMT "l"
18#else 18#else
19typedef unsigned long long data_t; 19typedef unsigned long long data_t;
20#define DATA_FMT "ll" 20#define DATA_FMT LL_FMT
21#endif 21#endif
22 22
23struct globals { 23struct globals {
diff --git a/miscutils/iconv.c b/miscutils/iconv.c
new file mode 100644
index 000000000..c3289982d
--- /dev/null
+++ b/miscutils/iconv.c
@@ -0,0 +1,1847 @@
1/*
2 * iconv implementation using Win32 API to convert.
3 *
4 * This file is placed in the public domain.
5 */
6
7/*
8 * This code was obtained from:
9 *
10 * https://github.com/win-iconv/win-iconv
11 *
12 * Modified for busybox-w32 by Ronald M Yorston. These modifications
13 * are also dedicated to the public domain.
14 */
15
16//config:config ICONV
17//config: bool "iconv"
18//config: default y
19//config: depends on PLATFORM_MINGW32
20//config: help
21//config: 'iconv' converts text between character encodings.
22
23//applet:IF_ICONV(APPLET(iconv, BB_DIR_USR_BIN, BB_SUID_DROP))
24
25//kbuild:lib-$(CONFIG_ICONV) += iconv.o
26
27//usage:#define iconv_trivial_usage
28//usage: "[-lc] [-o outfile] -f from-enc -t to-enc [FILE]..."
29//usage:#define iconv_full_usage "\n\n"
30//usage: "Convert text between character encodings\n"
31//usage: "\n -l List all known character encodings"
32//usage: "\n -c Silently discard characters that cannot be converted"
33//usage: "\n -o Use outfile for output"
34//usage: "\n -f Use from-enc for input characters"
35//usage: "\n -t Use to-enc for output characters"
36
37#include "libbb.h"
38
39/* WORKAROUND: */
40#define GetProcAddressA GetProcAddress
41
42#define MB_CHAR_MAX 16
43
44#define UNICODE_MODE_BOM_DONE 1
45#define UNICODE_MODE_SWAPPED 2
46
47#define FLAG_USE_BOM 1
48#define FLAG_TRANSLIT 2 /* //TRANSLIT */
49#define FLAG_IGNORE 4 /* //IGNORE */
50
51typedef unsigned char uchar;
52typedef unsigned short ushort;
53typedef unsigned int uint;
54
55typedef void* iconv_t;
56
57iconv_t iconv_open(const char *tocode, const char *fromcode);
58int iconv_close(iconv_t cd);
59size_t iconv(iconv_t cd, const char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft);
60
61typedef struct compat_t compat_t;
62typedef struct csconv_t csconv_t;
63typedef struct rec_iconv_t rec_iconv_t;
64
65typedef iconv_t (*f_iconv_open)(const char *tocode, const char *fromcode);
66typedef int (*f_iconv_close)(iconv_t cd);
67typedef size_t (*f_iconv)(iconv_t cd, const char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft);
68typedef int* (*f_errno)(void);
69typedef int (*f_mbtowc)(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize);
70typedef int (*f_wctomb)(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize);
71typedef int (*f_mblen)(csconv_t *cv, const uchar *buf, int bufsize);
72typedef int (*f_flush)(csconv_t *cv, uchar *buf, int bufsize);
73
74#define COMPAT_IN 1
75#define COMPAT_OUT 2
76
77/* unicode mapping for compatibility with other conversion table. */
78struct compat_t {
79 uint in;
80 uint out;
81 uint flag;
82};
83
84struct csconv_t {
85 int codepage;
86 int flags;
87 f_mbtowc mbtowc;
88 f_wctomb wctomb;
89 f_mblen mblen;
90 f_flush flush;
91 DWORD mode;
92 compat_t *compat;
93};
94
95struct rec_iconv_t {
96 iconv_t cd;
97 f_iconv_close iconv_close;
98 f_iconv iconv;
99 f_errno _errno;
100 csconv_t from;
101 csconv_t to;
102};
103
104static int win_iconv_open(rec_iconv_t *cd, const char *tocode, const char *fromcode);
105static int win_iconv_close(iconv_t cd);
106static size_t win_iconv(iconv_t cd, const char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft);
107
108static int load_mlang(void);
109static int make_csconv(const char *name, csconv_t *cv);
110static int name_to_codepage(const char *name);
111static uint utf16_to_ucs4(const ushort *wbuf);
112static void ucs4_to_utf16(uint wc, ushort *wbuf, int *wbufsize);
113static int mbtowc_flags(int codepage);
114static int must_use_null_useddefaultchar(int codepage);
115static int seterror(int err);
116
117static int sbcs_mblen(csconv_t *cv, const uchar *buf, int bufsize);
118static int dbcs_mblen(csconv_t *cv, const uchar *buf, int bufsize);
119static int mbcs_mblen(csconv_t *cv, const uchar *buf, int bufsize);
120static int utf8_mblen(csconv_t *cv, const uchar *buf, int bufsize);
121static int eucjp_mblen(csconv_t *cv, const uchar *buf, int bufsize);
122
123static int kernel_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize);
124static int kernel_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize);
125static int mlang_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize);
126static int mlang_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize);
127static int utf16_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize);
128static int utf16_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize);
129static int utf32_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize);
130static int utf32_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize);
131static int iso2022jp_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize);
132static int iso2022jp_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize);
133static int iso2022jp_flush(csconv_t *cv, uchar *buf, int bufsize);
134
135static struct {
136 int codepage;
137 const char *name;
138} codepage_alias[] = {
139 {65001, "CP65001"},
140 {65001, "UTF8"},
141 {65001, "UTF-8"},
142
143 {1200, "CP1200"},
144 {1200, "UTF16LE"},
145 {1200, "UTF-16LE"},
146 {1200, "UCS2LE"},
147 {1200, "UCS-2LE"},
148 {1200, "UCS-2-INTERNAL"},
149
150 {1201, "CP1201"},
151 {1201, "UTF16BE"},
152 {1201, "UTF-16BE"},
153 {1201, "UCS2BE"},
154 {1201, "UCS-2BE"},
155 {1201, "unicodeFFFE"},
156
157 {12000, "CP12000"},
158 {12000, "UTF32LE"},
159 {12000, "UTF-32LE"},
160 {12000, "UCS4LE"},
161 {12000, "UCS-4LE"},
162
163 {12001, "CP12001"},
164 {12001, "UTF32BE"},
165 {12001, "UTF-32BE"},
166 {12001, "UCS4BE"},
167 {12001, "UCS-4BE"},
168
169#ifndef GLIB_COMPILATION
170 /*
171 * Default is big endian.
172 * See rfc2781 4.3 Interpreting text labelled as UTF-16.
173 */
174 {1201, "UTF16"},
175 {1201, "UTF-16"},
176 {1201, "UCS2"},
177 {1201, "UCS-2"},
178 {12001, "UTF32"},
179 {12001, "UTF-32"},
180 {12001, "UCS-4"},
181 {12001, "UCS4"},
182#else
183 /* Default is little endian, because the platform is */
184 {1200, "UTF16"},
185 {1200, "UTF-16"},
186 {1200, "UCS2"},
187 {1200, "UCS-2"},
188 {12000, "UTF32"},
189 {12000, "UTF-32"},
190 {12000, "UCS4"},
191 {12000, "UCS-4"},
192#endif
193
194 /* copy from libiconv `iconv -l` */
195 /* !IsValidCodePage(367) */
196 {20127, "ANSI_X3.4-1968"},
197 {20127, "ANSI_X3.4-1986"},
198 {20127, "ASCII"},
199 {20127, "CP367"},
200 {20127, "IBM367"},
201 {20127, "ISO-IR-6"},
202 {20127, "ISO646-US"},
203 {20127, "ISO_646.IRV:1991"},
204 {20127, "US"},
205 {20127, "US-ASCII"},
206 {20127, "CSASCII"},
207
208 /* !IsValidCodePage(819) */
209 {1252, "CP819"},
210 {1252, "IBM819"},
211 {28591, "ISO-8859-1"},
212 {28591, "ISO-IR-100"},
213 {28591, "ISO8859-1"},
214 {28591, "ISO_8859-1"},
215 {28591, "ISO_8859-1:1987"},
216 {28591, "L1"},
217 {28591, "LATIN1"},
218 {28591, "CSISOLATIN1"},
219
220 {1250, "CP1250"},
221 {1250, "MS-EE"},
222 {1250, "WINDOWS-1250"},
223
224 {1251, "CP1251"},
225 {1251, "MS-CYRL"},
226 {1251, "WINDOWS-1251"},
227
228 {1252, "CP1252"},
229 {1252, "MS-ANSI"},
230 {1252, "WINDOWS-1252"},
231
232 {1253, "CP1253"},
233 {1253, "MS-GREEK"},
234 {1253, "WINDOWS-1253"},
235
236 {1254, "CP1254"},
237 {1254, "MS-TURK"},
238 {1254, "WINDOWS-1254"},
239
240 {1255, "CP1255"},
241 {1255, "MS-HEBR"},
242 {1255, "WINDOWS-1255"},
243
244 {1256, "CP1256"},
245 {1256, "MS-ARAB"},
246 {1256, "WINDOWS-1256"},
247
248 {1257, "CP1257"},
249 {1257, "WINBALTRIM"},
250 {1257, "WINDOWS-1257"},
251
252 {1258, "CP1258"},
253 {1258, "WINDOWS-1258"},
254
255 {850, "850"},
256 {850, "CP850"},
257 {850, "IBM850"},
258 {850, "CSPC850MULTILINGUAL"},
259
260 /* !IsValidCodePage(862) */
261 {862, "862"},
262 {862, "CP862"},
263 {862, "IBM862"},
264 {862, "CSPC862LATINHEBREW"},
265
266 {866, "866"},
267 {866, "CP866"},
268 {866, "IBM866"},
269 {866, "CSIBM866"},
270
271 /* !IsValidCodePage(154) */
272 {154, "CP154"},
273 {154, "CYRILLIC-ASIAN"},
274 {154, "PT154"},
275 {154, "PTCP154"},
276 {154, "CSPTCP154"},
277
278 /* !IsValidCodePage(1133) */
279 {1133, "CP1133"},
280 {1133, "IBM-CP1133"},
281
282 {874, "CP874"},
283 {874, "WINDOWS-874"},
284
285 /* !IsValidCodePage(51932) */
286 {51932, "CP51932"},
287 {51932, "MS51932"},
288 {51932, "WINDOWS-51932"},
289 {51932, "EUC-JP"},
290
291 {932, "CP932"},
292 {932, "MS932"},
293 {932, "SHIFFT_JIS"},
294 {932, "SHIFFT_JIS-MS"},
295 {932, "SJIS"},
296 {932, "SJIS-MS"},
297 {932, "SJIS-OPEN"},
298 {932, "SJIS-WIN"},
299 {932, "WINDOWS-31J"},
300 {932, "WINDOWS-932"},
301 {932, "CSWINDOWS31J"},
302
303 {50221, "CP50221"},
304 {50221, "ISO-2022-JP"},
305 {50221, "ISO-2022-JP-MS"},
306 {50221, "ISO2022-JP"},
307 {50221, "ISO2022-JP-MS"},
308 {50221, "MS50221"},
309 {50221, "WINDOWS-50221"},
310
311 {936, "CP936"},
312 {936, "GBK"},
313 {936, "MS936"},
314 {936, "WINDOWS-936"},
315
316 {950, "CP950"},
317 {950, "BIG5"},
318 {950, "BIG5HKSCS"},
319 {950, "BIG5-HKSCS"},
320
321 {949, "CP949"},
322 {949, "UHC"},
323 {949, "EUC-KR"},
324
325 {1361, "CP1361"},
326 {1361, "JOHAB"},
327
328 {437, "437"},
329 {437, "CP437"},
330 {437, "IBM437"},
331 {437, "CSPC8CODEPAGE437"},
332
333 {737, "CP737"},
334
335 {775, "CP775"},
336 {775, "IBM775"},
337 {775, "CSPC775BALTIC"},
338
339 {852, "852"},
340 {852, "CP852"},
341 {852, "IBM852"},
342 {852, "CSPCP852"},
343
344 /* !IsValidCodePage(853) */
345 {853, "CP853"},
346
347 {855, "855"},
348 {855, "CP855"},
349 {855, "IBM855"},
350 {855, "CSIBM855"},
351
352 {857, "857"},
353 {857, "CP857"},
354 {857, "IBM857"},
355 {857, "CSIBM857"},
356
357 /* !IsValidCodePage(858) */
358 {858, "CP858"},
359
360 {860, "860"},
361 {860, "CP860"},
362 {860, "IBM860"},
363 {860, "CSIBM860"},
364
365 {861, "861"},
366 {861, "CP-IS"},
367 {861, "CP861"},
368 {861, "IBM861"},
369 {861, "CSIBM861"},
370
371 {863, "863"},
372 {863, "CP863"},
373 {863, "IBM863"},
374 {863, "CSIBM863"},
375
376 {864, "CP864"},
377 {864, "IBM864"},
378 {864, "CSIBM864"},
379
380 {865, "865"},
381 {865, "CP865"},
382 {865, "IBM865"},
383 {865, "CSIBM865"},
384
385 {869, "869"},
386 {869, "CP-GR"},
387 {869, "CP869"},
388 {869, "IBM869"},
389 {869, "CSIBM869"},
390
391 /* !IsValidCodePage(1125) */
392 {1125, "CP1125"},
393
394 /*
395 * Code Page Identifiers
396 * http://msdn2.microsoft.com/en-us/library/ms776446.aspx
397 */
398 {37, "IBM037"}, /* IBM EBCDIC US-Canada */
399 {437, "IBM437"}, /* OEM United States */
400 {500, "IBM500"}, /* IBM EBCDIC International */
401 {708, "ASMO-708"}, /* Arabic (ASMO 708) */
402 /* 709 Arabic (ASMO-449+, BCON V4) */
403 /* 710 Arabic - Transparent Arabic */
404 {720, "DOS-720"}, /* Arabic (Transparent ASMO); Arabic (DOS) */
405 {737, "ibm737"}, /* OEM Greek (formerly 437G); Greek (DOS) */
406 {775, "ibm775"}, /* OEM Baltic; Baltic (DOS) */
407 {850, "ibm850"}, /* OEM Multilingual Latin 1; Western European (DOS) */
408 {852, "ibm852"}, /* OEM Latin 2; Central European (DOS) */
409 {855, "IBM855"}, /* OEM Cyrillic (primarily Russian) */
410 {857, "ibm857"}, /* OEM Turkish; Turkish (DOS) */
411 {858, "IBM00858"}, /* OEM Multilingual Latin 1 + Euro symbol */
412 {860, "IBM860"}, /* OEM Portuguese; Portuguese (DOS) */
413 {861, "ibm861"}, /* OEM Icelandic; Icelandic (DOS) */
414 {862, "DOS-862"}, /* OEM Hebrew; Hebrew (DOS) */
415 {863, "IBM863"}, /* OEM French Canadian; French Canadian (DOS) */
416 {864, "IBM864"}, /* OEM Arabic; Arabic (864) */
417 {865, "IBM865"}, /* OEM Nordic; Nordic (DOS) */
418 {866, "cp866"}, /* OEM Russian; Cyrillic (DOS) */
419 {869, "ibm869"}, /* OEM Modern Greek; Greek, Modern (DOS) */
420 {870, "IBM870"}, /* IBM EBCDIC Multilingual/ROECE (Latin 2); IBM EBCDIC Multilingual Latin 2 */
421 {874, "windows-874"}, /* ANSI/OEM Thai (same as 28605, ISO 8859-15); Thai (Windows) */
422 {875, "cp875"}, /* IBM EBCDIC Greek Modern */
423 {932, "shift_jis"}, /* ANSI/OEM Japanese; Japanese (Shift-JIS) */
424 {932, "shift-jis"}, /* alternative name for it */
425 {936, "gb2312"}, /* ANSI/OEM Simplified Chinese (PRC, Singapore); Chinese Simplified (GB2312) */
426 {949, "ks_c_5601-1987"}, /* ANSI/OEM Korean (Unified Hangul Code) */
427 {950, "big5"}, /* ANSI/OEM Traditional Chinese (Taiwan; Hong Kong SAR, PRC); Chinese Traditional (Big5) */
428 {950, "big5hkscs"}, /* ANSI/OEM Traditional Chinese (Hong Kong SAR); Chinese Traditional (Big5-HKSCS) */
429 {950, "big5-hkscs"}, /* alternative name for it */
430 {1026, "IBM1026"}, /* IBM EBCDIC Turkish (Latin 5) */
431 {1047, "IBM01047"}, /* IBM EBCDIC Latin 1/Open System */
432 {1140, "IBM01140"}, /* IBM EBCDIC US-Canada (037 + Euro symbol); IBM EBCDIC (US-Canada-Euro) */
433 {1141, "IBM01141"}, /* IBM EBCDIC Germany (20273 + Euro symbol); IBM EBCDIC (Germany-Euro) */
434 {1142, "IBM01142"}, /* IBM EBCDIC Denmark-Norway (20277 + Euro symbol); IBM EBCDIC (Denmark-Norway-Euro) */
435 {1143, "IBM01143"}, /* IBM EBCDIC Finland-Sweden (20278 + Euro symbol); IBM EBCDIC (Finland-Sweden-Euro) */
436 {1144, "IBM01144"}, /* IBM EBCDIC Italy (20280 + Euro symbol); IBM EBCDIC (Italy-Euro) */
437 {1145, "IBM01145"}, /* IBM EBCDIC Latin America-Spain (20284 + Euro symbol); IBM EBCDIC (Spain-Euro) */
438 {1146, "IBM01146"}, /* IBM EBCDIC United Kingdom (20285 + Euro symbol); IBM EBCDIC (UK-Euro) */
439 {1147, "IBM01147"}, /* IBM EBCDIC France (20297 + Euro symbol); IBM EBCDIC (France-Euro) */
440 {1148, "IBM01148"}, /* IBM EBCDIC International (500 + Euro symbol); IBM EBCDIC (International-Euro) */
441 {1149, "IBM01149"}, /* IBM EBCDIC Icelandic (20871 + Euro symbol); IBM EBCDIC (Icelandic-Euro) */
442 {1250, "windows-1250"}, /* ANSI Central European; Central European (Windows) */
443 {1251, "windows-1251"}, /* ANSI Cyrillic; Cyrillic (Windows) */
444 {1252, "windows-1252"}, /* ANSI Latin 1; Western European (Windows) */
445 {1253, "windows-1253"}, /* ANSI Greek; Greek (Windows) */
446 {1254, "windows-1254"}, /* ANSI Turkish; Turkish (Windows) */
447 {1255, "windows-1255"}, /* ANSI Hebrew; Hebrew (Windows) */
448 {1256, "windows-1256"}, /* ANSI Arabic; Arabic (Windows) */
449 {1257, "windows-1257"}, /* ANSI Baltic; Baltic (Windows) */
450 {1258, "windows-1258"}, /* ANSI/OEM Vietnamese; Vietnamese (Windows) */
451 {1361, "Johab"}, /* Korean (Johab) */
452 {10000, "macintosh"}, /* MAC Roman; Western European (Mac) */
453 {10001, "x-mac-japanese"}, /* Japanese (Mac) */
454 {10002, "x-mac-chinesetrad"}, /* MAC Traditional Chinese (Big5); Chinese Traditional (Mac) */
455 {10003, "x-mac-korean"}, /* Korean (Mac) */
456 {10004, "x-mac-arabic"}, /* Arabic (Mac) */
457 {10005, "x-mac-hebrew"}, /* Hebrew (Mac) */
458 {10006, "x-mac-greek"}, /* Greek (Mac) */
459 {10007, "x-mac-cyrillic"}, /* Cyrillic (Mac) */
460 {10008, "x-mac-chinesesimp"}, /* MAC Simplified Chinese (GB 2312); Chinese Simplified (Mac) */
461 {10010, "x-mac-romanian"}, /* Romanian (Mac) */
462 {10017, "x-mac-ukrainian"}, /* Ukrainian (Mac) */
463 {10021, "x-mac-thai"}, /* Thai (Mac) */
464 {10029, "x-mac-ce"}, /* MAC Latin 2; Central European (Mac) */
465 {10079, "x-mac-icelandic"}, /* Icelandic (Mac) */
466 {10081, "x-mac-turkish"}, /* Turkish (Mac) */
467 {10082, "x-mac-croatian"}, /* Croatian (Mac) */
468 {20000, "x-Chinese_CNS"}, /* CNS Taiwan; Chinese Traditional (CNS) */
469 {20001, "x-cp20001"}, /* TCA Taiwan */
470 {20002, "x_Chinese-Eten"}, /* Eten Taiwan; Chinese Traditional (Eten) */
471 {20003, "x-cp20003"}, /* IBM5550 Taiwan */
472 {20004, "x-cp20004"}, /* TeleText Taiwan */
473 {20005, "x-cp20005"}, /* Wang Taiwan */
474 {20105, "x-IA5"}, /* IA5 (IRV International Alphabet No. 5, 7-bit); Western European (IA5) */
475 {20106, "x-IA5-German"}, /* IA5 German (7-bit) */
476 {20107, "x-IA5-Swedish"}, /* IA5 Swedish (7-bit) */
477 {20108, "x-IA5-Norwegian"}, /* IA5 Norwegian (7-bit) */
478 {20127, "us-ascii"}, /* US-ASCII (7-bit) */
479 {20261, "x-cp20261"}, /* T.61 */
480 {20269, "x-cp20269"}, /* ISO 6937 Non-Spacing Accent */
481 {20273, "IBM273"}, /* IBM EBCDIC Germany */
482 {20277, "IBM277"}, /* IBM EBCDIC Denmark-Norway */
483 {20278, "IBM278"}, /* IBM EBCDIC Finland-Sweden */
484 {20280, "IBM280"}, /* IBM EBCDIC Italy */
485 {20284, "IBM284"}, /* IBM EBCDIC Latin America-Spain */
486 {20285, "IBM285"}, /* IBM EBCDIC United Kingdom */
487 {20290, "IBM290"}, /* IBM EBCDIC Japanese Katakana Extended */
488 {20297, "IBM297"}, /* IBM EBCDIC France */
489 {20420, "IBM420"}, /* IBM EBCDIC Arabic */
490 {20423, "IBM423"}, /* IBM EBCDIC Greek */
491 {20424, "IBM424"}, /* IBM EBCDIC Hebrew */
492 {20833, "x-EBCDIC-KoreanExtended"}, /* IBM EBCDIC Korean Extended */
493 {20838, "IBM-Thai"}, /* IBM EBCDIC Thai */
494 {20866, "koi8-r"}, /* Russian (KOI8-R); Cyrillic (KOI8-R) */
495 {20871, "IBM871"}, /* IBM EBCDIC Icelandic */
496 {20880, "IBM880"}, /* IBM EBCDIC Cyrillic Russian */
497 {20905, "IBM905"}, /* IBM EBCDIC Turkish */
498 {20924, "IBM00924"}, /* IBM EBCDIC Latin 1/Open System (1047 + Euro symbol) */
499 {20932, "EUC-JP"}, /* Japanese (JIS 0208-1990 and 0121-1990) */
500 {20936, "x-cp20936"}, /* Simplified Chinese (GB2312); Chinese Simplified (GB2312-80) */
501 {20949, "x-cp20949"}, /* Korean Wansung */
502 {21025, "cp1025"}, /* IBM EBCDIC Cyrillic Serbian-Bulgarian */
503 /* 21027 (deprecated) */
504 {21866, "koi8-u"}, /* Ukrainian (KOI8-U); Cyrillic (KOI8-U) */
505 {28591, "iso-8859-1"}, /* ISO 8859-1 Latin 1; Western European (ISO) */
506 {28591, "iso8859-1"}, /* ISO 8859-1 Latin 1; Western European (ISO) */
507 {28591, "iso_8859-1"},
508 {28591, "iso_8859_1"},
509 {28592, "iso-8859-2"}, /* ISO 8859-2 Central European; Central European (ISO) */
510 {28592, "iso8859-2"}, /* ISO 8859-2 Central European; Central European (ISO) */
511 {28592, "iso_8859-2"},
512 {28592, "iso_8859_2"},
513 {28593, "iso-8859-3"}, /* ISO 8859-3 Latin 3 */
514 {28593, "iso8859-3"}, /* ISO 8859-3 Latin 3 */
515 {28593, "iso_8859-3"},
516 {28593, "iso_8859_3"},
517 {28594, "iso-8859-4"}, /* ISO 8859-4 Baltic */
518 {28594, "iso8859-4"}, /* ISO 8859-4 Baltic */
519 {28594, "iso_8859-4"},
520 {28594, "iso_8859_4"},
521 {28595, "iso-8859-5"}, /* ISO 8859-5 Cyrillic */
522 {28595, "iso8859-5"}, /* ISO 8859-5 Cyrillic */
523 {28595, "iso_8859-5"},
524 {28595, "iso_8859_5"},
525 {28596, "iso-8859-6"}, /* ISO 8859-6 Arabic */
526 {28596, "iso8859-6"}, /* ISO 8859-6 Arabic */
527 {28596, "iso_8859-6"},
528 {28596, "iso_8859_6"},
529 {28597, "iso-8859-7"}, /* ISO 8859-7 Greek */
530 {28597, "iso8859-7"}, /* ISO 8859-7 Greek */
531 {28597, "iso_8859-7"},
532 {28597, "iso_8859_7"},
533 {28598, "iso-8859-8"}, /* ISO 8859-8 Hebrew; Hebrew (ISO-Visual) */
534 {28598, "iso8859-8"}, /* ISO 8859-8 Hebrew; Hebrew (ISO-Visual) */
535 {28598, "iso_8859-8"},
536 {28598, "iso_8859_8"},
537 {28599, "iso-8859-9"}, /* ISO 8859-9 Turkish */
538 {28599, "iso8859-9"}, /* ISO 8859-9 Turkish */
539 {28599, "iso_8859-9"},
540 {28599, "iso_8859_9"},
541 {28603, "iso-8859-13"}, /* ISO 8859-13 Estonian */
542 {28603, "iso8859-13"}, /* ISO 8859-13 Estonian */
543 {28603, "iso_8859-13"},
544 {28603, "iso_8859_13"},
545 {28605, "iso-8859-15"}, /* ISO 8859-15 Latin 9 */
546 {28605, "iso8859-15"}, /* ISO 8859-15 Latin 9 */
547 {28605, "iso_8859-15"},
548 {28605, "iso_8859_15"},
549 {29001, "x-Europa"}, /* Europa 3 */
550 {38598, "iso-8859-8-i"}, /* ISO 8859-8 Hebrew; Hebrew (ISO-Logical) */
551 {38598, "iso8859-8-i"}, /* ISO 8859-8 Hebrew; Hebrew (ISO-Logical) */
552 {38598, "iso_8859-8-i"},
553 {38598, "iso_8859_8-i"},
554 {50220, "iso-2022-jp"}, /* ISO 2022 Japanese with no halfwidth Katakana; Japanese (JIS) */
555 {50221, "csISO2022JP"}, /* ISO 2022 Japanese with halfwidth Katakana; Japanese (JIS-Allow 1 byte Kana) */
556 {50222, "iso-2022-jp"}, /* ISO 2022 Japanese JIS X 0201-1989; Japanese (JIS-Allow 1 byte Kana - SO/SI) */
557 {50225, "iso-2022-kr"}, /* ISO 2022 Korean */
558 {50225, "iso2022-kr"}, /* ISO 2022 Korean */
559 {50227, "x-cp50227"}, /* ISO 2022 Simplified Chinese; Chinese Simplified (ISO 2022) */
560 /* 50229 ISO 2022 Traditional Chinese */
561 /* 50930 EBCDIC Japanese (Katakana) Extended */
562 /* 50931 EBCDIC US-Canada and Japanese */
563 /* 50933 EBCDIC Korean Extended and Korean */
564 /* 50935 EBCDIC Simplified Chinese Extended and Simplified Chinese */
565 /* 50936 EBCDIC Simplified Chinese */
566 /* 50937 EBCDIC US-Canada and Traditional Chinese */
567 /* 50939 EBCDIC Japanese (Latin) Extended and Japanese */
568 {51932, "euc-jp"}, /* EUC Japanese */
569 {51936, "EUC-CN"}, /* EUC Simplified Chinese; Chinese Simplified (EUC) */
570 {51949, "euc-kr"}, /* EUC Korean */
571 /* 51950 EUC Traditional Chinese */
572 {52936, "hz-gb-2312"}, /* HZ-GB2312 Simplified Chinese; Chinese Simplified (HZ) */
573 {54936, "GB18030"}, /* Windows XP and later: GB18030 Simplified Chinese (4 byte); Chinese Simplified (GB18030) */
574 {57002, "x-iscii-de"}, /* ISCII Devanagari */
575 {57003, "x-iscii-be"}, /* ISCII Bengali */
576 {57004, "x-iscii-ta"}, /* ISCII Tamil */
577 {57005, "x-iscii-te"}, /* ISCII Telugu */
578 {57006, "x-iscii-as"}, /* ISCII Assamese */
579 {57007, "x-iscii-or"}, /* ISCII Oriya */
580 {57008, "x-iscii-ka"}, /* ISCII Kannada */
581 {57009, "x-iscii-ma"}, /* ISCII Malayalam */
582 {57010, "x-iscii-gu"}, /* ISCII Gujarati */
583 {57011, "x-iscii-pa"}, /* ISCII Punjabi */
584
585 {0, NULL}
586};
587
588/*
589 * SJIS SHIFTJIS table CP932 table
590 * ---- --------------------------- --------------------------------
591 * 5C U+00A5 YEN SIGN U+005C REVERSE SOLIDUS
592 * 7E U+203E OVERLINE U+007E TILDE
593 * 815C U+2014 EM DASH U+2015 HORIZONTAL BAR
594 * 815F U+005C REVERSE SOLIDUS U+FF3C FULLWIDTH REVERSE SOLIDUS
595 * 8160 U+301C WAVE DASH U+FF5E FULLWIDTH TILDE
596 * 8161 U+2016 DOUBLE VERTICAL LINE U+2225 PARALLEL TO
597 * 817C U+2212 MINUS SIGN U+FF0D FULLWIDTH HYPHEN-MINUS
598 * 8191 U+00A2 CENT SIGN U+FFE0 FULLWIDTH CENT SIGN
599 * 8192 U+00A3 POUND SIGN U+FFE1 FULLWIDTH POUND SIGN
600 * 81CA U+00AC NOT SIGN U+FFE2 FULLWIDTH NOT SIGN
601 *
602 * EUC-JP and ISO-2022-JP should be compatible with CP932.
603 *
604 * The kernel and MLang use different Unicode mapping tables. Be sure
605 * to check which API is used.
606 */
607static compat_t cp932_compat[] = {
608 {0x00A5, 0x005C, COMPAT_OUT},
609 {0x203E, 0x007E, COMPAT_OUT},
610 {0x2014, 0x2015, COMPAT_OUT},
611 {0x301C, 0xFF5E, COMPAT_OUT},
612 {0x2016, 0x2225, COMPAT_OUT},
613 {0x2212, 0xFF0D, COMPAT_OUT},
614 {0x00A2, 0xFFE0, COMPAT_OUT},
615 {0x00A3, 0xFFE1, COMPAT_OUT},
616 {0x00AC, 0xFFE2, COMPAT_OUT},
617 {0, 0, 0}
618};
619
620static compat_t cp20932_compat[] = {
621 {0x00A5, 0x005C, COMPAT_OUT},
622 {0x203E, 0x007E, COMPAT_OUT},
623 {0x2014, 0x2015, COMPAT_OUT},
624 {0xFF5E, 0x301C, COMPAT_OUT|COMPAT_IN},
625 {0x2225, 0x2016, COMPAT_OUT|COMPAT_IN},
626 {0xFF0D, 0x2212, COMPAT_OUT|COMPAT_IN},
627 {0xFFE0, 0x00A2, COMPAT_OUT|COMPAT_IN},
628 {0xFFE1, 0x00A3, COMPAT_OUT|COMPAT_IN},
629 {0xFFE2, 0x00AC, COMPAT_OUT|COMPAT_IN},
630 {0, 0, 0}
631};
632
633static compat_t *cp51932_compat = cp932_compat;
634
635/* cp20932_compat for kernel. cp932_compat for mlang. */
636static compat_t *cp5022x_compat = cp932_compat;
637
638typedef HRESULT (WINAPI *CONVERTINETSTRING)(
639 LPDWORD lpdwMode,
640 DWORD dwSrcEncoding,
641 DWORD dwDstEncoding,
642 LPCSTR lpSrcStr,
643 LPINT lpnSrcSize,
644 LPBYTE lpDstStr,
645 LPINT lpnDstSize
646);
647typedef HRESULT (WINAPI *CONVERTINETMULTIBYTETOUNICODE)(
648 LPDWORD lpdwMode,
649 DWORD dwSrcEncoding,
650 LPCSTR lpSrcStr,
651 LPINT lpnMultiCharCount,
652 LPWSTR lpDstStr,
653 LPINT lpnWideCharCount
654);
655typedef HRESULT (WINAPI *CONVERTINETUNICODETOMULTIBYTE)(
656 LPDWORD lpdwMode,
657 DWORD dwEncoding,
658 LPCWSTR lpSrcStr,
659 LPINT lpnWideCharCount,
660 LPSTR lpDstStr,
661 LPINT lpnMultiCharCount
662);
663typedef HRESULT (WINAPI *ISCONVERTINETSTRINGAVAILABLE)(
664 DWORD dwSrcEncoding,
665 DWORD dwDstEncoding
666);
667typedef HRESULT (WINAPI *LCIDTORFC1766A)(
668 LCID Locale,
669 LPSTR pszRfc1766,
670 int nChar
671);
672typedef HRESULT (WINAPI *LCIDTORFC1766W)(
673 LCID Locale,
674 LPWSTR pszRfc1766,
675 int nChar
676);
677typedef HRESULT (WINAPI *RFC1766TOLCIDA)(
678 LCID *pLocale,
679 LPSTR pszRfc1766
680);
681typedef HRESULT (WINAPI *RFC1766TOLCIDW)(
682 LCID *pLocale,
683 LPWSTR pszRfc1766
684);
685static CONVERTINETSTRING ConvertINetString;
686static CONVERTINETMULTIBYTETOUNICODE ConvertINetMultiByteToUnicode;
687static CONVERTINETUNICODETOMULTIBYTE ConvertINetUnicodeToMultiByte;
688static ISCONVERTINETSTRINGAVAILABLE IsConvertINetStringAvailable;
689static LCIDTORFC1766A LcidToRfc1766A;
690static RFC1766TOLCIDA Rfc1766ToLcidA;
691
692static int
693load_mlang(void)
694{
695 HMODULE h;
696 if (ConvertINetString != NULL)
697 return TRUE;
698 h = LoadLibrary(TEXT("mlang.dll"));
699 if (!h)
700 return FALSE;
701 ConvertINetString = (CONVERTINETSTRING)GetProcAddressA(h, "ConvertINetString");
702 ConvertINetMultiByteToUnicode = (CONVERTINETMULTIBYTETOUNICODE)GetProcAddressA(h, "ConvertINetMultiByteToUnicode");
703 ConvertINetUnicodeToMultiByte = (CONVERTINETUNICODETOMULTIBYTE)GetProcAddressA(h, "ConvertINetUnicodeToMultiByte");
704 IsConvertINetStringAvailable = (ISCONVERTINETSTRINGAVAILABLE)GetProcAddressA(h, "IsConvertINetStringAvailable");
705 LcidToRfc1766A = (LCIDTORFC1766A)GetProcAddressA(h, "LcidToRfc1766A");
706 Rfc1766ToLcidA = (RFC1766TOLCIDA)GetProcAddressA(h, "Rfc1766ToLcidA");
707 return TRUE;
708}
709
710iconv_t
711iconv_open(const char *tocode, const char *fromcode)
712{
713 rec_iconv_t *cd;
714
715 cd = (rec_iconv_t *)xzalloc(sizeof(rec_iconv_t));
716
717 /* reset the errno to prevent reporting wrong error code.
718 * 0 for unsorted error. */
719 errno = 0;
720 if (win_iconv_open(cd, tocode, fromcode))
721 return (iconv_t)cd;
722
723 free(cd);
724
725 return (iconv_t)(-1);
726}
727
728int
729iconv_close(iconv_t _cd)
730{
731 rec_iconv_t *cd = (rec_iconv_t *)_cd;
732 int r = cd->iconv_close(cd->cd);
733 int e = *(cd->_errno());
734 free(cd);
735 errno = e;
736 return r;
737}
738
739size_t
740iconv(iconv_t _cd, const char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft)
741{
742 rec_iconv_t *cd = (rec_iconv_t *)_cd;
743 size_t r = cd->iconv(cd->cd, inbuf, inbytesleft, outbuf, outbytesleft);
744 errno = *(cd->_errno());
745 return r;
746}
747
748static int
749win_iconv_open(rec_iconv_t *cd, const char *tocode, const char *fromcode)
750{
751 if (!make_csconv(fromcode, &cd->from) || !make_csconv(tocode, &cd->to))
752 return FALSE;
753 cd->iconv_close = win_iconv_close;
754 cd->iconv = win_iconv;
755 cd->_errno = _errno;
756 cd->cd = (iconv_t)cd;
757 return TRUE;
758}
759
760static int
761win_iconv_close(iconv_t cd UNUSED_PARAM)
762{
763 return 0;
764}
765
766static size_t
767win_iconv(iconv_t _cd, const char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft)
768{
769 rec_iconv_t *cd = (rec_iconv_t *)_cd;
770 ushort wbuf[MB_CHAR_MAX]; /* enough room for one character */
771 int insize;
772 int outsize;
773 int wsize;
774 DWORD frommode;
775 DWORD tomode;
776 uint wc;
777 compat_t *cp;
778 int i;
779
780 if (inbuf == NULL || *inbuf == NULL)
781 {
782 if (outbuf != NULL && *outbuf != NULL && cd->to.flush != NULL)
783 {
784 tomode = cd->to.mode;
785 outsize = cd->to.flush(&cd->to, (uchar *)*outbuf, *outbytesleft);
786 if (outsize == -1)
787 {
788 if ((cd->to.flags & FLAG_IGNORE) && errno != E2BIG)
789 {
790 outsize = 0;
791 }
792 else
793 {
794 cd->to.mode = tomode;
795 return (size_t)(-1);
796 }
797 }
798 *outbuf += outsize;
799 *outbytesleft -= outsize;
800 }
801 cd->from.mode = 0;
802 cd->to.mode = 0;
803 return 0;
804 }
805
806 while (*inbytesleft != 0)
807 {
808 frommode = cd->from.mode;
809 tomode = cd->to.mode;
810 wsize = MB_CHAR_MAX;
811
812 insize = cd->from.mbtowc(&cd->from, (const uchar *)*inbuf, *inbytesleft, wbuf, &wsize);
813 if (insize == -1)
814 {
815 if (cd->to.flags & FLAG_IGNORE)
816 {
817 cd->from.mode = frommode;
818 insize = 1;
819 wsize = 0;
820 }
821 else
822 {
823 cd->from.mode = frommode;
824 return (size_t)(-1);
825 }
826 }
827
828 if (wsize == 0)
829 {
830 *inbuf += insize;
831 *inbytesleft -= insize;
832 continue;
833 }
834
835 if (cd->from.compat != NULL)
836 {
837 wc = utf16_to_ucs4(wbuf);
838 cp = cd->from.compat;
839 for (i = 0; cp[i].in != 0; ++i)
840 {
841 if ((cp[i].flag & COMPAT_IN) && cp[i].out == wc)
842 {
843 ucs4_to_utf16(cp[i].in, wbuf, &wsize);
844 break;
845 }
846 }
847 }
848
849 if (cd->to.compat != NULL)
850 {
851 wc = utf16_to_ucs4(wbuf);
852 cp = cd->to.compat;
853 for (i = 0; cp[i].in != 0; ++i)
854 {
855 if ((cp[i].flag & COMPAT_OUT) && cp[i].in == wc)
856 {
857 ucs4_to_utf16(cp[i].out, wbuf, &wsize);
858 break;
859 }
860 }
861 }
862
863 outsize = cd->to.wctomb(&cd->to, wbuf, wsize, (uchar *)*outbuf, *outbytesleft);
864 if (outsize == -1)
865 {
866 if ((cd->to.flags & FLAG_IGNORE) && errno != E2BIG)
867 {
868 cd->to.mode = tomode;
869 outsize = 0;
870 }
871 else
872 {
873 cd->from.mode = frommode;
874 cd->to.mode = tomode;
875 return (size_t)(-1);
876 }
877 }
878
879 *inbuf += insize;
880 *outbuf += outsize;
881 *inbytesleft -= insize;
882 *outbytesleft -= outsize;
883 }
884
885 return 0;
886}
887
888static int
889make_csconv(const char *_name, csconv_t *cv)
890{
891 CPINFO cpinfo;
892 int use_compat = TRUE;
893 int flag = 0;
894 char *name;
895 char *p;
896
897 name = xstrndup(_name, strlen(_name));
898 if (name == NULL)
899 return FALSE;
900
901 /* check for option "enc_name//opt1//opt2" */
902 while ((p = strrstr(name, "//")) != NULL)
903 {
904 if (_stricmp(p + 2, "nocompat") == 0)
905 use_compat = FALSE;
906 else if (_stricmp(p + 2, "translit") == 0)
907 flag |= FLAG_TRANSLIT;
908 else if (_stricmp(p + 2, "ignore") == 0)
909 flag |= FLAG_IGNORE;
910 *p = 0;
911 }
912
913 cv->mode = 0;
914 cv->flags = flag;
915 cv->mblen = NULL;
916 cv->flush = NULL;
917 cv->compat = NULL;
918 cv->codepage = name_to_codepage(name);
919 if (cv->codepage == 1200 || cv->codepage == 1201)
920 {
921 cv->mbtowc = utf16_mbtowc;
922 cv->wctomb = utf16_wctomb;
923 if (_stricmp(name, "UTF-16") == 0 || _stricmp(name, "UTF16") == 0 ||
924 _stricmp(name, "UCS-2") == 0 || _stricmp(name, "UCS2") == 0 ||
925 _stricmp(name,"UCS-2-INTERNAL") == 0)
926 cv->flags |= FLAG_USE_BOM;
927 }
928 else if (cv->codepage == 12000 || cv->codepage == 12001)
929 {
930 cv->mbtowc = utf32_mbtowc;
931 cv->wctomb = utf32_wctomb;
932 if (_stricmp(name, "UTF-32") == 0 || _stricmp(name, "UTF32") == 0 ||
933 _stricmp(name, "UCS-4") == 0 || _stricmp(name, "UCS4") == 0)
934 cv->flags |= FLAG_USE_BOM;
935 }
936 else if (cv->codepage == 65001)
937 {
938 cv->mbtowc = kernel_mbtowc;
939 cv->wctomb = kernel_wctomb;
940 cv->mblen = utf8_mblen;
941 }
942 else if ((cv->codepage == 50220 || cv->codepage == 50221 || cv->codepage == 50222) && load_mlang())
943 {
944 cv->mbtowc = iso2022jp_mbtowc;
945 cv->wctomb = iso2022jp_wctomb;
946 cv->flush = iso2022jp_flush;
947 }
948 else if (cv->codepage == 51932 && load_mlang())
949 {
950 cv->mbtowc = mlang_mbtowc;
951 cv->wctomb = mlang_wctomb;
952 cv->mblen = eucjp_mblen;
953 }
954 else if (IsValidCodePage(cv->codepage)
955 && GetCPInfo(cv->codepage, &cpinfo) != 0)
956 {
957 cv->mbtowc = kernel_mbtowc;
958 cv->wctomb = kernel_wctomb;
959 if (cpinfo.MaxCharSize == 1)
960 cv->mblen = sbcs_mblen;
961 else if (cpinfo.MaxCharSize == 2)
962 cv->mblen = dbcs_mblen;
963 else
964 cv->mblen = mbcs_mblen;
965 }
966 else
967 {
968 /* not supported */
969 free(name);
970 errno = EINVAL;
971 return FALSE;
972 }
973
974 if (use_compat)
975 {
976 switch (cv->codepage)
977 {
978 case 932: cv->compat = cp932_compat; break;
979 case 20932: cv->compat = cp20932_compat; break;
980 case 51932: cv->compat = cp51932_compat; break;
981 case 50220: case 50221: case 50222: cv->compat = cp5022x_compat; break;
982 }
983 }
984
985 free(name);
986
987 return TRUE;
988}
989
990static int
991name_to_codepage(const char *name)
992{
993 int i;
994
995 if (*name == '\0' ||
996 strcmp(name, "char") == 0)
997 return GetACP();
998 else if (strcmp(name, "wchar_t") == 0)
999 return 1200;
1000 else if (_strnicmp(name, "cp", 2) == 0)
1001 return atoi(name + 2); /* CP123 */
1002 else if ('0' <= name[0] && name[0] <= '9')
1003 return atoi(name); /* 123 */
1004 else if (_strnicmp(name, "xx", 2) == 0)
1005 return atoi(name + 2); /* XX123 for debug */
1006
1007 for (i = 0; codepage_alias[i].name != NULL; ++i)
1008 if (_stricmp(name, codepage_alias[i].name) == 0)
1009 return codepage_alias[i].codepage;
1010 return -1;
1011}
1012
1013/*
1014 * http://www.faqs.org/rfcs/rfc2781.html
1015 */
1016static uint
1017utf16_to_ucs4(const ushort *wbuf)
1018{
1019 uint wc = wbuf[0];
1020 if (0xD800 <= wbuf[0] && wbuf[0] <= 0xDBFF)
1021 wc = ((wbuf[0] & 0x3FF) << 10) + (wbuf[1] & 0x3FF) + 0x10000;
1022 return wc;
1023}
1024
1025static void
1026ucs4_to_utf16(uint wc, ushort *wbuf, int *wbufsize)
1027{
1028 if (wc < 0x10000)
1029 {
1030 wbuf[0] = wc;
1031 *wbufsize = 1;
1032 }
1033 else
1034 {
1035 wc -= 0x10000;
1036 wbuf[0] = 0xD800 | ((wc >> 10) & 0x3FF);
1037 wbuf[1] = 0xDC00 | (wc & 0x3FF);
1038 *wbufsize = 2;
1039 }
1040}
1041
1042/*
1043 * Check if codepage is one of those for which the dwFlags parameter
1044 * to MultiByteToWideChar() must be zero. Return zero or
1045 * MB_ERR_INVALID_CHARS. The docs in Platform SDK for Windows
1046 * Server 2003 R2 claims that also codepage 65001 is one of these, but
1047 * that doesn't seem to be the case. The MSDN docs for MSVS2008 leave
1048 * out 65001 (UTF-8), and that indeed seems to be the case on XP, it
1049 * works fine to pass MB_ERR_INVALID_CHARS in dwFlags when converting
1050 * from UTF-8.
1051 */
1052static int
1053mbtowc_flags(int codepage)
1054{
1055 return (codepage == 50220 || codepage == 50221 ||
1056 codepage == 50222 || codepage == 50225 ||
1057 codepage == 50227 || codepage == 50229 ||
1058 codepage == 52936 || codepage == 54936 ||
1059 (codepage >= 57002 && codepage <= 57011) ||
1060 codepage == 65000 || codepage == 42) ? 0 : MB_ERR_INVALID_CHARS;
1061}
1062
/*
 * Return nonzero if codepage is one of those for which the
 * lpUsedDefaultChar argument of WideCharToMultiByte() must be NULL.
 *
 * The Platform SDK docs for Windows Server 2003 R2 give the list used
 * here, while the MSDN docs for MSVS2008 claim it is only 65000 (UTF-7)
 * and 65001 (UTF-8).  This time the earlier Platform SDK seems to be
 * correct, at least for XP.
 */
static int
must_use_null_useddefaultchar(int codepage)
{
    if (codepage >= 57002 && codepage <= 57011)
        return 1;

    switch (codepage)
    {
    case 42:
    case 50220:
    case 50221:
    case 50222:
    case 50225:
    case 50227:
    case 50229:
    case 52936:
    case 54936:
    case 65000:
    case 65001:
        return 1;
    default:
        return 0;
    }
}
1082
/* Store err in errno and return -1, so callers can write
 * "return seterror(E...)". */
static int
seterror(int err)
{
    return (errno = err, -1);
}
1089
1090static int
1091sbcs_mblen(csconv_t *cv UNUSED_PARAM, const uchar *buf UNUSED_PARAM,
1092 int bufsize UNUSED_PARAM)
1093{
1094 return 1;
1095}
1096
1097static int
1098dbcs_mblen(csconv_t *cv, const uchar *buf, int bufsize)
1099{
1100 int len = IsDBCSLeadByteEx(cv->codepage, buf[0]) ? 2 : 1;
1101 if (bufsize < len)
1102 return seterror(EINVAL);
1103 return len;
1104}
1105
1106static int
1107mbcs_mblen(csconv_t *cv, const uchar *buf, int bufsize)
1108{
1109 int len = 0;
1110
1111 if (cv->codepage == 54936) {
1112 if (buf[0] <= 0x7F)
1113 len = 1;
1114 else if (buf[0] >= 0x81 && buf[0] <= 0xFE &&
1115 bufsize >= 2 &&
1116 ((buf[1] >= 0x40 && buf[1] <= 0x7E) ||
1117 (buf[1] >= 0x80 && buf[1] <= 0xFE)))
1118 len = 2;
1119 else if (buf[0] >= 0x81 && buf[0] <= 0xFE &&
1120 bufsize >= 4 &&
1121 buf[1] >= 0x30 && buf[1] <= 0x39)
1122 len = 4;
1123 else
1124 return seterror(EINVAL);
1125 return len;
1126 }
1127 else
1128 return seterror(EINVAL);
1129}
1130
1131static int
1132utf8_mblen(csconv_t *cv UNUSED_PARAM, const uchar *buf, int bufsize)
1133{
1134 int len = 0;
1135
1136 if (buf[0] < 0x80) len = 1;
1137 else if ((buf[0] & 0xE0) == 0xC0) len = 2;
1138 else if ((buf[0] & 0xF0) == 0xE0) len = 3;
1139 else if ((buf[0] & 0xF8) == 0xF0) len = 4;
1140 else if ((buf[0] & 0xFC) == 0xF8) len = 5;
1141 else if ((buf[0] & 0xFE) == 0xFC) len = 6;
1142
1143 if (len == 0)
1144 return seterror(EILSEQ);
1145 else if (bufsize < len)
1146 return seterror(EINVAL);
1147 return len;
1148}
1149
1150static int
1151eucjp_mblen(csconv_t *cv UNUSED_PARAM, const uchar *buf, int bufsize)
1152{
1153 if (buf[0] < 0x80) /* ASCII */
1154 return 1;
1155 else if (buf[0] == 0x8E) /* JIS X 0201 */
1156 {
1157 if (bufsize < 2)
1158 return seterror(EINVAL);
1159 else if (!(0xA1 <= buf[1] && buf[1] <= 0xDF))
1160 return seterror(EILSEQ);
1161 return 2;
1162 }
1163 else if (buf[0] == 0x8F) /* JIS X 0212 */
1164 {
1165 if (bufsize < 3)
1166 return seterror(EINVAL);
1167 else if (!(0xA1 <= buf[1] && buf[1] <= 0xFE)
1168 || !(0xA1 <= buf[2] && buf[2] <= 0xFE))
1169 return seterror(EILSEQ);
1170 return 3;
1171 }
1172 else /* JIS X 0208 */
1173 {
1174 if (bufsize < 2)
1175 return seterror(EINVAL);
1176 else if (!(0xA1 <= buf[0] && buf[0] <= 0xFE)
1177 || !(0xA1 <= buf[1] && buf[1] <= 0xFE))
1178 return seterror(EILSEQ);
1179 return 2;
1180 }
1181}
1182
1183static int
1184kernel_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize)
1185{
1186 int len;
1187
1188 len = cv->mblen(cv, buf, bufsize);
1189 if (len == -1)
1190 return -1;
1191 /* If converting from ASCII, reject 8bit
1192 * chars. MultiByteToWideChar() doesn't. Note that for ASCII we
1193 * know that the mblen function is sbcs_mblen() so len is 1.
1194 */
1195 if (cv->codepage == 20127 && buf[0] >= 0x80)
1196 return seterror(EILSEQ);
1197 *wbufsize = MultiByteToWideChar(cv->codepage, mbtowc_flags (cv->codepage),
1198 (const char *)buf, len, (wchar_t *)wbuf, *wbufsize);
1199 if (*wbufsize == 0)
1200 return seterror(EILSEQ);
1201 return len;
1202}
1203
1204static int
1205kernel_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize)
1206{
1207 BOOL usedDefaultChar = 0;
1208 BOOL *p = NULL;
1209 int flags = 0;
1210 int len;
1211
1212 if (bufsize == 0)
1213 return seterror(E2BIG);
1214 if (!must_use_null_useddefaultchar(cv->codepage))
1215 {
1216 p = &usedDefaultChar;
1217#ifdef WC_NO_BEST_FIT_CHARS
1218 if (!(cv->flags & FLAG_TRANSLIT))
1219 flags |= WC_NO_BEST_FIT_CHARS;
1220#endif
1221 }
1222 len = WideCharToMultiByte(cv->codepage, flags,
1223 (const wchar_t *)wbuf, wbufsize, (char *)buf, bufsize, NULL, p);
1224 if (len == 0)
1225 {
1226 if (GetLastError() == ERROR_INSUFFICIENT_BUFFER)
1227 return seterror(E2BIG);
1228 return seterror(EILSEQ);
1229 }
1230 else if (usedDefaultChar && !(cv->flags & FLAG_TRANSLIT))
1231 return seterror(EILSEQ);
1232 else if (cv->mblen(cv, buf, len) != len) /* validate result */
1233 return seterror(EILSEQ);
1234 return len;
1235}
1236
1237/*
1238 * It seems that the mode (cv->mode) is fixnum.
1239 * For example, when converting iso-2022-jp(cp50221) to unicode:
1240 * in ascii sequence: mode=0xC42C0000
1241 * in jisx0208 sequence: mode=0xC42C0001
1242 * "C42C" is same for each convert session.
1243 * It should be: ((codepage-1)<<16)|state
1244 */
1245static int
1246mlang_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize)
1247{
1248 int len;
1249 int insize;
1250 HRESULT hr;
1251
1252 len = cv->mblen(cv, buf, bufsize);
1253 if (len == -1)
1254 return -1;
1255 insize = len;
1256 hr = ConvertINetMultiByteToUnicode(&cv->mode, cv->codepage,
1257 (const char *)buf, &insize, (wchar_t *)wbuf, wbufsize);
1258 if (hr != S_OK || insize != len)
1259 return seterror(EILSEQ);
1260 return len;
1261}
1262
1263static int
1264mlang_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize)
1265{
1266 char tmpbuf[MB_CHAR_MAX]; /* enough room for one character */
1267 int tmpsize = MB_CHAR_MAX;
1268 int insize = wbufsize;
1269 HRESULT hr;
1270
1271 hr = ConvertINetUnicodeToMultiByte(&cv->mode, cv->codepage,
1272 (const wchar_t *)wbuf, &wbufsize, tmpbuf, &tmpsize);
1273 if (hr != S_OK || insize != wbufsize)
1274 return seterror(EILSEQ);
1275 else if (bufsize < tmpsize)
1276 return seterror(E2BIG);
1277 else if (cv->mblen(cv, (uchar *)tmpbuf, tmpsize) != tmpsize)
1278 return seterror(EILSEQ);
1279 memcpy(buf, tmpbuf, tmpsize);
1280 return tmpsize;
1281}
1282
1283static int
1284utf16_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize)
1285{
1286 int codepage = cv->codepage;
1287
1288 /* swap endian: 1200 <-> 1201 */
1289 if (cv->mode & UNICODE_MODE_SWAPPED)
1290 codepage ^= 1;
1291
1292 if (bufsize < 2)
1293 return seterror(EINVAL);
1294 if (codepage == 1200) /* little endian */
1295 wbuf[0] = (buf[1] << 8) | buf[0];
1296 else if (codepage == 1201) /* big endian */
1297 wbuf[0] = (buf[0] << 8) | buf[1];
1298
1299 if ((cv->flags & FLAG_USE_BOM) && !(cv->mode & UNICODE_MODE_BOM_DONE))
1300 {
1301 cv->mode |= UNICODE_MODE_BOM_DONE;
1302 if (wbuf[0] == 0xFFFE)
1303 {
1304 cv->mode |= UNICODE_MODE_SWAPPED;
1305 *wbufsize = 0;
1306 return 2;
1307 }
1308 else if (wbuf[0] == 0xFEFF)
1309 {
1310 *wbufsize = 0;
1311 return 2;
1312 }
1313 }
1314
1315 if (0xDC00 <= wbuf[0] && wbuf[0] <= 0xDFFF)
1316 return seterror(EILSEQ);
1317 if (0xD800 <= wbuf[0] && wbuf[0] <= 0xDBFF)
1318 {
1319 if (bufsize < 4)
1320 return seterror(EINVAL);
1321 if (codepage == 1200) /* little endian */
1322 wbuf[1] = (buf[3] << 8) | buf[2];
1323 else if (codepage == 1201) /* big endian */
1324 wbuf[1] = (buf[2] << 8) | buf[3];
1325 if (!(0xDC00 <= wbuf[1] && wbuf[1] <= 0xDFFF))
1326 return seterror(EILSEQ);
1327 *wbufsize = 2;
1328 return 4;
1329 }
1330 *wbufsize = 1;
1331 return 2;
1332}
1333
1334static int
1335utf16_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize)
1336{
1337 if ((cv->flags & FLAG_USE_BOM) && !(cv->mode & UNICODE_MODE_BOM_DONE))
1338 {
1339 int r;
1340
1341 cv->mode |= UNICODE_MODE_BOM_DONE;
1342 if (bufsize < 2)
1343 return seterror(E2BIG);
1344 if (cv->codepage == 1200) /* little endian */
1345 memcpy(buf, "\xFF\xFE", 2);
1346 else if (cv->codepage == 1201) /* big endian */
1347 memcpy(buf, "\xFE\xFF", 2);
1348
1349 r = utf16_wctomb(cv, wbuf, wbufsize, buf + 2, bufsize - 2);
1350 if (r == -1)
1351 return -1;
1352 return r + 2;
1353 }
1354
1355 if (bufsize < 2)
1356 return seterror(E2BIG);
1357 if (cv->codepage == 1200) /* little endian */
1358 {
1359 buf[0] = (wbuf[0] & 0x00FF);
1360 buf[1] = (wbuf[0] & 0xFF00) >> 8;
1361 }
1362 else if (cv->codepage == 1201) /* big endian */
1363 {
1364 buf[0] = (wbuf[0] & 0xFF00) >> 8;
1365 buf[1] = (wbuf[0] & 0x00FF);
1366 }
1367 if (0xD800 <= wbuf[0] && wbuf[0] <= 0xDBFF)
1368 {
1369 if (bufsize < 4)
1370 return seterror(E2BIG);
1371 if (cv->codepage == 1200) /* little endian */
1372 {
1373 buf[2] = (wbuf[1] & 0x00FF);
1374 buf[3] = (wbuf[1] & 0xFF00) >> 8;
1375 }
1376 else if (cv->codepage == 1201) /* big endian */
1377 {
1378 buf[2] = (wbuf[1] & 0xFF00) >> 8;
1379 buf[3] = (wbuf[1] & 0x00FF);
1380 }
1381 return 4;
1382 }
1383 return 2;
1384}
1385
1386static int
1387utf32_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize)
1388{
1389 int codepage = cv->codepage;
1390 uint wc = 0xD800;
1391
1392 /* swap endian: 12000 <-> 12001 */
1393 if (cv->mode & UNICODE_MODE_SWAPPED)
1394 codepage ^= 1;
1395
1396 if (bufsize < 4)
1397 return seterror(EINVAL);
1398 if (codepage == 12000) /* little endian */
1399 wc = (buf[3] << 24) | (buf[2] << 16) | (buf[1] << 8) | buf[0];
1400 else if (codepage == 12001) /* big endian */
1401 wc = (buf[0] << 24) | (buf[1] << 16) | (buf[2] << 8) | buf[3];
1402
1403 if ((cv->flags & FLAG_USE_BOM) && !(cv->mode & UNICODE_MODE_BOM_DONE))
1404 {
1405 cv->mode |= UNICODE_MODE_BOM_DONE;
1406 if (wc == 0xFFFE0000)
1407 {
1408 cv->mode |= UNICODE_MODE_SWAPPED;
1409 *wbufsize = 0;
1410 return 4;
1411 }
1412 else if (wc == 0x0000FEFF)
1413 {
1414 *wbufsize = 0;
1415 return 4;
1416 }
1417 }
1418
1419 if ((0xD800 <= wc && wc <= 0xDFFF) || 0x10FFFF < wc)
1420 return seterror(EILSEQ);
1421 ucs4_to_utf16(wc, wbuf, wbufsize);
1422 return 4;
1423}
1424
1425static int
1426utf32_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize)
1427{
1428 uint wc;
1429
1430 if ((cv->flags & FLAG_USE_BOM) && !(cv->mode & UNICODE_MODE_BOM_DONE))
1431 {
1432 int r;
1433
1434 cv->mode |= UNICODE_MODE_BOM_DONE;
1435 if (bufsize < 4)
1436 return seterror(E2BIG);
1437 if (cv->codepage == 12000) /* little endian */
1438 memcpy(buf, "\xFF\xFE\x00\x00", 4);
1439 else if (cv->codepage == 12001) /* big endian */
1440 memcpy(buf, "\x00\x00\xFE\xFF", 4);
1441
1442 r = utf32_wctomb(cv, wbuf, wbufsize, buf + 4, bufsize - 4);
1443 if (r == -1)
1444 return -1;
1445 return r + 4;
1446 }
1447
1448 if (bufsize < 4)
1449 return seterror(E2BIG);
1450 wc = utf16_to_ucs4(wbuf);
1451 if (cv->codepage == 12000) /* little endian */
1452 {
1453 buf[0] = wc & 0x000000FF;
1454 buf[1] = (wc & 0x0000FF00) >> 8;
1455 buf[2] = (wc & 0x00FF0000) >> 16;
1456 buf[3] = (wc & 0xFF000000) >> 24;
1457 }
1458 else if (cv->codepage == 12001) /* big endian */
1459 {
1460 buf[0] = (wc & 0xFF000000) >> 24;
1461 buf[1] = (wc & 0x00FF0000) >> 16;
1462 buf[2] = (wc & 0x0000FF00) >> 8;
1463 buf[3] = wc & 0x000000FF;
1464 }
1465 return 4;
1466}
1467
1468/*
1469 * 50220: ISO 2022 Japanese with no halfwidth Katakana; Japanese (JIS)
1470 * 50221: ISO 2022 Japanese with halfwidth Katakana; Japanese (JIS-Allow
1471 * 1 byte Kana)
1472 * 50222: ISO 2022 Japanese JIS X 0201-1989; Japanese (JIS-Allow 1 byte
1473 * Kana - SO/SI)
1474 *
1475 * MultiByteToWideChar() and WideCharToMultiByte() behave differently
1476 * depending on Windows version. On XP, WideCharToMultiByte() doesn't
1477 * terminate result sequence with ascii escape. But Vista does.
1478 * Use MLang instead.
1479 */
1480
/* Conversion state: character set id in bits 8..15, shift state in
 * bits 0..7. */
#define ISO2022_MODE(cs, shift) (((cs) << 8) | (shift))
#define ISO2022_MODE_CS(mode) (((mode) >> 8) & 0xFF)
#define ISO2022_MODE_SHIFT(mode) ((mode) & 0xFF)

/* Shift states */
#define ISO2022_SI  0
#define ISO2022_SO  1

/* shift in */
static const char iso2022_SI_seq[] = "\x0F";
/* shift out */
static const char iso2022_SO_seq[] = "\x0E";

/* One escape sequence and the character set it designates. */
typedef struct iso2022_esc_t {
    const char *esc;    /* the escape sequence bytes */
    int esc_len;        /* length of esc in bytes */
    int len;            /* bytes per character in this character set */
    int cs;             /* ISO2022JP_CS_* selected by the sequence */
} iso2022_esc_t;

/* Character set ids.  They double as indexes into iso2022jp_esc[]. */
#define ISO2022JP_CS_ASCII            0
#define ISO2022JP_CS_JISX0201_ROMAN   1
#define ISO2022JP_CS_JISX0201_KANA    2
#define ISO2022JP_CS_JISX0208_1978    3
#define ISO2022JP_CS_JISX0208_1983    4
#define ISO2022JP_CS_JISX0212         5

/* Terminated by an entry whose esc is NULL. */
static iso2022_esc_t iso2022jp_esc[] = {
    {"\x1B\x28\x42", 3, 1, ISO2022JP_CS_ASCII},
    {"\x1B\x28\x4A", 3, 1, ISO2022JP_CS_JISX0201_ROMAN},
    {"\x1B\x28\x49", 3, 1, ISO2022JP_CS_JISX0201_KANA},
    {"\x1B\x24\x40", 3, 2, ISO2022JP_CS_JISX0208_1983}, /* unify 1978 with 1983 */
    {"\x1B\x24\x42", 3, 2, ISO2022JP_CS_JISX0208_1983},
    {"\x1B\x24\x28\x44", 4, 2, ISO2022JP_CS_JISX0212},
    {NULL, 0, 0, 0}
};
1517
1518static int
1519iso2022jp_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize)
1520{
1521 iso2022_esc_t *iesc = iso2022jp_esc;
1522 char tmp[MB_CHAR_MAX];
1523 int insize;
1524 HRESULT hr;
1525 DWORD dummy = 0;
1526 int len;
1527 int esc_len;
1528 int cs;
1529 int shift;
1530 int i;
1531
1532 if (buf[0] == 0x1B)
1533 {
1534 for (i = 0; iesc[i].esc != NULL; ++i)
1535 {
1536 esc_len = iesc[i].esc_len;
1537 if (bufsize < esc_len)
1538 {
1539 if (strncmp((char *)buf, iesc[i].esc, bufsize) == 0)
1540 return seterror(EINVAL);
1541 }
1542 else
1543 {
1544 if (strncmp((char *)buf, iesc[i].esc, esc_len) == 0)
1545 {
1546 cv->mode = ISO2022_MODE(iesc[i].cs, ISO2022_SI);
1547 *wbufsize = 0;
1548 return esc_len;
1549 }
1550 }
1551 }
1552 /* not supported escape sequence */
1553 return seterror(EILSEQ);
1554 }
1555 else if (buf[0] == iso2022_SO_seq[0])
1556 {
1557 cv->mode = ISO2022_MODE(ISO2022_MODE_CS(cv->mode), ISO2022_SO);
1558 *wbufsize = 0;
1559 return 1;
1560 }
1561 else if (buf[0] == iso2022_SI_seq[0])
1562 {
1563 cv->mode = ISO2022_MODE(ISO2022_MODE_CS(cv->mode), ISO2022_SI);
1564 *wbufsize = 0;
1565 return 1;
1566 }
1567
1568 cs = ISO2022_MODE_CS(cv->mode);
1569 shift = ISO2022_MODE_SHIFT(cv->mode);
1570
1571 /* reset the mode for informal sequence */
1572 if (buf[0] < 0x20)
1573 {
1574 cs = ISO2022JP_CS_ASCII;
1575 shift = ISO2022_SI;
1576 }
1577
1578 len = iesc[cs].len;
1579 if (bufsize < len)
1580 return seterror(EINVAL);
1581 for (i = 0; i < len; ++i)
1582 if (!(buf[i] < 0x80))
1583 return seterror(EILSEQ);
1584 esc_len = iesc[cs].esc_len;
1585 memcpy(tmp, iesc[cs].esc, esc_len);
1586 if (shift == ISO2022_SO)
1587 {
1588 memcpy(tmp + esc_len, iso2022_SO_seq, 1);
1589 esc_len += 1;
1590 }
1591 memcpy(tmp + esc_len, buf, len);
1592
1593 if ((cv->codepage == 50220 || cv->codepage == 50221
1594 || cv->codepage == 50222) && shift == ISO2022_SO)
1595 {
1596 /* XXX: shift-out cannot be used for mbtowc (both kernel and
1597 * mlang) */
1598 esc_len = iesc[ISO2022JP_CS_JISX0201_KANA].esc_len;
1599 memcpy(tmp, iesc[ISO2022JP_CS_JISX0201_KANA].esc, esc_len);
1600 memcpy(tmp + esc_len, buf, len);
1601 }
1602
1603 insize = len + esc_len;
1604 hr = ConvertINetMultiByteToUnicode(&dummy, cv->codepage,
1605 (const char *)tmp, &insize, (wchar_t *)wbuf, wbufsize);
1606 if (hr != S_OK || insize != len + esc_len)
1607 return seterror(EILSEQ);
1608
1609 /* Check for conversion error. Assuming defaultChar is 0x3F. */
1610 /* ascii should be converted from ascii */
1611 if (wbuf[0] == buf[0]
1612 && cv->mode != ISO2022_MODE(ISO2022JP_CS_ASCII, ISO2022_SI))
1613 return seterror(EILSEQ);
1614
1615 /* reset the mode for informal sequence */
1616 if (cv->mode != ISO2022_MODE(cs, shift))
1617 cv->mode = ISO2022_MODE(cs, shift);
1618
1619 return len;
1620}
1621
1622static int
1623iso2022jp_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize)
1624{
1625 iso2022_esc_t *iesc = iso2022jp_esc;
1626 char tmp[MB_CHAR_MAX];
1627 int tmpsize = MB_CHAR_MAX;
1628 int insize = wbufsize;
1629 HRESULT hr;
1630 DWORD dummy = 0;
1631 int len;
1632 int esc_len;
1633 int cs;
1634 int shift;
1635 int i;
1636
1637 /*
1638 * MultiByte = [escape sequence] + character + [escape sequence]
1639 *
1640 * Whether trailing escape sequence is added depends on which API is
1641 * used (kernel or MLang, and its version).
1642 */
1643 hr = ConvertINetUnicodeToMultiByte(&dummy, cv->codepage,
1644 (const wchar_t *)wbuf, &wbufsize, tmp, &tmpsize);
1645 if (hr != S_OK || insize != wbufsize)
1646 return seterror(EILSEQ);
1647 else if (bufsize < tmpsize)
1648 return seterror(E2BIG);
1649
1650 if (tmpsize == 1)
1651 {
1652 cs = ISO2022JP_CS_ASCII;
1653 esc_len = 0;
1654 }
1655 else
1656 {
1657 for (i = 1; iesc[i].esc != NULL; ++i)
1658 {
1659 esc_len = iesc[i].esc_len;
1660 if (strncmp(tmp, iesc[i].esc, esc_len) == 0)
1661 {
1662 cs = iesc[i].cs;
1663 break;
1664 }
1665 }
1666 if (iesc[i].esc == NULL)
1667 /* not supported escape sequence */
1668 return seterror(EILSEQ);
1669 }
1670
1671 shift = ISO2022_SI;
1672 if (tmp[esc_len] == iso2022_SO_seq[0])
1673 {
1674 shift = ISO2022_SO;
1675 esc_len += 1;
1676 }
1677
1678 len = iesc[cs].len;
1679
1680 /* Check for converting error. Assuming defaultChar is 0x3F. */
1681 /* ascii should be converted from ascii */
1682 if (cs == ISO2022JP_CS_ASCII && !(wbuf[0] < 0x80))
1683 return seterror(EILSEQ);
1684 else if (tmpsize < esc_len + len)
1685 return seterror(EILSEQ);
1686
1687 if (cv->mode == ISO2022_MODE(cs, shift))
1688 {
1689 /* remove escape sequence */
1690 if (esc_len != 0)
1691 memmove(tmp, tmp + esc_len, len);
1692 esc_len = 0;
1693 }
1694 else
1695 {
1696 if (cs == ISO2022JP_CS_ASCII)
1697 {
1698 esc_len = iesc[ISO2022JP_CS_ASCII].esc_len;
1699 memmove(tmp + esc_len, tmp, len);
1700 memcpy(tmp, iesc[ISO2022JP_CS_ASCII].esc, esc_len);
1701 }
1702 if (ISO2022_MODE_SHIFT(cv->mode) == ISO2022_SO)
1703 {
1704 /* shift-in before changing to other mode */
1705 memmove(tmp + 1, tmp, len + esc_len);
1706 memcpy(tmp, iso2022_SI_seq, 1);
1707 esc_len += 1;
1708 }
1709 }
1710
1711 if (bufsize < len + esc_len)
1712 return seterror(E2BIG);
1713 memcpy(buf, tmp, len + esc_len);
1714 cv->mode = ISO2022_MODE(cs, shift);
1715 return len + esc_len;
1716}
1717
1718static int
1719iso2022jp_flush(csconv_t *cv, uchar *buf, int bufsize)
1720{
1721 iso2022_esc_t *iesc = iso2022jp_esc;
1722 int esc_len;
1723
1724 if (cv->mode != ISO2022_MODE(ISO2022JP_CS_ASCII, ISO2022_SI))
1725 {
1726 esc_len = 0;
1727 if (ISO2022_MODE_SHIFT(cv->mode) != ISO2022_SI)
1728 esc_len += 1;
1729 if (ISO2022_MODE_CS(cv->mode) != ISO2022JP_CS_ASCII)
1730 esc_len += iesc[ISO2022JP_CS_ASCII].esc_len;
1731 if (bufsize < esc_len)
1732 return seterror(E2BIG);
1733
1734 esc_len = 0;
1735 if (ISO2022_MODE_SHIFT(cv->mode) != ISO2022_SI)
1736 {
1737 memcpy(buf, iso2022_SI_seq, 1);
1738 esc_len += 1;
1739 }
1740 if (ISO2022_MODE_CS(cv->mode) != ISO2022JP_CS_ASCII)
1741 {
1742 memcpy(buf + esc_len, iesc[ISO2022JP_CS_ASCII].esc,
1743 iesc[ISO2022JP_CS_ASCII].esc_len);
1744 esc_len += iesc[ISO2022JP_CS_ASCII].esc_len;
1745 }
1746 return esc_len;
1747 }
1748 return 0;
1749}
1750
1751static void process_file(iconv_t cd, FILE *in, FILE *out)
1752{
1753 char inbuf[BUFSIZ];
1754 char outbuf[BUFSIZ];
1755 const char *pin;
1756 char *pout;
1757 size_t inbytesleft;
1758 size_t outbytesleft;
1759 size_t rest = 0;
1760 size_t r;
1761
1762 while ((inbytesleft=fread(inbuf+rest, 1, sizeof(inbuf)-rest, in)) != 0
1763 || rest != 0) {
1764 inbytesleft += rest;
1765 pin = inbuf;
1766 pout = outbuf;
1767 outbytesleft = sizeof(outbuf);
1768 r = iconv(cd, &pin, &inbytesleft, &pout, &outbytesleft);
1769 fwrite(outbuf, 1, sizeof(outbuf) - outbytesleft, out);
1770 if (r == (size_t)(-1) && errno != E2BIG &&
1771 (errno != EINVAL || feof(in)))
1772 bb_perror_msg_and_die("conversion error");
1773 memmove(inbuf, pin, inbytesleft);
1774 rest = inbytesleft;
1775 }
1776 pout = outbuf;
1777 outbytesleft = sizeof(outbuf);
1778 r = iconv(cd, NULL, NULL, &pout, &outbytesleft);
1779 fwrite(outbuf, 1, sizeof(outbuf) - outbytesleft, out);
1780 if (r == (size_t)(-1))
1781 bb_perror_msg_and_die("conversion error");
1782}
1783
1784int iconv_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
1785int iconv_main(int argc, char **argv)
1786{
1787 char *fromcode = NULL;
1788 char *tocode = NULL;
1789 int i;
1790 iconv_t cd;
1791 FILE *in = stdin;
1792 FILE *out = stdout;
1793 int ignore = 0;
1794
1795 while ((i = getopt(argc, argv, "f:t:lco:")) != -1) {
1796 switch (i) {
1797 case 'l':
1798 for (i = 0; codepage_alias[i].name != NULL; ++i)
1799 printf("%s\n", codepage_alias[i].name);
1800 return 0;
1801
1802 case 'f':
1803 fromcode = optarg;
1804 break;
1805
1806 case 't':
1807 tocode = optarg;
1808 break;
1809
1810 case 'c':
1811 ignore = 1;
1812 break;
1813
1814 case 'o':
1815 out = xfopen(optarg, "wb");
1816 break;
1817
1818 default:
1819 bb_show_usage();
1820 }
1821 }
1822
1823 if (fromcode == NULL || tocode == NULL)
1824 bb_show_usage();
1825
1826 if (ignore)
1827 tocode = xasprintf("%s//IGNORE", tocode);
1828
1829 cd = iconv_open(tocode, fromcode);
1830 if (cd == (iconv_t)(-1))
1831 bb_perror_msg_and_die("iconv_open error");
1832
1833 if (optind == argc ||
1834 (optind == argc-1 && strcmp(argv[optind], "-") == 0)) {
1835 process_file(cd, in, out);
1836 }
1837 else {
1838 for (i=optind; i<argc; ++i) {
1839 in = xfopen(argv[i], "rb");
1840 process_file(cd, in, out);
1841 fclose(in);
1842 }
1843 }
1844
1845 iconv_close(cd);
1846 return 0;
1847}
diff --git a/miscutils/less.c b/miscutils/less.c
index 223c2558d..a5ce14c91 100644
--- a/miscutils/less.c
+++ b/miscutils/less.c
@@ -145,6 +145,10 @@
145 145
146#include <sched.h> /* sched_yield() */ 146#include <sched.h> /* sched_yield() */
147 147
148#if ENABLE_PLATFORM_MINGW32
149#include <conio.h>
150#endif
151
148#include "libbb.h" 152#include "libbb.h"
149#include "common_bufsiz.h" 153#include "common_bufsiz.h"
150#if ENABLE_FEATURE_LESS_REGEXP 154#if ENABLE_FEATURE_LESS_REGEXP
@@ -236,7 +240,9 @@ struct globals {
236 smallint winsize_err; 240 smallint winsize_err;
237#endif 241#endif
238 smallint terminated; 242 smallint terminated;
243#if !ENABLE_PLATFORM_MINGW32
239 struct termios term_orig, term_less; 244 struct termios term_orig, term_less;
245#endif
240 char kbd_input[KEYCODE_BUFFER_SIZE]; 246 char kbd_input[KEYCODE_BUFFER_SIZE];
241}; 247};
242#define G (*ptr_to_globals) 248#define G (*ptr_to_globals)
@@ -298,7 +304,9 @@ struct globals {
298static void set_tty_cooked(void) 304static void set_tty_cooked(void)
299{ 305{
300 fflush_all(); 306 fflush_all();
307#if !ENABLE_PLATFORM_MINGW32
301 tcsetattr(kbd_fd, TCSANOW, &term_orig); 308 tcsetattr(kbd_fd, TCSANOW, &term_orig);
309#endif
302} 310}
303 311
304/* Move the cursor to a position (x,y), where (0,0) is the 312/* Move the cursor to a position (x,y), where (0,0) is the
@@ -330,7 +338,11 @@ static void less_exit(int code)
330 set_tty_cooked(); 338 set_tty_cooked();
331 if (!(G.kbd_fd_orig_flags & O_NONBLOCK)) 339 if (!(G.kbd_fd_orig_flags & O_NONBLOCK))
332 ndelay_off(kbd_fd); 340 ndelay_off(kbd_fd);
341#if !ENABLE_PLATFORM_MINGW32
333 clear_line(); 342 clear_line();
343#else
344 printf(ESC"[?1049l");
345#endif
334 if (code < 0) 346 if (code < 0)
335 kill_myself_with_sig(- code); /* does not return */ 347 kill_myself_with_sig(- code); /* does not return */
336 exit(code); 348 exit(code);
@@ -575,6 +587,11 @@ static void read_lines(void)
575 last_line_pos = 0; 587 last_line_pos = 0;
576 break; 588 break;
577 } 589 }
590#if ENABLE_PLATFORM_MINGW32
591 if (c == '\r') {
592 continue;
593 }
594#endif
578 /* NUL is substituted by '\n'! */ 595 /* NUL is substituted by '\n'! */
579 if (c == '\0') c = '\n'; 596 if (c == '\0') c = '\n';
580 *p++ = c; 597 *p++ = c;
@@ -671,7 +688,12 @@ static void update_num_lines(void)
671 /* only do this for regular files */ 688 /* only do this for regular files */
672 if (num_lines == REOPEN_AND_COUNT || num_lines == REOPEN_STDIN) { 689 if (num_lines == REOPEN_AND_COUNT || num_lines == REOPEN_STDIN) {
673 count = 0; 690 count = 0;
691#if !ENABLE_PLATFORM_MINGW32
674 fd = open("/proc/self/fd/0", O_RDONLY); 692 fd = open("/proc/self/fd/0", O_RDONLY);
693#else
694 /* don't even try to access /proc on WIN32 */
695 fd = -1;
696#endif
675 if (fd < 0 && num_lines == REOPEN_AND_COUNT) { 697 if (fd < 0 && num_lines == REOPEN_AND_COUNT) {
676 /* "filename" is valid only if REOPEN_AND_COUNT */ 698 /* "filename" is valid only if REOPEN_AND_COUNT */
677 fd = open(filename, O_RDONLY); 699 fd = open(filename, O_RDONLY);
@@ -854,7 +876,12 @@ static void print_found(const char *line)
854 match_status = 1; 876 match_status = 1;
855 } 877 }
856 878
879#if !ENABLE_PLATFORM_MINGW32
857 printf("%s%s\n", growline ? growline : "", str); 880 printf("%s%s\n", growline ? growline : "", str);
881#else
882 /* skip newline, we use explicit positioning on WIN32 */
883 printf("%s%s", growline ? growline : "", str);
884#endif
858 free(growline); 885 free(growline);
859} 886}
860#else 887#else
@@ -890,7 +917,12 @@ static void print_ascii(const char *str)
890 *p = '\0'; 917 *p = '\0';
891 print_hilite(buf); 918 print_hilite(buf);
892 } 919 }
920#if !ENABLE_PLATFORM_MINGW32
893 puts(str); 921 puts(str);
922#else
923 /* skip newline, we use explicit positioning on WIN32 */
924 printf("%s", str);
925#endif
894} 926}
895 927
896/* Print the buffer */ 928/* Print the buffer */
@@ -900,6 +932,10 @@ static void buffer_print(void)
900 932
901 move_cursor(0, 0); 933 move_cursor(0, 0);
902 for (i = 0; i <= max_displayed_line; i++) { 934 for (i = 0; i <= max_displayed_line; i++) {
935#if ENABLE_PLATFORM_MINGW32
936 /* make sure we're on the right line */
937 move_cursor(i+1, 0);
938#endif
903 printf(CLEAR_2_EOL); 939 printf(CLEAR_2_EOL);
904 if (option_mask32 & FLAG_N) 940 if (option_mask32 & FLAG_N)
905 print_lineno(buffer[i]); 941 print_lineno(buffer[i]);
@@ -1087,9 +1123,13 @@ static void reinitialize(void)
1087 if (G.winsize_err) 1123 if (G.winsize_err)
1088 printf(ESC"[999;999H" ESC"[6n"); 1124 printf(ESC"[999;999H" ESC"[6n");
1089#endif 1125#endif
1126#if ENABLE_PLATFORM_MINGW32
1127 printf(ESC"[?1049h");
1128#endif
1090 buffer_fill_and_print(); 1129 buffer_fill_and_print();
1091} 1130}
1092 1131
1132#if !ENABLE_PLATFORM_MINGW32
1093static int64_t getch_nowait(void) 1133static int64_t getch_nowait(void)
1094{ 1134{
1095 int rd; 1135 int rd;
@@ -1151,6 +1191,46 @@ static int64_t getch_nowait(void)
1151 set_tty_cooked(); 1191 set_tty_cooked();
1152 return key64; 1192 return key64;
1153} 1193}
1194#else
1195static int64_t getch_nowait(void)
1196{
1197 int64_t c;
1198
1199retry:
1200 c = _getch();
1201 if (c == 0 || c == 0xe0) {
1202 switch (_getch()) {
1203 case 0x48:
1204 c = KEYCODE_UP;
1205 break;
1206 case 0x50:
1207 c = KEYCODE_DOWN;
1208 break;
1209 case 0x49:
1210 c = KEYCODE_PAGEUP;
1211 break;
1212 case 0x51:
1213 c = KEYCODE_PAGEDOWN;
1214 break;
1215 case 0x47:
1216 c = KEYCODE_HOME;
1217 break;
1218 case 0x4f:
1219 c = KEYCODE_END;
1220 break;
1221 default:
1222 goto retry;
1223 }
1224 }
1225
1226 /* Position cursor if line input is done */
1227 if (less_gets_pos >= 0)
1228 move_cursor(max_displayed_line + 2, less_gets_pos + 1);
1229 fflush_all();
1230
1231 return c;
1232}
1233#endif
1154 1234
1155/* Grab a character from input without requiring the return key. 1235/* Grab a character from input without requiring the return key.
1156 * May return KEYCODE_xxx values. 1236 * May return KEYCODE_xxx values.
@@ -1791,10 +1871,12 @@ static void keypress_process(int keypress)
1791 number_process(keypress); 1871 number_process(keypress);
1792} 1872}
1793 1873
1874#if !ENABLE_PLATFORM_MINGW32
1794static void sig_catcher(int sig) 1875static void sig_catcher(int sig)
1795{ 1876{
1796 less_exit(- sig); 1877 less_exit(- sig);
1797} 1878}
1879#endif
1798 1880
1799#if ENABLE_FEATURE_LESS_WINCH 1881#if ENABLE_FEATURE_LESS_WINCH
1800static void sigwinch_handler(int sig UNUSED_PARAM) 1882static void sigwinch_handler(int sig UNUSED_PARAM)
@@ -1806,7 +1888,9 @@ static void sigwinch_handler(int sig UNUSED_PARAM)
1806int less_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; 1888int less_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
1807int less_main(int argc, char **argv) 1889int less_main(int argc, char **argv)
1808{ 1890{
1891#if !ENABLE_PLATFORM_MINGW32
1809 char *tty_name; 1892 char *tty_name;
1893#endif
1810 int tty_fd; 1894 int tty_fd;
1811 1895
1812 INIT_G(); 1896 INIT_G();
@@ -1865,6 +1949,7 @@ int less_main(int argc, char **argv)
1865 if (option_mask32 & FLAG_TILDE) 1949 if (option_mask32 & FLAG_TILDE)
1866 empty_line_marker = ""; 1950 empty_line_marker = "";
1867 1951
1952#if !ENABLE_PLATFORM_MINGW32
1868 /* Some versions of less can survive w/o controlling tty, 1953 /* Some versions of less can survive w/o controlling tty,
1869 * try to do the same. This also allows to specify an alternative 1954 * try to do the same. This also allows to specify an alternative
1870 * tty via "less 1<>TTY". 1955 * tty via "less 1<>TTY".
@@ -1890,8 +1975,13 @@ int less_main(int argc, char **argv)
1890 } 1975 }
1891 G.kbd_fd_orig_flags = ndelay_on(tty_fd); 1976 G.kbd_fd_orig_flags = ndelay_on(tty_fd);
1892 kbd_fd = tty_fd; /* save in a global */ 1977 kbd_fd = tty_fd; /* save in a global */
1978#else
1979 kbd_fd = tty_fd = 0;
1980#endif
1893 1981
1982#if !ENABLE_PLATFORM_MINGW32
1894 get_termios_and_make_raw(tty_fd, &term_less, &term_orig, TERMIOS_RAW_CRNL_INPUT); 1983 get_termios_and_make_raw(tty_fd, &term_less, &term_orig, TERMIOS_RAW_CRNL_INPUT);
1984#endif
1895 1985
1896 IF_FEATURE_LESS_ASK_TERMINAL(G.winsize_err =) get_terminal_width_height(tty_fd, &width, &max_displayed_line); 1986 IF_FEATURE_LESS_ASK_TERMINAL(G.winsize_err =) get_terminal_width_height(tty_fd, &width, &max_displayed_line);
1897 /* 20: two tabstops + 4 */ 1987 /* 20: two tabstops + 4 */
diff --git a/miscutils/man.c b/miscutils/man.c
index 61086612a..6724b4b5d 100644
--- a/miscutils/man.c
+++ b/miscutils/man.c
@@ -199,8 +199,7 @@ static char **add_MANPATH(char **man_path_list, int *count_mp, char *path)
199 if (path) while (*path) { 199 if (path) while (*path) {
200 char *next_path; 200 char *next_path;
201 char **path_element; 201 char **path_element;
202 202 next_path = strchr(path, PATH_SEP);
203 next_path = strchr(path, ':');
204 if (next_path) { 203 if (next_path) {
205 if (next_path == path) /* "::"? */ 204 if (next_path == path) /* "::"? */
206 goto next; 205 goto next;
@@ -223,7 +222,7 @@ static char **add_MANPATH(char **man_path_list, int *count_mp, char *path)
223 if (!next_path) 222 if (!next_path)
224 break; 223 break;
225 /* "path" may be a result of getenv(), be nice and don't mangle it */ 224 /* "path" may be a result of getenv(), be nice and don't mangle it */
226 *next_path = ':'; 225 *next_path = PATH_SEP;
227 next: 226 next:
228 path = next_path + 1; 227 path = next_path + 1;
229 } 228 }
@@ -249,11 +248,24 @@ int man_main(int argc UNUSED_PARAM, char **argv)
249 int count_mp; 248 int count_mp;
250 int opt, not_found; 249 int opt, not_found;
251 char *token[2]; 250 char *token[2];
251#if ENABLE_PLATFORM_MINGW32
252 char **ptr;
253 char *exepath, *relpath;
254 const char *mpl[] = { "/usr/man", "/usr/share/man", NULL, NULL };
255#endif
252 256
253 INIT_G(); 257 INIT_G();
254 258
255 opt = getopt32(argv, "^+" "aw" "\0" "-1"/*at least one arg*/); 259 opt = getopt32(argv, "^+" "aw" "\0" "-1"/*at least one arg*/);
256 argv += optind; 260 argv += optind;
261#if ENABLE_PLATFORM_MINGW32
262 /* add system drive prefix to filenames, if necessary */
263 for (ptr = argv; *ptr; ++ptr) {
264 if (strchr(*ptr, '/') || strchr(*ptr, '\\'))
265 *ptr = xabsolute_path(*ptr);
266 }
267 chdir_system_drive();
268#endif
257 269
258 sec_list = xstrdup("0p:1:1p:2:3:3p:4:5:6:7:8:9"); 270 sec_list = xstrdup("0p:1:1p:2:3:3p:4:5:6:7:8:9");
259 271
@@ -291,11 +303,25 @@ int man_main(int argc UNUSED_PARAM, char **argv)
291 } 303 }
292 config_close(parser); 304 config_close(parser);
293 305
306#if ENABLE_PLATFORM_MINGW32
307 /* allow man pages to be stored relative to the executable */
308 exepath = xstrdup(bb_busybox_exec_path);
309 relpath = concat_path_file(dirname(exepath), "man");
310
311 if (!man_path_list) {
312 mpl[2] = relpath;
313 man_path_list = (char**)mpl;
314 }
315 else {
316 man_path_list = add_MANPATH(man_path_list, &count_mp, relpath);
317 }
318#else
294 if (!man_path_list) { 319 if (!man_path_list) {
295 static const char *const mpl[] = { "/usr/man", "/usr/share/man", NULL }; 320 static const char *const mpl[] = { "/usr/man", "/usr/share/man", NULL };
296 man_path_list = (char**)mpl; 321 man_path_list = (char**)mpl;
297 /*count_mp = 2; - not used below anyway */ 322 /*count_mp = 2; - not used below anyway */
298 } 323 }
324#endif
299 325
300 { 326 {
301 /* environment overrides setting from man.config */ 327 /* environment overrides setting from man.config */