aboutsummaryrefslogtreecommitdiff
path: root/win32/winansi.c
diff options
context:
space:
mode:
Diffstat (limited to 'win32/winansi.c')
-rw-r--r--win32/winansi.c1608
1 files changed, 1608 insertions, 0 deletions
diff --git a/win32/winansi.c b/win32/winansi.c
new file mode 100644
index 000000000..c7529c453
--- /dev/null
+++ b/win32/winansi.c
@@ -0,0 +1,1608 @@
1/*
2 * Copyright 2008 Peter Harris <git@peter.is-a-geek.org>
3 */
4
5#include "libbb.h"
6#include <windows.h>
7#include "lazyload.h"
8#undef PACKED
9
10static BOOL charToConBuffA(LPSTR s, DWORD len);
11static BOOL charToConA(LPSTR s);
12
13static int conv_fwriteCon(FILE *stream, char *buf, size_t siz);
14static int conv_writeCon(int fd, char *buf, size_t siz);
15
/*
 * Functions to be wrapped.  Any macro definitions of these names are
 * removed so that the calls made in this file use the underlying
 * functions rather than the macros.
 */

/* formatted output */
#undef printf
#undef fprintf
#undef vprintf
#undef vfprintf

/* character and string output */
#undef putchar
#undef fputc
#undef puts
#undef fputs

/* block and descriptor I/O */
#undef fwrite
#undef fread
#undef write
#undef read

/* character and line input */
#undef getc
#undef fgets
34#define FOREGROUND_ALL (FOREGROUND_RED | FOREGROUND_GREEN | FOREGROUND_BLUE)
35#define BACKGROUND_ALL (BACKGROUND_RED | BACKGROUND_GREEN | BACKGROUND_BLUE)
36
37static WORD plain_attr = 0xffff;
38static WORD current_attr;
39
40static HANDLE get_console(void)
41{
42 return GetStdHandle(STD_OUTPUT_HANDLE);
43}
44
45static WORD get_console_attr(void)
46{
47 CONSOLE_SCREEN_BUFFER_INFO sbi;
48
49 if (GetConsoleScreenBufferInfo(get_console(), &sbi))
50 return sbi.wAttributes;
51
52 return FOREGROUND_ALL;
53}
54
55static int is_console(int fd)
56{
57 if (plain_attr == 0xffff)
58 current_attr = plain_attr = get_console_attr();
59 return isatty(fd) && get_console() != INVALID_HANDLE_VALUE;
60}
61
62static ALWAYS_INLINE int is_console_in(int fd)
63{
64 return isatty(fd) && GetStdHandle(STD_INPUT_HANDLE) != INVALID_HANDLE_VALUE;
65}
66
67static int is_wine(void)
68{
69 DECLARE_PROC_ADDR(const char *, wine_get_version, void);
70
71 return INIT_PROC_ADDR(ntdll.dll, wine_get_version) != NULL;
72}
73
/* Console mode flags, supplied here in case the headers lack them. */
#if !defined(ENABLE_VIRTUAL_TERMINAL_PROCESSING)
# define ENABLE_VIRTUAL_TERMINAL_PROCESSING 0x0004
#endif

#if !defined(DISABLE_NEWLINE_AUTO_RETURN)
# define DISABLE_NEWLINE_AUTO_RETURN 0x0008
#endif

#if !defined(ENABLE_VIRTUAL_TERMINAL_INPUT)
# define ENABLE_VIRTUAL_TERMINAL_INPUT 0x0200
#endif
85
86int FAST_FUNC terminal_mode(int reset)
87{
88 static int mode = -1;
89
90#if ENABLE_FEATURE_EURO
91 if (mode < 0) {
92 if (GetConsoleCP() == 850 && GetConsoleOutputCP() == 850) {
93 SetConsoleCP(858);
94 SetConsoleOutputCP(858);
95 }
96 }
97#endif
98
99 if (mode < 0 || reset) {
100 HANDLE h;
101 DWORD oldmode, newmode;
102 const char *term = getenv(BB_TERMINAL_MODE);
103 const char *skip = getenv(BB_SKIP_ANSI_EMULATION);
104
105 if (term) {
106 mode = atoi(term);
107 } else if (skip) {
108 mode = atoi(skip);
109 if (mode == 2)
110 mode = 5;
111 else if (mode != 1)
112 mode = 0;
113 } else {
114 mode = (getenv("CONEMUPID") != NULL || is_wine()) ? 0 :
115 CONFIG_TERMINAL_MODE;
116 }
117
118 if (mode < 0 || mode > 5)
119 mode = CONFIG_TERMINAL_MODE;
120
121 if (is_console(STDOUT_FILENO)) {
122 h = get_console();
123 if (GetConsoleMode(h, &oldmode)) {
124 // Try to recover from mode 0 induced by SSH.
125 newmode = oldmode == 0 ? 3 : oldmode;
126 // Turn off DISABLE_NEWLINE_AUTO_RETURN induced by Gradle?
127 newmode &= ~DISABLE_NEWLINE_AUTO_RETURN;
128
129 if ((mode & VT_OUTPUT)) {
130 newmode |= ENABLE_VIRTUAL_TERMINAL_PROCESSING;
131 } else if (mode < 4) {
132 newmode &= ~ENABLE_VIRTUAL_TERMINAL_PROCESSING;
133 } else if ((oldmode & ENABLE_VIRTUAL_TERMINAL_PROCESSING)) {
134 mode |= VT_OUTPUT;
135 }
136
137 if (newmode != oldmode) {
138 if (!SetConsoleMode(h, newmode)) {
139 if (mode >= 4)
140 mode &= ~VT_OUTPUT;
141 newmode &= ~ENABLE_VIRTUAL_TERMINAL_PROCESSING;
142 SetConsoleMode(h, newmode);
143 }
144 }
145 }
146 }
147
148 if (is_console_in(STDIN_FILENO)) {
149 h = GetStdHandle(STD_INPUT_HANDLE);
150 if (GetConsoleMode(h, &oldmode)) {
151 // Try to recover from mode 0 induced by SSH.
152 newmode = oldmode == 0 ? 0x1f7 : oldmode;
153
154 if (mode < 4) {
155 if ((mode & VT_INPUT))
156 newmode |= ENABLE_VIRTUAL_TERMINAL_INPUT;
157 else
158 newmode &= ~ENABLE_VIRTUAL_TERMINAL_INPUT;
159 } else if ((oldmode & ENABLE_VIRTUAL_TERMINAL_INPUT)) {
160 mode |= VT_INPUT;
161 }
162
163 if (newmode != oldmode) {
164 if (!SetConsoleMode(h, newmode)) {
165 if (mode >= 4)
166 mode &= ~VT_INPUT;
167 // Failure to set the new mode seems to leave
168 // the flag set. Forcibly unset it.
169 newmode &= ~ENABLE_VIRTUAL_TERMINAL_INPUT;
170 SetConsoleMode(h, newmode);
171 }
172 }
173 }
174 }
175 }
176
177 return mode;
178}
179
/* Set the console window title to str. */
void set_title(const char *str)
{
	(void)SetConsoleTitle(str);
}
184
/* Copy the console window title into buf (capacity len) and return
 * the value reported by GetConsoleTitle(). */
int get_title(char *buf, int len)
{
	int got = GetConsoleTitle(buf, len);

	return got;
}
189
190static HANDLE dup_handle(HANDLE h)
191{
192 HANDLE h2;
193
194 if (!DuplicateHandle(GetCurrentProcess(), h, GetCurrentProcess(),
195 &h2, 0, TRUE, DUPLICATE_SAME_ACCESS))
196 return INVALID_HANDLE_VALUE;
197 return h2;
198}
199
200static void use_alt_buffer(int flag)
201{
202 static HANDLE console_orig = INVALID_HANDLE_VALUE;
203 HANDLE console, h;
204
205 if (flag) {
206 SECURITY_ATTRIBUTES sa;
207 CONSOLE_SCREEN_BUFFER_INFO sbi;
208
209 if (console_orig != INVALID_HANDLE_VALUE)
210 return;
211
212 console = get_console();
213 console_orig = dup_handle(console);
214
215 // handle should be inheritable
216 memset(&sa, 0, sizeof(sa));
217 sa.nLength = sizeof(sa);
218 /* sa.lpSecurityDescriptor = NULL; - memset did it */
219 sa.bInheritHandle = TRUE;
220
221 // create new alternate buffer
222 h = CreateConsoleScreenBuffer(GENERIC_READ|GENERIC_WRITE,
223 FILE_SHARE_READ|FILE_SHARE_WRITE, &sa,
224 CONSOLE_TEXTMODE_BUFFER, NULL);
225 if (h == INVALID_HANDLE_VALUE)
226 return;
227
228 if (GetConsoleScreenBufferInfo(console, &sbi))
229 SetConsoleScreenBufferSize(h, sbi.dwSize);
230 }
231 else {
232 if (console_orig == INVALID_HANDLE_VALUE)
233 return;
234
235 // revert to original buffer
236 h = dup_handle(console_orig);
237 console_orig = INVALID_HANDLE_VALUE;
238 if (h == INVALID_HANDLE_VALUE)
239 return;
240 }
241
242 console = h;
243 SetConsoleActiveScreenBuffer(console);
244 close(STDOUT_FILENO);
245 _open_osfhandle((intptr_t)console, O_RDWR|O_BINARY);
246}
247
248static void clear_buffer(DWORD len, COORD pos)
249{
250 HANDLE console = get_console();
251 DWORD dummy;
252
253 FillConsoleOutputCharacterA(console, ' ', len, pos, &dummy);
254 FillConsoleOutputAttribute(console, plain_attr, len, pos, &dummy);
255}
256
257static void erase_in_line(void)
258{
259 HANDLE console = get_console();
260 CONSOLE_SCREEN_BUFFER_INFO sbi;
261
262 if (!GetConsoleScreenBufferInfo(console, &sbi))
263 return;
264 clear_buffer(sbi.dwSize.X - sbi.dwCursorPosition.X, sbi.dwCursorPosition);
265}
266
267static void erase_till_end_of_screen(void)
268{
269 HANDLE console = get_console();
270 CONSOLE_SCREEN_BUFFER_INFO sbi;
271 DWORD len;
272
273 if(!GetConsoleScreenBufferInfo(console, &sbi))
274 return;
275 len = sbi.dwSize.X - sbi.dwCursorPosition.X +
276 sbi.dwSize.X * (sbi.srWindow.Bottom - sbi.dwCursorPosition.Y);
277 clear_buffer(len, sbi.dwCursorPosition);
278}
279
280void reset_screen(void)
281{
282 HANDLE console = get_console();
283 CONSOLE_SCREEN_BUFFER_INFO sbi;
284 COORD pos = { 0, 0 };
285
286 /* move to start of screen buffer and clear it all */
287 if (!GetConsoleScreenBufferInfo(console, &sbi))
288 return;
289 SetConsoleCursorPosition(console, pos);
290 clear_buffer(sbi.dwSize.X * sbi.dwSize.Y, pos);
291}
292
293void move_cursor_row(int n)
294{
295 HANDLE console = get_console();
296 CONSOLE_SCREEN_BUFFER_INFO sbi;
297
298 if(!GetConsoleScreenBufferInfo(console, &sbi))
299 return;
300 sbi.dwCursorPosition.Y += n;
301 SetConsoleCursorPosition(console, sbi.dwCursorPosition);
302}
303
304static void move_cursor_column(int n)
305{
306 HANDLE console = get_console();
307 CONSOLE_SCREEN_BUFFER_INFO sbi;
308
309 if (!GetConsoleScreenBufferInfo(console, &sbi))
310 return;
311 sbi.dwCursorPosition.X += n;
312 SetConsoleCursorPosition(console, sbi.dwCursorPosition);
313}
314
315static void move_cursor(int x, int y)
316{
317 HANDLE console = get_console();
318 COORD pos;
319 CONSOLE_SCREEN_BUFFER_INFO sbi;
320
321 if (!GetConsoleScreenBufferInfo(console, &sbi))
322 return;
323 pos.X = sbi.srWindow.Left + x;
324 pos.Y = sbi.srWindow.Top + y;
325 SetConsoleCursorPosition(console, pos);
326}
327
328static const unsigned char colour_1bit[16] = {
329 /* Black */ 0,
330 /* Red */ FOREGROUND_RED,
331 /* Green */ FOREGROUND_GREEN,
332 /* Yellow */ FOREGROUND_RED | FOREGROUND_GREEN,
333 /* Blue */ FOREGROUND_BLUE,
334 /* Magenta */ FOREGROUND_RED | FOREGROUND_BLUE,
335 /* Cyan */ FOREGROUND_GREEN | FOREGROUND_BLUE,
336 /* White */ FOREGROUND_RED | FOREGROUND_GREEN | FOREGROUND_BLUE,
337 /* ... and again but brighter */
338 FOREGROUND_INTENSITY,
339 FOREGROUND_RED | FOREGROUND_INTENSITY,
340 FOREGROUND_GREEN | FOREGROUND_INTENSITY,
341 FOREGROUND_RED | FOREGROUND_GREEN | FOREGROUND_INTENSITY,
342 FOREGROUND_BLUE | FOREGROUND_INTENSITY,
343 FOREGROUND_RED | FOREGROUND_BLUE | FOREGROUND_INTENSITY,
344 FOREGROUND_GREEN | FOREGROUND_BLUE | FOREGROUND_INTENSITY,
345 FOREGROUND_RED | FOREGROUND_GREEN | FOREGROUND_BLUE | FOREGROUND_INTENSITY
346};
347
348#if !ENABLE_FEATURE_IMPROVED_COLOUR_MAPPING
349static WORD rgb_to_console(int *rgb)
350{
351 int dark = 0, bright;
352 WORD attr = 0;
353
354 if (rgb[0] > 85)
355 attr |= FOREGROUND_RED;
356 else
357 ++dark;
358
359 if (rgb[1] > 85)
360 attr |= FOREGROUND_GREEN;
361 else
362 ++dark;
363
364 if (rgb[2] > 85)
365 attr |= FOREGROUND_BLUE;
366 else
367 ++dark;
368
369 /* increase intensity if all components are either bright or
370 * dark and at least one is bright */
371 bright = (rgb[0] > 171) + (rgb[1] > 171) + (rgb[2] > 171);
372 if (bright + dark == 3 && dark != 3) {
373 attr |= FOREGROUND_INTENSITY;
374 }
375
376 return attr;
377}
378#else
379#include <math.h>
380
/* Standard console colours in LAB colour space.  Rows are indexed
 * like colour_1bit[]; each row holds the L, a and b components. */
static float colour_lab[16][3] = {
	[0]  = {-0.000000, 0.000000, 0.000000},
	[1]  = {25.530788, 48.055233, 38.059635},
	[2]  = {46.228817, -51.699638, 49.897949},
	[3]  = {51.868336, -12.930751, 56.677288},
	[4]  = {12.975313, 47.507763, -64.704285},
	[5]  = {29.782101, 58.939846, -36.497940},
	[6]  = {48.256081, -28.841570, -8.481050},
	[7]  = {77.704361, 0.004262, -0.008416},
	[8]  = {53.585018, 0.003129, -0.006235},
	[9]  = {53.232883, 80.109299, 67.220078},
	[10] = {87.737038, -86.184654, 83.181168},
	[11] = {97.138245, -21.555901, 94.482483},
	[12] = {32.302586, 79.196678, -107.863686},
	[13] = {60.319931, 98.254234, -60.842991},
	[14] = {91.116524, -48.079609, -14.138126},
	[15] = {100.000000, 0.005245, -0.010419},
};
400
401/* Convert RGB to XYZ and XYZ to LAB. See:
402 * http://www.easyrgb.com/en/math.php#text1 */
403static void rgb2lab(const int *rgb, float *lab)
404{
405 float var_RGB[3], var_XYZ[3];
406 int i;
407
408 for (i = 0; i < 3; ++i) {
409 var_RGB[i] = rgb[i]/255.0f;
410 if (var_RGB[i] > 0.04045f)
411 var_RGB[i] = pow((var_RGB[i] + 0.055f) / 1.055f, 2.4f);
412 else
413 var_RGB[i] /= 12.92f;
414 }
415
416 /* use equal energy reference values */
417 var_XYZ[0] = var_RGB[0]*0.4124f + var_RGB[1]*0.3576f + var_RGB[2]*0.1805f;
418 var_XYZ[1] = var_RGB[0]*0.2126f + var_RGB[1]*0.7152f + var_RGB[2]*0.0722f;
419 var_XYZ[2] = var_RGB[0]*0.0193f + var_RGB[1]*0.1192f + var_RGB[2]*0.9505f;
420
421 for (i = 0; i < 3; ++i) {
422 if (var_XYZ[i] > 0.008856f)
423 var_XYZ[i] = pow(var_XYZ[i], 1.0f / 3.0f);
424 else
425 var_XYZ[i] = 7.787f * var_XYZ[i] + 16.0f / 116.0f;
426 }
427
428 lab[0] = 116.0f * var_XYZ[1] - 16.0f;
429 lab[1] = 500.0f * (var_XYZ[0] - var_XYZ[1]);
430 lab[2] = 200.0f * (var_XYZ[1] - var_XYZ[2]);
431}
432
433static WORD rgb_to_console(int *rgb)
434{
435 int i, imin = 0;
436 float deltamin = 1.0e20;
437
438 /* Use 1976 CIE deltaE to find closest console colour. See:
439 * https://zschuessler.github.io/DeltaE/learn */
440 for (i = 0; i < 16; ++i) {
441 float lab[3], dl, da, db, delta;
442
443 rgb2lab(rgb, lab);
444 dl = colour_lab[i][0] - lab[0];
445 da = colour_lab[i][1] - lab[1];
446 db = colour_lab[i][2] - lab[2];
447 delta = dl * dl + da * da + db *db;
448 if (delta < deltamin) {
449 imin = i;
450 deltamin = delta;
451 }
452 }
453 return colour_1bit[imin];
454}
455#endif
456
457/* 24-bit colour */
458static char *process_24bit(char *str, WORD *attr)
459{
460 int count;
461 int rgb[3];
462
463 for (count = 0; count < 3; ++count) {
464 rgb[count] = strtol(str, (char **)&str, 10);
465 if (*str == ';')
466 ++str;
467 }
468
469 *attr = rgb_to_console(rgb);
470
471 return *(str - 1) == ';' ? str - 1 : str;
472}
473
474/* 8-bit colour */
475static char *process_8bit(char *str, WORD *attr)
476{
477 int val = strtol(str, &str, 10);
478
479 if (val < 16) {
480 *attr = colour_1bit[val];
481 }
482 else if (val < 232) {
483 int i, rgb[3];
484
485 val -= 16;
486 for (i = 2; i >= 0; --i) {
487 rgb[i] = (val % 6) * 42 + 21;
488 val /= 6;
489 }
490
491 *attr = rgb_to_console(rgb);
492 }
493 else if (val < 238) {
494 /* black */
495 *attr = 0;
496 }
497 else if (val < 244) {
498 /* bright black */
499 *attr = FOREGROUND_INTENSITY;
500 }
501 else if (val < 250) {
502 /* white */
503 *attr = FOREGROUND_RED | FOREGROUND_GREEN | FOREGROUND_BLUE;
504 }
505 else if (val < 256) {
506 /* bright white */
507 *attr = FOREGROUND_RED | FOREGROUND_GREEN | FOREGROUND_BLUE |
508 FOREGROUND_INTENSITY;
509 }
510
511 return str;
512}
513
514static char *process_colour(char *str, WORD *attr)
515{
516 long val = strtol(str, (char **)&str, 10);
517
518 *attr = 0xffff; /* error return */
519 switch (val) {
520 case 2:
521 str = process_24bit(str + 1, attr);
522 break;
523 case 5:
524 str = process_8bit(str + 1, attr);
525 break;
526 default:
527 break;
528 }
529
530 return str;
531}
532
533/* On input pos points to the start of a suspected escape sequence.
534 * If a valid sequence is found return a pointer to the character
535 * following it, otherwise return the original pointer. */
536static char *process_escape(char *pos)
537{
538 char *str, *func;
539 char *bel;
540 size_t len;
541 WORD t, attr = current_attr;
542 static int reverse = 0;
543
544 switch (pos[1]) {
545 case '[':
546 /* go ahead and process "\033[" sequence */
547 break;
548 case ']':
549 if ((pos[2] == '0' || pos[2] == '2') && pos[3] == ';' &&
550 (bel=strchr(pos+4, '\007')) && bel - pos < 260) {
551 /* set console title */
552 *bel++ = '\0';
553 charToConA(pos+4);
554 SetConsoleTitle(pos+4);
555 return bel;
556 }
557 /* invalid "\033]" sequence, fall through */
558 default:
559 return pos;
560 }
561
562 str = pos + 2;
563 len = strspn(str, "0123456789;");
564 func = str + len;
565 switch (*func) {
566 case 'm':
567 do {
568 long val = strtol(str, (char **)&str, 10);
569 switch (val) {
570 case 0: /* reset */
571 attr = plain_attr;
572 reverse = 0;
573 break;
574 case 1: /* bold */
575 attr |= FOREGROUND_INTENSITY;
576 break;
577 case 2: /* faint */
578 case 22: /* normal */
579 attr &= ~FOREGROUND_INTENSITY;
580 break;
581 case 3: /* italic */
582 /* Unsupported */
583 break;
584 case 4: /* underline */
585 case 21: /* double underline */
586 /* Wikipedia says this flag does nothing */
587 /* Furthermore, mingw doesn't define this flag
588 attr |= COMMON_LVB_UNDERSCORE; */
589 break;
590 case 24: /* no underline */
591 /* attr &= ~COMMON_LVB_UNDERSCORE; */
592 break;
593 case 5: /* slow blink */
594 case 6: /* fast blink */
595 /* We don't have blink, but we do have
596 background intensity */
597 attr |= BACKGROUND_INTENSITY;
598 break;
599 case 25: /* no blink */
600 attr &= ~BACKGROUND_INTENSITY;
601 break;
602 case 7: /* reverse video on */
603 reverse = 1;
604 break;
605 case 27: /* reverse video off */
606 reverse = 0;
607 break;
608 case 8: /* conceal */
609 case 9: /* strike through */
610 case 28: /* reveal */
611 /* Unsupported */
612 break;
613
614 /* Foreground colours */
615 case 30: /* Black */
616 case 31: /* Red */
617 case 32: /* Green */
618 case 33: /* Yellow */
619 case 34: /* Blue */
620 case 35: /* Magenta */
621 case 36: /* Cyan */
622 case 37: /* White */
623 attr &= ~FOREGROUND_ALL;
624 attr |= colour_1bit[val - 30];
625 break;
626 case 38: /* 8/24 bit */
627 str = process_colour(str + 1, &t);
628 if (t != 0xffff) {
629 attr &= ~(FOREGROUND_ALL|FOREGROUND_INTENSITY);
630 attr |= t;
631 }
632 break;
633 case 39: /* reset */
634 attr &= ~FOREGROUND_ALL;
635 attr |= (plain_attr & FOREGROUND_ALL);
636 break;
637
638 /* Background colours */
639 case 40: /* Black */
640 case 41: /* Red */
641 case 42: /* Green */
642 case 43: /* Yellow */
643 case 44: /* Blue */
644 case 45: /* Magenta */
645 case 46: /* Cyan */
646 case 47: /* White */
647 attr &= ~BACKGROUND_ALL;
648 attr |= colour_1bit[val - 40] << 4;
649 break;
650 case 48: /* 8/24 bit */
651 str = process_colour(str + 1, &t);
652 if (t != 0xffff) {
653 attr &= ~(BACKGROUND_ALL|BACKGROUND_INTENSITY);
654 attr |= t << 4;
655 }
656 break;
657 case 49: /* reset */
658 attr &= ~BACKGROUND_ALL;
659 attr |= (plain_attr & BACKGROUND_ALL);
660 break;
661
662 default:
663 /* Unsupported code */
664 return pos;
665 }
666 str++;
667 } while (str < func);
668
669 current_attr = attr;
670 if (reverse)
671 attr = ((attr >> 4) & 0xf) | ((attr << 4) & 0xf0);
672 SetConsoleTextAttribute(get_console(), attr);
673 break;
674 case 'A': /* up */
675 move_cursor_row(-strtol(str, (char **)&str, 10));
676 break;
677 case 'B': /* down */
678 move_cursor_row(strtol(str, (char **)&str, 10));
679 break;
680 case 'C': /* forward */
681 move_cursor_column(strtol(str, (char **)&str, 10));
682 break;
683 case 'D': /* back */
684 move_cursor_column(-strtol(str, (char **)&str, 10));
685 break;
686 case 'H':
687 if (!len)
688 move_cursor(0, 0);
689 else {
690 int row, col = 1;
691
692 row = strtol(str, (char **)&str, 10);
693 if (*str == ';') {
694 col = strtol(str+1, (char **)&str, 10);
695 }
696 move_cursor(col > 0 ? col-1 : 0, row > 0 ? row-1 : 0);
697 }
698 break;
699 case 'J':
700 erase_till_end_of_screen();
701 break;
702 case 'K':
703 erase_in_line();
704 break;
705 case '?':
706 if (strncmp(str+1, "1049", 4) == 0 &&
707 (str[5] == 'h' || str[5] == 'l') ) {
708 use_alt_buffer(str[5] == 'h');
709 func = str + 5;
710 break;
711 }
712 /* fall through */
713 default:
714 /* Unsupported code */
715 return pos;
716 }
717
718 return (char *)func + 1;
719}
720
721static BOOL charToConBuffA(LPSTR s, DWORD len)
722{
723 UINT acp = GetACP(), conocp = GetConsoleOutputCP();
724 CPINFO acp_info, con_info;
725 WCHAR *buf;
726
727 if (acp == conocp)
728 return TRUE;
729
730 if (!s || !GetCPInfo(acp, &acp_info) || !GetCPInfo(conocp, &con_info) ||
731 con_info.MaxCharSize > acp_info.MaxCharSize ||
732 (len == 1 && acp_info.MaxCharSize != 1))
733 return FALSE;
734
735 terminal_mode(FALSE);
736 buf = xmalloc(len*sizeof(WCHAR));
737 MultiByteToWideChar(CP_ACP, 0, s, len, buf, len);
738 WideCharToMultiByte(conocp, 0, buf, len, s, len, NULL, NULL);
739 free(buf);
740 return TRUE;
741}
742
743static BOOL charToConA(LPSTR s)
744{
745 if (!s)
746 return FALSE;
747 return charToConBuffA(s, strlen(s)+1);
748}
749
750BOOL conToCharBuffA(LPSTR s, DWORD len)
751{
752 UINT acp = GetACP(), conicp = GetConsoleCP();
753 CPINFO acp_info, con_info;
754 WCHAR *buf;
755
756 if (acp == conicp
757#if ENABLE_FEATURE_UTF8_INPUT
758 // if acp is UTF8 then we got UTF8 via readConsoleInput_utf8
759 || acp == CP_UTF8
760#endif
761 )
762 return TRUE;
763
764 if (!s || !GetCPInfo(acp, &acp_info) || !GetCPInfo(conicp, &con_info) ||
765 acp_info.MaxCharSize > con_info.MaxCharSize ||
766 (len == 1 && con_info.MaxCharSize != 1))
767 return FALSE;
768
769 terminal_mode(FALSE);
770 buf = xmalloc(len*sizeof(WCHAR));
771 MultiByteToWideChar(conicp, 0, s, len, buf, len);
772 WideCharToMultiByte(CP_ACP, 0, buf, len, s, len, NULL, NULL);
773 free(buf);
774 return TRUE;
775}
776
777static int ansi_emulate(const char *s, FILE *stream)
778{
779 int rv = 0;
780 const unsigned char *t;
781 char *pos, *str;
782 size_t cur_len;
783 static size_t max_len = 0;
784 static char *mem = NULL;
785
786 /* if no special treatment is required output the string as-is */
787 for ( t=(unsigned char *)s; *t; ++t ) {
788 if ( *t == '\033' || *t > 0x7f ) {
789 break;
790 }
791 }
792
793 if ( *t == '\0' ) {
794 return fputs(s, stream) == EOF ? EOF : strlen(s);
795 }
796
797 /*
798 * Make a writable copy of the string and retain array for reuse.
799 * The test above guarantees that the string length won't be zero
800 * so the array will always be allocated.
801 */
802 cur_len = strlen(s);
803 if ( cur_len > max_len ) {
804 free(mem);
805 mem = xstrdup(s);
806 max_len = cur_len;
807 }
808 else {
809 strcpy(mem, s);
810 }
811 pos = str = mem;
812
813 while (*pos) {
814 pos = strchr(str, '\033');
815 if (pos && !(terminal_mode(FALSE) & VT_OUTPUT)) {
816 size_t len = pos - str;
817
818 if (len) {
819 if (conv_fwriteCon(stream, str, len) == EOF)
820 return EOF;
821 rv += len;
822 }
823
824 if (fflush(stream) == EOF)
825 return EOF;
826
827 str = process_escape(pos);
828 if (str == pos) {
829 if (fputc('\033', stream) == EOF)
830 return EOF;
831 ++str;
832 }
833 rv += str - pos;
834 pos = str;
835
836 if (fflush(stream) == EOF)
837 return EOF;
838
839 } else {
840 size_t len = strlen(str);
841 rv += len;
842 return conv_fwriteCon(stream, str, len) == EOF ? EOF : rv;
843 }
844 }
845 return rv;
846}
847
/* putchar() replacement: winansi_fputc() on stdout. */
int winansi_putchar(int c)
{
	int ret = winansi_fputc(c, stdout);

	return ret;
}
852
/* puts() replacement: write s with winansi_fputs() followed by a
 * newline.  Returns 0 on success, EOF if either write fails. */
int winansi_puts(const char *s)
{
	if (winansi_fputs(s, stdout) == EOF)
		return EOF;
	if (putchar('\n') == EOF)
		return EOF;
	return 0;
}
857
858static sighandler_t sigpipe_handler = SIG_DFL;
859
860#undef signal
861sighandler_t winansi_signal(int signum, sighandler_t handler)
862{
863 sighandler_t old;
864
865 if (signum == SIGPIPE) {
866 old = sigpipe_handler;
867 sigpipe_handler = handler;
868 return old;
869 }
870 return signal(signum, handler);
871}
872
873static void check_pipe_fd(int fd)
874{
875 int error = GetLastError();
876
877 if ((error == ERROR_NO_DATA &&
878 GetFileType((HANDLE)_get_osfhandle(fd)) == FILE_TYPE_PIPE) ||
879 error == ERROR_BROKEN_PIPE) {
880 if (sigpipe_handler == SIG_DFL)
881 exit(128+SIGPIPE);
882 else /* SIG_IGN */
883 errno = EPIPE;
884 }
885}
886
/* Run the broken pipe check for stream if it has a valid descriptor
 * and its error indicator is set. */
static void check_pipe(FILE *stream)
{
	int fd = fileno(stream);

	if (fd == -1)
		return;
	if (ferror(stream))
		check_pipe_fd(fd);
}
895
/* fwrite() replacement.  When one of size and nmemb is 1 and stream is
 * the console, the data is copied to a NUL-terminated buffer and
 * passed through ansi_emulate(); the result is nmemb, or 0 on error.
 * Otherwise fwrite() is used, with a broken pipe check on a short
 * write. */
size_t winansi_fwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream)
{
	size_t smaller = MIN(size, nmemb);
	size_t larger = MAX(size, nmemb);
	size_t written;
	char *copy;
	int rv;

	if (smaller == 1 && is_console(fileno(stream))) {
		copy = xmalloc(larger+1);
		memcpy(copy, ptr, larger);
		copy[larger] = '\0';

		rv = ansi_emulate(copy, stream);
		free(copy);

		return rv == EOF ? 0 : nmemb;
	}

	SetLastError(0);
	written = fwrite(ptr, size, nmemb, stream);
	if (written < nmemb)
		check_pipe(stream);
	return written;
}
920
/* fputs() replacement.  Console output goes through ansi_emulate() and
 * yields 0 or EOF; anything else uses fputs(), with a broken pipe
 * check on failure. */
int winansi_fputs(const char *str, FILE *stream)
{
	int ret;

	if (is_console(fileno(stream))) {
		if (ansi_emulate(str, stream) == EOF)
			return EOF;
		return 0;
	}

	SetLastError(0);
	ret = fputs(str, stream);
	if (ret == EOF)
		check_pipe(stream);
	return ret;
}
934
/* fputc() replacement.  A byte above 0x7f written to the console is
 * sent through conv_fwriteCon(); everything else uses fputc(), with a
 * broken pipe check on failure.  Returns the byte written, as an
 * unsigned char value, or EOF. */
int winansi_fputc(int c, FILE *stream)
{
	char byte = c;
	int ret;

	if ((unsigned char)c > 0x7f && is_console(fileno(stream))) {
		if (conv_fwriteCon(stream, &byte, 1) == EOF)
			return EOF;
		return (unsigned char)c;
	}

	SetLastError(0);
	ret = fputc(c, stream);
	if (ret == EOF)
		check_pipe(stream);
	return ret;
}
950
951#if !defined(__USE_MINGW_ANSI_STDIO) || !__USE_MINGW_ANSI_STDIO
/*
 * Prior to Windows 10 vsnprintf was incompatible with the C99 standard.
 * Implement a replacement using _vsnprintf.
 *
 * Formats into buf (capacity size), always terminating the result when
 * size is non-zero.  Returns the length the complete output would
 * have, or -1 if that length cannot be determined.  When size is zero
 * nothing is written and buf is not accessed.
 */
int winansi_vsnprintf(char *buf, size_t size, const char *format, va_list list)
{
	/* signed, so that an error return from _vsnprintf is detected */
	int len;
	va_list list2;

	/* first pass: measure the output without storing it */
	va_copy(list2, list);
	len = _vsnprintf(NULL, 0, format, list2);
	va_end(list2);
	if (len < 0)
		return -1;

	if (size != 0) {
		_vsnprintf(buf, size, format, list);
		/* _vsnprintf doesn't terminate output that fills the buffer */
		buf[size-1] = '\0';
	}
	return len;
}
971#endif
972
/* vfprintf() replacement.  For console output the text is formatted
 * into a buffer (on the stack if it fits in 256 bytes, otherwise
 * allocated) and passed through ansi_emulate().  If stream isn't the
 * console, or formatting fails, plain vfprintf() is used with a broken
 * pipe check. */
int winansi_vfprintf(FILE *stream, const char *format, va_list list)
{
	int len, rv;
	char small_buf[256];
	char *buf = small_buf;
	va_list cp;

	if (!is_console(fileno(stream)))
		goto abort;

	va_copy(cp, list);
	len = vsnprintf(small_buf, sizeof(small_buf), format, cp);
	va_end(cp);

	/* test for failure before comparing with an unsigned size: a
	 * negative length would otherwise look like a huge one */
	if (len < 0)
		goto abort;

	if ((size_t)len >= sizeof(small_buf)) {
		/* output was truncated: format again into a big enough buffer */
		buf = xmalloc((size_t)len + 1);
		va_copy(cp, list);
		len = vsnprintf(buf, (size_t)len + 1, format, cp);
		va_end(cp);

		if (len < 0) {
			/* don't leak the buffer on the fallback path */
			free(buf);
			goto abort;
		}
	}

	rv = ansi_emulate(buf, stream);

	if (buf != small_buf)
		free(buf);
	return rv;

abort:
	SetLastError(0);
	if ((rv=vfprintf(stream, format, list)) == EOF || ferror(stream) != 0)
		check_pipe(stream);
	return rv;
}
1009
/* fprintf() replacement built on winansi_vfprintf(). */
int winansi_fprintf(FILE *stream, const char *format, ...)
{
	int result;
	va_list args;

	va_start(args, format);
	result = winansi_vfprintf(stream, format, args);
	va_end(args);

	return result;
}
1021
/* printf() replacement: winansi_vfprintf() on stdout. */
int winansi_printf(const char *format, ...)
{
	int result;
	va_list args;

	va_start(args, format);
	result = winansi_vfprintf(stdout, format, args);
	va_end(args);

	return result;
}
1033
1034static int ansi_emulate_write(int fd, const void *buf, size_t count)
1035{
1036 int rv = 0, i;
1037 int special = FALSE, has_null = FALSE;
1038 const unsigned char *s = (const unsigned char *)buf;
1039 char *pos, *str;
1040 size_t len, out_len;
1041 static size_t max_len = 0;
1042 static char *mem = NULL;
1043
1044 for ( i=0; i<count; ++i ) {
1045 if ( s[i] == '\033' || s[i] > 0x7f ) {
1046 special = TRUE;
1047 }
1048 else if ( !s[i] ) {
1049 has_null = TRUE;
1050 }
1051 }
1052
1053 /*
1054 * If no special treatment is required or the data contains NUL
1055 * characters output the string as-is.
1056 */
1057 if ( !special || has_null ) {
1058 return write(fd, buf, count);
1059 }
1060
1061 /* make a writable copy of the data and retain array for reuse */
1062 if ( count > max_len ) {
1063 free(mem);
1064 mem = malloc(count+1);
1065 max_len = count;
1066 }
1067 memcpy(mem, buf, count);
1068 mem[count] = '\0';
1069 pos = str = mem;
1070
1071 /* we've checked the data doesn't contain any NULs */
1072 while (*pos) {
1073 pos = strchr(str, '\033');
1074 if (pos && !(terminal_mode(FALSE) & VT_OUTPUT)) {
1075 len = pos - str;
1076
1077 if (len) {
1078 out_len = conv_writeCon(fd, str, len);
1079 if (out_len == -1)
1080 return -1;
1081 rv += out_len;
1082 }
1083
1084 str = process_escape(pos);
1085 if (str == pos) {
1086 if (write(fd, pos, 1) == -1)
1087 return -1;
1088 ++str;
1089 }
1090 rv += str - pos;
1091 pos = str;
1092 } else {
1093 len = strlen(str);
1094 out_len = conv_writeCon(fd, str, len);
1095 return (out_len == -1) ? -1 : rv+out_len;
1096 }
1097 }
1098 return rv;
1099}
1100
/* write() replacement.  Console output goes through
 * ansi_emulate_write(); anything else uses write(), with a broken pipe
 * check on failure. */
int winansi_write(int fd, const void *buf, size_t count)
{
	int ret;

	if (is_console(fd))
		return ansi_emulate_write(fd, buf, count);

	SetLastError(0);
	ret = write(fd, buf, count);
	if (ret == -1)
		check_pipe_fd(fd);
	return ret;
}
1115
/* read() replacement: read with mingw_read() and, for console input,
 * convert the bytes received with conToCharBuffA(). */
int winansi_read(int fd, void *buf, size_t count)
{
	int nread = mingw_read(fd, buf, count);

	if (is_console_in(fd) && nread > 0)
		conToCharBuffA(buf, nread);

	return nread;
}
1130
/* fread() replacement: read with fread() and, for console input,
 * convert the bytes received with conToCharBuffA().  Returns the
 * number of items read, as fread() does. */
size_t winansi_fread(void *ptr, size_t size, size_t nmemb, FILE *stream)
{
	/* kept as size_t: an int could not hold every item count */
	size_t got = fread(ptr, size, nmemb, stream);

	if (!is_console_in(fileno(stream)))
		return got;

	if (got > 0)
		conToCharBuffA(ptr, got * size);

	return got;
}
1144
/* getc() replacement: fetch a character with _getc_nolock() and, for
 * console input, convert it with conToCharBuffA(). */
int winansi_getc(FILE *stream)
{
	int rv = _getc_nolock(stream);
	unsigned char byte;

	if (!is_console_in(fileno(stream)))
		return rv;
	if (rv == EOF)
		return rv;

	byte = (unsigned char)rv;
	conToCharBuffA((char *)&byte, 1);
	return (int)byte;
}
1162
/* getchar() replacement: winansi_getc() on stdin. */
int winansi_getchar(void)
{
	int ch = winansi_getc(stdin);

	return ch;
}
1167
/* fgets() replacement: read a line with fgets() and, for console
 * input, convert it in place with conToCharBuffA(). */
char *winansi_fgets(char *s, int size, FILE *stream)
{
	char *line = fgets(s, size, stream);

	if (is_console_in(fileno(stream)) && line)
		conToCharBuffA(s, strlen(s));

	return line;
}
1181
1182/* Ensure that isatty(fd) returns 0 for the NUL device */
1183int mingw_isatty(int fd)
1184{
1185 int result = _isatty(fd);
1186
1187 if (result) {
1188 HANDLE handle = (HANDLE) _get_osfhandle(fd);
1189 DWORD mode;
1190
1191 if (handle == INVALID_HANDLE_VALUE)
1192 return 0;
1193
1194 /* check if its a device (i.e. console, printer, serial port) */
1195 if (GetFileType(handle) != FILE_TYPE_CHAR)
1196 return 0;
1197
1198 if (!GetConsoleMode(handle, &mode))
1199 return 0;
1200 }
1201
1202 return result;
1203}
1204
1205#if ENABLE_FEATURE_UTF8_INPUT
1206// intentionally also converts invalid values (surrogate halfs, too big)
1207static int toutf8(DWORD cp, unsigned char *buf) {
1208 if (cp <= 0x7f) {
1209 *buf = cp;
1210 return 1;
1211 }
1212 if (cp <= 0x7ff) {
1213 *buf++ = 0xc0 | (cp >> 6);
1214 *buf = 0x80 | (cp & 0x3f);
1215 return 2;
1216 }
1217 if (cp <= 0xffff) {
1218 *buf++ = 0xe0 | (cp >> 12);
1219 *buf++ = 0x80 | ((cp >> 6) & 0x3f);
1220 *buf = 0x80 | (cp & 0x3f);
1221 return 3;
1222 }
1223 if (cp <= 0x10ffff) {
1224 *buf++ = 0xf0 | (cp >> 18);
1225 *buf++ = 0x80 | ((cp >> 12) & 0x3f);
1226 *buf++ = 0x80 | ((cp >> 6) & 0x3f);
1227 *buf = 0x80 | (cp & 0x3f);
1228 return 4;
1229 }
1230 // invalid. returning 0 works in our context because it's delivered
1231 // as a key event, where 0 values are typically ignored by the caller
1232 *buf = 0;
1233 return 1;
1234}
1235
1236// peek into the console input queue and try to find a key-up event of
1237// a surrugate-2nd-half, at which case eat the console events up to this
1238// one (excluding), and combine the pair values into *ph1
1239static void maybeEatUpto2ndHalfUp(HANDLE h, DWORD *ph1)
1240{
1241 // Peek into the queue arbitrary 16 records deep
1242 INPUT_RECORD r[16];
1243 DWORD got;
1244 int i;
1245
1246 if (!PeekConsoleInputW(h, r, 16, &got))
1247 return;
1248
1249 // we're conservative, and abort the search on anything which
1250 // seems out of place, like non-key event, non-2nd-half, etc.
1251 // search from 1 because i==0 is still the 1st half down record.
1252 for (i = 1; i < got; ++i) {
1253 DWORD h2;
1254 int is2nd, isdown;
1255
1256 if (r[i].EventType != KEY_EVENT)
1257 return;
1258
1259 isdown = r[i].Event.KeyEvent.bKeyDown;
1260 h2 = r[i].Event.KeyEvent.uChar.UnicodeChar;
1261 is2nd = h2 >= 0xDC00 && h2 <= 0xDFFF;
1262
1263 // skip 0 values, keyup of 1st half, and keydown of a 2nd half, if any
1264 if (!h2 || (h2 == *ph1 && !isdown) || (is2nd && isdown))
1265 continue;
1266
1267 if (!is2nd)
1268 return;
1269
1270 // got 2nd-half-up. eat the events up to this, combine the values
1271 ReadConsoleInputW(h, r, i, &got);
1272 *ph1 = 0x10000 + (((*ph1 & ~0xD800) << 10) | (h2 & ~0xDC00));
1273 return;
1274 }
1275}
1276
1277// if the codepoint is a key-down event, remember it, else if
1278// it's a key-up event with matching prior down - forget the down,
1279// else (up without matching prior key-down) - change it to down.
1280// We remember few prior key-down events so that a sequence
1281// like X-down Y-down X-up Y-up won't trigger this hack for Y-up.
1282// When up is changed into down there won't be further key-up event,
1283// but that's OK because the caller ignores key-up events anyway.
1284static void maybe_change_up_to_down(wchar_t key, BOOL *isdown)
1285{
1286 #define DOWN_BUF_SIZ 8
1287 static wchar_t downbuf[DOWN_BUF_SIZ] = {0};
1288 static int pos = 0;
1289
1290 if (*isdown) {
1291 downbuf[pos++] = key;
1292 pos = pos % DOWN_BUF_SIZ;
1293 return;
1294 }
1295
1296 // the missing-key-down issue was only observed with unicode values,
1297 // so limit this hack to non-ASCII-7 values.
1298 // also, launching a new shell/read process from CLI captures
1299 // an ENTER-up event without prior down at this new process, which
1300 // would otherwise change it to down - creating a wrong ENTER keypress.
1301 if (key <= 127)
1302 return;
1303
1304 // key up, try to match a prior down
1305 for (int i = 0; i < DOWN_BUF_SIZ; ++i) {
1306 if (downbuf[i] == key) {
1307 downbuf[i] = 0; // "forget" this down
1308 return;
1309 }
1310 }
1311
1312 // no prior key-down - replace the up with down
1313 *isdown = TRUE;
1314}
1315
1316/*
1317 * readConsoleInput_utf8 behaves similar enough to ReadConsoleInputA when
1318 * the console (input) CP is UTF8, but addressed two issues:
1319 * - It depend on the console CP, while we use ReadConsoleInputW internally.
1320 * - ReadConsoleInputA with Console CP of UTF8 (65001) is buggy:
1321 * - Doesn't work on Windows 7 (reads 0 or '?' for non-ASCII codepoints).
1322 * - When used at the cmd.exe console - but not Windows Terminal:
1323 * sometimes only key-up events arrive without the expected prior key-down.
1324 * Seems to depend both on the console CP and the entered/pasted codepoint.
1325 * - If reading one record at a time (which is how we use it), then input
1326 * codepoints of U+0800 or higher crash the console/terminal window.
1327 * (tested on Windows 10.0.19045.3086: console and Windows Terminal 1.17)
1328 * Example: U+0C80 (UTF8: 0xE0 0xB2 0x80): "ಀ"
1329 * Example: U+1F600 (UTF8: 0xF0 0x9F 0x98 0x80): "😀"
1330 * - If reading more than one record at a time:
1331 * - Unknown whether it can still crash in some cases (was not observed).
1332 * - Codepoints above U+FFFF are broken, and arrive as
1333 * U+FFFD REPLACEMENT CHARACTER "�"
1334 * - Few more codepoints to test the issues above (and below):
1335 * - U+0500 (UTF8: 0xD4, 0x80): "Ô€" (OK in UTF8 CP, else maybe no key-down)
1336 * - U+07C0 (UTF8: 0xDF, 0x80): "߀" (might exhibit missing key-down)
1337 *
1338 * So this function uses ReadConsoleInputW and then delivers it as UTF8:
1339 * - Works with any console CP, in Windows terminal and Windows 7/10 console.
1340 * - Surrogate pairs are combined and delivered as a single UTF8 codepoint.
1341 * - Ignore occasional intermediate control events between the halfs.
1342 * - If we can't find the 2nd half, or if for some reason we get a 2nd half
1343 * wiithout the 1st, deliver the half we got as UTF8 (a-la WTF8).
1344 * - The "sometimes key-down is missing" issue at the cmd.exe console happens
1345 * also when using ReadConsoleInputW (for U+0080 or higher), so handle it.
1346 * This can also happen with surrogate pairs.
1347 * - Up to 4-bytes state is maintained for a single UTF8 codepoint buffer.
1348 *
1349 * Gotchas (could be solved, but currently there's no need):
1350 * - We support reading one record at a time, else fail - to make it obvious.
1351 * - We have a state which is hidden from PeekConsoleInput - so not in sync.
1352 * - We don't deliver key-up events in some cases: when working around
1353 * the "missing key-down" issue, and with combined surrogate halfs value.
1354 */
1355BOOL readConsoleInput_utf8(HANDLE h, INPUT_RECORD *r, DWORD len, DWORD *got)
1356{
1357 static unsigned char u8buf[4]; // any single codepoint in UTF8
1358 static int u8pos = 0, u8len = 0;
1359 static INPUT_RECORD srec;
1360
1361 if (len != 1)
1362 return FALSE;
1363
1364 // if ACP is UTF8 then we read UTF8 regardless of console (in) CP
1365 if (GetConsoleCP() != CP_UTF8 && GetACP() != CP_UTF8)
1366 return ReadConsoleInput(h, r, len, got);
1367
1368 if (u8pos == u8len) {
1369 DWORD codepoint;
1370
1371 // wait-and-peek rather than read to keep the last processed record
1372 // at the console queue until we deliver all of its products, so
1373 // that external WaitForSingleObject(h) shows there's data ready.
1374 if (WaitForSingleObject(h, INFINITE) != WAIT_OBJECT_0)
1375 return FALSE;
1376 if (!PeekConsoleInputW(h, r, 1, got))
1377 return FALSE;
1378 if (*got == 0)
1379 return TRUE;
1380 if (r->EventType != KEY_EVENT)
1381 return ReadConsoleInput(h, r, 1, got);
1382
1383 srec = *r;
1384 codepoint = srec.Event.KeyEvent.uChar.UnicodeChar;
1385
1386 // Observed when pasting unicode at cmd.exe console (but not
1387 // windows terminal), we sometimes get key-up event without
1388 // a prior matching key-down (or with key-down codepoint 0),
1389 // so this call would change the up into down in such case.
1390 // E.g. pastes fixed by this hack: U+1F600 "😀", or U+0C80 "ಀ"
1391 if (codepoint)
1392 maybe_change_up_to_down(codepoint, &srec.Event.KeyEvent.bKeyDown);
1393
1394 // if it's a 1st (high) surrogate pair half, try to eat upto and
1395 // excluding the 2nd (low) half, and combine them into codepoint.
1396 // this does not interfere with the missing-key-down workaround
1397 // (no issue if the down-buffer has 1st-half-down without up).
1398 if (codepoint >= 0xD800 && codepoint <= 0xDBFF)
1399 maybeEatUpto2ndHalfUp(h, &codepoint);
1400
1401 u8len = toutf8(codepoint, u8buf);
1402 u8pos = 0;
1403 }
1404
1405 *r = srec;
1406 r->Event.KeyEvent.uChar.AsciiChar = (char)u8buf[u8pos++];
1407 if (u8pos == u8len) // consume the record which generated this buffer
1408 ReadConsoleInputW(h, &srec, 1, got);
1409 *got = 1;
1410 return TRUE;
1411}
1412#else
1413/*
1414 * In Windows 10 and 11 using ReadConsoleInputA() with a console input
1415 * code page of CP_UTF8 can crash the console/terminal. Avoid this by
1416 * using ReadConsoleInputW() in that case.
1417 */
1418BOOL readConsoleInput_utf8(HANDLE h, INPUT_RECORD *r, DWORD len, DWORD *got)
1419{
1420 if (GetConsoleCP() != CP_UTF8)
1421 return ReadConsoleInput(h, r, len, got);
1422
1423 if (ReadConsoleInputW(h, r, len, got)) {
1424 wchar_t uchar = r->Event.KeyEvent.uChar.UnicodeChar;
1425 char achar = uchar & 0x7f;
1426 if (achar != uchar)
1427 achar = '?';
1428 r->Event.KeyEvent.uChar.AsciiChar = achar;
1429 return TRUE;
1430 }
1431 return FALSE;
1432}
1433#endif
1434
1435#if ENABLE_FEATURE_UTF8_OUTPUT
1436// Write u8buf as if the console output CP is UTF8 - regardless of the CP.
1437// fd should be associated with a console output.
1438// Return: 0 on successful write[s], else -1 (e.g. if fd is not a console).
1439//
1440// Up to 3 bytes of an incomplete codepoint may be buffered from prior call[s].
1441// All the completed codepoints in one call are written using WriteConsoleW.
1442// Bad sequence of any length (till ASCII7 or UTF8 lead) prints 1 subst wchar.
1443//
1444// note: one console is assumed, and the (3 bytes) buffer is shared regardless
1445// of the original output stream (stdout/err), or even if the handle is
1446// of a different console. This can result in invalid codepoints output
1447// if streams are multiplexed mid-codepoint (same as elsewhere?)
1448static int writeCon_utf8(int fd, const char *u8buf, size_t u8siz)
1449{
1450 static int state = 0; // -1: bad, 0-3: remaining cp bytes (0: done/new)
1451 static uint32_t codepoint = 0; // accumulated from up to 4 UTF8 bytes
1452
1453 // not a state, only avoids re-alloc on every call
1454 static const int wbufwsiz = 4096;
1455 static wchar_t *wbuf = 0;
1456
1457 HANDLE h = (HANDLE)_get_osfhandle(fd);
1458 int wlen = 0;
1459
1460 if (!wbuf)
1461 wbuf = xmalloc(wbufwsiz * sizeof(wchar_t));
1462
1463 // ASCII7 uses least logic, then UTF8 continuations, UTF8 lead, errors
1464 while (u8siz--) {
1465 unsigned char c = *u8buf++;
1466 int topbits = 0;
1467
1468 while (c & (0x80 >> topbits))
1469 ++topbits;
1470
1471 if (state == 0 && topbits == 0) {
1472 // valid ASCII7, state remains 0
1473 codepoint = c;
1474
1475 } else if (state > 0 && topbits == 1) {
1476 // valid continuation byte
1477 codepoint = (codepoint << 6) | (c & 0x3f);
1478 if (--state)
1479 continue;
1480
1481 } else if (state == 0 && topbits >= 2 && topbits <= 4) {
1482 // valid UTF8 lead of 2/3/4 bytes codepoint
1483 codepoint = c & (0x7f >> topbits);
1484 state = topbits - 1; // remaining bytes after lead
1485 continue;
1486
1487 } else {
1488 // already bad (state<0), or unexpected c at state 0-3.
1489 // placeholder is added only at the 1st (state>=0).
1490 // regardless, c may be valid to reprocess as state 0
1491 // (even when it's the 1st unexpected in state 1/2/3)
1492 int placeholder_done = state < 0;
1493
1494 if (topbits < 5 && topbits != 1) {
1495 --u8buf; // valid for state 0, reprocess
1496 ++u8siz;
1497 state = 0;
1498 } else {
1499 state = -1; // set/keep bad state
1500 }
1501
1502 if (placeholder_done)
1503 continue;
1504
1505 // 1st unexpected char, add placeholder
1506 codepoint = CONFIG_SUBST_WCHAR;
1507 }
1508
1509 // codepoint is complete
1510 // we don't reject surrogate halves, reserved, etc
1511 if (codepoint < 0x10000) {
1512 wbuf[wlen++] = codepoint;
1513 } else {
1514 // generate a surrogates pair (wbuf has room for 2+)
1515 codepoint -= 0x10000;
1516 wbuf[wlen++] = 0xd800 | (codepoint >> 10);
1517 wbuf[wlen++] = 0xdc00 | (codepoint & 0x3ff);
1518 }
1519
1520 // flush if we have less than two empty spaces
1521 if (wlen > wbufwsiz - 2) {
1522 if (!WriteConsoleW(h, wbuf, wlen, 0, 0))
1523 return -1;
1524 wlen = 0;
1525 }
1526 }
1527
1528 if (wlen && !WriteConsoleW(h, wbuf, wlen, 0, 0))
1529 return -1;
1530 return 0;
1531}
1532#endif
1533
1534void console_write(const char *str, int len)
1535{
1536 char *buf = xmemdup(str, len);
1537 int fd = _open("CONOUT$", _O_WRONLY);
1538 conv_writeCon(fd, buf, len);
1539 close(fd);
1540 free(buf);
1541}
1542
// Tell whether console output conversion is enabled.
// A locale of exactly "C" disables the conversion, so that the source
// data is interpreted only by the console according to its output CP.
// The locale is taken from the first of LC_ALL, LC_CTYPE, LANG which is set.
// The environment is examined only on the first call; the answer is cached.
// Return: 0 if the conversion is disabled, else 1.
static int conout_conv_enabled(void)
{
	// keep in sync with [re]init_unicode at libbb/unicode.c
	static const char *const locale_vars[] = { "LC_ALL", "LC_CTYPE", "LANG" };
	static int cached = -1;  // -1: not tested yet, else the answer
	const char *locale = NULL;
	size_t i;

	if (cached >= 0)
		return cached;

	// first variable which is set wins
	for (i = 0; !locale && i < sizeof(locale_vars) / sizeof(locale_vars[0]); ++i)
		locale = getenv(locale_vars[i]);

	cached = (locale == NULL || locale[0] != 'C' || locale[1] != '\0');
	return cached;
}
1561
1562// TODO: improvements:
1563//
1564// 1. currently conv_[f]writeCon modify buf inplace, which means the caller
1565// typically has to make a writable copy first just for this.
1566// Sometimes it allocates a big copy once, and calls us with substrings.
1567// Instead, we could make a writable copy here - it's not used later anyway.
1568// To avoid the performance hit of many small allocations, we could use
1569// a local buffer for short strings, and allocate only if it doesn't fit
1570// (or maybe just reuse the local buffer with substring iterations).
1571//
1572// 2. Instead of converting from ACP to the console out CP - which guarantees
1573// potential data-loss if they differ, we could convert it to wchar_t and
1574// write it using WriteConsoleW. This should prevent all output data-loss.
1575// care should be taken with DBCS codepages (e.g. 936) or other multi-byte
1576// because then converting on arbitrary substring boundaries can fail.
1577
// Write siz bytes of buf to the console via stream, after output conversion
// (if enabled, see conout_conv_enabled):
// - with FEATURE_UTF8_OUTPUT: if the console output CP is not UTF8, buf is
//   written using writeCon_utf8, bypassing stream (which is flushed first).
// - otherwise: buf is converted inplace from ACP to the console out CP.
// Return: EOF on error, 0 on success.
static int conv_fwriteCon(FILE *stream, char *buf, size_t siz)
{
	size_t written;

	if (conout_conv_enabled()) {
#if ENABLE_FEATURE_UTF8_OUTPUT
		if (GetConsoleOutputCP() != CP_UTF8) {
			// writeCon_utf8 is unbuffered, so push out whatever
			// stream still holds to keep the output in order
			fflush(stream);
			if (writeCon_utf8(fileno(stream), buf, siz))
				return EOF;
			return 0;
		}
#else
		charToConBuffA(buf, siz);
#endif
	}

	written = fwrite(buf, 1, siz, stream);
	if (written < siz)
		return EOF;
	return 0;
}
1594
// Same as conv_fwriteCon, but using the lower level write on a descriptor.
// Return: -1 on error, else the actually-written bytes count on success
// (when writeCon_utf8 is used, success is always reported as siz).
static int conv_writeCon(int fd, char *buf, size_t siz)
{
	if (conout_conv_enabled()) {
#if ENABLE_FEATURE_UTF8_OUTPUT
		if (GetConsoleOutputCP() != CP_UTF8) {
			if (writeCon_utf8(fd, buf, siz))
				return -1;
			return (int)siz;
		}
#else
		charToConBuffA(buf, siz);
#endif
	}

	return write(fd, buf, siz);
}