aboutsummaryrefslogtreecommitdiff
path: root/src/term.c
diff options
context:
space:
mode:
authorThijs Schreijer <thijs@thijsschreijer.nl>2024-05-06 11:44:47 +0200
committerThijs Schreijer <thijs@thijsschreijer.nl>2024-05-20 12:43:55 +0200
commitdcd5d62501e61e0f6901d4d4687ab56430a4b8a7 (patch)
tree4501938052c0f62279eaae66c34811d4b5232fa2 /src/term.c
parent1d64b5790f26760cb830336ccca9d51474b73ae8 (diff)
downloadluasystem-dcd5d62501e61e0f6901d4d4687ab56430a4b8a7.tar.gz
luasystem-dcd5d62501e61e0f6901d4d4687ab56430a4b8a7.tar.bz2
luasystem-dcd5d62501e61e0f6901d4d4687ab56430a4b8a7.zip
add example for reading a line from the terminal, non-blocking
Handles utf8, and character width
Diffstat (limited to 'src/term.c')
-rw-r--r--src/term.c330
1 files changed, 313 insertions, 17 deletions
diff --git a/src/term.c b/src/term.c
index f73d23f..e557a11 100644
--- a/src/term.c
+++ b/src/term.c
@@ -15,6 +15,7 @@
15 15
16#ifdef _WIN32 16#ifdef _WIN32
17# include <windows.h> 17# include <windows.h>
18# include <locale.h>
18#else 19#else
19# include <termios.h> 20# include <termios.h>
20# include <string.h> 21# include <string.h>
@@ -22,8 +23,16 @@
22# include <fcntl.h> 23# include <fcntl.h>
23# include <sys/ioctl.h> 24# include <sys/ioctl.h>
24# include <unistd.h> 25# include <unistd.h>
26# include <wchar.h>
27# include <locale.h>
25#endif 28#endif
26 29
30
31// Windows does not have a wcwidth function, so we use compatibility code from
32// http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c by Markus Kuhn
33#include "wcwidth.h"
34
35
27#ifdef _WIN32 36#ifdef _WIN32
28// after an error is returned, GetLastError() result can be passed to this function to get a string 37// after an error is returned, GetLastError() result can be passed to this function to get a string
29// representation of the error on the stack. 38// representation of the error on the stack.
@@ -423,7 +432,7 @@ static int lst_getconsoleflags(lua_State *L)
423// see https://github.com/luaposix/luaposix 432// see https://github.com/luaposix/luaposix
424 433
425/*** 434/***
426Get termios state. 435Get termios state (Posix).
427The terminal attributes is a table with the following fields: 436The terminal attributes is a table with the following fields:
428 437
429- `iflag` input flags 438- `iflag` input flags
@@ -511,7 +520,7 @@ static int lst_tcgetattr(lua_State *L)
511 520
512 521
513/*** 522/***
514Set termios state. 523Set termios state (Posix).
515This function will set the flags as given. 524This function will set the flags as given.
516 525
517The `I_`, `O_`, and `L_` constants are available on the module table. They are the respective 526The `I_`, `O_`, and `L_` constants are available on the module table. They are the respective
@@ -689,13 +698,28 @@ static int lst_getnonblock(lua_State *L)
689 * Reading keyboard input 698 * Reading keyboard input
690 *-------------------------------------------------------------------------*/ 699 *-------------------------------------------------------------------------*/
691 700
701#ifdef _WIN32
702// Static buffer holding the UTF-8 bytes of the last converted character that have not been returned yet
703static char utf8_buffer[4];
704static int utf8_buffer_len = 0;
705static int utf8_buffer_index = 0;
706#endif
707
708
692/*** 709/***
693Reads a key from the console non-blocking. 710Reads a key from the console non-blocking. This function should not be called
711directly, but through the `system.readkey` or `system.readansi` functions. It
712will return the next byte from the input stream, or `nil` if no key was pressed.
713
694On Posix, `io.stdin` must be set to non-blocking mode using `setnonblock` 714On Posix, `io.stdin` must be set to non-blocking mode using `setnonblock`
695before calling this function. Otherwise it will block. 715before calling this function. Otherwise it will block. No conversions are
716done on Posix, so the byte read is returned as-is.
696 717
697@function readkey 718On Windows this reads a wide character and converts it to UTF-8. Multi-byte
698@treturn[1] integer the key code of the key that was pressed 719sequences will be buffered internally and returned one byte at a time.
720
721@function _readkey
722@treturn[1] integer the byte read from the input stream
699@treturn[2] nil if no key was pressed 723@treturn[2] nil if no key was pressed
700@treturn[3] nil on error 724@treturn[3] nil on error
701@treturn[3] string error message 725@treturn[3] string error message
@@ -703,20 +727,87 @@ before calling this function. Otherwise it will block.
703*/ 727*/
704static int lst_readkey(lua_State *L) { 728static int lst_readkey(lua_State *L) {
705#ifdef _WIN32 729#ifdef _WIN32
706 if (_kbhit()) { 730 if (utf8_buffer_len > 0) {
707 int ch = _getch(); 731 // Buffer not empty, return the next byte
708 if (ch == EOF) { 732 lua_pushinteger(L, (unsigned char)utf8_buffer[utf8_buffer_index]);
709 // Error handling for end-of-file or read error 733 utf8_buffer_index++;
710 lua_pushnil(L); 734 utf8_buffer_len--;
711 lua_pushliteral(L, "_getch error"); 735 // printf("returning from buffer: %d\n", luaL_checkinteger(L, -1));
712 return 2; 736 if (utf8_buffer_len == 0) {
737 utf8_buffer_index = 0;
713 } 738 }
714 lua_pushinteger(L, (unsigned char)ch);
715 return 1; 739 return 1;
716 } 740 }
717 return 0; 741
742 if (!_kbhit()) {
743 return 0;
744 }
745
746 wchar_t wc = _getwch();
747 // printf("----\nread wchar_t: %x\n", wc);
748 if (wc == WEOF) {
749 lua_pushnil(L);
750 lua_pushliteral(L, "read error");
751 return 2;
752 }
753
754 if (sizeof(wchar_t) == 2) {
755 // printf("2-byte wchar_t\n");
756 // only 2 bytes wide, not 4
757 if (wc >= 0xD800 && wc <= 0xDBFF) {
758 // printf("2-byte wchar_t, received high, getting low...\n");
759
760 // we got a high surrogate, so we need to read the next one as the low surrogate
761 if (!_kbhit()) {
762 lua_pushnil(L);
763 lua_pushliteral(L, "incomplete surrogate pair");
764 return 2;
765 }
766
767 wchar_t wc2 = _getwch();
768 // printf("read wchar_t 2: %x\n", wc2);
769 if (wc2 == WEOF) {
770 lua_pushnil(L);
771 lua_pushliteral(L, "read error");
772 return 2;
773 }
774
775 if (wc2 < 0xDC00 || wc2 > 0xDFFF) {
776 lua_pushnil(L);
777 lua_pushliteral(L, "invalid surrogate pair");
778 return 2;
779 }
780 // printf("2-byte pair complete now\n");
781 wchar_t wch_pair[2] = { wc, wc2 };
782 utf8_buffer_len = WideCharToMultiByte(CP_UTF8, 0, wch_pair, 2, utf8_buffer, sizeof(utf8_buffer), NULL, NULL);
783
784 } else {
785 // printf("2-byte wchar_t, no surrogate pair\n");
786 // not a high surrogate, so we can handle just the 2 bytes directly
787 utf8_buffer_len = WideCharToMultiByte(CP_UTF8, 0, &wc, 1, utf8_buffer, sizeof(utf8_buffer), NULL, NULL);
788 }
789
790 } else {
791 // printf("4-byte wchar_t\n");
792 // 4 bytes wide, so handle as UTF-32 directly
793 utf8_buffer_len = WideCharToMultiByte(CP_UTF8, 0, &wc, 1, utf8_buffer, sizeof(utf8_buffer), NULL, NULL);
794 }
795 // printf("utf8_buffer_len: %d\n", utf8_buffer_len);
796 utf8_buffer_index = 0;
797 if (utf8_buffer_len <= 0) {
798 lua_pushnil(L);
799 lua_pushliteral(L, "UTF-8 conversion error");
800 return 2;
801 }
802
803 lua_pushinteger(L, (unsigned char)utf8_buffer[utf8_buffer_index]);
804 utf8_buffer_index++;
805 utf8_buffer_len--;
806 // printf("returning from buffer: %x\n", luaL_checkinteger(L, -1));
807 return 1;
718 808
719#else 809#else
810 // Posix implementation
720 char ch; 811 char ch;
721 ssize_t bytes_read = read(STDIN_FILENO, &ch, 1); 812 ssize_t bytes_read = read(STDIN_FILENO, &ch, 1);
722 if (bytes_read > 0) { 813 if (bytes_read > 0) {
@@ -782,6 +873,205 @@ static int lst_termsize(lua_State *L) {
782 873
783 874
784/*------------------------------------------------------------------------- 875/*-------------------------------------------------------------------------
876 * utf8 conversion and support
877 *-------------------------------------------------------------------------*/
878
879// Function to convert a single UTF-8 character to a Unicode code point (uint32_t)
880// To prevent having to do codepage/locale changes, we use a custom implementation
881int utf8_to_wchar(const char *utf8, size_t len, mk_wchar_t *codepoint) {
882 if (len == 0) {
883 return -1; // No input provided
884 }
885
886 unsigned char c = (unsigned char)utf8[0];
887 if (c <= 0x7F) {
888 *codepoint = c;
889 return 1;
890 } else if ((c & 0xE0) == 0xC0) {
891 if (len < 2) return -1; // Not enough bytes
892 *codepoint = ((utf8[0] & 0x1F) << 6) | (utf8[1] & 0x3F);
893 return 2;
894 } else if ((c & 0xF0) == 0xE0) {
895 if (len < 3) return -1; // Not enough bytes
896 *codepoint = ((utf8[0] & 0x0F) << 12) | ((utf8[1] & 0x3F) << 6) | (utf8[2] & 0x3F);
897 return 3;
898 } else if ((c & 0xF8) == 0xF0) {
899 if (len < 4) return -1; // Not enough bytes
900 *codepoint = ((utf8[0] & 0x07) << 18) | ((utf8[1] & 0x3F) << 12) | ((utf8[2] & 0x3F) << 6) | (utf8[3] & 0x3F);
901 return 4;
902 } else {
903 // Invalid UTF-8 character
904 return -1;
905 }
906}
907
908
909/***
910Get the width of a utf8 character for terminal display.
911@function utf8cwidth
912@tparam string utf8_char the utf8 character to check, only the width of the first character will be returned
913@treturn[1] int the display width in columns of the first character in the string (0 for an empty string)
914@treturn[2] nil
915@treturn[2] string error message
916*/
917int lst_utf8cwidth(lua_State *L) {
918 const char *utf8_char;
919 size_t utf8_len;
920 utf8_char = luaL_checklstring(L, 1, &utf8_len);
921 int width = 0;
922
923 mk_wchar_t wc;
924
925 if (utf8_len == 0) {
926 lua_pushinteger(L, 0);
927 return 1;
928 }
929
930 // Convert the UTF-8 string to a wide character
931 int bytes_processed = utf8_to_wchar(utf8_char, utf8_len, &wc);
932 if (bytes_processed == -1) {
933 lua_pushnil(L);
934 lua_pushstring(L, "Invalid UTF-8 character");
935 return 2;
936 }
937
938 // Get the width of the wide character
939 width = mk_wcwidth(wc);
940 if (width == -1) {
941 lua_pushnil(L);
942 lua_pushstring(L, "Character width determination failed");
943 return 2;
944 }
945
946 lua_pushinteger(L, width);
947 return 1;
948}
949
950
951
952
953/***
954Get the width of a utf8 string for terminal display.
955@function utf8swidth
956@tparam string utf8_string the utf8 string to check
957@treturn[1] int the display width of the string in columns (0 for an empty string)
958@treturn[2] nil
959@treturn[2] string error message
960*/
961int lst_utf8swidth(lua_State *L) {
962 const char *utf8_str;
963 size_t utf8_len;
964 utf8_str = luaL_checklstring(L, 1, &utf8_len);
965 int total_width = 0;
966
967 if (utf8_len == 0) {
968 lua_pushinteger(L, 0);
969 return 1;
970 }
971
972 int bytes_processed = 0;
973 size_t i = 0;
974 mk_wchar_t wc;
975
976 while (i < utf8_len) {
977 bytes_processed = utf8_to_wchar(utf8_str + i, utf8_len - i, &wc);
978 if (bytes_processed == -1) {
979 lua_pushnil(L);
980 lua_pushstring(L, "Invalid UTF-8 character");
981 return 2;
982 }
983
984 int width = mk_wcwidth(wc);
985 if (width == -1) {
986 lua_pushnil(L);
987 lua_pushstring(L, "Character width determination failed");
988 return 2;
989 }
990
991 total_width += width;
992 i += bytes_processed;
993 }
994
995 lua_pushinteger(L, total_width);
996 return 1;
997}
998
999
1000
1001/*-------------------------------------------------------------------------
1002 * Windows codepage functions
1003 *-------------------------------------------------------------------------*/
1004
1005
1006/***
1007Gets the current console code page (Windows).
1008@function getconsolecp
1009@treturn[1] int the current code page (always 65001 on Posix systems)
1010*/
1011static int lst_getconsolecp(lua_State *L) {
1012 unsigned int cp = 65001;
1013#ifdef _WIN32
1014 cp = GetConsoleCP();
1015#endif
1016 lua_pushinteger(L, cp);
1017 return 1;
1018}
1019
1020
1021
1022/***
1023Sets the current console code page (Windows).
1024@function setconsolecp
1025@tparam int cp the code page to set, use 65001 for UTF-8
1026@treturn[1] bool `true` on success (always `true` on Posix systems)
1027*/
1028static int lst_setconsolecp(lua_State *L) {
1029 unsigned int cp = (unsigned int)luaL_checkinteger(L, 1);
1030 int success = TRUE;
1031#ifdef _WIN32
1032 SetConsoleCP(cp);
1033#endif
1034 lua_pushboolean(L, success);
1035 return 1;
1036}
1037
1038
1039
1040/***
1041Gets the current console output code page (Windows).
1042@function getconsoleoutputcp
1043@treturn[1] int the current code page (always 65001 on Posix systems)
1044*/
1045static int lst_getconsoleoutputcp(lua_State *L) {
1046 unsigned int cp = 65001;
1047#ifdef _WIN32
1048 cp = GetConsoleOutputCP();
1049#endif
1050 lua_pushinteger(L, cp);
1051 return 1;
1052}
1053
1054
1055
1056/***
1057Sets the current console output code page (Windows).
1058@function setconsoleoutputcp
1059@tparam int cp the code page to set, use 65001 for UTF-8
1060@treturn[1] bool `true` on success (always `true` on Posix systems)
1061*/
1062static int lst_setconsoleoutputcp(lua_State *L) {
1063 unsigned int cp = (unsigned int)luaL_checkinteger(L, 1);
1064 int success = TRUE;
1065#ifdef _WIN32
1066 SetConsoleOutputCP(cp);
1067#endif
1068 lua_pushboolean(L, success);
1069 return 1;
1070}
1071
1072
1073
1074/*-------------------------------------------------------------------------
785 * Initializes module 1075 * Initializes module
786 *-------------------------------------------------------------------------*/ 1076 *-------------------------------------------------------------------------*/
787 1077
@@ -791,10 +1081,16 @@ static luaL_Reg func[] = {
791 { "setconsoleflags", lst_setconsoleflags }, 1081 { "setconsoleflags", lst_setconsoleflags },
792 { "tcgetattr", lst_tcgetattr }, 1082 { "tcgetattr", lst_tcgetattr },
793 { "tcsetattr", lst_tcsetattr }, 1083 { "tcsetattr", lst_tcsetattr },
794 { "getnonblock", lst_setnonblock }, 1084 { "getnonblock", lst_getnonblock },
795 { "setnonblock", lst_setnonblock }, 1085 { "setnonblock", lst_setnonblock },
796 { "readkey", lst_readkey }, 1086 { "_readkey", lst_readkey },
797 { "termsize", lst_termsize }, 1087 { "termsize", lst_termsize },
1088 { "utf8cwidth", lst_utf8cwidth },
1089 { "utf8swidth", lst_utf8swidth },
1090 { "getconsolecp", lst_getconsolecp },
1091 { "setconsolecp", lst_setconsolecp },
1092 { "getconsoleoutputcp", lst_getconsoleoutputcp },
1093 { "setconsoleoutputcp", lst_setconsoleoutputcp },
798 { NULL, NULL } 1094 { NULL, NULL }
799}; 1095};
800 1096