diff options
author | Thijs Schreijer <thijs@thijsschreijer.nl> | 2024-05-06 11:44:47 +0200 |
---|---|---|
committer | Thijs Schreijer <thijs@thijsschreijer.nl> | 2024-05-20 12:43:55 +0200 |
commit | dcd5d62501e61e0f6901d4d4687ab56430a4b8a7 (patch) | |
tree | 4501938052c0f62279eaae66c34811d4b5232fa2 /src/term.c | |
parent | 1d64b5790f26760cb830336ccca9d51474b73ae8 (diff) | |
download | luasystem-dcd5d62501e61e0f6901d4d4687ab56430a4b8a7.tar.gz luasystem-dcd5d62501e61e0f6901d4d4687ab56430a4b8a7.tar.bz2 luasystem-dcd5d62501e61e0f6901d4d4687ab56430a4b8a7.zip |
add example for reading a line from the terminal, non-blocking
Handles utf8, and character width
Diffstat (limited to 'src/term.c')
-rw-r--r-- | src/term.c | 330 |
1 files changed, 313 insertions, 17 deletions
@@ -15,6 +15,7 @@ | |||
15 | 15 | ||
16 | #ifdef _WIN32 | 16 | #ifdef _WIN32 |
17 | # include <windows.h> | 17 | # include <windows.h> |
18 | # include <locale.h> | ||
18 | #else | 19 | #else |
19 | # include <termios.h> | 20 | # include <termios.h> |
20 | # include <string.h> | 21 | # include <string.h> |
@@ -22,8 +23,16 @@ | |||
22 | # include <fcntl.h> | 23 | # include <fcntl.h> |
23 | # include <sys/ioctl.h> | 24 | # include <sys/ioctl.h> |
24 | # include <unistd.h> | 25 | # include <unistd.h> |
26 | # include <wchar.h> | ||
27 | # include <locale.h> | ||
25 | #endif | 28 | #endif |
26 | 29 | ||
30 | |||
31 | // Windows does not have a wcwidth function, so we use compatibility code from | |
32 | // http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c by Markus Kuhn | ||
33 | #include "wcwidth.h" | ||
34 | |||
35 | |||
27 | #ifdef _WIN32 | 36 | #ifdef _WIN32 |
28 | // after an error is returned, GetLastError() result can be passed to this function to get a string | 37 | // after an error is returned, GetLastError() result can be passed to this function to get a string |
29 | // representation of the error on the stack. | 38 | // representation of the error on the stack. |
@@ -423,7 +432,7 @@ static int lst_getconsoleflags(lua_State *L) | |||
423 | // see https://github.com/luaposix/luaposix | 432 | // see https://github.com/luaposix/luaposix |
424 | 433 | ||
425 | /*** | 434 | /*** |
426 | Get termios state. | 435 | Get termios state (Posix). |
427 | The terminal attributes is a table with the following fields: | 436 | The terminal attributes is a table with the following fields: |
428 | 437 | ||
429 | - `iflag` input flags | 438 | - `iflag` input flags |
@@ -511,7 +520,7 @@ static int lst_tcgetattr(lua_State *L) | |||
511 | 520 | ||
512 | 521 | ||
513 | /*** | 522 | /*** |
514 | Set termios state. | 523 | Set termios state (Posix). |
515 | This function will set the flags as given. | 524 | This function will set the flags as given. |
516 | 525 | ||
517 | The `I_`, `O_`, and `L_` constants are available on the module table. They are the respective | 526 | The `I_`, `O_`, and `L_` constants are available on the module table. They are the respective |
@@ -689,13 +698,28 @@ static int lst_getnonblock(lua_State *L) | |||
689 | * Reading keyboard input | 698 | * Reading keyboard input |
690 | *-------------------------------------------------------------------------*/ | 699 | *-------------------------------------------------------------------------*/ |
691 | 700 | ||
701 | #ifdef _WIN32 | ||
702 | // Define a static buffer for UTF-8 characters | ||
703 | static char utf8_buffer[4]; | ||
704 | static int utf8_buffer_len = 0; | ||
705 | static int utf8_buffer_index = 0; | ||
706 | #endif | ||
707 | |||
708 | |||
692 | /*** | 709 | /*** |
693 | Reads a key from the console non-blocking. | 710 | Reads a key from the console non-blocking. This function should not be called |
711 | directly, but through the `system.readkey` or `system.readansi` functions. It | ||
712 | will return the next byte from the input stream, or `nil` if no key was pressed. | ||
713 | |||
694 | On Posix, `io.stdin` must be set to non-blocking mode using `setnonblock` | 714 | On Posix, `io.stdin` must be set to non-blocking mode using `setnonblock` |
695 | before calling this function. Otherwise it will block. | 715 | before calling this function. Otherwise it will block. No conversions are |
716 | done on Posix, so the byte read is returned as-is. | ||
696 | 717 | ||
697 | @function readkey | 718 | On Windows this reads a wide character and converts it to UTF-8. Multi-byte |
698 | @treturn[1] integer the key code of the key that was pressed | 719 | sequences will be buffered internally and returned one byte at a time. |
720 | |||
721 | @function _readkey | ||
722 | @treturn[1] integer the byte read from the input stream | ||
699 | @treturn[2] nil if no key was pressed | 723 | @treturn[2] nil if no key was pressed |
700 | @treturn[3] nil on error | 724 | @treturn[3] nil on error |
701 | @treturn[3] string error message | 725 | @treturn[3] string error message |
@@ -703,20 +727,87 @@ before calling this function. Otherwise it will block. | |||
703 | */ | 727 | */ |
704 | static int lst_readkey(lua_State *L) { | 728 | static int lst_readkey(lua_State *L) { |
705 | #ifdef _WIN32 | 729 | #ifdef _WIN32 |
706 | if (_kbhit()) { | 730 | if (utf8_buffer_len > 0) { |
707 | int ch = _getch(); | 731 | // Buffer not empty, return the next byte |
708 | if (ch == EOF) { | 732 | lua_pushinteger(L, (unsigned char)utf8_buffer[utf8_buffer_index]); |
709 | // Error handling for end-of-file or read error | 733 | utf8_buffer_index++; |
710 | lua_pushnil(L); | 734 | utf8_buffer_len--; |
711 | lua_pushliteral(L, "_getch error"); | 735 | // printf("returning from buffer: %d\n", luaL_checkinteger(L, -1)); |
712 | return 2; | 736 | if (utf8_buffer_len == 0) { |
737 | utf8_buffer_index = 0; | ||
713 | } | 738 | } |
714 | lua_pushinteger(L, (unsigned char)ch); | ||
715 | return 1; | 739 | return 1; |
716 | } | 740 | } |
717 | return 0; | 741 | |
742 | if (!_kbhit()) { | ||
743 | return 0; | ||
744 | } | ||
745 | |||
746 | wchar_t wc = _getwch(); | ||
747 | // printf("----\nread wchar_t: %x\n", wc); | ||
748 | if (wc == WEOF) { | ||
749 | lua_pushnil(L); | ||
750 | lua_pushliteral(L, "read error"); | ||
751 | return 2; | ||
752 | } | ||
753 | |||
754 | if (sizeof(wchar_t) == 2) { | ||
755 | // printf("2-byte wchar_t\n"); | ||
756 | // only 2 bytes wide, not 4 | ||
757 | if (wc >= 0xD800 && wc <= 0xDBFF) { | ||
758 | // printf("2-byte wchar_t, received high, getting low...\n"); | ||
759 | |||
760 | // we got a high surrogate, so we need to read the next one as the low surrogate | ||
761 | if (!_kbhit()) { | ||
762 | lua_pushnil(L); | ||
763 | lua_pushliteral(L, "incomplete surrogate pair"); | ||
764 | return 2; | ||
765 | } | ||
766 | |||
767 | wchar_t wc2 = _getwch(); | ||
768 | // printf("read wchar_t 2: %x\n", wc2); | ||
769 | if (wc2 == WEOF) { | ||
770 | lua_pushnil(L); | ||
771 | lua_pushliteral(L, "read error"); | ||
772 | return 2; | ||
773 | } | ||
774 | |||
775 | if (wc2 < 0xDC00 || wc2 > 0xDFFF) { | ||
776 | lua_pushnil(L); | ||
777 | lua_pushliteral(L, "invalid surrogate pair"); | ||
778 | return 2; | ||
779 | } | ||
780 | // printf("2-byte pair complete now\n"); | ||
781 | wchar_t wch_pair[2] = { wc, wc2 }; | ||
782 | utf8_buffer_len = WideCharToMultiByte(CP_UTF8, 0, wch_pair, 2, utf8_buffer, sizeof(utf8_buffer), NULL, NULL); | ||
783 | |||
784 | } else { | ||
785 | // printf("2-byte wchar_t, no surrogate pair\n"); | ||
786 | // not a high surrogate, so we can handle just the 2 bytes directly | ||
787 | utf8_buffer_len = WideCharToMultiByte(CP_UTF8, 0, &wc, 1, utf8_buffer, sizeof(utf8_buffer), NULL, NULL); | ||
788 | } | ||
789 | |||
790 | } else { | ||
791 | // printf("4-byte wchar_t\n"); | ||
792 | // 4 bytes wide, so handle as UTF-32 directly | ||
793 | utf8_buffer_len = WideCharToMultiByte(CP_UTF8, 0, &wc, 1, utf8_buffer, sizeof(utf8_buffer), NULL, NULL); | ||
794 | } | ||
795 | // printf("utf8_buffer_len: %d\n", utf8_buffer_len); | ||
796 | utf8_buffer_index = 0; | ||
797 | if (utf8_buffer_len <= 0) { | ||
798 | lua_pushnil(L); | ||
799 | lua_pushliteral(L, "UTF-8 conversion error"); | ||
800 | return 2; | ||
801 | } | ||
802 | |||
803 | lua_pushinteger(L, (unsigned char)utf8_buffer[utf8_buffer_index]); | ||
804 | utf8_buffer_index++; | ||
805 | utf8_buffer_len--; | ||
806 | // printf("returning from buffer: %x\n", luaL_checkinteger(L, -1)); | ||
807 | return 1; | ||
718 | 808 | ||
719 | #else | 809 | #else |
810 | // Posix implementation | ||
720 | char ch; | 811 | char ch; |
721 | ssize_t bytes_read = read(STDIN_FILENO, &ch, 1); | 812 | ssize_t bytes_read = read(STDIN_FILENO, &ch, 1); |
722 | if (bytes_read > 0) { | 813 | if (bytes_read > 0) { |
@@ -782,6 +873,205 @@ static int lst_termsize(lua_State *L) { | |||
782 | 873 | ||
783 | 874 | ||
784 | /*------------------------------------------------------------------------- | 875 | /*------------------------------------------------------------------------- |
876 | * utf8 conversion and support | ||
877 | *-------------------------------------------------------------------------*/ | ||
878 | |||
879 | // Function to convert a single UTF-8 character to a Unicode code point (uint32_t) | ||
880 | // To prevent having to do codepage/locale changes, we use a custom implementation | ||
881 | int utf8_to_wchar(const char *utf8, size_t len, mk_wchar_t *codepoint) { | ||
882 | if (len == 0) { | ||
883 | return -1; // No input provided | ||
884 | } | ||
885 | |||
886 | unsigned char c = (unsigned char)utf8[0]; | ||
887 | if (c <= 0x7F) { | ||
888 | *codepoint = c; | ||
889 | return 1; | ||
890 | } else if ((c & 0xE0) == 0xC0) { | ||
891 | if (len < 2) return -1; // Not enough bytes | ||
892 | *codepoint = ((utf8[0] & 0x1F) << 6) | (utf8[1] & 0x3F); | ||
893 | return 2; | ||
894 | } else if ((c & 0xF0) == 0xE0) { | ||
895 | if (len < 3) return -1; // Not enough bytes | ||
896 | *codepoint = ((utf8[0] & 0x0F) << 12) | ((utf8[1] & 0x3F) << 6) | (utf8[2] & 0x3F); | ||
897 | return 3; | ||
898 | } else if ((c & 0xF8) == 0xF0) { | ||
899 | if (len < 4) return -1; // Not enough bytes | ||
900 | *codepoint = ((utf8[0] & 0x07) << 18) | ((utf8[1] & 0x3F) << 12) | ((utf8[2] & 0x3F) << 6) | (utf8[3] & 0x3F); | ||
901 | return 4; | ||
902 | } else { | ||
903 | // Invalid UTF-8 character | ||
904 | return -1; | ||
905 | } | ||
906 | } | ||
907 | |||
908 | |||
909 | /*** | ||
910 | Get the width of a utf8 character for terminal display. | ||
911 | @function utf8cwidth | ||
912 | @tparam string utf8_char the utf8 character to check, only the width of the first character will be returned | ||
913 | @treturn[1] int the display width in columns of the first character in the string (0 for an empty string) | ||
914 | @treturn[2] nil | ||
915 | @treturn[2] string error message | ||
916 | */ | ||
917 | int lst_utf8cwidth(lua_State *L) { | ||
918 | const char *utf8_char; | ||
919 | size_t utf8_len; | ||
920 | utf8_char = luaL_checklstring(L, 1, &utf8_len); | ||
921 | int width = 0; | ||
922 | |||
923 | mk_wchar_t wc; | ||
924 | |||
925 | if (utf8_len == 0) { | ||
926 | lua_pushinteger(L, 0); | ||
927 | return 1; | ||
928 | } | ||
929 | |||
930 | // Convert the UTF-8 string to a wide character | ||
931 | int bytes_processed = utf8_to_wchar(utf8_char, utf8_len, &wc); | ||
932 | if (bytes_processed == -1) { | ||
933 | lua_pushnil(L); | ||
934 | lua_pushstring(L, "Invalid UTF-8 character"); | ||
935 | return 2; | ||
936 | } | ||
937 | |||
938 | // Get the width of the wide character | ||
939 | width = mk_wcwidth(wc); | ||
940 | if (width == -1) { | ||
941 | lua_pushnil(L); | ||
942 | lua_pushstring(L, "Character width determination failed"); | ||
943 | return 2; | ||
944 | } | ||
945 | |||
946 | lua_pushinteger(L, width); | ||
947 | return 1; | ||
948 | } | ||
949 | |||
950 | |||
951 | |||
952 | |||
953 | /*** | ||
954 | Get the width of a utf8 string for terminal display. | ||
955 | @function utf8swidth | ||
956 | @tparam string utf8_string the utf8 string to check | ||
957 | @treturn[1] int the display width of the string in columns (0 for an empty string) | ||
958 | @treturn[2] nil | ||
959 | @treturn[2] string error message | ||
960 | */ | ||
961 | int lst_utf8swidth(lua_State *L) { | ||
962 | const char *utf8_str; | ||
963 | size_t utf8_len; | ||
964 | utf8_str = luaL_checklstring(L, 1, &utf8_len); | ||
965 | int total_width = 0; | ||
966 | |||
967 | if (utf8_len == 0) { | ||
968 | lua_pushinteger(L, 0); | ||
969 | return 1; | ||
970 | } | ||
971 | |||
972 | int bytes_processed = 0; | ||
973 | size_t i = 0; | ||
974 | mk_wchar_t wc; | ||
975 | |||
976 | while (i < utf8_len) { | ||
977 | bytes_processed = utf8_to_wchar(utf8_str + i, utf8_len - i, &wc); | ||
978 | if (bytes_processed == -1) { | ||
979 | lua_pushnil(L); | ||
980 | lua_pushstring(L, "Invalid UTF-8 character"); | ||
981 | return 2; | ||
982 | } | ||
983 | |||
984 | int width = mk_wcwidth(wc); | ||
985 | if (width == -1) { | ||
986 | lua_pushnil(L); | ||
987 | lua_pushstring(L, "Character width determination failed"); | ||
988 | return 2; | ||
989 | } | ||
990 | |||
991 | total_width += width; | ||
992 | i += bytes_processed; | ||
993 | } | ||
994 | |||
995 | lua_pushinteger(L, total_width); | ||
996 | return 1; | ||
997 | } | ||
998 | |||
999 | |||
1000 | |||
1001 | /*------------------------------------------------------------------------- | ||
1002 | * Windows codepage functions | ||
1003 | *-------------------------------------------------------------------------*/ | ||
1004 | |||
1005 | |||
1006 | /*** | ||
1007 | Gets the current console code page (Windows). | ||
1008 | @function getconsolecp | ||
1009 | @treturn[1] int the current code page (always 65001 on Posix systems) | ||
1010 | */ | ||
1011 | static int lst_getconsolecp(lua_State *L) { | ||
1012 | unsigned int cp = 65001; | ||
1013 | #ifdef _WIN32 | ||
1014 | cp = GetConsoleCP(); | ||
1015 | #endif | ||
1016 | lua_pushinteger(L, cp); | ||
1017 | return 1; | ||
1018 | } | ||
1019 | |||
1020 | |||
1021 | |||
1022 | /*** | ||
1023 | Sets the current console code page (Windows). | ||
1024 | @function setconsolecp | ||
1025 | @tparam int cp the code page to set, use 65001 for UTF-8 | ||
1026 | @treturn[1] bool `true` on success (always `true` on Posix systems) | ||
1027 | */ | ||
1028 | static int lst_setconsolecp(lua_State *L) { | ||
1029 | unsigned int cp = (unsigned int)luaL_checkinteger(L, 1); | ||
1030 | int success = TRUE; | ||
1031 | #ifdef _WIN32 | ||
1032 | SetConsoleCP(cp); | ||
1033 | #endif | ||
1034 | lua_pushboolean(L, success); | ||
1035 | return 1; | ||
1036 | } | ||
1037 | |||
1038 | |||
1039 | |||
1040 | /*** | ||
1041 | Gets the current console output code page (Windows). | ||
1042 | @function getconsoleoutputcp | ||
1043 | @treturn[1] int the current code page (always 65001 on Posix systems) | ||
1044 | */ | ||
1045 | static int lst_getconsoleoutputcp(lua_State *L) { | ||
1046 | unsigned int cp = 65001; | ||
1047 | #ifdef _WIN32 | ||
1048 | cp = GetConsoleOutputCP(); | ||
1049 | #endif | ||
1050 | lua_pushinteger(L, cp); | ||
1051 | return 1; | ||
1052 | } | ||
1053 | |||
1054 | |||
1055 | |||
1056 | /*** | ||
1057 | Sets the current console output code page (Windows). | ||
1058 | @function setconsoleoutputcp | ||
1059 | @tparam int cp the code page to set, use 65001 for UTF-8 | ||
1060 | @treturn[1] bool `true` on success (always `true` on Posix systems) | ||
1061 | */ | ||
1062 | static int lst_setconsoleoutputcp(lua_State *L) { | ||
1063 | unsigned int cp = (unsigned int)luaL_checkinteger(L, 1); | ||
1064 | int success = TRUE; | ||
1065 | #ifdef _WIN32 | ||
1066 | SetConsoleOutputCP(cp); | ||
1067 | #endif | ||
1068 | lua_pushboolean(L, success); | ||
1069 | return 1; | ||
1070 | } | ||
1071 | |||
1072 | |||
1073 | |||
1074 | /*------------------------------------------------------------------------- | ||
785 | * Initializes module | 1075 | * Initializes module |
786 | *-------------------------------------------------------------------------*/ | 1076 | *-------------------------------------------------------------------------*/ |
787 | 1077 | ||
@@ -791,10 +1081,16 @@ static luaL_Reg func[] = { | |||
791 | { "setconsoleflags", lst_setconsoleflags }, | 1081 | { "setconsoleflags", lst_setconsoleflags }, |
792 | { "tcgetattr", lst_tcgetattr }, | 1082 | { "tcgetattr", lst_tcgetattr }, |
793 | { "tcsetattr", lst_tcsetattr }, | 1083 | { "tcsetattr", lst_tcsetattr }, |
794 | { "getnonblock", lst_setnonblock }, | 1084 | { "getnonblock", lst_getnonblock }, |
795 | { "setnonblock", lst_setnonblock }, | 1085 | { "setnonblock", lst_setnonblock }, |
796 | { "readkey", lst_readkey }, | 1086 | { "_readkey", lst_readkey }, |
797 | { "termsize", lst_termsize }, | 1087 | { "termsize", lst_termsize }, |
1088 | { "utf8cwidth", lst_utf8cwidth }, | ||
1089 | { "utf8swidth", lst_utf8swidth }, | ||
1090 | { "getconsolecp", lst_getconsolecp }, | ||
1091 | { "setconsolecp", lst_setconsolecp }, | ||
1092 | { "getconsoleoutputcp", lst_getconsoleoutputcp }, | ||
1093 | { "setconsoleoutputcp", lst_setconsoleoutputcp }, | ||
798 | { NULL, NULL } | 1094 | { NULL, NULL } |
799 | }; | 1095 | }; |
800 | 1096 | ||