aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorThijs Schreijer <thijs@thijsschreijer.nl>2024-05-06 11:44:47 +0200
committerThijs Schreijer <thijs@thijsschreijer.nl>2024-05-20 12:43:55 +0200
commitdcd5d62501e61e0f6901d4d4687ab56430a4b8a7 (patch)
tree4501938052c0f62279eaae66c34811d4b5232fa2
parent1d64b5790f26760cb830336ccca9d51474b73ae8 (diff)
downloadluasystem-dcd5d62501e61e0f6901d4d4687ab56430a4b8a7.tar.gz
luasystem-dcd5d62501e61e0f6901d4d4687ab56430a4b8a7.tar.bz2
luasystem-dcd5d62501e61e0f6901d4d4687ab56430a4b8a7.zip
add example for reading a line from the terminal, non-blocking
Handles utf8, and character width
-rw-r--r--examples/compat.lua5
-rw-r--r--examples/readline.lua476
-rw-r--r--luasystem-scm-0.rockspec1
-rw-r--r--spec/04-term_spec.lua192
-rw-r--r--src/term.c330
-rw-r--r--src/wcwidth.c285
-rw-r--r--src/wcwidth.h21
-rw-r--r--system/init.lua126
8 files changed, 1358 insertions, 78 deletions
diff --git a/examples/compat.lua b/examples/compat.lua
index c00d44a..a59d964 100644
--- a/examples/compat.lua
+++ b/examples/compat.lua
@@ -5,12 +5,15 @@ local sys = require "system"
5 5
6 6
7 7
8if sys.is_windows then 8if sys.windows then
9 -- Windows holds multiple copies of environment variables, to ensure `getenv` 9 -- Windows holds multiple copies of environment variables, to ensure `getenv`
10 -- returns what `setenv` sets we need to use the `system.getenv` instead of 10 -- returns what `setenv` sets we need to use the `system.getenv` instead of
11 -- `os.getenv`. 11 -- `os.getenv`.
12 os.getenv = sys.getenv -- luacheck: ignore 12 os.getenv = sys.getenv -- luacheck: ignore
13 13
14 -- Set console output to UTF-8 encoding.
15 sys.setconsoleoutputcp(65001)
16
14 -- Set up the terminal to handle ANSI escape sequences on Windows. 17 -- Set up the terminal to handle ANSI escape sequences on Windows.
15 if sys.isatty(io.stdout) then 18 if sys.isatty(io.stdout) then
16 sys.setconsoleflags(io.stdout, sys.getconsoleflags(io.stdout) + sys.COF_VIRTUAL_TERMINAL_PROCESSING) 19 sys.setconsoleflags(io.stdout, sys.getconsoleflags(io.stdout) + sys.COF_VIRTUAL_TERMINAL_PROCESSING)
diff --git a/examples/readline.lua b/examples/readline.lua
new file mode 100644
index 0000000..f1e6258
--- /dev/null
+++ b/examples/readline.lua
@@ -0,0 +1,476 @@
1local sys = require("system")
2
3
-- Lookup table translating raw key-sequences (as read from the terminal) into
-- symbolic key-names. The sequence for "enter" is platform dependent: carriage
-- return on Windows, line feed everywhere else.
local key_names = {
  [sys.windows and "\13" or "\10"] = "enter",
  ["\27"] = "escape",
  ["\9"] = "tab",
  ["\27[Z"] = "shift-tab",
  ["\127"] = "backspace",
  ["\27[3~"] = "delete",
  ["\27[D"] = "left",
  ["\27[C"] = "right",
  ["\27[H"] = "home",
  ["\27[F"] = "end",
}
22
23
-- Reverse lookup: symbolic key-name to the key-sequence that produces it
local key_sequences = {}
for sequence, name in pairs(key_names) do
  key_sequences[name] = sequence
end
29
30
-- Sound the terminal bell: writes the BEL control character to the default
-- output file and flushes it, so it is not held back in the output buffer.
local function bell()
  local out = io.output()
  out:write("\7")
  out:flush()
end
36
37
-- Build the ANSI escape sequence that moves the cursor horizontally.
-- Nothing is written to the terminal, the sequence is only returned.
-- @tparam number n columns to move; positive goes right, negative goes left
-- @treturn string the escape sequence, or an empty string if `n` is 0
local function cursor_move_horiz(n)
  if n == 0 then
    return ""
  end

  if n > 0 then
    return "\27[" .. n .. "C"
  end
  return "\27[" .. (-n) .. "D"
end
46
47
48-- -- generate string to move cursor vertically
49-- -- positive goes down, negative goes up
50-- local function cursor_move_vert(n)
51-- if n == 0 then
52-- return ""
53-- end
54-- return "\27[" .. (n > 0 and n or -n) .. (n > 0 and "B" or "A")
55-- end
56
57
58-- -- log to the line above the current line
59-- local function log(...)
60-- local arg = { n = select("#", ...), ...}
61-- for i = 1, arg.n do
62-- arg[i] = tostring(arg[i])
63-- end
64-- arg = " " .. table.concat(arg, " ") .. " "
65
66-- io.write(cursor_move_vert(-1), arg, cursor_move_vert(1), cursor_move_horiz(-#arg))
67-- end
68
69
-- UTF8 character size in bytes, derived from the first byte of the character.
-- Bytes that cannot start a character (stray continuation bytes 0x80-0xBF and
-- the invalid lead bytes 0xF8-0xFF) are reported as a single byte. This way
-- the result is always a number, and callers can step over malformed input
-- one byte at a time instead of failing on a non-numeric size.
-- @tparam number b the byte value of the first byte of a UTF8 character
-- @treturn number the size of the character in bytes (1 to 4)
local function utf8size(b)
  if b < 0x80 then
    return 1 -- plain ASCII
  elseif b < 0xC0 then
    return 1 -- stray continuation byte, not a valid start
  elseif b < 0xE0 then
    return 2
  elseif b < 0xF0 then
    return 3
  elseif b < 0xF8 then
    return 4
  end
  return 1 -- 0xF8-0xFF never occur in valid UTF8
end
75
76
77
local utf8parse do
  -- metatable for parsed values; `tostring` glues the list back together
  -- (the empty placeholder strings simply vanish in the concatenation)
  local utf8_value_mt = {
    __tostring = function(self)
      return table.concat(self)
    end,
  }

  -- Parses a UTF8 string into list of individual characters.
  -- key 'chars' gets the length in UTF8 characters, whilst # returns the length
  -- for display (to handle double-width UTF8 chars).
  -- in the list the double-width characters are followed by an empty string.
  -- A byte for which `utf8size` yields no usable size is taken as a
  -- single-byte character, so malformed input never breaks the index math.
  -- @tparam string s the UTF8 string to parse
  -- @treturn table the list of characters
  function utf8parse(s)
    local t = setmetatable({ chars = 0 }, utf8_value_mt)
    local n = 0 -- number of list entries so far (equals the display width)
    local i = 1
    local len = #s
    while i <= len do
      local w = utf8size(s:byte(i)) or 1
      local char = s:sub(i, i + w - 1)
      n = n + 1
      t[n] = char
      t.chars = t.chars + 1
      if sys.utf8cwidth(char) == 2 then
        -- double width character, add empty string to keep the length of the
        -- list the same as the character width on screen
        n = n + 1
        t[n] = ""
      end
      i = i + w
    end
    return t
  end
end
110
111
112
113-- inline tests for utf8parse
114-- do
115-- local t = utf8parse("a你b好c")
116-- assert(t[1] == "a")
117-- assert(t[2] == "你") -- double width
118-- assert(t[3] == "")
119-- assert(t[4] == "b")
120-- assert(t[5] == "好") -- double width
121-- assert(t[6] == "")
122-- assert(t[7] == "c")
123-- assert(#t == 7) -- size as displayed
124-- end
125
126
127
-- readline class

local readline = {}
readline.__index = readline


--- Create a new readline object.
-- @tparam[opt] table opts the options for the readline object
-- @tparam[opt=""] string opts.prompt the prompt to display
-- @tparam[opt=80] number opts.max_length the maximum length of the input (in UTF8 characters)
-- @tparam[opt=""] string opts.value the default value
-- @tparam[opt=`#value + 1`] number opts.position of the cursor in the input (display column, 1-based)
-- @tparam[opt={"\10"/"\13"}] table opts.exit_keys an array of keys that will cause the readline to exit
-- @treturn readline the new readline object
function readline.new(opts)
  opts = opts or {}
  local value = utf8parse(opts.value or "")
  local prompt = utf8parse(opts.prompt or "")

  local len = math.floor(opts.max_length or 80)
  if len < 1 then
    error("max_length must be at least 1", 2)
  end

  if value.chars > len then
    error("value is longer than max_length", 2)
  end

  -- clamp the cursor to the value, allowing the slot just past its end
  local pos = math.floor(opts.position or (#value + 1))
  pos = math.max(math.min(pos, (#value + 1)), 1)
  while value[pos] == "" do
    -- on the placeholder of a double-width char, move back to the char itself
    pos = pos - 1
  end

  -- exit keys are stored as a set (key-sequence -> true), since that is how
  -- the key handler looks them up
  local exit_keys = {}
  for _, key in ipairs(opts.exit_keys or {}) do
    exit_keys[key] = true
  end
  if next(exit_keys) == nil then
    -- nothing provided, default to Enter-key
    exit_keys[key_sequences.enter] = true
  end

  local self = {
    value = value,          -- the default value
    max_length = len,       -- the maximum length of the input
    prompt = prompt,        -- the prompt to display
    position = pos,         -- the current position in the input
    drawn_before = false,   -- if the prompt has been drawn
    exit_keys = exit_keys,  -- the keys that will cause the readline to exit
  }

  setmetatable(self, readline)
  return self
end
177
178
179
-- draw the prompt and the input value, and position the cursor.
-- All movements are relative. Unless this is the first draw (or `redraw` is
-- set) the cursor is expected to be at the column matching `self.position`.
-- @tparam table self the readline object
-- @tparam[opt=false] boolean redraw if truthy, the cursor is taken to be at the start of the prompt already
local function draw(self, redraw)
  if redraw or not self.drawn_before then
    -- we are at start of prompt
    self.drawn_before = true
  else
    -- we are at current cursor position, move to start of prompt. Position 1
    -- is the column right after the prompt, so the cursor is
    -- `#prompt + position - 1` columns away from the start.
    io.write(cursor_move_horiz(-(#self.prompt + self.position - 1)))
  end

  local width = #self.value -- display width of the value in columns

  -- write prompt & value
  io.write(tostring(self.prompt) .. tostring(self.value))

  -- clear remainder of input size; and at least everything the previous draw
  -- put on screen, since removing a double-width character frees 2 columns
  -- whilst the character count only drops by 1
  local pad = math.max(self.max_length - self.value.chars, (self.drawn_width or 0) - width)
  self.drawn_width = width
  io.write(string.rep(" ", pad))
  io.write(cursor_move_horiz(-pad))

  -- move to cursor position
  io.write(cursor_move_horiz(-(width + 1 - self.position)))
  io.flush()
end
198
199
local handle_key do -- keyboard input handler

  -- Moves the cursor to list position `new_pos` and records it. The output is
  -- flushed so the move is visible right away, for every navigation key alike.
  local function move_cursor(self, new_pos)
    io.write(cursor_move_horiz(new_pos - self.position))
    io.flush()
    self.position = new_pos
  end


  -- handlers by key-name; each receives the readline object
  local key_handlers
  key_handlers = {
    left = function(self)
      if self.position == 1 then
        bell()
        return
      end

      local new_pos = self.position - 1
      while self.value[new_pos] == "" do -- skip empty strings; double width chars
        new_pos = new_pos - 1
      end

      move_cursor(self, new_pos)
    end,

    right = function(self)
      if self.position == #self.value + 1 then
        bell()
        return
      end

      local new_pos = self.position + 1
      while self.value[new_pos] == "" do -- skip empty strings; double width chars
        new_pos = new_pos + 1
      end

      move_cursor(self, new_pos)
    end,

    backspace = function(self)
      if self.position == 1 then
        bell()
        return
      end

      while self.value[self.position - 1] == "" do -- remove empty strings; double width chars
        io.write(cursor_move_horiz(-1))
        self.position = self.position - 1
        table.remove(self.value, self.position)
      end
      -- remove char itself
      io.write(cursor_move_horiz(-1))
      self.position = self.position - 1
      table.remove(self.value, self.position)
      self.value.chars = self.value.chars - 1
      draw(self) -- redraws the shortened value, and flushes
    end,

    home = function(self)
      move_cursor(self, 1)
    end,

    ["end"] = function(self)
      move_cursor(self, #self.value + 1)
    end,

    delete = function(self)
      if self.position > #self.value then
        bell()
        return
      end

      -- delete equals stepping over the character, and then backspacing it
      key_handlers.right(self)
      key_handlers.backspace(self)
    end,
  }


  -- handles a single input key/ansi-sequence.
  -- Keys that cannot be handled (unknown ansi sequences, control characters,
  -- or input beyond `max_length`) sound the bell and are otherwise ignored.
  -- @tparam table self the readline object
  -- @tparam string key the key or ansi-sequence (from `system.readansi`)
  -- @tparam string keytype the type of the key, either "char" or "ansi" (from `system.readansi`)
  -- @treturn string status the status of the key handling, either "ok" or "exit_key"
  function handle_key(self, key, keytype)
    if self.exit_keys[key] then
      -- registered exit key
      return "exit_key"
    end

    local name = key_names[key]
    local handler = name and key_handlers[name]
    if handler then
      handler(self)
      return "ok"
    end

    if keytype == "ansi" then
      -- we got an ansi sequence, but dunno how to handle it, ignore
      -- print("unhandled ansi: ", key:sub(2,-1), string.byte(key, 1, -1))
      bell()
      return "ok"
    end

    -- just a single key
    if key < " " then
      -- control character
      bell()
      return "ok"
    end

    if self.value.chars >= self.max_length then
      bell()
      return "ok"
    end

    -- insert the key into the value; the cursor is moved along here, the
    -- character itself gets on screen by the `draw` call below
    if sys.utf8cwidth(key) == 2 then
      -- double width character, insert empty string after it
      table.insert(self.value, self.position, "")
      table.insert(self.value, self.position, key)
      self.position = self.position + 2
      io.write(cursor_move_horiz(2))
    else
      table.insert(self.value, self.position, key)
      self.position = self.position + 1
      io.write(cursor_move_horiz(1))
    end
    self.value.chars = self.value.chars + 1
    draw(self)
    return "ok"
  end
end
331
332
333
--- Returns the maximum size of the input box (prompt + input).
-- The size is reported as rows and columns. The column count is the display
-- width of the prompt plus `max_length * 2`, since every character can be
-- double-width.
-- @treturn number the number of rows (always 1)
-- @treturn number the number of columns
function readline:get_size()
  local rows = 1
  local columns = #self.prompt + self.max_length * 2
  return rows, columns
end
342
343
344
--- Returns the coordinates of the cursor in the input box (prompt + input).
-- Coordinates are 1-based, and reported as row and column within the size
-- that `get_size` returns.
-- @treturn number the row of the cursor (always 1)
-- @treturn number the column of the cursor
function readline:get_cursor()
  local row = 1
  local column = #self.prompt + self.position
  return row, column
end
353
354
355
--- Set the coordinates of the cursor in the input box (prompt + input).
-- The coordinates are 1-based. They are expected to be within the
-- size as reported by `get_size`, and beyond the prompt.
-- If the position is invalid, it will be corrected.
-- Use the results to check if the position was adjusted.
-- The cursor on screen is moved along (relative to its current position).
-- @tparam number row the row of the cursor (always 1, the value is not used)
-- @tparam number col the column of the cursor
-- @return results of get_cursor
function readline:set_cursor(row, col)
  local l_prompt = #self.prompt
  local l_value = #self.value

  -- keep the column within the value, or the slot right after it
  if col < l_prompt + 1 then
    col = l_prompt + 1
  elseif col > l_prompt + l_value + 1 then
    col = l_prompt + l_value + 1
  end

  while self.value[col - l_prompt] == "" do
    col = col - 1 -- on an empty string, so move back to start of double-width char
  end

  local new_pos = col - l_prompt

  -- actually write the move; positive when the new position is to the right
  io.write(cursor_move_horiz(new_pos - self.position))
  io.flush()

  self.position = new_pos
  return self:get_cursor()
end
386
387
388
--- Read a line of input from the user.
-- Draws the `prompt` and the current value first, and then waits for input.
-- All cursor movements are relative, so make sure the cursor is at the
-- intended position before calling.
-- It can be called again after an exit-key or timeout occurred, provided the
-- cursor is at the same position where it was when the previous call returned.
-- Alternatively put the cursor at the position of the prompt (where it was
-- before the very first call), and set the parameter `redraw` to `true`.
-- @tparam[opt=math.huge] number timeout the maximum time to wait for input in seconds
-- @tparam[opt=false] boolean redraw if `true` the prompt will be redrawn (cursor must be at prompt position!)
-- @treturn[1] string the input string as entered by the user
-- @treturn[1] string the exit-key used to exit the readline (see `new`)
-- @treturn[2] nil when input is incomplete
-- @treturn[2] string error message, the reason why the input is incomplete, `"timeout"`, or an error reading a key
function readline:__call(timeout, redraw)
  draw(self, redraw)

  -- absolute point in time at which to give up
  local deadline = sys.gettime() + (timeout or math.huge)

  while true do
    local key, keytype = sys.readansi(deadline - sys.gettime())
    if not key then
      -- error or timeout; the 2nd return value carries the reason
      return nil, keytype
    end

    local status = handle_key(self, key, keytype)
    if status == "exit_key" then
      return tostring(self.value), key
    end

    if status ~= "ok" then
      error("unknown status received: " .. tostring(status))
    end
  end
end
425
426
427
428-- return readline
429
430
431
432
-- setup Windows console to handle ANSI processing
-- (the current settings are captured first, to restore them when done)
local of_in = sys.getconsoleflags(io.stdin)
local cp_in = sys.getconsolecp()
-- sys.setconsolecp(65001)
sys.setconsolecp(850)
local of_out = sys.getconsoleflags(io.stdout)
local cp_out = sys.getconsoleoutputcp()
sys.setconsoleoutputcp(65001)
sys.setconsoleflags(io.stdout, sys.getconsoleflags(io.stdout) + sys.COF_VIRTUAL_TERMINAL_PROCESSING)
sys.setconsoleflags(io.stdin, sys.getconsoleflags(io.stdin) + sys.CIF_VIRTUAL_TERMINAL_INPUT)

-- setup Posix terminal to use non-blocking mode, and disable line-mode
local of_attr = sys.tcgetattr(io.stdin)
local of_block = sys.getnonblock(io.stdin)
sys.setnonblock(io.stdin, true)
sys.tcsetattr(io.stdin, sys.TCSANOW, {
  lflag = of_attr.lflag - sys.L_ICANON - sys.L_ECHO, -- disable canonical mode and echo
})


local rl = readline.new{
  prompt = "Enter something: ",
  max_length = 60,
  value = "Hello, 你-好 World 🚀!",
  -- position = 2,
  exit_keys = {key_sequences.enter, "\27", "\t", "\27[Z"}, -- enter, escape, tab, shift-tab
}


-- run protected, such that the terminal settings are restored below no matter
-- what happens while reading
local ok, result, key = pcall(function()
  return rl()
end)

print("") -- newline after input, to move cursor down from the input line
if ok and result then
  print("Result (string): '" .. result .. "'")
  print("Result (bytes):", result:byte(1,-1))
  print("Exit-Key (bytes):", key:byte(1,-1))
elseif ok then
  -- no result; the 2nd return value holds the reason (timeout or read error)
  print("Input incomplete: " .. tostring(key))
end


-- Clean up afterwards
sys.setnonblock(io.stdin, false)
sys.setconsoleflags(io.stdout, of_out)
sys.setconsoleflags(io.stdin, of_in)
sys.tcsetattr(io.stdin, sys.TCSANOW, of_attr)
sys.setnonblock(io.stdin, of_block)
sys.setconsolecp(cp_in)
sys.setconsoleoutputcp(cp_out)

if not ok then
  -- terminal is back to normal, now report the failure
  error(result, 0)
end
diff --git a/luasystem-scm-0.rockspec b/luasystem-scm-0.rockspec
index dac3d9b..00a442c 100644
--- a/luasystem-scm-0.rockspec
+++ b/luasystem-scm-0.rockspec
@@ -60,6 +60,7 @@ local function make_platform(plat)
60 'src/random.c', 60 'src/random.c',
61 'src/term.c', 61 'src/term.c',
62 'src/bitflags.c', 62 'src/bitflags.c',
63 'src/wcwidth.c',
63 }, 64 },
64 defines = defines[plat], 65 defines = defines[plat],
65 libraries = libraries[plat], 66 libraries = libraries[plat],
diff --git a/spec/04-term_spec.lua b/spec/04-term_spec.lua
index 9ca37e9..ee4145a 100644
--- a/spec/04-term_spec.lua
+++ b/spec/04-term_spec.lua
@@ -4,6 +4,19 @@ require("spec.helpers")
4 4
5describe("Terminal:", function() 5describe("Terminal:", function()
6 6
7 local wincodepage
8
9 setup(function()
10 wincodepage = system.getconsoleoutputcp()
11 assert(system.setconsoleoutputcp(65001))
12 end)
13
14 teardown(function()
15 assert(system.setconsoleoutputcp(wincodepage))
16 end)
17
18
19
7 describe("isatty()", function() 20 describe("isatty()", function()
8 21
9 local newtmpfile = require("pl.path").tmpname 22 local newtmpfile = require("pl.path").tmpname
@@ -93,7 +106,7 @@ describe("Terminal:", function()
93 106
94 107
95 108
96 describe("getconsoleflags()", function() 109 pending("getconsoleflags()", function()
97 110
98 pending("returns the consoleflags, if called without flags", function() 111 pending("returns the consoleflags, if called without flags", function()
99print"1" 112print"1"
@@ -111,4 +124,181 @@ for k,v in pairs(debug.getinfo(system.isatty)) do print(k,v) end
111 end) 124 end)
112 125
113 end) 126 end)
127
128
129
-- Placeholders for tests still to be written. Each description names the
-- behaviour of the function it is listed under.
pending("setconsoleflags()", function()

  pending("sets the consoleflags, if called with flags", function()
  end)

end)



pending("tcgetattr()", function()

  pending("gets the terminal attributes", function()
  end)

end)



pending("tcsetattr()", function()

  pending("sets the terminal attributes", function()
  end)

end)



pending("getconsolecp()", function()

  pending("gets the console codepage", function()
  end)

end)



pending("setconsolecp()", function()

  pending("sets the console codepage", function()
  end)

end)



pending("getconsoleoutputcp()", function()

  pending("gets the console output codepage", function()
  end)

end)



pending("setconsoleoutputcp()", function()

  pending("sets the console output codepage", function()
  end)

end)



pending("getnonblock()", function()

  pending("gets the non-blocking state", function()
  end)

end)



pending("setnonblock()", function()

  pending("sets the non-blocking state", function()
  end)

end)



pending("termsize()", function()

  pending("gets the terminal size", function()
  end)

end)
217
218
219
describe("utf8cwidth()", function()

  -- sample characters, spelled out as decimal byte escapes
  local euro   = "\226\130\172"       -- "€" single
  local rocket = "\240\159\154\128"   -- "🚀" double
  local ni     = "\228\189\160"       -- "你" double
  local hao    = "\229\165\189"       -- "好" double

  -- results expected for characters without a determinable width
  local failure = {nil, 'Character width determination failed'}

  it("handles zero width characters", function()
    assert.same({0}, {system.utf8cwidth("")}) -- empty string returns 0-size
    assert.same(failure, {system.utf8cwidth("\a")}) -- bell character
    assert.same(failure, {system.utf8cwidth("\27")}) -- escape character
  end)

  it("handles single width characters", function()
    assert.same({1}, {system.utf8cwidth("a")})
    assert.same({1}, {system.utf8cwidth(euro)})
  end)

  it("handles double width characters", function()
    for _, wide in ipairs({rocket, ni, hao}) do
      assert.same({2}, {system.utf8cwidth(wide)})
    end
  end)

  it("returns the width of the first character in the string", function()
    assert.same(failure, {system.utf8cwidth("\a" .. euro)}) -- bell character + EURO
    assert.same({1}, {system.utf8cwidth(euro .. rocket)})
    assert.same({2}, {system.utf8cwidth(rocket .. ni .. hao)})
  end)

end)
251
252
253
describe("utf8swidth()", function()

  -- sample characters, spelled out as decimal byte escapes
  local euro   = "\226\130\172"       -- "€" single
  local rocket = "\240\159\154\128"   -- "🚀" double
  local ni     = "\228\189\160"       -- "你" double
  local hao    = "\229\165\189"       -- "好" double

  -- results expected for strings without a determinable width
  local failure = {nil, 'Character width determination failed'}

  it("handles zero width characters", function()
    assert.same({0}, {system.utf8swidth("")}) -- empty string returns 0-size
    assert.same(failure, {system.utf8swidth("\a")}) -- bell character
    assert.same(failure, {system.utf8swidth("\27")}) -- escape character
  end)

  it("handles multi-character UTF8 strings", function()
    local single_and_double = "hello " .. euro .. rocket .. " world"
    local double_and_double = "hello " .. ni .. hao .. " world"
    assert.same({15}, {system.utf8swidth(single_and_double)})
    assert.same({16}, {system.utf8swidth(double_and_double)})
  end)

end)
273
274
275
276 pending("termbackup()", function()
277
278 end)
279
280
281
282 pending("termrestore()", function()
283
284 end)
285
286
287
288 pending("termwrap()", function()
289
290 end)
291
292
293
294 pending("readkey()", function()
295
296 end)
297
298
299
300 pending("readansi()", function()
301
302 end)
303
114end) 304end)
diff --git a/src/term.c b/src/term.c
index f73d23f..e557a11 100644
--- a/src/term.c
+++ b/src/term.c
@@ -15,6 +15,7 @@
15 15
16#ifdef _WIN32 16#ifdef _WIN32
17# include <windows.h> 17# include <windows.h>
18# include <locale.h>
18#else 19#else
19# include <termios.h> 20# include <termios.h>
20# include <string.h> 21# include <string.h>
@@ -22,8 +23,16 @@
22# include <fcntl.h> 23# include <fcntl.h>
23# include <sys/ioctl.h> 24# include <sys/ioctl.h>
24# include <unistd.h> 25# include <unistd.h>
26# include <wchar.h>
27# include <locale.h>
25#endif 28#endif
26 29
30
31// Windows does not have a wcwidth function, so we use compatibility code from
32// http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c by Markus Kuhn
33#include "wcwidth.h"
34
35
27#ifdef _WIN32 36#ifdef _WIN32
28// after an error is returned, GetLastError() result can be passed to this function to get a string 37// after an error is returned, GetLastError() result can be passed to this function to get a string
29// representation of the error on the stack. 38// representation of the error on the stack.
@@ -423,7 +432,7 @@ static int lst_getconsoleflags(lua_State *L)
423// see https://github.com/luaposix/luaposix 432// see https://github.com/luaposix/luaposix
424 433
425/*** 434/***
426Get termios state. 435Get termios state (Posix).
427The terminal attributes is a table with the following fields: 436The terminal attributes is a table with the following fields:
428 437
429- `iflag` input flags 438- `iflag` input flags
@@ -511,7 +520,7 @@ static int lst_tcgetattr(lua_State *L)
511 520
512 521
513/*** 522/***
514Set termios state. 523Set termios state (Posix).
515This function will set the flags as given. 524This function will set the flags as given.
516 525
517The `I_`, `O_`, and `L_` constants are available on the module table. They are the respective 526The `I_`, `O_`, and `L_` constants are available on the module table. They are the respective
@@ -689,13 +698,28 @@ static int lst_getnonblock(lua_State *L)
689 * Reading keyboard input 698 * Reading keyboard input
690 *-------------------------------------------------------------------------*/ 699 *-------------------------------------------------------------------------*/
691 700
701#ifdef _WIN32
702// Define a static buffer for UTF-8 characters
703static char utf8_buffer[4];
704static int utf8_buffer_len = 0;
705static int utf8_buffer_index = 0;
706#endif
707
708
692/*** 709/***
693Reads a key from the console non-blocking. 710Reads a key from the console non-blocking. This function should not be called
711directly, but through the `system.readkey` or `system.readansi` functions. It
712will return the next byte from the input stream, or `nil` if no key was pressed.
713
694On Posix, `io.stdin` must be set to non-blocking mode using `setnonblock` 714On Posix, `io.stdin` must be set to non-blocking mode using `setnonblock`
695before calling this function. Otherwise it will block. 715before calling this function. Otherwise it will block. No conversions are
716done on Posix, so the byte read is returned as-is.
696 717
697@function readkey 718On Windows this reads a wide character and converts it to UTF-8. Multi-byte
698@treturn[1] integer the key code of the key that was pressed 719sequences will be buffered internally and returned one byte at a time.
720
721@function _readkey
722@treturn[1] integer the byte read from the input stream
699@treturn[2] nil if no key was pressed 723@treturn[2] nil if no key was pressed
700@treturn[3] nil on error 724@treturn[3] nil on error
701@treturn[3] string error message 725@treturn[3] string error message
@@ -703,20 +727,87 @@ before calling this function. Otherwise it will block.
703*/ 727*/
704static int lst_readkey(lua_State *L) { 728static int lst_readkey(lua_State *L) {
705#ifdef _WIN32 729#ifdef _WIN32
706 if (_kbhit()) { 730 if (utf8_buffer_len > 0) {
707 int ch = _getch(); 731 // Buffer not empty, return the next byte
708 if (ch == EOF) { 732 lua_pushinteger(L, (unsigned char)utf8_buffer[utf8_buffer_index]);
709 // Error handling for end-of-file or read error 733 utf8_buffer_index++;
710 lua_pushnil(L); 734 utf8_buffer_len--;
711 lua_pushliteral(L, "_getch error"); 735 // printf("returning from buffer: %d\n", luaL_checkinteger(L, -1));
712 return 2; 736 if (utf8_buffer_len == 0) {
737 utf8_buffer_index = 0;
713 } 738 }
714 lua_pushinteger(L, (unsigned char)ch);
715 return 1; 739 return 1;
716 } 740 }
717 return 0; 741
742 if (!_kbhit()) {
743 return 0;
744 }
745
746 wchar_t wc = _getwch();
747 // printf("----\nread wchar_t: %x\n", wc);
748 if (wc == WEOF) {
749 lua_pushnil(L);
750 lua_pushliteral(L, "read error");
751 return 2;
752 }
753
754 if (sizeof(wchar_t) == 2) {
755 // printf("2-byte wchar_t\n");
756 // only 2 bytes wide, not 4
757 if (wc >= 0xD800 && wc <= 0xDBFF) {
758 // printf("2-byte wchar_t, received high, getting low...\n");
759
760 // we got a high surrogate, so we need to read the next one as the low surrogate
761 if (!_kbhit()) {
762 lua_pushnil(L);
763 lua_pushliteral(L, "incomplete surrogate pair");
764 return 2;
765 }
766
767 wchar_t wc2 = _getwch();
768 // printf("read wchar_t 2: %x\n", wc2);
769 if (wc2 == WEOF) {
770 lua_pushnil(L);
771 lua_pushliteral(L, "read error");
772 return 2;
773 }
774
775 if (wc2 < 0xDC00 || wc2 > 0xDFFF) {
776 lua_pushnil(L);
777 lua_pushliteral(L, "invalid surrogate pair");
778 return 2;
779 }
780 // printf("2-byte pair complete now\n");
781 wchar_t wch_pair[2] = { wc, wc2 };
782 utf8_buffer_len = WideCharToMultiByte(CP_UTF8, 0, wch_pair, 2, utf8_buffer, sizeof(utf8_buffer), NULL, NULL);
783
784 } else {
785 // printf("2-byte wchar_t, no surrogate pair\n");
786 // not a high surrogate, so we can handle just the 2 bytes directly
787 utf8_buffer_len = WideCharToMultiByte(CP_UTF8, 0, &wc, 1, utf8_buffer, sizeof(utf8_buffer), NULL, NULL);
788 }
789
790 } else {
791 // printf("4-byte wchar_t\n");
792 // 4 bytes wide, so handle as UTF-32 directly
793 utf8_buffer_len = WideCharToMultiByte(CP_UTF8, 0, &wc, 1, utf8_buffer, sizeof(utf8_buffer), NULL, NULL);
794 }
795 // printf("utf8_buffer_len: %d\n", utf8_buffer_len);
796 utf8_buffer_index = 0;
797 if (utf8_buffer_len <= 0) {
798 lua_pushnil(L);
799 lua_pushliteral(L, "UTF-8 conversion error");
800 return 2;
801 }
802
803 lua_pushinteger(L, (unsigned char)utf8_buffer[utf8_buffer_index]);
804 utf8_buffer_index++;
805 utf8_buffer_len--;
806 // printf("returning from buffer: %x\n", luaL_checkinteger(L, -1));
807 return 1;
718 808
719#else 809#else
810 // Posix implementation
720 char ch; 811 char ch;
721 ssize_t bytes_read = read(STDIN_FILENO, &ch, 1); 812 ssize_t bytes_read = read(STDIN_FILENO, &ch, 1);
722 if (bytes_read > 0) { 813 if (bytes_read > 0) {
@@ -782,6 +873,205 @@ static int lst_termsize(lua_State *L) {
782 873
783 874
784/*------------------------------------------------------------------------- 875/*-------------------------------------------------------------------------
876 * utf8 conversion and support
877 *-------------------------------------------------------------------------*/
878
879// Function to convert a single UTF-8 character to a Unicode code point (uint32_t)
880// To prevent having to do codepage/locale changes, we use a custom implementation
881int utf8_to_wchar(const char *utf8, size_t len, mk_wchar_t *codepoint) {
882 if (len == 0) {
883 return -1; // No input provided
884 }
885
886 unsigned char c = (unsigned char)utf8[0];
887 if (c <= 0x7F) {
888 *codepoint = c;
889 return 1;
890 } else if ((c & 0xE0) == 0xC0) {
891 if (len < 2) return -1; // Not enough bytes
892 *codepoint = ((utf8[0] & 0x1F) << 6) | (utf8[1] & 0x3F);
893 return 2;
894 } else if ((c & 0xF0) == 0xE0) {
895 if (len < 3) return -1; // Not enough bytes
896 *codepoint = ((utf8[0] & 0x0F) << 12) | ((utf8[1] & 0x3F) << 6) | (utf8[2] & 0x3F);
897 return 3;
898 } else if ((c & 0xF8) == 0xF0) {
899 if (len < 4) return -1; // Not enough bytes
900 *codepoint = ((utf8[0] & 0x07) << 18) | ((utf8[1] & 0x3F) << 12) | ((utf8[2] & 0x3F) << 6) | (utf8[3] & 0x3F);
901 return 4;
902 } else {
903 // Invalid UTF-8 character
904 return -1;
905 }
906}
907
908
909/***
910Get the width of a utf8 character for terminal display.
911@function utf8cwidth
912@tparam string utf8_char the utf8 character to check, only the width of the first character will be returned
913@treturn[1] int the display width in columns of the first character in the string (0 for an empty string)
914@treturn[2] nil
915@treturn[2] string error message
916*/
917int lst_utf8cwidth(lua_State *L) {
918 const char *utf8_char;
919 size_t utf8_len;
920 utf8_char = luaL_checklstring(L, 1, &utf8_len);
921 int width = 0;
922
923 mk_wchar_t wc;
924
925 if (utf8_len == 0) {
926 lua_pushinteger(L, 0);
927 return 1;
928 }
929
930 // Convert the UTF-8 string to a wide character
931 int bytes_processed = utf8_to_wchar(utf8_char, utf8_len, &wc);
932 if (bytes_processed == -1) {
933 lua_pushnil(L);
934 lua_pushstring(L, "Invalid UTF-8 character");
935 return 2;
936 }
937
938 // Get the width of the wide character
939 width = mk_wcwidth(wc);
940 if (width == -1) {
941 lua_pushnil(L);
942 lua_pushstring(L, "Character width determination failed");
943 return 2;
944 }
945
946 lua_pushinteger(L, width);
947 return 1;
948}
949
950
951
952
953/***
954Get the width of a utf8 string for terminal display.
955@function utf8swidth
956@tparam string utf8_string the utf8 string to check
957@treturn[1] int the display width of the string in columns (0 for an empty string)
958@treturn[2] nil
959@treturn[2] string error message
960*/
961int lst_utf8swidth(lua_State *L) {
962 const char *utf8_str;
963 size_t utf8_len;
964 utf8_str = luaL_checklstring(L, 1, &utf8_len);
965 int total_width = 0;
966
967 if (utf8_len == 0) {
968 lua_pushinteger(L, 0);
969 return 1;
970 }
971
972 int bytes_processed = 0;
973 size_t i = 0;
974 mk_wchar_t wc;
975
976 while (i < utf8_len) {
977 bytes_processed = utf8_to_wchar(utf8_str + i, utf8_len - i, &wc);
978 if (bytes_processed == -1) {
979 lua_pushnil(L);
980 lua_pushstring(L, "Invalid UTF-8 character");
981 return 2;
982 }
983
984 int width = mk_wcwidth(wc);
985 if (width == -1) {
986 lua_pushnil(L);
987 lua_pushstring(L, "Character width determination failed");
988 return 2;
989 }
990
991 total_width += width;
992 i += bytes_processed;
993 }
994
995 lua_pushinteger(L, total_width);
996 return 1;
997}
998
999
1000
1001/*-------------------------------------------------------------------------
1002 * Windows codepage functions
1003 *-------------------------------------------------------------------------*/
1004
1005
1006/***
1007Gets the current console code page (Windows).
1008@function getconsolecp
1009@treturn[1] int the current code page (always 65001 on Posix systems)
1010*/
1011static int lst_getconsolecp(lua_State *L) {
1012 unsigned int cp = 65001;
1013#ifdef _WIN32
1014 cp = GetConsoleCP();
1015#endif
1016 lua_pushinteger(L, cp);
1017 return 1;
1018}
1019
1020
1021
1022/***
1023Sets the current console code page (Windows).
1024@function setconsolecp
1025@tparam int cp the code page to set, use 65001 for UTF-8
1026@treturn[1] bool `true` on success (always `true` on Posix systems)
1027*/
1028static int lst_setconsolecp(lua_State *L) {
1029 unsigned int cp = (unsigned int)luaL_checkinteger(L, 1);
1030 int success = TRUE;
1031#ifdef _WIN32
1032 SetConsoleCP(cp);
1033#endif
1034 lua_pushboolean(L, success);
1035 return 1;
1036}
1037
1038
1039
1040/***
1041Gets the current console output code page (Windows).
1042@function getconsoleoutputcp
1043@treturn[1] int the current code page (always 65001 on Posix systems)
1044*/
1045static int lst_getconsoleoutputcp(lua_State *L) {
1046 unsigned int cp = 65001;
1047#ifdef _WIN32
1048 cp = GetConsoleOutputCP();
1049#endif
1050 lua_pushinteger(L, cp);
1051 return 1;
1052}
1053
1054
1055
1056/***
1057Sets the current console output code page (Windows).
1058@function setconsoleoutputcp
1059@tparam int cp the code page to set, use 65001 for UTF-8
1060@treturn[1] bool `true` on success (always `true` on Posix systems)
1061*/
1062static int lst_setconsoleoutputcp(lua_State *L) {
1063 unsigned int cp = (unsigned int)luaL_checkinteger(L, 1);
1064 int success = TRUE;
1065#ifdef _WIN32
1066 SetConsoleOutputCP(cp);
1067#endif
1068 lua_pushboolean(L, success);
1069 return 1;
1070}
1071
1072
1073
1074/*-------------------------------------------------------------------------
785 * Initializes module 1075 * Initializes module
786 *-------------------------------------------------------------------------*/ 1076 *-------------------------------------------------------------------------*/
787 1077
@@ -791,10 +1081,16 @@ static luaL_Reg func[] = {
791 { "setconsoleflags", lst_setconsoleflags }, 1081 { "setconsoleflags", lst_setconsoleflags },
792 { "tcgetattr", lst_tcgetattr }, 1082 { "tcgetattr", lst_tcgetattr },
793 { "tcsetattr", lst_tcsetattr }, 1083 { "tcsetattr", lst_tcsetattr },
794 { "getnonblock", lst_setnonblock }, 1084 { "getnonblock", lst_getnonblock },
795 { "setnonblock", lst_setnonblock }, 1085 { "setnonblock", lst_setnonblock },
796 { "readkey", lst_readkey }, 1086 { "_readkey", lst_readkey },
797 { "termsize", lst_termsize }, 1087 { "termsize", lst_termsize },
1088 { "utf8cwidth", lst_utf8cwidth },
1089 { "utf8swidth", lst_utf8swidth },
1090 { "getconsolecp", lst_getconsolecp },
1091 { "setconsolecp", lst_setconsolecp },
1092 { "getconsoleoutputcp", lst_getconsoleoutputcp },
1093 { "setconsoleoutputcp", lst_setconsoleoutputcp },
798 { NULL, NULL } 1094 { NULL, NULL }
799}; 1095};
800 1096
diff --git a/src/wcwidth.c b/src/wcwidth.c
new file mode 100644
index 0000000..6032158
--- /dev/null
+++ b/src/wcwidth.c
@@ -0,0 +1,285 @@
1// This file was modified from the original versions, check "modified:" comments for details
2// Character range updates (both the table and the +1 check) were generated using ChatGPT.
3
4/*
5 * This is an implementation of wcwidth() and wcswidth() (defined in
6 * IEEE Std 1003.1-2001) for Unicode.
7 *
8 * http://www.opengroup.org/onlinepubs/007904975/functions/wcwidth.html
9 * http://www.opengroup.org/onlinepubs/007904975/functions/wcswidth.html
10 *
11 * In fixed-width output devices, Latin characters all occupy a single
12 * "cell" position of equal width, whereas ideographic CJK characters
13 * occupy two such cells. Interoperability between terminal-line
14 * applications and (teletype-style) character terminals using the
15 * UTF-8 encoding requires agreement on which character should advance
16 * the cursor by how many cell positions. No established formal
17 * standards exist at present on which Unicode character shall occupy
18 * how many cell positions on character terminals. These routines are
19 * a first attempt of defining such behavior based on simple rules
20 * applied to data provided by the Unicode Consortium.
21 *
22 * For some graphical characters, the Unicode standard explicitly
23 * defines a character-cell width via the definition of the East Asian
24 * FullWidth (F), Wide (W), Half-width (H), and Narrow (Na) classes.
25 * In all these cases, there is no ambiguity about which width a
26 * terminal shall use. For characters in the East Asian Ambiguous (A)
27 * class, the width choice depends purely on a preference of backward
28 * compatibility with either historic CJK or Western practice.
29 * Choosing single-width for these characters is easy to justify as
30 * the appropriate long-term solution, as the CJK practice of
31 * displaying these characters as double-width comes from historic
32 * implementation simplicity (8-bit encoded characters were displayed
33 * single-width and 16-bit ones double-width, even for Greek,
34 * Cyrillic, etc.) and not any typographic considerations.
35 *
36 * Much less clear is the choice of width for the Not East Asian
37 * (Neutral) class. Existing practice does not dictate a width for any
38 * of these characters. It would nevertheless make sense
39 * typographically to allocate two character cells to characters such
40 * as for instance EM SPACE or VOLUME INTEGRAL, which cannot be
41 * represented adequately with a single-width glyph. The following
42 * routines at present merely assign a single-cell width to all
43 * neutral characters, in the interest of simplicity. This is not
44 * entirely satisfactory and should be reconsidered before
45 * establishing a formal standard in this area. At the moment, the
46 * decision which Not East Asian (Neutral) characters should be
47 * represented by double-width glyphs cannot yet be answered by
48 * applying a simple rule from the Unicode database content. Setting
49 * up a proper standard for the behavior of UTF-8 character terminals
50 * will require a careful analysis not only of each Unicode character,
51 * but also of each presentation form, something the author of these
52 * routines has avoided to do so far.
53 *
54 * http://www.unicode.org/unicode/reports/tr11/
55 *
56 * Markus Kuhn -- 2007-05-26 (Unicode 5.0)
57 *
58 * Permission to use, copy, modify, and distribute this software
59 * for any purpose and without fee is hereby granted. The author
60 * disclaims all warranties with regard to this software.
61 *
62 * Latest version: http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
63 */
64
65#include "wcwidth.h" // modified: used to define mk_wchar_t
66
67struct interval {
68 int first;
69 int last;
70};
71
72/* auxiliary function for binary search in interval table */
73static int bisearch(mk_wchar_t ucs, const struct interval *table, int max) { // modified: use mk_wchar_t
74 int min = 0;
75 int mid;
76
77 if (ucs < table[0].first || ucs > table[max].last)
78 return 0;
79 while (max >= min) {
80 mid = (min + max) / 2;
81 if (ucs > table[mid].last)
82 min = mid + 1;
83 else if (ucs < table[mid].first)
84 max = mid - 1;
85 else
86 return 1;
87 }
88
89 return 0;
90}
91
92
93/* The following two functions define the column width of an ISO 10646
94 * character as follows:
95 *
96 * - The null character (U+0000) has a column width of 0.
97 *
98 * - Other C0/C1 control characters and DEL will lead to a return
99 * value of -1.
100 *
101 * - Non-spacing and enclosing combining characters (general
102 * category code Mn or Me in the Unicode database) have a
103 * column width of 0.
104 *
105 * - SOFT HYPHEN (U+00AD) has a column width of 1.
106 *
107 * - Other format characters (general category code Cf in the Unicode
108 * database) and ZERO WIDTH SPACE (U+200B) have a column width of 0.
109 *
110 * - Hangul Jamo medial vowels and final consonants (U+1160-U+11FF)
111 * have a column width of 0.
112 *
113 * - Spacing characters in the East Asian Wide (W) or East Asian
114 * Full-width (F) category as defined in Unicode Technical
115 * Report #11 have a column width of 2.
116 *
117 * - All remaining characters (including all printable
118 * ISO 8859-1 and WGL4 characters, Unicode control characters,
119 * etc.) have a column width of 1.
120 *
121 * This implementation assumes that mk_wchar_t characters are encoded
122 * in ISO 10646.
123 */
124
125int mk_wcwidth(mk_wchar_t ucs) // modified: use mk_wchar_t
126{
127 /* sorted list of non-overlapping intervals of non-spacing characters */
128 /* generated by "uniset +cat=Me +cat=Mn +cat=Cf -00AD +1160-11FF +200B c" */
129 static const struct interval combining[] = { // modified: added new ranges to the list
130 { 0x0300, 0x036F }, { 0x0483, 0x0489 }, { 0x0591, 0x05BD },
131 { 0x05BF, 0x05BF }, { 0x05C1, 0x05C2 }, { 0x05C4, 0x05C5 },
132 { 0x05C7, 0x05C7 }, { 0x0600, 0x0605 }, { 0x0610, 0x061A },
133 { 0x061C, 0x061C }, { 0x064B, 0x065F }, { 0x0670, 0x0670 },
134 { 0x06D6, 0x06DC }, { 0x06DF, 0x06E4 }, { 0x06E7, 0x06E8 },
135 { 0x06EA, 0x06ED }, { 0x0711, 0x0711 }, { 0x0730, 0x074A },
136 { 0x07A6, 0x07B0 }, { 0x07EB, 0x07F3 }, { 0x07FD, 0x07FD },
137 { 0x0816, 0x0819 }, { 0x081B, 0x0823 }, { 0x0825, 0x0827 },
138 { 0x0829, 0x082D }, { 0x0859, 0x085B }, { 0x08D3, 0x08E1 },
139 { 0x08E3, 0x0903 }, { 0x093A, 0x093C }, { 0x093E, 0x094F },
140 { 0x0951, 0x0957 }, { 0x0962, 0x0963 }, { 0x0981, 0x0983 },
141 { 0x09BC, 0x09BC }, { 0x09BE, 0x09C4 }, { 0x09C7, 0x09C8 },
142 { 0x09CB, 0x09CD }, { 0x09D7, 0x09D7 }, { 0x09E2, 0x09E3 },
143 { 0x09FE, 0x09FE }, { 0x0A01, 0x0A03 }, { 0x0A3C, 0x0A3C },
144 { 0x0A3E, 0x0A42 }, { 0x0A47, 0x0A48 }, { 0x0A4B, 0x0A4D },
145 { 0x0A51, 0x0A51 }, { 0x0A70, 0x0A71 }, { 0x0A75, 0x0A75 },
146 { 0x0A81, 0x0A83 }, { 0x0ABC, 0x0ABC }, { 0x0ABE, 0x0AC5 },
147 { 0x0AC7, 0x0AC9 }, { 0x0ACB, 0x0ACD }, { 0x0AE2, 0x0AE3 },
148 { 0x0AFA, 0x0AFF }, { 0x0B01, 0x0B03 }, { 0x0B3C, 0x0B3C },
149 { 0x0B3E, 0x0B44 }, { 0x0B47, 0x0B48 }, { 0x0B4B, 0x0B4D },
150 { 0x0B55, 0x0B57 }, { 0x0B62, 0x0B63 }, { 0x0B82, 0x0B82 },
151 { 0x0BBE, 0x0BC2 }, { 0x0BC6, 0x0BC8 }, { 0x0BCA, 0x0BCD },
152 { 0x0BD7, 0x0BD7 }, { 0x0C00, 0x0C04 }, { 0x0C3E, 0x0C44 },
153 { 0x0C46, 0x0C48 }, { 0x0C4A, 0x0C4D }, { 0x0C55, 0x0C56 },
154 { 0x0C62, 0x0C63 }, { 0x0C81, 0x0C83 }, { 0x0CBC, 0x0CBC },
155 { 0x0CBE, 0x0CC4 }, { 0x0CC6, 0x0CC8 }, { 0x0CCA, 0x0CCD },
156 { 0x0CD5, 0x0CD6 }, { 0x0CE2, 0x0CE3 }, { 0x0D00, 0x0D03 },
157 { 0x0D3B, 0x0D3C }, { 0x0D3E, 0x0D44 }, { 0x0D46, 0x0D48 },
158 { 0x0D4A, 0x0D4D }, { 0x0D57, 0x0D57 }, { 0x0D62, 0x0D63 },
159 { 0x0D82, 0x0D83 }, { 0x0DCF, 0x0DD4 }, { 0x0DD6, 0x0DD6 },
160 { 0x0DD8, 0x0DDF }, { 0x0DF2, 0x0DF3 }, { 0x0E31, 0x0E31 },
161 { 0x0E34, 0x0E3A }, { 0x0E47, 0x0E4E }, { 0x0EB1, 0x0EB1 },
162 { 0x0EB4, 0x0EBC }, { 0x0EC8, 0x0ECD }, { 0x0F18, 0x0F19 },
163 { 0x0F35, 0x0F35 }, { 0x0F37, 0x0F37 }, { 0x0F39, 0x0F39 },
164 { 0x0F71, 0x0F7E }, { 0x0F80, 0x0F84 }, { 0x0F86, 0x0F87 },
165 { 0x0F8D, 0x0F97 }, { 0x0F99, 0x0FBC }, { 0x0FC6, 0x0FC6 },
166 { 0x102D, 0x1030 }, { 0x1032, 0x1037 }, { 0x1039, 0x103A },
167 { 0x103D, 0x103E }, { 0x1058, 0x1059 }, { 0x105E, 0x1060 },
168 { 0x1071, 0x1074 }, { 0x1082, 0x1082 }, { 0x1085, 0x1086 },
169 { 0x108D, 0x108D }, { 0x109D, 0x109D }, { 0x135D, 0x135F },
170 { 0x1712, 0x1714 }, { 0x1732, 0x1734 }, { 0x1752, 0x1753 },
171 { 0x1772, 0x1773 }, { 0x17B4, 0x17B5 }, { 0x17B7, 0x17BD },
172 { 0x17C6, 0x17C6 }, { 0x17C9, 0x17D3 }, { 0x17DD, 0x17DD },
173 { 0x180B, 0x180E }, { 0x1885, 0x1886 }, { 0x18A9, 0x18A9 },
174 { 0x1920, 0x1922 }, { 0x1927, 0x1928 }, { 0x1932, 0x1932 },
175 { 0x1939, 0x193B }, { 0x1A17, 0x1A18 }, { 0x1A1B, 0x1A1B },
176 { 0x1A56, 0x1A56 }, { 0x1A58, 0x1A5E }, { 0x1A60, 0x1A60 },
177 { 0x1A62, 0x1A62 }, { 0x1A65, 0x1A6C }, { 0x1A73, 0x1A7C },
178 { 0x1A7F, 0x1A7F }, { 0x1AB0, 0x1ACE }, { 0x1B00, 0x1B03 },
179 { 0x1B34, 0x1B34 }, { 0x1B36, 0x1B3A }, { 0x1B3C, 0x1B3C },
180 { 0x1B42, 0x1B42 }, { 0x1B6B, 0x1B73 }, { 0x1B80, 0x1B82 },
181 { 0x1BA1, 0x1BA1 }, { 0x1BA6, 0x1BA7 }, { 0x1BAA, 0x1BAA },
182 { 0x1BAB, 0x1BAD }, { 0x1BE6, 0x1BE6 }, { 0x1BE8, 0x1BE9 },
183 { 0x1BED, 0x1BED }, { 0x1BEF, 0x1BF1 }, { 0x1C2C, 0x1C33 },
184 { 0x1C36, 0x1C37 }, { 0x1CD0, 0x1CD2 }, { 0x1CD4, 0x1CE8 },
185 { 0x1CED, 0x1CED }, { 0x1CF4, 0x1CF4 }, { 0x1CF8, 0x1CF9 },
186 { 0x1DC0, 0x1DF9 }, { 0x1DFB, 0x1DFF }, { 0x20D0, 0x20DC },
187 { 0x20E1, 0x20E1 }, { 0x20E5, 0x20F0 }, { 0x2CEF, 0x2CF1 },
188 { 0x2D7F, 0x2D7F }, { 0x2DE0, 0x2DFF }, { 0x302A, 0x302D },
189 { 0x3099, 0x309A }, { 0xA66F, 0xA672 }, { 0xA674, 0xA67D },
190 { 0xA69E, 0xA69F }, { 0xA6F0, 0xA6F1 }, { 0xA802, 0xA802 },
191 { 0xA806, 0xA806 }, { 0xA80B, 0xA80B }, { 0xA825, 0xA826 },
192 { 0xA82C, 0xA82C }, { 0xA8C4, 0xA8C5 }, { 0xA8E0, 0xA8F1 },
193 { 0xA8FF, 0xA8FF }, { 0xA926, 0xA92D }, { 0xA947, 0xA951 },
194 { 0xA980, 0xA982 }, { 0xA9B3, 0xA9B3 }, { 0xA9B6, 0xA9B9 },
195 { 0xA9BC, 0xA9BD }, { 0xA9E5, 0xA9E5 }, { 0xAA29, 0xAA2E },
196 { 0xAA31, 0xAA32 }, { 0xAA35, 0xAA36 }, { 0xAA43, 0xAA43 },
197 { 0xAA4C, 0xAA4C }, { 0xAA7C, 0xAA7C }, { 0xAAB0, 0xAAB0 },
198 { 0xAAB2, 0xAAB4 }, { 0xAAB7, 0xAAB8 }, { 0xAABE, 0xAABF },
199 { 0xAAC1, 0xAAC1 }, { 0xAAEB, 0xAAEB }, { 0xAAEE, 0xAAEF },
200 { 0xAAF5, 0xAAF6 }, { 0xABE3, 0xABE4 }, { 0xABE6, 0xABE7 },
201 { 0xABE9, 0xABEA }, { 0xABEC, 0xABED }, { 0xFB1E, 0xFB1E },
202 { 0xFE00, 0xFE0F }, { 0xFE20, 0xFE2F }, { 0x101FD, 0x101FD },
203 { 0x102E0, 0x102E0 }, { 0x10376, 0x1037A }, { 0x10A01, 0x10A03 },
204 { 0x10A05, 0x10A06 }, { 0x10A0C, 0x10A0F }, { 0x10A38, 0x10A3A },
205 { 0x10A3F, 0x10A3F }, { 0x10AE5, 0x10AE6 }, { 0x10D24, 0x10D27 },
206 { 0x10EAB, 0x10EAC }, { 0x10F46, 0x10F50 }, { 0x10F82, 0x10F85 },
207 { 0x11000, 0x11002 }, { 0x11038, 0x11046 }, { 0x1107F, 0x11082 },
208 { 0x110B0, 0x110BA }, { 0x11100, 0x11102 }, { 0x11127, 0x11134 },
209 { 0x11145, 0x11146 }, { 0x11173, 0x11173 }, { 0x11180, 0x11182 },
210 { 0x111B3, 0x111C0 }, { 0x111C9, 0x111CC }, { 0x1122C, 0x11237 },
211 { 0x1123E, 0x1123E }, { 0x112DF, 0x112EA }, { 0x11300, 0x11303 },
212 { 0x1133B, 0x1133C }, { 0x1133E, 0x11344 }, { 0x11347, 0x11348 },
213 { 0x1134B, 0x1134D }, { 0x11357, 0x11357 }, { 0x11362, 0x11363 },
214 { 0x11435, 0x11446 }, { 0x1145E, 0x1145E }, { 0x114B0, 0x114C3 },
215 { 0x115AF, 0x115B5 }, { 0x115B8, 0x115C0 }, { 0x115DC, 0x115DD },
216 { 0x11630, 0x11640 }, { 0x116AB, 0x116B7 }, { 0x1171D, 0x1172B },
217 { 0x1182C, 0x1183A }, { 0x11930, 0x11935 }, { 0x11937, 0x11938 },
218 { 0x1193B, 0x1193E }, { 0x11940, 0x11940 }, { 0x11942, 0x11942 },
219 { 0x119D1, 0x119D7 }, { 0x119DA, 0x119E0 }, { 0x11A01, 0x11A0A },
220 { 0x11A33, 0x11A39 }, { 0x11A3B, 0x11A3E }, { 0x11A47, 0x11A47 },
221 { 0x11A51, 0x11A5B }, { 0x11A8A, 0x11A96 }, { 0x11A98, 0x11A99 },
222 { 0x11C30, 0x11C36 }, { 0x11C38, 0x11C3D }, { 0x11C3F, 0x11C3F },
223 { 0x11C92, 0x11CA7 }, { 0x11CAA, 0x11CB0 }, { 0x11CB2, 0x11CB3 },
224 { 0x11CB5, 0x11CB6 }, { 0x11D31, 0x11D36 }, { 0x11D3A, 0x11D3A },
225 { 0x11D3C, 0x11D3D }, { 0x11D3F, 0x11D45 }, { 0x11D47, 0x11D47 },
226 { 0x11D90, 0x11D91 }, { 0x11D95, 0x11D95 }, { 0x11D97, 0x11D97 },
227 { 0x11EF3, 0x11EF4 }, { 0x13430, 0x13438 }, { 0x16AF0, 0x16AF4 },
228 { 0x16B30, 0x16B36 }, { 0x16F4F, 0x16F4F }, { 0x16F8F, 0x16F92 },
229 { 0x1BC9D, 0x1BC9E }, { 0x1BCA0, 0x1BCA3 }, { 0x1D167, 0x1D169 },
230 { 0x1D173, 0x1D182 }, { 0x1D185, 0x1D18B }, { 0x1D1AA, 0x1D1AD },
231 { 0x1D242, 0x1D244 }, { 0x1DA00, 0x1DA36 }, { 0x1DA3B, 0x1DA6C },
232 { 0x1DA75, 0x1DA75 }, { 0x1DA84, 0x1DA84 }, { 0x1DA9B, 0x1DA9F },
233 { 0x1DAA1, 0x1DAAF }, { 0x1E000, 0x1E006 }, { 0x1E008, 0x1E018 },
234 { 0x1E01B, 0x1E021 }, { 0x1E023, 0x1E024 }, { 0x1E026, 0x1E02A },
235 { 0x1E130, 0x1E136 }, { 0x1E2AE, 0x1E2AE }, { 0x1E2EC, 0x1E2EF },
236 { 0x1E4EC, 0x1E4EF }, { 0x1E8D0, 0x1E8D6 }, { 0x1E944, 0x1E94A },
237 { 0x1E947, 0x1E94A }, { 0xE0100, 0xE01EF }
238 };
239
240 /* test for 8-bit control characters */
241 if (ucs == 0)
242 return 0;
243 if (ucs < 32 || (ucs >= 0x7f && ucs < 0xa0))
244 return -1;
245
246 /* binary search in table of non-spacing characters */
247 if (bisearch(ucs, combining,
248 sizeof(combining) / sizeof(struct interval) - 1))
249 return 0;
250
251 /* if we arrive here, ucs is not a combining or C0/C1 control character */
252
253 return 1 +
254 (ucs >= 0x1100 &&
255 (ucs <= 0x115f || /* Hangul Jamo init. consonants */
256 ucs == 0x2329 || ucs == 0x232a ||
257 (ucs >= 0x2e80 && ucs <= 0xa4cf &&
258 ucs != 0x303f) || /* CJK ... Yi */
259 (ucs >= 0xac00 && ucs <= 0xd7a3) || /* Hangul Syllables */
260 (ucs >= 0xf900 && ucs <= 0xfaff) || /* CJK Compatibility Ideographs */
261 (ucs >= 0xfe10 && ucs <= 0xfe19) || /* Vertical forms */
262 (ucs >= 0xfe30 && ucs <= 0xfe6f) || /* CJK Compatibility Forms */
263 (ucs >= 0xff00 && ucs <= 0xff60) || /* Fullwidth Forms */
264 (ucs >= 0xffe0 && ucs <= 0xffe6) ||
265 (ucs >= 0x1f300 && ucs <= 0x1f64f) || /* modified: added Emoticons */
266 (ucs >= 0x1f680 && ucs <= 0x1f6ff) || /* modified: added Transport and Map Symbols */
267 (ucs >= 0x1f900 && ucs <= 0x1f9ff) || /* modified: added Supplemental Symbols and Pictographs */
268 (ucs >= 0x20000 && ucs <= 0x2fffd) ||
269 (ucs >= 0x30000 && ucs <= 0x3fffd)));
270}
271
272
273int mk_wcswidth(const mk_wchar_t *pwcs, size_t n) // modified: use mk_wchar_t
274{
275 int w, width = 0;
276
277 for (;*pwcs && n-- > 0; pwcs++)
278 if ((w = mk_wcwidth(*pwcs)) < 0)
279 return -1;
280 else
281 width += w;
282
283 return width;
284}
285
diff --git a/src/wcwidth.h b/src/wcwidth.h
new file mode 100644
index 0000000..f2fee11
--- /dev/null
+++ b/src/wcwidth.h
@@ -0,0 +1,21 @@
1// wcwidth.h
2
3// Windows does not have a wcwidth function, so we use compatibilty code from
4// http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c by Markus Kuhn
5
6#ifndef MK_WCWIDTH_H
7#define MK_WCWIDTH_H
8
9
10#ifdef _WIN32
11#include <stdint.h>
12typedef uint32_t mk_wchar_t; // Windows wchar_t can be 16-bit, we need 32-bit
13#else
14#include <wchar.h>
15typedef wchar_t mk_wchar_t; // Posix wchar_t is 32-bit so just use that
16#endif
17
18int mk_wcwidth(mk_wchar_t ucs);
19int mk_wcswidth(const mk_wchar_t *pwcs, size_t n);
20
21#endif // MK_WCWIDTH_H
diff --git a/system/init.lua b/system/init.lua
index 893cd91..c232cd2 100644
--- a/system/init.lua
+++ b/system/init.lua
@@ -2,45 +2,11 @@
2-- @module init 2-- @module init
3 3
4local sys = require 'system.core' 4local sys = require 'system.core'
5local global_backup -- global backup for terminal settings
6
7
8
9local add_gc_method do
10 -- feature detection; __GC meta-method, not available in all Lua versions
11 local has_gc = false
12 local tt = setmetatable({}, { -- luacheck: ignore
13 __gc = function() has_gc = true end
14 })
15
16 -- clear table and run GC to trigger
17 tt = nil
18 collectgarbage()
19 collectgarbage()
20
21
22 if has_gc then
23 -- use default GC mechanism since it is available
24 function add_gc_method(t, f)
25 setmetatable(t, { __gc = f })
26 end
27 else
28 -- create workaround using a proxy userdata, typical for Lua 5.1
29 function add_gc_method(t, f)
30 local proxy = newproxy(true)
31 getmetatable(proxy).__gc = function()
32 t["__gc_proxy"] = nil
33 f(t)
34 end
35 t["__gc_proxy"] = proxy
36 end
37 end
38end
39 5
40 6
41 7
42--- Returns a backup of terminal setting for stdin/out/err. 8--- Returns a backup of terminal setting for stdin/out/err.
43-- Handles terminal/console flags and non-block flags on the streams. 9-- Handles terminal/console flags, Windows codepage, and non-block flags on the streams.
44-- Backs up terminal/console flags only if a stream is a tty. 10-- Backs up terminal/console flags only if a stream is a tty.
45-- @return table with backup of terminal settings 11-- @return table with backup of terminal settings
46function sys.termbackup() 12function sys.termbackup()
@@ -63,6 +29,9 @@ function sys.termbackup()
63 backup.block_out = sys.getnonblock(io.stdout) 29 backup.block_out = sys.getnonblock(io.stdout)
64 backup.block_err = sys.getnonblock(io.stderr) 30 backup.block_err = sys.getnonblock(io.stderr)
65 31
32 backup.consoleoutcodepage = sys.getconsoleoutputcp()
33 backup.consolecp = sys.getconsolecp()
34
66 return backup 35 return backup
67end 36end
68 37
@@ -82,25 +51,65 @@ function sys.termrestore(backup)
82 if backup.block_in ~= nil then sys.setnonblock(io.stdin, backup.block_in) end 51 if backup.block_in ~= nil then sys.setnonblock(io.stdin, backup.block_in) end
83 if backup.block_out ~= nil then sys.setnonblock(io.stdout, backup.block_out) end 52 if backup.block_out ~= nil then sys.setnonblock(io.stdout, backup.block_out) end
84 if backup.block_err ~= nil then sys.setnonblock(io.stderr, backup.block_err) end 53 if backup.block_err ~= nil then sys.setnonblock(io.stderr, backup.block_err) end
54
55 if backup.consoleoutcodepage then sys.setconsoleoutputcp(backup.consoleoutcodepage) end
56 if backup.consolecp then sys.setconsolecp(backup.consolecp) end
85 return true 57 return true
86end 58end
87 59
88 60
89 61
90--- Backs up terminal settings and restores them on application exit. 62do -- autotermrestore
91-- Calls `termbackup` to back up terminal settings and sets up a GC method to 63 local global_backup -- global backup for terminal settings
92-- automatically restore them on application exit (also works on Lua 5.1). 64
93-- @treturn[1] boolean true 65
94-- @treturn[2] nil if the backup was already created 66 local add_gc_method do
95-- @treturn[2] string error message 67 -- feature detection; __GC meta-method, not available in all Lua versions
96function sys.autotermrestore() 68 local has_gc = false
97 if global_backup then 69 local tt = setmetatable({}, { -- luacheck: ignore
98 return nil, "global terminal backup was already set up" 70 __gc = function() has_gc = true end
71 })
72
73 -- clear table and run GC to trigger
74 tt = nil
75 collectgarbage()
76 collectgarbage()
77
78
79 if has_gc then
80 -- use default GC mechanism since it is available
81 function add_gc_method(t, f)
82 setmetatable(t, { __gc = f })
83 end
84 else
85 -- create workaround using a proxy userdata, typical for Lua 5.1
86 function add_gc_method(t, f)
87 local proxy = newproxy(true)
88 getmetatable(proxy).__gc = function()
89 t["__gc_proxy"] = nil
90 f(t)
91 end
92 t["__gc_proxy"] = proxy
93 end
94 end
95 end
96
97
98 --- Backs up terminal settings and restores them on application exit.
99 -- Calls `termbackup` to back up terminal settings and sets up a GC method to
100 -- automatically restore them on application exit (also works on Lua 5.1).
101 -- @treturn[1] boolean true
102 -- @treturn[2] nil if the backup was already created
103 -- @treturn[2] string error message
104 function sys.autotermrestore()
105 if global_backup then
106 return nil, "global terminal backup was already set up"
107 end
108 global_backup = sys.termbackup()
109 add_gc_method(global_backup, function(self)
110 sys.termrestore(self) end)
111 return true
99 end 112 end
100 global_backup = sys.termbackup()
101 add_gc_method(global_backup, function(self)
102 sys.termrestore(self) end)
103 return true
104end 113end
105 114
106 115
@@ -208,12 +217,9 @@ end
208 217
209 218
210do 219do
211 local _readkey = sys.readkey
212 local interval = 0.1
213
214 --- Reads a single byte from the console, with a timeout. 220 --- Reads a single byte from the console, with a timeout.
215 -- This function uses `system.sleep` to wait in increments of 0.1 seconds until either a byte is 221 -- This function uses `system.sleep` to wait until either a byte is available or the timeout is reached.
216 -- available or the timeout is reached. 222 -- The sleep period is exponentially backing off, starting at 0.0125 seconds, with a maximum of 0.2 seconds.
217 -- It returns immediately if a byte is available or if `timeout` is less than or equal to `0`. 223 -- It returns immediately if a byte is available or if `timeout` is less than or equal to `0`.
218 -- @tparam number timeout the timeout in seconds. 224 -- @tparam number timeout the timeout in seconds.
219 -- @treturn[1] integer the key code of the key that was received 225 -- @treturn[1] integer the key code of the key that was received
@@ -224,11 +230,13 @@ do
224 error("arg #1 to readkey, expected timeout in seconds, got " .. type(timeout), 2) 230 error("arg #1 to readkey, expected timeout in seconds, got " .. type(timeout), 2)
225 end 231 end
226 232
227 local key = _readkey() 233 local interval = 0.0125
234 local key = sys._readkey()
228 while key == nil and timeout > 0 do 235 while key == nil and timeout > 0 do
229 sys.sleep(interval) 236 sys.sleep(math.min(interval, timeout))
230 timeout = timeout - interval 237 timeout = timeout - interval
231 key = _readkey() 238 interval = math.min(0.2, interval * 2)
239 key = sys._readkey()
232 end 240 end
233 241
234 if key then 242 if key then
@@ -246,14 +254,14 @@ do
246 local utf8_length -- length of utf8 sequence currently being processed 254 local utf8_length -- length of utf8 sequence currently being processed
247 local unpack = unpack or table.unpack 255 local unpack = unpack or table.unpack
248 256
249 -- Reads a single key, if it is the start of ansi escape sequence then it reads 257 --- Reads a single key, if it is the start of ansi escape sequence then it reads
250 -- the full sequence. 258 -- the full sequence. The key can be a multi-byte string in case of multibyte UTF-8 character.
251 -- This function uses `system.readkey`, and hence `system.sleep` to wait until either a key is 259 -- This function uses `system.readkey`, and hence `system.sleep` to wait until either a key is
252 -- available or the timeout is reached. 260 -- available or the timeout is reached.
253 -- It returns immediately if a key is available or if `timeout` is less than or equal to `0`. 261 -- It returns immediately if a key is available or if `timeout` is less than or equal to `0`.
254 -- In case of an ANSI sequence, it will return the full sequence as a string. 262 -- In case of an ANSI sequence, it will return the full sequence as a string.
255 -- @tparam number timeout the timeout in seconds. 263 -- @tparam number timeout the timeout in seconds.
256 -- @treturn[1] string the character that was received, or a complete ANSI sequence 264 -- @treturn[1] string the character that was received (can be multi-byte), or a complete ANSI sequence
257 -- @treturn[1] string the type of input: `"char"` for a single key, `"ansi"` for an ANSI sequence 265 -- @treturn[1] string the type of input: `"char"` for a single key, `"ansi"` for an ANSI sequence
258 -- @treturn[2] nil in case of an error 266 -- @treturn[2] nil in case of an error
259 -- @treturn[2] string error message; `"timeout"` if the timeout was reached. 267 -- @treturn[2] string error message; `"timeout"` if the timeout was reached.