diff options
| author | Thijs Schreijer <thijs@thijsschreijer.nl> | 2026-01-29 11:02:33 +0100 |
|---|---|---|
| committer | Thijs Schreijer <thijs@thijsschreijer.nl> | 2026-01-29 13:55:39 +0100 |
| commit | 1159329b247d6532fecb375e7008aca979261eaa (patch) | |
| tree | 0b56caf9a03ba47c2c77d5313662ea27f198752d /spec/04-term_spec.lua | |
| parent | dfd0d4b8ca3607ae39b1d2cbad4e3a7180dd6754 (diff) | |
| download | luasystem-1159329b247d6532fecb375e7008aca979261eaa.tar.gz luasystem-1159329b247d6532fecb375e7008aca979261eaa.tar.bz2 luasystem-1159329b247d6532fecb375e7008aca979261eaa.zip | |
fix(wcwidth): add a generator for width ranges
The generator script parses the official Unicode data to create
the actual ranges for zero-width, double-width, and ambiguous-width characters.
Diffstat (limited to 'spec/04-term_spec.lua')
| -rw-r--r-- | spec/04-term_spec.lua | 59 |
1 file changed, 51 insertions, 8 deletions
diff --git a/spec/04-term_spec.lua b/spec/04-term_spec.lua index 5dea046..2d50a6b 100644 --- a/spec/04-term_spec.lua +++ b/spec/04-term_spec.lua | |||
| @@ -512,21 +512,35 @@ describe("Terminal:", function() | |||
| 512 | describe("utf8cwidth()", function() | 512 | describe("utf8cwidth()", function() |
| 513 | 513 | ||
| 514 | -- utf-8 strings | 514 | -- utf-8 strings |
| 515 | local ch1 = string.char(226, 130, 172) -- "€" single | 515 | local ch1 = string.char(65) -- "A" single |
| 516 | local ch2 = string.char(240, 159, 154, 128) -- "🚀" double | 516 | local ch2 = string.char(240, 159, 154, 128) -- "🚀" double |
| 517 | local ch3 = string.char(228, 189, 160) -- "你" double | 517 | local ch3 = string.char(228, 189, 160) -- "你" double |
| 518 | local ch4 = string.char(229, 165, 189) -- "好" double | 518 | local ch4 = string.char(229, 165, 189) -- "好" double |
| 519 | local ch5 = string.char(226, 130, 172) -- "€" ambiguous | ||
| 519 | 520 | ||
| 520 | -- unicode codepoints | 521 | -- unicode codepoints |
| 521 | local cp1 = 8364 -- "€" single | 522 | local cp1 = 65 -- "A" single |
| 522 | local cp2 = 128640 -- "🚀" double | 523 | local cp2 = 128640 -- "🚀" double |
| 523 | local cp3 = 20320 -- "你" double | 524 | local cp3 = 20320 -- "你" double |
| 524 | local cp4 = 22909 -- "好" double | 525 | local cp4 = 22909 -- "好" double |
| 526 | local cp5 = 8364 -- "€" ambiguous | ||
| 525 | 527 | ||
| 526 | it("handles zero width characters", function() | 528 | it("handles zero width characters", function() |
| 527 | assert.same({0}, {system.utf8cwidth("")}) -- empty string returns 0-size | 529 | assert.same({0}, {system.utf8cwidth("")}) -- empty string returns 0-size |
| 528 | assert.same({nil, 'Character width determination failed'}, {system.utf8cwidth("\a")}) -- bell character | 530 | assert.same({0}, {system.utf8cwidth("\0")}) -- null character |
| 529 | assert.same({nil, 'Character width determination failed'}, {system.utf8cwidth("\27")}) -- escape character | 531 | |
| 532 | -- zero-width (from wcwidth_zero_width.c / wcwidth_update.lua) | ||
| 533 | local zw_sp = string.char(0xE2, 0x80, 0x8B) -- U+200B Zero Width Space | ||
| 534 | local zw_nj = string.char(0xE2, 0x80, 0x8C) -- U+200C Zero Width Non-Joiner | ||
| 535 | local zw_j = string.char(0xE2, 0x80, 0x8D) -- U+200D Zero Width Joiner | ||
| 536 | local zw_nb = string.char(0xEF, 0xBB, 0xBF) -- U+FEFF Zero Width No-Break Space (BOM) | ||
| 537 | local soft_hy = string.char(0xC2, 0xAD) -- U+00AD Soft hyphen | ||
| 538 | |||
| 539 | assert.same({0}, {system.utf8cwidth(zw_sp)}) | ||
| 540 | assert.same({0}, {system.utf8cwidth(zw_nj)}) | ||
| 541 | assert.same({0}, {system.utf8cwidth(zw_j)}) | ||
| 542 | assert.same({0}, {system.utf8cwidth(zw_nb)}) | ||
| 543 | assert.same({0}, {system.utf8cwidth(soft_hy)}) | ||
| 530 | end) | 544 | end) |
| 531 | 545 | ||
| 532 | it("handles single width characters", function() | 546 | it("handles single width characters", function() |
| @@ -540,8 +554,16 @@ describe("Terminal:", function() | |||
| 540 | assert.same({2}, {system.utf8cwidth(ch4)}) | 554 | assert.same({2}, {system.utf8cwidth(ch4)}) |
| 541 | end) | 555 | end) |
| 542 | 556 | ||
| 557 | it("handles ambiguous width characters", function() | ||
| 558 | assert.same({99}, {system.utf8cwidth(ch5, 99)}) | ||
| 559 | end) | ||
| 560 | |||
| 561 | it("ambiguous width defaults to 1", function() | ||
| 562 | assert.same({1}, {system.utf8cwidth(ch5, nil)}) | ||
| 563 | end) | ||
| 564 | |||
| 543 | it("returns the width of the first character in the string", function() | 565 | it("returns the width of the first character in the string", function() |
| 544 | assert.same({nil, 'Character width determination failed'}, {system.utf8cwidth("\a" .. ch1)}) -- bell character + EURO | 566 | assert.same({nil, 'Control characters have no width'}, {system.utf8cwidth("\a" .. ch1)}) -- bell character + "A" |
| 545 | assert.same({1}, {system.utf8cwidth(ch1 .. ch2)}) | 567 | assert.same({1}, {system.utf8cwidth(ch1 .. ch2)}) |
| 546 | assert.same({2}, {system.utf8cwidth(ch2 .. ch3 .. ch4)}) | 568 | assert.same({2}, {system.utf8cwidth(ch2 .. ch3 .. ch4)}) |
| 547 | end) | 569 | end) |
| @@ -551,6 +573,12 @@ describe("Terminal:", function() | |||
| 551 | assert.same({2}, {system.utf8cwidth(cp2)}) | 573 | assert.same({2}, {system.utf8cwidth(cp2)}) |
| 552 | assert.same({2}, {system.utf8cwidth(cp3)}) | 574 | assert.same({2}, {system.utf8cwidth(cp3)}) |
| 553 | assert.same({2}, {system.utf8cwidth(cp4)}) | 575 | assert.same({2}, {system.utf8cwidth(cp4)}) |
| 576 | assert.same({99}, {system.utf8cwidth(cp5, 99)}) -- ambiguous width | ||
| 577 | end) | ||
| 578 | |||
| 579 | it("returns an error on control characters", function() | ||
| 580 | assert.same({nil, 'Control characters have no width'}, {system.utf8cwidth("\a")}) -- bell character | ||
| 581 | assert.same({nil, 'Control characters have no width'}, {system.utf8cwidth("\27")}) -- escape character | ||
| 554 | end) | 582 | end) |
| 555 | 583 | ||
| 556 | it("returns an error on bad argument", function() | 584 | it("returns an error on bad argument", function() |
| @@ -570,15 +598,16 @@ describe("Terminal:", function() | |||
| 570 | 598 | ||
| 571 | describe("utf8swidth()", function() | 599 | describe("utf8swidth()", function() |
| 572 | 600 | ||
| 573 | local ch1 = string.char(226, 130, 172) -- "€" single | 601 | local ch1 = string.char(65) -- "A" single |
| 574 | local ch2 = string.char(240, 159, 154, 128) -- "🚀" double | 602 | local ch2 = string.char(240, 159, 154, 128) -- "🚀" double |
| 575 | local ch3 = string.char(228, 189, 160) -- "你" double | 603 | local ch3 = string.char(228, 189, 160) -- "你" double |
| 576 | local ch4 = string.char(229, 165, 189) -- "好" double | 604 | local ch4 = string.char(229, 165, 189) -- "好" double |
| 605 | local ch5 = string.char(226, 130, 172) -- "€" ambiguous | ||
| 577 | 606 | ||
| 578 | it("handles zero width characters", function() | 607 | it("handles zero width characters", function() |
| 579 | assert.same({0}, {system.utf8swidth("")}) -- empty string returns 0-size | 608 | assert.same({0}, {system.utf8swidth("")}) -- empty string returns 0-size |
| 580 | assert.same({nil, 'Character width determination failed'}, {system.utf8swidth("\a")}) -- bell character | 609 | assert.same({nil, 'Control characters have no width'}, {system.utf8swidth("\a")}) -- bell character |
| 581 | assert.same({nil, 'Character width determination failed'}, {system.utf8swidth("\27")}) -- escape character | 610 | assert.same({nil, 'Control characters have no width'}, {system.utf8swidth("\27")}) -- escape character |
| 582 | end) | 611 | end) |
| 583 | 612 | ||
| 584 | it("handles multi-character UTF8 strings", function() | 613 | it("handles multi-character UTF8 strings", function() |
| @@ -586,6 +615,20 @@ describe("Terminal:", function() | |||
| 586 | assert.same({16}, {system.utf8swidth("hello " .. ch3 .. ch4 .. " world")}) | 615 | assert.same({16}, {system.utf8swidth("hello " .. ch3 .. ch4 .. " world")}) |
| 587 | end) | 616 | end) |
| 588 | 617 | ||
| 618 | it("handles ambiguous width characters", function() | ||
| 619 | assert.same({12}, {system.utf8swidth(ch5 .. "1234567890", 2)}) | ||
| 620 | end) | ||
| 621 | |||
| 622 | it("ambiguous width defaults to 1", function() | ||
| 623 | assert.same({1}, {system.utf8swidth(ch5, nil)}) | ||
| 624 | end) | ||
| 625 | |||
| 626 | it("ambiguous width must be 1 or 2", function() | ||
| 627 | assert.has.error(function() | ||
| 628 | system.utf8swidth(ch5, 3) | ||
| 629 | end, "bad argument #2 to 'utf8swidth' (Ambiguous width must be 1 or 2)") | ||
| 630 | end) | ||
| 631 | |||
| 589 | end) | 632 | end) |
| 590 | 633 | ||
| 591 | 634 | ||
