From 24bf757183d8bd97f6f5b43d916814f3269c8347 Mon Sep 17 00:00:00 2001 From: Roberto Ierusalimschy Date: Wed, 17 Apr 2019 14:08:22 -0300 Subject: Implementation of UTF-8 ranges New constructor 'lpeg.utfR(from, to)' creates a pattern that matches UTF-8 byte sequences representing code points in the range [from, to]. --- lpprint.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) (limited to 'lpprint.c') diff --git a/lpprint.c b/lpprint.c index 397785e..6893bb8 100644 --- a/lpprint.c +++ b/lpprint.c @@ -56,7 +56,7 @@ void printinst (const Instruction *op, const Instruction *p) { const char *const names[] = { "any", "char", "set", "testany", "testchar", "testset", - "span", "behind", + "span", "utf-range", "behind", "ret", "end", "choice", "jmp", "call", "open_call", "commit", "partial_commit", "back_commit", "failtwice", "fail", "giveup", @@ -66,11 +66,15 @@ void printinst (const Instruction *op, const Instruction *p) { printf("%02ld: %s ", (long)(p - op), names[p->i.code]); switch ((Opcode)p->i.code) { case IChar: { - printf("'%c'", p->i.aux); + printf("'%c' (%02x)", p->i.aux, p->i.aux); break; } case ITestChar: { - printf("'%c'", p->i.aux); printjmp(op, p); + printf("'%c' (%02x)", p->i.aux, p->i.aux); printjmp(op, p); + break; + } + case IUTFR: { + printf("%d - %d", p[1].offset, utf_to(p)); break; } case IFullCapture: { @@ -148,7 +152,7 @@ void printcaplist (Capture *cap, Capture *limit) { static const char *tagnames[] = { "char", "set", "any", - "true", "false", + "true", "false", "utf8.range", "rep", "seq", "choice", "not", "and", @@ -177,6 +181,13 @@ void printtree (TTree *tree, int ident) { printf("\n"); break; } + case TUTFR: { + assert(sib1(tree)->tag == TXInfo); + printf(" %d (%02x %d) - %d (%02x %d) \n", + tree->u.n, tree->key, tree->cap, + sib1(tree)->u.n, sib1(tree)->key, sib1(tree)->cap); + break; + } case TOpenCall: case TCall: { assert(sib1(sib2(tree))->tag == TXInfo); printf(" key: %d (rule: %d)\n", tree->key, sib1(sib2(tree))->u.n); -- cgit v1.2.3-55-g6feb