diff options
author | Roberto Ierusalimschy <roberto@inf.puc-rio.br> | 2019-04-17 14:08:22 -0300 |
---|---|---|
committer | Roberto Ierusalimschy <roberto@inf.puc-rio.br> | 2019-04-17 14:08:22 -0300 |
commit | 24bf757183d8bd97f6f5b43d916814f3269c8347 (patch) | |
tree | 646cd65d6e2dab57691f98f83f15f25c70685ef8 /lpprint.c | |
parent | 3f7797419e4d7493e1364290a5b127d1cb45e3bf (diff) | |
download | lpeg-24bf757183d8bd97f6f5b43d916814f3269c8347.tar.gz lpeg-24bf757183d8bd97f6f5b43d916814f3269c8347.tar.bz2 lpeg-24bf757183d8bd97f6f5b43d916814f3269c8347.zip |
Implementation of UTF-8 ranges
New constructor 'lpeg.utfR(from, to)' creates a pattern that matches
UTF-8 byte sequences representing code points in the range [from, to].
Diffstat (limited to 'lpprint.c')
-rw-r--r-- | lpprint.c | 19 |
1 file changed, 15 insertions, 4 deletions
@@ -56,7 +56,7 @@ void printinst (const Instruction *op, const Instruction *p) { | |||
56 | const char *const names[] = { | 56 | const char *const names[] = { |
57 | "any", "char", "set", | 57 | "any", "char", "set", |
58 | "testany", "testchar", "testset", | 58 | "testany", "testchar", "testset", |
59 | "span", "behind", | 59 | "span", "utf-range", "behind", |
60 | "ret", "end", | 60 | "ret", "end", |
61 | "choice", "jmp", "call", "open_call", | 61 | "choice", "jmp", "call", "open_call", |
62 | "commit", "partial_commit", "back_commit", "failtwice", "fail", "giveup", | 62 | "commit", "partial_commit", "back_commit", "failtwice", "fail", "giveup", |
@@ -66,11 +66,15 @@ void printinst (const Instruction *op, const Instruction *p) { | |||
66 | printf("%02ld: %s ", (long)(p - op), names[p->i.code]); | 66 | printf("%02ld: %s ", (long)(p - op), names[p->i.code]); |
67 | switch ((Opcode)p->i.code) { | 67 | switch ((Opcode)p->i.code) { |
68 | case IChar: { | 68 | case IChar: { |
69 | printf("'%c'", p->i.aux); | 69 | printf("'%c' (%02x)", p->i.aux, p->i.aux); |
70 | break; | 70 | break; |
71 | } | 71 | } |
72 | case ITestChar: { | 72 | case ITestChar: { |
73 | printf("'%c'", p->i.aux); printjmp(op, p); | 73 | printf("'%c' (%02x)", p->i.aux, p->i.aux); printjmp(op, p); |
74 | break; | ||
75 | } | ||
76 | case IUTFR: { | ||
77 | printf("%d - %d", p[1].offset, utf_to(p)); | ||
74 | break; | 78 | break; |
75 | } | 79 | } |
76 | case IFullCapture: { | 80 | case IFullCapture: { |
@@ -148,7 +152,7 @@ void printcaplist (Capture *cap, Capture *limit) { | |||
148 | 152 | ||
149 | static const char *tagnames[] = { | 153 | static const char *tagnames[] = { |
150 | "char", "set", "any", | 154 | "char", "set", "any", |
151 | "true", "false", | 155 | "true", "false", "utf8.range", |
152 | "rep", | 156 | "rep", |
153 | "seq", "choice", | 157 | "seq", "choice", |
154 | "not", "and", | 158 | "not", "and", |
@@ -177,6 +181,13 @@ void printtree (TTree *tree, int ident) { | |||
177 | printf("\n"); | 181 | printf("\n"); |
178 | break; | 182 | break; |
179 | } | 183 | } |
184 | case TUTFR: { | ||
185 | assert(sib1(tree)->tag == TXInfo); | ||
186 | printf(" %d (%02x %d) - %d (%02x %d) \n", | ||
187 | tree->u.n, tree->key, tree->cap, | ||
188 | sib1(tree)->u.n, sib1(tree)->key, sib1(tree)->cap); | ||
189 | break; | ||
190 | } | ||
180 | case TOpenCall: case TCall: { | 191 | case TOpenCall: case TCall: { |
181 | assert(sib1(sib2(tree))->tag == TXInfo); | 192 | assert(sib1(sib2(tree))->tag == TXInfo); |
182 | printf(" key: %d (rule: %d)\n", tree->key, sib1(sib2(tree))->u.n); | 193 | printf(" key: %d (rule: %d)\n", tree->key, sib1(sib2(tree))->u.n); |