diff options
author | Roberto Ierusalimschy <roberto@inf.puc-rio.br> | 2019-04-17 14:08:22 -0300 |
---|---|---|
committer | Roberto Ierusalimschy <roberto@inf.puc-rio.br> | 2019-04-17 14:08:22 -0300 |
commit | 24bf757183d8bd97f6f5b43d916814f3269c8347 (patch) | |
tree | 646cd65d6e2dab57691f98f83f15f25c70685ef8 /lpvm.h | |
parent | 3f7797419e4d7493e1364290a5b127d1cb45e3bf (diff) | |
download | lpeg-24bf757183d8bd97f6f5b43d916814f3269c8347.tar.gz lpeg-24bf757183d8bd97f6f5b43d916814f3269c8347.tar.bz2 lpeg-24bf757183d8bd97f6f5b43d916814f3269c8347.zip |
Implementation of UTF-8 ranges
New constructor 'lpeg.utfR(from, to)' creates a pattern that matches
UTF-8 byte sequences representing code points in the range [from, to].
Diffstat (limited to 'lpvm.h')
-rw-r--r-- | lpvm.h | 5 |
1 file changed, 5 insertions, 0 deletions
@@ -17,6 +17,7 @@ typedef enum Opcode { | |||
17 | ITestChar, /* if char != aux, jump to 'offset' */ | 17 | ITestChar, /* if char != aux, jump to 'offset' */ |
18 | ITestSet, /* if char not in buff, jump to 'offset' */ | 18 | ITestSet, /* if char not in buff, jump to 'offset' */ |
19 | ISpan, /* read a span of chars in buff */ | 19 | ISpan, /* read a span of chars in buff */ |
20 | IUTFR, /* if codepoint not in range [offset, utf_to], fail */ | ||
20 | IBehind, /* walk back 'aux' characters (fail if not possible) */ | 21 | IBehind, /* walk back 'aux' characters (fail if not possible) */ |
21 | IRet, /* return from a rule */ | 22 | IRet, /* return from a rule */ |
22 | IEnd, /* end of pattern */ | 23 | IEnd, /* end of pattern */ |
@@ -50,6 +51,10 @@ typedef union Instruction { | |||
50 | } Instruction; | 51 | } Instruction; |
51 | 52 | ||
52 | 53 | ||
54 | /* extract 24-bit value from an instruction: 'i.key' shifted left by 8 bits, OR-ed with 'i.aux'; this is the upper bound of the range checked by IUTFR (note: 'inst' is evaluated twice) */ | ||
55 | #define utf_to(inst) (((inst)->i.key << 8) | (inst)->i.aux) | ||
56 | |||
57 | |||
53 | void printpatt (Instruction *p, int n); | 58 | void printpatt (Instruction *p, int n); |
54 | const char *match (lua_State *L, const char *o, const char *s, const char *e, | 59 | const char *match (lua_State *L, const char *o, const char *s, const char *e, |
55 | Instruction *op, Capture *capture, int ptop); | 60 | Instruction *op, Capture *capture, int ptop); |