From 24bf757183d8bd97f6f5b43d916814f3269c8347 Mon Sep 17 00:00:00 2001
From: Roberto Ierusalimschy
Date: Wed, 17 Apr 2019 14:08:22 -0300
Subject: Implementation of UTF-8 ranges

New constructor 'lpeg.utfR(from, to)' creates a pattern that matches
UTF-8 byte sequences representing code points in the range [from, to].
---
 lpvm.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'lpvm.h')

diff --git a/lpvm.h b/lpvm.h
index 576429f..9fde967 100644
--- a/lpvm.h
+++ b/lpvm.h
@@ -17,6 +17,7 @@ typedef enum Opcode {
   ITestChar, /* if char != aux, jump to 'offset' */
   ITestSet, /* if char not in buff, jump to 'offset' */
   ISpan, /* read a span of chars in buff */
+  IUTFR, /* if codepoint not in range [offset, utf_to], fail */
   IBehind, /* walk back 'aux' characters (fail if not possible) */
   IRet, /* return from a rule */
   IEnd, /* end of pattern */
@@ -50,6 +51,10 @@ typedef union Instruction {
 } Instruction;
 
 
+/* extract 24-bit value from an instruction */
+#define utf_to(inst) (((inst)->i.key << 8) | (inst)->i.aux)
+
+
 void printpatt (Instruction *p, int n);
 const char *match (lua_State *L, const char *o, const char *s, const char *e,
                    Instruction *op, Capture *capture, int ptop);
-- 
cgit v1.2.3-55-g6feb