From 24bf757183d8bd97f6f5b43d916814f3269c8347 Mon Sep 17 00:00:00 2001 From: Roberto Ierusalimschy Date: Wed, 17 Apr 2019 14:08:22 -0300 Subject: Implementation of UTF-8 ranges New constructor 'lpeg.utfR(from, to)' creates a pattern that matches UTF-8 byte sequences representing code points in the range [from, to]. --- lptree.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'lptree.h') diff --git a/lptree.h b/lptree.h index 3e8b52b..892e013 100644 --- a/lptree.h +++ b/lptree.h @@ -18,6 +18,9 @@ typedef enum TTag { TAny, TTrue, TFalse, + TUTFR, /* range of UTF-8 codepoints; 'n' has initial codepoint; + 'cap' has length; 'key' has first byte; + extra info is similar for end codepoint */ TRep, /* 'sib1'* */ TSeq, /* 'sib1' 'sib2' */ TChoice, /* 'sib1' / 'sib2' */ @@ -26,8 +29,9 @@ typedef enum TTag { TCall, /* ktable[key] is rule's key; 'sib2' is rule being called */ TOpenCall, /* ktable[key] is rule's key */ TRule, /* ktable[key] is rule's key (but key == 0 for unused rules); - 'sib1' is rule's pattern pre-rule; 'sib2' is next rule; */ - TXInfo, /* extra info; 'n' is rule's sequential number */ + 'sib1' is rule's pattern pre-rule; 'sib2' is next rule; + extra info 'n' is rule's sequential number */ + TXInfo, /* extra info */ TGrammar, /* 'sib1' is initial (and first) rule */ TBehind, /* 'sib1' is pattern, 'n' is how much to go back */ TCapture, /* captures: 'cap' is kind of capture (enum 'CapKind'); -- cgit v1.2.3-55-g6feb