diff options
author | Roberto Ierusalimschy <roberto@inf.puc-rio.br> | 2023-04-23 11:02:52 -0300 |
---|---|---|
committer | Roberto Ierusalimschy <roberto@inf.puc-rio.br> | 2023-04-23 11:02:52 -0300 |
commit | f8e9bc1c721a0802b2260f48ced72c7e04d7b1ef (patch) | |
tree | fcd765f59c5d74574bdb21cd7a11e1f723068d87 /lpvm.h | |
parent | 9f7183c280f310c0d0b49b7b9c3b8eac297fafa7 (diff) | |
download | lpeg-f8e9bc1c721a0802b2260f48ced72c7e04d7b1ef.tar.gz lpeg-f8e9bc1c721a0802b2260f48ced72c7e04d7b1ef.tar.bz2 lpeg-f8e9bc1c721a0802b2260f48ced72c7e04d7b1ef.zip |
Towards a smaller encoding for charsets in code
Diffstat (limited to 'lpvm.h')
-rw-r--r-- | lpvm.h | 18 |
1 files changed, 15 insertions, 3 deletions
@@ -5,15 +5,22 @@ | |||
5 | #include "lpcap.h" | 5 | #include "lpcap.h" |
6 | 6 | ||
7 | 7 | ||
8 | /* | ||
9 | ** About Character sets in instructions: a set is a bit map with an | ||
10 | ** initial offset, in bits, and a size, in number of instructions. If | ||
11 | ** aux1 is one, set is inverted (bit == 1 means char is not in set). | ||
12 | */ | ||
13 | |||
14 | |||
8 | /* Virtual Machine's instructions */ | 15 | /* Virtual Machine's instructions */ |
9 | typedef enum Opcode { | 16 | typedef enum Opcode { |
10 | IAny, /* if no char, fail */ | 17 | IAny, /* if no char, fail */ |
11 | IChar, /* if char != aux1, fail */ | 18 | IChar, /* if char != aux1, fail */ |
12 | ISet, /* if char not in buff, fail */ | 19 | ISet, /* if char not in set, fail */ |
13 | ITestAny, /* if no char, jump to 'offset' */ | 20 | ITestAny, /* if no char, jump to 'offset' */ |
14 | ITestChar, /* if char != aux1, jump to 'offset' */ | 21 | ITestChar, /* if char != aux1, jump to 'offset' */ |
15 | ITestSet, /* if char not in buff, jump to 'offset' */ | 22 | ITestSet, /* if char not in set, jump to 'offset' */ |
16 | ISpan, /* read a span of chars in buff */ | 23 | ISpan, /* read a span of chars in set */ |
17 | IUTFR, /* if codepoint not in range [offset, utf_to], fail */ | 24 | IUTFR, /* if codepoint not in range [offset, utf_to], fail */ |
18 | IBehind, /* walk back 'aux1' characters (fail if not possible) */ | 25 | IBehind, /* walk back 'aux1' characters (fail if not possible) */ |
19 | IRet, /* return from a rule */ | 26 | IRet, /* return from a rule */ |
@@ -43,6 +50,10 @@ typedef union Instruction { | |||
43 | byte aux1; | 50 | byte aux1; |
44 | union { | 51 | union { |
45 | short key; | 52 | short key; |
53 | struct { | ||
54 | byte offset; | ||
55 | byte size; | ||
56 | } set; | ||
46 | } aux2; | 57 | } aux2; |
47 | } i; | 58 | } i; |
48 | int offset; | 59 | int offset; |
@@ -54,6 +65,7 @@ typedef union Instruction { | |||
54 | #define utf_to(inst) (((inst)->i.aux2.key << 8) | (inst)->i.aux1) | 65 | #define utf_to(inst) (((inst)->i.aux2.key << 8) | (inst)->i.aux1) |
55 | 66 | ||
56 | 67 | ||
68 | int charinset (const Instruction *i, const byte *buff, unsigned int c); | ||
57 | void printpatt (Instruction *p, int n); | 69 | void printpatt (Instruction *p, int n); |
58 | const char *match (lua_State *L, const char *o, const char *s, const char *e, | 70 | const char *match (lua_State *L, const char *o, const char *s, const char *e, |
59 | Instruction *op, Capture *capture, int ptop); | 71 | Instruction *op, Capture *capture, int ptop); |