aboutsummaryrefslogtreecommitdiff
path: root/lpvm.h
diff options
context:
space:
mode:
author Roberto Ierusalimschy <roberto@inf.puc-rio.br> 2023-04-23 11:02:52 -0300
committer Roberto Ierusalimschy <roberto@inf.puc-rio.br> 2023-04-23 11:02:52 -0300
commit f8e9bc1c721a0802b2260f48ced72c7e04d7b1ef (patch)
tree fcd765f59c5d74574bdb21cd7a11e1f723068d87 /lpvm.h
parent 9f7183c280f310c0d0b49b7b9c3b8eac297fafa7 (diff)
download lpeg-f8e9bc1c721a0802b2260f48ced72c7e04d7b1ef.tar.gz
lpeg-f8e9bc1c721a0802b2260f48ced72c7e04d7b1ef.tar.bz2
lpeg-f8e9bc1c721a0802b2260f48ced72c7e04d7b1ef.zip
Towards a smaller encoding for charsets in code
Diffstat (limited to 'lpvm.h')
-rw-r--r--lpvm.h18
1 files changed, 15 insertions, 3 deletions
diff --git a/lpvm.h b/lpvm.h
index 607bf48..c02e943 100644
--- a/lpvm.h
+++ b/lpvm.h
@@ -5,15 +5,22 @@
5#include "lpcap.h" 5#include "lpcap.h"
6 6
7 7
8/*
9** About Character sets in instructions: a set is a bit map with an
10** initial offset, in bits, and a size, in number of instructions. If
11** aux1 is one, set is inverted (bit == 1 means char is not in set).
12*/
13
14
8/* Virtual Machine's instructions */ 15/* Virtual Machine's instructions */
9typedef enum Opcode { 16typedef enum Opcode {
10 IAny, /* if no char, fail */ 17 IAny, /* if no char, fail */
11 IChar, /* if char != aux1, fail */ 18 IChar, /* if char != aux1, fail */
12 ISet, /* if char not in buff, fail */ 19 ISet, /* if char not in set, fail */
13 ITestAny, /* if no char, jump to 'offset' */ 20 ITestAny, /* if no char, jump to 'offset' */
14 ITestChar, /* if char != aux1, jump to 'offset' */ 21 ITestChar, /* if char != aux1, jump to 'offset' */
15 ITestSet, /* if char not in buff, jump to 'offset' */ 22 ITestSet, /* if char not in set, jump to 'offset' */
16 ISpan, /* read a span of chars in buff */ 23 ISpan, /* read a span of chars in set */
17 IUTFR, /* if codepoint not in range [offset, utf_to], fail */ 24 IUTFR, /* if codepoint not in range [offset, utf_to], fail */
18 IBehind, /* walk back 'aux1' characters (fail if not possible) */ 25 IBehind, /* walk back 'aux1' characters (fail if not possible) */
19 IRet, /* return from a rule */ 26 IRet, /* return from a rule */
@@ -43,6 +50,10 @@ typedef union Instruction {
43 byte aux1; 50 byte aux1;
44 union { 51 union {
45 short key; 52 short key;
53 struct {
54 byte offset;
55 byte size;
56 } set;
46 } aux2; 57 } aux2;
47 } i; 58 } i;
48 int offset; 59 int offset;
@@ -54,6 +65,7 @@ typedef union Instruction {
54#define utf_to(inst) (((inst)->i.aux2.key << 8) | (inst)->i.aux1) 65#define utf_to(inst) (((inst)->i.aux2.key << 8) | (inst)->i.aux1)
55 66
56 67
68int charinset (const Instruction *i, const byte *buff, unsigned int c);
57void printpatt (Instruction *p, int n); 69void printpatt (Instruction *p, int n);
58const char *match (lua_State *L, const char *o, const char *s, const char *e, 70const char *match (lua_State *L, const char *o, const char *s, const char *e,
59 Instruction *op, Capture *capture, int ptop); 71 Instruction *op, Capture *capture, int ptop);