aboutsummaryrefslogtreecommitdiff
path: root/lpcset.c
blob: 2dcffd9a4e8921633cec4bfdb7509a5b03b42fcf (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110

#include "lptypes.h"
#include "lpcset.h"


/*
** Add to 'c' the index of the (only) bit set in byte 'b'
*/
static int onlybit (int c, int b) {
  if ((b & 0xF0) != 0) { c += 4; b >>= 4; }
  if ((b & 0x0C) != 0) { c += 2; b >>= 2; }
  if ((b & 0x02) != 0) { c += 1; }
  return c;
}


/*
** Check whether a charset is empty (returns IFail), singleton (IChar),
** full (IAny), or none of those (ISet). When singleton, 'info.offset'
** returns which character it is. When generic set, 'info' returns
** information about its range.
*/
Opcode charsettype (const byte *cs, charsetinfo *info) {
  int low0, low1, high0, high1;
  for (low1 = 0; low1 < CHARSETSIZE && cs[low1] == 0; low1++)
    /* find lowest byte with a 1-bit */;
  if (low1 == CHARSETSIZE)
    return IFail;  /* no characters in set */
  for (high1 = CHARSETSIZE - 1; cs[high1] == 0; high1--)
    /* find highest byte with a 1-bit; low1 is a sentinel */;
  if (low1 == high1) {  /* only one byte with 1-bits? */
    int b = cs[low1];
    if ((b & (b - 1)) == 0) {  /* does byte has only one 1-bit? */
      info->offset = onlybit(low1 * BITSPERCHAR, b);  /* get that bit */
      return IChar;  /* single character */
    }
  }
  for (low0 = 0; low0 < CHARSETSIZE && cs[low0] == 0xFF; low0++)
    /* find lowest byte with a 0-bit */;
  if (low0 == CHARSETSIZE)
    return IAny;  /* set has all bits set */
  for (high0 = CHARSETSIZE - 1; cs[high0] == 0xFF; high0--)
    /* find highest byte with a 0-bit; low0 is a sentinel */;
  if (high1 - low1 <= high0 - low0) {  /* range of 1s smaller than of 0s? */
    info->offset = low1;
    info->size = high1 - low1 + 1;
    info->deflt = 0;  /* all discharged bits were 0 */
  }
  else {
    info->offset = low0;
    info->size = high0 - low0 + 1;
    info->deflt = 0xFF;  /* all discharged bits were 1 */
  }
  info->cs = cs + info->offset;
  return ISet;
}


/*
** Get a byte from a compact charset. If index is inside the charset
** range, get the byte from the supporting charset (correcting it
** by the offset). Otherwise, return the default for the set.
*/
byte getbytefromcharset (const charsetinfo *info, int index) {
  if (index < info->size)
    return info->cs[index];
  else return info->deflt;
}


/*
** If 'tree' is a 'char' pattern (TSet, TChar, TAny, TFalse), convert it
** into a charset and return 1; else return 0.
*/
int tocharset (TTree *tree, Charset *cs) {
  switch (tree->tag) {
    case TChar: {  /* only one char */
      assert(0 <= tree->u.n && tree->u.n <= UCHAR_MAX);
      clearset(cs->cs);  /* erase all chars */
      setchar(cs->cs, tree->u.n);  /* add that one */
      return 1;
    }
    case TAny: {
      fillset(cs->cs, 0xFF);  /* add all characters to the set */
      return 1;
    }
    case TFalse: {
      clearset(cs->cs);  /* empty set */
      return 1;
    }
    case TSet: {  /* fill set */
      int i;
      fillset(cs->cs, tree->u.set.deflt);
      for (i = 0; i < tree->u.set.size; i++)
        cs->cs[tree->u.set.offset + i] = treebuffer(tree)[i];
      return 1;
    }
    default: return 0;
  }
}


void tree2cset (TTree *tree, charsetinfo *info) {
  assert(tree->tag == TSet);
  info->offset = tree->u.set.offset;
  info->size = tree->u.set.size;
  info->deflt = tree->u.set.deflt;
  info->cs = treebuffer(tree);
}