diff options
author | Roberto Ierusalimschy <roberto@inf.puc-rio.br> | 1999-10-11 14:13:42 -0200 |
---|---|---|
committer | Roberto Ierusalimschy <roberto@inf.puc-rio.br> | 1999-10-11 14:13:42 -0200 |
commit | c5fee7615e979e3a39af44614f82938519dedb68 (patch) | |
tree | 831ad8d88aba1d15e6337838d11e0234bd2e96d6 /lstring.c | |
parent | cca78b5c71f4def3d3d80c71f690f8380b3cb35e (diff) | |
download | lua-c5fee7615e979e3a39af44614f82938519dedb68.tar.gz lua-c5fee7615e979e3a39af44614f82938519dedb68.tar.bz2 lua-c5fee7615e979e3a39af44614f82938519dedb68.zip |
new implementation for string hashing, with chaining.
Diffstat (limited to 'lstring.c')
-rw-r--r-- | lstring.c | 132 |
1 file changed, 50 insertions, 82 deletions
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | ** $Id: lstring.c,v 1.21 1999/09/28 12:27:06 roberto Exp roberto $ | 2 | ** $Id: lstring.c,v 1.22 1999/10/04 17:51:04 roberto Exp roberto $ |
3 | ** String table (keeps all strings handled by Lua) | 3 | ** String table (keeps all strings handled by Lua) |
4 | ** See Copyright Notice in lua.h | 4 | ** See Copyright Notice in lua.h |
5 | */ | 5 | */ |
@@ -19,11 +19,6 @@ | |||
19 | 19 | ||
20 | 20 | ||
21 | 21 | ||
22 | TaggedString luaS_EMPTY = {NULL, MAX_INT, 0L, 0, | ||
23 | {{{LUA_T_NIL, {NULL}}, 0L}}, {0}}; | ||
24 | |||
25 | |||
26 | |||
27 | /* | 22 | /* |
28 | ** to avoid hash tables with size = 0 (cannot hash with size=0), all | 23 | ** to avoid hash tables with size = 0 (cannot hash with size=0), all |
29 | ** hash tables are initialized with this `array'. Elements are never | 24 | ** hash tables are initialized with this `array'. Elements are never |
@@ -48,6 +43,7 @@ void luaS_init (void) { | |||
48 | void luaS_freeall (void) { | 43 | void luaS_freeall (void) { |
49 | int i; | 44 | int i; |
50 | for (i=0; i<NUM_HASHS; i++) { | 45 | for (i=0; i<NUM_HASHS; i++) { |
46 | LUA_ASSERT(L->string_root[i].nuse==0, "non-empty string table"); | ||
51 | if (L->string_root[i].hash != init_hash) | 47 | if (L->string_root[i].hash != init_hash) |
52 | luaM_free(L->string_root[i].hash); | 48 | luaM_free(L->string_root[i].hash); |
53 | } | 49 | } |
@@ -56,43 +52,27 @@ void luaS_freeall (void) { | |||
56 | 52 | ||
57 | 53 | ||
58 | static unsigned long hash_s (const char *s, long l) { | 54 | static unsigned long hash_s (const char *s, long l) { |
59 | unsigned long h = 0; /* seed */ | 55 | unsigned long h = l; /* seed */ |
60 | while (l--) | 56 | while (l--) |
61 | h = h ^ ((h<<5)+(h>>2)+(unsigned char)*(s++)); | 57 | h = h ^ ((h<<5)+(h>>2)+(unsigned char)*(s++)); |
62 | return h; | 58 | return h; |
63 | } | 59 | } |
64 | 60 | ||
65 | 61 | ||
66 | static int newsize (const stringtable *tb) { | ||
67 | int realuse = 0; | ||
68 | int i; | ||
69 | /* count how many entries are really in use */ | ||
70 | for (i=0; i<tb->size; i++) { | ||
71 | if (tb->hash[i] != NULL && tb->hash[i] != &luaS_EMPTY) | ||
72 | realuse++; | ||
73 | } | ||
74 | return luaO_redimension(realuse*2); | ||
75 | } | ||
76 | |||
77 | |||
78 | static void grow (stringtable *tb) { | 62 | static void grow (stringtable *tb) { |
79 | int ns = newsize(tb); | 63 | int ns = luaO_redimension(tb->size*2); /* new size */ |
80 | TaggedString **newhash = luaM_newvector(ns, TaggedString *); | 64 | TaggedString **newhash = luaM_newvector(ns, TaggedString *); |
81 | int i; | 65 | int i; |
82 | for (i=0; i<ns; i++) | 66 | for (i=0; i<ns; i++) newhash[i] = NULL; |
83 | newhash[i] = NULL; | ||
84 | /* rehash */ | 67 | /* rehash */ |
85 | tb->nuse = 0; | ||
86 | for (i=0; i<tb->size; i++) { | 68 | for (i=0; i<tb->size; i++) { |
87 | if (tb->hash[i] != NULL && tb->hash[i] != &luaS_EMPTY) { | 69 | TaggedString *p = tb->hash[i]; |
88 | unsigned long h = tb->hash[i]->hash; | 70 | while (p) { /* for each node in the list */ |
89 | int h1 = h%ns; | 71 | TaggedString *next = p->nexthash; /* save next */ |
90 | while (newhash[h1]) { | 72 | int h = p->hash%ns; /* new position */ |
91 | h1 += (h&(ns-2)) + 1; /* double hashing */ | 73 | p->nexthash = newhash[h]; /* chain it in new position */ |
92 | if (h1 >= ns) h1 -= ns; | 74 | newhash[h] = p; |
93 | } | 75 | p = next; |
94 | newhash[h1] = tb->hash[i]; | ||
95 | tb->nuse++; | ||
96 | } | 76 | } |
97 | } | 77 | } |
98 | luaM_free(tb->hash); | 78 | luaM_free(tb->hash); |
@@ -101,91 +81,79 @@ static void grow (stringtable *tb) { | |||
101 | } | 81 | } |
102 | 82 | ||
103 | 83 | ||
84 | static TaggedString *newone (long l, unsigned long h) { | ||
85 | TaggedString *ts = (TaggedString *)luaM_malloc( | ||
86 | sizeof(TaggedString)+l*sizeof(char)); | ||
87 | ts->marked = 0; | ||
88 | ts->nexthash = NULL; | ||
89 | ts->nextglobal = ts; /* signal it is not in global list */ | ||
90 | ts->hash = h; | ||
91 | return ts; | ||
92 | } | ||
93 | |||
94 | |||
104 | static TaggedString *newone_s (const char *str, long l, unsigned long h) { | 95 | static TaggedString *newone_s (const char *str, long l, unsigned long h) { |
105 | TaggedString *ts = (TaggedString *)luaM_malloc(sizeof(TaggedString)+l); | 96 | TaggedString *ts = newone(l, h); |
106 | memcpy(ts->str, str, l); | 97 | memcpy(ts->str, str, l); |
107 | ts->str[l] = 0; /* ending 0 */ | 98 | ts->str[l] = 0; /* ending 0 */ |
108 | ts->u.s.globalval.ttype = LUA_T_NIL; /* initialize global value */ | 99 | ts->u.s.globalval.ttype = LUA_T_NIL; /* initialize global value */ |
109 | ts->u.s.len = l; | 100 | ts->u.s.len = l; |
110 | ts->constindex = 0; | 101 | ts->constindex = 0; |
111 | L->nblocks += gcsizestring(l); | 102 | L->nblocks += gcsizestring(l); |
112 | ts->marked = 0; | ||
113 | ts->next = ts; /* signal it is in no list */ | ||
114 | ts->hash = h; | ||
115 | return ts; | 103 | return ts; |
116 | } | 104 | } |
117 | 105 | ||
106 | |||
118 | static TaggedString *newone_u (void *buff, int tag, unsigned long h) { | 107 | static TaggedString *newone_u (void *buff, int tag, unsigned long h) { |
119 | TaggedString *ts = luaM_new(TaggedString); | 108 | TaggedString *ts = newone(0, h); |
120 | ts->u.d.v = buff; | 109 | ts->u.d.v = buff; |
121 | ts->u.d.tag = (tag == LUA_ANYTAG) ? 0 : tag; | 110 | ts->u.d.tag = (tag == LUA_ANYTAG) ? 0 : tag; |
122 | ts->constindex = -1; /* tag -> this is a userdata */ | 111 | ts->constindex = -1; /* tag -> this is a userdata */ |
123 | L->nblocks++; | 112 | L->nblocks++; |
124 | ts->marked = 0; | ||
125 | ts->next = ts; /* signal it is in no list */ | ||
126 | ts->hash = h; | ||
127 | return ts; | 113 | return ts; |
128 | } | 114 | } |
129 | 115 | ||
130 | 116 | ||
131 | static void newentry (stringtable *tb, TaggedString *ts, int h1) { | 117 | static void newentry (stringtable *tb, TaggedString *ts, int h) { |
132 | tb->nuse++; | 118 | tb->nuse++; |
133 | if ((long)tb->nuse*3 < (long)tb->size*2) /* still have room? */ | 119 | if (tb->nuse >= tb->size) { /* no more room? */ |
134 | tb->hash[h1] = ts; | ||
135 | else { /* must grow */ | ||
136 | if (tb->hash == init_hash) { /* cannot change init_hash */ | 120 | if (tb->hash == init_hash) { /* cannot change init_hash */ |
137 | LUA_ASSERT(h1==0, "`init_hash' has size 1"); | 121 | LUA_ASSERT(h==0, "`init_hash' has size 1"); |
138 | tb->hash = luaM_newvector(1, TaggedString *); /* so, `clone' it */ | 122 | tb->hash = luaM_newvector(1, TaggedString *); /* so, `clone' it */ |
123 | tb->hash[0] = NULL; | ||
139 | } | 124 | } |
140 | tb->hash[h1] = ts; | ||
141 | grow(tb); | 125 | grow(tb); |
126 | h = ts->hash%tb->size; /* new hash position */ | ||
142 | } | 127 | } |
128 | ts->nexthash = tb->hash[h]; /* chain new entry */ | ||
129 | tb->hash[h] = ts; | ||
143 | } | 130 | } |
144 | 131 | ||
145 | 132 | ||
146 | static TaggedString *insert_s (const char *str, long l, stringtable *tb) { | 133 | static TaggedString *insert_s (const char *str, long l, |
134 | stringtable *tb, unsigned long h) { | ||
135 | int h1 = h%tb->size; | ||
147 | TaggedString *ts; | 136 | TaggedString *ts; |
148 | unsigned long h = hash_s(str, l); | 137 | for (ts = tb->hash[h1]; ts; ts = ts->nexthash) |
149 | int size = tb->size; | 138 | if (ts->u.s.len == l && (memcmp(str, ts->str, l) == 0)) |
150 | int j = -1; /* last empty place found (or -1) */ | ||
151 | int h1 = h%size; | ||
152 | while ((ts = tb->hash[h1]) != NULL) { | ||
153 | if (ts == &luaS_EMPTY) | ||
154 | j = h1; | ||
155 | else if (ts->u.s.len == l && (memcmp(str, ts->str, l) == 0)) | ||
156 | return ts; | 139 | return ts; |
157 | h1 += (h&(size-2)) + 1; /* double hashing */ | ||
158 | if (h1 >= size) h1 -= size; | ||
159 | } | ||
160 | /* not found */ | 140 | /* not found */ |
161 | ts = newone_s(str, l, h); /* create new entry */ | 141 | ts = newone_s(str, l, h); /* create new entry */ |
162 | if (j != -1) /* is there an EMPTY space? */ | 142 | newentry(tb, ts, h1); /* insert it on table */ |
163 | tb->hash[j] = ts; /* use it for new element */ | ||
164 | else | ||
165 | newentry(tb, ts, h1); /* no EMPTY places; must use a virgin one */ | ||
166 | return ts; | 143 | return ts; |
167 | } | 144 | } |
168 | 145 | ||
169 | 146 | ||
170 | static TaggedString *insert_u (void *buff, int tag, stringtable *tb) { | 147 | static TaggedString *insert_u (void *buff, int tag, stringtable *tb) { |
171 | TaggedString *ts; | ||
172 | unsigned long h = (unsigned long)buff; | 148 | unsigned long h = (unsigned long)buff; |
173 | int size = tb->size; | 149 | int h1 = h%tb->size; |
174 | int j = -1; | 150 | TaggedString *ts; |
175 | int h1 = h%size; | 151 | for (ts = tb->hash[h1]; ts; ts = ts->nexthash) |
176 | while ((ts = tb->hash[h1]) != NULL) { | 152 | if ((tag == ts->u.d.tag || tag == LUA_ANYTAG) && buff == ts->u.d.v) |
177 | if (ts == &luaS_EMPTY) | ||
178 | j = h1; | ||
179 | else if ((tag == ts->u.d.tag || tag == LUA_ANYTAG) && buff == ts->u.d.v) | ||
180 | return ts; | 153 | return ts; |
181 | h1 += (h&(size-2)) + 1; | 154 | /* not found */ |
182 | if (h1 >= size) h1 -= size; | ||
183 | } | ||
184 | ts = newone_u(buff, tag, h); | 155 | ts = newone_u(buff, tag, h); |
185 | if (j != -1) | 156 | newentry(tb, ts, h1); |
186 | tb->hash[j] = ts; | ||
187 | else | ||
188 | newentry(tb, ts, h1); | ||
189 | return ts; | 157 | return ts; |
190 | } | 158 | } |
191 | 159 | ||
@@ -196,8 +164,8 @@ TaggedString *luaS_createudata (void *udata, int tag) { | |||
196 | } | 164 | } |
197 | 165 | ||
198 | TaggedString *luaS_newlstr (const char *str, long l) { | 166 | TaggedString *luaS_newlstr (const char *str, long l) { |
199 | int t = (l==0) ? 0 : ((int)((unsigned char)str[0]+l))%NUM_HASHSTR; | 167 | unsigned long h = hash_s(str, l); |
200 | return insert_s(str, l, &L->string_root[t]); | 168 | return insert_s(str, l, &L->string_root[h%NUM_HASHSTR], h); |
201 | } | 169 | } |
202 | 170 | ||
203 | TaggedString *luaS_new (const char *str) { | 171 | TaggedString *luaS_new (const char *str) { |
@@ -206,7 +174,7 @@ TaggedString *luaS_new (const char *str) { | |||
206 | 174 | ||
207 | TaggedString *luaS_newfixedstring (const char *str) { | 175 | TaggedString *luaS_newfixedstring (const char *str) { |
208 | TaggedString *ts = luaS_new(str); | 176 | TaggedString *ts = luaS_new(str); |
209 | if (ts->marked == 0) ts->marked = 2; /* avoid GC */ | 177 | if (ts->marked == 0) ts->marked = FIXMARK; /* avoid GC */ |
210 | return ts; | 178 | return ts; |
211 | } | 179 | } |
212 | 180 | ||
@@ -219,8 +187,8 @@ void luaS_free (TaggedString *t) { | |||
219 | 187 | ||
220 | void luaS_rawsetglobal (TaggedString *ts, const TObject *newval) { | 188 | void luaS_rawsetglobal (TaggedString *ts, const TObject *newval) { |
221 | ts->u.s.globalval = *newval; | 189 | ts->u.s.globalval = *newval; |
222 | if (ts->next == ts) { /* is not in list? */ | 190 | if (ts->nextglobal == ts) { /* is not in list? */ |
223 | ts->next = L->rootglobal; | 191 | ts->nextglobal = L->rootglobal; |
224 | L->rootglobal = ts; | 192 | L->rootglobal = ts; |
225 | } | 193 | } |
226 | } | 194 | } |