diff options
| author | Roberto Ierusalimschy <roberto@inf.puc-rio.br> | 1999-10-11 14:13:42 -0200 |
|---|---|---|
| committer | Roberto Ierusalimschy <roberto@inf.puc-rio.br> | 1999-10-11 14:13:42 -0200 |
| commit | c5fee7615e979e3a39af44614f82938519dedb68 (patch) | |
| tree | 831ad8d88aba1d15e6337838d11e0234bd2e96d6 /lstring.c | |
| parent | cca78b5c71f4def3d3d80c71f690f8380b3cb35e (diff) | |
| download | lua-c5fee7615e979e3a39af44614f82938519dedb68.tar.gz lua-c5fee7615e979e3a39af44614f82938519dedb68.tar.bz2 lua-c5fee7615e979e3a39af44614f82938519dedb68.zip | |
new implementation for string hashing, with chaining.
Diffstat (limited to 'lstring.c')
| -rw-r--r-- | lstring.c | 132 |
1 file changed, 50 insertions, 82 deletions
| @@ -1,5 +1,5 @@ | |||
| 1 | /* | 1 | /* |
| 2 | ** $Id: lstring.c,v 1.21 1999/09/28 12:27:06 roberto Exp roberto $ | 2 | ** $Id: lstring.c,v 1.22 1999/10/04 17:51:04 roberto Exp roberto $ |
| 3 | ** String table (keeps all strings handled by Lua) | 3 | ** String table (keeps all strings handled by Lua) |
| 4 | ** See Copyright Notice in lua.h | 4 | ** See Copyright Notice in lua.h |
| 5 | */ | 5 | */ |
| @@ -19,11 +19,6 @@ | |||
| 19 | 19 | ||
| 20 | 20 | ||
| 21 | 21 | ||
| 22 | TaggedString luaS_EMPTY = {NULL, MAX_INT, 0L, 0, | ||
| 23 | {{{LUA_T_NIL, {NULL}}, 0L}}, {0}}; | ||
| 24 | |||
| 25 | |||
| 26 | |||
| 27 | /* | 22 | /* |
| 28 | ** to avoid hash tables with size = 0 (cannot hash with size=0), all | 23 | ** to avoid hash tables with size = 0 (cannot hash with size=0), all |
| 29 | ** hash tables are initialized with this `array'. Elements are never | 24 | ** hash tables are initialized with this `array'. Elements are never |
| @@ -48,6 +43,7 @@ void luaS_init (void) { | |||
| 48 | void luaS_freeall (void) { | 43 | void luaS_freeall (void) { |
| 49 | int i; | 44 | int i; |
| 50 | for (i=0; i<NUM_HASHS; i++) { | 45 | for (i=0; i<NUM_HASHS; i++) { |
| 46 | LUA_ASSERT(L->string_root[i].nuse==0, "non-empty string table"); | ||
| 51 | if (L->string_root[i].hash != init_hash) | 47 | if (L->string_root[i].hash != init_hash) |
| 52 | luaM_free(L->string_root[i].hash); | 48 | luaM_free(L->string_root[i].hash); |
| 53 | } | 49 | } |
| @@ -56,43 +52,27 @@ void luaS_freeall (void) { | |||
| 56 | 52 | ||
| 57 | 53 | ||
| 58 | static unsigned long hash_s (const char *s, long l) { | 54 | static unsigned long hash_s (const char *s, long l) { |
| 59 | unsigned long h = 0; /* seed */ | 55 | unsigned long h = l; /* seed */ |
| 60 | while (l--) | 56 | while (l--) |
| 61 | h = h ^ ((h<<5)+(h>>2)+(unsigned char)*(s++)); | 57 | h = h ^ ((h<<5)+(h>>2)+(unsigned char)*(s++)); |
| 62 | return h; | 58 | return h; |
| 63 | } | 59 | } |
| 64 | 60 | ||
| 65 | 61 | ||
| 66 | static int newsize (const stringtable *tb) { | ||
| 67 | int realuse = 0; | ||
| 68 | int i; | ||
| 69 | /* count how many entries are really in use */ | ||
| 70 | for (i=0; i<tb->size; i++) { | ||
| 71 | if (tb->hash[i] != NULL && tb->hash[i] != &luaS_EMPTY) | ||
| 72 | realuse++; | ||
| 73 | } | ||
| 74 | return luaO_redimension(realuse*2); | ||
| 75 | } | ||
| 76 | |||
| 77 | |||
| 78 | static void grow (stringtable *tb) { | 62 | static void grow (stringtable *tb) { |
| 79 | int ns = newsize(tb); | 63 | int ns = luaO_redimension(tb->size*2); /* new size */ |
| 80 | TaggedString **newhash = luaM_newvector(ns, TaggedString *); | 64 | TaggedString **newhash = luaM_newvector(ns, TaggedString *); |
| 81 | int i; | 65 | int i; |
| 82 | for (i=0; i<ns; i++) | 66 | for (i=0; i<ns; i++) newhash[i] = NULL; |
| 83 | newhash[i] = NULL; | ||
| 84 | /* rehash */ | 67 | /* rehash */ |
| 85 | tb->nuse = 0; | ||
| 86 | for (i=0; i<tb->size; i++) { | 68 | for (i=0; i<tb->size; i++) { |
| 87 | if (tb->hash[i] != NULL && tb->hash[i] != &luaS_EMPTY) { | 69 | TaggedString *p = tb->hash[i]; |
| 88 | unsigned long h = tb->hash[i]->hash; | 70 | while (p) { /* for each node in the list */ |
| 89 | int h1 = h%ns; | 71 | TaggedString *next = p->nexthash; /* save next */ |
| 90 | while (newhash[h1]) { | 72 | int h = p->hash%ns; /* new position */ |
| 91 | h1 += (h&(ns-2)) + 1; /* double hashing */ | 73 | p->nexthash = newhash[h]; /* chain it in new position */ |
| 92 | if (h1 >= ns) h1 -= ns; | 74 | newhash[h] = p; |
| 93 | } | 75 | p = next; |
| 94 | newhash[h1] = tb->hash[i]; | ||
| 95 | tb->nuse++; | ||
| 96 | } | 76 | } |
| 97 | } | 77 | } |
| 98 | luaM_free(tb->hash); | 78 | luaM_free(tb->hash); |
| @@ -101,91 +81,79 @@ static void grow (stringtable *tb) { | |||
| 101 | } | 81 | } |
| 102 | 82 | ||
| 103 | 83 | ||
| 84 | static TaggedString *newone (long l, unsigned long h) { | ||
| 85 | TaggedString *ts = (TaggedString *)luaM_malloc( | ||
| 86 | sizeof(TaggedString)+l*sizeof(char)); | ||
| 87 | ts->marked = 0; | ||
| 88 | ts->nexthash = NULL; | ||
| 89 | ts->nextglobal = ts; /* signal it is not in global list */ | ||
| 90 | ts->hash = h; | ||
| 91 | return ts; | ||
| 92 | } | ||
| 93 | |||
| 94 | |||
| 104 | static TaggedString *newone_s (const char *str, long l, unsigned long h) { | 95 | static TaggedString *newone_s (const char *str, long l, unsigned long h) { |
| 105 | TaggedString *ts = (TaggedString *)luaM_malloc(sizeof(TaggedString)+l); | 96 | TaggedString *ts = newone(l, h); |
| 106 | memcpy(ts->str, str, l); | 97 | memcpy(ts->str, str, l); |
| 107 | ts->str[l] = 0; /* ending 0 */ | 98 | ts->str[l] = 0; /* ending 0 */ |
| 108 | ts->u.s.globalval.ttype = LUA_T_NIL; /* initialize global value */ | 99 | ts->u.s.globalval.ttype = LUA_T_NIL; /* initialize global value */ |
| 109 | ts->u.s.len = l; | 100 | ts->u.s.len = l; |
| 110 | ts->constindex = 0; | 101 | ts->constindex = 0; |
| 111 | L->nblocks += gcsizestring(l); | 102 | L->nblocks += gcsizestring(l); |
| 112 | ts->marked = 0; | ||
| 113 | ts->next = ts; /* signal it is in no list */ | ||
| 114 | ts->hash = h; | ||
| 115 | return ts; | 103 | return ts; |
| 116 | } | 104 | } |
| 117 | 105 | ||
| 106 | |||
| 118 | static TaggedString *newone_u (void *buff, int tag, unsigned long h) { | 107 | static TaggedString *newone_u (void *buff, int tag, unsigned long h) { |
| 119 | TaggedString *ts = luaM_new(TaggedString); | 108 | TaggedString *ts = newone(0, h); |
| 120 | ts->u.d.v = buff; | 109 | ts->u.d.v = buff; |
| 121 | ts->u.d.tag = (tag == LUA_ANYTAG) ? 0 : tag; | 110 | ts->u.d.tag = (tag == LUA_ANYTAG) ? 0 : tag; |
| 122 | ts->constindex = -1; /* tag -> this is a userdata */ | 111 | ts->constindex = -1; /* tag -> this is a userdata */ |
| 123 | L->nblocks++; | 112 | L->nblocks++; |
| 124 | ts->marked = 0; | ||
| 125 | ts->next = ts; /* signal it is in no list */ | ||
| 126 | ts->hash = h; | ||
| 127 | return ts; | 113 | return ts; |
| 128 | } | 114 | } |
| 129 | 115 | ||
| 130 | 116 | ||
| 131 | static void newentry (stringtable *tb, TaggedString *ts, int h1) { | 117 | static void newentry (stringtable *tb, TaggedString *ts, int h) { |
| 132 | tb->nuse++; | 118 | tb->nuse++; |
| 133 | if ((long)tb->nuse*3 < (long)tb->size*2) /* still have room? */ | 119 | if (tb->nuse >= tb->size) { /* no more room? */ |
| 134 | tb->hash[h1] = ts; | ||
| 135 | else { /* must grow */ | ||
| 136 | if (tb->hash == init_hash) { /* cannot change init_hash */ | 120 | if (tb->hash == init_hash) { /* cannot change init_hash */ |
| 137 | LUA_ASSERT(h1==0, "`init_hash' has size 1"); | 121 | LUA_ASSERT(h==0, "`init_hash' has size 1"); |
| 138 | tb->hash = luaM_newvector(1, TaggedString *); /* so, `clone' it */ | 122 | tb->hash = luaM_newvector(1, TaggedString *); /* so, `clone' it */ |
| 123 | tb->hash[0] = NULL; | ||
| 139 | } | 124 | } |
| 140 | tb->hash[h1] = ts; | ||
| 141 | grow(tb); | 125 | grow(tb); |
| 126 | h = ts->hash%tb->size; /* new hash position */ | ||
| 142 | } | 127 | } |
| 128 | ts->nexthash = tb->hash[h]; /* chain new entry */ | ||
| 129 | tb->hash[h] = ts; | ||
| 143 | } | 130 | } |
| 144 | 131 | ||
| 145 | 132 | ||
| 146 | static TaggedString *insert_s (const char *str, long l, stringtable *tb) { | 133 | static TaggedString *insert_s (const char *str, long l, |
| 134 | stringtable *tb, unsigned long h) { | ||
| 135 | int h1 = h%tb->size; | ||
| 147 | TaggedString *ts; | 136 | TaggedString *ts; |
| 148 | unsigned long h = hash_s(str, l); | 137 | for (ts = tb->hash[h1]; ts; ts = ts->nexthash) |
| 149 | int size = tb->size; | 138 | if (ts->u.s.len == l && (memcmp(str, ts->str, l) == 0)) |
| 150 | int j = -1; /* last empty place found (or -1) */ | ||
| 151 | int h1 = h%size; | ||
| 152 | while ((ts = tb->hash[h1]) != NULL) { | ||
| 153 | if (ts == &luaS_EMPTY) | ||
| 154 | j = h1; | ||
| 155 | else if (ts->u.s.len == l && (memcmp(str, ts->str, l) == 0)) | ||
| 156 | return ts; | 139 | return ts; |
| 157 | h1 += (h&(size-2)) + 1; /* double hashing */ | ||
| 158 | if (h1 >= size) h1 -= size; | ||
| 159 | } | ||
| 160 | /* not found */ | 140 | /* not found */ |
| 161 | ts = newone_s(str, l, h); /* create new entry */ | 141 | ts = newone_s(str, l, h); /* create new entry */ |
| 162 | if (j != -1) /* is there an EMPTY space? */ | 142 | newentry(tb, ts, h1); /* insert it on table */ |
| 163 | tb->hash[j] = ts; /* use it for new element */ | ||
| 164 | else | ||
| 165 | newentry(tb, ts, h1); /* no EMPTY places; must use a virgin one */ | ||
| 166 | return ts; | 143 | return ts; |
| 167 | } | 144 | } |
| 168 | 145 | ||
| 169 | 146 | ||
| 170 | static TaggedString *insert_u (void *buff, int tag, stringtable *tb) { | 147 | static TaggedString *insert_u (void *buff, int tag, stringtable *tb) { |
| 171 | TaggedString *ts; | ||
| 172 | unsigned long h = (unsigned long)buff; | 148 | unsigned long h = (unsigned long)buff; |
| 173 | int size = tb->size; | 149 | int h1 = h%tb->size; |
| 174 | int j = -1; | 150 | TaggedString *ts; |
| 175 | int h1 = h%size; | 151 | for (ts = tb->hash[h1]; ts; ts = ts->nexthash) |
| 176 | while ((ts = tb->hash[h1]) != NULL) { | 152 | if ((tag == ts->u.d.tag || tag == LUA_ANYTAG) && buff == ts->u.d.v) |
| 177 | if (ts == &luaS_EMPTY) | ||
| 178 | j = h1; | ||
| 179 | else if ((tag == ts->u.d.tag || tag == LUA_ANYTAG) && buff == ts->u.d.v) | ||
| 180 | return ts; | 153 | return ts; |
| 181 | h1 += (h&(size-2)) + 1; | 154 | /* not found */ |
| 182 | if (h1 >= size) h1 -= size; | ||
| 183 | } | ||
| 184 | ts = newone_u(buff, tag, h); | 155 | ts = newone_u(buff, tag, h); |
| 185 | if (j != -1) | 156 | newentry(tb, ts, h1); |
| 186 | tb->hash[j] = ts; | ||
| 187 | else | ||
| 188 | newentry(tb, ts, h1); | ||
| 189 | return ts; | 157 | return ts; |
| 190 | } | 158 | } |
| 191 | 159 | ||
| @@ -196,8 +164,8 @@ TaggedString *luaS_createudata (void *udata, int tag) { | |||
| 196 | } | 164 | } |
| 197 | 165 | ||
| 198 | TaggedString *luaS_newlstr (const char *str, long l) { | 166 | TaggedString *luaS_newlstr (const char *str, long l) { |
| 199 | int t = (l==0) ? 0 : ((int)((unsigned char)str[0]+l))%NUM_HASHSTR; | 167 | unsigned long h = hash_s(str, l); |
| 200 | return insert_s(str, l, &L->string_root[t]); | 168 | return insert_s(str, l, &L->string_root[h%NUM_HASHSTR], h); |
| 201 | } | 169 | } |
| 202 | 170 | ||
| 203 | TaggedString *luaS_new (const char *str) { | 171 | TaggedString *luaS_new (const char *str) { |
| @@ -206,7 +174,7 @@ TaggedString *luaS_new (const char *str) { | |||
| 206 | 174 | ||
| 207 | TaggedString *luaS_newfixedstring (const char *str) { | 175 | TaggedString *luaS_newfixedstring (const char *str) { |
| 208 | TaggedString *ts = luaS_new(str); | 176 | TaggedString *ts = luaS_new(str); |
| 209 | if (ts->marked == 0) ts->marked = 2; /* avoid GC */ | 177 | if (ts->marked == 0) ts->marked = FIXMARK; /* avoid GC */ |
| 210 | return ts; | 178 | return ts; |
| 211 | } | 179 | } |
| 212 | 180 | ||
| @@ -219,8 +187,8 @@ void luaS_free (TaggedString *t) { | |||
| 219 | 187 | ||
| 220 | void luaS_rawsetglobal (TaggedString *ts, const TObject *newval) { | 188 | void luaS_rawsetglobal (TaggedString *ts, const TObject *newval) { |
| 221 | ts->u.s.globalval = *newval; | 189 | ts->u.s.globalval = *newval; |
| 222 | if (ts->next == ts) { /* is not in list? */ | 190 | if (ts->nextglobal == ts) { /* is not in list? */ |
| 223 | ts->next = L->rootglobal; | 191 | ts->nextglobal = L->rootglobal; |
| 224 | L->rootglobal = ts; | 192 | L->rootglobal = ts; |
| 225 | } | 193 | } |
| 226 | } | 194 | } |
