diff options
| author | Roberto Ierusalimschy <roberto@inf.puc-rio.br> | 1999-10-14 17:13:31 -0200 |
|---|---|---|
| committer | Roberto Ierusalimschy <roberto@inf.puc-rio.br> | 1999-10-14 17:13:31 -0200 |
| commit | 4e9f2d13d5b6fa71ca480394e0b7e75463d4aeec (patch) | |
| tree | d11eee681ce7b01a273e489f47e070494b51de1a /ltable.c | |
| parent | b6ebbb2fee13aa223fdd12921cd0411e02db9dd0 (diff) | |
| download | lua-4e9f2d13d5b6fa71ca480394e0b7e75463d4aeec.tar.gz lua-4e9f2d13d5b6fa71ca480394e0b7e75463d4aeec.tar.bz2 lua-4e9f2d13d5b6fa71ca480394e0b7e75463d4aeec.zip | |
new implementation of hash tables.
Diffstat (limited to 'ltable.c')
| -rw-r--r-- | ltable.c | 271 |
1 file changed, 198 insertions, 73 deletions
| @@ -1,10 +1,23 @@ | |||
| 1 | /* | 1 | /* |
| 2 | ** $Id: ltable.c,v 1.24 1999/09/22 14:38:45 roberto Exp roberto $ | 2 | ** $Id: ltable.c,v 1.25 1999/10/04 17:51:04 roberto Exp roberto $ |
| 3 | ** Lua tables (hash) | 3 | ** Lua tables (hash) |
| 4 | ** See Copyright Notice in lua.h | 4 | ** See Copyright Notice in lua.h |
| 5 | */ | 5 | */ |
| 6 | 6 | ||
| 7 | 7 | ||
| 8 | /* | ||
| 9 | ** Implementation of tables (aka arrays, objects, or hash tables); | ||
| 10 | ** uses a mix of chained scatter table with Brent's variation. | ||
| 11 | ** A main invariant of these tables is that, if an element is not | ||
| 12 | ** in its main position (i.e. the `original' position that its hash gives | ||
| 13 | ** to it), then the colliding element is in its own main position. | ||
| 14 | ** In other words, there are collisions only when two elements have the | ||
| 15 | ** same main position (i.e. the same hash values for that table size). | ||
| 16 | ** Because of that, the load factor of these tables can be 100% without | ||
| 17 | ** performance penalties. | ||
| 18 | */ | ||
| 19 | |||
| 20 | |||
| 8 | #include "lauxlib.h" | 21 | #include "lauxlib.h" |
| 9 | #include "lmem.h" | 22 | #include "lmem.h" |
| 10 | #include "lobject.h" | 23 | #include "lobject.h" |
| @@ -15,93 +28,104 @@ | |||
| 15 | 28 | ||
| 16 | #define gcsize(n) (1+(n/16)) | 29 | #define gcsize(n) (1+(n/16)) |
| 17 | 30 | ||
| 18 | #define nuse(t) ((t)->nuse) | ||
| 19 | #define nodevector(t) ((t)->node) | ||
| 20 | 31 | ||
| 21 | 32 | ||
| 22 | #define TagDefault LUA_T_ARRAY; | 33 | #define TagDefault LUA_T_ARRAY; |
| 23 | 34 | ||
| 24 | 35 | ||
| 25 | 36 | ||
| 26 | static long int hashindex (const TObject *ref) { | 37 | /* |
| 27 | long int h; | 38 | ** returns the `main' position of an element in a table (that is, the index |
| 28 | switch (ttype(ref)) { | 39 | ** of its hash value) |
| 40 | */ | ||
| 41 | static Node *luaH_mainposition (const Hash *t, const TObject *key) { | ||
| 42 | unsigned long h; | ||
| 43 | switch (ttype(key)) { | ||
| 29 | case LUA_T_NUMBER: | 44 | case LUA_T_NUMBER: |
| 30 | h = (long int)nvalue(ref); | 45 | h = (unsigned long)(long)nvalue(key); |
| 31 | break; | 46 | break; |
| 32 | case LUA_T_STRING: case LUA_T_USERDATA: | 47 | case LUA_T_STRING: case LUA_T_USERDATA: |
| 33 | h = (IntPoint)tsvalue(ref); | 48 | h = tsvalue(key)->hash; |
| 34 | break; | 49 | break; |
| 35 | case LUA_T_ARRAY: | 50 | case LUA_T_ARRAY: |
| 36 | h = (IntPoint)avalue(ref); | 51 | h = (IntPoint)avalue(key); |
| 37 | break; | 52 | break; |
| 38 | case LUA_T_PROTO: | 53 | case LUA_T_PROTO: |
| 39 | h = (IntPoint)tfvalue(ref); | 54 | h = (IntPoint)tfvalue(key); |
| 40 | break; | 55 | break; |
| 41 | case LUA_T_CPROTO: | 56 | case LUA_T_CPROTO: |
| 42 | h = (IntPoint)fvalue(ref); | 57 | h = (IntPoint)fvalue(key); |
| 43 | break; | 58 | break; |
| 44 | case LUA_T_CLOSURE: | 59 | case LUA_T_CLOSURE: |
| 45 | h = (IntPoint)clvalue(ref); | 60 | h = (IntPoint)clvalue(key); |
| 46 | break; | 61 | break; |
| 47 | default: | 62 | default: |
| 48 | lua_error("unexpected type to index table"); | 63 | lua_error("unexpected type to index table"); |
| 49 | h = 0; /* to avoid warnings */ | 64 | h = 0; /* to avoid warnings */ |
| 50 | } | 65 | } |
| 51 | return (h >= 0 ? h : -(h+1)); | 66 | return &t->node[h%t->size]; |
| 52 | } | 67 | } |
| 53 | 68 | ||
| 54 | 69 | ||
| 55 | Node *luaH_present (const Hash *t, const TObject *key) { | 70 | const TObject *luaH_get (const Hash *t, const TObject *key) { |
| 56 | const int tsize = nhash(t); | 71 | Node *n = luaH_mainposition(t, key); |
| 57 | const long int h = hashindex(key); | 72 | do { |
| 58 | int h1 = h%tsize; | 73 | if (luaO_equalObj(key, &n->key)) |
| 59 | Node *n = node(t, h1); | 74 | return &n->val; |
| 60 | /* keep looking until an entry with "ref" equal to key or nil */ | 75 | n = n->next; |
| 61 | while ((ttype(ref(n)) == ttype(key)) ? !luaO_equalval(key, ref(n)) | 76 | } while (n); |
| 62 | : ttype(ref(n)) != LUA_T_NIL) { | 77 | return &luaO_nilobject; |
| 63 | h1 += (h&(tsize-2)) + 1; /* double hashing */ | 78 | } |
| 64 | if (h1 >= tsize) h1 -= tsize; | 79 | |
| 65 | n = node(t, h1); | 80 | |
| 66 | } | 81 | int luaH_pos (const Hash *t, const TObject *key) { |
| 67 | return n; | 82 | const TObject *v = luaH_get(t, key); |
| 83 | return (v == &luaO_nilobject) ? -1 : /* key not found */ | ||
| 84 | ((const char *)v - (const char *)(&t->node[0].val))/sizeof(Node); | ||
| 68 | } | 85 | } |
| 69 | 86 | ||
| 70 | 87 | ||
| 88 | |||
| 71 | static Node *hashnodecreate (int nhash) { | 89 | static Node *hashnodecreate (int nhash) { |
| 72 | Node *const v = luaM_newvector(nhash, Node); | 90 | Node *v = luaM_newvector(nhash, Node); |
| 73 | int i; | 91 | int i; |
| 74 | for (i=0; i<nhash; i++) | 92 | for (i=0; i<nhash; i++) { |
| 75 | ttype(ref(&v[i])) = ttype(val(&v[i])) = LUA_T_NIL; | 93 | ttype(key(&v[i])) = ttype(val(&v[i])) = LUA_T_NIL; |
| 94 | v[i].next = NULL; | ||
| 95 | } | ||
| 76 | return v; | 96 | return v; |
| 77 | } | 97 | } |
| 78 | 98 | ||
| 79 | 99 | ||
| 80 | Hash *luaH_new (int nhash) { | 100 | static void setnodevector (Hash *t, int size) { |
| 81 | Hash *const t = luaM_new(Hash); | 101 | t->node = hashnodecreate(size); |
| 82 | nhash = luaO_redimension(nhash*3/2); | 102 | t->size = size; |
| 83 | nodevector(t) = hashnodecreate(nhash); | 103 | t->firstfree = &t->node[size-1]; /* first free position to be used */ |
| 84 | nhash(t) = nhash; | 104 | L->nblocks += gcsize(size); |
| 85 | nuse(t) = 0; | 105 | } |
| 106 | |||
| 107 | |||
| 108 | Hash *luaH_new (int size) { | ||
| 109 | Hash *t = luaM_new(Hash); | ||
| 110 | setnodevector(t, luaO_redimension(size+1)); | ||
| 86 | t->htag = TagDefault; | 111 | t->htag = TagDefault; |
| 87 | t->next = L->roottable; | 112 | t->next = L->roottable; |
| 88 | L->roottable = t; | 113 | L->roottable = t; |
| 89 | t->marked = 0; | 114 | t->marked = 0; |
| 90 | L->nblocks += gcsize(nhash); | ||
| 91 | return t; | 115 | return t; |
| 92 | } | 116 | } |
| 93 | 117 | ||
| 94 | 118 | ||
| 95 | void luaH_free (Hash *t) { | 119 | void luaH_free (Hash *t) { |
| 96 | L->nblocks -= gcsize(t->nhash); | 120 | L->nblocks -= gcsize(t->size); |
| 97 | luaM_free(nodevector(t)); | 121 | luaM_free(t->node); |
| 98 | luaM_free(t); | 122 | luaM_free(t); |
| 99 | } | 123 | } |
| 100 | 124 | ||
| 101 | 125 | ||
| 102 | static int newsize (Hash *t) { | 126 | static int newsize (const Hash *t) { |
| 103 | Node *const v = t->node; | 127 | Node *v = t->node; |
| 104 | const int size = nhash(t); | 128 | int size = t->size; |
| 105 | int realuse = 0; | 129 | int realuse = 0; |
| 106 | int i; | 130 | int i; |
| 107 | for (i=0; i<size; i++) { | 131 | for (i=0; i<size; i++) { |
| @@ -112,57 +136,158 @@ static int newsize (Hash *t) { | |||
| 112 | } | 136 | } |
| 113 | 137 | ||
| 114 | 138 | ||
| 115 | static void rehash (Hash *t) { | 139 | #ifdef DEBUG |
| 116 | const int nold = nhash(t); | 140 | /* check invariant of a table */ |
| 117 | Node *const vold = nodevector(t); | 141 | static int listfind (const Node *m, const Node *n) { |
| 118 | const int nnew = newsize(t); | 142 | do { |
| 119 | int i; | 143 | if (m==n) return 1; |
| 120 | nodevector(t) = hashnodecreate(nnew); | 144 | m = m->next; |
| 121 | nhash(t) = nnew; | 145 | } while (m); |
| 122 | nuse(t) = 0; | 146 | return 0; |
| 123 | for (i=0; i<nold; i++) { | 147 | } |
| 124 | Node *n = vold+i; | 148 | |
| 125 | if (ttype(val(n)) != LUA_T_NIL) { | 149 | static int check_invariant (const Hash *t, int filled) { |
| 126 | *luaH_present(t, ref(n)) = *n; /* copy old node to new hash */ | 150 | Node *n; |
| 127 | nuse(t)++; | 151 | for (n=t->node; n<t->firstfree; n++) { |
| 128 | } | 152 | TObject *key = &n->key; |
| 153 | LUA_ASSERT(ttype(key) == LUA_T_NIL || n == luaH_mainposition(t, key), | ||
| 154 | "all elements before firstfree are empty or in their main positions"); | ||
| 155 | } | ||
| 156 | if (!filled) | ||
| 157 | LUA_ASSERT(ttype(&(n++)->key) == LUA_T_NIL, "firstfree must be empty"); | ||
| 158 | else | ||
| 159 | LUA_ASSERT(n == t->node, "table cannot have empty places"); | ||
| 160 | for (; n<t->node+t->size; n++) { | ||
| 161 | TObject *key = &n->key; | ||
| 162 | Node *mp = luaH_mainposition(t, key); | ||
| 163 | LUA_ASSERT(ttype(key) != LUA_T_NIL, | ||
| 164 | "cannot exist empty elements after firstfree"); | ||
| 165 | LUA_ASSERT(n == mp || luaH_mainposition(t, &mp->key) == mp, | ||
| 166 | "either an element or its colliding element is in its main position"); | ||
| 167 | LUA_ASSERT(listfind(mp,n), "element is in its main position list"); | ||
| 129 | } | 168 | } |
| 130 | L->nblocks += gcsize(nnew)-gcsize(nold); | 169 | return 1; |
| 131 | luaM_free(vold); | ||
| 132 | } | 170 | } |
| 171 | #endif | ||
| 133 | 172 | ||
| 134 | 173 | ||
| 135 | void luaH_set (Hash *t, const TObject *ref, const TObject *val) { | 174 | /* |
| 136 | Node *const n = luaH_present(t, ref); | 175 | ** the rehash is done in two stages: first, we insert only the elements whose |
| 137 | *val(n) = *val; | 176 | ** main position is free, to avoid needless collisions. In the second stage, |
| 138 | if (ttype(ref(n)) == LUA_T_NIL) { /* new node? */ | 177 | ** we insert the other elements. |
| 139 | *ref(n) = *ref; /* set key */ | 178 | */ |
| 140 | nuse(t)++; /* count it */ | 179 | static void rehash (Hash *t) { |
| 141 | if ((long)nuse(t)*3L > (long)nhash(t)*2L) /* check size */ | 180 | int oldsize = t->size; |
| 142 | rehash(t); | 181 | Node *nold = t->node; |
| 182 | int i; | ||
| 183 | LUA_ASSERT(check_invariant(t, 1), "invalid table"); | ||
| 184 | L->nblocks -= gcsize(oldsize); | ||
| 185 | setnodevector(t, newsize(t)); /* create new array of nodes */ | ||
| 186 | /* first loop; set only elements that can go in their main positions */ | ||
| 187 | for (i=0; i<oldsize; i++) { | ||
| 188 | Node *old = nold+i; | ||
| 189 | if (ttype(&old->val) == LUA_T_NIL) | ||
| 190 | old->next = NULL; /* `remove' it for next loop */ | ||
| 191 | else { | ||
| 192 | Node *mp = luaH_mainposition(t, &old->key); /* new main position */ | ||
| 193 | if (ttype(&mp->key) == LUA_T_NIL) { /* is it empty? */ | ||
| 194 | mp->key = old->key; /* put element there */ | ||
| 195 | mp->val = old->val; | ||
| 196 | old->next = NULL; /* `remove' it for next loop */ | ||
| 197 | } | ||
| 198 | else /* it will be copied in next loop */ | ||
| 199 | old->next = mp; /* to be used in next loop */ | ||
| 200 | } | ||
| 143 | } | 201 | } |
| 202 | /* update `firstfree' */ | ||
| 203 | while (ttype(&t->firstfree->key) != LUA_T_NIL) t->firstfree--; | ||
| | | 204 | /* second loop; update elements with collision */ |
| 205 | for (i=0; i<oldsize; i++) { | ||
| 206 | Node *old = nold+i; | ||
| 207 | if (old->next) { /* wasn't already `removed'? */ | ||
| 208 | Node *mp = old->next; /* main position */ | ||
| 209 | Node *e = t->firstfree; /* actual position */ | ||
| 210 | e->key = old->key; /* put element in the free position */ | ||
| 211 | e->val = old->val; | ||
| 212 | e->next = mp->next; /* chain actual position in main position's list */ | ||
| 213 | mp->next = e; | ||
| 214 | do { /* update `firstfree' */ | ||
| 215 | t->firstfree--; | ||
| 216 | } while (ttype(&t->firstfree->key) != LUA_T_NIL); | ||
| 217 | } | ||
| 218 | } | ||
| 219 | LUA_ASSERT(check_invariant(t, 0), "invalid table"); | ||
| 220 | luaM_free(nold); /* free old array */ | ||
| 144 | } | 221 | } |
| 145 | 222 | ||
| 146 | 223 | ||
| 147 | int luaH_pos (const Hash *t, const TObject *r) { | 224 | /* |
| 148 | Node *const n = luaH_present(t, r); | 225 | ** sets a pair key-value in a hash table; first, check whether key is |
| 149 | luaL_arg_check(ttype(val(n)) != LUA_T_NIL, 2, "key not found"); | 226 | ** already present; if not, check whether key's main position is free; |
| 150 | return n-(t->node); | 227 | ** if not, check whether colliding node is in its main position or not; |
| 228 | ** if it is not, move colliding node to an empty place and put new pair | ||
| 229 | ** in its main position; otherwise (colliding node is in its main position), | ||
| 230 | ** new pair goes to an empty position. | ||
| 231 | ** Tricky point: the only place where an old element is moved is when | ||
| 232 | ** we move the colliding node to an empty place; nevertheless, its old | ||
| 233 | ** value is still in that position until we set the value for the new | ||
| 234 | ** pair; therefore, even when `val' points to an element of this table | ||
| 235 | ** (this happens when we use `luaH_move'), there is no problem. | ||
| 236 | */ | ||
| 237 | void luaH_set (Hash *t, const TObject *key, const TObject *val) { | ||
| 238 | Node *mp = luaH_mainposition(t, key); | ||
| 239 | Node *n = mp; | ||
| 240 | do { /* check whether `key' is somewhere in the chain */ | ||
| 241 | if (luaO_equalObj(key, &n->key)) { | ||
| 242 | n->val = *val; /* update value */ | ||
| 243 | return; /* that's all */ | ||
| 244 | } | ||
| 245 | else n = n->next; | ||
| 246 | } while (n); | ||
| 247 | /* `key' not found; must insert it */ | ||
| 248 | if (ttype(&mp->key) != LUA_T_NIL) { /* main position is not free? */ | ||
| 249 | Node *othern; /* main position of colliding node */ | ||
| 250 | n = t->firstfree; /* get a free place */ | ||
| | | 251 | /* is colliding node out of its main position? (can only happen if |
| | | 252 | its position is after "firstfree") */ |
| 253 | if (mp > n && (othern=luaH_mainposition(t, &mp->key)) != mp) { | ||
| 254 | /* yes; move colliding node into free position */ | ||
| 255 | while (othern->next != mp) othern = othern->next; /* find previous */ | ||
| 256 | othern->next = n; /* redo the chain with `n' in place of `mp' */ | ||
| 257 | *n = *mp; /* copy colliding node into free pos. (mp->next also goes) */ | ||
| 258 | mp->next = NULL; /* now `mp' is free */ | ||
| 259 | } | ||
| 260 | else { /* colliding node is in its own main position */ | ||
| 261 | /* new node will go into free position */ | ||
| 262 | n->next = mp->next; /* chain new position */ | ||
| 263 | mp->next = n; | ||
| 264 | mp = n; | ||
| 265 | } | ||
| 266 | } | ||
| 267 | mp->key = *key; | ||
| 268 | mp->val = *val; | ||
| 269 | for (;;) { /* check free places */ | ||
| 270 | if (ttype(&(t->firstfree)->key) == LUA_T_NIL) | ||
| 271 | return; /* OK; table still has a free place */ | ||
| 272 | else if (t->firstfree == t->node) break; /* cannot decrement from here */ | ||
| 273 | else (t->firstfree)--; | ||
| 274 | } | ||
| 275 | rehash(t); /* no more free places */ | ||
| 151 | } | 276 | } |
| 152 | 277 | ||
| 153 | 278 | ||
| 154 | void luaH_setint (Hash *t, int ref, const TObject *val) { | 279 | void luaH_setint (Hash *t, int key, const TObject *val) { |
| 155 | TObject index; | 280 | TObject index; |
| 156 | ttype(&index) = LUA_T_NUMBER; | 281 | ttype(&index) = LUA_T_NUMBER; |
| 157 | nvalue(&index) = ref; | 282 | nvalue(&index) = key; |
| 158 | luaH_set(t, &index, val); | 283 | luaH_set(t, &index, val); |
| 159 | } | 284 | } |
| 160 | 285 | ||
| 161 | 286 | ||
| 162 | TObject *luaH_getint (const Hash *t, int ref) { | 287 | const TObject *luaH_getint (const Hash *t, int key) { |
| 163 | TObject index; | 288 | TObject index; |
| 164 | ttype(&index) = LUA_T_NUMBER; | 289 | ttype(&index) = LUA_T_NUMBER; |
| 165 | nvalue(&index) = ref; | 290 | nvalue(&index) = key; |
| 166 | return luaH_get(t, &index); | 291 | return luaH_get(t, &index); |
| 167 | } | 292 | } |
| 168 | 293 | ||
