diff options
author | Roberto Ierusalimschy <roberto@inf.puc-rio.br> | 2020-04-01 10:52:41 -0300 |
---|---|---|
committer | Roberto Ierusalimschy <roberto@inf.puc-rio.br> | 2020-04-01 10:52:41 -0300 |
commit | 7288528a1e081d101a1bc19346a974088b6b8315 (patch) | |
tree | 8a3b73aafaea2d705ea1f53e131d38f29aef52fd | |
parent | 513559cc4760392b6fa33754c516683ef49dba22 (diff) | |
download | lua-7288528a1e081d101a1bc19346a974088b6b8315.tar.gz lua-7288528a1e081d101a1bc19346a974088b6b8315.tar.bz2 lua-7288528a1e081d101a1bc19346a974088b6b8315.zip |
Short strings always use all bytes in the hash
Collisions in short strings occurr just by their existence, when
internalizing them. (Collisions in long strings is caused/controlled
by the program, when adding them as keys to the same table.)
-rw-r--r-- | lstate.c | 2 | ||||
-rw-r--r-- | lstring.c | 12 | ||||
-rw-r--r-- | lstring.h | 3 |
3 files changed, 10 insertions, 7 deletions
@@ -76,7 +76,7 @@ static unsigned int luai_makeseed (lua_State *L) { | |||
76 | addbuff(buff, p, &h); /* local variable */ | 76 | addbuff(buff, p, &h); /* local variable */ |
77 | addbuff(buff, p, &lua_newstate); /* public function */ | 77 | addbuff(buff, p, &lua_newstate); /* public function */ |
78 | lua_assert(p == sizeof(buff)); | 78 | lua_assert(p == sizeof(buff)); |
79 | return luaS_hash(buff, p, h); | 79 | return luaS_hash(buff, p, h, 1); |
80 | } | 80 | } |
81 | 81 | ||
82 | #endif | 82 | #endif |
@@ -23,7 +23,7 @@ | |||
23 | 23 | ||
24 | 24 | ||
25 | /* | 25 | /* |
26 | ** Lua will use at most ~(2^LUAI_HASHLIMIT) bytes from a string to | 26 | ** Lua will use at most ~(2^LUAI_HASHLIMIT) bytes from a long string to |
27 | ** compute its hash | 27 | ** compute its hash |
28 | */ | 28 | */ |
29 | #if !defined(LUAI_HASHLIMIT) | 29 | #if !defined(LUAI_HASHLIMIT) |
@@ -50,9 +50,9 @@ int luaS_eqlngstr (TString *a, TString *b) { | |||
50 | } | 50 | } |
51 | 51 | ||
52 | 52 | ||
53 | unsigned int luaS_hash (const char *str, size_t l, unsigned int seed) { | 53 | unsigned int luaS_hash (const char *str, size_t l, unsigned int seed, |
54 | size_t step) { | ||
54 | unsigned int h = seed ^ cast_uint(l); | 55 | unsigned int h = seed ^ cast_uint(l); |
55 | size_t step = (l >> LUAI_HASHLIMIT) + 1; | ||
56 | for (; l >= step; l -= step) | 56 | for (; l >= step; l -= step) |
57 | h ^= ((h<<5) + (h>>2) + cast_byte(str[l - 1])); | 57 | h ^= ((h<<5) + (h>>2) + cast_byte(str[l - 1])); |
58 | return h; | 58 | return h; |
@@ -62,7 +62,9 @@ unsigned int luaS_hash (const char *str, size_t l, unsigned int seed) { | |||
62 | unsigned int luaS_hashlongstr (TString *ts) { | 62 | unsigned int luaS_hashlongstr (TString *ts) { |
63 | lua_assert(ts->tt == LUA_VLNGSTR); | 63 | lua_assert(ts->tt == LUA_VLNGSTR); |
64 | if (ts->extra == 0) { /* no hash? */ | 64 | if (ts->extra == 0) { /* no hash? */ |
65 | ts->hash = luaS_hash(getstr(ts), ts->u.lnglen, ts->hash); | 65 | size_t len = ts->u.lnglen; |
66 | size_t step = (len >> LUAI_HASHLIMIT) + 1; | ||
67 | ts->hash = luaS_hash(getstr(ts), len, ts->hash, step); | ||
66 | ts->extra = 1; /* now it has its hash */ | 68 | ts->extra = 1; /* now it has its hash */ |
67 | } | 69 | } |
68 | return ts->hash; | 70 | return ts->hash; |
@@ -199,7 +201,7 @@ static TString *internshrstr (lua_State *L, const char *str, size_t l) { | |||
199 | TString *ts; | 201 | TString *ts; |
200 | global_State *g = G(L); | 202 | global_State *g = G(L); |
201 | stringtable *tb = &g->strt; | 203 | stringtable *tb = &g->strt; |
202 | unsigned int h = luaS_hash(str, l, g->seed); | 204 | unsigned int h = luaS_hash(str, l, g->seed, 1); |
203 | TString **list = &tb->hash[lmod(h, tb->size)]; | 205 | TString **list = &tb->hash[lmod(h, tb->size)]; |
204 | lua_assert(str != NULL); /* otherwise 'memcmp'/'memcpy' are undefined */ | 206 | lua_assert(str != NULL); /* otherwise 'memcmp'/'memcpy' are undefined */ |
205 | for (ts = *list; ts != NULL; ts = ts->u.hnext) { | 207 | for (ts = *list; ts != NULL; ts = ts->u.hnext) { |
@@ -37,7 +37,8 @@ | |||
37 | #define eqshrstr(a,b) check_exp((a)->tt == LUA_VSHRSTR, (a) == (b)) | 37 | #define eqshrstr(a,b) check_exp((a)->tt == LUA_VSHRSTR, (a) == (b)) |
38 | 38 | ||
39 | 39 | ||
40 | LUAI_FUNC unsigned int luaS_hash (const char *str, size_t l, unsigned int seed); | 40 | LUAI_FUNC unsigned int luaS_hash (const char *str, size_t l, |
41 | unsigned int seed, size_t step); | ||
41 | LUAI_FUNC unsigned int luaS_hashlongstr (TString *ts); | 42 | LUAI_FUNC unsigned int luaS_hashlongstr (TString *ts); |
42 | LUAI_FUNC int luaS_eqlngstr (TString *a, TString *b); | 43 | LUAI_FUNC int luaS_eqlngstr (TString *a, TString *b); |
43 | LUAI_FUNC void luaS_resize (lua_State *L, int newsize); | 44 | LUAI_FUNC void luaS_resize (lua_State *L, int newsize); |