summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRoberto Ierusalimschy <roberto@inf.puc-rio.br>2020-04-01 10:52:41 -0300
committerRoberto Ierusalimschy <roberto@inf.puc-rio.br>2020-04-01 10:52:41 -0300
commit7288528a1e081d101a1bc19346a974088b6b8315 (patch)
tree8a3b73aafaea2d705ea1f53e131d38f29aef52fd
parent513559cc4760392b6fa33754c516683ef49dba22 (diff)
downloadlua-7288528a1e081d101a1bc19346a974088b6b8315.tar.gz
lua-7288528a1e081d101a1bc19346a974088b6b8315.tar.bz2
lua-7288528a1e081d101a1bc19346a974088b6b8315.zip
Short strings always use all bytes in the hash
Collisions in short strings occurr just by their existence, when internalizing them. (Collisions in long strings is caused/controlled by the program, when adding them as keys to the same table.)
-rw-r--r--lstate.c2
-rw-r--r--lstring.c12
-rw-r--r--lstring.h3
3 files changed, 10 insertions, 7 deletions
diff --git a/lstate.c b/lstate.c
index 7770e314..8fba70d7 100644
--- a/lstate.c
+++ b/lstate.c
@@ -76,7 +76,7 @@ static unsigned int luai_makeseed (lua_State *L) {
76 addbuff(buff, p, &h); /* local variable */ 76 addbuff(buff, p, &h); /* local variable */
77 addbuff(buff, p, &lua_newstate); /* public function */ 77 addbuff(buff, p, &lua_newstate); /* public function */
78 lua_assert(p == sizeof(buff)); 78 lua_assert(p == sizeof(buff));
79 return luaS_hash(buff, p, h); 79 return luaS_hash(buff, p, h, 1);
80} 80}
81 81
82#endif 82#endif
diff --git a/lstring.c b/lstring.c
index a6ffbdd0..6f157473 100644
--- a/lstring.c
+++ b/lstring.c
@@ -23,7 +23,7 @@
23 23
24 24
25/* 25/*
26** Lua will use at most ~(2^LUAI_HASHLIMIT) bytes from a string to 26** Lua will use at most ~(2^LUAI_HASHLIMIT) bytes from a long string to
27** compute its hash 27** compute its hash
28*/ 28*/
29#if !defined(LUAI_HASHLIMIT) 29#if !defined(LUAI_HASHLIMIT)
@@ -50,9 +50,9 @@ int luaS_eqlngstr (TString *a, TString *b) {
50} 50}
51 51
52 52
53unsigned int luaS_hash (const char *str, size_t l, unsigned int seed) { 53unsigned int luaS_hash (const char *str, size_t l, unsigned int seed,
54 size_t step) {
54 unsigned int h = seed ^ cast_uint(l); 55 unsigned int h = seed ^ cast_uint(l);
55 size_t step = (l >> LUAI_HASHLIMIT) + 1;
56 for (; l >= step; l -= step) 56 for (; l >= step; l -= step)
57 h ^= ((h<<5) + (h>>2) + cast_byte(str[l - 1])); 57 h ^= ((h<<5) + (h>>2) + cast_byte(str[l - 1]));
58 return h; 58 return h;
@@ -62,7 +62,9 @@ unsigned int luaS_hash (const char *str, size_t l, unsigned int seed) {
62unsigned int luaS_hashlongstr (TString *ts) { 62unsigned int luaS_hashlongstr (TString *ts) {
63 lua_assert(ts->tt == LUA_VLNGSTR); 63 lua_assert(ts->tt == LUA_VLNGSTR);
64 if (ts->extra == 0) { /* no hash? */ 64 if (ts->extra == 0) { /* no hash? */
65 ts->hash = luaS_hash(getstr(ts), ts->u.lnglen, ts->hash); 65 size_t len = ts->u.lnglen;
66 size_t step = (len >> LUAI_HASHLIMIT) + 1;
67 ts->hash = luaS_hash(getstr(ts), len, ts->hash, step);
66 ts->extra = 1; /* now it has its hash */ 68 ts->extra = 1; /* now it has its hash */
67 } 69 }
68 return ts->hash; 70 return ts->hash;
@@ -199,7 +201,7 @@ static TString *internshrstr (lua_State *L, const char *str, size_t l) {
199 TString *ts; 201 TString *ts;
200 global_State *g = G(L); 202 global_State *g = G(L);
201 stringtable *tb = &g->strt; 203 stringtable *tb = &g->strt;
202 unsigned int h = luaS_hash(str, l, g->seed); 204 unsigned int h = luaS_hash(str, l, g->seed, 1);
203 TString **list = &tb->hash[lmod(h, tb->size)]; 205 TString **list = &tb->hash[lmod(h, tb->size)];
204 lua_assert(str != NULL); /* otherwise 'memcmp'/'memcpy' are undefined */ 206 lua_assert(str != NULL); /* otherwise 'memcmp'/'memcpy' are undefined */
205 for (ts = *list; ts != NULL; ts = ts->u.hnext) { 207 for (ts = *list; ts != NULL; ts = ts->u.hnext) {
diff --git a/lstring.h b/lstring.h
index c23d6874..56896867 100644
--- a/lstring.h
+++ b/lstring.h
@@ -37,7 +37,8 @@
37#define eqshrstr(a,b) check_exp((a)->tt == LUA_VSHRSTR, (a) == (b)) 37#define eqshrstr(a,b) check_exp((a)->tt == LUA_VSHRSTR, (a) == (b))
38 38
39 39
40LUAI_FUNC unsigned int luaS_hash (const char *str, size_t l, unsigned int seed); 40LUAI_FUNC unsigned int luaS_hash (const char *str, size_t l,
41 unsigned int seed, size_t step);
41LUAI_FUNC unsigned int luaS_hashlongstr (TString *ts); 42LUAI_FUNC unsigned int luaS_hashlongstr (TString *ts);
42LUAI_FUNC int luaS_eqlngstr (TString *a, TString *b); 43LUAI_FUNC int luaS_eqlngstr (TString *a, TString *b);
43LUAI_FUNC void luaS_resize (lua_State *L, int newsize); 44LUAI_FUNC void luaS_resize (lua_State *L, int newsize);