From b44787652bd3a51dfc2ee92120abe05b28159d32 Mon Sep 17 00:00:00 2001 From: Roberto Ierusalimschy Date: Wed, 4 Apr 2018 13:12:53 -0300 Subject: using 'xoroshiro128+' for PRNG (plus a rotate at the final result to have better lower bits) --- lmathlib.c | 175 +++++++++++++++++++++++++++++++------------------------------ 1 file changed, 88 insertions(+), 87 deletions(-) diff --git a/lmathlib.c b/lmathlib.c index 9432d405..3e5ceea4 100644 --- a/lmathlib.c +++ b/lmathlib.c @@ -1,5 +1,5 @@ /* -** $Id: lmathlib.c,v 1.127 2018/03/22 19:54:49 roberto Exp roberto $ +** $Id: lmathlib.c,v 1.128 2018/03/26 19:48:46 roberto Exp $ ** Standard mathematical library ** See Copyright Notice in lua.h */ @@ -247,7 +247,7 @@ static int math_type (lua_State *L) { /* ** {================================================================== -** Pseudo-Random Number Generator based on 'xorshift128+'. +** Pseudo-Random Number Generator based on 'xoroshiro128+'. ** =================================================================== */ @@ -270,34 +270,45 @@ static int math_type (lua_State *L) { /* a 64-bit value */ typedef unsigned long long I; -static I xorshift128plus (I *state) { - I x = state[0]; - I y = state[1]; - state[0] = y; - x ^= x << 23; - state[1] = (x ^ (x >> 18)) ^ (y ^ (y >> 5)); - return state[1] + y; + +/* rotate left 'x' by 'n' bits */ +static I rotl (I x, int n) { + return (x << n) | (x >> (64 - n)); +} + +static I nextrand (I *state) { + I s0 = state[0]; + I s1 = state[1]; + I res = s0 + s1; + res = rotl(res, 41); /* extra step to change place of lower bits */ + s1 = s1 ^ s0; + state[0] = rotl(s0, 55) ^ (s1 ^ (s1 << 14)); + state[1] = rotl(s1, 36); + return res; } + /* must take care to not shift stuff by more than 63 slots */ -#define shiftI (64 - FIGS) /* leave FIGS bits */ -#define shiftF (l_mathop(0.5) / (1LLU << (FIGS - 1))) /* 2^(-FIG) */ /* ** Convert bits from a random integer into a float in the ** interval [0,1). */ +#define maskFIG (~(~1LLU << (FIGS - 1))) /* use FIGS bits */ +#define shiftFIG (l_mathop(0.5) / (1LLU << (FIGS - 1))) /* 2^(-FIGS) */ + static lua_Number I2d (I x) { - return (lua_Number)(x >> shiftI) * shiftF; + return (lua_Number)(x & maskFIG) * shiftFIG; } -/* convert an 'I' to a lua_Unsigned (using higher bits) */ -#define I2UInt(x) ((lua_Unsigned)((x) >> (64 - LUA_UNSIGNEDBITS))) +/* convert an 'I' to a lua_Unsigned */ +#define I2UInt(x) ((lua_Unsigned)(x)) -/* convert a lua_Integer to an 'I' */ +/* convert a lua_Unsigned to an 'I' */ #define Int2I(x) ((I)(x)) + #else /* no long long }{ */ /* @@ -330,14 +341,10 @@ static I packI (lu_int32 h, lu_int32 l) { /* i ^ (i << n) */ static I Ixorshl (I i, int n) { + lua_assert(n > 0 && n < 32); return packI(i.h ^ ((i.h << n) | (i.l >> (32 - n))), i.l ^ (i.l << n)); } -/* i ^ (i >> n) */ -static I Ixorshr (I i, int n) { - return packI(i.h ^ (i.h >> n), i.l ^ ((i.l >> n) | (i.h << (32 - n)))); -} - static I Ixor (I i1, I i2) { return packI(i1.h ^ i2.h, i1.l ^ i2.l); } @@ -349,18 +356,29 @@ static I Iadd (I i1, I i2) { return result; } +/* +** Rotate left. As all offsets here are larger than 32, do a rotate right +** of 64 - offset. +*/ +static I Irotli (I i, int n) { + n = 64 - n; + lua_assert(n > 0 && n < 32); + return packI((i.h >> n) | (i.l << (32 - n)), + (i.h << (32 - n)) | (i.l >> n)); +} /* -** implementation of 'xorshift128+' algorithm on 'I' values +** implementation of 'xoroshiro128+' algorithm on 'I' values */ -static I xorshift128plus (I *state) { - I x = state[0]; - I y = state[1]; - state[0] = y; - x = Ixorshl(x, 23); /* x ^= x << 23; */ - /* state[1] = (x ^ (x >> 18)) ^ (y ^ (y >> 5)); */ - state[1] = Ixor(Ixorshr(x, 18), Ixorshr(y, 5)); - return Iadd(state[1], y); /* return state[1] + y; */ +static I nextrand (I *state) { + I s0 = state[0]; + I s1 = state[1]; + I res = Iadd(s0, s1); + res = Irotli(res, 41); + s1 = Ixor(s1, s0); + state[0] = Ixor(Irotli(s0, 55), Ixorshl(s1, 14)); + state[1] = Irotli(s1, 36); + return res; } @@ -373,45 +391,39 @@ static I xorshift128plus (I *state) { #if FIGS <= 32 -#define maskLOW 0 /* do not need bits from lower half */ -#define maskHI (~(~(lu_int32)0 >> (FIGS - 1) >> 1)) /* use FIGS bits */ -#define shiftHI 1 /* no shift */ -#define shiftF (1 / l_mathop(4294967296.0)) /* 2^(-32) */ +#define maskHI 0 /* do not need bits from higher half */ +#define maskLOW (~(~UONE << (FIGS - 1))) /* use FIGS bits */ +#define shiftFIG (l_mathop(0.5) / (UONE << (FIGS - 1))) /* 2^(-FIGS) */ #else /* 32 < FIGS <= 64 */ /* must take care to not shift stuff by more than 31 slots */ -/* use FIGS - 32 bits from lower half */ -#define maskLOW (~(~(lu_int32)0 >> (FIGS - 33) >> 1)) - -/* use all bits from higher half */ -#define maskHI (~(lu_int32)0) +/* use FIGS - 32 bits from higher half */ +#define maskHI (~(~UONE << (FIGS - 33))) -#define shiftHI l_mathop(4294967296.0) /* 2^32 */ +/* use all bits from lower half */ +#define maskLOW (~(lu_int32)0) -/* 2^(-64) */ -#define shiftF ((lua_Number)(1 / (shiftHI * shiftHI))) +/* 2^(-FIGS) == (1 / 2^33) / 2^(FIGS-33) */ +#define shiftFIG ((lua_Number)(1.0 / 8589934592.0) / (UONE << (FIGS - 33))) #endif +#define twoto32 l_mathop(4294967296.0) /* 2^32 */ + static lua_Number I2d (I x) { lua_Number h = (lua_Number)(x.h & maskHI); lua_Number l = (lua_Number)(x.l & maskLOW); - return (h * shiftHI + l) * shiftF; + return (h * twoto32 + l) * shiftFIG; } + static lua_Unsigned I2UInt (I x) { -#if (LUA_MAXINTEGER >> 30) <= 1 -/* at most 32 bits; use only high bits */ - return ((lua_Unsigned)x.h); -#else -/* at least 33 bits */ - return ((lua_Unsigned)x.h << (LUA_UNSIGNEDBITS - 32)) | - (lua_Unsigned)x.l >> (64 - LUA_UNSIGNEDBITS); -#endif + return ((lua_Unsigned)x.h << 31 << 1) | (lua_Unsigned)x.l; } + static I Int2I (lua_Unsigned n) { return packI((lu_int32)(n >> 31 >> 1), (lu_int32)n & ~(lu_int32)0); } @@ -427,47 +439,36 @@ typedef struct { } RanState; -/* -** Return the higher bit set in 'x' (first bit is 1). -*/ -static int higherbit (lua_Unsigned x) { - /* table of higher bits from 0 to 255 */ - static const unsigned char hb[256] = { - 0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, - 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, - 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, - 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, - 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, - 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, - 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, - 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8 - }; - int l = 0; - while (x >= 256) { l += 8; x >>= 8; } - return l + hb[x]; -} - - /* ** Project the random integer 'ran' into the interval [0, n]. -** To get a uniform projection into [0,n], we first compute 'shf', the -** largest number that we can right-shift 'ran' and still get numbers -** as larger as 'n'. We then shift 'ran'; if the result is inside [0, n], -** we are done. Otherwise, we try with another 'ran' until we have a -** result inside the interval. (We use right shifts to avoid the lowest -** bits of 'ran', which has poorer distributions.) +** Because 'ran' has 2^B possible values, the projection can only be +** uniform when the size of the interval is a power of 2 (exact +** division). To get a uniform projection into [0, n], we first compute +** 'lim', the smallest Mersenne number not smaller than 'n'. We then +** project 'ran' into the interval [0, lim]. If the result is inside +** [0, n], we are done. Otherwise, we try with another 'ran' until we +** have a result inside the interval. */ static lua_Unsigned project (lua_Unsigned ran, lua_Unsigned n, RanState *state) { - if (n == 0) return 0; /* special case for the unit set */ - else { - int shf = LUA_UNSIGNEDBITS - higherbit(n); - lua_assert(~(lua_Unsigned)0 >> shf >= n && /* not larger */ - ~(lua_Unsigned)0 >> shf >> 1 < n); /* largest */ - while ((ran >>= shf) > n) - ran = I2UInt(xorshift128plus(state->s)); - return ran; + lua_Unsigned lim = n; + if ((lim & (lim + 1)) > 0) { /* 'lim + 1' is not a power of 2? */ + /* compute the smallest (2^b - 1) not smaller than 'n' */ + lim |= (lim >> 1); + lim |= (lim >> 2); + lim |= (lim >> 4); + lim |= (lim >> 8); + lim |= (lim >> 16); +#if (LUA_MAXINTEGER >> 30 >> 2) > 0 + lim |= (lim >> 32); /* integer type has more than 32 bits */ +#endif } + lua_assert((lim & (lim + 1)) == 0 /* 'lim + 1' is a power of 2 */ + && lim >= n /* not smaller than 'n' */ + && (lim == 0 || (lim >> 1) < n)); /* it is the smallest one */ + while ((ran &= lim) > n) + ran = I2UInt(nextrand(state->s)); + return ran; } @@ -475,7 +476,7 @@ static int math_random (lua_State *L) { lua_Integer low, up; lua_Unsigned p; RanState *state = (RanState *)lua_touserdata(L, lua_upvalueindex(1)); - I rv = xorshift128plus(state->s); /* next pseudo-random value */ + I rv = nextrand(state->s); /* next pseudo-random value */ switch (lua_gettop(L)) { /* check number of arguments */ case 0: { /* no arguments */ lua_pushnumber(L, I2d(rv)); /* float between 0 and 1 */ @@ -511,7 +512,7 @@ static void setseed (I *state, lua_Unsigned n) { state[0] = Int2I(n); state[1] = Int2I(0xff); /* avoid a zero state */ for (i = 0; i < 16; i++) - xorshift128plus(state); /* discard initial values to "spread" seed */ + nextrand(state); /* discard initial values to "spread" seed */ } -- cgit v1.2.3-55-g6feb