diff --git a/polymer/eduke32/Makefile b/polymer/eduke32/Makefile
index 1448f7f2a..cccbb40fd 100644
--- a/polymer/eduke32/Makefile
+++ b/polymer/eduke32/Makefile
@@ -166,7 +166,8 @@ ifneq (0,$(LUNATIC))
     GAMEOBJS+= $(OBJ)/../lpeg.$o  # TEMP
     GAMEOBJS+= $(OBJ)/luaJIT_BC_con_lang.$o \
         $(OBJ)/luaJIT_BC_lunacon.$o \
-        $(OBJ)/luaJIT_BC_geom.$o
+        $(OBJ)/luaJIT_BC_geom.$o \
+        $(OBJ)/luaJIT_BC_randgen.$o
 
     # now, take care of having the necessary symbols (sector, wall, etc.) in the
     # executable no matter what the debugging level
diff --git a/polymer/eduke32/source/lunatic/defs.ilua b/polymer/eduke32/source/lunatic/defs.ilua
index 515565b43..0c8937554 100644
--- a/polymer/eduke32/source/lunatic/defs.ilua
+++ b/polymer/eduke32/source/lunatic/defs.ilua
@@ -482,6 +482,19 @@ void kclose(int32_t handle);
 int32_t kread(int32_t handle, void *buffer, int32_t leng);
 ]]
 
+-- JKISS PRNG state struct and functions, see randgen module
+ffi.cdef[[
+typedef struct {
+    uint32_t x, y, z, c;
+} rng_jkiss_t;
+]]
+decl[[
+uint32_t rand_jkiss_u32(rng_jkiss_t *s);
+double rand_jkiss_dbl(rng_jkiss_t *s);
+
+void md4once(const unsigned char *block, unsigned int len, unsigned char digest[16]);
+]]
+
 ffi.cdef[[
 int32_t ksqrt(uint32_t num);
 ]]
@@ -523,6 +536,8 @@ local string_dump = string.dump
 string.dump = nil
 
 
+gv_tmp = gv_  -- picked up by randgen.lua (local gv = gv_tmp) when it is require()d below
+
 local allowed_modules = {
     coroutine=coroutine, bit=bit, table=table, math=math, string=string,
 
@@ -530,6 +545,7 @@ local allowed_modules = {
         clock = function() return gv_.gethitickms()/1000 end,
     },
 
+    randgen = require("randgen"),
     geom = require("geom"),
 }
 
@@ -620,7 +636,7 @@ local function our_require(modname)
 
     local modfunc, errmsg = loadstring(str)
     if (modfunc == nil) then
-        errorf(ERRLEV-1, "Couldn't load \"%s\": %s", fn, errmsg)
+        errorf(ERRLEV-1, "Couldn't load \"%s\": %s", modname, errmsg)
     end
 
     local modtab = modfunc(modname)
diff --git a/polymer/eduke32/source/lunatic/dynsymlist b/polymer/eduke32/source/lunatic/dynsymlist
index 27d4ce937..850ea2212 100644
--- a/polymer/eduke32/source/lunatic/dynsymlist
+++ b/polymer/eduke32/source/lunatic/dynsymlist
@@ -41,6 +41,11 @@ g_player;
 luaJIT_BC_lunacon;
 luaJIT_BC_con_lang;
 luaJIT_BC_geom;
+luaJIT_BC_randgen;
+
+rand_jkiss_u32;
+rand_jkiss_dbl;
+md4once;
 
 gethitickms;
 };
diff --git a/polymer/eduke32/source/lunatic/lunatic.c b/polymer/eduke32/source/lunatic/lunatic.c
index e6fa6e29f..9c1f18bc9 100644
--- a/polymer/eduke32/source/lunatic/lunatic.c
+++ b/polymer/eduke32/source/lunatic/lunatic.c
@@ -35,6 +35,35 @@ static int32_t SetActor_luacf(lua_State *L);
 // in lpeg.o
 extern int luaopen_lpeg(lua_State *L);
 
+
+typedef struct {
+    uint32_t x, y, z, c;
+} rng_jkiss_t;
+
+// JKISS PRNG, see: Good Practice in (Pseudo) Random Number Generation for
+// Bioinformatics Applications, by David Jones
+ATTRIBUTE((optimize("O2")))
+uint32_t rand_jkiss_u32(rng_jkiss_t *s)
+{
+    uint64_t t;
+    s->x = 314527869 * s->x + 1234567;
+    s->y ^= s->y << 5; s->y ^= s->y >> 7; s->y ^= s->y << 22;
+    t = 4294584393ULL * s->z + s->c; s->c = t >> 32; s->z = t;
+    return s->x + s->y + s->z;
+}
+
+ATTRIBUTE((optimize("O2")))
+double rand_jkiss_dbl(rng_jkiss_t *s)
+{
+    double x;
+    unsigned int a, b;
+    a = rand_jkiss_u32(s) >> 6; /* Upper 26 bits */
+    b = rand_jkiss_u32(s) >> 5; /* Upper 27 bits */
+    x = (a * 134217728.0 + b) / 9007199254740992.0; /* (a*2^27 + b) / 2^53 */
+    return x;
+}
+
+
 void El_PrintTimes(void)
 {
     int32_t i;
diff --git a/polymer/eduke32/source/lunatic/randgen.lua b/polymer/eduke32/source/lunatic/randgen.lua
new file mode 100644
index 000000000..b6b6de10f
--- /dev/null
+++ b/polymer/eduke32/source/lunatic/randgen.lua
@@ -0,0 +1,78 @@
+-- Pseudo random number generation module for Lunatic
+
+local ffi = require("ffi")
+local ffiC = ffi.C
+
+local rawset = rawset
+
+local type = type
+local gv = gv_tmp  -- temporarily set in defs.ilua
+
+local print = print  -- for commented out debug block in new() below
+
+module(...)
+
+
+-- NOTE: PRNG state struct and functions are declared in defs.ilua
+
+ffi.cdef[[
+typedef union { unsigned char u[16]; double d[2]; } uchar_double_u_t;
+typedef union { unsigned char u[16]; uint32_t i[4]; } uchar_uint_u_t;
+]]
+
+local mt = {
+    __tostring = function(s)
+        return "rand.new("..s.x..","..s.y..","..s.z..","..s.c..")"
+    end,
+
+    __index = {
+        getu32 = ffiC.rand_jkiss_u32,
+        getdbl = ffiC.rand_jkiss_dbl,
+
+        -- Seed the JKISS state from the MD4 digest of the fractional parts of
+        -- two profiling timer readings, using the digest as four 32-bit words.
+        init_time_md4 = function(s)
+            local tin = ffi.new("uchar_double_u_t")
+            local tout = ffi.new("uchar_uint_u_t")
+
+            repeat
+                tin.d[0] = gv.gethitickms() % 1
+                tin.d[1] = gv.gethitickms() % 1
+
+                ffiC.md4once(tin.u, 16, tout.u)
+                -- NOTE: read the 32-bit view .i; the byte view .u has only 8 bits each
+                s.y = tout.i[1]
+            until (s.y ~= 0)  -- y must not be zero!
+
+            s.x = tout.i[0]
+            s.z = tout.i[2]
+            s.c = tout.i[3] % 698769068 + 1  -- Should be less than 698769069
+        end,
+    },
+}
+local jkiss = ffi.metatype("rng_jkiss_t", mt)
+
+-- new()/new(false): zeroed state; new(true): timer-seeded; new(x,y,z,c): as given
+function new(x,y,z,c)
+    local s
+    if (x == nil or type(x)=="boolean") then
+        s = jkiss(0,0,0,0)  -- invalid state, must be initialized first
+        if (x) then
+            s:init_time_md4()
+        end
+    else
+        s = jkiss(x,y,z,c)
+    end
+--[[
+    print("TEST")
+    local r=ffi.new("rng_jkiss_t")
+    r.x = 123456789; r.y = 987654321; r.z = 43219876; r.c = 6543217;
+    local t=gv.gethitickms()
+    for i=1,4*2*1e6 do
+        ffiC.rand_jkiss_dbl(r)
+    end
+    print("TIME: "..gv.gethitickms()-t)  -- x86_64: approx. 100 ms
+--]]
+
+    return s
+end
diff --git a/polymer/eduke32/source/lunatic/test/test_geom.lua b/polymer/eduke32/source/lunatic/test/test_geom.lua
index 9d577ab2b..f74b041a5 100755
--- a/polymer/eduke32/source/lunatic/test/test_geom.lua
+++ b/polymer/eduke32/source/lunatic/test/test_geom.lua
@@ -1,22 +1,49 @@
 #!/usr/bin/env luajit
 
-local math = require("math")
 local os = require("os")
 
 local geom = require("geom")
 
 
-local N = os.exit and tostring(arg[1]) or 1e6
+local N = os.exit and (arg[1] and tonumber(arg[1])) or 1e6
 
 local A,B = {}, {}
 local V,W = {}, {}
 
-local function randvec()
-    return geom.vec2(math.random(), math.random())
+local randvec
+
+if (os.exit) then
+    local math = require("math")
+
+    randvec = function()
+        return geom.vec2(math.random(), math.random())
+    end
+else
+    local randgen = require("randgen")
+    local s = randgen.new(true)
+
+    -- NOTE: factoring out the inner s:getdbl() into a separate function
+    -- reduces performance seriously (about an order of magnitude!)
+    randvec = function()
+        return geom.vec2(s:getdbl(), s:getdbl())
+    end
 end
 
 local t1 = os.clock()
 
+if (os.exit == nil) then
+    local randgen = require("randgen")
+    local r = randgen.new(true)
+
+    for i=1,4*2*N do
+        -- This is to test the performance compared to a direct
+        -- ffiC.rand_jkiss_dbl() call in randgen.lua
+        r:getdbl()
+    end
+end
+
+local t2 = os.clock()
+
 -- init random points and vectors
 for i=1,N do
     A[i] = randvec()
@@ -25,17 +52,20 @@ for i=1,N do
     W[i] = randvec()
 end
 
-local t2 = os.clock()
+local t3 = os.clock()
 
 local v = geom.vec2(0, 0)
 for i=1,N do
     v = v + geom.intersect(A[i],V[i], B[i],W[i], true)
 end
 
-local t3 = os.clock()
+local t4 = os.clock()
 
+-- x86_64 (embedded): approx. 200 ms (vs. the 100 ms of direct
+-- ffiC.rand_jkiss_dbl()):
 print(1000*(t2-t1))
-print(1000*(t3-t2))
+print(1000*(t3-t2))  -- x86_64: approx. 500 ms
+print(1000*(t4-t3))  -- x86_64: approx. 35 ms
 print(v)
 
 return {}  -- appease Lunatic's require