diff --git a/polymer/eduke32/source/lunatic/bitar.lua b/polymer/eduke32/source/lunatic/bitar.lua
new file mode 100644
index 000000000..f5eacfa9e
--- /dev/null
+++ b/polymer/eduke32/source/lunatic/bitar.lua
@@ -0,0 +1,54 @@
+
+-- "Bit array" module based on LuaJIT's BitOp.
+
+local bit = require "bit"
+
+local error = error
+local type = type
+
+module(...)
+
+-- Create new bit array.
+-- Returns a table p in which entries p[0] through p[floor((size+31)/32)]
+-- are set to an initialization value: 0 if 0 has been passed, -1 if 1
+-- has been passed.
+-- Raises an error (blamed on the caller) if size is not a nonnegative
+-- number or if initval is neither 0 nor 1.
+function new(size, initval)
+    if (type(size) ~= "number" or size < 0) then
+        error("bad argument #1 to new (must be a nonnegative number)", 2)
+    end
+
+    if (initval ~= 0 and initval ~= 1) then
+        error("bad argument #2 to new (must be either 0 or 1)", 2)
+    end
+
+    -- Spelled out instead of -initval, which would store -0 for initval 0.
+    local fill = (initval == 1) and -1 or 0
+
+    local p = {}
+    for i=0,(size+31)/32 do
+        p[i] = fill
+    end
+
+    return p
+end
+
+-- Is bit i set in bit array ar?
+-- Bit i lives in 32-bit word ar[i>>5]; the shift count of bit.lshift is
+-- taken modulo 32, so i itself can be passed as the bit position.
+function isset(ar, i)
+    return bit.band(ar[bit.rshift(i, 5)], bit.lshift(1, i)) ~= 0
+end
+
+-- Clear bit j in bit array ar.
+function set0(ar, j)
+    local jx = bit.rshift(j, 5)
+    ar[jx] = bit.band(ar[jx], bit.rol(0xfffffffe, j))
+end
+
+-- Set bit j in bit array ar.
+function set1(ar, j)
+    local jx = bit.rshift(j, 5)
+    ar[jx] = bit.bor(ar[jx], bit.rol(0x00000001, j))
+end
diff --git a/polymer/eduke32/source/lunatic/bittest.lua b/polymer/eduke32/source/lunatic/bittest.lua
index 64de2317d..de22b32d5 100755
--- a/polymer/eduke32/source/lunatic/bittest.lua
+++ b/polymer/eduke32/source/lunatic/bittest.lua
@@ -1,11 +1,16 @@
 #!/usr/bin/env luajit
 
+-- Usage: luajit bittest.lua <m> [-ffi] [-bchk]
+
 local string = require "string"
 
 local getticks
 
 local bit = require("bit")
+local bitar = require "bitar"
+
 local print = print
+local tonumber = tonumber
 
 if (string.dump) then
     -- stand-alone
@@ -20,18 +25,29 @@ else
     module(...)
 end
 
--- from http://bitop.luajit.org/api.html
+-- based on example from http://bitop.luajit.org/api.html
 
-local band, bxor = bit.band, bit.bxor
-local rshift, rol = bit.rshift, bit.rol
+local isset, set0 = bitar.isset, bitar.set0
 
-local m = string.dump and arg[1] or 1e7
+local m = string.dump and tonumber(arg[1]) or 1e7
+
+local ffiar_p, boundchk_p = false, false
+
+if (string.dump) then
+    if (arg[2]=="-ffi" or arg[3]=="-ffi") then
+        ffiar_p = true
+    end
+
+    if (arg[2]=="-bchk" or arg[3]=="-bchk") then
+        boundchk_p = true
+    end
+end
 
 function sieve()
     local count = 0
     local p = {}
 
-    if (string.dump) then
+    if (ffiar_p) then
         -- stand-alone using unchecked int32_t array instead of table:
         -- on x86_64 approx. 100 vs. 160 ms for m = 1e7
         -- (enabling bound checking makes it be around 170 ms)
@@ -39,61 +55,42 @@ function sieve()
         local pp = ffi.new("int32_t [?]", (m+31)/32 + 1)
 
         p = pp
---[[
-        local mt = {
-            __index = function(tab,idx)
-                if (idx >= 0 and idx <= (m+31)/32) then
-                    return pp[idx]
-                end
-            end,
 
-            __newindex = function(tab,idx,val)
-                if (idx >= 0 and idx <= (m+31)/32) then
-                    pp[idx] = val
-                end
-            end,
-        }
+        if (boundchk_p) then
+            local mt = {
+                __index = function(tab,idx)
+                    if (idx >= 0 and idx <= (m+31)/32) then
+                        return pp[idx]
+                    end
+                end,
 
-        p = setmetatable({}, mt)
---]]
+                __newindex = function(tab,idx,val)
+                    if (idx >= 0 and idx <= (m+31)/32) then
+                        pp[idx] = val
+                    end
+                end,
+            }
+
+            p = setmetatable({}, mt)
+        end
+
+        for i=0,(m+31)/32 do p[i] = -1; end
+    else
+        p = bitar.new(m, 1)
     end
 
-    for i=0,(m+31)/32 do p[i] = -1 end
-
     local t = getticks()
 
-    -- See http://luajit.org/ext_ffi_tutorial.html,
-    -- "To Cache or Not to Cache"
-    -- bit. - qualified version: with m=1e7 ond x86_64:
-    -- 165 ms embedded, 100 ms stand-alone
----[[
     for i=2,m do
-        if bit.band(bit.rshift(p[bit.rshift(i, 5)], i), 1) ~= 0 then
+        if (isset(p, i)) then
             count = count + 1
-            for j=i+i,m,i do
-                local jx = bit.rshift(j, 5)
-                p[jx] = bit.band(p[jx], bit.rol(-2, j))
-            end
+            for j=i+i,m,i do set0(p, j); end
         end
     end
---]]
 
-    -- local var version: with m=1e7 on x86_64:
-    -- 205 ms embedded, 90 ms stand-alone
---[[
-    for i=2,m do
-        if band(rshift(p[rshift(i, 5)], i), 1) ~= 0 then
-            count = count + 1
-            for j=i+i,m,i do
-                local jx = rshift(j, 5)
-                p[jx] = band(p[jx], rol(-2, j))
-            end
-        end
-    end
---]]
-
-    print(string.format("Found %d primes up to %d (%.02f ms)", count, m,
-                        getticks()-t))
+    print(string.format("[%s] Found %d primes up to %d (%.02f ms)",
+                        ffiar_p and "ffi-ar"..(boundchk_p and ", bchk" or "") or "tab-ar",
+                        count, m, getticks()-t))
 end
 
 if (string.dump) then