-- tests/perftest.lua

--
-- PERFTEST.LUA         Copyright (c) 2007-08, Asko Kauppi <akauppi@gmail.com>
--
-- Performance comparison of multithreaded Lua (= how much ballast does using
-- Lua Lanes introduce)
--
-- Usage:
--      [time] lua -lstrict perftest.lua [threads] [-plain|-single[=2..n]] [-time] [-prio[=-2..+2[,-2..+2]]]
--
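--      threads: number of threads to launch (loops in 'plain' mode)
--      -plain: runs in nonthreaded mode, to get a comparison baseline
--      -single: runs using just a single CPU core (or 'n' cores if given)
--      -time: measures the elapsed time from within Lua and reports it at the end
--      -prio: sets the priority of odd numbered threads (and, if a second
--             value is given, of even numbered threads)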
--
-- History:
--      AKa 20-Jul-08: updated to Lanes 2008
--      AK 14-Apr-07: works on Win32
--
-- To do:
--      (none?)
--

-- On MSYS, stderr is buffered, which matters in this test.
-- Even with buffering turned off, MSYS still seems to buffer line by line,
-- so a '\n' is needed before any output actually appears.
--
local MSYS= os.getenv("OSTYPE")=="msys"


local lanes = require "lanes".configure{ with_timers = false}

local m= require "argtable"
local argtable= assert( m.argtable )

local N= 1000   -- threads/loops to use
local M= 1000   -- sieves from 1..M
local PLAIN= false      -- single threaded (true) or using Lanes (false)
local SINGLE= 0     -- cores to use (0 / 1..n) 
local TIME= false       -- use Lua for the timing
local PRIO_ODD, PRIO_EVEN   -- -3..+3

-- Prints the command line synopsis to stderr and terminates the script with
-- exit status 1.
--
-- Used for an unparsable thread count and for unknown options. Terminating
-- matters: argument parsing would otherwise carry on with a nil or
-- half-applied configuration and fail later with an unrelated error.
--
local function HELP()
    io.stderr:write( "Usage: lua perftest.lua [threads] [-plain|-single[=2..n]] [-time] [-prio[=-2..+2[,-2..+2]]]\n" )
    os.exit(1)
end

-- Decodes the value of the '-prio' option.
--
--   true         -> 2, -2      (the value the code treats as "no number given")
--   "odd,even"   -> odd, even  (both as numbers, optional sign allowed)
--   one number   -> that number only, no second result
--
-- Anything else raises "Bad priority: <value>".
--
local function prio_param(v)
    if v == true then
        return 2, -2
    end

    -- Two signed integers separated by a comma
    local odd, even = string.match( v, "^([%+%-]?%d+)%,([%+%-]?%d+)$" )
    if odd then
        return tonumber(odd), tonumber(even)
    end

    local single = tonumber(v)
    if not single then
        error( "Bad priority: "..v )
    end
    return single
end

-- Apply the command line on top of the defaults declared above.
--
-- Key 1 carries the thread/loop count; the string keys are the option names.
-- Unknown keys print the usage text.
--
for k,v in pairs( argtable(...) ) do
    if k==1 then            N= tonumber(v) or HELP()
    elseif k=="plain" then  PLAIN= true
    elseif k=="single" then
        -- SINGLE is later compared with '> 0', so it must always be a number.
        -- A bare '-single' presumably arrives as 'true' (prio_param() makes
        -- the same assumption for '-prio'); it stands for one core.
        local cores= (v==true) and 1 or tonumber(v)
        if cores then
            SINGLE= cores
        else
            HELP()      -- unparsable core count: keep the default
        end
    elseif k=="time" then   TIME= true
    elseif k=="prio" then   PRIO_ODD, PRIO_EVEN= prio_param(v)
    else                    HELP()
    end
end

-- Priorities that were not given stay at the neutral level
PRIO_ODD= PRIO_ODD or 0
PRIO_EVEN= PRIO_EVEN or 0


-- SAMPLE ADOPTED FROM Lua 5.1.1 test/sieve.lua --

-- the sieve of Eratosthenes programmed with coroutines
-- typical usage: lua -e N=1000 sieve.lua | column

-- AK: Wrapped within a surrounding function, so we can pass it to Lanes
--     Note that coroutines can perfectly fine be used within each Lane. :)
--
-- AKa 20-Jul-2008: Now the wrapping to one function is no longer needed;
--     Lanes 2008 can take the used functions as upvalues.

-- It looks like this implementation uses a lot of C stack, possibly resulting in stack overflow with Lua54
-- this is reproducible with the original sieve.lua implementation found at https://www.lua.org/extras/
-- for example: 
-- Lua54 exe built with 1Mb of C stack crashes for M above 230, C stack at 500 calls
-- Lua53 exe built with 1Mb of C stack crashes for M above 491, C stack at 740 calls
-- Lua52 exe built with 1Mb of C stack crashes for M above 672, C stack at 1000 calls
-- Lua51 exe built with 1Mb of C stack crashes for M above 718, C stack at 900 calls
-- Lane body: collects the primes in 2..N through a chain of coroutine filters.
--
-- N:   upper bound of the sieve (1000 when nil)
-- id:  tag written to stderr once the sieve is complete
--
-- Returns an array of the primes found, in ascending order.
--
local function sieve_lane(N,id)

    if MSYS then
        io.stderr:setvbuf "no"
    end

    N = N or 1000

    -- Source of candidates: yields 2, 3, ..., limit and then ends
    local function candidates(limit)
        return coroutine.wrap(function()
            for value = 2, limit do
                coroutine.yield(value)
            end
        end)
    end

    -- Wraps 'upstream', passing on only the values not divisible by 'prime'
    local function without_multiples(prime, upstream)
        return coroutine.wrap(function()
            while true do
                local value = upstream()
                if value == nil then
                    return
                end
                if math.fmod(value, prime) ~= 0 then
                    coroutine.yield(value)
                end
            end
        end)
    end

    local primes = {}
    local source = candidates(N)

    -- The first value to survive all filters so far is a prime; each prime
    -- found adds one more filter in front of the source.
    local prime = source()
    while prime ~= nil do
        table.insert(primes, prime)
        source = without_multiples(prime, source)
        prime = source()
    end

    -- Progress marker: shows on stderr that this run is ready
    io.stderr:write(id..(MSYS and "\n" or "\t"))

    return primes
end
-- ** END OF LANE ** --


-- The lane generators are created up front so that their setup stays outside
-- of the timed section. Both run 'sieve_lane'; they differ only in the
-- priority handed to the lanes they launch.
--
local LANE_LIBS= "base,coroutine,math,table,io"     -- "*" = all

local function sieve_generator( prio )
    return lanes.gen( LANE_LIBS, { name = 'auto', priority= prio }, sieve_lane )
end

local f_even= sieve_generator( PRIO_EVEN )
local f_odd= sieve_generator( PRIO_ODD )

io.stderr:write( "*** Counting primes 1.."..M.." "..N.." times ***\n\n" )

-- Sanity check applied to every sieve result: a table starting with 2 whose
-- 168th entry is 997. It fails once M is lowered so far that fewer than 168
-- primes are found (e.g. when reducing M to dodge a C stack overflow).
--
local function check_primes( primes )
    assert( type(primes)=="table" and primes[1]==2 and primes[168]==997 )
end

-- Start of the timed section ('false' when '-time' was not given)
local t0= TIME and lanes.now_secs()

if not PLAIN then
    if SINGLE > 0 then
        local shown= 1
        if tonumber(SINGLE) then
            shown= SINGLE
        end
        io.stderr:write( shown .. " core(s):\n" )
        lanes.set_singlethreaded(SINGLE)    -- limit to N cores (just OS X)
    else
        io.stderr:write( "Multi core:\n" )
    end

    -- Tell the reader which completion order the priorities should produce
    if PRIO_ODD == PRIO_EVEN then
        io.stderr:write( "EVEN AND ODD lanes should be mingled (both: "..PRIO_ODD..")\n\n" )
    else
        local first= "EVEN"
        if PRIO_ODD > PRIO_EVEN then
            first= "ODD"
        end
        io.stderr:write( first..
                        " LANES should come first (odd:"..PRIO_ODD..", even:"..PRIO_EVEN..")\n\n" )
    end

    -- Launch all lanes; even and odd indices use their own generator
    local handles= {}
    for i=1,N do
        local launch= f_odd
        if i%2==0 then
            launch= f_even
        end
        handles[i]= launch(M,i)
    end

    -- Wait for every lane and verify what it returned
    for i=1,N do
        check_primes( (handles[i]:join()) )
    end
else
    io.stderr:write( "Plain (no multithreading):\n" )

    -- Baseline: the same sieve, N times in the calling thread
    for i=1,N do
        check_primes( (sieve_lane(M,i)) )
    end
end

io.stderr:write( "\n" )

if TIME then
    local elapsed= lanes.now_secs() - t0
    io.stderr:write( "*** TIMING: "..elapsed.." seconds ***\n" )
end

io.stderr:write( "done\n" )

--
-- end