aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBenoit Germain <benoit.germain@ubisoft.com>2024-06-28 17:52:29 +0200
committerBenoit Germain <benoit.germain@ubisoft.com>2024-06-28 17:52:29 +0200
commitac12af5c39b0689edb931fbe9a162db5687d392f (patch)
tree91dd99b3808a1dae237a7f343c449c999e80a4f1
parent726aee3fbb909946e69866cc6c4497c5ec365fe8 (diff)
downloadlanes-ac12af5c39b0689edb931fbe9a162db5687d392f.tar.gz
lanes-ac12af5c39b0689edb931fbe9a162db5687d392f.tar.bz2
lanes-ac12af5c39b0689edb931fbe9a162db5687d392f.zip
Make Lanes crash on purpose at shutdown if some lanes still run
Diffstat (limited to '')
-rw-r--r--CHANGES4
-rw-r--r--docs/index.html50
-rw-r--r--src/cancel.cpp2
-rw-r--r--src/cancel.h1
-rw-r--r--src/lanes.lua9
-rw-r--r--src/universe.cpp34
-rw-r--r--src/universe.h2
7 files changed, 48 insertions, 54 deletions
diff --git a/CHANGES b/CHANGES
index 59ad924..9b87eda 100644
--- a/CHANGES
+++ b/CHANGES
@@ -9,7 +9,6 @@ CHANGE 2: BGe 11-Jun-24
9 - demote_full_userdata removed. Use __lanesconvert instead. 9 - demote_full_userdata removed. Use __lanesconvert instead.
10 - keepers_gc_threshold added. Controls when GC runs inside keepers. 10 - keepers_gc_threshold added. Controls when GC runs inside keepers.
11 - nb_keepers changed to nb_user_keepers. limited to 100 keepers on top of the internal keeper used by the timer Linda. 11 - nb_keepers changed to nb_user_keepers. limited to 100 keepers on top of the internal keeper used by the timer Linda.
12 - shutdown_mode added. Controls how free running lanes are signalled at Lanes shutdown.
13 - strip_functions added. Only useful for Lua 5.3+. 12 - strip_functions added. Only useful for Lua 5.3+.
14 - verbose_errors removed. Use lane error_trace_level instead. 13 - verbose_errors removed. Use lane error_trace_level instead.
15 - with_timers is false by default. 14 - with_timers is false by default.
@@ -17,7 +16,8 @@ CHANGE 2: BGe 11-Jun-24
17 - __lanesignore removed. Use __lanesconvert instead. 16 - __lanesignore removed. Use __lanesconvert instead.
18 - __lanesconvert added. 17 - __lanesconvert added.
19 - Lanes API and behavior: 18 - Lanes API and behavior:
20 - new function lanes.finally(). Installs a function that gets called at Lanes shutdown. 19 - new function lanes.finally(). Installs a function that gets called at Lanes shutdown after attempting to terminate all lanes.
20 - If some lanes still run at shutdown, Lanes will throw an exception (or freeze, this is to be decided).
21 - lanes have a __close metamethod that calls join(). 21 - lanes have a __close metamethod that calls join().
22 - lanes can no longer be "killed" by hard-stopping their thread without any resource cleanup (see lane:cancel()). 22 - lanes can no longer be "killed" by hard-stopping their thread without any resource cleanup (see lane:cancel()).
23 - lane:join() returns nil, error in case of problem. 23 - lane:join() returns nil, error in case of problem.
diff --git a/docs/index.html b/docs/index.html
index 148b5ab..1cc007f 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -370,18 +370,6 @@
370 </tr> 370 </tr>
371 371
372 <tr valign=top> 372 <tr valign=top>
373 <td id="shutdown_mode">
374 <code>.shutdown_mode</code>
375 </td>
376 <td>
377 <tt>"hard"</tt>/<tt>"soft"</tt>/<tt>"call"</tt>/<tt>"ret"</tt>/<tt>"line"</tt>/<tt>"count"</tt>
378 </td>
379 <td>
380 Select the cancellation mode used at Lanes shutdown to request free running lane termination. See <a href="#cancelling">lane cancellation</a>. Default is <tt>"hard"</tt>.
381 </td>
382 </tr>
383
384 <tr valign=top>
385 <td id="shutdown_timeout"> 373 <td id="shutdown_timeout">
386 <code>.shutdown_timeout</code> 374 <code>.shutdown_timeout</code>
387 </td> 375 </td>
@@ -389,7 +377,9 @@
389 number >= 0 377 number >= 0
390 </td> 378 </td>
391 <td> 379 <td>
392 Sets the duration in seconds Lanes will wait for graceful termination of running lanes at application shutdown. Default is <tt>0.25</tt>. 380 Sets the duration in seconds Lanes will wait for graceful termination of running lanes at application shutdown. Default is <tt>0.25</tt>.<br />
381 Lanes signals all lanes for cancellation with <tt>"soft"</tt>, <tt>"hard"</tt>, and <tt>"all"</tt> modes, in that order. Each attempt has <tt>shutdown_timeout</tt> seconds to succeed before the next one.<br />
382 Then there is a last chance at cleanup with <a href="#finally"><tt>lanes.finally()</tt></a>. If some lanes are still running after that point, shutdown will freeze the application forever. It is YOUR responsibility to clean up properly after yourself.
393 </td> 383 </td>
394 </tr> 384 </tr>
395 385
@@ -462,7 +452,7 @@
462 </tr> 452 </tr>
463</table> 453</table>
464 454
465<p> 455<p id="finally">
466 It is also possible to install a function that will be called when Lanes is shutdown (that is, when the first state that required Lanes is closed). 456 It is also possible to install a function that will be called when Lanes is shutdown (that is, when the first state that required Lanes is closed).
467</p> 457</p>
468 458
@@ -479,7 +469,8 @@
479<p> 469<p>
480 An error will be raised if you attempt to do this from inside a lane, or on bad arguments (non-function, or too many arguments).<br /> 470 An error will be raised if you attempt to do this from inside a lane, or on bad arguments (non-function, or too many arguments).<br />
481 Only the last registered finalizer is kept. It can be cleared by passing <tt>nil</tt> or nothing.<br /> 471 Only the last registered finalizer is kept. It can be cleared by passing <tt>nil</tt> or nothing.<br />
482 The installed function is called after all free-running lanes are terminated, but before lindas become unusable.<br /> 472 The installed function is called after all free-running lanes got a chance to terminate (see <a href="#shutdown_timeout"><tt>shutdown_timeout</tt></a>), but before lindas become unusable.<br />
473 The finalizer receives a single argument, a <tt>bool</tt> that is <tt>true</tt> when all lanes have terminated and <tt>false</tt> when some lanes are still running at that point. Lanes that are still running can be inspected with <a href="#tracking">tracking</a>.<br />
483 If an error occurs inside this finalizer, it is silently swallowed, since it happens only during state shutdown, and you can't do anything about it. 474 If an error occurs inside this finalizer, it is silently swallowed, since it happens only during state shutdown, and you can't do anything about it.
484</p> 475</p>
485 476
@@ -1064,26 +1055,31 @@
1064 <br /> 1055 <br />
1065 <tt>cancel()</tt> sends a cancellation request to the lane. 1056 <tt>cancel()</tt> sends a cancellation request to the lane.
1066 <br /> 1057 <br />
1067 First argument is a <tt>mode</tt> can be one of <tt>"hard"</tt>, <tt>"soft"</tt>, <tt>"call"</tt>, <tt>"ret"</tt>, <tt>"line"</tt>, <tt>"count"</tt>. 1058 First argument is a <tt>mode</tt>, which can be one of:
1059 <ul>
1060 <li>
1061 <tt>"soft"</tt>: Cancellation will only cause <tt>cancel_test()</tt> to return <tt>true</tt>, so that the lane can cleanup manually.
1062 </li>
1063 <li>
1064 <tt>"hard"</tt>: waits for the request to be processed, or a timeout to occur. <a href="#lindas">Linda</a> operations detecting the cancellation request will raise a special cancellation error (meaning they won't return in that case).<br />
1065 <tt>wake_lane</tt> defaults to <tt>true</tt>, and <tt>timeout</tt> defaults to 0 if not specified.
1066 </li>
1067 <li>
1068 <tt>"call"</tt>, <tt>"ret"</tt>, <tt>"line"</tt>, <tt>"count"</tt>: Asynchronously install the corresponding hook, then behave as <tt>"hard"</tt>.
1069 </li>
1070 <li>
1071 <tt>"all"</tt>: Installs all hooks in one shot, just to be sure.
1072 </li>
1073 </ul>
1068 If <tt>mode</tt> is not specified, it defaults to <tt>"hard"</tt>. 1074 If <tt>mode</tt> is not specified, it defaults to <tt>"hard"</tt>.
1069 If <tt>wake_lane</tt> is <tt>true</tt>, the lane is also signalled so that execution returns from any pending <a href="#lindas">Linda</a> operation. <a href="#lindas">Linda</a> operations detecting the cancellation request return <tt>lanes.cancel_error</tt>. 1075 If <tt>wake_lane</tt> is <tt>true</tt>, the lane is also signalled so that execution returns from any pending <a href="#lindas">Linda</a> operation. <a href="#lindas">Linda</a> operations detecting the cancellation request return <tt>lanes.cancel_error</tt>.
1070</p> 1076</p>
1071<p> 1077<p>
1072 If <tt>mode</tt> is <tt>"soft"</tt>, cancellation will only cause <tt>cancel_test()</tt> to return <tt>true</tt>, so that the lane can cleanup manually.<br />
1073</p>
1074<p>
1075 If <tt>mode</tt> is <tt>"hard"</tt>, waits for the request to be processed, or a timeout to occur. <a href="#lindas">Linda</a> operations detecting the cancellation request will raise a special cancellation error (meaning they won't return in that case).<br />
1076 <tt>wake_lane</tt> defaults to <tt>true</tt>, and <tt>timeout</tt> defaults to 0 if not specified.
1077</p>
1078<p>
1079 Other values of <tt>mode</tt> will asynchronously install the corresponding hook, then behave as <tt>"hard"</tt>.
1080</p>
1081<p>
1082 Returns <tt>true, lane_h.status</tt> if lane was already done (in <tt>"done"</tt>, <tt>"error"</tt> or <tt>"cancelled"</tt> status), or the cancellation was fruitful within <tt>timeout_secs</tt> timeout period.<br /> 1078 Returns <tt>true, lane_h.status</tt> if lane was already done (in <tt>"done"</tt>, <tt>"error"</tt> or <tt>"cancelled"</tt> status), or the cancellation was fruitful within <tt>timeout_secs</tt> timeout period.<br />
1083 Returns <tt>false, "timeout"</tt> otherwise. 1079 Returns <tt>false, "timeout"</tt> otherwise.
1084</p> 1080</p>
1085<p> 1081<p>
1086 If the lane is still running after the timeout expired, there is a chance lanes will raise an error at shutdown when failing to terminate all free-running lanes within the specified timeout. 1082 If the lane is still running after the timeout expired, there is a chance lanes will freeze forever at shutdown when failing to terminate all free-running lanes within the specified timeout.
1087</p> 1083</p>
1088<p> 1084<p>
1089 Cancellation is tested <u>before</u> going to sleep in <tt>receive()</tt> or <tt>send()</tt> calls and after executing <tt>cancelstep</tt> Lua statements. A pending <tt>receive()</tt>or <tt>send()</tt> call is awakened. 1085 Cancellation is tested <u>before</u> going to sleep in <tt>receive()</tt> or <tt>send()</tt> calls and after executing <tt>cancelstep</tt> Lua statements. A pending <tt>receive()</tt>or <tt>send()</tt> call is awakened.
diff --git a/src/cancel.cpp b/src/cancel.cpp
index 755215f..15a2c83 100644
--- a/src/cancel.cpp
+++ b/src/cancel.cpp
@@ -95,6 +95,8 @@ CancelOp WhichCancelOp(std::string_view const& opString_)
95 _op = CancelOp::MaskLine; 95 _op = CancelOp::MaskLine;
96 } else if (opString_ == "count") { 96 } else if (opString_ == "count") {
97 _op = CancelOp::MaskCount; 97 _op = CancelOp::MaskCount;
98 } else if (opString_ == "all") {
99 _op = CancelOp::MaskAll;
98 } 100 }
99 return _op; 101 return _op;
100} 102}
diff --git a/src/cancel.h b/src/cancel.h
index 93fae4d..e62cf0a 100644
--- a/src/cancel.h
+++ b/src/cancel.h
@@ -29,6 +29,7 @@ enum class CancelOp
29 MaskRet = LUA_MASKRET, 29 MaskRet = LUA_MASKRET,
30 MaskLine = LUA_MASKLINE, 30 MaskLine = LUA_MASKLINE,
31 MaskCount = LUA_MASKCOUNT, 31 MaskCount = LUA_MASKCOUNT,
32 MaskAll = LUA_MASKCALL | LUA_MASKRET | LUA_MASKLINE | LUA_MASKCOUNT
32}; 33};
33 34
34// xxh64 of string "kCancelError" generated at https://www.pelock.com/products/hash-calculator 35// xxh64 of string "kCancelError" generated at https://www.pelock.com/products/hash-calculator
diff --git a/src/lanes.lua b/src/lanes.lua
index 48ebeb6..d28fcf4 100644
--- a/src/lanes.lua
+++ b/src/lanes.lua
@@ -98,7 +98,6 @@ local default_params =
98 keepers_gc_threshold = -1, 98 keepers_gc_threshold = -1,
99 nb_user_keepers = 0, 99 nb_user_keepers = 0,
100 on_state_create = nil, 100 on_state_create = nil,
101 shutdown_mode = "hard",
102 shutdown_timeout = 0.25, 101 shutdown_timeout = 0.25,
103 strip_functions = true, 102 strip_functions = true,
104 track_lanes = false, 103 track_lanes = false,
@@ -159,14 +158,6 @@ local param_checkers =
159 end 158 end
160 return true 159 return true
161 end, 160 end,
162 shutdown_mode = function(val_)
163 local valid_hooks = { soft = true, hard = true, call = true, ret = true, line = true, count = true }
164 -- shutdown_mode should be a known hook mask
165 if not valid_hooks[val_] then
166 return nil, "unknown value"
167 end
168 return true
169 end,
170 shutdown_timeout = function(val_) 161 shutdown_timeout = function(val_)
171 -- shutdown_timeout should be a number in [0,3600] 162 -- shutdown_timeout should be a number in [0,3600]
172 if type(val_) ~= "number" then 163 if type(val_) ~= "number" then
diff --git a/src/universe.cpp b/src/universe.cpp
index 1cb4fd0..5fda29a 100644
--- a/src/universe.cpp
+++ b/src/universe.cpp
@@ -147,8 +147,7 @@ void Universe::callOnStateCreate(lua_State* const L_, lua_State* const from_, Lo
147 DEBUGSPEW_CODE(DebugSpewIndentScope _scope{ _U }); 147 DEBUGSPEW_CODE(DebugSpewIndentScope _scope{ _U });
148 lua_createtable(L_, 0, 1); // L_: settings universe {mt} 148 lua_createtable(L_, 0, 1); // L_: settings universe {mt}
149 std::ignore = luaG_getfield(L_, 1, "shutdown_timeout"); // L_: settings universe {mt} shutdown_timeout 149 std::ignore = luaG_getfield(L_, 1, "shutdown_timeout"); // L_: settings universe {mt} shutdown_timeout
150 std::ignore = luaG_getfield(L_, 1, "shutdown_mode"); // L_: settings universe {mt} shutdown_timeout shutdown_mode 150 lua_pushcclosure(L_, LG_universe_gc, 1); // L_: settings universe {mt} LG_universe_gc
151 lua_pushcclosure(L_, LG_universe_gc, 2); // L_: settings universe {mt} LG_universe_gc
152 lua_setfield(L_, -2, "__gc"); // L_: settings universe {mt} 151 lua_setfield(L_, -2, "__gc"); // L_: settings universe {mt}
153 lua_setmetatable(L_, -2); // L_: settings universe 152 lua_setmetatable(L_, -2); // L_: settings universe
154 lua_pop(L_, 1); // L_: settings 153 lua_pop(L_, 1); // L_: settings
@@ -352,7 +351,7 @@ lanes::AllocatorDefinition Universe::resolveAllocator(lua_State* const L_, std::
352 351
353// ################################################################################################# 352// #################################################################################################
354 353
355void Universe::terminateFreeRunningLanes(lua_State* const L_, lua_Duration const shutdownTimeout_, CancelOp const op_) 354bool Universe::terminateFreeRunningLanes(lua_Duration const shutdownTimeout_, CancelOp const op_)
356{ 355{
357 if (selfdestructFirst != SELFDESTRUCT_END) { 356 if (selfdestructFirst != SELFDESTRUCT_END) {
358 // Signal _all_ still running threads to exit (including the timer thread) 357 // Signal _all_ still running threads to exit (including the timer thread)
@@ -404,15 +403,8 @@ void Universe::terminateFreeRunningLanes(lua_State* const L_, lua_Duration const
404 } 403 }
405 } 404 }
406 405
407 // If after all this, we still have some free-running lanes, it's an external user error, they should have stopped appropriately 406 // are all lanes successfully terminated?
408 { 407 return selfdestructFirst == SELFDESTRUCT_END;
409 std::lock_guard<std::mutex> _guard{ selfdestructMutex };
410 Lane* _lane{ selfdestructFirst };
411 if (_lane != SELFDESTRUCT_END) {
412 // this causes a leak because we don't call U's destructor (which could be bad if the still running lanes are accessing it)
413 raise_luaL_error(L_, "Zombie thread '%s' refuses to die!", _lane->debugName.data());
414 }
415 }
416} 408}
417 409
418// ################################################################################################# 410// #################################################################################################
@@ -421,15 +413,21 @@ void Universe::terminateFreeRunningLanes(lua_State* const L_, lua_Duration const
421LUAG_FUNC(universe_gc) 413LUAG_FUNC(universe_gc)
422{ 414{
423 lua_Duration const _shutdown_timeout{ lua_tonumber(L_, lua_upvalueindex(1)) }; 415 lua_Duration const _shutdown_timeout{ lua_tonumber(L_, lua_upvalueindex(1)) };
424 std::string_view const _op_string{ luaG_tostring(L_, lua_upvalueindex(2)) };
425 STACK_CHECK_START_ABS(L_, 1); 416 STACK_CHECK_START_ABS(L_, 1);
426 Universe* const _U{ luaG_tofulluserdata<Universe>(L_, 1) }; // L_: U 417 Universe* const _U{ luaG_tofulluserdata<Universe>(L_, 1) }; // L_: U
427 _U->terminateFreeRunningLanes(L_, _shutdown_timeout, WhichCancelOp(_op_string)); 418
419 // attempt to terminate all lanes with increasingly stronger cancel methods
420 bool const _allLanesTerminated{
421 _U->terminateFreeRunningLanes(_shutdown_timeout, CancelOp::Soft)
422 || _U->terminateFreeRunningLanes(_shutdown_timeout, CancelOp::Hard)
423 || _U->terminateFreeRunningLanes(_shutdown_timeout, CancelOp::MaskAll)
424 };
428 425
429 // invoke the function installed by lanes.finally() 426 // invoke the function installed by lanes.finally()
430 kFinalizerRegKey.pushValue(L_); // L_: U finalizer|nil 427 kFinalizerRegKey.pushValue(L_); // L_: U finalizer|nil
431 if (!lua_isnil(L_, -1)) { 428 if (!lua_isnil(L_, -1)) {
432 lua_pcall(L_, 0, 0, 0); // L_: U 429 lua_pushboolean(L_, _allLanesTerminated); // L_: U finalizer bool
430 lua_pcall(L_, 1, 0, 0); // L_: U
433 // discard any error that might have occurred 431 // discard any error that might have occurred
434 lua_settop(L_, 1); 432 lua_settop(L_, 1);
435 } else { 433 } else {
@@ -438,6 +436,12 @@ LUAG_FUNC(universe_gc)
438 // in case of error, the message is pushed on the stack 436 // in case of error, the message is pushed on the stack
439 STACK_CHECK(L_, 1); 437 STACK_CHECK(L_, 1);
440 438
439 // if some lanes are still running here, we have no other choice than crashing and let the client figure out what's wrong
440 while (_U->selfdestructFirst != SELFDESTRUCT_END) {
441 throw std::logic_error{ "Some lanes are still running at shutdown" };
442 //std::this_thread::yield();
443 }
444
441 // no need to mutex-protect this as all threads in the universe are gone at that point 445 // no need to mutex-protect this as all threads in the universe are gone at that point
442 if (_U->timerLinda != nullptr) { // test in case some early internal error prevented Lanes from creating the deep timer 446 if (_U->timerLinda != nullptr) { // test in case some early internal error prevented Lanes from creating the deep timer
443 [[maybe_unused]] int const _prev_ref_count{ _U->timerLinda->refcount.fetch_sub(1, std::memory_order_relaxed) }; 447 [[maybe_unused]] int const _prev_ref_count{ _U->timerLinda->refcount.fetch_sub(1, std::memory_order_relaxed) };
diff --git a/src/universe.h b/src/universe.h
index 6374648..dc8940f 100644
--- a/src/universe.h
+++ b/src/universe.h
@@ -139,7 +139,7 @@ class Universe
139 void initializeOnStateCreate(lua_State* const L_); 139 void initializeOnStateCreate(lua_State* const L_);
140 lanes::AllocatorDefinition resolveAllocator(lua_State* const L_, std::string_view const& hint_) const; 140 lanes::AllocatorDefinition resolveAllocator(lua_State* const L_, std::string_view const& hint_) const;
141 static inline void Store(lua_State* L_, Universe* U_); 141 static inline void Store(lua_State* L_, Universe* U_);
142 void terminateFreeRunningLanes(lua_State* L_, lua_Duration shutdownTimeout_, CancelOp op_); 142 [[nodiscard]] bool terminateFreeRunningLanes(lua_Duration shutdownTimeout_, CancelOp op_);
143}; 143};
144 144
145// ################################################################################################# 145// #################################################################################################