From 051a587071e1bd25f72c77e84443be4a3426c83d Mon Sep 17 00:00:00 2001
From: Benoit Germain <benoit.germain@ubisoft.com>
Date: Mon, 25 Mar 2024 17:55:31 +0100
Subject: C++ migration: Universe MUTEX_T replaced with std::mutex and
 std::atomic

---
 deep_test/deep_test.cpp          |  3 +-
 deep_test/deep_test.vcxproj.user |  4 +-
 src/deep.cpp                     | 79 ++++++++++++++++++----------------------
 src/deep.h                       | 10 +++--
 src/lanes.cpp                    | 70 +++++++++++++++--------------------
 src/linda.cpp                    |  8 ++--
 src/state.cpp                    |  6 +--
 src/tools.cpp                    | 18 +++------
 src/universe.cpp                 | 15 ++++----
 src/universe.h                   | 47 +++++++++++-------------
 10 files changed, 115 insertions(+), 145 deletions(-)

diff --git a/deep_test/deep_test.cpp b/deep_test/deep_test.cpp
index bbae48e..d7ecd33 100644
--- a/deep_test/deep_test.cpp
+++ b/deep_test/deep_test.cpp
@@ -8,9 +8,8 @@
 // ################################################################################################
 
 // a lanes-deep userdata. needs DeepPrelude and luaG_newdeepuserdata from Lanes code.
-struct MyDeepUserdata
+struct MyDeepUserdata : public DeepPrelude // Deep userdata MUST start with a DeepPrelude
 {
-	DeepPrelude prelude; // Deep userdata MUST start with this header
 	lua_Integer val{ 0 };
 };
 
diff --git a/deep_test/deep_test.vcxproj.user b/deep_test/deep_test.vcxproj.user
index abaec72..24e3d31 100644
--- a/deep_test/deep_test.vcxproj.user
+++ b/deep_test/deep_test.vcxproj.user
@@ -33,9 +33,9 @@
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug 5.4|x64'">
     <LocalDebuggerCommand>$(SolutionDir)..\framework\lua54.exe</LocalDebuggerCommand>
     <DebuggerFlavor>WindowsLocalDebugger</DebuggerFlavor>
-    <LocalDebuggerCommandArguments>-i deeptest.lua</LocalDebuggerCommandArguments>
+    <LocalDebuggerCommandArguments>deeptest.lua</LocalDebuggerCommandArguments>
     <LocalDebuggerWorkingDirectory>$(SolutionDir)Lanes\lanes\deep_test\</LocalDebuggerWorkingDirectory>
-    <RemoteDebuggerCommandArguments>-i deeptest.lua</RemoteDebuggerCommandArguments>
+    <RemoteDebuggerCommandArguments>deeptest.lua</RemoteDebuggerCommandArguments>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug MoonJIT|x64'">
     <LocalDebuggerCommand>$(SolutionDir)..\MoonJIT\bin\$(Platform)\moonjit.exe</LocalDebuggerCommand>
diff --git a/src/deep.cpp b/src/deep.cpp
index cd5a844..dd682e4 100644
--- a/src/deep.cpp
+++ b/src/deep.cpp
@@ -143,10 +143,13 @@ static inline luaG_IdFunction* get_idfunc( lua_State* L, int index, LookupMode m
 
 void free_deep_prelude( lua_State* L, DeepPrelude* prelude_)
 {
+    ASSERT_L(prelude_->idfunc);
+    STACK_CHECK_START_REL(L, 0);
     // Call 'idfunc( "delete", deep_ptr )' to make deep cleanup
     lua_pushlightuserdata( L, prelude_);
-    ASSERT_L( prelude_->idfunc);
     prelude_->idfunc( L, eDO_delete);
+    lua_pop(L, 1);
+    STACK_CHECK(L, 0);
 }
 
 
@@ -160,14 +163,10 @@ static int deep_userdata_gc( lua_State* L)
 {
     DeepPrelude** proxy = (DeepPrelude**) lua_touserdata( L, 1);
     DeepPrelude* p = *proxy;
-    Universe* U = universe_get( L);
-    int v;
 
     // can work without a universe if creating a deep userdata from some external C module when Lanes isn't loaded
     // in that case, we are not multithreaded and locking isn't necessary anyway
-    if( U) MUTEX_LOCK( &U->deep_lock);
-    v = -- (p->refcount);
-    if (U) MUTEX_UNLOCK( &U->deep_lock);
+    int const v{ p->m_refcount.fetch_sub(1, std::memory_order_acq_rel) - 1 }; // fetch_sub yields the PREVIOUS count, so subtract 1 to get the number of proxies still alive (tested against 0 below); acq_rel so that the last owner sees every other thread's writes before destroying the data
 
     if( v == 0)
     {
@@ -202,7 +201,7 @@ static int deep_userdata_gc( lua_State* L)
  * used in this Lua state (metatable, registring it). Otherwise, increments the
  * reference count.
  */
-char const* push_deep_proxy( Universe* U, lua_State* L, DeepPrelude* prelude, int nuv_, LookupMode mode_)
+char const* push_deep_proxy(lua_State* L, DeepPrelude* prelude, int nuv_, LookupMode mode_)
 {
     DeepPrelude** proxy;
 
@@ -220,12 +219,6 @@ char const* push_deep_proxy( Universe* U, lua_State* L, DeepPrelude* prelude, in
         lua_pop( L, 1);                                                                                // DPC
     }
 
-    // can work without a universe if creating a deep userdata from some external C module when Lanes isn't loaded
-    // in that case, we are not multithreaded and locking isn't necessary anyway
-    if( U) MUTEX_LOCK( &U->deep_lock);
-    ++ (prelude->refcount);  // one more proxy pointing to this deep data
-    if( U) MUTEX_UNLOCK( &U->deep_lock);
-
     STACK_GROW( L, 7);
     STACK_CHECK_START_REL(L, 0);
 
@@ -233,6 +226,7 @@ char const* push_deep_proxy( Universe* U, lua_State* L, DeepPrelude* prelude, in
     proxy = (DeepPrelude**) lua_newuserdatauv( L, sizeof(DeepPrelude*), nuv_);                         // DPC proxy
     ASSERT_L( proxy);
     *proxy = prelude;
+    prelude->m_refcount.fetch_add(1, std::memory_order_relaxed); // one more proxy pointing to this deep data
 
     // Get/create metatable for 'idfunc' (in this state)
     lua_pushlightuserdata( L, (void*)(ptrdiff_t)(prelude->idfunc));                                    // DPC proxy idfunc
@@ -378,39 +372,38 @@ char const* push_deep_proxy( Universe* U, lua_State* L, DeepPrelude* prelude, in
 */
 int luaG_newdeepuserdata( lua_State* L, luaG_IdFunction* idfunc, int nuv_)
 {
-    char const* errmsg;
-
     STACK_GROW( L, 1);
     STACK_CHECK_START_REL(L, 0);
+    int const oldtop{ lua_gettop(L) };
+    DeepPrelude* const prelude{ static_cast<DeepPrelude*>(idfunc(L, eDO_new)) };
+    if (prelude == nullptr)
     {
-        int const oldtop = lua_gettop( L);
-        DeepPrelude* prelude = (DeepPrelude*) idfunc( L, eDO_new);
-        if (prelude == nullptr)
-        {
-            return luaL_error( L, "idfunc(eDO_new) failed to create deep userdata (out of memory)");
-        }
-        if( prelude->magic != DEEP_VERSION)
-        {
-            // just in case, don't leak the newly allocated deep userdata object
-            lua_pushlightuserdata( L, prelude);
-            idfunc( L, eDO_delete);
-            return luaL_error( L, "Bad idfunc(eDO_new): DEEP_VERSION is incorrect, rebuild your implementation with the latest deep implementation");
-        }
-        prelude->refcount = 0; // 'push_deep_proxy' will lift it to 1
-        prelude->idfunc = idfunc;
+        return luaL_error( L, "idfunc(eDO_new) failed to create deep userdata (out of memory)");
+    }
 
-        if( lua_gettop( L) - oldtop != 0)
-        {
-            // just in case, don't leak the newly allocated deep userdata object
-            lua_pushlightuserdata( L, prelude);
-            idfunc( L, eDO_delete);
-            return luaL_error( L, "Bad idfunc(eDO_new): should not push anything on the stack");
-        }
-        errmsg = push_deep_proxy( universe_get( L), L, prelude, nuv_, eLM_LaneBody);  // proxy
-        if (errmsg != nullptr)
-        {
-            return luaL_error( L, errmsg);
-        }
+    if( prelude->magic != DEEP_VERSION)
+    {
+        // just in case, don't leak the newly allocated deep userdata object
+        lua_pushlightuserdata( L, prelude);
+        idfunc( L, eDO_delete);
+        return luaL_error( L, "Bad idfunc(eDO_new): DEEP_VERSION is incorrect, rebuild your implementation with the latest deep implementation");
+    }
+
+    ASSERT_L(prelude->m_refcount.load(std::memory_order_relaxed) == 0); // 'push_deep_proxy' will lift it to 1
+    prelude->idfunc = idfunc;
+
+    if( lua_gettop( L) - oldtop != 0)
+    {
+        // just in case, don't leak the newly allocated deep userdata object
+        lua_pushlightuserdata( L, prelude);
+        idfunc( L, eDO_delete);
+        return luaL_error( L, "Bad idfunc(eDO_new): should not push anything on the stack");
+    }
+
+    char const* const errmsg{ push_deep_proxy(L, prelude, nuv_, eLM_LaneBody) }; // proxy
+    if (errmsg != nullptr)
+    {
+        return luaL_error( L, errmsg);
     }
     STACK_CHECK( L, 1);
     return 1;
@@ -471,7 +464,7 @@ bool copydeep(Universe* U, lua_State* L2, int L2_cache_i, lua_State* L, int i, L
     lua_pop( L, 1);                                                                      // ... u [uv]*
     STACK_CHECK( L, nuv);
 
-    errmsg = push_deep_proxy( U, L2, *(DeepPrelude**) lua_touserdata( L, i), nuv, mode_);                   // u
+    errmsg = push_deep_proxy(L2, *(DeepPrelude**) lua_touserdata( L, i), nuv, mode_);                   // u
 
     // transfer all uservalues of the source in the destination
     {
diff --git a/src/deep.h b/src/deep.h
index 5322525..878fa63 100644
--- a/src/deep.h
+++ b/src/deep.h
@@ -16,6 +16,8 @@ extern "C" {
 #include "lanesconf.h"
 #include "uniquekey.h"
 
+#include <atomic>
+
 // forwards
 struct Universe;
 
@@ -38,8 +40,8 @@ using luaG_IdFunction = void*( lua_State* L, DeepOp op_);
 
 // ################################################################################################
 
-// fnv164 of string "DEEP_VERSION_2" generated at https://www.pelock.com/products/hash-calculator
-static constexpr UniqueKey DEEP_VERSION{ 0xB4B0119C10642B29ull };
+// xxh64 of string "DEEP_VERSION_3" generated at https://www.pelock.com/products/hash-calculator
+static constexpr UniqueKey DEEP_VERSION{ 0xB2CC0FD9C0AE9674ull };
 
 // should be used as header for full userdata
 struct DeepPrelude
@@ -48,10 +50,10 @@ struct DeepPrelude
     // when stored in a keeper state, the full userdata doesn't have a metatable, so we need direct access to the idfunc
     luaG_IdFunction* idfunc { nullptr };
     // data is destroyed when refcount is 0
-    volatile int refcount{ 0 };
+    std::atomic<int> m_refcount{ 0 };
 };
 
-char const* push_deep_proxy( Universe* U, lua_State* L, DeepPrelude* prelude, int nuv_, LookupMode mode_);
+char const* push_deep_proxy(lua_State* L, DeepPrelude* prelude, int nuv_, LookupMode mode_);
 void free_deep_prelude( lua_State* L, DeepPrelude* prelude_);
 
 LANES_API int luaG_newdeepuserdata( lua_State* L, luaG_IdFunction* idfunc, int nuv_);
diff --git a/src/lanes.cpp b/src/lanes.cpp
index 6123766..99c5812 100644
--- a/src/lanes.cpp
+++ b/src/lanes.cpp
@@ -185,26 +185,26 @@ static bool push_registry_table( lua_State* L, UniqueKey key, bool create)
  * Add the lane to tracking chain; the ones still running at the end of the
  * whole process will be cancelled.
  */
-static void tracking_add( Lane* s)
+static void tracking_add(Lane* s)
 {
 
-    MUTEX_LOCK( &s->U->tracking_cs);
+    s->U->tracking_cs.lock();
     {
         assert( s->tracking_next == nullptr);
 
         s->tracking_next = s->U->tracking_first;
         s->U->tracking_first = s;
     }
-    MUTEX_UNLOCK( &s->U->tracking_cs);
+    s->U->tracking_cs.unlock();
 }
 
 /*
  * A free-running lane has ended; remove it from tracking chain
  */
-static bool tracking_remove( Lane* s)
+static bool tracking_remove(Lane* s)
 {
     bool found{ false };
-    MUTEX_LOCK( &s->U->tracking_cs);
+    s->U->tracking_cs.lock();
     {
         // Make sure (within the MUTEX) that we actually are in the chain
         // still (at process exit they will remove us from chain and then
@@ -228,7 +228,7 @@ static bool tracking_remove( Lane* s)
             assert( found);
         }
     }
-    MUTEX_UNLOCK( &s->U->tracking_cs);
+    s->U->tracking_cs.unlock();
     return found;
 }
 
@@ -396,12 +396,12 @@ static int run_finalizers( lua_State* L, int lua_rc)
  */
 static void selfdestruct_add( Lane* s)
 {
-    MUTEX_LOCK( &s->U->selfdestruct_cs);
+    s->U->selfdestruct_cs.lock();
     assert( s->selfdestruct_next == nullptr);
 
     s->selfdestruct_next = s->U->selfdestruct_first;
     s->U->selfdestruct_first= s;
-    MUTEX_UNLOCK( &s->U->selfdestruct_cs);
+    s->U->selfdestruct_cs.unlock();
 }
 
 /*
@@ -410,7 +410,7 @@ static void selfdestruct_add( Lane* s)
 static bool selfdestruct_remove( Lane* s)
 {
     bool found{ false };
-    MUTEX_LOCK( &s->U->selfdestruct_cs);
+    s->U->selfdestruct_cs.lock();
     {
         // Make sure (within the MUTEX) that we actually are in the chain
         // still (at process exit they will remove us from chain and then
@@ -436,7 +436,7 @@ static bool selfdestruct_remove( Lane* s)
             assert( found);
         }
     }
-    MUTEX_UNLOCK( &s->U->selfdestruct_cs);
+    s->U->selfdestruct_cs.unlock();
     return found;
 }
 
@@ -451,7 +451,7 @@ static int selfdestruct_gc( lua_State* L)
     {
         // Signal _all_ still running threads to exit (including the timer thread)
         //
-        MUTEX_LOCK( &U->selfdestruct_cs);
+        U->selfdestruct_cs.lock();
         {
             Lane* s = U->selfdestruct_first;
             while( s != SELFDESTRUCT_END)
@@ -470,7 +470,7 @@ static int selfdestruct_gc( lua_State* L)
                 s = s->selfdestruct_next;
             }
         }
-        MUTEX_UNLOCK( &U->selfdestruct_cs);
+        U->selfdestruct_cs.unlock();
 
         // When noticing their cancel, the lanes will remove themselves from
         // the selfdestruct chain.
@@ -497,7 +497,7 @@ static int selfdestruct_gc( lua_State* L)
                     // count the number of cancelled thread that didn't have the time to act yet
                     int n = 0;
                     double t_now = 0.0;
-                    MUTEX_LOCK( &U->selfdestruct_cs);
+                    U->selfdestruct_cs.lock();
                     {
                         Lane* s = U->selfdestruct_first;
                         while( s != SELFDESTRUCT_END)
@@ -507,7 +507,7 @@ static int selfdestruct_gc( lua_State* L)
                             s = s->selfdestruct_next;
                         }
                     }
-                    MUTEX_UNLOCK( &U->selfdestruct_cs);
+                    U->selfdestruct_cs.unlock();
                     // if timeout elapsed, or we know all threads have acted, stop waiting
                     t_now = now_secs();
                     if( n == 0 || (t_now >= t_until))
@@ -535,7 +535,7 @@ static int selfdestruct_gc( lua_State* L)
             // first thing we did was to raise the linda signals the threads were waiting on (if any)
             // therefore, any well-behaved thread should be in CANCELLED state
             // these are not running, and the state can be closed
-            MUTEX_LOCK( &U->selfdestruct_cs);
+            U->selfdestruct_cs.lock();
             {
                 Lane* s = U->selfdestruct_first;
                 while( s != SELFDESTRUCT_END)
@@ -557,7 +557,7 @@ static int selfdestruct_gc( lua_State* L)
                 }
                 U->selfdestruct_first = SELFDESTRUCT_END;
             }
-            MUTEX_UNLOCK( &U->selfdestruct_cs);
+            U->selfdestruct_cs.unlock();
 
             DEBUGSPEW_CODE( fprintf( stderr, "Killed %d lane(s) at process end.\n", n));
         }
@@ -575,7 +575,8 @@ static int selfdestruct_gc( lua_State* L)
     // no need to mutex-protect this as all threads in the universe are gone at that point
     if( U->timer_deep != nullptr) // test ins case some early internal error prevented Lanes from creating the deep timer
     {
-        -- U->timer_deep->refcount; // should be 0 now
+        int const prev_ref_count{ U->timer_deep->m_refcount.fetch_sub(1, std::memory_order_relaxed) };
+        ASSERT_L(prev_ref_count == 1); // this should be the last reference
         free_deep_prelude( L, (DeepPrelude*) U->timer_deep);
         U->timer_deep = nullptr;
     }
@@ -585,15 +586,8 @@ static int selfdestruct_gc( lua_State* L)
     // remove the protected allocator, if any
     cleanup_allocator_function( U, L);
 
-#if HAVE_LANE_TRACKING()
-    MUTEX_FREE( &U->tracking_cs);
-#endif // HAVE_LANE_TRACKING()
-    // Linked chains handling
-    MUTEX_FREE( &U->selfdestruct_cs);
-    MUTEX_FREE( &U->require_cs);
-    // Locks for 'tools.c' inc/dec counters
-    MUTEX_FREE( &U->deep_lock);
-    MUTEX_FREE( &U->mtid_lock);
+    U->Universe::~Universe();
+
     // universe is no longer available (nor necessary)
     // we need to do this in case some deep userdata objects were created before Lanes was initialized,
     // as potentially they will be garbage collected after Lanes at application shutdown
@@ -950,10 +944,10 @@ static THREAD_RETURN_T THREAD_CALLCONV lane_main( void* vs)
         //
         lua_close( s->L);
 
-        MUTEX_LOCK( &s->U->selfdestruct_cs);
+        s->U->selfdestruct_cs.lock();
         // done with lua_close(), terminal shutdown sequence may proceed
         -- s->U->selfdestructing_count;
-        MUTEX_UNLOCK( &s->U->selfdestruct_cs);
+        s->U->selfdestruct_cs.unlock();
 
         lane_cleanup( s); // s is freed at this point
     }
@@ -1665,7 +1659,7 @@ LUAG_FUNC( threads)
 
   // List _all_ still running threads
   //
-  MUTEX_LOCK( &U->tracking_cs);
+  U->tracking_cs.lock();
   if( U->tracking_first && U->tracking_first != TRACKING_END)
   {
     Lane* s = U->tracking_first;
@@ -1683,7 +1677,7 @@ LUAG_FUNC( threads)
       s = s->tracking_next;
     }
   }
-  MUTEX_UNLOCK( &U->tracking_cs);
+  U->tracking_cs.unlock();
   return lua_gettop( L) - top; // 0 or 1
 }
 #endif // HAVE_LANE_TRACKING()
@@ -1863,12 +1857,12 @@ LUAG_FUNC( configure)
 #endif // THREADAPI == THREADAPI_PTHREAD
 
     STACK_GROW( L, 4);
-    STACK_CHECK_START_ABS( L, 1);                                                                // settings
+    STACK_CHECK_START_ABS( L, 1);                                                          // settings
 
     DEBUGSPEW_CODE( fprintf( stderr, INDENT_BEGIN "%p: lanes.configure() BEGIN\n" INDENT_END, L));
     DEBUGSPEW_CODE( if( U) ++ U->debugspew_indent_depth);
 
-    if( U == nullptr)
+    if(U == nullptr)
     {
         U = universe_create( L);                                                           // settings universe
         DEBUGSPEW_CODE( ++ U->debugspew_indent_depth);
@@ -1885,17 +1879,11 @@ LUAG_FUNC( configure)
         U->demoteFullUserdata = lua_toboolean( L, -1) ? true : false;
         lua_pop( L, 1);                                                                    // settings
 #if HAVE_LANE_TRACKING()
-        MUTEX_INIT( &U->tracking_cs);
         lua_getfield( L, 1, "track_lanes");                                                // settings track_lanes
         U->tracking_first = lua_toboolean( L, -1) ? TRACKING_END : nullptr;
         lua_pop( L, 1);                                                                    // settings
 #endif // HAVE_LANE_TRACKING()
         // Linked chains handling
-        MUTEX_INIT( &U->selfdestruct_cs);
-        MUTEX_RECURSIVE_INIT( &U->require_cs);
-        // Locks for 'tools.c' inc/dec counters
-        MUTEX_INIT( &U->deep_lock);
-        MUTEX_INIT( &U->mtid_lock);
         U->selfdestruct_first = SELFDESTRUCT_END;
         initialize_allocator_function( U, L);
         initialize_on_state_create( U, L);
@@ -1908,10 +1896,10 @@ LUAG_FUNC( configure)
         lua_call( L, 1, 1);                                                                // settings linda
         STACK_CHECK( L, 2);
 
-        // Proxy userdata contents is only a 'DEEP_PRELUDE*' pointer
+        // Proxy userdata contents is only a 'DeepPrelude*' pointer
         U->timer_deep = *(DeepPrelude**) lua_touserdata( L, -1);
         // increment refcount so that this linda remains alive as long as the universe exists.
-        ++ U->timer_deep->refcount;
+        U->timer_deep->m_refcount.fetch_add(1, std::memory_order_relaxed);
         lua_pop( L, 1);                                                                    // settings
     }
     STACK_CHECK( L, 1);
@@ -1938,7 +1926,7 @@ LUAG_FUNC( configure)
 
     {
         char const* errmsg;
-        errmsg = push_deep_proxy( U, L, (DeepPrelude*) U->timer_deep, 0, eLM_LaneBody);    // settings M timer_deep
+        errmsg = push_deep_proxy(L, (DeepPrelude*) U->timer_deep, 0, eLM_LaneBody);        // settings M timer_deep
         if( errmsg != nullptr)
         {
             return luaL_error( L, errmsg);
diff --git a/src/linda.cpp b/src/linda.cpp
index a6025ce..f2e39a8 100644
--- a/src/linda.cpp
+++ b/src/linda.cpp
@@ -791,8 +791,8 @@ static void* linda_id( lua_State* L, DeepOp op_)
             * One can use any memory allocation scheme.
             * just don't use L's allocF because we don't know which state will get the honor of GCing the linda
             */
+            Universe* const U = universe_get(L);
             {
-                Universe* const U = universe_get(L);
                 AllocatorDefinition* const allocD = &U->internal_allocator;
                 s = (struct s_Linda*) allocD->allocF(allocD->allocUD, nullptr, 0, sizeof(struct s_Linda) + name_len); // terminating 0 is already included
             }
@@ -801,7 +801,7 @@ static void* linda_id( lua_State* L, DeepOp op_)
                 s->prelude.DeepPrelude::DeepPrelude();
                 SIGNAL_INIT( &s->read_happened);
                 SIGNAL_INIT( &s->write_happened);
-                s->U = universe_get( L);
+                s->U = U;
                 s->simulate_cancel = CancelRequest::None;
                 s->group = linda_group << KEEPER_MAGIC_SHIFT;
                 s->name[0] = 0;
@@ -828,9 +828,9 @@ static void* linda_id( lua_State* L, DeepOp op_)
             // There aren't any lanes waiting on these lindas, since all proxies have been gc'ed. Right?
             SIGNAL_FREE( &linda->read_happened);
             SIGNAL_FREE( &linda->write_happened);
+            linda->prelude.DeepPrelude::~DeepPrelude();
             {
-                Universe* const U = universe_get(L);
-                AllocatorDefinition* const allocD = &U->internal_allocator;
+                AllocatorDefinition* const allocD = &linda->U->internal_allocator;
                 (void) allocD->allocF(allocD->allocUD, linda, sizeof(struct s_Linda) + strlen(linda->name), 0);
             }
             return nullptr;
diff --git a/src/state.cpp b/src/state.cpp
index aa6b38a..c66242c 100644
--- a/src/state.cpp
+++ b/src/state.cpp
@@ -72,11 +72,11 @@ static int luaG_new_require( lua_State* L)
 
     // Using 'lua_pcall()' to catch errors; otherwise a failing 'require' would
     // leave us locked, blocking any future 'require' calls from other lanes.
-    
-    MUTEX_LOCK( &U->require_cs);
+
+    U->require_cs.lock();
     // starting with Lua 5.4, require may return a second optional value, so we need LUA_MULTRET
     rc = lua_pcall( L, args, LUA_MULTRET, 0 /*errfunc*/ ); // err|result(s)
-    MUTEX_UNLOCK( &U->require_cs);
+    U->require_cs.unlock();
 
     // the required module (or an error message) is left on the stack as returned value by original require function
 
diff --git a/src/tools.cpp b/src/tools.cpp
index 1e38144..68846ba 100644
--- a/src/tools.cpp
+++ b/src/tools.cpp
@@ -174,9 +174,9 @@ static void* protected_lua_Alloc( void *ud, void *ptr, size_t osize, size_t nsiz
 {
     void* p;
     ProtectedAllocator* s = (ProtectedAllocator*) ud;
-    MUTEX_LOCK( &s->lock);
+    s->lock.lock();
     p = s->definition.allocF( s->definition.allocUD, ptr, osize, nsize);
-    MUTEX_UNLOCK( &s->lock);
+    s->lock.unlock();
     return p;
 }
 
@@ -214,9 +214,7 @@ void initialize_allocator_function( Universe* U, lua_State* L)
         }
         else if( lua_type( L, -1) == LUA_TSTRING) // should be "protected"
         {
-            // initialize all we need for the protected allocator
-            MUTEX_INIT( &U->protected_allocator.lock); // the mutex
-            // and the original allocator to call from inside protection by the mutex
+            // set the original allocator to call from inside protection by the mutex
             U->protected_allocator.definition.allocF = lua_getallocf( L, &U->protected_allocator.definition.allocUD);
             // before a state is created, this function will be called to obtain the allocator
             U->provide_allocator = luaG_provide_protected_allocator;
@@ -226,8 +224,6 @@ void initialize_allocator_function( Universe* U, lua_State* L)
     }
     else
     {
-        // initialize the mutex even if we are not going to use it, because cleanup_allocator_function will deinitialize it
-        MUTEX_INIT( &U->protected_allocator.lock);
         // just grab whatever allocator was provided to lua_newstate
         U->protected_allocator.definition.allocF = lua_getallocf( L, &U->protected_allocator.definition.allocUD);
     }
@@ -258,8 +254,6 @@ void cleanup_allocator_function( Universe* U, lua_State* L)
     {
         // install the non-protected allocator
         lua_setallocf( L, U->protected_allocator.definition.allocF, U->protected_allocator.definition.allocUD);
-        // release the mutex
-        MUTEX_FREE( &U->protected_allocator.lock);
     }
 }
 
@@ -645,15 +639,13 @@ static lua_Integer get_mt_id( Universe* U, lua_State* L, int i)
     lua_pushvalue( L, i);                        // ... _R[REG_MTID] {mt}
     lua_rawget( L, -2);                          // ... _R[REG_MTID] mtk?
 
-    id = lua_tointeger( L, -1);    // 0 for nil
+    id = lua_tointeger( L, -1); // 0 for nil
     lua_pop( L, 1);                              // ... _R[REG_MTID]
     STACK_CHECK( L, 1);
 
     if( id == 0)
     {
-        MUTEX_LOCK( &U->mtid_lock);
-        id = ++ U->last_mt_id;
-        MUTEX_UNLOCK( &U->mtid_lock);
+        id = U->last_mt_id.fetch_add(1, std::memory_order_relaxed) + 1; // fetch_add yields the PREVIOUS value; ids must start at 1 because 0 means "no id registered yet" (see the 'id == 0' test above)
 
         /* Create two-way references: id_uint <-> table
         */
diff --git a/src/universe.cpp b/src/universe.cpp
index 06c7313..4dd956d 100644
--- a/src/universe.cpp
+++ b/src/universe.cpp
@@ -43,11 +43,11 @@ static constexpr UniqueKey UNIVERSE_REGKEY{ 0x9f877b2cf078f17full };
 
 Universe* universe_create(lua_State* L)
 {
-    Universe* U = (Universe*) lua_newuserdatauv( L, sizeof(Universe), 0);                          // universe
-    memset( U, 0, sizeof( Universe));
+    Universe* const U = static_cast<Universe*>(lua_newuserdatauv(L, sizeof(Universe), 0));               // universe
+    new (U) Universe{}; // placement new: construct the Universe inside the userdata block allocated by Lua (an explicit 'U->Universe::Universe()' call is a non-standard compiler extension)
     STACK_CHECK_START_REL(L, 1);
     UNIVERSE_REGKEY.set_registry(L, [](lua_State* L) { lua_pushvalue(L, -2); });                   // universe
-    STACK_CHECK( L, 1);
+    STACK_CHECK(L, 1);
     return U;
 }
 
@@ -64,12 +64,11 @@ void universe_store(lua_State* L, Universe* U)
 
 Universe* universe_get(lua_State* L)
 {
-    Universe* universe;
-    STACK_GROW( L, 2);
+    STACK_GROW(L, 2);
     STACK_CHECK_START_REL(L, 0);
     UNIVERSE_REGKEY.query_registry(L);
-    universe = (Universe*) lua_touserdata( L, -1); // nullptr if nil
-    lua_pop( L, 1);
-    STACK_CHECK( L, 0);
+    Universe* const universe = static_cast<Universe*>(lua_touserdata(L, -1)); // nullptr if nil
+    lua_pop(L, 1);
+    STACK_CHECK(L, 0);
     return universe;
 }
diff --git a/src/universe.h b/src/universe.h
index 34cef33..a6beb68 100644
--- a/src/universe.h
+++ b/src/universe.h
@@ -8,9 +8,10 @@ extern "C" {
 }
 #endif // __cplusplus
 
-#include "threading.h"
 #include "macros_and_utils.h"
 
+#include <atomic> // std::atomic, used by Universe::last_mt_id
+#include <mutex> // std::mutex / std::recursive_mutex, used by Universe and ProtectedAllocator
 // forwards
 struct DeepPrelude;
 struct Keepers;
@@ -28,15 +29,15 @@ struct Lane;
 // everything we need to provide to lua_newstate()
 struct AllocatorDefinition
 {
-    lua_Alloc allocF;
-    void* allocUD;
+    lua_Alloc allocF{ nullptr };
+    void* allocUD{ nullptr };
 };
 
 // mutex-protected allocator for use with Lua states that share a non-threadsafe allocator
 struct ProtectedAllocator
 {
     AllocatorDefinition definition;
-    MUTEX_T lock;
+    std::mutex lock;
 };
 
 // ################################################################################################
@@ -47,15 +48,15 @@ struct ProtectedAllocator
 struct Universe
 {
     // for verbose errors
-    bool verboseErrors;
+    bool verboseErrors{ false };
 
-    bool demoteFullUserdata;
+    bool demoteFullUserdata{ false };
 
     // before a state is created, this function will be called to obtain the allocator
-    lua_CFunction provide_allocator;
+    lua_CFunction provide_allocator{ nullptr };
 
     // after a state is created, this function will be called right after the bases libraries are loaded
-    lua_CFunction on_state_create_func;
+    lua_CFunction on_state_create_func{ nullptr };
 
     // if allocator="protected" is found in the configuration settings, a wrapper allocator will protect all allocator calls with a mutex
     // contains a mutex and the original allocator definition
@@ -63,38 +64,34 @@ struct Universe
 
     AllocatorDefinition internal_allocator;
 
-    Keepers* keepers;
+    Keepers* keepers{ nullptr };
 
     // Initialized by 'init_once_LOCKED()': the deep userdata Linda object
     // used for timers (each lane will get a proxy to this)
-    volatile DeepPrelude* timer_deep;  // = nullptr
+    volatile DeepPrelude* timer_deep{ nullptr }; // the universe holds one reference on it: taken in configure(), released in selfdestruct_gc()
 
 #if HAVE_LANE_TRACKING()
-    MUTEX_T tracking_cs;
-    Lane* volatile tracking_first; // will change to TRACKING_END if we want to activate tracking
+    std::mutex tracking_cs;
+    Lane* volatile tracking_first{ nullptr }; // will change to TRACKING_END if we want to activate tracking
 #endif // HAVE_LANE_TRACKING()
 
-    MUTEX_T selfdestruct_cs;
+    std::mutex selfdestruct_cs;
 
     // require() serialization
-    MUTEX_T require_cs;
-
-    // Lock for reference counter inc/dec locks (to be initialized by outside code) TODO: get rid of this and use atomics instead!
-    MUTEX_T deep_lock;
-    MUTEX_T mtid_lock;
+    std::recursive_mutex require_cs;
 
-    lua_Integer last_mt_id;
+    std::atomic<lua_Integer> last_mt_id{ 0 };
 
 #if USE_DEBUG_SPEW()
-    int debugspew_indent_depth;
+    int debugspew_indent_depth{ 0 };
 #endif // USE_DEBUG_SPEW()
 
-    Lane* volatile selfdestruct_first;
+    Lane* volatile selfdestruct_first{ nullptr };
     // After a lane has removed itself from the chain, it still performs some processing.
     // The terminal desinit sequence should wait for all such processing to terminate before force-killing threads
-    int volatile selfdestructing_count;
+    int volatile selfdestructing_count{ 0 };
 };
 
-Universe* universe_get( lua_State* L);
-Universe* universe_create( lua_State* L);
-void universe_store( lua_State* L, Universe* U);
+Universe* universe_get(lua_State* L);
+Universe* universe_create(lua_State* L);
+void universe_store(lua_State* L, Universe* U);
-- 
cgit v1.2.3-55-g6feb