From 89d9c98af1ac352ba4d49d660e61b0853d6e1a86 Mon Sep 17 00:00:00 2001
From: Peter Drahoš <drahosp@gmail.com>
Date: Fri, 1 Oct 2010 03:22:32 +0200
Subject: Import to git

---
 ABOUT                                   |   17 +
 BUGS                                    |    6 +
 CHANGES                                 |   72 ++
 CMakeLists.txt                          |   86 ++
 COPYRIGHT                               |   31 +
 Makefile                                |  229 ++++
 README                                  |  106 ++
 TODO                                    |    4 +
 dist.cmake                              |  130 +++
 dist.info                               |   14 +
 docs/Lua multithreading choices.graffle |  Bin 0 -> 2290 bytes
 docs/Lua multithreading choices.svg     |   15 +
 docs/comparison.html                    |  297 +++++
 docs/index.html                         |  951 ++++++++++++++++
 docs/multi.png                          |  Bin 0 -> 4657 bytes
 docs/performance.ods                    |  Bin 0 -> 66817 bytes
 make-vc.cmd                             |  274 +++++
 setup-vc.cmd                            |   90 ++
 src/Makefile                            |  176 +++
 src/keeper.lua                          |  244 ++++
 src/lanes.c                             | 1849 +++++++++++++++++++++++++++++++
 src/lanes.lua                           |  611 ++++++++++
 src/threading.c                         |  721 ++++++++++++
 src/threading.h                         |  196 ++++
 src/tools.c                             | 1198 ++++++++++++++++++++
 src/tools.h                             |   72 ++
 tests/argtable.lua                      |   38 +
 tests/assert.lua                        |  318 ++++++
 tests/atomic.lua                        |   18 +
 tests/basic.lua                         |  331 ++++++
 tests/cyclic.lua                        |   64 ++
 tests/ehynes.lua                        |   52 +
 tests/error.lua                         |   47 +
 tests/fibonacci.lua                     |   75 ++
 tests/fifo.lua                          |   43 +
 tests/finalizer.lua                     |   81 ++
 tests/hangtest.lua                      |   26 +
 tests/irayo_closure.lua                 |   35 +
 tests/irayo_recursive.lua               |   18 +
 tests/keeper.lua                        |   47 +
 tests/launchtest.lua                    |   78 ++
 tests/objects.lua                       |   76 ++
 tests/perftest.lua                      |  184 +++
 tests/recursive.lua                     |   21 +
 tests/require.lua                       |   30 +
 tests/timer.lua                         |   93 ++
 tools/bin2c.lua                         |  131 +++
 47 files changed, 9195 insertions(+)
 create mode 100644 ABOUT
 create mode 100644 BUGS
 create mode 100644 CHANGES
 create mode 100644 CMakeLists.txt
 create mode 100644 COPYRIGHT
 create mode 100644 Makefile
 create mode 100644 README
 create mode 100644 TODO
 create mode 100644 dist.cmake
 create mode 100644 dist.info
 create mode 100644 docs/Lua multithreading choices.graffle
 create mode 100644 docs/Lua multithreading choices.svg
 create mode 100644 docs/comparison.html
 create mode 100644 docs/index.html
 create mode 100644 docs/multi.png
 create mode 100644 docs/performance.ods
 create mode 100644 make-vc.cmd
 create mode 100644 setup-vc.cmd
 create mode 100644 src/Makefile
 create mode 100644 src/keeper.lua
 create mode 100644 src/lanes.c
 create mode 100644 src/lanes.lua
 create mode 100644 src/threading.c
 create mode 100644 src/threading.h
 create mode 100644 src/tools.c
 create mode 100644 src/tools.h
 create mode 100644 tests/argtable.lua
 create mode 100644 tests/assert.lua
 create mode 100644 tests/atomic.lua
 create mode 100644 tests/basic.lua
 create mode 100644 tests/cyclic.lua
 create mode 100644 tests/ehynes.lua
 create mode 100644 tests/error.lua
 create mode 100644 tests/fibonacci.lua
 create mode 100644 tests/fifo.lua
 create mode 100644 tests/finalizer.lua
 create mode 100644 tests/hangtest.lua
 create mode 100644 tests/irayo_closure.lua
 create mode 100644 tests/irayo_recursive.lua
 create mode 100644 tests/keeper.lua
 create mode 100644 tests/launchtest.lua
 create mode 100644 tests/objects.lua
 create mode 100644 tests/perftest.lua
 create mode 100644 tests/recursive.lua
 create mode 100644 tests/require.lua
 create mode 100644 tests/timer.lua
 create mode 100644 tools/bin2c.lua

diff --git a/ABOUT b/ABOUT
new file mode 100644
index 0000000..81cf640
--- /dev/null
+++ b/ABOUT
@@ -0,0 +1,17 @@
+
+Lua Lanes
+---------
+
+Lanes is a lightweight, native, lazy evaluating multithreading library for 
+Lua 5.1. It allows efficient use of multicore processors in Lua, by passing
+function calls into separate OS threads, and separate Lua states.
+
+No locking of the threads is needed, only launching and waiting for (with an
+optional timeout) a thread to get ready. Efficient communications between the
+running threads are possible either using message passing or shared state 
+models. Values passed can be anything but coroutines (see detailed limitations
+in the manual).
+
+Lua Lanes has been optimized for performance, and provides around 50-60%
+speed increase when running heavily threaded applications on dual core
+processors (compared to running a non-threaded plain Lua implementation). 
diff --git a/BUGS b/BUGS
new file mode 100644
index 0000000..d25cc0e
--- /dev/null
+++ b/BUGS
@@ -0,0 +1,6 @@
+
+BUGS:
+
+- tests/irayo_closure.lua fails     (trouble with setting globals right
+    for functions carried over to another Lua state)
+
diff --git a/CHANGES b/CHANGES
new file mode 100644
index 0000000..ae4da3c
--- /dev/null
+++ b/CHANGES
@@ -0,0 +1,72 @@
+
+CHANGES:
+
+CHANGE X:
+
+CHANGE 12 (bug fix on Windows, 2.0.3) AKa 25-Jan-2009:
+    Did CHANGE 9 the way it should be done.
+
+CHANGE 11 (new feature, 2.0.3) AKa 23-Jan-2009:
+    Finalizers ('set_finalizer()') for being able to do cleanup of a lane's 
+    resources, whether it returned successfully or via an error.
+
+CHANGE 10 (new feature, 2.0.3) AKa 23-Jan-2009:
+    Call stack showing where an error occurred is not merged with the error
+    message, but delivered as a separate stack table ({ "filename:line" [, ...] }).
+    Getting call stacks of errored lanes is now possible.
+
+CHANGE 9 (bug fix on Windows) AKa 10-Dec-2008 (> 2.0.2):
+    Applied patch from Kriss Daniels to avoid issues on 'now_time()' in Win32
+    (http://luaforge.net/forum/forum.php?thread_id=22704&forum_id=1781).
+    
+CHANGE 8 (bug fix) AKa 26-Oct-2008:
+    Avoids occasional segfault at process exit (on multicore CPUs). Does this
+    by keeping track of "free running" threads (s.a. the time thread) and
+    cancelling them at process exit. 
+    
+    Tested (2.0.2) on Linux 64,x86, OS X, WinXP.
+
+CHANGE 7 (bug fix) AKa 15-Oct-2008:
+    Recursive functions that use themselves as direct upvalue can now be
+    passed to other lanes, and used as a lane function.
+
+CHANGE 6 (bug fix) AKa 15-Oct-2008:
+    Added local caches of the following to src/lanes.lua (was otherwise getting
+    errors at least in 'tests/irayo_recursive.lua').
+
+		local assert= assert 
+		local string_gmatch= assert( string.gmatch ) 
+		local select= assert( select ) 
+		local type= assert( type ) 
+		local pairs= assert( pairs ) 
+		local tostring= assert( tostring ) 
+		local error= assert( error ) 
+		local setmetatable= assert( setmetatable ) 
+		local rawget= assert( rawget ) 
+
+    Thanks to Irayo for detecting and reporting this.
+
+CHANGE 5 (new feature):
+    Modifying Makefile so it's better suited to LuaRocks.
+
+CHANGE 4 (new feature):
+    Metatable copying, allowing Lua objects to be copied across lanes.
+
+CHANGE 3 (bug fix) AKa 5-Aug-2008:
+    The '__gc' method was not tied to thread userdata, at all. Caused memory
+    lifespan problems at least on OS X when threads were cancelled (EINVAL).
+    
+CHANGE 2 (bug fix) AKa 5-Aug-2008:
+    Better calculation of timeouts, always making them absolute (even in Win32)
+    to allow for events that wake the lane up but don't read/write the Linda
+    key that it was observing.
+
+CHANGE 1 (bug fix) AKa 4-Aug-2008:
+    Signalling woke up only one waiting thread, not all. This caused e.g. 
+    receive to not wake up if there was another thread waiting on the same 
+    Linda object.
+    
+    PThread fix: using 'pthread_cond_broadcast()' instead of 'pthread_cond_signal()'
+    Win32 fix: using manual events and 'PulseEvent()'
+
+(end)
diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 0000000..1799c01
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,86 @@
+# Copyright (C) 2007-2009 LuaDist.
+# Created by Peter Kapec
+# Redistribution and use of this file is allowed according to the terms of the MIT license.
+# For details see the COPYRIGHT file distributed with LuaDist.
+# Please note that the package source code is licensed under its own license.
+
+PROJECT(lanes C)
+CMAKE_MINIMUM_REQUIRED(VERSION 2.8)
+INCLUDE(dist.cmake)
+
+#2DO - patch threading.c to support cygwin.
+# The following values are just a guess.
+# WARNING: test segfault under Cygwin
+IF(CYGWIN)
+  ADD_DEFINITIONS(-D_PRIO_MODE=SCHED_FIFO)
+  ADD_DEFINITIONS(-D_PRIO_HI=15) # maximum that doesn't crash
+  ADD_DEFINITIONS(-D_PRIO_0=0)
+  ADD_DEFINITIONS(-D_PRIO_LO=-15) # ???
+  ADD_DEFINITIONS(-Dpthread_yield=sched_yield)
+ENDIF(CYGWIN)
+
+#2DO - use provided bin2c
+# Compile Lua bytecode to C
+ADD_CUSTOM_COMMAND(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/keeper.lch
+  DEPENDS src/keeper.lua
+    COMMAND "${LUAC}" "-o" "${CMAKE_CURRENT_BINARY_DIR}/keeper.lo"
+                           "${CMAKE_CURRENT_SOURCE_DIR}/src/keeper.lua"
+    COMMAND "${LUA}" "${CMAKE_CURRENT_SOURCE_DIR}/tools/bin2c.lua"
+            "${CMAKE_CURRENT_BINARY_DIR}/keeper.lo"
+            "-o" "${CMAKE_CURRENT_BINARY_DIR}/keeper.lch")
+SET_SOURCE_FILES_PROPERTIES(src/lanes.c PROPERTIES OBJECT_DEPENDS
+  ${CMAKE_CURRENT_BINARY_DIR}/keeper.lch)
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
+
+
+# Build
+INCLUDE_DIRECTORIES(src)
+ADD_LIBRARY(lua51-lanes MODULE src/lanes.c src/threading.c src/tools.c)
+
+IF(UNIX AND NOT CYGWIN)
+  SET(LIBS pthread)
+ENDIF(UNIX AND NOT CYGWIN)
+
+IF(MINGW)
+#~ 	FIND_FILE(MSVCR80 NAMES msvcr80.dll msvcr90.dll)
+#~ 	SET(LIBS gcc ${MSVCR80})
+#~ 	IF(MSVC90)
+
+#~ 		from InstallRequiredSystemLibraries.cmake
+
+		IF(CMAKE_CL_64)
+			SET(CMAKE_MSVC_ARCH amd64)
+		ELSE(CMAKE_CL_64)
+			SET(CMAKE_MSVC_ARCH x86)
+		ENDIF(CMAKE_CL_64)
+
+		GET_FILENAME_COMPONENT(devenv_dir "${CMAKE_MAKE_PROGRAM}" PATH)
+		GET_FILENAME_COMPONENT(base_dir "${devenv_dir}/../.." ABSOLUTE)
+
+		# Find the runtime library redistribution directory.
+		FIND_PATH(MSVC90_REDIST_DIR NAMES ${CMAKE_MSVC_ARCH}/Microsoft.VC90.CRT/Microsoft.VC90.CRT.manifest
+		PATHS
+		"[HKEY_LOCAL_MACHINE\\SOFTWARE\\Microsoft\\VisualStudio\\9.0;InstallDir]/../../VC/redist"
+		"[HKEY_LOCAL_MACHINE\\SOFTWARE\\Microsoft\\VCExpress\\9.0;InstallDir]/../../VC/redist"
+		"${base_dir}/VC/redist"
+		)
+		SET(MSVC90_CRT_DIR "${MSVC90_REDIST_DIR}/${CMAKE_MSVC_ARCH}/Microsoft.VC90.CRT")
+		SET(LIBS gcc msvcr90 "${MSVC90_CRT_DIR}/msvcr90.dll")
+
+#~ 	ENDIF()
+
+ENDIF()
+
+
+
+
+TARGET_LINK_LIBRARIES(lua51-lanes ${LUA_LIBRARY} ${LIBS})
+SET_TARGET_PROPERTIES(lua51-lanes PROPERTIES PREFIX "")
+
+# Install all files and documentation
+INSTALL (TARGETS lua51-lanes DESTINATION ${INSTALL_CMOD})
+INSTALL (FILES src/lanes.lua DESTINATION ${INSTALL_LMOD})
+
+INSTALL (FILES ABOUT BUGS COPYRIGHT CHANGES README TODO DESTINATION ${INSTALL_DATA})
+INSTALL (DIRECTORY docs/ DESTINATION ${INSTALL_DOC})
+INSTALL (DIRECTORY tests/ DESTINATION ${INSTALL_TEST})
diff --git a/COPYRIGHT b/COPYRIGHT
new file mode 100644
index 0000000..2930f19
--- /dev/null
+++ b/COPYRIGHT
@@ -0,0 +1,31 @@
+
+Lua Lanes is licensed under the same MIT license as the Lua 5.1 source code, 
+reproduced below.
+
+For details and rationale, see http://www.lua.org/license.html
+
+===============================================================================
+
+Copyright (C) 2007-09 Asko Kauppi, <akauppi@gmail.com>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+
+===============================================================================
+
+(end of COPYRIGHT)
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..4c0ff4b
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,229 @@
+#
+# Lanes/Makefile
+#
+#   make
+#   make test
+#   make basic|fifo|keeper|...
+#
+#   make perftest[-odd|-even|-plain]
+#   make launchtest
+#
+#   make install DESTDIR=path
+#   make tar|tgz VERSION=x.x
+#   make clean
+#
+
+MODULE = lanes
+
+N=1000
+
+_SO=.so
+_TARGET_SO=src/lua51-lanes.so
+TIME=time
+
+ifeq "$(findstring MINGW32,$(shell uname -s))" "MINGW32"
+  # MinGW MSYS on XP
+  #
+  LUA=lua
+  LUAC=luac
+  _SO=.dll
+  _TARGET_SO=./lua51-lanes.dll
+  TIME=timeit.exe
+else
+  # Autodetect LUA & LUAC
+  #
+  LUA=$(word 1,$(shell which lua5.1) $(shell which lua51) lua)
+  LUAC=$(word 1,$(shell which luac5.1) $(shell which luac51) luac)
+endif
+
+_PREFIX=LUA_CPATH=./src/?$(_SO) LUA_PATH="src/?.lua;./tests/?.lua"
+
+#---
+all: $(_TARGET_SO)
+
+$(_TARGET_SO): src/*.lua src/*.c src/*.h
+	cd src && $(MAKE) LUA=$(LUA) LUAC=$(LUAC)
+
+clean:
+	cd src && $(MAKE) clean
+
+debug:
+	$(MAKE) clean
+	cd src && $(MAKE) LUA=$(LUA) LUAC=$(LUAC) OPT_FLAGS="-O0 -g"
+	@echo ""
+	@echo "** Now, try 'make repetitive' or something and if it crashes, 'gdb $(LUA) ...core file...'"
+	@echo "   Then 'bt' for a backtrace."
+	@echo ""
+	@echo "   You have enabled core, no?   'ulimit -c unlimited'"
+	@echo "   On OS X, core files are under '/cores/'"
+	@echo ""
+
+gdb:
+	@echo "echo *** To start debugging: 'run tests/basic.lua' ***\n\n" > .gdb.cmd
+	$(_PREFIX) gdb -x .gdb.cmd $(LUA)
+
+#--- LuaRocks automated build ---
+#
+rock:
+	cd src && $(MAKE) LUAROCKS=1 CFLAGS="$(CFLAGS)" LIBFLAG="$(LIBFLAG)" LUA=$(LUA) LUAC=$(LUAC)
+
+
+#--- Testing ---
+#
+test:
+	$(MAKE) irayo_recursive
+#	$(MAKE) irayo_closure
+	$(MAKE) basic
+	$(MAKE) fifo
+	$(MAKE) keeper
+	$(MAKE) timer
+	$(MAKE) atomic
+	$(MAKE) cyclic
+	$(MAKE) objects
+	$(MAKE) fibonacci
+	$(MAKE) recursive
+
+basic: tests/basic.lua $(_TARGET_SO)
+	$(_PREFIX) $(LUA) $<
+
+# 
+# This tries to show out a bug which happens in lane cleanup (multicore CPU's only)
+#
+REP_ARGS=-llanes -e "print'say aaa'; for i=1,10 do print(i) end"
+repetitive: $(_TARGET_SO)
+	for i in 1 2 3 4 5 6 7 8 9 10 a b c d e f g h i j k l m n o p q r s t u v w x y z; \
+	   do $(_PREFIX) $(LUA) $(REP_ARGS); \
+    done
+
+repetitive1: $(_TARGET_SO)
+	$(_PREFIX) $(LUA) $(REP_ARGS)
+
+fifo: tests/fifo.lua $(_TARGET_SO)
+	$(_PREFIX) $(LUA) $<
+
+keeper: tests/keeper.lua $(_TARGET_SO)
+	$(_PREFIX) $(LUA) $<
+
+fibonacci: tests/fibonacci.lua $(_TARGET_SO)
+	$(_PREFIX) $(LUA) $<
+
+timer: tests/timer.lua $(_TARGET_SO)
+	$(_PREFIX) $(LUA) $<
+
+atomic: tests/atomic.lua $(_TARGET_SO)
+	$(_PREFIX) $(LUA) $<
+
+cyclic: tests/cyclic.lua $(_TARGET_SO)
+	$(_PREFIX) $(LUA) $<
+
+recursive: tests/recursive.lua $(_TARGET_SO)
+	$(_PREFIX) $(LUA) $<
+
+hangtest: tests/hangtest.lua $(_TARGET_SO)
+	$(_PREFIX) $(LUA) $<
+
+ehynes: tests/ehynes.lua $(_TARGET_SO)
+	$(_PREFIX) $(LUA) $<
+
+#require: tests/require.lua $(_TARGET_SO)
+#	$(_PREFIX) $(LUA) $<
+
+objects: tests/objects.lua $(_TARGET_SO)
+	$(_PREFIX) $(LUA) $<
+
+irayo_recursive: tests/irayo_recursive.lua $(_TARGET_SO)
+	$(_PREFIX) $(LUA) $<
+
+irayo_closure: tests/irayo_closure.lua $(_TARGET_SO)
+	$(_PREFIX) $(LUA) $<
+
+finalizer: tests/finalizer.lua $(_TARGET_SO)
+	$(_PREFIX) $(LUA) $<
+
+error-test: tests/error.lua $(_TARGET_SO)
+	$(_PREFIX) $(LUA) $<
+
+#---
+perftest-plain: tests/perftest.lua $(_TARGET_SO)
+	$(MAKE) _perftest ARGS="$< $(N) -plain"
+
+perftest: tests/perftest.lua $(_TARGET_SO)
+	$(MAKE) _perftest ARGS="$< $(N)"
+
+perftest-odd: tests/perftest.lua $(_TARGET_SO)
+	$(MAKE) _perftest ARGS="$< $(N) -prio=+2"
+
+perftest-even: tests/perftest.lua $(_TARGET_SO)
+	$(MAKE) _perftest ARGS="$< $(N) -prio=-2"
+
+#---
+launchtest: tests/launchtest.lua $(_TARGET_SO)
+	$(MAKE) _perftest ARGS="$< $(N)"
+
+_perftest:
+	$(_PREFIX) $(TIME) $(LUA) $(ARGS)
+
+
+#--- Installing ---
+#
+# This is for LuaRocks automatic install, mainly
+#
+# LUA_LIBDIR and LUA_SHAREDIR are used by the .rockspec (don't change the names!)
+#
+DESTDIR=/usr/local
+LUA_LIBDIR=$(DESTDIR)/lib/lua/5.1
+LUA_SHAREDIR=$(DESTDIR)/share/lua/5.1
+
+#
+# AKa 17-Oct: changed to use 'install -m 644' and 'cp -p' 
+#
+install: $(_TARGET_SO) src/lanes.lua
+	mkdir -p $(LUA_LIBDIR) $(LUA_SHAREDIR)
+	install -m 644 $(_TARGET_SO) $(LUA_LIBDIR)
+	cp -p src/lanes.lua $(LUA_SHAREDIR)
+
+
+#--- Packaging ---
+#
+# Make a folder of the same name as tgz, good manners (for the manual
+# expander)
+#
+# "make tgz VERSION=yyyymmdd"
+#
+VERSION=
+
+tar tgz:
+ifeq "$(VERSION)" ""
+	echo "Usage: make tar VERSION=x.x"; false
+else
+	$(MAKE) clean 
+	-rm -rf $(MODULE)-$(VERSION)
+	mkdir $(MODULE)-$(VERSION)
+	tar c * --exclude=.svn --exclude=.DS_Store --exclude="_*" \
+	        --exclude="*.tgz" --exclude="*.rockspec" \
+	        --exclude=lanes.dev --exclude="$(MODULE)-*" --exclude=xcode \
+		    --exclude="*.obj" --exclude="*.dll" --exclude=timeit.dat \
+	   | (cd $(MODULE)-$(VERSION) && tar x)
+	tar czvf $(MODULE)-$(VERSION).tgz $(MODULE)-$(VERSION)
+	rm -rf $(MODULE)-$(VERSION)
+	md5sum $(MODULE)-$(VERSION).tgz
+endif
+	
+	
+#--- Undocumented ---
+#
+
+# 2.0.1: Running this (instant exit of the main Lua state) occasionally
+#        segfaults (1:15 or so on OS X PowerPC G4).
+#
+require: $(_TARGET_SO)
+	$(_PREFIX) $(LUA) -e "require '$(MODULE)'"
+
+run: $(_TARGET_SO)
+	$(_PREFIX) $(LUA) -e "require '$(MODULE)'" -i
+
+echo:
+	@echo $(PROGRAMFILES:C=X)
+
+# Command-style targets: always run their recipes, even if a same-named file exists.
+.PHONY:	all clean test require debug _nodemo _notest
diff --git a/README b/README
new file mode 100644
index 0000000..b29d43d
--- /dev/null
+++ b/README
@@ -0,0 +1,106 @@
+
+=====================
+  Usage on Windows:
+=====================
+
+For once, the Win32 thread prioritization scheme is actually a good one, and
+it works. :)  Windows users, feel yourself as VIP citizens!!
+
+-------------------
+  Windows / MSYS:
+-------------------
+
+On MSYS, 'stderr' output seems to be buffered. You might want to make
+it auto-flush, to help you track & debug your scripts. Like this:
+
+    io.stderr:setvbuf "no"
+
+Even after this, MSYS insists on linewise buffering; it will flush at
+each newline only.
+
+
+===================
+  Usage on Linux:
+===================
+
+Linux NPTL 2.5 (Ubuntu 7.04) was used in the testing of Lua Lanes.
+
+This document (http://www.net.in.tum.de/~gregor/docs/pthread-scheduling.html)
+describes fairly well, what (all) is wrong with Linux threading, even today.
+
+For other worthy links:
+    http://kerneltrap.org/node/6080
+    http://en.wikipedia.org/wiki/Native_POSIX_Thread_Library
+
+In short, you cannot use thread prioritization in Linux. Unless you run as
+root, and I _truly_ would not recommend that. Lobby for yet-another thread
+implementation for Linux, and mail -> akauppi@gmail.com about it. :)
+
+
+======================
+  Usage on Mac OS X:
+======================
+
+No real problems in OS X, _once_ everything is set up right...
+
+In short, have your Lua core compiled with LUA_USE_DLOPEN and LUA_USE_POSIX
+instead of the (default as of 5.1) LUA_DL_DYLD and LUA_USE_MACOSX. This is
+crucial to have each module loaded only once (even if initialized separately
+for each thread) and for the static & global variables within the modules to
+actually be process-wide. Lua Lanes cannot live without (and hopefully,
+LUA_DL_DYLD is long gone by Lua 5.2)...
+
+Another issue is making sure you only have _one_ Lua core. Your 'lua' binary
+must link dynamically to a .dylib, it must _not_ carry a personal copy of Lua
+core with itself. If it does, you will gain many mysterious malloc errors
+when entering multithreading realm.
+
+<<
+lua-xcode2(9473,0xa000ed88) malloc: ***  Deallocation of a pointer not malloced: 0xe9fc0; This could be a double free(), or free() called with the middle of an allocated block; Try setting environment variable MallocHelp to see tools to help debug
+<<
+
+rm lua.o luac.o
+gcc -dynamiclib -install_name /usr/local/lib/liblua.5.1.dylib \
+    -compatibility_version 5.1 -current_version 5.1.2 \
+    -o liblua.5.1.2.dylib *.o
+
+gcc -fno-common -DLUA_USE_POSIX -DLUA_USE_DLOPEN -DLUA_USE_READLINE -lreadline -L. -llua.5.1.2 lua.c -o lua
+
+That should be it. :)
+
+Fink 'lua51' packaging has the necessary changes since 5.1.2-3.
+
+
+=====================
+  Usage on FreeBSD:
+=====================
+
+Unlike in Linux, also the Lua engine used with Lanes needs to be compiled with
+'-lpthread'. Otherwise, the following malloc errors are received:
+
+    <<
+    lua in free(): warning: recursive call
+    PANIC: unprotected error in call to Lua API (not enough memory)
+    <<
+
+Here are the Lua compilation steps that proved to work (FreeBSD 6.2 i386):
+
+    gmake freebsd
+    rm lua.o luac.o liblua.a
+    gcc -shared -lm -Wl,-E -o liblua.5.1.2.so *.o
+    gcc -O2 -Wall -DLUA_USE_LINUX -lm -Wl,-E -lpthread -lreadline -L. -llua.5.1.2 lua.c -o lua
+
+To compile Lanes, use 'gmake' or simply:
+
+    cc -O2 -Wall -llua.5.1.2 -lpthread -shared -o out/bsd-x86/lua51-lanes.so \
+        -DGLUA_LUA51 gluax.c lanes.c
+
+To place Lua into ~/local, set the following environment variables (this is
+just a reminder for myself...):
+
+    export CPATH=.../local/include
+    export LIBRARY_PATH=.../local/lib
+    export LD_LIBRARY_PATH=.../local/lib
+
+
+(end)
diff --git a/TODO b/TODO
new file mode 100644
index 0000000..7c4c3d8
--- /dev/null
+++ b/TODO
@@ -0,0 +1,4 @@
+
+TODO:
+
+- Testing Lane killing (not cancellation, but actual killing)
diff --git a/dist.cmake b/dist.cmake
new file mode 100644
index 0000000..95928b2
--- /dev/null
+++ b/dist.cmake
@@ -0,0 +1,130 @@
+# LuaDist CMake utility library.
+# Provides variables and utility functions common to LuaDist CMake builds.
+# 
+# Copyright (C) 2007-2010 LuaDist.
+# by David Manura, Peter Drahos
+# Redistribution and use of this file is allowed according to the terms of the MIT license.
+# For details see the COPYRIGHT file distributed with LuaDist.
+# Please note that the package source code is licensed under its own license.
+
+# A few convenience settings
+SET (CMAKE_ALLOW_LOOSE_LOOP_CONSTRUCTS true)
+SET (CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_MODULE_PATH})
+
+# Where to install module parts:
+set(INSTALL_BIN bin CACHE PATH "Where to install binaries to.")
+set(INSTALL_LIB lib CACHE PATH "Where to install libraries to.")
+set(INSTALL_INC include CACHE PATH "Where to install headers to.")
+set(INSTALL_ETC etc CACHE PATH "Where to store configuration files")
+set(INSTALL_LMOD share/lua/lmod CACHE PATH "Directory to install Lua modules.")
+set(INSTALL_CMOD share/lua/cmod CACHE PATH "Directory to install Lua binary modules.")
+set(INSTALL_DATA share/${PROJECT_NAME} CACHE PATH "Directory the package can store documentation, tests or other data in.")
+set(INSTALL_DOC ${INSTALL_DATA}/doc CACHE PATH "Recommended directory to install documentation into.")
+set(INSTALL_EXAMPLE ${INSTALL_DATA}/example CACHE PATH "Recommended directory to install examples into.")
+set(INSTALL_TEST ${INSTALL_DATA}/test CACHE PATH "Recommended directory to install tests into.")
+set(INSTALL_FOO ${INSTALL_DATA}/etc CACHE PATH "Where to install additional files")
+
+
+# In MSVC, prevent warnings that can occur when using standard libraries.
+if(MSVC)
+	add_definitions(-D_CRT_SECURE_NO_WARNINGS)
+endif(MSVC)
+
+# Adds Lua shared library module target `_target`.
+# Additional sources to build the module are listed after `_target`.
+macro(add_lua_module _target)
+	find_package(Lua51 REQUIRED)  # REQUIRED: configuration stops if Lua 5.1 is not found
+	include_directories(${LUA_INCLUDE_DIR})  #2DO: somehow apply only to _target?
+
+	add_library(${_target} MODULE ${ARGN})  # MODULE library built from the sources passed after _target
+	set_target_properties(${_target} PROPERTIES PREFIX "")  # empty prefix: output file is named exactly after _target
+	target_link_libraries(${_target} ${LUA_LIBRARY})  # presumably LUA_LIBRARY comes from find_package(Lua51) above -- confirm
+	
+	IF(WIN32)  # NOTE(review): the flag below is GNU ld syntax; presumably only MinGW-style toolchains are expected here -- confirm for MSVC
+		set_target_properties(${_target} PROPERTIES LINK_FLAGS "-Wl,--enable-auto-import")
+	ENDIF()
+
+endmacro(add_lua_module)
+
+# Runs Lua script `_testfile` under CTest tester.
+# Optional argument `_testcurrentdir` is current working directory to run test under
+# (defaults to ${CMAKE_CURRENT_BINARY_DIR}).
+# Both paths, if relative, are relative to ${CMAKE_CURRENT_SOURCE_DIR}.
+# Under LuaDist, set test=true in config.lua to enable testing.
+macro(add_lua_test _testfile)
+	include(CTest)  # CTest module provides the BUILD_TESTING option checked below
+	if(BUILD_TESTING)
+		find_program(LUA NAMES lua lua.bat)  # interpreter used to run the wrapper script
+		get_filename_component(TESTFILEABS ${_testfile} ABSOLUTE)  # full path of the real test script
+		get_filename_component(TESTFILENAME ${_testfile} NAME)  # file name with extension; names the wrapper
+		get_filename_component(TESTFILEBASE ${_testfile} NAME_WE)  # file name without extension; names the test
+
+		# Write a wrapper script that extends package.path/cpath with the build and source dirs, then runs the real test.
+		set(TESTWRAPPER ${CMAKE_CURRENT_BINARY_DIR}/${TESTFILENAME})  # NOTE(review): for an in-source build this path may coincide with the test file itself -- confirm
+		set(TESTWRAPPERSOURCE
+"package.path = '${CMAKE_CURRENT_BINARY_DIR}/?.lua\;${CMAKE_CURRENT_SOURCE_DIR}/?.lua\;' .. package.path
+package.cpath = '${CMAKE_CURRENT_BINARY_DIR}/?.so\;${CMAKE_CURRENT_BINARY_DIR}/?.dll\;' .. package.cpath
+return dofile '${TESTFILEABS}'
+"		)
+		if(${ARGC} GREATER 1)  # optional second argument: working directory for the test
+			set(_testcurrentdir ${ARGV1})
+			get_filename_component(TESTCURRENTDIRABS ${_testcurrentdir} ABSOLUTE)
+			set(TESTWRAPPERSOURCE
+"require 'lfs'
+lfs.chdir('${TESTCURRENTDIRABS}')
+${TESTWRAPPERSOURCE}")
+		endif()
+		FILE(WRITE ${TESTWRAPPER} ${TESTWRAPPERSOURCE})  # written at configure time
+
+		add_test(${TESTFILEBASE} ${LUA} ${TESTWRAPPER})  # test is registered under the script's base name
+	endif(BUILD_TESTING)
+
+	# see also http://gdcm.svn.sourceforge.net/viewvc/gdcm/Sandbox/CMakeModules/UsePythonTest.cmake
+endmacro(add_lua_test)
+
+# Converts Lua source file `_source` to binary string embedded in C source
+# file `_target`.  Optionally compiles Lua source to byte code (not available
+# under LuaJIT2, which doesn't have a bytecode loader).  Additionally, Lua
+# versions of bin2c [1] and luac [2] may be passed respectively as additional
+# arguments.
+#
+# [1] http://lua-users.org/wiki/BinToCee
+# [2] http://lua-users.org/wiki/LuaCompilerInLua
+function(add_lua_bin2c _target _source)
+	find_program(LUA NAMES lua lua.bat)
+	execute_process(COMMAND ${LUA} -e "string.dump(function()end)" RESULT_VARIABLE _LUA_DUMP_RESULT ERROR_QUIET)  # probe: can this Lua dump bytecode?
+	if (NOT ${_LUA_DUMP_RESULT})  # exit status 0 means the probe succeeded
+		SET(HAVE_LUA_DUMP true)
+	endif()
+	message("-- string.dump=${HAVE_LUA_DUMP}")
+
+	if (ARGV2)  # optional 3rd argument: Lua implementation of bin2c, run through ${LUA}
+		get_filename_component(BIN2C ${ARGV2} ABSOLUTE)
+		set(BIN2C ${LUA} ${BIN2C})
+	else()
+		find_program(BIN2C NAMES bin2c bin2c.bat)
+	endif()
+	if (HAVE_LUA_DUMP)
+		if (ARGV3)  # optional 4th argument: Lua implementation of luac, run through ${LUA}
+			get_filename_component(LUAC ${ARGV3} ABSOLUTE)
+			set(LUAC ${LUA} ${LUAC})
+		else()
+			find_program(LUAC NAMES luac luac.bat)
+		endif()
+	endif (HAVE_LUA_DUMP)
+	message("-- bin2c=${BIN2C}")
+	message("-- luac=${LUAC}")
+
+	get_filename_component(SOURCEABS ${_source} ABSOLUTE)  # commands run in the build dir, so they need an absolute source path
+	if (HAVE_LUA_DUMP)
+		get_filename_component(SOURCEBASE ${_source} NAME_WE)
+		add_custom_command(
+			OUTPUT  ${_target} DEPENDS ${_source}
+			COMMAND ${LUAC} -o ${CMAKE_CURRENT_BINARY_DIR}/${SOURCEBASE}.lo ${SOURCEABS}
+			COMMAND ${BIN2C} ${CMAKE_CURRENT_BINARY_DIR}/${SOURCEBASE}.lo ">${_target}" )
+	else()
+		add_custom_command(
+			OUTPUT  ${_target} DEPENDS ${SOURCEABS}
+			COMMAND ${BIN2C} ${SOURCEABS} ">${_target}" )  # absolute path: a relative _source would not resolve from the build dir
+	endif()
+endfunction(add_lua_bin2c)
diff --git a/dist.info b/dist.info
new file mode 100644
index 0000000..a6553b0
--- /dev/null
+++ b/dist.info
@@ -0,0 +1,14 @@
+--- This file is part of LuaDist project
+
+name = "lanes"
+version = "2.0.3"
+
+desc = "Lanes is a lightweight, native, lazy evaluating multithreading library for Lua 5.1."
+author = "Asko Kauppi"
+license = "MIT"
+url = "http://luaforge.net/projects/lanes"
+maintainer = "Peter Kapec"
+
+depends = {
+	"lua ~> 5.1"
+}
diff --git a/docs/Lua multithreading choices.graffle b/docs/Lua multithreading choices.graffle
new file mode 100644
index 0000000..2bd4cb4
Binary files /dev/null and b/docs/Lua multithreading choices.graffle differ
diff --git a/docs/Lua multithreading choices.svg b/docs/Lua multithreading choices.svg
new file mode 100644
index 0000000..8a09698
--- /dev/null
+++ b/docs/Lua multithreading choices.svg	
@@ -0,0 +1,15 @@
+<?xml version="1.0"?>
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
+<svg xmlns="http://www.w3.org/2000/svg" xmlns:xl="http://www.w3.org/1999/xlink" version="1.1" viewBox="0 0 813.60004 566.95" width="813.60004pt" height="566.95pt"><metadata xmlns:dc="http://purl.org/dc/elements/1.1/"><dc:date>2008-07-25 19:33Z</dc:date><!-- Produced by OmniGraffle Professional 4.2.1 --></metadata><defs><filter id="Shadow" filterUnits="userSpaceOnUse"><feGaussianBlur in="SourceAlpha" result="blur" stdDeviation="3.488"/><feOffset in="blur" result="offset" dx="0" dy="4"/><feFlood flood-color="black" flood-opacity=".75" result="flood"/><feComposite in="flood" in2="offset" operator="in"/></filter><font-face font-family="Helvetica" font-size="18" units-per-em="1000" underline-position="-75.683594" underline-thickness="49.316406" slope="0" x-height="555.55554" cap-height="722.22223" ascent="770.01953" descent="-229.98047" font-weight="500"><!--NSCTFontDescriptor &lt;0x7988130&gt; = {
+    NSFontNameAttribute = Helvetica;
+    NSFontSizeAttribute = 18;
+}--><font-face-src><font-face-name name="Helvetica"/></font-face-src></font-face><marker orient="auto" overflow="visible" markerUnits="strokeWidth" id="FilledArrow_Marker" viewBox="-1 -4 10 8" markerWidth="10" markerHeight="8" color="black"><g><path d="M 8 0 L 0 -3 L 0 3 Z" fill="currentColor" stroke="currentColor" stroke-width="1"/></g></marker><font-face font-family="Courier" font-size="18" units-per-em="1000" underline-position="-178.22266" underline-thickness="57.617188" slope="0" x-height="444.44446" cap-height="611.11115" ascent="753.90625" descent="-246.09375" font-weight="500"><!--NSCTFontDescriptor &lt;0x79dbbf0&gt; = {
+    NSFontNameAttribute = Courier;
+    NSFontSizeAttribute = 18;
+}--><font-face-src><font-face-name name="Courier"/></font-face-src></font-face><font-face font-family="Helvetica" font-size="16" units-per-em="1000" underline-position="-75.683594" underline-thickness="49.316406" slope="0" x-height="531.25" cap-height="718.75" ascent="770.01953" descent="-229.98047" font-weight="500"><!--NSCTFontDescriptor &lt;0x79dde00&gt; = {
+    NSFontNameAttribute = Helvetica;
+    NSFontSizeAttribute = 16;
+}--><font-face-src><font-face-name name="Helvetica"/></font-face-src></font-face><font-face font-family="Times" font-size="24" units-per-em="1000" underline-position="-75.683594" underline-thickness="49.316406" slope="0" x-height="458.33334" cap-height="666.6667" ascent="750" descent="-250" font-weight="500"><!--NSCTFontDescriptor &lt;0x79df960&gt; = {
+    NSFontNameAttribute = "Times-Roman";
+    NSFontSizeAttribute = 24;
+}--><font-face-src><font-face-name name="Times-Roman"/></font-face-src></font-face></defs><g stroke="none" stroke-opacity="1" stroke-dasharray="none" fill="none" fill-opacity="1"><title>Canvas 1</title><rect fill="white" width="813.60004" height="566.95"/><g><title>Layer 1</title><g><use xl:href="#id4_Graphic" filter="url(#Shadow)"/><use xl:href="#id2_Graphic" filter="url(#Shadow)"/><use xl:href="#id33_Graphic" filter="url(#Shadow)"/><use xl:href="#id36_Graphic" filter="url(#Shadow)"/></g><g id="id4_Graphic"><path d="M 162.15001 63.999992 L 484.84998 63.999992 C 523.0208 63.999992 554 94.02719 554 131.02499 C 554 168.0228 523.0208 198.04999 484.84998 198.04999 L 162.15001 198.04999 C 123.9792 198.04999 93 168.0228 93 131.02499 C 93 94.02719 123.9792 63.999992 162.15001 63.999992" fill="#ffff51"/><path d="M 162.15001 63.999992 L 484.84998 63.999992 C 523.0208 63.999992 554 94.02719 554 131.02499 C 554 168.0228 523.0208 198.04999 484.84998 198.04999 L 162.15001 198.04999 C 123.9792 198.04999 93 168.0228 93 131.02499 C 93 94.02719 123.9792 63.999992 162.15001 63.999992" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/><text transform="translate(144.10001 120.024994)" fill="black"><tspan font-family="Helvetica" font-size="18" font-weight="500" x="154.878525" y="18" textLength="49.04297">Lanes</tspan></text></g><line x1="67" y1="508" x2="733.09998" y2="508" marker-end="url(#FilledArrow_Marker)" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/><line x1="67" y1="508" x2="67" y2="68.90001" marker-end="url(#FilledArrow_Marker)" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/><text transform="translate(613 516)" fill="black"><tspan font-family="Courier" font-size="18" font-weight="500" x="0" y="18" textLength="118.819336">distributed</tspan></text><text transform="translate(104.9992 516)" fill="black"><tspan font-family="Courier" font-size="18" font-weight="500" x="0" y="18" 
textLength="118.819336">shared data</tspan></text><text transform="translate(323.5 516)" fill="black"><tspan font-family="Courier" font-size="18" font-weight="500" x="0" y="18" textLength="140.42285">isolated data</tspan></text><text transform="translate(21.501301 355) rotate(-90)" fill="black"><tspan font-family="Courier" font-size="18" font-weight="500" x="0" y="18" textLength="108.01758">coroutines</tspan></text><text transform="translate(21.501297 187.5) rotate(-90)" fill="black"><tspan font-family="Courier" font-size="18" font-weight="500" x="0" y="18" textLength="108.01758">OS threads</tspan></text><g id="id2_Graphic"><ellipse cx="164.4995" cy="312.5" rx="71.49962" ry="67.025116" fill="white"/><ellipse cx="164.4995" cy="312.5" rx="71.49962" ry="67.025116" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/><text transform="translate(112.299896 293.50003)" fill="black"><tspan font-family="Helvetica" font-size="16" font-weight="500" x="38.85194" y="15" textLength="26.695312">Lua</tspan><tspan font-family="Helvetica" font-size="16" font-weight="500" x="15.28944" y="34" textLength="73.820312">coroutines</tspan></text></g><text transform="translate(21.501301 504) rotate(-90)" fill="black"><tspan font-family="Courier" font-size="18" font-weight="500" x="0" y="18" textLength="97.21582">core mods</tspan></text><g id="id33_Graphic"><ellipse cx="164.49921" cy="441.975" rx="71.49961" ry="67.02514" fill="#ff7695"/><ellipse cx="164.49921" cy="441.975" rx="71.49961" ry="67.02514" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/><text transform="translate(112.2996 432.47504)" fill="black"><tspan font-family="Helvetica" font-size="16" font-weight="500" x="13.504284" y="15" textLength="77.390625">LuaThread</tspan></text></g><text transform="translate(518 26)" fill="black"><tspan font-family="Times" font-size="24" font-weight="500" x="0" y="23" textLength="262.58203">Lua multithreading choices</tspan></text><g 
id="id36_Graphic"><path d="M 345.15002 245.475 L 667.84998 245.475 C 706.0208 245.475 737 275.5022 737 312.5 C 737 349.4978 706.0208 379.525 667.84998 379.525 L 345.15002 379.525 C 306.97919 379.525 276 349.4978 276 312.5 C 276 275.5022 306.97919 245.475 345.15002 245.475" fill="#69b1ff"/><path d="M 345.15002 245.475 L 667.84998 245.475 C 706.0208 245.475 737 275.5022 737 312.5 C 737 349.4978 706.0208 379.525 667.84998 379.525 L 345.15002 379.525 C 306.97919 379.525 276 349.4978 276 312.5 C 276 275.5022 306.97919 245.475 345.15002 245.475" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/><text transform="translate(327.1 301.5)" fill="black"><tspan font-family="Helvetica" font-size="18" font-weight="500" x="119.8629" y="18" textLength="119.07422">ConcurrentLua</tspan></text></g></g></g></svg>
diff --git a/docs/comparison.html b/docs/comparison.html
new file mode 100644
index 0000000..84ef9ca
--- /dev/null
+++ b/docs/comparison.html
@@ -0,0 +1,297 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+<!--
+    Comparison of Lua Lanes with other approaches
+-->
+
+<html>
+<head>
+  <meta name="description" content="Lua Lanes - Comparison" />
+  <meta name="keywords" content="Lua, Library, Multithreading, Threads, Rocks" />
+
+  <title>Lua Lanes - Comparison</title>
+</head>
+
+<body>
+
+<!-- comparisons +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -->
+<hr/>
+<h2 id="comparisons">Comparisons to other threading kits</h2>
+
+<p>
+A short comparison of Lanes with other existing Lua multithreading kits.
+</p>
+
+<table><tr><td width=40>
+    <td bgcolor="#ffffe0">
+<pre>
+=============
+  Lua Lanes
+=============
+
+With the introduction of Lindas (Jun-2008), Lua Lanes simplifies its API while
+simultaneously adding more power and speed.
+
+Pros:
+    - regular Lua 5.1 module
+    - completely separate Lua states, one per OS thread
+    - message passing, or shared data using Lindas
+    - no application level locking, ever
+    - scales well, up to 1000's of threads
+    - priorities (-2..+2) for launched threads
+    - threads are cancellable (with complete cleanup)
+    - timeouts on all pending operations
+    - thread contents are given as regular Lua functions; syntax checked early on,
+      syntax highlighting works
+    - standard libraries opened to subthreads can be granularly selected
+    - fast stack-to-stack copies, via hidden "keeper states". No serialization needed.
+    - protects calls to 'require', allowing wide compatibility with existing
+      modules (and all, with minor changes)
+
+Cons:
+    - requires OS threads
+    - not utilizing network parallelism (all threads run on one computer)
+
+Sample:
+&lt;&lt;
+  require "lanes"
+  
+  local function calculate(a,b,c)
+    if not a then 
+      error "sample error; propagated to main lane when reading results"
+    end
+    return a+b+c
+  end
+
+  local h1= lanes.gen(calculate)(1,2,3)
+  local h2= lanes.gen(calculate)(10,20,30)
+  local h3= lanes.gen(calculate)(100,200,300)
+
+  print( h1[1], h2[1], h3[1] )     -- pends for the results, or propagates error
+&lt;&lt;
+
+
+==================
+  Lua coroutines    (by Lua authors)
+==================
+
+<A HREF="http://www.lua.org/manual/5.1/manual.html#2.11">http://www.lua.org/manual/5.1/manual.html#2.11</A>
+<A HREF="http://lua-users.org/wiki/CoroutinesTutorial">http://lua-users.org/wiki/CoroutinesTutorial</A>
+
+Lua coroutines are an integral part of Lua 5 itself. They are listed here, since
+they should be the _first_ multitasking mechanism to consider. They can also be
+used together with Lanes, or the other mechanisms listed below.
+
+Coroutines are very usable in provider/consumer situations, allowing you to
+queue Lua functions on an as-needed dataflow basis with each other.
+ 
+Pros:
+    - works with plain Lua (no extensions)
+    - works on any platform
+    - lightweight (no OS level threading or locking involved)
+
+Cons:
+    - co-operative, meaning your code will need to decide, who gets to run
+    - does not utilize multiple CPUs/cores
+
+Sample:
+
+    ..TBD: sample code doing the "child" "parent" output here (see below)..
+
+
+=============
+  LuaThread     (by Diego Nehab)
+=============
+
+<A HREF="http://www.cs.princeton.edu/~diego/professional/luathread/">http://www.cs.princeton.edu/~diego/professional/luathread/</A>
+
+LuaThread provides thread creation, mutexes, condition variables, and inter-
+thread queues to the Lua scripts. It takes a C-kind of approach, where Lua
+globals are shared by the threads running, and need therefore to be guarded
+against multithreading conflicts. 
+
+Whether this is exactly what you want, or whether a more loosely implemented
+multithreading (such as Lanes) would be better, is up to you. One can argue that
+a loose implementation is easier for the developer, since no application level
+locking needs to be considered.
+
+Pros:
+    - no marshalling overhead, since threads share the same Lua state
+
+Cons:
+    - requires a modified Lua core
+    - application level locking required
+
+Sample:
+&lt;&lt;
+  local function flood(output, word)
+    while 1 do 
+        output:lock()
+        io.write(word, ", ")
+        output:unlock()
+    end
+  end
+
+  local output = thread.newmutex()
+  thread.newthread(flood, {output, "child"})
+  flood(output, "parent")
+&lt;&lt;
+
+
+=============
+  Lua Rings     (by Roberto Ierusalimschy &amp; Tom&aacute;s Guisasola)
+=============
+
+<A HREF="http://www.keplerproject.org/rings/">http://www.keplerproject.org/rings/</A>
+
+".. library which provides a way to create new Lua states from within Lua. 
+It also offers a simple way to communicate between the creator (master) and 
+the created (slave) states."
+
+".. master can execute code in any of its slaves but each slave only has
+access to its master (or its own slaves)."
+
+Rings offers separate Lua states, but no multithreading. This makes it simple,
+but it won't use more than one CPU core. Other differences include:
+
+    - opens all Lua standard libraries for subthreads
+      (Lanes opens the needed ones)
+
+    - marshalls numbers, strings, booleans, userdata
+      (Lanes marshalls also non-cyclic tables)
+
+    - "remotedostring" allows executing code in the master state
+      (Lanes does _not_ allow subthreads to trouble/modify master automatically,
+      to allow effective sandboxing. The same can be achieved by sending code 
+      between the threads, but master needs to explicitly allow this = receive
+      a function and execute it)
+
+    - offers "Stable, a very simple API to manage a shared table at the master
+      state"
+      (Lanes 2008 offers keeper tables)
+
+Pros:
+    - "offers Stable, a very simple API to manage a shared table at the master 
+    state"
+
+Cons:
+    - thread contents defined as strings, not Lua source as such; does not
+      give syntax check at file parsing, does not allow syntax highlight
+
+Sample:
+&lt;&lt;
+  require"rings"
+  S = rings.new ()
+
+  data = { 12, 13, 14, }
+  print (S:dostring ([[
+  aux = {}
+  for i, v in ipairs (arg) do
+	table.insert (aux, 1, v)
+  end
+  return unpack (aux)]], unpack (data))) -- true, 14, 13, 12
+
+  S:close ()
+&lt;&lt;
+
+
+==========================
+  Helper Threads Toolkit    (by Javier Guerra G.)
+==========================
+
+<A HREF="http://luaforge.net/projects/helper-threads/">http://luaforge.net/projects/helper-threads/</A>
+
+"Provides a consistent framework to write non-blocking C libraries, with a Lua
+interface for starting tasks and managing the Futures, Queues and Threads."
+
+This seems like a companion of the "occasional threading" model (see below);
+the Lua side is kept clear of multithreading, while the C side can be "spawned"
+off to do things in the background.
+
+Pros:
+    - provides an "update" mechanism, allowing the (C) thread and controlling
+      Lua to interact during execution of the thread
+    - ...
+
+Cons:
+    - thread is "only for C code and it can't touch or access the Lua state",
+      in other words there is no Lua-side multithreading concept (by design)
+
+
+========================
+  Occasional threading      (by Russ Cox)
+========================
+
+<A HREF="http://lua-users.org/lists/lua-l/2006-11/msg00368.html">http://lua-users.org/lists/lua-l/2006-11/msg00368.html</A>
+
+".. able to have certain C calls run in parallel with the [Lua] VM, but
+otherwise keep the VM single-threaded."
+
+That pretty much says it all.
+
+Pros:
+    - simple, yet providing for the "occasional" need to run really multithreaded
+    - can be made into a loadable module (says the message)
+
+Cons:
+    - only for occasional usage, the programming paradigm is still essentially
+      singlethreaded (by definition)
+    - not a real project, just a message on the Lua list (but a good one!)
+    
+
+=================
+  ConcurrentLua 
+=================
+
+<A TARGET="_blank" HREF="http://concurrentlua.luaforge.net/index.html"
+>http://concurrentlua.luaforge.net/index.html</A>
+
+ConcurrentLua is based on the Lua model for concurrency, namely coroutines, and
+extends this model by providing message-passing primitives. 
+
+".. implementation of the share-nothing asynchronous message-passing model"
+
+".. process can check its mailbox for new messages at any time, and if there 
+are any, they can be read in the order of arrival."
+
+".. processes in the system are implemented with Lua coroutines"
+
+".. still based on the cooperative multithreading model that Lua uses"
+
+Recent, released on 21 June 2008.
+
+Pros:
+    - Designed from the ground up for distributed computing (multiple computers,
+    not only CPU cores)
+    - Does not require a pre-emptive operating system
+
+Cons:
+    - Serialization must degrade raw performance in one-computer scenarios
+      (vs. stack-to-stack copying ala Lanes)
+    - Depends on LuaSocket and Copas modules.
+    - Each thread has a single mailbox tied to it (vs. separating threads and
+      connection resources)
+
+</pre>
+</td></tr></table>
+
+
+<!-- footnotes +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -->
+<hr/>
+
+<p>For feedback, questions and suggestions:
+<UL>
+    <li><A HREF="http://luaforge.net/projects/lanes">Lanes @ LuaForge</A></li>
+    <li><A HREF="mailto:akauppi@gmail.com">the author</A></li>
+</UL>
+</p>
+
+<!--
+<font size="-1">
+<p>
+1) ...
+</p>
+</font>
+-->
+
+</body>
+</html>
diff --git a/docs/index.html b/docs/index.html
new file mode 100644
index 0000000..956e691
--- /dev/null
+++ b/docs/index.html
@@ -0,0 +1,951 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+<!--
+    Documentation for Lua Lanes
+-->
+
+<html>
+<head>
+  <meta name="description" content="Lua Lanes - multithreading in Lua" />
+  <meta name="keywords" content="Lua, Library, Multithreading, Threads, Rocks" />
+
+  <title>Lua Lanes - multithreading in Lua</title>
+</head>
+
+<body>
+<div class="header">
+<hr />
+
+<center>
+<table summary="Lua logo">
+  <tbody>
+    <tr>
+      <td align="center">
+      <a href="http://www.lua.org">
+        <img src="http://akauppi.googlepages.com/multi.png" alt="Lua" align="middle" border="0" height="120" width="128" />
+        <img src="http://akauppi.googlepages.com/multi.png" alt="Lua" align="middle" border="0" height="120" width="128" />
+        <img src="http://akauppi.googlepages.com/multi.png" alt="Lua" align="middle" border="0" height="120" width="128" />
+        <img src="http://akauppi.googlepages.com/multi.png" alt="Lua" align="middle" border="0" height="120" width="128" />
+        <img src="http://akauppi.googlepages.com/multi.png" alt="Lua" align="middle" border="0" height="120" width="128" />
+      </a></td>
+    </tr>
+    <tr>
+      <td align="center" valign="top"><h1>Lua Lanes - multithreading in Lua</h1>
+      </td>
+    </tr>
+  </tbody>
+</table>
+
+<p class="bar">
+  <a href="#description">Description</a> &middot;
+  <a href="#systems">Supported systems</a> &middot;
+  <a href="#installing">Building and Installing</a>
+</p><p class="bar">
+  <a href="#creation">Creation</a> &middot;
+  <a href="#status">Status</a> &middot;
+  <a href="#results">Results and errors</a>
+</p><p class="bar">
+  <a href="#cancelling">Cancelling</a> &middot;
+  <a href="#finalizers">Finalizers</a> &middot;
+  <a href="#lindas">Lindas</a> &middot;
+  <a href="#timers">Timers</a> &middot;
+  <a href="#locks">Locks etc.</a>
+</p><p class="bar">
+  <a href="#other">Other issues</a> &middot;
+  <a href="#changes">Change log</a>
+  <!-- ... -->
+
+<p><br/><font size="-1"><i>Copyright &copy; 2007-08 Asko Kauppi. All rights reserved.</i>
+    <br>Lua Lanes is published under the same <A HREF="http://en.wikipedia.org/wiki/MIT_License">MIT license</A> as Lua 5.1.
+    </p><p>This document was revised on 23-Jan-09, and applies to version 2.0.3.
+</font></p>
+
+</center>
+</div>
+
+
+<!-- description +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -->
+<hr/>
+<h2 id="description">Description</h2>
+
+<p>Lua Lanes is a Lua extension library providing
+    the possibility to run multiple Lua states in parallel. It is intended to
+    be used for optimizing performance on multicore CPU's and to study ways to make Lua programs naturally parallel to begin with.
+</p><p>
+    Lanes is included into your software by the regular
+    <tt>require "lanes"</tt> method. No C side programming is needed; all APIs are Lua side, and most existing extension modules should
+    work seamlessly together with the multiple lanes.
+</p><p>
+    See <A HREF="comparison.html">comparison</A> of Lua Lanes with other Lua multithreading solutions.
+</p><p>
+    <h3>Features:</h3>
+
+  <ul>
+    <li>Lanes have separated data, by default. Shared data is possible with Linda objects.
+    </li>
+    <li>Communication is separate from threads, using Linda objects.
+    </li>
+    <li>Data passing uses fast inter-state copies (no serialization required)
+    </li>
+    <li>"Deep userdata" concept, for sharing userdata over multiple lanes
+    </li>
+    <li>Millisecond level timers, integrated with the Linda system.
+    </li>
+    <li>Threads can be given priorities -2..+2 (default is 0).
+    </li>
+    <li>Lanes are cancellable, with proper cleanup.
+    </li>
+    <li>No application level locking - ever!
+    </li>
+  </ul>
+
+
+<h3>Limitations:</h3>
+
+  <ul><li>coroutines are not passed between states
+        </li>
+      <li>sharing full userdata between states needs special C side
+          preparations (-&gt; <A HREF="#deep_userdata">deep userdata</A>)
+      </li>
+        <li>network level parallelism not included
+        </li>
+    </ul>
+</p>
+
+
+<!-- systems +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -->
+<hr/>
+<h2 id="systems">Supported systems</h2>
+
+<p>Lua Lanes supports the following operating systems:
+
+    <ul>
+        <li>Mac OS X PowerPC / Intel (10.4 and later)</li>
+        <li>Linux x86</li>
+        <li>Windows 2000/XP and later <font size="-1">(MinGW or Visual C++ 2005/2008)</font></li>
+<!--
+    Other OS'es here once people help test them. (and the tester's name)
+    
+    Win64, BSD, Linux x64, Linux embedded, QNX, Solaris, ...
+-->
+    </ul>
+    
+    <p>The underlying threading code can be compiled either towards Win32 API 
+    or <a TARGET="_blank" HREF="http://en.wikipedia.org/wiki/POSIX_Threads">Pthreads</a>. Unfortunately, thread prioritization under Pthreads is a JOKE, 
+    requiring OS specific tweaks and guessing undocumented behaviour. Other
+    features should be portable to any modern platform.
+    </p>
+</p>
+
+
+<!-- installing +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -->
+<hr/>
+<h2 id="installing">Building and Installing</h2>
+
+<p>Lua Lanes is built simply by <tt>make</tt> on the supported platforms
+(<tt>make-vc</tt> for Visual C++). See <tt>README</tt> for system specific
+details and limitations.
+</p>
+
+<p>To install Lanes, all you need are the <tt>lanes.lua</tt> and <tt>lua51-lanes.so|dll</tt>
+files to be reachable by Lua (see LUA_PATH, LUA_CPATH). 
+
+Or use <A HREF="http://www.luarocks.org" TARGET="_blank">Lua Rocks</A> package management.
+</p>
+
+<pre>
+  > luarocks search lanes
+    ... output listing Lua Lanes is there ...
+
+  > luarocks install lanes
+    ... output ...
+</pre>
+
+
+<!-- launching +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -->
+<hr/>
+<h2 id="creation">Creation</h2>
+
+<p>The following sample shows preparing a function for parallel calling, and
+calling it with varying arguments. Each of the two results is calculated in
+a separate OS thread, parallel to the calling one. Reading the results
+joins the threads, waiting for any results not already there.
+</p>
+
+<table border=1 bgcolor="#FFFFE0" width=500><tr><td>
+<pre>
+  require "lanes"
+
+  f= lanes.gen( function(n) return 2*n end )
+  a= f(1)
+  b= f(2)
+
+  print( a[1], b[1] )     -- 2    4
+</pre>
+</table>
+
+<p>
+<table border=1 bgcolor="#E0E0FF" cellpadding=10><tr><td>
+    <code>func= lanes.gen( [libs_str | opt_tbl [, ...],] lane_func )
+    <br/><br/>
+    lane_h= func( ... )</code>
+</table>
+</p>
+<p>
+    The function returned by <tt>lanes.gen()</tt> is a "generator" for
+    launching any number of lanes. They will share code, options, initial globals,
+    but the particular arguments may vary. Only calling the generator function
+    actually launches a lane, and provides a handle for controlling it.
+<!--
+</p>
+<p>This prepares <tt>lane_func</tt> to be called in parallel. It does not yet start
+anything, but merely returns a <i>generator function</i> that can be called
+any number of times, with varying parameters. Each call will spawn a new lane.
+-->
+</p><p>
+Lanes automatically copies upvalues over to the new lanes, so you
+need not wrap all the required elements into one 'wrapper' function. If
+<tt>lane_func</tt> uses some local values, or local functions, they will be there
+also in the new lanes.
+</p><p>
+    <code>libs_str</code> defines the standard libraries made available to the
+    new Lua state:
+    <table>
+        <tr><td/><td>(nothing)</td><td>no standard libraries (default)</td></tr>
+        <tr><td width=40><td><tt>"base"</tt> or <tt>""</tt></td>
+            <td>root level names, <tt>print</tt>, <tt>assert</tt>, <tt>unpack</tt> etc.</td></tr>
+        <tr><td/><td><tt>"coroutine"</tt></td><td><tt>coroutine.*</tt> namespace <font size="-1">(part of base in Lua 5.1)</font></td></tr>
+        <tr><td/><td><tt>"debug"</tt></td><td><tt>debug.*</tt> namespace</td></tr>
+        <tr><td/><td><tt>"io"</tt></td><td><tt>io.*</tt> namespace</td></tr>
+        <tr><td/><td><tt>"math"</tt></td><td><tt>math.*</tt> namespace</td></tr>
+        <tr><td/><td><tt>"os"</tt></td><td><tt>os.*</tt> namespace</td></tr>
+        <tr><td/><td><tt>"package"</tt></td><td><tt>package.*</tt> namespace and <tt>require</tt></td></tr>
+        <tr><td/><td><tt>"string"</tt></td><td><tt>string.*</tt> namespace</td></tr>
+        <tr><td/><td><tt>"table"</tt></td><td><tt>table.*</tt> namespace</td></tr>
+        <br/>
+        <tr><td/><td><tt>"*"</tt></td><td>all standard libraries</td></tr>
+    </table>
+
+</p><p>
+    Initializing the standard libs takes a bit of time at each lane invocation.
+    This is the main reason why "no libraries" is the default.
+</p><p>
+
+    <code>opt_tbl</code> is a collection of named options to control the way
+    lanes are run:
+</p><p>
+  <table>
+    <tr valign=top><td/><td>
+        <code>.cancelstep</code> <br/><nobr>N / true</nobr></td>
+    <td>
+    By default, lanes are only cancellable when they enter a pending
+    <tt>:receive()</tt> or <tt>:send()</tt> call.
+    With this option, one can set cancellation check to occur every <tt>N</tt>
+    Lua statements. The value <tt>true</tt> uses a default value (100).
+    </td></tr>
+
+    <tr valign=top><td/><td>
+        <code>.globals</code> <br/>globals_tbl</td>
+    <td>
+    Sets the globals table for the launched threads. This can be used for giving
+    them constants.
+    </p><p>
+    The global values of different lanes are in no manner connected;
+    modifying one will only affect the particular lane.
+    </td></tr>
+
+    <tr valign=top><td width=40><td>
+        <code>.priority</code> <br/><nobr>-2..+2</nobr></td>
+        <td>The priority of lanes generated. -2 is lowest, +2 is highest.
+        <p>
+    Implementation and dependability of priorities vary
+    by platform. In particular, Linux kernel 2.6 does not support priorities in user mode.
+    </td></tr>
+  </table>
+  
+</p>
+
+<h3>Free running lanes</h3>
+
+<p>
+The lane handles are allowed to be 'let loose'; in other words you may execute
+a lane simply by:
+
+<pre>
+    lanes.gen( function() ... end ) ()
+</pre>
+
+Normally, lanes of this kind will be in an eternal loop handling messages.
+Since the lane handle is gone,
+there is no way to control such a lane from the outside, nor read its potential
+return values. Then again, such a lane does not even normally return.
+</p>
+
+
+<!-- status +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -->
+<hr/>
+<h2 id="status">Status</h2>
+
+<table border=1 bgcolor="#E0E0FF" cellpadding=10><tr><td>
+    <code>str= lane_h.status</code>
+</table>
+
+<p>The current execution state of a lane can be read via its <tt>status</tt>
+member, providing one of these values: <sup>(<a href="#2">2</a>)</sup>
+
+    <table>
+        <tr><td width=40><td><tt>"pending"</tt></td><td>not started, yet</td></tr>
+        <tr><td/><td><tt>"running"</tt></td><td>running</td></tr>
+        <tr><td/><td><tt>"waiting"</tt></td><td>waiting at a Linda <tt>:receive()</tt> or <tt>:send()</tt></td></tr>
+        <tr><td/><td><tt>"done"</tt></td><td>finished executing (results are ready)</td></tr>
+        <tr><td/><td><tt>"error"</tt></td><td>met an error (reading results will propagate it)</td></tr>
+        <tr><td/><td><tt>"cancelled"</tt></td><td>received cancellation and finished itself</td></tr>
+    </table>
+</p><p>
+    This is similar to <tt>coroutine.status</tt>, which has: <tt>"running"</tt> /
+    <tt>"suspended"</tt> / <tt>"normal"</tt> / <tt>"dead"</tt>. Not using the
+    exact same names is intentional.
+</p>
+
+
+<!-- results +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -->
+<hr/>
+<h2 id="results">Results and errors</h2>
+
+<p>A lane can be waited upon by simply reading its results. This can be done
+in two ways.
+</p><p>
+
+<table border=1 bgcolor="#E0E0FF" cellpadding=10><tr><td>
+    <code>[val]= lane_h[1]</code>
+</table>
+<p>
+Makes sure lane has finished, and gives its first (maybe only) return value.
+Other return values will be available in other <tt>lane_h</tt> indices.
+</p><p>
+If the lane ended in an error, it is propagated to master state at this place.
+</p>
+
+<table border=1 bgcolor="#E0E0FF" cellpadding=10><tr><td>
+    <code>[...]|[nil,err,stack_tbl]= lane_h:join( [timeout_secs] )</code>
+</table>
+<p>
+Waits until the lane finishes, or <tt>timeout_secs</tt> seconds have passed.
+Returns <tt>nil</tt> on timeout, <tt>nil,err,stack_tbl</tt> if the lane hit an error,
+or the return values of the lane. Unlike in reading the results in table
+fashion, errors are not propagated.
+</p><p>
+<tt>stack_tbl</tt> is an array of "&lt;filename&gt;:&lt;line&gt;" strings,
+describing where the error was thrown. Use <tt>table.concat()</tt> to format
+it to your liking (or just ignore it).
+</p><p>
+If you use <tt>:join</tt>, make sure your lane main function returns
+a non-nil value so you can tell timeout and error cases apart from successful
+return (using the <tt>.status</tt> property may be risky, since it might change
+between a timed out join and the moment you read it).
+</p><p>
+
+<table border=1 bgcolor="#FFFFE0" width=500><tr><td>
+<pre>
+  require "lanes"
+
+  f= lanes.gen( function() error "!!!" end )
+  a= f(1)
+
+  --print( a[1] )   -- propagates error
+
+  v,err= a:join()   -- no propagation
+  if v==nil then
+    error( "'a' faced error"..tostring(err) )   -- manual propagation
+  end
+</pre>
+</table>
+
+
+<!-- cancelling +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -->
+<hr/>
+<h2 id="cancelling">Cancelling</h2>
+
+<table border=1 bgcolor="#E0E0FF" cellpadding=10><tr><td>
+    <code>bool= lane_h:cancel( [timeout_secs=0.0,] [force_kill_bool=false] )</code>
+</table>
+
+<p>Sends a cancellation request to the lane. If <tt>timeout_secs</tt> is non-zero, waits
+for the request to be processed, or a timeout to occur.
+Returns <tt>true</tt> if the lane was already done (in <tt>"done"</tt>, <tt>"error"</tt> or <tt>"cancelled"</tt> status)
+or if the cancellation was fruitful within timeout period.
+</p><p>
+If the lane is still running and <tt>force_kill</tt> is <tt>true</tt>, the 
+OS thread running the lane is forcefully killed. This means no GC, and should
+generally be the last resort.
+</p>
+<p>Cancellation is tested before going to sleep in <tt>receive()</tt> or <tt>send()</tt> calls
+and after executing <tt>cancelstep</tt> Lua statements. A pending <tt>receive</tt>
+or <tt>send</tt> call is currently not awakened, and may be a reason for an undetected cancel.
+</p>
+
+
+<!-- finalizers +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -->
+<hr/>
+<h2 id="finalizers">Finalizers</h2>
+
+<table border=1 bgcolor="#E0E0FF" cellpadding=10><tr><td>
+    <code>set_finalizer( finalizer_func )</code>
+    <br/><br/>
+    <code>void= finalizer_func( [error] )</code>
+</table>
+
+<p>The <tt>error</tt> call is used for throwing exceptions in Lua. What Lua
+does not offer, however, is scoped <a href="http://en.wikipedia.org/wiki/Finalizer">finalizers</a> 
+that would get called when a certain block of instructions gets exited, whether
+through peaceful return or abrupt <tt>error</tt>.
+</p>
+<p>Since 2.0.3, Lanes prepares a function <tt>set_finalizer</tt> for doing this. 
+Any functions given to it will be called in the lane Lua state, just prior to 
+closing it. They are not called in any particular order.
+</p>
+<p>An error in a finalizer itself overrides the state of the regular chunk
+(in practice, it would be highly preferable <i>not</i> to have errors in finalizers). 
+If one finalizer errors, the others may not get called.
+</p>
+
+
+<!-- lindas +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -->
+<hr/>
+<h2 id="lindas">Lindas</h2>
+
+<p>Communication between lanes is completely detached from the lane handles
+themselves. By itself, a lane can only provide return values once it's finished,
+or throw an error. Needs to communicate during runtime are handled by <A HREF="http://en.wikipedia.org/wiki/Linda_%28coordination_language%29" TARGET="_blank">Linda objects</A>, which are 
+<A HREF="#deep_userdata">deep userdata</A> instances. They can be provided to a lane
+as startup parameters, upvalues or in some other Linda's message.
+</p><p>
+Access to a Linda object means a lane can read or write to any of its data
+slots. Multiple lanes can be accessing the same Linda in parallel. No application
+level locking is required; each Linda operation is atomic.
+</p><p>
+
+<table border=1 bgcolor="#FFFFE0" width=500><tr><td>
+<pre>
+  require "lanes"
+
+  local linda= lanes.linda()
+
+  local function loop( max )
+    for i=1,max do
+        print( "sending: "..i )
+        linda:send( "x", i )    -- linda as upvalue
+    end
+  end
+  
+  a= lanes.gen("",loop)( 10000 )
+
+  while true do
+    local val= linda:receive( 3.0, "x" )    -- timeout in seconds
+    if val==nil then
+        print( "timed out" )
+        break
+    end
+    print( "received: "..val )
+  end
+</pre>
+</table>
+
+</p>
+<p>Characteristics of the Lanes implementation of Lindas are:
+
+<ul>
+    <li>keys can be of number, string or boolean type
+    </li>
+    <li>values can be any type supported by inter-state copying (same limits
+    as for function parameters and upvalues)
+    </li>
+    <li>consuming method is <tt>:receive</tt> (not in)
+    </li>
+    <li>non-consuming method is <tt>:get</tt> (not rd)
+    </li>
+    <li>two producer-side methods: <tt>:send</tt> and <tt>:set</tt> (not out)
+    </li>
+    <li><tt>send</tt> allows for sending multiple values -atomically- to a
+    given key
+    </li>
+    <li><tt>receive</tt> can wait for multiple keys at once
+    </li>
+    <li>individual keys' queue length can be limited, balancing speed differences
+    in a producer/consumer scenario (making <tt>:send</tt> wait)
+    </li>
+</ul>
+</p>
+
+<p>
+<table border=1 bgcolor="#E0E0FF" cellpadding=10><tr><td>
+    <code>h= lanes.linda()</code>
+    <br/><br/>
+    <code>bool= h:send( [timeout_secs,] key, ... )</code>
+    <br/>
+    <code>[val, key]= h:receive( [timeout_secs,] key [, ...] )</code>
+    <br/><br/>
+    <code>= h:limit( key, n_uint )</code>
+</table>
+
+<p>The <tt>send</tt> and <tt>receive</tt> methods use Linda keys as FIFO queues
+(first in, first out). Timeouts are given in seconds (millisecond accuracy).
+If using numbers as the first Linda key, one must explicitly give <tt>nil</tt>
+as the timeout parameter to avoid ambiguities.
+</p><p>
+By default, queue sizes are unlimited, but limits can be
+enforced using the <tt>limit</tt> method. This can be useful to balance execution
+speeds in a producer/consumer scenario.
+</p><p>
+Note that any number of lanes can be reading or writing a Linda. There can be
+many producers, and many consumers. It's up to you.
+</p>
+<p><tt>send</tt> returns <tt>true</tt> if the sending succeeded, and <tt>false</tt>
+if the queue limit was met, and the queue did not empty enough during the given
+timeout.
+</p><p>
+Equally, <tt>receive</tt> returns a value and the key that provided the value, 
+or nothing for timeout. Note that <tt>nil</tt>s can be sent and received;
+the <tt>key</tt> value will tell it apart from a timeout.
+</p><p>
+Multiple values can be sent to a given key at once, atomically (the send will
+fail unless all the values fit within the queue limit). This can be useful for
+multiple producer scenarios, if the protocols used are giving data in streams
+of multiple units. Atomicity prevents the producers from garbling each other's
+messages, which could happen if the units were sent individually.
+</p><p>
+
+When receiving from multiple slots, the keys are checked in order, which can
+be used for making priority queues.
+</p><p>
+
+<table border=1 bgcolor="#E0E0FF" cellpadding=10><tr><td>
+    <code>linda_h:set( key, [val] )</code>
+    <br/>
+    <code>[val]= linda_h:get( key )</code>
+</table>
+
+</p><p>
+The table access methods are for accessing a slot without queuing or consuming.
+They can be used for making shared tables of storage among the lanes.
+</p><p>
+Writing to a slot overwrites any existing value, and clears any queued
+entries. Table access and <tt>send</tt>/<tt>receive</tt> can be used together;
+reading a slot essentially peeks at the next outgoing value of a queue.
+</p>
+
+<!--
+<p>
+<table border=1 bgcolor="#E0E0FF" cellpadding=10><tr><td>
+    <code>lightuserdata= linda_h:deep()</code>
+</table>
+
+<p>There is one more method that is not required in applications, but
+discussing it is good for a preview of how deep userdata works.
+</p><p>
+Because proxy objects (<tt>linda_h</tt>) are just pointers to the real, deep
+userdata, they cannot be used to identify a certain Linda from the others.
+The internal timer system needs to do this, and the <tt>:deep()</tt> method
+has been added for its use. It returns a light userdata pointing to the 
+<i>actual</i> deep object, and thus can be used for seeing, which proxies actually
+mean the same underlying object. You might or might not need a similar system
+with your own deep userdata.
+</p>
+-->
+
+
+<h3>Granularity of using Lindas</h3>
+
+<p>A single Linda object provides an infinite number of slots, so why would
+you want to use several?
+</p><p>There are some important reasons:
+
+<ul>
+    <li>Access control. If you don't trust certain code completely, or just
+    to modularize your design, use one Linda for one usage and another one
+    for the other. This keeps your code clear and readable. You can pass
+    multiple Linda handles to a lane with practically no added cost.
+    </li>
+    
+    <li>Namespace control. Linda keys have a "flat" namespace, so collisions
+    are possible if you try to use the same Linda for too many separate uses.
+    </li>
+    
+    <li>Performance. Changing any slot in a Linda causes all pending threads
+    for that Linda to be momentarily awakened (at least in the C level). 
+    This can degrade performance due to unnecessary OS level context switches.
+    </li>
+</ul>
+
+On the other side, you need to use a common Linda for waiting for multiple
+keys. You cannot wait for keys from two separate Linda objects at the same
+time.
+</p><p>
+<font size="-1">Actually, you can. Make separate lanes to wait each, and then multiplex those
+events to a common Linda, but... :).</font>
+</p>
+
+
+<!-- timers +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -->
+<hr/>
+<h2 id="timers">Timers</h2>
+
+<table border=1 bgcolor="#E0E0FF" cellpadding=10><tr><td>
+    <code>= lanes.timer( linda_h, key, date_tbl|first_secs [,period_secs] )</code>
+</table>
+
+<p>
+Timers can be run once, or in a reoccurring fashion (<tt>period_secs > 0</tt>). 
+The first occurrence can be given either as a date or as a relative delay in seconds. 
+The <tt>date</tt> table is like what <tt>os.date("*t")</tt> returns, in the 
+local time zone.
+</p><p>
+Once a timer expires, the <tt>key</tt> is set with the current time
+(in seconds, same offset as <tt>os.time()</tt> but with millisecond accuracy). 
+The key can be waited upon using the regular Linda <tt>:receive()</tt>
+method.
+</p><p>
+A timer can be stopped simply by calling <tt>lanes.timer()</tt> with <tt>first_secs=0</tt> and no period.
+</p><p>
+
+<table border=1 bgcolor="#FFFFE0" width=500><tr><td>
+<pre>
+  require "lanes"
+
+  local linda= lanes.linda()
+
+  -- First timer once a second, not synchronized to wall clock
+  --
+  lanes.timer( linda, "sec", 1, 1 )
+
+  -- Timer to a future event (next even minute); wall clock synchronized&nbsp;
+  --
+  local t= os.date( "*t", os.time()+60 )    -- now + 1min
+  t.sec= 0
+
+  lanes.timer( linda, "min", t, 60 )   -- reoccur every minute (sharp)
+    
+  while true do
+    local v,key= linda:receive( "sec", "min" )
+    print( "Timer "..key..": "..v )
+  end  
+</pre>
+</table>
+
+</p><p>
+NOTE: Timer keys are set, not queued, so missing a beat is possible especially
+if the timer cycle is extremely small. The key value can be used to know the 
+actual time passed.
+</p><p>
+<table>
+    <tr><td valign=top><nobr><i>Design note:</i></nobr>&nbsp;</td>
+        <td>
+<font size="-1">
+Having the API as <tt>lanes.timer()</tt> is intentional. Another
+alternative would be <tt>linda_h:timer()</tt> but timers are not traditionally
+seen to be part of Lindas. Also, it would mean any lane getting a Linda handle
+would be able to modify timers on it. A third choice could
+be abstracting the timers out of Linda realm altogether (<tt>timer_h= lanes.timer( date|first_secs, period_secs )</tt>)
+but that would mean separate waiting functions for timers, and lindas. Even if
+a linda object and key was returned, that key couldn't be waited upon simultaneously
+with one's general linda events.
+The current system gives maximum capabilities with minimum API, and any smoothenings
+can easily be crafted in Lua at the application level.
+</font>
+        </td>
+    </tr>
+</table>
+</p>
+
+
+<!-- locks +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -->
+<hr/>
+<h2 id="locks">Locks etc.</h2>
+
+<p>
+Lanes does not generally require locks or critical sections to be used, at all.
+If necessary, a limited queue can be used to emulate them. <tt>lanes.lua</tt>
+offers some sugar to make it easy:
+</p><p>
+
+<table border=1 bgcolor="#E0E0FF" cellpadding=10><tr><td><pre>
+  lock_func= lanes.genlock( linda_h, key [,N_uint=1] )
+
+  lock_func( M_uint )     -- acquire
+    ..
+  lock_func( -M_uint )    -- release
+</pre></table>
+</p><p>
+
+The generated function acquires M entries from the N available, or releases
+them if the value is negative. The acquiring call will suspend the lane, if necessary.
+Use <tt>M=N=1</tt> for a critical section lock (only one lane allowed to enter).
+</p><p>
+
+Note: The locks generated are <u>not recursive</u>. That would need another
+kind of generator, which is currently not implemented.
+</p><p>
+
+Similar sugar exists for atomic counters:
+</p><p>
+
+<table border=1 bgcolor="#E0E0FF" cellpadding=10><tr><td><pre>
+  atomic_func= lanes.genatomic( linda_h, key [,initial_num=0.0] )
+
+  new_num= atomic_func( [diff_num=+1.0] )
+</pre></table>
+</p><p>
+
+Each time called, the generated function will change <tt>linda[key]</tt> 
+atomically, without other lanes being able to interfere. The new value is
+returned. You can use either <tt>diff 0.0</tt> or <tt>get</tt> to just read the current
+value.
+</p><p>
+
+Note that the generated functions can be passed on to other lanes.
+</p>
+
+
+<!-- others +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -->
+<hr/>
+<h2 id="other">Other issues</h2>
+
+<h3>Limitations on data passing</h3>
+
+<p>Data passed between lanes (either as starting parameters, return values, upvalues or via Lindas) must conform to the following:
+</p>
+<p><ul>
+	<li>Booleans, numbers, strings, light userdata, Lua functions and tables of such can always be passed.
+	</li>
+	<li>Cyclic tables and/or duplicate references are allowed and reproduced appropriately, 
+	but only <u>within the same transmission</u>.
+	   <ul>
+	       <li>using the same source table in multiple Linda messages keeps no ties between the tables
+	       </li>
+	   </ul>
+    </li>
+    <li>Objects (tables with a metatable) are copyable between lanes.
+        <ul>
+            <li>metatables are assumed to be immutable; they are internally indexed and only copied once
+            per each type of objects per lane
+            </li>
+        </ul>
+    </li>
+    <li>C functions (<tt>lua_CFunction</tt>) referring to <tt>LUA_ENVIRONINDEX</tt> or <tt>LUA_REGISTRYINDEX</tt> might not
+    work right in the target
+        <ul>
+            <li>rather completely re-initialize a module with <tt>require</tt> in the target lane
+            </li>
+        </ul>
+    </li>
+    <li>Full userdata can be passed only if it's prepared using the <A HREF="#shared_userdata">deep userdata</A>
+        system, which handles its lifespan management
+        <ul>
+            <li>in particular, lane handles cannot be passed between lanes
+            </li>
+        </ul>
+    </li>
+    <li>coroutines cannot be passed
+    </li>
+</ul>
+</p>
+
+
+<h3>Required of module makers</h3>
+
+<p>
+Most Lua extension modules should work unaltered with Lanes.
+If the module simply ties C side features to Lua, everything is fine without
+alterations. The <tt>luaopen_...()</tt> entry point will be called separately for each
+lane, where the module is <tt>require</tt>'d from.
+</p><p>
+If it, however, also does one-time C side initializations, these
+should be wrapped in a one-time-only construct such as the one below.
+</p><p>
+
+<table><tr><td width=40>
+    <td bgcolor="#ffffe0">
+<pre>
+ int luaopen_module( lua_State *L )
+ {
+    static char been_here;  /* 0 by ANSI C */
+    
+    /* Calls to 'require' serialized by Lanes; this is safe.&nbsp;&nbsp;
+    */
+    if (!been_here) {
+        been_here= 1;
+        ... one time initializations ...
+    }
+    
+    ... binding to Lua ...
+ }
+</pre>
+</td></tr></table>
+</p>
+
+
+<h3 id="shared_userdata">Deep userdata in your own apps</h3>
+
+<p>
+The mechanism Lanes uses for sharing Linda handles between separate Lua states
+can be used for custom userdata as well. Here's what to do.
+</p>
+<ol>
+    <li>Provide an <i>identity function</i> for your userdata, in C. This function is
+used for creation and deletion of your deep userdata (the shared resource),
+and for making metatables for the state-specific proxies for accessing it.
+Take a look at <tt>linda_id</tt> in <tt>lanes.c</tt>.
+    </li>
+    <li>Create your userdata using <tt>luaG_deep_userdata()</tt>, which is
+    a Lua-callable function. Given an <tt>idfunc</tt>, it sets up the support
+    structures and returns a state-specific proxy userdata for accessing your
+    data. This proxy can also be copied over to other lanes.
+    </li>
+    <li>Accessing the deep userdata from your C code, use <tt>luaG_todeep()</tt>
+    instead of the regular <tt>lua_touserdata()</tt>.
+    </li>
+</ol>
+
+<p>Deep userdata management will take care of tying to <tt>__gc</tt> methods,
+and doing reference counting to see how many proxies are still there for 
+accessing the data. Once there are none, the data will be freed through a call
+to the <tt>idfunc</tt> you provided.
+</p>
+<p><b>NOTE</b>: The lifespan of deep userdata may exceed that of the Lua state
+that created it. The allocation of the data storage should not be tied to
+the Lua state used. In other words, use <tt>malloc</tt>/<tt>free</tt> or
+similar memory handling mechanism.
+</p>
+
+
+<h3>Lane handles don't travel</h3>
+
+<p>
+Lane handles are not implemented as deep userdata, and cannot thus be
+copied across lanes. This is intentional; problems would occur at least when
+multiple lanes were to wait upon one to get ready. Also, it is a matter of
+design simplicity.
+</p><p>
+The same benefits can be achieved by having a single worker lane spawn all
+the sublanes, and keep track of them. Communications to and from this lane
+can be handled via a Linda.
+</p>
+
+
+<h3>Beware with print and file output</h3>
+
+<p>
+In multithreaded scenarios, giving multiple parameters to <tt>print()</tt>
+or <tt>file:write()</tt> may cause them to be overlapped in the output,
+something like this:
+
+<pre>
+  A:  print( 1, 2, 3, 4 )
+  B:  print( 'a', 'b', 'c', 'd' )
+  
+  1   a   b   2   3   c   d   4
+</pre>
+
+Lanes does not protect you from this behaviour. The thing to do is either to
+concentrate your output to a certain lane per stream, or to concatenate output
+into a single string before you call the output function.
+</p>
+
+
+<h3 id="performance">Performance considerations</h3>
+
+<p>
+Lanes is about making multithreading easy, and natural in the Lua state of mind.
+Expect performance not to be an issue, if your program is logically built.
+Here are some things one should consider, if best performance is vital:
+</p><p>
+<ul>
+    <li>Data passing (parameters, upvalues, Linda messages) is generally fast,
+    doing two binary state-to-state copies (from source state to hidden state,
+    hidden state to target state). Remember that not only the function you 
+    specify but also its upvalues, their upvalues, etc. etc. will get copied.
+    </li>
+    <li>Lane startup is fast (1000's of lanes a second), depending on the
+    number of standard libraries initialized. Initializing all standard libraries
+    is about 3-4 times slower than having no standard libraries at all. If you
+    throw in a lot of lanes per second, make sure you give them minimal necessary
+    set of libraries.
+    </li>
+    <li>Lanes waiting on a Linda are woken up (and execute some hidden Lua code) each
+    time <u>any</u> key in the Linda they are waiting for is changed. This
+    may cause a significant slow-down (not measured, just a gut feeling) if a lot
+    of Linda keys are used. Using separate Linda objects for logically separate
+    issues will help (which is good practise anyhow).
+    </li>
+    <li>Linda objects are light. The memory footprint is two OS-level signalling
+    objects (<tt>HANDLE</tt> or <tt>pthread_cond_t</tt>) for each, plus one
+    C pointer for the proxies per each Lua state using the Linda. Next to nothing.
+    </li>
+    <li>Timers are light. You can probably expect timers up to 0.01 second
+    resolution to be useful, but that is very system specific. All timers are
+    merged into one main timer state (see <tt>timer.lua</tt>); no OS side
+    timers are utilized.
+    </li>
+    <li>Lindas are hashed to a fixed number of "keeper states", which are a locking entity. 
+    If you are using a lot of Linda objects,
+    it may be useful to try having more of these keeper states. By default,
+    only one is used (see <tt>KEEPER_STATES_N</tt>), but this is an implementation detail.
+    </li>
+</ul>
+</p>
+
+
+<h3 id="cancelling_cancel">Cancelling cancel</h3>
+
+<p>
+Cancellation of lanes uses the Lua error mechanism with a special lightuserdata
+error sentinel. 
+If you use <tt>pcall</tt> in code that needs to be cancellable
+from the outside, the special error might not get through to Lanes, thus
+preventing the Lane from being cleanly cancelled. You should throw any
+lightuserdata error further.
+</p><p>
+This system can actually be used by an application to detect a cancel request,
+do its own cancellation duties, and pass on the error so Lanes will get it. If Lanes
+does not get a clean cancellation from a lane in due time,
+it may forcefully kill the lane.
+</p><p>
+The sentinel is exposed as <tt>lanes.cancel_error</tt>, if you wish to use
+its actual value.
+</p>
+
+
+
+<!-- change log +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -->
+<hr/>
+<h2 id="changes">Change log</h2>
+
+<p>
+Jan-2009 (2.0.3):
+<ul>
+  <li>Added 'finalizer' to lane options. (TBD: not implemented yet!)
+  </li>
+  <li>Added call stack to errors coming from a lane.
+  </li>
+</ul>
+
+Jul-2008 (2.0):
+<ul>
+  <li>Too many changes to list (you'll need to re-read this manual)
+  </li>
+</ul>
+</p>
+
+<!-- footnotes +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -->
+<hr/>
+
+<p>For feedback, questions and suggestions:
+<UL>
+    <li><A HREF="http://luaforge.net/projects/lanes">Lanes @ LuaForge</A></li>
+    <li><A HREF="mailto:akauppi@gmail.com">the author</A></li>
+</UL>
+</p>
+
+<p><br/></p>
+
+</body>
+</html>
diff --git a/docs/multi.png b/docs/multi.png
new file mode 100644
index 0000000..f527aff
Binary files /dev/null and b/docs/multi.png differ
diff --git a/docs/performance.ods b/docs/performance.ods
new file mode 100644
index 0000000..541cc8e
Binary files /dev/null and b/docs/performance.ods differ
diff --git a/make-vc.cmd b/make-vc.cmd
new file mode 100644
index 0000000..2b4a7f6
--- /dev/null
+++ b/make-vc.cmd
@@ -0,0 +1,274 @@
+@REM
+@REM make-vc.cmd to build Lanes on Visual C++ 2005/08
+@REM
+@REM Requires:  Windows XP or later (cmd.exe)
+@REM            Visual C++ 2005/2008 (Express)
+@REM            LuaBinaries 5.1.3 or Lua for Windows 5.1.3
+@REM
+
+@setlocal
+@set LUA_PATH=src\?.lua;tests\?.lua
+
+@if not "%LUA51%"=="" (
+  @goto LUA_OK
+)
+
+@REM *** Lua for Windows >=5.1.3.14 (%LUA_DEV%) ***
+@REM     (quoted 'set' so that a ')' in the path, e.g. "Program Files (x86)", cannot close the block)
+@if exist "%LUA_DEV%\lua.exe" (
+  @set "LUA51=%LUA_DEV%"
+  @goto LUA_OK
+)
+
+@REM *** Lua for Windows (default path) ***
+@REM
+@if exist "%ProgramFiles%\Lua\5.1\lua.exe" (
+  @set LUA51=%ProgramFiles:~0,2%\Progra~1\Lua\5.1
+  @goto LUA_OK
+)
+
+@REM *** LuaBinaries (default path) ***
+@REM
+@if exist "%ProgramFiles%\Lua5.1\lua5.1.exe" (
+  @set LUA51=%ProgramFiles:~0,2%\Progra~1\Lua5.1
+  @goto LUA_OK
+)
+
+goto ERR_NOLUA
+:LUA_OK
+
+@REM ---
+@REM %LUA_EXE% = %LUA51%\lua[5.1].exe
+@REM %LUAC_EXE% = %LUA51%\luac[5.1].exe
+@REM %LUA_LIB% = %LUA51%[\lib]
+@REM ---
+
+@set LUA_EXE=%LUA51%\lua5.1.exe
+@if exist "%LUA_EXE%" goto LUA_EXE_OK
+@set LUA_EXE=%LUA51%\lua.exe
+@if exist "%LUA_EXE%" goto LUA_EXE_OK
+@echo Cannot find %LUA51%\lua[5.1].exe
+@goto EXIT
+:LUA_EXE_OK
+
+@set LUAC_EXE=%LUA51%\luac5.1.exe
+@if exist "%LUAC_EXE%" goto LUAC_EXE_OK
+@set LUAC_EXE=%LUA51%\luac.exe
+@if exist "%LUAC_EXE%" goto LUAC_EXE_OK
+@echo Cannot find %LUA51%\luac[5.1].exe
+@goto EXIT
+:LUAC_EXE_OK
+
+
+@if "%1"=="" goto BUILD
+@if "%1"=="clean" goto CLEAN
+@if "%1"=="test" goto TEST
+@if "%1"=="launchtest" goto LAUNCHTEST
+@if "%1"=="perftest" goto PERFTEST
+@if "%1"=="perftest-plain" goto PERFTEST-PLAIN
+@if "%1"=="stress" goto STRESS
+@if "%1"=="basic" goto BASIC
+@if "%1"=="fifo" goto FIFO
+@if "%1"=="keeper" goto KEEPER
+@if "%1"=="atomic" goto ATOMIC
+@if "%1"=="cyclic" goto CYCLIC
+@if "%1"=="timer" goto TIMER
+@if "%1"=="recursive" goto RECURSIVE
+@if "%1"=="fibonacci" goto FIBONACCI
+@if "%1"=="hangtest" goto HANGTEST
+@if "%1"=="require" goto REQUIRE
+
+@echo Unknown target: %1
+@echo.
+@goto EXIT
+
+:BUILD
+@REM LuaBinaries: 
+@REM 	The current build system does not show 'lua51-lanes.dll' to
+@REM 	be dependent on more than 'KERNEL32.DLL'. Good.
+@REM
+@REM Lua for Windows:
+@REM    Depends on KERNEL32.DLL and LUA5.1.DLL. Good?
+@REM Note: %LUA51% is quoted inside the ( ) blocks below; an unquoted ')' in the path would end the block early.
+@set LUA_LIB=%LUA51%
+@if exist "%LUA_LIB%\lua5.1.lib" (
+  @echo.
+  @echo ***
+  @echo *** Using Lua from: "%LUA51%"
+  @echo ***
+  @echo.
+  @goto LUA_LIB_OK
+)
+
+@set LUA_LIB=%LUA51%\lib
+@if exist "%LUA_LIB%\lua5.1.lib" (
+  @echo.
+  @echo ***
+  @echo *** Using Lua from: "%LUA51%"
+  @echo ***
+  @echo.
+  @goto LUA_LIB_OK
+)
+@echo Cannot find %LUA51%\[lib\]lua5.1.lib
+@goto EXIT
+:LUA_LIB_OK
+
+@REM
+@REM Precompile src/.lua -> .lch
+@REM 
+@REM Note: we cannot use piping in Windows since we need binary output.
+@REM 
+"%LUAC_EXE%" -o delme src/keeper.lua
+"%LUA_EXE%" tools/bin2c.lua -o src/keeper.lch delme
+@del delme
+
+@if "%VCINSTALLDIR%"=="" goto ERR_NOVC
+
+@REM
+@REM Win32 (Visual C++ 2005/08 Express) build commands
+@REM
+@REM MS itself has warnings in stdlib.h (4255), winbase.h (4668), several (4820, 4826)
+@REM 4054: "type cast from function pointer to data pointer"
+@REM 4127: "conditional expression is constant"
+@REM 4711: ".. selected for automatic inline expansion"
+@REM
+@set WARN=/Wall /wd4054 /wd4127 /wd4255 /wd4668 /wd4711 /wd4820 /wd4826
+
+@REM /LDd: debug DLL
+@REM /O2 /LD: release DLL
+@REM
+@set FLAGS=/O2 /LD
+
+cl %WARN% %FLAGS% /I "%LUA51%\include" /Felua51-lanes.dll src\*.c "%LUA_LIB%\lua5.1.lib"
+@REM cl %WARN% %FLAGS% /I "%LUA51%\include" /Felua51-lanes.dll src\*.c "%LUA_LIB%\lua5.1.lib" /link /NODEFAULTLIB:libcmt
+
+@del lua51-lanes.lib
+@del lua51-lanes.exp
+@goto EXIT
+
+:CLEAN
+if exist *.dll del *.dll
+if exist delme del delme
+@goto EXIT
+
+:TEST
+@REM "make test" does not automatically build/update the dll. We're NOT a makefile. :!
+@REM
+"%LUA_EXE%" tests\basic.lua
+@IF errorlevel 1 goto EXIT
+
+"%LUA_EXE%" tests\fifo.lua
+@IF errorlevel 1 goto EXIT
+
+"%LUA_EXE%" tests\keeper.lua
+@IF errorlevel 1 goto EXIT
+
+"%LUA_EXE%" tests\fibonacci.lua
+@IF errorlevel 1 goto EXIT
+
+"%LUA_EXE%" tests\timer.lua
+@IF errorlevel 1 goto EXIT
+
+"%LUA_EXE%" tests\atomic.lua
+@IF errorlevel 1 goto EXIT
+
+"%LUA_EXE%" tests\cyclic.lua
+@IF errorlevel 1 goto EXIT
+
+"%LUA_EXE%" tests\recursive.lua
+@IF errorlevel 1 goto EXIT
+
+@goto EXIT
+
+:BASIC
+"%LUA_EXE%" tests\basic.lua
+@goto EXIT
+
+:FIFO
+"%LUA_EXE%" tests\fifo.lua
+@goto EXIT
+
+:KEEPER
+"%LUA_EXE%" tests\keeper.lua
+@goto EXIT
+
+:ATOMIC
+"%LUA_EXE%" tests\atomic.lua
+@goto EXIT
+
+:CYCLIC
+"%LUA_EXE%" tests\cyclic.lua
+@goto EXIT
+
+:TIMER
+"%LUA_EXE%" tests\timer.lua
+@goto EXIT
+
+:RECURSIVE
+"%LUA_EXE%" tests\recursive.lua
+@goto EXIT
+
+:FIBONACCI
+"%LUA_EXE%" tests\fibonacci.lua
+@goto EXIT
+
+:HANGTEST
+"%LUA_EXE%" tests\hangtest.lua
+@goto EXIT
+
+:REQUIRE
+"%LUA_EXE%" -e "require'lanes'"
+@goto EXIT
+
+REM ---
+REM NOTE: 'timeit' is a funny thing; it does _not_ work with quoted
+REM long paths, but it _does_ work without the quotes. I have no idea,
+REM how it knows the spaces in paths apart from spaces in between
+REM parameters.
+
+:LAUNCHTEST
+timeit %LUA_EXE% tests\launchtest.lua %2 %3 %4
+@goto EXIT
+
+:PERFTEST
+timeit %LUA_EXE% tests\perftest.lua %2 %3 %4
+@goto EXIT
+
+:PERFTEST-PLAIN
+timeit %LUA_EXE% tests\perftest.lua --plain %2 %3 %4
+@goto EXIT
+
+:STRESS
+"%LUA_EXE%" tests\test.lua || goto EXIT
+"%LUA_EXE%" tests\perftest.lua 100 || goto EXIT
+"%LUA_EXE%" tests\perftest.lua 50 -prio=-1,0 || goto EXIT
+"%LUA_EXE%" tests\perftest.lua 50 -prio=0,-1 || goto EXIT
+"%LUA_EXE%" tests\perftest.lua 50 -prio=0,2 || goto EXIT
+"%LUA_EXE%" tests\perftest.lua 50 -prio=2,0 || goto EXIT
+@REM Reached only if every run above exited with code 0
+@echo All seems okay!
+@goto EXIT
+
+REM --- Error exits ('&' in the URL below is escaped as '^&' so that cmd does not split the line there)
+:ERR_NOLUA
+@echo ***
+@echo *** Please set LUA51 to point to either LuaBinaries or
+@echo *** Lua for Windows directory.
+@echo ***
+@echo *** http://luabinaries.luaforge.net/download.html
+@echo ***	lua5_1_2_Win32_dll8_lib
+@echo ***	lua5_1_2_Win32_bin
+@echo ***
+@echo *** http://luaforge.net/frs/?group_id=377^&release_id=1138
+@echo ***
+@echo.
+@goto EXIT
+
+:ERR_NOVC
+@echo ***
+@echo *** VCINSTALLDIR not defined; please run 'setup-vc'
+@echo ***
+@echo.
+@goto EXIT
+
+:EXIT
diff --git a/setup-vc.cmd b/setup-vc.cmd
new file mode 100644
index 0000000..e93262e
--- /dev/null
+++ b/setup-vc.cmd
@@ -0,0 +1,90 @@
+@echo off
+REM
+REM Setting up command line to use Visual C++ 2005/2008 Express
+REM
+REM Visual C++ 2005:
+REM 	VCINSTALLDIR=C:\Program Files\Microsoft Visual Studio 8\VC
+REM 	VS80COMNTOOLS=C:\Program Files\Microsoft Visual Studio 8\Common7\Tools\
+REM 	VSINSTALLDIR=C:\Program Files\Microsoft Visual Studio 8
+REM
+REM Visual C++ 2008:
+REM 	VCINSTALLDIR=C:\Program Files\Microsoft Visual Studio 9.0\VC
+REM 	VS90COMNTOOLS=C:\Program Files\Microsoft Visual Studio 9.0\Common7\Tools\
+REM 	VSINSTALLDIR=C:\Program Files\Microsoft Visual Studio 9.0
+REM
+
+REM Test for VC++2005 FIRST, because it is the norm with Lua 5.1.4
+REM LuaBinaries and LfW. All prebuilt modules and lua.exe are built
+REM with it.
+REM
+set VSINSTALLDIR=C:\Program Files\Microsoft Visual Studio 8
+if not exist "%VSINSTALLDIR%\VC\vcvarsall.bat" goto TRY_VC9
+
+REM Win32 headers must be separately downloaded for VC++2005
+REM (VC++2008 SP1 carries an SDK with it)
+REM
+set _SDK=C:\Program Files\Microsoft Platform SDK for Windows Server 2003 R2\SetEnv.cmd
+if not exist "%_SDK%" goto ERR_NOSDK
+call "%_SDK%"
+goto FOUND_VC
+
+:TRY_VC9
+set VSINSTALLDIR=C:\Program Files\Microsoft Visual Studio 9.0
+if not exist "%VSINSTALLDIR%\VC\vcvarsall.bat" goto ERR_NOVC
+
+echo.
+echo *** Warning: Visual C++ 2008 in use ***
+echo.
+echo Using VC++2005 is recommended for runtime compatibility issues
+echo (LuaBinaries and LfW use it; if you compile everything from
+echo scratch, ignore this message)
+echo.
+
+:FOUND_VC
+set VCINSTALLDIR=%VSINSTALLDIR%\vc
+
+REM vcvars.bat sets the following values right:
+REM
+REM PATH=...
+REM INCLUDE=%VCINSTALLDIR%\ATLMFC\INCLUDE;%VCINSTALLDIR%\INCLUDE;%VCINSTALLDIR%\PlatformSDK\include;%FrameworkSDKDir%\include;%INCLUDE%
+REM LIB=%VCINSTALLDIR%\ATLMFC\LIB;%VCINSTALLDIR%\LIB;%VCINSTALLDIR%\PlatformSDK\lib;%FrameworkSDKDir%\lib;%LIB%
+REM LIBPATH=%FrameworkDir%\%FrameworkVersion%;%VCINSTALLDIR%\ATLMFC\LIB
+REM
+call "%VSINSTALLDIR%\VC\vcvarsall.bat"
+
+REM 'timeit.exe' is part of the MS Server Res Kit Tools (needed for "make perftest")
+REM
+set _RESKIT=C:\Program Files\Windows Resource Kits\Tools\
+if not exist "%_RESKIT%\timeit.exe" goto WARN_NOTIMEIT
+PATH=%PATH%;%_RESKIT%
+goto EXIT
+
+:WARN_NOTIMEIT
+echo.
+echo ** WARNING: Windows Server 2003 Resource Kit Tools - not detected
+echo             You will need the 'timeit' utility to run 'make perftest'
+echo             http://www.microsoft.com/downloads/details.aspx?familyid=9D467A69-57FF-4AE7-96EE-B18C4790CFFD
+echo.
+goto EXIT
+
+REM ---
+:ERR_NOVC
+echo.
+echo ** ERROR: Visual C++ 2005/08 Express - not detected
+echo           You can set the environment variables separately, and run 'make-vc.cmd'
+echo           or download the compiler from:
+echo           http://msdn.microsoft.com/vstudio/express/downloads/
+echo.
+goto EXIT
+
+:ERR_NOSDK
+echo.
+echo ** ERROR: Windows Server 2003 Platform SDK - not detected
+echo           You will need the core API's of it to compile Win32 applications.
+echo           http://www.microsoft.com/downloads/details.aspx?familyid=0BAF2B35-C656-4969-ACE8-E4C0C0716ADB
+echo.
+goto EXIT
+
+:EXIT
+set _SDK=
+set _RESKIT=
diff --git a/src/Makefile b/src/Makefile
new file mode 100644
index 0000000..a17e9cd
--- /dev/null
+++ b/src/Makefile
@@ -0,0 +1,176 @@
+#
+# Lanes/src/Makefile
+#
+#   make [LUA=... LUAC=...]                                   Manual build
+#   make LUAROCKS=1 CFLAGS=... LIBFLAG=... LUA=... LUAC=...   LuaRocks automated build
+#
+
+MODULE=lanes
+
+SRC=lanes.c threading.c tools.c
+
+OBJ=$(SRC:.c=.o)
+
+# LuaRocks gives 'LIBFLAG' from the outside
+#
+LIBFLAG=-shared
+
+OPT_FLAGS=-O2
+    # -O0 -g
+
+LUA=lua
+LUAC=luac
+
+_SO=.so
+ifeq "$(findstring MINGW32,$(shell uname -s))" "MINGW32"
+  _SO=.dll
+endif
+
+ifeq "$(LUAROCKS)" ""
+  ifeq "$(findstring MINGW32,$(shell uname -s))" "MINGW32"
+    # MinGW MSYS on Windows
+    #
+    # - 'lua' and 'luac' expected to be on the path
+    # - %LUA_DEV% must lead to include files and libraries (Lua for Windows >= 5.1.3.14)
+    # - %MSVCR80% must be the full pathname of 'msvcr80.dll'
+    #
+    ifeq "$(LUA_DEV)" ""
+      $(error LUA_DEV not defined - try i.e. 'make LUA_DEV=/c/Program\ Files/Lua/5.1')
+    endif
+    ifeq "$(MSVCR80)" ""
+      MSVCR80:=$(LUA_DEV)/install/support/Microsoft.VC80.CRT.SP1/MSVCR80.DLL
+      ifneq '$(shell test -f "$(MSVCR80)" && echo found)' 'found'
+        $(error MSVCR80 not defined - set it to full path of 'msvcr80.dll')
+      endif
+      $(warning MSVCR80=$(MSVCR80))
+    endif
+    LUA_FLAGS:=-I "$(LUA_DEV)/include"
+    LUA_LIBS:="$(LUA_DEV)/lua5.1.dll" -lgcc -lmsvcr80 "$(MSVCR80)"
+    LIBFLAG=-shared -Wl,-Map,lanes.map
+  else
+    # Autodetect LUA_FLAGS and/or LUA_LIBS
+    #
+    ifneq "$(shell which pkg-config)" ""
+      ifeq "$(shell pkg-config --exists lua5.1 && echo 1)" "1"
+        LUA_FLAGS:=$(shell pkg-config --cflags lua5.1)
+        LUA_LIBS:=$(shell pkg-config --libs lua5.1)
+          #
+          # Ubuntu: -I/usr/include/lua5.1 
+          #         -llua5.1
+      else
+        ifeq "$(shell pkg-config --exists lua && echo 1)" "1"
+          LUA_FLAGS:=$(shell pkg-config --cflags lua)
+          LUA_LIBS:=$(shell pkg-config --libs lua)
+            #
+            # OS X fink with pkg-config:
+            #      -I/sw/include 
+            #      -L/sw/lib -llua -lm
+        else
+          $(warning *** 'pkg-config' existed but did not know of 'lua[5.1]' - Good luck!)
+          LUA_FLAGS:=
+          LUA_LIBS:=-llua
+        endif
+      endif
+    else
+      # No 'pkg-config'; try defaults
+      #
+      ifeq "$(shell uname -s)" "Darwin"
+        $(warning *** Assuming 'fink' at default path)
+        LUA_FLAGS:=-I/sw/include
+        LUA_LIBS:=-L/sw/lib -llua
+      else
+        $(warning *** Assuming an arbitrary Lua installation; try installing 'pkg-config')
+        LUA_FLAGS:=
+        LUA_LIBS:=-llua
+      endif
+    endif
+  endif
+
+  ifeq "$(shell uname -s)" "Darwin"
+    # Some machines need 'MACOSX_DEPLOYMENT_TARGET=10.3' for using '-undefined dynamic_lookup'
+    # (at least PowerPC running 10.4.11); does not harm the others
+    #
+    CC = MACOSX_DEPLOYMENT_TARGET=10.3 gcc
+    LIBFLAG = -bundle -undefined dynamic_lookup
+  endif
+  
+  CFLAGS=-Wall -Werror $(OPT_FLAGS) $(LUA_FLAGS)
+  LIBS=$(LUA_LIBS)
+endif
+
+#---
+# PThread platform specifics
+#
+ifeq "$(shell uname -s)" "Linux"
+  # -D_GNU_SOURCE needed for 'pthread_mutexattr_settype'
+  CFLAGS += -D_GNU_SOURCE -fPIC
+
+  # Use of -DUSE_PTHREAD_TIMEDJOIN is possible, but not recommended (slower & keeps threads
+  # unreleased somewhat longer)
+  #CFLAGS += -DUSE_PTHREAD_TIMEDJOIN
+
+  LIBS += -lpthread
+endif
+# 'uname -s' reports FreeBSD/NetBSD/OpenBSD, never a bare "BSD", so match on the substring
+ifneq "$(findstring BSD,$(shell uname -s))" ""
+  LIBS += -lpthread
+endif
+
+#---
+all: lua51-$(MODULE)$(_SO)
+# Rebuild objects when a header or this Makefile changes (a recipe-less pattern rule adds no prerequisites)
+$(OBJ): $(wildcard *.h) Makefile
+
+# Note: Libraries go after $^; linkers resolve symbols left to right, and MSYS
+#       will not like $(LUA_LIBS) ahead of the objects (I think)
+lua51-$(MODULE)$(_SO): $(OBJ)
+	$(CC) $(LIBFLAG) $^ $(LIBS) $(LUA_LIBS) -o $@
+
+clean:
+	-rm -rf lua51-$(MODULE)$(_SO) *.lch *.o *.tmp *.map
+
+lanes.o: keeper.lch
+
+# Note: 'luac -o -' could be used on systems other than Windows (where pipes
+#       are binary). We need to support MinGW as well, so a temporary file.
+#
+%.lch: %.lua
+	$(LUAC) -o $@.tmp $<
+	$(LUA) ../tools/bin2c.lua $@.tmp -o $@
+	-rm $@.tmp
+
+#---
+# NSLU2 "slug" Linux ARM
+#
+nslu2:
+	$(MAKE) all CFLAGS="$(CFLAGS) -I/opt/include -L/opt/lib -D_GNU_SOURCE -lpthread"
+
+#---
+# Cross compiling to Win32 (MinGW on OS X Intel)
+#
+# Point WIN32_LUA51 to an extraction of LuaBinaries dll8 and dev packages.
+#
+# Note: Only works on platforms with same endianness (i.e. not from PowerPC OS X,
+#       since 'luac' uses the host endianness)
+#
+# EXPERIMENTAL; NOT TESTED OF LATE.
+# Re-runs the 'all' goal with the cross tools; LIBFLAG (read by the link rule) forces a plain -shared DLL.
+MINGW_GCC=mingw32-gcc
+    # i686-pc-mingw32-gcc
+
+win32: $(WIN32_LUA51)/include/lua.h
+	$(MAKE) all CC=$(MINGW_GCC) \
+            LUA_FLAGS=-I$(WIN32_LUA51)/include \
+            LUA_LIBS="-L$(WIN32_LUA51) -llua51" \
+            _SO=.dll \
+            LIBFLAG=-shared \
+            LUA=lua51 \
+            LUAC=luac51
+
+$(WIN32_LUA51)/include/lua.h:
+	@echo "Usage: make win32 WIN32_LUA51=<path of extracted LuaBinaries dll8 and dev packages>"
+	@echo "                  [MINGW_GCC=...mingw32-gcc]"
+	@false
+
+.PHONY:	all clean nslu2 win32
+
diff --git a/src/keeper.lua b/src/keeper.lua
new file mode 100644
index 0000000..f76173b
--- /dev/null
+++ b/src/keeper.lua
@@ -0,0 +1,244 @@
+--
+-- KEEPER.LUA
+--
+-- Keeper state logic
+--
+-- This code is read in for each "keeper state", which are the hidden, inter-
+-- mediate data stores used by Lanes inter-state communication objects.
+--
+-- Author: Asko Kauppi <akauppi@gmail.com>
+--
+--[[
+===============================================================================
+
+Copyright (C) 2008 Asko Kauppi <akauppi@gmail.com>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+
+===============================================================================
+]]--
+
+-- unique key instead of 'nil' in queues
+--
+assert( nil_sentinel )
+
+-- We only need to have base and table libraries (and io for debugging)
+--
+local table_remove= assert( table.remove )
+local table_concat= assert( table.concat )
+
+local function WR(...)
+    if not io then return end   -- without the 'io' library, stay silent
+    local line= table_concat( {...}, '\t' )
+    io.stderr:write( line.."\n" )
+end
+
+-----
+-- Actual data store
+--
+-- { [linda_deep_ud]= { key= val [, ...] }
+--      ...
+-- }
+--
+local _data= {}
+
+-----
+-- Entries queued for use when the existing 'data[ud][key]' entry is consumed.
+--
+-- { [linda_deep_ud]= { key= { val [, ...] } [, ...] }
+--      ...
+-- }
+--
+local _incoming= {}
+
+-----
+-- Length limits (if any) for queues
+--
+-- 0:   don't queue values at all; ':send()' waits if the slot is not vacant
+-- N:   allow N values to be queued (slot itself + N-1); wait if full
+-- nil: no limits, '_incoming' may grow endlessly
+--
+local _limits= {}
+
+-----
+-- data_tbl, incoming_tbl, limits_tbl = tables( linda_deep_ud )
+--
+-- Gives appropriate tables for a certain Linda (creates them if needed)
+--
+local function tables( ud )
+    -- The three per-Linda tables are always created together, so probing
+    -- '_data' alone tells whether this Linda has been seen before.
+    local data= _data[ud]
+    if not data then
+        data= {}
+        _data[ud]= data; _incoming[ud]= {}; _limits[ud]= {}
+    end
+    return data, _incoming[ud], _limits[ud]
+end
+
+
+local function DEBUG(title,ud,key)
+    assert( title and ud and key )
+
+    local data,incoming= tables(ud)
+    -- slot value first, then the queued values in arrival order
+    local parts= { tostring(data[key]) }
+    for _,v in ipairs( incoming[key] or {} ) do
+        parts[#parts+1]= tostring(v)
+    end
+    WR( "*** "..title.." ("..tostring(key).."): ", table_concat(parts,", ") )
+end
+
+
+-----
+-- bool= send( linda_deep_ud, key, ... )
+--
+-- Send new data (1..N) to 'key' slot. This send is atomic; all the values
+-- end up one after each other (this is why having possibility for sending
+-- multiple values in one call is deemed important).
+--
+-- If the queue has a limit, values are sent only if all of them fit in.
+--
+-- Returns: 'true' if all the values were placed
+--          'false' if sending would exceed the queue limit (wait & retry)
+--
+function send( ud, key, ... )
+
+    local data,incoming,limits= tables(ud)
+
+    local n= select('#',...)
+    if n==0 then return true end    -- nothing to send
+
+    -- Initialize queue for all keys that have been used with ':send()'
+    --
+    if incoming[key]==nil then
+        incoming[key]= {}
+    end
+    -- The slot is occupied whenever it is non-nil; 'false' is a valid value.
+    local len= data[key]~=nil and 1+#incoming[key] or 0  -- slot + queued values
+    local m= limits[key]                                 -- nil: no limit
+
+    if m and len+n > m then
+        return false    -- would exceed the limit; try again later
+    end
+
+    for i=1,n do
+        local val= select(i,...)
+
+        -- 'nil' in the data replaced by sentinel
+        if val==nil then
+            val= nil_sentinel
+        end
+
+        if len==0 then
+            data[key]= val          -- vacant slot takes the first value
+            len= 1
+        else
+            incoming[key][len]= val -- 'len' is the next free queue index
+            len= len+1
+        end
+    end
+    return true
+end
+
+
+-----
+-- [val, key]= receive( linda_deep_ud, key [, ...] )
+--
+-- Read any of the given keys, consuming the data found. Keys are read in
+-- order.
+--
+function receive( ud, ... )
+
+    local data,incoming= tables(ud)
+
+    for i=1,select('#',...) do
+        local key= select(i,...)
+        local val= data[key]
+        local queue= incoming[key]  -- nil if the key was never sent to (or was 'set')
+        if val~=nil then
+            -- Refill the slot from the head of the queue, or vacate it.
+            if queue and queue[1]~=nil then
+                data[key]= table_remove( queue, 1 )
+            else
+                data[key]= nil
+            end
+            -- A stored sentinel stands for a 'nil' that was sent.
+            if val==nil_sentinel then return nil, key end
+
+            return val, key
+        end
+    end
+    -- no data under any of the keys: no return values
+end
+
+
+-----
+-- = limit( linda_deep_ud, key, uint )
+--
+function limit( ud, key, n )
+
+    -- only the third table returned (the limits) is of interest here
+    local limits= select( 3, tables(ud) )
+    limits[key]= n
+end
+
+
+-----
+-- void= set( linda_deep_ud, key, [val] )
+--
+function set( ud, key, val )
+
+    local data,incoming= tables(ud)
+
+    -- A 'nil' value really vacates the slot (sentinels are used only when
+    -- queueing via 'send()'); anything still queued for the key is dropped.
+    incoming[key]= nil
+    data[key]= val
+end
+
+
+-----
+-- [val]= get( linda_deep_ud, key )
+--
+function get( ud, key )
+
+    -- the first table returned is the slot store; the others are not needed
+    local val= tables(ud)[key]
+
+    -- a stored sentinel represents a 'nil' that was sent
+    if val==nil_sentinel then return nil end
+
+    return val
+end
+
+
+-----
+-- void= clear( linda_deep_ud )
+--
+-- Clear the data structures used for a Linda (at its destructor)
+--
+function clear( ud )
+
+    for _,store in ipairs{ _data, _incoming, _limits } do
+        store[ud]= nil
+    end
+end
+
+
diff --git a/src/lanes.c b/src/lanes.c
new file mode 100644
index 0000000..9b36e4d
--- /dev/null
+++ b/src/lanes.c
@@ -0,0 +1,1849 @@
+/*
+ * LANES.C   	                          Copyright (c) 2007-08, Asko Kauppi
+ *
+ * Multithreading in Lua.
+ * 
+ * History:
+ *      20-Oct-08 (2.0.2): Added closing of free-running threads, but it does
+ *                  not seem to eliminate the occasional segfaults at process
+ *                  exit.
+ *          ...
+ *      24-Jun-08 .. 14-Aug-08 AKa: Major revise, Lanes 2008 version (2.0 rc1)
+ *          ...
+ *      18-Sep-06 AKa: Started the module.
+ *
+ * Platforms (tested internally):
+ *      OS X (10.5.4 PowerPC/Intel)
+ *      Linux x86 (Ubuntu 8.04)
+ *      Win32 (Windows XP Home SP2, Visual C++ 2005/2008 Express)
+ *      PocketPC (TBD)
+ *
+ * Platforms (tested externally):
+ *      Win32 (MSYS) by Ross Berteig.
+ *
+ * Platforms (testers appreciated):
+ *      Win64 - should work???
+ *      Linux x64 - should work
+ *      FreeBSD - should work
+ *      QNX - porting shouldn't be hard
+ *      Sun Solaris - porting shouldn't be hard
+ *
+ * References:
+ *      "Porting multithreaded applications from Win32 to Mac OS X":
+ *      <http://developer.apple.com/macosx/multithreadedprogramming.html>
+ *
+ *      Pthreads:
+ *      <http://vergil.chemistry.gatech.edu/resources/programming/threads.html>
+ *
+ *      MSDN: <http://msdn2.microsoft.com/en-us/library/ms686679.aspx>
+ *
+ *      <http://ridiculousfish.com/blog/archives/2007/02/17/barrier>
+ *
+ * Defines:
+ *      -DLINUX_SCHED_RR: all threads are lifted to SCHED_RR category, to
+ *          allow negative priorities (-2,-1) be used. Even without this,
+ *          using priorities will require 'sudo' privileges on Linux.
+ *
+ *		-DUSE_PTHREAD_TIMEDJOIN: use 'pthread_timedjoin_np()' for waiting
+ *          for threads with a timeout. This changes the thread cleanup
+ *          mechanism slightly (cleans up at the join, not once the thread
+ *          has finished). May or may not be a good idea to use it.
+ *          Available only in selected operating systems (Linux).
+ *
+ * Bugs:
+ *
+ * To-do:
+ *
+ *      ...
+ */
+
+const char *VERSION= "2.0.3";
+
+/*
+===============================================================================
+
+Copyright (C) 2007-08 Asko Kauppi <akauppi@gmail.com>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+
+===============================================================================
+*/
+#include <string.h>
+#include <stdio.h>
+#include <ctype.h>
+#include <stdlib.h>
+
+#include "lua.h"
+#include "lauxlib.h"
+
+#include "threading.h"
+#include "tools.h"
+
+#if !((defined PLATFORM_WIN32) || (defined PLATFORM_POCKETPC))
+# include <sys/time.h>
+#endif
+
+/* geteuid() */
+#ifdef PLATFORM_LINUX
+# include <unistd.h>
+# include <sys/types.h>
+#endif
+
+/* The selected number is not optimal; needs to be tested. Even using just
+* one keeper state may be good enough (depends on the number of Lindas used
+* in the applications).
+*/
+#define KEEPER_STATES_N 1   // 6
+
+/* Do you want full call stacks, or just the line where the error happened?
+*
+* TBD: The full stack feature does not seem to work (try 'make error').
+*/
+#define ERROR_FULL_STACK
+
+#ifdef ERROR_FULL_STACK
+# define STACK_TRACE_KEY ((void*)lane_error)     // used as registry key
+#endif
+
+/*
+* Lua code for the keeper states (baked in)
+*/
+static char keeper_chunk[]= 
+#include "keeper.lch"
+
+struct s_lane;
+static bool_t cancel_test( lua_State *L );
+static void cancel_error( lua_State *L );
+
+#define CANCEL_TEST_KEY ((void*)cancel_test)    // used as registry key
+#define CANCEL_ERROR ((void*)cancel_error)      // 'cancel_error' sentinel
+
+/*
+* registry[FINALIZER_REG_KEY] is either nil (no finalizers) or a table
+* of functions that Lanes will call after the executing 'pcall' has ended.
+*
+* We're NOT using the GC system for finalizer mainly because providing the
+* error (and maybe stack trace) parameters to the finalizer functions would
+* anyways complicate that approach.
+*/
+#define FINALIZER_REG_KEY ((void*)LG_set_finalizer)
+
+struct s_Linda;
+
+#if 1
+# define DEBUG_SIGNAL( msg, signal_ref ) /* */
+#else
+# define DEBUG_SIGNAL( msg, signal_ref ) \
+    { int i; unsigned char *ptr; char buf[999]; \
+      sprintf( buf, ">>> " msg ": %p\t", (signal_ref) ); \
+      ptr= (unsigned char *)signal_ref; \
+      for( i=0; i<sizeof(*signal_ref); i++ ) { \
+        sprintf( strchr(buf,'\0'), "%02x %c ", ptr[i], ptr[i] ); \
+      } \
+      fprintf( stderr, "%s\n", buf ); \
+    }
+#endif
+
+static bool_t thread_cancel( struct s_lane *s, double secs, bool_t force );
+
+
+/*
+* Push a table stored in registry onto Lua stack.
+*
+* If there is no existing table, create one if 'create' is TRUE.
+* 
+* Returns: TRUE if a table was pushed
+*          FALSE if no table found, not created, and nothing pushed
+*/
+static bool_t push_registry_table( lua_State *L, void *key, bool_t create ) {
+
+    STACK_GROW(L,3);
+    // Look the entry up: registry[key]
+    lua_pushlightuserdata( L, key );
+    lua_gettable( L, LUA_REGISTRYINDEX );
+
+    if (!lua_isnil(L,-1))
+        return TRUE;    // existing entry pushed
+
+    lua_pop(L,1);       // drop the nil
+    if (!create)
+        return FALSE;   // nothing pushed
+
+    // Create the table and bind it: registry[key]= table
+    lua_newtable(L);
+    lua_pushlightuserdata( L, key );
+    lua_pushvalue(L,-2);    // copy of the new table (consumed by settable)
+    lua_settable( L, LUA_REGISTRYINDEX );
+    return TRUE;    // [-1]: table that is also bound in registry
+}
+
+
+/*---=== Serialize require ===---
+*/
+
+static MUTEX_T require_cs;
+
+//---
+// [val]= new_require( ... )
+//
+// Call 'old_require' but only one lane at a time.
+//
+// Upvalues: [1]: original 'require' function
+//
+static int new_require( lua_State *L ) {
+    int rc;
+    int args= lua_gettop(L);    // number of arguments given to 'require'
+
+  STACK_GROW(L,1);
+  STACK_CHECK(L)
+    
+    // Using 'lua_pcall()' to catch errors; otherwise a failing 'require' would
+    // leave us locked, blocking any future 'require' calls from other lanes.
+    //
+    MUTEX_LOCK( &require_cs );
+    {
+        lua_pushvalue( L, lua_upvalueindex(1) );    // original 'require'
+        lua_insert( L, 1 );                         // ...moved below its arguments
+
+        rc= lua_pcall( L, args, 1 /*retvals*/, 0 /*errfunc*/ );
+            //
+            // nonzero on failure: LUA_ERRRUN / LUA_ERRMEM
+    }
+    MUTEX_UNLOCK( &require_cs );
+
+    if (rc) lua_error(L);   // error message already at [-1]
+
+  STACK_END(L,0)
+    return 1;
+}
+
+/*
+* Serialize calls to 'require', if it exists
+*/
+static 
+void serialize_require( lua_State *L ) {
+
+  STACK_GROW(L,1);
+  STACK_CHECK(L)
+
+    lua_getglobal( L, "require" );
+    if (!lua_isfunction( L, -1 )) {
+        // No 'require' function in this state: drop whatever the lookup
+        // gave us and leave the globals untouched.
+        //
+        lua_pop(L,1);
+    } else {
+        // [-1]: original 'require' function; it becomes upvalue [1] of the
+        //       serializing wrapper, which then replaces the global.
+        //
+        lua_pushcclosure( L, new_require, 1 /*upvalues*/ );
+        lua_setglobal( L, "require" );
+    }
+
+  STACK_END(L,0)
+}
+
+
+/*---=== Keeper states ===---
+*/
+
+/*
+* Pool of keeper states
+*
+* Access to keeper states is locked (only one OS thread at a time) so the 
+* bigger the pool, the less chances of unnecessary waits. Lindas map to the
+* keepers randomly, by a hash.
+*/
+struct s_Keeper {
+    MUTEX_T lock_;      // held between 'keeper_acquire()' and 'keeper_release()'
+    lua_State *L;       // the keeper's Lua state (set by 'init_keepers()')
+} keeper[ KEEPER_STATES_N ];
+
+/* We could use an empty table in 'keeper.lua' as the sentinel, but maybe
+* checking for a lightuserdata is faster.
+*/
+static bool_t nil_sentinel;
+
+/*
+* Initialize keeper states
+*
+* If there is a problem, return an error message (NULL for okay).
+*
+* Note: Any problems would be design flaws; the created Lua state is left
+*       unclosed, because it does not really matter. In production code, this
+*       function never fails.
+*/
+static const char *init_keepers(void) {
+    unsigned int i;
+    for( i=0; i<KEEPER_STATES_N; i++ ) {
+        
+        // Initialize Keeper states with bare minimum of libs (those required
+        // by 'keeper.lua')
+        //
+        lua_State *L= luaL_newstate();
+        if (!L) return "out of memory";
+
+        luaG_openlibs( L, "io,table" );     // 'io' for debugging messages
+
+        lua_pushlightuserdata( L, &nil_sentinel );  // only the address matters
+        lua_setglobal( L, "nil_sentinel" );         // 'keeper.lua' asserts this global is set
+
+        // Read in the preloaded chunk (and run it)
+        //
+        if (luaL_loadbuffer( L, keeper_chunk, sizeof(keeper_chunk), "=lanes_keeper" ))
+            return "luaL_loadbuffer() failed";   // LUA_ERRSYNTAX / LUA_ERRMEM
+
+        if (lua_pcall( L, 0 /*args*/, 0 /*results*/, 0 /*errfunc*/ )) {
+            // LUA_ERRRUN / LUA_ERRMEM / LUA_ERRERR
+            //
+            const char *err= lua_tostring(L,-1);
+            assert(err);
+            return err;     // points into the state, which is left unclosed
+        }
+
+        MUTEX_INIT( &keeper[i].lock_ );
+        keeper[i].L= L;
+    }
+    return NULL;    // ok
+}
+
+static 
+struct s_Keeper *keeper_acquire( const void *ptr ) {
+    /*
+    * Lock and return the keeper that 'ptr' hashes to. Any hashing will do
+    * that maps pointers to 0..KEEPER_STATES_N-1 consistently. The cast goes
+    * via 'size_t': 'unsigned long' is narrower than a pointer on Win64.
+    *
+    * Pointers are often aligned by 8 or so - ignore the low order bits
+    */
+    unsigned int i= (unsigned int)( ((size_t)(ptr) >> 3) % KEEPER_STATES_N );
+    struct s_Keeper *K= &keeper[i];
+
+    MUTEX_LOCK( &K->lock_ );    // released by 'keeper_release()'
+    return K;
+}
+
+static 
+void keeper_release( struct s_Keeper *K ) {
+    MUTEX_UNLOCK( &K->lock_ );  // pairs with the lock taken in 'keeper_acquire()'
+}
+
+/*
+* Call a function ('func_name') in the keeper state, and pass on the returned
+* values to 'L'.
+*
+* 'linda':          deep Linda pointer (used only as a unique table key, first parameter)
+* 'starting_index': first of the rest of parameters (none if 0)
+*
+* Returns:  number of return values (pushed to 'L')
+*/
+static
+int keeper_call( lua_State* K, const char *func_name, 
+                  lua_State *L, struct s_Linda *linda, uint_t starting_index ) {
+
+    int args= starting_index ? (lua_gettop(L) - starting_index +1) : 0;    // 'L' slots from 'starting_index' to top
+    int Ktos= lua_gettop(K);    // keeper stack top before the call
+    int retvals;
+
+    lua_getglobal( K, func_name );      // global function defined by 'keeper.lua'
+    ASSERT_L( lua_isfunction(K,-1) );
+
+    STACK_GROW( K, 1 );
+    lua_pushlightuserdata( K, linda );  // first argument: the Linda's address
+
+    luaG_inter_copy( L,K, args );   // L->K
+    lua_call( K, 1+args, LUA_MULTRET );
+
+    retvals= lua_gettop(K) - Ktos;  // whatever the call left above the old top
+
+    luaG_inter_move( K,L, retvals );    // K->L
+    return retvals;
+}
+
+
+/*---=== Linda ===---
+*/
+
+/*
+* Actual data is kept within a keeper state, which is hashed by the 's_Linda'
+* pointer (which is the same for all userdata proxies pointing to it).
+*/
+struct s_Linda {
+    SIGNAL_T read_happened;     // signalled by 'linda_receive()'; 'linda_send()' waits on it
+    SIGNAL_T write_happened;    // signalled by 'linda_send()'/'linda_set()'; 'linda_receive()' waits on it
+};
+
+static int LG_linda_id( lua_State* );
+
+#define lua_toLinda(L,n) ((struct s_Linda *)luaG_todeep( L, LG_linda_id, n ))
+
+
+/*
+* bool= linda_send( linda_ud, [timeout_secs=-1,] key_num|str|bool|lightuserdata, ... )
+*
+* Send one or more values to a Linda. If there is a limit, all values must fit.
+*
+* Returns:  'true' if the value was queued
+*           'false' for timeout (only happens when the queue size is limited)
+*/
+LUAG_FUNC( linda_send ) {
+    struct s_Linda *linda= lua_toLinda( L, 1 );
+    bool_t ret;                 // result of the latest keeper 'send' attempt
+    bool_t cancel= FALSE;
+    struct s_Keeper *K;
+    time_d timeout= -1.0;       // default when no timeout argument is given
+    uint_t key_i= 2;    // index of first key, if timeout not there
+
+    if (lua_isnumber(L,2)) {    // NOTE(review): a numeric first key is taken as the timeout here
+        timeout= SIGNAL_TIMEOUT_PREPARE( lua_tonumber(L,2) );
+        key_i++;
+    } else if (lua_isnil(L,2))  // nil in the timeout position: skip it, keep the default
+        key_i++;
+
+    if (lua_isnil(L,key_i))
+        luaL_error( L, "nil key" );     // raised before the keeper lock is taken
+
+    STACK_GROW(L,1);
+
+    K= keeper_acquire( linda );
+    {
+        lua_State *KL= K->L;    // need to do this for 'STACK_CHECK'
+STACK_CHECK(KL)
+        while(TRUE) {
+            int pushed;
+        
+STACK_MID(KL,0)
+            pushed= keeper_call( K->L, "send", L, linda, key_i );
+            ASSERT_L( pushed==1 );
+        
+            ret= lua_toboolean(L,-1);   // keeper 'send' returns false when the limit would be exceeded
+            lua_pop(L,1);
+        
+            if (ret) {
+                // Wake up ALL waiting threads
+                //
+                SIGNAL_ALL( &linda->write_happened );
+                break;
+
+            } else if (timeout==0.0) {
+                break;  /* no wait; instant timeout */
+
+            } else {
+                /* limit faced; push until timeout */
+                    
+                cancel= cancel_test( L );   // testing here causes no delays
+                if (cancel) break;
+
+                // K lock will be released for the duration of wait and re-acquired
+                //
+                if (!SIGNAL_WAIT( &linda->read_happened, &K->lock_, timeout ))
+                    break;  // timeout
+            }
+        }
+STACK_END(KL,0)
+    }
+    keeper_release(K);
+
+    if (cancel)
+        cancel_error(L);    // called only after the keeper lock has been released
+    
+    lua_pushboolean( L, ret );
+    return 1;
+}
+
+
+/*
+* [val, key]= linda_receive( linda_ud, [timeout_secs_num=-1], key_num|str|bool|lightuserdata [, ...] )
+*
+* Receive a value from Linda, consuming it.
+*
+* Returns:  value received (which is consumed from the slot)
+*           key which had it
+*/
+LUAG_FUNC( linda_receive ) {
+    struct s_Linda *linda= lua_toLinda( L, 1 );
+    int pushed;                 // values the keeper 'receive' left on 'L' (0 or 2)
+    bool_t cancel= FALSE;
+    struct s_Keeper *K;
+    time_d timeout= -1.0;       // default when no timeout argument is given
+    uint_t key_i= 2;            // index of first key, if timeout not there
+
+    if (lua_isnumber(L,2)) {    // NOTE(review): a numeric first key is taken as the timeout here
+        timeout= SIGNAL_TIMEOUT_PREPARE( lua_tonumber(L,2) );
+        key_i++;
+    } else if (lua_isnil(L,2))  // nil in the timeout position: skip it, keep the default
+        key_i++;
+
+    K= keeper_acquire( linda );
+    {
+        while(TRUE) {
+            pushed= keeper_call( K->L, "receive", L, linda, key_i );
+            if (pushed) {
+                ASSERT_L( pushed==2 );  // value and the key it came from
+
+                // To be done from within the 'K' locking area
+                //
+                SIGNAL_ALL( &linda->read_happened );
+                break;
+
+            } else if (timeout==0.0) {
+                break;  /* instant timeout */
+
+            } else {    /* nothing received; wait until timeout */
+    
+                cancel= cancel_test( L );   // testing here causes no delays
+                if (cancel) break;
+
+                // Release the K lock for the duration of wait, and re-acquire
+                //
+                if (!SIGNAL_WAIT( &linda->write_happened, &K->lock_, timeout ))
+                    break;  // timeout; 'pushed' is 0 here
+            }
+        }
+    }
+    keeper_release(K);
+
+    if (cancel)
+        cancel_error(L);    // called only after the keeper lock has been released
+
+    return pushed;
+}
+
+
+/*
+* = linda_set( linda_ud, key_num|str|bool|lightuserdata [,value] )
+*
+* Set a value to Linda.
+*
+* Existing slot value is replaced, and possible queue entries removed.
+*/
+LUAG_FUNC( linda_set ) {
+    struct s_Linda *linda= lua_toLinda( L, 1 );
+    bool_t has_value= !lua_isnoneornil(L,3);    // an omitted value clears the key, just like 'nil'
+
+    struct s_Keeper *K= keeper_acquire( linda );
+    {
+        int pushed= keeper_call( K->L, "set", L, linda, 2 );    // passes key [,value]
+        ASSERT_L( pushed==0 );
+
+        /* Wake receivers only when a value was actually stored; the signal
+        *  is set from within 'K' locking. */
+        if (has_value) {
+            SIGNAL_ALL( &linda->write_happened );
+        }
+    }
+    keeper_release(K);
+
+    return 0;
+}
+
+
+/*
+* [val]= linda_get( linda_ud, key_num|str|bool|lightuserdata )
+*
+* Get a value from Linda.
+*/
+LUAG_FUNC( linda_get ) {
+    struct s_Linda *linda= lua_toLinda( L, 1 );
+    struct s_Keeper *K;
+    int n_results;
+
+    // The keeper lock is held only for the duration of the lookup.
+    K= keeper_acquire( linda );
+    n_results= keeper_call( K->L, "get", L, linda, 2 );
+    ASSERT_L( n_results==0 || n_results==1 );
+    keeper_release(K);
+
+    return n_results;   // the value (if any) is already on top of 'L'
+}
+
+
+/*
+* = linda_limit( linda_ud, key_num|str|bool|lightuserdata, uint [, ...] )
+*
+* Set limits to 1 or more Linda keys.
+*/
+LUAG_FUNC( linda_limit ) {
+    struct s_Linda *linda= lua_toLinda( L, 1 );
+    struct s_Keeper *K;
+    int n_results;
+
+    // Everything from stack index 2 upwards is handed to keeper 'limit()'.
+    K= keeper_acquire( linda );
+    n_results= keeper_call( K->L, "limit", L, linda, 2 );
+    ASSERT_L( n_results==0 );
+    keeper_release(K);
+    return 0;
+}
+
+
+/*
+* lightuserdata= linda_deep( linda_ud )
+*
+* Return the 'deep' userdata pointer, identifying the Linda.
+*
+* This is needed for using Lindas as key indices (timer system needs it);
+* separately created proxies of the same underlying deep object will have
+* different userdata and won't be known to be essentially the same deep one
+* without this.
+*/
+LUAG_FUNC( linda_deep ) {
+    void *deep= lua_toLinda( L, 1 );
+    lua_pushlightuserdata( L, deep );   // the address alone identifies the Linda
+    return 1;
+}
+
+
+/*
+* Identity function of a shared userdata object.
+* 
+*   lightuserdata= linda_id( "new" [, ...] )
+*   = linda_id( "delete", lightuserdata )
+*
+* Creation and cleanup of actual 'deep' objects. 'luaG_...' will wrap them into
+* regular userdata proxies, per each state using the deep data.
+*
+*   tbl= linda_id( "metatable" )
+*
+* Returns a metatable for the proxy objects ('__gc' method not needed; will
+* be added by 'luaG_...')
+*
+*   = linda_id( str, ... )
+*
+* For any other strings, the ID function must not react at all. This allows
+* future extensions of the system. 
+*/
+LUAG_FUNC( linda_id ) {
+    const char *which= lua_tostring(L,1);
+    if (!which) return 0;   // not a string: treat as unknown request, be quiet
+    if (strcmp( which, "new" )==0) {
+        struct s_Linda *s;
+
+        /* We don't use any parameters, but one could (they're at [2..TOS])
+        */
+        ASSERT_L( lua_gettop(L)==1 );
+
+        /* The deep data is allocated separately of Lua stack; we might no
+        * longer be around when last reference to it is being released.
+        * One can use any memory allocation scheme.
+        */
+        s= (struct s_Linda *) malloc( sizeof(struct s_Linda) );
+        ASSERT_L(s);
+
+        SIGNAL_INIT( &s->read_happened );
+        SIGNAL_INIT( &s->write_happened );
+
+        lua_pushlightuserdata( L, s );  // returned as the deep object's identity
+        return 1;
+
+    } else if (strcmp( which, "delete" )==0) {
+        struct s_Keeper *K;
+        struct s_Linda *s= lua_touserdata(L,2);
+        ASSERT_L(s);
+
+        /* Clean associated structures in the keeper state.
+        */
+        K= keeper_acquire(s);
+        {
+            keeper_call( K->L, "clear", L, s, 0 );  // 0: no further arguments
+        }
+        keeper_release(K);
+
+        /* There aren't any lanes waiting on these lindas, since all proxies
+        * have been gc'ed. Right?
+        */
+        SIGNAL_FREE( &s->read_happened );
+        SIGNAL_FREE( &s->write_happened );
+        free(s);
+
+        return 0;
+
+    } else if (strcmp( which, "metatable" )==0) {
+
+      STACK_CHECK(L)
+        lua_newtable(L);
+        lua_newtable(L);
+            //
+            // [-2]: linda metatable
+            // [-1]: metatable's to-be .__index table
+    
+        lua_pushcfunction( L, LG_linda_send );
+        lua_setfield( L, -2, "send" );
+    
+        lua_pushcfunction( L, LG_linda_receive );
+        lua_setfield( L, -2, "receive" );
+    
+        lua_pushcfunction( L, LG_linda_limit );
+        lua_setfield( L, -2, "limit" );
+
+        lua_pushcfunction( L, LG_linda_set );
+        lua_setfield( L, -2, "set" );
+    
+        lua_pushcfunction( L, LG_linda_get );
+        lua_setfield( L, -2, "get" );
+
+        lua_pushcfunction( L, LG_linda_deep );
+        lua_setfield( L, -2, "deep" );
+
+        lua_setfield( L, -2, "__index" );   // metatable.__index= method table
+      STACK_END(L,1)
+    
+        return 1;
+    }
+    
+    return 0;   // unknown request, be quiet
+}
+
+
+/*---=== Finalizer ===---
+*/
+
+//---
+// void= finalizer( finalizer_func )
+//
+// finalizer_func( [err, stack_tbl] )
+//
+// Add a function that will be called when exiting the lane, either via
+// normal return or an error.
+//
+LUAG_FUNC( set_finalizer )
+{
+    lua_Integer next_slot;
+
+    STACK_GROW(L,3);
+
+    // registry[FINALIZER_REG_KEY]: array of finalizers, created on first use
+    push_registry_table( L, FINALIZER_REG_KEY, TRUE /*do create if none*/ );
+    next_slot= lua_objlen(L,-1)+1;
+    lua_pushinteger( L, next_slot );
+    lua_pushvalue( L, 1 );  // the finalizer function given by the caller
+    lua_settable( L, -3 );  // finalizers[next_slot]= function
+    lua_pop(L,1);           // done with the finalizers table
+    return 0;
+}
+
+
+//---
+// Run finalizers - if any - with the given parameters
+//
+// If 'lua_rc' is nonzero, error message and stack trace are available as:
+//      [-1]: stack trace (table)
+//      [-2]: error message (any type)
+//
+// Returns:
+//      0 if finalizers were run without error (or there were none)
+//      LUA_ERRxxx return code if any of the finalizers failed
+//
+// TBD: should we add stack trace on failing finalizer, wouldn't be hard..
+//
+static int run_finalizers( lua_State *L, int lua_rc )
+{
+    unsigned error_index, tbl_index;
+    unsigned n;
+    int rc= 0;
+    
+    if (!push_registry_table(L, FINALIZER_REG_KEY, FALSE /*don't create one*/))
+        return 0;   // no finalizers
+
+    tbl_index= lua_gettop(L);
+    error_index= (lua_rc!=0) ? tbl_index-2 : 0;   // absolute indices
+
+    STACK_GROW(L,4);
+
+    // [-1]: { func [, ...] }
+    // and, if 'lua_rc' is nonzero:  [-2]: stack trace  [-3]: error message
+    for( n= lua_objlen(L,-1); n>0; n-- ) {     // last registered runs first
+        unsigned args= 0;
+        lua_pushinteger( L,n );
+        lua_gettable( L, -2 );
+        
+        // [-1]: function
+        // [-2]: finalizers table
+
+        if (error_index) {
+            lua_pushvalue( L, error_index );    // error message
+            lua_pushvalue( L, error_index+1 );  // stack trace
+            args= 2;
+        }
+
+        rc= lua_pcall( L, args, 0 /*retvals*/, 0 /*no errfunc*/ );
+            //
+            // LUA_ERRRUN / LUA_ERRMEM
+    
+        if (rc!=0) {
+            // [-1]: error message
+            //
+            // If one finalizer fails, don't run the others. Return this
+            // as the 'real' error, preceding that we could have had (or not)
+            // from the actual code.
+            //
+            break;
+        }
+    }
+    
+    lua_remove(L,tbl_index);   // take finalizer table out of stack
+
+    return rc;
+}
+
+
+/*---=== Threads ===---
+*/
+
+// NOTE: values to be changed by either thread, during execution, without
+//       locking, are marked "volatile"
+//
+struct s_lane {
+    THREAD_T thread;
+        // (presumably M = master side, S = the lane's own sub-thread)
+        // M: sub-thread OS thread
+        // S: not used
+
+    lua_State *L;
+        //
+        // M: prepares the state, and reads results
+        // S: while S is running, M must keep out of modifying the state
+
+    volatile enum e_status status;
+        //
+        // M: sets to PENDING (before launching)
+        // S: updates -> RUNNING/WAITING -> DONE/ERROR_ST/CANCELLED
+    
+    volatile bool_t cancel_request;
+        // (also set to TRUE by 'selfdestruct_atexit()' at process end)
+        // M: sets to FALSE, flags TRUE for cancel request
+        // S: reads to see if cancel is requested
+
+#if !( (defined PLATFORM_WIN32) || (defined PLATFORM_POCKETPC) || (defined PTHREAD_TIMEDJOIN) )
+    SIGNAL_T done_signal_;
+        //
+        // M: Waited upon at lane ending  (if Posix with no PTHREAD_TIMEDJOIN)
+        // S: sets the signal once cancellation is noticed (avoids a kill)
+
+    MUTEX_T done_lock_;
+        //
+        // Lock required by 'done_signal_' condition variable, protecting
+        // lane status changes to DONE/ERROR_ST/CANCELLED.
+#endif  // Posix without PTHREAD_TIMEDJOIN
+
+    volatile enum { 
+        NORMAL,         // normal master side state
+        KILLED          // issued an OS kill
+    } mstatus;
+        //
+        // M: sets to NORMAL, if issued a kill changes to KILLED
+        // S: not used
+        
+    struct s_lane * volatile selfdestruct_next;
+        // NULL: not in the selfdestruct chain (its tail is SELFDESTRUCT_END)
+        // M: sets to non-NULL if facing lane handle '__gc' cycle but the lane
+        //    is still running
+        // S: cleans up after itself if non-NULL at lane exit
+};
+
+static MUTEX_T selfdestruct_cs;
+    //
+    // Protects modifying the selfdestruct chain
+
+#define SELFDESTRUCT_END ((struct s_lane *)(-1))
+    //
+    // The chain is ended by '(struct s_lane*)(-1)', not NULL:
+    //      'selfdestruct_first -> ... -> ... -> (-1)'
+
+struct s_lane * volatile selfdestruct_first= SELFDESTRUCT_END;
+
+/*
+* Add the lane to selfdestruct chain; the ones still running at the end of the
+* whole process will be cancelled.
+*/
+static void selfdestruct_add( struct s_lane *s ) {
+
+    MUTEX_LOCK( &selfdestruct_cs );
+    {
+        assert( s->selfdestruct_next == NULL );     // NULL: not yet in the chain
+
+        s->selfdestruct_next= selfdestruct_first;   // push to the head of the chain
+        selfdestruct_first= s;
+    }
+    MUTEX_UNLOCK( &selfdestruct_cs );
+}
+
+/*
+* A free-running lane has ended; remove it from selfdestruct chain
+*/
+static void selfdestruct_remove( struct s_lane *s ) {
+
+    MUTEX_LOCK( &selfdestruct_cs );
+    {
+        // Make sure (within the MUTEX) that we actually are in the chain
+        // still (at process exit they will remove us from chain and then
+        // cancel/kill).
+        //
+        if (s->selfdestruct_next != NULL) {
+            struct s_lane **ref= (struct s_lane **) &selfdestruct_first;    // link being examined
+            bool_t found= FALSE;
+    
+            while( *ref != SELFDESTRUCT_END ) {
+                if (*ref == s) {
+                    *ref= s->selfdestruct_next;     // unlink 's'
+                    s->selfdestruct_next= NULL;     // mark as no longer chained
+                    found= TRUE;
+                    break;
+                }
+                ref= (struct s_lane **) &((*ref)->selfdestruct_next);
+            }
+            assert( found );
+        }
+    }
+    MUTEX_UNLOCK( &selfdestruct_cs );
+}
+
+/*
+* 'atexit()' handler: ask every lane still on the selfdestruct chain to cancel
+*/
+static void selfdestruct_atexit( void ) {
+
+    if (selfdestruct_first == SELFDESTRUCT_END) return;    // no free-running threads
+
+    // Signal _all_ still running threads to exit
+    // (this only raises 'cancel_request'; each lane has to notice it by itself)
+    MUTEX_LOCK( &selfdestruct_cs );
+    {
+        struct s_lane *s= selfdestruct_first;
+        while( s != SELFDESTRUCT_END ) {
+            s->cancel_request= TRUE;
+            s= s->selfdestruct_next;
+        }
+    }
+    MUTEX_UNLOCK( &selfdestruct_cs );
+
+    // When noticing their cancel, the lanes will remove themselves from
+    // the selfdestruct chain.
+
+    // TBD: Not sure if Windows (multi core) will require the timed approach,
+    //      or single Yield. I don't have machine to test that (so leaving
+    //      for timed approach).    -- AKa 25-Oct-2008
+ 
+#ifdef PLATFORM_LINUX
+    // It seems enough for Linux to have a single yield here, which allows
+    // other threads (timer lane) to proceed. Without the yield, there is
+    // segfault.
+    //
+    YIELD();
+#else
+    // OS X 10.5 (Intel) needs more to avoid segfaults.
+    //
+    // "make test" is okay. 100's of "make require" are okay.
+    //
+    // Tested on MacBook Core Duo 2GHz and 10.5.5:
+    //  -- AKa 25-Oct-2008
+    //
+    #ifndef ATEXIT_WAIT_SECS
+    # define ATEXIT_WAIT_SECS (0.1)
+    #endif
+    {
+        double t_until= now_secs() + ATEXIT_WAIT_SECS;
+    
+        while( selfdestruct_first != SELFDESTRUCT_END ) {
+            YIELD();    // give threads time to act on their cancel
+            
+            if (now_secs() >= t_until) break;
+        }
+    }
+#endif
+
+    //---
+    // Count (and, in the disabled branch below, kill) the still free running threads
+    //
+    if ( selfdestruct_first != SELFDESTRUCT_END ) {
+        unsigned n=0;
+        MUTEX_LOCK( &selfdestruct_cs );
+        {
+            struct s_lane *s= selfdestruct_first;
+            while( s != SELFDESTRUCT_END ) {
+                n++;
+                s= s->selfdestruct_next;
+            }
+        }
+        MUTEX_UNLOCK( &selfdestruct_cs );
+
+    // Linux (at least 64-bit): CAUSES A SEGFAULT IF THIS BLOCK IS ENABLED
+    //       and works without the block (so let's leave those lanes running)
+    // NOTE(review): the disabled '#else' branch increments 'n' again (doubling the count) and prints the unsigned 'n' with '%d'
+#if 1
+        // 2.0.2: at least timer lane is still here
+        // NOTE(review): in this branch 'n' is only consumed by the commented-out message below
+        //fprintf( stderr, "Left %d lane(s) with cancel request at process end.\n", n );
+#else
+        MUTEX_LOCK( &selfdestruct_cs );
+        {
+            struct s_lane *s= selfdestruct_first;
+            while( s != SELFDESTRUCT_END ) {
+                struct s_lane *next_s= s->selfdestruct_next;
+                s->selfdestruct_next= NULL;     // detach from selfdestruct chain
+
+                THREAD_KILL( &s->thread );
+                s= next_s;
+                n++;
+            }
+            selfdestruct_first= SELFDESTRUCT_END;
+        }
+        MUTEX_UNLOCK( &selfdestruct_cs );
+
+        fprintf( stderr, "Killed %d lane(s) at process end.\n", n );
+#endif
+    }
+}
+
+
+// To allow free-running threads (whose lifespan may exceed the handle's), each
+// 'struct s_lane' is malloc/free'd and the handle only carries a pointer to it.
+// This is not deep userdata, since the handle is not portable among lanes.
+//
+#define lua_toLane(L,i)  (* ((struct s_lane**) lua_touserdata(L,i)))
+
+
+/*
+* Tell whether the lane running in 'L' has been asked to cancel.
+*
+* Meant for the cancellation hook and for pending Linda operations, where
+* the extra registry lookup does not hurt performance.
+*
+* Returns TRUE if the caller should release any locks it holds and call
+* 'cancel_error()' to make the lane's execution end; FALSE otherwise.
+*/
+static bool_t cancel_test( lua_State *L ) {
+    struct s_lane *lane;
+
+    STACK_GROW(L,1);
+  STACK_CHECK(L)
+    // registry[CANCEL_TEST_KEY]: lightuserdata (true 's_lane' pointer) / nil
+    lua_pushlightuserdata( L, CANCEL_TEST_KEY );
+    lua_rawget( L, LUA_REGISTRYINDEX );
+    lane= lua_touserdata( L, -1 );
+    lua_pop(L,1);
+  STACK_END(L,0)
+
+    // No entry: this is the original main state, which no-one can cancel
+    if (lane == NULL) return FALSE;
+    return lane->cancel_request != 0;
+}
+
+static void cancel_error( lua_State *L ) {       // ends the running code by raising CANCEL_ERROR
+    STACK_GROW(L,1);
+    lua_pushlightuserdata( L, CANCEL_ERROR );    // special error value (a light userdata, not a message)
+    lua_error(L);   // no return
+}
+
+static void cancel_hook( lua_State *L, lua_Debug *ar ) {
+    (void)ar;                       // the hook event details are not needed
+    if (!cancel_test(L)) return;    // nothing requested: let the lane carry on
+    cancel_error(L);                // raises a Lua error; no return
+}
+
+//---
+// = _single( [cores_uint=1] )
+//
+// Limits the process to use only 'cores' CPU cores. To be used for performance
+// testing on multicore devices. DEBUGGING ONLY!
+// Binds to CPU 0 on OS X builds with _UTILBINDTHREADTOCPU ('cores' must be 1); raises a Lua error in every other case.
+LUAG_FUNC( _single ) {
+	uint_t cores= luaG_optunsigned(L,1,1);
+
+#ifdef PLATFORM_OSX
+  #ifdef _UTILBINDTHREADTOCPU
+	if (cores > 1) luaL_error( L, "Limiting to N>1 cores not possible." );
+    // requires 'chudInitialize()'
+    utilBindThreadToCPU(0);     // # of CPU to run on (we cannot limit to 2..N CPUs?)
+  #else
+    luaL_error( L, "Not available: compile with _UTILBINDTHREADTOCPU" );
+  #endif
+#else
+    luaL_error( L, "not implemented!" );
+#endif
+	(void)cores;    // keeps 'cores' referenced in the builds that never read it
+	
+	return 0;
+}
+
+
+/*
+* error_val= lane_error( error_val )
+*
+* Error handler for the lane's 'lua_pcall()': stores the call stack as a table
+* in the registry and returns the error value itself untouched.
+*
+* ".. will be called with the error message and its return value will be the 
+*     message returned on the stack by lua_pcall."
+*
+* Note: Rather than modifying the error message itself, it would be better
+*     to provide the call stack (as string) completely separated. This would
+*     work great with non-string error values as well (current system does not).
+*     (This is NOT possible with the Lua 5.1 'lua_pcall()'; we could of course
+*     implement a Lanes-specific 'pcall' of our own that does this). TBD!!! :)
+*       --AKa 22-Jan-2009
+*/
+#ifdef ERROR_FULL_STACK
+
+static int lane_error( lua_State *L ) {
+    lua_Debug ar;
+    unsigned lev,n;
+
+    // [1]: error message (any type)
+
+    assert( lua_gettop(L)==1 );
+
+    // Don't do stack survey for cancelled lanes.
+    //
+#if 1
+    if (lua_touserdata(L,1) == CANCEL_ERROR)
+        return 1;   // just pass on
+#endif
+
+    // Place stack trace at 'registry[STACK_TRACE_KEY]' for the 'lua_pcall()'
+    // caller to fetch. This bypasses the Lua 5.1 limitation of only one
+    // return value from error handler to 'lua_pcall()' caller.
+
+    // It's adequate to push stack trace as a table. This gives the receiver
+    // of the stack best means to format it to their liking. Also, it allows
+    // us to add more stack info later, if needed.
+    //
+    // table of { "sourcefile.lua:<line>", ... }
+    //
+    STACK_GROW(L,3);
+    lua_newtable(L);
+
+    // Best to start from level 1, but in some cases it might be a C function
+    // and we don't get '.currentline' for that. It's okay - just keep level
+    // and table index growing separate.    --AKa 22-Jan-2009
+    //
+    lev= 0;
+    n=1;
+    while( lua_getstack(L, ++lev, &ar ) ) {
+        lua_getinfo(L, "Sl", &ar);
+        if (ar.currentline > 0) {       // levels without a current line are left out
+            lua_pushinteger( L, n++ );
+            lua_pushfstring( L, "%s:%d", ar.short_src, ar.currentline );
+            lua_settable( L, -3 );
+        }
+    }
+
+    lua_pushlightuserdata( L, STACK_TRACE_KEY );
+    lua_insert(L,-2);                           // key below the table: [.. key, table]
+    lua_settable( L, LUA_REGISTRYINDEX );       // registry[STACK_TRACE_KEY]= table
+
+    assert( lua_gettop(L)== 1 );
+
+    return 1;   // the untouched error value
+}
+#endif
+
+
+//--- Thread entry point of a lane: runs the lane function, then its finalizers, then publishes the outcome
+#if (defined PLATFORM_WIN32) || (defined PLATFORM_POCKETPC)
+  static THREAD_RETURN_T __stdcall lane_main( void *vs )
+#else
+  static THREAD_RETURN_T lane_main( void *vs )
+#endif
+{
+    struct s_lane *s= (struct s_lane *)vs;
+    int rc, rc2;
+    lua_State *L= s->L;
+
+    s->status= RUNNING;  // PENDING -> RUNNING
+
+    // Tie "set_finalizer()" to the state
+    //
+    lua_pushcfunction( L, LG_set_finalizer );
+    lua_setglobal( L, "set_finalizer" );
+
+#ifdef ERROR_FULL_STACK
+    STACK_GROW( L, 1 );
+    lua_pushcfunction( L, lane_error );
+    lua_insert( L, 1 );
+
+    // [1]: error handler
+    // [2]: function to run
+    // [3..top]: parameters
+    //
+    rc= lua_pcall( L, lua_gettop(L)-2, LUA_MULTRET, 1 /*error handler*/ );
+        // 0: no error
+        // LUA_ERRRUN: a runtime error (error pushed on stack)
+        // LUA_ERRMEM: memory allocation error
+        // LUA_ERRERR: error while running the error handler (if any)
+
+    assert( rc!=LUA_ERRERR );   // since we've authored it
+
+    lua_remove(L,1);    // remove error handler
+
+    // Lua 5.1 error handler is limited to one return value; taking stack trace
+    // via registry
+    //
+    if (rc!=0) {    
+        STACK_GROW(L,1);
+        lua_pushlightuserdata( L, STACK_TRACE_KEY );
+        lua_gettable(L, LUA_REGISTRYINDEX);
+
+        // For cancellation, a stack trace isn't placed
+        //
+        assert( lua_istable(L,2) || (lua_touserdata(L,1)==CANCEL_ERROR) );
+        
+        // Just leaving the stack trace table on the stack is enough to get
+        // it through to the master.
+    }
+
+#else
+    // This code does not use 'lane_error'
+    // NOTE(review): on error only the message is left on the stack here, yet 'thread_join' moves 2 values -- confirm
+    // [1]: function to run
+    // [2..top]: parameters
+    //
+    rc= lua_pcall( L, lua_gettop(L)-1, LUA_MULTRET, 0 /*no error handler*/ );
+        // 0: no error
+        // LUA_ERRRUN: a runtime error (error pushed on stack)
+        // LUA_ERRMEM: memory allocation error
+#endif
+
+//STACK_DUMP(L);
+    // Call finalizers, if the script has set them up.
+    //
+    rc2= run_finalizers(L,rc);
+    if (rc2!=0) {
+        // Error within a finalizer!  
+        // 
+        // [-1]: error message
+
+        rc= rc2;    // we're overruling the earlier script error or normal return
+
+        lua_insert( L,1 );  // make error message [1]
+        lua_settop( L,1 );  // remove all rest
+
+        // Place an empty stack table just to keep the API simple (always when
+        // there's an error, there's also stack table - though it may be empty).
+        //
+        lua_newtable(L);
+    }
+
+    if (s->selfdestruct_next != NULL) {
+        // We're a free-running thread and no-one's there to clean us up.
+        // NOTE(review): tested without 'selfdestruct_cs'; 'thread_gc' could chain us right after this test -- confirm
+        lua_close( s->L );
+        L= 0;
+
+    #if !( (defined PLATFORM_WIN32) || (defined PLATFORM_POCKETPC) || (defined PTHREAD_TIMEDJOIN) )
+        SIGNAL_FREE( &s->done_signal_ );
+        MUTEX_FREE( &s->done_lock_ );
+    #endif
+        selfdestruct_remove(s);     // away from selfdestruct chain
+        free(s);
+
+    } else {
+        // leave results (1..top) or error message + stack trace (1..2) on the stack - master will copy them
+
+        enum e_status st= 
+            (rc==0) ? DONE 
+                    : (lua_touserdata(L,1)==CANCEL_ERROR) ? CANCELLED 
+                    : ERROR_ST;
+
+        // Posix no PTHREAD_TIMEDJOIN:
+        // 		'done_lock' protects the -> DONE|ERROR_ST|CANCELLED state change
+        //
+    #if (defined PLATFORM_WIN32) || (defined PLATFORM_POCKETPC) || (defined PTHREAD_TIMEDJOIN)
+        s->status= st;
+    #else
+        MUTEX_LOCK( &s->done_lock_ );
+        {
+            s->status= st;
+            SIGNAL_ONE( &s->done_signal_ );   // wake up master (while 's->done_lock' is on)
+        }
+        MUTEX_UNLOCK( &s->done_lock_ );
+    #endif
+    }
+
+    return 0;   // ignored
+}
+
+
+//---
+// lane_ud= thread_new( function, [libs_str], 
+//                          [cancelstep_uint=0], 
+//                          [prio_int=0],
+//                          [globals_tbl],
+//                          [... args ...] )
+//
+// Upvalues: metatable to use for 'lane_ud'
+//
+LUAG_FUNC( thread_new )
+{
+    lua_State *L2;
+    struct s_lane *s;
+    struct s_lane **ud;
+
+    const char *libs= lua_tostring( L, 2 );
+    uint_t cs= luaG_optunsigned( L, 3,0);
+    int prio= luaL_optinteger( L, 4,0);
+    uint_t glob= luaG_isany(L,5) ? 5:0;
+
+    #define FIXED_ARGS (5)
+    uint_t args= (lua_gettop(L) > FIXED_ARGS) ? (uint_t)(lua_gettop(L) - FIXED_ARGS) : 0;   // never underflows
+
+    if (prio < THREAD_PRIO_MIN || prio > THREAD_PRIO_MAX) {
+        luaL_error( L, "Priority out of range: %d..+%d (%d)", 
+                            THREAD_PRIO_MIN, THREAD_PRIO_MAX, prio );
+    }
+
+    // Validate the globals argument before the sub state exists: 'luaL_error()'
+    // does not return, so raising it after 'luaL_newstate()' would leak 'L2'.
+    if (glob!=0 && !lua_istable(L,glob))
+        luaL_error( L, "Expected table, got %s", luaG_typename(L,glob) );
+
+    /* --- Create and prepare the sub state --- */
+
+    L2 = luaL_newstate();   // uses standard 'realloc()'-based allocator,
+                            // sets the panic callback
+    if (!L2) luaL_error( L, "'luaL_newstate()' failed; out of memory" );
+
+    STACK_GROW( L,2 );
+
+    // Setting the globals table (needs to be done before loading stdlibs,
+    // and the lane function)
+    //
+    if (glob!=0) {
+STACK_CHECK(L)
+        lua_pushvalue( L, glob );
+        luaG_inter_move( L,L2, 1 );     // moves the table to L2
+        // L2 [-1]: table of globals
+
+        // "You can change the global environment of a Lua thread using lua_replace"
+        // (refman-5.0.pdf p. 30) 
+        //
+        lua_replace( L2, LUA_GLOBALSINDEX );
+STACK_END(L,0)
+    }
+
+    // Selected libraries
+    //
+    if (libs) {
+        const char *err= luaG_openlibs( L2, libs );
+        ASSERT_L( !err );   // bad libs should have been noticed by 'lanes.lua'
+
+        serialize_require( L2 );
+    }
+
+    // Lane main function
+    //
+STACK_CHECK(L)
+    lua_pushvalue( L, 1 );
+    luaG_inter_move( L,L2, 1 );    // L->L2
+STACK_MID(L,0)
+
+    ASSERT_L( lua_gettop(L2) == 1 );
+    ASSERT_L( lua_isfunction(L2,1) );
+
+    // revive arguments
+    //
+    if (args) luaG_inter_copy( L,L2, args );    // L->L2
+STACK_MID(L,0)
+
+ASSERT_L( (uint_t)lua_gettop(L2) == 1+args );
+ASSERT_L( lua_isfunction(L2,1) );
+
+    // 's' is allocated from heap, not Lua, since its life span may surpass 
+    // the handle's (if free running thread)
+    //
+    ud= lua_newuserdata( L, sizeof(struct s_lane*) );
+    ASSERT_L(ud);
+
+    s= *ud= malloc( sizeof(struct s_lane) );
+    ASSERT_L(s);
+
+    //memset( s, 0, sizeof(struct s_lane) );
+    s->L= L2;
+    s->status= PENDING;
+    s->cancel_request= FALSE;
+
+#if !( (defined PLATFORM_WIN32) || (defined PLATFORM_POCKETPC) || (defined PTHREAD_TIMEDJOIN) )
+    MUTEX_INIT( &s->done_lock_ );
+    SIGNAL_INIT( &s->done_signal_ );
+#endif
+    s->mstatus= NORMAL;
+    s->selfdestruct_next= NULL;
+
+    // Set metatable for the userdata
+    //
+    lua_pushvalue( L, lua_upvalueindex(1) );
+    lua_setmetatable( L, -2 );
+STACK_MID(L,1)
+
+    // Place 's' to registry, for 'cancel_test()' (even if 'cs'==0 we still
+    // do cancel tests at pending send/receive).
+    //
+    lua_pushlightuserdata( L2, CANCEL_TEST_KEY );
+    lua_pushlightuserdata( L2, s );
+    lua_rawset( L2, LUA_REGISTRYINDEX );
+
+    if (cs) {
+        lua_sethook( L2, cancel_hook, LUA_MASKCOUNT, cs );
+    }
+
+    THREAD_CREATE( &s->thread, lane_main, s, prio );
+STACK_END(L,1)
+
+    return 1;
+}
+
+
+//---
+// = thread_gc( lane_ud )
+//
+// Cleanup for a thread userdata. If the thread is still executing, leave it
+// alive as a free-running thread (will clean up itself).
+//
+// * Why NOT cancel/kill a loose thread: 
+//
+// At least timer system uses a free-running thread, they should be handy
+// and the issue of cancelling/killing threads at gc is not very nice, either
+// (would easily cause waits at gc cycle, which we don't want).
+//
+// * Why YES kill a loose thread:
+//
+// Current way causes segfaults at program exit, if free-running threads are
+// in certain stages. Details are not clear, but this is the core reason.
+// If gc would kill threads then at process exit only one thread would remain.
+//
+// Todo: Maybe we should have a clear #define for selecting either behaviour.
+//
+LUAG_FUNC( thread_gc ) {
+    struct s_lane *s= lua_toLane(L,1);
+
+    // We can read 's->status' without locks, but not wait for it
+    //
+    if (s->status < DONE) {
+        // Still pending/running: hand 's' over to the selfdestruct chain
+        selfdestruct_add(s);
+        assert( s->selfdestruct_next );
+        return 0;
+
+    } else if (s->mstatus==KILLED) {
+        // Make sure a kill has proceeded, before cleaning up the data structure.
+        //
+        // If not doing 'THREAD_WAIT()' we should close the Lua state here
+        // (can it be out of order, since we killed the lane abruptly?)
+        //
+#if 0
+        lua_close( s->L );
+#else
+fprintf( stderr, "** Joining with a killed thread (needs testing) **" );
+#if (defined PLATFORM_WIN32) || (defined PLATFORM_POCKETPC) || (defined PTHREAD_TIMEDJOIN)
+        THREAD_WAIT( &s->thread, -1 );
+#else
+        THREAD_WAIT( &s->thread, &s->done_signal_, &s->done_lock_, &s->status, -1 );
+#endif
+fprintf( stderr, "** Joined ok **" );
+#endif
+    }
+
+    // Clean up after a (finished) thread
+    // NOTE(review): 's->L' is not closed here; confirm a finished lane that was never joined does not leak its state
+#if (! ((defined PLATFORM_WIN32) || (defined PLATFORM_POCKETPC) || (defined PTHREAD_TIMEDJOIN)))
+    SIGNAL_FREE( &s->done_signal_ );
+    MUTEX_FREE( &s->done_lock_ );
+#endif
+    free(s);    // 's' is malloc'ed on every platform, so it must be released on every platform
+
+    return 0;
+}
+
+
+//---
+// = thread_cancel( lane_ud [,timeout_secs=0.0] [,force_kill_bool=false] )
+//
+// The originator thread asking us specifically to cancel the other thread.
+//
+// 'timeout': <0: wait forever, until the lane is finished
+//            0.0: just signal it to cancel, no time waited
+//            >0: time to wait for the lane to detect cancellation
+//
+// 'force_kill': if true, and lane does not detect cancellation within timeout,
+//            it is forcefully killed. Using this with 0.0 timeout means just kill
+//            (unless the lane is already finished).
+//
+// Returns: true if the lane was already finished (DONE/ERROR_ST/CANCELLED) or if we
+//          managed to cancel it.
+//          false if the cancellation timed out, or a kill was needed.
+//
+LUAG_FUNC( thread_cancel )
+{
+    struct s_lane *s= lua_toLane(L,1);
+
+    // A numeric [2] is the timeout. A numeric or nil [2] fills the timeout
+    // slot, so 'force_kill' is then looked for at [3]; any other kind of
+    // value at [2] is itself taken as the 'force_kill' flag.
+    //
+    bool_t has_secs= lua_isnumber(L,2);
+    double secs= has_secs ? lua_tonumber(L,2) : 0.0;
+    uint_t force_i= (has_secs || lua_isnil(L,2)) ? 3 : 2;
+    bool_t force= lua_toboolean(L,force_i);     // FALSE if nothing there
+
+    // Lanes that have already finished are reported as done
+    //
+    bool_t done= TRUE;
+
+    // We can read 's->status' without locks, but not wait for it (if Posix no PTHREAD_TIMEDJOIN)
+    //
+    if (s->status < DONE) {
+        s->cancel_request= TRUE;    // it's now signalled to stop
+        done= thread_cancel( s, secs, force );
+    }
+    lua_pushboolean( L, done );
+    return 1;
+}
+
+static bool_t thread_cancel( struct s_lane *s, double secs, bool_t force )
+{
+    bool_t finished;    // TRUE once the wait below saw the lane end within 'secs'
+#if (defined PLATFORM_WIN32) || (defined PLATFORM_POCKETPC) || (defined PTHREAD_TIMEDJOIN)
+    finished= THREAD_WAIT( &s->thread, secs );
+#else
+    finished= THREAD_WAIT( &s->thread, &s->done_signal_, &s->done_lock_, &s->status, secs );
+#endif
+    if (finished || !force)
+        return finished;
+
+    // Timed out and a kill was asked for. Killing is asynchronous; the
+    // handle's gc _will_ wait for it to complete, so that the data structure
+    // can be released safely (an alternative would be "cancellation cleanup
+    // handlers", which at least PThread seems to have).
+    //
+    THREAD_KILL( &s->thread );
+    s->mstatus= KILLED;     // mark 'gc' to wait for it
+    return finished;
+}
+
+
+//---
+// str= thread_status( lane_ud )
+//
+// Returns: "pending"   not started yet
+//          -> "running"   started, doing its work..
+//             <-> "waiting"   blocked in a receive()
+//                -> "done"     finished, results are there
+//                   / "error"     finished at an error, error value is there
+//                   / "cancelled"   execution cancelled by M (state gone)
+//
+LUAG_FUNC( thread_status )
+{
+    struct s_lane *s= lua_toLane(L,1);
+    enum e_status st= s->status;    // read just once (volatile)
+    const char *str= NULL;
+    if (s->mstatus == KILLED) st= CANCELLED;    // a killed lane reports as cancelled
+    switch( st ) {
+        case PENDING:   str= "pending";   break;
+        case RUNNING:   str= "running";   break;    // like in 'co.status()'
+        case WAITING:   str= "waiting";   break;
+        case DONE:      str= "done";      break;
+        case ERROR_ST:  str= "error";     break;
+        case CANCELLED: str= "cancelled"; break;
+        default:        break;      // unknown value: 'str' stays NULL
+    }
+    ASSERT_L(str);
+
+    lua_pushstring( L, str );
+    return 1;
+}
+
+
+//---
+// [...] | [nil, err_any, stack_tbl]= thread_join( lane_ud [, wait_secs=-1] )
+//
+//  timeout:   returns nothing (as does joining a lane whose results were already taken)
+//  done:      returns return values (0..N)
+//  error:     returns nil + error value + stack table
+//  cancelled: returns nothing
+//
+LUAG_FUNC( thread_join )
+{
+    struct s_lane *s= lua_toLane(L,1);
+    double wait_secs= luaL_optnumber(L,2,-1.0);
+    lua_State *L2= s->L;
+    int ret;
+    bool_t done;
+    // Results can be taken only once: afterwards the lane's state is closed and 's->L' is NULL
+    if (!L2) return 0;
+#if (defined PLATFORM_WIN32) || (defined PLATFORM_POCKETPC) || (defined PTHREAD_TIMEDJOIN)
+    done= THREAD_WAIT( &s->thread, wait_secs );
+#else
+    done= THREAD_WAIT( &s->thread, &s->done_signal_, &s->done_lock_, &s->status, wait_secs );
+#endif
+    if (!done) return 0;      // timeout: pushes none, leaves 'L2' alive
+
+    // Thread is DONE/ERROR_ST/CANCELLED; all ours now
+
+    STACK_GROW( L, 1 );
+
+    switch( s->status ) {
+        case DONE: {   
+            uint_t n= lua_gettop(L2);       // whole L2 stack
+            luaG_inter_move( L2,L, n );
+            ret= n;
+            } break;
+
+        case ERROR_ST:
+            lua_pushnil(L);
+            luaG_inter_move( L2,L, 2 );    // error message at [-2], stack trace at [-1]
+            ret= 3;
+            break;
+
+        case CANCELLED:
+            ret= 0;
+            break;
+        
+        default:
+            fprintf( stderr, "Status: %d\n", s->status );
+            ASSERT_L( FALSE ); ret= 0;
+    }
+    lua_close(L2);
+    s->L= NULL;     // no dangling pointer: a later join must not touch (or close) the dead state
+    return ret;
+}
+
+
+/*---=== Timer support ===---
+*/
+
+/*
+* Push a timer gateway Linda object; only one deep userdata is
+* created for this, each lane will get its own proxy.
+*
+* Note: this needs to be done on the C side; Lua wouldn't be able
+*       to even see, when we've been initialized for the very first
+*       time (with us, they will be).
+*/
+static
+void push_timer_gateway( lua_State *L ) {
+
+    /* No need to lock; 'static' is just fine
+    *  NOTE(review): presumably relies on the first call not racing with another thread -- confirm */
+    static DEEP_PRELUDE *p;  // = NULL
+
+  STACK_CHECK(L)
+    if (!p) {
+        // Create the Linda (only on first time)
+        //
+        // proxy_ud= deep_userdata( idfunc )
+        //
+        lua_pushcfunction( L, luaG_deep_userdata );
+        lua_pushcfunction( L, LG_linda_id );
+        lua_call( L, 1 /*args*/, 1 /*retvals*/ );
+
+        ASSERT_L( lua_isuserdata(L,-1) );
+        
+        // Proxy userdata contents is only a 'DEEP_PRELUDE*' pointer
+        // (remembered in 'p', so that later calls take the 'else' branch)
+        p= * (DEEP_PRELUDE**) lua_touserdata( L, -1 );
+        ASSERT_L(p && p->refcount==1 && p->deep);
+
+        // [-1]: proxy for accessing the Linda
+
+    } else {
+        /* Push a proxy based on the deep userdata we stored. 
+        */
+        luaG_push_proxy( L, LG_linda_id, p );
+    }
+  STACK_END(L,1)
+}
+
+/*
+* secs= now_secs()
+*
+* Returns the current time, as seconds (millisecond resolution).
+*/
+LUAG_FUNC( now_secs )
+{
+    lua_pushnumber( L, now_secs() );    // the C-level 'now_secs()', pushed as a Lua number
+    return 1;                           // one result
+}
+
+/*
+* wakeup_at_secs= wakeup_conv( date_tbl )    -- 'date_tbl' fields are run through 'mktime()'
+*/
+LUAG_FUNC( wakeup_conv )
+{
+    struct tm when= {0};
+        //
+        // Filled from these fields of the table at [1]:
+        //   .year (four digits)
+        //   .month (1..12)
+        //   .day (1..31)
+        //   .hour (0..23)
+        //   .min (0..59)
+        //   .sec (0..61)
+        //   .isdst (daylight saving on/off)
+        //   ('.yday' is not read)
+
+  STACK_CHECK(L)
+    lua_getfield( L, 1, "year" );
+    when.tm_year= (int) lua_tointeger(L,-1) - 1900;     lua_pop(L,1);
+    lua_getfield( L, 1, "month" );
+    when.tm_mon= (int) lua_tointeger(L,-1) - 1;         lua_pop(L,1);   // 0..11
+    lua_getfield( L, 1, "day" );
+    when.tm_mday= (int) lua_tointeger(L,-1);            lua_pop(L,1);   // 1..31
+    lua_getfield( L, 1, "hour" );
+    when.tm_hour= (int) lua_tointeger(L,-1);            lua_pop(L,1);   // 0..23
+    lua_getfield( L, 1, "min" );
+    when.tm_min= (int) lua_tointeger(L,-1);             lua_pop(L,1);   // 0..59
+    lua_getfield( L, 1, "sec" );
+    when.tm_sec= (int) lua_tointeger(L,-1);             lua_pop(L,1);   // 0..60
+
+    // A boolean '.isdst' is trusted as such. Anything else (including a
+    // missing field) gives -1, which lets 'mktime' decide whether the time
+    // is within DST or not.
+    //
+    lua_getfield( L, 1, "isdst" );
+    when.tm_isdst= lua_isboolean(L,-1) ? lua_toboolean(L,-1) : -1;  // 0/1/negative
+    lua_pop(L,1);
+  STACK_END(L,0)
+
+    // Whole seconds only (ms=0)
+    lua_pushnumber( L, (double) mktime( &when ) );
+    return 1;
+}
+
+
+/*---=== Module linkage ===---
+*/
+// Registration helpers; each expands to ONE statement (safe under an unbraced 'if') that sets a global in 'L'.
+#define REG_FUNC( name ) \
+    do { lua_pushcfunction( L, LG_##name ); \
+         lua_setglobal( L, #name ); } while(0)
+
+#define REG_FUNC2( name, val ) \
+    do { lua_pushcfunction( L, val ); \
+         lua_setglobal( L, #name ); } while(0)
+
+#define REG_STR2( name, val ) \
+    do { lua_pushstring( L, val ); \
+         lua_setglobal( L, #name ); } while(0)
+
+#define REG_INT2( name, val ) \
+    do { lua_pushinteger( L, val ); \
+         lua_setglobal( L, #name ); } while(0)
+
+
+int     // module entry point: one-time process-wide setup, then registers the C API as globals of 'L'
+#if (defined PLATFORM_WIN32) || (defined PLATFORM_POCKETPC)
+__declspec(dllexport)
+#endif
+	luaopen_lanes( lua_State *L ) {
+    const char *err;
+    static volatile char been_here;  // =0
+
+    // One time initializations:
+    // NOTE(review): 'been_here' is tested and set without a lock -- confirm the first load cannot race
+    if (!been_here) {
+        been_here= TRUE;
+
+#if (defined PLATFORM_WIN32) || (defined PLATFORM_POCKETPC)
+        now_secs();     // initialize 'now_secs()' internal offset
+#endif
+
+#if (defined PLATFORM_OSX) && (defined _UTILBINDTHREADTOCPU)
+        chudInitialize();
+#endif
+    
+        // Locks for 'tools.c' inc/dec counters
+        //
+        MUTEX_INIT( &deep_lock );
+        MUTEX_INIT( &mtid_lock );
+    
+        // Serialize calls to 'require' from now on, also in the primary state
+        //
+        MUTEX_RECURSIVE_INIT( &require_cs );
+
+        serialize_require( L );
+
+        // Selfdestruct chain handling
+        // (the handler sends a cancel request to lanes still chained at process exit)
+        MUTEX_INIT( &selfdestruct_cs );
+        atexit( selfdestruct_atexit );
+
+        //---
+        // Linux needs SCHED_RR to change thread priorities, and that is only
+        // allowed for sudo'ers. SCHED_OTHER (default) has no priorities.
+        // SCHED_OTHER threads are always lower priority than SCHED_RR.
+        //
+        // ^-- those apply to 2.6 kernel.  IF **wishful thinking** these 
+        //     constraints will change in the future, non-sudo priorities can 
+        //     be enabled also for Linux.
+        //
+#ifdef PLATFORM_LINUX
+        sudo= geteuid()==0;     // we are root?
+
+        // If lower priorities (-2..-1) are wanted, we need to lift the main
+        // thread to SCHED_RR and 50 (medium) level. Otherwise, we're always below 
+        // the launched threads (even -2).
+	    //
+  #ifdef LINUX_SCHED_RR
+        if (sudo) {
+            struct sched_param sp= {0}; sp.sched_priority= _PRIO_0;
+            PT_CALL( pthread_setschedparam( pthread_self(), SCHED_RR, &sp) );
+        }
+  #endif
+#endif
+        err= init_keepers();
+        if (err) 
+            luaL_error( L, "Unable to initialize: %s", err );
+    }
+    
+    // Linda identity function
+    //
+    REG_FUNC( linda_id );
+
+    // metatable for threads
+    // (only '__gc' is set; the table travels as upvalue 1 of 'thread_new')
+    lua_newtable( L );
+    lua_pushcfunction( L, LG_thread_gc );
+    lua_setfield( L, -2, "__gc" );
+
+    lua_pushcclosure( L, LG_thread_new, 1 );    // metatable as closure param
+    lua_setglobal( L, "thread_new" );
+
+    REG_FUNC( thread_status );
+    REG_FUNC( thread_join );
+    REG_FUNC( thread_cancel );
+
+    REG_STR2( _version, VERSION );
+    REG_FUNC( _single );
+
+    REG_FUNC2( _deep_userdata, luaG_deep_userdata );
+
+    REG_FUNC( now_secs );
+    REG_FUNC( wakeup_conv );
+
+    push_timer_gateway(L);    
+    lua_setglobal( L, "timer_gateway" );
+
+    REG_INT2( max_prio, THREAD_PRIO_MAX );
+
+    lua_pushlightuserdata( L, CANCEL_ERROR );   // the value a cancelled lane raises as its error
+    lua_setglobal( L, "cancel_error" );
+
+    return 0;
+}
+
+
diff --git a/src/lanes.lua b/src/lanes.lua
new file mode 100644
index 0000000..c68506d
--- /dev/null
+++ b/src/lanes.lua
@@ -0,0 +1,611 @@
+--
+-- LANES.LUA
+--
+-- Multithreading and -core support for Lua
+--
+-- Author: Asko Kauppi <akauppi@gmail.com>
+--
+-- History:
+--    Jun-08 AKa: major revise
+--    15-May-07 AKa: pthread_join():less version, some speedup & ability to
+--                   handle more threads (~ 8000-9000, up from ~ 5000)
+--    26-Feb-07 AKa: serialization working (C side)
+--    17-Sep-06 AKa: started the module (serialization)
+--
+--[[
+===============================================================================
+
+Copyright (C) 2007-08 Asko Kauppi <akauppi@gmail.com>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+
+===============================================================================
+]]--
+
+module( "lanes", package.seeall )
+
+require "lua51-lanes"
+assert( type(lanes)=="table" )
+
+local mm= lanes
+
+local linda_id=    assert( mm.linda_id )
+
+local thread_new=   assert(mm.thread_new)
+local thread_status= assert(mm.thread_status)
+local thread_join=  assert(mm.thread_join)
+local thread_cancel= assert(mm.thread_cancel)
+
+local _single= assert(mm._single)
+local _version= assert(mm._version)
+
+local _deep_userdata= assert(mm._deep_userdata)
+
+local now_secs= assert( mm.now_secs )
+local wakeup_conv= assert( mm.wakeup_conv )
+local timer_gateway= assert( mm.timer_gateway )
+
+local max_prio= assert( mm.max_prio )
+
+-- This check is for sublanes requiring Lanes
+--
+-- TBD: We could also have the C level expose 'string.gmatch' for us. But this is simpler.
+--
+if not string then
+    error( "To use 'lanes', you will also need to have 'string' available.", 2 )
+end
+
+-- 
+-- Cache globals for code that might run under sandboxing 
+--
+local assert= assert
+local string_gmatch= assert( string.gmatch )
+local select= assert( select )
+local type= assert( type )
+local pairs= assert( pairs )
+local tostring= assert( tostring )
+local error= assert( error )
+local setmetatable= assert( setmetatable )
+local rawget= assert( rawget )
+
+-- Module metadata. This is a global assignment made after 'module()', so it
+-- ends up as a field of the 'lanes' module table.
+-- 'version' is the '_version' string picked up from the C core above.
+--
+ABOUT= 
+{
+    author= "Asko Kauppi <akauppi@gmail.com>",
+    description= "Running multiple Lua states in parallel",
+    license= "MIT/X11",
+    copyright= "Copyright (c) 2007-08, Asko Kauppi",
+    version= _version,
+}
+
+
+-- Making copies of necessary system libs will pass them on as upvalues;
+-- only the first state doing "require 'lanes'" will need to have 'string'
+-- and 'table' visible.
+--
+-- Debug helper: writes 'str' and a terminating newline to stderr.
+--
+local function WR(str)
+    local line= str.."\n"
+    io.stderr:write( line )
+end
+
+-- Debug helper: writes every 'key=value' pair of 'tbl' to stderr (via 'WR'),
+-- one pair per line, in 'pairs()' order. Does nothing if 'tbl' is nil/false.
+--
+-- Both keys and values go through 'tostring()': plain concatenation of the
+-- key would raise an error for boolean, table, function or userdata keys.
+--
+local function DUMP( tbl )
+    if not tbl then return end
+    local str=""
+    for k,v in pairs(tbl) do
+        str= str..tostring(k).."="..tostring(v).."\n"
+    end
+    WR(str)
+end
+
+
+---=== Laning ===---
+
+-- Metatable for lane handles ('__index' only fires for keys that are not
+-- stored in the handle table itself).
+--
+-- lane_h[1..n]: lane results, same as via 'lane_h:join()'
+-- lane_h[0]:    can be read to make sure a thread has finished (always gives 'true')
+-- lane_h[-1]:   error message, without propagating the error
+--
+--      Reading a Lane result (or [0]) waits for the lane to finish and
+--      propagates a possible error in the lane (and execution does not
+--      return). Cancelled lanes give 'nil' values.
+--
+-- lane_h.status: "pending"/"running"/"waiting"/"done"/"error"/"cancelled"
+--
+-- Any other key raises an "Unknown key" error.
+--
+local lane_mt= {
+    __index= function( me, k )
+                if type(k) == "number" then
+                    -- 'me[0]=true' marks we've already taken in the results
+                    --
+                    if not rawget( me, 0 ) then
+                        -- Wait indefinitely; either propagates an error or
+                        -- returns the return values
+                        --
+                        me[0]= true  -- marker, even on errors
+
+                        local t= { thread_join(me._ud) }   -- wait indefinitely
+                            --
+                            -- { ... }      "done": regular return, 0..N results
+                            -- { }          "cancelled"
+                            -- { nil, err_str, stack_tbl } "error"
+                        
+                        local st= thread_status(me._ud)
+                        if st=="done" then
+                            -- Use 'pairs' and not 'ipairs' so that nil holes in
+                            -- the returned values are tolerated.
+                            --
+                            for i,v in pairs(t) do
+                                me[i]= v
+                            end
+                        elseif st=="error" then
+                            assert( t[1]==nil and t[2] and type(t[3])=="table" )
+                            me[-1]= t[2]
+                            -- me[-2] could carry the stack table, but even 
+                            -- me[-1] is rather unnecessary (and undocumented);
+                            -- use ':join()' instead.   --AKa 22-Jan-2009
+                        elseif st=="cancelled" then
+                            -- do nothing
+                        else
+                            -- 'tostring' so that an unexpected non-string
+                            -- status still yields a readable message
+                            error( "Unexpected status: "..tostring(st) )
+                        end
+                    end
+
+                    -- Check errors even if we'd first peeked them via [-1]
+                    -- and then came for the actual results.
+                    --
+                    local err= rawget(me, -1)
+                    if err~=nil and k~=-1 then
+                        -- Note: Lua 5.1 interpreter is not prepared to show
+                        --       non-string errors, so we use 'tostring()' here
+                        --       to get meaningful output.  --AKa 22-Jan-2009
+                        --
+                        --       Also, the stack dump we get is no good; it only
+                        --       lists our internal Lanes functions. There seems
+                        --       to be no way to switch it off, though.
+                        
+                        -- Level 3 should show the line where 'h[x]' was read
+                        -- but this only seems to work for string messages
+                        -- (Lua 5.1.4). No idea, why.   --AKa 22-Jan-2009
+                        --
+                        error( tostring(err), 3 )   -- level 3 should show the line where 'h[x]' was read
+                    end
+                    return rawget( me, k )
+                    --
+                elseif k=="status" then     -- me.status
+                    return thread_status(me._ud)
+                    --
+                else
+                    -- 'k' may be any non-number value here (boolean, table,
+                    -- ...); plain concatenation would raise a misleading
+                    -- "attempt to concatenate" error instead of this one.
+                    error( "Unknown key: "..tostring(k) )
+                end
+             end
+    }
+
+-----
+-- h= lanes.gen( [libs_str|opt_tbl [, ...],] lane_func ) ( [...] )
+--
+-- 'libs': nil:     no libraries available (default)
+--         "":      only base library ('assert', 'print', 'unpack' etc.)
+--         "math,os": math + os + base libraries (named ones + base)
+--         "*":     all standard libraries available
+--
+-- 'opt': .priority:  int (-2..+2) smaller is lower priority (0 = default)
+--
+--	      .cancelstep: bool | uint
+--            false: cancellation check only at pending Linda operations
+--                   (send/receive) so no runtime performance penalty (default)
+--            true:  adequate cancellation check (same as 100)
+--            >0:    cancellation check every x Lua lines (small number= faster
+--                   reaction but more performance overhead)
+--
+--        .globals:  table of globals to set for a new thread (passed by value)
+--
+--        ... (more options may be introduced later) ...
+--
+-- Calling with a function parameter ('lane_func') ends the string/table
+-- modifiers, and prepares a lane generator. One can either finish here,
+-- and call the generator later (maybe multiple times, with different parameters) 
+-- or add on actual thread arguments to also ignite the thread on the same call.
+--
+-- Forward declaration: 'gen()' below refers to 'lane_proxy', which is only
+-- assigned after it.
+--
+local lane_proxy
+
+-- Set of names accepted in the 'libs' string(s) given to 'gen()'; anything
+-- else is rejected there with "Bad library name".
+--
+local valid_libs= {
+    ["package"]= true,
+    ["table"]= true,
+    ["io"]= true,
+    ["os"]= true,
+    ["string"]= true,
+    ["math"]= true,
+    ["debug"]= true,
+    --
+    ["base"]= true,
+    ["coroutine"]= true,
+    ["*"]= true
+}
+
+-- lane_gen= gen( [libs_str|opt_tbl [, ...],] lane_func )
+--
+-- Collects the library list and options and returns a lane generator:
+-- calling the generator with the lane's arguments starts a new thread via
+-- 'thread_new()' and returns its handle (see 'lane_proxy').
+--
+-- Leading string parameters are joined with "," into the library list,
+-- leading tables are merged into the options (later tables override earlier
+-- ones) and 'nil's are skipped. The last parameter must be the lane function.
+--
+-- Every parameter error is raised with level 'lev', so it is reported at the
+-- line that called 'gen()' rather than inside this module.
+--
+function gen( ... )
+    local opt= {}
+    local libs= nil
+    local lev= 2  -- level for errors (the caller of 'gen')
+
+    local n= select('#',...)
+    
+    if n==0 then
+        error( "No parameters!", lev )
+    end
+
+    -- All but the last parameter are modifiers (library strings / option tables)
+    --
+    for i=1,n-1 do
+        local v= select(i,...)
+        if type(v)=="string" then
+            libs= libs and libs..","..v or v
+        elseif type(v)=="table" then
+            for k,vv in pairs(v) do
+                opt[k]= vv
+            end
+        elseif v==nil then
+            -- skip
+        else
+            error( "Bad parameter: "..tostring(v), lev )
+        end
+    end
+
+    local func= select(n,...)
+    if type(func)~="function" then
+        error( "Last parameter not function: "..tostring(func), lev )
+    end
+
+    -- Check 'libs' already here, so the error goes in the right place
+    -- (otherwise will be noticed only once the generator is called)
+    --
+    if libs then
+        for s in string_gmatch(libs, "[%a*]+") do
+            if not valid_libs[s] then
+                error( "Bad library name: "..s, lev )
+            end
+        end
+    end
+    
+    local prio, cs, g_tbl
+
+    for k,v in pairs(opt) do
+            if k=="priority" then prio= v
+        elseif k=="cancelstep" then cs= (v==true) and 100 or
+                                        (v==false) and 0 or 
+                                        type(v)=="number" and v or
+                                        error( "Bad cancelstep: "..tostring(v), lev )
+        elseif k=="globals" then g_tbl= v
+        --..
+        elseif k==1 then error( "unkeyed option: ".. tostring(v), lev )
+        else error( "Bad option: ".. tostring(k), lev )
+        end
+    end
+
+    -- Lane generator
+    --
+    return function(...)
+              return lane_proxy( thread_new( func, libs, cs, prio, g_tbl,
+                                             ... ) )     -- args
+           end
+end
+
+-- lane_h= lane_proxy( thread_ud )
+--
+-- Wraps the thread userdata into a lane handle: a table holding the userdata
+-- ('_ud') and the 'cancel' / 'join' methods, with 'lane_mt' as its metatable
+-- (which provides result and status access).
+--
+lane_proxy= function( ud )
+
+    -- void= me:cancel()
+    --
+    local function cancel_lane( me )
+        thread_cancel( me._ud )
+    end
+
+    -- [...] | [nil,err,stack_tbl]= me:join( [wait_secs=-1] )
+    --
+    local function join_lane( me, wait )
+        return thread_join( me._ud, wait )
+    end
+
+    local handle= { _ud= ud, cancel= cancel_lane, join= join_lane }
+
+    -- Checked before the metatable is attached, so that a missing userdata
+    -- fails the assert instead of going through 'lane_mt.__index'.
+    --
+    assert( handle._ud )
+
+    return setmetatable( handle, lane_mt )
+end
+
+
+---=== Lindas ===---
+
+-- We let the C code attach methods to userdata directly
+
+-----
+-- linda_ud= lanes.linda()
+--
+-- Creates a new Linda object through the C core ('_deep_userdata' with the
+-- Linda identity function) and checks that a userdata carrying a metatable
+-- came back before handing it to the caller.
+--
+function linda()
+    local ud= _deep_userdata( linda_id )
+    local is_userdata= (type(ud) == "userdata")
+    assert( is_userdata and getmetatable(ud) )
+    return ud
+end
+
+
+---=== Timers ===---
+
+--
+-- On first 'require "lanes"', a timer lane is spawned that will maintain
+-- timer tables and sleep in between the timer events. All interaction with
+-- the timer lane happens via a 'timer_gateway' Linda, which is common to
+-- all that 'require "lanes"'.
+-- 
+-- Linda protocol to timer lane:
+--
+--  TGW_KEY: linda_h, key, [wakeup_at_secs], [repeat_secs]
+--
+local TGW_KEY= "(timer control)"    -- the key does not matter, a 'weird' key may help debugging
+local first_time_key= "first time"
+
+-- 'first_time' is true only when the marker key is still unset in the
+-- 'timer_gateway' Linda; the marker is then set so later readers see it.
+-- NOTE(review): 'get' and 'set' are two separate Linda operations. If two
+-- states could execute this at the same moment, both might read 'nil' and
+-- both start a timer lane -- confirm that this is serialized elsewhere.
+--
+local first_time= timer_gateway:get(first_time_key) == nil
+timer_gateway:set(first_time_key,true)
+
+--
+-- Timer lane; initialize only on the first 'require "lanes"' instance (which naturally
+-- has 'table' always declared)
+--
+if first_time then
+    local table_remove= assert( table.remove )
+    local table_insert= assert( table.insert )
+
+    --
+    -- { [deep_linda_lightuserdata]= { [deep_linda_lightuserdata]=linda_h, 
+    --                                 [key]= { wakeup_secs [,period_secs] } [, ...] },
+    -- }
+    --
+    -- Collection of all running timers, indexed with linda's & key.
+    --
+    -- Note that we need to use the deep lightuserdata identifiers, instead
+    -- of 'linda_h' themselves as table indices. Otherwise, we'd get multiple
+    -- entries for the same timer.
+    --
+    -- The 'hidden' reference to Linda proxy is used in 'check_timers()' but
+    -- also important to keep the Linda alive, even if all outside world threw
+    -- away pointers to it (which would ruin uniqueness of the deep pointer).
+    -- Now we're safe.
+    --
+    local collection= {}
+
+    --
+    -- set_timer( linda_h, key [,wakeup_at_secs [,period_secs]] )
+    --
+    -- Sets, changes or (when 'wakeup_at' is nil) clears the timer identified
+    -- by the Linda and 'key'. Timers live in 'collection', grouped per Linda
+    -- under the Linda's deep identifier; 'period' may be nil (one-shot).
+    --
+    local function set_timer( linda, key, wakeup_at, period )
+
+        assert( wakeup_at==nil or wakeup_at>0.0 )
+        assert( period==nil or period>0.0 )
+
+        local deep= linda:deep()
+        assert( deep )
+
+        -- Per-Linda table; created on demand. Its [deep] slot holds the
+        -- Linda proxy itself, all other slots are timers.
+        --
+        local timers= collection[deep]
+        if not timers then
+            timers= { [deep]= linda }
+            collection[deep]= timers
+        end
+
+        if wakeup_at~=nil then
+            -- New timer, or new timings for an existing one
+            --
+            local entry= timers[key]
+            if not entry then
+                entry= {}
+                timers[key]= entry
+            end
+
+            entry[1]= wakeup_at
+            entry[2]= period    -- can be 'nil'
+            return
+        end
+
+        -- Clearing the timer just stops it; any unread timer value is left
+        -- at 'linda[key]' intentionally.
+        --
+        timers[key]= nil
+
+        -- Drop the per-Linda table once only the proxy slot is left; speeds
+        -- up timer checks and lets the 'safety reference' proxy be gc:ed.
+        --
+        for k in pairs(timers) do
+            if k~=deep then
+                return  -- other timers still use this Linda
+            end
+        end
+        collection[deep]= nil
+    end
+
+    -----
+    -- [next_wakeup_at]= check_timers()
+    --
+    -- Check timers, and wake up the ones expired (if any)
+    --
+    -- Returns the closest upcoming (remaining) wakeup time (or 'nil' if none).
+    --
+    -- Fires every timer in 'collection' that is due: the current time is
+    -- written to the timer's Linda under its key. One-shot timers are then
+    -- removed, repeating ones moved to their next wakeup after 'now'.
+    -- Returns the earliest remaining wakeup time, or 'nil' if no timers are left.
+    --
+    local function check_timers()
+
+        local now= now_secs()
+        local soonest   -- earliest upcoming wakeup seen so far
+
+        for deep,timers in pairs(collection) do
+            for key,entry in pairs(timers) do
+                --
+                -- The [deep] slot carries the Linda proxy, not a timer
+                --
+                if key~=deep then
+                    -- 'entry': { wakeup_at_secs [,period_secs] }
+                    --
+                    local due= entry[1]
+                    local period= entry[2]  -- may be 'nil'
+
+                    if due <= now then
+                        local linda= timers[deep]
+                        assert(linda)
+
+                        linda:set( key, now )
+
+                        -- 'pairs()' allows existing entries to be modified
+                        -- (and even removed) during the traversal
+
+                        if period then
+                            -- repeating timer; may jump multiple repeats
+                            --
+                            repeat
+                                due= due+period
+                            until due > now
+
+                            entry[1]= due
+                        else
+                            -- one-time timer; gone
+                            --
+                            timers[key]= nil
+                            due= nil
+                        end
+                    end
+
+                    if due and ((not soonest) or (due < soonest)) then
+                        soonest= due
+                    end
+                end
+            end -- timers of one Linda
+        end -- all Lindas
+
+        return soonest  -- may be 'nil'
+    end    
+
+    -----
+    -- Snore loop (run as a lane on the background)
+    --
+    -- High priority, to get trustworthy timings.
+    --
+    -- We let the timer lane be a "free running" thread; no handle to it
+    -- remains.
+    --
+    -- The timer lane: loops forever, firing due timers and then sleeping on
+    -- the gateway Linda until the next wakeup or the next set/clear command.
+    --
+    gen( "io", { priority=max_prio }, function()
+
+        while true do
+            local next_wakeup= check_timers()
+
+            -- Sleep until next timer to wake up, or a set/clear command
+            --
+            local secs= nil     -- no timers: wait for a command only
+            if next_wakeup then
+                secs= next_wakeup - now_secs()
+
+                -- The wakeup time may already have passed between the two
+                -- clock readings. The timeout helpers in 'threading.c' pass
+                -- negative timeouts through as "wait forever", so a negative
+                -- value must never reach 'receive'; poll instead and let the
+                -- next round of 'check_timers()' fire the timer.
+                --
+                if secs < 0 then secs= 0 end
+            end
+            local linda= timer_gateway:receive( secs, TGW_KEY )
+
+            if linda then
+                local key= timer_gateway:receive( 0.0, TGW_KEY )
+                local wakeup_at= timer_gateway:receive( 0.0, TGW_KEY )
+                local period= timer_gateway:receive( 0.0, TGW_KEY )
+
+                -- 'timer()' sends 'nil' as 'wakeup_at' to clear a timer and
+                -- 'nil' as 'period' for one-shot timers, so only 'key' can
+                -- be required here; 'set_timer()' handles the 'nil's.
+                --
+                assert( key )
+
+                set_timer( linda, key, wakeup_at, period and period>0 and period or nil )
+            end
+        end
+    end )()
+end
+
+-----
+-- = timer( linda_h, key_val, date_tbl|first_secs [,period_secs] )
+--
+-- Queues a set/clear request for the timer ('linda', 'key') to the timer
+-- lane. 'a' is either a date table (absolute time, converted with
+-- 'wakeup_conv') or a number of seconds from now; 'period' is optional.
+--
+function timer( linda, key, a, period )
+
+    if a==0.0 then
+        -- Caller expects to get current time stamp in Linda, on return
+        -- (like the timer had expired instantly); it would be good to set this
+        -- as late as possible (to give most current time) but also we want it
+        -- to precede any possible timers that might start striking.
+        --
+        linda:set( key, now_secs() )
+
+        local repeating= period and period~=0.0
+        if not repeating then
+            timer_gateway:send( TGW_KEY, linda, key, nil, nil )   -- clear the timer
+            return  -- nothing more to do
+        end
+        a= period   -- first repeat comes one period from now
+    end
+
+    -- Absolute wakeup time: a date table gives a point of time, anything
+    -- else (or a conversion that yields nothing) counts as seconds from now.
+    --
+    local wakeup_at
+    if type(a)=="table" then
+        wakeup_at= wakeup_conv(a)
+    end
+    if not wakeup_at then
+        wakeup_at= now_secs()+a
+    end
+
+    -- queue to timer
+    --
+    timer_gateway:send( TGW_KEY, linda, key, wakeup_at, period )
+end
+
+
+---=== Lock & atomic generators ===---
+
+-- These functions are just surface sugar, but make solutions easier to read.
+-- Not many applications should even need explicit locks or atomic counters.
+
+--
+-- lock_f= lanes.genlock( linda_h, key [,N_uint=1] )
+--
+-- = lock_f( +M )   -- acquire M
+--      ...locked...
+-- = lock_f( -M )   -- release M
+--
+-- Returns an access function that allows 'N' simultaneous entries between
+-- acquire (+M) and release (-M). For binary locks, use M==1.
+--
+-- Sets up 'key' of 'linda' as a lock with room for 'N' entries and returns
+-- the access function 'lock_f(M)': positive M acquires M entries (pushes M
+-- 'true' values, suspending until they fit), negative M releases -M entries.
+--
+-- 'N' defaults to 1 (a binary lock), as documented in the signature above;
+-- without the default an omitted 'N' was passed on to 'linda:limit()' as nil.
+--
+function genlock( linda, key, N )
+    linda:limit( key, N or 1 )
+    linda:set(key,nil)  -- clears existing data
+
+    --
+    -- [true [, ...]= trues(uint)
+    --
+    local function trues(n)
+        if n>0 then return true,trues(n-1) end
+    end
+
+    return
+    function(M)
+        if M>0 then
+            -- 'nil' timeout allows 'key' to be numeric
+            linda:send( nil, key, trues(M) )    -- suspends until been able to push them
+        else
+            for i=1,-M do
+                linda:receive( key )
+            end
+        end
+    end
+end
+
+
+--
+-- atomic_f= lanes.genatomic( linda_h, key [,initial_num=0.0] )
+--
+-- int= atomic_f( [diff_num=1.0] )
+--
+-- Returns an access function that allows atomic increment/decrement of the
+-- number in 'key'.
+--
+-- Initializes 'key' of 'linda' to 'initial_val' (0.0 if not given) and
+-- returns a function that adds 'diff' (1.0 if not given) to the stored
+-- number and returns the new value.
+--
+function genatomic( linda, key, initial_val )
+    linda:limit(key,2)          -- value [,true]
+    linda:set(key,initial_val or 0.0)   -- clears existing data (also queue)
+
+    local function atomic_add( diff )
+        -- 'nil' timeout allows 'key' to be numeric
+        linda:send( nil, key, true )    -- suspends until our 'true' is in
+
+        local step= diff or 1.0
+        local val= linda:get(key) + step
+
+        linda:set( key, val )   -- releases the lock, by emptying queue
+        return val
+    end
+
+    return atomic_add
+end
+
+
+--the end
diff --git a/src/threading.c b/src/threading.c
new file mode 100644
index 0000000..68d1e41
--- /dev/null
+++ b/src/threading.c
@@ -0,0 +1,721 @@
+/*
+ * THREADING.C   	                    Copyright (c) 2007-08, Asko Kauppi
+ *
+ * Lua Lanes OS threading specific code.
+ *
+ * References:
+ *      <http://www.cse.wustl.edu/~schmidt/win32-cv-1.html>
+*/
+
+/*
+===============================================================================
+
+Copyright (C) 2007-08 Asko Kauppi <akauppi@gmail.com>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+
+===============================================================================
+*/
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <errno.h>
+#include <math.h>
+
+#include "threading.h"
+#include "lua.h"
+
+#if !((defined PLATFORM_WIN32) || (defined PLATFORM_POCKETPC))
+# include <sys/time.h>
+#endif
+
+
+#if defined(PLATFORM_LINUX) || defined(PLATFORM_CYGWIN)
+# include <sys/types.h>
+# include <unistd.h>
+#endif
+
+/* Linux needs to check, whether it's been run as root
+*/
+#ifdef PLATFORM_LINUX
+  volatile bool_t sudo;
+#endif
+
+#ifdef _MSC_VER
+// ".. selected for automatic inline expansion" (/O2 option)
+# pragma warning( disable : 4711 )
+// ".. type cast from function pointer ... to data pointer"
+# pragma warning( disable : 4054 )
+#endif
+
+//#define THREAD_CREATE_RETRIES_MAX 20
+    // loops (maybe retry forever?)
+
+/* 
+* FAIL is for unexpected API return values - essentially programming 
+* error in _this_ code. 
+*/
+#if (defined PLATFORM_WIN32) || (defined PLATFORM_POCKETPC)
+/* Prints the failed function's name and the numeric code given in 'rc' to
+ * stderr, then aborts the process. Does not return.
+ */
+static void FAIL( const char *funcname, int rc ) {
+    fprintf( stderr, "%s() failed! (%d)\n", funcname, rc );
+    abort();
+}
+#endif
+
+
+/*
+* Returns millisecond timing (in seconds) for the current time.
+*
+* Note: This function should be called once in single-threaded mode in Win32,
+*       to get it initialized.
+*/
+time_d now_secs(void) {
+
+#if (defined PLATFORM_WIN32) || (defined PLATFORM_POCKETPC)
+    /*
+    * Windows FILETIME values are "100-nanosecond intervals since 
+    * January 1, 1601 (UTC)" (MSDN). Well, we'd want Unix Epoch as
+    * the offset and it seems, so would they:
+    *
+    * <http://msdn.microsoft.com/en-us/library/ms724928(VS.85).aspx>
+    */
+    SYSTEMTIME st;
+    FILETIME ft;
+    ULARGE_INTEGER uli;
+    static ULARGE_INTEGER uli_epoch;   // Jan 1st 1970 0:0:0
+
+    // First call only: compute the FILETIME value of the Unix Epoch. The
+    // static starts zeroed, so 'HighPart==0' means "not initialized yet".
+    // This lazy initialization is unguarded, which is why the function
+    // should first be called in single-threaded mode (see the note above).
+    //
+    if (uli_epoch.HighPart==0) {
+        st.wYear= 1970;
+        st.wMonth= 1;   // Jan
+        st.wDay= 1;
+        st.wHour= st.wMinute= st.wSecond= st.wMilliseconds= 0;
+
+        if (!SystemTimeToFileTime( &st, &ft ))
+            FAIL( "SystemTimeToFileTime", GetLastError() );
+
+        uli_epoch.LowPart= ft.dwLowDateTime;
+        uli_epoch.HighPart= ft.dwHighDateTime;
+    }
+
+    GetSystemTime( &st );	// current system date/time in UTC
+    if (!SystemTimeToFileTime( &st, &ft ))
+        FAIL( "SystemTimeToFileTime", GetLastError() );
+
+    uli.LowPart= ft.dwLowDateTime;
+    uli.HighPart= ft.dwHighDateTime;
+
+    /* 'double' has less accuracy than 64-bit int, but if it were to degrade,
+     * it would do so gracefully. In practise, the integer accuracy is not
+     * of the 100ns class but just 1ms (Windows XP).
+     */
+# if 1
+    // >= 2.0.3 code
+    //
+    // 100ns units since the Epoch -> whole milliseconds (integer division
+    // by 10000) -> seconds as a double.
+    //
+    return (double) ((uli.QuadPart - uli_epoch.QuadPart)/10000) / 1000.0;
+# elif 0
+    // fix from Kriss Daniels, see: 
+    // <http://luaforge.net/forum/forum.php?thread_id=22704&forum_id=1781>
+    //
+    // "seem to be getting negative numbers from the old version, probably number
+    // conversion clipping, this fixes it and maintains ms resolution"
+    //
+    // This was a bad fix, and caused timer test 5 sec timers to disappear.
+    // --AKa 25-Jan-2009
+    //
+    return ((double)((signed)((uli.QuadPart/10000) - (uli_epoch.QuadPart/10000)))) / 1000.0;
+# else
+    // <= 2.0.2 code
+    return (double)(uli.QuadPart - uli_epoch.QuadPart) / 10000000.0;
+# endif
+#else
+    struct timeval tv;
+        // {
+        //   time_t       tv_sec;   /* seconds since Jan. 1, 1970 */
+        //   suseconds_t  tv_usec;  /* and microseconds */
+        // };
+
+    int rc= gettimeofday( &tv, NULL /*time zone not used any more (in Linux)*/ );
+    assert( rc==0 );    // only checked in builds where 'assert' is enabled
+
+    // The microseconds are truncated to whole milliseconds (integer
+    // division) before being added as the fractional part.
+    //
+    return ((double)tv.tv_sec) + ((tv.tv_usec)/1000) / 1000.0;
+#endif
+}
+
+
+/*
+* Turns a relative timeout into the 'abs_secs' form used by the wait
+* functions:
+*
+*   secs > 0    returns the absolute time 'now_secs() + secs'
+*   secs <= 0   returned unchanged (the Win32 SIGNAL_WAIT below treats 0 as
+*               "do not wait" and negative values as "wait forever")
+*/
+time_d SIGNAL_TIMEOUT_PREPARE( double secs ) {
+    return (secs > 0.0) ? (now_secs() + secs) : secs;
+}
+
+
+#if !((defined PLATFORM_WIN32) || (defined PLATFORM_POCKETPC))
+/*
+* Prepare 'abs_secs' kind of timeout to 'timespec' format
+*
+* 'abs_secs' is an absolute time in seconds and must be >= 0; the value 0.0
+* stands for "now". The fractional part is rounded to whole milliseconds.
+*
+* The rounding can produce a full 1000 ms (fractions >= .9995); that is
+* carried over into 'tv_sec', since 'tv_nsec' must stay below one second
+* (1000000000 ns) for the timespec to be valid.
+*/
+static void prepare_timeout( struct timespec *ts, time_d abs_secs ) {
+    long ms;
+
+    assert(ts);
+    assert( abs_secs >= 0.0 );
+
+    if (abs_secs==0.0)
+        abs_secs= now_secs();
+
+    ts->tv_sec= floor( abs_secs );
+    ms= (long)((abs_secs - ts->tv_sec) * 1000.0 +0.5);
+
+    if (ms >= 1000) {       // rounded up to the next full second
+        ts->tv_sec += 1;
+        ms -= 1000;
+    }
+    ts->tv_nsec= ms * 1000000L;   // 1ms = 1000000ns
+}
+#endif
+
+
+/*---=== Threading ===---*/
+
+//---
+// It may be meaningful to explicitly limit the new threads' C stack size.
+// We should know how much Lua needs in the C stack, all Lua side allocations
+// are done in heap so they don't count.
+//
+// Consequence of _not_ limiting the stack is running out of virtual memory
+// with 1000-5000 threads on 32-bit systems.
+//
+// Note: using external C modules may be affected by the stack size check.
+//       if having problems, set back to '0' (default stack size of the system).
+// 
+// Win32:       64K (?)
+// Win64:       xxx
+//
+// Linux x86:   2MB     Ubuntu 7.04 via 'pthread_getstacksize()'
+// Linux x64:   xxx
+// Linux ARM:   xxx
+//
+// OS X 10.4.9: 512K    <http://developer.apple.com/qa/qa2005/qa1419.html>
+//                      valid values N * 4KB
+//
+// Per-platform default for the C stack size of new threads, in bytes; can be
+// overridden by defining _THREAD_STACK_SIZE at build time. 0 is the value
+// described above as "default stack size of the system".
+//
+// Platforms matching none of the branches below leave the macro undefined.
+//
+#ifndef _THREAD_STACK_SIZE
+# if (defined PLATFORM_WIN32) || (defined PLATFORM_POCKETPC) || (defined PLATFORM_CYGWIN)
+#  define _THREAD_STACK_SIZE 0
+      // Win32: does it work with less?
+# elif (defined PLATFORM_OSX)
+#  define _THREAD_STACK_SIZE (524288/2)   // 262144
+      // OS X: "make test" works on 65536 and even below
+      //       "make perftest" works on >= 4*65536 == 262144 (not 3*65536)
+# elif (defined PLATFORM_LINUX) && (defined __i386)
+#  define _THREAD_STACK_SIZE (2097152/16)  // 131072
+      // Linux x86 (Ubuntu 7.04): "make perftest" works on /16 (not on /32)
+# elif (defined PLATFORM_BSD) && (defined __i386)
+#  define _THREAD_STACK_SIZE (1048576/8)  // 131072
+      // FreeBSD 6.2 SMP i386: ("gmake perftest" works on /8 (not on /16)
+# endif
+#endif
+
+#if (defined PLATFORM_WIN32) || (defined PLATFORM_POCKETPC)
+  //
+  // Creates an unnamed, initially unowned mutex and stores its handle in
+  // '*ref'. Aborts via FAIL() if the mutex could not be created.
+  //
+  void MUTEX_INIT( MUTEX_T *ref ) {
+     *ref= CreateMutex( NULL /*security attr*/, FALSE /*not locked*/, NULL );
+     // Test the returned handle ('*ref'), not the 'ref' pointer itself,
+     // which is never NULL here.
+     if (!*ref) FAIL( "CreateMutex", GetLastError() );
+  }
+  // Closes the mutex handle and clears '*ref'; aborts via FAIL() if the
+  // handle cannot be closed.
+  //
+  void MUTEX_FREE( MUTEX_T *ref ) {
+     BOOL closed= CloseHandle( *ref );
+     if (!closed) FAIL( "CloseHandle (mutex)", GetLastError() );
+     *ref= NULL;
+  }
+  // Waits (without timeout) until the mutex is acquired. Any wait result
+  // other than WAIT_OBJECT_0 (0) aborts via FAIL().
+  //
+  void MUTEX_LOCK( MUTEX_T *ref ) {
+    DWORD rc= WaitForSingleObject( *ref, INFINITE );
+    if (rc == WAIT_OBJECT_0) return;
+
+    FAIL( "WaitForSingleObject", rc==WAIT_FAILED ? GetLastError() : rc );
+  }
+  // Releases the mutex; aborts via FAIL() if the release is refused.
+  //
+  void MUTEX_UNLOCK( MUTEX_T *ref ) {
+    BOOL released= ReleaseMutex( *ref );
+    if (!released) FAIL( "ReleaseMutex", GetLastError() );
+  }
+    /* MSDN: "If you would like to use the CRT in ThreadProc, use the
+              _beginthreadex function instead (of CreateThread)."
+       MSDN: "you can create at most 2028 threads"
+
+       Starts a thread running 'func(data)' with the _THREAD_STACK_SIZE stack
+       size, applies the priority 'prio' (0 leaves the default priority) and
+       stores the thread handle in '*ref'. Aborts via FAIL() on failure.
+
+       Note: _beginthreadex() reports failure by returning 0 with 'errno'
+             set; INVALID_HANDLE_VALUE (-1) is what _beginthread() returns.
+    */
+  void
+  THREAD_CREATE( THREAD_T *ref,
+                 THREAD_RETURN_T (__stdcall *func)( void * ),
+                     // Note: Visual C++ requires '__stdcall' where it is
+                 void *data, int prio /* -3..+3 */ ) {
+
+    HANDLE h= (HANDLE)_beginthreadex( NULL, // security
+                              _THREAD_STACK_SIZE,
+                              func,
+                              data,
+                              0,    // flags (0/CREATE_SUSPENDED)
+                              NULL  // thread id (not used)
+                            );    
+
+    if (h == NULL || h == INVALID_HANDLE_VALUE) FAIL( "_beginthreadex", errno );
+
+    if (prio!= 0) {
+        // Any value outside -2..+3 ends up as THREAD_PRIORITY_IDLE
+        int win_prio= (prio == +3) ? THREAD_PRIORITY_TIME_CRITICAL :
+                      (prio == +2) ? THREAD_PRIORITY_HIGHEST :
+                      (prio == +1) ? THREAD_PRIORITY_ABOVE_NORMAL :
+                      (prio == -1) ? THREAD_PRIORITY_BELOW_NORMAL :
+                      (prio == -2) ? THREAD_PRIORITY_LOWEST :
+                                     THREAD_PRIORITY_IDLE;  // -3
+
+        if (!SetThreadPriority( h, win_prio )) 
+            FAIL( "SetThreadPriority", GetLastError() );
+    }
+    *ref= h;
+  }
+  //
+  // Waits up to 'secs' seconds for the thread to finish; a timeout that
+  // rounds to negative milliseconds waits without limit.
+  //
+  // Returns FALSE on timeout ('*ref' stays valid). Returns TRUE once the
+  // thread has finished; the thread handle is then closed (it would leak
+  // otherwise, since '*ref' is the only reference to it) and '*ref' is
+  // cleared. Any other wait result aborts via FAIL().
+  //
+  bool_t THREAD_WAIT( THREAD_T *ref, double secs ) {
+    long ms= (long)((secs*1000.0)+0.5);
+
+    DWORD rc= WaitForSingleObject( *ref, ms<0 ? INFINITE:ms /*timeout*/ );
+        //
+        // (WAIT_ABANDONED)
+        // WAIT_OBJECT_0    success (0)
+        // WAIT_TIMEOUT
+        // WAIT_FAILED      more info via GetLastError()
+
+    if (rc == WAIT_TIMEOUT) return FALSE;
+    if (rc != 0) FAIL( "WaitForSingleObject", rc==WAIT_FAILED ? GetLastError() : rc );
+
+    if (!CloseHandle( *ref )) FAIL( "CloseHandle (thread)", GetLastError() );
+    *ref= NULL;     // thread no longer usable
+    return TRUE;
+  }
+  //
+  // Forcibly terminates the thread, closes its handle (the thread object
+  // stays allocated for as long as a handle to it is open) and clears
+  // '*ref'. Aborts via FAIL() if either call fails.
+  //
+  void THREAD_KILL( THREAD_T *ref ) {
+    if (!TerminateThread( *ref, 0 )) FAIL("TerminateThread", GetLastError());
+    if (!CloseHandle( *ref )) FAIL( "CloseHandle (thread)", GetLastError() );
+    *ref= NULL;
+  }
+  //
+  // Creates the event object behind a signal and stores it in '*ref'.
+  //
+  // A 'manual reset' event is used, to be able to wake up all the waiting
+  // threads; it is unnamed, has default security and starts unsignalled.
+  // Aborts via FAIL() if the event cannot be created.
+  //
+  void SIGNAL_INIT( SIGNAL_T *ref ) {
+    HANDLE ev= CreateEvent( NULL /*security attributes*/,
+                            TRUE /*manual reset*/,
+                            FALSE /*initial state*/,
+                            NULL /*name*/ );
+
+    if (!ev) FAIL( "CreateEvent", GetLastError() );
+    *ref= ev;
+  }
+  // Closes the event handle and clears '*ref'; aborts via FAIL() if the
+  // handle cannot be closed.
+  //
+  void SIGNAL_FREE( SIGNAL_T *ref ) {
+    BOOL closed= CloseHandle( *ref );
+    if (!closed) FAIL( "CloseHandle (event)", GetLastError() );
+    *ref= NULL;
+  }
+  //
+  bool_t SIGNAL_WAIT( SIGNAL_T *ref, MUTEX_T *mu_ref, time_d abs_secs ) {
+    DWORD rc;
+    long ms;
+    
+    if (abs_secs<0.0)
+        ms= INFINITE;
+    else if (abs_secs==0.0)
+        ms= 0;
+    else {
+        ms= (long) ((abs_secs - now_secs())*1000.0 + 0.5);
+        
+        // If the time already passed, still try once (ms==0). A short timeout
+        // may have turned negative or 0 because of the two time samples done.
+        //
+        if (ms<0) ms= 0;
+    }
+
+    // Unlock and start a wait, atomically (like condition variables do)
+    //
+    rc= SignalObjectAndWait( *mu_ref,   // "object to signal" (unlock)
+                             *ref,      // "object to wait on"
+                             ms,
+                             FALSE );   // not alertable
+
+    // All waiting threads are woken here; each competes for the lock in turn.
+    //
+    // Note: We must get the lock even if we've timed out; it makes upper
+    //       level code equivalent to how PThread does it.
+    //
+    MUTEX_LOCK(mu_ref);
+
+    if (rc==WAIT_TIMEOUT) return FALSE;
+    if (rc!=0) FAIL( "SignalObjectAndWait", rc );
+    return TRUE;
+  }
+  void SIGNAL_ALL( SIGNAL_T *ref ) {
+/* 
+ * MSDN tries to scare that 'PulseEvent' is bad, unreliable and should not be
+ * used. Use condition variables instead (wow, they have that!?!); which will
+ * ONLY WORK on Vista and 2008 Server, it seems... so MS, isn't it.
+ * 
+ * I refuse to believe that; using 'PulseEvent' is probably just as good as
+ * using Windows (XP) in the first place. Just don't use APC's (asynchronous
+ * procedure calls) in your C side coding.
+ */
+    // PulseEvent on manual event:
+    //
+    // Release ALL threads waiting for it (and go instantly back to unsignalled
+    // status = future threads to start a wait will wait)
+    //
+    if (!PulseEvent( *ref ))
+        FAIL( "PulseEvent", GetLastError() );
+  }
+#else
+  // PThread (Linux, OS X, ...)
+  //
+  // On OS X, user processes seem to be able to change priorities.
+  // On Linux, SCHED_RR and su privileges are required..  !-(
+  //
+  #include <errno.h>
+  #include <sys/time.h>
+  // Report a failed PThread call (error code 'rc') on stderr, then abort.
+  static void _PT_FAIL( int rc, const char *name, const char *file, uint_t line ) {
+    const char *why= (rc==EINVAL) ? "EINVAL" : 
+                     (rc==EBUSY) ? "EBUSY" : 
+                     (rc==EPERM) ? "EPERM" :
+                     (rc==ENOMEM) ? "ENOMEM" :
+                     (rc==ESRCH) ? "ESRCH" :
+                     (rc==EAGAIN) ? "EAGAIN" :   // e.g. out of resources
+                     "";
+    fprintf( stderr, "%s %u: %s failed, %d %s\n", file, line, name, rc, why );   // 'line' is unsigned
+    abort();
+  }
+  #define PT_CALL( call ) do { int pt_rc_= (call); if (pt_rc_!=0) _PT_FAIL( pt_rc_, #call, __FILE__, __LINE__ ); } while(0)
+  //
+  void SIGNAL_INIT( SIGNAL_T *ref ) {
+    PT_CALL( pthread_cond_init(ref,NULL /*attr*/) );
+    }
+  void SIGNAL_FREE( SIGNAL_T *ref ) {
+    PT_CALL( pthread_cond_destroy(ref) );
+  }
+  //
+  /*
+  * Timeout is given as absolute since we may have spurious wakeups during
+  * a timed wait: a Linda with some other key being read, or simply because
+  * PThread condition variables may wake up without having been signalled.
+  */
+  bool_t SIGNAL_WAIT( SIGNAL_T *ref, pthread_mutex_t *mu, time_d abs_secs ) {
+    if (abs_secs<0.0) {
+        PT_CALL( pthread_cond_wait( ref, mu ) );  // infinite
+    } else {
+        int rc;
+        struct timespec ts;
+
+        assert( abs_secs != 0.0 );
+        prepare_timeout( &ts, abs_secs );
+
+        rc= pthread_cond_timedwait( ref, mu, &ts );
+
+        if (rc==ETIMEDOUT) return FALSE;
+        if (rc) { _PT_FAIL( rc, "pthread_cond_timedwait()", __FILE__, __LINE__ ); }
+    }
+    return TRUE;
+  }
+  //
+  void SIGNAL_ONE( SIGNAL_T *ref ) {
+    PT_CALL( pthread_cond_signal(ref) );     // wake up ONE (or no) waiting thread
+  }
+  //
+  void SIGNAL_ALL( SIGNAL_T *ref ) {
+    PT_CALL( pthread_cond_broadcast(ref) );     // wake up ALL waiting threads
+  }
+  //
+  void THREAD_CREATE( THREAD_T* ref, 
+                      THREAD_RETURN_T (*func)( void * ),
+                      void *data, int prio /* -2..+2 */ ) {
+    pthread_attr_t _a;
+    pthread_attr_t *a= &_a;
+    struct sched_param sp;
+
+    PT_CALL( pthread_attr_init(a) );
+
+#ifndef PTHREAD_TIMEDJOIN
+    // We create a NON-JOINABLE thread. This is mainly due to the lack of
+    // 'pthread_timedjoin()', but does offer other benefits (s.a. earlier 
+    // freeing of the thread's resources).
+    //
+    PT_CALL( pthread_attr_setdetachstate(a,PTHREAD_CREATE_DETACHED) );
+#endif
+
+    // Use this to find a system's default stack size (DEBUG)
+#if 0
+  { size_t n; pthread_attr_getstacksize( a, &n );
+    fprintf( stderr, "Getstack: %u\n", (unsigned int)n ); }
+    	//  524288 on OS X
+    	// 2097152 on Linux x86 (Ubuntu 7.04)
+    	// 1048576 on FreeBSD 6.2 SMP i386
+#endif
+
+#if (defined _THREAD_STACK_SIZE) && (_THREAD_STACK_SIZE > 0)
+    PT_CALL( pthread_attr_setstacksize( a, _THREAD_STACK_SIZE ) );
+#endif
+    
+    bool_t normal= 
+#if defined(PLATFORM_LINUX) && defined(LINUX_SCHED_RR)
+        !sudo;          // with sudo, even normal thread must use SCHED_RR
+#else
+        prio == 0;      // create a default thread if no priority was requested
+#endif
+    if (!normal) {
+        // NB: PThreads priority handling is about as twisty as one can get it
+        //     (and then some). DON'T TRUST ANYTHING YOU READ ON THE NET!!!
+
+        // "The specified scheduling parameters are only used if the scheduling
+        //  parameter inheritance attribute is PTHREAD_EXPLICIT_SCHED."
+        //
+        PT_CALL( pthread_attr_setinheritsched( a, PTHREAD_EXPLICIT_SCHED ) );
+
+        //---
+        // "Select the scheduling policy for the thread: one of SCHED_OTHER 
+        // (regular, non-real-time scheduling), SCHED_RR (real-time, 
+        // round-robin) or SCHED_FIFO (real-time, first-in first-out)."
+        //
+        // "Using the RR policy ensures that all threads having the same
+        // priority level will be scheduled equally, regardless of their activity."
+        //
+        // "For SCHED_FIFO and SCHED_RR, the only required member of the
+        // sched_param structure is the priority sched_priority. For SCHED_OTHER,
+        // the affected scheduling parameters are implementation-defined."
+        //
+        // "The priority of a thread is specified as a delta which is added to 
+        // the priority of the process."
+        //
+        // ".. priority is an integer value, in the range from 1 to 127. 
+        //  1 is the least-favored priority, 127 is the most-favored."
+        //
+        // "Priority level 0 cannot be used: it is reserved for the system."
+        //
+        // "When you use specify a priority of -99 in a call to 
+        // pthread_setschedparam(), the priority of the target thread is 
+        // lowered to the lowest possible value."
+        //
+        // ...
+
+        // ** CONCLUSION **
+        //
+        // PThread priorities are _hugely_ system specific, and we need at
+        // least OS specific settings. Hopefully, Linuxes and OS X versions
+        // are uniform enough, among each other...
+        //
+#ifdef PLATFORM_OSX
+        // AK 10-Apr-07 (OS X PowerPC 10.4.9):
+        //
+        // With SCHED_RR, 26 seems to be the "normal" priority, where setting
+        // it does not seem to affect the order of threads processed.
+        //
+        // With SCHED_OTHER, the range 25..32 is normal (maybe the same 26,
+        // but the difference is not so clear with OTHER).
+        //
+        // 'sched_get_priority_min()' and '..max()' give 15, 47 as the 
+        // priority limits. This could imply, user mode applications won't
+        // be able to use values outside of that range.
+        //
+        #define _PRIO_MODE SCHED_OTHER
+        
+        // OS X 10.4.9 (PowerPC) gives ENOTSUP for process scope
+        //#define _PRIO_SCOPE PTHREAD_SCOPE_PROCESS
+
+        #define _PRIO_HI  32    // seems to work (_carefully_ picked!)
+        #define _PRIO_0   26    // detected
+        #define _PRIO_LO   1    // seems to work (tested)
+
+#elif defined(PLATFORM_LINUX)
+        // (based on Ubuntu Linux 2.6.15 kernel)
+        //
+        // SCHED_OTHER is the default policy, but does not allow for priorities.
+        // SCHED_RR allows priorities, all of which (1..99) are higher than
+        // a thread with SCHED_OTHER policy.
+        //
+        // <http://kerneltrap.org/node/6080>
+        // <http://en.wikipedia.org/wiki/Native_POSIX_Thread_Library>
+        // <http://www.net.in.tum.de/~gregor/docs/pthread-scheduling.html>
+        //
+        // Manuals suggest checking #ifdef _POSIX_THREAD_PRIORITY_SCHEDULING,
+        // but even Ubuntu does not seem to define it.
+        //
+        #define _PRIO_MODE SCHED_RR
+        
+        // NPTL 2.5: only system scope allowed (being the basic reason why
+        //           root privileges are required..)
+        //#define _PRIO_SCOPE PTHREAD_SCOPE_PROCESS
+
+        #define _PRIO_HI 99
+        #define _PRIO_0  50
+        #define _PRIO_LO 1
+
+#elif defined(PLATFORM_BSD)
+        //
+        // <http://www.net.in.tum.de/~gregor/docs/pthread-scheduling.html>
+        //
+        // "When control over the thread scheduling is desired, then FreeBSD
+        //  with the libpthread implementation is by far the best choice .."
+        //
+        #define _PRIO_MODE SCHED_OTHER
+        #define _PRIO_SCOPE PTHREAD_SCOPE_PROCESS
+        #define _PRIO_HI 31
+        #define _PRIO_0  15
+        #define _PRIO_LO 1
+
+#elif defined(PLATFORM_CYGWIN)
+	//
+	// TBD: Find right values for Cygwin
+	//
+#else
+        #error "Unknown OS: not implemented!"
+#endif
+
+#ifdef _PRIO_SCOPE
+        PT_CALL( pthread_attr_setscope( a, _PRIO_SCOPE ) );
+#endif
+        PT_CALL( pthread_attr_setschedpolicy( a, _PRIO_MODE ) );
+
+#define _PRIO_AN (_PRIO_0 + ((_PRIO_HI-_PRIO_0)/2) )
+#define _PRIO_BN (_PRIO_LO + ((_PRIO_0-_PRIO_LO)/2) )
+
+        sp.sched_priority= 
+            (prio == +2) ? _PRIO_HI :
+            (prio == +1) ? _PRIO_AN :
+#if defined(PLATFORM_LINUX) && defined(LINUX_SCHED_RR)
+            (prio == 0) ? _PRIO_0 :
+#endif
+            (prio == -1) ? _PRIO_BN : _PRIO_LO;
+
+        PT_CALL( pthread_attr_setschedparam( a, &sp ) );
+    }
+
+    //---
+    // Seems on OS X, _POSIX_THREAD_THREADS_MAX is some kind of system
+    // thread limit (not userland thread). Actual limit for us is way higher.
+    // PTHREAD_THREADS_MAX is not defined (even though man page refers to it!)
+    //
+# ifndef THREAD_CREATE_RETRIES_MAX
+    // Don't bother with retries; a failure is a failure
+    //
+    { 
+      int rc= pthread_create( ref, a, func, data );
+      if (rc) _PT_FAIL( rc, "pthread_create()", __FILE__, __LINE__-1 );
+    }
+# else
+# error "This code deprecated"
+/*
+    // Wait slightly if thread creation has exchausted the system
+    //
+    { uint_t retries;
+    for( retries=0; retries<THREAD_CREATE_RETRIES_MAX; retries++ ) {
+
+        int rc= pthread_create( ref, a, func, data );
+            //
+            // OS X / Linux:
+            //    EAGAIN: ".. lacked the necessary resources to create
+            //             another thread, or the system-imposed limit on the
+            //             total number of threads in a process 
+            //             [PTHREAD_THREADS_MAX] would be exceeded."
+            //    EINVAL: attr is invalid
+            // Linux:
+            //    EPERM: no rights for given parameters or scheduling (no sudo)
+            //    ENOMEM: (known to fail with this code, too - not listed in man)
+            
+        if (rc==0) break;   // ok!
+
+        // In practise, exhaustion seems to be coming from memory, not a
+        // maximum number of threads. Keep tuning... ;)
+        //
+        if (rc==EAGAIN) {
+//fprintf( stderr, "Looping (retries=%d) ", retries );    // DEBUG
+
+            // Try again, later.
+
+            Yield();
+        } else {
+            _PT_FAIL( rc, "pthread_create()", __FILE__, __LINE__ );
+        }
+    }
+    }
+*/
+# endif
+
+    if (a) {
+        PT_CALL( pthread_attr_destroy(a) );
+    }
+  }
+  //
+  /*
+  * Wait for a thread to finish.
+  *
+  * 'mu_ref' is a lock we should use for the waiting; initially unlocked.
+  * Same lock as passed to THREAD_EXIT.
+  *
+  * Returns TRUE for successful wait, FALSE for timed out
+  */
+#ifdef PTHREAD_TIMEDJOIN
+  bool_t THREAD_WAIT( THREAD_T *ref, double secs )
+#else
+  bool_t THREAD_WAIT( THREAD_T *ref, SIGNAL_T *signal_ref, MUTEX_T *mu_ref, volatile enum e_status *st_ref, double secs )
+#endif
+{
+    struct timespec ts_store;
+    const struct timespec *timeout= NULL;
+    bool_t done;
+
+    // Do timeout counting before the locks
+    //
+#ifdef PTHREAD_TIMEDJOIN
+    if (secs>=0.0) {
+#else
+    if (secs>0.0) {
+#endif
+        prepare_timeout( &ts_store, now_secs()+secs );
+        timeout= &ts_store;
+    }
+
+#ifdef PTHREAD_TIMEDJOIN
+    /* Thread is joinable
+    */
+    if (!timeout) {
+        PT_CALL( pthread_join( *ref, NULL /*ignore exit value*/ ));
+        done= TRUE;
+    } else {
+        int rc= PTHREAD_TIMEDJOIN( *ref, NULL, timeout );
+        if ((rc!=0) && (rc!=ETIMEDOUT)) {
+            _PT_FAIL( rc, "PTHREAD_TIMEDJOIN", __FILE__, __LINE__-2 );
+        }
+        done= rc==0;
+    }
+#else
+    /* Since we've set the thread up as PTHREAD_CREATE_DETACHED, we cannot
+     * join with it. Use the cond.var.
+    */
+    MUTEX_LOCK( mu_ref );
+    
+        // 'secs'==0.0 does not need to wait, just take the current status
+        // within the 'mu_ref' locks
+        //
+        if (secs != 0.0) {
+            while( *st_ref < DONE ) {
+                if (!timeout) {
+                    PT_CALL( pthread_cond_wait( signal_ref, mu_ref ));
+                } else {
+                    int rc= pthread_cond_timedwait( signal_ref, mu_ref, timeout );
+                    if (rc==ETIMEDOUT) break;
+                    if (rc!=0) _PT_FAIL( rc, "pthread_cond_timedwait", __FILE__, __LINE__-2 );
+                }
+            }
+        }
+        done= *st_ref >= DONE;  // DONE|ERROR_ST|CANCELLED
+
+    MUTEX_UNLOCK( mu_ref );
+#endif
+    return done;
+  }    
+  //
+  void THREAD_KILL( THREAD_T *ref ) {
+    pthread_cancel( *ref );
+  }
+#endif
+
+static const lua_Alloc alloc_f= 0;
diff --git a/src/threading.h b/src/threading.h
new file mode 100644
index 0000000..4a83229
--- /dev/null
+++ b/src/threading.h
@@ -0,0 +1,196 @@
+/*
+* THREADING.H
+*/
+#ifndef THREADING_H
+#define THREADING_H
+
+/* Platform detection
+*/
+#ifdef _WIN32_WCE
+  #define PLATFORM_POCKETPC
+#elif (defined _WIN32)
+  #define PLATFORM_WIN32
+#elif (defined __linux__)
+  #define PLATFORM_LINUX
+#elif (defined __APPLE__) && (defined __MACH__)
+  #define PLATFORM_OSX
+#elif (defined __NetBSD__) || (defined __FreeBSD__) || (defined BSD)
+  #define PLATFORM_BSD
+#elif (defined __QNX__)
+  #define PLATFORM_QNX
+#elif (defined __CYGWIN__)
+  #define PLATFORM_CYGWIN
+#else
+  #error "Unknown platform!"
+#endif
+
+typedef int bool_t;
+#ifndef FALSE
+# define FALSE 0
+# define TRUE 1
+#endif
+
+typedef unsigned int uint_t;
+
+#if defined(PLATFORM_WIN32) && defined(__GNUC__)
+/* MinGW with MSVCR80.DLL */
+/* Do this BEFORE including time.h so that it is declaring _mktime32()
+ * as it would have declared mktime().
+ */
+# define mktime _mktime32
+#endif
+#include <time.h>
+
+/* Note: ERROR is a defined entity on Win32
+*/
+enum e_status { PENDING, RUNNING, WAITING, DONE, ERROR_ST, CANCELLED };
+
+
+/*---=== Locks & Signals ===---
+*/
+
+#if (defined PLATFORM_WIN32) || (defined PLATFORM_POCKETPC)
+  #define WIN32_LEAN_AND_MEAN
+  // 'SignalObjectAndWait' needs this (0x0400 = Windows NT 4.0 and above)
+  #define _WIN32_WINNT 0x0400
+  #include <windows.h>
+  #include <process.h>
+
+  // MSDN: http://msdn2.microsoft.com/en-us/library/ms684254.aspx
+  //
+  // CRITICAL_SECTION can be used for simple code protection. Mutexes are
+  // needed for use with the SIGNAL system.
+  //
+  #define MUTEX_T HANDLE
+  void MUTEX_INIT( MUTEX_T *ref );
+  #define MUTEX_RECURSIVE_INIT(ref)  MUTEX_INIT(ref)  /* always recursive in Win32 */
+  void MUTEX_FREE( MUTEX_T *ref );
+  void MUTEX_LOCK( MUTEX_T *ref );
+  void MUTEX_UNLOCK( MUTEX_T *ref );
+
+  typedef unsigned THREAD_RETURN_T;
+
+  #define SIGNAL_T HANDLE
+  
+  #define YIELD() Sleep(0)
+#else
+  // PThread (Linux, OS X, ...)
+  //
+  #include <pthread.h>
+
+  #ifdef PLATFORM_LINUX
+  # define _MUTEX_RECURSIVE PTHREAD_MUTEX_RECURSIVE_NP
+  #else
+    /* OS X, ... */
+  # define _MUTEX_RECURSIVE PTHREAD_MUTEX_RECURSIVE
+  #endif
+
+  #define MUTEX_T            pthread_mutex_t
+  #define MUTEX_INIT(ref)    pthread_mutex_init(ref,NULL)
+  #define MUTEX_RECURSIVE_INIT(ref) \
+      { pthread_mutexattr_t a; pthread_mutexattr_init( &a ); \
+        pthread_mutexattr_settype( &a, _MUTEX_RECURSIVE ); \
+        pthread_mutex_init(ref,&a); pthread_mutexattr_destroy( &a ); \
+      }
+  #define MUTEX_FREE(ref)    pthread_mutex_destroy(ref)
+  #define MUTEX_LOCK(ref)    pthread_mutex_lock(ref)
+  #define MUTEX_UNLOCK(ref)  pthread_mutex_unlock(ref)
+
+  typedef void * THREAD_RETURN_T;
+
+  typedef pthread_cond_t SIGNAL_T;
+
+  void SIGNAL_ONE( SIGNAL_T *ref );
+  
+  // Yield is non-portable:
+  //
+  //    OS X 10.4.8/9 has pthread_yield_np()
+  //    Linux 2.4   has pthread_yield() if _GNU_SOURCE is #defined
+  //    FreeBSD 6.2 has pthread_yield()
+  //    ...
+  //
+  #ifdef PLATFORM_OSX
+    #define YIELD() pthread_yield_np()
+  #else
+    #define YIELD() pthread_yield()
+  #endif
+#endif
+
+void SIGNAL_INIT( SIGNAL_T *ref );
+void SIGNAL_FREE( SIGNAL_T *ref );
+void SIGNAL_ALL( SIGNAL_T *ref );
+
+/*
+* 'time_d': <0.0 for no timeout
+*           0.0 for instant check
+*           >0.0 absolute timeout in secs + ms
+*/
+typedef double time_d;
+time_d now_secs(void);
+
+time_d SIGNAL_TIMEOUT_PREPARE( double rel_secs );
+
+bool_t SIGNAL_WAIT( SIGNAL_T *ref, MUTEX_T *mu, time_d timeout );
+
+
+/*---=== Threading ===---
+*/
+
+#if (defined PLATFORM_WIN32) || (defined PLATFORM_POCKETPC)
+
+  typedef HANDLE THREAD_T;
+  //
+  void THREAD_CREATE( THREAD_T *ref,
+                      THREAD_RETURN_T (__stdcall *func)( void * ),
+                      void *data, int prio /* -3..+3 */ );
+                 
+# define THREAD_PRIO_MIN (-3)
+# define THREAD_PRIO_MAX (+3)
+
+#else
+    /* Platforms that have a timed 'pthread_join()' can get away with a simpler
+    * implementation. Others will use a condition variable.
+    */
+# ifdef USE_PTHREAD_TIMEDJOIN
+#  ifdef PLATFORM_OSX
+#   error "No 'pthread_timedjoin()' on this system"
+#  else
+    /* Linux, ... */
+#   define PTHREAD_TIMEDJOIN pthread_timedjoin_np
+#  endif
+# endif
+
+  typedef pthread_t THREAD_T;
+
+  void THREAD_CREATE( THREAD_T *ref, 
+                      THREAD_RETURN_T (*func)( void * ),
+                      void *data, int prio /* -2..+2 */ );
+                      
+# if defined(PLATFORM_LINUX)
+  volatile bool_t sudo;
+#  ifdef LINUX_SCHED_RR
+#   define THREAD_PRIO_MIN (sudo ? -2 : 0)
+#  else
+#   define THREAD_PRIO_MIN (0)
+#  endif
+# define THREAD_PRIO_MAX (sudo ? +2 : 0)
+# else
+#  define THREAD_PRIO_MIN (-2)
+#  define THREAD_PRIO_MAX (+2)
+# endif
+#endif
+
+/* 
+* Win32 and PTHREAD_TIMEDJOIN allow waiting for a thread with a timeout.
+* Posix without PTHREAD_TIMEDJOIN needs to use a condition variable approach.
+*/
+#if (defined PLATFORM_WIN32) || (defined PLATFORM_POCKETPC) || (defined PTHREAD_TIMEDJOIN)
+  bool_t THREAD_WAIT( THREAD_T *ref, double secs );
+#else
+  bool_t THREAD_WAIT( THREAD_T *ref, SIGNAL_T *signal_ref, MUTEX_T *mu_ref, volatile enum e_status *st_ref, double secs );
+#endif
+
+void THREAD_KILL( THREAD_T *ref );
+
+#endif
+    // THREADING_H
diff --git a/src/tools.c b/src/tools.c
new file mode 100644
index 0000000..a2ec517
--- /dev/null
+++ b/src/tools.c
@@ -0,0 +1,1198 @@
+/*
+ * TOOLS.C   	                    Copyright (c) 2002-08, Asko Kauppi
+ *
+ * Lua tools to support Lanes.
+*/
+
+/*
+===============================================================================
+
+Copyright (C) 2002-08 Asko Kauppi <akauppi@gmail.com>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+
+===============================================================================
+*/
+
+#include "tools.h"
+
+#include "lualib.h"
+#include "lauxlib.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+#include <stdlib.h>
+
+static volatile lua_CFunction hijacked_tostring;     // = NULL
+
+MUTEX_T deep_lock;
+MUTEX_T mtid_lock;
+
+/*---=== luaG_dump ===---*/
+
+void luaG_dump( lua_State* L ) {
+    // Debug aid: print type and 'tostring' value of every stack slot to stderr.
+    int top= lua_gettop(L);
+    int i;
+
+    fprintf( stderr, "\n\tDEBUG STACK:\n" );
+
+    if (top==0)
+        fprintf( stderr, "\t(none)\n" );
+
+    for( i=1; i<=top; i++ ) {
+        int type= lua_type( L, i );
+
+        fprintf( stderr, "\t[%d]= (%s) ", i, lua_typename(L,type) );
+
+        // Print item contents here...
+        //
+        // Note: this requires 'tostring()' to be defined. If it is NOT,
+        //       enable it for more debugging.
+        //
+    STACK_CHECK(L)
+        STACK_GROW( L, 2 )
+
+        lua_getglobal( L, "tostring" );
+            //
+            // [-1]: tostring function, or nil
+        
+        if (!lua_isfunction(L,-1)) {
+            fprintf( stderr, "('tostring' not available)" );
+        } else {
+            lua_pushvalue( L, i );
+            lua_call( L, 1 /*args*/, 1 /*retvals*/ );
+
+            // Don't trust the result: the global 'tostring' may have been replaced
+            // and return a non-string, for which 'lua_tostring' gives NULL.
+            fprintf( stderr, "%s", lua_isstring(L,-1) ? lua_tostring(L,-1) : "(not a string)" );
+        }
+        lua_pop(L,1);
+    STACK_END(L,0)
+        fprintf( stderr, "\n" );
+    }
+    fprintf( stderr, "\n" );
+}
+
+
+/*---=== luaG_openlibs ===---*/
+
+static const luaL_Reg libs[] = {
+  { LUA_LOADLIBNAME, luaopen_package },
+  { LUA_TABLIBNAME, luaopen_table },
+  { LUA_IOLIBNAME, luaopen_io },
+  { LUA_OSLIBNAME, luaopen_os },
+  { LUA_STRLIBNAME, luaopen_string },
+  { LUA_MATHLIBNAME, luaopen_math },
+  { LUA_DBLIBNAME, luaopen_debug },
+  //
+  { "base", NULL },         // ignore "base" (already acquired it)
+  { "coroutine", NULL },    // part of Lua 5.1 base package
+  { NULL, NULL }
+};
+
+static bool_t openlib( lua_State *L, const char *name, size_t len ) {
+    // Open the library named by the 'len' chars at 'name' ("*": all). FALSE if unknown.
+    unsigned i;
+    bool_t all= strncmp( name, "*", len ) == 0;
+    // 'strncmp' alone is a prefix test ("s" would open "string"): also require the entry to end at 'len'.
+    for( i=0; libs[i].name; i++ ) {
+        if (all || ((strncmp(name, libs[i].name, len) ==0) && (libs[i].name[len]=='\0'))) {
+            if (libs[i].func) {
+                STACK_GROW(L,2);
+                lua_pushcfunction( L, libs[i].func );
+                lua_pushstring( L, libs[i].name );
+                lua_call( L, 1, 0 );
+            }
+            if (!all) return TRUE;
+        }
+    }
+    return all;
+}
+
+/* 
+* Like 'luaL_openlibs()' but allows the set of libraries be selected
+*
+*   NULL    no libraries, not even base
+*   ""      base library only
+*   "io,string"     named libraries
+*   "*"     all libraries
+*
+* Base ("unpack", "print" etc.) is always added, unless 'libs' is NULL.
+*
+* Returns NULL for ok, position of error within 'libs' on failure.
+*/
+#define is_name_char(c) (isalpha((unsigned char)(c)) || (c)=='*')   /* cast: <ctype.h> is undefined for negative 'char' */
+
+const char *luaG_openlibs( lua_State *L, const char *libs ) {
+    const char *p;
+    unsigned len;
+
+	if (!libs) return NULL;     // no libs, not even 'base'
+
+    // 'lua.c' stops GC during initialization so perhaps it's a good idea. :)
+    //
+    lua_gc(L, LUA_GCSTOP, 0);
+
+    // Anything causes 'base' to be taken in
+    //
+    STACK_GROW(L,2);
+    lua_pushcfunction( L, luaopen_base );
+    lua_pushliteral( L, "" );
+    lua_call( L, 1, 0 );
+
+    for( p= libs; *p; p+=len ) {
+        len=0;
+        while (*p && !is_name_char(*p)) p++;    // bypass delimiters
+        while (is_name_char(p[len])) len++;     // bypass name
+        if (len && (!openlib( L, p, len )))
+            break;
+    }
+    lua_gc(L, LUA_GCRESTART, 0);
+
+    return *p ? p : NULL;
+}
+
+
+
+/*---=== Deep userdata ===---*/
+
+/* The deep portion must be allocated separately from any Lua state's memory;
+* its lifespan may be longer than that of the creating state.
+*/
+#define DEEP_MALLOC malloc
+#define DEEP_FREE   free
+
+/* 
+* 'registry[REGKEY]' is a two-way lookup table for 'idfunc's and those type's
+* metatables:
+*
+*   metatable   ->  idfunc
+*   idfunc      ->  metatable
+*/
+#define DEEP_LOOKUP_KEY ((void*)set_deep_lookup)
+    // any unique light userdata
+
+static void push_registry_subtable( lua_State *L, void *token );
+
+/*
+* Sets up [-1]<->[-2] two-way lookups, and ensures the lookup table exists.
+* Pops both values off the stack.
+*/
+void set_deep_lookup( lua_State *L ) {
+
+    STACK_GROW(L,3);
+
+  STACK_CHECK(L)
+#if 1
+    push_registry_subtable( L, DEEP_LOOKUP_KEY );
+#else
+    /* ..to be removed.. */
+    lua_pushlightuserdata( L, DEEP_LOOKUP_KEY );
+    lua_rawget( L, LUA_REGISTRYINDEX );
+
+    if (lua_isnil(L,-1)) {
+        // First time here; let's make the lookup
+        //
+        lua_pop(L,1);
+
+        lua_newtable(L);
+        lua_pushlightuserdata( L, DEEP_LOOKUP_KEY );
+        lua_pushvalue(L,-2);
+            //
+            // [-3]: {} (2nd ref)
+            // [-2]: DEEP_LOOKUP_KEY
+            // [-1]: {}
+
+        lua_rawset( L, LUA_REGISTRYINDEX );
+            //
+            // [-1]: lookup table (empty)
+    }
+#endif
+  STACK_MID(L,1)
+
+    lua_insert(L,-3);
+
+    // [-3]: lookup table
+    // [-2]: A
+    // [-1]: B
+    
+    lua_pushvalue( L,-1 );  // B
+    lua_pushvalue( L,-3 );  // A
+    lua_rawset( L, -5 );    // B->A
+    lua_rawset( L, -3 );    // A->B
+    lua_pop( L,1 );
+
+  STACK_END(L,-2)
+}
+
+/*
+* Pops the key (metatable or idfunc) off the stack, and replaces with the
+* deep lookup value (idfunc/metatable/nil).
+*/
+void get_deep_lookup( lua_State *L ) {
+    
+    STACK_GROW(L,1);
+
+  STACK_CHECK(L)    
+    lua_pushlightuserdata( L, DEEP_LOOKUP_KEY );
+    lua_rawget( L, LUA_REGISTRYINDEX );
+    
+    if (!lua_isnil(L,-1)) {
+        // [-2]: key (metatable or idfunc)
+        // [-1]: lookup table
+    
+        lua_insert( L, -2 );
+        lua_rawget( L, -2 );
+    
+        // [-2]: lookup table
+        // [-1]: value (metatable / idfunc / nil)
+    }    
+    lua_remove(L,-2);
+        // remove lookup, or unused key
+  STACK_END(L,0)
+}
+
+/*
+* Return the registered ID function for 'index' (deep userdata proxy),
+* or NULL if 'index' is not a deep userdata proxy.
+*/
+static
+lua_CFunction get_idfunc( lua_State *L, int index ) {
+    // Resolve a relative index up front: the pushes below would shift it.
+    const int abs_index= STACK_ABS(L,index);
+    lua_CFunction found= NULL;
+
+    STACK_GROW(L,1);
+
+  STACK_CHECK(L)
+    if (lua_getmetatable( L, abs_index )) {
+        // [-1]: metatable of [abs_index]
+        //
+        // Swap the metatable for whatever the deep lookup maps it to.
+        get_deep_lookup(L);
+            //
+            // [-1]: idfunc/nil
+
+        // NULL here means the metatable is not registered as a deep type.
+        found= lua_tocfunction(L,-1);
+        lua_pop(L,1);
+    }
+  STACK_END(L,0)
+    return found;
+}
+
+
+/*
+* void= mt.__gc( proxy_ud )
+*
+* End of life for a proxy object; reduce the deep reference count and clean
+* it up if reaches 0.
+*/
+static
+int deep_userdata_gc( lua_State *L ) {
+    DEEP_PRELUDE **proxy= (DEEP_PRELUDE**)lua_touserdata( L, 1 );
+    DEEP_PRELUDE *p= *proxy;
+    int v;
+
+    *proxy= 0;  // make sure we don't use it any more
+
+    MUTEX_LOCK( &deep_lock );
+      v= --(p->refcount);
+    MUTEX_UNLOCK( &deep_lock );
+
+    if (v==0) {
+        int pushed;
+
+        // Call 'idfunc( "delete", deep_ptr )' to make deep cleanup
+        //
+        lua_CFunction idfunc= get_idfunc(L,1);
+        ASSERT_L(idfunc);
+        
+        lua_settop(L,0);    // clean stack so we can call 'idfunc' directly
+
+        // void= idfunc( "delete", lightuserdata )
+        //
+        lua_pushliteral( L, "delete" );
+        lua_pushlightuserdata( L, p->deep );
+        pushed= idfunc(L);
+        // Free the prelude first: 'luaL_error' does not return, so it would leak.
+        DEEP_FREE( (void*)p );
+
+        if (pushed)
+            luaL_error( L, "Bad idfunc on \"delete\": returned something" );
+    }
+    return 0;
+}
+
+
+/*
+* Push a proxy userdata on the stack.
+*
+* Initializes necessary structures if it's the first time 'idfunc' is being
+* used in this Lua state (metatable, registering it). The reference count of
+* the deep data is incremented in either case.
+*/
+void luaG_push_proxy( lua_State *L, lua_CFunction idfunc, DEEP_PRELUDE *prelude ) {
+    DEEP_PRELUDE **proxy;
+
+    MUTEX_LOCK( &deep_lock );
+      ++(prelude->refcount);  // one more proxy pointing to this deep data
+    MUTEX_UNLOCK( &deep_lock );
+
+    STACK_GROW(L,4);
+
+  STACK_CHECK(L)
+
+    proxy= lua_newuserdata( L, sizeof( DEEP_PRELUDE* ) );
+    ASSERT_L(proxy);
+    *proxy= prelude;
+
+    // Get/create metatable for 'idfunc' (in this state)
+    //
+    lua_pushcfunction( L, idfunc );    // key
+    get_deep_lookup(L);
+        //
+        // [-2]: proxy
+        // [-1]: metatable / nil
+    
+    if (lua_isnil(L,-1)) {
+        // No metatable yet; make one and register it
+        //
+        lua_pop(L,1);
+
+        // tbl= idfunc( "metatable" )
+        //
+        lua_pushcfunction( L, idfunc );
+        lua_pushliteral( L, "metatable" );
+        lua_call( L, 1 /*args*/, 1 /*results*/ );
+            //
+            // [-2]: proxy
+            // [-1]: metatable (returned by 'idfunc')
+
+        if (!lua_istable(L,-1))
+            luaL_error( L, "Bad idfunc on \"metatable\": did not return one" );
+
+        // Add '__gc' method
+        //
+        lua_pushcfunction( L, deep_userdata_gc );
+        lua_setfield( L, -2, "__gc" );
+
+        // Memorize for later rounds
+        //
+        lua_pushvalue( L,-1 );
+        lua_pushcfunction( L, idfunc );
+            //
+            // [-4]: proxy
+            // [-3]: metatable (2nd ref)
+            // [-2]: metatable
+            // [-1]: idfunc
+
+        set_deep_lookup(L);
+    } 
+  STACK_MID(L,2)
+    ASSERT_L( lua_isuserdata(L,-2) );
+    ASSERT_L( lua_istable(L,-1) );
+
+    // [-2]: proxy userdata
+    // [-1]: metatable to use
+
+    lua_setmetatable( L, -2 );
+    
+  STACK_END(L,1)
+    // [-1]: proxy userdata
+}
+
+
+/*
+* Create a deep userdata
+*
+*   proxy_ud= deep_userdata( idfunc [, ...] )
+*
+* Creates a deep userdata entry of the type defined by 'idfunc'.
+* Other parameters are passed on to the 'idfunc' "new" invocation.
+*
+* 'idfunc' must fulfill the following features:
+*
+*   lightuserdata= idfunc( "new" [, ...] )      -- creates a new deep data instance
+*   void= idfunc( "delete", lightuserdata )     -- releases a deep data instance
+*   tbl= idfunc( "metatable" )          -- gives metatable for userdata proxies
+*
+* Reference counting and true userdata proxying are taken care of for the
+* actual data type.
+*
+* Types using the deep userdata system (and only those!) can be passed between
+* separate Lua states via 'luaG_inter_move()'.
+*
+* Returns:  'proxy' userdata for accessing the deep data via 'luaG_todeep()'
+*/
+int luaG_deep_userdata( lua_State *L ) {
+    lua_CFunction idfunc= lua_tocfunction( L,1 );
+    DEEP_PRELUDE *prelude;
+    void *deep;
+    int pushed;
+
+    // 'lua_tocfunction' gives NULL for anything but a C function; calling
+    // through it below would crash, so raise a proper argument error instead.
+    luaL_argcheck( L, idfunc != NULL, 1, "idfunc (C function) expected" );
+
+    STACK_GROW(L,1);
+  STACK_CHECK(L)
+    // Replace 'idfunc' with "new" in the stack (keep possible other params)
+    lua_remove(L,1);
+    lua_pushliteral( L, "new" );
+    lua_insert(L,1);
+
+    // lightuserdata= idfunc( "new" [, ...] )
+    pushed= idfunc(L);
+
+    if ((pushed!=1) || lua_type(L,-1) != LUA_TLIGHTUSERDATA)
+        luaL_error( L, "Bad idfunc on \"new\": did not return light userdata" );
+
+    deep= lua_touserdata(L,-1);
+    ASSERT_L(deep);
+    lua_pop(L,1);   // pop deep data
+
+    // Allocate the prelude only now: the error exits above do not return,
+    // so an earlier allocation would leak. 'luaG_push_proxy' lifts refcount to 1.
+    prelude= DEEP_MALLOC( sizeof(DEEP_PRELUDE) );
+    ASSERT_L(prelude);
+    prelude->refcount= 0;
+    prelude->deep= deep;
+    luaG_push_proxy( L, idfunc, prelude );     // [-1]: proxy userdata
+  STACK_END(L,1)
+    return 1;
+}
+
+
+/*
+* Access deep userdata through a proxy. Gives the prelude's 'deep' pointer, or
+* NULL when 'get_idfunc()' does not report 'idfunc' for the value at 'index'.
+* Reference count is not changed, and access to the deep userdata is not
+* serialized. It is the module's responsibility to prevent conflicting usage.
+*/
+void *luaG_todeep( lua_State *L, lua_CFunction idfunc, int index ) {
+    DEEP_PRELUDE **proxy;
+
+  STACK_CHECK(L)
+    if (get_idfunc(L,index) != idfunc)
+        return NULL;    // no metatable, or wrong kind
+
+    proxy= (DEEP_PRELUDE**)lua_touserdata( L, index );
+  STACK_END(L,0)
+    // the userdata block is read as a pointer to the prelude
+    return (*proxy)->deep;
+}
+
+
+/*
+* Copy deep userdata between two separate Lua states: a proxy for the same
+* prelude as behind 'L' [index] is pushed onto 'L2' ('luaG_push_proxy()').
+* Returns:
+*   the id function of the copied value, or NULL for non-deep userdata
+*   (not copied; nothing is pushed)
+*/
+static
+lua_CFunction luaG_copydeep( lua_State *L, lua_State *L2, int index ) {
+    DEEP_PRELUDE **proxy;
+    DEEP_PRELUDE *p;
+
+    lua_CFunction idfunc;
+    
+    idfunc= get_idfunc( L, index );
+    if (!idfunc) return NULL;   // not a deep userdata
+
+    // Fetch the prelude behind the source proxy. No count is touched here;
+    // NOTE(review): presumably 'luaG_push_proxy()' increments it -- confirm.
+    proxy= (DEEP_PRELUDE**)lua_touserdata( L, index );
+    p= *proxy;
+
+    luaG_push_proxy( L2, idfunc, p );
+        //
+        // L2 [-1]: proxy userdata
+
+    return idfunc;
+}
+
+
+
+/*---=== Inter-state copying ===---*/
+
+/*-- Metatable copying --*/
+
+/*
+ * 'reg[ REG_MTID ]'= {
+ *      [ table ]= id_uint,
+ *          ...
+ *      [ id_uint ]= table,
+ *          ...
+ * }
+ */
+
+/*
+* Push the registry subtable keyed by 'token' (as light userdata) onto the
+* stack; if missing, an empty table is created and stored in the registry.
+*/
+static
+void push_registry_subtable( lua_State *L, void *token ) {
+
+    STACK_GROW(L,3);
+
+  STACK_CHECK(L)
+    // raw access: no metamethods take part in the lookup
+    lua_pushlightuserdata( L, token );
+    lua_rawget( L, LUA_REGISTRYINDEX );
+        //
+        // [-1]: nil/subtable
+    
+    if (lua_isnil(L,-1)) {
+        lua_pop(L,1);
+        lua_newtable(L);                    // value
+        lua_pushlightuserdata( L, token );  // key
+        lua_pushvalue(L,-2);
+            //
+            // [-3]: value (stays on the stack as the result)
+            // [-2]: key
+            // [-1]: value (2nd ref; consumed by 'lua_rawset')
+
+        lua_rawset( L, LUA_REGISTRYINDEX );
+    }
+  STACK_END(L,1)
+
+    ASSERT_L( lua_istable(L,-1) );
+}
+
+#define REG_MTID ( (void*) get_mt_id )
+/*
+* Get a unique ID for metatable at [i]: looked up in 'reg[REG_MTID]' and, if
+* absent, drawn from a counter (first id is 1) and registered both ways.
+*/
+static
+uint_t get_mt_id( lua_State *L, int i ) {
+    static uint_t last_id= 0;
+    uint_t id;
+    // make 'i' absolute; pushes below would shift a relative index
+    i= STACK_ABS(L,i);
+
+    STACK_GROW(L,3);
+
+  STACK_CHECK(L)
+    push_registry_subtable( L, REG_MTID );
+    lua_pushvalue(L, i);
+    lua_rawget( L, -2 );
+        //
+        // [-2]: reg[REG_MTID]
+        // [-1]: nil/uint
+    
+    id= lua_tointeger(L,-1);    // 0 for nil
+    lua_pop(L,1);
+  STACK_MID(L,1)
+    // 0: this Lua state has no id recorded for the metatable yet
+    if (id==0) {
+        MUTEX_LOCK( &mtid_lock );
+            id= ++last_id;
+        MUTEX_UNLOCK( &mtid_lock );
+        // ('last_id' is a function-level static, hence the lock around it)
+        /* Create two-way references: id_uint <-> table
+        */
+        lua_pushvalue(L,i);
+        lua_pushinteger(L,id);
+        lua_rawset( L, -3 );
+        
+        lua_pushinteger(L,id);
+        lua_pushvalue(L,i);
+        lua_rawset( L, -3 );
+    }
+    lua_pop(L,1);     // remove 'reg[REG_MTID]' reference
+
+  STACK_END(L,0)
+  
+    return id;
+}
+
+
+static int buf_writer( lua_State *L, const void* b, size_t n, void* B ) {
+  luaL_Buffer *target= (luaL_Buffer*) B;    // writer user data: the buffer to append to
+  luaL_addlstring( target, (const char *)b, n );
+  (void)L; return 0;    // 'L' is unused; 0 is always returned
+}
+
+
+/* 
+ * Check if we've already copied the same table from 'L', and
+ * reuse the old copy. This allows table upvalues shared by multiple
+ * local functions to point to the same table, also in the target.
+ *
+ * 'L2_cache_i': 'L2' stack index of the cache table (never 0). The cache key
+ * is the 'hijacked_tostring' text of the source table at 'L' [i].
+ * Always pushes a table to 'L2'. Returns TRUE if it came from the cache (no
+ * need to fill it!); FALSE if it is brand new (empty; caller fills it).
+ */
+static
+bool_t push_cached_table( lua_State *L2, uint_t L2_cache_i, lua_State *L, uint_t i ) {
+    bool_t ret;
+
+    ASSERT_L( hijacked_tostring );
+    ASSERT_L( L2_cache_i != 0 );
+
+    STACK_GROW(L,2);
+    STACK_GROW(L2,3);
+
+    // Create an identity string for table at [i]; it should stay unique at
+    // least during copying of the data (then we can clear the caches).
+    //
+  STACK_CHECK(L)
+    lua_pushcfunction( L, hijacked_tostring );
+    lua_pushvalue( L, i );
+    lua_call( L, 1 /*args*/, 1 /*retvals*/ );
+        //
+        // [-1]: "table: 0x...."
+
+  STACK_END(L,1)
+    ASSERT_L( lua_type(L,-1) == LUA_TSTRING );
+
+    // L2_cache[id_str]= [{...}]
+    //
+  STACK_CHECK(L2)
+
+    // We don't need to use the from state ('L') in ID since the life span
+    // is only for the duration of a copy (both states are locked).
+    //
+    lua_pushstring( L2, lua_tostring(L,-1) );
+    lua_pop(L,1);   // remove the 'tostring(tbl)' value (in L!)
+
+//fprintf( stderr, "<< ID: %s >>\n", lua_tostring(L2,-1) );
+
+    lua_pushvalue( L2, -1 );
+    lua_rawget( L2, L2_cache_i );
+        //
+        // [-2]: identity string ("table: 0x...")
+        // [-1]: table|nil
+
+    if (lua_isnil(L2,-1)) {
+        lua_pop(L2,1);
+        lua_newtable(L2);
+        lua_pushvalue(L2,-1);
+        lua_insert(L2,-3);
+            //
+            // [-3]: new table (stays on the stack as the result)
+            // [-2]: identity string
+            // [-1]: new table (2nd ref; consumed by 'lua_rawset')
+
+        lua_rawset(L2, L2_cache_i);
+            //
+            // [-1]: new table (also stored in the 'L2_cache' table)
+
+        ret= FALSE;     // brand new
+        
+    } else {
+        lua_remove(L2,-2);  // drop the identity string, keep the cached table
+        ret= TRUE;      // from cache
+    }
+  STACK_END(L2,1)
+    //
+    // L2 [-1]: table to use as destination
+
+    ASSERT_L( lua_istable(L2,-1) );
+    return ret;
+}
+
+
+/* 
+ * Check if we've already copied the same function from 'L', and reuse the old
+ * copy; otherwise copy it with 'inter_copy_func()' and cache the result.
+ * Raises a Lua error (on 'L') if the function is met again while being copied.
+ * Always pushes a function to 'L2'.
+ */
+static void inter_copy_func( lua_State *L2, uint_t L2_cache_i, lua_State *L, uint_t i );
+
+static
+void push_cached_func( lua_State *L2, uint_t L2_cache_i, lua_State *L, uint_t i ) {
+    // TBD: Merge this and same code for tables
+
+    ASSERT_L( hijacked_tostring );
+    ASSERT_L( L2_cache_i != 0 );
+
+    STACK_GROW(L,2);
+    STACK_GROW(L2,3);
+
+  STACK_CHECK(L)
+    lua_pushcfunction( L, hijacked_tostring );
+    lua_pushvalue( L, i );
+    lua_call( L, 1 /*args*/, 1 /*retvals*/ );
+        //
+        // [-1]: "function: 0x...."
+
+  STACK_END(L,1)
+    ASSERT_L( lua_type(L,-1) == LUA_TSTRING );
+
+    // L2_cache[id_str]= function
+    //
+  STACK_CHECK(L2)
+
+    // We don't need to use the from state ('L') in ID since the life span
+    // is only for the duration of a copy (both states are locked).
+    //
+    lua_pushstring( L2, lua_tostring(L,-1) );
+    lua_pop(L,1);   // remove the 'tostring(func)' value (in L!)
+
+//fprintf( stderr, "<< ID: %s >>\n", lua_tostring(L2,-1) );
+
+    lua_pushvalue( L2, -1 );
+    lua_rawget( L2, L2_cache_i );
+        //
+        // [-2]: identity string ("function: 0x...")
+        // [-1]: function|nil|true  (true means: we're working on it; recursive)
+
+    if (lua_isnil(L2,-1)) {
+        lua_pop(L2,1);
+        
+        // Set to 'true' for the duration of creation; need to find self-references
+        // via upvalues
+        //
+        lua_pushboolean(L2,TRUE);
+        lua_setfield( L2, L2_cache_i, lua_tostring(L2,-2) );        
+
+        inter_copy_func( L2, L2_cache_i, L, i );    // pushes a copy of the func
+
+        lua_pushvalue(L2,-1);
+        lua_insert(L2,-3);
+            //
+            // [-3]: function (stays on the stack as the result)
+            // [-2]: identity string
+            // [-1]: function (2nd ref; consumed by 'lua_rawset')
+
+        lua_rawset(L2,L2_cache_i);
+            //
+            // [-1]: function (also stored in the 'L2_cache' table, replacing 'true')
+        
+    } else if (lua_isboolean(L2,-1)) {
+        // Loop in preparing upvalues; either direct or via a table
+        // 
+        // Note: This excludes the case where a function directly addresses
+        //       itself as an upvalue (recursive lane creation).
+        //
+        luaL_error( L, "Recursive use of upvalues; cannot copy the function" );
+    
+    } else {
+        lua_remove(L2,-2);
+    }
+  STACK_END(L2,1)
+    //
+    // L2 [-1]: function
+
+    ASSERT_L( lua_isfunction(L2,-1) );
+}
+
+
+/*
+* Copy function at 'L' [i] (one not found in the cache) onto 'L2', upvalues included.
+*/
+enum e_vt {
+    VT_NORMAL, VT_KEY, VT_METATABLE
+};
+static bool_t inter_copy_one_( lua_State *L2, uint_t L2_cache_i, lua_State *L, uint_t i, enum e_vt value_type );
+
+static void inter_copy_func( lua_State *L2, uint_t L2_cache_i, lua_State *L, uint_t i ) {
+
+    lua_CFunction cfunc= lua_tocfunction( L,i );    // NULL for a Lua function
+    unsigned n;     // number of upvalues pushed over to 'L2'
+
+    ASSERT_L( L2_cache_i != 0 );
+
+  STACK_GROW(L,2);
+
+  STACK_CHECK(L)
+    if (!cfunc) {   // Lua function
+        luaL_Buffer b;
+        const char *s;
+        size_t sz;
+        int tmp;
+        const char *name= NULL;
+
+#if 0
+        // "To get information about a function you push it onto the 
+        // stack and start the what string with the character '>'."
+        //
+        { lua_Debug ar;
+        lua_pushvalue( L, i );
+        lua_getinfo(L, ">n", &ar);      // fills 'name' and 'namewhat', pops function
+        name= ar.namewhat;
+        
+        fprintf( stderr, "NAME: %s\n", name );  // just gives NULL
+        }
+#endif 
+        // 'lua_dump()' needs the function at top of stack
+        // NOTE(review): 'i' is 'uint_t'; if that is unsigned, 'i!=-1' is always true
+        if (i!=-1) lua_pushvalue( L, i );
+
+        luaL_buffinit(L,&b);
+        tmp= lua_dump(L, buf_writer, &b);
+        ASSERT_L(tmp==0);
+            //
+            // "value returned is the error code returned by the last call 
+            // to the writer" (and we only return 0)
+
+        luaL_pushresult(&b);    // pushes dumped string on 'L'
+        s= lua_tolstring(L,-1,&sz);
+        ASSERT_L( s && sz );
+
+        if (i!=-1) lua_remove( L, -2 );    // drop the function copy pushed for 'lua_dump()'
+
+        // Note: Line numbers seem to be taken precisely from the 
+        //       original function. 'name' is not used since the chunk
+        //       is precompiled (it seems...). 
+        //
+        // TBD: Can we get the function's original name through, as well?
+        //
+        if (luaL_loadbuffer(L2, s, sz, name) != 0) {
+            // chunk is precompiled so only LUA_ERRMEM can happen
+            // "Otherwise, it pushes an error message"
+            //
+            STACK_GROW( L,1 );
+            luaL_error( L, "%s", lua_tostring(L2,-1) );
+        }
+        lua_pop(L,1);   // remove the dumped string
+  STACK_MID(L,0)
+    }
+
+    /* push over any upvalues; references to this function will come from
+    * cache so we don't end up in eternal loop.
+    */
+    for( n=0; lua_getupvalue( L, i, 1+n ) != NULL; n++ ) {
+        if ((!cfunc) && lua_equal(L,i,-1)) {
+            /* Lua closure that has a (recursive) upvalue to itself
+            */
+            lua_pushvalue( L2, -((int)n)-1 );
+        } else {
+            if (!inter_copy_one_( L2, L2_cache_i, L, lua_gettop(L), VT_NORMAL ))
+                luaL_error( L, "Cannot copy upvalue type '%s'", luaG_typename(L,-1) );
+        }
+        lua_pop(L,1);
+    }
+    // L2: [Lua function +] 'n' upvalues (>=0); a C function is not pushed yet
+
+  STACK_MID(L,0)
+
+    if (cfunc) {
+        lua_pushcclosure( L2, cfunc, n );   // eats up upvalues
+    } else {
+        // Set upvalues (originally set to 'nil' by 'lua_load')
+        //
+        int func_index= lua_gettop(L2)-n;
+
+        for( ; n>0; n-- ) {
+            const char *rc= lua_setupvalue( L2, func_index, n );
+                //
+                // "assigns the value at the top of the stack to the upvalue and returns its name.
+                // It also pops the value from the stack."
+            
+            ASSERT_L(rc);      // not having enough slots?
+        }
+    }
+  STACK_END(L,0)
+}
+
+
+/*
+* Copies a value from 'L' state (at index 'i') to 'L2' state. Does not remove
+* the original value. 'L2_cache_i' is the 'L2' index of the per-copy cache.
+* NOTE: Both the states must be solely in the current OS thread's possession.
+*
+* 'i' is an absolute index (no -1, ...). With 'vt' == VT_KEY only boolean,
+* number, string and light userdata are accepted.
+* Returns TRUE if value was pushed, FALSE if its type is non-supported (or
+* not a known Lua type at all, s.a. LUA_TNONE); then nothing is pushed.
+*/
+static bool_t inter_copy_one_( lua_State *L2, uint_t L2_cache_i, lua_State *L, uint_t i, enum e_vt vt )
+{
+    bool_t ret= TRUE;
+
+    STACK_GROW( L2, 1 );
+
+  STACK_CHECK(L2)
+
+    switch ( lua_type(L,i) ) {
+        /* Basic types allowed both as values, and as table keys */
+
+        case LUA_TBOOLEAN:
+            lua_pushboolean( L2, lua_toboolean(L, i) );
+            break;
+
+        case LUA_TNUMBER:
+            /* LNUM patch support (keeping integer accuracy) */
+#ifdef LUA_LNUM
+            if (lua_isinteger(L,i)) {
+                lua_pushinteger( L2, lua_tointeger(L, i) );
+                break;
+            }
+#endif
+            lua_pushnumber( L2, lua_tonumber(L, i) ); 
+            break;
+
+        case LUA_TSTRING: {
+            size_t len; const char *s = lua_tolstring( L, i, &len );
+            lua_pushlstring( L2, s, len );
+            } break;
+
+        case LUA_TLIGHTUSERDATA:
+            lua_pushlightuserdata( L2, lua_touserdata(L, i) );
+            break;
+
+        /* The following types are not allowed as table keys */
+
+        case LUA_TUSERDATA: if (vt==VT_KEY) { ret=FALSE; break; }
+            /* Allow only deep userdata entities to be copied across
+             */
+            if (!luaG_copydeep( L, L2, i )) {
+                // Cannot copy it full; copy as light userdata
+                //
+                lua_pushlightuserdata( L2, lua_touserdata(L, i) );
+            } break;
+
+        case LUA_TNIL: if (vt==VT_KEY) { ret=FALSE; break; }
+            lua_pushnil(L2);
+            break;
+
+        case LUA_TFUNCTION: if (vt==VT_KEY) { ret=FALSE; break; } {
+            /* 
+            * Passing C functions is risky; if they refer to LUA_ENVIRONINDEX
+            * and/or LUA_REGISTRYINDEX they might work unintended (not work)
+            * at the target.
+            *
+            * On the other hand, NOT copying them causes many self tests not
+            * to work (timer, hangtest, ...)
+            *
+            * The trouble is, we cannot KNOW if the function at hand is safe
+            * or not. We cannot study it's behaviour. We could trust the user,
+            * but they might not even know they're sending lua_CFunction over
+            * (as upvalues etc.).
+            */
+#if 0
+            if (lua_iscfunction(L,i))
+                luaL_error( L, "Copying lua_CFunction between Lua states is risky, and currently disabled." ); 
+#endif
+          STACK_CHECK(L2)
+            push_cached_func( L2, L2_cache_i, L, i );
+            ASSERT_L( lua_isfunction(L2,-1) );
+          STACK_END(L2,1)
+            } break;
+
+        case LUA_TTABLE: if (vt==VT_KEY) { ret=FALSE; break; } {
+        
+          STACK_CHECK(L)
+          STACK_CHECK(L2)
+
+            /* Check if we've already copied the same table from 'L' (during this transmission), and
+             * reuse the old copy. This allows table upvalues shared by multiple
+             * local functions to point to the same table, also in the target.
+             * Also, this takes care of cyclic tables and multiple references
+             * to the same subtable.
+             *
+             * Note: Even metatables need to go through this test; to detect
+             *      loops s.a. those in required module tables (getmetatable(lanes).lanes == lanes)
+             */
+            if (push_cached_table( L2, L2_cache_i, L, i )) {
+                ASSERT_L( lua_istable(L2, -1) );    // from cache
+                break;
+            }
+            ASSERT_L( lua_istable(L2,-1) );
+
+            STACK_GROW( L, 2 );
+            STACK_GROW( L2, 2 );
+
+            lua_pushnil(L);    // start iteration
+            while( lua_next( L, i ) ) {
+                uint_t val_i= lua_gettop(L);
+                uint_t key_i= val_i-1;
+
+                /* Only basic key types are copied over; others ignored
+                 */
+                if (inter_copy_one_( L2, 0 /*key*/, L, key_i, VT_KEY )) {
+                    /*
+                    * Contents of metatables are copied with cache checking;
+                    * important to detect loops.
+                    */
+                    if (inter_copy_one_( L2, L2_cache_i, L, val_i, VT_NORMAL )) {
+                        ASSERT_L( lua_istable(L2,-3) );
+                        lua_rawset( L2, -3 );    // add to table (pops key & val)
+                    } else {
+                        luaL_error( L, "Unable to copy over type '%s' (in %s)", 
+                                        luaG_typename(L,val_i), 
+                                        vt==VT_NORMAL ? "table":"metatable" );
+                    }
+                }
+                lua_pop( L, 1 );    // pop value (next round)
+            }
+          STACK_MID(L,0)
+          STACK_MID(L2,1)
+          
+            /* Metatables are expected to be immutable, and copied only once.
+            */
+            if (lua_getmetatable( L, i )) {
+                //
+                // L [-1]: metatable
+
+                uint_t mt_id= get_mt_id( L, -1 );    // Unique id for the metatable
+
+                STACK_GROW(L2,4);
+
+                push_registry_subtable( L2, REG_MTID );
+              STACK_MID(L2,2);
+                lua_pushinteger( L2, mt_id );
+                lua_rawget( L2, -2 );
+                    //
+                    // L2 ([-3]: copied table)
+                    //    [-2]: reg[REG_MTID]
+                    //    [-1]: nil/metatable pre-known in L2
+
+              STACK_MID(L2,3);
+
+                if (lua_isnil(L2,-1)) {   /* L2 did not know the metatable */
+                    lua_pop(L2,1);
+              STACK_MID(L2,2);
+ASSERT_L( lua_istable(L,-1) );
+                    if (inter_copy_one_( L2, L2_cache_i /*for function cacheing*/, L, lua_gettop(L) /*[-1]*/, VT_METATABLE )) {
+                        //
+                        // L2 ([-3]: copied table)
+                        //    [-2]: reg[REG_MTID]
+                        //    [-1]: metatable (copied from L)
+
+              STACK_MID(L2,3);
+                        // mt_id -> metatable
+                        //
+                        lua_pushinteger(L2,mt_id);
+                        lua_pushvalue(L2,-2);
+                        lua_rawset(L2,-4);
+
+                        // metatable -> mt_id
+                        //
+                        lua_pushvalue(L2,-1);
+                        lua_pushinteger(L2,mt_id);
+                        lua_rawset(L2,-4);
+                        
+              STACK_MID(L2,3);
+                    } else {
+                        luaL_error( L, "Error copying a metatable" );
+                    }
+              STACK_MID(L2,3);
+                }
+                    // L2 ([-3]: copied table)
+                    //    [-2]: reg[REG_MTID]
+                    //    [-1]: metatable (pre-known or copied from L)
+
+                lua_remove(L2,-2);   // take away 'reg[REG_MTID]'
+                    //
+                    // L2: ([-2]: copied table)
+                    //     [-1]: metatable for that table
+
+                lua_setmetatable( L2, -2 );
+                
+                // L2: [-1]: copied table (with metatable set if source had it)
+
+                lua_pop(L,1);   // remove source metatable (L, not L2!)
+            }
+          STACK_END(L2,1)
+          STACK_END(L,0)
+            } break;
+
+        /* The following types cannot be copied (nor anything unrecognized, s.a. LUA_TNONE) */
+        case LUA_TTHREAD: 
+        default:    // without this, an unknown type would report TRUE with nothing pushed
+            ret=FALSE; break;
+    }
+
+  STACK_END(L2, ret? 1:0)
+
+    return ret;
+}
+
+
+/*
+* Akin to 'lua_xmove' but copies values between _any_ Lua states: the topmost
+* 'n' values of 'L' are copied (not removed) onto 'L2', in the same order.
+* If a top-level value's type is refused, 'L2' is restored to its initial top
+* before the error is raised on 'L' (errors raised deeper do not clean up).
+* NOTE: Both the states must be solely in the current OS thread's possession.
+* Note: Parameters are in this order ('L' = from first) to be same as 'lua_xmove'.
+*/
+void luaG_inter_copy( lua_State* L, lua_State *L2, uint_t n )
+{
+    uint_t top_L= lua_gettop(L);
+    uint_t top_L2= lua_gettop(L2);
+    uint_t i;
+
+    /* steal Lua library's 'luaB_tostring()' from the first call. Other calls
+    * don't have to have access to it.
+    *
+    * Note: multiple threads won't come here at once; this function will
+    *       be called before there can be multiple threads (no locking needed).
+    */
+    if (!hijacked_tostring) {
+        STACK_GROW( L,1 );
+        
+      STACK_CHECK(L)
+        lua_getglobal( L, "tostring" );
+            //
+            // [-1]: function|nil
+            
+        hijacked_tostring= lua_tocfunction( L, -1 );
+        lua_pop(L,1);
+      STACK_END(L,0)
+      
+        if (!hijacked_tostring) {
+            luaL_error( L, "Need to see 'tostring()' once" );
+        }
+    }
+
+    if (n > top_L) 
+        luaL_error( L, "Not enough values: %d < %d", top_L, n );
+
+    STACK_GROW( L2, n+1 );
+
+    /*
+    * Make a cache table for the duration of this copy. Collects tables and
+    * function entries, avoiding the same entries to be passed on as multiple
+    * copies. ESSENTIAL i.e. for handling upvalue tables in the right manner!
+    */
+    lua_newtable(L2);
+    for (i=top_L-n+1; i <= top_L; i++) {
+        if (!inter_copy_one_( L2, top_L2+1, L, i, VT_NORMAL )) {
+            lua_settop( L2, (int)top_L2 );  // drop cache table + values copied so far
+            luaL_error( L, "Cannot copy type: %s", luaG_typename(L,i) );
+       }
+    }
+
+    /*
+    * Remove the cache table. Persistent caching would cause i.e. multiple 
+    * messages passed in the same table to use the same table also in receiving
+    * end.
+    */
+    lua_remove( L2, top_L2+1 );
+
+    ASSERT_L( (uint_t)lua_gettop(L) == top_L );
+    ASSERT_L( (uint_t)lua_gettop(L2) == top_L2+n );
+}
+
+// As 'luaG_inter_copy()', after which the 'n' copied values are popped off 'L'.
+void luaG_inter_move( lua_State* L, lua_State *L2, uint_t n )
+{
+    luaG_inter_copy( L, L2, n );
+    lua_pop( L,(int)n );
+}
diff --git a/src/tools.h b/src/tools.h
new file mode 100644
index 0000000..d155c65
--- /dev/null
+++ b/src/tools.h
@@ -0,0 +1,72 @@
+/*
+* TOOLS.H
+*/
+#ifndef TOOLS_H
+#define TOOLS_H
+
+#include "lua.h"
+#include "threading.h"
+    // MUTEX_T
+
+#include <assert.h>
+
+// Note: The <= -10000 test is to leave registry/global/upvalue indices untouched;
+//       other negative (top-relative) indices are turned into absolute ones.
+#define /*int*/ STACK_ABS(L,n) \
+	( ((n) >= 0 || (n) <= -10000) ? (n) : lua_gettop(L) +(n) +1 )
+
+#ifdef NDEBUG
+  #define _ASSERT_L(lua,c)  /*nothing*/
+  #define STACK_CHECK(L)    /*nothing*/
+  #define STACK_MID(L,c)    /*nothing*/
+  #define STACK_END(L,c)    /*nothing*/
+  #define STACK_DUMP(L)    /*nothing*/
+  #define DEBUG()   /*nothing*/
+#else
+  #define _ASSERT_L(lua,c)  { if (!(c)) luaL_error( lua, "ASSERT failed: %s:%d '%s'", __FILE__, __LINE__, #c ); }
+  // STACK_CHECK opens a brace scope that remembers the stack top; STACK_END closes that scope.
+  #define STACK_CHECK(L)     { int _oldtop_##L = lua_gettop(L);
+  #define STACK_MID(L,change)  { int a= lua_gettop(L)-_oldtop_##L; int b= (change); \
+                               if (a != b) luaL_error( L, "STACK ASSERT failed (%d not %d): %s:%d", a, b, __FILE__, __LINE__ ); }
+  #define STACK_END(L,change)  STACK_MID(L,change) }
+  // STACK_MID/STACK_END raise a Lua error unless the top has moved by exactly 'change'.
+  #define STACK_DUMP(L)    luaG_dump(L);
+  #define DEBUG()   fprintf( stderr, "<<%s %d>>\n", __FILE__, __LINE__ );
+#endif
+#define ASSERT_L(c) _ASSERT_L(L,c)
+
+#define STACK_GROW(L,n) { if (!lua_checkstack(L,n)) luaL_error( L, "Cannot grow stack!" ); }
+    // ^ makes room for 'n' more stack slots, or raises a Lua error
+#define LUAG_FUNC( func_name ) static int LG_##func_name( lua_State *L )
+    // ^ expands to the header of a static function 'LG_<func_name>'
+#define luaG_optunsigned(L,i,d) ((uint_t) luaL_optinteger(L,i,d))
+#define luaG_tounsigned(L,i) ((uint_t) lua_tointeger(L,i))
+
+#define luaG_isany(L,i)  (!lua_isnil(L,i))
+
+#define luaG_typename( L, index ) lua_typename( L, lua_type(L,index) )
+
+void luaG_dump( lua_State* L );
+
+const char *luaG_openlibs( lua_State *L, const char *libs );
+
+int luaG_deep_userdata( lua_State *L );
+void *luaG_todeep( lua_State *L, lua_CFunction idfunc, int index );
+    // Record behind deep userdata proxies: reference count + the deep data pointer
+typedef struct {
+    volatile int refcount;  // set to 0 on creation; 'luaG_push_proxy' is to lift it
+    void *deep;             // the light userdata given by 'idfunc( "new" )'
+} DEEP_PRELUDE;
+
+void luaG_push_proxy( lua_State *L, lua_CFunction idfunc, DEEP_PRELUDE *deep_userdata );
+
+void luaG_inter_copy( lua_State *L, lua_State *L2, uint_t n );
+void luaG_inter_move( lua_State *L, lua_State *L2, uint_t n );
+
+// Locks for deep userdata reference counter inc/dec ('deep_lock') and for
+// metatable id allocation ('mtid_lock'); to be initialized by outside code
+extern MUTEX_T deep_lock;
+extern MUTEX_T mtid_lock;
+
+#endif
+    // TOOLS_H
diff --git a/tests/argtable.lua b/tests/argtable.lua
new file mode 100644
index 0000000..5ed5d4e
--- /dev/null
+++ b/tests/argtable.lua
@@ -0,0 +1,38 @@
+--
+-- ARGTABLE.LUA            Copyright (c) 2007, Asko Kauppi <akauppi@gmail.com>
+--
+-- Command line parameter parsing
+--
+-- NOTE: Wouldn't hurt having such a service built-in to Lua...? :P
+--
+
+local m= {}
+
+-- tbl= argtable(...)
+--
+-- Splits command line style parameters into one table: plain values are
+-- appended at indices 1..N (as given), whereas "-flag[=xxx]" / "--flag[=xxx]"
+-- parameters become fields { flag=xxx/true } ('xxx' as a number, if numeric).
+--
+--      15          -->     { 15 }
+--      -20         -->     { -20 }
+--      -a          -->     { ['a']=true }
+--      --some=15   -->     { ['some']=15 }
+--      --more=big  -->     { ['more']='big' }
+function m.argtable(...)
+    local out= {}
+    for idx=1,select('#',...) do
+        local item= (select(idx,...))
+        local name,rhs= string.match( item, "^%-+([^=]+)%=?(.*)" )
+        if name==nil or tonumber(item) then
+            out[#out+1]= item   -- plain value (also negative numbers s.a. "-20")
+        elseif rhs=="" then
+            out[name]= true     -- bare flag, no "=xxx" part
+        else
+            out[name]= tonumber(rhs) or rhs
+        end
+    end
+    return out
+end
+
+return m
diff --git a/tests/assert.lua b/tests/assert.lua
new file mode 100644
index 0000000..85febfb
--- /dev/null
+++ b/tests/assert.lua
@@ -0,0 +1,318 @@
+--
+-- ASSERT.LUA                    Copyright (c) 2006-07, <akauppi@gmail.com>
+--
+-- Converting the Lua 'assert' function into a namespace table (without
+-- breaking compatibility with the basic 'assert()' calling).
+--
+-- This module allows shorthand use s.a. 'assert.table()' for asserting 
+-- variable types, and is also being used by Lua-super constraints system
+-- for testing function parameter & return types.
+--
+-- All in all, a worthy module and could be part of Lua future versions.
+--
+-- Note: the 'assert' table is available for your own assertions, too. Just add
+--       more functions s.a. 'assert.myobj()' to check for custom invariants. 
+--       They will then be available for the constraints check, too.
+--
+-- Author:  <akauppi@gmail.com>
+--
+--[[
+/******************************************************************************
+* Lua 5.1.1 support and extension functions (assert.lua)
+*
+* Copyright (C) 2006-07, Asko Kauppi.
+*
+* NOTE: This license concerns only the particular source file; not necessarily
+*       the project with which it has been delivered (the project may have a more
+*       restrictive license, s.a. [L]GPL).
+*
+* Permission is hereby granted, free of charge, to any person obtaining
+* a copy of this software and associated documentation files (the 
+* "Software"), to deal in the Software without restriction, including   
+* without limitation the rights to use, copy, modify, merge, publish,
+* distribute, sublicense, and/or sell copies of the Software, and to
+* permit persons to whom the Software is furnished to do so, subject to   
+* the following conditions:
+*
+* The above copyright notice and this permission notice shall be
+* included in all copies or substantial portions of the Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+******************************************************************************/
+]]--
+
+local m= { _info= { MODULE= "Assert.* functions for constraints, and unit testing",
+                    AUTHOR= "akauppi@gmail.com",
+                    VERSION= 20070603,    -- last change (yyyymmdd)
+                    LICENSE= "MIT/X11" } }
+
+-- Global changes:
+--      'assert' redefined, in a backwards compatible way
+--
+-- Module functions:
+--      none
+
+assert( type(assert) == "function" )    -- system assert function
+
+-- Integer range: INT_MIN..INT_MAX. Probe: gives 'n-1' when that is still
+-- smaller than 'n' (precision holds at that size), otherwise 'false'.
+local function try_maxint( n )
+    local below= n-1
+    return (below < n) and below
+end
+
+local INT_MAX=
+    try_maxint( 2^64 ) or
+    try_maxint( 2^32 ) or
+    try_maxint( 2^24 ) or     -- float (24-bit mantissa)
+    assert( false )
+    -- ^ the first probe that gives a value wins (INT_MAX = 2^k-1); fails hard if none does
+local INT_MIN= -(INT_MAX+1)
+
+
+---=== assert.*() ===---
+
+local at_msg= "type assertion error"  -- TBD: better messages, about that exact situation
+local av_msg= "value assertion error"
+
+-- void= _assert( val [, msg_str [, lev_uint]] )
+--
+-- Raises 'msg_str' (default "assertion failed!") unless 'val' is true-ish.
+-- 'lev_uint' (default 1 = whoever called '_assert') picks the call level that
+-- gets blamed; stock 'assert' has no such override, hence the use of 'error'.
+local function _assert( cond, msg, lev ) 
+    if cond then return end
+    error( msg or "assertion failed!", (lev or 1)+1 )
+end
+
+-- Note: the checker factories here rely on the local '_assert()' on purpose,
+--       since it provides a level override (stock 'assert' doesn't).
+-- assert_v: checker passing 'v' through if it equals 'v0' (else 'av_msg').
+local function assert_v( v0 )
+    local function check( v, msg )
+        _assert( v == v0, msg or av_msg, 2 ); return v
+    end
+    return check
+end
+local function assert_t( str )  -- gives a checker: 'type(v)' must be 'str'
+    local function check( v, msg )
+        _assert( type(v) == str, msg or at_msg, 2 ); return v
+    end
+    return check
+end
+local function assert_t2( str )  -- as 'assert_t', plus an optional subtype check
+    local function check( v, subtype, msg )
+        local t,st= type(v)     -- NOTE(review): stock 'type' gives no 2nd value
+        local ok= (t==str) and ((not subtype) or (st==subtype))
+        _assert( ok, msg or at_msg, 2 ); return v
+    end
+    return check
+end
+
+assert= 
+  {
+    __call= function(_,v,msg)     -- plain 'assert()' (compatibility)
+            if v then return v end
+            _assert( v, msg, 2 )
+        end,
+
+    -- Hopefully, Lua will allow use of 'nil', 'function' and other reserved words as table 
+    -- shortcuts in the future (5.1.1 does not). 
+    --
+    ["nil"]= assert_v( nil ),
+    boolean= assert_t "boolean",
+    table= assert_t2 "table",
+    ["function"]= assert_t "function",
+    userdata= assert_t2 "userdata",
+
+    string= function( v, msg )
+        local s= tostring(v)
+        _assert( s, msg or at_msg, 2 )
+        return v
+    end,
+
+    char= function( v, msg )
+        -- 'char' is _not_ doing int->string conversion
+        _assert( type(v)=="string" and v:len()==1, msg or at_msg, 2 )
+        return v
+    end,
+
+    number= function( v, msg )
+        _assert( tonumber(v), msg or at_msg, 2 )
+        return v
+    end,
+
+    int= function( v, msg )
+        local n= tonumber(v)
+        _assert( n and (n >= INT_MIN) and (n <= INT_MAX) and math.floor(n) == n,
+                    msg or at_msg, 2 )
+        return v
+    end,
+
+    uint= function( v, msg )
+        local n= tonumber(v)
+        -- unsigned integer upper range is the same as integers' (there's no
+        -- real unsigned within the Lua)
+        _assert( n and (n >= 0) and (n <= INT_MAX) and math.floor(n) == n,
+                    msg or at_msg, 2 )
+        return v
+    end,
+    
+    ['true']= assert_v( true ),
+    ['false']= assert_v( false ),
+
+    string_or_table= function( v, msg )    -- passes 'v' through when it converts with 'tostring()' or is a table
+        assert( tostring(v) or type(v)=="table", msg or at_msg, 2 )    -- NOTE(review): 'tostring(v)' is normally always truthy, so this may never fail -- confirm intent
+        return v
+    end,
+    
+    number_or_string= function( v, msg )    -- passes 'v' through when it is a string or converts with 'tonumber()'
+        _assert( tonumber(v) or type(v)=="string", msg or at_msg, 2 )    -- used to test for "table" (copy-paste slip)
+        return v
+    end,
+
+    any= function( v, msg )
+        assert( v ~= nil, msg or av_msg, 2 )
+        return v
+    end,
+
+    -- Range assertion, with extra parameters per instance
+    -- 
+    -- Note: values may be of _any_ type that can do >=, <= comparisons.
+    --
+    range= function( lo, hi )    -- returns a checker 'f(v [,msg])' that passes 'v' through when lo <= v <= hi
+        _assert( lo and hi and lo <= hi, "Bad limits", 2 )
+             -- make sure the limits make sense (just once)
+
+        return function(v,msg,lev)    -- 'lev' is accepted but not used below
+            if ( (lo and v<lo) or (hi and v>hi) ) then
+                msg= msg or "not in range: ("..(lo or "")..","..(hi or "")..")"
+                _assert( false, msg, 2 )
+            end
+            return v
+        end
+    end,
+    
+    -- Table contents assertion
+    --      - no unknown (non-numeric) keys are allowed
+    --      - numeric keys are ignored
+    --
+    -- Constraints patch should point to this, when using the ":{ ... }" constraint.
+    -- 
+    ["{}"]= function( tbl )
+
+        -- check all keys in 't' (including numeric, if any) that they do exist,
+        -- and carry the right type
+        --
+        local function subf1(v,t,msg,lev)
+            _assert(lev)
+            for k,f in pairs(t) do
+                -- 'f' is an assert function, or subtable
+                local ft= type(f)
+                if ft=="function" then
+                    f( v[k], msg, lev+1 )
+                elseif ft=="table" then
+                    _assert( type(v[k])=="table", msg or "no subtable "..tostring(k), lev+1 )
+                    subf1( v[k], f, msg, lev+1 )
+                else
+                    error( "Bad constraints table for '"..tostring(k).."'! (not a function)", lev+1 )
+                end
+            end
+        end
+                        
+        -- check there are no other (non-numeric) keys in 'v'
+        local function subf2(v,t,msg,lev)    -- 'v': value table, 't': constraints table for the same level
+            _assert(lev)
+            for k,vv in pairs(v) do
+                if type(k)=="number" then
+                    -- skip them
+                elseif not t[k] then
+                    _assert( false, msg or "extra field: '"..tostring(k).."'", lev+1 )
+                elseif type(vv)=="table" and type(t[k])=="table" then    -- descend only into sub-constraint tables; an assert function here must not be indexed
+                    subf2( vv, t[k], msg, lev+1 )
+                end
+            end
+        end
+        
+        _assert( type(tbl)=="table", "Wrong parameter to assert['{}']" )
+
+        return function( v, msg, lev )
+            lev= (lev or 1)+1
+            _assert( type(v)=="table" ,msg, lev )
+            subf1( v, tbl, msg, lev )
+            subf2( v, tbl, msg, lev )
+            return v
+        end
+    end,
+
+    -- ...
+}
+setmetatable( assert, assert )
+
+assert.void= assert["nil"]
+
+
+-----    
+-- void= assert.fails( function [,err_msg_str] )
+--
+-- Special assert function, to make sure the call within it fails, and gives a 
+-- specific error message (to be used in unit testing).
+--
+function assert.fails( func_block, err_msg )
+    -- run protected: 'st' is false when the block raised, 'err' is then the error value (may be a non-string)
+    local st,err= pcall( func_block )
+    if st then
+        _assert( false, "Block expected to fail, but didn't.", 2 )
+    elseif err_msg and err ~= err_msg then
+        _assert( false, "Failed with wrong error message: \n"..
+                       "'"..tostring(err).."'\nexpected: '"..tostring(err_msg).."'", 2 )
+    end
+end
+
+
+-----    
+-- void= assert.failsnot( function [,err_msg_str] )
+--
+-- Similar to 'assert.fails' but expects the code to survive.
+--
+function assert.failsnot( func_block, err_msg )
+    -- 'err_msg' is accepted but not used; 'err' may be any value, hence the tostring() below
+    local st,err= pcall( func_block )
+    if not st then
+        _assert( false, "Block expected NOT to fail, but did."..
+                        (err and "\n\tError: '"..tostring(err).."'" or ""), 2 )
+    end
+end
+
+
+-----    
+-- void= assert.nilerr( function [,err_msg_str] )
+--
+-- Expects the function to return with 'nil,err' failure code, with
+-- optionally error string matching. Similar to --> 'assert.fails()'
+--
+function assert.nilerr( func_block, err_msg )
+    -- not protected: an error raised inside 'func_block' propagates to the caller
+    local v,err= func_block()
+    _assert( v==nil, "Expected to return nil, but didn't: "..tostring(v), 2 )
+    if err_msg and err ~= err_msg then
+        _assert( false, "Failed with wrong error message: \n"..
+                       "'"..tostring(err).."'\nexpected: '"..tostring(err_msg).."'", 2 )    -- 'err' can be nil or a non-string here
+    end
+end
+
+
+-- Sanity check
+--
+assert( true )
+assert.fails( function() assert( false ) end )
+assert.fails( function() assert( nil ) end )
+
+
+return m    -- just info
diff --git a/tests/atomic.lua b/tests/atomic.lua
new file mode 100644
index 0000000..a027453
--- /dev/null
+++ b/tests/atomic.lua
@@ -0,0 +1,18 @@
+--
+-- ATOMIC.LUA
+--
+-- Test program for Lua Lanes
+--
+
+require "lanes"
+
+local linda= lanes.linda()
+local key= "$"
+
+local f= lanes.genatomic( linda, key, 5 )    -- NOTE(review): the asserts below imply 5 is the start value and the default step is +1 -- confirm in lanes.lua
+
+local v
+v= f(); print(v); assert(v==6)
+v= f(-0.5); print(v); assert(v==5.5)
+
+v= f(-10); print(v); assert(v==-4.5)
diff --git a/tests/basic.lua b/tests/basic.lua
new file mode 100644
index 0000000..ee31ed1
--- /dev/null
+++ b/tests/basic.lua
@@ -0,0 +1,331 @@
+--
+-- BASIC.LUA           Copyright (c) 2007-08, Asko Kauppi <akauppi@gmail.com>
+--
+-- Selftests for Lua Lanes
+-- 
+-- To do:
+--      - ...
+--
+
+require "lanes"
+require "assert"    -- assert.fails()
+
+local lanes_gen=    assert( lanes.gen )
+local lanes_linda=  assert( lanes.linda )
+
+local tostring=     assert( tostring )
+
+local function PRINT(...)    -- writes all arguments to stderr, each followed by a tab, then a newline
+    local str=""
+    for i=1,select('#',...) do    -- select('#',...) also counts nil arguments
+        str= str..tostring(select(i,...)).."\t"
+    end
+    if io then     -- silently does nothing when the 'io' library is not available
+        io.stderr:write(str.."\n")
+    end
+end
+
+
+---=== Local helpers ===---
+
+local tables_match    -- forward declaration: 'subtable' and 'tables_match' call each other
+
+-- true if every key of 'b' is also present in 'a' with an equal value
+-- (nested tables are compared with 'tables_match', i.e. in both directions)
+local function subtable( a, b )
+    --
+    assert( type(a)=="table" and type(b)=="table" )
+
+    for k,v in pairs(b) do
+        if type(v)~=type(a[k]) then
+            return false    -- not subtable (different types, or missing key)
+        elseif type(v)=="table" then
+            if not tables_match(v,a[k]) then return false end
+        else
+            if a[k] ~= v then return false end
+        end
+    end
+    return true     -- is a subtable
+end
+
+-- true when contents of 'a' and 'b' are identical
+--
+tables_match= function( a, b )
+    return subtable( a, b ) and subtable( b, a )
+end
+
+
+---=== Tasking (basic) ===---
+
+local function task( a, b, c )    -- sums the series a, a+c, ... up to b; returns the sum and the global 'hey'
+    --error "111"     -- testing error messages
+    assert(hey)    -- 'hey' is not defined in this file; the generators below pass it via 'globals={hey=true}'
+    local v=0
+    for i=a,b,c do
+        v= v+i
+    end
+    return v, hey
+end
+
+local task_launch= lanes_gen( "", { globals={hey=true} }, task )
+	-- base stdlibs, normal priority
+
+-- 'task_launch' is a factory of multithreaded tasks, we can launch several:
+
+local lane1= task_launch( 100,200,3 )
+local lane2= task_launch( 200,300,4 )
+
+-- At this stage, states may be "pending", "running" or "done"
+
+local st1,st2= lane1.status, lane2.status
+PRINT(st1,st2)
+assert( st1=="pending" or st1=="running" or st1=="done" )
+assert( st2=="pending" or st2=="running" or st2=="done" )
+
+-- Accessing results ([1..N]) pends until they are available
+--
+PRINT("waiting...")
+local v1, v1_hey= lane1[1], lane1[2]
+local v2, v2_hey= lane2[1], lane2[2]
+
+PRINT( v1, v1_hey )
+assert( v1_hey == true )
+
+PRINT( v2, v2_hey )
+assert( v2_hey == true )
+
+assert( lane1.status == "done" )
+assert( lane2.status == "done" )    -- used to check lane1 a second time
+
+
+---=== Tasking (cancelling) ===---
+
+local task_launch2= lanes_gen( "", { cancelstep=100, globals={hey=true} }, task )
+
+local N=999999999
+local lane9= task_launch2(1,N,1)   -- huuuuuuge...
+
+-- Wait until state changes "pending"->"running" (polls for at most about 5 seconds)
+--
+local st
+local t0, i= os.time(), 0    -- 'i' counts the polls; it used to be an undefined global
+while os.time()-t0 < 5 do
+    st, i= lane9.status, i+1
+    io.stderr:write( (i==1) and st.." " or '.' )    -- status text on the first poll, a dot afterwards
+    if st~="pending" then break end
+end
+PRINT(" "..st)
+
+if st=="error" then
+    local _= lane9[0]  -- propagate the error here
+end
+if st=="done" then
+    error( "Looping to "..N.." was not long enough (cannot test cancellation)" )
+end
+assert( st=="running" )
+
+lane9:cancel()
+
+local t0, i= os.time(), 0    -- 'i' counts the polls; it used to be an undefined global
+while os.time()-t0 < 5 do    -- give the cancel request at most about 5 seconds to take effect
+    st, i= lane9.status, i+1
+    io.stderr:write( (i==1) and st.." " or '.' )    -- status text on the first poll, a dot afterwards
+    if st~="running" then break end
+end
+PRINT(" "..st)
+assert( st == "cancelled" )
+
+
+---=== Communications ===---
+
+local function WR(...) io.stderr:write(...) end
+
+local chunk= function( linda )
+
+    local function receive() return linda:receive( "->" ) end
+    local function send(...) linda:send( "<-", ... ) end
+
+    WR( "Lane starts!\n" )
+
+    local v
+    v=receive(); WR( v.." received\n" ); assert( v==1 )
+    v=receive(); WR( v.." received\n" ); assert( v==2 )
+    v=receive(); WR( v.." received\n" ); assert( v==3 )
+
+    send( 1,2,3 );              WR( "1,2,3 sent\n" )
+    send 'a';                   WR( "'a' sent\n" )
+    send { 'a', 'b', 'c', d=10 }; WR( "{'a','b','c',d=10} sent\n" )
+
+    v=receive(); WR( v.." received\n" ); assert( v==4 )
+        
+    WR( "Lane ends!\n" )
+end
+
+local linda= lanes_linda()
+assert( type(linda) == "userdata" )
+    --
+    -- ["->"] master -> slave
+    -- ["<-"] master <- slave
+
+local function PEEK() return linda:get("<-") end
+local function SEND(...) linda:send( "->", ... ) end
+local function RECEIVE() return linda:receive( "<-" ) end
+
+local t= lanes_gen("io",chunk)(linda)     -- prepare & launch
+
+SEND(1);  WR( "1 sent\n" )
+SEND(2);  WR( "2 sent\n" )
+for i=1,100 do
+    WR "."
+    assert( PEEK() == nil )    -- nothing coming in, yet
+end
+SEND(3);  WR( "3 sent\n" )
+
+local a,b,c= RECEIVE(), RECEIVE(), RECEIVE()
+    WR( a..", "..b..", "..c.." received\n" )
+assert( a==1 and b==2 and c==3 )
+
+local a= RECEIVE();   WR( a.." received\n" )
+assert( a=='a' )
+
+local a= RECEIVE();   WR( type(a).." received\n" )
+assert( tables_match( a, {'a','b','c',d=10} ) )
+
+assert( PEEK() == nil )
+SEND(4)
+
+
+---=== Stdlib naming ===---
+
+local function io_os_f()
+    assert(io)
+    assert(os)
+    assert(print)
+    return true
+end
+
+local f1= lanes_gen( "io,os", io_os_f )     -- any delimiter will do
+local f2= lanes_gen( "io+os", io_os_f )
+local f3= lanes_gen( "io,os,base", io_os_f )
+
+assert.fails( function() lanes_gen( "xxx", io_os_f ) end )
+
+assert( f1()[1] )
+assert( f2()[1] )
+assert( f3()[1] )
+
+
+---=== Comms criss cross ===---
+
+-- We make two identical lanes, which are using the same Linda channel.
+--
+local tc= lanes_gen( "io",
+  function( linda, ch_in, ch_out )
+
+    local function STAGE(str)
+        io.stderr:write( ch_in..": "..str.."\n" )
+        linda:send( nil, ch_out, str )
+        local v= linda:receive( nil, ch_in )
+        assert(v==str)
+    end
+    STAGE("Hello")
+    STAGE("I was here first!")
+    STAGE("So waht?")
+  end
+)
+
+local linda= lanes_linda()
+
+local a,b= tc(linda, "A","B"), tc(linda, "B","A")   -- launching two lanes, twisted comms
+
+local _= a[1],b[1]  -- waits until they are both ready
+
+
+---=== Receive & send of code ===---
+
+local upvalue="123"
+
+local function chunk2( linda )
+    assert( upvalue=="123" )    -- even when running as separate thread
+
+    -- function name & line number should be there even as separate thread
+    --
+    local info= debug.getinfo(1)    -- 1 = us
+        --
+        for k,v in pairs(info) do PRINT(k,v) end
+
+        assert( info.nups == 2 )    -- one upvalue + PRINT
+        assert( info.what == "Lua" )
+        
+        --assert( info.name == "chunk2" )   -- name does not seem to come through
+        assert( string.match( info.source, "^@tests[/\\]basic.lua$" ) )
+        assert( string.match( info.short_src, "^tests[/\\]basic.lua$" ) )
+        
+        -- These vary so let's not be picky (they're there..)
+        --
+        assert( info.linedefined > 200 )   -- start of 'chunk2'
+        assert( info.currentline > info.linedefined )   -- line of 'debug.getinfo'
+        assert( info.lastlinedefined > info.currentline )   -- end of 'chunk2'
+
+    local func,k= linda:receive( "down" )
+    assert( type(func)=="function" )
+    assert( k=="down" )
+
+    func(linda)
+
+    local str= linda:receive( "down" )
+    assert( str=="ok" )
+    
+    linda:send( "up", function() return ":)" end, "ok2" )
+end
+
+local linda= lanes.linda()
+
+local t2= lanes_gen( "debug,string", chunk2 )(linda)     -- prepare & launch
+
+linda:send( "down", function(linda) linda:send( "up", "ready!" ) end,
+                    "ok" )
+
+-- wait to see if the tiny function gets executed
+--
+local s= linda:receive( "up" )
+PRINT(s)
+assert( s=="ready!" )
+
+-- returns of the 'chunk2' itself
+--
+local f= linda:receive( "up" )
+assert( type(f)=="function" )
+
+local s2= f()
+assert( s2==":)" )
+
+local ok2= linda:receive( "up" )
+assert( ok2 == "ok2" )
+
+
+---=== :join test ===---
+
+-- NOTE: 'unpack()' cannot be used on the lane handle; it will always return nil
+--       (unless [1..n] has been read earlier, in which case it would seemingly
+--       work).
+
+local S= lanes_gen( "table",
+  function(arg)    -- returns the elements of 'arg' in reverse order
+    local aux= {}    -- 'local' added: this used to leak a global inside the lane
+    for i, v in ipairs(arg) do
+	   table.insert (aux, 1, v)
+    end
+    return unpack(aux)
+end )
+
+local h= S { 12, 13, 14 }     -- execution starts, h[1..3] will get the return values
+
+local a,b,c,d= h:join()
+assert(a==14)
+assert(b==13)
+assert(c==12)
+assert(d==nil)
+
+--
+io.stderr:write "Done! :)\n"
diff --git a/tests/cyclic.lua b/tests/cyclic.lua
new file mode 100644
index 0000000..06452bd
--- /dev/null
+++ b/tests/cyclic.lua
@@ -0,0 +1,64 @@
+--
+-- CYCLIC.LUA
+--
+-- Test program for Lua Lanes
+--
+
+require "lanes"
+
+local table_concat= assert(table.concat)
+
+local function WR(str,...)
+    for i=1,select('#',...) do
+        str= str.."\t"..tostring( select(i,...) )
+    end
+    io.stderr:write( str..'\n' )
+end
+
+local function same(k,l)    -- returns "same" when k==l, otherwise a text naming both values
+    return k==l and "same" or ("NOT SAME: "..tostring(k).." "..tostring(l))    -- tostring(): the arguments are tables, which cannot be concatenated
+end
+
+local a= {}
+local b= {a}
+a[1]= b
+
+-- Getting the tables as upvalues should still have the <-> linkage
+--
+local function lane1()
+    WR( "Via upvalue: ", same(a,b[1]), same(a[1],b) )
+    assert( a[1]==b )
+    assert( b[1]==a )
+end
+local L1= lanes.gen( "io", lane1 )()
+    -- ...running
+
+-- Getting the tables as parameters should also keep the linkage
+--
+local function lane2( aa, bb )
+    WR( "Via parameters:", same(aa,bb[1]), same(aa[1],bb) )
+    assert( aa[1]==bb )
+    assert( bb[1]==aa )
+end
+local L2= lanes.gen( "io", lane2 )( a, b )
+    -- ...running
+
+-- Really unnecessary, but let's try a directly recursive table
+--
+local c= {}    -- 'local' added: the table is only handed over as a parameter below
+c.a= c
+
+local function lane3( cc )    -- checks that the received copy still refers to itself
+    WR( "Directly recursive: ", same(cc, cc.a) )
+    assert( cc and cc.a==cc )
+end
+local L3= lanes.gen("io", lane3)(c)
+
+-- Without a wait, exit from the main lane will close the process
+--
+-- Waiting for multiple lanes at once could be done using a Linda
+-- (but we're okay waiting them in order)
+--
+L1:join()
+L2:join()
+L3:join()
diff --git a/tests/ehynes.lua b/tests/ehynes.lua
new file mode 100644
index 0000000..4cc370e
--- /dev/null
+++ b/tests/ehynes.lua
@@ -0,0 +1,52 @@
+--
+-- Test from <ehynes at dharmagaia.com>
+--
+require 'lanes'
+
+local function PRINT_FMT( fmt, ... )
+    io.stderr:write( string.format(fmt,...).."\n" )
+end
+
+-- a linda for sending messages
+local linda = lanes.linda()
+
+-- a linda message receiver
+local receiver_gen = lanes.gen( 'base', 'os', 'string', 'io',    -- NOTE(review): other tests pass the library list as ONE string ("io,os") -- confirm lanes.gen accepts several
+    function (message_name)    -- lane body: logs every value received under 'message_name'
+        PRINT_FMT( 'receiver for message %s entered', message_name )
+        local n = 1
+        while linda:receive(message_name) do    -- loop ends when receive() returns nil or false
+            PRINT_FMT( '%s %d receieved', message_name, n )
+            n = n + 1
+        end
+    end
+)
+
+-- create a receiver
+local receiver1 = receiver_gen('message')
+
+-- create a second receiver (a second receiver in the same linda
+-- appears to be needed to trigger the delays)
+--
+-- AKa 4-Aug-2008: No, with svn version it isn't. But it causes the 2nd
+--                 message to be hanging...
+--
+local receiver2 = receiver_gen('another message')
+
+-- a function to pause and log the execution for debugging
+local function logf(s, f, ...)    -- pauses, prints "*** "..s to stderr, then calls f(...)
+    os.execute('sleep 1')    -- relies on an external 'sleep' command being available to the shell
+    PRINT_FMT( "*** %s", s )
+    f(...)
+end
+
+-- first message sent is received right away
+logf('first message sent', linda.send, linda, 'message', true)
+
+-- second message sent is not received immediately
+logf('second message sent', linda.send, linda, 'message', true)
+
+-- third message sent triggers receipt of both second and third messages
+logf('third message sent', linda.send, linda, 'message', true)
+
+logf('all done', function() end)
diff --git a/tests/error.lua b/tests/error.lua
new file mode 100644
index 0000000..673bcb5
--- /dev/null
+++ b/tests/error.lua
@@ -0,0 +1,47 @@
+--
+-- Error reporting
+--
+-- Note: this code is supposed to end in errors; not included in 'make test'
+--
+
+require "lanes"
+
+local function lane()    -- always raises: 'subf2' calls 'subf', which throws a table as the error value
+
+    local subf= function()  -- this so that we can see the call stack
+        --error "aa"
+        error({})
+        error(error)    -- never reached, the line above already raised
+    end
+    local subf2= function()
+        subf()
+    end
+    subf2()
+end
+
+local function cleanup(err)
+end
+
+local lgen = lanes.gen("*", { --[[finalizer=cleanup]] }, lane)
+
+---
+io.stderr:write( "\n** Error catching **\n" )
+--
+local h= lgen()
+local _,err,stack= h:join()   -- wait for the lane (no automatic error propagation)
+
+if err then
+    assert( type(stack)=="table" )
+    io.stderr:write( "Lane error: "..tostring(err).."\n" )
+
+    io.stderr:write( "\t", table.concat(stack,"\n\t"), "\n" );
+end
+
+---
+io.stderr:write( "\n** Error propagation **\n" )
+--
+local h2= lgen()
+local _= h2[0]
+assert(false)   -- does NOT get here
+
+--never ends
diff --git a/tests/fibonacci.lua b/tests/fibonacci.lua
new file mode 100644
index 0000000..8867e14
--- /dev/null
+++ b/tests/fibonacci.lua
@@ -0,0 +1,75 @@
+--
+-- FIBONACCI.LUA         Copyright (c) 2007-08, Asko Kauppi <akauppi@gmail.com>
+--
+-- Parallel calculation of Fibonacci numbers
+--
+-- A sample of task splitting like Intel TBB library does.
+-- 
+-- References:
+--      Intel Threading Building Blocks, 'test all'
+--      <http://shareit.intel.com/WikiHome/Articles/111111316>
+--
+
+-- Need to say it's 'local' so it can be an upvalue
+--
+local lanes= require "lanes"
+
+local function WR(str)
+    io.stderr:write( str.."\n" )
+end
+
+-- Threshold for serial calculation (optimal depends on multithreading fixed
+-- cost and is system specific)
+--
+local KNOWN= { [0]=0, 1,1,2,3,5,8,13,21,34,55,89,144 }
+
+--
+-- uint= fib( n_uint )
+--
+local function fib( n )
+    -- returns fib(n): small n come from the KNOWN table, larger n are split over two child lanes
+    local sum
+    local floor= assert(math.floor)
+
+    WR( "fib("..n..")" )
+
+    if n <= #KNOWN then
+        sum= KNOWN[n]
+    else
+        -- Splits into two; this task remains waiting for the results
+        --
+        local gen_f= lanes.gen( "io,math,debug", fib )
+
+        local n1=floor(n/2) +1    -- k+1, with k= floor(n/2)
+        local n2=floor(n/2) -1 + n%2    -- k for odd n, k-1 for even n
+
+        WR( "splitting "..n.." -> "..n1.." "..n2 )
+
+        local a= gen_f( n1 )
+        local b= gen_f( n2 )
+
+        -- children running...
+
+        local a2= a[1]^2
+        local b2= b[1]^2
+
+        sum = (n%2==1) and a2+b2 or a2-b2    -- fib(2k+1)= fib(k+1)^2 + fib(k)^2 ; fib(2k)= fib(k+1)^2 - fib(k-1)^2
+    end
+
+    io.stderr:write( "fib("..n..") = "..sum.."\n" )
+    
+    return sum
+end
+
+--
+-- Right answers from: <http://sonic.net/~douglasi/fibo.htm>
+--
+local right= { 1, 1, 2, 3, 5, 8, 13, 21, 34, 55, 89, 144, 233, 377, 610, 987, 1597, 2584, 4181, 6765, 10946, 17711, 28657, 46368, 75025, 121393, 196418, 317811, 514229, 832040, 1346269, 2178309, 3524578, 5702887, 9227465, 14930352, 24157817, 39088169, 63245986, 102334155, 165580141, 267914296, 433494437, 701408733, 1134903170, 1836311903, 2971215073, 4807526976, 7778742049, 12586269025, 20365011074, 32951280099, 53316291173, 86267571272, 139583862445, 225851433717, 365435296162, 591286729879, 956722026041, 1548008755920, 2504730781961, 4052739537881, 6557470319842, 10610209857723, 17167680177565, 27777890035288, 44945570212853, 72723460248141, 117669030460994, 190392490709135, 308061521170129, 498454011879264, 806515533049393, 1304969544928657, 2111485077978050, 3416454622906707, 5527939700884757, 8944394323791464, 14472334024676220, 23416728348467684, 37889062373143900, 61305790721611580, 99194853094755490, 160500643816367070, 259695496911122560, 420196140727489660, 679891637638612200, 1100087778366101900, 1779979416004714000, 2880067194370816000, 4660046610375530000, 7540113804746346000, 12200160415121877000, 19740274219868226000, 31940434634990105000, 51680708854858334000, 83621143489848440000, 135301852344706780000, 218922995834555200000
+}
+assert( #right==99 )
+
+local N= 80
+local res= fib(N)
+print( right[N] )
+assert( res==right[N] )
+
diff --git a/tests/fifo.lua b/tests/fifo.lua
new file mode 100644
index 0000000..898b04d
--- /dev/null
+++ b/tests/fifo.lua
@@ -0,0 +1,43 @@
+--
+-- FIFO.LUA
+--
+-- Sample program for Lua Lanes
+--
+
+require "lanes"
+
+local linda= lanes.linda()
+local atomic_inc= lanes.genatomic( linda, "FIFO_n" )
+
+assert( atomic_inc()==1 )
+assert( atomic_inc()==2 )
+
+local function FIFO()    -- returns an object with ':send(...)' and ':receive(timeout)' bound to its own linda key
+    local my_channel= "FIFO"..atomic_inc()    -- a new key for every FIFO instance
+
+    return {
+        -- Giving explicit 'nil' timeout allows numbers to be used as 'my_channel'
+        -- Both are called with ':' below, so the first argument is the FIFO table itself.
+        send= function(self,...) return linda:send( nil, my_channel, ... ) end,
+        receive= function(self,timeout) return linda:receive( timeout, my_channel ) end    -- 'return' was missing
+    }
+end
+
+local A= FIFO()
+local B= FIFO()
+
+print "Sending to A.."
+A:send( 1,2,3,4,5 )
+
+print "Sending to B.."
+B:send( 'a','b','c' )
+
+print "Reading A.."
+print( A:receive( 1.0 ) )
+
+print "Reading B.."
+print( B:receive( 2.0 ) )
+
+-- Note: A and B can be passed between threads, or used as upvalues
+--       by multiple threads (other parts will be copied but the 'linda'
+--       handle is shared userdata and will thus point to the single place)
diff --git a/tests/finalizer.lua b/tests/finalizer.lua
new file mode 100644
index 0000000..c94b36d
--- /dev/null
+++ b/tests/finalizer.lua
@@ -0,0 +1,81 @@
+--
+-- Test resource cleanup
+--
+-- This feature was ... by discussion on the Lua list about exceptions.
+-- The idea is to always run a certain block at exit, whether due to success
+-- or error. Normally, 'pcall' would be used for such but as Lua already
+-- does that, simply giving a 'cleanup=function' parameter is a logical
+-- thing to do.     -- AKa 22-Jan-2009
+--
+
+require "lanes"
+
+local FN= "finalizer-test.tmp"
+
+local cleanup
+
+local which= os.time() % 2  -- 0/1
+
+local function lane()    -- creates the file FN, then either raises (which==0) or returns normally
+
+    set_finalizer(cleanup)
+
+    local f,err= io.open(FN,"w")
+    if not f then
+        error( "Could not create "..FN..": "..err )
+    end
+
+    f:write( "Test file that should get removed." )
+    f:close()    -- release the handle before the finalizer removes the file
+    io.stderr:write( "File "..FN.." created\n" )    
+
+    if which==0 then
+        error("aa")    -- exception here; the value needs NOT be a string
+    end
+
+    -- no exception
+end
+
+-- 
+-- This is called at the end of the lane; whether successful or not.
+-- Gets the 'error()' parameter as parameter ('nil' if normal return).
+--
+cleanup= function(err)
+
+    -- An error in finalizer will override an error (or success) in the main
+    -- chunk.
+    --
+    --error( "This is important!" )
+
+    if err then
+        io.stderr:write( "Cleanup after error: "..tostring(err).."\n" )
+    else
+        io.stderr:write( "Cleanup after normal return\n" )
+    end
+        
+    local _,err2= os.remove(FN)
+    assert(not err2)    -- if this fails, it will be shown in the calling script
+                        -- as an error from the lane itself
+    
+    io.stderr:write( "Removed file "..FN.."\n" )
+end
+
+local lgen = lanes.gen("*", lane)
+
+io.stderr:write "Launching the lane!\n"
+
+local h= lgen()
+
+local _,err,stack= h:join()   -- wait for the lane (no automatic error propagation)
+if err then
+    assert(stack)
+    io.stderr:write( "Lane error: "..tostring(err).."\n" )
+    io.stderr:write( "\t", table.concat(stack,"\n\t"), "\n" )    -- one indented stack entry per line (separator was "\t\n")
+end
+
+local f= io.open(FN,"r")
+if f then
+    error( "CLEANUP DID NOT WORK: "..FN.." still exists!" )
+end
+
+io.stderr:write "Finished!\n"
diff --git a/tests/hangtest.lua b/tests/hangtest.lua
new file mode 100644
index 0000000..d0bbea4
--- /dev/null
+++ b/tests/hangtest.lua
@@ -0,0 +1,26 @@
+--
+-- Test case for hang on [1]s and :join()s.
+--
+
+require "lanes"
+
+local function ret(b)
+    return b
+end
+local lgen = lanes.gen("*", {}, ret)
+
+for i=1,10000 do
+    local ln = lgen(i)
+
+    print( "getting result for "..i )
+
+    -- Hangs here forever every few hundred runs or so,
+    -- can be illustrated by putting another print() statement
+    -- after, which will never be called.
+    --
+    local result = ln[1];
+
+    assert (result == i);
+end
+
+print "Finished!"
diff --git a/tests/irayo_closure.lua b/tests/irayo_closure.lua
new file mode 100644
index 0000000..faf08fd
--- /dev/null
+++ b/tests/irayo_closure.lua
@@ -0,0 +1,35 @@
+--
+-- Bugs filed by irayo Jul-2008
+--
+--[[
+"Another issue I've noticed is trying to pass a table with a function
+that uses closures in it as a global variable into a new lane.  This
+causes a segmentation fault and it appears to be related to the
+luaG_inter_move function near line 835-836 or so in lanes.c, but I
+haven't investigated further.
+e.g. { globals = { data = 1, func = function() useclosurehere() end } }"
+]]
+
+require "lanes"
+
+local function testrun()
+    assert( print )
+    assert( data==1 )
+    assert( type(func)=="function" )
+    func()  -- "using the closure"
+    return true
+end
+
+-- Should also work without these lines, but currently doesn't (bug in Lanes,
+-- a function thrown over isn't connected to receiving lane's globals)
+--
+--local print=print
+--local assert=assert
+
+local function useclosurehere()
+    assert( print )
+    print "using the closure" 
+end
+
+local lane= lanes.gen( "", { globals = { data=1, func=useclosurehere } }, testrun )()
+print(lane[1])
diff --git a/tests/irayo_recursive.lua b/tests/irayo_recursive.lua
new file mode 100644
index 0000000..82e5a54
--- /dev/null
+++ b/tests/irayo_recursive.lua
@@ -0,0 +1,18 @@
+--
+-- Bugs filed by irayo Jul-2008
+--
+--[[
+This code showed lack of caching 'select', 'type' etc. in 'src/lanes.lua'.
+]]
+local function recurse()    -- prints the level and re-launches itself in a new lane until 'i' exceeds 10
+    print("level "..i);    -- 'i' is a global: set at the bottom of the file, and per lane through 'globals'
+    if i > 10 then return "finished" end
+
+    require "lanes"
+
+    local lane = lanes.gen( "*", { globals = { ["i"]= i + 1 } }, recurse ) ()
+    return lane[1]    -- first return value of the child lane
+end
+
+i = 0;
+recurse()
diff --git a/tests/keeper.lua b/tests/keeper.lua
new file mode 100644
index 0000000..5c8c23a
--- /dev/null
+++ b/tests/keeper.lua
@@ -0,0 +1,47 @@
+--
+-- KEEPER.LUA
+--
+-- Test program for Lua Lanes
+--
+
+require "lanes"
+
+local function keeper(linda)    -- wraps 'linda' in a proxy table: reads go to linda:get(), writes to linda:set()
+    local mt= {
+        __index= function( _, key )
+            return linda:get( key )
+        end,
+        __newindex= function( _, key, val ) 
+            linda:set( key, val )
+        end
+    }
+    return setmetatable( {}, mt )    -- nothing is ever stored in the proxy itself, so every access reaches the metamethods
+end
+
+--
+local lindaA= lanes.linda()
+local A= keeper( lindaA )
+
+local lindaB= lanes.linda()
+local B= keeper( lindaB )
+
+A.some= 1
+print( A.some )
+assert( A.some==1 )
+
+B.some= "hoo"
+assert( B.some=="hoo" )
+assert( A.some==1 )
+
+local function lane()    -- lane body: reads and updates 'some' through its own keeper on 'lindaA' ('local' added, was a global function)
+    local a= keeper(lindaA)
+    print( a.some )
+    assert( a.some==1 )
+    a.some= 2
+end
+
+local h= lanes.gen( "io", lane )()
+h:join()
+
+print( A.some )     -- 2
+assert( A.some==2 )
diff --git a/tests/launchtest.lua b/tests/launchtest.lua
new file mode 100644
index 0000000..5e3037f
--- /dev/null
+++ b/tests/launchtest.lua
@@ -0,0 +1,78 @@
+--
+-- LAUNCHTEST.LUA       Copyright (c) 2007-08, Asko Kauppi <akauppi@gmail.com>
+--
+-- Tests launching speed of N threads
+--
+-- Usage:
+--      [time] lua -lstrict launchtest.lua [threads] [-libs[=io,os,math,...]]
+--
+--      threads: number of threads to launch (like: 2000) :)
+--      libs: combination of "os","io","math","package", ...
+--            just "-libs" for all libraries
+--
+-- Note:
+--      One _can_ reach the system threading level, ie. doing 10000 on 
+--      PowerBook G4:
+--      <<
+--          pthread_create( ref, &a, lane_main, data ) failed @ line 316: 35 
+--          Command terminated abnormally.
+--      <<
+--
+--      (Lua Lanes _can_ be made tolerable to such congestion cases. Just
+--       currently, it is not. btw, 5000 seems to run okay - system limit
+--       being 2040 simultaneous threads)
+--
+-- To do:
+--      - ...
+--
+
+local N= 1000   -- threads/loops to use
+local M= 1000   -- sieves from 1..M
+local LIBS= nil -- default: load no libraries
+
+local function HELP()    -- prints the usage line to stderr and terminates with exit status 1
+    io.stderr:write( "Usage: lua launchtest.lua [threads] [-libs[=io,os,math,...]]\n" )
+    os.exit(1)    -- was a call to the undefined global 'exit'
+end
+
+local m= require "argtable"
+local argtable= assert(m.argtable)
+
+for k,v in pairs( argtable(...) ) do
+    if k==1 then            N= tonumber(v) or HELP()
+    elseif k=="libs" then   LIBS= (v==true) and "*" or v
+    else                    HELP()
+    end
+end
+
+require "lanes"
+
+local g= lanes.gen( LIBS, function(i) 
+                        --io.stderr:write( i.."\t" )
+                        return i 
+                    end )
+
+local t= {}
+
+for i=1,N do
+    t[i]= g(i)
+end
+
+if false then
+    -- just finish here, without waiting for threads to finish
+    --
+    local st= t[N].status
+    print(st)   -- if that is "done", they flew already! :)
+else
+    -- mark that all have been launched, now wait for them to return
+    --
+    io.stderr:write( N.." lanes launched.\n" )
+    
+    for i=1,N do
+        local rc= t[i]:join()
+        assert( rc==i )
+    end
+
+    io.stderr:write( N.." lanes finished.\n" )
+end
+
diff --git a/tests/objects.lua b/tests/objects.lua
new file mode 100644
index 0000000..8f56a5f
--- /dev/null
+++ b/tests/objects.lua
@@ -0,0 +1,76 @@
+--
+-- OBJECTS.LUA
+--
+-- Tests that objects (metatables) can be passed between lanes.
+--
+
+require "lanes"
+
+local linda= lanes.linda()
+
+local start_lane= lanes.gen( "io", 
+    function( obj1 )
+
+        assert( obj1.v )
+        assert( obj1.print )
+
+        assert( obj1 )
+        local mt1= getmetatable(obj1)
+        assert(mt1)
+    
+        local obj2= linda:receive("")
+        assert( obj2 )
+        local mt2= getmetatable(obj2)
+        assert(mt2)
+        assert( mt1==mt2 )
+        
+        local v= obj1:print()
+        assert( v=="aaa" )
+    
+        v= obj2:print()    
+        assert( v=="bbb" )
+    
+        return true
+    end
+)
+
+
+local WR= function(str)
+    io.stderr:write( tostring(str).."\n")
+end
+
+
+-- Lanes identifies metatables and copies them only once per each lane.
+--
+-- Having methods in the metatable will make passing objects lighter than
+-- having the methods 'fixed' in the object tables themselves.
+--
+local o_mt= {
+    __index= function( me, key )    -- only "print" is synthesized; any other missing key yields nil
+        if key=="print" then
+            return function() WR(me.v); return me.v end    -- closes over 'me'; the argument passed by obj:print() is ignored
+        end
+    end
+}
+
+local function obj_gen(v)    -- creates { v=v } using the single shared metatable 'o_mt'
+    local o= { v=v }
+    setmetatable( o, o_mt )
+    return o
+end
+
+local a= obj_gen("aaa")
+local b= obj_gen("bbb")
+
+assert( a and b )
+
+local mt_a= getmetatable(a)
+local mt_b= getmetatable(b)
+assert( mt_a and mt_a==mt_b )
+
+local h= start_lane(a)  -- 1st object as parameter
+
+linda:send( "", b )    -- 2nd object via Linda
+
+assert( h[1]==true )    -- wait for return
+
diff --git a/tests/perftest.lua b/tests/perftest.lua
new file mode 100644
index 0000000..8ce1b3c
--- /dev/null
+++ b/tests/perftest.lua
@@ -0,0 +1,184 @@
+--
+-- PERFTEST.LUA         Copyright (c) 2007-08, Asko Kauppi <akauppi@gmail.com>
+--
+-- Performance comparison of multithreaded Lua (= how much ballast does using
+-- Lua Lanes introduce)
+--
+-- Usage:
+--      [time] lua -lstrict perftest.lua [threads] [-plain|-single[=2..n]] [-time] [-prio[=-2..+2[,-2..+2]]]
+--
+--      threads: number of threads to launch (loops in 'plain' mode)
+--      -plain: runs in nonthreaded mode, to get a comparison baseline
+--      -single: runs using just a single CPU core (or 'n' cores if given)
+--      -prio: sets odd numbered threads to higher/lower priority
+--
+-- History:
+--      AKa 20-Jul-08: updated to Lanes 2008
+--      AK 14-Apr-07: works on Win32
+--
+-- To do:
+--      (none?)
+--
+
+-- On MSYS, stderr is buffered. In this test it matters.
+-- Seems, even with this MSYS wants to buffer linewise, needing '\n'
+-- before actual output.
+--
+local MSYS= os.getenv("OSTYPE")=="msys"
+
+
+require "lanes"
+
+local m= require "argtable"
+local argtable= assert( m.argtable )
+
+local N= 1000   -- threads/loops to use
+local M= 1000   -- sieves from 1..M
+local PLAIN= false      -- single threaded (true) or using Lanes (false)
+local SINGLE= false     -- cores to use (false / 1..n) 
+local TIME= false       -- use Lua for the timing
+local PRIO_ODD, PRIO_EVEN   -- -3..+3
+
+local function HELP()   -- print usage and quit: bad arguments must not start the run
+    io.stderr:write( "Usage: lua perftest.lua [threads]\n" ); os.exit(1)
+end
+
+-- true -> +2,-2   (odd, even)
+-- "odd_prio[,even_prio]" -> odd_prio [, even_prio]   (as numbers)
+--
+local function prio_param(v)
+    if v==true then return 2,-2 end
+
+    local a,b= string.match( v, "^([%+%-]?%d+)%,([%+%-]?%d+)$" )
+    if a then
+        return tonumber(a), tonumber(b)
+    elseif tonumber(v) then
+        return tonumber(v)
+    else
+        error( "Bad priority: "..v )
+    end
+end
+
+for k,v in pairs( argtable(...) ) do
+    if k==1 then            N= tonumber(v) or HELP()
+    elseif k=="plain" then  PLAIN= true
+    elseif k=="single" then  SINGLE= v  -- true/number
+    elseif k=="time" then   TIME= true
+    elseif k=="prio" then   PRIO_ODD, PRIO_EVEN= prio_param(v)
+    else                    HELP()
+    end
+end
+
+PRIO_ODD= PRIO_ODD or 0
+PRIO_EVEN= PRIO_EVEN or 0
+
+
+-- SAMPLE ADOPTED FROM Lua 5.1.1 test/sieve.lua --
+
+-- the sieve of Eratosthenes programmed with coroutines
+-- typical usage: lua -e N=1000 sieve.lua | column
+
+-- AK: Wrapped within a surrounding function, so we can pass it to Lanes
+--     Note that coroutines can perfectly fine be used within each Lane. :)
+--
+-- AKa 20-Jul-2008: Now the wrapping to one function is no longer needed;
+--     Lanes 2008 can take the used functions as upvalues.
+--
+local function sieve_lane(N,id)
+ -- Returns { 2, 3, 5, ... }: primes up to N (default 1000); writes 'id' to stderr when done.
+ if MSYS then
+   io.stderr:setvbuf "no"
+ end
+
+ -- generate all the numbers from 2 to n
+ local function gen (n)
+  return coroutine.wrap(function ()
+    for i=2,n do coroutine.yield(i) end
+  end)
+ end
+
+ -- filter the numbers generated by `g', removing multiples of `p'
+ local function filter (p, g)
+  return coroutine.wrap(function ()
+    while 1 do
+      local n = g()
+      if n == nil then return end
+      if n % p ~= 0 then coroutine.yield(n) end   -- '%', not the deprecated math.mod
+    end
+  end)
+ end
+
+ local ret= {}      -- returned values: { 2, 3, 5, 7, 11, ... }
+ N=N or 1000        -- from caller
+ local x = gen(N)   -- generate primes up to N
+ while 1 do
+  local n = x()     -- pick a number until done
+  if n == nil then break end
+  --print(n)        -- must be a prime number
+  table.insert( ret, n )
+
+  x = filter(n, x)  -- now remove its multiples
+ end
+
+ io.stderr:write(id..(MSYS and "\n" or "\t"))   -- mark we're ready
+
+ return ret
+end
+-- ** END OF LANE ** --
+
+
+-- Keep preparation code outside of the performance test
+--
+local f_even= lanes.gen( "base,coroutine,math,table,io",  -- "*" = all
+                            { priority= PRIO_EVEN }, sieve_lane )
+                             
+local f_odd= lanes.gen( "base,coroutine,math,table,io",  -- "*" = all
+                            { priority= PRIO_ODD }, sieve_lane )
+
+io.stderr:write( "*** Counting primes 1.."..M.." "..N.." times ***\n\n" )
+
+local t0= TIME and os.time()
+
+if PLAIN then
+    io.stderr:write( "Plain (no multithreading):\n" )
+
+    for i=1,N do
+        local tmp= sieve_lane(M,i)
+        assert( type(tmp)=="table" and tmp[1]==2 and tmp[168]==997 )
+    end
+else
+    if SINGLE then
+        io.stderr:write( (tonumber(SINGLE) and SINGLE or 1) .. " core(s):\n" )
+        lanes.single(SINGLE)    -- limit to N cores (just OS X)
+    else
+        io.stderr:write( "Multi core:\n" )
+    end
+
+    if PRIO_ODD ~= PRIO_EVEN then
+        io.stderr:write( ( PRIO_ODD > PRIO_EVEN and "ODD" or "EVEN" )..
+                        " LANES should come first (odd:"..PRIO_ODD..", even:"..PRIO_EVEN..")\n\n" )
+    else
+        io.stderr:write( "EVEN AND ODD lanes should be mingled (both: "..PRIO_ODD..")\n\n" )
+    end
+    local t= {}
+    for i=1,N do
+        t[i]= ((i%2==0) and f_even or f_odd) (M,i)
+    end
+
+    -- Make sure all lanes finished
+    --
+    for i=1,N do
+        local tmp= t[i]:join()
+        assert( type(tmp)=="table" and tmp[1]==2 and tmp[168]==997 )
+    end
+end
+
+io.stderr:write "\n"
+
+if TIME then
+    local t= os.time() - t0
+    io.stderr:write( "*** TIMING: "..t.." seconds ***\n" )
+end
+
+--
+-- end
diff --git a/tests/recursive.lua b/tests/recursive.lua
new file mode 100644
index 0000000..49c03d3
--- /dev/null
+++ b/tests/recursive.lua
@@ -0,0 +1,21 @@
+--
+-- RECURSIVE.LUA
+--
+-- Test program for Lua Lanes
+--
+
+io.stderr:write( "depth:" )
+local function func( depth )
+    io.stderr:write(" " .. depth)
+    if depth > 10 then
+        return "done!"
+    end
+
+    require "lanes"
+    local lane= lanes.gen("*", func)( depth+1 )
+    return lane[1]
+end
+
+local v= func(0)
+assert(v=="done!")
+io.stderr:write("\n")
diff --git a/tests/require.lua b/tests/require.lua
new file mode 100644
index 0000000..2cfe780
--- /dev/null
+++ b/tests/require.lua
@@ -0,0 +1,30 @@
+--
+-- REQUIRE.LUA
+--
+-- Test that 'require' works from sublanes
+--
+require 'lanes'
+
+local function a_lane()
+    -- To require 'math' we still actually need to have it initialized for
+    -- the lane.
+    --
+    require "math"
+    assert( math and math.sqrt )
+    assert( math.sqrt(4)==2 )
+
+    assert( lanes==nil )
+    require "lanes"
+    assert( lanes and lanes.gen )
+
+    local h= lanes.gen( function() return 42 end ) ()
+    local v= h[1]
+
+    return v==42
+end
+
+local gen= lanes.gen( "math,package,string,table", a_lane )
+
+local h= gen()
+local ret= h[1]
+assert( ret==true )
diff --git a/tests/timer.lua b/tests/timer.lua
new file mode 100644
index 0000000..e95f326
--- /dev/null
+++ b/tests/timer.lua
@@ -0,0 +1,93 @@
+--
+-- TIMER.LUA
+--
+-- Sample program for Lua Lanes
+--
+
+-- On MSYS, stderr is buffered. In this test it matters.
+io.stderr:setvbuf "no"
+
+
+require "lanes"
+
+local linda= lanes.linda()
+
+local function PRINT(str)
+    io.stderr:write(str.."\n")
+end
+
+local T1= "1s"  -- these keys can be anything...
+local T2= "5s"
+
+local step= {}
+
+lanes.timer( linda, T1, 1.0, 1.0 )
+step[T1]= 1.0
+
+PRINT( "\n*** Timers every second (not synced to wall clock) ***\n" )
+
+local v_first
+local v_last= {}     -- { [channel]= num }
+local T2_first_round= true
+
+local caught= {}     -- { [T1]= bool, [T2]= bool }
+
+while true do
+    io.stderr:write("waiting...\t")
+    local v,channel= linda:receive( 6.0, T1,T2 )
+    assert( channel==T1 or channel==T2 )
+    caught[channel]= true
+
+    io.stderr:write( ((channel==T1) and "" or "\t\t").. string.format("%.3f",v),"\n" )
+    assert( type(v)=="number" )
+
+    if v_last[channel] then
+        if channel==T2 and T2_first_round then
+            -- do not make measurements, first round is not 5secs due to wall clock adjustment
+            T2_first_round= false
+        else
+            assert( math.abs(v-v_last[channel]- step[channel]) < 0.02 )
+        end
+    end
+    
+    if not v_first then
+        v_first= v
+    elseif v-v_first > 3.0 and (not step[T2]) then
+        PRINT( "\n*** Adding timers every 5 second (synced to wall clock) ***\n" )
+
+        -- The first event can be in the past (just cut seconds down to 5s)
+        --
+        local date= os.date("*t")
+        date.sec = date.sec - date.sec%5
+
+        lanes.timer( linda, T2, date, 5.0 )
+        step[T2]= 5.0
+
+    elseif v-v_first > 10 then    -- exit condition
+        break
+    end
+    v_last[channel]= v
+end  
+
+-- Windows version had a bug where T2 timers were not coming through, at all.
+-- AKa 24-Jan-2009
+--
+assert( caught[T1] )
+assert( caught[T2] )
+
+PRINT( "\n*** Clearing timers ***\n" )
+
+lanes.timer( linda, T1, 0 )    -- reset; no reoccuring ticks
+lanes.timer( linda, T2, 0 )
+
+linda:receive( 0, T1 )    -- clear out; there could be one tick left
+linda:receive( 0, T2 )
+
+assert( linda:get(T1) == nil )
+assert( linda:get(T2) == nil )
+
+PRINT "...making sure no ticks are coming..."
+
+local v= linda:receive( 1.5, T1,T2 )    -- should not get any
+assert(v==nil)
+
diff --git a/tools/bin2c.lua b/tools/bin2c.lua
new file mode 100644
index 0000000..352d18e
--- /dev/null
+++ b/tools/bin2c.lua
@@ -0,0 +1,131 @@
+--
+-- BIN2C.LUA [filename] [-o output.lch]
+--
+-- Convert files to byte arrays for automatic loading with 'lua_dobuffer'.
+--
+-- Based on 'etc/bin2c.c' of Lua 5.0.1 sources by:
+--      Luiz Henrique de Figueiredo (lhf@tecgraf.puc-rio.br)
+--
+-- Changes:
+--
+-- 12-Dec-07/AKa: changed the output to have just the "{ ... }" part; others
+--                (variable name) can be explicitly given before the '#include'
+-- 16-Nov-07/AKa: taken into luaSub
+-- 16-Mar-04/AKa: added 'glua_wrap()' support
+-- xx-Jan-04/AKa: subdirectory names are not included in debug info
+--
+
+local function USAGE()
+    io.stderr:write "lua bin2c.lua [filename] [-o output.lch]\n"   -- end the line before exiting
+    os.exit(-1)
+end
+
+local out_f   -- file to output to (stdout if nil)
+
+local function OUT( ... )
+    -- Write all arguments, then a newline, to the output file (stdout if none was set).
+    local sink= out_f or io.stdout
+    sink:write( ... ); sink:write "\n"
+end
+
+local HEAD= "{ "
+local START= '  '
+local FILL= '%3d,'
+local STOP= ""
+local TAIL= "};\n"
+
+--
+local function dump( f )
+    --
+    OUT [[
+/* bin2c.lua generated code -- DO NOT EDIT
+ *
+ * To use from C source: 
+ *    char my_chunk[]=
+ *    #include "my.lch"
+ */]]
+
+    local str= HEAD..'\n'..START
+    local len= 0
+    
+    while true do
+        for n=1,20 do
+            local c= f:read(1)
+            if c then
+                str= str..string.format( FILL, string.byte(c) )
+                len= len+1
+            else
+                OUT( str..STOP.. string.format( TAIL, len ) )
+                return  -- the end
+            end
+        end
+        OUT(str..STOP)
+        str= START
+    end
+end
+
+--
+local function fdump( fn )
+    -- Dump the contents of the file named 'fn'; raises an error if it cannot be opened.
+    local fh= io.open( fn, "rb" )    -- must open as binary
+    if fh == nil then
+        error( "bin2c: cannot open "..fn )
+    end
+
+    dump( fh )
+    fh:close()
+    --
+end
+
+--
+local function main( argv )
+    --
+    local fn= argv.o
+    if fn then
+        local f,err= io.open( fn, "w" )
+        assert( f, "Unable to write '"..fn.."': "..(err or "") )
+
+        out_f= f
+    end
+    
+    if argv[2] then
+        USAGE()
+    elseif argv[1] then
+        fdump( argv[1] )
+    else    -- use stdin (no params)
+        if os.getenv("WINDIR") then
+            error "using stdin not allowed on Win32!"  -- it wouldn't be binary
+        end
+        dump(io.stdin)
+    end
+    
+    if out_f then
+        out_f:close()
+    end
+end
+
+--
+local argv= {}
+local valid_flags= { o=1 }    -- lookup: 0=no value, 1=value
+
+-- Need to use while since a flag that takes a value advances 'i' by 2
+--
+local args= select('#',...)
+local i=1
+
+while i<=args do
+    local v= select(i,...)
+    local flag= string.match( v, "^%-(.+)" )
+    if not flag then
+        table.insert( argv, v )   -- positional: [1..N]
+    elseif not valid_flags[flag] then
+        error( "Unknown flag: -"..flag )
+    elseif valid_flags[flag]==0 then
+        argv[flag]= true          -- plain switch, consumes nothing more
+    else
+        i= i+1                    -- the value is the next argument
+        argv[flag]= select(i,...) or USAGE()   -- no value given: usage + exit
+    end
+    i= i+1
+end
+
+return main(argv)
-- 
cgit v1.2.3-55-g6feb