From ec7d7a2b8feead992f0f9197ae299fe410ec16a0 Mon Sep 17 00:00:00 2001 From: Thijs Schreijer <thijs@thijsschreijer.nl> Date: Wed, 23 Mar 2022 15:57:45 +0100 Subject: cleanup; delete unreferenced powerpoint file from docs --- docs/lua05.ppt | Bin 304128 -> 0 bytes 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 docs/lua05.ppt diff --git a/docs/lua05.ppt b/docs/lua05.ppt deleted file mode 100644 index e2b7ab4..0000000 Binary files a/docs/lua05.ppt and /dev/null differ -- cgit v1.2.3-55-g6feb From 0fc0122df8050db8cbb75f75927ab3ace0fc60bd Mon Sep 17 00:00:00 2001 From: Thijs Schreijer <thijs@thijsschreijer.nl> Date: Wed, 23 Mar 2022 15:58:50 +0100 Subject: cleanup; move logo file into docs, more appropriate --- docs/logo.ps | 210 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ logo.ps | 210 ----------------------------------------------------------- 2 files changed, 210 insertions(+), 210 deletions(-) create mode 100644 docs/logo.ps delete mode 100644 logo.ps diff --git a/docs/logo.ps b/docs/logo.ps new file mode 100644 index 0000000..8b5809a --- /dev/null +++ b/docs/logo.ps @@ -0,0 +1,210 @@ +%!PS-Adobe-2.0 EPSF-2.0 +%%Title: Lua logo +%%Creator: lua@tecgraf.puc-rio.br +%%CreationDate: Wed Nov 29 19:04:04 EDT 2000 +%%BoundingBox: -45 0 1035 1080 +%%Pages: 1 +%%EndComments +%%EndProlog + +%------------------------------------------------------------------------------ +% +% Copyright (C) 1998-2000. All rights reserved. +% Graphic design by Alexandre Nakonechny (nako@openlink.com.br). +% PostScript programming by the Lua team (lua@tecgraf.puc-rio.br). +% +% Permission is hereby granted, without written agreement and without license +% or royalty fees, to use, copy, and distribute this logo for any purpose, +% including commercial applications, subject to the following conditions: +% +% * The origin of this logo must not be misrepresented; you must not +% claim that you drew the original logo. We recommend that you give credit +% to the graphics designer in all printed matter that includes the logo. +% +% * The only modification you can make is to adapt the orbiting text to +% your product name. +% +% * The logo can be used in any scale as long as the relative proportions +% of its elements are maintained. +% +%------------------------------------------------------------------------------ + +/LABEL (tekcoS) def + +%-- DO NOT CHANGE ANYTHING BELOW THIS LINE ------------------------------------ + +/PLANETCOLOR {0 0 0.5 setrgbcolor} bind def +/HOLECOLOR {1.0 setgray} bind def +/ORBITCOLOR {0.5 setgray} bind def +/LOGOFONT {/Helvetica 0.90} def +/LABELFONT {/Helvetica 0.36} def + +%------------------------------------------------------------------------------ + +/MOONCOLOR {PLANETCOLOR} bind def +/LOGOCOLOR {HOLECOLOR} bind def +/LABELCOLOR {ORBITCOLOR} bind def + +/LABELANGLE 325 def +/LOGO (Lua) def + +/DASHANGLE 10 def +/HALFDASHANGLE DASHANGLE 2 div def + +% moon radius. planet radius is 1. 
+/r 1 2 sqrt 2 div sub def + +/D {0 360 arc fill} bind def +/F {exch findfont exch scalefont setfont} bind def + +% place it nicely on the paper +/RESOLUTION 1024 def +RESOLUTION 2 div dup translate +RESOLUTION 2 div 2 sqrt div dup scale + +%-------------------------------------------------------------------- planet -- +PLANETCOLOR +0 0 1 D + +%---------------------------------------------------------------------- hole -- +HOLECOLOR +1 2 r mul sub dup r D + +%---------------------------------------------------------------------- moon -- +MOONCOLOR +1 1 r D + +%---------------------------------------------------------------------- logo -- +LOGOCOLOR +LOGOFONT +F +LOGO stringwidth pop 2 div neg +-0.5 moveto +LOGO show + +%------------------------------------------------------------------------------ +% based on code from Blue Book Program 10, on pages 167--169 +% available at ftp://ftp.adobe.com/pub/adobe/displaypostscript/bluebook.shar + +% str ptsize centerangle radius outsidecircletext -- +/outsidecircletext { + circtextdict begin + /radius exch def + /centerangle exch def + /ptsize exch def + /str exch def + + gsave + str radius ptsize findhalfangle + centerangle + add rotate + str + { /charcode exch def + ( ) dup 0 charcode put outsideplacechar + } forall + + grestore + end +} def + +% string radius ptsize findhalfangle halfangle +/findhalfangle { + 4 div add + exch + stringwidth pop 2 div + exch + 2 mul 3.1415926535 mul div 360 mul +} def + +/circtextdict 16 dict def +circtextdict begin + + /outsideplacechar { + /char exch def + /halfangle char radius ptsize findhalfangle def + gsave + halfangle neg rotate + 1.4 0 translate + 90 rotate + char stringwidth pop 2 div neg 0 moveto + char show + grestore + halfangle 2 mul neg rotate + } def + +end + +%--------------------------------------------------------------------- label -- +LABELFONT +F + +/LABELSIZE LABELFONT exch pop def +/LABELRADIUS LABELSIZE 3 div 1 r add sub neg 1.02 mul def + + +/HALFANGLE + LABEL LABELRADIUS LABELSIZE findhalfangle + HALFDASHANGLE div ceiling HALFDASHANGLE mul +def + +/LABELANGLE + 60 LABELANGLE HALFANGLE sub + lt + { + HALFANGLE + HALFANGLE DASHANGLE div floor DASHANGLE mul + eq + {LABELANGLE DASHANGLE div ceiling DASHANGLE mul} + {LABELANGLE HALFDASHANGLE sub DASHANGLE div round DASHANGLE mul HALFDASHANGLE add} + ifelse + } + {HALFANGLE 60 add} + ifelse +def + +LABELCOLOR +LABEL +LABELSIZE +LABELANGLE +LABELRADIUS +outsidecircletext + +%--------------------------------------------------------------------- orbit -- +ORBITCOLOR +0.03 setlinewidth +[1 r add 3.1415926535 180 div HALFDASHANGLE mul mul] 0 setdash +newpath +0 0 +1 r add +3 copy +30 +LABELANGLE HALFANGLE add +arcn +stroke +60 +LABELANGLE HALFANGLE sub +2 copy +lt {arc stroke} {4 {pop} repeat} ifelse + +%------------------------------------------------------------------ copyright -- +/COPYRIGHT +(Graphic design by A. Nakonechny. Copyright (c) 1998, All rights reserved.) 
+def + +LABELCOLOR +LOGOFONT +32 div +F +2 sqrt 0.99 mul +dup +neg +moveto +COPYRIGHT +90 rotate +%show + +%---------------------------------------------------------------------- done -- +showpage + +%%Trailer +%%EOF diff --git a/logo.ps b/logo.ps deleted file mode 100644 index 8b5809a..0000000 --- a/logo.ps +++ /dev/null @@ -1,210 +0,0 @@ -%!PS-Adobe-2.0 EPSF-2.0 -%%Title: Lua logo -%%Creator: lua@tecgraf.puc-rio.br -%%CreationDate: Wed Nov 29 19:04:04 EDT 2000 -%%BoundingBox: -45 0 1035 1080 -%%Pages: 1 -%%EndComments -%%EndProlog - -%------------------------------------------------------------------------------ -% -% Copyright (C) 1998-2000. All rights reserved. -% Graphic design by Alexandre Nakonechny (nako@openlink.com.br). -% PostScript programming by the Lua team (lua@tecgraf.puc-rio.br). -% -% Permission is hereby granted, without written agreement and without license -% or royalty fees, to use, copy, and distribute this logo for any purpose, -% including commercial applications, subject to the following conditions: -% -% * The origin of this logo must not be misrepresented; you must not -% claim that you drew the original logo. We recommend that you give credit -% to the graphics designer in all printed matter that includes the logo. -% -% * The only modification you can make is to adapt the orbiting text to -% your product name. -% -% * The logo can be used in any scale as long as the relative proportions -% of its elements are maintained. -% -%------------------------------------------------------------------------------ - -/LABEL (tekcoS) def - -%-- DO NOT CHANGE ANYTHING BELOW THIS LINE ------------------------------------ - -/PLANETCOLOR {0 0 0.5 setrgbcolor} bind def -/HOLECOLOR {1.0 setgray} bind def -/ORBITCOLOR {0.5 setgray} bind def -/LOGOFONT {/Helvetica 0.90} def -/LABELFONT {/Helvetica 0.36} def - -%------------------------------------------------------------------------------ - -/MOONCOLOR {PLANETCOLOR} bind def -/LOGOCOLOR {HOLECOLOR} bind def -/LABELCOLOR {ORBITCOLOR} bind def - -/LABELANGLE 325 def -/LOGO (Lua) def - -/DASHANGLE 10 def -/HALFDASHANGLE DASHANGLE 2 div def - -% moon radius. planet radius is 1. 
-/r 1 2 sqrt 2 div sub def - -/D {0 360 arc fill} bind def -/F {exch findfont exch scalefont setfont} bind def - -% place it nicely on the paper -/RESOLUTION 1024 def -RESOLUTION 2 div dup translate -RESOLUTION 2 div 2 sqrt div dup scale - -%-------------------------------------------------------------------- planet -- -PLANETCOLOR -0 0 1 D - -%---------------------------------------------------------------------- hole -- -HOLECOLOR -1 2 r mul sub dup r D - -%---------------------------------------------------------------------- moon -- -MOONCOLOR -1 1 r D - -%---------------------------------------------------------------------- logo -- -LOGOCOLOR -LOGOFONT -F -LOGO stringwidth pop 2 div neg --0.5 moveto -LOGO show - -%------------------------------------------------------------------------------ -% based on code from Blue Book Program 10, on pages 167--169 -% available at ftp://ftp.adobe.com/pub/adobe/displaypostscript/bluebook.shar - -% str ptsize centerangle radius outsidecircletext -- -/outsidecircletext { - circtextdict begin - /radius exch def - /centerangle exch def - /ptsize exch def - /str exch def - - gsave - str radius ptsize findhalfangle - centerangle - add rotate - str - { /charcode exch def - ( ) dup 0 charcode put outsideplacechar - } forall - - grestore - end -} def - -% string radius ptsize findhalfangle halfangle -/findhalfangle { - 4 div add - exch - stringwidth pop 2 div - exch - 2 mul 3.1415926535 mul div 360 mul -} def - -/circtextdict 16 dict def -circtextdict begin - - /outsideplacechar { - /char exch def - /halfangle char radius ptsize findhalfangle def - gsave - halfangle neg rotate - 1.4 0 translate - 90 rotate - char stringwidth pop 2 div neg 0 moveto - char show - grestore - halfangle 2 mul neg rotate - } def - -end - -%--------------------------------------------------------------------- label -- -LABELFONT -F - -/LABELSIZE LABELFONT exch pop def -/LABELRADIUS LABELSIZE 3 div 1 r add sub neg 1.02 mul def - - -/HALFANGLE - LABEL LABELRADIUS LABELSIZE findhalfangle - HALFDASHANGLE div ceiling HALFDASHANGLE mul -def - -/LABELANGLE - 60 LABELANGLE HALFANGLE sub - lt - { - HALFANGLE - HALFANGLE DASHANGLE div floor DASHANGLE mul - eq - {LABELANGLE DASHANGLE div ceiling DASHANGLE mul} - {LABELANGLE HALFDASHANGLE sub DASHANGLE div round DASHANGLE mul HALFDASHANGLE add} - ifelse - } - {HALFANGLE 60 add} - ifelse -def - -LABELCOLOR -LABEL -LABELSIZE -LABELANGLE -LABELRADIUS -outsidecircletext - -%--------------------------------------------------------------------- orbit -- -ORBITCOLOR -0.03 setlinewidth -[1 r add 3.1415926535 180 div HALFDASHANGLE mul mul] 0 setdash -newpath -0 0 -1 r add -3 copy -30 -LABELANGLE HALFANGLE add -arcn -stroke -60 -LABELANGLE HALFANGLE sub -2 copy -lt {arc stroke} {4 {pop} repeat} ifelse - -%------------------------------------------------------------------ copyright -- -/COPYRIGHT -(Graphic design by A. Nakonechny. Copyright (c) 1998, All rights reserved.) 
-def - -LABELCOLOR -LOGOFONT -32 div -F -2 sqrt 0.99 mul -dup -neg -moveto -COPYRIGHT -90 rotate -%show - -%---------------------------------------------------------------------- done -- -showpage - -%%Trailer -%%EOF -- cgit v1.2.3-55-g6feb From 7187be8b76452aa968726180af24deaaa545431d Mon Sep 17 00:00:00 2001 From: Thijs Schreijer <thijs@thijsschreijer.nl> Date: Wed, 23 Mar 2022 16:01:50 +0100 Subject: cleanup; delete the ./gem folder --- gem/ex1.lua | 4 - gem/ex10.lua | 17 -- gem/ex11.lua | 7 - gem/ex12.lua | 34 --- gem/ex2.lua | 11 - gem/ex3.lua | 15 -- gem/ex4.lua | 5 - gem/ex5.lua | 15 -- gem/ex6.lua | 14 -- gem/ex7.lua | 16 -- gem/ex8.lua | 5 - gem/ex9.lua | 3 - gem/gem.c | 54 ----- gem/gt.b64 | 206 ---------------- gem/input.bin | Bin 11732 -> 0 bytes gem/ltn012.tex | 695 ------------------------------------------------------ gem/luasocket.png | Bin 11732 -> 0 bytes gem/makefile | 14 -- gem/myps2pdf | 113 --------- gem/t1.lua | 25 -- gem/t1lf.txt | 5 - gem/t2.lua | 36 --- gem/t2.txt | 4 - gem/t2gt.qp | 5 - gem/t3.lua | 25 -- gem/t4.lua | 10 - gem/t5.lua | 30 --- gem/test.lua | 46 ---- 28 files changed, 1414 deletions(-) delete mode 100644 gem/ex1.lua delete mode 100644 gem/ex10.lua delete mode 100644 gem/ex11.lua delete mode 100644 gem/ex12.lua delete mode 100644 gem/ex2.lua delete mode 100644 gem/ex3.lua delete mode 100644 gem/ex4.lua delete mode 100644 gem/ex5.lua delete mode 100644 gem/ex6.lua delete mode 100644 gem/ex7.lua delete mode 100644 gem/ex8.lua delete mode 100644 gem/ex9.lua delete mode 100644 gem/gem.c delete mode 100644 gem/gt.b64 delete mode 100644 gem/input.bin delete mode 100644 gem/ltn012.tex delete mode 100644 gem/luasocket.png delete mode 100644 gem/makefile delete mode 100755 gem/myps2pdf delete mode 100644 gem/t1.lua delete mode 100644 gem/t1lf.txt delete mode 100644 gem/t2.lua delete mode 100644 gem/t2.txt delete mode 100644 gem/t2gt.qp delete mode 100644 gem/t3.lua delete mode 100644 gem/t4.lua delete mode 100644 gem/t5.lua delete mode 100644 gem/test.lua diff --git a/gem/ex1.lua b/gem/ex1.lua deleted file mode 100644 index 327a542..0000000 --- a/gem/ex1.lua +++ /dev/null @@ -1,4 +0,0 @@ -local CRLF = "\013\010" -local input = source.chain(source.file(io.stdin), normalize(CRLF)) -local output = sink.file(io.stdout) -pump.all(input, output) diff --git a/gem/ex10.lua b/gem/ex10.lua deleted file mode 100644 index 2b1b98f..0000000 --- a/gem/ex10.lua +++ /dev/null @@ -1,17 +0,0 @@ -function pump.step(src, snk) - local chunk, src_err = src() - local ret, snk_err = snk(chunk, src_err) - if chunk and ret then return 1 - else return nil, src_err or snk_err end -end - -function pump.all(src, snk, step) - step = step or pump.step - while true do - local ret, err = step(src, snk) - if not ret then - if err then return nil, err - else return 1 end - end - end -end diff --git a/gem/ex11.lua b/gem/ex11.lua deleted file mode 100644 index 79c99af..0000000 --- a/gem/ex11.lua +++ /dev/null @@ -1,7 +0,0 @@ -local input = source.chain( - source.file(io.open("input.bin", "rb")), - encode("base64")) -local output = sink.chain( - wrap(76), - sink.file(io.open("output.b64", "w"))) -pump.all(input, output) diff --git a/gem/ex12.lua b/gem/ex12.lua deleted file mode 100644 index de17d76..0000000 --- a/gem/ex12.lua +++ /dev/null @@ -1,34 +0,0 @@ -local smtp = require"socket.smtp" -local mime = require"mime" -local ltn12 = require"ltn12" - -CRLF = "\013\010" - -local message = smtp.message{ - headers = { - from = "Sicrano <sicrano@example.com>", - to = "Fulano <fulano@example.com>", - 
subject = "A message with an attachment"}, - body = { - preamble = "Hope you can see the attachment" .. CRLF, - [1] = { - body = "Here is our logo" .. CRLF}, - [2] = { - headers = { - ["content-type"] = 'image/png; name="luasocket.png"', - ["content-disposition"] = - 'attachment; filename="luasocket.png"', - ["content-description"] = 'LuaSocket logo', - ["content-transfer-encoding"] = "BASE64"}, - body = ltn12.source.chain( - ltn12.source.file(io.open("luasocket.png", "rb")), - ltn12.filter.chain( - mime.encode("base64"), - mime.wrap()))}}} - -assert(smtp.send{ - rcpt = "<diego@cs.princeton.edu>", - from = "<diego@cs.princeton.edu>", - server = "localhost", - port = 2525, - source = message}) diff --git a/gem/ex2.lua b/gem/ex2.lua deleted file mode 100644 index 94bde66..0000000 --- a/gem/ex2.lua +++ /dev/null @@ -1,11 +0,0 @@ -function filter.cycle(lowlevel, context, extra) - return function(chunk) - local ret - ret, context = lowlevel(context, chunk, extra) - return ret - end -end - -function normalize(marker) - return filter.cycle(eol, 0, marker) -end diff --git a/gem/ex3.lua b/gem/ex3.lua deleted file mode 100644 index 60b4423..0000000 --- a/gem/ex3.lua +++ /dev/null @@ -1,15 +0,0 @@ -local function chainpair(f1, f2) - return function(chunk) - local ret = f2(f1(chunk)) - if chunk then return ret - else return (ret or "") .. (f2() or "") end - end -end - -function filter.chain(...) - local f = select(1, ...) - for i = 2, select('#', ...) do - f = chainpair(f, select(i, ...)) - end - return f -end diff --git a/gem/ex4.lua b/gem/ex4.lua deleted file mode 100644 index c48b77e..0000000 --- a/gem/ex4.lua +++ /dev/null @@ -1,5 +0,0 @@ -local qp = filter.chain(normalize(CRLF), encode("quoted-printable"), - wrap("quoted-printable")) -local input = source.chain(source.file(io.stdin), qp) -local output = sink.file(io.stdout) -pump.all(input, output) diff --git a/gem/ex5.lua b/gem/ex5.lua deleted file mode 100644 index 196b30a..0000000 --- a/gem/ex5.lua +++ /dev/null @@ -1,15 +0,0 @@ -function source.empty(err) - return function() - return nil, err - end -end - -function source.file(handle, io_err) - if handle then - return function() - local chunk = handle:read(20) - if not chunk then handle:close() end - return chunk - end - else return source.empty(io_err or "unable to open file") end -end diff --git a/gem/ex6.lua b/gem/ex6.lua deleted file mode 100644 index a3fdca0..0000000 --- a/gem/ex6.lua +++ /dev/null @@ -1,14 +0,0 @@ -function source.chain(src, f) - return function() - if not src then - return nil - end - local chunk, err = src() - if not chunk then - src = nil - return f(nil) - else - return f(chunk) - end - end -end diff --git a/gem/ex7.lua b/gem/ex7.lua deleted file mode 100644 index c766988..0000000 --- a/gem/ex7.lua +++ /dev/null @@ -1,16 +0,0 @@ -function sink.table(t) - t = t or {} - local f = function(chunk, err) - if chunk then table.insert(t, chunk) end - return 1 - end - return f, t -end - -local function null() - return 1 -end - -function sink.null() - return null -end diff --git a/gem/ex8.lua b/gem/ex8.lua deleted file mode 100644 index 81e288c..0000000 --- a/gem/ex8.lua +++ /dev/null @@ -1,5 +0,0 @@ -local input = source.file(io.stdin) -local output, t = sink.table() -output = sink.chain(normalize(CRLF), output) -pump.all(input, output) -io.write(table.concat(t)) diff --git a/gem/ex9.lua b/gem/ex9.lua deleted file mode 100644 index b857698..0000000 --- a/gem/ex9.lua +++ /dev/null @@ -1,3 +0,0 @@ -for chunk in source.file(io.stdin) do - io.write(chunk) -end diff --git 
a/gem/gem.c b/gem/gem.c deleted file mode 100644 index 976f74d..0000000 --- a/gem/gem.c +++ /dev/null @@ -1,54 +0,0 @@ -#include "lua.h" -#include "lauxlib.h" - -#define CR '\xD' -#define LF '\xA' -#define CRLF "\xD\xA" - -#define candidate(c) (c == CR || c == LF) -static int pushchar(int c, int last, const char *marker, - luaL_Buffer *buffer) { - if (candidate(c)) { - if (candidate(last)) { - if (c == last) - luaL_addstring(buffer, marker); - return 0; - } else { - luaL_addstring(buffer, marker); - return c; - } - } else { - luaL_putchar(buffer, c); - return 0; - } -} - -static int eol(lua_State *L) { - int context = luaL_checkint(L, 1); - size_t isize = 0; - const char *input = luaL_optlstring(L, 2, NULL, &isize); - const char *last = input + isize; - const char *marker = luaL_optstring(L, 3, CRLF); - luaL_Buffer buffer; - luaL_buffinit(L, &buffer); - if (!input) { - lua_pushnil(L); - lua_pushnumber(L, 0); - return 2; - } - while (input < last) - context = pushchar(*input++, context, marker, &buffer); - luaL_pushresult(&buffer); - lua_pushnumber(L, context); - return 2; -} - -static luaL_reg func[] = { - { "eol", eol }, - { NULL, NULL } -}; - -int luaopen_gem(lua_State *L) { - luaL_openlib(L, "gem", func, 0); - return 0; -} diff --git a/gem/gt.b64 b/gem/gt.b64 deleted file mode 100644 index a74c0b3..0000000 --- a/gem/gt.b64 +++ /dev/null @@ -1,206 +0,0 @@ -iVBORw0KGgoAAAANSUhEUgAAAIAAAACACAIAAABMXPacAAAtU0lEQVR42u19eXRURdb4rarXa5LO -RshKEshC2MLOBIjsCoMLGJhRPnUEcUGZEX7j4Iw6zqd+zjkzzowL6gzKMOoBRHAAPyQKUZQlxLAk -EIEkQkhCyEoISegs3f1eVf3+qPTj0Z3udEJImN/Pe/rkdF6/V6/q3qp7b92tEOccfoT+A9zfHfj/ -HX4kQD/DjwToZ/iRAP0MPxKgn+FHAvQz/EiAfgapvzvQQ3DfviCE+rtTPYH/AAKouEYIcc4ForUX -tXeKexhj6k8IIe2DvdUl0SYAcN7RGYQ63oAQ4hx8fBu6BXfC6vBcsHyDeNRi7cYboZQjBIRgl/lB -KQcAQnyl+q1IAC9YU7/s2bOnsrKSUupwOHQ63cMPP2wymRhjGOOrV6/m5ORYLJbg4OABAwZYLBaD -waBtQUsD34mqRT0hHc/abEpNjbWlxYEQCgw0RET463QEABjjjHFfyND/LEg737XsQpblhoaGioqK -CxcunD9/fv78+ampqepgZFk2mUwBAQEYY6PRSAhRG7Tb7cXFxXa73W63W63Wn/zkJ4sXL1YfVHGB -EFI5VZc0EDcwxjnnkoRbWhw7dxZt316Yn19TW9siyxQADAZddHRAWlrMffeNnDcvUa8nlDKEAGNv -7ffbClCnoYoFFRFiIufn53/88cfBwcERERERERHjxo2LjIz0ZbaqFLXb7ZcuXZIkKSoqShAYY7xn -z576+vpJkybFxcUZjUZfOJKKfQBACP75z/yXXtpfXX0JAAFIAAQAAXAADsAAZAA0dGjMa6/Nueee -FEoZQsgLDfqTAFqWIstyRUVFXFycJEniJ6vV2tTUFBUVRQhxkb0q2TTS7xr9tNxG/bdjtAjl5eXl -5ubW1dUhhJKTkzMyMkwmk0p4AMAYq91Tv1DKCMENDW0PPLBj797vEdJjrAfgjF2HP+d8B8YcAMry -5VP//vf5Oh3h3OM66P8V0NTU9N133+Xl5SmKsnr16qCgIBc8MsbE5HXXgjqdU9oRie8YY5c2W1tb -CwsLS0tLFy5cqEoILWnFI84rHGNUXW29/fYPCwsvSpI/pQLxntYNxxhjDIpinTNn1K5d/2Uy6Zwd -cNWO+o4A7mjFGOfk5OzcuTMsLGzixInjxo2zWCwqIlSpAL2k47tMc+18FN8vXLgAAHFxce4Cqa1N -njlzw9GjZZLkryiK6KP3twEgnY7I8tWf/WzCtm33McZVJVV7H3nppZf6BvXaL+rAFEVJSEhYvHjx -4MGDDQaDykxAw1h6S38XLxUcRnRGnXyiM4cOHdqyZUtDQ0N0dLSfn5/4SUz/Z57Zs3PnCZ0uQFEU -ANQV9jvIwxiTJOPp0xdCQgLS0gZRyjF2Hc5NXwEu866lpUWv1+v1enVBqFsnwWS0dLrZ4K7dlpSU -ZGZmVlVVpaen33PPPYL1HzlSOXnyewCk+6gSo2OhocaCgl9GR1vEOtCO7qbbglQsY4yPHj366quv -nj59GjScWtBGq0f2mVHBZbVxzhMSElatWvXzn//cORUAANau/Y5zB8YYoLsUQJxzQqSGhqb1648D -gFClXO+4eSNUZ9alS5e2b99eXl4+d+7cqVOnCrl361hvOt2LCNWlttY6bNjbTU22Hk9WhBDnjhEj -IgoKVoqdc1+vAFmW//WvfymK8uyzz86aNUvlP72HPrjBWaR2RkgIoXeJ2ZqbW9nUdBVj0uPGOecA -ujNn6s+cuQRui6CXd8JaJUedSsJUEBoaqtfrtdd9p4HQ3rTGL9UE1ik2BZ/trmnMRePinAFAQUEt -AMMYuXMP34EQRKnjzJlLqakRLr3uTQJoJarLzigyMpIxJiStVr/0pTXOQdgAMEaEYACOEPb+tKCU -UOEVhYq9qKCKTwYyzW0XL169cUaNEAJglZVXwc2Q3msE0GKfEFJYWGg2m+Pj41UtyMeJr8W7olCB -dFVS2mxKZeXVqqqrFRXN9fVtDQ1tbW2yw0EBQK8nJpNuwABTWJjfoEGB0dEBMTEWk0mHEBYPU8oY 
-Y04S+roEbTalt1Bkt1P3i728AjjnhJCjR49u3rw5IyNDEACcvBW8ajgqRhSFCUsvQhghVF/fmptb -efjwxWPHqs6da6iutlLqAFA86yQIQCJEHxkZkJQUMnFi9JQpg9LSYsLD/THusCtw3mHR7JIMfn66 -3sKP2dxJU70sAzDGBw4c2Llz5/333z958mRVqfD+lBb1GCNhxa2oaP788x8++6z4yJFKq9UKQAGI -+CCkw1jvqVkhPylllZVXKivrv/22EID4+wdMmhS9YEHKggVD4+KCxAqjlHkig9DfASA+PkismO7r -oNeAMQ6A4+ODwG0K9o4aqtoajx07tnnz5mXLlo0ePVplO12iXhjZMUYYI1mme/aUrF+f/9VXJTZb -CwAG0GFMhDHLxfjlHQTTF/KTMQogAzCDwW/27ITHHhs/f36SXk+8GO4VhUkSzsoqmTv3XxgbbkQI -A3BJQmfO/DI5eYAQhL1JAK0l68qVK1euXElMTOyS6av6EqViI4bb2+WNGwveeCO3uLgSAAAMhBCA -Dh/TjQMhCABRSgHsAJCUFL16ddrDD4/289OrfQDNahBGiKYm2/Dha2tqrAj1YCcMAIAxYsw+aVLs -kSMr3G2IN7QPcOqFXJ3IISEhCQkJvmBfaIeKIqQifPDBiREj3n3iiW3FxTUYmwgxCWT1FvYBgFJO -KQVAhJgwNp07V7ty5afDh7+7fn0e50AIVhTGmNZiCIrCgoKMixYNB7D3aCcMTvalPPjgGNEHl597 -vgI8Gd8FL/JkLnaf+IcPV6xatScv7zxCEsYGdQd0k6HDvs2Yg3PH6NFD3npr3vTp8Wqv1D0Hxqik -5MrYse+0tFCn48X3LSTHGDMmJySEnDjxy4AAfa+tAK1yWVpampubqxJDMLhOub9W2BKC29uVX/7y -i/T09/LyygjxQ0hPKe0T7AMAYoxTShGSCPEvKKiYMWP9E0/sbm11iKXgHAIoCktMDHnxxVkAbTpd -t9DFnahW/vSneQEBHYzOBS09IYA62THGra2tmzZtOnfunO9PCeF25Ejl+PHr3n13PyE6jI1O1Pex -dQgxBpRSjA2E6N9//+DYseuysyskCVPKBTsiBDHGn302ffHiCbJs1ekkJ3K7GC5CSKfDlFrXrJm1 -ePFwShnGnYyuJwTQ+vk2bdrk5+e3ZMkS9Scv2GeMU8p1OvLOO0enTn3v7Nk6QvwpFQbRfjTMIcYY -pZwQ/9LS+mnT3n/99e8kCQtmKNYB53zTpkV33jlGlpslSWzIPZFBhKUQjLksW596auZrr92hYt8d -Pz1cAQKhmZmZpaWlS5culSRJsKNOJYrWqY0xeuKJz3/1q38DYIz1lIrNYT9gHyFXAxGlFGM9xtIz -z+xctuwzYUESXnXOQacj//u/S3796zsUxU6pDSGQJEKIsHB0fAhBkkQQ4pS2Ygyvv77o3XfvFNjv -zagIVZLs27cvMDBwwoQJqpHHE98Xno3WVvlnP9v65ZcFkhSgKKybAu0GgQMgse2iVIQviIFjjDHG -YnvccZskYUWxzp49cseO+y0Wg+i82DFIEj58uOL55/cdPFgKYHfuDcUoGAAFYISY77572B//OGv4 -8DBFYd6jg3pIAE8hCF6w39xsu+uuTdnZZyXJv2+x34F6xhjndgAOoPfzM5nNEqXcarXLsg1AAdBh -rIcOB5GgQcukSQlffPGL0FCTGIJgSmI65+VV79xZnJNzsby8UQ3MSkgImT49PiNjWHJyqBrC5d3u -1A0CuHstvOv7KufBGFmtjnnzPsrJKZEkP0WhfTnxnV1t0+mMs2YlLVyYMnFiVHS0xWzWUcqammzn -zl359tuyf/+7sKSkGiEJIT1jFAAkiShK68SJg7OylgYFGcVAAECrqiKEZJm2tysIgdmsc14EWRY2 -FY/q+A0RQG3Re2yIerMsszvv3Pj114WS5N/n2McACufKz38+/uWXZ6SkDHDvs4rH7duLXnjh69LS -GkLMlHIALmgwbVry3r0PGwwd4T3gNDcJkqiUUC8SgjEWPoyuba6+CmFtAMH+/ftra2s7COjVuim0 -iEcf/axfsI8x5twRGGjYufPhrVsXJyeHUsrEdlf7oZTLMiUE33//yFOnVj7yyBRK2wgBAKQoVJL8 -Dh78YenSHerqV13cOl2HhUr1DmGMdDpSX3/p22/3C1+3FnU3RAC1obNnz+7atau9vd1L007WzwnB -r756YOPGI/0y9xmTo6IsBw8+vnBhiixT4dIRWNN+CEE6HRF7LoOBbNiw4JVX5lNqwxg5aeC/deux -F1/cRwimVJV/AM79ppAK6opvb2/ftWtXSUlJl9iHbsUFiXds2rQpOTl52rRpnoydzoAfJkk4M/Ps -Y4/twNjotIH0ndQFYP7+ur17l40ZEyHLVJKwpy26+q/Q7hWFzZw5uKVFyck5R4gwjQDGhgMHzqam -Ro8YMVBs472YuYKDg69cuVJQUJCWlubi5nQHn1aAuu5OnDhRU1MzZ84c7/cLda2mpuWJJz4DQJx3 -14Ryo4AxAnC8+ead48dHORxUhIx7R4Rzb48IwYyx116bm56eRGm7sMFxDgDSU0/9b0VFsyRhL/YS -8Yrbb7+9trY2Ly9Pxd4NEUCFc+fOTZgwYeDAgWL6u9+g2kcB4Omnd1dVNRCi57wvN7rC/mWbNWvo -8uXjKWU6He5SErrQQAjb116bCyAJAwnnjBBdXV3jr36122WY7sAYCwsLGz9+vOBCXbzURy3Iydap -oijafIfr7+kw4UoS3rLl1H/912ZCTJT2tZkBIcS5PTNz6fz5yaIzvicMqWillEsSzsjYsnNnASEm -oRQRgilt+/DD+x9+eKyzZe6GhA7M2O12Qoga7O3pdb6yIPEXY+w1qodzziUJNzXZXnghC0ByKgJ9 -BxgD546UlIjbb08AAEKuCUwfQTu0hx4aDYDUKcoYB9D9/vdfX77c5oURiZWk1+tFYD14FcVdEECr -fbq8wH36g9Ph8Ne/ZpeV1fU581HRp8ycOVinI6pVuQftCH1/6tTYoCALY1SIUs45IfrKyvo///mQ -kx6uyHVHTqc49JUA2na1Ar2zUXHOQZJweXnTO+/kAhj7nvmoMG5c9I08rlpABw70T0oKBVCc4xV+ -JNM//nHk3LkGwdw6fVz7txc2YoyxrVu3lpaWImecs4fbOACsXftdc7OVEOlGwgh6DJwDAImNDdTi -omcghhMTYwFg2glNCGltbX3jjRzoLNhWizSEUHl5+datW51G307AGwFU/amqqur48eOSJHm9EyQJ -V1Vd/fDDEwCG/jLxc84BkNEoAXRD8HpoCgDAZNJdP5PEIjBs2lRQXt4kFoEXFi9J0vHjxysrK8GD -PurTCvj+++9jYmJiY2O9CHQxFz766ERjYxMh0s1OO/AEIoDH4VBUDN4g2GyK20zihEhW69UPPsgD 
-z4tACIOYmJiYmBgRkd8pdEEAsXssKioaOnQoeBAj4pokYYeDbtpUAKDrD+eiOmwAoCIKE3ywBHgd -OwKAqqqrAC68XvBh/ebN37e3y5KEPWOGA0BycnJRURFowgOve0uX/bBarYqiCAJ4gI44hm++KS0q -qkVI31/TX2AHAPLza26kCTU5oKGhraTkCgBxGRHngLHu/PlLWVkl0FmwiRaGDx8uy3JTU1Onv3at -hgYEBKxevTo2NhY8y3TRvU8/PQ1ARZbnTcaytw4DSPv3lzHGvMxN39qB3NyLDQ3NGEvubYjYrU8/ -PeOpBRVXMTExq1evDgwM7PQ2bwRQce2Siu4OkoStVntW1vn+5T8AwBhHSHfqVPWBAxfAq5biCdSg -MQDYvPl7pwrE3V8EoP/669LGxnZP+qgAQojJZPLkG/BIAHXiMK/bWTWO6tixqsrKKwjp+rv2hBgk -FWqi6Ex3nU6UMknCBQW1//73GQADpZ1MKc4BY6murik3txKgI4PBS8ue3ANdywDkBPDo/AIA2Lev -FEDpNPSlbwExxhEyff756W3bTksSVhSP4RpuA7mWmgAAzz2XJcs2LxGJgtL79p33gjoXBLpDFwRo -bGwsLi7W1gXopAmMACAn56K7sOonEGUbpJUrPz93rkGnI7JMVX+Wx2ec2JdlJkn4j3888OWXZwgx -ednQcM4ByHffXVSR4OEeYIz98MMPjY2N3SCAQHphYeG2bdu8+h0BY9TY2H7mzCUA7+o/BwBJwuKD -8Q1F3HsFYVWWLl+23nXXxoqKZkED1UnrptJ0/KsojFKu15O///3Y73+/F2NTp8zn+gelwsLLly61 -CiO2xw4htHXr1sLCQnBj6dhz0wAADQ0N4eHhXpawuF5aeqW+vsVrKnOHl0pRWsSHMYcz1vWm0IAx -hrHh7NlLU6a8n51dIXwyAsXOND+uutFlmQonEsbouee+XrlyB8Z6sey9vINzQAg3NbWWlDQAeHMP -IIQiIyMvXboE18cVgpcMGTHrm5qagoKCwHMqj2iqqOgygEyI5FkjRgA0JMT/oYemMMbNZik7u+Lw -4dKbKbQ7aFBV1Txjxvqnnpry/PO3RUT4u3gyEOpYxAihb74pW7MmKz+/lBATpeCLFw9jRKlcVHR5 -ypRY7wMJCQnpdCvQBQFqampGjRrllQAcAM6fvwLAvOTxYIwYYxER/m++OU+WqU5H/vzn7MOHfyDE -IIzGN48GCOk452+/vf/DD/MXLhy+cGHK2LER4eH+BgNhjLe0OMrKmg4evLBly+mjR0sBgBA/Sn2N -GxNDPn/+CnheAeK62WwWDjIXNHZBgGnTpkVFRUFX4ebl5U2+ONc45yIwRKcjvZh54R1FnDPOESF+ -Vqt948bcjRuP6HTmsDA/k0lijDc12RsbW0SQIcZGABBJHD5uZYTtr7y8CTy4SVS8DR8+XPASn1iQ -2sqUKVPUnAsPdwIA1Na2+DhfCMGS1FHWrk8IAKJjlFIATIiZc5BlWl3d6JzjCIBIkr8QBt0NHhDR -QLW1LeDZ9C2iZuPi4uLj413Q65EAmjypTqrruOAUABobbW4Wq1sN1KhCBIAQujZwkSmlva27LTc2 -2gDAwxS9LoPapRwXdOkPgK58GkL/bWlx9GuAfzeQ5RyaWu/gWnC5Om7fmxMsqLXVIaLYfbv/OvDG -grR830vrjHFZ7gPvu8hX6ZhBIkyhM6q73MY830Mo5ZxTkQ/sXBmYENJVRTJXbMkyY4x7spZ5R6a3 -fUBLS8uWLVvq6+vBqzlFNQfdzG2wCM6hYg9BaZsT+7yz2xTnbe2aeobqDYKjUkVp4dxuNOojI4Ni -YkIiIgJNJj3nsqK0cE67lRPp3RAkfrpy5cqWLVuam5tdEOUtU16W5ZMnT6alpYWFhXnxhWGMhOHX -R5NLDwAhxLmSmDhw6dIxisIaG9vffvuou5EAIcS5nJoac999IxWFVVdffe+945p7OIDI226LjBzw -4INjfvrTxKSk0MBAA8ZI5AqUlFzZu/f8Bx/k1dZewdjkm2OVq3GPngiAEGptbT1x4oQIKtQi0xsB -JEkym83ecSra0uvJTfUBYIwoVZKSQl54YRoAlJc3/f3vx9yttOK21NTw55+/DQAKCmrfe++YBoMI -IWDM9sQT6X/961x/f9cAJ4vFEBUVMH16/G9/O3X58s+2by/A2OidBsJwrdcTX5Q6s9ks/Oq+pqmK -ux0Oh1cCdHS9D5wwKsZFioTnLl2z7WgvY4w4t/2f/zNt3bq7jUZJWEnb2uTy8qZz5xpqaqxCkDoc -1GIxbNt23223JTDmS342t1gMahKcJ7DZbACg07nW6/C2AvR6vUhE7Wq0KDTUBNC9ALQegLrKnUmK -ncO11S1h7UXG5Li4ga+8MotzTgi6etX+4ovf7thRePlyG6XcYCCDBwc//fRPHntsvMNB9Xry7LO3 -HTpUKp72/C4AYCEhRuiq8Ep7eztCSK/Xd4MAGOPHHntM1PL0nH8KABAdbfEgFW8VEEabO+5I9Pc3 -tLXJZrPu/vs/3bPnBMZ+jImodKWwsOrxxz9ubZVXr04DgPHjowIC/K1WG0Letzg8OtqiosIdBOqS -kpIef/xx99CeLvwB4eHhQgx42oWJ9e6s6dLfaO4KxoyJBACzWXfgQNmePWckKciZ44gAMCEGAOMn -n5wUN1ssBn9/PYA3didsQaIOjXcsmUymiIgI9xsk762L8nVqRpj78+JKSkooAOmrKgM9AcY6nPWt -rQ4AyM4uBxD7gA59X5hFAXBbm+K7QUIUAkpJGQDXMwltipxKg04R6G0jxjVlNzyB2AkPHTqAEEM/ -BoN2CZxzAN2nn5749NPjAICQjhADAEeoo2QQ54xzBaAlPn6okyRdj4UxBmBwEuAa6kGjC6hGuk43 -Yt6iDcUKsFqtfn5+nuISRVNxcUFxcUGlpZcRkm5VixAC4BgbCUGEYIdDobTdyV4wgC4gwBgVFTB9 -+k9efHG6ry0i4JzGxAQPHhwMzrmoTSRV+YdLQrX2YhcEqK+vX7du3YoVK8LDwz3xOEqZwSCNHRtR -WlqLsa6v7Mw9Ac5BURRZtpnNAWPHJo0eHT506IDBg4NiYizh4f4DBpj1euKJV7iD2HaMGRMhSj6p -GawIIVGhua2tbefOnQ888IBIquCaepLqsujCHxAYGMg5r62tDQ8PBw9iQEz5GTPit28/0d8Y9oZ8 -hDDnsr+/Yc2a2cuXj42OtrjkPAuk1NW1DhhgliRfeCkC4NOnx6tI4M6ikQcPHszOzo6MjLRarcXF -xXFxcRaLRSS3MsbKysqioqLE8RHehDDn3Gg0hoWFlZeXjx492jOlOABMnz4Yof7MCegCVQhxLkdF -WfbsWTpq1EBhvUEItbfLVVXWysqrZWWNZ8827N9fTik7cuQx8MG0RSkD0M+cORg6WLHgchgApkyZ 
-Eh8fn5WVxRj7/PPPbTabxWKJiopKTEwMDAz8+OOPn3zySXEgiDcCCGIOGjSouLgYPAgl9YyUUaPC -x42LyMu7eMP17W4UtPsvFUSm0IYN944aNdBmU4xG6fDhin/841hOzsXKyquybAdQMAbG6MiR8T7y -H8Yco0ZFjh0bKf510gA45xaLJSgoqLq6OiIiYuTIkefPn7948eKFCxf279/f0NCQkpISGRkJLn6J -zpArVMyU9vZ2tR5Kp3dSyiUJ3XNPSl5eGUJGgJu7DrwkmwLwyEg/l6uEIErtkycPmTcvyeGgRqP0 -t79995vf7EKIca5T62ASgh0Ouyj02hWIIgjyXXcNxRiJkihOSndwaUrpnDlzBMYSEhKGDBkixHJj -Y6PZbAY199UL9gVPTEpKSk5O9u6cEZczMob/z/8cuHkZ8S6ntbj/DsABsJiSLmMBoGlpMQCg15Pq -auvLL2cBSJKkUxQm3DLCNwDABUftCkSahnHx4hHunXGWLcCHDh3Ky8tDCA0aNGjq1KkiwCc0NFSV -85zzLjxiWsekp4Q/5KzNOXJk+OzZgwEcvgQoIoQAsBqn5eXj3CJdA6NRMplc3B8dWbQDBwbOnDmk -09GEh/uLb+XlV6xWGWNJRGupN0gSAXAMGxaqGbtHCzyAfcaM+HHjotQCNi5427VrV2ZmZnJycmJi -Yk5OjsPhOHnypOpcUbUgn6xa2mM/PBn9Bd9/9NEJaje8E4BzGaBFUVrVUC1PH84V56JmAKAoLDzc -f9y4CACbXt9R+EGSCCEYoPU3v7ltwACzqCbtAlZrh1k3IiJAr8ecc0lSH0eSRByOlvDw0Fdeud05 -duHkwm7hNuI7f/TR8eAWgC12r3V1dceOHVuxYsX8+fMTEhLi4uIGDRqUm5u7bds2uD5+ouvSxej6 -2kyeQDDBBQuGjRoVfepUDcZ6T6JYrI/x4wc98sjtAQEGr1l/YDJJu3efLS6uA5AqKpplmYrH//Sn -eXPm1FitzSK0i3PKOaxcOXvNmnS1sI8WKQD4++9rAcDhoEOGhDz2WNq77+5jTM8YEtoj5zBpUuLG -jfeKoiqEYEIwxgqAnXOjtmAlQpgxx9ChkYsWjQC38A6BpbKystDQUBEGcezYsYSEBAC4995733nn -ncrKypiYGLXUQBcEUGNSDh482NzcfPfdd3dapAA5yyHqdHjVqsmPProNIYO7KBaF6MUsnjVryJw5 -CV62PMLxK0m4vr61uPiiJPn98EPd4cMVM2YMttuVSZOi8/OfWrs2Ny+vRlFYQkLwL34xZt68RADY -uLHgrruSQ0PN6pZQxPLv23e+pKQhMTFUlunatT/9yU+it207U1fXoteThISQBQtS7rwzyWCQGhvb -jUbJaEQGg/SrX6W/8UZua6ujudnmHAvHGFOqPP30ZOFUEDWxtKgAAD8/v6tXrzocDs55ZWXlrFmz -AMBisRiNRhf/iq95wnq9/rvvvrNarWpghadF8NBDY1JTB1HaiStD6KyEYEKQpyqCngBjBMDWrNnb -0uIwGCRZpoMHB61dOz87+5EjRx7bvHnRHXckAMBf/5rzyiv7goONoIlY5hwwJm1tbatXfymyORnj -Dz00eteuJUeOPJ6dvfzDDxcuXJhiMEj5+TXp6RvKyhoRQna7smpVWlXVMw8/PAbARggSyg+l9pSU -qGXLxrlMf62eMmzYMKPRuHXr1ry8vIEDB0ZHRwPA6dOnKaXiu08uSe1948aNy8rKOnny5G233ebJ -LCoWgV5P/vCHmYsXb3KZzgCorU0+dOiC78YixlhgoLG2tgUAKwrD2HD8+IVZsz745z8XpqaGq3HO -oj/Nze0vv3zgzTe/iY+PPHSowmzWnTp1SdsUxsbMzNNz5360bt09Q4YEO+cQF1HTly+3vv320ddf -P9Ta2rxhw4m//W2uWoxAOFydwQ3AOX3xxZkmk+v0V3l1W1ub2Wx+5JFHNm7cKPhPTk5OTU1Nbm5u -RkaGwWDQchGf4gkFF9q9e/fJkyefe+457dmCbljukEJ33bUxM/MMIWZnpJ/qrunBeQgEAKsBDYzZ -JUmaPj1xxoy4uLggnY5cvtyan1+7e/cP9fUNGJsZY863IAA1XxyphVSMRuOcOUnp6bExMRaEUG2t -9ejR6qysksbGKxibADBjjgULRt5zT4rJJFVVWf/1r/yiolqEJIQQY+1z5qR89dVS7cFsKkIF9r/4 -4osFCxbodDpZlk+dOnXq1Kn6+nqz2Zyeni7OI9VObp8IIO6ur6/funXrkiVLhCbrKVZXBBsXFdVP -nPiP1lbFibsOGvTAaaNWkxT/OQ9BsQOoQZxC2OjV8Gz1LW7hPeJxUT6ROTmw+rhOhOUihDi3qSH1 -AHonq+BGI8rNXTF6dIRaDVQb+EYIaWxsfOutt1asWBEREUEpdT8IE67Hgk8pSuJLaGjok08+6QX7 -HS1ipChs2LCwV16ZA2BzMQyIXU+3Pi7dYYxxDoQYJcmfEDMhJvEFIaI66zXPos4eR86nTNc/TtXH -CTGpjSMkidgTgPY//GHW6NERatF3AfX19QL7lNLg4OCwsDCRGAwaxb2trU1dKNpJ373kKRfC+MaI -ThPi52RE/6HACSGUtt1+e0pW1jXmI5Bgs9lef/31gICABQsWDBo0CCH0zTffHDlyJCUlpbGx0Waz -ORyOpqamMWPGLFy40L3OW/fKVoLGeOuJBiLaUj2BdPLkdRUVTRjr+7tAdM+xL0rQR0YG5OauiI0N -FEPT8pPa2tq9e/eeOXNm6NChCxcuBIC33norPDw8MDDQZDL5+fkZDIbU1NROmUf3YtmcWZy0tbU1 -ICDAMw2u1e07ePDCnDkbZFn1Cv1n0aDj9BiEWFbWstmzh7gXylLnYmVl5e7du0tLS8ePH19RUbFg -wYLk5GRtbdtO0dW9mnGilYKCgrffflsEunRKvw5nm4QVhU2bFrdhwyIAu6hZeMvGrXQ6XBHKyLn9 -/ffvnT17iKi+6C5UBURHR69YsWLFihX19fXV1dUHDhxoaWkRKoOQLp1O1m4fZ4sQCgkJOXz4cFNT -0/Dhw9UW3TNDOOeEYEWhY8dG+vub9+49TYj+epXmVgYOgCQJUdr6xz/euWpVmkjs6TQHpr29/bPP -PtuxY0dRUdGkSZOmTZsWExNTVFSUlZVlt9tjY2NFPFanWUbdI4DQeXU6XVhY2K5duxISEgRf8xCa -isQ5RpTy9PRYQvTffHNGkv4jaNCBfUVpfeGFef/93zO0ey4XwwNj7P33329sbExPT9fr9YmJiQI/ -aWlpFoslPz9/xIgRJpMJPOjg3ZYB4NRwPv7448rKymeffRa8pvAh5ylVkoT/9Kfs5577nBAjY7jv -y8n5PkqEMMac0vaXX57/hz9M91SCXjipjh07lpWVtWbNGrWcoSzLe/bsSU9PDw4OppS6HMbuAt07 
-yE3b0J133nnlirfsQO39hICi0N/9Lj0kxLRixQ7OMSG6W1I35RgTzmVK6TvvLF65cqIn7KuGkKqq -qoiICL1eL8syxlhUNTlx4oSiKPfee2+X7+v5ESYWi2Xw4MEuEqlTd42TBliW6eOPj//yy0eCg42U -tkuScKrcImJZJPITxtoDAgyff7505cqJskxdsK8OkznPlIuKiqqoqGhtbRWRz4qi6HS66dOni6TU -Ls9w7DYBtL1Rjy1xiezw9IgkYVmmc+cmHD/+1MSJgxWlhRDo6flcvYx9jDEhoCgtY8bEHj/+5F13 -JQudx9MACSFiso8dO9ZsNn/44YeiUqu48/Lly2qCu/cXd1sLguvLMoovly5dUhTFZDJ5OstE02+s -KCwkxLRs2Vi7nWRnn+dcIUTv9Oj2PUfqyBdjzME5Xb165iefLB440F/oPNrxav2INpvt8OHDR48e -tVqt0dHRI0eOzM7OPnjwoF6vlyQpNzf38OHD9913X1BQkJcM347GbySpSDWUbtiwwWq1Pv300ypt -vItlcWCLOI9lxYrdp0+XI2TEWHKu674hA3dGSimc21JSYtetu+v6s9w6hgiaEAWEUGNj4/r16yml -AwcOLCsrE5bnkJCQL7/8sqCgQJZlPz+/u+++e8SIEVor6U0hgIrQq1evvvHGG3FxcUuXLgXPSpH2 -EVU1stuVd9459uqr+5uaGvuKDNeh3mIJfP756atWpQkPl/ASg5PBqtNfDeh8//33CSHLly8HgLa2 -to8//ri0tHTVqlXh4eF2u729vT0gIEA1gnYZ5dgTFnQdARFijBmNxmHDhu3Zs+fixYujR4/2/mIt -OxJG3alTY5ctGwugP3WqzmazAiCMJe8FYHqGdwDkFKoK5+1+fuaVK9O3bFk8b16SKJWrMn2xshlj -Fy9erK2t9fPz0+v1CKGmpqY9e/YsWrQoKCiIUmowGMaNG1dcXHzmzJlJkyYRQoxGI3Kecuc9lkfA -jZ4nrHY0PDx8+fLl3377rcPhMBgM4HUdqNNKnISgKCwszO8vf7n9179Oe++9vPXr86qr6wEAQC8E -XbdOse3sdcI9KU4HdQBARMSARx8dt2LFhOhoC2PcRdcUgyopKdm5c6fVahWCbfHixampqeJXNW1L -WPx/+tOfrl+/vq6uLjw8XCj+XmoL9DIBtNSOj49ftmyZOgzBSbyXOVBrjgosRET4v/TSjDVrpmRm -nvvoo5P795e1tVkBAEAHIKk4UvPcPaFbcA6V0XGuUKoAcJMpYNq05IcfHn333UNFlqTgOcLCIxoU -6M7Pz//kk09mzJiRnp5OCMnMzBTFZgIDA+Pi4r766qvhw4cTQhRFAYCgoCBCiN1uB429wUffU68d -6KyuXK28cr/i4XEQfFk9XlkMoLraundvyZ49JTk5FysrmwDEKWDCQyk+1zXpNHIw50ds9PRRUUFT -pgyaNy9x7tzEmJiOoGj1CGn3GOnGxsa//OUv99xzT1pamjYmU8yn+vr6N998MyEh4cEHH9TpdAih -L7/88uTJk7/97W99n/i9TACVDNfaRSgvLy8iIiI6OrrL7bg7ISnlCF07q6u9Xf7hh4a8vOrvv68r -LKyvrLx66VKr1eqQZVlzJh4CwDqd5O+vHzjQLybGMmxYWGpq+PjxUcOGDTCZdFoFzNP5aoKlZGdn -Hzhw4He/+506lxFCLS0ttbW1JpMpOjq6srLygw8+UBRlxIgRjY2NFy9efOSRR4YMGeLLIeIu0Jtn -yrsYab///vtt27YtW7YsJSVFXQq+tAAA6lmaooSM0SiNGRMxenQ4dIh93txsa262NzfbbDZFVKrQ -6bDRKAUGGi0WQ1CQ0WVqi7P7xKmFWut8px0wGAytra1NTU2hoaGKopSXlx85cqS4uNhms1FKp0yZ -snjx4meeeSY3N/f8+fMhISH33nvvwIEDuQ8ZXZ0MuRdXgArq8L744ouvvvrqjjvumD17ttejNzy1 -I8JAROHBDtYv+IYXh6jTRX7tLFRN8lAXJdWdC679jTfeYIwlJiaWl5c3NDRERUVNmDBhyJAhZWVl -27dv/8UvfjF27NgunS39QwAt98cYnzlzZvPmzUuWLBk1apSWn/asu2pvPVVkVaN3tP92t32EUHV1 -dWZmZnNzc0JCwsSJE0U0lfhp3bp1gYGBS5YsURRF3eX2gPvfLAK406ClpcVgMOh0Og361KolXWvK -fQlaa4/LF+HVkiTp7bffjo6OzsjIELLtBvvfwyPNvYM6u4Uyqk2yFIYUdffgyX7Xl6BqONq9K3cm -1MmyzJ1nF0qSdOjQocrKysmTJ4NTON/g7OlNIawFtVtaHU5c+eijjzDGGRkZAwYM8FE43yTQmnVB -M+XVBVpXV/fBBx/Mnj07NTX16tWr+/bty8vLe+CBByIjIz2dpNZtRPXZ7FOXc2lp6RdffFFRUTF2 -7NhZs2aJBNjr+tQj8dDdzqjTXFWRtdtGZ2CHsmvXrtzcXJPJpChKWFhYRkZGbGyslwOsuwt9vfxV -Mpw9e3bHjh1JSUmLFi1y2eyoJtxep4SLyFH/LS8vz8zMHD16dHp6urtuc+nSpbq6uuDg4KioKME5 -u9xa3ooEUMejVmJUFEVRFJEuK8Zjs9lUY1ZH/9yQ1bP3goa0Ku7sdntOTk5+fn59fX1CQsIdd9wR -FxenfbX7svDdyuYj3CwZ4A7qNk0MQARTqmfNAYDNZlu7dq3FYpkwYUJSUpI4ckKrh2hnnIvBw9O7 -tPeD2ykuIm8rMTHxoYceEjsp7SMuEkIVxb27KPtHA3HX9gTDPXv27MmTJ8+fP2+1WtPS0jIyMnqw -uXdRIgU0NzdXVlYWFhaOHz8+ISFBZXoqu+uyQupNgr5bAVpwd2oCgCRJw4YNGz58uKIo586dcxED -R44cqampGTRoUGBgoMViCQ4OFhsLLaIZY4qiUEpFjSN1J7hjxw5ZlgkhgYGBqampLj1RVaA+EP6d -oKJ/dXABWg4LTkah5d0iSe3YsWMOh8Nms8myvHLlyujoaDGR29vb169f39LSIqwI4eHhK1euBKdh -ubq6uqioaMiQIZGRkULegJvZqh93grcEAQRop7N2q6xlVoyx1tZWq9U6YMAAbSDU8ePHEULiANOg -oKDY2FithHCRFv0y0z3BLUQAT6C6d7TaIfiAR5c9bZcBA/0C/wEEEKDtZ6duHy1a3Wtk37LwH0OA -/1fhphjjfgTf4f8C4VLHz/5KLxoAAAA8dEVYdGNvbW1lbnQAIEltYWdlIGdlbmVyYXRlZCBieSBH -TlUgR2hvc3RzY3JpcHQgKGRldmljZT1wbm1yYXcpCvqLFvMAAAAASUVORK5CYII= diff --git a/gem/input.bin b/gem/input.bin deleted file mode 100644 index d24a954..0000000 Binary files a/gem/input.bin and /dev/null 
differ diff --git a/gem/ltn012.tex b/gem/ltn012.tex deleted file mode 100644 index 8027ecc..0000000 --- a/gem/ltn012.tex +++ /dev/null @@ -1,695 +0,0 @@ -\documentclass[10pt]{article} -\usepackage{fancyvrb} -\usepackage{url} -\DefineVerbatimEnvironment{lua}{Verbatim}{fontsize=\small,commandchars=\@\#\%} -\DefineVerbatimEnvironment{C}{Verbatim}{fontsize=\small,commandchars=\@\#\%} -\DefineVerbatimEnvironment{mime}{Verbatim}{fontsize=\small,commandchars=\$\#\%} -\newcommand{\stick}[1]{\vbox{\setlength{\parskip}{0pt}#1}} -\newcommand{\bl}{\ensuremath{\mathtt{\backslash}}} -\newcommand{\CR}{\texttt{CR}} -\newcommand{\LF}{\texttt{LF}} -\newcommand{\CRLF}{\texttt{CR~LF}} -\newcommand{\nil}{\texttt{nil}} - -\title{Filters, sources, sinks, and pumps\\ - {\large or Functional programming for the rest of us}} -\author{Diego Nehab} - -\begin{document} - -\maketitle - -\begin{abstract} -Certain data processing operations can be implemented in the -form of filters. A filter is a function that can process -data received in consecutive invocations, returning partial -results each time it is called. Examples of operations that -can be implemented as filters include the end-of-line -normalization for text, Base64 and Quoted-Printable transfer -content encodings, the breaking of text into lines, SMTP -dot-stuffing, and there are many others. Filters become -even more powerful when we allow them to be chained together -to create composite filters. In this context, filters can be -seen as the internal links in a chain of data transformations. -Sources and sinks are the corresponding end points in these -chains. A source is a function that produces data, chunk by -chunk, and a sink is a function that takes data, chunk by -chunk. Finally, pumps are procedures that actively drive -data from a source to a sink, and indirectly through all -intervening filters. In this article, we describe the design of an -elegant interface for filters, sources, sinks, chains, and -pumps, and we illustrate each step with concrete examples. -\end{abstract} - -\section{Introduction} - -Within the realm of networking applications, we are often -required to apply transformations to streams of data. Examples -include the end-of-line normalization for text, Base64 and -Quoted-Printable transfer content encodings, breaking text -into lines with a maximum number of columns, SMTP -dot-stuffing, \texttt{gzip} compression, HTTP chunked -transfer coding, and the list goes on. - -Many complex tasks require a combination of two or more such -transformations, and therefore a general mechanism for -promoting reuse is desirable. In the process of designing -\texttt{LuaSocket~2.0}, we repeatedly faced this problem. -The solution we reached proved to be very general and -convenient. It is based on the concepts of filters, sources, -sinks, and pumps, which we introduce below. - -\emph{Filters} are functions that can be repeatedly invoked -with chunks of input, successively returning processed -chunks of output. Naturally, the result of -concatenating all the output chunks must be the same as the -result of applying the filter to the concatenation of all -input chunks. In fancier language, filters \emph{commute} -with the concatenation operator. More importantly, filters -must handle input data correctly no matter how the stream -has been split into chunks. - -A \emph{chain} is a function that transparently combines the -effect of one or more filters. The interface of a chain is -indistinguishable from the interface of its component -filters. 
This allows a chained filter to be used wherever -an atomic filter is accepted. In particular, chains can be -themselves chained to create arbitrarily complex operations. - -Filters can be seen as internal nodes in a network through -which data will flow, potentially being transformed many -times along the way. Chains connect these nodes together. -The initial and final nodes of the network are -\emph{sources} and \emph{sinks}, respectively. Less -abstractly, a source is a function that produces new chunks -of data every time it is invoked. Conversely, sinks are -functions that give a final destination to the chunks of -data they receive in sucessive calls. Naturally, sources -and sinks can also be chained with filters to produce -filtered sources and sinks. - -Finally, filters, chains, sources, and sinks are all passive -entities: they must be repeatedly invoked in order for -anything to happen. \emph{Pumps} provide the driving force -that pushes data through the network, from a source to a -sink, and indirectly through all intervening filters. - -In the following sections, we start with a simplified -interface, which we later refine. The evolution we present -is not contrived: it recreates the steps we ourselves -followed as we consolidated our understanding of these -concepts within our application domain. - -\subsection{A simple example} - -The end-of-line normalization of text is a good -example to motivate our initial filter interface. -Assume we are given text in an unknown end-of-line -convention (including possibly mixed conventions) out of the -commonly found Unix (\LF), Mac OS (\CR), and -DOS (\CRLF) conventions. We would like to be able to -use the folowing code to normalize the end-of-line markers: -\begin{quote} -\begin{lua} -@stick# -local CRLF = "\013\010" -local input = source.chain(source.file(io.stdin), normalize(CRLF)) -local output = sink.file(io.stdout) -pump.all(input, output) -% -\end{lua} -\end{quote} - -This program should read data from the standard input stream -and normalize the end-of-line markers to the canonic -\CRLF\ marker, as defined by the MIME standard. -Finally, the normalized text should be sent to the standard output -stream. We use a \emph{file source} that produces data from -standard input, and chain it with a filter that normalizes -the data. The pump then repeatedly obtains data from the -source, and passes it to the \emph{file sink}, which sends -it to the standard output. - -In the code above, the \texttt{normalize} \emph{factory} is a -function that creates our normalization filter, which -replaces any end-of-line marker with the canonic marker. -The initial filter interface is -trivial: a filter function receives a chunk of input data, -and returns a chunk of processed data. When there are no -more input data left, the caller notifies the filter by invoking -it with a \nil\ chunk. The filter responds by returning -the final chunk of processed data (which could of course be -the empty string). - -Although the interface is extremely simple, the -implementation is not so obvious. A normalization filter -respecting this interface needs to keep some kind of context -between calls. This is because a chunk boundary may lie between -the \CR\ and \LF\ characters marking the end of a single line. This -need for contextual storage motivates the use of -factories: each time the factory is invoked, it returns a -filter with its own context so that we can have several -independent filters being used at the same time. 
For -efficiency reasons, we must avoid the obvious solution of -concatenating all the input into the context before -producing any output chunks. - -To that end, we break the implementation into two parts: -a low-level filter, and a factory of high-level filters. The -low-level filter is implemented in C and does not maintain -any context between function calls. The high-level filter -factory, implemented in Lua, creates and returns a -high-level filter that maintains whatever context the low-level -filter needs, but isolates the user from its internal -details. That way, we take advantage of C's efficiency to -perform the hard work, and take advantage of Lua's -simplicity for the bookkeeping. - -\subsection{The Lua part of the filter} - -Below is the complete implementation of the factory of high-level -end-of-line normalization filters: -\begin{quote} -\begin{lua} -@stick# -function filter.cycle(lowlevel, context, extra) - return function(chunk) - local ret - ret, context = lowlevel(context, chunk, extra) - return ret - end -end -% - -@stick# -function normalize(marker) - return filter.cycle(eol, 0, marker) -end -% -\end{lua} -\end{quote} - -The \texttt{normalize} factory simply calls a more generic -factory, the \texttt{cycle}~factory, passing the low-level -filter~\texttt{eol}. The \texttt{cycle}~factory receives a -low-level filter, an initial context, and an extra -parameter, and returns a new high-level filter. Each time -the high-level filer is passed a new chunk, it invokes the -low-level filter with the previous context, the new chunk, -and the extra argument. It is the low-level filter that -does all the work, producing the chunk of processed data and -a new context. The high-level filter then replaces its -internal context, and returns the processed chunk of data to -the user. Notice that we take advantage of Lua's lexical -scoping to store the context in a closure between function -calls. - -\subsection{The C part of the filter} - -As for the low-level filter, we must first accept -that there is no perfect solution to the end-of-line marker -normalization problem. The difficulty comes from an -inherent ambiguity in the definition of empty lines within -mixed input. However, the following solution works well for -any consistent input, as well as for non-empty lines in -mixed input. It also does a reasonable job with empty lines -and serves as a good example of how to implement a low-level -filter. - -The idea is to consider both \CR\ and~\LF\ as end-of-line -\emph{candidates}. We issue a single break if any candidate -is seen alone, or if it is followed by a different -candidate. In other words, \CR~\CR~and \LF~\LF\ each issue -two end-of-line markers, whereas \CR~\LF~and \LF~\CR\ issue -only one marker each. It is easy to see that this method -correctly handles the most common end-of-line conventions. - -With this in mind, we divide the low-level filter into two -simple functions. The inner function~\texttt{pushchar} performs the -normalization itself. It takes each input character in turn, -deciding what to output and how to modify the context. The -context tells if the last processed character was an -end-of-line candidate, and if so, which candidate it was. 
-For efficiency, we use Lua's auxiliary library's buffer -interface: -\begin{quote} -\begin{C} -@stick# -@#define candidate(c) (c == CR || c == LF) -static int pushchar(int c, int last, const char *marker, - luaL_Buffer *buffer) { - if (candidate(c)) { - if (candidate(last)) { - if (c == last) - luaL_addstring(buffer, marker); - return 0; - } else { - luaL_addstring(buffer, marker); - return c; - } - } else { - luaL_pushchar(buffer, c); - return 0; - } -} -% -\end{C} -\end{quote} - -The outer function~\texttt{eol} simply interfaces with Lua. -It receives the context and input chunk (as well as an -optional custom end-of-line marker), and returns the -transformed output chunk and the new context. -Notice that if the input chunk is \nil, the operation -is considered to be finished. In that case, the loop will -not execute a single time and the context is reset to the -initial state. This allows the filter to be reused many -times: -\begin{quote} -\begin{C} -@stick# -static int eol(lua_State *L) { - int context = luaL_checkint(L, 1); - size_t isize = 0; - const char *input = luaL_optlstring(L, 2, NULL, &isize); - const char *last = input + isize; - const char *marker = luaL_optstring(L, 3, CRLF); - luaL_Buffer buffer; - luaL_buffinit(L, &buffer); - if (!input) { - lua_pushnil(L); - lua_pushnumber(L, 0); - return 2; - } - while (input < last) - context = pushchar(*input++, context, marker, &buffer); - luaL_pushresult(&buffer); - lua_pushnumber(L, context); - return 2; -} -% -\end{C} -\end{quote} - -When designing filters, the challenging part is usually -deciding what to store in the context. For line breaking, for -instance, it could be the number of bytes that still fit in the -current line. For Base64 encoding, it could be a string -with the bytes that remain after the division of the input -into 3-byte atoms. The MIME module in the \texttt{LuaSocket} -distribution has many other examples. - -\section{Filter chains} - -Chains greatly increase the power of filters. For example, -according to the standard for Quoted-Printable encoding, -text should be normalized to a canonic end-of-line marker -prior to encoding. After encoding, the resulting text must -be broken into lines of no more than 76 characters, with the -use of soft line breaks (a line terminated by the \texttt{=} -sign). To help specifying complex transformations like -this, we define a chain factory that creates a composite -filter from one or more filters. A chained filter passes -data through all its components, and can be used wherever a -primitive filter is accepted. - -The chaining factory is very simple. The auxiliary -function~\texttt{chainpair} chains two filters together, -taking special care if the chunk is the last. This is -because the final \nil\ chunk notification has to be -pushed through both filters in turn: -\begin{quote} -\begin{lua} -@stick# -local function chainpair(f1, f2) - return function(chunk) - local ret = f2(f1(chunk)) - if chunk then return ret - else return ret .. f2() end - end -end -% - -@stick# -function filter.chain(...) - local f = select(1, ...) - for i = 2, select('@#', ...) 
do - f = chainpair(f, select(i, ...)) - end - return f -end -% -\end{lua} -\end{quote} - -Thanks to the chain factory, we can -define the Quoted-Printable conversion as such: -\begin{quote} -\begin{lua} -@stick# -local qp = filter.chain(normalize(CRLF), encode("quoted-printable"), - wrap("quoted-printable")) -local input = source.chain(source.file(io.stdin), qp) -local output = sink.file(io.stdout) -pump.all(input, output) -% -\end{lua} -\end{quote} - -\section{Sources, sinks, and pumps} - -The filters we introduced so far act as the internal nodes -in a network of transformations. Information flows from node -to node (or rather from one filter to the next) and is -transformed along the way. Chaining filters together is our -way to connect nodes in this network. As the starting point -for the network, we need a source node that produces the -data. In the end of the network, we need a sink node that -gives a final destination to the data. - -\subsection{Sources} - -A source returns the next chunk of data each time it is -invoked. When there is no more data, it simply returns~\nil. -In the event of an error, the source can inform the -caller by returning \nil\ followed by the error message. - -Below are two simple source factories. The \texttt{empty} source -returns no data, possibly returning an associated error -message. The \texttt{file} source yields the contents of a file -in a chunk by chunk fashion: -\begin{quote} -\begin{lua} -@stick# -function source.empty(err) - return function() - return nil, err - end -end -% - -@stick# -function source.file(handle, io_err) - if handle then - return function() - local chunk = handle:read(2048) - if not chunk then handle:close() end - return chunk - end - else return source.empty(io_err or "unable to open file") end -end -% -\end{lua} -\end{quote} - -\subsection{Filtered sources} - -A filtered source passes its data through the -associated filter before returning it to the caller. -Filtered sources are useful when working with -functions that get their input data from a source (such as -the pumps in our examples). By chaining a source with one or -more filters, such functions can be transparently provided -with filtered data, with no need to change their interfaces. -Here is a factory that does the job: -\begin{quote} -\begin{lua} -@stick# -function source.chain(src, f) - return function() - if not src then - return nil - end - local chunk, err = src() - if not chunk then - src = nil - return f(nil) - else - return f(chunk) - end - end -end -% -\end{lua} -\end{quote} - -\subsection{Sinks} - -Just as we defined an interface for a source of data, we can -also define an interface for a data destination. We call -any function respecting this interface a sink. In our first -example, we used a file sink connected to the standard -output. - -Sinks receive consecutive chunks of data, until the end of -data is signaled by a \nil\ input chunk. A sink can be -notified of an error with an optional extra argument that -contains the error message, following a \nil\ chunk. -If a sink detects an error itself, and -wishes not to be called again, it can return \nil, -followed by an error message. A return value that -is not \nil\ means the sink will accept more data. - -Below are two useful sink factories. -The table factory creates a sink that stores -individual chunks into an array. The data can later be -efficiently concatenated into a single string with Lua's -\texttt{table.concat} library function. 
The \texttt{null} sink -simply discards the chunks it receives: -\begin{quote} -\begin{lua} -@stick# -function sink.table(t) - t = t or {} - local f = function(chunk, err) - if chunk then table.insert(t, chunk) end - return 1 - end - return f, t -end -% - -@stick# -local function null() - return 1 -end - -function sink.null() - return null -end -% -\end{lua} -\end{quote} - -Naturally, filtered sinks are just as useful as filtered -sources. A filtered sink passes each chunk it receives -through the associated filter before handing it down to the -original sink. In the following example, we use a source -that reads from the standard input. The input chunks are -sent to a table sink, which has been coupled with a -normalization filter. The filtered chunks are then -concatenated from the output array, and finally sent to -standard out: -\begin{quote} -\begin{lua} -@stick# -local input = source.file(io.stdin) -local output, t = sink.table() -output = sink.chain(normalize(CRLF), output) -pump.all(input, output) -io.write(table.concat(t)) -% -\end{lua} -\end{quote} - -\subsection{Pumps} - -Although not on purpose, our interface for sources is -compatible with Lua iterators. That is, a source can be -neatly used in conjunction with \texttt{for} loops. Using -our file source as an iterator, we can write the following -code: -\begin{quote} -\begin{lua} -@stick# -for chunk in source.file(io.stdin) do - io.write(chunk) -end -% -\end{lua} -\end{quote} - -Loops like this will always be present because everything -we designed so far is passive. Sources, sinks, filters: none -of them can do anything on their own. The operation of -pumping all data a source can provide into a sink is so -common that it deserves its own function: -\begin{quote} -\begin{lua} -@stick# -function pump.step(src, snk) - local chunk, src_err = src() - local ret, snk_err = snk(chunk, src_err) - if chunk and ret then return 1 - else return nil, src_err or snk_err end -end -% - -@stick# -function pump.all(src, snk, step) - step = step or pump.step - while true do - local ret, err = step(src, snk) - if not ret then - if err then return nil, err - else return 1 end - end - end -end -% -\end{lua} -\end{quote} - -The \texttt{pump.step} function moves one chunk of data from -the source to the sink. The \texttt{pump.all} function takes -an optional \texttt{step} function and uses it to pump all the -data from the source to the sink. -Here is an example that uses the Base64 and the -line wrapping filters from the \texttt{LuaSocket} -distribution. The program reads a binary file from -disk and stores it in another file, after encoding it to the -Base64 transfer content encoding: -\begin{quote} -\begin{lua} -@stick# -local input = source.chain( - source.file(io.open("input.bin", "rb")), - encode("base64")) -local output = sink.chain( - wrap(76), - sink.file(io.open("output.b64", "w"))) -pump.all(input, output) -% -\end{lua} -\end{quote} - -The way we split the filters here is not intuitive, on -purpose. Alternatively, we could have chained the Base64 -encode filter and the line-wrap filter together, and then -chain the resulting filter with either the file source or -the file sink. It doesn't really matter. - -\section{Exploding filters} - -Our current filter interface has one serious shortcoming. -Consider for example a \texttt{gzip} decompression filter. -During decompression, a small input chunk can be exploded -into a huge amount of data. 
To address this problem, we -decided to change the filter interface and allow exploding -filters to return large quantities of output data in a chunk -by chunk manner. - -More specifically, after passing each chunk of input to -a filter, and collecting the first chunk of output, the -user must now loop to receive other chunks from the filter until no -filtered data is left. Within these secondary calls, the -caller passes an empty string to the filter. The filter -responds with an empty string when it is ready for the next -input chunk. In the end, after the user passes a -\nil\ chunk notifying the filter that there is no -more input data, the filter might still have to produce too -much output data to return in a single chunk. The user has -to loop again, now passing \nil\ to the filter each time, -until the filter itself returns \nil\ to notify the -user it is finally done. - -Fortunately, it is very easy to modify a filter to respect -the new interface. In fact, the end-of-line translation -filter we presented earlier already conforms to it. The -complexity is encapsulated within the chaining functions, -which must now include a loop. Since these functions only -have to be written once, the user is rarely affected. -Interestingly, the modifications do not have a measurable -negative impact in the performance of filters that do -not need the added flexibility. On the other hand, for a -small price in complexity, the changes make exploding -filters practical. - -\section{A complex example} - -The LTN12 module in the \texttt{LuaSocket} distribution -implements all the ideas we have described. The MIME -and SMTP modules are tightly integrated with LTN12, -and can be used to showcase the expressive power of filters, -sources, sinks, and pumps. Below is an example -of how a user would proceed to define and send a -multipart message, with attachments, using \texttt{LuaSocket}: -\begin{quote} -\begin{mime} -local smtp = require"socket.smtp" -local mime = require"mime" -local ltn12 = require"ltn12" - -local message = smtp.message{ - headers = { - from = "Sicrano <sicrano@example.com>", - to = "Fulano <fulano@example.com>", - subject = "A message with an attachment"}, - body = { - preamble = "Hope you can see the attachment" .. CRLF, - [1] = { - body = "Here is our logo" .. CRLF}, - [2] = { - headers = { - ["content-type"] = 'image/png; name="luasocket.png"', - ["content-disposition"] = - 'attachment; filename="luasocket.png"', - ["content-description"] = 'LuaSocket logo', - ["content-transfer-encoding"] = "BASE64"}, - body = ltn12.source.chain( - ltn12.source.file(io.open("luasocket.png", "rb")), - ltn12.filter.chain( - mime.encode("base64"), - mime.wrap()))}}} - -assert(smtp.send{ - rcpt = "<fulano@example.com>", - from = "<sicrano@example.com>", - source = message}) -\end{mime} -\end{quote} - -The \texttt{smtp.message} function receives a table -describing the message, and returns a source. The -\texttt{smtp.send} function takes this source, chains it with the -SMTP dot-stuffing filter, connects a socket sink -with the server, and simply pumps the data. The message is never -assembled in memory. Everything is produced on demand, -transformed in small pieces, and sent to the server in chunks, -including the file attachment which is loaded from disk and -encoded on the fly. It just works. - -\section{Conclusions} - -In this article, we introduced the concepts of filters, -sources, sinks, and pumps to the Lua language. These are -useful tools for stream processing in general. 
Sources provide -a simple abstraction for data acquisition. Sinks provide an -abstraction for final data destinations. Filters define an -interface for data transformations. The chaining of -filters, sources and sinks provides an elegant way to create -arbitrarily complex data transformations from simpler -components. Pumps simply push the data through. - -\section{Acknowledgements} - -The concepts described in this text are the result of long -discussions with David Burgess. A version of this text has -been released on-line as the Lua Technical Note 012, hence -the name of the corresponding LuaSocket module, LTN12. Wim -Couwenberg contributed to the implementation of the module, -and Adrian Sietsma was the first to notice the -correspondence between sources and Lua iterators. - - -\end{document} diff --git a/gem/luasocket.png b/gem/luasocket.png deleted file mode 100644 index d24a954..0000000 Binary files a/gem/luasocket.png and /dev/null differ diff --git a/gem/makefile b/gem/makefile deleted file mode 100644 index a4287c2..0000000 --- a/gem/makefile +++ /dev/null @@ -1,14 +0,0 @@ -ltn012.pdf: ltn012.ps - ./myps2pdf ltn012.ps - -ltn012.ps: ltn012.dvi - dvips -G0 -t letter -o ltn012.ps ltn012.dvi - -ltn012.dvi: ltn012.tex - latex ltn012 - -clean: - rm -f *~ *.log *.aux *.bbl *.blg ltn012.pdf ltn012.ps ltn012.dvi ltn012.lof ltn012.toc ltn012.lot - -pdf: ltn012.pdf - open ltn012.pdf diff --git a/gem/myps2pdf b/gem/myps2pdf deleted file mode 100755 index 78c23e5..0000000 --- a/gem/myps2pdf +++ /dev/null @@ -1,113 +0,0 @@ -#!/bin/sh - -do_opt=1 -best=0 -rot=0 -a4=0 -eps=0 -usage="Usage: $0 [-no_opt] [-best] [-rot] [-a4] [-eps] in.ps [out.pdf]" - -case "x$1" in -"x-no_opt") do_opt=0 ; shift ;; -esac - -case "x$1" in -"x-best") best=1 ; shift ;; -esac - -case "x$1" in -"x-rot") rot=1 ; shift ;; -esac - -case "x$1" in -"x-a4") a4=1 ; shift ;; -esac - -case "x$1" in -"x-eps") eps=1 ; shift ;; -esac - -case $# in -2) ifilename=$1 ; ofilename=$2 ;; -1) ifilename=$1 - if `echo $1 | grep -i '\.e*ps$' > /dev/null` - then - ofilename=`echo $1 | sed 's/\..*$/.pdf/'` - else - echo "$usage" 1>&2 - exit 1 - fi ;; -*) echo "$usage" 1>&2 ; exit 1 ;; -esac - -if [ $best == 1 ] -then - options="-dPDFSETTINGS=/prepress \ - -r1200 \ - -dMonoImageResolution=1200 \ - -dGrayImageResolution=1200 \ - -dColorImageResolution=1200 \ - -dDownsampleMonoImages=false \ - -dDownsampleGrayImages=false \ - -dDownsampleColorImages=false \ - -dAutoFilterMonoImages=false \ - -dAutoFilterGrayImages=false \ - -dAutoFilterColorImages=false \ - -dMonoImageFilter=/FlateEncode \ - -dGrayImageFilter=/FlateEncode \ - -dColorImageFilter=/FlateEncode" -else - options="-dPDFSETTINGS=/prepress \ - -r600 \ - -dDownsampleMonoImages=true \ - -dDownsampleGrayImages=true \ - -dDownsampleColorImages=true \ - -dMonoImageDownsampleThreshold=2.0 \ - -dGrayImageDownsampleThreshold=1.5 \ - -dColorImageDownsampleThreshold=1.5 \ - -dMonoImageResolution=600 \ - -dGrayImageResolution=600 \ - -dColorImageResolution=600 \ - -dAutoFilterMonoImages=false \ - -dMonoImageFilter=/FlateEncode \ - -dAutoFilterGrayImages=true \ - -dAutoFilterColorImages=true" -fi - -if [ $rot == 1 ] -then - options="$options -dAutoRotatePages=/PageByPage" -fi - -if [ $eps == 1 ] -then - options="$options -dEPSCrop" -fi - -set -x - -if [ $a4 == 1 ] -then - # Resize from A4 to letter size - psresize -Pa4 -pletter "$ifilename" myps2pdf.temp.ps - ifilename=myps2pdf.temp.ps -fi - -gs -q -dSAFER -dNOPAUSE -dBATCH \ - -sDEVICE=pdfwrite -sPAPERSIZE=letter -sOutputFile=myps2pdf.temp.pdf \ - 
-dCompatibilityLevel=1.3 \ - $options \ - -dMaxSubsetPct=100 \ - -dSubsetFonts=true \ - -dEmbedAllFonts=true \ - -dColorConversionStrategy=/LeaveColorUnchanged \ - -dDoThumbnails=true \ - -dPreserveEPSInfo=true \ - -c .setpdfwrite -f "$ifilename" - -if [ $do_opt == 1 ] -then - pdfopt myps2pdf.temp.pdf $ofilename -else - mv myps2pdf.temp.pdf $ofilename -fi -rm -f myps2pdf.temp.pdf myps2pdf.temp.ps diff --git a/gem/t1.lua b/gem/t1.lua deleted file mode 100644 index 0c054c9..0000000 --- a/gem/t1.lua +++ /dev/null @@ -1,25 +0,0 @@ -source = {} -sink = {} -pump = {} -filter = {} - --- source.chain -dofile("ex6.lua") - --- source.file -dofile("ex5.lua") - --- normalize -require"gem" -eol = gem.eol -dofile("ex2.lua") - --- sink.file -require"ltn12" -sink.file = ltn12.sink.file - --- pump.all -dofile("ex10.lua") - --- run test -dofile("ex1.lua") diff --git a/gem/t1lf.txt b/gem/t1lf.txt deleted file mode 100644 index 8cddd1b..0000000 --- a/gem/t1lf.txt +++ /dev/null @@ -1,5 +0,0 @@ -this is a test file -it should have been saved as lf eol -but t1.lua will convert it to crlf eol -otherwise it is broken! - diff --git a/gem/t2.lua b/gem/t2.lua deleted file mode 100644 index a81ed73..0000000 --- a/gem/t2.lua +++ /dev/null @@ -1,36 +0,0 @@ -source = {} -sink = {} -pump = {} -filter = {} - --- filter.chain -dofile("ex3.lua") - --- normalize -require"gem" -eol = gem.eol -dofile("ex2.lua") - --- encode -require"mime" -encode = mime.encode - --- wrap -wrap = mime.wrap - --- source.chain -dofile("ex6.lua") - --- source.file -dofile("ex5.lua") - --- sink.file -require"ltn12" -sink.file = ltn12.sink.file - --- pump.all -dofile("ex10.lua") - --- run test -CRLF = "\013\010" -dofile("ex4.lua") diff --git a/gem/t2.txt b/gem/t2.txt deleted file mode 100644 index f484fe8..0000000 --- a/gem/t2.txt +++ /dev/null @@ -1,4 +0,0 @@ -esse � um texto com acentos -quoted-printable tem que quebrar linhas longas, com mais que 76 linhas de texto -fora que as quebras de linhas t�m que ser normalizadas -vamos ver o que d� isso aqui diff --git a/gem/t2gt.qp b/gem/t2gt.qp deleted file mode 100644 index 355a845..0000000 --- a/gem/t2gt.qp +++ /dev/null @@ -1,5 +0,0 @@ -esse =E9 um texto com acentos -quoted-printable tem que quebrar linhas longas, com mais que 76 linhas de t= -exto -fora que as quebras de linhas t=EAm que ser normalizadas -vamos ver o que d=E1 isso aqui diff --git a/gem/t3.lua b/gem/t3.lua deleted file mode 100644 index 4bb98ba..0000000 --- a/gem/t3.lua +++ /dev/null @@ -1,25 +0,0 @@ -source = {} -sink = {} -pump = {} -filter = {} - --- source.file -dofile("ex5.lua") - --- sink.table -dofile("ex7.lua") - --- sink.chain -require"ltn12" -sink.chain = ltn12.sink.chain - --- normalize -require"gem" -eol = gem.eol -dofile("ex2.lua") - --- pump.all -dofile("ex10.lua") - --- run test -dofile("ex8.lua") diff --git a/gem/t4.lua b/gem/t4.lua deleted file mode 100644 index 8b8071c..0000000 --- a/gem/t4.lua +++ /dev/null @@ -1,10 +0,0 @@ -source = {} -sink = {} -pump = {} -filter = {} - --- source.file -dofile("ex5.lua") - --- run test -dofile("ex9.lua") diff --git a/gem/t5.lua b/gem/t5.lua deleted file mode 100644 index 7c569ea..0000000 --- a/gem/t5.lua +++ /dev/null @@ -1,30 +0,0 @@ -source = {} -sink = {} -pump = {} -filter = {} - --- source.chain -dofile("ex6.lua") - --- source.file -dofile("ex5.lua") - --- encode -require"mime" -encode = mime.encode - --- sink.chain -require"ltn12" -sink.chain = ltn12.sink.chain - --- wrap -wrap = mime.wrap - --- sink.file -sink.file = ltn12.sink.file - --- pump.all -dofile("ex10.lua") - --- 
run test -dofile("ex11.lua") diff --git a/gem/test.lua b/gem/test.lua deleted file mode 100644 index a937b9a..0000000 --- a/gem/test.lua +++ /dev/null @@ -1,46 +0,0 @@ -function readfile(n) - local f = io.open(n, "rb") - local s = f:read("*a") - f:close() - return s -end - -lf = readfile("t1lf.txt") -os.remove("t1crlf.txt") -os.execute("lua t1.lua < t1lf.txt > t1crlf.txt") -crlf = readfile("t1crlf.txt") -assert(crlf == string.gsub(lf, "\010", "\013\010"), "broken") - -gt = readfile("t2gt.qp") -os.remove("t2.qp") -os.execute("lua t2.lua < t2.txt > t2.qp") -t2 = readfile("t2.qp") -assert(gt == t2, "broken") - -os.remove("t1crlf.txt") -os.execute("lua t3.lua < t1lf.txt > t1crlf.txt") -crlf = readfile("t1crlf.txt") -assert(crlf == string.gsub(lf, "\010", "\013\010"), "broken") - -t = readfile("test.lua") -os.execute("lua t4.lua < test.lua > t") -t2 = readfile("t") -assert(t == t2, "broken") - -os.remove("output.b64") -gt = readfile("gt.b64") -os.execute("lua t5.lua") -t5 = readfile("output.b64") -assert(gt == t5, "failed") - -print("1 2 5 6 10 passed") -print("2 3 4 5 6 10 passed") -print("2 5 6 7 8 10 passed") -print("5 9 passed") -print("5 6 10 11 passed") - -os.remove("t") -os.remove("t2.qp") -os.remove("t1crlf.txt") -os.remove("t11.b64") -os.remove("output.b64") -- cgit v1.2.3-55-g6feb From 86de838eb5ed49711be8d62e01255cc2ccd3342e Mon Sep 17 00:00:00 2001 From: Thijs Schreijer <thijs@thijsschreijer.nl> Date: Wed, 23 Mar 2022 16:05:11 +0100 Subject: cleanup; move ./etc into ./samples and mark 'unsupported' --- .luacheckrc | 2 - etc/README | 89 ------------- etc/b64.lua | 19 --- etc/check-links.lua | 111 ---------------- etc/check-memory.lua | 17 --- etc/cookie.lua | 88 ------------- etc/dict.lua | 151 ---------------------- etc/dispatch.lua | 307 -------------------------------------------- etc/eol.lua | 13 -- etc/forward.lua | 65 ---------- etc/get.lua | 141 --------------------- etc/links | 17 --- etc/lp.lua | 323 ----------------------------------------------- etc/qp.lua | 23 ---- etc/tftp.lua | 154 ---------------------- luasocket-scm-3.rockspec | 1 - makefile.dist | 28 ++-- samples/README | 90 ++++++++++++- samples/b64.lua | 19 +++ samples/check-links.lua | 111 ++++++++++++++++ samples/check-memory.lua | 17 +++ samples/cookie.lua | 88 +++++++++++++ samples/dict.lua | 151 ++++++++++++++++++++++ samples/dispatch.lua | 307 ++++++++++++++++++++++++++++++++++++++++++++ samples/eol.lua | 13 ++ samples/forward.lua | 65 ++++++++++ samples/get.lua | 141 +++++++++++++++++++++ samples/links | 17 +++ samples/lp.lua | 323 +++++++++++++++++++++++++++++++++++++++++++++++ samples/qp.lua | 23 ++++ samples/tftp.lua | 154 ++++++++++++++++++++++ 31 files changed, 1527 insertions(+), 1541 deletions(-) delete mode 100644 etc/README delete mode 100644 etc/b64.lua delete mode 100644 etc/check-links.lua delete mode 100644 etc/check-memory.lua delete mode 100644 etc/cookie.lua delete mode 100644 etc/dict.lua delete mode 100644 etc/dispatch.lua delete mode 100644 etc/eol.lua delete mode 100644 etc/forward.lua delete mode 100644 etc/get.lua delete mode 100644 etc/links delete mode 100644 etc/lp.lua delete mode 100644 etc/qp.lua delete mode 100644 etc/tftp.lua create mode 100644 samples/b64.lua create mode 100644 samples/check-links.lua create mode 100644 samples/check-memory.lua create mode 100644 samples/cookie.lua create mode 100644 samples/dict.lua create mode 100644 samples/dispatch.lua create mode 100644 samples/eol.lua create mode 100644 samples/forward.lua create mode 100644 samples/get.lua create mode 
100644 samples/links create mode 100644 samples/lp.lua create mode 100644 samples/qp.lua create mode 100644 samples/tftp.lua diff --git a/.luacheckrc b/.luacheckrc index 8b25dd7..a3b4f63 100644 --- a/.luacheckrc +++ b/.luacheckrc @@ -15,8 +15,6 @@ include_files = { } exclude_files = { - "etc/*.lua", - "etc/**/*.lua", "test/*.lua", "test/**/*.lua", "samples/*.lua", diff --git a/etc/README b/etc/README deleted file mode 100644 index cfd3e37..0000000 --- a/etc/README +++ /dev/null @@ -1,89 +0,0 @@ -This directory contains code that is more useful than the -samples. This code *is* supported. - - tftp.lua -- Trivial FTP client - -This module implements file retrieval by the TFTP protocol. -Its main use was to test the UDP code, but since someone -found it usefull, I turned it into a module that is almost -official (no uploads, yet). - - dict.lua -- Dict client - -The dict.lua module started with a cool simple client -for the DICT protocol, written by Luiz Henrique Figueiredo. -This new version has been converted into a library, similar -to the HTTP and FTP libraries, that can be used from within -any luasocket application. Take a look on the source code -and you will be able to figure out how to use it. - - lp.lua -- LPD client library - -The lp.lua module implements the client part of the Line -Printer Daemon protocol, used to print files on Unix -machines. It is courtesy of David Burgess! See the source -code and the lpr.lua in the examples directory. - - b64.lua - qp.lua - eol.lua - -These are tiny programs that perform Base64, -Quoted-Printable and end-of-line marker conversions. - - get.lua -- file retriever - -This little program is a client that uses the FTP and -HTTP code to implement a command line file graber. Just -run - - lua get.lua <remote-file> [<local-file>] - -to download a remote file (either ftp:// or http://) to -the specified local file. The program also prints the -download throughput, elapsed time, bytes already downloaded -etc during download. - - check-memory.lua -- checks memory consumption - -This is just to see how much memory each module uses. - - dispatch.lua -- coroutine based dispatcher - -This is a first try at a coroutine based non-blocking -dispatcher for LuaSocket. Take a look at 'check-links.lua' -and at 'forward.lua' to see how to use it. - - check-links.lua -- HTML link checker program - -This little program scans a HTML file and checks for broken -links. It is similar to check-links.pl by Jamie Zawinski, -but uses all facilities of the LuaSocket library and the Lua -language. It has not been thoroughly tested, but it should -work. Just run - - lua check-links.lua [-n] {<url>} > output - -and open the result to see a list of broken links. Make sure -you check the '-n' switch. It runs in non-blocking mode, -using coroutines, and is MUCH faster! - - forward.lua -- coroutine based forward server - -This is a forward server that can accept several connections -and transfers simultaneously using non-blocking I/O and the -coroutine-based dispatcher. You can run, for example - - lua forward.lua 8080:proxy.com:3128 - -to redirect all local conections to port 8080 to the host -'proxy.com' at port 3128. - - unix.c and unix.h - -This is an implementation of Unix local domain sockets and -demonstrates how to extend LuaSocket with a new type of -transport. It has been tested on Linux and on Mac OS X. - -Good luck, -Diego. 
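For reference, the dispatcher usage that the README above defers to check-links.lua and forward.lua boils down to one small pattern. The following is only a sketch of that pattern, assuming dispatch.lua is reachable on package.path; the URL is a placeholder:

    -- minimal sketch of the dispatch.lua pattern used by check-links.lua:
    -- create a handler, start coroutine tasks whose sockets come from
    -- handler.tcp, then drive them with step() until they are all done
    local dispatch = require("dispatch")
    local http = require("socket.http")

    local handler = dispatch.newhandler("coroutine")
    local pending = 0

    pending = pending + 1
    handler:start(function()
        -- handler.tcp hands the request a dispatcher-managed socket
        local _, code = http.request{
            method = "HEAD",
            url = "http://www.lua.org/",   -- placeholder URL
            create = handler.tcp
        }
        print("status:", code)
        pending = pending - 1
    end)

    while pending > 0 do
        handler:step()   -- resumes whichever tasks have ready sockets
    end
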
diff --git a/etc/b64.lua b/etc/b64.lua deleted file mode 100644 index 11eeb2d..0000000 --- a/etc/b64.lua +++ /dev/null @@ -1,19 +0,0 @@ ------------------------------------------------------------------------------ --- Little program to convert to and from Base64 --- LuaSocket sample files --- Author: Diego Nehab ------------------------------------------------------------------------------ -local ltn12 = require("ltn12") -local mime = require("mime") -local source = ltn12.source.file(io.stdin) -local sink = ltn12.sink.file(io.stdout) -local convert -if arg and arg[1] == '-d' then - convert = mime.decode("base64") -else - local base64 = mime.encode("base64") - local wrap = mime.wrap() - convert = ltn12.filter.chain(base64, wrap) -end -sink = ltn12.sink.chain(convert, sink) -ltn12.pump.all(source, sink) diff --git a/etc/check-links.lua b/etc/check-links.lua deleted file mode 100644 index 283f3ac..0000000 --- a/etc/check-links.lua +++ /dev/null @@ -1,111 +0,0 @@ ------------------------------------------------------------------------------ --- Little program that checks links in HTML files, using coroutines and --- non-blocking I/O via the dispatcher module. --- LuaSocket sample files --- Author: Diego Nehab ------------------------------------------------------------------------------ -local url = require("socket.url") -local dispatch = require("dispatch") -local http = require("socket.http") -dispatch.TIMEOUT = 10 - --- make sure the user knows how to invoke us -arg = arg or {} -if #arg < 1 then - print("Usage:\n luasocket check-links.lua [-n] {<url>}") - exit() -end - --- '-n' means we are running in non-blocking mode -if arg[1] == "-n" then - -- if non-blocking I/O was requested, use real dispatcher interface - table.remove(arg, 1) - handler = dispatch.newhandler("coroutine") -else - -- if using blocking I/O, use fake dispatcher interface - handler = dispatch.newhandler("sequential") -end - -local nthreads = 0 - --- get the status of a URL using the dispatcher -function getstatus(link) - local parsed = url.parse(link, {scheme = "file"}) - if parsed.scheme == "http" then - nthreads = nthreads + 1 - handler:start(function() - local r, c, h, s = http.request{ - method = "HEAD", - url = link, - create = handler.tcp - } - if r and c == 200 then io.write('\t', link, '\n') - else io.write('\t', link, ': ', tostring(c), '\n') end - nthreads = nthreads - 1 - end) - end -end - -function readfile(path) - path = url.unescape(path) - local file, error = io.open(path, "r") - if file then - local body = file:read("*a") - file:close() - return body - else return nil, error end -end - -function load(u) - local parsed = url.parse(u, { scheme = "file" }) - local body, headers, code, error - local base = u - if parsed.scheme == "http" then - body, code, headers = http.request(u) - if code == 200 then - -- if there was a redirect, update base to reflect it - base = headers.location or base - end - if not body then - error = code - end - elseif parsed.scheme == "file" then - body, error = readfile(parsed.path) - else error = string.format("unhandled scheme '%s'", parsed.scheme) end - return base, body, error -end - -function getlinks(body, base) - -- get rid of comments - body = string.gsub(body, "%<%!%-%-.-%-%-%>", "") - local links = {} - -- extract links - body = string.gsub(body, '[Hh][Rr][Ee][Ff]%s*=%s*"([^"]*)"', function(href) - table.insert(links, url.absolute(base, href)) - end) - body = string.gsub(body, "[Hh][Rr][Ee][Ff]%s*=%s*'([^']*)'", function(href) - table.insert(links, url.absolute(base, 
href)) - end) - string.gsub(body, "[Hh][Rr][Ee][Ff]%s*=%s*(.-)>", function(href) - table.insert(links, url.absolute(base, href)) - end) - return links -end - -function checklinks(address) - local base, body, error = load(address) - if not body then print(error) return end - print("Checking ", base) - local links = getlinks(body, base) - for _, link in ipairs(links) do - getstatus(link) - end -end - -for _, address in ipairs(arg) do - checklinks(url.absolute("file:", address)) -end - -while nthreads > 0 do - handler:step() -end diff --git a/etc/check-memory.lua b/etc/check-memory.lua deleted file mode 100644 index 7bd984d..0000000 --- a/etc/check-memory.lua +++ /dev/null @@ -1,17 +0,0 @@ -function load(s) - collectgarbage() - local a = gcinfo() - _G[s] = require(s) - collectgarbage() - local b = gcinfo() - print(s .. ":\t " .. (b-a) .. "k") -end - -load("socket.url") -load("ltn12") -load("socket") -load("mime") -load("socket.tp") -load("socket.smtp") -load("socket.http") -load("socket.ftp") diff --git a/etc/cookie.lua b/etc/cookie.lua deleted file mode 100644 index fec10a1..0000000 --- a/etc/cookie.lua +++ /dev/null @@ -1,88 +0,0 @@ -local socket = require"socket" -local http = require"socket.http" -local url = require"socket.url" -local ltn12 = require"ltn12" - -local token_class = '[^%c%s%(%)%<%>%@%,%;%:%\\%"%/%[%]%?%=%{%}]' - -local function unquote(t, quoted) - local n = string.match(t, "%$(%d+)$") - if n then n = tonumber(n) end - if quoted[n] then return quoted[n] - else return t end -end - -local function parse_set_cookie(c, quoted, cookie_table) - c = c .. ";$last=last;" - local _, _, n, v, i = string.find(c, "(" .. token_class .. - "+)%s*=%s*(.-)%s*;%s*()") - local cookie = { - name = n, - value = unquote(v, quoted), - attributes = {} - } - while 1 do - _, _, n, v, i = string.find(c, "(" .. token_class .. - "+)%s*=?%s*(.-)%s*;%s*()", i) - if not n or n == "$last" then break end - cookie.attributes[#cookie.attributes+1] = { - name = n, - value = unquote(v, quoted) - } - end - cookie_table[#cookie_table+1] = cookie -end - -local function split_set_cookie(s, cookie_table) - cookie_table = cookie_table or {} - -- remove quoted strings from cookie list - local quoted = {} - s = string.gsub(s, '"(.-)"', function(q) - quoted[#quoted+1] = q - return "$" .. #quoted - end) - -- add sentinel - s = s .. ",$last=" - -- split into individual cookies - i = 1 - while 1 do - local _, _, cookie, next_token - _, _, cookie, i, next_token = string.find(s, "(.-)%s*%,%s*()(" .. - token_class .. "+)%s*=", i) - if not next_token then break end - parse_set_cookie(cookie, quoted, cookie_table) - if next_token == "$last" then break end - end - return cookie_table -end - -local function quote(s) - if string.find(s, "[ %,%;]") then return '"' .. s .. '"' - else return s end -end - -local _empty = {} -local function build_cookies(cookies) - s = "" - for i,v in ipairs(cookies or _empty) do - if v.name then - s = s .. v.name - if v.value and v.value ~= "" then - s = s .. '=' .. quote(v.value) - end - end - if v.name and #(v.attributes or _empty) > 0 then s = s .. "; " end - for j,u in ipairs(v.attributes or _empty) do - if u.name then - s = s .. u.name - if u.value and u.value ~= "" then - s = s .. '=' .. quote(u.value) - end - end - if j < #v.attributes then s = s .. "; " end - end - if i < #cookies then s = s .. 
", " end - end - return s -end - diff --git a/etc/dict.lua b/etc/dict.lua deleted file mode 100644 index 8c5b711..0000000 --- a/etc/dict.lua +++ /dev/null @@ -1,151 +0,0 @@ ------------------------------------------------------------------------------ --- Little program to download DICT word definitions --- LuaSocket sample files --- Author: Diego Nehab ------------------------------------------------------------------------------ - ------------------------------------------------------------------------------ --- Load required modules ------------------------------------------------------------------------------ -local base = _G -local string = require("string") -local table = require("table") -local socket = require("socket") -local url = require("socket.url") -local tp = require("socket.tp") -module("socket.dict") - ------------------------------------------------------------------------------ --- Globals ------------------------------------------------------------------------------ -HOST = "dict.org" -PORT = 2628 -TIMEOUT = 10 - ------------------------------------------------------------------------------ --- Low-level dict API ------------------------------------------------------------------------------ -local metat = { __index = {} } - -function open(host, port) - local tp = socket.try(tp.connect(host or HOST, port or PORT, TIMEOUT)) - return base.setmetatable({tp = tp}, metat) -end - -function metat.__index:greet() - return socket.try(self.tp:check(220)) -end - -function metat.__index:check(ok) - local code, status = socket.try(self.tp:check(ok)) - return code, - base.tonumber(socket.skip(2, string.find(status, "^%d%d%d (%d*)"))) -end - -function metat.__index:getdef() - local line = socket.try(self.tp:receive()) - local def = {} - while line ~= "." do - table.insert(def, line) - line = socket.try(self.tp:receive()) - end - return table.concat(def, "\n") -end - -function metat.__index:define(database, word) - database = database or "!" - socket.try(self.tp:command("DEFINE", database .. " " .. word)) - local code, count = self:check(150) - local defs = {} - for i = 1, count do - self:check(151) - table.insert(defs, self:getdef()) - end - self:check(250) - return defs -end - -function metat.__index:match(database, strat, word) - database = database or "!" - strat = strat or "." - socket.try(self.tp:command("MATCH", database .." ".. strat .." ".. word)) - self:check(152) - local mat = {} - local line = socket.try(self.tp:receive()) - while line ~= '.' do - database, word = socket.skip(2, string.find(line, "(%S+) (.*)")) - if not mat[database] then mat[database] = {} end - table.insert(mat[database], word) - line = socket.try(self.tp:receive()) - end - self:check(250) - return mat -end - -function metat.__index:quit() - self.tp:command("QUIT") - return self:check(221) -end - -function metat.__index:close() - return self.tp:close() -end - ------------------------------------------------------------------------------ --- High-level dict API ------------------------------------------------------------------------------ -local default = { - scheme = "dict", - host = "dict.org" -} - -local function there(f) - if f == "" then return nil - else return f end -end - -local function parse(u) - local t = socket.try(url.parse(u, default)) - socket.try(t.scheme == "dict", "invalid scheme '" .. t.scheme .. 
"'") - socket.try(t.path, "invalid path in url") - local cmd, arg = socket.skip(2, string.find(t.path, "^/(.)(.*)$")) - socket.try(cmd == "d" or cmd == "m", "<command> should be 'm' or 'd'") - socket.try(arg and arg ~= "", "need at least <word> in URL") - t.command, t.argument = cmd, arg - arg = string.gsub(arg, "^:([^:]+)", function(f) t.word = f end) - socket.try(t.word, "need at least <word> in URL") - arg = string.gsub(arg, "^:([^:]*)", function(f) t.database = there(f) end) - if cmd == "m" then - arg = string.gsub(arg, "^:([^:]*)", function(f) t.strat = there(f) end) - end - string.gsub(arg, ":([^:]*)$", function(f) t.n = base.tonumber(f) end) - return t -end - -local function tget(gett) - local con = open(gett.host, gett.port) - con:greet() - if gett.command == "d" then - local def = con:define(gett.database, gett.word) - con:quit() - con:close() - if gett.n then return def[gett.n] - else return def end - elseif gett.command == "m" then - local mat = con:match(gett.database, gett.strat, gett.word) - con:quit() - con:close() - return mat - else return nil, "invalid command" end -end - -local function sget(u) - local gett = parse(u) - return tget(gett) -end - -get = socket.protect(function(gett) - if base.type(gett) == "string" then return sget(gett) - else return tget(gett) end -end) - diff --git a/etc/dispatch.lua b/etc/dispatch.lua deleted file mode 100644 index 2485415..0000000 --- a/etc/dispatch.lua +++ /dev/null @@ -1,307 +0,0 @@ ------------------------------------------------------------------------------ --- A hacked dispatcher module --- LuaSocket sample files --- Author: Diego Nehab ------------------------------------------------------------------------------ -local base = _G -local table = require("table") -local string = require("string") -local socket = require("socket") -local coroutine = require("coroutine") -module("dispatch") - --- if too much time goes by without any activity in one of our sockets, we --- just kill it -TIMEOUT = 60 - ------------------------------------------------------------------------------ --- We implement 3 types of dispatchers: --- sequential --- coroutine --- threaded --- The user can choose whatever one is needed ------------------------------------------------------------------------------ -local handlert = {} - --- default handler is coroutine -function newhandler(mode) - mode = mode or "coroutine" - return handlert[mode]() -end - -local function seqstart(self, func) - return func() -end - --- sequential handler simply calls the functions and doesn't wrap I/O -function handlert.sequential() - return { - tcp = socket.tcp, - start = seqstart - } -end - ------------------------------------------------------------------------------ --- Mega hack. Don't try to do this at home. ------------------------------------------------------------------------------ --- we can't yield across calls to protect on Lua 5.1, so we rewrite it with --- coroutines --- make sure you don't require any module that uses socket.protect before --- loading our hack -if string.sub(base._VERSION, -3) == "5.1" then - local function _protect(co, status, ...) - if not status then - local msg = ... - if base.type(msg) == 'table' then - return nil, msg[1] - else - base.error(msg, 0) - end - end - if coroutine.status(co) == "suspended" then - return _protect(co, coroutine.resume(co, coroutine.yield(...))) - else - return ... - end - end - - function socket.protect(f) - return function(...) 
- local co = coroutine.create(f) - return _protect(co, coroutine.resume(co, ...)) - end - end -end - ------------------------------------------------------------------------------ --- Simple set data structure. O(1) everything. ------------------------------------------------------------------------------ -local function newset() - local reverse = {} - local set = {} - return base.setmetatable(set, {__index = { - insert = function(set, value) - if not reverse[value] then - table.insert(set, value) - reverse[value] = #set - end - end, - remove = function(set, value) - local index = reverse[value] - if index then - reverse[value] = nil - local top = table.remove(set) - if top ~= value then - reverse[top] = index - set[index] = top - end - end - end - }}) -end - ------------------------------------------------------------------------------ --- socket.tcp() wrapper for the coroutine dispatcher ------------------------------------------------------------------------------ -local function cowrap(dispatcher, tcp, error) - if not tcp then return nil, error end - -- put it in non-blocking mode right away - tcp:settimeout(0) - -- metatable for wrap produces new methods on demand for those that we - -- don't override explicitly. - local metat = { __index = function(table, key) - table[key] = function(...) - return tcp[key](tcp,select(2,...)) - end - return table[key] - end} - -- does our user want to do his own non-blocking I/O? - local zero = false - -- create a wrap object that will behave just like a real socket object - local wrap = { } - -- we ignore settimeout to preserve our 0 timeout, but record whether - -- the user wants to do his own non-blocking I/O - function wrap:settimeout(value, mode) - if value == 0 then zero = true - else zero = false end - return 1 - end - -- send in non-blocking mode and yield on timeout - function wrap:send(data, first, last) - first = (first or 1) - 1 - local result, error - while true do - -- return control to dispatcher and tell it we want to send - -- if upon return the dispatcher tells us we timed out, - -- return an error to whoever called us - if coroutine.yield(dispatcher.sending, tcp) == "timeout" then - return nil, "timeout" - end - -- try sending - result, error, first = tcp:send(data, first+1, last) - -- if we are done, or there was an unexpected error, - -- break away from loop - if error ~= "timeout" then return result, error, first end - end - end - -- receive in non-blocking mode and yield on timeout - -- or simply return partial read, if user requested timeout = 0 - function wrap:receive(pattern, partial) - local error = "timeout" - local value - while true do - -- return control to dispatcher and tell it we want to receive - -- if upon return the dispatcher tells us we timed out, - -- return an error to whoever called us - if coroutine.yield(dispatcher.receiving, tcp) == "timeout" then - return nil, "timeout" - end - -- try receiving - value, error, partial = tcp:receive(pattern, partial) - -- if we are done, or there was an unexpected error, - -- break away from loop. also, if the user requested - -- zero timeout, return all we got - if (error ~= "timeout") or zero then - return value, error, partial - end - end - end - -- connect in non-blocking mode and yield on timeout - function wrap:connect(host, port) - local result, error = tcp:connect(host, port) - if error == "timeout" then - -- return control to dispatcher. we will be writable when - -- connection succeeds. 
- -- if upon return the dispatcher tells us we have a - -- timeout, just abort - if coroutine.yield(dispatcher.sending, tcp) == "timeout" then - return nil, "timeout" - end - -- when we come back, check if connection was successful - result, error = tcp:connect(host, port) - if result or error == "already connected" then return 1 - else return nil, "non-blocking connect failed" end - else return result, error end - end - -- accept in non-blocking mode and yield on timeout - function wrap:accept() - while 1 do - -- return control to dispatcher. we will be readable when a - -- connection arrives. - -- if upon return the dispatcher tells us we have a - -- timeout, just abort - if coroutine.yield(dispatcher.receiving, tcp) == "timeout" then - return nil, "timeout" - end - local client, error = tcp:accept() - if error ~= "timeout" then - return cowrap(dispatcher, client, error) - end - end - end - -- remove cortn from context - function wrap:close() - dispatcher.stamp[tcp] = nil - dispatcher.sending.set:remove(tcp) - dispatcher.sending.cortn[tcp] = nil - dispatcher.receiving.set:remove(tcp) - dispatcher.receiving.cortn[tcp] = nil - return tcp:close() - end - return base.setmetatable(wrap, metat) -end - - ------------------------------------------------------------------------------ --- Our coroutine dispatcher ------------------------------------------------------------------------------ -local cometat = { __index = {} } - -function schedule(cortn, status, operation, tcp) - if status then - if cortn and operation then - operation.set:insert(tcp) - operation.cortn[tcp] = cortn - operation.stamp[tcp] = socket.gettime() - end - else base.error(operation) end -end - -function kick(operation, tcp) - operation.cortn[tcp] = nil - operation.set:remove(tcp) -end - -function wakeup(operation, tcp) - local cortn = operation.cortn[tcp] - -- if cortn is still valid, wake it up - if cortn then - kick(operation, tcp) - return cortn, coroutine.resume(cortn) - -- othrewise, just get scheduler not to do anything - else - return nil, true - end -end - -function abort(operation, tcp) - local cortn = operation.cortn[tcp] - if cortn then - kick(operation, tcp) - coroutine.resume(cortn, "timeout") - end -end - --- step through all active cortns -function cometat.__index:step() - -- check which sockets are interesting and act on them - local readable, writable = socket.select(self.receiving.set, - self.sending.set, 1) - -- for all readable connections, resume their cortns and reschedule - -- when they yield back to us - for _, tcp in base.ipairs(readable) do - schedule(wakeup(self.receiving, tcp)) - end - -- for all writable connections, do the same - for _, tcp in base.ipairs(writable) do - schedule(wakeup(self.sending, tcp)) - end - -- politely ask replacement I/O functions in idle cortns to - -- return reporting a timeout - local now = socket.gettime() - for tcp, stamp in base.pairs(self.stamp) do - if tcp.class == "tcp{client}" and now - stamp > TIMEOUT then - abort(self.sending, tcp) - abort(self.receiving, tcp) - end - end -end - -function cometat.__index:start(func) - local cortn = coroutine.create(func) - schedule(cortn, coroutine.resume(cortn)) -end - -function handlert.coroutine() - local stamp = {} - local dispatcher = { - stamp = stamp, - sending = { - name = "sending", - set = newset(), - cortn = {}, - stamp = stamp - }, - receiving = { - name = "receiving", - set = newset(), - cortn = {}, - stamp = stamp - }, - } - function dispatcher.tcp() - return cowrap(dispatcher, socket.tcp()) - end - return 
base.setmetatable(dispatcher, cometat) -end - diff --git a/etc/eol.lua b/etc/eol.lua deleted file mode 100644 index eeaf0ce..0000000 --- a/etc/eol.lua +++ /dev/null @@ -1,13 +0,0 @@ ------------------------------------------------------------------------------ --- Little program to adjust end of line markers. --- LuaSocket sample files --- Author: Diego Nehab ------------------------------------------------------------------------------ -local mime = require("mime") -local ltn12 = require("ltn12") -local marker = '\n' -if arg and arg[1] == '-d' then marker = '\r\n' end -local filter = mime.normalize(marker) -local source = ltn12.source.chain(ltn12.source.file(io.stdin), filter) -local sink = ltn12.sink.file(io.stdout) -ltn12.pump.all(source, sink) diff --git a/etc/forward.lua b/etc/forward.lua deleted file mode 100644 index 05ced1a..0000000 --- a/etc/forward.lua +++ /dev/null @@ -1,65 +0,0 @@ --- load our favourite library -local dispatch = require("dispatch") -local handler = dispatch.newhandler() - --- make sure the user knows how to invoke us -if #arg < 1 then - print("Usage") - print(" lua forward.lua <iport:ohost:oport> ...") - os.exit(1) -end - --- function to move data from one socket to the other -local function move(foo, bar) - local live - while 1 do - local data, error, partial = foo:receive(2048) - live = data or error == "timeout" - data = data or partial - local result, error = bar:send(data) - if not live or not result then - foo:close() - bar:close() - break - end - end -end - --- for each tunnel, start a new server -for i, v in ipairs(arg) do - -- capture forwarding parameters - local _, _, iport, ohost, oport = string.find(v, "([^:]+):([^:]+):([^:]+)") - assert(iport, "invalid arguments") - -- create our server socket - local server = assert(handler.tcp()) - assert(server:setoption("reuseaddr", true)) - assert(server:bind("*", iport)) - assert(server:listen(32)) - -- handler for the server object loops accepting new connections - handler:start(function() - while 1 do - local client = assert(server:accept()) - assert(client:settimeout(0)) - -- for each new connection, start a new client handler - handler:start(function() - -- handler tries to connect to peer - local peer = assert(handler.tcp()) - assert(peer:settimeout(0)) - assert(peer:connect(ohost, oport)) - -- if sucessful, starts a new handler to send data from - -- client to peer - handler:start(function() - move(client, peer) - end) - -- afte starting new handler, enter in loop sending data from - -- peer to client - move(peer, client) - end) - end - end) -end - --- simply loop stepping the server -while 1 do - handler:step() -end diff --git a/etc/get.lua b/etc/get.lua deleted file mode 100644 index d53c465..0000000 --- a/etc/get.lua +++ /dev/null @@ -1,141 +0,0 @@ ------------------------------------------------------------------------------ --- Little program to download files from URLs --- LuaSocket sample files --- Author: Diego Nehab ------------------------------------------------------------------------------ -local socket = require("socket") -local http = require("socket.http") -local ftp = require("socket.ftp") -local url = require("socket.url") -local ltn12 = require("ltn12") - --- formats a number of seconds into human readable form -function nicetime(s) - local l = "s" - if s > 60 then - s = s / 60 - l = "m" - if s > 60 then - s = s / 60 - l = "h" - if s > 24 then - s = s / 24 - l = "d" -- hmmm - end - end - end - if l == "s" then return string.format("%5.0f%s", s, l) - else return 
string.format("%5.2f%s", s, l) end -end - --- formats a number of bytes into human readable form -function nicesize(b) - local l = "B" - if b > 1024 then - b = b / 1024 - l = "KB" - if b > 1024 then - b = b / 1024 - l = "MB" - if b > 1024 then - b = b / 1024 - l = "GB" -- hmmm - end - end - end - return string.format("%7.2f%2s", b, l) -end - --- returns a string with the current state of the download -local remaining_s = "%s received, %s/s throughput, %2.0f%% done, %s remaining" -local elapsed_s = "%s received, %s/s throughput, %s elapsed " -function gauge(got, delta, size) - local rate = got / delta - if size and size >= 1 then - return string.format(remaining_s, nicesize(got), nicesize(rate), - 100*got/size, nicetime((size-got)/rate)) - else - return string.format(elapsed_s, nicesize(got), - nicesize(rate), nicetime(delta)) - end -end - --- creates a new instance of a receive_cb that saves to disk --- kind of copied from luasocket's manual callback examples -function stats(size) - local start = socket.gettime() - local last = start - local got = 0 - return function(chunk) - -- elapsed time since start - local current = socket.gettime() - if chunk then - -- total bytes received - got = got + string.len(chunk) - -- not enough time for estimate - if current - last > 1 then - io.stderr:write("\r", gauge(got, current - start, size)) - io.stderr:flush() - last = current - end - else - -- close up - io.stderr:write("\r", gauge(got, current - start), "\n") - end - return chunk - end -end - --- determines the size of a http file -function gethttpsize(u) - local r, c, h = http.request {method = "HEAD", url = u} - if c == 200 then - return tonumber(h["content-length"]) - end -end - --- downloads a file using the http protocol -function getbyhttp(u, file) - local save = ltn12.sink.file(file or io.stdout) - -- only print feedback if output is not stdout - if file then save = ltn12.sink.chain(stats(gethttpsize(u)), save) end - local r, c, h, s = http.request {url = u, sink = save } - if c ~= 200 then io.stderr:write(s or c, "\n") end -end - --- downloads a file using the ftp protocol -function getbyftp(u, file) - local save = ltn12.sink.file(file or io.stdout) - -- only print feedback if output is not stdout - -- and we don't know how big the file is - if file then save = ltn12.sink.chain(stats(), save) end - local gett = url.parse(u) - gett.sink = save - gett.type = "i" - local ret, err = ftp.get(gett) - if err then print(err) end -end - --- determines the scheme -function getscheme(u) - -- this is an heuristic to solve a common invalid url poblem - if not string.find(u, "//") then u = "//" .. u end - local parsed = url.parse(u, {scheme = "http"}) - return parsed.scheme -end - --- gets a file either by http or ftp, saving as <name> -function get(u, name) - local fout = name and io.open(name, "wb") - local scheme = getscheme(u) - if scheme == "ftp" then getbyftp(u, fout) - elseif scheme == "http" then getbyhttp(u, fout) - else print("unknown scheme" .. 
scheme) end -end - --- main program -arg = arg or {} -if #arg < 1 then - io.write("Usage:\n lua get.lua <remote-url> [<local-file>]\n") - os.exit(1) -else get(arg[1], arg[2]) end diff --git a/etc/links b/etc/links deleted file mode 100644 index 087f1c0..0000000 --- a/etc/links +++ /dev/null @@ -1,17 +0,0 @@ -<a href="http://www.cs.princeton.edu"> bla </a> -<a href="http://www.princeton.edu"> bla </a> -<a href="http://www.tecgraf.puc-rio.br"> bla </a> -<a href="http://www.inf.puc-rio.br"> bla </a> -<a href="http://www.puc-rio.br"> bla </a> -<a href="http://www.impa.br"> bla </a> -<a href="http://www.lua.org"> bla </a> -<a href="http://www.lua-users.org"> bla </a> -<a href="http://www.amazon.com"> bla </a> -<a href="http://www.google.com"> bla </a> -<a href="http://www.nytimes.com"> bla </a> -<a href="http://www.bbc.co.uk"> bla </a> -<a href="http://oglobo.globo.com"> bla </a> -<a href="http://slate.msn.com"> bla </a> -<a href="http://www.apple.com"> bla </a> -<a href="http://www.microsoft.com"> bla </a> -<a href="http://www.nasa.gov"> bla </a> diff --git a/etc/lp.lua b/etc/lp.lua deleted file mode 100644 index 25f0b95..0000000 --- a/etc/lp.lua +++ /dev/null @@ -1,323 +0,0 @@ ------------------------------------------------------------------------------ --- LPD support for the Lua language --- LuaSocket toolkit. --- Author: David Burgess --- Modified by Diego Nehab, but David is in charge ------------------------------------------------------------------------------ ---[[ - if you have any questions: RFC 1179 -]] --- make sure LuaSocket is loaded -local io = require("io") -local base = _G -local os = require("os") -local math = require("math") -local string = require("string") -local socket = require("socket") -local ltn12 = require("ltn12") -module("socket.lp") - --- default port -PORT = 515 -SERVER = os.getenv("SERVER_NAME") or os.getenv("COMPUTERNAME") or "localhost" -PRINTER = os.getenv("PRINTER") or "printer" - -local function connect(localhost, option) - local host = option.host or SERVER - local port = option.port or PORT - local skt - local try = socket.newtry(function() if skt then skt:close() end end) - if option.localbind then - -- bind to a local port (if we can) - local localport = 721 - local done, err - repeat - skt = socket.try(socket.tcp()) - try(skt:settimeout(30)) - done, err = skt:bind(localhost, localport) - if not done then - localport = localport + 1 - skt:close() - skt = nil - else break end - until localport > 731 - socket.try(skt, err) - else skt = socket.try(socket.tcp()) end - try(skt:connect(host, port)) - return { skt = skt, try = try } -end - ---[[ -RFC 1179 -5.3 03 - Send queue state (short) - - +----+-------+----+------+----+ - | 03 | Queue | SP | List | LF | - +----+-------+----+------+----+ - Command code - 3 - Operand 1 - Printer queue name - Other operands - User names or job numbers - - If the user names or job numbers or both are supplied then only those - jobs for those users or with those numbers will be sent. - - The response is an ASCII stream which describes the printer queue. - The stream continues until the connection closes. Ends of lines are - indicated with ASCII LF control characters. The lines may also - contain ASCII HT control characters. 
- -5.4 04 - Send queue state (long) - - +----+-------+----+------+----+ - | 04 | Queue | SP | List | LF | - +----+-------+----+------+----+ - Command code - 4 - Operand 1 - Printer queue name - Other operands - User names or job numbers - - If the user names or job numbers or both are supplied then only those - jobs for those users or with those numbers will be sent. - - The response is an ASCII stream which describes the printer queue. - The stream continues until the connection closes. Ends of lines are - indicated with ASCII LF control characters. The lines may also - contain ASCII HT control characters. -]] - --- gets server acknowledement -local function recv_ack(con) - local ack = con.skt:receive(1) - con.try(string.char(0) == ack, "failed to receive server acknowledgement") -end - --- sends client acknowledement -local function send_ack(con) - local sent = con.skt:send(string.char(0)) - con.try(sent == 1, "failed to send acknowledgement") -end - --- sends queue request --- 5.2 02 - Receive a printer job --- --- +----+-------+----+ --- | 02 | Queue | LF | --- +----+-------+----+ --- Command code - 2 --- Operand - Printer queue name --- --- Receiving a job is controlled by a second level of commands. The --- daemon is given commands by sending them over the same connection. --- The commands are described in the next section (6). --- --- After this command is sent, the client must read an acknowledgement --- octet from the daemon. A positive acknowledgement is an octet of --- zero bits. A negative acknowledgement is an octet of any other --- pattern. -local function send_queue(con, queue) - queue = queue or PRINTER - local str = string.format("\2%s\10", queue) - local sent = con.skt:send(str) - con.try(sent == string.len(str), "failed to send print request") - recv_ack(con) -end - --- sends control file --- 6.2 02 - Receive control file --- --- +----+-------+----+------+----+ --- | 02 | Count | SP | Name | LF | --- +----+-------+----+------+----+ --- Command code - 2 --- Operand 1 - Number of bytes in control file --- Operand 2 - Name of control file --- --- The control file must be an ASCII stream with the ends of lines --- indicated by ASCII LF. The total number of bytes in the stream is --- sent as the first operand. The name of the control file is sent as --- the second. It should start with ASCII "cfA", followed by a three --- digit job number, followed by the host name which has constructed the --- control file. Acknowledgement processing must occur as usual after --- the command is sent. --- --- The next "Operand 1" octets over the same TCP connection are the --- intended contents of the control file. Once all of the contents have --- been delivered, an octet of zero bits is sent as an indication that --- the file being sent is complete. A second level of acknowledgement --- processing must occur at this point. - --- sends data file --- 6.3 03 - Receive data file --- --- +----+-------+----+------+----+ --- | 03 | Count | SP | Name | LF | --- +----+-------+----+------+----+ --- Command code - 3 --- Operand 1 - Number of bytes in data file --- Operand 2 - Name of data file --- --- The data file may contain any 8 bit values at all. The total number --- of bytes in the stream may be sent as the first operand, otherwise --- the field should be cleared to 0. The name of the data file should --- start with ASCII "dfA". This should be followed by a three digit job --- number. The job number should be followed by the host name which has --- constructed the data file. 
Interpretation of the contents of the --- data file is determined by the contents of the corresponding control --- file. If a data file length has been specified, the next "Operand 1" --- octets over the same TCP connection are the intended contents of the --- data file. In this case, once all of the contents have been --- delivered, an octet of zero bits is sent as an indication that the --- file being sent is complete. A second level of acknowledgement --- processing must occur at this point. - - -local function send_hdr(con, control) - local sent = con.skt:send(control) - con.try(sent and sent >= 1 , "failed to send header file") - recv_ack(con) -end - -local function send_control(con, control) - local sent = con.skt:send(control) - con.try(sent and sent >= 1, "failed to send control file") - send_ack(con) -end - -local function send_data(con,fh,size) - local buf - while size > 0 do - buf,message = fh:read(8192) - if buf then - st = con.try(con.skt:send(buf)) - size = size - st - else - con.try(size == 0, "file size mismatch") - end - end - recv_ack(con) -- note the double acknowledgement - send_ack(con) - recv_ack(con) - return size -end - - ---[[ -local control_dflt = { - "H"..string.sub(socket.hostname,1,31).."\10", -- host - "C"..string.sub(socket.hostname,1,31).."\10", -- class - "J"..string.sub(filename,1,99).."\10", -- jobname - "L"..string.sub(user,1,31).."\10", -- print banner page - "I"..tonumber(indent).."\10", -- indent column count ('f' only) - "M"..string.sub(mail,1,128).."\10", -- mail when printed user@host - "N"..string.sub(filename,1,131).."\10", -- name of source file - "P"..string.sub(user,1,31).."\10", -- user name - "T"..string.sub(title,1,79).."\10", -- title for banner ('p' only) - "W"..tonumber(width or 132).."\10", -- width of print f,l,p only - - "f"..file.."\10", -- formatted print (remove control chars) - "l"..file.."\10", -- print - "o"..file.."\10", -- postscript - "p"..file.."\10", -- pr format - requires T, L - "r"..file.."\10", -- fortran format - "U"..file.."\10", -- Unlink (data file only) -} -]] - --- generate a varying job number -local seq = 0 -local function newjob(connection) - seq = seq + 1 - return math.floor(socket.gettime() * 1000 + seq)%1000 -end - - -local format_codes = { - binary = 'l', - text = 'f', - ps = 'o', - pr = 'p', - fortran = 'r', - l = 'l', - r = 'r', - o = 'o', - p = 'p', - f = 'f' -} - --- lp.send{option} --- requires option.file - -send = socket.protect(function(option) - socket.try(option and base.type(option) == "table", "invalid options") - local file = option.file - socket.try(file, "invalid file name") - local fh = socket.try(io.open(file,"rb")) - local datafile_size = fh:seek("end") -- get total size - fh:seek("set") -- go back to start of file - local localhost = socket.dns.gethostname() or os.getenv("COMPUTERNAME") - or "localhost" - local con = connect(localhost, option) --- format the control file - local jobno = newjob() - local localip = socket.dns.toip(localhost) - localhost = string.sub(localhost,1,31) - local user = string.sub(option.user or os.getenv("LPRUSER") or - os.getenv("USERNAME") or os.getenv("USER") or "anonymous", 1,31) - local lpfile = string.format("dfA%3.3d%-s", jobno, localhost); - local fmt = format_codes[option.format] or 'l' - local class = string.sub(option.class or localip or localhost,1,31) - local _,_,ctlfn = string.find(file,".*[%/%\\](.*)") - ctlfn = string.sub(ctlfn or file,1,131) - local cfile = - string.format("H%-s\nC%-s\nJ%-s\nP%-s\n%.1s%-s\nU%-s\nN%-s\n", - localhost, - class, - 
option.job or "LuaSocket", - user, - fmt, lpfile, - lpfile, - ctlfn); -- mandatory part of ctl file - if (option.banner) then cfile = cfile .. 'L'..user..'\10' end - if (option.indent) then cfile = cfile .. 'I'..base.tonumber(option.indent)..'\10' end - if (option.mail) then cfile = cfile .. 'M'..string.sub((option.mail),1,128)..'\10' end - if (fmt == 'p' and option.title) then cfile = cfile .. 'T'..string.sub((option.title),1,79)..'\10' end - if ((fmt == 'p' or fmt == 'l' or fmt == 'f') and option.width) then - cfile = cfile .. 'W'..base.tonumber(option,width)..'\10' - end - - con.skt:settimeout(option.timeout or 65) --- send the queue header - send_queue(con, option.queue) --- send the control file header - local cfilecmd = string.format("\2%d cfA%3.3d%-s\n",string.len(cfile), jobno, localhost); - send_hdr(con,cfilecmd) - --- send the control file - send_control(con,cfile) - --- send the data file header - local dfilecmd = string.format("\3%d dfA%3.3d%-s\n",datafile_size, jobno, localhost); - send_hdr(con,dfilecmd) - --- send the data file - send_data(con,fh,datafile_size) - fh:close() - con.skt:close(); - return jobno, datafile_size -end) - --- --- lp.query({host=,queue=printer|'*', format='l'|'s', list=}) --- -query = socket.protect(function(p) - p = p or {} - local localhost = socket.dns.gethostname() or os.getenv("COMPUTERNAME") - or "localhost" - local con = connect(localhost,p) - local fmt - if string.sub(p.format or 's',1,1) == 's' then fmt = 3 else fmt = 4 end - con.try(con.skt:send(string.format("%c%s %s\n", fmt, p.queue or "*", - p.list or ""))) - local data = con.try(con.skt:receive("*a")) - con.skt:close() - return data -end) diff --git a/etc/qp.lua b/etc/qp.lua deleted file mode 100644 index 523238b..0000000 --- a/etc/qp.lua +++ /dev/null @@ -1,23 +0,0 @@ ------------------------------------------------------------------------------ --- Little program to convert to and from Quoted-Printable --- LuaSocket sample files --- Author: Diego Nehab ------------------------------------------------------------------------------ -local ltn12 = require("ltn12") -local mime = require("mime") -local convert -arg = arg or {} -local mode = arg and arg[1] or "-et" -if mode == "-et" then - local normalize = mime.normalize() - local qp = mime.encode("quoted-printable") - local wrap = mime.wrap("quoted-printable") - convert = ltn12.filter.chain(normalize, qp, wrap) -elseif mode == "-eb" then - local qp = mime.encode("quoted-printable", "binary") - local wrap = mime.wrap("quoted-printable") - convert = ltn12.filter.chain(qp, wrap) -else convert = mime.decode("quoted-printable") end -local source = ltn12.source.chain(ltn12.source.file(io.stdin), convert) -local sink = ltn12.sink.file(io.stdout) -ltn12.pump.all(source, sink) diff --git a/etc/tftp.lua b/etc/tftp.lua deleted file mode 100644 index ed99cd1..0000000 --- a/etc/tftp.lua +++ /dev/null @@ -1,154 +0,0 @@ ------------------------------------------------------------------------------ --- TFTP support for the Lua language --- LuaSocket toolkit. 
--- Author: Diego Nehab ------------------------------------------------------------------------------ - ------------------------------------------------------------------------------ --- Load required files ------------------------------------------------------------------------------ -local base = _G -local table = require("table") -local math = require("math") -local string = require("string") -local socket = require("socket") -local ltn12 = require("ltn12") -local url = require("socket.url") -module("socket.tftp") - ------------------------------------------------------------------------------ --- Program constants ------------------------------------------------------------------------------ -local char = string.char -local byte = string.byte - -PORT = 69 -local OP_RRQ = 1 -local OP_WRQ = 2 -local OP_DATA = 3 -local OP_ACK = 4 -local OP_ERROR = 5 -local OP_INV = {"RRQ", "WRQ", "DATA", "ACK", "ERROR"} - ------------------------------------------------------------------------------ --- Packet creation functions ------------------------------------------------------------------------------ -local function RRQ(source, mode) - return char(0, OP_RRQ) .. source .. char(0) .. mode .. char(0) -end - -local function WRQ(source, mode) - return char(0, OP_RRQ) .. source .. char(0) .. mode .. char(0) -end - -local function ACK(block) - local low, high - low = math.mod(block, 256) - high = (block - low)/256 - return char(0, OP_ACK, high, low) -end - -local function get_OP(dgram) - local op = byte(dgram, 1)*256 + byte(dgram, 2) - return op -end - ------------------------------------------------------------------------------ --- Packet analysis functions ------------------------------------------------------------------------------ -local function split_DATA(dgram) - local block = byte(dgram, 3)*256 + byte(dgram, 4) - local data = string.sub(dgram, 5) - return block, data -end - -local function get_ERROR(dgram) - local code = byte(dgram, 3)*256 + byte(dgram, 4) - local msg - _,_, msg = string.find(dgram, "(.*)\000", 5) - return string.format("error code %d: %s", code, msg) -end - ------------------------------------------------------------------------------ --- The real work ------------------------------------------------------------------------------ -local function tget(gett) - local retries, dgram, sent, datahost, dataport, code - local last = 0 - socket.try(gett.host, "missing host") - local con = socket.try(socket.udp()) - local try = socket.newtry(function() con:close() end) - -- convert from name to ip if needed - gett.host = try(socket.dns.toip(gett.host)) - con:settimeout(1) - -- first packet gives data host/port to be used for data transfers - local path = string.gsub(gett.path or "", "^/", "") - path = url.unescape(path) - retries = 0 - repeat - sent = try(con:sendto(RRQ(path, "octet"), gett.host, gett.port)) - dgram, datahost, dataport = con:receivefrom() - retries = retries + 1 - until dgram or datahost ~= "timeout" or retries > 5 - try(dgram, datahost) - -- associate socket with data host/port - try(con:setpeername(datahost, dataport)) - -- default sink - local sink = gett.sink or ltn12.sink.null() - -- process all data packets - while 1 do - -- decode packet - code = get_OP(dgram) - try(code ~= OP_ERROR, get_ERROR(dgram)) - try(code == OP_DATA, "unhandled opcode " .. 
code) - -- get data packet parts - local block, data = split_DATA(dgram) - -- if not repeated, write - if block == last+1 then - try(sink(data)) - last = block - end - -- last packet brings less than 512 bytes of data - if string.len(data) < 512 then - try(con:send(ACK(block))) - try(con:close()) - try(sink(nil)) - return 1 - end - -- get the next packet - retries = 0 - repeat - sent = try(con:send(ACK(last))) - dgram, err = con:receive() - retries = retries + 1 - until dgram or err ~= "timeout" or retries > 5 - try(dgram, err) - end -end - -local default = { - port = PORT, - path ="/", - scheme = "tftp" -} - -local function parse(u) - local t = socket.try(url.parse(u, default)) - socket.try(t.scheme == "tftp", "invalid scheme '" .. t.scheme .. "'") - socket.try(t.host, "invalid host") - return t -end - -local function sget(u) - local gett = parse(u) - local t = {} - gett.sink = ltn12.sink.table(t) - tget(gett) - return table.concat(t) -end - -get = socket.protect(function(gett) - if base.type(gett) == "string" then return sget(gett) - else return tget(gett) end -end) - diff --git a/luasocket-scm-3.rockspec b/luasocket-scm-3.rockspec index 71f335c..f3d24e3 100644 --- a/luasocket-scm-3.rockspec +++ b/luasocket-scm-3.rockspec @@ -129,6 +129,5 @@ build = { copy_directories = { "docs" , "samples" - , "etc" , "test" } } diff --git a/makefile.dist b/makefile.dist index a27ba57..5ef44d3 100644 --- a/makefile.dist +++ b/makefile.dist @@ -22,20 +22,17 @@ SAMPLES = \ samples/lpr.lua \ samples/talker.lua \ samples/tinyirc.lua - -ETC = \ - etc/README \ - etc/b64.lua \ - etc/check-links.lua \ - etc/check-memory.lua \ - etc/dict.lua \ - etc/dispatch.lua \ - etc/eol.lua \ - etc/forward.lua \ - etc/get.lua \ - etc/lp.lua \ - etc/qp.lua \ - etc/tftp.lua + samples/b64.lua \ + samples/check-links.lua \ + samples/check-memory.lua \ + samples/dict.lua \ + samples/dispatch.lua \ + samples/eol.lua \ + samples/forward.lua \ + samples/get.lua \ + samples/lp.lua \ + samples/qp.lua \ + samples/tftp.lua SRC = \ src/makefile \ @@ -117,9 +114,6 @@ dist: cp -vf README.md $(DIST) cp -vf $(MAKE) $(DIST) - mkdir -p $(DIST)/etc - cp -vf $(ETC) $(DIST)/etc - mkdir -p $(DIST)/src cp -vf $(SRC) $(DIST)/src diff --git a/samples/README b/samples/README index e63a6f5..4ee06b6 100644 --- a/samples/README +++ b/samples/README @@ -1,11 +1,95 @@ This directory contains some sample programs using LuaSocket. This code is not supported. + tftp.lua -- Trivial FTP client + +This module implements file retrieval by the TFTP protocol. +Its main use was to test the UDP code, but since someone +found it usefull, I turned it into a module that is almost +official (no uploads, yet). + + dict.lua -- Dict client + +The dict.lua module started with a cool simple client +for the DICT protocol, written by Luiz Henrique Figueiredo. +This new version has been converted into a library, similar +to the HTTP and FTP libraries, that can be used from within +any luasocket application. Take a look on the source code +and you will be able to figure out how to use it. + + lp.lua -- LPD client library + +The lp.lua module implements the client part of the Line +Printer Daemon protocol, used to print files on Unix +machines. It is courtesy of David Burgess! See the source +code and the lpr.lua in the examples directory. + + b64.lua + qp.lua + eol.lua + +These are tiny programs that perform Base64, +Quoted-Printable and end-of-line marker conversions. 
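
For reference, the same conversions can also be driven from inside a Lua
program with the mime and ltn12 modules that these scripts wrap. A minimal
sketch of the Base64 case (the input string below is just an example value,
not taken from the samples) could look like:

    local ltn12 = require("ltn12")
    local mime  = require("mime")
    -- chain the Base64 encoder with the default line-wrapping filter
    local encode = ltn12.filter.chain(mime.encode("base64"), mime.wrap())
    local chunks = {}
    -- pump an in-memory source through the filter into a table sink
    ltn12.pump.all(
        ltn12.source.chain(ltn12.source.string("hello from LuaSocket"), encode),
        ltn12.sink.table(chunks))
    print(table.concat(chunks))  -- prints the wrapped Base64 text

The quoted-printable and end-of-line cases differ only in the filters used
(mime.encode("quoted-printable") plus mime.wrap("quoted-printable"), or
mime.normalize()), as the b64.lua, qp.lua and eol.lua sources below show.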
+ + get.lua -- file retriever + +This little program is a client that uses the FTP and +HTTP code to implement a command line file graber. Just +run + + lua get.lua <remote-file> [<local-file>] + +to download a remote file (either ftp:// or http://) to +the specified local file. The program also prints the +download throughput, elapsed time, bytes already downloaded +etc during download. + + check-memory.lua -- checks memory consumption + +This is just to see how much memory each module uses. + + dispatch.lua -- coroutine based dispatcher + +This is a first try at a coroutine based non-blocking +dispatcher for LuaSocket. Take a look at 'check-links.lua' +and at 'forward.lua' to see how to use it. + + check-links.lua -- HTML link checker program + +This little program scans a HTML file and checks for broken +links. It is similar to check-links.pl by Jamie Zawinski, +but uses all facilities of the LuaSocket library and the Lua +language. It has not been thoroughly tested, but it should +work. Just run + + lua check-links.lua [-n] {<url>} > output + +and open the result to see a list of broken links. Make sure +you check the '-n' switch. It runs in non-blocking mode, +using coroutines, and is MUCH faster! + + forward.lua -- coroutine based forward server + +This is a forward server that can accept several connections +and transfers simultaneously using non-blocking I/O and the +coroutine-based dispatcher. You can run, for example + + lua forward.lua 8080:proxy.com:3128 + +to redirect all local conections to port 8080 to the host +'proxy.com' at port 3128. + + unix.c and unix.h + +This is an implementation of Unix local domain sockets and +demonstrates how to extend LuaSocket with a new type of +transport. It has been tested on Linux and on Mac OS X. + listener.lua -- socket to stdout talker.lua -- stdin to socket listener.lua and talker.lua are about the simplest -applications you can write using LuaSocket. Run +applications you can write using LuaSocket. Run 'lua listener.lua' and 'lua talker.lua' @@ -17,13 +101,13 @@ be printed by listen.lua. This is a cool program written by David Burgess to print files using the Line Printer Daemon protocol, widely used in Unix machines. It uses the lp.lua implementation, in the -etc directory. Just run 'lua lpr.lua <filename> +samples directory. Just run 'lua lpr.lua <filename> queue=<printername>' and the file will print! cddb.lua -- CDDB client This is the first try on a simple CDDB client. Not really -useful, but one day it might become a module. +useful, but one day it might become a module. 
daytimeclnt.lua -- day time client diff --git a/samples/b64.lua b/samples/b64.lua new file mode 100644 index 0000000..11eeb2d --- /dev/null +++ b/samples/b64.lua @@ -0,0 +1,19 @@ +----------------------------------------------------------------------------- +-- Little program to convert to and from Base64 +-- LuaSocket sample files +-- Author: Diego Nehab +----------------------------------------------------------------------------- +local ltn12 = require("ltn12") +local mime = require("mime") +local source = ltn12.source.file(io.stdin) +local sink = ltn12.sink.file(io.stdout) +local convert +if arg and arg[1] == '-d' then + convert = mime.decode("base64") +else + local base64 = mime.encode("base64") + local wrap = mime.wrap() + convert = ltn12.filter.chain(base64, wrap) +end +sink = ltn12.sink.chain(convert, sink) +ltn12.pump.all(source, sink) diff --git a/samples/check-links.lua b/samples/check-links.lua new file mode 100644 index 0000000..283f3ac --- /dev/null +++ b/samples/check-links.lua @@ -0,0 +1,111 @@ +----------------------------------------------------------------------------- +-- Little program that checks links in HTML files, using coroutines and +-- non-blocking I/O via the dispatcher module. +-- LuaSocket sample files +-- Author: Diego Nehab +----------------------------------------------------------------------------- +local url = require("socket.url") +local dispatch = require("dispatch") +local http = require("socket.http") +dispatch.TIMEOUT = 10 + +-- make sure the user knows how to invoke us +arg = arg or {} +if #arg < 1 then + print("Usage:\n luasocket check-links.lua [-n] {<url>}") + exit() +end + +-- '-n' means we are running in non-blocking mode +if arg[1] == "-n" then + -- if non-blocking I/O was requested, use real dispatcher interface + table.remove(arg, 1) + handler = dispatch.newhandler("coroutine") +else + -- if using blocking I/O, use fake dispatcher interface + handler = dispatch.newhandler("sequential") +end + +local nthreads = 0 + +-- get the status of a URL using the dispatcher +function getstatus(link) + local parsed = url.parse(link, {scheme = "file"}) + if parsed.scheme == "http" then + nthreads = nthreads + 1 + handler:start(function() + local r, c, h, s = http.request{ + method = "HEAD", + url = link, + create = handler.tcp + } + if r and c == 200 then io.write('\t', link, '\n') + else io.write('\t', link, ': ', tostring(c), '\n') end + nthreads = nthreads - 1 + end) + end +end + +function readfile(path) + path = url.unescape(path) + local file, error = io.open(path, "r") + if file then + local body = file:read("*a") + file:close() + return body + else return nil, error end +end + +function load(u) + local parsed = url.parse(u, { scheme = "file" }) + local body, headers, code, error + local base = u + if parsed.scheme == "http" then + body, code, headers = http.request(u) + if code == 200 then + -- if there was a redirect, update base to reflect it + base = headers.location or base + end + if not body then + error = code + end + elseif parsed.scheme == "file" then + body, error = readfile(parsed.path) + else error = string.format("unhandled scheme '%s'", parsed.scheme) end + return base, body, error +end + +function getlinks(body, base) + -- get rid of comments + body = string.gsub(body, "%<%!%-%-.-%-%-%>", "") + local links = {} + -- extract links + body = string.gsub(body, '[Hh][Rr][Ee][Ff]%s*=%s*"([^"]*)"', function(href) + table.insert(links, url.absolute(base, href)) + end) + body = string.gsub(body, "[Hh][Rr][Ee][Ff]%s*=%s*'([^']*)'", 
function(href) + table.insert(links, url.absolute(base, href)) + end) + string.gsub(body, "[Hh][Rr][Ee][Ff]%s*=%s*(.-)>", function(href) + table.insert(links, url.absolute(base, href)) + end) + return links +end + +function checklinks(address) + local base, body, error = load(address) + if not body then print(error) return end + print("Checking ", base) + local links = getlinks(body, base) + for _, link in ipairs(links) do + getstatus(link) + end +end + +for _, address in ipairs(arg) do + checklinks(url.absolute("file:", address)) +end + +while nthreads > 0 do + handler:step() +end diff --git a/samples/check-memory.lua b/samples/check-memory.lua new file mode 100644 index 0000000..7bd984d --- /dev/null +++ b/samples/check-memory.lua @@ -0,0 +1,17 @@ +function load(s) + collectgarbage() + local a = gcinfo() + _G[s] = require(s) + collectgarbage() + local b = gcinfo() + print(s .. ":\t " .. (b-a) .. "k") +end + +load("socket.url") +load("ltn12") +load("socket") +load("mime") +load("socket.tp") +load("socket.smtp") +load("socket.http") +load("socket.ftp") diff --git a/samples/cookie.lua b/samples/cookie.lua new file mode 100644 index 0000000..fec10a1 --- /dev/null +++ b/samples/cookie.lua @@ -0,0 +1,88 @@ +local socket = require"socket" +local http = require"socket.http" +local url = require"socket.url" +local ltn12 = require"ltn12" + +local token_class = '[^%c%s%(%)%<%>%@%,%;%:%\\%"%/%[%]%?%=%{%}]' + +local function unquote(t, quoted) + local n = string.match(t, "%$(%d+)$") + if n then n = tonumber(n) end + if quoted[n] then return quoted[n] + else return t end +end + +local function parse_set_cookie(c, quoted, cookie_table) + c = c .. ";$last=last;" + local _, _, n, v, i = string.find(c, "(" .. token_class .. + "+)%s*=%s*(.-)%s*;%s*()") + local cookie = { + name = n, + value = unquote(v, quoted), + attributes = {} + } + while 1 do + _, _, n, v, i = string.find(c, "(" .. token_class .. + "+)%s*=?%s*(.-)%s*;%s*()", i) + if not n or n == "$last" then break end + cookie.attributes[#cookie.attributes+1] = { + name = n, + value = unquote(v, quoted) + } + end + cookie_table[#cookie_table+1] = cookie +end + +local function split_set_cookie(s, cookie_table) + cookie_table = cookie_table or {} + -- remove quoted strings from cookie list + local quoted = {} + s = string.gsub(s, '"(.-)"', function(q) + quoted[#quoted+1] = q + return "$" .. #quoted + end) + -- add sentinel + s = s .. ",$last=" + -- split into individual cookies + i = 1 + while 1 do + local _, _, cookie, next_token + _, _, cookie, i, next_token = string.find(s, "(.-)%s*%,%s*()(" .. + token_class .. "+)%s*=", i) + if not next_token then break end + parse_set_cookie(cookie, quoted, cookie_table) + if next_token == "$last" then break end + end + return cookie_table +end + +local function quote(s) + if string.find(s, "[ %,%;]") then return '"' .. s .. '"' + else return s end +end + +local _empty = {} +local function build_cookies(cookies) + s = "" + for i,v in ipairs(cookies or _empty) do + if v.name then + s = s .. v.name + if v.value and v.value ~= "" then + s = s .. '=' .. quote(v.value) + end + end + if v.name and #(v.attributes or _empty) > 0 then s = s .. "; " end + for j,u in ipairs(v.attributes or _empty) do + if u.name then + s = s .. u.name + if u.value and u.value ~= "" then + s = s .. '=' .. quote(u.value) + end + end + if j < #v.attributes then s = s .. "; " end + end + if i < #cookies then s = s .. 
", " end + end + return s +end + diff --git a/samples/dict.lua b/samples/dict.lua new file mode 100644 index 0000000..8c5b711 --- /dev/null +++ b/samples/dict.lua @@ -0,0 +1,151 @@ +----------------------------------------------------------------------------- +-- Little program to download DICT word definitions +-- LuaSocket sample files +-- Author: Diego Nehab +----------------------------------------------------------------------------- + +----------------------------------------------------------------------------- +-- Load required modules +----------------------------------------------------------------------------- +local base = _G +local string = require("string") +local table = require("table") +local socket = require("socket") +local url = require("socket.url") +local tp = require("socket.tp") +module("socket.dict") + +----------------------------------------------------------------------------- +-- Globals +----------------------------------------------------------------------------- +HOST = "dict.org" +PORT = 2628 +TIMEOUT = 10 + +----------------------------------------------------------------------------- +-- Low-level dict API +----------------------------------------------------------------------------- +local metat = { __index = {} } + +function open(host, port) + local tp = socket.try(tp.connect(host or HOST, port or PORT, TIMEOUT)) + return base.setmetatable({tp = tp}, metat) +end + +function metat.__index:greet() + return socket.try(self.tp:check(220)) +end + +function metat.__index:check(ok) + local code, status = socket.try(self.tp:check(ok)) + return code, + base.tonumber(socket.skip(2, string.find(status, "^%d%d%d (%d*)"))) +end + +function metat.__index:getdef() + local line = socket.try(self.tp:receive()) + local def = {} + while line ~= "." do + table.insert(def, line) + line = socket.try(self.tp:receive()) + end + return table.concat(def, "\n") +end + +function metat.__index:define(database, word) + database = database or "!" + socket.try(self.tp:command("DEFINE", database .. " " .. word)) + local code, count = self:check(150) + local defs = {} + for i = 1, count do + self:check(151) + table.insert(defs, self:getdef()) + end + self:check(250) + return defs +end + +function metat.__index:match(database, strat, word) + database = database or "!" + strat = strat or "." + socket.try(self.tp:command("MATCH", database .." ".. strat .." ".. word)) + self:check(152) + local mat = {} + local line = socket.try(self.tp:receive()) + while line ~= '.' do + database, word = socket.skip(2, string.find(line, "(%S+) (.*)")) + if not mat[database] then mat[database] = {} end + table.insert(mat[database], word) + line = socket.try(self.tp:receive()) + end + self:check(250) + return mat +end + +function metat.__index:quit() + self.tp:command("QUIT") + return self:check(221) +end + +function metat.__index:close() + return self.tp:close() +end + +----------------------------------------------------------------------------- +-- High-level dict API +----------------------------------------------------------------------------- +local default = { + scheme = "dict", + host = "dict.org" +} + +local function there(f) + if f == "" then return nil + else return f end +end + +local function parse(u) + local t = socket.try(url.parse(u, default)) + socket.try(t.scheme == "dict", "invalid scheme '" .. t.scheme .. 
"'") + socket.try(t.path, "invalid path in url") + local cmd, arg = socket.skip(2, string.find(t.path, "^/(.)(.*)$")) + socket.try(cmd == "d" or cmd == "m", "<command> should be 'm' or 'd'") + socket.try(arg and arg ~= "", "need at least <word> in URL") + t.command, t.argument = cmd, arg + arg = string.gsub(arg, "^:([^:]+)", function(f) t.word = f end) + socket.try(t.word, "need at least <word> in URL") + arg = string.gsub(arg, "^:([^:]*)", function(f) t.database = there(f) end) + if cmd == "m" then + arg = string.gsub(arg, "^:([^:]*)", function(f) t.strat = there(f) end) + end + string.gsub(arg, ":([^:]*)$", function(f) t.n = base.tonumber(f) end) + return t +end + +local function tget(gett) + local con = open(gett.host, gett.port) + con:greet() + if gett.command == "d" then + local def = con:define(gett.database, gett.word) + con:quit() + con:close() + if gett.n then return def[gett.n] + else return def end + elseif gett.command == "m" then + local mat = con:match(gett.database, gett.strat, gett.word) + con:quit() + con:close() + return mat + else return nil, "invalid command" end +end + +local function sget(u) + local gett = parse(u) + return tget(gett) +end + +get = socket.protect(function(gett) + if base.type(gett) == "string" then return sget(gett) + else return tget(gett) end +end) + diff --git a/samples/dispatch.lua b/samples/dispatch.lua new file mode 100644 index 0000000..2485415 --- /dev/null +++ b/samples/dispatch.lua @@ -0,0 +1,307 @@ +----------------------------------------------------------------------------- +-- A hacked dispatcher module +-- LuaSocket sample files +-- Author: Diego Nehab +----------------------------------------------------------------------------- +local base = _G +local table = require("table") +local string = require("string") +local socket = require("socket") +local coroutine = require("coroutine") +module("dispatch") + +-- if too much time goes by without any activity in one of our sockets, we +-- just kill it +TIMEOUT = 60 + +----------------------------------------------------------------------------- +-- We implement 3 types of dispatchers: +-- sequential +-- coroutine +-- threaded +-- The user can choose whatever one is needed +----------------------------------------------------------------------------- +local handlert = {} + +-- default handler is coroutine +function newhandler(mode) + mode = mode or "coroutine" + return handlert[mode]() +end + +local function seqstart(self, func) + return func() +end + +-- sequential handler simply calls the functions and doesn't wrap I/O +function handlert.sequential() + return { + tcp = socket.tcp, + start = seqstart + } +end + +----------------------------------------------------------------------------- +-- Mega hack. Don't try to do this at home. +----------------------------------------------------------------------------- +-- we can't yield across calls to protect on Lua 5.1, so we rewrite it with +-- coroutines +-- make sure you don't require any module that uses socket.protect before +-- loading our hack +if string.sub(base._VERSION, -3) == "5.1" then + local function _protect(co, status, ...) + if not status then + local msg = ... + if base.type(msg) == 'table' then + return nil, msg[1] + else + base.error(msg, 0) + end + end + if coroutine.status(co) == "suspended" then + return _protect(co, coroutine.resume(co, coroutine.yield(...))) + else + return ... + end + end + + function socket.protect(f) + return function(...) 
+ local co = coroutine.create(f) + return _protect(co, coroutine.resume(co, ...)) + end + end +end + +----------------------------------------------------------------------------- +-- Simple set data structure. O(1) everything. +----------------------------------------------------------------------------- +local function newset() + local reverse = {} + local set = {} + return base.setmetatable(set, {__index = { + insert = function(set, value) + if not reverse[value] then + table.insert(set, value) + reverse[value] = #set + end + end, + remove = function(set, value) + local index = reverse[value] + if index then + reverse[value] = nil + local top = table.remove(set) + if top ~= value then + reverse[top] = index + set[index] = top + end + end + end + }}) +end + +----------------------------------------------------------------------------- +-- socket.tcp() wrapper for the coroutine dispatcher +----------------------------------------------------------------------------- +local function cowrap(dispatcher, tcp, error) + if not tcp then return nil, error end + -- put it in non-blocking mode right away + tcp:settimeout(0) + -- metatable for wrap produces new methods on demand for those that we + -- don't override explicitly. + local metat = { __index = function(table, key) + table[key] = function(...) + return tcp[key](tcp,select(2,...)) + end + return table[key] + end} + -- does our user want to do his own non-blocking I/O? + local zero = false + -- create a wrap object that will behave just like a real socket object + local wrap = { } + -- we ignore settimeout to preserve our 0 timeout, but record whether + -- the user wants to do his own non-blocking I/O + function wrap:settimeout(value, mode) + if value == 0 then zero = true + else zero = false end + return 1 + end + -- send in non-blocking mode and yield on timeout + function wrap:send(data, first, last) + first = (first or 1) - 1 + local result, error + while true do + -- return control to dispatcher and tell it we want to send + -- if upon return the dispatcher tells us we timed out, + -- return an error to whoever called us + if coroutine.yield(dispatcher.sending, tcp) == "timeout" then + return nil, "timeout" + end + -- try sending + result, error, first = tcp:send(data, first+1, last) + -- if we are done, or there was an unexpected error, + -- break away from loop + if error ~= "timeout" then return result, error, first end + end + end + -- receive in non-blocking mode and yield on timeout + -- or simply return partial read, if user requested timeout = 0 + function wrap:receive(pattern, partial) + local error = "timeout" + local value + while true do + -- return control to dispatcher and tell it we want to receive + -- if upon return the dispatcher tells us we timed out, + -- return an error to whoever called us + if coroutine.yield(dispatcher.receiving, tcp) == "timeout" then + return nil, "timeout" + end + -- try receiving + value, error, partial = tcp:receive(pattern, partial) + -- if we are done, or there was an unexpected error, + -- break away from loop. also, if the user requested + -- zero timeout, return all we got + if (error ~= "timeout") or zero then + return value, error, partial + end + end + end + -- connect in non-blocking mode and yield on timeout + function wrap:connect(host, port) + local result, error = tcp:connect(host, port) + if error == "timeout" then + -- return control to dispatcher. we will be writable when + -- connection succeeds. 
+ -- if upon return the dispatcher tells us we have a + -- timeout, just abort + if coroutine.yield(dispatcher.sending, tcp) == "timeout" then + return nil, "timeout" + end + -- when we come back, check if connection was successful + result, error = tcp:connect(host, port) + if result or error == "already connected" then return 1 + else return nil, "non-blocking connect failed" end + else return result, error end + end + -- accept in non-blocking mode and yield on timeout + function wrap:accept() + while 1 do + -- return control to dispatcher. we will be readable when a + -- connection arrives. + -- if upon return the dispatcher tells us we have a + -- timeout, just abort + if coroutine.yield(dispatcher.receiving, tcp) == "timeout" then + return nil, "timeout" + end + local client, error = tcp:accept() + if error ~= "timeout" then + return cowrap(dispatcher, client, error) + end + end + end + -- remove cortn from context + function wrap:close() + dispatcher.stamp[tcp] = nil + dispatcher.sending.set:remove(tcp) + dispatcher.sending.cortn[tcp] = nil + dispatcher.receiving.set:remove(tcp) + dispatcher.receiving.cortn[tcp] = nil + return tcp:close() + end + return base.setmetatable(wrap, metat) +end + + +----------------------------------------------------------------------------- +-- Our coroutine dispatcher +----------------------------------------------------------------------------- +local cometat = { __index = {} } + +function schedule(cortn, status, operation, tcp) + if status then + if cortn and operation then + operation.set:insert(tcp) + operation.cortn[tcp] = cortn + operation.stamp[tcp] = socket.gettime() + end + else base.error(operation) end +end + +function kick(operation, tcp) + operation.cortn[tcp] = nil + operation.set:remove(tcp) +end + +function wakeup(operation, tcp) + local cortn = operation.cortn[tcp] + -- if cortn is still valid, wake it up + if cortn then + kick(operation, tcp) + return cortn, coroutine.resume(cortn) + -- othrewise, just get scheduler not to do anything + else + return nil, true + end +end + +function abort(operation, tcp) + local cortn = operation.cortn[tcp] + if cortn then + kick(operation, tcp) + coroutine.resume(cortn, "timeout") + end +end + +-- step through all active cortns +function cometat.__index:step() + -- check which sockets are interesting and act on them + local readable, writable = socket.select(self.receiving.set, + self.sending.set, 1) + -- for all readable connections, resume their cortns and reschedule + -- when they yield back to us + for _, tcp in base.ipairs(readable) do + schedule(wakeup(self.receiving, tcp)) + end + -- for all writable connections, do the same + for _, tcp in base.ipairs(writable) do + schedule(wakeup(self.sending, tcp)) + end + -- politely ask replacement I/O functions in idle cortns to + -- return reporting a timeout + local now = socket.gettime() + for tcp, stamp in base.pairs(self.stamp) do + if tcp.class == "tcp{client}" and now - stamp > TIMEOUT then + abort(self.sending, tcp) + abort(self.receiving, tcp) + end + end +end + +function cometat.__index:start(func) + local cortn = coroutine.create(func) + schedule(cortn, coroutine.resume(cortn)) +end + +function handlert.coroutine() + local stamp = {} + local dispatcher = { + stamp = stamp, + sending = { + name = "sending", + set = newset(), + cortn = {}, + stamp = stamp + }, + receiving = { + name = "receiving", + set = newset(), + cortn = {}, + stamp = stamp + }, + } + function dispatcher.tcp() + return cowrap(dispatcher, socket.tcp()) + end + return 
base.setmetatable(dispatcher, cometat) +end + diff --git a/samples/eol.lua b/samples/eol.lua new file mode 100644 index 0000000..eeaf0ce --- /dev/null +++ b/samples/eol.lua @@ -0,0 +1,13 @@ +----------------------------------------------------------------------------- +-- Little program to adjust end of line markers. +-- LuaSocket sample files +-- Author: Diego Nehab +----------------------------------------------------------------------------- +local mime = require("mime") +local ltn12 = require("ltn12") +local marker = '\n' +if arg and arg[1] == '-d' then marker = '\r\n' end +local filter = mime.normalize(marker) +local source = ltn12.source.chain(ltn12.source.file(io.stdin), filter) +local sink = ltn12.sink.file(io.stdout) +ltn12.pump.all(source, sink) diff --git a/samples/forward.lua b/samples/forward.lua new file mode 100644 index 0000000..05ced1a --- /dev/null +++ b/samples/forward.lua @@ -0,0 +1,65 @@ +-- load our favourite library +local dispatch = require("dispatch") +local handler = dispatch.newhandler() + +-- make sure the user knows how to invoke us +if #arg < 1 then + print("Usage") + print(" lua forward.lua <iport:ohost:oport> ...") + os.exit(1) +end + +-- function to move data from one socket to the other +local function move(foo, bar) + local live + while 1 do + local data, error, partial = foo:receive(2048) + live = data or error == "timeout" + data = data or partial + local result, error = bar:send(data) + if not live or not result then + foo:close() + bar:close() + break + end + end +end + +-- for each tunnel, start a new server +for i, v in ipairs(arg) do + -- capture forwarding parameters + local _, _, iport, ohost, oport = string.find(v, "([^:]+):([^:]+):([^:]+)") + assert(iport, "invalid arguments") + -- create our server socket + local server = assert(handler.tcp()) + assert(server:setoption("reuseaddr", true)) + assert(server:bind("*", iport)) + assert(server:listen(32)) + -- handler for the server object loops accepting new connections + handler:start(function() + while 1 do + local client = assert(server:accept()) + assert(client:settimeout(0)) + -- for each new connection, start a new client handler + handler:start(function() + -- handler tries to connect to peer + local peer = assert(handler.tcp()) + assert(peer:settimeout(0)) + assert(peer:connect(ohost, oport)) + -- if sucessful, starts a new handler to send data from + -- client to peer + handler:start(function() + move(client, peer) + end) + -- afte starting new handler, enter in loop sending data from + -- peer to client + move(peer, client) + end) + end + end) +end + +-- simply loop stepping the server +while 1 do + handler:step() +end diff --git a/samples/get.lua b/samples/get.lua new file mode 100644 index 0000000..d53c465 --- /dev/null +++ b/samples/get.lua @@ -0,0 +1,141 @@ +----------------------------------------------------------------------------- +-- Little program to download files from URLs +-- LuaSocket sample files +-- Author: Diego Nehab +----------------------------------------------------------------------------- +local socket = require("socket") +local http = require("socket.http") +local ftp = require("socket.ftp") +local url = require("socket.url") +local ltn12 = require("ltn12") + +-- formats a number of seconds into human readable form +function nicetime(s) + local l = "s" + if s > 60 then + s = s / 60 + l = "m" + if s > 60 then + s = s / 60 + l = "h" + if s > 24 then + s = s / 24 + l = "d" -- hmmm + end + end + end + if l == "s" then return string.format("%5.0f%s", s, l) + else 
return string.format("%5.2f%s", s, l) end +end + +-- formats a number of bytes into human readable form +function nicesize(b) + local l = "B" + if b > 1024 then + b = b / 1024 + l = "KB" + if b > 1024 then + b = b / 1024 + l = "MB" + if b > 1024 then + b = b / 1024 + l = "GB" -- hmmm + end + end + end + return string.format("%7.2f%2s", b, l) +end + +-- returns a string with the current state of the download +local remaining_s = "%s received, %s/s throughput, %2.0f%% done, %s remaining" +local elapsed_s = "%s received, %s/s throughput, %s elapsed " +function gauge(got, delta, size) + local rate = got / delta + if size and size >= 1 then + return string.format(remaining_s, nicesize(got), nicesize(rate), + 100*got/size, nicetime((size-got)/rate)) + else + return string.format(elapsed_s, nicesize(got), + nicesize(rate), nicetime(delta)) + end +end + +-- creates a new instance of a receive_cb that saves to disk +-- kind of copied from luasocket's manual callback examples +function stats(size) + local start = socket.gettime() + local last = start + local got = 0 + return function(chunk) + -- elapsed time since start + local current = socket.gettime() + if chunk then + -- total bytes received + got = got + string.len(chunk) + -- not enough time for estimate + if current - last > 1 then + io.stderr:write("\r", gauge(got, current - start, size)) + io.stderr:flush() + last = current + end + else + -- close up + io.stderr:write("\r", gauge(got, current - start), "\n") + end + return chunk + end +end + +-- determines the size of a http file +function gethttpsize(u) + local r, c, h = http.request {method = "HEAD", url = u} + if c == 200 then + return tonumber(h["content-length"]) + end +end + +-- downloads a file using the http protocol +function getbyhttp(u, file) + local save = ltn12.sink.file(file or io.stdout) + -- only print feedback if output is not stdout + if file then save = ltn12.sink.chain(stats(gethttpsize(u)), save) end + local r, c, h, s = http.request {url = u, sink = save } + if c ~= 200 then io.stderr:write(s or c, "\n") end +end + +-- downloads a file using the ftp protocol +function getbyftp(u, file) + local save = ltn12.sink.file(file or io.stdout) + -- only print feedback if output is not stdout + -- and we don't know how big the file is + if file then save = ltn12.sink.chain(stats(), save) end + local gett = url.parse(u) + gett.sink = save + gett.type = "i" + local ret, err = ftp.get(gett) + if err then print(err) end +end + +-- determines the scheme +function getscheme(u) + -- this is an heuristic to solve a common invalid url poblem + if not string.find(u, "//") then u = "//" .. u end + local parsed = url.parse(u, {scheme = "http"}) + return parsed.scheme +end + +-- gets a file either by http or ftp, saving as <name> +function get(u, name) + local fout = name and io.open(name, "wb") + local scheme = getscheme(u) + if scheme == "ftp" then getbyftp(u, fout) + elseif scheme == "http" then getbyhttp(u, fout) + else print("unknown scheme" .. 
scheme) end +end + +-- main program +arg = arg or {} +if #arg < 1 then + io.write("Usage:\n lua get.lua <remote-url> [<local-file>]\n") + os.exit(1) +else get(arg[1], arg[2]) end diff --git a/samples/links b/samples/links new file mode 100644 index 0000000..087f1c0 --- /dev/null +++ b/samples/links @@ -0,0 +1,17 @@ +<a href="http://www.cs.princeton.edu"> bla </a> +<a href="http://www.princeton.edu"> bla </a> +<a href="http://www.tecgraf.puc-rio.br"> bla </a> +<a href="http://www.inf.puc-rio.br"> bla </a> +<a href="http://www.puc-rio.br"> bla </a> +<a href="http://www.impa.br"> bla </a> +<a href="http://www.lua.org"> bla </a> +<a href="http://www.lua-users.org"> bla </a> +<a href="http://www.amazon.com"> bla </a> +<a href="http://www.google.com"> bla </a> +<a href="http://www.nytimes.com"> bla </a> +<a href="http://www.bbc.co.uk"> bla </a> +<a href="http://oglobo.globo.com"> bla </a> +<a href="http://slate.msn.com"> bla </a> +<a href="http://www.apple.com"> bla </a> +<a href="http://www.microsoft.com"> bla </a> +<a href="http://www.nasa.gov"> bla </a> diff --git a/samples/lp.lua b/samples/lp.lua new file mode 100644 index 0000000..25f0b95 --- /dev/null +++ b/samples/lp.lua @@ -0,0 +1,323 @@ +----------------------------------------------------------------------------- +-- LPD support for the Lua language +-- LuaSocket toolkit. +-- Author: David Burgess +-- Modified by Diego Nehab, but David is in charge +----------------------------------------------------------------------------- +--[[ + if you have any questions: RFC 1179 +]] +-- make sure LuaSocket is loaded +local io = require("io") +local base = _G +local os = require("os") +local math = require("math") +local string = require("string") +local socket = require("socket") +local ltn12 = require("ltn12") +module("socket.lp") + +-- default port +PORT = 515 +SERVER = os.getenv("SERVER_NAME") or os.getenv("COMPUTERNAME") or "localhost" +PRINTER = os.getenv("PRINTER") or "printer" + +local function connect(localhost, option) + local host = option.host or SERVER + local port = option.port or PORT + local skt + local try = socket.newtry(function() if skt then skt:close() end end) + if option.localbind then + -- bind to a local port (if we can) + local localport = 721 + local done, err + repeat + skt = socket.try(socket.tcp()) + try(skt:settimeout(30)) + done, err = skt:bind(localhost, localport) + if not done then + localport = localport + 1 + skt:close() + skt = nil + else break end + until localport > 731 + socket.try(skt, err) + else skt = socket.try(socket.tcp()) end + try(skt:connect(host, port)) + return { skt = skt, try = try } +end + +--[[ +RFC 1179 +5.3 03 - Send queue state (short) + + +----+-------+----+------+----+ + | 03 | Queue | SP | List | LF | + +----+-------+----+------+----+ + Command code - 3 + Operand 1 - Printer queue name + Other operands - User names or job numbers + + If the user names or job numbers or both are supplied then only those + jobs for those users or with those numbers will be sent. + + The response is an ASCII stream which describes the printer queue. + The stream continues until the connection closes. Ends of lines are + indicated with ASCII LF control characters. The lines may also + contain ASCII HT control characters. 
+ +5.4 04 - Send queue state (long) + + +----+-------+----+------+----+ + | 04 | Queue | SP | List | LF | + +----+-------+----+------+----+ + Command code - 4 + Operand 1 - Printer queue name + Other operands - User names or job numbers + + If the user names or job numbers or both are supplied then only those + jobs for those users or with those numbers will be sent. + + The response is an ASCII stream which describes the printer queue. + The stream continues until the connection closes. Ends of lines are + indicated with ASCII LF control characters. The lines may also + contain ASCII HT control characters. +]] + +-- gets server acknowledement +local function recv_ack(con) + local ack = con.skt:receive(1) + con.try(string.char(0) == ack, "failed to receive server acknowledgement") +end + +-- sends client acknowledement +local function send_ack(con) + local sent = con.skt:send(string.char(0)) + con.try(sent == 1, "failed to send acknowledgement") +end + +-- sends queue request +-- 5.2 02 - Receive a printer job +-- +-- +----+-------+----+ +-- | 02 | Queue | LF | +-- +----+-------+----+ +-- Command code - 2 +-- Operand - Printer queue name +-- +-- Receiving a job is controlled by a second level of commands. The +-- daemon is given commands by sending them over the same connection. +-- The commands are described in the next section (6). +-- +-- After this command is sent, the client must read an acknowledgement +-- octet from the daemon. A positive acknowledgement is an octet of +-- zero bits. A negative acknowledgement is an octet of any other +-- pattern. +local function send_queue(con, queue) + queue = queue or PRINTER + local str = string.format("\2%s\10", queue) + local sent = con.skt:send(str) + con.try(sent == string.len(str), "failed to send print request") + recv_ack(con) +end + +-- sends control file +-- 6.2 02 - Receive control file +-- +-- +----+-------+----+------+----+ +-- | 02 | Count | SP | Name | LF | +-- +----+-------+----+------+----+ +-- Command code - 2 +-- Operand 1 - Number of bytes in control file +-- Operand 2 - Name of control file +-- +-- The control file must be an ASCII stream with the ends of lines +-- indicated by ASCII LF. The total number of bytes in the stream is +-- sent as the first operand. The name of the control file is sent as +-- the second. It should start with ASCII "cfA", followed by a three +-- digit job number, followed by the host name which has constructed the +-- control file. Acknowledgement processing must occur as usual after +-- the command is sent. +-- +-- The next "Operand 1" octets over the same TCP connection are the +-- intended contents of the control file. Once all of the contents have +-- been delivered, an octet of zero bits is sent as an indication that +-- the file being sent is complete. A second level of acknowledgement +-- processing must occur at this point. + +-- sends data file +-- 6.3 03 - Receive data file +-- +-- +----+-------+----+------+----+ +-- | 03 | Count | SP | Name | LF | +-- +----+-------+----+------+----+ +-- Command code - 3 +-- Operand 1 - Number of bytes in data file +-- Operand 2 - Name of data file +-- +-- The data file may contain any 8 bit values at all. The total number +-- of bytes in the stream may be sent as the first operand, otherwise +-- the field should be cleared to 0. The name of the data file should +-- start with ASCII "dfA". This should be followed by a three digit job +-- number. The job number should be followed by the host name which has +-- constructed the data file. 
Interpretation of the contents of the +-- data file is determined by the contents of the corresponding control +-- file. If a data file length has been specified, the next "Operand 1" +-- octets over the same TCP connection are the intended contents of the +-- data file. In this case, once all of the contents have been +-- delivered, an octet of zero bits is sent as an indication that the +-- file being sent is complete. A second level of acknowledgement +-- processing must occur at this point. + + +local function send_hdr(con, control) + local sent = con.skt:send(control) + con.try(sent and sent >= 1 , "failed to send header file") + recv_ack(con) +end + +local function send_control(con, control) + local sent = con.skt:send(control) + con.try(sent and sent >= 1, "failed to send control file") + send_ack(con) +end + +local function send_data(con,fh,size) + local buf + while size > 0 do + buf,message = fh:read(8192) + if buf then + st = con.try(con.skt:send(buf)) + size = size - st + else + con.try(size == 0, "file size mismatch") + end + end + recv_ack(con) -- note the double acknowledgement + send_ack(con) + recv_ack(con) + return size +end + + +--[[ +local control_dflt = { + "H"..string.sub(socket.hostname,1,31).."\10", -- host + "C"..string.sub(socket.hostname,1,31).."\10", -- class + "J"..string.sub(filename,1,99).."\10", -- jobname + "L"..string.sub(user,1,31).."\10", -- print banner page + "I"..tonumber(indent).."\10", -- indent column count ('f' only) + "M"..string.sub(mail,1,128).."\10", -- mail when printed user@host + "N"..string.sub(filename,1,131).."\10", -- name of source file + "P"..string.sub(user,1,31).."\10", -- user name + "T"..string.sub(title,1,79).."\10", -- title for banner ('p' only) + "W"..tonumber(width or 132).."\10", -- width of print f,l,p only + + "f"..file.."\10", -- formatted print (remove control chars) + "l"..file.."\10", -- print + "o"..file.."\10", -- postscript + "p"..file.."\10", -- pr format - requires T, L + "r"..file.."\10", -- fortran format + "U"..file.."\10", -- Unlink (data file only) +} +]] + +-- generate a varying job number +local seq = 0 +local function newjob(connection) + seq = seq + 1 + return math.floor(socket.gettime() * 1000 + seq)%1000 +end + + +local format_codes = { + binary = 'l', + text = 'f', + ps = 'o', + pr = 'p', + fortran = 'r', + l = 'l', + r = 'r', + o = 'o', + p = 'p', + f = 'f' +} + +-- lp.send{option} +-- requires option.file + +send = socket.protect(function(option) + socket.try(option and base.type(option) == "table", "invalid options") + local file = option.file + socket.try(file, "invalid file name") + local fh = socket.try(io.open(file,"rb")) + local datafile_size = fh:seek("end") -- get total size + fh:seek("set") -- go back to start of file + local localhost = socket.dns.gethostname() or os.getenv("COMPUTERNAME") + or "localhost" + local con = connect(localhost, option) +-- format the control file + local jobno = newjob() + local localip = socket.dns.toip(localhost) + localhost = string.sub(localhost,1,31) + local user = string.sub(option.user or os.getenv("LPRUSER") or + os.getenv("USERNAME") or os.getenv("USER") or "anonymous", 1,31) + local lpfile = string.format("dfA%3.3d%-s", jobno, localhost); + local fmt = format_codes[option.format] or 'l' + local class = string.sub(option.class or localip or localhost,1,31) + local _,_,ctlfn = string.find(file,".*[%/%\\](.*)") + ctlfn = string.sub(ctlfn or file,1,131) + local cfile = + string.format("H%-s\nC%-s\nJ%-s\nP%-s\n%.1s%-s\nU%-s\nN%-s\n", + localhost, + class, + 
option.job or "LuaSocket", + user, + fmt, lpfile, + lpfile, + ctlfn); -- mandatory part of ctl file + if (option.banner) then cfile = cfile .. 'L'..user..'\10' end + if (option.indent) then cfile = cfile .. 'I'..base.tonumber(option.indent)..'\10' end + if (option.mail) then cfile = cfile .. 'M'..string.sub((option.mail),1,128)..'\10' end + if (fmt == 'p' and option.title) then cfile = cfile .. 'T'..string.sub((option.title),1,79)..'\10' end + if ((fmt == 'p' or fmt == 'l' or fmt == 'f') and option.width) then + cfile = cfile .. 'W'..base.tonumber(option,width)..'\10' + end + + con.skt:settimeout(option.timeout or 65) +-- send the queue header + send_queue(con, option.queue) +-- send the control file header + local cfilecmd = string.format("\2%d cfA%3.3d%-s\n",string.len(cfile), jobno, localhost); + send_hdr(con,cfilecmd) + +-- send the control file + send_control(con,cfile) + +-- send the data file header + local dfilecmd = string.format("\3%d dfA%3.3d%-s\n",datafile_size, jobno, localhost); + send_hdr(con,dfilecmd) + +-- send the data file + send_data(con,fh,datafile_size) + fh:close() + con.skt:close(); + return jobno, datafile_size +end) + +-- +-- lp.query({host=,queue=printer|'*', format='l'|'s', list=}) +-- +query = socket.protect(function(p) + p = p or {} + local localhost = socket.dns.gethostname() or os.getenv("COMPUTERNAME") + or "localhost" + local con = connect(localhost,p) + local fmt + if string.sub(p.format or 's',1,1) == 's' then fmt = 3 else fmt = 4 end + con.try(con.skt:send(string.format("%c%s %s\n", fmt, p.queue or "*", + p.list or ""))) + local data = con.try(con.skt:receive("*a")) + con.skt:close() + return data +end) diff --git a/samples/qp.lua b/samples/qp.lua new file mode 100644 index 0000000..523238b --- /dev/null +++ b/samples/qp.lua @@ -0,0 +1,23 @@ +----------------------------------------------------------------------------- +-- Little program to convert to and from Quoted-Printable +-- LuaSocket sample files +-- Author: Diego Nehab +----------------------------------------------------------------------------- +local ltn12 = require("ltn12") +local mime = require("mime") +local convert +arg = arg or {} +local mode = arg and arg[1] or "-et" +if mode == "-et" then + local normalize = mime.normalize() + local qp = mime.encode("quoted-printable") + local wrap = mime.wrap("quoted-printable") + convert = ltn12.filter.chain(normalize, qp, wrap) +elseif mode == "-eb" then + local qp = mime.encode("quoted-printable", "binary") + local wrap = mime.wrap("quoted-printable") + convert = ltn12.filter.chain(qp, wrap) +else convert = mime.decode("quoted-printable") end +local source = ltn12.source.chain(ltn12.source.file(io.stdin), convert) +local sink = ltn12.sink.file(io.stdout) +ltn12.pump.all(source, sink) diff --git a/samples/tftp.lua b/samples/tftp.lua new file mode 100644 index 0000000..ed99cd1 --- /dev/null +++ b/samples/tftp.lua @@ -0,0 +1,154 @@ +----------------------------------------------------------------------------- +-- TFTP support for the Lua language +-- LuaSocket toolkit. 
+-- Author: Diego Nehab +----------------------------------------------------------------------------- + +----------------------------------------------------------------------------- +-- Load required files +----------------------------------------------------------------------------- +local base = _G +local table = require("table") +local math = require("math") +local string = require("string") +local socket = require("socket") +local ltn12 = require("ltn12") +local url = require("socket.url") +module("socket.tftp") + +----------------------------------------------------------------------------- +-- Program constants +----------------------------------------------------------------------------- +local char = string.char +local byte = string.byte + +PORT = 69 +local OP_RRQ = 1 +local OP_WRQ = 2 +local OP_DATA = 3 +local OP_ACK = 4 +local OP_ERROR = 5 +local OP_INV = {"RRQ", "WRQ", "DATA", "ACK", "ERROR"} + +----------------------------------------------------------------------------- +-- Packet creation functions +----------------------------------------------------------------------------- +local function RRQ(source, mode) + return char(0, OP_RRQ) .. source .. char(0) .. mode .. char(0) +end + +local function WRQ(source, mode) + return char(0, OP_RRQ) .. source .. char(0) .. mode .. char(0) +end + +local function ACK(block) + local low, high + low = math.mod(block, 256) + high = (block - low)/256 + return char(0, OP_ACK, high, low) +end + +local function get_OP(dgram) + local op = byte(dgram, 1)*256 + byte(dgram, 2) + return op +end + +----------------------------------------------------------------------------- +-- Packet analysis functions +----------------------------------------------------------------------------- +local function split_DATA(dgram) + local block = byte(dgram, 3)*256 + byte(dgram, 4) + local data = string.sub(dgram, 5) + return block, data +end + +local function get_ERROR(dgram) + local code = byte(dgram, 3)*256 + byte(dgram, 4) + local msg + _,_, msg = string.find(dgram, "(.*)\000", 5) + return string.format("error code %d: %s", code, msg) +end + +----------------------------------------------------------------------------- +-- The real work +----------------------------------------------------------------------------- +local function tget(gett) + local retries, dgram, sent, datahost, dataport, code + local last = 0 + socket.try(gett.host, "missing host") + local con = socket.try(socket.udp()) + local try = socket.newtry(function() con:close() end) + -- convert from name to ip if needed + gett.host = try(socket.dns.toip(gett.host)) + con:settimeout(1) + -- first packet gives data host/port to be used for data transfers + local path = string.gsub(gett.path or "", "^/", "") + path = url.unescape(path) + retries = 0 + repeat + sent = try(con:sendto(RRQ(path, "octet"), gett.host, gett.port)) + dgram, datahost, dataport = con:receivefrom() + retries = retries + 1 + until dgram or datahost ~= "timeout" or retries > 5 + try(dgram, datahost) + -- associate socket with data host/port + try(con:setpeername(datahost, dataport)) + -- default sink + local sink = gett.sink or ltn12.sink.null() + -- process all data packets + while 1 do + -- decode packet + code = get_OP(dgram) + try(code ~= OP_ERROR, get_ERROR(dgram)) + try(code == OP_DATA, "unhandled opcode " .. 
code) + -- get data packet parts + local block, data = split_DATA(dgram) + -- if not repeated, write + if block == last+1 then + try(sink(data)) + last = block + end + -- last packet brings less than 512 bytes of data + if string.len(data) < 512 then + try(con:send(ACK(block))) + try(con:close()) + try(sink(nil)) + return 1 + end + -- get the next packet + retries = 0 + repeat + sent = try(con:send(ACK(last))) + dgram, err = con:receive() + retries = retries + 1 + until dgram or err ~= "timeout" or retries > 5 + try(dgram, err) + end +end + +local default = { + port = PORT, + path ="/", + scheme = "tftp" +} + +local function parse(u) + local t = socket.try(url.parse(u, default)) + socket.try(t.scheme == "tftp", "invalid scheme '" .. t.scheme .. "'") + socket.try(t.host, "invalid host") + return t +end + +local function sget(u) + local gett = parse(u) + local t = {} + gett.sink = ltn12.sink.table(t) + tget(gett) + return table.concat(t) +end + +get = socket.protect(function(gett) + if base.type(gett) == "string" then return sget(gett) + else return tget(gett) end +end) + -- cgit v1.2.3-55-g6feb From 3adf252b45401b4b97e63668c6ee530e7b3936ad Mon Sep 17 00:00:00 2001 From: Thijs Schreijer <thijs@thijsschreijer.nl> Date: Wed, 23 Mar 2022 16:15:52 +0100 Subject: cleanup; move FIX, TODO, WISH into TODO.md --- FIX | 28 -------------- TODO | 81 -------------------------------------- TODO.md | 135 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ WISH | 22 ----------- 4 files changed, 135 insertions(+), 131 deletions(-) delete mode 100644 FIX delete mode 100644 TODO create mode 100644 TODO.md delete mode 100644 WISH diff --git a/FIX b/FIX deleted file mode 100644 index 40f30a1..0000000 --- a/FIX +++ /dev/null @@ -1,28 +0,0 @@ - - - - - - -http was preserving old host header during redirects -fix smtp.send hang on source error -add create field to FTP and SMTP and fix HTTP ugliness -clean timeout argument to open functions in SMTP, HTTP and FTP -eliminate globals from namespaces created by module(). -url.absolute was not working when base_url was already parsed -http.request was redirecting even when the location header was empty -tcp{client}:shutdown() was checking for group instead of class. -tcp{client}:send() now returns i+sent-1... -get rid of a = socket.try() in the manual, except for protected cases. replace it with assert. -get rid of "base." kludge in package.loaded -check all "require("http")" etc in the manual. -make sure sock_gethostname.* only return success if the hp is not null! -change 'l' prefix in C libraries to 'c' to avoid clash with LHF libraries - don't forget the declarations in luasocket.h and mime.h!!! -setpeername was using udp{unconnected} -fixed a bug in http.lua that caused some requests to fail (Florian Berger) -fixed a bug in select.c that prevented sockets with descriptor 0 from working (Renato Maia) -fixed a "bug" that caused dns.toip to crash under uLinux -fixed a "bug" that caused a crash in gethostbyname under VMS -DEBUG and VERSION became _DEBUG and _VERSION -send returns the right value if input is "". Alexander Marinov diff --git a/TODO b/TODO deleted file mode 100644 index a838fc0..0000000 --- a/TODO +++ /dev/null @@ -1,81 +0,0 @@ -- bizarre default values for getnameinfo should throw error instead! - -> It's just too bad it can't talk to gmail - -> reason 1: they absolutely want TLS -> reason 2: unlike all the other SMTP implementations, they -> don't -> tolerate missing < > around adresses - -- document the new bind and connect behavior. 
-- shouldn't we instead make the code compatible to Lua 5.2 - without any compat stuff, and use a compatibility layer to - make it work on 5.1? -- add what's new to manual -- should there be an equivalent to tohostname for IPv6? -- should we add service name resolution as well to getaddrinfo? -- Maybe the sockaddr to presentation conversion should be done with getnameinfo()? - -- add http POST sample to manual - people keep asking stupid questions -- documentation of dirty/getfd/setfd is problematic because of portability - same for unix and serial. - what to do about this? add a stronger disclaimer? -- fix makefile with decent defaults? - -Done: - -- added IPv6 support to getsockname -- simplified getpeername implementation -- added family to return of getsockname and getpeername - and added modification to the manual to describe - -- connect and bind try all adresses returned by getaddrinfo -- document headers.lua? -- update copyright date everywhere? -- remove RCSID from files? -- move version to 2.1 rather than 2.1.1? -- fixed url package to support ipv6 hosts -- changed domain to family -- implement getfamily methods. - -- remove references to Lua 5.0 from documentation, add 5.2? -- update lua and luasocket version in samples in documentation -- document ipv5_v6only default option being set? -- document tcp6 and udp6 -- document dns.getaddrinfo -- documented zero-sized datagram change? - no. -- document unix socket and serial socket? add raw support? - no. -- document getoption -- merge luaL_typeerror into auxiliar to avoid using luaL prefix? - - - - - - - - - - -replace \r\n with \0xD\0xA in everything -New mime support - -ftp send should return server replies? -make sure there are no object files in the distribution tarball -http handling of 100-continue, see DB patch -DB ftp.lua bug. -test unix.c to return just a function and works with require"unix" -get rid of setmetatable(, nil) since packages don't need this anymore in 5.1 -compat-5.1 novo -ajeitar pra lua-5.1 - -adicionar exemplos de expans�o: pipe, local, named pipe -testar os options! - - -- Thread-unsafe functions to protect - gethostbyname(), gethostbyaddr(), gethostent(), -inet_ntoa(), strerror(), - diff --git a/TODO.md b/TODO.md new file mode 100644 index 0000000..d265694 --- /dev/null +++ b/TODO.md @@ -0,0 +1,135 @@ +## FIX + +http was preserving old host header during redirects +fix smtp.send hang on source error +add create field to FTP and SMTP and fix HTTP ugliness +clean timeout argument to open functions in SMTP, HTTP and FTP +eliminate globals from namespaces created by module(). +url.absolute was not working when base_url was already parsed +http.request was redirecting even when the location header was empty +tcp{client}:shutdown() was checking for group instead of class. +tcp{client}:send() now returns i+sent-1... +get rid of a = socket.try() in the manual, except for protected cases. replace it with assert. +get rid of "base." kludge in package.loaded +check all "require("http")" etc in the manual. +make sure sock_gethostname.* only return success if the hp is not null! +change 'l' prefix in C libraries to 'c' to avoid clash with LHF libraries + don't forget the declarations in luasocket.h and mime.h!!! 
+setpeername was using udp{unconnected} +fixed a bug in http.lua that caused some requests to fail (Florian Berger) +fixed a bug in select.c that prevented sockets with descriptor 0 from working (Renato Maia) +fixed a "bug" that caused dns.toip to crash under uLinux +fixed a "bug" that caused a crash in gethostbyname under VMS +DEBUG and VERSION became _DEBUG and _VERSION +send returns the right value if input is "". Alexander Marinov + + +## WISH + +... as an l-value to get all results of a function call? +at least ...[i] and #... +extend to full tuples? + +__and __or __not metamethods + +lua_tostring, lua_tonumber, lua_touseradta etc push values in stack +__tostring,__tonumber, __touserdata metamethods are checked +and expected to push an object of correct type on stack + +lua_rawtostring, lua_rawtonumber, lua_rawtouserdata don't +push anything on stack, return data of appropriate type, +skip metamethods and throw error if object not of exact type + +package.findfile exported +module not polluting the global namespace + +coxpcall with a coroutine pool for efficiency (reusing coroutines) + +exception mechanism formalized? just like the package system was. + +a nice bitlib in the core + + +## TODO + +- bizarre default values for getnameinfo should throw error instead! + +> It's just too bad it can't talk to gmail - +> reason 1: they absolutely want TLS +> reason 2: unlike all the other SMTP implementations, they +> don't +> tolerate missing < > around adresses + +- document the new bind and connect behavior. +- shouldn't we instead make the code compatible to Lua 5.2 + without any compat stuff, and use a compatibility layer to + make it work on 5.1? +- add what's new to manual +- should there be an equivalent to tohostname for IPv6? +- should we add service name resolution as well to getaddrinfo? +- Maybe the sockaddr to presentation conversion should be done with getnameinfo()? + +- add http POST sample to manual + people keep asking stupid questions +- documentation of dirty/getfd/setfd is problematic because of portability + same for unix and serial. + what to do about this? add a stronger disclaimer? +- fix makefile with decent defaults? + +## Done: + +- added IPv6 support to getsockname +- simplified getpeername implementation +- added family to return of getsockname and getpeername + and added modification to the manual to describe + +- connect and bind try all adresses returned by getaddrinfo +- document headers.lua? +- update copyright date everywhere? +- remove RCSID from files? +- move version to 2.1 rather than 2.1.1? +- fixed url package to support ipv6 hosts +- changed domain to family +- implement getfamily methods. + +- remove references to Lua 5.0 from documentation, add 5.2? +- update lua and luasocket version in samples in documentation +- document ipv5_v6only default option being set? +- document tcp6 and udp6 +- document dns.getaddrinfo +- documented zero-sized datagram change? + no. +- document unix socket and serial socket? add raw support? + no. +- document getoption +- merge luaL_typeerror into auxiliar to avoid using luaL prefix? + + + + + + + + + + +replace \r\n with \0xD\0xA in everything +New mime support + +ftp send should return server replies? +make sure there are no object files in the distribution tarball +http handling of 100-continue, see DB patch +DB ftp.lua bug. 
+test unix.c to return just a function and works with require"unix" +get rid of setmetatable(, nil) since packages don't need this anymore in 5.1 +compat-5.1 novo +ajeitar pra lua-5.1 + +adicionar exemplos de expans�o: pipe, local, named pipe +testar os options! + + +- Thread-unsafe functions to protect + gethostbyname(), gethostbyaddr(), gethostent(), +inet_ntoa(), strerror(), + diff --git a/WISH b/WISH deleted file mode 100644 index e7e9c07..0000000 --- a/WISH +++ /dev/null @@ -1,22 +0,0 @@ -... as an l-value to get all results of a function call? -at least ...[i] and #... -extend to full tuples? - -__and __or __not metamethods - -lua_tostring, lua_tonumber, lua_touseradta etc push values in stack -__tostring,__tonumber, __touserdata metamethods are checked -and expected to push an object of correct type on stack - -lua_rawtostring, lua_rawtonumber, lua_rawtouserdata don't -push anything on stack, return data of appropriate type, -skip metamethods and throw error if object not of exact type - -package.findfile exported -module not polluting the global namespace - -coxpcall with a coroutine pool for efficiency (reusing coroutines) - -exception mechanism formalized? just like the package system was. - -a nice bitlib in the core -- cgit v1.2.3-55-g6feb From db2f1c9598c63a721fad4b8ae0e0121eccc86248 Mon Sep 17 00:00:00 2001 From: Thijs Schreijer <thijs@thijsschreijer.nl> Date: Tue, 29 Mar 2022 14:09:10 +0200 Subject: chore(ltn) update file contents from wiki to markdown --- ltn012.wiki | 205 ++++++++++++++++++++++++++++++------------------------------ ltn013.wiki | 127 ++++++++++++++++++------------------- 2 files changed, 163 insertions(+), 169 deletions(-) diff --git a/ltn012.wiki b/ltn012.wiki index 96b13ae..fa26b4a 100644 --- a/ltn012.wiki +++ b/ltn012.wiki @@ -1,51 +1,48 @@ -===Filters, sources and sinks: design, motivation and examples=== -==or Functional programming for the rest of us== +# Filters, sources and sinks: design, motivation and examples +### or Functional programming for the rest of us by DiegoNehab -{{{ +## Abstract -}}} +Certain operations can be implemented in the form of filters. A filter is a function that processes data received in consecutive function calls, returning partial results chunk by chunk. Examples of operations that can be implemented as filters include the end-of-line normalization for text, Base64 and Quoted-Printable transfer content encodings, the breaking of text into lines, SMTP byte stuffing, and there are many others. Filters become even more powerful when we allow them to be chained together to create composite filters. Filters can be seen as middle nodes in a chain of data transformations. Sources an sinks are the corresponding end points of these chains. A source is a function that produces data, chunk by chunk, and a sink is a function that takes data, chunk by chunk. In this technical note, we define an elegant interface for filters, sources, sinks and chaining. We evolve our interface progressively, until we reach a high degree of generality. We discuss difficulties that arise during the implementation of this interface and we provide solutions and examples. -===Abstract=== -Certain operations can be implemented in the form of filters. A filter is a function that processes data received in consecutive function calls, returning partial results chunk by chunk. 
Examples of operations that can be implemented as filters include the end-of-line normalization for text, Base64 and Quoted-Printable transfer content encodings, the breaking of text into lines, SMTP byte stuffing, and there are many others. Filters become even more powerful when we allow them to be chained together to create composite filters. Filters can be seen as middle nodes in a chain of data transformations. Sources an sinks are the corresponding end points of these chains. A source is a function that produces data, chunk by chunk, and a sink is a function that takes data, chunk by chunk. In this technical note, we define an elegant interface for filters, sources, sinks and chaining. We evolve our interface progressively, until we reach a high degree of generality. We discuss difficulties that arise during the implementation of this interface and we provide solutions and examples. +## Introduction -===Introduction=== +Applications sometimes have too much information to process to fit in memory and are thus forced to process data in smaller parts. Even when there is enough memory, processing all the data atomically may take long enough to frustrate a user that wants to interact with the application. Furthermore, complex transformations can often be defined as series of simpler operations. Several different complex transformations might share the same simpler operations, so that an uniform interface to combine them is desirable. The following concepts constitute our solution to these problems. -Applications sometimes have too much information to process to fit in memory and are thus forced to process data in smaller parts. Even when there is enough memory, processing all the data atomically may take long enough to frustrate a user that wants to interact with the application. Furthermore, complex transformations can often be defined as series of simpler operations. Several different complex transformations might share the same simpler operations, so that an uniform interface to combine them is desirable. The following concepts constitute our solution to these problems. +"Filters" are functions that accept successive chunks of input, and produce successive chunks of output. Furthermore, the result of concatenating all the output data is the same as the result of applying the filter over the concatenation of the input data. As a consequence, boundaries are irrelevant: filters have to handle input data split arbitrarily by the user. -''Filters'' are functions that accept successive chunks of input, and produce successive chunks of output. Furthermore, the result of concatenating all the output data is the same as the result of applying the filter over the concatenation of the input data. As a consequence, boundaries are irrelevant: filters have to handle input data split arbitrarily by the user. +A "chain" is a function that combines the effect of two (or more) other functions, but whose interface is indistinguishable from the interface of one of its components. Thus, a chained filter can be used wherever an atomic filter can be used. However, its effect on data is the combined effect of its component filters. Note that, as a consequence, chains can be chained themselves to create arbitrarily complex operations that can be used just like atomic operations. -A ''chain'' is a function that combines the effect of two (or more) other functions, but whose interface is indistinguishable from the interface of one of its components. 
Thus, a chained filter can be used wherever an atomic filter can be used. However, its effect on data is the combined effect of its component filters. Note that, as a consequence, chains can be chained themselves to create arbitrarily complex operations that can be used just like atomic operations. +Filters can be seen as internal nodes in a network through which data flows, potentially being transformed along its way. Chains connect these nodes together. To complete the picture, we need "sources" and "sinks" as initial and final nodes of the network, respectively. Less abstractly, a source is a function that produces new data every time it is called. On the other hand, sinks are functions that give a final destination to the data they receive. Naturally, sources and sinks can be chained with filters. -Filters can be seen as internal nodes in a network through which data flows, potentially being transformed along its way. Chains connect these nodes together. To complete the picture, we need ''sources'' and ''sinks'' as initial and final nodes of the network, respectively. Less abstractly, a source is a function that produces new data every time it is called. On the other hand, sinks are functions that give a final destination to the data they receive. Naturally, sources and sinks can be chained with filters. +Finally, filters, chains, sources, and sinks are all passive entities: they need to be repeatedly called in order for something to happen. "Pumps" provide the driving force that pushes data through the network, from a source to a sink. -Finally, filters, chains, sources, and sinks are all passive entities: they need to be repeatedly called in order for something to happen. ''Pumps'' provide the driving force that pushes data through the network, from a source to a sink. + Hopefully, these concepts will become clear with examples. In the following sections, we start with simplified interfaces, which we improve several times until we can find no obvious shortcomings. The evolution we present is not contrived: it follows the steps we followed ourselves as we consolidated our understanding of these concepts. - Hopefully, these concepts will become clear with examples. In the following sections, we start with simplified interfaces, which we improve several times until we can find no obvious shortcomings. The evolution we present is not contrived: it follows the steps we followed ourselves as we consolidated our understanding of these concepts. +### A concrete example -== A concrete example == - -Some data transformations are easier to implement as filters than others. Examples of operations that can be implemented as filters include the end-of-line normalization for text, the Base64 and Quoted-Printable transfer content encodings, the breaking of text into lines, SMTP byte stuffing, and many others. Let's use the end-of-line normalization as an example to define our initial filter interface. We later discuss why the implementation might not be trivial. +Some data transformations are easier to implement as filters than others. Examples of operations that can be implemented as filters include the end-of-line normalization for text, the Base64 and Quoted-Printable transfer content encodings, the breaking of text into lines, SMTP byte stuffing, and many others. Let's use the end-of-line normalization as an example to define our initial filter interface. We later discuss why the implementation might not be trivial. 
Assume we are given text in an unknown end-of-line convention (including possibly mixed conventions) out of the commonly found Unix (LF), Mac OS (CR), and DOS (CRLF) conventions. We would like to be able to write code like the following: - {{{ +```lua input = source.chain(source.file(io.stdin), normalize("\r\n")) output = sink.file(io.stdout) pump(input, output) -}}} +``` -This program should read data from the standard input stream and normalize the end-of-line markers to the canonic CRLF marker defined by the MIME standard, finally sending the results to the standard output stream. For that, we use a ''file source'' to produce data from standard input, and chain it with a filter that normalizes the data. The pump then repeatedly gets data from the source, and moves it to the ''file sink'' that sends it to standard output. +This program should read data from the standard input stream and normalize the end-of-line markers to the canonic CRLF marker defined by the MIME standard, finally sending the results to the standard output stream. For that, we use a "file source" to produce data from standard input, and chain it with a filter that normalizes the data. The pump then repeatedly gets data from the source, and moves it to the "file sink" that sends it to standard output. -To make the discussion even more concrete, we start by discussing the implementation of the normalization filter. The {{normalize}} ''factory'' is a function that creates such a filter. Our initial filter interface is as follows: the filter receives a chunk of input data, and returns a chunk of processed data. When there is no more input data, the user notifies the filter by invoking it with a {{nil}} chunk. The filter then returns the final chunk of processed data. +To make the discussion even more concrete, we start by discussing the implementation of the normalization filter. The `normalize` "factory" is a function that creates such a filter. Our initial filter interface is as follows: the filter receives a chunk of input data, and returns a chunk of processed data. When there is no more input data, the user notifies the filter by invoking it with a `nil` chunk. The filter then returns the final chunk of processed data. -Although the interface is extremely simple, the implementation doesn't seem so obvious. Any filter respecting this interface needs to keep some kind of context between calls. This is because chunks can be broken between the CR and LF characters marking the end of a line. This need for context storage is what motivates the use of factories: each time the factory is called, it returns a filter with its own context so that we can have several independent filters being used at the same time. For the normalization filter, we know that the obvious solution (i.e. concatenating all the input into the context before producing any output) is not good enough, so we will have to find another way. +Although the interface is extremely simple, the implementation doesn't seem so obvious. Any filter respecting this interface needs to keep some kind of context between calls. This is because chunks can be broken between the CR and LF characters marking the end of a line. This need for context storage is what motivates the use of factories: each time the factory is called, it returns a filter with its own context so that we can have several independent filters being used at the same time. For the normalization filter, we know that the obvious solution (i.e. 
concatenating all the input into the context before producing any output) is not good enough, so we will have to find another way. We will break the implementation in two parts: a low-level filter, and a factory of high-level filters. The low-level filter will be implemented in C and will not carry any context between function calls. The high-level filter factory, implemented in Lua, will create and return a high-level filter that keeps whatever context the low-level filter needs, but isolates the user from its internal details. That way, we take advantage of C's efficiency to perform the dirty work, and take advantage of Lua's simplicity for the bookkeeping. -==The Lua part of the implementation== +### The Lua part of the implementation Below is the implementation of the factory of high-level end-of-line normalization filters: - {{{ +```lua function filter.cycle(low, ctx, extra) return function(chunk) local ret @@ -57,18 +54,18 @@ end function normalize(marker) return cycle(eol, 0, marker) end -}}} +``` -The {{normalize}} factory simply calls a more generic factory, the {{cycle}} factory. This factory receives a low-level filter, an initial context and some extra value and returns the corresponding high-level filter. Each time the high level filer is called with a new chunk, it calls the low-level filter passing the previous context, the new chunk and the extra argument. The low-level filter produces the chunk of processed data and a new context. Finally, the high-level filter updates its internal context and returns the processed chunk of data to the user. It is the low-level filter that does all the work. Notice that this implementation takes advantage of the Lua 5.0 lexical scoping rules to store the context locally, between function calls. +The `normalize` factory simply calls a more generic factory, the `cycle` factory. This factory receives a low-level filter, an initial context and some extra value and returns the corresponding high-level filter. Each time the high level filer is called with a new chunk, it calls the low-level filter passing the previous context, the new chunk and the extra argument. The low-level filter produces the chunk of processed data and a new context. Finally, the high-level filter updates its internal context and returns the processed chunk of data to the user. It is the low-level filter that does all the work. Notice that this implementation takes advantage of the Lua 5.0 lexical scoping rules to store the context locally, between function calls. Moving to the low-level filter, we notice there is no perfect solution to the end-of-line marker normalization problem itself. The difficulty comes from an inherent ambiguity on the definition of empty lines within mixed input. However, the following solution works well for any consistent input, as well as for non-empty lines in mixed input. It also does a reasonable job with empty lines and serves as a good example of how to implement a low-level filter. -Here is what we do: CR and LF are considered candidates for line break. We issue ''one'' end-of-line line marker if one of the candidates is seen alone, or followed by a ''different'' candidate. That is, CR CR and LF LF issue two end of line markers each, but CR LF and LF CR issue only one marker. This idea takes care of Mac OS, Mac OS X, VMS and Unix, DOS and MIME, as well as probably other more obscure conventions. +Here is what we do: CR and LF are considered candidates for line break. 
We issue "one" end-of-line line marker if one of the candidates is seen alone, or followed by a "different" candidate. That is, CR CR and LF LF issue two end of line markers each, but CR LF and LF CR issue only one marker. This idea takes care of Mac OS, Mac OS X, VMS and Unix, DOS and MIME, as well as probably other more obscure conventions. -==The C part of the implementation== +### The C part of the implementation The low-level filter is divided into two simple functions. The inner function actually does the conversion. It takes each input character in turn, deciding what to output and how to modify the context. The context tells if the last character seen was a candidate and, if so, which candidate it was. - {{{ +```c #define candidate(c) (c == CR || c == LF) static int process(int c, int last, const char *marker, luaL_Buffer *buffer) { if (candidate(c)) { @@ -84,10 +81,10 @@ static int process(int c, int last, const char *marker, luaL_Buffer *buffer) { return 0; } } -}}} +``` -The inner function makes use of Lua's auxiliary library's buffer interface for its efficiency and ease of use. The outer function simply interfaces with Lua. It receives the context and the input chunk (as well as an optional end-of-line marker), and returns the transformed output and the new context. - {{{ +The inner function makes use of Lua's auxiliary library's buffer interface for its efficiency and ease of use. The outer function simply interfaces with Lua. It receives the context and the input chunk (as well as an optional end-of-line marker), and returns the transformed output and the new context. +```c static int eol(lua_State *L) { int ctx = luaL_checkint(L, 1); size_t isize = 0; @@ -107,16 +104,16 @@ static int eol(lua_State *L) { lua_pushnumber(L, ctx); return 2; } -}}} +``` -Notice that if the input chunk is {{nil}}, the operation is considered to be finished. In that case, the loop will not execute a single time and the context is reset to the initial state. This allows the filter to be reused indefinitely. It is a good idea to write filters like this, when possible. +Notice that if the input chunk is `nil`, the operation is considered to be finished. In that case, the loop will not execute a single time and the context is reset to the initial state. This allows the filter to be reused indefinitely. It is a good idea to write filters like this, when possible. -Besides the end-of-line normalization filter shown above, many other filters can be implemented with the same ideas. Examples include Base64 and Quoted-Printable transfer content encodings, the breaking of text into lines, SMTP byte stuffing etc. The challenging part is to decide what will be the context. For line breaking, for instance, it could be the number of bytes left in the current line. For Base64 encoding, it could be the bytes that remain in the division of the input into 3-byte atoms. +Besides the end-of-line normalization filter shown above, many other filters can be implemented with the same ideas. Examples include Base64 and Quoted-Printable transfer content encodings, the breaking of text into lines, SMTP byte stuffing etc. The challenging part is to decide what will be the context. For line breaking, for instance, it could be the number of bytes left in the current line. For Base64 encoding, it could be the bytes that remain in the division of the input into 3-byte atoms. -===Chaining=== +## Chaining -Filters become more powerful when the concept of chaining is introduced. 
Suppose you have a filter for Quoted-Printable encoding and you want to encode some text. According to the standard, the text has to be normalized into its canonic form prior to encoding. A nice interface that simplifies this task is a factory that creates a composite filter that passes data through multiple filters, but that can be used wherever a primitive filter is used. - {{{ +Filters become more powerful when the concept of chaining is introduced. Suppose you have a filter for Quoted-Printable encoding and you want to encode some text. According to the standard, the text has to be normalized into its canonic form prior to encoding. A nice interface that simplifies this task is a factory that creates a composite filter that passes data through multiple filters, but that can be used wherever a primitive filter is used. +```lua local function chain2(f1, f2) return function(chunk) local ret = f2(f1(chunk)) @@ -140,18 +137,18 @@ while 1 do io.write(chain(chunk)) if not chunk then break end end -}}} +``` -The chaining factory is very simple. All it does is return a function that passes data through all filters and returns the result to the user. It uses the simpler auxiliary function that knows how to chain two filters together. In the auxiliary function, special care must be taken if the chunk is final. This is because the final chunk notification has to be pushed through both filters in turn. Thanks to the chain factory, it is easy to perform the Quoted-Printable conversion, as the above example shows. +The chaining factory is very simple. All it does is return a function that passes data through all filters and returns the result to the user. It uses the simpler auxiliary function that knows how to chain two filters together. In the auxiliary function, special care must be taken if the chunk is final. This is because the final chunk notification has to be pushed through both filters in turn. Thanks to the chain factory, it is easy to perform the Quoted-Printable conversion, as the above example shows. -===Sources, sinks, and pumps=== +## Sources, sinks, and pumps As we noted in the introduction, the filters we introduced so far act as the internal nodes in a network of transformations. Information flows from node to node (or rather from one filter to the next) and is transformed on its way out. Chaining filters together is the way we found to connect nodes in the network. But what about the end nodes? In the beginning of the network, we need a node that provides the data, a source. In the end of the network, we need a node that takes in the data, a sink. -==Sources== +### Sources -We start with two simple sources. The first is the {{empty}} source: It simply returns no data, possibly returning an error message. The second is the {{file}} source, which produces the contents of a file in a chunk by chunk fashion, closing the file handle when done. - {{{ +We start with two simple sources. The first is the `empty` source: It simply returns no data, possibly returning an error message. The second is the `file` source, which produces the contents of a file in a chunk by chunk fashion, closing the file handle when done. 
+```lua function source.empty(err) return function() return nil, err @@ -159,7 +156,7 @@ function source.empty(err) end function source.file(handle, io_err) - if handle then + if handle then return function() local chunk = handle:read(2048) if not chunk then handle:close() end @@ -167,44 +164,44 @@ function source.file(handle, io_err) end else return source.empty(io_err or "unable to open file") end end -}}} +``` -A source returns the next chunk of data each time it is called. When there is no more data, it just returns {{nil}}. If there is an error, the source can inform the caller by returning {{nil}} followed by an error message. Adrian Sietsma noticed that, although not on purpose, the interface for sources is compatible with the idea of iterators in Lua 5.0. That is, a data source can be nicely used in conjunction with {{for}} loops. Using our file source as an iterator, we can rewrite our first example: - {{{ +A source returns the next chunk of data each time it is called. When there is no more data, it just returns `nil`. If there is an error, the source can inform the caller by returning `nil` followed by an error message. Adrian Sietsma noticed that, although not on purpose, the interface for sources is compatible with the idea of iterators in Lua 5.0. That is, a data source can be nicely used in conjunction with `for` loops. Using our file source as an iterator, we can rewrite our first example: +```lua local process = normalize("\r\n") for chunk in source.file(io.stdin) do io.write(process(chunk)) end io.write(process(nil)) -}}} +``` -Notice that the last call to the filter obtains the last chunk of processed data. The loop terminates when the source returns {{nil}} and therefore we need that final call outside of the loop. +Notice that the last call to the filter obtains the last chunk of processed data. The loop terminates when the source returns `nil` and therefore we need that final call outside of the loop. -==Maintaining state between calls== +### Maintaining state between calls -It is often the case that a source needs to change its behavior after some event. One simple example would be a file source that wants to make sure it returns {{nil}} regardless of how many times it is called after the end of file, avoiding attempts to read past the end of the file. Another example would be a source that returns the contents of several files, as if they were concatenated, moving from one file to the next until the end of the last file is reached. +It is often the case that a source needs to change its behavior after some event. One simple example would be a file source that wants to make sure it returns `nil` regardless of how many times it is called after the end of file, avoiding attempts to read past the end of the file. Another example would be a source that returns the contents of several files, as if they were concatenated, moving from one file to the next until the end of the last file is reached. -One way to implement this kind of source is to have the factory declare extra state variables that the source can use via lexical scoping. Our file source could set the file handle itself to {{nil}} when it detects the end-of-file. Then, every time the source is called, it could check if the handle is still valid and act accordingly: - {{{ +One way to implement this kind of source is to have the factory declare extra state variables that the source can use via lexical scoping. Our file source could set the file handle itself to `nil` when it detects the end-of-file. 
Then, every time the source is called, it could check if the handle is still valid and act accordingly: +```lua function source.file(handle, io_err) - if handle then + if handle then return function() if not handle then return nil end local chunk = handle:read(2048) - if not chunk then - handle:close() + if not chunk then + handle:close() handle = nil end return chunk end else return source.empty(io_err or "unable to open file") end end -}}} +``` -Another way to implement this behavior involves a change in the source interface to makes it more flexible. Let's allow a source to return a second value, besides the next chunk of data. If the returned chunk is {{nil}}, the extra return value tells us what happened. A second {{nil}} means that there is just no more data and the source is empty. Any other value is considered to be an error message. On the other hand, if the chunk was ''not'' {{nil}}, the second return value tells us whether the source wants to be replaced. If it is {{nil}}, we should proceed using the same source. Otherwise it has to be another source, which we have to use from then on, to get the remaining data. +Another way to implement this behavior involves a change in the source interface to makes it more flexible. Let's allow a source to return a second value, besides the next chunk of data. If the returned chunk is `nil`, the extra return value tells us what happened. A second `nil` means that there is just no more data and the source is empty. Any other value is considered to be an error message. On the other hand, if the chunk was "not" `nil`, the second return value tells us whether the source wants to be replaced. If it is `nil`, we should proceed using the same source. Otherwise it has to be another source, which we have to use from then on, to get the remaining data. -This extra freedom is good for someone writing a source function, but it is a pain for those that have to use it. Fortunately, given one of these ''fancy'' sources, we can transform it into a simple source that never needs to be replaced, using the following factory. - {{{ +This extra freedom is good for someone writing a source function, but it is a pain for those that have to use it. Fortunately, given one of these "fancy" sources, we can transform it into a simple source that never needs to be replaced, using the following factory. +```lua function source.simplify(src) return function() local chunk, err_or_new = src() @@ -213,28 +210,28 @@ function source.simplify(src) else return chunk end end end -}}} +``` The simplification factory allows us to write fancy sources and use them as if they were simple. Therefore, our next functions will only produce simple sources, and functions that take sources will assume they are simple. Going back to our file source, the extended interface allows for a more elegant implementation. The new source just asks to be replaced by an empty source as soon as there is no more data. There is no repeated checking of the handle. 
To make things simpler to the user, the factory itself simplifies the the fancy file source before returning it to the user: - {{{ +```lua function source.file(handle, io_err) - if handle then + if handle then return source.simplify(function() local chunk = handle:read(2048) - if not chunk then + if not chunk then handle:close() - return "", source.empty() + return "", source.empty() end return chunk end) else return source.empty(io_err or "unable to open file") end end -}}} +``` -We can make these ideas even more powerful if we use a new feature of Lua 5.0: coroutines. Coroutines suffer from a great lack of advertisement, and I am going to play my part here. Just like lexical scoping, coroutines taste odd at first, but once you get used with the concept, it can save your day. I have to admit that using coroutines to implement our file source would be overkill, so let's implement a concatenated source factory instead. - {{{ +We can make these ideas even more powerful if we use a new feature of Lua 5.0: coroutines. Coroutines suffer from a great lack of advertisement, and I am going to play my part here. Just like lexical scoping, coroutines taste odd at first, but once you get used with the concept, it can save your day. I have to admit that using coroutines to implement our file source would be overkill, so let's implement a concatenated source factory instead. +```lua function source.cat(...) local arg = {...} local co = coroutine.create(function() @@ -242,22 +239,22 @@ function source.cat(...) while i <= #arg do local chunk, err = arg[i]() if chunk then coroutine.yield(chunk) - elseif err then return nil, err - else i = i + 1 end + elseif err then return nil, err + else i = i + 1 end end end) return function() return shift(coroutine.resume(co)) end end -}}} +``` -The factory creates two functions. The first is an auxiliary that does all the work, in the form of a coroutine. It reads a chunk from one of the sources. If the chunk is {{nil}}, it moves to the next source, otherwise it just yields returning the chunk. When it is resumed, it continues from where it stopped and tries to read the next chunk. The second function is the source itself, and just resumes the execution of the auxiliary coroutine, returning to the user whatever chunks it returns (skipping the first result that tells us if the coroutine terminated). Imagine writing the same function without coroutines and you will notice the simplicity of this implementation. We will use coroutines again when we make the filter interface more powerful. +The factory creates two functions. The first is an auxiliary that does all the work, in the form of a coroutine. It reads a chunk from one of the sources. If the chunk is `nil`, it moves to the next source, otherwise it just yields returning the chunk. When it is resumed, it continues from where it stopped and tries to read the next chunk. The second function is the source itself, and just resumes the execution of the auxiliary coroutine, returning to the user whatever chunks it returns (skipping the first result that tells us if the coroutine terminated). Imagine writing the same function without coroutines and you will notice the simplicity of this implementation. We will use coroutines again when we make the filter interface more powerful. -==Chaining Sources== +### Chaining Sources What does it mean to chain a source with a filter? 
The most useful interpretation is that the combined source-filter is a new source that produces data and passes it through the filter before returning it. Here is a factory that does it: - {{{ +```lua function source.chain(src, f) return source.simplify(function() local chunk, err = src() @@ -265,14 +262,14 @@ function source.chain(src, f) else return f(chunk) end end) end -}}} +``` Our motivating example in the introduction chains a source with a filter. The idea of chaining a source with a filter is useful when one thinks about functions that might get their input data from a source. By chaining a simple source with one or more filters, the same function can be provided with filtered data even though it is unaware of the filtering that is happening behind its back. -==Sinks== +### Sinks -Just as we defined an interface for an initial source of data, we can also define an interface for a final destination of data. We call any function respecting that interface a ''sink''. Below are two simple factories that return sinks. The table factory creates a sink that stores all obtained data into a table. The data can later be efficiently concatenated into a single string with the {{table.concat}} library function. As another example, we introduce the {{null}} sink: A sink that simply discards the data it receives. - {{{ +Just as we defined an interface for an initial source of data, we can also define an interface for a final destination of data. We call any function respecting that interface a "sink". Below are two simple factories that return sinks. The table factory creates a sink that stores all obtained data into a table. The data can later be efficiently concatenated into a single string with the `table.concat` library function. As another example, we introduce the `null` sink: A sink that simply discards the data it receives. +```lua function sink.table(t) t = t or {} local f = function(chunk, err) @@ -289,12 +286,12 @@ end function sink.null() return null end -}}} +``` -Sinks receive consecutive chunks of data, until the end of data is notified with a {{nil}} chunk. An error is notified by an extra argument giving an error message after the {{nil}} chunk. If a sink detects an error itself and wishes not to be called again, it should return {{nil}}, optionally followed by an error message. A return value that is not {{nil}} means the source will accept more data. Finally, just as sources can choose to be replaced, so can sinks, following the same interface. Once again, it is easy to implement a {{sink.simplify}} factory that transforms a fancy sink into a simple sink. +Sinks receive consecutive chunks of data, until the end of data is notified with a `nil` chunk. An error is notified by an extra argument giving an error message after the `nil` chunk. If a sink detects an error itself and wishes not to be called again, it should return `nil`, optionally followed by an error message. A return value that is not `nil` means the source will accept more data. Finally, just as sources can choose to be replaced, so can sinks, following the same interface. Once again, it is easy to implement a `sink.simplify` factory that transforms a fancy sink into a simple sink. As an example, let's create a source that reads from the standard input, then chain it with a filter that normalizes the end-of-line convention and let's use a sink to place all data into a table, printing the result in the end. 
- {{{ +```lua local load = source.chain(source.file(io.stdin), normalize("\r\n")) local store, t = sink.table() while 1 do @@ -303,10 +300,10 @@ while 1 do if not chunk then break end end print(table.concat(t)) -}}} +``` -Again, just as we created a factory that produces a chained source-filter from a source and a filter, it is easy to create a factory that produces a new sink given a sink and a filter. The new sink passes all data it receives through the filter before handing it in to the original sink. Here is the implementation: - {{{ +Again, just as we created a factory that produces a chained source-filter from a source and a filter, it is easy to create a factory that produces a new sink given a sink and a filter. The new sink passes all data it receives through the filter before handing it in to the original sink. Here is the implementation: +```lua function sink.chain(f, snk) return function(chunk, err) local r, e = snk(f(chunk)) @@ -315,12 +312,12 @@ function sink.chain(f, snk) return 1 end end -}}} +``` -==Pumps== +### Pumps -There is a while loop that has been around for too long in our examples. It's always there because everything that we designed so far is passive. Sources, sinks, filters: None of them will do anything on their own. The operation of pumping all data a source can provide into a sink is so common that we will provide a couple helper functions to do that for us. - {{{ +There is a while loop that has been around for too long in our examples. It's always there because everything that we designed so far is passive. Sources, sinks, filters: None of them will do anything on their own. The operation of pumping all data a source can provide into a sink is so common that we will provide a couple helper functions to do that for us. +```lua function pump.step(src, snk) local chunk, src_err = src() local ret, snk_err = snk(chunk, src_err) @@ -334,31 +331,31 @@ function pump.all(src, snk, step) if not ret then return not err, err end end end -}}} +``` -The {{pump.step}} function moves one chunk of data from the source to the sink. The {{pump.all}} function takes an optional {{step}} function and uses it to pump all the data from the source to the sink. We can now use everything we have to write a program that reads a binary file from disk and stores it in another file, after encoding it to the Base64 transfer content encoding: - {{{ +The `pump.step` function moves one chunk of data from the source to the sink. The `pump.all` function takes an optional `step` function and uses it to pump all the data from the source to the sink. We can now use everything we have to write a program that reads a binary file from disk and stores it in another file, after encoding it to the Base64 transfer content encoding: +```lua local load = source.chain( - source.file(io.open("input.bin", "rb")), + source.file(io.open("input.bin", "rb")), encode("base64") ) local store = sink.chain( wrap(76), - sink.file(io.open("output.b64", "w")), + sink.file(io.open("output.b64", "w")), ) pump.all(load, store) -}}} +``` -The way we split the filters here is not intuitive, on purpose. Alternatively, we could have chained the Base64 encode filter and the line-wrap filter together, and then chain the resulting filter with either the file source or the file sink. It doesn't really matter. +The way we split the filters here is not intuitive, on purpose. 
Alternatively, we could have chained the Base64 encode filter and the line-wrap filter together, and then chain the resulting filter with either the file source or the file sink. It doesn't really matter. -===One last important change=== +## One last important change -Turns out we still have a problem. When David Burgess was writing his gzip filter, he noticed that the decompression filter can explode a small input chunk into a huge amount of data. Although we wished we could ignore this problem, we soon agreed we couldn't. The only solution is to allow filters to return partial results, and that is what we chose to do. After invoking the filter to pass input data, the user now has to loop invoking the filter to find out if it has more output data to return. Note that these extra calls can't pass more data to the filter. +Turns out we still have a problem. When David Burgess was writing his gzip filter, he noticed that the decompression filter can explode a small input chunk into a huge amount of data. Although we wished we could ignore this problem, we soon agreed we couldn't. The only solution is to allow filters to return partial results, and that is what we chose to do. After invoking the filter to pass input data, the user now has to loop invoking the filter to find out if it has more output data to return. Note that these extra calls can't pass more data to the filter. -More specifically, after passing a chunk of input data to a filter and collecting the first chunk of output data, the user invokes the filter repeatedly, passing the empty string, to get extra output chunks. When the filter itself returns an empty string, the user knows there is no more output data, and can proceed to pass the next input chunk. In the end, after the user passes a {{nil}} notifying the filter that there is no more input data, the filter might still have produced too much output data to return in a single chunk. The user has to loop again, this time passing {{nil}} each time, until the filter itself returns {{nil}} to notify the user it is finally done. +More specifically, after passing a chunk of input data to a filter and collecting the first chunk of output data, the user invokes the filter repeatedly, passing the empty string, to get extra output chunks. When the filter itself returns an empty string, the user knows there is no more output data, and can proceed to pass the next input chunk. In the end, after the user passes a `nil` notifying the filter that there is no more input data, the filter might still have produced too much output data to return in a single chunk. The user has to loop again, this time passing `nil` each time, until the filter itself returns `nil` to notify the user it is finally done. Most filters won't need this extra freedom. Fortunately, the new filter interface is easy to implement. In fact, the end-of-line translation filter we created in the introduction already conforms to it. On the other hand, the chaining function becomes much more complicated. If it wasn't for coroutines, I wouldn't be happy to implement it. Let me know if you can find a simpler implementation that does not use coroutines! - {{{ +```lua local function chain2(f1, f2) local co = coroutine.create(function(chunk) while true do @@ -380,14 +377,14 @@ local function chain2(f1, f2) return res end end -}}} +``` Chaining sources also becomes more complicated, but a similar solution is possible with coroutines. Chaining sinks is just as simple as it has always been. 
Interestingly, these modifications do not have a measurable negative impact in the performance of filters that didn't need the added flexibility. They do severely improve the efficiency of filters like the gzip filter, though, and that is why we are keeping them. -===Final considerations=== +## Final considerations -These ideas were created during the development of {{LuaSocket}}[http://www.tecgraf.puc-rio.br/luasocket] 2.0, and are available as the LTN12 module. As a result, {{LuaSocket}}[http://www.tecgraf.puc-rio.br/luasocket] implementation was greatly simplified and became much more powerful. The MIME module is especially integrated to LTN12 and provides many other filters. We felt these concepts deserved to be made public even to those that don't care about {{LuaSocket}}[http://www.tecgraf.puc-rio.br/luasocket], hence the LTN. +These ideas were created during the development of [LuaSocket](https://github.com/lunarmodules/luasocket) 2.0, and are available as the LTN12 module. As a result, [LuaSocket](https://github.com/lunarmodules/luasocket) implementation was greatly simplified and became much more powerful. The MIME module is especially integrated to LTN12 and provides many other filters. We felt these concepts deserved to be made public even to those that don't care about [LuaSocket](https://github.com/lunarmodules/luasocket), hence the LTN. -One extra application that deserves mentioning makes use of an identity filter. Suppose you want to provide some feedback to the user while a file is being downloaded into a sink. Chaining the sink with an identity filter (a filter that simply returns the received data unaltered), you can update a progress counter on the fly. The original sink doesn't have to be modified. Another interesting idea is that of a T sink: A sink that sends data to two other sinks. In summary, there appears to be enough room for many other interesting ideas. +One extra application that deserves mentioning makes use of an identity filter. Suppose you want to provide some feedback to the user while a file is being downloaded into a sink. Chaining the sink with an identity filter (a filter that simply returns the received data unaltered), you can update a progress counter on the fly. The original sink doesn't have to be modified. Another interesting idea is that of a T sink: A sink that sends data to two other sinks. In summary, there appears to be enough room for many other interesting ideas. -In this technical note we introduced filters, sources, sinks, and pumps. These are useful tools for data processing in general. Sources provide a simple abstraction for data acquisition. Sinks provide an abstraction for final data destinations. Filters define an interface for data transformations. The chaining of filters, sources and sinks provides an elegant way to create arbitrarily complex data transformation from simpler transformations. Pumps just put the machinery to work. +In this technical note we introduced filters, sources, sinks, and pumps. These are useful tools for data processing in general. Sources provide a simple abstraction for data acquisition. Sinks provide an abstraction for final data destinations. Filters define an interface for data transformations. The chaining of filters, sources and sinks provides an elegant way to create arbitrarily complex data transformation from simpler transformations. Pumps just put the machinery to work.
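The note describes `sink.simplify`, the identity filter, and the T sink only in prose. Below is a minimal sketch of the first two, following the filter/source/sink conventions defined above; the `filter.identity` name and the progress callback are illustrative choices, not part of the published ltn12 API, and the `filter` and `sink` tables are assumed to exist as in the note's other snippets.

```lua
-- Sketch only: follows the conventions of this note, not the ltn12 module itself.

-- Identity filter that reports a running byte count through an optional callback.
function filter.identity(report)
  local total = 0
  return function(chunk)
    if chunk and chunk ~= "" then
      total = total + string.len(chunk)
      if report then report(total) end
    end
    return chunk -- nil and "" pass through unchanged, so both interfaces are respected
  end
end

-- Turns a "fancy" sink (one that may ask to be replaced) into a simple one,
-- mirroring the source.simplify factory shown earlier.
function sink.simplify(snk)
  return function(chunk, err)
    local ret, err_or_new = snk(chunk, err)
    if not ret then return nil, err_or_new end
    if err_or_new then snk = err_or_new end
    return 1
  end
end
```

Chained in front of an existing sink with `sink.chain(filter.identity(print), sink.file(io.stdout))`, the identity filter prints the running total after every chunk while the original sink stays untouched.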
diff --git a/ltn013.wiki b/ltn013.wiki index a622424..9c56805 100644 --- a/ltn013.wiki +++ b/ltn013.wiki @@ -1,50 +1,47 @@ -===Using finalized exceptions=== -==or How to get rid of all those if statements== +# Using finalized exceptions +### or How to get rid of all those if statements by DiegoNehab -{{{ -}}} +## Abstract +This little LTN describes a simple exception scheme that greatly simplifies error checking in Lua programs. All the needed functionality ships standard with Lua, but is hidden between the `assert` and `pcall` functions. To make it more evident, we stick to a convenient standard (you probably already use anyways) for Lua function return values, and define two very simple helper functions (either in C or in Lua itself). -===Abstract=== -This little LTN describes a simple exception scheme that greatly simplifies error checking in Lua programs. All the needed functionality ships standard with Lua, but is hidden between the {{assert}} and {{pcall}} functions. To make it more evident, we stick to a convenient standard (you probably already use anyways) for Lua function return values, and define two very simple helper functions (either in C or in Lua itself). +## Introduction -===Introduction=== +Most Lua functions return `nil` in case of error, followed by a message describing the error. If you don't use this convention, you probably have good reasons. Hopefully, after reading on, you will realize your reasons are not good enough. -Most Lua functions return {{nil}} in case of error, followed by a message describing the error. If you don't use this convention, you probably have good reasons. Hopefully, after reading on, you will realize your reasons are not good enough. +If you are like me, you hate error checking. Most nice little code snippets that look beautiful when you first write them lose some of their charm when you add all that error checking code. Yet, error checking is as important as the rest of the code. How sad. -If you are like me, you hate error checking. Most nice little code snippets that look beautiful when you first write them lose some of their charm when you add all that error checking code. Yet, error checking is as important as the rest of the code. How sad. - -Even if you stick to a return convention, any complex task involving several function calls makes error checking both boring and error-prone (do you see the ''error'' below?) - {{{ +Even if you stick to a return convention, any complex task involving several function calls makes error checking both boring and error-prone (do you see the "error" below?) +```lua function task(arg1, arg2, ...) local ret1, err = task1(arg1) - if not ret1 then + if not ret1 then cleanup1() - return nil, error + return nil, error end local ret2, err = task2(arg2) - if not ret then + if not ret then cleanup2() - return nil, error + return nil, error end ... end -}}} +``` -The standard {{assert}} function provides an interesting alternative. To use it, simply nest every function call to be error checked with a call to {{assert}}. The {{assert}} function checks the value of its first argument. If it is {{nil}}, {{assert}} throws the second argument as an error message. Otherwise, {{assert}} lets all arguments through as if had not been there. The idea greatly simplifies error checking: - {{{ +The standard `assert` function provides an interesting alternative. To use it, simply nest every function call to be error checked with a call to `assert`. The `assert` function checks the value of its first argument. 
If it is `nil`, `assert` throws the second argument as an error message. Otherwise, `assert` lets all arguments through as if had not been there. The idea greatly simplifies error checking: +```lua function task(arg1, arg2, ...) local ret1 = assert(task1(arg1)) local ret2 = assert(task2(arg2)) ... end -}}} +``` -If any task fails, the execution is aborted by {{assert}} and the error message is displayed to the user as the cause of the problem. If no error happens, the task completes as before. There isn't a single {{if}} statement and this is great. However, there are some problems with the idea. +If any task fails, the execution is aborted by `assert` and the error message is displayed to the user as the cause of the problem. If no error happens, the task completes as before. There isn't a single `if` statement and this is great. However, there are some problems with the idea. -First, the topmost {{task}} function doesn't respect the protocol followed by the lower-level tasks: It raises an error instead of returning {{nil}} followed by the error messages. Here is where the standard {{pcall}} comes in handy. - {{{ +First, the topmost `task` function doesn't respect the protocol followed by the lower-level tasks: It raises an error instead of returning `nil` followed by the error messages. Here is where the standard `pcall` comes in handy. +```lua function xtask(arg1, arg2, ...) local ret1 = assert(task1(arg1)) local ret2 = assert(task2(arg2)) @@ -56,22 +53,22 @@ function task(arg1, arg2, ...) if ok then return ret_or_err else return nil, ret_or_err end end -}}} +``` -Our new {{task}} function is well behaved. {{Pcall}} catches any error raised by the calls to {{assert}} and returns it after the status code. That way, errors don't get propagated to the user of the high level {{task}} function. +Our new `task` function is well behaved. `Pcall` catches any error raised by the calls to `assert` and returns it after the status code. That way, errors don't get propagated to the user of the high level `task` function. -These are the main ideas for our exception scheme, but there are still a few glitches to fix: +These are the main ideas for our exception scheme, but there are still a few glitches to fix: - * Directly using {{pcall}} ruined the simplicity of the code; - * What happened to the cleanup function calls? What if we have to, say, close a file? - * {{Assert}} messes with the error message before raising the error (it adds line number information). +* Directly using `pcall` ruined the simplicity of the code; +* What happened to the cleanup function calls? What if we have to, say, close a file? +* `Assert` messes with the error message before raising the error (it adds line number information). -Fortunately, all these problems are very easy to solve and that's what we do in the following sections. +Fortunately, all these problems are very easy to solve and that's what we do in the following sections. -== Introducing the {{protect}} factory == +## Introducing the `protect` factory -We used the {{pcall}} function to shield the user from errors that could be raised by the underlying implementation. Instead of directly using {{pcall}} (and thus duplicating code) every time we prefer a factory that does the same job: - {{{ +We used the `pcall` function to shield the user from errors that could be raised by the underlying implementation. Instead of directly using `pcall` (and thus duplicating code) every time we prefer a factory that does the same job: +```lua local function pack(ok, ...) 
return ok, {...} end @@ -83,19 +80,19 @@ function protect(f) else return nil, ret[1] end end end -}}} +``` -The {{protect}} factory receives a function that might raise exceptions and returns a function that respects our return value convention. Now we can rewrite the top-level {{task}} function in a much cleaner way: - {{{ +The `protect` factory receives a function that might raise exceptions and returns a function that respects our return value convention. Now we can rewrite the top-level `task` function in a much cleaner way: +```lua task = protect(function(arg1, arg2, ...) local ret1 = assert(task1(arg1)) local ret2 = assert(task2(arg2)) ... end) -}}} +``` -The Lua implementation of the {{protect}} factory suffers with the creation of tables to hold multiple arguments and return values. It is possible (and easy) to implement the same function in C, without any table creation. - {{{ +The Lua implementation of the `protect` factory suffers with the creation of tables to hold multiple arguments and return values. It is possible (and easy) to implement the same function in C, without any table creation. +```c static int safecall(lua_State *L) { lua_pushvalue(L, lua_upvalueindex(1)); lua_insert(L, 1); @@ -110,17 +107,17 @@ static int protect(lua_State *L) { lua_pushcclosure(L, safecall, 1); return 1; } -}}} +``` -===The {{newtry}} factory=== +## The `newtry` factory Let's solve the two remaining issues with a single shot and use a concrete example to illustrate the proposed solution. Suppose you want to write a function to download an HTTP document. You have to connect, send the request and read the reply. Each of these tasks can fail, but if something goes wrong after you connected, you have to close the connection before returning the error message. - {{{ +```lua get = protect(function(host, path) local c -- create a try function with a finalizer to close the socket - local try = newtry(function() - if c then c:close() end + local try = newtry(function() + if c then c:close() end end) -- connect and send request c = try(connect(host, 80)) @@ -137,34 +134,34 @@ get = protect(function(host, path) c:close() return b, h end) -}}} +``` -The {{newtry}} factory returns a function that works just like {{assert}}. The differences are that the {{try}} function doesn't mess with the error message and it calls an optional ''finalizer'' before raising the error. In our example, the finalizer simply closes the socket. +The `newtry` factory returns a function that works just like `assert`. The differences are that the `try` function doesn't mess with the error message and it calls an optional "finalizer" before raising the error. In our example, the finalizer simply closes the socket. -Even with a simple example like this, we see that the finalized exceptions simplified our life. Let's see what we gain in general, not just in this example: +Even with a simple example like this, we see that the finalized exceptions simplified our life. Let's see what we gain in general, not just in this example: - * We don't need to declare dummy variables to hold error messages in case any ever shows up; - * We avoid using a variable to hold something that could either be a return value or an error message; - * We didn't have to use several ''if'' statements to check for errors; - * If an error happens, we know our finalizer is going to be invoked automatically; - * Exceptions get propagated, so we don't repeat these ''if'' statements until the error reaches the user. 
+* We don't need to declare dummy variables to hold error messages in case any ever shows up; +* We avoid using a variable to hold something that could either be a return value or an error message; +* We didn't have to use several `if` statements to check for errors; +* If an error happens, we know our finalizer is going to be invoked automatically; +* Exceptions get propagated, so we don't repeat these `if` statements until the error reaches the user. -Try writing the same function without the tricks we used above and you will see that the code gets ugly. Longer sequences of operations with error checking would get even uglier. So let's implement the {{newtry}} function in Lua: - {{{ +Try writing the same function without the tricks we used above and you will see that the code gets ugly. Longer sequences of operations with error checking would get even uglier. So let's implement the `newtry` function in Lua: +```lua function newtry(f) - return function(...) - if not arg[1] then - if f then f() end - error(arg[2], 0) - else + return function(...) + if not arg[1] then + if f then f() end + error(arg[2], 0) + else return ... end end end -}}} +``` Again, the implementation suffers from the creation of tables at each function call, so we prefer the C version: - {{{ +```c static int finalize(lua_State *L) { if (!lua_toboolean(L, 1)) { lua_pushvalue(L, lua_upvalueindex(1)); @@ -182,13 +179,13 @@ static int do_nothing(lua_State *L) { static int newtry(lua_State *L) { lua_settop(L, 1); - if (lua_isnil(L, 1)) + if (lua_isnil(L, 1)) lua_pushcfunction(L, do_nothing); lua_pushcclosure(L, finalize, 1); return 1; } -}}} +``` -===Final considerations=== +## Final considerations -The {{protect}} and {{newtry}} functions saved a ''lot'' of work in the implementation of {{LuaSocket}}[http://www.tecgraf.puc-rio.br/luasocket]. The size of some modules was cut in half by the these ideas. It's true the scheme is not as generic as the exception mechanism of programming languages like C++ or Java, but the power/simplicity ratio is favorable and I hope it serves you as well as it served {{LuaSocket}}. +The `protect` and `newtry` functions saved a *lot* of work in the implementation of [LuaSocket](https://github.com/lunarmodules/luasocket). The size of some modules was cut in half by these ideas. It's true the scheme is not as generic as the exception mechanism of programming languages like C++ or Java, but the power/simplicity ratio is favorable, and I hope it serves you as well as it served [LuaSocket](https://github.com/lunarmodules/luasocket).
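(Editor's aside, not part of the original note: the Lua version of `newtry` above relies on the implicit `arg` table that Lua 5.0 created for vararg functions. On Lua 5.1 and later, where that table is no longer available, a rough equivalent of the same idea could be written with `select`. The sketch below assumes the same conventions as the rest of this note: the first value is the success flag, the second is the error message, and the optional finalizer runs before the error is re-raised.)

```lua
-- Sketch of newtry for Lua 5.1+ (an assumed equivalent, not the original code).
function newtry(finalizer)
  return function(...)
    if not (...) then
      if finalizer then finalizer() end
      -- level 0 keeps the error message untouched (no position information)
      error((select(2, ...)), 0)
    end
    return ...
  end
end
```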
-- cgit v1.2.3-55-g6feb From 97d5194f302d3fb9fe27874d9b5f73004a208d01 Mon Sep 17 00:00:00 2001 From: Thijs Schreijer <thijs@thijsschreijer.nl> Date: Tue, 29 Mar 2022 14:10:30 +0200 Subject: chore(ltn) rename files to markdown --- ltn012.md | 390 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ ltn012.wiki | 390 ------------------------------------------------------------ ltn013.md | 191 +++++++++++++++++++++++++++++ ltn013.wiki | 191 ----------------------------- 4 files changed, 581 insertions(+), 581 deletions(-) create mode 100644 ltn012.md delete mode 100644 ltn012.wiki create mode 100644 ltn013.md delete mode 100644 ltn013.wiki diff --git a/ltn012.md b/ltn012.md new file mode 100644 index 0000000..fa26b4a --- /dev/null +++ b/ltn012.md @@ -0,0 +1,390 @@ +# Filters, sources and sinks: design, motivation and examples +### or Functional programming for the rest of us +by DiegoNehab + +## Abstract + +Certain operations can be implemented in the form of filters. A filter is a function that processes data received in consecutive function calls, returning partial results chunk by chunk. Examples of operations that can be implemented as filters include the end-of-line normalization for text, Base64 and Quoted-Printable transfer content encodings, the breaking of text into lines, SMTP byte stuffing, and there are many others. Filters become even more powerful when we allow them to be chained together to create composite filters. Filters can be seen as middle nodes in a chain of data transformations. Sources an sinks are the corresponding end points of these chains. A source is a function that produces data, chunk by chunk, and a sink is a function that takes data, chunk by chunk. In this technical note, we define an elegant interface for filters, sources, sinks and chaining. We evolve our interface progressively, until we reach a high degree of generality. We discuss difficulties that arise during the implementation of this interface and we provide solutions and examples. + +## Introduction + +Applications sometimes have too much information to process to fit in memory and are thus forced to process data in smaller parts. Even when there is enough memory, processing all the data atomically may take long enough to frustrate a user that wants to interact with the application. Furthermore, complex transformations can often be defined as series of simpler operations. Several different complex transformations might share the same simpler operations, so that an uniform interface to combine them is desirable. The following concepts constitute our solution to these problems. + +"Filters" are functions that accept successive chunks of input, and produce successive chunks of output. Furthermore, the result of concatenating all the output data is the same as the result of applying the filter over the concatenation of the input data. As a consequence, boundaries are irrelevant: filters have to handle input data split arbitrarily by the user. + +A "chain" is a function that combines the effect of two (or more) other functions, but whose interface is indistinguishable from the interface of one of its components. Thus, a chained filter can be used wherever an atomic filter can be used. However, its effect on data is the combined effect of its component filters. Note that, as a consequence, chains can be chained themselves to create arbitrarily complex operations that can be used just like atomic operations. 
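As a tiny illustration of the boundary-irrelevance property described above (an editorial sketch, not from the original note, using the `normalize` end-of-line filter factory defined later in this text): feeding a filter its input whole, or split at an arbitrary point, must concatenate to the same output.

```lua
-- Two independent instances of the same filter; splitting the input,
-- even right between a CR and a LF, does not change the combined result.
local f1 = normalize("\r\n")
local f2 = normalize("\r\n")
local whole = f1("one\r\ntwo\r")
local split = f2("one\r") .. f2("\ntwo\r")
assert(whole == split)  -- both are "one\r\ntwo\r\n"
```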
+ +Filters can be seen as internal nodes in a network through which data flows, potentially being transformed along its way. Chains connect these nodes together. To complete the picture, we need "sources" and "sinks" as initial and final nodes of the network, respectively. Less abstractly, a source is a function that produces new data every time it is called. On the other hand, sinks are functions that give a final destination to the data they receive. Naturally, sources and sinks can be chained with filters. + +Finally, filters, chains, sources, and sinks are all passive entities: they need to be repeatedly called in order for something to happen. "Pumps" provide the driving force that pushes data through the network, from a source to a sink. + + Hopefully, these concepts will become clear with examples. In the following sections, we start with simplified interfaces, which we improve several times until we can find no obvious shortcomings. The evolution we present is not contrived: it follows the steps we followed ourselves as we consolidated our understanding of these concepts. + +### A concrete example + +Some data transformations are easier to implement as filters than others. Examples of operations that can be implemented as filters include the end-of-line normalization for text, the Base64 and Quoted-Printable transfer content encodings, the breaking of text into lines, SMTP byte stuffing, and many others. Let's use the end-of-line normalization as an example to define our initial filter interface. We later discuss why the implementation might not be trivial. + +Assume we are given text in an unknown end-of-line convention (including possibly mixed conventions) out of the commonly found Unix (LF), Mac OS (CR), and DOS (CRLF) conventions. We would like to be able to write code like the following: +```lua +input = source.chain(source.file(io.stdin), normalize("\r\n")) +output = sink.file(io.stdout) +pump(input, output) +``` + +This program should read data from the standard input stream and normalize the end-of-line markers to the canonic CRLF marker defined by the MIME standard, finally sending the results to the standard output stream. For that, we use a "file source" to produce data from standard input, and chain it with a filter that normalizes the data. The pump then repeatedly gets data from the source, and moves it to the "file sink" that sends it to standard output. + +To make the discussion even more concrete, we start by discussing the implementation of the normalization filter. The `normalize` "factory" is a function that creates such a filter. Our initial filter interface is as follows: the filter receives a chunk of input data, and returns a chunk of processed data. When there is no more input data, the user notifies the filter by invoking it with a `nil` chunk. The filter then returns the final chunk of processed data. + +Although the interface is extremely simple, the implementation doesn't seem so obvious. Any filter respecting this interface needs to keep some kind of context between calls. This is because chunks can be broken between the CR and LF characters marking the end of a line. This need for context storage is what motivates the use of factories: each time the factory is called, it returns a filter with its own context so that we can have several independent filters being used at the same time. For the normalization filter, we know that the obvious solution (i.e. 
concatenating all the input into the context before producing any output) is not good enough, so we will have to find another way. + +We will break the implementation into two parts: a low-level filter, and a factory of high-level filters. The low-level filter will be implemented in C and will not carry any context between function calls. The high-level filter factory, implemented in Lua, will create and return a high-level filter that keeps whatever context the low-level filter needs, but isolates the user from its internal details. That way, we take advantage of C's efficiency to perform the dirty work, and take advantage of Lua's simplicity for the bookkeeping. + +### The Lua part of the implementation + +Below is the implementation of the factory of high-level end-of-line normalization filters: +```lua +function filter.cycle(low, ctx, extra) + return function(chunk) + local ret + ret, ctx = low(ctx, chunk, extra) + return ret + end +end + +function normalize(marker) + return cycle(eol, 0, marker) +end +``` + +The `normalize` factory simply calls a more generic factory, the `cycle` factory. This factory receives a low-level filter, an initial context and some extra value and returns the corresponding high-level filter. Each time the high-level filter is called with a new chunk, it calls the low-level filter passing the previous context, the new chunk and the extra argument. The low-level filter produces the chunk of processed data and a new context. Finally, the high-level filter updates its internal context and returns the processed chunk of data to the user. It is the low-level filter that does all the work. Notice that this implementation takes advantage of the Lua 5.0 lexical scoping rules to store the context locally, between function calls. + +Moving to the low-level filter, we notice there is no perfect solution to the end-of-line marker normalization problem itself. The difficulty comes from an inherent ambiguity in the definition of empty lines within mixed input. However, the following solution works well for any consistent input, as well as for non-empty lines in mixed input. It also does a reasonable job with empty lines and serves as a good example of how to implement a low-level filter. + +Here is what we do: CR and LF are considered candidates for a line break. We issue *one* end-of-line marker if one of the candidates is seen alone, or followed by a *different* candidate. That is, CR CR and LF LF issue two end-of-line markers each, but CR LF and LF CR issue only one marker. This idea takes care of Mac OS, Mac OS X, VMS and Unix, DOS and MIME, as well as probably other more obscure conventions. + +### The C part of the implementation + +The low-level filter is divided into two simple functions. The inner function actually does the conversion. It takes each input character in turn, deciding what to output and how to modify the context. The context tells if the last character seen was a candidate and, if so, which candidate it was. +```c +#define candidate(c) (c == CR || c == LF) +static int process(int c, int last, const char *marker, luaL_Buffer *buffer) { + if (candidate(c)) { + if (candidate(last)) { + if (c == last) luaL_addstring(buffer, marker); + return 0; + } else { + luaL_addstring(buffer, marker); + return c; + } + } else { + luaL_putchar(buffer, c); + return 0; + } +} +``` + +The inner function makes use of the buffer interface of Lua's auxiliary library for its efficiency and ease of use. The outer function simply interfaces with Lua.
It receives the context and the input chunk (as well as an optional end-of-line marker), and returns the transformed output and the new context. +```c +static int eol(lua_State *L) { + int ctx = luaL_checkint(L, 1); + size_t isize = 0; + const char *input = luaL_optlstring(L, 2, NULL, &isize); + const char *last = input + isize; + const char *marker = luaL_optstring(L, 3, CRLF); + luaL_Buffer buffer; + luaL_buffinit(L, &buffer); + if (!input) { + lua_pushnil(L); + lua_pushnumber(L, 0); + return 2; + } + while (input < last) + ctx = process(*input++, ctx, marker, &buffer); + luaL_pushresult(&buffer); + lua_pushnumber(L, ctx); + return 2; +} +``` + +Notice that if the input chunk is `nil`, the operation is considered to be finished. In that case, the loop will not execute a single time and the context is reset to the initial state. This allows the filter to be reused indefinitely. It is a good idea to write filters like this, when possible. + +Besides the end-of-line normalization filter shown above, many other filters can be implemented with the same ideas. Examples include Base64 and Quoted-Printable transfer content encodings, the breaking of text into lines, SMTP byte stuffing etc. The challenging part is to decide what will be the context. For line breaking, for instance, it could be the number of bytes left in the current line. For Base64 encoding, it could be the bytes that remain in the division of the input into 3-byte atoms. + +## Chaining + +Filters become more powerful when the concept of chaining is introduced. Suppose you have a filter for Quoted-Printable encoding and you want to encode some text. According to the standard, the text has to be normalized into its canonic form prior to encoding. A nice interface that simplifies this task is a factory that creates a composite filter that passes data through multiple filters, but that can be used wherever a primitive filter is used. +```lua +local function chain2(f1, f2) + return function(chunk) + local ret = f2(f1(chunk)) + if chunk then return ret + else return ret .. f2() end + end +end + +function filter.chain(...) + local arg = {...} + local f = arg[1] + for i = 2, #arg do + f = chain2(f, arg[i]) + end + return f +end + +local chain = filter.chain(normalize("\r\n"), encode("quoted-printable")) +while 1 do + local chunk = io.read(2048) + io.write(chain(chunk)) + if not chunk then break end +end +``` + +The chaining factory is very simple. All it does is return a function that passes data through all filters and returns the result to the user. It uses the simpler auxiliary function that knows how to chain two filters together. In the auxiliary function, special care must be taken if the chunk is final. This is because the final chunk notification has to be pushed through both filters in turn. Thanks to the chain factory, it is easy to perform the Quoted-Printable conversion, as the above example shows. + +## Sources, sinks, and pumps + +As we noted in the introduction, the filters we introduced so far act as the internal nodes in a network of transformations. Information flows from node to node (or rather from one filter to the next) and is transformed on its way out. Chaining filters together is the way we found to connect nodes in the network. But what about the end nodes? In the beginning of the network, we need a node that provides the data, a source. In the end of the network, we need a node that takes in the data, a sink. + +### Sources + +We start with two simple sources. 
The first is the `empty` source: It simply returns no data, possibly returning an error message. The second is the `file` source, which produces the contents of a file in a chunk by chunk fashion, closing the file handle when done. +```lua +function source.empty(err) + return function() + return nil, err + end +end + +function source.file(handle, io_err) + if handle then + return function() + local chunk = handle:read(2048) + if not chunk then handle:close() end + return chunk + end + else return source.empty(io_err or "unable to open file") end +end +``` + +A source returns the next chunk of data each time it is called. When there is no more data, it just returns `nil`. If there is an error, the source can inform the caller by returning `nil` followed by an error message. Adrian Sietsma noticed that, although not on purpose, the interface for sources is compatible with the idea of iterators in Lua 5.0. That is, a data source can be nicely used in conjunction with `for` loops. Using our file source as an iterator, we can rewrite our first example: +```lua +local process = normalize("\r\n") +for chunk in source.file(io.stdin) do + io.write(process(chunk)) +end +io.write(process(nil)) +``` + +Notice that the last call to the filter obtains the last chunk of processed data. The loop terminates when the source returns `nil` and therefore we need that final call outside of the loop. + +### Maintaining state between calls + +It is often the case that a source needs to change its behavior after some event. One simple example would be a file source that wants to make sure it returns `nil` regardless of how many times it is called after the end of file, avoiding attempts to read past the end of the file. Another example would be a source that returns the contents of several files, as if they were concatenated, moving from one file to the next until the end of the last file is reached. + +One way to implement this kind of source is to have the factory declare extra state variables that the source can use via lexical scoping. Our file source could set the file handle itself to `nil` when it detects the end-of-file. Then, every time the source is called, it could check if the handle is still valid and act accordingly: +```lua +function source.file(handle, io_err) + if handle then + return function() + if not handle then return nil end + local chunk = handle:read(2048) + if not chunk then + handle:close() + handle = nil + end + return chunk + end + else return source.empty(io_err or "unable to open file") end +end +``` + +Another way to implement this behavior involves a change in the source interface to makes it more flexible. Let's allow a source to return a second value, besides the next chunk of data. If the returned chunk is `nil`, the extra return value tells us what happened. A second `nil` means that there is just no more data and the source is empty. Any other value is considered to be an error message. On the other hand, if the chunk was "not" `nil`, the second return value tells us whether the source wants to be replaced. If it is `nil`, we should proceed using the same source. Otherwise it has to be another source, which we have to use from then on, to get the remaining data. + +This extra freedom is good for someone writing a source function, but it is a pain for those that have to use it. Fortunately, given one of these "fancy" sources, we can transform it into a simple source that never needs to be replaced, using the following factory. 
+```lua +function source.simplify(src) + return function() + local chunk, err_or_new = src() + src = err_or_new or src + if not chunk then return nil, err_or_new + else return chunk end + end +end +``` + +The simplification factory allows us to write fancy sources and use them as if they were simple. Therefore, our next functions will only produce simple sources, and functions that take sources will assume they are simple. + +Going back to our file source, the extended interface allows for a more elegant implementation. The new source just asks to be replaced by an empty source as soon as there is no more data. There is no repeated checking of the handle. To make things simpler to the user, the factory itself simplifies the the fancy file source before returning it to the user: +```lua +function source.file(handle, io_err) + if handle then + return source.simplify(function() + local chunk = handle:read(2048) + if not chunk then + handle:close() + return "", source.empty() + end + return chunk + end) + else return source.empty(io_err or "unable to open file") end +end +``` + +We can make these ideas even more powerful if we use a new feature of Lua 5.0: coroutines. Coroutines suffer from a great lack of advertisement, and I am going to play my part here. Just like lexical scoping, coroutines taste odd at first, but once you get used with the concept, it can save your day. I have to admit that using coroutines to implement our file source would be overkill, so let's implement a concatenated source factory instead. +```lua +function source.cat(...) + local arg = {...} + local co = coroutine.create(function() + local i = 1 + while i <= #arg do + local chunk, err = arg[i]() + if chunk then coroutine.yield(chunk) + elseif err then return nil, err + else i = i + 1 end + end + end) + return function() + return shift(coroutine.resume(co)) + end +end +``` + +The factory creates two functions. The first is an auxiliary that does all the work, in the form of a coroutine. It reads a chunk from one of the sources. If the chunk is `nil`, it moves to the next source, otherwise it just yields returning the chunk. When it is resumed, it continues from where it stopped and tries to read the next chunk. The second function is the source itself, and just resumes the execution of the auxiliary coroutine, returning to the user whatever chunks it returns (skipping the first result that tells us if the coroutine terminated). Imagine writing the same function without coroutines and you will notice the simplicity of this implementation. We will use coroutines again when we make the filter interface more powerful. + +### Chaining Sources + +What does it mean to chain a source with a filter? The most useful interpretation is that the combined source-filter is a new source that produces data and passes it through the filter before returning it. Here is a factory that does it: +```lua +function source.chain(src, f) + return source.simplify(function() + local chunk, err = src() + if not chunk then return f(nil), source.empty(err) + else return f(chunk) end + end) +end +``` + +Our motivating example in the introduction chains a source with a filter. The idea of chaining a source with a filter is useful when one thinks about functions that might get their input data from a source. By chaining a simple source with one or more filters, the same function can be provided with filtered data even though it is unaware of the filtering that is happening behind its back. 
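As a short usage sketch (assuming the `source.file`, `source.chain` and `normalize` definitions above), a chained source behaves exactly like a plain source, so it can drive a `for` loop directly; the final `f(nil)` call now happens inside the chain instead of after the loop:

```lua
-- Read standard input through the end-of-line normalization filter.
-- The chain flushes the filter once the file source runs out of data.
local input = source.chain(source.file(io.stdin), normalize("\r\n"))
for chunk in input do
  io.write(chunk)
end
```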
+ +### Sinks + +Just as we defined an interface for an initial source of data, we can also define an interface for a final destination of data. We call any function respecting that interface a "sink". Below are two simple factories that return sinks. The table factory creates a sink that stores all obtained data into a table. The data can later be efficiently concatenated into a single string with the `table.concat` library function. As another example, we introduce the `null` sink: A sink that simply discards the data it receives. +```lua +function sink.table(t) + t = t or {} + local f = function(chunk, err) + if chunk then table.insert(t, chunk) end + return 1 + end + return f, t +end + +local function null() + return 1 +end + +function sink.null() + return null +end +``` + +Sinks receive consecutive chunks of data, until the end of data is notified with a `nil` chunk. An error is notified by an extra argument giving an error message after the `nil` chunk. If a sink detects an error itself and wishes not to be called again, it should return `nil`, optionally followed by an error message. A return value that is not `nil` means the source will accept more data. Finally, just as sources can choose to be replaced, so can sinks, following the same interface. Once again, it is easy to implement a `sink.simplify` factory that transforms a fancy sink into a simple sink. + +As an example, let's create a source that reads from the standard input, then chain it with a filter that normalizes the end-of-line convention and let's use a sink to place all data into a table, printing the result in the end. +```lua +local load = source.chain(source.file(io.stdin), normalize("\r\n")) +local store, t = sink.table() +while 1 do + local chunk = load() + store(chunk) + if not chunk then break end +end +print(table.concat(t)) +``` + +Again, just as we created a factory that produces a chained source-filter from a source and a filter, it is easy to create a factory that produces a new sink given a sink and a filter. The new sink passes all data it receives through the filter before handing it in to the original sink. Here is the implementation: +```lua +function sink.chain(f, snk) + return function(chunk, err) + local r, e = snk(f(chunk)) + if not r then return nil, e end + if not chunk then return snk(nil, err) end + return 1 + end +end +``` + +### Pumps + +There is a while loop that has been around for too long in our examples. It's always there because everything that we designed so far is passive. Sources, sinks, filters: None of them will do anything on their own. The operation of pumping all data a source can provide into a sink is so common that we will provide a couple helper functions to do that for us. +```lua +function pump.step(src, snk) + local chunk, src_err = src() + local ret, snk_err = snk(chunk, src_err) + return chunk and ret and not src_err and not snk_err, src_err or snk_err +end + +function pump.all(src, snk, step) + step = step or pump.step + while true do + local ret, err = step(src, snk) + if not ret then return not err, err end + end +end +``` + +The `pump.step` function moves one chunk of data from the source to the sink. The `pump.all` function takes an optional `step` function and uses it to pump all the data from the source to the sink. 
We can now use everything we have to write a program that reads a binary file from disk and stores it in another file, after encoding it to the Base64 transfer content encoding: +```lua +local load = source.chain( + source.file(io.open("input.bin", "rb")), + encode("base64") +) +local store = sink.chain( + wrap(76), + sink.file(io.open("output.b64", "w")) +) +pump.all(load, store) +``` + +The way we split the filters here is not intuitive, on purpose. Alternatively, we could have chained the Base64 encode filter and the line-wrap filter together, and then chained the resulting filter with either the file source or the file sink. It doesn't really matter. + +## One last important change + +It turns out we still have a problem. When David Burgess was writing his gzip filter, he noticed that the decompression filter can explode a small input chunk into a huge amount of data. Although we wished we could ignore this problem, we soon agreed we couldn't. The only solution is to allow filters to return partial results, and that is what we chose to do. After invoking the filter to pass input data, the user now has to loop invoking the filter to find out if it has more output data to return. Note that these extra calls can't pass more data to the filter. + +More specifically, after passing a chunk of input data to a filter and collecting the first chunk of output data, the user invokes the filter repeatedly, passing the empty string, to get extra output chunks. When the filter itself returns an empty string, the user knows there is no more output data, and can proceed to pass the next input chunk. In the end, after the user passes a `nil` notifying the filter that there is no more input data, the filter might still have produced too much output data to return in a single chunk. The user has to loop again, this time passing `nil` each time, until the filter itself returns `nil` to notify the user it is finally done. + +Most filters won't need this extra freedom. Fortunately, the new filter interface is easy to implement. In fact, the end-of-line translation filter we created in the introduction already conforms to it. On the other hand, the chaining function becomes much more complicated. If it weren't for coroutines, I wouldn't be happy to implement it. Let me know if you can find a simpler implementation that does not use coroutines! +```lua +local function chain2(f1, f2) + local co = coroutine.create(function(chunk) + while true do + local filtered1 = f1(chunk) + local filtered2 = f2(filtered1) + local done2 = filtered1 and "" + while true do + if filtered2 == "" or filtered2 == nil then break end + coroutine.yield(filtered2) + filtered2 = f2(done2) + end + if filtered1 == "" then chunk = coroutine.yield(filtered1) + elseif filtered1 == nil then return nil + else chunk = chunk and "" end + end + end) + return function(chunk) + local _, res = coroutine.resume(co, chunk) + return res + end +end +``` + +Chaining sources also becomes more complicated, but a similar solution is possible with coroutines. Chaining sinks is just as simple as it has always been. Interestingly, these modifications do not have a measurable negative impact on the performance of filters that didn't need the added flexibility. They do greatly improve the efficiency of filters like the gzip filter, though, and that is why we are keeping them. + +## Final considerations + +These ideas were created during the development of [LuaSocket](https://github.com/lunarmodules/luasocket) 2.0, and are available as the LTN12 module.
As a result, [LuaSocket](https://github.com/lunarmodules/luasocket) implementation was greatly simplified and became much more powerful. The MIME module is especially integrated to LTN12 and provides many other filters. We felt these concepts deserved to be made public even to those that don't care about [LuaSocket](https://github.com/lunarmodules/luasocket), hence the LTN. + +One extra application that deserves mentioning makes use of an identity filter. Suppose you want to provide some feedback to the user while a file is being downloaded into a sink. Chaining the sink with an identity filter (a filter that simply returns the received data unaltered), you can update a progress counter on the fly. The original sink doesn't have to be modified. Another interesting idea is that of a T sink: A sink that sends data to two other sinks. In summary, there appears to be enough room for many other interesting ideas. + +In this technical note we introduced filters, sources, sinks, and pumps. These are useful tools for data processing in general. Sources provide a simple abstraction for data acquisition. Sinks provide an abstraction for final data destinations. Filters define an interface for data transformations. The chaining of filters, sources and sinks provides an elegant way to create arbitrarily complex data transformation from simpler transformations. Pumps just put the machinery to work. diff --git a/ltn012.wiki b/ltn012.wiki deleted file mode 100644 index fa26b4a..0000000 --- a/ltn012.wiki +++ /dev/null @@ -1,390 +0,0 @@ -# Filters, sources and sinks: design, motivation and examples -### or Functional programming for the rest of us -by DiegoNehab - -## Abstract - -Certain operations can be implemented in the form of filters. A filter is a function that processes data received in consecutive function calls, returning partial results chunk by chunk. Examples of operations that can be implemented as filters include the end-of-line normalization for text, Base64 and Quoted-Printable transfer content encodings, the breaking of text into lines, SMTP byte stuffing, and there are many others. Filters become even more powerful when we allow them to be chained together to create composite filters. Filters can be seen as middle nodes in a chain of data transformations. Sources an sinks are the corresponding end points of these chains. A source is a function that produces data, chunk by chunk, and a sink is a function that takes data, chunk by chunk. In this technical note, we define an elegant interface for filters, sources, sinks and chaining. We evolve our interface progressively, until we reach a high degree of generality. We discuss difficulties that arise during the implementation of this interface and we provide solutions and examples. - -## Introduction - -Applications sometimes have too much information to process to fit in memory and are thus forced to process data in smaller parts. Even when there is enough memory, processing all the data atomically may take long enough to frustrate a user that wants to interact with the application. Furthermore, complex transformations can often be defined as series of simpler operations. Several different complex transformations might share the same simpler operations, so that an uniform interface to combine them is desirable. The following concepts constitute our solution to these problems. - -"Filters" are functions that accept successive chunks of input, and produce successive chunks of output. 
Furthermore, the result of concatenating all the output data is the same as the result of applying the filter over the concatenation of the input data. As a consequence, boundaries are irrelevant: filters have to handle input data split arbitrarily by the user. - -A "chain" is a function that combines the effect of two (or more) other functions, but whose interface is indistinguishable from the interface of one of its components. Thus, a chained filter can be used wherever an atomic filter can be used. However, its effect on data is the combined effect of its component filters. Note that, as a consequence, chains can be chained themselves to create arbitrarily complex operations that can be used just like atomic operations. - -Filters can be seen as internal nodes in a network through which data flows, potentially being transformed along its way. Chains connect these nodes together. To complete the picture, we need "sources" and "sinks" as initial and final nodes of the network, respectively. Less abstractly, a source is a function that produces new data every time it is called. On the other hand, sinks are functions that give a final destination to the data they receive. Naturally, sources and sinks can be chained with filters. - -Finally, filters, chains, sources, and sinks are all passive entities: they need to be repeatedly called in order for something to happen. "Pumps" provide the driving force that pushes data through the network, from a source to a sink. - - Hopefully, these concepts will become clear with examples. In the following sections, we start with simplified interfaces, which we improve several times until we can find no obvious shortcomings. The evolution we present is not contrived: it follows the steps we followed ourselves as we consolidated our understanding of these concepts. - -### A concrete example - -Some data transformations are easier to implement as filters than others. Examples of operations that can be implemented as filters include the end-of-line normalization for text, the Base64 and Quoted-Printable transfer content encodings, the breaking of text into lines, SMTP byte stuffing, and many others. Let's use the end-of-line normalization as an example to define our initial filter interface. We later discuss why the implementation might not be trivial. - -Assume we are given text in an unknown end-of-line convention (including possibly mixed conventions) out of the commonly found Unix (LF), Mac OS (CR), and DOS (CRLF) conventions. We would like to be able to write code like the following: -```lua -input = source.chain(source.file(io.stdin), normalize("\r\n")) -output = sink.file(io.stdout) -pump(input, output) -``` - -This program should read data from the standard input stream and normalize the end-of-line markers to the canonic CRLF marker defined by the MIME standard, finally sending the results to the standard output stream. For that, we use a "file source" to produce data from standard input, and chain it with a filter that normalizes the data. The pump then repeatedly gets data from the source, and moves it to the "file sink" that sends it to standard output. - -To make the discussion even more concrete, we start by discussing the implementation of the normalization filter. The `normalize` "factory" is a function that creates such a filter. Our initial filter interface is as follows: the filter receives a chunk of input data, and returns a chunk of processed data. 
When there is no more input data, the user notifies the filter by invoking it with a `nil` chunk. The filter then returns the final chunk of processed data. - -Although the interface is extremely simple, the implementation doesn't seem so obvious. Any filter respecting this interface needs to keep some kind of context between calls. This is because chunks can be broken between the CR and LF characters marking the end of a line. This need for context storage is what motivates the use of factories: each time the factory is called, it returns a filter with its own context so that we can have several independent filters being used at the same time. For the normalization filter, we know that the obvious solution (i.e. concatenating all the input into the context before producing any output) is not good enough, so we will have to find another way. - -We will break the implementation in two parts: a low-level filter, and a factory of high-level filters. The low-level filter will be implemented in C and will not carry any context between function calls. The high-level filter factory, implemented in Lua, will create and return a high-level filter that keeps whatever context the low-level filter needs, but isolates the user from its internal details. That way, we take advantage of C's efficiency to perform the dirty work, and take advantage of Lua's simplicity for the bookkeeping. - -### The Lua part of the implementation - -Below is the implementation of the factory of high-level end-of-line normalization filters: -```lua -function filter.cycle(low, ctx, extra) - return function(chunk) - local ret - ret, ctx = low(ctx, chunk, extra) - return ret - end -end - -function normalize(marker) - return cycle(eol, 0, marker) -end -``` - -The `normalize` factory simply calls a more generic factory, the `cycle` factory. This factory receives a low-level filter, an initial context and some extra value and returns the corresponding high-level filter. Each time the high level filer is called with a new chunk, it calls the low-level filter passing the previous context, the new chunk and the extra argument. The low-level filter produces the chunk of processed data and a new context. Finally, the high-level filter updates its internal context and returns the processed chunk of data to the user. It is the low-level filter that does all the work. Notice that this implementation takes advantage of the Lua 5.0 lexical scoping rules to store the context locally, between function calls. - -Moving to the low-level filter, we notice there is no perfect solution to the end-of-line marker normalization problem itself. The difficulty comes from an inherent ambiguity on the definition of empty lines within mixed input. However, the following solution works well for any consistent input, as well as for non-empty lines in mixed input. It also does a reasonable job with empty lines and serves as a good example of how to implement a low-level filter. - -Here is what we do: CR and LF are considered candidates for line break. We issue "one" end-of-line line marker if one of the candidates is seen alone, or followed by a "different" candidate. That is, CR CR and LF LF issue two end of line markers each, but CR LF and LF CR issue only one marker. This idea takes care of Mac OS, Mac OS X, VMS and Unix, DOS and MIME, as well as probably other more obscure conventions. - -### The C part of the implementation - -The low-level filter is divided into two simple functions. The inner function actually does the conversion. 
It takes each input character in turn, deciding what to output and how to modify the context. The context tells if the last character seen was a candidate and, if so, which candidate it was. -```c -#define candidate(c) (c == CR || c == LF) -static int process(int c, int last, const char *marker, luaL_Buffer *buffer) { - if (candidate(c)) { - if (candidate(last)) { - if (c == last) luaL_addstring(buffer, marker); - return 0; - } else { - luaL_addstring(buffer, marker); - return c; - } - } else { - luaL_putchar(buffer, c); - return 0; - } -} -``` - -The inner function makes use of Lua's auxiliary library's buffer interface for its efficiency and ease of use. The outer function simply interfaces with Lua. It receives the context and the input chunk (as well as an optional end-of-line marker), and returns the transformed output and the new context. -```c -static int eol(lua_State *L) { - int ctx = luaL_checkint(L, 1); - size_t isize = 0; - const char *input = luaL_optlstring(L, 2, NULL, &isize); - const char *last = input + isize; - const char *marker = luaL_optstring(L, 3, CRLF); - luaL_Buffer buffer; - luaL_buffinit(L, &buffer); - if (!input) { - lua_pushnil(L); - lua_pushnumber(L, 0); - return 2; - } - while (input < last) - ctx = process(*input++, ctx, marker, &buffer); - luaL_pushresult(&buffer); - lua_pushnumber(L, ctx); - return 2; -} -``` - -Notice that if the input chunk is `nil`, the operation is considered to be finished. In that case, the loop will not execute a single time and the context is reset to the initial state. This allows the filter to be reused indefinitely. It is a good idea to write filters like this, when possible. - -Besides the end-of-line normalization filter shown above, many other filters can be implemented with the same ideas. Examples include Base64 and Quoted-Printable transfer content encodings, the breaking of text into lines, SMTP byte stuffing etc. The challenging part is to decide what will be the context. For line breaking, for instance, it could be the number of bytes left in the current line. For Base64 encoding, it could be the bytes that remain in the division of the input into 3-byte atoms. - -## Chaining - -Filters become more powerful when the concept of chaining is introduced. Suppose you have a filter for Quoted-Printable encoding and you want to encode some text. According to the standard, the text has to be normalized into its canonic form prior to encoding. A nice interface that simplifies this task is a factory that creates a composite filter that passes data through multiple filters, but that can be used wherever a primitive filter is used. -```lua -local function chain2(f1, f2) - return function(chunk) - local ret = f2(f1(chunk)) - if chunk then return ret - else return ret .. f2() end - end -end - -function filter.chain(...) - local arg = {...} - local f = arg[1] - for i = 2, #arg do - f = chain2(f, arg[i]) - end - return f -end - -local chain = filter.chain(normalize("\r\n"), encode("quoted-printable")) -while 1 do - local chunk = io.read(2048) - io.write(chain(chunk)) - if not chunk then break end -end -``` - -The chaining factory is very simple. All it does is return a function that passes data through all filters and returns the result to the user. It uses the simpler auxiliary function that knows how to chain two filters together. In the auxiliary function, special care must be taken if the chunk is final. This is because the final chunk notification has to be pushed through both filters in turn. 
Thanks to the chain factory, it is easy to perform the Quoted-Printable conversion, as the above example shows. - -## Sources, sinks, and pumps - -As we noted in the introduction, the filters we introduced so far act as the internal nodes in a network of transformations. Information flows from node to node (or rather from one filter to the next) and is transformed on its way out. Chaining filters together is the way we found to connect nodes in the network. But what about the end nodes? In the beginning of the network, we need a node that provides the data, a source. In the end of the network, we need a node that takes in the data, a sink. - -### Sources - -We start with two simple sources. The first is the `empty` source: It simply returns no data, possibly returning an error message. The second is the `file` source, which produces the contents of a file in a chunk by chunk fashion, closing the file handle when done. -```lua -function source.empty(err) - return function() - return nil, err - end -end - -function source.file(handle, io_err) - if handle then - return function() - local chunk = handle:read(2048) - if not chunk then handle:close() end - return chunk - end - else return source.empty(io_err or "unable to open file") end -end -``` - -A source returns the next chunk of data each time it is called. When there is no more data, it just returns `nil`. If there is an error, the source can inform the caller by returning `nil` followed by an error message. Adrian Sietsma noticed that, although not on purpose, the interface for sources is compatible with the idea of iterators in Lua 5.0. That is, a data source can be nicely used in conjunction with `for` loops. Using our file source as an iterator, we can rewrite our first example: -```lua -local process = normalize("\r\n") -for chunk in source.file(io.stdin) do - io.write(process(chunk)) -end -io.write(process(nil)) -``` - -Notice that the last call to the filter obtains the last chunk of processed data. The loop terminates when the source returns `nil` and therefore we need that final call outside of the loop. - -### Maintaining state between calls - -It is often the case that a source needs to change its behavior after some event. One simple example would be a file source that wants to make sure it returns `nil` regardless of how many times it is called after the end of file, avoiding attempts to read past the end of the file. Another example would be a source that returns the contents of several files, as if they were concatenated, moving from one file to the next until the end of the last file is reached. - -One way to implement this kind of source is to have the factory declare extra state variables that the source can use via lexical scoping. Our file source could set the file handle itself to `nil` when it detects the end-of-file. Then, every time the source is called, it could check if the handle is still valid and act accordingly: -```lua -function source.file(handle, io_err) - if handle then - return function() - if not handle then return nil end - local chunk = handle:read(2048) - if not chunk then - handle:close() - handle = nil - end - return chunk - end - else return source.empty(io_err or "unable to open file") end -end -``` - -Another way to implement this behavior involves a change in the source interface to makes it more flexible. Let's allow a source to return a second value, besides the next chunk of data. If the returned chunk is `nil`, the extra return value tells us what happened. 
A second `nil` means that there is just no more data and the source is empty. Any other value is considered to be an error message. On the other hand, if the chunk was "not" `nil`, the second return value tells us whether the source wants to be replaced. If it is `nil`, we should proceed using the same source. Otherwise it has to be another source, which we have to use from then on, to get the remaining data. - -This extra freedom is good for someone writing a source function, but it is a pain for those that have to use it. Fortunately, given one of these "fancy" sources, we can transform it into a simple source that never needs to be replaced, using the following factory. -```lua -function source.simplify(src) - return function() - local chunk, err_or_new = src() - src = err_or_new or src - if not chunk then return nil, err_or_new - else return chunk end - end -end -``` - -The simplification factory allows us to write fancy sources and use them as if they were simple. Therefore, our next functions will only produce simple sources, and functions that take sources will assume they are simple. - -Going back to our file source, the extended interface allows for a more elegant implementation. The new source just asks to be replaced by an empty source as soon as there is no more data. There is no repeated checking of the handle. To make things simpler to the user, the factory itself simplifies the the fancy file source before returning it to the user: -```lua -function source.file(handle, io_err) - if handle then - return source.simplify(function() - local chunk = handle:read(2048) - if not chunk then - handle:close() - return "", source.empty() - end - return chunk - end) - else return source.empty(io_err or "unable to open file") end -end -``` - -We can make these ideas even more powerful if we use a new feature of Lua 5.0: coroutines. Coroutines suffer from a great lack of advertisement, and I am going to play my part here. Just like lexical scoping, coroutines taste odd at first, but once you get used with the concept, it can save your day. I have to admit that using coroutines to implement our file source would be overkill, so let's implement a concatenated source factory instead. -```lua -function source.cat(...) - local arg = {...} - local co = coroutine.create(function() - local i = 1 - while i <= #arg do - local chunk, err = arg[i]() - if chunk then coroutine.yield(chunk) - elseif err then return nil, err - else i = i + 1 end - end - end) - return function() - return shift(coroutine.resume(co)) - end -end -``` - -The factory creates two functions. The first is an auxiliary that does all the work, in the form of a coroutine. It reads a chunk from one of the sources. If the chunk is `nil`, it moves to the next source, otherwise it just yields returning the chunk. When it is resumed, it continues from where it stopped and tries to read the next chunk. The second function is the source itself, and just resumes the execution of the auxiliary coroutine, returning to the user whatever chunks it returns (skipping the first result that tells us if the coroutine terminated). Imagine writing the same function without coroutines and you will notice the simplicity of this implementation. We will use coroutines again when we make the filter interface more powerful. - -### Chaining Sources - -What does it mean to chain a source with a filter? The most useful interpretation is that the combined source-filter is a new source that produces data and passes it through the filter before returning it. 
Here is a factory that does it: -```lua -function source.chain(src, f) - return source.simplify(function() - local chunk, err = src() - if not chunk then return f(nil), source.empty(err) - else return f(chunk) end - end) -end -``` - -Our motivating example in the introduction chains a source with a filter. The idea of chaining a source with a filter is useful when one thinks about functions that might get their input data from a source. By chaining a simple source with one or more filters, the same function can be provided with filtered data even though it is unaware of the filtering that is happening behind its back. - -### Sinks - -Just as we defined an interface for an initial source of data, we can also define an interface for a final destination of data. We call any function respecting that interface a "sink". Below are two simple factories that return sinks. The table factory creates a sink that stores all obtained data into a table. The data can later be efficiently concatenated into a single string with the `table.concat` library function. As another example, we introduce the `null` sink: A sink that simply discards the data it receives. -```lua -function sink.table(t) - t = t or {} - local f = function(chunk, err) - if chunk then table.insert(t, chunk) end - return 1 - end - return f, t -end - -local function null() - return 1 -end - -function sink.null() - return null -end -``` - -Sinks receive consecutive chunks of data, until the end of data is notified with a `nil` chunk. An error is notified by an extra argument giving an error message after the `nil` chunk. If a sink detects an error itself and wishes not to be called again, it should return `nil`, optionally followed by an error message. A return value that is not `nil` means the source will accept more data. Finally, just as sources can choose to be replaced, so can sinks, following the same interface. Once again, it is easy to implement a `sink.simplify` factory that transforms a fancy sink into a simple sink. - -As an example, let's create a source that reads from the standard input, then chain it with a filter that normalizes the end-of-line convention and let's use a sink to place all data into a table, printing the result in the end. -```lua -local load = source.chain(source.file(io.stdin), normalize("\r\n")) -local store, t = sink.table() -while 1 do - local chunk = load() - store(chunk) - if not chunk then break end -end -print(table.concat(t)) -``` - -Again, just as we created a factory that produces a chained source-filter from a source and a filter, it is easy to create a factory that produces a new sink given a sink and a filter. The new sink passes all data it receives through the filter before handing it in to the original sink. Here is the implementation: -```lua -function sink.chain(f, snk) - return function(chunk, err) - local r, e = snk(f(chunk)) - if not r then return nil, e end - if not chunk then return snk(nil, err) end - return 1 - end -end -``` - -### Pumps - -There is a while loop that has been around for too long in our examples. It's always there because everything that we designed so far is passive. Sources, sinks, filters: None of them will do anything on their own. The operation of pumping all data a source can provide into a sink is so common that we will provide a couple helper functions to do that for us. 
-
-```lua
-function pump.step(src, snk)
-    local chunk, src_err = src()
-    local ret, snk_err = snk(chunk, src_err)
-    return chunk and ret and not src_err and not snk_err, src_err or snk_err
-end
-
-function pump.all(src, snk, step)
-    step = step or pump.step
-    while true do
-        local ret, err = step(src, snk)
-        if not ret then return not err, err end
-    end
-end
-```
-
-The `pump.step` function moves one chunk of data from the source to the sink. The `pump.all` function takes an optional `step` function and uses it to pump all the data from the source to the sink. We can now use everything we have to write a program that reads a binary file from disk and stores it in another file, after encoding it with the Base64 content transfer encoding:
-```lua
-local load = source.chain(
-    source.file(io.open("input.bin", "rb")),
-    encode("base64")
-)
-local store = sink.chain(
-    wrap(76),
-    sink.file(io.open("output.b64", "w"))
-)
-pump.all(load, store)
-```
-
-The way we split the filters here is not intuitive, on purpose. Alternatively, we could have chained the Base64 encode filter and the line-wrap filter together, and then chained the resulting filter with either the file source or the file sink. It doesn't really matter.
-
-## One last important change
-
-It turns out we still have a problem. When David Burgess was writing his gzip filter, he noticed that the decompression filter can explode a small input chunk into a huge amount of data. Although we wished we could ignore this problem, we soon agreed we couldn't. The only solution is to allow filters to return partial results, and that is what we chose to do. After invoking the filter to pass input data, the user now has to loop invoking the filter to find out whether it has more output data to return. Note that these extra calls can't pass more data to the filter.
-
-More specifically, after passing a chunk of input data to a filter and collecting the first chunk of output data, the user invokes the filter repeatedly, passing the empty string, to get extra output chunks. When the filter itself returns an empty string, the user knows there is no more output data and can proceed to pass the next input chunk. In the end, after the user passes a `nil` notifying the filter that there is no more input data, the filter might still have produced too much output data to return in a single chunk. The user has to loop again, this time passing `nil` each time, until the filter itself returns `nil` to notify the user it is finally done.
-
-Most filters won't need this extra freedom. Fortunately, the new filter interface is easy to implement. In fact, the end-of-line translation filter we created in the introduction already conforms to it. On the other hand, the chaining function becomes much more complicated. If it weren't for coroutines, I wouldn't be happy to implement it. Let me know if you can find a simpler implementation that does not use coroutines! 
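To make the extended interface concrete, here is a minimal sketch (not part of the original note) of the calling loop just described. It assumes `filter` follows the extended interface, and that `give` and `take` are hypothetical helpers that supply input chunks and consume output chunks:

```lua
-- Drive a filter through the extended interface: after each input chunk,
-- keep calling the filter with "" until it answers "" (no more output for now);
-- after the final nil, keep calling it with nil until it answers nil.
local function drive(filter, give, take)
    while true do
        local chunk = give()               -- next input chunk, or nil at the end
        local out = filter(chunk)
        while out ~= "" and out ~= nil do
            take(out)                      -- consume each partial result
            out = filter(chunk and "")     -- "" while input remains, nil after the end
        end
        if chunk == nil then break end     -- the filter has flushed everything
    end
end
```

The coroutine-based chaining factory below has to perform this same kind of looping internally, between the two filters it joins.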
-```lua
-local function chain2(f1, f2)
-    local co = coroutine.create(function(chunk)
-        while true do
-            local filtered1 = f1(chunk)
-            local filtered2 = f2(filtered1)
-            local done2 = filtered1 and ""
-            while true do
-                if filtered2 == "" or filtered2 == nil then break end
-                coroutine.yield(filtered2)
-                filtered2 = f2(done2)
-            end
-            if filtered1 == "" then chunk = coroutine.yield(filtered1)
-            elseif filtered1 == nil then return nil
-            else chunk = chunk and "" end
-        end
-    end)
-    return function(chunk)
-        local _, res = coroutine.resume(co, chunk)
-        return res
-    end
-end
-```
-
-Chaining sources also becomes more complicated, but a similar solution is possible with coroutines. Chaining sinks is just as simple as it has always been. Interestingly, these modifications do not have a measurable negative impact on the performance of filters that didn't need the added flexibility. They do greatly improve the efficiency of filters like the gzip filter, though, and that is why we are keeping them.
-
-## Final considerations
-
-These ideas were created during the development of [LuaSocket](https://github.com/lunarmodules/luasocket) 2.0, and are available as the LTN12 module. As a result, the [LuaSocket](https://github.com/lunarmodules/luasocket) implementation was greatly simplified and became much more powerful. The MIME module is tightly integrated with LTN12 and provides many other filters. We felt these concepts deserved to be made public even to those who don't care about [LuaSocket](https://github.com/lunarmodules/luasocket), hence this LTN.
-
-One extra application that deserves mentioning makes use of an identity filter. Suppose you want to provide some feedback to the user while a file is being downloaded into a sink. By chaining the sink with an identity filter (a filter that simply returns the received data unaltered), you can update a progress counter on the fly. The original sink doesn't have to be modified. Another interesting idea is that of a T sink: A sink that sends data to two other sinks. In summary, there appears to be enough room for many other interesting ideas.
-
-In this technical note we introduced filters, sources, sinks, and pumps. These are useful tools for data processing in general. Sources provide a simple abstraction for data acquisition. Sinks provide an abstraction for final data destinations. Filters define an interface for data transformations. The chaining of filters, sources and sinks provides an elegant way to create arbitrarily complex data transformations from simpler transformations. Pumps just put the machinery to work.
diff --git a/ltn013.md b/ltn013.md
new file mode 100644
index 0000000..9c56805
--- /dev/null
+++ b/ltn013.md
@@ -0,0 +1,191 @@
+# Using finalized exceptions
+### or How to get rid of all those if statements
+by Diego Nehab
+
+
+## Abstract
+This little LTN describes a simple exception scheme that greatly simplifies error checking in Lua programs. All the needed functionality ships standard with Lua, but is hidden behind the `assert` and `pcall` functions. To make it more evident, we stick to a convenient standard (which you probably already use anyway) for Lua function return values, and define two very simple helper functions (either in C or in Lua itself).
+
+## Introduction
+
+Most Lua functions return `nil` in case of error, followed by a message describing the error. If you don't use this convention, you probably have good reasons. Hopefully, after reading on, you will realize your reasons are not good enough. 
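To make the convention concrete, here is a minimal sketch of a function that follows it; `read_config` and the file name are invented for this illustration only:

```lua
-- Follows the nil-plus-message convention: returns the result on success,
-- or nil followed by an error message on failure.
local function read_config(path)
    local file, err = io.open(path, "r")   -- io.open itself follows the convention
    if not file then return nil, err end
    local contents = file:read("*a")
    file:close()
    return contents
end

local config, err = read_config("app.conf")
if not config then print("could not read configuration: " .. err) end
```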
+
+If you are like me, you hate error checking. Most nice little code snippets that look beautiful when you first write them lose some of their charm when you add all that error checking code. Yet, error checking is as important as the rest of the code. How sad.
+
+Even if you stick to a return convention, any complex task involving several function calls makes error checking both boring and error-prone (do you see the "error" below?)
+```lua
+function task(arg1, arg2, ...)
+    local ret1, err = task1(arg1)
+    if not ret1 then
+        cleanup1()
+        return nil, error
+    end
+    local ret2, err = task2(arg2)
+    if not ret then
+        cleanup2()
+        return nil, error
+    end
+    ...
+end
+```
+
+The standard `assert` function provides an interesting alternative. To use it, simply nest every function call to be error checked with a call to `assert`. The `assert` function checks the value of its first argument. If it is `nil`, `assert` throws the second argument as an error message. Otherwise, `assert` lets all arguments through as if it had not been there. The idea greatly simplifies error checking:
+```lua
+function task(arg1, arg2, ...)
+    local ret1 = assert(task1(arg1))
+    local ret2 = assert(task2(arg2))
+    ...
+end
+```
+
+If any task fails, the execution is aborted by `assert` and the error message is displayed to the user as the cause of the problem. If no error happens, the task completes as before. There isn't a single `if` statement, and this is great. However, there are some problems with the idea.
+
+First, the topmost `task` function doesn't respect the protocol followed by the lower-level tasks: It raises an error instead of returning `nil` followed by the error message. Here is where the standard `pcall` comes in handy.
+```lua
+function xtask(arg1, arg2, ...)
+    local ret1 = assert(task1(arg1))
+    local ret2 = assert(task2(arg2))
+    ...
+end
+
+function task(arg1, arg2, ...)
+    local ok, ret_or_err = pcall(xtask, arg1, arg2, ...)
+    if ok then return ret_or_err
+    else return nil, ret_or_err end
+end
+```
+
+Our new `task` function is well behaved. `Pcall` catches any error raised by the calls to `assert` and returns it after the status code. That way, errors don't get propagated to the user of the high-level `task` function.
+
+These are the main ideas for our exception scheme, but there are still a few glitches to fix:
+
+* Directly using `pcall` ruined the simplicity of the code;
+* What happened to the cleanup function calls? What if we have to, say, close a file?
+* `Assert` messes with the error message before raising the error (it adds line number information).
+
+Fortunately, all these problems are very easy to solve, and that's what we do in the following sections.
+
+## Introducing the `protect` factory
+
+We used the `pcall` function to shield the user from errors that could be raised by the underlying implementation. Instead of directly using `pcall` (and thus duplicating code) every time, we prefer a factory that does the same job:
+```lua
+local function pack(ok, ...)
+    return ok, {...}
+end
+
+function protect(f)
+    return function(...)
+        local ok, ret = pack(pcall(f, ...))
+        if ok then return unpack(ret)
+        else return nil, ret[1] end
+    end
+end
+```
+
+The `protect` factory receives a function that might raise exceptions and returns a function that respects our return value convention. Now we can rewrite the top-level `task` function in a much cleaner way:
+```lua
+task = protect(function(arg1, arg2, ...)
+    local ret1 = assert(task1(arg1))
+    local ret2 = assert(task2(arg2))
+    ...
+end)
+```
+
+The Lua implementation of the `protect` factory suffers from the creation of tables to hold multiple arguments and return values. It is possible (and easy) to implement the same function in C, without any table creation:
+```c
+static int safecall(lua_State *L) {
+    lua_pushvalue(L, lua_upvalueindex(1));
+    lua_insert(L, 1);
+    if (lua_pcall(L, lua_gettop(L) - 1, LUA_MULTRET, 0) != 0) {
+        lua_pushnil(L);
+        lua_insert(L, 1);
+        return 2;
+    } else return lua_gettop(L);
+}
+
+static int protect(lua_State *L) {
+    lua_pushcclosure(L, safecall, 1);
+    return 1;
+}
+```
+
+## The `newtry` factory
+
+Let's solve the two remaining issues in a single stroke and use a concrete example to illustrate the proposed solution. Suppose you want to write a function to download an HTTP document. You have to connect, send the request and read the reply. Each of these tasks can fail, but if something goes wrong after you connected, you have to close the connection before returning the error message:
+```lua
+get = protect(function(host, path)
+    local c
+    -- create a try function with a finalizer to close the socket
+    local try = newtry(function()
+        if c then c:close() end
+    end)
+    -- connect and send request
+    c = try(connect(host, 80))
+    try(c:send("GET " .. path .. " HTTP/1.0\r\n\r\n"))
+    -- get headers
+    local h = {}
+    while 1 do
+        local l = try(c:receive())
+        if l == "" then break end
+        table.insert(h, l)
+    end
+    -- get body
+    local b = try(c:receive("*a"))
+    c:close()
+    return b, h
+end)
+```
+
+The `newtry` factory returns a function that works just like `assert`. The differences are that the `try` function doesn't mess with the error message, and that it calls an optional "finalizer" before raising the error. In our example, the finalizer simply closes the socket.
+
+Even with a simple example like this, we see that finalized exceptions simplified our life. Let's see what we gain in general, not just in this example:
+
+* We don't need to declare dummy variables to hold error messages in case any ever shows up;
+* We avoid using a variable to hold something that could either be a return value or an error message;
+* We didn't have to use several `if` statements to check for errors;
+* If an error happens, we know our finalizer is going to be invoked automatically;
+* Exceptions get propagated, so we don't have to repeat these `if` statements until the error reaches the user.
+
+Try writing the same function without the tricks we used above and you will see that the code gets ugly. Longer sequences of operations with error checking would get even uglier. So let's implement the `newtry` function in Lua:
+```lua
+function newtry(f)
+    return function(...)
+        -- collect all arguments into a table (the table creation mentioned below)
+        local arg = {...}
+        if not arg[1] then
+            if f then f() end
+            error(arg[2], 0)
+        else
+            return ...
+        end
+    end
+end
+```
+
+Again, the implementation suffers from the creation of tables at each function call, so we prefer the C version:
+```c
+static int finalize(lua_State *L) {
+    if (!lua_toboolean(L, 1)) {
+        lua_pushvalue(L, lua_upvalueindex(1));
+        lua_pcall(L, 0, 0, 0);
+        lua_settop(L, 2);
+        lua_error(L);
+        return 0;
+    } else return lua_gettop(L);
+}
+
+static int do_nothing(lua_State *L) {
+    (void) L;
+    return 0;
+}
+
+static int newtry(lua_State *L) {
+    lua_settop(L, 1);
+    if (lua_isnil(L, 1))
+        lua_pushcfunction(L, do_nothing);
+    lua_pushcclosure(L, finalize, 1);
+    return 1;
+}
+```
+
+## Final considerations
+
+The `protect` and `newtry` functions saved a *lot* of work in the implementation of [LuaSocket](https://github.com/lunarmodules/luasocket). 
The size of some modules was cut in half by the these ideas. It's true the scheme is not as generic as the exception mechanism of programming languages like C++ or Java, but the power/simplicity ratio is favorable and I hope it serves you as well as it served [LuaSocket](https://github.com/lunarmodules/luasocket). diff --git a/ltn013.wiki b/ltn013.wiki deleted file mode 100644 index 9c56805..0000000 --- a/ltn013.wiki +++ /dev/null @@ -1,191 +0,0 @@ -# Using finalized exceptions -### or How to get rid of all those if statements -by DiegoNehab - - -## Abstract -This little LTN describes a simple exception scheme that greatly simplifies error checking in Lua programs. All the needed functionality ships standard with Lua, but is hidden between the `assert` and `pcall` functions. To make it more evident, we stick to a convenient standard (you probably already use anyways) for Lua function return values, and define two very simple helper functions (either in C or in Lua itself). - -## Introduction - -Most Lua functions return `nil` in case of error, followed by a message describing the error. If you don't use this convention, you probably have good reasons. Hopefully, after reading on, you will realize your reasons are not good enough. - -If you are like me, you hate error checking. Most nice little code snippets that look beautiful when you first write them lose some of their charm when you add all that error checking code. Yet, error checking is as important as the rest of the code. How sad. - -Even if you stick to a return convention, any complex task involving several function calls makes error checking both boring and error-prone (do you see the "error" below?) -```lua -function task(arg1, arg2, ...) - local ret1, err = task1(arg1) - if not ret1 then - cleanup1() - return nil, error - end - local ret2, err = task2(arg2) - if not ret then - cleanup2() - return nil, error - end - ... -end -``` - -The standard `assert` function provides an interesting alternative. To use it, simply nest every function call to be error checked with a call to `assert`. The `assert` function checks the value of its first argument. If it is `nil`, `assert` throws the second argument as an error message. Otherwise, `assert` lets all arguments through as if had not been there. The idea greatly simplifies error checking: -```lua -function task(arg1, arg2, ...) - local ret1 = assert(task1(arg1)) - local ret2 = assert(task2(arg2)) - ... -end -``` - -If any task fails, the execution is aborted by `assert` and the error message is displayed to the user as the cause of the problem. If no error happens, the task completes as before. There isn't a single `if` statement and this is great. However, there are some problems with the idea. - -First, the topmost `task` function doesn't respect the protocol followed by the lower-level tasks: It raises an error instead of returning `nil` followed by the error messages. Here is where the standard `pcall` comes in handy. -```lua -function xtask(arg1, arg2, ...) - local ret1 = assert(task1(arg1)) - local ret2 = assert(task2(arg2)) - ... -end - -function task(arg1, arg2, ...) - local ok, ret_or_err = pcall(xtask, arg1, arg2, ...) - if ok then return ret_or_err - else return nil, ret_or_err end -end -``` - -Our new `task` function is well behaved. `Pcall` catches any error raised by the calls to `assert` and returns it after the status code. That way, errors don't get propagated to the user of the high level `task` function. 
- -These are the main ideas for our exception scheme, but there are still a few glitches to fix: - -* Directly using `pcall` ruined the simplicity of the code; -* What happened to the cleanup function calls? What if we have to, say, close a file? -* `Assert` messes with the error message before raising the error (it adds line number information). - -Fortunately, all these problems are very easy to solve and that's what we do in the following sections. - -## Introducing the `protect` factory - -We used the `pcall` function to shield the user from errors that could be raised by the underlying implementation. Instead of directly using `pcall` (and thus duplicating code) every time we prefer a factory that does the same job: -```lua -local function pack(ok, ...) - return ok, {...} -end - -function protect(f) - return function(...) - local ok, ret = pack(pcall(f, ...)) - if ok then return unpack(ret) - else return nil, ret[1] end - end -end -``` - -The `protect` factory receives a function that might raise exceptions and returns a function that respects our return value convention. Now we can rewrite the top-level `task` function in a much cleaner way: -```lua -task = protect(function(arg1, arg2, ...) - local ret1 = assert(task1(arg1)) - local ret2 = assert(task2(arg2)) - ... -end) -``` - -The Lua implementation of the `protect` factory suffers with the creation of tables to hold multiple arguments and return values. It is possible (and easy) to implement the same function in C, without any table creation. -```c -static int safecall(lua_State *L) { - lua_pushvalue(L, lua_upvalueindex(1)); - lua_insert(L, 1); - if (lua_pcall(L, lua_gettop(L) - 1, LUA_MULTRET, 0) != 0) { - lua_pushnil(L); - lua_insert(L, 1); - return 2; - } else return lua_gettop(L); -} - -static int protect(lua_State *L) { - lua_pushcclosure(L, safecall, 1); - return 1; -} -``` - -## The `newtry` factory - -Let's solve the two remaining issues with a single shot and use a concrete example to illustrate the proposed solution. Suppose you want to write a function to download an HTTP document. You have to connect, send the request and read the reply. Each of these tasks can fail, but if something goes wrong after you connected, you have to close the connection before returning the error message. -```lua -get = protect(function(host, path) - local c - -- create a try function with a finalizer to close the socket - local try = newtry(function() - if c then c:close() end - end) - -- connect and send request - c = try(connect(host, 80)) - try(c:send("GET " .. path .. " HTTP/1.0\r\n\r\n")) - -- get headers - local h = {} - while 1 do - l = try(c:receive()) - if l == "" then break end - table.insert(h, l) - end - -- get body - local b = try(c:receive("*a")) - c:close() - return b, h -end) -``` - -The `newtry` factory returns a function that works just like `assert`. The differences are that the `try` function doesn't mess with the error message and it calls an optional "finalizer" before raising the error. In our example, the finalizer simply closes the socket. - -Even with a simple example like this, we see that the finalized exceptions simplified our life. 
Let's see what we gain in general, not just in this example: - -* We don't need to declare dummy variables to hold error messages in case any ever shows up; -* We avoid using a variable to hold something that could either be a return value or an error message; -* We didn't have to use several "if" statements to check for errors; -* If an error happens, we know our finalizer is going to be invoked automatically; -* Exceptions get propagated, so we don't repeat these "if" statements until the error reaches the user. - -Try writing the same function without the tricks we used above and you will see that the code gets ugly. Longer sequences of operations with error checking would get even uglier. So let's implement the `newtry` function in Lua: -```lua -function newtry(f) - return function(...) - if not arg[1] then - if f then f() end - error(arg[2], 0) - else - return ... - end - end -end -``` - -Again, the implementation suffers from the creation of tables at each function call, so we prefer the C version: -```lua -static int finalize(lua_State *L) { - if (!lua_toboolean(L, 1)) { - lua_pushvalue(L, lua_upvalueindex(1)); - lua_pcall(L, 0, 0, 0); - lua_settop(L, 2); - lua_error(L); - return 0; - } else return lua_gettop(L); -} - -static int do_nothing(lua_State *L) { - (void) L; - return 0; -} - -static int newtry(lua_State *L) { - lua_settop(L, 1); - if (lua_isnil(L, 1)) - lua_pushcfunction(L, do_nothing); - lua_pushcclosure(L, finalize, 1); - return 1; -} -``` - -## Final considerations - -The `protect` and `newtry` functions saved a "lot" of work in the implementation of [LuaSocket](https://github.com/lunarmodules/luasocket). The size of some modules was cut in half by the these ideas. It's true the scheme is not as generic as the exception mechanism of programming languages like C++ or Java, but the power/simplicity ratio is favorable and I hope it serves you as well as it served [LuaSocket](https://github.com/lunarmodules/luasocket). -- cgit v1.2.3-55-g6feb