about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Diego Nehab <diego@tecgraf.puc-rio.br> 2005-08-19 01:35:26 +0000
committer: Diego Nehab <diego@tecgraf.puc-rio.br> 2005-08-19 01:35:26 +0000
commit: 5e8ae76248ed31496dc6fef7855498a0479159ed (patch)
tree: b72e99154f4901b503dbbe883445bee6c42ef70e
parent: 0c3cdd5ef2485a79d6fec9261f2850c41577d5b3 (diff)
download: luasocket-5e8ae76248ed31496dc6fef7855498a0479159ed.tar.gz
luasocket-5e8ae76248ed31496dc6fef7855498a0479159ed.tar.bz2
luasocket-5e8ae76248ed31496dc6fef7855498a0479159ed.zip
Dispatcher working for check-links. Need to get it working with forwarder.
-rw-r--r-- etc/check-links.lua 122
-rw-r--r-- etc/dispatch.lua 267
2 files changed, 344 insertions, 45 deletions
diff --git a/etc/check-links.lua b/etc/check-links.lua
index 9d837e4..e06cc91 100644
--- a/etc/check-links.lua
+++ b/etc/check-links.lua
@@ -1,49 +1,84 @@
1----------------------------------------------------------------------------- 1-----------------------------------------------------------------------------
2-- Little program that checks links in HTML files 2-- Little program that checks links in HTML files, using coroutines and
3-- non-blocking I/O via the dispatcher module.
3-- LuaSocket sample files 4-- LuaSocket sample files
4-- Author: Diego Nehab 5-- Author: Diego Nehab
5-- RCS ID: $Id$ 6-- RCS ID: $$
6----------------------------------------------------------------------------- 7-----------------------------------------------------------------------------
7local http = require("socket.http") 8local dispatch, url, http, handler
8local url = require("socket.url") 9
9http.TIMEOUT = 10 10arg = arg or {}
11if table.getn(arg) < 1 then
12 print("Usage:\n luasocket check-links.lua [-n] {<url>}")
13 exit()
14end
15
16if arg[1] ~= "-n" then
17 -- if using blocking I/O, simulate dispatcher interface
18 url = require("socket.url")
19 http = require("socket.http")
20 handler = {
21 start = function(self, f)
22 f()
23 end,
24 tcp = socket.tcp
25 }
26 http.TIMEOUT = 10
27else
28 -- if non-blocking I/O was requested, use the dispatcher
29 table.remove(arg, 1)
30 dispatch = require("dispatch")
31 dispatch.TIMEOUT = 10
32 url = require("socket.url")
33 http = require("socket.http")
34 handler = dispatch.newhandler()
35end
36
37local nthreads = 0
38
39-- get the status of a URL using the dispatcher
40function getstatus(link)
41 local parsed = url.parse(link, {scheme = "file"})
42 if parsed.scheme == "http" then
43 nthreads = nthreads + 1
44 handler:start(function()
45 local r, c, h, s = http.request{
46 method = "HEAD",
47 url = link,
48 create = handler.tcp
49 }
50 if r and c == 200 then io.write('\t', link, '\n')
51 else io.write('\t', link, ': ', tostring(c), '\n') end
52 nthreads = nthreads - 1
53 end)
54 end
55end
10 56
11function readfile(path) 57function readfile(path)
12 path = url.unescape(path) 58 path = url.unescape(path)
13 local file, error = io.open(path, "r") 59 local file, error = io.open(path, "r")
14 if file then 60 if file then
15 local body = file:read("*a") 61 local body = file:read("*a")
16 file:close() 62 file:close()
17 return body 63 return body
18 else return nil, error end 64 else return nil, error end
19end 65end
20 66
21function getstatus(u) 67function load(u)
22 local parsed = url.parse(u, {scheme = "file"}) 68 local parsed = url.parse(u, { scheme = "file" })
23 if parsed.scheme == "http" then
24 local r, c, h, s = http.request{url = u, method = "HEAD"}
25 if c ~= 200 then return s or c end
26 elseif parsed.scheme == "file" then
27 local file, error = io.open(url.unescape(parsed.path), "r")
28 if file then file:close()
29 else return error end
30 else return string.format("unhandled scheme '%s'", parsed.scheme) end
31end
32
33function retrieve(u)
34 local parsed = url.parse(u, { scheme = "file" })
35 local body, headers, code, error 69 local body, headers, code, error
36 local base = u 70 local base = u
37 if parsed.scheme == "http" then 71 if parsed.scheme == "http" then
38 body, code, headers = http.request(u) 72 body, code, headers = http.request(u)
39 if code == 200 then 73 if code == 200 then
74 -- if there was a redirect, update base to reflect it
40 base = headers.location or base 75 base = headers.location or base
41 end 76 end
42 if not body then 77 if not body then
43 error = code 78 error = code
44 end 79 end
45 elseif parsed.scheme == "file" then 80 elseif parsed.scheme == "file" then
46 body, error = readfile(parsed.path) 81 body, error = readfile(parsed.path)
47 else error = string.format("unhandled scheme '%s'", parsed.scheme) end 82 else error = string.format("unhandled scheme '%s'", parsed.scheme) end
48 return base, body, error 83 return base, body, error
49end 84end
@@ -53,35 +88,32 @@ function getlinks(body, base)
53 body = string.gsub(body, "%<%!%-%-.-%-%-%>", "") 88 body = string.gsub(body, "%<%!%-%-.-%-%-%>", "")
54 local links = {} 89 local links = {}
55 -- extract links 90 -- extract links
56 body = string.gsub(body, '[Hh][Rr][Ee][Ff]%s*=%s*"([^"]*)"', function(href) 91 body = string.gsub(body, '[Hh][Rr][Ee][Ff]%s*=%s*"([^"]*)"', function(href)
57 table.insert(links, url.absolute(base, href)) 92 table.insert(links, url.absolute(base, href))
58 end) 93 end)
59 body = string.gsub(body, "[Hh][Rr][Ee][Ff]%s*=%s*'([^']*)'", function(href) 94 body = string.gsub(body, "[Hh][Rr][Ee][Ff]%s*=%s*'([^']*)'", function(href)
60 table.insert(links, url.absolute(base, href)) 95 table.insert(links, url.absolute(base, href))
61 end) 96 end)
62 string.gsub(body, "[Hh][Rr][Ee][Ff]%s*=%s*(.-)>", function(href) 97 string.gsub(body, "[Hh][Rr][Ee][Ff]%s*=%s*(.-)>", function(href)
63 table.insert(links, url.absolute(base, href)) 98 table.insert(links, url.absolute(base, href))
64 end) 99 end)
65 return links 100 return links
66end 101end
67 102
68function checklinks(u) 103function checklinks(address)
69 local base, body, error = retrieve(u) 104 local base, body, error = load(address)
70 if not body then print(error) return end 105 if not body then print(error) return end
106 print("Checking ", base)
71 local links = getlinks(body, base) 107 local links = getlinks(body, base)
72 for _, l in ipairs(links) do 108 for _, link in ipairs(links) do
73 io.stderr:write("\t", l, "\n") 109 getstatus(link)
74 local err = getstatus(l)
75 if err then io.stderr:write('\t', l, ": ", err, "\n") end
76 end 110 end
77end 111end
78 112
79arg = arg or {} 113for _, address in ipairs(arg) do
80if table.getn(arg) < 1 then 114 checklinks(url.absolute("file:", address))
81 print("Usage:\n luasocket check-links.lua {<url>}")
82 exit()
83end
84for _, a in ipairs(arg) do
85 print("Checking ", a)
86 checklinks(url.absolute("file:", a))
87end 115end
116
117while nthreads > 0 do
118 handler:step()
119end
diff --git a/etc/dispatch.lua b/etc/dispatch.lua
new file mode 100644
index 0000000..e6c14a6
--- /dev/null
+++ b/etc/dispatch.lua
@@ -0,0 +1,267 @@
1-----------------------------------------------------------------------------
2-- A hacked dispatcher module
3-- LuaSocket sample files
4-- Author: Diego Nehab
5-- RCS ID: $$
6-----------------------------------------------------------------------------
7local base = _G
8local socket = require("socket")
9local coroutine = require("coroutine")
10module("dispatch")
11
12-- if too much time goes by without any activity in one of our sockets, we
13-- just kill it
14TIMEOUT = 10
15
16-----------------------------------------------------------------------------
17-- Mega hack. Don't try to do this at home.
18-----------------------------------------------------------------------------
19-- Lua 5.1 has coroutine.running(). We need it here, so we use this terrible
20-- hack to emulate it in Lua itself
21-- This is very inefficient, but is very good for debugging.
22local running
23local resume = coroutine.resume
24function coroutine.resume(co, ...)
25 running = co
26 return resume(co, unpack(arg))
27end
28
29function coroutine.running()
30 return running
31end
32
33-----------------------------------------------------------------------------
34-- Mega hack. Don't try to do this at home.
35-----------------------------------------------------------------------------
36-- we can't yield across calls to protect, so we rewrite it on top of
37-- coroutines (coxpcall-style). Make sure you don't require any module
38-- that uses socket.protect before loading our hack
39function socket.protect(f)
40 return f
41end
42
43function socket.protect(f)
44 return function(...)
45 local co = coroutine.create(f)
46 while true do
47 local results = {resume(co, unpack(arg))}
48 local status = table.remove(results, 1)
49 if not status then
50 if type(results[1]) == 'table' then
51 return nil, results[1][1]
52 else error(results[1]) end
53 end
54 if coroutine.status(co) == "suspended" then
55 arg = {coroutine.yield(unpack(results))}
56 else
57 return unpack(results)
58 end
59 end
60 end
61end
62
63-----------------------------------------------------------------------------
64-- socket.tcp() replacement for non-blocking I/O
65-----------------------------------------------------------------------------
66local function newtrap(dispatcher)
67 -- try to create underlying socket
68 local tcp, error = socket.tcp()
69 if not tcp then return nil, error end
70 -- put it in non-blocking mode right away
71 tcp:settimeout(0)
72 -- metatable for trap produces new methods on demand for those that we
73 -- don't override explicitly.
74 local metat = { __index = function(table, key)
75 table[key] = function(...)
76 return tcp[key](tcp, unpack(arg))
77 end
78 end}
79 -- does user want to do his own non-blocking I/O?
80 local zero = false
81 -- create a trap object that will behave just like a real socket object
82 local trap = { }
83 -- we ignore settimeout to preserve our 0 timeout, but record whether
84 -- the user wants to do his own non-blocking I/O
85 function trap:settimeout(mode, value)
86 if value == 0 then
87 zero = true
88 else
89 zero = false
90 end
91 return 1
92 end
93 -- send in non-blocking mode and yield on timeout
94 function trap:send(data, first, last)
95 first = (first or 1) - 1
96 local result, error
97 while true do
98 -- tell dispatcher we want to keep sending before we yield
99 dispatcher.sending:insert(tcp)
100 -- mark time we started waiting
101 dispatcher.context[tcp].last = socket.gettime()
102 -- return control to dispatcher
103 -- if upon return the dispatcher tells us we timed out,
104 -- return an error to whoever called us
105 if coroutine.yield() == "timeout" then
106 return nil, "timeout"
107 end
108 -- try sending
109 result, error, first = tcp:send(data, first+1, last)
110 -- if we are done, or there was an unexpected error,
111 -- break away from loop
112 if error ~= "timeout" then return result, error, first end
113 end
114 end
115 -- receive in non-blocking mode and yield on timeout
116 -- or simply return partial read, if user requested timeout = 0
117 function trap:receive(pattern, partial)
118 local error = "timeout"
119 local value
120 while true do
121 -- tell dispatcher we want to keep receiving before we yield
122 dispatcher.receiving:insert(tcp)
123 -- mark time we started waiting
124 dispatcher.context[tcp].last = socket.gettime()
125 -- return control to dispatcher
126 -- if upon return the dispatcher tells us we timed out,
127 -- return an error to whoever called us
128 if coroutine.yield() == "timeout" then
129 return nil, "timeout"
130 end
131 -- try receiving
132 value, error, partial = tcp:receive(pattern, partial)
133 -- if we are done, or there was an unexpected error,
134 -- break away from loop
135 if (error ~= "timeout") or zero then
136 return value, error, partial
137 end
138 end
139 end
140 -- connect in non-blocking mode and yield on timeout
141 function trap:connect(host, port)
142 local result, error = tcp:connect(host, port)
143 -- mark time we started waiting
144 dispatcher.context[tcp].last = socket.gettime()
145 if error == "timeout" then
146 -- tell dispatcher we will be able to write upon connection
147 dispatcher.sending:insert(tcp)
148 -- return control to dispatcher
149 -- if upon return the dispatcher tells us we have a
150 -- timeout, just abort
151 if coroutine.yield() == "timeout" then
152 return nil, "timeout"
153 end
154 -- when we come back, check if connection was successful
155 result, error = tcp:connect(host, port)
156 if result or error == "already connected" then return 1
157 else return nil, "non-blocking connect failed" end
158 else return result, error end
159 end
160 -- accept in non-blocking mode and yield on timeout
161 function trap:accept()
162 local result, error = tcp:accept()
163 while error == "timeout" do
164 -- mark time we started waiting
165 dispatcher.context[tcp].last = socket.gettime()
166 -- tell dispatcher we will be able to read upon connection
167 dispatcher.receiving:insert(tcp)
168 -- return control to dispatcher
169 -- if upon return the dispatcher tells us we have a
170 -- timeout, just abort
171 if coroutine.yield() == "timeout" then
172 return nil, "timeout"
173 end
174 end
175 return result, error
176 end
177 -- remove thread from context
178 function trap:close()
179 dispatcher.context[tcp] = nil
180 return tcp:close()
181 end
182 -- add newly created socket to context
183 dispatcher.context[tcp] = {
184 thread = coroutine.running()
185 }
186 return setmetatable(trap, metat)
187end
188
189-----------------------------------------------------------------------------
190-- Our set data structure
191-----------------------------------------------------------------------------
192local function newset()
193 local reverse = {}
194 local set = {}
195 return setmetatable(set, {__index = {
196 insert = function(set, value)
197 if not reverse[value] then
198 table.insert(set, value)
199 reverse[value] = table.getn(set)
200 end
201 end,
202 remove = function(set, value)
203 local index = reverse[value]
204 if index then
205 reverse[value] = nil
206 local top = table.remove(set)
207 if top ~= value then
208 reverse[top] = index
209 set[index] = top
210 end
211 end
212 end
213 }})
214end
215
216-----------------------------------------------------------------------------
217-- Our dispatcher API.
218-----------------------------------------------------------------------------
219local metat = { __index = {} }
220
221function metat.__index:start(func)
222 local co = coroutine.create(func)
223 assert(coroutine.resume(co))
224end
225
226function newhandler()
227 local dispatcher = {
228 context = {},
229 sending = newset(),
230 receiving = newset()
231 }
232 function dispatcher.tcp()
233 return newtrap(dispatcher)
234 end
235 return setmetatable(dispatcher, metat)
236end
237
238-- step through all active threads
239function metat.__index:step()
240 -- check which sockets are interesting and act on them
241 local readable, writable = socket.select(self.receiving,
242 self.sending, 1)
243 -- for all readable connections, resume their threads
244 for _, who in ipairs(readable) do
245 if self.context[who] then
246 self.receiving:remove(who)
247 assert(coroutine.resume(self.context[who].thread))
248 end
249 end
250 -- for all writable connections, do the same
251 for _, who in ipairs(writable) do
252 if self.context[who] then
253 self.sending:remove(who)
254 assert(coroutine.resume(self.context[who].thread))
255 end
256 end
257 -- politely ask replacement I/O functions in idle threads to
258 -- return reporting a timeout
259 local now = socket.gettime()
260 for who, data in pairs(self.context) do
261 if data.last and now - data.last > TIMEOUT then
262 self.sending:remove(who)
263 self.receiving:remove(who)
264 assert(coroutine.resume(self.context[who].thread, "timeout"))
265 end
266 end
267end