diff options
author | Diego Nehab <diego@tecgraf.puc-rio.br> | 2005-08-19 01:35:26 +0000 |
---|---|---|
committer | Diego Nehab <diego@tecgraf.puc-rio.br> | 2005-08-19 01:35:26 +0000 |
commit | 5e8ae76248ed31496dc6fef7855498a0479159ed (patch) | |
tree | b72e99154f4901b503dbbe883445bee6c42ef70e | |
parent | 0c3cdd5ef2485a79d6fec9261f2850c41577d5b3 (diff) | |
download | luasocket-5e8ae76248ed31496dc6fef7855498a0479159ed.tar.gz luasocket-5e8ae76248ed31496dc6fef7855498a0479159ed.tar.bz2 luasocket-5e8ae76248ed31496dc6fef7855498a0479159ed.zip |
Dispatcher working for check-links. Need to get it working with forwarder.
-rw-r--r-- | etc/check-links.lua | 122 | ||||
-rw-r--r-- | etc/dispatch.lua | 267 |
2 files changed, 344 insertions, 45 deletions
diff --git a/etc/check-links.lua b/etc/check-links.lua index 9d837e4..e06cc91 100644 --- a/etc/check-links.lua +++ b/etc/check-links.lua | |||
@@ -1,49 +1,84 @@ | |||
1 | ----------------------------------------------------------------------------- | 1 | ----------------------------------------------------------------------------- |
2 | -- Little program that checks links in HTML files | 2 | -- Little program that checks links in HTML files, using coroutines and |
3 | -- non-blocking I/O via the dispatcher module. | ||
3 | -- LuaSocket sample files | 4 | -- LuaSocket sample files |
4 | -- Author: Diego Nehab | 5 | -- Author: Diego Nehab |
5 | -- RCS ID: $Id$ | 6 | -- RCS ID: $$ |
6 | ----------------------------------------------------------------------------- | 7 | ----------------------------------------------------------------------------- |
7 | local http = require("socket.http") | 8 | local dispatch, url, http, handler |
8 | local url = require("socket.url") | 9 | |
9 | http.TIMEOUT = 10 | 10 | arg = arg or {} |
11 | if table.getn(arg) < 1 then | ||
12 | print("Usage:\n luasocket check-links.lua [-n] {<url>}") | ||
13 | exit() | ||
14 | end | ||
15 | |||
16 | if arg[1] ~= "-n" then | ||
17 | -- if using blocking I/O, simulate dispatcher interface | ||
18 | url = require("socket.url") | ||
19 | http = require("socket.http") | ||
20 | handler = { | ||
21 | start = function(self, f) | ||
22 | f() | ||
23 | end, | ||
24 | tcp = socket.tcp | ||
25 | } | ||
26 | http.TIMEOUT = 10 | ||
27 | else | ||
28 | -- if non-blocking I/O was requested, disable dispatcher | ||
29 | table.remove(arg, 1) | ||
30 | dispatch = require("dispatch") | ||
31 | dispatch.TIMEOUT = 10 | ||
32 | url = require("socket.url") | ||
33 | http = require("socket.http") | ||
34 | handler = dispatch.newhandler() | ||
35 | end | ||
36 | |||
37 | local nthreads = 0 | ||
38 | |||
39 | -- get the status of a URL using the dispatcher | ||
40 | function getstatus(link) | ||
41 | local parsed = url.parse(link, {scheme = "file"}) | ||
42 | if parsed.scheme == "http" then | ||
43 | nthreads = nthreads + 1 | ||
44 | handler:start(function() | ||
45 | local r, c, h, s = http.request{ | ||
46 | method = "HEAD", | ||
47 | url = link, | ||
48 | create = handler.tcp | ||
49 | } | ||
50 | if r and c == 200 then io.write('\t', link, '\n') | ||
51 | else io.write('\t', link, ': ', tostring(c), '\n') end | ||
52 | nthreads = nthreads - 1 | ||
53 | end) | ||
54 | end | ||
55 | end | ||
10 | 56 | ||
11 | function readfile(path) | 57 | function readfile(path) |
12 | path = url.unescape(path) | 58 | path = url.unescape(path) |
13 | local file, error = io.open(path, "r") | 59 | local file, error = io.open(path, "r") |
14 | if file then | 60 | if file then |
15 | local body = file:read("*a") | 61 | local body = file:read("*a") |
16 | file:close() | 62 | file:close() |
17 | return body | 63 | return body |
18 | else return nil, error end | 64 | else return nil, error end |
19 | end | 65 | end |
20 | 66 | ||
21 | function getstatus(u) | 67 | function load(u) |
22 | local parsed = url.parse(u, {scheme = "file"}) | 68 | local parsed = url.parse(u, { scheme = "file" }) |
23 | if parsed.scheme == "http" then | ||
24 | local r, c, h, s = http.request{url = u, method = "HEAD"} | ||
25 | if c ~= 200 then return s or c end | ||
26 | elseif parsed.scheme == "file" then | ||
27 | local file, error = io.open(url.unescape(parsed.path), "r") | ||
28 | if file then file:close() | ||
29 | else return error end | ||
30 | else return string.format("unhandled scheme '%s'", parsed.scheme) end | ||
31 | end | ||
32 | |||
33 | function retrieve(u) | ||
34 | local parsed = url.parse(u, { scheme = "file" }) | ||
35 | local body, headers, code, error | 69 | local body, headers, code, error |
36 | local base = u | 70 | local base = u |
37 | if parsed.scheme == "http" then | 71 | if parsed.scheme == "http" then |
38 | body, code, headers = http.request(u) | 72 | body, code, headers = http.request(u) |
39 | if code == 200 then | 73 | if code == 200 then |
74 | -- if there was a redirect, update base to reflect it | ||
40 | base = headers.location or base | 75 | base = headers.location or base |
41 | end | 76 | end |
42 | if not body then | 77 | if not body then |
43 | error = code | 78 | error = code |
44 | end | 79 | end |
45 | elseif parsed.scheme == "file" then | 80 | elseif parsed.scheme == "file" then |
46 | body, error = readfile(parsed.path) | 81 | body, error = readfile(parsed.path) |
47 | else error = string.format("unhandled scheme '%s'", parsed.scheme) end | 82 | else error = string.format("unhandled scheme '%s'", parsed.scheme) end |
48 | return base, body, error | 83 | return base, body, error |
49 | end | 84 | end |
@@ -53,35 +88,32 @@ function getlinks(body, base) | |||
53 | body = string.gsub(body, "%<%!%-%-.-%-%-%>", "") | 88 | body = string.gsub(body, "%<%!%-%-.-%-%-%>", "") |
54 | local links = {} | 89 | local links = {} |
55 | -- extract links | 90 | -- extract links |
56 | body = string.gsub(body, '[Hh][Rr][Ee][Ff]%s*=%s*"([^"]*)"', function(href) | 91 | body = string.gsub(body, '[Hh][Rr][Ee][Ff]%s*=%s*"([^"]*)"', function(href) |
57 | table.insert(links, url.absolute(base, href)) | 92 | table.insert(links, url.absolute(base, href)) |
58 | end) | 93 | end) |
59 | body = string.gsub(body, "[Hh][Rr][Ee][Ff]%s*=%s*'([^']*)'", function(href) | 94 | body = string.gsub(body, "[Hh][Rr][Ee][Ff]%s*=%s*'([^']*)'", function(href) |
60 | table.insert(links, url.absolute(base, href)) | 95 | table.insert(links, url.absolute(base, href)) |
61 | end) | 96 | end) |
62 | string.gsub(body, "[Hh][Rr][Ee][Ff]%s*=%s*(.-)>", function(href) | 97 | string.gsub(body, "[Hh][Rr][Ee][Ff]%s*=%s*(.-)>", function(href) |
63 | table.insert(links, url.absolute(base, href)) | 98 | table.insert(links, url.absolute(base, href)) |
64 | end) | 99 | end) |
65 | return links | 100 | return links |
66 | end | 101 | end |
67 | 102 | ||
68 | function checklinks(u) | 103 | function checklinks(address) |
69 | local base, body, error = retrieve(u) | 104 | local base, body, error = load(address) |
70 | if not body then print(error) return end | 105 | if not body then print(error) return end |
106 | print("Checking ", base) | ||
71 | local links = getlinks(body, base) | 107 | local links = getlinks(body, base) |
72 | for _, l in ipairs(links) do | 108 | for _, link in ipairs(links) do |
73 | io.stderr:write("\t", l, "\n") | 109 | getstatus(link) |
74 | local err = getstatus(l) | ||
75 | if err then io.stderr:write('\t', l, ": ", err, "\n") end | ||
76 | end | 110 | end |
77 | end | 111 | end |
78 | 112 | ||
79 | arg = arg or {} | 113 | for _, address in ipairs(arg) do |
80 | if table.getn(arg) < 1 then | 114 | checklinks(url.absolute("file:", address)) |
81 | print("Usage:\n luasocket check-links.lua {<url>}") | ||
82 | exit() | ||
83 | end | ||
84 | for _, a in ipairs(arg) do | ||
85 | print("Checking ", a) | ||
86 | checklinks(url.absolute("file:", a)) | ||
87 | end | 115 | end |
116 | |||
117 | while nthreads > 0 do | ||
118 | handler:step() | ||
119 | end | ||
diff --git a/etc/dispatch.lua b/etc/dispatch.lua new file mode 100644 index 0000000..e6c14a6 --- /dev/null +++ b/etc/dispatch.lua | |||
@@ -0,0 +1,267 @@ | |||
1 | ----------------------------------------------------------------------------- | ||
2 | -- A hacked dispatcher module | ||
3 | -- LuaSocket sample files | ||
4 | -- Author: Diego Nehab | ||
5 | -- RCS ID: $$ | ||
6 | ----------------------------------------------------------------------------- | ||
7 | local base = _G | ||
8 | local socket = require("socket") | ||
9 | local coroutine = require("coroutine") | ||
10 | module("dispatch") | ||
11 | |||
12 | -- if too much time goes by without any activity in one of our sockets, we | ||
13 | -- just kill it | ||
14 | TIMEOUT = 10 | ||
15 | |||
16 | ----------------------------------------------------------------------------- | ||
17 | -- Mega hack. Don't try to do this at home. | ||
18 | ----------------------------------------------------------------------------- | ||
19 | -- Lua 5.1 has coroutine.running(). We need it here, so we use this terrible | ||
20 | -- hack to emulate it in Lua itself | ||
21 | -- This is very inefficient, but is very good for debugging. | ||
22 | local running | ||
23 | local resume = coroutine.resume | ||
24 | function coroutine.resume(co, ...) | ||
25 | running = co | ||
26 | return resume(co, unpack(arg)) | ||
27 | end | ||
28 | |||
29 | function coroutine.running() | ||
30 | return running | ||
31 | end | ||
32 | |||
33 | ----------------------------------------------------------------------------- | ||
34 | -- Mega hack. Don't try to do this at home. | ||
35 | ----------------------------------------------------------------------------- | ||
36 | -- we can't yield across calls to protect, so we rewrite it with coxpcall | ||
37 | -- make sure you don't require any module that uses socket.protect before | ||
38 | -- loading our hack | ||
39 | function socket.protect(f) | ||
40 | return f | ||
41 | end | ||
42 | |||
43 | function socket.protect(f) | ||
44 | return function(...) | ||
45 | local co = coroutine.create(f) | ||
46 | while true do | ||
47 | local results = {resume(co, unpack(arg))} | ||
48 | local status = table.remove(results, 1) | ||
49 | if not status then | ||
50 | if type(results[1]) == 'table' then | ||
51 | return nil, results[1][1] | ||
52 | else error(results[1]) end | ||
53 | end | ||
54 | if coroutine.status(co) == "suspended" then | ||
55 | arg = {coroutine.yield(unpack(results))} | ||
56 | else | ||
57 | return unpack(results) | ||
58 | end | ||
59 | end | ||
60 | end | ||
61 | end | ||
62 | |||
63 | ----------------------------------------------------------------------------- | ||
64 | -- socket.tcp() replacement for non-blocking I/O | ||
65 | ----------------------------------------------------------------------------- | ||
66 | local function newtrap(dispatcher) | ||
67 | -- try to create underlying socket | ||
68 | local tcp, error = socket.tcp() | ||
69 | if not tcp then return nil, error end | ||
70 | -- put it in non-blocking mode right away | ||
71 | tcp:settimeout(0) | ||
72 | -- metatable for trap produces new methods on demand for those that we | ||
73 | -- don't override explicitly. | ||
74 | local metat = { __index = function(table, key) | ||
75 | table[key] = function(...) | ||
76 | return tcp[key](tcp, unpack(arg)) | ||
77 | end | ||
78 | end} | ||
79 | -- does user want to do his own non-blocking I/O? | ||
80 | local zero = false | ||
81 | -- create a trap object that will behave just like a real socket object | ||
82 | local trap = { } | ||
83 | -- we ignore settimeout to preserve our 0 timeout, but record whether | ||
84 | -- the user wants to do his own non-blocking I/O | ||
85 | function trap:settimeout(mode, value) | ||
86 | if value == 0 then | ||
87 | zero = true | ||
88 | else | ||
89 | zero = false | ||
90 | end | ||
91 | return 1 | ||
92 | end | ||
93 | -- send in non-blocking mode and yield on timeout | ||
94 | function trap:send(data, first, last) | ||
95 | first = (first or 1) - 1 | ||
96 | local result, error | ||
97 | while true do | ||
98 | -- tell dispatcher we want to keep sending before we yield | ||
99 | dispatcher.sending:insert(tcp) | ||
100 | -- mark time we started waiting | ||
101 | dispatcher.context[tcp].last = socket.gettime() | ||
102 | -- return control to dispatcher | ||
103 | -- if upon return the dispatcher tells us we timed out, | ||
104 | -- return an error to whoever called us | ||
105 | if coroutine.yield() == "timeout" then | ||
106 | return nil, "timeout" | ||
107 | end | ||
108 | -- try sending | ||
109 | result, error, first = tcp:send(data, first+1, last) | ||
110 | -- if we are done, or there was an unexpected error, | ||
111 | -- break away from loop | ||
112 | if error ~= "timeout" then return result, error, first end | ||
113 | end | ||
114 | end | ||
115 | -- receive in non-blocking mode and yield on timeout | ||
116 | -- or simply return partial read, if user requested timeout = 0 | ||
117 | function trap:receive(pattern, partial) | ||
118 | local error = "timeout" | ||
119 | local value | ||
120 | while true do | ||
121 | -- tell dispatcher we want to keep receiving before we yield | ||
122 | dispatcher.receiving:insert(tcp) | ||
123 | -- mark time we started waiting | ||
124 | dispatcher.context[tcp].last = socket.gettime() | ||
125 | -- return control to dispatcher | ||
126 | -- if upon return the dispatcher tells us we timed out, | ||
127 | -- return an error to whoever called us | ||
128 | if coroutine.yield() == "timeout" then | ||
129 | return nil, "timeout" | ||
130 | end | ||
131 | -- try receiving | ||
132 | value, error, partial = tcp:receive(pattern, partial) | ||
133 | -- if we are done, or there was an unexpected error, | ||
134 | -- break away from loop | ||
135 | if (error ~= "timeout") or zero then | ||
136 | return value, error, partial | ||
137 | end | ||
138 | end | ||
139 | end | ||
140 | -- connect in non-blocking mode and yield on timeout | ||
141 | function trap:connect(host, port) | ||
142 | local result, error = tcp:connect(host, port) | ||
143 | -- mark time we started waiting | ||
144 | dispatcher.context[tcp].last = socket.gettime() | ||
145 | if error == "timeout" then | ||
146 | -- tell dispatcher we will be able to write uppon connection | ||
147 | dispatcher.sending:insert(tcp) | ||
148 | -- return control to dispatcher | ||
149 | -- if upon return the dispatcher tells us we have a | ||
150 | -- timeout, just abort | ||
151 | if coroutine.yield() == "timeout" then | ||
152 | return nil, "timeout" | ||
153 | end | ||
154 | -- when we come back, check if connection was successful | ||
155 | result, error = tcp:connect(host, port) | ||
156 | if result or error == "already connected" then return 1 | ||
157 | else return nil, "non-blocking connect failed" end | ||
158 | else return result, error end | ||
159 | end | ||
160 | -- accept in non-blocking mode and yield on timeout | ||
161 | function trap:accept() | ||
162 | local result, error = tcp:accept() | ||
163 | while error == "timeout" do | ||
164 | -- mark time we started waiting | ||
165 | dispatcher.context[tcp].last = socket.gettime() | ||
166 | -- tell dispatcher we will be able to read uppon connection | ||
167 | dispatcher.receiving:insert(tcp) | ||
168 | -- return control to dispatcher | ||
169 | -- if upon return the dispatcher tells us we have a | ||
170 | -- timeout, just abort | ||
171 | if coroutine.yield() == "timeout" then | ||
172 | return nil, "timeout" | ||
173 | end | ||
174 | end | ||
175 | return result, error | ||
176 | end | ||
177 | -- remove thread from context | ||
178 | function trap:close() | ||
179 | dispatcher.context[tcp] = nil | ||
180 | return tcp:close() | ||
181 | end | ||
182 | -- add newly created socket to context | ||
183 | dispatcher.context[tcp] = { | ||
184 | thread = coroutine.running() | ||
185 | } | ||
186 | return setmetatable(trap, metat) | ||
187 | end | ||
188 | |||
189 | ----------------------------------------------------------------------------- | ||
190 | -- Our set data structure | ||
191 | ----------------------------------------------------------------------------- | ||
192 | local function newset() | ||
193 | local reverse = {} | ||
194 | local set = {} | ||
195 | return setmetatable(set, {__index = { | ||
196 | insert = function(set, value) | ||
197 | if not reverse[value] then | ||
198 | table.insert(set, value) | ||
199 | reverse[value] = table.getn(set) | ||
200 | end | ||
201 | end, | ||
202 | remove = function(set, value) | ||
203 | local index = reverse[value] | ||
204 | if index then | ||
205 | reverse[value] = nil | ||
206 | local top = table.remove(set) | ||
207 | if top ~= value then | ||
208 | reverse[top] = index | ||
209 | set[index] = top | ||
210 | end | ||
211 | end | ||
212 | end | ||
213 | }}) | ||
214 | end | ||
215 | |||
216 | ----------------------------------------------------------------------------- | ||
217 | -- Our dispatcher API. | ||
218 | ----------------------------------------------------------------------------- | ||
219 | local metat = { __index = {} } | ||
220 | |||
221 | function metat.__index:start(func) | ||
222 | local co = coroutine.create(func) | ||
223 | assert(coroutine.resume(co)) | ||
224 | end | ||
225 | |||
226 | function newhandler() | ||
227 | local dispatcher = { | ||
228 | context = {}, | ||
229 | sending = newset(), | ||
230 | receiving = newset() | ||
231 | } | ||
232 | function dispatcher.tcp() | ||
233 | return newtrap(dispatcher) | ||
234 | end | ||
235 | return setmetatable(dispatcher, metat) | ||
236 | end | ||
237 | |||
238 | -- step through all active threads | ||
239 | function metat.__index:step() | ||
240 | -- check which sockets are interesting and act on them | ||
241 | local readable, writable = socket.select(self.receiving, | ||
242 | self.sending, 1) | ||
243 | -- for all readable connections, resume their threads | ||
244 | for _, who in ipairs(readable) do | ||
245 | if self.context[who] then | ||
246 | self.receiving:remove(who) | ||
247 | assert(coroutine.resume(self.context[who].thread)) | ||
248 | end | ||
249 | end | ||
250 | -- for all writable connections, do the same | ||
251 | for _, who in ipairs(writable) do | ||
252 | if self.context[who] then | ||
253 | self.sending:remove(who) | ||
254 | assert(coroutine.resume(self.context[who].thread)) | ||
255 | end | ||
256 | end | ||
257 | -- politely ask replacement I/O functions in idle threads to | ||
258 | -- return reporting a timeout | ||
259 | local now = socket.gettime() | ||
260 | for who, data in pairs(self.context) do | ||
261 | if data.last and now - data.last > TIMEOUT then | ||
262 | self.sending:remove(who) | ||
263 | self.receiving:remove(who) | ||
264 | assert(coroutine.resume(self.context[who].thread, "timeout")) | ||
265 | end | ||
266 | end | ||
267 | end | ||