aboutsummaryrefslogtreecommitdiff
path: root/etc
diff options
context:
space:
mode:
author: Diego Nehab <diego@tecgraf.puc-rio.br> 2005-06-20 04:51:55 +0000
committer: Diego Nehab <diego@tecgraf.puc-rio.br> 2005-06-20 04:51:55 +0000
commit: 37f7af4b9f1250e3c3439df03d43cf291a4d6f37 (patch)
tree: 33f7d8fbbc2860378059810c5372aadc140f1305 /etc
parent: ae4ba2aa98af4e6d035850fcbc5985cea94f25fa (diff)
downloadluasocket-37f7af4b9f1250e3c3439df03d43cf291a4d6f37.tar.gz
luasocket-37f7af4b9f1250e3c3439df03d43cf291a4d6f37.tar.bz2
luasocket-37f7af4b9f1250e3c3439df03d43cf291a4d6f37.zip
Added check-links-nb.lua, which checks links in a non-blocking way.
Diffstat (limited to 'etc')
-rw-r--r--etc/check-links-nb.lua262
1 files changed, 262 insertions, 0 deletions
diff --git a/etc/check-links-nb.lua b/etc/check-links-nb.lua
new file mode 100644
index 0000000..7e8df1b
--- /dev/null
+++ b/etc/check-links-nb.lua
@@ -0,0 +1,262 @@
1-----------------------------------------------------------------------------
2-- Little program that checks links in HTML files, using coroutines and
3-- non-blocking I/O. Thus, faster than simpler version of same program
4-- LuaSocket sample files
5-- Author: Diego Nehab
6-- RCS ID: $$
7-----------------------------------------------------------------------------
8local socket = require("socket")
9
-- how long a thread may stay blocked on a socket before dispatch() resumes
-- it with "timeout"; compared against differences of socket.gettime()
-- values (presumably seconds -- confirm against the LuaSocket reference)
TIMEOUT = 10
11
-- we need to yield across calls to protect, so we can't use pcall
-- we borrow and simplify code from coxpcall to reimplement socket.protect
-- before loading http
--
-- f: the function to protect
-- returns a wrapper that runs f inside its own coroutine. Yields made
-- inside f are forwarded to whoever resumed the caller, and the values the
-- caller is resumed with are fed back into f. If f raises a table (presumably
-- the wrapped exception produced by LuaSocket's "try" functions -- confirm
-- against the LuaSocket version in use), the wrapper returns nil followed by
-- the table's first element. Any other error value is re-raised unchanged.
function socket.protect(f)
    -- NOTE: the body deliberately uses the implicit "arg" vararg table (and
    -- never mentions "..." itself) so that it keeps working on Lua 5.0 and
    -- on Lua 5.1 built with the default vararg compatibility option.
    return function(...)
        local co = coroutine.create(f)
        while true do
            local results = {coroutine.resume(co, unpack(arg))}
            local status = table.remove(results, 1)
            if not status then
                local err = results[1]
                -- only table errors are expected exceptions; blindly
                -- indexing a string or nil error would hide the real
                -- failure (or raise an unrelated indexing error)
                if type(err) == "table" then
                    return nil, err[1]
                end
                error(err, 0)
            end
            if coroutine.status(co) == "suspended" then
                -- f yielded: hand the yielded values to our own resumer and
                -- remember what we are resumed with for the next round
                arg = {coroutine.yield(unpack(results))}
            else
                return unpack(results)
            end
        end
    end
end
33
34local http = require("socket.http")
35local url = require("socket.url")
36
-- creates a new set data structure
--
-- The returned table keeps its members in the array part (positions 1..n,
-- no holes), so it can be handed directly to anything that walks an array.
-- Methods (reached through the metatable):
--   set:insert(value)  adds value unless it is already a member
--   set:remove(value)  removes value if present; the last member is moved
--                      into the freed position, so order is not preserved
-- The member count is tracked in a closure variable instead of calling
-- table.getn, which is deprecated in Lua 5.1 and absent from later versions.
function newset()
    local reverse = {}  -- member -> its position in the array part
    local size = 0      -- number of members currently stored
    local set = {}
    return setmetatable(set, {__index = {
        insert = function(set, value)
            if not reverse[value] then
                size = size + 1
                set[size] = value
                reverse[value] = size
            end
        end,
        remove = function(set, value)
            local index = reverse[value]
            if index then
                reverse[value] = nil
                -- pop the last member...
                local top = set[size]
                set[size] = nil
                size = size - 1
                -- ...and, unless it was the one being removed, use it to
                -- fill the hole left behind
                if top ~= value then
                    reverse[top] = index
                    set[index] = top
                end
            end
        end
    }})
end
61
-- dispatcher state shared by the functions below:
--   context   : underlying socket -> {thread = ..., trap = ..., last = ...}
--   nthreads  : number of link-checking threads that have not finished yet
--   sending   : set of sockets whose threads wait to write
--   receiving : set of sockets whose threads wait to read
local context, nthreads = {}, 0
local sending, receiving = newset(), newset()
66
-- socket.tcp() replacement for non-blocking I/O
-- implements enough functionality to be used with http.request
-- in Lua 5.1, we have coroutine.running to simplify things...
--
-- thread: the coroutine that will use the sockets produced by the result
-- returns a function suitable as the "create" field of an http.request
-- table. Each call returns a wrapper object ("trap") around a fresh
-- non-blocking TCP socket, or nil plus an error message if the socket could
-- not be created. Whenever an operation would block, the wrapper registers
-- the socket with the dispatcher and yields; if the dispatcher resumes the
-- thread with "timeout", the operation returns nil, "timeout".
function newcreate(thread)
    return function()
        -- try to create underlying socket
        local tcp, err = socket.tcp()
        if not tcp then return nil, err end
        -- put it in non-blocking mode right away
        tcp:settimeout(0)

        -- park the current thread until the dispatcher wakes it up.
        -- waiting: the set (sending or receiving) the socket must join
        -- returns true when the dispatcher reported a timeout
        local function wait(waiting)
            -- tell dispatcher what we are waiting for
            waiting:insert(tcp)
            -- mark time we started waiting
            context[tcp].last = socket.gettime()
            -- return control to dispatcher
            if coroutine.yield() == "timeout" then
                -- the dispatcher does not unregister timed-out sockets, so
                -- do it here; otherwise the socket would linger in the set
                waiting:remove(tcp)
                return true
            end
            return false
        end

        local trap = {
            -- we ignore settimeout to preserve our 0 timeout
            settimeout = function(self, mode, value)
                return 1
            end,
            -- send in non-blocking mode and yield on timeout
            send = function(self, data, first, last)
                first = (first or 1) - 1
                local result, status
                while true do
                    -- on a partial send, "first" becomes the index of the
                    -- last byte sent, so the next round resumes right after
                    result, status, first = tcp:send(data, first+1, last)
                    if status == "timeout" then
                        if wait(sending) then return nil, "timeout" end
                    else return result, status, first end
                end
            end,
            -- receive in non-blocking mode and yield on timeout
            receive = function(self, pattern)
                local status, partial = "timeout", ""
                local value
                while true do
                    -- what was received so far is passed back in as prefix
                    value, status, partial = tcp:receive(pattern, partial)
                    if status == "timeout" then
                        if wait(receiving) then return nil, "timeout" end
                    else return value, status, partial end
                end
            end,
            -- connect in non-blocking mode and yield on timeout
            connect = function(self, host, port)
                local result, status = tcp:connect(host, port)
                if status == "timeout" then
                    -- we will be able to write upon connection
                    if wait(sending) then return nil, "timeout" end
                    -- when we come back, check if connection was successful
                    result, status = tcp:connect(host, port)
                    if result or status == "already connected" then return 1
                    else return nil, "non-blocking connect failed" end
                else return result, status end
            end,
            -- forget everything the dispatcher knows about the socket
            -- before closing it, so it is never selected on again
            close = function(self)
                sending:remove(tcp)
                receiving:remove(tcp)
                context[tcp] = nil
                return tcp:close()
            end
        }
        -- add newly created socket to context
        context[tcp] = {
            thread = thread,
            trap = trap
        }
        return trap
    end
end
149
-- get the status of a URL, non-blocking
--
-- from: page the link was found on (not used by the check itself)
-- link: absolute URL to check
-- Links whose scheme is not "http" are ignored. For http links a new
-- coroutine issues a HEAD request through the non-blocking socket wrapper
-- and writes the link (plus the status code, when it is not 200) to
-- standard output once the request completes. nthreads counts the
-- coroutines that have been started but have not finished.
function getstatus(from, link)
    local parsed = url.parse(link, {scheme = "file"})
    if parsed.scheme ~= "http" then return end
    -- declared before creation so the coroutine body can refer to itself
    local thread
    thread = coroutine.create(function()
        local _, code = http.request{
            method = "HEAD",
            url = link,
            create = newcreate(thread)
        }
        if code == 200 then
            io.write('\t', link, '\n')
        else
            io.write('\t', link, ': ', code, '\n')
        end
        nthreads = nthreads - 1
    end)
    nthreads = nthreads + 1
    -- run the thread until it finishes or first blocks on I/O
    assert(coroutine.resume(thread))
end
168
-- dispatch all threads until we are done
--
-- Loops while nthreads is positive. Each round waits (up to 1 as passed to
-- socket.select) for any registered socket to become ready, resumes the
-- threads that own the ready sockets, and then resumes with "timeout" every
-- thread whose socket has been waiting longer than TIMEOUT.
function dispatch()
    -- resume the owner of each ready socket, after taking the socket out of
    -- the set it was waiting in; sockets no longer in context are skipped
    local function wake(ready, waiting)
        for _, sock in ipairs(ready) do
            local entry = context[sock]
            if entry then
                waiting:remove(sock)
                assert(coroutine.resume(entry.thread))
            end
        end
    end

    while nthreads > 0 do
        -- check which sockets are interesting and act on them
        local readable, writable = socket.select(receiving, sending, 1)
        wake(readable, receiving)
        wake(writable, sending)
        -- politely ask replacement I/O functions in idle threads to
        -- return reporting a timeout
        local now = socket.gettime()
        for _, entry in pairs(context) do
            if entry.last and now - entry.last > TIMEOUT then
                assert(coroutine.resume(entry.thread, "timeout"))
            end
        end
    end
end
198
-- reads the entire content of a local file
--
-- path: file name, URL-escaped (it is unescaped before being opened)
-- returns the content as a string, or nil followed by the message reported
-- by io.open when the file cannot be opened
function readfile(path)
    local handle, message = io.open(url.unescape(path), "r")
    if not handle then
        return nil, message
    end
    local content = handle:read("*a")
    handle:close()
    return content
end
208
-- retrieves the document a URL points to
--
-- u: URL of the document; a URL without a scheme is treated as a file
-- returns base, body, err where
--   base: URL that relative links in the document should be resolved
--         against -- the location reported in the response headers of a
--         successful (200) http request when there is one, otherwise u
--   body: document content, or nil on failure
--   err:  error message (or whatever http.request returned in place of the
--         status code) when body is nil
function retrieve(u)
    local parsed = url.parse(u, { scheme = "file" })
    local base, body, err = u, nil, nil
    if parsed.scheme == "http" then
        local code, headers
        body, code, headers = http.request(u)
        -- if the headers carry a location (e.g. after a redirect), links
        -- are relative to it and not to the URL we asked for. The previous
        -- "base or headers.location" could never select the location,
        -- because base always starts out as u.
        if code == 200 and headers and headers.location then
            base = headers.location
        end
        if not body then
            err = code
        end
    elseif parsed.scheme == "file" then
        body, err = readfile(parsed.path)
    else
        err = string.format("unhandled scheme '%s'", tostring(parsed.scheme))
    end
    return base, body, err
end
226
-- extracts the targets of all href attributes found in an HTML document
--
-- body: HTML text
-- base: URL used to turn relative hrefs into absolute ones
-- returns an array with one absolute URL per href, grouped by quoting
-- style: double-quoted hrefs first, then single-quoted, then unquoted
function getlinks(body, base)
    -- get rid of comments
    body = string.gsub(body, "%<%!%-%-.-%-%-%>", "")
    local links = {}
    -- records one href and erases it from the text. The explicit "" matters:
    -- from Lua 5.1 on, a callback returning nil leaves the match in place,
    -- and the quoted hrefs would then be picked up a second time (quotes
    -- included) by the unquoted pass below.
    local function collect(href)
        table.insert(links, url.absolute(base, href))
        return ""
    end
    -- extract links: double-quoted, single-quoted, then unquoted values
    body = string.gsub(body, '[Hh][Rr][Ee][Ff]%s*=%s*"([^"]*)"', collect)
    body = string.gsub(body, "[Hh][Rr][Ee][Ff]%s*=%s*'([^']*)'", collect)
    string.gsub(body, "[Hh][Rr][Ee][Ff]%s*=%s*(.-)>", collect)
    return links
end
243
-- starts a status check for every link found in a document
--
-- from: URL of the document to scan
-- When the document cannot be retrieved, the error is printed and nothing
-- else happens; otherwise getstatus is called once per extracted link.
function checklinks(from)
    local base, body, message = retrieve(from)
    if body then
        for _, target in ipairs(getlinks(body, base)) do
            getstatus(from, target)
        end
    else
        print(message)
    end
end
252
-- main program: every command-line argument is a URL (or file name) whose
-- links are to be checked
arg = arg or {}
if not arg[1] then
    print("Usage:\n luasocket check-links-nb.lua {<url>}")
    -- os.exit is the library function; the bare global "exit" is not
    -- defined, so calling it only aborted the script with a runtime error
    os.exit(1)
end
-- start one checking thread per link of every document...
for _, a in ipairs(arg) do
    print("Checking ", a)
    -- arguments without a scheme are resolved as local files
    checklinks(url.absolute("file:", a))
end
-- ...and run them all to completion
dispatch()