diff options
Diffstat (limited to 'etc')
-rw-r--r-- | etc/check-links-nb.lua | 262 |
1 files changed, 262 insertions, 0 deletions
diff --git a/etc/check-links-nb.lua b/etc/check-links-nb.lua new file mode 100644 index 0000000..7e8df1b --- /dev/null +++ b/etc/check-links-nb.lua | |||
@@ -0,0 +1,262 @@ | |||
1 | ----------------------------------------------------------------------------- | ||
2 | -- Little program that checks links in HTML files, using coroutines and | ||
3 | -- non-blocking I/O. It is therefore faster than the simpler, blocking version of the same program. | ||
4 | -- LuaSocket sample files | ||
5 | -- Author: Diego Nehab | ||
6 | -- RCS ID: $$ | ||
7 | ----------------------------------------------------------------------------- | ||
8 | local socket = require("socket") | ||
9 | |||
10 | TIMEOUT = 10 | ||
11 | |||
-- We need to yield across calls to protect, so the stock pcall-based
-- socket.protect cannot be used. This replacement (simplified from coxpcall)
-- runs the protected function inside its own coroutine and must be installed
-- before socket.http is loaded.
--
-- f: function to protect
-- returns: a function that calls f and
--   * returns f's results when f completes normally;
--   * returns nil followed by the first element of the error value when f
--     raises a table;
--   * re-raises any other error value unchanged;
--   * forwards yields made inside f to the caller's coroutine, and passes the
--     values it is resumed with back into f.
function socket.protect(f)
    local unpack = table.unpack or unpack
    -- keep an explicit count so that nil values are never dropped
    local function pack(...)
        return { n = select("#", ...), ... }
    end
    return function(...)
        local co = coroutine.create(f)
        local args = pack(...)
        while true do
            local results = pack(coroutine.resume(co, unpack(args, 1, args.n)))
            if not results[1] then
                local err = results[2]
                -- a table error value is turned into a nil, message pair
                if type(err) == "table" then
                    return nil, err[1]
                end
                -- anything else is a genuine error: do not swallow it
                error(err, 0)
            end
            if coroutine.status(co) == "suspended" then
                -- f yielded: pass the yield outwards, then hand whatever we
                -- are resumed with back to f on the next iteration
                args = pack(coroutine.yield(unpack(results, 2, results.n)))
            else
                return unpack(results, 2, results.n)
            end
        end
    end
end
33 | |||
34 | local http = require("socket.http") | ||
35 | local url = require("socket.url") | ||
36 | |||
-- creates a new set data structure
-- The returned table stores the members in its array part (positions 1..n),
-- so it can be iterated with ipairs or handed to functions expecting an
-- array. Methods, reached through the metatable:
--   set:insert(value)  adds value unless it is already a member
--   set:remove(value)  removes value if present, moving the last member into
--                      the freed position (member order is not preserved)
function newset()
    -- maps each member to its current position in the array part
    local reverse = {}
    local set = {}
    return setmetatable(set, {__index = {
        insert = function(self, value)
            if not reverse[value] then
                table.insert(self, value)
                reverse[value] = #self
            end
        end,
        remove = function(self, value)
            local index = reverse[value]
            if index then
                reverse[value] = nil
                -- pop the last member and use it to fill the hole, unless
                -- the last member is the one being removed
                local top = table.remove(self)
                if top ~= value then
                    reverse[top] = index
                    self[index] = top
                end
            end
        end
    }})
end
61 | |||
-- number of checker threads that have been started and not yet finished
local nthreads = 0
-- per-socket bookkeeping, keyed by the underlying tcp object
local context = {}
-- sockets whose threads are waiting to read
local receiving = newset()
-- sockets whose threads are waiting to write (or to finish connecting)
local sending = newset()
66 | |||
-- socket.tcp() replacement for non-blocking I/O
-- implements enough functionality to be used with http.request
-- in Lua 5.1, we have coroutine.running to simplify things...
--
-- thread: the coroutine that will perform I/O on the sockets created; the
--         dispatcher resumes it when a socket becomes ready or times out
-- returns: a function suitable for the `create` field of http.request. It
--          returns a wrapper object exposing settimeout, send, receive,
--          connect and close, or nil plus an error message when the
--          underlying socket cannot be created.
function newcreate(thread)
    return function()
        -- try to create underlying socket
        local tcp, err = socket.tcp()
        if not tcp then return nil, err end
        -- put it in non-blocking mode right away
        tcp:settimeout(0)

        -- tell the dispatcher which set to watch this socket in, mark the
        -- time we started waiting, and return control to the dispatcher;
        -- returns true when the dispatcher resumed us asking for a timeout
        local function wait(set)
            set:insert(tcp)
            context[tcp].last = socket.gettime()
            return coroutine.yield() == "timeout"
        end

        local trap = {
            -- we ignore settimeout to preserve our 0 timeout
            settimeout = function(self, mode, value)
                return 1
            end,
            -- send in non-blocking mode and yield on timeout
            send = function(self, data, first, last)
                first = (first or 1) - 1
                local result, why
                while true do
                    -- resume right after the last byte already sent
                    result, why, first = tcp:send(data, first + 1, last)
                    if why ~= "timeout" then return result, why, first end
                    if wait(sending) then return nil, "timeout" end
                end
            end,
            -- receive in non-blocking mode and yield on timeout
            receive = function(self, pattern)
                local value, why
                local partial = ""
                while true do
                    -- feed the partial result back in so nothing is lost
                    value, why, partial = tcp:receive(pattern, partial)
                    if why ~= "timeout" then return value, why, partial end
                    if wait(receiving) then return nil, "timeout" end
                end
            end,
            -- connect in non-blocking mode and yield on timeout
            connect = function(self, host, port)
                local result, why = tcp:connect(host, port)
                if why ~= "timeout" then return result, why end
                -- we will be able to write upon connection, so wait in the
                -- sending set
                if wait(sending) then return nil, "timeout" end
                -- when we come back, check if connection was successful
                result, why = tcp:connect(host, port)
                if result or why == "already connected" then return 1 end
                return nil, "non-blocking connect failed"
            end,
            close = function(self)
                -- make sure the dispatcher stops watching a socket that is
                -- about to be closed
                sending:remove(tcp)
                receiving:remove(tcp)
                context[tcp] = nil
                return tcp:close()
            end
        }
        -- add newly created socket to context
        context[tcp] = {
            thread = thread,
            trap = trap
        }
        return trap
    end
end
149 | |||
-- get the status of a URL, non-blocking
-- Links whose scheme is not "http" are ignored. For an http link, a new
-- checker thread is started that issues a HEAD request through the
-- non-blocking socket wrapper and writes one line with the outcome.
--   from: page the link was found on
--   link: absolute URL to check
function getstatus(from, link)
    if url.parse(link, {scheme = "file"}).scheme ~= "http" then
        return
    end
    -- body of the checker thread; it receives its own coroutine as the
    -- first argument so it can hand it to newcreate
    local function worker(self, origin, target)
        local _, code = http.request{
            method = "HEAD",
            url = target,
            create = newcreate(self)
        }
        if code ~= 200 then
            io.write('\t', target, ': ', code, '\n')
        else
            io.write('\t', target, '\n')
        end
        nthreads = nthreads - 1
    end
    local checker = coroutine.create(worker)
    nthreads = nthreads + 1
    assert(coroutine.resume(checker, checker, from, link))
end
168 | |||
-- dispatch all threads until we are done
-- Loops while any checker thread is still running: waits (up to one second)
-- for sockets to become readable or writable, resumes the threads that own
-- them, and then forces a timeout on every socket that has been waiting for
-- more than TIMEOUT seconds.
function dispatch()
    -- resume the owner of every ready socket, after taking the socket out of
    -- the select set it was waiting in
    local function wake(ready, set)
        for _, who in ipairs(ready) do
            if context[who] then
                set:remove(who)
                assert(coroutine.resume(context[who].thread))
            end
        end
    end
    while nthreads > 0 do
        -- check which sockets are interesting and act on them
        local readable, writable = socket.select(receiving, sending, 1)
        wake(readable, receiving)
        wake(writable, sending)
        -- politely ask replacement I/O functions in idle threads to
        -- return reporting a timeout
        local now = socket.gettime()
        for who, data in pairs(context) do
            if data.last and now - data.last > TIMEOUT then
                -- the socket is no longer waiting for anything: stop
                -- watching it and forget the stale timestamp so the same
                -- wait is not timed out twice
                receiving:remove(who)
                sending:remove(who)
                data.last = nil
                assert(coroutine.resume(data.thread, "timeout"))
            end
        end
    end
end
198 | |||
-- reads the whole contents of a local file
--   path: URL-escaped file path
-- returns the file contents, or nil followed by the message from io.open
function readfile(path)
    local handle, message = io.open(url.unescape(path), "r")
    if not handle then
        return nil, message
    end
    local contents = handle:read("*a")
    handle:close()
    return contents
end
208 | |||
-- retrieves the document at u, which may be an http URL or a local file
--   u: URL of the document; a URL without a scheme is treated as a file
-- returns base, body, err where
--   base: URL that relative links in the document should be resolved
--         against (the "location" response header when one is present on a
--         200 response, otherwise u itself)
--   body: document contents, or nil on failure
--   err:  error message (or the failing http.request result) when body is nil
function retrieve(u)
    local parsed = url.parse(u, { scheme = "file" })
    local body, headers, code, err
    local base = u
    if parsed.scheme == "http" then
        body, code, headers = http.request(u)
        if code == 200 then
            -- prefer the location header over the requested URL
            base = headers.location or base
        end
        if not body then
            -- on failure http.request returns nil plus the reason
            err = code
        end
    elseif parsed.scheme == "file" then
        body, err = readfile(parsed.path)
    else
        err = string.format("unhandled scheme '%s'", parsed.scheme)
    end
    return base, body, err
end
226 | |||
-- extracts the targets of all href attributes found in an HTML document
--   body: HTML text
--   base: URL used to turn relative hrefs into absolute ones
-- returns an array of absolute URLs: double-quoted hrefs first, then
-- single-quoted ones, then unquoted ones
function getlinks(body, base)
    -- get rid of comments
    body = string.gsub(body, "%<%!%-%-.-%-%-%>", "")
    local links = {}
    -- records one link and erases the matched attribute from the text.
    -- Returning "" explicitly matters: since Lua 5.1 a nil result makes
    -- gsub keep the match, and the quoted hrefs would then be picked up
    -- again (quotes included) by the unquoted pattern below.
    local function collect(href)
        table.insert(links, url.absolute(base, href))
        return ""
    end
    -- extract links
    body = string.gsub(body, '[Hh][Rr][Ee][Ff]%s*=%s*"([^"]*)"', collect)
    body = string.gsub(body, "[Hh][Rr][Ee][Ff]%s*=%s*'([^']*)'", collect)
    string.gsub(body, "[Hh][Rr][Ee][Ff]%s*=%s*(.-)>", collect)
    return links
end
243 | |||
-- retrieves the document at `from` and starts a status check for every
-- link found in it; prints the error and gives up when the document cannot
-- be retrieved
function checklinks(from)
    local base, body, message = retrieve(from)
    if body then
        for _, link in ipairs(getlinks(body, base)) do
            getstatus(from, link)
        end
    else
        print(message)
    end
end
252 | |||
-- main program: start a check for every URL given on the command line and
-- then run the dispatcher until all checker threads have finished
arg = arg or {}
if #arg < 1 then
    print("Usage:\n luasocket check-links.lua {<url>}")
    -- there is no global exit function; leave with a failure status
    os.exit(1)
end
for _, a in ipairs(arg) do
    print("Checking ", a)
    -- plain paths are resolved as local files
    checklinks(url.absolute("file:", a))
end
dispatch()