diff options
Diffstat (limited to 'etc/check-links.lua')
-rw-r--r-- | etc/check-links.lua | 111 |
1 files changed, 0 insertions, 111 deletions
diff --git a/etc/check-links.lua b/etc/check-links.lua deleted file mode 100644 index 283f3ac..0000000 --- a/etc/check-links.lua +++ /dev/null | |||
@@ -1,111 +0,0 @@ | |||
1 | ----------------------------------------------------------------------------- | ||
2 | -- Little program that checks links in HTML files, using coroutines and | ||
3 | -- non-blocking I/O via the dispatcher module. | ||
4 | -- LuaSocket sample files | ||
5 | -- Author: Diego Nehab | ||
6 | ----------------------------------------------------------------------------- | ||
7 | local url = require("socket.url") | ||
8 | local dispatch = require("dispatch") | ||
9 | local http = require("socket.http") | ||
-- timeout handed to the dispatcher module
-- NOTE(review): presumably expressed in seconds -- confirm in dispatch
dispatch.TIMEOUT = 10

-- the standalone interpreter provides 'arg'; fall back to an empty list
-- when it is missing
arg = arg or {}

-- '-n' as the first argument means we are running in non-blocking mode;
-- drop the flag so that only the addresses remain in 'arg'
local nonblocking = (arg[1] == "-n")
if nonblocking then
    table.remove(arg, 1)
end

-- make sure the user knows how to invoke us (checked after the flag was
-- removed, so that '-n' without any address is rejected as well)
if #arg < 1 then
    print("Usage:\n luasocket check-links.lua [-n] {<url>}")
    -- Lua has no bare exit() function; os.exit is the library call
    os.exit(1)
end

-- if non-blocking I/O was requested, use the real dispatcher interface;
-- otherwise use the fake (sequential) dispatcher interface.
-- Kept local: the functions defined below in this chunk still see it.
local handler = dispatch.newhandler(
    nonblocking and "coroutine" or "sequential")

-- number of status checks that were started and have not finished yet
local nthreads = 0
30 | |||
-- get the status of a URL using the dispatcher
-- For a link whose scheme is "http", starts a handler task that issues a
-- HEAD request and writes the link to standard output, followed by the
-- status when the answer is not a plain 200. Links with any other scheme
-- are skipped without output.
-- link: URL string to check
function getstatus(link)
    local parsed = url.parse(link, {scheme = "file"})
    -- url.parse may return nil (e.g. for an empty URL); nothing to check
    if not parsed or parsed.scheme ~= "http" then return end
    nthreads = nthreads + 1
    handler:start(function()
        -- only the result and the status code are needed here.
        -- NOTE(review): do not wrap this call in pcall to protect the
        -- counter -- on Lua 5.1 a coroutine cannot yield across pcall.
        local r, c = http.request{
            method = "HEAD",
            url = link,
            create = handler.tcp
        }
        if r and c == 200 then
            io.write('\t', link, '\n')
        else
            io.write('\t', link, ': ', tostring(c), '\n')
        end
        -- this check is done; the main loop stops stepping at zero
        nthreads = nthreads - 1
    end)
end
48 | |||
-- read the whole contents of a local file
-- path: file path as found in a URL (it is unescaped before opening)
-- returns the contents, or nil followed by the io.open error message
function readfile(path)
    local fh, err = io.open(url.unescape(path), "r")
    if not fh then return nil, err end
    local contents = fh:read("*a")
    fh:close()
    return contents
end
58 | |||
-- fetch the document named by 'u', over HTTP or from the local file system
-- u: URL string; a URL without a scheme is treated as a "file" URL
-- returns base, body, err:
--   base: URL to resolve relative links against
--   body: document contents, or nil on failure
--   err:  error message (or whatever http.request reported) when body is nil
-- Declared local so that it does not overwrite Lua's builtin global 'load';
-- the functions defined after it in this chunk still see it.
local function load(u)
    local parsed = url.parse(u, { scheme = "file" })
    -- url.parse may return nil (e.g. for an empty URL)
    if not parsed then
        return u, nil, string.format("invalid url '%s'", tostring(u))
    end
    local base = u
    local body, code, headers, err
    if parsed.scheme == "http" then
        body, code, headers = http.request(u)
        if code == 200 then
            -- if there was a redirect, update base to reflect it
            base = headers.location or base
        end
        if not body then
            -- on failure the second result carries the error
            err = code
        end
    elseif parsed.scheme == "file" then
        body, err = readfile(parsed.path)
    else
        err = string.format("unhandled scheme '%s'", parsed.scheme)
    end
    return base, body, err
end
77 | |||
-- collect the targets of all href attributes found in an HTML document
-- body: HTML text
-- base: URL used to turn relative hrefs into absolute ones
-- returns a list of absolute URLs: double-quoted hrefs first, then
-- single-quoted ones, then unquoted ones
function getlinks(body, base)
    local links = {}
    -- record one href and erase the matched attribute from the text.
    -- Returning "" matters: when a gsub callback returns nil the match is
    -- kept, and the later passes would then pick up the same href again.
    local function collect(href)
        links[#links + 1] = url.absolute(base, href)
        return ""
    end
    -- get rid of comments
    body = string.gsub(body, "%<%!%-%-.-%-%-%>", "")
    -- extract double-quoted, then single-quoted links
    body = string.gsub(body, '[Hh][Rr][Ee][Ff]%s*=%s*"([^"]*)"', collect)
    body = string.gsub(body, "[Hh][Rr][Ee][Ff]%s*=%s*'([^']*)'", collect)
    -- what is left are unquoted values; stop at whitespace, a quote or '>'
    -- so that attributes following the href are not swallowed
    string.gsub(body, "[Hh][Rr][Ee][Ff]%s*=%s*([^%s\"'>]+)", collect)
    return links
end
94 | |||
-- load one document and start a status check for every link found in it
-- address: URL of the document to inspect
-- prints the load error instead when the document cannot be fetched
function checklinks(address)
    local base, body, err = load(address)
    if body then
        print("Checking ", base)
        for _, link in ipairs(getlinks(body, base)) do
            getstatus(link)
        end
    else
        print(err)
    end
end
104 | |||
-- check every address given on the command line; each one is made
-- absolute relative to the base "file:" before it is loaded
for i = 1, #arg do
    checklinks(url.absolute("file:", arg[i]))
end

-- keep stepping the dispatcher until every started check has finished
while nthreads > 0 do
    handler:step()
end