diff options
Diffstat (limited to 'etc/check-links.lua')
-rw-r--r-- | etc/check-links.lua | 122 |
1 files changed, 77 insertions, 45 deletions
diff --git a/etc/check-links.lua b/etc/check-links.lua index 9d837e4..e06cc91 100644 --- a/etc/check-links.lua +++ b/etc/check-links.lua | |||
@@ -1,49 +1,84 @@ | |||
1 | ----------------------------------------------------------------------------- | 1 | ----------------------------------------------------------------------------- |
2 | -- Little program that checks links in HTML files | 2 | -- Little program that checks links in HTML files, using coroutines and |
3 | -- non-blocking I/O via the dispatcher module. | ||
3 | -- LuaSocket sample files | 4 | -- LuaSocket sample files |
4 | -- Author: Diego Nehab | 5 | -- Author: Diego Nehab |
5 | -- RCS ID: $Id$ | 6 | -- RCS ID: $$ |
6 | ----------------------------------------------------------------------------- | 7 | ----------------------------------------------------------------------------- |
7 | local http = require("socket.http") | 8 | local dispatch, url, http, handler |
8 | local url = require("socket.url") | 9 | |
9 | http.TIMEOUT = 10 | 10 | arg = arg or {} |
11 | if table.getn(arg) < 1 then | ||
12 | print("Usage:\n luasocket check-links.lua [-n] {<url>}") | ||
13 | exit() | ||
14 | end | ||
15 | |||
16 | if arg[1] ~= "-n" then | ||
17 | -- if using blocking I/O, simulate dispatcher interface | ||
18 | url = require("socket.url") | ||
19 | http = require("socket.http") | ||
20 | handler = { | ||
21 | start = function(self, f) | ||
22 | f() | ||
23 | end, | ||
24 | tcp = socket.tcp | ||
25 | } | ||
26 | http.TIMEOUT = 10 | ||
27 | else | ||
28 | -- if non-blocking I/O was requested, use the real dispatcher | ||
29 | table.remove(arg, 1) | ||
30 | dispatch = require("dispatch") | ||
31 | dispatch.TIMEOUT = 10 | ||
32 | url = require("socket.url") | ||
33 | http = require("socket.http") | ||
34 | handler = dispatch.newhandler() | ||
35 | end | ||
36 | |||
37 | local nthreads = 0 | ||
38 | |||
39 | -- get the status of a URL using the dispatcher | ||
40 | function getstatus(link) | ||
41 | local parsed = url.parse(link, {scheme = "file"}) | ||
42 | if parsed.scheme == "http" then | ||
43 | nthreads = nthreads + 1 | ||
44 | handler:start(function() | ||
45 | local r, c, h, s = http.request{ | ||
46 | method = "HEAD", | ||
47 | url = link, | ||
48 | create = handler.tcp | ||
49 | } | ||
50 | if r and c == 200 then io.write('\t', link, '\n') | ||
51 | else io.write('\t', link, ': ', tostring(c), '\n') end | ||
52 | nthreads = nthreads - 1 | ||
53 | end) | ||
54 | end | ||
55 | end | ||
10 | 56 | ||
11 | function readfile(path) | 57 | function readfile(path) |
12 | path = url.unescape(path) | 58 | path = url.unescape(path) |
13 | local file, error = io.open(path, "r") | 59 | local file, error = io.open(path, "r") |
14 | if file then | 60 | if file then |
15 | local body = file:read("*a") | 61 | local body = file:read("*a") |
16 | file:close() | 62 | file:close() |
17 | return body | 63 | return body |
18 | else return nil, error end | 64 | else return nil, error end |
19 | end | 65 | end |
20 | 66 | ||
21 | function getstatus(u) | 67 | function load(u) |
22 | local parsed = url.parse(u, {scheme = "file"}) | 68 | local parsed = url.parse(u, { scheme = "file" }) |
23 | if parsed.scheme == "http" then | ||
24 | local r, c, h, s = http.request{url = u, method = "HEAD"} | ||
25 | if c ~= 200 then return s or c end | ||
26 | elseif parsed.scheme == "file" then | ||
27 | local file, error = io.open(url.unescape(parsed.path), "r") | ||
28 | if file then file:close() | ||
29 | else return error end | ||
30 | else return string.format("unhandled scheme '%s'", parsed.scheme) end | ||
31 | end | ||
32 | |||
33 | function retrieve(u) | ||
34 | local parsed = url.parse(u, { scheme = "file" }) | ||
35 | local body, headers, code, error | 69 | local body, headers, code, error |
36 | local base = u | 70 | local base = u |
37 | if parsed.scheme == "http" then | 71 | if parsed.scheme == "http" then |
38 | body, code, headers = http.request(u) | 72 | body, code, headers = http.request(u) |
39 | if code == 200 then | 73 | if code == 200 then |
74 | -- if there was a redirect, update base to reflect it | ||
40 | base = headers.location or base | 75 | base = headers.location or base |
41 | end | 76 | end |
42 | if not body then | 77 | if not body then |
43 | error = code | 78 | error = code |
44 | end | 79 | end |
45 | elseif parsed.scheme == "file" then | 80 | elseif parsed.scheme == "file" then |
46 | body, error = readfile(parsed.path) | 81 | body, error = readfile(parsed.path) |
47 | else error = string.format("unhandled scheme '%s'", parsed.scheme) end | 82 | else error = string.format("unhandled scheme '%s'", parsed.scheme) end |
48 | return base, body, error | 83 | return base, body, error |
49 | end | 84 | end |
@@ -53,35 +88,32 @@ function getlinks(body, base) | |||
53 | body = string.gsub(body, "%<%!%-%-.-%-%-%>", "") | 88 | body = string.gsub(body, "%<%!%-%-.-%-%-%>", "") |
54 | local links = {} | 89 | local links = {} |
55 | -- extract links | 90 | -- extract links |
56 | body = string.gsub(body, '[Hh][Rr][Ee][Ff]%s*=%s*"([^"]*)"', function(href) | 91 | body = string.gsub(body, '[Hh][Rr][Ee][Ff]%s*=%s*"([^"]*)"', function(href) |
57 | table.insert(links, url.absolute(base, href)) | 92 | table.insert(links, url.absolute(base, href)) |
58 | end) | 93 | end) |
59 | body = string.gsub(body, "[Hh][Rr][Ee][Ff]%s*=%s*'([^']*)'", function(href) | 94 | body = string.gsub(body, "[Hh][Rr][Ee][Ff]%s*=%s*'([^']*)'", function(href) |
60 | table.insert(links, url.absolute(base, href)) | 95 | table.insert(links, url.absolute(base, href)) |
61 | end) | 96 | end) |
62 | string.gsub(body, "[Hh][Rr][Ee][Ff]%s*=%s*(.-)>", function(href) | 97 | string.gsub(body, "[Hh][Rr][Ee][Ff]%s*=%s*(.-)>", function(href) |
63 | table.insert(links, url.absolute(base, href)) | 98 | table.insert(links, url.absolute(base, href)) |
64 | end) | 99 | end) |
65 | return links | 100 | return links |
66 | end | 101 | end |
67 | 102 | ||
68 | function checklinks(u) | 103 | function checklinks(address) |
69 | local base, body, error = retrieve(u) | 104 | local base, body, error = load(address) |
70 | if not body then print(error) return end | 105 | if not body then print(error) return end |
106 | print("Checking ", base) | ||
71 | local links = getlinks(body, base) | 107 | local links = getlinks(body, base) |
72 | for _, l in ipairs(links) do | 108 | for _, link in ipairs(links) do |
73 | io.stderr:write("\t", l, "\n") | 109 | getstatus(link) |
74 | local err = getstatus(l) | ||
75 | if err then io.stderr:write('\t', l, ": ", err, "\n") end | ||
76 | end | 110 | end |
77 | end | 111 | end |
78 | 112 | ||
79 | arg = arg or {} | 113 | for _, address in ipairs(arg) do |
80 | if table.getn(arg) < 1 then | 114 | checklinks(url.absolute("file:", address)) |
81 | print("Usage:\n luasocket check-links.lua {<url>}") | ||
82 | exit() | ||
83 | end | ||
84 | for _, a in ipairs(arg) do | ||
85 | print("Checking ", a) | ||
86 | checklinks(url.absolute("file:", a)) | ||
87 | end | 115 | end |
116 | |||
117 | while nthreads > 0 do | ||
118 | handler:step() | ||
119 | end | ||