aboutsummaryrefslogtreecommitdiff
path: root/etc/check-links.lua
diff options
context:
space:
mode:
Diffstat (limited to 'etc/check-links.lua')
-rw-r--r--etc/check-links.lua122
1 files changed, 77 insertions, 45 deletions
diff --git a/etc/check-links.lua b/etc/check-links.lua
index 9d837e4..e06cc91 100644
--- a/etc/check-links.lua
+++ b/etc/check-links.lua
@@ -1,49 +1,84 @@
1----------------------------------------------------------------------------- 1-----------------------------------------------------------------------------
2-- Little program that checks links in HTML files 2-- Little program that checks links in HTML files, using coroutines and
3-- non-blocking I/O via the dispatcher module.
3-- LuaSocket sample files 4-- LuaSocket sample files
4-- Author: Diego Nehab 5-- Author: Diego Nehab
5-- RCS ID: $Id$ 6-- RCS ID: $$
6----------------------------------------------------------------------------- 7-----------------------------------------------------------------------------
-- Modules and the handler object depend on the requested I/O mode, so they
-- are only forward-declared here and assigned in the branches below.
local dispatch, url, http, handler

arg = arg or {}
-- arg[1] == nil means that no command-line argument was given at all
if arg[1] == nil then
    print("Usage:\n luasocket check-links.lua [-n] {<url>}")
    -- exit() is not a standard Lua function; os.exit terminates the script
    os.exit(1)
end

if arg[1] ~= "-n" then
    -- blocking I/O: simulate the dispatcher interface with a handler whose
    -- start() simply runs the function right away
    local socket = require("socket")
    url = require("socket.url")
    http = require("socket.http")
    handler = {
        start = function(self, f)
            f()
        end,
        tcp = socket.tcp
    }
    http.TIMEOUT = 10
else
    -- non-blocking I/O was requested: drop the "-n" flag from the argument
    -- list and create a real dispatcher handler
    table.remove(arg, 1)
    dispatch = require("dispatch")
    dispatch.TIMEOUT = 10
    url = require("socket.url")
    http = require("socket.http")
    handler = dispatch.newhandler()
end

-- number of link checks that were started and have not finished yet
local nthreads = 0
38
-- Get the status of a URL using the handler and print it.
-- Only links whose scheme is "http" are checked; any other link is ignored.
-- A HEAD request is started through handler:start, and nthreads is
-- incremented before the request and decremented once it has been reported.
--   link: absolute URL to check
-- Output: "\t<link>" when the request succeeds with code 200, otherwise
-- "\t<link>: <code>", where <code> is the second value from http.request.
function getstatus(link)
    local parsed = url.parse(link, {scheme = "file"})
    -- NOTE(review): url.parse appears to return nil for an empty URL, so
    -- guard before indexing the result
    if not parsed or parsed.scheme ~= "http" then return end
    nthreads = nthreads + 1
    handler:start(function()
        local body, code = http.request{
            method = "HEAD",
            url = link,
            create = handler.tcp
        }
        if body and code == 200 then
            io.write('\t', link, '\n')
        else
            io.write('\t', link, ': ', tostring(code), '\n')
        end
        nthreads = nthreads - 1
    end)
end
10 56
-- Read the entire contents of a local file.
--   path: file path; it is passed through url.unescape before being opened
-- Returns the contents as a string, or nil followed by the io.open error
-- message when the file cannot be opened.
function readfile(path)
    local handle, err = io.open(url.unescape(path), "r")
    if not handle then return nil, err end
    local contents = handle:read("*a")
    handle:close()
    return contents
end
20 66
-- Retrieve the document named by u.
--   u: URL to retrieve; a URL without a scheme is treated as a "file" URL
-- Returns base, body, err:
--   base: u itself, or the "location" response header when an HTTP request
--         answered with code 200 and that header is present
--   body: document contents, or nil on failure
--   err:  failure description when body could not be obtained
-- NOTE(review): this global definition replaces the standard `load`
-- function in Lua 5.1 and later; the name is kept because checklinks
-- calls it.
function load(u)
    local parsed = url.parse(u, { scheme = "file" })
    local scheme = parsed.scheme
    local base, body, err = u, nil, nil
    if scheme == "file" then
        body, err = readfile(parsed.path)
    elseif scheme == "http" then
        local code, headers
        body, code, headers = http.request(u)
        -- if there was a redirect, update base to reflect it
        if code == 200 then base = headers.location or base end
        -- on failure, the second value from http.request is the error
        if not body then err = code end
    else
        err = string.format("unhandled scheme '%s'", scheme)
    end
    return base, body, err
end
@@ -53,35 +88,32 @@ function getlinks(body, base)
53 body = string.gsub(body, "%<%!%-%-.-%-%-%>", "") 88 body = string.gsub(body, "%<%!%-%-.-%-%-%>", "")
54 local links = {} 89 local links = {}
55 -- extract links 90 -- extract links
56 body = string.gsub(body, '[Hh][Rr][Ee][Ff]%s*=%s*"([^"]*)"', function(href) 91 body = string.gsub(body, '[Hh][Rr][Ee][Ff]%s*=%s*"([^"]*)"', function(href)
57 table.insert(links, url.absolute(base, href)) 92 table.insert(links, url.absolute(base, href))
58 end) 93 end)
59 body = string.gsub(body, "[Hh][Rr][Ee][Ff]%s*=%s*'([^']*)'", function(href) 94 body = string.gsub(body, "[Hh][Rr][Ee][Ff]%s*=%s*'([^']*)'", function(href)
60 table.insert(links, url.absolute(base, href)) 95 table.insert(links, url.absolute(base, href))
61 end) 96 end)
62 string.gsub(body, "[Hh][Rr][Ee][Ff]%s*=%s*(.-)>", function(href) 97 string.gsub(body, "[Hh][Rr][Ee][Ff]%s*=%s*(.-)>", function(href)
63 table.insert(links, url.absolute(base, href)) 98 table.insert(links, url.absolute(base, href))
64 end) 99 end)
65 return links 100 return links
66end 101end
67 102
-- Load the document at address and check every link found in it.
--   address: URL of the document to scan
-- When the document cannot be loaded, the error is printed and nothing
-- else happens. Otherwise each link returned by getlinks is handed to
-- getstatus.
function checklinks(address)
    local base, body, err = load(address)
    if body then
        print("Checking ", base)
        for _, link in ipairs(getlinks(body, base)) do
            getstatus(link)
        end
    else
        print(err)
    end
end
78 112
-- Check every address given on the command line; an address without a
-- scheme is resolved against "file:".
for _, address in ipairs(arg) do
    local target = url.absolute("file:", address)
    checklinks(target)
end

-- Keep stepping the handler until every started check has finished.
while true do
    if nthreads <= 0 then break end
    handler:step()
end