aboutsummaryrefslogtreecommitdiff
path: root/etc/check-links.lua
diff options
context:
space:
mode:
Diffstat (limited to 'etc/check-links.lua')
-rw-r--r-- etc/check-links.lua 111
1 files changed, 0 insertions, 111 deletions
diff --git a/etc/check-links.lua b/etc/check-links.lua
deleted file mode 100644
index 283f3ac..0000000
--- a/etc/check-links.lua
+++ /dev/null
@@ -1,111 +0,0 @@
1-----------------------------------------------------------------------------
2-- Little program that checks links in HTML files, using coroutines and
3-- non-blocking I/O via the dispatcher module.
4-- LuaSocket sample files
5-- Author: Diego Nehab
6-----------------------------------------------------------------------------
7local url = require("socket.url")
8local dispatch = require("dispatch")
9local http = require("socket.http")
10dispatch.TIMEOUT = 10
11
-- make sure the user knows how to invoke us
arg = arg or {}
if #arg < 1 then
    print("Usage:\n luasocket check-links.lua [-n] {<url>}")
    -- `exit` is not a standard Lua global, so calling it raised
    -- "attempt to call a nil value"; os.exit is the standard way to stop.
    -- A non-zero status tells the calling shell the invocation was wrong.
    os.exit(1)
end
18
-- Dispatcher handler used by the rest of the script. Declared local so the
-- script does not leak it into the global table; the code below this point
-- still sees it as an upvalue.
local handler

-- '-n' means we are running in non-blocking mode
if arg[1] == "-n" then
    -- if non-blocking I/O was requested, use real dispatcher interface;
    -- drop the flag first so that only addresses remain in arg
    table.remove(arg, 1)
    handler = dispatch.newhandler("coroutine")
else
    -- if using blocking I/O, use fake dispatcher interface
    handler = dispatch.newhandler("sequential")
end

-- number of link checks started through the handler that have not finished
local nthreads = 0
30
-- Get the status of a URL using the dispatcher and report it on stdout.
-- Only links whose scheme is "http" are checked; every other link is
-- silently ignored. Each check is started through the handler, and nthreads
-- counts the checks that have been started but have not finished.
--   link: absolute URL (string) to probe with an HTTP HEAD request
function getstatus(link)
    local parsed = url.parse(link, {scheme = "file"})
    -- guard against url.parse returning nil (LuaSocket does so for an empty
    -- URL) instead of failing on the field access
    if not parsed or parsed.scheme ~= "http" then return end
    nthreads = nthreads + 1
    handler:start(function()
        -- only the first result and the status code are needed here
        local r, c = http.request{
            method = "HEAD",
            url = link,
            create = handler.tcp
        }
        if r and c == 200 then io.write('\t', link, '\n')
        else io.write('\t', link, ': ', tostring(c), '\n') end
        -- this check is done, whatever its outcome
        nthreads = nthreads - 1
    end)
end
48
-- Read a whole local file.
--   path: file path, possibly containing URL %XX escapes (they are decoded
--         with url.unescape before the file is opened)
-- Returns the file contents on success, or nil plus the message from
-- io.open when the file cannot be opened.
function readfile(path)
    local handle, reason = io.open(url.unescape(path), "r")
    if not handle then
        return nil, reason
    end
    local contents = handle:read("*a")
    handle:close()
    return contents
end
58
-- Fetch the document at address u.
-- Declared local on purpose: a global named `load` would overwrite Lua's
-- builtin `load` for every other piece of code in the process.
--   u: address of the document; "http" URLs are fetched with http.request,
--      "file" URLs (the default scheme when none is given) are read from disk
-- Returns three values: base, body, err
--   base: address to resolve relative links against (u, or the `location`
--         header of a 200 response when one is present)
--   body: document contents, or nil on failure
--   err:  failure description when body is nil
local function load(u)
    local parsed = url.parse(u, { scheme = "file" })
    if not parsed then
        -- nothing to dispatch on; report instead of indexing nil
        return u, nil, string.format("invalid url '%s'", tostring(u))
    end
    local base = u
    local body, err
    if parsed.scheme == "http" then
        local code, headers
        body, code, headers = http.request(u)
        if code == 200 then
            -- if there was a redirect, update base to reflect it
            base = headers.location or base
        end
        if not body then
            -- on failure the second result carries the error
            err = code
        end
    elseif parsed.scheme == "file" then
        body, err = readfile(parsed.path)
    else
        err = string.format("unhandled scheme '%s'", parsed.scheme)
    end
    return base, body, err
end
77
-- Extract every href target from an HTML document.
--   body: HTML text
--   base: URL used to turn relative hrefs into absolute ones
-- Returns a list of absolute URLs. Links are collected in three passes
-- (double-quoted, single-quoted, unquoted), so the list is grouped by
-- quoting style rather than being in document order.
function getlinks(body, base)
    -- get rid of comments
    body = string.gsub(body, "%<%!%-%-.-%-%-%>", "")
    local links = {}
    -- Record one href and erase it from the text. Returning "" matters: in
    -- Lua 5.1 and later a gsub callback that returns nil leaves the match in
    -- place, so quoted hrefs would be picked up a second time (quotes and
    -- trailing attributes included) by the unquoted pass below.
    local function collect(href)
        links[#links + 1] = url.absolute(base, href)
        return ""
    end
    -- extract links
    body = string.gsub(body, '[Hh][Rr][Ee][Ff]%s*=%s*"([^"]*)"', collect)
    body = string.gsub(body, "[Hh][Rr][Ee][Ff]%s*=%s*'([^']*)'", collect)
    -- an unquoted value ends at the first whitespace or '>', so following
    -- attributes are not swallowed into the link
    string.gsub(body, "[Hh][Rr][Ee][Ff]%s*=%s*([^%s>\"']+)", collect)
    return links
end
94
-- Load the document at address and run getstatus on every link found in it.
-- When the document cannot be loaded, the failure is printed and nothing
-- else happens.
function checklinks(address)
    local base, body, failure = load(address)
    if body then
        print("Checking ", base)
        for _, target in ipairs(getlinks(body, base)) do
            getstatus(target)
        end
    else
        print(failure)
    end
end
104
-- check every address given on the command line, resolving each one
-- against "file:" first
do
    local index = 1
    local address = arg[index]
    while address ~= nil do
        checklinks(url.absolute("file:", address))
        index = index + 1
        address = arg[index]
    end
end

-- keep stepping the handler until no started check is still pending
while nthreads > 0 do
    handler:step()
end