diff options
| author | Diego Nehab <diego@tecgraf.puc-rio.br> | 2001-09-12 18:27:40 +0000 |
|---|---|---|
| committer | Diego Nehab <diego@tecgraf.puc-rio.br> | 2001-09-12 18:27:40 +0000 |
| commit | 5c622071f95b833727767219cdee31b1419d6cc4 (patch) | |
| tree | caf89ceaff537d936e8a02b42d92280774291047 /src | |
| parent | 480689a70217d2ec9bedf699166f05bb972e9448 (diff) | |
| download | luasocket-5c622071f95b833727767219cdee31b1419d6cc4.tar.gz luasocket-5c622071f95b833727767219cdee31b1419d6cc4.tar.bz2 luasocket-5c622071f95b833727767219cdee31b1419d6cc4.zip | |
Initial revision
Diffstat (limited to 'src')
| -rw-r--r-- | src/url.lua | 204 |
1 files changed, 204 insertions, 0 deletions
diff --git a/src/url.lua b/src/url.lua new file mode 100644 index 0000000..8cafd9b --- /dev/null +++ b/src/url.lua | |||
| @@ -0,0 +1,204 @@ | |||
| 1 | ----------------------------------------------------------------------------- | ||
| 2 | -- URI parsing, composition and relative URL resolution | ||
| 3 | -- LuaSocket 1.4 toolkit. | ||
| 4 | -- Author: Diego Nehab | ||
| 5 | -- Date: 20/7/2001 | ||
| 6 | -- Conforming to: RFC 2396, LTN7 | ||
| 7 | -- RCS ID: $Id$ | ||
| 8 | ---------------------------------------------------------------------------- | ||
| 9 | |||
-- Public collects the exported functions and is published as the global
-- URL; Private collects helpers that are only used inside this file.
local Public = {}
local Private = {}
URL = Public
| 12 | |||
-----------------------------------------------------------------------------
-- Parses a url and returns a table with all its parts according to RFC 2396
-- The following grammar describes the names given to the URL parts
-- <url> ::= <scheme>://<authority>/<path>;<params>?<query>#<fragment>
-- <authority> ::= <userinfo>@<host>:<port>
-- <userinfo> ::= <user>[:<password>]
-- <path> ::= {<segment>/}<segment>
-- Input
--   url: uniform resource locator of request
--   default: optional table with default values for each field. It is
--     copied into the result and is never modified.
-- Returns
--   nil if url is nil or the empty string; otherwise a new table with the
--   following fields, where RFC naming conventions have been preserved:
--     scheme, authority, userinfo, user, password, host, port,
--     path, params, query, fragment
--   Fields that are absent from url keep the value given in default, if any.
-- Obs:
--   the leading '/' in {/<path>} is considered part of <path>
--   all whitespace is removed from url before it is parsed
-----------------------------------------------------------------------------
function Public.parse_url(url, default)
    -- empty url is parsed to nil
    if not url or url == "" then return nil end
    -- start from a private copy of the defaults, so that a defaults table
    -- shared between calls is not polluted with the parts of this url
    local parsed = {}
    if default then
        for field, value in default do parsed[field] = value end
    end
    -- remove whitespace
    url = gsub(url, "%s", "")
    -- Each step below records one component and strips it from url: the
    -- callbacks return nothing, and Lua 4.0's gsub then substitutes the
    -- empty string for the matched text.
    -- get fragment
    url = gsub(url, "#(.*)$", function(f) %parsed.fragment = f end)
    -- get scheme
    url = gsub(url, "^([%w][%w%+%-%.]*)%:", function(s) %parsed.scheme = s end)
    -- get authority
    url = gsub(url, "^//([^/]*)", function(n) %parsed.authority = n end)
    -- get query string
    url = gsub(url, "%?(.*)", function(q) %parsed.query = q end)
    -- get params
    url = gsub(url, "%;(.*)", function(p) %parsed.params = p end)
    -- whatever is left is the path
    if url ~= "" then parsed.path = url end
    -- split the authority into userinfo, host and port
    local authority = parsed.authority
    if not authority then return parsed end
    authority = gsub(authority,"^([^@]*)@",function(u) %parsed.userinfo = u end)
    authority = gsub(authority, ":([^:]*)$", function(p) %parsed.port = p end)
    if authority ~= "" then parsed.host = authority end
    -- split the userinfo into user and password
    local userinfo = parsed.userinfo
    if not userinfo then return parsed end
    userinfo = gsub(userinfo, ":([^:]*)$", function(p) %parsed.password = p end)
    parsed.user = userinfo
    return parsed
end
| 60 | |||
-----------------------------------------------------------------------------
-- Rebuilds a parsed URL from its components.
-- The components are concatenated exactly as given: nothing is escaped
-- here, only whitespace is removed from the final string. Only the fields
-- scheme, authority, path, params, query and fragment are used.
-- Input
--   parsed: parsed URL, as returned by Public.parse_url
-- Returns
--   a single string with the corresponding URL
-----------------------------------------------------------------------------
function Public.build_url(parsed)
    local url = parsed.path or ""
    if parsed.params then url = url .. ";" .. parsed.params end
    if parsed.query then url = url .. "?" .. parsed.query end
    if parsed.authority then url = "//" .. parsed.authority .. url end
    if parsed.scheme then url = parsed.scheme .. ":" .. url end
    if parsed.fragment then url = url .. "#" .. parsed.fragment end
    -- gsub also returns the number of substitutions made; assign first so
    -- that callers receive the URL string and nothing else
    url = gsub(url, "%s", "")
    return url
end
| 78 | |||
-----------------------------------------------------------------------------
-- Builds an absolute URL from a base and a relative URL according to RFC 2396
-- Input
--   base_url
--   relative_url
-- Returns
--   corresponding absolute url, as a single string.
--   relative_url is returned untouched when base_url is nil or empty, or
--   when relative_url already has a scheme; base_url is returned untouched
--   when relative_url is nil or empty.
-----------------------------------------------------------------------------
function Public.absolute_url(base_url, relative_url)
    local base = %Public.parse_url(base_url)
    local relative = %Public.parse_url(relative_url)
    if not base then return relative_url end
    if not relative then return base_url end
    -- a URL with a scheme is already absolute
    if relative.scheme then return relative_url end
    relative.scheme = base.scheme
    if not relative.authority then
        relative.authority = base.authority
        if not relative.path then
            -- no path: inherit the base's path, and then params and query
            -- for as long as the relative URL does not define its own
            relative.path = base.path
            if not relative.params then
                relative.params = base.params
                if not relative.query then
                    relative.query = base.query
                end
            end
        else
            -- A base such as "http://host" has no path at all. Merge
            -- against "/" when the base has an authority, so the result
            -- stays separated from the host, and against "" otherwise.
            local base_path = base.path
            if not base_path then
                if base.authority then base_path = "/" else base_path = "" end
            end
            relative.path = %Private.absolute_path(base_path, relative.path)
        end
    end
    -- go through a local so that exactly one value is returned
    local url = %Public.build_url(relative)
    return url
end
| 112 | |||
-----------------------------------------------------------------------------
-- Breaks a path into its segments, unescaping the segments
-- Input
--   path: path string; nil is treated like the empty string
-- Returns
--   segment: a table with one entry per non-empty segment, in order.
--     The field is_absolute is set to 1 if the path starts with '/', and
--     the field is_directory is set to 1 if the path ends with '/'.
-- Obs:
--   all whitespace is removed from path before it is split
--   segments are unescaped with Code.unescape; Code is a global that is
--   not defined in this file and must be provided by the caller's
--   environment
-----------------------------------------------------------------------------
function Public.parse_path(path)
    local parsed = {}
    -- a URL such as "http://host" parses to a nil path; treat it as empty
    -- instead of failing inside gsub
    path = gsub(path or "", "%s", "")
    -- collect every run of characters between slashes
    gsub(path, "([^/]+)", function (s) tinsert(%parsed, s) end)
    for i = 1, getn(parsed) do
        parsed[i] = Code.unescape(parsed[i])
    end
    if strsub(path, 1, 1) == "/" then parsed.is_absolute = 1 end
    if strsub(path, -1, -1) == "/" then parsed.is_directory = 1 end
    return parsed
end
| 131 | |||
-----------------------------------------------------------------------------
-- Builds a path component from its segments, escaping protected characters.
-- Input
--   parsed: path segments, optionally with the is_absolute and
--     is_directory fields set
-- Returns
--   path: corresponding path string
-----------------------------------------------------------------------------
function Public.build_path(parsed)
    local count = getn(parsed)
    local path = ""
    -- an absolute path starts with a slash, even when it has no segments
    if parsed.is_absolute then path = "/" end
    for i = 1, count do
        path = path .. %Private.protect_segment(parsed[i])
        -- a slash separates segments; the last segment only gets one when
        -- the path names a directory
        if i < count or parsed.is_directory then path = path .. "/" end
    end
    return path
end
| 153 | |||
-- Turns a list into a set: returns a new table in which every element of
-- the list is a key associated with the value 1.
function Private.make_set(table)
    local set = {}
    local count = getn(table)
    local i = 1
    while i <= count do
        local member = table[i]
        set[member] = 1
        i = i + 1
    end
    return set
end
| 161 | |||
-- these are allowed within a path segment, along with alphanum
-- other characters must be escaped
-- (written directly as a set: each allowed character is a key with value 1)
Private.segment_set = {
    ["-"] = 1, ["_"] = 1, ["."] = 1, ["!"] = 1,
    ["~"] = 1, ["*"] = 1, ["'"] = 1, ["("] = 1,
    [")"] = 1, [":"] = 1, ["@"] = 1, ["&"] = 1,
    ["="] = 1, ["+"] = 1, ["$"] = 1, [","] = 1,
}
| 168 | |||
-- Escapes the characters of a path segment that may not appear in it.
-- Alphanumeric characters and the members of Private.segment_set are kept;
-- every other character is replaced by the result of Code.escape (Code is
-- a global that is not defined in this file).
-- Returns whatever gsub returns: the protected segment followed by the
-- number of characters examined by the callback.
function Private.protect_segment(s)
    -- held in a local so that the callback can reach it as an upvalue
    local allowed = %Private.segment_set
    local protect = function (c)
        if not %allowed[c] then return Code.escape(c) end
        return c
    end
    return gsub(s, "(%W)", protect)
end
| 176 | |||
-----------------------------------------------------------------------------
-- Builds a path from a base path and a relative path
-- Input
--   base_path: path of the base URL (a string, possibly empty)
--   relative_path: path of the relative URL
-- Returns
--   corresponding absolute path. "." segments are removed and each ".."
--   segment removes the segment before it. A ".." that would climb above
--   the root of an absolute path is kept in place (e.g. "/../g"), so the
--   result of merging with an absolute base always starts with "/".
-----------------------------------------------------------------------------
function Private.absolute_path(base_path, relative_path)
    -- a relative path that starts at the root replaces the base path
    if strsub(relative_path, 1, 1) == "/" then return relative_path end
    -- drop the last segment of the base and append the relative path
    local path = gsub(base_path, "[^/]*$", "")
    path = path .. relative_path
    -- remove "./" segments; the pattern also matches segments that merely
    -- end in a dot (such as "a./" or "../"), which are put back untouched
    path = gsub(path, "([^/]*%./)", function (s)
        if s ~= "./" then return s else return "" end
    end)
    -- a final "." segment is removed, keeping the slash before it
    path = gsub(path, "/%.$", "/")
    -- Set the root slash aside while ".." segments are reduced. Otherwise
    -- the empty text before the root slash is taken for a segment, "/../"
    -- is deleted, and an absolute path such as "/../g" collapses to the
    -- relative "g", which then gets glued to the host name.
    local root = ""
    if strsub(path, 1, 1) == "/" then
        root = "/"
        path = strsub(path, 2)
    end
    -- remove "<segment>/../" pairs until nothing changes; "../../" is kept
    -- because its first ".." is not a real segment to be removed
    local reduced
    while reduced ~= path do
        reduced = path
        path = gsub(reduced, "([^/]*/%.%./)", function (s)
            if s ~= "../../" then return "" else return s end
        end)
    end
    -- same reduction for a "<segment>/.." at the very end of the path
    path = gsub(reduced, "([^/]*/%.%.)$", function (s)
        if s ~= "../.." then return "" else return s end
    end)
    return root .. path
end
