From 17a95c126a178b17292637785c6ec09bb1180493 Mon Sep 17 00:00:00 2001 From: "E. Westbrook" Date: Sun, 19 Aug 2018 11:32:42 -0600 Subject: url.lua:absolute_path(): fix issue #254, simplify, add more test cases --- src/url.lua | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) (limited to 'src') diff --git a/src/url.lua b/src/url.lua index 110ea94..29b6734 100644 --- a/src/url.lua +++ b/src/url.lua @@ -88,20 +88,18 @@ local function absolute_path(base_path, relative_path) if string.sub(relative_path, 1, 1) == "/" then return relative_path end local path = string.gsub(base_path, "[^/]*$", "") path = path .. relative_path - path = string.gsub(path, "([^/]*%./)", function (s) - if s ~= "./" then return s else return "" end - end) - path = string.gsub(path, "/%.$", "/") - local reduced - while reduced ~= path do - reduced = path - path = string.gsub(reduced, "([^/]*/%.%./)", function (s) - if s ~= "../../" then return "" else return s end - end) - end - path = string.gsub(reduced, "([^/]*/%.%.)$", function (s) - if s ~= "../.." then return "" else return s end - end) + repeat + local was = path + path = path:gsub('/%./', '/') + until path == was + repeat + local was = path + path = path:gsub('[^/]+/%.%./([^/]+)', '%1') + until path == was + path = path:gsub('[^/]+/%.%./*$', '') + path = path:gsub('/%.%.$', '/') + path = path:gsub('/%.$', '/') + path = path:gsub('^/%.%.', '') return path end -- cgit v1.2.3-55-g6feb From c905b5d44f8cdfbc8110a9a7d1d62c08b5703ae3 Mon Sep 17 00:00:00 2001 From: "E. Westbrook" Date: Tue, 21 Aug 2018 08:03:51 -0600 Subject: url.lua: separate remove_dot_components() from absolute_path(); also use in _M.absolute() even when not merging --- src/url.lua | 51 ++++++++++++++++++++++++++++++++++----------------- test/urltest.lua | 8 ++++---- 2 files changed, 38 insertions(+), 21 deletions(-) (limited to 'src') diff --git a/src/url.lua b/src/url.lua index 29b6734..a354ab5 100644 --- a/src/url.lua +++ b/src/url.lua @@ -77,24 +77,19 @@ function _M.unescape(s) end ----------------------------------------------------------------------------- --- Builds a path from a base path and a relative path +-- Removes '..' and '.' components appropriately from a path. -- Input --- base_path --- relative_path +-- path -- Returns --- corresponding absolute path ------------------------------------------------------------------------------ -local function absolute_path(base_path, relative_path) - if string.sub(relative_path, 1, 1) == "/" then return relative_path end - local path = string.gsub(base_path, "[^/]*$", "") - path = path .. relative_path +-- dot-normalized path +local function remove_dot_components(path) repeat - local was = path - path = path:gsub('/%./', '/') + local was = path + path = path:gsub('/%./', '/') until path == was repeat - local was = path - path = path:gsub('[^/]+/%.%./([^/]+)', '%1') + local was = path + path = path:gsub('[^/]+/%.%./([^/]+)', '%1') until path == was path = path:gsub('[^/]+/%.%./*$', '') path = path:gsub('/%.%.$', '/') @@ -103,6 +98,23 @@ local function absolute_path(base_path, relative_path) return path end +----------------------------------------------------------------------------- +-- Builds a path from a base path and a relative path +-- Input +-- base_path +-- relative_path +-- Returns +-- corresponding absolute path +----------------------------------------------------------------------------- +local function absolute_path(base_path, relative_path) + if string.sub(relative_path, 1, 1) == "/" then + return remove_dot_components(relative_path) end + base_path = base_path:gsub("[^/]*$", "") + local path = base_path .. relative_path + path = remove_dot_components(path) + return path +end + ----------------------------------------------------------------------------- -- Parses a url and returns a table with all its parts according to RFC 2396 -- The following grammar describes the names given to the URL parts @@ -225,10 +237,14 @@ function _M.absolute(base_url, relative_url) else base_parsed = _M.parse(base_url) end + local result local relative_parsed = _M.parse(relative_url) - if not base_parsed then return relative_url - elseif not relative_parsed then return base_url - elseif relative_parsed.scheme then return relative_url + if not base_parsed then + result = relative_url + elseif not relative_parsed then + result = base_url + elseif relative_parsed.scheme then + result = relative_url else relative_parsed.scheme = base_parsed.scheme if not relative_parsed.authority then @@ -246,8 +262,9 @@ function _M.absolute(base_url, relative_url) relative_parsed.path) end end - return _M.build(relative_parsed) + result = _M.build(relative_parsed) end + return remove_dot_components(result) end ----------------------------------------------------------------------------- diff --git a/test/urltest.lua b/test/urltest.lua index 63a33ea..649be88 100644 --- a/test/urltest.lua +++ b/test/urltest.lua @@ -627,10 +627,10 @@ check_absolute_url("http://a/b/c/d;p?q#f", "/g", "http://a/g") check_absolute_url("http://a/b/c/d;p?q#f", "//g", "http://g") check_absolute_url("http://a/b/c/d;p?q#f", "?y", "http://a/b/c/d;p?y") check_absolute_url("http://a/b/c/d;p?q#f", "g?y", "http://a/b/c/g?y") -check_absolute_url("http://a/b/c/d;p?q#f", "g?y/./x", "http://a/b/c/g?y/./x") +check_absolute_url("http://a/b/c/d;p?q#f", "g?y/./x", "http://a/b/c/g?y/x") check_absolute_url("http://a/b/c/d;p?q#f", "#s", "http://a/b/c/d;p?q#s") check_absolute_url("http://a/b/c/d;p?q#f", "g#s", "http://a/b/c/g#s") -check_absolute_url("http://a/b/c/d;p?q#f", "g#s/./x", "http://a/b/c/g#s/./x") +check_absolute_url("http://a/b/c/d;p?q#f", "g#s/./x", "http://a/b/c/g#s/x") check_absolute_url("http://a/b/c/d;p?q#f", "g?y#s", "http://a/b/c/g?y#s") check_absolute_url("http://a/b/c/d;p?q#f", ";x", "http://a/b/c/d;x") check_absolute_url("http://a/b/c/d;p?q#f", "g;x", "http://a/b/c/g;x") @@ -655,8 +655,8 @@ check_absolute_url("http://a/b/c/d;p?q#f", "../..", "http://a/") check_absolute_url("http://a/b/c/d;p?q#f", "../../", "http://a/") check_absolute_url("http://a/b/c/d;p?q#f", "../../g", "http://a/g") check_absolute_url("http://a/b/c/d;p?q#f", "", "http://a/b/c/d;p?q#f") -check_absolute_url("http://a/b/c/d;p?q#f", "/./g", "http://a/./g") -check_absolute_url("http://a/b/c/d;p?q#f", "/../g", "http://a/../g") +check_absolute_url("http://a/b/c/d;p?q#f", "/./g", "http://a/g") +check_absolute_url("http://a/b/c/d;p?q#f", "/../g", "http://a/g") check_absolute_url("http://a/b/c/d;p?q#f", "g.", "http://a/b/c/g.") check_absolute_url("http://a/b/c/d;p?q#f", ".g", "http://a/b/c/.g") check_absolute_url("http://a/b/c/d;p?q#f", "g..", "http://a/b/c/g..") -- cgit v1.2.3-55-g6feb From c570a32c219c957fd405ed018f2500f06952c043 Mon Sep 17 00:00:00 2001 From: "E. Westbrook" Date: Tue, 21 Aug 2018 09:07:42 -0600 Subject: url.lua:remove_dot_components(): limit beginning-of-string double-dot corner case to prevent triple-dot activation and authority collision --- src/url.lua | 2 +- test/urltest.lua | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/url.lua b/src/url.lua index a354ab5..0d88adb 100644 --- a/src/url.lua +++ b/src/url.lua @@ -94,7 +94,7 @@ local function remove_dot_components(path) path = path:gsub('[^/]+/%.%./*$', '') path = path:gsub('/%.%.$', '/') path = path:gsub('/%.$', '/') - path = path:gsub('^/%.%.', '') + path = path:gsub('^/%.%./', '/') return path end diff --git a/test/urltest.lua b/test/urltest.lua index 649be88..8664fa6 100644 --- a/test/urltest.lua +++ b/test/urltest.lua @@ -685,6 +685,7 @@ check_absolute_url("a/b/c/d/../", "d/e/f", "a/b/c/d/e/f") check_absolute_url("http://velox.telemar.com.br", "/dashboard/index.html", "http://velox.telemar.com.br/dashboard/index.html") check_absolute_url("http://example.com/", "../.badhost.com/", "http://example.com/.badhost.com/") +check_absolute_url("http://example.com/", "...badhost.com/", "http://example.com/...badhost.com/") print("testing path parsing and composition") check_parse_path("/eu/tu/ele", { "eu", "tu", "ele"; is_absolute = 1 }) -- cgit v1.2.3-55-g6feb From 7ccea58776b8084f29a48610cb44b17ca604e4b5 Mon Sep 17 00:00:00 2001 From: "E. Westbrook" Date: Tue, 21 Aug 2018 09:59:45 -0600 Subject: url.lua:remove_dot_components(): avoid overconsuming dot segments --- src/url.lua | 4 ++-- test/urltest.lua | 5 +++++ 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/url.lua b/src/url.lua index 0d88adb..e1fa2bc 100644 --- a/src/url.lua +++ b/src/url.lua @@ -85,11 +85,11 @@ end local function remove_dot_components(path) repeat local was = path - path = path:gsub('/%./', '/') + path = path:gsub('/%./', '/', 1) until path == was repeat local was = path - path = path:gsub('[^/]+/%.%./([^/]+)', '%1') + path = path:gsub('[^/]+/%.%./([^/]+)', '%1', 1) until path == was path = path:gsub('[^/]+/%.%./*$', '') path = path:gsub('/%.%.$', '/') diff --git a/test/urltest.lua b/test/urltest.lua index 8664fa6..04b3c7f 100644 --- a/test/urltest.lua +++ b/test/urltest.lua @@ -654,6 +654,7 @@ check_absolute_url("http://a/b/c/d;p?q#f", "../g", "http://a/b/g") check_absolute_url("http://a/b/c/d;p?q#f", "../..", "http://a/") check_absolute_url("http://a/b/c/d;p?q#f", "../../", "http://a/") check_absolute_url("http://a/b/c/d;p?q#f", "../../g", "http://a/g") +check_absolute_url("http://a/b/c/d;p?q#f", "../../../g", "http://a/g") check_absolute_url("http://a/b/c/d;p?q#f", "", "http://a/b/c/d;p?q#f") check_absolute_url("http://a/b/c/d;p?q#f", "/./g", "http://a/g") check_absolute_url("http://a/b/c/d;p?q#f", "/../g", "http://a/g") @@ -686,6 +687,10 @@ check_absolute_url("http://velox.telemar.com.br", "/dashboard/index.html", "http://velox.telemar.com.br/dashboard/index.html") check_absolute_url("http://example.com/", "../.badhost.com/", "http://example.com/.badhost.com/") check_absolute_url("http://example.com/", "...badhost.com/", "http://example.com/...badhost.com/") +check_absolute_url("http://example.com/a/b/c/d/", "../q", "http://example.com/a/b/c/q") +check_absolute_url("http://example.com/a/b/c/d/", "../../q", "http://example.com/a/b/q") +check_absolute_url("http://example.com/a/b/c/d/", "../../../q", "http://example.com/a/q") +check_absolute_url("http://example.com/a/b/c/d/", "../../../../q", "http://example.com/q") print("testing path parsing and composition") check_parse_path("/eu/tu/ele", { "eu", "tu", "ele"; is_absolute = 1 }) -- cgit v1.2.3-55-g6feb From 5b862e6a3c79b8e336a0ac2f0d23ca69993b326d Mon Sep 17 00:00:00 2001 From: "E. Westbrook" Date: Tue, 21 Aug 2018 10:43:04 -0600 Subject: url.lua:absolute_path(): ensure a separator between base_path and relative_path --- src/url.lua | 1 + test/urltest.lua | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/url.lua b/src/url.lua index e1fa2bc..243ee1c 100644 --- a/src/url.lua +++ b/src/url.lua @@ -110,6 +110,7 @@ local function absolute_path(base_path, relative_path) if string.sub(relative_path, 1, 1) == "/" then return remove_dot_components(relative_path) end base_path = base_path:gsub("[^/]*$", "") + if not base_path:find'/$' then base_path = base_path .. '/' end local path = base_path .. relative_path path = remove_dot_components(path) return path diff --git a/test/urltest.lua b/test/urltest.lua index 04b3c7f..b6ee299 100644 --- a/test/urltest.lua +++ b/test/urltest.lua @@ -690,7 +690,7 @@ check_absolute_url("http://example.com/", "...badhost.com/", "http://example.com check_absolute_url("http://example.com/a/b/c/d/", "../q", "http://example.com/a/b/c/q") check_absolute_url("http://example.com/a/b/c/d/", "../../q", "http://example.com/a/b/q") check_absolute_url("http://example.com/a/b/c/d/", "../../../q", "http://example.com/a/q") -check_absolute_url("http://example.com/a/b/c/d/", "../../../../q", "http://example.com/q") +check_absolute_url("http://example.com", ".badhost.com", "http://example.com/.badhost.com") print("testing path parsing and composition") check_parse_path("/eu/tu/ele", { "eu", "tu", "ele"; is_absolute = 1 }) -- cgit v1.2.3-55-g6feb From 38d936ec0ea05da9f85a5c582e5073e0d1b82209 Mon Sep 17 00:00:00 2001 From: "E. Westbrook" Date: Tue, 21 Aug 2018 11:27:42 -0600 Subject: url.lua:remove_dot_components(): empty path component double-dot corner case --- src/url.lua | 4 ++++ test/urltest.lua | 1 + 2 files changed, 5 insertions(+) (limited to 'src') diff --git a/src/url.lua b/src/url.lua index 243ee1c..10a9d90 100644 --- a/src/url.lua +++ b/src/url.lua @@ -87,6 +87,10 @@ local function remove_dot_components(path) local was = path path = path:gsub('/%./', '/', 1) until path == was + repeat + local was = path + path = path:gsub('//%.%./([^/]+)', '/%1', 1) + until path == was repeat local was = path path = path:gsub('[^/]+/%.%./([^/]+)', '%1', 1) diff --git a/test/urltest.lua b/test/urltest.lua index b6ee299..13deb10 100644 --- a/test/urltest.lua +++ b/test/urltest.lua @@ -691,6 +691,7 @@ check_absolute_url("http://example.com/a/b/c/d/", "../q", "http://example.com/a/ check_absolute_url("http://example.com/a/b/c/d/", "../../q", "http://example.com/a/b/q") check_absolute_url("http://example.com/a/b/c/d/", "../../../q", "http://example.com/a/q") check_absolute_url("http://example.com", ".badhost.com", "http://example.com/.badhost.com") +check_absolute_url("http://example.com/a/b/c/d/", "..//../../../q", "http://example.com/a/q") print("testing path parsing and composition") check_parse_path("/eu/tu/ele", { "eu", "tu", "ele"; is_absolute = 1 }) -- cgit v1.2.3-55-g6feb From ca5398be098b571912dcbd93c83ab78151814f99 Mon Sep 17 00:00:00 2001 From: "E. Westbrook" Date: Tue, 21 Aug 2018 12:02:25 -0600 Subject: url.lua:remove_dot_components(): use temporary NUL marker to reduce empty-segment special-case code --- src/url.lua | 5 +++-- test/urltest.lua | 4 ++++ 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/url.lua b/src/url.lua index 10a9d90..466d4fa 100644 --- a/src/url.lua +++ b/src/url.lua @@ -85,11 +85,11 @@ end local function remove_dot_components(path) repeat local was = path - path = path:gsub('/%./', '/', 1) + path = path:gsub('//', '/'..0x00..'/', 1) until path == was repeat local was = path - path = path:gsub('//%.%./([^/]+)', '/%1', 1) + path = path:gsub('/%./', '/', 1) until path == was repeat local was = path @@ -99,6 +99,7 @@ local function remove_dot_components(path) path = path:gsub('/%.%.$', '/') path = path:gsub('/%.$', '/') path = path:gsub('^/%.%./', '/') + path = path:gsub(0x00, '') return path end diff --git a/test/urltest.lua b/test/urltest.lua index 13deb10..ae8ba75 100644 --- a/test/urltest.lua +++ b/test/urltest.lua @@ -692,6 +692,10 @@ check_absolute_url("http://example.com/a/b/c/d/", "../../q", "http://example.com check_absolute_url("http://example.com/a/b/c/d/", "../../../q", "http://example.com/a/q") check_absolute_url("http://example.com", ".badhost.com", "http://example.com/.badhost.com") check_absolute_url("http://example.com/a/b/c/d/", "..//../../../q", "http://example.com/a/q") +check_absolute_url("http://example.com/a/b/c/d/", "..//a/../../../../q", "http://example.com/a/q") +check_absolute_url("http://example.com/a/b/c/d/", "..//a/..//../../../q", "http://example.com/a/b/q") +check_absolute_url("http://example.com/a/b/c/d/", "..//a/..///../../../../q", "http://example.com/a/b/q") +check_absolute_url("http://example.com/a/b/c/d/", "../x/a/../y/z/../../../../q", "http://example.com/a/b/q") print("testing path parsing and composition") check_parse_path("/eu/tu/ele", { "eu", "tu", "ele"; is_absolute = 1 }) -- cgit v1.2.3-55-g6feb From 043e99771352aff47680b99f09b66a32f0cc3ef5 Mon Sep 17 00:00:00 2001 From: "E. Westbrook" Date: Tue, 21 Aug 2018 12:42:26 -0600 Subject: url.lua:remove_dot_components(): avoid ambiguous numeric representation as empty-path-segment marker --- src/url.lua | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/url.lua b/src/url.lua index 466d4fa..0a3a80a 100644 --- a/src/url.lua +++ b/src/url.lua @@ -83,9 +83,10 @@ end -- Returns -- dot-normalized path local function remove_dot_components(path) + local marker = string.char(1) repeat local was = path - path = path:gsub('//', '/'..0x00..'/', 1) + path = path:gsub('//', '/'..marker..'/', 1) until path == was repeat local was = path @@ -99,7 +100,7 @@ local function remove_dot_components(path) path = path:gsub('/%.%.$', '/') path = path:gsub('/%.$', '/') path = path:gsub('^/%.%./', '/') - path = path:gsub(0x00, '') + path = path:gsub(marker, '') return path end -- cgit v1.2.3-55-g6feb