PageRenderTime 35ms CodeModel.GetById 2ms app.highlight 26ms RepoModel.GetById 1ms app.codeStats 1ms

/script/socket/url.lua

http://sdccs.googlecode.com/
Lua | 297 lines | 263 code | 5 blank | 29 comment | 1 complexity | ffc9e8b65161d99b97c05e4673b47dfe MD5 | raw file
  1-----------------------------------------------------------------------------
  2-- URI parsing, composition and relative URL resolution
  3-- LuaSocket toolkit.
  4-- Author: Diego Nehab
  5-- RCS ID: $Id: url.lua 7 2010-11-21 23:50:47Z kaszasg $
  6-----------------------------------------------------------------------------
  7
  8-----------------------------------------------------------------------------
  9-- Declare module
 10-----------------------------------------------------------------------------
 11local string = require("string")
 12local base = _G
 13local table = require("table")
 14module("socket.url")
 15
 16-----------------------------------------------------------------------------
 17-- Module version
 18-----------------------------------------------------------------------------
 19_VERSION = "URL 1.0.1"
 20
 21-----------------------------------------------------------------------------
 22-- Encodes a string into its escaped hexadecimal representation
 23-- Input
 24--   s: binary string to be encoded
 25-- Returns
 26--   escaped representation of string binary
 27-----------------------------------------------------------------------------
 28function escape(s)
 29    return string.gsub(s, "([^A-Za-z0-9_])", function(c)
 30        return string.format("%%%02x", string.byte(c))
 31    end)
 32end
 33
 34-----------------------------------------------------------------------------
 35-- Protects a path segment, to prevent it from interfering with the
 36-- url parsing.
 37-- Input
 38--   s: binary string to be encoded
 39-- Returns
 40--   escaped representation of string binary
 41-----------------------------------------------------------------------------
 42local function make_set(t)
 43	local s = {}
 44	for i,v in base.ipairs(t) do
 45		s[t[i]] = 1
 46	end
 47	return s
 48end
 49
 50-- these are allowed withing a path segment, along with alphanum
 51-- other characters must be escaped
 52local segment_set = make_set {
 53    "-", "_", ".", "!", "~", "*", "'", "(",
 54	")", ":", "@", "&", "=", "+", "$", ",",
 55}
 56
 57local function protect_segment(s)
 58	return string.gsub(s, "([^A-Za-z0-9_])", function (c)
 59		if segment_set[c] then return c
 60		else return string.format("%%%02x", string.byte(c)) end
 61	end)
 62end
 63
 64-----------------------------------------------------------------------------
 65-- Encodes a string into its escaped hexadecimal representation
 66-- Input
 67--   s: binary string to be encoded
 68-- Returns
 69--   escaped representation of string binary
 70-----------------------------------------------------------------------------
 71function unescape(s)
 72    return string.gsub(s, "%%(%x%x)", function(hex)
 73        return string.char(base.tonumber(hex, 16))
 74    end)
 75end
 76
 77-----------------------------------------------------------------------------
 78-- Builds a path from a base path and a relative path
 79-- Input
 80--   base_path
 81--   relative_path
 82-- Returns
 83--   corresponding absolute path
 84-----------------------------------------------------------------------------
 85local function absolute_path(base_path, relative_path)
 86    if string.sub(relative_path, 1, 1) == "/" then return relative_path end
 87    local path = string.gsub(base_path, "[^/]*$", "")
 88    path = path .. relative_path
 89    path = string.gsub(path, "([^/]*%./)", function (s)
 90        if s ~= "./" then return s else return "" end
 91    end)
 92    path = string.gsub(path, "/%.$", "/")
 93    local reduced
 94    while reduced ~= path do
 95        reduced = path
 96        path = string.gsub(reduced, "([^/]*/%.%./)", function (s)
 97            if s ~= "../../" then return "" else return s end
 98        end)
 99    end
100    path = string.gsub(reduced, "([^/]*/%.%.)$", function (s)
101        if s ~= "../.." then return "" else return s end
102    end)
103    return path
104end
105
-----------------------------------------------------------------------------
-- Parses a url and returns a table with all its parts according to RFC 2396
-- The following grammar describes the names given to the URL parts
-- <url> ::= <scheme>://<authority>/<path>;<params>?<query>#<fragment>
-- <authority> ::= <userinfo>@<host>:<port>
-- <userinfo> ::= <user>[:<password>]
-- <path> ::= {<segment>/}<segment>
-- Input
--   url: uniform resource locator of request
--   default: table with default values for each field
-- Returns
--   table with the following fields, where RFC naming conventions have
--   been preserved:
--     scheme, authority, userinfo, user, password, host, port,
--     path, params, query, fragment
--   or nil followed by an error message if url is nil or empty
-- Obs:
--   the leading '/' in {/<path>} is considered part of <path>
--   a bracketed host such as "[::1]" is kept whole, brackets included
-----------------------------------------------------------------------------
function parse(url, default)
    -- initialize default parameters
    local parsed = {}
    for i,v in base.pairs(default or parsed) do parsed[i] = v end
    -- empty url is parsed to nil
    if not url or url == "" then return nil, "invalid url" end
    -- remove whitespace
    -- url = string.gsub(url, "%s", "")
    -- get fragment
    url = string.gsub(url, "#(.*)$", function(f)
        parsed.fragment = f
        return ""
    end)
    -- get scheme
    url = string.gsub(url, "^([%w][%w%+%-%.]*)%:",
        function(s) parsed.scheme = s; return "" end)
    -- get authority
    url = string.gsub(url, "^//([^/]*)", function(n)
        parsed.authority = n
        return ""
    end)
    -- get query string
    url = string.gsub(url, "%?(.*)", function(q)
        parsed.query = q
        return ""
    end)
    -- get params
    url = string.gsub(url, "%;(.*)", function(p)
        parsed.params = p
        return ""
    end)
    -- path is whatever was left
    if url ~= "" then parsed.path = url end
    local authority = parsed.authority
    if not authority then return parsed end
    authority = string.gsub(authority,"^([^@]*)@",
        function(u) parsed.userinfo = u; return "" end)
    -- "]" is excluded from the port so that the colons inside a bracketed
    -- IPv6 literal are not mistaken for the port separator
    authority = string.gsub(authority, ":([^:%]]*)$",
        function(p) parsed.port = p; return "" end)
    if authority ~= "" then parsed.host = authority end
    local userinfo = parsed.userinfo
    if not userinfo then return parsed end
    userinfo = string.gsub(userinfo, ":([^:]*)$",
        function(p) parsed.password = p; return "" end)
    parsed.user = userinfo
    return parsed
end
171
-----------------------------------------------------------------------------
-- Rebuilds a parsed URL from its components.
-- The path is split with parse_path and reassembled with build_path; the
-- other components are concatenated as given.
-- Input
--   parsed: parsed URL, as returned by parse
-- Returns
--   a string with the corresponding URL
-----------------------------------------------------------------------------
function build(parsed)
    local segments = parse_path(parsed.path or "")
    local result = build_path(segments)
    if parsed.params then result = result .. ";" .. parsed.params end
    if parsed.query then result = result .. "?" .. parsed.query end

    -- when a host is given the authority is recomposed from its parts,
    -- otherwise the ready-made authority field (if any) is used
    local location = parsed.authority
    if parsed.host then
        -- user (with optional password) overrides a ready-made userinfo
        local credentials = parsed.userinfo
        if parsed.user then
            credentials = parsed.user
            if parsed.password then
                credentials = credentials .. ":" .. parsed.password
            end
        end
        location = parsed.host
        if parsed.port then location = location .. ":" .. parsed.port end
        if credentials then location = credentials .. "@" .. location end
    end

    if location then result = "//" .. location .. result end
    if parsed.scheme then result = parsed.scheme .. ":" .. result end
    if parsed.fragment then result = result .. "#" .. parsed.fragment end
    -- whitespace stripping is disabled:
    -- result = string.gsub(result, "%s", "")
    return result
end
204
-----------------------------------------------------------------------------
-- Builds an absolute URL from a base and a relative URL according to RFC 2396
-- Input
--   base_url: base URL, either a string or a table as returned by parse
--   relative_url
-- Returns
--   corresponding absolute url; relative_url itself if the base cannot be
--   parsed or relative_url already has a scheme; the base url if
--   relative_url cannot be parsed
-----------------------------------------------------------------------------
function absolute(base_url, relative_url)
    -- declared local: a bare assignment would store base_parsed in the
    -- module's environment, where it leaks to every user of the module
    local base_parsed
    if base.type(base_url) == "table" then
        base_parsed = base_url
        base_url = build(base_parsed)
    else
        base_parsed = parse(base_url)
    end
    local relative_parsed = parse(relative_url)
    if not base_parsed then return relative_url
    elseif not relative_parsed then return base_url
    elseif relative_parsed.scheme then return relative_url
    else
        -- inherit from the base every leading component that the relative
        -- url leaves out, stopping at the first one it does provide
        relative_parsed.scheme = base_parsed.scheme
        if not relative_parsed.authority then
            relative_parsed.authority = base_parsed.authority
            if not relative_parsed.path then
                relative_parsed.path = base_parsed.path
                if not relative_parsed.params then
                    relative_parsed.params = base_parsed.params
                    if not relative_parsed.query then
                        relative_parsed.query = base_parsed.query
                    end
                end
            else
                relative_parsed.path = absolute_path(base_parsed.path or "",
                    relative_parsed.path)
            end
        end
        return build(relative_parsed)
    end
end
244
-----------------------------------------------------------------------------
-- Breaks a path into its segments, unescaping the segments
-- Input
--   path: path string; nil is treated as the empty path
-- Returns
--   segment: a table with one entry per non-empty segment, plus the fields
--     is_absolute (set to 1 if the path starts with "/") and
--     is_directory (set to 1 if the path ends with "/")
-----------------------------------------------------------------------------
function parse_path(path)
    local parsed = {}
    path = path or ""
    --path = string.gsub(path, "%s", "")
    for segment in string.gmatch(path, "[^/]+") do
        -- plain assignment (rather than table.insert) so that any extra
        -- values returned by unescape are discarded
        parsed[#parsed + 1] = unescape(segment)
    end
    if string.sub(path, 1, 1) == "/" then parsed.is_absolute = 1 end
    if string.sub(path, -1, -1) == "/" then parsed.is_directory = 1 end
    return parsed
end
264
-----------------------------------------------------------------------------
-- Builds a path component from its segments, escaping protected characters.
-- Input
--   parsed: path segments, optionally with the fields is_absolute and
--     is_directory as set by parse_path
--   unsafe: if true, segments are not protected before path is built
-- Returns
--   path: corresponding path string
-----------------------------------------------------------------------------
function build_path(parsed, unsafe)
    local n = #parsed
    local segments = {}
    for i = 1, n do
        local segment = parsed[i]
        if not unsafe then segment = protect_segment(segment) end
        segments[i] = segment
    end
    local path = table.concat(segments, "/")
    -- the trailing slash is only added when there is at least one segment,
    -- so that an absolute, segment-less path is built as "/" and not "//"
    if n > 0 and parsed.is_directory then path = path .. "/" end
    if parsed.is_absolute then path = "/" .. path end
    return path
end