PageRenderTime 23ms CodeModel.GetById 25ms RepoModel.GetById 0ms app.codeStats 0ms

/Angel-v2.2/Code-vc9/Tools/Python26/Lib/urlparse.py

https://github.com/chrishaukap/GameDev
Python | 423 lines | 421 code | 0 blank | 2 comment | 0 complexity | d4dadf43d0f2408056a948b9b164de04 MD5 | raw file
  1. """Parse (absolute and relative) URLs.
  2. See RFC 1808: "Relative Uniform Resource Locators", by R. Fielding,
  3. UC Irvine, June 1995.
  4. """
  5. __all__ = ["urlparse", "urlunparse", "urljoin", "urldefrag",
  6. "urlsplit", "urlunsplit", "parse_qs", "parse_qsl"]
  7. # A classification of schemes ('' means apply by default)
  8. uses_relative = ['ftp', 'http', 'gopher', 'nntp', 'imap',
  9. 'wais', 'file', 'https', 'shttp', 'mms',
  10. 'prospero', 'rtsp', 'rtspu', '', 'sftp']
  11. uses_netloc = ['ftp', 'http', 'gopher', 'nntp', 'telnet',
  12. 'imap', 'wais', 'file', 'mms', 'https', 'shttp',
  13. 'snews', 'prospero', 'rtsp', 'rtspu', 'rsync', '',
  14. 'svn', 'svn+ssh', 'sftp']
  15. non_hierarchical = ['gopher', 'hdl', 'mailto', 'news',
  16. 'telnet', 'wais', 'imap', 'snews', 'sip', 'sips']
  17. uses_params = ['ftp', 'hdl', 'prospero', 'http', 'imap',
  18. 'https', 'shttp', 'rtsp', 'rtspu', 'sip', 'sips',
  19. 'mms', '', 'sftp']
  20. uses_query = ['http', 'wais', 'imap', 'https', 'shttp', 'mms',
  21. 'gopher', 'rtsp', 'rtspu', 'sip', 'sips', '']
  22. uses_fragment = ['ftp', 'hdl', 'http', 'gopher', 'news',
  23. 'nntp', 'wais', 'https', 'shttp', 'snews',
  24. 'file', 'prospero', '']
  25. # Characters valid in scheme names
  26. scheme_chars = ('abcdefghijklmnopqrstuvwxyz'
  27. 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
  28. '0123456789'
  29. '+-.')
  30. MAX_CACHE_SIZE = 20
  31. _parse_cache = {}
  32. def clear_cache():
  33. """Clear the parse cache."""
  34. _parse_cache.clear()
  35. class ResultMixin(object):
  36. """Shared methods for the parsed result objects."""
  37. @property
  38. def username(self):
  39. netloc = self.netloc
  40. if "@" in netloc:
  41. userinfo = netloc.rsplit("@", 1)[0]
  42. if ":" in userinfo:
  43. userinfo = userinfo.split(":", 1)[0]
  44. return userinfo
  45. return None
  46. @property
  47. def password(self):
  48. netloc = self.netloc
  49. if "@" in netloc:
  50. userinfo = netloc.rsplit("@", 1)[0]
  51. if ":" in userinfo:
  52. return userinfo.split(":", 1)[1]
  53. return None
  54. @property
  55. def hostname(self):
  56. netloc = self.netloc
  57. if "@" in netloc:
  58. netloc = netloc.rsplit("@", 1)[1]
  59. if ":" in netloc:
  60. netloc = netloc.split(":", 1)[0]
  61. return netloc.lower() or None
  62. @property
  63. def port(self):
  64. netloc = self.netloc
  65. if "@" in netloc:
  66. netloc = netloc.rsplit("@", 1)[1]
  67. if ":" in netloc:
  68. port = netloc.split(":", 1)[1]
  69. return int(port, 10)
  70. return None
  71. from collections import namedtuple
  72. class SplitResult(namedtuple('SplitResult', 'scheme netloc path query fragment'), ResultMixin):
  73. __slots__ = ()
  74. def geturl(self):
  75. return urlunsplit(self)
  76. class ParseResult(namedtuple('ParseResult', 'scheme netloc path params query fragment'), ResultMixin):
  77. __slots__ = ()
  78. def geturl(self):
  79. return urlunparse(self)
  80. def urlparse(url, scheme='', allow_fragments=True):
  81. """Parse a URL into 6 components:
  82. <scheme>://<netloc>/<path>;<params>?<query>#<fragment>
  83. Return a 6-tuple: (scheme, netloc, path, params, query, fragment).
  84. Note that we don't break the components up in smaller bits
  85. (e.g. netloc is a single string) and we don't expand % escapes."""
  86. tuple = urlsplit(url, scheme, allow_fragments)
  87. scheme, netloc, url, query, fragment = tuple
  88. if scheme in uses_params and ';' in url:
  89. url, params = _splitparams(url)
  90. else:
  91. params = ''
  92. return ParseResult(scheme, netloc, url, params, query, fragment)
  93. def _splitparams(url):
  94. if '/' in url:
  95. i = url.find(';', url.rfind('/'))
  96. if i < 0:
  97. return url, ''
  98. else:
  99. i = url.find(';')
  100. return url[:i], url[i+1:]
  101. def _splitnetloc(url, start=0):
  102. delim = len(url) # position of end of domain part of url, default is end
  103. for c in '/?#': # look for delimiters; the order is NOT important
  104. wdelim = url.find(c, start) # find first of this delim
  105. if wdelim >= 0: # if found
  106. delim = min(delim, wdelim) # use earliest delim position
  107. return url[start:delim], url[delim:] # return (domain, rest)
  108. def urlsplit(url, scheme='', allow_fragments=True):
  109. """Parse a URL into 5 components:
  110. <scheme>://<netloc>/<path>?<query>#<fragment>
  111. Return a 5-tuple: (scheme, netloc, path, query, fragment).
  112. Note that we don't break the components up in smaller bits
  113. (e.g. netloc is a single string) and we don't expand % escapes."""
  114. allow_fragments = bool(allow_fragments)
  115. key = url, scheme, allow_fragments, type(url), type(scheme)
  116. cached = _parse_cache.get(key, None)
  117. if cached:
  118. return cached
  119. if len(_parse_cache) >= MAX_CACHE_SIZE: # avoid runaway growth
  120. clear_cache()
  121. netloc = query = fragment = ''
  122. i = url.find(':')
  123. if i > 0:
  124. if url[:i] == 'http': # optimize the common case
  125. scheme = url[:i].lower()
  126. url = url[i+1:]
  127. if url[:2] == '//':
  128. netloc, url = _splitnetloc(url, 2)
  129. if allow_fragments and '#' in url:
  130. url, fragment = url.split('#', 1)
  131. if '?' in url:
  132. url, query = url.split('?', 1)
  133. v = SplitResult(scheme, netloc, url, query, fragment)
  134. _parse_cache[key] = v
  135. return v
  136. for c in url[:i]:
  137. if c not in scheme_chars:
  138. break
  139. else:
  140. scheme, url = url[:i].lower(), url[i+1:]
  141. if scheme in uses_netloc and url[:2] == '//':
  142. netloc, url = _splitnetloc(url, 2)
  143. if allow_fragments and scheme in uses_fragment and '#' in url:
  144. url, fragment = url.split('#', 1)
  145. if scheme in uses_query and '?' in url:
  146. url, query = url.split('?', 1)
  147. v = SplitResult(scheme, netloc, url, query, fragment)
  148. _parse_cache[key] = v
  149. return v
  150. def urlunparse(data):
  151. """Put a parsed URL back together again. This may result in a
  152. slightly different, but equivalent URL, if the URL that was parsed
  153. originally had redundant delimiters, e.g. a ? with an empty query
  154. (the draft states that these are equivalent)."""
  155. scheme, netloc, url, params, query, fragment = data
  156. if params:
  157. url = "%s;%s" % (url, params)
  158. return urlunsplit((scheme, netloc, url, query, fragment))
  159. def urlunsplit(data):
  160. scheme, netloc, url, query, fragment = data
  161. if netloc or (scheme and scheme in uses_netloc and url[:2] != '//'):
  162. if url and url[:1] != '/': url = '/' + url
  163. url = '//' + (netloc or '') + url
  164. if scheme:
  165. url = scheme + ':' + url
  166. if query:
  167. url = url + '?' + query
  168. if fragment:
  169. url = url + '#' + fragment
  170. return url
def urljoin(base, url, allow_fragments=True):
    """Join a base URL and a possibly relative URL to form an absolute
    interpretation of the latter."""
    # Trivial cases: if one side is empty, the other stands alone.
    if not base:
        return url
    if not url:
        return base
    bscheme, bnetloc, bpath, bparams, bquery, bfragment = \
            urlparse(base, '', allow_fragments)
    scheme, netloc, path, params, query, fragment = \
            urlparse(url, bscheme, allow_fragments)
    # A different scheme, or one that cannot take relative references,
    # means url is already complete.
    if scheme != bscheme or scheme not in uses_relative:
        return url
    if scheme in uses_netloc:
        # An explicit netloc in url makes it absolute; otherwise inherit
        # the base's netloc.
        if netloc:
            return urlunparse((scheme, netloc, path,
                               params, query, fragment))
        netloc = bnetloc
    # An absolute path replaces the base path entirely.
    if path[:1] == '/':
        return urlunparse((scheme, netloc, path,
                           params, query, fragment))
    if not path:
        # Empty path: inherit the base path, and possibly its params
        # and query too (cf. RFC 1808 resolution steps).
        path = bpath
        if not params:
            params = bparams
        else:
            # url supplied its own params: the original code trims the
            # base path's last character here (legacy oddity — see the
            # XXX note below) and returns immediately.
            path = path[:-1]
            return urlunparse((scheme, netloc, path,
                               params, query, fragment))
        if not query:
            query = bquery
        return urlunparse((scheme, netloc, path,
                           params, query, fragment))
    # Relative path: resolve "." and ".." against the base's directory
    # (everything up to the base path's last "/").
    segments = bpath.split('/')[:-1] + path.split('/')
    # XXX The stuff below is bogus in various ways...
    if segments[-1] == '.':
        segments[-1] = ''
    while '.' in segments:
        segments.remove('.')
    # Repeatedly collapse the first "parent/.." pair found; restart the
    # scan after each removal.
    while 1:
        i = 1
        n = len(segments) - 1
        while i < n:
            if (segments[i] == '..'
                and segments[i-1] not in ('', '..')):
                del segments[i-1:i+1]
                break
            i = i+1
        else:
            break
    # Normalise a trailing ".." that could not be collapsed further.
    if segments == ['', '..']:
        segments[-1] = ''
    elif len(segments) >= 2 and segments[-1] == '..':
        segments[-2:] = ['']
    return urlunparse((scheme, netloc, '/'.join(segments),
                       params, query, fragment))
  227. def urldefrag(url):
  228. """Removes any existing fragment from URL.
  229. Returns a tuple of the defragmented URL and the fragment. If
  230. the URL contained no fragments, the second element is the
  231. empty string.
  232. """
  233. if '#' in url:
  234. s, n, p, a, q, frag = urlparse(url)
  235. defrag = urlunparse((s, n, p, a, q, ''))
  236. return defrag, frag
  237. else:
  238. return url, ''
  239. # unquote method for parse_qs and parse_qsl
  240. # Cannot use directly from urllib as it would create circular reference.
  241. # urllib uses urlparse methods ( urljoin)
  242. _hextochr = dict(('%02x' % i, chr(i)) for i in range(256))
  243. _hextochr.update(('%02X' % i, chr(i)) for i in range(256))
  244. def unquote(s):
  245. """unquote('abc%20def') -> 'abc def'."""
  246. res = s.split('%')
  247. for i in xrange(1, len(res)):
  248. item = res[i]
  249. try:
  250. res[i] = _hextochr[item[:2]] + item[2:]
  251. except KeyError:
  252. res[i] = '%' + item
  253. except UnicodeDecodeError:
  254. res[i] = unichr(int(item[:2], 16)) + item[2:]
  255. return "".join(res)
  256. def parse_qs(qs, keep_blank_values=0, strict_parsing=0):
  257. """Parse a query given as a string argument.
  258. Arguments:
  259. qs: URL-encoded query string to be parsed
  260. keep_blank_values: flag indicating whether blank values in
  261. URL encoded queries should be treated as blank strings.
  262. A true value indicates that blanks should be retained as
  263. blank strings. The default false value indicates that
  264. blank values are to be ignored and treated as if they were
  265. not included.
  266. strict_parsing: flag indicating what to do with parsing errors.
  267. If false (the default), errors are silently ignored.
  268. If true, errors raise a ValueError exception.
  269. """
  270. dict = {}
  271. for name, value in parse_qsl(qs, keep_blank_values, strict_parsing):
  272. if name in dict:
  273. dict[name].append(value)
  274. else:
  275. dict[name] = [value]
  276. return dict
  277. def parse_qsl(qs, keep_blank_values=0, strict_parsing=0):
  278. """Parse a query given as a string argument.
  279. Arguments:
  280. qs: URL-encoded query string to be parsed
  281. keep_blank_values: flag indicating whether blank values in
  282. URL encoded queries should be treated as blank strings. A
  283. true value indicates that blanks should be retained as blank
  284. strings. The default false value indicates that blank values
  285. are to be ignored and treated as if they were not included.
  286. strict_parsing: flag indicating what to do with parsing errors. If
  287. false (the default), errors are silently ignored. If true,
  288. errors raise a ValueError exception.
  289. Returns a list, as G-d intended.
  290. """
  291. pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
  292. r = []
  293. for name_value in pairs:
  294. if not name_value and not strict_parsing:
  295. continue
  296. nv = name_value.split('=', 1)
  297. if len(nv) != 2:
  298. if strict_parsing:
  299. raise ValueError, "bad query field: %r" % (name_value,)
  300. # Handle case of a control-name with no equal sign
  301. if keep_blank_values:
  302. nv.append('')
  303. else:
  304. continue
  305. if len(nv[1]) or keep_blank_values:
  306. name = unquote(nv[0].replace('+', ' '))
  307. value = unquote(nv[1].replace('+', ' '))
  308. r.append((name, value))
  309. return r
# Self-test fixture for test() below: the first URL is the base; each
# later line reads "<reference> = <expected absolute URL>" (the RFC 1808
# resolution examples).  Lines are consumed with str.split(), so the
# exact spacing is immaterial.
test_input = """
http://a/b/c/d
g:h = <URL:g:h>
http:g = <URL:http://a/b/c/g>
http: = <URL:http://a/b/c/d>
g = <URL:http://a/b/c/g>
./g = <URL:http://a/b/c/g>
g/ = <URL:http://a/b/c/g/>
/g = <URL:http://a/g>
//g = <URL:http://g>
?y = <URL:http://a/b/c/d?y>
g?y = <URL:http://a/b/c/g?y>
g?y/./x = <URL:http://a/b/c/g?y/./x>
. = <URL:http://a/b/c/>
./ = <URL:http://a/b/c/>
.. = <URL:http://a/b/>
../ = <URL:http://a/b/>
../g = <URL:http://a/b/g>
../.. = <URL:http://a/>
../../g = <URL:http://a/g>
../../../g = <URL:http://a/../g>
./../g = <URL:http://a/b/g>
./g/. = <URL:http://a/b/c/g/>
/./g = <URL:http://a/./g>
g/./h = <URL:http://a/b/c/g/h>
g/../h = <URL:http://a/b/c/h>
http:g = <URL:http://a/b/c/g>
http: = <URL:http://a/b/c/d>
http:?y = <URL:http://a/b/c/d?y>
http:g?y = <URL:http://a/b/c/g?y>
http:g?y/./x = <URL:http://a/b/c/g?y/./x>
"""
  342. def test():
  343. import sys
  344. base = ''
  345. if sys.argv[1:]:
  346. fn = sys.argv[1]
  347. if fn == '-':
  348. fp = sys.stdin
  349. else:
  350. fp = open(fn)
  351. else:
  352. try:
  353. from cStringIO import StringIO
  354. except ImportError:
  355. from StringIO import StringIO
  356. fp = StringIO(test_input)
  357. for line in fp:
  358. words = line.split()
  359. if not words:
  360. continue
  361. url = words[0]
  362. parts = urlparse(url)
  363. print '%-10s : %s' % (url, parts)
  364. abs = urljoin(base, url)
  365. if not base:
  366. base = abs
  367. wrapped = '<URL:%s>' % abs
  368. print '%-10s = %s' % (url, wrapped)
  369. if len(words) == 3 and words[1] == '=':
  370. if wrapped != words[2]:
  371. print 'EXPECTED', words[2], '!!!!!!!!!!'
  372. if __name__ == '__main__':
  373. test()