PageRenderTime 66ms CodeModel.GetById 20ms app.highlight 41ms RepoModel.GetById 1ms app.codeStats 0ms

/Lib/test/test_urlparse.py

http://unladen-swallow.googlecode.com/
Python | 357 lines | 335 code | 10 blank | 12 comment | 0 complexity | f30cafd6881896d3655be743ea4d72ea MD5 | raw file
  1#! /usr/bin/env python
  2
  3from test import test_support
  4import unittest
  5import urlparse
  6
  7RFC1808_BASE = "http://a/b/c/d;p?q#f"
  8RFC2396_BASE = "http://a/b/c/d;p?q"
  9RFC3986_BASE = "http://a/b/c/d;p?q"
 10
 11# A list of test cases.  Each test case is a a two-tuple that contains
 12# a string with the query and a dictionary with the expected result.
 13
 14parse_qsl_test_cases = [
 15    ("", []),
 16    ("&", []),
 17    ("&&", []),
 18    ("=", [('', '')]),
 19    ("=a", [('', 'a')]),
 20    ("a", [('a', '')]),
 21    ("a=", [('a', '')]),
 22    ("a=", [('a', '')]),
 23    ("&a=b", [('a', 'b')]),
 24    ("a=a+b&b=b+c", [('a', 'a b'), ('b', 'b c')]),
 25    ("a=1&a=2", [('a', '1'), ('a', '2')]),
 26]
 27
 28class UrlParseTestCase(unittest.TestCase):
 29
 30    def checkRoundtrips(self, url, parsed, split):
 31        result = urlparse.urlparse(url)
 32        self.assertEqual(result, parsed)
 33        t = (result.scheme, result.netloc, result.path,
 34             result.params, result.query, result.fragment)
 35        self.assertEqual(t, parsed)
 36        # put it back together and it should be the same
 37        result2 = urlparse.urlunparse(result)
 38        self.assertEqual(result2, url)
 39        self.assertEqual(result2, result.geturl())
 40
 41        # the result of geturl() is a fixpoint; we can always parse it
 42        # again to get the same result:
 43        result3 = urlparse.urlparse(result.geturl())
 44        self.assertEqual(result3.geturl(), result.geturl())
 45        self.assertEqual(result3,          result)
 46        self.assertEqual(result3.scheme,   result.scheme)
 47        self.assertEqual(result3.netloc,   result.netloc)
 48        self.assertEqual(result3.path,     result.path)
 49        self.assertEqual(result3.params,   result.params)
 50        self.assertEqual(result3.query,    result.query)
 51        self.assertEqual(result3.fragment, result.fragment)
 52        self.assertEqual(result3.username, result.username)
 53        self.assertEqual(result3.password, result.password)
 54        self.assertEqual(result3.hostname, result.hostname)
 55        self.assertEqual(result3.port,     result.port)
 56
 57        # check the roundtrip using urlsplit() as well
 58        result = urlparse.urlsplit(url)
 59        self.assertEqual(result, split)
 60        t = (result.scheme, result.netloc, result.path,
 61             result.query, result.fragment)
 62        self.assertEqual(t, split)
 63        result2 = urlparse.urlunsplit(result)
 64        self.assertEqual(result2, url)
 65        self.assertEqual(result2, result.geturl())
 66
 67        # check the fixpoint property of re-parsing the result of geturl()
 68        result3 = urlparse.urlsplit(result.geturl())
 69        self.assertEqual(result3.geturl(), result.geturl())
 70        self.assertEqual(result3,          result)
 71        self.assertEqual(result3.scheme,   result.scheme)
 72        self.assertEqual(result3.netloc,   result.netloc)
 73        self.assertEqual(result3.path,     result.path)
 74        self.assertEqual(result3.query,    result.query)
 75        self.assertEqual(result3.fragment, result.fragment)
 76        self.assertEqual(result3.username, result.username)
 77        self.assertEqual(result3.password, result.password)
 78        self.assertEqual(result3.hostname, result.hostname)
 79        self.assertEqual(result3.port,     result.port)
 80
 81    def test_qsl(self):
 82        for orig, expect in parse_qsl_test_cases:
 83            result = urlparse.parse_qsl(orig, keep_blank_values=True)
 84            self.assertEqual(result, expect, "Error parsing %s" % repr(orig))
 85
 86    def test_roundtrips(self):
 87        testcases = [
 88            ('file:///tmp/junk.txt',
 89             ('file', '', '/tmp/junk.txt', '', '', ''),
 90             ('file', '', '/tmp/junk.txt', '', '')),
 91            ('imap://mail.python.org/mbox1',
 92             ('imap', 'mail.python.org', '/mbox1', '', '', ''),
 93             ('imap', 'mail.python.org', '/mbox1', '', '')),
 94            ('mms://wms.sys.hinet.net/cts/Drama/09006251100.asf',
 95             ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf',
 96              '', '', ''),
 97             ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf',
 98              '', '')),
 99            ('svn+ssh://svn.zope.org/repos/main/ZConfig/trunk/',
100             ('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/',
101              '', '', ''),
102             ('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/',
103              '', ''))
104            ]
105        for url, parsed, split in testcases:
106            self.checkRoundtrips(url, parsed, split)
107
108    def test_http_roundtrips(self):
109        # urlparse.urlsplit treats 'http:' as an optimized special case,
110        # so we test both 'http:' and 'https:' in all the following.
111        # Three cheers for white box knowledge!
112        testcases = [
113            ('://www.python.org',
114             ('www.python.org', '', '', '', ''),
115             ('www.python.org', '', '', '')),
116            ('://www.python.org#abc',
117             ('www.python.org', '', '', '', 'abc'),
118             ('www.python.org', '', '', 'abc')),
119            ('://www.python.org?q=abc',
120             ('www.python.org', '', '', 'q=abc', ''),
121             ('www.python.org', '', 'q=abc', '')),
122            ('://www.python.org/#abc',
123             ('www.python.org', '/', '', '', 'abc'),
124             ('www.python.org', '/', '', 'abc')),
125            ('://a/b/c/d;p?q#f',
126             ('a', '/b/c/d', 'p', 'q', 'f'),
127             ('a', '/b/c/d;p', 'q', 'f')),
128            ]
129        for scheme in ('http', 'https'):
130            for url, parsed, split in testcases:
131                url = scheme + url
132                parsed = (scheme,) + parsed
133                split = (scheme,) + split
134                self.checkRoundtrips(url, parsed, split)
135
136    def checkJoin(self, base, relurl, expected):
137        self.assertEqual(urlparse.urljoin(base, relurl), expected,
138                         (base, relurl, expected))
139
140    def test_unparse_parse(self):
141        for u in ['Python', './Python']:
142            self.assertEqual(urlparse.urlunsplit(urlparse.urlsplit(u)), u)
143            self.assertEqual(urlparse.urlunparse(urlparse.urlparse(u)), u)
144
145    def test_RFC1808(self):
146        # "normal" cases from RFC 1808:
147        self.checkJoin(RFC1808_BASE, 'g:h', 'g:h')
148        self.checkJoin(RFC1808_BASE, 'g', 'http://a/b/c/g')
149        self.checkJoin(RFC1808_BASE, './g', 'http://a/b/c/g')
150        self.checkJoin(RFC1808_BASE, 'g/', 'http://a/b/c/g/')
151        self.checkJoin(RFC1808_BASE, '/g', 'http://a/g')
152        self.checkJoin(RFC1808_BASE, '//g', 'http://g')
153        self.checkJoin(RFC1808_BASE, 'g?y', 'http://a/b/c/g?y')
154        self.checkJoin(RFC1808_BASE, 'g?y/./x', 'http://a/b/c/g?y/./x')
155        self.checkJoin(RFC1808_BASE, '#s', 'http://a/b/c/d;p?q#s')
156        self.checkJoin(RFC1808_BASE, 'g#s', 'http://a/b/c/g#s')
157        self.checkJoin(RFC1808_BASE, 'g#s/./x', 'http://a/b/c/g#s/./x')
158        self.checkJoin(RFC1808_BASE, 'g?y#s', 'http://a/b/c/g?y#s')
159        self.checkJoin(RFC1808_BASE, 'g;x', 'http://a/b/c/g;x')
160        self.checkJoin(RFC1808_BASE, 'g;x?y#s', 'http://a/b/c/g;x?y#s')
161        self.checkJoin(RFC1808_BASE, '.', 'http://a/b/c/')
162        self.checkJoin(RFC1808_BASE, './', 'http://a/b/c/')
163        self.checkJoin(RFC1808_BASE, '..', 'http://a/b/')
164        self.checkJoin(RFC1808_BASE, '../', 'http://a/b/')
165        self.checkJoin(RFC1808_BASE, '../g', 'http://a/b/g')
166        self.checkJoin(RFC1808_BASE, '../..', 'http://a/')
167        self.checkJoin(RFC1808_BASE, '../../', 'http://a/')
168        self.checkJoin(RFC1808_BASE, '../../g', 'http://a/g')
169
170        # "abnormal" cases from RFC 1808:
171        self.checkJoin(RFC1808_BASE, '', 'http://a/b/c/d;p?q#f')
172        self.checkJoin(RFC1808_BASE, '../../../g', 'http://a/../g')
173        self.checkJoin(RFC1808_BASE, '../../../../g', 'http://a/../../g')
174        self.checkJoin(RFC1808_BASE, '/./g', 'http://a/./g')
175        self.checkJoin(RFC1808_BASE, '/../g', 'http://a/../g')
176        self.checkJoin(RFC1808_BASE, 'g.', 'http://a/b/c/g.')
177        self.checkJoin(RFC1808_BASE, '.g', 'http://a/b/c/.g')
178        self.checkJoin(RFC1808_BASE, 'g..', 'http://a/b/c/g..')
179        self.checkJoin(RFC1808_BASE, '..g', 'http://a/b/c/..g')
180        self.checkJoin(RFC1808_BASE, './../g', 'http://a/b/g')
181        self.checkJoin(RFC1808_BASE, './g/.', 'http://a/b/c/g/')
182        self.checkJoin(RFC1808_BASE, 'g/./h', 'http://a/b/c/g/h')
183        self.checkJoin(RFC1808_BASE, 'g/../h', 'http://a/b/c/h')
184
185        # RFC 1808 and RFC 1630 disagree on these (according to RFC 1808),
186        # so we'll not actually run these tests (which expect 1808 behavior).
187        #self.checkJoin(RFC1808_BASE, 'http:g', 'http:g')
188        #self.checkJoin(RFC1808_BASE, 'http:', 'http:')
189
190    def test_RFC2396(self):
191        # cases from RFC 2396
192
193
194        self.checkJoin(RFC2396_BASE, 'g:h', 'g:h')
195        self.checkJoin(RFC2396_BASE, 'g', 'http://a/b/c/g')
196        self.checkJoin(RFC2396_BASE, './g', 'http://a/b/c/g')
197        self.checkJoin(RFC2396_BASE, 'g/', 'http://a/b/c/g/')
198        self.checkJoin(RFC2396_BASE, '/g', 'http://a/g')
199        self.checkJoin(RFC2396_BASE, '//g', 'http://g')
200        self.checkJoin(RFC2396_BASE, 'g?y', 'http://a/b/c/g?y')
201        self.checkJoin(RFC2396_BASE, '#s', 'http://a/b/c/d;p?q#s')
202        self.checkJoin(RFC2396_BASE, 'g#s', 'http://a/b/c/g#s')
203        self.checkJoin(RFC2396_BASE, 'g?y#s', 'http://a/b/c/g?y#s')
204        self.checkJoin(RFC2396_BASE, 'g;x', 'http://a/b/c/g;x')
205        self.checkJoin(RFC2396_BASE, 'g;x?y#s', 'http://a/b/c/g;x?y#s')
206        self.checkJoin(RFC2396_BASE, '.', 'http://a/b/c/')
207        self.checkJoin(RFC2396_BASE, './', 'http://a/b/c/')
208        self.checkJoin(RFC2396_BASE, '..', 'http://a/b/')
209        self.checkJoin(RFC2396_BASE, '../', 'http://a/b/')
210        self.checkJoin(RFC2396_BASE, '../g', 'http://a/b/g')
211        self.checkJoin(RFC2396_BASE, '../..', 'http://a/')
212        self.checkJoin(RFC2396_BASE, '../../', 'http://a/')
213        self.checkJoin(RFC2396_BASE, '../../g', 'http://a/g')
214        self.checkJoin(RFC2396_BASE, '', RFC2396_BASE)
215        self.checkJoin(RFC2396_BASE, '../../../g', 'http://a/../g')
216        self.checkJoin(RFC2396_BASE, '../../../../g', 'http://a/../../g')
217        self.checkJoin(RFC2396_BASE, '/./g', 'http://a/./g')
218        self.checkJoin(RFC2396_BASE, '/../g', 'http://a/../g')
219        self.checkJoin(RFC2396_BASE, 'g.', 'http://a/b/c/g.')
220        self.checkJoin(RFC2396_BASE, '.g', 'http://a/b/c/.g')
221        self.checkJoin(RFC2396_BASE, 'g..', 'http://a/b/c/g..')
222        self.checkJoin(RFC2396_BASE, '..g', 'http://a/b/c/..g')
223        self.checkJoin(RFC2396_BASE, './../g', 'http://a/b/g')
224        self.checkJoin(RFC2396_BASE, './g/.', 'http://a/b/c/g/')
225        self.checkJoin(RFC2396_BASE, 'g/./h', 'http://a/b/c/g/h')
226        self.checkJoin(RFC2396_BASE, 'g/../h', 'http://a/b/c/h')
227        self.checkJoin(RFC2396_BASE, 'g;x=1/./y', 'http://a/b/c/g;x=1/y')
228        self.checkJoin(RFC2396_BASE, 'g;x=1/../y', 'http://a/b/c/y')
229        self.checkJoin(RFC2396_BASE, 'g?y/./x', 'http://a/b/c/g?y/./x')
230        self.checkJoin(RFC2396_BASE, 'g?y/../x', 'http://a/b/c/g?y/../x')
231        self.checkJoin(RFC2396_BASE, 'g#s/./x', 'http://a/b/c/g#s/./x')
232        self.checkJoin(RFC2396_BASE, 'g#s/../x', 'http://a/b/c/g#s/../x')
233
234        #The following scenarios have been updated in RFC3986
235        #self.checkJoin(RFC2396_BASE, '?y', 'http://a/b/c/?y')
236        #self.checkJoin(RFC2396_BASE, ';x', 'http://a/b/c/;x')
237
238    def test_RFC3986(self):
239        self.checkJoin(RFC3986_BASE, '?y','http://a/b/c/d;p?y')
240        self.checkJoin(RFC2396_BASE, ';x', 'http://a/b/c/;x')
241
242    def test_urldefrag(self):
243        for url, defrag, frag in [
244            ('http://python.org#frag', 'http://python.org', 'frag'),
245            ('http://python.org', 'http://python.org', ''),
246            ('http://python.org/#frag', 'http://python.org/', 'frag'),
247            ('http://python.org/', 'http://python.org/', ''),
248            ('http://python.org/?q#frag', 'http://python.org/?q', 'frag'),
249            ('http://python.org/?q', 'http://python.org/?q', ''),
250            ('http://python.org/p#frag', 'http://python.org/p', 'frag'),
251            ('http://python.org/p?q', 'http://python.org/p?q', ''),
252            (RFC1808_BASE, 'http://a/b/c/d;p?q', 'f'),
253            (RFC2396_BASE, 'http://a/b/c/d;p?q', ''),
254            ]:
255            self.assertEqual(urlparse.urldefrag(url), (defrag, frag))
256
257    def test_urlsplit_attributes(self):
258        url = "HTTP://WWW.PYTHON.ORG/doc/#frag"
259        p = urlparse.urlsplit(url)
260        self.assertEqual(p.scheme, "http")
261        self.assertEqual(p.netloc, "WWW.PYTHON.ORG")
262        self.assertEqual(p.path, "/doc/")
263        self.assertEqual(p.query, "")
264        self.assertEqual(p.fragment, "frag")
265        self.assertEqual(p.username, None)
266        self.assertEqual(p.password, None)
267        self.assertEqual(p.hostname, "www.python.org")
268        self.assertEqual(p.port, None)
269        # geturl() won't return exactly the original URL in this case
270        # since the scheme is always case-normalized
271        #self.assertEqual(p.geturl(), url)
272
273        url = "http://User:Pass@www.python.org:080/doc/?query=yes#frag"
274        p = urlparse.urlsplit(url)
275        self.assertEqual(p.scheme, "http")
276        self.assertEqual(p.netloc, "User:Pass@www.python.org:080")
277        self.assertEqual(p.path, "/doc/")
278        self.assertEqual(p.query, "query=yes")
279        self.assertEqual(p.fragment, "frag")
280        self.assertEqual(p.username, "User")
281        self.assertEqual(p.password, "Pass")
282        self.assertEqual(p.hostname, "www.python.org")
283        self.assertEqual(p.port, 80)
284        self.assertEqual(p.geturl(), url)
285
286        # Addressing issue1698, which suggests Username can contain
287        # "@" characters.  Though not RFC compliant, many ftp sites allow
288        # and request email addresses as usernames.
289
290        url = "http://User@example.com:Pass@www.python.org:080/doc/?query=yes#frag"
291        p = urlparse.urlsplit(url)
292        self.assertEqual(p.scheme, "http")
293        self.assertEqual(p.netloc, "User@example.com:Pass@www.python.org:080")
294        self.assertEqual(p.path, "/doc/")
295        self.assertEqual(p.query, "query=yes")
296        self.assertEqual(p.fragment, "frag")
297        self.assertEqual(p.username, "User@example.com")
298        self.assertEqual(p.password, "Pass")
299        self.assertEqual(p.hostname, "www.python.org")
300        self.assertEqual(p.port, 80)
301        self.assertEqual(p.geturl(), url)
302
303
304    def test_attributes_bad_port(self):
305        """Check handling of non-integer ports."""
306        p = urlparse.urlsplit("http://www.example.net:foo")
307        self.assertEqual(p.netloc, "www.example.net:foo")
308        self.assertRaises(ValueError, lambda: p.port)
309
310        p = urlparse.urlparse("http://www.example.net:foo")
311        self.assertEqual(p.netloc, "www.example.net:foo")
312        self.assertRaises(ValueError, lambda: p.port)
313
314    def test_attributes_without_netloc(self):
315        # This example is straight from RFC 3261.  It looks like it
316        # should allow the username, hostname, and port to be filled
317        # in, but doesn't.  Since it's a URI and doesn't use the
318        # scheme://netloc syntax, the netloc and related attributes
319        # should be left empty.
320        uri = "sip:alice@atlanta.com;maddr=239.255.255.1;ttl=15"
321        p = urlparse.urlsplit(uri)
322        self.assertEqual(p.netloc, "")
323        self.assertEqual(p.username, None)
324        self.assertEqual(p.password, None)
325        self.assertEqual(p.hostname, None)
326        self.assertEqual(p.port, None)
327        self.assertEqual(p.geturl(), uri)
328
329        p = urlparse.urlparse(uri)
330        self.assertEqual(p.netloc, "")
331        self.assertEqual(p.username, None)
332        self.assertEqual(p.password, None)
333        self.assertEqual(p.hostname, None)
334        self.assertEqual(p.port, None)
335        self.assertEqual(p.geturl(), uri)
336
337    def test_caching(self):
338        # Test case for bug #1313119
339        uri = "http://example.com/doc/"
340        unicode_uri = unicode(uri)
341
342        urlparse.urlparse(unicode_uri)
343        p = urlparse.urlparse(uri)
344        self.assertEqual(type(p.scheme), type(uri))
345        self.assertEqual(type(p.hostname), type(uri))
346        self.assertEqual(type(p.path), type(uri))
347
348    def test_noslash(self):
349        # Issue 1637: http://foo.com?query is legal
350        self.assertEqual(urlparse.urlparse("http://example.com?blahblah=/foo"),
351                         ('http', 'example.com', '', '', 'blahblah=/foo', ''))
352
def test_main():
    """Run all tests of UrlParseTestCase via test_support.run_unittest."""
    case_classes = (UrlParseTestCase,)
    test_support.run_unittest(*case_classes)


# Allow the module to be executed directly as a script.
if __name__ == "__main__":
    test_main()