/distutils2/tests/test_index_simple.py

https://bitbucket.org/tarek/distutils2/
  1. """Tests for the pypi.simple module.
  2. """
  3. import sys
  4. import os
  5. import urllib2
  6. from distutils2.index.simple import Crawler
  7. from distutils2.tests import unittest
  8. from distutils2.tests.support import TempdirManager, LoggingCatcher
  9. from distutils2.tests.pypi_server import (use_pypi_server, PyPIServer,
  10. PYPI_DEFAULT_STATIC_PATH)


class SimpleCrawlerTestCase(TempdirManager,
                            LoggingCatcher,
                            unittest.TestCase):

    def _get_simple_crawler(self, server, base_url="/simple/", hosts=None,
                            *args, **kwargs):
        """Build and return a Crawler instance, using the test server's
        URLs.
        """
        if hosts is None:
            hosts = (server.full_address.replace("http://", ""),)
        kwargs['hosts'] = hosts
        return Crawler(server.full_address + base_url, *args,
                       **kwargs)

    @use_pypi_server()
    def test_bad_urls(self, server):
        crawler = Crawler()
        url = 'http://127.0.0.1:0/nonesuch/test_simple'
        try:
            v = crawler._open_url(url)
        except Exception, v:
            self.assertTrue(url in str(v))
        else:
            self.assertTrue(isinstance(v, urllib2.HTTPError))

        # issue 16
        # easy_install inquant.contentmirror.plone breaks because of a typo
        # in its home URL
        crawler = Crawler(hosts=('example.org',))
        url = 'url:%20https://svn.plone.org/svn/collective/inquant.contentmirror.plone/trunk'
        try:
            v = crawler._open_url(url)
        except Exception, v:
            self.assertTrue(url in str(v))
        else:
            self.assertTrue(isinstance(v, urllib2.HTTPError))

        def _urlopen(*args):
            import httplib
            raise httplib.BadStatusLine('line')
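
        # simulate a low-level HTTP failure: temporarily replace
        # urllib2.urlopen with a stub that raises httplib.BadStatusLine, and
        # check that _open_url surfaces the error message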
        old_urlopen = urllib2.urlopen
        urllib2.urlopen = _urlopen
        url = 'http://example.org'
        try:
            try:
                v = crawler._open_url(url)
            except Exception, v:
                self.assertTrue('line' in str(v))
            else:
                raise AssertionError('Should have raised here!')
        finally:
            urllib2.urlopen = old_urlopen

        # issue 20
        url = 'http://http://svn.pythonpaste.org/Paste/wphp/trunk'
        try:
            crawler._open_url(url)
        except Exception, v:
            self.assertTrue('nonnumeric port' in str(v))

        # issue #160
        if sys.version_info[0] == 2 and sys.version_info[1] == 7:
            # this should not fail
            url = server.full_address
            page = ('<a href="http://www.famfamfam.com]('
                    'http://www.famfamfam.com/">')
            crawler._process_url(url, page)
  73. @use_pypi_server("test_found_links")
  74. def test_found_links(self, server):
  75. # Browse the index, asking for a specified release version
  76. # The PyPI index contains links for version 1.0, 1.1, 2.0 and 2.0.1
  77. crawler = self._get_simple_crawler(server)
  78. last_release = crawler.get_release("foobar")
  79. # we have scanned the index page
  80. self.assertIn(server.full_address + "/simple/foobar/",
  81. crawler._processed_urls)
  82. # we have found 4 releases in this page
  83. self.assertEqual(len(crawler._projects["foobar"]), 4)
  84. # and returned the most recent one
  85. self.assertEqual("%s" % last_release.version, '2.0.1')

    def test_is_browsable(self):
        crawler = Crawler(follow_externals=False)
        self.assertTrue(crawler._is_browsable(crawler.index_url + "test"))

        # When following externals, we can give a list of hosts to trust,
        # and external links pointing anywhere else are not followed.
        crawler = Crawler(hosts=["pypi.python.org", "example.org"],
                          follow_externals=True)
        good_urls = (
            "http://pypi.python.org/foo/bar",
            "http://pypi.python.org/simple/foobar",
            "http://example.org",
            "http://example.org/",
            "http://example.org/simple/",
        )
        bad_urls = (
            "http://python.org",
            "http://example.tld",
        )
        for url in good_urls:
            self.assertTrue(crawler._is_browsable(url))
        for url in bad_urls:
            self.assertFalse(crawler._is_browsable(url))

        # allow all hosts
        crawler = Crawler(follow_externals=True, hosts=("*",))
        self.assertTrue(crawler._is_browsable("http://an-external.link/path"))
        self.assertTrue(crawler._is_browsable("pypi.example.org/a/path"))

        # specify a list of hosts we want to allow
        crawler = Crawler(follow_externals=True,
                          hosts=("*.example.org",))
        self.assertFalse(crawler._is_browsable("http://an-external.link/path"))
        self.assertTrue(crawler._is_browsable("http://pypi.example.org/a/path"))
  117. @use_pypi_server("with_externals")
  118. def test_follow_externals(self, server):
  119. # Include external pages
  120. # Try to request the package index, wich contains links to "externals"
  121. # resources. They have to be scanned too.
  122. crawler = self._get_simple_crawler(server, follow_externals=True)
  123. crawler.get_release("foobar")
  124. self.assertIn(server.full_address + "/external/external.html",
  125. crawler._processed_urls)

    @use_pypi_server("with_real_externals")
    def test_restrict_hosts(self, server):
        # Only a list of allowed hosts should be used.
        # Test that telling the simple PyPI client not to retrieve external
        # resources works.
        crawler = self._get_simple_crawler(server, follow_externals=False)
        crawler.get_release("foobar")
        self.assertNotIn(server.full_address + "/external/external.html",
                         crawler._processed_urls)

    @use_pypi_server(static_filesystem_paths=["with_externals"],
                     static_uri_paths=["simple", "external"])
    def test_links_priority(self, server):
        # Download links from the PyPI simple index should be used before
        # external download links.
        # http://bitbucket.org/tarek/distribute/issue/163/md5-validation-error
        #
        # Use case:
        # - someone uploads a package on PyPI, an md5 is generated
        # - someone manually copies this link (with the md5 in the url) onto
        #   an external page accessible from the package page
        # - someone reuploads the package (with a different md5)
        # - while easy_installing, an MD5 error occurs because the external
        #   link is used
        # -> The index should use the link from PyPI, not the external one.

        # start an index server
        index_url = server.full_address + '/simple/'

        # scan a test index
        crawler = Crawler(index_url, follow_externals=True)
        releases = crawler.get_releases("foobar")
        server.stop()

        # we have only one link, because links are compared without md5
        self.assertEqual(1, len(releases))
        self.assertEqual(1, len(releases[0].dists))

        # the link should be from the index
        self.assertEqual(2, len(releases[0].dists['sdist'].urls))
        self.assertEqual('12345678901234567',
                         releases[0].dists['sdist'].url['hashval'])
        self.assertEqual('md5', releases[0].dists['sdist'].url['hashname'])

    @use_pypi_server(static_filesystem_paths=["with_norel_links"],
                     static_uri_paths=["simple", "external"])
    def test_not_scan_all_links(self, server):
        # Do not follow all index page links.
        # Links not tagged with rel="download" or rel="homepage" must not be
        # processed by the package index while processing "pages".

        # process the pages
        crawler = self._get_simple_crawler(server, follow_externals=True)
        crawler.get_releases("foobar")

        # now it should have processed only pages with links rel="download"
        # and rel="homepage"
        self.assertIn("%s/simple/foobar/" % server.full_address,
                      crawler._processed_urls)  # it's the simple index page
        self.assertIn("%s/external/homepage.html" % server.full_address,
                      crawler._processed_urls)  # the external homepage is rel="homepage"
        self.assertNotIn("%s/external/nonrel.html" % server.full_address,
                         crawler._processed_urls)  # this link contains no rel=*
        self.assertNotIn("%s/unrelated-0.2.tar.gz" % server.full_address,
                         crawler._processed_urls)  # linked from simple index (no rel)
        self.assertIn("%s/foobar-0.1.tar.gz" % server.full_address,
                      crawler._processed_urls)  # linked from simple index (rel)
        self.assertIn("%s/foobar-2.0.tar.gz" % server.full_address,
                      crawler._processed_urls)  # linked from external homepage (rel)

    def test_uses_mirrors(self):
        """When the main repository seems down, try using the given mirrors"""
        server = PyPIServer("foo_bar_baz")
        mirror = PyPIServer("foo_bar_baz")
        mirror.start()  # only the mirror is started; the main server is not
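        # the primary index address is unreachable, so the crawler should
        # fall back to the mirror instead of raising a timeout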
        try:
            # create the index using both servers
            crawler = Crawler(server.full_address + "/simple/",
                              hosts=('*',), timeout=1,  # set the timeout to 1s for the tests
                              mirrors=[mirror.full_address])

            # this should not raise a timeout
            self.assertEqual(4, len(crawler.get_releases("foo")))
        finally:
            mirror.stop()

    def test_simple_link_matcher(self):
        """Test that the simple link matcher yields the right links"""
        crawler = Crawler(follow_externals=False)

        # Here, we define:
        #   1. one link that must be followed, because it's a download link
        #   2. one link that must *not* be followed, because _is_browsable
        #      returns False for it
        #   3. one link that must be followed, because it's a browsable
        #      homepage
        #   4. one link that must be followed, because it contains an md5
        #      hash
        self.assertTrue(crawler._is_browsable("%stest" % crawler.index_url))
        self.assertFalse(crawler._is_browsable("http://dl-link2"))
        content = """
        <a href="http://dl-link1" rel="download">download_link1</a>
        <a href="http://dl-link2" rel="homepage">homepage_link1</a>
        <a href="%(index_url)stest" rel="homepage">homepage_link2</a>
        <a href="%(index_url)stest/foobar-1.tar.gz#md5=abcdef>download_link2</a>
        """ % {'index_url': crawler.index_url}

        # Test that the simple link matcher yields the right links.
        generator = crawler._simple_link_matcher(content, crawler.index_url)
        self.assertEqual(('%stest/foobar-1.tar.gz#md5=abcdef' % crawler.index_url,
                          True), generator.next())
        self.assertEqual(('http://dl-link1', True), generator.next())
        self.assertEqual(('%stest' % crawler.index_url, False),
                         generator.next())
        self.assertRaises(StopIteration, generator.next)

        # Following external links is possible (e.g. homepages).
        crawler.follow_externals = True
        generator = crawler._simple_link_matcher(content, crawler.index_url)
        self.assertEqual(('%stest/foobar-1.tar.gz#md5=abcdef' % crawler.index_url,
                          True), generator.next())
        self.assertEqual(('http://dl-link1', True), generator.next())
        self.assertEqual(('http://dl-link2', False), generator.next())
        self.assertEqual(('%stest' % crawler.index_url, False),
                         generator.next())
        self.assertRaises(StopIteration, generator.next)

    def test_browse_local_files(self):
        """Test that we can browse local files"""
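        # browse the static test index from the local filesystem
        # via a file:// URL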
        index_path = os.sep.join(["file://" + PYPI_DEFAULT_STATIC_PATH,
                                  "test_found_links", "simple"])
        crawler = Crawler(index_path)
        dists = crawler.get_releases("foobar")
        self.assertEqual(4, len(dists))

    def test_get_link_matcher(self):
        crawler = Crawler("http://example.org")
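        # URLs under the index itself should use the simple link matcher;
        # any other URL should fall back to the default link matcher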
        self.assertEqual('_simple_link_matcher', crawler._get_link_matcher(
                         "http://example.org/some/file").__name__)
        self.assertEqual('_default_link_matcher', crawler._get_link_matcher(
                         "http://other-url").__name__)

    def test_default_link_matcher(self):
        crawler = Crawler("http://example.org", mirrors=[])
        crawler.follow_externals = True
        crawler._is_browsable = lambda *args: True
        base_url = "http://example.org/some/file/"
        content = """
        <a href="../homepage" rel="homepage">link</a>
        <a href="../download" rel="download">link2</a>
        <a href="../simpleurl">link2</a>
        """
        found_links = set(dict(crawler._default_link_matcher(content,
                                                             base_url)))
        self.assertIn('http://example.org/some/homepage', found_links)
        self.assertIn('http://example.org/some/simpleurl', found_links)
        self.assertIn('http://example.org/some/download', found_links)
  265. @use_pypi_server("project_list")
  266. def test_search_projects(self, server):
  267. # we can search the index for some projects, on their names
  268. # the case used no matters here
  269. crawler = self._get_simple_crawler(server)
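        # the trailing "*" wildcard is expected to anchor the match at the
        # start of the project name, the leading "*" at its end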
        tests = (('Foobar', ['FooBar-bar', 'Foobar-baz', 'Baz-FooBar']),
                 ('foobar*', ['FooBar-bar', 'Foobar-baz']),
                 ('*foobar', ['Baz-FooBar']))
        for search, expected in tests:
            projects = [p.name for p in crawler.search_projects(search)]
            self.assertListEqual(expected, projects)


def test_suite():
    return unittest.makeSuite(SimpleCrawlerTestCase)


if __name__ == '__main__':
    unittest.main(defaultTest="test_suite")