PageRenderTime 52ms CodeModel.GetById 19ms RepoModel.GetById 0ms app.codeStats 0ms

/tests/test_run.py

https://gitlab.com/czm1129/pyspider
Python | 336 lines | 297 code | 28 blank | 11 comment | 37 complexity | 354b03fcf898a8e91588ec8c2c8c5f52 MD5 | raw file
  1. #!/usr/bin/env python
  2. # -*- encoding: utf-8 -*-
  3. # vim: set et sw=4 ts=4 sts=4 ff=unix fenc=utf8:
  4. # Author: Binux<roy@binux.me>
  5. # http://binux.me
  6. # Created on 2014-11-21 22:32:35
  7. from __future__ import print_function
  8. import os
  9. import sys
  10. import six
  11. import time
  12. import json
  13. import signal
  14. import shutil
  15. import inspect
  16. import requests
  17. import unittest2 as unittest
  18. from pyspider import run
  19. from pyspider.libs import utils
  20. from tests import data_sample_handler
  21. class TestRun(unittest.TestCase):
  22. @classmethod
  23. def setUpClass(self):
  24. shutil.rmtree('./data/tests', ignore_errors=True)
  25. os.makedirs('./data/tests')
  26. import tests.data_test_webpage
  27. import httpbin
  28. self.httpbin_thread = utils.run_in_subprocess(httpbin.app.run, port=14887)
  29. self.httpbin = 'http://127.0.0.1:14887'
  30. @classmethod
  31. def tearDownClass(self):
  32. self.httpbin_thread.terminate()
  33. self.httpbin_thread.join()
  34. shutil.rmtree('./data/tests', ignore_errors=True)
  35. def test_10_cli(self):
  36. ctx = run.cli.make_context('test', [], None, obj=dict(testing_mode=True))
  37. ctx = run.cli.invoke(ctx)
  38. self.assertEqual(ctx.obj.debug, False)
  39. for db in ('taskdb', 'projectdb', 'resultdb'):
  40. self.assertIsNotNone(getattr(ctx.obj, db))
  41. for name in ('newtask_queue', 'status_queue', 'scheduler2fetcher',
  42. 'fetcher2processor', 'processor2result'):
  43. self.assertIsNotNone(getattr(ctx.obj, name))
  44. self.assertEqual(len(ctx.obj.instances), 0)
  45. def test_20_cli_config(self):
  46. with open('./data/tests/config.json', 'w') as fp:
  47. json.dump({
  48. 'debug': True,
  49. 'taskdb': 'mysql+taskdb://localhost:23456/taskdb',
  50. 'amqp-url': 'amqp://guest:guest@localhost:23456/%%2F'
  51. }, fp)
  52. ctx = run.cli.make_context('test',
  53. ['--config', './data/tests/config.json'],
  54. None, obj=dict(testing_mode=True))
  55. ctx = run.cli.invoke(ctx)
  56. self.assertEqual(ctx.obj.debug, True)
  57. import mysql.connector
  58. with self.assertRaises(mysql.connector.InterfaceError):
  59. ctx.obj.taskdb
  60. with self.assertRaisesRegexp(Exception, 'Connection refused'):
  61. ctx.obj.newtask_queue
  62. def test_30_cli_command_line(self):
  63. ctx = run.cli.make_context(
  64. 'test',
  65. ['--projectdb', 'mongodb+projectdb://localhost:23456/projectdb'],
  66. None,
  67. obj=dict(testing_mode=True)
  68. )
  69. ctx = run.cli.invoke(ctx)
  70. from pymongo.errors import ConnectionFailure
  71. with self.assertRaises(ConnectionFailure):
  72. ctx.obj.projectdb
  73. def test_40_cli_env(self):
  74. try:
  75. os.environ['RESULTDB'] = 'sqlite+resultdb://'
  76. ctx = run.cli.make_context('test', [], None,
  77. obj=dict(testing_mode=True))
  78. ctx = run.cli.invoke(ctx)
  79. from pyspider.database.sqlite import resultdb
  80. self.assertIsInstance(ctx.obj.resultdb, resultdb.ResultDB)
  81. finally:
  82. del os.environ['RESULTDB']
  83. @unittest.skipIf(os.environ.get('IGNORE_RABBITMQ'), 'no rabbitmq server for test.')
  84. def test_50_docker_rabbitmq(self):
  85. try:
  86. os.environ['RABBITMQ_NAME'] = 'rabbitmq'
  87. os.environ['RABBITMQ_PORT_5672_TCP_ADDR'] = 'localhost'
  88. os.environ['RABBITMQ_PORT_5672_TCP_PORT'] = '5672'
  89. ctx = run.cli.make_context('test', [], None,
  90. obj=dict(testing_mode=True))
  91. ctx = run.cli.invoke(ctx)
  92. queue = ctx.obj.newtask_queue
  93. queue.put('abc')
  94. queue.delete()
  95. except Exception as e:
  96. self.assertIsNone(e)
  97. finally:
  98. del os.environ['RABBITMQ_NAME']
  99. del os.environ['RABBITMQ_PORT_5672_TCP_ADDR']
  100. del os.environ['RABBITMQ_PORT_5672_TCP_PORT']
  101. @unittest.skipIf(os.environ.get('IGNORE_MONGODB'), 'no mongodb server for test.')
  102. def test_60_docker_mongodb(self):
  103. try:
  104. os.environ['MONGODB_NAME'] = 'mongodb'
  105. os.environ['MONGODB_PORT_27017_TCP_ADDR'] = 'localhost'
  106. os.environ['MONGODB_PORT_27017_TCP_PORT'] = '27017'
  107. ctx = run.cli.make_context('test', [], None,
  108. obj=dict(testing_mode=True))
  109. ctx = run.cli.invoke(ctx)
  110. ctx.obj.resultdb
  111. except Exception as e:
  112. self.assertIsNone(e)
  113. finally:
  114. del os.environ['MONGODB_NAME']
  115. del os.environ['MONGODB_PORT_27017_TCP_ADDR']
  116. del os.environ['MONGODB_PORT_27017_TCP_PORT']
  117. @unittest.skip('noly available in docker')
  118. @unittest.skipIf(os.environ.get('IGNORE_MYSQL'), 'no mysql server for test.')
  119. def test_70_docker_mysql(self):
  120. try:
  121. os.environ['MYSQL_NAME'] = 'mysql'
  122. os.environ['MYSQL_PORT_3306_TCP_ADDR'] = 'localhost'
  123. os.environ['MYSQL_PORT_3306_TCP_PORT'] = '3306'
  124. ctx = run.cli.make_context('test', [], None,
  125. obj=dict(testing_mode=True))
  126. ctx = run.cli.invoke(ctx)
  127. ctx.obj.resultdb
  128. except Exception as e:
  129. self.assertIsNone(e)
  130. finally:
  131. del os.environ['MYSQL_NAME']
  132. del os.environ['MYSQL_PORT_3306_TCP_ADDR']
  133. del os.environ['MYSQL_PORT_3306_TCP_PORT']
  134. def test_80_docker_phantomjs(self):
  135. try:
  136. os.environ['PHANTOMJS_NAME'] = 'phantomjs'
  137. os.environ['PHANTOMJS_PORT_25555_TCP'] = 'tpc://binux:25678'
  138. ctx = run.cli.make_context('test', [], None,
  139. obj=dict(testing_mode=True))
  140. ctx = run.cli.invoke(ctx)
  141. self.assertEqual(ctx.obj.phantomjs_proxy, 'binux:25678')
  142. except Exception as e:
  143. self.assertIsNone(e)
  144. finally:
  145. del os.environ['PHANTOMJS_NAME']
  146. del os.environ['PHANTOMJS_PORT_25555_TCP']
  147. def test_90_docker_scheduler(self):
  148. try:
  149. os.environ['SCHEDULER_NAME'] = 'scheduler'
  150. os.environ['SCHEDULER_PORT_23333_TCP'] = 'tpc://binux:25678'
  151. ctx = run.cli.make_context('test', [], None,
  152. obj=dict(testing_mode=True))
  153. ctx = run.cli.invoke(ctx)
  154. webui = run.cli.get_command(ctx, 'webui')
  155. webui_ctx = webui.make_context('webui', [], ctx)
  156. app = webui.invoke(webui_ctx)
  157. rpc = app.config['scheduler_rpc']
  158. self.assertEqual(rpc._ServerProxy__host, 'binux:25678')
  159. except Exception as e:
  160. self.assertIsNone(e)
  161. finally:
  162. del os.environ['SCHEDULER_NAME']
  163. del os.environ['SCHEDULER_PORT_23333_TCP']
  164. def test_a100_all(self):
  165. import subprocess
  166. #cmd = [sys.executable]
  167. cmd = ['coverage', 'run']
  168. p = subprocess.Popen(cmd+[
  169. inspect.getsourcefile(run),
  170. '--taskdb', 'sqlite+taskdb:///data/tests/all_test_task.db',
  171. '--resultdb', 'sqlite+resultdb:///data/tests/all_test_result.db',
  172. '--projectdb', 'local+projectdb://'+inspect.getsourcefile(data_sample_handler),
  173. 'all',
  174. ], close_fds=True, preexec_fn=os.setsid)
  175. try:
  176. limit = 30
  177. while limit >= 0:
  178. time.sleep(3)
  179. # click run
  180. try:
  181. requests.post('http://localhost:5000/run', data={
  182. 'project': 'data_sample_handler',
  183. })
  184. except requests.exceptions.ConnectionError:
  185. limit -= 1
  186. continue
  187. break
  188. limit = 30
  189. data = requests.get('http://localhost:5000/counter')
  190. self.assertEqual(data.status_code, 200)
  191. while data.json().get('data_sample_handler', {}).get('5m', {}).get('success', 0) < 5:
  192. time.sleep(1)
  193. data = requests.get('http://localhost:5000/counter')
  194. limit -= 1
  195. if limit <= 0:
  196. break
  197. self.assertGreater(limit, 0)
  198. rv = requests.get('http://localhost:5000/results?project=data_sample_handler')
  199. self.assertIn('<th>url</th>', rv.text)
  200. self.assertIn('class=url', rv.text)
  201. except:
  202. raise
  203. finally:
  204. time.sleep(1)
  205. os.killpg(p.pid, signal.SIGTERM)
  206. p.wait()
  207. def test_a110_one(self):
  208. pid, fd = os.forkpty()
  209. #cmd = [sys.executable]
  210. cmd = ['coverage', 'run']
  211. cmd += [
  212. inspect.getsourcefile(run),
  213. 'one',
  214. '-i',
  215. inspect.getsourcefile(data_sample_handler)
  216. ]
  217. if pid == 0:
  218. # child
  219. os.execvp(cmd[0], cmd)
  220. else:
  221. # parent
  222. def wait_text(timeout=1):
  223. import select
  224. text = []
  225. while True:
  226. rl, wl, xl = select.select([fd], [], [], timeout)
  227. if not rl:
  228. break
  229. try:
  230. t = os.read(fd, 1024)
  231. except OSError:
  232. break
  233. if not t:
  234. break
  235. t = utils.text(t)
  236. text.append(t)
  237. print(t, end='')
  238. return ''.join(text)
  239. text = wait_text(3)
  240. self.assertIn('new task data_sample_handler:on_start', text)
  241. self.assertIn('pyspider shell', text)
  242. os.write(fd, utils.utf8('run()\n'))
  243. text = wait_text()
  244. self.assertIn('task done data_sample_handler:on_start', text)
  245. os.write(fd, utils.utf8('crawl("%s/pyspider/test.html")\n' % self.httpbin))
  246. text = wait_text()
  247. self.assertIn('/robots.txt', text)
  248. os.write(fd, utils.utf8('crawl("%s/links/10/0")\n' % self.httpbin))
  249. text = wait_text()
  250. if '"title": "Links"' not in text:
  251. os.write(fd, utils.utf8('crawl("%s/links/10/1")\n' % self.httpbin))
  252. text = wait_text()
  253. self.assertIn('"title": "Links"', text)
  254. os.write(fd, utils.utf8('crawl("%s/404")\n' % self.httpbin))
  255. text = wait_text()
  256. self.assertIn('task retry', text)
  257. os.write(fd, b'quit_pyspider()\n')
  258. text = wait_text()
  259. self.assertIn('scheduler exiting...', text)
  260. os.close(fd)
  261. os.kill(pid, signal.SIGINT)
class TestSendMessage(unittest.TestCase):
    """Tests for the `pyspider send_message` CLI sub-command.

    A real scheduler is started in background threads against throwaway
    sqlite databases; the test then verifies that a sent message surfaces
    on the scheduler2fetcher queue as an `_on_message` task.
    """

    @classmethod
    def setUpClass(self):
        # Fresh scratch directory for the sqlite databases below.
        shutil.rmtree('./data/tests', ignore_errors=True)
        os.makedirs('./data/tests')

        ctx = run.cli.make_context('test', [
            '--taskdb', 'sqlite+taskdb:///data/tests/task.db',
            '--projectdb', 'sqlite+projectdb:///data/tests/projectdb.db',
            '--resultdb', 'sqlite+resultdb:///data/tests/resultdb.db',
        ], None, obj=dict(testing_mode=True))
        self.ctx = run.cli.invoke(ctx)

        # Start the scheduler's XML-RPC server and main loop in daemon threads.
        ctx = run.scheduler.make_context('scheduler', [], self.ctx)
        scheduler = run.scheduler.invoke(ctx)
        utils.run_in_thread(scheduler.xmlrpc_run)
        utils.run_in_thread(scheduler.run)

        # Give the scheduler threads a moment to come up before tests run.
        time.sleep(1)

    @classmethod
    def tearDownClass(self):
        # Ask every component created by the CLI context to shut down.
        for each in self.ctx.obj.instances:
            each.quit()
        time.sleep(1)
        shutil.rmtree('./data/tests', ignore_errors=True)

    def test_10_send_message(self):
        """send_message produces a data:,on_message task with the _on_message callback."""
        ctx = run.send_message.make_context('send_message', [
            'test_project', 'test_message'
        ], self.ctx)
        self.assertTrue(run.send_message.invoke(ctx))

        # Drain the queue until the synthetic on_message task appears;
        # the 1s timeout raises if the scheduler never emits it.
        while True:
            task = self.ctx.obj.scheduler2fetcher.get(timeout=1)
            if task['url'] == 'data:,on_message':
                break
        self.assertEqual(task['process']['callback'], '_on_message')