PageRenderTime 60ms CodeModel.GetById 23ms RepoModel.GetById 0ms app.codeStats 0ms

/qp/hub/dispatcher.py

https://bitbucket.org/pfw/durusworks
Python | 454 lines | 371 code | 24 blank | 59 comment | 70 complexity | 9ad20e5c78c40a5f6600cf0cfdbe31a1 MD5 | raw file
  1. """
  2. open/DurusWorks/qp/hub/dispatcher.py
  3. For systems that support passfd, this provides
  4. a pre-forking server that uses file descriptor passing to off-load
  5. requests to child worker processes.
  6. """
  7. try:
  8. from qp.hub import passfd
  9. from qp.hub.passfd import recvfd, sendfd, socketpair
  10. except ImportError:
  11. # If passfd is not installed, we can still run in non-forking mode.
  12. passfd = None
  13. from durus.utils import as_bytes
  14. from qp.lib.util import as_str
  15. from select import select, error as select_error
  16. from errno import EPIPE, EWOULDBLOCK, EINTR
  17. from fcntl import fcntl, F_GETFL, F_SETFL
  18. from socket import SOCK_STREAM, AF_UNIX, SOL_SOCKET, SO_REUSEADDR, AF_INET
  19. from socket import socket, fromfd, MSG_PEEK, error as socket_error
  20. from stat import ST_MTIME
  21. from os import close, getpid, fork, O_NONBLOCK, read, write, waitpid, stat
  22. from os import environ, WNOHANG, execve, kill
  23. import errno
  24. import re
  25. import signal
  26. import sys
  27. def log(s):
  28. sys.stdout.write("hub[%s] %s\n" % (getpid(), s))
  29. sys.stdout.flush()
  30. def select_ready(fds):
  31. try:
  32. return select(fds, [], [], None)[0]
  33. except select_error:
  34. e = sys.exc_info()[1]
  35. if e[0] == EINTR: # got a signal, try again
  36. return []
  37. raise
  38. class Child (object):
  39. """
  40. This represents a child process that is running a service loop.
  41. """
  42. __slots__ = ['pid', 'fd', 'client_address', 'client_cookie',
  43. 'ready_for_connection', 'close_has_been_called']
  44. def __init__(self, pid, child_fd):
  45. self.pid = pid
  46. self.fd = child_fd
  47. self.client_address = None
  48. self.client_cookie = None
  49. self.ready_for_connection = False
  50. self.close_has_been_called = False
  51. def fileno(self):
  52. return self.fd
  53. def close(self):
  54. """
  55. Close the connection to the child process.
  56. """
  57. self.close_has_been_called = True
  58. try:
  59. close(self.fd)
  60. except OSError:
  61. log("close() failed for %s" % self.pid)
  62. def wait(self):
  63. """
  64. Wait for the child process to terminate.
  65. """
  66. log("Wait for %s" % self.pid)
  67. waitpid(self.pid, 0)
  68. log("%s is done." % self.pid)
  69. def kill(self):
  70. try:
  71. kill(self.pid, signal.SIGKILL)
  72. log("kill() %s succeeded" % self.pid)
  73. except OSError:
  74. log("kill() %s failed" % self.pid)
  75. self.close()
  76. def is_ready_for_connection(self):
  77. return self.ready_for_connection
  78. def update_ready_for_connection(self):
  79. if not self.ready_for_connection:
  80. ready_byte = ''
  81. try:
  82. ready_byte = read(self.fd, 1)
  83. except socket_error:
  84. exc = sys.exc_info()[1]
  85. if exc[0] != EWOULDBLOCK:
  86. raise
  87. except (OSError, IOError):
  88. pass
  89. if not ready_byte:
  90. self.close()
  91. else:
  92. self.ready_for_connection = (ready_byte == as_bytes('1'))
  93. def send_connection(self, conn, connection_address, connection_cookie):
  94. if not self.is_ready_for_connection():
  95. return False
  96. try:
  97. sendfd(self.fd, conn.fileno())
  98. except IOError:
  99. exc = sys.exc_info()[1]
  100. if exc.errno == EPIPE:
  101. return False
  102. else:
  103. raise
  104. self.client_address = connection_address
  105. self.client_cookie = connection_cookie
  106. self.ready_for_connection = False
  107. conn.close()
  108. return True
  109. def get_peername_address(connection):
  110. try:
  111. address = connection.getpeername()[0]
  112. except socket_error:
  113. log("get_peername_address: %s" % sys.exc_info()[1])
  114. address = None
  115. return address
  116. class HubServer (object):
  117. banned_msg = (
  118. 'HTTP/1.0 403 Forbidden\r\n'
  119. 'Content-Type: text/plain\r\n\r\n'
  120. 'Your IP address has been banned from accessing this site.\n'
  121. 'Please email the webmaster here if you need the ban removed.')
  122. def __init__(self, site_name, max_children=5, banned=None,
  123. busy_limit=1, dispatcher_command_prefix=None):
  124. """(site_name:string,
  125. max_children:int=5,
  126. banned:str|None=None,
  127. busy_limit:int=1,
  128. dispatcher_command_prefix:str|None=None)
  129. max_children is the maximum number of child processes. If the value
  130. of max_children is 0, the server runs in non-forking mode.
  131. banned is the path to a file, if any, containing IP addresses from
  132. which connections are not permitted.
  133. busy_limit is the maximum number of processes that may be kept
  134. busy by connections from any single IP address.
  135. dispatcher_command_prefix is a uri prefix for dispatcher commands,
  136. if dispatcher commands are enabled.
  137. """
  138. self.site_name = site_name
  139. if passfd:
  140. self.max_children = max_children
  141. else:
  142. # We can't pass file descriptors.
  143. # Fall back to non-forking mode.
  144. log("No passfd installed. Using non-forking mode.")
  145. self.max_children = 0
  146. self.children = []
  147. self.banned = banned
  148. self.banned_time = -1
  149. self.banned_addresses = set()
  150. self.connection_queue = []
  151. self.busy_limit = busy_limit
  152. self.dispatcher_command_prefix = dispatcher_command_prefix
  153. def spawn_child(self):
  154. """
  155. Unless we have reached the maximum number, start up a new child
  156. process.
  157. """
  158. if len(self.children) < self.max_children:
  159. parent_fd, child_fd = socketpair(AF_UNIX, SOCK_STREAM)
  160. # make child fd non-blocking
  161. flags = fcntl(child_fd, F_GETFL, 0)
  162. fcntl(child_fd, F_SETFL, flags | O_NONBLOCK)
  163. pid = fork()
  164. if pid == 0: # child
  165. close(child_fd)
  166. for s in self.listening_sockets + self.connection_queue:
  167. s.close()
  168. import qp.hub.web
  169. args = [sys.executable, qp.hub.web.__file__,
  170. self.site_name, str(parent_fd)]
  171. execve(sys.executable, args, environ)
  172. close(parent_fd)
  173. self.children.append(Child(pid, child_fd))
  174. log("Child [%s] %s of %s started." % (
  175. pid, len(self.children), self.max_children))
  176. def reap_children(self):
  177. """
  178. Check to see if any of the child processes have died, and if so,
  179. remove them from the list.
  180. """
  181. while self.children:
  182. (pid, status) = waitpid(-1, WNOHANG)
  183. if pid <= 0:
  184. break
  185. for child in self.children:
  186. if child.pid == pid:
  187. log("reap %s" % pid)
  188. child.close()
  189. self.children.remove(child)
  190. break
  191. def do_stop(self):
  192. """
  193. This may be called from a signal handler to stop the child processes.
  194. """
  195. log("stopping children")
  196. for child in self.children:
  197. child.close()
  198. for child in self.children:
  199. child.wait()
  200. self.children = []
  201. log("children stopped")
  202. remote_address_re = re.compile('.*REMOTE_ADDR.([.\d]+)')
  203. x_forwarded_for_re = re.compile('.*X_FORWARDED_FOR: ([.\d]+)')
  204. def get_connection_headers(self, connection):
  205. try:
  206. connection.settimeout(0.1)
  207. headers = connection.recv(2048, MSG_PEEK)
  208. connection.settimeout(3)
  209. except socket_error:
  210. exc_type, exc_value, exc_tb = sys.exc_info()
  211. if str(exc_value) != 'timed out':
  212. log("in get_connection_headers: %s" % exc_value)
  213. headers = ''
  214. headers = as_str(headers)
  215. #log("in get_connection_headers: %r" % headers)
  216. return headers
  217. def get_connection_address(self, connection, headers):
  218. if hasattr(connection, 'ssl_version'):
  219. return get_peername_address(connection)
  220. address = None
  221. address_match = (self.remote_address_re.match(headers) or
  222. self.x_forwarded_for_re.match(headers))
  223. if address_match:
  224. address = address_match.group(1)
  225. else:
  226. address = get_peername_address(connection)
  227. return address
  228. cookie_re = re.compile('.*[Cc][Oo][Oo][Kk][Ii][Ee]:?[\x00| ]([^\r\n\x00]+)')
  229. def get_connection_cookie(self, headers):
  230. cookie_search = self.cookie_re.search(headers)
  231. if cookie_search:
  232. cookie = cookie_search.group(1)
  233. else:
  234. cookie = None
  235. #log("in get_connection_cookie: %r" % cookie)
  236. return cookie
  237. def delegated(self, connection):
  238. """
  239. Try to find a suitable child process to handle this connection,
  240. and pass it to the child. If sucessful, return True.
  241. Otherwise, return False and wait for another chance.
  242. """
  243. headers = self.get_connection_headers(connection)
  244. connection_address = self.get_connection_address(connection, headers)
  245. if connection_address in self.banned_addresses:
  246. # Shut up. Go away.
  247. connection.send(self.banned_msg)
  248. connection.close()
  249. return True
  250. connection_cookie = self.get_connection_cookie(headers)
  251. children_with_this_address_and_cookie = []
  252. if connection_cookie is None and connection_address in (None, '127.0.0.1'):
  253. for child in self.children:
  254. if child.send_connection(connection, None, connection_cookie):
  255. return True
  256. else:
  257. children_with_this_address_and_cookie = [
  258. child for child in self.children
  259. if child.client_address == connection_address and child.client_cookie == connection_cookie]
  260. for child in children_with_this_address_and_cookie:
  261. if child.send_connection(connection, connection_address, connection_cookie):
  262. return True
  263. if len(children_with_this_address_and_cookie) < self.busy_limit:
  264. for child in self.children:
  265. if ((child.client_address != connection_address or child.client_cookie != connection_cookie) and
  266. child.send_connection(connection, connection_address, connection_cookie)):
  267. return True
  268. # Maybe a child process died?
  269. self.reap_children()
  270. if len(children_with_this_address_and_cookie) < self.busy_limit:
  271. # Spawn a new child if we haven't reached the max_children limit.
  272. self.spawn_child()
  273. return False
  274. def listen(self, *addresses):
  275. """
  276. Set self.listening_sockets to a list of sockets, bound the the given
  277. addresses and listening for new connections.
  278. To allow quick restarts without re-binding sockets, this method will,
  279. instead of doing the usual thing, use an environment variable to identify
  280. file descriptors for sockets that are presumed to already be bound to the
  281. given set of addresses.
  282. """
  283. name = 'QP_HUBSERVER_FILE_DESCRIPTORS'
  284. sockets = []
  285. addresses = [a for a in addresses if a]
  286. if environ.get(name):
  287. fds = [int(c) for c in environ.get(name).split(',')]
  288. assert len(fds) == len(addresses)
  289. sockets = [fromfd(fd, AF_INET, SOCK_STREAM)
  290. for fd in fds]
  291. if not sockets:
  292. def get_socket_bound_to_address(address):
  293. s = socket(AF_INET, SOCK_STREAM)
  294. s.setsockopt(SOL_SOCKET, SO_REUSEADDR, 1)
  295. s.bind(address[:2])
  296. s.listen(128)
  297. log("LISTEN %s" % repr(address))
  298. return s
  299. sockets = [get_socket_bound_to_address(address)
  300. for address in addresses]
  301. environ[name] = ','.join([str(s.fileno()) for s in sockets])
  302. self.listening_sockets = sockets
  303. def update_banned_addresses(self):
  304. """
  305. Consult the file named by self.banned, if any, and update
  306. self.banned_addresses to reflect the current content of the
  307. file.
  308. """
  309. if self.banned:
  310. try:
  311. mtime = stat(self.banned)[ST_MTIME]
  312. except OSError:
  313. pass
  314. else:
  315. if mtime > self.banned_time:
  316. self.banned_time = mtime
  317. f = open(self.banned)
  318. new_banned_addresses = set(
  319. [a.strip() for a in f.read().split()])
  320. f.close()
  321. for address in (self.banned_addresses -
  322. new_banned_addresses):
  323. log('UNBANNED: %s' % address)
  324. for address in (new_banned_addresses -
  325. self.banned_addresses):
  326. log('BANNED: %s' % address)
  327. self.banned_addresses = new_banned_addresses
  328. def remove_hogs(self):
  329. connections_by_ip_address = {}
  330. for c in self.connection_queue:
  331. address = self.get_connection_address(c)
  332. if address not in connections_by_ip_address:
  333. connections_by_ip_address[address] = []
  334. connections_by_ip_address[address].append(c)
  335. def value_length(x):
  336. return len(x[1])
  337. by_frequency = sorted(
  338. connections_by_ip_address.items(), key=value_length)
  339. hog_address, hogs = by_frequency[-1]
  340. log("REMOVE %s HOGS FROM %s" % (len(hogs), hog_address))
  341. try:
  342. for c in hogs:
  343. c.close()
  344. except:
  345. pass # Don't let the server die for this.
  346. for child in list(self.children):
  347. if child.client_address == hog_address:
  348. child.kill()
  349. self.children.remove(child)
  350. self.connection_queue = [
  351. c for c in self.connection_queue if c not in hogs]
  352. def run(self, handle_connection=None):
  353. """
  354. The main service loop of the parent.
  355. """
  356. while True:
  357. if not handle_connection and not self.children:
  358. self.spawn_child()
  359. if len(self.connection_queue) > 50:
  360. self.remove_hogs()
  361. self.children = [
  362. child for child in self.children
  363. if not child.close_has_been_called]
  364. # Accept any new connections.
  365. to_check = self.children + self.listening_sockets
  366. self.update_banned_addresses()
  367. if len(self.connection_queue) > 0:
  368. log("connections in queue: %s." % len(self.connection_queue))
  369. # Wait until there is something to read.
  370. ready = select_ready(to_check)
  371. for s in ready:
  372. if s in self.listening_sockets:
  373. try:
  374. conn, addr = s.accept()
  375. except socket_error:
  376. e = sys.exc_info()[1]
  377. if e[0] != EINTR: # signal
  378. raise
  379. self.connection_queue.append(conn)
  380. # Go ahead and read the ready-byte from children that
  381. # appear to have sent it.
  382. for child in self.children:
  383. if child in ready:
  384. child.update_ready_for_connection()
  385. # Now try to actually handle connections in the queue.
  386. if handle_connection and self.max_children == 0:
  387. # We are in non-forking mode. Handle waiting connections now.
  388. for c in self.connection_queue:
  389. handle_connection(c)
  390. c.close()
  391. self.connection_queue = []
  392. elif self.connection_queue:
  393. self.connection_queue = [
  394. c for c in self.connection_queue
  395. if not self.delegated(c)]
  396. def worker(handle_connection, parent_fd):
  397. parent_fd = int(parent_fd)
  398. while True:
  399. try:
  400. # Tell the parent that we are ready.
  401. write(parent_fd, as_bytes("1"))
  402. # Receive the descriptor for the connection
  403. fd = recvfd(parent_fd)
  404. except (IOError, OSError):
  405. # The parent probably exited
  406. # (EPIPE comes thru as OSError).
  407. log("Terminating %s." % getpid())
  408. raise SystemExit
  409. # Make a blocking socket from the file descriptor.
  410. conn = fromfd(fd, AF_INET, SOCK_STREAM)
  411. conn.setblocking(1)
  412. close(fd)
  413. handle_connection(conn)
  414. conn.close()