PageRenderTime 47ms CodeModel.GetById 17ms RepoModel.GetById 0ms app.codeStats 0ms

/pyhs/sockets.py

https://bitbucket.org/excieve/pyhs/
Python | 511 lines | 482 code | 11 blank | 18 comment | 15 complexity | e898f9d0e6a0ff28e07a1653cb0606f1 MD5 | raw file
Possible License(s): MIT
  1. import socket
  2. import threading
  3. import time
  4. import random
  5. from itertools import imap, chain
  6. try:
  7. from _speedups import encode, decode
  8. except ImportError:
  9. from utils import encode, decode
  10. from utils import check_columns
  11. from exceptions import *
  12. class Connection(object):
  13. """Single HandlerSocket connection.
  14. Maintains a streamed socket connection and defines methods to send and
  15. read data from it.
  16. In case of failure :attr:`~.retry_time` will be set to the exact time after
  17. which the connection may be retried to deal with temporary connection issues.
  18. """
  19. UNIX_PROTO = 'unix'
  20. INET_PROTO = 'inet'
  21. DEFAULT_TIMEOUT = 3
  22. RETRY_INTERVAL = 30
  23. def __init__(self, protocol, host, port=None, timeout=None):
  24. """
  25. :param string protocol: socket protocol (*'unix'* and *'inet'* are supported).
  26. :param string host: server host for *'inet'* protocol or socket file path for *'unix'*.
  27. :param port: server port for *'inet'* protocol connection.
  28. :type port: integer or None
  29. :param timeout: timeout value for socket, default is defined in
  30. :const:`.DEFAULT_TIMEOUT`.
  31. :type timeout: integer or None
  32. """
  33. self.timeout = timeout or self.DEFAULT_TIMEOUT
  34. self.host = host
  35. if protocol == self.UNIX_PROTO:
  36. self.protocol = socket.AF_UNIX
  37. self.address = self.host
  38. elif protocol == self.INET_PROTO:
  39. self.protocol = socket.AF_INET
  40. if not port:
  41. raise ValueError('Port is not specified for TCP connection')
  42. self.address = (self.host, port)
  43. else:
  44. raise ValueError('Unsupported protocol')
  45. self.socket = None
  46. self.retry_time = 0
  47. self.debug = False
  48. def set_debug_mode(self, mode):
  49. """Changes debugging mode of the connection.
  50. If enabled, some debugging info will be printed to stdout.
  51. :param bool mode: mode value
  52. """
  53. self.debug = mode
  54. def connect(self):
  55. """Establishes connection with a new socket. If some socket is
  56. associated with the instance - no new socket will be created.
  57. """
  58. if self.socket:
  59. return
  60. try:
  61. sock = socket.socket(self.protocol, socket.SOCK_STREAM)
  62. # Disable Nagle algorithm to improve latency:
  63. # http://developers.slashdot.org/comments.pl?sid=174457&threshold=1&commentsort=0&mode=thread&cid=14515105
  64. sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
  65. sock.settimeout(self.timeout)
  66. sock.connect(self.address)
  67. except socket.error, e:
  68. self._die(e, 'Connection error')
  69. self.socket = sock
  70. def _die(self, e, msg='Socket error'):
  71. """Disconnects from the host and assigns failure retry time. Throws a
  72. :exc:`~.exceptions.ConnectionError` exception with failure details.
  73. This is a private method and is meant to be used for any connection
  74. failures.
  75. :param e: original exception that caused connection failure.
  76. :type e: :exc:`socket.error`
  77. :param msg: optional exception message to indentify operation that was
  78. being in process (e.g. 'Read error').
  79. :type msg: string or None
  80. """
  81. self.retry_time = time.time() + self.RETRY_INTERVAL
  82. self.disconnect()
  83. exmsg = len(e.args) == 1 and e.args[0] or e.args[1]
  84. raise ConnectionError("%s: %s" % (msg, exmsg))
  85. def is_ready(self):
  86. """Checks if connection instance is ready to be used.
  87. :rtype: bool
  88. """
  89. if self.retry_time and self.retry_time > time.time():
  90. return False
  91. self.retry_time = 0
  92. return True
  93. def disconnect(self):
  94. """Closes a socket and disassociates it from the connection instance.
  95. .. note:: It ignores any socket exceptions that might happen in process.
  96. """
  97. if self.socket:
  98. try:
  99. self.socket.close()
  100. except socket.error:
  101. pass
  102. self.socket = None
  103. def readline(self):
  104. """Reads one line from the socket stream and returns it.
  105. Lines are expected to be delimited with LF.
  106. Throws :exc:`~.exceptions.ConnectionError` in case of failure.
  107. :rtype: string
  108. .. note:: Currently Connection class supports only one line per
  109. request/response. All data in the stream after first LF will be ignored.
  110. """
  111. buffer = ''
  112. index = -1
  113. while True:
  114. index = buffer.find('\n')
  115. if index >= 0:
  116. break
  117. try:
  118. data = self.socket.recv(4096)
  119. if self.debug:
  120. print "DEBUG: read data bucket: %s" % data
  121. if not data:
  122. raise RecoverableConnectionError('Connection closed on the remote end.')
  123. except socket.error, e:
  124. self._die(e, 'Read error')
  125. buffer += data
  126. return buffer[:index]
  127. def send(self, data):
  128. """Sends all given data into the socket stream.
  129. Throws :exc:`~.exceptions.ConnectionError` in case of failure.
  130. :param string data: data to send
  131. """
  132. try:
  133. self.socket.sendall(data)
  134. if self.debug:
  135. print "DEBUG: sent data: %s" % data
  136. except socket.error, e:
  137. self._die(e, 'Send error')
  138. class HandlerSocket(threading.local):
  139. """Pool of HandlerSocket connections.
  140. Manages connections and defines common HandlerSocket operations.
  141. Uses internal index id cache.
  142. Subclasses :class:`threading.local` to put connection pool and indexes data
  143. in thread-local storage as they're not safe to share between threads.
  144. .. warning::
  145. Shouldn't be used directly in most cases.
  146. Use :class:`~.ReadSocket` for read operations and :class:`~.WriteSocket` for
  147. writes.
  148. """
  149. RETRY_LIMIT = 5
  150. FIND_OPERATIONS = ('=', '>', '>=', '<', '<=')
  151. def __init__(self, servers, debug=False):
  152. """Pool constructor initializes connections for all given HandlerSocket servers.
  153. :param iterable servers: a list of lists that define server data,
  154. *format*: ``(protocol, host, port, timeout)``.
  155. See :class:`~.Connection` for details.
  156. :param bool debug: enable or disable debug mode, default is ``False``.
  157. """
  158. self.connections = []
  159. for server in servers:
  160. conn = Connection(*server)
  161. conn.set_debug_mode(debug)
  162. self.connections.append(conn)
  163. self._clear_caches()
  164. def _clear_caches(self):
  165. """Clears index cache, connection map, index id counter and last cached
  166. exception.
  167. Private method.
  168. """
  169. self.index_map = {}
  170. self.current_index_id = 0
  171. self.index_cache = {}
  172. self.last_connection_exception = None
  173. def _get_connection(self, index_id=None, force_index=False):
  174. """Returns active connection from the pool.
  175. It will retry available connections in case of connection failure. Max
  176. retry limit is defined in :const:`~.RETRY_LIMIT`.
  177. In case of connection failure on all available servers will raise
  178. :exc:`~.exceptions.ConnectionError`. If ``force_index`` is set, it will
  179. try only one connection that was used to open given ``index_id``. If that
  180. fails will throw :exc:`~.exceptions.RecoverableConnectionError`.
  181. :param index_id: index id to look up connection for, if ``None`` (default)
  182. or not found a new connection will be returned.
  183. :type index_id: integer or None
  184. :param bool force_index: if ``True`` will ensure that only a connection
  185. that was used to open ``index id`` would be returned, will raise
  186. :exc:`~.exceptions.OperationalError` otherwise.
  187. :rtype: :class:`~.Connection` instance
  188. """
  189. connections = self.connections[:]
  190. random.shuffle(connections)
  191. # Try looking up for index_id in index_map - we should use same connections
  192. # for opened indexes and operations using them
  193. if index_id is not None and index_id in self.index_map:
  194. conn = self.index_map[index_id]
  195. else:
  196. if force_index:
  197. raise OperationalError('There is no connection with given index id "%d"' % index_id)
  198. conn = connections.pop()
  199. exception = lambda exc: ConnectionError('Could not connect to any of given servers: %s'\
  200. % exc.args[0])
  201. # Retry until either limit is reached or all connections tried
  202. for i in range(max(self.RETRY_LIMIT, len(connections))):
  203. try:
  204. if conn.is_ready():
  205. conn.connect()
  206. break
  207. except ConnectionError, e:
  208. self.last_connection_exception = e
  209. # In case indexed connection is forced remove it from the caches
  210. # and raise exception so higher level code could retry whole operation
  211. if force_index:
  212. self.purge_index(index_id)
  213. if connections:
  214. raise RecoverableConnectionError('Could not use connection with given index id "%d"' % index_id)
  215. else:
  216. # No point retrying if no more connections are available
  217. raise exception(self.last_connection_exception)
  218. if connections:
  219. conn = connections.pop()
  220. else:
  221. raise exception(self.last_connection_exception)
  222. # If we have an index id, save a relation between it and a connection
  223. if index_id is not None:
  224. self.index_map[index_id] = conn
  225. return conn
  226. def _parse_response(self, raw_data):
  227. """Parses HandlerSocket response data.
  228. Returns a list of result rows which are lists of result columns.
  229. Raises :exc:`~.exceptions.OperationalError` in case data contains
  230. a HS error code.
  231. Private method.
  232. :param string raw_data: data string returned by HS server.
  233. :rtype: list
  234. """
  235. tokens = raw_data.split('\t')
  236. if not len(tokens) or int(tokens[0]) != 0:
  237. error = 'Unknown remote error'
  238. if len(tokens) > 2:
  239. error = tokens[2]
  240. raise OperationalError('HandlerSocket returned an error code: %s' % error)
  241. columns = int(tokens[1])
  242. decoded_tokens = imap(decode, tokens[2:])
  243. # Divide response tokens list by number of columns
  244. data = zip(*[decoded_tokens]*columns)
  245. return data
  246. def _open_index(self, index_id, db, table, fields, index_name):
  247. """Calls open index query on HandlerSocket.
  248. This is a required first operation for any read or write usages.
  249. Private method.
  250. :param integer index_id: id number that will be associated with opened index.
  251. :param string db: database name.
  252. :param string table: table name.
  253. :param string fields: comma-separated list of table's fields that would
  254. be used in further operations. Fields that are part of opened index
  255. must be present in the same order they are declared in the index.
  256. :param string index_name: name of the index.
  257. :rtype: list
  258. """
  259. encoded = imap(encode, (db, table, index_name, fields))
  260. query = chain(('P', str(index_id)), encoded)
  261. response = self._call(index_id, query)
  262. return response
  263. def get_index_id(self, db, table, fields, index_name=None):
  264. """Returns index id for given index data. This id must be used in all
  265. operations that use given data.
  266. Uses internal index cache that keys index ids on a combination of:
  267. ``db:table:index_name:fields``.
  268. In case no index was found in the cache, a new index will be opened.
  269. .. note:: ``fields`` is position-dependent, so change of fields order will open
  270. a new index with another index id.
  271. :param string db: database name.
  272. :param string table: table name.
  273. :param iterable fields: list of table's fields that would be used in further
  274. operations. See :meth:`._open_index` for more info on fields order.
  275. :param index_name: name of the index, default is ``PRIMARY``.
  276. :type index_name: string or None
  277. :rtype: integer or None
  278. """
  279. index_name = index_name or 'PRIMARY'
  280. fields = ','.join(fields)
  281. cache_key = ':'.join((db, table, index_name, fields))
  282. index_id = self.index_cache.get(cache_key)
  283. if index_id is not None:
  284. return index_id
  285. response = self._open_index(self.current_index_id, db, table, fields, index_name)
  286. if response is not None:
  287. index_id = self.current_index_id
  288. self.index_cache[cache_key] = index_id
  289. self.current_index_id += 1
  290. return index_id
  291. return None
  292. def purge_indexes(self):
  293. """Closes all indexed connections, cleans caches, zeroes index id counter.
  294. """
  295. for conn in self.index_map.values():
  296. conn.disconnect()
  297. self._clear_caches()
  298. def purge(self):
  299. """Closes all connections, cleans caches, zeroes index id counter."""
  300. for conn in self.connections:
  301. conn.disconnect()
  302. self._clear_caches()
  303. def purge_index(self, index_id):
  304. """Clear single index connection and cache.
  305. :param integer index_id: id of the index to purge.
  306. """
  307. del self.index_map[index_id]
  308. for key, value in self.index_cache.items():
  309. if value == index_id:
  310. del self.index_cache[key]
  311. def _call(self, index_id, query, force_index=False):
  312. """Helper that performs actual data exchange with HandlerSocket server.
  313. Returns parsed response data.
  314. :param integer index_id: id of the index to operate on.
  315. :param iterable query: list/iterable of tokens ready for sending.
  316. :param bool force_index: pass ``True`` when operation requires connection
  317. with given ``index_id`` to work. This is usually everything except
  318. index opening. See :meth:`~._get_connection`.
  319. :rtype: list
  320. """
  321. conn = self._get_connection(index_id, force_index)
  322. try:
  323. conn.send('\t'.join(query)+'\n')
  324. response = self._parse_response(conn.readline())
  325. except ConnectionError, e:
  326. self.purge_index(index_id)
  327. raise e
  328. return response
  329. class ReadSocket(HandlerSocket):
  330. """HandlerSocket client for read operations."""
  331. def find(self, index_id, operation, columns, limit=0, offset=0):
  332. """Finds row(s) via opened index.
  333. Raises ``ValueError`` if given data doesn't validate.
  334. :param integer index_id: id of opened index.
  335. :param string operation: logical comparison operation to use over ``columns``.
  336. Currently allowed operations are defined in :const:`~.FIND_OPERATIONS`.
  337. Only one operation is allowed per call.
  338. :param iterable columns: list of column values for comparison operation.
  339. List must be ordered in the same way as columns are defined
  340. in opened index.
  341. :param integer limit: optional limit of results to return. Default is
  342. one row. In case multiple results are expected, ``limit`` must be
  343. set explicitly, HS wont return all found rows by default.
  344. :param integer offset: optional offset of rows to search for.
  345. :rtype: list
  346. """
  347. if operation not in self.FIND_OPERATIONS:
  348. raise ValueError('Operation is not supported.')
  349. if not check_columns(columns):
  350. raise ValueError('Columns must be a non-empty iterable.')
  351. query = chain(
  352. (str(index_id), operation, str(len(columns))),
  353. imap(encode, columns),
  354. (str(limit), str(offset))
  355. )
  356. response = self._call(index_id, query, force_index=True)
  357. return response
  358. class WriteSocket(HandlerSocket):
  359. """HandlerSocket client for write operations."""
  360. MODIFY_OPERATIONS = ('U', 'D', '+', '-', 'U?', 'D?', '+?', '-?')
  361. def find_modify(self, index_id, operation, columns, modify_operation,
  362. modify_columns=[], limit=0, offset=0):
  363. """Updates/deletes row(s) using opened index.
  364. Returns number of modified rows or a list of original values in case
  365. ``modify_operation`` ends with ``?``.
  366. Raises ``ValueError`` if given data doesn't validate.
  367. :param integer index_id: id of opened index.
  368. :param string operation: logical comparison operation to use over ``columns``.
  369. Currently allowed operations are defined in :const:`~.FIND_OPERATIONS`.
  370. Only one operation is allowed per call.
  371. :param iterable columns: list of column values for comparison operation.
  372. List must be ordered in the same way as columns are defined in
  373. opened index.
  374. :param string modify_operation: modification operation (update or delete).
  375. Currently allowed operations are defined in :const:`~.MODIFY_OPERATIONS`.
  376. :param iterable modify_columns: list of column values for update operation.
  377. List must be ordered in the same way as columns are defined in
  378. opened index. Only usable for *update* operation,
  379. :param integer limit: optional limit of results to change. Default is
  380. one row. In case multiple rows are expected to be changed, ``limit``
  381. must be set explicitly, HS wont change all found rows by default.
  382. :param integer offset: optional offset of rows to search for.
  383. :rtype: list
  384. """
  385. if operation not in self.FIND_OPERATIONS \
  386. or modify_operation not in self.MODIFY_OPERATIONS:
  387. raise ValueError('Operation is not supported.')
  388. if not check_columns(columns):
  389. raise ValueError('Columns must be a non-empty iterable.')
  390. if modify_operation in ('U', '+', '-', 'U?', '+?', '-?') \
  391. and not check_columns(modify_columns):
  392. raise ValueError('Modify_columns must be a non-empty iterable for update operation')
  393. query = chain(
  394. (str(index_id), operation, str(len(columns))),
  395. imap(encode, columns),
  396. (str(limit), str(offset), modify_operation),
  397. imap(encode, modify_columns)
  398. )
  399. response = self._call(index_id, query, force_index=True)
  400. return response
  401. def insert(self, index_id, columns):
  402. """Inserts single row using opened index.
  403. Raises ``ValueError`` if given data doesn't validate.
  404. :param integer index_id: id of opened index.
  405. :param list columns: list of column values for insertion. List must be
  406. ordered in the same way as columns are defined in opened index.
  407. :rtype: bool
  408. """
  409. if not check_columns(columns):
  410. raise ValueError('Columns must be a non-empty iterable.')
  411. query = chain(
  412. (str(index_id), '+', str(len(columns))),
  413. imap(encode, columns)
  414. )
  415. self._call(index_id, query, force_index=True)
  416. return True