PageRenderTime 23ms CodeModel.GetById 17ms RepoModel.GetById 1ms app.codeStats 0ms

/pandas/io/sql.py

https://github.com/rkabir/pandas
Python | 443 lines | 234 code | 68 blank | 141 comment | 61 complexity | 1a3e3bcf85790afd10eaad8b43b6dac4 MD5 | raw file
  1. """
  2. Collection of query wrappers / abstractions to both facilitate data
  3. retrieval and to reduce dependency on DB-specific API.
  4. """
  5. from datetime import datetime
  6. import numpy as np
  7. import traceback
  8. from numpy import NaN
  9. from pandas.core.datetools import format, to_datetime
  10. from pandas.core.api import DataFrame, Series, Index, isnull, pivot
  11. #-------------------------------------------------------------------------------
  12. # Query formatting
  13. _formatters = {
  14. datetime : lambda dt: "'%s'" % format(dt),
  15. str : lambda x: "'%s'" % x,
  16. np.str_ : lambda x: "'%s'" % x,
  17. unicode : lambda x: "'%s'" % x,
  18. float : lambda x: "%.8f" % x,
  19. int : lambda x: "%s" % x,
  20. type(None) : lambda x: "NULL",
  21. np.float64 : lambda x: "%.8f" % x,
  22. bool : lambda x: "'%s'" % x,
  23. }
  24. def format_query(sql, *args):
  25. """
  26. """
  27. processed_args = []
  28. for arg in args:
  29. if isinstance(arg, float) and isnull(arg):
  30. arg = None
  31. formatter = _formatters[type(arg)]
  32. processed_args.append(formatter(arg))
  33. return sql % tuple(processed_args)
  34. #-------------------------------------------------------------------------------
  35. # SQL Connection objects
  36. class SQLConnection(object):
  37. """
  38. SQL Connection wrapper. Encapsulates parameters necessary to recreate
  39. connection upon database reset or failure.
  40. Parameters
  41. ----------
  42. host: string
  43. user: string
  44. password: string
  45. database: string
  46. """
  47. def __init__(self, host, user, password, database='.', trusted=False):
  48. self.host = host
  49. self.user = user
  50. self.password = password
  51. self.database = database
  52. self.trusted = trusted
  53. self._con = None
  54. self.connected = False
  55. @property
  56. def driver(self):
  57. raise Exception('No default driver')
  58. def connect(self):
  59. """
  60. """
  61. if self._con is not None:
  62. try:
  63. self._con.close()
  64. except Exception:
  65. pass
  66. if self.trusted:
  67. self._con = self.driver.connect(host=self.host,
  68. trusted=True,
  69. database=self.database)
  70. else:
  71. self._con = self.driver.connect(user=self.user,
  72. password=self.password,
  73. host=self.host,
  74. database=self.database)
  75. def execute(self, sql, retry=False):
  76. return execute(sql, self, retry=retry)
  77. def _get_con(self):
  78. """
  79. """
  80. if self._con is None:
  81. self.connect()
  82. return self._con
  83. def close(self):
  84. """
  85. """
  86. try:
  87. self._con.close()
  88. except Exception:
  89. pass
  90. def cursor(self, retry=True):
  91. """
  92. """
  93. try:
  94. return self._get_con().cursor()
  95. except Exception:
  96. if retry:
  97. self.connect()
  98. return self.cursor(retry=False)
  99. def commit(self):
  100. """
  101. """
  102. if self._con is None:
  103. raise Exception('Cannot commit with no connection created yet!')
  104. self._con.commit()
  105. def rollback(self):
  106. """
  107. """
  108. if self._con is None:
  109. raise Exception('Cannot commit with no connection created yet!')
  110. self._con.rollback()
  111. def getinfo(self, *args, **kwargs):
  112. """
  113. """
  114. return self._get_con().getinfo(*args, **kwargs)
  115. class PymssqlConnection(SQLConnection):
  116. @property
  117. def driver(self):
  118. import pymssql
  119. return pymssql
  120. class PyODBCConnection(SQLConnection):
  121. @property
  122. def driver(self):
  123. import pyodbc
  124. return pyodbc
  125. def connect(self):
  126. self._con = self.driver.connect(user=self.user, password=self.password,
  127. host=self.host, database=self.database,
  128. driver='{SQL Server}')
  129. class SQLiteConnection(SQLConnection):
  130. def __init__(self, path):
  131. self.path = path
  132. self._con = None
  133. def connect(self):
  134. """
  135. """
  136. if self._con is not None:
  137. try:
  138. self._con.close()
  139. except Exception:
  140. pass
  141. self._con = self.driver.connect(self.path)
  142. @property
  143. def driver(self):
  144. import sqlite3
  145. return sqlite3
  146. class ConnectionFactory(object):
  147. """
  148. SQL Connection Factory
  149. Parameters
  150. ----------
  151. host: string
  152. servername or path to database
  153. user: string
  154. Username (if needed)
  155. password: string (if needed)
  156. defaultDatabase: string
  157. Database to connect to by default
  158. """
  159. connectionClass = SQLConnection
  160. def __init__(self, host, user=None, password=None, defaultDatabase=None,
  161. trusted=False):
  162. self.host = host
  163. self.user = user
  164. self.password = password
  165. self.trusted = trusted
  166. self.defaultDatabase = defaultDatabase
  167. self.connections = {}
  168. def __getitem__(self, database):
  169. return self.get_con(database)
  170. def is_user_authorized(self):
  171. return True
  172. def get_con(self, database=None, forceNew=False):
  173. if not self.is_user_authorized():
  174. raise Exception('Not authorized to use this connection')
  175. if database is None:
  176. database = self.defaultDatabase
  177. database = database.lower()
  178. if database in self.connections:
  179. con = self.connections[database]
  180. else:
  181. con = self.connectionClass(self.host,
  182. user=self.user,
  183. password=self.password,
  184. trusted=self.trusted,
  185. database=database)
  186. self.connections[database] = con
  187. if forceNew:
  188. con.connect()
  189. return con
  190. class PymssqlFactory(ConnectionFactory):
  191. connectionClass = PymssqlConnection
  192. # Note: with PyODBC, executing stored procedures may at times produce
  193. # weird results. You may have to put "SET NOCOUNT ON;" at the
  194. # beginning of the query
  195. class PyODBCFactory(ConnectionFactory):
  196. connectionClass = PyODBCConnection
  197. #-------------------------------------------------------------------------------
  198. # Helper execution function
  199. def execute(sql, con, retry=True, cur=None, params=()):
  200. """
  201. Execute the given SQL query using the provided connection object.
  202. Parameters
  203. ----------
  204. sql: string
  205. Query to be executed
  206. Returns
  207. -------
  208. Cursor object
  209. """
  210. providedCursor = False
  211. try:
  212. if cur is None:
  213. cur = con.cursor()
  214. else:
  215. providedCursor = True
  216. retry = False
  217. if len(params) == 0:
  218. cur.execute(sql)
  219. else:
  220. cur.execute(sql, params)
  221. return cur
  222. except ImportError:
  223. raise
  224. except Exception, e:
  225. excName = e.__class__.__name__
  226. if not providedCursor and excName in ('OperationalError', 'Error'):
  227. # connection wrapper
  228. if retry and isinstance(con, SQLConnection):
  229. con.connect()
  230. return execute(sql, con, retry=False)
  231. try:
  232. con.rollback()
  233. except Exception, e:
  234. pass
  235. print 'Error on sql %s' % sql
  236. raise
  237. def _safe_fetch(cur):
  238. try:
  239. return cur.fetchall()
  240. except Exception, e:
  241. excName = e.__class__.__name__
  242. if excName == 'OperationalError':
  243. return []
  244. def array_query(sql, con):
  245. """Returns results of query as a dict of numpy-arrays.
  246. Parameters
  247. ----------
  248. sql: string
  249. SQL query to be executed
  250. con: DB connection object
  251. """
  252. cur = execute(sql, con)
  253. rows = _safe_fetch(cur)
  254. result = [np.array(x) for x in zip(*rows)]
  255. con.commit()
  256. return dict([(c[0], result[i] if len(result) > 0 else [])
  257. for i, c in enumerate(cur.description)])
  258. # def col_query(sql, con):
  259. # """Returns results of query as a dict of python lists.
  260. # Parameters
  261. # ----------
  262. # sql: string
  263. # SQL query to be executed
  264. # con: DB connection object, optional
  265. # """
  266. # cur = execute(sql, con)
  267. # rows = _safe_fetch(cur)
  268. # result = [list(x) for x in zip(*rows)]
  269. # con.commit()
  270. # if len(result) > 0:
  271. # return dict([(c[0], result[i]) for i, c in enumerate(cur.description)])
  272. # else:
  273. # return dict([(c[0], []) for c in cur.description])
  274. def tquery(sql, con=None, cur=None, retry=True):
  275. """
  276. Returns list of tuples corresponding to each row in given sql
  277. query.
  278. If only one column selected, then plain list is returned.
  279. Parameters
  280. ----------
  281. sql: string
  282. SQL query to be executed
  283. con: SQLConnection or DB API 2.0-compliant connection
  284. cur: DB API 2.0 cursor
  285. Provide a specific connection or a specific cursor if you are executing a
  286. lot of sequential statements and want to commit outside.
  287. """
  288. cur = execute(sql, con, cur=cur)
  289. result = _safe_fetch(cur)
  290. try:
  291. con.commit()
  292. except Exception, e:
  293. excName = e.__class__.__name__
  294. if excName == 'OperationalError':
  295. print 'Failed to commit, may need to restart interpreter'
  296. else:
  297. raise
  298. traceback.print_exc()
  299. if retry:
  300. return tquery(sql, con=con, retry=False)
  301. if result and len(result[0]) == 1:
  302. result = list(zip(*result)[0])
  303. elif result is None:
  304. result = []
  305. return result
  306. def uquery(sql, con=None, cur=None, retry=True, params=()):
  307. """
  308. Does the same thing as tquery, but instead of returning results, it
  309. returns the number of rows affected. Good for update queries.
  310. """
  311. cur = execute(sql, con, cur=cur, retry=retry, params=params)
  312. result = cur.rowcount
  313. try:
  314. con.commit()
  315. except Exception, e:
  316. excName = e.__class__.__name__
  317. if excName != 'OperationalError':
  318. raise
  319. traceback.print_exc()
  320. if retry:
  321. print 'Looks like your connection failed, reconnecting...'
  322. return uquery(sql, con, retry=False)
  323. return result
  324. def frame_query(sql, con, indexField='Time'):
  325. """
  326. Returns a DataFrame corresponding to the result set of the query
  327. string.
  328. Optionally provide an indexField parameter to use one of the
  329. columns as the index. Otherwise will be 0 to len(results) - 1.
  330. Parameters
  331. ----------
  332. sql: string
  333. SQL query to be executed
  334. con: DB connection object, optional
  335. indexField: string, optional
  336. column name to use for the returned DataFrame object.
  337. """
  338. data = array_query(sql, con)
  339. if indexField is not None:
  340. try:
  341. idx = Index(data.pop(indexField))
  342. except KeyError:
  343. raise KeyError('indexField %s not found! %s' % (indexField, sql))
  344. else:
  345. idx = Index(np.arange(len(data.values()[0])))
  346. return DataFrame(data=data, index=idx)
  347. def pivot_query(sql, rows, columns, values, con):
  348. """
  349. Returns DataFrame with columns corresponding to unique Item
  350. entries in the requested SQL query.
  351. Parameters
  352. ----------
  353. sql: string
  354. SQL query to be executed
  355. con: SQLConnection
  356. """
  357. data = frame_query(sql, con)
  358. data = dict([(key.lower(), values) for key, values in data.iteritems()])
  359. pivoted = pivot(data[rows], data[columns], data[values])
  360. return pivoted