PageRenderTime 112ms CodeModel.GetById 8ms RepoModel.GetById 1ms app.codeStats 0ms

/indra/lib/python/indra/util/named_query.py

https://bitbucket.org/lindenlab/viewer-beta/
Python | 592 lines | 570 code | 8 blank | 14 comment | 11 complexity | 4654e6953bb90962b93063b5bf7dc369 MD5 | raw file
Possible License(s): LGPL-2.1
  1. """\
  2. @file named_query.py
  3. @author Ryan Williams, Phoenix
  4. @date 2007-07-31
  5. @brief An API for running named queries.
  6. $LicenseInfo:firstyear=2007&license=mit$
  7. Copyright (c) 2007-2009, Linden Research, Inc.
  8. Permission is hereby granted, free of charge, to any person obtaining a copy
  9. of this software and associated documentation files (the "Software"), to deal
  10. in the Software without restriction, including without limitation the rights
  11. to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  12. copies of the Software, and to permit persons to whom the Software is
  13. furnished to do so, subject to the following conditions:
  14. The above copyright notice and this permission notice shall be included in
  15. all copies or substantial portions of the Software.
  16. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17. IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18. FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19. AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20. LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21. OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  22. THE SOFTWARE.
  23. $/LicenseInfo$
  24. """
  25. import errno
  26. import MySQLdb
  27. import MySQLdb.cursors
  28. import os
  29. import os.path
  30. import re
  31. import time
  32. from indra.base import llsd
  33. from indra.base import config
  34. DEBUG = False
  35. NQ_FILE_SUFFIX = config.get('named-query-file-suffix', '.nq')
  36. NQ_FILE_SUFFIX_LEN = len(NQ_FILE_SUFFIX)
  37. _g_named_manager = None
  38. def _init_g_named_manager(sql_dir = None):
  39. """Initializes a global NamedManager object to point at a
  40. specified named queries hierarchy.
  41. This function is intended entirely for testing purposes,
  42. because it's tricky to control the config from inside a test."""
  43. global NQ_FILE_SUFFIX
  44. NQ_FILE_SUFFIX = config.get('named-query-file-suffix', '.nq')
  45. global NQ_FILE_SUFFIX_LEN
  46. NQ_FILE_SUFFIX_LEN = len(NQ_FILE_SUFFIX)
  47. if sql_dir is None:
  48. sql_dir = config.get('named-query-base-dir')
  49. # extra fallback directory in case config doesn't return what we want
  50. if sql_dir is None:
  51. sql_dir = os.path.abspath(
  52. os.path.join(
  53. os.path.realpath(os.path.dirname(__file__)), "..", "..", "..", "..", "web", "dataservice", "sql"))
  54. global _g_named_manager
  55. _g_named_manager = NamedQueryManager(
  56. os.path.abspath(os.path.realpath(sql_dir)))
  57. def get(name, schema = None):
  58. "Get the named query object to be used to perform queries"
  59. if _g_named_manager is None:
  60. _init_g_named_manager()
  61. return _g_named_manager.get(name).for_schema(schema)
  62. def sql(connection, name, params):
  63. # use module-global NamedQuery object to perform default substitution
  64. return get(name).sql(connection, params)
  65. def run(connection, name, params, expect_rows = None):
  66. """\
  67. @brief given a connection, run a named query with the params
  68. Note that this function will fetch ALL rows.
  69. @param connection The connection to use
  70. @param name The name of the query to run
  71. @param params The parameters passed into the query
  72. @param expect_rows The number of rows expected. Set to 1 if return_as_map is true. Raises ExpectationFailed if the number of returned rows doesn't exactly match. Kind of a hack.
  73. @return Returns the result set as a list of dicts.
  74. """
  75. return get(name).run(connection, params, expect_rows)
  76. class ExpectationFailed(Exception):
  77. """ Exception that is raised when an expectation for an sql query
  78. is not met."""
  79. def __init__(self, message):
  80. Exception.__init__(self, message)
  81. self.message = message
  82. class NamedQuery(object):
  83. def __init__(self, name, filename):
  84. """ Construct a NamedQuery object. The name argument is an
  85. arbitrary name as a handle for the query, and the filename is
  86. a path to a file or a file-like object containing an llsd named
  87. query document."""
  88. self._stat_interval_seconds = 5 # 5 seconds
  89. self._name = name
  90. if (filename is not None and isinstance(filename, (str, unicode))
  91. and NQ_FILE_SUFFIX != filename[-NQ_FILE_SUFFIX_LEN:]):
  92. filename = filename + NQ_FILE_SUFFIX
  93. self._location = filename
  94. self._alternative = dict()
  95. self._last_mod_time = 0
  96. self._last_check_time = 0
  97. self.deleted = False
  98. self.load_contents()
  99. def name(self):
  100. """ The name of the query. """
  101. return self._name
  102. def get_modtime(self):
  103. """ Returns the mtime (last modified time) of the named query
  104. filename. For file-like objects, expect a modtime of 0"""
  105. if self._location and isinstance(self._location, (str, unicode)):
  106. return os.path.getmtime(self._location)
  107. return 0
  108. def load_contents(self):
  109. """ Loads and parses the named query file into self. Does
  110. nothing if self.location is nonexistant."""
  111. if self._location:
  112. if isinstance(self._location, (str, unicode)):
  113. contents = llsd.parse(open(self._location).read())
  114. else:
  115. # we probably have a file-like object. Godspeed!
  116. contents = llsd.parse(self._location.read())
  117. self._reference_contents(contents)
  118. # Check for alternative implementations
  119. try:
  120. for name, alt in self._contents['alternative'].items():
  121. nq = NamedQuery(name, None)
  122. nq._reference_contents(alt)
  123. self._alternative[name] = nq
  124. except KeyError, e:
  125. pass
  126. self._last_mod_time = self.get_modtime()
  127. self._last_check_time = time.time()
  128. def _reference_contents(self, contents):
  129. "Helper method which builds internal structure from parsed contents"
  130. self._contents = contents
  131. self._ttl = int(self._contents.get('ttl', 0))
  132. self._return_as_map = bool(self._contents.get('return_as_map', False))
  133. self._legacy_dbname = self._contents.get('legacy_dbname', None)
  134. # reset these before doing the sql conversion because we will
  135. # read them there. reset these while loading so we pick up
  136. # changes.
  137. self._around = set()
  138. self._append = set()
  139. self._integer = set()
  140. self._options = self._contents.get('dynamic_where', {})
  141. for key in self._options:
  142. if isinstance(self._options[key], basestring):
  143. self._options[key] = self._convert_sql(self._options[key])
  144. elif isinstance(self._options[key], list):
  145. lines = []
  146. for line in self._options[key]:
  147. lines.append(self._convert_sql(line))
  148. self._options[key] = lines
  149. else:
  150. moreopt = {}
  151. for kk in self._options[key]:
  152. moreopt[kk] = self._convert_sql(self._options[key][kk])
  153. self._options[key] = moreopt
  154. self._base_query = self._convert_sql(self._contents['base_query'])
  155. self._query_suffix = self._convert_sql(
  156. self._contents.get('query_suffix', ''))
  157. def _convert_sql(self, sql):
  158. """convert the parsed sql into a useful internal structure.
  159. This function has to turn the named query format into a pyformat
  160. style. It also has to look for %:name% and :name% and
  161. ready them for use in LIKE statements"""
  162. if sql:
  163. # This first sub is to properly escape any % signs that
  164. # are meant to be literally passed through to mysql in the
  165. # query. It leaves any %'s that are used for
  166. # like-expressions.
  167. expr = re.compile("(?<=[^a-zA-Z0-9_-])%(?=[^:])")
  168. sql = expr.sub('%%', sql)
  169. # This should tackle the rest of the %'s in the query, by
  170. # converting them to LIKE clauses.
  171. expr = re.compile("(%?):([a-zA-Z][a-zA-Z0-9_-]*)%")
  172. sql = expr.sub(self._prepare_like, sql)
  173. expr = re.compile("#:([a-zA-Z][a-zA-Z0-9_-]*)")
  174. sql = expr.sub(self._prepare_integer, sql)
  175. expr = re.compile(":([a-zA-Z][a-zA-Z0-9_-]*)")
  176. sql = expr.sub("%(\\1)s", sql)
  177. return sql
  178. def _prepare_like(self, match):
  179. """This function changes LIKE statement replace behavior
  180. It works by turning %:name% to %(_name_around)s and :name% to
  181. %(_name_append)s. Since a leading '_' is not a valid keyname
  182. input (enforced via unit tests), it will never clash with
  183. existing keys. Then, when building the statement, the query
  184. runner will generate corrected strings."""
  185. if match.group(1) == '%':
  186. # there is a leading % so this is treated as prefix/suffix
  187. self._around.add(match.group(2))
  188. return "%(" + self._build_around_key(match.group(2)) + ")s"
  189. else:
  190. # there is no leading %, so this is suffix only
  191. self._append.add(match.group(2))
  192. return "%(" + self._build_append_key(match.group(2)) + ")s"
  193. def _build_around_key(self, key):
  194. return "_" + key + "_around"
  195. def _build_append_key(self, key):
  196. return "_" + key + "_append"
  197. def _prepare_integer(self, match):
  198. """This function adjusts the sql for #:name replacements
  199. It works by turning #:name to %(_name_as_integer)s. Since a
  200. leading '_' is not a valid keyname input (enforced via unit
  201. tests), it will never clash with existing keys. Then, when
  202. building the statement, the query runner will generate
  203. corrected strings."""
  204. self._integer.add(match.group(1))
  205. return "%(" + self._build_integer_key(match.group(1)) + ")s"
  206. def _build_integer_key(self, key):
  207. return "_" + key + "_as_integer"
  208. def _strip_wildcards_to_list(self, value):
  209. """Take string, and strip out the LIKE special characters.
  210. Technically, this is database dependant, but postgresql and
  211. mysql use the same wildcards, and I am not aware of a general
  212. way to handle this. I think you need a sql statement of the
  213. form:
  214. LIKE_STRING( [ANY,ONE,str]... )
  215. which would treat ANY as their any string, and ONE as their
  216. single glyph, and str as something that needs database
  217. specific encoding to not allow any % or _ to affect the query.
  218. As it stands, I believe it's impossible to write a named query
  219. style interface which uses like to search the entire space of
  220. text available. Imagine the query:
  221. % of brain used by average linden
  222. In order to search for %, it must be escaped, so once you have
  223. escaped the string to not do wildcard searches, and be escaped
  224. for the database, and then prepended the wildcard you come
  225. back with one of:
  226. 1) %\% of brain used by average linden
  227. 2) %%% of brain used by average linden
  228. Then, when passed to the database to be escaped to be database
  229. safe, you get back:
  230. 1) %\\% of brain used by average linden
  231. : which means search for any character sequence, followed by a
  232. backslash, followed by any sequence, followed by ' of
  233. brain...'
  234. 2) %%% of brain used by average linden
  235. : which (I believe) means search for a % followed by any
  236. character sequence followed by 'of brain...'
  237. Neither of which is what we want!
  238. So, we need a vendor (or extention) for LIKE_STRING. Anyone
  239. want to write it?"""
  240. if isinstance(value, unicode):
  241. utf8_value = value
  242. else:
  243. utf8_value = unicode(value, "utf-8")
  244. esc_list = []
  245. remove_chars = set(u"%_")
  246. for glyph in utf8_value:
  247. if glyph in remove_chars:
  248. continue
  249. esc_list.append(glyph.encode("utf-8"))
  250. return esc_list
  251. def delete(self):
  252. """ Makes this query unusable by deleting all the members and
  253. setting the deleted member. This is desired when the on-disk
  254. query has been deleted but the in-memory copy remains."""
  255. # blow away all members except _name, _location, and deleted
  256. name, location = self._name, self._location
  257. for key in self.__dict__.keys():
  258. del self.__dict__[key]
  259. self.deleted = True
  260. self._name, self._location = name, location
  261. def ttl(self):
  262. """ Estimated time to live of this query. Used for web
  263. services to set the Expires header."""
  264. return self._ttl
  265. def legacy_dbname(self):
  266. return self._legacy_dbname
  267. def return_as_map(self):
  268. """ Returns true if this query is configured to return its
  269. results as a single map (as opposed to a list of maps, the
  270. normal behavior)."""
  271. return self._return_as_map
  272. def for_schema(self, db_name):
  273. "Look trough the alternates and return the correct query"
  274. if db_name is None:
  275. return self
  276. try:
  277. return self._alternative[db_name]
  278. except KeyError, e:
  279. pass
  280. return self
  281. def run(self, connection, params, expect_rows = None, use_dictcursor = True):
  282. """given a connection, run a named query with the params
  283. Note that this function will fetch ALL rows. We do this because it
  284. opens and closes the cursor to generate the values, and this
  285. isn't a generator so the cursor has no life beyond the method call.
  286. @param cursor The connection to use (this generates its own cursor for the query)
  287. @param name The name of the query to run
  288. @param params The parameters passed into the query
  289. @param expect_rows The number of rows expected. Set to 1 if return_as_map is true. Raises ExpectationFailed if the number of returned rows doesn't exactly match. Kind of a hack.
  290. @param use_dictcursor Set to false to use a normal cursor and manually convert the rows to dicts.
  291. @return Returns the result set as a list of dicts, or, if the named query has return_as_map set to true, returns a single dict.
  292. """
  293. if use_dictcursor:
  294. cursor = connection.cursor(MySQLdb.cursors.DictCursor)
  295. else:
  296. cursor = connection.cursor()
  297. full_query, params = self._construct_sql(params)
  298. if DEBUG:
  299. print "SQL:", self.sql(connection, params)
  300. rows = cursor.execute(full_query, params)
  301. # *NOTE: the expect_rows argument is a very cheesy way to get some
  302. # validation on the result set. If you want to add more expectation
  303. # logic, do something more object-oriented and flexible. Or use an ORM.
  304. if(self._return_as_map):
  305. expect_rows = 1
  306. if expect_rows is not None and rows != expect_rows:
  307. cursor.close()
  308. raise ExpectationFailed("Statement expected %s rows, got %s. Sql: '%s' %s" % (
  309. expect_rows, rows, full_query, params))
  310. # convert to dicts manually if we're not using a dictcursor
  311. if use_dictcursor:
  312. result_set = cursor.fetchall()
  313. else:
  314. if cursor.description is None:
  315. # an insert or something
  316. x = cursor.fetchall()
  317. cursor.close()
  318. return x
  319. names = [x[0] for x in cursor.description]
  320. result_set = []
  321. for row in cursor.fetchall():
  322. converted_row = {}
  323. for idx, col_name in enumerate(names):
  324. converted_row[col_name] = row[idx]
  325. result_set.append(converted_row)
  326. cursor.close()
  327. if self._return_as_map:
  328. return result_set[0]
  329. return result_set
  330. def _construct_sql(self, params):
  331. """ Returns a query string and a dictionary of parameters,
  332. suitable for directly passing to the execute() method."""
  333. self.refresh()
  334. # build the query from the options available and the params
  335. base_query = []
  336. base_query.append(self._base_query)
  337. for opt, extra_where in self._options.items():
  338. if type(extra_where) in (dict, list, tuple):
  339. if opt in params:
  340. base_query.append(extra_where[params[opt]])
  341. else:
  342. if opt in params and params[opt]:
  343. base_query.append(extra_where)
  344. if self._query_suffix:
  345. base_query.append(self._query_suffix)
  346. full_query = '\n'.join(base_query)
  347. # Go through the query and rewrite all of the ones with the
  348. # @:name syntax.
  349. rewrite = _RewriteQueryForArray(params)
  350. expr = re.compile("@%\(([a-zA-Z][a-zA-Z0-9_-]*)\)s")
  351. full_query = expr.sub(rewrite.operate, full_query)
  352. params.update(rewrite.new_params)
  353. # build out the params for like. We only have to do this
  354. # parameters which were detected to have ued the where syntax
  355. # during load.
  356. #
  357. # * treat the incoming string as utf-8
  358. # * strip wildcards
  359. # * append or prepend % as appropriate
  360. new_params = {}
  361. for key in params:
  362. if key in self._around:
  363. new_value = ['%']
  364. new_value.extend(self._strip_wildcards_to_list(params[key]))
  365. new_value.append('%')
  366. new_params[self._build_around_key(key)] = ''.join(new_value)
  367. if key in self._append:
  368. new_value = self._strip_wildcards_to_list(params[key])
  369. new_value.append('%')
  370. new_params[self._build_append_key(key)] = ''.join(new_value)
  371. if key in self._integer:
  372. new_params[self._build_integer_key(key)] = int(params[key])
  373. params.update(new_params)
  374. return full_query, params
  375. def sql(self, connection, params):
  376. """ Generates an SQL statement from the named query document
  377. and a dictionary of parameters.
  378. *NOTE: Only use for debugging, because it uses the
  379. non-standard MySQLdb 'literal' method.
  380. """
  381. if not DEBUG:
  382. import warnings
  383. warnings.warn("Don't use named_query.sql() when not debugging. Used on %s" % self._location)
  384. # do substitution using the mysql (non-standard) 'literal'
  385. # function to do the escaping.
  386. full_query, params = self._construct_sql(params)
  387. return full_query % connection.literal(params)
  388. def refresh(self):
  389. """ Refresh self from the file on the filesystem.
  390. This is optimized to be callable as frequently as you wish,
  391. without adding too much load. It does so by only stat-ing the
  392. file every N seconds, where N defaults to 5 and is
  393. configurable through the member _stat_interval_seconds. If the stat
  394. reveals that the file has changed, refresh will re-parse the
  395. contents of the file and use them to update the named query
  396. instance. If the stat reveals that the file has been deleted,
  397. refresh will call self.delete to make the in-memory
  398. representation unusable."""
  399. now = time.time()
  400. if(now - self._last_check_time > self._stat_interval_seconds):
  401. self._last_check_time = now
  402. try:
  403. modtime = self.get_modtime()
  404. if(modtime > self._last_mod_time):
  405. self.load_contents()
  406. except OSError, e:
  407. if e.errno == errno.ENOENT: # file not found
  408. self.delete() # clean up self
  409. raise # pass the exception along to the caller so they know that this query disappeared
  410. class NamedQueryManager(object):
  411. """ Manages the lifespan of NamedQuery objects, drawing from a
  412. directory hierarchy of named query documents.
  413. In practice this amounts to a memory cache of NamedQuery objects."""
  414. def __init__(self, named_queries_dir):
  415. """ Initializes a manager to look for named queries in a
  416. directory."""
  417. self._dir = os.path.abspath(os.path.realpath(named_queries_dir))
  418. self._cached_queries = {}
  419. def sql(self, connection, name, params):
  420. nq = self.get(name)
  421. return nq.sql(connection, params)
  422. def get(self, name):
  423. """ Returns a NamedQuery instance based on the name, either
  424. from memory cache, or by parsing from disk.
  425. The name is simply a relative path to the directory associated
  426. with the manager object. Before returning the instance, the
  427. NamedQuery object is cached in memory, so that subsequent
  428. accesses don't have to read from disk or do any parsing. This
  429. means that NamedQuery objects returned by this method are
  430. shared across all users of the manager object.
  431. NamedQuery.refresh is used to bring the NamedQuery objects in
  432. sync with the actual files on disk."""
  433. nq = self._cached_queries.get(name)
  434. if nq is None:
  435. nq = NamedQuery(name, os.path.join(self._dir, name))
  436. self._cached_queries[name] = nq
  437. else:
  438. try:
  439. nq.refresh()
  440. except OSError, e:
  441. if e.errno == errno.ENOENT: # file not found
  442. del self._cached_queries[name]
  443. raise # pass exception along to caller so they know that the query disappeared
  444. return nq
  445. class _RewriteQueryForArray(object):
  446. "Helper class for rewriting queries with the @:name syntax"
  447. def __init__(self, params):
  448. self.params = params
  449. self.new_params = dict()
  450. def operate(self, match):
  451. "Given a match, return the string that should be in use"
  452. key = match.group(1)
  453. value = self.params[key]
  454. if type(value) in (list,tuple):
  455. rv = []
  456. for idx in range(len(value)):
  457. # if the value@idx is array-like, we are
  458. # probably dealing with a VALUES
  459. new_key = "_%s_%s"%(key, str(idx))
  460. val_item = value[idx]
  461. if type(val_item) in (list, tuple, dict):
  462. if type(val_item) is dict:
  463. # this is because in Python, the order of
  464. # key, value retrieval from the dict is not
  465. # guaranteed to match what the input intended
  466. # and for VALUES, order is important.
  467. # TODO: Implemented ordered dict in LLSD parser?
  468. raise ExpectationFailed('Only lists/tuples allowed,\
  469. received dict')
  470. values_keys = []
  471. for value_idx, item in enumerate(val_item):
  472. # we want a key of the format :
  473. # key_#replacement_#value_row_#value_col
  474. # ugh... so if we are replacing 10 rows in user_note,
  475. # the first values clause would read (for @:user_notes) :-
  476. # ( :_user_notes_0_1_1, :_user_notes_0_1_2, :_user_notes_0_1_3 )
  477. # the input LLSD for VALUES will look like:
  478. # <llsd>...
  479. # <map>
  480. # <key>user_notes</key>
  481. # <array>
  482. # <array> <!-- row 1 for VALUES -->
  483. # <string>...</string>
  484. # <string>...</string>
  485. # <string>...</string>
  486. # </array>
  487. # ...
  488. # </array>
  489. # </map>
  490. # ... </llsd>
  491. values_key = "%s_%s"%(new_key, value_idx)
  492. self.new_params[values_key] = item
  493. values_keys.append("%%(%s)s"%values_key)
  494. # now collapse all these new place holders enclosed in ()
  495. # from [':_key_0_1_1', ':_key_0_1_2', ':_key_0_1_3,...]
  496. # rv will have [ '(:_key_0_1_1, :_key_0_1_2, :_key_0_1_3)', ]
  497. # which is flattened a few lines below join(rv)
  498. rv.append('(%s)' % ','.join(values_keys))
  499. else:
  500. self.new_params[new_key] = val_item
  501. rv.append("%%(%s)s"%new_key)
  502. return ','.join(rv)
  503. else:
  504. # not something that can be expanded, so just drop the
  505. # leading @ in the front of the match. This will mean that
  506. # the single value we have, be it a string, int, whatever
  507. # (other than dict) will correctly show up, eg:
  508. #
  509. # where foo in (@:foobar) -- foobar is a string, so we get
  510. # where foo in (:foobar)
  511. return match.group(0)[1:]