
/indra/lib/python/indra/util/named_query.py

https://bitbucket.org/lindenlab/viewer-beta/
  1"""\
  2@file named_query.py
  3@author Ryan Williams, Phoenix
  4@date 2007-07-31
  5@brief An API for running named queries.
  6
  7$LicenseInfo:firstyear=2007&license=mit$
  8
  9Copyright (c) 2007-2009, Linden Research, Inc.
 10
 11Permission is hereby granted, free of charge, to any person obtaining a copy
 12of this software and associated documentation files (the "Software"), to deal
 13in the Software without restriction, including without limitation the rights
 14to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 15copies of the Software, and to permit persons to whom the Software is
 16furnished to do so, subject to the following conditions:
 17
 18The above copyright notice and this permission notice shall be included in
 19all copies or substantial portions of the Software.
 20
 21THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 22IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 23FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 24AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 25LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 26OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 27THE SOFTWARE.
 28$/LicenseInfo$
 29"""
 30
 31import errno
 32import MySQLdb
 33import MySQLdb.cursors
 34import os
 35import os.path
 36import re
 37import time
 38
 39from indra.base import llsd
 40from indra.base import config

DEBUG = False
NQ_FILE_SUFFIX = config.get('named-query-file-suffix', '.nq')
NQ_FILE_SUFFIX_LEN  = len(NQ_FILE_SUFFIX)

_g_named_manager = None

def _init_g_named_manager(sql_dir = None):
    """Initializes a global NamedManager object to point at a
    specified named queries hierarchy.

    This function is intended entirely for testing purposes,
    because it's tricky to control the config from inside a test."""
    global NQ_FILE_SUFFIX
    NQ_FILE_SUFFIX = config.get('named-query-file-suffix', '.nq')
    global NQ_FILE_SUFFIX_LEN
    NQ_FILE_SUFFIX_LEN  = len(NQ_FILE_SUFFIX)

    if sql_dir is None:
        sql_dir = config.get('named-query-base-dir')

    # extra fallback directory in case config doesn't return what we want
    if sql_dir is None:
        sql_dir = os.path.abspath(
            os.path.join(
            os.path.realpath(os.path.dirname(__file__)), "..", "..", "..", "..", "web", "dataservice", "sql"))

    global _g_named_manager
    _g_named_manager = NamedQueryManager(
        os.path.abspath(os.path.realpath(sql_dir)))

def get(name, schema = None):
    "Get the named query object to be used to perform queries"
    if _g_named_manager is None:
        _init_g_named_manager()
    return _g_named_manager.get(name).for_schema(schema)

def sql(connection, name, params):
    # use module-global NamedQuery object to perform default substitution
    return get(name).sql(connection, params)

def run(connection, name, params, expect_rows = None):
    """\
@brief given a connection, run a named query with the params

Note that this function will fetch ALL rows.
@param connection The connection to use
@param name The name of the query to run
@param params The parameters passed into the query
@param expect_rows The number of rows expected. Set to 1 if return_as_map is true.  Raises ExpectationFailed if the number of returned rows doesn't exactly match.  Kind of a hack.
@return Returns the result set as a list of dicts.
"""
    return get(name).run(connection, params, expect_rows)
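
# A minimal usage sketch (hypothetical query name, params, and connection
# details; the corresponding .nq document must exist under the configured
# named-query-base-dir):
#
#   import MySQLdb
#   from indra.util import named_query
#   conn = MySQLdb.connect(host='localhost', user='linden', db='indra')
#   rows = named_query.run(conn, 'example/get_user_by_id', {'user_id': 42})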

class ExpectationFailed(Exception):
    """ Exception that is raised when an expectation for an sql query
    is not met."""
    def __init__(self, message):
        Exception.__init__(self, message)
        self.message = message

class NamedQuery(object):
    def __init__(self, name, filename):
        """ Construct a NamedQuery object.  The name argument is an
        arbitrary name as a handle for the query, and the filename is
        a path to a file or a file-like object containing an llsd named
        query document."""
        self._stat_interval_seconds = 5  # 5 seconds
        self._name = name
        if (filename is not None and isinstance(filename, (str, unicode))
            and NQ_FILE_SUFFIX != filename[-NQ_FILE_SUFFIX_LEN:]):
            filename = filename + NQ_FILE_SUFFIX
        self._location = filename
        self._alternative = dict()
        self._last_mod_time = 0
        self._last_check_time = 0
        self.deleted = False
        self.load_contents()

    def name(self):
        """ The name of the query. """
        return self._name

    def get_modtime(self):
        """ Returns the mtime (last modified time) of the named query
        filename. For file-like objects, expect a modtime of 0"""
        if self._location and isinstance(self._location, (str, unicode)):
            return os.path.getmtime(self._location)
        return 0
    def load_contents(self):
        """ Loads and parses the named query file into self.  Does
        nothing if self._location is nonexistent."""
        if self._location:
            if isinstance(self._location, (str, unicode)):
                contents = llsd.parse(open(self._location).read())
            else:
                # we probably have a file-like object. Godspeed!
                contents = llsd.parse(self._location.read())
            self._reference_contents(contents)
            # Check for alternative implementations
            try:
                for name, alt in self._contents['alternative'].items():
                    nq = NamedQuery(name, None)
                    nq._reference_contents(alt)
                    self._alternative[name] = nq
            except KeyError, e:
                pass
            self._last_mod_time = self.get_modtime()
            self._last_check_time = time.time()
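
    # An illustrative named query document (hypothetical example, not taken
    # from the repository): an .nq file is an llsd map; 'base_query' is
    # required, while 'dynamic_where', 'query_suffix', 'ttl', 'return_as_map',
    # 'legacy_dbname' and 'alternative' are optional keys read by
    # load_contents and _reference_contents.
    #
    #   <llsd><map>
    #     <key>base_query</key>
    #       <string>SELECT id, name FROM users WHERE enabled = 1</string>
    #     <key>dynamic_where</key>
    #       <map>
    #         <key>name</key>
    #           <string>AND name LIKE :name%</string>
    #       </map>
    #     <key>query_suffix</key>
    #       <string>ORDER BY name</string>
    #     <key>ttl</key>
    #       <integer>60</integer>
    #   </map></llsd>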

    def _reference_contents(self, contents):
        "Helper method which builds internal structure from parsed contents"
        self._contents = contents
        self._ttl = int(self._contents.get('ttl', 0))
        self._return_as_map = bool(self._contents.get('return_as_map', False))
        self._legacy_dbname = self._contents.get('legacy_dbname', None)

        # reset these before doing the sql conversion because we will
        # read them there. reset these while loading so we pick up
        # changes.
        self._around = set()
        self._append = set()
        self._integer = set()
        self._options = self._contents.get('dynamic_where', {})
        for key in self._options:
            if isinstance(self._options[key], basestring):
                self._options[key] = self._convert_sql(self._options[key])
            elif isinstance(self._options[key], list):
                lines = []
                for line in self._options[key]:
                    lines.append(self._convert_sql(line))
                self._options[key] = lines
            else:
                moreopt = {}
                for kk in self._options[key]:
                    moreopt[kk] = self._convert_sql(self._options[key][kk])
                self._options[key] = moreopt
        self._base_query = self._convert_sql(self._contents['base_query'])
        self._query_suffix = self._convert_sql(
            self._contents.get('query_suffix', ''))

    def _convert_sql(self, sql):
        """convert the parsed sql into a useful internal structure.

        This function has to turn the named query format into a pyformat
        style. It also has to look for %:name% and :name% and
        ready them for use in LIKE statements"""
        if sql:
            # This first sub is to properly escape any % signs that
            # are meant to be literally passed through to mysql in the
            # query.  It leaves any %'s that are used for
            # like-expressions.
            expr = re.compile("(?<=[^a-zA-Z0-9_-])%(?=[^:])")
            sql = expr.sub('%%', sql)

            # This should tackle the rest of the %'s in the query, by
            # converting them to LIKE clauses.
            expr = re.compile("(%?):([a-zA-Z][a-zA-Z0-9_-]*)%")
            sql = expr.sub(self._prepare_like, sql)
            expr = re.compile("#:([a-zA-Z][a-zA-Z0-9_-]*)")
            sql = expr.sub(self._prepare_integer, sql)
            expr = re.compile(":([a-zA-Z][a-zA-Z0-9_-]*)")
            sql = expr.sub("%(\\1)s", sql)
        return sql
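
    # Example of the conversion performed by _convert_sql (illustrative
    # fragment, not an actual query from the repository):
    #   SELECT * FROM users WHERE name LIKE :name% AND age > #:min_age
    # becomes
    #   SELECT * FROM users WHERE name LIKE %(_name_append)s AND age > %(_min_age_as_integer)s
    # and 'name' is remembered in self._append, 'min_age' in self._integer.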

    def _prepare_like(self, match):
        """This function changes LIKE statement replace behavior

        It works by turning %:name% to %(_name_around)s and :name% to
        %(_name_append)s. Since a leading '_' is not a valid keyname
        input (enforced via unit tests), it will never clash with
        existing keys. Then, when building the statement, the query
        runner will generate corrected strings."""
        if match.group(1) == '%':
            # there is a leading % so this is treated as prefix/suffix
            self._around.add(match.group(2))
            return "%(" + self._build_around_key(match.group(2)) + ")s"
        else:
            # there is no leading %, so this is suffix only
            self._append.add(match.group(2))
            return "%(" + self._build_append_key(match.group(2)) + ")s"

    def _build_around_key(self, key):
        return "_" + key + "_around"

    def _build_append_key(self, key):
        return "_" + key + "_append"
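
    # Runtime behavior for the LIKE keys (illustrative): for a fragment
    # "name LIKE %:pattern%", _convert_sql produces
    # "name LIKE %(_pattern_around)s" and records 'pattern' in self._around;
    # _construct_sql then takes the incoming params['pattern'], strips the
    # LIKE wildcards from it, and stores it as '%<value>%' under the
    # '_pattern_around' key.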

    def _prepare_integer(self, match):
        """This function adjusts the sql for #:name replacements

        It works by turning #:name to %(_name_as_integer)s. Since a
        leading '_' is not a valid keyname input (enforced via unit
        tests), it will never clash with existing keys. Then, when
        building the statement, the query runner will generate
        corrected strings."""
        self._integer.add(match.group(1))
        return "%(" + self._build_integer_key(match.group(1)) + ")s"

    def _build_integer_key(self, key):
        return "_" + key + "_as_integer"

    def _strip_wildcards_to_list(self, value):
        """Take string, and strip out the LIKE special characters.

        Technically, this is database dependent, but postgresql and
        mysql use the same wildcards, and I am not aware of a general
        way to handle this. I think you need a sql statement of the
        form:

        LIKE_STRING( [ANY,ONE,str]... )

        which would treat ANY as their any string, and ONE as their
        single glyph, and str as something that needs database
        specific encoding to not allow any % or _ to affect the query.

        As it stands, I believe it's impossible to write a named query
        style interface which uses like to search the entire space of
        text available. Imagine the query:

        % of brain used by average linden

        In order to search for %, it must be escaped, so once you have
        escaped the string to not do wildcard searches, and be escaped
        for the database, and then prepended the wildcard you come
        back with one of:

        1) %\% of brain used by average linden
        2) %%% of brain used by average linden

        Then, when passed to the database to be escaped to be database
        safe, you get back:

        1) %\\% of brain used by average linden
        : which means search for any character sequence, followed by a
          backslash, followed by any sequence, followed by ' of
          brain...'
        2) %%% of brain used by average linden
        : which (I believe) means search for a % followed by any
          character sequence followed by 'of brain...'

        Neither of which is what we want!

        So, we need a vendor (or extension) for LIKE_STRING. Anyone
        want to write it?"""
        if isinstance(value, unicode):
            utf8_value = value
        else:
            utf8_value = unicode(value, "utf-8")
        esc_list = []
        remove_chars = set(u"%_")
        for glyph in utf8_value:
            if glyph in remove_chars:
                continue
            esc_list.append(glyph.encode("utf-8"))
        return esc_list
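
    # Illustrative result: _strip_wildcards_to_list(u"50%_off") returns
    # ['5', '0', 'o', 'f', 'f'] -- the LIKE wildcards '%' and '_' are simply
    # dropped rather than escaped, for the reasons described in the docstring.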

    def delete(self):
        """ Makes this query unusable by deleting all the members and
        setting the deleted member.  This is desired when the on-disk
        query has been deleted but the in-memory copy remains."""
        # blow away all members except _name, _location, and deleted
        name, location = self._name, self._location
        for key in self.__dict__.keys():
            del self.__dict__[key]
        self.deleted = True
        self._name, self._location = name, location

    def ttl(self):
        """ Estimated time to live of this query. Used for web
        services to set the Expires header."""
        return self._ttl

    def legacy_dbname(self):
        return self._legacy_dbname

    def return_as_map(self):
        """ Returns true if this query is configured to return its
        results as a single map (as opposed to a list of maps, the
        normal behavior)."""

        return self._return_as_map

    def for_schema(self, db_name):
        "Look through the alternatives and return the correct query"
        if db_name is None:
            return self
        try:
            return self._alternative[db_name]
        except KeyError, e:
            pass
        return self

    def run(self, connection, params, expect_rows = None, use_dictcursor = True):
        """given a connection, run a named query with the params

        Note that this function will fetch ALL rows. We do this because it
        opens and closes the cursor to generate the values, and this
        isn't a generator so the cursor has no life beyond the method call.

        @param connection The connection to use (this generates its own cursor for the query)
        @param params The parameters passed into the query
        @param expect_rows The number of rows expected. Set to 1 if return_as_map is true.  Raises ExpectationFailed if the number of returned rows doesn't exactly match.  Kind of a hack.
        @param use_dictcursor Set to false to use a normal cursor and manually convert the rows to dicts.
        @return Returns the result set as a list of dicts, or, if the named query has return_as_map set to true, returns a single dict.
        """
        if use_dictcursor:
            cursor = connection.cursor(MySQLdb.cursors.DictCursor)
        else:
            cursor = connection.cursor()

        full_query, params = self._construct_sql(params)
        if DEBUG:
            print "SQL:", self.sql(connection, params)
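        # MySQLdb's cursor.execute() returns the number of rows in the result
        # set (or rows affected), which is what expect_rows is compared
        # against below.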
        rows = cursor.execute(full_query, params)

        # *NOTE: the expect_rows argument is a very cheesy way to get some
        # validation on the result set.  If you want to add more expectation
        # logic, do something more object-oriented and flexible. Or use an ORM.
        if(self._return_as_map):
            expect_rows = 1
        if expect_rows is not None and rows != expect_rows:
            cursor.close()
            raise ExpectationFailed("Statement expected %s rows, got %s.  Sql: '%s' %s" % (
                expect_rows, rows, full_query, params))

        # convert to dicts manually if we're not using a dictcursor
        if use_dictcursor:
            result_set = cursor.fetchall()
        else:
            if cursor.description is None:
                # an insert or something
                x = cursor.fetchall()
                cursor.close()
                return x

            names = [x[0] for x in cursor.description]

            result_set = []
            for row in cursor.fetchall():
                converted_row = {}
                for idx, col_name in enumerate(names):
                    converted_row[col_name] = row[idx]
                result_set.append(converted_row)

        cursor.close()
        if self._return_as_map:
            return result_set[0]
        return result_set

    def _construct_sql(self, params):
        """ Returns a query string and a dictionary of parameters,
        suitable for directly passing to the execute() method."""
        self.refresh()

        # build the query from the options available and the params
        base_query = []
        base_query.append(self._base_query)
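        # dynamic_where options come in two flavors: a dict (or list/tuple)
        # option selects one of several extra where-clauses keyed by the
        # param's value, while a plain string option is appended whenever the
        # param is present and truthy.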
        for opt, extra_where in self._options.items():
            if type(extra_where) in (dict, list, tuple):
                if opt in params:
                    base_query.append(extra_where[params[opt]])
            else:
                if opt in params and params[opt]:
                    base_query.append(extra_where)
        if self._query_suffix:
            base_query.append(self._query_suffix)
        full_query = '\n'.join(base_query)

        # Go through the query and rewrite all of the parameters that use the
        # @:name syntax.
        rewrite = _RewriteQueryForArray(params)
        expr = re.compile("@%\(([a-zA-Z][a-zA-Z0-9_-]*)\)s")
        full_query = expr.sub(rewrite.operate, full_query)
        params.update(rewrite.new_params)

        # build out the params for like. We only have to do this for
        # parameters which were detected to have used the like or integer
        # syntax during load.
        #
        # * treat the incoming string as utf-8
        # * strip wildcards
        # * append or prepend % as appropriate
        new_params = {}
        for key in params:
            if key in self._around:
                new_value = ['%']
                new_value.extend(self._strip_wildcards_to_list(params[key]))
                new_value.append('%')
                new_params[self._build_around_key(key)] = ''.join(new_value)
            if key in self._append:
                new_value = self._strip_wildcards_to_list(params[key])
                new_value.append('%')
                new_params[self._build_append_key(key)] = ''.join(new_value)
            if key in self._integer:
                new_params[self._build_integer_key(key)] = int(params[key])
        params.update(new_params)

        return full_query, params

    def sql(self, connection, params):
        """ Generates an SQL statement from the named query document
        and a dictionary of parameters.

        *NOTE: Only use for debugging, because it uses the
         non-standard MySQLdb 'literal' method.
        """
        if not DEBUG:
            import warnings
            warnings.warn("Don't use named_query.sql() when not debugging.  Used on %s" % self._location)
        # do substitution using the mysql (non-standard) 'literal'
        # function to do the escaping.
        full_query, params = self._construct_sql(params)
        return full_query % connection.literal(params)

    def refresh(self):
        """ Refresh self from the file on the filesystem.

        This is optimized to be callable as frequently as you wish,
        without adding too much load.  It does so by only stat-ing the
        file every N seconds, where N defaults to 5 and is
        configurable through the member _stat_interval_seconds.  If the stat
        reveals that the file has changed, refresh will re-parse the
        contents of the file and use them to update the named query
        instance.  If the stat reveals that the file has been deleted,
        refresh will call self.delete to make the in-memory
        representation unusable."""
        now = time.time()
        if(now - self._last_check_time > self._stat_interval_seconds):
            self._last_check_time = now
            try:
                modtime = self.get_modtime()
                if(modtime > self._last_mod_time):
                    self.load_contents()
            except OSError, e:
                if e.errno == errno.ENOENT: # file not found
                    self.delete() # clean up self
                raise  # pass the exception along to the caller so they know that this query disappeared

class NamedQueryManager(object):
    """ Manages the lifespan of NamedQuery objects, drawing from a
    directory hierarchy of named query documents.

    In practice this amounts to a memory cache of NamedQuery objects."""

    def __init__(self, named_queries_dir):
        """ Initializes a manager to look for named queries in a
        directory."""
        self._dir = os.path.abspath(os.path.realpath(named_queries_dir))
        self._cached_queries = {}

    def sql(self, connection, name, params):
        nq = self.get(name)
        return nq.sql(connection, params)

    def get(self, name):
        """ Returns a NamedQuery instance based on the name, either
        from memory cache, or by parsing from disk.

        The name is simply a relative path to the directory associated
        with the manager object.  Before returning the instance, the
        NamedQuery object is cached in memory, so that subsequent
        accesses don't have to read from disk or do any parsing.  This
        means that NamedQuery objects returned by this method are
        shared across all users of the manager object.
        NamedQuery.refresh is used to bring the NamedQuery objects in
        sync with the actual files on disk."""
        nq = self._cached_queries.get(name)
        if nq is None:
            nq = NamedQuery(name, os.path.join(self._dir, name))
            self._cached_queries[name] = nq
        else:
            try:
                nq.refresh()
            except OSError, e:
                if e.errno == errno.ENOENT: # file not found
                    del self._cached_queries[name]
                raise # pass exception along to caller so they know that the query disappeared

        return nq
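
    # Typical standalone use of the manager (hypothetical directory and query
    # name; the module-level get()/run() helpers normally build this object
    # from config for you):
    #   manager = NamedQueryManager('/path/to/sql')
    #   nq = manager.get('example/get_user_by_id')
    #   results = nq.run(connection, {'user_id': 42})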

class _RewriteQueryForArray(object):
    "Helper class for rewriting queries with the @:name syntax"
    def __init__(self, params):
        self.params = params
        self.new_params = dict()

    def operate(self, match):
        "Given a match, return the string that should be in use"
        key = match.group(1)
        value = self.params[key]
        if type(value) in (list,tuple):
            rv = []
            for idx in range(len(value)):
                # if the value@idx is array-like, we are
                # probably dealing with a VALUES
                new_key = "_%s_%s"%(key, str(idx))
                val_item = value[idx]
                if type(val_item) in (list, tuple, dict):
                    if type(val_item) is dict:
                        # this is because in Python, the order of
                        # key, value retrieval from the dict is not
                        # guaranteed to match what the input intended
                        # and for VALUES, order is important.
                        # TODO: Implement an ordered dict in the LLSD parser?
                        raise ExpectationFailed(
                            'Only lists/tuples allowed, received dict')
                    values_keys = []
                    for value_idx, item in enumerate(val_item):
                        # we want a key of the format :
                        # key_#replacement_#value_row_#value_col
                        # ugh... so if we are replacing 10 rows in user_note,
                        # the first values clause would read (for @:user_notes) :-
                        # ( :_user_notes_0_1_1,  :_user_notes_0_1_2, :_user_notes_0_1_3 )
                        # the input LLSD for VALUES will look like:
                        # <llsd>...
                        # <map>
                        #  <key>user_notes</key>
                        #      <array>
                        #      <array> <!-- row 1 for VALUES -->
                        #          <string>...</string>
                        #          <string>...</string>
                        #          <string>...</string>
                        #      </array>
                        # ...
                        #      </array>
                        # </map>
                        # ... </llsd>
                        values_key = "%s_%s"%(new_key, value_idx)
                        self.new_params[values_key] = item
                        values_keys.append("%%(%s)s"%values_key)
                    # now collapse all these new place holders enclosed in ()
                    # from [':_key_0_1_1', ':_key_0_1_2', ':_key_0_1_3,...]
                    # rv will have [ '(:_key_0_1_1, :_key_0_1_2, :_key_0_1_3)', ]
                    # which is flattened a few lines below join(rv)
                    rv.append('(%s)' % ','.join(values_keys))
                else:
                    self.new_params[new_key] = val_item
                    rv.append("%%(%s)s"%new_key)
            return ','.join(rv)
        else:
            # not something that can be expanded, so just drop the
            # leading @ in the front of the match. This will mean that
            # the single value we have, be it a string, int, whatever
            # (other than dict) will correctly show up, eg:
            #
            # where foo in (@:foobar) -- foobar is a string, so we get
            # where foo in (:foobar)
            return match.group(0)[1:]
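
# Illustrative expansion of the @:name syntax (hypothetical params): with
# params = {'ids': [1, 2, 3]}, a fragment "WHERE id IN (@:ids)" is first
# converted by _convert_sql to "WHERE id IN (@%(ids)s)", and operate() then
# rewrites it to "WHERE id IN (%(_ids_0)s,%(_ids_1)s,%(_ids_2)s)" while adding
# {'_ids_0': 1, '_ids_1': 2, '_ids_2': 3} to new_params.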