db.py | searchcode

/src/db.py

https://github.com/Dischi/kaa-base
Python | 2571 lines | 2424 code | 23 blank | 124 comment | 51 complexity | 380f4e5e41fb43919618ebe8d2db9bbb MD5 | raw file
Possible License(s): LGPL-2.1

# -----------------------------------------------------------------------------
# db.py - db abstraction module
# -----------------------------------------------------------------------------
# Copyright 2006-2012 Dirk Meyer, Jason Tackaberry
#
# Please see the file AUTHORS for a complete list of authors.
#
# This library is free software; you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License version
# 2.1 as published by the Free Software Foundation.
#
# This library is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301 USA
#
# -----------------------------------------------------------------------------
from __future__ import absolute_import

__all__ = [
    'Database', 'QExpr', 'DatabaseError', 'DatabaseReadOnlyError',
    'split_path', 'ATTR_SIMPLE', 'ATTR_SEARCHABLE', 'ATTR_IGNORE_CASE',
    'ATTR_INDEXED', 'ATTR_INDEXED_IGNORE_CASE', 'ATTR_INVERTED_INDEX',
    'RAW_TYPE'
]

# python imports
import sys
import os
import time
import re
import logging
import math
import cPickle
import copy_reg
import _weakref
import threading
try:
    # Try a system install of pysqlite
    from pysqlite2 import dbapi2 as sqlite
except ImportError:
    # Python 2.6 provides sqlite natively, so try that next.
    from sqlite3 import dbapi2 as sqlite

# kaa base imports
from .utils import property
from .strutils import py3_str, BYTES_TYPE, UNICODE_TYPE
from .timer import WeakOneShotTimer
from . import main

if sqlite.version < '2.1.0':
    raise ImportError('pysqlite 2.1.0 or higher required')
if sqlite.sqlite_version < '3.3.1':
    raise ImportError('sqlite 3.3.1 or higher required')

# get logging object
log = logging.getLogger('kaa.base.db')

SCHEMA_VERSION = 0.2
SCHEMA_VERSION_COMPATIBLE = 0.2
CREATE_SCHEMA = """
    CREATE TABLE meta (
        attr        TEXT UNIQUE,
        value       TEXT
    );
    INSERT INTO meta VALUES('version', %s);

    CREATE TABLE types (
        id              INTEGER PRIMARY KEY AUTOINCREMENT,
        name            TEXT UNIQUE,
        attrs_pickle    BLOB,
        idx_pickle      BLOB
    );

    CREATE TABLE inverted_indexes (
        name            TEXT,
        attr            TEXT,
        value           TEXT
    );

    CREATE UNIQUE INDEX inverted_indexes_idx on inverted_indexes (name, attr);
"""

CREATE_IVTIDX_TEMPLATE = """
    CREATE TABLE ivtidx_%IDXNAME%_terms (
        id              INTEGER PRIMARY KEY AUTOINCREMENT,
        term            TEXT,
        count           INTEGER
    );
    CREATE UNIQUE INDEX ivtidx_%IDXNAME%_terms_idx on ivtidx_%IDXNAME%_terms (term);

    CREATE TABLE ivtidx_%IDXNAME%_terms_map (
        rank            INTEGER,
        term_id         INTEGER,
        object_type     INTEGER,
        object_id       INTEGER,
        frequency       FLOAT
    );
    CREATE INDEX ivtidx_%IDXNAME%_terms_map_idx ON ivtidx_%IDXNAME%_terms_map (term_id, rank, object_type, object_id);
    CREATE INDEX ivtidx_%IDXNAME%_terms_map_object_idx ON ivtidx_%IDXNAME%_terms_map (object_id, object_type, term_id);
    CREATE TRIGGER ivtidx_%IDXNAME%_delete_terms_map DELETE ON ivtidx_%IDXNAME%_terms_map
    BEGIN
        UPDATE ivtidx_%IDXNAME%_terms SET count=MAX(0, count-1) WHERE id=old.term_id;
    END;
"""


ATTR_SIMPLE              = 0x01
ATTR_SEARCHABLE          = 0x02      # Is a SQL column, not a pickled field
ATTR_INDEXED             = 0x04      # Will have an SQL index
ATTR_IGNORE_CASE         = 0x08      # Store in db as lowercase for searches.
ATTR_INVERTED_INDEX      = 0x10      # Attribute associated with an inverted idx
ATTR_INDEXED_IGNORE_CASE = ATTR_INDEXED | ATTR_IGNORE_CASE

# These are special attributes for querying.  Attributes with
# these names cannot be registered.
RESERVED_ATTRIBUTES = ('id', 'parent', 'object', 'type', 'limit', 'attrs', 'distinct', 'orattrs')

STOP_WORDS = (
    "about", "and", "are", "but", "com", "for", "from", "how", "not",
    "some", "that", "the", "this", "was", "what", "when", "where", "who",
    "will", "with", "the", "www", "http", "org", "of", "on"
)


class PyObjectRow(object):
    """
    ObjectRows are dictionary-like objects that represent an object in
    the database.  They are used by pysqlite instead of tuples or indexes.

    ObjectRows support on-demand unpickling of the internally stored pickle
    which contains ATTR_SIMPLE attributes.

    This is the native Python implementation of ObjectRow.  There is a
    faster C implementation in the _objectrow extension.  This
    implementation is still designed to be efficient -- the C version is
    only about 60-70% faster.
    """
    # A dict containing per-query data: [refcount, idxmap, typemap, pickle_idx]
    # This is constructed once for each query, and each row returned in the
    # query references the same data.  Each ObjectRow instance adds to the
    # refcount once initialized, and is decremented when the object is deleted.
    # Once it reaches 0, the entry is removed from the dict.
    queries = {}
    # Use __slots__ as a minor optimization to improve object creation time.
    __slots__ = ('_description', '_object_types', '_type_name', '_row', '_pickle',
                '_idxmap', '_typemap', '_keys')
    def __init__(self, cursor, row, pickle_dict=None):
        # The following is done per row per query, so it should be as light as
        # possible.
        if pickle_dict:
            # Created outside pysqlite, e.g. from Database.add()
            self._pickle = pickle_dict
            self._idxmap = None
            return

        if isinstance(cursor, tuple):
            self._description, self._object_types = cursor
        else:
            self._description = cursor.description
            self._object_types = cursor._db()._object_types

        self._row = row
        # _pickle: False == no pickle present; None == pickle present but
        # empty; if a dict, is the unpickled dictionary; else it's a byte
        # string containing the pickled data.
        self._pickle = False
        self._type_name = row[0]
        try:
            attrs = self._object_types[self._type_name][1]
        except KeyError:
            raise ValueError("Object type '%s' not defined." % self._type_name)

        query_key = id(self._description)
        if query_key in PyObjectRow.queries:
            query_info = PyObjectRow.queries[query_key]
            # Increase refcount to the query info
            query_info[0] += 1
            self._idxmap, self._typemap, pickle_idx = query_info[1:]
            if pickle_idx != -1:
                self._pickle = self._row[pickle_idx]
            return

        # Everything below this is done once per query, not per row, so
        # performance isn't quite as critical.
        idxmap = {} # attr_name -> (idx, pickled, named_ivtdx, type, flags)
        pickle_idx = -1
        for i in range(2, len(self._description)):
            attr_name = self._description[i][0]
            idxmap[attr_name] = i
            if attr_name == 'pickle':
                pickle_idx = i
                self._pickle = self._row[i]

        for attr_name, (attr_type, flags, ivtidx, split) in attrs.items():
            idx = idxmap.get(attr_name, -1)
            pickled = flags & ATTR_SIMPLE or (flags & ATTR_INDEXED_IGNORE_CASE == ATTR_INDEXED_IGNORE_CASE)
            idxmap[attr_name] = idx, pickled, attr_name == ivtidx, attr_type, flags

        # Construct dict mapping type id -> type name.  Essentially an
        # inversion of _object_types
        typemap = dict((v[0], k) for k, v in self._object_types.items())

        self._idxmap = idxmap
        self._typemap = typemap
        PyObjectRow.queries[query_key] = [1, idxmap, typemap, pickle_idx]


    def __del__(self):
        if self._idxmap is None:
            # From Database.add(), pickle only, no pysqlite row.
            return
        query_key = id(self._description)
        query_info = PyObjectRow.queries[query_key]
        query_info[0] -= 1
        if query_info[0] == 0:
            # Refcount for this query info is 0, so remove from global queries
            # dict.
            del PyObjectRow.queries[query_key]


    def __getitem__(self, key):
        if self._idxmap is None:
            # From Database.add(), work strictly from pickle
            return self._pickle[key]
        elif key == 'type':
            return self._type_name
        elif key == 'parent':
            type_idx = self._idxmap.get('parent_type', [-1])[0]
            id_idx = self._idxmap.get('parent_id', [-1])[0]
            if type_idx == -1 or id_idx == -1:
                raise KeyError('Parent attribute not available')
            type_id = self._row[type_idx]
            return self._typemap.get(type_id, type_id), self._row[id_idx]
        elif key == '_row':
            return self._row
        elif isinstance(key, int):
            return self._row[key]

        attr = self._idxmap[key]
        attr_idx = attr[0]
        is_indexed_ignore_case = (attr[4] & ATTR_INDEXED_IGNORE_CASE == ATTR_INDEXED_IGNORE_CASE)
        if attr_idx == -1:
            # Attribute is not in the sql row
            if attr[1] and self._pickle is None:
                # Pickle is empty, which means this attribute was never
                # assigned a value.  Return a default (empty list if attribute
                # is named after an inverted index
                return [] if attr[2] else None
            elif not attr[1] or self._pickle is False:
                # The requested attribute is not in the sqlite row, and neither is the pickle.
                raise KeyError("ObjectRow does not have enough data to provide '%s'" % key)

        if not attr[1]:
            value = self._row[attr_idx]
        elif attr_idx >= 0 and not self._pickle and is_indexed_ignore_case:
            # Attribute is ATTR_INDEXED_IGNORE_CASE which means the
            # authoritative source is in the pickle, but we don't have it.  So just
            # return what we have.
            value = self._row[attr_idx]
        else:
            if self._pickle and not isinstance(self._pickle, dict):
                # We need to check the pickle but it's not unpickled, so do so now.
                self._pickle = dbunpickle(self._pickle)
            if is_indexed_ignore_case:
                key = '__' + key
            if key not in self._pickle:
                return [] if attr[2] else None
            else:
                value = self._pickle[key]

        if sys.hexversion < 0x03000000 and (attr[3] == str or attr[3] == buffer):
            # Python 2's pysqlite returns BLOBs as buffers.  If the attribute
            # type is string or buffer (RAW_TYPE on Python 2), convert to string.
            return str(value)
        else:
            return value


    def keys(self):
        if not self._idxmap:
            # Ad hoc ObjectRow, proxy to pickle dict.
            return self._pickle.keys()

        if not hasattr(self, '_keys'):
            self._keys = ['type']
            for name, attr in self._idxmap.items():
                if (attr[0] >= 0 or (attr[1] and self._pickle is not False)) and name != 'pickle':
                    self._keys.append(name)
            if 'parent_type' in self._idxmap and 'parent_id' in self._idxmap:
                self._keys.append('parent')
        return self._keys


    def values(self):
        if not self._idxmap:
            # Ad hoc ObjectRow, proxy to pickle dict.
            return self._pickle.values()
        return [self[k] for k in self.keys()]


    def items(self):
        return zip(self.keys(), self.values())


    def get(self, key, default=None):
        try:
            return self[key]
        except KeyError:
            return default


    def has_key(self, key):
        return key in self.keys()


    def __iter__(self):
        return iter(self.keys())


    def __contains__(self, key):
        if key == 'type' or (key == 'parent' and 'parent_id' in self._idxmap):
            return True
        else:
            return key in self._idxmap


if sys.hexversion >= 0x03000000:
    RAW_TYPE = bytes
    PICKLE_PROTOCOL = 3
    class Proto2Unpickler(pickle._Unpickler):
        """
        In spite of the promise that pickles will be compatible between Python
        releases, Python 3 does the wrong thing with non-unicode strings in
        pickles (BINSTRING pickle opcode).  It will try to convert them to
        unicode strings, which is wrong when the intention was to store binary
        data in a Python 2 str.

        This class implements a custom unpickler that will load BINSTRING as
        bytes objects.  An exception is made for dictionaries, where BINSTRING
        keys are converted to unicode strings.

        Additionally, it maps the unicode and buffer types to corresponding
        Python 3 types.
        """
        dispatch = pickle._Unpickler.dispatch.copy()
        def load_binstring(self):
            len = pickle.mloads(bytes('i', 'ascii') + self.read(4))
            self.append(bytes(self.read(len)))
        dispatch[pickle.BINSTRING[0]] = load_binstring

        def load_short_binstring(self):
            len = ord(self.read(1))
            self.append(bytes(self.read(len)))
        dispatch[pickle.SHORT_BINSTRING[0]] = load_short_binstring

        def load_setitems(self):
            super(Proto2Unpickler, self).load_setitems()
            d = self.stack[-1]
            for k, v in d.items():
                if type(k) == bytes:
                    sk = str(k, self.encoding, self.errors)
                    if sk not in d:
                        d[sk] = v
                        del d[k]
        dispatch[pickle.SETITEMS[0]] = load_setitems

        def find_class(self, module, name):
            if module == '__builtin__':
                if name == 'unicode':
                    return str
                elif name == 'buffer':
                    return bytes
            return super(Proto2Unpickler, self).find_class(module, name)

    def dbunpickle(s):
        if s[1] == 0x02:
            import io
            #import pickletools
            #pickletools.dis(io.BytesIO(bytes(s)))
            return Proto2Unpickler(io.BytesIO(bytes(s))).load()
        else:
            return pickle.loads(bytes(s))

    def dbpickle(value):
        return bytes(pickle.dumps(value, 3))

    # Need to be able to unpickle pickled buffers from Python 2.
    def _unpickle_buffer(s):
        return bytes(s)

else:
    RAW_TYPE = buffer
    PICKLE_PROTOCOL = 2
    def dbunpickle(s):
        return cPickle.loads(str(s))

    def dbpickle(value):
        return buffer(cPickle.dumps(value, 2))


    # Python2 can't pickle buffer types, so register a handler for that.
    def _pickle_buffer(b):
        return _unpickle_buffer, (str(b),)

    def _unpickle_buffer(s):
        return str(s)
    copy_reg.pickle(buffer, _pickle_buffer, _unpickle_buffer)


try:
    from . import _objectrow
except ImportError:
    # Use the python-native ObjectRow
    ObjectRow = PyObjectRow
else:
    # Use the faster C-based ObjectRow
    ObjectRow = _objectrow.ObjectRow
    # Expose the custom unpickler to the _objectrow module so it can deal with
    # pickles stored inside DB rows.
    _objectrow.dbunpickle = dbunpickle


# Register a handler for pickling ObjectRow objects.
def _pickle_ObjectRow(o):
    if o._description:
        return _unpickle_ObjectRow, ((o._description, o._object_types), o._row)
    else:
        return _unpickle_ObjectRow, (None, None, dict(o.items()))

def _unpickle_ObjectRow(*args):
    return ObjectRow(*args)

copy_reg.pickle(ObjectRow, _pickle_ObjectRow, _unpickle_ObjectRow)



PATH_SPLIT = re.compile(ur'(\d+)|[_\W]', re.U | re.X)
def split_path(s):
    """
    Convenience split function for inverted index attributes.  Useful for
    attributes that contain filenames.  Splits the given string s into
    components parts (directories, filename), discarding the extension and all
    but the last two directories.  What's remaining is split into words and the
    result is returned.
    """
    dirname, filename = os.path.split(s)
    fname_noext, ext = os.path.splitext(filename)
    for part in dirname.strip('/').split(os.path.sep)[2:][-2:] + [fname_noext]:
        for match in PATH_SPLIT.split(part):
            if match:
                yield match


# FIXME: this is flawed.  Can use placeholders by taking a n-tuple and
# replacing ? to (?, ?, ..., n) and then extend the query params list with the
# given tuple/list value.
def _list_to_printable(value):
    """
    Takes a list of mixed types and outputs a unicode string.  For
    example, a list [42, 'foo', None, "foo's' string"], this returns the
    string:

        (42, 'foo', NULL, 'foo''s'' string')

    Single quotes are escaped as ''.  This is suitable for use in SQL
    queries.
    """
    fixed_items = []
    for item in value:
        if isinstance(item, (int, long, float)):
           fixed_items.append(str(item))
        elif item == None:
            fixed_items.append("NULL")
        elif isinstance(item, UNICODE_TYPE):
            fixed_items.append("'%s'" % item.replace("'", "''"))
        elif isinstance(item, BYTES_TYPE):
            fixed_items.append("'%s'" % py3_str(item.replace("'", "''")))
        else:
            raise Exception, "Unsupported type '%s' given to _list_to_printable" % type(item)

    return '(' + ','.join(fixed_items) + ')'


class DatabaseError(Exception):
    pass

class DatabaseReadOnlyError(Exception):
    pass

class QExpr(object):
    """
    Flexible query expressions for use with :meth:`kaa.db.Database.query()`
    """
    def __init__(self, operator, operand):
        """
        :param operator: ``=``, ``!=``, ``<``, ``<=``, ``>``, ``>=``, ``in``,
                         ``not in``, ``range``, ``like``, or ``regexp``
        :type operator: str
        :param operand: the rvalue of the expression; any scalar values as part of
                        the operand must be the same type as the attribute being
                        evaluated

        Except for ``in``, ``not in``, and ``range``, the operand must be the
        type of the registered attribute being evaluated (e.g. unicode, int,
        etc.).

        The operand for ``in`` and ``not in`` are lists or tuples of the attribute
        type, to test inclusion in the given set.

        The ``range`` operator accepts a 2-tuple specifying min and max values
        for the attribute.  The Python expression age=QExpr('range', (20, 30))
        translates to ``age >= 20 AND age <= 30``.
        """
        operator = operator.lower()
        assert(operator in ('=', '!=', '<', '<=', '>', '>=', 'in', 'not in', 'range', 'like', 'regexp'))
        if operator in ('in', 'not in', 'range'):
            assert(isinstance(operand, (list, tuple)))
            if operator == 'range':
                assert(len(operand) == 2)

        self._operator = operator
        self._operand = operand

    def as_sql(self, var):
        if self._operator == "range":
            a, b = self._operand
            return "%s >= ? AND %s <= ?" % (var, var), (a, b)
        elif self._operator in ("in", "not in"):
            return "%s %s %s" % (var, self._operator.upper(),
                   _list_to_printable(self._operand)), ()
        else:
            return "%s %s ?" % (var, self._operator.upper()), \
                   (self._operand,)


class RegexpCache(object):
    def __init__(self):
        self.last_item = None
        self.last_expr = None

    def __call__(self, expr, item):
        if item is None:
            return 0

        if self.last_item == item and self.last_item is not None and self.last_expr == expr:
            return self.last_result
        if self.last_expr != expr:
            self.last_expr = re.compile(unicode(expr), re.U)

        self.last_item = item
        # FIXME: bad conversion to unicode!
        self.last_result = self.last_expr.match(unicode(item)) is not None
        return self.last_result


class Database(object):
    def __init__(self, dbfile):
        """
        Open a database, creating one if it doesn't already exist.

        :param dbfile: path to the database file
        :type dbfile: str

        SQLite is used to provide the underlying database.
        """
        super(Database, self).__init__()
        # _object_types dict is keyed on type name, where value is a 3-
        # tuple (id, attrs, idx), where:
        #   - id is a unique numeric database id for the type,
        #   - attrs is a dict containing registered attributes for the type,
        #     keyed on attribute name, where value is a 3-tuple of (type, flags,
        #     ivtidx), where type is a python datatype, flags is a bitmask of
        #     ATTR_*, ivtidx is the name of the associated inverted index (used
        #     if flags has ATTR_INVERTED_INDEX, otherwise None)
        #   - idx is a list of n-tuples, where each n-tuple defines two or more
        #     (non-ATTR_SIMPLE) attributes on which to create a multi-column
        #     sql index.
        self._object_types = {}

        # _inverted_indexes dict is keyed on index name, where value is
        # a dict keyed on:
        #   - min: minimum length of terms
        #   - max: maximum length of terms
        #   - ignore: list of terms to ignore
        #   - split: function or regular expression used to split string ATTR_INVERTED_INDEX
        #            attributes.
        self._inverted_indexes = {}

        # True when there are uncommitted changes
        self._dirty = False
        # True when modifications are not allowed to the database, which
        # is the case when Python 3 is opening a database created by Python 2
        # and upgrade_to_py3() has not been called.
        self._readonly = False
        self._dbfile = os.path.realpath(dbfile)
        self._lock = threading.RLock()
        self._lazy_commit_timer = WeakOneShotTimer(self.commit)
        self._lazy_commit_interval = None
        self._open_db()


    def _open_db(self):
        self._db = sqlite.connect(self._dbfile, check_same_thread=False)

        # Create the function "regexp" for the REGEXP operator of SQLite
        self._db.create_function("regexp", 2, RegexpCache())

        self._cursor = self._db.cursor()

        class Cursor(sqlite.Cursor):
            _db = _weakref.ref(self)
        self._db.row_factory = ObjectRow
        # Queries done through this cursor will use the ObjectRow row factory.
        self._qcursor = self._db.cursor(Cursor)

        for cursor in self._cursor, self._qcursor:
            cursor.execute("PRAGMA synchronous=OFF")
            cursor.execute("PRAGMA temp_store=MEMORY")
            cursor.execute("PRAGMA cache_size=50000")
            cursor.execute("PRAGMA page_size=8192")

        if not self._check_table_exists("meta"):
            self._db.executescript(CREATE_SCHEMA % SCHEMA_VERSION)

        row = self._db_query_row("SELECT value FROM meta WHERE attr='version'")
        if float(row[0]) < SCHEMA_VERSION_COMPATIBLE:
            raise DatabaseError("Database '%s' has schema version %s; required %s" % \
                                (self._dbfile, row[0], SCHEMA_VERSION_COMPATIBLE))

        self._load_inverted_indexes()
        self._load_object_types()


    def _set_dirty(self):
        if self._lazy_commit_interval is not None:
            self._lazy_commit_timer.start(self._lazy_commit_interval)
        if self._dirty:
            return
        self._dirty = True
        main.signals['exit'].connect(self.commit)


    def _db_query(self, statement, args = (), cursor = None, many = False):
        t0=time.time()
        with self._lock:
            if not cursor:
                cursor = self._cursor
            if many:
                cursor.executemany(statement, args)
            else:
                cursor.execute(statement, args)
            rows = cursor.fetchall()
        t1=time.time()
        #print "QUERY [%.06f%s]: %s" % (t1-t0, ('', ' (many)')[many], statement), args
        return rows


    def _db_query_row(self, statement, args = (), cursor = None):
        rows = self._db_query(statement, args, cursor)
        if len(rows) == 0:
            return None
        return rows[0]


    def _to_obj_tuple(self, obj, numeric=False):
        """
        Returns a normalized object reference as a 2-tuple (type, id).

        :param obj: an ObjectRow, or 2-tuple (type, id)
        :param numeric: if True, coerce type name to a type id

        Raises ValueError if obj is not valid.
        """
        if isinstance(obj, ObjectRow):
            object_type, object_id = obj['type'], obj['id']
        else:
            try:
                object_type, object_id = obj
                if not isinstance(object_type, (int, basestring)) or not isinstance(object_id, (int, long, QExpr)):
                    raise TypeError
            except TypeError:
                raise ValueError('Object reference must be either ObjectRow, or (type, id), got %s' % obj)

        if numeric:
            object_type = self._get_type_id(object_type)

        return object_type, object_id


    def _check_table_exists(self, table):
        res = self._db_query_row("SELECT name FROM sqlite_master where " \
                                 "name=? and type='table'", (table,))
        return res != None


    def _register_check_indexes(self, indexes, attrs):
        for cols in indexes:
            if not isinstance(cols, (list, tuple)):
                raise ValueError, "Single column index specified ('%s') where multi-column index expected." % cols
            for col in cols:
                errstr = "Multi-column index (%s) contains" % ",".join(cols)
                if col not in attrs:
                    raise ValueError, "%s unknown attribute '%s'" % (errstr, col)
                if not attrs[col][1]:
                    raise ValueError, "%s ATTR_SIMPLE attribute '%s'" % (errstr, col)


    def _register_create_multi_indexes(self, indexes, table_name):
        for cols in indexes:
            self._db_query("CREATE INDEX %s_%s_idx ON %s (%s)" % \
                           (table_name, "_".join(cols), table_name, ",".join(cols)))


    def register_object_type_attrs(self, type_name, indexes = [], **attrs):
        """
        Register one or more object attributes and/or multi-column indexes for
        the given type name.

        This function modifies the database as needed to accommodate new
        indexes and attributes, either by creating the object's tables (in the
        case of a new object type) or by altering the object's tables to add
        new columns or indexes.

        This method is idempotent: if the attributes and indexes specified have
        not changed from previous invocations, no changes will be made to the
        database.  Moreover, newly registered attributes will not affect
        previously registered attributes.  This allows, for example, a plugin
        to extend an existing object type created by the core application
        without interfering with it.

        :param type_name: the name of object type the registered attributes or
                          indexes apply to.
        :type type_name: str
        :param indexes: a list of tuples where each tuple contains 2 or more
                        registered :attr:`~kaa.db.ATTR_SEARCHABLE` attributes
                        for which a composite index will be created in the
                        underlying database.  This is useful for speeding
                        up queries involving these attributes combined.
        :type indexes: list of tuples of strings
        :param attrs: keyword arguments defining the attributes to be
                      registered.  The keyword defining the attribute name
                      cannot conflict with any of the names in
                      :attr:`~kaa.db.RESERVED_ATTRIBUTES`.  See below for a
                      more complete specification of the value.
        :type attrs: 2, 3, or 4-tuple

        Previously registered attributes may be updated in limited ways (e.g.
        by adding an index to the attribute).  If the change requested is
        not supported, a ValueError will be raised.

        .. note:: Currently, indexes and attributes can only be added, not
           removed.  That is, once an attribute or index is added, it lives
           forever.

        Object attributes, which are supplied as keyword arguments, are either
        *searchable* or *simple*.  *Searchable* attributes occupy a column in
        the underlying database table and so queries can be performed on these
        attributes, but their types are more restricted.  *Simple* attributes
        can be any type that can be pickled, but can't be searched.

        The attribute kwarg value is a tuple of 2 to 4 items in length and in
        the form (attr_type, flags, ivtidx, split).

            * attr_type: the type of the object attribute. For simple attributes
              (:attr:`~kaa.db.ATTR_SIMPLE` in *flags*), this can be any picklable
              type; for searchable attributes (:attr:`~kaa.db.ATTR_SEARCHABLE`
              in *flags*), this must be either *int*, *float*, *str*, *unicode*,
              *bytes*, or *bool*.  (On Python 2.5, you can use ``kaa.db.RAW_TYPE``
              instead of *bytes*.)
            * flags: a bitmap of :ref:`attribute flags <attrflags>`
            * ivtidx: name of a previously registered inverted index used for
              this attribute. Only needed if flags contains
              :attr:`~kaa.db.ATTR_INVERTED_INDEX`
            * split: function or regular expression used to split string-based
              values for this attribute into separate terms for indexing.  If
              this isn't defined, then the default split strategy for the
              inverted index wil be used.

        Apart from not being allowed to conflict with one of the reserved
        names, there is a special case for attribute names: when they have
        the same name as a previously registered inverted index.  These
        attributes must be :attr:`~kaa.db.ATTR_SIMPLE`, and of type *list*.
        Terms explicitly associated with the attribute are persisted with the
        object, but when accessed, all terms for all attributes for that
        inverted index will be contained in the list, not just those explicitly
        associated with the same-named attribute.

        The following example shows what an application that indexes email might
        do::

            from kaa.db import *
            from datetime import datetime
            db = Database('email.db')
            db.register_inverted_index('keywords', min=3, max=30)
            db.register_object_type_attrs('msg',
                # Create a composite index on sender and recipient, because
                # (let's suppose) it's we do a lot of searches for specific
                # senders emailing specific recipients.
                [('sender', 'recipient')],

                # Simple attribute can be anything that's picklable, which datetime is.
                date = (datetime, ATTR_SIMPLE),

                # Sender and recipient names need to be ATTR_SEARCHABLE since
                # they're part of a composite index.
                sender = (unicode, ATTR_SEARCHABLE),
                recipient = (unicode, ATTR_SEARCHABLE),

                # Subject is searchable (standard SQL-based substring matches),
                # but also being indexed as part of the keywords inverted
                # index for fast term-based searching.
                subject = (unicode, ATTR_SEARCHABLE | ATTR_INVERTED_INDEX, 'keywords'),

                # Special case where an attribute name is the same as a registered
                # inverted index.  This lets us index on, for example, the message body
                # without actually storing the message inside the database.
                keywords = (list, ATTR_SIMPLE | ATTR_INVERTED_INDEX, 'keywords')
            )
        """
        if len(indexes) == len(attrs) == 0:
            raise ValueError, "Must specify indexes or attributes for object type"

        table_name = "objects_%s" % type_name

        # First pass over the attributes kwargs, sanity-checking provided values.
        for attr_name, attr_defn in attrs.items():
            # We allow attribute definition to be either a 2- to 4-tuple (last two
            # are optional), so pad the tuple with None if a 2- or 3-tuple was specified.
            attrs[attr_name] = attr_defn = tuple(attr_defn) + (None,) * (4-len(attr_defn))
            if len(attr_defn) != 4:
                raise ValueError, "Definition for attribute '%s' is not a 2- to 4-tuple." % attr_name

            # Verify the attribute flags contain either ATTR_SEARCHABLE or ATTR_SIMPLE;
            # it can't contain both as that doesn't make sense.
            if attr_defn[1] & (ATTR_SIMPLE | ATTR_SEARCHABLE) not in (ATTR_SIMPLE, ATTR_SEARCHABLE):
                raise ValueError, "Flags for attribute '%s' must contain exactly one " \
                                  "of ATTR_SIMPLE or ATTR_SEARCHABLE" % attr_name

            # Attribute name can't conflict with reserved names.
            if attr_name in RESERVED_ATTRIBUTES:
                raise ValueError, "Attribute name '%s' is reserved." % attr_name
            elif attr_name in self._inverted_indexes:
                if not attr_defn[1] & ATTR_INVERTED_INDEX or attr_defn[2] != attr_name:
                    # Attributes can be named after inverted indexes, but only if
                    # ATTR_INVERTED_INDEX is specified and the attribute name is the
                    # same as its ivtidx name.
                    raise ValueError, "Attribute '%s' conflicts with inverted index of same name, " \
                                      "but ATTR_INVERTED_INDEX not specified in flags." % attr_name

            if attr_defn[1] & ATTR_INVERTED_INDEX:
                # Attributes with ATTR_INVERTED_INDEX can only be certain types.
                if attr_defn[0] not in (str, unicode, tuple, list, set):
                    raise TypeError, "Type for attribute '%s' must be string, unicode, list, tuple, or set " \
                                     "because it is ATTR_INVERTED_INDEX" % attr_name

                # Make sure inverted index name is valid.
                if attr_defn[2] is None:
                    raise ValueError, "Attribute '%s' flags specify inverted index, " \
                                      "but no inverted index name supplied." % attr_name
                elif attr_defn[2] not in self._inverted_indexes:
                    raise ValueError, "Attribute '%s' specifies undefined interverted index '%s'" % \
                                      (attr_name, attr_defn[2])

            # Compile split regexp if it was given.
            if attr_defn[3] is not None and not callable(attr_defn[3]):
                attrs[attr_name] = attr_defn[:3] + (re.compile(attr_defn[3]),)


        if type_name in self._object_types:
            # This type already exists.  Compare given attributes with
            # existing attributes for this type to see what needs to be done
            # (if anything).
            cur_type_id, cur_type_attrs, cur_type_idx = self._object_types[type_name]
            new_attrs = {}
            table_needs_rebuild = False
            changed = False
            for attr_name, attr_defn in attrs.items():
                attr_type, attr_flags, attr_ivtidx, attr_split = attr_defn
                # TODO: converting an attribute from SIMPLE to SEARCHABLE or vice
                # versa isn't supported yet.  Raise exception here to prevent
                # potential data loss.
                if attr_name in cur_type_attrs and attr_flags & (ATTR_SEARCHABLE | ATTR_SIMPLE) != \
                   cur_type_attrs[attr_name][1] & (ATTR_SEARCHABLE | ATTR_SIMPLE):
                   raise ValueError, "Unsupported attempt to convert attribute '%s' " \
                                     "between ATTR_SIMPLE and ATTR_SEARCHABLE" % attr_name

                if attr_name not in cur_type_attrs or cur_type_attrs[attr_name] != attr_defn:
                    # There is a new attribute specified for this type, or an
                    # existing one has changed.
                    new_attrs[attr_name] = attr_defn
                    changed = True
                    if attr_flags & ATTR_SEARCHABLE:
                        # New attribute isn't simple, needs to alter table.
                        table_needs_rebuild = True
                    elif attr_flags & ATTR_INVERTED_INDEX:
                        # TODO: there is no need to rebuild the table when adding/modifying
                        # an ATTR_SIMPLE | ATTR_INVERTED_INDEX attribute, we just need to
                        # recreate the delete trigger (and remove any rows from the
                        # inverted index's map for this object type if we're removing
                        # an association with that ivtidx).  For now we will force a
                        # rebuild since I'm too lazy to implement the proper way.
                        table_needs_rebuild = True

                        if attr_name in cur_type_attrs and not cur_type_attrs[attr_name][1] & ATTR_INVERTED_INDEX:
                            # FIXME: if we add an inverted index to an existing attribute, we'd
                            # need to reparse that attribute in all rows to populate the inverted
                            # map.  Right now just log a warning.
                            log.warning("Adding inverted index '%s' to existing attribute '%s' not fully " \
                                        "implemented; index may be out of sync.", attr_ivtidx, attr_name)

            if not changed:
                return
            if self._readonly:
                raise DatabaseReadOnlyError('upgrade_to_py3() must be called before database can be modified')

            # Update the attr list to merge both existing and new attributes.
            attrs = cur_type_attrs.copy()
            attrs.update(new_attrs)
            new_indexes = set(indexes).difference(cur_type_idx)
            indexes = set(indexes).union(cur_type_idx)
            self._register_check_indexes(indexes, attrs)

            if not table_needs_rebuild:
                # Only simple (i.e. pickled only) attributes are being added,
                # or only new indexes are added, so we don't need to rebuild the
                # table.
                if len(new_attrs):
                    self._db_query("UPDATE types SET attrs_pickle=? WHERE id=?", (dbpickle(attrs), cur_type_id))

                if len(new_indexes):
                    self._register_create_multi_indexes(new_indexes, table_name)
                    self._db_query("UPDATE types SET idx_pickle=? WHERE id=?", (dbpickle(indexes), cur_type_id))

                self.commit()
                self._load_object_types()
                return

            # We need to update the database now ...

        else:
            # New type definition.  Populate attrs with required internal
            # attributes so they get created with the table.

            new_attrs = cur_type_id = None
            # Merge standard attributes with user attributes for this new type.
            attrs.update({
                'id': (int, ATTR_SEARCHABLE, None, None),
                'parent_type': (int, ATTR_SEARCHABLE, None, None),
                'parent_id': (int, ATTR_SEARCHABLE, None, None),
                'pickle': (RAW_TYPE, ATTR_SEARCHABLE, None, None)
            })
            self._register_check_indexes(indexes, attrs)

        create_stmt = 'CREATE TABLE %s_tmp (' % table_name

        # Iterate through type attributes and append to SQL create statement.
        sql_types = {int: 'INTEGER', long: 'INTEGER', float: 'FLOAT', RAW_TYPE: 'BLOB',
                     UNICODE_TYPE: 'TEXT', BYTES_TYPE: 'BLOB', bool: 'INTEGER', basestring: 'TEXT'}
        for attr_name, (attr_type, attr_flags, attr_ivtidx, attr_split) in attrs.items():
            if attr_flags & ATTR_SEARCHABLE:
                # Attribute needs to be a column in the table, not a pickled value.
                if attr_type not in sql_types:
                    raise ValueError, "Type '%s' not supported" % str(attr_type)
                create_stmt += '%s %s' % (attr_name, sql_types[attr_type])
                if attr_name == 'id':
                    # Special case, these are auto-incrementing primary keys
                    create_stmt += ' PRIMARY KEY AUTOINCREMENT'
                create_stmt += ','

        create_stmt = create_stmt.rstrip(',') + ')'
        self._db_query(create_stmt)


        # Add this type to the types table, including the attributes
        # dictionary.
        self._db_query('INSERT OR REPLACE INTO types VALUES(?, ?, ?, ?)',
                       (cur_type_id, type_name, dbpickle(attrs), dbpickle(indexes)))

        # Sync self._object_types with the object type definition we just
        # stored to the db.
        self._load_object_types()

        if new_attrs:
            # Migrate rows from old table to new temporary one.  Here we copy only
            # ATTR_SEARCHABLE columns that exist in both old and new definitions.
            columns = filter(lambda x: cur_type_attrs[x][1] & ATTR_SEARCHABLE and \
                                       x in attrs and attrs[x][1] & ATTR_SEARCHABLE, cur_type_attrs.keys())
            columns = ','.join(columns)
            self._db_query('INSERT INTO %s_tmp (%s) SELECT %s FROM %s' % \
                           (table_name, columns, columns, table_name))

            # Delete old table.
            self._db_query('DROP TABLE %s' % table_name)

        # Rename temporary table.
        self._db_query('ALTER TABLE %s_tmp RENAME TO %s' % (table_name, table_name))

        # Increase the objectcount for new inverted indexes, and create a
        # trigger that reduces the objectcount for each applicable inverted
        # index when a row is deleted.
        inverted_indexes = self._get_type_inverted_indexes(type_name)
        if inverted_indexes:
            n_rows = self._db_query_row('SELECT COUNT(*) FROM %s' % table_name)[0]
            sql = 'CREATE TRIGGER delete_object_%s DELETE ON %s BEGIN ' % (type_name, table_name)
            for idx_name in inverted_indexes:
                sql += "UPDATE inverted_indexes SET value=MAX(0, value-1) WHERE name='%s' AND attr='objectcount';" % idx_name
                # Add to objectcount (both in db and cached value)
                self._db_query("UPDATE inverted_indexes SET value=value+? WHERE name=? and attr='objectcount'",
                               (n_rows, idx_name))
                self._inverted_indexes[idx_name]['objectcount'] += n_rows
            sql += 'END'
            self._db_query(sql)

        # Create index for locating all objects under a given parent.
        self._db_query("CREATE INDEX %s_parent_idx on %s (parent_id, "\
                       "parent_type)" % (table_name, table_name))

        # If any of these attributes need to be indexed, create the index
        # for that column.
        for attr_name, (attr_type, attr_flags, attr_ivtidx, attr_split) in attrs.items():
            if attr_flags & ATTR_INDEXED:
                self._db_query("CREATE INDEX %s_%s_idx ON %s (%s)" % \
                               (table_name, attr_name, table_name, attr_name))

        # Create multi-column indexes; indexes value has already been verified.
        self._register_create_multi_indexes(indexes, table_name)
        self.commit()


    def register_inverted_index(self, name, min = None, max = None, split = None, ignore = None):
        """
        Registers a new inverted index with the database.

        An inverted index maps arbitrary terms to objects and allows you to
        query based on one or more terms.  If the inverted index already exists
        with the given parameters, no action is performed.

        :param name: the name of the inverted index; must be alphanumeric.
        :type name: str
        :param min: the minimum length of terms to index; terms smaller
                    than this will be ignored.  If None (default), there
                    is no minimum size.
        :type min: int
        :param max: the maximum length of terms to index; terms larger than
                    this will be ignored.  If None (default), there is no
                    maximum size.
        :type max: int
        :param split: used to parse string-based attributes using this inverted
                      index into individual terms.  In the case of regexps, the
                      split method will be called.  (If a string is specified,
                      it will be compiled into a regexp first.)  If *split* is
                      a callable, it will receive a string of text and must return
                      a sequence, and each item in the sequence will be indexed
                      as an individual term.  If split is not specified, the
                      default is to split words at non-alphanumeric/underscore/digit
                      boundaries.
        :type split: callable, regexp (SRE_Pattern) object, or str
        :param ignore: a list of terms that will not be indexed (so-called
                       *stop words*).  If specified, each indexed term for this
                       inverted index will first be checked against this list.
                       If it exists, the term is discarded.

        For example::

            from kaa.db import *
            db = Database('test.db')
            db.register_inverted_index('tags')
            db.register_inverted_index('keywords', min=3, max=30, ignore=STOP_WORDS)
        """
        # Verify specified name doesn't already exist as some object attribute.
        for object_name, object_type in self._object_types.items():
            if name in object_type[1] and name != object_type[1][name][2]:
                raise ValueError, "Inverted index name '%s' conflicts with registered attribute in object '%s'" % \
                                  (name, object_name)

        if split is None:
            # Default split regexp is to split words on
            # alphanumeric/digits/underscore boundaries.
            split = re.compile(u"(\d+)|[_\W]", re.U)
        elif isinstance(split, basestring):
            split = re.compile(py3_str(split), re.U)

        if name not in self._inverted_indexes and not self._readonly:
            self._db_query('INSERT INTO inverted_indexes VALUES(?, "objectcount", 0)', (name,))
            # Create the tables needed by the inverted index.
            with self._lock:
                self._db.executescript(CREATE_IVTIDX_TEMPLATE.replace('%IDXNAME%', name))
        elif name in self._inverted_indexes:
            defn = self._inverted_indexes[name]
            if min == defn['min'] and max == defn['max'] and split == defn['split'] and \
               ignore == defn['ignore']:
               # Definition unchanged, nothing to do.
               return

        if self._readonly:
            raise DatabaseReadOnlyError('upgrade_to_py3() must be called before database can be modified')

        defn = {
            'min': min,
            'max': max,
            'split': split,
            'ignore': ignore,
        }

        self._db_query("INSERT OR REPLACE INTO inverted_indexes VALUES(?, 'definition', ?)",
                       (name, dbpickle(defn)))

        defn['objectcount'] = 0
        self._inverted_indexes[name] = defn
        self.commit()


    def _load_inverted_indexes(self):
        for name, attr, value in self._db_query("SELECT * from inverted_indexes"):
            if name not in self._inverted_indexes:
                self._inverted_indexes[name] = {}
            if attr == 'objectcount':
                self._inverted_indexes[name][attr] = int(value)
            elif attr == 'definition':
                self._inverted_indexes[name].update(dbunpickle(value))


    def _load_object_types(self):
        is_pickle_proto_2 = False
        for id, name, attrs, idx in self._db_query("SELECT * from types"):
            if attrs[1] == 0x02 or idx[1] == 0x02:
                is_pickle_proto_2 = True
            self._object_types[name] = id, dbunpickle(attrs), dbunpickle(idx)

        if sys.hexversion >= 0x03000000 and is_pickle_proto_2:
            self._readonly = True
            log.warning('kaa.db databases created by Python 2 are read-only until upgrade_to_py3() is called')


    def _get_type_inverted_indexes(self, type_name):
        if type_name not in self._object_types:
            return []

        indexed_attrs = set()
        type_attrs = self._object_types[type_name][1]
        for name, (attr_type, flags, attr_ivtidx, attr_split) in type_attrs.items():
            if flags & ATTR_INVERTED_INDEX:
                indexed_attrs.add(attr_ivtidx)

        return list(indexed_attrs)


    def _get_type_attrs(self, type_name):
        return self._object_types[type_name][1]

    def _get_type_id(self, type_name):
        return self._object_types[type_name][0]


    def _make_query_from_attrs(self, query_type, attrs, type_name):
        type_attrs = self._get_type_attrs(type_name)

        # True if an attribute from the pickle was removed and we must force an update.
        pickle_attr_removed = False
        columns = []
        values = []
        placeholders = []

        for key in attrs.keys():
            if key not in type_attrs:
                if key in self._inverted_indexes:
                    continue
                raise ValueError("Reference to undefined attribute '%s' for type '%s'" % (key, type_name))
            if attrs[key] == None:
                # Remove all None attributes (even ATTR_SEARCHABLE), otherwise we will
                # raise a TypeError later, since NoneType isn't the right type.
                if type_attrs[key][1] & ATTR_SIMPLE:
                    # Attribute removed from a pickle, be sure we update the pickle in
                    # the db if this is an 'update' query_type.
                    pickle_attr_removed = True
                del attrs[key]

        attrs_copy = attrs.copy()
        for name, (attr_type, flags, attr_ivtidx, attr_split) in type_attrs.items():
            if flags & ATTR_SEARCHABLE and name in attrs:
                columns.append(name)
                placeholders.append("?")
                value = attrs[name]
                # Coercion for numeric types
                if isinstance(value, (int, long, float)) and attr_type in (int, long, float):
                    value = attr_type(value)
                elif isinstance(value, basestring) and \
                     flags & ATTR_INDEXED_IGNORE_CASE == ATTR_INDEXED_IGNORE_CASE:
                    # If the attribute is ATTR_INDEXED_IGNORE_CASE and it's a string
                    # then we store it as lowercase in the table column, while
                    # keeping the original (unchanged case) value in the pickle.
                    # This allows us to do case-insensitive searches on indexed
                    # columns and still benefit from the index.
                    attrs_copy["__" + name] = value
                    value = value.lower()

                if not isinstance(value, attr_type):
                    raise TypeError("Type mismatch in query for %s: '%s' (%s) is not a %s" % \
                                    (name, str(value), str(type(value)), str(attr_type)))
                if attr_type == BYTES_TYPE:
                    # For Python 2, convert non-unicode strings to buffers.  (For Python 3,
                    # BYTES_TYPE == RAW_TYPE so this is a no-op.)
                    value = RAW_TYPE(value)
                values.append(value)
                del attrs_copy[name]

        if len(attrs_copy) > 0 or (pickle_attr_removed and query_type == 'update'):
            # From the remaining attributes, remove those named after inverted
            # indexes that aren't explicitly registered as attributes in the
            # object type.  Here is where we keep our cached copy of inverted
            # index terms.
            for attr in attrs_copy.keys():
                if attr in self._inverted_indexes and attr not in type_attrs:
                    del attrs_copy[attr]

            # What's left gets put into the pickle.
            columns.append("pickle")
            values.append(dbpickle(attrs_copy))
            placeholders.append("?")

        table_name = "objects_" + type_name

        if query_type == "add":
            if columns:
                columns = ",".join(columns)
                placeholders = ",".join(placeholders)
                q = "INSERT INTO %s (%s) VALUES(%s)" % (table_name, columns, placeholders)
            else:
                # Insert empty row (need to specify at least one column to make valid
                # SQL, so just specify id of null, which will assume next value in
                # sequence.
                q = 'INSERT INTO %s (id) VALUES(null)' % table_name
        else:
            q = "UPDATE %s SET " % table_name
            for col, ph in zip(columns, placeholders):
                q += "%s=%s," % (col, ph)
            # Trim off last comma
            q = q.rstrip(",")
            q += " WHERE id=?"
            values.append(attrs["id"])

        return q, values


    def delete(self, obj):
        """
        Delete the specified object.

        :param obj: the object to delete
        :type obj: :class:`ObjectRow` or (object_type, object_id)
        """
        # TODO: support recursive delete (delete all decendents)
        object_type, object_id = self._to_obj_tuple(obj)
        return self._delete_multiple_objects({object_type: (object_id,)})


    def reparent(self, obj, parent):
        """
        Change the parent of an object.

        :param obj: the object to reparent
        :type obj: :class:`ObjectRow`, or (type, id)
        :param parent: the new parent of the object
        :type parent: :class:`ObjectRow`, or (type, id)

        This is a convenience method to improve code readability, and is
        equivalent to::

            database.update(obj, parent=parent)
        """
        return self.update(obj, parent=parent)


    def retype(self, obj, new_type):
        """
        Convert the object to a new type.

        :param obj: the object to be converted to the new type
        :type obj: :class:`ObjectRow`, or (type, id)
        :param new_type: the type to convert the object to
        :type newtype: str
        :returns: an :class:`ObjectRow`, converted to the new type with the new id

        Any attribute that has not also been registered with ``new_type``
        (and with the same name) will be removed.  Because the object is
        effectively changing ids, all of its existing children will be
        reparented to the new id.
        """

        if new_type not in self._object_types:
            raise ValueError('Parent type %s not registered in database' % new_type)

        # Reload and force pickled attributes into the dict.
        try:
            attrs = dict(self.get(obj))
        except TypeError:
            raise ValueError('Object (%s, %s) is not found in database' % (obj['type'], obj['id']))

        parent = attrs.get('parent')
        # Remove all attributes that aren't also in the destination type.  Also
        # remove type, id, and parent attrs, which get regenerated when we add().
        for attr_name in attrs.keys():
            # TODO: check src and dst attr types and try to coerce, and if
            # not possible, raise an exception.
            if attr_name not in self._object_types[new_type][1] or attr_name in ('type', 'id', 'parent'):
                del attrs[attr_name]

        new_obj = self.add(new_type, parent, **attrs)
        # Reparent all current children to the new id.
        for child in self.query(parent=obj):
            # TODO: if this raises, delete new_obj (to rollback) and reraise.
            self.reparent(child, new_obj)

        self.delete(obj)
        return new_obj


    def delete_by_query(self, **attrs):
        """
        Delete all objects returned by the given query.

        :param attrs: see :meth:`~kaa.db.Database.query` for details.
        :returns: the number of objects deleted
        :rtype: int
        """
        attrs["attrs"] = ["id"]
        results = self.query(**attrs)
        if len(results) == 0:
            return 0

        results_by_type = {}
        for o in results:
            if o["type"] not in results_by_type:
                results_by_type[o["type"]] = []
            results_by_type[o["type"]].append(o["id"])

        return self._delete_multiple_objects(results_by_type)


    def _delete_multiple_objects(self, objects):
        if self._readonly:
            raise DatabaseReadOnlyError('upgrade_to_py3() must be called before database can be modified')

        child_objects = {}
        count = 0
        for object_type, object_ids in objects.items():
            ivtidxes = self._get_type_inverted_indexes(object_type)
            self._delete_multiple_objects_inverted_index_terms({object_type: (ivtidxes, object_ids)})
            object_type_id = self._get_type_id(object_type)
            if len(object_ids) == 0:
                continue

            object_ids_str = _list_to_printable(object_ids)
            self._db_query("DELETE FROM objects_%s WHERE id IN %s" % \
                           (object_type, object_ids_str))
            count += self._cursor.rowcount

            # Record all children of this object so we can later delete them.
            for tp_name, (tp_id, tp_attrs, tp_idx) in self._object_types.items():
                children_ids = self._db_query("SELECT id FROM objects_%s WHERE parent_id IN %s AND parent_type=?" % \
                                              (tp_name, object_ids_str), (object_type_id,))
                if len(children_ids):
                    child_objects[tp_name] = [x[0] for x in children_ids]

        if len(child_objects):
            # If there are any child objects of the objects we just deleted,
            # delete those now.
            count += self._delete_multiple_objects(child_objects)

        if count:
            self._set_dirty()
        return count


    def add(self, object_type, parent=None, **attrs):
        """
        Add an object to the database.

        :param object_type: the name of the object type previously created by
                            :meth:`~kaa.db.Database.register_object_type_attrs`.
        :type object_type: str
        :param parent: specifies the parent of this object, if any; does not have
                       to be an object of the same type.
        :type parent: :class:`ObjectRow` or 2-tuple (object_type, object_id)
        :param attrs: keyword arguments specifying the attribute (which must
                      have been registered) values.  Registered attributes that
                      are not explicitly specified here will default to None.
        :returns: :class:`ObjectRow` representing the added object

        For example::

            import os
            from kaa.db import *
            db = Database('test.db')
            db.register_object_type_attrs('directory',
                name = (str, ATTR_SEARCHABLE),
                mtime = (float, ATTR_SIMPLE)
            )
            root = db.add('directory', name='/', mtime=os.stat('/').st_mtime)
            db.add('directory', parent=root, name='etc', mtime=os.stat('/etc').st_mtime)
            db.add('directory', parent=root, name='var', mtime=os.stat('/var').st_mtime)
        """
        if self._readonly:
            raise DatabaseReadOnlyError('upgrade_to_py3() must be called before database can be modified')

        type_attrs = self._get_type_attrs(object_type)
        if parent:
            attrs['parent_type'], attrs['parent_id'] = self._to_obj_tuple(parent, numeric=True)

        # Increment objectcount for the applicable inverted indexes.
        inverted_indexes = self._get_type_inverted_indexes(object_type)
        if inverted_indexes:
            self._db_query("UPDATE inverted_indexes SET value=value+1 WHERE attr='objectcount' AND name IN %s" % \
                           _list_to_printable(inverted_indexes))


        # Process inverted index maps for this row
        ivtidx_terms = []
        for ivtidx in inverted_indexes:
            # Sync cached objectcount with the DB (that we just updated above)
            self._inverted_indexes[ivtidx]['objectcount'] += 1
            terms_list = []
            split = self._inverted_indexes[ivtidx]['split']
            for name, (attr_type, flags, attr_ivtidx, attr_split) in type_attrs.items():
                if attr_ivtidx == ivtidx and name in attrs:
                    terms_list.append((attrs[name], 1.0, attr_split or split, ivtidx))

            if ivtidx in attrs and ivtidx not in type_attrs:
                # Attribute named after an inverted index is given in kwagrs,
                # but that ivtidx is not a registered attribute (which would be
                # handled in the for loop just above).
                terms_list.append((attrs[ivtidx], 1.0, split, ivtidx))

            terms = self._score_terms(terms_list)
            if terms:
                ivtidx_terms.append((ivtidx, terms))
                # If there are no terms for this ivtidx, we don't bother storing
                # an empty list in the pickle.
                if ivtidx in type_attrs:
                    # Registered attribute named after ivtidx; store ivtidx
                    # terms in object.
                    attrs[ivtidx] = terms.keys()

        query, values = self._make_query_from_attrs("add", attrs, object_type)
        self._db_query(query, values)

        # Add id given by db, as well as object type.
        attrs['id'] = self._cursor.lastrowid
        attrs['type'] = unicode(object_type)
        attrs['parent'] = self._to_obj_tuple(parent) if parent else (None, None)

        for ivtidx, terms in ivtidx_terms:
            self._add_object_inverted_index_terms((object_type, attrs['id']), ivtidx, terms)

        # Populate dictionary with keys for this object type not specified in kwargs.
        attrs.update(dict.fromkeys([k for k in type_attrs if k not in attrs.keys() + ['pickle']]))

        self._set_dirty()
        return ObjectRow(None, None, attrs)


    def get(self, obj):
        """
        Fetch the given object from the database.

        :param obj: a 2-tuple (type, id) representing the object.
        :returns: :class:`ObjectRow`

        obj may also be an :class:`ObjectRow`, however that usage is less
        likely to be useful, because an :class:`ObjectRow` already contains all
        information about the object.  One common use-case is to reload a
        possibly changed object from disk.

        This method is essentially shorthand for::

           database.query(object=(object_type, object_id))[0]
        """
        obj = self._to_obj_tuple(obj)
        rows = self.query(object=obj)
        if rows:
            return rows[0]


    def update(self, obj, parent=None, **attrs):
        """
        Update attributes for an existing object in the database.

        :param obj: the object whose attributes are being modified
        :type obj: :class:`ObjectRow` or 2-tuple (object_type, object_id)
        :param parent: if specified, the object is reparented to the given
                       parent object, otherwise the parent remains the
                       same as when the object was added with
                       :meth:`~kaa.db.Database.add`.
        :type parent: :class:`ObjectRow` or 2-tuple (object_type, object_id)
        :param attrs: keyword arguments specifying the attribute (which must
                      have been registered) values.  Registered attributes that
                      are not explicitly specified here will preserve their
                      original values (except for special attributes named
                      after inverted index; see warning below).

        Continuing from the example in :meth:`~kaa.db.Database.add`, consider::

            >>> d = db.add('directory', parent=root, name='foo')
            >>> db.update(d, name='bar')
            >>> d = db.get(d)   # Reload changes
            >>> d['name']
            'bar'


        .. warning::
           When updating an attribute associated with an inverted index, all
           terms for that inverted index in the object need to be rescored.
           For special attributes with the same name as inverted indexes, it's
           the caller's responsibility to ensure terms are passed back during
           update.

           In the email example from :meth:`register_object_type_attrs`, if the
           subject attribute is updated by itself, any previously indexed terms
           passed to the keywords attribute (the message body) would be
           discarded after the update.  If updating the subject, the caller
           would be required to pass the message body in the keywords attribute
           again, in order to preserve those terms.

           If none of the attributes being updated are associated with an
           inverted index that also has a same-named special attribute then
           this warning doesn't apply as the inverted index does not need
           to be updated.
        """
        if self._readonly:
            raise DatabaseReadOnlyError('upgrade_to_py3() must be called before database can be modified')
        object_type, object_id = self._to_obj_tuple(obj)

        type_attrs = self._get_type_attrs(object_type)
        get_pickle = False

        # Determine which inverted indexes need to be regenerated for this
        # object.  Builds a dictionary of ivtidxes with a dirty flag and
        # a list of sql columns needed for reindexing.
        ivtidx_columns = {}
        for name, (attr_type, flags, attr_ivtidx, attr_split) in type_attrs.items():
            if flags & ATTR_INVERTED_INDEX:
                if attr_ivtidx not in ivtidx_columns:
                    ivtidx_columns[attr_ivtidx] = [ False, [] ]
                if flags & ATTR_SEARCHABLE:
                    ivtidx_columns[attr_ivtidx][1].append(name)
                if flags & (ATTR_SIMPLE | ATTR_IGNORE_CASE):
                    get_pickle = True
                if name in attrs:
                    ivtidx_columns[attr_ivtidx][0] = True

            # If the updated attribute is stored in the pickle (either a simple attr
            # or an case-insensitive indexed attr in which __foo is in the pickle)
            # then we must first retrieve the pickle for this object from the db.
            if (flags & ATTR_SIMPLE or flags & ATTR_INDEXED_IGNORE_CASE == ATTR_INDEXED_IGNORE_CASE) and \
               name in attrs:
                get_pickle = True

        # TODO: if ObjectRow is supplied, don't need to fetch columns
        # that are available in the ObjectRow.  (Of course this assumes
        # the object wasn't changed via elsewhere during the life of the
        # ObjectRow object, so maybe we don't want to do that.)
        reqd_columns = ['pickle'] if get_pickle else []
        for dirty, searchable_attrs in ivtidx_columns.values():
            if dirty:
                reqd_columns.extend(searchable_attrs)

        if reqd_columns:
            q = 'SELECT %s FROM objects_%s WHERE id=?' % (','.join(reqd_columns), object_type)
            row = self._db_query_row(q, (object_id,))
            if not row:
                raise ValueError, "Can't update unknown object (%s, %d)" % (object_type, object_id)
            if reqd_columns[0] == 'pickle' and row[0]:
                # One of the attrs we're updating is in the pickle, so we
                # have fetched it; now convert it to a dict.
                row_attrs = dbunpickle(row[0])
                for key, value in row_attrs.items():
                    # Rename all __foo to foo for ATTR_IGNORE_CASE columns
                    if key.startswith('__') and type_attrs[key[2:]][1] & ATTR_IGNORE_CASE:
                        row_attrs[key[2:]] = value
                        del row_attrs[key]
                # Update stored pickle data with new ATTR_SIMPLE attribute values
                row_attrs.update(attrs)
                attrs = row_attrs


        if parent:
            attrs['parent_type'], attrs['parent_id'] = self._to_obj_tuple(parent, numeric=True)
        attrs['id'] = object_id
        # Make copy of attrs for later query, since we're now about to mess with it.
        orig_attrs = attrs.copy()

        # Merge the ivtidx columns we grabbed above into attrs dict.
        for n, name in enumerate(reqd_columns):
            if name not in attrs and name != 'pickle':
                attrs[name] = row[n]

        for ivtidx, (dirty, searchable_attrs) in ivtidx_columns.items():
            if not dirty:
                # No attribute for this ivtidx changed.
                continue
            split = self._inverted_indexes[ivtidx]['split']
            # Remove existing indexed words for this object.
            self._delete_object_inverted_index_terms((object_type, object_id), ivtidx)

            # TODO: code duplication from add()
            # Need to reindex all columns in this object using this ivtidx.
            terms_list = []
            for name, (attr_type, flags, attr_ivtidx, attr_split) in type_attrs.items():
                if attr_ivtidx == ivtidx and name in attrs:
                    if attr_type == BYTES_TYPE and isinstance(attrs[name], RAW_TYPE):
                        # We store string objects in the db as buffers, in
                        # order to prevent any unicode issues.  So we need
                        # to convert the buffer we got from the db back to
                        # a string before parsing the attribute into terms.
                        attrs[name] = BYTES_TYPE(attrs[name])
                    terms_list.append((attrs[name], 1.0, attr_split or split, ivtidx))

            if ivtidx in attrs and ivtidx not in type_attrs:
                # Attribute named after an inverted index is given, but
                # that ivtidx is not a named attribute (which would be handled
                # in the for loop just above).
                terms_list.append((attrs[ivtidx], 1.0, split, ivtidx))

            terms = self._score_terms(terms_list)
            self._add_object_inverted_index_terms((object_type, object_id), ivtidx, terms)
            if ivtidx in type_attrs:
                # Registered attribute named after ivtidx; store ivtidx
                # terms in object.
                if not terms and ivtidx in orig_attrs:
                    # Update removed all terms for this ivtidx, remove from pickle.
                    orig_attrs[ivtidx] = None
                elif terms:
                    # There are terms for this ivtidx, store in pickle.
                    orig_attrs[ivtidx] = terms.keys()

        query, values = self._make_query_from_attrs("update", orig_attrs, object_type)
        self._db_query(query, values)
        self._set_dirty()
        # TODO: if an objectrow was given, return an updated objectrow


    def commit(self):
        """
        Explicitly commit any changes made to the database.

        .. note:: Any uncommitted changes will automatically be committed at
                  program exit.
        """
        main.signals['exit'].disconnect(self.commit)
        self._dirty = False
        with self._lock:
            self._db.commit()


    def query(self, **attrs):
        """
        Query the database for objects matching all of the given keyword
        attributes.

        Keyword arguments can be any previously registered :attr:`~kaa.db.ATTR_SEARCHABLE`
        object attribute for any object type, or the name of a registered
        inverted index.  There are some special keyword arguments:

        :param parent: require all matched objects to have the given object
                       (or objects) as their immediate parent ancestor.  If
                       parent is a list or tuple, then they specify a list
                       of possible parents, any of which would do.
        :type parent: :class:`ObjectRow`, 2-tuple (object_type, object_id), 2-tuple
                      (object_type, :class:`~kaa.db.QExpr`), or a list of those
        :param object: match only a specific object. Not usually very useful,
                       but could be used to test if the given object matches
                       terms from an inverted index.
        :type object: :class:`ObjectRow` or 2-tuple (object_type, object_id)
        :param type: only search items of this object type; if None (or not
                     specified) then all types are searched
        :type type: str
        :param limit: return only this number of results; if None (or not
                      specified), all matches are returned.
        :type limit: int
        :param attrs: a list of attribute names to be returned; if not specified,
                      all attributes registered with the object type are available
                      in the result.  Only specifying the attributes required
                      can help performance moderately, but generally it isn't
                      required except wit *distinct* below.
        :type attrs: list of str
        :param distinct: if True, ensures that each object in the result set is
                         unique with respect to the attributes specified in the
                         *attrs* parameter.  When distinct is True, *attrs* is
                         required and none of the attributes specified may be
                         simple.
        :param orattrs: attribute names that will be ORed in the query; by default,
                        all attributes are ANDed.
        :type orattrs: list
        :raises: ValueError if the query is invalid (e.g. attempting to query
                 on a simple attribute)
        :returns: a list of :class:`ObjectRow` objects

        When any of the attributes are inverted indexes, the result list is
        sorted according to a score.  The score is based upon the frequency
        of the matched terms relative to the entire database.

        .. note:: If you know which type of object you're interested in, you
           should specify the *type* as it will help improve performance by
           reducing the scope of the search, especially for inverted indexes.

           Another significant factor in performance is whether or not a *limit*
           is specified.  Query time generally scales linearly with respect to
           the number of rows found, but in the case of searches on inverted
           indexes, specifying a limit can drastically reduce search time, but
           does not affect scoring.

        Values supplied to attributes (other than inverted indexes) require
        exact matches.  To search based on an expression, such as inequality,
        ranges, substrings, set inclusion, etc. require the use of a
        :class:`~kaa.db.QExpr` object.

        Expanding on the example provided in
        :meth:`~kaa.db.Database.register_object_type_attrs`::

            >>> db.add('msg', sender=u'Stewie Griffin', subject=u'Blast!',
                       keywords='When the world is mine, your death shall be quick and painless.')
            >>> # Exact match based on sender
            >>> db.query(sender=u'Stewie Griffin')
            [<kaa.db.ObjectRow object at 0x7f652b251030>]
            >>> # Keyword search requires all keywords
            >>> db.query(keywords=['death', 'blast'])
            [<kaa.db.ObjectRow object at 0x7f652c3d1f90>]
            >>> # This doesn't work, since it does an exact match ...
            >>> db.query(sender=u'Stewie')
            []
            >>> # ... but we can use QExpr to do a substring/pattern match.
            >>> db.query(sender=QExpr('like', u'Stewie%'))
            [<kaa.db.ObjectRow object at 0x7f652c3d1f90>]
            >>> # How about a regexp search.
            >>> db.query(sender=QExpr('regexp', ur'.*\\bGriffin'))
            [<kaa.db.ObjectRow object at 0x7f652b255030>]

        """
        query_info = {}
        parents = []
        query_type = "ALL"
        results = []
        query_info["columns"] = {}
        query_info["attrs"] = {}

        if "object" in attrs:
            attrs['type'], attrs['id'] = self._to_obj_tuple(attrs['object'])
            del attrs['object']

        ivtidx_results = ivtidx_results_by_type = None
        for ivtidx in self._inverted_indexes:
            # TODO: Possible optimization: do ivtidx search after the query
            # below only on types that have results iff all queried columns are
            # indexed.
            # TODO: could be smarter about the order in which we do ivtidx
            # searches (do least populated first)
            if ivtidx in attrs:
                # If search criteria other than this inverted index are specified,
                # we can't enforce a limit on the search, otherwise we
                # might miss intersections.
                if len(set(attrs).difference(('type', 'limit', ivtidx))) > 0:
                    limit = None
                else:
                    limit = attrs.get('limit')

                r = self._query_inverted_index(ivtidx, attrs[ivtidx], limit, attrs.get('type'))
                if ivtidx_results is None:
                    ivtidx_results = r
                else:
                    for o in ivtidx_results.keys():
                        if o not in r:
                            del ivtidx_results[o]
                        else:
                            ivtidx_results[o] *= r[o]

                if not ivtidx_results:
                    # No matches, so we're done.
                    return []

                del attrs[ivtidx]

        if ivtidx_results:
            ivtidx_results_by_type = {}
            for tp, id in ivtidx_results.keys():
                if tp not in ivtidx_results_by_type:
                    ivtidx_results_by_type[tp] = []
                ivtidx_results_by_type[tp].append(id)

        if attrs.get('type') is not None:
            if attrs["type"] not in self._object_types:
                raise ValueError, "Unknown object type '%s'" % attrs["type"]
            type_list = [(attrs["type"], self._object_types[attrs["type"]])]
        else:
            type_list = self._object_types.items()

        if attrs.get('parent') is not None:
            # ("type", id_or_QExpr) or (("type1", id_or_QExpr), ("type2", id_or_QExpr), ...)
            if isinstance(attrs['parent'], ObjectRow) or \
               (isinstance(attrs['parent'], (list, tuple)) and \
                not isinstance(attrs['parent'][0], (list, tuple))):
                # (type, parent) -> ((type, parent),)
                attrs['parent'] = (attrs['parent'],)

            for parent_obj in attrs['parent']:
                parent_type_id, parent_id = self._to_obj_tuple(parent_obj, numeric=True)
                if not isinstance(parent_id, QExpr):
                    parent_id = QExpr("=", parent_id)
                parents.append((parent_type_id, parent_id))

        if attrs.get('limit') is not None:
            result_limit = attrs["limit"]
        else:
            result_limit = None

        if attrs.get('attrs') is not None:
            requested_columns = attrs["attrs"]
        else:
            requested_columns = None

        if attrs.get('distinct') is not None:
            if attrs["distinct"]:
                if not requested_columns:
                    raise ValueError, "Distinct query specified, but no attrs kwarg given."
                query_type = "DISTINCT"

        if attrs.get('orattrs') is not None:
            orattrs = set(attrs['orattrs'])
        else:
            orattrs = ()

        # Remove all special keywords
        for attr in ('parent', 'object', 'type', 'limit', 'attrs', 'distinct', 'orattrs'):
            attrs.pop(attr, None)

        for type_name, (type_id, type_attrs, type_idx) in type_list:
            if ivtidx_results and type_id not in ivtidx_results_by_type:
                # If we've done a ivtidx search, don't bother querying
                # object types for which there were no hits.
                continue

            # Select only sql columns (i.e. attrs that aren't ATTR_SIMPLE).
            all_columns = [ x for x in type_attrs if type_attrs[x][1] & ATTR_SEARCHABLE ]
            if requested_columns:
                columns = requested_columns[:]
                # Ensure that all the requested columns exist for this type
                missing = tuple(set(columns).difference(type_attrs.keys()))
                if missing:
                    raise ValueError, "One or more requested attributes %s are not available for type '%s'" % \
                                      (str(missing), type_name)
                # If any of the requested attributes are ATTR_SIMPLE or
                # ATTR_INDEXED_IGNORE_CASE then we need the pickle.
                pickled = [ x for x in columns if type_attrs[x][1] & (ATTR_SIMPLE | ATTR_INDEXED_IGNORE_CASE) in
                                                                     (ATTR_SIMPLE, ATTR_INDEXED_IGNORE_CASE)]
                if pickled:
                    # One or more attributes from pickle are requested in attrs list,
                    # so we need to grab the pickle column.
                    if 'pickle' not in columns:
                        columns.append('pickle')
                    # Remove the list of pickled attributes so we don't
                    # request them as sql columns.
                    columns = list(set(columns).difference(pickled))
            else:
                columns = all_columns

            # Now construct a query based on the supplied attributes for this
            # object type.

            # If any of the attribute names aren't valid for this type, then we
            # don't bother matching, since this an AND query and there won't be
            # any matches.
            missing = set(attrs).difference(all_columns)
            if missing:
                # Raise exception if user attempts to search on a simple attr.
                simple = [ x for x in missing if x in type_attrs and type_attrs[x][1] & ATTR_SIMPLE ]
                if simple:
                    raise ValueError, "Querying on non-searchable attribute '%s'" % simple[0]
                continue

            q, qor = [], []
            query_values, qor_values = [], []
            q.append("SELECT %s '%s',%d,id,%s FROM objects_%s" % \
                (query_type, type_name, type_id, ",".join(columns), type_name))

            if ivtidx_results != None:
                q.append("WHERE")
                q.append("id IN %s" % _list_to_printable(ivtidx_results_by_type[type_id]))

            if len(parents):
                q.append(("WHERE", "AND")["WHERE" in q])
                expr = []
                for parent_type, parent_id in parents:
                    sql, values = parent_id.as_sql("parent_id")
                    expr.append("(parent_type=? AND %s)" % sql)
                    query_values += (parent_type,) + values
                q.append("(%s)" % " OR ".join(expr))

            for attr, value in attrs.items():
                is_or_attr = attr in orattrs
                attr_type, attr_flags = type_attrs[attr][:2]
                if not isinstance(value, QExpr):
                    value = QExpr("=", value)

                # Coerce between numeric types; also coerce a string of digits into a numeric
                # type.
                if attr_type in (int, long, float) and (isinstance(value._operand, (int, long, float)) or \
                    isinstance(value._operand, basestring) and value._operand.isdigit()):
                    value._operand = attr_type(value._operand)

                # Verify expression operand type is correct for this attribute.
                if value._operator not in ("range", "in", "not in") and \
                   not isinstance(value._operand, attr_type):
                    raise TypeError, "Type mismatch in query: '%s' (%s) is not a %s" % \
                                          (str(value._operand), str(type(value._operand)), str(attr_type))

                # Queries on ATTR_IGNORE_CASE string columns are case-insensitive.
                if isinstance(value._operand, basestring) and type_attrs[attr][1] & ATTR_IGNORE_CASE:
                    value._operand = value._operand.lower()
                    if not (type_attrs[attr][1] & ATTR_INDEXED):
                        # If this column is ATTR_INDEXED then we already ensure
                        # the values are stored in lowercase in the db, so we
                        # don't want to get sql to lower() the column because
                        # it's needless, and more importantly, we won't be able
                        # to use any indices on the column.
                        attr = 'lower(%s)' % attr

                if isinstance(value._operand, BYTES_TYPE):
                    # For Python 2, convert non-unicode strings to buffers.  (For Python 3,
                    # BYTES_TYPE == RAW_TYPE so this is a no-op.)
                    value._operand = RAW_TYPE(value._operand)

                sql, values = value.as_sql(attr)
                if is_or_attr:
                    qor.append(sql)
                    qor_values.extend(values)
                else:
                    q.append('AND' if 'WHERE' in q else 'WHERE')
                    q.append(sql)
                    query_values.extend(values)

            if qor:
                q.append('AND' if 'WHERE' in q else 'WHERE')
                q.append('(%s)' % ' OR '.join(qor))

            if query_type == 'DISTINCT':
                q.append(' GROUP BY %s' % ','.join(requested_columns))

            if result_limit != None:
                q.append(" LIMIT %d" % result_limit)

            q = " ".join(q)
            rows = self._db_query(q, query_values + qor_values, cursor=self._qcursor)

            if result_limit != None:
                results.extend(rows[:result_limit - len(results) + 1])
            else:
                results.extend(rows)

            query_info["columns"][type_name] = ["type"] + columns
            query_info["attrs"][type_name] = type_attrs

            if result_limit != None and len(rows) == result_limit:
                # No need to try the other types, we're done.
                break

        # If ivtidx search was done, sort results based on score (highest
        # score first).
        if ivtidx_results:
            results.sort(key=lambda r: ivtidx_results[(r[1], r[2])])

        return results


    def query_one(self, **attrs):
        """
        Like :meth:`~kaa.db.Database.query` but returns a single object only.

        This is a convenience method, and query_one(...) is equivalent to::

            results = db.query(...)
            if results:
                obj = results[0]
            else:
                obj = None

        limit=1 is implied by this query.
        """
        results = self.query(limit=1, **attrs)
        return results[0] if results else None


    def _score_terms(self, terms_list):
        """
        Scores the terms given in terms_list, which is a list of tuples (terms,
        coeff, split, ivtidx), where terms is the string or sequence of
        terms to be scored, coeff is the weight to give each term in this part
        (1.0 is normal), split is the function or regular expression used to
        split terms (only used if a string is given for terms), and ivtidx is
        the name of inverted index we're scoring for.

        Terms are either unicode objects or strings, or sequences of unicode or
        string objects.  In the case of strings, they are passed through
        py3_str() to try to decode them intelligently.

        Each term T is given the score:
             sqrt( (T coeff * T count) / total term count )

        Counts are relative to the given object, not all objects in the
        database.

        Returns a dict of term->score.  Terms (the keys) are converted to
        unicode objects, but their case is preserved as given (if term is
        given more than once, the case of the first occurence is used), and
        score (the values) are calculated as described above.
        """
        terms_scores = {}
        total_terms = 0

        for terms, coeff, split, ivtidx in terms_list:
            if not terms:
                continue
            # Swap ivtidx name for inverted index definition dict
            ivtidx = self._inverted_indexes[ivtidx]
            if not isinstance(terms, (basestring, list, tuple)):
                raise ValueError, "Invalid type (%s) for ATTR_INVERTED_INDEX attribute. " \
                                  "Only sequence, unicode or str allowed." % str(type(terms))

            if isinstance(terms, (list, tuple)):
                terms = [py3_str(term) for term in terms]
                parsed = terms
            else:
                terms = py3_str(terms)
                if callable(split):
                    parsed = list(split(terms))
                else:
                    parsed = split.split(terms)

            for term in parsed:
                if not term or (ivtidx['max'] and len(term) > ivtidx['max']) or \
                   (ivtidx['min'] and len(term) < ivtidx['min']):
                    continue

                lower_term = term.lower()

                if ivtidx['ignore'] and lower_term in ivtidx['ignore']:
                    continue
                if lower_term not in terms_scores:
                    terms_scores[lower_term] = [term, coeff]
                else:
                    terms_scores[lower_term][1] += coeff
                total_terms += 1

        # Score based on term frequency in document.  (Add weight for
        # non-dictionary terms?  Or longer terms?)
        for lower_term, score in terms_scores.items():
            terms_scores[lower_term][1] = math.sqrt(terms_scores[lower_term][1] / total_terms)
        return dict(terms_scores.values())


    def _delete_object_inverted_index_terms(self, (object_type, object_id), ivtidx):
        """
        Removes all indexed terms under the specified inverted index for the
        given object.  This function must be called when an object is removed
        from the database, or when an ATTR_INVERTED_INDEX attribute of an
        object is being updated (and therefore that inverted index must be
        re-indexed).
        """
        self._delete_multiple_objects_inverted_index_terms({object_type: ((ivtidx,), (object_id,))})


    def _delete_multiple_objects_inverted_index_terms(self, objects):
        """
        objects = dict type_name -> (ivtidx tuple, ids tuple)
        """
        for type_name, (ivtidxes, object_ids) in objects.items():
            # Resolve object type name to id
            type_id = self._get_type_id(type_name)

            for ivtidx in ivtidxes:
                # Remove all terms for the inverted index associated with this
                # object.  A trigger will decrement the count column in the
                # terms table for all term_id that get affected.
                self._db_query("DELETE FROM ivtidx_%s_terms_map WHERE object_type=? AND object_id IN %s" % \
                               (ivtidx, _list_to_printable(object_ids)), (type_id,))
                self._inverted_indexes[ivtidx]['objectcount'] -= len(object_ids)


    def _add_object_inverted_index_terms(self, (object_type, object_id), ivtidx, terms):
        """
        Adds the dictionary of terms (as computed by _score_terms()) to the
        specified inverted index database for the given object.
        """
        if not terms:
            return

        # Resolve object type name to id
        object_type = self._get_type_id(object_type)

        # Holds any of the given terms that already exist in the database
        # with their id and count.
        db_terms_count = {}

        terms_list = _list_to_printable([ t.lower() for t in terms.keys() ])
        q = "SELECT id,term,count FROM ivtidx_%s_terms WHERE term IN %s" % (ivtidx, terms_list)
        rows = self._db_query(q)
        for row in rows:
            db_terms_count[row[1]] = row[0], row[2]

        # For executemany queries later.
        update_list, map_list = [], []

        for term, score in terms.items():
            term = term.lower()
            if term not in db_terms_count:
                # New term, so insert it now.
                self._db_query('INSERT OR REPLACE INTO ivtidx_%s_terms VALUES(NULL, ?, 1)' % ivtidx, (term,))
                db_id, db_count = self._cursor.lastrowid, 1
                db_terms_count[term] = db_id, db_count
            else:
                db_id, db_count = db_terms_count[term]
                update_list.append((db_count + 1, db_id))

            map_list.append((int(score*10), db_id, object_type, object_id, score))

        self._db_query('UPDATE ivtidx_%s_terms SET count=? WHERE id=?' % ivtidx, update_list, many = True)
        self._db_query('INSERT INTO ivtidx_%s_terms_map VALUES(?, ?, ?, ?, ?)' % ivtidx, map_list, many = True)


    def _query_inverted_index(self, ivtidx, terms, limit = 100, object_type = None):
        """
        Queries the inverted index ivtidx for the terms supplied in the terms
        argument.  If terms is a string, it is parsed into individual terms
        based on the split for the given ivtidx.  The terms argument may
        also be a list or tuple, in which case no parsing is done.

        The search algorithm tries to optimize for the common case.  When
        terms are scored (_score_terms()), each term is assigned a score that
        is stored in the database (as a float) and also as an integer in the
        range 0-10, called rank.  (So a term with score 0.35 has a rank 3.)

        Multiple passes are made over the terms map table for the given ivtidx,
        first starting at the highest rank fetching a certain number of rows,
        and progressively drilling down to lower ranks, trying to find enough
        results to fill our limit that intersects on all supplied terms.  If
        our limit isn't met and all ranks have been searched but there are
        still more possible matches (because we use LIMIT on the SQL
        statement), we expand the LIMIT (currently by an order of 10) and try
        again, specifying an OFFSET in the query.

        The worst case scenario is given two search terms, each term matches
        50% of all rows but there is only one intersecting row.  (Or, more
        generally, given N terms, each term matches (1/N)*100 percent rows with
        only 1 row intersection between all N terms.)   This could be improved
        by avoiding the OFFSET/LIMIT technique as described above, but that
        approach provides a big performance win in more common cases.  This
        case can be mitigated by caching common term combinations, but it is
        an extremely difficult problem to solve.

        object_type specifies an type name to search (for example we can
        search type "image" with keywords "2005 vacation"), or if object_type
        is None (default), then all types are searched.

        This function returns a dictionary (object_type, object_id) -> score
        which match the query.
        """
        t0 = time.time()
        # Fetch number of files the inverted index applies to.  (Used in score
        # calculations.)
        objectcount = self._inverted_indexes[ivtidx]['objectcount']

        if not isinstance(terms, (list, tuple)):
            split = self._inverted_indexes[ivtidx]['split']
            if callable(split):
                terms = [term for term in split(py3_str(terms).lower()) if term]
            else:
                terms = [term for term in split.split(py3_str(terms).lower()) if term]
        else:
            terms = [ py3_str(x).lower() for x in terms ]

        # Remove terms that aren't indexed (words less than minimum length
        # or and terms in the ignore list for this ivtidx).
        if self._inverted_indexes[ivtidx]['min']:
            terms = [ x for x in terms if len(x) >= self._inverted_indexes[ivtidx]['min'] ]
        if self._inverted_indexes[ivtidx]['ignore']:
            terms = [ x for x in terms if x not in self._inverted_indexes[ivtidx]['ignore'] ]

        terms_list = _list_to_printable(terms)
        nterms = len(terms)

        if nterms == 0:
            return []

        # Find term ids and order by least popular to most popular.
        rows = self._db_query('SELECT id,term,count FROM ivtidx_%s_terms WHERE ' \
                              'term IN %s ORDER BY count' % (ivtidx, terms_list))
        save = map(lambda x: x.lower(), terms)
        terms = {}
        ids = []
        for row in rows:
            if row[2] == 0:
                return []

            # Give terms weight according to their order
            order_weight = 1 + len(save) - list(save).index(row[1])
            terms[row[0]] = {
                'term': row[1],
                'count': row[2],
                'idf_t': math.log(objectcount / row[2] + 1) + order_weight,
                'ids': {}
            }
            ids.append(row[0])

        # Not all the terms we requested are in the database, so we return
        # 0 results.
        if len(ids) < nterms:
            return []

        if object_type:
            # Resolve object type name to id
            object_type = self._get_type_id(object_type)

        results, state = {}, {}
        for id in ids:
            results[id] = {}
            state[id] = {
                'offset': [0]*11,
                'more': [True]*11,
                'count': 0,
                'done': False
            }

        all_results = {}
        if limit == None:
            limit = objectcount

        if limit <= 0 or objectcount <= 0:
            return {}

        sql_limit = min(limit*3, 200)
        finished = False
        nqueries = 0

        # Keep a dict keyed on object_id that we can use to narrow queries
        # once we have a full list of all objects that match a given term.
        id_constraints = None
        t1 = time.time()
        while not finished:
            for rank in range(10, -1, -1):
                for id in ids:
                    if not state[id]['more'][rank] or state[id]['done']:
                        # If there's no more results at this rank, or we know
                        # we've already seen all the results for this term, we
                        # don't bother with the query.
                        continue

                    q = 'SELECT object_type,object_id,frequency FROM ivtidx_%s_terms_map ' % ivtidx + \
                        'WHERE term_id=? AND rank=? %s %%s LIMIT ? OFFSET ?'

                    if object_type == None:
                        q %= ''
                        v = [id, rank, sql_limit, state[id]["offset"][rank]]
                    else:
                        q %= 'AND object_type=?'
                        v = [id, rank, object_type, sql_limit, state[id]["offset"][rank]]

                    if id_constraints:
                        # We know about all objects that match one or more of the other
                        # search terms, so we add the constraint that all rows for this
                        # term match the others as well.  Effectively we push the logic
                        # to generate the intersection into the db.
                        # XXX: This can't benefit from the index if object_type
                        # is not specified.
                        q %= ' AND object_id IN %s' % _list_to_printable(tuple(id_constraints))
                        # But since we're specifying a list of ids to search for with this
                        # term, we can't use limit/offset, since the constraints might be
                        # different since the last iteration.
                        v[-2:] = [-1, 0]
                    else:
                        q %= ''

                    rows = self._db_query(q, v)
                    nqueries += 1
                    state[id]['more'][rank] = len(rows) == sql_limit
                    state[id]['count'] += len(rows)

                    for row in rows:
                        results[id][row[0], row[1]] = row[2] * terms[id]['idf_t']
                        terms[id]['ids'][row[1]] = 1

                    if state[id]['count'] >= terms[id]['count'] or \
                       (id_constraints and len(rows) == len(id_constraints)):
                        # If we've now retrieved all objects for this term, or if
                        # all the results we just got now intersect with our
                        # constraints set, we're done this term and don't bother
                        # querying it at other ranks.
                        #print "Done term '%s' at rank %d" % (terms[id]['term'], rank)
                        state[id]['done'] = True
                        if id_constraints is not None:
                            id_constraints = id_constraints.intersection(terms[id]['ids'])
                        else:
                            id_constraints = set(terms[id]['ids'])
                #
                # end loop over terms


                for r in reduce(lambda a, b: set(a).intersection(b), results.values()):
                    all_results[r] = 0
                    for id in ids:
                        if r in results[id]:
                            all_results[r] += results[id][r]

                # If we have enough results already, no sense in querying the
                # next rank.
                if limit > 0 and len(all_results) > limit*2:
                    finished = True
                    #print "Breaking at rank:", rank
                    break
            #
            # end loop over ranks

            if finished:
                break

            finished = True
            for index in range(len(ids)):
                id = ids[index]

                if index > 0:
                    last_id = ids[index-1]
                    a = results[last_id]
                    b = results[id]
                    intersect = set(a).intersection(b)

                    if len(intersect) == 0:
                        # Is there any more at any rank?
                        a_more = b_more = False
                        for rank in range(11):
                            a_more = a_more or state[last_id]['more'][rank]
                            b_more = b_more or state[id]['more'][rank]

                        if not a_more and not b_more:
                            # There's no intersection between these two search
                            # terms and neither have more at any rank, so we
                            # can stop the whole query.
                            finished = True
                            break

                # There's still hope of a match.  Go through this term and
                # see if more exists at any rank, increasing offset and
                # unsetting finished flag so we iterate again.
                for rank in range(10, -1, -1):
                    if state[id]['more'][rank] and not state[id]['done']:
                        state[id]['offset'][rank] += sql_limit
                        finished = False

            # If we haven't found enough results after this pass, grow our
            # limit so that we expand our search scope.  (XXX: this value may
            # need empirical tweaking.)
            sql_limit *= 10

        # end loop while not finished
        log.info('%d results, did %d subqueries, %.04f seconds (%.04f overhead)',
                 len(all_results), nqueries, time.time()-t0, t1-t0)
        return all_results


    def get_inverted_index_terms(self, ivtidx, associated = None, prefix = None):
        """
        Obtain terms used by objects for an inverted index.

        :param ivtidx: the name of an inverted index previously registered with
                       :meth:`~kaa.db.Database.register_inverted_index`.
        :type ivtidx: str
        :param associated: specifies a list of terms, and only those terms which are
                           mapped to objects *in addition to* the supplied associated
                           terms will be returned.  If None, all terms for the inverted
                           index are returned.
        :type associated: list of str or unicode
        :param prefix: only terms that begin with the specified prefix are returned.
                       This is useful for auto-completion while a user is typing a
                       query.
        :type prefix: str or unicode
        :returns: a list of 2-tuples, where each tuple is (*term*, *count*).  If
                  *associated* is not given, *count* is the total number of objects
                  that term is mapped to.  Otherwise, *count* reflects the number
                  of objects which have that term plus all the given associated
                  terms.  The list is sorted with the highest counts appearing first.

        For example, given an otherwise empty database, if you have an object
        with terms ['vacation', 'hawaii'] and two other object with terms
        ['vacation', 'spain'] and the associated list passed is ['vacation'],
        the return value will be [('spain', 2), ('hawaii', 1)].
        """
        if ivtidx not in self._inverted_indexes:
            raise ValueError, "'%s' is not a registered inverted index." % ivtidx

        if prefix:
            where_clause = 'WHERE terms.term >= ? AND terms.term <= ?'
            where_values = (prefix, prefix + 'z')
        else:
            where_clause = ''
            where_values = ()

        if not associated:
            return self._db_query('''SELECT term, count
                                      FROM ivtidx_%s_terms AS terms
                                        %s
                                  ORDER BY count DESC''' % (ivtidx, where_clause), where_values)


        rows = self._db_query('SELECT id FROM ivtidx_%s_terms WHERE term IN %s ORDER BY count' % \
                              (ivtidx, _list_to_printable(associated)))
        term_ids = [ x[0] for x in rows ]
        if len(term_ids) < len(associated):
            return []

        query = '''SELECT term, COUNT(*) AS total
                     FROM ivtidx_%s_terms_map AS t0''' % ivtidx
        for n, term_id in enumerate(term_ids):
            query += ''' JOIN ivtidx_%s_terms_map t%d
                           ON t%d.object_type = t%d.object_type AND
                              t%d.object_id = t%d.object_id AND
                              t%d.term_id = %d''' % \
                     (ivtidx, n + 1, n, n + 1, n, n + 1, n + 1, term_id)
        query += ''' JOIN ivtidx_%s_terms AS terms
                       ON t0.term_id = terms.id AND
                          t0.term_id NOT IN %s
                       %s
                 GROUP BY t0.term_id
                 ORDER BY total DESC ''' % \
                 (ivtidx, _list_to_printable(term_ids), where_clause)
        return self._db_query(query, where_values)


    def get_db_info(self):
        """
        Return information about the database.

        :returns: a dict

        The returned dictionary has the following keys:
            * count: dict of object types holding their counts
            * total: total number of objects in the database
            * types: a dict keyed on object type which contains:
                * attrs: a dictionary of registered attributes for this type
                * idx: a list of composite indexes for this type
            * termcounts: a dict of the number of indexed terms for each
              inverted index
            * file: full path to the database file
        """
        total = 0
        info = {
            'count': {},
            'types': {}
        }
        for name in self._object_types:
            id, attrs, idx = self._object_types[name]
            info['types'][name] = {
                'attrs': attrs,
                'idx': idx
            }
            row = self._db_query_row('SELECT COUNT(*) FROM objects_%s' % name)
            info['count'][name] = row[0]
            total += row[0]

        info['total'] = total

        info['termcounts'] = {}
        for ivtidx in self._inverted_indexes:
            row = self._db_query_row('SELECT COUNT(*) FROM ivtidx_%s_terms' % ivtidx)
            info['termcounts'][ivtidx] = int(row[0])

        info['file'] = self._dbfile
        return info


    def set_metadata(self, key, value):
        """
        Associate simple key/value pairs with the database.

        :param key: the key name for the metadata; it is required that key
                    is prefixed with ``appname::`` in order to avoid namespace
                    collisions.
        :type key: str or unicode
        :param value: the value to associate with the given key
        :type value: str or unicode
        """
        if '::' not in key:
            raise ValueError('Invalid key %s; must be prefixed with "appname::"' % key)

        self._db_query('DELETE FROM meta WHERE attr=?', (key,))
        self._db_query('INSERT INTO meta VALUES (?, ?)', (key, value))
        self._set_dirty()


    def get_metadata(self, key, default=None):
        """
        Fetch metadata previously set by :meth:`~kaa.db.Database.set_metadata`.

        :param key: the key name for the metadata, prefixed with ``appname::``.
        :type key: str
        :param default: value to return if key is not found
        :returns: unicode string containing the value for this key, or
                  the ``default`` parameter if the key was not found.
        """
        row = self._db_query_row('SELECT value FROM meta WHERE attr=?', (key,))
        if row:
            return row[0]
        return default


    def vacuum(self):
        """
        Cleans up the database, removing unused inverted index terms.

        This also calls VACUUM on the underlying sqlite database, which
        rebuilds the database to reclaim unused space and reduces
        fragmentation.

        Applications should call this periodically, however this operation
        can be expensive for large databases so it should be done during
        an extended idle period.
        """
        # We need to do this eventually, but there's no index on count, so
        # this could potentially be slow.  It doesn't hurt to leave rows
        # with count=0, so this could be done intermittently.
        for ivtidx in self._inverted_indexes:
            self._db_query('DELETE FROM ivtidx_%s_terms WHERE count=0' % ivtidx)
        self._db_query("VACUUM")


    @property
    def filename(self):
        """
        Full path to the database file.
        """
        return self._dbfile


    @property
    def lazy_commit(self):
        """
        The interval after which any changes made to the database will be
        automatically committed, or None to require explicit commiting.
        (Default is None.)

        The timer is restarted upon each change to the database, so prolonged
        updates may still benefit from explicit periodic commits.
        """
        return self._lazy_commit_interval

    @lazy_commit.setter
    def lazy_commit(self, value):
        self._lazy_commit_interval = float(value)
        if value is None:
            self._lazy_commit_timer.stop()
        elif self._dirty:
            self._lazy_commit_timer.start(self._lazy_commit_interval)

    @property
    def readonly(self):
        return self._readonly


    def upgrade_to_py3(self):
        raise NotImplementedError