query.py | searchcode

/query.py

Large files are truncated, but you can click here to view the full file

""" Query for cwm architecture

2003-09-07 split off from llyn.py
"""

QL_NS = "http://www.w3.org/2004/ql#"
from sparql2cwm import SPARQL_NS

from set_importer import Set, ImmutableSet, sorted

from RDFSink import Logic_NS, RDFSink, forSomeSym, forAllSym
from RDFSink import CONTEXT, PRED, SUBJ, OBJ, PARTS, ALL4
from RDFSink import N3_nil, N3_first, N3_rest, OWL_NS, N3_Empty, N3_List, List_NS
from RDFSink import RDF_NS_URI

from OrderedSequence import intersection, minus, indentString

import diag
from diag import chatty_flag, tracking, progress
from term import BuiltIn, LightBuiltIn, RDFBuiltIn, ArgumentNotLiteral, \
    HeavyBuiltIn, Function, ReverseFunction, MultipleFunction, \
    MultipleReverseFunction, UnknownType, Env, unify, \
    Literal, Symbol, Fragment, FragmentNil,  Term, \
    CompoundTerm, List, EmptyList, NonEmptyList, ErrorFlag
from formula import StoredStatement, Formula
from why import Because, BecauseBuiltIn, BecauseOfRule, \
    BecauseOfExperience, becauseSubexpression, Reason, \
    BecauseSupports, BecauseMerge ,report, Premise, newTopLevelFormula, isTopLevel


BuiltinFeedError = (ArgumentNotLiteral, UnknownType)

import types
import sys
import weakref
# from sets import Set  # only in python 2.3 and following
                        # set_importer does a cleaner job

INFINITY = 1000000000           # @@ larger than any number of occurrences


# State values as follows, high value=try first
# (Queue.popBest picks the item with the highest state value):
S_UNKNOWN =     99  # State unknown - to be [re]calculated by setup.
S_DONE =        80  # Exhausted all possible ways to satisfy this. Return now.
S_LIGHT_UNS_READY= 70  # Light, not searched yet, but can run
S_LIGHT_GO =    65  # Light, can run.  Do this!
S_NOT_LIGHT =   60  # Not a light built-in, haven't searched yet.
S_LIGHT_EARLY=  50  # Light built-in, not ready to calculate, not searched yet.
S_NEED_DEEP=    45  # Can't search because of unbound compound term,
                    #   could do recursive unification
S_HEAVY_READY=  40  # Heavy built-in, search done,
                    #    but formula now has no vars left. Ready to run.
S_LIGHT_WAIT=   30  # Light built-in, not enough constants to calculate, search done.
S_HEAVY_WAIT=   20  # Heavy built-in, too many variables in args to calculate, search done.
S_REMOTE =      10  # Waiting for local query to be resolved as much as possible
#S_SATISFIED =   0  # Item has been satisfied, and is no longer a constraint, continue with others

# Short labels for each state value, for use in diagnostic output.
stateName = { 
    S_UNKNOWN : "????",
    S_DONE :        "DONE",
    S_LIGHT_UNS_READY : "LtUsGo",
    S_LIGHT_GO : "LtGo",
    S_NOT_LIGHT : "NotLt",
    S_LIGHT_EARLY : "LtEarly",
    S_NEED_DEEP :  "Deep",
    S_HEAVY_READY :   "HvGo",
    S_LIGHT_WAIT : "LtWait",
    S_HEAVY_WAIT : "HvWait",
    S_REMOTE :   "Remote"}
#    S_SATISFIED:          "Satis" }





def think(knowledgeBase, ruleFormula=None, mode="", why=None):
    """Forward-chaining inference.

    Builds an InferenceTask with repeat=1 that uses knowledgeBase as the
    working context and ruleFormula as the source of rules, and runs it.

    In the case in which rules are added back into the store, the store
    is used for read (normally canonical) and write (normally open) at
    the same time.  It in fact has to be open.

    knowledgeBase -- the formula to reason over; must be open
                     (its ``canonical`` attribute must be None)
    ruleFormula   -- where to find the rules; defaults to knowledgeBase
    mode          -- character flags passed through to the InferenceTask
    why           -- trace reason, passed through to the InferenceTask

    Returns whatever InferenceTask.run() returns.
    """
    if ruleFormula is None:
        ruleFormula = knowledgeBase
    assert knowledgeBase.canonical is None, \
        "Must be open to add stuff:" + repr(knowledgeBase)

    # The rules come from ruleFormula and the conclusions go into
    # knowledgeBase, so report them in that order.
    if diag.chatty_flag > 45: progress("think: rules from %s added to %s" %(
                                        ruleFormula, knowledgeBase))
    return InferenceTask(knowledgeBase, ruleFormula, mode=mode, why=why, repeat=1).run()

def applyRules(workingContext, ruleFormula=None, targetContext=None):
    """Run the rules once (no repeat).

    workingContext -- data we assume
    ruleFormula    -- where to find the rules
    targetContext  -- where to put the conclusions

    Returns the value of InferenceTask.run() for a task built from
    these three arguments.
    """
    task = InferenceTask(workingContext, ruleFormula, targetContext)
    return task.run()

def applyQueries(workingContext, ruleFormula=None, targetContext=None):
    """Once, nothing recursive, for an N3QL query.

    workingContext -- data we assume
    ruleFormula    -- where to find the queries
    targetContext  -- where to put the conclusions

    The task first gathers the N3QL queries from its rule formula and is
    then run; the value of InferenceTask.run() is returned.
    """
    task = InferenceTask(workingContext, ruleFormula, targetContext)
    task.gatherQueries(task.ruleFormula)
    return task.run()


def applySparqlQueries(workingContext, ruleFormula=None, targetContext=None):
    """Once, nothing recursive, for a SPARQL query.

    workingContext -- data we assume
    ruleFormula    -- where to find the query
    targetContext  -- where to put the conclusions

    The task is created in mode "q" (so running it does not gather
    ordinary rules), the SPARQL queries are gathered from its rule
    formula, and the value of InferenceTask.run() is returned.
    """
    task = InferenceTask(workingContext, ruleFormula, targetContext, mode="q")
    task.gatherSparqlQueries(task.ruleFormula)
    return task.run()

class InferenceTask:
    """A task of applying rules or filters to information.

    The task owns a Scheduler.  Rules and queries are attached by
    scheduling thunks on it; run() then drains the scheduler and returns
    the combined total of what the thunks returned.
    """
    def __init__(self,
                workingContext,    # Data we assume
                ruleFormula = None,    # Where to find the rules
                targetContext = None,   # Where to put the conclusions
                universals = None,      # Inherited from higher contexts (unused)
                mode="",                # modus operandi
                why=None,                       # Trace reason for all this
                repeat = 0):            # do it until finished
        """ Apply rules in one context to the same or another

        A rule here is defined by log:implies, which associates the template
        (aka premise, precondition, antecedent, body) to the conclusion
        (aka postcondition, head).

        targetContext defaults to workingContext and ruleFormula defaults
        to workingContext.  The ``universals`` and ``why`` arguments are
        accepted for interface compatibility but are not used here.
        """
        if diag.chatty_flag >20:
            progress("New Inference task, rules from %s" % ruleFormula)
        if targetContext is None: targetContext = workingContext # return new data to store
        if ruleFormula is None: self.ruleFormula = workingContext # apply own rules
        else: self.ruleFormula = ruleFormula
        self.ruleFor = {}
        self.hasMetaRule = 0
        self.scheduler = Scheduler()

        self.workingContext, self.targetContext, self.mode, self.repeat = \
            workingContext, targetContext, mode, repeat
        self.store = self.workingContext.store

    def scheduleAttachRule(task, statement, formula, variables):
        """Schedule creation (and a first run) of the Rule for `statement`.

        The `formula` argument is ignored: the formula used is always
        statement.context().  Its universals are added to `variables`.
        """
        formula = statement.context()
        subj, pred, obj = statement.spo()
        variables = variables | formula.universals()
        def addRule():
            if not formula.contains(subj=subj, pred=formula.store.implies, obj=obj):
                return 0  # The triple is no longer there
            return Rule(task, subj, obj, statement, variables).once()
        task.schedule(addRule)

    def scheduleAttachQuery(task, subj, obj, statement, variables):
        """Schedule creation (and a first run) of a Rule for a query.

        subj is used as the rule's antecedent and obj as its consequent;
        `statement` is the triple the query was found in and is
        re-checked for presence when the thunk runs.
        """
        formula = statement.context()
        variables = variables | formula.universals()
        def addRule():
            if not formula.contains(subj=statement.subject(), pred=statement.predicate(), obj=statement.object()):
                return 0  # The triple is no longer there
            r = Rule(task, subj, obj, statement, variables).once()
            if (diag.chatty_flag >30):
                progress( "Found rule %r for statement %s " % (r, statement))
            return r
        task.schedule(addRule)


    def schedule(self, thunk):
        """Queue `thunk` on the scheduler; silently dropped if there is none."""
        if self.scheduler is not None:
            self.scheduler.add(thunk)


    def run(self):
        """Perform task.
        Return number of  new facts"""
        return self.runBrilliant()

    def runBrilliant(self):
        """Perform task.
        Return number of  new facts.
        Start again if new rule may have been generated.
        This should be much faster than even runSmart,
        despite being much simpler"""
        # We can loop only when repeating and concluding into the same
        # context we are reading from.
        canLoop = bool(self.repeat and self.targetContext is self.workingContext)

        if "q" not in self.mode:
            self.gatherRules(self.ruleFormula)

        scheduler = self.scheduler
        if not canLoop:
            # With no scheduler attached, schedule() drops anything the
            # running thunks try to add, so nothing is re-triggered.
            self.scheduler = None

        try:
            total = scheduler.run(int.__add__)
        finally:
            # Always re-attach the scheduler, even if a thunk raised;
            # otherwise later schedule() calls would be silently lost.
            self.scheduler = scheduler

        return total


    def gatherRules(self, ruleFormula):
        """Install the rules of ruleFormula, and of formulae it declares true."""
        universals = Set() # @@ self.universals??
        RuleInstaller(self, ruleFormula, universals).think()

        for F in ruleFormula.each(pred=self.store.type, obj=self.store.Truth): #@@ take out when --closure=T ??
            self.gatherRules(F)  #See test/rules13.n3, test/schema-rules.n3 etc

    def gatherQueries(self, ruleFormula):
        "Find a set of rules in N3QL"
        universals = Set() # @@ self.universals??
        ql_select = self.store.newSymbol(QL_NS + "select")
        ql_where = self.store.newSymbol(QL_NS + "where")
        for s in ruleFormula.statementsMatching(pred=ql_select):
            r = self.ruleFor.get(s, None)
            if r is not None: continue
            con, pred, query, selectClause  = s.quad
            whereClause= ruleFormula.the(subj=query, pred=ql_where)
            if whereClause is None: continue # ignore (warning?)

            if (isinstance(selectClause, Formula)
                and isinstance(whereClause, Formula)):
                v2 = universals | ruleFormula.universals() # Note new variables can be generated
                self.scheduleAttachQuery(whereClause, selectClause, s, v2)

    def gatherSparqlQueries(self, ruleFormula):
        "Find the rules in SPARQL"
        store = self.store
        sparql = store.newSymbol(SPARQL_NS)

        # Replace every stand-in named by a sparql:data statement with
        # the real working context.
        for from_statement in ruleFormula.statementsMatching(pred=sparql['data']):
            working_context_stand_in = from_statement.object()
            ruleFormula = ruleFormula.substitution({working_context_stand_in: self.workingContext})
        query_root = ruleFormula.any(pred=store.type, obj=sparql['ConstructQuery'])
        if not query_root:
            # This is wrong
            query_root = ruleFormula.any(pred=store.type, obj=sparql['SelectQuery'])
        if not query_root:
            query_root = ruleFormula.any(pred=store.type, obj=sparql['AskQuery'])
        # query_root is a very boring bNode
        if query_root:
            #construct query
            for where_triple in ruleFormula.statementsMatching(subj=query_root, pred=sparql['where']):
                where_clause = where_triple.object()
                #where_clause is the tail of the rule
                implies_clause = ruleFormula.the(subj=where_clause, pred=store.implies)
                assert implies_clause is not None, ("where=%s, f=%s" % (where_clause.debugString(), ruleFormula.debugString()))
                #implies_clause is the head of the rule
                v2 = ruleFormula.universals().copy()
                self.scheduleAttachQuery(where_clause, implies_clause, where_triple, v2)


#### TOPO
#def partialOrdered(cy1, pool):
#    """Return sequence conforming to the partially order in a set of cyclic subsystems
#    
#    Basially, we find the dependencies of a node and remove them from the pool.
#    Then, any node in the pool can be done earlier, because has no depndency from those done.
#    """
#    seq = []
#    for r1 in cy1:   # @@ did just chose first rule cy[0], but didn't get all
#        for r2 in  r1.affects:
#            if r2 not in cy1:  # An external dependency
#                cy2 = r2.cycle
#                if cy2 in pool:
#                    seq = partialOrdered(cy2, pool) + seq
#    pool.remove(cy1)
#    if diag.chatty_flag > 90: progress("partial topo: %s" % `[cy1] + seq`)
#    return [cy1] + seq
#
#class CyclicSetOfRules:
#    """A set of rules which are connected
#    """
#    def __init__(self, rules):
#        self.rules = rules
#        for r1 in rules:
#            r1.cycle = self
#
#    def __getitem__(self, i):
#        return self.rules[i]
#
#    def __repr__(self):
#        return `self.rules`
#
#    def run(self):
#        "Run a cyclic subset of the rules"
#        if diag.chatty_flag > 20:
#            progress()
#            progress("Running cyclic system %s" % (self))
#        if len(self.rules) == 1:
#            rule = self.rules[0]
#            if not rule.affects.get(rule, 0):
##               rule.already = None # Suppress recording of previous answers
#                # - no, needed to remove dup bnodes as in test/includes/quant-implies.n3 --think
#                # When Rule.once is smarter about not iterating over things not mentioned elsewhere,
#                # can remove this.
#                return rule.once()
#                
#        agenda = self.rules[:]
#        total = 0
#        for r1 in self.rules:
#            af = r1.affects.keys()
#            af.sort()
#            r1.affectsInCyclic = intersection(self.rules, af)
#        while agenda:
#            rule = agenda[0]
#            agenda = agenda[1:]
#            found = rule.once()
#            if diag.chatty_flag > 20: progress("Rule %s gave %i. Affects:%s." %(
#                        rule, found, rule.affectsInCyclic))
#            if found:
#                total = total + found
#                for r2 in rule.affectsInCyclic:
#                    if r2 not in agenda:
#                        if diag.chatty_flag > 30: progress("...rescheduling", r2)
#                        agenda.append(r2)
#        if diag.chatty_flag > 20: progress("Cyclic subsystem exhausted")
#        return total
        

def buildPattern(workingContext, template):
    """Return the list of statements a query for `template` must match.

    The list starts as a copy of template.statements.  For every
    universal of the template that occurs in it, one extra statement is
    appended whose quad is
    (workingContext, store.universalVariableName, workingContext, v):
    a special builtin that checks the thing is universally quantified.
    """
    pattern = template.statements[:]
    universalsUsed = template.occurringIn(template.universals())
    for var in universalsUsed:
        if diag.chatty_flag > 100:
            progress("Template %s has universalVariableName %s, formula is %s"
                     % (template, var, template.debugString()))
        quad = (workingContext,
                template.store.universalVariableName,
                workingContext,
                var)
        pattern.append(StoredStatement(quad))
    return pattern

def buildStrictPattern(workingContext, template):
    """Like buildPattern, but also constrain the template's existentials.

    Starts from buildPattern(workingContext, template) and appends, for
    every existential of the template that occurs in it, a statement
    whose quad is
    (workingContext, store.existentialVariableName, workingContext, v).
    """
    pattern = buildPattern(workingContext, template)
    existentialsUsed = template.occurringIn(template.existentials())
    for var in existentialsUsed:
        if diag.chatty_flag > 100:
            progress("Tempate %s has existentialVariableName %s, formula is %s"
                     % (template, var, template.debugString()))
        quad = (workingContext,
                template.store.existentialVariableName,
                workingContext,
                var)
        pattern.append(StoredStatement(quad))
##    for v in template.variables():
##      if diag.chatty_flag > 100: progress(
##          "Tempate %s has enforceUniqueBinding %s, formula is %s" % (template, v, template.debugString()))
##      unmatched.append(StoredStatement((workingContext,
##              template.store.enforceUniqueBinding,
##                v,
##              workingContext.store.newLiteral(v.uriref())
##                                          )))
    return pattern
    
nextRule = 0    # Serial number of the most recently created Rule
class Rule:
    """One rule (template => conclusion) attached to an InferenceTask.

    The constant part of the work (building the list of statements to
    match, sorting variables into those used in the conclusion and those
    that are effectively existential, registering with the statement
    indexes) is done once in __init__; once() and addTriple() then run a
    fresh Query each time.
    """

    def __init__(self, task, antecedent, consequent, statement, variables):
        """Try a rule

        Beware lists are corrupted. Already list is updated if present.
        The idea is that, for a rule which may be tried many times, the constant
        processing is done in this rather than in Query().

        The already list is used to track bindings.
        This is less useful when not repeating (as in --filter), but in fact
        there may be several ways in which one can get the same bindings,
        even without a repeat.

        task       -- the owning task (supplies working/target context, mode)
        antecedent -- the template formula to match
        consequent -- the conclusion formula
        statement  -- the original statement the rule came from
        variables  -- universals declared in the outer context
        """
        global nextRule
        self.task = task
        self.template = antecedent
        self.conclusion = consequent
        self.store = self.template.store
        self.statement = statement      #  original statement
        self.number = nextRule = nextRule+1
        self.meta = self.conclusion.contains(pred=self.conclusion.store.implies) #generate rules?
#       if task.repeat: self.already = []    # No need to track dups if not
#       else: self.already = None
        self.already = []
        self.affects = {}
        self.indirectlyAffects = []
        self.indirectlyAffectedBy = []
        self.affectsInCyclic = []
        self.cycle = None

        # When the template refers to itself, the thing we are
        # are looking for will refer to the context we are searching
        # Similarly, references to the working context have to be moved into the
        # target context when the conclusion is drawn.


#       if self.template.universals() != Set():
#           raise RuntimeError("""Cannot query for universally quantified things.
#           As of 2003/07/28 forAll x ...x cannot be on left hand side of rule.
#           This/these were: %s\n""" % self.template.universals())

        self.unmatched = buildPattern(task.workingContext, self.template)
        self.templateExistentials = self.template.existentials().copy()
        _substitute({self.template: task.workingContext}, self.unmatched)

        # Outer variables that appear in the template but not in the
        # conclusion are treated like existentials of the template.
        variablesMentioned = self.template.occurringIn(variables)
        self.variablesUsed = self.conclusion.occurringIn(variablesMentioned)
        for x in variablesMentioned:
            if x not in self.variablesUsed:
                self.templateExistentials.add(x)

        allVariablesMentioned = self.templateExistentials | self.variablesUsed

        def replaceWithNil(x):
            # Formulae, lists containing formulae, and anything mentioning
            # a variable become a wildcard (None) in the index lookup.
            if isinstance(x, Formula) or \
               (isinstance(x, List) and hasFormula(x)) or \
               x.occurringIn(allVariablesMentioned):
                return None
            return x

        # Register with the index of each pattern statement, and remember
        # which pattern statements each index identity corresponds to.
        self.patternsToUnmatched = {}
        for p in self.unmatched:
            patternTuple = tuple(replaceWithNil(x) for x in (p[1],
                                                             p[2],
                                                             p[3]))
##            print 'patternTuple is %s, p is %s, vars=%s' % (patternTuple, p[1:], variables)
            primaryAlpha = p[0].statementsMatching(*patternTuple)
            primaryAlpha.addConsumer(self)
            self.patternsToUnmatched.setdefault(primaryAlpha.identity, []).append(p)


        if diag.chatty_flag >20:
            progress("New Rule %s ============ looking for:" % repr(self) )
            for s in self.template.statements: progress("    ", repr(s))
            progress("=>")
            for s in self.conclusion.statements: progress("    ", repr(s))
            progress("Universals declared in outer " + seqToString(variables))
            progress(" mentioned in template       " + seqToString(variablesMentioned))
            progress(" also used in conclusion     " + seqToString(self.variablesUsed))
            progress("Existentials in template     " + seqToString(self.templateExistentials))
        return

    def _newQuery(self, unmatched):
        """Build the Query for this rule over the given unmatched statements."""
        task = self.task
        return Query(self.store,
                        unmatched = unmatched,
                        template = self.template,
                        variables = self.variablesUsed.copy(),
                        existentials = self.templateExistentials.copy(),
                        workingContext = task.workingContext,
                        conclusion = self.conclusion,
                        targetContext = task.targetContext,
                        already = self.already,
                      ###
                        rule = self.statement,
                      ###
                        interpretBuiltins = 1,    # (...)
                        meta = task.workingContext,
                        mode = task.mode)

    def once(self):
        """Run the rule over the whole working context.

        Returns the value of Query.resolve() (reported in the trace as
        the number of new statements).
        """
        if diag.chatty_flag >20:
            progress("Trying rule %s ===================" % self )
            progress( setToString(self.unmatched))
        query = self._newQuery(self.unmatched[:])
        Formula.resetRenames()
        total = query.resolve()
        Formula.resetRenames(False)
        if diag.chatty_flag > 20:
            progress("Rule try generated %i new statements" % total)
        return total

    def scheduleAddTriple(rule, key, triple):
        """Schedule addTriple(key, triple) on the task's scheduler.

        When the thunk runs it first checks that the triple is still in
        its context, and returns 0 if it is not.
        """
        def fireRuleWithTriple():
            if not triple.context().contains(subj=triple.subject(), pred=triple.predicate(), obj=triple.object()):
                return 0  # The triple is no longer there
            return rule.addTriple(key, triple)
        rule.task.schedule(fireRuleWithTriple)


    def addTriple(self, key, triple):
        """One triple was added to the store. Run the rule on it

        key selects, via patternsToUnmatched, the pattern statements the
        triple may match.  For every way the triple unifies with one of
        them, a Query is run over the remaining statements with that
        unification as its starting point.  Returns the sum of the
        Query.resolve() results.
        """
        possiblesInUnmatched = self.patternsToUnmatched[key]
        total = 0
        for pattern in possiblesInUnmatched:
            for env1, env2 in unify(pattern[1:], triple.quad[1:], vars=self.variablesUsed | self.templateExistentials):
                if diag.chatty_flag >20:
                    progress("Trying rule %s on pattern %s, triple %s, env=%s===================" %
                             (self, pattern, triple.quad, env1) )
                    progress( setToString(self.unmatched))
                # The matched pattern itself is already satisfied by the triple.
                query = self._newQuery(
                    [x for x in self.unmatched if x is not pattern])
                Formula.resetRenames()
                subtotal = query.resolve((env1, [triple]))
                total += subtotal
                Formula.resetRenames(False)
                if diag.chatty_flag > 20:
                    progress("Rule try generated %i new statements" % subtotal)
        return total

    def __repr__(self):
        """'R<number>', with a trailing '*' if the rule affects itself."""
        if self in self.affects: return "R" + repr(self.number) + "*"
        return "R" + repr(self.number)

    def compareByAffects(self, other):
        """Ordering helper: -1 if self indirectly affects other, 1 if it
        is indirectly affected by other, otherwise 0."""
        if other in self.indirectlyAffects: return -1  # Do me earlier
        if other in self.indirectlyAffectedBy: return 1
        return 0


    def traceForward(self, r1):
        """Record every rule reachable from r1 through `affects` links.

        Each newly reached rule is appended to self.indirectlyAffects and
        gets self appended to its indirectlyAffectedBy list.
        """
        for r2 in r1.affects:
            if r2 not in self.indirectlyAffects:
                self.indirectlyAffects.append(r2)
                r2.indirectlyAffectedBy.append(self)
                self.traceForward(r2)
#           else:
#               self.__setattr__("leadsToCycle", 1)
    
def testIncludes(f, g, _variables=Set(),  bindings={}, interpretBuiltins = 0):
    """Return whether or not f contains a top-level formula equivalent to g.
    Just a test: no bindings returned.

    f, g              -- formulae; both must be canonical
    _variables        -- extra variables, treated as existentials of the query
    bindings          -- accepted but not used
    interpretBuiltins -- passed through to the Query

    Returns 0 if either argument is not a Formula; otherwise the value of
    Query.resolve() for a justOne query that matches g against a
    variable-renamed copy of f.
    """
    if diag.chatty_flag >30: progress("testIncludes ============\nseeing if %s entails %s" % (f, g))
#    raise RuntimeError()
    if not(isinstance(f, Formula) and isinstance(g, Formula)): return 0

    assert f.canonical is f, f.debugString()
    assert g.canonical is g
    # Silence diagnostics while renaming; restore the flag even if the
    # rename fails, because diag.chatty_flag is global state.
    m = diag.chatty_flag
    diag.chatty_flag = 0
    try:
        if m > 60: progress("Before rename: ", f.debugString())
        f = f.renameVars()
        if m > 60: progress("After rename: ", f.debugString())
    finally:
        diag.chatty_flag = m
    if diag.chatty_flag >100: progress("Formula we are searching in is\n%s" % g.debugString())
    unmatched = buildPattern(f, g)
    templateExistentials = g.existentials()
    more_variables = g.universals().copy()
    _substitute({g: f}, unmatched)

#    if g.universals() != Set():
#       raise RuntimeError("""Cannot query for universally quantified things.
#       As of 2003/07/28 forAll x ...x cannot be on left hand side of rule.
#       This/these were: %s\n""" % g.universals())

#    if bindings != {}: _substitute(bindings, unmatched)

    if diag.chatty_flag > 20:
        progress( "# testIncludes BUILTIN, %i terms in template %s, %i unmatched, %i template variables" % (
            len(g.statements),
            repr(g)[-8:], len(unmatched), len(templateExistentials)))
        if diag.chatty_flag > 80:
            for v in _variables:
                progress( "    Variable: " + repr(v)[-8:])

    result = Query(f.store,
                unmatched=unmatched,
                template = g,
                variables=Set(),
                interpretBuiltins = interpretBuiltins,
                existentials=_variables | templateExistentials | more_variables,
                justOne=1, mode="").resolve()

    if diag.chatty_flag >30: progress("=================== end testIncludes =" + repr(result))
    return result


def n3Equivalent(g, f, env1, env2, vars=Set([]),
                 universals=Set(), existentials=Set([]),
                 n1Source=42, n2Source=42):    
    """Generate the (env1, env2) pairs under which formula g matches f.

    Yields nothing if either g or f is not a Formula, or if f has more
    statements than g.  If f and g are the same object, yields the
    environments unchanged.  Otherwise g is matched, with a strict
    pattern (existentials constrained too) and builtins off, against a
    variable-renamed copy of f; for each set of bindings found, env1 is
    extended with those bindings (tagged with env2.id) and yielded with
    env2.

    vars         -- variables the query should return bindings for
    existentials -- extra variables that may match anything
    universals, n1Source, n2Source -- accepted but not used

    NOTE(review): env1 is rebound inside the result loop, so each yielded
    env1 is built on top of the previous result's bindings rather than
    on the env1 passed in -- confirm this is intended.
    """
    if diag.chatty_flag >30: progress("Query.py n3Equivalent ============\nseeing if %s equals %s" % (f, g))
#    raise RuntimeError()
    if not(isinstance(f, Formula) and isinstance(g, Formula)): pass
    elif f is g:
        yield env1, env2
    elif len(f) > len(g):
        pass
    else:

        assert f.canonical is f, "%s, %s" % (f.debugString(), f.canonical.debugString())
        assert g.canonical is g, "%s, %s" % (g.debugString(), g.canonical.debugString())

        # Silence diagnostics while renaming; restore the flag even if
        # the rename fails, because diag.chatty_flag is global state.
        m = diag.chatty_flag
        diag.chatty_flag = 0
        try:
            if m > 60: progress("Before rename: ", f.debugString())
            f = f.renameVars()
            if m > 60: progress("After rename: ", f.debugString())
        finally:
            diag.chatty_flag = m
        unmatched = buildStrictPattern(f, g)
        templateExistentials = g.existentials() | g.universals() | existentials
        more_variables = Set(vars)
        _substitute({g: f}, unmatched)
        
        if env1: _substitute(env1.asDict(), unmatched)

        if diag.chatty_flag > 20:
            progress( "# testEqual BUILTIN, %i terms in template %s, %i unmatched, %i template variables" % (
                len(g.statements),
                repr(g)[-8:], len(unmatched), len(templateExistentials)))
            if diag.chatty_flag > 80:
                for v in vars:
                    progress( "    Variable: " + repr(v)[-8:])

        result = Query(f.store,
                    unmatched=unmatched,
                    template = g,
                    variables=more_variables,
                    workingContext = f,
                    interpretBuiltins = False,
                    existentials= templateExistentials ,
                    justReturn=1, mode="").resolve()

        if diag.chatty_flag >30: progress("=================== end n3Equivalent =" + repr(result))
        if not result: result = []
        for x in result:
            for k, (v, source) in x.items():
                env1 = env1.bind(k, (v, env2.id))
            yield env1, env2
##    return [(x, None) for x in result]



############################################################## Query engine
#
# Template matching in a graph
#
# Optimizations we have NOT done:
#   - storing the tree of bindings so that we don't have to duplicate them another time
#   - using that tree to check only whether new data would extend it (very cool - memory?)
#      (this links to dynamic data, live variables.)
#   - recognising in advance disjoint graph templates, doing cross product of separate searches
#
# Built-Ins:
#   The trick seems to be to figure out which built-ins are going to be faster to
# calculate, and so should be resolved before query terms involving a search, and
# which, like those involving recursive queries or net access, will be slower than a query term,
# and so should be left till last.
#   I feel that it should be possible to argue about built-ins just like anything else,
# so we do not exclude these things from the query system. We therefore may have both light and
# heavy built-ins which still have too many variables to calculate at this stage.
# When we do the variable substitution for new bindings, these can be reconsidered.


class Queue([].__class__):
    """A list of query items with two extra sets, `statements` and `bNodes`.

    Items are expected to have `state` and `short` attributes, which
    popBest() uses to choose what to work on next.
    """
    __slots__ = ['statements', 'bNodes']
    list = [].__class__

    def __init__(self, other=(), metaSource = None):
        """Fill the list from `other`.

        If metaSource is a Queue, each slot is initialised with a copy of
        metaSource's value for it; otherwise the slots start as empty sets.
        """
        self.list.__init__(self, other)
        if isinstance(metaSource, Queue):
            for k in self.__slots__:
                setattr(self, k, getattr(metaSource, k).copy())
        else:
            self.statements = Set()
            self.bNodes = Set()
            pass #fill in slots here

    def popBest(self):
        """Remove and return the best item.

        Best means highest `state`; among equal states, lowest `short`;
        among items still tied, the one nearest the end of the list.
        Raises IndexError if the queue is empty.
        """
        best = len(self) -1 # , say...
        for i in range(best - 1, -1, -1):
            candidate, leader = self[i], self[best]
            if (candidate.state > leader.state
                or (candidate.state == leader.state
                    and candidate.short < leader.short)):
                best = i
        # Remove by position: list.remove() would rescan by equality and
        # could drop a different item that merely compares equal.
        return self.pop(best)

    def __repr__(self):
        return 'Queue(%s, bNodes=%s)' % (list.__repr__(self), self.bNodes)

#Queue = [].__class__

class Chain_Step(object):
    """One step of a chain: a queue of lines plus the state that goes with it.

    Holds the variables, existentials, queue of remaining lines,
    environment, parent (currently required to be None) and evidence
    list passed to the constructor.
    """
    def __init__(self, vars, existentials, queue, env, parent=None, evidence=None):
        """Store the arguments.

        evidence defaults to a fresh empty list for each instance, so
        that steps never share one list through the default argument.
        """
        self.vars = vars
        self.existentials = existentials
        self.lines = queue
        self.env = env
        assert parent is None
        self.parent = parent
        if evidence is None:
            evidence = []
        self.evidence = evidence

    def popBest(self):
        """Remove and return the best line, as chosen by the queue."""
        return self.lines.popBest()

    def copy(self):
        """Return a new step of the same class sharing all of this step's members."""
        retVal = self.__class__(self.vars, self.existentials, self.lines, self.env, self.parent, self.evidence)
        return retVal

    def done(self):
        """True when no lines are left."""
        return not self.lines

    def __cmp__(self, other):
        # Reversed on purpose: the step with MORE lines compares lower.
        return cmp(len(other.lines), len(self.lines))

    def __repr__(self):
        return "%s(lines=%r,\n\tenv=%r,\n\tparent=%r,\n\tevidence=%r)" % (self.__class__.__name__, self.lines, self.env, self.parent, self.evidence)


def returnWrapper(f):
    """Debugging decorator: report every value `f` returns.

    The returned function calls f with the same positional and keyword
    arguments, passes '<name>() returns <value>' to progress(), and then
    hands the value back unchanged.
    """
    def g(*args, **keywords):
        result = f(*args, **keywords)
        message = '%s() returns %s' % (f.func_name, result)
        progress(message)
        return result
    return g

class Query(Formula):
    """A query holds a hypothesis/antecedent/template which is being matched against (unified with)
    the knowledge base."""
    def __init__(self,
               store,
               unmatched=[],           # Tuple of interned quads we are trying to match CORRUPTED
               template = None,         # Actually, must have one
               variables=Set(),           # List of variables to match and return CORRUPTED
               existentials=Set(),        # List of variables to match to anything
                                    # Existentials or any kind of variable in subexpression
               workingContext = None,
               conclusion = None,
               targetContext = None,
               already = None,      # Dictionary of matches already found
               rule = None,                 # The rule statement
               interpretBuiltins = 0,        # List of contexts in which to use builtins
               justOne = 0,         # Flag: Stop when you find the first one
               justReturn = 0,      # Flag: Return bindings, don't conclude
               mode = "",           # Character flags modifying modus operandi
            meta = None):           # Context to check for useful info eg remote stuff

        
        if diag.chatty_flag > 50:
            progress( "Query: created with %i terms. (justone=%i, wc=%s)" % 
                    (len(unmatched), justOne, workingContext))
            if diag.chatty_flag > 80: progress( setToString(unmatched))
            if diag.chatty_flag > 90 and interpretBuiltins: progress(
                "iBuiltIns=1 ")

        Formula.__init__(self, store)
        self.statements = Queue()   #  Unmatched with more info
#       self.store = store      # Initialized by Formula
        self.variables = variables
        self._existentialVariables = existentials
        self.workingContext = workingContext
        self.conclusion = conclusion
        self.targetContext = targetContext
        self.justOne = justOne
        self.already = already
        self.rule = rule
        self.template = template  # For looking for lists
        self.meta = meta
        self.mode = mode
        self.lastCheckedNumberOfRedirections = 0
        self.bindingList = []
        self.justReturn = justReturn
        realMatchCount = 0
        if justReturn and not variables:
            self.justOne = True
        for quad in unmatched:
            item = QueryItem(self, quad)
            if not item.setup(allvars=variables|existentials, unmatched=unmatched,
                        interpretBuiltins=interpretBuiltins, mode=mode):
                if diag.chatty_flag > 80: progress(
                                    "match: abandoned, no way for "+`item`)
                self.noWay = 1
                return  # save time
            if not item.builtIn:
                realMatchCount += 1    
            self.statements.append(item)
        if justReturn and realMatchCount > len(workingContext):
            self.noWay = 1
            return
        return
        
    def resolve(self, alreadyBound=None):
        """Run the match and report the outcome.

        alreadyBound -- optional (env, evidence) pair forwarded to
                        matchFormula as its starting environment and
                        evidence.

        Returns 0 if the constructor flagged the query as `noWay`,
        self.bindingList if `justReturn` is set, and otherwise whatever
        matchFormula returned.
        """
        if hasattr(self, "noWay"):
            return 0

        positional = [self.statements, self.variables, self._existentialVariables]
        keywords = {}
        if alreadyBound is not None:
            env, evidence = alreadyBound
            positional.append(env)
            keywords['evidence'] = evidence
        matched = self.matchFormula(*positional, **keywords)

        if self.justReturn:
            return self.bindingList
        return matched

    def checkRedirectsInAlready(self):
        """Kludge: apply the target context's redirections to self.already
        whenever there are more of them than at the previous check."""
        count = len(self.targetContext._redirections)
        if count <= self.lastCheckedNumberOfRedirections:
            return  # nothing new since last time
        self.lastCheckedNumberOfRedirections = count
        self.redirect(self.targetContext._redirections)
            
    def redirect(self, redirections):
        """Rewrite remembered bindings whose value has been redirected.

        For every set of bindings in self.already, each value that is a key
        of `redirections` is replaced in place by redirections[value].
        Values without a redirection are left untouched.
        """
        for bindings in self.already:
            # Snapshot the pairs: the mapping is updated inside the loop.
            for var, value in list(bindings.items()):
                try:
                    x = redirections[value]
                except (KeyError, TypeError):
                    # KeyError: this value is not redirected.
                    # TypeError: an unhashable value cannot be a key at all.
                    # Anything else is a real error and must propagate.
                    continue
                if diag.chatty_flag>29: progress("Redirecting binding %r to %r" % (value, x))
                bindings[var] = x

    def conclude(self, bindings, evidence = [], extraBNodes = Set(), allBindings=None):
        """When a match found in a query, add conclusions to target formula.

        bindings    -- the bindings of the match; must offer .items() yielding
                       (var, (val, source)) pairs and .asDict()
        evidence    -- list of reasons for the match; when diag.tracking is on,
                       placeholder entries in it are REPLACED IN PLACE
        extraBNodes -- additional blank nodes treated like existentials of the
                       working context
        allBindings -- bindings used for proof tracking; defaults to `bindings`

        Returns 1 in justOne / justReturn mode, 0 for a duplicate or for a
        binding whose value is an Exception (which is raised as ValueError
        instead when "q" is in self.mode), and otherwise the count reported by
        targetContext.loadFormulaWithSubstitution()."""
        if self.justOne:
            # Only the existence of a match matters: record one empty binding.
            self.bindingList = [{}]
            return 1   # If only a test needed
        if self.justReturn:
            # Collect distinct bindings instead of concluding anything.
            if bindings not in self.bindingList:
#                progress('CONCLUDE bindings = %s' % bindings)
                self.bindingList.append(bindings)
            return 1

        if diag.chatty_flag >60:
                        progress( "Concluding tentatively...%r" % bindings)
        # Duplicate suppression: skip bindings already concluded from.
        if self.already != None:
            self.checkRedirectsInAlready() # @@@ KLUDGE - use delegation and notification systme instead
            if bindings in self.already:
                if diag.chatty_flag > 30:
                    progress("@@ Duplicate result: %r is in %r" %  (bindings, self.already))
                return 0
            if diag.chatty_flag > 30: progress("Not duplicate: %r" % bindings)
            self.already.append(bindings)
        else: 
            if diag.chatty_flag >60:
                        progress( "No duplication check")

        # Proof tracking: turn the deferred ("...Will") evidence entries into
        # real reasons now that all bindings are known, then build the reason
        # attached to everything concluded below.
        if diag.tracking:
            if allBindings is None:
                allBindings = bindings
            for loc in xrange(len(evidence)):
                r = evidence[loc]

                if isinstance(r, BecauseSupportsWill):
                    evidence[loc] = BecauseSupports(*([smarterSubstitution(k, allBindings,
                        r.args[1], why=Because("I support it: "), exception=[r.args[2]]) for k in r.args] +
                        [[k for k in evidence if isinstance(k, (StoredStatement, Reason))]]))
                if isinstance(r, BecauseBuiltInWill):
                    evidence[loc] = BecauseBuiltIn(*[smarterSubstitution(k, allBindings,
                        r.args[0], why=Because("I include it: " + k.debugString() + `allBindings`)) for k in r.args[1:]])
            reason = BecauseOfRule(self.rule, bindings=allBindings, knownExistentials = extraBNodes,
                            evidence=evidence, kb=self.workingContext)
#           progress("We have a reason for %s of %s with bindings %s" % (self.rule, reason, alBindings))
        else:
            reason = None

        # es:    existentials of the working context plus extraBNodes.
        # exout: those of es that occur in some bound value.
        es, exout = (self.workingContext.existentials() | extraBNodes), Set() #self.workingContext.existentials() | 
        for var, (val, source) in bindings.items():
            if isinstance(val, Exception):
                if "q" in self.mode: # How nice are we?
                    raise ValueError(val)
                return 0
            # NOTE: this local name shadows the module-level `intersection`
            # imported from OrderedSequence for the rest of this method.
            intersection = val.occurringIn(es) #  Take time for large number of bnodes?
            if intersection:   
                exout.update(intersection)
                if diag.chatty_flag > 25: progress(
                "Match found to that which is only an existential: %s -> %s" %
                                                    (var, val))
                for val2 in intersection:
                    # NOTE(review): the test below looks at `val`, not `val2`,
                    # although it is `val2` that gets declared -- confirm
                    # which one is intended.
                    if val not in self.targetContext.existentials():
                        if self.conclusion.occurringIn([var]):
                            self.targetContext.declareExistential(val2)

        # Variable renaming

        b2 = bindings.asDict()
#        b2[self.conclusion] = self.targetContext  # What does this mean?
        ok = self.targetContext.universals() 
        # It is actually ok to share universal variables with other stuff
        # poss: universals of the conclusion that the target context does not
        # already have; sorted so that renaming happens in a fixed order.
        poss = self.conclusion.universals().copy()
        for x in poss.copy():
            if x in ok: poss.remove(x)
        poss_sorted = list(poss)
        poss_sorted.sort(Term.compareAnyTerm)
        #progress(poss)

#        vars = self.conclusion.existentials() + poss  # Terms with arbitrary identifiers
#        clashes = self.occurringIn(targetContext, vars)    Too slow to do every time; play safe
        # `s` accumulates the debug trace and exists only at this verbosity;
        # every later use is guarded by the same test.
        if diag.chatty_flag > 25:
            s=""
        for v in poss_sorted:
            v2 = self.targetContext.newUniversal()
            b2[v] =v2   # Regenerate names to avoid clash
            if diag.chatty_flag > 25: s = s + ",uni %s -> %s" %(v, v2)
        # Existentials of the conclusion get a fresh blank node each, except
        # those found in exout above.
        for v in sorted(list(self.conclusion.existentials()), Term.compareAnyTerm):
            if v not in exout:
                v2 = self.targetContext.newBlankNode()
                b2[v] =v2   # Regenerate names to avoid clash
                if diag.chatty_flag > 25: s = s + ",exi %s -> %s" %(v, v2)
            else:
                if diag.chatty_flag > 25: s = s + (", (%s is existential in kb)"%v)
        if diag.chatty_flag > 25:
            progress("Variables regenerated: universal " + `poss`
                + " existential: " +`self.conclusion.existentials()` + s)


        if diag.chatty_flag>19:
            progress("Concluding DEFINITELY" + bindingsToString(b2) )
        before = self.store.size
        # Load the conclusion into the target context under substitution b2.
        _, delta = self.targetContext.loadFormulaWithSubstitution(
                    self.conclusion, b2, why=reason, cannon=True)
        if diag.chatty_flag>29 and delta:
            progress(" --- because of: %s => %s, with bindings %s" % (self.template.debugString(),
                                                                      self.conclusion.debugString(),
                                                                      b2))
        if diag.chatty_flag> 40:
            progress("Added %i, nominal size of store changed from %i to %i."%(delta, before, self.store.size))
        return delta #  self.store.size - before


##################################################################################

    def matchFormula(query, queue, variables, existentials, env=Env(), evidence=[]):
        """Search for every way of satisfying all the items in `queue`.

        `query` plays the role of `self`.  The search keeps an explicit stack
        of Chain_Step objects.  Each round pops a step, takes its best item
        (Queue.popBest) and, depending on the item's state, asks the item for
        alternative new bindings.  Every alternative that can be merged into
        the step's environment becomes a new step with a cloned queue.  A step
        whose queue is empty is a complete match and is handed to
        query.conclude().

        queue        -- Queue of query items still to be matched
        variables    -- variables stored on the initial step
        existentials -- existentials stored on the initial step
        env          -- starting bindings; if non-empty they are first applied
                        to every item with bindNew()
        evidence     -- starting list of reasons

        Returns the sum of the values returned by query.conclude().
        Raises RuntimeError for an item in an unknown state.
        """
        total = 0
        if env:
            for i in queue:
                i.bindNew(env)
        stack = [Chain_Step(variables, existentials, queue, env, evidence=evidence)]
        while stack:
            if diag.chatty_flag > 150:
                progress(stack)
            workingStep = stack.pop()

            if workingStep.done():
                if workingStep.parent is not None:
                    raise RuntimeError("We are not chaining yet.\n How did I get here?")
                # No terms left .. success!
                total = query.conclude(workingStep.env.filter(workingStep.vars),
                                       allBindings=workingStep.env,
                                       evidence=workingStep.evidence,
                                       extraBNodes=workingStep.lines.bNodes) + total
                continue

            queue = workingStep.lines
            evidence = workingStep.evidence
            bindings = workingStep.env
            variables = workingStep.vars
            existentials = workingStep.existentials

            item = workingStep.popBest()

            con, pred, subj, obj = item.quad
            state = item.state
            if state == S_DONE:  # After bindNew, could be undoable.
                nbs = []
            elif state == S_LIGHT_UNS_READY:          # Search then 
                nbs = item.tryBuiltin(queue, bindings, evidence=evidence)
                item.state = S_LIGHT_EARLY   # Unsearched, try builtin @@@@@@@@@ <== need new state here
            elif state == S_LIGHT_GO:
                nbs = item.tryBuiltin(queue, bindings, evidence=evidence)
                item.state = S_DONE   # Searched.
            elif (state == S_LIGHT_EARLY or state == S_NOT_LIGHT or
                                    state == S_NEED_DEEP): #  Not searched yet
                nbs = item.tryDeepSearch(queue, bindings)
            elif state == S_HEAVY_READY:  # not light, may be heavy; or heavy ready to run
                if pred is query.store.includes: # and not diag.tracking:  # don't optimize when tracking?
                    # Hand private copies to doIncludes; the copies are also
                    # what the steps created below will carry.
                    variables = variables.copy()
                    existentials = existentials.copy()
                    nbs = item.doIncludes(queue, existentials, variables, bindings)
                elif pred is query.store.supports:
                    variables = variables.copy()
                    existentials = existentials.copy()
                    nbs = item.doSupports(queue, existentials, variables, bindings)
                else:
                    item.state = S_HEAVY_WAIT  # Assume can't resolve
                    nbs = item.tryBuiltin(queue, bindings, evidence=evidence)
                item.state = S_DONE
            elif state == S_REMOTE: # Remote query -- need to find all of them for the same service
                items = [item]
                for i in queue[:]:   # iterate over a copy: the queue shrinks below
                    if i.state == S_REMOTE and i.service is item.service: #@@ optimize which group is done first!
                        items.append(i)
                        queue.remove(i)
                nbs = query.remoteQuery(items)
                item.state = S_DONE  # do not put back on list
            elif state ==S_HEAVY_WAIT or state == S_LIGHT_WAIT:
                if item.quad[PRED] is query.store.universalVariableName or \
                   item.quad[PRED] is query.store.existentialVariableName:
                    ### We will never bind this variable in the first place
                    item.state = S_DONE
                    nbs = []
                else:
                    if diag.chatty_flag > 20 :
                        progress("@@@@ Warning: query can't find term which will work.")
                        progress( "   state is %s, queue length %i" % (state, len(queue)+1))
                        progress("@@ Current item: %s" % repr(item))
                        progress(queueToString(queue))
                    continue  # Forget it
            else:
                raise RuntimeError("Unknown state " + repr(state))

            stack_extent = []
            for nb, reason in nbs:
                assert isinstance(nb, dict), nb
                # Fresh queue that starts with copies of this queue's
                # statements/bNodes bookkeeping.
                q2 = Queue([], queue)
                if query.justReturn:
                    ### What does the following do?
                    ### If we are doing a 1::1 match, record everything we have matched
                    if isinstance(reason, StoredStatement):
                        if reason not in q2.statements and \
                           reason[CONTEXT] is query.workingContext:
                            q2.statements.add(reason)
                        else:
                            continue
                if isinstance(reason, StoredStatement):
                    if True or reason[CONTEXT] is not query.workingContext:
                        for m in nb.values():
                            if isinstance(m, tuple):
                                m = m[0]
                            if m in reason[CONTEXT].existentials():
                                q2.bNodes.add(m)
                                if diag.chatty_flag > 80:
                                    ### These get picked up from log:includes
                                    ### {[:b :c]} log:includes {?X :b :c} ...
                                    progress('Adding bNode %s, now %s' % (m, q2.bNodes))
                try:
                    new_env = bindings.flatten(nb)
                except ValueError:
                    # flatten() could not merge nb into the current bindings,
                    # so this alternative yields no step.  Skip it right here:
                    # carrying on would use new_env while it is unbound (first
                    # alternative) or left over from an earlier alternative.
                    continue
                for i in queue:
                    newItem = i.clone()
                    newItem.bindNew(new_env) ## Is this right? I was hoping to avoid this
                    q2.append(newItem)  #@@@@@@@@@@  If exactly 1 binding, loop (tail recurse)
                if diag.chatty_flag > 70:
                    progress("query.py bindings nb new_entry:", bindings, nb, new_env)
                new_step = Chain_Step(variables, existentials, q2, new_env, workingStep.parent, workingStep.evidence + [reason])
                stack_extent.append(new_step)

            if item.state != S_DONE:
                # The item may still produce more: put it back and revisit
                # this step after the alternatives found so far.
                queue.append(item)
                new_step = workingStep.copy()
                stack_extent.append(new_step)
            stack.extend(stack_extent)
        return total


    def matchFormula2(query,
               queue,               # Set of items we are trying to match CORRUPTED
               variables,           # List of variables to match and return CORRUPTED
               existentials,        # List of variables to match to anything
                                    # Existentials or any kind of variable in subexpression
               bindings = Env(),       # Bindings discovered so far
               newBindings = Env(),    # New bindings not yet incorporated
               evidence = []):      # List of statements supporting the bindings so far
        """ Iterate on the remaining query items
    bindings      collected matches already found
    newBindings  matches found and not yet applied - used in recursion
    
    You probably really need the state diagram to understand this
    http://www.w3.org/2000/10/swap/doc/states.svg
    even if it is a bit out of date.
        """
        total = 0
        assert isinstance(bindings, Env)
        assert isinstance(newBindings, Env), 'env is an %s, not an %s' % (newBindings.__class__, Env)
        if diag.chatty_flag > 59:
            progress( "QUERY2: called %i terms, %i bindings %s, (new: %s)" %
                      (len(queue), len(bindings), `bindings`,
                       `newBindings`))
            if diag.chatty_flag > 90: progress( queueToString(queue))

        newBindingItems = newBindings.items()
        while newBindingItems:   # Take care of business left over from recursive call
            pair = newBindingItems.pop(0)
            if isinstance(pair[1], tuple):
                pair = (pair[0], pair[1][0])
            else:
                raise RuntimeError
            if diag.chatty_flag>95: progress("    new binding:  %s -> %s" % (`pair[0]`, `pair[1]`))
            if pair[0] in variables:
                variables.remove(pair[0])
                bindings = bindings.newBinding(pair[0], (pair[1], None))
            else:      # Formulae aren't needed as existentials, unlike lists. hmm.
                ### bindings.update({pair[0]: pair[1]})  # remove me!!!!!
#               if diag.tracking: raise RuntimeError(pair[0], pair[1])
                #bindings.update({pair[0]: pair[1]})  # Record for proof only
                if pair[0] not in existentials:
                    if isinstance(pair[0], List):
                        # pair[0] should be a variable, can't be a list, surely
                …
Large files files are truncated, but you can click here to view the full file