PageRenderTime 632ms CodeModel.GetById 12ms RepoModel.GetById 0ms app.codeStats 0ms

/nltk_contrib/nltk_contrib/tiger/query/ast_utils.py

http://nltk.googlecode.com/
Python | 216 lines | 162 code | 12 blank | 42 comment | 4 complexity | dea7af27f551f413c620d516a4f8580a MD5 | raw file
Possible License(s): Apache-2.0, AGPL-1.0
  1. # -*- coding: utf-8 -*-
  2. # Copyright © 2007-2008 Stockholm TreeAligner Project
  3. # Author: Torsten Marek <shlomme@gmx.net>
  4. # Licensed under the GNU GPLv2
  5. """Utility functions for the abstract syntax trees defined in `nltk_contrib.tiger.query.ast`.
  6. """
  7. from functools import partial
  8. from nltk_contrib.tiger.query import ast
  9. from nltk_contrib.tiger.query.ast_visitor import AstVisitor, node_handler
  10. from nltk_contrib.tiger.query.exceptions import UndefinedNameError
  11. __all__ = ["create_varref", "create_vardef", "split_children", "to_dnf"]
  def create_varref(name, var_type=ast.VariableTypes.NodeIdentifier,
                    container_type=ast.ContainerTypes.Single):
      """Builds a `VariableReference` that points to the variable called `name`.

      *Parameters*:
       * `name`: the name of the referenced variable.
       * `var_type`: the variable's type, one of the members of
         `nltk_contrib.tiger.query.ast.VariableTypes`.
       * `container_type`: the variable's container type, one of the members of
         `nltk_contrib.tiger.query.ast.ContainerTypes`.

      *Return Value*:
      A new `VariableReference` wrapping a freshly created `Variable`.
      """
      variable = ast.Variable(name, var_type, container_type)
      return ast.VariableReference(variable)
  def create_vardef(name, expression, var_type=ast.VariableTypes.NodeIdentifier,
                    container_type=ast.ContainerTypes.Single):
      """Builds a `VariableDefinition` that binds `expression` to the variable `name`.

      *Parameters*:
       * `name`: the name of the defined variable.
       * `expression`: the right-hand side that is assigned to the variable.
       * `var_type`: the variable's type, one of the members of
         `nltk_contrib.tiger.query.ast.VariableTypes`.
       * `container_type`: the variable's container type, one of the members of
         `nltk_contrib.tiger.query.ast.ContainerTypes`.

      *Return Value*:
      A new `VariableDefinition` made of a freshly created `Variable` and `expression`.
      """
      variable = ast.Variable(name, var_type, container_type)
      return ast.VariableDefinition(variable, expression)
  def split_children(ast_node, ast_type):
      """Partitions the children of `ast_node` by whether their type is `ast_type`.

      *Parameters*:
       * `ast_node`: the AST node whose children are partitioned; it is iterated exactly once.
       * `ast_type`: the type that every child in the first list must have (identity
         comparison against the child's `TYPE` attribute).

      *Return Value*:
      A tuple `(a, b)` of lists: `a` holds the children whose `TYPE` is `ast_type`,
      `b` holds all the others. The original order is kept inside each list.
      """
      matching = []
      others = []
      for child in ast_node:
          # Pick the destination list first, then append once.
          bucket = matching if child.TYPE is ast_type else others
          bucket.append(child)
      return matching, others
  class NegNormalizer(AstVisitor):
      """An AST visitor that normalizes all negations in a boolean expression.

      The visitor applies the following transformations:
       - elimination of double negation: ``!!a => a``
       - De Morgan's laws:
         * ``!(a | b) => !a & !b``
         * ``!(a & b) => !a | !b``

      After the transformation, `Negation` nodes will only contain atoms, not other terms.

      *Parameters*:
       * `feature_types`: a dictionary that maps feature names to `NodeType`s, needed for
         the negation of `FeatureRecords`.
      """

      def __init__(self, feature_types):
          # Name the class explicitly: `super(self.__class__, self)` recurses forever
          # as soon as this class is subclassed.
          super(NegNormalizer, self).__init__()
          self._feature_types = feature_types

      @node_handler(ast.Negation)
      def normalize_neg(self, child_node):
          """Normalizes a negation if it reigns over something other than a term.

          *Parameters*:
           * `child_node`: the `Negation` node being visited.

          *Return Value*:
          The visitor action: `self.REPLACE(...)` with the rewritten expression, or
          `self.CONTINUE(child_node)` when the negated expression cannot be inverted.

          *Raises*:
          `UndefinedNameError` if a negated feature constraint names a feature that is
          missing from `feature_types`.
          """
          negated = child_node.expression
          if negated.TYPE is ast.FeatureConstraint:
              # cf. TIGERSearch Query Language, section 8.4 (http://tinyurl.com/2jm24u)
              # !(pos="ART") === !(T & pos="ART") === !(T) | (pos != "ART")
              feature = negated.feature
              try:
                  orig_type = self._feature_types[feature]
              except KeyError:
                  # For a dictionary lookup the missing key is the first exception
                  # argument, so reporting `feature` gives the same error payload.
                  raise UndefinedNameError(UndefinedNameError.FEATURE, feature)
              return self.REPLACE(
                  ast.Disjunction([
                      ast.FeatureRecord(~orig_type),
                      ast.FeatureConstraint(feature, ast.Negation(negated.expression)),
                  ]))

          try:
              return self.REPLACE(~negated)
          except TypeError:
              # The inversion raised `TypeError`: leave the negation in place and
              # hand the node back to the visitor.
              return self.CONTINUE(child_node)
  def outer_product(l):
      """Produces the outer product of a list of lists.

      The combinations are generated lazily; every yielded combination is a new list
      that holds one element from each inner list, in the order of the inner lists.
      An empty outer list yields exactly one empty combination, and an empty inner
      list yields no combinations at all.

      Example:
      >>> l = [[1, 2], [3, 4]]
      >>> list(outer_product(l))
      [[1, 3], [1, 4], [2, 3], [2, 4]]

      If the number of lists is fixed, it is better to use a list comprehension
      >>> [(e1, e2) for e1 in l1 for e2 in l2]

      *Parameters*:
       * `l`: an indexable sequence of iterables.

      *Return Value*:
      A generator over all combinations, each one a list.
      """
      def _expand(pos, prefix):
          """Recurses on `l` and produces all combinations of elements from the lists."""
          if pos == len(l):
              # Every position has been filled (also reached at once for an empty `l`);
              # hand out a copy because `prefix` is reused while backtracking.
              yield prefix[:]
              return
          for e in l[pos]:
              prefix.append(e)
              for r in _expand(pos + 1, prefix):
                  yield r
              prefix.pop()

      return _expand(0, [])
  def _distribute(top_conjunction):
      """Applies the law of distributivity to a conjunction that contains disjunctions.

      Example:
        ``(d1 | d2) & d3 => (d1 & d3) | (d2 & d3)``

      The function also applies the associativity laws, so it will never produce
      nested expressions with the same operators.

      *Parameters*:
       * `top_conjunction`: the conjunction node whose children are redistributed.

      *Return Value*:
      A new `Disjunction` whose children are the generated `Conjunction` nodes.
      """
      disjunctions, plain_terms = split_children(top_conjunction, ast.Disjunction)

      # One conjunction per way of picking a single alternative from every disjunction.
      conjunctions = []
      for picks in outer_product(disjunctions):
          conjunctions.append(ast.Conjunction(plain_terms + picks))

      # Flatten only after all conjunctions have been built.
      for conjunction in conjunctions:
          conjunction.apply_associativity()

      return ast.Disjunction(conjunctions)
  def distribute_disjunctions(tree):
      """Distributes all disjunctions in `tree` and also applies the law of associativity.

      Example:
        ``A & (B & (C | D) | E) => (A & B & C) | (A & B & D) | (A & E)``

      *Parameters*:
       * `tree`: the node whose boolean sub-expressions are processed; its children
         are replaced in place.

      *Return Value*:
      The result of distributing `tree` if it is a conjunction that has at least one
      disjunction child after processing, otherwise `tree` itself.
      """
      boolean_types = (ast.Conjunction, ast.Disjunction)
      found_disjunction = False

      for name, child in tree.named_iter():
          if child.TYPE not in boolean_types:
              continue
          # Normalize bottom-up, then put the processed child back under its name.
          replacement = distribute_disjunctions(child)
          if replacement.TYPE is ast.Disjunction:
              found_disjunction = True
          replacement.apply_associativity()
          tree.set_child(name, replacement)

      if tree.TYPE is ast.Conjunction and found_disjunction:
          return _distribute(tree)
      return tree
  def to_dnf(tree, feature_types):
      """Converts a boolean expression into Disjunctive Normal Form.

      After the transformation, the boolean expression will be a disjunction of terms,
      negation will be normalized as well, and associativity laws have been applied.

      Note that the `tree` node itself will not be considered to be a part of the
      boolean expression.

      *Parameters*:
       * `tree`: the node that contains the boolean expression.
       * `feature_types`: a dictionary that maps feature names to `NodeType`s, needed
         for negation of `FeatureRecords`.

      *Return Value*:
      Whatever `distribute_disjunctions` returns for `tree`.
      """
      # First push all negations down, then distribute the disjunctions.
      NegNormalizer(feature_types).run(tree)
      return distribute_disjunctions(tree)
  class NodeDescriptionNormalizer(AstVisitor):
      """Normalizes a node description.

      After normalization, a node description is in disjunctive normal form,
      and no feature constraint expression contains any disjunctions.

      *Parameters*:
       * `feature_types`: the mapping of feature names that is passed on to `to_dnf`.
      """

      def __init__(self, feature_types):
          # Name the class explicitly: `super(self.__class__, self)` recurses forever
          # as soon as this class is subclassed.
          super(NodeDescriptionNormalizer, self).__init__()
          self._feature_types = feature_types

      @node_handler(ast.FeatureConstraint)
      def distribute(self, child_node):
          """Normalizes feature constraint expressions.

          A feature constraint with disjunctions is turned into a disjunction of feature
          constraints.

          *Parameters*:
           * `child_node`: the `FeatureConstraint` node being visited.

          *Return Value*:
          `self.REPLACE(...)` with a `Disjunction` of feature constraints if the
          normalized expression is a disjunction, otherwise `self.CONTINUE(child_node)`.
          """
          # Normalizes the constraint's expression in place; the return value is not needed.
          to_dnf(child_node, self._feature_types)

          if child_node.expression.TYPE is ast.Disjunction:
              # One constraint on the same feature for every alternative.
              fc = partial(ast.FeatureConstraint, child_node.feature)
              return self.REPLACE(ast.Disjunction(
                  [fc(term) for term in child_node.expression]))
          else:
              return self.CONTINUE(child_node)

      def result(self, query):
          """Normalizes the node description, after all feature constraints have been normalized."""
          to_dnf(query, self._feature_types)