PageRenderTime 92ms CodeModel.GetById 60ms app.highlight 25ms RepoModel.GetById 1ms app.codeStats 0ms

/Lib/lib2to3/pytree.py

http://unladen-swallow.googlecode.com/
Python | 802 lines | 792 code | 1 blank | 9 comment | 0 complexity | ccba1a4f53acf6914c1412f2d765e6b8 MD5 | raw file
  1# Copyright 2006 Google, Inc. All Rights Reserved.
  2# Licensed to PSF under a Contributor Agreement.
  3
  4"""Python parse tree definitions.
  5
  6This is a very concrete parse tree; we need to keep every token and
  7even the comments and whitespace between tokens.
  8
  9There's also a pattern matching implementation here.
 10"""
 11
 12__author__ = "Guido van Rossum <guido@python.org>"
 13
 14import sys
 15from StringIO import StringIO
 16
 17
 18HUGE = 0x7FFFFFFF  # maximum repeat count, default max
 19
 20_type_reprs = {}
 21def type_repr(type_num):
 22    global _type_reprs
 23    if not _type_reprs:
 24        from .pygram import python_symbols
 25        # printing tokens is possible but not as useful
 26        # from .pgen2 import token // token.__dict__.items():
 27        for name, val in python_symbols.__dict__.items():
 28            if type(val) == int: _type_reprs[val] = name
 29    return _type_reprs.setdefault(type_num, type_num)
 30
 31
 32class Base(object):
 33
 34    """Abstract base class for Node and Leaf.
 35
 36    This provides some default functionality and boilerplate using the
 37    template pattern.
 38
 39    A node may be a subnode of at most one parent.
 40    """
 41
 42    # Default values for instance variables
 43    type = None    # int: token number (< 256) or symbol number (>= 256)
 44    parent = None  # Parent node pointer, or None
 45    children = ()  # Tuple of subnodes
 46    was_changed = False
 47
 48    def __new__(cls, *args, **kwds):
 49        """Constructor that prevents Base from being instantiated."""
 50        assert cls is not Base, "Cannot instantiate Base"
 51        return object.__new__(cls)
 52
 53    def __eq__(self, other):
 54        """Compares two nodes for equality.
 55
 56        This calls the method _eq().
 57        """
 58        if self.__class__ is not other.__class__:
 59            return NotImplemented
 60        return self._eq(other)
 61
 62    def __ne__(self, other):
 63        """Compares two nodes for inequality.
 64
 65        This calls the method _eq().
 66        """
 67        if self.__class__ is not other.__class__:
 68            return NotImplemented
 69        return not self._eq(other)
 70
 71    def _eq(self, other):
 72        """Compares two nodes for equality.
 73
 74        This is called by __eq__ and __ne__.  It is only called if the
 75        two nodes have the same type.  This must be implemented by the
 76        concrete subclass.  Nodes should be considered equal if they
 77        have the same structure, ignoring the prefix string and other
 78        context information.
 79        """
 80        raise NotImplementedError
 81
 82    def clone(self):
 83        """Returns a cloned (deep) copy of self.
 84
 85        This must be implemented by the concrete subclass.
 86        """
 87        raise NotImplementedError
 88
 89    def post_order(self):
 90        """Returns a post-order iterator for the tree.
 91
 92        This must be implemented by the concrete subclass.
 93        """
 94        raise NotImplementedError
 95
 96    def pre_order(self):
 97        """Returns a pre-order iterator for the tree.
 98
 99        This must be implemented by the concrete subclass.
100        """
101        raise NotImplementedError
102
103    def set_prefix(self, prefix):
104        """Sets the prefix for the node (see Leaf class).
105
106        This must be implemented by the concrete subclass.
107        """
108        raise NotImplementedError
109
110    def get_prefix(self):
111        """Returns the prefix for the node (see Leaf class).
112
113        This must be implemented by the concrete subclass.
114        """
115        raise NotImplementedError
116
117    def replace(self, new):
118        """Replaces this node with a new one in the parent."""
119        assert self.parent is not None, str(self)
120        assert new is not None
121        if not isinstance(new, list):
122            new = [new]
123        l_children = []
124        found = False
125        for ch in self.parent.children:
126            if ch is self:
127                assert not found, (self.parent.children, self, new)
128                if new is not None:
129                    l_children.extend(new)
130                found = True
131            else:
132                l_children.append(ch)
133        assert found, (self.children, self, new)
134        self.parent.changed()
135        self.parent.children = l_children
136        for x in new:
137            x.parent = self.parent
138        self.parent = None
139
140    def get_lineno(self):
141        """Returns the line number which generated the invocant node."""
142        node = self
143        while not isinstance(node, Leaf):
144            if not node.children:
145                return
146            node = node.children[0]
147        return node.lineno
148
149    def changed(self):
150        if self.parent:
151            self.parent.changed()
152        self.was_changed = True
153
154    def remove(self):
155        """Remove the node from the tree. Returns the position of the node
156        in its parent's children before it was removed."""
157        if self.parent:
158            for i, node in enumerate(self.parent.children):
159                if node is self:
160                    self.parent.changed()
161                    del self.parent.children[i]
162                    self.parent = None
163                    return i
164
165    def get_next_sibling(self):
166        """Return the node immediately following the invocant in their
167        parent's children list. If the invocant does not have a next
168        sibling, return None."""
169        if self.parent is None:
170            return None
171
172        # Can't use index(); we need to test by identity
173        for i, child in enumerate(self.parent.children):
174            if child is self:
175                try:
176                    return self.parent.children[i+1]
177                except IndexError:
178                    return None
179
180    def get_prev_sibling(self):
181        """Return the node immediately preceding the invocant in their
182        parent's children list. If the invocant does not have a previous
183        sibling, return None."""
184        if self.parent is None:
185            return None
186
187        # Can't use index(); we need to test by identity
188        for i, child in enumerate(self.parent.children):
189            if child is self:
190                if i == 0:
191                    return None
192                return self.parent.children[i-1]
193
194    def get_suffix(self):
195        """Return the string immediately following the invocant node. This
196        is effectively equivalent to node.get_next_sibling().get_prefix()"""
197        next_sib = self.get_next_sibling()
198        if next_sib is None:
199            return ""
200        return next_sib.get_prefix()
201
202
203class Node(Base):
204
205    """Concrete implementation for interior nodes."""
206
207    def __init__(self, type, children, context=None, prefix=None):
208        """Initializer.
209
210        Takes a type constant (a symbol number >= 256), a sequence of
211        child nodes, and an optional context keyword argument.
212
213        As a side effect, the parent pointers of the children are updated.
214        """
215        assert type >= 256, type
216        self.type = type
217        self.children = list(children)
218        for ch in self.children:
219            assert ch.parent is None, repr(ch)
220            ch.parent = self
221        if prefix is not None:
222            self.set_prefix(prefix)
223
224    def __repr__(self):
225        """Returns a canonical string representation."""
226        return "%s(%s, %r)" % (self.__class__.__name__,
227                               type_repr(self.type),
228                               self.children)
229
230    def __str__(self):
231        """Returns a pretty string representation.
232
233        This reproduces the input source exactly.
234        """
235        return "".join(map(str, self.children))
236
237    def _eq(self, other):
238        """Compares two nodes for equality."""
239        return (self.type, self.children) == (other.type, other.children)
240
241    def clone(self):
242        """Returns a cloned (deep) copy of self."""
243        return Node(self.type, [ch.clone() for ch in self.children])
244
245    def post_order(self):
246        """Returns a post-order iterator for the tree."""
247        for child in self.children:
248            for node in child.post_order():
249                yield node
250        yield self
251
252    def pre_order(self):
253        """Returns a pre-order iterator for the tree."""
254        yield self
255        for child in self.children:
256            for node in child.post_order():
257                yield node
258
259    def set_prefix(self, prefix):
260        """Sets the prefix for the node.
261
262        This passes the responsibility on to the first child.
263        """
264        if self.children:
265            self.children[0].set_prefix(prefix)
266
267    def get_prefix(self):
268        """Returns the prefix for the node.
269
270        This passes the call on to the first child.
271        """
272        if not self.children:
273            return ""
274        return self.children[0].get_prefix()
275
276    def set_child(self, i, child):
277        """Equivalent to 'node.children[i] = child'. This method also sets the
278        child's parent attribute appropriately."""
279        child.parent = self
280        self.children[i].parent = None
281        self.children[i] = child
282        self.changed()
283
284    def insert_child(self, i, child):
285        """Equivalent to 'node.children.insert(i, child)'. This method also
286        sets the child's parent attribute appropriately."""
287        child.parent = self
288        self.children.insert(i, child)
289        self.changed()
290
291    def append_child(self, child):
292        """Equivalent to 'node.children.append(child)'. This method also
293        sets the child's parent attribute appropriately."""
294        child.parent = self
295        self.children.append(child)
296        self.changed()
297
298
299class Leaf(Base):
300
301    """Concrete implementation for leaf nodes."""
302
303    # Default values for instance variables
304    prefix = ""  # Whitespace and comments preceding this token in the input
305    lineno = 0   # Line where this token starts in the input
306    column = 0   # Column where this token tarts in the input
307
308    def __init__(self, type, value, context=None, prefix=None):
309        """Initializer.
310
311        Takes a type constant (a token number < 256), a string value,
312        and an optional context keyword argument.
313        """
314        assert 0 <= type < 256, type
315        if context is not None:
316            self.prefix, (self.lineno, self.column) = context
317        self.type = type
318        self.value = value
319        if prefix is not None:
320            self.prefix = prefix
321
322    def __repr__(self):
323        """Returns a canonical string representation."""
324        return "%s(%r, %r)" % (self.__class__.__name__,
325                               self.type,
326                               self.value)
327
328    def __str__(self):
329        """Returns a pretty string representation.
330
331        This reproduces the input source exactly.
332        """
333        return self.prefix + str(self.value)
334
335    def _eq(self, other):
336        """Compares two nodes for equality."""
337        return (self.type, self.value) == (other.type, other.value)
338
339    def clone(self):
340        """Returns a cloned (deep) copy of self."""
341        return Leaf(self.type, self.value,
342                    (self.prefix, (self.lineno, self.column)))
343
344    def post_order(self):
345        """Returns a post-order iterator for the tree."""
346        yield self
347
348    def pre_order(self):
349        """Returns a pre-order iterator for the tree."""
350        yield self
351
352    def set_prefix(self, prefix):
353        """Sets the prefix for the node."""
354        self.changed()
355        self.prefix = prefix
356
357    def get_prefix(self):
358        """Returns the prefix for the node."""
359        return self.prefix
360
361
362def convert(gr, raw_node):
363    """Converts raw node information to a Node or Leaf instance.
364
365    This is passed to the parser driver which calls it whenever a
366    reduction of a grammar rule produces a new complete node, so that
367    the tree is build strictly bottom-up.
368    """
369    type, value, context, children = raw_node
370    if children or type in gr.number2symbol:
371        # If there's exactly one child, return that child instead of
372        # creating a new node.
373        if len(children) == 1:
374            return children[0]
375        return Node(type, children, context=context)
376    else:
377        return Leaf(type, value, context=context)
378
379
380class BasePattern(object):
381
382    """A pattern is a tree matching pattern.
383
384    It looks for a specific node type (token or symbol), and
385    optionally for a specific content.
386
387    This is an abstract base class.  There are three concrete
388    subclasses:
389
390    - LeafPattern matches a single leaf node;
391    - NodePattern matches a single node (usually non-leaf);
392    - WildcardPattern matches a sequence of nodes of variable length.
393    """
394
395    # Defaults for instance variables
396    type = None     # Node type (token if < 256, symbol if >= 256)
397    content = None  # Optional content matching pattern
398    name = None     # Optional name used to store match in results dict
399
400    def __new__(cls, *args, **kwds):
401        """Constructor that prevents BasePattern from being instantiated."""
402        assert cls is not BasePattern, "Cannot instantiate BasePattern"
403        return object.__new__(cls)
404
405    def __repr__(self):
406        args = [type_repr(self.type), self.content, self.name]
407        while args and args[-1] is None:
408            del args[-1]
409        return "%s(%s)" % (self.__class__.__name__, ", ".join(map(repr, args)))
410
411    def optimize(self):
412        """A subclass can define this as a hook for optimizations.
413
414        Returns either self or another node with the same effect.
415        """
416        return self
417
418    def match(self, node, results=None):
419        """Does this pattern exactly match a node?
420
421        Returns True if it matches, False if not.
422
423        If results is not None, it must be a dict which will be
424        updated with the nodes matching named subpatterns.
425
426        Default implementation for non-wildcard patterns.
427        """
428        if self.type is not None and node.type != self.type:
429            return False
430        if self.content is not None:
431            r = None
432            if results is not None:
433                r = {}
434            if not self._submatch(node, r):
435                return False
436            if r:
437                results.update(r)
438        if results is not None and self.name:
439            results[self.name] = node
440        return True
441
442    def match_seq(self, nodes, results=None):
443        """Does this pattern exactly match a sequence of nodes?
444
445        Default implementation for non-wildcard patterns.
446        """
447        if len(nodes) != 1:
448            return False
449        return self.match(nodes[0], results)
450
451    def generate_matches(self, nodes):
452        """Generator yielding all matches for this pattern.
453
454        Default implementation for non-wildcard patterns.
455        """
456        r = {}
457        if nodes and self.match(nodes[0], r):
458            yield 1, r
459
460
461class LeafPattern(BasePattern):
462
463    def __init__(self, type=None, content=None, name=None):
464        """Initializer.  Takes optional type, content, and name.
465
466        The type, if given must be a token type (< 256).  If not given,
467        this matches any *leaf* node; the content may still be required.
468
469        The content, if given, must be a string.
470
471        If a name is given, the matching node is stored in the results
472        dict under that key.
473        """
474        if type is not None:
475            assert 0 <= type < 256, type
476        if content is not None:
477            assert isinstance(content, basestring), repr(content)
478        self.type = type
479        self.content = content
480        self.name = name
481
482    def match(self, node, results=None):
483        """Override match() to insist on a leaf node."""
484        if not isinstance(node, Leaf):
485            return False
486        return BasePattern.match(self, node, results)
487
488    def _submatch(self, node, results=None):
489        """Match the pattern's content to the node's children.
490
491        This assumes the node type matches and self.content is not None.
492
493        Returns True if it matches, False if not.
494
495        If results is not None, it must be a dict which will be
496        updated with the nodes matching named subpatterns.
497
498        When returning False, the results dict may still be updated.
499        """
500        return self.content == node.value
501
502
503class NodePattern(BasePattern):
504
505    wildcards = False
506
507    def __init__(self, type=None, content=None, name=None):
508        """Initializer.  Takes optional type, content, and name.
509
510        The type, if given, must be a symbol type (>= 256).  If the
511        type is None this matches *any* single node (leaf or not),
512        except if content is not None, in which it only matches
513        non-leaf nodes that also match the content pattern.
514
515        The content, if not None, must be a sequence of Patterns that
516        must match the node's children exactly.  If the content is
517        given, the type must not be None.
518
519        If a name is given, the matching node is stored in the results
520        dict under that key.
521        """
522        if type is not None:
523            assert type >= 256, type
524        if content is not None:
525            assert not isinstance(content, basestring), repr(content)
526            content = list(content)
527            for i, item in enumerate(content):
528                assert isinstance(item, BasePattern), (i, item)
529                if isinstance(item, WildcardPattern):
530                    self.wildcards = True
531        self.type = type
532        self.content = content
533        self.name = name
534
535    def _submatch(self, node, results=None):
536        """Match the pattern's content to the node's children.
537
538        This assumes the node type matches and self.content is not None.
539
540        Returns True if it matches, False if not.
541
542        If results is not None, it must be a dict which will be
543        updated with the nodes matching named subpatterns.
544
545        When returning False, the results dict may still be updated.
546        """
547        if self.wildcards:
548            for c, r in generate_matches(self.content, node.children):
549                if c == len(node.children):
550                    if results is not None:
551                        results.update(r)
552                    return True
553            return False
554        if len(self.content) != len(node.children):
555            return False
556        for subpattern, child in zip(self.content, node.children):
557            if not subpattern.match(child, results):
558                return False
559        return True
560
561
562class WildcardPattern(BasePattern):
563
564    """A wildcard pattern can match zero or more nodes.
565
566    This has all the flexibility needed to implement patterns like:
567
568    .*      .+      .?      .{m,n}
569    (a b c | d e | f)
570    (...)*  (...)+  (...)?  (...){m,n}
571
572    except it always uses non-greedy matching.
573    """
574
575    def __init__(self, content=None, min=0, max=HUGE, name=None):
576        """Initializer.
577
578        Args:
579            content: optional sequence of subsequences of patterns;
580                     if absent, matches one node;
581                     if present, each subsequence is an alternative [*]
582            min: optinal minumum number of times to match, default 0
583            max: optional maximum number of times tro match, default HUGE
584            name: optional name assigned to this match
585
586        [*] Thus, if content is [[a, b, c], [d, e], [f, g, h]] this is
587            equivalent to (a b c | d e | f g h); if content is None,
588            this is equivalent to '.' in regular expression terms.
589            The min and max parameters work as follows:
590                min=0, max=maxint: .*
591                min=1, max=maxint: .+
592                min=0, max=1: .?
593                min=1, max=1: .
594            If content is not None, replace the dot with the parenthesized
595            list of alternatives, e.g. (a b c | d e | f g h)*
596        """
597        assert 0 <= min <= max <= HUGE, (min, max)
598        if content is not None:
599            content = tuple(map(tuple, content))  # Protect against alterations
600            # Check sanity of alternatives
601            assert len(content), repr(content)  # Can't have zero alternatives
602            for alt in content:
603                assert len(alt), repr(alt) # Can have empty alternatives
604        self.content = content
605        self.min = min
606        self.max = max
607        self.name = name
608
609    def optimize(self):
610        """Optimize certain stacked wildcard patterns."""
611        subpattern = None
612        if (self.content is not None and
613            len(self.content) == 1 and len(self.content[0]) == 1):
614            subpattern = self.content[0][0]
615        if self.min == 1 and self.max == 1:
616            if self.content is None:
617                return NodePattern(name=self.name)
618            if subpattern is not None and  self.name == subpattern.name:
619                return subpattern.optimize()
620        if (self.min <= 1 and isinstance(subpattern, WildcardPattern) and
621            subpattern.min <= 1 and self.name == subpattern.name):
622            return WildcardPattern(subpattern.content,
623                                   self.min*subpattern.min,
624                                   self.max*subpattern.max,
625                                   subpattern.name)
626        return self
627
628    def match(self, node, results=None):
629        """Does this pattern exactly match a node?"""
630        return self.match_seq([node], results)
631
632    def match_seq(self, nodes, results=None):
633        """Does this pattern exactly match a sequence of nodes?"""
634        for c, r in self.generate_matches(nodes):
635            if c == len(nodes):
636                if results is not None:
637                    results.update(r)
638                    if self.name:
639                        results[self.name] = list(nodes)
640                return True
641        return False
642
643    def generate_matches(self, nodes):
644        """Generator yielding matches for a sequence of nodes.
645
646        Args:
647            nodes: sequence of nodes
648
649        Yields:
650            (count, results) tuples where:
651            count: the match comprises nodes[:count];
652            results: dict containing named submatches.
653        """
654        if self.content is None:
655            # Shortcut for special case (see __init__.__doc__)
656            for count in xrange(self.min, 1 + min(len(nodes), self.max)):
657                r = {}
658                if self.name:
659                    r[self.name] = nodes[:count]
660                yield count, r
661        elif self.name == "bare_name":
662            yield self._bare_name_matches(nodes)
663        else:
664            # The reason for this is that hitting the recursion limit usually
665            # results in some ugly messages about how RuntimeErrors are being
666            # ignored.
667            save_stderr = sys.stderr
668            sys.stderr = StringIO()
669            try:
670                for count, r in self._recursive_matches(nodes, 0):
671                    if self.name:
672                        r[self.name] = nodes[:count]
673                    yield count, r
674            except RuntimeError:
675                # We fall back to the iterative pattern matching scheme if the recursive
676                # scheme hits the recursion limit.
677                for count, r in self._iterative_matches(nodes):
678                    if self.name:
679                        r[self.name] = nodes[:count]
680                    yield count, r
681            finally:
682                sys.stderr = save_stderr
683
684    def _iterative_matches(self, nodes):
685        """Helper to iteratively yield the matches."""
686        nodelen = len(nodes)
687        if 0 >= self.min:
688            yield 0, {}
689
690        results = []
691        # generate matches that use just one alt from self.content
692        for alt in self.content:
693            for c, r in generate_matches(alt, nodes):
694                yield c, r
695                results.append((c, r))
696
697        # for each match, iterate down the nodes
698        while results:
699            new_results = []
700            for c0, r0 in results:
701                # stop if the entire set of nodes has been matched
702                if c0 < nodelen and c0 <= self.max:
703                    for alt in self.content:
704                        for c1, r1 in generate_matches(alt, nodes[c0:]):
705                            if c1 > 0:
706                                r = {}
707                                r.update(r0)
708                                r.update(r1)
709                                yield c0 + c1, r
710                                new_results.append((c0 + c1, r))
711            results = new_results
712
713    def _bare_name_matches(self, nodes):
714        """Special optimized matcher for bare_name."""
715        count = 0
716        r = {}
717        done = False
718        max = len(nodes)
719        while not done and count < max:
720            done = True
721            for leaf in self.content:
722                if leaf[0].match(nodes[count], r):
723                    count += 1
724                    done = False
725                    break
726        r[self.name] = nodes[:count]
727        return count, r
728
729    def _recursive_matches(self, nodes, count):
730        """Helper to recursively yield the matches."""
731        assert self.content is not None
732        if count >= self.min:
733            yield 0, {}
734        if count < self.max:
735            for alt in self.content:
736                for c0, r0 in generate_matches(alt, nodes):
737                    for c1, r1 in self._recursive_matches(nodes[c0:], count+1):
738                        r = {}
739                        r.update(r0)
740                        r.update(r1)
741                        yield c0 + c1, r
742
743
744class NegatedPattern(BasePattern):
745
746    def __init__(self, content=None):
747        """Initializer.
748
749        The argument is either a pattern or None.  If it is None, this
750        only matches an empty sequence (effectively '$' in regex
751        lingo).  If it is not None, this matches whenever the argument
752        pattern doesn't have any matches.
753        """
754        if content is not None:
755            assert isinstance(content, BasePattern), repr(content)
756        self.content = content
757
758    def match(self, node):
759        # We never match a node in its entirety
760        return False
761
762    def match_seq(self, nodes):
763        # We only match an empty sequence of nodes in its entirety
764        return len(nodes) == 0
765
766    def generate_matches(self, nodes):
767        if self.content is None:
768            # Return a match if there is an empty sequence
769            if len(nodes) == 0:
770                yield 0, {}
771        else:
772            # Return a match if the argument pattern has no matches
773            for c, r in self.content.generate_matches(nodes):
774                return
775            yield 0, {}
776
777
778def generate_matches(patterns, nodes):
779    """Generator yielding matches for a sequence of patterns and nodes.
780
781    Args:
782        patterns: a sequence of patterns
783        nodes: a sequence of nodes
784
785    Yields:
786        (count, results) tuples where:
787        count: the entire sequence of patterns matches nodes[:count];
788        results: dict containing named submatches.
789        """
790    if not patterns:
791        yield 0, {}
792    else:
793        p, rest = patterns[0], patterns[1:]
794        for c0, r0 in p.generate_matches(nodes):
795            if not rest:
796                yield c0, r0
797            else:
798                for c1, r1 in generate_matches(rest, nodes[c0:]):
799                    r = {}
800                    r.update(r0)
801                    r.update(r1)
802                    yield c0 + c1, r