/lib/matplotlib/pyparsing_py3.py
Python | 3682 lines | 3622 code | 13 blank | 47 comment | 41 complexity | 46a6f273ce4d626a176d7e1c2699c6aa MD5 | raw file
Possible License(s): MIT, GPL-3.0, BSD-3-Clause
Large files files are truncated, but you can click here to view the full file
- # module pyparsing.py
- #
- # Copyright (c) 2003-2010 Paul T. McGuire
- #
- # Permission is hereby granted, free of charge, to any person obtaining
- # a copy of this software and associated documentation files (the
- # "Software"), to deal in the Software without restriction, including
- # without limitation the rights to use, copy, modify, merge, publish,
- # distribute, sublicense, and/or sell copies of the Software, and to
- # permit persons to whom the Software is furnished to do so, subject to
- # the following conditions:
- #
- # The above copyright notice and this permission notice shall be
- # included in all copies or substantial portions of the Software.
- #
- # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
- # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #
- #from __future__ import generators
- __doc__ = \
- """
- pyparsing module - Classes and methods to define and execute parsing grammars
- The pyparsing module is an alternative approach to creating and executing simple grammars,
- vs. the traditional lex/yacc approach, or the use of regular expressions. With pyparsing, you
- don't need to learn a new syntax for defining grammars or matching expressions - the parsing module
- provides a library of classes that you use to construct the grammar directly in Python.
- Here is a program to parse "Hello, World!" (or any greeting of the form C{"<salutation>, <addressee>!"})::
- from pyparsing import Word, alphas
- # define grammar of a greeting
- greet = Word( alphas ) + "," + Word( alphas ) + "!"
- hello = "Hello, World!"
- print hello, "->", greet.parseString( hello )
- The program outputs the following::
- Hello, World! -> ['Hello', ',', 'World', '!']
- The Python representation of the grammar is quite readable, owing to the self-explanatory
- class names, and the use of '+', '|' and '^' operators.
- The parsed results returned from C{parseString()} can be accessed as a nested list, a dictionary, or an
- object with named attributes.
- The pyparsing module handles some of the problems that are typically vexing when writing text parsers:
- - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello , World !", etc.)
- - quoted strings
- - embedded comments
- """
- __version__ = "1.5.5"
- __versionTime__ = "12 Aug 2010 03:56"
- __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
- import string
- from weakref import ref as wkref
- import copy
- import sys
- import warnings
- import re
- import sre_constants
- import collections
- #~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) )
- __all__ = [
- 'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty',
- 'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal',
- 'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or',
- 'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException',
- 'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException',
- 'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter', 'Upcase',
- 'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore',
- 'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col',
- 'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString',
- 'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'getTokensEndLoc', 'hexnums',
- 'htmlComment', 'javaStyleComment', 'keepOriginalText', 'line', 'lineEnd', 'lineStart', 'lineno',
- 'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral',
- 'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables',
- 'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',
- 'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
- 'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
- 'indentedBlock', 'originalTextFor',
- ]
- """
- Detect if we are running version 3.X and make appropriate changes
- Robert A. Clark
- """
- _PY3K = sys.version_info[0] > 2
- if _PY3K:
- _MAX_INT = sys.maxsize
- basestring = str
- unichr = chr
- _ustr = str
- alphas = string.ascii_lowercase + string.ascii_uppercase
- else:
- _MAX_INT = sys.maxint
- range = xrange
- set = lambda s : dict( [(c,0) for c in s] )
- alphas = string.lowercase + string.uppercase
- def _ustr(obj):
- """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries
- str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It
- then < returns the unicode object | encodes it with the default encoding | ... >.
- """
- if isinstance(obj,unicode):
- return obj
- try:
- # If this works, then _ustr(obj) has the same behaviour as str(obj), so
- # it won't break any existing code.
- return str(obj)
- except UnicodeEncodeError:
- # The Python docs (http://docs.python.org/ref/customization.html#l2h-182)
- # state that "The return value must be a string object". However, does a
- # unicode object (being a subclass of basestring) count as a "string
- # object"?
- # If so, then return a unicode object:
- return unicode(obj)
- # Else encode it... but how? There are many choices... :)
- # Replace unprintables with escape codes?
- #return unicode(obj).encode(sys.getdefaultencoding(), 'backslashreplace_errors')
- # Replace unprintables with question marks?
- #return unicode(obj).encode(sys.getdefaultencoding(), 'replace')
- # ...
- # build list of single arg builtins, tolerant of Python version, that can be used as parse actions
- singleArgBuiltins = []
- import builtins
- for fname in "sum len enumerate sorted reversed list tuple set any all".split():
- try:
- singleArgBuiltins.append(getattr(builtins,fname))
- except AttributeError:
- continue
- def _xml_escape(data):
- """Escape &, <, >, ", ', etc. in a string of data."""
- # ampersand must be replaced first
- for from_,to_ in zip('&><"\'', "amp gt lt quot apos".split()):
- data = data.replace(from_, '&'+to_+';')
- return data
- class _Constants(object):
- pass
- nums = string.digits
- hexnums = nums + "ABCDEFabcdef"
- alphanums = alphas + nums
- _bslash = chr(92)
- printables = "".join( [ c for c in string.printable if c not in string.whitespace ] )
- class ParseBaseException(Exception):
- """base exception class for all parsing runtime exceptions"""
- # Performance tuning: we construct a *lot* of these, so keep this
- # constructor as small and fast as possible
- def __init__( self, pstr, loc=0, msg=None, elem=None ):
- self.loc = loc
- if msg is None:
- self.msg = pstr
- self.pstr = ""
- else:
- self.msg = msg
- self.pstr = pstr
- self.parserElement = elem
- def __getattr__( self, aname ):
- """supported attributes by name are:
- - lineno - returns the line number of the exception text
- - col - returns the column number of the exception text
- - line - returns the line containing the exception text
- """
- if( aname == "lineno" ):
- return lineno( self.loc, self.pstr )
- elif( aname in ("col", "column") ):
- return col( self.loc, self.pstr )
- elif( aname == "line" ):
- return line( self.loc, self.pstr )
- else:
- raise AttributeError(aname)
- def __str__( self ):
- return "%s (at char %d), (line:%d, col:%d)" % \
- ( self.msg, self.loc, self.lineno, self.column )
- def __repr__( self ):
- return _ustr(self)
- def markInputline( self, markerString = ">!<" ):
- """Extracts the exception line from the input string, and marks
- the location of the exception with a special symbol.
- """
- line_str = self.line
- line_column = self.column - 1
- if markerString:
- line_str = "".join( [line_str[:line_column],
- markerString, line_str[line_column:]])
- return line_str.strip()
- def __dir__(self):
- return "loc msg pstr parserElement lineno col line " \
- "markInputLine __str__ __repr__".split()
- class ParseException(ParseBaseException):
- """exception thrown when parse expressions don't match class;
- supported attributes by name are:
- - lineno - returns the line number of the exception text
- - col - returns the column number of the exception text
- - line - returns the line containing the exception text
- """
- pass
- class ParseFatalException(ParseBaseException):
- """user-throwable exception thrown when inconsistent parse content
- is found; stops all parsing immediately"""
- pass
- class ParseSyntaxException(ParseFatalException):
- """just like C{ParseFatalException}, but thrown internally when an
- C{ErrorStop} ('-' operator) indicates that parsing is to stop immediately because
- an unbacktrackable syntax error has been found"""
- def __init__(self, pe):
- super(ParseSyntaxException, self).__init__(
- pe.pstr, pe.loc, pe.msg, pe.parserElement)
- #~ class ReparseException(ParseBaseException):
- #~ """Experimental class - parse actions can raise this exception to cause
- #~ pyparsing to reparse the input string:
- #~ - with a modified input string, and/or
- #~ - with a modified start location
- #~ Set the values of the ReparseException in the constructor, and raise the
- #~ exception in a parse action to cause pyparsing to use the new string/location.
- #~ Setting the values as None causes no change to be made.
- #~ """
- #~ def __init_( self, newstring, restartLoc ):
- #~ self.newParseText = newstring
- #~ self.reparseLoc = restartLoc
- class RecursiveGrammarException(Exception):
- """exception thrown by C{validate()} if the grammar could be improperly recursive"""
- def __init__( self, parseElementList ):
- self.parseElementTrace = parseElementList
- def __str__( self ):
- return "RecursiveGrammarException: %s" % self.parseElementTrace
- class _ParseResultsWithOffset(object):
- def __init__(self,p1,p2):
- self.tup = (p1,p2)
- def __getitem__(self,i):
- return self.tup[i]
- def __repr__(self):
- return repr(self.tup)
- def setOffset(self,i):
- self.tup = (self.tup[0],i)
- class ParseResults(object):
- """Structured parse results, to provide multiple means of access to the parsed data:
- - as a list (C{len(results)})
- - by list index (C{results[0], results[1]}, etc.)
- - by attribute (C{results.<resultsName>})
- """
- #~ __slots__ = ( "__toklist", "__tokdict", "__doinit", "__name", "__parent", "__accumNames", "__weakref__" )
- def __new__(cls, toklist, name=None, asList=True, modal=True ):
- if isinstance(toklist, cls):
- return toklist
- retobj = object.__new__(cls)
- retobj.__doinit = True
- return retobj
- # Performance tuning: we construct a *lot* of these, so keep this
- # constructor as small and fast as possible
- def __init__( self, toklist, name=None, asList=True, modal=True ):
- if self.__doinit:
- self.__doinit = False
- self.__name = None
- self.__parent = None
- self.__accumNames = {}
- if isinstance(toklist, list):
- self.__toklist = toklist[:]
- else:
- self.__toklist = [toklist]
- self.__tokdict = dict()
- if name is not None and name:
- if not modal:
- self.__accumNames[name] = 0
- if isinstance(name,int):
- name = _ustr(name) # will always return a str, but use _ustr for consistency
- self.__name = name
- if not toklist in (None,'',[]):
- if isinstance(toklist,basestring):
- toklist = [ toklist ]
- if asList:
- if isinstance(toklist,ParseResults):
- self[name] = _ParseResultsWithOffset(toklist.copy(),0)
- else:
- self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),0)
- self[name].__name = name
- else:
- try:
- self[name] = toklist[0]
- except (KeyError,TypeError,IndexError):
- self[name] = toklist
- def __getitem__( self, i ):
- if isinstance( i, (int,slice) ):
- return self.__toklist[i]
- else:
- if i not in self.__accumNames:
- return self.__tokdict[i][-1][0]
- else:
- return ParseResults([ v[0] for v in self.__tokdict[i] ])
- def __setitem__( self, k, v ):
- if isinstance(v,_ParseResultsWithOffset):
- self.__tokdict[k] = self.__tokdict.get(k,list()) + [v]
- sub = v[0]
- elif isinstance(k,int):
- self.__toklist[k] = v
- sub = v
- else:
- self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)]
- sub = v
- if isinstance(sub,ParseResults):
- sub.__parent = wkref(self)
- def __delitem__( self, i ):
- if isinstance(i,(int,slice)):
- mylen = len( self.__toklist )
- del self.__toklist[i]
- # convert int to slice
- if isinstance(i, int):
- if i < 0:
- i += mylen
- i = slice(i, i+1)
- # get removed indices
- removed = list(range(*i.indices(mylen)))
- removed.reverse()
- # fixup indices in token dictionary
- for name in self.__tokdict:
- occurrences = self.__tokdict[name]
- for j in removed:
- for k, (value, position) in enumerate(occurrences):
- occurrences[k] = _ParseResultsWithOffset(value, position - (position > j))
- else:
- del self.__tokdict[i]
- def __contains__( self, k ):
- return k in self.__tokdict
- def __len__( self ): return len( self.__toklist )
- def __bool__(self): return len( self.__toklist ) > 0
- __nonzero__ = __bool__
- def __iter__( self ): return iter( self.__toklist )
- def __reversed__( self ): return iter( reversed(self.__toklist) )
- def keys( self ):
- """Returns all named result keys."""
- return self.__tokdict.keys()
- def pop( self, index=-1 ):
- """Removes and returns item at specified index (default=last).
- Will work with either numeric indices or dict-key indicies."""
- ret = self[index]
- del self[index]
- return ret
- def get(self, key, defaultValue=None):
- """Returns named result matching the given key, or if there is no
- such name, then returns the given C{defaultValue} or C{None} if no
- C{defaultValue} is specified."""
- if key in self:
- return self[key]
- else:
- return defaultValue
- def insert( self, index, insStr ):
- """Inserts new element at location index in the list of parsed tokens."""
- self.__toklist.insert(index, insStr)
- # fixup indices in token dictionary
- for name in self.__tokdict:
- occurrences = self.__tokdict[name]
- for k, (value, position) in enumerate(occurrences):
- occurrences[k] = _ParseResultsWithOffset(value, position + (position > index))
- def items( self ):
- """Returns all named result keys and values as a list of tuples."""
- return [(k,self[k]) for k in self.__tokdict]
- def values( self ):
- """Returns all named result values."""
- return [ v[-1][0] for v in self.__tokdict.values() ]
- def __getattr__( self, name ):
- if True: #name not in self.__slots__:
- if name in self.__tokdict:
- if name not in self.__accumNames:
- return self.__tokdict[name][-1][0]
- else:
- return ParseResults([ v[0] for v in self.__tokdict[name] ])
- else:
- return ""
- return None
- def __add__( self, other ):
- ret = self.copy()
- ret += other
- return ret
- def __iadd__( self, other ):
- if other.__tokdict:
- offset = len(self.__toklist)
- addoffset = ( lambda a: (a<0 and offset) or (a+offset) )
- otheritems = other.__tokdict.items()
- otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) )
- for (k,vlist) in otheritems for v in vlist]
- for k,v in otherdictitems:
- self[k] = v
- if isinstance(v[0],ParseResults):
- v[0].__parent = wkref(self)
- self.__toklist += other.__toklist
- self.__accumNames.update( other.__accumNames )
- return self
- def __radd__(self, other):
- if isinstance(other,int) and other == 0:
- return self.copy()
- def __repr__( self ):
- return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )
- def __str__( self ):
- out = "["
- sep = ""
- for i in self.__toklist:
- if isinstance(i, ParseResults):
- out += sep + _ustr(i)
- else:
- out += sep + repr(i)
- sep = ", "
- out += "]"
- return out
- def _asStringList( self, sep='' ):
- out = []
- for item in self.__toklist:
- if out and sep:
- out.append(sep)
- if isinstance( item, ParseResults ):
- out += item._asStringList()
- else:
- out.append( _ustr(item) )
- return out
- def asList( self ):
- """Returns the parse results as a nested list of matching tokens, all converted to strings."""
- out = []
- for res in self.__toklist:
- if isinstance(res,ParseResults):
- out.append( res.asList() )
- else:
- out.append( res )
- return out
- def asDict( self ):
- """Returns the named parse results as dictionary."""
- return dict( self.items() )
- def copy( self ):
- """Returns a new copy of a C{ParseResults} object."""
- ret = ParseResults( self.__toklist )
- ret.__tokdict = self.__tokdict.copy()
- ret.__parent = self.__parent
- ret.__accumNames.update( self.__accumNames )
- ret.__name = self.__name
- return ret
- def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ):
- """Returns the parse results as XML. Tags are created for tokens and lists that have defined results names."""
- nl = "\n"
- out = []
- namedItems = dict( [ (v[1],k) for (k,vlist) in self.__tokdict.items()
- for v in vlist ] )
- nextLevelIndent = indent + " "
- # collapse out indents if formatting is not desired
- if not formatted:
- indent = ""
- nextLevelIndent = ""
- nl = ""
- selfTag = None
- if doctag is not None:
- selfTag = doctag
- else:
- if self.__name:
- selfTag = self.__name
- if not selfTag:
- if namedItemsOnly:
- return ""
- else:
- selfTag = "ITEM"
- out += [ nl, indent, "<", selfTag, ">" ]
- worklist = self.__toklist
- for i,res in enumerate(worklist):
- if isinstance(res,ParseResults):
- if i in namedItems:
- out += [ res.asXML(namedItems[i],
- namedItemsOnly and doctag is None,
- nextLevelIndent,
- formatted)]
- else:
- out += [ res.asXML(None,
- namedItemsOnly and doctag is None,
- nextLevelIndent,
- formatted)]
- else:
- # individual token, see if there is a name for it
- resTag = None
- if i in namedItems:
- resTag = namedItems[i]
- if not resTag:
- if namedItemsOnly:
- continue
- else:
- resTag = "ITEM"
- xmlBodyText = _xml_escape(_ustr(res))
- out += [ nl, nextLevelIndent, "<", resTag, ">",
- xmlBodyText,
- "</", resTag, ">" ]
- out += [ nl, indent, "</", selfTag, ">" ]
- return "".join(out)
- def __lookup(self,sub):
- for k,vlist in self.__tokdict.items():
- for v,loc in vlist:
- if sub is v:
- return k
- return None
- def getName(self):
- """Returns the results name for this token expression."""
- if self.__name:
- return self.__name
- elif self.__parent:
- par = self.__parent()
- if par:
- return par.__lookup(self)
- else:
- return None
- elif (len(self) == 1 and
- len(self.__tokdict) == 1 and
- self.__tokdict.values()[0][0][1] in (0,-1)):
- return self.__tokdict.keys()[0]
- else:
- return None
- def dump(self,indent='',depth=0):
- """Diagnostic method for listing out the contents of a C{ParseResults}.
- Accepts an optional C{indent} argument so that this string can be embedded
- in a nested display of other data."""
- out = []
- out.append( indent+_ustr(self.asList()) )
- keys = self.items()
- keys.sort()
- for k,v in keys:
- if out:
- out.append('\n')
- out.append( "%s%s- %s: " % (indent,(' '*depth), k) )
- if isinstance(v,ParseResults):
- if v.keys():
- out.append( v.dump(indent,depth+1) )
- else:
- out.append(_ustr(v))
- else:
- out.append(_ustr(v))
- return "".join(out)
- # add support for pickle protocol
- def __getstate__(self):
- return ( self.__toklist,
- ( self.__tokdict.copy(),
- self.__parent is not None and self.__parent() or None,
- self.__accumNames,
- self.__name ) )
- def __setstate__(self,state):
- self.__toklist = state[0]
- self.__tokdict, \
- par, \
- inAccumNames, \
- self.__name = state[1]
- self.__accumNames = {}
- self.__accumNames.update(inAccumNames)
- if par is not None:
- self.__parent = wkref(par)
- else:
- self.__parent = None
- def __dir__(self):
- return dir(super(ParseResults,self)) + self.keys()
- collections.MutableMapping.register(ParseResults)
- def col (loc,strg):
- """Returns current column within a string, counting newlines as line separators.
- The first column is number 1.
- Note: the default parsing behavior is to expand tabs in the input string
- before starting the parsing process. See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
- on parsing strings containing <TAB>s, and suggested methods to maintain a
- consistent view of the parsed string, the parse location, and line and column
- positions within the parsed string.
- """
- return (loc<len(strg) and strg[loc] == '\n') and 1 or loc - strg.rfind("\n", 0, loc)
- def lineno(loc,strg):
- """Returns current line number within a string, counting newlines as line separators.
- The first line is number 1.
- Note: the default parsing behavior is to expand tabs in the input string
- before starting the parsing process. See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
- on parsing strings containing <TAB>s, and suggested methods to maintain a
- consistent view of the parsed string, the parse location, and line and column
- positions within the parsed string.
- """
- return strg.count("\n",0,loc) + 1
- def line( loc, strg ):
- """Returns the line of text containing loc within a string, counting newlines as line separators.
- """
- lastCR = strg.rfind("\n", 0, loc)
- nextCR = strg.find("\n", loc)
- if nextCR >= 0:
- return strg[lastCR+1:nextCR]
- else:
- return strg[lastCR+1:]
- def _defaultStartDebugAction( instring, loc, expr ):
- print ("Match " + _ustr(expr) + " at loc " + _ustr(loc) + "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) ))
- def _defaultSuccessDebugAction( instring, startloc, endloc, expr, toks ):
- print ("Matched " + _ustr(expr) + " -> " + str(toks.asList()))
- def _defaultExceptionDebugAction( instring, loc, expr, exc ):
- print ("Exception raised:" + _ustr(exc))
- def nullDebugAction(*args):
- """'Do-nothing' debug action, to suppress debugging output during parsing."""
- pass
- class ParserElement(object):
- """Abstract base level parser element class."""
- DEFAULT_WHITE_CHARS = " \n\t\r"
- verbose_stacktrace = False
- def setDefaultWhitespaceChars( chars ):
- """Overrides the default whitespace chars
- """
- ParserElement.DEFAULT_WHITE_CHARS = chars
- setDefaultWhitespaceChars = staticmethod(setDefaultWhitespaceChars)
- def __init__( self, savelist=False ):
- self.parseAction = list()
- self.failAction = None
- #~ self.name = "<unknown>" # don't define self.name, let subclasses try/except upcall
- self.strRepr = None
- self.resultsName = None
- self.saveAsList = savelist
- self.skipWhitespace = True
- self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
- self.copyDefaultWhiteChars = True
- self.mayReturnEmpty = False # used when checking for left-recursion
- self.keepTabs = False
- self.ignoreExprs = list()
- self.debug = False
- self.streamlined = False
- self.mayIndexError = True # used to optimize exception handling for subclasses that don't advance parse index
- self.errmsg = ""
- self.modalResults = True # used to mark results names as modal (report only last) or cumulative (list all)
- self.debugActions = ( None, None, None ) #custom debug actions
- self.re = None
- self.callPreparse = True # used to avoid redundant calls to preParse
- self.callDuringTry = False
- def copy( self ):
- """Make a copy of this C{ParserElement}. Useful for defining different parse actions
- for the same parsing pattern, using copies of the original parse element."""
- cpy = copy.copy( self )
- cpy.parseAction = self.parseAction[:]
- cpy.ignoreExprs = self.ignoreExprs[:]
- if self.copyDefaultWhiteChars:
- cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
- return cpy
- def setName( self, name ):
- """Define name for this expression, for use in debugging."""
- self.name = name
- self.errmsg = "Expected " + self.name
- if hasattr(self,"exception"):
- self.exception.msg = self.errmsg
- return self
- def setResultsName( self, name, listAllMatches=False ):
- """Define name for referencing matching tokens as a nested attribute
- of the returned parse results.
- NOTE: this returns a *copy* of the original C{ParserElement} object;
- this is so that the client can define a basic element, such as an
- integer, and reference it in multiple places with different names.
- You can also set results names using the abbreviated syntax,
- C{expr("name")} in place of C{expr.setResultsName("name")} -
- see L{I{__call__}<__call__>}.
- """
- newself = self.copy()
- newself.resultsName = name
- newself.modalResults = not listAllMatches
- return newself
- def setBreak(self,breakFlag = True):
- """Method to invoke the Python pdb debugger when this element is
- about to be parsed. Set C{breakFlag} to True to enable, False to
- disable.
- """
- if breakFlag:
- _parseMethod = self._parse
- def breaker(instring, loc, doActions=True, callPreParse=True):
- import pdb
- pdb.set_trace()
- return _parseMethod( instring, loc, doActions, callPreParse )
- breaker._originalParseMethod = _parseMethod
- self._parse = breaker
- else:
- if hasattr(self._parse,"_originalParseMethod"):
- self._parse = self._parse._originalParseMethod
- return self
- def _normalizeParseActionArgs( f ):
- """Internal method used to decorate parse actions that take fewer than 3 arguments,
- so that all parse actions can be called as C{f(s,l,t)}."""
- STAR_ARGS = 4
- # special handling for single-argument builtins
- if (f in singleArgBuiltins):
- numargs = 1
- else:
- try:
- restore = None
- if isinstance(f,type):
- restore = f
- f = f.__init__
- if not _PY3K:
- codeObj = f.func_code
- else:
- codeObj = f.code
- if codeObj.co_flags & STAR_ARGS:
- return f
- numargs = codeObj.co_argcount
- if not _PY3K:
- if hasattr(f,"im_self"):
- numargs -= 1
- else:
- if hasattr(f,"__self__"):
- numargs -= 1
- if restore:
- f = restore
- except AttributeError:
- try:
- if not _PY3K:
- call_im_func_code = f.__call__.im_func.func_code
- else:
- call_im_func_code = f.__code__
- # not a function, must be a callable object, get info from the
- # im_func binding of its bound __call__ method
- if call_im_func_code.co_flags & STAR_ARGS:
- return f
- numargs = call_im_func_code.co_argcount
- if not _PY3K:
- if hasattr(f.__call__,"im_self"):
- numargs -= 1
- else:
- if hasattr(f.__call__,"__self__"):
- numargs -= 0
- except AttributeError:
- if not _PY3K:
- call_func_code = f.__call__.func_code
- else:
- call_func_code = f.__call__.__code__
- # not a bound method, get info directly from __call__ method
- if call_func_code.co_flags & STAR_ARGS:
- return f
- numargs = call_func_code.co_argcount
- if not _PY3K:
- if hasattr(f.__call__,"im_self"):
- numargs -= 1
- else:
- if hasattr(f.__call__,"__self__"):
- numargs -= 1
- # print ("adding function %s with %d args" % (f.func_name,numargs))
- if numargs == 3:
- return f
- else:
- if numargs > 3:
- def tmp(s,l,t):
- return f(s,l,t)
- elif numargs == 2:
- def tmp(s,l,t):
- return f(l,t)
- elif numargs == 1:
- def tmp(s,l,t):
- return f(t)
- else: #~ numargs == 0:
- def tmp(s,l,t):
- return f()
- try:
- tmp.__name__ = f.__name__
- except (AttributeError,TypeError):
- # no need for special handling if attribute doesnt exist
- pass
- try:
- tmp.__doc__ = f.__doc__
- except (AttributeError,TypeError):
- # no need for special handling if attribute doesnt exist
- pass
- try:
- tmp.__dict__.update(f.__dict__)
- except (AttributeError,TypeError):
- # no need for special handling if attribute doesnt exist
- pass
- return tmp
- _normalizeParseActionArgs = staticmethod(_normalizeParseActionArgs)
- def setParseAction( self, *fns, **kwargs ):
- """Define action to perform when successfully matching parse element definition.
- Parse action fn is a callable method with 0-3 arguments, called as C{fn(s,loc,toks)},
- C{fn(loc,toks)}, C{fn(toks)}, or just C{fn()}, where:
- - s = the original string being parsed (see note below)
- - loc = the location of the matching substring
- - toks = a list of the matched tokens, packaged as a ParseResults object
- If the functions in fns modify the tokens, they can return them as the return
- value from fn, and the modified list of tokens will replace the original.
- Otherwise, fn does not need to return any value.
- Note: the default parsing behavior is to expand tabs in the input string
- before starting the parsing process. See L{I{parseString}<parseString>} for more information
- on parsing strings containing <TAB>s, and suggested methods to maintain a
- consistent view of the parsed string, the parse location, and line and column
- positions within the parsed string.
- """
- self.parseAction = list(map(self._normalizeParseActionArgs, list(fns)))
- self.callDuringTry = ("callDuringTry" in kwargs and kwargs["callDuringTry"])
- return self
- def addParseAction( self, *fns, **kwargs ):
- """Add parse action to expression's list of parse actions. See L{I{setParseAction}<setParseAction>}."""
- self.parseAction += list(map(self._normalizeParseActionArgs, list(fns)))
- self.callDuringTry = self.callDuringTry or ("callDuringTry" in kwargs and kwargs["callDuringTry"])
- return self
- def setFailAction( self, fn ):
- """Define action to perform if parsing fails at this expression.
- Fail acton fn is a callable function that takes the arguments
- C{fn(s,loc,expr,err)} where:
- - s = string being parsed
- - loc = location where expression match was attempted and failed
- - expr = the parse expression that failed
- - err = the exception thrown
- The function returns no value. It may throw C{ParseFatalException}
- if it is desired to stop parsing immediately."""
- self.failAction = fn
- return self
- def _skipIgnorables( self, instring, loc ):
- exprsFound = True
- while exprsFound:
- exprsFound = False
- for e in self.ignoreExprs:
- try:
- while 1:
- loc,dummy = e._parse( instring, loc )
- exprsFound = True
- except ParseException:
- pass
- return loc
- def preParse( self, instring, loc ):
- if self.ignoreExprs:
- loc = self._skipIgnorables( instring, loc )
- if self.skipWhitespace:
- wt = self.whiteChars
- instrlen = len(instring)
- while loc < instrlen and instring[loc] in wt:
- loc += 1
- return loc
- def parseImpl( self, instring, loc, doActions=True ):
- return loc, []
- def postParse( self, instring, loc, tokenlist ):
- return tokenlist
- #~ @profile
- def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ):
- debugging = ( self.debug ) #and doActions )
- if debugging or self.failAction:
- #~ print ("Match",self,"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) ))
- if (self.debugActions[0] ):
- self.debugActions[0]( instring, loc, self )
- if callPreParse and self.callPreparse:
- preloc = self.preParse( instring, loc )
- else:
- preloc = loc
- tokensStart = preloc
- try:
- try:
- loc,tokens = self.parseImpl( instring, preloc, doActions )
- except IndexError:
- raise ParseException( instring, len(instring), self.errmsg, self )
- except ParseBaseException as err:
- #~ print ("Exception raised:", err)
- if self.debugActions[2]:
- self.debugActions[2]( instring, tokensStart, self, err )
- if self.failAction:
- self.failAction( instring, tokensStart, self, err )
- raise
- else:
- if callPreParse and self.callPreparse:
- preloc = self.preParse( instring, loc )
- else:
- preloc = loc
- tokensStart = preloc
- if self.mayIndexError or loc >= len(instring):
- try:
- loc,tokens = self.parseImpl( instring, preloc, doActions )
- except IndexError:
- raise ParseException( instring, len(instring), self.errmsg, self )
- else:
- loc,tokens = self.parseImpl( instring, preloc, doActions )
- tokens = self.postParse( instring, loc, tokens )
- retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults )
- if self.parseAction and (doActions or self.callDuringTry):
- if debugging:
- try:
- for fn in self.parseAction:
- tokens = fn( instring, tokensStart, retTokens )
- if tokens is not None:
- retTokens = ParseResults( tokens,
- self.resultsName,
- asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
- modal=self.modalResults )
- except ParseBaseException as err:
- #~ print "Exception raised in user parse action:", err
- if (self.debugActions[2] ):
- self.debugActions[2]( instring, tokensStart, self, err )
- raise
- else:
- for fn in self.parseAction:
- tokens = fn( instring, tokensStart, retTokens )
- if tokens is not None:
- retTokens = ParseResults( tokens,
- self.resultsName,
- asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
- modal=self.modalResults )
- if debugging:
- #~ print ("Matched",self,"->",retTokens.asList())
- if (self.debugActions[1] ):
- self.debugActions[1]( instring, tokensStart, loc, self, retTokens )
- return loc, retTokens
- def tryParse( self, instring, loc ):
- try:
- return self._parse( instring, loc, doActions=False )[0]
- except ParseFatalException:
- raise ParseException( instring, loc, self.errmsg, self)
- # this method gets repeatedly called during backtracking with the same arguments -
- # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression
- def _parseCache( self, instring, loc, doActions=True, callPreParse=True ):
- lookup = (self,instring,loc,callPreParse,doActions)
- if lookup in ParserElement._exprArgCache:
- value = ParserElement._exprArgCache[ lookup ]
- if isinstance(value, Exception):
- raise value
- return value
- else:
- try:
- value = self._parseNoCache( instring, loc, doActions, callPreParse )
- ParserElement._exprArgCache[ lookup ] = (value[0],value[1].copy())
- return value
- except ParseBaseException as err:
- err.__traceback__ = None
- ParserElement._exprArgCache[ lookup ] = err
- raise
- _parse = _parseNoCache
- # argument cache for optimizing repeated calls when backtracking through recursive expressions
- _exprArgCache = {}
- def resetCache():
- ParserElement._exprArgCache.clear()
- resetCache = staticmethod(resetCache)
- _packratEnabled = False
- def enablePackrat():
- """Enables "packrat" parsing, which adds memoizing to the parsing logic.
- Repeated parse attempts at the same string location (which happens
- often in many complex grammars) can immediately return a cached value,
- instead of re-executing parsing/validating code. Memoizing is done of
- both valid results and parsing exceptions.
- This speedup may break existing programs that use parse actions that
- have side-effects. For this reason, packrat parsing is disabled when
- you first import pyparsing. To activate the packrat feature, your
- program must call the class method C{ParserElement.enablePackrat()}. If
- your program uses C{psyco} to "compile as you go", you must call
- C{enablePackrat} before calling C{psyco.full()}. If you do not do this,
- Python will crash. For best results, call C{enablePackrat()} immediately
- after importing pyparsing.
- """
- if not ParserElement._packratEnabled:
- ParserElement._packratEnabled = True
- ParserElement._parse = ParserElement._parseCache
- enablePackrat = staticmethod(enablePackrat)
- def parseString( self, instring, parseAll=False ):
- """Execute the parse expression with the given string.
- This is the main interface to the client code, once the complete
- expression has been built.
- If you want the grammar to require that the entire input string be
- successfully parsed, then set C{parseAll} to True (equivalent to ending
- the grammar with C{StringEnd()}).
- Note: C{parseString} implicitly calls C{expandtabs()} on the input string,
- in order to report proper column numbers in parse actions.
- If the input string contains tabs and
- the grammar uses parse actions that use the C{loc} argument to index into the
- string being parsed, you can ensure you have a consistent view of the input
- string by:
- - calling C{parseWithTabs} on your grammar before calling C{parseString}
- (see L{I{parseWithTabs}<parseWithTabs>})
- - define your parse action using the full C{(s,loc,toks)} signature, and
- reference the input string using the parse action's C{s} argument
- - explictly expand the tabs in your input string before calling
- C{parseString}
- """
- ParserElement.resetCache()
- if not self.streamlined:
- self.streamline()
- #~ self.saveAsList = True
- for e in self.ignoreExprs:
- e.streamline()
- if not self.keepTabs:
- instring = instring.expandtabs()
- try:
- loc, tokens = self._parse( instring, 0 )
- if parseAll:
- #loc = self.preParse( instring, loc )
- se = StringEnd()
- se._parse( instring, loc )
- except ParseBaseException as err:
- if ParserElement.verbose_stacktrace:
- raise
- else:
- # catch and re-raise exception from here, clears out pyparsing internal stack trace
- raise err
- else:
- return tokens
- def scanString( self, instring, maxMatches=_MAX_INT ):
- """Scan the input string for expression matches. Each match will return the
- matching tokens, start location, and end location. May be called with optional
- C{maxMatches} argument, to clip scanning after 'n' matches are found.
- Note that the start and end locations are reported relative to the string
- being parsed. See L{I{parseString}<parseString>} for more information on parsing
- strings with embedded tabs."""
- if not self.streamlined:
- self.streamline()
- for e in self.ignoreExprs:
- e.streamline()
- if not self.keepTabs:
- instring = _ustr(instring).expandtabs()
- instrlen = len(instring)
- loc = 0
- preparseFn = self.preParse
- parseFn = self._parse
- ParserElement.resetCache()
- matches = 0
- try:
- while loc <= instrlen and matches < maxMatches:
- try:
- preloc = preparseFn( instring, loc )
- nextLoc,tokens = parseFn( instring, preloc, callPreParse=False )
- except ParseException:
- loc = preloc+1
- else:
- if nextLoc > loc:
- matches += 1
- yield tokens, preloc, nextLoc
- loc = nextLoc
- else:
- loc = preloc+1
- except ParseBaseException as err:
- if ParserElement.verbose_stacktrace:
- raise
- else:
- # catch and re-raise exception from here, clears out pyparsing internal stack trace
- raise err
- def transformString( self, instring ):
- """Extension to C{scanString}, to modify matching text with modified tokens that may
- be returned from a parse action. To use C{transformString}, define a grammar and
- attach a parse action to it that modifies the returned token list.
- Invoking C{transformString()} on a target string will then scan for matches,
- and replace the matched text patterns according to the logic in the parse
- action. C{transformString()} returns the resulting transformed string."""
- out = []
- lastE = 0
- # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
- # keep string locs straight between transformString and scanString
- self.keepTabs = True
- try:
- for t,s,e in self.scanString( instring ):
- out.append( instring[lastE:s] )
- if t:
- if isinstance(t,ParseResults):
- out += t.asList()
- elif isinstance(t,list):
- out += t
- else:
- out.append(t)
- lastE = e
- out.append(instring[lastE:])
- return "".join(map(_ustr,out))
- except ParseBaseException as err:
- if ParserElement.verbose_stacktrace:
- raise
- else:
- # catch and re-raise exception from here, clears out pyparsing internal stack trace
- raise err
- def searchString( self, instring, maxMatches=_MAX_INT ):
- """Another extension to C{scanString}, simplifying the access to the tokens found
- to match the given parse expression. May be called with optional
- C{maxMatches} argument, to clip searching after 'n' matches are found.
- """
- try:
- return ParseResults([ t for t,s,e in self.scanString( instring, maxMatches ) ])
- except ParseBaseException as err:
- if ParserElement.verbose_stacktrace:
- raise
- else:
- # catch and re-raise exception from here, clears out pyparsing internal stack trace
- raise err
- def __add__(self, other ):
- """Implementation of + operator - returns And"""
- if isinstance( other, basestring ):
- other = Literal( other )
- if not isinstance( other, ParserElement ):
- warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
- SyntaxWarning, stacklevel=2)
- return None
- return And( [ self, other ] )
- def __radd__(self, other ):
- """Implementation of + operator when left operand is not a C{ParserElement}"""
- if isinstance( other, basestring ):
- other = Literal( other )
- if not isinstance( other, ParserElement ):
- warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
- SyntaxWarning, stacklevel=2)
- return None
- return other + self
- def __sub__(self, other):
- """Implementation of - operator, returns C{And} with error stop"""
- if isinstance( other, basestring ):
- other = Literal( other )
- if not isinstance( other, ParserElement ):…
Large files files are truncated, but you can click here to view the full file