/Support/bin/texparser.py
Python | 397 lines | 385 code | 7 blank | 5 comment | 2 complexity | 50f8aa1fbcbf7d8b8506ee5b63ad6519 MD5 | raw file
- import sys
- import re
- import os.path
- import os
- import tmprefs
- from struct import *
- import urllib
def percent_escape(str):
    """Percent-encode spaces, ``/``, ``&`` and characters 0x80-0xFF.

    Each matching character is replaced by ``%XX``, where ``XX`` is the
    character's code as two upper-case hexadecimal digits.

    Args:
        str: The text to escape.  (The name shadows the builtin but is kept
            because it is part of the existing signature.)

    Returns:
        The escaped text.
    """
    # ord() yields the same number struct.unpack('B', ...) produced for a
    # one-byte string, but it also accepts text strings and does not depend
    # on the module-level ``from struct import *``.
    return re.sub('[\x80-\xff /&]',
                  lambda found: '%%%02X' % ord(found.group(0)),
                  str)
# Swapped call to percent_escape with urllib.quote. Was causing links to
# fail in TM2.
def make_link(file, line):
    """Build a ``txmt://open`` URL pointing at a line of a file.

    Args:
        file: Path of the file to open; it is URL-quoted before being
            embedded in the link.
        line: Line number to jump to, as a string or an integer.

    Returns:
        The URL as a string.
    """
    # quote() lives in ``urllib`` on Python 2 and in ``urllib.parse`` on
    # Python 3; importing locally keeps the function usable on both.
    try:
        from urllib import quote
    except ImportError:
        from urllib.parse import quote
    # str() lets callers pass the line number as an int as well as a string.
    return 'txmt://open/?url=file://' + quote(file) + '&line=' + str(line)
def shell_quote(string):
    """Wrap *string* in double quotes, backslash-escaping special characters.

    A backslash is inserted in front of every backtick, dollar sign,
    backslash and double quote found in *string*.
    """
    escaped = re.sub(r'([`$\\"])', r'\\\1', string)
    return ''.join(('"', escaped, '"'))
class TexParser(object):
    """Base class for parsers of TeX typesetting output streams.

    Subclasses append ``(compiled_regex, handler)`` pairs to
    ``self.patterns``.  ``parseStream`` hands every logical line of the
    input to the first handler whose regex matches at the start of the line.
    """

    def __init__(self, input_stream, verbose):
        """Remember the stream to read and reset all counters.

        Args:
            input_stream: Object with a ``readline()`` method.
            verbose: When true, lines matched by no pattern are echoed.
        """
        super(TexParser, self).__init__()
        self.input_stream = input_stream
        self.verbose = verbose
        self.patterns = []
        self.fileStack = []  # TODO: long term - can improve currentFile handling by keeping track of (xxx and )
        self.done = False
        self.isFatal = False
        self.numErrs = 0
        self.numWarns = 0

    def getRewrappedLine(self):
        """Return the next logical line, undoing TeX's hard line wrapping.

        A physical line of exactly 80 characters (line break included) is
        treated as continued on the following line.  The joined statement is
        returned with a trailing newline.  At end of input, whatever has
        been collected so far (possibly the empty string) is returned
        without a newline.
        """
        pieces = []
        while True:
            chunk = self.input_stream.readline()
            if not chunk:
                # End of input: no newline is appended in this case.
                return "".join(pieces)
            pieces.append(chunk.rstrip("\n"))
            if len(chunk) != 80:  # including line break
                return "".join(pieces) + "\n"

    def parseStream(self):
        """Run the stream through the registered patterns.

        Each logical line is matched against the ``(regex, handler)`` pairs
        in ``self.patterns`` in order; the first handler whose regex matches
        is called with the match object and the line.  Reading stops at end
        of input or as soon as a handler sets ``self.done``.  If the input
        ends without ``self.done`` being set, ``badRun`` is called.

        Returns:
            The tuple ``(isFatal, numErrs, numWarns)``.
        """
        line = self.getRewrappedLine()
        while line and not self.done:
            line = line.rstrip("\n")
            for regex, handler in self.patterns:
                hit = regex.match(line)
                if hit:
                    handler(hit, line)
                    sys.stdout.flush()
                    break
            else:
                # No pattern claimed this line.
                if self.verbose:
                    print(line)
            line = self.getRewrappedLine()
        if not self.done:
            self.badRun()
        return self.isFatal, self.numErrs, self.numWarns

    def _printParagraph(self, cssClass, line):
        """Print *line* inside a ``<p>`` element carrying *cssClass*."""
        print('<p class="' + cssClass + '">')
        print(line)
        print('</p>')

    def info(self, m, line):
        """Show *line* as an informational paragraph."""
        self._printParagraph('info', line)

    def error(self, m, line):
        """Show *line* as an error and count it."""
        self._printParagraph('error', line)
        self.numErrs += 1

    def warning(self, m, line):
        """Show *line* as a warning and count it."""
        self._printParagraph('warning', line)
        self.numWarns += 1

    def warn2(self, m, line):
        """Show *line* as a formatting warning; it is not counted."""
        self._printParagraph('fmtWarning', line)

    def fatal(self, m, line):
        """Show *line* as an error and mark the run as fatal."""
        self._printParagraph('error', line)
        self.isFatal = True

    def badRun(self):
        """Hook called when the input ends before ``self.done`` was set.

        The base implementation does nothing.
        """
        pass
class BibTexParser(TexParser):
    """Parse and format the messages written by bibtex."""

    def __init__(self, btex, verbose):
        """Register the bibtex message patterns.

        Args:
            btex: Stream carrying the bibtex output.
            verbose: Passed through to ``TexParser``.
        """
        super(BibTexParser, self).__init__(btex, verbose)
        rules = (
            ("Warning--I didn't find a database entry", self.warning),
            (r'I found no \\\w+ command', self.error),
            (r"I couldn't open style file", self.error),
            (r"You're missing a field name---line (\d+)", self.error),
            (r'Too many commas in name \d+ of', self.error),
            (r'I was expecting a', self.error),
            ('This is BibTeX', self.info),
            ('The style', self.info),
            ('Database', self.info),
            # A line starting with '---' ends the bibtex section.
            ('---', self.finishRun),
        )
        self.patterns.extend(
            (re.compile(regex), handler) for regex, handler in rules
        )

    def finishRun(self, m, line):
        """Mark parsing as finished and close the surrounding ``<div>``."""
        self.done = True
        print('</div>')
class BiberParser(TexParser):
    """Parse and format the messages written by biber."""

    def __init__(self, btex, verbose):
        """Register the biber message patterns.

        Args:
            btex: Stream carrying the biber output.
            verbose: Passed through to ``TexParser``.
        """
        super(BiberParser, self).__init__(btex, verbose)
        rules = (
            ('^.*WARN', self.warning),
            ('^.*ERROR', self.error),
            ('^.*FATAL', self.fatal),
            ('^.*Output to (.*)$', self.finishRun),
        )
        self.patterns.extend(
            (re.compile(regex), handler) for regex, handler in rules
        )

    def warning(self, m, line):
        """Show *line* as a warning and count it.

        Using one print command works more reliably than using several
        lines.
        """
        print('<p class="warning">' + line + '</p>')
        self.numWarns += 1

    def finishRun(self, m, line):
        """Link to the transcript, close the ``<div>`` and stop parsing.

        The transcript name is the reported output file name with its last
        three characters replaced by ``blg``.
        """
        outputName = m.group(1)
        logFile = outputName[:-3] + 'blg'
        target = make_link(os.path.join(os.getcwd(), logFile), '1')
        print('<p> Complete transcript is in ')
        print('<a href="' + target + '">' + logFile + '</a>')
        print('</p>')
        self.done = True
        print('</div>')
class MakeGlossariesParser(TexParser):
    """Parse and format the messages written by makeglossaries."""

    def __init__(self, btex, verbose):
        """Register the makeglossaries message patterns.

        Args:
            btex: Stream carrying the makeglossaries output.
            verbose: Passed through to ``TexParser``.
        """
        super(MakeGlossariesParser, self).__init__(btex, verbose)
        self.patterns += [
            (re.compile(r'^.*makeglossaries version (.*)$'), self.beginRun),
            (re.compile(r"^.*added glossary type '(.*)' \((.*)\).*$"), self.addType),
            (re.compile(r'^.*Markup written into file "(.*)".$'), self.finishMarkup),
            (re.compile(r'^.*xindy.*-L (.*) -I.*-t ".*\.(.*)" -o.*$'), self.runXindy),
            (re.compile(r'Cannot locate xindy module'), self.warning),
            (re.compile(r'ERROR'), self.error),
            (re.compile(r'Warning'), self.warning),
            (re.compile(r'^\*\*\*'), self.info),
        ]
        # Maps each file name listed by an "added glossary type" line to the
        # glossary type it belongs to.
        self.types = dict()

    def beginRun(self, m, line):
        """Print the section heading and the makeglossaries version."""
        version = m.group(1)
        print("<h2>Make Glossaries</h2>")
        print('<p class="info" >Version: <i>' + version + "</i></p>")

    def addType(self, m, line):
        """Record which files belong to a newly announced glossary type."""
        thisType = m.group(1)
        files = m.group(2)
        for fileName in files.split(','):
            self.types[fileName] = thisType
        print('<p class="info"> Add Glossary Type <strong>' + thisType
              + '</strong> <i>(Files: ' + files + ')</i></p>')

    def runXindy(self, m, line):
        """Announce a xindy run for the glossary type of the matched file.

        Raises:
            KeyError: If the matched file key was never registered through
                ``addType``.
        """
        lang = m.group(1)
        fileKey = m.group(2)
        thisType = self.types[fileKey]
        print('<h3>Run xindy for glossary type ' + thisType + '</h3>')
        print('<p class="info">Language: ' + lang + '</p>')

    def finishMarkup(self, m, line):
        """Report the finished glossary and link to its output file.

        The glossary type is looked up by the last three characters of the
        output file name.

        Raises:
            KeyError: If that key was never registered through ``addType``.
        """
        mkFile = m.group(1)
        thisType = self.types[mkFile[-3:]]
        target = make_link(os.path.join(os.getcwd(), mkFile), '1')
        print('<p class="info"> Finished glossary for type <strong>'
              + thisType + '</strong>. Output is in <a href="' + target
              + '">' + mkFile + '</a></p>')

    def warning(self, m, line):
        """Show *line* as a warning and count it.

        Using one print command works more reliably than using several
        lines.
        """
        print('<p class="warning">' + line + '</p>')
        self.numWarns += 1

    def error(self, m, line):
        """Show *line* as an error and count it.

        Using one print command works more reliably than using several
        lines.
        """
        print('<p class="error">' + line + '</p>')
        # Errors go to the error counter; they used to be added to the
        # warning counter by mistake.
        self.numErrs += 1
class LaTexParser(TexParser):
    """Parse the output of a LaTeX run and render it as HTML."""

    def __init__(self, input_stream, verbose, fileName):
        """Register the LaTeX message patterns.

        Args:
            input_stream: Stream carrying the LaTeX output.
            verbose: Passed through to ``TexParser``.
            fileName: Name of the file being typeset.  The text after its
                last dot is used as an additional source-file suffix.
        """
        super(LaTexParser, self).__init__(input_stream, verbose)
        self.fileName = fileName
        self.suffix = fileName[fileName.rfind('.')+1:]
        self.currentFile = fileName
        # The suffix is inserted into regular expressions, so it must be
        # escaped in case it contains regex metacharacters.
        suffix = re.escape(self.suffix)
        self.patterns += [
            #(re.compile('^This is') , self.info),
            (re.compile(r'^Document Class'), self.info),
            (re.compile(r'.*?\((\.\/[^\)]*?\.(tex|' + suffix + r')( |$))'), self.detectNewFile),
            (re.compile(r'.*\<use (.*?)\>'), self.detectInclude),
            (re.compile(r'^Output written'), self.info),
            (re.compile(r'LaTeX Warning:.*?input line (\d+)(\.|$)'), self.handleWarning),
            (re.compile(r'LaTeX Warning:.*'), self.warning),
            (re.compile(r'^([^:]*):(\d+):\s+(pdfTeX warning.*)'), self.handleFileLineWarning),
            (re.compile(r'.*pdfTeX warning.*'), self.warning),
            (re.compile(r'LaTeX Font Warning:.*'), self.warning),
            (re.compile(r'Overfull.*wide'), self.warn2),
            (re.compile(r'Underfull.*badness'), self.warn2),
            (re.compile(r'^([\.\/\w\x7f-\xff\- ]+(?:\.sty|\.tex|\.' + suffix + r')):(\d+):\s+(.*)'), self.handleError),
            (re.compile(r'([^:]*):(\d+): LaTeX Error:(.*)'), self.handleError),
            (re.compile(r'([^:]*):(\d+): (Emergency stop)'), self.handleError),
            (re.compile(r'Runaway argument'), self.pdfLatexError),
            # We need the (.*) at the beginning of the regular expression
            # since in some edge cases the output about the transcript
            # might actually not start at the beginning of the line.
            (re.compile(r'(.*)Transcript written on (.*)\.$'), self.finishRun),
            (re.compile(r'^Error: pdflatex'), self.pdfLatexError),
            (re.compile(r'\!.*'), self.handleOldStyleErrors),
            (re.compile(r'^\s+==>'), self.fatal),
        ]
        self.blankLine = re.compile(r'^\s*$')

    def detectNewFile(self, m, line):
        """Remember the file TeX has started reading and announce it."""
        self.currentFile = m.group(1).rstrip()
        print("<h4>Processing: " + self.currentFile + "</h4>")

    def detectInclude(self, m, line):
        """Announce a file pulled in through a ``<use ...>`` notice."""
        print("<ul><li>Including: " + m.group(1))
        print("</li></ul>")

    def handleWarning(self, m, line):
        """Show a warning linked to the reported line of the current file."""
        target = make_link(os.path.join(os.getcwd(), self.currentFile),
                           m.group(1))
        print('<p class="warning"><a href="' + target + '">' + line
              + "</a></p>")
        self.numWarns += 1

    def handleFileLineWarning(self, m, line):
        """Display warning. match m should contain file, line, warning message"""
        target = make_link(os.path.join(os.getcwd(), m.group(1)), m.group(2))
        print('<p class="warning"><a href="' + target + '">' + m.group(3)
              + "</a></p>")
        self.numWarns += 1

    def handleError(self, m, line):
        """Show an error linked to its file and line.

        The match must provide file, line number and message as groups
        1 to 3.
        """
        target = make_link(os.path.join(os.getcwd(), m.group(1)), m.group(2))
        print('<p class="error">')
        print('Latex Error: <a href="' + target + '">' + m.group(1) + ":"
              + m.group(2) + '</a> ' + m.group(3) + '</p>')
        self.numErrs += 1

    def finishRun(self, m, line):
        """Link to the transcript file and stop parsing."""
        # Group 1 is whatever precedes "Transcript written on"; the log
        # file name is captured by group 2.
        logFile = m.group(2).strip('"')
        target = make_link(os.path.join(os.getcwd(), logFile), '1')
        print('<p> Complete transcript is in ')
        print('<a href="' + target + '">' + logFile + '</a>')
        print('</p>')
        self.done = True

    def handleOldStyleErrors(self, m, line):
        """Classify a ``!`` line as error or warning by its wording.

        Lines containing "Error" or "error" are counted as errors, all
        others as warnings.
        """
        if re.search('[Ee]rror', line):
            print('<p class="error">')
            print(line)
            print('</p>')
            self.numErrs += 1
        else:
            print('<p class="warning">')
            print(line)
            print('</p>')
            self.numWarns += 1

    def pdfLatexError(self, m, line):
        """Show an error together with the line that follows it.

        The next line is read straight from the stream.  When it reports
        that a fatal error occurred, the run is marked as fatal.
        """
        self.numErrs += 1
        print('<p class="error">')
        print(line)
        line = self.input_stream.readline()
        if line and re.match('^ ==> Fatal error occurred', line):
            print(line.rstrip("\n"))
            print('</p>')
            self.isFatal = True
        else:
            if line:
                print('<pre>    ' + line.rstrip("\n") + '</pre>')
            print('</p>')
        sys.stdout.flush()

    def badRun(self):
        """Report that the input ended without a transcript notice.

        Links to the log file expected next to the document.  The document
        is taken from the ``TM_FILEPATH`` environment variable, falling back
        to the file name given to the constructor when it is not set.
        """
        print('<p class="error">A fatal error occured, log file is in ')
        sourcePath = os.getenv('TM_FILEPATH') or self.fileName
        # Swap only the extension; a plain str.replace() on the suffix would
        # also rewrite matching text inside the base name
        # (e.g. "textbook.tex" -> "logbook.log").
        logFile = os.path.splitext(os.path.basename(sourcePath))[0] + '.log'
        target = make_link(os.path.join(os.getcwd(), logFile), '1')
        print('<a href="' + target + '">' + logFile + '</a>')
        print('</p>')
class ParseLatexMk(TexParser):
    """Parse latexmk output, delegating each tool's section to its parser."""

    def __init__(self, input_stream, verbose, filename):
        """Register the latexmk message patterns.

        Args:
            input_stream: Stream carrying the latexmk output.
            verbose: Passed through to ``TexParser`` and to sub-parsers.
            filename: Name of the file being typeset; handed to
                ``LaTexParser``.
        """
        super(ParseLatexMk, self).__init__(input_stream, verbose)
        self.fileName = filename
        self.patterns += [
            (re.compile(r'This is (pdfTeX|latex2e|latex|XeTeX)'), self.startLatex),
            (re.compile(r'This is BibTeX'), self.startBibtex),
            (re.compile(r'^.*This is biber'), self.startBiber),
            (re.compile(r'^Latexmk: All targets \(.*?\) are up-to-date'), self.finishRun),
            (re.compile(r'This is makeindex'), self.startBibtex),
            (re.compile(r'^Latexmk'), self.ltxmk),
            (re.compile(r'Run number'), self.newRun),
        ]
        self.numRuns = 0

    def _runSubParser(self, parser):
        """Let *parser* consume the shared stream and add up its counts."""
        isFatal, numErrs, numWarns = parser.parseStream()
        self.numErrs += numErrs
        self.numWarns += numWarns

    def startBibtex(self, m, line):
        """Open a bibtex section and parse it with ``BibTexParser``."""
        print('<div class="bibtex">')
        # parseStream() already stripped the newline, so the whole line is
        # printed; slicing off one more character cut the banner short.
        print('<h3>' + line + '</h3>')
        self._runSubParser(BibTexParser(self.input_stream, self.verbose))

    def startBiber(self, m, line):
        """Open a biber section and parse it with ``BiberParser``."""
        print('<div class="biber">')
        print('<h3>' + line + '</h3>')
        self._runSubParser(BiberParser(self.input_stream, self.verbose))

    def startLatex(self, m, line):
        """Open a LaTeX section and parse it with ``LaTexParser``."""
        print('<div class="latex">')
        print('<hr>')
        # See startBibtex: the line carries no trailing newline here.
        print('<h3>' + line + '</h3>')
        self._runSubParser(
            LaTexParser(self.input_stream, self.verbose, self.fileName))

    def newRun(self, m, line):
        """Print the totals of the previous run, then reset the counters."""
        if self.numRuns > 0:
            print('<hr />')
            # Formatted as one string so the output matches a
            # space-separated Python 2 print statement.
            print('<p> %s Errors %s Warnings in this run. </p>'
                  % (self.numErrs, self.numWarns))
        self.numWarns = 0
        self.numErrs = 0
        self.numRuns += 1

    def finishRun(self, m, line):
        """Show the final latexmk message and stop parsing."""
        self.ltxmk(m, line)
        self.done = True

    def ltxmk(self, m, line):
        """Show a message written by latexmk itself."""
        print('<p class="ltxmk">%s</p>' % line)
class ChkTeXParser(TexParser):
    """Parse the output from chktex"""

    def __init__(self, input_stream, verbose, filename):
        """Register the chktex message patterns.

        Args:
            input_stream: Stream carrying the chktex output.
            verbose: Passed through to ``TexParser``.
            filename: Name of the checked file; stored as ``self.fileName``.
        """
        super(ChkTeXParser, self).__init__(input_stream, verbose)
        self.fileName = filename
        self.patterns += [
            (re.compile(r'^ChkTeX'), self.info),
            # The dot before "tex" is escaped so that only a real ".tex"
            # extension ends the file name.
            (re.compile(r'Warning \d+ in (.*\.tex) line (\d+):(.*)'), self.handleWarning),
            (re.compile(r'Error \d+ in (.*\.tex) line (\d+):(.*)'), self.handleError),
        ]
        self.numRuns = 0

    def _readDetailLine(self):
        """Read the next raw line from the stream without its newline."""
        # rstrip("\n") instead of [:-1]: a final line lacking a newline must
        # not lose its last character.
        return self.input_stream.readline().rstrip("\n")

    def handleWarning(self, m, line):
        """Display warning. match m should contain file, line, warning message"""
        target = make_link(os.path.join(os.getcwd(), m.group(1)), m.group(2))
        print('<p class="warning">Warning: <a href="' + target + '">'
              + m.group(1) + ": " + m.group(2) + ":</a>" + m.group(3)
              + "</p>")
        warnDetail = self.input_stream.readline()
        # Only a detail line longer than two characters is shown, together
        # with the line that follows it.
        if len(warnDetail) > 2:
            print('<pre> ' + warnDetail.rstrip("\n"))
            print(self._readDetailLine() + ' </pre>')
        self.numWarns += 1

    def handleError(self, m, line):
        """Display error. match m should contain file, line, error message.

        The two lines following the message are read from the stream and
        shown in a ``<pre>`` block.
        """
        target = make_link(os.path.join(os.getcwd(), m.group(1)), m.group(2))
        print('<p class="error">')
        print('Error: <a href="' + target + '">' + m.group(1) + ":"
              + m.group(2) + ':</a> ' + m.group(3) + '</p>')
        print('<pre> ' + self._readDetailLine())
        print(self._readDetailLine() + ' </pre>')
        self.numErrs += 1
if __name__ == '__main__':
    # Manual smoke test: run the biber parser over a sample log.  To try the
    # LaTeX parser instead, build LaTexParser(stream, False, "test.tex").
    # The with-statement makes sure the log file is closed again.
    with open('../tex/test.log') as stream:
        lp = BiberParser(stream, False)
        f, e, w = lp.parseStream()