texparser.py - This Python script parses and processes LaTe…

/Support/bin/texparser.py

http://github.com/hesstobi/latex.tmbundle · Python · 397 lines · 366 code · 19 blank · 12 comment · 18 complexity · 50f8aa1fbcbf7d8b8506ee5b63ad6519 MD5 · raw file

import sys
import re
import os.path
import os
import tmprefs
from struct import *

import urllib

def percent_escape(str):
    """Percent-encode spaces, '/', '&' and characters 0x80-0xFF in a string.

    Each matching character is replaced by ``%XX`` where ``XX`` is its
    two-digit upper-case hexadecimal code; everything else is left as is.

    The parameter keeps its original name (``str``) for backward
    compatibility with keyword callers, even though it shadows the builtin
    inside this function.
    """
    def _encode(found):
        # ord() yields the same integer struct.unpack('B', ...) did for a
        # single byte, but also works for text (unicode) characters.
        return '%%%02X' % ord(found.group(0))

    return re.sub('[\x80-\xff /&]', _encode, str)

# Swapped call to percent_escape with urllib.quote.  Was causing links to fail in TM2
def make_link(file, line):
    """Build a ``txmt://`` URL that opens `file` at `line` in TextMate.

    `file` is URL-quoted (``/`` is left untouched by ``quote``'s default
    safe set).  `line` may be a string or a number.  The ``&`` separator is
    emitted as ``&amp;`` because the result is embedded in HTML attributes.
    """
    # Imported locally so the function works on both Python 3
    # (urllib.parse.quote) and Python 2 (urllib.quote).
    try:
        from urllib.parse import quote
    except ImportError:
        from urllib import quote
    return 'txmt://open/?url=file://' + quote(file) + '&amp;line=' + str(line)

def shell_quote(string):
    """Return `string` wrapped in double quotes with shell specials escaped.

    Backticks, dollar signs, backslashes and double quotes are each
    prefixed with a backslash before the surrounding quotes are added.
    """
    def _backslash(found):
        return '\\' + found.group(0)

    escaped = re.sub(r'[`$\\"]', _backslash, string)
    return ''.join(('"', escaped, '"'))


class TexParser(object):
    """Base class for parsing the output stream of a TeX typesetting tool.

    Subclasses append ``(compiled_regex, handler)`` pairs to
    ``self.patterns``.  ``parseStream`` tries the pairs in order on every
    (re-joined) line and calls the first handler whose regex matches at the
    start of the line, as ``handler(match, line)``.  Handlers print HTML to
    standard output and update the counters below.

    All output uses the single-argument ``print(...)`` form, which behaves
    the same as a Python 2 print statement and as the Python 3 function.
    """
    def __init__(self, input_stream, verbose):
        """Store the stream to read from and reset all counters.

        input_stream -- object with a ``readline()`` method
        verbose      -- when true, lines that match no pattern are echoed
        """
        super(TexParser, self).__init__()
        self.input_stream = input_stream
        self.patterns = []      # ordered list of (compiled regex, handler)
        self.done = False       # set by a handler to end parseStream
        self.verbose = verbose
        self.numErrs = 0
        self.numWarns = 0
        self.isFatal = False
        self.fileStack = []  #TODO: long term - can improve currentFile handling by keeping track of (xxx and )

    def getRewrappedLine(self):
        """Sometimes TeX breaks up lines with hard linebreaks.  This is annoying.
           Even more annoying is that it sometime does not break line, for two distinct
           warnings. This function attempts to return a single statement.

           Physical lines of exactly 80 characters (newline included) are
           treated as wrapped and joined with the following line.  Returns
           the statement terminated by a newline, or whatever was collected
           (possibly the empty string) when the stream is exhausted."""
        statement = ""
        while True:
            line = self.input_stream.readline()
            if not line:
                # End of stream: "" if nothing was pending, else the
                # unterminated remainder.
                return statement
            statement += line.rstrip("\n")
            if len(line) != 80: # including line break
                break
        return statement + "\n"

    def parseStream(self):
        """Process the input_stream one statement at a time, matching against
           each entry of the patterns list.  The list holds
           (compiled regex, handler) pairs; the first regex that matches
           gets its handler called with the match object and the line.

           Parsing stops at end of input or as soon as a handler sets
           ``self.done``.  If the input ends without ``done`` being set,
           ``badRun`` is called.  Returns the tuple
           (isFatal, numErrs, numWarns)."""
        # Check `done` BEFORE reading: the stream may be shared with an
        # outer parser, so the line after the terminating one must stay
        # in the stream instead of being read and thrown away.
        while not self.done:
            line = self.getRewrappedLine()
            if not line:
                break
            line = line.rstrip("\n")

            # process matching patterns until we find one
            for pat, fun in self.patterns:
                myMatch = pat.match(line)
                if myMatch:
                    fun(myMatch, line)
                    sys.stdout.flush()
                    break
            else:
                # no pattern matched
                if self.verbose:
                    print(line)

        if not self.done:
            self.badRun()
        return self.isFatal, self.numErrs, self.numWarns

    def _printParagraph(self, cssClass, line):
        """Print `line` inside a <p> element carrying the given CSS class."""
        print('<p class="' + cssClass + '">')
        print(line)
        print('</p>')

    def info(self,m,line):
        """Print the line as an informational paragraph."""
        self._printParagraph('info', line)

    def error(self,m,line):
        """Print the line as an error and count it."""
        self._printParagraph('error', line)
        self.numErrs += 1

    def warning(self,m,line):
        """Print the line as a warning and count it."""
        self._printParagraph('warning', line)
        self.numWarns += 1

    def warn2(self,m,line):
        """Print the line as a formatting warning; it is not counted."""
        self._printParagraph('fmtWarning', line)

    def fatal(self,m,line):
        """Print the line as an error and mark the run as fatal."""
        self._printParagraph('error', line)
        self.isFatal = True

    def badRun(self):
        """Hook called when the input ends before a handler set ``done``.

        The base implementation does nothing; subclasses may override it."""
        pass

class BibTexParser(TexParser):
    """Turn the messages of a BibTeX run into HTML paragraphs.

    Lines are classified by the rules registered in ``__init__``; a line
    starting with ``---`` ends the run.
    """
    def __init__(self, btex, verbose):
        """Register the BibTeX rules on top of the base parser.

        btex    -- stream carrying the BibTeX output
        verbose -- passed through to TexParser
        """
        super(BibTexParser, self).__init__(btex, verbose)
        # (regex source, handler) in matching priority order
        rules = (
            ("Warning--I didn't find a database entry", self.warning),
            (r'I found no \\\w+ command', self.error),
            (r"I couldn't open style file", self.error),
            (r"You're missing a field name---line (\d+)", self.error),
            (r'Too many commas in name \d+ of', self.error),
            (r'I was expecting a', self.error),
            ('This is BibTeX', self.info),
            ('The style', self.info),
            ('Database', self.info),
            ('---', self.finishRun),
        )
        self.patterns.extend(
            (re.compile(source), handler) for source, handler in rules
        )

    def finishRun(self,m,line):
        """Close the enclosing <div> and stop parsing."""
        print('</div>')
        self.done = True

class BiberParser(TexParser):
    """Parse and format Error Messages from biber"""
    def __init__(self, btex, verbose):
        """Register the biber rules on top of the base parser.

        btex    -- stream carrying the biber output
        verbose -- passed through to TexParser
        """
        super(BiberParser, self).__init__(btex,verbose)
        self.patterns += [
            (re.compile(r'^.*WARN') , self.warning),
            (re.compile(r'^.*ERROR') , self.error),
            (re.compile(r'^.*FATAL'), self.fatal),
            (re.compile(r'^.*Output to (.*)$') , self.finishRun),
        ]

    def warning(self,m,line):
        """Using one print command works more reliably
           than using several lines"""
        print('<p class="warning">' + line + '</p>')
        self.numWarns += 1

    def finishRun(self,m,line):
        """Link to the biber transcript (.blg), close the <div>, stop parsing.

        The transcript name is derived from the output file captured by the
        'Output to ...' pattern by swapping its extension for '.blg'.
        """
        # splitext swaps whatever extension is present instead of blindly
        # cutting the last three characters.
        logFile = os.path.splitext(m.group(1))[0] + '.blg'
        print('<p>  Complete transcript is in ')
        print('<a href="' + make_link(os.path.join(os.getcwd(),logFile),'1') +  '">' + logFile + '</a>')
        print('</p>')
        self.done = True
        print('</div>')

class MakeGlossariesParser(TexParser):
    """Parse and format Error Messages from makeglossaries"""
    def __init__(self, btex, verbose):
        """Register the makeglossaries rules on top of the base parser.

        btex    -- stream carrying the makeglossaries output
        verbose -- passed through to TexParser
        """
        super(MakeGlossariesParser, self).__init__(btex,verbose)
        # Raw strings: the patterns contain regex backslash escapes.
        self.patterns += [
            (re.compile(r'^.*makeglossaries version (.*)$') , self.beginRun),
            (re.compile(r"^.*added glossary type '(.*)' \((.*)\).*$") , self.addType),
            (re.compile(r'^.*Markup written into file "(.*)".$') , self.finishMarkup),
            (re.compile(r'^.*xindy.*-L (.*) -I.*-t ".*\.(.*)" -o.*$'), self.runXindy),
            (re.compile(r'Cannot locate xindy module') , self.warning),
            (re.compile(r'ERROR'),self.error),
            (re.compile(r'Warning'),self.warning),
            (re.compile(r'^\*\*\*'),self.info),
        ]
        # Maps a file extension (as listed by 'added glossary type') to the
        # glossary type it belongs to.
        self.types = {}

    def _typeFor(self, extension):
        """Return the glossary type registered for `extension`.

        Falls back to the extension itself when it was never announced, so
        an unexpected log line cannot abort the whole parse with KeyError.
        """
        return self.types.get(extension, extension)

    def beginRun(self,m,line):
        """Print the heading and the makeglossaries version."""
        version = m.group(1)
        print("<h2>Make Glossaries</h2>")
        print('<p class="info" >Version: <i>'+version+ "</i></p>")

    def addType(self,m,line):
        """Remember which extensions belong to the announced glossary type."""
        thisType = m.group(1)
        files = m.group(2)
        for extension in files.split(','):
            self.types[extension] = thisType
        print('<p class="info"> Add Glossary Type <strong>' + thisType +'</strong> <i>(Files: ' + files + ')</i></p>')

    def runXindy(self,m,line):
        """Announce a xindy run for the glossary type of the matched file."""
        lang = m.group(1)
        extension = m.group(2)
        thisType = self._typeFor(extension)

        print('<h3>Run xindy for glossary type '+ thisType +'</h3>')
        print('<p class="info">Language: '+ lang +'</p>')

    def finishMarkup(self,m,line):
        """Report the finished glossary and link to the written file."""
        mkFile = m.group(1)
        # The last three characters of the file name are used as the key
        # into self.types.
        thisType = self._typeFor(mkFile[-3:])
        print('<p class="info">  Finished glossary for type <strong>'+ thisType+ '</strong>. Output is in <a href="' + make_link(os.path.join(os.getcwd(),mkFile),'1') +  '">' + mkFile + '</a></p>')

    def warning(self,m,line):
        """Using one print command works more reliably
           than using several lines"""
        print('<p class="warning">' + line + '</p>')
        self.numWarns += 1

    def error(self,m,line):
        """Using one print command works more reliably
           than using several lines"""
        print('<p class="error">' + line + '</p>')
        # Errors are counted as errors (the warning counter was bumped here
        # before, which under-reported errors to the caller).
        self.numErrs += 1

class LaTexParser(TexParser):
    """Parse Output From Latex"""
    def __init__(self, input_stream, verbose, fileName):
        """Register the LaTeX rules on top of the base parser.

        input_stream -- stream carrying the LaTeX console output
        verbose      -- passed through to TexParser
        fileName     -- name of the file being typeset; its extension is
                        used in the file-detection patterns
        """
        super(LaTexParser, self).__init__(input_stream,verbose)
        self.fileName = fileName
        self.suffix = fileName[fileName.rfind('.')+1:]
        self.currentFile = fileName
        # The suffix is user supplied text, so escape it before splicing it
        # into regular expressions.
        suffix = re.escape(self.suffix)
        self.patterns += [
            #(re.compile('^This is') , self.info),
            (re.compile(r'^Document Class') , self.info),
            (re.compile(r'.*?\((\.\/[^\)]*?\.(tex|' + suffix + r')( |$))') , self.detectNewFile),
            (re.compile(r'.*\<use (.*?)\>') , self.detectInclude),
            (re.compile(r'^Output written') , self.info),
            (re.compile(r'LaTeX Warning:.*?input line (\d+)(\.|$)') , self.handleWarning),
            (re.compile(r'LaTeX Warning:.*') , self.warning),
            (re.compile(r'^([^:]*):(\d+):\s+(pdfTeX warning.*)') , self.handleFileLineWarning),
            (re.compile(r'.*pdfTeX warning.*') , self.warning),
            (re.compile(r'LaTeX Font Warning:.*') , self.warning),
            (re.compile(r'Overfull.*wide') , self.warn2),
            (re.compile(r'Underfull.*badness') , self.warn2),
            (re.compile(r'^([\.\/\w\x7f-\xff\- ]+(?:\.sty|\.tex|\.' + suffix + r')):(\d+):\s+(.*)') , self.handleError),
            (re.compile(r'([^:]*):(\d+): LaTeX Error:(.*)') , self.handleError),
            (re.compile(r'([^:]*):(\d+): (Emergency stop)') , self.handleError),
            (re.compile(r'Runaway argument') , self.pdfLatexError),
            # We need the (.*) at the beginning of the regular expression
            # since in some edge cases the output about the transcript
            # might actually not start at the beginning of the line.
            (re.compile(r'(.*)Transcript written on (.*)\.$') , self.finishRun),
            (re.compile(r'^Error: pdflatex') , self.pdfLatexError),
            (re.compile(r'\!.*') , self.handleOldStyleErrors),
            (re.compile(r'^\s+==>') , self.fatal)
        ]
        self.blankLine = re.compile(r'^\s*$')

    def _link(self, path, lineNumber):
        """Return a TextMate link to `path` (relative to the cwd) at `lineNumber`."""
        return make_link(os.path.join(os.getcwd(), path), lineNumber)

    def detectNewFile(self,m,line):
        """Record the file LaTeX started reading and print a heading for it."""
        self.currentFile = m.group(1).rstrip()
        print("<h4>Processing: " + self.currentFile + "</h4>")

    def detectInclude(self,m,line):
        """Print the name captured from a '<use ...>' line."""
        print("<ul><li>Including: " + m.group(1))
        print("</li></ul>")

    def handleWarning(self,m,line):
        """Display a warning linked to the reported line of the current file."""
        print('<p class="warning"><a href="' + self._link(self.currentFile, m.group(1)) + '">'+line+"</a></p>")
        self.numWarns += 1

    def handleFileLineWarning(self,m,line):
        """Display warning. match m should contain file, line, warning message"""
        print('<p class="warning"><a href="' + self._link(m.group(1), m.group(2)) + '">' + m.group(3) + "</a></p>")
        self.numWarns += 1

    def handleError(self,m,line):
        """Display error. match m should contain file, line, error message"""
        print('<p class="error">')
        print('Latex Error: <a  href="' + self._link(m.group(1), m.group(2)) +  '">' + m.group(1)+":"+m.group(2) + '</a> '+m.group(3)+'</p>')
        self.numErrs += 1

    def finishRun(self,m,line):
        """Link to the transcript named in the matched line and stop parsing."""
        # Group 1 is the arbitrary text in front of "Transcript written on";
        # the log file name is group 2.
        logFile = m.group(2).strip('"')
        print('<p>  Complete transcript is in ')
        print('<a href="' + self._link(logFile, '1') +  '">' + logFile + '</a>')
        print('</p>')
        self.done = True

    def handleOldStyleErrors(self,m,line):
        """Classify a '!' line as error or warning.

        The line counts as an error when it contains 'Error' or 'error',
        otherwise as a warning."""
        if re.search('[Ee]rror', line):
            print('<p class="error">')
            print(line)
            print('</p>')
            self.numErrs += 1
        else:
            print('<p class="warning">')
            print(line)
            print('</p>')
            self.numWarns += 1

    def pdfLatexError(self,m,line):
        """Print an error together with the line that follows it.

        The following line is read directly from the stream.  If it
        announces a fatal error the run is marked fatal; otherwise the line
        is shown as preformatted detail."""
        self.numErrs += 1
        print('<p class="error">')
        print(line)
        line = self.input_stream.readline()
        if line and re.match('^ ==> Fatal error occurred', line):
            print(line.rstrip("\n"))
            print('</p>')
            self.isFatal = True
        else:
            if line:
                print('<pre>    '+ line.rstrip("\n") + '</pre>')
            print('</p>')
        sys.stdout.flush()

    def badRun(self):
        """Report that the output ended without a transcript line.

        Prints a link to the presumed log file: the base name of
        $TM_FILEPATH (or, when that variable is unset, of the file name
        given to the constructor) with its extension replaced by '.log'."""
        print('<p class="error">A fatal error occured, log file is in ')
        sourcePath = os.getenv('TM_FILEPATH') or self.fileName
        # Swap only the extension; a plain str.replace would also rewrite
        # the suffix where it occurs inside the base name ("latex.tex").
        logFile = os.path.splitext(os.path.basename(sourcePath))[0] + '.log'
        print('<a href="' + self._link(logFile, '1') +  '">' + logFile + '</a>')
        print('</p>')

class ParseLatexMk(TexParser):
    """Parse the output of latexmk.

    latexmk interleaves its own messages with the output of the tools it
    runs.  When the banner of such a tool is seen, a dedicated parser is
    run on the same input stream and its error and warning counts are
    added to this parser's counters.
    """
    def __init__(self, input_stream, verbose,filename):
        """Register the latexmk rules on top of the base parser.

        input_stream -- stream carrying the latexmk output
        verbose      -- passed through to TexParser and the sub-parsers
        filename     -- file name handed to the LaTeX sub-parser
        """
        super(ParseLatexMk, self).__init__(input_stream,verbose)
        self.fileName = filename
        self.patterns += [
            (re.compile(r'This is (pdfTeX|latex2e|latex|XeTeX)') , self.startLatex),
            (re.compile(r'This is BibTeX') , self.startBibtex),
            (re.compile(r'^.*This is biber') , self.startBiber),
            (re.compile(r'^Latexmk: All targets \(.*?\) are up-to-date') , self.finishRun),
            (re.compile(r'This is makeindex') , self.startBibtex),
            (re.compile(r'^Latexmk') , self.ltxmk),
            (re.compile(r'Run number') , self.newRun)
        ]
        self.numRuns = 0

    def _runSubParser(self, parser):
        """Run `parser` on the shared stream and add its counts to ours."""
        # NOTE(review): the sub-parser's fatal flag is discarded here, as it
        # was before; confirm whether it should feed self.isFatal.
        f, e, w = parser.parseStream()
        self.numErrs += e
        self.numWarns += w

    def startBibtex(self,m,line):
        """Open a bibtex section and hand the stream to BibTexParser."""
        print('<div class="bibtex">')
        # `line` arrives without its newline, so it is printed whole
        # (slicing off one more character would drop real text).
        print('<h3>' + line + '</h3>')
        self._runSubParser(BibTexParser(self.input_stream,self.verbose))

    def startBiber(self,m,line):
        """Open a biber section and hand the stream to BiberParser."""
        print('<div class="biber">')
        print('<h3>' + line + '</h3>')
        self._runSubParser(BiberParser(self.input_stream,self.verbose))

    def startLatex(self,m,line):
        """Open a latex section and hand the stream to LaTexParser."""
        print('<div class="latex">')
        print('<hr>')
        # See startBibtex: the line carries no trailing newline to strip.
        print('<h3>' + line + '</h3>')
        self._runSubParser(LaTexParser(self.input_stream,self.verbose,self.fileName))

    def newRun(self,m,line):
        """Summarise the previous run (if any) and reset the counters."""
        if self.numRuns > 0:
            print('<hr />')
            print('<p> %s Errors %s Warnings in this run. </p>' % (self.numErrs, self.numWarns))
        self.numWarns = 0
        self.numErrs = 0
        self.numRuns += 1

    def finishRun(self,m,line):
        """Print the final latexmk message and stop parsing."""
        self.ltxmk(m,line)
        self.done = True

    def ltxmk(self,m,line):
        """Print a message issued by latexmk itself."""
        print('<p class="ltxmk">%s</p>' % line)

class ChkTeXParser(TexParser):
    """Parse the output from chktex"""
    def __init__(self, input_stream, verbose, filename):
        """Register the chktex rules on top of the base parser.

        input_stream -- stream carrying the chktex output
        verbose      -- passed through to TexParser
        filename     -- stored as ``self.fileName``
        """
        super(ChkTeXParser, self).__init__(input_stream,verbose)
        self.fileName = filename
        # The dot before "tex" is escaped so only a real ".tex" extension
        # matches.
        self.patterns += [
            (re.compile(r'^ChkTeX') , self.info),
            (re.compile(r'Warning \d+ in (.*\.tex) line (\d+):(.*)') , self.handleWarning),
            (re.compile(r'Error \d+ in (.*\.tex) line (\d+):(.*)') , self.handleError),
        ]
        self.numRuns = 0

    def _nextLine(self):
        """Read the next raw line from the stream, without its newline."""
        return self.input_stream.readline().rstrip("\n")

    def handleWarning(self,m,line):
        """Display warning. match m should contain file, line, warning message

        The next line is read from the stream; if it is longer than two
        characters it and the line after it are shown as preformatted
        detail."""
        print('<p class="warning">Warning: <a href="' + make_link(os.path.join(os.getcwd(), m.group(1)),m.group(2)) + '">' + m.group(1)+ ": "+m.group(2)+":</a>"+m.group(3)+"</p>")
        warnDetail = self.input_stream.readline()
        if len(warnDetail) > 2:
            print('<pre> ' + warnDetail.rstrip("\n"))
            print(self._nextLine() + ' </pre>')
        self.numWarns += 1

    def handleError(self,m,line):
        """Display error. match m should contain file, line, error message

        The two following lines are read from the stream and shown as
        preformatted detail."""
        print('<p class="error">')
        print('Error: <a  href="' + make_link(os.path.join(os.getcwd(),m.group(1)),m.group(2)) +  '">' + m.group(1)+":"+m.group(2) + ':</a> '+m.group(3)+'</p>')
        print('<pre> ' + self._nextLine())
        print(self._nextLine() + ' </pre>')
        self.numErrs += 1

if __name__ == '__main__':
    # Manual smoke test: run the biber parser over a sample log.
    # (A LaTexParser used to be created here as well, but it was replaced
    # by the BiberParser before ever being used.)
    with open('../tex/test.log') as stream:
        lp = BiberParser(stream, False)
        f,e,w = lp.parseStream()
Summary ✨

This Python script parses the console output of LaTeX tooling (LaTeX itself, BibTeX, Biber, makeglossaries, latexmk and chktex) and prints it as HTML. It uses one parser class per tool, all derived from a common base class that matches each output line against a list of regular expressions. Warnings and errors are displayed, linked to the affected file and line where possible, and counted so that the caller receives a summary of the run.
Tech Fingerprint

Alerts (14)

'import *' Avoid to prevent namespace pollution; import specific names or use aliases
6
'def' Ensure functions have docstrings for documentation
10 14 17 272 324 332 340 349 357 361 385
'lambda' Avoid complex 'lambda' functions; prefer named functions for clarity and debugging
11
'open(' Use 'with open()' to ensure Files are properly closed
394