PageRenderTime 29ms CodeModel.GetById 14ms app.highlight 11ms RepoModel.GetById 1ms app.codeStats 0ms

/Tools/scripts/texcheck.py

http://unladen-swallow.googlecode.com/
Python | 233 lines | 208 code | 2 blank | 23 comment | 1 complexity | 4a69a31a9a1f4a73903d5e047d84dc67 MD5 | raw file
""" TeXcheck.py -- rough syntax checking on Python style LaTeX documents.

   Written by Raymond D. Hettinger <python at rcn.com>
   Copyright (c) 2003 Python Software Foundation.  All rights reserved.

Designed to catch common markup errors including:
* Unbalanced or mismatched parentheses, brackets, and braces.
* Unbalanced or mismatched \\begin and \\end blocks.
* Misspelled or invalid LaTeX commands.
* Use of forward slashes instead of backslashes for commands.
* Table line size mismatches.

Sample command line usage:
    python texcheck.py -k chapterheading -m lib/librandomtex *.tex

Options:
    -m          Munge parentheses and brackets. [0,n) would normally mismatch.
    -k keyword: Keyword is a valid LaTeX command. Do not include the backslash.
    -d:         Delimiter check only (useful for non-LaTeX files).
    -h:         Help
    -s lineno:  Start at lineno (useful for skipping complex sections).
    -v:         Verbose.  Trace the matching of \\begin and \\end blocks.
"""
 24
 25import re
 26import sys
 27import getopt
 28from itertools import izip, count, islice
 29import glob
 30
 31cmdstr = r"""
 32    \section \module \declaremodule \modulesynopsis \moduleauthor
 33    \sectionauthor \versionadded \code \class \method \begin
 34    \optional \var \ref \end \subsection \lineiii \hline \label
 35    \indexii \textrm \ldots \keyword \stindex \index \item \note
 36    \withsubitem \ttindex \footnote \citetitle \samp \opindex
 37    \noindent \exception \strong \dfn \ctype \obindex \character
 38    \indexiii \function \bifuncindex \refmodule \refbimodindex
 39    \subsubsection \nodename \member \chapter \emph \ASCII \UNIX
 40    \regexp \program \production \token \productioncont \term
 41    \grammartoken \lineii \seemodule \file \EOF \documentclass
 42    \usepackage \title \input \maketitle \ifhtml \fi \url \Cpp
 43    \tableofcontents \kbd \programopt \envvar \refstmodindex
 44    \cfunction \constant \NULL \moreargs \cfuncline \cdata
 45    \textasciicircum \n \ABC \setindexsubitem \versionchanged
 46    \deprecated \seetext \newcommand \POSIX \pep \warning \rfc
 47    \verbatiminput \methodline \textgreater \seetitle \lineiv
 48    \funclineni \ulink \manpage \funcline \dataline \unspecified
 49    \textbackslash \mimetype \mailheader \seepep \textunderscore
 50    \longprogramopt \infinity \plusminus \shortversion \version
 51    \refmodindex \seerfc \makeindex \makemodindex \renewcommand
 52    \indexname \appendix \protect \indexiv \mbox \textasciitilde
 53    \platform \seeurl \leftmargin \labelwidth \localmoduletable
 54    \LaTeX \copyright \memberline \backslash \pi \centerline
 55    \caption \vspace \textwidth \menuselection \textless
 56    \makevar \csimplemacro \menuselection \bfcode \sub \release
 57    \email \kwindex \refexmodindex \filenq \e \menuselection
 58    \exindex \linev \newsgroup \verbatim \setshortversion
 59    \author \authoraddress \paragraph \subparagraph \cmemberline
 60    \textbar \C \seelink
 61"""
 62
 63def matchclose(c_lineno, c_symbol, openers, pairmap):
 64    "Verify that closing delimiter matches most recent opening delimiter"
 65    try:
 66        o_lineno, o_symbol = openers.pop()
 67    except IndexError:
 68        print "\nDelimiter mismatch.  On line %d, encountered closing '%s' without corresponding open" % (c_lineno, c_symbol)
 69        return
 70    if o_symbol in pairmap.get(c_symbol, [c_symbol]): return
 71    print "\nOpener '%s' on line %d was not closed before encountering '%s' on line %d" % (o_symbol, o_lineno, c_symbol, c_lineno)
 72    return
 73
 74def checkit(source, opts, morecmds=[]):
 75    """Check the LaTeX formatting in a sequence of lines.
 76
 77    Opts is a mapping of options to option values if any:
 78        -m          munge parenthesis and brackets
 79        -d          delimiters only checking
 80        -v          verbose trace of delimiter matching
 81        -s lineno:  linenumber to start scan (default is 1).
 82
 83    Morecmds is a sequence of LaTeX commands (without backslashes) that
 84    are to be considered valid in the scan.
 85    """
 86
 87    texcmd = re.compile(r'\\[A-Za-z]+')
 88    falsetexcmd = re.compile(r'\/([A-Za-z]+)') # Mismarked with forward slash
 89
 90    validcmds = set(cmdstr.split())
 91    for cmd in morecmds:
 92        validcmds.add('\\' + cmd)
 93
 94    if '-m' in opts:
 95        pairmap = {']':'[(', ')':'(['}      # Munged openers
 96    else:
 97        pairmap = {']':'[', ')':'('}        # Normal opener for a given closer
 98    openpunct = set('([')                   # Set of valid openers
 99
100    delimiters = re.compile(r'\\(begin|end){([_a-zA-Z]+)}|([()\[\]])')
101    braces = re.compile(r'({)|(})')
102    doubledwords = re.compile(r'(\b[A-za-z]+\b) \b\1\b')
103    spacingmarkup = re.compile(r'\\(ABC|ASCII|C|Cpp|EOF|infinity|NULL|plusminus|POSIX|UNIX)\s')
104
105    openers = []                            # Stack of pending open delimiters
106    bracestack = []                         # Stack of pending open braces
107
108    tablestart = re.compile(r'\\begin{(?:long)?table([iv]+)}')
109    tableline = re.compile(r'\\line([iv]+){')
110    tableend = re.compile(r'\\end{(?:long)?table([iv]+)}')
111    tablelevel = ''
112    tablestartline = 0
113
114    startline = int(opts.get('-s', '1'))
115    lineno = 0
116
117    for lineno, line in izip(count(startline), islice(source, startline-1, None)):
118        line = line.rstrip()
119
120        # Check balancing of open/close parenthesis, brackets, and begin/end blocks
121        for begend, name, punct in delimiters.findall(line):
122            if '-v' in opts:
123                print lineno, '|', begend, name, punct,
124            if begend == 'begin' and '-d' not in opts:
125                openers.append((lineno, name))
126            elif punct in openpunct:
127                openers.append((lineno, punct))
128            elif begend == 'end' and '-d' not in opts:
129                matchclose(lineno, name, openers, pairmap)
130            elif punct in pairmap:
131                matchclose(lineno, punct, openers, pairmap)
132            if '-v' in opts:
133                print '   --> ', openers
134
135        # Balance opening and closing braces
136        for open, close in braces.findall(line):
137            if open == '{':
138                bracestack.append(lineno)
139            if close == '}':
140                try:
141                    bracestack.pop()
142                except IndexError:
143                    print r'Warning, unmatched } on line %s.' % (lineno,)
144
145        # Optionally, skip LaTeX specific checks
146        if '-d' in opts:
147            continue
148
149        # Warn whenever forward slashes encountered with a LaTeX command
150        for cmd in falsetexcmd.findall(line):
151            if '822' in line or '.html' in line:
152                continue    # Ignore false positives for urls and for /rfc822
153            if '\\' + cmd in validcmds:
154                print 'Warning, forward slash used on line %d with cmd: /%s' % (lineno, cmd)
155
156        # Check for markup requiring {} for correct spacing
157        for cmd in spacingmarkup.findall(line):
158            print r'Warning, \%s should be written as \%s{} on line %d' % (cmd, cmd, lineno)
159
160        # Validate commands
161        nc = line.find(r'\newcommand')
162        if nc != -1:
163            start = line.find('{', nc)
164            end = line.find('}', start)
165            validcmds.add(line[start+1:end])
166        for cmd in texcmd.findall(line):
167            if cmd not in validcmds:
168                print r'Warning, unknown tex cmd on line %d: \%s' % (lineno, cmd)
169
170        # Check table levels (make sure lineii only inside tableii)
171        m = tablestart.search(line)
172        if m:
173            tablelevel = m.group(1)
174            tablestartline = lineno
175        m = tableline.search(line)
176        if m and m.group(1) != tablelevel:
177            print r'Warning, \line%s on line %d does not match \table%s on line %d' % (m.group(1), lineno, tablelevel, tablestartline)
178        if tableend.search(line):
179            tablelevel = ''
180
181        # Style guide warnings
182        if 'e.g.' in line or 'i.e.' in line:
183            print r'Style warning, avoid use of i.e or e.g. on line %d' % (lineno,)
184
185        for dw in doubledwords.findall(line):
186            print r'Doubled word warning.  "%s" on line %d' % (dw, lineno)
187
188    lastline = lineno
189    for lineno, symbol in openers:
190        print "Unmatched open delimiter '%s' on line %d" % (symbol, lineno)
191    for lineno in bracestack:
192        print "Unmatched { on line %d" % (lineno,)
193    print 'Done checking %d lines.' % (lastline,)
194    return 0
195
def main(args=None):
    """Command line entry point: run checkit() on every file named in args.

    Args defaults to sys.argv[1:].  Options are parsed by getopt with the
    spec "k:mdhs:v"; each -k value is passed to checkit() as an extra valid
    command.  File arguments containing '*' or '?' are expanded with glob.

    Returns 0 after printing the help text (-h given, or no arguments at
    all), 1 when there is no file to check, 2 when a file cannot be opened,
    and otherwise the largest value returned by checkit().
    """
    if args is None:
        args = sys.argv[1:]
    optitems, arglist = getopt.getopt(args, "k:mdhs:v")
    opts = dict(optitems)
    if '-h' in opts or not args:
        print(__doc__)
        return 0

    if len(arglist) < 1:
        print('Please specify a file to be checked')
        return 1

    # Expand wildcards into a fresh list.  Splicing the list being iterated
    # would skip the argument that follows a pattern with no matches.
    filenames = []
    for filespec in arglist:
        if '*' in filespec or '?' in filespec:
            filenames.extend(glob.glob(filespec))
        else:
            filenames.append(filespec)

    if not filenames:
        # Every pattern expanded to nothing; max() below needs one result.
        print('No files matched the given file specifications')
        return 1

    morecmds = [v for k, v in optitems if k == '-k']
    err = []

    for filename in filenames:
        print('=' * 30)
        print("Checking %s" % (filename,))
        try:
            f = open(filename)
        except IOError:
            print('Cannot open file %s.' % (filename,))
            return 2

        try:
            err.append(checkit(f, opts, morecmds))
        finally:
            f.close()

    return max(err)

if __name__ == '__main__':
    sys.exit(main())