/Tools/scripts/texcheck.py

http://unladen-swallow.googlecode.com/ · Python · 233 lines · 161 code · 30 blank · 42 comment · 54 complexity · 4a69a31a9a1f4a73903d5e047d84dc67 MD5 · raw file

  1. """ TeXcheck.py -- rough syntax checking on Python style LaTeX documents.
  2. Written by Raymond D. Hettinger <python at rcn.com>
  3. Copyright (c) 2003 Python Software Foundation. All rights reserved.
  4. Designed to catch common markup errors including:
  5. * Unbalanced or mismatched parentheses, brackets, and braces.
  6. * Unbalanced or mismatched \\begin and \\end blocks.
  7. * Misspelled or invalid LaTeX commands.
  8. * Use of forward slashes instead of backslashes for commands.
  9. * Table line size mismatches.
  10. Sample command line usage:
  11. python texcheck.py -k chapterheading -m lib/librandomtex *.tex
  12. Options:
  13. -m Munge parenthesis and brackets. [0,n) would normally mismatch.
  14. -k keyword: Keyword is a valid LaTeX command. Do not include the backslash.
  15. -d: Delimiter check only (useful for non-LaTeX files).
  16. -h: Help
  17. -s lineno: Start at lineno (useful for skipping complex sections).
  18. -v: Verbose. Trace the matching of \\begin and \\end blocks.
  19. """
  20. import re
  21. import sys
  22. import getopt
  23. from itertools import izip, count, islice
  24. import glob
# Whitespace-separated LaTeX commands that are accepted as valid by default.
# checkit() splits this string on whitespace and stores the pieces in a set,
# so line breaks and repeated entries (e.g. \menuselection) do not matter.
cmdstr = r"""
\section \module \declaremodule \modulesynopsis \moduleauthor
\sectionauthor \versionadded \code \class \method \begin
\optional \var \ref \end \subsection \lineiii \hline \label
\indexii \textrm \ldots \keyword \stindex \index \item \note
\withsubitem \ttindex \footnote \citetitle \samp \opindex
\noindent \exception \strong \dfn \ctype \obindex \character
\indexiii \function \bifuncindex \refmodule \refbimodindex
\subsubsection \nodename \member \chapter \emph \ASCII \UNIX
\regexp \program \production \token \productioncont \term
\grammartoken \lineii \seemodule \file \EOF \documentclass
\usepackage \title \input \maketitle \ifhtml \fi \url \Cpp
\tableofcontents \kbd \programopt \envvar \refstmodindex
\cfunction \constant \NULL \moreargs \cfuncline \cdata
\textasciicircum \n \ABC \setindexsubitem \versionchanged
\deprecated \seetext \newcommand \POSIX \pep \warning \rfc
\verbatiminput \methodline \textgreater \seetitle \lineiv
\funclineni \ulink \manpage \funcline \dataline \unspecified
\textbackslash \mimetype \mailheader \seepep \textunderscore
\longprogramopt \infinity \plusminus \shortversion \version
\refmodindex \seerfc \makeindex \makemodindex \renewcommand
\indexname \appendix \protect \indexiv \mbox \textasciitilde
\platform \seeurl \leftmargin \labelwidth \localmoduletable
\LaTeX \copyright \memberline \backslash \pi \centerline
\caption \vspace \textwidth \menuselection \textless
\makevar \csimplemacro \menuselection \bfcode \sub \release
\email \kwindex \refexmodindex \filenq \e \menuselection
\exindex \linev \newsgroup \verbatim \setshortversion
\author \authoraddress \paragraph \subparagraph \cmemberline
\textbar \C \seelink
"""
  56. def matchclose(c_lineno, c_symbol, openers, pairmap):
  57. "Verify that closing delimiter matches most recent opening delimiter"
  58. try:
  59. o_lineno, o_symbol = openers.pop()
  60. except IndexError:
  61. print "\nDelimiter mismatch. On line %d, encountered closing '%s' without corresponding open" % (c_lineno, c_symbol)
  62. return
  63. if o_symbol in pairmap.get(c_symbol, [c_symbol]): return
  64. print "\nOpener '%s' on line %d was not closed before encountering '%s' on line %d" % (o_symbol, o_lineno, c_symbol, c_lineno)
  65. return
  66. def checkit(source, opts, morecmds=[]):
  67. """Check the LaTeX formatting in a sequence of lines.
  68. Opts is a mapping of options to option values if any:
  69. -m munge parenthesis and brackets
  70. -d delimiters only checking
  71. -v verbose trace of delimiter matching
  72. -s lineno: linenumber to start scan (default is 1).
  73. Morecmds is a sequence of LaTeX commands (without backslashes) that
  74. are to be considered valid in the scan.
  75. """
  76. texcmd = re.compile(r'\\[A-Za-z]+')
  77. falsetexcmd = re.compile(r'\/([A-Za-z]+)') # Mismarked with forward slash
  78. validcmds = set(cmdstr.split())
  79. for cmd in morecmds:
  80. validcmds.add('\\' + cmd)
  81. if '-m' in opts:
  82. pairmap = {']':'[(', ')':'(['} # Munged openers
  83. else:
  84. pairmap = {']':'[', ')':'('} # Normal opener for a given closer
  85. openpunct = set('([') # Set of valid openers
  86. delimiters = re.compile(r'\\(begin|end){([_a-zA-Z]+)}|([()\[\]])')
  87. braces = re.compile(r'({)|(})')
  88. doubledwords = re.compile(r'(\b[A-za-z]+\b) \b\1\b')
  89. spacingmarkup = re.compile(r'\\(ABC|ASCII|C|Cpp|EOF|infinity|NULL|plusminus|POSIX|UNIX)\s')
  90. openers = [] # Stack of pending open delimiters
  91. bracestack = [] # Stack of pending open braces
  92. tablestart = re.compile(r'\\begin{(?:long)?table([iv]+)}')
  93. tableline = re.compile(r'\\line([iv]+){')
  94. tableend = re.compile(r'\\end{(?:long)?table([iv]+)}')
  95. tablelevel = ''
  96. tablestartline = 0
  97. startline = int(opts.get('-s', '1'))
  98. lineno = 0
  99. for lineno, line in izip(count(startline), islice(source, startline-1, None)):
  100. line = line.rstrip()
  101. # Check balancing of open/close parenthesis, brackets, and begin/end blocks
  102. for begend, name, punct in delimiters.findall(line):
  103. if '-v' in opts:
  104. print lineno, '|', begend, name, punct,
  105. if begend == 'begin' and '-d' not in opts:
  106. openers.append((lineno, name))
  107. elif punct in openpunct:
  108. openers.append((lineno, punct))
  109. elif begend == 'end' and '-d' not in opts:
  110. matchclose(lineno, name, openers, pairmap)
  111. elif punct in pairmap:
  112. matchclose(lineno, punct, openers, pairmap)
  113. if '-v' in opts:
  114. print ' --> ', openers
  115. # Balance opening and closing braces
  116. for open, close in braces.findall(line):
  117. if open == '{':
  118. bracestack.append(lineno)
  119. if close == '}':
  120. try:
  121. bracestack.pop()
  122. except IndexError:
  123. print r'Warning, unmatched } on line %s.' % (lineno,)
  124. # Optionally, skip LaTeX specific checks
  125. if '-d' in opts:
  126. continue
  127. # Warn whenever forward slashes encountered with a LaTeX command
  128. for cmd in falsetexcmd.findall(line):
  129. if '822' in line or '.html' in line:
  130. continue # Ignore false positives for urls and for /rfc822
  131. if '\\' + cmd in validcmds:
  132. print 'Warning, forward slash used on line %d with cmd: /%s' % (lineno, cmd)
  133. # Check for markup requiring {} for correct spacing
  134. for cmd in spacingmarkup.findall(line):
  135. print r'Warning, \%s should be written as \%s{} on line %d' % (cmd, cmd, lineno)
  136. # Validate commands
  137. nc = line.find(r'\newcommand')
  138. if nc != -1:
  139. start = line.find('{', nc)
  140. end = line.find('}', start)
  141. validcmds.add(line[start+1:end])
  142. for cmd in texcmd.findall(line):
  143. if cmd not in validcmds:
  144. print r'Warning, unknown tex cmd on line %d: \%s' % (lineno, cmd)
  145. # Check table levels (make sure lineii only inside tableii)
  146. m = tablestart.search(line)
  147. if m:
  148. tablelevel = m.group(1)
  149. tablestartline = lineno
  150. m = tableline.search(line)
  151. if m and m.group(1) != tablelevel:
  152. print r'Warning, \line%s on line %d does not match \table%s on line %d' % (m.group(1), lineno, tablelevel, tablestartline)
  153. if tableend.search(line):
  154. tablelevel = ''
  155. # Style guide warnings
  156. if 'e.g.' in line or 'i.e.' in line:
  157. print r'Style warning, avoid use of i.e or e.g. on line %d' % (lineno,)
  158. for dw in doubledwords.findall(line):
  159. print r'Doubled word warning. "%s" on line %d' % (dw, lineno)
  160. lastline = lineno
  161. for lineno, symbol in openers:
  162. print "Unmatched open delimiter '%s' on line %d" % (symbol, lineno)
  163. for lineno in bracestack:
  164. print "Unmatched { on line %d" % (lineno,)
  165. print 'Done checking %d lines.' % (lastline,)
  166. return 0
  167. def main(args=None):
  168. if args is None:
  169. args = sys.argv[1:]
  170. optitems, arglist = getopt.getopt(args, "k:mdhs:v")
  171. opts = dict(optitems)
  172. if '-h' in opts or args==[]:
  173. print __doc__
  174. return 0
  175. if len(arglist) < 1:
  176. print 'Please specify a file to be checked'
  177. return 1
  178. for i, filespec in enumerate(arglist):
  179. if '*' in filespec or '?' in filespec:
  180. arglist[i:i+1] = glob.glob(filespec)
  181. morecmds = [v for k,v in optitems if k=='-k']
  182. err = []
  183. for filename in arglist:
  184. print '=' * 30
  185. print "Checking", filename
  186. try:
  187. f = open(filename)
  188. except IOError:
  189. print 'Cannot open file %s.' % arglist[0]
  190. return 2
  191. try:
  192. err.append(checkit(f, opts, morecmds))
  193. finally:
  194. f.close()
  195. return max(err)
# When run as a script, use main()'s return value as the process exit status.
if __name__ == '__main__':
    sys.exit(main())