PageRenderTime 49ms CodeModel.GetById 18ms RepoModel.GetById 1ms app.codeStats 0ms

/Support/bin/texparser.py

http://github.com/jeroenh/pytexmate.tmbundle
Python | 462 lines | 460 code | 1 blank | 1 comment | 2 complexity | dddc9cc23d0a6f5e08eac7c4ec84a86a MD5 | raw file
  1. import sys
  2. import re
  3. import os
  4. import os.path
  5. import tmprefs
  6. from struct import *
  7. from xml.sax.saxutils import escape
  8. from urllib import quote
  9. def make_link(file, line):
  10. """A custom version of urlparse.urlunparse()"""
  11. file = os.path.realpath(os.path.join(os.getcwd(), file)) # make absolute, (works fine even if it was already absolute)
  12. return 'txmt://open?url=file:%2F%2F' + quote(file, '') + '&line=' + str(line)
  13. class TexParser(object):
  14. """Master Class for Parsing Tex Typsetting Streams"""
  15. def __init__(self, input_stream, verbose, fileName=None):
  16. super(TexParser, self).__init__()
  17. self.fileName = fileName
  18. self.setInput(input_stream)
  19. self.patterns = []
  20. self.done = False
  21. self.verbose = verbose
  22. self.numErrs = 0
  23. self.numWarns = 0
  24. self.isFatal = False
  25. self.numRuns = 0
  26. def setInput(self, input_stream):
  27. self.input_stream = input_stream
  28. def parseLine(self, line):
  29. """Process a single line"""
  30. # process matching patterns until we find one
  31. foundMatch = False
  32. for pat,fun in self.patterns:
  33. myMatch = pat.match(line)
  34. if myMatch:
  35. fun(myMatch,line)
  36. sys.stdout.flush()
  37. foundMatch = True
  38. break
  39. if self.verbose and not foundMatch:
  40. print escape(line)
  41. def parseStream(self):
  42. """Process the input_stream one line at a time, matching against
  43. each pattern in the patterns dictionary. If a pattern matches
  44. call the corresponding method in the dictionary. The dictionary
  45. is organized with patterns as the keys and methods as the values."""
  46. line = self.input_stream.readline()
  47. while line and not self.done:
  48. line = line.rstrip("\n")
  49. self.parseLine(line)
  50. line = self.input_stream.readline()
  51. self.wrapup()
  52. return self.isFatal, self.numErrs, self.numWarns
  53. def wrapup(self):
  54. if self.done == False:
  55. self.badRun()
  56. if self.numRuns == 0:
  57. self.numRuns = 1
  58. def info(self,m,line):
  59. print '<p class="info">'
  60. print escape(line)
  61. print '</p>'
  62. def error(self,m,line):
  63. print '<p class="error">'
  64. print escape(line)
  65. print '</p>'
  66. self.numErrs += 1
  67. def warning(self,m,line):
  68. print '<p class="warning">'
  69. print escape(line)
  70. print '</p>'
  71. self.numWarns += 1
  72. def warn2(self,m,line):
  73. print '<p class="fmtWarning">'
  74. print escape(line)
  75. print '</p>'
  76. def fatal(self,m,line):
  77. print '<p class="error">'
  78. print escape(line)
  79. print '</p>'
  80. self.isFatal = True
  81. def badRun(self):
  82. """docstring for finishRun"""
  83. pass
  84. class MkIndexParser(TexParser):
  85. """Parse and format Error Messages from makeindex"""
  86. def __init__(self, btex, verbose, fileName=None):
  87. super(MkIndexParser, self).__init__(btex,verbose,fileName)
  88. self.patterns += [
  89. (re.compile("Input index file (.*) not found") , self.noInputError)
  90. ]
  91. def noInputError(self,m,line):
  92. print '<p class="error">'
  93. print escape(line)
  94. print '</p>'
  95. print '<p class="info">'
  96. print "Make sure your latex file includes <code>\usepackage{makeidx} \makeindex</code> and run latex before running makeindex."
  97. print '</p>'
  98. self.numErrs += 1
  99. class BibTexParser(TexParser):
  100. """Parse and format Error Messages from bibtex"""
  101. def __init__(self, btex, verbose, fileName=None):
  102. super(BibTexParser, self).__init__(btex,verbose,fileName)
  103. self.numNobiblioErrs = 0
  104. self.patterns += [
  105. (re.compile("Warning--(.*)") , self.warning),
  106. (re.compile("--line (\d+) of file (.*)") , self.handleFileLineReference),
  107. (re.compile(r'I found no \\\w+ command') , self.warning),
  108. (re.compile(r"I couldn't open style file"), self.error),
  109. (re.compile(r"I couldn't open \w+ file"), self.error),
  110. (re.compile('This is BibTeX') , self.info),
  111. (re.compile('The style') , self.info),
  112. (re.compile('Database') , self.info),
  113. (re.compile('---') , self.finishRun)
  114. ]
  115. def handleFileLineReference(self,m,line):
  116. # TODO: fix
  117. """Display warning. match m should contain file, line, warning message. Ideally, this line should be merged with the previous line, but this would require that getRewrappedLine also merges these lines."""
  118. print '<p><a href="' + make_link(m.group(2),m.group(1)) + '">' + escape(line) + "</a></p>"
  119. self.numWarns += 1
  120. def finishRun(self,m,line):
  121. self.done = True
  122. class LaTexParser(TexParser):
  123. """Parse Output From Latex"""
  124. def __init__(self, input_stream, verbose, fileName=None):
  125. super(LaTexParser, self).__init__(input_stream,verbose,fileName)
  126. self.outputFile = ""
  127. if fileName:
  128. self.fileStack = [fileName]
  129. else:
  130. self.fileStack = []
  131. self.currentFile = ""
  132. self.exts = set(['.tex']) # files with these extensions are displayed. Includes dot
  133. if self.fileName and len(os.path.splitext(self.fileName)) > 1:
  134. self.exts.add(os.path.splitext(self.fileName)[1]) # extension with dot
  135. # NOTE: to support file names with accented chars, the line needs to be a Unicode string
  136. # instead of a binary string (e.g. u"line" instead of "line".) In addition, add the re.UNICODE
  137. # flag to each regexp. That would be sufficient for regexps with \w in the name. That does
  138. # not help for file names with spaces in the name. Also, I doubt that latex supports files
  139. # names with accented chars, especially because HFS+ uses NFD (decomposed) Unicode chars,
  140. # while most UNIX tools expect NFC (precomposed) chars.
  141. self.patterns += [
  142. (re.compile('This is') , self.info),
  143. (re.compile('Document Class') , self.info),
  144. (re.compile('.*\<use (.*?)\>') , self.detectInclude),
  145. (re.compile('Output written on (.*) (\(.*\))') , self.outputInfo),
  146. (re.compile('LaTeX Warning:.*?input line (\d+)(\.|$)') , self.handleWarning),
  147. (re.compile('Package \w+ Warning:.*?input line (\d+)(\.|$)') , self.handleWarning),
  148. (re.compile('LaTeX Warning:.*') , self.warning),
  149. (re.compile('Package \w+ Warning:.*') , self.warning),
  150. (re.compile('([^:]*):(\d+):\s+(pdfTeX warning.*)') , self.handleFileLineWarning),
  151. (re.compile('.*pdfTeX warning.*') , self.warning),
  152. (re.compile('LaTeX Font Warning:.*') , self.warning),
  153. (re.compile('Overfull.*wide') , self.warn2),
  154. (re.compile('Underfull.*badness') , self.warn2),
  155. (re.compile('([^:]*):(\d+): LaTeX Error:(.*)') , self.handleError),
  156. (re.compile('([^:]*):(\d+): (Emergency stop)') , self.handleError),
  157. (re.compile('.*?([^:]+\.\w+):(\d+):\s+(.*)') , self.handleError),
  158. (re.compile('Transcript written on (.*)\.$') , self.finishRun),
  159. (re.compile('Error: pdflatex') , self.pdfLatexError),
  160. (re.compile('\!.*') , self.handleOldStyleErrors),
  161. (re.compile('\s+==>') , self.fatal)
  162. ]
  163. self.blankLine = re.compile(r'^\s*$')
  164. def setInput(self, input_stream):
  165. # Decorate input_stream with formatters that reformats the log lines to single log statements
  166. self.input_stream = NoMultilinePackageWarning(NoMultilineWarning(LinebreakWarning(NoLinebreak80(input_stream))))
  167. # self.input_stream = input_stream
  168. def getLastFile(self):
  169. """Returns the short name of the last file present in self.fileStack.
  170. self.fileStack contains a lot of bogus and irrelevant entries.
  171. e.g. 'verson 3.14 (Web2C)' or .clo, .sty, .ldf files instead of .tex files"""
  172. # Typical matches: '', '.', '.\d+', '.\d+pt', '.aux', '.bbl', '.cfg', '.clo', '.cls', '.def', '.fd', '.ldf', '.out', '.sty', '.tex', '.toc',
  173. for filename in reversed(self.fileStack):
  174. if os.path.splitext(filename)[1] in self.exts:
  175. return filename
  176. return ""
  177. def parseLine(self, line):
  178. """Process a single line"""
  179. # Find parsed file names
  180. filematch = re.compile(r'([\(\)])([\w/\.\-]*)') # matches '(filename.tex' or ')'
  181. for (openclose, filename) in filematch.findall(line):
  182. if openclose == '(':
  183. self.fileStack.append(filename)
  184. newfile = self.getLastFile() # see if this changes the "active" file
  185. if newfile != self.currentFile:
  186. print "<h4>Processing: " + escape(newfile) + "</h4>"
  187. self.currentFile = newfile
  188. elif len(self.fileStack) > 0:
  189. self.fileStack.pop()
  190. # self.currentFile = self.getLastFile()
  191. newfile = self.getLastFile() # see if this changes the "active" file
  192. if newfile != self.currentFile:
  193. print "<h4>Resume processing: " + escape(newfile) + "</h4>"
  194. self.currentFile = newfile
  195. # process matching patterns until we find one
  196. TexParser.parseLine(self, line)
  197. def detectInclude(self,m,line):
  198. print "<ul><li>Including: " + escape(m.group(1))
  199. print "</li></ul>"
  200. def handleWarning(self,m,line):
  201. """Display warning. match m should contain line, warning message"""
  202. print '<p class="warning"><a href="' + make_link(self.currentFile, m.group(1)) + '">'+escape(line)+"</a></p>"
  203. self.numWarns += 1
  204. def handleFileLineWarning(self,m,line):
  205. """Display warning. match m should contain file, line, warning message"""
  206. print '<p class="warning"><a href="' + make_link(m.group(1),m.group(2)) + '">' + escape(m.group(3)) + "</a></p>"
  207. self.numWarns += 1
  208. def handleError(self,m,line):
  209. """Display error. match m should contain file, line, error message"""
  210. print '<p class="error">'
  211. print 'Latex Error: <a href="' + make_link(m.group(1),m.group(2)) + '">' + escape(m.group(1)+":"+m.group(2)) + '</a> '+escape(m.group(3))+'</p>'
  212. self.numErrs += 1
  213. def finishRun(self,m,line):
  214. logFile = m.group(1).strip('"')
  215. print '<p>Complete transcript is in '
  216. print '<a href="' + make_link(logFile,'1') + '">' + escape(logFile) + '</a>'
  217. print '</p>'
  218. self.done = True
  219. def outputInfo(self,m,line):
  220. self.outputFile = m.group(1).strip('"')
  221. print '<p class="info">Output written on <a href="%s">%s</a> (%s)</p>' % (self.outputFile, escape(m.group(1)), escape(m.group(2)))
  222. def handleOldStyleErrors(self,m,line):
  223. if re.search('[Ee]rror', line):
  224. print '<p class="error">'
  225. print escape(line)
  226. print '</p>'
  227. self.numErrs += 1
  228. else:
  229. print '<p class="warning">'
  230. print escape(line)
  231. print '</p>'
  232. self.numWarns += 1
  233. def pdfLatexError(self,m,line):
  234. """docstring for pdfLatexError"""
  235. self.numErrs += 1
  236. print '<p class="error">'
  237. print escape(line)
  238. line = self.input_stream.readline()
  239. if line and re.match('^ ==> Fatal error occurred', line):
  240. print escape(line.rstrip("\n"))
  241. print '</p>'
  242. self.isFatal = True
  243. else:
  244. print '</p>'
  245. sys.stdout.flush()
  246. def badRun(self):
  247. """docstring for finishRun"""
  248. # logfile location is wrong for different output directory, but fixing this is not worth the effort.
  249. logfile = os.path.splitext(self.fileName)[0]+'.log'
  250. print '<p class="error">Output of program terminated prematurely. Logfile is in <a href="%s">%s</a></p>' % (make_link(logfile,1), escape(logfile))
  251. class ParseLatexMk(TexParser):
  252. """docstring for ParseLatexMk"""
  253. def __init__(self, input_stream, verbose,fileName=None):
  254. super(ParseLatexMk, self).__init__(input_stream,verbose,fileName)
  255. self.patterns += [
  256. (re.compile('This is (pdfTeXk|latex2e|latex|XeTeXk)') , self.startLatex),
  257. (re.compile('This is BibTeX') , self.startBibtex),
  258. (re.compile('^Latexmk: All targets \(.*?\) are up-to-date') , self.finishRun),
  259. (re.compile('This is makeindex') , self.startBibtex),
  260. (re.compile('^Latexmk') , self.ltxmk),
  261. (re.compile('Run number') , self.newRun)
  262. ]
  263. def startBibtex(self,m,line):
  264. print '<div class="bibtex">'
  265. print '<h3>' + escape(line[:-1]) + '</h3>'
  266. bp = BibTexParser(self.input_stream,self.verbose)
  267. f,e,w = bp.parseStream()
  268. self.numErrs += e
  269. self.numWarns += w
  270. print '</div>'
  271. def startLatex(self,m,line):
  272. print '<div class="latex">'
  273. print '<hr>'
  274. print '<h3>' + escape(line[:-1]) + '</h3>'
  275. bp = LaTexParser(self.input_stream,self.verbose,self.fileName)
  276. f,e,w = bp.parseStream()
  277. self.numErrs += e
  278. self.numWarns += w
  279. print '</div>'
  280. def newRun(self,m,line):
  281. if self.numRuns > 0:
  282. print '<hr />'
  283. print '<p>', self.numErrs, 'Errors', self.numWarns, 'Warnings', 'in this run.', '</p>'
  284. self.numWarns = 0
  285. self.numErrs = 0
  286. self.numRuns += 1
  287. def finishRun(self,m,line):
  288. self.ltxmk(m,line)
  289. self.done = True
  290. def ltxmk(self,m,line):
  291. print '<p class="ltxmk">%s</p>'%escape(line)
  292. class StreamWrapper(file):
  293. """Sometimes TeX breaks up lines with hard linebreaks. This is annoying.
  294. Even more annoying is that it sometime does not break line, for two distinct
  295. warnings. This class decorates the stdin file object, and modifies the
  296. readline function to return more appropriate units (log statements rather than log lines).
  297. """
  298. def __init__(self,input_stream):
  299. self.input_stream = input_stream
  300. def readline(self):
  301. return self.input_stream.readline()
  302. class NoLinebreak80(StreamWrapper):
  303. """TeX inserts hard line breaks if the length of a line exceeds 80 chars.
  304. This wrappers undos that behaviour by removing line breaks with lines of exactly 80 chars length"""
  305. def readline(self):
  306. statement = ""
  307. while True:
  308. line = self.input_stream.readline()
  309. if not line: # EOF
  310. return statement
  311. if len(line) == 80: # continue the loop for lines of 80 chars incl. line break
  312. statement += line.rstrip("\n")
  313. else:
  314. statement += line
  315. break
  316. return statement
  317. class LinebreakWarning(StreamWrapper):
  318. """TeX often doesn't break a line. This wrapper tries to at least insert a line break
  319. before a warning or error. It matches line like
  320. sometext1234 pdfTeX warning (ext4): destination with the same identifier"""
  321. def __init__(self,input_stream):
  322. StreamWrapper.__init__(self,input_stream)
  323. self.buffer = ""
  324. self.pattern = re.compile('(.*[^a-zA-Z])([a-zA-Z]*[Tt]e[Xx] (?:warning|error).*)')
  325. def readline(self):
  326. if self.buffer != "":
  327. statement = self.buffer
  328. self.buffer = ""
  329. return statement
  330. line = self.input_stream.readline()
  331. if not line: # EOF
  332. return line
  333. match = self.pattern.match(line)
  334. if match:
  335. self.buffer = match.group(2)
  336. return match.group(1)
  337. return line
  338. class NoMultilineWarning(StreamWrapper):
  339. """LaTeX sometimes prints a warning over multiple lines.
  340. This wrapper makes those warning into one line. Continuation lines
  341. are expected to start with multiple spaces. It matches warnings like:
  342. LaTeX Warning: You have requested package `styles/cases',
  343. but the package provides `cases'."""
  344. def __init__(self,input_stream):
  345. StreamWrapper.__init__(self,input_stream)
  346. self.buffer = ""
  347. def getline(self):
  348. if self.buffer:
  349. line = self.buffer
  350. self.buffer = ""
  351. return line
  352. else:
  353. return self.input_stream.readline()
  354. def readline(self):
  355. statement = self.getline()
  356. if not statement: # EOF
  357. return statement
  358. continuation = statement.startswith("LaTeX Warning")
  359. while continuation:
  360. line = self.getline()
  361. if line.startswith(" "):
  362. statement = statement.rstrip("\n")+" "+line.lstrip()
  363. else:
  364. self.buffer = line
  365. continuation = False
  366. return statement
  367. class NoMultilinePackageWarning(StreamWrapper):
  368. """Some packages print a warning over multiple lines.
  369. This wrapper makes those warning into one line. Continuation lines
  370. are expected to start with multiple spaces. It matches warnings like:
  371. Package amsmath Warning: Cannot use `split' here;
  372. (amsmath) trying to recover with `aligned'
  373. """
  374. def __init__(self,input_stream):
  375. StreamWrapper.__init__(self,input_stream)
  376. self.buffer = ""
  377. self.firstlinere = re.compile('Package (\w+) Warning:.*')
  378. def getline(self):
  379. if self.buffer != "":
  380. line = self.buffer
  381. self.buffer = ""
  382. return line
  383. else:
  384. return self.input_stream.readline()
  385. def readline(self):
  386. statement = self.getline()
  387. if not statement: # EOF
  388. return statement
  389. match = self.firstlinere.match(statement)
  390. if match:
  391. contstart = '('+match.group(1)+')'
  392. continuation = True
  393. else:
  394. continuation = False
  395. while continuation:
  396. line = self.getline()
  397. if line.startswith(contstart):
  398. statement = statement.rstrip("\n") + " " + line[len(contstart):].lstrip()
  399. else:
  400. self.buffer = line
  401. continuation = False
  402. return statement
  403. if __name__ == '__main__':
  404. # test
  405. stream = open('../tex/test.log')
  406. lp = LaTexParser(stream,False,"test.tex")
  407. f,e,w = lp.parseStream()