PageRenderTime 27ms CodeModel.GetById 12ms RepoModel.GetById 1ms app.codeStats 0ms

/dblatex-0.3/lib/dbtexmf/dblatex/grubber/logparser.py

#
Python | 359 lines | 319 code | 9 blank | 31 comment | 9 complexity | 301c5a10b9239600b4a11d3df9661d90 MD5 | raw file
  1. # This file is part of Rubber and thus covered by the GPL
  2. # (c) Emmanuel Beffara, 2002--2006
  3. """
  4. LaTeX document building system for Rubber.
  5. This module defines the class that parses the LaTeX log files.
  6. """
  7. from __future__ import generators
  8. import re
  9. from msg import _, msg
  10. class LogParser:
  11. """
  12. This class performs all the extraction of information from the log file.
  13. For efficiency, the instances contain the whole file as a list of strings
  14. so that it can be read several times with no disk access.
  15. """
  16. re_loghead = re.compile("This is [0-9a-zA-Z-]*(TeX|Omega)")
  17. re_rerun = re.compile(
  18. "(LaTeX|Package longtable|Package bibtopic) Warning:.*Rerun")
  19. re_rerun2 = re.compile("\(Changebar\).*Rerun")
  20. re_file = re.compile("(\\((?P<file>[^ \n\t(){}]*)|\\))")
  21. re_badbox = re.compile(r"(Ov|Und)erfull \\[hv]box ")
  22. re_line = re.compile(r"(l\.(?P<line>[0-9]+)( (?P<code>.*))?$|<\*>)")
  23. re_cseq = re.compile(r".*(?P<seq>\\[^ ]*) ?$")
  24. re_page = re.compile("\[(?P<num>[0-9]+)\]")
  25. re_atline = re.compile(
  26. "( detected| in paragraph)? at lines? (?P<line>[0-9]*)(--(?P<last>[0-9]*))?")
  27. re_reference = re.compile("LaTeX Warning: Reference `(?P<ref>.*)' \
  28. on page (?P<page>[0-9]*) undefined on input line (?P<line>[0-9]*)\\.$")
  29. re_label = re.compile("LaTeX Warning: (?P<text>Label .*)$")
  30. re_warning = re.compile(
  31. "(LaTeX|Package)( (?P<pkg>.*))? Warning: (?P<text>.*)$")
  32. re_online = re.compile("(; reported)? on input line (?P<line>[0-9]*)")
  33. re_ignored = re.compile("; all text was ignored after line (?P<line>[0-9]*).$")
  34. #-- Initialization {{{2
  35. def __init__ (self):
  36. self.lines = []
  37. def read (self, name):
  38. """
  39. Read the specified log file, checking that it was produced by the
  40. right compiler. Returns true if the log file is invalid or does not
  41. exist.
  42. """
  43. self.lines = []
  44. try:
  45. file = open(name)
  46. except IOError:
  47. return 2
  48. line = file.readline()
  49. if not line:
  50. file.close()
  51. return 1
  52. if not self.re_loghead.match(line):
  53. file.close()
  54. return 1
  55. self.lines = file.readlines()
  56. file.close()
  57. return 0
  58. #-- Process information {{{2
  59. def errors (self):
  60. """
  61. Returns true if there was an error during the compilation.
  62. """
  63. skipping = 0
  64. for line in self.lines:
  65. if line.strip() == "":
  66. skipping = 0
  67. continue
  68. if skipping:
  69. continue
  70. m = self.re_badbox.match(line)
  71. if m:
  72. skipping = 1
  73. continue
  74. if line[0] == "!":
  75. # We check for the substring "pdfTeX warning" because pdfTeX
  76. # sometimes issues warnings (like undefined references) in the
  77. # form of errors...
  78. if line.find("pdfTeX warning") == -1:
  79. return 1
  80. return 0
  81. def run_needed (self):
  82. """
  83. Returns true if LaTeX indicated that another compilation is needed.
  84. """
  85. for line in self.lines:
  86. if self.re_rerun.match(line):
  87. return 1
  88. if self.re_rerun2.match(line):
  89. return 1
  90. return 0
  91. #-- Information extraction {{{2
  92. def continued (self, line):
  93. """
  94. Check if a line in the log is continued on the next line. This is
  95. needed because TeX breaks messages at 79 characters per line. We make
  96. this into a method because the test is slightly different in Metapost.
  97. """
  98. return len(line) == 79
  99. def parse (self, errors=0, boxes=0, refs=0, warnings=0):
  100. """
  101. Parse the log file for relevant information. The named arguments are
  102. booleans that indicate which information should be extracted:
  103. - errors: all errors
  104. - boxes: bad boxes
  105. - refs: warnings about references
  106. - warnings: all other warnings
  107. The function returns a generator. Each generated item is a dictionary
  108. that contains (some of) the following entries:
  109. - kind: the kind of information ("error", "box", "ref", "warning")
  110. - text: the text of the error or warning
  111. - code: the piece of code that caused an error
  112. - file, line, last, pkg: as used by Message.format_pos.
  113. """
  114. if not self.lines:
  115. return
  116. last_file = None
  117. pos = [last_file]
  118. page = 1
  119. parsing = 0 # 1 if we are parsing an error's text
  120. skipping = 0 # 1 if we are skipping text until an empty line
  121. something = 0 # 1 if some error was found
  122. prefix = None # the prefix for warning messages from packages
  123. accu = "" # accumulated text from the previous line
  124. for line in self.lines:
  125. line = line[:-1] # remove the line feed
  126. # TeX breaks messages at 79 characters, just to make parsing
  127. # trickier...
  128. if self.continued(line):
  129. accu += line
  130. continue
  131. line = accu + line
  132. accu = ""
  133. # Text that should be skipped (from bad box messages)
  134. if prefix is None and line == "":
  135. skipping = 0
  136. continue
  137. if skipping:
  138. continue
  139. # Errors (including aborted compilation)
  140. if parsing:
  141. if error == "Undefined control sequence.":
  142. # This is a special case in order to report which control
  143. # sequence is undefined.
  144. m = self.re_cseq.match(line)
  145. if m:
  146. error = "Undefined control sequence %s." % m.group("seq")
  147. m = self.re_line.match(line)
  148. if m:
  149. parsing = 0
  150. skipping = 1
  151. pdfTeX = error.find("pdfTeX warning") != -1
  152. if (pdfTeX and warnings) or (errors and not pdfTeX):
  153. if pdfTeX:
  154. d = {
  155. "kind": "warning",
  156. "pkg": "pdfTeX",
  157. "text": error[error.find(":")+2:]
  158. }
  159. else:
  160. d = {
  161. "kind": "error",
  162. "text": error
  163. }
  164. d.update( m.groupdict() )
  165. m = self.re_ignored.search(error)
  166. if m:
  167. d["file"] = last_file
  168. if d.has_key("code"):
  169. del d["code"]
  170. d.update( m.groupdict() )
  171. elif pos[-1] is None:
  172. d["file"] = last_file
  173. else:
  174. d["file"] = pos[-1]
  175. yield d
  176. elif line[0] == "!":
  177. error = line[2:]
  178. elif line[0:3] == "***":
  179. parsing = 0
  180. skipping = 1
  181. if errors:
  182. yield {
  183. "kind": "abort",
  184. "text": error,
  185. "why" : line[4:],
  186. "file": last_file
  187. }
  188. elif line[0:15] == "Type X to quit ":
  189. parsing = 0
  190. skipping = 0
  191. if errors:
  192. yield {
  193. "kind": "error",
  194. "text": error,
  195. "file": pos[-1]
  196. }
  197. continue
  198. if len(line) > 0 and line[0] == "!":
  199. error = line[2:]
  200. parsing = 1
  201. continue
  202. if line == "Runaway argument?":
  203. error = line
  204. parsing = 1
  205. continue
  206. # Long warnings
  207. if prefix is not None:
  208. if line[:len(prefix)] == prefix:
  209. text.append(line[len(prefix):].strip())
  210. else:
  211. text = " ".join(text)
  212. m = self.re_online.search(text)
  213. if m:
  214. info["line"] = m.group("line")
  215. text = text[:m.start()] + text[m.end():]
  216. if warnings:
  217. info["text"] = text
  218. d = { "kind": "warning" }
  219. d.update( info )
  220. yield d
  221. prefix = None
  222. continue
  223. # Undefined references
  224. m = self.re_reference.match(line)
  225. if m:
  226. if refs:
  227. d = {
  228. "kind": "warning",
  229. "text": _("Reference `%s' undefined.") % m.group("ref"),
  230. "file": pos[-1]
  231. }
  232. d.update( m.groupdict() )
  233. yield d
  234. continue
  235. m = self.re_label.match(line)
  236. if m:
  237. if refs:
  238. d = {
  239. "kind": "warning",
  240. "file": pos[-1]
  241. }
  242. d.update( m.groupdict() )
  243. yield d
  244. continue
  245. # Other warnings
  246. if line.find("Warning") != -1:
  247. m = self.re_warning.match(line)
  248. if m:
  249. info = m.groupdict()
  250. info["file"] = pos[-1]
  251. info["page"] = page
  252. if info["pkg"] is None:
  253. del info["pkg"]
  254. prefix = ""
  255. else:
  256. prefix = ("(%s)" % info["pkg"])
  257. prefix = prefix.ljust(m.start("text"))
  258. text = [info["text"]]
  259. continue
  260. # Bad box messages
  261. m = self.re_badbox.match(line)
  262. if m:
  263. if boxes:
  264. mpos = { "file": pos[-1], "page": page }
  265. m = self.re_atline.search(line)
  266. if m:
  267. md = m.groupdict()
  268. for key in "line", "last":
  269. if md[key]: mpos[key] = md[key]
  270. line = line[:m.start()]
  271. d = {
  272. "kind": "warning",
  273. "text": line
  274. }
  275. d.update( mpos )
  276. yield d
  277. skipping = 1
  278. continue
  279. # If there is no message, track source names and page numbers.
  280. last_file = self.update_file(line, pos, last_file)
  281. page = self.update_page(line, page)
  282. def get_errors (self):
  283. return self.parse(errors=1)
  284. def get_boxes (self):
  285. return self.parse(boxes=1)
  286. def get_references (self):
  287. return self.parse(refs=1)
  288. def get_warnings (self):
  289. return self.parse(warnings=1)
  290. def update_file (self, line, stack, last):
  291. """
  292. Parse the given line of log file for file openings and closings and
  293. update the list `stack'. Newly opened files are at the end, therefore
  294. stack[1] is the main source while stack[-1] is the current one. The
  295. first element, stack[0], contains the value None for errors that may
  296. happen outside the source. Return the last file from which text was
  297. read (the new stack top, or the one before the last closing
  298. parenthesis).
  299. """
  300. m = self.re_file.search(line)
  301. while m:
  302. if line[m.start()] == '(':
  303. last = m.group("file")
  304. stack.append(last)
  305. else:
  306. last = stack[-1]
  307. del stack[-1]
  308. line = line[m.end():]
  309. m = self.re_file.search(line)
  310. return last
  311. def update_page (self, line, before):
  312. """
  313. Parse the given line and return the number of the page that is being
  314. built after that line, assuming the current page before the line was
  315. `before'.
  316. """
  317. ms = self.re_page.findall(line)
  318. if ms == []:
  319. return before
  320. return int(ms[-1]) + 1