PageRenderTime 44ms CodeModel.GetById 18ms RepoModel.GetById 0ms app.codeStats 0ms

/bin/fignumber.py

https://github.com/imrehg/website
Python | 77 lines | 43 code | 14 blank | 20 comment | 5 complexity | b78359d4264268db0e1731b03ec64d59 MD5 | raw file
"""
Number figures.  Doing this with regular expressions is unpleasant, but
reading and writing XML with character entities using ElementTree while
preserving comments is just too hard.
"""
  6. import sys
  7. import re
  8. DEF_P = re.compile(r'<figure\s+id="(f:[^"]+)"[^>]*>')
  9. REF_P = re.compile(r'<a\s+href="#(f:[^"]+)">.*?</a>')
  10. FIG_P = re.compile(r'<figure\s+id="(f:[^"]+)"[^>]*>(\s+)<img\s+src="([^"]+)"\s+alt="([^"]+)"\s*/>(\s+)</figure>',
  11. re.MULTILINE)
  12. FORMATTED_FIG = '<figure id="%(id)s">%(ws_1)s<img src="%(img)s" alt="%(cap)s" />%(ws_1)s<caption>Figure %(chap_num)d.%(fig_num)d: %(cap)s</caption>%(ws_2)s</figure>'
  13. #-------------------------------------------------------------------------------
  14. def extract_defs(data):
  15. """
  16. Extract figure definitions, returning a dict mapping the anchor tag to
  17. the figure number.
  18. """
  19. return dict((m, i+1) for (i, m) in enumerate(DEF_P.findall(data)))
  20. #-------------------------------------------------------------------------------
  21. def update_refs(data, file_num, refs):
  22. """
  23. Update references, returning new data.
  24. """
  25. def repl(m):
  26. return '<a class="figref" href="#%s">Figure %d.%d</a>' % \
  27. (m.group(1), file_num, refs[m.group(1)])
  28. return REF_P.sub(repl, data)
  29. #-------------------------------------------------------------------------------
  30. def update_figs(data, file_num, refs):
  31. """
  32. Update figures, returning new data.
  33. """
  34. def repl(m):
  35. vals = {
  36. 'id' : m.group(1),
  37. 'ws_1' : m.group(2),
  38. 'img' : m.group(3),
  39. 'cap' : m.group(4),
  40. 'ws_2' : m.group(5),
  41. 'chap_num' : file_num,
  42. 'fig_num' : refs[m.group(1)]
  43. }
  44. id_stem = vals['id'].split(':')[1]
  45. file_stem = vals['img'].split('/')[-1].split('.')[0]
  46. assert id_stem == file_stem, \
  47. '%s != %s' % (vals['id'], vals['img'])
  48. return FORMATTED_FIG % vals
  49. return FIG_P.sub(repl, data)
  50. #-------------------------------------------------------------------------------
  51. def main(filenames):
  52. for (i, f) in enumerate(filenames):
  53. file_num = i + 1
  54. with open(f, 'r') as reader:
  55. data = reader.read()
  56. refs = extract_defs(data)
  57. data = update_refs(data, file_num, refs)
  58. data = update_figs(data, file_num, refs)
  59. with open(f, 'w') as writer:
  60. writer.write(data)
  61. #-------------------------------------------------------------------------------
  62. if __name__ == '__main__':
  63. main(sys.argv[1:])