PageRenderTime 37ms CodeModel.GetById 21ms RepoModel.GetById 0ms app.codeStats 1ms

/old/txt2tags-2.3.py

http://txt2tags.googlecode.com/
Python | 4622 lines | 4158 code | 173 blank | 291 comment | 176 complexity | fc3d524e614abd0cb77e8397f28b69ff MD5 | raw file
Possible License(s): GPL-2.0, GPL-3.0, WTFPL
  1. #!/usr/bin/env python
  2. # txt2tags - generic text conversion tool
  3. # http://txt2tags.sf.net
  4. #
  5. # Copyright 2001, 2002, 2003, 2004, 2005 Aurelio Marinho Jargas
  6. #
  7. # This program is free software; you can redistribute it and/or modify
  8. # it under the terms of the GNU General Public License as published by
  9. # the Free Software Foundation, version 2.
  10. #
  11. # This program is distributed in the hope that it will be useful,
  12. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. # GNU General Public License for more details.
  15. #
  16. # You have received a copy of the GNU General Public License along
  17. # with this program, on the COPYING file.
  18. #
  19. #
  20. #
  21. # +-------------------------------------------------------------+
  22. # | IMPORTANT MESSAGES, PLEASE READ |
  23. # +-------------------------------------------------------------+
  24. # | |
  25. # | |
  26. # | v1.x COMPATIBILITY |
  27. # | ------------------ |
  28. # | |
  29. # | Due to the major syntax changes, the new 2.x series |
  30. # | BREAKS backwards compatibility. |
  31. # | |
  32. # | Use the 't2tconv' script to upgrade your existing |
  33. # | v1.x files to conform to the new v2.x syntax. |
  34. # | |
  35. # | Do a visual inspection of the newly converted file. |
  36. # | Especially Pre & Post proc filters can break. |
  37. # | Check them! |
  38. # | |
  39. # | |
  40. # +-------------------------------------------------------------+
  41. #
  42. #
  43. ########################################################################
  44. #
  45. # BORING CODE EXPLANATION AHEAD
  46. #
  47. # Just read if you wish to understand how the txt2tags code works
  48. #
  49. ########################################################################
  50. #
  51. # Version 2.0 was a complete rewrite for the program 'core'.
  52. #
  53. # Now the code that [1] parses the marked text is separated from the
  54. # code that [2] inserts the target tags.
  55. #
  56. # [1] made by: def convert()
  57. # [2] made by: class BlockMaster
  58. #
  59. # The structures of the marked text are identified and their contents are
  60. # extracted into a data holder (Python lists and dictionaries).
  61. #
  62. # When parsing the source file, the blocks (para, lists, quote, table)
  63. # are opened with BlockMaster as soon as they are found. Then their contents,
  64. # which span several lines, are fed into a special holder on the
  65. # BlockMaster instance. Only when the block is closed are the target tags
  66. # inserted for the full block as a whole, in one pass. This way, we
  67. # have better control over blocks. Much better than the previous line by
  68. # line approach.
  69. #
  70. # In other words, whenever inside a block, the parser *holds* the tag
  71. # insertion process, waiting until the full block is read. That was
  72. # needed primarily to close paragraphs for the new XHTML target, but
  73. # proved to be a very good addition, improving many other processing steps.
  74. #
  75. # -------------------------------------------------------------------
  76. #
  77. # There is also a brand new code for the Configuration schema, 100%
  78. # rewritten. There are new classes, all self documented: CommandLine,
  79. # SourceDocument, ConfigMaster and ConfigLines. In short, a new RAW
  80. # Config format was created, and every kind of configuration is first
  81. # converted to this format, and then a generic method parses it.
  82. #
  83. # The init processing was also changed, and now the functions which
  84. # get information about the input files are: get_infiles_config(),
  85. # process_source_file() and convert_this_files()
  86. #
  87. # Other parts are untouched, and remain the same as in v1.7, such as the
  88. # marks regexes, target Headers and target Tags&Rules.
  89. #
  90. ########################################################################
  91. # Now I think the code is nice, easier to read and understand
  92. #XXX Python coding warning
  93. # Avoid common mistakes:
  94. # - do NOT use newlist=list instead of newlist=list[:]
  95. # - do NOT use newdic=dic instead of newdic=dic.copy()
  96. # - do NOT use dic[key] instead of dic.get(key)
  97. # - do NOT use del dic[key] without checking has_key() first
  98. #XXX Smart Image Align doesn't work if the image is a link
  99. # Can't fix that because the image is expanded together with the
  100. # link, at the linkbank filling moment. Only the image is passed
  101. # to parse_images(), not the full line, so it is always 'middle'.
  102. #XXX Paragraph separation not valid inside Quote
  103. # Quote will not have <p></p> inside, instead it will close and open
  104. # the <blockquote> again. This really sucks in CSS, when defining a
  105. # different background color. Still don't know how to fix it.
  106. #XXX TODO (maybe)
  107. # New mark or macro which expands to an anchor full title.
  108. # It is necessary to parse the full document in this order:
  109. # DONE 1st scan: HEAD: get all settings, including %!includeconf
  110. # DONE 2nd scan: BODY: expand includes & apply %!preproc
  111. # 3rd scan: BODY: read titles and compose TOC info
  112. # 4th scan: BODY: full parsing, expanding [#anchor] 1st
  113. # Steps 2 and 3 can be made together, with no tag adding.
  114. # Two complete body scans will be *slow*, don't know if it is worth it.
  115. # One solution may be to add the titles as postproc rules
  116. ##############################################################################
  117. # User config (1=ON, 0=OFF)
  118. USE_I18N = 1 # use gettext for i18ned messages? (default is 1)
  119. COLOR_DEBUG = 1 # show debug messages in colors? (default is 1)
  120. BG_LIGHT = 0 # your terminal background color is light (default is 0)
  121. HTML_LOWER = 0 # use lowercased HTML tags instead upper? (default is 0)
  122. ##############################################################################
  123. # these are all the core Python modules used by txt2tags (KISS!)
  124. import re, string, os, sys, time, getopt
  125. # program information
  126. my_url = 'http://txt2tags.sf.net'
  127. my_name = 'txt2tags'
  128. my_email = 'verde@aurelio.net'
  129. my_version = '2.3'
  130. # i18n - just use if available
  131. if USE_I18N:
  132. try:
  133. import gettext
  134. # if your locale dir is different, change it here
  135. cat = gettext.Catalog('txt2tags',localedir='/usr/share/locale/')
  136. _ = cat.gettext
  137. except:
  138. _ = lambda x:x
  139. else:
  140. _ = lambda x:x
  141. # FLAGS : the conversion related flags , may be used in %!options
  142. # OPTIONS : the conversion related options, may be used in %!options
  143. # ACTIONS : the other behaviour modifiers, valid on command line only
  144. # MACROS : the valid macros with their default values for formatting
  145. # SETTINGS: global miscelaneous settings, valid on RC file only
  146. # NO_TARGET: actions that don't require a target specification
  147. # NO_MULTI_INPUT: actions that don't accept more than one input file
  148. # CONFIG_KEYWORDS: the valid %!key:val keywords
  149. #
  150. # FLAGS and OPTIONS are configs that affect the converted document.
  151. # They usually have also a --no-<option> to turn them OFF.
  152. # ACTIONS are needed because when doing multiple input files, strange
  153. # behaviour would be found, as use command line interface for the
  154. # first file and gui for the second. There is no --no-<action>.
  155. # --version and --help inside %!options are also odd
  156. #
  157. TARGETS = ['html', 'xhtml', 'sgml', 'tex', 'lout', 'man', 'mgp',
  158. 'moin', 'pm6' , 'txt']
  159. FLAGS = {'headers' :1 , 'enum-title' :0 , 'mask-email' :0 ,
  160. 'toc-only' :0 , 'toc' :0 , 'rc' :1 ,
  161. 'css-sugar' :0 , 'css-suggar' :0 , 'css-inside' :0 ,
  162. 'quiet' :0 }
  163. OPTIONS = {'target' :'', 'toc-level' :3 , 'style' :'',
  164. 'infile' :'', 'outfile' :'', 'encoding' :'',
  165. 'config-file':'', 'split' :0 , 'lang' :''}
  166. ACTIONS = {'help' :0 , 'version' :0 , 'gui' :0 ,
  167. 'verbose' :0 , 'debug' :0 , 'dump-config':0 ,
  168. 'dump-source':0 }
  169. MACROS = {'date' : '%Y%m%d', 'infile': '%f',
  170. 'mtime': '%Y%m%d', 'outfile': '%f'}
  171. SETTINGS = {} # for future use
  172. NO_TARGET = ['help', 'version', 'gui', 'toc-only', 'dump-config', 'dump-source']
  173. NO_MULTI_INPUT = ['gui','dump-config','dump-source']
  174. CONFIG_KEYWORDS = [
  175. 'target', 'encoding', 'style', 'options', 'preproc','postproc',
  176. 'guicolors']
  177. TARGET_NAMES = {
  178. 'html' : _('HTML page'),
  179. 'xhtml': _('XHTML page'),
  180. 'sgml' : _('SGML document'),
  181. 'tex' : _('LaTeX document'),
  182. 'lout' : _('Lout document'),
  183. 'man' : _('UNIX Manual page'),
  184. 'mgp' : _('Magic Point presentation'),
  185. 'moin' : _('MoinMoin page'),
  186. 'pm6' : _('PageMaker 6.0 document'),
  187. 'txt' : _('Plain Text'),
  188. }
  189. DEBUG = 0 # do not edit here, please use --debug
  190. VERBOSE = 0 # do not edit here, please use -v, -vv or -vvv
  191. QUIET = 0 # do not edit here, please use --quiet
  192. GUI = 0 # do not edit here, please use --gui
  193. AUTOTOC = 1 # do not edit here, please use --no-toc or %%toc
  194. RC_RAW = []
  195. CMDLINE_RAW = []
  196. CONF = {}
  197. BLOCK = None
  198. regex = {}
  199. TAGS = {}
  200. rules = {}
  201. lang = 'english'
  202. TARGET = ''
  203. STDIN = STDOUT = '-'
  204. MODULEIN = MODULEOUT = '-module-'
  205. ESCCHAR = '\x00'
  206. SEPARATOR = '\x01'
  207. LISTNAMES = {'-':'list', '+':'numlist', ':':'deflist'}
  208. LINEBREAK = {'default':'\n', 'win':'\r\n', 'mac':'\r'}
  209. # plataform specific settings
  210. LB = LINEBREAK.get(sys.platform[:3]) or LINEBREAK['default']
  211. # identify a development version
  212. #dev_suffix = '-dev'+time.strftime('%m%d',time.localtime(time.time()))
  213. #my_version = my_version + dev_suffix
  214. VERSIONSTR = _("%s version %s <%s>")%(my_name,my_version,my_url)
  215. USAGE = string.join([
  216. '',
  217. _("Usage: %s [OPTIONS] [infile.t2t ...]") % my_name,
  218. '',
  219. _(" -t, --target=TYPE set target document type. currently supported:"),
  220. ' %s' % re.sub(r"[]'[]",'',repr(TARGETS)),
  221. _(" -i, --infile=FILE set FILE as the input file name ('-' for STDIN)"),
  222. _(" -o, --outfile=FILE set FILE as the output file name ('-' for STDOUT)"),
  223. _(" -n, --enum-title enumerate all title lines as 1, 1.1, 1.1.1, etc"),
  224. _(" -H, --no-headers suppress header, title and footer contents"),
  225. _(" --headers show header, title and footer contents (default ON)"),
  226. _(" --encoding=ENC set target file encoding (utf-8, iso-8859-1, etc)"),
  227. _(" --style=FILE use FILE as the document style (like HTML CSS)"),
  228. _(" --css-sugar insert CSS-friendly tags for HTML and XHTML targets"),
  229. _(" --css-inside insert CSS file contents inside HTML/XHTML headers"),
  230. _(" --mask-email hide email from spam robots. x@y.z turns <x (a) y z>"),
  231. _(" --toc add TOC (Table of Contents) to target document"),
  232. _(" --toc-only print document TOC and exit"),
  233. _(" --toc-level=N set maximum TOC level (depth) to N"),
  234. _(" -C, --config-file=F read config from file F"),
  235. _(" --rc read user config file ~/.txt2tagsrc (default ON)"),
  236. _(" --gui invoke Graphical Tk Interface"),
  237. _(" -q, --quiet quiet mode, suppress all output (except errors)"),
  238. _(" -v, --verbose print informative messages during conversion"),
  239. _(" -h, --help print this help information and exit"),
  240. _(" -V, --version print program version and exit"),
  241. _(" --dump-config print all the config found and exit"),
  242. _(" --dump-source print the document source, with includes expanded"),
  243. '',
  244. _("Turn OFF options:"),
  245. " --no-outfile, --no-infile, --no-style, --no-encoding, --no-headers",
  246. " --no-toc, --no-toc-only, --no-mask-email, --no-enum-title, --no-rc",
  247. " --no-css-sugar, --no-css-inside, --no-quiet, --no-dump-config",
  248. " --no-dump-source",
  249. '',
  250. _("Example:\n %s -t html --toc myfile.t2t") % my_name,
  251. '',
  252. _("By default, converted output is saved to 'infile.<target>'."),
  253. _("Use --outfile to force an output file name."),
  254. _("If input file is '-', reads from STDIN."),
  255. _("If output file is '-', dumps output to STDOUT."),
  256. ''
  257. ], '\n')
  258. ##############################################################################
# Here are the header templates of all the targets, one string per
# template name. You may edit them to fit your needs.
# - the %(HEADERn)s strings represent the Header lines
# - the %(STYLE)s string is changed by --style contents
# - the %(ENCODING)s string is changed by --encoding contents
# - if any of the above is empty, the full line is removed
# - use %% to represent a literal %
#
# Besides one entry per target there are the 'htmlcss' and 'xhtmlcss'
# variants, which wrap the header lines in a "header" DIV instead of using
# presentational markup (presumably picked when --css-sugar is ON - TODO
# confirm against the code that reads this table).
#
HEADER_TEMPLATE = {
    # plain text: just the three header lines
    'txt': """\
%(HEADER1)s
%(HEADER2)s
%(HEADER3)s
""",
    # SGML (linuxdoc): title, author and date
    'sgml': """\
<!doctype linuxdoc system>
<article>
<title>%(HEADER1)s
<author>%(HEADER2)s
<date>%(HEADER3)s
""",
    # HTML with presentational markup (centered title block)
    'html': """\
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
<HTML>
<HEAD>
<META NAME="generator" CONTENT="http://txt2tags.sf.net">
<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=%(ENCODING)s">
<LINK REL="stylesheet" TYPE="text/css" HREF="%(STYLE)s">
<TITLE>%(HEADER1)s</TITLE>
</HEAD><BODY BGCOLOR="white" TEXT="black">
<P ALIGN="center"><CENTER><H1>%(HEADER1)s</H1>
<FONT SIZE="4">
<I>%(HEADER2)s</I><BR>
%(HEADER3)s
</FONT></CENTER>
""",
    # HTML with the header lines inside <DIV CLASS="header" ID="header">
    'htmlcss': """\
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
<HTML>
<HEAD>
<META NAME="generator" CONTENT="http://txt2tags.sf.net">
<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=%(ENCODING)s">
<LINK REL="stylesheet" TYPE="text/css" HREF="%(STYLE)s">
<TITLE>%(HEADER1)s</TITLE>
</HEAD>
<BODY>
<DIV CLASS="header" ID="header">
<H1>%(HEADER1)s</H1>
<H2>%(HEADER2)s</H2>
<H3>%(HEADER3)s</H3>
</DIV>
""",
    # XHTML: the encoding sits alone on its own line of the XML declaration
    # (presumably so that line can be dropped when no encoding is set - see
    # the "full line is removed" rule above).
    # NOTE(review): the backslash at the end of the DOCTYPE line joins it
    # with the next line, which as written here starts right at the quote,
    # so no space separates the two quoted strings - confirm against
    # upstream.
    'xhtml': """\
<?xml version="1.0"
encoding="%(ENCODING)s"
?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"\
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>%(HEADER1)s</title>
<meta name="generator" content="http://txt2tags.sf.net" />
<link rel="stylesheet" type="text/css" href="%(STYLE)s" />
</head>
<body bgcolor="white" text="black">
<div align="center">
<h1>%(HEADER1)s</h1>
<h2>%(HEADER2)s</h2>
<h3>%(HEADER3)s</h3>
</div>
""",
    # XHTML with the header lines inside <div class="header" id="header">;
    # here the encoding goes in a Content-Type meta tag instead of the XML
    # declaration. The same DOCTYPE line-join note as for 'xhtml' applies.
    'xhtmlcss': """\
<?xml version="1.0"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"\
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>%(HEADER1)s</title>
<meta name="generator" content="http://txt2tags.sf.net" />
<meta http-equiv="Content-Type" content="text/html; charset=%(ENCODING)s" />
<link rel="stylesheet" type="text/css" href="%(STYLE)s" />
</head>
<body>
<div class="header" id="header">
<h1>%(HEADER1)s</h1>
<h2>%(HEADER2)s</h2>
<h3>%(HEADER3)s</h3>
</div>
""",
    # man page: a single .TH line (note the order: HEADER1, HEADER3, HEADER2)
    'man': """\
.TH "%(HEADER1)s" 1 "%(HEADER3)s" "%(HEADER2)s"
""",
    # TODO style to <HR>
    # PageMaker 6.0 tagged text: style definitions, then the header lines
    'pm6': """\
<PMTags1.0 win><C-COLORTABLE ("Preto" 1 0 0 0)
><@Normal=
<FONT "Times New Roman"><CCOLOR "Preto"><SIZE 11>
<HORIZONTAL 100><LETTERSPACE 0><CTRACK 127><CSSIZE 70><C+SIZE 58.3>
<C-POSITION 33.3><C+POSITION 33.3><P><CBASELINE 0><CNOBREAK 0><CLEADING -0.05>
<GGRID 0><GLEFT 7.2><GRIGHT 0><GFIRST 0><G+BEFORE 7.2><G+AFTER 0>
<GALIGNMENT "justify"><GMETHOD "proportional"><G& "ENGLISH">
<GPAIRS 12><G%% 120><GKNEXT 0><GKWIDOW 0><GKORPHAN 0><GTABS $>
<GHYPHENATION 2 34 0><GWORDSPACE 75 100 150><GSPACE -5 0 25>
><@Bullet=<@-PARENT "Normal"><FONT "Abadi MT Condensed Light">
<GLEFT 14.4><G+BEFORE 2.15><G%% 110><GTABS(25.2 l "")>
><@PreFormat=<@-PARENT "Normal"><FONT "Lucida Console"><SIZE 8><CTRACK 0>
<GLEFT 0><G+BEFORE 0><GALIGNMENT "left"><GWORDSPACE 100 100 100><GSPACE 0 0 0>
><@Title1=<@-PARENT "Normal"><FONT "Arial"><SIZE 14><B>
<GCONTENTS><GLEFT 0><G+BEFORE 0><GALIGNMENT "left">
><@Title2=<@-PARENT "Title1"><SIZE 12><G+BEFORE 3.6>
><@Title3=<@-PARENT "Title1"><SIZE 10><GLEFT 7.2><G+BEFORE 7.2>
><@Title4=<@-PARENT "Title3">
><@Title5=<@-PARENT "Title3">
><@Quote=<@-PARENT "Normal"><SIZE 10><I>>
%(HEADER1)s
%(HEADER2)s
%(HEADER3)s
""",
    # Magic Point: font/default/tab setup, then a first page with the
    # header lines. Every literal % of the mgp syntax is written as %%.
    'mgp': """\
#!/usr/X11R6/bin/mgp -t 90
%%deffont "normal" xfont "utopia-medium-r", charset "iso8859-1"
%%deffont "normal-i" xfont "utopia-medium-i", charset "iso8859-1"
%%deffont "normal-b" xfont "utopia-bold-r" , charset "iso8859-1"
%%deffont "normal-bi" xfont "utopia-bold-i" , charset "iso8859-1"
%%deffont "mono" xfont "courier-medium-r", charset "iso8859-1"
%%default 1 size 5
%%default 2 size 8, fore "yellow", font "normal-b", center
%%default 3 size 5, fore "white", font "normal", left, prefix " "
%%tab 1 size 4, vgap 30, prefix " ", icon arc "red" 40, leftfill
%%tab 2 prefix " ", icon arc "orange" 40, leftfill
%%tab 3 prefix " ", icon arc "brown" 40, leftfill
%%tab 4 prefix " ", icon arc "darkmagenta" 40, leftfill
%%tab 5 prefix " ", icon arc "magenta" 40, leftfill
%%%%------------------------- end of headers -----------------------------
%%page
%%size 10, center, fore "yellow"
%(HEADER1)s
%%font "normal-i", size 6, fore "white", center
%(HEADER2)s
%%font "mono", size 7, center
%(HEADER3)s
""",
    # MoinMoin: first header line in ''' marks, second in '' marks
    'moin': """\
'''%(HEADER1)s'''
''%(HEADER2)s''
%(HEADER3)s
""",
    # LaTeX: a raw string, so the TeX backslashes need no doubling; the TeX
    # comments are written with %% to get a literal %
    'tex': \
r"""\documentclass[11pt,a4paper]{article}
\usepackage{amsfonts,graphicx}
\usepackage[pdfstartview=FitH,urlcolor=blue,colorlinks=true,bookmarks=true]{hyperref}
\usepackage[%(ENCODING)s]{inputenc} %% char encoding
\usepackage{%(STYLE)s} %% user defined package
\pagestyle{plain} %% do page numbering ('empty' turns off)
\frenchspacing %% no aditional spaces after periods
\setlength{\parskip}{8pt}\parindent=0pt %% no paragraph indentation
\title{%(HEADER1)s}
\author{%(HEADER2)s}
\begin{document}
\date{%(HEADER3)s}
\maketitle
\clearpage
""",
    # Lout: document setup, then the header lines as displays. The # notes
    # inside the string are part of the template text, not Python comments.
    'lout': """\
@SysInclude { doc }
@Document
@InitialFont { Times Base 12p } # Times, Courier, Helvetica, ...
@PageOrientation { Portrait } # Portrait, Landscape
@ColumnNumber { 1 } # Number of columns (2, 3, ...)
@PageHeaders { Simple } # None, Simple, Titles, NoTitles
@InitialLanguage { English } # German, French, Portuguese, ...
@OptimizePages { Yes } # Yes/No smart page break feature
//
@Text @Begin
@Display @Heading { %(HEADER1)s }
@Display @I { %(HEADER2)s }
@Display { %(HEADER3)s }
#@NP # Break page after Headers
"""
# @SysInclude { tbl } # Tables support
# setup: @MakeContents { Yes } # show TOC
# setup: @SectionGap # break page at each section
}
  442. ##############################################################################
  443. def getTags(config):
  444. "Returns all the known tags for the specified target"
  445. keys = [
  446. 'paragraphOpen','paragraphClose',
  447. 'title1','title2','title3','title4','title5',
  448. 'title1Open','title1Close','title2Open','title2Close',
  449. 'blocktitle1Open','title1Close','title2Open','title2Close',
  450. 'title3Open','title3Close','title4Open','title4Close',
  451. 'title5Open','title5Close',
  452. 'numtitle1','numtitle2','numtitle3','numtitle4','numtitle5',
  453. 'blockVerbOpen','blockVerbClose',
  454. 'blockQuoteOpen','blockQuoteClose','blockQuoteLine',
  455. 'fontMonoOpen','fontMonoClose',
  456. 'fontBoldOpen','fontBoldClose',
  457. 'fontItalicOpen','fontItalicClose',
  458. 'fontUnderlineOpen','fontUnderlineClose',
  459. 'listOpen','listClose',
  460. 'listItemOpen','listItemClose','listItemLine',
  461. 'numlistOpen','numlistClose',
  462. 'numlistItemOpen','numlistItemClose','numlistItemLine',
  463. 'deflistOpen','deflistClose',
  464. 'deflistItem1Open','deflistItem1Close',
  465. 'deflistItem2Open','deflistItem2Close',
  466. 'bar1','bar2',
  467. 'url','urlMark','email','emailMark',
  468. 'img','imgAlignLeft','imgAlignRight','imgAlignCenter',
  469. 'tableOpen','tableClose',
  470. 'tableRowOpen','tableRowClose','tableRowSep',
  471. 'tableCellOpen','tableCellClose','tableCellSep',
  472. 'tableTitleCellOpen','tableTitleCellClose','tableTitleCellSep',
  473. 'tableTitleRowOpen','tableTitleRowClose',
  474. 'tableBorder', 'tableAlignLeft', 'tableAlignCenter',
  475. 'tableCellAlignLeft','tableCellAlignRight','tableCellAlignCenter',
  476. 'tableColAlignLeft','tableColAlignRight','tableColAlignCenter',
  477. 'tableColAlignSep', 'tableCellColSpan',
  478. 'anchor','comment','pageBreak',
  479. 'TOC','tocOpen','tocClose',
  480. 'cssOpen', 'cssClose',
  481. 'bodyOpen','bodyClose',
  482. 'EOD'
  483. ]
  484. # TIP: \a represents the current text on the mark
  485. # TIP: ~A~, ~B~ and ~C~ are expanded to other tags parts
  486. alltags = {
  487. 'txt': {
  488. 'title1' : ' \a' ,
  489. 'title2' : '\t\a' ,
  490. 'title3' : '\t\t\a' ,
  491. 'title4' : '\t\t\t\a' ,
  492. 'title5' : '\t\t\t\t\a',
  493. 'blockQuoteLine' : '\t' ,
  494. 'listItemOpen' : '- ' ,
  495. 'numlistItemOpen' : '\a. ' ,
  496. 'bar1' : '\a' ,
  497. 'url' : '\a' ,
  498. 'urlMark' : '\a (\a)' ,
  499. 'email' : '\a' ,
  500. 'emailMark' : '\a (\a)' ,
  501. 'img' : '[\a]' ,
  502. },
  503. 'html': {
  504. 'paragraphOpen' : '<P>' ,
  505. 'paragraphClose' : '</P>' ,
  506. 'title1' : '~A~<H1>\a</H1>' ,
  507. 'title2' : '~A~<H2>\a</H2>' ,
  508. 'title3' : '~A~<H3>\a</H3>' ,
  509. 'title4' : '~A~<H4>\a</H4>' ,
  510. 'title5' : '~A~<H5>\a</H5>' ,
  511. 'blockVerbOpen' : '<PRE>' ,
  512. 'blockVerbClose' : '</PRE>' ,
  513. 'blockQuoteOpen' : '<BLOCKQUOTE>' ,
  514. 'blockQuoteClose' : '</BLOCKQUOTE>' ,
  515. 'fontMonoOpen' : '<CODE>' ,
  516. 'fontMonoClose' : '</CODE>' ,
  517. 'fontBoldOpen' : '<B>' ,
  518. 'fontBoldClose' : '</B>' ,
  519. 'fontItalicOpen' : '<I>' ,
  520. 'fontItalicClose' : '</I>' ,
  521. 'fontUnderlineOpen' : '<U>' ,
  522. 'fontUnderlineClose' : '</U>' ,
  523. 'listOpen' : '<UL>' ,
  524. 'listClose' : '</UL>' ,
  525. 'listItemOpen' : '<LI>' ,
  526. 'numlistOpen' : '<OL>' ,
  527. 'numlistClose' : '</OL>' ,
  528. 'numlistItemOpen' : '<LI>' ,
  529. 'deflistOpen' : '<DL>' ,
  530. 'deflistClose' : '</DL>' ,
  531. 'deflistItem1Open' : '<DT>' ,
  532. 'deflistItem1Close' : '</DT>' ,
  533. 'deflistItem2Open' : '<DD>' ,
  534. 'bar1' : '<HR NOSHADE SIZE=1>' ,
  535. 'bar2' : '<HR NOSHADE SIZE=5>' ,
  536. 'url' : '<A HREF="\a">\a</A>' ,
  537. 'urlMark' : '<A HREF="\a">\a</A>' ,
  538. 'email' : '<A HREF="mailto:\a">\a</A>' ,
  539. 'emailMark' : '<A HREF="mailto:\a">\a</A>' ,
  540. 'img' : '<IMG~A~ SRC="\a" BORDER="0" ALT="">',
  541. 'imgAlignLeft' : ' ALIGN="left"' ,
  542. 'imgAlignCenter' : ' ALIGN="middle"',
  543. 'imgAlignRight' : ' ALIGN="right"' ,
  544. 'tableOpen' : '<TABLE~A~ CELLPADDING="4"~B~>',
  545. 'tableClose' : '</TABLE>' ,
  546. 'tableRowOpen' : '<TR>' ,
  547. 'tableRowClose' : '</TR>' ,
  548. 'tableCellOpen' : '<TD~A~~S~>' ,
  549. 'tableCellClose' : '</TD>' ,
  550. 'tableTitleCellOpen' : '<TH~S~>' ,
  551. 'tableTitleCellClose' : '</TH>' ,
  552. 'tableBorder' : ' BORDER="1"' ,
  553. 'tableAlignCenter' : ' ALIGN="center"',
  554. 'tableCellAlignRight' : ' ALIGN="right"' ,
  555. 'tableCellAlignCenter': ' ALIGN="center"',
  556. 'tableCellColSpan' : ' COLSPAN="\a"' ,
  557. 'anchor' : '<A NAME="\a"></A>\n',
  558. 'cssOpen' : '<STYLE TYPE="text/css">',
  559. 'cssClose' : '</STYLE>' ,
  560. 'comment' : '<!-- \a -->' ,
  561. 'EOD' : '</BODY></HTML>'
  562. },
  563. #TIP xhtml inherits all HTML definitions (lowercased)
  564. #TIP http://www.w3.org/TR/xhtml1/#guidelines
  565. #TIP http://www.htmlref.com/samples/Chapt17/17_08.htm
  566. 'xhtml': {
  567. 'listItemClose' : '</li>' ,
  568. 'numlistItemClose' : '</li>' ,
  569. 'deflistItem2Close' : '</dd>' ,
  570. 'bar1' : '<hr class="light" />',
  571. 'bar2' : '<hr class="heavy" />',
  572. 'anchor' : '<a id="\a" name="\a"></a>\n',
  573. 'img' : '<img~A~ src="\a" border="0" alt=""/>',
  574. },
  575. 'sgml': {
  576. 'paragraphOpen' : '<p>' ,
  577. 'title1' : '<sect>\a~A~<p>' ,
  578. 'title2' : '<sect1>\a~A~<p>' ,
  579. 'title3' : '<sect2>\a~A~<p>' ,
  580. 'title4' : '<sect3>\a~A~<p>' ,
  581. 'title5' : '<sect4>\a~A~<p>' ,
  582. 'blockVerbOpen' : '<tscreen><verb>' ,
  583. 'blockVerbClose' : '</verb></tscreen>' ,
  584. 'blockQuoteOpen' : '<quote>' ,
  585. 'blockQuoteClose' : '</quote>' ,
  586. 'fontMonoOpen' : '<tt>' ,
  587. 'fontMonoClose' : '</tt>' ,
  588. 'fontBoldOpen' : '<bf>' ,
  589. 'fontBoldClose' : '</bf>' ,
  590. 'fontItalicOpen' : '<em>' ,
  591. 'fontItalicClose' : '</em>' ,
  592. 'fontUnderlineOpen' : '<bf><em>' ,
  593. 'fontUnderlineClose' : '</em></bf>' ,
  594. 'listOpen' : '<itemize>' ,
  595. 'listClose' : '</itemize>' ,
  596. 'listItemOpen' : '<item>' ,
  597. 'numlistOpen' : '<enum>' ,
  598. 'numlistClose' : '</enum>' ,
  599. 'numlistItemOpen' : '<item>' ,
  600. 'deflistOpen' : '<descrip>' ,
  601. 'deflistClose' : '</descrip>' ,
  602. 'deflistItem1Open' : '<tag>' ,
  603. 'deflistItem1Close' : '</tag>' ,
  604. 'bar1' : '<!-- \a -->' ,
  605. 'url' : '<htmlurl url="\a" name="\a">' ,
  606. 'urlMark' : '<htmlurl url="\a" name="\a">' ,
  607. 'email' : '<htmlurl url="mailto:\a" name="\a">' ,
  608. 'emailMark' : '<htmlurl url="mailto:\a" name="\a">' ,
  609. 'img' : '<figure><ph vspace=""><img src="\a">'+\
  610. '</figure>' ,
  611. 'tableOpen' : '<table><tabular ca="~C~">' ,
  612. 'tableClose' : '</tabular></table>' ,
  613. 'tableRowSep' : '<rowsep>' ,
  614. 'tableCellSep' : '<colsep>' ,
  615. 'tableColAlignLeft' : 'l' ,
  616. 'tableColAlignRight' : 'r' ,
  617. 'tableColAlignCenter' : 'c' ,
  618. 'comment' : '<!-- \a -->' ,
  619. 'anchor' : '<label id="\a">' ,
  620. 'TOC' : '<toc>' ,
  621. 'EOD' : '</article>'
  622. },
  623. 'tex': {
  624. 'title1' : '\n\section*{\a}' ,
  625. 'title2' : '\\subsection*{\a}' ,
  626. 'title3' : '\\subsubsection*{\a}',
  627. # title 4/5: DIRTY: para+BF+\\+\n
  628. 'title4' : '\\paragraph{}\\textbf{\a}\\\\\n',
  629. 'title5' : '\\paragraph{}\\textbf{\a}\\\\\n',
  630. 'numtitle1' : '\n\section{\a}' ,
  631. 'numtitle2' : '\\subsection{\a}' ,
  632. 'numtitle3' : '\\subsubsection{\a}' ,
  633. 'blockVerbOpen' : '\\begin{verbatim}' ,
  634. 'blockVerbClose' : '\\end{verbatim}' ,
  635. 'blockQuoteOpen' : '\\begin{quotation}' ,
  636. 'blockQuoteClose' : '\\end{quotation}' ,
  637. 'fontMonoOpen' : '\\texttt{' ,
  638. 'fontMonoClose' : '}' ,
  639. 'fontBoldOpen' : '\\textbf{' ,
  640. 'fontBoldClose' : '}' ,
  641. 'fontItalicOpen' : '\\textit{' ,
  642. 'fontItalicClose' : '}' ,
  643. 'fontUnderlineOpen' : '\\underline{' ,
  644. 'fontUnderlineClose' : '}' ,
  645. 'listOpen' : '\\begin{itemize}' ,
  646. 'listClose' : '\\end{itemize}' ,
  647. 'listItemOpen' : '\\item ' ,
  648. 'numlistOpen' : '\\begin{enumerate}' ,
  649. 'numlistClose' : '\\end{enumerate}' ,
  650. 'numlistItemOpen' : '\\item ' ,
  651. 'deflistOpen' : '\\begin{description}',
  652. 'deflistClose' : '\\end{description}' ,
  653. 'deflistItem1Open' : '\\item[' ,
  654. 'deflistItem1Close' : ']' ,
  655. 'bar1' : '\n\\hrulefill{}\n' ,
  656. 'bar2' : '\n\\rule{\linewidth}{1mm}\n',
  657. 'url' : '\\htmladdnormallink{\a}{\a}',
  658. 'urlMark' : '\\htmladdnormallink{\a}{\a}',
  659. 'email' : '\\htmladdnormallink{\a}{mailto:\a}',
  660. 'emailMark' : '\\htmladdnormallink{\a}{mailto:\a}',
  661. 'img' : '\\includegraphics{\a}',
  662. 'tableOpen' : '\\begin{center}\\begin{tabular}{|~C~|}',
  663. 'tableClose' : '\\end{tabular}\\end{center}',
  664. 'tableRowOpen' : '\\hline ' ,
  665. 'tableRowClose' : ' \\\\' ,
  666. 'tableCellSep' : ' & ' ,
  667. 'tableColAlignLeft' : 'l' ,
  668. 'tableColAlignRight' : 'r' ,
  669. 'tableColAlignCenter' : 'c' ,
  670. 'tableColAlignSep' : '|' ,
  671. 'comment' : '% \a' ,
  672. 'TOC' : '\\tableofcontents',
  673. 'pageBreak' : '\\clearpage',
  674. 'EOD' : '\\end{document}'
  675. },
  676. 'lout': {
  677. 'paragraphOpen' : '@LP' ,
  678. 'blockTitle1Open' : '@BeginSections' ,
  679. 'blockTitle1Close' : '@EndSections' ,
  680. 'blockTitle2Open' : ' @BeginSubSections' ,
  681. 'blockTitle2Close' : ' @EndSubSections' ,
  682. 'blockTitle3Open' : ' @BeginSubSubSections' ,
  683. 'blockTitle3Close' : ' @EndSubSubSections' ,
  684. 'title1Open' : '\n@Section @Title { \a } @Begin',
  685. 'title1Close' : '@End @Section' ,
  686. 'title2Open' : '\n @SubSection @Title { \a } @Begin',
  687. 'title2Close' : ' @End @SubSection' ,
  688. 'title3Open' : '\n @SubSubSection @Title { \a } @Begin',
  689. 'title3Close' : ' @End @SubSubSection' ,
  690. 'title4Open' : '\n@LP @LeftDisplay @B { \a }',
  691. 'title5Open' : '\n@LP @LeftDisplay @B { \a }',
  692. 'anchor' : '@Tag { \a }' ,
  693. 'blockVerbOpen' : '@LP @ID @F @RawVerbatim @Begin',
  694. 'blockVerbClose' : '@End @RawVerbatim' ,
  695. 'blockQuoteOpen' : '@QD {' ,
  696. 'blockQuoteClose' : '}' ,
  697. # enclosed inside {} to deal with joined**words**
  698. 'fontMonoOpen' : '{@F {' ,
  699. 'fontMonoClose' : '}}' ,
  700. 'fontBoldOpen' : '{@B {' ,
  701. 'fontBoldClose' : '}}' ,
  702. 'fontItalicOpen' : '{@II {' ,
  703. 'fontItalicClose' : '}}' ,
  704. 'fontUnderlineOpen' : '{@Underline{' ,
  705. 'fontUnderlineClose' : '}}' ,
  706. # the full form is more readable, but could be BL EL LI NL TL DTI
  707. 'listOpen' : '@BulletList' ,
  708. 'listClose' : '@EndList' ,
  709. 'listItemOpen' : '@ListItem{' ,
  710. 'listItemClose' : '}' ,
  711. 'numlistOpen' : '@NumberedList' ,
  712. 'numlistClose' : '@EndList' ,
  713. 'numlistItemOpen' : '@ListItem{' ,
  714. 'numlistItemClose' : '}' ,
  715. 'deflistOpen' : '@TaggedList' ,
  716. 'deflistClose' : '@EndList' ,
  717. 'deflistItem1Open' : '@DropTagItem {' ,
  718. 'deflistItem1Close' : '}' ,
  719. 'deflistItem2Open' : '{' ,
  720. 'deflistItem2Close' : '}' ,
  721. 'bar1' : '\n@DP @FullWidthRule\n' ,
  722. 'url' : '{blue @Colour { \a }}' ,
  723. 'urlMark' : '\a ({blue @Colour { \a }})' ,
  724. 'email' : '{blue @Colour { \a }}' ,
  725. 'emailMark' : '\a ({blue Colour{ \a }})' ,
  726. 'img' : '~A~@IncludeGraphic { \a }' , # eps only!
  727. 'imgAlignLeft' : '@LeftDisplay ' ,
  728. 'imgAlignRight' : '@RightDisplay ' ,
  729. 'imgAlignCenter' : '@CentredDisplay ' ,
  730. # lout tables are *way* complicated, no support for now
  731. #'tableOpen' : '~A~@Tbl~B~\naformat{ @Cell A | @Cell B } {',
  732. #'tableClose' : '}' ,
  733. #'tableRowOpen' : '@Rowa\n' ,
  734. #'tableTitleRowOpen' : '@HeaderRowa' ,
  735. #'tableCenterAlign' : '@CentredDisplay ' ,
  736. #'tableCellOpen' : '\a {' , # A, B, ...
  737. #'tableCellClose' : '}' ,
  738. #'tableBorder' : '\nrule {yes}' ,
  739. 'comment' : '# \a' ,
  740. # @MakeContents must be on the config file
  741. 'TOC' : '@DP @ContentsGoesHere @DP',
  742. 'pageBreak' : '\n@NP\n' ,
  743. 'EOD' : '@End @Text'
  744. },
  745. 'moin': {
  746. 'title1' : '= \a =' ,
  747. 'title2' : '== \a ==' ,
  748. 'title3' : '=== \a ===' ,
  749. 'title4' : '==== \a ====' ,
  750. 'title5' : '===== \a =====',
  751. 'blockVerbOpen' : '{{{' ,
  752. 'blockVerbClose' : '}}}' ,
  753. 'blockQuoteLine' : ' ' ,
  754. 'fontMonoOpen' : '{{{' ,
  755. 'fontMonoClose' : '}}}' ,
  756. 'fontBoldOpen' : "'''" ,
  757. 'fontBoldClose' : "'''" ,
  758. 'fontItalicOpen' : "''" ,
  759. 'fontItalicClose' : "''" ,
  760. 'fontUnderlineOpen' : "__" ,
  761. 'fontUnderlineClose' : "__" ,
  762. 'listItemOpen' : ' * ' ,
  763. 'numlistItemOpen' : ' \a. ' ,
  764. 'bar1' : '----' ,
  765. 'url' : '[\a]' ,
  766. 'urlMark' : '[\a \a]' ,
  767. 'email' : '[\a]' ,
  768. 'emailMark' : '[\a \a]' ,
  769. 'img' : '[\a]' ,
  770. 'tableRowOpen' : '||' ,
  771. 'tableCellOpen' : '~A~' ,
  772. 'tableCellClose' : '||' ,
  773. 'tableTitleCellClose' : '||' ,
  774. 'tableCellAlignRight' : '<)>' ,
  775. 'tableCellAlignCenter': '<:>' ,
  776. 'comment' : '## \a' ,
  777. 'TOC' : '[[TableOfContents]]'
  778. },
  779. 'mgp': {
  780. 'paragraphOpen' : '%font "normal", size 5' ,
  781. 'title1' : '%page\n\n\a\n' ,
  782. 'title2' : '%page\n\n\a\n' ,
  783. 'title3' : '%page\n\n\a\n' ,
  784. 'title4' : '%page\n\n\a\n' ,
  785. 'title5' : '%page\n\n\a\n' ,
  786. 'blockVerbOpen' : '%font "mono"' ,
  787. 'blockVerbClose' : '%font "normal"' ,
  788. 'blockQuoteOpen' : '%prefix " "' ,
  789. 'blockQuoteClose' : '%prefix " "' ,
  790. 'fontMonoOpen' : '\n%cont, font "mono"\n' ,
  791. 'fontMonoClose' : '\n%cont, font "normal"\n' ,
  792. 'fontBoldOpen' : '\n%cont, font "normal-b"\n' ,
  793. 'fontBoldClose' : '\n%cont, font "normal"\n' ,
  794. 'fontItalicOpen' : '\n%cont, font "normal-i"\n' ,
  795. 'fontItalicClose' : '\n%cont, font "normal"\n' ,
  796. 'fontUnderlineOpen' : '\n%cont, fore "cyan"\n' ,
  797. 'fontUnderlineClose' : '\n%cont, fore "white"\n' ,
  798. 'listItemLine' : '\t' ,
  799. 'numlistItemLine' : '\t' ,
  800. 'deflistItem1Open' : '\t\n%cont, font "normal-b"\n',
  801. 'deflistItem1Close' : '\n%cont, font "normal"\n' ,
  802. 'bar1' : '%bar "white" 5' ,
  803. 'bar2' : '%pause' ,
  804. 'url' : '\n%cont, fore "cyan"\n\a' +\
  805. '\n%cont, fore "white"\n' ,
  806. 'urlMark' : '\a \n%cont, fore "cyan"\n\a'+\
  807. '\n%cont, fore "white"\n' ,
  808. 'email' : '\n%cont, fore "cyan"\n\a' +\
  809. '\n%cont, fore "white"\n' ,
  810. 'emailMark' : '\a \n%cont, fore "cyan"\n\a'+\
  811. '\n%cont, fore "white"\n' ,
  812. 'img' : '~A~\n%newimage "\a"\n%left\n',
  813. 'imgAlignLeft' : '\n%left' ,
  814. 'imgAlignRight' : '\n%right' ,
  815. 'imgAlignCenter' : '\n%center' ,
  816. 'comment' : '%% \a' ,
  817. 'pageBreak' : '%page\n\n\n' ,
  818. 'EOD' : '%%EOD'
  819. },
  820. # man groff_man ; man 7 groff
  821. 'man': {
  822. 'paragraphOpen' : '.P' ,
  823. 'title1' : '.SH \a' ,
  824. 'title2' : '.SS \a' ,
  825. 'title3' : '.SS \a' ,
  826. 'title4' : '.SS \a' ,
  827. 'title5' : '.SS \a' ,
  828. 'blockVerbOpen' : '.nf' ,
  829. 'blockVerbClose' : '.fi\n' ,
  830. 'blockQuoteOpen' : '.RS' ,
  831. 'blockQuoteClose' : '.RE' ,
  832. 'fontBoldOpen' : '\\fB' ,
  833. 'fontBoldClose' : '\\fR' ,
  834. 'fontItalicOpen' : '\\fI' ,
  835. 'fontItalicClose' : '\\fR' ,
  836. 'listOpen' : '.RS' ,
  837. 'listItemOpen' : '.IP \(bu 3\n',
  838. 'listClose' : '.RE' ,
  839. 'numlistOpen' : '.RS' ,
  840. 'numlistItemOpen' : '.IP \a. 3\n',
  841. 'numlistClose' : '.RE' ,
  842. 'deflistItem1Open' : '.TP\n' ,
  843. 'bar1' : '\n\n' ,
  844. 'url' : '\a' ,
  845. 'urlMark' : '\a (\a)',
  846. 'email' : '\a' ,
  847. 'emailMark' : '\a (\a)',
  848. 'img' : '\a' ,
  849. 'tableOpen' : '.TS\n~A~~B~tab(^); ~C~.',
  850. 'tableClose' : '.TE' ,
  851. 'tableRowOpen' : ' ' ,
  852. 'tableCellSep' : '^' ,
  853. 'tableAlignCenter' : 'center, ',
  854. 'tableBorder' : 'allbox, ',
  855. 'tableColAlignLeft' : 'l' ,
  856. 'tableColAlignRight' : 'r' ,
  857. 'tableColAlignCenter' : 'c' ,
  858. 'comment' : '.\\" \a'
  859. },
  860. 'pm6': {
  861. 'paragraphOpen' : '<@Normal:>' ,
  862. 'title1' : '\n<@Title1:>\a',
  863. 'title2' : '\n<@Title2:>\a',
  864. 'title3' : '\n<@Title3:>\a',
  865. 'title4' : '\n<@Title4:>\a',
  866. 'title5' : '\n<@Title5:>\a',
  867. 'blockVerbOpen' : '<@PreFormat:>' ,
  868. 'blockQuoteLine' : '<@Quote:>' ,
  869. 'fontMonoOpen' : '<FONT "Lucida Console"><SIZE 9>' ,
  870. 'fontMonoClose' : '<SIZE$><FONT$>',
  871. 'fontBoldOpen' : '<B>' ,
  872. 'fontBoldClose' : '<P>' ,
  873. 'fontItalicOpen' : '<I>' ,
  874. 'fontItalicClose' : '<P>' ,
  875. 'fontUnderlineOpen' : '<U>' ,
  876. 'fontUnderlineClose' : '<P>' ,
  877. 'listOpen' : '<@Bullet:>' ,
  878. 'listItemOpen' : '\x95\t' , # \x95 == ~U
  879. 'numlistOpen' : '<@Bullet:>' ,
  880. 'numlistItemOpen' : '\x95\t' ,
  881. 'bar1' : '\a' ,
  882. 'url' : '<U>\a<P>' , # underline
  883. 'urlMark' : '\a <U>\a<P>' ,
  884. 'email' : '\a' ,
  885. 'emailMark' : '\a \a' ,
  886. 'img' : '\a'
  887. }
  888. }
  889. # exceptions for --css-sugar
  890. if config['css-sugar'] and config['target'] in ('html','xhtml'):
  891. # change just HTML because XHTML inherits it
  892. htmltags = alltags['html']
  893. # table with no cellpadding
  894. htmltags['tableOpen'] = string.replace(
  895. htmltags['tableOpen'], ' CELLPADDING="4"', '')
  896. # DIVs
  897. htmltags['tocOpen' ] = '<DIV CLASS="toc" ID="toc">'
  898. htmltags['tocClose'] = '</DIV>'
  899. htmltags['bodyOpen'] = '<DIV CLASS="body" ID="body">'
  900. htmltags['bodyClose']= '</DIV>'
  901. # make the HTML -> XHTML inheritance
  902. xhtml = alltags['html'].copy()
  903. for key in xhtml.keys(): xhtml[key] = string.lower(xhtml[key])
  904. # some like HTML tags as lowercase, some don't... (headers out)
  905. if HTML_LOWER: alltags['html'] = xhtml.copy()
  906. xhtml.update(alltags['xhtml'])
  907. alltags['xhtml'] = xhtml.copy()
  908. # compose the target tags dictionary
  909. tags = {}
  910. target_tags = alltags[config['target']].copy()
  911. for key in keys: tags[key] = '' # create empty keys
  912. for key in target_tags.keys():
  913. tags[key] = maskEscapeChar(target_tags[key]) # populate
  914. # map strong line to separator if not defined
  915. if not tags['bar2'] and tags['bar1']:
  916. tags['bar2'] = tags['bar1']
  917. return tags
  918. ##############################################################################
  919. def getRules(config):
  920. "Returns all the target-specific syntax rules"
  921. ret = {}
  922. allrules = [
  923. # target rules (ON/OFF)
  924. 'linkable', # target supports external links
  925. 'tableable', # target supports tables
  926. 'imglinkable', # target supports images as links
  927. 'imgalignable', # target supports image alignment
  928. 'imgasdefterm', # target supports image as definition term
  929. 'autonumberlist', # target supports numbered lists natively
  930. 'autonumbertitle', # target supports numbered titles natively
  931. 'parainsidelist', # lists items supports paragraph
  932. 'spacedlistitem', # lists support blank lines between items
  933. 'listnotnested', # lists cannot be nested
  934. 'quotenotnested', # quotes cannot be nested
  935. 'verbblocknotescaped', # don't escape specials in verb block
  936. 'verbblockfinalescape', # do final escapes in verb block
  937. 'escapeurl', # escape special in link URL
  938. 'onelinepara', # dump paragraph as a single long line
  939. 'tabletitlerowinbold', # manually bold any cell on table titles
  940. 'tablecellstrip', # strip extra spaces from each table cell
  941. 'tablecellspannable', # the table cells can have span attribute
  942. 'barinsidequote', # bars are allowed inside quote blocks
  943. 'finalescapetitle', # perform final escapes on title lines
  944. 'autotocnewpagebefore', # break page before automatic TOC
  945. 'autotocnewpageafter', # break page after automatic TOC
  946. 'autotocwithbars', # automatic TOC surrounded by bars
  947. 'mapbar2pagebreak', # map the strong bar to a page break
  948. 'titleblocks', # titles must be on open/close section blocks
  949. # target code beautify (ON/OFF)
  950. 'indentverbblock', # add leading spaces to verb block lines
  951. 'breaktablecell', # break lines after any table cell
  952. 'breaktablelineopen', # break line after opening table line
  953. 'notbreaklistopen', # don't break line after opening a new list
  954. 'notbreakparaopen', # don't break line after opening a new para
  955. 'keepquoteindent', # don't remove the leading TABs on quotes
  956. 'keeplistindent', # don't remove the leading spaces on lists
  957. 'blankendmotherlist', # append a blank line at the mother list end
  958. 'blankendtable', # append a blank line at the table end
  959. 'blankendautotoc', # append a blank line at the auto TOC end
  960. 'tagnotindentable', # tags must be placed at the line begining
  961. # value settings
  962. 'listmaxdepth', # maximum depth for lists
  963. 'tablecellaligntype' # type of table cell align: cell, column
  964. ]
  965. rules_bank = {
  966. 'txt' : {
  967. 'indentverbblock':1,
  968. 'spacedlistitem':1,
  969. 'parainsidelist':1,
  970. 'keeplistindent':1,
  971. 'barinsidequote':1,
  972. 'autotocwithbars':1,
  973. 'blankendmotherlist':1
  974. },
  975. 'html': {
  976. 'indentverbblock':1,
  977. 'linkable':1,
  978. 'escapeurl':1,
  979. 'imglinkable':1,
  980. 'imgalignable':1,
  981. 'imgasdefterm':1,
  982. 'autonumberlist':1,
  983. 'spacedlistitem':1,
  984. 'parainsidelist':1,
  985. 'blankendmotherlist':1,
  986. 'tableable':1,
  987. 'tablecellstrip':1,
  988. 'blankendtable':1,
  989. 'breaktablecell':1,
  990. 'breaktablelineopen':1,
  991. 'keeplistindent':1,
  992. 'keepquoteindent':1,
  993. 'barinsidequote':1,
  994. 'autotocwithbars':1,
  995. 'tablecellspannable':1,
  996. 'tablecellaligntype':'cell'
  997. },
  998. #TIP xhtml inherits all HTML rules
  999. 'xhtml': {
  1000. },
  1001. 'sgml': {
  1002. 'linkable':1,
  1003. 'escapeurl':1,
  1004. 'autonumberlist':1,
  1005. 'spacedlistitem':1,
  1006. 'blankendmotherlist':1,
  1007. 'tableable':1,
  1008. 'tablecellstrip':1,
  1009. 'blankendtable':1,
  1010. 'blankendautotoc':1,
  1011. 'quotenotnested':1,
  1012. 'keeplistindent':1,
  1013. 'keepquoteindent':1,
  1014. 'barinsidequote':1,
  1015. 'finalescapetitle':1,
  1016. 'tablecellaligntype':'column'
  1017. },
  1018. 'mgp' : {
  1019. 'blankendmotherlist':1,
  1020. 'tagnotindentable':1,
  1021. 'spacedlistitem':1,
  1022. 'imgalignable':1,
  1023. 'autotocnewpagebefore':1,
  1024. },
  1025. 'tex' : {
  1026. 'autonumberlist':1,
  1027. 'autonumbertitle':1,
  1028. 'spacedlistitem':1,
  1029. 'blankendmotherlist':1,
  1030. 'tableable':1,
  1031. 'tablecellstrip':1,
  1032. 'tabletitlerowinbold':1,
  1033. 'blankendtable':1,
  1034. 'verbblocknotescaped':1,
  1035. 'keeplistindent':1,
  1036. 'listmaxdepth':4,
  1037. 'barinsidequote':1,
  1038. 'finalescapetitle':1,
  1039. 'autotocnewpageafter':1,
  1040. 'mapbar2pagebreak':1,
  1041. 'tablecellaligntype':'column'
  1042. },
  1043. 'lout': {
  1044. 'keepquoteindent':1,
  1045. 'escapeurl':1,
  1046. 'verbblocknotescaped':1,
  1047. 'tableable':0,
  1048. 'imgalignable':1,
  1049. 'mapbar2pagebreak':1,
  1050. 'titleblocks':1,
  1051. 'notbreakparaopen':1
  1052. },
  1053. 'moin': {
  1054. 'spacedlistitem':1,
  1055. 'linkable':1,
  1056. 'blankendmotherlist':1,
  1057. 'keeplistindent':1,
  1058. 'tableable':1,
  1059. 'barinsidequote':1,
  1060. 'blankendtable':1,
  1061. 'tabletitlerowinbold':1,
  1062. 'tablecellstrip':1,
  1063. 'autotocwithbars':1,
  1064. 'tablecellaligntype':'cell'
  1065. },
  1066. 'man' : {
  1067. 'spacedlistitem':1,
  1068. 'indentverbblock':1,
  1069. 'blankendmotherlist':1,
  1070. 'tagnotindentable':1,
  1071. 'tableable':1,
  1072. 'tablecellaligntype':'column',
  1073. 'tabletitlerowinbold':1,
  1074. 'tablecellstrip':1,
  1075. 'blankendtable':1,
  1076. 'keeplistindent':0,
  1077. 'barinsidequote':1,
  1078. 'parainsidelist':0,
  1079. },
  1080. 'pm6' : {
  1081. 'keeplistindent':1,
  1082. 'verbblockfinalescape':1,
  1083. #TODO add support for these - maybe set a JOINNEXT char and
  1084. # do it on addLineBreaks()
  1085. 'notbreaklistopen':1,
  1086. 'notbreakparaopen':1,
  1087. 'barinsidequote':1,
  1088. 'autotocwithbars':1,
  1089. 'onelinepara':1,
  1090. }
  1091. }
  1092. # exceptions for --css-sugar
  1093. if config['css-sugar'] and config['target'] in ('html','xhtml'):
  1094. rules_bank['html']['indentverbblock'] = 0
  1095. rules_bank['html']['autotocwithbars'] = 0
  1096. # get the target specific rules
  1097. if config['target'] == 'xhtml':
  1098. myrules = rules_bank['html'].copy() # inheritance
  1099. myrules.update(rules_bank['xhtml']) # get XHTML specific
  1100. else:
  1101. myrules = rules_bank[config['target']].copy()
  1102. # populate return dictionary
  1103. for key in allrules: ret[key] = 0 # reset all
  1104. ret.update(myrules) # get rules
  1105. return ret
  1106. ##############################################################################
  1107. def getRegexes():
  1108. "Returns all the regexes used to find the t2t marks"
  1109. bank = {
  1110. 'blockVerbOpen':
  1111. re.compile(r'^```\s*$'),
  1112. 'blockVerbClose':
  1113. re.compile(r'^```\s*$'),
  1114. 'blockRawOpen':
  1115. re.compile(r'^"""\s*$'),
  1116. 'blockRawClose':
  1117. re.compile(r'^"""\s*$'),
  1118. 'quote':
  1119. re.compile(r'^\t+'),
  1120. '1lineVerb':
  1121. re.compile(r'^``` (?=.)'),
  1122. '1lineRaw':
  1123. re.compile(r'^""" (?=.)'),
  1124. # mono, raw, bold, italic, underline:
  1125. # - marks must be glued with the contents, no boundary spaces
  1126. # - they are greedy, so in ****bold****, turns to <b>**bold**</b>
  1127. 'fontMono':
  1128. re.compile( r'``([^\s](|.*?[^\s])`*)``'),
  1129. 'raw':
  1130. re.compile( r'""([^\s](|.*?[^\s])"*)""'),
  1131. 'fontBold':
  1132. re.compile(r'\*\*([^\s](|.*?[^\s])\**)\*\*'),
  1133. 'fontItalic':
  1134. re.compile( r'//([^\s](|.*?[^\s])/*)//'),
  1135. 'fontUnderline':
  1136. re.compile( r'__([^\s](|.*?[^\s])_*)__'),
  1137. 'list':
  1138. re.compile(r'^( *)(-) (?=[^ ])'),
  1139. 'numlist':
  1140. re.compile(r'^( *)(\+) (?=[^ ])'),
  1141. 'deflist':
  1142. re.compile(r'^( *)(:) (.*)$'),
  1143. 'listclose':
  1144. re.compile(r'^( *)([-+:])\s*$'),
  1145. 'bar':
  1146. re.compile(r'^(\s*)([_=-]{20,})\s*$'),
  1147. 'table':
  1148. re.compile(r'^ *\|\|? '),
  1149. 'blankline':
  1150. re.compile(r'^\s*$'),
  1151. 'comment':
  1152. re.compile(r'^%'),
  1153. # auxiliar tag regexes
  1154. '_imgAlign' : re.compile(r'~A~', re.I),
  1155. '_tableAlign' : re.compile(r'~A~', re.I),
  1156. '_anchor' : re.compile(r'~A~', re.I),
  1157. '_tableBorder' : re.compile(r'~B~', re.I),
  1158. '_tableColAlign' : re.compile(r'~C~', re.I),
  1159. '_tableCellColSpan': re.compile(r'~S~', re.I),
  1160. '_tableCellAlign' : re.compile(r'~A~', re.I),
  1161. }
  1162. # special char to place data on TAGs contents (\a == bell)
  1163. bank['x'] = re.compile('\a')
  1164. # %%macroname [ (formatting) ]
  1165. bank['macros'] = re.compile(r'%%%%(?P<name>%s)\b(\((?P<fmt>.*?)\))?'%(
  1166. string.join(MACROS.keys(), '|')), re.I)
  1167. # %%TOC special macro for TOC positioning
  1168. bank['toc'] = re.compile(r'^ *%%toc\s*$', re.I)
  1169. # almost complicated title regexes ;)
  1170. titskel = r'^ *(?P<id>%s)(?P<txt>%s)\1(\[(?P<label>[\w-]*)\])?\s*$'
  1171. bank[ 'title'] = re.compile(titskel%('[=]{1,5}','[^=](|.*[^=])'))
  1172. bank['numtitle'] = re.compile(titskel%('[+]{1,5}','[^+](|.*[^+])'))
  1173. ### complicated regexes begin here ;)
  1174. #
  1175. # textual descriptions on --help's style: [...] is optional, | is OR
  1176. ### first, some auxiliar variables
  1177. #
  1178. # [image.EXT]
  1179. patt_img = r'\[([\w_,.+%$#@!?+~/-]+\.(png|jpe?g|gif|eps|bmp))\]'
  1180. # link things
  1181. urlskel = {
  1182. 'proto' : r'(https?|ftp|news|telnet|gopher|wais)://',
  1183. 'guess' : r'(www[23]?|ftp)\.', # w/out proto, try to guess
  1184. 'login' : r'A-Za-z0-9_.-', # for ftp://login@domain.com
  1185. 'pass' : r'[^ @]*', # for ftp://login:pass@dom.com
  1186. 'chars' : r'A-Za-z0-9%._/~:,=$@&+-', # %20(space), :80(port), D&D
  1187. 'anchor': r'A-Za-z0-9%._-', # %nn(encoded)
  1188. 'form' : r'A-Za-z0-9/%&=+;.,$@*_-', # .,@*_-(as is)
  1189. 'punct' : r'.,;:!?'
  1190. }
  1191. # username [ :password ] @
  1192. patt_url_login = r'([%s]+(:%s)?@)?'%(urlskel['login'],urlskel['pass'])
  1193. # [ http:// ] [ username:password@ ] domain.com [ / ]
  1194. # [ #anchor | ?form=data ]
  1195. retxt_url = r'\b(%s%s|%s)[%s]+\b/*(\?[%s]+)?(#[%s]+)?'%(
  1196. urlskel['proto'],patt_url_login, urlskel['guess'],
  1197. urlskel['chars'],urlskel['form'],urlskel['anchor'])
  1198. # filename | [ filename ] #anchor
  1199. retxt_url_local = r'[%s]+|[%s]*(#[%s]+)'%(
  1200. urlskel['chars'],urlskel['chars'],urlskel['anchor'])
  1201. # user@domain [ ?form=data ]
  1202. patt_email = r'\b[%s]+@([A-Za-z0-9_-]+\.)+[A-Za-z]{2,4}\b(\?[%s]+)?'%(
  1203. urlskel['login'],urlskel['form'])
  1204. # saving for future use
  1205. bank['_urlskel'] = urlskel
  1206. ### and now the real regexes
  1207. #
  1208. bank['email'] = re.compile(patt_email,re.I)
  1209. # email | url
  1210. bank['link'] = re.compile(r'%s|%s'%(retxt_url,patt_email), re.I)
  1211. # \[ label | imagetag url | email | filename \]
  1212. bank['linkmark'] = re.compile(
  1213. r'\[(?P<label>%s|[^]]+) (?P<link>%s|%s|%s)\]'%(
  1214. patt_img, retxt_url, patt_email, retxt_url_local),
  1215. re.L+re.I)
  1216. # image
  1217. bank['img'] = re.compile(patt_img, re.L+re.I)
  1218. # special things
  1219. bank['special'] = re.compile(r'^%!\s*')
  1220. return bank
  1221. ### END OF regex nightmares
  1222. ##############################################################################
  1223. class error(Exception):
  1224. pass
  1225. def echo(msg): # for quick debug
  1226. print '\033[32;1m%s\033[m'%msg
  1227. def Quit(msg=''):
  1228. if msg: print msg
  1229. sys.exit(0)
  1230. def Error(msg):
  1231. msg = _("%s: Error: ")%my_name + msg
  1232. raise error, msg
  1233. def getTraceback():
  1234. try:
  1235. from traceback import format_exception
  1236. etype, value, tb = sys.exc_info()
  1237. return string.join(format_exception(etype, value, tb), '')
  1238. except: pass
  1239. def getUnknownErrorMessage():
  1240. msg = '%s\n%s (%s):\n\n%s'%(
  1241. _('Sorry! Txt2tags aborted by an unknow error.'),
  1242. _('Please send the following Error Traceback to the author'),
  1243. my_email, getTraceback())
  1244. return msg
  1245. def Message(msg,level):
  1246. if level <= VERBOSE and not QUIET:
  1247. prefix = '-'*5
  1248. print "%s %s"%(prefix*level, msg)
  1249. def Debug(msg,id=0,linenr=None):
  1250. "Show debug messages, categorized (colored or not)"
  1251. if QUIET or not DEBUG: return
  1252. if int(id) not in range(8): id = 0
  1253. # 0:black 1:red 2:green 3:yellow 4:blue 5:pink 6:cyan 7:white ;1:light
  1254. ids = ['INI','CFG','SRC','BLK','HLD','GUI','OUT','DET']
  1255. colors_bgdark = ['7;1','1;1','3;1','6;1','4;1','5;1','2;1','7;1']
  1256. colors_bglight = ['0' ,'1' ,'3' ,'6' ,'4' ,'5' ,'2' ,'0' ]
  1257. if linenr is not None: msg = "LINE %04d: %s"%(linenr,msg)
  1258. if COLOR_DEBUG:
  1259. if BG_LIGHT: color = colors_bglight[id]
  1260. else : color = colors_bgdark[id]
  1261. msg = '\033[3%sm%s\033[m'%(color,msg)
  1262. print "++ %s: %s"%(ids[id],msg)
  1263. def Readfile(file, remove_linebreaks=0, ignore_error=0):
  1264. data = []
  1265. if file == '-':
  1266. try: data = sys.stdin.readlines()
  1267. except:
  1268. if not ignore_error:
  1269. Error(_('You must feed me with data on STDIN!'))
  1270. else:
  1271. try: f = open(file); data = f.readlines() ; f.close()
  1272. except:
  1273. if not ignore_error:
  1274. Error(_("Cannot read file:")+" %s"%file)
  1275. if remove_linebreaks:
  1276. data = map(lambda x:re.sub('[\n\r]+$','',x), data)
  1277. Message(_("Readed file (%d lines): %s")%(len(data),file),2)
  1278. return data
  1279. def Savefile(file, contents):
  1280. try: f = open(file, 'wb')
  1281. except: Error(_("Cannot open file for writing:")+" %s"%file)
  1282. if type(contents) == type([]): doit = f.writelines
  1283. else: doit = f.write
  1284. doit(contents) ; f.close()
  1285. def showdic(dic):
  1286. for k in dic.keys(): print "%15s : %s" % (k,dic[k])
  1287. def dotted_spaces(txt=''):
  1288. return string.replace(txt,' ','.')
  1289. # TIP: win env vars http://www.winnetmag.com/Article/ArticleID/23873/23873.html
  1290. def get_rc_path():
  1291. "Return the full path for the users' RC file"
  1292. # try to get the path from an env var. if yes, we're done
  1293. user_defined = os.environ.get('T2TCONFIG')
  1294. if user_defined: return user_defined
  1295. # env var not found, so perform automatic path composing
  1296. # set default filename according system platform
  1297. rc_names = {'default':'.txt2tagsrc', 'win':'_t2trc'}
  1298. rc_file = rc_names.get(sys.platform[:3]) or rc_names['default']
  1299. # the file must be on the user directory, but where is this dir?
  1300. rc_dir_search = ['HOME', 'HOMEPATH']
  1301. for var in rc_dir_search:
  1302. rc_dir = os.environ.get(var)
  1303. if rc_dir: break
  1304. # rc dir found, now we must join dir+file to compose the full path
  1305. if rc_dir:
  1306. # compose path and return it if the file exists
  1307. rc_path = os.path.join(rc_dir, rc_file)
  1308. # on windows, prefix with the drive (%homedrive%: 2k/XP/NT)
  1309. if sys.platform[:3] == 'win':
  1310. rc_drive = os.environ.get('HOMEDRIVE')
  1311. rc_path = os.path.join(rc_drive,rc_path)
  1312. return rc_path
  1313. # sorry, not found
  1314. return ''
  1315. ##############################################################################
  1316. class CommandLine:
  1317. """
  1318. Command Line class - Masters command line
  1319. This class checks and extract data from the provided command line.
  1320. The --long options and flags are taken from the global OPTIONS,
  1321. FLAGS and ACTIONS dictionaries. The short options are registered
  1322. here, and also their equivalence to the long ones.
  1323. METHODS:
  1324. _compose_short_opts() -> str
  1325. _compose_long_opts() -> list
  1326. Compose the valid short and long options list, on the
  1327. 'getopt' format.
  1328. parse() -> (opts, args)
  1329. Call getopt to check and parse the command line.
  1330. It expects to receive the command line as a list, and
  1331. without the program name (sys.argv[1:]).
  1332. get_raw_config() -> [RAW config]
  1333. Scans command line and convert the data to the RAW config
  1334. format. See ConfigMaster class to the RAW format description.
  1335. Optional 'ignore' and 'filter' arguments are used to filter
  1336. in or out specified keys.
  1337. compose_cmdline(dict) -> [Command line]
  1338. Compose a command line list from an already parsed config
  1339. dictionary, generated from RAW by ConfigMaster(). Use
  1340. this to compose an optimal command line for a group of
  1341. options.
  1342. The get_raw_config() calls parse(), so the tipical use of this
  1343. class is:
  1344. raw = CommandLine().get_raw_config(sys.argv[1:])
  1345. """
  1346. def __init__(self):
  1347. self.all_options = OPTIONS.keys()
  1348. self.all_flags = FLAGS.keys()
  1349. self.all_actions = ACTIONS.keys()
  1350. # short:long options equivalence
  1351. self.short_long = {
  1352. 'h':'help' , 'V':'version',
  1353. 'n':'enum-title', 'i':'infile' ,
  1354. 'H':'no-headers', 'o':'outfile',
  1355. 'v':'verbose' , 't':'target' ,
  1356. 'q':'quiet' , 'C':'config-file'
  1357. }
  1358. # compose valid short and long options data for getopt
  1359. self.short_opts = self._compose_short_opts()
  1360. self.long_opts = self._compose_long_opts()
  1361. def _compose_short_opts(self):
  1362. "Returns a string like 'hVt:o' with all short options/flags"
  1363. ret = []
  1364. for opt in self.short_long.keys():
  1365. long = self.short_long[opt]
  1366. if long in self.all_options: # is flag or option?
  1367. opt = opt+':' # option: have param
  1368. ret.append(opt)
  1369. #Debug('Valid SHORT options: %s'%ret)
  1370. return string.join(ret, '')
  1371. def _compose_long_opts(self):
  1372. "Returns a list with all the valid long options/flags"
  1373. ret = map(lambda x:x+'=', self.all_options) # add =
  1374. ret.extend(self.all_flags) # flag ON
  1375. ret.extend(self.all_actions) # acts
  1376. ret.extend(map(lambda x:'no-'+x, self.all_flags)) # add no-*
  1377. ret.extend(['no-style','no-encoding']) # turn OFF
  1378. ret.extend(['no-outfile','no-infile']) # turn OFF
  1379. ret.extend(['no-dump-config', 'no-dump-source']) # turn OFF
  1380. #Debug('Valid LONG options: %s'%ret)
  1381. return ret
  1382. def _tokenize(self, cmd_string=''):
  1383. "Convert a command line string to a list"
  1384. #TODO protect quotes contents
  1385. return string.split(cmd_string)
  1386. def parse(self, cmdline=[]):
  1387. "Check/Parse a command line list TIP: no program name!"
  1388. # get the valid options
  1389. short, long = self.short_opts, self.long_opts
  1390. # parse it!
  1391. try:
  1392. opts, args = getopt.getopt(cmdline, short, long)
  1393. except getopt.error, errmsg:
  1394. Error(_("%s (try --help)")%errmsg)
  1395. return (opts, args)
  1396. def get_raw_config(self, cmdline=[], ignore=[], filter=[], relative=0):
  1397. "Returns the options/arguments found as RAW config"
  1398. if not cmdline: return []
  1399. ret = []
  1400. # we need lists, not strings
  1401. if type(cmdline) == type(''): cmdline = self._tokenize(cmdline)
  1402. opts, args = self.parse(cmdline[:])
  1403. # parse all options
  1404. for name,value in opts:
  1405. # remove leading - and --
  1406. name = re.sub('^--?', '', name)
  1407. # alias to old mispelled 'suGGar'
  1408. if name == 'css-suggar': name = 'css-sugar'
  1409. elif name == 'no-css-suggar': name = 'no-css-sugar'
  1410. # translate short opt to long
  1411. if len(name) == 1: name = self.short_long.get(name)
  1412. # outfile exception: path relative to PWD
  1413. if name == 'outfile' and relative \
  1414. and value not in [STDOUT, MODULEOUT]:
  1415. value = os.path.abspath(value)
  1416. # config-file inclusion, path relative to PWD
  1417. if name == 'config-file':
  1418. configs = ConfigLines().include_config_file(
  1419. value)
  1420. # remove the 'target' item of all configs
  1421. configs = map(lambda c: [c[1],c[2]], configs)
  1422. ret.extend(configs)
  1423. continue
  1424. # save it
  1425. ret.append([name, value])
  1426. # get infile, if any
  1427. while args:
  1428. infile = args.pop(0)
  1429. ret.append(['infile', infile])
  1430. # apply 'ignore' and 'filter' rules (filter is stronger)
  1431. temp = ret[:] ; ret = []
  1432. for name,value in temp:
  1433. if (not filter and not ignore) or \
  1434. (filter and name in filter) or \
  1435. (ignore and name not in ignore):
  1436. ret.append( ['all', name, value] )
  1437. # add the original command line string as 'realcmdline'
  1438. ret.append( ['all', 'realcmdline', cmdline] )
  1439. return ret
  1440. def compose_cmdline(self, conf={}, no_check=0):
  1441. "compose a full (and diet) command line from CONF dict"
  1442. if not conf: return []
  1443. args = []
  1444. dft_options = OPTIONS.copy()
  1445. cfg = conf.copy()
  1446. valid_opts = self.all_options + self.all_flags
  1447. use_short = {'no-headers':'H', 'enum-title':'n'}
  1448. # remove useless options
  1449. if not no_check and cfg.get('toc-only'):
  1450. if cfg.has_key('no-headers'):
  1451. del cfg['no-headers']
  1452. if cfg.has_key('outfile'):
  1453. del cfg['outfile'] # defaults to STDOUT
  1454. if cfg.get('target') == 'txt':
  1455. del cfg['target'] # already default
  1456. args.append('--toc-only') # must be the first
  1457. del cfg['toc-only']
  1458. # add target type
  1459. if cfg.has_key('target'):
  1460. args.append('-t '+cfg['target'])
  1461. del cfg['target']
  1462. # add other options
  1463. for key in cfg.keys():
  1464. if key not in valid_opts: continue # may be a %!setting
  1465. if key in ['outfile','infile']: continue # later
  1466. val = cfg[key]
  1467. if not val: continue
  1468. # default values are useless on cmdline
  1469. if val == dft_options.get(key): continue
  1470. # -short format
  1471. if key in use_short.keys():
  1472. args.append('-'+use_short[key])
  1473. continue
  1474. # --long format
  1475. if key in self.all_flags: # add --option
  1476. args.append('--'+key)
  1477. else: # add --option=value
  1478. args.append('--%s=%s'%(key,val))
  1479. # the outfile using -o
  1480. if cfg.has_key('outfile') and \
  1481. cfg['outfile'] != dft_options.get('outfile'):
  1482. args.append('-o '+cfg['outfile'])
  1483. # place input file(s) always at the end
  1484. if cfg.has_key('infile'):
  1485. args.append(string.join(cfg['infile'],' '))
  1486. # return as a nice list
  1487. Debug("Diet command line: %s"%string.join(args,' '), 1)
  1488. return args
  1489. ##############################################################################
  1490. class SourceDocument:
  1491. """
  1492. SourceDocument class - scan document structure, extract data
  1493. It knows about full files. It reads a file and identify all
  1494. the areas begining (Head,Conf,Body). With this info it can
  1495. extract each area contents.
  1496. Note: the original line break is removed.
  1497. DATA:
  1498. self.arearef - Save Head, Conf, Body init line number
  1499. self.areas - Store the area names which are not empty
  1500. self.buffer - The full file contents (with NO \\r, \\n)
  1501. METHODS:
  1502. get() - Access the contents of an Area. Example:
  1503. config = SourceDocument(file).get('conf')
  1504. split() - Get all the document Areas at once. Example:
  1505. head, conf, body = SourceDocument(file).split()
  1506. RULES:
  1507. * The document parts are sequential: Head, Conf and Body.
  1508. * One ends when the next begins.
  1509. * The Conf Area is optional, so a document can have just
  1510. Head and Body Areas.
  1511. These are the Areas limits:
  1512. - Head Area: the first three lines
  1513. - Body Area: from the first valid text line to the end
  1514. - Conf Area: the comments between Head and Body Areas
  1515. Exception: If the first line is blank, this means no
  1516. header info, so the Head Area is just the first line.
  1517. """
  1518. def __init__(self, filename='', contents=[]):
  1519. self.areas = ['head','conf','body']
  1520. self.arearef = []
  1521. self.areas_fancy = ''
  1522. self.filename = filename
  1523. self.buffer = []
  1524. if filename:
  1525. self.scan_file(filename)
  1526. elif contents:
  1527. self.scan(contents)
  1528. def split(self):
  1529. "Returns all document parts, splitted into lists."
  1530. return self.get('head'), self.get('conf'), self.get('body')
  def get(self, areaname):
      """
      Returns the head|conf|body contents from self.buffer.

      areaname - 'head', 'conf' or 'body'

      An empty list is returned when nothing was scanned yet or
      when the area name is not listed on self.areas. The Head
      Area is padded with empty lines to always have 3 lines.
      """
      # sanity
      if not self.buffer or areaname not in self.areas:
          return []
      # the area goes from its init line up to the next area init
      # line (or up to the buffer end, when there is no next area)
      last = len(self.buffer)
      if areaname == 'head':
          start = 1          # buffer text starts at position 1
          stop  = self.arearef[1] or self.arearef[2] or last
      elif areaname == 'conf':
          start = self.arearef[1]
          stop  = self.arearef[2] or last
      elif areaname == 'body':
          start = self.arearef[2]
          stop  = last
      else:
          Error("Unknown Area name '%s'"%areaname)
      contents = self.buffer[start:stop]
      # make sure head will always have 3 lines
      if areaname == 'head':
          while len(contents) < 3:
              contents.append('')
      return contents
  def scan_file(self, filename):
      "Reads the file (line breaks removed) and scans its contents."
      Debug("source file: %s"%filename)
      Message(_("Loading source document"),1)
      self.scan(Readfile(filename, remove_linebreaks=1))
  def scan(self, lines):
      """
      Run through the source lines and identify head/conf/body areas.

      lines - the document contents, one item per line, with the
              line breaks already removed. It is not changed.

      Nothing is returned, the results are saved on:
        self.buffer      - copy of 'lines' with a filler item at
                           position 0, so the text starts at pos 1
        self.arearef     - [head, conf, body] init line numbers,
                           zero for the missing areas
        self.areas       - the missing areas names are blanked
        self.areas_fancy - readable summary of the two above
      Error() is called when 'lines' is empty.
      """
      if len(lines) == 0:
          Error(_('The input file is empty: %s')%self.filename)
      cfg_parser = ConfigLines().parse_line
      # Work on a copy. Inserting the filler right into 'lines'
      # would change the caller's list, and a second scan of that
      # very same list would see a blank first line (no headers).
      buf = [''] + list(lines)              # text start at pos 1
      ref = [1,4,0]
      if not string.strip(buf[1]):          # no header
          ref[0] = 0 ; ref[1] = 2
      rgx = getRegexes()
      for i in range(ref[1],len(buf)):      # find body init:
          if string.strip(buf[i]) and (     # ... not blank and
             buf[i][0] != '%' or            # ... not comment or
             rgx['macros'].match(buf[i]) or # ... %%macro
             rgx['toc'].match(buf[i])    or # ... %%toc
             cfg_parser(buf[i],'include')[1]): # ... %!include
              ref[2] = i ; break
      if ref[1] == ref[2]: ref[1] = 0       # no conf area
      for i in 0,1,2:                       # del !existent
          if ref[i] >= len(buf): ref[i] = 0 # title-only
          if not ref[i]: self.areas[i] = ''
      Debug('Head,Conf,Body start line: %s'%ref)
      self.arearef = ref                    # save results
      self.buffer  = buf
      # fancyness sample: head conf body (1 4 8)
      self.areas_fancy = "%s (%s)"%(
          string.join(self.areas),
          string.join(map(str, map(lambda x:x or '', ref))))
      Message(_("Areas found: %s")%self.areas_fancy, 2)
  def get_raw_config(self):
      "Handy method to get the CONF area RAW config (if any)"
      # no Conf Area on this document, nothing to do
      if 'conf' not in self.areas:
          return []
      Message(_("Scanning source document CONF area"),1)
      conf_area = ConfigLines(
          file=self.filename,
          lines=self.get('conf'),
          first_line=self.arearef[1])
      raw = conf_area.get_raw_config()
      Debug("document raw config: %s"%raw, 1)
      return raw
  1599. ##############################################################################
  class ConfigMaster:
      """
      ConfigMaster class - the configuration wizard

      This class is the configuration master. It knows how to handle
      the RAW and PARSED config formats. It also performs the sanity
      checks for a given configuration.

      DATA:
        self.raw         - Stores the config on the RAW format
        self.parsed      - Stores the config on the PARSED format
        self.defaults    - Stores the default values for all keys
        self.off         - Stores the OFF values for all keys
        self.multi       - List of keys which can have multiple values
        self.numeric     - List of keys which value must be a number
        self.incremental - List of keys which are incremental

      RAW FORMAT:
        The RAW format is a list of lists, being each mother list item
        a full configuration entry. Any entry is a 3 item list, on
        the following format: [ TARGET, KEY, VALUE ]
        Being a list, the order is preserved, so it's easy to use
        different kinds of configs, as CONF area and command line,
        respecting the precedence.
        The special target 'all' is used when no specific target was
        defined on the original config.

      PARSED FORMAT:
        The PARSED format is a dictionary, with all the 'key : value'
        found by reading the RAW config. The self.target contents
        matters, so this dictionary only contains the target's
        config. The configs of other targets are ignored.

      The CommandLine and ConfigLines classes have the get_raw_config()
      method which convert the configuration found to the RAW format.
      Just feed it to parse() and get a brand-new ready-to-use config
      dictionary. Example:

          >>> raw = CommandLine().get_raw_config(['-n', '-H'])
          >>> print raw
          [['all', 'enum-title', ''], ['all', 'no-headers', '']]
          >>> parsed = ConfigMaster(raw).parse()
          >>> print parsed
          {'enum-title': 1, 'headers': 0}
      """
      def __init__(self, raw=[], target=''):
          "Saves the RAW config and target, loading the default values."
          self.raw          = raw
          self.target       = target
          self.parsed       = {}
          self.dft_options  = OPTIONS.copy()
          self.dft_flags    = FLAGS.copy()
          self.dft_actions  = ACTIONS.copy()
          self.dft_settings = SETTINGS.copy()
          self.defaults     = self._get_defaults()
          self.off          = self._get_off()
          self.multi        = ['infile', 'options','preproc','postproc']
          self.incremental  = ['verbose']
          self.numeric      = ['toc-level','split']

      def _get_defaults(self):
          "Get the default values for all config/options/flags"
          empty = {}
          for kw in CONFIG_KEYWORDS: empty[kw] = ''
          empty.update(self.dft_options)
          empty.update(self.dft_flags)
          empty.update(self.dft_actions)
          empty.update(self.dft_settings)
          empty['realcmdline'] = ''  # internal use only
          empty['sourcefile']  = ''  # internal use only
          return empty

      def _get_off(self):
          "Turns OFF all the config/options/flags"
          off = {}
          for key in self.defaults.keys():
              kind = type(self.defaults[key])
              if kind == type(9):
                  off[key] = 0
              elif kind == type(''):
                  off[key] = ''
              elif kind == type([]):
                  off[key] = []
              else:
                  # the key name must be formatted into the message
                  # (it was being appended after a literal '%s')
                  Error('ConfigMaster: %s: Unknown type'%key)
          return off

      def _check_target(self):
          "Checks if the target is already defined. If not, do it"
          if not self.target:
              self.target = self.find_value('target')

      def get_target_raw(self):
          "Returns the raw config for self.target or 'all'"
          ret = []
          self._check_target()
          for entry in self.raw:
              if entry[0] in [self.target, 'all']:
                  ret.append(entry)
          return ret

      def add(self, key, val):
          "Adds the key:value pair to the config dictionary (if needed)"
          # %!options: the value is handed to the command line parser
          # and each config it gives back is added by its own
          if key == 'options':
              ignoreme = self.dft_actions.keys() + ['target']
              ignoreme.remove('dump-config')
              ignoreme.remove('dump-source')
              raw_opts = CommandLine().get_raw_config(
                  val, ignore=ignoreme)
              for target, key, val in raw_opts:
                  self.add(key, val)
              return
          # the no- prefix turns OFF this key
          if key[:3] == 'no-':
              key = key[3:]              # remove prefix
              val = self.off.get(key)    # turn key OFF
          # is this key valid?
          if key not in self.defaults.keys():
              Debug('Bogus Config %s:%s'%(key,val),1)
              return
          # is this value the default one?
          if val == self.defaults.get(key):
              # if default value, remove previous key:val
              if self.parsed.has_key(key):
                  del self.parsed[key]
              # nothing more to do
              return
          # flags ON comes empty. we'll add the 1 value now
          if val == '' and \
             key in self.dft_flags.keys()+self.dft_actions.keys():
              val = 1
          # multi value or single?
          if key in self.multi:
              # first one? start new list
              if not self.parsed.has_key(key):
                  self.parsed[key] = []
              self.parsed[key].append(val)
          # incremental value? so let's add it
          elif key in self.incremental:
              self.parsed[key] = (self.parsed.get(key) or 0) + val
          else:
              self.parsed[key] = val
          fancykey = dotted_spaces("%12s"%key)
          Message(_("Added config %s : %s")%(fancykey,val),3)

      def get_outfile_name(self, config={}):
          "Dirname is the same for {in,out}file"
          infile, outfile = config['sourcefile'], config['outfile']
          # a relative outfile goes to the source file directory
          if outfile and outfile not in [STDOUT, MODULEOUT] \
             and not os.path.isabs(outfile):
              outfile = os.path.join(os.path.dirname(infile), outfile)
          if infile == STDIN    and not outfile: outfile = STDOUT
          if infile == MODULEIN and not outfile: outfile = MODULEOUT
          # no outfile: source file name with the target as extension
          if not outfile and (infile and config.get('target')):
              basename = re.sub(r'\.(txt|t2t)$','',infile)
              outfile = "%s.%s"%(basename, config['target'])
          Debug(" infile: '%s'"%infile , 1)
          Debug("outfile: '%s'"%outfile, 1)
          return outfile

      def sanity(self, config, gui=0):
          """
          Basic config sanity checkings.

          config - the PARSED config dictionary to be checked
          gui    - when on, the target/infile checkings are skipped

          Returns a new dictionary having all the known keys, or an
          empty one when 'config' is empty. Error() is called for
          each problem found.
          """
          if not config: return {}
          target = config.get('target')
          # some actions don't require target specification
          if not target:
              for action in NO_TARGET:
                  if config.get(action):
                      target = 'txt'
                      break
          # on GUI, some checkings are skipped
          if not gui:
              # we *need* a target
              if not target:
                  Error(_('No target specified (try --help)')+\
                  '\n\n'+\
                  _('Maybe trying to convert an old v1.x file?'))
              # and of course, an infile also
              if not config.get('infile'):
                  Error(_('Missing input file (try --help)'))
              # is the target valid?
              if not TARGETS.count(target):
                  Error(_("Invalid target '%s' (try --help)")%\
                       target)
          # ensure all keys are present
          empty = self.defaults.copy() ; empty.update(config)
          config = empty.copy()
          # check integers options
          for key in config.keys():
              if key in self.numeric:
                  # only the conversion failures mean a bad value,
                  # anything else must not be hidden by this check
                  try:
                      config[key] = int(config[key])
                  except (ValueError, TypeError, OverflowError):
                      Error(_('--%s value must be a number'
                          )%key)
          # check split level value
          if config['split'] not in [0,1,2]:
              Error(_('Option --split must be 0, 1 or 2'))
          # --toc-only is stronger than others
          if config['toc-only']:
              config['headers'] = 0
              config['toc']     = 0
              config['split']   = 0
              config['gui']     = 0
              config['outfile'] = config['outfile'] or STDOUT
          # splitting is disable for now (future: HTML only, no STDOUT)
          config['split'] = 0
          # restore target
          config['target'] = target
          # set output file name
          config['outfile'] = self.get_outfile_name(config)
          # checking suicide
          if config['sourcefile'] == config['outfile'] and \
             config['outfile'] not in [STDOUT,MODULEOUT] and not gui:
              Error(_("Input and Output files are the same: %s")%(
              config['outfile']))
          return config

      def parse(self):
          "Returns the parsed config for the current target"
          raw = self.get_target_raw()
          for target, key, value in raw:
              self.add(key, value)
          Message(_("Added the following keys: %s")%string.join(
              self.parsed.keys(),', '),2)
          return self.parsed.copy()

      def find_value(self, key='', target=''):
          "Scans ALL raw config to find the desired key"
          ret = []
          # scan and save all values found
          for targ, k, val in self.raw:
              if targ in [target, 'all'] and k == key:
                  ret.append(val)
          if not ret: return ''
          # if not multi value, return only the last found
          if key in self.multi: return ret
          else                : return ret[-1]
  1821. ########################################################################
  class ConfigLines:
      """
      ConfigLines class - the config file data extractor

      This class reads and parse the config lines on the %!key:val
      format, converting it to RAW config. It deals with user
      config file (RC file), source document CONF area and
      %!includeconf directives.

      Call it passing a file name or feed the desired config lines.
      Then just call the get_raw_config() method and wait to
      receive the full config data on the RAW format. This method
      also follows the possible %!includeconf directives found on
      the config lines. Example:

          raw = ConfigLines(file=".txt2tagsrc").get_raw_config()

      The parse_line() method is also useful to be used alone,
      to identify and tokenize a single config line. For example,
      to get the %!include command components, on the source
      document BODY:

          target, key, value = ConfigLines().parse_line(body_line)
      """
      def __init__(self, file='', lines=[], first_line=1):
          """
          file       - the config file name ('NOFILE' when empty)
          lines      - the config lines, when already at hand
          first_line - number of the first line, used on messages
          """
          self.file = file or 'NOFILE'
          self.lines = lines
          self.first_line = first_line

      def load_lines(self):
          "Make sure we've loaded the file contents into buffer"
          if self.lines: return
          # self.file is never empty, 'NOFILE' is the placeholder
          # saved by __init__ when no file name was informed. So
          # that's the value to test for the "got nothing" error.
          if self.file == 'NOFILE':
              Error("ConfigLines: No file or lines provided")
          self.lines = self.read_config_file(self.file)

      def read_config_file(self, filename=''):
          "Read a Config File contents, aborting on invalid line"
          if not filename: return []
          errormsg = _("Invalid CONFIG line on %s")+"\n%03d:%s"
          lines = Readfile(filename, remove_linebreaks=1)
          # sanity: every non empty line must begin with %
          lineno = 0
          for line in lines:
              lineno = lineno + 1
              line = string.rstrip(line)
              if line and line[0] != '%':
                  Error(errormsg%(filename,lineno,line))
          return lines

      def include_config_file(self, file=''):
          "Perform the %!includeconf action, returning RAW config"
          if not file: return []
          # current dir relative to the current file (self.file)
          current_dir = os.path.dirname(self.file)
          file = os.path.join(current_dir, file)
          # read and parse included config file contents
          lines = self.read_config_file(file)
          return ConfigLines(file=file, lines=lines).get_raw_config()

      def get_raw_config(self):
          "Scan buffer and extract all config as RAW (including includes)"
          ret = []
          self.load_lines()
          first = self.first_line
          for i in range(len(self.lines)):
              line = self.lines[i]
              Message(_("Processing line %03d: %s")%(first+i,line),2)
              target, key, val = self.parse_line(line)
              if not key: continue    # no config on this line
              if key == 'includeconf':
                  err = _('A file cannot include itself (loop!)')
                  if val == self.file:
                      Error("%s: %%!includeconf: %s"%(
                          err, self.file))
                  more_raw = self.include_config_file(val)
                  ret.extend(more_raw)
                  Message(_("Finished Config file inclusion: %s"
                      )%(val),2)
              else:
                  ret.append([target, key, val])
                  Message(_("Added %s")%key,3)
          return ret

      def parse_line(self, line='', keyname='', target=''):
          """
          Detects %!key:val config lines and extract data from it.

          line    - the line to be inspected
          keyname - regex for the config name, any name if empty
          target  - regex for the target name, any target if empty

          Returns the [target, name, value] list, with three empty
          strings when the line holds no (valid) config. The value
          is a string, except for: guicolors (list of tokens) and
          preproc/postproc (pattern, replace) tuple.
          """
          empty = ['', '', '']
          if not line: return empty
          no_target = ['target', 'includeconf']
          re_name   = keyname or '[a-z]+'
          re_target = target  or '[a-z]*'
          # NOTE(review): '\S.+?' asks for two chars at least, so a
          # single char value is not recognized - confirm if intended
          cfgregex  = re.compile(r"""
              ^%%!\s*                # leading id with opt spaces
              (?P<name>%s)\s*        # config name
              (\((?P<target>%s)\))?  # optional target spec inside ()
              \s*:\s*                # key:value delimiter with opt spaces
              (?P<value>\S.+?)       # config value
              \s*$                   # rstrip() spaces and hit EOL
              """%(re_name,re_target), re.I+re.VERBOSE)
          prepostregex = re.compile(r"""
                                     # ---[ PATTERN ]---
              ^( "([^"]*)"           # "double quoted" or
               | '([^']*)'           # 'single quoted' or
               | ([^\s]+)            # single_word
               )
              \s+                    # separated by spaces
                                     # ---[ REPLACE ]---
               ( "([^"]*)"           # "double quoted" or
               | '([^']*)'           # 'single quoted' or
               | (.*)                # anything
               )
              \s*$
              """, re.VERBOSE)
          guicolors = re.compile(r"^([^\s]+\s+){3}[^\s]+") # 4 tokens
          match = cfgregex.match(line)
          if not match: return empty
          name   = string.lower(match.group('name') or '')
          target = string.lower(match.group('target') or 'all')
          value  = match.group('value')
          # NO target keywords: force all targets
          if name in no_target: target = 'all'
          # special config for GUI colors
          if name == 'guicolors':
              valmatch = guicolors.search(value)
              if not valmatch: return empty
              value = re.split(r'\s+', value)
          # Special config with two quoted values (%!preproc: "foo" 'bar')
          if name in ['preproc','postproc']:
              valmatch = prepostregex.search(value)
              if not valmatch: return empty
              getval = valmatch.group
              patt   = getval(2) or getval(3) or getval(4) or ''
              repl   = getval(6) or getval(7) or getval(8) or ''
              value  = (patt, repl)
          return [target, name, value]
  1945. ##############################################################################
  class MaskMaster:
      "(Un)Protect important structures from escaping and formatting"

      def __init__(self):
          "Defines the masks and starts with empty banks."
          self.linkmask  = 'vvvLINKvvv'
          self.monomask  = 'vvvMONOvvv'
          self.macromask = 'vvvMACROvvv'
          self.rawmask   = 'vvvRAWvvv'
          self.tocmask   = 'vvvTOCvvv'
          self.macroman  = MacroMaster()
          self.reset()

      def reset(self):
          "Empties all the banks of masked contents."
          self.linkbank  = []
          self.monobank  = []
          self.macrobank = []
          self.rawbank   = []

      def mask(self, line=''):
          """
          Returns the line with raw, mono, macro, TOC and link marks
          changed to masks. The contents found are saved on the
          banks (raw and mono already escaped), to be restored by
          undo(). Finding a TOC mark turns the global AUTOTOC off.
          """
          global AUTOTOC
          # protect raw text
          found = regex['raw'].search(line)
          while found:
              self.rawbank.append(doEscape(TARGET, found.group(1)))
              line = regex['raw'].sub(self.rawmask, line, 1)
              found = regex['raw'].search(line)
          # protect pre-formatted font text
          found = regex['fontMono'].search(line)
          while found:
              self.monobank.append(doEscape(TARGET, found.group(1)))
              line = regex['fontMono'].sub(self.monomask, line, 1)
              found = regex['fontMono'].search(line)
          # protect macros
          found = regex['macros'].search(line)
          while found:
              self.macrobank.append(found.group())
              line = regex['macros'].sub(self.macromask, line, 1)
              found = regex['macros'].search(line)
          # protect TOC location
          while regex['toc'].search(line):
              line = regex['toc'].sub(self.tocmask, line)
              AUTOTOC = 0
          # protect URLs and emails
          while 1:
              plain = regex['link'].search(line)
              named = regex['linkmark'].search(line)
              if not (plain or named): break
              # the named link is the one to mask only when it is
              # alone or comes before the plain link
              if named and (not plain or named.start() < plain.start()):
                  link    = named.group('link')
                  label   = string.rstrip(named.group('label'))
                  link_re = regex['linkmark']
              else:
                  link    = plain.group()
                  label   = ''
                  link_re = regex['link']
              line = link_re.sub(self.linkmask, line, 1)
              # save link data to the link bank
              self.linkbank.append((label, link))
          return line

      def undo(self, line):
          """
          Returns the line with the masks changed back to the banks
          contents, now tagged/expanded: links, macros, mono and raw
          (on this order, one mask for each bank item).
          """
          # url & email
          for label, url in self.linkbank:
              tagged_link = get_tagged_link(label, url)
              line = string.replace(line, self.linkmask, tagged_link, 1)
          # expand macros
          for macro in self.macrobank:
              expanded = self.macroman.expand(macro)
              line = string.replace(line, self.macromask, expanded, 1)
          # expand verb
          for mono in self.monobank:
              tagged_mono = TAGS['fontMonoOpen'] + mono + TAGS['fontMonoClose']
              line = string.replace(line, self.monomask, tagged_mono, 1)
          # expand raw
          for raw in self.rawbank:
              line = string.replace(line, self.rawmask, raw, 1)
          return line
  2030. ##############################################################################
  class TitleMaster:
      "Title things"

      def __init__(self):
          "Starts with no titles: zeroed counters and empty holders."
          self.count         = ['',0,0,0,0,0]  # indexed by title level
          self.toc           = []
          self.level         = 0
          self.kind          = ''
          self.txt           = ''
          self.label         = ''
          self.tag           = ''
          self.tag_hold      = []
          self.last_level    = 0
          self.count_id      = ''
          self.user_labels   = {}
          self.anchor_count  = 0
          self.anchor_prefix = 'toc'

      def _hold_tag(self, tagname):
          "Saves the named tag on hold, if it is not empty."
          tag = TAGS.get(tagname)
          if tag: self.tag_hold.append(tag)

      def _open_close_blocks(self):
          "Open new title blocks, closing the previous (if any)"
          if not rules['titleblocks']: return
          previous = self.last_level
          current  = self.level
          if current == previous:
              # same level, just close the previous
              self._hold_tag('title%dClose'%previous)
          elif current > previous:
              # section -> subsection, more depth
              for depth in range(previous+1, current+1):
                  # open the new block of subsections
                  self._hold_tag('blockTitle%dOpen'%depth)
                  # jump from title1 to title3 or more
                  # fill the gap with an empty section
                  if depth < current:
                      # NOTE(review): sub() gets None here if TAGS
                      # has no such key - confirm it always has
                      filler = TAGS.get('title%dOpen'%depth)
                      filler = regex['x'].sub('', filler)  # del \a
                      if filler: self.tag_hold.append(filler)
          else:
              # section <- subsection, less depth
              depth = previous
              while depth > current:
                  # close the current opened subsection
                  self._hold_tag('title%dClose'%depth)
                  # close the current opened block of subsections
                  self._hold_tag('blockTitle%dClose'%depth)
                  depth = depth - 1
              # close the previous section of the same level
              # the subsections were under it
              self._hold_tag('title%dClose'%current)

      def add(self, line):
          "Parses a new title line."
          if not line: return
          self._set_prop(line)
          self._open_close_blocks()
          self._set_count_id()
          self._set_label()
          self._save_toc_info()

      def close_all(self):
          "Closes all opened title blocks"
          closing = self.tag_hold[:]
          while self.level:
              for tagname in ('title%dClose', 'blockTitle%dClose'):
                  tag = TAGS.get(tagname%self.level)
                  if tag: closing.append(tag)
              self.level = self.level - 1
          return closing

      def _save_toc_info(self):
          "Save TOC info, used by self.dump_marked_toc()"
          entry = (self.level, self.count_id, self.txt, self.label)
          self.toc.append(entry)

      def _set_prop(self, line=''):
          "Extract info from original line and set data holders."
          # detect title type (numbered or not)
          marker = string.lstrip(line)[0]
          if   marker == '=': kind = 'title'
          elif marker == '+': kind = 'numtitle'
          else: Error("Unknown Title ID '%s'"%marker)
          # extract line info
          found = regex[kind].search(line)
          level = len(found.group('id'))
          # parse info & save
          if CONF['enum-title']: kind = 'numtitle'    # force
          if rules['titleblocks']:
              tagname = '%s%dOpen'%(kind, level)
              default = 'title%dOpen'%level
          else:
              tagname = kind + repr(level)
              default = 'title' + repr(level)
          self.tag        = TAGS.get(tagname) or TAGS.get(default)
          self.last_level = self.level
          self.kind       = kind
          self.level      = level
          self.txt        = string.strip(found.group('txt'))
          self.label      = found.group('label')

      def _set_count_id(self):
          "Compose and save the title count identifier (if needed)."
          count_id = ''
          if self.kind == 'numtitle' and not rules['autonumbertitle']:
              level = self.level
              # manually increase title count
              self.count[level] = self.count[level] + 1
              # reset sublevels count (if any)
              for sublevel in range(level+1, len(self.count)):
                  self.count[sublevel] = 0
              # compose count id from hierarchy
              pieces = []
              for number in self.count[1:level+1]:
                  pieces.append('%d.'%number)
              count_id = string.join(pieces, '')
          self.count_id = count_id

      def _set_label(self):
          "Compose and save title label, used by anchors."
          # remove invalid chars from label set by user
          self.label = re.sub('[^A-Za-z0-9_-]', '', self.label or '')
          #TODO how to translate safely accented chars to plain?
          #     (needed to make a label from the title text itself)

      def _get_tagged_anchor(self):
          "Return anchor if user defined a label, or TOC is on."
          label = self.label
          if CONF['toc'] and self.level <= CONF['toc-level']:
              # this count is needed because self.toc stores all
              # titles, regardless of the 'toc-level' setting,
              # so we can't use self.toc length to number anchors
              self.anchor_count = self.anchor_count + 1
              # autonumber label (if needed)
              if not label:
                  label = '%s%s'%(self.anchor_prefix, self.anchor_count)
          if not (label and TAGS['anchor']):
              return ''
          return regex['x'].sub(label, TAGS['anchor'])

      def _get_full_title_text(self):
          "Returns the full title contents, already escaped."
          text = self.txt
          # insert count_id (if any) before text
          if self.count_id:
              text = '%s %s'%(self.count_id, text)
          # escape specials
          text = doEscape(TARGET, text)
          # some targets needs final escapes on title lines
          # it's here because there is a 'continue' after title
          if rules['finalescapetitle']:
              text = doFinalEscape(TARGET, text)
          return text

      def get(self):
          "Returns the tagged title as a list."
          # maybe some anchoring before?
          anchor = self._get_tagged_anchor()
          self.tag = regex['_anchor'].sub(anchor, self.tag)
          # compose & escape title text (TOC uses unescaped)
          full_title = self._get_full_title_text()
          # close previous section area
          ret = self.tag_hold[:]
          self.tag_hold = []
          # finish title, adding "underline" on TXT target
          tagged = regex['x'].sub(full_title, self.tag)
          if TARGET != 'txt':
              ret.append(tagged)
              return ret
          underline = regex['x'].sub('='*len(full_title), self.tag)
          ret.append('')           # blank line before
          ret.append(tagged)
          ret.append(underline)
          ret.append('')           # blank line after
          return ret

      def dump_marked_toc(self, max_level=99):
          "Dumps all toc items as a valid t2t markup list"
          #TODO maybe use quote+linebreaks instead lists
          # NOTE(review): the listing this code came from seems to
          # squeeze repeated spaces - confirm the indent literal
          marked = []
          number = 1
          has_links  = TAGS['anchor']
          for level, count_id, txt, label in self.toc:
              if level > max_level: continue      # ignore
              indent = ' '*level
              id_txt = string.lstrip('%s %s'%(count_id, txt))
              if not label:
                  label = self.anchor_prefix + repr(number)
              number = number + 1
              if has_links:
                  # TOC will have links.
                  # TOC is more readable with master topics
                  # not linked at number. This idea was taken
                  # from Windows .CHM help files
                  if CONF['enum-title'] and level == 1:
                      item = '%s+ [""%s"" #%s]'%(indent, txt, label)
                  else:
                      item = '%s- [""%s"" #%s]'%(indent, id_txt, label)
              elif TARGET in ['txt', 'man']:
                  # no links on TOC, just text
                  # man don't reformat TOC lines, cool!
                  item = '%s""%s""' %(indent, id_txt)
              else:
                  item = '%s- ""%s""'%(indent, id_txt)
              marked.append(item)
          return marked
  2237. ##############################################################################
  2238. #TODO check all this table mess
  2239. # handles the TABLE lines, using the properties from parse_row
  2240. # the table() method of BLOCK hacks in and swaps the cells for the parsed ones
  2241. class TableMaster:
  2242. def __init__(self, line=''):
  2243. self.rows = []
  2244. self.border = 0
  2245. self.align = 'Left'
  2246. self.cellalign = []
  2247. self.cellspan = []
  2248. if line:
  2249. prop = self.parse_row(line)
  2250. self.border = prop['border']
  2251. self.align = prop['align']
  2252. self.cellalign = prop['cellalign']
  2253. self.cellspan = prop['cellspan']
  def _get_open_tag(self):
      "Returns the table open tag with align, border and columns info."
      table_open = TAGS['tableOpen']
      border_tag = TAGS['tableBorder']
      align_tag  = TAGS['tableAlign'+self.align]
      col_sep    = TAGS['tableColAlignSep']
      # the first line defines if table has border or not
      if not self.border:
          border_tag = ''
      # set the columns alignment
      cols_spec = ''
      if rules['tablecellaligntype'] == 'column':
          col_tags = []
          for name in self.cellalign:
              col_tags.append(TAGS['tableColAlign%s'%name])
          cols_spec = string.join(col_tags, col_sep)
      # align full table, set border and Column align (if any)
      for mark, value in (('_tableAlign'   , align_tag ),
                          ('_tableBorder'  , border_tag),
                          ('_tableColAlign', cols_spec )):
          table_open = regex[mark].sub(value, table_open)
      # tex table spec, border or not: {|l|c|r|} , {lcr}
      if col_sep and not self.border:
          # remove cell align separator
          table_open = string.replace(table_open, col_sep, '')
      return table_open
  2276. def _get_cell_align(self, cells):
  2277. ret = []
  2278. for cell in cells:
  2279. align = 'Left'
  2280. if string.strip(cell):
  2281. if cell[0] == ' ' and cell[-1] == ' ':
  2282. align = 'Center'
  2283. elif cell[0] == ' ':
  2284. align = 'Right'
  2285. ret.append(align)
  2286. return ret
  2287. def _get_cell_span(self, cells):
  2288. ret = []
  2289. for cell in cells:
  2290. span = 0
  2291. m = re.search('\a(\|+)$', cell)
  2292. if m: span = len(m.group(1))+1
  2293. ret.append(span)
  2294. return ret
  def _tag_cells(self, rowdata):
      """
      Returns the table row as a string, with all its cells tagged.

      rowdata - dictionary with the row properties. The keys read
                are: cells, cellalign, cellspan and title.
      """
      cell_open  = TAGS['tableCellOpen']
      cell_close = TAGS['tableCellClose']
      cell_sep   = TAGS['tableCellSep']
      # the align tag of each cell
      aligns = []
      for name in rowdata['cellalign']:
          aligns.append(TAGS['tableCellAlign'+name])
      # populate the span tag
      spans = []
      for size in rowdata['cellspan']:
          if size > 0:
              spans.append(regex['x'].sub(
                  str(size), TAGS['tableCellColSpan']))
          else:
              spans.append('')
      # maybe is it a title row?
      is_title = rowdata['title']
      if is_title:
          cell_open  = TAGS['tableTitleCellOpen']  or cell_open
          cell_close = TAGS['tableTitleCellClose'] or cell_close
          cell_sep   = TAGS['tableTitleCellSep']   or cell_sep
      # should we break the line on *each* table cell?
      if rules['breaktablecell']:
          cell_close = cell_close + '\n'
      # cells pre processing
      cells = rowdata['cells']
      if rules['tablecellstrip']:
          stripped = []
          for cell in cells:
              stripped.append(string.strip(cell))
          cells = stripped
      if is_title and rules['tabletitlerowinbold']:
          bolded = []
          for cell in cells:
              bolded.append(enclose_me('fontBold', cell))
          cells = bolded
      # add cell BEGIN/END tags
      tagged = []
      for i in range(len(cells)):
          this_open = cell_open
          # insert cell align into open tag (if cell is alignable)
          if rules['tablecellaligntype'] == 'cell':
              this_open = regex['_tableCellAlign'].sub(
                  aligns[i], this_open)
          if rules['tablecellspannable']:
              this_open = regex['_tableCellColSpan'].sub(
                  spans[i], this_open)
          tagged.append(this_open + cells[i] + cell_close)
      # maybe there are cell separators?
      return string.join(tagged, cell_sep)
  def add_row(self, cells):
      "Saves one more row at the end of the table rows list."
      rows = self.rows
      rows.append(cells)
  def parse_row(self, line):
      """Parse one table line and return a dict with the row properties.

      The dict keys are: 'border', 'title', 'align', 'cells',
      'cellalign' and 'cellspan'.
      """
      # default table properties
      ret = {'border':0,'title':0,'align':'Left',
             'cells':[],'cellalign':[], 'cellspan':[]}
      # a leading space marks a centered table
      if line[0] == ' ':
          ret['align'] = 'Center'
      line = string.lstrip(line)
      # a pipe as the second char marks a title row
      if line[1] == '|':
          ret['title'] = 1
      # closing pipes mark a bordered table; the EOL is normalized
      # so every cell (including the last) ends with a separator
      if re.search(' (\|+) *$', line):
          ret['border'] = 1
          line = line + ' '
      else:
          line = line + ' | '
      # delete table mark
      line = regex['table'].sub('', line)
      # flag the colspan pipes with BEL:  | foo | bar baz |||
      line = re.sub(' (\|+)\| ', '\a\\1 | ', line)
      # split cells (the last one is fake, discard it)
      marked = string.split(line, ' | ')[:-1]
      # read the span from the mark, then remove the mark
      spans = self._get_cell_span(marked)
      clean = []
      for text in marked:
          clean.append(re.sub('\a\|+$', '', text))
      aligns = self._get_cell_align(clean)
      ret['cells'] = clean
      ret['cellspan'] = spans
      ret['cellalign'] = aligns
      Debug('Table Prop: %s' % ret, 7)
      return ret
  def dump(self):
      """Return the full tagged table as a list of lines.

      The list is: table open tag, one string for each row of
      self.rows, table close tag and (depending on the target rules)
      a trailing empty string.
      """
      open = self._get_open_tag()
      rows = self.rows
      close = TAGS['tableClose']
      rowopen = TAGS['tableRowOpen']
      rowclose = TAGS['tableRowClose']
      rowsep = TAGS['tableRowSep']
      # title rows fall back to the normal row tags
      titrowopen = TAGS['tableTitleRowOpen'] or rowopen
      titrowclose = TAGS['tableTitleRowClose'] or rowclose
      if rules['breaktablelineopen']:
          rowopen = rowopen + '\n'
          titrowopen = titrowopen + '\n'
      # tex gotchas
      if TARGET == 'tex':
          if not self.border:
              rowopen = titrowopen = ''
          else:
              close = rowopen + close
      # now we tag all the table cells on each row
      tagged_cells = []
      for rowdata in rows:
          tagged_cells.append(self._tag_cells(rowdata))
      tagged_rows = []
      if rowsep:
          # Row separator between rows only: the last row gets none,
          # because the table is over. The separator is simply not
          # added there, so cell contents are never touched and an
          # empty table does not raise IndexError.
          last = len(tagged_cells) - 1
          for i in range(len(tagged_cells)):
              if i < last:
                  tagged_rows.append(tagged_cells[i] + rowsep)
              else:
                  tagged_rows.append(tagged_cells[i])
      else:
          # add row BEGIN/END tags for each line
          for i in range(len(rows)):
              if rows[i]['title']:
                  o,c = titrowopen, titrowclose
              else:
                  o,c = rowopen, rowclose
              tagged_rows.append(o + tagged_cells[i] + c)
      fulltable = [open] + tagged_rows + [close]
      if rules['blankendtable']: fulltable.append('')
      return fulltable
  2410. ##############################################################################
  class BlockMaster:
      """Keep the stack of the open blocks and tag each one when closed.

      Three parallel stacks are kept, with one entry per open block:
      BLK (block name), HLD (held contents) and PRP (properties).
      blockout() calls the method named after the block being closed
      (para, verb, table, quote, list...) to get its tagged lines.

      TIP: use blockin/out to add/del holders
      """
      def __init__(self):
          self.BLK = []             # open block names, innermost last
          self.HLD = []             # held contents, one list per block
          self.PRP = []             # properties, one dict per block
          self.depth = 0            # number of open blocks
          self.last = ''            # name of the last popped block
          self.tableparser = None   # TableMaster while a table is open
          # which blocks can be opened inside each block
          self.contains = {
              'para'    :['passthru','raw'],
              'verb'    :[],
              'table'   :[],
              'raw'     :[],
              'passthru':[],
              'quote'   :['quote','passthru','raw'],
              'list'    :['list' ,'numlist' ,'deflist','para','verb',
                          'raw'  ,'passthru'],
              'numlist' :['list' ,'numlist' ,'deflist','para','verb',
                          'raw'  ,'passthru'],
              'deflist' :['list' ,'numlist' ,'deflist','para','verb',
                          'raw'  ,'passthru']
          }
          self.allblocks = self.contains.keys()

      def block(self):
          "Return the innermost open block name ('' if none)"
          if not self.BLK: return ''
          return self.BLK[-1]

      def isblock(self, name=''):
          "Tell if the innermost open block is 'name'"
          return self.block() == name

      def prop(self, key):
          "Return a property of the innermost block ('' if unset)"
          if not self.PRP: return ''
          return self.PRP[-1].get(key) or ''

      def propset(self, key, val):
          "Set a property of the innermost block"
          self.PRP[-1][key] = val
          #Debug('BLOCK prop ++: %s->%s'%(key,repr(val)), 1)
          #Debug('BLOCK props: %s'%(repr(self.PRP)), 1)

      def hold(self):
          "Return the held contents of the innermost block ([] if none)"
          if not self.HLD: return []
          return self.HLD[-1]

      def holdadd(self, line):
          "Hold one more line. On list blocks, it starts a new item."
          if self.block()[-4:] == 'list': line = [line]
          self.HLD[-1].append(line)
          Debug('HOLD add: %s'%repr(line), 4)
          Debug('FULL HOLD: %s'%self.HLD, 4)

      def holdaddsub(self, line):
          "Hold one more line inside the last held item"
          self.HLD[-1][-1].append(line)
          Debug('HOLD addsub: %s'%repr(line), 4)
          Debug('FULL HOLD: %s'%self.HLD, 4)

      def holdextend(self, lines):
          "Hold several lines. On list blocks, they make one new item."
          if self.block()[-4:] == 'list': lines = [lines]
          self.HLD[-1].extend(lines)
          Debug('HOLD extend: %s'%repr(lines), 4)
          Debug('FULL HOLD: %s'%self.HLD, 4)

      def blockin(self, block):
          """Open a new block, closing first the ones that can't hold it.

          Returns the tagged lines of the blocks closed on the way.
          """
          ret = []
          if block not in self.allblocks:
              Error("Invalid block '%s'"%block)
          # first, let's close other possible open blocks
          while self.block() and block not in self.contains[self.block()]:
              ret.extend(self.blockout())
          # now we can gladly add this new one
          self.BLK.append(block)
          self.HLD.append([])
          self.PRP.append({})
          if block == 'table': self.tableparser = TableMaster()
          # deeper and deeper
          self.depth = len(self.BLK)
          Debug('block ++ (%s): %s' % (block,self.BLK), 3)
          return ret

      def blockout(self):
          """Close the innermost block and return its tagged lines.

          When there is a mother block still open, the tagged lines are
          inserted into her hold and an empty list is returned.
          """
          if not self.BLK: Error('No block to pop')
          self.last = self.BLK.pop()
          # The tagging method runs while HLD/PRP still have this
          # block's entries and self.depth still counts it
          tagged = getattr(self, self.last)()
          parsed = self.HLD.pop()
          self.PRP.pop()
          self.depth = len(self.BLK)
          # back to the __init__ state (instead of deleting the
          # attribute, which would break any later read of it)
          if self.last == 'table': self.tableparser = None
          # inserting a nested block into mother
          if self.block():
              if self.block()[-4:] == 'list':
                  self.HLD[-1][-1].append(tagged)
              else:
                  self.HLD[-1].append(tagged)
              tagged = []  # reset. mother will have it all
          Debug('block -- (%s): %s' % (self.last,self.BLK), 3)
          Debug('RELEASED (%s): %s' % (self.last,parsed), 3)
          if tagged: Debug('BLOCK: %s'%tagged, 6)
          return tagged

      def _last_escapes(self, line):
          "Apply the target final escapes on the line"
          return doFinalEscape(TARGET, line)

      def _get_escaped_hold(self):
          """Return the hold as a flat list of lines.

          Strings get the final escapes. Lists (already tagged nested
          blocks) are inserted as they are.
          """
          ret = []
          for line in self.hold():
              linetype = type(line)
              if linetype == type(''):
                  ret.append(self._last_escapes(line))
              elif linetype == type([]):
                  ret.extend(line)
              else:
                  Error("BlockMaster: Unknown HOLD item type:"
                        " %s"%linetype)
          return ret

      def _remove_twoblanks(self, lastitem):
          "Return the item without its two trailing blanks (if any)"
          if len(lastitem) > 1 and lastitem[-2:] == ['','']:
              return lastitem[:-2]
          return lastitem

      def passthru(self):
          "The held lines are returned untouched"
          return self.hold()

      def raw(self):
          "The held lines are just escaped"
          lines = self.hold()
          return map(lambda x: doEscape(TARGET, x), lines)

      def para(self):
          "Return the tagged paragraph lines"
          tagged = []
          open = TAGS['paragraphOpen']
          close = TAGS['paragraphClose']
          lines = self._get_escaped_hold()
          # open (or not) paragraph
          # (self.last was set by blockout() before calling us)
          if not open+close and self.last == 'para':
              pass # avoids multiple blank lines
          else:
              tagged.append(open)
          # pagemaker likes a paragraph as a single long line
          if rules['onelinepara']:
              tagged.append(string.join(lines,' '))
          # others are normal :)
          else:
              tagged.extend(lines)
          tagged.append(close)
          # very very very very very very very very very UGLY fix
          # needed because <center> can't appear inside <p>
          try:
              if len(lines) == 1 and \
                 TARGET in ('html', 'xhtml') and \
                 re.match('^\s*<center>.*</center>\s*$', lines[0]):
                  tagged = [lines[0]]
          except TypeError:
              # the lonely line is not a string: nothing to untag
              pass
          return tagged

      def verb(self):
          "Verbatim lines are not masked, so there's no need to unmask"
          tagged = []
          tagged.append(TAGS['blockVerbOpen'])
          for line in self.hold():
              if self.prop('mapped') == 'table':
                  line = MacroMaster().expand(line)
              if not rules['verbblocknotescaped']:
                  line = doEscape(TARGET,line)
              if rules['indentverbblock']:
                  line = ' '+line
              if rules['verbblockfinalescape']:
                  line = doFinalEscape(TARGET, line)
              tagged.append(line)
          #TODO maybe use if not TAGS['blockVerbClose']
          if TARGET != 'pm6':
              tagged.append(TAGS['blockVerbClose'])
          return tagged

      def table(self):
          "Return the tagged table lines, as dumped by the table parser"
          # rewrite all table cells by the unmasked and escaped data
          lines = self._get_escaped_hold()
          for i in range(len(lines)):
              cells = string.split(lines[i], SEPARATOR)
              self.tableparser.rows[i]['cells'] = cells
          return self.tableparser.dump()

      def quote(self):
          "Return the tagged quote lines"
          tagged = []
          myre = regex['quote']
          open = TAGS['blockQuoteOpen']            # block based
          close = TAGS['blockQuoteClose']
          qline = TAGS['blockQuoteLine']           # line based
          indent = tagindent = '\t'*self.depth
          if rules['tagnotindentable']: tagindent = ''
          if not rules['keepquoteindent']: indent = ''
          if open: tagged.append(tagindent+open)   # open block
          for item in self.hold():
              if type(item) == type([]):
                  tagged.extend(item)              # subquotes
              else:
                  item = myre.sub('', item)        # del TABs
                  if rules['barinsidequote']:
                      item = get_tagged_bar(item)
                  item = self._last_escapes(item)
                  item = qline*self.depth + item
                  tagged.append(indent+item)       # quote line
          if close: tagged.append(tagindent+close) # close block
          return tagged

      def deflist(self): return self.list('deflist')

      def numlist(self): return self.list('numlist')

      def list(self, name='list'):
          """Return the tagged lines of the held list items.

          name -- 'list', 'numlist' or 'deflist'. It is the prefix of
                  the TAGS keys used to tag the list.
          """
          tagged = []
          items = self.hold()
          indent = self.prop('indent')
          tagindent = indent
          listopen = TAGS.get(name+'Open')
          listclose = TAGS.get(name+'Close')
          listline = TAGS.get(name+'ItemLine')
          itemcount = 0
          if rules['tagnotindentable']: tagindent = ''
          if not rules['keeplistindent']: indent = ''
          if name == 'deflist':
              itemopen = TAGS[name+'Item1Open']
              itemclose = TAGS[name+'Item2Close']
              itemsep = TAGS[name+'Item1Close']+\
                        TAGS[name+'Item2Open']
          else:
              itemopen = TAGS[name+'ItemOpen']
              itemclose = TAGS[name+'ItemClose']
              itemsep = ''
          # ItemLine: number of leading chars identifies list depth
          if listline:
              itemopen = listline*self.depth
              # dirty fix for mgp
              if name == 'numlist': itemopen = itemopen + '\a. '
          # remove two-blanks from list ending mark, to avoid <p>
          items[-1] = self._remove_twoblanks(items[-1])
          # Not nestable lists are only opened/closed at mother.
          # The depth comes from this very instance (as in quote and in
          # the ItemLine above), not from the global BLOCK.
          at_mother = (self.depth == 1)
          # open list (not nestable lists are only opened at mother)
          if listopen and not \
             (rules['listnotnested'] and not at_mother):
              tagged.append(tagindent+listopen)
          # tag each list item (multiline items)
          itemopenorig = itemopen
          for item in items:
              # add "manual" item count for noautonum targets
              itemcount = itemcount + 1
              if name == 'numlist' and not rules['autonumberlist']:
                  itemopen = regex['x'].sub(str(itemcount), itemopenorig)
              item[0] = self._last_escapes(item[0])
              if name == 'deflist':
                  term, rest = string.split(item[0],SEPARATOR,1)
                  item[0] = rest
                  if not item[0]: del item[0]      # to avoid <p>
                  tagged.append(tagindent+itemopen+term+itemsep)
              else:
                  fullitem = tagindent+itemopen
                  tagged.append(string.replace(
                      item[0], SEPARATOR, fullitem))
                  del item[0]
              # process next lines for this item (if any)
              for line in item:
                  if type(line) == type([]):       # sublist inside
                      tagged.extend(line)
                  else:
                      line = self._last_escapes(line)
                      # blank lines turns to <p>
                      if not line and rules['parainsidelist']:
                          line = string.rstrip(indent +\
                              TAGS['paragraphOpen']+\
                              TAGS['paragraphClose'])
                      if not rules['keeplistindent']:
                          line = string.lstrip(line)
                      tagged.append(line)
              # close item (if needed)
              if itemclose: tagged.append(tagindent+itemclose)
          # close list (not nestable lists are only closed at mother)
          if listclose and not \
             (rules['listnotnested'] and not at_mother):
              tagged.append(tagindent+listclose)
          if rules['blankendmotherlist'] and at_mother:
              tagged.append('')
          return tagged
  2671. ##############################################################################
  class MacroMaster:
      """Expand the macros (date, mtime, infile, outfile) found on a line.

      config -- config dict with the 'sourcefile' and 'outfile' keys.
                When empty, the global CONF is used.
      """
      def __init__(self, config={}):
          self.name = ''                       # macro being expanded
          self.config = config or CONF
          self.infile = self.config['sourcefile']
          self.outfile = self.config['outfile']
          self.currdate = time.localtime(time.time())
          self.rgx = regex.get('macros') or getRegexes()['macros']
          # cache of path/dir/name for each file macro
          self.fileinfo = { 'infile': None, 'outfile': None }
          self.dft_fmt = MACROS                # default formats

      def walk_file_format(self, fmt):
          "Walks the %%{in/out}file format string, expanding the % flags"
          i = 0; ret = ''                      # counter/hold
          while i < len(fmt):                  # char by char
              c = fmt[i]; i = i + 1
              if c == '%':                     # hot char!
                  if i == len(fmt):            # % at the end
                      ret = ret + c
                      break
                  c = fmt[i]; i = i + 1        # read next
                  ret = ret + self.expand_file_flag(c)
              else:
                  ret = ret + c                # common char
          return ret

      def expand_file_flag(self, flag):
          """Return the expansion of one file format flag.

          %f: filename          %F: filename (w/o extension)
          %d: dirname           %D: dirname (only parent dir)
          %p: file path         %e: extension

          %% gives a literal % and unknown flags are returned as is.
          The file data is read from self.fileinfo[self.name].
          """
          info = self.fileinfo[self.name]      # get dict
          if   flag == '%': x = '%'            # %% -> %
          elif flag == 'f': x = info['name']
          elif flag == 'F': x = re.sub('\.[^.]*$','',info['name'])
          elif flag == 'd': x = info['dir']
          elif flag == 'D': x = os.path.split(info['dir'])[-1]
          elif flag == 'p': x = info['path']
          elif flag == 'e':
              # no match at all (empty name) means no extension
              m = re.search('.(\.([^.]+))?$',info['name'])
              x = (m and m.group(2)) or ''
              #TODO simplier way for %e ?
          else            : x = '%'+flag       # false alarm
          return x

      def set_file_info(self, macroname):
          "Fill self.fileinfo[macroname] with the file path, dir and name"
          if self.fileinfo.get(macroname): return   # already done
          # the attribute name is the macro name: self.infile/outfile
          filename = getattr(self, macroname)
          if filename in [STDOUT, MODULEOUT]:
              dirname = ''; path = name = filename
          else:
              path = os.path.abspath(filename)
              dirname = os.path.dirname(path)
              name = os.path.basename(path)
          self.fileinfo[macroname] = {
              'path':path, 'dir':dirname, 'name':name}

      def expand(self, line=''):
          "Expand all macros found on the line"
          m = self.rgx.search(line)
          while m:
              name = self.name = string.lower(m.group('name'))
              fmt = m.group('fmt') or self.dft_fmt.get(name)
              if name == 'date':
                  txt = time.strftime(fmt,self.currdate)
              elif name == 'mtime':
                  if self.infile in [STDIN, MODULEIN]:
                      fdate = self.currdate
                  else:
                      mtime = os.path.getmtime(self.infile)
                      fdate = time.localtime(mtime)
                  txt = time.strftime(fmt,fdate)
              elif name in ['infile','outfile']:
                  self.set_file_info(name)
                  txt = self.walk_file_format(fmt)
              else:
                  Error("Unknown macro name '%s'"%name)
              # The expanded text is spliced in literally. It must not
              # be used as a regex replacement template, or backslashes
              # on it (think of a Windows file path) would be taken as
              # escapes or group references.
              line = line[:m.start()] + txt + line[m.end():]
              m = self.rgx.search(line)
          return line
  2744. ##############################################################################
  2745. def dumpConfig(source_raw, parsed_config):
  2746. onoff = {1:_('ON'), 0:_('OFF')}
  2747. data = [
  2748. (_('RC file') , RC_RAW ),
  2749. (_('source document'), source_raw ),
  2750. (_('command line') , CMDLINE_RAW)
  2751. ]
  2752. # first show all RAW data found
  2753. for label, cfg in data:
  2754. print _('RAW config for %s')%label
  2755. for target,key,val in cfg:
  2756. target = '(%s)'%target
  2757. key = dotted_spaces("%-14s"%key)
  2758. val = val or _('ON')
  2759. print ' %-8s %s: %s'%(target,key,val)
  2760. print
  2761. # then the parsed results of all of them
  2762. print _('Full PARSED config')
  2763. keys = parsed_config.keys() ; keys.sort() # sorted
  2764. for key in keys:
  2765. val = parsed_config[key]
  2766. # filters are the last
  2767. if key in ['preproc', 'postproc']:
  2768. continue
  2769. # flag beautifier
  2770. if key in FLAGS.keys()+ACTIONS.keys():
  2771. val = onoff.get(val) or val
  2772. # list beautifier
  2773. if type(val) == type([]):
  2774. if key == 'options': sep = ' '
  2775. else : sep = ', '
  2776. val = string.join(val, sep)
  2777. print "%25s: %s"%(dotted_spaces("%-14s"%key),val)
  2778. print
  2779. print _('Active filters')
  2780. for filter in ['preproc','postproc']:
  2781. for rule in parsed_config.get(filter) or []:
  2782. print "%25s: %s -> %s"%(
  2783. dotted_spaces("%-14s"%filter),rule[0],rule[1])
  def get_file_body(file):
      "Returns all the document BODY lines"
      # the body is the third part of the second returned item
      parsed = process_source_file(file, noconf=1)
      document = parsed[1]
      return document[2]
  2787. def finish_him(outlist, config):
  2788. "Writing output to screen or file"
  2789. outfile = config['outfile']
  2790. outlist = unmaskEscapeChar(outlist)
  2791. outlist = expandLineBreaks(outlist)
  2792. # apply PostProc filters
  2793. if config['postproc']:
  2794. filters = compile_filters(config['postproc'],
  2795. _('Invalid PostProc filter regex'))
  2796. postoutlist = []
  2797. errmsg = _('Invalid PostProc filter replacement')
  2798. for line in outlist:
  2799. for rgx,repl in filters:
  2800. try: line = rgx.sub(repl, line)
  2801. except: Error("%s: '%s'"%(errmsg, repl))
  2802. postoutlist.append(line)
  2803. outlist = postoutlist[:]
  2804. if outfile == MODULEOUT:
  2805. return outlist
  2806. elif outfile == STDOUT:
  2807. if GUI:
  2808. return outlist, config
  2809. else:
  2810. for line in outlist: print line
  2811. else:
  2812. Savefile(outfile, addLineBreaks(outlist))
  2813. if not GUI and not QUIET:
  2814. print _('%s wrote %s')%(my_name,outfile)
  2815. if config['split']:
  2816. if not QUIET: print "--- html..."
  2817. sgml2html = 'sgml2html -s %s -l %s %s'%(
  2818. config['split'],config['lang'] or lang,outfile)
  2819. if not QUIET: print "Running system command:", sgml2html
  2820. os.system(sgml2html)
  def toc_inside_body(body, toc, config):
      """Return the body lines with the TOC mark lines expanded.

      Each line holding the TOC mark is replaced by the toc lines when
      config['toc'] is on, or removed otherwise. When AUTOTOC is on,
      the body is returned as is.
      """
      if AUTOTOC:
          return body                      # nothing to expand
      mark = MaskMaster().tocmask
      expanded = []
      for line in body:
          if not string.count(line, mark):
              expanded.append(line)        # common line
              continue
          # toc mark found: include TOC if --toc, or drop the line
          if config['toc']:
              expanded.extend(toc)
      return expanded
  def toc_tagger(toc, config):
      "Convert t2t-marked TOC (it is a list) to target-tagged TOC"
      # tag "by hand" if TOC-only or if the target has no TOC tag
      if config['toc-only'] or (config['toc'] and not TAGS['TOC']):
          fakeconf = config.copy()
          overrides = [
              ('headers'   , 0 ),
              ('toc-only'  , 0 ),
              ('mask-email', 0 ),
              ('preproc'   , []),
              ('postproc'  , []),
              ('css-sugar' , 0 )
          ]
          for key, val in overrides:
              fakeconf[key] = val
          tagged, foo = convert(toc, fakeconf)
          set_global_config(config)        # restore config
          return tagged
      # target TOC is a tag
      if config['toc'] and TAGS['TOC']:
          return [TAGS['TOC']]
      return []
  def toc_formatter(toc, config):
      """Formats TOC for automatic placement between headers and body.

      toc    -- list with the TOC lines
      config -- config dict; the 'toc-only' and 'toc' keys are read

      Returns the very same toc list when TOC-only, an empty list when
      the TOC is disabled, or else a NEW list with the formatted TOC.
      The received list is never changed.
      """
      if config['toc-only']: return toc    # no formatting needed
      if not config['toc'] : return []     # TOC disabled
      # work on a copy, so the caller's list is left untouched
      ret = toc[:]
      # TOC open/close tags (if any)
      if TAGS['tocOpen' ]: ret.insert(0, TAGS['tocOpen'])
      if TAGS['tocClose']: ret.append(TAGS['tocClose'])
      # autotoc specific formatting
      if AUTOTOC:
          if rules['autotocwithbars']:     # TOC between bars
              para = TAGS['paragraphOpen']+TAGS['paragraphClose']
              bar = regex['x'].sub('-'*72,TAGS['bar1'])
              tocbar = [para, bar, para]
              ret = tocbar + ret + tocbar
          if rules['blankendautotoc']:     # blank line after TOC
              ret.append('')
          if rules['autotocnewpagebefore']: # page break before TOC
              ret.insert(0,TAGS['pageBreak'])
          if rules['autotocnewpageafter']:  # page break after TOC
              ret.append(TAGS['pageBreak'])
      return ret
  def doHeader(headers, config):
      """Return the list of the document header lines for the target.

      headers -- list with the three header lines (empty means blank)
      config  -- config dict; 'headers' and 'target' are required

      The target template (HEADER_TEMPLATE) is filled with the header
      lines, the style and the encoding. Template lines left with no
      data to show are removed. Returns [] when headers are disabled.
      """
      if not config['headers']: return []
      if not headers: headers = ['','','']
      target = config['target']
      if not HEADER_TEMPLATE.has_key(target):
          Error("doheader: Unknow target '%s'"%target)
      if target in ['html','xhtml'] and config.get('css-sugar'):
          template = string.split(HEADER_TEMPLATE[target+'css'], '\n')
      else:
          template = string.split(HEADER_TEMPLATE[target], '\n')
      # is the CSS file contents to be included in the headers?
      css_inside = target in ['html','xhtml'] and \
                   config.get('css-inside') and config.get('style')
      head_data = {'STYLE':'', 'ENCODING':''}
      for key in head_data.keys():
          val = config.get(string.lower(key))
          if key == 'ENCODING': val = get_encoding_string(val, target)
          head_data[key] = val
      # parse header contents
      for i in 0,1,2:
          # expand macros
          contents = MacroMaster(config=config).expand(headers[i])
          # Escapes - on tex, just do it if any \tag{} present
          if target != 'tex' or \
             (target == 'tex' and re.search(r'\\\w+{', contents)):
              contents = doEscape(target, contents)
          if target == 'lout':
              contents = doFinalEscape(target, contents)
          head_data['HEADER%d'%(i+1)] = contents
      # css-inside removes STYLE line
      if css_inside:
          head_data['STYLE'] = ''
      Debug("Header Data: %s"%head_data, 1)
      # scan for empty dictionary keys
      # if found, scan template lines for that key reference
      # if found, remove the reference
      # if there isn't any other key reference on the same line, remove it
      #
      # The template is rebuilt on a new list. Removing items from the
      # list being walked would make the loop skip the next line.
      for key in head_data.keys():
          if head_data.get(key): continue
          keyref = '%%(%s)s'%key
          kept = []
          for line in template:
              if string.count(line, keyref):
                  line = string.replace(line, keyref, '')
                  if not re.search(r'%\([A-Z0-9]+\)s', line):
                      continue             # nothing left to show
              kept.append(line)
          template = kept
      # populate template with data
      template = string.join(template, '\n') % head_data
      # adding CSS contents into template (for --css-inside)
      if css_inside:
          tags = getTags(config)
          cssfile = config['style']
          if not os.path.isabs(cssfile):
              infile = config.get('sourcefile')
              cssfile = os.path.join(os.path.dirname(infile), cssfile)
          css = string.join(Readfile(cssfile, 1, 1), '\n')
          css = "%s\n%s\n%s\n" % (tags['cssOpen'], css, tags['cssClose'])
          # A function is used as the replacement, so the CSS text is
          # inserted literally (backslashes on it are not escapes)
          template = re.sub('(?i)(</HEAD>)',
              lambda m, css=css: css + m.group(1), template)
      return string.split(template, '\n')
  def doCommentLine(txt):
      """Return txt inside the target comment tag.

      Returns an empty string if the target has no comment tag.
      """
      txt = maskEscapeChar(txt)
      tag = TAGS['comment']
      # the -- string ends a (h|sg|xht)ml comment :(
      # so a mark is inserted after each - that precedes another -
      if string.count(tag, '--') and string.count(txt, '--'):
          txt = re.sub('-(?=-)', r'-\\', txt)
      if not tag:
          return ''
      return regex['x'].sub(txt, tag)
  def doFooter(config):
      """Return the list of the document footer lines.

      The footer is: a blank line, two comment lines (the generator
      identification and the command line used) and the target EOD
      tag. Returns [] when headers are disabled.
      """
      if not config['headers']:
          return []
      target = config['target']
      args = config['realcmdline']
      if target == 'tex':
          typename = 'LaTeX2e'
      else:
          typename = target
      generator = '%s code generated by %s %s (%s)'%(
          typename,my_name,my_version,my_url)
      command = 'cmdline: %s %s'%(my_name, string.join(args, ' '))
      return [
          '',
          doCommentLine(generator),
          doCommentLine(command),
          TAGS['EOD']
      ]
  def doEscape(target,txt):
      "Target-specific special escapes. Apply *before* insert any tag."
      # temporary text that takes the place of the escape char
      tmpmask = 'vvvvThisEscapingSuxvvvv'
      if target in ['html','sgml','xhtml']:
          # the & must be the first one
          for char, entity in [('&','&amp;'), ('<','&lt;'), ('>','&gt;')]:
              txt = re.sub(char, entity, txt)
          if target == 'sgml':
              txt = re.sub('\xff','&yuml;',txt)   # "+y
          return txt
      if target == 'pm6':
          return re.sub('<','<\#60>',txt)
      if target == 'mgp':
          # add leading blank to avoid parse
          return re.sub('^%',' %',txt)
      if target == 'man':
          txt = re.sub("^([.'])", '\\&\\1',txt)   # command ID
          return string.replace(txt,ESCCHAR, ESCCHAR+'e')   # \e
      if target == 'lout':
          # TIP: / moved to FinalEscape to avoid //italic//
          # TIP: these are also converted by lout:  ...  ---  --
          txt = string.replace(txt, ESCCHAR, tmpmask)          # \
          txt = string.replace(txt, '"', '"%s""'%ESCCHAR)      # "\""
          txt = re.sub('([|&{}@#^~])', '"\\1"',txt)            # "@"
          return string.replace(
              txt, tmpmask, '"%s"'%(ESCCHAR*2))                # "\\"
      if target == 'tex':
          # mark literal \ to be changed to $\backslash$ later
          txt = string.replace( txt, ESCCHAR, tmpmask)
          txt = re.sub('([#$&%{}])', ESCCHAR+r'\1'  , txt)     # \%
          txt = re.sub('([~^])'    , ESCCHAR+r'\1{}', txt)     # \~{}
          txt = re.sub('([<|>])'   , r'$\1$', txt)             # $>$
          # TIP the _ is escaped at the end
          return string.replace(txt, tmpmask,
              maskEscapeChar(r'$\backslash$'))
      # other targets: nothing to escape
      return txt
  # TODO man: where - really needs to be escaped?
  def doFinalEscape(target, txt):
      "Last escapes of each line"
      if target == 'pm6':
          return string.replace(txt,ESCCHAR+'<',r'<\#92><')
      if target == 'man':
          return string.replace(txt, '-', r'\-')
      if target == 'sgml':
          return string.replace(txt, '[', '&lsqb;')
      if target == 'lout':
          return string.replace(txt, '/', '"/"')
      if target == 'tex':
          escaped = string.replace(txt, '_', r'\_')
          # the masked underscores come back unescaped (shame!)
          return string.replace(escaped, 'vvvvTexUndervvvv', '_')
      return txt
  def EscapeCharHandler(action, data):
      """Mask/Unmask the Escape Char on the given string.

      action -- 'mask' changes every \\ to ESCCHAR, 'unmask' undoes it
      Blank strings are returned as they came.
      """
      if not string.strip(data):
          return data
      if action not in ['mask','unmask']:
          Error("EscapeCharHandler: Invalid action '%s'"%action)
      if action == 'mask':
          old, new = '\\', ESCCHAR
      else:
          old, new = ESCCHAR, '\\'
      return string.replace(data, old, new)
  def maskEscapeChar(data):
      "Replace any Escape Char \ with a text mask (Input: str or list)"
      if type(data) != type([]):
          return EscapeCharHandler('mask',data)
      masked = []
      for item in data:
          masked.append(EscapeCharHandler('mask', item))
      return masked
  def unmaskEscapeChar(data):
      "Undo the Escape char \ masking (Input: str or list)"
      if type(data) != type([]):
          return EscapeCharHandler('unmask',data)
      unmasked = []
      for item in data:
          unmasked.append(EscapeCharHandler('unmask', item))
      return unmasked
  def addLineBreaks(mylist):
      "use LB to respect sys.platform"
      broken = []
      for text in mylist:
          # embedded \n's are changed to LB, and a final LB is added
          broken.append(string.replace(text, '\n', LB) + LB)
      return broken
  # convert ['foo\nbar'] to ['foo', 'bar']
  def expandLineBreaks(mylist):
      "Split each item on its embedded line breaks, returning a flat list"
      expanded = []
      for text in mylist:
          for piece in string.split(text, '\n'):
              expanded.append(piece)
      return expanded
  def compile_filters(filters, errmsg='Filter'):
      """Compile the regex of each (pattern, replacement) filter.

      The filters list is changed in place (each pattern gives place to
      its compiled regex) and is also returned. Error() is called with
      errmsg when a pattern can't be compiled.
      """
      if not filters:
          return filters
      pos = 0
      while pos < len(filters):
          patt, repl = filters[pos]
          try:
              rgx = re.compile(patt)
          except:
              Error("%s: '%s'"%(errmsg, patt))
          filters[pos] = (rgx, repl)
          pos = pos + 1
      return filters
  def enclose_me(tagname, txt):
      "Return txt between the tagname Open and Close tags"
      tag_open = TAGS.get(tagname+'Open')
      tag_close = TAGS.get(tagname+'Close')
      return tag_open + txt + tag_close
  def beautify_me(name, line):
      "where name is: bold, italic or underline"
      # bold -> fontBold: the key for both the regex and the tags
      fontname = 'font%s' % string.capitalize(name)
      tag_open = TAGS['%sOpen'%fontname]
      tag_close = TAGS['%sClose'%fontname]
      # the marked text (group 1) goes between the tags
      replacement = r'%s\1%s'%(tag_open, tag_close)
      return regex[fontname].sub(replacement, line)
  def get_tagged_link(label, url):
      """Return the tagged link for the given label and URL.

      label -- the link text (empty for a simple link)
      url   -- the link address: URL or e-mail
      """
      target = CONF['target']
      image_re = regex['img']
      # set link type
      linktype = 'url'
      if regex['email'].match(url):
          linktype = 'email'
      # escape specials from TEXT parts
      label = doEscape(target,label)
      if rules['linkable']:
          # escape specials from link URL
          if rules['escapeurl']:
              url = doEscape(target, url)
      # if not linkable, the URL is plain text, that needs escape
      elif target == 'tex':
          url = re.sub('^#', '\#', url)    # ugly, but compile
      else:
          url = doEscape(target,url)
      # adding protocol to guessed link
      # (not link aware targets -> protocol is useless)
      guessurl = ''
      if linktype == 'url' and \
         re.match(regex['_urlskel']['guess'], url):
          if rules['linkable']:
              if url[0] == 'w':
                  guessurl = 'http://' +url
              else:
                  guessurl = 'ftp://' +url
      # simple link (not guessed)
      if not (label or guessurl):
          if CONF['mask-email'] and linktype == 'email':
              # do the email mask feature (no TAGs, just text)
              masked = string.replace(url,'@',' (a) ')
              masked = string.replace(masked,'.',' ')
              masked = "<%s>" % masked
              if rules['linkable']:
                  masked = doEscape(target, masked)
              return masked
          # just add link data to tag
          return regex['x'].sub(url, TAGS[linktype])
      # named link or guessed simple link
      if not label: label = url            # no protocol
      if guessurl : url = guessurl         # with protocol
      # image inside link!
      found = image_re.match(label)
      if found:
          if rules['imglinkable']:         # get image tag
              label = parse_images(label)
          else:                            # img@link !supported
              label = "(%s)"%found.group(1)
      # putting data on the right appearance order
      if rules['linkable']:
          first, second = url, label       # link before label
      else:
          first, second = label, url       # label before link
      # add link data to tag (replace \a's)
      tagged = TAGS["%sMark"%linktype]
      tagged = regex['x'].sub(first, tagged, 1)
      tagged = regex['x'].sub(second, tagged, 1)
      return tagged
  def parse_deflist_term(line):
      "Extract and parse definition list term contents"
      img_re = regex['img']
      term = regex['deflist'].search(line).group(3)
      # mask image inside term as (image.jpg), where not supported
      if not rules['imgasdefterm']:
          found = img_re.search(term)
          while found:
              term = img_re.sub('(%s)'%found.group(1), term, 1)
              found = img_re.search(term)
      #TODO tex: escape ] on term. \], \rbrack{} and \verb!]! don't work :(
      return term
  def get_tagged_bar(line):
      """Return the tagged bar for the line.

      If there is no bar on the line, it is returned untouched.
      """
      found = regex['bar'].search(line)
      if not found:
          return line
      chars = found.group(2)
      # map strong bar to pagebreak (the global TAGS is changed)
      if rules['mapbar2pagebreak'] and TAGS['pageBreak']:
          TAGS['bar2'] = TAGS['pageBreak']
      # set bar type: made of = is strong, else is the normal one
      if chars[0] == '=':
          tag = TAGS['bar2']
      else:
          tag = TAGS['bar1']
      # to avoid comment tag confusion like <!-- ------ -->
      if string.count(TAGS['comment'], '--'):
          chars = string.replace(chars,'--','__')
      # tag line
      return regex['x'].sub(chars, tag)
  def get_image_align(line):
      "Return the image (first found) align for the given line"
      # first clear marks that can mess align detection:
      # deflist separator, list separator and quote mark
      for mark in [SEPARATOR+'$', '^'+SEPARATOR, '^[\t]+']:
          line = re.sub(mark, '', line)
      # get image position on the line
      found = regex['img'].search(line)
      at_start = (found.start() == 0)
      at_end = (found.end() == len(line))
      # the align detection algorithm
      if at_start and not at_end:
          align = 'left'                   # ^img + text$
      elif at_end and not at_start:
          align = 'right'                  # ^text + img$
      else:
          align = 'center'                 # default align
      # some special cases
      if BLOCK.isblock('table'):
          align = 'center'                 # ignore when table
      return align
  3153. # reference: http://www.iana.org/assignments/character-sets
  3154. # http://www.drclue.net/F1.cgi/HTML/META/META.html
  3155. def get_encoding_string(enc, target):
  3156. if not enc: return ''
  3157. # target specific translation table
  3158. translate = {
  3159. 'tex': {
  3160. # missing: ansinew , applemac , cp437 , cp437de , cp865
  3161. 'us-ascii' : 'ascii',
  3162. 'windows-1250': 'cp1250',
  3163. 'windows-1252': 'cp1252',
  3164. 'ibm850' : 'cp850',
  3165. 'ibm852' : 'cp852',
  3166. 'iso-8859-1' : 'latin1',
  3167. 'iso-8859-2' : 'latin2',
  3168. 'iso-8859-3' : 'latin3',
  3169. 'iso-8859-4' : 'latin4',
  3170. 'iso-8859-5' : 'latin5',
  3171. 'iso-8859-9' : 'latin9',
  3172. 'koi8-r' : 'koi8-r'
  3173. }
  3174. }
  3175. # normalization
  3176. enc = re.sub('(?i)(us[-_]?)?ascii|us|ibm367','us-ascii' , enc)
  3177. enc = re.sub('(?i)(ibm|cp)?85([02])' ,'ibm85\\2' , enc)
  3178. enc = re.sub('(?i)(iso[_-]?)?8859[_-]?' ,'iso-8859-' , enc)
  3179. enc = re.sub('iso-8859-($|[^1-9]).*' ,'iso-8859-1', enc)
  3180. # apply translation table
  3181. try: enc = translate[target][string.lower(enc)]
  3182. except: pass
  3183. return enc
  3184. ##############################################################################
  3185. ##MerryChristmas,IdontwanttofighttonightwithyouImissyourbodyandIneedyourlove##
  3186. ##############################################################################
  3187. def process_source_file(file='', noconf=0, contents=[]):
  3188. """
  3189. Find and Join all the configuration available for a source file.
  3190. No sanity checkings are done on this step.
  3191. It also extracts the source document parts into separate holders.
  3192. The config scan order is:
  3193. 1. The user configuration file (i.e. $HOME/.txt2tagsrc)
  3194. 2. The source document's CONF area
  3195. 3. The command line options
  3196. The return data is a tuple of two items:
  3197. 1. The parsed config dictionary
  3198. 2. The document's parts, as a (head, conf, body) tuple
  3199. All the conversion process will be based on the data and
  3200. configuration returned by this function.
  3201. The source files is readed on this step only.
  3202. """
  3203. if contents:
  3204. source = SourceDocument(contents=contents)
  3205. else:
  3206. source = SourceDocument(file)
  3207. head, conf, body = source.split()
  3208. Message(_("Source document contents stored"),2)
  3209. if not noconf:
  3210. # read document config
  3211. source_raw = source.get_raw_config()
  3212. # join all the config directives found, then parse it
  3213. full_raw = RC_RAW + source_raw + CMDLINE_RAW
  3214. Message(_("Parsing and saving all config found (%03d items)")%(
  3215. len(full_raw)),1)
  3216. full_parsed = ConfigMaster(full_raw).parse()
  3217. # add manually the filemane to the conf dic
  3218. if contents:
  3219. full_parsed['sourcefile'] = MODULEIN
  3220. full_parsed['infile'] = MODULEIN
  3221. full_parsed['outfile'] = MODULEOUT
  3222. else:
  3223. full_parsed['sourcefile'] = file
  3224. # maybe should we dump the config found?
  3225. if full_parsed.get('dump-config'):
  3226. dumpConfig(source_raw, full_parsed)
  3227. Quit()
  3228. # okay, all done
  3229. Debug("FULL config for this file: %s"%full_parsed, 1)
  3230. else:
  3231. full_parsed = {}
  3232. return full_parsed, (head,conf,body)
  3233. def get_infiles_config(infiles):
  3234. """
  3235. Find and Join into a single list, all configuration available
  3236. for each input file. This function is supposed to be the very
  3237. first one to be called, before any processing.
  3238. """
  3239. ret = []
  3240. if not infiles: return []
  3241. for infile in infiles:
  3242. ret.append((process_source_file(infile)))
  3243. return ret
  3244. def convert_this_files(configs):
  3245. global CONF
  3246. for myconf,doc in configs: # multifile support
  3247. target_head = []
  3248. target_toc = []
  3249. target_body = []
  3250. target_foot = []
  3251. source_head, source_conf, source_body = doc
  3252. myconf = ConfigMaster().sanity(myconf)
  3253. # compose the target file Headers
  3254. #TODO escape line before?
  3255. #TODO see exceptions by tex and mgp
  3256. Message(_("Composing target Headers"),1)
  3257. target_head = doHeader(source_head, myconf)
  3258. # parse the full marked body into tagged target
  3259. first_body_line = (len(source_head) or 1)+ len(source_conf) + 1
  3260. Message(_("Composing target Body"),1)
  3261. target_body, marked_toc = convert(source_body, myconf,
  3262. firstlinenr=first_body_line)
  3263. # if dump-source, we're done
  3264. if myconf['dump-source']:
  3265. for line in source_head+source_conf+target_body:
  3266. print line
  3267. return
  3268. # make TOC (if needed)
  3269. Message(_("Composing target TOC"),1)
  3270. tagged_toc = toc_tagger(marked_toc, myconf)
  3271. target_toc = toc_formatter(tagged_toc, myconf)
  3272. target_body = toc_inside_body(target_body, target_toc, myconf)
  3273. if not AUTOTOC and not myconf['toc-only']: target_toc = []
  3274. # compose the target file Footer
  3275. Message(_("Composing target Footer"),1)
  3276. target_foot = doFooter(myconf)
  3277. # finally, we have our document
  3278. outlist = target_head + target_toc + target_body + target_foot
  3279. # if on GUI, abort before finish_him
  3280. # if module, return finish_him as list
  3281. # else, write results to file or STDOUT
  3282. if GUI:
  3283. return outlist, myconf
  3284. elif myconf.get('outfile') == MODULEOUT:
  3285. return finish_him(outlist, myconf), myconf
  3286. else:
  3287. Message(_("Saving results to the output file"),1)
  3288. finish_him(outlist, myconf)
  3289. def parse_images(line):
  3290. "Tag all images found"
  3291. while regex['img'].search(line) and TAGS['img'] != '[\a]':
  3292. txt = regex['img'].search(line).group(1)
  3293. tag = TAGS['img']
  3294. # HTML, XHTML and mgp!
  3295. if rules['imgalignable']:
  3296. align = get_image_align(line)
  3297. # add align on tag
  3298. align_name = string.capitalize(align)
  3299. align_tag = TAGS['imgAlign'+align_name]
  3300. tag = regex['_imgAlign'].sub(align_tag, tag, 1)
  3301. # dirty fix to allow centered solo images
  3302. if align == 'center' and TARGET in ['html','xhtml']:
  3303. rest = regex['img'].sub('',line,1)
  3304. if re.match('^\s+$', rest):
  3305. tag = "<center>%s</center>" %tag
  3306. if TARGET == 'tex':
  3307. tag = re.sub(r'\\b',r'\\\\b',tag)
  3308. txt = string.replace(txt, '_', 'vvvvTexUndervvvv')
  3309. line = regex['img'].sub(tag,line,1)
  3310. line = regex['x'].sub(txt,line,1)
  3311. return line
  3312. def add_inline_tags(line):
  3313. # beautifiers
  3314. for beauti in ['Bold', 'Italic', 'Underline']:
  3315. if regex['font%s'%beauti].search(line):
  3316. line = beautify_me(beauti, line)
  3317. line = parse_images(line)
  3318. return line
  3319. def get_include_contents(file, path=''):
  3320. "Parses %!include: value and extract file contents"
  3321. ids = {'`':'verb', '"':'raw', "'":'passthru' }
  3322. id = 't2t'
  3323. # set include type and remove identifier marks
  3324. mark = file[0]
  3325. if mark in ids.keys():
  3326. if file[:2] == file[-2:] == mark*2:
  3327. id = ids[mark] # set type
  3328. file = file[2:-2] # remove marks
  3329. # handle remote dir execution
  3330. filepath = os.path.join(path, file)
  3331. # read included file contents
  3332. lines = Readfile(filepath, remove_linebreaks=1)
  3333. # default txt2tags marked text, just BODY matters
  3334. if id == 't2t':
  3335. lines = get_file_body(filepath)
  3336. lines.insert(0, '%%INCLUDED(%s) starts here: %s'%(id,file))
  3337. # This appears when included hit EOF with verbatim area open
  3338. #lines.append('%%INCLUDED(%s) ends here: %s'%(id,file))
  3339. return id, lines
  3340. def set_global_config(config):
  3341. global CONF, TAGS, regex, rules, TARGET
  3342. CONF = config
  3343. TAGS = getTags(CONF)
  3344. rules = getRules(CONF)
  3345. regex = getRegexes()
  3346. TARGET = config['target'] # save for buggy functions that need global
def convert(bodylines, config, firstlinenr=1):
    """Convert the marked body lines into the target tagged lines.

    bodylines   -- list with the document body lines
    config      -- the config dictionary for this document; it is made
                   global by set_global_config()
    firstlinenr -- number given to the first body line; the line number
                   is only passed along to the Debug() calls

    Returns a (ret, marked_toc) tuple. 'ret' is the list of converted
    lines; it is emptied when CONF['toc-only'] is set and replaced by
    the untouched source lines when CONF['dump-source'] is set.
    'marked_toc' is what TITLE.dump_marked_toc() gives for the
    configured toc-level.

    Each line is checked against the marks in a fixed order (raw, verb,
    blank, special, comment, bar, title, quote, list, table, paragraph).
    Most checks end with "continue", so the order of the sections below
    matters.
    """
    global BLOCK
    set_global_config(config)
    target = config['target']
    BLOCK = BlockMaster()
    MASK = MaskMaster()
    TITLE = TitleMaster()
    ret = []
    dump_source = []
    f_lastwasblank = 0
    # compiling all PreProc regexes
    pre_filter = compile_filters(
        CONF['preproc'], _('Invalid PreProc filter regex'))
    # let's mark it up!
    # linenr is the document line number (for messages), lineref is the
    # index on bodylines. They differ because bodylines starts at zero
    # and may grow when a t2t include is inserted.
    linenr = firstlinenr-1
    lineref = 0
    while lineref < len(bodylines):
        # defaults
        MASK.reset()
        # results_box will hold the callable that receives the final line
        results_box = ''
        untouchedline = bodylines[lineref]
        dump_source.append(untouchedline)
        line = re.sub('[\n\r]+$','',untouchedline) # del line break
        # apply PreProc filters
        if pre_filter:
            errmsg = _('Invalid PreProc filter replacement')
            for rgx,repl in pre_filter:
                try: line = rgx.sub(repl, line)
                except: Error("%s: '%s'"%(errmsg, repl))
        line = maskEscapeChar(line) # protect \ char
        # counters are incremented here, so from now on lineref points
        # to the NEXT line
        linenr = linenr +1
        lineref = lineref +1
        Debug(repr(line), 2, linenr) # heavy debug: show each line
        # any NOT table line (or comment), closes an open table
        if ( BLOCK.isblock('table') or
             ( BLOCK.isblock('verb') and
               BLOCK.prop('mapped') == 'table'
             )
           ) \
           and not regex['table'].search(line) \
           and not regex['comment'].search(line):
            ret.extend(BLOCK.blockout())
        # any NOT quote line (or comment) closes all open quotes
        if BLOCK.isblock('quote') \
           and not regex['quote'].search(line) \
           and not regex['comment'].search(line):
            while BLOCK.isblock('quote'):
                ret.extend(BLOCK.blockout())
        #-------------------------[ Raw Text ]----------------------
        # we're already on a raw block
        if BLOCK.block() == 'raw':
            # closing raw
            if regex['blockRawClose'].search(line):
                ret.extend(BLOCK.blockout())
                continue
            # normal raw-inside line
            BLOCK.holdadd(line)
            continue
        # detecting raw block init
        if regex['blockRawOpen'].search(line):
            ret.extend(BLOCK.blockin('raw'))
            continue
        # one line verb-formatted text
        if regex['1lineRaw'].search(line):
            ret.extend(BLOCK.blockin('raw'))
            line = regex['1lineRaw'].sub('',line)
            BLOCK.holdadd(line)
            ret.extend(BLOCK.blockout())
            continue
        #-----------------[ Verbatim (PRE-formatted) ]--------------
        #TIP we'll never support beautifiers inside verbatim
        # we're already on a verb block
        if BLOCK.block() == 'verb':
            # closing verb
            if regex['blockVerbClose'].search(line):
                ret.extend(BLOCK.blockout())
                continue
            # normal verb-inside line
            BLOCK.holdadd(line)
            continue
        # detecting verb block init
        if regex['blockVerbOpen'].search(line):
            ret.extend(BLOCK.blockin('verb'))
            f_lastwasblank = 0
            continue
        # one line verb-formatted text
        if regex['1lineVerb'].search(line):
            ret.extend(BLOCK.blockin('verb'))
            line = regex['1lineVerb'].sub('',line)
            BLOCK.holdadd(line)
            ret.extend(BLOCK.blockout())
            f_lastwasblank = 0
            continue
        # tables are mapped to verb when target is not table-aware
        if not rules['tableable'] and regex['table'].search(line):
            if not BLOCK.isblock('verb'):
                ret.extend(BLOCK.blockin('verb'))
                # the 'mapped' property is what the table closing test
                # at the loop start looks for
                BLOCK.propset('mapped', 'table')
            BLOCK.holdadd(line)
            continue
        #---------------------[ blank lines ]-----------------------
        if regex['blankline'].search(line):
            # close open paragraph
            if BLOCK.isblock('para'):
                ret.extend(BLOCK.blockout())
                f_lastwasblank = 1
                continue
            # close all open quotes
            while BLOCK.isblock('quote'):
                ret.extend(BLOCK.blockout())
            # closing all open lists
            if f_lastwasblank: # 2nd consecutive blank
                if BLOCK.block()[-4:] == 'list':
                    BLOCK.holdaddsub('') # helps parser
                while BLOCK.depth: # closes list (if any)
                    ret.extend(BLOCK.blockout())
                continue # ignore consecutive blanks
            # paragraph (if any) is wanted inside lists also
            if BLOCK.block()[-4:] == 'list':
                BLOCK.holdaddsub('')
            else:
                # html: show blank line (needs tag)
                if target in ['html','xhtml']:
                    ret.append(TAGS['paragraphOpen']+\
                               TAGS['paragraphClose'])
                # otherwise we just show a blank line
                else:
                    ret.append('')
            f_lastwasblank = 1
            continue
        #---------------------[ special ]---------------------------
        if regex['special'].search(line):
            # include command
            targ, key, val = ConfigLines().parse_line(
                line, 'include', target)
            if key:
                Debug("Found config '%s', value '%s'"%(
                    key,val),1,linenr)
                incpath = os.path.dirname(CONF['sourcefile'])
                incfile = val
                err = _('A file cannot include itself (loop!)')
                if CONF['sourcefile'] == incfile:
                    Error("%s: %s"%(err,incfile))
                inctype, inclines = get_include_contents(
                    incfile, incpath)
                # verb, raw and passthru are easy
                if inctype != 't2t':
                    ret.extend(BLOCK.blockin(inctype))
                    BLOCK.holdextend(inclines)
                    ret.extend(BLOCK.blockout())
                else:
                    # insert include lines into body
                    # (lineref already points to the next line, so the
                    # included lines are the next ones to be parsed)
                    #TODO include maxdepth limit
                    bodylines = bodylines[:lineref] \
                        +inclines \
                        +bodylines[lineref:]
                    #TODO fix path if include@include
                # remove %!include call
                if CONF['dump-source']:
                    dump_source.pop()
                continue
            else:
                Debug('Bogus Special Line',1,linenr)
        #---------------------[ dump-source ]-----------------------
        # we don't need to go any further
        if CONF['dump-source']:
            continue
        #---------------------[ comments ]--------------------------
        # just skip them (if not macro)
        if regex['comment'].search(line) and not \
           regex['macros'].match(line) and not \
           regex['toc'].match(line):
            continue
        # valid line, reset blank status
        f_lastwasblank = 0
        #---------------------[ Horizontal Bar ]--------------------
        if regex['bar'].search(line):
            # a bar closes a paragraph
            if BLOCK.isblock('para'):
                ret.extend(BLOCK.blockout())
            # we need to close all opened quote blocks
            # if bar isn't allowed inside or if not a quote line
            if BLOCK.isblock('quote'):
                if not rules['barinsidequote'] or \
                   not regex['quote'].search(line):
                    while BLOCK.isblock('quote'):
                        ret.extend(BLOCK.blockout())
            # quote + bar: continue processing for quoting
            if rules['barinsidequote'] and \
               regex['quote'].search(line):
                pass
            # just bar: save tagged line and we're done
            else:
                line = get_tagged_bar(line)
                if BLOCK.block()[-4:] == 'list':
                    BLOCK.holdaddsub(line)
                elif BLOCK.block():
                    BLOCK.holdadd(line)
                else:
                    ret.append(line)
                Debug("BAR: %s"%line, 6)
                continue
        #---------------------[ Title ]-----------------------------
        #TODO set next blank and set f_lastwasblank or f_lasttitle
        if (regex['title'].search(line) or
            regex['numtitle'].search(line)) and \
            BLOCK.block()[-4:] != 'list':
            # a title closes a paragraph
            if BLOCK.isblock('para'):
                ret.extend(BLOCK.blockout())
            TITLE.add(line)
            tagged_title = TITLE.get()
            ret.extend(tagged_title)
            Debug("TITLE: %s"%tagged_title, 6)
            # the blank flag is set after a title, so a single blank
            # line right after it is handled as the 2nd consecutive one
            f_lastwasblank = 1
            continue
        #---------------------[ %%toc ]-----------------------
        # %%toc line closes paragraph
        if BLOCK.block() == 'para' and regex['toc'].search(line):
            ret.extend(BLOCK.blockout())
        #---------------------[ apply masks ]-----------------------
        line = MASK.mask(line)
        #XXX from here, only block-inside lines will pass
        #---------------------[ Quote ]-----------------------------
        if regex['quote'].search(line):
            # store number of leading TABS
            quotedepth = len(regex['quote'].search(line).group(0))
            # SGML doesn't support nested quotes
            if rules['quotenotnested']: quotedepth = 1
            # new quote
            if not BLOCK.isblock('quote'):
                ret.extend(BLOCK.blockin('quote'))
            # new subquotes
            # (note that here the blockin() result is not saved on ret)
            while BLOCK.depth < quotedepth:
                BLOCK.blockin('quote')
            # closing quotes
            while quotedepth < BLOCK.depth:
                ret.extend(BLOCK.blockout())
        #---------------------[ Lists ]-----------------------------
        # an empty item also closes the current list
        if BLOCK.block()[-4:] == 'list':
            m = regex['listclose'].match(line)
            if m:
                listindent = m.group(1)
                listtype = m.group(2)
                currlisttype = BLOCK.prop('type')
                currlistindent = BLOCK.prop('indent')
                if listindent == currlistindent and \
                   listtype == currlisttype:
                    ret.extend(BLOCK.blockout())
                    continue
        if regex['list'].search(line) or \
           regex['numlist'].search(line) or \
           regex['deflist'].search(line):
            listindent = BLOCK.prop('indent')
            listids = string.join(LISTNAMES.keys(), '')
            m = re.match('^( *)([%s]) '%listids, line)
            listitemindent = m.group(1)
            listtype = m.group(2)
            listname = LISTNAMES[listtype]
            # a list item line always goes to the block holder
            results_box = BLOCK.holdadd
            # del list ID (and separate term from definition)
            if listname == 'deflist':
                term = parse_deflist_term(line)
                line = regex['deflist'].sub(term+SEPARATOR,line)
            else:
                line = regex[listname].sub(SEPARATOR,line)
            # don't cross depth limit
            maxdepth = rules['listmaxdepth']
            if maxdepth and BLOCK.depth == maxdepth:
                if len(listitemindent) > len(listindent):
                    listitemindent = listindent
            # open mother list or sublist
            if BLOCK.block()[-4:] != 'list' or \
               len(listitemindent) > len(listindent):
                ret.extend(BLOCK.blockin(listname))
                BLOCK.propset('indent',listitemindent)
                BLOCK.propset('type',listtype)
            # closing sublists
            while len(listitemindent) < len(BLOCK.prop('indent')):
                ret.extend(BLOCK.blockout())
            # o-oh, sublist before list ("\n\n - foo\n- foo")
            # fix: close sublist (as mother), open another list
            if BLOCK.block()[-4:] != 'list':
                ret.extend(BLOCK.blockin(listname))
                BLOCK.propset('indent',listitemindent)
                BLOCK.propset('type',listtype)
        #---------------------[ Table ]-----------------------------
        #TODO escape undesired format inside table
        #TODO add pm6 target
        if regex['table'].search(line):
            if not BLOCK.isblock('table'): # first table line!
                ret.extend(BLOCK.blockin('table'))
                BLOCK.tableparser.__init__(line)
            tablerow = TableMaster().parse_row(line)
            BLOCK.tableparser.add_row(tablerow) # save config
            # maintain line to unmask and inlines
            line = string.join(tablerow['cells'], SEPARATOR)
        #---------------------[ Paragraph ]-------------------------
        if not BLOCK.block() and \
           not string.count(line, MASK.tocmask): # new para!
            ret.extend(BLOCK.blockin('para'))
        ############################################################
        ############################################################
        ############################################################
        #---------------------[ Final Parses ]----------------------
        # the target-specific special char escapes for body lines
        line = doEscape(target,line)
        line = add_inline_tags(line)
        line = MASK.undo(line)
        #---------------------[ Hold or Return? ]-------------------
        ### now we must choose here to put the parsed line
        #
        if not results_box:
            # list item extra lines
            if BLOCK.block()[-4:] == 'list':
                results_box = BLOCK.holdaddsub
            # other blocks
            elif BLOCK.block():
                results_box = BLOCK.holdadd
            # no blocks
            else:
                line = doFinalEscape(target, line)
                results_box = ret.append
        results_box(line)
    # EOF: close any open para/verb/lists/table/quotes
    Debug('EOF',7)
    while BLOCK.block():
        ret.extend(BLOCK.blockout())
    # maybe close some opened title area?
    if rules['titleblocks']:
        ret.extend(TITLE.close_all())
    # maybe a major tag to enclose body? (like DIV for CSS)
    if TAGS['bodyOpen' ]: ret.insert(0, TAGS['bodyOpen'])
    if TAGS['bodyClose']: ret.append(TAGS['bodyClose'])
    if CONF['toc-only']: ret = []
    marked_toc = TITLE.dump_marked_toc(CONF['toc-level'])
    # if dump-source, all parsing is ignored
    if CONF['dump-source']: ret = dump_source[:]
    return ret, marked_toc
  3687. ##############################################################################
  3688. ################################### GUI ######################################
  3689. ##############################################################################
  3690. #
  3691. # tk help: http://python.org/topics/tkinter/
  3692. # tuto: http://ibiblio.org/obp/py4fun/gui/tkPhone.html
  3693. # /usr/lib/python*/lib-tk/Tkinter.py
  3694. #
  3695. # grid table : row=0, column=0, columnspan=2, rowspan=2
  3696. # grid align : sticky='n,s,e,w' (North, South, East, West)
  3697. # pack place : side='top,bottom,right,left'
  3698. # pack fill : fill='x,y,both,none', expand=1
  3699. # pack align : anchor='n,s,e,w' (North, South, East, West)
  3700. # padding : padx=10, pady=10, ipadx=10, ipady=10 (internal)
  3701. # checkbox : offvalue is return if the _user_ deselected the box
  3702. # label align: justify=left,right,center
  3703. def load_GUI_resources():
  3704. "Load all extra modules and methods used by GUI"
  3705. global askopenfilename, showinfo, showwarning, showerror, Tkinter
  3706. from tkFileDialog import askopenfilename
  3707. from tkMessageBox import showinfo,showwarning,showerror
  3708. import Tkinter
  3709. class Gui:
  3710. "Graphical Tk Interface"
  3711. def __init__(self, conf={}):
  3712. self.root = Tkinter.Tk() # mother window, come to butthead
  3713. self.root.title(my_name) # window title bar text
  3714. self.window = self.root # variable "focus" for inclusion
  3715. self.row = 0 # row count for grid()
  3716. self.action_lenght = 150 # left column lenght (pixel)
  3717. self.frame_margin = 10 # frame margin size (pixel)
  3718. self.frame_border = 6 # frame border size (pixel)
  3719. # the default Gui colors, can be changed by %!guicolors
  3720. self.dft_gui_colors = ['blue','white','lightblue','black']
  3721. self.gui_colors = []
  3722. self.bg1 = self.fg1 = self.bg2 = self.fg2 = ''
  3723. # on Tk, vars need to be set/get using setvar()/get()
  3724. self.infile = self.setvar('')
  3725. self.target = self.setvar('')
  3726. self.target_name = self.setvar('')
  3727. # the checks appearance order
  3728. self.checks = [
  3729. 'headers','enum-title','toc','mask-email',
  3730. 'toc-only','stdout']
  3731. # creating variables for all checks
  3732. for check in self.checks:
  3733. setattr(self, 'f_'+check, self.setvar(''))
  3734. # load RC config
  3735. self.conf = {}
  3736. if conf: self.load_config(conf)
  3737. def load_config(self, conf):
  3738. self.conf = conf
  3739. self.gui_colors = conf.get('guicolors') or self.dft_gui_colors
  3740. self.bg1, self.fg1, self.bg2, self.fg2 = self.gui_colors
  3741. self.root.config(bd=15,bg=self.bg1)
  3742. ### config as dic for python 1.5 compat (**opts don't work :( )
  3743. def entry(self, **opts): return Tkinter.Entry(self.window, opts)
  3744. def label(self, txt='', bg=None, **opts):
  3745. opts.update({'text':txt,'bg':bg or self.bg1})
  3746. return Tkinter.Label(self.window, opts)
  3747. def button(self,name,cmd,**opts):
  3748. opts.update({'text':name,'command':cmd})
  3749. return Tkinter.Button(self.window, opts)
  3750. def check(self,name,checked=0,**opts):
  3751. bg, fg = self.bg2, self.fg2
  3752. opts.update({
  3753. 'text':name, 'onvalue':1, 'offvalue':0,
  3754. 'activeforeground':fg, 'fg':fg,
  3755. 'activebackground':bg, 'bg':bg,
  3756. 'highlightbackground':bg, 'anchor':'w'
  3757. })
  3758. chk = Tkinter.Checkbutton(self.window, opts)
  3759. if checked: chk.select()
  3760. chk.grid(columnspan=2, sticky='w', padx=0)
  3761. def menu(self,sel,items):
  3762. return apply(Tkinter.OptionMenu,(self.window,sel)+tuple(items))
  3763. # handy auxiliar functions
  3764. def action(self, txt):
  3765. self.label(txt, fg=self.fg1, bg=self.bg1,
  3766. wraplength=self.action_lenght).grid(column=0,row=self.row)
  3767. def frame_open(self):
  3768. self.window = Tkinter.Frame(self.root,bg=self.bg2,
  3769. borderwidth=self.frame_border)
  3770. def frame_close(self):
  3771. self.window.grid(column=1, row=self.row, sticky='w',
  3772. padx=self.frame_margin)
  3773. self.window = self.root
  3774. self.label('').grid()
  3775. self.row = self.row + 2 # update row count
  3776. def target_name2key(self):
  3777. name = self.target_name.get()
  3778. target = filter(lambda x: TARGET_NAMES[x] == name, TARGETS)
  3779. try : key = target[0]
  3780. except: key = ''
  3781. self.target = self.setvar(key)
  3782. def target_key2name(self):
  3783. key = self.target.get()
  3784. name = TARGET_NAMES.get(key) or key
  3785. self.target_name = self.setvar(name)
  3786. def exit(self): self.root.destroy()
  3787. def setvar(self, val): z = Tkinter.StringVar() ; z.set(val) ; return z
  3788. def askfile(self):
  3789. ftypes= [(_('txt2tags files'),('*.t2t','*.txt')),
  3790. (_('All files'),'*')]
  3791. newfile = askopenfilename(filetypes=ftypes)
  3792. if newfile:
  3793. self.infile.set(newfile)
  3794. newconf = process_source_file(newfile)[0]
  3795. newconf = ConfigMaster().sanity(newconf, gui=1)
  3796. # restate all checkboxes after file selection
  3797. #TODO how to make a refresh without killing it?
  3798. self.root.destroy()
  3799. self.__init__(newconf)
  3800. self.mainwindow()
  3801. def scrollwindow(self, txt='no text!', title=''):
  3802. # create components
  3803. win = Tkinter.Toplevel() ; win.title(title)
  3804. frame = Tkinter.Frame(win)
  3805. scroll = Tkinter.Scrollbar(frame)
  3806. text = Tkinter.Text(frame,yscrollcommand=scroll.set)
  3807. button = Tkinter.Button(win)
  3808. # config
  3809. text.insert(Tkinter.END, string.join(txt,'\n'))
  3810. scroll.config(command=text.yview)
  3811. button.config(text=_('Close'), command=win.destroy)
  3812. button.focus_set()
  3813. # packing
  3814. text.pack(side='left', fill='both', expand=1)
  3815. scroll.pack(side='right', fill='y')
  3816. frame.pack(fill='both', expand=1)
  3817. button.pack(ipadx=30)
    def runprogram(self):
        """Convert the chosen file, using the options set on the Gui.

        A fake command line is composed from the real command line
        options plus the flags for the checkboxes that differ from the
        current config. It temporarily replaces the CMDLINE_RAW global
        while the file is processed and converted. The result is shown
        on a new window, or a "conversion done" dialog is shown when
        finish_him() returns nothing.

        Nothing is done (besides a warning dialog) when the target or
        the source file is missing.
        """
        global CMDLINE_RAW
        # prepare
        self.target_name2key()
        infile, target = self.infile.get(), self.target.get()
        # sanity
        if not target:
            showwarning(my_name,_("You must select a target type!"))
            return
        if not infile:
            showwarning(my_name,
                _("You must provide the source file location!"))
            return
        # compose cmdline
        guiflags = []
        real_cmdline_conf = ConfigMaster(CMDLINE_RAW).parse()
        # infile and target always come from the Gui fields
        if real_cmdline_conf.has_key('infile'):
            del real_cmdline_conf['infile']
        if real_cmdline_conf.has_key('target'):
            del real_cmdline_conf['target']
        real_cmdline = CommandLine().compose_cmdline(real_cmdline_conf)
        default_outfile = ConfigMaster().get_outfile_name(
            {'sourcefile':infile, 'outfile':'', 'target':target})
        for opt in self.checks:
            # the checkbox state, as 0 or 1
            val = int(getattr(self, 'f_%s'%opt).get() or "0")
            # the 'stdout' checkbox is in fact the 'outfile' config
            if opt == 'stdout': opt = 'outfile'
            on_config = self.conf.get(opt) or 0
            on_cmdline = real_cmdline_conf.get(opt) or 0
            # for outfile, "on" means that it is set to STDOUT
            if opt == 'outfile':
                if on_config == STDOUT: on_config = 1
                else: on_config = 0
                if on_cmdline == STDOUT: on_cmdline = 1
                else: on_cmdline = 0
            # a flag is added when the checkbox differs from the config,
            # or when they all agree and the option is on the cmdline
            if val != on_config or (
               val == on_config == on_cmdline and
               real_cmdline_conf.has_key(opt)):
                if val:
                    # was not set, but user selected on GUI
                    Debug("user turned ON: %s"%opt)
                    if opt == 'outfile': opt = '-o-'
                    else: opt = '--%s'%opt
                else:
                    # was set, but user deselected on GUI
                    Debug("user turned OFF: %s"%opt)
                    if opt == 'outfile':
                        opt = "-o%s"%default_outfile
                    else: opt = '--no-%s'%opt
                guiflags.append(opt)
        cmdline = [my_name, '-t', target] +real_cmdline \
            +guiflags +[infile]
        Debug('Gui/Tk cmdline: %s'%cmdline,5)
        # run!
        # the real cmdline is saved, to be restored after the conversion
        cmdline_raw_orig = CMDLINE_RAW
        try:
            # fake the GUI cmdline as the real one, and parse file
            CMDLINE_RAW = CommandLine().get_raw_config(cmdline[1:])
            data = process_source_file(infile)
            # on GUI, convert_* returns the data, not finish_him()
            outlist, config = convert_this_files([data])
            # on GUI and STDOUT, finish_him() returns the data
            result = finish_him(outlist, config)
            # show outlist in a nice new window
            if result:
                outlist, config = result
                title = _('%s: %s converted to %s')%(
                    my_name, os.path.basename(infile),
                    string.upper(config['target']))
                self.scrollwindow(outlist, title)
            # show the "file saved" message
            else:
                msg = "%s\n\n %s\n%s\n\n %s\n%s"%(
                    _('Conversion done!'),
                    _('FROM:'), infile,
                    _('TO:'), config['outfile'])
                showinfo(my_name, msg)
        except error: # common error (windowed), not quit
            pass
        except: # fatal error (windowed and printed)
            errormsg = getUnknownErrorMessage()
            print errormsg
            showerror(_('%s FATAL ERROR!')%my_name,errormsg)
            self.exit()
        # both except clauses end normally, so this is always reached
        CMDLINE_RAW = cmdline_raw_orig
  3901. def mainwindow(self):
  3902. self.infile.set(self.conf.get('sourcefile') or '')
  3903. self.target.set(self.conf.get('target') or \
  3904. _('-- select one --'))
  3905. outfile = self.conf.get('outfile')
  3906. if outfile == STDOUT: # map -o-
  3907. self.conf['stdout'] = 1
  3908. if self.conf.get('headers') == None:
  3909. self.conf['headers'] = 1 # map default
  3910. action1 = _("Enter the source file location:")
  3911. action2 = _("Choose the target document type:")
  3912. action3 = _("Some options you may check:")
  3913. action4 = _("Some extra options:")
  3914. checks_txt = {
  3915. 'headers' : _("Include headers on output"),
  3916. 'enum-title': _("Number titles (1, 1.1, 1.1.1, etc)"),
  3917. 'toc' : _("Do TOC also (Table of Contents)"),
  3918. 'mask-email': _("Hide e-mails from SPAM robots"),
  3919. 'toc-only' : _("Just do TOC, nothing more"),
  3920. 'stdout' : _("Dump to screen (Don't save target file)")
  3921. }
  3922. targets_menu = map(lambda x: TARGET_NAMES[x], TARGETS)
  3923. # header
  3924. self.label("%s %s"%(string.upper(my_name), my_version),
  3925. bg=self.bg2, fg=self.fg2).grid(columnspan=2, ipadx=10)
  3926. self.label(_("ONE source, MULTI targets")+'\n%s\n'%my_url,
  3927. bg=self.bg1, fg=self.fg1).grid(columnspan=2)
  3928. self.row = 2
  3929. # choose input file
  3930. self.action(action1) ; self.frame_open()
  3931. e_infile = self.entry(textvariable=self.infile,width=25)
  3932. e_infile.grid(row=self.row, column=0, sticky='e')
  3933. if not self.infile.get(): e_infile.focus_set()
  3934. self.button(_("Browse"), self.askfile).grid(
  3935. row=self.row, column=1, sticky='w', padx=10)
  3936. # show outfile name, style and encoding (if any)
  3937. txt = ''
  3938. if outfile:
  3939. txt = outfile
  3940. if outfile == STDOUT: txt = _('<screen>')
  3941. l_output = self.label(_('Output: ')+txt,
  3942. fg=self.fg2,bg=self.bg2)
  3943. l_output.grid(columnspan=2, sticky='w')
  3944. for setting in ['style','encoding']:
  3945. if self.conf.get(setting):
  3946. name = string.capitalize(setting)
  3947. val = self.conf[setting]
  3948. self.label('%s: %s'%(name, val),
  3949. fg=self.fg2, bg=self.bg2).grid(
  3950. columnspan=2, sticky='w')
  3951. # choose target
  3952. self.frame_close() ; self.action(action2)
  3953. self.frame_open()
  3954. self.target_key2name()
  3955. self.menu(self.target_name, targets_menu).grid(
  3956. columnspan=2, sticky='w')
  3957. # options checkboxes label
  3958. self.frame_close() ; self.action(action3)
  3959. self.frame_open()
  3960. # compose options check boxes, example:
  3961. # self.check(checks_txt['toc'],1,variable=self.f_toc)
  3962. for check in self.checks:
  3963. # extra options label
  3964. if check == 'toc-only':
  3965. self.frame_close() ; self.action(action4)
  3966. self.frame_open()
  3967. txt = checks_txt[check]
  3968. var = getattr(self, 'f_'+check)
  3969. checked = self.conf.get(check)
  3970. self.check(txt,checked,variable=var)
  3971. self.frame_close()
  3972. # spacer and buttons
  3973. self.label('').grid() ; self.row = self.row + 1
  3974. b_quit = self.button(_("Quit"), self.exit)
  3975. b_quit.grid(row=self.row, column=0, sticky='w', padx=30)
  3976. b_conv = self.button(_("Convert!"), self.runprogram)
  3977. b_conv.grid(row=self.row, column=1, sticky='e', padx=30)
  3978. if self.target.get() and self.infile.get():
  3979. b_conv.focus_set()
  3980. # as documentation told me
  3981. if sys.platform[:3] == 'win':
  3982. self.root.iconify()
  3983. self.root.update()
  3984. self.root.deiconify()
  3985. self.root.mainloop()
  3986. ##############################################################################
  3987. ##############################################################################
  3988. def exec_command_line(user_cmdline=[]):
  3989. global CMDLINE_RAW, RC_RAW, DEBUG, VERBOSE, QUIET, GUI, Error
  3990. # extract command line data
  3991. cmdline_data = user_cmdline or sys.argv[1:]
  3992. CMDLINE_RAW = CommandLine().get_raw_config(cmdline_data, relative=1)
  3993. cmdline_parsed = ConfigMaster(CMDLINE_RAW).parse()
  3994. DEBUG = cmdline_parsed.get('debug' ) or 0
  3995. VERBOSE = cmdline_parsed.get('verbose') or 0
  3996. QUIET = cmdline_parsed.get('quiet' ) or 0
  3997. GUI = cmdline_parsed.get('gui' ) or 0
  3998. infiles = cmdline_parsed.get('infile' ) or []
  3999. Message(_("Txt2tags %s processing begins")%my_version,1)
  4000. # the easy ones
  4001. if cmdline_parsed.get('help' ): Quit(USAGE)
  4002. if cmdline_parsed.get('version'): Quit(VERSIONSTR)
  4003. # multifile haters
  4004. if len(infiles) > 1:
  4005. errmsg=_("Option --%s can't be used with multiple input files")
  4006. for option in NO_MULTI_INPUT:
  4007. if cmdline_parsed.get(option):
  4008. Error(errmsg%option)
  4009. Debug("system platform: %s"%sys.platform)
  4010. Debug("python version: %s"%(string.split(sys.version,'(')[0]))
  4011. Debug("line break char: %s"%repr(LB))
  4012. Debug("command line: %s"%sys.argv)
  4013. Debug("command line raw config: %s"%CMDLINE_RAW,1)
  4014. # extract RC file config
  4015. if cmdline_parsed.get('rc') == 0:
  4016. Message(_("Ignoring user configuration file"),1)
  4017. else:
  4018. rc_file = get_rc_path()
  4019. if os.path.isfile(rc_file):
  4020. Message(_("Loading user configuration file"),1)
  4021. RC_RAW = ConfigLines(file=rc_file).get_raw_config()
  4022. Debug("rc file: %s"%rc_file)
  4023. Debug("rc file raw config: %s"%RC_RAW,1)
  4024. # get all infiles config (if any)
  4025. infiles_config = get_infiles_config(infiles)
  4026. # is GUI available?
  4027. # try to load and start GUI interface for --gui
  4028. # if program was called with no arguments, try GUI also
  4029. if GUI or not infiles:
  4030. try:
  4031. load_GUI_resources()
  4032. Debug("GUI resources OK (Tk module is installed)")
  4033. winbox = Gui()
  4034. Debug("GUI display OK")
  4035. GUI = 1
  4036. except:
  4037. Debug("GUI Error: no Tk module or no DISPLAY")
  4038. GUI = 0
  4039. # user forced --gui, but it's not available
  4040. if cmdline_parsed.get('gui') and not GUI:
  4041. print getTraceback(); print
  4042. Error("Sorry, I can't run my Graphical Interface - GUI\n"
  4043. "- Check if Python Tcl/Tk module is installed (Tkinter)\n"
  4044. "- Make sure you are in a graphical environment (like X)")
  4045. # Okay, we will use GUI
  4046. if GUI:
  4047. Message(_("We are on GUI interface"),1)
  4048. # redefine Error function to raise exception instead sys.exit()
  4049. def Error(msg):
  4050. showerror(_('txt2tags ERROR!'), msg)
  4051. raise error
  4052. # if no input file, get RC+cmdline config, else full config
  4053. if not infiles:
  4054. gui_conf = ConfigMaster(RC_RAW+CMDLINE_RAW).parse()
  4055. else:
  4056. try : gui_conf = infiles_config[0][0]
  4057. except: gui_conf = {}
  4058. # sanity is needed to set outfile and other things
  4059. gui_conf = ConfigMaster().sanity(gui_conf, gui=1)
  4060. Debug("GUI config: %s"%gui_conf,5)
  4061. # insert config and populate the nice window!
  4062. winbox.load_config(gui_conf)
  4063. winbox.mainwindow()
  4064. # console mode rocks forever!
  4065. else:
  4066. Message(_("We are on Command Line interface"),1)
  4067. # called with no arguments, show error
  4068. if not infiles: Error(_('Missing input file (try --help)'))
  4069. convert_this_files(infiles_config)
  4070. Message(_("Txt2tags finished sucessfuly"),1)
  4071. if __name__ == '__main__':
  4072. try:
  4073. exec_command_line()
  4074. except error, msg:
  4075. sys.stderr.write("%s\n"%msg)
  4076. sys.stderr.flush()
  4077. sys.exit(1)
  4078. except SystemExit:
  4079. pass
  4080. except:
  4081. print getUnknownErrorMessage()
  4082. Quit()
  4083. # vim: ts=8