PageRenderTime 66ms CodeModel.GetById 22ms RepoModel.GetById 1ms app.codeStats 1ms

/old/txt2tags-2.2.py

http://txt2tags.googlecode.com/
Python | 4517 lines | 4086 code | 160 blank | 271 comment | 157 complexity | 28f3e926696e9b98da7c6234906e4f9f MD5 | raw file
Possible License(s): GPL-2.0, GPL-3.0, WTFPL

Large files are truncated, but you can click here to view the full file

  1. #!/usr/bin/env python
  2. # txt2tags - generic text conversion tool
  3. # http://txt2tags.sf.net
  4. #
  5. # Copyright 2001, 2002, 2003, 2004 Aurelio Marinho Jargas
  6. #
  7. # This program is free software; you can redistribute it and/or modify
  8. # it under the terms of the GNU General Public License as published by
  9. # the Free Software Foundation, version 2.
  10. #
  11. # This program is distributed in the hope that it will be useful,
  12. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. # GNU General Public License for more details.
  15. #
  16. # You have received a copy of the GNU General Public License along
  17. # with this program, on the COPYING file.
  18. #
  19. #
  20. #
  21. # +-------------------------------------------------------------+
  22. # | IMPORTANT MESSAGES, PLEASE READ |
  23. # +-------------------------------------------------------------+
  24. # | |
  25. # | |
  26. # | v1.x COMPATIBILITY |
  27. # | ------------------ |
  28. # | |
  29. # | Due to the major syntax changes, the new 2.x series |
  30. # | BREAKS backwards compatibility. |
  31. # | |
  32. # | Use the 't2tconv' script to upgrade your existing |
  33. # | v1.x files to conform to the new v2.x syntax. |
  34. # | |
  35. # | Do a visual inspection on the new converted file. |
  36. # | Especially Pre & Post proc filters can break. |
  37. # | Check them! |
  38. # | |
  39. # | |
  40. # +-------------------------------------------------------------+
  41. #
  42. #
  43. ########################################################################
  44. #
  45. # BORING CODE EXPLANATION AHEAD
  46. #
  47. # Just read if you wish to understand how the txt2tags code works
  48. #
  49. ########################################################################
  50. #
  51. # Version 2.0 was a complete rewrite for the program 'core'.
  52. #
  53. # Now the code that [1] parses the marked text is separated from the
  54. # code that [2] inserts the target tags.
  55. #
  56. # [1] made by: def convert()
  57. # [2] made by: class BlockMaster
  58. #
  59. # The structures of the marked text are identified and their contents are
  60. # extracted into a data holder (Python lists and dictionaries).
  61. #
  62. # When parsing the source file, the blocks (para, lists, quote, table)
  63. # are opened with BlockMaster, right when found. Then their contents,
  64. # which span several lines, are fed into a special holder on the
  65. # BlockMaster instance. Only when the block is closed are the target tags
  66. # inserted for the full block as a whole, in one pass. This way, we
  67. # have better control over blocks. Much better than the previous line by
  68. # line approach.
  69. #
  70. # In other words, whenever inside a block, the parser *holds* the tag
  71. # insertion process, waiting until the full block is read. That was
  72. # needed primarily to close paragraphs for the new XHTML target, but
  73. # proved to be a very good addition, improving many other processings.
  74. #
  75. # -------------------------------------------------------------------
  76. #
  77. # There is also a brand new code for the Configuration schema, 100%
  78. # rewritten. There are new classes, all self documented: CommandLine,
  79. # SourceDocument, ConfigMaster and ConfigLines. In short, a new RAW
  80. # Config format was created, and all kind of configuration is first
  81. # converted to this format, and then a generic method parses it.
  82. #
  83. # The init processing was also changed, and now the functions which
  84. # get information about the input files are: get_infiles_config(),
  85. # process_source_file() and convert_this_files()
  86. #
  87. # Other parts are untouched, and remain the same as in v1.7, such as the
  88. # marks regexes, target Headers and target Tags&Rules.
  89. #
  90. ########################################################################
  91. # Now I think the code is nice, easier to read and understand
  92. #XXX Python coding warning
  93. # Avoid common mistakes:
  94. # - do NOT use newlist=list, use newlist=list[:] instead
  95. # - do NOT use newdic=dic, use newdic=dic.copy() instead
  96. # - do NOT use dic[key], use dic.get(key) instead
  97. # - do NOT use del dic[key] without has_key() before
  98. #XXX Smart Image Align doesn't work if the image is a link
  99. # Can't fix that because the image is expanded together with the
  100. # link, at the linkbank filling moment. Only the image is passed
  101. # to parse_images(), not the full line, so it is always 'middle'.
  102. #XXX Paragraph separation not valid inside Quote
  103. # Quote will not have <p></p> inside, instead will close and open
  104. # again the <blockquote>. This really sucks in CSS, when defining a
  105. # different background color. Still don't know how to fix it.
  106. #XXX TODO (maybe)
  107. # New mark or macro which expands to an anchor full title.
  108. # It is necessary to parse the full document in this order:
  109. # DONE 1st scan: HEAD: get all settings, including %!includeconf
  110. # DONE 2nd scan: BODY: expand includes & apply %!preproc
  111. # 3rd scan: BODY: read titles and compose TOC info
  112. # 4th scan: BODY: full parsing, expanding [#anchor] 1st
  113. # Steps 2 and 3 can be made together, with no tag adding.
  114. # Two complete body scans will be *slow*, don't know if it is worth it.
  115. # One solution may be add the titles as postproc rules
  116. ##############################################################################
  117. # User config (1=ON, 0=OFF)
  118. USE_I18N = 1 # use gettext for i18ned messages? (default is 1)
  119. COLOR_DEBUG = 1 # show debug messages in colors? (default is 1)
  120. BG_LIGHT = 0 # your terminal background color is light (default is 0)
  121. HTML_LOWER = 0 # use lowercased HTML tags instead upper? (default is 0)
  122. ##############################################################################
  123. # these are all the core Python modules used by txt2tags (KISS!)
  124. import re, string, os, sys, time, getopt
  125. # program information
  126. my_url = 'http://txt2tags.sf.net'
  127. my_name = 'txt2tags'
  128. my_email = 'verde@aurelio.net'
  129. my_version = '2.2'
  130. # i18n - just use if available
  131. if USE_I18N:
  132. try:
  133. import gettext
  134. # if your locale dir is different, change it here
  135. cat = gettext.Catalog('txt2tags',localedir='/usr/share/locale/')
  136. _ = cat.gettext
  137. except:
  138. _ = lambda x:x
  139. else:
  140. _ = lambda x:x
  141. # FLAGS : the conversion related flags , may be used in %!options
  142. # OPTIONS : the conversion related options, may be used in %!options
  143. # ACTIONS : the other behaviour modifiers, valid on command line only
  144. # MACROS : the valid macros with their default values for formatting
  145. # SETTINGS: global miscellaneous settings, valid on RC file only
  146. # CONFIG_KEYWORDS: the valid %!key:val keywords
  147. #
  148. # FLAGS and OPTIONS are configs that affect the converted document.
  149. # They usually have also a --no-<option> to turn them OFF.
  150. # ACTIONS are needed because when doing multiple input files, strange
  151. # behaviour would be found, such as using the command line interface for
  152. # the first file and the GUI for the second. There is no --no-<action>.
  153. # --version and --help inside %!options are also odd
  154. #
  155. TARGETS = ['html', 'xhtml', 'sgml', 'tex', 'lout', 'man', 'mgp',
  156. 'moin', 'pm6' , 'txt']
  157. FLAGS = {'headers' :1 , 'enum-title' :0 , 'mask-email' :0 ,
  158. 'toc-only' :0 , 'toc' :0 , 'rc' :1 ,
  159. 'css-sugar' :0 , 'css-suggar' :0 , 'css-inside' :0 ,
  160. 'quiet' :0 }
  161. OPTIONS = {'target' :'', 'toc-level' :3 , 'style' :'',
  162. 'infile' :'', 'outfile' :'', 'encoding' :'',
  163. 'split' :0 , 'lang' :''}
  164. ACTIONS = {'help' :0 , 'version' :0 , 'gui' :0 ,
  165. 'verbose' :0 , 'debug' :0 , 'dump-config':0 }
  166. MACROS = {'date' : '%Y%m%d', 'infile': '%f',
  167. 'mtime': '%Y%m%d', 'outfile': '%f'}
  168. SETTINGS = {} # for future use
  169. CONFIG_KEYWORDS = [
  170. 'target', 'encoding', 'style', 'options', 'preproc','postproc',
  171. 'guicolors']
  172. TARGET_NAMES = {
  173. 'html' : _('HTML page'),
  174. 'xhtml': _('XHTML page'),
  175. 'sgml' : _('SGML document'),
  176. 'tex' : _('LaTeX document'),
  177. 'lout' : _('Lout document'),
  178. 'man' : _('UNIX Manual page'),
  179. 'mgp' : _('Magic Point presentation'),
  180. 'moin' : _('MoinMoin page'),
  181. 'pm6' : _('PageMaker 6.0 document'),
  182. 'txt' : _('Plain Text'),
  183. }
  184. DEBUG = 0 # do not edit here, please use --debug
  185. VERBOSE = 0 # do not edit here, please use -v, -vv or -vvv
  186. QUIET = 0 # do not edit here, please use --quiet
  187. GUI = 0 # do not edit here, please use --gui
  188. AUTOTOC = 1 # do not edit here, please use --no-toc or %%toc
  189. RC_RAW = []
  190. CMDLINE_RAW = []
  191. CONF = {}
  192. BLOCK = None
  193. regex = {}
  194. TAGS = {}
  195. rules = {}
  196. lang = 'english'
  197. TARGET = ''
  198. STDIN = STDOUT = '-'
  199. ESCCHAR = '\x00'
  200. SEPARATOR = '\x01'
  201. LISTNAMES = {'-':'list', '+':'numlist', ':':'deflist'}
  202. LINEBREAK = {'default':'\n', 'win':'\r\n', 'mac':'\r'}
  203. # plataform specific settings
  204. LB = LINEBREAK.get(sys.platform[:3]) or LINEBREAK['default']
  205. # identify a development version
  206. #dev_suffix = '-dev'+time.strftime('%m%d',time.localtime(time.time()))
  207. #my_version = my_version + dev_suffix
  208. VERSIONSTR = _("%s version %s <%s>")%(my_name,my_version,my_url)
  209. USAGE = string.join([
  210. '',
  211. _("Usage: %s [OPTIONS] [infile.t2t ...]") % my_name,
  212. '',
  213. _(" -t, --target=TYPE set target document type. currently supported:"),
  214. ' %s' % re.sub(r"[]'[]",'',repr(TARGETS)),
  215. _(" -i, --infile=FILE set FILE as the input file name ('-' for STDIN)"),
  216. _(" -o, --outfile=FILE set FILE as the output file name ('-' for STDOUT)"),
  217. _(" -n, --enum-title enumerate all title lines as 1, 1.1, 1.1.1, etc"),
  218. _(" -H, --no-headers suppress header, title and footer contents"),
  219. _(" --headers show header, title and footer contents (default ON)"),
  220. _(" --encoding=ENC set target file encoding (utf-8, iso-8859-1, etc)"),
  221. _(" --style=FILE use FILE as the document style (like HTML CSS)"),
  222. _(" --css-sugar insert CSS-friendly tags for HTML and XHTML targets"),
  223. _(" --css-inside insert CSS file contents inside HTML/XHTML headers"),
  224. _(" --mask-email hide email from spam robots. x@y.z turns <x (a) y z>"),
  225. _(" --toc add TOC (Table of Contents) to target document"),
  226. _(" --toc-only print document TOC and exit"),
  227. _(" --toc-level=N set maximum TOC level (depth) to N"),
  228. _(" --rc read user config file ~/.txt2tagsrc (default ON)"),
  229. _(" --gui invoke Graphical Tk Interface"),
  230. _(" -q, --quiet quiet mode, suppress all output (except errors)"),
  231. _(" -v, --verbose print informative messages during conversion"),
  232. _(" -h, --help print this help information and exit"),
  233. _(" -V, --version print program version and exit"),
  234. _(" --dump-config print all the config found and exit"),
  235. '',
  236. _("Turn OFF options:"),
  237. " --no-outfile, --no-infile, --no-style, --no-encoding, --no-headers",
  238. " --no-toc, --no-toc-only, --no-mask-email, --no-enum-title, --no-rc",
  239. " --no-css-sugar, --no-css-inside, --no-quiet",
  240. '',
  241. _("Example:\n %s -t html --toc myfile.t2t") % my_name,
  242. '',
  243. _("By default, converted output is saved to 'infile.<target>'."),
  244. _("Use --outfile to force an output file name."),
  245. _("If input file is '-', reads from STDIN."),
  246. _("If output file is '-', dumps output to STDOUT."),
  247. ''
  248. ], '\n')
  249. ##############################################################################
  250. # here is all the target's templates
  251. # you may edit them to fit your needs
  252. # - the %(HEADERn)s strings represent the Header lines
  253. # - the %(STYLE)s string is changed by --style contents
  254. # - the %(ENCODING)s string is changed by --encoding contents
  255. # - if any of the above is empty, the full line is removed
  256. # - use %% to represent a literal %
  257. #
  258. HEADER_TEMPLATE = {
  259. 'txt': """\
  260. %(HEADER1)s
  261. %(HEADER2)s
  262. %(HEADER3)s
  263. """,
  264. 'sgml': """\
  265. <!doctype linuxdoc system>
  266. <article>
  267. <title>%(HEADER1)s
  268. <author>%(HEADER2)s
  269. <date>%(HEADER3)s
  270. """,
  271. 'html': """\
  272. <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
  273. <HTML>
  274. <HEAD>
  275. <META NAME="generator" CONTENT="http://txt2tags.sf.net">
  276. <META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=%(ENCODING)s">
  277. <LINK REL="stylesheet" TYPE="text/css" HREF="%(STYLE)s">
  278. <TITLE>%(HEADER1)s</TITLE>
  279. </HEAD><BODY BGCOLOR="white" TEXT="black">
  280. <P ALIGN="center"><CENTER><H1>%(HEADER1)s</H1>
  281. <FONT SIZE="4">
  282. <I>%(HEADER2)s</I><BR>
  283. %(HEADER3)s
  284. </FONT></CENTER>
  285. """,
  286. 'htmlcss': """\
  287. <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
  288. <HTML>
  289. <HEAD>
  290. <META NAME="generator" CONTENT="http://txt2tags.sf.net">
  291. <META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=%(ENCODING)s">
  292. <LINK REL="stylesheet" TYPE="text/css" HREF="%(STYLE)s">
  293. <TITLE>%(HEADER1)s</TITLE>
  294. </HEAD>
  295. <BODY>
  296. <DIV CLASS="header" ID="header">
  297. <H1>%(HEADER1)s</H1>
  298. <H2>%(HEADER2)s</H2>
  299. <H3>%(HEADER3)s</H3>
  300. </DIV>
  301. """,
  302. 'xhtml': """\
  303. <?xml version="1.0"
  304. encoding="%(ENCODING)s"
  305. ?>
  306. <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"\
  307. "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
  308. <html xmlns="http://www.w3.org/1999/xhtml">
  309. <head>
  310. <title>%(HEADER1)s</title>
  311. <meta name="generator" content="http://txt2tags.sf.net" />
  312. <link rel="stylesheet" type="text/css" href="%(STYLE)s" />
  313. </head>
  314. <body bgcolor="white" text="black">
  315. <div align="center">
  316. <h1>%(HEADER1)s</h1>
  317. <h2>%(HEADER2)s</h2>
  318. <h3>%(HEADER3)s</h3>
  319. </div>
  320. """,
  321. 'xhtmlcss': """\
  322. <?xml version="1.0"?>
  323. <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"\
  324. "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
  325. <html xmlns="http://www.w3.org/1999/xhtml">
  326. <head>
  327. <title>%(HEADER1)s</title>
  328. <meta name="generator" content="http://txt2tags.sf.net" />
  329. <meta http-equiv="Content-Type" content="text/html; charset=%(ENCODING)s" />
  330. <link rel="stylesheet" type="text/css" href="%(STYLE)s" />
  331. </head>
  332. <body>
  333. <div class="header" id="header">
  334. <h1>%(HEADER1)s</h1>
  335. <h2>%(HEADER2)s</h2>
  336. <h3>%(HEADER3)s</h3>
  337. </div>
  338. """,
  339. 'man': """\
  340. .TH "%(HEADER1)s" 1 "%(HEADER3)s" "%(HEADER2)s"
  341. """,
  342. # TODO style to <HR>
  343. 'pm6': """\
  344. <PMTags1.0 win><C-COLORTABLE ("Preto" 1 0 0 0)
  345. ><@Normal=
  346. <FONT "Times New Roman"><CCOLOR "Preto"><SIZE 11>
  347. <HORIZONTAL 100><LETTERSPACE 0><CTRACK 127><CSSIZE 70><C+SIZE 58.3>
  348. <C-POSITION 33.3><C+POSITION 33.3><P><CBASELINE 0><CNOBREAK 0><CLEADING -0.05>
  349. <GGRID 0><GLEFT 7.2><GRIGHT 0><GFIRST 0><G+BEFORE 7.2><G+AFTER 0>
  350. <GALIGNMENT "justify"><GMETHOD "proportional"><G& "ENGLISH">
  351. <GPAIRS 12><G%% 120><GKNEXT 0><GKWIDOW 0><GKORPHAN 0><GTABS $>
  352. <GHYPHENATION 2 34 0><GWORDSPACE 75 100 150><GSPACE -5 0 25>
  353. ><@Bullet=<@-PARENT "Normal"><FONT "Abadi MT Condensed Light">
  354. <GLEFT 14.4><G+BEFORE 2.15><G%% 110><GTABS(25.2 l "")>
  355. ><@PreFormat=<@-PARENT "Normal"><FONT "Lucida Console"><SIZE 8><CTRACK 0>
  356. <GLEFT 0><G+BEFORE 0><GALIGNMENT "left"><GWORDSPACE 100 100 100><GSPACE 0 0 0>
  357. ><@Title1=<@-PARENT "Normal"><FONT "Arial"><SIZE 14><B>
  358. <GCONTENTS><GLEFT 0><G+BEFORE 0><GALIGNMENT "left">
  359. ><@Title2=<@-PARENT "Title1"><SIZE 12><G+BEFORE 3.6>
  360. ><@Title3=<@-PARENT "Title1"><SIZE 10><GLEFT 7.2><G+BEFORE 7.2>
  361. ><@Title4=<@-PARENT "Title3">
  362. ><@Title5=<@-PARENT "Title3">
  363. ><@Quote=<@-PARENT "Normal"><SIZE 10><I>>
  364. %(HEADER1)s
  365. %(HEADER2)s
  366. %(HEADER3)s
  367. """,
  368. 'mgp': """\
  369. #!/usr/X11R6/bin/mgp -t 90
  370. %%deffont "normal" xfont "utopia-medium-r", charset "iso8859-1"
  371. %%deffont "normal-i" xfont "utopia-medium-i", charset "iso8859-1"
  372. %%deffont "normal-b" xfont "utopia-bold-r" , charset "iso8859-1"
  373. %%deffont "normal-bi" xfont "utopia-bold-i" , charset "iso8859-1"
  374. %%deffont "mono" xfont "courier-medium-r", charset "iso8859-1"
  375. %%default 1 size 5
  376. %%default 2 size 8, fore "yellow", font "normal-b", center
  377. %%default 3 size 5, fore "white", font "normal", left, prefix " "
  378. %%tab 1 size 4, vgap 30, prefix " ", icon arc "red" 40, leftfill
  379. %%tab 2 prefix " ", icon arc "orange" 40, leftfill
  380. %%tab 3 prefix " ", icon arc "brown" 40, leftfill
  381. %%tab 4 prefix " ", icon arc "darkmagenta" 40, leftfill
  382. %%tab 5 prefix " ", icon arc "magenta" 40, leftfill
  383. %%%%------------------------- end of headers -----------------------------
  384. %%page
  385. %%size 10, center, fore "yellow"
  386. %(HEADER1)s
  387. %%font "normal-i", size 6, fore "white", center
  388. %(HEADER2)s
  389. %%font "mono", size 7, center
  390. %(HEADER3)s
  391. """,
  392. 'moin': """\
  393. '''%(HEADER1)s'''
  394. ''%(HEADER2)s''
  395. %(HEADER3)s
  396. """,
  397. 'tex': \
  398. r"""\documentclass[11pt,a4paper]{article}
  399. \usepackage{amsfonts,graphicx}
  400. \usepackage[pdfstartview=FitV,urlcolor=blue,colorlinks=true,bookmarks=true]{hyperref}
  401. \usepackage[%(ENCODING)s]{inputenc} %% char encoding
  402. \usepackage{%(STYLE)s} %% user defined package
  403. \pagestyle{plain} %% do page numbering ('empty' turns off)
  404. \frenchspacing %% no aditional spaces after periods
  405. \setlength{\parskip}{8pt}\parindent=0pt %% no paragraph indentation
  406. \title{%(HEADER1)s}
  407. \author{%(HEADER2)s}
  408. \begin{document}
  409. \date{%(HEADER3)s}
  410. \maketitle
  411. \clearpage
  412. """,
  413. 'lout': """\
  414. @SysInclude { doc }
  415. @Document
  416. @InitialFont { Times Base 12p } # Times, Courier, Helvetica, ...
  417. @PageOrientation { Portrait } # Portrait, Landscape
  418. @ColumnNumber { 1 } # Number of columns (2, 3, ...)
  419. @PageHeaders { Simple } # None, Simple, Titles, NoTitles
  420. @InitialLanguage { English } # German, French, Portuguese, ...
  421. @OptimizePages { Yes } # Yes/No smart page break feature
  422. //
  423. @Text @Begin
  424. @Display @Heading { %(HEADER1)s }
  425. @Display @I { %(HEADER2)s }
  426. @Display { %(HEADER3)s }
  427. #@NP # Break page after Headers
  428. """
  429. # @SysInclude { tbl } # Tables support
  430. # setup: @MakeContents { Yes } # show TOC
  431. # setup: @SectionGap # break page at each section
  432. }
  433. ##############################################################################
  434. def getTags(config):
  435. "Returns all the known tags for the specified target"
  436. keys = [
  437. 'paragraphOpen','paragraphClose',
  438. 'title1','title2','title3','title4','title5',
  439. 'title1Open','title1Close','title2Open','title2Close',
  440. 'blocktitle1Open','title1Close','title2Open','title2Close',
  441. 'title3Open','title3Close','title4Open','title4Close',
  442. 'title5Open','title5Close',
  443. 'numtitle1','numtitle2','numtitle3','numtitle4','numtitle5',
  444. 'blockVerbOpen','blockVerbClose',
  445. 'blockQuoteOpen','blockQuoteClose','blockQuoteLine',
  446. 'fontMonoOpen','fontMonoClose',
  447. 'fontBoldOpen','fontBoldClose',
  448. 'fontItalicOpen','fontItalicClose',
  449. 'fontUnderlineOpen','fontUnderlineClose',
  450. 'listOpen','listClose',
  451. 'listItemOpen','listItemClose','listItemLine',
  452. 'numlistOpen','numlistClose',
  453. 'numlistItemOpen','numlistItemClose','numlistItemLine',
  454. 'deflistOpen','deflistClose',
  455. 'deflistItem1Open','deflistItem1Close',
  456. 'deflistItem2Open','deflistItem2Close',
  457. 'bar1','bar2',
  458. 'url','urlMark','email','emailMark',
  459. 'img','imgAlignLeft','imgAlignRight','imgAlignCenter',
  460. 'tableOpen','tableClose',
  461. 'tableRowOpen','tableRowClose','tableRowSep',
  462. 'tableCellOpen','tableCellClose','tableCellSep',
  463. 'tableTitleCellOpen','tableTitleCellClose','tableTitleCellSep',
  464. 'tableTitleRowOpen','tableTitleRowClose',
  465. 'tableBorder', 'tableAlignLeft', 'tableAlignCenter',
  466. 'tableCellAlignLeft','tableCellAlignRight','tableCellAlignCenter',
  467. 'tableColAlignLeft','tableColAlignRight','tableColAlignCenter',
  468. 'tableColAlignSep',
  469. 'anchor','comment','pageBreak',
  470. 'TOC','tocOpen','tocClose',
  471. 'cssOpen', 'cssClose',
  472. 'bodyOpen','bodyClose',
  473. 'EOD'
  474. ]
  475. # TIP: \a represents the current text on the mark
  476. # TIP: ~A~, ~B~ and ~C~ are expanded to other tags parts
  477. alltags = {
  478. 'txt': {
  479. 'title1' : ' \a' ,
  480. 'title2' : '\t\a' ,
  481. 'title3' : '\t\t\a' ,
  482. 'title4' : '\t\t\t\a' ,
  483. 'title5' : '\t\t\t\t\a',
  484. 'blockQuoteLine' : '\t' ,
  485. 'listItemOpen' : '- ' ,
  486. 'numlistItemOpen' : '\a. ' ,
  487. 'bar1' : '\a' ,
  488. 'url' : '\a' ,
  489. 'urlMark' : '\a (\a)' ,
  490. 'email' : '\a' ,
  491. 'emailMark' : '\a (\a)' ,
  492. 'img' : '[\a]' ,
  493. },
  494. 'html': {
  495. 'paragraphOpen' : '<P>' ,
  496. 'paragraphClose' : '</P>' ,
  497. 'title1' : '~A~<H1>\a</H1>' ,
  498. 'title2' : '~A~<H2>\a</H2>' ,
  499. 'title3' : '~A~<H3>\a</H3>' ,
  500. 'title4' : '~A~<H4>\a</H4>' ,
  501. 'title5' : '~A~<H5>\a</H5>' ,
  502. 'blockVerbOpen' : '<PRE>' ,
  503. 'blockVerbClose' : '</PRE>' ,
  504. 'blockQuoteOpen' : '<BLOCKQUOTE>' ,
  505. 'blockQuoteClose' : '</BLOCKQUOTE>' ,
  506. 'fontMonoOpen' : '<CODE>' ,
  507. 'fontMonoClose' : '</CODE>' ,
  508. 'fontBoldOpen' : '<B>' ,
  509. 'fontBoldClose' : '</B>' ,
  510. 'fontItalicOpen' : '<I>' ,
  511. 'fontItalicClose' : '</I>' ,
  512. 'fontUnderlineOpen' : '<U>' ,
  513. 'fontUnderlineClose' : '</U>' ,
  514. 'listOpen' : '<UL>' ,
  515. 'listClose' : '</UL>' ,
  516. 'listItemOpen' : '<LI>' ,
  517. 'numlistOpen' : '<OL>' ,
  518. 'numlistClose' : '</OL>' ,
  519. 'numlistItemOpen' : '<LI>' ,
  520. 'deflistOpen' : '<DL>' ,
  521. 'deflistClose' : '</DL>' ,
  522. 'deflistItem1Open' : '<DT>' ,
  523. 'deflistItem1Close' : '</DT>' ,
  524. 'deflistItem2Open' : '<DD>' ,
  525. 'bar1' : '<HR NOSHADE SIZE=1>' ,
  526. 'bar2' : '<HR NOSHADE SIZE=5>' ,
  527. 'url' : '<A HREF="\a">\a</A>' ,
  528. 'urlMark' : '<A HREF="\a">\a</A>' ,
  529. 'email' : '<A HREF="mailto:\a">\a</A>' ,
  530. 'emailMark' : '<A HREF="mailto:\a">\a</A>' ,
  531. 'img' : '<IMG~A~ SRC="\a" BORDER="0" ALT="">',
  532. 'imgAlignLeft' : ' ALIGN="left"' ,
  533. 'imgAlignCenter' : ' ALIGN="middle"',
  534. 'imgAlignRight' : ' ALIGN="right"' ,
  535. 'tableOpen' : '<TABLE~A~ CELLPADDING="4"~B~>',
  536. 'tableClose' : '</TABLE>' ,
  537. 'tableRowOpen' : '<TR>' ,
  538. 'tableRowClose' : '</TR>' ,
  539. 'tableCellOpen' : '<TD\a>' ,
  540. 'tableCellClose' : '</TD>' ,
  541. 'tableTitleCellOpen' : '<TH>' ,
  542. 'tableTitleCellClose' : '</TH>' ,
  543. 'tableBorder' : ' BORDER="1"' ,
  544. 'tableAlignCenter' : ' ALIGN="center"',
  545. 'tableCellAlignRight' : ' ALIGN="right"' ,
  546. 'tableCellAlignCenter': ' ALIGN="center"',
  547. 'anchor' : '<A NAME="\a"></A>\n',
  548. 'cssOpen' : '<STYLE TYPE="text/css">',
  549. 'cssClose' : '</STYLE>' ,
  550. 'comment' : '<!-- \a -->' ,
  551. 'EOD' : '</BODY></HTML>'
  552. },
  553. #TIP xhtml inherits all HTML definitions (lowercased)
  554. #TIP http://www.w3.org/TR/xhtml1/#guidelines
  555. #TIP http://www.htmlref.com/samples/Chapt17/17_08.htm
  556. 'xhtml': {
  557. 'listItemClose' : '</li>' ,
  558. 'numlistItemClose' : '</li>' ,
  559. 'deflistItem2Close' : '</dd>' ,
  560. 'bar1' : '<hr class="light" />',
  561. 'bar2' : '<hr class="heavy" />',
  562. 'anchor' : '<a id="\a" name="\a"></a>\n',
  563. 'img' : '<img~A~ src="\a" border="0" alt=""/>',
  564. },
  565. 'sgml': {
  566. 'paragraphOpen' : '<p>' ,
  567. 'title1' : '<sect>\a~A~<p>' ,
  568. 'title2' : '<sect1>\a~A~<p>' ,
  569. 'title3' : '<sect2>\a~A~<p>' ,
  570. 'title4' : '<sect3>\a~A~<p>' ,
  571. 'title5' : '<sect4>\a~A~<p>' ,
  572. 'blockVerbOpen' : '<tscreen><verb>' ,
  573. 'blockVerbClose' : '</verb></tscreen>' ,
  574. 'blockQuoteOpen' : '<quote>' ,
  575. 'blockQuoteClose' : '</quote>' ,
  576. 'fontMonoOpen' : '<tt>' ,
  577. 'fontMonoClose' : '</tt>' ,
  578. 'fontBoldOpen' : '<bf>' ,
  579. 'fontBoldClose' : '</bf>' ,
  580. 'fontItalicOpen' : '<em>' ,
  581. 'fontItalicClose' : '</em>' ,
  582. 'fontUnderlineOpen' : '<bf><em>' ,
  583. 'fontUnderlineClose' : '</em></bf>' ,
  584. 'listOpen' : '<itemize>' ,
  585. 'listClose' : '</itemize>' ,
  586. 'listItemOpen' : '<item>' ,
  587. 'numlistOpen' : '<enum>' ,
  588. 'numlistClose' : '</enum>' ,
  589. 'numlistItemOpen' : '<item>' ,
  590. 'deflistOpen' : '<descrip>' ,
  591. 'deflistClose' : '</descrip>' ,
  592. 'deflistItem1Open' : '<tag>' ,
  593. 'deflistItem1Close' : '</tag>' ,
  594. 'bar1' : '<!-- \a -->' ,
  595. 'url' : '<htmlurl url="\a" name="\a">' ,
  596. 'urlMark' : '<htmlurl url="\a" name="\a">' ,
  597. 'email' : '<htmlurl url="mailto:\a" name="\a">' ,
  598. 'emailMark' : '<htmlurl url="mailto:\a" name="\a">' ,
  599. 'img' : '<figure><ph vspace=""><img src="\a">'+\
  600. '</figure>' ,
  601. 'tableOpen' : '<table><tabular ca="~C~">' ,
  602. 'tableClose' : '</tabular></table>' ,
  603. 'tableRowSep' : '<rowsep>' ,
  604. 'tableCellSep' : '<colsep>' ,
  605. 'tableColAlignLeft' : 'l' ,
  606. 'tableColAlignRight' : 'r' ,
  607. 'tableColAlignCenter' : 'c' ,
  608. 'comment' : '<!-- \a -->' ,
  609. 'anchor' : '<label id="\a">' ,
  610. 'TOC' : '<toc>' ,
  611. 'EOD' : '</article>'
  612. },
  613. 'tex': {
  614. 'title1' : '\n\section*{\a}' ,
  615. 'title2' : '\\subsection*{\a}' ,
  616. 'title3' : '\\subsubsection*{\a}',
  617. # title 4/5: DIRTY: para+BF+\\+\n
  618. 'title4' : '\\paragraph{}\\textbf{\a}\\\\\n',
  619. 'title5' : '\\paragraph{}\\textbf{\a}\\\\\n',
  620. 'numtitle1' : '\n\section{\a}' ,
  621. 'numtitle2' : '\\subsection{\a}' ,
  622. 'numtitle3' : '\\subsubsection{\a}' ,
  623. 'blockVerbOpen' : '\\begin{verbatim}' ,
  624. 'blockVerbClose' : '\\end{verbatim}' ,
  625. 'blockQuoteOpen' : '\\begin{quotation}' ,
  626. 'blockQuoteClose' : '\\end{quotation}' ,
  627. 'fontMonoOpen' : '\\texttt{' ,
  628. 'fontMonoClose' : '}' ,
  629. 'fontBoldOpen' : '\\textbf{' ,
  630. 'fontBoldClose' : '}' ,
  631. 'fontItalicOpen' : '\\textit{' ,
  632. 'fontItalicClose' : '}' ,
  633. 'fontUnderlineOpen' : '\\underline{' ,
  634. 'fontUnderlineClose' : '}' ,
  635. 'listOpen' : '\\begin{itemize}' ,
  636. 'listClose' : '\\end{itemize}' ,
  637. 'listItemOpen' : '\\item ' ,
  638. 'numlistOpen' : '\\begin{enumerate}' ,
  639. 'numlistClose' : '\\end{enumerate}' ,
  640. 'numlistItemOpen' : '\\item ' ,
  641. 'deflistOpen' : '\\begin{description}',
  642. 'deflistClose' : '\\end{description}' ,
  643. 'deflistItem1Open' : '\\item[' ,
  644. 'deflistItem1Close' : ']' ,
  645. 'bar1' : '\n\\hrulefill{}\n' ,
  646. 'bar2' : '\n\\rule{\linewidth}{1mm}\n',
  647. 'url' : '\\htmladdnormallink{\a}{\a}',
  648. 'urlMark' : '\\htmladdnormallink{\a}{\a}',
  649. 'email' : '\\htmladdnormallink{\a}{mailto:\a}',
  650. 'emailMark' : '\\htmladdnormallink{\a}{mailto:\a}',
  651. 'img' : '\\includegraphics{\a}',
  652. 'tableOpen' : '\\begin{center}\\begin{tabular}{|~C~|}',
  653. 'tableClose' : '\\end{tabular}\\end{center}',
  654. 'tableRowOpen' : '\\hline ' ,
  655. 'tableRowClose' : ' \\\\' ,
  656. 'tableCellSep' : ' & ' ,
  657. 'tableColAlignLeft' : 'l' ,
  658. 'tableColAlignRight' : 'r' ,
  659. 'tableColAlignCenter' : 'c' ,
  660. 'tableColAlignSep' : '|' ,
  661. 'comment' : '% \a' ,
  662. 'TOC' : '\\tableofcontents',
  663. 'pageBreak' : '\\clearpage',
  664. 'EOD' : '\\end{document}'
  665. },
  666. 'lout': {
  667. 'paragraphOpen' : '@LP' ,
  668. 'blockTitle1Open' : '@BeginSections' ,
  669. 'blockTitle1Close' : '@EndSections' ,
  670. 'blockTitle2Open' : ' @BeginSubSections' ,
  671. 'blockTitle2Close' : ' @EndSubSections' ,
  672. 'blockTitle3Open' : ' @BeginSubSubSections' ,
  673. 'blockTitle3Close' : ' @EndSubSubSections' ,
  674. 'title1Open' : '\n@Section @Title { \a } @Begin',
  675. 'title1Close' : '@End @Section' ,
  676. 'title2Open' : '\n @SubSection @Title { \a } @Begin',
  677. 'title2Close' : ' @End @SubSection' ,
  678. 'title3Open' : '\n @SubSubSection @Title { \a } @Begin',
  679. 'title3Close' : ' @End @SubSubSection' ,
  680. 'title4Open' : '\n@LP @LeftDisplay @B { \a }',
  681. 'title5Open' : '\n@LP @LeftDisplay @B { \a }',
  682. 'anchor' : '@Tag { \a }' ,
  683. 'blockVerbOpen' : '@LP @ID @F @RawVerbatim @Begin',
  684. 'blockVerbClose' : '@End @RawVerbatim' ,
  685. 'blockQuoteOpen' : '@QD {' ,
  686. 'blockQuoteClose' : '}' ,
  687. # enclosed inside {} to deal with joined**words**
  688. 'fontMonoOpen' : '{@F {' ,
  689. 'fontMonoClose' : '}}' ,
  690. 'fontBoldOpen' : '{@B {' ,
  691. 'fontBoldClose' : '}}' ,
  692. 'fontItalicOpen' : '{@II {' ,
  693. 'fontItalicClose' : '}}' ,
  694. 'fontUnderlineOpen' : '{@Underline{' ,
  695. 'fontUnderlineClose' : '}}' ,
  696. # the full form is more readable, but could be BL EL LI NL TL DTI
  697. 'listOpen' : '@BulletList' ,
  698. 'listClose' : '@EndList' ,
  699. 'listItemOpen' : '@ListItem{' ,
  700. 'listItemClose' : '}' ,
  701. 'numlistOpen' : '@NumberedList' ,
  702. 'numlistClose' : '@EndList' ,
  703. 'numlistItemOpen' : '@ListItem{' ,
  704. 'numlistItemClose' : '}' ,
  705. 'deflistOpen' : '@TaggedList' ,
  706. 'deflistClose' : '@EndList' ,
  707. 'deflistItem1Open' : '@DropTagItem {' ,
  708. 'deflistItem1Close' : '}' ,
  709. 'deflistItem2Open' : '{' ,
  710. 'deflistItem2Close' : '}' ,
  711. 'bar1' : '\n@DP @FullWidthRule\n' ,
  712. 'url' : '{blue @Colour { \a }}' ,
  713. 'urlMark' : '\a ({blue @Colour { \a }})' ,
  714. 'email' : '{blue @Colour { \a }}' ,
  715. 'emailMark' : '\a ({blue Colour{ \a }})' ,
  716. 'img' : '~A~@IncludeGraphic { \a }' , # eps only!
  717. 'imgAlignLeft' : '@LeftDisplay ' ,
  718. 'imgAlignRight' : '@RightDisplay ' ,
  719. 'imgAlignCenter' : '@CentredDisplay ' ,
  720. # lout tables are *way* complicated, no support for now
  721. #'tableOpen' : '~A~@Tbl~B~\naformat{ @Cell A | @Cell B } {',
  722. #'tableClose' : '}' ,
  723. #'tableRowOpen' : '@Rowa\n' ,
  724. #'tableTitleRowOpen' : '@HeaderRowa' ,
  725. #'tableCenterAlign' : '@CentredDisplay ' ,
  726. #'tableCellOpen' : '\a {' , # A, B, ...
  727. #'tableCellClose' : '}' ,
  728. #'tableBorder' : '\nrule {yes}' ,
  729. 'comment' : '# \a' ,
  730. # @MakeContents must be on the config file
  731. 'TOC' : '@DP @ContentsGoesHere @DP',
  732. 'pageBreak' : '\n@NP\n' ,
  733. 'EOD' : '@End @Text'
  734. },
  735. 'moin': {
  736. 'title1' : '= \a =' ,
  737. 'title2' : '== \a ==' ,
  738. 'title3' : '=== \a ===' ,
  739. 'title4' : '==== \a ====' ,
  740. 'title5' : '===== \a =====',
  741. 'blockVerbOpen' : '{{{' ,
  742. 'blockVerbClose' : '}}}' ,
  743. 'blockQuoteLine' : ' ' ,
  744. 'fontMonoOpen' : '{{{' ,
  745. 'fontMonoClose' : '}}}' ,
  746. 'fontBoldOpen' : "'''" ,
  747. 'fontBoldClose' : "'''" ,
  748. 'fontItalicOpen' : "''" ,
  749. 'fontItalicClose' : "''" ,
  750. 'fontUnderlineOpen' : "__" ,
  751. 'fontUnderlineClose' : "__" ,
  752. 'listItemOpen' : ' * ' ,
  753. 'numlistItemOpen' : ' \a. ' ,
  754. 'bar1' : '----' ,
  755. 'url' : '[\a]' ,
  756. 'urlMark' : '[\a \a]' ,
  757. 'email' : '[\a]' ,
  758. 'emailMark' : '[\a \a]' ,
  759. 'img' : '[\a]' ,
  760. 'tableRowOpen' : '||' ,
  761. 'tableCellOpen' : '\a' ,
  762. 'tableCellClose' : '||' ,
  763. 'tableTitleCellClose' : '||' ,
  764. 'tableCellAlignRight' : '<)>' ,
  765. 'tableCellAlignCenter': '<:>' ,
  766. 'comment' : '## \a' ,
  767. 'TOC' : '[[TableOfContents]]'
  768. },
  769. 'mgp': {
  770. 'paragraphOpen' : '%font "normal", size 5' ,
  771. 'title1' : '%page\n\n\a\n' ,
  772. 'title2' : '%page\n\n\a\n' ,
  773. 'title3' : '%page\n\n\a\n' ,
  774. 'title4' : '%page\n\n\a\n' ,
  775. 'title5' : '%page\n\n\a\n' ,
  776. 'blockVerbOpen' : '%font "mono"' ,
  777. 'blockVerbClose' : '%font "normal"' ,
  778. 'blockQuoteOpen' : '%prefix " "' ,
  779. 'blockQuoteClose' : '%prefix " "' ,
  780. 'fontMonoOpen' : '\n%cont, font "mono"\n' ,
  781. 'fontMonoClose' : '\n%cont, font "normal"\n' ,
  782. 'fontBoldOpen' : '\n%cont, font "normal-b"\n' ,
  783. 'fontBoldClose' : '\n%cont, font "normal"\n' ,
  784. 'fontItalicOpen' : '\n%cont, font "normal-i"\n' ,
  785. 'fontItalicClose' : '\n%cont, font "normal"\n' ,
  786. 'fontUnderlineOpen' : '\n%cont, fore "cyan"\n' ,
  787. 'fontUnderlineClose' : '\n%cont, fore "white"\n' ,
  788. 'listItemLine' : '\t' ,
  789. 'numlistItemLine' : '\t' ,
  790. 'deflistItem1Open' : '\t\n%cont, font "normal-b"\n',
  791. 'deflistItem1Close' : '\n%cont, font "normal"\n' ,
  792. 'bar1' : '%bar "white" 5' ,
  793. 'bar2' : '%pause' ,
  794. 'url' : '\n%cont, fore "cyan"\n\a' +\
  795. '\n%cont, fore "white"\n' ,
  796. 'urlMark' : '\a \n%cont, fore "cyan"\n\a'+\
  797. '\n%cont, fore "white"\n' ,
  798. 'email' : '\n%cont, fore "cyan"\n\a' +\
  799. '\n%cont, fore "white"\n' ,
  800. 'emailMark' : '\a \n%cont, fore "cyan"\n\a'+\
  801. '\n%cont, fore "white"\n' ,
  802. 'img' : '~A~\n%newimage "\a"\n%left\n',
  803. 'imgAlignLeft' : '\n%left' ,
  804. 'imgAlignRight' : '\n%right' ,
  805. 'imgAlignCenter' : '\n%center' ,
  806. 'comment' : '%% \a' ,
  807. 'pageBreak' : '%page\n\n\n' ,
  808. 'EOD' : '%%EOD'
  809. },
  810. # man groff_man ; man 7 groff
  811. 'man': {
  812. 'paragraphOpen' : '.P' ,
  813. 'title1' : '.SH \a' ,
  814. 'title2' : '.SS \a' ,
  815. 'title3' : '.SS \a' ,
  816. 'title4' : '.SS \a' ,
  817. 'title5' : '.SS \a' ,
  818. 'blockVerbOpen' : '.nf' ,
  819. 'blockVerbClose' : '.fi\n' ,
  820. 'blockQuoteOpen' : '.RS' ,
  821. 'blockQuoteClose' : '.RE' ,
  822. 'fontBoldOpen' : '\\fB' ,
  823. 'fontBoldClose' : '\\fR' ,
  824. 'fontItalicOpen' : '\\fI' ,
  825. 'fontItalicClose' : '\\fR' ,
  826. 'listOpen' : '.RS' ,
  827. 'listItemOpen' : '.IP \(bu 3\n',
  828. 'listClose' : '.RE' ,
  829. 'numlistOpen' : '.RS' ,
  830. 'numlistItemOpen' : '.IP \a. 3\n',
  831. 'numlistClose' : '.RE' ,
  832. 'deflistItem1Open' : '.TP\n' ,
  833. 'bar1' : '\n\n' ,
  834. 'url' : '\a' ,
  835. 'urlMark' : '\a (\a)',
  836. 'email' : '\a' ,
  837. 'emailMark' : '\a (\a)',
  838. 'img' : '\a' ,
  839. 'tableOpen' : '.TS\n~A~~B~tab(^); ~C~.',
  840. 'tableClose' : '.TE' ,
  841. 'tableRowOpen' : ' ' ,
  842. 'tableCellSep' : '^' ,
  843. 'tableAlignCenter' : 'center, ',
  844. 'tableBorder' : 'allbox, ',
  845. 'tableColAlignLeft' : 'l' ,
  846. 'tableColAlignRight' : 'r' ,
  847. 'tableColAlignCenter' : 'c' ,
  848. 'comment' : '.\\" \a'
  849. },
  850. 'pm6': {
  851. 'paragraphOpen' : '<@Normal:>' ,
  852. 'title1' : '\n<@Title1:>\a',
  853. 'title2' : '\n<@Title2:>\a',
  854. 'title3' : '\n<@Title3:>\a',
  855. 'title4' : '\n<@Title4:>\a',
  856. 'title5' : '\n<@Title5:>\a',
  857. 'blockVerbOpen' : '<@PreFormat:>' ,
  858. 'blockQuoteLine' : '<@Quote:>' ,
  859. 'fontMonoOpen' : '<FONT "Lucida Console"><SIZE 9>' ,
  860. 'fontMonoClose' : '<SIZE$><FONT$>',
  861. 'fontBoldOpen' : '<B>' ,
  862. 'fontBoldClose' : '<P>' ,
  863. 'fontItalicOpen' : '<I>' ,
  864. 'fontItalicClose' : '<P>' ,
  865. 'fontUnderlineOpen' : '<U>' ,
  866. 'fontUnderlineClose' : '<P>' ,
  867. 'listOpen' : '<@Bullet:>' ,
  868. 'listItemOpen' : '\x95\t' , # \x95 == ~U
  869. 'numlistOpen' : '<@Bullet:>' ,
  870. 'numlistItemOpen' : '\x95\t' ,
  871. 'bar1' : '\a' ,
  872. 'url' : '<U>\a<P>' , # underline
  873. 'urlMark' : '\a <U>\a<P>' ,
  874. 'email' : '\a' ,
  875. 'emailMark' : '\a \a' ,
  876. 'img' : '\a'
  877. }
  878. }
  879. # exceptions for --css-sugar
  880. if config['css-sugar'] and config['target'] in ('html','xhtml'):
  881. # change just HTML because XHTML inherits it
  882. htmltags = alltags['html']
  883. # table with no cellpadding
  884. htmltags['tableOpen'] = string.replace(
  885. htmltags['tableOpen'], ' CELLPADDING="4"', '')
  886. # DIVs
  887. htmltags['tocOpen' ] = '<DIV CLASS="toc" ID="toc">'
  888. htmltags['tocClose'] = '</DIV>'
  889. htmltags['bodyOpen'] = '<DIV CLASS="body" ID="body">'
  890. htmltags['bodyClose']= '</DIV>'
  891. # make the HTML -> XHTML inheritance
  892. xhtml = alltags['html'].copy()
  893. for key in xhtml.keys(): xhtml[key] = string.lower(xhtml[key])
  894. # some like HTML tags as lowercase, some don't... (headers out)
  895. if HTML_LOWER: alltags['html'] = xhtml.copy()
  896. xhtml.update(alltags['xhtml'])
  897. alltags['xhtml'] = xhtml.copy()
  898. # compose the target tags dictionary
  899. tags = {}
  900. target_tags = alltags[config['target']].copy()
  901. for key in keys: tags[key] = '' # create empty keys
  902. for key in target_tags.keys():
  903. tags[key] = maskEscapeChar(target_tags[key]) # populate
  904. # map strong line to separator if not defined
  905. if not tags['bar2'] and tags['bar1']:
  906. tags['bar2'] = tags['bar1']
  907. return tags
  908. ##############################################################################
  909. def getRules(config):
  910. "Returns all the target-specific syntax rules"
  911. ret = {}
  912. allrules = [
  913. # target rules (ON/OFF)
  914. 'linkable', # target supports external links
  915. 'tableable', # target supports tables
  916. 'imglinkable', # target supports images as links
  917. 'imgalignable', # target supports image alignment
  918. 'imgasdefterm', # target supports image as definition term
  919. 'autonumberlist', # target supports numbered lists natively
  920. 'autonumbertitle', # target supports numbered titles natively
  921. 'parainsidelist', # lists items supports paragraph
  922. 'spacedlistitem', # lists support blank lines between items
  923. 'listnotnested', # lists cannot be nested
  924. 'quotenotnested', # quotes cannot be nested
  925. 'verbblocknotescaped', # don't escape specials in verb block
  926. 'verbblockfinalescape', # do final escapes in verb block
  927. 'escapeurl', # escape special in link URL
  928. 'onelinepara', # dump paragraph as a single long line
  929. 'tabletitlerowinbold', # manually bold any cell on table titles
  930. 'tablecellstrip', # strip extra spaces from each table cell
  931. 'barinsidequote', # bars are allowed inside quote blocks
  932. 'finalescapetitle', # perform final escapes on title lines
  933. 'autotocnewpagebefore', # break page before automatic TOC
  934. 'autotocnewpageafter', # break page after automatic TOC
  935. 'autotocwithbars', # automatic TOC surrounded by bars
  936. 'mapbar2pagebreak', # map the strong bar to a page break
  937. 'titleblocks', # titles must be on open/close section blocks
  938. # target code beautify (ON/OFF)
  939. 'indentverbblock', # add leading spaces to verb block lines
  940. 'breaktablecell', # break lines after any table cell
  941. 'breaktablelineopen', # break line after opening table line
  942. 'notbreaklistopen', # don't break line after opening a new list
  943. 'notbreakparaopen', # don't break line after opening a new para
  944. 'keepquoteindent', # don't remove the leading TABs on quotes
  945. 'keeplistindent', # don't remove the leading spaces on lists
  946. 'blankendmotherlist', # append a blank line at the mother list end
  947. 'blankendtable', # append a blank line at the table end
  948. 'blankendautotoc', # append a blank line at the auto TOC end
  949. 'tagnotindentable', # tags must be placed at the line begining
  950. # value settings
  951. 'listmaxdepth', # maximum depth for lists
  952. 'tablecellaligntype' # type of table cell align: cell, column
  953. ]
  954. rules_bank = {
  955. 'txt' : {
  956. 'indentverbblock':1,
  957. 'spacedlistitem':1,
  958. 'parainsidelist':1,
  959. 'keeplistindent':1,
  960. 'barinsidequote':1,
  961. 'autotocwithbars':1,
  962. 'blankendmotherlist':1
  963. },
  964. 'html': {
  965. 'indentverbblock':1,
  966. 'linkable':1,
  967. 'escapeurl':1,
  968. 'imglinkable':1,
  969. 'imgalignable':1,
  970. 'imgasdefterm':1,
  971. 'autonumberlist':1,
  972. 'spacedlistitem':1,
  973. 'parainsidelist':1,
  974. 'blankendmotherlist':1,
  975. 'tableable':1,
  976. 'tablecellstrip':1,
  977. 'blankendtable':1,
  978. 'breaktablecell':1,
  979. 'breaktablelineopen':1,
  980. 'keeplistindent':1,
  981. 'keepquoteindent':1,
  982. 'barinsidequote':1,
  983. 'autotocwithbars':1,
  984. 'tablecellaligntype':'cell'
  985. },
  986. #TIP xhtml inherits all HTML rules
  987. 'xhtml': {
  988. },
  989. 'sgml': {
  990. 'linkable':1,
  991. 'escapeurl':1,
  992. 'autonumberlist':1,
  993. 'spacedlistitem':1,
  994. 'blankendmotherlist':1,
  995. 'tableable':1,
  996. 'tablecellstrip':1,
  997. 'blankendtable':1,
  998. 'blankendautotoc':1,
  999. 'quotenotnested':1,
  1000. 'keeplistindent':1,
  1001. 'keepquoteindent':1,
  1002. 'barinsidequote':1,
  1003. 'finalescapetitle':1,
  1004. 'tablecellaligntype':'column'
  1005. },
  1006. 'mgp' : {
  1007. 'blankendmotherlist':1,
  1008. 'tagnotindentable':1,
  1009. 'spacedlistitem':1,
  1010. 'imgalignable':1,
  1011. 'autotocnewpagebefore':1,
  1012. },
  1013. 'tex' : {
  1014. 'autonumberlist':1,
  1015. 'autonumbertitle':1,
  1016. 'spacedlistitem':1,
  1017. 'blankendmotherlist':1,
  1018. 'tableable':1,
  1019. 'tablecellstrip':1,
  1020. 'tabletitlerowinbold':1,
  1021. 'blankendtable':1,
  1022. 'verbblocknotescaped':1,
  1023. 'keeplistindent':1,
  1024. 'listmaxdepth':4,
  1025. 'barinsidequote':1,
  1026. 'finalescapetitle':1,
  1027. 'autotocnewpageafter':1,
  1028. 'mapbar2pagebreak':1,
  1029. 'tablecellaligntype':'column'
  1030. },
  1031. 'lout': {
  1032. 'keepquoteindent':1,
  1033. 'escapeurl':1,
  1034. 'verbblocknotescaped':1,
  1035. 'tableable':0,
  1036. 'imgalignable':1,
  1037. 'mapbar2pagebreak':1,
  1038. 'titleblocks':1,
  1039. 'notbreakparaopen':1
  1040. },
  1041. 'moin': {
  1042. 'spacedlistitem':1,
  1043. 'linkable':1,
  1044. 'blankendmotherlist':1,
  1045. 'keeplistindent':1,
  1046. 'tableable':1,
  1047. 'barinsidequote':1,
  1048. 'blankendtable':1,
  1049. 'tabletitlerowinbold':1,
  1050. 'tablecellstrip':1,
  1051. 'autotocwithbars':1,
  1052. 'tablecellaligntype':'cell'
  1053. },
  1054. 'man' : {
  1055. 'spacedlistitem':1,
  1056. 'indentverbblock':1,
  1057. 'blankendmotherlist':1,
  1058. 'tagnotindentable':1,
  1059. 'tableable':1,
  1060. 'tablecellaligntype':'column',
  1061. 'tabletitlerowinbold':1,
  1062. 'tablecellstrip':1,
  1063. 'blankendtable':1,
  1064. 'keeplistindent':0,
  1065. 'barinsidequote':1,
  1066. 'parainsidelist':0,
  1067. },
  1068. 'pm6' : {
  1069. 'keeplistindent':1,
  1070. 'verbblockfinalescape':1,
  1071. #TODO add support for these - maybe set a JOINNEXT char and
  1072. # do it on addLineBreaks()
  1073. 'notbreaklistopen':1,
  1074. 'notbreakparaopen':1,
  1075. 'barinsidequote':1,
  1076. 'autotocwithbars':1,
  1077. 'onelinepara':1,
  1078. }
  1079. }
  1080. # exceptions for --css-sugar
  1081. if config['css-sugar'] and config['target'] in ('html','xhtml'):
  1082. rules_bank['html']['indentverbblock'] = 0
  1083. rules_bank['html']['autotocwithbars'] = 0
  1084. # get the target specific rules
  1085. if config['target'] == 'xhtml':
  1086. myrules = rules_bank['html'].copy() # inheritance
  1087. myrules.update(rules_bank['xhtml']) # get XHTML specific
  1088. else:
  1089. myrules = rules_bank[config['target']].copy()
  1090. # populate return dictionary
  1091. for key in allrules: ret[key] = 0 # reset all
  1092. ret.update(myrules) # get rules
  1093. return ret
  1094. ##############################################################################
  1095. def getRegexes():
  1096. "Returns all the regexes used to find the t2t marks"
  1097. bank = {
  1098. 'blockVerbOpen':
  1099. re.compile(r'^```\s*$'),
  1100. 'blockVerbClose':
  1101. re.compile(r'^```\s*$'),
  1102. 'blockRawOpen':
  1103. re.compile(r'^"""\s*$'),
  1104. 'blockRawClose':
  1105. re.compile(r'^"""\s*$'),
  1106. 'quote':
  1107. re.compile(r'^\t+'),
  1108. '1lineVerb':
  1109. re.compile(r'^``` (?=.)'),
  1110. '1lineRaw':
  1111. re.compile(r'^""" (?=.)'),
  1112. # mono, raw, bold, italic, underline:
  1113. # - marks must be glued with the contents, no boundary spaces
  1114. # - they are greedy, so in ****bold****, turns to <b>**bold**</b>
  1115. 'fontMono':
  1116. re.compile( r'``([^\s](|.*?[^\s])`*)``'),
  1117. 'raw':
  1118. re.compile( r'""([^\s](|.*?[^\s])"*)""'),
  1119. 'fontBold':
  1120. re.compile(r'\*\*([^\s](|.*?[^\s])\**)\*\*'),
  1121. 'fontItalic':
  1122. re.compile( r'//([^\s](|.*?[^\s])/*)//'),
  1123. 'fontUnderline':
  1124. re.compile( r'__([^\s](|.*?[^\s])_*)__'),
  1125. 'list':
  1126. re.compile(r'^( *)(-) (?=[^ ])'),
  1127. 'numlist':
  1128. re.compile(r'^( *)(\+) (?=[^ ])'),
  1129. 'deflist':
  1130. re.compile(r'^( *)(:) (.*)$'),
  1131. 'listclose':
  1132. re.compile(r'^( *)([-+:])\s*$'),
  1133. 'bar':
  1134. re.compile(r'^(\s*)([_=-]{20,})\s*$'),
  1135. 'table':
  1136. re.compile(r'^ *\|\|? '),
  1137. 'blankline':
  1138. re.compile(r'^\s*$'),
  1139. 'comment':
  1140. re.compile(r'^%'),
  1141. # auxiliar tag regexes
  1142. '_imgAlign' : re.compile(r'~A~',re.I),
  1143. '_tableAlign' : re.compile(r'~A~',re.I),
  1144. '_anchor' : re.compile(r'~A~',re.I),
  1145. '_tableBorder' : re.compile(r'~B~',re.I),
  1146. '_tableColAlign': re.compile(r'~C~',re.I),
  1147. }
  1148. # special char to place data on TAGs contents (\a == bell)
  1149. bank['x'] = re.compile('\a')
  1150. # %%macroname [ (formatting) ]
  1151. bank['macros'] = re.compile(r'%%%%(?P<name>%s)\b(\((?P<fmt>.*?)\))?'%(
  1152. string.join(MACROS.keys(), '|')), re.I)
  1153. # %%TOC special macro for TOC positioning
  1154. bank['toc'] = re.compile(r'^ *%%toc\s*$', re.I)
  1155. # almost complicated title regexes ;)
  1156. titskel = r'^ *(?P<id>%s)(?P<txt>%s)\1(\[(?P<label>[\w-]*)\])?\s*$'
  1157. bank[ 'title'] = re.compile(titskel%('[=]{1,5}','[^=](|.*[^=])'))
  1158. bank['numtitle'] = re.compile(titskel%('[+]{1,5}','[^+](|.*[^+])'))
  1159. ### complicated regexes begin here ;)
  1160. #
  1161. # textual descriptions on --help's style: [...] is optional, | is OR
  1162. ### first, some auxiliar variables
  1163. #
  1164. # [image.EXT]
  1165. patt_img = r'\[([\w_,.+%$#@!?+~/-]+\.(png|jpe?g|gif|eps|bmp))\]'
  1166. # link things
  1167. urlskel = {
  1168. 'proto' : r'(https?|ftp|news|telnet|gopher|wais)://',
  1169. 'guess' : r'(www[23]?|ftp)\.', # w/out proto, try to guess
  1170. 'login' : r'A-Za-z0-9_.-', # for ftp://login@domain.com
  1171. 'pass' : r'[^ @]*', # for ftp://login:pass@dom.com
  1172. 'chars' : r'A-Za-z0-9%._/~:,=$@&+-', # %20(space), :80(port), D&D
  1173. 'anchor': r'A-Za-z0-9%._-', # %nn(encoded)
  1174. 'form' : r'A-Za-z0-9/%&=+;.,$@*_-', # .,@*_-(as is)
  1175. 'punct' : r'.,;:!?'
  1176. }
  1177. # username [ :password ] @
  1178. patt_url_login = r'([%s]+(:%s)?@)?'%(urlskel['login'],urlskel['pass'])
  1179. # [ http:// ] [ username:password@ ] domain.com [ / ]
  1180. # [ #anchor | ?form=data ]
  1181. retxt_url = r'\b(%s%s|%s)[%s]+\b/*(\?[%s]+)?(#[%s]+)?'%(
  1182. urlskel['proto'],patt_url_login, urlskel['guess'],
  1183. urlskel['chars'],urlskel['form'],urlskel['anchor'])
  1184. # filename | [ filename ] #anchor
  1185. retxt_url_local =

Large files are truncated, but you can click here to view the full file