PageRenderTime 108ms CodeModel.GetById 30ms RepoModel.GetById 1ms app.codeStats 1ms

/markup/txt2tags.py

https://bitbucket.org/fgallaire/tornablog
Python | 5678 lines | 5118 code | 218 blank | 342 comment | 221 complexity | 48a2e3351921a057493fab5baa96a4a5 MD5 | raw file
Possible License(s): AGPL-3.0
  1. #!/usr/bin/env python
  2. # txt2tags - generic text conversion tool
  3. # http://txt2tags.sf.net
  4. #
  5. # Copyright 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 Aurelio Jargas
  6. #
  7. # This program is free software; you can redistribute it and/or modify
  8. # it under the terms of the GNU General Public License as published by
  9. # the Free Software Foundation, version 2.
  10. #
  11. # This program is distributed in the hope that it will be useful,
  12. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. # GNU General Public License for more details.
  15. #
  16. # You have received a copy of the GNU General Public License along
  17. # with this program, on the COPYING file.
  18. #
  19. ########################################################################
  20. #
  21. # BORING CODE EXPLANATION AHEAD
  22. #
  23. # Just read it if you wish to understand how the txt2tags code works.
  24. #
  25. ########################################################################
  26. #
  27. # The code that [1] parses the marked text is separated from the
  28. # code that [2] inserts the target tags.
  29. #
  30. # [1] made by: def convert()
  31. # [2] made by: class BlockMaster
  32. #
  33. # The structures of the marked text are identified and their contents
  34. # are extracted into a data holder (Python lists and dictionaries).
  35. #
  36. # When parsing the source file, the blocks (para, lists, quote, table)
  37. # are opened with BlockMaster as soon as they are found. Their contents,
  38. # which span several lines, are then fed into a special holder on the
  39. # BlockMaster instance. Only when the block is closed are the target tags
  40. # inserted for the full block as a whole, in one pass. This way, we
  41. # have better control over blocks. Much better than the previous
  42. # line-by-line approach.
  43. #
  44. # In other words, whenever inside a block, the parser *holds* the tag
  45. # insertion process, waiting until the full block is read. That was
  46. # needed primarily to close paragraphs for the XHTML target, but it
  47. # proved to be a very good addition, improving much other processing.
  48. #
  49. # -------------------------------------------------------------------
  50. #
  51. # These important classes are all documented:
  52. # CommandLine, SourceDocument, ConfigMaster, ConfigLines.
  53. #
  54. # There is a RAW Config format and all kinds of configuration are first
  55. # converted to this format. Then a generic method parses it.
  56. #
  57. # These functions get information about the input file(s) and take
  58. # care of the init processing:
  59. # get_infiles_config(), process_source_file() and convert_this_files()
  60. #
  61. ########################################################################
  62. #XXX Python coding warning
  63. # Avoid common mistakes:
  64. # - do NOT use newlist=list instead of newlist=list[:]
  65. # - do NOT use newdic=dic instead of newdic=dic.copy()
  66. # - do NOT use dic[key] instead of dic.get(key)
  67. # - do NOT use del dic[key] without has_key() before
  68. #XXX Smart Image Align doesn't work if the image is a link
  69. # Can't fix that because the image is expanded together with the
  70. # link, at the linkbank filling moment. Only the image is passed
  71. # to parse_images(), not the full line, so it is always 'middle'.
  72. #XXX Paragraph separation not valid inside Quote
  73. # Quote will not have <p></p> inside, instead will close and open
  74. # again the <blockquote>. This really sux in CSS, when defining a
  75. # different background color. Still don't know how to fix it.
  76. #XXX TODO (maybe)
  77. # New mark or macro which expands to an anchor full title.
  78. # It is necessary to parse the full document in this order:
  79. # DONE 1st scan: HEAD: get all settings, including %!includeconf
  80. # DONE 2nd scan: BODY: expand includes & apply %!preproc
  81. # 3rd scan: BODY: read titles and compose TOC info
  82. # 4th scan: BODY: full parsing, expanding [#anchor] 1st
  83. # Steps 2 and 3 can be made together, with no tag adding.
  84. # Two complete body scans will be *slow*, don't know if it is worth it.
  85. # One solution may be to add the titles as postproc rules
  86. ##############################################################################
  87. # User config (1=ON, 0=OFF)
  # These switches are meant to be edited by hand. USE_I18N is the one
  # consulted right below, when the `_` translation function is set up.
  88. USE_I18N = 1 # use gettext for i18ned messages? (default is 1)
  89. COLOR_DEBUG = 1 # show debug messages in colors? (default is 1)
  90. BG_LIGHT = 0 # your terminal background color is light (default is 0)
  91. HTML_LOWER = 0 # use lowercased HTML tags instead of upper? (default is 0)
  92. ##############################################################################
  93. # These are all the core Python modules used by txt2tags (KISS!)
  94. import re, os, sys, time, getopt
  95. # Program information
  # Identification strings; VERSIONSTR (defined further down) is built from
  # my_name, my_version and my_url.
  96. my_url = 'http://txt2tags.sf.net'
  97. my_name = 'txt2tags'
  98. my_email = 'verde@aurelio.net'
  99. my_version = '2.6b'
  # i18n - use gettext only when it is enabled and a catalog is available.
  # Either way, `_` ends up bound to a callable taking a message and
  # returning the string to show.
  if USE_I18N:
      try:
          import gettext
          # If your locale dir is different, change it here.
          # gettext.translation() is the documented name of the function
          # that the undocumented gettext.Catalog alias points to.
          cat = gettext.translation('txt2tags', localedir='/usr/share/locale/')
          _ = cat.gettext
      except Exception:
          # Best effort: a missing module or message catalog must not stop
          # the program, so fall back to untranslated messages. Unlike a
          # bare "except:", this lets KeyboardInterrupt and SystemExit
          # propagate.
          _ = lambda x: x
  else:
      # i18n disabled by the user: messages pass through unchanged
      _ = lambda x: x
  111. # FLAGS : the conversion related flags , may be used in %!options
  112. # OPTIONS : the conversion related options, may be used in %!options
  113. # ACTIONS : the other behavior modifiers, valid on command line only
  114. # MACROS : the valid macros with their default values for formatting
  115. # SETTINGS: global miscellaneous settings, valid on RC file only
  116. # NO_TARGET: actions that don't require a target specification
  117. # NO_MULTI_INPUT: actions that don't accept more than one input file
  118. # CONFIG_KEYWORDS: the valid %!key:val keywords
  119. #
  120. # FLAGS and OPTIONS are configs that affect the converted document.
  121. # They usually have also a --no-<option> to turn them OFF.
  122. #
  123. # ACTIONS are needed because when doing multiple input files, strange
  124. # behavior would be found, such as using the command line interface for
  125. # the first file and the GUI for the second. There is no --no-<action>.
  126. # --version and --help inside %!options are also odd
  127. #
  # Names of every supported output target, in the order shown by USAGE.
  128. TARGETS = 'html xhtml sgml dbk tex lout man mgp wiki gwiki doku pmw moin pm6 txt art adoc'.split()
  # Default value of each on/off flag (1=ON, 0=OFF).
  # NOTE(review): 'css-suggar' looks like a misspelled twin of 'css-sugar',
  # presumably kept so older command lines/configs keep working -- confirm
  # before removing it.
  129. FLAGS = {'headers' :1 , 'enum-title' :0 , 'mask-email' :0 ,
  130. 'toc-only' :0 , 'toc' :0 , 'rc' :1 ,
  131. 'css-sugar' :0 , 'css-suggar' :0 , 'css-inside' :0 ,
  132. 'quiet' :0 }
  # Default value of each option that carries a value.
  133. OPTIONS = {'target' :'', 'toc-level' :3 , 'style' :'',
  134. 'infile' :'', 'outfile' :'', 'encoding' :'',
  135. 'config-file':'', 'split' :0 , 'lang' :'',
  136. 'show-config-value':'', 'ascii-art' :''}
  137. ACTIONS = {'help' :0 , 'version' :0 , 'gui' :0 ,
  138. 'verbose' :0 , 'debug' :0 , 'dump-config':0 ,
  139. 'dump-source':0 }
  # Default strftime-style format for each macro ('%f' for the file macros).
  140. MACROS = {'date' : '%Y%m%d', 'infile': '%f',
  141. 'mtime': '%Y%m%d', 'outfile': '%f'}
  142. SETTINGS = {} # for future use
  # Note that 'toc-only' is a FLAGS key; the other entries are ACTIONS keys.
  143. NO_TARGET = ['help', 'version', 'gui', 'toc-only', 'dump-config', 'dump-source']
  144. NO_MULTI_INPUT = ['gui','dump-config','dump-source']
  145. CONFIG_KEYWORDS = [
  146. 'target', 'encoding', 'style', 'options', 'preproc','postproc',
  147. 'guicolors']
  # Translatable, human-readable label for every entry of TARGETS.
  # `_` must already be defined when this dict is built.
  148. TARGET_NAMES = {
  149. 'html' : _('HTML page'),
  150. 'xhtml': _('XHTML page'),
  151. 'sgml' : _('SGML document'),
  152. 'dbk' : _('DocBook document'),
  153. 'tex' : _('LaTeX document'),
  154. 'lout' : _('Lout document'),
  155. 'man' : _('UNIX Manual page'),
  156. 'mgp' : _('MagicPoint presentation'),
  157. 'wiki' : _('Wikipedia page'),
  158. 'gwiki': _('Google Wiki page'),
  159. 'doku' : _('DokuWiki page'),
  160. 'pmw' : _('pmWiki page'),
  161. 'moin' : _('MoinMoin page'),
  162. 'pm6' : _('PageMaker document'),
  163. 'txt' : _('Plain Text'),
  164. 'art' : _('Ascii Art'),
  165. 'adoc' : _('AsciiDoc'),
  166. }
  167. DEBUG = 0 # do not edit here, please use --debug
  168. VERBOSE = 0 # do not edit here, please use -v, -vv or -vvv
  169. QUIET = 0 # do not edit here, please use --quiet
  170. GUI = 0 # do not edit here, please use --gui
  171. AUTOTOC = 1 # do not edit here, please use --no-toc or %%toc
  # AA_CHARS pairs each name in AA_LCHARS with the character at the same
  # position of the 9-char string: coin '+', line '-', border '|', bar1 '-',
  # bar2 '=', level2 '=', level3 '-', level4 '^', level5 '"'.
  172. AA_LCHARS = ['coin','line','border','bar1','bar2','level2','level3','level4','level5']
  173. AA_CHARS = dict(zip(AA_LCHARS,'+-|-==-^"')) # do not edit here, please use --ascii-art or -a
  # Empty module-level holders; they are filled in by code outside this
  # part of the file.
  174. RC_RAW = []
  175. CMDLINE_RAW = []
  176. CONF = {}
  177. BLOCK = None
  178. regex = {}
  179. TAGS = {}
  180. rules = {}
  181. lang = 'english'
  182. TARGET = ''
  # '-' stands for the standard streams (see the USAGE text below).
  183. STDIN = STDOUT = '-'
  184. MODULEIN = MODULEOUT = '-module-'
  # NUL and SOH control characters.
  # NOTE(review): presumably internal markers that cannot clash with user
  # text -- their use is not visible in this part of the file; confirm.
  185. ESCCHAR = '\x00'
  186. SEPARATOR = '\x01'
  187. LISTNAMES = {'-':'list', '+':'numlist', ':':'deflist'}
  188. LINEBREAK = {'default':'\n', 'win':'\r\n', 'mac':'\r'}
  189. # Platform specific settings
  # The first three letters of sys.platform select the line break; any
  # platform without its own LINEBREAK entry gets the 'default' one.
  190. LB = LINEBREAK.get(sys.platform[:3]) or LINEBREAK['default']
  191. VERSIONSTR = _("%s version %s <%s>")%(my_name,my_version,my_url)
  # Usage/help text, assembled line by line and joined with '\n'.
  # Lines wrapped in _() are translatable; the lines that only list target
  # names, ascii-art char names, --no-* options or the URL are not.
  # The TARGETS list is split over two rows: the first 8 names, then the rest.
  192. USAGE = '\n'.join([
  193. '',
  194. _("Usage: %s [OPTIONS] [infile.t2t ...]") % my_name,
  195. '',
  196. _(" -t, --target=TYPE set target document type. currently supported:"),
  197. ' %s,' % ', '.join(TARGETS[:8]),
  198. ' %s' % ', '.join(TARGETS[8:]),
  199. _(" -i, --infile=FILE set FILE as the input file name ('-' for STDIN)"),
  200. _(" -o, --outfile=FILE set FILE as the output file name ('-' for STDOUT)"),
  201. _(" -H, --no-headers suppress header, title and footer contents"),
  202. _(" --headers show header, title and footer contents (default ON)"),
  203. _(" --encoding=ENC set target file encoding (utf-8, iso-8859-1, etc)"),
  204. _(" --style=FILE use FILE as the document style (like HTML CSS)"),
  205. _(" --css-sugar insert CSS-friendly tags for HTML and XHTML targets"),
  206. _(" --css-inside insert CSS file contents inside HTML/XHTML headers"),
  207. _(" --mask-email hide email from spam robots. x@y.z turns <x (a) y z>"),
  208. _(" --toc add TOC (Table of Contents) to target document"),
  209. _(" --toc-only print document TOC and exit"),
  210. _(" --toc-level=N set maximum TOC level (depth) to N"),
  211. _(" -n, --enum-title enumerate all titles as 1, 1.1, 1.1.1, etc"),
  212. _(" -a, --ascii-art=S set the ascii art chars with the string S. in the order:"),
  213. ' %s' % ', '.join(AA_LCHARS),
  214. _(" -C, --config-file=F read config from file F"),
  215. _(" --rc read user config file ~/.txt2tagsrc (default ON)"),
  216. _(" --gui invoke Graphical Tk Interface"),
  217. _(" -q, --quiet quiet mode, suppress all output (except errors)"),
  218. _(" -v, --verbose print informative messages during conversion"),
  219. _(" -h, --help print this help information and exit"),
  220. _(" -V, --version print program version and exit"),
  221. _(" --dump-config print all the config found and exit"),
  222. _(" --dump-source print the document source, with includes expanded"),
  223. '',
  224. _("Turn OFF options:"),
  225. " --no-outfile, --no-infile, --no-style, --no-encoding, --no-headers",
  226. " --no-toc, --no-toc-only, --no-mask-email, --no-enum-title, --no-rc",
  227. " --no-css-sugar, --no-css-inside, --no-quiet, --no-dump-config",
  228. " --no-dump-source",
  229. '',
  230. _("Example:\n %s -t html --toc myfile.t2t") % my_name,
  231. '',
  232. _("By default, converted output is saved to 'infile.<target>'."),
  233. _("Use --outfile to force an output file name."),
  234. _("If input file is '-', reads from STDIN."),
  235. _("If output file is '-', dumps output to STDOUT."),
  236. '',
  237. 'http://txt2tags.sourceforge.net',
  238. ''
  239. ])
  240. ##############################################################################
  241. # Here are all the targets' templates
  242. # You may edit them to fit your needs
  243. # - the %(HEADERn)s strings represent the Header lines
  244. # - the %(STYLE)s string is changed by --style contents
  245. # - the %(ENCODING)s string is changed by --encoding contents
  246. # - if any of the above is empty, the full line is removed
  247. # - use %% to represent a literal %
  248. #
  # Maps a target name to its header template string.
  # Keys cover every entry of TARGETS plus the 'htmlcss' and 'xhtmlcss'
  # variants, which wrap the headers in a block with class/id "header"
  # instead of the centering/font markup of 'html' and 'xhtml'.
  # NOTE(review): presumably the *css variants are chosen when --css-sugar
  # is on -- the selecting code is not in this part of the file; confirm.
  249. HEADER_TEMPLATE = {
  250. 'art':"""
  251. Fake template to respect the general process.
  252. """,
  253. 'txt': """\
  254. %(HEADER1)s
  255. %(HEADER2)s
  256. %(HEADER3)s
  257. """,
  258. 'sgml': """\
  259. <!doctype linuxdoc system>
  260. <article>
  261. <title>%(HEADER1)s
  262. <author>%(HEADER2)s
  263. <date>%(HEADER3)s
  264. """,
  265. 'html': """\
  266. <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
  267. <HTML>
  268. <HEAD>
  269. <META NAME="generator" CONTENT="http://txt2tags.sf.net">
  270. <META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=%(ENCODING)s">
  271. <LINK REL="stylesheet" TYPE="text/css" HREF="%(STYLE)s">
  272. <TITLE>%(HEADER1)s</TITLE>
  273. </HEAD><BODY BGCOLOR="white" TEXT="black">
  274. <CENTER>
  275. <H1>%(HEADER1)s</H1>
  276. <FONT SIZE="4"><I>%(HEADER2)s</I></FONT><BR>
  277. <FONT SIZE="4">%(HEADER3)s</FONT>
  278. </CENTER>
  279. """,
  280. 'htmlcss': """\
  281. <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
  282. <HTML>
  283. <HEAD>
  284. <META NAME="generator" CONTENT="http://txt2tags.sf.net">
  285. <META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=%(ENCODING)s">
  286. <LINK REL="stylesheet" TYPE="text/css" HREF="%(STYLE)s">
  287. <TITLE>%(HEADER1)s</TITLE>
  288. </HEAD>
  289. <BODY>
  290. <DIV CLASS="header" ID="header">
  291. <H1>%(HEADER1)s</H1>
  292. <H2>%(HEADER2)s</H2>
  293. <H3>%(HEADER3)s</H3>
  294. </DIV>
  295. """,
  296. 'xhtml': """\
  297. <?xml version="1.0"
  298. encoding="%(ENCODING)s"
  299. ?>
  300. <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"\
  301. "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
  302. <html xmlns="http://www.w3.org/1999/xhtml">
  303. <head>
  304. <title>%(HEADER1)s</title>
  305. <meta name="generator" content="http://txt2tags.sf.net" />
  306. <link rel="stylesheet" type="text/css" href="%(STYLE)s" />
  307. </head>
  308. <body bgcolor="white" text="black">
  309. <div align="center">
  310. <h1>%(HEADER1)s</h1>
  311. <h2>%(HEADER2)s</h2>
  312. <h3>%(HEADER3)s</h3>
  313. </div>
  314. """,
  315. 'xhtmlcss': """\
  316. <?xml version="1.0"
  317. encoding="%(ENCODING)s"
  318. ?>
  319. <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"\
  320. "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
  321. <html xmlns="http://www.w3.org/1999/xhtml">
  322. <head>
  323. <title>%(HEADER1)s</title>
  324. <meta name="generator" content="http://txt2tags.sf.net" />
  325. <link rel="stylesheet" type="text/css" href="%(STYLE)s" />
  326. </head>
  327. <body>
  328. <div class="header" id="header">
  329. <h1>%(HEADER1)s</h1>
  330. <h2>%(HEADER2)s</h2>
  331. <h3>%(HEADER3)s</h3>
  332. </div>
  333. """,
  334. 'dbk': """\
  335. <?xml version="1.0"
  336. encoding="%(ENCODING)s"
  337. ?>
  338. <!DOCTYPE article PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN"\
  339. "docbook/dtd/xml/4.5/docbookx.dtd">
  340. <article lang="en">
  341. <articleinfo>
  342. <title>%(HEADER1)s</title>
  343. <authorgroup>
  344. <author><othername>%(HEADER2)s</othername></author>
  345. </authorgroup>
  346. <date>%(HEADER3)s</date>
  347. </articleinfo>
  348. """,
  349. 'man': """\
  350. .TH "%(HEADER1)s" 1 "%(HEADER3)s" "%(HEADER2)s"
  351. """,
  352. # TODO style to <HR>
  353. 'pm6': """\
  354. <PMTags1.0 win><C-COLORTABLE ("Preto" 1 0 0 0)
  355. ><@Normal=
  356. <FONT "Times New Roman"><CCOLOR "Preto"><SIZE 11>
  357. <HORIZONTAL 100><LETTERSPACE 0><CTRACK 127><CSSIZE 70><C+SIZE 58.3>
  358. <C-POSITION 33.3><C+POSITION 33.3><P><CBASELINE 0><CNOBREAK 0><CLEADING -0.05>
  359. <GGRID 0><GLEFT 7.2><GRIGHT 0><GFIRST 0><G+BEFORE 7.2><G+AFTER 0>
  360. <GALIGNMENT "justify"><GMETHOD "proportional"><G& "ENGLISH">
  361. <GPAIRS 12><G%% 120><GKNEXT 0><GKWIDOW 0><GKORPHAN 0><GTABS $>
  362. <GHYPHENATION 2 34 0><GWORDSPACE 75 100 150><GSPACE -5 0 25>
  363. ><@Bullet=<@-PARENT "Normal"><FONT "Abadi MT Condensed Light">
  364. <GLEFT 14.4><G+BEFORE 2.15><G%% 110><GTABS(25.2 l "")>
  365. ><@PreFormat=<@-PARENT "Normal"><FONT "Lucida Console"><SIZE 8><CTRACK 0>
  366. <GLEFT 0><G+BEFORE 0><GALIGNMENT "left"><GWORDSPACE 100 100 100><GSPACE 0 0 0>
  367. ><@Title1=<@-PARENT "Normal"><FONT "Arial"><SIZE 14><B>
  368. <GCONTENTS><GLEFT 0><G+BEFORE 0><GALIGNMENT "left">
  369. ><@Title2=<@-PARENT "Title1"><SIZE 12><G+BEFORE 3.6>
  370. ><@Title3=<@-PARENT "Title1"><SIZE 10><GLEFT 7.2><G+BEFORE 7.2>
  371. ><@Title4=<@-PARENT "Title3">
  372. ><@Title5=<@-PARENT "Title3">
  373. ><@Quote=<@-PARENT "Normal"><SIZE 10><I>>
  374. %(HEADER1)s
  375. %(HEADER2)s
  376. %(HEADER3)s
  377. """,
  # Each %% in this template stands for one literal % in the output.
  378. 'mgp': """\
  379. #!/usr/X11R6/bin/mgp -t 90
  380. %%deffont "normal" xfont "utopia-medium-r", charset "iso8859-1"
  381. %%deffont "normal-i" xfont "utopia-medium-i", charset "iso8859-1"
  382. %%deffont "normal-b" xfont "utopia-bold-r" , charset "iso8859-1"
  383. %%deffont "normal-bi" xfont "utopia-bold-i" , charset "iso8859-1"
  384. %%deffont "mono" xfont "courier-medium-r", charset "iso8859-1"
  385. %%default 1 size 5
  386. %%default 2 size 8, fore "yellow", font "normal-b", center
  387. %%default 3 size 5, fore "white", font "normal", left, prefix " "
  388. %%tab 1 size 4, vgap 30, prefix " ", icon arc "red" 40, leftfill
  389. %%tab 2 prefix " ", icon arc "orange" 40, leftfill
  390. %%tab 3 prefix " ", icon arc "brown" 40, leftfill
  391. %%tab 4 prefix " ", icon arc "darkmagenta" 40, leftfill
  392. %%tab 5 prefix " ", icon arc "magenta" 40, leftfill
  393. %%%%------------------------- end of headers -----------------------------
  394. %%page
  395. %%size 10, center, fore "yellow"
  396. %(HEADER1)s
  397. %%font "normal-i", size 6, fore "white", center
  398. %(HEADER2)s
  399. %%font "mono", size 7, center
  400. %(HEADER3)s
  401. """,
  402. 'moin': """\
  403. '''%(HEADER1)s'''
  404. ''%(HEADER2)s''
  405. %(HEADER3)s
  406. """,
  407. 'gwiki': """\
  408. *%(HEADER1)s*
  409. %(HEADER2)s
  410. _%(HEADER3)s_
  411. """,
  412. 'adoc': """\
  413. %(HEADER1)s
  414. %(HEADER2)s
  415. %(HEADER3)s
  416. """,
  417. 'doku': """\
  418. ===== %(HEADER1)s =====
  419. **//%(HEADER2)s//**
  420. //%(HEADER3)s//
  421. """,
  422. 'pmw': """\
  423. (:Title %(HEADER1)s:)
  424. (:Description %(HEADER2)s:)
  425. (:Summary %(HEADER3)s:)
  426. """,
  427. 'wiki': """\
  428. '''%(HEADER1)s'''
  429. %(HEADER2)s
  430. ''%(HEADER3)s''
  431. """,
  # Raw string, so the LaTeX backslashes are kept as written.
  432. 'tex': \
  433. r"""\documentclass{article}
  434. \usepackage{graphicx}
  435. \usepackage{paralist} %% needed for compact lists
  436. \usepackage[normalem]{ulem} %% needed by strike
  437. \usepackage[urlcolor=blue,colorlinks=true]{hyperref}
  438. \usepackage[%(ENCODING)s]{inputenc} %% char encoding
  439. \usepackage{%(STYLE)s} %% user defined
  440. \title{%(HEADER1)s}
  441. \author{%(HEADER2)s}
  442. \begin{document}
  443. \date{%(HEADER3)s}
  444. \maketitle
  445. \clearpage
  446. """,
  447. 'lout': """\
  448. @SysInclude { doc }
  449. @Document
  450. @InitialFont { Times Base 12p } # Times, Courier, Helvetica, ...
  451. @PageOrientation { Portrait } # Portrait, Landscape
  452. @ColumnNumber { 1 } # Number of columns (2, 3, ...)
  453. @PageHeaders { Simple } # None, Simple, Titles, NoTitles
  454. @InitialLanguage { English } # German, French, Portuguese, ...
  455. @OptimizePages { Yes } # Yes/No smart page break feature
  456. //
  457. @Text @Begin
  458. @Display @Heading { %(HEADER1)s }
  459. @Display @I { %(HEADER2)s }
  460. @Display { %(HEADER3)s }
  461. #@NP # Break page after Headers
  462. """
  463. # @SysInclude { tbl } # Tables support
  464. # setup: @MakeContents { Yes } # show TOC
  465. # setup: @SectionGap # break page at each section
  466. }
  467. ##############################################################################
  468. def getTags(config):
  469. "Returns all the known tags for the specified target"
  470. keys = """
  471. title1 numtitle1
  472. title2 numtitle2
  473. title3 numtitle3
  474. title4 numtitle4
  475. title5 numtitle5
  476. title1Open title1Close
  477. title2Open title2Close
  478. title3Open title3Close
  479. title4Open title4Close
  480. title5Open title5Close
  481. blocktitle1Open blocktitle1Close
  482. blocktitle2Open blocktitle2Close
  483. blocktitle3Open blocktitle3Close
  484. paragraphOpen paragraphClose
  485. blockVerbOpen blockVerbClose
  486. blockQuoteOpen blockQuoteClose blockQuoteLine
  487. blockCommentOpen blockCommentClose
  488. fontMonoOpen fontMonoClose
  489. fontBoldOpen fontBoldClose
  490. fontItalicOpen fontItalicClose
  491. fontUnderlineOpen fontUnderlineClose
  492. fontStrikeOpen fontStrikeClose
  493. listOpen listClose
  494. listOpenCompact listCloseCompact
  495. listItemOpen listItemClose listItemLine
  496. numlistOpen numlistClose
  497. numlistOpenCompact numlistCloseCompact
  498. numlistItemOpen numlistItemClose numlistItemLine
  499. deflistOpen deflistClose
  500. deflistOpenCompact deflistCloseCompact
  501. deflistItem1Open deflistItem1Close
  502. deflistItem2Open deflistItem2Close deflistItem2LinePrefix
  503. bar1 bar2
  504. url urlMark
  505. email emailMark
  506. img imgAlignLeft imgAlignRight imgAlignCenter
  507. _imgAlignLeft _imgAlignRight _imgAlignCenter
  508. tableOpen tableClose
  509. _tableBorder _tableAlignLeft _tableAlignCenter
  510. tableRowOpen tableRowClose tableRowSep
  511. tableTitleRowOpen tableTitleRowClose
  512. tableCellOpen tableCellClose tableCellSep
  513. tableTitleCellOpen tableTitleCellClose tableTitleCellSep
  514. _tableColAlignLeft _tableColAlignRight _tableColAlignCenter
  515. _tableCellAlignLeft _tableCellAlignRight _tableCellAlignCenter
  516. _tableCellColSpan tableColAlignSep
  517. _tableCellMulticolOpen
  518. _tableCellMulticolClose
  519. bodyOpen bodyClose
  520. cssOpen cssClose
  521. tocOpen tocClose TOC
  522. anchor
  523. comment
  524. pageBreak
  525. EOD
  526. """.split()
  527. # TIP: \a represents the current text on the mark
  528. # TIP: ~A~, ~B~ and ~C~ are expanded to other tags parts
  529. alltags = {
  530. 'art': {
  531. 'title1' : '\a' ,
  532. 'title2' : '\a' ,
  533. 'title3' : '\a' ,
  534. 'title4' : '\a' ,
  535. 'title5' : '\a' ,
  536. 'blockQuoteLine' : '\t' ,
  537. 'listItemOpen' : '- ' ,
  538. 'numlistItemOpen' : '\a. ' ,
  539. 'bar1' : aa_line(AA_CHARS['bar1']),
  540. 'bar2' : aa_line(AA_CHARS['bar2']),
  541. 'url' : '\a' ,
  542. 'urlMark' : '\a (\a)' ,
  543. 'email' : '\a' ,
  544. 'emailMark' : '\a (\a)' ,
  545. 'img' : '[\a]' ,
  546. },
  547. 'txt': {
  548. 'title1' : ' \a' ,
  549. 'title2' : '\t\a' ,
  550. 'title3' : '\t\t\a' ,
  551. 'title4' : '\t\t\t\a' ,
  552. 'title5' : '\t\t\t\t\a',
  553. 'blockQuoteLine' : '\t' ,
  554. 'listItemOpen' : '- ' ,
  555. 'numlistItemOpen' : '\a. ' ,
  556. 'bar1' : '\a' ,
  557. 'url' : '\a' ,
  558. 'urlMark' : '\a (\a)' ,
  559. 'email' : '\a' ,
  560. 'emailMark' : '\a (\a)' ,
  561. 'img' : '[\a]' ,
  562. },
  563. 'html': {
  564. 'paragraphOpen' : '<P>' ,
  565. 'paragraphClose' : '</P>' ,
  566. 'title1' : '~A~<H1>\a</H1>' ,
  567. 'title2' : '~A~<H2>\a</H2>' ,
  568. 'title3' : '~A~<H3>\a</H3>' ,
  569. 'title4' : '~A~<H4>\a</H4>' ,
  570. 'title5' : '~A~<H5>\a</H5>' ,
  571. 'anchor' : '<A NAME="\a"></A>\n',
  572. 'blockVerbOpen' : '<PRE>' ,
  573. 'blockVerbClose' : '</PRE>' ,
  574. 'blockQuoteOpen' : '<BLOCKQUOTE>' ,
  575. 'blockQuoteClose' : '</BLOCKQUOTE>' ,
  576. 'fontMonoOpen' : '<CODE>' ,
  577. 'fontMonoClose' : '</CODE>' ,
  578. 'fontBoldOpen' : '<B>' ,
  579. 'fontBoldClose' : '</B>' ,
  580. 'fontItalicOpen' : '<I>' ,
  581. 'fontItalicClose' : '</I>' ,
  582. 'fontUnderlineOpen' : '<U>' ,
  583. 'fontUnderlineClose' : '</U>' ,
  584. 'fontStrikeOpen' : '<S>' ,
  585. 'fontStrikeClose' : '</S>' ,
  586. 'listOpen' : '<UL>' ,
  587. 'listClose' : '</UL>' ,
  588. 'listItemOpen' : '<LI>' ,
  589. 'numlistOpen' : '<OL>' ,
  590. 'numlistClose' : '</OL>' ,
  591. 'numlistItemOpen' : '<LI>' ,
  592. 'deflistOpen' : '<DL>' ,
  593. 'deflistClose' : '</DL>' ,
  594. 'deflistItem1Open' : '<DT>' ,
  595. 'deflistItem1Close' : '</DT>' ,
  596. 'deflistItem2Open' : '<DD>' ,
  597. 'bar1' : '<HR NOSHADE SIZE=1>' ,
  598. 'bar2' : '<HR NOSHADE SIZE=5>' ,
  599. 'url' : '<A HREF="\a">\a</A>' ,
  600. 'urlMark' : '<A HREF="\a">\a</A>' ,
  601. 'email' : '<A HREF="mailto:\a">\a</A>' ,
  602. 'emailMark' : '<A HREF="mailto:\a">\a</A>' ,
  603. 'img' : '<IMG~A~ SRC="\a" BORDER="0" ALT="">',
  604. '_imgAlignLeft' : ' ALIGN="left"' ,
  605. '_imgAlignCenter' : ' ALIGN="middle"',
  606. '_imgAlignRight' : ' ALIGN="right"' ,
  607. 'tableOpen' : '<TABLE~A~~B~ CELLPADDING="4">',
  608. 'tableClose' : '</TABLE>' ,
  609. 'tableRowOpen' : '<TR>' ,
  610. 'tableRowClose' : '</TR>' ,
  611. 'tableCellOpen' : '<TD~A~~S~>' ,
  612. 'tableCellClose' : '</TD>' ,
  613. 'tableTitleCellOpen' : '<TH~S~>' ,
  614. 'tableTitleCellClose' : '</TH>' ,
  615. '_tableBorder' : ' BORDER="1"' ,
  616. '_tableAlignCenter' : ' ALIGN="center"',
  617. '_tableCellAlignRight' : ' ALIGN="right"' ,
  618. '_tableCellAlignCenter': ' ALIGN="center"',
  619. '_tableCellColSpan' : ' COLSPAN="\a"' ,
  620. 'cssOpen' : '<STYLE TYPE="text/css">',
  621. 'cssClose' : '</STYLE>' ,
  622. 'comment' : '<!-- \a -->' ,
  623. 'EOD' : '</BODY></HTML>'
  624. },
  625. #TIP xhtml inherits all HTML definitions (lowercased)
  626. #TIP http://www.w3.org/TR/xhtml1/#guidelines
  627. #TIP http://www.htmlref.com/samples/Chapt17/17_08.htm
  628. 'xhtml': {
  629. 'listItemClose' : '</li>' ,
  630. 'numlistItemClose' : '</li>' ,
  631. 'deflistItem2Close' : '</dd>' ,
  632. 'bar1' : '<hr class="light" />',
  633. 'bar2' : '<hr class="heavy" />',
  634. 'anchor' : '<a id="\a" name="\a"></a>\n',
  635. 'img' : '<img~A~ src="\a" border="0" alt=""/>',
  636. },
  637. 'sgml': {
  638. 'paragraphOpen' : '<p>' ,
  639. 'title1' : '<sect>\a~A~<p>' ,
  640. 'title2' : '<sect1>\a~A~<p>' ,
  641. 'title3' : '<sect2>\a~A~<p>' ,
  642. 'title4' : '<sect3>\a~A~<p>' ,
  643. 'title5' : '<sect4>\a~A~<p>' ,
  644. 'anchor' : '<label id="\a">' ,
  645. 'blockVerbOpen' : '<tscreen><verb>' ,
  646. 'blockVerbClose' : '</verb></tscreen>' ,
  647. 'blockQuoteOpen' : '<quote>' ,
  648. 'blockQuoteClose' : '</quote>' ,
  649. 'fontMonoOpen' : '<tt>' ,
  650. 'fontMonoClose' : '</tt>' ,
  651. 'fontBoldOpen' : '<bf>' ,
  652. 'fontBoldClose' : '</bf>' ,
  653. 'fontItalicOpen' : '<em>' ,
  654. 'fontItalicClose' : '</em>' ,
  655. 'fontUnderlineOpen' : '<bf><em>' ,
  656. 'fontUnderlineClose' : '</em></bf>' ,
  657. 'listOpen' : '<itemize>' ,
  658. 'listClose' : '</itemize>' ,
  659. 'listItemOpen' : '<item>' ,
  660. 'numlistOpen' : '<enum>' ,
  661. 'numlistClose' : '</enum>' ,
  662. 'numlistItemOpen' : '<item>' ,
  663. 'deflistOpen' : '<descrip>' ,
  664. 'deflistClose' : '</descrip>' ,
  665. 'deflistItem1Open' : '<tag>' ,
  666. 'deflistItem1Close' : '</tag>' ,
  667. 'bar1' : '<!-- \a -->' ,
  668. 'url' : '<htmlurl url="\a" name="\a">' ,
  669. 'urlMark' : '<htmlurl url="\a" name="\a">' ,
  670. 'email' : '<htmlurl url="mailto:\a" name="\a">' ,
  671. 'emailMark' : '<htmlurl url="mailto:\a" name="\a">' ,
  672. 'img' : '<figure><ph vspace=""><img src="\a"></figure>',
  673. 'tableOpen' : '<table><tabular ca="~C~">' ,
  674. 'tableClose' : '</tabular></table>' ,
  675. 'tableRowSep' : '<rowsep>' ,
  676. 'tableCellSep' : '<colsep>' ,
  677. '_tableColAlignLeft' : 'l' ,
  678. '_tableColAlignRight' : 'r' ,
  679. '_tableColAlignCenter' : 'c' ,
  680. 'comment' : '<!-- \a -->' ,
  681. 'TOC' : '<toc>' ,
  682. 'EOD' : '</article>'
  683. },
  684. 'dbk': {
  685. 'paragraphOpen' : '<para>' ,
  686. 'paragraphClose' : '</para>' ,
  687. 'title1Open' : '~A~<sect1><title>\a</title>' ,
  688. 'title1Close' : '</sect1>' ,
  689. 'title2Open' : '~A~ <sect2><title>\a</title>' ,
  690. 'title2Close' : ' </sect2>' ,
  691. 'title3Open' : '~A~ <sect3><title>\a</title>' ,
  692. 'title3Close' : ' </sect3>' ,
  693. 'title4Open' : '~A~ <sect4><title>\a</title>' ,
  694. 'title4Close' : ' </sect4>' ,
  695. 'title5Open' : '~A~ <sect5><title>\a</title>',
  696. 'title5Close' : ' </sect5>' ,
  697. 'anchor' : '<anchor id="\a"/>\n' ,
  698. 'blockVerbOpen' : '<programlisting>' ,
  699. 'blockVerbClose' : '</programlisting>' ,
  700. 'blockQuoteOpen' : '<blockquote><para>' ,
  701. 'blockQuoteClose' : '</para></blockquote>' ,
  702. 'fontMonoOpen' : '<code>' ,
  703. 'fontMonoClose' : '</code>' ,
  704. 'fontBoldOpen' : '<emphasis role="bold">' ,
  705. 'fontBoldClose' : '</emphasis>' ,
  706. 'fontItalicOpen' : '<emphasis>' ,
  707. 'fontItalicClose' : '</emphasis>' ,
  708. 'fontUnderlineOpen' : '<emphasis role="underline">' ,
  709. 'fontUnderlineClose' : '</emphasis>' ,
  710. # 'fontStrikeOpen' : '<emphasis role="strikethrough">' , # Don't know
  711. # 'fontStrikeClose' : '</emphasis>' ,
  712. 'listOpen' : '<itemizedlist>' ,
  713. 'listClose' : '</itemizedlist>' ,
  714. 'listItemOpen' : '<listitem><para>' ,
  715. 'listItemClose' : '</para></listitem>' ,
  716. 'numlistOpen' : '<orderedlist numeration="arabic">' ,
  717. 'numlistClose' : '</orderedlist>' ,
  718. 'numlistItemOpen' : '<listitem><para>' ,
  719. 'numlistItemClose' : '</para></listitem>' ,
  720. 'deflistOpen' : '<variablelist>' ,
  721. 'deflistClose' : '</variablelist>' ,
  722. 'deflistItem1Open' : '<varlistentry><term>' ,
  723. 'deflistItem1Close' : '</term>' ,
  724. 'deflistItem2Open' : '<listitem><para>' ,
  725. 'deflistItem2Close' : '</para></listitem></varlistentry>' ,
  726. # 'bar1' : '<>' , # Don't know
  727. # 'bar2' : '<>' , # Don't know
  728. 'url' : '<ulink url="\a">\a</ulink>' ,
  729. 'urlMark' : '<ulink url="\a">\a</ulink>' ,
  730. 'email' : '<email>\a</email>' ,
  731. 'emailMark' : '<email>\a</email>' ,
  732. 'img' : '<mediaobject><imageobject><imagedata fileref="\a"/></imageobject></mediaobject>',
  733. # '_imgAlignLeft' : '' , # Don't know
  734. # '_imgAlignCenter' : '' , # Don't know
  735. # '_imgAlignRight' : '' , # Don't know
  736. 'tableOpen' : '<para>', # just to have something...
  737. 'tableClose' : '</para>',
  738. # 'tableOpen' : '<informaltable><tgroup cols=""><tbody>', # Don't work, need to know number of cols
  739. # 'tableClose' : '</tbody></tgroup></informaltable>' ,
  740. # 'tableRowOpen' : '<row>' ,
  741. # 'tableRowClose' : '</row>' ,
  742. # 'tableCellOpen' : '<entry>' ,
  743. # 'tableCellClose' : '</entry>' ,
  744. # 'tableTitleRowOpen' : '<thead>' ,
  745. # 'tableTitleRowClose' : '</thead>' ,
  746. # '_tableBorder' : ' frame="all"' ,
  747. # '_tableAlignCenter' : ' align="center"' ,
  748. # '_tableCellAlignRight' : ' align="right"' ,
  749. # '_tableCellAlignCenter': ' align="center"' ,
  750. # '_tableCellColSpan' : ' COLSPAN="\a"' ,
  751. 'TOC' : '</index>' ,
  752. 'comment' : '<!-- \a -->' ,
  753. 'EOD' : '</article>'
  754. },
  755. 'tex': {
  756. 'title1' : '~A~\section*{\a}' ,
  757. 'title2' : '~A~\\subsection*{\a}' ,
  758. 'title3' : '~A~\\subsubsection*{\a}',
  759. # title 4/5: DIRTY: para+BF+\\+\n
  760. 'title4' : '~A~\\paragraph{}\\textbf{\a}\\\\\n',
  761. 'title5' : '~A~\\paragraph{}\\textbf{\a}\\\\\n',
  762. 'numtitle1' : '\n~A~\section{\a}' ,
  763. 'numtitle2' : '~A~\\subsection{\a}' ,
  764. 'numtitle3' : '~A~\\subsubsection{\a}' ,
  765. 'anchor' : '\\hypertarget{\a}{}\n' ,
  766. 'blockVerbOpen' : '\\begin{verbatim}' ,
  767. 'blockVerbClose' : '\\end{verbatim}' ,
  768. 'blockQuoteOpen' : '\\begin{quotation}' ,
  769. 'blockQuoteClose' : '\\end{quotation}' ,
  770. 'fontMonoOpen' : '\\texttt{' ,
  771. 'fontMonoClose' : '}' ,
  772. 'fontBoldOpen' : '\\textbf{' ,
  773. 'fontBoldClose' : '}' ,
  774. 'fontItalicOpen' : '\\textit{' ,
  775. 'fontItalicClose' : '}' ,
  776. 'fontUnderlineOpen' : '\\underline{' ,
  777. 'fontUnderlineClose' : '}' ,
  778. 'fontStrikeOpen' : '\\sout{' ,
  779. 'fontStrikeClose' : '}' ,
  780. 'listOpen' : '\\begin{itemize}' ,
  781. 'listClose' : '\\end{itemize}' ,
  782. 'listOpenCompact' : '\\begin{compactitem}',
  783. 'listCloseCompact' : '\\end{compactitem}' ,
  784. 'listItemOpen' : '\\item ' ,
  785. 'numlistOpen' : '\\begin{enumerate}' ,
  786. 'numlistClose' : '\\end{enumerate}' ,
  787. 'numlistOpenCompact' : '\\begin{compactenum}',
  788. 'numlistCloseCompact' : '\\end{compactenum}' ,
  789. 'numlistItemOpen' : '\\item ' ,
  790. 'deflistOpen' : '\\begin{description}',
  791. 'deflistClose' : '\\end{description}' ,
  792. 'deflistOpenCompact' : '\\begin{compactdesc}',
  793. 'deflistCloseCompact' : '\\end{compactdesc}' ,
  794. 'deflistItem1Open' : '\\item[' ,
  795. 'deflistItem1Close' : ']' ,
  796. 'bar1' : '\\hrulefill{}' ,
  797. 'bar2' : '\\rule{\linewidth}{1mm}',
  798. 'url' : '\\htmladdnormallink{\a}{\a}',
  799. 'urlMark' : '\\htmladdnormallink{\a}{\a}',
  800. 'email' : '\\htmladdnormallink{\a}{mailto:\a}',
  801. 'emailMark' : '\\htmladdnormallink{\a}{mailto:\a}',
  802. 'img' : '\\includegraphics{\a}',
  803. 'tableOpen' : '\\begin{center}\\begin{tabular}{|~C~|}',
  804. 'tableClose' : '\\end{tabular}\\end{center}',
  805. 'tableRowOpen' : '\\hline ' ,
  806. 'tableRowClose' : ' \\\\' ,
  807. 'tableCellSep' : ' & ' ,
  808. '_tableColAlignLeft' : 'l' ,
  809. '_tableColAlignRight' : 'r' ,
  810. '_tableColAlignCenter' : 'c' ,
  811. '_tableCellAlignLeft' : 'l' ,
  812. '_tableCellAlignRight' : 'r' ,
  813. '_tableCellAlignCenter': 'c' ,
  814. '_tableCellColSpan' : '\a' ,
  815. '_tableCellMulticolOpen' : '\\multicolumn{\a}{|~C~|}{',
  816. '_tableCellMulticolClose' : '}',
  817. 'tableColAlignSep' : '|' ,
  818. 'comment' : '% \a' ,
  819. 'TOC' : '\\tableofcontents',
  820. 'pageBreak' : '\\clearpage',
  821. 'EOD' : '\\end{document}'
  822. },
  823. 'lout': {
  824. 'paragraphOpen' : '@LP' ,
  825. 'blockTitle1Open' : '@BeginSections' ,
  826. 'blockTitle1Close' : '@EndSections' ,
  827. 'blockTitle2Open' : ' @BeginSubSections' ,
  828. 'blockTitle2Close' : ' @EndSubSections' ,
  829. 'blockTitle3Open' : ' @BeginSubSubSections' ,
  830. 'blockTitle3Close' : ' @EndSubSubSections' ,
  831. 'title1Open' : '~A~@Section @Title { \a } @Begin',
  832. 'title1Close' : '@End @Section' ,
  833. 'title2Open' : '~A~ @SubSection @Title { \a } @Begin',
  834. 'title2Close' : ' @End @SubSection' ,
  835. 'title3Open' : '~A~ @SubSubSection @Title { \a } @Begin',
  836. 'title3Close' : ' @End @SubSubSection' ,
  837. 'title4Open' : '~A~@LP @LeftDisplay @B { \a }',
  838. 'title5Open' : '~A~@LP @LeftDisplay @B { \a }',
  839. 'anchor' : '@Tag { \a }\n' ,
  840. 'blockVerbOpen' : '@LP @ID @F @RawVerbatim @Begin',
  841. 'blockVerbClose' : '@End @RawVerbatim' ,
  842. 'blockQuoteOpen' : '@QD {' ,
  843. 'blockQuoteClose' : '}' ,
  844. # enclosed inside {} to deal with joined**words**
  845. 'fontMonoOpen' : '{@F {' ,
  846. 'fontMonoClose' : '}}' ,
  847. 'fontBoldOpen' : '{@B {' ,
  848. 'fontBoldClose' : '}}' ,
  849. 'fontItalicOpen' : '{@II {' ,
  850. 'fontItalicClose' : '}}' ,
  851. 'fontUnderlineOpen' : '{@Underline{' ,
  852. 'fontUnderlineClose' : '}}' ,
  853. # the full form is more readable, but could be BL EL LI NL TL DTI
  854. 'listOpen' : '@BulletList' ,
  855. 'listClose' : '@EndList' ,
  856. 'listItemOpen' : '@ListItem{' ,
  857. 'listItemClose' : '}' ,
  858. 'numlistOpen' : '@NumberedList' ,
  859. 'numlistClose' : '@EndList' ,
  860. 'numlistItemOpen' : '@ListItem{' ,
  861. 'numlistItemClose' : '}' ,
  862. 'deflistOpen' : '@TaggedList' ,
  863. 'deflistClose' : '@EndList' ,
  864. 'deflistItem1Open' : '@DropTagItem {' ,
  865. 'deflistItem1Close' : '}' ,
  866. 'deflistItem2Open' : '{' ,
  867. 'deflistItem2Close' : '}' ,
  868. 'bar1' : '@DP @FullWidthRule' ,
  869. 'url' : '{blue @Colour { \a }}' ,
  870. 'urlMark' : '\a ({blue @Colour { \a }})' ,
  871. 'email' : '{blue @Colour { \a }}' ,
  872. 'emailMark' : '\a ({blue Colour{ \a }})' ,
  873. 'img' : '~A~@IncludeGraphic { \a }' , # eps only!
  874. '_imgAlignLeft' : '@LeftDisplay ' ,
  875. '_imgAlignRight' : '@RightDisplay ' ,
  876. '_imgAlignCenter' : '@CentredDisplay ' ,
  877. # lout tables are *way* complicated, no support for now
  878. #'tableOpen' : '~A~@Tbl~B~\naformat{ @Cell A | @Cell B } {',
  879. #'tableClose' : '}' ,
  880. #'tableRowOpen' : '@Rowa\n' ,
  881. #'tableTitleRowOpen' : '@HeaderRowa' ,
  882. #'tableCenterAlign' : '@CentredDisplay ' ,
  883. #'tableCellOpen' : '\a {' , # A, B, ...
  884. #'tableCellClose' : '}' ,
  885. #'_tableBorder' : '\nrule {yes}' ,
  886. 'comment' : '# \a' ,
  887. # @MakeContents must be on the config file
  888. 'TOC' : '@DP @ContentsGoesHere @DP',
  889. 'pageBreak' : '@NP' ,
  890. 'EOD' : '@End @Text'
  891. },
  892. # http://moinmo.in/SyntaxReference
  893. 'moin': {
  894. 'title1' : '= \a =' ,
  895. 'title2' : '== \a ==' ,
  896. 'title3' : '=== \a ===' ,
  897. 'title4' : '==== \a ====' ,
  898. 'title5' : '===== \a =====',
  899. 'blockVerbOpen' : '{{{' ,
  900. 'blockVerbClose' : '}}}' ,
  901. 'blockQuoteLine' : ' ' ,
  902. 'fontMonoOpen' : '{{{' ,
  903. 'fontMonoClose' : '}}}' ,
  904. 'fontBoldOpen' : "'''" ,
  905. 'fontBoldClose' : "'''" ,
  906. 'fontItalicOpen' : "''" ,
  907. 'fontItalicClose' : "''" ,
  908. 'fontUnderlineOpen' : '__' ,
  909. 'fontUnderlineClose' : '__' ,
  910. 'fontStrikeOpen' : '--(' ,
  911. 'fontStrikeClose' : ')--' ,
  912. 'listItemOpen' : ' * ' ,
  913. 'numlistItemOpen' : ' \a. ' ,
  914. 'deflistItem1Open' : ' ' ,
  915. 'deflistItem1Close' : '::' ,
  916. 'deflistItem2LinePrefix': ' :: ' ,
  917. 'bar1' : '----' ,
  918. 'bar2' : '--------' ,
  919. 'url' : '[\a]' ,
  920. 'urlMark' : '[\a \a]' ,
  921. 'email' : '[\a]' ,
  922. 'emailMark' : '[\a \a]' ,
  923. 'img' : '[\a]' ,
  924. 'tableRowOpen' : '||' ,
  925. 'tableCellOpen' : '~A~' ,
  926. 'tableCellClose' : '||' ,
  927. 'tableTitleCellClose' : '||' ,
  928. '_tableCellAlignRight' : '<)>' ,
  929. '_tableCellAlignCenter' : '<:>' ,
  930. 'comment' : '/* \a */' ,
  931. 'TOC' : '[[TableOfContents]]'
  932. },
  933. # http://code.google.com/p/support/wiki/WikiSyntax
  934. 'gwiki': {
  935. 'title1' : '= \a =' ,
  936. 'title2' : '== \a ==' ,
  937. 'title3' : '=== \a ===' ,
  938. 'title4' : '==== \a ====' ,
  939. 'title5' : '===== \a =====',
  940. 'blockVerbOpen' : '{{{' ,
  941. 'blockVerbClose' : '}}}' ,
  942. 'blockQuoteLine' : ' ' ,
  943. 'fontMonoOpen' : '{{{' ,
  944. 'fontMonoClose' : '}}}' ,
  945. 'fontBoldOpen' : '*' ,
  946. 'fontBoldClose' : '*' ,
  947. 'fontItalicOpen' : '_' , # underline == italic
  948. 'fontItalicClose' : '_' ,
  949. 'fontStrikeOpen' : '~~' ,
  950. 'fontStrikeClose' : '~~' ,
  951. 'listItemOpen' : ' * ' ,
  952. 'numlistItemOpen' : ' # ' ,
  953. 'url' : '\a' ,
  954. 'urlMark' : '[\a \a]' ,
  955. 'email' : 'mailto:\a' ,
  956. 'emailMark' : '[mailto:\a \a]',
  957. 'img' : '[\a]' ,
  958. 'tableRowOpen' : '|| ' ,
  959. 'tableRowClose' : ' ||' ,
  960. 'tableCellSep' : ' || ' ,
  961. },
  962. # http://powerman.name/doc/asciidoc
  963. 'adoc': {
  964. 'title1' : '== \a' ,
  965. 'title2' : '=== \a' ,
  966. 'title3' : '==== \a' ,
  967. 'title4' : '===== \a' ,
  968. 'title5' : '===== \a' ,
  969. 'blockVerbOpen' : '----' ,
  970. 'blockVerbClose' : '----' ,
  971. 'fontMonoOpen' : '+' ,
  972. 'fontMonoClose' : '+' ,
  973. 'fontBoldOpen' : '*' ,
  974. 'fontBoldClose' : '*' ,
  975. 'fontItalicOpen' : '_' ,
  976. 'fontItalicClose' : '_' ,
  977. 'listItemOpen' : '- ' ,
  978. 'listItemLine' : '\t' ,
  979. 'numlistItemOpen' : '. ' ,
  980. 'url' : '\a' ,
  981. 'urlMark' : '\a[\a]' ,
  982. 'email' : 'mailto:\a' ,
  983. 'emailMark' : 'mailto:\a[\a]' ,
  984. 'img' : 'image::\a[]' ,
  985. },
  986. # http://wiki.splitbrain.org/wiki:syntax
  987. # Hint: <br> is \\ $
  988. # Hint: You can add footnotes ((This is a footnote))
  989. 'doku': {
  990. 'title1' : '===== \a =====',
  991. 'title2' : '==== \a ====' ,
  992. 'title3' : '=== \a ===' ,
  993. 'title4' : '== \a ==' ,
  994. 'title5' : '= \a =' ,
  995. # DokuWiki uses ' ' identation to mark verb blocks (see indentverbblock)
  996. 'blockQuoteLine' : '>' ,
  997. 'fontMonoOpen' : "''" ,
  998. 'fontMonoClose' : "''" ,
  999. 'fontBoldOpen' : "**" ,
  1000. 'fontBoldClose' : "**" ,
  1001. 'fontItalicOpen' : "//" ,
  1002. 'fontItalicClose' : "//" ,
  1003. 'fontUnderlineOpen' : "__" ,
  1004. 'fontUnderlineClose' : "__" ,
  1005. 'fontStrikeOpen' : '<del>' ,
  1006. 'fontStrikeClose' : '</del>' ,
  1007. 'listItemOpen' : ' * ' ,
  1008. 'numlistItemOpen' : ' - ' ,
  1009. 'bar1' : '----' ,
  1010. 'url' : '[[\a]]' ,
  1011. 'urlMark' : '[[\a|\a]]' ,
  1012. 'email' : '[[\a]]' ,
  1013. 'emailMark' : '[[\a|\a]]' ,
  1014. 'img' : '{{\a}}' ,
  1015. 'imgAlignLeft' : '{{\a }}' ,
  1016. 'imgAlignRight' : '{{ \a}}' ,
  1017. 'imgAlignCenter' : '{{ \a }}' ,
  1018. 'tableTitleRowOpen' : '^ ' ,
  1019. 'tableTitleRowClose' : ' ^' ,
  1020. 'tableTitleCellSep' : ' ^ ' ,
  1021. 'tableRowOpen' : '| ' ,
  1022. 'tableRowClose' : ' |' ,
  1023. 'tableCellSep' : ' | ' ,
  1024. # DokuWiki has no attributes. The content must be aligned!
  1025. # '_tableCellAlignRight' : '<)>' , # ??
  1026. # '_tableCellAlignCenter': '<:>' , # ??
  1027. # DokuWiki colspan is the same as txt2tags' with multiple |||
  1028. # 'comment' : '## \a' , # ??
  1029. # TOC is automatic
  1030. },
  1031. # http://www.pmwiki.org/wiki/PmWiki/TextFormattingRules
  1032. 'pmw': {
  1033. 'title1' : '~A~! \a ' ,
  1034. 'title2' : '~A~!! \a ' ,
  1035. 'title3' : '~A~!!! \a ' ,
  1036. 'title4' : '~A~!!!! \a ' ,
  1037. 'title5' : '~A~!!!!! \a ' ,
  1038. 'blockQuoteOpen' : '->' ,
  1039. 'blockQuoteClose' : '\n' ,
  1040. # In-text font
  1041. 'fontLargeOpen' : "[+" ,
  1042. 'fontLargeClose' : "+]" ,
  1043. 'fontLargerOpen' : "[++" ,
  1044. 'fontLargerClose' : "++]" ,
  1045. 'fontSmallOpen' : "[-" ,
  1046. 'fontSmallClose' : "-]" ,
  1047. 'fontLargerOpen' : "[--" ,
  1048. 'fontLargerClose' : "--]" ,
  1049. 'fontMonoOpen' : "@@" ,
  1050. 'fontMonoClose' : "@@" ,
  1051. 'fontBoldOpen' : "'''" ,
  1052. 'fontBoldClose' : "'''" ,
  1053. 'fontItalicOpen' : "''" ,
  1054. 'fontItalicClose' : "''" ,
  1055. 'fontUnderlineOpen' : "{+" ,
  1056. 'fontUnderlineClose' : "+}" ,
  1057. 'fontStrikeOpen' : '{-' ,
  1058. 'fontStrikeClose' : '-}' ,
  1059. # Lists
  1060. 'listItemOpen' : '* ' ,
  1061. 'numlistItemOpen' : '# ' ,
  1062. 'deflistItem1Open' : ': ' ,
  1063. 'deflistItem1Close' : ':' ,
  1064. 'deflistItem2LineOpen' : '::' ,
  1065. 'deflistItem2LineClose' : ':' ,
  1066. # Verbatim block
  1067. 'blockVerbOpen' : '[@' ,
  1068. 'blockVerbClose' : '@]' ,
  1069. 'bar1' : '----' ,
  1070. # URL, email and anchor
  1071. 'url' : '\a' ,
  1072. 'urlMark' : '[[\a -> \a]]' ,
  1073. 'email' : '\a' ,
  1074. 'emailMark' : '[[\a -> mailto:\a]]',
  1075. 'anchor' : '[[#\a]]\n' ,
  1076. # Image markup
  1077. 'img' : '\a' ,
  1078. #'imgAlignLeft' : '{{\a }}' ,
  1079. #'imgAlignRight' : '{{ \a}}' ,
  1080. #'imgAlignCenter' : '{{ \a }}' ,
  1081. # Table attributes
  1082. 'tableTitleRowOpen' : '||! ' ,
  1083. 'tableTitleRowClose' : '||' ,
  1084. 'tableTitleCellSep' : ' ||!' ,
  1085. 'tableRowOpen' : '||' ,
  1086. 'tableRowClose' : '||' ,
  1087. 'tableCellSep' : ' ||' ,
  1088. },
  1089. # http://en.wikipedia.org/wiki/Help:Editing
  1090. 'wiki': {
  1091. 'title1' : '== \a ==' ,
  1092. 'title2' : '=== \a ===' ,
  1093. 'title3' : '==== \a ====' ,
  1094. 'title4' : '===== \a =====' ,
  1095. 'title5' : '====== \a ======',
  1096. 'blockVerbOpen' : '<pre>' ,
  1097. 'blockVerbClose' : '</pre>' ,
  1098. 'blockQuoteOpen' : '<blockquote>' ,
  1099. 'blockQuoteClose' : '</blockquote>' ,
  1100. 'fontMonoOpen' : '<tt>' ,
  1101. 'fontMonoClose' : '</tt>' ,
  1102. 'fontBoldOpen' : "'''" ,
  1103. 'fontBoldClose' : "'''" ,
  1104. 'fontItalicOpen' : "''" ,
  1105. 'fontItalicClose' : "''" ,
  1106. 'fontUnderlineOpen' : '<u>' ,
  1107. 'fontUnderlineClose' : '</u>' ,
  1108. 'fontStrikeOpen' : '<s>' ,
  1109. 'fontStrikeClose' : '</s>' ,
  1110. #XXX Mixed lists not working: *#* list inside numlist inside list
  1111. 'listItemLine' : '*' ,
  1112. 'numlistItemLine' : '#' ,
  1113. 'deflistItem1Open' : '; ' ,
  1114. 'deflistItem2LinePrefix': ': ' ,
  1115. 'bar1' : '----' ,
  1116. 'url' : '[\a]' ,
  1117. 'urlMark' : '[\a \a]' ,
  1118. 'email' : 'mailto:\a' ,
  1119. 'emailMark' : '[mailto:\a \a]' ,
  1120. # [[Image:foo.png|right|Optional alt/caption text]] (right, left, center, none)
  1121. 'img' : '[[Image:\a~A~]]' ,
  1122. '_imgAlignLeft' : '|left' ,
  1123. '_imgAlignCenter' : '|center' ,
  1124. '_imgAlignRight' : '|right' ,
  1125. # {| border="1" cellspacing="0" cellpadding="4" align="center"
  1126. 'tableOpen' : '{|~A~~B~ cellpadding="4"',
  1127. 'tableClose' : '|}' ,
  1128. 'tableRowOpen' : '|-\n| ' ,
  1129. 'tableTitleRowOpen' : '|-\n! ' ,
  1130. 'tableCellSep' : ' || ' ,
  1131. 'tableTitleCellSep' : ' !! ' ,
  1132. '_tableBorder' : ' border="1"' ,
  1133. '_tableAlignCenter' : ' align="center"' ,
  1134. 'comment' : '<!-- \a -->' ,
  1135. 'TOC' : '__TOC__' ,
  1136. },
  1137. # http://www.inference.phy.cam.ac.uk/mackay/mgp/SYNTAX
  1138. # http://en.wikipedia.org/wiki/MagicPoint
  1139. 'mgp': {
  1140. 'paragraphOpen' : '%font "normal", size 5' ,
  1141. 'title1' : '%page\n\n\a\n' ,
  1142. 'title2' : '%page\n\n\a\n' ,
  1143. 'title3' : '%page\n\n\a\n' ,
  1144. 'title4' : '%page\n\n\a\n' ,
  1145. 'title5' : '%page\n\n\a\n' ,
  1146. 'blockVerbOpen' : '%font "mono"' ,
  1147. 'blockVerbClose' : '%font "normal"' ,
  1148. 'blockQuoteOpen' : '%prefix " "' ,
  1149. 'blockQuoteClose' : '%prefix " "' ,
  1150. 'fontMonoOpen' : '\n%cont, font "mono"\n' ,
  1151. 'fontMonoClose' : '\n%cont, font "normal"\n' ,
  1152. 'fontBoldOpen' : '\n%cont, font "normal-b"\n' ,
  1153. 'fontBoldClose' : '\n%cont, font "normal"\n' ,
  1154. 'fontItalicOpen' : '\n%cont, font "normal-i"\n' ,
  1155. 'fontItalicClose' : '\n%cont, font "normal"\n' ,
  1156. 'fontUnderlineOpen' : '\n%cont, fore "cyan"\n' ,
  1157. 'fontUnderlineClose' : '\n%cont, fore "white"\n' ,
  1158. 'listItemLine' : '\t' ,
  1159. 'numlistItemLine' : '\t' ,
  1160. 'numlistItemOpen' : '\a. ' ,
  1161. 'deflistItem1Open' : '\t\n%cont, font "normal-b"\n',
  1162. 'deflistItem1Close' : '\n%cont, font "normal"\n' ,
  1163. 'bar1' : '%bar "white" 5' ,
  1164. 'bar2' : '%pause' ,
  1165. 'url' : '\n%cont, fore "cyan"\n\a' +\
  1166. '\n%cont, fore "white"\n' ,
  1167. 'urlMark' : '\a \n%cont, fore "cyan"\n\a'+\
  1168. '\n%cont, fore "white"\n' ,
  1169. 'email' : '\n%cont, fore "cyan"\n\a' +\
  1170. '\n%cont, fore "white"\n' ,
  1171. 'emailMark' : '\a \n%cont, fore "cyan"\n\a'+\
  1172. '\n%cont, fore "white"\n' ,
  1173. 'img' : '~A~\n%newimage "\a"\n%left\n',
  1174. '_imgAlignLeft' : '\n%left' ,
  1175. '_imgAlignRight' : '\n%right' ,
  1176. '_imgAlignCenter' : '\n%center' ,
  1177. 'comment' : '%% \a' ,
  1178. 'pageBreak' : '%page\n\n\n' ,
  1179. 'EOD' : '%%EOD'
  1180. },
  1181. # man groff_man ; man 7 groff
  1182. 'man': {
  1183. 'paragraphOpen' : '.P' ,
  1184. 'title1' : '.SH \a' ,
  1185. 'title2' : '.SS \a' ,
  1186. 'title3' : '.SS \a' ,
  1187. 'title4' : '.SS \a' ,
  1188. 'title5' : '.SS \a' ,
  1189. 'blockVerbOpen' : '.nf' ,
  1190. 'blockVerbClose' : '.fi\n' ,
  1191. 'blockQuoteOpen' : '.RS' ,
  1192. 'blockQuoteClose' : '.RE' ,
  1193. 'fontBoldOpen' : '\\fB' ,
  1194. 'fontBoldClose' : '\\fR' ,
  1195. 'fontItalicOpen' : '\\fI' ,
  1196. 'fontItalicClose' : '\\fR' ,
  1197. 'listOpen' : '.RS' ,
  1198. 'listItemOpen' : '.IP \(bu 3\n',
  1199. 'listClose' : '.RE' ,
  1200. 'numlistOpen' : '.RS' ,
  1201. 'numlistItemOpen' : '.IP \a. 3\n',
  1202. 'numlistClose' : '.RE' ,
  1203. 'deflistItem1Open' : '.TP\n' ,
  1204. 'bar1' : '\n\n' ,
  1205. 'url' : '\a' ,
  1206. 'urlMark' : '\a (\a)',
  1207. 'email' : '\a' ,
  1208. 'emailMark' : '\a (\a)',
  1209. 'img' : '\a' ,
  1210. 'tableOpen' : '.TS\n~A~~B~tab(^); ~C~.',
  1211. 'tableClose' : '.TE' ,
  1212. 'tableRowOpen' : ' ' ,
  1213. 'tableCellSep' : '^' ,
  1214. '_tableAlignCenter' : 'center, ',
  1215. '_tableBorder' : 'allbox, ',
  1216. '_tableColAlignLeft' : 'l' ,
  1217. '_tableColAlignRight' : 'r' ,
  1218. '_tableColAlignCenter' : 'c' ,
  1219. 'comment' : '.\\" \a'
  1220. },
  1221. 'pm6': {
  1222. 'paragraphOpen' : '<@Normal:>' ,
  1223. 'title1' : '<@Title1:>\a',
  1224. 'title2' : '<@Title2:>\a',
  1225. 'title3' : '<@Title3:>\a',
  1226. 'title4' : '<@Title4:>\a',
  1227. 'title5' : '<@Title5:>\a',
  1228. 'blockVerbOpen' : '<@PreFormat:>' ,
  1229. 'blockQuoteLine' : '<@Quote:>' ,
  1230. 'fontMonoOpen' : '<FONT "Lucida Console"><SIZE 9>' ,
  1231. 'fontMonoClose' : '<SIZE$><FONT$>',
  1232. 'fontBoldOpen' : '<B>' ,
  1233. 'fontBoldClose' : '<P>' ,
  1234. 'fontItalicOpen' : '<I>' ,
  1235. 'fontItalicClose' : '<P>' ,
  1236. 'fontUnderlineOpen' : '<U>' ,
  1237. 'fontUnderlineClose' : '<P>' ,
  1238. 'listOpen' : '<@Bullet:>' ,
  1239. 'listItemOpen' : '\x95\t' , # \x95 == ~U
  1240. 'numlistOpen' : '<@Bullet:>' ,
  1241. 'numlistItemOpen' : '\x95\t' ,
  1242. 'bar1' : '\a' ,
  1243. 'url' : '<U>\a<P>' , # underline
  1244. 'urlMark' : '\a <U>\a<P>' ,
  1245. 'email' : '\a' ,
  1246. 'emailMark' : '\a \a' ,
  1247. 'img' : '\a'
  1248. }
  1249. }
  1250. # Exceptions for --css-sugar
  1251. if config['css-sugar'] and config['target'] in ('html','xhtml'):
  1252. # Change just HTML because XHTML inherits it
  1253. htmltags = alltags['html']
  1254. # Table with no cellpadding
  1255. htmltags['tableOpen'] = htmltags['tableOpen'].replace(' CELLPADDING="4"', '')
  1256. # DIVs
  1257. htmltags['tocOpen' ] = '<DIV CLASS="toc" ID="toc">'
  1258. htmltags['tocClose'] = '</DIV>'
  1259. htmltags['bodyOpen'] = '<DIV CLASS="body" ID="body">'
  1260. htmltags['bodyClose']= '</DIV>'
  1261. # Make the HTML -> XHTML inheritance
  1262. xhtml = alltags['html'].copy()
  1263. for key in xhtml.keys(): xhtml[key] = xhtml[key].lower()
  1264. # Some like HTML tags as lowercase, some don't... (headers out)
  1265. if HTML_LOWER: alltags['html'] = xhtml.copy()
  1266. xhtml.update(alltags['xhtml'])
  1267. alltags['xhtml'] = xhtml.copy()
  1268. # Compose the target tags dictionary
  1269. tags = {}
  1270. target_tags = alltags[config['target']].copy()
  1271. for key in keys: tags[key] = '' # create empty keys
  1272. for key in target_tags.keys():
  1273. tags[key] = maskEscapeChar(target_tags[key]) # populate
  1274. # Map strong line to pagebreak
  1275. if rules['mapbar2pagebreak'] and tags['pageBreak']:
  1276. tags['bar2'] = tags['pageBreak']
  1277. # Map strong line to separator if not defined
  1278. if not tags['bar2'] and tags['bar1']:
  1279. tags['bar2'] = tags['bar1']
  1280. return tags
  1281. ##############################################################################
  1282. def getRules(config):
  1283. "Returns all the target-specific syntax rules"
  1284. ret = {}
  1285. allrules = [
  1286. # target rules (ON/OFF)
  1287. 'linkable', # target supports external links
  1288. 'tableable', # target supports tables
  1289. 'imglinkable', # target supports images as links
  1290. 'imgalignable', # target supports image alignment
  1291. 'imgasdefterm', # target supports image as definition term
  1292. 'autonumberlist', # target supports numbered lists natively
  1293. 'autonumbertitle', # target supports numbered titles natively
  1294. 'stylable', # target supports external style files
  1295. 'parainsidelist', # lists items supports paragraph
  1296. 'compactlist', # separate enclosing tags for compact lists
  1297. 'spacedlistitem', # lists support blank lines between items
  1298. 'listnotnested', # lists cannot be nested
  1299. 'quotenotnested', # quotes cannot be nested
  1300. 'verbblocknotescaped', # don't escape specials in verb block
  1301. 'verbblockfinalescape', # do final escapes in verb block
  1302. 'escapeurl', # escape special in link URL
  1303. 'labelbeforelink', # label comes before the link on the tag
  1304. 'onelinepara', # dump paragraph as a single long line
  1305. 'tabletitlerowinbold', # manually bold any cell on table titles
  1306. 'tablecellstrip', # strip extra spaces from each table cell
  1307. 'tablecellspannable', # the table cells can have span attribute
  1308. 'tablecellmulticol', # separate open+close tags for multicol cells
  1309. 'barinsidequote', # bars are allowed inside quote blocks
  1310. 'finalescapetitle', # perform final escapes on title lines
  1311. 'autotocnewpagebefore', # break page before automatic TOC
  1312. 'autotocnewpageafter', # break page after automatic TOC
  1313. 'autotocwithbars', # automatic TOC surrounded by bars
  1314. 'mapbar2pagebreak', # map the strong bar to a page break
  1315. 'titleblocks', # titles must be on open/close section blocks
  1316. # Target code beautify (ON/OFF)
  1317. 'indentverbblock', # add leading spaces to verb block lines
  1318. 'breaktablecell', # break lines after any table cell
  1319. 'breaktablelineopen', # break line after opening table line
  1320. 'notbreaklistopen', # don't break line after opening a new list
  1321. 'keepquoteindent', # don't remove the leading TABs on quotes
  1322. 'keeplistindent', # don't remove the leading spaces on lists
  1323. 'blankendautotoc', # append a blank line at the auto TOC end
  1324. 'tagnotindentable', # tags must be placed at the line begining
  1325. 'spacedlistitemopen', # append a space after the list item open tag
  1326. 'spacednumlistitemopen',# append a space after the numlist item open tag
  1327. 'deflisttextstrip', # strip the contents of the deflist text
  1328. 'blanksaroundpara', # put a blank line before and after paragraphs
  1329. 'blanksaroundverb', # put a blank line before and after verb blocks
  1330. 'blanksaroundquote', # put a blank line before and after quotes
  1331. 'blanksaroundlist', # put a blank line before and after lists
  1332. 'blanksaroundnumlist', # put a blank line before and after numlists
  1333. 'blanksarounddeflist', # put a blank line before and after deflists
  1334. 'blanksaroundtable', # put a blank line before and after tables
  1335. 'blanksaroundbar', # put a blank line before and after bars
  1336. 'blanksaroundtitle', # put a blank line before and after titles
  1337. 'blanksaroundnumtitle', # put a blank line before and after numtitles
  1338. # Value settings
  1339. 'listmaxdepth', # maximum depth for lists
  1340. 'quotemaxdepth', # maximum depth for quotes
  1341. 'tablecellaligntype', # type of table cell align: cell, column
  1342. ]
  1343. rules_bank = {
  1344. 'txt': {
  1345. 'indentverbblock':1,
  1346. 'spacedlistitem':1,
  1347. 'parainsidelist':1,
  1348. 'keeplistindent':1,
  1349. 'barinsidequote':1,
  1350. 'autotocwithbars':1,
  1351. 'blanksaroundpara':1,
  1352. 'blanksaroundverb':1,
  1353. 'blanksaroundquote':1,
  1354. 'blanksaroundlist':1,
  1355. 'blanksaroundnumlist':1,
  1356. 'blanksarounddeflist':1,
  1357. 'blanksaroundtable':1,
  1358. 'blanksaroundbar':1,
  1359. 'blanksaroundtitle':1,
  1360. 'blanksaroundnumtitle':1,
  1361. },
  1362. 'art': {
  1363. #TIP art inherits all TXT rules
  1364. },
  1365. 'html': {
  1366. 'indentverbblock':1,
  1367. 'linkable':1,
  1368. 'stylable':1,
  1369. 'escapeurl':1,
  1370. 'imglinkable':1,
  1371. 'imgalignable':1,
  1372. 'imgasdefterm':1,
  1373. 'autonumberlist':1,
  1374. 'spacedlistitem':1,
  1375. 'parainsidelist':1,
  1376. 'tableable':1,
  1377. 'tablecellstrip':1,
  1378. 'breaktablecell':1,
  1379. 'breaktablelineopen':1,
  1380. 'keeplistindent':1,
  1381. 'keepquoteindent':1,
  1382. 'barinsidequote':1,
  1383. 'autotocwithbars':1,
  1384. 'tablecellspannable':1,
  1385. 'tablecellaligntype':'cell',
  1386. # 'blanksaroundpara':1,
  1387. 'blanksaroundverb':1,
  1388. # 'blanksaroundquote':1,
  1389. 'blanksaroundlist':1,
  1390. 'blanksaroundnumlist':1,
  1391. 'blanksarounddeflist':1,
  1392. 'blanksaroundtable':1,
  1393. 'blanksaroundbar':1,
  1394. 'blanksaroundtitle':1,
  1395. 'blanksaroundnumtitle':1,
  1396. },
  1397. 'xhtml': {
  1398. #TIP xhtml inherits all HTML rules
  1399. },
  1400. 'sgml': {
  1401. 'linkable':1,
  1402. 'escapeurl':1,
  1403. 'autonumberlist':1,
  1404. 'spacedlistitem':1,
  1405. 'tableable':1,
  1406. 'tablecellstrip':1,
  1407. 'blankendautotoc':1,
  1408. 'quotenotnested':1,
  1409. 'keeplistindent':1,
  1410. 'keepquoteindent':1,
  1411. 'barinsidequote':1,
  1412. 'finalescapetitle':1,
  1413. 'tablecellaligntype':'column',
  1414. 'blanksaroundpara':1,
  1415. 'blanksaroundverb':1,
  1416. 'blanksaroundquote':1,
  1417. 'blanksaroundlist':1,
  1418. 'blanksaroundnumlist':1,
  1419. 'blanksarounddeflist':1,
  1420. 'blanksaroundtable':1,
  1421. 'blanksaroundbar':1,
  1422. 'blanksaroundtitle':1,
  1423. 'blanksaroundnumtitle':1,
  1424. },
  1425. 'dbk': {
  1426. 'linkable':1,
  1427. 'tableable':1,
  1428. 'imglinkable':1,
  1429. 'imgalignable':1,
  1430. 'imgasdefterm':1,
  1431. 'autonumberlist':1,
  1432. 'autonumbertitle':1,
  1433. 'parainsidelist':1,
  1434. 'spacedlistitem':1,
  1435. 'titleblocks':1,
  1436. },
  1437. 'mgp': {
  1438. 'tagnotindentable':1,
  1439. 'spacedlistitem':1,
  1440. 'imgalignable':1,
  1441. 'autotocnewpagebefore':1,
  1442. 'blanksaroundpara':1,
  1443. 'blanksaroundverb':1,
  1444. # 'blanksaroundquote':1,
  1445. 'blanksaroundlist':1,
  1446. 'blanksaroundnumlist':1,
  1447. 'blanksarounddeflist':1,
  1448. 'blanksaroundtable':1,
  1449. 'blanksaroundbar':1,
  1450. # 'blanksaroundtitle':1,
  1451. # 'blanksaroundnumtitle':1,
  1452. },
  1453. 'tex': {
  1454. 'stylable':1,
  1455. 'escapeurl':1,
  1456. 'autonumberlist':1,
  1457. 'autonumbertitle':1,
  1458. 'spacedlistitem':1,
  1459. 'compactlist':1,
  1460. 'parainsidelist':1,
  1461. 'tableable':1,
  1462. 'tablecellstrip':1,
  1463. 'tabletitlerowinbold':1,
  1464. 'verbblocknotescaped':1,
  1465. 'keeplistindent':1,
  1466. 'listmaxdepth':4, # deflist is 6
  1467. 'quotemaxdepth':6,
  1468. 'barinsidequote':1,
  1469. 'finalescapetitle':1,
  1470. 'autotocnewpageafter':1,
  1471. 'mapbar2pagebreak':1,
  1472. 'tablecellaligntype':'column',
  1473. 'tablecellmulticol':1,
  1474. 'blanksaroundpara':1,
  1475. 'blanksaroundverb':1,
  1476. # 'blanksaroundquote':1,
  1477. 'blanksaroundlist':1,
  1478. 'blanksaroundnumlist':1,
  1479. 'blanksarounddeflist':1,
  1480. 'blanksaroundtable':1,
  1481. 'blanksaroundbar':1,
  1482. 'blanksaroundtitle':1,
  1483. 'blanksaroundnumtitle':1,
  1484. },
  1485. 'lout': {
  1486. 'keepquoteindent':1,
  1487. 'deflisttextstrip':1,
  1488. 'escapeurl':1,
  1489. 'verbblocknotescaped':1,
  1490. 'imgalignable':1,
  1491. 'mapbar2pagebreak':1,
  1492. 'titleblocks':1,
  1493. 'autonumberlist':1,
  1494. 'parainsidelist':1,
  1495. 'blanksaroundpara':1,
  1496. 'blanksaroundverb':1,
  1497. # 'blanksaroundquote':1,
  1498. 'blanksaroundlist':1,
  1499. 'blanksaroundnumlist':1,
  1500. 'blanksarounddeflist':1,
  1501. 'blanksaroundtable':1,
  1502. 'blanksaroundbar':1,
  1503. 'blanksaroundtitle':1,
  1504. 'blanksaroundnumtitle':1,
  1505. },
  1506. 'moin': {
  1507. 'spacedlistitem':1,
  1508. 'linkable':1,
  1509. 'keeplistindent':1,
  1510. 'tableable':1,
  1511. 'barinsidequote':1,
  1512. 'tabletitlerowinbold':1,
  1513. 'tablecellstrip':1,
  1514. 'autotocwithbars':1,
  1515. 'tablecellaligntype':'cell',
  1516. 'deflisttextstrip':1,
  1517. 'blanksaroundpara':1,
  1518. 'blanksaroundverb':1,
  1519. # 'blanksaroundquote':1,
  1520. 'blanksaroundlist':1,
  1521. 'blanksaroundnumlist':1,
  1522. 'blanksarounddeflist':1,
  1523. 'blanksaroundtable':1,
  1524. # 'blanksaroundbar':1,
  1525. 'blanksaroundtitle':1,
  1526. 'blanksaroundnumtitle':1,
  1527. },
  1528. 'gwiki': {
  1529. 'spacedlistitem':1,
  1530. 'linkable':1,
  1531. 'keeplistindent':1,
  1532. 'tableable':1,
  1533. 'tabletitlerowinbold':1,
  1534. 'tablecellstrip':1,
  1535. 'autonumberlist':1,
  1536. 'blanksaroundpara':1,
  1537. 'blanksaroundverb':1,
  1538. # 'blanksaroundquote':1,
  1539. 'blanksaroundlist':1,
  1540. 'blanksaroundnumlist':1,
  1541. 'blanksarounddeflist':1,
  1542. 'blanksaroundtable':1,
  1543. # 'blanksaroundbar':1,
  1544. 'blanksaroundtitle':1,
  1545. 'blanksaroundnumtitle':1,
  1546. },
  1547. 'adoc': {
  1548. 'spacedlistitem':1,
  1549. 'linkable':1,
  1550. 'keeplistindent':1,
  1551. 'autonumberlist':1,
  1552. 'autonumbertitle':1,
  1553. 'listnotnested':1,
  1554. 'blanksaroundpara':1,
  1555. 'blanksaroundverb':1,
  1556. 'blanksaroundlist':1,
  1557. 'blanksaroundnumlist':1,
  1558. 'blanksarounddeflist':1,
  1559. 'blanksaroundtable':1,
  1560. 'blanksaroundtitle':1,
  1561. 'blanksaroundnumtitle':1,
  1562. },
  1563. 'doku': {
  1564. 'indentverbblock':1, # DokuWiki uses ' ' to mark verb blocks
  1565. 'spacedlistitem':1,
  1566. 'linkable':1,
  1567. 'keeplistindent':1,
  1568. 'tableable':1,
  1569. 'barinsidequote':1,
  1570. 'tablecellstrip':1,
  1571. 'autotocwithbars':1,
  1572. 'autonumberlist':1,
  1573. 'imgalignable':1,
  1574. 'tablecellaligntype':'cell',
  1575. 'blanksaroundpara':1,
  1576. 'blanksaroundverb':1,
  1577. # 'blanksaroundquote':1,
  1578. 'blanksaroundlist':1,
  1579. 'blanksaroundnumlist':1,
  1580. 'blanksarounddeflist':1,
  1581. 'blanksaroundtable':1,
  1582. 'blanksaroundbar':1,
  1583. 'blanksaroundtitle':1,
  1584. 'blanksaroundnumtitle':1,
  1585. },
  1586. 'pmw': {
  1587. 'indentverbblock':1,
  1588. 'spacedlistitem':1,
  1589. 'linkable':1,
  1590. 'labelbeforelink':1,
  1591. 'keeplistindent':1,
  1592. 'tableable':1,
  1593. 'barinsidequote':1,
  1594. 'tablecellstrip':1,
  1595. 'autotocwithbars':1,
  1596. 'autonumberlist':1,
  1597. 'imgalignable':1,
  1598. 'tabletitlerowinbold':1,
  1599. 'tablecellaligntype':'cell',
  1600. 'blanksaroundpara':1,
  1601. 'blanksaroundverb':1,
  1602. 'blanksaroundquote':1,
  1603. 'blanksaroundlist':1,
  1604. 'blanksaroundnumlist':1,
  1605. 'blanksarounddeflist':1,
  1606. 'blanksaroundtable':1,
  1607. 'blanksaroundbar':1,
  1608. 'blanksaroundtitle':1,
  1609. 'blanksaroundnumtitle':1,
  1610. },
  1611. 'wiki': {
  1612. 'linkable':1,
  1613. 'tableable':1,
  1614. 'tablecellstrip':1,
  1615. 'autotocwithbars':1,
  1616. 'spacedlistitemopen':1,
  1617. 'spacednumlistitemopen':1,
  1618. 'deflisttextstrip':1,
  1619. 'autonumberlist':1,
  1620. 'imgalignable':1,
  1621. 'blanksaroundpara':1,
  1622. 'blanksaroundverb':1,
  1623. # 'blanksaroundquote':1,
  1624. 'blanksaroundlist':1,
  1625. 'blanksaroundnumlist':1,
  1626. 'blanksarounddeflist':1,
  1627. 'blanksaroundtable':1,
  1628. 'blanksaroundbar':1,
  1629. 'blanksaroundtitle':1,
  1630. 'blanksaroundnumtitle':1,
  1631. },
  1632. 'man': {
  1633. 'spacedlistitem':1,
  1634. 'indentverbblock':1,
  1635. 'tagnotindentable':1,
  1636. 'tableable':1,
  1637. 'tablecellaligntype':'column',
  1638. 'tabletitlerowinbold':1,
  1639. 'tablecellstrip':1,
  1640. 'barinsidequote':1,
  1641. 'parainsidelist':0,
  1642. 'blanksaroundpara':1,
  1643. 'blanksaroundverb':1,
  1644. # 'blanksaroundquote':1,
  1645. 'blanksaroundlist':1,
  1646. 'blanksaroundnumlist':1,
  1647. 'blanksarounddeflist':1,
  1648. 'blanksaroundtable':1,
  1649. # 'blanksaroundbar':1,
  1650. 'blanksaroundtitle':1,
  1651. 'blanksaroundnumtitle':1,
  1652. },
  1653. 'pm6': {
  1654. 'keeplistindent':1,
  1655. 'verbblockfinalescape':1,
  1656. #TODO add support for these
  1657. # maybe set a JOINNEXT char and do it on addLineBreaks()
  1658. 'notbreaklistopen':1,
  1659. 'barinsidequote':1,
  1660. 'autotocwithbars':1,
  1661. 'onelinepara':1,
  1662. 'blanksaroundpara':1,
  1663. 'blanksaroundverb':1,
  1664. # 'blanksaroundquote':1,
  1665. 'blanksaroundlist':1,
  1666. 'blanksaroundnumlist':1,
  1667. 'blanksarounddeflist':1,
  1668. # 'blanksaroundtable':1,
  1669. # 'blanksaroundbar':1,
  1670. 'blanksaroundtitle':1,
  1671. 'blanksaroundnumtitle':1,
  1672. }
  1673. }
  1674. # Exceptions for --css-sugar
  1675. if config['css-sugar'] and config['target'] in ('html','xhtml'):
  1676. rules_bank['html']['indentverbblock'] = 0
  1677. rules_bank['html']['autotocwithbars'] = 0
  1678. # Get the target specific rules
  1679. if config['target'] == 'xhtml':
  1680. myrules = rules_bank['html'].copy() # inheritance
  1681. myrules.update(rules_bank['xhtml']) # get XHTML specific
  1682. elif config['target'] == 'art':
  1683. myrules = rules_bank['txt'].copy() # inheritance
  1684. else:
  1685. myrules = rules_bank[config['target']].copy()
  1686. # Populate return dictionary
  1687. for key in allrules: ret[key] = 0 # reset all
  1688. ret.update(myrules) # get rules
  1689. return ret
  1690. ##############################################################################
  1691. def getRegexes():
  1692. "Returns all the regexes used to find the t2t marks"
  1693. bank = {
  1694. 'blockVerbOpen':
  1695. re.compile(r'^```\s*$'),
  1696. 'blockVerbClose':
  1697. re.compile(r'^```\s*$'),
  1698. 'blockRawOpen':
  1699. re.compile(r'^"""\s*$'),
  1700. 'blockRawClose':
  1701. re.compile(r'^"""\s*$'),
  1702. 'blockTaggedOpen':
  1703. re.compile(r"^'''\s*$"),
  1704. 'blockTaggedClose':
  1705. re.compile(r"^'''\s*$"),
  1706. 'blockCommentOpen':
  1707. re.compile(r'^%%%\s*$'),
  1708. 'blockCommentClose':
  1709. re.compile(r'^%%%\s*$'),
  1710. 'quote':
  1711. re.compile(r'^\t+'),
  1712. '1lineVerb':
  1713. re.compile(r'^``` (?=.)'),
  1714. '1lineRaw':
  1715. re.compile(r'^""" (?=.)'),
  1716. '1lineTagged':
  1717. re.compile(r"^''' (?=.)"),
  1718. # mono, raw, bold, italic, underline:
  1719. # - marks must be glued with the contents, no boundary spaces
  1720. # - they are greedy, so in ****bold****, turns to <b>**bold**</b>
  1721. 'fontMono':
  1722. re.compile( r'``([^\s](|.*?[^\s])`*)``'),
  1723. 'raw':
  1724. re.compile( r'""([^\s](|.*?[^\s])"*)""'),
  1725. 'tagged':
  1726. re.compile( r"''([^\s](|.*?[^\s])'*)''"),
  1727. 'fontBold':
  1728. re.compile(r'\*\*([^\s](|.*?[^\s])\**)\*\*'),
  1729. 'fontItalic':
  1730. re.compile( r'//([^\s](|.*?[^\s])/*)//'),
  1731. 'fontUnderline':
  1732. re.compile( r'__([^\s](|.*?[^\s])_*)__'),
  1733. 'fontStrike':
  1734. re.compile( r'--([^\s](|.*?[^\s])-*)--'),
  1735. 'list':
  1736. re.compile(r'^( *)(-) (?=[^ ])'),
  1737. 'numlist':
  1738. re.compile(r'^( *)(\+) (?=[^ ])'),
  1739. 'deflist':
  1740. re.compile(r'^( *)(:) (.*)$'),
  1741. 'listclose':
  1742. re.compile(r'^( *)([-+:])\s*$'),
  1743. 'bar':
  1744. re.compile(r'^(\s*)([_=-]{20,})\s*$'),
  1745. 'table':
  1746. re.compile(r'^ *\|\|? '),
  1747. 'blankline':
  1748. re.compile(r'^\s*$'),
  1749. 'comment':
  1750. re.compile(r'^%'),
  1751. # Auxiliary tag regexes
  1752. '_imgAlign' : re.compile(r'~A~', re.I),
  1753. '_tableAlign' : re.compile(r'~A~', re.I),
  1754. '_anchor' : re.compile(r'~A~', re.I),
  1755. '_tableBorder' : re.compile(r'~B~', re.I),
  1756. '_tableColAlign' : re.compile(r'~C~', re.I),
  1757. '_tableCellColSpan': re.compile(r'~S~', re.I),
  1758. '_tableCellAlign' : re.compile(r'~A~', re.I),
  1759. }
  1760. # Special char to place data on TAGs contents (\a == bell)
  1761. bank['x'] = re.compile('\a')
  1762. # %%macroname [ (formatting) ]
  1763. bank['macros'] = re.compile(r'%%%%(?P<name>%s)\b(\((?P<fmt>.*?)\))?' % (
  1764. '|'.join(MACROS.keys())), re.I)
  1765. # %%TOC special macro for TOC positioning
  1766. bank['toc'] = re.compile(r'^ *%%toc\s*$', re.I)
  1767. # Almost complicated title regexes ;)
  1768. titskel = r'^ *(?P<id>%s)(?P<txt>%s)\1(\[(?P<label>[\w-]*)\])?\s*$'
  1769. bank[ 'title'] = re.compile(titskel%('[=]{1,5}','[^=](|.*[^=])'))
  1770. bank['numtitle'] = re.compile(titskel%('[+]{1,5}','[^+](|.*[^+])'))
  1771. ### Complicated regexes begin here ;)
  1772. #
  1773. # Textual descriptions on --help's style: [...] is optional, | is OR
  1774. ### First, some auxiliary variables
  1775. #
  1776. # [image.EXT]
  1777. patt_img = r'\[([\w_,.+%$#@!?+~/-]+\.(png|jpe?g|gif|eps|bmp))\]'
  1778. # Link things
  1779. # http://www.gbiv.com/protocols/uri/rfc/rfc3986.html
  1780. # pchar: A-Za-z._~- / %FF / !$&'()*+,;= / :@
  1781. # Recomended order: scheme://user:pass@domain/path?query=foo#anchor
  1782. # Also works : scheme://user:pass@domain/path#anchor?query=foo
  1783. # TODO form: !'():
  1784. urlskel = {
  1785. 'proto' : r'(https?|ftp|news|telnet|gopher|wais)://',
  1786. 'guess' : r'(www[23]?|ftp)\.', # w/out proto, try to guess
  1787. 'login' : r'A-Za-z0-9_.-', # for ftp://login@domain.com
  1788. 'pass' : r'[^ @]*', # for ftp://login:pass@dom.com
  1789. 'chars' : r'A-Za-z0-9%._/~:,=$@&+-', # %20(space), :80(port), D&D
  1790. 'anchor': r'A-Za-z0-9%._-', # %nn(encoded)
  1791. 'form' : r'A-Za-z0-9/%&=+;.,$@*_-', # .,@*_-(as is)
  1792. 'punct' : r'.,;:!?'
  1793. }
  1794. # username [ :password ] @
  1795. patt_url_login = r'([%s]+(:%s)?@)?'%(urlskel['login'],urlskel['pass'])
  1796. # [ http:// ] [ username:password@ ] domain.com [ / ]
  1797. # [ #anchor | ?form=data ]
  1798. retxt_url = r'\b(%s%s|%s)[%s]+\b/*(\?[%s]+)?(#[%s]*)?'%(
  1799. urlskel['proto'],patt_url_login, urlskel['guess'],
  1800. urlskel['chars'],urlskel['form'],urlskel['anchor'])
  1801. # filename | [ filename ] #anchor
  1802. retxt_url_local = r'[%s]+|[%s]*(#[%s]*)'%(
  1803. urlskel['chars'],urlskel['chars'],urlskel['anchor'])
  1804. # user@domain [ ?form=data ]
  1805. patt_email = r'\b[%s]+@([A-Za-z0-9_-]+\.)+[A-Za-z]{2,4}\b(\?[%s]+)?'%(
  1806. urlskel['login'],urlskel['form'])
  1807. # Saving for future use
  1808. bank['_urlskel'] = urlskel
  1809. ### And now the real regexes
  1810. #
  1811. bank['email'] = re.compile(patt_email,re.I)
  1812. # email | url
  1813. bank['link'] = re.compile(r'%s|%s'%(retxt_url,patt_email), re.I)
  1814. # \[ label | imagetag url | email | filename \]
  1815. bank['linkmark'] = re.compile(
  1816. r'\[(?P<label>%s|[^]]+) (?P<link>%s|%s|%s)\]'%(
  1817. patt_img, retxt_url, patt_email, retxt_url_local),
  1818. re.L+re.I)
  1819. # Image
  1820. bank['img'] = re.compile(patt_img, re.L+re.I)
  1821. # Special things
  1822. bank['special'] = re.compile(r'^%!\s*')
  1823. return bank
  1824. ### END OF regex nightmares
  1825. ################# functions for the Ascii Art backend ########################
  1826. def aa_line(char):
  1827. return char*72 + LB
  1828. def aa_box(txt):
  1829. len_txt = len(txt)
  1830. nspace = (72-len_txt-4)/2
  1831. line_box = " "*nspace + AA_CHARS['coin'] + AA_CHARS['line']*(len_txt+2) + AA_CHARS['coin'] + LB
  1832. # <----- nspace " " -----> "+" <----- len_txt+2 "-" -----> "+"
  1833. # +-------------------------------+
  1834. # | all theeeeeeeeeeeeeeeeee text |
  1835. # <----- nspace " " -----> "| " <--------- txt ---------> " |"
  1836. line_txt = " "*nspace + AA_CHARS['border'] + ' ' + txt + ' ' + AA_CHARS['border'] + LB
  1837. return line_box + line_txt + line_box
  1838. def aa_header(header_data):
  1839. header= aa_line(AA_CHARS['bar2'])+\
  1840. LB+\
  1841. LB
  1842. for h in 'HEADER1', 'HEADER2', 'HEADER3' :
  1843. if header_data[h]: header +=\
  1844. aa_box(header_data[h])+\
  1845. LB+\
  1846. LB
  1847. header+=aa_line(AA_CHARS['bar2'])
  1848. return header
  1849. ##############################################################################
  1850. class error(Exception):
  1851. pass
  1852. def echo(msg): # for quick debug
  1853. print '\033[32;1m%s\033[m'%msg
  1854. def Quit(msg=''):
  1855. if msg: print msg
  1856. sys.exit(0)
  1857. def Error(msg):
  1858. msg = _("%s: Error: ")%my_name + msg
  1859. raise error, msg
  1860. def getTraceback():
  1861. try:
  1862. from traceback import format_exception
  1863. etype, value, tb = sys.exc_info()
  1864. return ''.join(format_exception(etype, value, tb))
  1865. except: pass
  1866. def getUnknownErrorMessage():
  1867. msg = '%s\n%s (%s):\n\n%s'%(
  1868. _('Sorry! Txt2tags aborted by an unknown error.'),
  1869. _('Please send the following Error Traceback to the author'),
  1870. my_email, getTraceback())
  1871. return msg
  1872. def Message(msg,level):
  1873. if level <= VERBOSE and not QUIET:
  1874. prefix = '-'*5
  1875. print "%s %s"%(prefix*level, msg)
  1876. def Debug(msg,id=0,linenr=None):
  1877. "Show debug messages, categorized (colored or not)"
  1878. if QUIET or not DEBUG: return
  1879. if int(id) not in range(8): id = 0
  1880. # 0:black 1:red 2:green 3:yellow 4:blue 5:pink 6:cyan 7:white ;1:light
  1881. ids = ['INI','CFG','SRC','BLK','HLD','GUI','OUT','DET']
  1882. colors_bgdark = ['7;1','1;1','3;1','6;1','4;1','5;1','2;1','7;1']
  1883. colors_bglight = ['0' ,'1' ,'3' ,'6' ,'4' ,'5' ,'2' ,'0' ]
  1884. if linenr is not None: msg = "LINE %04d: %s"%(linenr,msg)
  1885. if COLOR_DEBUG:
  1886. if BG_LIGHT: color = colors_bglight[id]
  1887. else : color = colors_bgdark[id]
  1888. msg = '\033[3%sm%s\033[m'%(color,msg)
  1889. print "++ %s: %s"%(ids[id],msg)
  1890. def Readfile(file, remove_linebreaks=0, ignore_error=0):
  1891. data = []
  1892. if file == '-':
  1893. try: data = sys.stdin.readlines()
  1894. except:
  1895. if not ignore_error:
  1896. Error(_('You must feed me with data on STDIN!'))
  1897. else:
  1898. try: f = open(file); data = f.readlines() ; f.close()
  1899. except:
  1900. if not ignore_error:
  1901. Error(_("Cannot read file:")+" %s"%file)
  1902. if remove_linebreaks:
  1903. data = map(lambda x:re.sub('[\n\r]+$','',x), data)
  1904. Message(_("File read (%d lines): %s")%(len(data),file),2)
  1905. return data
  1906. def Savefile(file, contents):
  1907. try: f = open(file, 'wb')
  1908. except: Error(_("Cannot open file for writing:")+" %s"%file)
  1909. if type(contents) == type([]): doit = f.writelines
  1910. else: doit = f.write
  1911. doit(contents) ; f.close()
  1912. def showdic(dic):
  1913. for k in dic.keys(): print "%15s : %s" % (k,dic[k])
  1914. def dotted_spaces(txt=''):
  1915. return txt.replace(' ', '.')
  1916. # TIP: win env vars http://www.winnetmag.com/Article/ArticleID/23873/23873.html
  1917. def get_rc_path():
  1918. "Return the full path for the users' RC file"
  1919. # Try to get the path from an env var. if yes, we're done
  1920. user_defined = os.environ.get('T2TCONFIG')
  1921. if user_defined: return user_defined
  1922. # Env var not found, so perform automatic path composing
  1923. # Set default filename according system platform
  1924. rc_names = {'default':'.txt2tagsrc', 'win':'_t2trc'}
  1925. rc_file = rc_names.get(sys.platform[:3]) or rc_names['default']
  1926. # The file must be on the user directory, but where is this dir?
  1927. rc_dir_search = ['HOME', 'HOMEPATH']
  1928. for var in rc_dir_search:
  1929. rc_dir = os.environ.get(var)
  1930. if rc_dir: break
  1931. # rc dir found, now we must join dir+file to compose the full path
  1932. if rc_dir:
  1933. # Compose path and return it if the file exists
  1934. rc_path = os.path.join(rc_dir, rc_file)
  1935. # On windows, prefix with the drive (%homedrive%: 2k/XP/NT)
  1936. if sys.platform.startswith('win'):
  1937. rc_drive = os.environ.get('HOMEDRIVE')
  1938. rc_path = os.path.join(rc_drive,rc_path)
  1939. return rc_path
  1940. # Sorry, not found
  1941. return ''
  1942. ##############################################################################
  1943. class CommandLine:
  1944. """
  1945. Command Line class - Masters command line
  1946. This class checks and extract data from the provided command line.
  1947. The --long options and flags are taken from the global OPTIONS,
  1948. FLAGS and ACTIONS dictionaries. The short options are registered
  1949. here, and also their equivalence to the long ones.
  1950. _compose_short_opts() -> str
  1951. _compose_long_opts() -> list
  1952. Compose the valid short and long options list, on the
  1953. 'getopt' format.
  1954. parse() -> (opts, args)
  1955. Call getopt to check and parse the command line.
  1956. It expects to receive the command line as a list, and
  1957. without the program name (sys.argv[1:]).
  1958. get_raw_config() -> [RAW config]
  1959. Scans command line and convert the data to the RAW config
  1960. format. See ConfigMaster class to the RAW format description.
  1961. Optional 'ignore' and 'filter' arguments are used to filter
  1962. in or out specified keys.
  1963. compose_cmdline(dict) -> [Command line]
  1964. Compose a command line list from an already parsed config
  1965. dictionary, generated from RAW by ConfigMaster(). Use
  1966. this to compose an optimal command line for a group of
  1967. options.
  1968. The get_raw_config() calls parse(), so the tipical use of this
  1969. class is:
  1970. raw = CommandLine().get_raw_config(sys.argv[1:])
  1971. """
  1972. def __init__(self):
  1973. self.all_options = OPTIONS.keys()
  1974. self.all_flags = FLAGS.keys()
  1975. self.all_actions = ACTIONS.keys()
  1976. # short:long options equivalence
  1977. self.short_long = {
  1978. 'a':'ascii-art',
  1979. 'C':'config-file',
  1980. 'h':'help',
  1981. 'H':'no-headers',
  1982. 'i':'infile',
  1983. 'n':'enum-title',
  1984. 'o':'outfile',
  1985. 'q':'quiet',
  1986. 't':'target',
  1987. 'v':'verbose',
  1988. 'V':'version',
  1989. }
  1990. # Compose valid short and long options data for getopt
  1991. self.short_opts = self._compose_short_opts()
  1992. self.long_opts = self._compose_long_opts()
  1993. def _compose_short_opts(self):
  1994. "Returns a string like 'hVt:o' with all short options/flags"
  1995. ret = []
  1996. for opt in self.short_long.keys():
  1997. long = self.short_long[opt]
  1998. if long in self.all_options: # is flag or option?
  1999. opt = opt+':' # option: have param
  2000. ret.append(opt)
  2001. #Debug('Valid SHORT options: %s'%ret)
  2002. return ''.join(ret)
  2003. def _compose_long_opts(self):
  2004. "Returns a list with all the valid long options/flags"
  2005. ret = map(lambda x:x+'=', self.all_options) # add =
  2006. ret.extend(self.all_flags) # flag ON
  2007. ret.extend(self.all_actions) # acts
  2008. ret.extend(map(lambda x:'no-'+x, self.all_flags)) # add no-*
  2009. ret.extend(['no-style','no-encoding']) # turn OFF
  2010. ret.extend(['no-outfile','no-infile']) # turn OFF
  2011. ret.extend(['no-dump-config', 'no-dump-source']) # turn OFF
  2012. #Debug('Valid LONG options: %s'%ret)
  2013. return ret
  2014. def _tokenize(self, cmd_string=''):
  2015. "Convert a command line string to a list"
  2016. #TODO protect quotes contents -- Don't use it, pass cmdline as list
  2017. return cmd_string.split()
  2018. def parse(self, cmdline=[]):
  2019. "Check/Parse a command line list TIP: no program name!"
  2020. # Get the valid options
  2021. short, long = self.short_opts, self.long_opts
  2022. # Parse it!
  2023. try:
  2024. opts, args = getopt.getopt(cmdline, short, long)
  2025. except getopt.error, errmsg:
  2026. Error(_("%s (try --help)")%errmsg)
  2027. return (opts, args)
  2028. def get_raw_config(self, cmdline=[], ignore=[], filter=[], relative=0):
  2029. "Returns the options/arguments found as RAW config"
  2030. if not cmdline: return []
  2031. ret = []
  2032. # We need lists, not strings
  2033. if type(cmdline) in (type(''), type(u'')):
  2034. cmdline = self._tokenize(cmdline)
  2035. opts, args = self.parse(cmdline[:])
  2036. # Parse all options
  2037. for name,value in opts:
  2038. # Remove leading - and --
  2039. name = re.sub('^--?', '', name)
  2040. # Alias to old misspelled 'suGGar'
  2041. if name == 'css-suggar': name = 'css-sugar'
  2042. elif name == 'no-css-suggar': name = 'no-css-sugar'
  2043. # Translate short opt to long
  2044. if len(name) == 1: name = self.short_long.get(name)
  2045. # Outfile exception: path relative to PWD
  2046. if name == 'outfile' and relative \
  2047. and value not in [STDOUT, MODULEOUT]:
  2048. value = os.path.abspath(value)
  2049. # config-file inclusion, path relative to PWD
  2050. if name == 'config-file':
  2051. configs = ConfigLines().include_config_file(value)
  2052. # Remove the 'target' item of all configs
  2053. configs = map(lambda c: [c[1],c[2]], configs)
  2054. ret.extend(configs)
  2055. continue
  2056. # Save it
  2057. ret.append([name, value])
  2058. # Get infile, if any
  2059. while args:
  2060. infile = args.pop(0)
  2061. ret.append(['infile', infile])
  2062. # Apply 'ignore' and 'filter' rules (filter is stronger)
  2063. temp = ret[:] ; ret = []
  2064. for name,value in temp:
  2065. if (not filter and not ignore) or \
  2066. (filter and name in filter) or \
  2067. (ignore and name not in ignore):
  2068. ret.append( ['all', name, value] )
  2069. # Add the original command line string as 'realcmdline'
  2070. ret.append( ['all', 'realcmdline', cmdline] )
  2071. return ret
  2072. def compose_cmdline(self, conf={}, no_check=0):
  2073. "compose a full (and diet) command line from CONF dict"
  2074. if not conf: return []
  2075. args = []
  2076. dft_options = OPTIONS.copy()
  2077. cfg = conf.copy()
  2078. valid_opts = self.all_options + self.all_flags
  2079. use_short = {'no-headers':'H', 'enum-title':'n'}
  2080. # Remove useless options
  2081. if not no_check and cfg.get('toc-only'):
  2082. if cfg.has_key('no-headers'):
  2083. del cfg['no-headers']
  2084. if cfg.has_key('outfile'):
  2085. del cfg['outfile'] # defaults to STDOUT
  2086. if cfg.get('target') == 'txt':
  2087. del cfg['target'] # already default
  2088. args.append('--toc-only') # must be the first
  2089. del cfg['toc-only']
  2090. # Add target type
  2091. if cfg.has_key('target'):
  2092. args.append('-t '+cfg['target'])
  2093. del cfg['target']
  2094. # Add other options
  2095. for key in cfg.keys():
  2096. if key not in valid_opts: continue # may be a %!setting
  2097. if key == 'outfile' or key == 'infile': continue # later
  2098. val = cfg[key]
  2099. if not val: continue
  2100. # Default values are useless on cmdline
  2101. if val == dft_options.get(key): continue
  2102. # -short format
  2103. if key in use_short.keys():
  2104. args.append('-'+use_short[key])
  2105. continue
  2106. # --long format
  2107. if key in self.all_flags: # add --option
  2108. args.append('--'+key)
  2109. else: # add --option=value
  2110. args.append('--%s=%s'%(key,val))
  2111. # The outfile using -o
  2112. if cfg.has_key('outfile') and \
  2113. cfg['outfile'] != dft_options.get('outfile'):
  2114. args.append('-o '+cfg['outfile'])
  2115. # Place input file(s) always at the end
  2116. if cfg.has_key('infile'):
  2117. args.append(' '.join(cfg['infile']))
  2118. # Return as a nice list
  2119. Debug("Diet command line: %s"%' '.join(args), 1)
  2120. return args
  2121. ##############################################################################
  2122. class SourceDocument:
  2123. """
  2124. SourceDocument class - scan document structure, extract data
  2125. It knows about full files. It reads a file and identify all
  2126. the areas begining (Head,Conf,Body). With this info it can
  2127. extract each area contents.
  2128. Note: the original line break is removed.
  2129. DATA:
  2130. self.arearef - Save Head, Conf, Body init line number
  2131. self.areas - Store the area names which are not empty
  2132. self.buffer - The full file contents (with NO \\r, \\n)
  2133. METHODS:
  2134. get() - Access the contents of an Area. Example:
  2135. config = SourceDocument(file).get('conf')
  2136. split() - Get all the document Areas at once. Example:
  2137. head, conf, body = SourceDocument(file).split()
  2138. RULES:
  2139. * The document parts are sequential: Head, Conf and Body.
  2140. * One ends when the next begins.
  2141. * The Conf Area is optional, so a document can have just
  2142. Head and Body Areas.
  2143. These are the Areas limits:
  2144. - Head Area: the first three lines
  2145. - Body Area: from the first valid text line to the end
  2146. - Conf Area: the comments between Head and Body Areas
  2147. Exception: If the first line is blank, this means no
  2148. header info, so the Head Area is just the first line.
  2149. """
  2150. def __init__(self, filename='', contents=[]):
  2151. self.areas = ['head','conf','body']
  2152. self.arearef = []
  2153. self.areas_fancy = ''
  2154. self.filename = filename
  2155. self.buffer = []
  2156. if filename:
  2157. self.scan_file(filename)
  2158. elif contents:
  2159. self.scan(contents)
  2160. def split(self):
  2161. "Returns all document parts, splitted into lists."
  2162. return self.get('head'), self.get('conf'), self.get('body')
  2163. def get(self, areaname):
  2164. "Returns head|conf|body contents from self.buffer"
  2165. # Sanity
  2166. if areaname not in self.areas: return []
  2167. if not self.buffer : return []
  2168. # Go get it
  2169. bufini = 1
  2170. bufend = len(self.buffer)
  2171. if areaname == 'head':
  2172. ini = bufini
  2173. end = self.arearef[1] or self.arearef[2] or bufend
  2174. elif areaname == 'conf':
  2175. ini = self.arearef[1]
  2176. end = self.arearef[2] or bufend
  2177. elif areaname == 'body':
  2178. ini = self.arearef[2]
  2179. end = bufend
  2180. else:
  2181. Error("Unknown Area name '%s'"%areaname)
  2182. lines = self.buffer[ini:end]
  2183. # Make sure head will always have 3 lines
  2184. while areaname == 'head' and len(lines) < 3:
  2185. lines.append('')
  2186. return lines
  2187. def scan_file(self, filename):
  2188. Debug("source file: %s"%filename)
  2189. Message(_("Loading source document"),1)
  2190. buf = Readfile(filename, remove_linebreaks=1)
  2191. self.scan(buf)
  2192. def scan(self, lines):
  2193. "Run through source file and identify head/conf/body areas"
  2194. buf = lines
  2195. if len(buf) == 0:
  2196. Error(_('The input file is empty: %s')%self.filename)
  2197. cfg_parser = ConfigLines().parse_line
  2198. buf.insert(0, '') # text start at pos 1
  2199. ref = [1,4,0]
  2200. if not buf[1].strip(): # no header
  2201. ref[0] = 0 ; ref[1] = 2
  2202. rgx = getRegexes()
  2203. on_comment_block = 0
  2204. for i in xrange(ref[1],len(buf)): # find body init:
  2205. # Handle comment blocks inside config area
  2206. if not on_comment_block \
  2207. and rgx['blockCommentOpen'].search(buf[i]):
  2208. on_comment_block = 1
  2209. continue
  2210. if on_comment_block \
  2211. and rgx['blockCommentOpen'].search(buf[i]):
  2212. on_comment_block = 0
  2213. continue
  2214. if on_comment_block: continue
  2215. if buf[i].strip() and ( # ... not blank and
  2216. buf[i][0] != '%' or # ... not comment or
  2217. rgx['macros'].match(buf[i]) or # ... %%macro
  2218. rgx['toc'].match(buf[i]) or # ... %%toc
  2219. cfg_parser(buf[i],'include')[1]): # ... %!include
  2220. ref[2] = i ; break
  2221. if ref[1] == ref[2]: ref[1] = 0 # no conf area
  2222. for i in 0,1,2: # del !existent
  2223. if ref[i] >= len(buf): ref[i] = 0 # title-only
  2224. if not ref[i]: self.areas[i] = ''
  2225. Debug('Head,Conf,Body start line: %s'%ref)
  2226. self.arearef = ref # save results
  2227. self.buffer = buf
  2228. # Fancyness sample: head conf body (1 4 8)
  2229. self.areas_fancy = "%s (%s)"%(
  2230. ' '.join(self.areas),
  2231. ' '.join(map(str, map(lambda x:x or '', ref))))
  2232. Message(_("Areas found: %s")%self.areas_fancy, 2)
  2233. def get_raw_config(self):
  2234. "Handy method to get the CONF area RAW config (if any)"
  2235. if not self.areas.count('conf'): return []
  2236. Message(_("Scanning source document CONF area"),1)
  2237. raw = ConfigLines(
  2238. file=self.filename, lines=self.get('conf'),
  2239. first_line=self.arearef[1]).get_raw_config()
  2240. Debug("document raw config: %s"%raw, 1)
  2241. return raw
  2242. ##############################################################################
  2243. class ConfigMaster:
  2244. """
  2245. ConfigMaster class - the configuration wizard
  2246. This class is the configuration master. It knows how to handle
  2247. the RAW and PARSED config format. It also performs the sanity
  2248. checking for a given configuration.
  2249. DATA:
  2250. self.raw - Stores the config on the RAW format
  2251. self.parsed - Stores the config on the PARSED format
  2252. self.defaults - Stores the default values for all keys
  2253. self.off - Stores the OFF values for all keys
  2254. self.multi - List of keys which can have multiple values
  2255. self.numeric - List of keys which value must be a number
  2256. self.incremental - List of keys which are incremental
  2257. RAW FORMAT:
  2258. The RAW format is a list of lists, being each mother list item
  2259. a full configuration entry. Any entry is a 3 item list, on
  2260. the following format: [ TARGET, KEY, VALUE ]
  2261. Being a list, the order is preserved, so it's easy to use
  2262. different kinds of configs, as CONF area and command line,
  2263. respecting the precedence.
  2264. The special target 'all' is used when no specific target was
  2265. defined on the original config.
  2266. PARSED FORMAT:
  2267. The PARSED format is a dictionary, with all the 'key : value'
  2268. found by reading the RAW config. The self.target contents
  2269. matters, so this dictionary only contains the target's
  2270. config. The configs of other targets are ignored.
  2271. The CommandLine and ConfigLines classes have the get_raw_config()
  2272. method which convert the configuration found to the RAW format.
  2273. Just feed it to parse() and get a brand-new ready-to-use config
  2274. dictionary. Example:
  2275. >>> raw = CommandLine().get_raw_config(['-n', '-H'])
  2276. >>> print raw
  2277. [['all', 'enum-title', ''], ['all', 'no-headers', '']]
  2278. >>> parsed = ConfigMaster(raw).parse()
  2279. >>> print parsed
  2280. {'enum-title': 1, 'headers': 0}
  2281. """
  2282. def __init__(self, raw=[], target=''):
  2283. self.raw = raw
  2284. self.target = target
  2285. self.parsed = {}
  2286. self.dft_options = OPTIONS.copy()
  2287. self.dft_flags = FLAGS.copy()
  2288. self.dft_actions = ACTIONS.copy()
  2289. self.dft_settings = SETTINGS.copy()
  2290. self.defaults = self._get_defaults()
  2291. self.off = self._get_off()
  2292. self.incremental = ['verbose']
  2293. self.numeric = ['toc-level','split']
  2294. self.multi = ['infile', 'preproc', 'postproc', 'options', 'style']
  2295. def _get_defaults(self):
  2296. "Get the default values for all config/options/flags"
  2297. empty = {}
  2298. for kw in CONFIG_KEYWORDS: empty[kw] = ''
  2299. empty.update(self.dft_options)
  2300. empty.update(self.dft_flags)
  2301. empty.update(self.dft_actions)
  2302. empty.update(self.dft_settings)
  2303. empty['realcmdline'] = '' # internal use only
  2304. empty['sourcefile'] = '' # internal use only
  2305. return empty
  2306. def _get_off(self):
  2307. "Turns OFF all the config/options/flags"
  2308. off = {}
  2309. for key in self.defaults.keys():
  2310. kind = type(self.defaults[key])
  2311. if kind == type(9):
  2312. off[key] = 0
  2313. elif kind == type('') or kind == type(u''):
  2314. off[key] = ''
  2315. elif kind == type([]):
  2316. off[key] = []
  2317. else:
  2318. Error('ConfigMaster: %s: Unknown type'+key)
  2319. return off
  2320. def _check_target(self):
  2321. "Checks if the target is already defined. If not, do it"
  2322. if not self.target:
  2323. self.target = self.find_value('target')
  2324. def get_target_raw(self):
  2325. "Returns the raw config for self.target or 'all'"
  2326. ret = []
  2327. self._check_target()
  2328. for entry in self.raw:
  2329. if entry[0] == self.target or entry[0] == 'all':
  2330. ret.append(entry)
  2331. return ret
  2332. def add(self, key, val):
  2333. "Adds the key:value pair to the config dictionary (if needed)"
  2334. # %!options
  2335. if key == 'options':
  2336. ignoreme = self.dft_actions.keys() + ['target']
  2337. ignoreme.remove('dump-config')
  2338. ignoreme.remove('dump-source')
  2339. raw_opts = CommandLine().get_raw_config(
  2340. val, ignore=ignoreme)
  2341. for target, key, val in raw_opts:
  2342. self.add(key, val)
  2343. return
  2344. # The no- prefix turns OFF this key
  2345. if key.startswith('no-'):
  2346. key = key[3:] # remove prefix
  2347. val = self.off.get(key) # turn key OFF
  2348. # Is this key valid?
  2349. if key not in self.defaults.keys():
  2350. Debug('Bogus Config %s:%s'%(key,val),1)
  2351. return
  2352. # Is this value the default one?
  2353. if val == self.defaults.get(key):
  2354. # If default value, remove previous key:val
  2355. if self.parsed.has_key(key):
  2356. del self.parsed[key]
  2357. # Nothing more to do
  2358. return
  2359. # Flags ON comes empty. we'll add the 1 value now
  2360. if val == '' and (
  2361. key in self.dft_flags.keys() or
  2362. key in self.dft_actions.keys()):
  2363. val = 1
  2364. # Multi value or single?
  2365. if key in self.multi:
  2366. # First one? start new list
  2367. if not self.parsed.has_key(key):
  2368. self.parsed[key] = []
  2369. self.parsed[key].append(val)
  2370. # Incremental value? so let's add it
  2371. elif key in self.incremental:
  2372. self.parsed[key] = (self.parsed.get(key) or 0) + val
  2373. else:
  2374. self.parsed[key] = val
  2375. fancykey = dotted_spaces("%12s"%key)
  2376. Message(_("Added config %s : %s")%(fancykey,val),3)
  2377. def get_outfile_name(self, config={}):
  2378. "Dirname is the same for {in,out}file"
  2379. infile, outfile = config['sourcefile'], config['outfile']
  2380. if outfile and outfile not in (STDOUT, MODULEOUT) \
  2381. and not os.path.isabs(outfile):
  2382. outfile = os.path.join(os.path.dirname(infile), outfile)
  2383. if infile == STDIN and not outfile: outfile = STDOUT
  2384. if infile == MODULEIN and not outfile: outfile = MODULEOUT
  2385. if not outfile and (infile and config.get('target')):
  2386. basename = re.sub('\.(txt|t2t)$','',infile)
  2387. outfile = "%s.%s"%(basename, config['target'])
  2388. Debug(" infile: '%s'"%infile , 1)
  2389. Debug("outfile: '%s'"%outfile, 1)
  2390. return outfile
  2391. def sanity(self, config, gui=0):
  2392. "Basic config sanity checking"
  2393. if not config: return {}
  2394. target = config.get('target')
  2395. # Some actions don't require target specification
  2396. if not target:
  2397. for action in NO_TARGET:
  2398. if config.get(action):
  2399. target = 'txt'
  2400. break
  2401. # On GUI, some checking are skipped
  2402. if not gui:
  2403. # We *need* a target
  2404. if not target:
  2405. Error(_('No target specified (try --help)') + '\n\n' +
  2406. _('Maybe trying to convert an old v1.x file?'))
  2407. # And of course, an infile also
  2408. if not config.get('infile'):
  2409. Error(_('Missing input file (try --help)'))
  2410. # Is the target valid?
  2411. if not TARGETS.count(target):
  2412. Error(_("Invalid target '%s' (try --help)") % target)
  2413. # Ensure all keys are present
  2414. empty = self.defaults.copy() ; empty.update(config)
  2415. config = empty.copy()
  2416. # Check integers options
  2417. for key in config.keys():
  2418. if key in self.numeric:
  2419. try: config[key] = int(config[key])
  2420. except: Error(_('--%s value must be a number') % key)
  2421. # Check split level value
  2422. if config['split'] not in (0,1,2):
  2423. Error(_('Option --split must be 0, 1 or 2'))
  2424. # --toc-only is stronger than others
  2425. if config['toc-only']:
  2426. config['headers'] = 0
  2427. config['toc'] = 0
  2428. config['split'] = 0
  2429. config['gui'] = 0
  2430. config['outfile'] = config['outfile'] or STDOUT
  2431. # Splitting is disable for now (future: HTML only, no STDOUT)
  2432. config['split'] = 0
  2433. # Restore target
  2434. config['target'] = target
  2435. # Set output file name
  2436. config['outfile'] = self.get_outfile_name(config)
  2437. # Checking suicide
  2438. if config['sourcefile'] == config['outfile'] and \
  2439. config['outfile'] not in [STDOUT,MODULEOUT] and not gui:
  2440. Error(_("Input and Output files are the same: %s") % config['outfile'])
  2441. return config
  2442. def parse(self):
  2443. "Returns the parsed config for the current target"
  2444. raw = self.get_target_raw()
  2445. for target, key, value in raw:
  2446. self.add(key, value)
  2447. Message(_("Added the following keys: %s") % ', '.join(self.parsed.keys()), 2)
  2448. return self.parsed.copy()
  2449. def find_value(self, key='', target=''):
  2450. "Scans ALL raw config to find the desired key"
  2451. ret = []
  2452. # Scan and save all values found
  2453. for targ, k, val in self.raw:
  2454. if k == key and (targ == target or targ == 'all'):
  2455. ret.append(val)
  2456. if not ret: return ''
  2457. # If not multi value, return only the last found
  2458. if key in self.multi: return ret
  2459. else : return ret[-1]
  2460. ########################################################################
  2461. class ConfigLines:
  2462. """
  2463. ConfigLines class - the config file data extractor
  2464. This class reads and parse the config lines on the %!key:val
  2465. format, converting it to RAW config. It deals with user
  2466. config file (RC file), source document CONF area and
  2467. %!includeconf directives.
  2468. Call it passing a file name or feed the desired config lines.
  2469. Then just call the get_raw_config() method and wait to
  2470. receive the full config data on the RAW format. This method
  2471. also follows the possible %!includeconf directives found on
  2472. the config lines. Example:
  2473. raw = ConfigLines(file=".txt2tagsrc").get_raw_config()
  2474. The parse_line() method is also useful to be used alone,
  2475. to identify and tokenize a single config line. For example,
  2476. to get the %!include command components, on the source
  2477. document BODY:
  2478. target, key, value = ConfigLines().parse_line(body_line)
  2479. """
  2480. def __init__(self, file='', lines=[], first_line=1):
  2481. self.file = file or 'NOFILE'
  2482. self.lines = lines
  2483. self.first_line = first_line
  2484. def load_lines(self):
  2485. "Make sure we've loaded the file contents into buffer"
  2486. if not self.lines and not self.file:
  2487. Error("ConfigLines: No file or lines provided")
  2488. if not self.lines:
  2489. self.lines = self.read_config_file(self.file)
  2490. def read_config_file(self, filename=''):
  2491. "Read a Config File contents, aborting on invalid line"
  2492. if not filename: return []
  2493. errormsg = _("Invalid CONFIG line on %s")+"\n%03d:%s"
  2494. lines = Readfile(filename, remove_linebreaks=1)
  2495. # Sanity: try to find invalid config lines
  2496. for i in xrange(len(lines)):
  2497. line = lines[i].rstrip()
  2498. if not line: continue # empty
  2499. if line[0] != '%': Error(errormsg%(filename,i+1,line))
  2500. return lines
  2501. def include_config_file(self, file=''):
  2502. "Perform the %!includeconf action, returning RAW config"
  2503. if not file: return []
  2504. # Current dir relative to the current file (self.file)
  2505. current_dir = os.path.dirname(self.file)
  2506. file = os.path.join(current_dir, file)
  2507. # Read and parse included config file contents
  2508. lines = self.read_config_file(file)
  2509. return ConfigLines(file=file, lines=lines).get_raw_config()
  2510. def get_raw_config(self):
  2511. "Scan buffer and extract all config as RAW (including includes)"
  2512. ret = []
  2513. self.load_lines()
  2514. first = self.first_line
  2515. for i in xrange(len(self.lines)):
  2516. line = self.lines[i]
  2517. Message(_("Processing line %03d: %s")%(first+i,line),2)
  2518. target, key, val = self.parse_line(line)
  2519. if not key: continue # no config on this line
  2520. if key == 'includeconf':
  2521. err = _('A file cannot include itself (loop!)')
  2522. if val == self.file:
  2523. Error("%s: %%!includeconf: %s" % (err, self.file))
  2524. more_raw = self.include_config_file(val)
  2525. ret.extend(more_raw)
  2526. Message(_("Finished Config file inclusion: %s") % val, 2)
  2527. else:
  2528. ret.append([target, key, val])
  2529. Message(_("Added %s")%key,3)
  2530. return ret
  2531. def parse_line(self, line='', keyname='', target=''):
  2532. "Detects %!key:val config lines and extract data from it"
  2533. empty = ['', '', '']
  2534. if not line: return empty
  2535. no_target = ['target', 'includeconf']
  2536. re_name = keyname or '[a-z]+'
  2537. re_target = target or '[a-z]*'
  2538. # XXX TODO <value>\S.+? requires TWO chars, breaks %!include:a
  2539. cfgregex = re.compile("""
  2540. ^%%!\s* # leading id with opt spaces
  2541. (?P<name>%s)\s* # config name
  2542. (\((?P<target>%s)\))? # optional target spec inside ()
  2543. \s*:\s* # key:value delimiter with opt spaces
  2544. (?P<value>\S.+?) # config value
  2545. \s*$ # rstrip() spaces and hit EOL
  2546. """%(re_name, re_target), re.I+re.VERBOSE)
  2547. prepostregex = re.compile("""
  2548. # ---[ PATTERN ]---
  2549. ^( "([^"]*)" # "double quoted" or
  2550. | '([^']*)' # 'single quoted' or
  2551. | ([^\s]+) # single_word
  2552. )
  2553. \s+ # separated by spaces
  2554. # ---[ REPLACE ]---
  2555. ( "([^"]*)" # "double quoted" or
  2556. | '([^']*)' # 'single quoted' or
  2557. | (.*) # anything
  2558. )
  2559. \s*$
  2560. """, re.VERBOSE)
  2561. guicolors = re.compile("^([^\s]+\s+){3}[^\s]+") # 4 tokens
  2562. match = cfgregex.match(line)
  2563. if not match: return empty
  2564. name = (match.group('name') or '').lower()
  2565. target = (match.group('target') or 'all').lower()
  2566. value = match.group('value')
  2567. # NO target keywords: force all targets
  2568. if name in no_target: target = 'all'
  2569. # Special config for GUI colors
  2570. if name == 'guicolors':
  2571. valmatch = guicolors.search(value)
  2572. if not valmatch: return empty
  2573. value = re.split('\s+', value)
  2574. # Special config with two quoted values (%!preproc: "foo" 'bar')
  2575. if name == 'preproc' or name == 'postproc':
  2576. valmatch = prepostregex.search(value)
  2577. if not valmatch: return empty
  2578. getval = valmatch.group
  2579. patt = getval(2) or getval(3) or getval(4) or ''
  2580. repl = getval(6) or getval(7) or getval(8) or ''
  2581. value = (patt, repl)
  2582. return [target, name, value]
  2583. ##############################################################################
  2584. class MaskMaster:
  2585. "(Un)Protect important structures from escaping and formatting"
  2586. def __init__(self):
  2587. self.linkmask = 'vvvLINKvvv'
  2588. self.monomask = 'vvvMONOvvv'
  2589. self.macromask = 'vvvMACROvvv'
  2590. self.rawmask = 'vvvRAWvvv'
  2591. self.taggedmask= 'vvvTAGGEDvvv'
  2592. self.tocmask = 'vvvTOCvvv'
  2593. self.macroman = MacroMaster()
  2594. self.reset()
  2595. def reset(self):
  2596. self.linkbank = []
  2597. self.monobank = []
  2598. self.macrobank = []
  2599. self.rawbank = []
  2600. self.taggedbank = []
  2601. def mask(self, line=''):
  2602. global AUTOTOC
  2603. # The verbatim, raw and tagged inline marks are mutually exclusive.
  2604. # This means that one can't appear inside the other.
  2605. # If found, the inner marks must be ignored.
  2606. # Example: ``foo ""bar"" ''baz''``
  2607. # In HTML: <code>foo ""bar"" ''baz''</code>
  2608. #
  2609. # The trick here is to protect the mark who appears first on the line.
  2610. # The three regexes are tried and the one with the lowest index wins.
  2611. # If none is found (else), we get out of the loop.
  2612. #
  2613. while True:
  2614. # Try to match the line for the three marks
  2615. # Note: 'z' > 999999
  2616. #
  2617. t = r = v = 'z'
  2618. try: t = regex['tagged'].search(line).start()
  2619. except: pass
  2620. try: r = regex['raw'].search(line).start()
  2621. except: pass
  2622. try: v = regex['fontMono'].search(line).start()
  2623. except: pass
  2624. # Protect tagged text
  2625. if t >= 0 and t < r and t < v:
  2626. txt = regex['tagged'].search(line).group(1)
  2627. self.taggedbank.append(txt)
  2628. line = regex['tagged'].sub(self.taggedmask,line,1)
  2629. # Protect raw text
  2630. elif r >= 0 and r < t and r < v:
  2631. txt = regex['raw'].search(line).group(1)
  2632. txt = doEscape(TARGET,txt)
  2633. self.rawbank.append(txt)
  2634. line = regex['raw'].sub(self.rawmask,line,1)
  2635. # Protect verbatim text
  2636. elif v >= 0 and v < t and v < r:
  2637. txt = regex['fontMono'].search(line).group(1)
  2638. txt = doEscape(TARGET,txt)
  2639. self.monobank.append(txt)
  2640. line = regex['fontMono'].sub(self.monomask,line,1)
  2641. else:
  2642. break
  2643. # Protect macros
  2644. while regex['macros'].search(line):
  2645. txt = regex['macros'].search(line).group()
  2646. self.macrobank.append(txt)
  2647. line = regex['macros'].sub(self.macromask,line,1)
  2648. # Protect TOC location
  2649. while regex['toc'].search(line):
  2650. line = regex['toc'].sub(self.tocmask,line)
  2651. AUTOTOC = 0
  2652. # Protect URLs and emails
  2653. while regex['linkmark'].search(line) or \
  2654. regex['link' ].search(line):
  2655. # Try to match plain or named links
  2656. match_link = regex['link'].search(line)
  2657. match_named = regex['linkmark'].search(line)
  2658. # Define the current match
  2659. if match_link and match_named:
  2660. # Both types found, which is the first?
  2661. m = match_link
  2662. if match_named.start() < match_link.start():
  2663. m = match_named
  2664. else:
  2665. # Just one type found, we're fine
  2666. m = match_link or match_named
  2667. # Extract link data and apply mask
  2668. if m == match_link: # plain link
  2669. link = m.group()
  2670. label = ''
  2671. link_re = regex['link']
  2672. else: # named link
  2673. link = m.group('link')
  2674. label = m.group('label').rstrip()
  2675. link_re = regex['linkmark']
  2676. line = link_re.sub(self.linkmask,line,1)
  2677. # Save link data to the link bank
  2678. self.linkbank.append((label, link))
  2679. return line
  2680. def undo(self, line):
  2681. # url & email
  2682. for label,url in self.linkbank:
  2683. link = get_tagged_link(label, url)
  2684. line = line.replace(self.linkmask, link, 1)
  2685. # Expand macros
  2686. for macro in self.macrobank:
  2687. macro = self.macroman.expand(macro)
  2688. line = line.replace(self.macromask, macro, 1)
  2689. # Expand verb
  2690. for mono in self.monobank:
  2691. open,close = TAGS['fontMonoOpen'],TAGS['fontMonoClose']
  2692. line = line.replace(self.monomask, open+mono+close, 1)
  2693. # Expand raw
  2694. for raw in self.rawbank:
  2695. line = line.replace(self.rawmask, raw, 1)
  2696. # Expand tagged
  2697. for tagged in self.taggedbank:
  2698. line = line.replace(self.taggedmask, tagged, 1)
  2699. return line
  2700. ##############################################################################
  2701. class TitleMaster:
  2702. "Title things"
  2703. def __init__(self):
  2704. self.count = ['',0,0,0,0,0]
  2705. self.toc = []
  2706. self.level = 0
  2707. self.kind = ''
  2708. self.txt = ''
  2709. self.label = ''
  2710. self.tag = ''
  2711. self.tag_hold = []
  2712. self.last_level = 0
  2713. self.count_id = ''
  2714. self.user_labels = {}
  2715. self.anchor_count = 0
  2716. self.anchor_prefix = 'toc'
  2717. def _open_close_blocks(self):
  2718. "Open new title blocks, closing the previous (if any)"
  2719. if not rules['titleblocks']: return
  2720. tag = ''
  2721. last = self.last_level
  2722. curr = self.level
  2723. # Same level, just close the previous
  2724. if curr == last:
  2725. tag = TAGS.get('title%dClose'%last)
  2726. if tag: self.tag_hold.append(tag)
  2727. # Section -> subsection, more depth
  2728. while curr > last:
  2729. last += 1
  2730. # Open the new block of subsections
  2731. tag = TAGS.get('blockTitle%dOpen'%last)
  2732. if tag: self.tag_hold.append(tag)
  2733. # Jump from title1 to title3 or more
  2734. # Fill the gap with an empty section
  2735. if curr - last > 0:
  2736. tag = TAGS.get('title%dOpen'%last)
  2737. tag = regex['x'].sub('', tag) # del \a
  2738. if tag: self.tag_hold.append(tag)
  2739. # Section <- subsection, less depth
  2740. while curr < last:
  2741. # Close the current opened subsection
  2742. tag = TAGS.get('title%dClose'%last)
  2743. if tag: self.tag_hold.append(tag)
  2744. # Close the current opened block of subsections
  2745. tag = TAGS.get('blockTitle%dClose'%last)
  2746. if tag: self.tag_hold.append(tag)
  2747. last -= 1
  2748. # Close the previous section of the same level
  2749. # The subsections were under it
  2750. if curr == last:
  2751. tag = TAGS.get('title%dClose'%last)
  2752. if tag: self.tag_hold.append(tag)
  2753. def add(self, line):
  2754. "Parses a new title line."
  2755. if not line: return
  2756. self._set_prop(line)
  2757. self._open_close_blocks()
  2758. self._set_count_id()
  2759. self._set_label()
  2760. self._save_toc_info()
  2761. def close_all(self):
  2762. "Closes all opened title blocks"
  2763. ret = []
  2764. ret.extend(self.tag_hold)
  2765. while self.level:
  2766. tag = TAGS.get('title%dClose'%self.level)
  2767. if tag: ret.append(tag)
  2768. tag = TAGS.get('blockTitle%dClose'%self.level)
  2769. if tag: ret.append(tag)
  2770. self.level -= 1
  2771. return ret
  2772. def _save_toc_info(self):
  2773. "Save TOC info, used by self.dump_marked_toc()"
  2774. self.toc.append((self.level, self.count_id, self.txt, self.label))
  2775. def _set_prop(self, line=''):
  2776. "Extract info from original line and set data holders."
  2777. # Detect title type (numbered or not)
  2778. id = line.lstrip()[0]
  2779. if id == '=': kind = 'title'
  2780. elif id == '+': kind = 'numtitle'
  2781. else: Error("Unknown Title ID '%s'"%id)
  2782. # Extract line info
  2783. match = regex[kind].search(line)
  2784. level = len(match.group('id'))
  2785. txt = match.group('txt').strip()
  2786. label = match.group('label')
  2787. # Parse info & save
  2788. if CONF['enum-title']: kind = 'numtitle' # force
  2789. if rules['titleblocks']:
  2790. self.tag = TAGS.get('%s%dOpen'%(kind,level)) or \
  2791. TAGS.get('title%dOpen'%level)
  2792. else:
  2793. self.tag = TAGS.get(kind+`level`) or \
  2794. TAGS.get('title'+`level`)
  2795. self.last_level = self.level
  2796. self.kind = kind
  2797. self.level = level
  2798. self.txt = txt
  2799. self.label = label
  2800. def _set_count_id(self):
  2801. "Compose and save the title count identifier (if needed)."
  2802. count_id = ''
  2803. if self.kind == 'numtitle' and not rules['autonumbertitle']:
  2804. # Manually increase title count
  2805. self.count[self.level] += 1
  2806. # Reset sublevels count (if any)
  2807. max_levels = len(self.count)
  2808. if self.level < max_levels-1:
  2809. for i in xrange(self.level+1, max_levels):
  2810. self.count[i] = 0
  2811. # Compose count id from hierarchy
  2812. for i in xrange(self.level):
  2813. count_id= "%s%d."%(count_id, self.count[i+1])
  2814. self.count_id = count_id
  2815. def _set_label(self):
  2816. "Compose and save title label, used by anchors."
  2817. # Remove invalid chars from label set by user
  2818. self.label = re.sub('[^A-Za-z0-9_-]', '', self.label or '')
  2819. # Generate name as 15 first :alnum: chars
  2820. #TODO how to translate safely accented chars to plain?
  2821. #self.label = re.sub('[^A-Za-z0-9]', '', self.txt)[:15]
  2822. # 'tocN' label - sequential count, ignoring 'toc-level'
  2823. #self.label = self.anchor_prefix + str(len(self.toc)+1)
  2824. def _get_tagged_anchor(self):
  2825. "Return anchor if user defined a label, or TOC is on."
  2826. ret = ''
  2827. label = self.label
  2828. if CONF['toc'] and self.level <= CONF['toc-level']:
  2829. # This count is needed bcos self.toc stores all
  2830. # titles, regardless of the 'toc-level' setting,
  2831. # so we can't use self.toc length to number anchors
  2832. self.anchor_count += 1
  2833. # Autonumber label (if needed)
  2834. label = label or '%s%s' % (self.anchor_prefix, self.anchor_count)
  2835. if label and TAGS['anchor']:
  2836. ret = regex['x'].sub(label,TAGS['anchor'])
  2837. return ret
  2838. def _get_full_title_text(self):
  2839. "Returns the full title contents, already escaped."
  2840. ret = self.txt
  2841. # Insert count_id (if any) before text
  2842. if self.count_id:
  2843. ret = '%s %s'%(self.count_id, ret)
  2844. # Escape specials
  2845. ret = doEscape(TARGET, ret)
  2846. # Same targets needs final escapes on title lines
  2847. # It's here because there is a 'continue' after title
  2848. if rules['finalescapetitle']:
  2849. ret = doFinalEscape(TARGET, ret)
  2850. return ret
  2851. def get(self):
  2852. "Returns the tagged title as a list."
  2853. ret = []
  2854. # Maybe some anchoring before?
  2855. anchor = self._get_tagged_anchor()
  2856. self.tag = regex['_anchor'].sub(anchor, self.tag)
  2857. ### Compose & escape title text (TOC uses unescaped)
  2858. full_title = self._get_full_title_text()
  2859. # Close previous section area
  2860. ret.extend(self.tag_hold)
  2861. self.tag_hold = []
  2862. tagged = regex['x'].sub(full_title, self.tag)
  2863. # Adds "underline" on TXT target
  2864. if TARGET == 'txt':
  2865. if BLOCK.count > 1: ret.append('') # blank line before
  2866. ret.append(tagged)
  2867. # Get the right letter count for UTF
  2868. if CONF['encoding'].lower() == 'utf-8':
  2869. i = len(full_title.decode('utf-8'))
  2870. else:
  2871. i = len(full_title)
  2872. ret.append(regex['x'].sub('='*i, self.tag))
  2873. elif TARGET == 'art' and self.level == 1:
  2874. if BLOCK.count > 1: ret.append('') # blank line before
  2875. ret.append(aa_box(tagged))
  2876. elif TARGET == 'art':
  2877. level = 'level'+str(self.level)
  2878. if BLOCK.count > 1: ret.append('') # blank line before
  2879. ret.append(tagged)
  2880. ret.append(AA_CHARS[level]*len(full_title))
  2881. else:
  2882. ret.append(tagged)
  2883. return ret
  2884. def dump_marked_toc(self, max_level=99):
  2885. "Dumps all toc itens as a valid t2t markup list"
  2886. ret = []
  2887. toc_count = 1
  2888. for level, count_id, txt, label in self.toc:
  2889. if level > max_level: continue # ignore
  2890. indent = ' '*level
  2891. id_txt = ('%s %s'%(count_id, txt)).lstrip()
  2892. label = label or self.anchor_prefix+`toc_count`
  2893. toc_count += 1
  2894. # TOC will have links
  2895. if TAGS['anchor']:
  2896. # TOC is more readable with master topics
  2897. # not linked at number. This is a stoled
  2898. # idea from Windows .CHM help files
  2899. if CONF['enum-title'] and level == 1:
  2900. tocitem = '%s+ [""%s"" #%s]' % (indent, txt, label)
  2901. else:
  2902. tocitem = '%s- [""%s"" #%s]' % (indent, id_txt, label)
  2903. # No links on TOC, just text
  2904. else:
  2905. # man don't reformat TOC lines, cool!
  2906. if TARGET in ['txt', 'man', 'art']:
  2907. tocitem = '%s""%s""' % (indent, id_txt)
  2908. else:
  2909. tocitem = '%s- ""%s""' % (indent, id_txt)
  2910. ret.append(tocitem)
  2911. return ret
  2912. ##############################################################################
#TODO check all this table mess
# Handles the TABLE lines, using the properties found by parse_row
# The table() method of BLOCK kludges in, swapping the cells by the parsed ones
  2916. class TableMaster:
  2917. def __init__(self, line=''):
  2918. self.rows = []
  2919. self.border = 0
  2920. self.align = 'Left'
  2921. self.cellalign = []
  2922. self.colalign = []
  2923. self.cellspan = []
  2924. if line:
  2925. prop = self.parse_row(line)
  2926. self.border = prop['border']
  2927. self.align = prop['align']
  2928. self.cellalign = prop['cellalign']
  2929. self.cellspan = prop['cellspan']
  2930. self.colalign = self._get_col_align()
  2931. def _get_col_align(self):
  2932. colalign = []
  2933. for cell in range(0,len(self.cellalign)):
  2934. align = self.cellalign[cell]
  2935. span = self.cellspan[cell]
  2936. colalign.extend([align] * span)
  2937. return colalign
  2938. def _get_open_tag(self):
  2939. topen = TAGS['tableOpen']
  2940. tborder = TAGS['_tableBorder']
  2941. talign = TAGS['_tableAlign'+self.align]
  2942. calignsep = TAGS['tableColAlignSep']
  2943. calign = ''
  2944. # The first line defines if table has border or not
  2945. if not self.border: tborder = ''
  2946. # Set the columns alignment
  2947. if rules['tablecellaligntype'] == 'column':
  2948. calign = map(lambda x: TAGS['_tableColAlign%s'%x], self.colalign)
  2949. calign = calignsep.join(calign)
  2950. # Align full table, set border and Column align (if any)
  2951. topen = regex['_tableAlign' ].sub(talign , topen)
  2952. topen = regex['_tableBorder' ].sub(tborder, topen)
  2953. topen = regex['_tableColAlign'].sub(calign , topen)
  2954. # Tex table spec, border or not: {|l|c|r|} , {lcr}
  2955. if calignsep and not self.border:
  2956. # Remove cell align separator
  2957. topen = topen.replace(calignsep, '')
  2958. return topen
  2959. def _get_cell_align(self, cells):
  2960. ret = []
  2961. for cell in cells:
  2962. align = 'Left'
  2963. if cell.strip():
  2964. if cell[0] == ' ' and cell[-1] == ' ':
  2965. align = 'Center'
  2966. elif cell[0] == ' ':
  2967. align = 'Right'
  2968. ret.append(align)
  2969. return ret
  2970. def _get_cell_span(self, cells):
  2971. ret = []
  2972. for cell in cells:
  2973. span = 1
  2974. m = re.search('\a(\|+)$', cell)
  2975. if m: span = len(m.group(1))+1
  2976. ret.append(span)
  2977. return ret
  2978. def _tag_cells(self, rowdata):
  2979. row = []
  2980. cells = rowdata['cells']
  2981. open = TAGS['tableCellOpen']
  2982. close = TAGS['tableCellClose']
  2983. sep = TAGS['tableCellSep']
  2984. calign = map(lambda x: TAGS['_tableCellAlign'+x], rowdata['cellalign'])
  2985. calignsep = TAGS['tableColAlignSep']
  2986. ncolumns = len(self.colalign)
  2987. # Populate the span and multicol open tags
  2988. cspan = []
  2989. multicol = []
  2990. colindex = 0
  2991. for cellindex in range(0,len(rowdata['cellspan'])):
  2992. span = rowdata['cellspan'][cellindex]
  2993. align = rowdata['cellalign'][cellindex]
  2994. if span > 1:
  2995. cspan.append(regex['x'].sub(
  2996. str(span), TAGS['_tableCellColSpan']))
  2997. mcopen = regex['x'].sub(str(span), TAGS['_tableCellMulticolOpen'])
  2998. multicol.append(mcopen)
  2999. else:
  3000. cspan.append('')
  3001. if colindex < ncolumns and align != self.colalign[colindex]:
  3002. mcopen = regex['x'].sub('1', TAGS['_tableCellMulticolOpen'])
  3003. multicol.append(mcopen)
  3004. else:
  3005. multicol.append('')
  3006. if not self.border:
  3007. multicol[-1] = multicol[-1].replace(calignsep, '')
  3008. colindex += span
  3009. # Maybe is it a title row?
  3010. if rowdata['title']:
  3011. open = TAGS['tableTitleCellOpen'] or open
  3012. close = TAGS['tableTitleCellClose'] or close
  3013. sep = TAGS['tableTitleCellSep'] or sep
  3014. # Should we break the line on *each* table cell?
  3015. if rules['breaktablecell']: close = close+'\n'
  3016. # Cells pre processing
  3017. if rules['tablecellstrip']:
  3018. cells = map(lambda x: x.strip(), cells)
  3019. if rowdata['title'] and rules['tabletitlerowinbold']:
  3020. cells = map(lambda x: enclose_me('fontBold',x), cells)
  3021. # Add cell BEGIN/END tags
  3022. for cell in cells:
  3023. copen = open
  3024. cclose = close
  3025. # Make sure we will pop from some filled lists
  3026. # Fixes empty line bug '| |'
  3027. this_align = this_span = this_mcopen = ''
  3028. if calign: this_align = calign.pop(0)
  3029. if cspan : this_span = cspan.pop(0)
  3030. if multicol: this_mcopen = multicol.pop(0)
  3031. # Insert cell align into open tag (if cell is alignable)
  3032. if rules['tablecellaligntype'] == 'cell':
  3033. copen = regex['_tableCellAlign'].sub(
  3034. this_align, copen)
  3035. # Insert cell span into open tag (if cell is spannable)
  3036. if rules['tablecellspannable']:
  3037. copen = regex['_tableCellColSpan'].sub(
  3038. this_span, copen)
  3039. # Use multicol tags instead (if multicol supported, and if
  3040. # cell has a span or is aligned differently to column)
  3041. if rules['tablecellmulticol']:
  3042. if this_mcopen:
  3043. copen = regex['_tableColAlign'].sub(this_align, this_mcopen)
  3044. cclose = TAGS['_tableCellMulticolClose']
  3045. row.append(copen + cell + cclose)
  3046. # Maybe there are cell separators?
  3047. return sep.join(row)
  3048. def add_row(self, cells):
  3049. self.rows.append(cells)
  3050. def parse_row(self, line):
  3051. # Default table properties
  3052. ret = {
  3053. 'border':0, 'title':0, 'align':'Left',
  3054. 'cells':[], 'cellalign':[], 'cellspan':[]
  3055. }
  3056. # Detect table align (and remove spaces mark)
  3057. if line[0] == ' ': ret['align'] = 'Center'
  3058. line = line.lstrip()
  3059. # Detect title mark
  3060. if line[1] == '|': ret['title'] = 1
  3061. # Detect border mark and normalize the EOL
  3062. m = re.search(' (\|+) *$', line)
  3063. if m: line = line+' ' ; ret['border'] = 1
  3064. else: line = line+' | '
  3065. # Delete table mark
  3066. line = regex['table'].sub('', line)
  3067. # Detect colspan | foo | bar baz |||
  3068. line = re.sub(' (\|+)\| ', '\a\\1 | ', line)
  3069. # Split cells (the last is fake)
  3070. ret['cells'] = line.split(' | ')[:-1]
  3071. # Find cells span
  3072. ret['cellspan'] = self._get_cell_span(ret['cells'])
  3073. # Remove span ID
  3074. ret['cells'] = map(lambda x:re.sub('\a\|+$','',x),ret['cells'])
  3075. # Find cells align
  3076. ret['cellalign'] = self._get_cell_align(ret['cells'])
  3077. # Hooray!
  3078. Debug('Table Prop: %s' % ret, 7)
  3079. return ret
  3080. def dump(self):
  3081. open = self._get_open_tag()
  3082. rows = self.rows
  3083. close = TAGS['tableClose']
  3084. rowopen = TAGS['tableRowOpen']
  3085. rowclose = TAGS['tableRowClose']
  3086. rowsep = TAGS['tableRowSep']
  3087. titrowopen = TAGS['tableTitleRowOpen'] or rowopen
  3088. titrowclose = TAGS['tableTitleRowClose'] or rowclose
  3089. if rules['breaktablelineopen']:
  3090. rowopen = rowopen + '\n'
  3091. titrowopen = titrowopen + '\n'
  3092. # Tex gotchas
  3093. if TARGET == 'tex':
  3094. if not self.border:
  3095. rowopen = titrowopen = ''
  3096. else:
  3097. close = rowopen + close
  3098. # Now we tag all the table cells on each row
  3099. #tagged_cells = map(lambda x: self._tag_cells(x), rows) #!py15
  3100. tagged_cells = []
  3101. for cell in rows: tagged_cells.append(self._tag_cells(cell))
  3102. # Add row separator tags between lines
  3103. tagged_rows = []
  3104. if rowsep:
  3105. #!py15
  3106. #tagged_rows = map(lambda x:x+rowsep, tagged_cells)
  3107. for cell in tagged_cells:
  3108. tagged_rows.append(cell+rowsep)
  3109. # Remove last rowsep, because the table is over
  3110. tagged_rows[-1] = tagged_rows[-1].replace(rowsep, '')
  3111. # Add row BEGIN/END tags for each line
  3112. else:
  3113. for rowdata in rows:
  3114. if rowdata['title']:
  3115. o,c = titrowopen, titrowclose
  3116. else:
  3117. o,c = rowopen, rowclose
  3118. row = tagged_cells.pop(0)
  3119. tagged_rows.append(o + row + c)
  3120. # Join the pieces together
  3121. fulltable = []
  3122. if open: fulltable.append(open)
  3123. fulltable.extend(tagged_rows)
  3124. if close: fulltable.append(close)
  3125. return fulltable
  3126. ##############################################################################
  3127. class BlockMaster:
  3128. "TIP: use blockin/out to add/del holders"
  3129. def __init__(self):
  3130. self.BLK = []
  3131. self.HLD = []
  3132. self.PRP = []
  3133. self.depth = 0
  3134. self.count = 0
  3135. self.last = ''
  3136. self.tableparser = None
  3137. self.contains = {
  3138. 'para' :['comment','raw','tagged'],
  3139. 'verb' :[],
  3140. 'table' :['comment'],
  3141. 'raw' :[],
  3142. 'tagged' :[],
  3143. 'comment' :[],
  3144. 'quote' :['quote','comment','raw','tagged'],
  3145. 'list' :['list','numlist','deflist','para','verb','comment','raw','tagged'],
  3146. 'numlist' :['list','numlist','deflist','para','verb','comment','raw','tagged'],
  3147. 'deflist' :['list','numlist','deflist','para','verb','comment','raw','tagged'],
  3148. 'bar' :[],
  3149. 'title' :[],
  3150. 'numtitle':[],
  3151. }
  3152. self.allblocks = self.contains.keys()
  3153. # If one is found inside another, ignore the marks
  3154. self.exclusive = ['comment','verb','raw','tagged']
  3155. # May we include bars inside quotes?
  3156. if rules['barinsidequote']:
  3157. self.contains['quote'].append('bar')
  3158. def block(self):
  3159. if not self.BLK: return ''
  3160. return self.BLK[-1]
  3161. def isblock(self, name=''):
  3162. return self.block() == name
  3163. def prop(self, key):
  3164. if not self.PRP: return ''
  3165. return self.PRP[-1].get(key) or ''
  3166. def propset(self, key, val):
  3167. self.PRP[-1][key] = val
  3168. #Debug('BLOCK prop ++: %s->%s'%(key,repr(val)), 1)
  3169. #Debug('BLOCK props: %s'%(repr(self.PRP)), 1)
  3170. def hold(self):
  3171. if not self.HLD: return []
  3172. return self.HLD[-1]
  3173. def holdadd(self, line):
  3174. if self.block().endswith('list'): line = [line]
  3175. self.HLD[-1].append(line)
  3176. Debug('HOLD add: %s'%repr(line), 4)
  3177. Debug('FULL HOLD: %s'%self.HLD, 4)
  3178. def holdaddsub(self, line):
  3179. self.HLD[-1][-1].append(line)
  3180. Debug('HOLD addsub: %s'%repr(line), 4)
  3181. Debug('FULL HOLD: %s'%self.HLD, 4)
  3182. def holdextend(self, lines):
  3183. if self.block().endswith('list'): lines = [lines]
  3184. self.HLD[-1].extend(lines)
  3185. Debug('HOLD extend: %s'%repr(lines), 4)
  3186. Debug('FULL HOLD: %s'%self.HLD, 4)
  3187. def blockin(self, block):
  3188. ret = []
  3189. if block not in self.allblocks:
  3190. Error("Invalid block '%s'"%block)
  3191. # First, let's close other possible open blocks
  3192. while self.block() and block not in self.contains[self.block()]:
  3193. ret.extend(self.blockout())
  3194. # Now we can gladly add this new one
  3195. self.BLK.append(block)
  3196. self.HLD.append([])
  3197. self.PRP.append({})
  3198. self.count += 1
  3199. if block == 'table': self.tableparser = TableMaster()
  3200. # Deeper and deeper
  3201. self.depth = len(self.BLK)
  3202. Debug('block ++ (%s): %s' % (block,self.BLK), 3)
  3203. return ret
    def blockout(self):
        """Closes the current block, returning its output.

        The method named after the block is called to compose the
        result. When there is still a mother block opened, the result
        is saved into the mother's holder and an empty list is
        returned instead.
        """
        if not self.BLK: Error('No block to pop')
        blockname = self.BLK.pop()
        # The block method runs with the name already popped, but the
        # holder and the properties are popped only after it
        result = getattr(self, blockname)()
        parsed = self.HLD.pop()
        self.PRP.pop()
        self.depth = len(self.BLK)
        if blockname == 'table': del self.tableparser
        # Inserting a nested block into mother
        if self.block():
            if blockname != 'comment':  # ignore comment blocks
                if self.block().endswith('list'):
                    # On lists, the result goes inside the last item
                    self.HLD[-1][-1].append(result)
                else:
                    self.HLD[-1].append(result)
            # Reset now. Mother block will have it all
            result = []
        Debug('block -- (%s): %s' % (blockname,self.BLK), 3)
        Debug('RELEASED (%s): %s' % (blockname,parsed), 3)
        # Save this top level block name (produced output)
        # The next block will use it
        if result:
            self.last = blockname
            Debug('BLOCK: %s'%result, 6)
        return result
  3229. def _last_escapes(self, line):
  3230. return doFinalEscape(TARGET, line)
  3231. def _get_escaped_hold(self):
  3232. ret = []
  3233. for line in self.hold():
  3234. linetype = type(line)
  3235. if linetype == type('') or linetype == type(u''):
  3236. ret.append(self._last_escapes(line))
  3237. elif linetype == type([]):
  3238. ret.extend(line)
  3239. else:
  3240. Error("BlockMaster: Unknown HOLD item type: %s" % linetype)
  3241. return ret
  3242. def _remove_twoblanks(self, lastitem):
  3243. if len(lastitem) > 1 and lastitem[-2:] == ['','']:
  3244. return lastitem[:-2]
  3245. return lastitem
  3246. def _should_add_blank_line(self, where, blockname):
  3247. "Validates the blanksaround* rules"
  3248. # Nestable blocks: only mother blocks (level 1) are spaced
  3249. if blockname.endswith('list') and self.depth > 1:
  3250. return False
  3251. # The blank line after the block is always added
  3252. if where == 'after' \
  3253. and rules['blanksaround'+blockname]:
  3254. return True
  3255. # The blank line before the block is only added if
  3256. # the previous block haven't added a blank line
  3257. # (to avoid consecutive blanks)
  3258. elif where == 'before' \
  3259. and rules['blanksaround'+blockname] \
  3260. and not rules.get('blanksaround'+self.last):
  3261. return True
  3262. # Nested quotes are handled here,
  3263. # because the mother quote isn't closed yet
  3264. elif where == 'before' \
  3265. and blockname == 'quote' \
  3266. and rules['blanksaround'+blockname] \
  3267. and self.depth > 1:
  3268. return True
  3269. return False
  3270. def comment(self):
  3271. return ''
  3272. def raw(self):
  3273. lines = self.hold()
  3274. return map(lambda x: doEscape(TARGET, x), lines)
  3275. def tagged(self):
  3276. return self.hold()
  3277. def para(self):
  3278. result = []
  3279. open = TAGS['paragraphOpen']
  3280. close = TAGS['paragraphClose']
  3281. lines = self._get_escaped_hold()
  3282. # Blank line before?
  3283. if self._should_add_blank_line('before', 'para'): result.append('')
  3284. # Open tag
  3285. if open: result.append(open)
  3286. # Pagemaker likes a paragraph as a single long line
  3287. if rules['onelinepara']:
  3288. result.append(' '.join(lines))
  3289. # Others are normal :)
  3290. else:
  3291. result.extend(lines)
  3292. # Close tag
  3293. if close: result.append(close)
  3294. # Blank line after?
  3295. if self._should_add_blank_line('after', 'para'): result.append('')
  3296. # Very very very very very very very very very UGLY fix
  3297. # Needed because <center> can't appear inside <p>
  3298. try:
  3299. if len(lines) == 1 and \
  3300. TARGET in ('html', 'xhtml') and \
  3301. re.match('^\s*<center>.*</center>\s*$', lines[0]):
  3302. result = [lines[0]]
  3303. except: pass
  3304. return result
  3305. def verb(self):
  3306. "Verbatim lines are not masked, so there's no need to unmask"
  3307. result = []
  3308. open = TAGS['blockVerbOpen']
  3309. close = TAGS['blockVerbClose']
  3310. # Blank line before?
  3311. if self._should_add_blank_line('before', 'verb'): result.append('')
  3312. # Open tag
  3313. if open: result.append(open)
  3314. # Get contents
  3315. for line in self.hold():
  3316. if self.prop('mapped') == 'table':
  3317. line = MacroMaster().expand(line)
  3318. if not rules['verbblocknotescaped']:
  3319. line = doEscape(TARGET,line)
  3320. if rules['indentverbblock']:
  3321. line = ' '+line
  3322. if rules['verbblockfinalescape']:
  3323. line = doFinalEscape(TARGET, line)
  3324. result.append(line)
  3325. # Close tag
  3326. if close: result.append(close)
  3327. # Blank line after?
  3328. if self._should_add_blank_line('after', 'verb'): result.append('')
  3329. return result
  3330. def numtitle(self): return self.title('numtitle')
  3331. def title(self, name='title'):
  3332. result = []
  3333. # Blank line before?
  3334. if self._should_add_blank_line('before', name): result.append('')
  3335. # Get contents
  3336. result.extend(TITLE.get())
  3337. # Blank line after?
  3338. if self._should_add_blank_line('after', name): result.append('')
  3339. return result
  3340. def table(self):
  3341. result = []
  3342. # Blank line before?
  3343. if self._should_add_blank_line('before', 'table'): result.append('')
  3344. # Rewrite all table cells by the unmasked and escaped data
  3345. lines = self._get_escaped_hold()
  3346. for i in xrange(len(lines)):
  3347. cells = lines[i].split(SEPARATOR)
  3348. self.tableparser.rows[i]['cells'] = cells
  3349. result.extend(self.tableparser.dump())
  3350. # Blank line after?
  3351. if self._should_add_blank_line('after', 'table'): result.append('')
  3352. return result
  3353. def quote(self):
  3354. result = []
  3355. open = TAGS['blockQuoteOpen'] # block based
  3356. close = TAGS['blockQuoteClose']
  3357. qline = TAGS['blockQuoteLine'] # line based
  3358. indent = tagindent = '\t'*self.depth
  3359. # Apply rules
  3360. if rules['tagnotindentable']: tagindent = ''
  3361. if not rules['keepquoteindent']: indent = ''
  3362. # Blank line before?
  3363. if self._should_add_blank_line('before', 'quote'): result.append('')
  3364. # Open tag
  3365. if open: result.append(tagindent+open)
  3366. # Get contents
  3367. for item in self.hold():
  3368. if type(item) == type([]):
  3369. result.extend(item) # subquotes
  3370. else:
  3371. item = regex['quote'].sub('', item) # del TABs
  3372. item = self._last_escapes(item)
  3373. item = qline*self.depth + item
  3374. result.append(indent+item) # quote line
  3375. # Close tag
  3376. if close: result.append(tagindent+close)
  3377. # Blank line after?
  3378. if self._should_add_blank_line('after', 'quote'): result.append('')
  3379. return result
  3380. def bar(self):
  3381. result = []
  3382. bar_tag = ''
  3383. # Blank line before?
  3384. if self._should_add_blank_line('before', 'bar'): result.append('')
  3385. # Get the original bar chars
  3386. bar_chars = self.hold()[0].strip()
  3387. # Set bar type
  3388. if bar_chars.startswith('='): bar_tag = TAGS['bar2']
  3389. else : bar_tag = TAGS['bar1']
  3390. # To avoid comment tag confusion like <!-- ------ --> (sgml)
  3391. if TAGS['comment'].count('--'):
  3392. bar_chars = bar_chars.replace('--', '__')
  3393. # Get the bar tag (may contain \a)
  3394. result.append(regex['x'].sub(bar_chars, bar_tag))
  3395. # Blank line after?
  3396. if self._should_add_blank_line('after', 'bar'): result.append('')
  3397. return result
  3398. def deflist(self): return self.list('deflist')
  3399. def numlist(self): return self.list('numlist')
  3400. def list(self, name='list'):
  3401. result = []
  3402. items = self.hold()
  3403. indent = self.prop('indent')
  3404. tagindent = indent
  3405. listline = TAGS.get(name+'ItemLine')
  3406. itemcount = 0
  3407. if name == 'deflist':
  3408. itemopen = TAGS[name+'Item1Open']
  3409. itemclose = TAGS[name+'Item2Close']
  3410. itemsep = TAGS[name+'Item1Close']+\
  3411. TAGS[name+'Item2Open']
  3412. else:
  3413. itemopen = TAGS[name+'ItemOpen']
  3414. itemclose = TAGS[name+'ItemClose']
  3415. itemsep = ''
  3416. # Apply rules
  3417. if rules['tagnotindentable']: tagindent = ''
  3418. if not rules['keeplistindent']: indent = tagindent = ''
  3419. # ItemLine: number of leading chars identifies list depth
  3420. if listline:
  3421. itemopen = listline*self.depth + itemopen
  3422. # Adds trailing space on opening tags
  3423. if (name == 'list' and rules['spacedlistitemopen']) or \
  3424. (name == 'numlist' and rules['spacednumlistitemopen']):
  3425. itemopen = itemopen + ' '
  3426. # Remove two-blanks from list ending mark, to avoid <p>
  3427. items[-1] = self._remove_twoblanks(items[-1])
  3428. # Blank line before?
  3429. if self._should_add_blank_line('before', name): result.append('')
  3430. # Tag each list item (multiline items), store in listbody
  3431. itemopenorig = itemopen
  3432. listbody = []
  3433. widelist = 0
  3434. for item in items:
  3435. # Add "manual" item count for noautonum targets
  3436. itemcount += 1
  3437. if name == 'numlist' and not rules['autonumberlist']:
  3438. n = str(itemcount)
  3439. itemopen = regex['x'].sub(n, itemopenorig)
  3440. del n
  3441. # Tag it
  3442. item[0] = self._last_escapes(item[0])
  3443. if name == 'deflist':
  3444. z,term,rest = item[0].split(SEPARATOR, 2)
  3445. item[0] = rest
  3446. if not item[0]: del item[0] # to avoid <p>
  3447. listbody.append(tagindent+itemopen+term+itemsep)
  3448. else:
  3449. fullitem = tagindent+itemopen
  3450. listbody.append(item[0].replace(SEPARATOR, fullitem))
  3451. del item[0]
  3452. # Process next lines for this item (if any)
  3453. for line in item:
  3454. if type(line) == type([]): # sublist inside
  3455. listbody.extend(line)
  3456. else:
  3457. line = self._last_escapes(line)
  3458. # Blank lines turns to <p>
  3459. if not line and rules['parainsidelist']:
  3460. line = indent + TAGS['paragraphOpen'] + TAGS['paragraphClose']
  3461. line = line.rstrip()
  3462. widelist = 1
  3463. # Some targets don't like identation here (wiki)
  3464. if not rules['keeplistindent'] or (name == 'deflist' and rules['deflisttextstrip']):
  3465. line = line.lstrip()
  3466. # Maybe we have a line prefix to add? (wiki)
  3467. if name == 'deflist' and TAGS['deflistItem2LinePrefix']:
  3468. line = TAGS['deflistItem2LinePrefix'] + line
  3469. listbody.append(line)
  3470. # Close item (if needed)
  3471. if itemclose: listbody.append(tagindent+itemclose)
  3472. if not widelist and rules['compactlist']:
  3473. listopen = TAGS.get(name+'OpenCompact')
  3474. listclose = TAGS.get(name+'CloseCompact')
  3475. else:
  3476. listopen = TAGS.get(name+'Open')
  3477. listclose = TAGS.get(name+'Close')
  3478. # Open list (not nestable lists are only opened at mother)
  3479. if listopen and not \
  3480. (rules['listnotnested'] and BLOCK.depth != 1):
  3481. result.append(tagindent+listopen)
  3482. result.extend(listbody)
  3483. # Close list (not nestable lists are only closed at mother)
  3484. if listclose and not \
  3485. (rules['listnotnested'] and self.depth != 1):
  3486. result.append(tagindent+listclose)
  3487. # Blank line after?
  3488. if self._should_add_blank_line('after', name): result.append('')
  3489. return result
  3490. ##############################################################################
  3491. class MacroMaster:
  3492. def __init__(self, config={}):
  3493. self.name = ''
  3494. self.config = config or CONF
  3495. self.infile = self.config['sourcefile']
  3496. self.outfile = self.config['outfile']
  3497. self.currdate = time.localtime(time.time())
  3498. self.rgx = regex.get('macros') or getRegexes()['macros']
  3499. self.fileinfo = { 'infile': None, 'outfile': None }
  3500. self.dft_fmt = MACROS
  3501. def walk_file_format(self, fmt):
  3502. "Walks the %%{in/out}file format string, expanding the % flags"
  3503. i = 0; ret = '' # counter/hold
  3504. while i < len(fmt): # char by char
  3505. c = fmt[i]; i += 1
  3506. if c == '%': # hot char!
  3507. if i == len(fmt): # % at the end
  3508. ret = ret + c
  3509. break
  3510. c = fmt[i]; i += 1 # read next
  3511. ret = ret + self.expand_file_flag(c)
  3512. else:
  3513. ret = ret +c # common char
  3514. return ret
  3515. def expand_file_flag(self, flag):
  3516. "%f: filename %F: filename (w/o extension)"
  3517. "%d: dirname %D: dirname (only parent dir)"
  3518. "%p: file path %e: extension"
  3519. info = self.fileinfo[self.name] # get dict
  3520. if flag == '%': x = '%' # %% -> %
  3521. elif flag == 'f': x = info['name']
  3522. elif flag == 'F': x = re.sub('\.[^.]*$','',info['name'])
  3523. elif flag == 'd': x = info['dir']
  3524. elif flag == 'D': x = os.path.split(info['dir'])[-1]
  3525. elif flag == 'p': x = info['path']
  3526. elif flag == 'e': x = re.search('.(\.([^.]+))?$', info['name']).group(2) or ''
  3527. #TODO simpler way for %e ?
  3528. else : x = '%'+flag # false alarm
  3529. return x
  3530. def set_file_info(self, macroname):
  3531. if self.fileinfo.get(macroname): return # already done
  3532. file = getattr(self, self.name) # self.infile
  3533. if file == STDOUT or file == MODULEOUT:
  3534. dir = ''; path = name = file
  3535. else:
  3536. path = os.path.abspath(file)
  3537. dir = os.path.dirname(path)
  3538. name = os.path.basename(path)
  3539. self.fileinfo[macroname] = {'path':path,'dir':dir,'name':name}
  3540. def expand(self, line=''):
  3541. "Expand all macros found on the line"
  3542. while self.rgx.search(line):
  3543. m = self.rgx.search(line)
  3544. name = self.name = m.group('name').lower()
  3545. fmt = m.group('fmt') or self.dft_fmt.get(name)
  3546. if name == 'date':
  3547. txt = time.strftime(fmt,self.currdate)
  3548. elif name == 'mtime':
  3549. if self.infile in (STDIN, MODULEIN):
  3550. fdate = self.currdate
  3551. else:
  3552. mtime = os.path.getmtime(self.infile)
  3553. fdate = time.localtime(mtime)
  3554. txt = time.strftime(fmt,fdate)
  3555. elif name == 'infile' or name == 'outfile':
  3556. self.set_file_info(name)
  3557. txt = self.walk_file_format(fmt)
  3558. else:
  3559. Error("Unknown macro name '%s'"%name)
  3560. line = self.rgx.sub(txt,line,1)
  3561. return line
  3562. ##############################################################################
  3563. def dumpConfig(source_raw, parsed_config):
  3564. onoff = {1:_('ON'), 0:_('OFF')}
  3565. data = [
  3566. (_('RC file') , RC_RAW ),
  3567. (_('source document'), source_raw ),
  3568. (_('command line') , CMDLINE_RAW)
  3569. ]
  3570. # First show all RAW data found
  3571. for label, cfg in data:
  3572. print _('RAW config for %s')%label
  3573. for target,key,val in cfg:
  3574. target = '(%s)'%target
  3575. key = dotted_spaces("%-14s"%key)
  3576. val = val or _('ON')
  3577. print ' %-8s %s: %s'%(target,key,val)
  3578. print
  3579. # Then the parsed results of all of them
  3580. print _('Full PARSED config')
  3581. keys = parsed_config.keys() ; keys.sort() # sorted
  3582. for key in keys:
  3583. val = parsed_config[key]
  3584. # Filters are the last
  3585. if key == 'preproc' or key == 'postproc':
  3586. continue
  3587. # Flag beautifier
  3588. if key in FLAGS.keys() or key in ACTIONS.keys():
  3589. val = onoff.get(val) or val
  3590. # List beautifier
  3591. if type(val) == type([]):
  3592. if key == 'options': sep = ' '
  3593. else : sep = ', '
  3594. val = sep.join(val)
  3595. print "%25s: %s"%(dotted_spaces("%-14s"%key),val)
  3596. print
  3597. print _('Active filters')
  3598. for filter in ['preproc', 'postproc']:
  3599. for rule in parsed_config.get(filter) or []:
  3600. print "%25s: %s -> %s" % (
  3601. dotted_spaces("%-14s"%filter), rule[0], rule[1])
  3602. def get_file_body(file):
  3603. "Returns all the document BODY lines"
  3604. return process_source_file(file, noconf=1)[1][2]
  3605. def finish_him(outlist, config):
  3606. "Writing output to screen or file"
  3607. outfile = config['outfile']
  3608. outlist = unmaskEscapeChar(outlist)
  3609. outlist = expandLineBreaks(outlist)
  3610. # Apply PostProc filters
  3611. if config['postproc']:
  3612. filters = compile_filters(config['postproc'],
  3613. _('Invalid PostProc filter regex'))
  3614. postoutlist = []
  3615. errmsg = _('Invalid PostProc filter replacement')
  3616. for line in outlist:
  3617. for rgx,repl in filters:
  3618. try: line = rgx.sub(repl, line)
  3619. except: Error("%s: '%s'"%(errmsg, repl))
  3620. postoutlist.append(line)
  3621. outlist = postoutlist[:]
  3622. if outfile == MODULEOUT:
  3623. return outlist
  3624. elif outfile == STDOUT:
  3625. if GUI:
  3626. return outlist, config
  3627. else:
  3628. for line in outlist: print line
  3629. else:
  3630. Savefile(outfile, addLineBreaks(outlist))
  3631. if not GUI and not QUIET:
  3632. print _('%s wrote %s')%(my_name,outfile)
  3633. if config['split']:
  3634. if not QUIET: print "--- html..."
  3635. sgml2html = 'sgml2html -s %s -l %s %s' % (
  3636. config['split'], config['lang'] or lang, outfile)
  3637. if not QUIET: print "Running system command:", sgml2html
  3638. os.system(sgml2html)
  3639. def toc_inside_body(body, toc, config):
  3640. ret = []
  3641. if AUTOTOC: return body # nothing to expand
  3642. toc_mark = MaskMaster().tocmask
  3643. # Expand toc mark with TOC contents
  3644. for line in body:
  3645. if line.count(toc_mark): # toc mark found
  3646. if config['toc']:
  3647. ret.extend(toc) # include if --toc
  3648. else:
  3649. pass # or remove %%toc line
  3650. else:
  3651. ret.append(line) # common line
  3652. return ret
  3653. def toc_tagger(toc, config):
  3654. "Convert t2t-marked TOC (it is a list) to target-tagged TOC"
  3655. ret = []
  3656. # Tag if TOC-only TOC "by hand" (target don't have a TOC tag)
  3657. if config['toc-only'] or (config['toc'] and not TAGS['TOC']):
  3658. fakeconf = config.copy()
  3659. fakeconf['headers'] = 0
  3660. fakeconf['toc-only'] = 0
  3661. fakeconf['mask-email'] = 0
  3662. fakeconf['preproc'] = []
  3663. fakeconf['postproc'] = []
  3664. fakeconf['css-sugar'] = 0
  3665. ret,foo = convert(toc, fakeconf)
  3666. set_global_config(config) # restore config
  3667. # Target TOC is a tag
  3668. elif config['toc'] and TAGS['TOC']:
  3669. ret = [TAGS['TOC']]
  3670. return ret
  3671. def toc_formatter(toc, config):
  3672. "Formats TOC for automatic placement between headers and body"
  3673. if config['toc-only']: return toc # no formatting needed
  3674. if not config['toc'] : return [] # TOC disabled
  3675. ret = toc
  3676. # TOC open/close tags (if any)
  3677. if TAGS['tocOpen' ]: ret.insert(0, TAGS['tocOpen'])
  3678. if TAGS['tocClose']: ret.append(TAGS['tocClose'])
  3679. # Autotoc specific formatting
  3680. if AUTOTOC:
  3681. if rules['autotocwithbars']: # TOC between bars
  3682. para = TAGS['paragraphOpen']+TAGS['paragraphClose']
  3683. bar = regex['x'].sub('-'*72,TAGS['bar1'])
  3684. tocbar = [para, bar, para]
  3685. ret = tocbar + ret + tocbar
  3686. if rules['blankendautotoc']: # blank line after TOC
  3687. ret.append('')
  3688. if rules['autotocnewpagebefore']: # page break before TOC
  3689. ret.insert(0,TAGS['pageBreak'])
  3690. if rules['autotocnewpageafter']: # page break after TOC
  3691. ret.append(TAGS['pageBreak'])
  3692. return ret
  3693. def doHeader(headers, config):
  3694. if not config['headers']: return []
  3695. if not headers: headers = ['','','']
  3696. target = config['target']
  3697. if not HEADER_TEMPLATE.has_key(target):
  3698. Error("doHeader: Unknown target '%s'"%target)
  3699. if target in ('html','xhtml') and config.get('css-sugar'):
  3700. template = HEADER_TEMPLATE[target+'css'].split('\n')
  3701. else:
  3702. template = HEADER_TEMPLATE[target].split('\n')
  3703. head_data = {'STYLE':[], 'ENCODING':''}
  3704. for key in head_data.keys():
  3705. val = config.get(key.lower())
  3706. # Remove .sty extension from each style filename (freaking tex)
  3707. # XXX Can't handle --style foo.sty,bar.sty
  3708. if target == 'tex' and key == 'STYLE':
  3709. val = map(lambda x:re.sub('(?i)\.sty$','',x), val)
  3710. if key == 'ENCODING':
  3711. val = get_encoding_string(val, target)
  3712. head_data[key] = val
  3713. # Parse header contents
  3714. for i in 0,1,2:
  3715. # Expand macros
  3716. contents = MacroMaster(config=config).expand(headers[i])
  3717. # Escapes - on tex, just do it if any \tag{} present
  3718. if target != 'tex' or \
  3719. (target == 'tex' and re.search(r'\\\w+{', contents)):
  3720. contents = doEscape(target, contents)
  3721. if target == 'lout':
  3722. contents = doFinalEscape(target, contents)
  3723. head_data['HEADER%d'%(i+1)] = contents
  3724. if target == 'art':
  3725. if not [v for v in head_data.values() if v]:
  3726. return []
  3727. template = aa_header(head_data)
  3728. return template.split('\n')
  3729. # css-inside removes STYLE line
  3730. #XXX In tex, this also removes the modules call (%!style:amsfonts)
  3731. if target in ('html','xhtml') and config.get('css-inside') and \
  3732. config.get('style'):
  3733. head_data['STYLE'] = []
  3734. Debug("Header Data: %s"%head_data, 1)
  3735. # Scan for empty dictionary keys
  3736. # If found, scan template lines for that key reference
  3737. # If found, remove the reference
  3738. # If there isn't any other key reference on the same line, remove it
  3739. #TODO loop by template line > key
  3740. for key in head_data.keys():
  3741. if head_data.get(key): continue
  3742. for line in template:
  3743. if line.count('%%(%s)s'%key):
  3744. sline = line.replace('%%(%s)s'%key, '')
  3745. if not re.search(r'%\([A-Z0-9]+\)s', sline):
  3746. template.remove(line)
  3747. # Style is a multiple tag.
  3748. # - If none or just one, use default template
  3749. # - If two or more, insert extra lines in a loop (and remove original)
  3750. styles = head_data['STYLE']
  3751. if len(styles) == 1:
  3752. head_data['STYLE'] = styles[0]
  3753. elif len(styles) > 1:
  3754. style_mark = '%(STYLE)s'
  3755. for i in xrange(len(template)):
  3756. if template[i].count(style_mark):
  3757. while styles:
  3758. template.insert(i+1, template[i].replace(style_mark, styles.pop()))
  3759. del template[i]
  3760. break
  3761. # Populate template with data (dict expansion)
  3762. template = '\n'.join(template) % head_data
  3763. # Adding CSS contents into template (for --css-inside)
  3764. # This code sux. Dirty++
  3765. if target in ('html','xhtml') and config.get('css-inside') and \
  3766. config.get('style'):
  3767. set_global_config(config) # usually on convert(), needed here
  3768. for i in xrange(len(config['style'])):
  3769. cssfile = config['style'][i]
  3770. if not os.path.isabs(cssfile):
  3771. infile = config.get('sourcefile')
  3772. cssfile = os.path.join(
  3773. os.path.dirname(infile), cssfile)
  3774. try:
  3775. contents = Readfile(cssfile, 1)
  3776. css = "\n%s\n%s\n%s\n%s\n" % (
  3777. doCommentLine("Included %s" % cssfile),
  3778. TAGS['cssOpen'],
  3779. '\n'.join(contents),
  3780. TAGS['cssClose'])
  3781. # Style now is content, needs escaping (tex)
  3782. #css = maskEscapeChar(css)
  3783. except:
  3784. errmsg = "CSS include failed for %s" % cssfile
  3785. css = "\n%s\n" % (doCommentLine(errmsg))
  3786. # Insert this CSS file contents on the template
  3787. template = re.sub('(?i)(</HEAD>)', css+r'\1', template)
  3788. # template = re.sub(r'(?i)(\\begin{document})',
  3789. # css+'\n'+r'\1', template) # tex
  3790. # The last blank line to keep everything separated
  3791. template = re.sub('(?i)(</HEAD>)', '\n'+r'\1', template)
  3792. return template.split('\n')
  3793. def doCommentLine(txt):
  3794. # The -- string ends a (h|sg|xht)ml comment :(
  3795. txt = maskEscapeChar(txt)
  3796. if TAGS['comment'].count('--') and txt.count('--'):
  3797. txt = re.sub('-(?=-)', r'-\\', txt)
  3798. if TAGS['comment']:
  3799. return regex['x'].sub(txt, TAGS['comment'])
  3800. return ''
  3801. def doFooter(config):
  3802. if not config['headers']: return []
  3803. ret = []
  3804. target = config['target']
  3805. cmdline = config['realcmdline']
  3806. typename = target
  3807. if target == 'tex': typename = 'LaTeX2e'
  3808. ppgd = '%s code generated by %s %s (%s)' % (typename, my_name, my_version, my_url)
  3809. cmdline = 'cmdline: %s %s' % (my_name, ' '.join(cmdline))
  3810. ret.append('')
  3811. ret.append(doCommentLine(ppgd))
  3812. ret.append(doCommentLine(cmdline))
  3813. ret.append(TAGS['EOD'])
  3814. return ret
  3815. def doEscape(target,txt):
  3816. "Target-specific special escapes. Apply *before* insert any tag."
  3817. tmpmask = 'vvvvThisEscapingSuxvvvv'
  3818. if target in ('html','sgml','xhtml'):
  3819. txt = re.sub('&','&amp;',txt)
  3820. txt = re.sub('<','&lt;',txt)
  3821. txt = re.sub('>','&gt;',txt)
  3822. if target == 'sgml':
  3823. txt = re.sub('\xff','&yuml;',txt) # "+y
  3824. elif target == 'pm6':
  3825. txt = re.sub('<','<\#60>',txt)
  3826. elif target == 'mgp':
  3827. txt = re.sub('^%',' %',txt) # add leading blank to avoid parse
  3828. elif target == 'man':
  3829. txt = re.sub("^([.'])", '\\&\\1',txt) # command ID
  3830. txt = txt.replace(ESCCHAR, ESCCHAR+'e') # \e
  3831. elif target == 'lout':
  3832. # TIP: / moved to FinalEscape to avoid //italic//
  3833. # TIP: these are also converted by lout: ... --- --
  3834. txt = txt.replace(ESCCHAR, tmpmask) # \
  3835. txt = txt.replace('"', '"%s""'%ESCCHAR) # "\""
  3836. txt = re.sub('([|&{}@#^~])', '"\\1"', txt) # "@"
  3837. txt = txt.replace(tmpmask, '"%s"'%(ESCCHAR*2)) # "\\"
  3838. elif target == 'tex':
  3839. # Mark literal \ to be changed to $\backslash$ later
  3840. txt = txt.replace(ESCCHAR, tmpmask)
  3841. txt = re.sub('([#$&%{}])', ESCCHAR+r'\1' , txt) # \%
  3842. txt = re.sub('([~^])' , ESCCHAR+r'\1{}', txt) # \~{}
  3843. txt = re.sub('([<|>])' , r'$\1$', txt) # $>$
  3844. txt = txt.replace(tmpmask, maskEscapeChar(r'$\backslash$'))
  3845. # TIP the _ is escaped at the end
  3846. return txt
  3847. # TODO man: where - really needs to be escaped?
  3848. def doFinalEscape(target, txt):
  3849. "Last escapes of each line"
  3850. if target == 'pm6' : txt = txt.replace(ESCCHAR+'<', r'<\#92><')
  3851. elif target == 'man' : txt = txt.replace('-', r'\-')
  3852. elif target == 'sgml': txt = txt.replace('[', '&lsqb;')
  3853. elif target == 'lout': txt = txt.replace('/', '"/"')
  3854. elif target == 'tex' :
  3855. txt = txt.replace('_', r'\_')
  3856. txt = txt.replace('vvvvTexUndervvvv', '_') # shame!
  3857. return txt
  3858. def EscapeCharHandler(action, data):
  3859. "Mask/Unmask the Escape Char on the given string"
  3860. if not data.strip(): return data
  3861. if action not in ('mask','unmask'):
  3862. Error("EscapeCharHandler: Invalid action '%s'"%action)
  3863. if action == 'mask': return data.replace('\\', ESCCHAR)
  3864. else: return data.replace(ESCCHAR, '\\')
  3865. def maskEscapeChar(data):
  3866. "Replace any Escape Char \ with a text mask (Input: str or list)"
  3867. if type(data) == type([]):
  3868. return map(lambda x: EscapeCharHandler('mask', x), data)
  3869. return EscapeCharHandler('mask',data)
  3870. def unmaskEscapeChar(data):
  3871. "Undo the Escape char \ masking (Input: str or list)"
  3872. if type(data) == type([]):
  3873. return map(lambda x: EscapeCharHandler('unmask', x), data)
  3874. return EscapeCharHandler('unmask',data)
  3875. def addLineBreaks(mylist):
  3876. "use LB to respect sys.platform"
  3877. ret = []
  3878. for line in mylist:
  3879. line = line.replace('\n', LB) # embedded \n's
  3880. ret.append(line+LB) # add final line break
  3881. return ret
  3882. # Convert ['foo\nbar'] to ['foo', 'bar']
  3883. def expandLineBreaks(mylist):
  3884. ret = []
  3885. for line in mylist:
  3886. ret.extend(line.split('\n'))
  3887. return ret
  3888. def compile_filters(filters, errmsg='Filter'):
  3889. if filters:
  3890. for i in xrange(len(filters)):
  3891. patt,repl = filters[i]
  3892. try: rgx = re.compile(patt)
  3893. except: Error("%s: '%s'"%(errmsg, patt))
  3894. filters[i] = (rgx,repl)
  3895. return filters
  3896. def enclose_me(tagname, txt):
  3897. return TAGS.get(tagname+'Open') + txt + TAGS.get(tagname+'Close')
  3898. def beautify_me(name, line):
  3899. "where name is: bold, italic, underline or strike"
  3900. # Exception: Doesn't parse an horizontal bar as strike
  3901. if name == 'strike' and regex['bar'].search(line): return line
  3902. name = 'font%s' % name.capitalize()
  3903. open = TAGS['%sOpen'%name]
  3904. close = TAGS['%sClose'%name]
  3905. txt = r'%s\1%s'%(open, close)
  3906. line = regex[name].sub(txt,line)
  3907. return line
def get_tagged_link(label, url):
    """Return the tagged text for a link, following the target rules.

    `label` is the link text (may be empty) and `url` the link address.
    The link is handled as 'email' when `url` matches regex['email'] and
    as 'url' otherwise. Reads the global CONF, TAGS, rules and regex.
    """
    ret = ''
    target = CONF['target']
    image_re = regex['img']
    # Set link type
    if regex['email'].match(url):
        linktype = 'email'
    else:
        linktype = 'url';
    # Escape specials from TEXT parts
    label = doEscape(target,label)
    # Escape specials from link URL
    if not rules['linkable'] or rules['escapeurl']:
        url = doEscape(target, url)
    # Adding protocol to guessed link
    guessurl = ''
    if linktype == 'url' and \
       re.match('(?i)'+regex['_urlskel']['guess'], url):
        # A leading W/w gets http://, anything else gets ftp://
        if url[0] in 'Ww': guessurl = 'http://' +url
        else : guessurl = 'ftp://' +url
        # Not link aware targets -> protocol is useless
        if not rules['linkable']: guessurl = ''
    # Simple link (not guessed)
    if not label and not guessurl:
        if CONF['mask-email'] and linktype == 'email':
            # Do the email mask feature (no TAGs, just text)
            url = url.replace('@', ' (a) ')
            url = url.replace('.', ' ')
            url = "<%s>" % url
            if rules['linkable']: url = doEscape(target, url)
            ret = url
        else:
            # Just add link data to tag
            tag = TAGS[linktype]
            ret = regex['x'].sub(url,tag)
    # Named link or guessed simple link
    else:
        # Adjusts for guessed link
        if not label: label = url # no protocol
        if guessurl : url = guessurl # with protocol
        # Image inside link!
        if image_re.match(label):
            if rules['imglinkable']: # get image tag
                label = parse_images(label)
            else: # img@link !supported
                label = "(%s)"%image_re.match(label).group(1)
        # Putting data on the right appearance order
        if rules['labelbeforelink'] or not rules['linkable']:
            urlorder = [label, url] # label before link
        else:
            urlorder = [url, label] # link before label
        # Add link data to tag (replace \a's)
        # One replacement per item, so each one fills the next placeholder
        ret = TAGS["%sMark"%linktype]
        for data in urlorder:
            ret = regex['x'].sub(data,ret,1)
    return ret
  3964. def parse_deflist_term(line):
  3965. "Extract and parse definition list term contents"
  3966. img_re = regex['img']
  3967. term = regex['deflist'].search(line).group(3)
  3968. # Mask image inside term as (image.jpg), where not supported
  3969. if not rules['imgasdefterm'] and img_re.search(term):
  3970. while img_re.search(term):
  3971. imgfile = img_re.search(term).group(1)
  3972. term = img_re.sub('(%s)'%imgfile, term, 1)
  3973. #TODO tex: escape ] on term. \], \rbrack{} and \verb!]! don't work :(
  3974. return term
  3975. def get_image_align(line):
  3976. "Return the image (first found) align for the given line"
  3977. # First clear marks that can mess align detection
  3978. line = re.sub(SEPARATOR+'$', '', line) # remove deflist sep
  3979. line = re.sub('^'+SEPARATOR, '', line) # remove list sep
  3980. line = re.sub('^[\t]+' , '', line) # remove quote mark
  3981. # Get image position on the line
  3982. m = regex['img'].search(line)
  3983. ini = m.start() ; head = 0
  3984. end = m.end() ; tail = len(line)
  3985. # The align detection algorithm
  3986. if ini == head and end != tail: align = 'left' # ^img + text$
  3987. elif ini != head and end == tail: align = 'right' # ^text + img$
  3988. else : align = 'center' # default align
  3989. # Some special cases
  3990. if BLOCK.isblock('table'): align = 'center' # ignore when table
  3991. # if TARGET == 'mgp' and align == 'center': align = 'center'
  3992. return align
  3993. # Reference: http://www.iana.org/assignments/character-sets
  3994. # http://www.drclue.net/F1.cgi/HTML/META/META.html
  3995. def get_encoding_string(enc, target):
  3996. if not enc: return ''
  3997. # Target specific translation table
  3998. translate = {
  3999. 'tex': {
  4000. # missing: ansinew , applemac , cp437 , cp437de , cp865
  4001. 'utf-8' : 'utf8',
  4002. 'us-ascii' : 'ascii',
  4003. 'windows-1250': 'cp1250',
  4004. 'windows-1252': 'cp1252',
  4005. 'ibm850' : 'cp850',
  4006. 'ibm852' : 'cp852',
  4007. 'iso-8859-1' : 'latin1',
  4008. 'iso-8859-2' : 'latin2',
  4009. 'iso-8859-3' : 'latin3',
  4010. 'iso-8859-4' : 'latin4',
  4011. 'iso-8859-5' : 'latin5',
  4012. 'iso-8859-9' : 'latin9',
  4013. 'koi8-r' : 'koi8-r'
  4014. }
  4015. }
  4016. # Normalization
  4017. enc = re.sub('(?i)(us[-_]?)?ascii|us|ibm367','us-ascii' , enc)
  4018. enc = re.sub('(?i)(ibm|cp)?85([02])' ,'ibm85\\2' , enc)
  4019. enc = re.sub('(?i)(iso[_-]?)?8859[_-]?' ,'iso-8859-' , enc)
  4020. enc = re.sub('iso-8859-($|[^1-9]).*' ,'iso-8859-1', enc)
  4021. # Apply translation table
  4022. try: enc = translate[target][enc.lower()]
  4023. except: pass
  4024. return enc
  4025. ##############################################################################
  4026. ##MerryChristmas,IdontwanttofighttonightwithyouImissyourbodyandIneedyourlove##
  4027. ##############################################################################
  4028. def process_source_file(file='', noconf=0, contents=[]):
  4029. """
  4030. Find and Join all the configuration available for a source file.
  4031. No sanity checking is done on this step.
  4032. It also extracts the source document parts into separate holders.
  4033. The config scan order is:
  4034. 1. The user configuration file (i.e. $HOME/.txt2tagsrc)
  4035. 2. The source document's CONF area
  4036. 3. The command line options
  4037. The return data is a tuple of two items:
  4038. 1. The parsed config dictionary
  4039. 2. The document's parts, as a (head, conf, body) tuple
  4040. All the conversion process will be based on the data and
  4041. configuration returned by this function.
  4042. The source files is read on this step only.
  4043. """
  4044. if contents:
  4045. source = SourceDocument(contents=contents)
  4046. else:
  4047. source = SourceDocument(file)
  4048. head, conf, body = source.split()
  4049. Message(_("Source document contents stored"),2)
  4050. if not noconf:
  4051. # Read document config
  4052. source_raw = source.get_raw_config()
  4053. # Join all the config directives found, then parse it
  4054. full_raw = RC_RAW + source_raw + CMDLINE_RAW
  4055. Message(_("Parsing and saving all config found (%03d items)") % (len(full_raw)), 1)
  4056. full_parsed = ConfigMaster(full_raw).parse()
  4057. # Add manually the filename to the conf dic
  4058. if contents:
  4059. full_parsed['sourcefile'] = MODULEIN
  4060. full_parsed['infile'] = MODULEIN
  4061. full_parsed['outfile'] = MODULEOUT
  4062. else:
  4063. full_parsed['sourcefile'] = file
  4064. # Maybe should we dump the config found?
  4065. if full_parsed.get('dump-config'):
  4066. dumpConfig(source_raw, full_parsed)
  4067. Quit()
  4068. # The user just want to know a single config value (hidden feature)
  4069. #TODO pick a better name than --show-config-value
  4070. elif full_parsed.get('show-config-value'):
  4071. config_value = full_parsed.get(full_parsed['show-config-value'])
  4072. if config_value:
  4073. if type(config_value) == type([]):
  4074. print '\n'.join(config_value)
  4075. else:
  4076. print config_value
  4077. Quit()
  4078. # Okay, all done
  4079. Debug("FULL config for this file: %s"%full_parsed, 1)
  4080. else:
  4081. full_parsed = {}
  4082. return full_parsed, (head,conf,body)
  4083. def get_infiles_config(infiles):
  4084. """
  4085. Find and Join into a single list, all configuration available
  4086. for each input file. This function is supposed to be the very
  4087. first one to be called, before any processing.
  4088. """
  4089. return map(process_source_file, infiles)
  4090. def convert_this_files(configs):
  4091. global CONF
  4092. for myconf,doc in configs: # multifile support
  4093. target_head = []
  4094. target_toc = []
  4095. target_body = []
  4096. target_foot = []
  4097. source_head, source_conf, source_body = doc
  4098. myconf = ConfigMaster().sanity(myconf)
  4099. # Compose the target file Headers
  4100. #TODO escape line before?
  4101. #TODO see exceptions by tex and mgp
  4102. Message(_("Composing target Headers"),1)
  4103. target_head = doHeader(source_head, myconf)
  4104. # Parse the full marked body into tagged target
  4105. first_body_line = (len(source_head) or 1)+ len(source_conf) + 1
  4106. Message(_("Composing target Body"),1)
  4107. target_body, marked_toc = convert(source_body, myconf, firstlinenr=first_body_line)
  4108. # If dump-source, we're done
  4109. if myconf['dump-source']:
  4110. for line in source_head+source_conf+target_body:
  4111. print line
  4112. return
  4113. # Make TOC (if needed)
  4114. Message(_("Composing target TOC"),1)
  4115. tagged_toc = toc_tagger(marked_toc, myconf)
  4116. target_toc = toc_formatter(tagged_toc, myconf)
  4117. target_body = toc_inside_body(target_body, target_toc, myconf)
  4118. if not AUTOTOC and not myconf['toc-only']: target_toc = []
  4119. # Compose the target file Footer
  4120. Message(_("Composing target Footer"),1)
  4121. if TARGET not in ['txt', 'art']:
  4122. target_foot = doFooter(myconf)
  4123. # Finally, we have our document
  4124. outlist = target_head + target_toc + target_body + target_foot
  4125. # If on GUI, abort before finish_him
  4126. # If module, return finish_him as list
  4127. # Else, write results to file or STDOUT
  4128. if GUI:
  4129. return outlist, myconf
  4130. elif myconf.get('outfile') == MODULEOUT:
  4131. return finish_him(outlist, myconf), myconf
  4132. else:
  4133. Message(_("Saving results to the output file"),1)
  4134. finish_him(outlist, myconf)
  4135. def parse_images(line):
  4136. "Tag all images found"
  4137. while regex['img'].search(line) and TAGS['img'] != '[\a]':
  4138. txt = regex['img'].search(line).group(1)
  4139. tag = TAGS['img']
  4140. # If target supports image alignment, here we go
  4141. if rules['imgalignable']:
  4142. align = get_image_align(line) # right
  4143. align_name = align.capitalize() # Right
  4144. # The align is a full tag, or part of the image tag (~A~)
  4145. if TAGS['imgAlign'+align_name]:
  4146. tag = TAGS['imgAlign'+align_name]
  4147. else:
  4148. align_tag = TAGS['_imgAlign'+align_name]
  4149. tag = regex['_imgAlign'].sub(align_tag, tag, 1)
  4150. # Dirty fix to allow centered solo images
  4151. if align == 'center' and TARGET in ('html','xhtml'):
  4152. rest = regex['img'].sub('',line,1)
  4153. if re.match('^\s+$', rest):
  4154. tag = "<center>%s</center>" %tag
  4155. if TARGET == 'tex':
  4156. tag = re.sub(r'\\b',r'\\\\b',tag)
  4157. txt = txt.replace('_', 'vvvvTexUndervvvv')
  4158. line = regex['img'].sub(tag,line,1)
  4159. line = regex['x'].sub(txt,line,1)
  4160. return line
  4161. def add_inline_tags(line):
  4162. # Beautifiers
  4163. for beauti in ('bold', 'italic', 'underline', 'strike'):
  4164. if regex['font%s'%beauti.capitalize()].search(line):
  4165. line = beautify_me(beauti, line)
  4166. line = parse_images(line)
  4167. return line
  4168. def get_include_contents(file, path=''):
  4169. "Parses %!include: value and extract file contents"
  4170. ids = {'`':'verb', '"':'raw', "'":'tagged' }
  4171. id = 't2t'
  4172. # Set include type and remove identifier marks
  4173. mark = file[0]
  4174. if mark in ids.keys():
  4175. if file[:2] == file[-2:] == mark*2:
  4176. id = ids[mark] # set type
  4177. file = file[2:-2] # remove marks
  4178. # Handle remote dir execution
  4179. filepath = os.path.join(path, file)
  4180. # Read included file contents
  4181. lines = Readfile(filepath, remove_linebreaks=1)
  4182. # Default txt2tags marked text, just BODY matters
  4183. if id == 't2t':
  4184. lines = get_file_body(filepath)
  4185. #TODO fix images relative path if file has a path, ie.: chapter1/index.t2t (wait until tree parsing)
  4186. #TODO for the images path fix, also respect outfile path, if different from infile (wait until tree parsing)
  4187. lines.insert(0, '%%INCLUDED(%s) starts here: %s'%(id,file))
  4188. # This appears when included hit EOF with verbatim area open
  4189. #lines.append('%%INCLUDED(%s) ends here: %s'%(id,file))
  4190. return id, lines
  4191. def set_global_config(config):
  4192. global CONF, TAGS, regex, rules, TARGET
  4193. CONF = config
  4194. rules = getRules(CONF)
  4195. TAGS = getTags(CONF)
  4196. regex = getRegexes()
  4197. TARGET = config['target'] # save for buggy functions that need global
def convert(bodylines, config, firstlinenr=1):
    """
    Convert the marked body lines into the tagged target lines.

    Arguments:
        bodylines   -- list with the source document body lines
        config      -- the config dictionary, set as the global one
                       by set_global_config()
        firstlinenr -- the number of the first body line on the
                       source file, used on the debug messages

    Returns the (lines, marked_toc) tuple. The lines are the tagged
    body, an empty list when 'toc-only' is on, or the source lines
    (includes expanded) when 'dump-source' is on.

    The lines are parsed one by one. The tests order matters: each
    section either consumes the line ('continue') or lets it pass
    to the next tests.
    """
    global BLOCK, TITLE
    set_global_config(config)
    target = config['target']
    BLOCK = BlockMaster()
    MASK = MaskMaster()
    TITLE = TitleMaster()
    ret = []
    dump_source = []
    f_lastwasblank = 0
    # Compiling all PreProc regexes
    pre_filter = compile_filters(
        CONF['preproc'], _('Invalid PreProc filter regex'))
    # Let's mark it up!
    # linenr is the source file line number (debug only),
    # lineref is the index of the next line to read on bodylines
    linenr = firstlinenr-1
    lineref = 0
    while lineref < len(bodylines):
        # Defaults
        MASK.reset()
        # Callable that will receive the parsed line (chosen later)
        results_box = ''
        untouchedline = bodylines[lineref]
        dump_source.append(untouchedline)
        line = re.sub('[\n\r]+$','',untouchedline) # del line break
        # Apply PreProc filters
        if pre_filter:
            errmsg = _('Invalid PreProc filter replacement')
            for rgx,repl in pre_filter:
                try: line = rgx.sub(repl, line)
                except: Error("%s: '%s'"%(errmsg, repl))
        line = maskEscapeChar(line) # protect \ char
        linenr += 1
        lineref += 1
        Debug(repr(line), 2, linenr) # heavy debug: show each line
        #------------------[ Comment Block ]------------------------
        # We're already on a comment block
        if BLOCK.block() == 'comment':
            # Closing comment
            if regex['blockCommentClose'].search(line):
                ret.extend(BLOCK.blockout() or [])
                continue
            # Normal comment-inside line. Ignore it.
            continue
        # Detecting comment block init
        if regex['blockCommentOpen'].search(line) \
           and BLOCK.block() not in BLOCK.exclusive:
            ret.extend(BLOCK.blockin('comment'))
            continue
        #-------------------------[ Tagged Text ]----------------------
        # We're already on a tagged block
        if BLOCK.block() == 'tagged':
            # Closing tagged
            if regex['blockTaggedClose'].search(line):
                ret.extend(BLOCK.blockout())
                continue
            # Normal tagged-inside line
            BLOCK.holdadd(line)
            continue
        # Detecting tagged block init
        if regex['blockTaggedOpen'].search(line) \
           and BLOCK.block() not in BLOCK.exclusive:
            ret.extend(BLOCK.blockin('tagged'))
            continue
        # One line tagged text
        if regex['1lineTagged'].search(line) \
           and BLOCK.block() not in BLOCK.exclusive:
            ret.extend(BLOCK.blockin('tagged'))
            line = regex['1lineTagged'].sub('',line)
            BLOCK.holdadd(line)
            ret.extend(BLOCK.blockout())
            continue
        #-------------------------[ Raw Text ]----------------------
        # We're already on a raw block
        if BLOCK.block() == 'raw':
            # Closing raw
            if regex['blockRawClose'].search(line):
                ret.extend(BLOCK.blockout())
                continue
            # Normal raw-inside line
            BLOCK.holdadd(line)
            continue
        # Detecting raw block init
        if regex['blockRawOpen'].search(line) \
           and BLOCK.block() not in BLOCK.exclusive:
            ret.extend(BLOCK.blockin('raw'))
            continue
        # One line raw text
        if regex['1lineRaw'].search(line) \
           and BLOCK.block() not in BLOCK.exclusive:
            ret.extend(BLOCK.blockin('raw'))
            line = regex['1lineRaw'].sub('',line)
            BLOCK.holdadd(line)
            ret.extend(BLOCK.blockout())
            continue
        #------------------------[ Verbatim ]----------------------
        #TIP We'll never support beautifiers inside verbatim
        # Closing table mapped to verb
        if BLOCK.block() == 'verb' \
           and BLOCK.prop('mapped') == 'table' \
           and not regex['table'].search(line):
            ret.extend(BLOCK.blockout())
        # We're already on a verb block
        if BLOCK.block() == 'verb':
            # Closing verb
            if regex['blockVerbClose'].search(line):
                ret.extend(BLOCK.blockout())
                continue
            # Normal verb-inside line
            BLOCK.holdadd(line)
            continue
        # Detecting verb block init
        if regex['blockVerbOpen'].search(line) \
           and BLOCK.block() not in BLOCK.exclusive:
            ret.extend(BLOCK.blockin('verb'))
            f_lastwasblank = 0
            continue
        # One line verb-formatted text
        if regex['1lineVerb'].search(line) \
           and BLOCK.block() not in BLOCK.exclusive:
            ret.extend(BLOCK.blockin('verb'))
            line = regex['1lineVerb'].sub('',line)
            BLOCK.holdadd(line)
            ret.extend(BLOCK.blockout())
            f_lastwasblank = 0
            continue
        # Tables are mapped to verb when target is not table-aware
        if not rules['tableable'] and regex['table'].search(line):
            if not BLOCK.isblock('verb'):
                ret.extend(BLOCK.blockin('verb'))
                BLOCK.propset('mapped', 'table')
            BLOCK.holdadd(line)
            continue
        #---------------------[ blank lines ]-----------------------
        if regex['blankline'].search(line):
            # Close open paragraph
            if BLOCK.isblock('para'):
                ret.extend(BLOCK.blockout())
                f_lastwasblank = 1
                continue
            # Close all open tables
            if BLOCK.isblock('table'):
                ret.extend(BLOCK.blockout())
                f_lastwasblank = 1
                continue
            # Close all open quotes
            while BLOCK.isblock('quote'):
                ret.extend(BLOCK.blockout())
            # Closing all open lists
            if f_lastwasblank: # 2nd consecutive blank
                if BLOCK.block().endswith('list'):
                    BLOCK.holdaddsub('') # helps parser
                while BLOCK.depth: # closes list (if any)
                    ret.extend(BLOCK.blockout())
                continue # ignore consecutive blanks
            # Paragraph (if any) is wanted inside lists also
            if BLOCK.block().endswith('list'):
                BLOCK.holdaddsub('')
            f_lastwasblank = 1
            continue
        #---------------------[ special ]---------------------------
        if regex['special'].search(line):
            # Include command
            targ, key, val = ConfigLines().parse_line(line, 'include', target)
            if key:
                Debug("Found config '%s', value '%s'" % (key, val), 1, linenr)
                incpath = os.path.dirname(CONF['sourcefile'])
                incfile = val
                err = _('A file cannot include itself (loop!)')
                if CONF['sourcefile'] == incfile:
                    Error("%s: %s"%(err,incfile))
                inctype, inclines = get_include_contents(incfile, incpath)
                # Verb, raw and tagged are easy
                if inctype != 't2t':
                    ret.extend(BLOCK.blockin(inctype))
                    BLOCK.holdextend(inclines)
                    ret.extend(BLOCK.blockout())
                else:
                    # Insert include lines into body
                    # (right after the current line, so they are the
                    # next ones to be parsed by this same loop)
                    #TODO include maxdepth limit
                    bodylines = bodylines[:lineref] + inclines + bodylines[lineref:]
                    #TODO fix path if include@include
                    # Remove %!include call
                    if CONF['dump-source']:
                        dump_source.pop()
                continue
            else:
                Debug('Bogus Special Line',1,linenr)
        #---------------------[ dump-source ]-----------------------
        # We don't need to go any further
        if CONF['dump-source']:
            continue
        #---------------------[ Comments ]--------------------------
        # Just skip them (if not macro)
        if regex['comment'].search(line) and not \
           regex['macros'].match(line) and not \
           regex['toc'].match(line):
            continue
        #---------------------[ Triggers ]--------------------------
        # Valid line, reset blank status
        f_lastwasblank = 0
        # Any NOT quote line closes all open quotes
        if BLOCK.isblock('quote') and not regex['quote'].search(line):
            while BLOCK.isblock('quote'):
                ret.extend(BLOCK.blockout())
        # Any NOT table line closes an open table
        if BLOCK.isblock('table') and not regex['table'].search(line):
            ret.extend(BLOCK.blockout())
        #---------------------[ Horizontal Bar ]--------------------
        if regex['bar'].search(line):
            # Bars inside quotes are handled on the Quote processing
            # Otherwise we parse the bars right here
            #
            if not (BLOCK.isblock('quote') or regex['quote'].search(line)) \
               or (BLOCK.isblock('quote') and not rules['barinsidequote']):
                # Close all the opened blocks
                ret.extend(BLOCK.blockin('bar'))
                # Extract the bar chars (- or =)
                m = regex['bar'].search(line)
                bar_chars = m.group(2)
                # Process and dump the tagged bar
                BLOCK.holdadd(bar_chars)
                ret.extend(BLOCK.blockout())
                Debug("BAR: %s"%line, 6)
                # We're done, nothing more to process
                continue
        #---------------------[ Title ]-----------------------------
        if (regex['title'].search(line) or regex['numtitle'].search(line)) \
           and not BLOCK.block().endswith('list'):
            if regex['title'].search(line):
                name = 'title'
            else:
                name = 'numtitle'
            # Close all the opened blocks
            ret.extend(BLOCK.blockin(name))
            # Process title
            TITLE.add(line)
            ret.extend(BLOCK.blockout())
            # We're done, nothing more to process
            continue
        #---------------------[ %%toc ]-----------------------
        # %%toc line closes paragraph
        if BLOCK.block() == 'para' and regex['toc'].search(line):
            ret.extend(BLOCK.blockout())
        #---------------------[ apply masks ]-----------------------
        line = MASK.mask(line)
        #XXX from here, only block-inside lines will pass
        #---------------------[ Quote ]-----------------------------
        if regex['quote'].search(line):
            # Store number of leading TABS
            quotedepth = len(regex['quote'].search(line).group(0))
            # SGML doesn't support nested quotes
            if rules['quotenotnested']: quotedepth = 1
            # Don't cross depth limit
            maxdepth = rules['quotemaxdepth']
            if maxdepth and quotedepth > maxdepth:
                quotedepth = maxdepth
            # New quote
            if not BLOCK.isblock('quote'):
                ret.extend(BLOCK.blockin('quote'))
            # New subquotes
            # NOTE(review): the blockin() result is not saved here,
            # unlike the other calls -- presumably intended, confirm
            while BLOCK.depth < quotedepth:
                BLOCK.blockin('quote')
            # Closing quotes
            while quotedepth < BLOCK.depth:
                ret.extend(BLOCK.blockout())
            # Bar inside quote
            # A temporary BlockMaster tags the bar, so the open quote
            # block is not disturbed and just holds the tagged result
            if regex['bar'].search(line) and rules['barinsidequote']:
                tempBlock = BlockMaster()
                tagged_bar = []
                tagged_bar.extend(tempBlock.blockin('bar'))
                tempBlock.holdadd(line)
                tagged_bar.extend(tempBlock.blockout())
                BLOCK.holdextend(tagged_bar)
                continue
        #---------------------[ Lists ]-----------------------------
        # An empty item also closes the current list
        if BLOCK.block().endswith('list'):
            m = regex['listclose'].match(line)
            if m:
                listindent = m.group(1)
                listtype = m.group(2)
                currlisttype = BLOCK.prop('type')
                currlistindent = BLOCK.prop('indent')
                if listindent == currlistindent and \
                   listtype == currlisttype:
                    ret.extend(BLOCK.blockout())
                    continue
        if regex['list'].search(line) or \
           regex['numlist'].search(line) or \
           regex['deflist'].search(line):
            listindent = BLOCK.prop('indent')
            listids = ''.join(LISTNAMES.keys())
            m = re.match('^( *)([%s]) '%listids, line)
            listitemindent = m.group(1)
            listtype = m.group(2)
            listname = LISTNAMES[listtype]
            # A new list item always goes to the block holder
            results_box = BLOCK.holdadd
            # Del list ID (and separate term from definition)
            if listname == 'deflist':
                term = parse_deflist_term(line)
                line = regex['deflist'].sub(
                    SEPARATOR+term+SEPARATOR,line)
            else:
                line = regex[listname].sub(SEPARATOR,line)
            # Don't cross depth limit
            maxdepth = rules['listmaxdepth']
            if maxdepth and BLOCK.depth == maxdepth:
                if len(listitemindent) > len(listindent):
                    listitemindent = listindent
            # List bumping (same indent, diff mark)
            # Close the currently open list to clear the mess
            if BLOCK.block().endswith('list') \
               and listname != BLOCK.block() \
               and len(listitemindent) == len(listindent):
                ret.extend(BLOCK.blockout())
                listindent = BLOCK.prop('indent')
            # Open mother list or sublist
            if not BLOCK.block().endswith('list') or \
               len(listitemindent) > len(listindent):
                ret.extend(BLOCK.blockin(listname))
                BLOCK.propset('indent',listitemindent)
                BLOCK.propset('type',listtype)
            # Closing sublists
            while len(listitemindent) < len(BLOCK.prop('indent')):
                ret.extend(BLOCK.blockout())
            # O-oh, sublist before list ("\n\n - foo\n- foo")
            # Fix: close sublist (as mother), open another list
            if not BLOCK.block().endswith('list'):
                ret.extend(BLOCK.blockin(listname))
                BLOCK.propset('indent',listitemindent)
                BLOCK.propset('type',listtype)
        #---------------------[ Table ]-----------------------------
        #TODO escape undesired format inside table
        #TODO add pm6 target
        if regex['table'].search(line):
            if not BLOCK.isblock('table'): # first table line!
                ret.extend(BLOCK.blockin('table'))
                BLOCK.tableparser.__init__(line)
            tablerow = TableMaster().parse_row(line)
            BLOCK.tableparser.add_row(tablerow) # save config
            # Maintain line to unmask and inlines
            # XXX Bug: | **bo | ld** | turns **bo\x01ld** and gets converted :(
            # TODO isolate unmask+inlines parsing to use here
            line = SEPARATOR.join(tablerow['cells'])
        #---------------------[ Paragraph ]-------------------------
        if not BLOCK.block() and \
           not line.count(MASK.tocmask): # new para!
            ret.extend(BLOCK.blockin('para'))
        ############################################################
        ############################################################
        ############################################################
        #---------------------[ Final Parses ]----------------------
        # The target-specific special char escapes for body lines
        line = doEscape(target,line)
        line = add_inline_tags(line)
        line = MASK.undo(line)
        #---------------------[ Hold or Return? ]-------------------
        ### Now we must choose where to put the parsed line
        #
        if not results_box:
            # List item extra lines
            if BLOCK.block().endswith('list'):
                results_box = BLOCK.holdaddsub
            # Other blocks
            elif BLOCK.block():
                results_box = BLOCK.holdadd
            # No blocks
            else:
                line = doFinalEscape(target, line)
                results_box = ret.append
        results_box(line)
    # EOF: close any open para/verb/lists/table/quotes
    Debug('EOF',7)
    while BLOCK.block():
        ret.extend(BLOCK.blockout())
    # Maybe close some opened title area?
    if rules['titleblocks']:
        ret.extend(TITLE.close_all())
    # Maybe a major tag to enclose body? (like DIV for CSS)
    if TAGS['bodyOpen' ]: ret.insert(0, TAGS['bodyOpen'])
    if TAGS['bodyClose']: ret.append(TAGS['bodyClose'])
    # On toc-only the body is discarded, just the TOC data is wanted
    if CONF['toc-only']: ret = []
    marked_toc = TITLE.dump_marked_toc(CONF['toc-level'])
    # If dump-source, all parsing is ignored
    if CONF['dump-source']: ret = dump_source[:]
    return ret, marked_toc
  4583. ##############################################################################
  4584. ################################### GUI ######################################
  4585. ##############################################################################
  4586. #
  4587. # Tk help: http://python.org/topics/tkinter/
  4588. # Tuto: http://ibiblio.org/obp/py4fun/gui/tkPhone.html
  4589. # /usr/lib/python*/lib-tk/Tkinter.py
  4590. #
  4591. # grid table : row=0, column=0, columnspan=2, rowspan=2
  4592. # grid align : sticky='n,s,e,w' (North, South, East, West)
  4593. # pack place : side='top,bottom,right,left'
  4594. # pack fill : fill='x,y,both,none', expand=1
  4595. # pack align : anchor='n,s,e,w' (North, South, East, West)
  4596. # padding : padx=10, pady=10, ipadx=10, ipady=10 (internal)
# checkbox : offvalue is returned if the _user_ deselected the box
  4598. # label align: justify=left,right,center
  4599. def load_GUI_resources():
  4600. "Load all extra modules and methods used by GUI"
  4601. global askopenfilename, showinfo, showwarning, showerror, Tkinter
  4602. from tkFileDialog import askopenfilename
  4603. from tkMessageBox import showinfo,showwarning,showerror
  4604. import Tkinter
  4605. class Gui:
  4606. "Graphical Tk Interface"
  4607. def __init__(self, conf={}):
  4608. self.root = Tkinter.Tk() # mother window, come to butthead
  4609. self.root.title(my_name) # window title bar text
  4610. self.window = self.root # variable "focus" for inclusion
  4611. self.row = 0 # row count for grid()
  4612. self.action_length = 150 # left column length (pixel)
  4613. self.frame_margin = 10 # frame margin size (pixel)
  4614. self.frame_border = 6 # frame border size (pixel)
  4615. # The default Gui colors, can be changed by %!guicolors
  4616. self.dft_gui_colors = ['#6c6','white','#cf9','#030']
  4617. self.gui_colors = []
  4618. self.bg1 = self.fg1 = self.bg2 = self.fg2 = ''
  4619. # On Tk, vars need to be set/get using setvar()/get()
  4620. self.infile = self.setvar('')
  4621. self.target = self.setvar('')
  4622. self.target_name = self.setvar('')
  4623. # The checks appearance order
  4624. self.checks = [
  4625. 'headers', 'enum-title', 'toc', 'mask-email', 'toc-only', 'stdout'
  4626. ]
  4627. # Creating variables for all checks
  4628. for check in self.checks:
  4629. setattr(self, 'f_'+check, self.setvar(''))
  4630. # Load RC config
  4631. self.conf = {}
  4632. if conf: self.load_config(conf)
  4633. def load_config(self, conf):
  4634. self.conf = conf
  4635. self.gui_colors = conf.get('guicolors') or self.dft_gui_colors
  4636. self.bg1, self.fg1, self.bg2, self.fg2 = self.gui_colors
  4637. self.root.config(bd=15,bg=self.bg1)
  4638. ### Config as dic for python 1.5 compat (**opts don't work :( )
  4639. def entry(self, **opts): return Tkinter.Entry(self.window, opts)
  4640. def label(self, txt='', bg=None, **opts):
  4641. opts.update({'text':txt,'bg':bg or self.bg1})
  4642. return Tkinter.Label(self.window, opts)
  4643. def button(self,name,cmd,**opts):
  4644. opts.update({'text':name,'command':cmd})
  4645. return Tkinter.Button(self.window, opts)
  4646. def check(self,name,checked=0,**opts):
  4647. bg, fg = self.bg2, self.fg2
  4648. opts.update({
  4649. 'text':name,
  4650. 'onvalue':1,
  4651. 'offvalue':0,
  4652. 'activeforeground':fg,
  4653. 'activebackground':bg,
  4654. 'highlightbackground':bg,
  4655. 'fg':fg,
  4656. 'bg':bg,
  4657. 'anchor':'w'
  4658. })
  4659. chk = Tkinter.Checkbutton(self.window, opts)
  4660. if checked: chk.select()
  4661. chk.grid(columnspan=2, sticky='w', padx=0)
  4662. def menu(self,sel,items):
  4663. return apply(Tkinter.OptionMenu,(self.window,sel)+tuple(items))
  4664. # Handy auxiliary functions
  4665. def action(self, txt):
  4666. self.label(
  4667. txt,
  4668. fg=self.fg1,
  4669. bg=self.bg1,
  4670. wraplength=self.action_length).grid(column=0,row=self.row)
  4671. def frame_open(self):
  4672. self.window = Tkinter.Frame(
  4673. self.root,
  4674. bg=self.bg2,
  4675. borderwidth=self.frame_border)
  4676. def frame_close(self):
  4677. self.window.grid(
  4678. column=1,
  4679. row=self.row,
  4680. sticky='w',
  4681. padx=self.frame_margin)
  4682. self.window = self.root
  4683. self.label('').grid()
  4684. self.row += 2 # update row count
  4685. def target_name2key(self):
  4686. name = self.target_name.get()
  4687. target = filter(lambda x: TARGET_NAMES[x] == name, TARGETS)
  4688. try : key = target[0]
  4689. except: key = ''
  4690. self.target = self.setvar(key)
  4691. def target_key2name(self):
  4692. key = self.target.get()
  4693. name = TARGET_NAMES.get(key) or key
  4694. self.target_name = self.setvar(name)
  4695. def exit(self): self.root.destroy()
  4696. def setvar(self, val): z = Tkinter.StringVar() ; z.set(val) ; return z
  4697. def askfile(self):
  4698. ftypes= [(_('txt2tags files'), ('*.t2t','*.txt')), (_('All files'),'*')]
  4699. newfile = askopenfilename(filetypes=ftypes)
  4700. if newfile:
  4701. self.infile.set(newfile)
  4702. newconf = process_source_file(newfile)[0]
  4703. newconf = ConfigMaster().sanity(newconf, gui=1)
  4704. # Restate all checkboxes after file selection
  4705. #TODO how to make a refresh without killing it?
  4706. self.root.destroy()
  4707. self.__init__(newconf)
  4708. self.mainwindow()
  4709. def scrollwindow(self, txt='no text!', title=''):
  4710. # Create components
  4711. win = Tkinter.Toplevel() ; win.title(title)
  4712. frame = Tkinter.Frame(win)
  4713. scroll = Tkinter.Scrollbar(frame)
  4714. text = Tkinter.Text(frame,yscrollcommand=scroll.set)
  4715. button = Tkinter.Button(win)
  4716. # Config
  4717. text.insert(Tkinter.END, '\n'.join(txt))
  4718. scroll.config(command=text.yview)
  4719. button.config(text=_('Close'), command=win.destroy)
  4720. button.focus_set()
  4721. # Packing
  4722. text.pack(side='left', fill='both', expand=1)
  4723. scroll.pack(side='right', fill='y')
  4724. frame.pack(fill='both', expand=1)
  4725. button.pack(ipadx=30)
    def runprogram(self):
        """
        Convert the chosen source file, using the interface options.

        A command line is composed from the real command line config
        plus the flags for the checkboxes, and is set as the global
        CMDLINE_RAW while the file is processed and converted. The
        result is shown on a new window, or a 'Conversion done!'
        message is shown. The original CMDLINE_RAW is restored at
        the end.
        """
        global CMDLINE_RAW
        # Prepare
        self.target_name2key()
        infile, target = self.infile.get(), self.target.get()
        # Sanity
        if not target:
            showwarning(my_name,_("You must select a target type!"))
            return
        if not infile:
            showwarning(my_name,_("You must provide the source file location!"))
            return
        # Compose cmdline
        guiflags = []
        real_cmdline_conf = ConfigMaster(CMDLINE_RAW).parse()
        # The infile and the target always come from the interface
        if real_cmdline_conf.has_key('infile'):
            del real_cmdline_conf['infile']
        if real_cmdline_conf.has_key('target'):
            del real_cmdline_conf['target']
        real_cmdline = CommandLine().compose_cmdline(real_cmdline_conf)
        default_outfile = ConfigMaster().get_outfile_name(
            {'sourcefile':infile, 'outfile':'', 'target':target})
        for opt in self.checks:
            # The checkbox state: 1 (on) or 0 (off, or still unset)
            val = int(getattr(self, 'f_%s'%opt).get() or "0")
            # The 'stdout' check is handled as the 'outfile' config
            if opt == 'stdout': opt = 'outfile'
            on_config = self.conf.get(opt) or 0
            on_cmdline = real_cmdline_conf.get(opt) or 0
            # For outfile, "on" means that it is set to STDOUT
            if opt == 'outfile':
                if on_config == STDOUT: on_config = 1
                else: on_config = 0
                if on_cmdline == STDOUT: on_cmdline = 1
                else: on_cmdline = 0
            # A flag is needed when the check differs from the loaded
            # config, or when the option was on the real command line
            # with this very same state
            if val != on_config or (
               val == on_config == on_cmdline and
               real_cmdline_conf.has_key(opt)):
                if val:
                    # Was not set, but user selected on GUI
                    Debug("user turned ON: %s"%opt)
                    if opt == 'outfile': opt = '-o-'
                    else: opt = '--%s'%opt
                else:
                    # Was set, but user deselected on GUI
                    Debug("user turned OFF: %s"%opt)
                    if opt == 'outfile':
                        opt = "-o%s"%default_outfile
                    else: opt = '--no-%s'%opt
                guiflags.append(opt)
        cmdline = [my_name, '-t', target] + real_cmdline + guiflags + [infile]
        Debug('Gui/Tk cmdline: %s' % cmdline, 5)
        # Run!
        cmdline_raw_orig = CMDLINE_RAW
        try:
            # Fake the GUI cmdline as the real one, and parse file
            CMDLINE_RAW = CommandLine().get_raw_config(cmdline[1:])
            data = process_source_file(infile)
            # On GUI, convert_* returns the data, not finish_him()
            outlist, config = convert_this_files([data])
            # On GUI and STDOUT, finish_him() returns the data
            result = finish_him(outlist, config)
            # Show outlist in a nice new window
            if result:
                outlist, config = result
                title = _('%s: %s converted to %s') % (
                    my_name,
                    os.path.basename(infile),
                    config['target'].upper())
                self.scrollwindow(outlist, title)
            # Show the "file saved" message
            else:
                msg = "%s\n\n %s\n%s\n\n %s\n%s"%(
                    _('Conversion done!'),
                    _('FROM:'), infile,
                    _('TO:'), config['outfile'])
                showinfo(my_name, msg)
        except error: # common error (windowed), not quit
            pass
        except: # fatal error (windowed and printed)
            errormsg = getUnknownErrorMessage()
            print errormsg
            showerror(_('%s FATAL ERROR!')%my_name,errormsg)
            self.exit()
        # Both handlers fall through to here, so the real command
        # line is always restored
        CMDLINE_RAW = cmdline_raw_orig
  def mainwindow(self):
      """Compose the main window from self.conf and start its main loop.

      Fills the source file and target fields from the loaded config,
      then lays out the header, the source file entry, the target menu,
      one check box for each name in self.checks and the Quit/Convert
      buttons. The last step calls self.root.mainloop().

      Note that self.conf is changed: 'stdout' is turned on when the
      outfile is STDOUT, and 'headers' defaults to 1 when it is not set.
      """
      self.infile.set(self.conf.get('sourcefile') or '')
      self.target.set(self.conf.get('target') or _('-- select one --'))
      outfile = self.conf.get('outfile')
      if outfile == STDOUT:  # map -o-
          self.conf['stdout'] = 1
      if self.conf.get('headers') is None:
          self.conf['headers'] = 1  # map default
      action1 = _("Enter the source file location:")
      action2 = _("Choose the target document type:")
      action3 = _("Some options you may check:")
      action4 = _("Some extra options:")
      checks_txt = {
          'headers': _("Include headers on output"),
          'enum-title': _("Number titles (1, 1.1, 1.1.1, etc)"),
          'toc': _("Do TOC also (Table of Contents)"),
          'mask-email': _("Hide e-mails from SPAM robots"),
          'toc-only': _("Just do TOC, nothing more"),
          'stdout': _("Dump to screen (Don't save target file)")
      }
      # Menu entries are the target names, in the TARGETS order
      targets_menu = [TARGET_NAMES[key] for key in TARGETS]
      # Header
      self.label("%s %s" % (my_name.upper(), my_version),
                 bg=self.bg2, fg=self.fg2).grid(columnspan=2, ipadx=10)
      self.label(_("ONE source, MULTI targets") + '\n%s\n' % my_url,
                 bg=self.bg1, fg=self.fg1).grid(columnspan=2)
      self.row = 2
      # Choose input file
      self.action(action1)
      self.frame_open()
      e_infile = self.entry(textvariable=self.infile, width=25)
      e_infile.grid(row=self.row, column=0, sticky='e')
      if not self.infile.get():
          e_infile.focus_set()
      self.button(_("Browse"), self.askfile).grid(
          row=self.row, column=1, sticky='w', padx=10)
      # Show outfile name, style and encoding (if any)
      # NOTE(review): nesting rebuilt from a listing that lost its
      # indentation -- confirm the label is only shown when outfile is set
      txt = ''
      if outfile:
          txt = outfile
          if outfile == STDOUT:
              txt = _('<screen>')
          l_output = self.label(_('Output: ') + txt, fg=self.fg2, bg=self.bg2)
          l_output.grid(columnspan=2, sticky='w')
      for setting in ['style', 'encoding']:
          if self.conf.get(setting):
              name = setting.capitalize()
              val = self.conf[setting]
              self.label('%s: %s' % (name, val),
                         fg=self.fg2, bg=self.bg2).grid(
                             columnspan=2, sticky='w')
      # Choose target
      self.frame_close()
      self.action(action2)
      self.frame_open()
      self.target_key2name()
      self.menu(self.target_name, targets_menu).grid(
          columnspan=2, sticky='w')
      # Options checkboxes label
      self.frame_close()
      self.action(action3)
      self.frame_open()
      # Compose options check boxes, example:
      # self.check(checks_txt['toc'],1,variable=self.f_toc)
      for check in self.checks:
          # Extra options label
          if check == 'toc-only':
              self.frame_close()
              self.action(action4)
              self.frame_open()
          txt = checks_txt[check]
          var = getattr(self, 'f_' + check)
          checked = self.conf.get(check)
          self.check(txt, checked, variable=var)
      self.frame_close()
      # Spacer and buttons
      self.label('').grid()
      self.row += 1
      b_quit = self.button(_("Quit"), self.exit)
      b_quit.grid(row=self.row, column=0, sticky='w', padx=30)
      b_conv = self.button(_("Convert!"), self.runprogram)
      b_conv.grid(row=self.row, column=1, sticky='e', padx=30)
      # Ready to go? Then leave the focus on the Convert button
      if self.target.get() and self.infile.get():
          b_conv.focus_set()
      # As documentation told me
      if sys.platform.startswith('win'):
          self.root.iconify()
          self.root.update()
          self.root.deiconify()
      self.root.mainloop()
  4891. ##############################################################################
  4892. ##############################################################################
  def exec_command_line(user_cmdline=None):
      """Run the program for a command line, on GUI or console mode.

      user_cmdline: list with the command line arguments. When it is
                    empty or None, sys.argv[1:] is used instead.

      The command line is parsed and the CMDLINE_RAW, DEBUG, VERBOSE,
      QUIET and GUI globals are set from it. RC_RAW is loaded from the
      user configuration file, unless the 'rc' setting is 0 or the file
      does not exist. AA_CHARS is redefined when the 'ascii-art' setting
      is present and has the same length as AA_LCHARS.

      The GUI is tried when it was requested or when there are no input
      files. On GUI mode the global Error function is redefined to show
      a dialog and raise 'error'. On console mode the input files are
      converted with convert_this_files().

      Returns None.
      """
      global CMDLINE_RAW, RC_RAW, DEBUG, VERBOSE, QUIET, GUI, AA_CHARS, AA_LCHARS, Error
      # Extract command line data
      cmdline_data = user_cmdline or sys.argv[1:]
      CMDLINE_RAW = CommandLine().get_raw_config(cmdline_data, relative=1)
      cmdline_parsed = ConfigMaster(CMDLINE_RAW).parse()
      DEBUG = cmdline_parsed.get('debug') or 0
      VERBOSE = cmdline_parsed.get('verbose') or 0
      QUIET = cmdline_parsed.get('quiet') or 0
      GUI = cmdline_parsed.get('gui') or 0
      infiles = cmdline_parsed.get('infile') or []
      # User defined chars must match, one by one, the AA_LCHARS ones
      if 'ascii-art' in cmdline_parsed:
          lascii = cmdline_parsed['ascii-art']
          diff = len(AA_LCHARS) - len(lascii)
          if not diff:
              AA_CHARS = dict(zip(AA_LCHARS, lascii))
          else:
              Error(_("%i char(s) to the right number") % diff)
      Message(_("Txt2tags %s processing begins") % my_version, 1)
      # The easy ones
      if cmdline_parsed.get('help'):
          Quit(USAGE)
      if cmdline_parsed.get('version'):
          Quit(VERSIONSTR)
      # Multifile haters
      if len(infiles) > 1:
          errmsg = _("Option --%s can't be used with multiple input files")
          for option in NO_MULTI_INPUT:
              if cmdline_parsed.get(option):
                  Error(errmsg % option)
      Debug("system platform: %s" % sys.platform)
      Debug("python version: %s" % (sys.version.split('(')[0]))
      Debug("line break char: %s" % repr(LB))
      Debug("command line: %s" % sys.argv)
      Debug("command line raw config: %s" % CMDLINE_RAW, 1)
      # Extract RC file config
      if cmdline_parsed.get('rc') == 0:
          Message(_("Ignoring user configuration file"), 1)
      else:
          rc_file = get_rc_path()
          if os.path.isfile(rc_file):
              Message(_("Loading user configuration file"), 1)
              RC_RAW = ConfigLines(file=rc_file).get_raw_config()
          Debug("rc file: %s" % rc_file)
          Debug("rc file raw config: %s" % RC_RAW, 1)
      # Get all infiles config (if any)
      infiles_config = get_infiles_config(infiles)
      # Is GUI available?
      # Try to load and start GUI interface for --gui
      # If program was called with no arguments, try GUI also
      if GUI or not infiles:
          try:
              load_GUI_resources()
              Debug("GUI resources OK (Tk module is installed)")
              winbox = Gui()
              Debug("GUI display OK")
              GUI = 1
          except Exception:
              # Best effort: any failure here just means "no GUI".
              # Ctrl-C and SystemExit are not errors, so let them pass.
              Debug("GUI Error: no Tk module or no DISPLAY")
              GUI = 0
      # User forced --gui, but it's not available
      if cmdline_parsed.get('gui') and not GUI:
          print(getTraceback())
          print('')
          Error(
              "Sorry, I can't run my Graphical Interface - GUI\n"
              "- Check if Python Tcl/Tk module is installed (Tkinter)\n"
              "- Make sure you are in a graphical environment (like X)")
      # Okay, we will use GUI
      if GUI:
          Message(_("We are on GUI interface"), 1)

          # Redefine Error function to raise exception instead sys.exit()
          # (Error is declared global, so this affects the whole module)
          def Error(msg):
              showerror(_('txt2tags ERROR!'), msg)
              raise error

          # If no input file, get RC+cmdline config, else full config
          if not infiles:
              gui_conf = ConfigMaster(RC_RAW + CMDLINE_RAW).parse()
          else:
              try:
                  gui_conf = infiles_config[0][0]
              except (IndexError, KeyError, TypeError):
                  # No usable config for the first file, start from scratch
                  gui_conf = {}
          # Sanity is needed to set outfile and other things
          gui_conf = ConfigMaster().sanity(gui_conf, gui=1)
          Debug("GUI config: %s" % gui_conf, 5)
          # Insert config and populate the nice window!
          winbox.load_config(gui_conf)
          winbox.mainwindow()
      # Console mode rocks forever!
      else:
          Message(_("We are on Command Line interface"), 1)
          # Called with no arguments, show error
          if not infiles:
              Error(_('Missing input file (try --help)'))
          convert_this_files(infiles_config)
      Message(_("Txt2tags finished sucessfuly"), 1)
  # Script entry point: run the command line, report failures on STDERR
  if __name__ == '__main__':
      try:
          exec_command_line()
      except error:
          # Known program error: show its message and leave with status 1
          sys.stderr.write("%s\n" % sys.exc_info()[1])
          sys.stderr.flush()
          sys.exit(1)
      except SystemExit:
          # Somebody already asked to leave, that is not an error
          pass
      except:
          # Anything else is unexpected: show the full report and
          # leave with status 1
          report = getUnknownErrorMessage()
          sys.stderr.write(report)
          sys.stderr.flush()
          sys.exit(1)
      Quit()
  4998. # The End.