PageRenderTime 98ms CodeModel.GetById 26ms RepoModel.GetById 0ms app.codeStats 0ms

/old/txt2tags-2.5.py

http://txt2tags.googlecode.com/
Python | 4991 lines | 4488 code | 195 blank | 308 comment | 203 complexity | 4eaee5f18ce5088297939ff6fb92a688 MD5 | raw file
Possible License(s): GPL-2.0, GPL-3.0, WTFPL
  1. #!/usr/bin/env python
  2. # txt2tags - generic text conversion tool
  3. # http://txt2tags.sf.net
  4. #
  5. # Copyright 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 Aurelio Jargas
  6. #
  7. # This program is free software; you can redistribute it and/or modify
  8. # it under the terms of the GNU General Public License as published by
  9. # the Free Software Foundation, version 2.
  10. #
  11. # This program is distributed in the hope that it will be useful,
  12. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. # GNU General Public License for more details.
  15. #
  16. # You have received a copy of the GNU General Public License along
  17. # with this program, on the COPYING file.
  18. #
  19. ########################################################################
  20. #
  21. # BORING CODE EXPLANATION AHEAD
  22. #
  23. # Just read it if you wish to understand how the txt2tags code works.
  24. #
  25. ########################################################################
  26. #
  27. # The code that [1] parses the marked text is separated from the
  28. # code that [2] inserts the target tags.
  29. #
  30. # [1] made by: def convert()
  31. # [2] made by: class BlockMaster
  32. #
  33. # The structures of the marked text are identified and its contents are
  34. # extracted into a data holder (Python lists and dictionaries).
  35. #
  36. # When parsing the source file, the blocks (para, lists, quote, table)
  37. # are opened with BlockMaster as soon as they are found. Then their
  38. # contents, which span several lines, are fed into a special holder on
  39. # the BlockMaster instance. Only when the block is closed are the target
  40. # tags inserted for the full block as a whole, in one pass. This way, we
  41. # have better control over blocks. Much better than the previous
  42. # line-by-line approach.
  43. #
  44. # In other words, whenever inside a block, the parser *holds* the tag
  45. # insertion process, waiting until the full block is read. That was
  46. # needed primarily to close paragraphs for the XHTML target, but it
  47. # proved to be a very good addition, improving much of the other processing.
  48. #
  49. # -------------------------------------------------------------------
  50. #
  51. # These important classes are all documented:
  52. # CommandLine, SourceDocument, ConfigMaster, ConfigLines.
  53. #
  54. # There is a RAW Config format and all kind of configuration is first
  55. # converted to this format. Then a generic method parses it.
  56. #
  57. # These functions get information about the input file(s) and take
  58. # care of the init processing:
  59. # get_infiles_config(), process_source_file() and convert_this_files()
  60. #
  61. ########################################################################
  62. #XXX Python coding warning
  63. # Avoid common mistakes:
  64. # - do NOT use newlist=list instead of newlist=list[:]
  65. # - do NOT use newdic=dic instead of newdic=dic.copy()
  66. # - do NOT use dic[key] instead of dic.get(key)
  67. # - do NOT use del dic[key] without has_key() before
  68. #XXX Smart Image Align doesn't work if the image is a link
  69. # Can't fix that because the image is expanded together with the
  70. # link, at the linkbank filling moment. Only the image is passed
  71. # to parse_images(), not the full line, so it is always 'middle'.
  72. #XXX Paragraph separation not valid inside Quote
  73. # Quote will not have <p></p> inside, instead will close and open
  74. # again the <blockquote>. This really sux in CSS, when defining a
  75. # different background color. Still don't know how to fix it.
  76. #XXX TODO (maybe)
  77. # New mark or macro which expands to an anchor full title.
  78. # It is necessary to parse the full document in this order:
  79. # DONE 1st scan: HEAD: get all settings, including %!includeconf
  80. # DONE 2nd scan: BODY: expand includes & apply %!preproc
  81. # 3rd scan: BODY: read titles and compose TOC info
  82. # 4th scan: BODY: full parsing, expanding [#anchor] 1st
  83. # Steps 2 and 3 can be made together, with no tag adding.
  84. # Two complete body scans will be *slow*; not sure it is worth it.
  85. # One solution may be to add the titles as postproc rules
  86. ##############################################################################
  87. # User config (1=ON, 0=OFF)
  88. USE_I18N = 1 # use gettext for i18ned messages? (default is 1)
  89. COLOR_DEBUG = 1 # show debug messages in colors? (default is 1)
  90. BG_LIGHT = 0 # your terminal background color is light (default is 0)
  91. HTML_LOWER = 0 # use lowercased HTML tags instead upper? (default is 0)
  92. ##############################################################################
  93. # These are all the core Python modules used by txt2tags (KISS!)
  94. import re, string, os, sys, time, getopt
  95. # Program information
  96. my_url = 'http://txt2tags.sf.net'
  97. my_name = 'txt2tags'
  98. my_email = 'verde@aurelio.net'
  99. my_version = '2.5'
  100. # i18n - just use if available
  101. if USE_I18N:
  102. try:
  103. import gettext
  104. # If your locale dir is different, change it here
  105. cat = gettext.Catalog('txt2tags',localedir='/usr/share/locale/')
  106. _ = cat.gettext
  107. except:
  108. _ = lambda x:x
  109. else:
  110. _ = lambda x:x
  111. # FLAGS : the conversion related flags , may be used in %!options
  112. # OPTIONS : the conversion related options, may be used in %!options
  113. # ACTIONS : the other behavior modifiers, valid on command line only
  114. # MACROS : the valid macros with their default values for formatting
  115. # SETTINGS: global miscellaneous settings, valid on RC file only
  116. # NO_TARGET: actions that don't require a target specification
  117. # NO_MULTI_INPUT: actions that don't accept more than one input file
  118. # CONFIG_KEYWORDS: the valid %!key:val keywords
  119. #
  120. # FLAGS and OPTIONS are configs that affect the converted document.
  121. # They usually have also a --no-<option> to turn them OFF.
  122. #
  123. # ACTIONS are needed because when doing multiple input files, strange
  124. # behavior would be found, as use command line interface for the
  125. # first file and gui for the second. There is no --no-<action>.
  126. # --version and --help inside %!options are also odd
  127. #
  128. TARGETS = 'html xhtml sgml tex lout man mgp wiki gwiki doku moin pm6 txt'.split()
  129. FLAGS = {'headers' :1 , 'enum-title' :0 , 'mask-email' :0 ,
  130. 'toc-only' :0 , 'toc' :0 , 'rc' :1 ,
  131. 'css-sugar' :0 , 'css-suggar' :0 , 'css-inside' :0 ,
  132. 'quiet' :0 }
  133. OPTIONS = {'target' :'', 'toc-level' :3 , 'style' :'',
  134. 'infile' :'', 'outfile' :'', 'encoding' :'',
  135. 'config-file':'', 'split' :0 , 'lang' :'',
  136. 'show-config-value':'' }
  137. ACTIONS = {'help' :0 , 'version' :0 , 'gui' :0 ,
  138. 'verbose' :0 , 'debug' :0 , 'dump-config':0 ,
  139. 'dump-source':0 }
  140. MACROS = {'date' : '%Y%m%d', 'infile': '%f',
  141. 'mtime': '%Y%m%d', 'outfile': '%f'}
  142. SETTINGS = {} # for future use
  143. NO_TARGET = ['help', 'version', 'gui', 'toc-only', 'dump-config', 'dump-source']
  144. NO_MULTI_INPUT = ['gui','dump-config','dump-source']
  145. CONFIG_KEYWORDS = [
  146. 'target', 'encoding', 'style', 'options', 'preproc','postproc',
  147. 'guicolors']
  148. TARGET_NAMES = {
  149. 'html' : _('HTML page'),
  150. 'xhtml': _('XHTML page'),
  151. 'sgml' : _('SGML document'),
  152. 'tex' : _('LaTeX document'),
  153. 'lout' : _('Lout document'),
  154. 'man' : _('UNIX Manual page'),
  155. 'mgp' : _('MagicPoint presentation'),
  156. 'wiki' : _('Wikipedia page'),
  157. 'gwiki': _('Google Wiki page'),
  158. 'doku' : _('DokuWiki page'),
  159. 'moin' : _('MoinMoin page'),
  160. 'pm6' : _('PageMaker document'),
  161. 'txt' : _('Plain Text'),
  162. }
  163. DEBUG = 0 # do not edit here, please use --debug
  164. VERBOSE = 0 # do not edit here, please use -v, -vv or -vvv
  165. QUIET = 0 # do not edit here, please use --quiet
  166. GUI = 0 # do not edit here, please use --gui
  167. AUTOTOC = 1 # do not edit here, please use --no-toc or %%toc
  168. RC_RAW = []
  169. CMDLINE_RAW = []
  170. CONF = {}
  171. BLOCK = None
  172. regex = {}
  173. TAGS = {}
  174. rules = {}
  175. lang = 'english'
  176. TARGET = ''
  177. STDIN = STDOUT = '-'
  178. MODULEIN = MODULEOUT = '-module-'
  179. ESCCHAR = '\x00'
  180. SEPARATOR = '\x01'
  181. LISTNAMES = {'-':'list', '+':'numlist', ':':'deflist'}
  182. LINEBREAK = {'default':'\n', 'win':'\r\n', 'mac':'\r'}
  183. # Platform specific settings
  184. LB = LINEBREAK.get(sys.platform[:3]) or LINEBREAK['default']
  185. VERSIONSTR = _("%s version %s <%s>")%(my_name,my_version,my_url)
  186. USAGE = string.join([
  187. '',
  188. _("Usage: %s [OPTIONS] [infile.t2t ...]") % my_name,
  189. '',
  190. _(" -t, --target=TYPE set target document type. currently supported:"),
  191. ' %s,' % string.join(TARGETS[:8], ', '),
  192. ' %s' % string.join(TARGETS[8:], ', '),
  193. _(" -i, --infile=FILE set FILE as the input file name ('-' for STDIN)"),
  194. _(" -o, --outfile=FILE set FILE as the output file name ('-' for STDOUT)"),
  195. _(" -H, --no-headers suppress header, title and footer contents"),
  196. _(" --headers show header, title and footer contents (default ON)"),
  197. _(" --encoding=ENC set target file encoding (utf-8, iso-8859-1, etc)"),
  198. _(" --style=FILE use FILE as the document style (like HTML CSS)"),
  199. _(" --css-sugar insert CSS-friendly tags for HTML and XHTML targets"),
  200. _(" --css-inside insert CSS file contents inside HTML/XHTML headers"),
  201. _(" --mask-email hide email from spam robots. x@y.z turns <x (a) y z>"),
  202. _(" --toc add TOC (Table of Contents) to target document"),
  203. _(" --toc-only print document TOC and exit"),
  204. _(" --toc-level=N set maximum TOC level (depth) to N"),
  205. _(" -n, --enum-title enumerate all titles as 1, 1.1, 1.1.1, etc"),
  206. _(" -C, --config-file=F read config from file F"),
  207. _(" --rc read user config file ~/.txt2tagsrc (default ON)"),
  208. _(" --gui invoke Graphical Tk Interface"),
  209. _(" -q, --quiet quiet mode, suppress all output (except errors)"),
  210. _(" -v, --verbose print informative messages during conversion"),
  211. _(" -h, --help print this help information and exit"),
  212. _(" -V, --version print program version and exit"),
  213. _(" --dump-config print all the config found and exit"),
  214. _(" --dump-source print the document source, with includes expanded"),
  215. '',
  216. _("Turn OFF options:"),
  217. " --no-outfile, --no-infile, --no-style, --no-encoding, --no-headers",
  218. " --no-toc, --no-toc-only, --no-mask-email, --no-enum-title, --no-rc",
  219. " --no-css-sugar, --no-css-inside, --no-quiet, --no-dump-config",
  220. " --no-dump-source",
  221. '',
  222. _("Example:\n %s -t html --toc myfile.t2t") % my_name,
  223. '',
  224. _("By default, converted output is saved to 'infile.<target>'."),
  225. _("Use --outfile to force an output file name."),
  226. _("If input file is '-', reads from STDIN."),
  227. _("If output file is '-', dumps output to STDOUT."),
  228. '',
  229. 'http://txt2tags.sourceforge.net',
  230. ''
  231. ], '\n')
  232. ##############################################################################
  233. # Here is all the target's templates
  234. # You may edit them to fit your needs
  235. # - the %(HEADERn)s strings represent the Header lines
  236. # - the %(STYLE)s string is changed by --style contents
  237. # - the %(ENCODING)s string is changed by --encoding contents
  238. # - if any of the above is empty, the full line is removed
  239. # - use %% to represent a literal %
  240. #
  241. HEADER_TEMPLATE = {
  242. 'txt': """\
  243. %(HEADER1)s
  244. %(HEADER2)s
  245. %(HEADER3)s
  246. """,
  247. 'sgml': """\
  248. <!doctype linuxdoc system>
  249. <article>
  250. <title>%(HEADER1)s
  251. <author>%(HEADER2)s
  252. <date>%(HEADER3)s
  253. """,
  254. 'html': """\
  255. <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
  256. <HTML>
  257. <HEAD>
  258. <META NAME="generator" CONTENT="http://txt2tags.sf.net">
  259. <META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=%(ENCODING)s">
  260. <LINK REL="stylesheet" TYPE="text/css" HREF="%(STYLE)s">
  261. <TITLE>%(HEADER1)s</TITLE>
  262. </HEAD><BODY BGCOLOR="white" TEXT="black">
  263. <P ALIGN="center"><CENTER><H1>%(HEADER1)s</H1>
  264. <FONT SIZE="4">
  265. <I>%(HEADER2)s</I><BR>
  266. %(HEADER3)s
  267. </FONT></CENTER>
  268. """,
  269. 'htmlcss': """\
  270. <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
  271. <HTML>
  272. <HEAD>
  273. <META NAME="generator" CONTENT="http://txt2tags.sf.net">
  274. <META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=%(ENCODING)s">
  275. <LINK REL="stylesheet" TYPE="text/css" HREF="%(STYLE)s">
  276. <TITLE>%(HEADER1)s</TITLE>
  277. </HEAD>
  278. <BODY>
  279. <DIV CLASS="header" ID="header">
  280. <H1>%(HEADER1)s</H1>
  281. <H2>%(HEADER2)s</H2>
  282. <H3>%(HEADER3)s</H3>
  283. </DIV>
  284. """,
  285. 'xhtml': """\
  286. <?xml version="1.0"
  287. encoding="%(ENCODING)s"
  288. ?>
  289. <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"\
  290. "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
  291. <html xmlns="http://www.w3.org/1999/xhtml">
  292. <head>
  293. <title>%(HEADER1)s</title>
  294. <meta name="generator" content="http://txt2tags.sf.net" />
  295. <link rel="stylesheet" type="text/css" href="%(STYLE)s" />
  296. </head>
  297. <body bgcolor="white" text="black">
  298. <div align="center">
  299. <h1>%(HEADER1)s</h1>
  300. <h2>%(HEADER2)s</h2>
  301. <h3>%(HEADER3)s</h3>
  302. </div>
  303. """,
  304. 'xhtmlcss': """\
  305. <?xml version="1.0"
  306. encoding="%(ENCODING)s"
  307. ?>
  308. <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"\
  309. "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
  310. <html xmlns="http://www.w3.org/1999/xhtml">
  311. <head>
  312. <title>%(HEADER1)s</title>
  313. <meta name="generator" content="http://txt2tags.sf.net" />
  314. <link rel="stylesheet" type="text/css" href="%(STYLE)s" />
  315. </head>
  316. <body>
  317. <div class="header" id="header">
  318. <h1>%(HEADER1)s</h1>
  319. <h2>%(HEADER2)s</h2>
  320. <h3>%(HEADER3)s</h3>
  321. </div>
  322. """,
  323. 'man': """\
  324. .TH "%(HEADER1)s" 1 "%(HEADER3)s" "%(HEADER2)s"
  325. """,
  326. # TODO style to <HR>
  327. 'pm6': """\
  328. <PMTags1.0 win><C-COLORTABLE ("Preto" 1 0 0 0)
  329. ><@Normal=
  330. <FONT "Times New Roman"><CCOLOR "Preto"><SIZE 11>
  331. <HORIZONTAL 100><LETTERSPACE 0><CTRACK 127><CSSIZE 70><C+SIZE 58.3>
  332. <C-POSITION 33.3><C+POSITION 33.3><P><CBASELINE 0><CNOBREAK 0><CLEADING -0.05>
  333. <GGRID 0><GLEFT 7.2><GRIGHT 0><GFIRST 0><G+BEFORE 7.2><G+AFTER 0>
  334. <GALIGNMENT "justify"><GMETHOD "proportional"><G& "ENGLISH">
  335. <GPAIRS 12><G%% 120><GKNEXT 0><GKWIDOW 0><GKORPHAN 0><GTABS $>
  336. <GHYPHENATION 2 34 0><GWORDSPACE 75 100 150><GSPACE -5 0 25>
  337. ><@Bullet=<@-PARENT "Normal"><FONT "Abadi MT Condensed Light">
  338. <GLEFT 14.4><G+BEFORE 2.15><G%% 110><GTABS(25.2 l "")>
  339. ><@PreFormat=<@-PARENT "Normal"><FONT "Lucida Console"><SIZE 8><CTRACK 0>
  340. <GLEFT 0><G+BEFORE 0><GALIGNMENT "left"><GWORDSPACE 100 100 100><GSPACE 0 0 0>
  341. ><@Title1=<@-PARENT "Normal"><FONT "Arial"><SIZE 14><B>
  342. <GCONTENTS><GLEFT 0><G+BEFORE 0><GALIGNMENT "left">
  343. ><@Title2=<@-PARENT "Title1"><SIZE 12><G+BEFORE 3.6>
  344. ><@Title3=<@-PARENT "Title1"><SIZE 10><GLEFT 7.2><G+BEFORE 7.2>
  345. ><@Title4=<@-PARENT "Title3">
  346. ><@Title5=<@-PARENT "Title3">
  347. ><@Quote=<@-PARENT "Normal"><SIZE 10><I>>
  348. %(HEADER1)s
  349. %(HEADER2)s
  350. %(HEADER3)s
  351. """,
  352. 'mgp': """\
  353. #!/usr/X11R6/bin/mgp -t 90
  354. %%deffont "normal" xfont "utopia-medium-r", charset "iso8859-1"
  355. %%deffont "normal-i" xfont "utopia-medium-i", charset "iso8859-1"
  356. %%deffont "normal-b" xfont "utopia-bold-r" , charset "iso8859-1"
  357. %%deffont "normal-bi" xfont "utopia-bold-i" , charset "iso8859-1"
  358. %%deffont "mono" xfont "courier-medium-r", charset "iso8859-1"
  359. %%default 1 size 5
  360. %%default 2 size 8, fore "yellow", font "normal-b", center
  361. %%default 3 size 5, fore "white", font "normal", left, prefix " "
  362. %%tab 1 size 4, vgap 30, prefix " ", icon arc "red" 40, leftfill
  363. %%tab 2 prefix " ", icon arc "orange" 40, leftfill
  364. %%tab 3 prefix " ", icon arc "brown" 40, leftfill
  365. %%tab 4 prefix " ", icon arc "darkmagenta" 40, leftfill
  366. %%tab 5 prefix " ", icon arc "magenta" 40, leftfill
  367. %%%%------------------------- end of headers -----------------------------
  368. %%page
  369. %%size 10, center, fore "yellow"
  370. %(HEADER1)s
  371. %%font "normal-i", size 6, fore "white", center
  372. %(HEADER2)s
  373. %%font "mono", size 7, center
  374. %(HEADER3)s
  375. """,
  376. 'moin': """\
  377. '''%(HEADER1)s'''
  378. ''%(HEADER2)s''
  379. %(HEADER3)s
  380. """,
  381. 'gwiki': """\
  382. *%(HEADER1)s*
  383. %(HEADER2)s
  384. _%(HEADER3)s_
  385. """,
  386. 'doku': """\
  387. ===== %(HEADER1)s =====
  388. **//%(HEADER2)s//**
  389. //%(HEADER3)s//
  390. """,
  391. 'wiki': """\
  392. '''%(HEADER1)s'''
  393. %(HEADER2)s
  394. ''%(HEADER3)s''
  395. """,
  396. 'tex': \
  397. r"""\documentclass{article}
  398. \usepackage{graphicx}
  399. \usepackage[normalem]{ulem} %% needed by strike
  400. \usepackage[urlcolor=blue,colorlinks=true]{hyperref}
  401. \usepackage[%(ENCODING)s]{inputenc} %% char encoding
  402. \usepackage{%(STYLE)s} %% user defined
  403. \title{%(HEADER1)s}
  404. \author{%(HEADER2)s}
  405. \begin{document}
  406. \date{%(HEADER3)s}
  407. \maketitle
  408. \clearpage
  409. """,
  410. 'lout': """\
  411. @SysInclude { doc }
  412. @Document
  413. @InitialFont { Times Base 12p } # Times, Courier, Helvetica, ...
  414. @PageOrientation { Portrait } # Portrait, Landscape
  415. @ColumnNumber { 1 } # Number of columns (2, 3, ...)
  416. @PageHeaders { Simple } # None, Simple, Titles, NoTitles
  417. @InitialLanguage { English } # German, French, Portuguese, ...
  418. @OptimizePages { Yes } # Yes/No smart page break feature
  419. //
  420. @Text @Begin
  421. @Display @Heading { %(HEADER1)s }
  422. @Display @I { %(HEADER2)s }
  423. @Display { %(HEADER3)s }
  424. #@NP # Break page after Headers
  425. """
  426. # @SysInclude { tbl } # Tables support
  427. # setup: @MakeContents { Yes } # show TOC
  428. # setup: @SectionGap # break page at each section
  429. }
  430. ##############################################################################
  431. def getTags(config):
  432. "Returns all the known tags for the specified target"
  433. keys = """
  434. title1 numtitle1
  435. title2 numtitle2
  436. title3 numtitle3
  437. title4 numtitle4
  438. title5 numtitle5
  439. title1Open title1Close
  440. title2Open title2Close
  441. title3Open title3Close
  442. title4Open title4Close
  443. title5Open title5Close
  444. blocktitle1Open blocktitle1Close
  445. blocktitle2Open blocktitle2Close
  446. blocktitle3Open blocktitle3Close
  447. paragraphOpen paragraphClose
  448. blockVerbOpen blockVerbClose
  449. blockQuoteOpen blockQuoteClose blockQuoteLine
  450. blockCommentOpen blockCommentClose
  451. fontMonoOpen fontMonoClose
  452. fontBoldOpen fontBoldClose
  453. fontItalicOpen fontItalicClose
  454. fontUnderlineOpen fontUnderlineClose
  455. fontStrikeOpen fontStrikeClose
  456. listOpen listClose
  457. listItemOpen listItemClose listItemLine
  458. numlistOpen numlistClose
  459. numlistItemOpen numlistItemClose numlistItemLine
  460. deflistOpen deflistClose
  461. deflistItem1Open deflistItem1Close
  462. deflistItem2Open deflistItem2Close deflistItem2LinePrefix
  463. bar1 bar2
  464. url urlMark
  465. email emailMark
  466. img imgAlignLeft imgAlignRight imgAlignCenter
  467. _imgAlignLeft _imgAlignRight _imgAlignCenter
  468. tableOpen tableClose
  469. _tableBorder _tableAlignLeft _tableAlignCenter
  470. tableRowOpen tableRowClose tableRowSep
  471. tableTitleRowOpen tableTitleRowClose
  472. tableCellOpen tableCellClose tableCellSep
  473. tableTitleCellOpen tableTitleCellClose tableTitleCellSep
  474. _tableColAlignLeft _tableColAlignRight _tableColAlignCenter
  475. _tableCellAlignLeft _tableCellAlignRight _tableCellAlignCenter
  476. _tableCellColSpan tableColAlignSep
  477. bodyOpen bodyClose
  478. cssOpen cssClose
  479. tocOpen tocClose TOC
  480. anchor
  481. comment
  482. pageBreak
  483. EOD
  484. """.split()
  485. # TIP: \a represents the current text on the mark
  486. # TIP: ~A~, ~B~ and ~C~ are expanded to other tags parts
  487. alltags = {
  488. 'txt': {
  489. 'title1' : ' \a' ,
  490. 'title2' : '\t\a' ,
  491. 'title3' : '\t\t\a' ,
  492. 'title4' : '\t\t\t\a' ,
  493. 'title5' : '\t\t\t\t\a',
  494. 'blockQuoteLine' : '\t' ,
  495. 'listItemOpen' : '- ' ,
  496. 'numlistItemOpen' : '\a. ' ,
  497. 'bar1' : '\a' ,
  498. 'url' : '\a' ,
  499. 'urlMark' : '\a (\a)' ,
  500. 'email' : '\a' ,
  501. 'emailMark' : '\a (\a)' ,
  502. 'img' : '[\a]' ,
  503. },
  504. 'html': {
  505. 'paragraphOpen' : '<P>' ,
  506. 'paragraphClose' : '</P>' ,
  507. 'title1' : '~A~<H1>\a</H1>' ,
  508. 'title2' : '~A~<H2>\a</H2>' ,
  509. 'title3' : '~A~<H3>\a</H3>' ,
  510. 'title4' : '~A~<H4>\a</H4>' ,
  511. 'title5' : '~A~<H5>\a</H5>' ,
  512. 'anchor' : '<A NAME="\a"></A>\n',
  513. 'blockVerbOpen' : '<PRE>' ,
  514. 'blockVerbClose' : '</PRE>' ,
  515. 'blockQuoteOpen' : '<BLOCKQUOTE>' ,
  516. 'blockQuoteClose' : '</BLOCKQUOTE>' ,
  517. 'fontMonoOpen' : '<CODE>' ,
  518. 'fontMonoClose' : '</CODE>' ,
  519. 'fontBoldOpen' : '<B>' ,
  520. 'fontBoldClose' : '</B>' ,
  521. 'fontItalicOpen' : '<I>' ,
  522. 'fontItalicClose' : '</I>' ,
  523. 'fontUnderlineOpen' : '<U>' ,
  524. 'fontUnderlineClose' : '</U>' ,
  525. 'fontStrikeOpen' : '<S>' ,
  526. 'fontStrikeClose' : '</S>' ,
  527. 'listOpen' : '<UL>' ,
  528. 'listClose' : '</UL>' ,
  529. 'listItemOpen' : '<LI>' ,
  530. 'numlistOpen' : '<OL>' ,
  531. 'numlistClose' : '</OL>' ,
  532. 'numlistItemOpen' : '<LI>' ,
  533. 'deflistOpen' : '<DL>' ,
  534. 'deflistClose' : '</DL>' ,
  535. 'deflistItem1Open' : '<DT>' ,
  536. 'deflistItem1Close' : '</DT>' ,
  537. 'deflistItem2Open' : '<DD>' ,
  538. 'bar1' : '<HR NOSHADE SIZE=1>' ,
  539. 'bar2' : '<HR NOSHADE SIZE=5>' ,
  540. 'url' : '<A HREF="\a">\a</A>' ,
  541. 'urlMark' : '<A HREF="\a">\a</A>' ,
  542. 'email' : '<A HREF="mailto:\a">\a</A>' ,
  543. 'emailMark' : '<A HREF="mailto:\a">\a</A>' ,
  544. 'img' : '<IMG~A~ SRC="\a" BORDER="0" ALT="">',
  545. '_imgAlignLeft' : ' ALIGN="left"' ,
  546. '_imgAlignCenter' : ' ALIGN="middle"',
  547. '_imgAlignRight' : ' ALIGN="right"' ,
  548. 'tableOpen' : '<TABLE~A~~B~ CELLPADDING="4">',
  549. 'tableClose' : '</TABLE>' ,
  550. 'tableRowOpen' : '<TR>' ,
  551. 'tableRowClose' : '</TR>' ,
  552. 'tableCellOpen' : '<TD~A~~S~>' ,
  553. 'tableCellClose' : '</TD>' ,
  554. 'tableTitleCellOpen' : '<TH~S~>' ,
  555. 'tableTitleCellClose' : '</TH>' ,
  556. '_tableBorder' : ' BORDER="1"' ,
  557. '_tableAlignCenter' : ' ALIGN="center"',
  558. '_tableCellAlignRight' : ' ALIGN="right"' ,
  559. '_tableCellAlignCenter': ' ALIGN="center"',
  560. '_tableCellColSpan' : ' COLSPAN="\a"' ,
  561. 'cssOpen' : '<STYLE TYPE="text/css">',
  562. 'cssClose' : '</STYLE>' ,
  563. 'comment' : '<!-- \a -->' ,
  564. 'EOD' : '</BODY></HTML>'
  565. },
  566. #TIP xhtml inherits all HTML definitions (lowercased)
  567. #TIP http://www.w3.org/TR/xhtml1/#guidelines
  568. #TIP http://www.htmlref.com/samples/Chapt17/17_08.htm
  569. 'xhtml': {
  570. 'listItemClose' : '</li>' ,
  571. 'numlistItemClose' : '</li>' ,
  572. 'deflistItem2Close' : '</dd>' ,
  573. 'bar1' : '<hr class="light" />',
  574. 'bar2' : '<hr class="heavy" />',
  575. 'anchor' : '<a id="\a" name="\a"></a>\n',
  576. 'img' : '<img~A~ src="\a" border="0" alt=""/>',
  577. },
  578. 'sgml': {
  579. 'paragraphOpen' : '<p>' ,
  580. 'title1' : '<sect>\a~A~<p>' ,
  581. 'title2' : '<sect1>\a~A~<p>' ,
  582. 'title3' : '<sect2>\a~A~<p>' ,
  583. 'title4' : '<sect3>\a~A~<p>' ,
  584. 'title5' : '<sect4>\a~A~<p>' ,
  585. 'anchor' : '<label id="\a">' ,
  586. 'blockVerbOpen' : '<tscreen><verb>' ,
  587. 'blockVerbClose' : '</verb></tscreen>' ,
  588. 'blockQuoteOpen' : '<quote>' ,
  589. 'blockQuoteClose' : '</quote>' ,
  590. 'fontMonoOpen' : '<tt>' ,
  591. 'fontMonoClose' : '</tt>' ,
  592. 'fontBoldOpen' : '<bf>' ,
  593. 'fontBoldClose' : '</bf>' ,
  594. 'fontItalicOpen' : '<em>' ,
  595. 'fontItalicClose' : '</em>' ,
  596. 'fontUnderlineOpen' : '<bf><em>' ,
  597. 'fontUnderlineClose' : '</em></bf>' ,
  598. 'listOpen' : '<itemize>' ,
  599. 'listClose' : '</itemize>' ,
  600. 'listItemOpen' : '<item>' ,
  601. 'numlistOpen' : '<enum>' ,
  602. 'numlistClose' : '</enum>' ,
  603. 'numlistItemOpen' : '<item>' ,
  604. 'deflistOpen' : '<descrip>' ,
  605. 'deflistClose' : '</descrip>' ,
  606. 'deflistItem1Open' : '<tag>' ,
  607. 'deflistItem1Close' : '</tag>' ,
  608. 'bar1' : '<!-- \a -->' ,
  609. 'url' : '<htmlurl url="\a" name="\a">' ,
  610. 'urlMark' : '<htmlurl url="\a" name="\a">' ,
  611. 'email' : '<htmlurl url="mailto:\a" name="\a">' ,
  612. 'emailMark' : '<htmlurl url="mailto:\a" name="\a">' ,
  613. 'img' : '<figure><ph vspace=""><img src="\a">'+\
  614. '</figure>' ,
  615. 'tableOpen' : '<table><tabular ca="~C~">' ,
  616. 'tableClose' : '</tabular></table>' ,
  617. 'tableRowSep' : '<rowsep>' ,
  618. 'tableCellSep' : '<colsep>' ,
  619. '_tableColAlignLeft' : 'l' ,
  620. '_tableColAlignRight' : 'r' ,
  621. '_tableColAlignCenter': 'c' ,
  622. 'comment' : '<!-- \a -->' ,
  623. 'TOC' : '<toc>' ,
  624. 'EOD' : '</article>'
  625. },
  626. 'tex': {
  627. 'title1' : '\n~A~\section*{\a}' ,
  628. 'title2' : '~A~\\subsection*{\a}' ,
  629. 'title3' : '~A~\\subsubsection*{\a}',
  630. # title 4/5: DIRTY: para+BF+\\+\n
  631. 'title4' : '~A~\\paragraph{}\\textbf{\a}\\\\\n',
  632. 'title5' : '~A~\\paragraph{}\\textbf{\a}\\\\\n',
  633. 'numtitle1' : '\n~A~\section{\a}' ,
  634. 'numtitle2' : '~A~\\subsection{\a}' ,
  635. 'numtitle3' : '~A~\\subsubsection{\a}' ,
  636. 'anchor' : '\\hypertarget{\a}{}\n' ,
  637. 'blockVerbOpen' : '\\begin{verbatim}' ,
  638. 'blockVerbClose' : '\\end{verbatim}' ,
  639. 'blockQuoteOpen' : '\\begin{quotation}' ,
  640. 'blockQuoteClose' : '\\end{quotation}' ,
  641. 'fontMonoOpen' : '\\texttt{' ,
  642. 'fontMonoClose' : '}' ,
  643. 'fontBoldOpen' : '\\textbf{' ,
  644. 'fontBoldClose' : '}' ,
  645. 'fontItalicOpen' : '\\textit{' ,
  646. 'fontItalicClose' : '}' ,
  647. 'fontUnderlineOpen' : '\\underline{' ,
  648. 'fontUnderlineClose' : '}' ,
  649. 'fontStrikeOpen' : '\\sout{' ,
  650. 'fontStrikeClose' : '}' ,
  651. 'listOpen' : '\\begin{itemize}' ,
  652. 'listClose' : '\\end{itemize}' ,
  653. 'listItemOpen' : '\\item ' ,
  654. 'numlistOpen' : '\\begin{enumerate}' ,
  655. 'numlistClose' : '\\end{enumerate}' ,
  656. 'numlistItemOpen' : '\\item ' ,
  657. 'deflistOpen' : '\\begin{description}',
  658. 'deflistClose' : '\\end{description}' ,
  659. 'deflistItem1Open' : '\\item[' ,
  660. 'deflistItem1Close' : ']' ,
  661. 'bar1' : '\n\\hrulefill{}\n' ,
  662. 'bar2' : '\n\\rule{\linewidth}{1mm}\n',
  663. 'url' : '\\htmladdnormallink{\a}{\a}',
  664. 'urlMark' : '\\htmladdnormallink{\a}{\a}',
  665. 'email' : '\\htmladdnormallink{\a}{mailto:\a}',
  666. 'emailMark' : '\\htmladdnormallink{\a}{mailto:\a}',
  667. 'img' : '\\includegraphics{\a}',
  668. 'tableOpen' : '\\begin{center}\\begin{tabular}{|~C~|}',
  669. 'tableClose' : '\\end{tabular}\\end{center}',
  670. 'tableRowOpen' : '\\hline ' ,
  671. 'tableRowClose' : ' \\\\' ,
  672. 'tableCellSep' : ' & ' ,
  673. '_tableColAlignLeft' : 'l' ,
  674. '_tableColAlignRight' : 'r' ,
  675. '_tableColAlignCenter': 'c' ,
  676. 'tableColAlignSep' : '|' ,
  677. 'comment' : '% \a' ,
  678. 'TOC' : '\\tableofcontents',
  679. 'pageBreak' : '\\clearpage',
  680. 'EOD' : '\\end{document}'
  681. },
  682. 'lout': {
  683. 'paragraphOpen' : '@LP' ,
  684. 'blockTitle1Open' : '@BeginSections' ,
  685. 'blockTitle1Close' : '@EndSections' ,
  686. 'blockTitle2Open' : ' @BeginSubSections' ,
  687. 'blockTitle2Close' : ' @EndSubSections' ,
  688. 'blockTitle3Open' : ' @BeginSubSubSections' ,
  689. 'blockTitle3Close' : ' @EndSubSubSections' ,
  690. 'title1Open' : '\n~A~@Section @Title { \a } @Begin',
  691. 'title1Close' : '@End @Section' ,
  692. 'title2Open' : '\n~A~ @SubSection @Title { \a } @Begin',
  693. 'title2Close' : ' @End @SubSection' ,
  694. 'title3Open' : '\n~A~ @SubSubSection @Title { \a } @Begin',
  695. 'title3Close' : ' @End @SubSubSection' ,
  696. 'title4Open' : '\n~A~@LP @LeftDisplay @B { \a }',
  697. 'title5Open' : '\n~A~@LP @LeftDisplay @B { \a }',
  698. 'anchor' : '@Tag { \a }\n' ,
  699. 'blockVerbOpen' : '@LP @ID @F @RawVerbatim @Begin',
  700. 'blockVerbClose' : '@End @RawVerbatim' ,
  701. 'blockQuoteOpen' : '@QD {' ,
  702. 'blockQuoteClose' : '}' ,
  703. # enclosed inside {} to deal with joined**words**
  704. 'fontMonoOpen' : '{@F {' ,
  705. 'fontMonoClose' : '}}' ,
  706. 'fontBoldOpen' : '{@B {' ,
  707. 'fontBoldClose' : '}}' ,
  708. 'fontItalicOpen' : '{@II {' ,
  709. 'fontItalicClose' : '}}' ,
  710. 'fontUnderlineOpen' : '{@Underline{' ,
  711. 'fontUnderlineClose' : '}}' ,
  712. # the full form is more readable, but could be BL EL LI NL TL DTI
  713. 'listOpen' : '@BulletList' ,
  714. 'listClose' : '@EndList' ,
  715. 'listItemOpen' : '@ListItem{' ,
  716. 'listItemClose' : '}' ,
  717. 'numlistOpen' : '@NumberedList' ,
  718. 'numlistClose' : '@EndList' ,
  719. 'numlistItemOpen' : '@ListItem{' ,
  720. 'numlistItemClose' : '}' ,
  721. 'deflistOpen' : '@TaggedList' ,
  722. 'deflistClose' : '@EndList' ,
  723. 'deflistItem1Open' : '@DropTagItem {' ,
  724. 'deflistItem1Close' : '}' ,
  725. 'deflistItem2Open' : '{' ,
  726. 'deflistItem2Close' : '}' ,
  727. 'bar1' : '\n@DP @FullWidthRule\n' ,
  728. 'url' : '{blue @Colour { \a }}' ,
  729. 'urlMark' : '\a ({blue @Colour { \a }})' ,
  730. 'email' : '{blue @Colour { \a }}' ,
  731. 'emailMark' : '\a ({blue Colour{ \a }})' ,
  732. 'img' : '~A~@IncludeGraphic { \a }' , # eps only!
  733. '_imgAlignLeft' : '@LeftDisplay ' ,
  734. '_imgAlignRight' : '@RightDisplay ' ,
  735. '_imgAlignCenter' : '@CentredDisplay ' ,
  736. # lout tables are *way* complicated, no support for now
  737. #'tableOpen' : '~A~@Tbl~B~\naformat{ @Cell A | @Cell B } {',
  738. #'tableClose' : '}' ,
  739. #'tableRowOpen' : '@Rowa\n' ,
  740. #'tableTitleRowOpen' : '@HeaderRowa' ,
  741. #'tableCenterAlign' : '@CentredDisplay ' ,
  742. #'tableCellOpen' : '\a {' , # A, B, ...
  743. #'tableCellClose' : '}' ,
  744. #'_tableBorder' : '\nrule {yes}' ,
  745. 'comment' : '# \a' ,
  746. # @MakeContents must be on the config file
  747. 'TOC' : '@DP @ContentsGoesHere @DP',
  748. 'pageBreak' : '\n@NP\n' ,
  749. 'EOD' : '@End @Text'
  750. },
  751. # http://moinmo.in/SyntaxReference
  752. 'moin': {
  753. 'title1' : '= \a =' ,
  754. 'title2' : '== \a ==' ,
  755. 'title3' : '=== \a ===' ,
  756. 'title4' : '==== \a ====' ,
  757. 'title5' : '===== \a =====',
  758. 'blockVerbOpen' : '{{{' ,
  759. 'blockVerbClose' : '}}}' ,
  760. 'blockQuoteLine' : ' ' ,
  761. 'fontMonoOpen' : '{{{' ,
  762. 'fontMonoClose' : '}}}' ,
  763. 'fontBoldOpen' : "'''" ,
  764. 'fontBoldClose' : "'''" ,
  765. 'fontItalicOpen' : "''" ,
  766. 'fontItalicClose' : "''" ,
  767. 'fontUnderlineOpen' : '__' ,
  768. 'fontUnderlineClose' : '__' ,
  769. 'fontStrikeOpen' : '--(' ,
  770. 'fontStrikeClose' : ')--' ,
  771. 'listItemOpen' : ' * ' ,
  772. 'numlistItemOpen' : ' \a. ' ,
  773. 'deflistItem1Open' : ' ' ,
  774. 'deflistItem1Close' : '::' ,
  775. 'deflistItem2LinePrefix': ' :: ' ,
  776. 'bar1' : '----' ,
  777. 'bar2' : '--------' ,
  778. 'url' : '[\a]' ,
  779. 'urlMark' : '[\a \a]' ,
  780. 'email' : '[\a]' ,
  781. 'emailMark' : '[\a \a]' ,
  782. 'img' : '[\a]' ,
  783. 'tableRowOpen' : '||' ,
  784. 'tableCellOpen' : '~A~' ,
  785. 'tableCellClose' : '||' ,
  786. 'tableTitleCellClose' : '||' ,
  787. '_tableCellAlignRight' : '<)>' ,
  788. '_tableCellAlignCenter' : '<:>' ,
  789. 'comment' : '/* \a */' ,
  790. 'TOC' : '[[TableOfContents]]'
  791. },
  792. # http://code.google.com/p/support/wiki/WikiSyntax
  793. 'gwiki': {
  794. 'title1' : '= \a =' ,
  795. 'title2' : '== \a ==' ,
  796. 'title3' : '=== \a ===' ,
  797. 'title4' : '==== \a ====' ,
  798. 'title5' : '===== \a =====',
  799. 'blockVerbOpen' : '{{{' ,
  800. 'blockVerbClose' : '}}}' ,
  801. 'blockQuoteLine' : ' ' ,
  802. 'fontMonoOpen' : '{{{' ,
  803. 'fontMonoClose' : '}}}' ,
  804. 'fontBoldOpen' : '*' ,
  805. 'fontBoldClose' : '*' ,
  806. 'fontItalicOpen' : '_' , # underline == italic
  807. 'fontItalicClose' : '_' ,
  808. 'fontStrikeOpen' : '~~' ,
  809. 'fontStrikeClose' : '~~' ,
  810. 'listItemOpen' : ' * ' ,
  811. 'numlistItemOpen' : ' # ' ,
  812. 'url' : '\a' ,
  813. 'urlMark' : '[\a \a]' ,
  814. 'email' : 'mailto:\a' ,
  815. 'emailMark' : '[mailto:\a \a]',
  816. 'img' : '[\a]' ,
  817. 'tableRowOpen' : '|| ' ,
  818. 'tableRowClose' : ' ||' ,
  819. 'tableCellSep' : ' || ' ,
  820. },
  821. # http://wiki.splitbrain.org/wiki:syntax
  822. # Hint: <br> is \\ $
  823. # Hint: You can add footnotes ((This is a footnote))
  824. 'doku': {
  825. 'title1' : '===== \a =====',
  826. 'title2' : '==== \a ====' ,
  827. 'title3' : '=== \a ===' ,
  828. 'title4' : '== \a ==' ,
  829. 'title5' : '= \a =' ,
  830. # DokuWiki uses ' ' identation to mark verb blocks (see indentverbblock)
  831. 'blockQuoteLine' : '>' ,
  832. 'fontMonoOpen' : "''" ,
  833. 'fontMonoClose' : "''" ,
  834. 'fontBoldOpen' : "**" ,
  835. 'fontBoldClose' : "**" ,
  836. 'fontItalicOpen' : "//" ,
  837. 'fontItalicClose' : "//" ,
  838. 'fontUnderlineOpen' : "__" ,
  839. 'fontUnderlineClose' : "__" ,
  840. 'fontStrikeOpen' : '<del>' ,
  841. 'fontStrikeClose' : '</del>' ,
  842. 'listItemOpen' : ' * ' ,
  843. 'numlistItemOpen' : ' - ' ,
  844. 'bar1' : '----' ,
  845. 'url' : '[[\a]]' ,
  846. 'urlMark' : '[[\a|\a]]' ,
  847. 'email' : '[[\a]]' ,
  848. 'emailMark' : '[[\a|\a]]' ,
  849. 'img' : '{{\a}}' ,
  850. 'imgAlignLeft' : '{{\a }}' ,
  851. 'imgAlignRight' : '{{ \a}}' ,
  852. 'imgAlignCenter' : '{{ \a }}' ,
  853. 'tableTitleRowOpen' : '^ ' ,
  854. 'tableTitleRowClose' : ' ^' ,
  855. 'tableTitleCellSep' : ' ^ ' ,
  856. 'tableRowOpen' : '| ' ,
  857. 'tableRowClose' : ' |' ,
  858. 'tableCellSep' : ' | ' ,
  859. # DokuWiki has no attributes. The content must be aligned!
  860. # '_tableCellAlignRight' : '<)>' , # ??
  861. # '_tableCellAlignCenter': '<:>' , # ??
  862. # DokuWiki colspan is the same as txt2tags' with multiple |||
  863. # 'comment' : '## \a' , # ??
  864. # TOC is automatic
  865. },
  866. # http://en.wikipedia.org/wiki/Help:Editing
  867. 'wiki': {
  868. 'title1' : '== \a ==' ,
  869. 'title2' : '=== \a ===' ,
  870. 'title3' : '==== \a ====' ,
  871. 'title4' : '===== \a =====' ,
  872. 'title5' : '====== \a ======',
  873. 'blockVerbOpen' : '<pre>' ,
  874. 'blockVerbClose' : '</pre>' ,
  875. 'blockQuoteOpen' : '<blockquote>' ,
  876. 'blockQuoteClose' : '</blockquote>' ,
  877. 'fontMonoOpen' : '<tt>' ,
  878. 'fontMonoClose' : '</tt>' ,
  879. 'fontBoldOpen' : "'''" ,
  880. 'fontBoldClose' : "'''" ,
  881. 'fontItalicOpen' : "''" ,
  882. 'fontItalicClose' : "''" ,
  883. 'fontUnderlineOpen' : '<u>' ,
  884. 'fontUnderlineClose' : '</u>' ,
  885. 'fontStrikeOpen' : '<s>' ,
  886. 'fontStrikeClose' : '</s>' ,
  887. #XXX Mixed lists not working: *#* list inside numlist inside list
  888. 'listItemLine' : '*' ,
  889. 'numlistItemLine' : '#' ,
  890. 'deflistItem1Open' : '; ' ,
  891. 'deflistItem2LinePrefix': ': ' ,
  892. 'bar1' : '----' ,
  893. 'url' : '[\a]' ,
  894. 'urlMark' : '[\a \a]' ,
  895. 'email' : 'mailto:\a' ,
  896. 'emailMark' : '[mailto:\a \a]' ,
  897. # [[Image:foo.png|right|Optional alt/caption text]] (right, left, center, none)
  898. 'img' : '[[Image:\a~A~]]' ,
  899. '_imgAlignLeft' : '|left' ,
  900. '_imgAlignCenter' : '|center' ,
  901. '_imgAlignRight' : '|right' ,
  902. # {| border="1" cellspacing="0" cellpadding="4" align="center"
  903. 'tableOpen' : '{|~A~~B~ cellpadding="4"',
  904. 'tableClose' : '|}' ,
  905. 'tableRowOpen' : '|-\n| ' ,
  906. 'tableTitleRowOpen' : '|-\n! ' ,
  907. 'tableCellSep' : ' || ' ,
  908. 'tableTitleCellSep' : ' !! ' ,
  909. '_tableBorder' : ' border="1"' ,
  910. '_tableAlignCenter' : ' align="center"' ,
  911. 'comment' : '<!-- \a -->' ,
  912. 'TOC' : '__TOC__' ,
  913. },
  914. # http://www.inference.phy.cam.ac.uk/mackay/mgp/SYNTAX
  915. # http://en.wikipedia.org/wiki/MagicPoint
  916. 'mgp': {
  917. 'paragraphOpen' : '%font "normal", size 5' ,
  918. 'title1' : '%page\n\n\a\n' ,
  919. 'title2' : '%page\n\n\a\n' ,
  920. 'title3' : '%page\n\n\a\n' ,
  921. 'title4' : '%page\n\n\a\n' ,
  922. 'title5' : '%page\n\n\a\n' ,
  923. 'blockVerbOpen' : '%font "mono"' ,
  924. 'blockVerbClose' : '%font "normal"' ,
  925. 'blockQuoteOpen' : '%prefix " "' ,
  926. 'blockQuoteClose' : '%prefix " "' ,
  927. 'fontMonoOpen' : '\n%cont, font "mono"\n' ,
  928. 'fontMonoClose' : '\n%cont, font "normal"\n' ,
  929. 'fontBoldOpen' : '\n%cont, font "normal-b"\n' ,
  930. 'fontBoldClose' : '\n%cont, font "normal"\n' ,
  931. 'fontItalicOpen' : '\n%cont, font "normal-i"\n' ,
  932. 'fontItalicClose' : '\n%cont, font "normal"\n' ,
  933. 'fontUnderlineOpen' : '\n%cont, fore "cyan"\n' ,
  934. 'fontUnderlineClose' : '\n%cont, fore "white"\n' ,
  935. 'listItemLine' : '\t' ,
  936. 'numlistItemLine' : '\t' ,
  937. 'numlistItemOpen' : '\a. ' ,
  938. 'deflistItem1Open' : '\t\n%cont, font "normal-b"\n',
  939. 'deflistItem1Close' : '\n%cont, font "normal"\n' ,
  940. 'bar1' : '%bar "white" 5' ,
  941. 'bar2' : '%pause' ,
  942. 'url' : '\n%cont, fore "cyan"\n\a' +\
  943. '\n%cont, fore "white"\n' ,
  944. 'urlMark' : '\a \n%cont, fore "cyan"\n\a'+\
  945. '\n%cont, fore "white"\n' ,
  946. 'email' : '\n%cont, fore "cyan"\n\a' +\
  947. '\n%cont, fore "white"\n' ,
  948. 'emailMark' : '\a \n%cont, fore "cyan"\n\a'+\
  949. '\n%cont, fore "white"\n' ,
  950. 'img' : '~A~\n%newimage "\a"\n%left\n',
  951. '_imgAlignLeft' : '\n%left' ,
  952. '_imgAlignRight' : '\n%right' ,
  953. '_imgAlignCenter' : '\n%center' ,
  954. 'comment' : '%% \a' ,
  955. 'pageBreak' : '%page\n\n\n' ,
  956. 'EOD' : '%%EOD'
  957. },
  958. # man groff_man ; man 7 groff
  959. 'man': {
  960. 'paragraphOpen' : '.P' ,
  961. 'title1' : '.SH \a' ,
  962. 'title2' : '.SS \a' ,
  963. 'title3' : '.SS \a' ,
  964. 'title4' : '.SS \a' ,
  965. 'title5' : '.SS \a' ,
  966. 'blockVerbOpen' : '.nf' ,
  967. 'blockVerbClose' : '.fi\n' ,
  968. 'blockQuoteOpen' : '.RS' ,
  969. 'blockQuoteClose' : '.RE' ,
  970. 'fontBoldOpen' : '\\fB' ,
  971. 'fontBoldClose' : '\\fR' ,
  972. 'fontItalicOpen' : '\\fI' ,
  973. 'fontItalicClose' : '\\fR' ,
  974. 'listOpen' : '.RS' ,
  975. 'listItemOpen' : '.IP \(bu 3\n',
  976. 'listClose' : '.RE' ,
  977. 'numlistOpen' : '.RS' ,
  978. 'numlistItemOpen' : '.IP \a. 3\n',
  979. 'numlistClose' : '.RE' ,
  980. 'deflistItem1Open' : '.TP\n' ,
  981. 'bar1' : '\n\n' ,
  982. 'url' : '\a' ,
  983. 'urlMark' : '\a (\a)',
  984. 'email' : '\a' ,
  985. 'emailMark' : '\a (\a)',
  986. 'img' : '\a' ,
  987. 'tableOpen' : '.TS\n~A~~B~tab(^); ~C~.',
  988. 'tableClose' : '.TE' ,
  989. 'tableRowOpen' : ' ' ,
  990. 'tableCellSep' : '^' ,
  991. '_tableAlignCenter' : 'center, ',
  992. '_tableBorder' : 'allbox, ',
  993. '_tableColAlignLeft' : 'l' ,
  994. '_tableColAlignRight' : 'r' ,
  995. '_tableColAlignCenter': 'c' ,
  996. 'comment' : '.\\" \a'
  997. },
  998. 'pm6': {
  999. 'paragraphOpen' : '<@Normal:>' ,
  1000. 'title1' : '\n<@Title1:>\a',
  1001. 'title2' : '\n<@Title2:>\a',
  1002. 'title3' : '\n<@Title3:>\a',
  1003. 'title4' : '\n<@Title4:>\a',
  1004. 'title5' : '\n<@Title5:>\a',
  1005. 'blockVerbOpen' : '<@PreFormat:>' ,
  1006. 'blockQuoteLine' : '<@Quote:>' ,
  1007. 'fontMonoOpen' : '<FONT "Lucida Console"><SIZE 9>' ,
  1008. 'fontMonoClose' : '<SIZE$><FONT$>',
  1009. 'fontBoldOpen' : '<B>' ,
  1010. 'fontBoldClose' : '<P>' ,
  1011. 'fontItalicOpen' : '<I>' ,
  1012. 'fontItalicClose' : '<P>' ,
  1013. 'fontUnderlineOpen' : '<U>' ,
  1014. 'fontUnderlineClose' : '<P>' ,
  1015. 'listOpen' : '<@Bullet:>' ,
  1016. 'listItemOpen' : '\x95\t' , # \x95 == ~U
  1017. 'numlistOpen' : '<@Bullet:>' ,
  1018. 'numlistItemOpen' : '\x95\t' ,
  1019. 'bar1' : '\a' ,
  1020. 'url' : '<U>\a<P>' , # underline
  1021. 'urlMark' : '\a <U>\a<P>' ,
  1022. 'email' : '\a' ,
  1023. 'emailMark' : '\a \a' ,
  1024. 'img' : '\a'
  1025. }
  1026. }
  1027. # Exceptions for --css-sugar
  1028. if config['css-sugar'] and config['target'] in ('html','xhtml'):
  1029. # Change just HTML because XHTML inherits it
  1030. htmltags = alltags['html']
  1031. # Table with no cellpadding
  1032. htmltags['tableOpen'] = string.replace(
  1033. htmltags['tableOpen'], ' CELLPADDING="4"', '')
  1034. # DIVs
  1035. htmltags['tocOpen' ] = '<DIV CLASS="toc" ID="toc">'
  1036. htmltags['tocClose'] = '</DIV>'
  1037. htmltags['bodyOpen'] = '<DIV CLASS="body" ID="body">'
  1038. htmltags['bodyClose']= '</DIV>'
  1039. # Make the HTML -> XHTML inheritance
  1040. xhtml = alltags['html'].copy()
  1041. for key in xhtml.keys(): xhtml[key] = string.lower(xhtml[key])
  1042. # Some like HTML tags as lowercase, some don't... (headers out)
  1043. if HTML_LOWER: alltags['html'] = xhtml.copy()
  1044. xhtml.update(alltags['xhtml'])
  1045. alltags['xhtml'] = xhtml.copy()
  1046. # Compose the target tags dictionary
  1047. tags = {}
  1048. target_tags = alltags[config['target']].copy()
  1049. for key in keys: tags[key] = '' # create empty keys
  1050. for key in target_tags.keys():
  1051. tags[key] = maskEscapeChar(target_tags[key]) # populate
  1052. # Map strong line to separator if not defined
  1053. if not tags['bar2'] and tags['bar1']:
  1054. tags['bar2'] = tags['bar1']
  1055. return tags
  1056. ##############################################################################
  1057. def getRules(config):
  1058. "Returns all the target-specific syntax rules"
  1059. ret = {}
  1060. allrules = [
  1061. # target rules (ON/OFF)
  1062. 'linkable', # target supports external links
  1063. 'tableable', # target supports tables
  1064. 'imglinkable', # target supports images as links
  1065. 'imgalignable', # target supports image alignment
  1066. 'imgasdefterm', # target supports image as definition term
  1067. 'autonumberlist', # target supports numbered lists natively
  1068. 'autonumbertitle', # target supports numbered titles natively
  1069. 'stylable', # target supports external style files
  1070. 'parainsidelist', # lists items supports paragraph
  1071. 'spacedlistitem', # lists support blank lines between items
  1072. 'listnotnested', # lists cannot be nested
  1073. 'quotenotnested', # quotes cannot be nested
  1074. 'verbblocknotescaped', # don't escape specials in verb block
  1075. 'verbblockfinalescape', # do final escapes in verb block
  1076. 'escapeurl', # escape special in link URL
  1077. 'onelinepara', # dump paragraph as a single long line
  1078. 'tabletitlerowinbold', # manually bold any cell on table titles
  1079. 'tablecellstrip', # strip extra spaces from each table cell
  1080. 'tablecellspannable', # the table cells can have span attribute
  1081. 'barinsidequote', # bars are allowed inside quote blocks
  1082. 'finalescapetitle', # perform final escapes on title lines
  1083. 'autotocnewpagebefore', # break page before automatic TOC
  1084. 'autotocnewpageafter', # break page after automatic TOC
  1085. 'autotocwithbars', # automatic TOC surrounded by bars
  1086. 'mapbar2pagebreak', # map the strong bar to a page break
  1087. 'titleblocks', # titles must be on open/close section blocks
  1088. # Target code beautify (ON/OFF)
  1089. 'indentverbblock', # add leading spaces to verb block lines
  1090. 'breaktablecell', # break lines after any table cell
  1091. 'breaktablelineopen', # break line after opening table line
  1092. 'breaktitleopen', # break line after any title
  1093. 'notbreaklistopen', # don't break line after opening a new list
  1094. 'notbreakparaopen', # don't break line after opening a new para
  1095. 'keepquoteindent', # don't remove the leading TABs on quotes
  1096. 'keeplistindent', # don't remove the leading spaces on lists
  1097. 'blankendmotherlist', # append a blank line at the mother list end
  1098. 'blankendtable', # append a blank line at the table end
  1099. 'blankendautotoc', # append a blank line at the auto TOC end
  1100. 'tagnotindentable', # tags must be placed at the line begining
  1101. 'spacedlistitemopen', # append a space after the list item open tag
  1102. 'spacednumlistitemopen',# append a space after the numlist item open tag
  1103. 'deflisttextstrip', # strip the contents of the deflist text
  1104. # Value settings
  1105. 'listmaxdepth', # maximum depth for lists
  1106. 'quotemaxdepth', # maximum depth for quotes
  1107. 'tablecellaligntype', # type of table cell align: cell, column
  1108. ]
  1109. rules_bank = {
  1110. 'txt' : {
  1111. 'indentverbblock':1,
  1112. 'spacedlistitem':1,
  1113. 'parainsidelist':1,
  1114. 'keeplistindent':1,
  1115. 'barinsidequote':1,
  1116. 'autotocwithbars':1,
  1117. 'blankendmotherlist':1,
  1118. },
  1119. 'html': {
  1120. 'indentverbblock':1,
  1121. 'linkable':1,
  1122. 'stylable':1,
  1123. 'escapeurl':1,
  1124. 'imglinkable':1,
  1125. 'imgalignable':1,
  1126. 'imgasdefterm':1,
  1127. 'autonumberlist':1,
  1128. 'spacedlistitem':1,
  1129. 'parainsidelist':1,
  1130. 'blankendmotherlist':1,
  1131. 'tableable':1,
  1132. 'tablecellstrip':1,
  1133. 'blankendtable':1,
  1134. 'breaktablecell':1,
  1135. 'breaktablelineopen':1,
  1136. 'keeplistindent':1,
  1137. 'keepquoteindent':1,
  1138. 'barinsidequote':1,
  1139. 'autotocwithbars':1,
  1140. 'tablecellspannable':1,
  1141. 'tablecellaligntype':'cell',
  1142. },
  1143. #TIP xhtml inherits all HTML rules
  1144. 'xhtml': {
  1145. },
  1146. 'sgml': {
  1147. 'linkable':1,
  1148. 'escapeurl':1,
  1149. 'autonumberlist':1,
  1150. 'spacedlistitem':1,
  1151. 'blankendmotherlist':1,
  1152. 'tableable':1,
  1153. 'tablecellstrip':1,
  1154. 'blankendtable':1,
  1155. 'blankendautotoc':1,
  1156. 'quotenotnested':1,
  1157. 'keeplistindent':1,
  1158. 'keepquoteindent':1,
  1159. 'barinsidequote':1,
  1160. 'finalescapetitle':1,
  1161. 'tablecellaligntype':'column',
  1162. },
  1163. 'mgp' : {
  1164. 'blankendmotherlist':1,
  1165. 'tagnotindentable':1,
  1166. 'spacedlistitem':1,
  1167. 'imgalignable':1,
  1168. 'autotocnewpagebefore':1,
  1169. },
  1170. 'tex' : {
  1171. 'stylable':1,
  1172. 'escapeurl':1,
  1173. 'autonumberlist':1,
  1174. 'autonumbertitle':1,
  1175. 'spacedlistitem':1,
  1176. 'blankendmotherlist':1,
  1177. 'tableable':1,
  1178. 'tablecellstrip':1,
  1179. 'tabletitlerowinbold':1,
  1180. 'blankendtable':1,
  1181. 'verbblocknotescaped':1,
  1182. 'keeplistindent':1,
  1183. 'listmaxdepth':4, # deflist is 6
  1184. 'quotemaxdepth':6,
  1185. 'barinsidequote':1,
  1186. 'finalescapetitle':1,
  1187. 'autotocnewpageafter':1,
  1188. 'mapbar2pagebreak':1,
  1189. 'tablecellaligntype':'column',
  1190. },
  1191. 'lout': {
  1192. 'keepquoteindent':1,
  1193. 'keeplistindent':1,
  1194. 'deflisttextstrip':1,
  1195. 'escapeurl':1,
  1196. 'verbblocknotescaped':1,
  1197. 'imgalignable':1,
  1198. 'mapbar2pagebreak':1,
  1199. 'titleblocks':1,
  1200. 'autonumberlist':1,
  1201. 'notbreakparaopen':1,
  1202. },
  1203. 'moin': {
  1204. 'spacedlistitem':1,
  1205. 'linkable':1,
  1206. 'blankendmotherlist':1,
  1207. 'keeplistindent':1,
  1208. 'tableable':1,
  1209. 'barinsidequote':1,
  1210. 'blankendtable':1,
  1211. 'tabletitlerowinbold':1,
  1212. 'tablecellstrip':1,
  1213. 'autotocwithbars':1,
  1214. 'tablecellaligntype':'cell',
  1215. 'deflisttextstrip':1,
  1216. },
  1217. 'gwiki': {
  1218. 'spacedlistitem':1,
  1219. 'linkable':1,
  1220. 'blankendmotherlist':1,
  1221. 'keeplistindent':1,
  1222. 'tableable':1,
  1223. 'tabletitlerowinbold':1,
  1224. 'tablecellstrip':1,
  1225. 'autonumberlist':1,
  1226. 'breaktitleopen':1,
  1227. },
  1228. 'doku': {
  1229. 'indentverbblock':1, # DokuWiki uses ' ' to mark verb blocks
  1230. 'spacedlistitem':1,
  1231. 'linkable':1,
  1232. 'blankendmotherlist':1,
  1233. 'keeplistindent':1,
  1234. 'tableable':1,
  1235. 'barinsidequote':1,
  1236. 'blankendtable':1,
  1237. 'tablecellstrip':1,
  1238. 'autotocwithbars':1,
  1239. 'autonumberlist':1,
  1240. 'imgalignable':1,
  1241. 'tablecellaligntype':'cell',
  1242. },
  1243. 'wiki': {
  1244. 'linkable':1,
  1245. 'blankendmotherlist':1,
  1246. 'tableable':1,
  1247. 'blankendtable':1,
  1248. 'tablecellstrip':1,
  1249. 'autotocwithbars':1,
  1250. 'spacedlistitemopen':1,
  1251. 'spacednumlistitemopen':1,
  1252. 'deflisttextstrip':1,
  1253. 'autonumberlist':1,
  1254. 'imgalignable':1,
  1255. },
  1256. 'man' : {
  1257. 'spacedlistitem':1,
  1258. 'indentverbblock':1,
  1259. 'blankendmotherlist':1,
  1260. 'tagnotindentable':1,
  1261. 'tableable':1,
  1262. 'tablecellaligntype':'column',
  1263. 'tabletitlerowinbold':1,
  1264. 'tablecellstrip':1,
  1265. 'blankendtable':1,
  1266. 'barinsidequote':1,
  1267. 'parainsidelist':0,
  1268. },
  1269. 'pm6' : {
  1270. 'keeplistindent':1,
  1271. 'verbblockfinalescape':1,
  1272. #TODO add support for these - maybe set a JOINNEXT char and
  1273. # do it on addLineBreaks()
  1274. 'notbreaklistopen':1,
  1275. 'notbreakparaopen':1,
  1276. 'barinsidequote':1,
  1277. 'autotocwithbars':1,
  1278. 'onelinepara':1,
  1279. }
  1280. }
  1281. # Exceptions for --css-sugar
  1282. if config['css-sugar'] and config['target'] in ('html','xhtml'):
  1283. rules_bank['html']['indentverbblock'] = 0
  1284. rules_bank['html']['autotocwithbars'] = 0
  1285. # Get the target specific rules
  1286. if config['target'] == 'xhtml':
  1287. myrules = rules_bank['html'].copy() # inheritance
  1288. myrules.update(rules_bank['xhtml']) # get XHTML specific
  1289. else:
  1290. myrules = rules_bank[config['target']].copy()
  1291. # Populate return dictionary
  1292. for key in allrules: ret[key] = 0 # reset all
  1293. ret.update(myrules) # get rules
  1294. return ret
  1295. ##############################################################################
  1296. def getRegexes():
  1297. "Returns all the regexes used to find the t2t marks"
  1298. bank = {
  1299. 'blockVerbOpen':
  1300. re.compile(r'^```\s*$'),
  1301. 'blockVerbClose':
  1302. re.compile(r'^```\s*$'),
  1303. 'blockRawOpen':
  1304. re.compile(r'^"""\s*$'),
  1305. 'blockRawClose':
  1306. re.compile(r'^"""\s*$'),
  1307. 'blockCommentOpen':
  1308. re.compile(r'^%%%\s*$'),
  1309. 'blockCommentClose':
  1310. re.compile(r'^%%%\s*$'),
  1311. 'quote':
  1312. re.compile(r'^\t+'),
  1313. '1lineVerb':
  1314. re.compile(r'^``` (?=.)'),
  1315. '1lineRaw':
  1316. re.compile(r'^""" (?=.)'),
  1317. # mono, raw, bold, italic, underline:
  1318. # - marks must be glued with the contents, no boundary spaces
  1319. # - they are greedy, so in ****bold****, turns to <b>**bold**</b>
  1320. 'fontMono':
  1321. re.compile( r'``([^\s](|.*?[^\s])`*)``'),
  1322. 'raw':
  1323. re.compile( r'""([^\s](|.*?[^\s])"*)""'),
  1324. 'fontBold':
  1325. re.compile(r'\*\*([^\s](|.*?[^\s])\**)\*\*'),
  1326. 'fontItalic':
  1327. re.compile( r'//([^\s](|.*?[^\s])/*)//'),
  1328. 'fontUnderline':
  1329. re.compile( r'__([^\s](|.*?[^\s])_*)__'),
  1330. 'fontStrike':
  1331. re.compile( r'--([^\s](|.*?[^\s])-*)--'),
  1332. 'list':
  1333. re.compile(r'^( *)(-) (?=[^ ])'),
  1334. 'numlist':
  1335. re.compile(r'^( *)(\+) (?=[^ ])'),
  1336. 'deflist':
  1337. re.compile(r'^( *)(:) (.*)$'),
  1338. 'listclose':
  1339. re.compile(r'^( *)([-+:])\s*$'),
  1340. 'bar':
  1341. re.compile(r'^(\s*)([_=-]{20,})\s*$'),
  1342. 'table':
  1343. re.compile(r'^ *\|\|? '),
  1344. 'blankline':
  1345. re.compile(r'^\s*$'),
  1346. 'comment':
  1347. re.compile(r'^%'),
  1348. # Auxiliary tag regexes
  1349. '_imgAlign' : re.compile(r'~A~', re.I),
  1350. '_tableAlign' : re.compile(r'~A~', re.I),
  1351. '_anchor' : re.compile(r'~A~', re.I),
  1352. '_tableBorder' : re.compile(r'~B~', re.I),
  1353. '_tableColAlign' : re.compile(r'~C~', re.I),
  1354. '_tableCellColSpan': re.compile(r'~S~', re.I),
  1355. '_tableCellAlign' : re.compile(r'~A~', re.I),
  1356. }
  1357. # Special char to place data on TAGs contents (\a == bell)
  1358. bank['x'] = re.compile('\a')
  1359. # %%macroname [ (formatting) ]
  1360. bank['macros'] = re.compile(r'%%%%(?P<name>%s)\b(\((?P<fmt>.*?)\))?'%(
  1361. string.join(MACROS.keys(), '|')), re.I)
  1362. # %%TOC special macro for TOC positioning
  1363. bank['toc'] = re.compile(r'^ *%%toc\s*$', re.I)
  1364. # Almost complicated title regexes ;)
  1365. titskel = r'^ *(?P<id>%s)(?P<txt>%s)\1(\[(?P<label>[\w-]*)\])?\s*$'
  1366. bank[ 'title'] = re.compile(titskel%('[=]{1,5}','[^=](|.*[^=])'))
  1367. bank['numtitle'] = re.compile(titskel%('[+]{1,5}','[^+](|.*[^+])'))
  1368. ### Complicated regexes begin here ;)
  1369. #
  1370. # Textual descriptions on --help's style: [...] is optional, | is OR
  1371. ### First, some auxiliary variables
  1372. #
  1373. # [image.EXT]
  1374. patt_img = r'\[([\w_,.+%$#@!?+~/-]+\.(png|jpe?g|gif|eps|bmp))\]'
  1375. # Link things
  1376. # http://www.gbiv.com/protocols/uri/rfc/rfc3986.html
  1377. # pchar: A-Za-z._~- / %FF / !$&'()*+,;= / :@
  1378. # Recomended order: scheme://user:pass@domain/path?query=foo#anchor
  1379. # Also works : scheme://user:pass@domain/path#anchor?query=foo
  1380. # TODO form: !'():
  1381. urlskel = {
  1382. 'proto' : r'(https?|ftp|news|telnet|gopher|wais)://',
  1383. 'guess' : r'(www[23]?|ftp)\.', # w/out proto, try to guess
  1384. 'login' : r'A-Za-z0-9_.-', # for ftp://login@domain.com
  1385. 'pass' : r'[^ @]*', # for ftp://login:pass@dom.com
  1386. 'chars' : r'A-Za-z0-9%._/~:,=$@&+-', # %20(space), :80(port), D&D
  1387. 'anchor': r'A-Za-z0-9%._-', # %nn(encoded)
  1388. 'form' : r'A-Za-z0-9/%&=+;.,$@*_-', # .,@*_-(as is)
  1389. 'punct' : r'.,;:!?'
  1390. }
  1391. # username [ :password ] @
  1392. patt_url_login = r'([%s]+(:%s)?@)?'%(urlskel['login'],urlskel['pass'])
  1393. # [ http:// ] [ username:password@ ] domain.com [ / ]
  1394. # [ #anchor | ?form=data ]
  1395. retxt_url = r'\b(%s%s|%s)[%s]+\b/*(\?[%s]+)?(#[%s]*)?'%(
  1396. urlskel['proto'],patt_url_login, urlskel['guess'],
  1397. urlskel['chars'],urlskel['form'],urlskel['anchor'])
  1398. # filename | [ filename ] #anchor
  1399. retxt_url_local = r'[%s]+|[%s]*(#[%s]*)'%(
  1400. urlskel['chars'],urlskel['chars'],urlskel['anchor'])
  1401. # user@domain [ ?form=data ]
  1402. patt_email = r'\b[%s]+@([A-Za-z0-9_-]+\.)+[A-Za-z]{2,4}\b(\?[%s]+)?'%(
  1403. urlskel['login'],urlskel['form'])
  1404. # Saving for future use
  1405. bank['_urlskel'] = urlskel
  1406. ### And now the real regexes
  1407. #
  1408. bank['email'] = re.compile(patt_email,re.I)
  1409. # email | url
  1410. bank['link'] = re.compile(r'%s|%s'%(retxt_url,patt_email), re.I)
  1411. # \[ label | imagetag url | email | filename \]
  1412. bank['linkmark'] = re.compile(
  1413. r'\[(?P<label>%s|[^]]+) (?P<link>%s|%s|%s)\]'%(
  1414. patt_img, retxt_url, patt_email, retxt_url_local),
  1415. re.L+re.I)
  1416. # Image
  1417. bank['img'] = re.compile(patt_img, re.L+re.I)
  1418. # Special things
  1419. bank['special'] = re.compile(r'^%!\s*')
  1420. return bank
  1421. ### END OF regex nightmares
  1422. ##############################################################################
  1423. class error(Exception):
  1424. pass
  1425. def echo(msg): # for quick debug
  1426. print '\033[32;1m%s\033[m'%msg
  1427. def Quit(msg=''):
  1428. if msg: print msg
  1429. sys.exit(0)
  1430. def Error(msg):
  1431. msg = _("%s: Error: ")%my_name + msg
  1432. raise error, msg
  1433. def getTraceback():
  1434. try:
  1435. from traceback import format_exception
  1436. etype, value, tb = sys.exc_info()
  1437. return string.join(format_exception(etype, value, tb), '')
  1438. except: pass
  1439. def getUnknownErrorMessage():
  1440. msg = '%s\n%s (%s):\n\n%s'%(
  1441. _('Sorry! Txt2tags aborted by an unknown error.'),
  1442. _('Please send the following Error Traceback to the author'),
  1443. my_email, getTraceback())
  1444. return msg
  1445. def Message(msg,level):
  1446. if level <= VERBOSE and not QUIET:
  1447. prefix = '-'*5
  1448. print "%s %s"%(prefix*level, msg)
  1449. def Debug(msg,id=0,linenr=None):
  1450. "Show debug messages, categorized (colored or not)"
  1451. if QUIET or not DEBUG: return
  1452. if int(id) not in range(8): id = 0
  1453. # 0:black 1:red 2:green 3:yellow 4:blue 5:pink 6:cyan 7:white ;1:light
  1454. ids = ['INI','CFG','SRC','BLK','HLD','GUI','OUT','DET']
  1455. colors_bgdark = ['7;1','1;1','3;1','6;1','4;1','5;1','2;1','7;1']
  1456. colors_bglight = ['0' ,'1' ,'3' ,'6' ,'4' ,'5' ,'2' ,'0' ]
  1457. if linenr is not None: msg = "LINE %04d: %s"%(linenr,msg)
  1458. if COLOR_DEBUG:
  1459. if BG_LIGHT: color = colors_bglight[id]
  1460. else : color = colors_bgdark[id]
  1461. msg = '\033[3%sm%s\033[m'%(color,msg)
  1462. print "++ %s: %s"%(ids[id],msg)
  1463. def Readfile(file, remove_linebreaks=0, ignore_error=0):
  1464. data = []
  1465. if file == '-':
  1466. try: data = sys.stdin.readlines()
  1467. except:
  1468. if not ignore_error:
  1469. Error(_('You must feed me with data on STDIN!'))
  1470. else:
  1471. try: f = open(file); data = f.readlines() ; f.close()
  1472. except:
  1473. if not ignore_error:
  1474. Error(_("Cannot read file:")+" %s"%file)
  1475. if remove_linebreaks:
  1476. data = map(lambda x:re.sub('[\n\r]+$','',x), data)
  1477. Message(_("File read (%d lines): %s")%(len(data),file),2)
  1478. return data
  1479. def Savefile(file, contents):
  1480. try: f = open(file, 'wb')
  1481. except: Error(_("Cannot open file for writing:")+" %s"%file)
  1482. if type(contents) == type([]): doit = f.writelines
  1483. else: doit = f.write
  1484. doit(contents) ; f.close()
  1485. def showdic(dic):
  1486. for k in dic.keys(): print "%15s : %s" % (k,dic[k])
  1487. def dotted_spaces(txt=''):
  1488. return string.replace(txt,' ','.')
  1489. # TIP: win env vars http://www.winnetmag.com/Article/ArticleID/23873/23873.html
  1490. def get_rc_path():
  1491. "Return the full path for the users' RC file"
  1492. # Try to get the path from an env var. if yes, we're done
  1493. user_defined = os.environ.get('T2TCONFIG')
  1494. if user_defined: return user_defined
  1495. # Env var not found, so perform automatic path composing
  1496. # Set default filename according system platform
  1497. rc_names = {'default':'.txt2tagsrc', 'win':'_t2trc'}
  1498. rc_file = rc_names.get(sys.platform[:3]) or rc_names['default']
  1499. # The file must be on the user directory, but where is this dir?
  1500. rc_dir_search = ['HOME', 'HOMEPATH']
  1501. for var in rc_dir_search:
  1502. rc_dir = os.environ.get(var)
  1503. if rc_dir: break
  1504. # rc dir found, now we must join dir+file to compose the full path
  1505. if rc_dir:
  1506. # Compose path and return it if the file exists
  1507. rc_path = os.path.join(rc_dir, rc_file)
  1508. # On windows, prefix with the drive (%homedrive%: 2k/XP/NT)
  1509. if sys.platform[:3] == 'win':
  1510. rc_drive = os.environ.get('HOMEDRIVE')
  1511. rc_path = os.path.join(rc_drive,rc_path)
  1512. return rc_path
  1513. # Sorry, not found
  1514. return ''
  1515. ##############################################################################
  1516. class CommandLine:
  1517. """
  1518. Command Line class - Masters command line
  1519. This class checks and extracts data from the provided command line.
  1520. The --long options and flags are taken from the global OPTIONS,
  1521. FLAGS and ACTIONS dictionaries. The short options are registered
  1522. here, and also their equivalence to the long ones.
  1523. METHODS:
  1524. _compose_short_opts() -> str
  1525. _compose_long_opts() -> list
  1526. Compose the valid short and long options list, on the
  1527. 'getopt' format.
  1528. parse() -> (opts, args)
  1529. Call getopt to check and parse the command line.
  1530. It expects to receive the command line as a list, and
  1531. without the program name (sys.argv[1:]).
  1532. get_raw_config() -> [RAW config]
  1533. Scans command line and convert the data to the RAW config
  1534. format. See ConfigMaster class to the RAW format description.
  1535. Optional 'ignore' and 'filter' arguments are used to filter
  1536. in or out specified keys.
  1537. compose_cmdline(dict) -> [Command line]
  1538. Compose a command line list from an already parsed config
  1539. dictionary, generated from RAW by ConfigMaster(). Use
  1540. this to compose an optimal command line for a group of
  1541. options.
  1542. The get_raw_config() calls parse(), so the typical use of this
  1543. class is:
  1544. raw = CommandLine().get_raw_config(sys.argv[1:])
  1545. """
  1546. def __init__(self):
  1547. self.all_options = OPTIONS.keys()
  1548. self.all_flags = FLAGS.keys()
  1549. self.all_actions = ACTIONS.keys()
  1550. # short:long options equivalence
  1551. self.short_long = {
  1552. 'h':'help' , 'V':'version',
  1553. 'n':'enum-title', 'i':'infile' ,
  1554. 'H':'no-headers', 'o':'outfile',
  1555. 'v':'verbose' , 't':'target' ,
  1556. 'q':'quiet' , 'C':'config-file'
  1557. }
  1558. # Compose valid short and long options data for getopt
  1559. self.short_opts = self._compose_short_opts()
  1560. self.long_opts = self._compose_long_opts()
  1561. def _compose_short_opts(self):
  1562. "Returns a string like 'hVt:o' with all short options/flags"
  1563. ret = []
  1564. for opt in self.short_long.keys():
  1565. long = self.short_long[opt]
  1566. if long in self.all_options: # is flag or option?
  1567. opt = opt+':' # option: have param
  1568. ret.append(opt)
  1569. #Debug('Valid SHORT options: %s'%ret)
  1570. return string.join(ret, '')
  1571. def _compose_long_opts(self):
  1572. "Returns a list with all the valid long options/flags"
  1573. ret = map(lambda x:x+'=', self.all_options) # add =
  1574. ret.extend(self.all_flags) # flag ON
  1575. ret.extend(self.all_actions) # acts
  1576. ret.extend(map(lambda x:'no-'+x, self.all_flags)) # add no-*
  1577. ret.extend(['no-style','no-encoding']) # turn OFF
  1578. ret.extend(['no-outfile','no-infile']) # turn OFF
  1579. ret.extend(['no-dump-config', 'no-dump-source']) # turn OFF
  1580. #Debug('Valid LONG options: %s'%ret)
  1581. return ret
  1582. def _tokenize(self, cmd_string=''):
  1583. "Convert a command line string to a list"
  1584. #TODO protect quotes contents -- Don't use it, pass cmdline as list
  1585. return string.split(cmd_string)
  1586. def parse(self, cmdline=[]):
  1587. "Check/Parse a command line list TIP: no program name!"
  1588. # Get the valid options
  1589. short, long = self.short_opts, self.long_opts
  1590. # Parse it!
  1591. try:
  1592. opts, args = getopt.getopt(cmdline, short, long)
  1593. except getopt.error, errmsg:
  1594. Error(_("%s (try --help)")%errmsg)
  1595. return (opts, args)
  1596. def get_raw_config(self, cmdline=[], ignore=[], filter=[], relative=0):
  1597. "Returns the options/arguments found as RAW config"
  1598. if not cmdline: return []
  1599. ret = []
  1600. # We need lists, not strings
  1601. if type(cmdline) in (type(''), type(u'')):
  1602. cmdline = self._tokenize(cmdline)
  1603. opts, args = self.parse(cmdline[:])
  1604. # Parse all options
  1605. for name,value in opts:
  1606. # Remove leading - and --
  1607. name = re.sub('^--?', '', name)
  1608. # Alias to old misspelled 'suGGar'
  1609. if name == 'css-suggar': name = 'css-sugar'
  1610. elif name == 'no-css-suggar': name = 'no-css-sugar'
  1611. # Translate short opt to long
  1612. if len(name) == 1: name = self.short_long.get(name)
  1613. # Outfile exception: path relative to PWD
  1614. if name == 'outfile' and relative \
  1615. and value not in [STDOUT, MODULEOUT]:
  1616. value = os.path.abspath(value)
  1617. # config-file inclusion, path relative to PWD
  1618. if name == 'config-file':
  1619. configs = ConfigLines().include_config_file(
  1620. value)
  1621. # Remove the 'target' item of all configs
  1622. configs = map(lambda c: [c[1],c[2]], configs)
  1623. ret.extend(configs)
  1624. continue
  1625. # Save it
  1626. ret.append([name, value])
  1627. # Get infile, if any
  1628. while args:
  1629. infile = args.pop(0)
  1630. ret.append(['infile', infile])
  1631. # Apply 'ignore' and 'filter' rules (filter is stronger)
  1632. temp = ret[:] ; ret = []
  1633. for name,value in temp:
  1634. if (not filter and not ignore) or \
  1635. (filter and name in filter) or \
  1636. (ignore and name not in ignore):
  1637. ret.append( ['all', name, value] )
  1638. # Add the original command line string as 'realcmdline'
  1639. ret.append( ['all', 'realcmdline', cmdline] )
  1640. return ret
  1641. def compose_cmdline(self, conf={}, no_check=0):
  1642. "compose a full (and diet) command line from CONF dict"
  1643. if not conf: return []
  1644. args = []
  1645. dft_options = OPTIONS.copy()
  1646. cfg = conf.copy()
  1647. valid_opts = self.all_options + self.all_flags
  1648. use_short = {'no-headers':'H', 'enum-title':'n'}
  1649. # Remove useless options
  1650. if not no_check and cfg.get('toc-only'):
  1651. if cfg.has_key('no-headers'):
  1652. del cfg['no-headers']
  1653. if cfg.has_key('outfile'):
  1654. del cfg['outfile'] # defaults to STDOUT
  1655. if cfg.get('target') == 'txt':
  1656. del cfg['target'] # already default
  1657. args.append('--toc-only') # must be the first
  1658. del cfg['toc-only']
  1659. # Add target type
  1660. if cfg.has_key('target'):
  1661. args.append('-t '+cfg['target'])
  1662. del cfg['target']
  1663. # Add other options
  1664. for key in cfg.keys():
  1665. if key not in valid_opts: continue # may be a %!setting
  1666. if key == 'outfile' or key == 'infile': continue # later
  1667. val = cfg[key]
  1668. if not val: continue
  1669. # Default values are useless on cmdline
  1670. if val == dft_options.get(key): continue
  1671. # -short format
  1672. if key in use_short.keys():
  1673. args.append('-'+use_short[key])
  1674. continue
  1675. # --long format
  1676. if key in self.all_flags: # add --option
  1677. args.append('--'+key)
  1678. else: # add --option=value
  1679. args.append('--%s=%s'%(key,val))
  1680. # The outfile using -o
  1681. if cfg.has_key('outfile') and \
  1682. cfg['outfile'] != dft_options.get('outfile'):
  1683. args.append('-o '+cfg['outfile'])
  1684. # Place input file(s) always at the end
  1685. if cfg.has_key('infile'):
  1686. args.append(string.join(cfg['infile'],' '))
  1687. # Return as a nice list
  1688. Debug("Diet command line: %s"%string.join(args,' '), 1)
  1689. return args
  1690. ##############################################################################
  1691. class SourceDocument:
  1692. """
  1693. SourceDocument class - scan document structure, extract data
  1694. It knows about full files. It reads a file and identify all
  1695. the areas begining (Head,Conf,Body). With this info it can
  1696. extract each area contents.
  1697. Note: the original line break is removed.
  1698. DATA:
  1699. self.arearef - Save Head, Conf, Body init line number
  1700. self.areas - Store the area names which are not empty
  1701. self.buffer - The full file contents (with NO \\r, \\n)
  1702. METHODS:
  1703. get() - Access the contents of an Area. Example:
  1704. config = SourceDocument(file).get('conf')
  1705. split() - Get all the document Areas at once. Example:
  1706. head, conf, body = SourceDocument(file).split()
  1707. RULES:
  1708. * The document parts are sequential: Head, Conf and Body.
  1709. * One ends when the next begins.
  1710. * The Conf Area is optional, so a document can have just
  1711. Head and Body Areas.
  1712. These are the Areas limits:
  1713. - Head Area: the first three lines
  1714. - Body Area: from the first valid text line to the end
  1715. - Conf Area: the comments between Head and Body Areas
  1716. Exception: If the first line is blank, this means no
  1717. header info, so the Head Area is just the first line.
  1718. """
  1719. def __init__(self, filename='', contents=[]):
  1720. self.areas = ['head','conf','body']
  1721. self.arearef = []
  1722. self.areas_fancy = ''
  1723. self.filename = filename
  1724. self.buffer = []
  1725. if filename:
  1726. self.scan_file(filename)
  1727. elif contents:
  1728. self.scan(contents)
  1729. def split(self):
  1730. "Returns all document parts, splitted into lists."
  1731. return self.get('head'), self.get('conf'), self.get('body')
  1732. def get(self, areaname):
  1733. "Returns head|conf|body contents from self.buffer"
  1734. # Sanity
  1735. if areaname not in self.areas: return []
  1736. if not self.buffer : return []
  1737. # Go get it
  1738. bufini = 1
  1739. bufend = len(self.buffer)
  1740. if areaname == 'head':
  1741. ini = bufini
  1742. end = self.arearef[1] or self.arearef[2] or bufend
  1743. elif areaname == 'conf':
  1744. ini = self.arearef[1]
  1745. end = self.arearef[2] or bufend
  1746. elif areaname == 'body':
  1747. ini = self.arearef[2]
  1748. end = bufend
  1749. else:
  1750. Error("Unknown Area name '%s'"%areaname)
  1751. lines = self.buffer[ini:end]
  1752. # Make sure head will always have 3 lines
  1753. while areaname == 'head' and len(lines) < 3:
  1754. lines.append('')
  1755. return lines
  1756. def scan_file(self, filename):
  1757. Debug("source file: %s"%filename)
  1758. Message(_("Loading source document"),1)
  1759. buf = Readfile(filename, remove_linebreaks=1)
  1760. self.scan(buf)
  1761. def scan(self, lines):
  1762. "Run through source file and identify head/conf/body areas"
  1763. buf = lines
  1764. if len(buf) == 0:
  1765. Error(_('The input file is empty: %s')%self.filename)
  1766. cfg_parser = ConfigLines().parse_line
  1767. buf.insert(0, '') # text start at pos 1
  1768. ref = [1,4,0]
  1769. if not string.strip(buf[1]): # no header
  1770. ref[0] = 0 ; ref[1] = 2
  1771. rgx = getRegexes()
  1772. on_comment_block = 0
  1773. for i in xrange(ref[1],len(buf)): # find body init:
  1774. # Handle comment blocks inside config area
  1775. if not on_comment_block \
  1776. and rgx['blockCommentOpen'].search(buf[i]):
  1777. on_comment_block = 1
  1778. continue
  1779. if on_comment_block \
  1780. and rgx['blockCommentOpen'].search(buf[i]):
  1781. on_comment_block = 0
  1782. continue
  1783. if on_comment_block: continue
  1784. if string.strip(buf[i]) and ( # ... not blank and
  1785. buf[i][0] != '%' or # ... not comment or
  1786. rgx['macros'].match(buf[i]) or # ... %%macro
  1787. rgx['toc'].match(buf[i]) or # ... %%toc
  1788. cfg_parser(buf[i],'include')[1]): # ... %!include
  1789. ref[2] = i ; break
  1790. if ref[1] == ref[2]: ref[1] = 0 # no conf area
  1791. for i in 0,1,2: # del !existent
  1792. if ref[i] >= len(buf): ref[i] = 0 # title-only
  1793. if not ref[i]: self.areas[i] = ''
  1794. Debug('Head,Conf,Body start line: %s'%ref)
  1795. self.arearef = ref # save results
  1796. self.buffer = buf
  1797. # Fancyness sample: head conf body (1 4 8)
  1798. self.areas_fancy = "%s (%s)"%(
  1799. string.join(self.areas),
  1800. string.join(map(str, map(lambda x:x or '', ref))))
  1801. Message(_("Areas found: %s")%self.areas_fancy, 2)
  1802. def get_raw_config(self):
  1803. "Handy method to get the CONF area RAW config (if any)"
  1804. if not self.areas.count('conf'): return []
  1805. Message(_("Scanning source document CONF area"),1)
  1806. raw = ConfigLines(
  1807. file=self.filename, lines=self.get('conf'),
  1808. first_line=self.arearef[1]).get_raw_config()
  1809. Debug("document raw config: %s"%raw, 1)
  1810. return raw
  1811. ##############################################################################
  1812. class ConfigMaster:
  1813. """
  1814. ConfigMaster class - the configuration wizard
  1815. This class is the configuration master. It knows how to handle
  1816. the RAW and PARSED config format. It also performs the sanity
  1817. checking for a given configuration.
  1818. DATA:
  1819. self.raw - Stores the config on the RAW format
  1820. self.parsed - Stores the config on the PARSED format
  1821. self.defaults - Stores the default values for all keys
  1822. self.off - Stores the OFF values for all keys
  1823. self.multi - List of keys which can have multiple values
  1824. self.numeric - List of keys which value must be a number
  1825. self.incremental - List of keys which are incremental
  1826. RAW FORMAT:
  1827. The RAW format is a list of lists, being each mother list item
  1828. a full configuration entry. Any entry is a 3 item list, on
  1829. the following format: [ TARGET, KEY, VALUE ]
  1830. Being a list, the order is preserved, so it's easy to use
  1831. different kinds of configs, as CONF area and command line,
  1832. respecting the precedence.
  1833. The special target 'all' is used when no specific target was
  1834. defined on the original config.
  1835. PARSED FORMAT:
  1836. The PARSED format is a dictionary, with all the 'key : value'
  1837. found by reading the RAW config. The self.target contents
  1838. matters, so this dictionary only contains the target's
  1839. config. The configs of other targets are ignored.
  1840. The CommandLine and ConfigLines classes have the get_raw_config()
  1841. method which convert the configuration found to the RAW format.
  1842. Just feed it to parse() and get a brand-new ready-to-use config
  1843. dictionary. Example:
  1844. >>> raw = CommandLine().get_raw_config(['-n', '-H'])
  1845. >>> print raw
  1846. [['all', 'enum-title', ''], ['all', 'no-headers', '']]
  1847. >>> parsed = ConfigMaster(raw).parse()
  1848. >>> print parsed
  1849. {'enum-title': 1, 'headers': 0}
  1850. """
  1851. def __init__(self, raw=[], target=''):
  1852. self.raw = raw
  1853. self.target = target
  1854. self.parsed = {}
  1855. self.dft_options = OPTIONS.copy()
  1856. self.dft_flags = FLAGS.copy()
  1857. self.dft_actions = ACTIONS.copy()
  1858. self.dft_settings = SETTINGS.copy()
  1859. self.defaults = self._get_defaults()
  1860. self.off = self._get_off()
  1861. self.incremental = ['verbose']
  1862. self.numeric = ['toc-level','split']
  1863. self.multi = ['infile', 'preproc', 'postproc',
  1864. 'options', 'style']
  1865. def _get_defaults(self):
  1866. "Get the default values for all config/options/flags"
  1867. empty = {}
  1868. for kw in CONFIG_KEYWORDS: empty[kw] = ''
  1869. empty.update(self.dft_options)
  1870. empty.update(self.dft_flags)
  1871. empty.update(self.dft_actions)
  1872. empty.update(self.dft_settings)
  1873. empty['realcmdline'] = '' # internal use only
  1874. empty['sourcefile'] = '' # internal use only
  1875. return empty
  1876. def _get_off(self):
  1877. "Turns OFF all the config/options/flags"
  1878. off = {}
  1879. for key in self.defaults.keys():
  1880. kind = type(self.defaults[key])
  1881. if kind == type(9):
  1882. off[key] = 0
  1883. elif kind == type('') or kind == type(u''):
  1884. off[key] = ''
  1885. elif kind == type([]):
  1886. off[key] = []
  1887. else:
  1888. Error('ConfigMaster: %s: Unknown type'+key)
  1889. return off
  1890. def _check_target(self):
  1891. "Checks if the target is already defined. If not, do it"
  1892. if not self.target:
  1893. self.target = self.find_value('target')
  1894. def get_target_raw(self):
  1895. "Returns the raw config for self.target or 'all'"
  1896. ret = []
  1897. self._check_target()
  1898. for entry in self.raw:
  1899. if entry[0] == self.target or entry[0] == 'all':
  1900. ret.append(entry)
  1901. return ret
  1902. def add(self, key, val):
  1903. "Adds the key:value pair to the config dictionary (if needed)"
  1904. # %!options
  1905. if key == 'options':
  1906. ignoreme = self.dft_actions.keys() + ['target']
  1907. ignoreme.remove('dump-config')
  1908. ignoreme.remove('dump-source')
  1909. raw_opts = CommandLine().get_raw_config(
  1910. val, ignore=ignoreme)
  1911. for target, key, val in raw_opts:
  1912. self.add(key, val)
  1913. return
  1914. # The no- prefix turns OFF this key
  1915. if key[:3] == 'no-':
  1916. key = key[3:] # remove prefix
  1917. val = self.off.get(key) # turn key OFF
  1918. # Is this key valid?
  1919. if key not in self.defaults.keys():
  1920. Debug('Bogus Config %s:%s'%(key,val),1)
  1921. return
  1922. # Is this value the default one?
  1923. if val == self.defaults.get(key):
  1924. # If default value, remove previous key:val
  1925. if self.parsed.has_key(key):
  1926. del self.parsed[key]
  1927. # Nothing more to do
  1928. return
  1929. # Flags ON comes empty. we'll add the 1 value now
  1930. if val == '' and (
  1931. key in self.dft_flags.keys() or
  1932. key in self.dft_actions.keys()):
  1933. val = 1
  1934. # Multi value or single?
  1935. if key in self.multi:
  1936. # First one? start new list
  1937. if not self.parsed.has_key(key):
  1938. self.parsed[key] = []
  1939. self.parsed[key].append(val)
  1940. # Incremental value? so let's add it
  1941. elif key in self.incremental:
  1942. self.parsed[key] = (self.parsed.get(key) or 0) + val
  1943. else:
  1944. self.parsed[key] = val
  1945. fancykey = dotted_spaces("%12s"%key)
  1946. Message(_("Added config %s : %s")%(fancykey,val),3)
  1947. def get_outfile_name(self, config={}):
  1948. "Dirname is the same for {in,out}file"
  1949. infile, outfile = config['sourcefile'], config['outfile']
  1950. if outfile and outfile not in (STDOUT, MODULEOUT) \
  1951. and not os.path.isabs(outfile):
  1952. outfile = os.path.join(os.path.dirname(infile), outfile)
  1953. if infile == STDIN and not outfile: outfile = STDOUT
  1954. if infile == MODULEIN and not outfile: outfile = MODULEOUT
  1955. if not outfile and (infile and config.get('target')):
  1956. basename = re.sub('\.(txt|t2t)$','',infile)
  1957. outfile = "%s.%s"%(basename, config['target'])
  1958. Debug(" infile: '%s'"%infile , 1)
  1959. Debug("outfile: '%s'"%outfile, 1)
  1960. return outfile
  1961. def sanity(self, config, gui=0):
  1962. "Basic config sanity checking"
  1963. if not config: return {}
  1964. target = config.get('target')
  1965. # Some actions don't require target specification
  1966. if not target:
  1967. for action in NO_TARGET:
  1968. if config.get(action):
  1969. target = 'txt'
  1970. break
  1971. # On GUI, some checking are skipped
  1972. if not gui:
  1973. # We *need* a target
  1974. if not target:
  1975. Error(_('No target specified (try --help)')+\
  1976. '\n\n'+\
  1977. _('Maybe trying to convert an old v1.x file?'))
  1978. # And of course, an infile also
  1979. if not config.get('infile'):
  1980. Error(_('Missing input file (try --help)'))
  1981. # Is the target valid?
  1982. if not TARGETS.count(target):
  1983. Error(_("Invalid target '%s' (try --help)")%\
  1984. target)
  1985. # Ensure all keys are present
  1986. empty = self.defaults.copy() ; empty.update(config)
  1987. config = empty.copy()
  1988. # Check integers options
  1989. for key in config.keys():
  1990. if key in self.numeric:
  1991. try: config[key] = int(config[key])
  1992. except: Error(_('--%s value must be a number'
  1993. )%key)
  1994. # Check split level value
  1995. if config['split'] not in (0,1,2):
  1996. Error(_('Option --split must be 0, 1 or 2'))
  1997. # --toc-only is stronger than others
  1998. if config['toc-only']:
  1999. config['headers'] = 0
  2000. config['toc'] = 0
  2001. config['split'] = 0
  2002. config['gui'] = 0
  2003. config['outfile'] = config['outfile'] or STDOUT
  2004. # Splitting is disable for now (future: HTML only, no STDOUT)
  2005. config['split'] = 0
  2006. # Restore target
  2007. config['target'] = target
  2008. # Set output file name
  2009. config['outfile'] = self.get_outfile_name(config)
  2010. # Checking suicide
  2011. if config['sourcefile'] == config['outfile'] and \
  2012. config['outfile'] not in [STDOUT,MODULEOUT] and not gui:
  2013. Error(_("Input and Output files are the same: %s")%(
  2014. config['outfile']))
  2015. return config
  2016. def parse(self):
  2017. "Returns the parsed config for the current target"
  2018. raw = self.get_target_raw()
  2019. for target, key, value in raw:
  2020. self.add(key, value)
  2021. Message(_("Added the following keys: %s")%string.join(
  2022. self.parsed.keys(),', '),2)
  2023. return self.parsed.copy()
  2024. def find_value(self, key='', target=''):
  2025. "Scans ALL raw config to find the desired key"
  2026. ret = []
  2027. # Scan and save all values found
  2028. for targ, k, val in self.raw:
  2029. if k == key and (targ == target or targ == 'all'):
  2030. ret.append(val)
  2031. if not ret: return ''
  2032. # If not multi value, return only the last found
  2033. if key in self.multi: return ret
  2034. else : return ret[-1]
  2035. ########################################################################
  2036. class ConfigLines:
  2037. """
  2038. ConfigLines class - the config file data extractor
  2039. This class reads and parse the config lines on the %!key:val
  2040. format, converting it to RAW config. It deals with user
  2041. config file (RC file), source document CONF area and
  2042. %!includeconf directives.
  2043. Call it passing a file name or feed the desired config lines.
  2044. Then just call the get_raw_config() method and wait to
  2045. receive the full config data on the RAW format. This method
  2046. also follows the possible %!includeconf directives found on
  2047. the config lines. Example:
  2048. raw = ConfigLines(file=".txt2tagsrc").get_raw_config()
  2049. The parse_line() method is also useful to be used alone,
  2050. to identify and tokenize a single config line. For example,
  2051. to get the %!include command components, on the source
  2052. document BODY:
  2053. target, key, value = ConfigLines().parse_line(body_line)
  2054. """
  2055. def __init__(self, file='', lines=[], first_line=1):
  2056. self.file = file or 'NOFILE'
  2057. self.lines = lines
  2058. self.first_line = first_line
  2059. def load_lines(self):
  2060. "Make sure we've loaded the file contents into buffer"
  2061. if not self.lines and not self.file:
  2062. Error("ConfigLines: No file or lines provided")
  2063. if not self.lines:
  2064. self.lines = self.read_config_file(self.file)
  2065. def read_config_file(self, filename=''):
  2066. "Read a Config File contents, aborting on invalid line"
  2067. if not filename: return []
  2068. errormsg = _("Invalid CONFIG line on %s")+"\n%03d:%s"
  2069. lines = Readfile(filename, remove_linebreaks=1)
  2070. # Sanity: try to find invalid config lines
  2071. for i in xrange(len(lines)):
  2072. line = string.rstrip(lines[i])
  2073. if not line: continue # empty
  2074. if line[0] != '%': Error(errormsg%(filename,i+1,line))
  2075. return lines
  2076. def include_config_file(self, file=''):
  2077. "Perform the %!includeconf action, returning RAW config"
  2078. if not file: return []
  2079. # Current dir relative to the current file (self.file)
  2080. current_dir = os.path.dirname(self.file)
  2081. file = os.path.join(current_dir, file)
  2082. # Read and parse included config file contents
  2083. lines = self.read_config_file(file)
  2084. return ConfigLines(file=file, lines=lines).get_raw_config()
  2085. def get_raw_config(self):
  2086. "Scan buffer and extract all config as RAW (including includes)"
  2087. ret = []
  2088. self.load_lines()
  2089. first = self.first_line
  2090. for i in xrange(len(self.lines)):
  2091. line = self.lines[i]
  2092. Message(_("Processing line %03d: %s")%(first+i,line),2)
  2093. target, key, val = self.parse_line(line)
  2094. if not key: continue # no config on this line
  2095. if key == 'includeconf':
  2096. err = _('A file cannot include itself (loop!)')
  2097. if val == self.file:
  2098. Error("%s: %%!includeconf: %s"%(
  2099. err, self.file))
  2100. more_raw = self.include_config_file(val)
  2101. ret.extend(more_raw)
  2102. Message(_("Finished Config file inclusion: %s"
  2103. )%(val),2)
  2104. else:
  2105. ret.append([target, key, val])
  2106. Message(_("Added %s")%key,3)
  2107. return ret
  2108. def parse_line(self, line='', keyname='', target=''):
  2109. "Detects %!key:val config lines and extract data from it"
  2110. empty = ['', '', '']
  2111. if not line: return empty
  2112. no_target = ['target', 'includeconf']
  2113. re_name = keyname or '[a-z]+'
  2114. re_target = target or '[a-z]*'
  2115. # XXX TODO <value>\S.+? requires TWO chars, breaks %!include:a
  2116. cfgregex = re.compile("""
  2117. ^%%!\s* # leading id with opt spaces
  2118. (?P<name>%s)\s* # config name
  2119. (\((?P<target>%s)\))? # optional target spec inside ()
  2120. \s*:\s* # key:value delimiter with opt spaces
  2121. (?P<value>\S.+?) # config value
  2122. \s*$ # rstrip() spaces and hit EOL
  2123. """%(re_name,re_target), re.I+re.VERBOSE)
  2124. prepostregex = re.compile("""
  2125. # ---[ PATTERN ]---
  2126. ^( "([^"]*)" # "double quoted" or
  2127. | '([^']*)' # 'single quoted' or
  2128. | ([^\s]+) # single_word
  2129. )
  2130. \s+ # separated by spaces
  2131. # ---[ REPLACE ]---
  2132. ( "([^"]*)" # "double quoted" or
  2133. | '([^']*)' # 'single quoted' or
  2134. | (.*) # anything
  2135. )
  2136. \s*$
  2137. """, re.VERBOSE)
  2138. guicolors = re.compile("^([^\s]+\s+){3}[^\s]+") # 4 tokens
  2139. match = cfgregex.match(line)
  2140. if not match: return empty
  2141. name = string.lower(match.group('name') or '')
  2142. target = string.lower(match.group('target') or 'all')
  2143. value = match.group('value')
  2144. # NO target keywords: force all targets
  2145. if name in no_target: target = 'all'
  2146. # Special config for GUI colors
  2147. if name == 'guicolors':
  2148. valmatch = guicolors.search(value)
  2149. if not valmatch: return empty
  2150. value = re.split('\s+', value)
  2151. # Special config with two quoted values (%!preproc: "foo" 'bar')
  2152. if name == 'preproc' or name == 'postproc':
  2153. valmatch = prepostregex.search(value)
  2154. if not valmatch: return empty
  2155. getval = valmatch.group
  2156. patt = getval(2) or getval(3) or getval(4) or ''
  2157. repl = getval(6) or getval(7) or getval(8) or ''
  2158. value = (patt, repl)
  2159. return [target, name, value]
  2160. ##############################################################################
  2161. class MaskMaster:
  2162. "(Un)Protect important structures from escaping and formatting"
  2163. def __init__(self):
  2164. self.linkmask = 'vvvLINKvvv'
  2165. self.monomask = 'vvvMONOvvv'
  2166. self.macromask = 'vvvMACROvvv'
  2167. self.rawmask = 'vvvRAWvvv'
  2168. self.tocmask = 'vvvTOCvvv'
  2169. self.macroman = MacroMaster()
  2170. self.reset()
  2171. def reset(self):
  2172. self.linkbank = []
  2173. self.monobank = []
  2174. self.macrobank = []
  2175. self.rawbank = []
  2176. def mask(self, line=''):
  2177. global AUTOTOC
  2178. # Protect raw text
  2179. while regex['raw'].search(line):
  2180. txt = regex['raw'].search(line).group(1)
  2181. txt = doEscape(TARGET,txt)
  2182. self.rawbank.append(txt)
  2183. line = regex['raw'].sub(self.rawmask,line,1)
  2184. # Protect pre-formatted font text
  2185. while regex['fontMono'].search(line):
  2186. txt = regex['fontMono'].search(line).group(1)
  2187. txt = doEscape(TARGET,txt)
  2188. self.monobank.append(txt)
  2189. line = regex['fontMono'].sub(self.monomask,line,1)
  2190. # Protect macros
  2191. while regex['macros'].search(line):
  2192. txt = regex['macros'].search(line).group()
  2193. self.macrobank.append(txt)
  2194. line = regex['macros'].sub(self.macromask,line,1)
  2195. # Protect TOC location
  2196. while regex['toc'].search(line):
  2197. line = regex['toc'].sub(self.tocmask,line)
  2198. AUTOTOC = 0
  2199. # Protect URLs and emails
  2200. while regex['linkmark'].search(line) or \
  2201. regex['link' ].search(line):
  2202. # Try to match plain or named links
  2203. match_link = regex['link'].search(line)
  2204. match_named = regex['linkmark'].search(line)
  2205. # Define the current match
  2206. if match_link and match_named:
  2207. # Both types found, which is the first?
  2208. m = match_link
  2209. if match_named.start() < match_link.start():
  2210. m = match_named
  2211. else:
  2212. # Just one type found, we're fine
  2213. m = match_link or match_named
  2214. # Extract link data and apply mask
  2215. if m == match_link: # plain link
  2216. link = m.group()
  2217. label = ''
  2218. link_re = regex['link']
  2219. else: # named link
  2220. link = m.group('link')
  2221. label = string.rstrip(m.group('label'))
  2222. link_re = regex['linkmark']
  2223. line = link_re.sub(self.linkmask,line,1)
  2224. # Save link data to the link bank
  2225. self.linkbank.append((label, link))
  2226. return line
  2227. def undo(self, line):
  2228. # url & email
  2229. for label,url in self.linkbank:
  2230. link = get_tagged_link(label, url)
  2231. line = string.replace(line, self.linkmask, link, 1)
  2232. # Expand macros
  2233. for macro in self.macrobank:
  2234. macro = self.macroman.expand(macro)
  2235. line = string.replace(line, self.macromask, macro, 1)
  2236. # Expand verb
  2237. for mono in self.monobank:
  2238. open,close = TAGS['fontMonoOpen'],TAGS['fontMonoClose']
  2239. tagged = open+mono+close
  2240. line = string.replace(line, self.monomask, tagged, 1)
  2241. # Expand raw
  2242. for raw in self.rawbank:
  2243. line = string.replace(line, self.rawmask, raw, 1)
  2244. return line
  2245. ##############################################################################
  2246. class TitleMaster:
  2247. "Title things"
  2248. def __init__(self):
  2249. self.count = ['',0,0,0,0,0]
  2250. self.toc = []
  2251. self.level = 0
  2252. self.kind = ''
  2253. self.txt = ''
  2254. self.label = ''
  2255. self.tag = ''
  2256. self.tag_hold = []
  2257. self.last_level = 0
  2258. self.count_id = ''
  2259. self.user_labels = {}
  2260. self.anchor_count = 0
  2261. self.anchor_prefix = 'toc'
  2262. def _open_close_blocks(self):
  2263. "Open new title blocks, closing the previous (if any)"
  2264. if not rules['titleblocks']: return
  2265. tag = ''
  2266. last = self.last_level
  2267. curr = self.level
  2268. # Same level, just close the previous
  2269. if curr == last:
  2270. tag = TAGS.get('title%dClose'%last)
  2271. if tag: self.tag_hold.append(tag)
  2272. # Section -> subsection, more depth
  2273. while curr > last:
  2274. last = last + 1
  2275. # Open the new block of subsections
  2276. tag = TAGS.get('blockTitle%dOpen'%last)
  2277. if tag: self.tag_hold.append(tag)
  2278. # Jump from title1 to title3 or more
  2279. # Fill the gap with an empty section
  2280. if curr - last > 0:
  2281. tag = TAGS.get('title%dOpen'%last)
  2282. tag = regex['x'].sub('', tag) # del \a
  2283. if tag: self.tag_hold.append(tag)
  2284. # Section <- subsection, less depth
  2285. while curr < last:
  2286. # Close the current opened subsection
  2287. tag = TAGS.get('title%dClose'%last)
  2288. if tag: self.tag_hold.append(tag)
  2289. # Close the current opened block of subsections
  2290. tag = TAGS.get('blockTitle%dClose'%last)
  2291. if tag: self.tag_hold.append(tag)
  2292. last = last - 1
  2293. # Close the previous section of the same level
  2294. # The subsections were under it
  2295. if curr == last:
  2296. tag = TAGS.get('title%dClose'%last)
  2297. if tag: self.tag_hold.append(tag)
  2298. def add(self, line):
  2299. "Parses a new title line."
  2300. if not line: return
  2301. self._set_prop(line)
  2302. self._open_close_blocks()
  2303. self._set_count_id()
  2304. self._set_label()
  2305. self._save_toc_info()
  2306. def close_all(self):
  2307. "Closes all opened title blocks"
  2308. ret = []
  2309. ret.extend(self.tag_hold)
  2310. while self.level:
  2311. tag = TAGS.get('title%dClose'%self.level)
  2312. if tag: ret.append(tag)
  2313. tag = TAGS.get('blockTitle%dClose'%self.level)
  2314. if tag: ret.append(tag)
  2315. self.level = self.level - 1
  2316. return ret
  2317. def _save_toc_info(self):
  2318. "Save TOC info, used by self.dump_marked_toc()"
  2319. self.toc.append((self.level, self.count_id,
  2320. self.txt , self.label ))
  2321. def _set_prop(self, line=''):
  2322. "Extract info from original line and set data holders."
  2323. # Detect title type (numbered or not)
  2324. id = string.lstrip(line)[0]
  2325. if id == '=': kind = 'title'
  2326. elif id == '+': kind = 'numtitle'
  2327. else: Error("Unknown Title ID '%s'"%id)
  2328. # Extract line info
  2329. match = regex[kind].search(line)
  2330. level = len(match.group('id'))
  2331. txt = string.strip(match.group('txt'))
  2332. label = match.group('label')
  2333. # Parse info & save
  2334. if CONF['enum-title']: kind = 'numtitle' # force
  2335. if rules['titleblocks']:
  2336. self.tag = TAGS.get('%s%dOpen'%(kind,level)) or \
  2337. TAGS.get('title%dOpen'%level)
  2338. else:
  2339. self.tag = TAGS.get(kind+`level`) or \
  2340. TAGS.get('title'+`level`)
  2341. self.last_level = self.level
  2342. self.kind = kind
  2343. self.level = level
  2344. self.txt = txt
  2345. self.label = label
  2346. def _set_count_id(self):
  2347. "Compose and save the title count identifier (if needed)."
  2348. count_id = ''
  2349. if self.kind == 'numtitle' and not rules['autonumbertitle']:
  2350. # Manually increase title count
  2351. self.count[self.level] = self.count[self.level] +1
  2352. # Reset sublevels count (if any)
  2353. max_levels = len(self.count)
  2354. if self.level < max_levels-1:
  2355. for i in xrange(self.level+1, max_levels):
  2356. self.count[i] = 0
  2357. # Compose count id from hierarchy
  2358. for i in xrange(self.level):
  2359. count_id= "%s%d."%(count_id, self.count[i+1])
  2360. self.count_id = count_id
  2361. def _set_label(self):
  2362. "Compose and save title label, used by anchors."
  2363. # Remove invalid chars from label set by user
  2364. self.label = re.sub('[^A-Za-z0-9_-]', '', self.label or '')
  2365. # Generate name as 15 first :alnum: chars
  2366. #TODO how to translate safely accented chars to plain?
  2367. #self.label = re.sub('[^A-Za-z0-9]', '', self.txt)[:15]
  2368. # 'tocN' label - sequential count, ignoring 'toc-level'
  2369. #self.label = self.anchor_prefix + str(len(self.toc)+1)
  2370. def _get_tagged_anchor(self):
  2371. "Return anchor if user defined a label, or TOC is on."
  2372. ret = ''
  2373. label = self.label
  2374. if CONF['toc'] and self.level <= CONF['toc-level']:
  2375. # This count is needed bcos self.toc stores all
  2376. # titles, regardless of the 'toc-level' setting,
  2377. # so we can't use self.toc length to number anchors
  2378. self.anchor_count = self.anchor_count + 1
  2379. # Autonumber label (if needed)
  2380. label = label or '%s%s'%(
  2381. self.anchor_prefix, self.anchor_count)
  2382. if label and TAGS['anchor']:
  2383. ret = regex['x'].sub(label,TAGS['anchor'])
  2384. return ret
  2385. def _get_full_title_text(self):
  2386. "Returns the full title contents, already escaped."
  2387. ret = self.txt
  2388. # Insert count_id (if any) before text
  2389. if self.count_id:
  2390. ret = '%s %s'%(self.count_id, ret)
  2391. # Escape specials
  2392. ret = doEscape(TARGET, ret)
  2393. # Same targets needs final escapes on title lines
  2394. # It's here because there is a 'continue' after title
  2395. if rules['finalescapetitle']:
  2396. ret = doFinalEscape(TARGET, ret)
  2397. return ret
  2398. def get(self):
  2399. "Returns the tagged title as a list."
  2400. ret = []
  2401. # Maybe some anchoring before?
  2402. anchor = self._get_tagged_anchor()
  2403. self.tag = regex['_anchor'].sub(anchor, self.tag)
  2404. ### Compose & escape title text (TOC uses unescaped)
  2405. full_title = self._get_full_title_text()
  2406. # Close previous section area
  2407. ret.extend(self.tag_hold)
  2408. self.tag_hold = []
  2409. tagged = regex['x'].sub(full_title, self.tag)
  2410. if rules['breaktitleopen']:
  2411. tagged = tagged + LB
  2412. # Adds "underline" on TXT target
  2413. if TARGET == 'txt':
  2414. ret.append('') # blank line before
  2415. ret.append(tagged)
  2416. # Get the right letter count for UTF
  2417. if string.lower(CONF['encoding']) == 'utf-8':
  2418. i = len(full_title.decode('utf-8'))
  2419. else:
  2420. i = len(full_title)
  2421. ret.append(regex['x'].sub('='*i, self.tag))
  2422. ret.append('') # blank line after
  2423. else:
  2424. ret.append(tagged)
  2425. return ret
  2426. def dump_marked_toc(self, max_level=99):
  2427. "Dumps all toc itens as a valid t2t markup list"
  2428. #TODO maybe use quote+linebreaks instead lists
  2429. ret = []
  2430. toc_count = 1
  2431. for level, count_id, txt, label in self.toc:
  2432. if level > max_level: continue # ignore
  2433. indent = ' '*level
  2434. id_txt = string.lstrip('%s %s'%(count_id, txt))
  2435. label = label or self.anchor_prefix+`toc_count`
  2436. toc_count = toc_count + 1
  2437. # TOC will have links
  2438. if TAGS['anchor']:
  2439. # TOC is more readable with master topics
  2440. # not linked at number. This is a stoled
  2441. # idea from Windows .CHM help files
  2442. if CONF['enum-title'] and level == 1:
  2443. tocitem = '%s+ [""%s"" #%s]'%(
  2444. indent, txt, label)
  2445. else:
  2446. tocitem = '%s- [""%s"" #%s]'%(
  2447. indent, id_txt, label)
  2448. # No links on TOC, just text
  2449. else:
  2450. # man don't reformat TOC lines, cool!
  2451. if TARGET == 'txt' or TARGET == 'man':
  2452. tocitem = '%s""%s""' %(
  2453. indent, id_txt)
  2454. else:
  2455. tocitem = '%s- ""%s""'%(
  2456. indent, id_txt)
  2457. ret.append(tocitem)
  2458. return ret
  2459. ##############################################################################
  2460. #TODO check all this table mess
# Handles TABLE lines, using the properties returned by parse_row().
# The BLOCK table() method then replaces the cells with the parsed ones.
  2463. class TableMaster:
  2464. def __init__(self, line=''):
  2465. self.rows = []
  2466. self.border = 0
  2467. self.align = 'Left'
  2468. self.cellalign = []
  2469. self.cellspan = []
  2470. if line:
  2471. prop = self.parse_row(line)
  2472. self.border = prop['border']
  2473. self.align = prop['align']
  2474. self.cellalign = prop['cellalign']
  2475. self.cellspan = prop['cellspan']
  2476. def _get_open_tag(self):
  2477. topen = TAGS['tableOpen']
  2478. tborder = TAGS['_tableBorder']
  2479. talign = TAGS['_tableAlign'+self.align]
  2480. calignsep = TAGS['tableColAlignSep']
  2481. calign = ''
  2482. # The first line defines if table has border or not
  2483. if not self.border: tborder = ''
  2484. # Set the columns alignment
  2485. if rules['tablecellaligntype'] == 'column':
  2486. calign = map(lambda x: TAGS['_tableColAlign%s'%x],
  2487. self.cellalign)
  2488. calign = string.join(calign, calignsep)
  2489. # Align full table, set border and Column align (if any)
  2490. topen = regex['_tableAlign' ].sub(talign , topen)
  2491. topen = regex['_tableBorder' ].sub(tborder, topen)
  2492. topen = regex['_tableColAlign'].sub(calign , topen)
  2493. # Tex table spec, border or not: {|l|c|r|} , {lcr}
  2494. if calignsep and not self.border:
  2495. # Remove cell align separator
  2496. topen = string.replace(topen, calignsep, '')
  2497. return topen
  2498. def _get_cell_align(self, cells):
  2499. ret = []
  2500. for cell in cells:
  2501. align = 'Left'
  2502. if string.strip(cell):
  2503. if cell[0] == ' ' and cell[-1] == ' ':
  2504. align = 'Center'
  2505. elif cell[0] == ' ':
  2506. align = 'Right'
  2507. ret.append(align)
  2508. return ret
  2509. def _get_cell_span(self, cells):
  2510. ret = []
  2511. for cell in cells:
  2512. span = 0
  2513. m = re.search('\a(\|+)$', cell)
  2514. if m: span = len(m.group(1))+1
  2515. ret.append(span)
  2516. return ret
  2517. def _tag_cells(self, rowdata):
  2518. row = []
  2519. cells = rowdata['cells']
  2520. open = TAGS['tableCellOpen']
  2521. close = TAGS['tableCellClose']
  2522. sep = TAGS['tableCellSep']
  2523. calign = map(lambda x: TAGS['_tableCellAlign'+x],
  2524. rowdata['cellalign'])
  2525. # Populate the span tag
  2526. cspan = []
  2527. for i in rowdata['cellspan']:
  2528. if i > 0:
  2529. cspan.append(regex['x'].sub(
  2530. str(i), TAGS['_tableCellColSpan']))
  2531. else:
  2532. cspan.append('')
  2533. # Maybe is it a title row?
  2534. if rowdata['title']:
  2535. open = TAGS['tableTitleCellOpen'] or open
  2536. close = TAGS['tableTitleCellClose'] or close
  2537. sep = TAGS['tableTitleCellSep'] or sep
  2538. # Should we break the line on *each* table cell?
  2539. if rules['breaktablecell']: close = close+'\n'
  2540. # Cells pre processing
  2541. if rules['tablecellstrip']:
  2542. cells = map(lambda x: string.strip(x), cells)
  2543. if rowdata['title'] and rules['tabletitlerowinbold']:
  2544. cells = map(lambda x: enclose_me('fontBold',x), cells)
  2545. # Add cell BEGIN/END tags
  2546. for cell in cells:
  2547. copen = open
  2548. # Make sure we will pop from some filled lists
  2549. # Fixes empty line bug '| |'
  2550. this_align = this_span = ''
  2551. if calign: this_align = calign.pop(0)
  2552. if cspan : this_span = cspan.pop(0)
  2553. # Insert cell align into open tag (if cell is alignable)
  2554. if rules['tablecellaligntype'] == 'cell':
  2555. copen = regex['_tableCellAlign'].sub(
  2556. this_align, copen)
  2557. if rules['tablecellspannable']:
  2558. copen = regex['_tableCellColSpan'].sub(
  2559. this_span, copen)
  2560. row.append(copen + cell + close)
  2561. # Maybe there are cell separators?
  2562. return string.join(row, sep)
  2563. def add_row(self, cells):
  2564. self.rows.append(cells)
  2565. def parse_row(self, line):
  2566. # Default table properties
  2567. ret = {'border':0,'title':0,'align':'Left',
  2568. 'cells':[],'cellalign':[], 'cellspan':[]}
  2569. # Detect table align (and remove spaces mark)
  2570. if line[0] == ' ': ret['align'] = 'Center'
  2571. line = string.lstrip(line)
  2572. # Detect title mark
  2573. if line[1] == '|': ret['title'] = 1
  2574. # Detect border mark and normalize the EOL
  2575. m = re.search(' (\|+) *$', line)
  2576. if m: line = line+' ' ; ret['border'] = 1
  2577. else: line = line+' | '
  2578. # Delete table mark
  2579. line = regex['table'].sub('', line)
  2580. # Detect colspan | foo | bar baz |||
  2581. line = re.sub(' (\|+)\| ', '\a\\1 | ', line)
  2582. # Split cells (the last is fake)
  2583. ret['cells'] = string.split(line, ' | ')[:-1]
  2584. # Find cells span
  2585. ret['cellspan'] = self._get_cell_span(ret['cells'])
  2586. # Remove span ID
  2587. ret['cells'] = map(lambda x:re.sub('\a\|+$','',x),ret['cells'])
  2588. # Find cells align
  2589. ret['cellalign'] = self._get_cell_align(ret['cells'])
  2590. # Hooray!
  2591. Debug('Table Prop: %s' % ret, 7)
  2592. return ret
  2593. def dump(self):
  2594. open = self._get_open_tag()
  2595. rows = self.rows
  2596. close = TAGS['tableClose']
  2597. rowopen = TAGS['tableRowOpen']
  2598. rowclose = TAGS['tableRowClose']
  2599. rowsep = TAGS['tableRowSep']
  2600. titrowopen = TAGS['tableTitleRowOpen'] or rowopen
  2601. titrowclose = TAGS['tableTitleRowClose'] or rowclose
  2602. if rules['breaktablelineopen']:
  2603. rowopen = rowopen + '\n'
  2604. titrowopen = titrowopen + '\n'
  2605. # Tex gotchas
  2606. if TARGET == 'tex':
  2607. if not self.border:
  2608. rowopen = titrowopen = ''
  2609. else:
  2610. close = rowopen + close
  2611. # Now we tag all the table cells on each row
  2612. #tagged_cells = map(lambda x: self._tag_cells(x), rows) #!py15
  2613. tagged_cells = []
  2614. for cell in rows: tagged_cells.append(self._tag_cells(cell))
  2615. # Add row separator tags between lines
  2616. tagged_rows = []
  2617. if rowsep:
  2618. #!py15
  2619. #tagged_rows = map(lambda x:x+rowsep, tagged_cells)
  2620. for cell in tagged_cells:
  2621. tagged_rows.append(cell+rowsep)
  2622. # Remove last rowsep, because the table is over
  2623. tagged_rows[-1] = string.replace(
  2624. tagged_rows[-1], rowsep, '')
  2625. # Add row BEGIN/END tags for each line
  2626. else:
  2627. for rowdata in rows:
  2628. if rowdata['title']:
  2629. o,c = titrowopen, titrowclose
  2630. else:
  2631. o,c = rowopen, rowclose
  2632. row = tagged_cells.pop(0)
  2633. tagged_rows.append(o + row + c)
  2634. fulltable = [open] + tagged_rows + [close]
  2635. if rules['blankendtable']: fulltable.append('')
  2636. return fulltable
  2637. ##############################################################################
  2638. class BlockMaster:
  2639. "TIP: use blockin/out to add/del holders"
  2640. def __init__(self):
  2641. self.BLK = []
  2642. self.HLD = []
  2643. self.PRP = []
  2644. self.depth = 0
  2645. self.last = ''
  2646. self.tableparser = None
  2647. self.contains = {
  2648. 'para' :['comment','raw'],
  2649. 'verb' :[],
  2650. 'table' :['comment'],
  2651. 'raw' :[],
  2652. 'tagged' :[],
  2653. 'comment' :[],
  2654. 'quote' :['quote','comment','raw'],
  2655. 'list' :['list' ,'numlist' ,'deflist','para','verb',
  2656. 'comment', 'raw'],
  2657. 'numlist' :['list' ,'numlist' ,'deflist','para','verb',
  2658. 'comment', 'raw'],
  2659. 'deflist' :['list' ,'numlist' ,'deflist','para','verb',
  2660. 'comment', 'raw']
  2661. }
  2662. self.allblocks = self.contains.keys()
  2663. # If one found inside another, ignore the marks
  2664. self.exclusive = ['comment','verb','raw']
  2665. def block(self):
  2666. if not self.BLK: return ''
  2667. return self.BLK[-1]
  2668. def isblock(self, name=''):
  2669. return self.block() == name
  2670. def prop(self, key):
  2671. if not self.PRP: return ''
  2672. return self.PRP[-1].get(key) or ''
  2673. def propset(self, key, val):
  2674. self.PRP[-1][key] = val
  2675. #Debug('BLOCK prop ++: %s->%s'%(key,repr(val)), 1)
  2676. #Debug('BLOCK props: %s'%(repr(self.PRP)), 1)
  2677. def hold(self):
  2678. if not self.HLD: return []
  2679. return self.HLD[-1]
  2680. def holdadd(self, line):
  2681. if self.block()[-4:] == 'list': line = [line]
  2682. self.HLD[-1].append(line)
  2683. Debug('HOLD add: %s'%repr(line), 4)
  2684. Debug('FULL HOLD: %s'%self.HLD, 4)
  2685. def holdaddsub(self, line):
  2686. self.HLD[-1][-1].append(line)
  2687. Debug('HOLD addsub: %s'%repr(line), 4)
  2688. Debug('FULL HOLD: %s'%self.HLD, 4)
  2689. def holdextend(self, lines):
  2690. if self.block()[-4:] == 'list': lines = [lines]
  2691. self.HLD[-1].extend(lines)
  2692. Debug('HOLD extend: %s'%repr(lines), 4)
  2693. Debug('FULL HOLD: %s'%self.HLD, 4)
  2694. def blockin(self, block):
  2695. ret = []
  2696. if block not in self.allblocks:
  2697. Error("Invalid block '%s'"%block)
  2698. # First, let's close other possible open blocks
  2699. while self.block() and block not in self.contains[self.block()]:
  2700. ret.extend(self.blockout())
  2701. # Now we can gladly add this new one
  2702. self.BLK.append(block)
  2703. self.HLD.append([])
  2704. self.PRP.append({})
  2705. if block == 'table': self.tableparser = TableMaster()
  2706. # Deeper and deeper
  2707. self.depth = len(self.BLK)
  2708. Debug('block ++ (%s): %s' % (block,self.BLK), 3)
  2709. return ret
  2710. def blockout(self):
  2711. if not self.BLK: Error('No block to pop')
  2712. self.last = self.BLK.pop()
  2713. result = getattr(self, self.last)()
  2714. parsed = self.HLD.pop()
  2715. self.PRP.pop()
  2716. self.depth = len(self.BLK)
  2717. if self.last == 'table': del self.tableparser
  2718. # Inserting a nested block into mother
  2719. if self.block():
  2720. if self.last != 'comment': # ignore comment blocks
  2721. if self.block()[-4:] == 'list':
  2722. self.HLD[-1][-1].append(result)
  2723. else:
  2724. self.HLD[-1].append(result)
  2725. # Reset now. Mother block will have it all
  2726. result = []
  2727. Debug('block -- (%s): %s' % (self.last,self.BLK), 3)
  2728. Debug('RELEASED (%s): %s' % (self.last,parsed), 3)
  2729. if result: Debug('BLOCK: %s'%result, 6)
  2730. return result
  2731. def _last_escapes(self, line):
  2732. return doFinalEscape(TARGET, line)
  2733. def _get_escaped_hold(self):
  2734. ret = []
  2735. for line in self.hold():
  2736. linetype = type(line)
  2737. if linetype == type('') or linetype == type(u''):
  2738. ret.append(self._last_escapes(line))
  2739. elif linetype == type([]):
  2740. ret.extend(line)
  2741. else:
  2742. Error("BlockMaster: Unknown HOLD item type:"
  2743. " %s"%linetype)
  2744. return ret
  2745. def _remove_twoblanks(self, lastitem):
  2746. if len(lastitem) > 1 and lastitem[-2:] == ['','']:
  2747. return lastitem[:-2]
  2748. return lastitem
  2749. def tagged(self):
  2750. return self.hold()
  2751. def comment(self):
  2752. return ''
  2753. def raw(self):
  2754. lines = self.hold()
  2755. return map(lambda x: doEscape(TARGET, x), lines)
  2756. def para(self):
  2757. result = []
  2758. open = TAGS['paragraphOpen']
  2759. close = TAGS['paragraphClose']
  2760. lines = self._get_escaped_hold()
  2761. # Open (or not) paragraph
  2762. if not open+close and self.last == 'para':
  2763. pass # avoids multiple blank lines
  2764. else:
  2765. result.append(open)
  2766. # Pagemaker likes a paragraph as a single long line
  2767. if rules['onelinepara']:
  2768. result.append(string.join(lines,' '))
  2769. # Others are normal :)
  2770. else:
  2771. result.extend(lines)
  2772. result.append(close)
  2773. # Very very very very very very very very very UGLY fix
  2774. # Needed because <center> can't appear inside <p>
  2775. try:
  2776. if len(lines) == 1 and \
  2777. TARGET in ('html', 'xhtml') and \
  2778. re.match('^\s*<center>.*</center>\s*$', lines[0]):
  2779. result = [lines[0]]
  2780. except: pass
  2781. return result
  2782. def verb(self):
  2783. "Verbatim lines are not masked, so there's no need to unmask"
  2784. result = []
  2785. result.append(TAGS['blockVerbOpen'])
  2786. for line in self.hold():
  2787. if self.prop('mapped') == 'table':
  2788. line = MacroMaster().expand(line)
  2789. if not rules['verbblocknotescaped']:
  2790. line = doEscape(TARGET,line)
  2791. if rules['indentverbblock']:
  2792. line = ' '+line
  2793. if rules['verbblockfinalescape']:
  2794. line = doFinalEscape(TARGET, line)
  2795. result.append(line)
  2796. #TODO maybe use if not TAGS['blockVerbClose']
  2797. if TARGET != 'pm6':
  2798. result.append(TAGS['blockVerbClose'])
  2799. return result
  2800. def table(self):
  2801. # Rewrite all table cells by the unmasked and escaped data
  2802. lines = self._get_escaped_hold()
  2803. for i in xrange(len(lines)):
  2804. cells = string.split(lines[i], SEPARATOR)
  2805. self.tableparser.rows[i]['cells'] = cells
  2806. return self.tableparser.dump()
  2807. def quote(self):
  2808. result = []
  2809. myre = regex['quote']
  2810. open = TAGS['blockQuoteOpen'] # block based
  2811. close = TAGS['blockQuoteClose']
  2812. qline = TAGS['blockQuoteLine'] # line based
  2813. indent = tagindent = '\t'*self.depth
  2814. if rules['tagnotindentable']: tagindent = ''
  2815. if not rules['keepquoteindent']: indent = ''
  2816. if open: result.append(tagindent+open) # open block
  2817. for item in self.hold():
  2818. if type(item) == type([]):
  2819. result.extend(item) # subquotes
  2820. else:
  2821. item = myre.sub('', item) # del TABs
  2822. if rules['barinsidequote']:
  2823. item = get_tagged_bar(item)
  2824. item = self._last_escapes(item)
  2825. item = qline*self.depth + item
  2826. result.append(indent+item) # quote line
  2827. if close: result.append(tagindent+close) # close block
  2828. return result
  2829. def deflist(self): return self.list('deflist')
  2830. def numlist(self): return self.list('numlist')
  2831. def list(self, name='list'):
  2832. result = []
  2833. items = self.hold()
  2834. indent = self.prop('indent')
  2835. tagindent = indent
  2836. listopen = TAGS.get(name+'Open')
  2837. listclose = TAGS.get(name+'Close')
  2838. listline = TAGS.get(name+'ItemLine')
  2839. itemcount = 0
  2840. if rules['tagnotindentable']: tagindent = ''
  2841. if not rules['keeplistindent']: indent = tagindent = ''
  2842. if name == 'deflist':
  2843. itemopen = TAGS[name+'Item1Open']
  2844. itemclose = TAGS[name+'Item2Close']
  2845. itemsep = TAGS[name+'Item1Close']+\
  2846. TAGS[name+'Item2Open']
  2847. else:
  2848. itemopen = TAGS[name+'ItemOpen']
  2849. itemclose = TAGS[name+'ItemClose']
  2850. itemsep = ''
  2851. # ItemLine: number of leading chars identifies list depth
  2852. if listline:
  2853. itemopen = listline*self.depth + itemopen
  2854. # Adds trailing space on opening tags
  2855. if (name == 'list' and rules['spacedlistitemopen']) or \
  2856. (name == 'numlist' and rules['spacednumlistitemopen']):
  2857. itemopen = itemopen + ' '
  2858. # Remove two-blanks from list ending mark, to avoid <p>
  2859. items[-1] = self._remove_twoblanks(items[-1])
  2860. # Open list (not nestable lists are only opened at mother)
  2861. if listopen and not \
  2862. (rules['listnotnested'] and BLOCK.depth != 1):
  2863. result.append(tagindent+listopen)
  2864. # Tag each list item (multiline items)
  2865. itemopenorig = itemopen
  2866. for item in items:
  2867. # Add "manual" item count for noautonum targets
  2868. itemcount = itemcount + 1
  2869. if name == 'numlist' and not rules['autonumberlist']:
  2870. n = str(itemcount)
  2871. itemopen = regex['x'].sub(n, itemopenorig)
  2872. del n
  2873. # Tag it
  2874. item[0] = self._last_escapes(item[0])
  2875. if name == 'deflist':
  2876. z,term,rest = string.split(item[0],SEPARATOR,2)
  2877. item[0] = rest
  2878. if not item[0]: del item[0] # to avoid <p>
  2879. result.append(tagindent+itemopen+term+itemsep)
  2880. else:
  2881. fullitem = tagindent+itemopen
  2882. result.append(string.replace(
  2883. item[0], SEPARATOR, fullitem))
  2884. del item[0]
  2885. # Process next lines for this item (if any)
  2886. for line in item:
  2887. if type(line) == type([]): # sublist inside
  2888. result.extend(line)
  2889. else:
  2890. line = self._last_escapes(line)
  2891. # Blank lines turns to <p>
  2892. if not line and rules['parainsidelist']:
  2893. line = string.rstrip(indent +\
  2894. TAGS['paragraphOpen']+\
  2895. TAGS['paragraphClose'])
  2896. # Some targets don't like identation here (wiki)
  2897. if not rules['keeplistindent'] or (name == 'deflist' and rules['deflisttextstrip']):
  2898. line = string.lstrip(line)
  2899. # Maybe we have a line prefix to add? (wiki)
  2900. if name == 'deflist' and TAGS['deflistItem2LinePrefix']:
  2901. line = TAGS['deflistItem2LinePrefix'] + line
  2902. result.append(line)
  2903. # Close item (if needed)
  2904. if itemclose: result.append(tagindent+itemclose)
  2905. # Close list (not nestable lists are only closed at mother)
  2906. if listclose and not \
  2907. (rules['listnotnested'] and BLOCK.depth != 1):
  2908. result.append(tagindent+listclose)
  2909. if rules['blankendmotherlist'] and BLOCK.depth == 1:
  2910. result.append('')
  2911. return result
  2912. ##############################################################################
  2913. class MacroMaster:
  2914. def __init__(self, config={}):
  2915. self.name = ''
  2916. self.config = config or CONF
  2917. self.infile = self.config['sourcefile']
  2918. self.outfile = self.config['outfile']
  2919. self.currdate = time.localtime(time.time())
  2920. self.rgx = regex.get('macros') or getRegexes()['macros']
  2921. self.fileinfo = { 'infile': None, 'outfile': None }
  2922. self.dft_fmt = MACROS
  2923. def walk_file_format(self, fmt):
  2924. "Walks the %%{in/out}file format string, expanding the % flags"
  2925. i = 0; ret = '' # counter/hold
  2926. while i < len(fmt): # char by char
  2927. c = fmt[i]; i = i + 1
  2928. if c == '%': # hot char!
  2929. if i == len(fmt): # % at the end
  2930. ret = ret + c
  2931. break
  2932. c = fmt[i]; i = i + 1 # read next
  2933. ret = ret + self.expand_file_flag(c)
  2934. else:
  2935. ret = ret +c # common char
  2936. return ret
  2937. def expand_file_flag(self, flag):
  2938. "%f: filename %F: filename (w/o extension)"
  2939. "%d: dirname %D: dirname (only parent dir)"
  2940. "%p: file path %e: extension"
  2941. info = self.fileinfo[self.name] # get dict
  2942. if flag == '%': x = '%' # %% -> %
  2943. elif flag == 'f': x = info['name']
  2944. elif flag == 'F': x = re.sub('\.[^.]*$','',info['name'])
  2945. elif flag == 'd': x = info['dir']
  2946. elif flag == 'D': x = os.path.split(info['dir'])[-1]
  2947. elif flag == 'p': x = info['path']
  2948. elif flag == 'e': x = re.search('.(\.([^.]+))?$',info['name']
  2949. ).group(2) or ''
  2950. #TODO simpler way for %e ?
  2951. else : x = '%'+flag # false alarm
  2952. return x
  2953. def set_file_info(self, macroname):
  2954. if self.fileinfo.get(macroname): return # already done
  2955. file = getattr(self, self.name) # self.infile
  2956. if file == STDOUT or file == MODULEOUT:
  2957. dir = ''; path = name = file
  2958. else:
  2959. path = os.path.abspath(file)
  2960. dir = os.path.dirname(path)
  2961. name = os.path.basename(path)
  2962. self.fileinfo[macroname] = {'path':path,'dir':dir,'name':name}
  2963. def expand(self, line=''):
  2964. "Expand all macros found on the line"
  2965. while self.rgx.search(line):
  2966. m = self.rgx.search(line)
  2967. name = self.name = string.lower(m.group('name'))
  2968. fmt = m.group('fmt') or self.dft_fmt.get(name)
  2969. if name == 'date':
  2970. txt = time.strftime(fmt,self.currdate)
  2971. elif name == 'mtime':
  2972. if self.infile in (STDIN, MODULEIN):
  2973. fdate = self.currdate
  2974. else:
  2975. mtime = os.path.getmtime(self.infile)
  2976. fdate = time.localtime(mtime)
  2977. txt = time.strftime(fmt,fdate)
  2978. elif name == 'infile' or name == 'outfile':
  2979. self.set_file_info(name)
  2980. txt = self.walk_file_format(fmt)
  2981. else:
  2982. Error("Unknown macro name '%s'"%name)
  2983. line = self.rgx.sub(txt,line,1)
  2984. return line
  2985. ##############################################################################
  2986. def dumpConfig(source_raw, parsed_config):
  2987. onoff = {1:_('ON'), 0:_('OFF')}
  2988. data = [
  2989. (_('RC file') , RC_RAW ),
  2990. (_('source document'), source_raw ),
  2991. (_('command line') , CMDLINE_RAW)
  2992. ]
  2993. # First show all RAW data found
  2994. for label, cfg in data:
  2995. print _('RAW config for %s')%label
  2996. for target,key,val in cfg:
  2997. target = '(%s)'%target
  2998. key = dotted_spaces("%-14s"%key)
  2999. val = val or _('ON')
  3000. print ' %-8s %s: %s'%(target,key,val)
  3001. print
  3002. # Then the parsed results of all of them
  3003. print _('Full PARSED config')
  3004. keys = parsed_config.keys() ; keys.sort() # sorted
  3005. for key in keys:
  3006. val = parsed_config[key]
  3007. # Filters are the last
  3008. if key == 'preproc' or key == 'postproc':
  3009. continue
  3010. # Flag beautifier
  3011. if key in FLAGS.keys() or key in ACTIONS.keys():
  3012. val = onoff.get(val) or val
  3013. # List beautifier
  3014. if type(val) == type([]):
  3015. if key == 'options': sep = ' '
  3016. else : sep = ', '
  3017. val = string.join(val, sep)
  3018. print "%25s: %s"%(dotted_spaces("%-14s"%key),val)
  3019. print
  3020. print _('Active filters')
  3021. for filter in ['preproc','postproc']:
  3022. for rule in parsed_config.get(filter) or []:
  3023. print "%25s: %s -> %s"%(
  3024. dotted_spaces("%-14s"%filter),rule[0],rule[1])
  3025. def get_file_body(file):
  3026. "Returns all the document BODY lines"
  3027. return process_source_file(file, noconf=1)[1][2]
  3028. def finish_him(outlist, config):
  3029. "Writing output to screen or file"
  3030. outfile = config['outfile']
  3031. outlist = unmaskEscapeChar(outlist)
  3032. outlist = expandLineBreaks(outlist)
  3033. # Apply PostProc filters
  3034. if config['postproc']:
  3035. filters = compile_filters(config['postproc'],
  3036. _('Invalid PostProc filter regex'))
  3037. postoutlist = []
  3038. errmsg = _('Invalid PostProc filter replacement')
  3039. for line in outlist:
  3040. for rgx,repl in filters:
  3041. try: line = rgx.sub(repl, line)
  3042. except: Error("%s: '%s'"%(errmsg, repl))
  3043. postoutlist.append(line)
  3044. outlist = postoutlist[:]
  3045. if outfile == MODULEOUT:
  3046. return outlist
  3047. elif outfile == STDOUT:
  3048. if GUI:
  3049. return outlist, config
  3050. else:
  3051. for line in outlist: print line
  3052. else:
  3053. Savefile(outfile, addLineBreaks(outlist))
  3054. if not GUI and not QUIET:
  3055. print _('%s wrote %s')%(my_name,outfile)
  3056. if config['split']:
  3057. if not QUIET: print "--- html..."
  3058. sgml2html = 'sgml2html -s %s -l %s %s'%(
  3059. config['split'],config['lang'] or lang,outfile)
  3060. if not QUIET: print "Running system command:", sgml2html
  3061. os.system(sgml2html)
  3062. def toc_inside_body(body, toc, config):
  3063. ret = []
  3064. if AUTOTOC: return body # nothing to expand
  3065. toc_mark = MaskMaster().tocmask
  3066. # Expand toc mark with TOC contents
  3067. for line in body:
  3068. if string.count(line, toc_mark): # toc mark found
  3069. if config['toc']:
  3070. ret.extend(toc) # include if --toc
  3071. else:
  3072. pass # or remove %%toc line
  3073. else:
  3074. ret.append(line) # common line
  3075. return ret
  3076. def toc_tagger(toc, config):
  3077. "Convert t2t-marked TOC (it is a list) to target-tagged TOC"
  3078. ret = []
  3079. # Tag if TOC-only TOC "by hand" (target don't have a TOC tag)
  3080. if config['toc-only'] or (config['toc'] and not TAGS['TOC']):
  3081. fakeconf = config.copy()
  3082. fakeconf['headers'] = 0
  3083. fakeconf['toc-only'] = 0
  3084. fakeconf['mask-email'] = 0
  3085. fakeconf['preproc'] = []
  3086. fakeconf['postproc'] = []
  3087. fakeconf['css-sugar'] = 0
  3088. ret,foo = convert(toc, fakeconf)
  3089. set_global_config(config) # restore config
  3090. # Target TOC is a tag
  3091. elif config['toc'] and TAGS['TOC']:
  3092. ret = [TAGS['TOC']]
  3093. return ret
  3094. def toc_formatter(toc, config):
  3095. "Formats TOC for automatic placement between headers and body"
  3096. if config['toc-only']: return toc # no formatting needed
  3097. if not config['toc'] : return [] # TOC disabled
  3098. ret = toc
  3099. # TOC open/close tags (if any)
  3100. if TAGS['tocOpen' ]: ret.insert(0, TAGS['tocOpen'])
  3101. if TAGS['tocClose']: ret.append(TAGS['tocClose'])
  3102. # Autotoc specific formatting
  3103. if AUTOTOC:
  3104. if rules['autotocwithbars']: # TOC between bars
  3105. para = TAGS['paragraphOpen']+TAGS['paragraphClose']
  3106. bar = regex['x'].sub('-'*72,TAGS['bar1'])
  3107. tocbar = [para, bar, para]
  3108. ret = tocbar + ret + tocbar
  3109. if rules['blankendautotoc']: # blank line after TOC
  3110. ret.append('')
  3111. if rules['autotocnewpagebefore']: # page break before TOC
  3112. ret.insert(0,TAGS['pageBreak'])
  3113. if rules['autotocnewpageafter']: # page break after TOC
  3114. ret.append(TAGS['pageBreak'])
  3115. return ret
  3116. def doHeader(headers, config):
  3117. if not config['headers']: return []
  3118. if not headers: headers = ['','','']
  3119. target = config['target']
  3120. if not HEADER_TEMPLATE.has_key(target):
  3121. Error("doheader: Unknow target '%s'"%target)
  3122. if target in ('html','xhtml') and config.get('css-sugar'):
  3123. template = string.split(HEADER_TEMPLATE[target+'css'], '\n')
  3124. else:
  3125. template = string.split(HEADER_TEMPLATE[target], '\n')
  3126. head_data = {'STYLE':[], 'ENCODING':''}
  3127. for key in head_data.keys():
  3128. val = config.get(string.lower(key))
  3129. # Remove .sty extension from each style filename (freaking tex)
  3130. # XXX Can't handle --style foo.sty,bar.sty
  3131. if target == 'tex' and key == 'STYLE':
  3132. val = map(lambda x:re.sub('(?i)\.sty$','',x), val)
  3133. if key == 'ENCODING':
  3134. val = get_encoding_string(val, target)
  3135. head_data[key] = val
  3136. # Parse header contents
  3137. for i in 0,1,2:
  3138. # Expand macros
  3139. contents = MacroMaster(config=config).expand(headers[i])
  3140. # Escapes - on tex, just do it if any \tag{} present
  3141. if target != 'tex' or \
  3142. (target == 'tex' and re.search(r'\\\w+{', contents)):
  3143. contents = doEscape(target, contents)
  3144. if target == 'lout':
  3145. contents = doFinalEscape(target, contents)
  3146. head_data['HEADER%d'%(i+1)] = contents
  3147. # css-inside removes STYLE line
  3148. #XXX In tex, this also removes the modules call (%!style:amsfonts)
  3149. if target in ('html','xhtml') and config.get('css-inside') and \
  3150. config.get('style'):
  3151. head_data['STYLE'] = []
  3152. Debug("Header Data: %s"%head_data, 1)
  3153. # Scan for empty dictionary keys
  3154. # If found, scan template lines for that key reference
  3155. # If found, remove the reference
  3156. # If there isn't any other key reference on the same line, remove it
  3157. #TODO loop by template line > key
  3158. for key in head_data.keys():
  3159. if head_data.get(key): continue
  3160. for line in template:
  3161. if string.count(line, '%%(%s)s'%key):
  3162. sline = string.replace(line, '%%(%s)s'%key, '')
  3163. if not re.search(r'%\([A-Z0-9]+\)s', sline):
  3164. template.remove(line)
  3165. # Style is a multiple tag.
  3166. # - If none or just one, use default template
  3167. # - If two or more, insert extra lines in a loop (and remove original)
  3168. styles = head_data['STYLE']
  3169. if len(styles) == 1:
  3170. head_data['STYLE'] = styles[0]
  3171. elif len(styles) > 1:
  3172. style_mark = '%(STYLE)s'
  3173. for i in xrange(len(template)):
  3174. if string.count(template[i], style_mark):
  3175. while styles:
  3176. template.insert(i+1,
  3177. string.replace(
  3178. template[i],
  3179. style_mark,
  3180. styles.pop()))
  3181. del template[i]
  3182. break
  3183. # Populate template with data (dict expansion)
  3184. template = string.join(template, '\n') % head_data
  3185. # Adding CSS contents into template (for --css-inside)
  3186. # This code sux. Dirty++
  3187. if target in ('html','xhtml') and config.get('css-inside') and \
  3188. config.get('style'):
  3189. set_global_config(config) # usually on convert(), needed here
  3190. for i in xrange(len(config['style'])):
  3191. cssfile = config['style'][i]
  3192. if not os.path.isabs(cssfile):
  3193. infile = config.get('sourcefile')
  3194. cssfile = os.path.join(
  3195. os.path.dirname(infile), cssfile)
  3196. try:
  3197. contents = Readfile(cssfile, 1)
  3198. css = "\n%s\n%s\n%s\n%s\n" % (
  3199. doCommentLine("Included %s" % cssfile),
  3200. TAGS['cssOpen'],
  3201. string.join(contents, '\n'),
  3202. TAGS['cssClose'])
  3203. # Style now is content, needs escaping (tex)
  3204. #css = maskEscapeChar(css)
  3205. except:
  3206. errmsg = "CSS include failed for %s" % cssfile
  3207. css = "\n%s\n" % (doCommentLine(errmsg))
  3208. # Insert this CSS file contents on the template
  3209. template = re.sub('(?i)(</HEAD>)', css+r'\1', template)
  3210. # template = re.sub(r'(?i)(\\begin{document})',
  3211. # css+'\n'+r'\1', template) # tex
  3212. # The last blank line to keep everything separated
  3213. template = re.sub('(?i)(</HEAD>)', '\n'+r'\1', template)
  3214. return string.split(template, '\n')
  3215. def doCommentLine(txt):
  3216. # The -- string ends a (h|sg|xht)ml comment :(
  3217. txt = maskEscapeChar(txt)
  3218. if string.count(TAGS['comment'], '--') and \
  3219. string.count(txt, '--'):
  3220. txt = re.sub('-(?=-)', r'-\\', txt)
  3221. if TAGS['comment']:
  3222. return regex['x'].sub(txt, TAGS['comment'])
  3223. return ''
  3224. def doFooter(config):
  3225. if not config['headers']: return []
  3226. ret = []
  3227. target = config['target']
  3228. cmdline = config['realcmdline']
  3229. typename = target
  3230. if target == 'tex': typename = 'LaTeX2e'
  3231. ppgd = '%s code generated by %s %s (%s)'%(
  3232. typename,my_name,my_version,my_url)
  3233. cmdline = 'cmdline: %s %s'%(my_name, string.join(cmdline, ' '))
  3234. ret.append('')
  3235. ret.append(doCommentLine(ppgd))
  3236. ret.append(doCommentLine(cmdline))
  3237. ret.append(TAGS['EOD'])
  3238. return ret
  3239. def doEscape(target,txt):
  3240. "Target-specific special escapes. Apply *before* insert any tag."
  3241. tmpmask = 'vvvvThisEscapingSuxvvvv'
  3242. if target in ('html','sgml','xhtml'):
  3243. txt = re.sub('&','&amp;',txt)
  3244. txt = re.sub('<','&lt;',txt)
  3245. txt = re.sub('>','&gt;',txt)
  3246. if target == 'sgml':
  3247. txt = re.sub('\xff','&yuml;',txt) # "+y
  3248. elif target == 'pm6':
  3249. txt = re.sub('<','<\#60>',txt)
  3250. elif target == 'mgp':
  3251. txt = re.sub('^%',' %',txt) # add leading blank to avoid parse
  3252. elif target == 'man':
  3253. txt = re.sub("^([.'])", '\\&\\1',txt) # command ID
  3254. txt = string.replace(txt,ESCCHAR, ESCCHAR+'e') # \e
  3255. elif target == 'lout':
  3256. # TIP: / moved to FinalEscape to avoid //italic//
  3257. # TIP: these are also converted by lout: ... --- --
  3258. txt = string.replace(txt, ESCCHAR, tmpmask) # \
  3259. txt = string.replace(txt, '"', '"%s""'%ESCCHAR) # "\""
  3260. txt = re.sub('([|&{}@#^~])', '"\\1"',txt) # "@"
  3261. txt = string.replace(txt, tmpmask, '"%s"'%(ESCCHAR*2)) # "\\"
  3262. elif target == 'tex':
  3263. # Mark literal \ to be changed to $\backslash$ later
  3264. txt = string.replace( txt, ESCCHAR, tmpmask)
  3265. txt = re.sub('([#$&%{}])', ESCCHAR+r'\1' , txt) # \%
  3266. txt = re.sub('([~^])' , ESCCHAR+r'\1{}', txt) # \~{}
  3267. txt = re.sub('([<|>])' , r'$\1$', txt) # $>$
  3268. txt = string.replace(txt, tmpmask,
  3269. maskEscapeChar(r'$\backslash$'))
  3270. # TIP the _ is escaped at the end
  3271. return txt
  3272. # TODO man: where - really needs to be escaped?
  3273. def doFinalEscape(target, txt):
  3274. "Last escapes of each line"
  3275. if target == 'pm6' : txt = string.replace(txt,ESCCHAR+'<',r'<\#92><')
  3276. elif target == 'man' : txt = string.replace(txt, '-', r'\-')
  3277. elif target == 'sgml': txt = string.replace(txt, '[', '&lsqb;')
  3278. elif target == 'lout': txt = string.replace(txt, '/', '"/"')
  3279. elif target == 'tex' :
  3280. txt = string.replace(txt, '_', r'\_')
  3281. txt = string.replace(txt, 'vvvvTexUndervvvv', '_') # shame!
  3282. return txt
  3283. def EscapeCharHandler(action, data):
  3284. "Mask/Unmask the Escape Char on the given string"
  3285. if not string.strip(data): return data
  3286. if action not in ('mask','unmask'):
  3287. Error("EscapeCharHandler: Invalid action '%s'"%action)
  3288. if action == 'mask': return string.replace(data,'\\',ESCCHAR)
  3289. else: return string.replace(data,ESCCHAR,'\\')
  3290. def maskEscapeChar(data):
  3291. "Replace any Escape Char \ with a text mask (Input: str or list)"
  3292. if type(data) == type([]):
  3293. return map(lambda x: EscapeCharHandler('mask', x), data)
  3294. return EscapeCharHandler('mask',data)
  3295. def unmaskEscapeChar(data):
  3296. "Undo the Escape char \ masking (Input: str or list)"
  3297. if type(data) == type([]):
  3298. return map(lambda x: EscapeCharHandler('unmask', x), data)
  3299. return EscapeCharHandler('unmask',data)
  3300. def addLineBreaks(mylist):
  3301. "use LB to respect sys.platform"
  3302. ret = []
  3303. for line in mylist:
  3304. line = string.replace(line,'\n',LB) # embedded \n's
  3305. ret.append(line+LB) # add final line break
  3306. return ret
  3307. # Convert ['foo\nbar'] to ['foo', 'bar']
  3308. def expandLineBreaks(mylist):
  3309. ret = []
  3310. for line in mylist:
  3311. ret.extend(string.split(line, '\n'))
  3312. return ret
  3313. def compile_filters(filters, errmsg='Filter'):
  3314. if filters:
  3315. for i in xrange(len(filters)):
  3316. patt,repl = filters[i]
  3317. try: rgx = re.compile(patt)
  3318. except: Error("%s: '%s'"%(errmsg, patt))
  3319. filters[i] = (rgx,repl)
  3320. return filters
  3321. def enclose_me(tagname, txt):
  3322. return TAGS.get(tagname+'Open') + txt + TAGS.get(tagname+'Close')
  3323. def beautify_me(name, line):
  3324. "where name is: bold, italic, underline or strike"
  3325. # Exception: Doesn't parse an horizontal bar as strike
  3326. if name == 'strike' and regex['bar'].search(line): return line
  3327. name = 'font%s' % string.capitalize(name)
  3328. open = TAGS['%sOpen'%name]
  3329. close = TAGS['%sClose'%name]
  3330. txt = r'%s\1%s'%(open, close)
  3331. line = regex[name].sub(txt,line)
  3332. return line
  3333. def get_tagged_link(label, url):
  3334. ret = ''
  3335. target = CONF['target']
  3336. image_re = regex['img']
  3337. # Set link type
  3338. if regex['email'].match(url):
  3339. linktype = 'email'
  3340. else:
  3341. linktype = 'url';
  3342. # Escape specials from TEXT parts
  3343. label = doEscape(target,label)
  3344. # Escape specials from link URL
  3345. if not rules['linkable'] or rules['escapeurl']:
  3346. url = doEscape(target, url)
  3347. # Adding protocol to guessed link
  3348. guessurl = ''
  3349. if linktype == 'url' and \
  3350. re.match('(?i)'+regex['_urlskel']['guess'], url):
  3351. if url[0] in 'Ww': guessurl = 'http://' +url
  3352. else : guessurl = 'ftp://' +url
  3353. # Not link aware targets -> protocol is useless
  3354. if not rules['linkable']: guessurl = ''
  3355. # Simple link (not guessed)
  3356. if not label and not guessurl:
  3357. if CONF['mask-email'] and linktype == 'email':
  3358. # Do the email mask feature (no TAGs, just text)
  3359. url = string.replace(url,'@',' (a) ')
  3360. url = string.replace(url,'.',' ')
  3361. url = "<%s>" % url
  3362. if rules['linkable']: url = doEscape(target, url)
  3363. ret = url
  3364. else:
  3365. # Just add link data to tag
  3366. tag = TAGS[linktype]
  3367. ret = regex['x'].sub(url,tag)
  3368. # Named link or guessed simple link
  3369. else:
  3370. # Adjusts for guessed link
  3371. if not label: label = url # no protocol
  3372. if guessurl : url = guessurl # with protocol
  3373. # Image inside link!
  3374. if image_re.match(label):
  3375. if rules['imglinkable']: # get image tag
  3376. label = parse_images(label)
  3377. else: # img@link !supported
  3378. label = "(%s)"%image_re.match(label).group(1)
  3379. # Putting data on the right appearance order
  3380. if rules['linkable']:
  3381. urlorder = [url, label] # link before label
  3382. else:
  3383. urlorder = [label, url] # label before link
  3384. # Add link data to tag (replace \a's)
  3385. ret = TAGS["%sMark"%linktype]
  3386. for data in urlorder:
  3387. ret = regex['x'].sub(data,ret,1)
  3388. return ret
  3389. def parse_deflist_term(line):
  3390. "Extract and parse definition list term contents"
  3391. img_re = regex['img']
  3392. term = regex['deflist'].search(line).group(3)
  3393. # Mask image inside term as (image.jpg), where not supported
  3394. if not rules['imgasdefterm'] and img_re.search(term):
  3395. while img_re.search(term):
  3396. imgfile = img_re.search(term).group(1)
  3397. term = img_re.sub('(%s)'%imgfile, term, 1)
  3398. #TODO tex: escape ] on term. \], \rbrack{} and \verb!]! don't work :(
  3399. return term
  3400. def get_tagged_bar(line):
  3401. m = regex['bar'].search(line)
  3402. if not m: return line
  3403. txt = m.group(2)
  3404. # Map strong bar to pagebreak
  3405. if rules['mapbar2pagebreak'] and TAGS['pageBreak']:
  3406. TAGS['bar2'] = TAGS['pageBreak']
  3407. # Set bar type
  3408. if txt[0] == '=': bar = TAGS['bar2']
  3409. else : bar = TAGS['bar1']
  3410. # To avoid comment tag confusion like <!-- ------ -->
  3411. if string.count(TAGS['comment'], '--'):
  3412. txt = string.replace(txt,'--','__')
  3413. # Tag line
  3414. return regex['x'].sub(txt, bar)
  3415. def get_image_align(line):
  3416. "Return the image (first found) align for the given line"
  3417. # First clear marks that can mess align detection
  3418. line = re.sub(SEPARATOR+'$', '', line) # remove deflist sep
  3419. line = re.sub('^'+SEPARATOR, '', line) # remove list sep
  3420. line = re.sub('^[\t]+' , '', line) # remove quote mark
  3421. # Get image position on the line
  3422. m = regex['img'].search(line)
  3423. ini = m.start() ; head = 0
  3424. end = m.end() ; tail = len(line)
  3425. # The align detection algorithm
  3426. if ini == head and end != tail: align = 'left' # ^img + text$
  3427. elif ini != head and end == tail: align = 'right' # ^text + img$
  3428. else : align = 'center' # default align
  3429. # Some special cases
  3430. if BLOCK.isblock('table'): align = 'center' # ignore when table
  3431. # if TARGET == 'mgp' and align == 'center': align = 'center'
  3432. return align
  3433. # Reference: http://www.iana.org/assignments/character-sets
  3434. # http://www.drclue.net/F1.cgi/HTML/META/META.html
  3435. def get_encoding_string(enc, target):
  3436. if not enc: return ''
  3437. # Target specific translation table
  3438. translate = {
  3439. 'tex': {
  3440. # missing: ansinew , applemac , cp437 , cp437de , cp865
  3441. 'us-ascii' : 'ascii',
  3442. 'windows-1250': 'cp1250',
  3443. 'windows-1252': 'cp1252',
  3444. 'ibm850' : 'cp850',
  3445. 'ibm852' : 'cp852',
  3446. 'iso-8859-1' : 'latin1',
  3447. 'iso-8859-2' : 'latin2',
  3448. 'iso-8859-3' : 'latin3',
  3449. 'iso-8859-4' : 'latin4',
  3450. 'iso-8859-5' : 'latin5',
  3451. 'iso-8859-9' : 'latin9',
  3452. 'koi8-r' : 'koi8-r'
  3453. }
  3454. }
  3455. # Normalization
  3456. enc = re.sub('(?i)(us[-_]?)?ascii|us|ibm367','us-ascii' , enc)
  3457. enc = re.sub('(?i)(ibm|cp)?85([02])' ,'ibm85\\2' , enc)
  3458. enc = re.sub('(?i)(iso[_-]?)?8859[_-]?' ,'iso-8859-' , enc)
  3459. enc = re.sub('iso-8859-($|[^1-9]).*' ,'iso-8859-1', enc)
  3460. # Apply translation table
  3461. try: enc = translate[target][string.lower(enc)]
  3462. except: pass
  3463. return enc
  3464. ##############################################################################
  3465. ##MerryChristmas,IdontwanttofighttonightwithyouImissyourbodyandIneedyourlove##
  3466. ##############################################################################
  3467. def process_source_file(file='', noconf=0, contents=[]):
  3468. """
  3469. Find and Join all the configuration available for a source file.
  3470. No sanity checking is done on this step.
  3471. It also extracts the source document parts into separate holders.
  3472. The config scan order is:
  3473. 1. The user configuration file (i.e. $HOME/.txt2tagsrc)
  3474. 2. The source document's CONF area
  3475. 3. The command line options
  3476. The return data is a tuple of two items:
  3477. 1. The parsed config dictionary
  3478. 2. The document's parts, as a (head, conf, body) tuple
  3479. All the conversion process will be based on the data and
  3480. configuration returned by this function.
  3481. The source files is read on this step only.
  3482. """
  3483. if contents:
  3484. source = SourceDocument(contents=contents)
  3485. else:
  3486. source = SourceDocument(file)
  3487. head, conf, body = source.split()
  3488. Message(_("Source document contents stored"),2)
  3489. if not noconf:
  3490. # Read document config
  3491. source_raw = source.get_raw_config()
  3492. # Join all the config directives found, then parse it
  3493. full_raw = RC_RAW + source_raw + CMDLINE_RAW
  3494. Message(_("Parsing and saving all config found (%03d items)")%(
  3495. len(full_raw)),1)
  3496. full_parsed = ConfigMaster(full_raw).parse()
  3497. # Add manually the filename to the conf dic
  3498. if contents:
  3499. full_parsed['sourcefile'] = MODULEIN
  3500. full_parsed['infile'] = MODULEIN
  3501. full_parsed['outfile'] = MODULEOUT
  3502. else:
  3503. full_parsed['sourcefile'] = file
  3504. # Maybe should we dump the config found?
  3505. if full_parsed.get('dump-config'):
  3506. dumpConfig(source_raw, full_parsed)
  3507. Quit()
  3508. # The user just want to know a single config value (hidden feature)
  3509. #TODO pick a better name than --show-config-value
  3510. elif full_parsed.get('show-config-value'):
  3511. config_value = full_parsed.get(full_parsed['show-config-value'])
  3512. if config_value:
  3513. if type(config_value) == type([]):
  3514. print '\n'.join(config_value)
  3515. else:
  3516. print config_value
  3517. Quit()
  3518. # Okay, all done
  3519. Debug("FULL config for this file: %s"%full_parsed, 1)
  3520. else:
  3521. full_parsed = {}
  3522. return full_parsed, (head,conf,body)
  3523. def get_infiles_config(infiles):
  3524. """
  3525. Find and Join into a single list, all configuration available
  3526. for each input file. This function is supposed to be the very
  3527. first one to be called, before any processing.
  3528. """
  3529. return map(process_source_file, infiles)
  3530. def convert_this_files(configs):
  3531. global CONF
  3532. for myconf,doc in configs: # multifile support
  3533. target_head = []
  3534. target_toc = []
  3535. target_body = []
  3536. target_foot = []
  3537. source_head, source_conf, source_body = doc
  3538. myconf = ConfigMaster().sanity(myconf)
  3539. # Compose the target file Headers
  3540. #TODO escape line before?
  3541. #TODO see exceptions by tex and mgp
  3542. Message(_("Composing target Headers"),1)
  3543. target_head = doHeader(source_head, myconf)
  3544. # Parse the full marked body into tagged target
  3545. first_body_line = (len(source_head) or 1)+ len(source_conf) + 1
  3546. Message(_("Composing target Body"),1)
  3547. target_body, marked_toc = convert(source_body, myconf,
  3548. firstlinenr=first_body_line)
  3549. # If dump-source, we're done
  3550. if myconf['dump-source']:
  3551. for line in source_head+source_conf+target_body:
  3552. print line
  3553. return
  3554. # Make TOC (if needed)
  3555. Message(_("Composing target TOC"),1)
  3556. tagged_toc = toc_tagger(marked_toc, myconf)
  3557. target_toc = toc_formatter(tagged_toc, myconf)
  3558. target_body = toc_inside_body(target_body, target_toc, myconf)
  3559. if not AUTOTOC and not myconf['toc-only']: target_toc = []
  3560. # Compose the target file Footer
  3561. Message(_("Composing target Footer"),1)
  3562. target_foot = doFooter(myconf)
  3563. # Finally, we have our document
  3564. outlist = target_head + target_toc + target_body + target_foot
  3565. # If on GUI, abort before finish_him
  3566. # If module, return finish_him as list
  3567. # Else, write results to file or STDOUT
  3568. if GUI:
  3569. return outlist, myconf
  3570. elif myconf.get('outfile') == MODULEOUT:
  3571. return finish_him(outlist, myconf), myconf
  3572. else:
  3573. Message(_("Saving results to the output file"),1)
  3574. finish_him(outlist, myconf)
  3575. def parse_images(line):
  3576. "Tag all images found"
  3577. while regex['img'].search(line) and TAGS['img'] != '[\a]':
  3578. txt = regex['img'].search(line).group(1)
  3579. tag = TAGS['img']
  3580. # If target supports image alignment, here we go
  3581. if rules['imgalignable']:
  3582. align = get_image_align(line) # right
  3583. align_name = string.capitalize(align) # Right
  3584. # The align is a full tag, or part of the image tag (~A~)
  3585. if TAGS['imgAlign'+align_name]:
  3586. tag = TAGS['imgAlign'+align_name]
  3587. else:
  3588. align_tag = TAGS['_imgAlign'+align_name]
  3589. tag = regex['_imgAlign'].sub(align_tag, tag, 1)
  3590. # Dirty fix to allow centered solo images
  3591. if align == 'center' and TARGET in ('html','xhtml'):
  3592. rest = regex['img'].sub('',line,1)
  3593. if re.match('^\s+$', rest):
  3594. tag = "<center>%s</center>" %tag
  3595. if TARGET == 'tex':
  3596. tag = re.sub(r'\\b',r'\\\\b',tag)
  3597. txt = string.replace(txt, '_', 'vvvvTexUndervvvv')
  3598. line = regex['img'].sub(tag,line,1)
  3599. line = regex['x'].sub(txt,line,1)
  3600. return line
  3601. def add_inline_tags(line):
  3602. # Beautifiers
  3603. for beauti in ('bold', 'italic', 'underline', 'strike'):
  3604. if regex['font%s'%beauti.capitalize()].search(line):
  3605. line = beautify_me(beauti, line)
  3606. line = parse_images(line)
  3607. return line
  3608. def get_include_contents(file, path=''):
  3609. "Parses %!include: value and extract file contents"
  3610. ids = {'`':'verb', '"':'raw', "'":'tagged' }
  3611. id = 't2t'
  3612. # Set include type and remove identifier marks
  3613. mark = file[0]
  3614. if mark in ids.keys():
  3615. if file[:2] == file[-2:] == mark*2:
  3616. id = ids[mark] # set type
  3617. file = file[2:-2] # remove marks
  3618. # Handle remote dir execution
  3619. filepath = os.path.join(path, file)
  3620. # Read included file contents
  3621. lines = Readfile(filepath, remove_linebreaks=1)
  3622. # Default txt2tags marked text, just BODY matters
  3623. if id == 't2t':
  3624. lines = get_file_body(filepath)
  3625. lines.insert(0, '%%INCLUDED(%s) starts here: %s'%(id,file))
  3626. # This appears when included hit EOF with verbatim area open
  3627. #lines.append('%%INCLUDED(%s) ends here: %s'%(id,file))
  3628. return id, lines
  3629. def set_global_config(config):
  3630. global CONF, TAGS, regex, rules, TARGET
  3631. CONF = config
  3632. TAGS = getTags(CONF)
  3633. rules = getRules(CONF)
  3634. regex = getRegexes()
  3635. TARGET = config['target'] # save for buggy functions that need global
def convert(bodylines, config, firstlinenr=1):
    """Convert the marked source body lines into tagged target lines.

    bodylines:   list with the source document body lines
    config:      the config dictionary for this document; it is made
                 global by set_global_config() right at the start
    firstlinenr: the number of the first body line on the source file,
                 used as the line number reference on Debug messages

    Each line is identified (comment, raw, verbatim, blank, special,
    bar, title, quote, list, table or paragraph) and then it is saved
    directly on the results, or held by the BlockMaster until the
    current block is closed.

    Returns the (lines, marked_toc) tuple, where 'lines' is the
    converted body and 'marked_toc' comes from the TitleMaster.
    With 'toc-only' set, 'lines' is empty. With 'dump-source' set,
    'lines' holds the source lines as they were read, except that
    %!include lines are removed and the included t2t lines appear.
    """
    global BLOCK
    set_global_config(config)
    target = config['target']
    BLOCK = BlockMaster()
    MASK = MaskMaster()
    TITLE = TitleMaster()
    ret = []
    dump_source = []
    f_lastwasblank = 0
    # Compiling all PreProc regexes
    pre_filter = compile_filters(
        CONF['preproc'], _('Invalid PreProc filter regex'))
    # Let's mark it up!
    # linenr is the line number used on Debug messages
    # lineref is the index of the next 'bodylines' item to be read
    linenr = firstlinenr-1
    lineref = 0
    while lineref < len(bodylines):
        # Defaults
        MASK.reset()
        # results_box will hold the function that saves the parsed line
        results_box = ''
        untouchedline = bodylines[lineref]
        dump_source.append(untouchedline)
        line = re.sub('[\n\r]+$','',untouchedline) # del line break
        # Apply PreProc filters
        if pre_filter:
            errmsg = _('Invalid PreProc filter replacement')
            for rgx,repl in pre_filter:
                try: line = rgx.sub(repl, line)
                except: Error("%s: '%s'"%(errmsg, repl))
        line = maskEscapeChar(line) # protect \ char
        linenr = linenr +1
        lineref = lineref +1
        Debug(repr(line), 2, linenr) # heavy debug: show each line
        #------------------[ Comment Block ]------------------------
        # We're already on a comment block
        if BLOCK.block() == 'comment':
            # Closing comment
            if regex['blockCommentClose'].search(line):
                ret.extend(BLOCK.blockout() or [])
                continue
            # Normal comment-inside line. Ignore it.
            continue
        # Detecting comment block init
        if regex['blockCommentOpen'].search(line) \
           and BLOCK.block() not in BLOCK.exclusive:
            ret.extend(BLOCK.blockin('comment'))
            continue
        #-------------------------[ Raw Text ]----------------------
        # We're already on a raw block
        if BLOCK.block() == 'raw':
            # Closing raw
            if regex['blockRawClose'].search(line):
                ret.extend(BLOCK.blockout())
                continue
            # Normal raw-inside line
            BLOCK.holdadd(line)
            continue
        # Detecting raw block init
        if regex['blockRawOpen'].search(line) \
           and BLOCK.block() not in BLOCK.exclusive:
            ret.extend(BLOCK.blockin('raw'))
            continue
        # One line raw text
        if regex['1lineRaw'].search(line) \
           and BLOCK.block() not in BLOCK.exclusive:
            ret.extend(BLOCK.blockin('raw'))
            line = regex['1lineRaw'].sub('',line)
            BLOCK.holdadd(line)
            ret.extend(BLOCK.blockout())
            continue
        #------------------------[ Verbatim ]----------------------
        #TIP We'll never support beautifiers inside verbatim
        # Closing table mapped to verb
        if BLOCK.block() == 'verb' \
           and BLOCK.prop('mapped') == 'table' \
           and not regex['table'].search(line):
            ret.extend(BLOCK.blockout())
        # We're already on a verb block
        if BLOCK.block() == 'verb':
            # Closing verb
            if regex['blockVerbClose'].search(line):
                ret.extend(BLOCK.blockout())
                continue
            # Normal verb-inside line
            BLOCK.holdadd(line)
            continue
        # Detecting verb block init
        if regex['blockVerbOpen'].search(line) \
           and BLOCK.block() not in BLOCK.exclusive:
            ret.extend(BLOCK.blockin('verb'))
            f_lastwasblank = 0
            continue
        # One line verb-formatted text
        if regex['1lineVerb'].search(line) \
           and BLOCK.block() not in BLOCK.exclusive:
            ret.extend(BLOCK.blockin('verb'))
            line = regex['1lineVerb'].sub('',line)
            BLOCK.holdadd(line)
            ret.extend(BLOCK.blockout())
            f_lastwasblank = 0
            continue
        # Tables are mapped to verb when target is not table-aware
        if not rules['tableable'] and regex['table'].search(line):
            if not BLOCK.isblock('verb'):
                ret.extend(BLOCK.blockin('verb'))
                BLOCK.propset('mapped', 'table')
            BLOCK.holdadd(line)
            continue
        #---------------------[ blank lines ]-----------------------
        if regex['blankline'].search(line):
            # Close open paragraph
            if BLOCK.isblock('para'):
                ret.extend(BLOCK.blockout())
                f_lastwasblank = 1
                continue
            # Close all open tables
            if BLOCK.isblock('table'):
                ret.extend(BLOCK.blockout())
                f_lastwasblank = 1
                continue
            # Close all open quotes
            while BLOCK.isblock('quote'):
                ret.extend(BLOCK.blockout())
            # Closing all open lists
            if f_lastwasblank: # 2nd consecutive blank
                if BLOCK.block()[-4:] == 'list':
                    BLOCK.holdaddsub('') # helps parser
                while BLOCK.depth: # closes list (if any)
                    ret.extend(BLOCK.blockout())
                continue # ignore consecutive blanks
            # Paragraph (if any) is wanted inside lists also
            if BLOCK.block()[-4:] == 'list':
                BLOCK.holdaddsub('')
            else:
                # html: show blank line (needs tag)
                if target in ('html','xhtml'):
                    ret.append(TAGS['paragraphOpen']+\
                               TAGS['paragraphClose'])
                # Otherwise we just show a blank line
                else:
                    ret.append('')
            f_lastwasblank = 1
            continue
        #---------------------[ special ]---------------------------
        if regex['special'].search(line):
            # Include command
            targ, key, val = ConfigLines().parse_line(
                line, 'include', target)
            if key:
                Debug("Found config '%s', value '%s'"%(
                    key,val),1,linenr)
                incpath = os.path.dirname(CONF['sourcefile'])
                incfile = val
                err = _('A file cannot include itself (loop!)')
                if CONF['sourcefile'] == incfile:
                    Error("%s: %s"%(err,incfile))
                inctype, inclines = get_include_contents(
                    incfile, incpath)
                # Verb, raw and tagged are easy
                if inctype != 't2t':
                    ret.extend(BLOCK.blockin(inctype))
                    BLOCK.holdextend(inclines)
                    ret.extend(BLOCK.blockout())
                else:
                    # Insert include lines into body
                    # lineref already points to the line after the
                    # %!include, so the included lines are read next
                    #TODO include maxdepth limit
                    bodylines = bodylines[:lineref] \
                        +inclines \
                        +bodylines[lineref:]
                    #TODO fix path if include@include
                # Remove %!include call
                if CONF['dump-source']:
                    dump_source.pop()
                continue
            else:
                Debug('Bogus Special Line',1,linenr)
        #---------------------[ dump-source ]-----------------------
        # We don't need to go any further
        if CONF['dump-source']:
            continue
        #---------------------[ Comments ]--------------------------
        # Just skip them (if not macro)
        if regex['comment'].search(line) and not \
           regex['macros'].match(line) and not \
           regex['toc'].match(line):
            continue
        #---------------------[ Triggers ]--------------------------
        # Valid line, reset blank status
        f_lastwasblank = 0
        # Any NOT quote line closes all open quotes
        if BLOCK.isblock('quote') and not regex['quote'].search(line):
            while BLOCK.isblock('quote'):
                ret.extend(BLOCK.blockout())
        # Any NOT table line closes an open table
        if BLOCK.isblock('table') and not regex['table'].search(line):
            ret.extend(BLOCK.blockout())
        #---------------------[ Horizontal Bar ]--------------------
        if regex['bar'].search(line):
            # A bar closes a paragraph
            if BLOCK.isblock('para'):
                ret.extend(BLOCK.blockout())
            # We need to close all opened quote blocks
            # if bar isn't allowed inside or if not a quote line
            if BLOCK.isblock('quote'):
                if not rules['barinsidequote'] or \
                   not regex['quote'].search(line):
                    while BLOCK.isblock('quote'):
                        ret.extend(BLOCK.blockout())
            # Quote + bar: continue processing for quoting
            if rules['barinsidequote'] and \
               regex['quote'].search(line):
                pass
            # Just bar: save tagged line and we're done
            else:
                line = get_tagged_bar(line)
                if BLOCK.block()[-4:] == 'list':
                    BLOCK.holdaddsub(line)
                elif BLOCK.block():
                    BLOCK.holdadd(line)
                else:
                    ret.append(line)
                Debug("BAR: %s"%line, 6)
                continue
        #---------------------[ Title ]-----------------------------
        #TODO set next blank and set f_lastwasblank or f_lasttitle
        if (regex['title'].search(line) or
            regex['numtitle'].search(line)) and \
            BLOCK.block()[-4:] != 'list':
            # A title closes a paragraph
            if BLOCK.isblock('para'):
                ret.extend(BLOCK.blockout())
            TITLE.add(line)
            tagged_title = TITLE.get()
            ret.extend(tagged_title)
            Debug("TITLE: %s"%tagged_title, 6)
            f_lastwasblank = 1
            continue
        #---------------------[ %%toc ]-----------------------
        # %%toc line closes paragraph
        if BLOCK.block() == 'para' and regex['toc'].search(line):
            ret.extend(BLOCK.blockout())
        #---------------------[ apply masks ]-----------------------
        line = MASK.mask(line)
        #XXX from here, only block-inside lines will pass
        #---------------------[ Quote ]-----------------------------
        if regex['quote'].search(line):
            # Store number of leading TABS
            quotedepth = len(regex['quote'].search(line).group(0))
            # SGML doesn't support nested quotes
            if rules['quotenotnested']: quotedepth = 1
            # Don't cross depth limit
            maxdepth = rules['quotemaxdepth']
            if maxdepth and quotedepth > maxdepth:
                quotedepth = maxdepth
            # New quote
            if not BLOCK.isblock('quote'):
                ret.extend(BLOCK.blockin('quote'))
            # New subquotes
            while BLOCK.depth < quotedepth:
                BLOCK.blockin('quote')
            # Closing quotes
            while quotedepth < BLOCK.depth:
                ret.extend(BLOCK.blockout())
        #---------------------[ Lists ]-----------------------------
        # An empty item also closes the current list
        if BLOCK.block()[-4:] == 'list':
            m = regex['listclose'].match(line)
            if m:
                listindent = m.group(1)
                listtype = m.group(2)
                currlisttype = BLOCK.prop('type')
                currlistindent = BLOCK.prop('indent')
                if listindent == currlistindent and \
                   listtype == currlisttype:
                    ret.extend(BLOCK.blockout())
                    continue
        if regex['list'].search(line) or \
           regex['numlist'].search(line) or \
           regex['deflist'].search(line):
            listindent = BLOCK.prop('indent')
            listids = string.join(LISTNAMES.keys(), '')
            m = re.match('^( *)([%s]) '%listids, line)
            listitemindent = m.group(1)
            listtype = m.group(2)
            listname = LISTNAMES[listtype]
            results_box = BLOCK.holdadd
            # Del list ID (and separate term from definition)
            if listname == 'deflist':
                term = parse_deflist_term(line)
                line = regex['deflist'].sub(
                    SEPARATOR+term+SEPARATOR,line)
            else:
                line = regex[listname].sub(SEPARATOR,line)
            # Don't cross depth limit
            maxdepth = rules['listmaxdepth']
            if maxdepth and BLOCK.depth == maxdepth:
                if len(listitemindent) > len(listindent):
                    listitemindent = listindent
            # List bumping (same indent, diff mark)
            # Close the currently open list to clear the mess
            if BLOCK.block()[-4:] == 'list' \
               and listname != BLOCK.block() \
               and len(listitemindent) == len(listindent):
                ret.extend(BLOCK.blockout())
                listindent = BLOCK.prop('indent')
            # Open mother list or sublist
            if BLOCK.block()[-4:] != 'list' or \
               len(listitemindent) > len(listindent):
                ret.extend(BLOCK.blockin(listname))
                BLOCK.propset('indent',listitemindent)
                BLOCK.propset('type',listtype)
            # Closing sublists
            while len(listitemindent) < len(BLOCK.prop('indent')):
                ret.extend(BLOCK.blockout())
            # O-oh, sublist before list ("\n\n - foo\n- foo")
            # Fix: close sublist (as mother), open another list
            if BLOCK.block()[-4:] != 'list':
                ret.extend(BLOCK.blockin(listname))
                BLOCK.propset('indent',listitemindent)
                BLOCK.propset('type',listtype)
        #---------------------[ Table ]-----------------------------
        #TODO escape undesired format inside table
        #TODO add pm6 target
        if regex['table'].search(line):
            if not BLOCK.isblock('table'): # first table line!
                ret.extend(BLOCK.blockin('table'))
                BLOCK.tableparser.__init__(line)
            tablerow = TableMaster().parse_row(line)
            BLOCK.tableparser.add_row(tablerow) # save config
            # Maintain line to unmask and inlines
            # XXX Bug: | **bo | ld** | turns **bo\x01ld** and gets converted :(
            # TODO isolate unmask+inlines parsing to use here
            line = string.join(tablerow['cells'], SEPARATOR)
        #---------------------[ Paragraph ]-------------------------
        if not BLOCK.block() and \
           not string.count(line, MASK.tocmask): # new para!
            ret.extend(BLOCK.blockin('para'))
        ############################################################
        ############################################################
        ############################################################
        #---------------------[ Final Parses ]----------------------
        # The target-specific special char escapes for body lines
        line = doEscape(target,line)
        line = add_inline_tags(line)
        line = MASK.undo(line)
        #---------------------[ Hold or Return? ]-------------------
        ### Now we must choose where to put the parsed line
        #
        # (list item lines have already set results_box above)
        if not results_box:
            # List item extra lines
            if BLOCK.block()[-4:] == 'list':
                results_box = BLOCK.holdaddsub
            # Other blocks
            elif BLOCK.block():
                results_box = BLOCK.holdadd
            # No blocks
            else:
                line = doFinalEscape(target, line)
                results_box = ret.append
        results_box(line)
    # EOF: close any open para/verb/lists/table/quotes
    Debug('EOF',7)
    while BLOCK.block():
        ret.extend(BLOCK.blockout())
    # Maybe close some opened title area?
    if rules['titleblocks']:
        ret.extend(TITLE.close_all())
    # Maybe a major tag to enclose body? (like DIV for CSS)
    if TAGS['bodyOpen' ]: ret.insert(0, TAGS['bodyOpen'])
    if TAGS['bodyClose']: ret.append(TAGS['bodyClose'])
    if CONF['toc-only']: ret = []
    marked_toc = TITLE.dump_marked_toc(CONF['toc-level'])
    # If dump-source, all parsing is ignored
    if CONF['dump-source']: ret = dump_source[:]
    return ret, marked_toc
  4011. ##############################################################################
  4012. ################################### GUI ######################################
  4013. ##############################################################################
  4014. #
  4015. # Tk help: http://python.org/topics/tkinter/
  4016. # Tuto: http://ibiblio.org/obp/py4fun/gui/tkPhone.html
  4017. # /usr/lib/python*/lib-tk/Tkinter.py
  4018. #
  4019. # grid table : row=0, column=0, columnspan=2, rowspan=2
  4020. # grid align : sticky='n,s,e,w' (North, South, East, West)
  4021. # pack place : side='top,bottom,right,left'
  4022. # pack fill : fill='x,y,both,none', expand=1
  4023. # pack align : anchor='n,s,e,w' (North, South, East, West)
  4024. # padding : padx=10, pady=10, ipadx=10, ipady=10 (internal)
# checkbox   : offvalue is returned if the _user_ deselected the box
  4026. # label align: justify=left,right,center
  4027. def load_GUI_resources():
  4028. "Load all extra modules and methods used by GUI"
  4029. global askopenfilename, showinfo, showwarning, showerror, Tkinter
  4030. from tkFileDialog import askopenfilename
  4031. from tkMessageBox import showinfo,showwarning,showerror
  4032. import Tkinter
  4033. class Gui:
  4034. "Graphical Tk Interface"
  4035. def __init__(self, conf={}):
  4036. self.root = Tkinter.Tk() # mother window, come to butthead
  4037. self.root.title(my_name) # window title bar text
  4038. self.window = self.root # variable "focus" for inclusion
  4039. self.row = 0 # row count for grid()
  4040. self.action_length = 150 # left column length (pixel)
  4041. self.frame_margin = 10 # frame margin size (pixel)
  4042. self.frame_border = 6 # frame border size (pixel)
  4043. # The default Gui colors, can be changed by %!guicolors
  4044. self.dft_gui_colors = ['#6c6','white','#cf9','#030']
  4045. self.gui_colors = []
  4046. self.bg1 = self.fg1 = self.bg2 = self.fg2 = ''
  4047. # On Tk, vars need to be set/get using setvar()/get()
  4048. self.infile = self.setvar('')
  4049. self.target = self.setvar('')
  4050. self.target_name = self.setvar('')
  4051. # The checks appearance order
  4052. self.checks = [
  4053. 'headers','enum-title','toc','mask-email',
  4054. 'toc-only','stdout']
  4055. # Creating variables for all checks
  4056. for check in self.checks:
  4057. setattr(self, 'f_'+check, self.setvar(''))
  4058. # Load RC config
  4059. self.conf = {}
  4060. if conf: self.load_config(conf)
  4061. def load_config(self, conf):
  4062. self.conf = conf
  4063. self.gui_colors = conf.get('guicolors') or self.dft_gui_colors
  4064. self.bg1, self.fg1, self.bg2, self.fg2 = self.gui_colors
  4065. self.root.config(bd=15,bg=self.bg1)
  4066. ### Config as dic for python 1.5 compat (**opts don't work :( )
  4067. def entry(self, **opts): return Tkinter.Entry(self.window, opts)
  4068. def label(self, txt='', bg=None, **opts):
  4069. opts.update({'text':txt,'bg':bg or self.bg1})
  4070. return Tkinter.Label(self.window, opts)
  4071. def button(self,name,cmd,**opts):
  4072. opts.update({'text':name,'command':cmd})
  4073. return Tkinter.Button(self.window, opts)
  4074. def check(self,name,checked=0,**opts):
  4075. bg, fg = self.bg2, self.fg2
  4076. opts.update({
  4077. 'text':name, 'onvalue':1, 'offvalue':0,
  4078. 'activeforeground':fg, 'fg':fg,
  4079. 'activebackground':bg, 'bg':bg,
  4080. 'highlightbackground':bg, 'anchor':'w'
  4081. })
  4082. chk = Tkinter.Checkbutton(self.window, opts)
  4083. if checked: chk.select()
  4084. chk.grid(columnspan=2, sticky='w', padx=0)
  4085. def menu(self,sel,items):
  4086. return apply(Tkinter.OptionMenu,(self.window,sel)+tuple(items))
  4087. # Handy auxiliary functions
  4088. def action(self, txt):
  4089. self.label(txt, fg=self.fg1, bg=self.bg1,
  4090. wraplength=self.action_length).grid(column=0,row=self.row)
  4091. def frame_open(self):
  4092. self.window = Tkinter.Frame(self.root,bg=self.bg2,
  4093. borderwidth=self.frame_border)
  4094. def frame_close(self):
  4095. self.window.grid(column=1, row=self.row, sticky='w',
  4096. padx=self.frame_margin)
  4097. self.window = self.root
  4098. self.label('').grid()
  4099. self.row = self.row + 2 # update row count
  4100. def target_name2key(self):
  4101. name = self.target_name.get()
  4102. target = filter(lambda x: TARGET_NAMES[x] == name, TARGETS)
  4103. try : key = target[0]
  4104. except: key = ''
  4105. self.target = self.setvar(key)
  4106. def target_key2name(self):
  4107. key = self.target.get()
  4108. name = TARGET_NAMES.get(key) or key
  4109. self.target_name = self.setvar(name)
  4110. def exit(self): self.root.destroy()
  4111. def setvar(self, val): z = Tkinter.StringVar() ; z.set(val) ; return z
  4112. def askfile(self):
  4113. ftypes= [(_('txt2tags files'),('*.t2t','*.txt')),
  4114. (_('All files'),'*')]
  4115. newfile = askopenfilename(filetypes=ftypes)
  4116. if newfile:
  4117. self.infile.set(newfile)
  4118. newconf = process_source_file(newfile)[0]
  4119. newconf = ConfigMaster().sanity(newconf, gui=1)
  4120. # Restate all checkboxes after file selection
  4121. #TODO how to make a refresh without killing it?
  4122. self.root.destroy()
  4123. self.__init__(newconf)
  4124. self.mainwindow()
  4125. def scrollwindow(self, txt='no text!', title=''):
  4126. # Create components
  4127. win = Tkinter.Toplevel() ; win.title(title)
  4128. frame = Tkinter.Frame(win)
  4129. scroll = Tkinter.Scrollbar(frame)
  4130. text = Tkinter.Text(frame,yscrollcommand=scroll.set)
  4131. button = Tkinter.Button(win)
  4132. # Config
  4133. text.insert(Tkinter.END, string.join(txt,'\n'))
  4134. scroll.config(command=text.yview)
  4135. button.config(text=_('Close'), command=win.destroy)
  4136. button.focus_set()
  4137. # Packing
  4138. text.pack(side='left', fill='both', expand=1)
  4139. scroll.pack(side='right', fill='y')
  4140. frame.pack(fill='both', expand=1)
  4141. button.pack(ipadx=30)
  4142. def runprogram(self):
  4143. global CMDLINE_RAW
  4144. # Prepare
  4145. self.target_name2key()
  4146. infile, target = self.infile.get(), self.target.get()
  4147. # Sanity
  4148. if not target:
  4149. showwarning(my_name,_("You must select a target type!"))
  4150. return
  4151. if not infile:
  4152. showwarning(my_name,
  4153. _("You must provide the source file location!"))
  4154. return
  4155. # Compose cmdline
  4156. guiflags = []
  4157. real_cmdline_conf = ConfigMaster(CMDLINE_RAW).parse()
  4158. if real_cmdline_conf.has_key('infile'):
  4159. del real_cmdline_conf['infile']
  4160. if real_cmdline_conf.has_key('target'):
  4161. del real_cmdline_conf['target']
  4162. real_cmdline = CommandLine().compose_cmdline(real_cmdline_conf)
  4163. default_outfile = ConfigMaster().get_outfile_name(
  4164. {'sourcefile':infile, 'outfile':'', 'target':target})
  4165. for opt in self.checks:
  4166. val = int(getattr(self, 'f_%s'%opt).get() or "0")
  4167. if opt == 'stdout': opt = 'outfile'
  4168. on_config = self.conf.get(opt) or 0
  4169. on_cmdline = real_cmdline_conf.get(opt) or 0
  4170. if opt == 'outfile':
  4171. if on_config == STDOUT: on_config = 1
  4172. else: on_config = 0
  4173. if on_cmdline == STDOUT: on_cmdline = 1
  4174. else: on_cmdline = 0
  4175. if val != on_config or (
  4176. val == on_config == on_cmdline and
  4177. real_cmdline_conf.has_key(opt)):
  4178. if val:
  4179. # Was not set, but user selected on GUI
  4180. Debug("user turned ON: %s"%opt)
  4181. if opt == 'outfile': opt = '-o-'
  4182. else: opt = '--%s'%opt
  4183. else:
  4184. # Was set, but user deselected on GUI
  4185. Debug("user turned OFF: %s"%opt)
  4186. if opt == 'outfile':
  4187. opt = "-o%s"%default_outfile
  4188. else: opt = '--no-%s'%opt
  4189. guiflags.append(opt)
  4190. cmdline = [my_name, '-t', target] +real_cmdline \
  4191. +guiflags +[infile]
  4192. Debug('Gui/Tk cmdline: %s'%cmdline,5)
  4193. # Run!
  4194. cmdline_raw_orig = CMDLINE_RAW
  4195. try:
  4196. # Fake the GUI cmdline as the real one, and parse file
  4197. CMDLINE_RAW = CommandLine().get_raw_config(cmdline[1:])
  4198. data = process_source_file(infile)
  4199. # On GUI, convert_* returns the data, not finish_him()
  4200. outlist, config = convert_this_files([data])
  4201. # On GUI and STDOUT, finish_him() returns the data
  4202. result = finish_him(outlist, config)
  4203. # Show outlist in s a nice new window
  4204. if result:
  4205. outlist, config = result
  4206. title = _('%s: %s converted to %s')%(
  4207. my_name, os.path.basename(infile),
  4208. string.upper(config['target']))
  4209. self.scrollwindow(outlist, title)
  4210. # Show the "file saved" message
  4211. else:
  4212. msg = "%s\n\n %s\n%s\n\n %s\n%s"%(
  4213. _('Conversion done!'),
  4214. _('FROM:'), infile,
  4215. _('TO:'), config['outfile'])
  4216. showinfo(my_name, msg)
  4217. except error: # common error (windowed), not quit
  4218. pass
  4219. except: # fatal error (windowed and printed)
  4220. errormsg = getUnknownErrorMessage()
  4221. print errormsg
  4222. showerror(_('%s FATAL ERROR!')%my_name,errormsg)
  4223. self.exit()
  4224. CMDLINE_RAW = cmdline_raw_orig
  4225. def mainwindow(self):
  4226. self.infile.set(self.conf.get('sourcefile') or '')
  4227. self.target.set(self.conf.get('target') or \
  4228. _('-- select one --'))
  4229. outfile = self.conf.get('outfile')
  4230. if outfile == STDOUT: # map -o-
  4231. self.conf['stdout'] = 1
  4232. if self.conf.get('headers') == None:
  4233. self.conf['headers'] = 1 # map default
  4234. action1 = _("Enter the source file location:")
  4235. action2 = _("Choose the target document type:")
  4236. action3 = _("Some options you may check:")
  4237. action4 = _("Some extra options:")
  4238. checks_txt = {
  4239. 'headers' : _("Include headers on output"),
  4240. 'enum-title': _("Number titles (1, 1.1, 1.1.1, etc)"),
  4241. 'toc' : _("Do TOC also (Table of Contents)"),
  4242. 'mask-email': _("Hide e-mails from SPAM robots"),
  4243. 'toc-only' : _("Just do TOC, nothing more"),
  4244. 'stdout' : _("Dump to screen (Don't save target file)")
  4245. }
  4246. targets_menu = map(lambda x: TARGET_NAMES[x], TARGETS)
  4247. # Header
  4248. self.label("%s %s"%(string.upper(my_name), my_version),
  4249. bg=self.bg2, fg=self.fg2).grid(columnspan=2, ipadx=10)
  4250. self.label(_("ONE source, MULTI targets")+'\n%s\n'%my_url,
  4251. bg=self.bg1, fg=self.fg1).grid(columnspan=2)
  4252. self.row = 2
  4253. # Choose input file
  4254. self.action(action1) ; self.frame_open()
  4255. e_infile = self.entry(textvariable=self.infile,width=25)
  4256. e_infile.grid(row=self.row, column=0, sticky='e')
  4257. if not self.infile.get(): e_infile.focus_set()
  4258. self.button(_("Browse"), self.askfile).grid(
  4259. row=self.row, column=1, sticky='w', padx=10)
  4260. # Show outfile name, style and encoding (if any)
  4261. txt = ''
  4262. if outfile:
  4263. txt = outfile
  4264. if outfile == STDOUT: txt = _('<screen>')
  4265. l_output = self.label(_('Output: ')+txt,
  4266. fg=self.fg2,bg=self.bg2)
  4267. l_output.grid(columnspan=2, sticky='w')
  4268. for setting in ['style','encoding']:
  4269. if self.conf.get(setting):
  4270. name = string.capitalize(setting)
  4271. val = self.conf[setting]
  4272. self.label('%s: %s'%(name, val),
  4273. fg=self.fg2, bg=self.bg2).grid(
  4274. columnspan=2, sticky='w')
  4275. # Choose target
  4276. self.frame_close() ; self.action(action2)
  4277. self.frame_open()
  4278. self.target_key2name()
  4279. self.menu(self.target_name, targets_menu).grid(
  4280. columnspan=2, sticky='w')
  4281. # Options checkboxes label
  4282. self.frame_close() ; self.action(action3)
  4283. self.frame_open()
  4284. # Compose options check boxes, example:
  4285. # self.check(checks_txt['toc'],1,variable=self.f_toc)
  4286. for check in self.checks:
  4287. # Extra options label
  4288. if check == 'toc-only':
  4289. self.frame_close() ; self.action(action4)
  4290. self.frame_open()
  4291. txt = checks_txt[check]
  4292. var = getattr(self, 'f_'+check)
  4293. checked = self.conf.get(check)
  4294. self.check(txt,checked,variable=var)
  4295. self.frame_close()
  4296. # Spacer and buttons
  4297. self.label('').grid() ; self.row = self.row + 1
  4298. b_quit = self.button(_("Quit"), self.exit)
  4299. b_quit.grid(row=self.row, column=0, sticky='w', padx=30)
  4300. b_conv = self.button(_("Convert!"), self.runprogram)
  4301. b_conv.grid(row=self.row, column=1, sticky='e', padx=30)
  4302. if self.target.get() and self.infile.get():
  4303. b_conv.focus_set()
  4304. # As documentation told me
  4305. if sys.platform[:3] == 'win':
  4306. self.root.iconify()
  4307. self.root.update()
  4308. self.root.deiconify()
  4309. self.root.mainloop()
  4310. ##############################################################################
  4311. ##############################################################################
  4312. def exec_command_line(user_cmdline=[]):
  4313. global CMDLINE_RAW, RC_RAW, DEBUG, VERBOSE, QUIET, GUI, Error
  4314. # Extract command line data
  4315. cmdline_data = user_cmdline or sys.argv[1:]
  4316. CMDLINE_RAW = CommandLine().get_raw_config(cmdline_data, relative=1)
  4317. cmdline_parsed = ConfigMaster(CMDLINE_RAW).parse()
  4318. DEBUG = cmdline_parsed.get('debug' ) or 0
  4319. VERBOSE = cmdline_parsed.get('verbose') or 0
  4320. QUIET = cmdline_parsed.get('quiet' ) or 0
  4321. GUI = cmdline_parsed.get('gui' ) or 0
  4322. infiles = cmdline_parsed.get('infile' ) or []
  4323. Message(_("Txt2tags %s processing begins")%my_version,1)
  4324. # The easy ones
  4325. if cmdline_parsed.get('help' ): Quit(USAGE)
  4326. if cmdline_parsed.get('version'): Quit(VERSIONSTR)
  4327. # Multifile haters
  4328. if len(infiles) > 1:
  4329. errmsg=_("Option --%s can't be used with multiple input files")
  4330. for option in NO_MULTI_INPUT:
  4331. if cmdline_parsed.get(option):
  4332. Error(errmsg%option)
  4333. Debug("system platform: %s"%sys.platform)
  4334. Debug("python version: %s"%(string.split(sys.version,'(')[0]))
  4335. Debug("line break char: %s"%repr(LB))
  4336. Debug("command line: %s"%sys.argv)
  4337. Debug("command line raw config: %s"%CMDLINE_RAW,1)
  4338. # Extract RC file config
  4339. if cmdline_parsed.get('rc') == 0:
  4340. Message(_("Ignoring user configuration file"),1)
  4341. else:
  4342. rc_file = get_rc_path()
  4343. if os.path.isfile(rc_file):
  4344. Message(_("Loading user configuration file"),1)
  4345. RC_RAW = ConfigLines(file=rc_file).get_raw_config()
  4346. Debug("rc file: %s"%rc_file)
  4347. Debug("rc file raw config: %s"%RC_RAW,1)
  4348. # Get all infiles config (if any)
  4349. infiles_config = get_infiles_config(infiles)
  4350. # Is GUI available?
  4351. # Try to load and start GUI interface for --gui
  4352. # If program was called with no arguments, try GUI also
  4353. if GUI or not infiles:
  4354. try:
  4355. load_GUI_resources()
  4356. Debug("GUI resources OK (Tk module is installed)")
  4357. winbox = Gui()
  4358. Debug("GUI display OK")
  4359. GUI = 1
  4360. except:
  4361. Debug("GUI Error: no Tk module or no DISPLAY")
  4362. GUI = 0
  4363. # User forced --gui, but it's not available
  4364. if cmdline_parsed.get('gui') and not GUI:
  4365. print getTraceback(); print
  4366. Error("Sorry, I can't run my Graphical Interface - GUI\n"
  4367. "- Check if Python Tcl/Tk module is installed (Tkinter)\n"
  4368. "- Make sure you are in a graphical environment (like X)")
  4369. # Okay, we will use GUI
  4370. if GUI:
  4371. Message(_("We are on GUI interface"),1)
  4372. # Redefine Error function to raise exception instead sys.exit()
  4373. def Error(msg):
  4374. showerror(_('txt2tags ERROR!'), msg)
  4375. raise error
  4376. # If no input file, get RC+cmdline config, else full config
  4377. if not infiles:
  4378. gui_conf = ConfigMaster(RC_RAW+CMDLINE_RAW).parse()
  4379. else:
  4380. try : gui_conf = infiles_config[0][0]
  4381. except: gui_conf = {}
  4382. # Sanity is needed to set outfile and other things
  4383. gui_conf = ConfigMaster().sanity(gui_conf, gui=1)
  4384. Debug("GUI config: %s"%gui_conf,5)
  4385. # Insert config and populate the nice window!
  4386. winbox.load_config(gui_conf)
  4387. winbox.mainwindow()
  4388. # Console mode rocks forever!
  4389. else:
  4390. Message(_("We are on Command Line interface"),1)
  4391. # Called with no arguments, show error
  4392. if not infiles: Error(_('Missing input file (try --help)'))
  4393. convert_this_files(infiles_config)
  4394. Message(_("Txt2tags finished sucessfuly"),1)
# Script entry point: run the program and turn any failure into a
# message on STDERR plus exit status 1.
if __name__ == '__main__':
    try:
        exec_command_line()
    # NOTE(review): 'error' is defined elsewhere in this file,
    # presumably the program's own exception class -- confirm
    except error, msg:
        sys.stderr.write("%s\n"%msg)
        sys.stderr.flush()
        sys.exit(1)
    # An exit request is not an error, it must not reach the generic
    # handler below. Execution continues on Quit().
    except SystemExit:
        pass
    # Anything else is unexpected: report it and exit with status 1
    except:
        sys.stderr.write(getUnknownErrorMessage())
        sys.stderr.flush()
        sys.exit(1)
    Quit()
  4409. # The End.