PageRenderTime 77ms CodeModel.GetById 16ms RepoModel.GetById 1ms app.codeStats 0ms

/old/txt2tags-2.0.py

http://txt2tags.googlecode.com/
Python | 4024 lines | 3605 code | 161 blank | 258 comment | 163 complexity | b2aedac450024e883a7707b81b88866f MD5 | raw file
Possible License(s): GPL-2.0, GPL-3.0, WTFPL
  1. #!/usr/bin/env python
  2. # txt2tags - generic text conversion tool
  3. # http://txt2tags.sf.net
  4. #
  5. # Copyright 2001, 2002, 2003, 2004 Aurelio Marinho Jargas
  6. #
  7. # This program is free software; you can redistribute it and/or modify
  8. # it under the terms of the GNU General Public License as published by
  9. # the Free Software Foundation, version 2.
  10. #
  11. # This program is distributed in the hope that it will be useful,
  12. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. # GNU General Public License for more details.
  15. #
  16. # You have received a copy of the GNU General Public License along
  17. # with this program, on the COPYING file.
  18. #
  19. ##################################################################
  20. # #
  21. # - IMPORTANT - #
  22. # #
  23. # Due to the major syntax changes, the new 2.x series BREAKS #
  24. # backwards compatibility. #
  25. # #
  26. # Use the 't2tconv' script to upgrade your existing .t2t files #
  27. # to conform to the new v2.0 syntax. #
  28. # #
  29. # Do a visual inspection of the newly converted file. #
  30. # Especially Pre & Post proc filters can break. Check them! #
  31. # #
  32. ##################################################################
  33. ########################################################################
  34. #
  35. # BORING CODE EXPLANATION AHEAD
  36. #
  37. # Just read if you wish to understand how the txt2tags code works
  38. #
  39. ########################################################################
  40. #
  41. # Version 2.0 was a complete rewrite for the program 'core'.
  42. #
  43. # Now the code that [1] parses the marked text is separated from the
  44. # code that [2] inserts the target tags.
  45. #
  46. # [1] made by: def convert()
  47. # [2] made by: class BlockMaster
  48. #
  49. # The structures of the marked text are identified and their contents
  50. # are extracted into a data holder (Python lists and dictionaries).
  51. #
  52. # When parsing the source file, the blocks (para, lists, quote, table)
  53. # are opened with BlockMaster, right when found. Then their contents,
  54. # which span several lines, are fed into a special holder on the
  55. # BlockMaster instance. Only when the block is closed are the target
  56. # tags inserted for the full block as a whole, in one pass. This way, we
  57. # have better control over blocks. Much better than the previous line by
  58. # line approach.
  59. #
  60. # In other words, whenever inside a block, the parser *holds* the tag
  61. # insertion process, waiting until the full block is read. That was
  62. # needed primarily to close paragraphs for the new XHTML target, but
  63. # proved to be a very good addition, improving many other processings.
  64. #
  65. # -------------------------------------------------------------------
  66. #
  67. # There is also a brand new code for the Configuration schema, 100%
  68. # rewritten. There are new classes, all self documented: CommandLine,
  69. # SourceDocument, ConfigMaster and ConfigLines. In short, a new RAW
  70. # Config format was created, and all kind of configuration is first
  71. # converted to this format, and then a generic method parses it.
  72. #
  73. # The init processing was changed also, and now the functions which
  74. # get information about the input files are: get_infiles_config(),
  75. # process_source_file() and convert_this_files()
  76. #
  77. # Other parts are untouched, and remain the same as in v1.7, such as
  78. # the marks regexes, target Headers and target Tags&Rules.
  79. #
  80. ########################################################################
  81. # Now I think the code is nice, easier to read and understand
  82. #XXX Python coding warning
  83. # Avoid common mistakes:
  84. # - do NOT use newlist=list instead newlist=list[:]
  85. # - do NOT use newdic=dic instead newdic=dic.copy()
  86. # - do NOT use dic[key] instead dic.get(key)
  87. # - do NOT use del dic[key] without has_key() before
  88. #XXX Smart Image Align doesn't work if the image is a link
  89. # Can't fix that because the image is expanded together with the
  90. # link, at the linkbank filling moment. Only the image is passed
  91. # to parse_images(), not the full line, so it is always 'middle'.
  92. #XXX Paragraph separation not valid inside Quote
  93. # Quote will not have <p></p> inside, instead it will close and open
  94. # the <blockquote> again. This is really bad for CSS, when defining a
  95. # different background color. Still don't know how to fix it.
  96. #XXX TODO (maybe)
  97. # New mark or macro which expands to an anchor's full title.
  98. # It is necessary to parse the full document in this order:
  99. # DONE 1st scan: HEAD: get all settings, including %!includeconf
  100. # DONE 2nd scan: BODY: expand includes & apply %!preproc
  101. # 3rd scan: BODY: read titles and compose TOC info
  102. # 4th scan: BODY: full parsing, expanding [#anchor] 1st
  103. # Steps 2 and 3 can be done together, with no tag adding.
  104. # Two complete body scans will be *slow*, don't know if it is worth it.
  105. ##############################################################################
  106. # User config (1=ON, 0=OFF)
  107. USE_I18N = 1 # use gettext for i18ned messages? (default is 1)
  108. COLOR_DEBUG = 1 # show debug messages in colors? (default is 1)
  109. HTML_LOWER = 0 # use lowercased HTML tags instead upper? (default is 0)
  110. ##############################################################################
  111. # these are all the core Python modules used by txt2tags (KISS!)
  112. import re, string, os, sys, getopt
  113. from time import strftime,time,localtime
  114. # program information
  115. my_url = 'http://txt2tags.sf.net'
  116. my_name = 'txt2tags'
  117. my_email = 'verde@aurelio.net'
  118. my_version = '2.0' #-betaN
  119. # i18n - just use if available
  120. if USE_I18N:
  121. try:
  122. import gettext
  123. # if your locale dir is different, change it here
  124. cat = gettext.Catalog('txt2tags',localedir='/usr/share/locale/')
  125. _ = cat.gettext
  126. except:
  127. _ = lambda x:x
  128. else:
  129. _ = lambda x:x
  130. # FLAGS : the convertion related flags , may be used in %!options
  131. # OPTIONS : the convertion related options, may be used in %!options
  132. # ACTIONS : the other behaviour modifiers, valid on command line only
  133. # SETTINGS: global miscelaneous settings, valid on RC file only
  134. # CONFIG_KEYWORDS: the valid %!key:val keywords
  135. #
  136. # FLAGS and OPTIONS are configs that affect the converted document.
  137. # They usually have also a --no-<option> to turn them OFF.
  138. # ACTIONS are needed because when doing multiple input files, strange
  139. # behaviour would be found, as use command line interface for the
  140. # first file and gui for the second. There is no --no-<action>.
  141. # --version and --help inside %!options are also odd
  142. #
  143. TARGETS = ['html', 'xhtml', 'sgml', 'tex', 'man', 'mgp', 'moin', 'pm6', 'txt']
  144. FLAGS = {'headers' :1 , 'enum-title' :0 , 'mask-email' :0 ,
  145. 'toc-only' :0 , 'toc' :0 , 'rc' :1 ,
  146. 'css-suggar' :0 }
  147. OPTIONS = {'target' :'', 'toc-level' :3 , 'style' :'',
  148. 'infile' :'', 'outfile' :'', 'encoding' :'',
  149. 'split' :0 , 'lang' :''}
  150. ACTIONS = {'help' :0 , 'version' :0 , 'gui' :0 ,
  151. 'verbose' :0 , 'debug' :0 , 'dump-config':0 }
  152. SETTINGS = {} # for future use
  153. CONFIG_KEYWORDS = [
  154. 'target', 'encoding', 'style', 'options', 'preproc','postproc',
  155. 'guicolors']
  156. TARGET_NAMES = {
  157. 'html' : _('HTML page'),
  158. 'xhtml': _('XHTML page'),
  159. 'sgml' : _('SGML document'),
  160. 'tex' : _('LaTeX document'),
  161. 'man' : _('UNIX Manual page'),
  162. 'mgp' : _('Magic Point presentation'),
  163. 'moin' : _('MoinMoin page'),
  164. 'pm6' : _('PageMaker 6.0 document'),
  165. 'txt' : _('Plain Text'),
  166. }
  167. DEBUG = 0 # do not edit here, please use --debug
  168. VERBOSE = 0 # do not edit here, please use -v, -vv or -vvv
  169. GUI = 0
  170. RC_RAW = []
  171. CMDLINE_RAW = []
  172. CONF = {}
  173. BLOCK = None
  174. regex = {}
  175. TAGS = {}
  176. rules = {}
  177. currdate = strftime('%Y%m%d',localtime(time())) # ISO current date
  178. lang = 'english'
  179. TARGET = ''
  180. STDIN = STDOUT = '-'
  181. ESCCHAR = '\x00'
  182. SEPARATOR = '\x01'
  183. LISTNAMES = {'-':'list', '+':'numlist', ':':'deflist'}
  184. LINEBREAK = {'default':'\n', 'win':'\r\n', 'mac':'\r'}
  185. RCFILE = {'default':'.txt2tagsrc', 'win':'_t2trc'}
  186. #my_version = my_version + '-dev' + currdate[4:] # devel!
  187. # plataform specific settings
  188. LB = LINEBREAK.get(sys.platform[:3]) or LINEBREAK['default']
  189. RC = RCFILE.get(sys.platform[:3]) or RCFILE['default']
  190. VERSIONSTR = _("%s version %s <%s>")%(my_name,my_version,my_url)
  191. USAGE = string.join([
  192. '',
  193. _("Usage: %s [OPTIONS] [infile.t2t ...]") % my_name,
  194. '',
  195. _(" -t, --target set target document type. currently supported:"),
  196. ' %s' % re.sub(r"[]'[]",'',repr(TARGETS)),
  197. _(" -i, --infile=FILE set FILE as the input file name ('-' for STDIN)"),
  198. _(" -o, --outfile=FILE set FILE as the output file name ('-' for STDOUT)"),
  199. _(" -n, --enum-title enumerate all title lines as 1, 1.1, 1.1.1, etc"),
  200. _(" -H, --no-headers suppress header, title and footer contents"),
  201. _(" --headers show header, title and footer contents (default ON)"),
  202. _(" --encoding set target file encoding (utf-8, iso-8859-1, etc)"),
  203. _(" --style=FILE use FILE as the document style (like HTML CSS)"),
  204. _(" --css-suggar insert CSS-friendly tags for HTML and XHTML targets"),
  205. _(" --mask-email hide email from spam robots. x@y.z turns <x (a) y z>"),
  206. _(" --toc add TOC (Table of Contents) to target document"),
  207. _(" --toc-only print document TOC and exit"),
  208. _(" --toc-level=N set maximum TOC level (depth) to N"),
  209. _(" --rc read user config file ~/.txt2tagsrc (default ON)"),
  210. _(" --gui invoke Graphical Tk Interface"),
  211. _(" -v, --verbose print informative messages during convertion"),
  212. _(" -h, --help print this help information and exit"),
  213. _(" -V, --version print program version and exit"),
  214. _(" --dump-config print all the config found and exit"),
  215. '',
  216. _("Turn OFF options:"),
  217. " --no-outfile, --no-infile, --no-style, --no-encoding, --no-headers",
  218. " --no-toc, --no-toc-only, --no-mask-email, --no-enum-title, --no-rc",
  219. " --no-css-suggar",
  220. '',
  221. _("Example:\n %s -t html --toc myfile.t2t") % my_name,
  222. '',
  223. _("By default, converted output is saved to 'infile.<target>'."),
  224. _("Use --outfile to force an output file name."),
  225. _("If input file is '-', reads from STDIN."),
  226. _("If output file is '-', dumps output to STDOUT."),
  227. ''
  228. ], '\n')
  229. ##############################################################################
  230. # here is all the target's templates
  231. # you may edit them to fit your needs
  232. # - the %(HEADERn)s strings represent the Header lines
  233. # - the %(STYLE)s string is changed by --style contents
  234. # - the %(ENCODING)s string is changed by --encoding contents
  235. # - if any of the above is empty, the full line is removed
  236. # - use %% to represent a literal %
  237. #
  238. HEADER_TEMPLATE = {
  239. 'txt': """\
  240. %(HEADER1)s
  241. %(HEADER2)s
  242. %(HEADER3)s
  243. """,
  244. 'sgml': """\
  245. <!doctype linuxdoc system>
  246. <article>
  247. <title>%(HEADER1)s
  248. <author>%(HEADER2)s
  249. <date>%(HEADER3)s
  250. """,
  251. 'html': """\
  252. <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
  253. <HTML>
  254. <HEAD>
  255. <META NAME="generator" CONTENT="http://txt2tags.sf.net">
  256. <META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=%(ENCODING)s">
  257. <LINK REL="stylesheet" TYPE="text/css" HREF="%(STYLE)s">
  258. <TITLE>%(HEADER1)s</TITLE>
  259. </HEAD><BODY BGCOLOR="white" TEXT="black">
  260. <P ALIGN="center"><CENTER><H1>%(HEADER1)s</H1>
  261. <FONT SIZE="4">
  262. <I>%(HEADER2)s</I><BR>
  263. %(HEADER3)s
  264. </FONT></CENTER>
  265. """,
  266. 'htmlcss': """\
  267. <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
  268. <HTML>
  269. <HEAD>
  270. <META NAME="generator" CONTENT="http://txt2tags.sf.net">
  271. <META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=%(ENCODING)s">
  272. <LINK REL="stylesheet" TYPE="text/css" HREF="%(STYLE)s">
  273. <TITLE>%(HEADER1)s</TITLE>
  274. </HEAD>
  275. <BODY>
  276. <DIV CLASS="header" ID="header">
  277. <H1>%(HEADER1)s</H1>
  278. <H2>%(HEADER2)s</H2>
  279. <H3>%(HEADER3)s</H3>
  280. </DIV>
  281. """,
  282. 'xhtml': """\
  283. <?xml version="1.0"?>
  284. <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"\
  285. "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
  286. <html xmlns="http://www.w3.org/1999/xhtml">
  287. <head>
  288. <title>%(HEADER1)s</title>
  289. <meta name="generator" content="http://txt2tags.sf.net" />
  290. <meta http-equiv="Content-Type" content="text/html; charset=%(ENCODING)s" />
  291. <link rel="stylesheet" type="text/css" href="%(STYLE)s" />
  292. </head>
  293. <body bgcolor="white" text="black">
  294. <div align="center">
  295. <h1>%(HEADER1)s</h1>
  296. <h2>%(HEADER2)s</h2>
  297. <h3>%(HEADER3)s</h3>
  298. </div>
  299. """,
  300. 'xhtmlcss': """\
  301. <?xml version="1.0"?>
  302. <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"\
  303. "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
  304. <html xmlns="http://www.w3.org/1999/xhtml">
  305. <head>
  306. <title>%(HEADER1)s</title>
  307. <meta name="generator" content="http://txt2tags.sf.net" />
  308. <meta http-equiv="Content-Type" content="text/html; charset=%(ENCODING)s" />
  309. <link rel="stylesheet" type="text/css" href="%(STYLE)s" />
  310. </head>
  311. <body>
  312. <div class="header" id="header">
  313. <h1>%(HEADER1)s</h1>
  314. <h2>%(HEADER2)s</h2>
  315. <h3>%(HEADER3)s</h3>
  316. </div>
  317. """,
  318. 'man': """\
  319. .TH "%(HEADER1)s" 1 "%(HEADER3)s" "%(HEADER2)s"
  320. """,
  321. # TODO style to <HR>
  322. 'pm6': """\
  323. <PMTags1.0 win><C-COLORTABLE ("Preto" 1 0 0 0)
  324. ><@Normal=
  325. <FONT "Times New Roman"><CCOLOR "Preto"><SIZE 11>
  326. <HORIZONTAL 100><LETTERSPACE 0><CTRACK 127><CSSIZE 70><C+SIZE 58.3>
  327. <C-POSITION 33.3><C+POSITION 33.3><P><CBASELINE 0><CNOBREAK 0><CLEADING -0.05>
  328. <GGRID 0><GLEFT 7.2><GRIGHT 0><GFIRST 0><G+BEFORE 7.2><G+AFTER 0>
  329. <GALIGNMENT "justify"><GMETHOD "proportional"><G& "ENGLISH">
  330. <GPAIRS 12><G%% 120><GKNEXT 0><GKWIDOW 0><GKORPHAN 0><GTABS $>
  331. <GHYPHENATION 2 34 0><GWORDSPACE 75 100 150><GSPACE -5 0 25>
  332. ><@Bullet=<@-PARENT "Normal"><FONT "Abadi MT Condensed Light">
  333. <GLEFT 14.4><G+BEFORE 2.15><G%% 110><GTABS(25.2 l "")>
  334. ><@PreFormat=<@-PARENT "Normal"><FONT "Lucida Console"><SIZE 8><CTRACK 0>
  335. <GLEFT 0><G+BEFORE 0><GALIGNMENT "left"><GWORDSPACE 100 100 100><GSPACE 0 0 0>
  336. ><@Title1=<@-PARENT "Normal"><FONT "Arial"><SIZE 14><B>
  337. <GCONTENTS><GLEFT 0><G+BEFORE 0><GALIGNMENT "left">
  338. ><@Title2=<@-PARENT "Title1"><SIZE 12><G+BEFORE 3.6>
  339. ><@Title3=<@-PARENT "Title1"><SIZE 10><GLEFT 7.2><G+BEFORE 7.2>
  340. ><@Title4=<@-PARENT "Title3">
  341. ><@Title5=<@-PARENT "Title3">
  342. ><@Quote=<@-PARENT "Normal"><SIZE 10><I>>
  343. %(HEADER1)s
  344. %(HEADER2)s
  345. %(HEADER3)s
  346. """,
  347. 'mgp': """\
  348. #!/usr/X11R6/bin/mgp -t 90
  349. %%deffont "normal" xfont "utopia-medium-r", charset "iso8859-1"
  350. %%deffont "normal-i" xfont "utopia-medium-i", charset "iso8859-1"
  351. %%deffont "normal-b" xfont "utopia-bold-r" , charset "iso8859-1"
  352. %%deffont "normal-bi" xfont "utopia-bold-i" , charset "iso8859-1"
  353. %%deffont "mono" xfont "courier-medium-r", charset "iso8859-1"
  354. %%default 1 size 5
  355. %%default 2 size 8, fore "yellow", font "normal-b", center
  356. %%default 3 size 5, fore "white", font "normal", left, prefix " "
  357. %%tab 1 size 4, vgap 30, prefix " ", icon arc "red" 40, leftfill
  358. %%tab 2 prefix " ", icon arc "orange" 40, leftfill
  359. %%tab 3 prefix " ", icon arc "brown" 40, leftfill
  360. %%tab 4 prefix " ", icon arc "darkmagenta" 40, leftfill
  361. %%tab 5 prefix " ", icon arc "magenta" 40, leftfill
  362. %%%%------------------------- end of headers -----------------------------
  363. %%page
  364. %%size 10, center, fore "yellow"
  365. %(HEADER1)s
  366. %%font "normal-i", size 6, fore "white", center
  367. %(HEADER2)s
  368. %%font "mono", size 7, center
  369. %(HEADER3)s
  370. """,
  371. # TODO please, improve me!
  372. 'moin': """\
  373. '''%(HEADER1)s'''
  374. ''%(HEADER2)s''
  375. %(HEADER3)s
  376. """,
  377. 'tex': \
  378. r"""\documentclass[11pt,a4paper]{article}
  379. \usepackage{amsfonts,graphicx,url}
  380. \usepackage[%(ENCODING)s]{inputenc} %% char encoding
  381. \usepackage{%(STYLE)s} %% user defined package
  382. \pagestyle{plain} %% do page numbering ('empty' turns off)
  383. \frenchspacing %% no aditional spaces after periods
  384. \setlength{\parskip}{8pt}\parindent=0pt %% no paragraph indentation
  385. %% uncomment next line for fancy PDF output on Adobe Acrobat Reader
  386. %%\usepackage[pdfstartview=FitV,colorlinks=true,bookmarks=true]{hyperref}
  387. \title{%(HEADER1)s}
  388. \author{%(HEADER2)s}
  389. \begin{document}
  390. \date{%(HEADER3)s}
  391. \maketitle
  392. \clearpage
  393. """
  394. }
  395. ##############################################################################
  396. def getTags(target):
  397. "Returns all the known tags for the specified target"
  398. keys = [
  399. 'paragraphOpen','paragraphClose',
  400. 'title1','title2','title3','title4','title5',
  401. 'numtitle1','numtitle2','numtitle3','numtitle4','numtitle5',
  402. 'blockVerbOpen','blockVerbClose',
  403. 'blockQuoteOpen','blockQuoteClose','blockQuoteLine',
  404. 'fontMonoOpen','fontMonoClose',
  405. 'fontBoldOpen','fontBoldClose',
  406. 'fontItalicOpen','fontItalicClose',
  407. 'fontUnderlineOpen','fontUnderlineClose',
  408. 'listOpen','listClose',
  409. 'listItemOpen','listItemClose','listItemLine',
  410. 'numlistOpen','numlistClose',
  411. 'numlistItemOpen','numlistItemClose','numlistItemLine',
  412. 'deflistOpen','deflistClose',
  413. 'deflistItem1Open','deflistItem1Close',
  414. 'deflistItem2Open','deflistItem2Close',
  415. 'bar1','bar2',
  416. 'url','urlMark','email','emailMark',
  417. 'img',
  418. 'tableOpen','tableClose',
  419. 'tableRowOpen','tableRowClose','tableRowSep',
  420. 'tableCellOpen','tableCellClose','tableCellSep',
  421. 'tableTitleCellOpen','tableTitleCellClose','tableTitleCellSep',
  422. 'tableTitleRowOpen','tableTitleRowClose',
  423. 'tableBorder', 'tableAlignLeft', 'tableAlignCenter',
  424. 'tableCellAlignLeft','tableCellAlignRight','tableCellAlignCenter',
  425. 'tableColAlignLeft','tableColAlignRight','tableColAlignCenter',
  426. 'tableColAlignSep',
  427. 'anchor','comment',
  428. 'TOC','tocOpen','tocClose','tocOpenCss','tocCloseCss',
  429. 'bodyOpenCss','bodyCloseCss',
  430. 'EOD'
  431. ]
  432. alltags = {
  433. 'txt': {
  434. 'title1' : ' \a' ,
  435. 'title2' : '\t\a' ,
  436. 'title3' : '\t\t\a' ,
  437. 'title4' : '\t\t\t\a' ,
  438. 'title5' : '\t\t\t\t\a',
  439. 'blockQuoteLine' : '\t' ,
  440. 'listItemOpen' : '- ' ,
  441. 'numlistItemOpen' : '\a. ' ,
  442. 'bar1' : '\a' ,
  443. 'bar2' : '\a' ,
  444. 'url' : '\a' ,
  445. 'urlMark' : '\a (\a)' ,
  446. 'email' : '\a' ,
  447. 'emailMark' : '\a (\a)' ,
  448. 'img' : '[\a]' ,
  449. },
  450. 'html': {
  451. 'paragraphOpen' : '<P>' ,
  452. 'paragraphClose' : '</P>' ,
  453. 'title1' : '~A~<H1>\a</H1>' ,
  454. 'title2' : '~A~<H2>\a</H2>' ,
  455. 'title3' : '~A~<H3>\a</H3>' ,
  456. 'title4' : '~A~<H4>\a</H4>' ,
  457. 'title5' : '~A~<H5>\a</H5>' ,
  458. 'blockVerbOpen' : '<PRE>' ,
  459. 'blockVerbClose' : '</PRE>' ,
  460. 'blockQuoteOpen' : '<BLOCKQUOTE>' ,
  461. 'blockQuoteClose' : '</BLOCKQUOTE>' ,
  462. 'fontMonoOpen' : '<CODE>' ,
  463. 'fontMonoClose' : '</CODE>' ,
  464. 'fontBoldOpen' : '<B>' ,
  465. 'fontBoldClose' : '</B>' ,
  466. 'fontItalicOpen' : '<I>' ,
  467. 'fontItalicClose' : '</I>' ,
  468. 'fontUnderlineOpen' : '<U>' ,
  469. 'fontUnderlineClose' : '</U>' ,
  470. 'listOpen' : '<UL>' ,
  471. 'listClose' : '</UL>' ,
  472. 'listItemOpen' : '<LI>' ,
  473. 'numlistOpen' : '<OL>' ,
  474. 'numlistClose' : '</OL>' ,
  475. 'numlistItemOpen' : '<LI>' ,
  476. 'deflistOpen' : '<DL>' ,
  477. 'deflistClose' : '</DL>' ,
  478. 'deflistItem1Open' : '<DT>' ,
  479. 'deflistItem1Close' : '</DT>' ,
  480. 'deflistItem2Open' : '<DD>' ,
  481. 'bar1' : '<HR NOSHADE SIZE=1>' ,
  482. 'bar2' : '<HR NOSHADE SIZE=5>' ,
  483. 'url' : '<A HREF="\a">\a</A>' ,
  484. 'urlMark' : '<A HREF="\a">\a</A>' ,
  485. 'email' : '<A HREF="mailto:\a">\a</A>' ,
  486. 'emailMark' : '<A HREF="mailto:\a">\a</A>' ,
  487. 'img' :'<IMG ALIGN="~A~" SRC="\a" BORDER="0" ALT="">',
  488. 'tableOpen' : '<TABLE~A~ CELLPADDING="4"~B~>',
  489. 'tableClose' : '</TABLE>' ,
  490. 'tableRowOpen' : '<TR>' ,
  491. 'tableRowClose' : '</TR>' ,
  492. 'tableCellOpen' : '<TD\a>' ,
  493. 'tableCellClose' : '</TD>' ,
  494. 'tableTitleCellOpen' : '<TH>' ,
  495. 'tableTitleCellClose' : '</TH>' ,
  496. 'tableBorder' : ' BORDER="1"' ,
  497. 'tableAlignCenter' : ' ALIGN="center"',
  498. 'tableCellAlignRight' : ' ALIGN="right"' ,
  499. 'tableCellAlignCenter': ' ALIGN="center"',
  500. 'anchor' : '<A NAME="\a"></A>\n',
  501. 'tocOpenCss' : '<DIV CLASS="toc" ID="toc">',
  502. 'tocCloseCss' : '</DIV>',
  503. 'bodyOpenCss' : '<DIV CLASS="body" ID="body">',
  504. 'bodyCloseCss' : '</DIV>',
  505. 'comment' : '<!-- \a -->' ,
  506. 'EOD' : '</BODY></HTML>'
  507. },
  508. #TIP xhtml inherits all HTML definitions (lowercased)
  509. #TIP http://www.w3.org/TR/xhtml1/#guidelines
  510. #TIP http://www.htmlref.com/samples/Chapt17/17_08.htm
  511. 'xhtml': {
  512. 'listItemClose' : '</li>' ,
  513. 'numlistItemClose' : '</li>' ,
  514. 'deflistItem2Close' : '</dd>' ,
  515. 'bar1' : '<hr class="light" />',
  516. 'bar2' : '<hr class="heavy" />',
  517. 'anchor' : '<a id="\a" name="\a"></a>\n',
  518. 'img' :'<img align="~A~" src="\a" border="0" alt=""/>',
  519. },
  520. 'sgml': {
  521. 'paragraphOpen' : '<p>' ,
  522. 'title1' : '<sect>\a~A~<p>' ,
  523. 'title2' : '<sect1>\a~A~<p>' ,
  524. 'title3' : '<sect2>\a~A~<p>' ,
  525. 'title4' : '<sect3>\a~A~<p>' ,
  526. 'title5' : '<sect4>\a~A~<p>' ,
  527. 'blockVerbOpen' : '<tscreen><verb>' ,
  528. 'blockVerbClose' : '</verb></tscreen>' ,
  529. 'blockQuoteOpen' : '<quote>' ,
  530. 'blockQuoteClose' : '</quote>' ,
  531. 'fontMonoOpen' : '<tt>' ,
  532. 'fontMonoClose' : '</tt>' ,
  533. 'fontBoldOpen' : '<bf>' ,
  534. 'fontBoldClose' : '</bf>' ,
  535. 'fontItalicOpen' : '<em>' ,
  536. 'fontItalicClose' : '</em>' ,
  537. 'fontUnderlineOpen' : '<bf><em>' ,
  538. 'fontUnderlineClose' : '</em></bf>' ,
  539. 'listOpen' : '<itemize>' ,
  540. 'listClose' : '</itemize>' ,
  541. 'listItemOpen' : '<item>' ,
  542. 'numlistOpen' : '<enum>' ,
  543. 'numlistClose' : '</enum>' ,
  544. 'numlistItemOpen' : '<item>' ,
  545. 'deflistOpen' : '<descrip>' ,
  546. 'deflistClose' : '</descrip>' ,
  547. 'deflistItem1Open' : '<tag>' ,
  548. 'deflistItem1Close' : '</tag>' ,
  549. 'bar1' : '<!-- \a -->' ,
  550. 'bar2' : '<!-- \a -->' ,
  551. 'url' : '<htmlurl url="\a" name="\a">' ,
  552. 'urlMark' : '<htmlurl url="\a" name="\a">' ,
  553. 'email' : '<htmlurl url="mailto:\a" name="\a">' ,
  554. 'emailMark' : '<htmlurl url="mailto:\a" name="\a">' ,
  555. 'img' : '<figure><ph vspace=""><img src="\a">'+\
  556. '</figure>' ,
  557. 'tableOpen' : '<table><tabular ca="~C~">' ,
  558. 'tableClose' : '</tabular></table>' ,
  559. 'tableRowSep' : '<rowsep>' ,
  560. 'tableCellSep' : '<colsep>' ,
  561. 'tableColAlignLeft' : 'l' ,
  562. 'tableColAlignRight' : 'r' ,
  563. 'tableColAlignCenter' : 'c' ,
  564. 'comment' : '<!-- \a -->' ,
  565. 'anchor' : '<label id="\a">' ,
  566. 'TOC' : '<toc>' ,
  567. 'EOD' : '</article>'
  568. },
  569. 'tex': {
  570. 'title1' : '\n\section*{\a}',
  571. 'title2' : '\\subsection*{\a}' ,
  572. 'title3' : '\\subsubsection*{\a}' ,
  573. # title 4/5: DIRTY: para+BF+\\+\n
  574. 'title4' : '\\paragraph{}\\textbf{\a}\\\\\n',
  575. 'title5' : '\\paragraph{}\\textbf{\a}\\\\\n',
  576. 'numtitle1' : '\n\section{\a}',
  577. 'numtitle2' : '\\subsection{\a}' ,
  578. 'numtitle3' : '\\subsubsection{\a}' ,
  579. 'blockVerbOpen' : '\\begin{verbatim}' ,
  580. 'blockVerbClose' : '\\end{verbatim}' ,
  581. 'blockQuoteOpen' : '\\begin{quotation}' ,
  582. 'blockQuoteClose' : '\\end{quotation}' ,
  583. 'fontMonoOpen' : '\\texttt{' ,
  584. 'fontMonoClose' : '}' ,
  585. 'fontBoldOpen' : '\\textbf{' ,
  586. 'fontBoldClose' : '}' ,
  587. 'fontItalicOpen' : '\\textit{' ,
  588. 'fontItalicClose' : '}' ,
  589. 'fontUnderlineOpen' : '\\underline{' ,
  590. 'fontUnderlineClose' : '}' ,
  591. 'listOpen' : '\\begin{itemize}' ,
  592. 'listClose' : '\\end{itemize}' ,
  593. 'listItemOpen' : '\\item ' ,
  594. 'numlistOpen' : '\\begin{enumerate}' ,
  595. 'numlistClose' : '\\end{enumerate}' ,
  596. 'numlistItemOpen' : '\\item ' ,
  597. 'deflistOpen' : '\\begin{description}',
  598. 'deflistClose' : '\\end{description}' ,
  599. 'deflistItem1Open' : '\\item[' ,
  600. 'deflistItem1Close' : ']' ,
  601. 'bar1' : '\n\\hrulefill{}\n' ,
  602. 'bar2' : '\n\\rule{\linewidth}{1mm}\n',
  603. 'url' : '\\url{\a}' ,
  604. 'urlMark' : '\\textit{\a} (\\url{\a})' ,
  605. 'email' : '\\url{\a}' ,
  606. 'emailMark' : '\\textit{\a} (\\url{\a})' ,
  607. 'img' : '\\includegraphics{\a}',
  608. 'tableOpen' : '\\begin{center}\\begin{tabular}{|~C~|}',
  609. 'tableClose' : '\\end{tabular}\\end{center}',
  610. 'tableRowOpen' : '\\hline ' ,
  611. 'tableRowClose' : ' \\\\' ,
  612. 'tableCellSep' : ' & ' ,
  613. 'tableColAlignLeft' : 'l' ,
  614. 'tableColAlignRight' : 'r' ,
  615. 'tableColAlignCenter' : 'c' ,
  616. 'tableColAlignSep' : '|' ,
  617. 'comment' : '% \a' ,
  618. 'TOC' : '\\tableofcontents\\clearpage',
  619. 'EOD' : '\\end{document}'
  620. },
  621. 'moin': {
  622. 'title1' : '= \a =' ,
  623. 'title2' : '== \a ==' ,
  624. 'title3' : '=== \a ===' ,
  625. 'title4' : '==== \a ====' ,
  626. 'title5' : '===== \a =====',
  627. 'blockVerbOpen' : '{{{' ,
  628. 'blockVerbClose' : '}}}' ,
  629. 'blockQuoteLine' : ' ' ,
  630. 'fontMonoOpen' : '{{{' ,
  631. 'fontMonoClose' : '}}}' ,
  632. 'fontBoldOpen' : "'''" ,
  633. 'fontBoldClose' : "'''" ,
  634. 'fontItalicOpen' : "''" ,
  635. 'fontItalicClose' : "''" ,
  636. 'fontUnderlineOpen' : "__" ,
  637. 'fontUnderlineClose' : "__" ,
  638. 'listItemOpen' : ' * ' ,
  639. 'numlistItemOpen' : ' \a. ' ,
  640. 'bar1' : '----' ,
  641. 'bar2' : '----' ,
  642. 'url' : '[\a]' ,
  643. 'urlMark' : '[\a \a]' ,
  644. 'email' : '[\a]' ,
  645. 'emailMark' : '[\a \a]' ,
  646. 'img' : '[\a]' ,
  647. 'tableRowOpen' : '||' ,
  648. 'tableCellOpen' : '\a' ,
  649. 'tableCellClose' : '||' ,
  650. 'tableTitleCellClose' : '||' ,
  651. 'tableCellAlignRight' : '<)>' ,
  652. 'tableCellAlignCenter': '<:>' ,
  653. 'comment' : '## \a' ,
  654. 'TOC' : '[[TableOfContents]]'
  655. },
  656. 'mgp': {
  657. 'paragraphOpen' : '%font "normal", size 5' ,
  658. 'title1' : '%page\n\n\a\n' ,
  659. 'title2' : '%page\n\n\a\n' ,
  660. 'title3' : '%page\n\n\a\n' ,
  661. 'title4' : '%page\n\n\a\n' ,
  662. 'title5' : '%page\n\n\a\n' ,
  663. 'blockVerbOpen' : '%font "mono"' ,
  664. 'blockVerbClose' : '%font "normal"' ,
  665. 'blockQuoteOpen' : '%prefix " "' ,
  666. 'blockQuoteClose' : '%prefix " "' ,
  667. 'fontMonoOpen' : '\n%cont, font "mono"\n' ,
  668. 'fontMonoClose' : '\n%cont, font "normal"\n' ,
  669. 'fontBoldOpen' : '\n%cont, font "normal-b"\n' ,
  670. 'fontBoldClose' : '\n%cont, font "normal"\n' ,
  671. 'fontItalicOpen' : '\n%cont, font "normal-i"\n' ,
  672. 'fontItalicClose' : '\n%cont, font "normal"\n' ,
  673. 'fontUnderlineOpen' : '\n%cont, fore "cyan"\n' ,
  674. 'fontUnderlineClose' : '\n%cont, fore "white"\n' ,
  675. 'listItemLine' : '\t' ,
  676. 'numlistItemLine' : '\t' ,
  677. 'deflistItem1Open' : '\t\n%cont, font "normal-b"\n',
  678. 'deflistItem1Close' : '\n%cont, font "normal"\n' ,
  679. 'bar1' : '%bar "white" 5' ,
  680. 'bar2' : '%pause' ,
  681. 'url' : '\n%cont, fore "cyan"\n\a' +\
  682. '\n%cont, fore "white"\n' ,
  683. 'urlMark' : '\a \n%cont, fore "cyan"\n\a'+\
  684. '\n%cont, fore "white"\n' ,
  685. 'email' : '\n%cont, fore "cyan"\n\a' +\
  686. '\n%cont, fore "white"\n' ,
  687. 'emailMark' : '\a \n%cont, fore "cyan"\n\a'+\
  688. '\n%cont, fore "white"\n' ,
  689. 'img' : '\n%~A~\n%newimage "\a"\n%left\n',
  690. 'comment' : '%% \a' ,
  691. 'tocOpen' : '%page\n\n\n' ,
  692. 'EOD' : '%%EOD'
  693. },
  694. # man groff_man ; man 7 groff
  695. 'man': {
  696. 'paragraphOpen' : '.P' ,
  697. 'title1' : '.SH \a' ,
  698. 'title2' : '.SS \a' ,
  699. 'title3' : '.SS \a' ,
  700. 'title4' : '.SS \a' ,
  701. 'title5' : '.SS \a' ,
  702. 'blockVerbOpen' : '.nf' ,
  703. 'blockVerbClose' : '.fi\n' ,
  704. 'blockQuoteOpen' : '.RS' ,
  705. 'blockQuoteClose' : '.RE' ,
  706. 'fontBoldOpen' : '\\fB' ,
  707. 'fontBoldClose' : '\\fR' ,
  708. 'fontItalicOpen' : '\\fI' ,
  709. 'fontItalicClose' : '\\fR' ,
  710. 'listOpen' : '.RS' ,
  711. 'listItemOpen' : '.IP \(bu 3\n',
  712. 'listClose' : '.RE' ,
  713. 'numlistOpen' : '.RS' ,
  714. 'numlistItemOpen' : '.IP \a. 3\n',
  715. 'numlistClose' : '.RE' ,
  716. 'deflistItem1Open' : '.TP\n' ,
  717. 'bar1' : '\n\n' ,
  718. 'bar2' : '\n\n' ,
  719. 'url' : '\a' ,
  720. 'urlMark' : '\a (\a)',
  721. 'email' : '\a' ,
  722. 'emailMark' : '\a (\a)',
  723. 'img' : '\a' ,
  724. 'tableOpen' : '.TS\n~A~~B~tab(^); ~C~.',
  725. 'tableClose' : '.TE' ,
  726. 'tableRowOpen' : ' ' ,
  727. 'tableCellSep' : '^' ,
  728. 'tableAlignCenter' : 'center, ',
  729. 'tableBorder' : 'allbox, ',
  730. 'tableColAlignLeft' : 'l' ,
  731. 'tableColAlignRight' : 'r' ,
  732. 'tableColAlignCenter' : 'c' ,
  733. 'comment' : '.\\" \a'
  734. },
  735. 'pm6': {
  736. 'paragraphOpen' : '<@Normal:>' ,
  737. 'title1' : '\n<@Title1:>\a',
  738. 'title2' : '\n<@Title2:>\a',
  739. 'title3' : '\n<@Title3:>\a',
  740. 'title4' : '\n<@Title4:>\a',
  741. 'title5' : '\n<@Title5:>\a',
  742. 'blockVerbOpen' : '<@PreFormat:>' ,
  743. 'blockQuoteLine' : '<@Quote:>' ,
  744. 'fontMonoOpen' : '<FONT "Lucida Console"><SIZE 9>' ,
  745. 'fontMonoClose' : '<SIZE$><FONT$>',
  746. 'fontBoldOpen' : '<B>' ,
  747. 'fontBoldClose' : '<P>' ,
  748. 'fontItalicOpen' : '<I>' ,
  749. 'fontItalicClose' : '<P>' ,
  750. 'fontUnderlineOpen' : '<U>' ,
  751. 'fontUnderlineClose' : '<P>' ,
  752. 'listOpen' : '<@Bullet:>' ,
  753. 'listItemOpen' : '\x95\t' , # \x95 == ~U
  754. 'numlistOpen' : '<@Bullet:>' ,
  755. 'numlistItemOpen' : '\x95\t' ,
  756. 'bar1' : '\a' ,
  757. 'bar2' : '\a' ,
  758. 'url' : '<U>\a<P>' , # underline
  759. 'urlMark' : '\a <U>\a<P>' ,
  760. 'email' : '\a' ,
  761. 'emailMark' : '\a \a' ,
  762. 'img' : '\a'
  763. }
  764. }
  765. # make the HTML -> XHTML inheritance
  766. xhtml = alltags['html'].copy()
  767. for key in xhtml.keys(): xhtml[key] = string.lower(xhtml[key])
  768. # some like HTML tags as lowercase, some don't... (headers out)
  769. if HTML_LOWER: alltags['html'] = xhtml.copy()
  770. xhtml.update(alltags['xhtml'])
  771. alltags['xhtml'] = xhtml.copy()
  772. # compose the target tags dictionary
  773. tags = {}
  774. target_tags = alltags[target].copy()
  775. for key in keys: tags[key] = '' # create empty keys
  776. for key in target_tags.keys():
  777. tags[key] = maskEscapeChar(target_tags[key]) # populate
  778. return tags
  779. ##############################################################################
  780. def getRules(target):
  781. "Returns all the target-specific syntax rules"
  782. ret = {}
  783. allrules = [
  784. # target rules (ON/OFF)
  785. 'linkable', # target supports external links
  786. 'tableable', # target supports tables
  787. 'imglinkable', # target supports images as links
  788. 'imgalignable', # target supports image alignment
  789. 'imgasdefterm', # target supports image as definition term
  790. 'autonumberlist', # target supports numbered lists natively
  791. 'autonumbertitle', # target supports numbered titles natively
  792. 'parainsidelist', # lists items supports paragraph
  793. 'spacedlistitem', # lists support blank lines between items
  794. 'listnotnested', # lists cannot be nested
  795. 'quotenotnested', # quotes cannot be nested
  796. 'verbblocknotescaped', # don't escape specials in verb block
  797. 'verbblockfinalescape', # do final escapes in verb block
  798. 'escapeurl', # escape special in link URL
  799. 'onelinepara', # dump paragraph as a single long line
  800. 'tabletitlerowinbold', # manually bold any cell on table titles
  801. 'tablecellstrip', # strip extra spaces from each table cell
  802. 'barinsidequote', # bars are allowed inside quote blocks
  803. 'finalescapetitle', # perform final escapes on title lines
  804. # target code beautify (ON/OFF)
  805. 'indentverbblock', # add leading spaces to verb block lines
  806. 'breaktablecell', # break lines after any table cell
  807. 'breaktablelineopen', # break line after opening table line
  808. 'notbreaklistopen', # don't break line after opening a new list
  809. 'notbreakparaopen', # don't break line after opening a new para
  810. 'keepquoteindent', # don't remove the leading TABs on quotes
  811. 'keeplistindent', # don't remove the leading spaces on lists
  812. 'blankendmotherlist', # append a blank line at the mother list end
  813. 'blankendtable', # append a blank line at the table end
  814. 'tagnotindentable', # tags must be placed at the line begining
  815. # value settings
  816. 'listmaxdepth', # maximum depth for lists
  817. 'tablecellaligntype' # type of table cell align: cell, column
  818. ]
  819. rules_bank = {
  820. 'txt' : {
  821. 'indentverbblock':1,
  822. 'spacedlistitem':1,
  823. 'parainsidelist':1,
  824. 'keeplistindent':1,
  825. 'barinsidequote':1,
  826. 'blankendmotherlist':1
  827. },
  828. 'html': {
  829. 'indentverbblock':1,
  830. 'linkable':1,
  831. 'escapeurl':1,
  832. 'imglinkable':1,
  833. 'imgalignable':1,
  834. 'imgasdefterm':1,
  835. 'autonumberlist':1,
  836. 'spacedlistitem':1,
  837. 'parainsidelist':1,
  838. 'blankendmotherlist':1,
  839. 'tableable':1,
  840. 'tablecellstrip':1,
  841. 'blankendtable':1,
  842. 'breaktablecell':1,
  843. 'breaktablelineopen':1,
  844. 'keeplistindent':1,
  845. 'keepquoteindent':1,
  846. 'barinsidequote':1,
  847. 'tablecellaligntype':'cell'
  848. },
  849. #TIP xhtml inherits all HTML rules
  850. 'xhtml': {
  851. },
  852. 'sgml': {
  853. 'linkable':1,
  854. 'escapeurl':1,
  855. 'autonumberlist':1,
  856. 'spacedlistitem':1,
  857. 'blankendmotherlist':1,
  858. 'tableable':1,
  859. 'tablecellstrip':1,
  860. 'blankendtable':1,
  861. 'quotenotnested':1,
  862. 'keeplistindent':1,
  863. 'keepquoteindent':1,
  864. 'barinsidequote':1,
  865. 'finalescapetitle':1,
  866. 'tablecellaligntype':'column'
  867. },
  868. 'mgp' : {
  869. 'blankendmotherlist':1,
  870. 'tagnotindentable':1,
  871. 'spacedlistitem':1,
  872. 'imgalignable':1,
  873. },
  874. 'tex' : {
  875. 'autonumberlist':1,
  876. 'autonumbertitle':1,
  877. 'spacedlistitem':1,
  878. 'blankendmotherlist':1,
  879. 'tableable':1,
  880. 'tablecellstrip':1,
  881. 'tabletitlerowinbold':1,
  882. 'blankendtable':1,
  883. 'verbblocknotescaped':1,
  884. 'keeplistindent':1,
  885. 'listmaxdepth':4,
  886. 'barinsidequote':1,
  887. 'finalescapetitle':1,
  888. 'tablecellaligntype':'column'
  889. },
  890. 'moin': {
  891. 'spacedlistitem':1,
  892. 'linkable':1,
  893. 'blankendmotherlist':1,
  894. 'keeplistindent':1,
  895. 'tableable':1,
  896. 'barinsidequote':1,
  897. 'blankendtable':1,
  898. 'tabletitlerowinbold':1,
  899. 'tablecellstrip':1,
  900. 'tablecellaligntype':'cell'
  901. },
  902. 'man' : {
  903. 'spacedlistitem':1,
  904. 'indentverbblock':1,
  905. 'blankendmotherlist':1,
  906. 'tagnotindentable':1,
  907. 'tableable':1,
  908. 'tablecellaligntype':'column',
  909. 'tabletitlerowinbold':1,
  910. 'tablecellstrip':1,
  911. 'blankendtable':1,
  912. 'keeplistindent':0,
  913. 'barinsidequote':1,
  914. 'parainsidelist':0,
  915. },
  916. 'pm6' : {
  917. 'keeplistindent':1,
  918. 'verbblockfinalescape':1,
  919. #TODO add support for these - maybe set a JOINNEXT char and
  920. # do it on addLineBreaks()
  921. 'notbreaklistopen':1,
  922. 'notbreakparaopen':1,
  923. 'barinsidequote':1,
  924. 'onelinepara':1,
  925. }
  926. }
  927. # get the target specific rules
  928. if target == 'xhtml':
  929. myrules = rules_bank['html'].copy() # inheritance
  930. myrules.update(rules_bank['xhtml']) # get XHTML specific
  931. else:
  932. myrules = rules_bank[target].copy()
  933. # populate return dictionary
  934. for key in allrules: ret[key] = 0 # reset all
  935. ret.update(myrules) # get rules
  936. return ret
  937. ##############################################################################
  938. def getRegexes():
  939. "Returns all the regexes used to find the t2t marks"
  940. bank = {
  941. 'blockVerbOpen':
  942. re.compile(r'^```\s*$'),
  943. 'blockVerbClose':
  944. re.compile(r'^```\s*$'),
  945. 'blockRawOpen':
  946. re.compile(r'^"""\s*$'),
  947. 'blockRawClose':
  948. re.compile(r'^"""\s*$'),
  949. 'quote':
  950. re.compile(r'^\t+'),
  951. '1lineVerb':
  952. re.compile(r'^``` (?=.)'),
  953. '1lineRaw':
  954. re.compile(r'^""" (?=.)'),
  955. # mono, raw, bold, italic, underline:
  956. # - marks must be glued with the contents, no boundary spaces
  957. # - they are greedy, so in ****bold****, turns to <b>**bold**</b>
  958. 'fontMono':
  959. re.compile( r'``([^\s](|.*?[^\s])`*)``'),
  960. 'raw':
  961. re.compile( r'""([^\s](|.*?[^\s])"*)""'),
  962. 'fontBold':
  963. re.compile(r'\*\*([^\s](|.*?[^\s])\**)\*\*'),
  964. 'fontItalic':
  965. re.compile( r'//([^\s](|.*?[^\s])/*)//'),
  966. 'fontUnderline':
  967. re.compile( r'__([^\s](|.*?[^\s])_*)__'),
  968. 'list':
  969. re.compile(r'^( *)(-) (?=[^ ])'),
  970. 'numlist':
  971. re.compile(r'^( *)(\+) (?=[^ ])'),
  972. 'deflist':
  973. re.compile(r'^( *)(:) (.*)$'),
  974. 'bar':
  975. re.compile(r'^(\s*)([_=-]{20,})\s*$'),
  976. 'table':
  977. re.compile(r'^ *\|\|? '),
  978. 'blankline':
  979. re.compile(r'^\s*$'),
  980. 'comment':
  981. re.compile(r'^%'),
  982. # auxiliar tag regexes
  983. '_imgAlign' : re.compile(r'~A~',re.I),
  984. '_tableAlign' : re.compile(r'~A~',re.I),
  985. '_anchor' : re.compile(r'~A~',re.I),
  986. '_tableBorder' : re.compile(r'~B~',re.I),
  987. '_tableColAlign': re.compile(r'~C~',re.I),
  988. }
  989. # special char to place data on TAGs contents (\a == bell)
  990. bank['x'] = re.compile('\a')
  991. # %%date [ (formatting) ]
  992. bank['date'] = re.compile(r'%%date\b(\((?P<fmt>.*?)\))?', re.I)
  993. # almost complicated title regexes ;)
  994. titskel = r'^ *(?P<id>%s)(?P<txt>%s)\1(\[(?P<label>\w*)\])?\s*$'
  995. bank[ 'title'] = re.compile(titskel%('[=]{1,5}','[^=](|.*[^=])'))
  996. bank['numtitle'] = re.compile(titskel%('[+]{1,5}','[^+](|.*[^+])'))
  997. ### complicated regexes begin here ;)
  998. #
  999. # textual descriptions on --help's style: [...] is optional, | is OR
  1000. ### first, some auxiliar variables
  1001. #
  1002. # [image.EXT]
  1003. patt_img = r'\[([\w_,.+%$#@!?+~/-]+\.(png|jpe?g|gif|eps|bmp))\]'
  1004. # link things
  1005. urlskel = {
  1006. 'proto' : r'(https?|ftp|news|telnet|gopher|wais)://',
  1007. 'guess' : r'(www[23]?|ftp)\.', # w/out proto, try to guess
  1008. 'login' : r'A-Za-z0-9_.-', # for ftp://login@domain.com
  1009. 'pass' : r'[^ @]*', # for ftp://login:password@dom.com
  1010. 'chars' : r'A-Za-z0-9%._/~:,=$@&-',# %20(space), :80(port), D&D
  1011. 'anchor': r'A-Za-z0-9%._-', # %nn(encoded)
  1012. 'form' : r'A-Za-z0-9/%&=+.,@*_-', # .,@*_-(as is)
  1013. 'punct' : r'.,;:!?'
  1014. }
  1015. # username [ :password ] @
  1016. patt_url_login = r'([%s]+(:%s)?@)?'%(urlskel['login'],urlskel['pass'])
  1017. # [ http:// ] [ username:password@ ] domain.com [ / ]
  1018. # [ #anchor | ?form=data ]
  1019. retxt_url = r'\b(%s%s|%s)[%s]+\b/*(\?[%s]+)?(#[%s]+)?'%(
  1020. urlskel['proto'],patt_url_login, urlskel['guess'],
  1021. urlskel['chars'],urlskel['form'],urlskel['anchor'])
  1022. # filename | [ filename ] #anchor
  1023. retxt_url_local = r'[%s]+|[%s]*(#[%s]+)'%(
  1024. urlskel['chars'],urlskel['chars'],urlskel['anchor'])
  1025. # user@domain [ ?form=data ]
  1026. patt_email = r'\b[%s]+@([A-Za-z0-9_-]+\.)+[A-Za-z]{2,4}\b(\?[%s]+)?'%(
  1027. urlskel['login'],urlskel['form'])
  1028. # saving for future use
  1029. bank['_urlskel'] = urlskel
  1030. ### and now the real regexes
  1031. #
  1032. bank['email'] = re.compile(patt_email,re.I)
  1033. # email | url
  1034. bank['link'] = re.compile(r'%s|%s'%(retxt_url,patt_email), re.I)
  1035. # \[ label | imagetag url | email | filename \]
  1036. bank['linkmark'] = re.compile(
  1037. r'\[(?P<label>%s|[^]]+) (?P<link>%s|%s|%s)\]'%(
  1038. patt_img, retxt_url, patt_email, retxt_url_local),
  1039. re.L+re.I)
  1040. # image
  1041. bank['img'] = re.compile(patt_img, re.L+re.I)
  1042. # all macros
  1043. bank['macro'] = bank['date']
  1044. # special things
  1045. bank['special'] = re.compile(r'^%!\s*')
  1046. return bank
  1047. ### END OF regex nightmares
  1048. ##############################################################################
  1049. def echo(msg): # for quick debug
  1050. print '\033[32;1m%s\033[m'%msg
  1051. def Quit(msg, exitcode=0):
  1052. print msg
  1053. sys.exit(exitcode)
  1054. def Error(msg):
  1055. sys.stderr.write(_("%s: Error: ")%my_name + "%s\n"%msg)
  1056. sys.stderr.flush()
  1057. sys.exit(1)
  1058. def ShowTraceback():
  1059. try:
  1060. from traceback import print_exc
  1061. print_exc() ; print ; print
  1062. except: pass
  1063. def Message(msg,level):
  1064. if level <= VERBOSE:
  1065. prefix = '-'*5
  1066. print "%s %s"%(prefix*level, msg)
  1067. def Debug(msg,color=0,linenr=None):
  1068. "0gray=init,1red=conf,3yellow=line,6cyan=block,2green=detail,5pink=gui"
  1069. if not DEBUG: return
  1070. if COLOR_DEBUG: msg = '\033[3%s;1m%s\033[m'%(color,msg)
  1071. if linenr is not None: msg = "LINE %04d: %s"%(linenr,msg)
  1072. print "** %s"%msg
  1073. def Readfile(file, remove_linebreaks=0):
  1074. if file == '-':
  1075. try: data = sys.stdin.readlines()
  1076. except: Error(_('You must feed me with data on STDIN!'))
  1077. else:
  1078. try: f = open(file); data = f.readlines() ; f.close()
  1079. except: Error(_("Cannot read file:")+"\n %s"%file)
  1080. if remove_linebreaks:
  1081. data = map(lambda x:re.sub('[\n\r]+$','',x), data)
  1082. Message(_("Readed file (%d lines): %s")%(len(data),file),2)
  1083. return data
  1084. def Savefile(file, contents):
  1085. try: f = open(file, 'wb')
  1086. except: Error(_("Cannot open file for writing:")+"\n %s"%file)
  1087. if type(contents) == type([]): doit = f.writelines
  1088. else: doit = f.write
  1089. doit(contents) ; f.close()
  1090. def showdic(dic):
  1091. for k in dic.keys(): print "%15s : %s" % (k,dic[k])
  1092. def dotted_spaces(txt=''):
  1093. return string.replace(txt,' ','.')
  1094. def get_rc_path():
  1095. rc_file = RC
  1096. # try to get rc dir name (usually $HOME on win and linux)
  1097. rc_dir = os.environ.get('HOME')
  1098. if rc_dir:
  1099. # compose path and return it if the file exists
  1100. rc_path = os.path.join(rc_dir, rc_file)
  1101. if os.path.isfile(rc_path):
  1102. return rc_path
  1103. return ''
  1104. ##############################################################################
  1105. class CommandLine:
  1106. """Command Line class - Masters command line
  1107. This class checks and extract data from the provided command line.
  1108. The --long options and flags are taken from the global OPTIONS,
  1109. FLAGS and ACTIONS dictionaries. The short options are registered
  1110. here, and also their equivalence to the long ones.
  1111. METHODS:
  1112. _compose_short_opts() -> str
  1113. _compose_long_opts() -> list
  1114. Compose the valid short and long options list, on the
  1115. 'getopt' format.
  1116. parse() -> (opts, args)
  1117. Call getopt to check and parse the command line.
  1118. It expects to receive the command line as a list, and
  1119. without the program name (sys.argv[1:]).
  1120. get_raw_config() -> [RAW config]
  1121. Scans command line and convert the data to the RAW config
  1122. format. See ConfigMaster class to the RAW format description.
  1123. Optional 'ignore' and 'filter' arguments are used to filter
  1124. in or out specified keys.
  1125. compose_cmdline(dict) -> [Command line]
  1126. Compose a command line list from an already parsed config
  1127. dictionary, generated from RAW by ConfigMaster(). Use
  1128. this to compose an optimal command line for a group of
  1129. options.
  1130. The get_raw_config() calls parse(), so the tipical use of this
  1131. class is:
  1132. raw = CommandLine().get_raw_config(sys.argv[1:])
  1133. """
  1134. def __init__(self):
  1135. self.all_options = OPTIONS.keys()
  1136. self.all_flags = FLAGS.keys()
  1137. self.all_actions = ACTIONS.keys()
  1138. # short:long options equivalence
  1139. self.short_long = {
  1140. 'h':'help' , 'V':'version',
  1141. 'n':'enum-title', 'i':'infile' ,
  1142. 'H':'no-headers', 'o':'outfile',
  1143. 'v':'verbose' , 't':'target'
  1144. }
  1145. # compose valid short and long options data for getopt
  1146. self.short_opts = self._compose_short_opts()
  1147. self.long_opts = self._compose_long_opts()
  1148. def _compose_short_opts(self):
  1149. "Returns a string like 'hVt:o' with all short options/flags"
  1150. ret = []
  1151. for opt in self.short_long.keys():
  1152. long = self.short_long[opt]
  1153. if long in self.all_options: # is flag or option?
  1154. opt = opt+':' # option: have param
  1155. ret.append(opt)
  1156. Debug('Valid SHORT options: %s'%ret)
  1157. return string.join(ret, '')
  1158. def _compose_long_opts(self):
  1159. "Returns a list with all the valid long options/flags"
  1160. ret = map(lambda x:x+'=', self.all_options) # add =
  1161. ret.extend(self.all_flags) # flag ON
  1162. ret.extend(self.all_actions) # acts
  1163. ret.extend(map(lambda x:'no-'+x, self.all_flags)) # add no-*
  1164. ret.extend(['no-style']) # turn OFF option
  1165. ret.extend(['no-encoding']) # turn OFF option
  1166. ret.extend(['no-outfile']) # turn OFF option
  1167. Debug('Valid LONG options: %s'%ret)
  1168. return ret
  1169. def _tokenize(self, cmd_string=''):
  1170. "Convert a command line string to a list"
  1171. #TODO protect quotes contents
  1172. return string.split(cmd_string)
  1173. def parse(self, cmdline=[]):
  1174. "Check/Parse a command line list TIP: no program name!"
  1175. # get the valid options
  1176. short, long = self.short_opts, self.long_opts
  1177. # parse it!
  1178. try:
  1179. opts, args = getopt.getopt(cmdline, short, long)
  1180. except getopt.error, errmsg:
  1181. Error(_("%s (try --help)")%errmsg)
  1182. return (opts, args)
  1183. def get_raw_config(self, cmdline=[], ignore=[], filter=[]):
  1184. "Returns the options/arguments found as RAW config"
  1185. if not cmdline: return []
  1186. ret = []
  1187. # we need lists, not strings
  1188. if type(cmdline) == type(''): cmdline = self._tokenize(cmdline)
  1189. Debug("cmdline: %s"%cmdline)
  1190. opts, args = self.parse(cmdline[:])
  1191. # get infile, if any
  1192. while args:
  1193. infile = args.pop(0)
  1194. ret.append(['infile', infile])
  1195. # parse all options
  1196. for name,value in opts:
  1197. # remove leading - and --
  1198. name = re.sub('^--?', '', name)
  1199. # translate short opt to long
  1200. if len(name) == 1: name = self.short_long.get(name)
  1201. # save it (if allowed)
  1202. ret.append([name, value])
  1203. # apply 'ignore' and 'filter' rules (filter is stronger)
  1204. temp = ret[:] ; ret = []
  1205. for name,value in temp:
  1206. if (not filter and not ignore) or \
  1207. (filter and name in filter) or \
  1208. (ignore and name not in ignore):
  1209. ret.append( ['all', name, value] )
  1210. # add the original command line string as 'realcmdline'
  1211. ret.append( ['all', 'realcmdline', cmdline] )
  1212. return ret
  1213. def compose_cmdline(self, conf={}, no_check=0):
  1214. "compose a full (and diet) command line from CONF dict"
  1215. if not conf: return []
  1216. args = []
  1217. dft_options = OPTIONS.copy()
  1218. cfg = conf.copy()
  1219. valid_opts = self.all_options + self.all_flags
  1220. use_short = {'no-headers':'H', 'enum-title':'n'}
  1221. # remove useless options
  1222. if not no_check and cfg.get('toc-only'):
  1223. if cfg.has_key('no-headers'):
  1224. del cfg['no-headers']
  1225. if cfg.has_key('outfile'):
  1226. del cfg['outfile'] # defaults to STDOUT
  1227. if cfg.get('target') == 'txt':
  1228. del cfg['target'] # already default
  1229. args.append('--toc-only') # must be the first
  1230. del cfg['toc-only']
  1231. # add target type
  1232. if cfg.has_key('target'):
  1233. args.append('-t '+cfg['target'])
  1234. del cfg['target']
  1235. # add other options
  1236. for key in cfg.keys():
  1237. if key not in valid_opts: continue # may be a %!setting
  1238. if key in ['outfile','infile']: continue # later
  1239. val = cfg[key]
  1240. if not val: continue
  1241. # default values are useless on cmdline
  1242. if val == dft_options.get(key): continue
  1243. # -short format
  1244. if key in use_short.keys():
  1245. args.append('-'+use_short[key])
  1246. continue
  1247. # --long format
  1248. if key in self.all_flags: # add --option
  1249. args.append('--'+key)
  1250. else: # add --option=value
  1251. args.append('--%s=%s'%(key,val))
  1252. # the outfile using -o
  1253. if cfg.has_key('outfile') and \
  1254. cfg['outfile'] != dft_options.get('outfile'):
  1255. args.append('-o '+cfg['outfile'])
  1256. # place input file(s) always at the end
  1257. if cfg.has_key('infile'):
  1258. args.append(string.join(cfg['infile'],' '))
  1259. # return as a nice list
  1260. Debug("Diet command line: %s"%string.join(args,' '), 1)
  1261. return args
  1262. ##############################################################################
  1263. class SourceDocument:
  1264. """
  1265. SourceDocument class - scan document structure, extract data
  1266. It knows about full files. It reads a file and identify all
  1267. the areas begining (Head,Conf,Body). With this info it can
  1268. extract each area contents.
  1269. Note: the original line break is removed.
  1270. DATA:
  1271. self.arearef - Save Head, Conf, Body init line number
  1272. self.areas - Store the area names which are not empty
  1273. self.buffer - The full file contents (with NO \\r, \\n)
  1274. METHODS:
  1275. get() - Access the contents of an Area. Example:
  1276. config = SourceDocument(file).get('conf')
  1277. split() - Get all the document Areas at once. Example:
  1278. head, conf, body = SourceDocument(file).split()
  1279. RULES:
  1280. * The document parts are sequential: Head, Conf and Body.
  1281. * One ends when the next begins.
  1282. * The Conf Area is optional, so a document can have just
  1283. Head and Body Areas.
  1284. These are the Areas limits:
  1285. - Head Area: the first three lines
  1286. - Body Area: from the first valid text line to the end
  1287. - Conf Area: the comments between Head and Body Areas
  1288. Exception: If the first line is blank, this means no
  1289. header info, so the Head Area is just the first line.
  1290. """
  1291. def __init__(self, filename=''):
  1292. self.areas = ['head','conf','body']
  1293. self.arearef = []
  1294. self.areas_fancy = ''
  1295. self.filename = filename
  1296. self.buffer = []
  1297. if filename: self.scan(filename)
  1298. def split(self):
  1299. "Returns all document parts, splitted into lists."
  1300. return self.get('head'), self.get('conf'), self.get('body')
  1301. def get(self, areaname):
  1302. "Returns head|conf|body contents from self.buffer"
  1303. # sanity
  1304. if areaname not in self.areas: return []
  1305. if not self.buffer : return []
  1306. # go get it
  1307. bufini = 1
  1308. bufend = len(self.buffer)
  1309. if areaname == 'head':
  1310. ini = bufini
  1311. end = self.arearef[1] or self.arearef[2] or bufend
  1312. elif areaname == 'conf':
  1313. ini = self.arearef[1]
  1314. end = self.arearef[2] or bufend
  1315. elif areaname == 'body':
  1316. ini = self.arearef[2]
  1317. end = bufend
  1318. else:
  1319. Error("Unknown Area name '%s'"%areaname)
  1320. lines = self.buffer[ini:end]
  1321. # make sure head will always have 3 lines
  1322. while areaname == 'head' and len(lines) < 3:
  1323. lines.append('')
  1324. return lines
  1325. def scan(self, filename):
  1326. "Run through source file and identify head/conf/body areas"
  1327. Debug("source file: %s"%filename)
  1328. Message(_("Loading source document"),1)
  1329. buf = Readfile(filename, remove_linebreaks=1)
  1330. cfg_parser = ConfigLines().parse_line
  1331. buf.insert(0, '') # text start at pos 1
  1332. ref = [1,4,0]
  1333. if not string.strip(buf[1]): # no header
  1334. ref[0] = 0 ; ref[1] = 2
  1335. for i in range(ref[1],len(buf)): # find body init:
  1336. if string.strip(buf[i]) and ( # ... not blank and
  1337. buf[i][0] != '%' or # ... not comment or
  1338. cfg_parser(buf[i],'include')[1]): # ... %!include
  1339. ref[2] = i ; break
  1340. if ref[1] == ref[2]: ref[1] = 0 # no conf area
  1341. for i in 0,1,2: # del !existent
  1342. if ref[i] >= len(buf): ref[i] = 0 # title-only
  1343. if not ref[i]: self.areas[i] = ''
  1344. Debug('Head,Conf,Body start line: %s'%ref)
  1345. self.arearef = ref # save results
  1346. self.buffer = buf
  1347. # fancyness sample: head conf body (1 4 8)
  1348. self.areas_fancy = "%s (%s)"%(
  1349. string.join(self.areas),
  1350. string.join(map(str, map(lambda x:x or '', ref))))
  1351. Message(_("Areas found: %s")%self.areas_fancy, 2)
  1352. def get_raw_config(self):
  1353. "Handy method to get the CONF area RAW config (if any)"
  1354. if not self.areas.count('conf'): return []
  1355. Message(_("Scanning source document CONF area"),1)
  1356. raw = ConfigLines(
  1357. file=self.filename, lines=self.get('conf'),
  1358. first_line=self.arearef[1]).get_raw_config()
  1359. Debug("document raw config: %s"%raw, 1)
  1360. return raw
  1361. ##############################################################################
  1362. class ConfigMaster:
  1363. """ConfigMaster class - the configuration wizard
  1364. This class is the configuration master. It knows how to handle
  1365. the RAW and PARSED config format. It also performs the sanity
  1366. checkings for a given configuration.
  1367. DATA:
  1368. self.raw - Stores the config on the RAW format
  1369. self.parsed - Stores the config on the PARSED format
  1370. self.defaults - Stores the default values for all keys
  1371. self.off - Stores the OFF values for all keys
  1372. self.multi - List of keys which can have multiple values
  1373. self.numeric - List of keys which value must be a number
  1374. self.incremental - List of keys which are incremental
  1375. RAW FORMAT:
  1376. The RAW format is a list of lists, being each mother list item
  1377. a full configuration entry. Any entry is a 3 item list, on
  1378. the following format: [ TARGET, KEY, VALUE ]
  1379. Being a list, the order is preserved, so it's easy to use
  1380. different kinds of configs, as CONF area and command line,
  1381. respecting the precedence.
  1382. The special target 'all' is used when no specific target was
  1383. defined on the original config.
  1384. PARSED FORMAT:
  1385. The PARSED format is a dictionary, with all the 'key : value'
  1386. found by reading the RAW config. The self.target contents
  1387. matters, so this dictionary only contains the target's
  1388. config. The configs of other targets are ignored.
  1389. The CommandLine and ConfigLines classes have the get_raw_config()
  1390. method which convert the configuration found to the RAW format.
  1391. Just feed it to parse() and get a brand-new ready-to-use config
  1392. dictionary. Example:
  1393. >>> raw = CommandLine().get_raw_config(['-n', '-H'])
  1394. >>> print raw
  1395. [['all', 'enum-title', ''], ['all', 'no-headers', '']]
  1396. >>> parsed = ConfigMaster(raw).parse()
  1397. >>> print parsed
  1398. {'enum-title': 1, 'headers': 0}
  1399. """
  1400. def __init__(self, raw=[], target=''):
  1401. self.raw = raw
  1402. self.target = target
  1403. self.parsed = {}
  1404. self.dft_options = OPTIONS.copy()
  1405. self.dft_flags = FLAGS.copy()
  1406. self.dft_actions = ACTIONS.copy()
  1407. self.dft_settings = SETTINGS.copy()
  1408. self.defaults = self._get_defaults()
  1409. self.off = self._get_off()
  1410. self.multi = ['infile', 'options','preproc','postproc']
  1411. self.incremental = ['verbose']
  1412. self.numeric = ['toc-level','split']
  1413. def _get_defaults(self):
  1414. "Get the default values for all config/options/flags"
  1415. empty = {}
  1416. for kw in CONFIG_KEYWORDS: empty[kw] = ''
  1417. empty.update(self.dft_options)
  1418. empty.update(self.dft_flags)
  1419. empty.update(self.dft_actions)
  1420. empty.update(self.dft_settings)
  1421. empty['realcmdline'] = '' # internal use only
  1422. empty['sourcefile'] = '' # internal use only
  1423. return empty
  1424. def _get_off(self):
  1425. "Turns OFF all the config/options/flags"
  1426. off = {}
  1427. for key in self.defaults.keys():
  1428. kind = type(self.defaults[key])
  1429. if kind == type(9):
  1430. off[key] = 0
  1431. elif kind == type(''):
  1432. off[key] = ''
  1433. elif kind == type([]):
  1434. off[key] = []
  1435. else:
  1436. Error('ConfigMaster: %s: Unknown type'+key)
  1437. return off
  1438. def _check_target(self):
  1439. "Checks if the target is already defined. If not, do it"
  1440. if not self.target:
  1441. self.target = self.find_value('target')
  1442. def get_target_raw(self):
  1443. "Returns the raw config for self.target or 'all'"
  1444. ret = []
  1445. self._check_target()
  1446. for entry in self.raw:
  1447. if entry[0] in [self.target, 'all']:
  1448. ret.append(entry)
  1449. return ret
  1450. def add(self, key, val):
  1451. "Adds the key:value pair to the config dictionary (if needed)"
  1452. # %!options
  1453. if key == 'options':
  1454. ignoreme = self.dft_actions.keys() + ['target']
  1455. raw_opts = CommandLine().get_raw_config(
  1456. val, ignore=ignoreme)
  1457. for target, key, val in raw_opts:
  1458. self.add(key, val)
  1459. return
  1460. # the no- prefix turns OFF this key
  1461. if key[:3] == 'no-':
  1462. key = key[3:] # remove prefix
  1463. val = self.off.get(key) # turn key OFF
  1464. # is this key valid?
  1465. if key not in self.defaults.keys():
  1466. Debug('Bogus Config %s:%s'%(key,val),1)
  1467. return
  1468. # is this value the default one?
  1469. if val == self.defaults.get(key):
  1470. # if default value, remove previous key:val
  1471. if self.parsed.has_key(key):
  1472. del self.parsed[key]
  1473. # nothing more to do
  1474. return
  1475. # flags ON comes empty. we'll add the 1 value now
  1476. if val == '' and \
  1477. key in self.dft_flags.keys()+self.dft_actions.keys():
  1478. val = 1
  1479. # multi value or single?
  1480. if key in self.multi:
  1481. # first one? start new list
  1482. if not self.parsed.has_key(key):
  1483. self.parsed[key] = []
  1484. self.parsed[key].append(val)
  1485. # incremental value? so let's add it
  1486. elif key in self.incremental:
  1487. self.parsed[key] = (self.parsed.get(key) or 0) + val
  1488. else:
  1489. self.parsed[key] = val
  1490. fancykey = dotted_spaces("%12s"%key)
  1491. Message(_("Added config %s : %s")%(fancykey,val),3)
  1492. def get_outfile_name(self, config={}):
  1493. "Dirname is the same for {in,out}file"
  1494. infile, outfile = config['sourcefile'], config['outfile']
  1495. if infile == STDIN and not outfile: outfile = STDOUT
  1496. if not outfile and (infile and config.get('target')):
  1497. basename = re.sub('\.(txt|t2t)$','',infile)
  1498. outfile = "%s.%s"%(basename, config['target'])
  1499. Debug(" infile: '%s'"%infile , 1)
  1500. Debug("outfile: '%s'"%outfile, 1)
  1501. return outfile
  1502. def sanity(self, config, gui=0):
  1503. "Basic config sanity checkings"
  1504. if not config: return {}
  1505. target = config.get('target')
  1506. # --toc-only doesn't require target specification
  1507. if not target and config.get('toc-only'):
  1508. target = 'txt'
  1509. # on GUI, some checkings are skipped
  1510. if not gui:
  1511. # we *need* a target
  1512. if not target:
  1513. Error(_('No target specified (try --help)')+\
  1514. '\n\n'+\
  1515. _('Maybe trying to convert an old v1.x file?'))
  1516. # and of course, an infile also
  1517. if not config['infile']:
  1518. Error(_('Missing input file (try --help)'))
  1519. # is the target valid?
  1520. if not TARGETS.count(target):
  1521. Error(_("Invalid target '%s' (try --help)"
  1522. )%target)
  1523. # ensure all keys are present
  1524. empty = self.defaults.copy() ; empty.update(config)
  1525. config = empty.copy()
  1526. # check integers options
  1527. for key in config.keys():
  1528. if key in self.numeric:
  1529. try: config[key] = int(config[key])
  1530. except: Error(_('--%s value must be a number'
  1531. )%key)
  1532. # check split level value
  1533. if config['split'] not in [0,1,2]:
  1534. Error(_('Option --split must be 0, 1 or 2'))
  1535. # --toc-only is stronger than others
  1536. if config['toc-only']:
  1537. config['headers'] = 0
  1538. config['toc'] = 0
  1539. config['split'] = 0
  1540. config['gui'] = 0
  1541. config['outfile'] = STDOUT
  1542. # splitting is disable for now (future: HTML only, no STDOUT)
  1543. config['split'] = 0
  1544. # restore target
  1545. config['target'] = target
  1546. # set output file name
  1547. config['outfile'] = self.get_outfile_name(config)
  1548. # checking suicide
  1549. if config['sourcefile'] == config['outfile'] and \
  1550. config['outfile'] != STDOUT and not gui:
  1551. Error(_("Input and Output files are the same: %s")%(
  1552. config['outfile']))
  1553. return config
  1554. def parse(self):
  1555. "Returns the parsed config for the current target"
  1556. raw = self.get_target_raw()
  1557. for target, key, value in raw:
  1558. self.add(key, value)
  1559. Message(_("Added the following keys: %s")%string.join(
  1560. self.parsed.keys(),', '),2)
  1561. return self.parsed.copy()
  def find_value(self, key='', target=''):
      """Scan ALL the raw config (self.raw) looking for the given key.

      Only the entries set for the given target, or for 'all', are
      taken into account. Returns:
        - '' when the key was not found
        - the list of all the values found, if the key is on self.multi
        - the last value found, otherwise
      """
      found = []
      accepted_targets = [target, 'all']
      for raw_target, raw_key, raw_value in self.raw:
          if raw_key != key: continue
          if raw_target not in accepted_targets: continue
          found.append(raw_value)
      if not found:
          return ''
      if key in self.multi:
          return found
      # single value keys: the last definition wins
      return found[-1]
  1573. ########################################################################
  class ConfigLines:
      """ConfigLines class - the config file data extractor

      This class reads and parses the config lines on the %!key:val
      format, converting them to RAW config. It deals with the user
      config file (RC file), the source document CONF area and the
      %!includeconf directives.

      Call it passing a file name or feed the desired config lines.
      Then just call the get_raw_config() method and wait to
      receive the full config data on the RAW format. This method
      also follows the possible %!includeconf directives found on
      the config lines. Example:

          raw = ConfigLines(file=".txt2tagsrc").get_raw_config()

      The parse_line() method is also useful to be used alone,
      to identify and tokenize a single config line. For example,
      to get the %!include command components, on the source
      document BODY:

          target, key, value = ConfigLines().parse_line(body_line)
      """

      # Placeholder saved on self.file when no file name is given
      NOFILE = 'NOFILE'

      # Value of %!preproc and %!postproc: PATTERN and REPLACE parts.
      # Constant, so it is compiled just once (not on every line).
      PREPOST_REGEX = re.compile(r"""
                                  # ---[ PATTERN ]---
          ^( "([^"]*)"            # "double quoted" or
           | '([^']*)'            # 'single quoted' or
           | ([^\s]+)             # single_word
           )
          \s+                     # separated by spaces
                                  # ---[ REPLACE ]---
           ( "([^"]*)"            # "double quoted" or
           | '([^']*)'            # 'single quoted' or
           | (.*)                 # anything
           )
          \s*$
          """, re.VERBOSE)

      # Value of %!guicolors: at least 4 tokens
      GUICOLORS_REGEX = re.compile(r"^([^\s]+\s+){3}[^\s]+")

      def __init__(self, file='', lines=None, first_line=1):
          """Save the config source: a file name and/or its lines.

          file       - config file name ('NOFILE' is saved if empty)
          lines      - config lines already loaded (optional)
          first_line - number of the first line, used on messages
          """
          # a fresh list for each instance, never a shared default
          if lines is None: lines = []
          # self.file is never empty, so remember if a name was given
          self._has_file = 0
          if file: self._has_file = 1
          self.file = file or self.NOFILE
          self.lines = lines
          self.first_line = first_line

      def load_lines(self):
          "Make sure we've loaded the file contents into buffer"
          if self.lines: return
          # nothing to read from: no lines and no real file name
          if not self._has_file:
              Error("ConfigLines: No file or lines provided")
          self.lines = self.read_config_file(self.file)

      def read_config_file(self, filename=''):
          """Read a Config File contents, aborting on invalid line.

          A line is valid when it is empty (or just blanks) or when
          its first char is '%'. Returns the lines read, or [] when
          no filename is given.
          """
          if not filename: return []
          errormsg = _("Invalid CONFIG line on %s")+"\n%03d:%s"
          lines = Readfile(filename, remove_linebreaks=1)
          # sanity: try to find invalid config lines
          for i in range(len(lines)):
              line = string.rstrip(lines[i])
              if not line: continue                    # empty
              if line[0] != '%':
                  Error(errormsg%(filename, i+1, line))
          return lines

      def include_config_file(self, file=''):
          """Perform the %!includeconf action, returning RAW config.

          The file path is relative to the directory of the current
          config file (self.file).
          """
          if not file: return []
          # current dir relative to the current file (self.file)
          current_dir = os.path.dirname(self.file)
          file = os.path.join(current_dir, file)
          # read and parse included config file contents
          lines = self.read_config_file(file)
          return ConfigLines(file=file, lines=lines).get_raw_config()

      def get_raw_config(self):
          """Scan buffer and extract all config as RAW (including includes).

          Returns a list of [target, key, value] items. The contents
          of the files pointed by %!includeconf are inserted at the
          point where the directive was found.
          """
          ret = []
          self.load_lines()
          first = self.first_line
          for i in range(len(self.lines)):
              line = self.lines[i]
              Message(_("Processing line %03d: %s")%(first+i, line), 2)
              target, key, val = self.parse_line(line)
              if not key: continue         # no config on this line
              if key == 'includeconf':
                  more_raw = self.include_config_file(val)
                  ret.extend(more_raw)
                  Message(_("Finished Config file inclusion: %s"
                      )%(val), 2)
              else:
                  ret.append([target, key, val])
                  Message(_("Added %s")%key, 3)
          return ret

      def parse_line(self, line='', keyname='', target=''):
          """Detects %!key:val config lines and extract data from it.

          keyname and target are optional regex pieces, to restrict
          the key name and the target accepted on the line.

          Returns the [target, name, value] list, or ['', '', ''] if
          the line is not a valid config line. Target and name are
          returned on lowercase and the target defaults to 'all'.
          The value is a string, except for:
            guicolors         - list with the tokens found
            preproc, postproc - (pattern, replace) tuple
          """
          empty = ['', '', '']
          if not line: return empty
          no_target = ['target', 'includeconf']
          re_name = keyname or '[a-z]+'
          re_target = target or '[a-z]*'
          # this one depends on the arguments, so compile it here
          cfgregex = re.compile(r"""
              ^%%!\s*               # leading id with opt spaces
              (?P<name>%s)\s*       # config name
              (\((?P<target>%s)\))? # optional target spec inside ()
              \s*:\s*               # key:value delimiter with opt spaces
              (?P<value>\S.+?)      # config value
              \s*$                  # rstrip() spaces and hit EOL
              """%(re_name, re_target), re.I+re.VERBOSE)
          match = cfgregex.match(line)
          if not match: return empty
          name = string.lower(match.group('name') or '')
          target = string.lower(match.group('target') or 'all')
          value = match.group('value')
          # NO target keywords: force all targets
          if name in no_target: target = 'all'
          # special config for GUI colors
          if name == 'guicolors':
              valmatch = self.GUICOLORS_REGEX.search(value)
              if not valmatch: return empty
              value = re.split(r'\s+', value)
          # Special config with two quoted values (%!preproc: "foo" 'bar')
          if name in ['preproc', 'postproc']:
              valmatch = self.PREPOST_REGEX.search(value)
              if not valmatch: return empty
              getval = valmatch.group
              patt = getval(2) or getval(3) or getval(4) or ''
              repl = getval(6) or getval(7) or getval(8) or ''
              value = (patt, repl)
          return [target, name, value]
  1692. ##############################################################################
  class MaskMaster:
      "(Un)Protect important structures from escaping and formatting"

      def __init__(self):
          # the placeholder left on the line by each kind of structure
          self.linkmask = '@@_link_@@'
          self.monomask = '@@_mono_@@'
          self.macromask = '@@_macro_@@'
          self.rawmask = '@@_raw_@@'
          self.reset()

      def reset(self):
          "Empty the banks, where the masked contents are saved"
          self.linkbank = []
          self.monobank = []
          self.macrobank = []
          self.rawbank = []

      def mask(self, line=''):
          """Replace the protected structures of the line by masks.

          The structures are masked on this order: raw text, mono
          font text, macros and links (plain or named). Each content
          removed is appended to its bank, to be restored by undo().
          Raw and mono texts are saved already escaped by doEscape().
          Returns the masked line.
          """
          # protect raw text
          raw_re = regex['raw']
          found = raw_re.search(line)
          while found:
              self.rawbank.append(doEscape(TARGET, found.group(1)))
              line = raw_re.sub(self.rawmask, line, 1)
              found = raw_re.search(line)

          # protect pre-formatted font text
          mono_re = regex['fontMono']
          found = mono_re.search(line)
          while found:
              self.monobank.append(doEscape(TARGET, found.group(1)))
              line = mono_re.sub(self.monomask, line, 1)
              found = mono_re.search(line)

          # protect macros (saved as found, no escaping)
          macro_re = regex['macro']
          found = macro_re.search(line)
          while found:
              self.macrobank.append(found.group())
              line = macro_re.sub(self.macromask, line, 1)
              found = macro_re.search(line)

          # protect URLs and emails
          plain_re = regex['link']
          named_re = regex['linkmark']
          while 1:
              plain = plain_re.search(line)
              named = named_re.search(line)
              if not (plain or named): break
              # when both kinds are found, the leftmost is masked
              # first (the plain link wins if they start together)
              if named and (not plain or named.start() < plain.start()):
                  link = named.group('link')
                  label = string.rstrip(named.group('label'))
                  link_re = named_re
              else:
                  link = plain.group()
                  label = ''
                  link_re = plain_re
              line = link_re.sub(self.linkmask, line, 1)
              # save link data to the link bank
              self.linkbank.append((label, link))
          return line

      def undo(self, line):
          """Restore on the line the contents saved by mask().

          Each mask found is replaced by the next content of its
          bank: links first (tagged by get_tagged_link), then macros
          (expanded by doDateMacro), mono texts (enclosed by the
          fontMono tags) and raw texts. Returns the restored line.
          """
          # url & email
          for label, url in self.linkbank:
              tagged_link = get_tagged_link(label, url)
              line = string.replace(line, self.linkmask, tagged_link, 1)

          # expand macros
          for macro in self.macrobank:
              line = string.replace(line, self.macromask, macro, 1)
          if self.macrobank:
              line = doDateMacro(line)

          # expand verb
          if self.monobank:
              mono_open = TAGS['fontMonoOpen']
              mono_close = TAGS['fontMonoClose']
              for mono in self.monobank:
                  tagged_mono = mono_open + mono + mono_close
                  line = string.replace(
                      line, self.monomask, tagged_mono, 1)

          # expand raw
          for raw in self.rawbank:
              line = string.replace(line, self.rawmask, raw, 1)
          return line
  1771. ##############################################################################
  class TitleMaster:
      """Title things

      Parses the title lines (add), composes the tagged title (get)
      and keeps the data of all the titles found, used to compose
      the TOC as a marked list (dump_marked_toc).
      """

      def __init__(self):
          # title counters, one for each level (index 0 is not used)
          self.count = ['', 0, 0, 0, 0, 0]
          # (level, count_id, txt, label) of every title added
          self.toc = []
          # data of the current title
          self.level = 0
          self.kind = ''
          self.txt = ''
          self.label = ''
          self.tag = ''
          self.count_id = ''
          self.user_labels = {}
          # anchors made so far and the prefix of the automatic ones
          self.anchor_count = 0
          self.anchor_prefix = 'toc'

      def add(self, line):
          "Parses a new title line."
          if not line: return
          self._set_prop(line)
          self._set_count_id()
          self._set_label()
          self._save_toc_info()

      def _save_toc_info(self):
          "Save TOC info, used by self.dump_marked_toc()"
          info = (self.level, self.count_id, self.txt, self.label)
          self.toc.append(info)

      def _set_prop(self, line=''):
          "Extract info from original line and set data holders."
          # the first non blank char tells the title type
          mark = string.lstrip(line)[0]
          if mark == '=':
              kind = 'title'
          elif mark == '+':
              kind = 'numtitle'
          else:
              Error("Unknown Title ID '%s'"%mark)
          # extract line info
          match = regex[kind].search(line)
          level = len(match.group('id'))
          # parse info & save
          self.txt = string.strip(match.group('txt'))
          self.label = match.group('label')
          self.level = level
          # the 'enum-title' config forces numbered titles
          if CONF['enum-title']:
              kind = 'numtitle'
          self.kind = kind
          # if there is no tag for this kind, use the plain title one
          self.tag = TAGS[kind+repr(level)] or TAGS['title'+repr(level)]

      def _set_count_id(self):
          "Compose and save the title count identifier (if needed)."
          # no manual count for plain titles or autonumbered targets
          if self.kind != 'numtitle' or rules['autonumbertitle']:
              self.count_id = ''
              return
          # manually increase title count
          self.count[self.level] = self.count[self.level] + 1
          # reset sublevels count (if any)
          for sublevel in range(self.level+1, len(self.count)):
              self.count[sublevel] = 0
          # compose count id from hierarchy
          ident = ''
          for depth in range(1, self.level+1):
              ident = ident + "%d."%self.count[depth]
          self.count_id = ident

      def _set_label(self):
          "Compose and save title label, used by anchors."
          # remove invalid chars from label set by user
          user_label = self.label or ''
          self.label = re.sub('[^A-Za-z0-9_]', '', user_label)
          # generate name as 15 first :alnum: chars
          #TODO how to translate safely accented chars to plain?
          #self.label = re.sub('[^A-Za-z0-9]', '', self.txt)[:15]
          # 'tocN' label - sequential count, ignoring 'toc-level'
          #self.label = self.anchor_prefix + str(len(self.toc)+1)

      def _get_tagged_anchor(self):
          "Return anchor if user defined a label, or TOC is on."
          label = self.label
          if CONF['toc'] and self.level <= CONF['toc-level']:
              # self.toc stores all the titles, regardless of the
              # 'toc-level' setting, so its length can't be used to
              # number the anchors. That's why this count is needed.
              self.anchor_count = self.anchor_count + 1
              # autonumber label (if needed)
              if not label:
                  label = '%s%s'%(self.anchor_prefix, self.anchor_count)
          if label and TAGS['anchor']:
              return regex['x'].sub(label, TAGS['anchor'])
          return ''

      def _get_full_title_text(self):
          "Returns the full title contents, already escaped."
          text = self.txt
          # insert count_id (if any) before text
          if self.count_id:
              text = '%s %s'%(self.count_id, text)
          # escape specials
          text = doEscape(TARGET, text)
          # some targets need the final escapes on title lines.
          # it's here because there is a 'continue' after title
          if rules['finalescapetitle']:
              text = doFinalEscape(TARGET, text)
          return text

      def get(self):
          "Returns the tagged title as a list."
          # maybe some anchoring before?
          anchor = self._get_tagged_anchor()
          self.tag = regex['_anchor'].sub(anchor, self.tag)
          # compose & escape title text (TOC uses unescaped)
          full_title = self._get_full_title_text()
          tagged = regex['x'].sub(full_title, self.tag)
          if TARGET != 'txt':
              return [tagged]
          # TXT target: "underlined" title, between blank lines
          underline = regex['x'].sub('='*len(full_title), self.tag)
          return ['', tagged, underline, '']

      def dump_marked_toc(self, max_level=99):
          """Dumps all toc itens as a valid t2t markup list

          Titles deeper than max_level are ignored. The titles with
          no label get an automatic one: the anchor prefix followed
          by the position of the item on the dumped TOC.
          """
          #TODO maybe use quote+linebreaks instead lists
          ret = []
          position = 1
          for level, count_id, txt, label in self.toc:
              if level > max_level: continue           # ignore
              indent = ' '*level
              id_txt = string.lstrip('%s %s'%(count_id, txt))
              label = label or self.anchor_prefix+repr(position)
              position = position + 1
              if not TAGS['anchor']:
                  # no links on TOC, just text
                  # man don't reformat TOC lines, cool!
                  if TARGET in ['txt', 'man']:
                      tocitem = '%s""%s""'%(indent, id_txt)
                  else:
                      tocitem = '%s- ""%s""'%(indent, id_txt)
              elif CONF['enum-title'] and level == 1:
                  # TOC is more readable with master topics not
                  # linked at number. This idea was taken from the
                  # Windows .CHM help files
                  tocitem = '%s+ [""%s"" #%s]'%(indent, txt, label)
              else:
                  # TOC will have links
                  tocitem = '%s- [""%s"" #%s]'%(indent, id_txt, label)
              ret.append(tocitem)
          return ret
  1919. ##############################################################################
  1920. #TODO check all this table mess
  1921. # handles the TABLE lines, using the properties found by parse_row
  1922. # the BLOCK's table() method steps in and replaces the cells by the parsed ones
  class TableMaster:
      """Table things

      Parses the table lines (parse_row), stores the table rows
      (add_row) and composes the full tagged table (dump).

      A row is the dictionary returned by parse_row(), with the keys:
      border, title, align, cells and cellalign.
      """

      def __init__(self, line=''):
          "The line (if any) sets the table border and alignment"
          self.rows = []
          self.border = 0
          self.align = 'Left'
          self.cellalign = []
          if line:
              prop = self.parse_row(line)
              self.border = prop['border']
              self.align = prop['align']
              self.cellalign = prop['cellalign']

      def _get_open_tag(self):
          "Returns the table open tag, with align and border filled"
          topen = TAGS['tableOpen']
          tborder = TAGS['tableBorder']
          talign = TAGS['tableAlign'+self.align]
          calignsep = TAGS['tableColAlignSep']
          calign = ''
          # the first line defines if table has border or not
          if not self.border: tborder = ''
          # set the columns alignment
          if rules['tablecellaligntype'] == 'column':
              coltags = []
              for align in self.cellalign:
                  coltags.append(TAGS['tableColAlign%s'%align])
              calign = string.join(coltags, calignsep)
          # align full table, set border and Column align (if any)
          topen = regex['_tableAlign'].sub(talign, topen)
          topen = regex['_tableBorder'].sub(tborder, topen)
          topen = regex['_tableColAlign'].sub(calign, topen)
          # tex table spec, border or not: {|l|c|r|} , {lcr}
          if calignsep and not self.border:
              # remove cell align separator
              topen = string.replace(topen, calignsep, '')
          return topen

      def _get_cell_align(self, cells):
          """Returns the alignment name of each cell, as a list.

          Blanks on both sides of the cell mean 'Center', a leading
          blank only means 'Right'. Otherwise (including cells that
          are empty or blanks only) it is 'Left'.
          """
          ret = []
          for cell in cells:
              align = 'Left'
              if string.strip(cell):
                  if cell[0] == ' ' and cell[-1] == ' ':
                      align = 'Center'
                  elif cell[0] == ' ':
                      align = 'Right'
              ret.append(align)
          return ret

      def _tag_cells(self, rowdata):
          "Returns the row as a string, with all of its cells tagged"
          row = []
          cells = rowdata['cells']
          open = TAGS['tableCellOpen']
          close = TAGS['tableCellClose']
          sep = TAGS['tableCellSep']
          calign = []
          for align in rowdata['cellalign']:
              calign.append(TAGS['tableCellAlign'+align])
          # maybe is it a title row?
          if rowdata['title']:
              open = TAGS['tableTitleCellOpen'] or open
              close = TAGS['tableTitleCellClose'] or close
              sep = TAGS['tableTitleCellSep'] or sep
          # should we break the line on *each* table cell?
          if rules['breaktablecell']: close = close+'\n'
          # cells pre processing
          if rules['tablecellstrip']:
              cells = map(lambda x: string.strip(x), cells)
          if rowdata['title'] and rules['tabletitlerowinbold']:
              cells = map(lambda x: enclose_me('fontBold',x), cells)
          # add cell BEGIN/END tags
          for cell in cells:
              # insert cell align into open tag (if cell is alignable)
              if rules['tablecellaligntype'] == 'cell':
                  copen = string.replace(open, '\a', calign.pop(0))
              else:
                  copen = open
              row.append(copen + cell + close)
          # maybe there are cell separators?
          return string.join(row, sep)

      def _append_row_separators(self, tagged_cells, rowsep):
          """Returns the rows with the separator added between them.

          The separator is appended to every row, except the last
          one. This way there is nothing to remove after, and a
          cell text which contains the separator is left untouched.
          """
          ret = []
          last = len(tagged_cells) - 1
          for i in range(len(tagged_cells)):
              if i < last:
                  ret.append(tagged_cells[i] + rowsep)
              else:
                  ret.append(tagged_cells[i])
          return ret

      def add_row(self, cells):
          "Stores a new table row (as returned by parse_row)"
          self.rows.append(cells)

      def parse_row(self, line):
          "Returns a dictionary with the properties of the table line"
          # default table proprierties
          ret = {'border':0, 'title':0, 'align':'Left',
                 'cells':[], 'cellalign':[]}
          # the slices are used instead of indexes to not fail on
          # lines that are too short
          # detect table align (and remove spaces mark)
          if line[:1] == ' ': ret['align'] = 'Center'
          line = string.lstrip(line)
          # detect title mark
          if line[1:2] == '|': ret['title'] = 1
          # delete trailing spaces after last cell border
          line = re.sub(r'\|\s*$', '|', line)
          # detect (and delete) border mark (and leading space)
          if line[-1:] == '|':
              ret['border'] = 1
              line = line[:-2]
          # delete table mark
          line = regex['table'].sub('', line)
          # split cells
          ret['cells'] = string.split(line, ' | ')
          # find cells align
          ret['cellalign'] = self._get_cell_align(ret['cells'])
          Debug('Table Prop: %s' % ret, 2)
          return ret

      def dump(self):
          """Returns the full tagged table, as a list of lines.

          If the target has a row separator tag, the rows are joined
          by it. If not, each row is enclosed by the row open/close
          tags (or the title row ones, when set).
          """
          open = self._get_open_tag()
          rows = self.rows
          close = TAGS['tableClose']
          rowopen = TAGS['tableRowOpen']
          rowclose = TAGS['tableRowClose']
          rowsep = TAGS['tableRowSep']
          titrowopen = TAGS['tableTitleRowOpen'] or rowopen
          titrowclose = TAGS['tableTitleRowClose'] or rowclose
          if rules['breaktablelineopen']:
              rowopen = rowopen + '\n'
              titrowopen = titrowopen + '\n'
          # tex gotchas
          if TARGET == 'tex':
              if not self.border:
                  rowopen = titrowopen = ''
              else:
                  close = rowopen + close
          # now we tag all the table cells on each row
          tagged_cells = []
          for rowdata in rows:
              tagged_cells.append(self._tag_cells(rowdata))
          # add row separator tags between lines
          if rowsep:
              tagged_rows = self._append_row_separators(
                  tagged_cells, rowsep)
          # add row BEGIN/END tags for each line
          else:
              tagged_rows = []
              for i in range(len(rows)):
                  if rows[i]['title']:
                      o, c = titrowopen, titrowclose
                  else:
                      o, c = rowopen, rowclose
                  tagged_rows.append(o + tagged_cells[i] + c)
          fulltable = [open] + tagged_rows + [close]
          if rules['blankendtable']: fulltable.append('')
          return fulltable
  2064. ##############################################################################
  class BlockMaster:
      """TIP: use blockin/out to add/del holders

      Keeps three parallel stacks, one item for each open block:
        BLK - the block name
        HLD - the block holder: a list with the lines added to it
        PRP - the block properties (dictionary)

      The self.contains table tells which blocks may be opened
      inside each block. blockin() opens a block, closing first the
      open ones that cannot hold it. blockout() closes the current
      block, calling the method named after it to get its tagged
      lines.
      """

      def __init__(self):
          self.BLK = []
          self.HLD = []
          self.PRP = []
          self.depth = 0
          self.last = ''
          self.tableparser = None
          self.contains = {
              'para'    :['passthru','raw'],
              'verb'    :[],
              'table'   :[],
              'raw'     :[],
              'passthru':[],
              'quote'   :['quote','passthru','raw'],
              'list'    :['list' ,'numlist' ,'deflist','para','verb',
                          'raw' ,'passthru'],
              'numlist' :['list' ,'numlist' ,'deflist','para','verb',
                          'raw' ,'passthru'],
              'deflist' :['list' ,'numlist' ,'deflist','para','verb',
                          'raw' ,'passthru']
          }
          self.allblocks = self.contains.keys()

      def block(self):
          "Returns the current block name ('' if none is open)"
          if not self.BLK: return ''
          return self.BLK[-1]

      def isblock(self, name=''):
          "Tells if the current block is the named one"
          return self.block() == name

      def prop(self, key):
          "Returns a property of the current block ('' if not set)"
          if not self.PRP: return ''
          return self.PRP[-1].get(key) or ''

      def propset(self, key, val):
          "Sets a property of the current block"
          self.PRP[-1][key] = val
          #Debug('BLOCK prop ++: %s->%s'%(key,repr(val)), 1)
          #Debug('BLOCK props: %s'%(repr(self.PRP)), 1)

      def hold(self):
          "Returns the current block holder ([] if none is open)"
          if not self.HLD: return []
          return self.HLD[-1]

      def holdadd(self, line):
          "Adds a line to the holder (on lists, it is a new item)"
          if self.block()[-4:] == 'list': line = [line]
          self.HLD[-1].append(line)
          Debug('HOLD add: %s'%repr(line), 5)
          Debug('FULL HOLD: %s'%self.HLD, 2)

      def holdaddsub(self, line):
          "Adds a line to the last item of the holder"
          self.HLD[-1][-1].append(line)
          Debug('HOLD addsub: %s'%repr(line), 5)
          Debug('FULL HOLD: %s'%self.HLD, 2)

      def holdextend(self, lines):
          "Adds lines to the holder (on lists, they are a new item)"
          if self.block()[-4:] == 'list': lines = [lines]
          self.HLD[-1].extend(lines)
          Debug('HOLD extend: %s'%repr(lines), 5)
          Debug('FULL HOLD: %s'%self.HLD, 2)

      def blockin(self, block):
          """Opens a new block.

          Returns the tagged lines of the blocks that had to be
          closed because they cannot contain the new one.
          """
          ret = []
          if block not in self.allblocks:
              Error("Invalid block '%s'"%block)
          # first, let's close other possible open blocks
          while self.block() and block not in self.contains[self.block()]:
              ret.extend(self.blockout())
          # now we can gladly add this new one
          self.BLK.append(block)
          self.HLD.append([])
          self.PRP.append({})
          if block == 'table': self.tableparser = TableMaster()
          # deeper and deeper
          self.depth = len(self.BLK)
          Debug('block ++ (%s): %s' % (block,self.BLK), 6)
          return ret

      def blockout(self):
          """Closes the current block, returning its tagged lines.

          If there is a mother block, the tagged lines are added to
          her holder and an empty list is returned.
          """
          if not self.BLK: Error('No block to pop')
          self.last = self.BLK.pop()
          # the tagger method is called before the holder, the
          # properties and the depth of the block are discarded,
          # because it uses them
          tagged = getattr(self, self.last)()
          parsed = self.HLD.pop()
          self.PRP.pop()
          self.depth = len(self.BLK)
          if self.last == 'table': del self.tableparser
          # inserting a nested block into mother
          if self.block():
              if self.block()[-4:] == 'list':
                  self.HLD[-1][-1].append(tagged)
              else:
                  self.HLD[-1].append(tagged)
              tagged = []          # reset. mother will have it all
          Debug('block -- (%s): %s' % (self.last,self.BLK), 6)
          Debug('RELEASED (%s): %s' % (self.last,parsed), 6)
          if tagged: Debug('DUMPED: %s'%tagged, 2)
          return tagged

      def _is_mother_block(self):
          "Tells if the block being tagged is not inside another one"
          # this instance's own depth is used, not the BLOCK global
          return self.depth == 1

      def _last_escapes(self, line):
          return doFinalEscape(TARGET, line)

      def _get_escaped_hold(self):
          "Returns the holder as a flat list, its strings escaped"
          ret = []
          for line in self.hold():
              linetype = type(line)
              if linetype == type(''):
                  ret.append(self._last_escapes(line))
              # lists are nested blocks, already tagged
              elif linetype == type([]):
                  ret.extend(line)
              else:
                  Error("BlockMaster: Unknown HOLD item type:"
                        " %s"%linetype)
          return ret

      def _remove_twoblanks(self, lastitem):
          "Returns the item without its two trailing blanks (if any)"
          if len(lastitem) > 1 and lastitem[-2:] == ['','']:
              return lastitem[:-2]
          return lastitem

      def passthru(self):
          return self.hold()

      def raw(self):
          lines = self.hold()
          return map(lambda x: doEscape(TARGET, x), lines)

      def para(self):
          tagged = []
          open = TAGS['paragraphOpen']
          close = TAGS['paragraphClose']
          lines = self._get_escaped_hold()
          # open (or not) paragraph
          if not open+close and self.last == 'para':
              pass # avoids multiple blank lines
          else:
              tagged.append(open)
          # pagemaker likes a paragraph as a single long line
          if rules['onelinepara']:
              tagged.append(string.join(lines,' '))
          # others are normal :)
          else:
              tagged.extend(lines)
          tagged.append(close)
          # very very very very very very very very very UGLY fix
          # needed because <center> can't appear inside <p>
          # NOTE: best effort, any failure here is ignored on purpose
          try:
              if len(lines) == 1 and \
                 TARGET in ('html', 'xhtml') and \
                 re.match('^\s*<center>.*</center>\s*$', lines[0]):
                  tagged = [lines[0]]
          except: pass
          return tagged

      def verb(self):
          "Verbatim lines are not masked, so there's no need to unmask"
          tagged = []
          tagged.append(TAGS['blockVerbOpen'])
          for line in self.hold():
              if not rules['verbblocknotescaped']:
                  line = doEscape(TARGET,line)
              if rules['indentverbblock']:
                  line = ' '+line
              if rules['verbblockfinalescape']:
                  line = doFinalEscape(TARGET, line)
              tagged.append(line)
          #TODO maybe use if not TAGS['blockVerbClose']
          if TARGET != 'pm6':
              tagged.append(TAGS['blockVerbClose'])
          return tagged

      def table(self):
          # rewrite all table cells by the unmasked and escaped data
          lines = self._get_escaped_hold()
          for i in range(len(lines)):
              cells = string.split(lines[i], SEPARATOR)
              self.tableparser.rows[i]['cells'] = cells
          return self.tableparser.dump()

      def quote(self):
          tagged = []
          myre = regex['quote']
          open = TAGS['blockQuoteOpen']            # block based
          close = TAGS['blockQuoteClose']
          qline = TAGS['blockQuoteLine']           # line based
          indent = tagindent = '\t'*self.depth
          if rules['tagnotindentable']: tagindent = ''
          if not rules['keepquoteindent']: indent = ''
          if open: tagged.append(tagindent+open)   # open block
          for item in self.hold():
              if type(item) == type([]):
                  tagged.extend(item)              # subquotes
              else:
                  item = myre.sub('', item)        # del TABs
                  if rules['barinsidequote']:
                      item = get_tagged_bar(item)
                  item = self._last_escapes(item)
                  item = qline*self.depth + item
                  tagged.append(indent+item)       # quote line
          if close: tagged.append(tagindent+close) # close block
          return tagged

      def deflist(self): return self.list('deflist')

      def numlist(self): return self.list('numlist')

      def list(self, name='list'):
          """Returns the tagged lines of a list block.

          name is the list kind: 'list', 'numlist' or 'deflist'.
          Note that the items on the holder are changed in place.
          """
          tagged = []
          items = self.hold()
          indent = self.prop('indent')
          tagindent = indent
          listopen = TAGS.get(name+'Open')
          listclose = TAGS.get(name+'Close')
          listline = TAGS.get(name+'ItemLine')
          itemcount = 0
          if rules['tagnotindentable']: tagindent = ''
          if not rules['keeplistindent']: indent = ''
          if name == 'deflist':
              itemopen = TAGS[name+'Item1Open']
              itemclose = TAGS[name+'Item2Close']
              itemsep = TAGS[name+'Item1Close']+\
                        TAGS[name+'Item2Open']
          else:
              itemopen = TAGS[name+'ItemOpen']
              itemclose = TAGS[name+'ItemClose']
              itemsep = ''
          # ItemLine: number of leading chars identifies list depth
          if listline:
              itemopen = listline*self.depth
              # dirty fix for mgp
              if name == 'numlist': itemopen = itemopen + '\a. '
          # remove two-blanks from list ending mark, to avoid <p>
          items[-1] = self._remove_twoblanks(items[-1])
          # open list (not nestable lists are only opened at mother)
          if listopen and not \
             (rules['listnotnested'] and not self._is_mother_block()):
              tagged.append(tagindent+listopen)
          # tag each list item (multine items)
          itemopenorig = itemopen
          for item in items:
              # add "manual" item count for noautonum targets
              itemcount = itemcount + 1
              if name == 'numlist' and not rules['autonumberlist']:
                  n = str(itemcount)
                  itemopen = regex['x'].sub(n, itemopenorig)
                  del n
              item[0] = self._last_escapes(item[0])
              if name == 'deflist':
                  term, rest = string.split(item[0],SEPARATOR,1)
                  item[0] = rest
                  if not item[0]: del item[0]      # to avoid <p>
                  tagged.append(tagindent+itemopen+term+itemsep)
              else:
                  fullitem = tagindent+itemopen
                  tagged.append(string.replace(
                      item[0], SEPARATOR, fullitem))
                  del item[0]
              # process next lines for this item (if any)
              for line in item:
                  if type(line) == type([]): # sublist inside
                      tagged.extend(line)
                  else:
                      line = self._last_escapes(line)
                      # blank lines turns to <p>
                      if not line and rules['parainsidelist']:
                          line = string.rstrip(indent +\
                              TAGS['paragraphOpen']+\
                              TAGS['paragraphClose'])
                      if not rules['keeplistindent']:
                          line = string.lstrip(line)
                      tagged.append(line)
              # close item (if needed)
              if itemclose: tagged.append(tagindent+itemclose)
          # close list (not nestable lists are only closed at mother)
          if listclose and not \
             (rules['listnotnested'] and not self._is_mother_block()):
              tagged.append(tagindent+listclose)
          if rules['blankendmotherlist'] and self._is_mother_block():
              tagged.append('')
          return tagged
  2323. ##############################################################################
  def dumpConfig(source_raw, parsed_config):
      """Print all the config found, for user inspection.

      First comes the RAW config of each source (RC file, source
      document and command line), then the full parsed config with
      its keys sorted, and last the active PreProc/PostProc filters.
      """
      onoff = {1:_('ON'), 0:_('OFF')}
      sources = [
          (_('RC file'), RC_RAW),
          (_('source document'), source_raw),
          (_('command line'), CMDLINE_RAW),
      ]

      # first show all RAW data found
      for label, raw_config in sources:
          print(_('RAW config for %s')%label)
          for target, key, val in raw_config:
              shown_target = '(%s)'%target
              shown_key = dotted_spaces("%-14s"%key)
              # an empty value is shown as ON
              shown_val = val or _('ON')
              print(' %-8s %s: %s'%(shown_target, shown_key, shown_val))
          print('')

      # then the parsed results of all of them
      print(_('Full PARSED config'))
      names = parsed_config.keys()
      names.sort()
      for key in names:
          # filters are the last
          if key in ['preproc', 'postproc']:
              continue
          val = parsed_config[key]
          # flag beautifier
          if key in FLAGS.keys()+ACTIONS.keys():
              val = onoff.get(val) or val
          # list beautifier
          if type(val) == type([]):
              sep = ', '
              if key == 'options':
                  sep = ' '
              val = string.join(val, sep)
          print("%25s: %s"%(dotted_spaces("%-14s"%key), val))
      print('')

      print(_('Active filters'))
      for filter_name in ['preproc', 'postproc']:
          for rule in parsed_config.get(filter_name) or []:
              shown_name = dotted_spaces("%-14s"%filter_name)
              print("%25s: %s -> %s"%(shown_name, rule[0], rule[1]))
  def get_file_body(file):
      """Returns all the document BODY lines

      The lines are picked from the position [1][2] of the data
      returned by process_source_file(), called with noconf=1.
      """
      processed = process_source_file(file, noconf=1)
      areas = processed[1]
      return areas[2]
  def finish_him(outlist, config):
      """Writing output to screen or file

      The escape chars of the lines are unmasked and the PostProc
      filters (if any) are applied to each line, on the order they
      were defined. The result is then printed (outfile is STDOUT)
      or saved to the outfile.

      Returns the (outlist, config) tuple only when the outfile is
      STDOUT and the GUI is on. Otherwise nothing is returned.
      """
      outfile = config['outfile']
      outlist = unmaskEscapeChar(outlist)

      # do PostProc
      filters = config['postproc']
      if filters:
          errmsg = _('Invalid PostProc filter regex')
          filtered = []
          for line in outlist:
              for patt, repl in filters:
                  # any failure on the user filter is reported as
                  # an invalid regex (no exception is left out)
                  try:
                      line = re.sub(patt, repl, line)
                  except:
                      Error("%s: '%s'"% (errmsg,patt))
              filtered.append(line)
          outlist = filtered

      if outfile != STDOUT:
          Savefile(outfile, addLineBreaks(outlist))
          if not GUI:
              print(_('%s wrote %s')%(my_name, outfile))
      elif GUI:
          # the GUI shows the results by itself
          return outlist, config
      else:
          for line in outlist:
              print(line)

      if config['split']:
          print("--- html...")
          sgml2html = 'sgml2html -s %s -l %s %s'%(
              config['split'], config['lang'] or lang, outfile)
          print("Running system command: %s"%sgml2html)
          os.system(sgml2html)
def toc_maker(toc, config):
    """Compose the TOC list 'by hand'.

    toc   : list of t2t marked lines (the TOC as a list), which is
            converted by a convert() call.
    config: parsed config dictionary; reads 'toc', 'toc-only' and
            'css-suggar'.
    Returns the list of target TOC lines, or [] when the target has
    its own TOC tag and --toc-only is off.
    """
    ret = []
    # TOC is a tag, so there's nothing to do here
    if TAGS['TOC'] and not config['toc-only']: return []
    # TOC is a valid t2t marked text (list type), that is converted
    if config['toc'] or config['toc-only']:
        # convert using a config copy with headers, filters,
        # email masking and CSS turned off
        fakeconf = config.copy()
        fakeconf['headers']    = 0
        fakeconf['toc-only']   = 0
        fakeconf['mask-email'] = 0
        fakeconf['preproc']    = []
        fakeconf['postproc']   = []
        fakeconf['css-suggar'] = 0
        # the second item returned by convert() is not used
        ret,foo = convert(toc, fakeconf)
    # TOC between bars (not for --toc-only)
    if config['toc']:
        if TAGS['tocOpenCss'] and config['css-suggar']:
            # CSS: the TOC goes inside its own open/close tags
            ret = [TAGS['tocOpenCss']] +ret +[TAGS['tocCloseCss']]
        else:
            # a bar (72 dashes) between empty paragraphs, before and
            # after the TOC
            para = TAGS['paragraphOpen']+TAGS['paragraphClose']
            tag  = regex['x'].sub('-'*72,TAGS['bar1'])
            tocbar = [para, tag, para]
            ret = tocbar + ret + tocbar
        # NOTE(review): the indentation was lost on the copy this code
        # was recovered from. These three lines are assumed to belong
        # to the "if config['toc']" block (not to the 'else' above,
        # nor to the function level) - confirm with the original.
        open, close = TAGS['tocOpen'], TAGS['tocClose']
        if open : ret = [open] + ret
        if close: ret = ret + [close]
    return ret
  2422. def doHeader(headers, config):
  2423. if not config['headers']: return []
  2424. if not headers: headers = ['','','']
  2425. target = config['target']
  2426. if not HEADER_TEMPLATE.has_key(target):
  2427. Error("doheader: Unknow target '%s'"%target)
  2428. if target in ['html','xhtml'] and config.get('css-suggar'):
  2429. template = string.split(HEADER_TEMPLATE[target+'css'], '\n')
  2430. else:
  2431. template = string.split(HEADER_TEMPLATE[target], '\n')
  2432. head_data = {'STYLE':'', 'ENCODING':''}
  2433. for key in head_data.keys():
  2434. val = config.get(string.lower(key))
  2435. if key == 'ENCODING': val = get_encoding_string(val, target)
  2436. head_data[key] = val
  2437. # parse header contents
  2438. for i in 0,1,2:
  2439. contents = doDateMacro(headers[i]) # expand %%date
  2440. # Escapes - on tex, just do it if any \tag{} present
  2441. if target != 'tex' or \
  2442. (target == 'tex' and re.search(r'\\\w+{', contents)):
  2443. contents = doEscape(target, contents)
  2444. head_data['HEADER%d'%(i+1)] = contents
  2445. Debug("Header Data: %s"%head_data, 1)
  2446. # scan for empty dictionary keys
  2447. # if found, scan template lines for that key reference
  2448. # if found, remove the reference
  2449. # if there isn't any other key reference on the same line, remove it
  2450. for key in head_data.keys():
  2451. if head_data.get(key): continue
  2452. for line in template:
  2453. if string.count(line, '%%(%s)s'%key):
  2454. sline = string.replace(line, '%%(%s)s'%key, '')
  2455. if not re.search(r'%\([A-Z0-9]+\)s', sline):
  2456. template.remove(line)
  2457. # populate template with data
  2458. template = string.join(template, '\n') % head_data
  2459. ### post processing
  2460. #
  2461. # let tex format today
  2462. # DISABLED: not a good idea have date format different on tex
  2463. #if target == 'tex' and head_data['HEADER3'] == currdate:
  2464. # template = re.sub(r'\\date\{.*?}', r'\date', template)
  2465. return string.split(template, '\n')
  2466. def doDateMacro(line):
  2467. re_date = getRegexes()['date']
  2468. while re_date.search(line):
  2469. m = re_date.search(line)
  2470. fmt = m.group('fmt') or ''
  2471. dateme = currdate
  2472. if fmt: dateme = strftime(fmt,localtime(time()))
  2473. line = re_date.sub(dateme,line,1)
  2474. return line
  2475. def doCommentLine(txt):
  2476. # the -- string ends a (h|sg|xht)ml comment :(
  2477. txt = maskEscapeChar(txt)
  2478. if string.count(TAGS['comment'], '--') and \
  2479. string.count(txt, '--'):
  2480. txt = re.sub('-(?=-)', r'-\\', txt)
  2481. if TAGS['comment']:
  2482. return regex['x'].sub(txt, TAGS['comment'])
  2483. return ''
  2484. def doFooter(config):
  2485. if not config['headers']: return []
  2486. ret = []
  2487. target = config['target']
  2488. cmdline = config['realcmdline']
  2489. typename = target
  2490. if target == 'tex': typename = 'LaTeX2e'
  2491. ppgd = '%s code generated by %s %s (%s)'%(
  2492. typename,my_name,my_version,my_url)
  2493. cmdline = 'cmdline: %s %s'%(my_name, string.join(cmdline, ' '))
  2494. ret.append('\n'+doCommentLine(ppgd))
  2495. ret.append(doCommentLine(cmdline))
  2496. ret.append(TAGS['EOD'])
  2497. return ret
  2498. def doEscape(target,txt):
  2499. "Target-specific special escapes. Apply *before* insert any tag."
  2500. if target in ['html','sgml','xhtml']:
  2501. txt = re.sub('&','&amp;',txt)
  2502. txt = re.sub('<','&lt;',txt)
  2503. txt = re.sub('>','&gt;',txt)
  2504. if target == 'sgml':
  2505. txt = re.sub('\xff','&yuml;',txt) # "+y
  2506. elif target == 'pm6':
  2507. txt = re.sub('<','<\#60>',txt)
  2508. elif target == 'mgp':
  2509. txt = re.sub('^%',' %',txt) # add leading blank to avoid parse
  2510. elif target == 'man':
  2511. txt = re.sub("^([.'])", '\\&\\1',txt) # command ID
  2512. txt = string.replace(txt,ESCCHAR, ESCCHAR+'e') # \e
  2513. elif target == 'tex':
  2514. # mark literal \ to be changed to $\backslash$ later
  2515. txt = string.replace( txt, ESCCHAR, '@@LaTeX-escaping-SUX@@')
  2516. txt = re.sub('([#$&%{}])', ESCCHAR+r'\1' , txt) # \%
  2517. txt = re.sub('([~^])' , ESCCHAR+r'\1{}', txt) # \~{}
  2518. txt = re.sub('([<|>])' , r'$\1$', txt) # $>$
  2519. txt = string.replace(txt, '@@LaTeX-escaping-SUX@@',
  2520. maskEscapeChar(r'$\backslash$'))
  2521. # TIP the _ is escaped at the end
  2522. return txt
  2523. # TODO man: where - really needs to be escaped?
  2524. def doFinalEscape(target, txt):
  2525. "Last escapes of each line"
  2526. if target == 'pm6' : txt = string.replace(txt,ESCCHAR+'<',r'<\#92><')
  2527. elif target == 'man' : txt = string.replace(txt, '-', r'\-')
  2528. elif target == 'tex' : txt = string.replace(txt, '_', r'\_')
  2529. elif target == 'sgml': txt = string.replace(txt, '[', '&lsqb;')
  2530. return txt
  2531. def EscapeCharHandler(action, data):
  2532. "Mask/Unmask the Escape Char on the given string"
  2533. if not string.strip(data): return data
  2534. if action not in ['mask','unmask']:
  2535. Error("EscapeCharHandler: Invalid action '%s'"%action)
  2536. if action == 'mask': return string.replace(data,'\\',ESCCHAR)
  2537. else: return string.replace(data,ESCCHAR,'\\')
  2538. def maskEscapeChar(data):
  2539. "Replace any Escape Char \ with a text mask (Input: str or list)"
  2540. if type(data) == type([]):
  2541. return map(lambda x: EscapeCharHandler('mask', x), data)
  2542. return EscapeCharHandler('mask',data)
  2543. def unmaskEscapeChar(data):
  2544. "Undo the Escape char \ masking (Input: str or list)"
  2545. if type(data) == type([]):
  2546. return map(lambda x: EscapeCharHandler('unmask', x), data)
  2547. return EscapeCharHandler('unmask',data)
  2548. def addLineBreaks(list):
  2549. "use LB to respect sys.platform"
  2550. ret = []
  2551. for line in list:
  2552. line = string.replace(line,'\n',LB) # embedded \n's
  2553. ret.append(line+LB) # add final line break
  2554. return ret
  2555. def enclose_me(tagname, txt):
  2556. return TAGS.get(tagname+'Open') + txt + TAGS.get(tagname+'Close')
  2557. def beautify_me(name, line):
  2558. "where name is: bold, italic or underline"
  2559. name = 'font%s' % string.capitalize(name)
  2560. open = TAGS['%sOpen'%name]
  2561. close = TAGS['%sClose'%name]
  2562. txt = r'%s\1%s'%(open, close)
  2563. line = regex[name].sub(txt,line)
  2564. return line
  2565. def get_tagged_link(label, url):
  2566. ret = ''
  2567. target = CONF['target']
  2568. image_re = regex['img']
  2569. # set link type
  2570. if regex['email'].match(url):
  2571. linktype = 'email'
  2572. else:
  2573. linktype = 'url';
  2574. # escape specials from TEXT parts
  2575. label = doEscape(target,label)
  2576. # escape specials from link URL
  2577. if rules['linkable'] and rules['escapeurl']:
  2578. url = doEscape(target, url)
  2579. # if not linkable, the URL is plain text, that needs escape
  2580. if not rules['linkable']:
  2581. if target == 'tex':
  2582. url = re.sub('^#', '\#', url) # ugly, but compile
  2583. else:
  2584. url = doEscape(target,url)
  2585. # adding protocol to guessed link
  2586. guessurl = ''
  2587. if linktype == 'url' and \
  2588. re.match(regex['_urlskel']['guess'], url):
  2589. if url[0] == 'w': guessurl = 'http://' +url
  2590. else : guessurl = 'ftp://' +url
  2591. # not link aware targets -> protocol is useless
  2592. if not rules['linkable']: guessurl = ''
  2593. # simple link (not guessed)
  2594. if not label and not guessurl:
  2595. if CONF['mask-email'] and linktype == 'email':
  2596. # do the email mask feature (no TAGs, just text)
  2597. url = string.replace(url,'@',' (a) ')
  2598. url = string.replace(url,'.',' ')
  2599. url = "<%s>" % url
  2600. if rules['linkable']: url = doEscape(target, url)
  2601. ret = url
  2602. else:
  2603. # just add link data to tag
  2604. tag = TAGS[linktype]
  2605. ret = regex['x'].sub(url,tag)
  2606. # named link or guessed simple link
  2607. else:
  2608. # adjusts for guessed link
  2609. if not label: label = url # no protocol
  2610. if guessurl : url = guessurl # with protocol
  2611. # image inside link!
  2612. if image_re.match(label):
  2613. if rules['imglinkable']: # get image tag
  2614. label = parse_images(label)
  2615. else: # img@link !supported
  2616. label = "(%s)"%image_re.match(label).group(1)
  2617. # putting data on the right appearance order
  2618. if rules['linkable']:
  2619. urlorder = [url, label] # link before label
  2620. else:
  2621. urlorder = [label, url] # label before link
  2622. # add link data to tag (replace \a's)
  2623. ret = TAGS["%sMark"%linktype]
  2624. for data in urlorder:
  2625. ret = regex['x'].sub(data,ret,1)
  2626. return ret
  2627. def parse_deflist_term(line):
  2628. "Extract and parse definition list term contents"
  2629. img_re = regex['img']
  2630. term = regex['deflist'].search(line).group(3)
  2631. # mask image inside term as (image.jpg), where not supported
  2632. if not rules['imgasdefterm'] and img_re.search(term):
  2633. while img_re.search(term):
  2634. imgfile = img_re.search(term).group(1)
  2635. term = img_re.sub('(%s)'%imgfile, term, 1)
  2636. #TODO tex: escape ] on term. \], \rbrack{} and \verb!]! don't work :(
  2637. return term
  2638. def get_tagged_bar(line):
  2639. m = regex['bar'].search(line)
  2640. if not m: return line
  2641. txt = m.group(2)
  2642. # set bar type
  2643. if txt[0] == '=': bar = TAGS['bar2']
  2644. else : bar = TAGS['bar1']
  2645. # to avoid comment tag confusion like <!-- ------ -->
  2646. if string.count(TAGS['comment'], '--'):
  2647. txt = string.replace(txt,'--','__')
  2648. # tag line
  2649. return regex['x'].sub(txt, bar)
  2650. def get_image_align(line):
  2651. "Return the image (first found) align for the given line"
  2652. # first clear marks that can mess align detection
  2653. line = re.sub(SEPARATOR+'$', '', line) # remove deflist sep
  2654. line = re.sub('^'+SEPARATOR, '', line) # remove list sep
  2655. line = re.sub('^[\t]+' , '', line) # remove quote mark
  2656. # get image position on the line
  2657. m = regex['img'].search(line)
  2658. ini = m.start() ; head = 0
  2659. end = m.end() ; tail = len(line)
  2660. # the align detection algorithm
  2661. if ini == head and end != tail: align = 'left' # ^img + text$
  2662. elif ini != head and end == tail: align = 'right' # ^text + img$
  2663. else : align = 'middle' # default align
  2664. # some special cases
  2665. if BLOCK.isblock('table'): align = 'middle' # ignore when table
  2666. if TARGET == 'mgp' and align == 'middle': align = 'center'
  2667. return align
  2668. # reference: http://www.iana.org/assignments/character-sets
  2669. # http://www.drclue.net/F1.cgi/HTML/META/META.html
  2670. def get_encoding_string(enc, target):
  2671. if not enc: return ''
  2672. # target specific translation table
  2673. translate = {
  2674. 'tex': {
  2675. # missing: ansinew , applemac , cp437 , cp437de , cp865
  2676. 'us-ascii' : 'ascii',
  2677. 'windows-1250': 'cp1250',
  2678. 'windows-1252': 'cp1252',
  2679. 'ibm850' : 'cp850',
  2680. 'ibm852' : 'cp852',
  2681. 'iso-8859-1' : 'latin1',
  2682. 'iso-8859-2' : 'latin2',
  2683. 'iso-8859-3' : 'latin3',
  2684. 'iso-8859-4' : 'latin4',
  2685. 'iso-8859-5' : 'latin5',
  2686. 'iso-8859-9' : 'latin9',
  2687. 'koi8-r' : 'koi8-r'
  2688. }
  2689. }
  2690. # normalization
  2691. enc = re.sub('(?i)(us[-_]?)?ascii|us|ibm367','us-ascii' , enc)
  2692. enc = re.sub('(?i)(ibm|cp)?85([02])' ,'ibm85\\2' , enc)
  2693. enc = re.sub('(?i)(iso[_-]?)?8859[_-]?' ,'iso-8859-' , enc)
  2694. enc = re.sub('iso-8859-($|[^1-9]).*' ,'iso-8859-1', enc)
  2695. # apply translation table
  2696. try: enc = translate[target][string.lower(enc)]
  2697. except: pass
  2698. return enc
  2699. ##############################################################################
  2700. ##MerryChristmas,IdontwanttofighttonightwithyouImissyourbodyandIneedyourlove##
  2701. ##############################################################################
  2702. def process_source_file(file, noconf=0):
  2703. """
  2704. Find and Join all the configuration available for a source file.
  2705. No sanity checkings are done on this step.
  2706. It also extracts the source document parts into separate holders.
  2707. The config scan order is:
  2708. 1. The user configuration file (i.e. $HOME/.txt2tagsrc)
  2709. 2. The source document's CONF area
  2710. 3. The command line options
  2711. The return data is a tuple of two items:
  2712. 1. The parsed config dictionary
  2713. 2. The document's parts, as a (head, conf, body) tuple
  2714. All the convertion process will be based on the data and
  2715. configuration returned by this function.
  2716. The source files is readed on this step only.
  2717. """
  2718. source = SourceDocument(file)
  2719. head, conf, body = source.split()
  2720. Message(_("Source document contents stored"),2)
  2721. if not noconf:
  2722. # read document config
  2723. source_raw = source.get_raw_config()
  2724. # join all the config directives found, then parse it
  2725. full_raw = RC_RAW + source_raw + CMDLINE_RAW
  2726. Message(_("Parsing and saving all config found (%03d items)")%(
  2727. len(full_raw)),1)
  2728. full_parsed = ConfigMaster(full_raw).parse()
  2729. # add manually the filemane to the conf dic
  2730. full_parsed['sourcefile'] = file
  2731. # maybe should we dump the config found?
  2732. if full_parsed.get('dump-config'):
  2733. dumpConfig(source_raw, full_parsed)
  2734. sys.exit()
  2735. # okay, all done
  2736. Debug("FULL config for this file: %s"%full_parsed, 1)
  2737. else:
  2738. full_parsed = {}
  2739. return full_parsed, (head,conf,body)
  2740. def get_infiles_config(infiles):
  2741. """Find and Join into a single list, all configuration available
  2742. for each input file. This function is supposed to be the very
  2743. first one to be called, before any processing.
  2744. """
  2745. ret = []
  2746. if not infiles: return []
  2747. for infile in infiles:
  2748. ret.append((process_source_file(infile)))
  2749. return ret
  2750. def convert_this_files(configs):
  2751. global CONF
  2752. for myconf,doc in configs: # multifile support
  2753. target_head = []
  2754. target_toc = []
  2755. target_body = []
  2756. target_foot = []
  2757. source_head, source_conf, source_body = doc
  2758. myconf = ConfigMaster().sanity(myconf)
  2759. # compose the target file Headers
  2760. #TODO escape line before?
  2761. #TODO see exceptions by tex and mgp
  2762. Message(_("Composing target Headers"),1)
  2763. target_head = doHeader(source_head, myconf)
  2764. # parse the full marked body into tagged target
  2765. first_body_line = (len(source_head) or 1)+ len(source_conf) + 1
  2766. Message(_("Composing target Body"),1)
  2767. target_body, marked_toc = convert(source_body, myconf,
  2768. firstlinenr=first_body_line)
  2769. # make TOC (if needed)
  2770. Message(_("Composing target TOC"),1)
  2771. target_toc = toc_maker(marked_toc,myconf)
  2772. # compose the target file Footer
  2773. Message(_("Composing target Footer"),1)
  2774. target_foot = doFooter(myconf)
  2775. # finally, we have our document
  2776. outlist = target_head + target_toc + target_body + target_foot
  2777. # if on GUI, abort before finish_him
  2778. # else, write results to file or STDOUT
  2779. if GUI:
  2780. return outlist, myconf
  2781. else:
  2782. Message(_("Saving results to the output file"),1)
  2783. finish_him(outlist, myconf)
  2784. def parse_images(line):
  2785. "Tag all images found"
  2786. while regex['img'].search(line) and TAGS['img'] != '[\a]':
  2787. txt = regex['img'].search(line).group(1)
  2788. tag = TAGS['img']
  2789. # HTML, XHTML and mgp!
  2790. if rules['imgalignable']:
  2791. align = get_image_align(line)
  2792. # add align on tag
  2793. tag = regex['_imgAlign'].sub(align, tag, 1)
  2794. # dirty fix to allow centered solo images
  2795. if align == 'middle' and TARGET in ['html','xhtml']:
  2796. rest = regex['img'].sub('',line,1)
  2797. if re.match('^\s+$', rest):
  2798. tag = "<center>%s</center>" %tag
  2799. if TARGET == 'tex': tag = re.sub(r'\\b',r'\\\\b',tag)
  2800. line = regex['img'].sub(tag,line,1)
  2801. line = regex['x'].sub(txt,line,1)
  2802. return line
  2803. def add_inline_tags(line):
  2804. # beautifiers
  2805. for beauti in ['Bold', 'Italic', 'Underline']:
  2806. if regex['font%s'%beauti].search(line):
  2807. line = beautify_me(beauti, line)
  2808. line = parse_images(line)
  2809. return line
  2810. def get_include_contents(file, path=''):
  2811. "Parses %!include: value and extract file contents"
  2812. ids = {'`':'verb', '"':'raw', "'":'passthru' }
  2813. id = 't2t'
  2814. # set include type and remove identifier marks
  2815. mark = file[0]
  2816. if mark in ids.keys():
  2817. if file[:2] == file[-2:] == mark*2:
  2818. id = ids[mark] # set type
  2819. file = file[2:-2] # remove marks
  2820. # handle remote dir execution
  2821. filepath = os.path.join(path, file)
  2822. # read included file contents
  2823. lines = Readfile(filepath, remove_linebreaks=1)
  2824. # default txt2tags marked text, just BODY matters
  2825. if id == 't2t':
  2826. lines = get_file_body(filepath)
  2827. lines.insert(0, '%%INCLUDED(%s) starts here: %s'%(id,file))
  2828. lines.append('%%INCLUDED(%s) ends here: %s'%(id,file))
  2829. return id, lines
def convert(bodylines, config, firstlinenr=1):
    """Convert the t2t marked body lines to the target tagged lines.

    bodylines  : list with the source document body lines.
    config     : parsed config dictionary. It is saved on the global
                 CONF, and its 'target' sets the globals TAGS, rules,
                 regex and TARGET.
    firstlinenr: the number of the first body line, used to number
                 the lines on the Debug messages.

    Each line is checked, on this order, for: raw and verbatim areas,
    blank lines, special lines (%!include), comments, bars, titles,
    quotes, lists and tables. What remains is paragraph text.
    The BLOCK object holds the lines of the open blocks.

    Returns the (ret, marked_toc) tuple, where ret is the list of the
    tagged lines (empty when 'toc-only' is on) and marked_toc is
    what TITLE.dump_marked_toc() returned.

    NOTE(review): the indentation of this function was lost on the
    copy it was recovered from. The nesting here was rebuilt from the
    code logic - confirm with the original file.
    """
    # global vars for doClose*()
    global TAGS, regex, rules, TARGET, BLOCK, CONF
    CONF = config
    target = CONF['target']
    TAGS = getTags(target)
    rules = getRules(target)
    regex = getRegexes()
    TARGET = target # save for buggy functions that need global
    BLOCK = BlockMaster()
    MASK = MaskMaster()
    TITLE = TitleMaster()
    ret = []
    f_lastwasblank = 0
    # if TOC is a header tag, add it
    if CONF['toc'] and TAGS['TOC']:
        ret.append(TAGS['TOC']+'\n')
    # no forced indent for verbatim block when using CSS
    if target in ('html','xhtml') and CONF['css-suggar']:
        rules['indentverbblock'] = 0
    # let's mark it up!
    # linenr : the line number shown on Debug messages
    # lineref: the index of the next line to read on bodylines
    linenr = firstlinenr-1
    lineref = 0
    # a 'while' is used because bodylines may grow inside the loop
    # (see the t2t include handling)
    while lineref < len(bodylines):
        # defaults
        MASK.reset()
        # results_box will be the method that stores the parsed line
        results_box = ''
        untouchedline = bodylines[lineref]
        line = re.sub('[\n\r]+$','',untouchedline) # del line break
        # apply PreProc rules
        if CONF['preproc']:
            errmsg = _('Invalid PreProc filter regex')
            for patt,repl in CONF['preproc']:
                try   : line = re.sub(patt, repl, line)
                except: Error("%s: '%s'"% (errmsg,patt))
        line = maskEscapeChar(line) # protect \ char
        linenr = linenr +1
        lineref = lineref +1
        Debug(repr(line), 3, linenr) # heavy debug: show each line
        # any NOT table line (or comment), closes an open table
        # (also when the table is a verb block mapped from table)
        if ( BLOCK.isblock('table') or
              ( BLOCK.isblock('verb') and
                BLOCK.prop('mapped') == 'table'
              )
           ) \
           and not regex['table'].search(line) \
           and not regex['comment'].search(line):
            ret.extend(BLOCK.blockout())
        # any NOT quote line (or comment) closes all open quotes
        if BLOCK.isblock('quote') \
           and not regex['quote'].search(line) \
           and not regex['comment'].search(line):
            while BLOCK.isblock('quote'):
                ret.extend(BLOCK.blockout())
        #-------------------------[ Raw Text ]----------------------
        # we're already on a raw block
        if BLOCK.block() == 'raw':
            # closing raw
            if regex['blockRawClose'].search(line):
                ret.extend(BLOCK.blockout())
                continue
            # normal raw-inside line
            BLOCK.holdadd(line)
            continue
        # detecting raw block init
        if regex['blockRawOpen'].search(line):
            ret.extend(BLOCK.blockin('raw'))
            continue
        # one line raw text: open the block, hold the line, close it
        if regex['1lineRaw'].search(line):
            ret.extend(BLOCK.blockin('raw'))
            line = regex['1lineRaw'].sub('',line)
            BLOCK.holdadd(line)
            ret.extend(BLOCK.blockout())
            continue
        #-----------------[ Verbatim (PRE-formatted) ]--------------
        #TIP we'll never support beautifiers inside verbatim
        # we're already on a verb block
        if BLOCK.block() == 'verb':
            # closing verb
            if regex['blockVerbClose'].search(line):
                ret.extend(BLOCK.blockout())
                continue
            # normal verb-inside line
            BLOCK.holdadd(line)
            continue
        # detecting verb block init
        if regex['blockVerbOpen'].search(line):
            ret.extend(BLOCK.blockin('verb'))
            f_lastwasblank = 0
            continue
        # one line verb-formatted text
        if regex['1lineVerb'].search(line):
            ret.extend(BLOCK.blockin('verb'))
            line = regex['1lineVerb'].sub('',line)
            BLOCK.holdadd(line)
            ret.extend(BLOCK.blockout())
            f_lastwasblank = 0
            continue
        # tables are mapped to verb when target is not table-aware
        if not rules['tableable'] and regex['table'].search(line):
            if not BLOCK.isblock('verb'):
                ret.extend(BLOCK.blockin('verb'))
                # the mark that is checked on the loop start, to
                # close this block on the first NOT table line
                BLOCK.propset('mapped', 'table')
            BLOCK.holdadd(line)
            continue
        #---------------------[ blank lines ]-----------------------
        if regex['blankline'].search(line):
            # close open paragraph
            if BLOCK.isblock('para'):
                ret.extend(BLOCK.blockout())
                f_lastwasblank = 1
                continue
            # close all open quotes
            while BLOCK.isblock('quote'):
                ret.extend(BLOCK.blockout())
            # closing all open lists
            if f_lastwasblank: # 2nd consecutive blank
                if BLOCK.block()[-4:] == 'list':
                    BLOCK.holdaddsub('') # helps parser
                while BLOCK.depth: # closes list (if any)
                    ret.extend(BLOCK.blockout())
                continue # ignore consecutive blanks
            # paragraph (if any) is wanted inside lists also
            if BLOCK.block()[-4:] == 'list':
                BLOCK.holdaddsub('')
            else:
                # html: show blank line (needs tag)
                if target in ['html','xhtml']:
                    ret.append(TAGS['paragraphOpen']+\
                               TAGS['paragraphClose'])
                # otherwise we just show a blank line
                else:
                    ret.append('')
            f_lastwasblank = 1
            continue
        #---------------------[ special ]---------------------------
        if regex['special'].search(line):
            # include command
            targ, key, val = ConfigLines().parse_line(
                               line, 'include', target)
            if key:
                Debug("Found config '%s', value '%s'"%(
                       key,val),1,linenr)
                # the included file path is relative to the source
                # document directory
                incpath = os.path.dirname(CONF['sourcefile'])
                incfile = val
                err = _('A file cannot include itself (loop!)')
                if CONF['sourcefile'] == incfile:
                    Error("%s: %s"%(err,incfile))
                inctype, inclines = get_include_contents(
                                      incfile, incpath)
                # verb, raw and passthru are easy
                if inctype != 't2t':
                    ret.extend(BLOCK.blockin(inctype))
                    BLOCK.holdextend(inclines)
                    ret.extend(BLOCK.blockout())
                else:
                    # insert include lines into body
                    # (lineref already points to the next line, so
                    # the included lines are the next ones parsed)
                    #TODO del %!include command call
                    #TODO include maxdepth limit
                    bodylines = bodylines[:lineref] \
                               +inclines \
                               +bodylines[lineref:]
                continue
            else:
                # no 'continue' here: the line goes on, and it is
                # checked as a comment right below
                Debug('Bogus Special Line',1,linenr)
        #---------------------[ comments ]--------------------------
        # just skip them (if not macro or config)
        if regex['comment'].search(line) and not \
           regex['date'].match(line):
            continue
        # valid line, reset blank status
        f_lastwasblank = 0
        #---------------------[ Horizontal Bar ]--------------------
        if regex['bar'].search(line):
            # a bar closes a paragraph
            if BLOCK.isblock('para'):
                ret.extend(BLOCK.blockout())
            # we need to close all opened quote blocks
            # if bar isn't allowed inside or if not a quote line
            if BLOCK.isblock('quote'):
                if not rules['barinsidequote'] or \
                   not regex['quote'].search(line):
                    while BLOCK.isblock('quote'):
                        ret.extend(BLOCK.blockout())
            # quote + bar: continue processing for quoting
            if rules['barinsidequote'] and \
               regex['quote'].search(line):
                pass
            # just bar: save tagged line and we're done
            else:
                line = get_tagged_bar(line)
                if BLOCK.block()[-4:] == 'list':
                    BLOCK.holdaddsub(line)
                elif BLOCK.block():
                    BLOCK.holdadd(line)
                else:
                    ret.append(line)
                continue
        #---------------------[ Title ]-----------------------------
        #TODO set next blank and set f_lastwasblank or f_lasttitle
        # (titles are not detected inside lists)
        if (regex['title'].search(line) or
            regex['numtitle'].search(line)) and \
            BLOCK.block()[-4:] != 'list':
            # a title closes a paragraph
            if BLOCK.isblock('para'):
                ret.extend(BLOCK.blockout())
            TITLE.add(line)
            ret.extend(TITLE.get())
            f_lastwasblank = 1
            continue
        #---------------------[ apply masks ]-----------------------
        # (the masks are undone on the Final Parses, after the escapes
        # and the inline tags)
        line = MASK.mask(line)
        #XXX from here, only block-inside lines will pass
        #---------------------[ Quote ]-----------------------------
        if regex['quote'].search(line):
            # the quote depth is the length of the quote mark matched
            # (the leading TABs)
            quotedepth = len(regex['quote'].search(line).group(0))
            # SGML doesn't support nested quotes
            if rules['quotenotnested']: quotedepth = 1
            # new quote
            if not BLOCK.isblock('quote'):
                ret.extend(BLOCK.blockin('quote'))
            # new subquotes
            while BLOCK.depth < quotedepth:
                BLOCK.blockin('quote')
            # closing quotes
            while quotedepth < BLOCK.depth:
                ret.extend(BLOCK.blockout())
        #---------------------[ Lists ]-----------------------------
        if regex['list'].search(line) or \
           regex['numlist'].search(line) or \
           regex['deflist'].search(line):
            listindent = BLOCK.prop('indent')
            # the list type comes from the item ID char, that is
            # found after the (optional) leading spaces
            listids = string.join(LISTNAMES.keys(), '')
            m = re.match('^( *)([%s]) '%listids, line)
            listitemindent = m.group(1)
            listtype = m.group(2)
            listname = LISTNAMES[listtype]
            results_box = BLOCK.holdadd
            # del list ID (and separate term from definition)
            if listname == 'deflist':
                term = parse_deflist_term(line)
                line = regex['deflist'].sub(term+SEPARATOR,line)
            else:
                line = regex[listname].sub(SEPARATOR,line)
            # don't cross depth limit
            maxdepth = rules['listmaxdepth']
            if maxdepth and BLOCK.depth == maxdepth:
                if len(listitemindent) > len(listindent):
                    listitemindent = listindent
            # open mother list or sublist
            if BLOCK.block()[-4:] != 'list' or \
               len(listitemindent) > len(listindent):
                ret.extend(BLOCK.blockin(listname))
                BLOCK.propset('indent',listitemindent)
            # closing sublists
            while len(listitemindent) < len(BLOCK.prop('indent')):
                ret.extend(BLOCK.blockout())
        #---------------------[ Table ]-----------------------------
        #TODO escape undesired format inside table
        #TODO add pm6 target
        if regex['table'].search(line):
            if not BLOCK.isblock('table'): # first table line!
                ret.extend(BLOCK.blockin('table'))
                BLOCK.tableparser.__init__(line)
            tablerow = TableMaster().parse_row(line)
            BLOCK.tableparser.add_row(tablerow) # save config
            # maintain line to unmask and inlines
            line = string.join(tablerow['cells'], SEPARATOR)
        #---------------------[ Paragraph ]-------------------------
        if not BLOCK.block(): # new para!
            ret.extend(BLOCK.blockin('para'))
        ############################################################
        ############################################################
        ############################################################
        #---------------------[ Final Parses ]----------------------
        # the target-specific special char escapes for body lines
        line = doEscape(target,line)
        line = add_inline_tags(line)
        line = MASK.undo(line)
        #---------------------[ Hold or Return? ]-------------------
        ### now we must choose where to put the parsed line
        #
        if not results_box:
            # list item extra lines
            if BLOCK.block()[-4:] == 'list':
                results_box = BLOCK.holdaddsub
            # other blocks
            elif BLOCK.block():
                results_box = BLOCK.holdadd
            # no blocks
            else:
                line = doFinalEscape(target, line)
                results_box = ret.append
        results_box(line)
    # EOF: close any open para/verb/lists/table/quotes
    Debug('EOF',2)
    while BLOCK.block():
        ret.extend(BLOCK.blockout())
    # if CSS, enclose body inside DIV
    if TAGS['bodyOpenCss'] and config['css-suggar']:
        ret.insert(0, TAGS['bodyOpenCss'])
        ret.append(TAGS['bodyCloseCss'])
    # on --toc-only the body is thrown away, just the TOC matters
    if CONF['toc-only']: ret = []
    marked_toc = TITLE.dump_marked_toc(CONF['toc-level'])
    return ret, marked_toc
  3137. ##############################################################################
  3138. ################################### GUI ######################################
  3139. ##############################################################################
  3140. #
  3141. # tk help: http://python.org/topics/tkinter/
  3142. # /usr/lib/python*/lib-tk/Tkinter.py
  3143. #
  3144. # grid table : row=0, column=0, columnspan=2, rowspan=2
  3145. # grid align : sticky='n,s,e,w' (North, South, East, West)
  3146. # pack place : side='top,bottom,right,left'
  3147. # pack fill : fill='x,y,both,none', expand=1
  3148. # pack align : anchor='n,s,e,w' (North, South, East, West)
  3149. # padding : padx=10, pady=10, ipadx=10, ipady=10 (internal)
  3150. # checkbox : offvalue is returned if the _user_ deselected the box
  3151. # label align: justify=left,right,center
  3152. def load_GUI_resources():
  3153. "Load all extra modules and methods used by GUI"
  3154. global askopenfilename, showinfo, showwarning, showerror, Tkinter
  3155. from tkFileDialog import askopenfilename
  3156. from tkMessageBox import showinfo,showwarning,showerror
  3157. import Tkinter
  3158. class Gui:
  3159. "Graphical Tk Interface"
  3160. def __init__(self, conf={}):
  3161. self.root = Tkinter.Tk() # mother window, come to butthead
  3162. self.root.title(my_name) # window title bar text
  3163. self.window = self.root # variable "focus" for inclusion
  3164. self.row = 0 # row count for grid()
  3165. self.action_lenght = 150 # left column lenght (pixel)
  3166. self.frame_margin = 10 # frame margin size (pixel)
  3167. self.frame_border = 6 # frame border size (pixel)
  3168. # the default Gui colors, can be changed by %!guicolors
  3169. self.dft_gui_colors = ['blue','white','lightblue','black']
  3170. self.gui_colors = []
  3171. self.bg1 = self.fg1 = self.bg2 = self.fg2 = ''
  3172. # on Tk, vars need to be set/get using setvar()/get()
  3173. self.infile = self.setvar('')
  3174. self.target = self.setvar('')
  3175. self.target_name = self.setvar('')
  3176. # the checks appearance order
  3177. self.checks = [
  3178. 'headers','enum-title','toc','mask-email',
  3179. 'toc-only','stdout']
  3180. # creating variables for all checks
  3181. for check in self.checks:
  3182. setattr(self, 'f_'+check, self.setvar(''))
  3183. # load RC config
  3184. self.conf = {}
  3185. if conf: self.load_config(conf)
  3186. def load_config(self, conf):
  3187. self.conf = conf
  3188. self.gui_colors = conf.get('guicolors') or self.dft_gui_colors
  3189. self.bg1, self.fg1, self.bg2, self.fg2 = self.gui_colors
  3190. self.root.config(bd=15,bg=self.bg1)
  3191. ### config as dic for python 1.5 compat (**opts don't work :( )
  3192. def entry(self, **opts): return Tkinter.Entry(self.window, opts)
  3193. def label(self, txt='', bg=None, **opts):
  3194. opts.update({'text':txt,'bg':bg or self.bg1})
  3195. return Tkinter.Label(self.window, opts)
  3196. def button(self,name,cmd,**opts):
  3197. opts.update({'text':name,'command':cmd})
  3198. return Tkinter.Button(self.window, opts)
  3199. def check(self,name,checked=0,**opts):
  3200. bg, fg = self.bg2, self.fg2
  3201. opts.update({
  3202. 'text':name, 'onvalue':1, 'offvalue':0,
  3203. 'activeforeground':fg, 'fg':fg,
  3204. 'activebackground':bg, 'bg':bg,
  3205. 'highlightbackground':bg, 'anchor':'w'
  3206. })
  3207. chk = Tkinter.Checkbutton(self.window, opts)
  3208. if checked: chk.select()
  3209. chk.grid(columnspan=2, sticky='w', padx=0)
  3210. def menu(self,sel,items):
  3211. return apply(Tkinter.OptionMenu,(self.window,sel)+tuple(items))
  3212. # handy auxiliar functions
  3213. def action(self, txt):
  3214. self.label(txt, fg=self.fg1, bg=self.bg1,
  3215. wraplength=self.action_lenght).grid(column=0,row=self.row)
  3216. def frame_open(self):
  3217. self.window = Tkinter.Frame(self.root,bg=self.bg2,
  3218. borderwidth=self.frame_border)
  3219. def frame_close(self):
  3220. self.window.grid(column=1, row=self.row, sticky='w',
  3221. padx=self.frame_margin)
  3222. self.window = self.root
  3223. self.label('').grid()
  3224. self.row = self.row + 2 # update row count
  3225. def target_name2key(self):
  3226. name = self.target_name.get()
  3227. target = filter(lambda x: TARGET_NAMES[x] == name, TARGETS)
  3228. try : key = target[0]
  3229. except: key = ''
  3230. self.target = self.setvar(key)
  3231. def target_key2name(self):
  3232. key = self.target.get()
  3233. name = TARGET_NAMES.get(key) or key
  3234. self.target_name = self.setvar(name)
  3235. def exit(self): self.root.destroy()
  3236. def setvar(self, val): z = Tkinter.StringVar() ; z.set(val) ; return z
  3237. def askfile(self):
  3238. ftypes= [(_('txt2tags files'),('*.t2t','*.txt')),
  3239. (_('All files'),'*')]
  3240. newfile = askopenfilename(filetypes=ftypes)
  3241. if newfile:
  3242. self.infile.set(newfile)
  3243. newconf = process_source_file(newfile)[0]
  3244. newconf = ConfigMaster().sanity(newconf, gui=1)
  3245. # restate all checkboxes after file selection
  3246. #TODO how to make a refresh without killing it?
  3247. self.root.destroy()
  3248. self.__init__(newconf)
  3249. self.mainwindow()
  3250. def scrollwindow(self, txt='no text!', title=''):
  3251. # create components
  3252. win = Tkinter.Toplevel() ; win.title(title)
  3253. frame = Tkinter.Frame(win)
  3254. scroll = Tkinter.Scrollbar(frame)
  3255. text = Tkinter.Text(frame,yscrollcommand=scroll.set)
  3256. button = Tkinter.Button(win)
  3257. # config
  3258. text.insert(Tkinter.END, string.join(txt,'\n'))
  3259. scroll.config(command=text.yview)
  3260. button.config(text=_('Close'), command=win.destroy)
  3261. button.focus_set()
  3262. # packing
  3263. text.pack(side='left',fill='both')
  3264. scroll.pack(side='right',fill='y')
  3265. frame.pack()
  3266. button.pack(ipadx=30)
  3267. def runprogram(self):
  3268. global CMDLINE_RAW
  3269. # prepare
  3270. self.target_name2key()
  3271. infile, target = self.infile.get(), self.target.get()
  3272. # sanity
  3273. if not target:
  3274. showwarning(my_name,_("You must select a target type!"))
  3275. return
  3276. if not infile:
  3277. showwarning(my_name,
  3278. _("You must provide the source file location!"))
  3279. return
  3280. # compose cmdline
  3281. guiflags = []
  3282. real_cmdline_conf = ConfigMaster(CMDLINE_RAW).parse()
  3283. if real_cmdline_conf.has_key('infile'):
  3284. del real_cmdline_conf['infile']
  3285. if real_cmdline_conf.has_key('target'):
  3286. del real_cmdline_conf['target']
  3287. real_cmdline = CommandLine().compose_cmdline(real_cmdline_conf)
  3288. default_outfile = ConfigMaster().get_outfile_name(
  3289. {'sourcefile':infile, 'outfile':'', 'target':target})
  3290. for opt in self.checks:
  3291. val = int(getattr(self, 'f_%s'%opt).get() or "0")
  3292. if opt == 'stdout': opt = 'outfile'
  3293. on_config = self.conf.get(opt) or 0
  3294. on_cmdline = real_cmdline_conf.get(opt) or 0
  3295. if opt == 'outfile':
  3296. if on_config == STDOUT: on_config = 1
  3297. else: on_config = 0
  3298. if on_cmdline == STDOUT: on_cmdline = 1
  3299. else: on_cmdline = 0
  3300. if val != on_config or (
  3301. val == on_config == on_cmdline and
  3302. real_cmdline_conf.has_key(opt)):
  3303. if val:
  3304. # was not set, but user selected on GUI
  3305. Debug("user turned ON: %s"%opt)
  3306. if opt == 'outfile': opt = '-o-'
  3307. else: opt = '--%s'%opt
  3308. else:
  3309. # was set, but user deselected on GUI
  3310. Debug("user turned OFF: %s"%opt)
  3311. if opt == 'outfile':
  3312. opt = "-o%s"%default_outfile
  3313. else: opt = '--no-%s'%opt
  3314. guiflags.append(opt)
  3315. cmdline = [my_name, '-t', target] +real_cmdline \
  3316. +guiflags +[infile]
  3317. Debug('Gui/Tk cmdline: %s'%cmdline,5)
  3318. # run!
  3319. cmdline_raw_orig = CMDLINE_RAW
  3320. try:
  3321. # fake the GUI cmdline as the real one, and parse file
  3322. CMDLINE_RAW = CommandLine().get_raw_config(cmdline[1:])
  3323. data = process_source_file(infile)
  3324. # on GUI, convert_* returns the data, not finish_him()
  3325. outlist, config = convert_this_files([data])
  3326. # on GUI and STDOUT, finish_him() returns the data
  3327. result = finish_him(outlist, config)
  3328. # show outlist in s a nice new window
  3329. if result:
  3330. outlist, config = result
  3331. title = _('%s: %s converted to %s')%(
  3332. my_name, os.path.basename(infile),
  3333. string.upper(config['target']))
  3334. self.scrollwindow(outlist, title)
  3335. # show the "file saved" message
  3336. else:
  3337. msg = "%s\n\n %s\n%s\n\n %s\n%s"%(
  3338. _('Conversion done!'),
  3339. _('FROM:'), infile,
  3340. _('TO:'), config['outfile'])
  3341. showinfo(my_name, msg)
  3342. except ZeroDivisionError: # common error, not quit
  3343. pass
  3344. except: # fatal error
  3345. ShowTraceback()
  3346. print _('Sorry! txt2tags-Tk Fatal Error.')
  3347. errmsg = '%s\n\n%s\n %s'%(
  3348. _('Unknown error occurred.'),
  3349. _('Please send the Error Traceback to the author:'),
  3350. my_email)
  3351. showerror(_('%s FATAL ERROR!')%my_name,errmsg)
  3352. self.exit()
  3353. CMDLINE_RAW = cmdline_raw_orig
  3354. def mainwindow(self):
  3355. self.infile.set(self.conf.get('sourcefile') or '')
  3356. self.target.set(self.conf.get('target') or \
  3357. _('-- select one --'))
  3358. outfile = self.conf.get('outfile')
  3359. if outfile == STDOUT: # map -o-
  3360. self.conf['stdout'] = 1
  3361. if self.conf.get('headers') == None:
  3362. self.conf['headers'] = 1 # map default
  3363. action1 = _("Enter the source file location:")
  3364. action2 = _("Choose the target document type:")
  3365. action3 = _("Some options you may check:")
  3366. action4 = _("Some extra options:")
  3367. checks_txt = {
  3368. 'headers' : _("Include headers on output"),
  3369. 'enum-title': _("Number titles (1, 1.1, 1.1.1, etc)"),
  3370. 'toc' : _("Do TOC also (Table of Contents)"),
  3371. 'mask-email': _("Hide e-mails from SPAM robots"),
  3372. 'toc-only' : _("Just do TOC, nothing more"),
  3373. 'stdout' : _("Dump to screen (Don't save target file)")
  3374. }
  3375. targets_menu = map(lambda x: TARGET_NAMES[x], TARGETS)
  3376. # header
  3377. self.label("%s %s"%(string.upper(my_name), my_version),
  3378. bg=self.bg2, fg=self.fg2).grid(columnspan=2, ipadx=10)
  3379. self.label(_("ONE source, MULTI targets")+'\n%s\n'%my_url,
  3380. bg=self.bg1, fg=self.fg1).grid(columnspan=2)
  3381. self.row = 2
  3382. # choose input file
  3383. self.action(action1) ; self.frame_open()
  3384. e_infile = self.entry(textvariable=self.infile,width=25)
  3385. e_infile.grid(row=self.row, column=0, sticky='e')
  3386. if not self.infile.get(): e_infile.focus_set()
  3387. self.button(_("Browse"), self.askfile).grid(
  3388. row=self.row, column=1, sticky='w', padx=10)
  3389. # show outfile name, style and encoding (if any)
  3390. txt = ''
  3391. if outfile:
  3392. txt = outfile
  3393. if outfile == STDOUT: txt = _('<screen>')
  3394. l_output = self.label(_('Output: ')+txt,
  3395. fg=self.fg2,bg=self.bg2)
  3396. l_output.grid(columnspan=2, sticky='w')
  3397. for setting in ['style','encoding']:
  3398. if self.conf.get(setting):
  3399. name = string.capitalize(setting)
  3400. val = self.conf[setting]
  3401. self.label('%s: %s'%(name, val),
  3402. fg=self.fg2, bg=self.bg2).grid(
  3403. columnspan=2, sticky='w')
  3404. # choose target
  3405. self.frame_close() ; self.action(action2)
  3406. self.frame_open()
  3407. self.target_key2name()
  3408. self.menu(self.target_name, targets_menu).grid(
  3409. columnspan=2, sticky='w')
  3410. # options checkboxes label
  3411. self.frame_close() ; self.action(action3)
  3412. self.frame_open()
  3413. # compose options check boxes, example:
  3414. # self.check(checks_txt['toc'],1,variable=self.f_toc)
  3415. for check in self.checks:
  3416. # extra options label
  3417. if check == 'toc-only':
  3418. self.frame_close() ; self.action(action4)
  3419. self.frame_open()
  3420. txt = checks_txt[check]
  3421. var = getattr(self, 'f_'+check)
  3422. checked = self.conf.get(check)
  3423. self.check(txt,checked,variable=var)
  3424. self.frame_close()
  3425. # spacer and buttons
  3426. self.label('').grid() ; self.row = self.row + 1
  3427. b_quit = self.button(_("Quit"), self.exit)
  3428. b_quit.grid(row=self.row, column=0, sticky='w', padx=30)
  3429. b_conv = self.button(_("Convert!"), self.runprogram)
  3430. b_conv.grid(row=self.row, column=1, sticky='e', padx=30)
  3431. if self.target.get() and self.infile.get():
  3432. b_conv.focus_set()
  3433. # as documentation told me
  3434. if sys.platform[:3] == 'win':
  3435. self.root.iconify()
  3436. self.root.update()
  3437. self.root.deiconify()
  3438. self.root.mainloop()
  3439. ##############################################################################
  3440. ##############################################################################
  3441. def exec_command_line(user_cmdline=[]):
  3442. global CMDLINE_RAW, RC_RAW, DEBUG, VERBOSE, GUI, Error
  3443. # extract command line data
  3444. cmdline_data = user_cmdline or sys.argv[1:]
  3445. CMDLINE_RAW = CommandLine().get_raw_config(cmdline_data)
  3446. cmdline_parsed = ConfigMaster(CMDLINE_RAW).parse()
  3447. DEBUG = cmdline_parsed.get('debug' ) or 0
  3448. VERBOSE = cmdline_parsed.get('verbose') or 0
  3449. GUI = cmdline_parsed.get('gui' ) or 0
  3450. infiles = cmdline_parsed.get('infile' ) or []
  3451. Message(_("Txt2tags %s processing begins")%my_version,1)
  3452. # the easy ones
  3453. if cmdline_parsed.get('help' ): Quit(USAGE)
  3454. if cmdline_parsed.get('version'): Quit(VERSIONSTR)
  3455. # multifile haters
  3456. if len(infiles) > 1:
  3457. errmsg=_("Option --%s can't be used with multiple input files")
  3458. for option in ['gui','dump-config']:
  3459. if cmdline_parsed.get(option):
  3460. Error(errmsg%option)
  3461. Debug("system platform: %s"%sys.platform)
  3462. Debug("line break char: %s"%repr(LB))
  3463. Debug("command line: %s"%sys.argv)
  3464. Debug("command line raw config: %s"%CMDLINE_RAW,1)
  3465. # extract RC file config
  3466. if cmdline_parsed.get('rc') == 0:
  3467. Message(_("Ignoring user configuration file"),1)
  3468. else:
  3469. rc_file = get_rc_path()
  3470. if rc_file:
  3471. Message(_("Loading user configuration file"),1)
  3472. RC_RAW = ConfigLines(file=rc_file).get_raw_config()
  3473. Debug("rc file: %s"%rc_file)
  3474. Debug("rc file raw config: %s"%RC_RAW,1)
  3475. # get all infiles config (if any)
  3476. infiles_config = get_infiles_config(infiles)
  3477. # is GUI available?
  3478. # try to load and start GUI interface for --gui
  3479. # if program was called with no arguments, try GUI also
  3480. if GUI or not infiles:
  3481. try:
  3482. load_GUI_resources()
  3483. Debug("GUI resources OK (Tk module is installed)")
  3484. winbox = Gui()
  3485. Debug("GUI display OK")
  3486. GUI = 1
  3487. except:
  3488. Debug("GUI Error: no Tk module or no DISPLAY")
  3489. GUI = 0
  3490. # user forced --gui, but it's not available
  3491. if cmdline_parsed.get('gui') and not GUI:
  3492. ShowTraceback()
  3493. Error("Sorry, I can't run my Graphical Interface - GUI\n"
  3494. "- Check if Python Tcl/Tk module is installed (Tkinter)\n"
  3495. "- Make sure you are in a graphical environment (like X)")
  3496. # Okay, we will use GUI
  3497. if GUI:
  3498. Message(_("We are on GUI interface"),1)
  3499. # redefine Error function to raise exception instead sys.exit()
  3500. def Error(msg):
  3501. showerror(_('txt2tags ERROR!'), msg)
  3502. raise ZeroDivisionError
  3503. # if no input file, get RC+cmdline config, else full config
  3504. if not infiles:
  3505. gui_conf = ConfigMaster(RC_RAW+CMDLINE_RAW).parse()
  3506. else:
  3507. try : gui_conf = infiles_config[0][0]
  3508. except: gui_conf = {}
  3509. # sanity is needed to set outfile and other things
  3510. gui_conf = ConfigMaster().sanity(gui_conf, gui=1)
  3511. Debug("GUI config: %s"%gui_conf,5)
  3512. # insert config and populate the nice window!
  3513. winbox.load_config(gui_conf)
  3514. winbox.mainwindow()
  3515. # console mode rocks forever!
  3516. else:
  3517. Message(_("We are on Command Line interface"),1)
  3518. # called with no arguments, show error
  3519. if not infiles: Error(_('Missing input file (try --help)'))
  3520. convert_this_files(infiles_config)
  3521. Message(_("Txt2tags finished sucessfuly"),1)
  3522. sys.exit(0)
  3523. if __name__ == '__main__':
  3524. exec_command_line()
  3525. # vim: ts=8