PageRenderTime 76ms CodeModel.GetById 31ms RepoModel.GetById 0ms app.codeStats 1ms

/old/txt2tags-2.0.py

http://txt2tags.googlecode.com/
Python | 4024 lines | 3605 code | 161 blank | 258 comment | 163 complexity | b2aedac450024e883a7707b81b88866f MD5 | raw file
Possible License(s): GPL-2.0, GPL-3.0, WTFPL

Large files files are truncated, but you can click here to view the full file

  1. #!/usr/bin/env python
  2. # txt2tags - generic text conversion tool
  3. # http://txt2tags.sf.net
  4. #
  5. # Copyright 2001, 2002, 2003, 2004 Aurelio Marinho Jargas
  6. #
  7. # This program is free software; you can redistribute it and/or modify
  8. # it under the terms of the GNU General Public License as published by
  9. # the Free Software Foundation, version 2.
  10. #
  11. # This program is distributed in the hope that it will be useful,
  12. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. # GNU General Public License for more details.
  15. #
  16. # You have received a copy of the GNU General Public License along
  17. # with this program, on the COPYING file.
  18. #
  19. ##################################################################
  20. # #
  21. # - IMPORTANT - #
  22. # #
  23. # Due to the major syntax changes, the new 2.x series BREAKS #
  24. # backwards compatibility. #
  25. # #
  26. # Use the 't2tconv' script to upgrade your existing .t2t files #
  27. # to conform to the new v2.0 syntax. #
  28. # #
  29. # Do a visual inspection on the new converted file. #
  30. # Especially Pre & Post proc filters can break. Check them! #
  31. # #
  32. ##################################################################
  33. ########################################################################
  34. #
  35. # BORING CODE EXPLANATION AHEAD
  36. #
  37. # Just read if you wish to understand how the txt2tags code works
  38. #
  39. ########################################################################
  40. #
  41. # Version 2.0 was a complete rewrite for the program 'core'.
  42. #
  43. # Now the code that [1] parses the marked text is separated from the
  44. # code that [2] inserts the target tags.
  45. #
  46. # [1] made by: def convert()
  47. # [2] made by: class BlockMaster
  48. #
  49. # The structures of the marked text are identified and their contents are
  50. # extracted into a data holder (Python lists and dictionaries).
  51. #
  52. # When parsing the source file, the blocks (para, lists, quote, table)
  53. # are opened with BlockMaster, right when found. Then their contents,
  54. # which span several lines, are fed into a special holder on the
  55. # BlockMaster instance. Just when the block is closed, the target tags
  56. # are inserted for the full block as a whole, in one pass. This way, we
  57. # have better control over blocks. Much better than the previous line by
  58. # line approach.
  59. #
  60. # In other words, whenever inside a block, the parser *holds* the tag
  61. # insertion process, waiting until the full block is read. That was
  62. # needed primarily to close paragraphs for the new XHTML target, but
  63. # proved to be a very good addition, improving many other processing steps.
  64. #
  65. # -------------------------------------------------------------------
  66. #
  67. # There is also a brand new code for the Configuration schema, 100%
  68. # rewritten. There are new classes, all self documented: CommandLine,
  69. # SourceDocument, ConfigMaster and ConfigLines. In short, a new RAW
  70. # Config format was created, and all kind of configuration is first
  71. # converted to this format, and then a generic method parses it.
  72. #
  73. # The init processing was also changed, and now the functions which
  74. # get information about the input files are: get_infiles_config(),
  75. # process_source_file() and convert_this_files()
  76. #
  77. # Other parts are untouched, and remain the same as in v1.7, such as the
  78. # marks regexes, target Headers and target Tags&Rules.
  79. #
  80. ########################################################################
  81. # Now I think the code is nice, easier to read and understand
  82. #XXX Python coding warning
  83. # Avoid common mistakes:
  84. # - do NOT use newlist=list instead of newlist=list[:]
  85. # - do NOT use newdic=dic instead of newdic=dic.copy()
  86. # - do NOT use dic[key] instead of dic.get(key)
  87. # - do NOT use del dic[key] without checking has_key() first
  88. #XXX Smart Image Align doesn't work if the image is a link
  89. # Can't fix that because the image is expanded together with the
  90. # link, at the linkbank filling moment. Only the image is passed
  91. # to parse_images(), not the full line, so it is always 'middle'.
  92. #XXX Paragraph separation not valid inside Quote
  93. # Quote will not have <p></p> inside, instead will close and open
  94. # again the <blockquote>. This really sux in CSS, when defining a
  95. # different background color. Still don't know how to fix it.
  96. #XXX TODO (maybe)
  97. # New mark or macro which expands to an anchor's full title.
  98. # It is necessary to parse the full document in this order:
  99. # DONE 1st scan: HEAD: get all settings, including %!includeconf
  100. # DONE 2nd scan: BODY: expand includes & apply %!preproc
  101. # 3rd scan: BODY: read titles and compose TOC info
  102. # 4th scan: BODY: full parsing, expanding [#anchor] 1st
  103. # Steps 2 and 3 can be made together, with no tag adding.
  104. # Two complete body scans will be *slow*, don't know if it is worth it.
  105. ##############################################################################
  106. # User config (1=ON, 0=OFF)
  107. USE_I18N = 1 # use gettext for i18ned messages? (default is 1)
  108. COLOR_DEBUG = 1 # show debug messages in colors? (default is 1)
  109. HTML_LOWER = 0 # use lowercased HTML tags instead upper? (default is 0)
  110. ##############################################################################
  111. # these are all the core Python modules used by txt2tags (KISS!)
  112. import re, string, os, sys, getopt
  113. from time import strftime,time,localtime
  114. # program information
  115. my_url = 'http://txt2tags.sf.net'
  116. my_name = 'txt2tags'
  117. my_email = 'verde@aurelio.net'
  118. my_version = '2.0' #-betaN
  119. # i18n - just use if available
  120. if USE_I18N:
  121. try:
  122. import gettext
  123. # if your locale dir is different, change it here
  124. cat = gettext.Catalog('txt2tags',localedir='/usr/share/locale/')
  125. _ = cat.gettext
  126. except:
  127. _ = lambda x:x
  128. else:
  129. _ = lambda x:x
  130. # FLAGS : the conversion related flags , may be used in %!options
  131. # OPTIONS : the conversion related options, may be used in %!options
  132. # ACTIONS : the other behaviour modifiers, valid on command line only
  133. # SETTINGS: global miscellaneous settings, valid on RC file only
  134. # CONFIG_KEYWORDS: the valid %!key:val keywords
  135. #
  136. # FLAGS and OPTIONS are configs that affect the converted document.
  137. # They usually have also a --no-<option> to turn them OFF.
  138. # ACTIONS are needed because when doing multiple input files, strange
  139. # behaviour would be found, such as using the command line interface for
  140. # the first file and the gui for the second. There is no --no-<action>.
  141. # --version and --help inside %!options are also odd
  142. #
  143. TARGETS = ['html', 'xhtml', 'sgml', 'tex', 'man', 'mgp', 'moin', 'pm6', 'txt']
  144. FLAGS = {'headers' :1 , 'enum-title' :0 , 'mask-email' :0 ,
  145. 'toc-only' :0 , 'toc' :0 , 'rc' :1 ,
  146. 'css-suggar' :0 }
  147. OPTIONS = {'target' :'', 'toc-level' :3 , 'style' :'',
  148. 'infile' :'', 'outfile' :'', 'encoding' :'',
  149. 'split' :0 , 'lang' :''}
  150. ACTIONS = {'help' :0 , 'version' :0 , 'gui' :0 ,
  151. 'verbose' :0 , 'debug' :0 , 'dump-config':0 }
  152. SETTINGS = {} # for future use
  153. CONFIG_KEYWORDS = [
  154. 'target', 'encoding', 'style', 'options', 'preproc','postproc',
  155. 'guicolors']
  156. TARGET_NAMES = {
  157. 'html' : _('HTML page'),
  158. 'xhtml': _('XHTML page'),
  159. 'sgml' : _('SGML document'),
  160. 'tex' : _('LaTeX document'),
  161. 'man' : _('UNIX Manual page'),
  162. 'mgp' : _('Magic Point presentation'),
  163. 'moin' : _('MoinMoin page'),
  164. 'pm6' : _('PageMaker 6.0 document'),
  165. 'txt' : _('Plain Text'),
  166. }
  167. DEBUG = 0 # do not edit here, please use --debug
  168. VERBOSE = 0 # do not edit here, please use -v, -vv or -vvv
  169. GUI = 0
  170. RC_RAW = []
  171. CMDLINE_RAW = []
  172. CONF = {}
  173. BLOCK = None
  174. regex = {}
  175. TAGS = {}
  176. rules = {}
  177. currdate = strftime('%Y%m%d',localtime(time())) # ISO current date
  178. lang = 'english'
  179. TARGET = ''
  180. STDIN = STDOUT = '-'
  181. ESCCHAR = '\x00'
  182. SEPARATOR = '\x01'
  183. LISTNAMES = {'-':'list', '+':'numlist', ':':'deflist'}
  184. LINEBREAK = {'default':'\n', 'win':'\r\n', 'mac':'\r'}
  185. RCFILE = {'default':'.txt2tagsrc', 'win':'_t2trc'}
  186. #my_version = my_version + '-dev' + currdate[4:] # devel!
  187. # plataform specific settings
  188. LB = LINEBREAK.get(sys.platform[:3]) or LINEBREAK['default']
  189. RC = RCFILE.get(sys.platform[:3]) or RCFILE['default']
  190. VERSIONSTR = _("%s version %s <%s>")%(my_name,my_version,my_url)
  191. USAGE = string.join([
  192. '',
  193. _("Usage: %s [OPTIONS] [infile.t2t ...]") % my_name,
  194. '',
  195. _(" -t, --target set target document type. currently supported:"),
  196. ' %s' % re.sub(r"[]'[]",'',repr(TARGETS)),
  197. _(" -i, --infile=FILE set FILE as the input file name ('-' for STDIN)"),
  198. _(" -o, --outfile=FILE set FILE as the output file name ('-' for STDOUT)"),
  199. _(" -n, --enum-title enumerate all title lines as 1, 1.1, 1.1.1, etc"),
  200. _(" -H, --no-headers suppress header, title and footer contents"),
  201. _(" --headers show header, title and footer contents (default ON)"),
  202. _(" --encoding set target file encoding (utf-8, iso-8859-1, etc)"),
  203. _(" --style=FILE use FILE as the document style (like HTML CSS)"),
  204. _(" --css-suggar insert CSS-friendly tags for HTML and XHTML targets"),
  205. _(" --mask-email hide email from spam robots. x@y.z turns <x (a) y z>"),
  206. _(" --toc add TOC (Table of Contents) to target document"),
  207. _(" --toc-only print document TOC and exit"),
  208. _(" --toc-level=N set maximum TOC level (depth) to N"),
  209. _(" --rc read user config file ~/.txt2tagsrc (default ON)"),
  210. _(" --gui invoke Graphical Tk Interface"),
  211. _(" -v, --verbose print informative messages during convertion"),
  212. _(" -h, --help print this help information and exit"),
  213. _(" -V, --version print program version and exit"),
  214. _(" --dump-config print all the config found and exit"),
  215. '',
  216. _("Turn OFF options:"),
  217. " --no-outfile, --no-infile, --no-style, --no-encoding, --no-headers",
  218. " --no-toc, --no-toc-only, --no-mask-email, --no-enum-title, --no-rc",
  219. " --no-css-suggar",
  220. '',
  221. _("Example:\n %s -t html --toc myfile.t2t") % my_name,
  222. '',
  223. _("By default, converted output is saved to 'infile.<target>'."),
  224. _("Use --outfile to force an output file name."),
  225. _("If input file is '-', reads from STDIN."),
  226. _("If output file is '-', dumps output to STDOUT."),
  227. ''
  228. ], '\n')
  229. ##############################################################################
  230. # here is all the target's templates
  231. # you may edit them to fit your needs
  232. # - the %(HEADERn)s strings represent the Header lines
  233. # - the %(STYLE)s string is changed by --style contents
  234. # - the %(ENCODING)s string is changed by --encoding contents
  235. # - if any of the above is empty, the full line is removed
  236. # - use %% to represent a literal %
  237. #
  238. HEADER_TEMPLATE = {
  239. 'txt': """\
  240. %(HEADER1)s
  241. %(HEADER2)s
  242. %(HEADER3)s
  243. """,
  244. 'sgml': """\
  245. <!doctype linuxdoc system>
  246. <article>
  247. <title>%(HEADER1)s
  248. <author>%(HEADER2)s
  249. <date>%(HEADER3)s
  250. """,
  251. 'html': """\
  252. <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
  253. <HTML>
  254. <HEAD>
  255. <META NAME="generator" CONTENT="http://txt2tags.sf.net">
  256. <META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=%(ENCODING)s">
  257. <LINK REL="stylesheet" TYPE="text/css" HREF="%(STYLE)s">
  258. <TITLE>%(HEADER1)s</TITLE>
  259. </HEAD><BODY BGCOLOR="white" TEXT="black">
  260. <P ALIGN="center"><CENTER><H1>%(HEADER1)s</H1>
  261. <FONT SIZE="4">
  262. <I>%(HEADER2)s</I><BR>
  263. %(HEADER3)s
  264. </FONT></CENTER>
  265. """,
  266. 'htmlcss': """\
  267. <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
  268. <HTML>
  269. <HEAD>
  270. <META NAME="generator" CONTENT="http://txt2tags.sf.net">
  271. <META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=%(ENCODING)s">
  272. <LINK REL="stylesheet" TYPE="text/css" HREF="%(STYLE)s">
  273. <TITLE>%(HEADER1)s</TITLE>
  274. </HEAD>
  275. <BODY>
  276. <DIV CLASS="header" ID="header">
  277. <H1>%(HEADER1)s</H1>
  278. <H2>%(HEADER2)s</H2>
  279. <H3>%(HEADER3)s</H3>
  280. </DIV>
  281. """,
  282. 'xhtml': """\
  283. <?xml version="1.0"?>
  284. <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"\
  285. "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
  286. <html xmlns="http://www.w3.org/1999/xhtml">
  287. <head>
  288. <title>%(HEADER1)s</title>
  289. <meta name="generator" content="http://txt2tags.sf.net" />
  290. <meta http-equiv="Content-Type" content="text/html; charset=%(ENCODING)s" />
  291. <link rel="stylesheet" type="text/css" href="%(STYLE)s" />
  292. </head>
  293. <body bgcolor="white" text="black">
  294. <div align="center">
  295. <h1>%(HEADER1)s</h1>
  296. <h2>%(HEADER2)s</h2>
  297. <h3>%(HEADER3)s</h3>
  298. </div>
  299. """,
  300. 'xhtmlcss': """\
  301. <?xml version="1.0"?>
  302. <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"\
  303. "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
  304. <html xmlns="http://www.w3.org/1999/xhtml">
  305. <head>
  306. <title>%(HEADER1)s</title>
  307. <meta name="generator" content="http://txt2tags.sf.net" />
  308. <meta http-equiv="Content-Type" content="text/html; charset=%(ENCODING)s" />
  309. <link rel="stylesheet" type="text/css" href="%(STYLE)s" />
  310. </head>
  311. <body>
  312. <div class="header" id="header">
  313. <h1>%(HEADER1)s</h1>
  314. <h2>%(HEADER2)s</h2>
  315. <h3>%(HEADER3)s</h3>
  316. </div>
  317. """,
  318. 'man': """\
  319. .TH "%(HEADER1)s" 1 "%(HEADER3)s" "%(HEADER2)s"
  320. """,
  321. # TODO style to <HR>
  322. 'pm6': """\
  323. <PMTags1.0 win><C-COLORTABLE ("Preto" 1 0 0 0)
  324. ><@Normal=
  325. <FONT "Times New Roman"><CCOLOR "Preto"><SIZE 11>
  326. <HORIZONTAL 100><LETTERSPACE 0><CTRACK 127><CSSIZE 70><C+SIZE 58.3>
  327. <C-POSITION 33.3><C+POSITION 33.3><P><CBASELINE 0><CNOBREAK 0><CLEADING -0.05>
  328. <GGRID 0><GLEFT 7.2><GRIGHT 0><GFIRST 0><G+BEFORE 7.2><G+AFTER 0>
  329. <GALIGNMENT "justify"><GMETHOD "proportional"><G& "ENGLISH">
  330. <GPAIRS 12><G%% 120><GKNEXT 0><GKWIDOW 0><GKORPHAN 0><GTABS $>
  331. <GHYPHENATION 2 34 0><GWORDSPACE 75 100 150><GSPACE -5 0 25>
  332. ><@Bullet=<@-PARENT "Normal"><FONT "Abadi MT Condensed Light">
  333. <GLEFT 14.4><G+BEFORE 2.15><G%% 110><GTABS(25.2 l "")>
  334. ><@PreFormat=<@-PARENT "Normal"><FONT "Lucida Console"><SIZE 8><CTRACK 0>
  335. <GLEFT 0><G+BEFORE 0><GALIGNMENT "left"><GWORDSPACE 100 100 100><GSPACE 0 0 0>
  336. ><@Title1=<@-PARENT "Normal"><FONT "Arial"><SIZE 14><B>
  337. <GCONTENTS><GLEFT 0><G+BEFORE 0><GALIGNMENT "left">
  338. ><@Title2=<@-PARENT "Title1"><SIZE 12><G+BEFORE 3.6>
  339. ><@Title3=<@-PARENT "Title1"><SIZE 10><GLEFT 7.2><G+BEFORE 7.2>
  340. ><@Title4=<@-PARENT "Title3">
  341. ><@Title5=<@-PARENT "Title3">
  342. ><@Quote=<@-PARENT "Normal"><SIZE 10><I>>
  343. %(HEADER1)s
  344. %(HEADER2)s
  345. %(HEADER3)s
  346. """,
  347. 'mgp': """\
  348. #!/usr/X11R6/bin/mgp -t 90
  349. %%deffont "normal" xfont "utopia-medium-r", charset "iso8859-1"
  350. %%deffont "normal-i" xfont "utopia-medium-i", charset "iso8859-1"
  351. %%deffont "normal-b" xfont "utopia-bold-r" , charset "iso8859-1"
  352. %%deffont "normal-bi" xfont "utopia-bold-i" , charset "iso8859-1"
  353. %%deffont "mono" xfont "courier-medium-r", charset "iso8859-1"
  354. %%default 1 size 5
  355. %%default 2 size 8, fore "yellow", font "normal-b", center
  356. %%default 3 size 5, fore "white", font "normal", left, prefix " "
  357. %%tab 1 size 4, vgap 30, prefix " ", icon arc "red" 40, leftfill
  358. %%tab 2 prefix " ", icon arc "orange" 40, leftfill
  359. %%tab 3 prefix " ", icon arc "brown" 40, leftfill
  360. %%tab 4 prefix " ", icon arc "darkmagenta" 40, leftfill
  361. %%tab 5 prefix " ", icon arc "magenta" 40, leftfill
  362. %%%%------------------------- end of headers -----------------------------
  363. %%page
  364. %%size 10, center, fore "yellow"
  365. %(HEADER1)s
  366. %%font "normal-i", size 6, fore "white", center
  367. %(HEADER2)s
  368. %%font "mono", size 7, center
  369. %(HEADER3)s
  370. """,
  371. # TODO please, improve me!
  372. 'moin': """\
  373. '''%(HEADER1)s'''
  374. ''%(HEADER2)s''
  375. %(HEADER3)s
  376. """,
  377. 'tex': \
  378. r"""\documentclass[11pt,a4paper]{article}
  379. \usepackage{amsfonts,graphicx,url}
  380. \usepackage[%(ENCODING)s]{inputenc} %% char encoding
  381. \usepackage{%(STYLE)s} %% user defined package
  382. \pagestyle{plain} %% do page numbering ('empty' turns off)
  383. \frenchspacing %% no aditional spaces after periods
  384. \setlength{\parskip}{8pt}\parindent=0pt %% no paragraph indentation
  385. %% uncomment next line for fancy PDF output on Adobe Acrobat Reader
  386. %%\usepackage[pdfstartview=FitV,colorlinks=true,bookmarks=true]{hyperref}
  387. \title{%(HEADER1)s}
  388. \author{%(HEADER2)s}
  389. \begin{document}
  390. \date{%(HEADER3)s}
  391. \maketitle
  392. \clearpage
  393. """
  394. }
  395. ##############################################################################
  396. def getTags(target):
  397. "Returns all the known tags for the specified target"
  398. keys = [
  399. 'paragraphOpen','paragraphClose',
  400. 'title1','title2','title3','title4','title5',
  401. 'numtitle1','numtitle2','numtitle3','numtitle4','numtitle5',
  402. 'blockVerbOpen','blockVerbClose',
  403. 'blockQuoteOpen','blockQuoteClose','blockQuoteLine',
  404. 'fontMonoOpen','fontMonoClose',
  405. 'fontBoldOpen','fontBoldClose',
  406. 'fontItalicOpen','fontItalicClose',
  407. 'fontUnderlineOpen','fontUnderlineClose',
  408. 'listOpen','listClose',
  409. 'listItemOpen','listItemClose','listItemLine',
  410. 'numlistOpen','numlistClose',
  411. 'numlistItemOpen','numlistItemClose','numlistItemLine',
  412. 'deflistOpen','deflistClose',
  413. 'deflistItem1Open','deflistItem1Close',
  414. 'deflistItem2Open','deflistItem2Close',
  415. 'bar1','bar2',
  416. 'url','urlMark','email','emailMark',
  417. 'img',
  418. 'tableOpen','tableClose',
  419. 'tableRowOpen','tableRowClose','tableRowSep',
  420. 'tableCellOpen','tableCellClose','tableCellSep',
  421. 'tableTitleCellOpen','tableTitleCellClose','tableTitleCellSep',
  422. 'tableTitleRowOpen','tableTitleRowClose',
  423. 'tableBorder', 'tableAlignLeft', 'tableAlignCenter',
  424. 'tableCellAlignLeft','tableCellAlignRight','tableCellAlignCenter',
  425. 'tableColAlignLeft','tableColAlignRight','tableColAlignCenter',
  426. 'tableColAlignSep',
  427. 'anchor','comment',
  428. 'TOC','tocOpen','tocClose','tocOpenCss','tocCloseCss',
  429. 'bodyOpenCss','bodyCloseCss',
  430. 'EOD'
  431. ]
  432. alltags = {
  433. 'txt': {
  434. 'title1' : ' \a' ,
  435. 'title2' : '\t\a' ,
  436. 'title3' : '\t\t\a' ,
  437. 'title4' : '\t\t\t\a' ,
  438. 'title5' : '\t\t\t\t\a',
  439. 'blockQuoteLine' : '\t' ,
  440. 'listItemOpen' : '- ' ,
  441. 'numlistItemOpen' : '\a. ' ,
  442. 'bar1' : '\a' ,
  443. 'bar2' : '\a' ,
  444. 'url' : '\a' ,
  445. 'urlMark' : '\a (\a)' ,
  446. 'email' : '\a' ,
  447. 'emailMark' : '\a (\a)' ,
  448. 'img' : '[\a]' ,
  449. },
  450. 'html': {
  451. 'paragraphOpen' : '<P>' ,
  452. 'paragraphClose' : '</P>' ,
  453. 'title1' : '~A~<H1>\a</H1>' ,
  454. 'title2' : '~A~<H2>\a</H2>' ,
  455. 'title3' : '~A~<H3>\a</H3>' ,
  456. 'title4' : '~A~<H4>\a</H4>' ,
  457. 'title5' : '~A~<H5>\a</H5>' ,
  458. 'blockVerbOpen' : '<PRE>' ,
  459. 'blockVerbClose' : '</PRE>' ,
  460. 'blockQuoteOpen' : '<BLOCKQUOTE>' ,
  461. 'blockQuoteClose' : '</BLOCKQUOTE>' ,
  462. 'fontMonoOpen' : '<CODE>' ,
  463. 'fontMonoClose' : '</CODE>' ,
  464. 'fontBoldOpen' : '<B>' ,
  465. 'fontBoldClose' : '</B>' ,
  466. 'fontItalicOpen' : '<I>' ,
  467. 'fontItalicClose' : '</I>' ,
  468. 'fontUnderlineOpen' : '<U>' ,
  469. 'fontUnderlineClose' : '</U>' ,
  470. 'listOpen' : '<UL>' ,
  471. 'listClose' : '</UL>' ,
  472. 'listItemOpen' : '<LI>' ,
  473. 'numlistOpen' : '<OL>' ,
  474. 'numlistClose' : '</OL>' ,
  475. 'numlistItemOpen' : '<LI>' ,
  476. 'deflistOpen' : '<DL>' ,
  477. 'deflistClose' : '</DL>' ,
  478. 'deflistItem1Open' : '<DT>' ,
  479. 'deflistItem1Close' : '</DT>' ,
  480. 'deflistItem2Open' : '<DD>' ,
  481. 'bar1' : '<HR NOSHADE SIZE=1>' ,
  482. 'bar2' : '<HR NOSHADE SIZE=5>' ,
  483. 'url' : '<A HREF="\a">\a</A>' ,
  484. 'urlMark' : '<A HREF="\a">\a</A>' ,
  485. 'email' : '<A HREF="mailto:\a">\a</A>' ,
  486. 'emailMark' : '<A HREF="mailto:\a">\a</A>' ,
  487. 'img' :'<IMG ALIGN="~A~" SRC="\a" BORDER="0" ALT="">',
  488. 'tableOpen' : '<TABLE~A~ CELLPADDING="4"~B~>',
  489. 'tableClose' : '</TABLE>' ,
  490. 'tableRowOpen' : '<TR>' ,
  491. 'tableRowClose' : '</TR>' ,
  492. 'tableCellOpen' : '<TD\a>' ,
  493. 'tableCellClose' : '</TD>' ,
  494. 'tableTitleCellOpen' : '<TH>' ,
  495. 'tableTitleCellClose' : '</TH>' ,
  496. 'tableBorder' : ' BORDER="1"' ,
  497. 'tableAlignCenter' : ' ALIGN="center"',
  498. 'tableCellAlignRight' : ' ALIGN="right"' ,
  499. 'tableCellAlignCenter': ' ALIGN="center"',
  500. 'anchor' : '<A NAME="\a"></A>\n',
  501. 'tocOpenCss' : '<DIV CLASS="toc" ID="toc">',
  502. 'tocCloseCss' : '</DIV>',
  503. 'bodyOpenCss' : '<DIV CLASS="body" ID="body">',
  504. 'bodyCloseCss' : '</DIV>',
  505. 'comment' : '<!-- \a -->' ,
  506. 'EOD' : '</BODY></HTML>'
  507. },
  508. #TIP xhtml inherits all HTML definitions (lowercased)
  509. #TIP http://www.w3.org/TR/xhtml1/#guidelines
  510. #TIP http://www.htmlref.com/samples/Chapt17/17_08.htm
  511. 'xhtml': {
  512. 'listItemClose' : '</li>' ,
  513. 'numlistItemClose' : '</li>' ,
  514. 'deflistItem2Close' : '</dd>' ,
  515. 'bar1' : '<hr class="light" />',
  516. 'bar2' : '<hr class="heavy" />',
  517. 'anchor' : '<a id="\a" name="\a"></a>\n',
  518. 'img' :'<img align="~A~" src="\a" border="0" alt=""/>',
  519. },
  520. 'sgml': {
  521. 'paragraphOpen' : '<p>' ,
  522. 'title1' : '<sect>\a~A~<p>' ,
  523. 'title2' : '<sect1>\a~A~<p>' ,
  524. 'title3' : '<sect2>\a~A~<p>' ,
  525. 'title4' : '<sect3>\a~A~<p>' ,
  526. 'title5' : '<sect4>\a~A~<p>' ,
  527. 'blockVerbOpen' : '<tscreen><verb>' ,
  528. 'blockVerbClose' : '</verb></tscreen>' ,
  529. 'blockQuoteOpen' : '<quote>' ,
  530. 'blockQuoteClose' : '</quote>' ,
  531. 'fontMonoOpen' : '<tt>' ,
  532. 'fontMonoClose' : '</tt>' ,
  533. 'fontBoldOpen' : '<bf>' ,
  534. 'fontBoldClose' : '</bf>' ,
  535. 'fontItalicOpen' : '<em>' ,
  536. 'fontItalicClose' : '</em>' ,
  537. 'fontUnderlineOpen' : '<bf><em>' ,
  538. 'fontUnderlineClose' : '</em></bf>' ,
  539. 'listOpen' : '<itemize>' ,
  540. 'listClose' : '</itemize>' ,
  541. 'listItemOpen' : '<item>' ,
  542. 'numlistOpen' : '<enum>' ,
  543. 'numlistClose' : '</enum>' ,
  544. 'numlistItemOpen' : '<item>' ,
  545. 'deflistOpen' : '<descrip>' ,
  546. 'deflistClose' : '</descrip>' ,
  547. 'deflistItem1Open' : '<tag>' ,
  548. 'deflistItem1Close' : '</tag>' ,
  549. 'bar1' : '<!-- \a -->' ,
  550. 'bar2' : '<!-- \a -->' ,
  551. 'url' : '<htmlurl url="\a" name="\a">' ,
  552. 'urlMark' : '<htmlurl url="\a" name="\a">' ,
  553. 'email' : '<htmlurl url="mailto:\a" name="\a">' ,
  554. 'emailMark' : '<htmlurl url="mailto:\a" name="\a">' ,
  555. 'img' : '<figure><ph vspace=""><img src="\a">'+\
  556. '</figure>' ,
  557. 'tableOpen' : '<table><tabular ca="~C~">' ,
  558. 'tableClose' : '</tabular></table>' ,
  559. 'tableRowSep' : '<rowsep>' ,
  560. 'tableCellSep' : '<colsep>' ,
  561. 'tableColAlignLeft' : 'l' ,
  562. 'tableColAlignRight' : 'r' ,
  563. 'tableColAlignCenter' : 'c' ,
  564. 'comment' : '<!-- \a -->' ,
  565. 'anchor' : '<label id="\a">' ,
  566. 'TOC' : '<toc>' ,
  567. 'EOD' : '</article>'
  568. },
  569. 'tex': {
  570. 'title1' : '\n\section*{\a}',
  571. 'title2' : '\\subsection*{\a}' ,
  572. 'title3' : '\\subsubsection*{\a}' ,
  573. # title 4/5: DIRTY: para+BF+\\+\n
  574. 'title4' : '\\paragraph{}\\textbf{\a}\\\\\n',
  575. 'title5' : '\\paragraph{}\\textbf{\a}\\\\\n',
  576. 'numtitle1' : '\n\section{\a}',
  577. 'numtitle2' : '\\subsection{\a}' ,
  578. 'numtitle3' : '\\subsubsection{\a}' ,
  579. 'blockVerbOpen' : '\\begin{verbatim}' ,
  580. 'blockVerbClose' : '\\end{verbatim}' ,
  581. 'blockQuoteOpen' : '\\begin{quotation}' ,
  582. 'blockQuoteClose' : '\\end{quotation}' ,
  583. 'fontMonoOpen' : '\\texttt{' ,
  584. 'fontMonoClose' : '}' ,
  585. 'fontBoldOpen' : '\\textbf{' ,
  586. 'fontBoldClose' : '}' ,
  587. 'fontItalicOpen' : '\\textit{' ,
  588. 'fontItalicClose' : '}' ,
  589. 'fontUnderlineOpen' : '\\underline{' ,
  590. 'fontUnderlineClose' : '}' ,
  591. 'listOpen' : '\\begin{itemize}' ,
  592. 'listClose' : '\\end{itemize}' ,
  593. 'listItemOpen' : '\\item ' ,
  594. 'numlistOpen' : '\\begin{enumerate}' ,
  595. 'numlistClose' : '\\end{enumerate}' ,
  596. 'numlistItemOpen' : '\\item ' ,
  597. 'deflistOpen' : '\\begin{description}',
  598. 'deflistClose' : '\\end{description}' ,
  599. 'deflistItem1Open' : '\\item[' ,
  600. 'deflistItem1Close' : ']' ,
  601. 'bar1' : '\n\\hrulefill{}\n' ,
  602. 'bar2' : '\n\\rule{\linewidth}{1mm}\n',
  603. 'url' : '\\url{\a}' ,
  604. 'urlMark' : '\\textit{\a} (\\url{\a})' ,
  605. 'email' : '\\url{\a}' ,
  606. 'emailMark' : '\\textit{\a} (\\url{\a})' ,
  607. 'img' : '\\includegraphics{\a}',
  608. 'tableOpen' : '\\begin{center}\\begin{tabular}{|~C~|}',
  609. 'tableClose' : '\\end{tabular}\\end{center}',
  610. 'tableRowOpen' : '\\hline ' ,
  611. 'tableRowClose' : ' \\\\' ,
  612. 'tableCellSep' : ' & ' ,
  613. 'tableColAlignLeft' : 'l' ,
  614. 'tableColAlignRight' : 'r' ,
  615. 'tableColAlignCenter' : 'c' ,
  616. 'tableColAlignSep' : '|' ,
  617. 'comment' : '% \a' ,
  618. 'TOC' : '\\tableofcontents\\clearpage',
  619. 'EOD' : '\\end{document}'
  620. },
  621. 'moin': {
  622. 'title1' : '= \a =' ,
  623. 'title2' : '== \a ==' ,
  624. 'title3' : '=== \a ===' ,
  625. 'title4' : '==== \a ====' ,
  626. 'title5' : '===== \a =====',
  627. 'blockVerbOpen' : '{{{' ,
  628. 'blockVerbClose' : '}}}' ,
  629. 'blockQuoteLine' : ' ' ,
  630. 'fontMonoOpen' : '{{{' ,
  631. 'fontMonoClose' : '}}}' ,
  632. 'fontBoldOpen' : "'''" ,
  633. 'fontBoldClose' : "'''" ,
  634. 'fontItalicOpen' : "''" ,
  635. 'fontItalicClose' : "''" ,
  636. 'fontUnderlineOpen' : "__" ,
  637. 'fontUnderlineClose' : "__" ,
  638. 'listItemOpen' : ' * ' ,
  639. 'numlistItemOpen' : ' \a. ' ,
  640. 'bar1' : '----' ,
  641. 'bar2' : '----' ,
  642. 'url' : '[\a]' ,
  643. 'urlMark' : '[\a \a]' ,
  644. 'email' : '[\a]' ,
  645. 'emailMark' : '[\a \a]' ,
  646. 'img' : '[\a]' ,
  647. 'tableRowOpen' : '||' ,
  648. 'tableCellOpen' : '\a' ,
  649. 'tableCellClose' : '||' ,
  650. 'tableTitleCellClose' : '||' ,
  651. 'tableCellAlignRight' : '<)>' ,
  652. 'tableCellAlignCenter': '<:>' ,
  653. 'comment' : '## \a' ,
  654. 'TOC' : '[[TableOfContents]]'
  655. },
  656. 'mgp': {
  657. 'paragraphOpen' : '%font "normal", size 5' ,
  658. 'title1' : '%page\n\n\a\n' ,
  659. 'title2' : '%page\n\n\a\n' ,
  660. 'title3' : '%page\n\n\a\n' ,
  661. 'title4' : '%page\n\n\a\n' ,
  662. 'title5' : '%page\n\n\a\n' ,
  663. 'blockVerbOpen' : '%font "mono"' ,
  664. 'blockVerbClose' : '%font "normal"' ,
  665. 'blockQuoteOpen' : '%prefix " "' ,
  666. 'blockQuoteClose' : '%prefix " "' ,
  667. 'fontMonoOpen' : '\n%cont, font "mono"\n' ,
  668. 'fontMonoClose' : '\n%cont, font "normal"\n' ,
  669. 'fontBoldOpen' : '\n%cont, font "normal-b"\n' ,
  670. 'fontBoldClose' : '\n%cont, font "normal"\n' ,
  671. 'fontItalicOpen' : '\n%cont, font "normal-i"\n' ,
  672. 'fontItalicClose' : '\n%cont, font "normal"\n' ,
  673. 'fontUnderlineOpen' : '\n%cont, fore "cyan"\n' ,
  674. 'fontUnderlineClose' : '\n%cont, fore "white"\n' ,
  675. 'listItemLine' : '\t' ,
  676. 'numlistItemLine' : '\t' ,
  677. 'deflistItem1Open' : '\t\n%cont, font "normal-b"\n',
  678. 'deflistItem1Close' : '\n%cont, font "normal"\n' ,
  679. 'bar1' : '%bar "white" 5' ,
  680. 'bar2' : '%pause' ,
  681. 'url' : '\n%cont, fore "cyan"\n\a' +\
  682. '\n%cont, fore "white"\n' ,
  683. 'urlMark' : '\a \n%cont, fore "cyan"\n\a'+\
  684. '\n%cont, fore "white"\n' ,
  685. 'email' : '\n%cont, fore "cyan"\n\a' +\
  686. '\n%cont, fore "white"\n' ,
  687. 'emailMark' : '\a \n%cont, fore "cyan"\n\a'+\
  688. '\n%cont, fore "white"\n' ,
  689. 'img' : '\n%~A~\n%newimage "\a"\n%left\n',
  690. 'comment' : '%% \a' ,
  691. 'tocOpen' : '%page\n\n\n' ,
  692. 'EOD' : '%%EOD'
  693. },
  694. # man groff_man ; man 7 groff
  695. 'man': {
  696. 'paragraphOpen' : '.P' ,
  697. 'title1' : '.SH \a' ,
  698. 'title2' : '.SS \a' ,
  699. 'title3' : '.SS \a' ,
  700. 'title4' : '.SS \a' ,
  701. 'title5' : '.SS \a' ,
  702. 'blockVerbOpen' : '.nf' ,
  703. 'blockVerbClose' : '.fi\n' ,
  704. 'blockQuoteOpen' : '.RS' ,
  705. 'blockQuoteClose' : '.RE' ,
  706. 'fontBoldOpen' : '\\fB' ,
  707. 'fontBoldClose' : '\\fR' ,
  708. 'fontItalicOpen' : '\\fI' ,
  709. 'fontItalicClose' : '\\fR' ,
  710. 'listOpen' : '.RS' ,
  711. 'listItemOpen' : '.IP \(bu 3\n',
  712. 'listClose' : '.RE' ,
  713. 'numlistOpen' : '.RS' ,
  714. 'numlistItemOpen' : '.IP \a. 3\n',
  715. 'numlistClose' : '.RE' ,
  716. 'deflistItem1Open' : '.TP\n' ,
  717. 'bar1' : '\n\n' ,
  718. 'bar2' : '\n\n' ,
  719. 'url' : '\a' ,
  720. 'urlMark' : '\a (\a)',
  721. 'email' : '\a' ,
  722. 'emailMark' : '\a (\a)',
  723. 'img' : '\a' ,
  724. 'tableOpen' : '.TS\n~A~~B~tab(^); ~C~.',
  725. 'tableClose' : '.TE' ,
  726. 'tableRowOpen' : ' ' ,
  727. 'tableCellSep' : '^' ,
  728. 'tableAlignCenter' : 'center, ',
  729. 'tableBorder' : 'allbox, ',
  730. 'tableColAlignLeft' : 'l' ,
  731. 'tableColAlignRight' : 'r' ,
  732. 'tableColAlignCenter' : 'c' ,
  733. 'comment' : '.\\" \a'
  734. },
  735. 'pm6': {
  736. 'paragraphOpen' : '<@Normal:>' ,
  737. 'title1' : '\n<@Title1:>\a',
  738. 'title2' : '\n<@Title2:>\a',
  739. 'title3' : '\n<@Title3:>\a',
  740. 'title4' : '\n<@Title4:>\a',
  741. 'title5' : '\n<@Title5:>\a',
  742. 'blockVerbOpen' : '<@PreFormat:>' ,
  743. 'blockQuoteLine' : '<@Quote:>' ,
  744. 'fontMonoOpen' : '<FONT "Lucida Console"><SIZE 9>' ,
  745. 'fontMonoClose' : '<SIZE$><FONT$>',
  746. 'fontBoldOpen' : '<B>' ,
  747. 'fontBoldClose' : '<P>' ,
  748. 'fontItalicOpen' : '<I>' ,
  749. 'fontItalicClose' : '<P>' ,
  750. 'fontUnderlineOpen' : '<U>' ,
  751. 'fontUnderlineClose' : '<P>' ,
  752. 'listOpen' : '<@Bullet:>' ,
  753. 'listItemOpen' : '\x95\t' , # \x95 == ~U
  754. 'numlistOpen' : '<@Bullet:>' ,
  755. 'numlistItemOpen' : '\x95\t' ,
  756. 'bar1' : '\a' ,
  757. 'bar2' : '\a' ,
  758. 'url' : '<U>\a<P>' , # underline
  759. 'urlMark' : '\a <U>\a<P>' ,
  760. 'email' : '\a' ,
  761. 'emailMark' : '\a \a' ,
  762. 'img' : '\a'
  763. }
  764. }
  765. # make the HTML -> XHTML inheritance
  766. xhtml = alltags['html'].copy()
  767. for key in xhtml.keys(): xhtml[key] = string.lower(xhtml[key])
  768. # some like HTML tags as lowercase, some don't... (headers out)
  769. if HTML_LOWER: alltags['html'] = xhtml.copy()
  770. xhtml.update(alltags['xhtml'])
  771. alltags['xhtml'] = xhtml.copy()
  772. # compose the target tags dictionary
  773. tags = {}
  774. target_tags = alltags[target].copy()
  775. for key in keys: tags[key] = '' # create empty keys
  776. for key in target_tags.keys():
  777. tags[key] = maskEscapeChar(target_tags[key]) # populate
  778. return tags
  779. ##############################################################################
  780. def getRules(target):
  781. "Returns all the target-specific syntax rules"
  782. ret = {}
  783. allrules = [
  784. # target rules (ON/OFF)
  785. 'linkable', # target supports external links
  786. 'tableable', # target supports tables
  787. 'imglinkable', # target supports images as links
  788. 'imgalignable', # target supports image alignment
  789. 'imgasdefterm', # target supports image as definition term
  790. 'autonumberlist', # target supports numbered lists natively
  791. 'autonumbertitle', # target supports numbered titles natively
  792. 'parainsidelist', # lists items supports paragraph
  793. 'spacedlistitem', # lists support blank lines between items
  794. 'listnotnested', # lists cannot be nested
  795. 'quotenotnested', # quotes cannot be nested
  796. 'verbblocknotescaped', # don't escape specials in verb block
  797. 'verbblockfinalescape', # do final escapes in verb block
  798. 'escapeurl', # escape special in link URL
  799. 'onelinepara', # dump paragraph as a single long line
  800. 'tabletitlerowinbold', # manually bold any cell on table titles
  801. 'tablecellstrip', # strip extra spaces from each table cell
  802. 'barinsidequote', # bars are allowed inside quote blocks
  803. 'finalescapetitle', # perform final escapes on title lines
  804. # target code beautify (ON/OFF)
  805. 'indentverbblock', # add leading spaces to verb block lines
  806. 'breaktablecell', # break lines after any table cell
  807. 'breaktablelineopen', # break line after opening table line
  808. 'notbreaklistopen', # don't break line after opening a new list
  809. 'notbreakparaopen', # don't break line after opening a new para
  810. 'keepquoteindent', # don't remove the leading TABs on quotes
  811. 'keeplistindent', # don't remove the leading spaces on lists
  812. 'blankendmotherlist', # append a blank line at the mother list end
  813. 'blankendtable', # append a blank line at the table end
  814. 'tagnotindentable', # tags must be placed at the line begining
  815. # value settings
  816. 'listmaxdepth', # maximum depth for lists
  817. 'tablecellaligntype' # type of table cell align: cell, column
  818. ]
  819. rules_bank = {
  820. 'txt' : {
  821. 'indentverbblock':1,
  822. 'spacedlistitem':1,
  823. 'parainsidelist':1,
  824. 'keeplistindent':1,
  825. 'barinsidequote':1,
  826. 'blankendmotherlist':1
  827. },
  828. 'html': {
  829. 'indentverbblock':1,
  830. 'linkable':1,
  831. 'escapeurl':1,
  832. 'imglinkable':1,
  833. 'imgalignable':1,
  834. 'imgasdefterm':1,
  835. 'autonumberlist':1,
  836. 'spacedlistitem':1,
  837. 'parainsidelist':1,
  838. 'blankendmotherlist':1,
  839. 'tableable':1,
  840. 'tablecellstrip':1,
  841. 'blankendtable':1,
  842. 'breaktablecell':1,
  843. 'breaktablelineopen':1,
  844. 'keeplistindent':1,
  845. 'keepquoteindent':1,
  846. 'barinsidequote':1,
  847. 'tablecellaligntype':'cell'
  848. },
  849. #TIP xhtml inherits all HTML rules
  850. 'xhtml': {
  851. },
  852. 'sgml': {
  853. 'linkable':1,
  854. 'escapeurl':1,
  855. 'autonumberlist':1,
  856. 'spacedlistitem':1,
  857. 'blankendmotherlist':1,
  858. 'tableable':1,
  859. 'tablecellstrip':1,
  860. 'blankendtable':1,
  861. 'quotenotnested':1,
  862. 'keeplistindent':1,
  863. 'keepquoteindent':1,
  864. 'barinsidequote':1,
  865. 'finalescapetitle':1,
  866. 'tablecellaligntype':'column'
  867. },
  868. 'mgp' : {
  869. 'blankendmotherlist':1,
  870. 'tagnotindentable':1,
  871. 'spacedlistitem':1,
  872. 'imgalignable':1,
  873. },
  874. 'tex' : {
  875. 'autonumberlist':1,
  876. 'autonumbertitle':1,
  877. 'spacedlistitem':1,
  878. 'blankendmotherlist':1,
  879. 'tableable':1,
  880. 'tablecellstrip':1,
  881. 'tabletitlerowinbold':1,
  882. 'blankendtable':1,
  883. 'verbblocknotescaped':1,
  884. 'keeplistindent':1,
  885. 'listmaxdepth':4,
  886. 'barinsidequote':1,
  887. 'finalescapetitle':1,
  888. 'tablecellaligntype':'column'
  889. },
  890. 'moin': {
  891. 'spacedlistitem':1,
  892. 'linkable':1,
  893. 'blankendmotherlist':1,
  894. 'keeplistindent':1,
  895. 'tableable':1,
  896. 'barinsidequote':1,
  897. 'blankendtable':1,
  898. 'tabletitlerowinbold':1,
  899. 'tablecellstrip':1,
  900. 'tablecellaligntype':'cell'
  901. },
  902. 'man' : {
  903. 'spacedlistitem':1,
  904. 'indentverbblock':1,
  905. 'blankendmotherlist':1,
  906. 'tagnotindentable':1,
  907. 'tableable':1,
  908. 'tablecellaligntype':'column',
  909. 'tabletitlerowinbold':1,
  910. 'tablecellstrip':1,
  911. 'blankendtable':1,
  912. 'keeplistindent':0,
  913. 'barinsidequote':1,
  914. 'parainsidelist':0,
  915. },
  916. 'pm6' : {
  917. 'keeplistindent':1,
  918. 'verbblockfinalescape':1,
  919. #TODO add support for these - maybe set a JOINNEXT char and
  920. # do it on addLineBreaks()
  921. 'notbreaklistopen':1,
  922. 'notbreakparaopen':1,
  923. 'barinsidequote':1,
  924. 'onelinepara':1,
  925. }
  926. }
  927. # get the target specific rules
  928. if target == 'xhtml':
  929. myrules = rules_bank['html'].copy() # inheritance
  930. myrules.update(rules_bank['xhtml']) # get XHTML specific
  931. else:
  932. myrules = rules_bank[target].copy()
  933. # populate return dictionary
  934. for key in allrules: ret[key] = 0 # reset all
  935. ret.update(myrules) # get rules
  936. return ret
  937. ##############################################################################
  938. def getRegexes():
  939. "Returns all the regexes used to find the t2t marks"
  940. bank = {
  941. 'blockVerbOpen':
  942. re.compile(r'^```\s*$'),
  943. 'blockVerbClose':
  944. re.compile(r'^```\s*$'),
  945. 'blockRawOpen':
  946. re.compile(r'^"""\s*$'),
  947. 'blockRawClose':
  948. re.compile(r'^"""\s*$'),
  949. 'quote':
  950. re.compile(r'^\t+'),
  951. '1lineVerb':
  952. re.compile(r'^``` (?=.)'),
  953. '1lineRaw':
  954. re.compile(r'^""" (?=.)'),
  955. # mono, raw, bold, italic, underline:
  956. # - marks must be glued with the contents, no boundary spaces
  957. # - they are greedy, so in ****bold****, turns to <b>**bold**</b>
  958. 'fontMono':
  959. re.compile( r'``([^\s](|.*?[^\s])`*)``'),
  960. 'raw':
  961. re.compile( r'""([^\s](|.*?[^\s])"*)""'),
  962. 'fontBold':
  963. re.compile(r'\*\*([^\s](|.*?[^\s])\**)\*\*'),
  964. 'fontItalic':
  965. re.compile( r'//([^\s](|.*?[^\s])/*)//'),
  966. 'fontUnderline':
  967. re.compile( r'__([^\s](|.*?[^\s])_*)__'),
  968. 'list':
  969. re.compile(r'^( *)(-) (?=[^ ])'),
  970. 'numlist':
  971. re.compile(r'^( *)(\+) (?=[^ ])'),
  972. 'deflist':
  973. re.compile(r'^( *)(:) (.*)$'),
  974. 'bar':
  975. re.compile(r'^(\s*)([_=-]{20,})\s*$'),
  976. 'table':
  977. re.compile(r'^ *\|\|? '),
  978. 'blankline':
  979. re.compile(r'^\s*$'),
  980. 'comment':
  981. re.compile(r'^%'),
  982. # auxiliar tag regexes
  983. '_imgAlign' : re.compile(r'~A~',re.I),
  984. '_tableAlign' : re.compile(r'~A~',re.I),
  985. '_anchor' : re.compile(r'~A~',re.I),
  986. '_tableBorder' : re.compile(r'~B~',re.I),
  987. '_tableColAlign': re.compile(r'~C~',re.I),
  988. }
  989. # special char to place data on TAGs contents (\a == bell)
  990. bank['x'] = re.compile('\a')
  991. # %%date [ (formatting) ]
  992. bank['date'] = re.compile(r'%%date\b(\((?P<fmt>.*?)\))?', re.I)
  993. # almost complicated title regexes ;)
  994. titskel = r'^ *(?P<id>%s)(?P<txt>%s)\1(\[(?P<label>\w*)\])?\s*$'
  995. bank[ 'title'] = re.compile(titskel%('[=]{1,5}','[^=](|.*[^=])'))
  996. bank['numtitle'] = re.compile(titskel%('[+]{1,5}','[^+](|.*[^+])'))
  997. ### complicated regexes begin here ;)
  998. #
  999. # textual descriptions on --help's style: [...] is optional, | is OR
  1000. ### first, some auxiliar variables
  1001. #
  1002. # [image.EXT]
  1003. patt_img = r'\[([\w_,.+%$#@!?+~/-]+\.(png|jpe?g|gif|eps|bmp))\]'
  1004. # link things
  1005. urlskel = {
  1006. 'proto' : r'(https?|ftp|news|telnet|gopher|wais)://',
  1007. 'guess' : r'(www[23]?|ftp)\.', # w/out proto, try to guess
  1008. 'login' : r'A-Za-z0-9_.-', # for ftp://login@domain.com
  1009. 'pass' : r'[^ @]*', # for ftp://login:password@dom.com
  1010. 'chars' : r'A-Za-z0-9%._/~:,=$@&-',# %20(space), :80(port), D&D
  1011. 'anchor': r'A-Za-z0-9%._-', # %nn(encoded)
  1012. 'form' : r'A-Za-z0-9/%&=+.,@*_-', # .,@*_-(as is)
  1013. 'punct' : r'.,;:!?'
  1014. }
  1015. # username [ :password ] @
  1016. patt_url_login = r'([%s]+(:%s)?@)?'%(urlskel['login'],urlskel['pass'])
  1017. # [ http:// ] [ username:password@ ] domain.com [ / ]
  1018. # [ #anchor | ?form=data ]
  1019. retxt_url = r'\b(%s%s|%s)[%s]+\b/*(\?[%s]+)?(#[%s]+)?'%(
  1020. urlskel['proto'],patt_url_login, urlskel['guess'],
  1021. urlskel['chars'],urlskel['form'],urlskel['anchor'])
  1022. # filename | [ filename ] #anchor
  1023. retxt_url_local = r'[%s]+|[%s]*(#[%s]+)'%(
  1024. urlskel['chars'],urlskel['chars'],urlskel['anchor'])
  1025. # user@domain [ ?form=data ]
  1026. patt_email = r'\b[%s]+@([A-Za-z0-9_-]+\.)+[A-Za-z]{2,4}\b(\?[%s]+)?'%(
  1027. urlskel['login'],urlskel['form'])
  1028. # saving for future use
  1029. bank['_urlskel'] = urlskel
  1030. ### and now the real regexes
  1031. #
  1032. bank['email'] = re.compile(patt_email,re.I)
  1033. # email | url
  1034. bank['link'] = re.compile(r'%s|%s'%(retxt_url,patt_email), re.I)
  1035. # \[ label | imagetag url | email | filename \]
  1036. bank['linkmark'] = re.compile(
  1037. r'\[(?P<label>%s|[^]]+) (?P<link>%s|%s|%s)\]'%(
  1038. patt_img, retxt_url, patt_email, retxt_url_local),
  1039. re.L+re.I)
  1040. # image
  1041. bank['img'] = re.compile(patt_img, re.L+re.I)
  1042. # all macros
  1043. bank['macro'] = bank['date']
  1044. # special things
  1045. bank['special'] = re.compile(r'^%!\s*')
  1046. return bank
  1047. ### END OF regex nightmares
  1048. ##############################################################################
  1049. def echo(msg): # for quick debug
  1050. print '\033[32;1m%s\033[m'%msg
  1051. def Quit(msg, exitcode=0):
  1052. print msg
  1053. sys.exit(exitcode)
  1054. def Error(msg):
  1055. sys.stderr.write(_("%s: Error: ")%my_name + "%s\n"%msg)
  1056. sys.stderr.flush()
  1057. sys.exit(1)
  1058. def ShowTraceback():
  1059. try:
  1060. from traceback import print_exc
  1061. print_exc() ; print ; print
  1062. except: pass
  1063. def Message(msg,level):
  1064. if level <= VERBOSE:
  1065. prefix = '-'*5
  1066. print "%s %s"%(prefix*level, msg)
  1067. def Debug(msg,color=0,linenr=None):
  1068. "0gray=init,1red=conf,3yellow=line,6cyan=block,2green=detail,5pink=gui"
  1069. if not DEBUG: return
  1070. if COLOR_DEBUG: msg = '\033[3%s;1m%s\033[m'%(color,msg)
  1071. if linenr is not None: msg = "LINE %04d: %s"%(linenr,msg)
  1072. print "** %s"%msg
  1073. def Readfile(file, remove_linebreaks=0):
  1074. if file == '-':
  1075. try: data = sys.stdin.readlines()
  1076. except: Error(_('You must feed me with data on STDIN!'))
  1077. else:
  1078. try: f = open(file); data = f.readlines() ; f.close()
  1079. except: Error(_("Cannot read file:")+"\n %s"%file)
  1080. if remove_linebreaks:
  1081. data = map(lambda x:re.sub('[\n\r]+$','',x), data)
  1082. Message(_("Readed file (%d lines): %s")%(len(data),file),2)
  1083. return data
  1084. def Savefile(file, contents):
  1085. try: f = open(file, 'wb')
  1086. except: Error(_("Cannot open file for writing:")+"\n %s"%file)
  1087. if type(contents) == type([]): doit = f.writelines
  1088. else: doit = f.write
  1089. doit(contents) ; f.close()
  1090. def showdic(dic):
  1091. for k in dic.keys(): print "%15s : %s" % (k,dic[k])
  1092. def dotted_spaces(txt=''):
  1093. return string.replace(txt,' ','.')
  1094. def get_rc_path():
  1095. rc_file = RC
  1096. # try to get rc dir name (usually $HOME on win and linux)
  1097. rc_dir = os.environ.get('HOME')
  1098. if rc_dir:
  1099. # compose path and return it if the file exists
  1100. rc_path = os.path.join(rc_dir, rc_file)
  1101. if os.path.isfile(rc_path):
  1102. return rc_path
  1103. return ''
  1104. ##############################################################################
  1105. class CommandLine:
  1106. """Command Line class - Masters command line
  1107. This class checks and extract data from the provided command line.
  1108. The --long options and flags are taken from the global OPTIONS,
  1109. FLAGS and ACTIONS dictionaries. The short options are registered
  1110. here, and also their equivalence to the long ones.
  1111. METHODS:
  1112. _compose_short_opts() -> str
  1113. _compose_long_opts() -> list
  1114. Compose the valid short and long options list, on the
  1115. 'getopt' format.
  1116. parse() -> (opts, args)
  1117. Call getopt to check and parse the command line.
  1118. It expects to receive the command line as a list, and
  1119. without the program name (sys.argv[1:]).
  1120. get_raw_config() -> [RAW config]
  1121. Scans command line and convert the data to the RAW config
  1122. format. See ConfigMaster class to the RAW format description.
  1123. Optional 'ignore' and 'filter' arguments are used to filter
  1124. in or out specified keys.
  1125. compose_cmdline(dict) -> [Command line]
  1126. Compose a command line list from an already parsed config
  1127. dictionary, generated from RAW by ConfigMaster(). Use
  1128. this to compose an optimal command line for a group of
  1129. options.
  1130. The get_raw_config() calls parse(), so the tipical use of this
  1131. class is:
  1132. raw = CommandLine().get_raw_config(sys.argv[1:])
  1133. """
  1134. def __init__(self):
  1135. self.all_options = OPTIONS.keys()
  1136. self.all_flags = FLAGS.keys()
  1137. self.all_actions = ACTIONS.keys()
  1138. # short:long options equivalence
  1139. self.short_long = {
  1140. 'h':'help' , 'V':'version',
  1141. 'n':'enum-title', 'i':'infile' ,
  1142. 'H':'no-headers', 'o':'outfile',
  1143. 'v':'verbose' , 't':'target'
  1144. }
  1145. # compose valid short and long options data for getopt
  1146. self.short_opts = self._compose_short_opts()
  1147. self.long_opts = self._compose_long_opts()
  1148. def _compose_short_opts(self):
  1149. "Returns a string like 'hVt:o' with all short options/flags"
  1150. ret = []
  1151. for opt in self.short_long.keys():
  1152. long = self.short_long[opt]
  1153. if long in self.all_options: # is flag or option?
  1154. opt = opt+':' # option: have param
  1155. ret.append(opt)
  1156. Debug('Valid SHORT options: %s'%ret)
  1157. return string.join(ret, '')
  1158. def _compose_long_opts(self):
  1159. "Returns a list with all the valid long options/flags"
  1160. ret = map(lambda x:x+'=', self.all_options) # add =
  1161. ret.extend(self.all_flags) # flag ON
  1162. ret.extend(self.all_actions) # acts
  1163. ret.extend(map(lambda x:'no-'+x, self.all_flags)) # add no-*
  1164. ret.extend(['no-style']) # turn OFF option
  1165. ret.extend(['no-encoding']) # turn OFF option
  1166. ret.extend(['no-outfile']) # turn OFF option
  1167. Debug('Valid LONG options: %s'%ret)
  1168. return ret
  1169. def _tokenize(self, cmd_string=''):
  1170. "Convert a command line string to a list"
  1171. #TODO protect quotes contents
  1172. return string.split(cmd_string)
  1173. def parse(self, cmdline=[]):
  1174. "Check/Parse a command line list TIP: no program name!"
  1175. # get the valid options
  1176. short, long = self.short_opts, self.long_opts
  1177. # parse it!
  1178. try:
  1179. opts, args = getopt.getopt(cmdline, short, long)
  1180. except getopt.error, errmsg:
  1181. Error(_("%s (try --help)")%errmsg)
  1182. return (opts, args)
  1183. def get_raw_config(self, cmdline=[], ignore=[], filter=[]):
  1184. "Returns the options/arguments found as RAW config"
  1185. if not cmdline: return []
  1186. ret = []
  1187. # we need lists, not strings
  1188. if type(cmdline) == type(''): cmdline = self._tokenize(cmdline)
  1189. Debug("cmdline: %s"%cmdline)
  1190. opts, args = self.parse(cmdline[:])
  1191. # get infile, if any
  1192. while args:
  1193. infile = args.pop(0)
  1194. ret.append(['infile', infile])
  1195. # parse all options
  1196. for name,value in opts:
  1197. # remove leading - and --
  1198. name = re.sub('^--?', '', name)
  1199. # translate short opt to long
  1200. if len(name) == 1: name = self.short_long.get(name)
  1201. # save it (if allowed)
  1202. ret.append([name, value])
  1203. # apply 'ignore' and 'filter' rules (filter is stronger)
  1204. temp = ret[:] ; ret = []
  1205. for name,value in temp:
  1206. if (not filter and not ignore) or \
  1207. (filter and name in filter) or \
  1208. (ignore and name not in ignore):
  1209. ret.append( ['all', name, value] )
  1210. # add the original command line string as 'realcmdline'
  1211. ret.append( ['all', 'realcmdline', cmdline] )
  1212. return ret
  1213. def compose_cmdline(self, conf={}, no_check=0):
  1214. "compose a full (and diet) command line from CONF dict"
  1215. if not conf: return []
  1216. args = []
  1217. dft_options = OPTIONS.copy()
  1218. cfg = conf.copy()
  1219. valid_opts = self.all_options + self.all_flags
  1220. use_short = {'no-headers':'H', 'enum-title':'n'}
  1221. # remove useless options
  1222. if not no_check and cfg.get('toc-only'):
  1223. if cfg.has_key('no-headers'):
  1224. del cfg['no-headers']
  1225. if cfg.has_key('outfile'):
  1226. del cfg['outfile'] # de

Large files are truncated, but you can click here to view the full file