/old/txt2tags-2.1.py
Python | 4229 lines | 3805 code | 158 blank | 266 comment | 160 complexity | 4e16129e62cf5a83c262b3a3d8a3d1c0 MD5 | raw file
Possible License(s): GPL-2.0, GPL-3.0, WTFPL
Large files are truncated, but you can click here to view the full file
- #!/usr/bin/env python
- # txt2tags - generic text conversion tool
- # http://txt2tags.sf.net
- #
- # Copyright 2001, 2002, 2003, 2004 Aurelio Marinho Jargas
- #
- # This program is free software; you can redistribute it and/or modify
- # it under the terms of the GNU General Public License as published by
- # the Free Software Foundation, version 2.
- #
- # This program is distributed in the hope that it will be useful,
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- # GNU General Public License for more details.
- #
- # You have received a copy of the GNU General Public License along
- # with this program, on the COPYING file.
- #
- #
- #
- # +-------------------------------------------------------------+
- # | IMPORTANT MESSAGES, PLEASE READ |
- # +-------------------------------------------------------------+
- # | |
- # | |
- # | v1.x COMPATIBILITY |
- # | ------------------ |
- # | |
- # | Due to the major syntax changes, the new 2.x series |
- # | BREAKS backwards compatibility. |
- # | |
- # | Use the 't2tconv' script to upgrade your existing |
- # | v1.x files to conform the new v2.x syntax. |
- # | |
- # | Do a visual inspection on the new converted file. |
- # | Specially Pre & Post proc filters can break. |
- # | Check them! |
- # | |
- # | |
- # +-------------------------------------------------------------+
- #
- #
- ########################################################################
- #
- # BORING CODE EXPLANATION AHEAD
- #
- # Just read if you wish to understand how the txt2tags code works
- #
- ########################################################################
- #
- # Version 2.0 was a complete rewrite for the program 'core'.
- #
- # Now the code that [1] parses the marked text is separated from the
- # code that [2] insert the target tags.
- #
- # [1] made by: def convert()
- # [2] made by: class BlockMaster
- #
- # The structures of the marked text are identified and their contents are
- # extracted into a data holder (Python lists and dictionaries).
- #
- # When parsing the source file, the blocks (para, lists, quote, table)
- # are opened with BlockMaster, right when found. Then their contents,
- # which span several lines, are fed into a special holder on the
- # BlockMaster instance. Just when the block is closed, the target tags
- # are inserted for the full block as a whole, in one pass. This way, we
- # have better control over blocks. Much better than the previous line by
- # line approach.
- #
- # In other words, whenever inside a block, the parser *holds* the tag
- # insertion process, waiting until the full block is read. That was
- # needed primarily to close paragraphs for the new XHTML target, but
- # proved to be a very good addition, improving many other processing steps.
- #
- # -------------------------------------------------------------------
- #
- # There is also a brand new code for the Configuration schema, 100%
- # rewritten. There are new classes, all self documented: CommandLine,
- # SourceDocument, ConfigMaster and ConfigLines. In short, a new RAW
- # Config format was created, and all kind of configuration is first
- # converted to this format, and then a generic method parses it.
- #
- # The init processing was also changed, and now the functions which
- # get information about the input files are: get_infiles_config(),
- # process_source_file() and convert_this_files()
- #
- # Other parts are untouched, and remains the same as in v1.7, as the
- # marks regexes, target Headers and target Tags&Rules.
- #
- ########################################################################
- # Now I think the code is nice, easier to read and understand
- #XXX Python coding warning
- # Avoid common mistakes:
- # - do NOT use newlist=list instead newlist=list[:]
- # - do NOT use newdic=dic instead newdic=dic.copy()
- # - do NOT use dic[key] instead dic.get(key)
- # - do NOT use del dic[key] without has_key() before
- #XXX Smart Image Align don't work if the image is a link
- # Can't fix that because the image is expanded together with the
- # link, at the linkbank filling moment. Only the image is passed
- # to parse_images(), not the full line, so it is always 'middle'.
- #XXX Paragraph separation not valid inside Quote
- # Quote will not have <p></p> inside, instead will close and open
- # again the <blockquote>. This really sux in CSS, when defining a
- # different background color. Still don't know how to fix it.
- #XXX TODO (maybe)
- # New mark or macro which expands to an anchor full title.
- # It is necessary to parse the full document in this order:
- # DONE 1st scan: HEAD: get all settings, including %!includeconf
- # DONE 2nd scan: BODY: expand includes & apply %!preproc
- # 3rd scan: BODY: read titles and compose TOC info
- # 4th scan: BODY: full parsing, expanding [#anchor] 1st
- # Steps 2 and 3 can be made together, with no tag adding.
- # Two complete body scans will be *slow*, don't know if it is worth it.
- ##############################################################################
- # User config (1=ON, 0=OFF)
- USE_I18N = 1 # use gettext for i18ned messages? (default is 1)
- COLOR_DEBUG = 1 # show debug messages in colors? (default is 1)
- HTML_LOWER = 0 # use lowercased HTML tags instead upper? (default is 0)
- ##############################################################################
- # these are all the core Python modules used by txt2tags (KISS!)
- import re, string, os, sys, time, getopt
- # program information
- my_url = 'http://txt2tags.sf.net'
- my_name = 'txt2tags'
- my_email = 'verde@aurelio.net'
- my_version = '2.1'
# i18n - just use if available
#
# `_` is the message translation function used all over the program.
# It defaults to the identity function and is only replaced when
# USE_I18N is ON *and* a 'txt2tags' message catalog can be loaded.
_ = lambda x:x
if USE_I18N:
    try:
        import gettext
        # if your locale dir is different, change it here
        # (gettext.translation is the documented name; the old
        # gettext.Catalog alias does not exist on newer Pythons)
        cat = gettext.translation('txt2tags',localedir='/usr/share/locale/')
        _ = cat.gettext
    except Exception:
        # Best effort: no gettext module or no catalog installed means
        # untranslated messages. Catching Exception (not a bare except)
        # lets KeyboardInterrupt/SystemExit pass through.
        _ = lambda x:x
- # FLAGS : the conversion related flags , may be used in %!options
- # OPTIONS : the conversion related options, may be used in %!options
- # ACTIONS : the other behaviour modifiers, valid on command line only
- # MACROS : the valid macros with their default values for formatting
- # SETTINGS: global miscellaneous settings, valid on RC file only
- # CONFIG_KEYWORDS: the valid %!key:val keywords
- #
- # FLAGS and OPTIONS are configs that affect the converted document.
- # They usually have also a --no-<option> to turn them OFF.
- # ACTIONS are needed because when doing multiple input files, strange
- # behaviour would be found, as use command line interface for the
- # first file and gui for the second. There is no --no-<action>.
- # --version and --help inside %!options are also odd
- #
# Names of the supported target document types (see -t/--target in USAGE)
TARGETS = ['html', 'xhtml', 'sgml', 'tex', 'man', 'mgp', 'moin', 'pm6', 'txt']
# Default value (1=ON, 0=OFF) for each flag
# NOTE(review): 'css-suggar' looks like a misspelled alias of 'css-sugar'
# kept on purpose for compatibility -- confirm before removing it
FLAGS = {'headers' :1 , 'enum-title' :0 , 'mask-email' :0 ,
         'toc-only' :0 , 'toc' :0 , 'rc' :1 ,
         'css-sugar' :0 , 'css-suggar' :0 , 'quiet' :0 }
# Default value for each option that carries a value
OPTIONS = {'target' :'', 'toc-level' :3 , 'style' :'',
           'infile' :'', 'outfile' :'', 'encoding' :'',
           'split' :0 , 'lang' :''}
# Default value for each command line only action
ACTIONS = {'help' :0 , 'version' :0 , 'gui' :0 ,
           'verbose' :0 , 'debug' :0 , 'dump-config':0 }
# Default format string for each macro
MACROS = {'date' : '%Y%m%d', 'infile': '%f',
          'mtime': '%Y%m%d', 'outfile': '%f'}
SETTINGS = {} # for future use
CONFIG_KEYWORDS = [
    'target', 'encoding', 'style', 'options', 'preproc','postproc',
    'guicolors']
# Human readable (translated) description of each target in TARGETS
TARGET_NAMES = {
    'html' : _('HTML page'),
    'xhtml': _('XHTML page'),
    'sgml' : _('SGML document'),
    'tex' : _('LaTeX document'),
    'man' : _('UNIX Manual page'),
    'mgp' : _('Magic Point presentation'),
    'moin' : _('MoinMoin page'),
    'pm6' : _('PageMaker 6.0 document'),
    'txt' : _('Plain Text'),
}
# Global state holders and internal constants.
# NOTE(review): where and how the names below are filled or changed is not
# visible in this part of the file -- check their users before editing.
DEBUG = 0 # do not edit here, please use --debug
VERBOSE = 0 # do not edit here, please use -v, -vv or -vvv
QUIET = 0 # do not edit here, please use --quiet
GUI = 0
AUTOTOC = 1
RC_RAW = []
CMDLINE_RAW = []
CONF = {}
BLOCK = None
regex = {}
TAGS = {}
rules = {}
lang = 'english'
TARGET = ''
# '-' is the file name that stands for standard input/output (see USAGE)
STDIN = STDOUT = '-'
ESCCHAR = '\x00'
SEPARATOR = '\x01'
# list mark character -> list type name
LISTNAMES = {'-':'list', '+':'numlist', ':':'deflist'}
# both tables below are keyed by the first 3 chars of sys.platform
LINEBREAK = {'default':'\n', 'win':'\r\n', 'mac':'\r'}
RCFILE = {'default':'.txt2tagsrc', 'win':'_t2trc'}
# platform specific settings
# (the 'default' entry is used when the platform has no entry of its own)
LB = LINEBREAK.get(sys.platform[:3]) or LINEBREAK['default']
RC = RCFILE.get(sys.platform[:3]) or RCFILE['default']
# identify a development version
#dev_suffix = '-dev'+time.strftime('%m%d',time.localtime(time.time()))
#my_version = my_version + dev_suffix
# one line program identification: name, version and URL
VERSIONSTR = _("%s version %s <%s>")%(my_name,my_version,my_url)
# The full help text: one list item per output line, joined by line breaks.
# Translatable lines are wrapped in _(); the literal ones are kept out of
# the message catalog on purpose.
# The string method join() is used instead of the deprecated string.join()
# function, and the target list is composed directly from TARGETS instead
# of stripping brackets and quotes from its repr().
USAGE = '\n'.join([
    '',
    _("Usage: %s [OPTIONS] [infile.t2t ...]") % my_name,
    '',
    _(" -t, --target set target document type. currently supported:"),
    ' %s' % ', '.join(TARGETS),
    _(" -i, --infile=FILE set FILE as the input file name ('-' for STDIN)"),
    _(" -o, --outfile=FILE set FILE as the output file name ('-' for STDOUT)"),
    _(" -n, --enum-title enumerate all title lines as 1, 1.1, 1.1.1, etc"),
    _(" -H, --no-headers suppress header, title and footer contents"),
    _(" --headers show header, title and footer contents (default ON)"),
    _(" --encoding set target file encoding (utf-8, iso-8859-1, etc)"),
    _(" --style=FILE use FILE as the document style (like HTML CSS)"),
    _(" --css-sugar insert CSS-friendly tags for HTML and XHTML targets"),
    _(" --mask-email hide email from spam robots. x@y.z turns <x (a) y z>"),
    _(" --toc add TOC (Table of Contents) to target document"),
    _(" --toc-only print document TOC and exit"),
    _(" --toc-level=N set maximum TOC level (depth) to N"),
    _(" --rc read user config file ~/.txt2tagsrc (default ON)"),
    _(" --gui invoke Graphical Tk Interface"),
    _(" -q, --quiet quiet mode, suppress all output (except errors)"),
    _(" -v, --verbose print informative messages during conversion"),
    _(" -h, --help print this help information and exit"),
    _(" -V, --version print program version and exit"),
    _(" --dump-config print all the config found and exit"),
    '',
    _("Turn OFF options:"),
    " --no-outfile, --no-infile, --no-style, --no-encoding, --no-headers",
    " --no-toc, --no-toc-only, --no-mask-email, --no-enum-title, --no-rc",
    " --no-css-sugar, --no-quiet",
    '',
    _("Example:\n %s -t html --toc myfile.t2t") % my_name,
    '',
    _("By default, converted output is saved to 'infile.<target>'."),
    _("Use --outfile to force an output file name."),
    _("If input file is '-', reads from STDIN."),
    _("If output file is '-', dumps output to STDOUT."),
    ''
])
- ##############################################################################
- # here is all the target's templates
- # you may edit them to fit your needs
- # - the %(HEADERn)s strings represent the Header lines
- # - the %(STYLE)s string is changed by --style contents
- # - the %(ENCODING)s string is changed by --encoding contents
- # - if any of the above is empty, the full line is removed
- # - use %% to represent a literal %
- #
# Maps a template name to the text placed at the top of the generated
# document. Every value is a %-format string fed with a dictionary, so a
# literal percent sign must be written as %%.
# NOTE(review): 'htmlcss' and 'xhtmlcss' are not target names listed in
# TARGETS; presumably they are picked when --css-sugar is ON -- confirm
# against the code that reads this dictionary.
HEADER_TEMPLATE = {
# plain text: just the three header lines
'txt': """\
%(HEADER1)s
%(HEADER2)s
%(HEADER3)s
""",
# SGML (linuxdoc): title, author and date
'sgml': """\
<!doctype linuxdoc system>
<article>
<title>%(HEADER1)s
<author>%(HEADER2)s
<date>%(HEADER3)s
""",
# HTML 4.0 Transitional, with presentational attributes in the tags
'html': """\
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
<HTML>
<HEAD>
<META NAME="generator" CONTENT="http://txt2tags.sf.net">
<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=%(ENCODING)s">
<LINK REL="stylesheet" TYPE="text/css" HREF="%(STYLE)s">
<TITLE>%(HEADER1)s</TITLE>
</HEAD><BODY BGCOLOR="white" TEXT="black">
<P ALIGN="center"><CENTER><H1>%(HEADER1)s</H1>
<FONT SIZE="4">
<I>%(HEADER2)s</I><BR>
%(HEADER3)s
</FONT></CENTER>
""",
# HTML variant with no presentational attributes: header inside a DIV
'htmlcss': """\
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
<HTML>
<HEAD>
<META NAME="generator" CONTENT="http://txt2tags.sf.net">
<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=%(ENCODING)s">
<LINK REL="stylesheet" TYPE="text/css" HREF="%(STYLE)s">
<TITLE>%(HEADER1)s</TITLE>
</HEAD>
<BODY>
<DIV CLASS="header" ID="header">
<H1>%(HEADER1)s</H1>
<H2>%(HEADER2)s</H2>
<H3>%(HEADER3)s</H3>
</DIV>
""",
# XHTML 1.0 Transitional
# (the trailing backslash joins the two DOCTYPE lines into a single one)
'xhtml': """\
<?xml version="1.0"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"\
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>%(HEADER1)s</title>
<meta name="generator" content="http://txt2tags.sf.net" />
<meta http-equiv="Content-Type" content="text/html; charset=%(ENCODING)s" />
<link rel="stylesheet" type="text/css" href="%(STYLE)s" />
</head>
<body bgcolor="white" text="black">
<div align="center">
<h1>%(HEADER1)s</h1>
<h2>%(HEADER2)s</h2>
<h3>%(HEADER3)s</h3>
</div>
""",
# XHTML variant with no presentational attributes: header inside a div
'xhtmlcss': """\
<?xml version="1.0"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"\
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>%(HEADER1)s</title>
<meta name="generator" content="http://txt2tags.sf.net" />
<meta http-equiv="Content-Type" content="text/html; charset=%(ENCODING)s" />
<link rel="stylesheet" type="text/css" href="%(STYLE)s" />
</head>
<body>
<div class="header" id="header">
<h1>%(HEADER1)s</h1>
<h2>%(HEADER2)s</h2>
<h3>%(HEADER3)s</h3>
</div>
""",
# man page: a single .TH line (note that HEADER3 comes before HEADER2)
'man': """\
.TH "%(HEADER1)s" 1 "%(HEADER3)s" "%(HEADER2)s"
""",
# TODO style to <HR>
# PageMaker 6: style definitions followed by the three header lines
'pm6': """\
<PMTags1.0 win><C-COLORTABLE ("Preto" 1 0 0 0)
><@Normal=
<FONT "Times New Roman"><CCOLOR "Preto"><SIZE 11>
<HORIZONTAL 100><LETTERSPACE 0><CTRACK 127><CSSIZE 70><C+SIZE 58.3>
<C-POSITION 33.3><C+POSITION 33.3><P><CBASELINE 0><CNOBREAK 0><CLEADING -0.05>
<GGRID 0><GLEFT 7.2><GRIGHT 0><GFIRST 0><G+BEFORE 7.2><G+AFTER 0>
<GALIGNMENT "justify"><GMETHOD "proportional"><G& "ENGLISH">
<GPAIRS 12><G%% 120><GKNEXT 0><GKWIDOW 0><GKORPHAN 0><GTABS $>
<GHYPHENATION 2 34 0><GWORDSPACE 75 100 150><GSPACE -5 0 25>
><@Bullet=<@-PARENT "Normal"><FONT "Abadi MT Condensed Light">
<GLEFT 14.4><G+BEFORE 2.15><G%% 110><GTABS(25.2 l "")>
><@PreFormat=<@-PARENT "Normal"><FONT "Lucida Console"><SIZE 8><CTRACK 0>
<GLEFT 0><G+BEFORE 0><GALIGNMENT "left"><GWORDSPACE 100 100 100><GSPACE 0 0 0>
><@Title1=<@-PARENT "Normal"><FONT "Arial"><SIZE 14><B>
<GCONTENTS><GLEFT 0><G+BEFORE 0><GALIGNMENT "left">
><@Title2=<@-PARENT "Title1"><SIZE 12><G+BEFORE 3.6>
><@Title3=<@-PARENT "Title1"><SIZE 10><GLEFT 7.2><G+BEFORE 7.2>
><@Title4=<@-PARENT "Title3">
><@Title5=<@-PARENT "Title3">
><@Quote=<@-PARENT "Normal"><SIZE 10><I>>
%(HEADER1)s
%(HEADER2)s
%(HEADER3)s
""",
# Magic Point: every mgp directive starts with %, so it is written as %%
'mgp': """\
#!/usr/X11R6/bin/mgp -t 90
%%deffont "normal" xfont "utopia-medium-r", charset "iso8859-1"
%%deffont "normal-i" xfont "utopia-medium-i", charset "iso8859-1"
%%deffont "normal-b" xfont "utopia-bold-r" , charset "iso8859-1"
%%deffont "normal-bi" xfont "utopia-bold-i" , charset "iso8859-1"
%%deffont "mono" xfont "courier-medium-r", charset "iso8859-1"
%%default 1 size 5
%%default 2 size 8, fore "yellow", font "normal-b", center
%%default 3 size 5, fore "white", font "normal", left, prefix " "
%%tab 1 size 4, vgap 30, prefix " ", icon arc "red" 40, leftfill
%%tab 2 prefix " ", icon arc "orange" 40, leftfill
%%tab 3 prefix " ", icon arc "brown" 40, leftfill
%%tab 4 prefix " ", icon arc "darkmagenta" 40, leftfill
%%tab 5 prefix " ", icon arc "magenta" 40, leftfill
%%%%------------------------- end of headers -----------------------------
%%page
%%size 10, center, fore "yellow"
%(HEADER1)s
%%font "normal-i", size 6, fore "white", center
%(HEADER2)s
%%font "mono", size 7, center
%(HEADER3)s
""",
# TODO please, improve me!
'moin': """\
'''%(HEADER1)s'''
''%(HEADER2)s''
%(HEADER3)s
""",
# LaTeX: a raw string, so the backslashes are literal; TeX comments
# start with %, so they are written as %%
'tex': \
r"""\documentclass[11pt,a4paper]{article}
\usepackage{amsfonts,graphicx,url}
\usepackage[%(ENCODING)s]{inputenc} %% char encoding
\usepackage{%(STYLE)s} %% user defined package
\pagestyle{plain} %% do page numbering ('empty' turns off)
\frenchspacing %% no aditional spaces after periods
\setlength{\parskip}{8pt}\parindent=0pt %% no paragraph indentation
%% uncomment next line for fancy PDF output on Adobe Acrobat Reader
%%\usepackage[pdfstartview=FitV,colorlinks=true,bookmarks=true]{hyperref}
\title{%(HEADER1)s}
\author{%(HEADER2)s}
\begin{document}
\date{%(HEADER3)s}
\maketitle
\clearpage
"""
}
- ##############################################################################
def getTags(config):
    """Returns all the known tags for the specified target

    config: dictionary holding at least the 'target' key, whose value
            must be one of the targets defined here (html, xhtml, sgml,
            tex, moin, mgp, man, pm6, txt). The optional 'css-sugar'
            key, when true, changes some HTML/XHTML tags.

    Returns a new dictionary with one item for every known tag name.
    The tags not defined by the target are empty strings. All the
    tag values are filtered by maskEscapeChar() before returning.

    Raises KeyError if 'target' is missing or is an unknown target.

    NOTE(review): inside the tag values, '\\a' seems to be the place
    holder for the contents, and ~A~, ~B~ and ~C~ seem to be place
    holders for attributes (align, border, columns). The code which
    expands them is not in this function -- confirm there.
    """

    keys = [
        'paragraphOpen','paragraphClose',
        'title1','title2','title3','title4','title5',
        'numtitle1','numtitle2','numtitle3','numtitle4','numtitle5',
        'blockVerbOpen','blockVerbClose',
        'blockQuoteOpen','blockQuoteClose','blockQuoteLine',
        'fontMonoOpen','fontMonoClose',
        'fontBoldOpen','fontBoldClose',
        'fontItalicOpen','fontItalicClose',
        'fontUnderlineOpen','fontUnderlineClose',
        'listOpen','listClose',
        'listItemOpen','listItemClose','listItemLine',
        'numlistOpen','numlistClose',
        'numlistItemOpen','numlistItemClose','numlistItemLine',
        'deflistOpen','deflistClose',
        'deflistItem1Open','deflistItem1Close',
        'deflistItem2Open','deflistItem2Close',
        'bar1','bar2',
        'url','urlMark','email','emailMark',
        'img',
        'tableOpen','tableClose',
        'tableRowOpen','tableRowClose','tableRowSep',
        'tableCellOpen','tableCellClose','tableCellSep',
        'tableTitleCellOpen','tableTitleCellClose','tableTitleCellSep',
        'tableTitleRowOpen','tableTitleRowClose',
        'tableBorder', 'tableAlignLeft', 'tableAlignCenter',
        'tableCellAlignLeft','tableCellAlignRight','tableCellAlignCenter',
        'tableColAlignLeft','tableColAlignRight','tableColAlignCenter',
        'tableColAlignSep',
        'anchor','comment','pageBreak',
        'TOC','tocOpen','tocClose',
        'bodyOpen','bodyClose',
        'EOD'
    ]

    alltags = {

    'txt': {
        'title1': ' \a',
        'title2': '\t\a',
        'title3': '\t\t\a',
        'title4': '\t\t\t\a',
        'title5': '\t\t\t\t\a',
        'blockQuoteLine': '\t',
        'listItemOpen': '- ',
        'numlistItemOpen': '\a. ',
        'bar1': '\a',
        'bar2': '\a',
        'url': '\a',
        'urlMark': '\a (\a)',
        'email': '\a',
        'emailMark': '\a (\a)',
        'img': '[\a]',
    },

    'html': {
        'paragraphOpen': '<P>',
        'paragraphClose': '</P>',
        'title1': '~A~<H1>\a</H1>',
        'title2': '~A~<H2>\a</H2>',
        'title3': '~A~<H3>\a</H3>',
        'title4': '~A~<H4>\a</H4>',
        'title5': '~A~<H5>\a</H5>',
        'blockVerbOpen': '<PRE>',
        'blockVerbClose': '</PRE>',
        'blockQuoteOpen': '<BLOCKQUOTE>',
        'blockQuoteClose': '</BLOCKQUOTE>',
        'fontMonoOpen': '<CODE>',
        'fontMonoClose': '</CODE>',
        'fontBoldOpen': '<B>',
        'fontBoldClose': '</B>',
        'fontItalicOpen': '<I>',
        'fontItalicClose': '</I>',
        'fontUnderlineOpen': '<U>',
        'fontUnderlineClose': '</U>',
        'listOpen': '<UL>',
        'listClose': '</UL>',
        'listItemOpen': '<LI>',
        'numlistOpen': '<OL>',
        'numlistClose': '</OL>',
        'numlistItemOpen': '<LI>',
        'deflistOpen': '<DL>',
        'deflistClose': '</DL>',
        'deflistItem1Open': '<DT>',
        'deflistItem1Close': '</DT>',
        'deflistItem2Open': '<DD>',
        'bar1': '<HR NOSHADE SIZE=1>',
        'bar2': '<HR NOSHADE SIZE=5>',
        'url': '<A HREF="\a">\a</A>',
        'urlMark': '<A HREF="\a">\a</A>',
        'email': '<A HREF="mailto:\a">\a</A>',
        'emailMark': '<A HREF="mailto:\a">\a</A>',
        'img': '<IMG ALIGN="~A~" SRC="\a" BORDER="0" ALT="">',
        'tableOpen': '<TABLE~A~ CELLPADDING="4"~B~>',
        'tableClose': '</TABLE>',
        'tableRowOpen': '<TR>',
        'tableRowClose': '</TR>',
        'tableCellOpen': '<TD\a>',
        'tableCellClose': '</TD>',
        'tableTitleCellOpen': '<TH>',
        'tableTitleCellClose': '</TH>',
        'tableBorder': ' BORDER="1"',
        'tableAlignCenter': ' ALIGN="center"',
        'tableCellAlignRight': ' ALIGN="right"',
        'tableCellAlignCenter': ' ALIGN="center"',
        'anchor': '<A NAME="\a"></A>\n',
        'comment': '<!-- \a -->',
        'EOD': '</BODY></HTML>'
    },

    #TIP xhtml inherits all HTML definitions (lowercased)
    #TIP http://www.w3.org/TR/xhtml1/#guidelines
    #TIP http://www.htmlref.com/samples/Chapt17/17_08.htm
    'xhtml': {
        'listItemClose': '</li>',
        'numlistItemClose': '</li>',
        'deflistItem2Close': '</dd>',
        'bar1': '<hr class="light" />',
        'bar2': '<hr class="heavy" />',
        'anchor': '<a id="\a" name="\a"></a>\n',
        'img': '<img align="~A~" src="\a" border="0" alt=""/>',
    },

    'sgml': {
        'paragraphOpen': '<p>',
        'title1': '<sect>\a~A~<p>',
        'title2': '<sect1>\a~A~<p>',
        'title3': '<sect2>\a~A~<p>',
        'title4': '<sect3>\a~A~<p>',
        'title5': '<sect4>\a~A~<p>',
        'blockVerbOpen': '<tscreen><verb>',
        'blockVerbClose': '</verb></tscreen>',
        'blockQuoteOpen': '<quote>',
        'blockQuoteClose': '</quote>',
        'fontMonoOpen': '<tt>',
        'fontMonoClose': '</tt>',
        'fontBoldOpen': '<bf>',
        'fontBoldClose': '</bf>',
        'fontItalicOpen': '<em>',
        'fontItalicClose': '</em>',
        'fontUnderlineOpen': '<bf><em>',
        'fontUnderlineClose': '</em></bf>',
        'listOpen': '<itemize>',
        'listClose': '</itemize>',
        'listItemOpen': '<item>',
        'numlistOpen': '<enum>',
        'numlistClose': '</enum>',
        'numlistItemOpen': '<item>',
        'deflistOpen': '<descrip>',
        'deflistClose': '</descrip>',
        'deflistItem1Open': '<tag>',
        'deflistItem1Close': '</tag>',
        'bar1': '<!-- \a -->',
        'bar2': '<!-- \a -->',
        'url': '<htmlurl url="\a" name="\a">',
        'urlMark': '<htmlurl url="\a" name="\a">',
        'email': '<htmlurl url="mailto:\a" name="\a">',
        'emailMark': '<htmlurl url="mailto:\a" name="\a">',
        'img': ('<figure><ph vspace=""><img src="\a">' +
                '</figure>'),
        'tableOpen': '<table><tabular ca="~C~">',
        'tableClose': '</tabular></table>',
        'tableRowSep': '<rowsep>',
        'tableCellSep': '<colsep>',
        'tableColAlignLeft': 'l',
        'tableColAlignRight': 'r',
        'tableColAlignCenter': 'c',
        'comment': '<!-- \a -->',
        'anchor': '<label id="\a">',
        'TOC': '<toc>',
        'EOD': '</article>'
    },

    # every literal backslash is doubled here, including those which
    # were written as the invalid escapes \s and \l (same final value)
    'tex': {
        'title1': '\n\\section*{\a}',
        'title2': '\\subsection*{\a}',
        'title3': '\\subsubsection*{\a}',
        # title 4/5: DIRTY: para+BF+\\+\n
        'title4': '\\paragraph{}\\textbf{\a}\\\\\n',
        'title5': '\\paragraph{}\\textbf{\a}\\\\\n',
        'numtitle1': '\n\\section{\a}',
        'numtitle2': '\\subsection{\a}',
        'numtitle3': '\\subsubsection{\a}',
        'blockVerbOpen': '\\begin{verbatim}',
        'blockVerbClose': '\\end{verbatim}',
        'blockQuoteOpen': '\\begin{quotation}',
        'blockQuoteClose': '\\end{quotation}',
        'fontMonoOpen': '\\texttt{',
        'fontMonoClose': '}',
        'fontBoldOpen': '\\textbf{',
        'fontBoldClose': '}',
        'fontItalicOpen': '\\textit{',
        'fontItalicClose': '}',
        'fontUnderlineOpen': '\\underline{',
        'fontUnderlineClose': '}',
        'listOpen': '\\begin{itemize}',
        'listClose': '\\end{itemize}',
        'listItemOpen': '\\item ',
        'numlistOpen': '\\begin{enumerate}',
        'numlistClose': '\\end{enumerate}',
        'numlistItemOpen': '\\item ',
        'deflistOpen': '\\begin{description}',
        'deflistClose': '\\end{description}',
        'deflistItem1Open': '\\item[',
        'deflistItem1Close': ']',
        'bar1': '\n\\hrulefill{}\n',
        'bar2': '\n\\rule{\\linewidth}{1mm}\n',
        'url': '\\url{\a}',
        'urlMark': '\\textit{\a} (\\url{\a})',
        'email': '\\url{\a}',
        'emailMark': '\\textit{\a} (\\url{\a})',
        'img': '\\includegraphics{\a}',
        'tableOpen': '\\begin{center}\\begin{tabular}{|~C~|}',
        'tableClose': '\\end{tabular}\\end{center}',
        'tableRowOpen': '\\hline ',
        'tableRowClose': ' \\\\',
        'tableCellSep': ' & ',
        'tableColAlignLeft': 'l',
        'tableColAlignRight': 'r',
        'tableColAlignCenter': 'c',
        'tableColAlignSep': '|',
        'comment': '% \a',
        'TOC': '\\tableofcontents',
        'pageBreak': '\\clearpage',
        'EOD': '\\end{document}'
    },

    'moin': {
        'title1': '= \a =',
        'title2': '== \a ==',
        'title3': '=== \a ===',
        'title4': '==== \a ====',
        'title5': '===== \a =====',
        'blockVerbOpen': '{{{',
        'blockVerbClose': '}}}',
        'blockQuoteLine': ' ',
        'fontMonoOpen': '{{{',
        'fontMonoClose': '}}}',
        'fontBoldOpen': "'''",
        'fontBoldClose': "'''",
        'fontItalicOpen': "''",
        'fontItalicClose': "''",
        'fontUnderlineOpen': "__",
        'fontUnderlineClose': "__",
        'listItemOpen': ' * ',
        'numlistItemOpen': ' \a. ',
        'bar1': '----',
        'bar2': '----',
        'url': '[\a]',
        'urlMark': '[\a \a]',
        'email': '[\a]',
        'emailMark': '[\a \a]',
        'img': '[\a]',
        'tableRowOpen': '||',
        'tableCellOpen': '\a',
        'tableCellClose': '||',
        'tableTitleCellClose': '||',
        'tableCellAlignRight': '<)>',
        'tableCellAlignCenter': '<:>',
        'comment': '## \a',
        'TOC': '[[TableOfContents]]'
    },

    'mgp': {
        'paragraphOpen': '%font "normal", size 5',
        'title1': '%page\n\n\a\n',
        'title2': '%page\n\n\a\n',
        'title3': '%page\n\n\a\n',
        'title4': '%page\n\n\a\n',
        'title5': '%page\n\n\a\n',
        'blockVerbOpen': '%font "mono"',
        'blockVerbClose': '%font "normal"',
        'blockQuoteOpen': '%prefix " "',
        'blockQuoteClose': '%prefix " "',
        'fontMonoOpen': '\n%cont, font "mono"\n',
        'fontMonoClose': '\n%cont, font "normal"\n',
        'fontBoldOpen': '\n%cont, font "normal-b"\n',
        'fontBoldClose': '\n%cont, font "normal"\n',
        'fontItalicOpen': '\n%cont, font "normal-i"\n',
        'fontItalicClose': '\n%cont, font "normal"\n',
        'fontUnderlineOpen': '\n%cont, fore "cyan"\n',
        'fontUnderlineClose': '\n%cont, fore "white"\n',
        'listItemLine': '\t',
        'numlistItemLine': '\t',
        'deflistItem1Open': '\t\n%cont, font "normal-b"\n',
        'deflistItem1Close': '\n%cont, font "normal"\n',
        'bar1': '%bar "white" 5',
        'bar2': '%pause',
        'url': ('\n%cont, fore "cyan"\n\a' +
                '\n%cont, fore "white"\n'),
        'urlMark': ('\a \n%cont, fore "cyan"\n\a' +
                    '\n%cont, fore "white"\n'),
        'email': ('\n%cont, fore "cyan"\n\a' +
                  '\n%cont, fore "white"\n'),
        'emailMark': ('\a \n%cont, fore "cyan"\n\a' +
                      '\n%cont, fore "white"\n'),
        'img': '\n%~A~\n%newimage "\a"\n%left\n',
        'comment': '%% \a',
        'pageBreak': '%page\n\n\n',
        'EOD': '%%EOD'
    },

    # man groff_man ; man 7 groff
    'man': {
        'paragraphOpen': '.P',
        'title1': '.SH \a',
        'title2': '.SS \a',
        'title3': '.SS \a',
        'title4': '.SS \a',
        'title5': '.SS \a',
        'blockVerbOpen': '.nf',
        'blockVerbClose': '.fi\n',
        'blockQuoteOpen': '.RS',
        'blockQuoteClose': '.RE',
        'fontBoldOpen': '\\fB',
        'fontBoldClose': '\\fR',
        'fontItalicOpen': '\\fI',
        'fontItalicClose': '\\fR',
        'listOpen': '.RS',
        # the backslash is doubled (it was the invalid escape \( before)
        'listItemOpen': '.IP \\(bu 3\n',
        'listClose': '.RE',
        'numlistOpen': '.RS',
        'numlistItemOpen': '.IP \a. 3\n',
        'numlistClose': '.RE',
        'deflistItem1Open': '.TP\n',
        'bar1': '\n\n',
        'bar2': '\n\n',
        'url': '\a',
        'urlMark': '\a (\a)',
        'email': '\a',
        'emailMark': '\a (\a)',
        'img': '\a',
        'tableOpen': '.TS\n~A~~B~tab(^); ~C~.',
        'tableClose': '.TE',
        'tableRowOpen': ' ',
        'tableCellSep': '^',
        'tableAlignCenter': 'center, ',
        'tableBorder': 'allbox, ',
        'tableColAlignLeft': 'l',
        'tableColAlignRight': 'r',
        'tableColAlignCenter': 'c',
        'comment': '.\\" \a'
    },

    'pm6': {
        'paragraphOpen': '<@Normal:>',
        'title1': '\n<@Title1:>\a',
        'title2': '\n<@Title2:>\a',
        'title3': '\n<@Title3:>\a',
        'title4': '\n<@Title4:>\a',
        'title5': '\n<@Title5:>\a',
        'blockVerbOpen': '<@PreFormat:>',
        'blockQuoteLine': '<@Quote:>',
        'fontMonoOpen': '<FONT "Lucida Console"><SIZE 9>',
        'fontMonoClose': '<SIZE$><FONT$>',
        'fontBoldOpen': '<B>',
        'fontBoldClose': '<P>',
        'fontItalicOpen': '<I>',
        'fontItalicClose': '<P>',
        'fontUnderlineOpen': '<U>',
        'fontUnderlineClose': '<P>',
        'listOpen': '<@Bullet:>',
        'listItemOpen': '\x95\t', # \x95 == ~U
        'numlistOpen': '<@Bullet:>',
        'numlistItemOpen': '\x95\t',
        'bar1': '\a',
        'bar2': '\a',
        'url': '<U>\a<P>', # underline
        'urlMark': '\a <U>\a<P>',
        'email': '\a',
        'emailMark': '\a \a',
        'img': '\a'
    }
    }

    # exceptions for --css-sugar
    # (dic.get() is used so a config with no 'css-sugar' key means OFF)
    if config.get('css-sugar') and config['target'] in ('html','xhtml'):
        # change just HTML because XHTML inherits it
        htmltags = alltags['html']
        # table with no cellpadding
        htmltags['tableOpen'] = htmltags['tableOpen'].replace(
            ' CELLPADDING="4"', '')
        # DIVs
        htmltags['tocOpen' ] = '<DIV CLASS="toc" ID="toc">'
        htmltags['tocClose'] = '</DIV>'
        htmltags['bodyOpen'] = '<DIV CLASS="body" ID="body">'
        htmltags['bodyClose']= '</DIV>'

    # make the HTML -> XHTML inheritance
    # NOTE(review): lowercasing the full value also lowercases the ~A~
    # and ~B~ marks inherited from HTML; presumably they are matched
    # ignoring case when expanded -- confirm.
    xhtml = {}
    for key, value in alltags['html'].items():
        xhtml[key] = value.lower()
    # some like HTML tags as lowercase, some don't... (headers out)
    if HTML_LOWER: alltags['html'] = xhtml.copy()
    # the XHTML specific tags override the inherited ones
    xhtml.update(alltags['xhtml'])
    alltags['xhtml'] = xhtml.copy()

    # compose the target tags dictionary
    tags = {}
    for key in keys: tags[key] = ''                # create empty keys
    for key, value in alltags[config['target']].items():
        tags[key] = maskEscapeChar(value)          # populate

    return tags
- ##############################################################################
- def getRules(config):
- "Returns all the target-specific syntax rules"
-
- ret = {}
- allrules = [
-
- # target rules (ON/OFF)
- 'linkable', # target supports external links
- 'tableable', # target supports tables
- 'imglinkable', # target supports images as links
- 'imgalignable', # target supports image alignment
- 'imgasdefterm', # target supports image as definition term
- 'autonumberlist', # target supports numbered lists natively
- 'autonumbertitle', # target supports numbered titles natively
- 'parainsidelist', # lists items supports paragraph
- 'spacedlistitem', # lists support blank lines between items
- 'listnotnested', # lists cannot be nested
- 'quotenotnested', # quotes cannot be nested
- 'verbblocknotescaped', # don't escape specials in verb block
- 'verbblockfinalescape', # do final escapes in verb block
- 'escapeurl', # escape special in link URL
- 'onelinepara', # dump paragraph as a single long line
- 'tabletitlerowinbold', # manually bold any cell on table titles
- 'tablecellstrip', # strip extra spaces from each table cell
- 'barinsidequote', # bars are allowed inside quote blocks
- 'finalescapetitle', # perform final escapes on title lines
- 'autotocnewpagebefore', # break page before automatic TOC
- 'autotocnewpageafter', # break page after automatic TOC
- 'autotocwithbars', # automatic TOC surrounded by bars
-
- # target code beautify (ON/OFF)
- 'indentverbblock', # add leading spaces to verb block lines
- 'breaktablecell', # break lines after any table cell
- 'breaktablelineopen', # break line after opening table line
- 'notbreaklistopen', # don't break line after opening a new list
- 'notbreakparaopen', # don't break line after opening a new para
- 'keepquoteindent', # don't remove the leading TABs on quotes
- 'keeplistindent', # don't remove the leading spaces on lists
- 'blankendmotherlist', # append a blank line at the mother list end
- 'blankendtable', # append a blank line at the table end
- 'blankendautotoc', # append a blank line at the auto TOC end
- 'tagnotindentable', # tags must be placed at the line begining
-
- # value settings
- 'listmaxdepth', # maximum depth for lists
- 'tablecellaligntype' # type of table cell align: cell, column
- ]
-
- rules_bank = {
- 'txt' : {
- 'indentverbblock':1,
- 'spacedlistitem':1,
- 'parainsidelist':1,
- 'keeplistindent':1,
- 'barinsidequote':1,
- 'autotocwithbars':1,
- 'blankendmotherlist':1
- },
- 'html': {
- 'indentverbblock':1,
- 'linkable':1,
- 'escapeurl':1,
- 'imglinkable':1,
- 'imgalignable':1,
- 'imgasdefterm':1,
- 'autonumberlist':1,
- 'spacedlistitem':1,
- 'parainsidelist':1,
- 'blankendmotherlist':1,
- 'tableable':1,
- 'tablecellstrip':1,
- 'blankendtable':1,
- 'breaktablecell':1,
- 'breaktablelineopen':1,
- 'keeplistindent':1,
- 'keepquoteindent':1,
- 'barinsidequote':1,
- 'autotocwithbars':1,
- 'tablecellaligntype':'cell'
- },
- #TIP xhtml inherits all HTML rules
- 'xhtml': {
- },
- 'sgml': {
- 'linkable':1,
- 'escapeurl':1,
- 'autonumberlist':1,
- 'spacedlistitem':1,
- 'blankendmotherlist':1,
- 'tableable':1,
- 'tablecellstrip':1,
- 'blankendtable':1,
- 'blankendautotoc':1,
- 'quotenotnested':1,
- 'keeplistindent':1,
- 'keepquoteindent':1,
- 'barinsidequote':1,
- 'finalescapetitle':1,
- 'tablecellaligntype':'column'
- },
- 'mgp' : {
- 'blankendmotherlist':1,
- 'tagnotindentable':1,
- 'spacedlistitem':1,
- 'imgalignable':1,
- 'autotocnewpagebefore':1,
- },
- 'tex' : {
- 'autonumberlist':1,
- 'autonumbertitle':1,
- 'spacedlistitem':1,
- 'blankendmotherlist':1,
- 'tableable':1,
- 'tablecellstrip':1,
- 'tabletitlerowinbold':1,
- 'blankendtable':1,
- 'verbblocknotescaped':1,
- 'keeplistindent':1,
- 'listmaxdepth':4,
- 'barinsidequote':1,
- 'finalescapetitle':1,
- 'autotocnewpageafter':1,
- 'tablecellaligntype':'column'
- },
- 'moin': {
- 'spacedlistitem':1,
- 'linkable':1,
- 'blankendmotherlist':1,
- 'keeplistindent':1,
- 'tableable':1,
- 'barinsidequote':1,
- 'blankendtable':1,
- 'tabletitlerowinbold':1,
- 'tablecellstrip':1,
- 'autotocwithbars':1,
- 'tablecellaligntype':'cell'
- },
- 'man' : {
- 'spacedlistitem':1,
- 'indentverbblock':1,
- 'blankendmotherlist':1,
- 'tagnotindentable':1,
- 'tableable':1,
- 'tablecellaligntype':'column',
- 'tabletitlerowinbold':1,
- 'tablecellstrip':1,
- 'blankendtable':1,
- 'keeplistindent':0,
- 'barinsidequote':1,
- 'parainsidelist':0,
- },
- 'pm6' : {
- 'keeplistindent':1,
- 'verbblockfinalescape':1,
- #TODO add support for these - maybe set a JOINNEXT char and
- # do it on addLineBreaks()
- 'notbreaklistopen':1,
- 'notbreakparaopen':1,
- 'barinsidequote':1,
- 'autotocwithbars':1,
- 'onelinepara':1,
- }
- }
-
- # exceptions for --css-sugar
- if config['css-sugar'] and config['target'] in ('html','xhtml'):
- rules_bank['html']['indentverbblock'] = 0
- rules_bank['html']['autotocwithbars'] = 0
-
- # get the target specific rules
- if config['target'] == 'xhtml':
- myrules = rules_bank['html'].copy() # inheritance
- myrules.update(rules_bank['xhtml']) # get XHTML specific
- else:
- myrules = rules_bank[config['target']].copy()
-
- # populate return dictionary
- for key in allrules: ret[key] = 0 # reset all
- ret.update(myrules) # get rules
-
- return ret
- ##############################################################################
def getRegexes():
    """Returns all the regexes used to find the t2t marks

    The return value is a dictionary mapping a mark name to its compiled
    regex. The single non-regex entry is '_urlskel', which keeps the raw
    pattern pieces (plain strings) used here to compose the URL regexes.
    The macro names are read from the keys of the global MACROS.
    """

    bank = {
    # verbatim/raw block delimiters: the mark alone on a line
    # (open and close use the very same pattern)
    'blockVerbOpen':
        re.compile(r'^```\s*$'),
    'blockVerbClose':
        re.compile(r'^```\s*$'),
    'blockRawOpen':
        re.compile(r'^"""\s*$'),
    'blockRawClose':
        re.compile(r'^"""\s*$'),
    # quote: one or more TABs at the line beginning
    'quote':
        re.compile(r'^\t+'),
    # one-line verbatim/raw: mark, one space, then at least one char
    '1lineVerb':
        re.compile(r'^``` (?=.)'),
    '1lineRaw':
        re.compile(r'^""" (?=.)'),
    # mono, raw, bold, italic, underline:
    # - marks must be glued with the contents, no boundary spaces
    # - they are greedy, so in ****bold****, the contents are **bold**
    # - group 1 holds the contents between the marks
    'fontMono':
        re.compile(  r'``([^\s](|.*?[^\s])`*)``'),
    'raw':
        re.compile(  r'""([^\s](|.*?[^\s])"*)""'),
    'fontBold':
        re.compile(r'\*\*([^\s](|.*?[^\s])\**)\*\*'),
    'fontItalic':
        re.compile(  r'//([^\s](|.*?[^\s])/*)//'),
    'fontUnderline':
        re.compile(  r'__([^\s](|.*?[^\s])_*)__'),
    # lists: group 1 is the leading spaces, group 2 is the list mark
    # (the item text must not begin with a space)
    'list':
        re.compile(r'^( *)(-) (?=[^ ])'),
    'numlist':
        re.compile(r'^( *)(\+) (?=[^ ])'),
    # definition list: group 3 is the rest of the line (may be empty)
    'deflist':
        re.compile(r'^( *)(:) (.*)$'),
    # a list mark alone on the line
    'listclose':
        re.compile(r'^( *)([-+:])\s*$'),
    # bar: 20 or more chars from the set _ = - alone on the line
    'bar':
        re.compile(r'^(\s*)([_=-]{20,})\s*$'),
    # table: optional spaces, one or two pipes, then a space
    'table':
        re.compile(r'^ *\|\|? '),
    'blankline':
        re.compile(r'^\s*$'),
    # comment: a percent sign at the very first column
    'comment':
        re.compile(r'^%'),

    # auxiliary tag regexes
    # (note that the three ~A~ entries share the very same pattern)
    '_imgAlign'     : re.compile(r'~A~',re.I),
    '_tableAlign'   : re.compile(r'~A~',re.I),
    '_anchor'       : re.compile(r'~A~',re.I),
    '_tableBorder'  : re.compile(r'~B~',re.I),
    '_tableColAlign': re.compile(r'~C~',re.I),
    }

    # special char to place data on TAGs contents (\a == bell)
    bank['x'] = re.compile('\a')

    # %%macroname [ (formatting) ]
    # The pattern goes through %-formatting, so the four percent signs
    # collapse to a literal '%%' in the final regex. The names come
    # from the MACROS keys, joined as alternatives.
    bank['macros'] = re.compile(r'%%%%(?P<name>%s)\b(\((?P<fmt>.*?)\))?'%(
        string.join(MACROS.keys(), '|')), re.I)

    # %%TOC special macro for TOC positioning
    # (no %-formatting here, so '%%' is already the literal text)
    bank['toc'] = re.compile(r'^ *%%toc\s*$', re.I)

    # almost complicated title regexes ;)
    # Skeleton: opening marks (id), title text (txt), the same marks
    # again (\1 points to the 'id' group, the first one) and an
    # optional [label] made of word chars and hyphens.
    titskel = r'^ *(?P<id>%s)(?P<txt>%s)\1(\[(?P<label>[\w-]*)\])?\s*$'
    # 1 to 5 marks; the text cannot begin nor end with the mark char
    bank[   'title'] = re.compile(titskel%('[=]{1,5}','[^=](|.*[^=])'))
    bank['numtitle'] = re.compile(titskel%('[+]{1,5}','[^+](|.*[^+])'))

    ### complicated regexes begin here ;)
    #
    # textual descriptions on --help's style: [...] is optional, | is OR


    ### first, some auxiliary variables
    #

    # [image.EXT]
    # group 1 is the full filename, group 2 is the extension
    patt_img = r'\[([\w_,.+%$#@!?+~/-]+\.(png|jpe?g|gif|eps|bmp))\]'

    # link things
    # 'proto', 'guess' and 'pass' are complete sub-patterns; the other
    # values are bare char lists, placed inside [...] when used below
    urlskel = {
        'proto' : r'(https?|ftp|news|telnet|gopher|wais)://',
        'guess' : r'(www[23]?|ftp)\.',         # w/out proto, try to guess
        'login' : r'A-Za-z0-9_.-',             # for ftp://login@domain.com
        'pass'  : r'[^ @]*',                   # for ftp://login:pass@dom.com
        'chars' : r'A-Za-z0-9%._/~:,=$@&+-',   # %20(space), :80(port), D&D
        'anchor': r'A-Za-z0-9%._-',            # %nn(encoded)
        'form'  : r'A-Za-z0-9/%&=+;.,$@*_-',   # .,@*_-(as is)
        'punct' : r'.,;:!?'
    }

    # username [ :password ] @
    patt_url_login = r'([%s]+(:%s)?@)?'%(urlskel['login'],urlskel['pass'])

    # [ http:// ] [ username:password@ ] domain.com [ / ]
    #   [ #anchor | ?form=data ]
    # Either an explicit protocol (plus optional login) or a guessed
    # www./ftp. prefix is required to begin the match.
    retxt_url = r'\b(%s%s|%s)[%s]+\b/*(\?[%s]+)?(#[%s]+)?'%(
        urlskel['proto'],patt_url_login, urlskel['guess'],
        urlskel['chars'],urlskel['form'],urlskel['anchor'])

    # filename | [ filename ] #anchor
    retxt_url_local = r'[%s]+|[%s]*(#[%s]+)'%(
        urlskel['chars'],urlskel['chars'],urlskel['anchor'])

    # user@domain [ ?form=data ]
    # the domain must end with a 2 to 4 letters suffix
    patt_email = r'\b[%s]+@([A-Za-z0-9_-]+\.)+[A-Za-z]{2,4}\b(\?[%s]+)?'%(
        urlskel['login'],urlskel['form'])

    # saving for future use
    # (this entry is a dictionary of strings, not a compiled regex)
    bank['_urlskel'] = urlskel

    ### and now the real regexes
    #

    bank['email'] = re.compile(patt_email,re.I)

    # email | url
    # (the URL alternative comes first in the composed pattern)
    bank['link'] = re.compile(r'%s|%s'%(retxt_url,patt_email), re.I)

    # \[ label | imagetag    url | email | filename \]
    # 'label' is an image mark or any text without ']', then one space,
    # then 'link' is tried as URL, e-mail and local file, in this order
    bank['linkmark'] = re.compile(
        r'\[(?P<label>%s|[^]]+) (?P<link>%s|%s|%s)\]'%(
        patt_img, retxt_url, patt_email, retxt_url_local),
        re.L+re.I)

    # image
    bank['img'] = re.compile(patt_img, re.L+re.I)

    # special things
    # a line beginning with %! (followed by optional blanks)
    bank['special'] = re.compile(r'^%!\s*')
    return bank
- ### END OF regex nightmares
- ##############################################################################
def echo(msg):
    """Print 'msg' in bold green (ANSI escapes) -- for quick debug.

    msg: any object; it is converted to text by the %s format.
    """
    # msg goes inside a 1-tuple: a bare "format % msg" blows up (or
    # formats the wrong thing) when msg itself is a tuple, because the
    # tuple would be taken as the list of format arguments.
    # A parenthesized single-argument print is also a valid Python 2
    # print statement, with the very same output.
    print('\033[32;1m%s\033[m' % (msg,))
def Quit(msg, exitcode=0):
    """Show 'msg' on STDOUT and leave the program.

    msg:      the text to print right before exiting
    exitcode: the exit status handed to the system (default: 0)

    Always raises SystemExit.
    """
    print(msg)
    # this is exactly what sys.exit() does under the hood
    raise SystemExit(exitcode)
def Error(msg):
    """Report a fatal error on STDERR and exit with status 1.

    The line written is the translated "<program name>: Error: "
    prefix (program name taken from the global my_name) followed by
    'msg' and a line break.

    Always raises SystemExit (via sys.exit).
    """
    prefix = _("%s: Error: ") % my_name
    text = "%s\n" % msg
    sys.stderr.write(prefix + text)
    sys.stderr.flush()
    sys.exit(1)
def ShowTraceback():
    """Print the traceback of the exception being handled, best effort.

    The traceback is followed by two blank lines on STDOUT. Any failure
    while doing so (even importing the traceback module) is ignored.
    """
    try:
        import traceback
        traceback.print_exc()
        # two blank lines to detach the traceback from the next output
        print('')
        print('')
    except:
        # silent on purpose: showing the traceback is just a bonus
        pass
def Message(msg, level):
    """Print a verbose message, if the current verbosity allows it.

    msg:   the text to show
    level: the message level; it is shown only when level is not
           greater than the global VERBOSE and QUIET is off

    The text is prefixed by five dashes for each level.
    """
    if not level <= VERBOSE or QUIET:
        return
    dashes = '-' * 5
    print("%s %s" % (dashes * level, msg))
def Debug(msg, color=0, linenr=None):
    """Print a debug message, when DEBUG is on and QUIET is off.

    msg:    the text to show
    color:  ANSI color digit, used only when COLOR_DEBUG is on:
            0gray=init, 1red=conf, 3yellow=line, 6cyan=block,
            2green=detail, 5pink=gui
    linenr: optional line number, shown as a "LINE nnnn: " prefix

    The final text is always prefixed by "** ".
    """
    if DEBUG and not QUIET:
        text = msg
        if COLOR_DEBUG:
            text = '\033[3%s;1m%s\033[m' % (color, text)
        # the line prefix is added after the coloring, so it stays
        # outside the color escapes
        if linenr is not None:
            text = "LINE %04d: %s" % (linenr, text)
        print("** %s" % text)
def Readfile(file, remove_linebreaks=0):
    """Read 'file' and return its contents as a list of lines.

    file:              path of the file to read, or '-' to read STDIN
    remove_linebreaks: when true, the trailing line break chars (\\n
                       and \\r) are stripped from every line

    On any read problem the program is aborted by Error().
    """
    if file == '-':
        try:
            data = sys.stdin.readlines()
        # KeyboardInterrupt is listed on purpose: the user who forgot
        # to feed STDIN and hits Ctrl-C gets the friendly message
        # instead of a traceback.
        except (IOError, OSError, KeyboardInterrupt):
            Error(_('You must feed me with data on STDIN!'))
    else:
        try:
            f = open(file)
            # always release the file handle, even if the read fails
            # (try/finally instead of 'with' to keep old Pythons happy)
            try:
                data = f.readlines()
            finally:
                f.close()
        except (IOError, OSError):
            Error(_("Cannot read file:")+"\n %s"%file)
    if remove_linebreaks:
        data = [re.sub('[\n\r]+$', '', line) for line in data]
    Message(_("Readed file (%d lines): %s")%(len(data),file),2)
    return data
def Savefile(file, contents):
    """Write 'contents' to 'file', replacing any previous data.

    file:     path of the file to write (opened in binary mode)
    contents: a list of strings (any list subclass is fine), written
              with writelines(), or a single string, written with
              write(). No line breaks are added.

    If the file cannot be opened the program is aborted by Error().
    """
    try:
        f = open(file, 'wb')
    except (IOError, OSError):
        Error(_("Cannot open file for writing:")+"\n %s"%file)
    # always release the file handle, even if the write fails
    # (try/finally instead of 'with' to keep old Pythons happy)
    try:
        if isinstance(contents, list):
            f.writelines(contents)
        else:
            f.write(contents)
    finally:
        f.close()
def showdic(dic):
    """Print the dictionary contents, one "key : value" pair per line.

    The key is right-aligned on a 15 chars wide column.
    """
    for key, value in dic.items():
        print("%15s : %s" % (key, value))
def dotted_spaces(txt=''):
    """Return 'txt' with every space replaced by a dot."""
    # the string method is what string.replace() calls internally
    return txt.replace(' ', '.')
def get_rc_path():
    """Return the full path for the user's RC file.

    The user directory is taken from the first non-empty environment
    variable among HOME and HOMEPATH, and the file name from the
    global RC. On Windows the path is prefixed by the HOMEDRIVE
    contents, when that variable is set.

    Returns '' when no user directory is found. The path is NOT
    checked for existence, that is up to the caller.
    """
    # search the RC dir on the specified system variables
    # TIP: win: http://www.winnetmag.com/Article/ArticleID/23873/23873.html
    rc_dir = None
    for var in ('HOME', 'HOMEPATH'):
        rc_dir = os.environ.get(var)
        if rc_dir:
            break
    if not rc_dir:
        return ''
    rc_path = os.path.join(rc_dir, RC)
    # on windows, prefix with the drive (%homedrive%: 2k/XP/NT)
    if sys.platform[:3] == 'win':
        # HOMEDRIVE may be unset (it is a 2k/XP/NT thing): fall back
        # to '' so join() leaves the path untouched instead of
        # crashing on a None component
        rc_drive = os.environ.get('HOMEDRIVE') or ''
        rc_path = os.path.join(rc_drive, rc_path)
    return rc_path
- ##############################################################################
- class CommandLine:
- """
- Command Line class - Masters command line
- This class checks and extract data from the provided command line.
- The --long options and flags are taken from the global OPTIONS,
- FLAGS and ACTIONS dictionaries. The short options are registered
- here, and also their equivalence to the long ones.
- METHODS:
- _compose_short_opts() -> str
- _compose_long_opts() -> list
- Compose the valid short and long options list, on the
- 'getopt' format.
-
- parse() -> (opts, args)
- Call getopt to check and parse the command line.
- It expects to receive the command line as a list, and
- without the program name (sys.argv[1:]).
-
- get_raw_config() -> [RAW config]
- Scans command line and convert the data to the RAW config
- format. See ConfigMaster class to the RAW format description.
- Optional 'ignore' and 'filter' arguments are used to filter
- in or out specified keys.
-
- compose_cmdline(dict) -> [Command line]
- Compose a command line list from an already parsed config
- dictionary, generated from RAW by ConfigMaster(). Use
- this to compose an optimal command line for a group of
- options.
-
- The get_raw_config() calls parse(), so the tipical use of this
- class is:
-
- raw = CommandLine().get_raw_config(sys.argv[1:])
- """
- def __init__(self):
- self.all_options = OPTIONS.keys()
- self.all_flags = FLAGS.keys()
- self.all_actions = ACTIONS.keys()
-
- # short:long options equivalence
- self.short_long = {
- 'h':'help' , 'V':'version',
- 'n':'enum-title', 'i':'infile' ,
- 'H':'no-headers', 'o':'outfile',
- 'v':'verbose' , 't':'target' ,
- 'q':'quiet'
- }
-
- # compose valid short and long options data for getopt
- self.short_opts = self._compose_short_opts()
- self.long_opts = self._compose_long_opts()
-
- def _compose_short_opts(self):
- "Returns a string like 'hVt:o' with all short options/flags"
- ret = []
- for opt in self.short_long.keys():
- long = self.short_long[opt]
- if long in self.all_options: # is flag or option?
- opt = opt+':' # option: have param
- ret.append(opt)
- Debug('Valid SHORT options: %s'%ret)
- return string.join(ret, '')
-
- def _compose_long_opts(self):
- "Returns a list with all the valid long options/flags"
- ret = map(lambda x:x+'=', self.all_options) # add =
- ret.extend(self.all_flags) # flag ON
- ret.extend(self.all_actions) # acts
- ret.extend(map(lambda x:'no-'+x, self.all_flags)) # add no-*
- ret.extend(['no-style']) # turn…
Large files files are truncated, but you can click here to view the full file