txt2tags-2.1.py | searchcode

/old/txt2tags-2.1.py

Large files are truncated, but you can click here to view the full file

#!/usr/bin/env python
# txt2tags - generic text conversion tool
# http://txt2tags.sf.net
#
# Copyright 2001, 2002, 2003, 2004 Aurelio Marinho Jargas
#
#   This program is free software; you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation, version 2.
#
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License for more details.
#
#   You have received a copy of the GNU General Public License along
#   with this program, on the COPYING file.
#
#
#
#   +-------------------------------------------------------------+
#   |               IMPORTANT MESSAGES, PLEASE READ               |
#   +-------------------------------------------------------------+
#   |                                                             |
#   |                                                             |
#   |                     v1.x COMPATIBILITY                      |
#   |                     ------------------                      |
#   |                                                             |
#   |      Due to the major syntax changes, the new 2.x series    |
#   |      BREAKS backwards compatibility.                        |
#   |                                                             |
#   |      Use the 't2tconv' script to upgrade your existing      |
#   |      v1.x files to conform to the new v2.x syntax.          |
#   |                                                             |
#   |      Do a visual inspection on the new converted file.      |
#   |      Especially Pre & Post proc filters can break.          |
#   |      Check them!                                            |
#   |                                                             |
#   |                                                             |
#   +-------------------------------------------------------------+
#
#
########################################################################
#
#   BORING CODE EXPLANATION AHEAD
#
# Just read if you wish to understand how the txt2tags code works
#
########################################################################
#
# Version 2.0 was a complete rewrite for the program 'core'.
#
# Now the code that [1] parses the marked text is separated from the
# code that [2] inserts the target tags.
#
#   [1] made by: def convert()
#   [2] made by: class BlockMaster
#
# The structures of the marked text are identified and their contents
# are extracted into a data holder (Python lists and dictionaries).
#
# When parsing the source file, the blocks (para, lists, quote, table)
# are opened with BlockMaster, right when found. Then their contents,
# which span several lines, are fed into a special holder on the
# BlockMaster instance. Only when the block is closed are the target
# tags inserted for the full block as a whole, in one pass. This way, we
# have better control over blocks. Much better than the previous line by
# line approach.
#
# In other words, whenever inside a block, the parser *holds* the tag
# insertion process, waiting until the full block is read. That was
# needed primarily to close paragraphs for the new XHTML target, but
# proved to be a very good addition, improving many other processes.
#
# -------------------------------------------------------------------
#
# There is also brand new code for the Configuration schema, 100%
# rewritten. There are new classes, all self documented: CommandLine,
# SourceDocument, ConfigMaster and ConfigLines. In short, a new RAW
# Config format was created, and every kind of configuration is first
# converted to this format, and then a generic method parses it.
#
# The init processing was also changed, and now the functions which
# get information about the input files are: get_infiles_config(),
#  process_source_file() and convert_this_files()
#
# Other parts are untouched, and remain the same as in v1.7, such as
# the mark regexes, target Headers and target Tags&Rules.
#
########################################################################

# Now I think the code is nice, easier to read and understand

#XXX Python coding warning
# Avoid common mistakes:
# - do NOT use newlist=list, use newlist=list[:] instead
# - do NOT use newdic=dic, use newdic=dic.copy() instead
# - do NOT use dic[key], use dic.get(key) instead
# - do NOT use del dic[key] without has_key() before

#XXX Smart Image Align doesn't work if the image is a link
# Can't fix that because the image is expanded together with the
# link, at the linkbank filling moment. Only the image is passed
# to parse_images(), not the full line, so it is always 'middle'.

#XXX Paragraph separation not valid inside Quote
# Quote will not have <p></p> inside, instead it will close and open
# the <blockquote> again. This is really bad for CSS, when defining a
# different background color. Still don't know how to fix it.

#XXX TODO (maybe)
# New mark or macro which expands to an anchor full title.
# It is necessary to parse the full document in this order:
#  DONE  1st scan: HEAD: get all settings, including %!includeconf
#  DONE  2nd scan: BODY: expand includes & apply %!preproc
#        3rd scan: BODY: read titles and compose TOC info
#        4th scan: BODY: full parsing, expanding [#anchor] 1st
# Steps 2 and 3 can be made together, with no tag adding.
# Two complete body scans will be *slow*, don't know if it is worth it.

##############################################################################

# User config (1=ON, 0=OFF)

USE_I18N    = 1   # use gettext for i18ned messages?        (default is 1)
COLOR_DEBUG = 1   # show debug messages in colors?          (default is 1)
HTML_LOWER  = 0   # use lowercased HTML tags instead upper? (default is 0)

##############################################################################


# these are all the core Python modules used by txt2tags (KISS!)
import re, string, os, sys, time, getopt

# program information
my_url = 'http://txt2tags.sf.net'
my_name = 'txt2tags'
my_email = 'verde@aurelio.net'
my_version = '2.1'

# i18n - just use if available
#
# '_' is the message translation function used all over the file.
# When USE_I18N is OFF, or when anything goes wrong while loading the
# message catalog (gettext module missing, catalog not found, ...),
# '_' falls back to an identity function, so messages are shown
# untranslated instead of aborting the program.
if USE_I18N:
	try:
		import gettext
		# if your locale dir is different, change it here
		cat = gettext.Catalog('txt2tags',localedir='/usr/share/locale/')
		_ = cat.gettext
	# Best effort on purpose, but catch only real errors: a bare
	# 'except:' would also swallow KeyboardInterrupt and SystemExit.
	except Exception:
		_ = lambda x:x
else:
	_ = lambda x:x

# FLAGS   : the conversion related flags  , may be used in %!options
# OPTIONS : the conversion related options, may be used in %!options
# ACTIONS : the other behaviour modifiers, valid on command line only
# MACROS  : the valid macros with their default values for formatting
# SETTINGS: global miscellaneous settings, valid on RC file only
# CONFIG_KEYWORDS: the valid %!key:val keywords
# TARGETS : the names of the known target formats
# TARGET_NAMES: a description for each target, passed through _()
#
# FLAGS and OPTIONS are configs that affect the converted document.
# They usually also have a --no-<option> to turn them OFF.
# ACTIONS are needed because, when converting multiple input files,
# strange behaviour would result, such as using the command line
# interface for the first file and the gui for the second. There is
# no --no-<action>.
# --version and --help inside %!options would also be odd.
#
TARGETS  = ['html', 'xhtml', 'sgml', 'tex', 'man', 'mgp', 'moin', 'pm6', 'txt']
# NOTE(review): 'css-suggar' looks like a misspelled twin of 'css-sugar',
# presumably kept as a compatibility alias -- confirm before removing it.
FLAGS    = {'headers'    :1 , 'enum-title' :0 , 'mask-email' :0 ,
            'toc-only'   :0 , 'toc'        :0 , 'rc'         :1 ,
            'css-sugar'  :0 , 'css-suggar' :0 , 'quiet'      :0 }
OPTIONS  = {'target'     :'', 'toc-level'  :3 , 'style'      :'',
            'infile'     :'', 'outfile'    :'', 'encoding'   :'',
            'split'      :0 , 'lang'       :''}
ACTIONS  = {'help'       :0 , 'version'    :0 , 'gui'        :0 ,
            'verbose'    :0 , 'debug'      :0 , 'dump-config':0 }
MACROS   = {'date' : '%Y%m%d',  'infile': '%f',
            'mtime': '%Y%m%d', 'outfile': '%f'}
SETTINGS = {}         # for future use
CONFIG_KEYWORDS = [
            'target', 'encoding', 'style', 'options', 'preproc','postproc',
            'guicolors']
# one entry for each name listed in TARGETS
TARGET_NAMES = {
  'html' : _('HTML page'),
  'xhtml': _('XHTML page'),
  'sgml' : _('SGML document'),
  'tex'  : _('LaTeX document'),
  'man'  : _('UNIX Manual page'),
  'mgp'  : _('Magic Point presentation'),
  'moin' : _('MoinMoin page'),
  'pm6'  : _('PageMaker 6.0 document'),
  'txt'  : _('Plain Text'),
}

# Module-level state, initialized here with its default values.
# NOTE(review): the code that reads and updates these globals is
# outside this section of the file.
DEBUG = 0     # do not edit here, please use --debug
VERBOSE = 0   # do not edit here, please use -v, -vv or -vvv
QUIET = 0     # do not edit here, please use --quiet
GUI = 0
AUTOTOC = 1
RC_RAW = []
CMDLINE_RAW = []
CONF = {}
BLOCK = None
regex = {}
TAGS = {}
rules = {}

lang = 'english'
TARGET = ''

# the same '-' string names both the standard input and standard output
STDIN = STDOUT = '-'
# NOTE(review): presumably internal marker chars that never appear in
# normal text -- confirm against the code that uses them
ESCCHAR   = '\x00'
SEPARATOR = '\x01'
# list mark character -> list block name
LISTNAMES = {'-':'list', '+':'numlist', ':':'deflist'}
LINEBREAK = {'default':'\n', 'win':'\r\n', 'mac':'\r'}
RCFILE    = {'default':'.txt2tagsrc', 'win':'_t2trc'}

# platform specific settings
# The first three chars of sys.platform are the lookup key ('win',
# 'mac'); any other platform gets the 'default' entry.
LB = LINEBREAK.get(sys.platform[:3]) or LINEBREAK['default']
RC =    RCFILE.get(sys.platform[:3]) or    RCFILE['default']

# identify a development version
#dev_suffix = '-dev'+time.strftime('%m%d',time.localtime(time.time()))
#my_version = my_version + dev_suffix

# translatable version banner: program name, version and URL
VERSIONSTR = _("%s version %s <%s>")%(my_name,my_version,my_url)

# The program help text.
# Each list item is one line of the message (the Example item embeds its
# own line break); the items are joined with '\n' at the end.
# The items wrapped in _() go through the translation function, the
# other ones (blank lines, option names) are used as they are.
# The list of targets is composed from TARGETS: its repr() with the
# brackets and quotes removed.
USAGE = string.join([
'',
_("Usage: %s [OPTIONS] [infile.t2t ...]") % my_name,
'',
_("  -t, --target        set target document type. currently supported:"),
'                      %s' % re.sub(r"[]'[]",'',repr(TARGETS)),
_("  -i, --infile=FILE   set FILE as the input file name ('-' for STDIN)"),
_("  -o, --outfile=FILE  set FILE as the output file name ('-' for STDOUT)"),
_("  -n, --enum-title    enumerate all title lines as 1, 1.1, 1.1.1, etc"),
_("  -H, --no-headers    suppress header, title and footer contents"),
_("      --headers       show header, title and footer contents (default ON)"),
_("      --encoding      set target file encoding (utf-8, iso-8859-1, etc)"),
_("      --style=FILE    use FILE as the document style (like HTML CSS)"),
_("      --css-sugar     insert CSS-friendly tags for HTML and XHTML targets"),
_("      --mask-email    hide email from spam robots. x@y.z turns <x (a) y z>"),
_("      --toc           add TOC (Table of Contents) to target document"),
_("      --toc-only      print document TOC and exit"),
_("      --toc-level=N   set maximum TOC level (depth) to N"),
_("      --rc            read user config file ~/.txt2tagsrc (default ON)"),
_("      --gui           invoke Graphical Tk Interface"),
_("  -q, --quiet         quiet mode, suppress all output (except errors)"),
_("  -v, --verbose       print informative messages during conversion"),
_("  -h, --help          print this help information and exit"),
_("  -V, --version       print program version and exit"),
_("      --dump-config   print all the config found and exit"),
'',
_("Turn OFF options:"),
"     --no-outfile, --no-infile, --no-style, --no-encoding, --no-headers",
"     --no-toc, --no-toc-only, --no-mask-email, --no-enum-title, --no-rc",
"     --no-css-sugar, --no-quiet",
'',
_("Example:\n     %s -t html --toc myfile.t2t") % my_name,
'',
_("By default, converted output is saved to 'infile.<target>'."),
_("Use --outfile to force an output file name."),
_("If  input file is '-', reads from STDIN."),
_("If output file is '-', dumps output to STDOUT."),
''
], '\n')


##############################################################################


# here are all the targets' header templates
# you may edit them to fit your needs
#  - the %(HEADERn)s strings represent the Header lines
#  - the %(STYLE)s string is changed by --style contents
#  - the %(ENCODING)s string is changed by --encoding contents
#  - if any of the above is empty, the full line is removed
#  - use %% to represent a literal %
#
# Notes about the literals:
#  - the backslash right after the opening quotes avoids a blank
#    first line on the template
#  - the 'tex' template is a raw string, so its backslashes are literal
#  - NOTE(review): the 'htmlcss' and 'xhtmlcss' keys are not listed in
#    TARGETS, presumably they are the --css-sugar variations of the
#    'html' and 'xhtml' headers -- confirm against the code that
#    selects the template
#
HEADER_TEMPLATE = {
  'txt': """\
%(HEADER1)s
%(HEADER2)s
%(HEADER3)s
""",

  'sgml': """\
<!doctype linuxdoc system>
<article>
<title>%(HEADER1)s
<author>%(HEADER2)s
<date>%(HEADER3)s
""",

  'html': """\
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
<HTML>
<HEAD>
<META NAME="generator" CONTENT="http://txt2tags.sf.net">
<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=%(ENCODING)s">
<LINK REL="stylesheet" TYPE="text/css" HREF="%(STYLE)s">
<TITLE>%(HEADER1)s</TITLE>
</HEAD><BODY BGCOLOR="white" TEXT="black">
<P ALIGN="center"><CENTER><H1>%(HEADER1)s</H1>
<FONT SIZE="4">
<I>%(HEADER2)s</I><BR>
%(HEADER3)s
</FONT></CENTER>
""",

  'htmlcss': """\
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
<HTML>
<HEAD>
<META NAME="generator" CONTENT="http://txt2tags.sf.net">
<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=%(ENCODING)s">
<LINK REL="stylesheet" TYPE="text/css" HREF="%(STYLE)s">
<TITLE>%(HEADER1)s</TITLE>
</HEAD>
<BODY>

<DIV CLASS="header" ID="header">
<H1>%(HEADER1)s</H1>
<H2>%(HEADER2)s</H2>
<H3>%(HEADER3)s</H3>
</DIV>
""",

# the backslash at the end of the DOCTYPE line joins it with the next one
  'xhtml': """\
<?xml version="1.0"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"\
 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>%(HEADER1)s</title>
<meta name="generator" content="http://txt2tags.sf.net" />
<meta http-equiv="Content-Type" content="text/html; charset=%(ENCODING)s" />
<link rel="stylesheet" type="text/css" href="%(STYLE)s" />
</head>
<body bgcolor="white" text="black">
<div align="center">
<h1>%(HEADER1)s</h1>
<h2>%(HEADER2)s</h2>
<h3>%(HEADER3)s</h3>
</div>
""",

  'xhtmlcss': """\
<?xml version="1.0"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"\
 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>%(HEADER1)s</title>
<meta name="generator" content="http://txt2tags.sf.net" />
<meta http-equiv="Content-Type" content="text/html; charset=%(ENCODING)s" />
<link rel="stylesheet" type="text/css" href="%(STYLE)s" />
</head>
<body>

<div class="header" id="header">
<h1>%(HEADER1)s</h1>
<h2>%(HEADER2)s</h2>
<h3>%(HEADER3)s</h3>
</div>
""",

# note the order: HEADER3 comes before HEADER2 on the .TH line
  'man': """\
.TH "%(HEADER1)s" 1 "%(HEADER3)s" "%(HEADER2)s"
""",

# TODO style to <HR>
  'pm6': """\
<PMTags1.0 win><C-COLORTABLE ("Preto" 1 0 0 0)
><@Normal=
  <FONT "Times New Roman"><CCOLOR "Preto"><SIZE 11>
  <HORIZONTAL 100><LETTERSPACE 0><CTRACK 127><CSSIZE 70><C+SIZE 58.3>
  <C-POSITION 33.3><C+POSITION 33.3><P><CBASELINE 0><CNOBREAK 0><CLEADING -0.05>
  <GGRID 0><GLEFT 7.2><GRIGHT 0><GFIRST 0><G+BEFORE 7.2><G+AFTER 0>
  <GALIGNMENT "justify"><GMETHOD "proportional"><G& "ENGLISH">
  <GPAIRS 12><G%% 120><GKNEXT 0><GKWIDOW 0><GKORPHAN 0><GTABS $>
  <GHYPHENATION 2 34 0><GWORDSPACE 75 100 150><GSPACE -5 0 25>
><@Bullet=<@-PARENT "Normal"><FONT "Abadi MT Condensed Light">
  <GLEFT 14.4><G+BEFORE 2.15><G%% 110><GTABS(25.2 l "")>
><@PreFormat=<@-PARENT "Normal"><FONT "Lucida Console"><SIZE 8><CTRACK 0>
  <GLEFT 0><G+BEFORE 0><GALIGNMENT "left"><GWORDSPACE 100 100 100><GSPACE 0 0 0>
><@Title1=<@-PARENT "Normal"><FONT "Arial"><SIZE 14><B>
  <GCONTENTS><GLEFT 0><G+BEFORE 0><GALIGNMENT "left">
><@Title2=<@-PARENT "Title1"><SIZE 12><G+BEFORE 3.6>
><@Title3=<@-PARENT "Title1"><SIZE 10><GLEFT 7.2><G+BEFORE 7.2>
><@Title4=<@-PARENT "Title3">
><@Title5=<@-PARENT "Title3">
><@Quote=<@-PARENT "Normal"><SIZE 10><I>>

%(HEADER1)s
%(HEADER2)s
%(HEADER3)s
""",

# all the mgp commands start with %, so they are written as %% here
  'mgp': """\
#!/usr/X11R6/bin/mgp -t 90
%%deffont "normal"    xfont  "utopia-medium-r", charset "iso8859-1"
%%deffont "normal-i"  xfont  "utopia-medium-i", charset "iso8859-1"
%%deffont "normal-b"  xfont  "utopia-bold-r"  , charset "iso8859-1"
%%deffont "normal-bi" xfont  "utopia-bold-i"  , charset "iso8859-1"
%%deffont "mono"      xfont "courier-medium-r", charset "iso8859-1"
%%default 1 size 5
%%default 2 size 8, fore "yellow", font "normal-b", center
%%default 3 size 5, fore "white",  font "normal", left, prefix "  "
%%tab 1 size 4, vgap 30, prefix "     ", icon arc "red" 40, leftfill
%%tab 2 prefix "            ", icon arc "orange" 40, leftfill
%%tab 3 prefix "                   ", icon arc "brown" 40, leftfill
%%tab 4 prefix "                          ", icon arc "darkmagenta" 40, leftfill
%%tab 5 prefix "                                ", icon arc "magenta" 40, leftfill
%%%%------------------------- end of headers -----------------------------
%%page





%%size 10, center, fore "yellow"
%(HEADER1)s

%%font "normal-i", size 6, fore "white", center
%(HEADER2)s

%%font "mono", size 7, center
%(HEADER3)s
""",

# TODO please, improve me!
  'moin': """\
'''%(HEADER1)s'''

''%(HEADER2)s''

%(HEADER3)s
""",

# raw string: the LaTeX backslashes are literal, the %% are TeX comments
  'tex': \
r"""\documentclass[11pt,a4paper]{article}
\usepackage{amsfonts,graphicx,url}
\usepackage[%(ENCODING)s]{inputenc}  %% char encoding
\usepackage{%(STYLE)s}  %% user defined package
\pagestyle{plain}   %% do page numbering ('empty' turns off)
\frenchspacing      %% no aditional spaces after periods
\setlength{\parskip}{8pt}\parindent=0pt  %% no paragraph indentation
%% uncomment next line for fancy PDF output on Adobe Acrobat Reader
%%\usepackage[pdfstartview=FitV,colorlinks=true,bookmarks=true]{hyperref}

\title{%(HEADER1)s}
\author{%(HEADER2)s}
\begin{document}
\date{%(HEADER3)s}
\maketitle
\clearpage
"""
}


##############################################################################


def getTags(config):
	"""Returns all the known tags for the specified target

	config: dictionary, the keys 'target' and 'css-sugar' are read.
	        config['target'] must be one of the target names defined
	        in the 'alltags' table below, or a KeyError is raised.

	Returns a new dictionary with ALL the names listed in 'keys'.
	The tags not defined by the target are set to '' (empty string),
	and the defined ones are passed through maskEscapeChar().

	Notes:
	 - The XHTML tags are the HTML ones lowercased, then updated with
	   the specific 'xhtml' definitions.
	 - The lowercase is applied to the full tag string, so marks as
	   ~A~ turn to ~a~ on XHTML.
	   NOTE(review): so the code that expands those marks must ignore
	   case -- confirm.
	 - NOTE(review): the \\a char seems to mark the place where the
	   text is inserted, and the ~A~, ~B~, ~C~ marks seem to be filled
	   with align/border info -- confirm against the code that uses
	   the tags.
	"""
	
	# all the valid tag names
	keys = [
	'paragraphOpen','paragraphClose',
	'title1','title2','title3','title4','title5',
	'numtitle1','numtitle2','numtitle3','numtitle4','numtitle5',
	'blockVerbOpen','blockVerbClose',
	'blockQuoteOpen','blockQuoteClose','blockQuoteLine',
	'fontMonoOpen','fontMonoClose',
	'fontBoldOpen','fontBoldClose',
	'fontItalicOpen','fontItalicClose',
	'fontUnderlineOpen','fontUnderlineClose',
	'listOpen','listClose',
	'listItemOpen','listItemClose','listItemLine',
	'numlistOpen','numlistClose',
	'numlistItemOpen','numlistItemClose','numlistItemLine',
	'deflistOpen','deflistClose',
	'deflistItem1Open','deflistItem1Close',
	'deflistItem2Open','deflistItem2Close',
	'bar1','bar2',
	'url','urlMark','email','emailMark',
	'img',
	'tableOpen','tableClose',
	'tableRowOpen','tableRowClose','tableRowSep',
	'tableCellOpen','tableCellClose','tableCellSep',
	'tableTitleCellOpen','tableTitleCellClose','tableTitleCellSep',
	'tableTitleRowOpen','tableTitleRowClose',
	'tableBorder', 'tableAlignLeft', 'tableAlignCenter',
	'tableCellAlignLeft','tableCellAlignRight','tableCellAlignCenter',
	'tableColAlignLeft','tableColAlignRight','tableColAlignCenter',
	'tableColAlignSep',
	'anchor','comment','pageBreak',
	'TOC','tocOpen','tocClose',
	'bodyOpen','bodyClose',
	'EOD'
	]
	
	# the tags of each target (just the non-empty ones)
	alltags = {
	
	'txt': {
	   'title1'              : '  \a'      ,
	   'title2'              : '\t\a'      ,
	   'title3'              : '\t\t\a'    ,
	   'title4'              : '\t\t\t\a'  ,
	   'title5'              : '\t\t\t\t\a',
	   'blockQuoteLine'      : '\t'        ,
	   'listItemOpen'        : '- '        ,
	   'numlistItemOpen'     : '\a. '      ,
	   'bar1'                : '\a'        ,
	   'bar2'                : '\a'        ,
	   'url'                 : '\a'        ,
	   'urlMark'             : '\a (\a)'   ,
	   'email'               : '\a'        ,
	   'emailMark'           : '\a (\a)'   ,
	   'img'                 : '[\a]'      ,
	},
	
	'html': {
	   'paragraphOpen'       : '<P>'            ,
	   'paragraphClose'      : '</P>'           ,
	   'title1'              : '~A~<H1>\a</H1>' ,
	   'title2'              : '~A~<H2>\a</H2>' ,
	   'title3'              : '~A~<H3>\a</H3>' ,
	   'title4'              : '~A~<H4>\a</H4>' ,
	   'title5'              : '~A~<H5>\a</H5>' ,
	   'blockVerbOpen'       : '<PRE>'          ,
	   'blockVerbClose'      : '</PRE>'         ,
	   'blockQuoteOpen'      : '<BLOCKQUOTE>'   ,
	   'blockQuoteClose'     : '</BLOCKQUOTE>'  ,
	   'fontMonoOpen'        : '<CODE>'         ,
	   'fontMonoClose'       : '</CODE>'        ,
	   'fontBoldOpen'        : '<B>'            ,
	   'fontBoldClose'       : '</B>'           ,
	   'fontItalicOpen'      : '<I>'            ,
	   'fontItalicClose'     : '</I>'           ,
	   'fontUnderlineOpen'   : '<U>'            ,
	   'fontUnderlineClose'  : '</U>'           ,
	   'listOpen'            : '<UL>'           ,
	   'listClose'           : '</UL>'          ,
	   'listItemOpen'        : '<LI>'           ,
	   'numlistOpen'         : '<OL>'           ,
	   'numlistClose'        : '</OL>'          ,
	   'numlistItemOpen'     : '<LI>'           ,
	   'deflistOpen'         : '<DL>'           ,
	   'deflistClose'        : '</DL>'          ,
	   'deflistItem1Open'    : '<DT>'           ,
	   'deflistItem1Close'   : '</DT>'          ,
	   'deflistItem2Open'    : '<DD>'           ,
	   'bar1'                : '<HR NOSHADE SIZE=1>'        ,
	   'bar2'                : '<HR NOSHADE SIZE=5>'        ,
	   'url'                 : '<A HREF="\a">\a</A>'        ,
	   'urlMark'             : '<A HREF="\a">\a</A>'        ,
	   'email'               : '<A HREF="mailto:\a">\a</A>' ,
	   'emailMark'           : '<A HREF="mailto:\a">\a</A>' ,
	   'img'                :'<IMG ALIGN="~A~" SRC="\a" BORDER="0" ALT="">',
	   'tableOpen'           : '<TABLE~A~ CELLPADDING="4"~B~>',
	   'tableClose'          : '</TABLE>'       ,
	   'tableRowOpen'        : '<TR>'           ,
	   'tableRowClose'       : '</TR>'          ,
	   'tableCellOpen'       : '<TD\a>'         ,
	   'tableCellClose'      : '</TD>'          ,
	   'tableTitleCellOpen'  : '<TH>'           ,
	   'tableTitleCellClose' : '</TH>'          ,
	   'tableBorder'         : ' BORDER="1"'    ,
	   'tableAlignCenter'    : ' ALIGN="center"',
	   'tableCellAlignRight' : ' ALIGN="right"' ,
	   'tableCellAlignCenter': ' ALIGN="center"',
	   'anchor'              : '<A NAME="\a"></A>\n',
	   'comment'             : '<!-- \a -->'    ,
	   'EOD'                 : '</BODY></HTML>'
	},
	
	#TIP xhtml inherits all HTML definitions (lowercased)
	#TIP http://www.w3.org/TR/xhtml1/#guidelines
	#TIP http://www.htmlref.com/samples/Chapt17/17_08.htm
	'xhtml': {
	   'listItemClose'       : '</li>'          ,
	   'numlistItemClose'    : '</li>'          ,
	   'deflistItem2Close'   : '</dd>'          ,
	   'bar1'                : '<hr class="light" />',
	   'bar2'                : '<hr class="heavy" />',
	   'anchor'              : '<a id="\a" name="\a"></a>\n',
	   'img'               :'<img align="~A~" src="\a" border="0" alt=""/>',
	},
	
	'sgml': {
	   'paragraphOpen'       : '<p>'                ,
	   'title1'              : '<sect>\a~A~<p>'     ,
	   'title2'              : '<sect1>\a~A~<p>'    ,
	   'title3'              : '<sect2>\a~A~<p>'    ,
	   'title4'              : '<sect3>\a~A~<p>'    ,
	   'title5'              : '<sect4>\a~A~<p>'    ,
	   'blockVerbOpen'       : '<tscreen><verb>'    ,
	   'blockVerbClose'      : '</verb></tscreen>'  ,
	   'blockQuoteOpen'      : '<quote>'            ,
	   'blockQuoteClose'     : '</quote>'           ,
	   'fontMonoOpen'        : '<tt>'               ,
	   'fontMonoClose'       : '</tt>'              ,
	   'fontBoldOpen'        : '<bf>'               ,
	   'fontBoldClose'       : '</bf>'              ,
	   'fontItalicOpen'      : '<em>'               ,
	   'fontItalicClose'     : '</em>'              ,
	   'fontUnderlineOpen'   : '<bf><em>'           ,
	   'fontUnderlineClose'  : '</em></bf>'         ,
	   'listOpen'            : '<itemize>'          ,
	   'listClose'           : '</itemize>'         ,
	   'listItemOpen'        : '<item>'             ,
	   'numlistOpen'         : '<enum>'             ,
	   'numlistClose'        : '</enum>'            ,
	   'numlistItemOpen'     : '<item>'             ,
	   'deflistOpen'         : '<descrip>'          ,
	   'deflistClose'        : '</descrip>'         ,
	   'deflistItem1Open'    : '<tag>'              ,
	   'deflistItem1Close'   : '</tag>'             ,
	   'bar1'                : '<!-- \a -->'        ,
	   'bar2'                : '<!-- \a -->'        ,
	   'url'                 : '<htmlurl url="\a" name="\a">'        ,
	   'urlMark'             : '<htmlurl url="\a" name="\a">'        ,
	   'email'               : '<htmlurl url="mailto:\a" name="\a">' ,
	   'emailMark'           : '<htmlurl url="mailto:\a" name="\a">' ,
	   'img'                 : '<figure><ph vspace=""><img src="\a">'+\
	                           '</figure>'                           ,
	   'tableOpen'           : '<table><tabular ca="~C~">'           ,
	   'tableClose'          : '</tabular></table>' ,
	   'tableRowSep'         : '<rowsep>'           ,
	   'tableCellSep'        : '<colsep>'           ,
	   'tableColAlignLeft'   : 'l'                  ,
	   'tableColAlignRight'  : 'r'                  ,
	   'tableColAlignCenter' : 'c'                  ,
	   'comment'             : '<!-- \a -->'        ,
	   'anchor'              : '<label id="\a">'    ,
	   'TOC'                 : '<toc>'              ,
	   'EOD'                 : '</article>'
	},
	
	# every literal backslash is written as \\ (no invalid escapes)
	'tex': {
	   'title1'              : '\n\\section*{\a}',
	   'title2'              : '\\subsection*{\a}'       ,
	   'title3'              : '\\subsubsection*{\a}'    ,
	   # title 4/5: DIRTY: para+BF+\\+\n
	   'title4'              : '\\paragraph{}\\textbf{\a}\\\\\n',
	   'title5'              : '\\paragraph{}\\textbf{\a}\\\\\n',
	   'numtitle1'           : '\n\\section{\a}',
	   'numtitle2'           : '\\subsection{\a}'       ,
	   'numtitle3'           : '\\subsubsection{\a}'    ,
	   'blockVerbOpen'       : '\\begin{verbatim}'   ,
	   'blockVerbClose'      : '\\end{verbatim}'     ,
	   'blockQuoteOpen'      : '\\begin{quotation}'  ,
	   'blockQuoteClose'     : '\\end{quotation}'    ,
	   'fontMonoOpen'        : '\\texttt{'           ,
	   'fontMonoClose'       : '}'                   ,
	   'fontBoldOpen'        : '\\textbf{'           ,
	   'fontBoldClose'       : '}'                   ,
	   'fontItalicOpen'      : '\\textit{'           ,
	   'fontItalicClose'     : '}'                   ,
	   'fontUnderlineOpen'   : '\\underline{'        ,
	   'fontUnderlineClose'  : '}'                   ,
	   'listOpen'            : '\\begin{itemize}'    ,
	   'listClose'           : '\\end{itemize}'      ,
	   'listItemOpen'        : '\\item '             ,
	   'numlistOpen'         : '\\begin{enumerate}'  ,
	   'numlistClose'        : '\\end{enumerate}'    ,
	   'numlistItemOpen'     : '\\item '             ,
	   'deflistOpen'         : '\\begin{description}',
	   'deflistClose'        : '\\end{description}'  ,
	   'deflistItem1Open'    : '\\item['             ,
	   'deflistItem1Close'   : ']'                   ,
	   'bar1'                : '\n\\hrulefill{}\n'   ,
	   'bar2'                : '\n\\rule{\\linewidth}{1mm}\n',
	   'url'                 : '\\url{\a}'                  ,
	   'urlMark'             : '\\textit{\a} (\\url{\a})'   ,
	   'email'               : '\\url{\a}'                  ,
	   'emailMark'           : '\\textit{\a} (\\url{\a})'   ,
	   'img'                 : '\\includegraphics{\a}',
	   'tableOpen'           : '\\begin{center}\\begin{tabular}{|~C~|}',
	   'tableClose'          : '\\end{tabular}\\end{center}',
	   'tableRowOpen'        : '\\hline ' ,
	   'tableRowClose'       : ' \\\\'    ,
	   'tableCellSep'        : ' & '      ,
	   'tableColAlignLeft'   : 'l'        ,
	   'tableColAlignRight'  : 'r'        ,
	   'tableColAlignCenter' : 'c'        ,
	   'tableColAlignSep'    : '|'        ,
	   'comment'             : '% \a'     ,
	   'TOC'                 : '\\tableofcontents',
	   'pageBreak'           : '\\clearpage',
	   'EOD'                 : '\\end{document}'
	},
	
	'moin': {
	   'title1'              : '= \a ='        ,
	   'title2'              : '== \a =='      ,
	   'title3'              : '=== \a ==='    ,
	   'title4'              : '==== \a ===='  ,
	   'title5'              : '===== \a =====',
	   'blockVerbOpen'       : '{{{'           ,
	   'blockVerbClose'      : '}}}'           ,
	   'blockQuoteLine'      : '  '            ,
	   'fontMonoOpen'        : '{{{'           ,
	   'fontMonoClose'       : '}}}'           ,
	   'fontBoldOpen'        : "'''"           ,
	   'fontBoldClose'       : "'''"           ,
	   'fontItalicOpen'      : "''"            ,
	   'fontItalicClose'     : "''"            ,
	   'fontUnderlineOpen'   : "__"            ,
	   'fontUnderlineClose'  : "__"            ,
	   'listItemOpen'        : ' * '           ,
	   'numlistItemOpen'     : ' \a. '         ,
	   'bar1'                : '----'          ,
	   'bar2'                : '----'          ,
	   'url'                 : '[\a]'          ,
	   'urlMark'             : '[\a \a]'       ,
	   'email'               : '[\a]'          ,
	   'emailMark'           : '[\a \a]'       ,
	   'img'                 : '[\a]'          ,
	   'tableRowOpen'        : '||'            ,
	   'tableCellOpen'       : '\a'            ,
	   'tableCellClose'      : '||'            ,
	   'tableTitleCellClose' : '||'            ,
	   'tableCellAlignRight' : '<)>'           ,
	   'tableCellAlignCenter': '<:>'           ,
	   'comment'             : '## \a'         ,
	   'TOC'                 : '[[TableOfContents]]'
	},
	
	'mgp': {
	   'paragraphOpen'       : '%font "normal", size 5'     ,
	   'title1'              : '%page\n\n\a\n'              ,
	   'title2'              : '%page\n\n\a\n'              ,
	   'title3'              : '%page\n\n\a\n'              ,
	   'title4'              : '%page\n\n\a\n'              ,
	   'title5'              : '%page\n\n\a\n'              ,
	   'blockVerbOpen'       : '%font "mono"'               ,
	   'blockVerbClose'      : '%font "normal"'             ,
	   'blockQuoteOpen'      : '%prefix "       "'          ,
	   'blockQuoteClose'     : '%prefix "  "'               ,
	   'fontMonoOpen'        : '\n%cont, font "mono"\n'     ,
	   'fontMonoClose'       : '\n%cont, font "normal"\n'   ,
	   'fontBoldOpen'        : '\n%cont, font "normal-b"\n' ,
	   'fontBoldClose'       : '\n%cont, font "normal"\n'   ,
	   'fontItalicOpen'      : '\n%cont, font "normal-i"\n' ,
	   'fontItalicClose'     : '\n%cont, font "normal"\n'   ,
	   'fontUnderlineOpen'   : '\n%cont, fore "cyan"\n'     ,
	   'fontUnderlineClose'  : '\n%cont, fore "white"\n'    ,
	   'listItemLine'        : '\t'                         ,
	   'numlistItemLine'     : '\t'                         ,
	   'deflistItem1Open'    : '\t\n%cont, font "normal-b"\n',
	   'deflistItem1Close'   : '\n%cont, font "normal"\n'   ,
	   'bar1'                : '%bar "white" 5'             ,
	   'bar2'                : '%pause'                     ,
	   'url'                 : '\n%cont, fore "cyan"\n\a'   +\
	                           '\n%cont, fore "white"\n'    ,
	   'urlMark'             : '\a \n%cont, fore "cyan"\n\a'+\
	                           '\n%cont, fore "white"\n'    ,
	   'email'               : '\n%cont, fore "cyan"\n\a'   +\
	                           '\n%cont, fore "white"\n'    ,
	   'emailMark'           : '\a \n%cont, fore "cyan"\n\a'+\
	                           '\n%cont, fore "white"\n'    ,
	   'img'                 : '\n%~A~\n%newimage "\a"\n%left\n',
	   'comment'             : '%% \a'                      ,
	   'pageBreak'           : '%page\n\n\n'                ,
	   'EOD'                 : '%%EOD'
	},
	
	# man groff_man ; man 7 groff
	'man': {
	   'paragraphOpen'       : '.P'     ,
	   'title1'              : '.SH \a' ,
	   'title2'              : '.SS \a' ,
	   'title3'              : '.SS \a' ,
	   'title4'              : '.SS \a' ,
	   'title5'              : '.SS \a' ,
	   'blockVerbOpen'       : '.nf'    ,
	   'blockVerbClose'      : '.fi\n'  ,
	   'blockQuoteOpen'      : '.RS'    ,
	   'blockQuoteClose'     : '.RE'    ,
	   'fontBoldOpen'        : '\\fB'   ,
	   'fontBoldClose'       : '\\fR'   ,
	   'fontItalicOpen'      : '\\fI'   ,
	   'fontItalicClose'     : '\\fR'   ,
	   'listOpen'            : '.RS'    ,
	   'listItemOpen'        : '.IP \\(bu 3\n',
	   'listClose'           : '.RE'    ,
	   'numlistOpen'         : '.RS'    ,
	   'numlistItemOpen'     : '.IP \a. 3\n',
	   'numlistClose'        : '.RE'    ,
	   'deflistItem1Open'    : '.TP\n'  ,
	   'bar1'                : '\n\n'   ,
	   'bar2'                : '\n\n'   ,
	   'url'                 : '\a'     ,
	   'urlMark'             : '\a (\a)',
	   'email'               : '\a'     ,
	   'emailMark'           : '\a (\a)',
	   'img'                 : '\a'     ,
	   'tableOpen'           : '.TS\n~A~~B~tab(^); ~C~.',
	   'tableClose'          : '.TE'     ,
	   'tableRowOpen'        : ' '       ,
	   'tableCellSep'        : '^'       ,
	   'tableAlignCenter'    : 'center, ',
	   'tableBorder'         : 'allbox, ',
	   'tableColAlignLeft'   : 'l'       ,
	   'tableColAlignRight'  : 'r'       ,
	   'tableColAlignCenter' : 'c'       ,
	   'comment'             : '.\\" \a'
	},
	
	'pm6': {
	   'paragraphOpen'       : '<@Normal:>'    ,
	   'title1'              : '\n<@Title1:>\a',
	   'title2'              : '\n<@Title2:>\a',
	   'title3'              : '\n<@Title3:>\a',
	   'title4'              : '\n<@Title4:>\a',
	   'title5'              : '\n<@Title5:>\a',
	   'blockVerbOpen'       : '<@PreFormat:>' ,
	   'blockQuoteLine'      : '<@Quote:>'     ,
	   'fontMonoOpen'        : '<FONT "Lucida Console"><SIZE 9>' ,
	   'fontMonoClose'       : '<SIZE$><FONT$>',
	   'fontBoldOpen'        : '<B>'           ,
	   'fontBoldClose'       : '<P>'           ,
	   'fontItalicOpen'      : '<I>'           ,
	   'fontItalicClose'     : '<P>'           ,
	   'fontUnderlineOpen'   : '<U>'           ,
	   'fontUnderlineClose'  : '<P>'           ,
	   'listOpen'            : '<@Bullet:>'    ,
	   'listItemOpen'        : '\x95\t'        ,  # \x95 == ~U
	   'numlistOpen'         : '<@Bullet:>'    ,
	   'numlistItemOpen'     : '\x95\t'        ,
	   'bar1'                : '\a'            ,
	   'bar2'                : '\a'            ,
	   'url'                 : '<U>\a<P>'      ,  # underline
	   'urlMark'             : '\a <U>\a<P>'   ,
	   'email'               : '\a'            ,
	   'emailMark'           : '\a \a'         ,
	   'img'                 : '\a'
	}
	}
	
	# exceptions for --css-sugar
	if config['css-sugar'] and config['target'] in ('html','xhtml'):
		# change just HTML because XHTML inherits it
		htmltags = alltags['html']
		# table with no cellpadding
		htmltags['tableOpen'] = string.replace(
			htmltags['tableOpen'], ' CELLPADDING="4"', '')
		# DIVs
		htmltags['tocOpen' ] = '<DIV CLASS="toc" ID="toc">'
		htmltags['tocClose'] = '</DIV>'
		htmltags['bodyOpen'] = '<DIV CLASS="body" ID="body">'
		htmltags['bodyClose']= '</DIV>'
	
	# make the HTML -> XHTML inheritance
	# (it is made after the css-sugar changes, so XHTML gets them too)
	xhtml = alltags['html'].copy()
	for key in xhtml.keys(): xhtml[key] = string.lower(xhtml[key])
	# some like HTML tags as lowercase, some don't... (headers out)
	# the copy is taken before the xhtml specific tags are merged in
	if HTML_LOWER: alltags['html'] = xhtml.copy()
	xhtml.update(alltags['xhtml'])
	alltags['xhtml'] = xhtml.copy()
	
	# compose the target tags dictionary
	tags = {}
	target_tags = alltags[config['target']].copy()
	
	for key in keys: tags[key] = ''     # create empty keys
	for key in target_tags.keys():
		tags[key] = maskEscapeChar(target_tags[key]) # populate
	
	return tags


##############################################################################


def getRules(config):
	"""Returns all the target-specific syntax rules

	The 'config' dictionary must have the 'css-sugar' and 'target'
	keys. The returned dictionary has one entry for every known rule
	name: the rules that the chosen target does not set are 0 (OFF).
	"""
	
	# every rule name known by the program
	rule_names = [
	
	 # target abilities (ON/OFF)
	  'linkable',             # target supports external links
	  'tableable',            # target supports tables
	  'imglinkable',          # target supports images as links
	  'imgalignable',         # target supports image alignment
	  'imgasdefterm',         # target supports image as definition term
	  'autonumberlist',       # target supports numbered lists natively
	  'autonumbertitle',      # target supports numbered titles natively
	  'parainsidelist',       # lists items supports paragraph
	  'spacedlistitem',       # lists support blank lines between items
	  'listnotnested',        # lists cannot be nested
	  'quotenotnested',       # quotes cannot be nested
	  'verbblocknotescaped',  # don't escape specials in verb block
	  'verbblockfinalescape', # do final escapes in verb block
	  'escapeurl',            # escape special in link URL
	  'onelinepara',          # dump paragraph as a single long line
	  'tabletitlerowinbold',  # manually bold any cell on table titles
	  'tablecellstrip',       # strip extra spaces from each table cell
	  'barinsidequote',       # bars are allowed inside quote blocks
	  'finalescapetitle',     # perform final escapes on title lines
	  'autotocnewpagebefore', # break page before automatic TOC
	  'autotocnewpageafter',  # break page after automatic TOC
	  'autotocwithbars',      # automatic TOC surrounded by bars
	
	 # cosmetic details of the generated code (ON/OFF)
	  'indentverbblock',      # add leading spaces to verb block lines
	  'breaktablecell',       # break lines after any table cell
	  'breaktablelineopen',   # break line after opening table line
	  'notbreaklistopen',     # don't break line after opening a new list
	  'notbreakparaopen',     # don't break line after opening a new para
	  'keepquoteindent',      # don't remove the leading TABs on quotes
	  'keeplistindent',       # don't remove the leading spaces on lists
	  'blankendmotherlist',   # append a blank line at the mother list end
	  'blankendtable',        # append a blank line at the table end
	  'blankendautotoc',      # append a blank line at the auto TOC end
	  'tagnotindentable',     # tags must be placed at the line beginning
	
	 # rules that carry a value instead of ON/OFF
	  'listmaxdepth',         # maximum depth for lists
	  'tablecellaligntype'    # type of table cell align: cell, column
	]
	
	# what each target sets (anything not listed stays 0)
	target_rules = {
	  'txt': {
	    'indentverbblock': 1,
	    'spacedlistitem': 1,
	    'parainsidelist': 1,
	    'keeplistindent': 1,
	    'barinsidequote': 1,
	    'autotocwithbars': 1,
	    'blankendmotherlist': 1,
	  },
	  'html': {
	    'indentverbblock': 1,
	    'linkable': 1,
	    'escapeurl': 1,
	    'imglinkable': 1,
	    'imgalignable': 1,
	    'imgasdefterm': 1,
	    'autonumberlist': 1,
	    'spacedlistitem': 1,
	    'parainsidelist': 1,
	    'blankendmotherlist': 1,
	    'tableable': 1,
	    'tablecellstrip': 1,
	    'blankendtable': 1,
	    'breaktablecell': 1,
	    'breaktablelineopen': 1,
	    'keeplistindent': 1,
	    'keepquoteindent': 1,
	    'barinsidequote': 1,
	    'autotocwithbars': 1,
	    'tablecellaligntype': 'cell',
	  },
	  # TIP: xhtml starts as a copy of the HTML rules, so only the
	  #      XHTML-only settings (none by now) are placed here
	  'xhtml': {
	  },
	  'sgml': {
	    'linkable': 1,
	    'escapeurl': 1,
	    'autonumberlist': 1,
	    'spacedlistitem': 1,
	    'blankendmotherlist': 1,
	    'tableable': 1,
	    'tablecellstrip': 1,
	    'blankendtable': 1,
	    'blankendautotoc': 1,
	    'quotenotnested': 1,
	    'keeplistindent': 1,
	    'keepquoteindent': 1,
	    'barinsidequote': 1,
	    'finalescapetitle': 1,
	    'tablecellaligntype': 'column',
	  },
	  'mgp': {
	    'blankendmotherlist': 1,
	    'tagnotindentable': 1,
	    'spacedlistitem': 1,
	    'imgalignable': 1,
	    'autotocnewpagebefore': 1,
	  },
	  'tex': {
	    'autonumberlist': 1,
	    'autonumbertitle': 1,
	    'spacedlistitem': 1,
	    'blankendmotherlist': 1,
	    'tableable': 1,
	    'tablecellstrip': 1,
	    'tabletitlerowinbold': 1,
	    'blankendtable': 1,
	    'verbblocknotescaped': 1,
	    'keeplistindent': 1,
	    'listmaxdepth': 4,
	    'barinsidequote': 1,
	    'finalescapetitle': 1,
	    'autotocnewpageafter': 1,
	    'tablecellaligntype': 'column',
	  },
	  'moin': {
	    'spacedlistitem': 1,
	    'linkable': 1,
	    'blankendmotherlist': 1,
	    'keeplistindent': 1,
	    'tableable': 1,
	    'barinsidequote': 1,
	    'blankendtable': 1,
	    'tabletitlerowinbold': 1,
	    'tablecellstrip': 1,
	    'autotocwithbars': 1,
	    'tablecellaligntype': 'cell',
	  },
	  'man': {
	    'spacedlistitem': 1,
	    'indentverbblock': 1,
	    'blankendmotherlist': 1,
	    'tagnotindentable': 1,
	    'tableable': 1,
	    'tablecellaligntype': 'column',
	    'tabletitlerowinbold': 1,
	    'tablecellstrip': 1,
	    'blankendtable': 1,
	    'keeplistindent': 0,
	    'barinsidequote': 1,
	    'parainsidelist': 0,
	  },
	  'pm6': {
	    'keeplistindent': 1,
	    'verbblockfinalescape': 1,
	    #TODO add support for these - maybe set a JOINNEXT char and
	    #     do it on addLineBreaks()
	    'notbreaklistopen': 1,
	    'notbreakparaopen': 1,
	    'barinsidequote': 1,
	    'autotocwithbars': 1,
	    'onelinepara': 1,
	  },
	}
	
	sugar  = config['css-sugar']
	target = config['target']
	
	# --css-sugar turns OFF two HTML rules; as XHTML copies the HTML
	# rules below, changing the HTML table is enough for both targets
	if sugar and target in ('html','xhtml'):
		html_rules = target_rules['html']
		for name in ('indentverbblock', 'autotocwithbars'):
			html_rules[name] = 0
	
	# pick the rules for the requested target
	if target == 'xhtml':
		chosen = target_rules['html'].copy()     # inherit from HTML
		chosen.update(target_rules['xhtml'])     # XHTML specific
	else:
		chosen = target_rules[target].copy()
	
	# all rules start OFF, then the target ones are applied
	rules = {}
	for name in rule_names:
		rules[name] = 0
	rules.update(chosen)
	
	return rules


##############################################################################


def getRegexes():
	"""Returns all the regexes used to find the t2t marks

	The result is a dictionary whose keys are mark names and whose
	values are compiled regex objects. The only exception is the
	'_urlskel' key, which holds the plain dictionary with the URL
	pattern pieces used here to compose the link regexes.

	The 'macros' regex is composed from the keys of the global
	MACROS dictionary.
	"""
	
	bank = {
	# block marks: the mark must be alone on the line (only trailing
	# blanks are allowed). Open and Close are the very same pattern.
	'blockVerbOpen':
		re.compile(r'^```\s*$'),
	'blockVerbClose':
		re.compile(r'^```\s*$'),
	'blockRawOpen':
		re.compile(r'^"""\s*$'),
	'blockRawClose':
		re.compile(r'^"""\s*$'),
	# quote: one or more TABs at the line beginning
	'quote':
		re.compile(r'^\t+'),
	# one line verb/raw: mark, one space and at least one char after it
	# (the lookahead does not consume that char)
	'1lineVerb':
		re.compile(r'^``` (?=.)'),
	'1lineRaw':
		re.compile(r'^""" (?=.)'),
	# mono, raw, bold, italic, underline:
	# - marks must be glued with the contents, no boundary spaces
	# - they are greedy, so in ****bold****, turns to <b>**bold**</b>
	# - group 1 is the contents found between the marks
	'fontMono':
		re.compile(  r'``([^\s](|.*?[^\s])`*)``'),
	'raw':
		re.compile(  r'""([^\s](|.*?[^\s])"*)""'),
	'fontBold':
		re.compile(r'\*\*([^\s](|.*?[^\s])\**)\*\*'),
	'fontItalic':
		re.compile(  r'//([^\s](|.*?[^\s])/*)//'),
	'fontUnderline':
		re.compile(  r'__([^\s](|.*?[^\s])_*)__'),
	# list items: group 1 is the leading spaces (indent), group 2 is
	# the list mark. The mark must be followed by one space.
	# list and numlist also demand a non-space char after that space.
	'list':
		re.compile(r'^( *)(-) (?=[^ ])'),
	'numlist':
		re.compile(r'^( *)(\+) (?=[^ ])'),
	# deflist: group 3 is the rest of the line (may be empty)
	'deflist':
		re.compile(r'^( *)(:) (.*)$'),
	# listclose: a list mark alone on the line
	'listclose':
		re.compile(r'^( *)([-+:])\s*$'),
	# bar: 20 or more chars from the set "_", "=", "-", alone on the line
	'bar':
		re.compile(r'^(\s*)([_=-]{20,})\s*$'),
	# table: optional leading spaces, "|" or "||", then one space
	'table':
		re.compile(r'^ *\|\|? '),
	'blankline':
		re.compile(r'^\s*$'),
	# comment: any line beginning with "%"
	'comment':
		re.compile(r'^%'),
	
	# auxiliary tag regexes: the ~A~, ~B~ and ~C~ marks, in any case
	# NOTE(review): presumably placeholders that are replaced inside
	#               the target tags - confirm at the callers
	'_imgAlign'     : re.compile(r'~A~',re.I),
	'_tableAlign'   : re.compile(r'~A~',re.I),
	'_anchor'       : re.compile(r'~A~',re.I),
	'_tableBorder'  : re.compile(r'~B~',re.I),
	'_tableColAlign': re.compile(r'~C~',re.I),
	}
	
	# special char to place data on TAGs contents  (\a == bell)
	bank['x'] = re.compile('\a')
	
	# %%macroname [ (formatting) ]
	# The doubled %%%% becomes a literal %% after the % formatting.
	# Named groups: 'name' (one of the MACROS keys) and the optional
	# 'fmt' (the text inside the parentheses). Case is ignored.
	bank['macros'] = re.compile(r'%%%%(?P<name>%s)\b(\((?P<fmt>.*?)\))?'%(
	                            string.join(MACROS.keys(), '|')), re.I)
	
	# %%TOC special macro for TOC positioning
	# (must be alone on the line, leading spaces allowed, case ignored)
	bank['toc'] = re.compile(r'^ *%%toc\s*$', re.I)
	
	# almost complicated title regexes ;)
	# Skeleton: opening marks ('id' group), title text ('txt' group),
	# the very same opening marks again (\1 points to the 'id' group),
	# and an optional [label] ('label' group: word chars and hyphens).
	# The marks are 1 to 5 "=" (title) or "+" (numtitle) chars and the
	# text can't begin nor end with the mark char.
	titskel = r'^ *(?P<id>%s)(?P<txt>%s)\1(\[(?P<label>[\w-]*)\])?\s*$'
	bank[   'title'] = re.compile(titskel%('[=]{1,5}','[^=](|.*[^=])'))
	bank['numtitle'] = re.compile(titskel%('[+]{1,5}','[^+](|.*[^+])'))
	
	### complicated regexes begin here ;)
	#
	# textual descriptions on --help's style: [...] is optional, | is OR
	
	
	### first, some auxiliary variables
	#
	
	# [image.EXT]
	# (EXT is one of: png, jpg, jpeg, gif, eps, bmp)
	patt_img = r'\[([\w_,.+%$#@!?+~/-]+\.(png|jpe?g|gif|eps|bmp))\]'
	
	# link things
	# 'proto' and 'guess' are full patterns, 'pass' is a full pattern
	# too, the others are just chars to be placed inside a [...] list
	urlskel = {
	  'proto' : r'(https?|ftp|news|telnet|gopher|wais)://',
	  'guess' : r'(www[23]?|ftp)\.',         # w/out proto, try to guess
	  'login' : r'A-Za-z0-9_.-',             # for ftp://login@domain.com
	  'pass'  : r'[^ @]*',                   # for ftp://login:pass@dom.com
	  'chars' : r'A-Za-z0-9%._/~:,=$@&+-',   # %20(space), :80(port), D&D
	  'anchor': r'A-Za-z0-9%._-',            # %nn(encoded)
	  'form'  : r'A-Za-z0-9/%&=+;.,$@*_-',   # .,@*_-(as is)
	  'punct' : r'.,;:!?'
	}
	
	# username [ :password ] @
	patt_url_login = r'([%s]+(:%s)?@)?'%(urlskel['login'],urlskel['pass'])
	
	# [ http:// ] [ username:password@ ] domain.com [ / ]
	#     [ #anchor | ?form=data ]
	retxt_url = r'\b(%s%s|%s)[%s]+\b/*(\?[%s]+)?(#[%s]+)?'%(
	             urlskel['proto'],patt_url_login, urlskel['guess'],
	             urlskel['chars'],urlskel['form'],urlskel['anchor'])
	
	# filename | [ filename ] #anchor
	retxt_url_local = r'[%s]+|[%s]*(#[%s]+)'%(
	             urlskel['chars'],urlskel['chars'],urlskel['anchor'])
	
	# user@domain [ ?form=data ]
	# (the domain must end with a 2 to 4 letters suffix)
	patt_email = r'\b[%s]+@([A-Za-z0-9_-]+\.)+[A-Za-z]{2,4}\b(\?[%s]+)?'%(
	             urlskel['login'],urlskel['form'])
	
	# saving for future use
	# (this is the only value in the bank that is not a compiled regex)
	bank['_urlskel'] = urlskel
	
	### and now the real regexes
	#
	
	bank['email'] = re.compile(patt_email,re.I)
	
	# email | url
	bank['link'] = re.compile(r'%s|%s'%(retxt_url,patt_email), re.I)
	
	# \[ label | imagetag    url | email | filename \]
	# Named groups: 'label' (an image mark or any text with no "]")
	# and 'link'. One space separates the label from the link.
	# re.L and re.I are distinct bit flags, so the sum acts as re.L|re.I
	bank['linkmark'] = re.compile(
		r'\[(?P<label>%s|[^]]+) (?P<link>%s|%s|%s)\]'%(
		  patt_img, retxt_url, patt_email, retxt_url_local),
		re.L+re.I)
	
	# image
	bank['img'] = re.compile(patt_img, re.L+re.I)
	
	# special things
	# (lines beginning with "%!", the following blanks are also matched)
	bank['special'] = re.compile(r'^%!\s*')
	return bank
### END OF regex nightmares


##############################################################################


def echo(msg):
	"Quick debug: prints 'msg' wrapped by the ANSI escapes 32;1 (green, bold)"
	colored = '\033[32;1m%s\033[m'%msg
	print(colored)
def Quit(msg, exitcode=0):
	"Prints 'msg' on STDOUT and leaves the program with 'exitcode' status"
	print(msg)
	sys.exit(exitcode)
def Error(msg):
	"Writes the 'program: Error: msg' line on STDERR and exits with status 1"
	heading = _("%s: Error: ")%my_name
	sys.stderr.write(heading + "%s\n"%msg)
	sys.stderr.flush()
	sys.exit(1)
def ShowTraceback():
	"Prints the current exception traceback, followed by two blank lines"
	# best effort: any failure while showing the traceback is ignored
	try:
		import traceback
		traceback.print_exc()
		print
		print
	except: pass
def Message(msg,level):
	"""Prints 'msg' when 'level' is not greater than the global VERBOSE

	Nothing is printed when the global QUIET is ON. The message is
	prefixed by 5 hyphens for each level.
	"""
	if level <= VERBOSE:
		if not QUIET:
			hyphens = '-'*5*level
			print("%s %s"%(hyphens, msg))
def Debug(msg,color=0,linenr=None):
	"""Prints the debug message 'msg', prefixed by '** '

	Nothing is printed when the global QUIET is ON or DEBUG is OFF.
	When the global COLOR_DEBUG is ON, 'color' sets the text color:
	0gray=init,1red=conf,3yellow=line,6cyan=block,2green=detail,5pink=gui
	When 'linenr' is informed, the message also gets a 'LINE nnnn:' prefix.
	"""
	if not QUIET and DEBUG:
		text = msg
		if COLOR_DEBUG:
			text = '\033[3%s;1m%s\033[m'%(color,text)
		if linenr is not None:
			text = "LINE %04d: %s"%(linenr,text)
		print("** %s"%text)
def Readfile(file, remove_linebreaks=0):
	"""Returns the list of lines read from 'file' ('-' means STDIN)

	When 'remove_linebreaks' is ON, the line break chars (\\n, \\r)
	are removed from the end of each line. If the reading fails,
	Error() is called with a message about it.
	"""
	if file != '-':
		try:
			f = open(file)
			data = f.readlines()
			f.close()
		except:
			Error(_("Cannot read file:")+"\n    %s"%file)
	else:
		try:
			data = sys.stdin.readlines()
		except:
			Error(_('You must feed me with data on STDIN!'))
	if remove_linebreaks:
		stripped = []
		for line in data:
			stripped.append(re.sub('[\n\r]+$','',line))
		data = stripped
	Message(_("Readed file (%d lines): %s")%(len(data),file),2)
	return data
def Savefile(file, contents):
	"""Writes 'contents' to the 'file' path, opened on binary mode

	'contents' can be a list of strings (saved with writelines(), so
	no line break is added) or a single string (saved with write()).
	If the file cannot be opened for writing, Error() is called with
	a message that shows the file path. Any exception raised while
	writing is passed on to the caller.
	"""
	try: f = open(file, 'wb')
	except: Error(_("Cannot open file for writing:")+"\n    %s"%file)
	# the file is always closed, even when the writing fails, so the
	# file handle does not leak
	try:
		if type(contents) == type([]): f.writelines(contents)
		else: f.write(contents)
	finally:
		f.close()

def showdic(dic):
	"Prints all the 'dic' contents, one 'key : value' pair per line"
	for key in dic.keys():
		value = dic[key]
		print("%15s : %s" % (key,value))
def dotted_spaces(txt=''):
	return string.replace(txt,' ','.')

def get_rc_path():
	"""Return the full path for the users' RC file

	The RC dir is taken from the first of the HOME and HOMEPATH
	environment variables that is set and not empty. If none of them
	is, an empty string is returned.
	No check is made here to know whether the RC file exists or not.
	"""
	rc_file = RC
	# search the RC dir on the specified system variables
	# TIP: win: http://www.winnetmag.com/Article/ArticleID/23873/23873.html
	rc_dir_search = ['HOME', 'HOMEPATH']
	for var in rc_dir_search:
		rc_dir = os.environ.get(var)
		if rc_dir: break
	if not rc_dir:
		return ''
	# compose the RC file path
	rc_path = os.path.join(rc_dir, rc_file)
	# on windows, prefix with the drive (%homedrive%: 2k/XP/NT)
	if sys.platform[:3] == 'win':
		rc_drive = os.environ.get('HOMEDRIVE')
		# HOMEDRIVE may not be set and os.path.join() fails when
		# it receives None, so the prefix is added only if available
		if rc_drive:
			rc_path = os.path.join(rc_drive,rc_path)
	return rc_path



##############################################################################

class CommandLine:
	"""
	Command Line class - Masters command line

	This class checks and extracts data from the provided command line.
	The --long options and flags are taken from the global OPTIONS,
	FLAGS and ACTIONS dictionaries. The short options are registered
	here, and also their equivalence to the long ones.

	METHODS:
	  _compose_short_opts() -> str
	  _compose_long_opts() -> list
	      Compose the valid short and long options list, on the
	      'getopt' format.
	  
	  parse() -> (opts, args)
	      Call getopt to check and parse the command line.
	      It expects to receive the command line as a list, and
	      without the program name (sys.argv[1:]).
	  
	  get_raw_config() -> [RAW config]
	      Scans command line and convert the data to the RAW config
	      format. See ConfigMaster class for the RAW format description.
	      Optional 'ignore' and 'filter' arguments are used to filter
	      in or out specified keys.
	  
	  compose_cmdline(dict) -> [Command line]
	      Compose a command line list from an already parsed config
	      dictionary, generated from RAW by ConfigMaster(). Use
	      this to compose an optimal command line for a group of
	      options.
	
	The get_raw_config() calls parse(), so the typical use of this
	class is:
	
            raw = CommandLine().get_raw_config(sys.argv[1:])
	"""
	def __init__(self):
		self.all_options = OPTIONS.keys()
		self.all_flags   = FLAGS.keys()
		self.all_actions = ACTIONS.keys()
		
		# short:long options equivalence
		self.short_long = {
		  'h':'help'     ,   'V':'version',
		  'n':'enum-title',  'i':'infile' ,
		  'H':'no-headers',  'o':'outfile',
		  'v':'verbose'   ,  't':'target' ,
		  'q':'quiet'
		}
		
		# compose valid short and long options data for getopt
		self.short_opts = self._compose_short_opts()
		self.long_opts  = self._compose_long_opts()
	
	def _compose_short_opts(self):
		"Returns a string like 'hVt:o' with all short options/flags"
		ret = []
		for opt in self.short_long.keys():
			long = self.short_long[opt]
			if long in self.all_options: # is flag or option?
				opt = opt+':'        # option: have param
			ret.append(opt)
		Debug('Valid SHORT options: %s'%ret)
		return string.join(ret, '')
	
	def _compose_long_opts(self):
		"Returns a list with all the valid long options/flags"
		ret = map(lambda x:x+'=', self.all_options)       # add =
		ret.extend(self.all_flags)                        # flag ON
		ret.extend(self.all_actions)                      # acts
		ret.extend(map(lambda x:'no-'+x, self.all_flags)) # add no-*
		ret.extend(['no-style'])                   # turn…
Large files files are truncated, but you can click here to view the full file