txt2tags-1.7.py | searchcode

/old/txt2tags-1.7.py

http://txt2tags.googlecode.com/
Python | 2628 lines | 2582 code | 16 blank | 30 comment | 9 complexity | f705939eba7a6267cceef0ee2a39b8ff MD5 | raw file
Possible License(s): GPL-2.0, GPL-3.0, WTFPL

#!/usr/bin/env python
# txt2tags - generic text conversion tool
# http://txt2tags.sf.net
#
# Copyright 2001, 2002, 2003 Aurelio Marinho Jargas
#
#   This program is free software; you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation, version 2.
#
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License for more details.
#
#   You have received a copy of the GNU General Public License along
#   with this program, on the COPYING file.
#

# the code is better, even readable now, but needs more improvements
# please wait for the upcoming 2.0 series for a cleaner one

#XXX Python coding warning
# Avoid common mistakes:
# - use newlist=list[:]    instead of newlist=list  (copy, do not alias)
# - use newdic=dic.copy()  instead of newdic=dic    (copy, do not alias)
# - use dic.get(key)       instead of dic[key]      (no KeyError if missing)

import re, string, os, sys, getopt, traceback
from time import strftime,time,localtime

# program identification (my_version and my_url compose versionstr below)
my_url = 'http://txt2tags.sf.net'
my_email = 'verde@aurelio.net'
my_version = '1.7'                               #-betaN

# Debug() only prints messages whose level is not above this number
DEBUG = 0   # do not edit here, please use --debug
# valid document types for the -t/--type option
targets = ['txt', 'sgml', 'html', 'pm6', 'mgp', 'moin', 'man', 'tex']
# on/off switches of the command line, with their default values
FLAGS   = {'noheaders':0,'enumtitle':0 ,'maskemail':0 ,'stdout'  :0,
           'toconly'  :0,'toc'      :0 ,'gui'      :0 ,'dump-source':0}
# command line options that require a value, with their default values
OPTIONS = {'toclevel' :3,'style'    :'','type'     :'','outfile' :'',
           'split':0, 'lang':''}
# setting names searched on the document's config area (%!name: value)
CONFIG_KEYWORDS = ['encoding', 'style', 'cmdline','preproc','postproc']
# global containers, empty here
# NOTE(review): presumably filled later, by code outside this part of the file
CONF = {}
regex = {}
TAGS = {}
rules = {}

currdate = strftime('%Y%m%d',localtime(time()))    # ISO current date
lang = 'english'      # fallback language for the sgml2html call (--split)
doctype = outfile = ''
STDIN = STDOUT = '-'  # file name that means "use the standard stream"

ESCCHAR = '\x00'
# line break chars, keyed by the first 3 letters of sys.platform
LINEBREAK = {'default':'\n', 'win':'\r\n', 'mac':'\r'}

#my_version = my_version + '-dev' + currdate[4:]  # devel!

# global vars for doClose*()
quotedepth = []
listindent = []
listids = []
subarea = None
tableborder = 0
# set the Line Break across platforms
# (any platform prefix not found on LINEBREAK gets the 'default' one)
LB = LINEBREAK.get(sys.platform[:3]) or LINEBREAK['default']

# one-line program identification, also the first line of the usage text
versionstr = "txt2tags version %s <%s>"%(my_version,my_url)
usage = """
%s

Usage: txt2tags -t <type> [OPTIONS] file.t2t

  -t, --type         set target document type. currently supported:
                     %s

  -o, --outfile=FILE set FILE as the output file name ('-' for STDOUT)   	  
      --stdout       same as '-o -' or '--outfile -' (deprecated option)
  -H, --noheaders    suppress header, title and footer information
  -n, --enumtitle    enumerate all title lines as 1, 1.1, 1.1.1, etc
      --maskemail    hide email from spam robots. x@y.z turns <x (a) y z>

      --toc          add TOC (Table of Contents) to target document
      --toconly      print document TOC and exit
      --toclevel=N   set maximum TOC level (depth) to N

      --gui          invoke Graphical Tk Interface
      --style=FILE   use FILE as the document style (like Html CSS)

  -h, --help         print this help information and exit
  -V, --version      print program version and exit

Extra options for HTML target (needs sgml-tools):
      --split        split documents. values: 0, 1, 2 (default 0)
      --lang         document language (default english)

By default, converted output is saved to 'file.<type>'.
Use --outfile to force an output file name.
If input file is '-', reads from STDIN.
If output file is '-', dumps output to STDOUT.\
"""%(versionstr, re.sub(r"[]'[]",'',repr(targets)))


# here are the header templates for all the targets
# you may edit them to fit your needs
#  - the %(HEADERn)s strings represent the Header lines
#  - use %% to represent a literal %
#
HEADER_TEMPLATE = {
  'txt': """\
%(HEADER1)s
%(HEADER2)s
%(HEADER3)s
""",

  'sgml': """\
<!doctype linuxdoc system>
<article>
<title>%(HEADER1)s
<author>%(HEADER2)s
<date>%(HEADER3)s
""",

  'html': """\
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
<HTML>
<HEAD>
<META NAME="generator" CONTENT="http://txt2tags.sf.net">
<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=%(ENCODING)s">
<LINK REL="stylesheet" TYPE="text/css" HREF="%(STYLE)s">
<TITLE>%(HEADER1)s</TITLE>
</HEAD><BODY BGCOLOR="white" TEXT="black">
<P ALIGN="center"><CENTER><H1>%(HEADER1)s</H1>
<FONT SIZE=4>
<I>%(HEADER2)s</I><BR>
%(HEADER3)s
</FONT></CENTER>
""",


# TODO man section 1 is hardcoded...
  'man': """\
.TH "%(HEADER1)s" 1 "%(HEADER3)s" "%(HEADER2)s"
""",

# TODO style to <HR>
  'pm6': """\
<PMTags1.0 win><C-COLORTABLE ("Preto" 1 0 0 0)
><@Normal=
  <FONT "Times New Roman"><CCOLOR "Preto"><SIZE 11>
  <HORIZONTAL 100><LETTERSPACE 0><CTRACK 127><CSSIZE 70><C+SIZE 58.3>
  <C-POSITION 33.3><C+POSITION 33.3><P><CBASELINE 0><CNOBREAK 0><CLEADING -0.05>
  <GGRID 0><GLEFT 7.2><GRIGHT 0><GFIRST 0><G+BEFORE 7.2><G+AFTER 0>
  <GALIGNMENT "justify"><GMETHOD "proportional"><G& "ENGLISH">
  <GPAIRS 12><G%% 120><GKNEXT 0><GKWIDOW 0><GKORPHAN 0><GTABS $>
  <GHYPHENATION 2 34 0><GWORDSPACE 75 100 150><GSPACE -5 0 25>
><@Bullet=<@-PARENT "Normal"><FONT "Abadi MT Condensed Light">
  <GLEFT 14.4><G+BEFORE 2.15><G%% 110><GTABS(25.2 l "")>
><@PreFormat=<@-PARENT "Normal"><FONT "Lucida Console"><SIZE 8><CTRACK 0>
  <GLEFT 0><G+BEFORE 0><GALIGNMENT "left"><GWORDSPACE 100 100 100><GSPACE 0 0 0>
><@Title1=<@-PARENT "Normal"><FONT "Arial"><SIZE 14><B>
  <GCONTENTS><GLEFT 0><G+BEFORE 0><GALIGNMENT "left">
><@Title2=<@-PARENT "Title1"><SIZE 12><G+BEFORE 3.6>
><@Title3=<@-PARENT "Title1"><SIZE 10><GLEFT 7.2><G+BEFORE 7.2>
><@Title4=<@-PARENT "Title3">
><@Title5=<@-PARENT "Title3">
><@Quote=<@-PARENT "Normal"><SIZE 10><I>>

%(HEADER1)s
%(HEADER2)s
%(HEADER3)s
""",

  'mgp': """\
#!/usr/X11R6/bin/mgp -t 90
%%deffont "normal"    xfont  "utopia-medium-r", charset "iso8859-1"
%%deffont "normal-i"  xfont  "utopia-medium-i", charset "iso8859-1"
%%deffont "normal-b"  xfont  "utopia-bold-r"  , charset "iso8859-1"
%%deffont "normal-bi" xfont  "utopia-bold-i"  , charset "iso8859-1"
%%deffont "mono"      xfont "courier-medium-r", charset "iso8859-1"
%%default 1 size 5
%%default 2 size 8, fore "yellow", font "normal-b", center
%%default 3 size 5, fore "white",  font "normal", left, prefix "  "
%%tab 1 size 4, vgap 30, prefix "     ", icon arc "red" 40, leftfill
%%tab 2 prefix "            ", icon arc "orange" 40, leftfill
%%tab 3 prefix "                   ", icon arc "brown" 40, leftfill
%%tab 4 prefix "                          ", icon arc "darkmagenta" 40, leftfill
%%tab 5 prefix "                                ", icon arc "magenta" 40, leftfill
%%%%------------------------- end of headers -----------------------------
%%page





%%size 10, center, fore "yellow"
%(HEADER1)s

%%font "normal-i", size 6, fore "white", center
%(HEADER2)s

%%font "mono", size 7, center
%(HEADER3)s
""",

# TODO please, improve me!
  'moin': """\
%(HEADER1)s
%(HEADER2)s
%(HEADER3)s
""",

  'tex': \
r"""\documentclass[11pt,a4paper]{article}
\usepackage{amsfonts,amssymb,graphicx,url}
\usepackage[%(ENCODING)s]{inputenc}  %% char encoding
\pagestyle{plain}   %% do page numbering ('empty' turns off)
\frenchspacing      %% no aditional spaces after periods
\setlength{\parskip}{8pt}\parindent=0pt  %% no paragraph indentation
%% uncomment next line for fancy PDF output on Adobe Acrobat Reader
%%\usepackage[pdfstartview=FitV,colorlinks=true,bookmarks=true]{hyperref}

\title{%(HEADER1)s}
\author{%(HEADER2)s}
\begin{document}
\date{%(HEADER3)s}
\maketitle
"""
}

#-----------------------------------------------------------------------

def Quit(msg, exitcode=0):
	"""Print msg on STDOUT and end the program.

	msg      -- text to be shown
	exitcode -- exit status handed to sys.exit() (default 0)
	"""
	# print() with ONE argument behaves the same on Python 2 and 3
	print(msg)
	sys.exit(exitcode)
def Error(msg):
	"""Print msg prefixed by 'ERROR: ' on STDOUT and abort the program.

	The exit status is 1, so shells and scripts can detect the failure.
	"""
	# the 1-item tuple keeps the formatting safe when msg is a tuple
	print("ERROR: %s"%(msg,))
	sys.exit(1)
def echo(msg): # quick debug
	"Print msg between the ANSI escapes '\\033[32;1m' and '\\033[m'"
	# the 1-item tuple keeps the formatting safe when msg is a tuple
	print('\033[32;1m%s\033[m'%(msg,))
def Debug(msg,i=0,linenr=None):
	"""Print a debug message, if the current debug level allows it.

	msg    -- the message text
	i      -- message level; nothing is printed when i > DEBUG
	linenr -- optional source line number, shown zero padded
	"""
	if i > DEBUG: return
	if linenr is not None:
		text = "(%d) %04d:%s"%(i,linenr,msg)
	else:
		text = "(%d) %s"%(i,msg)
	# print() with ONE argument behaves the same on Python 2 and 3
	print(text)
def Readfile(file, remove_linebreaks=0):
	"""Return the contents of a file as a list of lines.

	file              -- path of the file to read, or '-' to read STDIN
	remove_linebreaks -- when true, the trailing CR/LF chars of every
	                     line are removed

	Reading problems are reported with Error(), which ends the program.
	"""
	if file == '-':
		# a ^C while waiting for data also means "nothing was fed"
		try: data = sys.stdin.readlines()
		except (Exception, KeyboardInterrupt):
			Error('You must feed me with data on STDIN!')
	else:
		try:
			f = open(file)
			# the file is closed even when the reading fails
			try: data = f.readlines()
			finally: f.close()
		except Exception:
			Error("Cannot read file:\n    %s"%file)
	if remove_linebreaks:
		# must be a real list: callers concatenate and insert() on it
		data = [re.sub('[\n\r]+$','',line) for line in data]
	return data
def Savefile(file, contents):
	"""Write contents to a file, replacing any previous data.

	file     -- path of the file to be written
	contents -- a single string, or a list of strings (no line break
	            is added, the data is written as it is)

	The file is opened on binary mode ('wb'). If it can't be opened,
	Error() is called, which ends the program.
	"""
	try: f = open(file, 'wb')
	except Exception: Error("Cannot open file for writing:\n    %s"%file)
	# the file is closed even when the writing fails
	try:
		if isinstance(contents, list): f.writelines(contents)
		else: f.write(contents)
	finally:
		f.close()


def get_include_contents(file, path=''):
	"""Parse a %!include: value and extract the file contents.

	file -- the include value. Quotes around the name set the type:
	          `name`  VERB: lines go inside a '---' verbatim block
	          'name'  PASS: lines are returned untouched (pass-thru)
	          name    T2T : only the BODY of a marked file is used
	path -- directory joined before the file name

	Returns the tuple (type, lines), where type is the string 'VERB',
	'PASS' or 'T2T'. Except for PASS, the lines come between two
	'%INCLUDED_<type> ...' delimiter comments.
	"""
	# set include type
	# (a quoted name needs both quote chars, so '' or a lonely ` are
	# taken as plain names instead of raising IndexError)
	inctype = 'T2T'
	if len(file) > 1 and file[0] == file[-1]:
		if   file[0] == '`': inctype = 'VERB'
		elif file[0] == "'": inctype = 'PASS'
		if inctype != 'T2T':
			file = file[1:-1]  # remove the quotes
	# handle remote dir execution
	filepath = os.path.join(path, file)
	# pass-thru
	if inctype == 'PASS':
		return inctype, Readfile(filepath, remove_linebreaks=1)
	# VERB text
	if inctype == 'VERB':
		lines = Readfile(filepath, remove_linebreaks=1)
		# escape inner '---' that would end VERB block
		lines = [re.sub('^---$','--- ',x) for x in lines]
		# add VERB block identifiers
		lines = ['---'] + lines + ['---']
	# default txt2tags marked text
	else:
		lines = get_file_body(filepath)
	# add delimiter comments
	lines.insert(0, '%%INCLUDED_%s starts here: %s'%(inctype,file))
	lines.append('%%INCLUDED_%s ends here: %s'%(inctype,file))
	return inctype, lines


def ParseConfig(text='',name='', target=''):
	"""Parse a config line on the format '%!name(target): value'.

	text   -- the line to be parsed
	name   -- regex for the accepted setting names (default: letters)
	target -- regex for the accepted targets (default: letters)

	Returns:
	  {}    when text is empty or is not a config line
	  None  when a preproc/postproc value is not a 'PATTERN REPLACE' pair
	  a dictionary with the keys 'name' and 'target' (lowercased,
	  target is 'all' when not informed) and 'value' (a string, or
	  the tuple (pattern, replace) for preproc/postproc)
	"""
	ret = {}
	if not text: return ret
	re_name = name or '[a-z]+'
	re_target = target or '[a-z]*'
	# the value is '\S.*?' and not '\S.+?', so one-char values are valid
	cfgregex = re.compile(r"""
	  ^%%!\s*               # leading id with opt spaces
	  (?P<name>%s)\s*       # config name
	  (\((?P<target>%s)\))? # optional target spec inside ()
	  \s*:\s*               # key:value delimiter with opt spaces
	  (?P<value>\S.*?)      # config value
	  \s*$                  # rstrip() spaces and hit EOL
	  """%(re_name,re_target), re.I+re.VERBOSE)
	match = cfgregex.match(text)
	if not match: return ret
	
	ret = {'name'  :(match.group('name') or '').lower(),
	       'target':(match.group('target') or 'all').lower(),
	       'value' :match.group('value') }
	
	# Special config with two quoted values (%!preproc: "foo" 'bar')
	if ret['name'] in ['preproc','postproc']:
		prepostregex = re.compile(r"""
		                        # ---[ PATTERN ]---
		  ^( "([^"]*)"            # "double quoted" or
		   | '([^']*)'            # 'single quoted' or
		   | ([^\s]+)             # single_word
		   )
		    \s+                 # separated by spaces
		
		                        # ---[ REPLACE ]---
		       ( "([^"]*)"        # "double quoted" or
		       | '([^']*)'        # 'single quoted' or
		       | (.*)             # anything
		       )
		        \s*$
		  """, re.VERBOSE)
		valmatch = prepostregex.search(ret['value'])
		if not valmatch: return None
		getval = valmatch.group
		# groups 2-4 are the PATTERN alternatives, 6-8 the REPLACE ones
		patt = getval(2) or getval(3) or getval(4) or ''
		repl = getval(6) or getval(7) or getval(8) or ''
		ret['value'] = (patt, repl)
	
	return ret


class Cmdline:
	"""Command line parser and configuration composer.

	Give the full argument list (program name first, as in sys.argv)
	and call get_conf() to obtain the configuration dictionary.
	More settings can be added later with merge() (extra command line
	text) and merge_conf() (already parsed settings).

	Attributes:
	  cmdline      -- argument list being handled (item 0 is skipped)
	  cmdline_conf -- options found on the cmdline, set by parse()
	  conf         -- the full configuration, set by get_conf()
	  defaults     -- default value of every known option and flag
	  nocheck      -- when true, get_conf() skips the sanity checks
	"""
	def __init__(self, cmdline=None, nocheck=0):
		self.conf = {}
		self.cmdline = cmdline or []
		self.cmdline_conf = {}
		self.dft_options = OPTIONS.copy()
		self.dft_flags   = FLAGS.copy()
		# real lists, because they are concatenated later
		self.all_options = list(self.dft_options.keys())
		self.all_flags   = list(self.dft_flags.keys())
		self.defaults = self._get_empty_conf()
		self.nocheck = nocheck
		if self.cmdline: self.parse()
	
	#TODO protect quotes contents
	def _tokenize(self, cmd_string):
		"split a command line string on its blank separated parts"
		return cmd_string.split()
	
	def parse(self):
		"""return a dic with all options:value found

		The same dic is saved on self.cmdline_conf. Invalid options
		end the program with Error(); --help and --version with Quit().
		"""
		if not self.cmdline: return {}
		Debug("cmdline: %s"%(self.cmdline,), 1)
		options = {'infile': '', 'infiles':''}
		# compose valid options list
		longopts = ['help','version'] + self.all_flags + \
		           [x+'=' for x in self.all_options]  # add =
		cmdline = self.cmdline[1:]           # del prog name
		# get cmdline options
		try: (opt, args) = getopt.getopt(cmdline, 'hVnHt:o:', longopts)
		except getopt.error:
			# sys.exc_info() is valid syntax on any Python version
			Error("%s (try --help)"%sys.exc_info()[1])
		# get infile, if any
		if args:
			options['infile'] = args[0]
			options['infiles'] = args  # multi
		# parse all options
		for name,val in opt:
			if   name in ['-h','--help'   ]: Quit(usage)
			elif name in ['-V','--version']: Quit(versionstr)
			elif name in ['-t','--type'     ]: options['type'] = val
			elif name in ['-o','--outfile'  ]: options['outfile'] = val
			elif name in ['-n','--enumtitle']: options['enumtitle'] = 1
			elif name in ['-H','--noheaders']: options['noheaders'] = 1
			elif name in ['--stdout']: options['outfile'] = STDOUT
			else: options[name[2:]] = val or 1 # del --
		# save results
		Debug("cmdline arguments: %s"%options, 1)
		self.cmdline_conf = options
		return options
	
	def compose(self, conf=None):
		"""compose full command line from CONF dict

		Only the settings that differ from the defaults are included.
		Returns a single string ('' when conf is empty).
		"""
		if not conf: return ''
		args = []
		cfg = conf.copy()
		valid_opts = self.all_options + self.all_flags
		use_short = {'noheaders':'H', 'enumtitle':'n'}
		# remove useless options
		if cfg.get('toconly'):
			# noheaders is implied and outfile defaults to STDOUT
			# (keys are tested first, a partial conf is not an error)
			for useless in ['noheaders', 'outfile', 'toconly']:
				if useless in cfg: del cfg[useless]
			if cfg.get('type') == 'txt':
				del cfg['type']       # already default
			args.append('--toconly')  # must be the first
		# add target type
		if 'type' in cfg:
			args.append('-t '+cfg['type'])
			del cfg['type']
		# add other options
		for key in cfg.keys():
			if key not in valid_opts: continue  # must be a %!setting
			if key == 'outfile': continue       # later
			val = cfg[key]
			if not val: continue
			# default values are useless on cmdline
			if val == self.dft_options.get(key): continue
			# -short format
			if key in use_short:
				args.append('-'+use_short[key])
				continue
			# --long format
			if key in self.all_flags: # add --option
				args.append('--'+key)
			else:                     # add --option=value
				args.append('--%s=%s'%(key,val))
		# the outfile using -o
		if 'outfile' in cfg and \
		   cfg['outfile'] != self.dft_options.get('outfile'):
			args.append('-o '+cfg['outfile'])
		# the input file is always at the end
		if 'infile' in cfg:
			args.append(cfg['infile'])
		# return as a single string
		ret = ' '.join(args)
		Debug("Diet command line: %s"%ret, 1)
		return ret
	
	def merge(self, extraopts=''):
		"""insert cmdline portion BEFORE current cmdline

		extraopts -- a command line string, or a list of arguments
		"""
		if not extraopts: return
		if isinstance(extraopts, str):
			extraopts = self._tokenize(extraopts)
		# NOTE(review): with no previous cmdline, the first item of
		# extraopts takes the program name slot that parse() skips;
		# confirm that callers expect that
		if not self.cmdline: self.cmdline = extraopts
		else: self.cmdline = ['t2t-merged'] +extraopts +self.cmdline[1:]
		self.parse()
	
	def _get_outfile_name(self, conf):
		"""dirname is the same for {in,out}file

		Returns the automatic output file name: STDOUT when reading
		from STDIN (or when the outfile is STDOUT), else the infile
		name with the .txt/.t2t extension replaced by the target type.
		"""
		infile = conf['infile']
		if not infile: return ''
		if infile == STDIN or conf['outfile'] == STDOUT:
			outfile = STDOUT
		else:
			basename = re.sub(r'\.(txt|t2t)$','',infile)
			outfile = "%s.%s"%(basename, conf['type'])
		self.dft_options['outfile'] = outfile # save for self.compose()
		Debug(" infile: '%s'"%infile , 1)
		Debug("outfile: '%s'"%outfile, 1)
		return outfile
	
	def _sanity(self, dic):
		"""basic cmdline syntax checkings

		Returns dic, with the numeric options converted to integers.
		Any problem found ends the program.
		"""
		if not dic: return {}
		if not dic['infile'] or not dic['type']:
			Quit(usage, 1)                  # no filename/doctype
		if dic['type'] not in targets:          # check target
			Error("Invalid document type '%s' (try --help)"%(
			       dic['type']))
		#DISABLED: conflicting with %!cmdline: -o foo
		#if len(dic['infiles']) > 1 and dic['outfile']: # -o FILE *.t2t
		#	Error("--outfile can't be used with multiple files")
		for opt in self.all_options:            # check numeric options
			# an option is numeric when its default value is a number
			if dic.get(opt) and isinstance(self.dft_options[opt], int):
				try: dic[opt] = int(dic.get(opt)) # save
				except (ValueError, TypeError):
					Error('--%s value must be a number'%opt)
		if dic['split'] not in [0,1,2]:         # check split level
			Error('Option --split must be 0, 1 or 2')
		return dic
	
	def merge_conf(self, newconfs=None, override=0):
		"""include Config Area settings into self.conf

		newconfs -- dictionary with the new settings
		override -- when true, the new values always replace the
		            current ones; when false, they are used only if
		            the current value is still the default one
		Returns self.conf.
		"""
		if not self.conf: self.get_conf()
		if not newconfs: return self.conf
		for key in newconfs.keys():
			if key == 'cmdline': continue   # already done
			# filters are always accumulative
			if key in ['preproc','postproc']:
				if key not in self.conf:
					self.conf[key] = []
				self.conf[key].extend(newconfs[key])
				continue
			# add anyway
			if override:
				self.conf[key] = newconfs[key]
				continue
			if key in self.conf:
				# just update if still 'virgin'
				if self.conf[key] == self.defaults.get(key):
					self.conf[key] = newconfs[key]
			else:
				# add new
				self.conf[key] = newconfs[key]
		
		Debug("Merged CONF (override=%s): %s"%(override,self.conf), 1)
		return self.conf
	
	def _get_empty_conf(self):
		"return a new dic with the default value of all options and flags"
		econf = self.dft_options.copy()
		econf.update(self.dft_flags)
		return econf
	
	def get_conf(self):
		"""set vars and flags according to options dic

		Returns the full configuration dictionary (also saved on
		self.conf), or {} when there is no command line at all.
		"""
		if not self.cmdline_conf:
			if not self.cmdline: return {}
			self.parse()
		dic = self.cmdline_conf
		conf = self.defaults.copy()
		
		## store flags & options
		for flag in self.all_flags:
			if flag in dic: conf[flag] = 1
		for opt in self.all_options + ['infile', 'infiles']:
			if opt in dic: conf[opt] = dic.get(opt)
		
		if not conf['type'] and conf['toconly']: conf['type'] = 'txt'
		if not conf['type'] and conf['dump-source']: conf['type'] = 'txt'
		if not self.nocheck: conf = self._sanity(conf)
		
		## some gotchas for specific issues
		doctype = conf['type']
		infile = conf['infile']
		
		# toconly is stronger than others
		if conf['toconly']:
			conf['noheaders'] = 1
			conf['toc'] = 0
			conf['split'] = 0
			conf['gui'] = 0
			conf['outfile'] = STDOUT
			conf['toclevel'] = conf['toclevel'] or \
			                   self.dft_options['toclevel']
		
		# dump-source is stronger than others (including toconly)
		if conf['dump-source']:
			conf['toconly'] = 0
			conf['noheaders'] = 0
			conf['toc'] = 0
			conf['split'] = 0
			conf['gui'] = 0
			conf['outfile'] = STDOUT
		
		# split: just HTML, no stdout, 1st do a sgml, then sgml2html
		if conf['split']:
			if doctype != 'html':
				conf['split'] = 0
			else:
				conf['type'] = 'sgml'
				if conf['outfile'] == STDOUT:
					conf['outfile'] = ''
		
		outfile = conf['outfile'] or self._get_outfile_name(conf)
		
		# final checkings
		if conf['split'] and outfile == STDOUT:
			Error('--split: You must provide a FILE (not STDIN)')
		if infile == outfile and outfile != STDOUT:
			Error("SUICIDE WARNING!!!  (see --outfile)\n  source"+\
			      " and target files has the same name: "+outfile)
		### author's note: "yes, i've got my sample.t2t file deleted
		### before add this test... :/"
		
		conf['outfile'] = outfile
		conf['cmdline'] = self.cmdline
		Debug("CONF data: %s\n"%conf, 1)
		self.conf = conf
		return self.conf
#
### End of Cmdline class




class Proprierties:
	"""Holds a source document and what was found on its three areas.

	The buffer is split on 'head' (the 3 header lines), 'conf' (the
	%!settings) and 'body'. When a filename is given, the file is
	read and fully scanned at creation time.
	"""
	def __init__(self, filename=''):
		self.buffer = ['']   # text start at pos 1
		self.areas = ['head','conf','body']
		self.arearef = []
		self.headers = ['','','']
		self.config = self.get_empty_config()
		self.lastline = 0
		self.filename = filename
		self.conflines = []
		self.bodylines = []
		if not filename: return
		self.read_file(filename)
		self.find_areas()
		self.set_headers()
		self.set_config()
	
	def read_file(self, file):
		"Append the file lines to the buffer (empty file is an Error)"
		contents = Readfile(file)
		if not contents: Error('Empty file! %s'%file)
		self.buffer.extend(contents)
	
	def get_empty_config(self):
		"Return a dic with an empty dic for each target, plus 'all'"
		return dict([(targ, {}) for targ in targets+['all']])
	
	def find_areas(self):
		"Scan the buffer to find where head, conf and body start"
		lines = self.buffer
		head, conf, body = 1, 4, 0              # defaults
		if not lines[1].strip():                # no header
			head, conf = 0, 2
		for nr in range(conf, len(lines)):      # find body init
			line = lines[nr]
			# the body starts on the first line that is not blank
			# nor a comment, or on the first %!include command
			is_text = line.strip() and line[0] != '%'
			if is_text or ParseConfig(line, 'include'):
				body = nr
				break
		if conf == body: conf = 0               # no conf area
		ref = [head, conf, body]
		for pos in range(3):                    # del !existent
			if not ref[pos]: self.areas[pos] = ''
		self.arearef = ref                      # save results
		self.lastline = len(self.buffer)-1
		Debug('Head,Conf,Body start line: %s'%ref, 1)
		# store CONF and BODY lines found
		cfgend = body or len(lines)
		self.conflines = lines[conf:cfgend]
		if body: self.bodylines = lines[body:]
	
	
	def set_headers(self):
		"Save the 3 header lines (stripped) on self.headers"
		if not self.arearef: self.find_areas()
		if 'head' not in self.areas: return
		if self.lastline < 3:
			#TODO on gui this checking is !working
			template = "Premature end of Headers on '%s'."    \
			           '\n\nFile has %s line(s), but '        \
			           'Headers should be composed by 3 lines. ' \
			           '\nMaybe you should left the first line blank? ' \
			           '(for no headers)'
			Error(template%(self.filename, self.lastline))
		for pos in range(3):
			self.headers[pos] = self.buffer[pos+1].strip()
		# 3 is the number of the last header line
		Debug("Headers found: %s"%self.headers, 1, 3)
	
	def set_config(self):
		"Parse the conf area lines and save the settings on self.config"
		if not self.arearef: self.find_areas()
		if 'conf' not in self.areas: return
		keywords = '|'.join(CONFIG_KEYWORDS)
		valid_targets = targets+['all']
		linenr = self.arearef[1]-1  # for debug messages
		for line in self.conflines:
			linenr = linenr + 1
			# only '%!' lines with some contents are settings
			if len(line) < 3 or line[:2] != '%!': continue
			cfg = ParseConfig(line, keywords)
			# any _valid_ config found?
			if not cfg:
				Debug('Bogus Config Line',1,linenr)
				continue
			# get data
			targ = cfg['target']
			key = cfg['name']
			val = cfg['value']
			# check config target specification
			if targ not in valid_targets:
				Debug("Config Error: Invalid target '%s', ignoring"%targ,
				      1,linenr)
				continue
			if key in ['preproc','postproc']:
				# filters are multiple config, all of them saved
				# to the target 'all' with their own target as the
				# first item (finish_him will decide what to consider)
				filters = self.config['all'].setdefault(key, [])
				filters.append((targ,)+val)
			else:
				self.config[targ][key] = val
			Debug("Found config for target '%s': '%s', value '%s'"%(
			       targ,key,val),1,linenr)
		Debug("All %%!CONFIG: %s"%self.config, 1)


def get_file_body(file):
	"Read a marked file and return only the lines of its BODY area"
	# no filename on creation: headers and config are not parsed
	source = Proprierties()
	source.read_file(file)
	source.find_areas()
	body = source.bodylines
	return body


def finish_him(outlist, CONF):
	"""Writing output to screen or file

	outlist -- list with the converted lines
	CONF    -- configuration dictionary. Keys used: 'outfile', 'type',
	           'gui', 'split', 'lang' and the optional 'postproc'
	           list of (target, pattern, replace) filters

	Returns the final lines when on GUI mode with STDOUT as the
	output, otherwise returns None.
	"""
	outfile = CONF['outfile']
	outlist = unmaskEscapeChar(outlist)
	
	# do PostProc ('postproc' is missing when no filter was set)
	postproc = CONF.get('postproc')
	if postproc:
		# choose just once the filters valid for this target
		valid = [CONF['type'], 'all']
		filters = [(patt,repl) for targ,patt,repl in postproc
		           if targ in valid]
		postoutlist = []
		for line in outlist:
			for patt,repl in filters:
				try: line = re.sub(patt, repl, line)
				except (re.error, IndexError):
					Error("Invalid PostProc filter regex: '%s'"%patt)
			postoutlist.append(line)
		outlist = postoutlist
	
	if outfile == STDOUT:
		if CONF['gui']:
			return outlist
		else:
			for line in outlist: print(line)
	else:
		Savefile(outfile, addLineBreaks(outlist))
		if not CONF['gui']: print('wrote %s'%(outfile))
	
	if CONF['split']:
		print("--- html...")
		# NOTE(review): the file name goes unquoted to the shell, so
		# names with spaces or shell special chars will break this call
		sgml2html = 'sgml2html -s %s -l %s %s'%(
		            CONF['split'],CONF['lang'] or lang,outfile)
		print("Running system command: %s"%sgml2html)
		os.system(sgml2html)


def toc_maker(toc, conf):
	"Convert the TOC marked lines, adding the separator bars for --toc"
	# when the target has a TOC tag, there's nothing to do here
	if TAGS['TOC']:
		return []
	wants_toc = conf['toc']
	# toc is a valid t2t marked text (list type), that is converted
	# using a plain configuration: no headers, no filters
	if wants_toc or conf['toconly']:
		fakeconf = conf.copy()
		fakeconf.update({
			'noheaders'  : 1,
			'toconly'    : 0,
			'maskemail'  : 0,
			'dump-source': 0,
			'preproc'    : [],
			'postproc'   : [],
		})
		converted = convert(toc, fakeconf)
		toc = converted[0]
		if len(converted) != 2:
			# keep the error of a failed 2-item unpacking
			toc, _unused = converted
	# TOC between bars (not for --toconly)
	if wants_toc:
		bar = regex['x'].sub('-'*72, TAGS['bar1'])
		para = TAGS['paragraph']
		toc = [para, bar, para] + toc + [para, bar, para]
	return toc


def getTags(doctype):
	"""Return the tags dictionary for the given target (doctype).

	Every name listed on 'keys' is always present on the returned
	dictionary, holding an empty string when the target does not
	define that tag. The extra names a target may define (as the
	table align ones) are present only for that target.
	All the values are passed through maskEscapeChar().
	Raises KeyError if doctype is not a known target.
	"""
	# on the tag contents, \a (bell) is the special char that marks
	# the place where the data will be inserted
	keys = [
	'paragraph','title1','title2','title3','title4','title5',
	'numtitle1','numtitle2','numtitle3','numtitle4','numtitle5',
	'areaPreOpen','areaPreClose',
	'areaQuoteOpen','areaQuoteClose',
	'fontMonoOpen','fontMonoClose',
	'fontBoldOpen','fontBoldClose',
	'fontItalicOpen','fontItalicClose',
	'fontBolditalicOpen','fontBolditalicClose',
	'fontUnderlineOpen','fontUnderlineClose',
	'listOpen','listClose','listItem',
	'numlistOpen','numlistClose','numlistItem',
	'deflistOpen','deflistClose','deflistItem1','deflistItem2',
	'bar1','bar2',
	'url','urlMark','email','emailMark',
	'img','imgsolo',
	'tableOpen','tableClose','tableLineOpen','tableLineClose',
	'tableCellOpen','tableCellClose',
	'tableTitleCellOpen','tableTitleCellClose',
	'anchor','comment','TOC',
	'EOD'
	]
	
	alltags = {
	
	'txt': {
	   'title1'              : '  \a'      ,
	   'title2'              : '\t\a'      ,
	   'title3'              : '\t\t\a'    ,
	   'title4'              : '\t\t\t\a'  ,
	   'title5'              : '\t\t\t\t\a',
	   'areaQuoteOpen'       : '    '      ,
	   'listItem'            : '- '        ,
	   'numlistItem'         : '\a. '      ,
	   'bar1'                : '\a'        ,
	   'bar2'                : '\a'        ,
	   'url'                 : '\a'        ,
	   'urlMark'             : '\a (\a)'   ,
	   'email'               : '\a'        ,
	   'emailMark'           : '\a (\a)'   ,
	   'img'                 : '[\a]'      ,
	},
	
	'html': {
	   'paragraph'           : '<P>'            ,
	   'title1'              : '<H1>\a</H1>'    ,
	   'title2'              : '<H2>\a</H2>'    ,
	   'title3'              : '<H3>\a</H3>'    ,
	   'title4'              : '<H4>\a</H4>'    ,
	   'title5'              : '<H5>\a</H5>'    ,
	   'areaPreOpen'         : '<PRE>'          ,
	   'areaPreClose'        : '</PRE>'         ,
	   'areaQuoteOpen'       : '<BLOCKQUOTE>'   ,
	   'areaQuoteClose'      : '</BLOCKQUOTE>'  ,
	   'fontMonoOpen'        : '<CODE>'         ,
	   'fontMonoClose'       : '</CODE>'        ,
	   'fontBoldOpen'        : '<B>'            ,
	   'fontBoldClose'       : '</B>'           ,
	   'fontItalicOpen'      : '<I>'            ,
	   'fontItalicClose'     : '</I>'           ,
	   'fontBolditalicOpen'  : '<B><I>'         ,
	   'fontBolditalicClose' : '</I></B>'       ,
	   'fontUnderlineOpen'   : '<U>'            ,
	   'fontUnderlineClose'  : '</U>'           ,
	   'listOpen'            : '<UL>'           ,
	   'listClose'           : '</UL>'          ,
	   'listItem'            : '<LI>'           ,
	   'numlistOpen'         : '<OL>'           ,
	   'numlistClose'        : '</OL>'          ,
	   'numlistItem'         : '<LI>'           ,
	   'deflistOpen'         : '<DL>'           ,
	   'deflistClose'        : '</DL>'          ,
	   'deflistItem1'        : '<DT>\a</DT>'    ,
	   'deflistItem2'        : '<DD>'           ,
	   'bar1'                : '<HR NOSHADE SIZE=1>'        ,
	   'bar2'                : '<HR NOSHADE SIZE=5>'        ,
	   'url'                 : '<A HREF="\a">\a</A>'        ,
	   'urlMark'             : '<A HREF="\a">\a</A>'        ,
	   'email'               : '<A HREF="mailto:\a">\a</A>' ,
	   'emailMark'           : '<A HREF="mailto:\a">\a</A>' ,
	   'img'                 : '<IMG ALIGN="\a" SRC="\a" BORDER="0">',
	   'imgsolo'             : '<P ALIGN="center">\a</P>'   ,
	   'tableOpen'           : '<table\a cellpadding=4 border=\a>',
	   'tableClose'          : '</table>'       ,
	   'tableLineOpen'       : '<tr>'           ,
	   'tableLineClose'      : '</tr>'          ,
	   'tableCellOpen'       : '<td\a>'         ,
	   'tableCellClose'      : '</td>'          ,
	   'tableTitleCellOpen'  : '<th>'           ,
	   'tableTitleCellClose' : '</th>'          ,
	   'tableAlignLeft'      : ''               ,
	   'tableAlignCenter'    : ' align="center"',
	   'tableCellAlignLeft'  : ''               ,
	   'tableCellAlignRight' : ' align="right"' ,
	   'tableCellAlignCenter': ' align="center"',
	   'anchor'              : '<a name="\a"></a>',
	   'comment'             : '<!-- \a -->'    ,
	   'EOD'                 : '</BODY></HTML>'
	},
	
	'sgml': {
	   'paragraph'           : '<p>'                ,
	   'title1'              : '<sect>\a<p>'        ,
	   'title2'              : '<sect1>\a<p>'       ,
	   'title3'              : '<sect2>\a<p>'       ,
	   'title4'              : '<sect3>\a<p>'       ,
	   'title5'              : '<sect4>\a<p>'       ,
	   'areaPreOpen'         : '<tscreen><verb>'    ,
	   'areaPreClose'        : '</verb></tscreen>'  ,
	   'areaQuoteOpen'       : '<quote>'            ,
	   'areaQuoteClose'      : '</quote>'           ,
	   'fontMonoOpen'        : '<tt>'               ,
	   'fontMonoClose'       : '</tt>'              ,
	   'fontBoldOpen'        : '<bf>'               ,
	   'fontBoldClose'       : '</bf>'              ,
	   'fontItalicOpen'      : '<em>'               ,
	   'fontItalicClose'     : '</em>'              ,
	   'fontBolditalicOpen'  : '<bf><em>'           ,
	   'fontBolditalicClose' : '</em></bf>'         ,
	   'fontUnderlineOpen'   : '<bf><em>'           ,
	   'fontUnderlineClose'  : '</em></bf>'         ,
	   'listOpen'            : '<itemize>'          ,
	   'listClose'           : '</itemize>'         ,
	   'listItem'            : '<item>'             ,
	   'numlistOpen'         : '<enum>'             ,
	   'numlistClose'        : '</enum>'            ,
	   'numlistItem'         : '<item>'             ,
	   'deflistOpen'         : '<descrip>'          ,
	   'deflistClose'        : '</descrip>'         ,
	   'deflistItem1'        : '<tag>\a</tag>'      ,
	   'bar1'                : '<!-- \a -->'        ,
	   'bar2'                : '<!-- \a -->'        ,
	   'url'                 : '<htmlurl url="\a" name="\a">'        ,
	   'urlMark'             : '<htmlurl url="\a" name="\a">'        ,
	   'email'               : '<htmlurl url="mailto:\a" name="\a">' ,
	   'emailMark'           : '<htmlurl url="mailto:\a" name="\a">' ,
	   'img'                 : '<figure><ph vspace=""><img src="\a">'+\
	                           '</figure>'                           ,
	   'tableOpen'           : '<table><tabular ca="\a">'            ,
	   'tableClose'          : '</tabular></table>' ,
	   'tableLineClose'      : '<rowsep>'           ,
	   'tableCellClose'      : '<colsep>'           ,
	   'tableTitleCellClose' : '<colsep>'           ,
	   'tableColAlignLeft'   : 'l'                  ,
	   'tableColAlignRight'  : 'r'                  ,
	   'tableColAlignCenter' : 'c'                  ,
	   'comment'             : '<!-- \a -->'        ,
	   'TOC'                 : '<toc>'              ,
	   'EOD'                 : '</article>'
	},
	   
	'tex': {
	   # every backslash is doubled here, no reliance on Python
	   # leaving unrecognized escapes (as \s or \l) untouched
	   'title1'              : '\n\\newpage\\section*{\a}',
	   'title2'              : '\\subsection*{\a}'       ,
	   'title3'              : '\\subsubsection*{\a}'    ,
	   # title 4/5: DIRTY: para+BF+\\+\n
	   'title4'              : '\\paragraph{}\\textbf{\a}\\\\\n',
	   'title5'              : '\\paragraph{}\\textbf{\a}\\\\\n',
	   'numtitle1'           : '\n\\newpage\\section{\a}',
	   'numtitle2'           : '\\subsection{\a}'       ,
	   'numtitle3'           : '\\subsubsection{\a}'    ,
	   'areaPreOpen'         : '\\begin{verbatim}'   ,
	   'areaPreClose'        : '\\end{verbatim}'     ,
	   'areaQuoteOpen'       : '\\begin{quotation}'  ,
	   'areaQuoteClose'      : '\\end{quotation}'    ,
	   'fontMonoOpen'        : '\\texttt{'           ,
	   'fontMonoClose'       : '}'                   ,
	   'fontBoldOpen'        : '\\textbf{'           ,
	   'fontBoldClose'       : '}'                   ,
	   'fontItalicOpen'      : '\\textit{'           ,
	   'fontItalicClose'     : '}'                   ,
	   'fontBolditalicOpen'  : '\\textbf{\\textit{'  ,
	   'fontBolditalicClose' : '}}'                  ,
	   'fontUnderlineOpen'   : '\\underline{'        ,
	   'fontUnderlineClose'  : '}'                   ,
	   'listOpen'            : '\\begin{itemize}'    ,
	   'listClose'           : '\\end{itemize}'      ,
	   'listItem'            : '\\item '             ,
	   'numlistOpen'         : '\\begin{enumerate}'  ,
	   'numlistClose'        : '\\end{enumerate}'    ,
	   'numlistItem'         : '\\item '             ,
	   'deflistOpen'         : '\\begin{description}',
	   'deflistClose'        : '\\end{description}'  ,
	   'deflistItem1'        : '\\item[\a]'          ,
	   'bar1'                : '\n\\hrulefill{}\n'   ,
	   'bar2'                : '\n\\rule{\\linewidth}{1mm}\n',
	   'url'                 : '\\url{\a}'                  ,
	   'urlMark'             : '\\textit{\a} (\\url{\a})'   ,
	   'email'               : '\\url{\a}'                  ,
	   'emailMark'           : '\\textit{\a} (\\url{\a})'   ,
	   'img'                 : '\\begin{figure}\\includegraphics{\a}'+\
	                           '\\end{figure}',
	   'tableOpen'           : '\\begin{center}\\begin{tabular}{\a|}',
	   'tableClose'          : '\\end{tabular}\\end{center}',
	   'tableLineOpen'       : '\\hline ' ,
	   'tableLineClose'      : ' \\\\'    ,
	   'tableCellClose'      : ' & '      ,
	   'tableTitleCellOpen'  : '\\textbf{',
	   'tableTitleCellClose' : '} & '     ,
	   'tableColAlignLeft'   : '|l'       ,
	   'tableColAlignRight'  : '|r'       ,
	   'tableColAlignCenter' : '|c'       ,
	   'comment'             : '% \a'     ,
	   'TOC'                 : '\\newpage\\tableofcontents',
	   'EOD'                 : '\\end{document}'
	},
	
	'moin': {
	   'title1'              : '= \a ='        ,
	   'title2'              : '== \a =='      ,
	   'title3'              : '=== \a ==='    ,
	   'title4'              : '==== \a ===='  ,
	   'title5'              : '===== \a =====',
	   'areaPreOpen'         : '{{{'           ,
	   'areaPreClose'        : '}}}'           ,
	   'areaQuoteOpen'       : ' '             ,
	   'fontMonoOpen'        : '{{{'           ,
	   'fontMonoClose'       : '}}}'           ,
	   'fontBoldOpen'        : "'''"           ,
	   'fontBoldClose'       : "'''"           ,
	   'fontItalicOpen'      : "''"            ,
	   'fontItalicClose'     : "''"            ,
	   'fontBolditalicOpen'  : "'''''"         ,
	   'fontBolditalicClose' : "'''''"         ,
	   'fontUnderlineOpen'   : "'''''"         ,
	   'fontUnderlineClose'  : "'''''"         ,
	   'listItem'            : ' * '           ,
	   'numlistItem'         : ' \a. '         ,
	   'bar1'                : '----'          ,
	   'bar2'                : '----'          ,
	   'url'                 : '[\a]'          ,
	   'urlMark'             : '[\a \a]'       ,
	   'email'               : '[\a]'          ,
	   'emailMark'           : '[\a \a]'       ,
	   'img'                 : '[\a]'          ,
	   'tableLineOpen'       : '||'            ,
	   'tableCellClose'      : '||'            ,
	   'tableTitleCellClose' : '||'
	},
	
	'mgp': {
	   'paragraph'           : '%font "normal", size 5\n'   ,
	   'title1'              : '%page\n\n\a'                ,
	   'title2'              : '%page\n\n\a'                ,
	   'title3'              : '%page\n\n\a'                ,
	   'title4'              : '%page\n\n\a'                ,
	   'title5'              : '%page\n\n\a'                ,
	   'areaPreOpen'         : '\n%font "mono"'             ,
	   'areaPreClose'        : '%font "normal"'             ,
	   'areaQuoteOpen'       : '%prefix "       "'          ,
	   'areaQuoteClose'      : '%prefix "  "'               ,
	   'fontMonoOpen'        : '\n%cont, font "mono"\n'     ,
	   'fontMonoClose'       : '\n%cont, font "normal"\n'   ,
	   'fontBoldOpen'        : '\n%cont, font "normal-b"\n' ,
	   'fontBoldClose'       : '\n%cont, font "normal"\n'   ,
	   'fontItalicOpen'      : '\n%cont, font "normal-i"\n' ,
	   'fontItalicClose'     : '\n%cont, font "normal"\n'   ,
	   'fontBolditalicOpen'  : '\n%cont, font "normal-bi"\n',
	   'fontBolditalicClose' : '\n%cont, font "normal"\n'   ,
	   'fontUnderlineOpen'   : '\n%cont, fore "cyan"\n'     ,
	   'fontUnderlineClose'  : '\n%cont, fore "white"\n'    ,
	   'numlistItem'         : '\a. '                       ,
	   'bar1'                : '%bar "white" 5'             ,
	   'bar2'                : '%pause'                     ,
	   'url'                 : '\n%cont, fore "cyan"\n\a'   +\
	                           '\n%cont, fore "white"\n'    ,
	   'urlMark'             : '\a \n%cont, fore "cyan"\n\a'+\
	                           '\n%cont, fore "white"\n'    ,
	   'email'               : '\n%cont, fore "cyan"\n\a'   +\
	                           '\n%cont, fore "white"\n'    ,
	   'emailMark'           : '\a \n%cont, fore "cyan"\n\a'+\
	                           '\n%cont, fore "white"\n'    ,
	   'img'                 : '\n%center\n%newimage "\a", left\n',
	   'comment'             : '%% \a'                      ,
	   'EOD'                 : '%%EOD'
	},
	
	'man': {
	   'paragraph'           : '.P'     ,
	   'title1'              : '.SH \a' ,
	   'title2'              : '.SS \a' ,
	   'title3'              : '.SS \a' ,
	   'title4'              : '.SS \a' ,
	   'title5'              : '.SS \a' ,
	   'areaPreOpen'         : '.nf'    ,
	   'areaPreClose'        : '.fi\n'  ,
	   'areaQuoteOpen'       : '\n'     ,
	   'areaQuoteClose'      : '\n'     ,
	   'fontBoldOpen'        : '\\fB'   ,
	   'fontBoldClose'       : '\\fP'   ,
	   'fontItalicOpen'      : '\\fI'   ,
	   'fontItalicClose'     : '\\fP'   ,
	   'fontBolditalicOpen'  : '\\fI'   ,
	   'fontBolditalicClose' : '\\fP'   ,
	   'listOpen'            : '\n.nf'  ,  # pre
	   'listClose'           : '.fi\n'  ,
	   'listItem'            : '* '     ,
	   'numlistOpen'         : '\n.nf'  ,  # pre
	   'numlistClose'        : '.fi\n'  ,
	   'numlistItem'         : '\a. '   ,
	   'bar1'                : '\n\n'   ,
	   'bar2'                : '\n\n'   ,
	   'url'                 : '\a'     ,
	   'urlMark'             : '\a (\a)',
	   'email'               : '\a'     ,
	   'emailMark'           : '\a (\a)',
	   'img'                 : '\a'     ,
	   'comment'             : '.\\" \a'
	},
	
	'pm6': {
	   'paragraph'           : '<@Normal:>'    ,
	   'title1'              : '\n<@Title1:>\a',
	   'title2'              : '\n<@Title2:>\a',
	   'title3'              : '\n<@Title3:>\a',
	   'title4'              : '\n<@Title4:>\a',
	   'title5'              : '\n<@Title5:>\a',
	   'areaPreOpen'         : '<@PreFormat:>' ,
	   'areaQuoteOpen'       : '<@Quote:>'     ,
	   'fontMonoOpen'        : '<FONT "Lucida Console"><SIZE 9>' ,
	   'fontMonoClose'       : '<SIZE$><FONT$>',
	   'fontBoldOpen'        : '<B>'           ,
	   'fontBoldClose'       : '<P>'           ,
	   'fontItalicOpen'      : '<I>'           ,
	   'fontItalicClose'     : '<P>'           ,
	   'fontBolditalicOpen'  : '<B><I>'        ,
	   'fontBolditalicClose' : '<P>'           ,
	   'fontUnderlineOpen'   : '<U>'           ,
	   'fontUnderlineClose'  : '<P>'           ,
	   'listOpen'            : '<@Bullet:>'    ,
	   # bullet + TAB (the TAB is now spelled \t, so it is visible
	   # and survives any whitespace conversion of this file)
	   'listItem'            : '\x95\t'        ,  # \x95 == ~U
	   'numlistOpen'         : '<@Bullet:>'    ,
	   'numlistItem'         : '\x95    '      ,
	   'bar1'                : '\a'            ,
	   'bar2'                : '\a'            ,
	   'url'                 : '<U>\a<P>'      ,  # underline
	   'urlMark'             : '\a <U>\a<P>'   ,
	   'email'               : '\a'            ,
	   'emailMark'           : '\a \a'         ,
	   'img'                 : '\a'
	}
	}
	
	# compose the target tags dictionary
	tags = {}
	target_tags = alltags[doctype]
	for key in keys: tags[key] = ''     # create empty keys
	for key in target_tags.keys():
		tags[key] = maskEscapeChar(target_tags[key]) # populate
	
	return tags


def getRules(doctype):
	"""Return the rules dictionary for the given target (doctype).

	All the known rule names are present on the result: the ones not
	set by the target are 0 (OFF), the others hold the target value.
	Raises KeyError if doctype is not a known target.
	"""
	rulenames = [
	
	 # target rules (ON/OFF)
	  'linkable',           # target supports external links
	  'tableable',          # target supports tables
	  'imglinkable',        # target supports images as links
	  'imgalignable',       # target supports image alignment
	  'imgasdefterm',       # target supports image as definition term
	  'tablealignable',     # target supports table alignment
	  'autonumberlist',     # target supports numbered lists natively
	  'autonumbertitle',    # target supports numbered titles natively
	  'tablecellsplit',     # place delimiters only *between* cells
	  'listnotnested',      # lists cannot be nested
	  'quotenotnested',     # quotes cannot be nested
	  'preareanotescaped',  # don't escape specials in PRE area
	  'escapeurl',          # escape special in link URL
	  
	# target code beautify (ON/OFF)
	  'indentprearea',      # add leading spaces to PRE area lines
	  'breaktablecell',     # break lines after any table cell
	  'breaktablelineopen', # break line after opening table line
	  'keepquoteindent',    # don't remove the leading TABs on quotes
	
	# value settings
	  'listmaxdepth',       # maximum depth for lists
	  'tablecellaligntype'  # type of table cell align: cell, column
	]
	
	# just the rules that each target turns ON (or sets a value)
	bytarget = {
	  'txt' : {'indentprearea':1},
	  'html': {
	    'indentprearea':1,
	    'linkable':1,
	    'imglinkable':1,
	    'imgalignable':1,
	    'imgasdefterm':1,
	    'autonumberlist':1,
	    'tableable':1,
	    'breaktablecell':1,
	    'breaktablelineopen':1,
	    'keepquoteindent':1,
	    'tablealignable':1,
	    'tablecellaligntype':'cell'
	    },
	  'sgml': {
	    'linkable':1,
	    'escapeurl':1,
	    'autonumberlist':1,
	    'tableable':1,
	    'tablecellsplit':1,
	    'quotenotnested':1,
	    'keepquoteindent':1,
	    'tablecellaligntype':'column'
	    },
	  'mgp' : {},
	  'tex' : {
	    'autonumberlist':1,
	    'autonumbertitle':1,
	    'tableable':1,
	    'tablecellsplit':1,
	    'preareanotescaped':1,
	    'listmaxdepth':4,
	    'tablecellaligntype':'column'
	    },
	  'moin': {'linkable':1, 'tableable':1},
	  'man' : {'indentprearea':1, 'listnotnested':1},
	  'pm6' : {}
	}
	
	# everything starts OFF, then the target settings are applied
	chosen = bytarget[doctype]
	ret = {}
	for name in rulenames:
		ret[name] = 0
	ret.update(chosen)
	return ret


def getRegexes():
	"""Compile and return the dictionary with all the marks regexes.

	Most of the values are compiled regex objects, indexed by the mark
	name. The exception is the '_urlskel' key, that holds the dictionary
	of raw pattern pieces used to compose the link regexes.
	Everything is recompiled at each call.
	"""
	# simple marks, one regex each
	regex = {
	# extra at end: (\[(?P<label>\w+)\])?
	'title':
		re.compile(r'^\s*(?P<id>={1,5})(?P<txt>[^=].*[^=])\1\s*$'),
	'numtitle':
		re.compile(r'^\s*(?P<id>\+{1,5})(?P<txt>[^+].*[^+])\1\s*$'),
	'areaPreOpen':
		re.compile(r'^---$'),
	'areaPreClose':
		re.compile(r'^---$'),
	'quote':
		re.compile(r'^\t+'),
	'1linePre':
		re.compile(r'^--- (?=.)'),
	'fontMono':
		re.compile(r'`([^`]+)`'),
	'fontBold':
		re.compile(r'\*\*([^\s*].*?)\*\*'),
	# group 1 is the char before the mark (must not be ':', as in
	# http://), group 2 is the italic text
	'fontItalic':
		re.compile(r'(^|[^:])//([^ /].*?)//'),
	'fontUnderline':
		re.compile(r'__([^_].*?)__'), # underline lead/trailing blank
	'fontBolditalic':
		re.compile(r'\*/([^/].*?)/\*'),
	# groups: indent, list type mark, first char of the item
	'list':
		re.compile(r'^( *)([+-]) ([^ ])'),
	'deflist':
		re.compile(r'^( *)(=) ([^:]+):'),
	'bar':
		re.compile(r'^\s*([_=-]{20,})\s*$'),
	'table':
		re.compile(r'^ *\|\|? '),
	'blankline':
		re.compile(r'^\s*$'),
	'comment':
		re.compile(r'^%'),
	'raw':
		re.compile(r'``(.+?)``')
	}
	
	# special char to place data on TAGs contents  (\a == bell)
	regex['x'] = re.compile('\a')
	
	# %%date [ (formatting) ]
	regex['date'] = re.compile(r'%%date\b(\((?P<fmt>.*?)\))?', re.I)
	
	
	### complicated regexes begin here ;)
	#
	# textual descriptions on --help's style: [...] is optional, | is OR
	
	
	### first, some auxiliary variables
	#
	
	# [image.EXT]
	patt_img = r'\[([\w_,.+%$#@!?+~/-]+\.(png|jpe?g|gif|eps|bmp))\]'
	
	# link things
	# (the values with no brackets are char lists, to be placed inside
	# a [...] by the patterns composed below)
	urlskel = {
	  'proto' : r'(https?|ftp|news|telnet|gopher|wais)://',
	  'guess' : r'(www[23]?|ftp)\.',    # w/out proto, try to guess
	  'login' : r'A-Za-z0-9_.-',        # for ftp://login@domain.com
	  'pass'  : r'[^ @]*',              # for ftp://login:password@dom.com
	  'chars' : r'A-Za-z0-9%._/~:,=$@-',# %20(space), :80(port)
	  'anchor': r'A-Za-z0-9%._-',       # %nn(encoded)
	  'form'  : r'A-Za-z0-9/%&=+.,@*_-',# .,@*_-(as is)
	  'punct' : r'.,;:!?'
	}
	
	# username [ :password ] @
	patt_url_login = r'([%s]+(:%s)?@)?'%(urlskel['login'],urlskel['pass'])
	
	# [ http:// ] [ username:password@ ] domain.com [ / ]
	#     [ #anchor | ?form=data ]
	retxt_url = r'\b(%s%s|%s)[%s]+\b/*(\?[%s]+)?(#[%s]+)?'%(
	             urlskel['proto'],patt_url_login, urlskel['guess'],
	             urlskel['chars'],urlskel['form'],urlskel['anchor'])
	
	# filename | [ filename ] #anchor
	retxt_url_local = r'[%s]+|[%s]*(#[%s]+)'%(
	             urlskel['chars'],urlskel['chars'],urlskel['anchor'])
	
	# user@domain [ ?form=data ]
	patt_email = r'\b[%s]+@([A-Za-z0-9_-]+\.)+[A-Za-z]{2,4}\b(\?[%s]+)?'%(
	             urlskel['login'],urlskel['form'])
	
	
	# saving for future use
	regex['_urlskel'] = urlskel
	
	### and now the real regexes
	#
	
	regex['email'] = re.compile(patt_email,re.I)
	
	# email | url
	regex['link'] = \
		re.compile(r'%s|%s'%(retxt_url,patt_email), re.I)
	
	# \[ label | imagetag    url | email | filename \]
	# NOTE(review): the re.L (locale) flag on a str pattern is accepted
	# by Python 2 only - confirm before running this on Python 3
	regex['linkmark'] = \
		re.compile(r'\[(?P<label>%s|[^]]+) (?P<link>%s|%s|%s)\]'%(
		   patt_img, retxt_url, patt_email, retxt_url_local),
		   re.L+re.I)
	
	# image
	regex['img'] = re.compile(patt_img, re.L+re.I)
	
	# all macros
	# (just an alias, the very same object as regex['date'])
	regex['macro'] = regex['date']
	
	# special things
	regex['special'] = re.compile(r'^%!\s*')
	regex['command'] = re.compile(r'(Include)\s*:\s*(.+)\s*$',re.I)
	return regex
### END OF regex nightmares


class SubareaMaster:
	"""Stack with the names of the currently opened subareas.

	Calling the instance returns the current (innermost) subarea
	name, or an empty string when no subarea is opened.
	"""
	def __init__(self):
		self.x = []   # the subarea names, the innermost is the last
	def __call__(self):
		"Returns the current subarea name, '' if there is none"
		if not self.x: return ''
		return self.x[-1]
	def add(self, area):
		"Opens the subarea, unless it already is the current one"
		if not self.x or self.x[-1] != area:
			self.x.append(area)
		Debug('subarea ++ (%s): %s' % (area,self.x), 1)
	def pop(self, area=None):
		"Closes the subarea, only if it is the current one"
		# the 'self.x' test protects from an IndexError when asked to
		# close a subarea while none is opened
		if area and self.x and self.x[-1] == area: self.x.pop()
		Debug('subarea -- (%s): %s' % (area,self.x), 1)

def doHeader(headers, CONF):
	"""Compose the document headers, returning them as a list of lines.

	headers is a sequence whose first three items are the header lines,
	CONF is the config dictionary. The HEADER_TEMPLATE of the target is
	filled with the headers, the style and the encoding. The template
	lines that only refer to empty data are removed.
	Returns an empty list when the 'noheaders' setting is ON.
	"""
	if CONF['noheaders']: return []
	doctype = CONF['type']
	if not HEADER_TEMPLATE.has_key(doctype):
		Error("doheader: Unknow doctype '%s'"%doctype)
	
	template = string.split(HEADER_TEMPLATE[doctype], '\n')
	
	head_data = {'STYLE':'', 'ENCODING':''}
	for key in head_data.keys():
		# the config keys are the lowercased data names
		val = CONF.get(string.lower(key))
		if key == 'ENCODING': val = get_encoding_string(val, doctype)
		head_data[key] = val
	# parse header contents
	for i in 0,1,2:
		contents = doDateMacro(headers[i])  # expand %%date
		# Escapes - on tex, just do it if any \tag{} present
		if doctype != 'tex' or \
		  (doctype == 'tex' and re.search(r'\\\w+{', contents)):
			contents = doEscape(doctype, contents)
		
		head_data['HEADER%d'%(i+1)] = contents
	Debug("Header Data: %s"%head_data, 1)
	# scan for empty dictionary keys
	# if found, scan template lines for that key reference
	# if there isn't any other key reference on the same line, remove it
	# (a reference left on a kept line is filled with the empty data)
	for key in head_data.keys():
		if head_data.get(key): continue
		keyref = '%%(%s)s'%key
		# a new list is composed instead of removing items from the
		# one being scanned, that would skip the line right after
		# each removed one
		kept = []
		for line in template:
			if string.count(line, keyref):
				sline = string.replace(line, keyref, '')
				if not re.search(r'%\([A-Z0-9]+\)s', sline):
					continue
			kept.append(line)
		template = kept
	# populate template with data
	template = string.join(template, '\n') % head_data
	### post processing
	#
	# let tex format today
	if doctype == 'tex' and head_data['HEADER3'] == currdate:
		# the backslash is doubled on the replacement, \d is not a
		# valid replacement escape
		template = re.sub(r'\\date\{.*?}', r'\\date', template)
	
	return string.split(template, '\n')

def doDateMacro(line):
	"""Expand all the %%date macros found on the line, returning it.

	A plain %%date becomes the ISO current date (currdate), while
	%%date(format) becomes the current time formatted by strftime.
	"""
	re_date = getRegexes()['date']
	m = re_date.search(line)
	while m:
		fmt = m.group('fmt') or ''
		dateme = currdate
		if fmt: dateme = strftime(fmt,localtime(time()))
		# the date is inserted by slicing, so it is always taken as
		# plain text (on a regex replacement, a \ would be special)
		line = line[:m.start()] + dateme + line[m.end():]
		# resume the search after the inserted date, so a date text
		# that looks like the macro can't cause an endless loop
		m = re_date.search(line, m.start()+len(dateme))
	return line

def doCommentLine(txt):
	"Returns txt as a comment line of the target, '' if no comment tag"
	tag = TAGS['comment']
	txt = maskEscapeChar(txt)
	if not tag: return ''
	# the -- string ends a sgml/html comment :(
	# so when the tag uses it, the -- found on the text are broken
	if string.count(tag, '--') and string.count(txt, '--'):
		txt = re.sub('-(?=-)', r'-\\', txt)
	return regex['x'].sub(txt, tag)

def doFooter(CONF):
	"Returns the document footer lines: two comments and the EOD tag"
	doctype = CONF['type']
	args = CONF['cmdline']
	# the friendly name of the target
	if doctype == 'tex': typename = 'LaTeX2e'
	else               : typename = doctype
	ppgd = '%s code generated by txt2tags %s (%s)'%(
	        typename,my_version,my_url)
	# the first cmdline item is skipped
	cmdline = 'cmdline: txt2tags %s'%string.join(args[1:], ' ')
	return ['\n'+doCommentLine(ppgd),
	        doCommentLine(cmdline),
	        TAGS['EOD']]

# TODO mgp: any line (header or not) can't begin with % (add a space before)
def doEscape(doctype,txt):
	"Escapes on txt the chars that are special to the target (doctype)"
	if doctype == 'html' or doctype == 'sgml':
		# the & goes first, it is part of the other entities
		for char, entity in [('&','&amp;'), ('<','&lt;'), ('>','&gt;')]:
			txt = re.sub(char, entity, txt)
		if doctype == 'sgml':
			txt = re.sub('\xff','&yuml;',txt)  # "+y
		return txt
	if doctype == 'pm6':
		return re.sub('<','<\#60>',txt)
	if doctype == 'mgp':
		# add leading blank to avoid parse
		return re.sub('^%',' %',txt)
	if doctype == 'man':
		txt = re.sub("^([.'])", '\\&\\1',txt)           # command ID
		return string.replace(txt,ESCCHAR, ESCCHAR+'e') # \e
	if doctype == 'tex':
		# the escape char is hidden under a temporary text, so the
		# escapes added here are not confused with it
		hidden = '@@LaTeX-escaping-SUX@@'
		txt = string.replace(txt, ESCCHAR, hidden)
		txt = re.sub('([#$&%{}])', r'\\\1', txt)
		for char, escaped in [('~', r'\~{}'), ('^', r'\^{}'),
		                      (hidden, r'$\backslash$')]:
			txt = string.replace(txt, char, maskEscapeChar(escaped))
		# TIP the _ is escaped at the end
		return txt
	# the other targets have no escapes
	return txt

def doFinalEscape(doctype, txt):
	"The last escapes, applied on each line of some targets"
	if doctype == 'pm6':
		return string.replace(txt,ESCCHAR+'<',r'<\#92><')
	if doctype == 'man':
		return string.replace(txt, '-', r'\-')
	if doctype == 'tex':
		return string.replace(txt, '_', r'\_')
	if doctype == 'sgml':
		return string.replace(txt, '[', '&lsqb;')
	# no final escapes for the other targets
	return txt

def EscapeCharHandler(action, data):
	"Masks or unmasks the Escape Char on data, action is mask|unmask"
	# blank data is returned as is (even before checking the action)
	if not string.strip(data): return data
	if action == 'mask':
		return string.replace(data,'\\',ESCCHAR)
	if action != 'unmask':
		Error("EscapeCharHandler: Invalid action '%s'"%action)
	return string.replace(data,ESCCHAR,'\\')

def maskEscapeChar(data):
	"Replace any Escape Char (backslash) with a text mask (Input: str or list)"
	if type(data) != type([]):
		return EscapeCharHandler('mask',data)
	# it is a list: mask each one of the items
	masked = []
	for item in data:
		masked.append(EscapeCharHandler('mask', item))
	return masked

def unmaskEscapeChar(data):
	"Undo the Escape Char (backslash) masking (Input: str or list)"
	if type(data) != type([]):
		return EscapeCharHandler('unmask',data)
	# it is a list: unmask each one of the items
	unmasked = []
	for item in data:
		unmasked.append(EscapeCharHandler('unmask', item))
	return unmasked

def addLineBreaks(list):
	"Returns a new list, with all the line breaks of the lines set to LB"
	# the embedded \n's are changed to LB and then a final LB is
	# added to the line
	return map(lambda line: string.replace(line,'\n',LB)+LB, list)

def doPreLine(doctype,line):
	"Returns the preformatted (verbatim) line ready for the target"
	# escape the specials, unless the target says they are safe
	if not rules['preareanotescaped']:
		line = doEscape(doctype,line)
	# two leading blanks as indent
	if rules['indentprearea']:
		line = '  '+line
	if doctype == 'pm6':
		line = doFinalEscape(doctype, line)
	return line

def doCloseTable(doctype):
	"Returns the table closing tag, also resetting the table status"
	global subarea, tableborder
	if not rules['tableable']:
		# the target has no tables, it was shown as a PRE area
		ret = TAGS['areaPreClose']
	else:
		ret = TAGS['tableClose']+'\n'
		# on tex, a bordered table gets one more line tag at the end
		if doctype == 'tex' and tableborder:
			ret = TAGS['tableLineOpen']+ret
	tableborder = 0
	subarea.pop('table')
	return ret

def doCloseQuote(howmany=None):
	"Closes 'howmany' quote levels (default: all), returning the tags"
	global quotedepth
	pending = howmany or len(quotedepth)
	closetags = []
	while pending > 0:
		quotedepth.pop()
		#TODO align open/close tag -> FREE_ALING_TAG = 1 (man not)
		closetags.append(TAGS['areaQuoteClose'])
		pending = pending - 1
	
	# no quote level remains opened
	if not quotedepth: subarea.pop('quote')
	return string.join(closetags,'\n')

def doCloseList(howmany=None):
	"Closes 'howmany' list levels (default: all), returning the tags"
	global listindent, listids
	closetags = []
	pending = howmany or len(listindent)
	while pending > 0:
		pending = pending - 1
		# the closing tag depends on the type of the innermost list
		if   listids[-1] == '-': tag = TAGS['listClose']
		elif listids[-1] == '+': tag = TAGS['numlistClose']
		elif listids[-1] == '=': tag = TAGS['deflistClose']
		tag = tag or TAGS['listClose'] # default
		if tag:
			if not rules['listnotnested']:
				closetags.append(listindent[-1]+tag)
			# unnested lists are only closed at mother-list
			elif len(listindent) == 1:
				closetags.append(tag)
		del listindent[-1]
		del listids[-1]
	
	# no list level remains opened
	if not listindent: subarea.pop('list')
	return string.join(closetags,'\n')


def beautify_me(name, line):
	"where name is: bold, italic, underline or bolditalic"
	name = 'font%s' % string.capitalize(name)
	tags = (TAGS[name+'Open'], TAGS[name+'Close'])
	if name == 'fontItalic':
		# the italic regex has one extra group before the text, that
		# must be restored
		repl = r'\1%s\2%s' % tags
	else:
		repl = r'%s\1%s' % tags
	return regex[name].sub(repl,line)


def get_tagged_link(label, url, CONF):
	"""Returns the link (url or email) with the tags of the target.

	label is the link text (empty for a simple link), url is the link
	address and CONF is the config dictionary, from where the 'type'
	and 'maskemail' settings are read.
	"""
	ret = ''
	doctype = CONF['type']
	
	# set link type
	if regex['email'].match(url):
		linktype = 'email'
	else:
		linktype = 'url';
	
	# escape specials from TEXT parts
	label = doEscape(doctype,label)
	
	# escape specials from link URL
	if rules['linkable'] and rules['escapeurl']:
		url = doEscape(doctype, url)
	
	# if not linkable, the URL is plain text, that needs escape
	if not rules['linkable']:
		if doctype == 'tex':
			url = re.sub('^#', '\#', url) # ugly, but compile
		else:
			url = doEscape(doctype,url)
	
	# adding protocol to guessed link
	guessurl = ''
	if linktype == 'url' and \
	   re.match(regex['_urlskel']['guess'], url):
		# the guess pattern only matches www... or ftp... addresses,
		# so the first char is enough to tell them apart
		if url[0] == 'w': guessurl = 'http://' +url
		else            : guessurl =  'ftp://' +url
		
		# not link aware targets -> protocol is useless
		if not rules['linkable']: guessurl = ''
	
	# simple link (not guessed)
	if not label and not guessurl:
		if CONF['maskemail'] and linktype == 'email':
			# do the email mask feature (no TAGs, just text)
			url = string.replace(url,'@',' (a) ')
			url = string.replace(url,'.',' ')
			url = "<%s>" % url
			if rules['linkable']: url = doEscape(doctype, url)
			ret = url
		else:
			# just add link data to tag
			# (with no count, every \a of the tag gets the url)
			tag = TAGS[linktype]
			ret = regex['x'].sub(url,tag)
	
	# named link or guessed simple link
	else:
		# adjusts for guessed link
		if not label: label = url       # no   protocol
		if guessurl : url   = guessurl  # with protocol
		
		# change image tag for !supported img+link targets
		if regex['img'].match(label) and not rules['imglinkable']:
			label = "(%s)"%regex['img'].match(label).group(1)
		
		# putting data on the right appearance order
		if rules['linkable']:
			urlorder = [url, label]   # link before label
		else:
			urlorder = [label, url]   # label before link
		
		# add link data to tag (replace \a's)
		# (one \a at a time, following the order defined above)
		ret = TAGS["%sMark"%linktype]
		for data in urlorder:
			ret = regex['x'].sub(data,ret,1)
	
	return ret


def get_image_align(line):
	"Returns the align of the first image of the line, by its position"
	line = string.strip(line)
	m = regex['img'].search(line)
	at_start = (m.start() == 0)
	at_end   = (m.end() == len(line))
	
	if at_start and at_end: return 'para'   # ^img$
	if at_start           : return 'left'   # ^img + text$
	if at_end             : return 'right'  # ^text + img$
	return 'middle'  # default align        # ^text +img +text$


def get_tablecell_align(cells):
	"""Returns a list with the align of each one of the table cells.

	The align is found by the blanks around the cell contents:
	blanks on both sides is 'Center', just at the left side is
	'Right', anything else is 'Left'. Empty (or blank only) cells
	are always 'Left'.
	"""
	ret = []
	for cell in cells:
		align = 'Left'
		# the strip() string method is used here, the string.strip()
		# module function is not available on newer Pythons
		if cell.strip():
			if cell[0] == ' ' and cell[-1] == ' ': align = 'Center'
			elif cell[0] == ' ': align = 'Right'
		ret.append(align)
	return ret


def get_table_prop(line):
	"Returns a dictionary with the proprierties of the table line"
	# a leading blank before the table mark: centered table
	align = 'Left'
	if line[0] == ' ': align = 'Center'
	line = string.lstrip(line)
	# a | as the second char is the header (title) mark
	header = 0
	if line[1] == '|': header = 1
	# delete trailing spaces after last cell border
	line = re.sub('\|\s*$','|', line)
	# a | at the line end is the border mark, remove it (and the
	# blank before it)
	border = 0
	if line[-1] == '|':
		border = 1
		line = line[:-2]
	# delete table mark and split cells
	line = regex['table'].sub('', line)
	cells = string.split(line, ' | ')
	
	ret = {'border':border,'header':header,'align':align,
	       'cells':cells,'cellalign':get_tablecell_align(cells)}
	Debug('Table Prop: %s' % ret, 1)
	return ret


def tag_table_cells(table, doctype):
	"""
	Returns a string with all the cells of a table line already tagged.
	'table' is a table properties dictionary (see get_table_prop).
	Note that the 'cells' list is consumed here, and so is the
	'cellalign' one when the target aligns by cell.
	"""
	celltag_open  = TAGS['tableCellOpen']
	celltag_close = TAGS['tableCellClose']
	# title lines have their own cell tags
	if table['header']:
		celltag_open  = TAGS['tableTitleCellOpen']
		celltag_close = TAGS['tableTitleCellClose']
	# some targets want a line break after each cell
	if rules['breaktablecell']: celltag_close = celltag_close+'\n'
	
	tagged = []
	while table['cells']:
		thisopen = celltag_open
		contents = table['cells'].pop(0)
		# targets that align by cell: put the align on the open tag
		if rules['tablecellaligntype'] == 'cell':
			aligntag = TAGS['tableCellAlign%s'%table['cellalign'].pop(0)]
			thisopen = string.replace(celltag_open,'\a',aligntag)
		# HTML: an empty cell needs some contents to be shown
		if doctype == 'html' and not contents: contents = '&nbsp;'
		# the last cell of the line is special
		if not table['cells']:
			# the close tag is a separator, not needed at the end
			if rules['tablecellsplit']: celltag_close = ''
			# tex: close the beautifier of the last title cell
			if doctype == 'tex' and table['header']: celltag_close = '}'
		tagged.append(thisopen + string.strip(contents) + celltag_close)
	return string.join(tagged, '')


def get_tableopen_tag(table_prop, doctype):
	"""
	Returns the table opening tag for the target, filled (when the
	target rules say so) with the table align and the columns align.
	What is left to fill on the tag receives the border value.
	The border state is also saved on the 'tableborder' global.
	"""
	global tableborder
	opentag = TAGS['tableOpen']
	# the first table line decides if the whole table has border
	tableborder = table_prop['border']
	# full table align
	if rules['tablealignable']:
		aligntag = TAGS['tableAlign'+table_prop['align']]
		opentag = regex['x'].sub(aligntag, opentag, 1)
	# columns align: the tags for all the columns, joined
	if rules['tablecellaligntype'] == 'column':
		colspec = ''
		for cellalign in table_prop['cellalign']:
			colspec = colspec + TAGS['tableColAlign%s'%cellalign]
		opentag = regex['x'].sub(colspec, opentag, 1)
	# tex table spec, with border: {|l|c|r|} , without: {lcr}
	if doctype == 'tex' and not tableborder:
		opentag = string.replace(opentag,'|','')
	# and finally the border
	return regex['x'].sub(repr(tableborder), opentag)


# reference: http://www.iana.org/assignments/character-sets
# http://www.drclue.net/F1.cgi/HTML/META/META.html
def get_encoding_string(enc, doctype):
	"""
	Returns the encoding name to be used on the target document.
	
	The given name is first normalized: the aliases for ASCII, for
	the IBM 850/852 code pages and for the ISO-8859 charsets are
	converted to a canonical form. Then it is translated to the
	target specific name, if the target has a translation table
	(only 'tex' for now, using the LaTeX inputenc names).
	Names not found on the table are returned just normalized.
	An empty 'enc' returns an empty string.
	"""
	if not enc: return ''
	# target specific translation table
	translate = {
	'tex': {
	  # missing: ansinew , applemac , cp437 , cp437de , cp865
	  'us-ascii'    : 'ascii',
	  'windows-1250': 'cp1250',
	  'windows-1252': 'cp1252',
	  'ibm850'      : 'cp850',
	  'ibm852'      : 'cp852',
	  'iso-8859-1'  : 'latin1',
	  'iso-8859-2'  : 'latin2',
	  'iso-8859-3'  : 'latin3',
	  'iso-8859-4'  : 'latin4',
	  # the "latinN" number is NOT always the ISO-8859 part number:
	  # latin5 is ISO-8859-9 and latin9 is ISO-8859-15
	  'iso-8859-5'  : 'iso88595',   # cyrillic (LaTeX cyrillic bundle)
	  'iso-8859-9'  : 'latin5',
	  'iso-8859-15' : 'latin9',
	  'koi8-r'      : 'koi8-r'
	  }
	}
	# normalization
	enc = re.sub('(?i)(us[-_]?)?ascii|us|ibm367','us-ascii'  , enc)
	enc = re.sub('(?i)(ibm|cp)?85([02])'        ,'ibm85\\2'  , enc)
	enc = re.sub('(?i)(iso[_-]?)?8859[_-]?'     ,'iso-8859-' , enc)
	# ISO-8859 with no (valid) part number defaults to part 1
	enc = re.sub('iso-8859-($|[^1-9]).*'        ,'iso-8859-1', enc)
	# apply translation table: targets with no table and names not
	# listed on it keep the normalized name
	table = translate.get(doctype) or {}
	return table.get(string.lower(enc), enc)


################################################################################
###MerryChristmas,IdontwanttofighttonightwithyouImissyourbodyandIneedyourlove###
################################################################################


def getAllConf(cmdlinelist, nocheck=0):
	"""
	Returns a list of (File Configuration, File Properties) tuples,
	one for each Input file found on the command line. Each
	Configuration is the command line options merged with the
	%!cmdline settings and the other config found on the file.
	If there are no Input files, an empty list is returned.
	"""
	confs = []
	# a first parsing is needed just to know the input files
	cmdline = Cmdline(cmdlinelist, nocheck)
	infiles = cmdline.cmdline_conf.get('infiles')
	if not infiles: return []
	
	for infile in infiles:                 # multifile support
		# from the 2nd file on, begin with a brand new Cmdline()
		if confs: cmdline = Cmdline(cmdlinelist, nocheck)
		# read the file Headers and Config
		prop = Proprierties(infile)
		# which %!cmdline to use? the target specific one is taken
		# only if the command line target is valid and the file has
		# a %!cmdline for it, otherwise it will be the generic one
		target = cmdline.cmdline_conf.get('type')
		cfgtarget = 'all'
		if target and target in targets:
			if prop.config[target].get('cmdline'):
				cfgtarget = target
		# the %!cmdline contents (if any) go into the original cmdline
		cmdline.merge(prop.config[cfgtarget].get('cmdline'))
		# the input file is always the current one
		cmdline.cmdline_conf['infile'] = infile
		# now all the flags/options for this file: first the general
		# config, then the target config over it (AND, not OR as in
		# %!cmdline)
		fileconf = cmdline.merge_conf(prop.config['all'])
		fileconf = cmdline.merge_conf(
		             prop.config.get(fileconf['type']), override=1)
		# the GUI wants to know the %!cmdline contents
		fileconf['%!cmdline'] = prop.config[cfgtarget].get('cmdline')
		# no key can be missing on the configuration
		for key in FLAGS.keys() + OPTIONS.keys() + CONFIG_KEYWORDS:
			if not fileconf.has_key(key): fileconf[key] = ''
		# save the (configuration, properties) tuple
		confs.append((fileconf,prop))
		# done with this file
		del cmdline, prop
	return confs

def convertAllFiles(confs):
	if not confs: Quit(usage, 1)
	header = []
	for myconf,prop in confs:                 # multifile support
		# --dump-source option handler
		# hidden option, maybe will be removed on next versions
		if myconf['dump-source']:
			comment_id = '%--------------------- Area Delimiter:' 
			for line in prop.headers: print line
			print '%s HEADER --> CONFIG'%comment_id
			for line in prop.conflines: print string.rstrip(line)
			print '%s CONFIG --> BODY'%comment_id
			doc = convert(prop.bodylines, myconf)
			for line in doc: print line
			print '%s EOD'%comment_id
			continue
		# compose the target file Headers
		#TODO escape line before?
		#TODO see exceptions by tex and mgp
		header = doHeader(prop.headers, myconf)
		# get the marked file BODY that has left
		body = prop.bodylines
		# parse the full marked body into tagged target
		doc,toc = convert(body, myconf, firstlinenr=prop.arearef[-1])
		# make TOC (if needed)
		toc = toc_maker(toc,myconf)
		# finally, we have our document
		outlist = header + toc + doc
		# break here if Gui - it has some more processing to do
		if myconf['gui']: return outlist, myconf
		# write results to file or STDOUT
		finish_him(outlist, myconf)

def reallydoitall(cmdlinelist):
	"Gets the configuration for all the files and converts them"
	return convertAllFiles(getAllConf(cmdlinelist))

def convert(bodylines, CONF, firstlinenr=1):
	"""
	Converts the marked source lines to the target document lines.
	
	bodylines   - list with the source BODY lines
	CONF        - the file configuration dictionary (this parameter
	              hides the global CONF). The keys read here are:
	              type, outfile, gui, toc, toconly, toclevel,
	              enumtitle, preproc, infile, dump-source, noheaders
	firstlinenr - source line number of the first body line, only
	              used to number the debug messages
	
	Returns the (document lines, TOC lines) tuple. As an exception,
	when CONF['dump-source'] is on, the return is a single list: the
	source lines collected on the way for --dump-source.
	
	The TAGS, regex and rules globals are (re)set here for the
	target type, and the other globals are reset to their defaults.
	"""
	# global vars for doClose*()
	global TAGS, regex, rules, quotedepth, listindent, listids
	global subarea, tableborder
	
	doctype = CONF['type']
	outfile = CONF['outfile']
	TAGS = getTags(doctype)
	rules = getRules(doctype)
	regex = getRegexes()
	
	# the defaults
	linkmask  = '@@_link_@@'
	monomask  = '@@_mono_@@'
	macromask = '@@_macro_@@'
	rawmask   = '@@_raw_@@'
	
	subarea = SubareaMaster()
	ret = []
	incdumpbuf = []
	toclist = []
	f_tt = 0
	listindent = []
	listids = []
	listcount = []
	titlecount = ['',0,0,0,0,0]
	f_lastwasblank = 0
	holdspace = ''
	listholdspace = ''
	quotedepth = []
	tableborder = 0
	
	if outfile != STDOUT:
		if not CONF['gui']:
			print "--- %s..."%doctype
	
	# if TOC is a header tag
	if CONF['toc'] and TAGS['TOC']:
		ret.append(TAGS['TOC']+'\n')
	
	# let's put the opening paragraph
	if doctype != 'pm6':
		ret.append(TAGS['paragraph'])
	
	# let's mark it up!
	# a 'while' with a manual index is used (instead of 'for') because
	# the %!include command inserts lines into bodylines on the way
	linenr = firstlinenr-1
	lineref = -1
	while lineref < len(bodylines)-1:
#	for lineref in range(len(bodylines)):
		lineref = lineref + 1
#		print lineref, len(bodylines)
		skip_continue = 0
		# the banks hold the contents masked on this line
		linkbank = []
		monobank = []
		macrobank = []
		rawbank = []
		
		untouchedline = bodylines[lineref]
		line = re.sub('[\n\r]+$','',untouchedline)   # del line break
		incdumpbuf.append(line)                      # for --dump-source
		
		# apply PreProc rules
		if CONF['preproc']:
			for targ,patt,repl in CONF['preproc']:
				if targ not in [CONF['type'], 'all']: continue
				try   : line = re.sub(patt, repl, line)
				except: Error("Invalid PreProc filter regex: '%s'"%patt)
		
		line = maskEscapeChar(line)                  # protect \ char
		linenr = linenr +1
		
		Debug('LINE %04d: %s'%(linenr,repr(line)), 1)  # heavy debug
		
		# we need (not really) to mark each paragraph
		#TODO check if this is really needed
		if doctype == 'pm6' and f_lastwasblank:
			if f_tt or listindent:
				holdspace = ''
			else:
				holdspace = TAGS['paragraph']+'\n'
		
		# any NOT table line (or comment), closes an open table
		#if subarea() == 'table' and not regex['table'].search(line):
		if subarea() == 'table' \
		  and not regex['table'].search(line) \
		  and not regex['comment'].search(line):
			ret.append(doCloseTable(doctype))
		
		
		#---------------------[ PRE formatted ]----------------------
		
		#TIP we'll never support beautifiers inside pre-formatted
		
		# we're already on a PRE area
		if f_tt:
			# closing PRE
			if regex['areaPreClose'].search(line):
				if doctype != 'pm6':
					ret.append(TAGS['areaPreClose'])
				f_tt = 0
				continue
			
			# normal PRE-inside line
			line = doPreLine(doctype, line)
			ret.append(line)
			continue
		
		# detecting PRE area init
		if regex['areaPreOpen'].search(line):
			ret.append(TAGS['areaPreOpen'])
			f_lastwasblank = 0
			f_tt = 1
			continue
		
		# one line PRE-formatted text
		if regex['1linePre'].search(line):
			f_lastwasblank = 0
			line = regex['1linePre'].sub('',line)
			line = doPreLine(doctype, line)
			t1, t2 = TAGS['areaPreOpen'],TAGS['areaPreClose']
			ret.append('%s\n%s\n%s'%(t1,line,t2))
			continue
		
		#---------------------[ blank lines ]-----------------------
		
		#TODO "holdspace" to save <p> to not show in closelist
		if regex['blankline'].search(line):
			
			# closing all open quotes
			if quotedepth:
				ret.append(doCloseQuote())
			
			# closing all open lists
			if f_lastwasblank:   # 2nd consecutive blank line
				if listindent:   # closes list (if any)
					ret.append(doCloseList())
					holdspace = ''
				continue         # consecutive blanks are trash
			
			# normal blank line
			if doctype != 'pm6':
				# paragraph (if any) is wanted inside lists also
				if listindent:
					para = TAGS['paragraph'] + '\n'
					holdspace = holdspace + para
				elif doctype == 'html':
					ret.append(TAGS['paragraph'])
				# sgml: quote close tag must not be \n\n</quote>
				elif doctype == 'sgml' and quotedepth:
					skip_continue = 1
				# otherwise we just show a blank line
				else:
					ret.append('')
			
			f_lastwasblank = 1
			if not skip_continue: continue
		
		
		#---------------------[ special ]------------------------
		
		if regex['special'].search(line):
			
			# include command
			m = ParseConfig(line, 'include', doctype)
			if m:
				incpath = os.path.dirname(CONF['infile'])
				incfile = m['value']
				if CONF['infile'] == incfile:
					Error('A file cannot include itself (loop!): %s'%incfile)
				inctype, inclines = get_include_contents(incfile, incpath)
				
				# 'PASS' contents go straight to the output, untouched
				if inctype == 'PASS':
					ret.extend(inclines)
					continue
				
				# change %!include command by comment
				incdumpbuf[-1] = inclines[0]
				
				# insert include lines into bodylines list
				# removing the %!include command call
				# (the loop goes on from the line after inclines[0])
				bodylines = bodylines[:lineref] +inclines \
				           +bodylines[lineref+1:]
				continue
		
		#---------------------[ comments ]-----------------------
		
		# just skip them (if not macro or config)
		if regex['comment'].search(line) and not \
		   regex['date'].match(line):
			continue
		f_lastwasblank = 0       # reset blank status
		
		#---------------------[ Title ]-----------------------
		
		#TODO set next blank and set f_lastwasblank or f_lasttitle
		if (regex['title'].search(line) or
		    regex['numtitle'].search(line)) and not listindent:
			
			if string.lstrip(line)[0] == '=':
				titletype = 'title'
			else:
				titletype = 'numtitle'
			
			m = regex[titletype].search(line)
			level = len(m.group('id'))
			tag = TAGS['title%s'%level]
			txt = string.strip(m.group('txt'))
			
			### numbered title
			if CONF['enumtitle'] or titletype == 'numtitle':
				if rules['autonumbertitle']:
					tag = TAGS['numtitle%s'%level] or tag
					idtxt = txt
				else:
					# add count manually
					id = '' ; n = level
					titlecount[n] = titlecount[n] +1
					if n < len(titlecount)-1: # reset sublevels count
						for i in range(n+1, len(titlecount)):
							titlecount[i] = 0
					for i in range(n):        # compose id from hierarchy
						id = "%s%d."%(id,titlecount[i+1])
					idtxt = "%s %s"%(id, txt) # add id to title
			else:
				idtxt = txt
			
			anchorid = 'toc%d'%(len(toclist)+1)
			if TAGS['anchor'] and CONF['toc'] \
			  and level <= CONF['toclevel']:
				ret.append(regex['x'].sub(anchorid,TAGS['anchor']))
			
			# place title tag overriding line
			line = regex[titletype].sub(tag,line)
			
			### escape title text (unescaped text is used for TOC)
			#
			esctxt = doEscape(doctype,idtxt)
			# sgml: [ is special on title (and lists) - here bcos 'continue'
			if doctype in ['sgml','tex']:
				esctxt = doFinalEscape(doctype, esctxt)
			# txt: blank before
			if doctype == 'txt': ret.append('')
			# finish title line
			ret.append(regex['x'].sub(esctxt,line))
			
			# add "underline" to text titles
			if doctype == 'txt':
				ret.append(regex['x'].sub('='*len(idtxt),tag))
				ret.append('') # blank line after
			
			# let's do some TOC!
			if not CONF['toc'] and not CONF['toconly']: continue
			if level > CONF['toclevel']: continue    # max level
			if TAGS['TOC']: continue                 # TOC is a tag
			if TAGS['anchor']:
				# tocitemid = '#toc%d'%(len(toclist)+1)
				# TOC more readable with master topics not
				# linked at number stoled idea from windows .CHM
				# files (help system)
				if CONF['enumtitle'] and level == 1:
					tocitem = '%s+ [``%s`` #%s]'%(' '*level,txt,anchorid)
				else:
					tocitem = '%s- [``%s`` #%s]'%(' '*level,idtxt,anchorid)
			else:
				tocitem = '%s- ``%s``'%(' '*level,idtxt)
				if doctype in ['txt', 'man']:
					tocitem = '%s``%s``' %('  '*level,idtxt)
			toclist.append(tocitem)
			
			continue
		
		#TODO!	labeltxt = ''
		#		label = m.group('label')
		#		if label: labeltxt = '<label id="%s">' %label
		
		
		#---------------------[ apply masks ]-----------------------
		
		# each protected structure is saved on its bank and replaced on
		# the line by a mask, which is expanded back later, after the
		# escapes and beautifiers were applied
		
		### protect important structures from escaping and formatting
		while regex['raw'].search(line):
			txt = regex['raw'].search(line).group(1)
			txt = doEscape(doctype,txt)
			rawbank.append(txt)
			line = regex['raw'].sub(rawmask,line,1)
		
		# protect pre-formatted font text
		while regex['fontMono'].search(line):
			txt = regex['fontMono'].search(line).group(1)
			txt = doEscape(doctype,txt)
			monobank.append(txt)
			line = regex['fontMono'].sub(monomask,line,1)
		
		# protect macros
		while regex['macro'].search(line):
			txt = regex['macro'].search(line).group()
			macrobank.append(txt)
			line = regex['macro'].sub(macromask,line,1)
		
		# protect URLs and emails
		while regex['linkmark'].search(line) or regex['link'].search(line):
			
			# try to match plain or named links
			match_link  = regex['link'].search(line)
			match_named = regex['linkmark'].search(line)
			
			# define the current match
			if match_link and match_named:
				# both types found, which is the first?
				m = match_link
				if match_named.start() < match_link.start():
					m = match_named
			else:
				# just one type found, we're fine
				m = match_link or match_named
			
			# extract link data and apply mask
			if m == match_link:              # plain link
				label = ''
				link  = m.group()
				line  = regex['link'].sub(linkmask,line,1)
			else:                            # named link
				label = string.rstrip(m.group('label'))
				link  = m.group('link')
				line = regex['linkmark'].sub(linkmask,line,1)
			
			# save link data to the link bank
			linkbank.append((label, link))
		
		#---------------------[ do Escapes ]-----------------------
		
		# the target-specific special char escapes for body lines
		line = doEscape(doctype,line)
		
		#---------------------[ Horizontal Bar ]--------------------
		
		if regex['bar'].search(line):
			txt = regex['bar'].search(line).group(1)
			if txt[0] == '=': bar = TAGS['bar2']
			else            : bar = TAGS['bar1']
			
			# to avoid comment tag confusion
			if doctype == 'sgml':
				txt = string.replace(txt,'--','__')
			
			line = regex['bar'].sub(bar,line)
			ret.append(regex['x'].sub(txt,line))
			continue
		
		#---------------------[ Quote ]-----------------------
		
		if regex['quote'].search(line):
			subarea.add('quote')
			
			# store number of leading TABS
			currquotedepth = len(regex['quote'].search(line).group(0))
			
			# SGML doesn't support nested quotes
			if rules['quotenotnested']:
				if quotedepth and currquotedepth > quotedepth[-1]:
					currquotedepth = quotedepth[-1]
			
			# for don't-close-me quote tags
			if not TAGS['areaQuoteClose']:
				line = regex['quote'].sub(TAGS['areaQuoteOpen']*currquotedepth, line)
			else:
				# new (sub)quote
				if not quotedepth or currquotedepth > quotedepth[-1]:
					quotedepth.append(currquotedepth)
					ret.append(TAGS['areaQuoteOpen'])
				
				# remove leading TABs
				if not rules['keepquoteindent']:
					line = regex['quote'].sub('', line)
				
				# closing quotes
				while currquotedepth < quotedepth[-1]:
					ret.append(doCloseQuote(1))
		else:
			# closing all quotes (not quote line)
			if quotedepth: ret.append(doCloseQuote())
		
		
		#---------------------[ Lists ]-----------------------
		
		if (regex['list'].search(line) or regex['deflist'].search(line)):
			subarea.add('list')
			
			if regex['list'].search(line): rgx = regex['list']
			else: rgx = regex['deflist']
			
			m = rgx.search(line)
			listitemindent = m.group(1)
			listtype = m.group(2)
			extra = m.group(3)        # regex anchor char
			
			if listtype == '=':
				listdefterm = m.group(3)
				extra = ''
				if doctype == 'tex':
					# on tex, brackets are term delimiters
					# TODO escape ] at list definition
					# \], \rbrack{} and \verb!]!  don't work :(
					#listdefterm = string.replace(listdefterm, ']', '???')
					pass
				if not rules['imgasdefterm'] and \
				   regex['img'].search(listdefterm):
					while regex['img'].search(listdefterm):
						img = regex['img'].search(listdefterm).group(1)
						masked = '(%s)'%img
						listdefterm = regex['img'].sub(masked,listdefterm,1)
			
			# don't cross depth limit
			maxdepth =  rules['listmaxdepth']
			if maxdepth and len(listindent) == maxdepth:
				if len(listitemindent) > len(listindent[-1]):
					listitemindent = listindent[-1]
			
			# new sublist
			if not listindent or len(listitemindent) > len(listindent[-1]):
				listindent.append(listitemindent)
				listids.append(listtype)
				if   listids[-1] == '-': tag = TAGS['listOpen']
				elif listids[-1] == '+': tag = TAGS['numlistOpen']
				elif listids[-1] == '=': tag = TAGS['deflistOpen']
				if not tag: tag = TAGS['listOpen'] # default
				# no need to reopen <pre> tag on man sublists
				if rules['listnotnested'] and len(listindent) != 1:
					tag = ''
				openlist = listindent[-1]+tag
				if doctype == 'pm6':
					listholdspace = openlist
				else:
					if string.strip(openlist): ret.append(openlist)
				# reset item manual count
				listcount.append(0)
			
			# closing sublists
			while len(listitemindent) < len(listindent[-1]):
				close = doCloseList(1)
				if close: ret.append(close)
				if listcount: del listcount[-1]
			
			# normal item
			# NOTE(review): the item tag is chosen by the type of the
			# current open list (listids[-1]), not by the type of this
			# line; on the '=' branch listdefterm is only set when this
			# line is a definition - confirm mixed lists are safe
			listid = listindent[-1]
			if listids[-1] == '-':
				tag = TAGS['listItem']
			elif listids[-1] == '+':
				tag = TAGS['numlistItem']
				listcount[-1] = listcount[-1] +1
				if not rules['autonumberlist']:
					tag = regex['x'].sub(str(listcount[-1]), tag)
			elif listids[-1] == '=':
				if not TAGS['deflistItem1']:
					# emulate def list, with <li><b>def</b>:
					tag = TAGS['listItem'] +TAGS['fontBoldOpen'] +listdefterm
					tag = tag +TAGS['fontBoldClose'] +':'
				else:
					tag = regex['x'].sub(listdefterm, TAGS['deflistItem1'])
				tag = tag + TAGS['deflistItem2']  # open <DD>
			if doctype == 'mgp': listid = len(listindent)*'\t'
			line = rgx.sub(listid+tag+extra,line)
			if listholdspace:
				line = listholdspace+line
				listholdspace = ''
		
		
		#---------------------[ Table ]-----------------------
		
		#TODO escape undesired format inside table
		#TODO add man, pm6 targets
		if regex['table'].search(line):
			
			table = get_table_prop(line)
			
			if subarea() != 'table':
				subarea.add('table')        # first table line!
				if rules['tableable']:      # table-aware target
					ret.append(get_tableopen_tag(table,doctype))
				else:                       # if not, use verb
					ret.append(TAGS['areaPreOpen'])
			
			if rules['tableable']:
				# setting line tags
				tl1 = TAGS['tableLineOpen']
				tl2 = TAGS['tableLineClose']
				# little table gotchas
				if rules['breaktablelineopen']:
					tl1 = tl1+'\n'
				if doctype == 'tex' and not tableborder:
					tl1 = ''
				# do cells and finish
				cells = tag_table_cells(table, doctype)
				line = tl1 + cells + tl2
		
		
		### BEGIN of at-any-part-of-the-line/various-per-line TAGs.
		
		for beauti in ['Bold', 'Italic', 'Bolditalic', 'Underline']:
			if regex['font%s'%beauti].search(line):
				line = beautify_me(beauti, line)
		
		#---------------------[ URL & E-mail ]-----------------------
		
		for label,url in linkbank:
			link = get_tagged_link(label, url, CONF)
			line = string.replace(line, linkmask, link, 1)
		
		#---------------------[ Image ]-----------------------
		
		#TODO fix smart align when image is a link label
		while regex['img'].search(line) and TAGS['img'] != '[\a]':
			txt = regex['img'].search(line).group(1)
			tag = TAGS['img']
			
			# HTML is the only align-aware target for now
			if rules['imgalignable']:
				align = get_image_align(line)
				if align == 'para':
					align = 'center'
					tag= regex['x'].sub(tag,TAGS['imgsolo'])
				# add align on tag
				tag = regex['x'].sub(align, tag, 1)
			
			if doctype == 'tex': tag = re.sub(r'\\b',r'\\\\b',tag)
			line = regex['img'].sub(tag,line,1)
			line = regex['x'].sub(txt,line,1)
		
		#---------------------[ Expand Macros ]-----------------------
		
		if macrobank:
			for macro in macrobank:
				line = string.replace(line, macromask, macro,1)
			# now the line is full of macros again
			line = doDateMacro(line)
		
		#---------------------[ Expand PREs ]-----------------------
		
		for mono in monobank:
			open,close = TAGS['fontMonoOpen'],TAGS['fontMonoClose']
			tagged = open+mono+close
			line = string.replace(line,monomask,tagged,1)
		
		#---------------------[ Expand raw ]-----------------------
		
		for raw in rawbank:
			line = string.replace(line,rawmask,raw,1)
		
		#---------------------[ Final Escapes ]-----------------------
		
		line = doFinalEscape(doctype, line)
		ret.append(holdspace+line)
		holdspace = ''
	
	# We just need the included dump
	if CONF['dump-source']: return incdumpbuf
	
	# EOF: close any open lists/tables/quotes
	#TODO take table exception out when self.doctype
	# the closing function name is composed from the subarea name
	# added above: doCloseQuote, doCloseList or doCloseTable
	while subarea():
		func = eval("doClose%s" % string.capitalize(subarea()))
		parm = None
		if subarea() == 'table': parm = doctype
		txt = func(parm)
		if txt: ret.append(txt)
	
	# add footer
	if not CONF['noheaders']:
		ret.extend(doFooter(CONF))
	
	# on TOC-only mode the document body is discarded
	if CONF['toconly']: ret = []
	return ret, toclist



################################################################################
##################################### GUI ######################################
################################################################################

# tk help: http://python.org/topics/tkinter/
class Gui:
	"Graphical Tk Interface"
	def __init__(self, conf=None):
		"""
		Creates the Tk root window, its three frames and the state
		variables of the interface.
		conf is the configuration dictionary used by mainwindow() to
		set the initial state of the fields; if not informed, a new
		empty dictionary is used.
		"""
		# never use a {} as the default argument: mainwindow() changes
		# the dictionary, so it would be shared by all the instances
		if conf is None: conf = {}
		self.bg = 'orange'
		self.root = Tkinter.Tk()
		self.root.config(bd=15,bg=self.bg)
		self.root.title("txt2tags")
		self.frame1 = Tkinter.Frame(self.root,bg=self.bg)
		self.frame1.pack(fill='x')
		self.frame2 = Tkinter.Frame(self.root,bg=self.bg)
		self.frame2.pack()
		self.frame3 = Tkinter.Frame(self.root,bg=self.bg)
		self.frame3.pack(fill='x')
		# the widget makers below always use the current self.frame
		self.frame = self.root
		
		self.conf    = conf
		self.infile  = self.setvar('')
		#self.infile = self.setvar('C:/aurelio/a.txt')
		self.doctype = self.setvar('')
		self.checks  = ['noheaders','enumtitle','toc','toconly','stdout']
		
		# creating variables
		# one Tk variable for each check box: self.f_toc, ...
		for check in self.checks:
			setattr(self, 'f_'+check, self.setvar(''))
	
	### config as dic for python 1.5 compat (**opts don't work :( )
	### the widget makers: all of them create the widget on self.frame
	def entry(self, **opts): return Tkinter.Entry(self.frame, opts)
	def label(self, txt='', **opts):
		opts.update({'text':txt,'bg':self.bg})
		return Tkinter.Label(self.frame, opts)
	def button(self,name,cmd,**opts):
		opts.update({'text':name,'command':cmd})
		return Tkinter.Button(self.frame, opts)
	def check(self,name,val,checked=0,**opts):
		# the check box is packed right here, nothing is returned
		opts.update( {'text':name, 'onvalue':val, 'offvalue':'',
		  'anchor':'w', 'bg':self.bg, 'activebackground':self.bg} )
		chk = Tkinter.Checkbutton(self.frame, opts)
		if checked: chk.select()
		chk.pack(fill='x',padx=10)
	
	def exit(self): self.root.destroy(); sys.exit()
	def setvar(self, val): z = Tkinter.StringVar() ; z.set(val) ; return z
	def menu(self,sel,items):
		return apply(Tkinter.OptionMenu,(self.frame,sel)+tuple(items))
	def askfile(self):
		"The Browse button: choose a source file and reload the window"
		ftypes= [("txt2tags files",("*.t2t","*.txt")),("All files","*")]
		newfile = askopenfilename(filetypes=ftypes)
		if newfile:
			self.infile.set(newfile)
			newconf = getAllConf(['foo',newfile], nocheck=1)
			# conf must always be a dictionary, even when there is
			# no configuration (empty list) for the chosen file
			if newconf: newconf = newconf[0][0]
			else      : newconf = {}
			# restate all checkboxes after file selection
			# TODO how to make a refresh without killing it?
			self.root.destroy()
			self.__init__(newconf)
			self.mainwindow()
	
	def scrollwindow(self,txt='no text!',title=''):
		"Opens a new window showing the txt lines and a scrollbar"
		win = Tkinter.Toplevel() ; win.title(title)
		scroll = Tkinter.Scrollbar(win)
		text = Tkinter.Text(win,yscrollcommand=scroll.set)
		scroll.config(command=text.yview)
		text.insert(Tkinter.END, string.join(txt,'\n'))
		text.pack(side='left',fill='both')
		scroll.pack(side='right',fill='y')
	
	def runprogram(self):
		"The Convert! button: compose the command line and run it"
		# prepare
		infile, doctype = self.infile.get(), self.doctype.get()
		if not infile:
			showwarning('txt2tags',\
			   "You must provide the source file location!")
			return
		# compose cmdline
		# the value of a checked box is its own option: --toc, ...
		guiflags = []
		for flag in self.checks:
			flag = getattr(self, 'f_%s'%flag).get()
			if flag: guiflags.append(flag)
		cmdline = ['txt2tags', '--gui', '-t', doctype] +guiflags +[infile]
		Debug('Gui/Tk cmdline: %s'%cmdline,1)
		# run!
		try:
			outlist, CONF = reallydoitall(cmdline)
			outfile = CONF['outfile']
			infile  = CONF['infile']
			outlist = finish_him(outlist,CONF) or ''
			
			if outfile == STDOUT:
				title = 'txt2tags: %s converted to %s'%(
				  os.path.basename(infile),
				  string.upper(CONF['type']))
				self.scrollwindow(outlist, title)
			else:
				msg = "Conversion done!\n\n" +\
				      "FROM:\n\t%s\n"%infile +\
				      "TO:\n\t%s"%outfile
				showinfo('txt2tags', msg)
		# ZeroDivisionError is what the Error() function redefined for
		# the GUI mode raises, after showing its own message
		except ZeroDivisionError:   # common error, not quit
			pass
		except:                     # fatal error
			traceback.print_exc()
			print '\nSorry! txt2tags-Tk Fatal Error.'
			errmsg = 'Unknown error occurred.\n\n'+\
			         'Please send the Error Traceback '+\
			         'dumped to the author:\n    %s'%my_email
			showerror('txt2tags FATAL ERROR!',errmsg)
			self.exit()
	
	def mainwindow(self):
		"Composes all the main window widgets and enters the Tk main loop"
		#TODO show outfile somewhere
		#TODO redraw GUI only using grid() because pack() sux
		self.infile.set(self.conf.get('infile') or '')
		self.doctype.set(self.conf.get('type') or 'html')
		if self.conf.get('outfile') == STDOUT: # map -o-
			self.conf['stdout'] = 1
		
		action1 = "  \nChoose the target document type:"
		action2 = "\n\nEnter the tagged source file location:"
		action3 = "\n\nSome options you may check:"
		checks_txt = {
		'noheaders': "Suppress headers from output",
		'enumtitle': "Number titles (1, 1.1, 1.1.1, etc)",
		'toc'      : "Do TOC also (Table of Contents)",
		'toconly'  : "Just do TOC, nothing more",
		'stdout'   : "Dump to screen (Don't save target file)"
		}
		
		# 1st frame: program info and the target type menu
		self.frame = self.frame1
		self.label("TXT2TAGS\n%s\nv%s"%(my_url,my_version)).pack()
		self.label(action1, anchor='w').pack(fill='x')
		self.menu(self.doctype, targets).pack()
		self.label(action2, anchor='w').pack(fill='x')
		
		# 2nd frame: the source file field and the %!cmdline (if any)
		self.frame = self.frame2
		self.entry(textvariable=self.infile).grid(row=0, column=0)
		self.button("Browse", self.askfile
		     ).grid(row=0, column=1, padx=10)
		if self.conf.get('%!cmdline'):
			txt = '%%!cmdline: %s' % self.conf['%!cmdline']
			self.label(txt,fg='brown'
			    ).grid(row=1, column=0, columnspan=2, sticky='w')
		
		# 3rd frame: the options check boxes and the buttons
		self.frame = self.frame3
		self.label(action3, anchor='w').pack(fill='x')
		
		# compose options check boxes, example:
		# self.check(checks_txt['toc'], '--toc', 1, variable=self.f_toc)
		for check in self.checks:
			txt = checks_txt[check]
			opt = '--'+check
			var = getattr(self, 'f_'+check)
			onoff = self.conf.get(check)
			self.check(txt,opt,onoff,variable=var)
		
		self.label('\n').pack()
		self.button("Quit", self.exit).pack(side='left',padx=40)
		self.button("Convert!", self.runprogram
		     ).pack(side='right',padx=40)
		
		# as documentation told me
		if sys.platform[:3] == 'win':
			self.root.iconify()
			self.root.update()
			self.root.deiconify()
		
		self.root.mainloop()


################################################################################
################################################################################


if __name__ == '__main__':
	
	# --debug is handled (and removed) before the cmdline parsing
	if '--debug' in sys.argv:
		sys.argv.remove('--debug')
		DEBUG = 1
	
	### check if we will enter on GUI mode:
	# when the user asks for it, and it is also the default
	# on these platforms when the program is called alone
	gui_platforms = ['mac','cyg','win']
	if '--gui' in sys.argv:
		CONF['gui'] = 1
	elif len(sys.argv) == 1 and sys.platform[:3] in gui_platforms:
		CONF['gui'] = 1
	else:
		CONF['gui'] = 0
	
	# check for GUI mode resources
	if CONF['gui']:
		try:
			from tkFileDialog import askopenfilename
			from tkMessageBox import showinfo,showwarning,showerror
			import Tkinter
		except:
			# the error is shown only if the GUI was forced
			if len(sys.argv) > 1 and sys.argv[1] == '--gui':
				traceback.print_exc()
				sys.exit()
			# if not, forget the GUI and go on in console mode
			CONF['gui'] = 0
	
	Debug("system platform: %s"%sys.platform,1)
	Debug("line break char: %s"%repr(LB),1)
	# get all infiles config (if any). if GUI, no cmdline checking
	nocheck = CONF['gui']
	CONFS = getAllConf(sys.argv, nocheck)
	
	if not CONF['gui']:
		# console mode rocks forever!
		convertAllFiles(CONFS)
	else:
		if len(CONFS) > 1:
			Error("GUI doesn't support multiple Input files.")
		# the GUI wants just the config, not the properties
		conf = {}
		if CONFS: conf = CONFS[0][0]
		
		# from now on, Error() must not quit the program (sys.exit),
		# it shows the message and raises an exception instead
		def Error(msg):
			showerror('txt2tags ERROR!', msg)
			raise ZeroDivisionError
		Gui(conf).mainwindow()
	
	sys.exit(0)

# vim: ts=4