txt2tags-1.6.py - Copyright 2001, 2002, 2003 Aurelio Marinh…

/old/txt2tags-1.6.py

http://txt2tags.googlecode.com/ · Python · 2542 lines · 1897 code · 302 blank · 343 comment · 453 complexity · 04ccb57d7ce5ada49fd8c0233f13047c MD5 · raw file
Large files are truncated; click here to view the full file.

#!/usr/bin/env python
# txt2tags - generic text conversion tool
# http://txt2tags.sf.net
#
# Copyright 2001, 2002, 2003 Aurelio Marinho Jargas
#
#   This program is free software; you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation, version 2.
#
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License for more details.
#
#   You have received a copy of the GNU General Public License along
#   with this program, on the COPYING file.
#

# the code is better, even readable now, but needs more improvements
# please wait for the upcoming 2.0 series for a cleaner one

#XXX Python coding warning
# Avoid common mistakes:
# - do NOT use newlist=list, use newlist=list[:] instead
# - do NOT use newdic=dic,   use newdic=dic.copy() instead
# - do NOT use dic[key],     use dic.get(key) instead

import re, string, os, sys, getopt, traceback
from time import strftime,time,localtime

# program identification (my_version and my_url compose versionstr)
my_url = 'http://txt2tags.sf.net'
my_email = 'verde@aurelio.net'
my_version = '1.6'                               #-betaMMDD

DEBUG = 0   # do not edit here, please use --debug
# all the valid document types for the -t/--type option
targets = ['txt', 'sgml', 'html', 'pm6', 'mgp', 'moin', 'man', 'tex']
# on/off command line switches (--name) and their default values
FLAGS   = {'noheaders':0,'enumtitle':0 ,'maskemail':0 ,'stdout'  :0,
           'toconly'  :0,'toc'      :0 ,'gui'      :0 ,'included':0}
# command line options that require a value (--name=VALUE) and
# their default values
OPTIONS = {'toclevel' :3,'style'    :'','type'     :'','outfile' :'',
           'split':0, 'lang':''}
# setting names accepted on the source document "%!name: value" lines
CONFIG_KEYWORDS = ['encoding', 'style', 'cmdline','preproc','postproc']
# module-wide tables, all of them empty at load time
CONF = {}
regex = {}
TAGS = {}
rules = {}

currdate = strftime('%Y%m%d',localtime(time()))    # ISO current date
lang = 'english'       # used for sgml2html when no --lang was given
doctype = outfile = ''
STDIN = STDOUT = '-'   # the "filename" that means STDIN/STDOUT

ESCCHAR = '\x00'
# line break chars, the keys are matched against sys.platform[:3]
LINEBREAK = {'default':'\n', 'win':'\r\n', 'mac':'\r'}

#my_version = my_version + '-dev' + currdate[4:]  # devel!

# global vars for doClose*()
quotedepth = []
listindent = []
listids = []
subarea = None
tableborder = 0
# set the Line Break across platforms
LB = LINEBREAK.get(sys.platform[:3]) or LINEBREAK['default']

# one-line program identification, it is the --version message
versionstr = "txt2tags version %s <%s>"%(my_version,my_url)
# the --help message. Its first %s receives versionstr and the second
# one the targets list, shown as plain words: the re.sub() removes the
# brackets and quotes from the list repr()
usage = """
%s

Usage: txt2tags -t <type> [OPTIONS] file.t2t

  -t, --type         set target document type. actually supported:
                     %s

  -o, --outfile=FILE set FILE as the output filename ('-' for STDOUT)   	  
      --stdout       same as '-o -' or '--outfile -' (deprecated option)
  -H, --noheaders    suppress header, title and footer information
  -n, --enumtitle    enumerate all title lines as 1, 1.1, 1.1.1, etc
      --maskemail    hide email from spam robots. x@y.z turns <x (a) y z>

      --toc          add TOC (Table of Contents) to target document
      --toconly      print document TOC and exit
      --toclevel=N   set maximum TOC level (deepness) to N

      --gui          invoke Graphical Tk Interface
      --style=FILE   use FILE as the document style (like Html CSS)

  -h, --help         print this help information and exit
  -V, --version      print program version and exit

Extra options for HTML target (needs sgml-tools):
      --split        split documents. values: 0, 1, 2 (default 0)
      --lang         document language (default english)

By default, converted output is saved to 'file.<type>'.
Use --outfile to force an output filename.
If input file is '-', reads from STDIN.
If outfile is '-', dumps output to STDOUT.\
"""%(versionstr, re.sub(r"[]'[]",'',repr(targets)))


# Here are the document header templates, one for each target.
# You may edit them to fit your needs.
#  - the %(HEADERn)s strings represent the Header lines
#  - %(ENCODING)s (html, tex) and %(STYLE)s (html) are also expanded
#  - use %% to represent a literal %
#
HEADER_TEMPLATE = {
  'txt': """\
%(HEADER1)s
%(HEADER2)s
%(HEADER3)s
""",

  'sgml': """\
<!doctype linuxdoc system>
<article>
<title>%(HEADER1)s
<author>%(HEADER2)s
<date>%(HEADER3)s
""",

  'html': """\
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
<HTML>
<HEAD>
<META NAME="generator" CONTENT="http://txt2tags.sf.net">
<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=%(ENCODING)s">
<LINK REL="stylesheet" TYPE="text/css" HREF="%(STYLE)s">
<TITLE>%(HEADER1)s</TITLE>
</HEAD><BODY BGCOLOR="white" TEXT="black">
<P ALIGN="center"><CENTER><H1>%(HEADER1)s</H1>
<FONT SIZE=4>
<I>%(HEADER2)s</I><BR>
%(HEADER3)s
</FONT></CENTER>
""",


# TODO man section 1 is hardcoded...
  'man': """\
.TH "%(HEADER1)s" 1 %(HEADER3)s "%(HEADER2)s"
""",

# TODO style to <HR>
  'pm6': """\
<PMTags1.0 win><C-COLORTABLE ("Preto" 1 0 0 0)
><@Normal=
  <FONT "Times New Roman"><CCOLOR "Preto"><SIZE 11>
  <HORIZONTAL 100><LETTERSPACE 0><CTRACK 127><CSSIZE 70><C+SIZE 58.3>
  <C-POSITION 33.3><C+POSITION 33.3><P><CBASELINE 0><CNOBREAK 0><CLEADING -0.05>
  <GGRID 0><GLEFT 7.2><GRIGHT 0><GFIRST 0><G+BEFORE 7.2><G+AFTER 0>
  <GALIGNMENT "justify"><GMETHOD "proportional"><G& "ENGLISH">
  <GPAIRS 12><G%% 120><GKNEXT 0><GKWIDOW 0><GKORPHAN 0><GTABS $>
  <GHYPHENATION 2 34 0><GWORDSPACE 75 100 150><GSPACE -5 0 25>
><@Bullet=<@-PARENT "Normal"><FONT "Abadi MT Condensed Light">
  <GLEFT 14.4><G+BEFORE 2.15><G%% 110><GTABS(25.2 l "")>
><@PreFormat=<@-PARENT "Normal"><FONT "Lucida Console"><SIZE 8><CTRACK 0>
  <GLEFT 0><G+BEFORE 0><GALIGNMENT "left"><GWORDSPACE 100 100 100><GSPACE 0 0 0>
><@Title1=<@-PARENT "Normal"><FONT "Arial"><SIZE 14><B>
  <GCONTENTS><GLEFT 0><G+BEFORE 0><GALIGNMENT "left">
><@Title2=<@-PARENT "Title1"><SIZE 12><G+BEFORE 3.6>
><@Title3=<@-PARENT "Title1"><SIZE 10><GLEFT 7.2><G+BEFORE 7.2>
><@Title4=<@-PARENT "Title3">
><@Title5=<@-PARENT "Title3">
><@Quote=<@-PARENT "Normal"><SIZE 10><I>>

%(HEADER1)s
%(HEADER2)s
%(HEADER3)s
""",

  'mgp': """\
#!/usr/X11R6/bin/mgp -t 90
%%deffont "normal"    xfont  "utopia-medium-r", charset "iso8859-1"
%%deffont "normal-i"  xfont  "utopia-medium-i", charset "iso8859-1"
%%deffont "normal-b"  xfont  "utopia-bold-r"  , charset "iso8859-1"
%%deffont "normal-bi" xfont  "utopia-bold-i"  , charset "iso8859-1"
%%deffont "mono"      xfont "courier-medium-r", charset "iso8859-1"
%%default 1 size 5
%%default 2 size 8, fore "yellow", font "normal-b", center
%%default 3 size 5, fore "white",  font "normal", left, prefix "  "
%%tab 1 size 4, vgap 30, prefix "     ", icon arc "red" 40, leftfill
%%tab 2 prefix "            ", icon arc "orange" 40, leftfill
%%tab 3 prefix "                   ", icon arc "brown" 40, leftfill
%%tab 4 prefix "                          ", icon arc "darkmagenta" 40, leftfill
%%tab 5 prefix "                                ", icon arc "magenta" 40, leftfill
%%%%------------------------- end of headers -----------------------------
%%page





%%size 10, center, fore "yellow"
%(HEADER1)s

%%font "normal-i", size 6, fore "white", center
%(HEADER2)s

%%font "mono", size 7, center
%(HEADER3)s
""",

# TODO please, improve me!
  'moin': """\
%(HEADER1)s
%(HEADER2)s
%(HEADER3)s
""",

# raw string: the backslashes are TeX commands, not Python escapes
  'tex': \
r"""\documentclass[11pt,a4paper]{article}
\usepackage{amsfonts,amssymb,graphicx,url}
\usepackage[%(ENCODING)s]{inputenc}  %% char encoding
\pagestyle{plain}   %% do page numbering ('empty' turns off)
\frenchspacing      %% no aditional spaces after periods
\setlength{\parskip}{8pt}\parindent=0pt  %% no paragraph indentation
%% uncomment next line for fancy PDF output on Adobe Acrobat Reader
%%\usepackage[pdfstartview=FitV,colorlinks=true,bookmarks=true]{hyperref}

\title{%(HEADER1)s}
\author{%(HEADER2)s}
\begin{document}
\date{%(HEADER3)s}
\maketitle
"""
}

#-----------------------------------------------------------------------

def Quit(msg, exitcode=0):
	"Show 'msg' on STDOUT and leave the program with 'exitcode' status"
	print(msg)
	sys.exit(exitcode)
def Error(msg):
	"""Show 'msg' prefixed by "ERROR: " and abort the program.

	The exit status is 1, so the caller (shell, script, Makefile) is
	able to detect the failure. It was zero, the same as success.
	"""
	print("ERROR: %s"%msg)
	sys.exit(1)
def echo(msg): # quick debug
	"Print 'msg' wrapped by the ANSI escapes ESC[32;1m and ESC[m"
	colored = '\033[32;1m%s\033[m'%msg
	print(colored)
def Debug(msg,i=0,linenr=None):
	"""Print the debug message 'msg' if the debug level allows it.

	i      -- the message level, it is shown only if not greater
	          than the global DEBUG
	linenr -- optional line number, shown zero padded before 'msg'
	"""
	if not i > DEBUG:
		if linenr is None:
			print("(%d) %s"%(i,msg))
		else:
			print("(%d) %04d:%s"%(i,linenr,msg))
def Readfile(file):
	"""Return the contents of 'file' as a list of lines (EOL kept).

	file -- the path of the file to be read, or '-' for STDIN

	If the data can't be read, the program is finished via Error().
	"""
	if file == '-':
		# KeyboardInterrupt is listed to keep what the original
		# catch-all did: Ctrl-C on a STDIN read gives the message
		try: data = sys.stdin.readlines()
		except (KeyboardInterrupt, EnvironmentError):
			Error('You must feed me with data on STDIN!')
	else:
		try:
			f = open(file)
			# the file is closed even when the read fails
			try: data = f.readlines()
			finally: f.close()
		except EnvironmentError:
			Error("Cannot read file:\n    %s"%file)
	return data
def Savefile(file, contents):
	"""Write 'contents' to 'file', replacing any previous data.

	file     -- the path of the file to be written (binary mode, so
	            the line breaks are saved just as they came)
	contents -- a string, or a list of strings

	If the file can't be opened, the program is finished via Error().
	"""
	try: f = open(file, 'wb')
	except EnvironmentError:
		Error("Cannot open file for writing:\n    %s"%file)
	# the file is closed even when the writing fails
	try:
		if isinstance(contents, type([])): f.writelines(contents)
		else: f.write(contents)
	finally:
		f.close()

def ParseConfig(text='',name='', target=''):
	"""Parse a "%!name(target): value" config line.

	text   -- the line to be parsed
	name   -- regex for the valid config names (default: any word)
	target -- regex for the valid targets (default: any word, or none)

	Returns:
	  {}    if 'text' is empty or is not a config line
	  None  if it is a preproc/postproc line, but its value is not
	        composed by the two parts: PATTERN and REPLACE
	  dict  with the keys 'name' and 'target' (both lowercased, the
	        target is 'all' when not specified) and 'value'.
	        For preproc/postproc the value is a (pattern, replace)
	        tuple, for the others it is a string.
	"""
	ret = {}
	if not text: return ret
	re_name = name or '[a-z]+'
	re_target = target or '[a-z]*'
	# NOTE the value was \S.+? which demanded at least two chars,
	#      so a one char value ("%!style: x") was never recognized
	cfgregex = re.compile(r"""
	  ^%%!\s*               # leading id with opt spaces
	  (?P<name>%s)\s*       # config name
	  (\((?P<target>%s)\))? # optional target spec inside ()
	  \s*:\s*               # key:value delimiter with opt spaces
	  (?P<value>\S.*?)      # config value, at least one char
	  \s*$                  # rstrip() spaces and hit EOL
	  """%(re_name,re_target), re.I|re.VERBOSE)
	prepostregex = re.compile(r"""
	                          # ---[ PATTERN ]---
	  ^( "([^"]*)"            # "double quoted" or
	   | '([^']*)'            # 'single quoted' or
	   | ([^\s]+)             # single_word
	   )
	    \s+                   # separated by spaces

	                          # ---[ REPLACE ]---
	   ( "([^"]*)"            # "double quoted" or
	   | '([^']*)'            # 'single quoted' or
	   | (.*)                 # anything
	   )
	    \s*$
	  """, re.VERBOSE)
	match = cfgregex.match(text)
	if match:
		ret = {'name'  :(match.group('name') or '').lower(),
		       'target':(match.group('target') or 'all').lower(),
		       'value' :match.group('value') }

		# Special config with two quoted values (%!preproc: "foo" 'bar')
		if ret['name'] in ['preproc','postproc']:
			valmatch = prepostregex.search(ret['value'])
			if not valmatch: return None
			getval = valmatch.group
			# groups 2-4 are the PATTERN alternatives and 6-8
			# the REPLACE ones, just one of each is filled
			patt = getval(2) or getval(3) or getval(4) or ''
			repl = getval(6) or getval(7) or getval(8) or ''
			ret['value'] = (patt, repl)

	return ret


class Cmdline:
	"""Command line parser, checker and composer.

	The command line is a list, with the program name as the first
	item. parse() extracts the options from it to self.cmdline_conf
	and get_conf() joins them with the OPTIONS and FLAGS defaults to
	make the full configuration, saved on self.conf.
	merge() inserts more options into the command line, merge_conf()
	includes other settings into self.conf and compose() does the
	reverse path: from a configuration to a command line string.
	"""
	def __init__(self, cmdline=None, nocheck=0):
		"""Set the defaults and parse 'cmdline', if informed.

		cmdline -- list with the command line items
		nocheck -- if true, get_conf() skips the _sanity() checkings
		"""
		# the default is None instead of [], so the very same list
		# object is never shared by the instances
		if cmdline is None: cmdline = []
		self.conf = {}
		self.cmdline = cmdline
		self.cmdline_conf = {}
		self.dft_options = OPTIONS.copy()
		self.dft_flags   = FLAGS.copy()
		self.all_options = self.dft_options.keys()
		self.all_flags   = self.dft_flags.keys()
		self.defaults = self._get_empty_conf()
		self.nocheck = nocheck
		if cmdline: self.parse()
	
	#TODO protect quotes contents
	def _tokenize(self, cmd_string):
		"split a command line string into a list, on the blanks"
		return string.split(cmd_string)
	
	def parse(self):
		"""Parse self.cmdline, saving the result on self.cmdline_conf.

		Returns a dic with all options:value found, plus the
		'infile' and 'infiles' keys. If there is no command line,
		returns {}. The --help and --version options show their
		message and quit, and a bad option is a fatal Error().
		"""
		if not self.cmdline: return {}
		Debug("cmdline: %s"%self.cmdline, 1)
		options = {'infile': '', 'infiles':''}
		# compose valid options list
		longopts = ['help','version'] + self.all_flags + \
		           map(lambda x:x+'=', self.all_options) # add =
		cmdline = self.cmdline[1:]           # del prog name
		# get cmdline options
		try: (opt, args) = getopt.getopt(cmdline, 'hVnHt:o:', longopts)
		except getopt.GetoptError:
			Error('Bad option or missing argument (try --help)')
		# get infile, if any
		if args:
			options['infile'] = args[0]
			options['infiles'] = args  # multi
		# parse all options
		for name,val in opt:
			if   name in ['-h','--help'   ]: Quit(usage)
			elif name in ['-V','--version']: Quit(versionstr)
			elif name in ['-t','--type'     ]: options['type'] = val
			elif name in ['-o','--outfile'  ]: options['outfile'] = val
			elif name in ['-n','--enumtitle']: options['enumtitle'] = 1
			elif name in ['-H','--noheaders']: options['noheaders'] = 1
			elif name in ['--stdout']: options['outfile'] = STDOUT
			# any other --long option: the value, or 1 for flags
			else: options[name[2:]] = val or 1 # del --
		# save results
		Debug("cmdline arguments: %s"%options, 1)
		self.cmdline_conf = options
		# the docstring always promised it, but None was returned
		return options
	
	def compose(self, conf=None):
		"""Compose the full command line from the 'conf' dict.

		Returns the command line as a single string, having just
		the needed options: the empty ones and those equal to the
		defaults are not included. If 'conf' is empty, returns ''.
		"""
		if not conf: return ''
		args = []
		cfg = conf.copy()
		valid_opts = self.all_options + self.all_flags
		use_short = {'noheaders':'H', 'enumtitle':'n'}
		# remove useless options
		if cfg.get('toconly'):
			# outfile defaults to STDOUT on toconly
			# NOTE a key that is missing on 'conf' is not an
			#      error anymore (it was a KeyError on the del)
			for useless in ['noheaders', 'outfile']:
				if cfg.has_key(useless): del cfg[useless]
			if cfg.get('type') == 'txt':
				del cfg['type']       # already default
			args.append('--toconly')  # must be the first
			del cfg['toconly']
		# add target type
		if cfg.has_key('type'):
			args.append('-t '+cfg['type'])
			del cfg['type']
		# add other options
		for key in cfg.keys():
			if key not in valid_opts: continue  # must be a %!setting
			if key == 'outfile': continue       # later
			val = cfg[key]
			if not val: continue
			# default values are useless on cmdline
			if val == self.dft_options.get(key): continue
			# -short format
			if key in use_short.keys():
				args.append('-'+use_short[key])
				continue
			# --long format
			if key in self.all_flags: # add --option
				args.append('--'+key)
			else:                     # add --option=value
				args.append('--%s=%s'%(key,val))
		# the outfile using -o
		if cfg.has_key('outfile') and \
		   cfg['outfile'] != self.dft_options.get('outfile'):
			args.append('-o '+cfg['outfile'])
		# the input file is always at the end
		if cfg.has_key('infile'):
			args.append(cfg['infile'])
		# return as a single string
		ret = string.join(args,' ')
		Debug("Diet command line: %s"%ret, 1)
		return ret
	
	def merge(self, extraopts=''):
		"""Insert cmdline portion BEFORE current cmdline, and parse.

		extraopts -- the options to insert: a list, or a string
		             that is split by _tokenize()
		"""
		if not extraopts: return
		if type(extraopts) == type(''):
			extraopts = self._tokenize(extraopts)
		if not self.cmdline: self.cmdline = extraopts
		# the first item is a fake program name, parse() skips it
		else: self.cmdline = ['t2t-merged'] +extraopts +self.cmdline[1:]
		self.parse()
	
	def _get_outfile_name(self, conf):
		"""Return the output filename, composed from conf['infile'].

		It is the infile with the .txt or .t2t extension changed
		to the target type, so the dirname is the same for
		{in,out}file. It is STDOUT if the infile is STDIN or
		conf['outfile'] is STDOUT, and '' if there is no infile.
		"""
		infile = conf['infile']
		if not infile: return ''
		if infile == STDIN or conf['outfile'] == STDOUT:
			outfile = STDOUT
		else:
			basename = re.sub(r'\.(txt|t2t)$','',infile)
			outfile = "%s.%s"%(basename, conf['type'])
		self.dft_options['outfile'] = outfile # save for self.compose()
		Debug(" infile: '%s'"%infile , 1)
		Debug("outfile: '%s'"%outfile, 1)
		return outfile
	
	def _sanity(self, dic):
		"""Basic cmdline syntax checkings.

		Returns 'dic' with the numeric options converted to
		integers. Any problem found finishes the program.
		"""
		if not dic: return {}
		if not dic['infile'] or not dic['type']:
			Quit(usage, 1)                  # no filename/doctype
		if dic['type'] not in targets:          # check target
			Error("Invalid document type '%s' (try --help)"%(
			       dic['type']))
		#DISABLED: conflicting with %!cmdline: -o foo	   
		#if len(dic['infiles']) > 1 and dic['outfile']: # -o FILE *.t2t
		#	Error("--outfile can't be used with multiple files")
		for opt in self.all_options:            # check numeric options
			# an option is numeric if its default value is a number
			opttype = type(self.dft_options[opt])
			if dic.get(opt) and opttype == type(9):
				try: dic[opt] = int(dic.get(opt)) # save
				except (ValueError, TypeError):
					Error('--%s value must be a number'%opt)
		if dic['split'] not in [0,1,2]:         # check split level
			Error('Option --split must be 0, 1 or 2')
		return dic
	
	def merge_conf(self, newconfs=None, override=0):
		"""Include Config Area settings into self.conf.

		newconfs -- dict with the settings to be included
		override -- if true, the new values always win. If not,
		            a new value is used only if the key is not
		            set yet, or still has its default value.

		The preproc and postproc filters are always accumulative
		and the 'cmdline' key is ignored. Returns self.conf.
		"""
		if not self.conf: self.get_conf()
		if not newconfs: return self.conf
		for key in newconfs.keys():
			if key == 'cmdline': continue   # already done
			# filters are always accumulative
			if key in ['preproc','postproc']:
				if not self.conf.has_key(key):
					self.conf[key] = []
				self.conf[key].extend(newconfs[key])
				continue
			# add anyway
			if override:
				self.conf[key] = newconfs[key]
				continue
			# just update if still 'virgin'
			if self.conf.has_key(key) and \
			   self.conf[key] == self.defaults.get(key):
				self.conf[key] = newconfs[key]
			# add new
			if not self.conf.has_key(key):
				self.conf[key] = newconfs[key]
		
		Debug("Merged CONF (override=%s): %s"%(override,self.conf), 1)
		return self.conf
	
	def _get_empty_conf(self):
		"return a new dict with all the options and flags defaults"
		econf = self.dft_options.copy()
		for k in self.dft_flags.keys(): econf[k] = self.dft_flags[k]
		return econf
	
	def get_conf(self):
		"""Set vars and flags according to options dic.

		Returns the full configuration, also saved on self.conf:
		the defaults, changed by the command line options, then
		checked (unless nocheck) and adjusted for --toconly and
		--split. Returns {} if there is no command line at all.
		"""
		if not self.cmdline_conf:
			if not self.cmdline: return {}
			self.parse()
		dic = self.cmdline_conf
		conf = self.defaults.copy()
		
		## store flags & options
		for flag in self.all_flags:
			if dic.has_key(flag): conf[flag] = 1
		for opt in self.all_options + ['infile', 'infiles']:
			if dic.has_key(opt): conf[opt] = dic.get(opt)
		
		# the TOC alone needs no -t, it is a txt by default
		if not conf['type'] and conf['toconly']: conf['type'] = 'txt'
		if not self.nocheck: conf = self._sanity(conf)
		
		## some gotchas for specific issues
		doctype = conf['type']
		infile = conf['infile']
		
		# toconly is stronger than others
		if conf['toconly']:
			conf['noheaders'] = 1
			conf['toc'] = 0
			conf['split'] = 0
			conf['gui'] = 0
			conf['outfile'] = STDOUT
			conf['toclevel'] = conf['toclevel'] or \
			                   self.dft_options['toclevel']
		
		# split: just HTML, no stdout, 1st do a sgml, then sgml2html
		if conf['split']:
			if doctype != 'html':
				conf['split'] = 0
			else:
				conf['type'] = 'sgml' 
				if conf['outfile'] == STDOUT:
					conf['outfile'] = ''
		
		outfile = conf['outfile'] or self._get_outfile_name(conf)
		
		# final checkings
		if conf['split'] and outfile == STDOUT:
			Error('--split: You must provide a FILE (not STDIN)')
		if infile == outfile and outfile != STDOUT:
			Error("SUICIDE WARNING!!!  (see --outfile)\n  source"+\
			      " and target files has the same name: "+outfile)
		### author's note: "yes, i've got my sample.t2t file deleted
		### before add this test... :/"
		
		conf['outfile'] = outfile
		conf['cmdline'] = self.cmdline
		Debug("CONF data: %s\n"%conf, 1)
		self.conf = conf
		return self.conf
#
### End of Cmdline class




class Proprierties:
	"""Source document reader: finds its areas, headers and settings.

	The document lines are stored on self.buffer from position 1 on
	(position 0 is a padding), so the buffer index is the line number.
	The document is divided into three areas, in this order: head,
	conf and body. The number of the first line of each area is
	saved on self.arearef, being zero if the area is not present.
	If a filename is informed, all the reading is done at once.
	"""
	def __init__(self, filename=''):
		self.buffer = ['']   # text start at pos 1
		self.areas = ['head','conf','body']
		self.arearef = []
		self.headers = ['','','']
		self.config = self.get_empty_config()
		self.lastline = 0
		self.filename = filename
		self.conflines = []
		self.bodylines = []
		if filename:
			self.read_file(filename)
			self.find_areas()
			self.set_headers()
			self.set_config()
	
	def read_file(self, file):
		"Append the lines of 'file' to the buffer (empty file: Error)"
		lines = Readfile(file)
		if not lines: Error('Empty file! %s'%file)
		self.buffer.extend(lines)
	
	def get_empty_config(self):
		"Return a dict with an empty dict for each target, plus 'all'"
		empty = {}
		for targ in targets+['all']: empty[targ] = {}
		return empty
	
	def find_areas(self):
		"""Run through buffer and identify head/conf/body areas.

		By default the head begins at line 1 and the conf at line
		4. If line 1 is blank there is no head, and the conf begins
		at line 2. The body begins at the first line, searching
		from the conf start on, that is not blank and does not
		begin with a % char.
		"""
		buf = self.buffer ; ref = [1,4,0]       # defaults
		if not string.strip(buf[1]):            # no header
			ref[0] = 0 ; ref[1] = 2
		for i in range(ref[1],len(buf)):        # find body init
			if string.strip(buf[i]) and buf[i][0] != '%':
				ref[2] = i ; break      # !blank, !comment
		if ref[1] == ref[2]: ref[1] = 0         # no conf area
		for i in 0,1,2:                         # del !existent
			if not ref[i]: self.areas[i] = ''
		self.arearef = ref                      # save results
		self.lastline = len(self.buffer)-1
		Debug('Head,Conf,Body start line: %s'%ref, 1)
		# store CONF and BODY lines found
		cfgend = ref[2] or len(buf)
		# with no conf area ref[1] is zero, so the slice would
		# begin at the buffer top, getting the Headers as conf
		if ref[1]: self.conflines = buf[ref[1]:cfgend]
		else: self.conflines = []
		if ref[2]: self.bodylines = buf[ref[2]:]
	
	
	def set_headers(self):
		"""Extract and save headers contents.

		The Headers are the first three lines of the document,
		saved stripped on self.headers. It is a fatal error if
		the document has a head area but less than three lines.
		"""
		if not self.arearef: self.find_areas()
		if not self.areas.count('head'): return
		if self.lastline < 3:
			#TODO on gui this checking is !working
			Error(
			"Premature end of Headers on '%s'."%self.filename +\
			'\n\nFile has %s line(s), but '%self.lastline     +\
			'Headers should be composed by 3 lines. '         +\
			'\nMaybe you should left the first line blank? '  +\
			'(for no headers)')
		for i in 0,1,2:
			self.headers[i] = string.strip(self.buffer[i+1])
		# here 'i' is the last loop value, so the line shown is 3
		Debug("Headers found: %s"%self.headers, 1, i+1)
	
	def set_config(self):
		"""Extract and save config contents.

		Each valid "%!key(target): value" line of the conf area is
		saved as self.config[target][key] = value. The filters
		(preproc, postproc) are the exception: they are appended to
		the self.config['all'][key] list, as (target, pattern,
		replace) tuples. The invalid lines are ignored.
		"""
		if not self.arearef: self.find_areas()
		if not self.areas.count('conf'): return
		keywords = string.join(CONFIG_KEYWORDS, '|')
		linenr = self.arearef[1]-1  # for debug messages
		for line in self.conflines:
			linenr = linenr + 1
			if len(line) < 3: continue
			if line[:2] != '%!': continue
			cfg = ParseConfig(line, keywords)
			# any _valid_ config found?
			if not cfg:
				Debug('Bogus Config Line',1,linenr)
				continue
			# get data	
			targ, key, val = cfg['target'],cfg['name'], cfg['value']
			# check config target specification
			if targ not in targets+['all']:
				Debug("Config Error: Invalid target '%s', ignoring"%targ,
				      1,linenr)
				continue
			# filters are multiple config
			if key in ['preproc','postproc']:
				if not self.config['all'].has_key(key):  # 1st one
					self.config['all'][key] = []
				# all filters are saved to target 'all'
				# finish_him will decide what to consider
				self.config['all'][key].append((targ,)+val)
			else:
				self.config[targ][key] = val
			Debug("Found config for target '%s': '%s', value '%s'"%(
			       targ,key,val),1,linenr)
		Debug("All %%!CONFIG: %s"%self.config, 1)


def get_file_body(file):
	"Read 'file' and return the list of lines of its BODY area"
	# no filename on the constructor: the Headers and the Config
	# are not extracted, just the areas are needed here
	doc = Proprierties()
	doc.read_file(file)
	doc.find_areas()
	body = doc.bodylines
	return body


def finish_him(outlist, CONF):
	"""Write the converted document to the screen or to a file.

	outlist -- list with the resulting document lines
	CONF    -- the configuration dict. The keys read here are:
	           outfile, postproc, type, gui, split and lang

	Before the writing, the postproc filters of the current target
	(or 'all') are applied to every line. After it, if --split is
	on, the sgml2html command is called for the saved file.
	"""
	outfile = CONF['outfile']
	outlist = unmaskEscapeChar(outlist)
	
	# PostProc: each filter is a (target, pattern, replace) tuple
	if CONF['postproc']:
		filtered = []
		for line in outlist:
			for targ,patt,repl in CONF['postproc']:
				if targ in [CONF['type'], 'all']:
					line = re.sub(patt, repl, line)
			filtered.append(line)
		outlist = filtered
	
	if outfile != STDOUT:
		Savefile(outfile, addLineBreaks(outlist))
		if not CONF['gui']: print('wrote %s'%(outfile))
	else:
		for line in outlist: print(line)
	
	# the saved file is a SGML, now it is converted to HTML pages
	if CONF['split']:
		print("--- html...")
		sgml2html = 'sgml2html -s %s -l %s %s'%(
		            CONF['split'],CONF['lang'] or lang,outfile)
		print("Running system command: %s"%sgml2html)
		os.system(sgml2html)


def toc_maker(toc, conf):
	"""Compose the TOC list 'by hand', returning its final lines.

	toc  -- list with the TOC lines, as a t2t marked text (list type)
	conf -- the configuration dict (toc and toconly keys are read)

	Returns [] if the target has its own TOC tag.
	"""
	# the target makes the TOC by itself, nothing to do here
	if TAGS['TOC']: return []
	# the marked text is converted by a plain conversion:
	# no headers, no filters, no email masking
	if conf['toc'] or conf['toconly']:
		fakeconf = conf.copy()
		fakeconf.update({'noheaders':1, 'toconly':0, 'maskemail':0,
		                 'preproc':[], 'postproc':[]})
		toc,unused = convert(toc, fakeconf)
	# TOC between bars (not for --toconly)
	if conf['toc']:
		para = TAGS['paragraph']
		bar = regex['x'].sub('-'*72,TAGS['bar1'])
		toc = [para, bar, para] + toc + [para, bar, para]
	return toc


def getTags(doctype):
	keys = [
	'paragraph','title1','title2','title3','title4','title5',
	'numtitle1','numtitle2','numtitle3','numtitle4','numtitle5',
	'areaPreOpen','areaPreClose',
	'areaQuoteOpen','areaQuoteClose',
	'fontMonoOpen','fontMonoClose',
	'fontBoldOpen','fontBoldClose',
	'fontItalicOpen','fontItalicClose',
	'fontBolditalicOpen','fontBolditalicClose',
	'fontUnderlineOpen','fontUnderlineClose',
	'listOpen','listClose','listItem',
	'numlistOpen','numlistClose','numlistItem',
	'deflistOpen','deflistClose','deflistItem1','deflistItem2',
	'bar1','bar2',
	'url','urlMark','email','emailMark',
	'img','imgsolo',
	'tableOpen','tableClose','tableLineOpen','tableLineClose',
	'tableCellOpen','tableCellClose',
	'tableTitleCellOpen','tableTitleCellClose',
	'anchor','comment','TOC',
	'EOD'
	]
	
	alltags = {
	
	'txt': {
	   'title1'              : '  \a'      ,
	   'title2'              : '\t\a'      ,
	   'title3'              : '\t\t\a'    ,
	   'title4'              : '\t\t\t\a'  ,
	   'title5'              : '\t\t\t\t\a',
	   'areaQuoteOpen'       : '    '      ,
	   'listItem'            : '- '        ,
	   'numlistItem'         : '\a. '      ,
	   'bar1'                : '\a'        ,
	   'bar2'                : '\a'        ,
	   'url'                 : '\a'        ,
	   'urlMark'             : '\a (\a)'   ,
	   'email'               : '\a'        ,
	   'emailMark'           : '\a (\a)'   ,
	   'img'                 : '[\a]'      ,
	},
	
	'html': {
	   'paragraph'           : '<P>'            ,
	   'title1'              : '<H1>\a</H1>'    ,
	   'title2'              : '<H2>\a</H2>'    ,
	   'title3'              : '<H3>\a</H3>'    ,
	   'title4'              : '<H4>\a</H4>'    ,
	   'title5'              : '<H5>\a</H5>'    ,
	   'areaPreOpen'         : '<PRE>'          ,
	   'areaPreClose'        : '</PRE>'         ,
	   'areaQuoteOpen'       : '<BLOCKQUOTE>'   ,
	   'areaQuoteClose'      : '</BLOCKQUOTE>'  ,
	   'fontMonoOpen'        : '<CODE>'         ,
	   'fontMonoClose'       : '</CODE>'        ,
	   'fontBoldOpen'        : '<B>'            ,
	   'fontBoldClose'       : '</B>'           ,
	   'fontItalicOpen'      : '<I>'            ,
	   'fontItalicClose'     : '</I>'           ,
	   'fontBolditalicOpen'  : '<B><I>'         ,
	   'fontBolditalicClose' : '</I></B>'       ,
	   'fontUnderlineOpen'   : '<U>'            ,
	   'fontUnderlineClose'  : '</U>'           ,
	   'listOpen'            : '<UL>'           ,
	   'listClose'           : '</UL>'          ,
	   'listItem'            : '<LI>'           ,
	   'numlistOpen'         : '<OL>'           ,
	   'numlistClose'        : '</OL>'          ,
	   'numlistItem'         : '<LI>'           ,
	   'deflistOpen'         : '<DL>'           ,
	   'deflistClose'        : '</DL>'          ,
	   'deflistItem1'        : '<DT>\a</DT>'    ,
	   'deflistItem2'        : '<DD>'           ,
	   'bar1'                : '<HR NOSHADE SIZE=1>'        ,
	   'bar2'                : '<HR NOSHADE SIZE=5>'        ,
	   'url'                 : '<A HREF="\a">\a</A>'        ,
	   'urlMark'             : '<A HREF="\a">\a</A>'        ,
	   'email'               : '<A HREF="mailto:\a">\a</A>' ,
	   'emailMark'           : '<A HREF="mailto:\a">\a</A>' ,
	   'img'                 : '<IMG ALIGN="\a" SRC="\a" BORDER="0">',
	   'imgsolo'             : '<P ALIGN="center">\a</P>'   ,
	   'tableOpen'           : '<table\a cellpadding=4 border=\a>',
	   'tableClose'          : '</table>'       ,
	   'tableLineOpen'       : '<tr>'           ,
	   'tableLineClose'      : '</tr>'          ,
	   'tableCellOpen'       : '<td\a>'         ,
	   'tableCellClose'      : '</td>'          ,
	   'tableTitleCellOpen'  : '<th>'           ,
	   'tableTitleCellClose' : '</th>'          ,
	   'tableAlignLeft'      : ''               ,
	   'tableAlignCenter'    : ' align="center"',
	   'tableCellAlignLeft'  : ''               ,
	   'tableCellAlignRight' : ' align="right"' ,
	   'tableCellAlignCenter': ' align="center"',
	   'anchor'              : '<a name="\a"></a>',
	   'comment'             : '<!-- \a -->'    ,
	   'EOD'                 : '</BODY></HTML>'
	},
	
	'sgml': {
	   'paragraph'           : '<p>'                ,
	   'title1'              : '<sect>\a<p>'        ,
	   'title2'              : '<sect1>\a<p>'       ,
	   'title3'              : '<sect2>\a<p>'       ,
	   'title4'              : '<sect3>\a<p>'       ,
	   'title5'              : '<sect4>\a<p>'       ,
	   'areaPreOpen'         : '<tscreen><verb>'    ,
	   'areaPreClose'        : '</verb></tscreen>'  ,
	   'areaQuoteOpen'       : '<quote>'            ,
	   'areaQuoteClose'      : '</quote>'           ,
	   'fontMonoOpen'        : '<tt>'               ,
	   'fontMonoClose'       : '</tt>'              ,
	   'fontBoldOpen'        : '<bf>'               ,
	   'fontBoldClose'       : '</bf>'              ,
	   'fontItalicOpen'      : '<em>'               ,
	   'fontItalicClose'     : '</em>'              ,
	   'fontBolditalicOpen'  : '<bf><em>'           ,
	   'fontBolditalicClose' : '</em></bf>'         ,
	   'fontUnderlineOpen'   : '<bf><em>'           ,
	   'fontUnderlineClose'  : '</em></bf>'         ,
	   'listOpen'            : '<itemize>'          ,
	   'listClose'           : '</itemize>'         ,
	   'listItem'            : '<item>'             ,
	   'numlistOpen'         : '<enum>'             ,
	   'numlistClose'        : '</enum>'            ,
	   'numlistItem'         : '<item>'             ,
	   'deflistOpen'         : '<descrip>'          ,
	   'deflistClose'        : '</descrip>'         ,
	   'deflistItem1'        : '<tag>\a</tag>'      ,
	   'bar1'                : '<!-- \a -->'        ,
	   'bar2'                : '<!-- \a -->'        ,
	   'url'                 : '<htmlurl url="\a" name="\a">'        ,
	   'urlMark'             : '<htmlurl url="\a" name="\a">'        ,
	   'email'               : '<htmlurl url="mailto:\a" name="\a">' ,
	   'emailMark'           : '<htmlurl url="mailto:\a" name="\a">' ,
	   'img'                 : '<figure><ph vspace=""><img src="\a">'+\
	                           '</figure>'                           ,
	   'tableOpen'           : '<table><tabular ca="\a">'            ,
	   'tableClose'          : '</tabular></table>' ,
	   'tableLineClose'      : '<rowsep>'           ,
	   'tableCellClose'      : '<colsep>'           ,
	   'tableTitleCellClose' : '<colsep>'           ,
	   'tableColAlignLeft'   : 'l'                  ,
	   'tableColAlignRight'  : 'r'                  ,
	   'tableColAlignCenter' : 'c'                  ,
	   'comment'             : '<!-- \a -->'        ,
	   'TOC'                 : '<toc>'              ,
	   'EOD'                 : '</article>'
	},
	   
	'tex': {
	   'title1'              : '\n\\newpage\section*{\a}',
	   'title2'              : '\\subsection*{\a}'       ,
	   'title3'              : '\\subsubsection*{\a}'    ,
	   # title 4/5: DIRTY: para+BF+\\+\n
	   'title4'              : '\\paragraph{}\\textbf{\a}\\\\\n',
	   'title5'              : '\\paragraph{}\\textbf{\a}\\\\\n',
	   'numtitle1'           : '\n\\newpage\section{\a}',
	   'numtitle2'           : '\\subsection{\a}'       ,
	   'numtitle3'           : '\\subsubsection{\a}'    ,
	   'areaPreOpen'         : '\\begin{verbatim}'   ,
	   'areaPreClose'        : '\\end{verbatim}'     ,
	   'areaQuoteOpen'       : '\\begin{quotation}'  ,
	   'areaQuoteClose'      : '\\end{quotation}'    ,
	   'fontMonoOpen'        : '\\texttt{'           ,
	   'fontMonoClose'       : '}'                   ,
	   'fontBoldOpen'        : '\\textbf{'           ,
	   'fontBoldClose'       : '}'                   ,
	   'fontItalicOpen'      : '\\textit{'           ,
	   'fontItalicClose'     : '}'                   ,
	   'fontBolditalicOpen'  : '\\textbf{\\textit{'  ,
	   'fontBolditalicClose' : '}}'                  ,
	   'fontUnderlineOpen'   : '\\underline{'        ,
	   'fontUnderlineClose'  : '}'                   ,
	   'listOpen'            : '\\begin{itemize}'    ,
	   'listClose'           : '\\end{itemize}'      ,
	   'listItem'            : '\\item '             ,
	   'numlistOpen'         : '\\begin{enumerate}'  ,
	   'numlistClose'        : '\\end{enumerate}'    ,
	   'numlistItem'         : '\\item '             ,
	   'deflistOpen'         : '\\begin{description}',
	   'deflistClose'        : '\\end{description}'  ,
	   'deflistItem1'        : '\\item[\a]'          ,
	   'bar1'                : '\n\\hrulefill{}\n'   ,
	   'bar2'                : '\n\\rule{\linewidth}{1mm}\n',
	   'url'                 : '\\url{\a}'                  ,
	   'urlMark'             : '\\textit{\a} (\\url{\a})'   ,
	   'email'               : '\\url{\a}'                  ,
	   'emailMark'           : '\\textit{\a} (\\url{\a})'   ,
	   'img'                 : '\\begin{figure}\\includegraphics{\a}'+\
	                           '\\end{figure}',
	   'tableOpen'           : '\\begin{center}\\begin{tabular}{\a|}',
	   'tableClose'          : '\\end{tabular}\\end{center}',
	   'tableLineOpen'       : '\\hline ' ,
	   'tableLineClose'      : ' \\\\'    ,
	   'tableCellClose'      : ' & '      ,
	   'tableTitleCellOpen'  : '\\textbf{',
	   'tableTitleCellClose' : '} & '     ,
	   'tableColAlignLeft'   : '|l'       ,
	   'tableColAlignRight'  : '|r'       ,
	   'tableColAlignCenter' : '|c'       ,
	   'comment'             : '% \a'     ,
	   'TOC'                 : '\\newpage\\tableofcontents',
	   'EOD'                 : '\\end{document}'
	},
	
	'moin': {
	   'title1'              : '= \a ='        ,
	   'title2'              : '== \a =='      ,
	   'title3'              : '=== \a ==='    ,
	   'title4'              : '==== \a ===='  ,
	   'title5'              : '===== \a =====',
	   'areaPreOpen'         : '{{{'           ,
	   'areaPreClose'        : '}}}'           ,
	   'areaQuoteOpen'       : ' '             ,
	   'fontMonoOpen'        : '{{{'           ,
	   'fontMonoClose'       : '}}}'           ,
	   'fontBoldOpen'        : "'''"           ,
	   'fontBoldClose'       : "'''"           ,
	   'fontItalicOpen'      : "''"            ,
	   'fontItalicClose'     : "''"            ,
	   'fontBolditalicOpen'  : "'''''"         ,
	   'fontBolditalicClose' : "'''''"         ,
	   'fontUnderlineOpen'   : "'''''"         ,
	   'fontUnderlineClose'  : "'''''"         ,
	   'listItem'            : '* '            ,
	   'numlistItem'         : '\a. '          ,
	   'bar1'                : '----'          ,
	   'bar2'                : '----'          ,
	   'url'                 : '[\a]'          ,
	   'urlMark'             : '[\a \a]'       ,
	   'email'               : '[\a]'          ,
	   'emailMark'           : '[\a \a]'       ,
	   'img'                 : '[\a]'          ,
	   'tableLineOpen'       : '||'            ,
	   'tableCellClose'      : '||'            ,
	   'tableTitleCellClose' : '||'
	},
	
	'mgp': {
	   'paragraph'           : '%font "normal", size 5\n'   ,
	   'title1'              : '%page\n\n\a'                ,
	   'title2'              : '%page\n\n\a'                ,
	   'title3'              : '%page\n\n\a'                ,
	   'title4'              : '%page\n\n\a'                ,
	   'title5'              : '%page\n\n\a'                ,
	   'areaPreOpen'         : '\n%font "mono"'             ,
	   'areaPreClose'        : '%font "normal"'             ,
	   'areaQuoteOpen'       : '%prefix "       "'          ,
	   'areaQuoteClose'      : '%prefix "  "'               ,
	   'fontMonoOpen'        : '\n%cont, font "mono"\n'     ,
	   'fontMonoClose'       : '\n%cont, font "normal"\n'   ,
	   'fontBoldOpen'        : '\n%cont, font "normal-b"\n' ,
	   'fontBoldClose'       : '\n%cont, font "normal"\n'   ,
	   'fontItalicOpen'      : '\n%cont, font "normal-i"\n' ,
	   'fontItalicClose'     : '\n%cont, font "normal"\n'   ,
	   'fontBolditalicOpen'  : '\n%cont, font "normal-bi"\n',
	   'fontBolditalicClose' : '\n%cont, font "normal"\n'   ,
	   'fontUnderlineOpen'   : '\n%cont, fore "cyan"\n'     ,
	   'fontUnderlineClose'  : '\n%cont, fore "white"\n'    ,
	   'numlistItem'         : '\a. '                       ,
	   'bar1'                : '%bar "white" 5'             ,
	   'bar2'                : '%pause'                     ,
	   'url'                 : '\n%cont, fore "cyan"\n\a'   +\
	                           '\n%cont, fore "white"\n'    ,
	   'urlMark'             : '\a \n%cont, fore "cyan"\n\a'+\
	                           '\n%cont, fore "white"\n'    ,
	   'email'               : '\n%cont, fore "cyan"\n\a'   +\
	                           '\n%cont, fore "white"\n'    ,
	   'emailMark'           : '\a \n%cont, fore "cyan"\n\a'+\
	                           '\n%cont, fore "white"\n'    ,
	   'img'                 : '\n%center\n%newimage "\a", left\n',
	   'comment'             : '%% \a'                      ,
	   'EOD'                 : '%%EOD'
	},
	
	'man': {
	   'paragraph'           : '.P'     ,
	   'title1'              : '.SH \a' ,
	   'title2'              : '.SS \a' ,
	   'title3'              : '.SS \a' ,
	   'title4'              : '.SS \a' ,
	   'title5'              : '.SS \a' ,
	   'areaPreOpen'         : '.nf'    ,
	   'areaPreClose'        : '.fi\n'  ,
	   'areaQuoteOpen'       : '\n'     ,
	   'areaQuoteClose'      : '\n'     ,
	   'fontBoldOpen'        : '\\fB'   ,
	   'fontBoldClose'       : '\\fP'   ,
	   'fontItalicOpen'      : '\\fI'   ,
	   'fontItalicClose'     : '\\fP'   ,
	   'fontBolditalicOpen'  : '\\fI'   ,
	   'fontBolditalicClose' : '\\fP'   ,
	   'listOpen'            : '\n.nf'  ,  # pre
	   'listClose'           : '.fi\n'  ,
	   'listItem'            : '* '     ,
	   'numlistOpen'         : '\n.nf'  ,  # pre
	   'numlistClose'        : '.fi\n'  ,
	   'numlistItem'         : '\a. '   ,
	   'bar1'                : '\n\n'   ,
	   'bar2'                : '\n\n'   ,
	   'url'                 : '\a'     ,
	   'urlMark'             : '\a (\a)',
	   'email'               : '\a'     ,
	   'emailMark'           : '\a (\a)',
	   'img'                 : '\a'     ,
	   'comment'             : '.\\" \a'
	},
	
	'pm6': {
	   'paragraph'           : '<@Normal:>'    ,
	   'title1'              : '\n<@Title1:>\a',
	   'title2'              : '\n<@Title2:>\a',
	   'title3'              : '\n<@Title3:>\a',
	   'title4'              : '\n<@Title4:>\a',
	   'title5'              : '\n<@Title5:>\a',
	   'areaPreOpen'         : '<@PreFormat:>' ,
	   'areaQuoteOpen'       : '<@Quote:>'     ,
	   'fontMonoOpen'        : '<FONT "Lucida Console"><SIZE 9>' ,
	   'fontMonoClose'       : '<SIZE$><FONT$>',
	   'fontBoldOpen'        : '<B>'           ,
	   'fontBoldClose'       : '<P>'           ,
	   'fontItalicOpen'      : '<I>'           ,
	   'fontItalicClose'     : '<P>'           ,
	   'fontBolditalicOpen'  : '<B><I>'        ,
	   'fontBolditalicClose' : '<P>'           ,
	   'fontUnderlineOpen'   : '<U>'           ,
	   'fontUnderlineClose'  : '<P>'           ,
	   'listOpen'            : '<@Bullet:>'    ,
	   'listItem'            : '\x95	'       ,  # \x95 == ~U
	   'numlistOpen'         : '<@Bullet:>'    ,
	   'numlistItem'         : '\x95    '      ,
	   'bar1'                : '\a'            ,
	   'bar2'                : '\a'            ,
	   'url'                 : '<U>\a<P>'      ,  # underline
	   'urlMark'             : '\a <U>\a<P>'   ,
	   'email'               : '\a'            ,
	   'emailMark'           : '\a \a'         ,
	   'img'                 : '\a'
	}
	}
	
	# compose the target tags dictionary
	tags = {}
	target_tags = alltags[doctype]
	for key in keys: tags[key] = ''     # create empty keys
	for key in target_tags.keys():
		tags[key] = maskEscapeChar(target_tags[key]) # populate
	
	return tags


def getRules(doctype):
	"Return the rules of the given target: every known rule, 0 unless the target sets it"
	# every rule name known to the program (the returned dict has all of them)
	known_rules = [
	
	 # target rules (ON/OFF)
	  'linkable',           # target supports external links
	  'tableable',          # target supports tables
	  'imglinkable',        # target supports images as links
	  'imgalignable',       # target supports image alignment
	  'imgasdefterm',       # target supports image as definition term
	  'tablealignable',     # target supports table alignment
	  'autonumberlist',     # target supports numbered lists natively
	  'autonumbertitle',    # target supports numbered titles natively
	  'tablecellsplit',     # place delimiters only *between* cells
	  'listnotnested',      # lists cannot be nested
	  'quotenotnested',     # quotes cannot be nested
	  'preareanotescaped',  # don't escape specials in PRE area
	  'escapeurl',          # escape special in link URL
	
	 # target code beautify (ON/OFF)
	  'indentprearea',      # add leading spaces to PRE area lines
	  'breaktablecell',     # break lines after any table cell
	  'breaktablelineopen', # break line after opening table line
	  'keepquoteindent',    # don't remove the leading TABs on quotes
	
	 # value settings
	  'listmaxdepth',       # maximum depth for lists
	  'tablecellaligntype'  # type of table cell align: cell, column
	]
	
	# what each target turns ON (or sets to a value)
	per_target = {
	  'txt' : {'indentprearea': 1},
	  'html': {
	    'indentprearea'     : 1,
	    'linkable'          : 1,
	    'imglinkable'       : 1,
	    'imgalignable'      : 1,
	    'imgasdefterm'      : 1,
	    'autonumberlist'    : 1,
	    'tableable'         : 1,
	    'breaktablecell'    : 1,
	    'breaktablelineopen': 1,
	    'keepquoteindent'   : 1,
	    'tablealignable'    : 1,
	    'tablecellaligntype': 'cell'
	    },
	  'sgml': {
	    'linkable'          : 1,
	    'escapeurl'         : 1,
	    'autonumberlist'    : 1,
	    'tableable'         : 1,
	    'tablecellsplit'    : 1,
	    'quotenotnested'    : 1,
	    'keepquoteindent'   : 1,
	    'tablecellaligntype': 'column'
	    },
	  'mgp' : {},
	  'tex' : {
	    'autonumberlist'    : 1,
	    'autonumbertitle'   : 1,
	    'tableable'         : 1,
	    'tablecellsplit'    : 1,
	    'preareanotescaped' : 1,
	    'listmaxdepth'      : 4,
	    'tablecellaligntype': 'column'
	    },
	  'moin': {'linkable': 1, 'tableable': 1},
	  'man' : {'indentprearea': 1, 'listnotnested': 1},
	  'pm6' : {}
	}
	
	# an unknown doctype raises KeyError here, before anything is built
	chosen = per_target[doctype]
	
	# start with everything OFF, then overlay the target settings
	settings = {}
	for name in known_rules:
		settings[name] = 0
	settings.update(chosen)
	return settings


def getRegexes():
	"""Build and return the dictionary of compiled regexes used by the parser.
	
	Keys are mark names ('title', 'fontBold', 'link', ...) and values are
	compiled pattern objects. The one exception is '_urlskel', which holds
	the plain dictionary of URL pattern pieces (strings, not compiled).
	Everything is recompiled on each call.
	"""
	regex = {
	# extra at end: (\[(?P<label>\w+)\])?
	# title: 1 to 5 '=' chars, the very same run repeated at the end (\1)
	'title':
		re.compile(r'^\s*(?P<id>={1,5})(?P<txt>[^=].*[^=])\1\s*$'),
	# numtitle: same as title, but delimited by '+' chars
	'numtitle':
		re.compile(r'^\s*(?P<id>\+{1,5})(?P<txt>[^+].*[^+])\1\s*$'),
	# PRE area open and close share the same mark: a line with only '---'
	'areaPreOpen':
		re.compile(r'^---$'),
	'areaPreClose':
		re.compile(r'^---$'),
	# quote: one or more leading TABs
	'quote':
		re.compile(r'^\t+'),
	# 1linePre: '--- ' at line start, followed by at least one char
	'1linePre':
		re.compile(r'^--- (?=.)'),
	'fontMono':
		re.compile(r'`([^`]+)`'),
	'fontBold':
		re.compile(r'\*\*([^\s*].*?)\*\*'),
	# italic: the (^|[^:]) prefix rejects a '//' that comes right after ':'
	'fontItalic':
		re.compile(r'(^|[^:])//([^ /].*?)//'),
	'fontUnderline':
		re.compile(r'__([^_].*?)__'), # underline lead/trailing blank
	'fontBolditalic':
		re.compile(r'\*/([^/].*?)/\*'),
	# list: (indent spaces)(+ or -) one space (first non-space char)
	'list':
		re.compile(r'^( *)([+-]) ([^ ])'),
	# deflist: (indent spaces)(=) one space (term):
	'deflist':
		re.compile(r'^( *)(=) ([^:]+):'),
	# bar: a line with 20 or more chars taken from '_', '=' and '-'
	'bar':
		re.compile(r'^\s*([_=-]{20,})\s*$'),
	# table: optional spaces, then '|' or '||', then one space
	'table':
		re.compile(r'^ *\|\|? '),
	'blankline':
		re.compile(r'^\s*$'),
	# comment: any line beginning with '%'
	'comment':
		re.compile(r'^%'),
	'raw':
		re.compile(r'``(.+?)``')
	}
	
	# special char to place data on TAGs contents  (\a == bell)
	regex['x'] = re.compile('\a')
	
	# %%date [ (formatting) ]
	# the optional text inside the parentheses is captured as group 'fmt'
	regex['date'] = re.compile(r'%%date\b(\((?P<fmt>.*?)\))?', re.I)
	
	
	### complicated regexes begin here ;)
	#
	# textual descriptions on --help's style: [...] is optional, | is OR
	
	
	### first, some auxiliary variables
	#
	
	# [image.EXT]
	patt_img = r'\[([\w_,.+%$#@!?+~/-]+\.(png|jpe?g|gif|eps|bmp))\]'
	
	# link things
	# NOTE 'login', 'chars', 'anchor', 'form' and 'punct' are bare character
	# lists, meant to be placed inside [...] by the patterns below
	urlskel = {
	  'proto' : r'(https?|ftp|news|telnet|gopher|wais)://',
	  'guess' : r'(www[23]?|ftp)\.',    # w/out proto, try to guess
	  'login' : r'A-Za-z0-9_.-',        # for ftp://login@domain.com
	  'pass'  : r'[^ @]*',              # for ftp://login:password@dom.com
	  'chars' : r'A-Za-z0-9%._/~:,=$@-',# %20(space), :80(port)
	  'anchor': r'A-Za-z0-9%._-',       # %nn(encoded)
	  'form'  : r'A-Za-z0-9/%&=+.,@*_-',# .,@*_-(as is)
	  'punct' : r'.,;:!?'
	}
	
	# username [ :password ] @
	patt_url_login = r'([%s]+(:%s)?@)?'%(urlskel['login'],urlskel['pass'])
	
	# [ http:// ] [ username:password@ ] domain.com [ / ]
	#     [ #anchor | ?form=data ]
	retxt_url = r'\b(%s%s|%s)[%s]+\b/*(\?[%s]+)?(#[%s]+)?'%(
	             urlskel['proto'],patt_url_login, urlskel['guess'],
	             urlskel['chars'],urlskel['form'],urlskel['anchor'])
	
	# filename | [ filename ] #anchor
	retxt_url_local = r'[%s]+|[%s]*(#[%s]+)'%(
	             urlskel['chars'],urlskel['chars'],urlskel['anchor'])
	
	# user@domain [ ?form=data ]
	patt_email = r'\b[%s]+@([A-Za-z0-9_-]+\.)+[A-Za-z]{2,4}\b(\?[%s]+)?'%(
	             urlskel['login'],urlskel['form'])
	
	
	# saving for future use
	regex['_urlskel'] = urlskel
	
	### and now the real regexes
	#
	
	regex['email'] = re.compile(patt_email,re.I)
	
	# email | url
	# (the URL alternative comes first in the composed pattern)
	regex['link'] = \
		re.compile(r'%s|%s'%(retxt_url,patt_email), re.I)
	
	# \[ label | imagetag    url | email | filename \]
	# groups: 'label' is an image mark or any text without ']',
	#         'link' is an URL, an e-mail or a local filename/anchor
	regex['linkmark'] = \
		re.compile(r'\[(?P<label>%s|[^]]+) (?P<link>%s|%s|%s)\]'%(
		   patt_img, retxt_url, patt_email, retxt_url_local),
		   re.L+re.I)
	
	# image
	regex['img'] = re.compile(patt_img, re.L+re.I)
	
	# all macros
	# (for now %%date is the only one, so the very same object is reused)
	regex['macro'] = regex['date']
	
	# special things
	# 'special': the '%!' line prefix; 'command': 'Include: <value>'
	regex['special'] = re.compile(r'^%!\s*')
	regex['command'] = re.compile(r'(Include)\s*:\s*(.+)\s*$',re.I)
	return regex
### END OF regex nightmares


class SubareaMaster:
	"""Stack of the currently open subarea names (self.x, last item is the top).
	
	Calling the instance returns the top name, or '' when nothing is open.
	"""
	def __init__(self) : self.x = []
	def __call__(self) :
		"Return the current (innermost) subarea name, '' if none"
		if not self.x: return ''
		return self.x[-1]
	def add(self, area):
		"Push area, unless it is already the current one"
		if not self.x or self.x[-1] != area:
			self.x.append(area)
		Debug('subarea ++ (%s): %s' % (area,self.x), 1)
	def pop(self, area=None):
		"Remove area from the top; no-op if it is not the current one"
		# the emptiness test avoids an IndexError when closing an area
		# while nothing is open
		if area and self.x and self.x[-1] == area: self.x.pop()
		Debug('subarea -- (%s): %s' % (area,self.x), 1)

def doHeader(headers, CONF):
	"""Compose the document header for the target in CONF['type'].
	
	headers: sequence whose first three items are the header texts.
	CONF   : configuration dictionary; 'noheaders' and 'type' are read
	         here, 'style' and 'encoding' are fetched with .get().
	
	Returns the header as a list of lines ([] when CONF['noheaders'] is
	set). An unknown doctype is reported through Error().
	"""
	if CONF['noheaders']: return []
	doctype = CONF['type']
	if not HEADER_TEMPLATE.has_key(doctype):
		Error("doheader: Unknow doctype '%s'"%doctype)
	
	template = string.split(HEADER_TEMPLATE[doctype], '\n')
	
	head_data = {'STYLE':'', 'ENCODING':''}
	for key in head_data.keys():
		val = CONF.get(string.lower(key))
		if key == 'ENCODING': val = get_encoding_string(val, doctype)
		head_data[key] = val
	# parse header contents
	for i in 0,1,2:
		contents = doDateMacro(headers[i])  # expand %%date
		# Escapes - on tex, just do it if any \tag{} present
		if doctype != 'tex' or re.search(r'\\\w+{', contents):
			contents = doEscape(doctype, contents)
		
		head_data['HEADER%d'%(i+1)] = contents
	Debug("Header Data: %s"%head_data, 1)
	# Drop the template lines whose only key reference is an empty key.
	# A line that still references some other key is kept as is (the
	# empty key expands to nothing on the final formatting).
	# The surviving lines go to a new list: removing items from the list
	# being looped over would skip the line right after each removal.
	for key in head_data.keys():
		if head_data.get(key): continue
		keyref = '%%(%s)s'%key
		kept = []
		for line in template:
			if string.count(line, keyref):
				sline = string.replace(line, keyref, '')
				if not re.search(r'%\([A-Z0-9]+\)s', sline):
					continue                # no other key: drop line
			kept.append(line)
		template = kept
	# populate template with data
	template = string.join(template, '\n') % head_data
	### post processing
	#
	# let tex format today
	if doctype == 'tex' and head_data['HEADER3'] == currdate:
		# r'\\date' is the explicit spelling of a literal \date on the
		# replacement string (no reliance on unknown-escape handling)
		template = re.sub(r'\\date\{.*?}', r'\\date', template)
	
	return string.split(template, '\n')

def doDateMacro(line):
	"""Expand every %%date macro found on line and return the new line.
	
	A bare %%date becomes the global currdate; %%date(fmt) becomes the
	current local time formatted by strftime(fmt).
	"""
	re_date = getRegexes()['date']
	m = re_date.search(line)
	while m:
		fmt = m.group('fmt') or ''
		dateme = currdate
		if fmt: dateme = strftime(fmt,localtime(time()))
		# Splice the text in by hand: given to re_date.sub() it would be
		# parsed as a replacement template, so any backslash coming from
		# the user format (as in \today) would be taken as an escape.
		line = line[:m.start()] + dateme + line[m.end():]
		m = re_date.search(line)
	return line

def doCommentLine(txt):
	"""Return txt wrapped by the target comment tag (TAGS['comment']).
	
	Returns '' when the target has no comment tag.
	"""
	if not TAGS['comment']:
		return ''
	
	# the -- string ends a sgml/html comment :(
	if string.count(TAGS['comment'], '--') and \
		string.count(txt, '--'):
		txt = re.sub('-(?=-)', r'-\\', txt)
	
	# A function as replacement inserts txt literally. As a plain string
	# it would be parsed as a template, and backslashes on the comment
	# text (a DOS path on the command line, for example) would be taken
	# as escape sequences.
	return regex['x'].sub(lambda m, txt=txt: txt, TAGS['comment'])

def doFooter(CONF):
	"Return the footer lines: two comment lines (credits, cmdline) and the EOD tag"
	doctype = CONF['type']
	args = CONF['cmdline']
	
	# LaTeX gets a nicer name on the credits line
	if doctype == 'tex': typename = 'LaTeX2e'
	else               : typename = doctype
	
	credits = '%s code generated by txt2tags %s (%s)'%(
	           typename,my_version,my_url)
	# args[0] is skipped, the program name is written explicitly
	invocation = 'cmdline: txt2tags %s'%string.join(args[1:], ' ')
	
	return [
		'\n'+doCommentLine(credits),
		doCommentLine(invocation),
		TAGS['EOD']
	]

# TODO mgp: any line (header or not) can't begin with % (add a space before)
def doEscape(doctype,txt):
	"Return txt with the target special chars escaped (unchanged for other targets)"
	if doctype == 'html' or doctype == 'sgml':
		# the & must be the first, it is part of the other entities
		for char, entity in [('&','&amp;'), ('<','&lt;'), ('>','&gt;')]:
			txt = re.sub(char, entity, txt)
		if doctype == 'sgml':
			txt = re.sub('\xff','&yuml;',txt)  # "+y
		return txt
	
	if doctype == 'pm6':
		return re.sub('<','<\#60>',txt)
	
	if doctype == 'mgp':
		# add leading blank to avoid parse
		return re.sub('^%',' %',txt)
	
	if doctype == 'man':
		txt = re.sub("^([.'])", '\\&\\1',txt)              # command ID
		return string.replace(txt,ESCCHAR, ESCCHAR+'e')    # \e
	
	if doctype == 'tex':
		# the escape char is parked on a placeholder while the other
		# specials are escaped, then it comes back as $\backslash$
		parked = '@@LaTeX-escaping-SUX@@'
		txt = string.replace(txt, ESCCHAR, parked)
		txt = re.sub('([#$&%{}])', r'\\\1', txt)
		txt = string.replace(txt, '~', maskEscapeChar(r'\~{}'))
		txt = string.replace(txt, '^', maskEscapeChar(r'\^{}'))
		# TIP the _ is escaped at the end
		return string.replace(txt, parked,
		                      maskEscapeChar(r'$\backslash$'))
	
	return txt

def doFinalEscape(doctype, txt):
	"Last escapes of each line (only pm6, man, tex and sgml have one)"
	if doctype == 'pm6':
		return string.replace(txt,ESCCHAR+'<',r'<\#92><')
	if doctype == 'man':
		return string.replace(txt, '-', r'\-')
	if doctype == 'tex':
		return string.replace(txt, '_', r'\_')
	if doctype == 'sgml':
		return string.replace(txt, '[', '&lsqb;')
	# any other target: nothing to do
	return txt

def EscapeCharHandler(action, data):
	"Mask ('mask') or restore ('unmask') the backslash on data, using ESCCHAR"
	# blank data is returned untouched, the action is not even checked
	if not string.strip(data):
		return data
	if action == 'mask':
		return string.replace(data,'\\',ESCCHAR)
	if action != 'unmask':
		Error("EscapeCharHandler: Invalid action '%s'"%action)
	return string.replace(data,ESCCHAR,'\\')

def maskEscapeChar(data):
	"Replace any Escape Char (backslash) with a text mask (Input: str or list)"
	if type(data) != type([]):
		return EscapeCharHandler('mask',data)
	# list: mask item by item
	masked = []
	for item in data:
		masked.append(EscapeCharHandler('mask', item))
	return masked

def unmaskEscapeChar(data):
	"Undo the Escape Char (backslash) masking (Input: str or list)"
	if type(data) != type([]):
		return EscapeCharHandler('unmask',data)
	# list: unmask item by item
	restored = []
	for item in data:
		restored.append(EscapeCharHandler('unmask', item))
	return restored

def addLineBreaks(list):
	"Return new lines with every embedded \\n turned into LB, plus a final LB on each"
	broken = []
	for text in list:
		# embedded \n's first, then the line break that ends the line
		broken.append(string.replace(text,'\n',LB) + LB)
	return broken

def doPreLine(doctype,line):
	"Prepare one preformatted (verbatim) line: escape, indent, final pm6 escape"
	must_escape = not rules['preareanotescaped']
	if must_escape:
		line = doEscape(doctype,line)
	if rules['indentprearea']:
		line = '  '+line
	# pm6 is the only target with a final escape on PRE lines
	if doctype == 'pm6':
		line = doFinalEscape(doctype, line)
	return line

def doCloseTable(doctype):
	"Return the tag that closes the table, reset tableborder and pop the 'table' subarea"
	global subarea, tableborder
	if not rules['tableable']:
		# no table support: the PRE area close tag is returned instead
		closing = TAGS['areaPreClose']
	elif doctype == 'tex' and tableborder:
		# bordered tex table: one more line tag before the close tag
		closing = TAGS['tableLineOpen']+TAGS['tableClose']+'\n'
	else:
		closing = TAGS['tableClose']+'\n'
	tableborder = 0
	subarea.pop('table')
	return closing

def doCloseQuote(howmany=None):
	"Close howmany quote levels (all of them if not informed), return the joined close tags"
	global quotedepth
	# None or 0 means: close every open level
	pending = howmany or len(quotedepth)
	closed = []
	while pending > 0:
		quotedepth.pop()
		#TODO align open/close tag -> FREE_ALING_TAG = 1 (man not)
		closed.append(TAGS['areaQuoteClose'])
		pending = pending - 1
	
	# no level left: leave the quote subarea
	if not quotedepth:
		subarea.pop('quote')
	return string.join(closed,'\n')

def doCloseList(howmany=None):
	global listindent, listids
	ret…
Tech Fingerprint

Alerts (46)

'global' Avoid global variables; use function parameters or class attributes for better scope management
58
'def' Ensure functions have docstrings for documentation
232 233 234 235 241 249 256 319 350 398 441 473 552 557 562 582 598 634 642 671 692 1046 1135
'except:' Avoid catching all exceptions; specify exception types to catch only expected errors
244 247 251 436
'type(' Use isinstance() for type checking instead of type()
252 401 433 434
Complexity hotspot; lines 337 to 345 (total complexity: 10)
337 338 339 340 341 342 343 344 345
'del' Avoid unless necessary; Python's garbage collector typically handles object deletion
359 360 362 364 368