/old/txt2tags-1.7.py
Python | 2628 lines | 2582 code | 16 blank | 30 comment | 9 complexity | f705939eba7a6267cceef0ee2a39b8ff MD5 | raw file
Possible License(s): GPL-2.0, GPL-3.0, WTFPL
- #!/usr/bin/env python
- # txt2tags - generic text conversion tool
- # http://txt2tags.sf.net
- #
- # Copyright 2001, 2002, 2003 Aurelio Marinho Jargas
- #
- # This program is free software; you can redistribute it and/or modify
- # it under the terms of the GNU General Public License as published by
- # the Free Software Foundation, version 2.
- #
- # This program is distributed in the hope that it will be useful,
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- # GNU General Public License for more details.
- #
- # You have received a copy of the GNU General Public License along
- # with this program, on the COPYING file.
- #
- # the code is better, even readable now, but needs more improvements
- # please wait for the upcoming 2.0 series for a cleaner one
- #XXX Python coding warning
- # Avoid common mistakes:
- # - do NOT use newlist=list instead newlist=list[:]
- # - do NOT use newdic=dic instead newdic=dic.copy()
- # - do NOT use dic[key] instead dic.get(key)
- import re, string, os, sys, getopt, traceback
- from time import strftime,time,localtime
# --- program identity -------------------------------------------------
my_url = 'http://txt2tags.sf.net'
my_email = 'verde@aurelio.net'
my_version = '1.7'  # -betaN

DEBUG = 0  # do not edit here, please use --debug

# --- supported targets and command line defaults ----------------------
targets = ['txt', 'sgml', 'html', 'pm6', 'mgp', 'moin', 'man', 'tex']

# on/off switches, all disabled by default
FLAGS = {
    'noheaders': 0,
    'enumtitle': 0,
    'maskemail': 0,
    'stdout': 0,
    'toconly': 0,
    'toc': 0,
    'gui': 0,
    'dump-source': 0,
}

# options that carry a value, with their defaults
OPTIONS = {
    'toclevel': 3,
    'style': '',
    'type': '',
    'outfile': '',
    'split': 0,
    'lang': '',
}

# names accepted on '%!name: value' config lines
CONFIG_KEYWORDS = ['encoding', 'style', 'cmdline', 'preproc', 'postproc']

# --- global registries (four distinct, initially empty dicts) ---------
CONF = {}
regex = {}
TAGS = {}
rules = {}

# --- misc globals -------------------------------------------------------
currdate = strftime('%Y%m%d', localtime(time()))  # ISO current date
lang = 'english'
doctype = outfile = ''
STDIN = STDOUT = '-'
ESCCHAR = '\x00'
LINEBREAK = {'default': '\n', 'win': '\r\n', 'mac': '\r'}
#my_version = my_version + '-dev' + currdate[4:] # devel!

# global vars for doClose*()
quotedepth = []
listindent = []
listids = []
subarea = None
tableborder = 0

# set the Line Break across platforms: the first three chars of
# sys.platform select the entry, anything unknown falls back to '\n'
LB = LINEBREAK.get(sys.platform[:3], LINEBREAK['default'])

versionstr = "txt2tags version %s <%s>" % (my_version, my_url)
- # Help text. It is printed by Quit(usage) for -h/--help, and with exit
- # code 1 when the input file or the target type is missing.
- # The two %s placeholders receive, in this order, the version banner and
- # the list of targets (its repr() with brackets and quotes stripped).
- usage = """
- %s
- Usage: txt2tags -t <type> [OPTIONS] file.t2t
- -t, --type set target document type. currently supported:
- %s
- -o, --outfile=FILE set FILE as the output file name ('-' for STDOUT)
- --stdout same as '-o -' or '--outfile -' (deprecated option)
- -H, --noheaders suppress header, title and footer information
- -n, --enumtitle enumerate all title lines as 1, 1.1, 1.1.1, etc
- --maskemail hide email from spam robots. x@y.z turns <x (a) y z>
- --toc add TOC (Table of Contents) to target document
- --toconly print document TOC and exit
- --toclevel=N set maximum TOC level (depth) to N
- --gui invoke Graphical Tk Interface
- --style=FILE use FILE as the document style (like Html CSS)
- -h, --help print this help information and exit
- -V, --version print program version and exit
- Extra options for HTML target (needs sgml-tools):
- --split split documents. values: 0, 1, 2 (default 0)
- --lang document language (default english)
- By default, converted output is saved to 'file.<type>'.
- Use --outfile to force an output file name.
- If input file is '-', reads from STDIN.
- If output file is '-', dumps output to STDOUT.\
- """%(versionstr, re.sub(r"[]'[]",'',repr(targets)))
- # here is all the target's templates
- # you may edit them to fit your needs
- # - the dictionary has one entry for each name listed in 'targets'
- # - the %(HEADERn)s strings represent the Header lines
- # - the html template also expects %(ENCODING)s and %(STYLE)s,
- #   the tex template also expects %(ENCODING)s
- # - use %% to represent a literal %
- #
- HEADER_TEMPLATE = {
- 'txt': """\
- %(HEADER1)s
- %(HEADER2)s
- %(HEADER3)s
- """,
- 'sgml': """\
- <!doctype linuxdoc system>
- <article>
- <title>%(HEADER1)s
- <author>%(HEADER2)s
- <date>%(HEADER3)s
- """,
- 'html': """\
- <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
- <HTML>
- <HEAD>
- <META NAME="generator" CONTENT="http://txt2tags.sf.net">
- <META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=%(ENCODING)s">
- <LINK REL="stylesheet" TYPE="text/css" HREF="%(STYLE)s">
- <TITLE>%(HEADER1)s</TITLE>
- </HEAD><BODY BGCOLOR="white" TEXT="black">
- <P ALIGN="center"><CENTER><H1>%(HEADER1)s</H1>
- <FONT SIZE=4>
- <I>%(HEADER2)s</I><BR>
- %(HEADER3)s
- </FONT></CENTER>
- """,
- # TODO man section 1 is hardcoded...
- 'man': """\
- .TH "%(HEADER1)s" 1 "%(HEADER3)s" "%(HEADER2)s"
- """,
- # TODO style to <HR>
- 'pm6': """\
- <PMTags1.0 win><C-COLORTABLE ("Preto" 1 0 0 0)
- ><@Normal=
- <FONT "Times New Roman"><CCOLOR "Preto"><SIZE 11>
- <HORIZONTAL 100><LETTERSPACE 0><CTRACK 127><CSSIZE 70><C+SIZE 58.3>
- <C-POSITION 33.3><C+POSITION 33.3><P><CBASELINE 0><CNOBREAK 0><CLEADING -0.05>
- <GGRID 0><GLEFT 7.2><GRIGHT 0><GFIRST 0><G+BEFORE 7.2><G+AFTER 0>
- <GALIGNMENT "justify"><GMETHOD "proportional"><G& "ENGLISH">
- <GPAIRS 12><G%% 120><GKNEXT 0><GKWIDOW 0><GKORPHAN 0><GTABS $>
- <GHYPHENATION 2 34 0><GWORDSPACE 75 100 150><GSPACE -5 0 25>
- ><@Bullet=<@-PARENT "Normal"><FONT "Abadi MT Condensed Light">
- <GLEFT 14.4><G+BEFORE 2.15><G%% 110><GTABS(25.2 l "")>
- ><@PreFormat=<@-PARENT "Normal"><FONT "Lucida Console"><SIZE 8><CTRACK 0>
- <GLEFT 0><G+BEFORE 0><GALIGNMENT "left"><GWORDSPACE 100 100 100><GSPACE 0 0 0>
- ><@Title1=<@-PARENT "Normal"><FONT "Arial"><SIZE 14><B>
- <GCONTENTS><GLEFT 0><G+BEFORE 0><GALIGNMENT "left">
- ><@Title2=<@-PARENT "Title1"><SIZE 12><G+BEFORE 3.6>
- ><@Title3=<@-PARENT "Title1"><SIZE 10><GLEFT 7.2><G+BEFORE 7.2>
- ><@Title4=<@-PARENT "Title3">
- ><@Title5=<@-PARENT "Title3">
- ><@Quote=<@-PARENT "Normal"><SIZE 10><I>>
- %(HEADER1)s
- %(HEADER2)s
- %(HEADER3)s
- """,
- 'mgp': """\
- #!/usr/X11R6/bin/mgp -t 90
- %%deffont "normal" xfont "utopia-medium-r", charset "iso8859-1"
- %%deffont "normal-i" xfont "utopia-medium-i", charset "iso8859-1"
- %%deffont "normal-b" xfont "utopia-bold-r" , charset "iso8859-1"
- %%deffont "normal-bi" xfont "utopia-bold-i" , charset "iso8859-1"
- %%deffont "mono" xfont "courier-medium-r", charset "iso8859-1"
- %%default 1 size 5
- %%default 2 size 8, fore "yellow", font "normal-b", center
- %%default 3 size 5, fore "white", font "normal", left, prefix " "
- %%tab 1 size 4, vgap 30, prefix " ", icon arc "red" 40, leftfill
- %%tab 2 prefix " ", icon arc "orange" 40, leftfill
- %%tab 3 prefix " ", icon arc "brown" 40, leftfill
- %%tab 4 prefix " ", icon arc "darkmagenta" 40, leftfill
- %%tab 5 prefix " ", icon arc "magenta" 40, leftfill
- %%%%------------------------- end of headers -----------------------------
- %%page
- %%size 10, center, fore "yellow"
- %(HEADER1)s
- %%font "normal-i", size 6, fore "white", center
- %(HEADER2)s
- %%font "mono", size 7, center
- %(HEADER3)s
- """,
- # TODO please, improve me!
- 'moin': """\
- %(HEADER1)s
- %(HEADER2)s
- %(HEADER3)s
- """,
- 'tex': \
- r"""\documentclass[11pt,a4paper]{article}
- \usepackage{amsfonts,amssymb,graphicx,url}
- \usepackage[%(ENCODING)s]{inputenc} %% char encoding
- \pagestyle{plain} %% do page numbering ('empty' turns off)
- \frenchspacing %% no aditional spaces after periods
- \setlength{\parskip}{8pt}\parindent=0pt %% no paragraph indentation
- %% uncomment next line for fancy PDF output on Adobe Acrobat Reader
- %%\usepackage[pdfstartview=FitV,colorlinks=true,bookmarks=true]{hyperref}
- \title{%(HEADER1)s}
- \author{%(HEADER2)s}
- \begin{document}
- \date{%(HEADER3)s}
- \maketitle
- """
- }
- #-----------------------------------------------------------------------
def Quit(msg, exitcode=0):
    """Print msg on standard output and leave the program.

    msg      -- text to show before exiting
    exitcode -- status handed to sys.exit() (default 0)
    """
    # a single parenthesized argument prints the same under the
    # Python 2 print statement and the Python 3 print function
    print(msg)
    sys.exit(exitcode)
def Error(msg):
    """Report a fatal error and abort the program.

    Prints 'ERROR: <msg>' on standard output and raises SystemExit with
    status 1, so that shells and calling scripts can tell a failed run
    from a successful one (the exit status used to be 0).

    msg -- the error description; any object accepted by '%s'
    """
    # wrap msg in a tuple so that a tuple message is shown as is
    # instead of breaking the string formatting
    print("ERROR: %s" % (msg,))
    sys.exit(1)
def echo(msg):
    """Quick debug helper: print msg between ANSI color escape codes.

    The message is preceded by ESC[32;1m (green, bold) and followed by
    ESC[m (attribute reset).
    """
    colored = '\033[32;1m%s\033[m' % msg
    print(colored)
def Debug(msg, i=0, linenr=None):
    """Print a debug message when its level is enabled.

    msg    -- the message text
    i      -- level of this message; it is printed only when i is not
              greater than the global DEBUG value
    linenr -- optional source line number, shown zero-padded to four
              digits before the message
    """
    if i > DEBUG:
        return
    if linenr is None:
        print("(%d) %s" % (i, msg))
    else:
        print("(%d) %04d:%s" % (i, linenr, msg))
def Readfile(file, remove_linebreaks=0):
    """Read a whole file (or STDIN) and return its lines as a list.

    file              -- path of the file to read; '-' means STDIN
    remove_linebreaks -- when true, trailing CR/LF characters are
                         stripped from every line

    Calls Error() (which terminates the program) when the data cannot
    be read.
    """
    if file == '-':
        try:
            data = sys.stdin.readlines()
        # KeyboardInterrupt is kept here on purpose: presumably the
        # message targets a user who aborted an interactive STDIN read
        except (EnvironmentError, KeyboardInterrupt):
            Error('You must feed me with data on STDIN!')
    else:
        try:
            f = open(file)
            try:
                data = f.readlines()
            finally:
                # release the handle even when reading fails
                f.close()
        except EnvironmentError:
            Error("Cannot read file:\n %s" % file)
    if remove_linebreaks:
        # a real list (not a lazy map object) is needed by the callers,
        # which concatenate and index the result
        data = [re.sub('[\n\r]+$', '', line) for line in data]
    return data
def Savefile(file, contents):
    """Write contents to file, replacing any previous data.

    file     -- path of the file to create; it is opened in binary
                mode, so no line break translation is made
    contents -- a list of strings (written with writelines) or a
                single string (written with write)

    Calls Error() (which terminates the program) when the file cannot
    be opened for writing. Errors raised while writing are propagated,
    but the file is always closed.
    """
    try:
        f = open(file, 'wb')
    except EnvironmentError:
        Error("Cannot open file for writing:\n %s" % file)
    try:
        if isinstance(contents, list):
            f.writelines(contents)
        else:
            f.write(contents)
    finally:
        # do not leak the handle when the write fails
        f.close()
def get_include_contents(file, path=''):
    """Parse a %!include: value and extract the file contents.

    file -- the include value. A name between `backquotes` is included
            as verbatim (VERB) text, a name between 'single quotes' is
            passed through untouched (PASS), any other name is read as
            txt2tags marked text (T2T)
    path -- directory that is joined to the file name

    Returns the tuple (type, lines), where type is 'T2T', 'VERB' or
    'PASS'. VERB lines come between '---' block delimiters; T2T and
    VERB lines are also surrounded by two delimiter comment lines.
    """
    quote_types = {'`': 'VERB', "'": 'PASS'}

    # set include type. The length test avoids an IndexError on an
    # empty value and keeps a lone quote char from being taken as a
    # quoted empty name.
    inc_type = 'T2T'
    if len(file) >= 2 and file[0] == file[-1] and file[0] in quote_types:
        inc_type = quote_types[file[0]]
        file = file[1:-1]                # remove the quotes

    # handle remote dir execution
    filepath = os.path.join(path, file)

    # pass-thru: no delimiters, no changes
    if inc_type == 'PASS':
        return inc_type, Readfile(filepath, remove_linebreaks=1)

    if inc_type == 'VERB':
        lines = Readfile(filepath, remove_linebreaks=1)
        # escape inner '---' that would end VERB block
        lines = [re.sub('^---$', '--- ', line) for line in lines]
        # add VERB block identifiers
        lines = ['---'] + lines + ['---']
    else:
        # default txt2tags marked text
        lines = get_file_body(filepath)

    # add delimiter comments
    lines.insert(0, '%%INCLUDED_%s starts here: %s' % (inc_type, file))
    lines.append('%%INCLUDED_%s ends here: %s' % (inc_type, file))
    return inc_type, lines
def ParseConfig(text='', name='', target=''):
    """Parse one '%!name(target): value' configuration line.

    text   -- the line to parse
    name   -- regex for the accepted config name(s); default: any word
    target -- regex for the accepted target; default: any word or none

    Returns:
      {}    when text is empty or is not a config line
      None  when a preproc/postproc value is not a 'pattern replace' pair
      otherwise a dict with the keys 'name' and 'target' (lowercased,
      target defaults to 'all') and 'value'. For preproc and postproc
      the value is the tuple (pattern, replacement).
    """
    ret = {}
    if not text:
        return ret
    re_name = name or '[a-z]+'
    re_target = target or '[a-z]*'
    cfgregex = re.compile(r"""
        ^%%!\s*                # leading id with opt spaces
        (?P<name>%s)\s*        # config name
        (\((?P<target>%s)\))?  # optional target spec inside ()
        \s*:\s*                # key:value delimiter with opt spaces
        (?P<value>\S.*?)       # config value, one char is enough
        \s*$                   # rstrip() spaces and hit EOL
        """ % (re_name, re_target), re.I | re.VERBOSE)
    prepostregex = re.compile(r"""
        # ---[ PATTERN ]---
        ^( "([^"]*)"           # "double quoted" or
         | '([^']*)'           # 'single quoted' or
         | ([^\s]+)            # single_word
         )
        \s+                    # separated by spaces

        # ---[ REPLACE ]---
         ( "([^"]*)"           # "double quoted" or
         | '([^']*)'           # 'single quoted' or
         | (.*)                # anything
         )
        \s*$
        """, re.VERBOSE)
    match = cfgregex.match(text)
    if match:
        ret = {
            'name': (match.group('name') or '').lower(),
            'target': (match.group('target') or 'all').lower(),
            'value': match.group('value'),
        }

        # Special config with two quoted values (%!preproc: "foo" 'bar')
        if ret['name'] in ['preproc', 'postproc']:
            valmatch = prepostregex.search(ret['value'])
            if not valmatch:
                return None
            getval = valmatch.group
            # groups 2-4 are the pattern alternatives, 6-8 the
            # replacement ones; only one of each can be set
            patt = getval(2) or getval(3) or getval(4) or ''
            repl = getval(6) or getval(7) or getval(8) or ''
            ret['value'] = (patt, repl)

    return ret
class Cmdline:
    """Parse, merge and normalize the txt2tags command line.

    Attributes:
      cmdline       -- the argument list; item 0 is taken as the program
                       name and skipped by parse()
      cmdline_conf  -- option:value dict filled by parse()
      conf          -- full configuration built by get_conf() and
                       updated by merge_conf()
      dft_options, dft_flags -- private copies of OPTIONS and FLAGS
      all_options, all_flags -- lists with their key names
      defaults      -- options and flags defaults merged in one dict
      nocheck       -- when true, get_conf() skips the _sanity() checks
    """

    def __init__(self, cmdline=None, nocheck=0):
        # None instead of a shared mutable [] default
        if cmdline is None:
            cmdline = []
        self.conf = {}
        self.cmdline = cmdline
        self.cmdline_conf = {}
        self.dft_options = OPTIONS.copy()
        self.dft_flags = FLAGS.copy()
        # real lists: they are concatenated later on
        self.all_options = list(self.dft_options.keys())
        self.all_flags = list(self.dft_flags.keys())
        self.defaults = self._get_empty_conf()
        self.nocheck = nocheck
        if cmdline:
            self.parse()

    #TODO protect quotes contents
    def _tokenize(self, cmd_string):
        "split a command line string on whitespace"
        return cmd_string.split()

    def parse(self):
        """Parse self.cmdline and save the result on self.cmdline_conf.

        Returns {} when there is no command line, None otherwise.
        -h and -V quit the program; a getopt failure calls Error().
        """
        if not self.cmdline:
            return {}
        Debug("cmdline: %s" % self.cmdline, 1)
        options = {'infile': '', 'infiles': ''}
        # compose valid options list (options take a value: add =)
        longopts = ['help', 'version'] + self.all_flags + \
                   [opt + '=' for opt in self.all_options]
        cmdline = self.cmdline[1:]                 # del prog name
        # get cmdline options
        try:
            (opt, args) = getopt.getopt(cmdline, 'hVnHt:o:', longopts)
        except getopt.error as errmsg:
            Error("%s (try --help)" % errmsg)
        # get infile, if any
        if args:
            options['infile'] = args[0]
            options['infiles'] = args              # multi
        # parse all options
        for name, val in opt:
            if name in ['-h', '--help']:
                Quit(usage)
            elif name in ['-V', '--version']:
                Quit(versionstr)
            elif name in ['-t', '--type']:
                options['type'] = val
            elif name in ['-o', '--outfile']:
                options['outfile'] = val
            elif name in ['-n', '--enumtitle']:
                options['enumtitle'] = 1
            elif name in ['-H', '--noheaders']:
                options['noheaders'] = 1
            elif name in ['--stdout']:
                options['outfile'] = STDOUT
            else:
                options[name[2:]] = val or 1       # del --
        # save results
        Debug("cmdline arguments: %s" % options, 1)
        self.cmdline_conf = options

    def compose(self, conf=None):
        """Compose a full command line string from a CONF-like dict.

        Unknown keys, empty values and values equal to the defaults are
        left out. Returns '' for an empty conf. The conf dict itself is
        not changed.
        """
        if not conf:
            return ''
        args = []
        cfg = conf.copy()
        valid_opts = self.all_options + self.all_flags
        use_short = {'noheaders': 'H', 'enumtitle': 'n'}
        # remove useless options
        if cfg.get('toconly'):
            # pop(): these keys may be missing from a partial conf
            cfg.pop('noheaders', None)
            cfg.pop('outfile', None)               # defaults to STDOUT
            if cfg.get('type') == 'txt':
                del cfg['type']                    # already default
            args.append('--toconly')               # must be the first
            del cfg['toconly']
        # add target type
        if 'type' in cfg:
            args.append('-t ' + cfg['type'])
            del cfg['type']
        # add other options
        for key in list(cfg.keys()):
            if key not in valid_opts:
                continue                           # must be a %!setting
            if key == 'outfile':
                continue                           # later
            val = cfg[key]
            if not val:
                continue
            # default values are useless on cmdline
            if val == self.dft_options.get(key):
                continue
            # -short format
            if key in use_short:
                args.append('-' + use_short[key])
                continue
            # --long format
            if key in self.all_flags:              # add --option
                args.append('--' + key)
            else:                                  # add --option=value
                args.append('--%s=%s' % (key, val))
        # the outfile using -o
        if 'outfile' in cfg and \
           cfg['outfile'] != self.dft_options.get('outfile'):
            args.append('-o ' + cfg['outfile'])
        # the input file is always at the end
        if 'infile' in cfg:
            args.append(cfg['infile'])
        # return as a single string
        ret = ' '.join(args)
        Debug("Diet command line: %s" % ret, 1)
        return ret

    def merge(self, extraopts=''):
        """Insert a cmdline portion BEFORE the current cmdline.

        extraopts -- a string (split on whitespace) or a list of args
        The merged command line is parsed again.
        """
        if not extraopts:
            return
        if isinstance(extraopts, str):
            extraopts = self._tokenize(extraopts)
        if not self.cmdline:
            # NOTE(review): parse() skips item 0 as the program name, so
            # the first extra option looks ignored here -- confirm
            self.cmdline = extraopts
        else:
            self.cmdline = ['t2t-merged'] + extraopts + self.cmdline[1:]
        self.parse()

    def _get_outfile_name(self, conf):
        """Guess the output file name from conf['infile'].

        dirname is the same for {in,out}file. The result is also saved
        as the default 'outfile' option, for self.compose().
        """
        infile = conf['infile']
        if not infile:
            return ''
        if infile == STDIN or conf['outfile'] == STDOUT:
            outfile = STDOUT
        else:
            basename = re.sub(r'\.(txt|t2t)$', '', infile)
            outfile = "%s.%s" % (basename, conf['type'])
        self.dft_options['outfile'] = outfile      # save for self.compose()
        Debug(" infile: '%s'" % infile, 1)
        Debug("outfile: '%s'" % outfile, 1)
        return outfile

    def _sanity(self, dic):
        """Basic cmdline syntax checkings.

        Quits with the usage text when infile or type is missing, calls
        Error() on an invalid target, on a non numeric value for a
        numeric option and on an invalid split level. Numeric options
        are converted to int on dic, which is returned.
        """
        if not dic:
            return {}
        if not dic['infile'] or not dic['type']:
            Quit(usage, 1)                         # no filename/doctype
        if not targets.count(dic['type']):         # check target
            Error("Invalid document type '%s' (try --help)" % (
                dic['type']))
        #DISABLED: conflicting with %!cmdline: -o foo
        #if len(dic['infiles']) > 1 and dic['outfile']: # -o FILE *.t2t
        #    Error("--outfile can't be used with multiple files")
        for opt in self.all_options:               # check numeric options
            is_numeric = isinstance(self.dft_options[opt], int)
            if dic.get(opt) and is_numeric:
                try:
                    dic[opt] = int(dic.get(opt))   # save
                except (TypeError, ValueError):
                    Error('--%s value must be a number' % opt)
        if dic['split'] not in [0, 1, 2]:          # check split level
            Error('Option --split must be 0, 1 or 2')
        return dic

    def merge_conf(self, newconfs=None, override=0):
        """Include Config Area settings into self.conf.

        newconfs -- dict with the new settings; 'cmdline' is skipped
        override -- when true, new values always replace current ones;
                    otherwise a value is replaced only if it still
                    equals the default, or is not set at all.
        preproc/postproc filters are always accumulated.
        Returns self.conf.
        """
        if not self.conf:
            self.get_conf()
        if not newconfs:
            return self.conf
        for key in newconfs.keys():
            if key == 'cmdline':
                continue                           # already done
            # filters are always accumulative
            if key in ['preproc', 'postproc']:
                if key not in self.conf:
                    self.conf[key] = []
                self.conf[key].extend(newconfs[key])
                continue
            # add anyway
            if override:
                self.conf[key] = newconfs[key]
                continue
            # just update if still 'virgin'
            if key in self.conf and \
               self.conf[key] == self.defaults.get(key):
                self.conf[key] = newconfs[key]
            # add new
            if key not in self.conf:
                self.conf[key] = newconfs[key]

        Debug("Merged CONF (override=%s): %s" % (override, self.conf), 1)
        return self.conf

    def _get_empty_conf(self):
        "return one dict with all the options and flags defaults"
        econf = self.dft_options.copy()
        econf.update(self.dft_flags)
        return econf

    def get_conf(self):
        """Set vars and flags according to the parsed options dic.

        Returns the full configuration dict (also saved on self.conf),
        or {} when there is no command line to read from.
        """
        if not self.cmdline_conf:
            if not self.cmdline:
                return {}
            self.parse()
        dic = self.cmdline_conf
        conf = self.defaults.copy()

        ## store flags & options
        for flag in self.all_flags:
            if flag in dic:
                conf[flag] = 1
        for opt in self.all_options + ['infile', 'infiles']:
            if opt in dic:
                conf[opt] = dic.get(opt)

        if not conf['type'] and conf['toconly']:
            conf['type'] = 'txt'
        if not conf['type'] and conf['dump-source']:
            conf['type'] = 'txt'
        if not self.nocheck:
            conf = self._sanity(conf)

        ## some gotchas for specific issues
        doctype = conf['type']
        infile = conf['infile']

        # toconly is stronger than others
        if conf['toconly']:
            conf['noheaders'] = 1
            conf['toc'] = 0
            conf['split'] = 0
            conf['gui'] = 0
            conf['outfile'] = STDOUT
            conf['toclevel'] = conf['toclevel'] or \
                               self.dft_options['toclevel']

        # dump-source is stronger than others (including toconly)
        if conf['dump-source']:
            conf['toconly'] = 0
            conf['noheaders'] = 0
            conf['toc'] = 0
            conf['split'] = 0
            conf['gui'] = 0
            conf['outfile'] = STDOUT

        # split: just HTML, no stdout, 1st do a sgml, then sgml2html
        if conf['split']:
            if doctype != 'html':
                conf['split'] = 0
            else:
                conf['type'] = 'sgml'
                if conf['outfile'] == STDOUT:
                    conf['outfile'] = ''

        outfile = conf['outfile'] or self._get_outfile_name(conf)

        # final checkings
        if conf['split'] and outfile == STDOUT:
            Error('--split: You must provide a FILE (not STDIN)')
        if infile == outfile and outfile != STDOUT:
            Error("SUICIDE WARNING!!! (see --outfile)\n source" +
                  " and target files has the same name: " + outfile)
            ### author's note: "yes, i've got my sample.t2t file deleted
            ### before add this test... :/"

        conf['outfile'] = outfile
        conf['cmdline'] = self.cmdline
        Debug("CONF data: %s\n" % conf, 1)
        self.conf = conf
        return self.conf
- #
- ### End of Cmdline class
class Proprierties:
    """Split a txt2tags source into its head, conf and body areas.

    Attributes:
      buffer    -- the source lines; item 0 is a dummy, so that the
                   list index equals the line number
      areas     -- ['head','conf','body']; an area that does not exist
                   is replaced by ''
      arearef   -- start line of each area (0: area does not exist)
      headers   -- the three header lines, stripped
      config    -- per target dict of the %! settings found; filters
                   are all stored on target 'all'
      lastline  -- number of the last source line
      conflines, bodylines -- the raw lines of the conf and body areas

    When a filename is given, the file is read and fully scanned.
    """

    def __init__(self, filename=''):
        self.buffer = ['']                     # text start at pos 1
        self.areas = ['head', 'conf', 'body']
        self.arearef = []
        self.headers = ['', '', '']
        self.config = self.get_empty_config()
        self.lastline = 0
        self.filename = filename
        self.conflines = []
        self.bodylines = []
        if filename:
            self.read_file(filename)
            self.find_areas()
            self.set_headers()
            self.set_config()

    def read_file(self, file):
        "Append the file lines to the buffer; an empty file is an Error"
        lines = Readfile(file)
        if not lines:
            Error('Empty file! %s' % file)
        self.buffer.extend(lines)

    def get_empty_config(self):
        "Return a dict with one empty dict for each target, plus 'all'"
        empty = {}
        for targ in targets + ['all']:
            empty[targ] = {}
        return empty

    def find_areas(self):
        "Run through buffer and identify head/conf/body areas"
        buf = self.buffer
        ref = [1, 4, 0]                        # defaults
        # no header: first line is blank (or nothing was loaded yet)
        if len(buf) < 2 or not buf[1].strip():
            ref[0] = 0
            ref[1] = 2
        for i in range(ref[1], len(buf)):      # find body init
            if buf[i].strip() and buf[i][0] != '%':
                ref[2] = i                     # !blank, !comment
                break
            if ParseConfig(buf[i], 'include'):
                ref[2] = i                     # %!include command
                break
        if ref[1] == ref[2]:
            ref[1] = 0                         # no conf area
        for i in 0, 1, 2:                      # del !existent
            if not ref[i]:
                self.areas[i] = ''
        self.arearef = ref                     # save results
        self.lastline = len(self.buffer) - 1
        Debug('Head,Conf,Body start line: %s' % ref, 1)
        # store CONF and BODY lines found. Without a conf area the
        # slice would start at 0 and wrongly pick the header lines.
        if ref[1]:
            cfgend = ref[2] or len(buf)
            self.conflines = buf[ref[1]:cfgend]
        else:
            self.conflines = []
        if ref[2]:
            self.bodylines = buf[ref[2]:]
        else:
            self.bodylines = []

    def set_headers(self):
        "Extract and save headers contents"
        if not self.arearef:
            self.find_areas()
        if not self.areas.count('head'):
            return
        if self.lastline < 3:
            #TODO on gui this checking is !working
            Error(
                "Premature end of Headers on '%s'." % self.filename +
                '\n\nFile has %s line(s), but ' % self.lastline +
                'Headers should be composed by 3 lines. ' +
                '\nMaybe you should left the first line blank? ' +
                '(for no headers)')
        for i in 0, 1, 2:
            self.headers[i] = self.buffer[i + 1].strip()
        # reported at the line of the last header
        Debug("Headers found: %s" % self.headers, 1, 3)

    def set_config(self):
        "Extract and save config contents (including includes)"
        if not self.arearef:
            self.find_areas()
        if not self.areas.count('conf'):
            return
        keywords = '|'.join(CONFIG_KEYWORDS)
        linenr = self.arearef[1] - 1           # for debug messages
        for line in self.conflines:
            linenr = linenr + 1
            if len(line) < 3:
                continue
            if line[:2] != '%!':
                continue
            cfg = ParseConfig(line, keywords)
            # any _valid_ config found?
            if not cfg:
                Debug('Bogus Config Line', 1, linenr)
                continue
            # get data
            targ, key, val = cfg['target'], cfg['name'], cfg['value']
            # check config target specification
            if targ not in targets + ['all']:
                Debug("Config Error: Invalid target '%s', ignoring" % targ,
                      1, linenr)
                continue
            # filters are multiple config
            if key in ['preproc', 'postproc']:
                if key not in self.config['all']:      # 1st one
                    self.config['all'][key] = []
                # all filters are saved to target 'all'
                # finish_him will decide what to consider
                self.config['all'][key].append((targ,) + val)
            else:
                self.config[targ][key] = val
            Debug("Found config for target '%s': '%s', value '%s'" % (
                targ, key, val), 1, linenr)
        Debug("All %%!CONFIG: %s" % self.config, 1)
def get_file_body(file):
    """Return the BODY area lines of a txt2tags source file.

    The file is loaded on a fresh Proprierties object and only its
    areas are located: headers and config settings are not processed.
    """
    source = Proprierties()
    source.read_file(file)
    source.find_areas()
    body = source.bodylines
    return body
def finish_him(outlist, CONF):
    """Apply the PostProc filters and write the output to screen or file.

    outlist -- list with the converted lines
    CONF    -- the configuration dict. Keys read here: 'outfile',
               'type', 'gui', 'split', 'lang' and, when present,
               'postproc' (a list of (target, pattern, replace))

    Returns the output lines when the output is STDOUT on GUI mode,
    None otherwise. Calls Error() on an invalid PostProc filter.
    """
    outfile = CONF['outfile']
    outlist = unmaskEscapeChar(outlist)

    # do PostProc
    postproc = CONF.get('postproc')
    if postproc:
        # select this target's filters and compile each regex once,
        # instead of resolving it again for every output line
        active = []
        for targ, patt, repl in postproc:
            if targ not in [CONF['type'], 'all']:
                continue
            try:
                active.append((re.compile(patt), patt, repl))
            except re.error:
                Error("Invalid PostProc filter regex: '%s'" % patt)
        postoutlist = []
        for line in outlist:
            for rgx, patt, repl in active:
                # a bad replacement template is only found here
                # (IndexError: reference to an unknown group name)
                try:
                    line = rgx.sub(repl, line)
                except (re.error, IndexError):
                    Error("Invalid PostProc filter regex: '%s'" % patt)
            postoutlist.append(line)
        outlist = postoutlist

    if outfile == STDOUT:
        if CONF['gui']:
            return outlist
        for line in outlist:
            print(line)
    else:
        Savefile(outfile, addLineBreaks(outlist))
        if not CONF['gui']:
            print('wrote %s' % (outfile))

    if CONF['split']:
        import subprocess
        print("--- html...")
        # an argument list, with no shell involved: file names with
        # spaces or shell metachars reach sgml2html untouched
        sgml2html = ['sgml2html', '-s', str(CONF['split']),
                     '-l', CONF['lang'] or lang, outfile]
        print("Running system command: %s" % ' '.join(sgml2html))
        try:
            subprocess.call(sgml2html)
        except OSError:
            # the command could not be started (not installed?)
            print("ERROR: Cannot run the sgml2html command")
def toc_maker(toc, conf):
    """Compose the TOC list 'by hand'.

    toc  -- the TOC as t2t marked text (list type)
    conf -- the configuration dict; 'toc' and 'toconly' are read

    Returns [] when the target has its own TOC tag. Otherwise the toc
    is converted when conf['toc'] or conf['toconly'] is set, and with
    conf['toc'] it also comes between two separator bars.
    """
    # TOC is a tag, so there's nothing to do here
    if TAGS['TOC']:
        return []

    with_bars = conf['toc']

    # toc is a valid t2t marked text (list type), that is converted
    if with_bars or conf['toconly']:
        fakeconf = conf.copy()
        fakeconf.update({
            'noheaders': 1,
            'toconly': 0,
            'maskemail': 0,
            'dump-source': 0,
            'preproc': [],
            'postproc': [],
        })
        toc, _ignored = convert(toc, fakeconf)

    # TOC between bars (not for --toconly)
    if with_bars:
        para = TAGS['paragraph']
        bar = regex['x'].sub('-' * 72, TAGS['bar1'])
        tocbar = [para, bar, para]
        toc = tocbar + toc + tocbar
    return toc
- def getTags(doctype):
- keys = [
- 'paragraph','title1','title2','title3','title4','title5',
- 'numtitle1','numtitle2','numtitle3','numtitle4','numtitle5',
- 'areaPreOpen','areaPreClose',
- 'areaQuoteOpen','areaQuoteClose',
- 'fontMonoOpen','fontMonoClose',
- 'fontBoldOpen','fontBoldClose',
- 'fontItalicOpen','fontItalicClose',
- 'fontBolditalicOpen','fontBolditalicClose',
- 'fontUnderlineOpen','fontUnderlineClose',
- 'listOpen','listClose','listItem',
- 'numlistOpen','numlistClose','numlistItem',
- 'deflistOpen','deflistClose','deflistItem1','deflistItem2',
- 'bar1','bar2',
- 'url','urlMark','email','emailMark',
- 'img','imgsolo',
- 'tableOpen','tableClose','tableLineOpen','tableLineClose',
- 'tableCellOpen','tableCellClose',
- 'tableTitleCellOpen','tableTitleCellClose',
- 'anchor','comment','TOC',
- 'EOD'
- ]
-
- alltags = {
-
- 'txt': {
- 'title1' : ' \a' ,
- 'title2' : '\t\a' ,
- 'title3' : '\t\t\a' ,
- 'title4' : '\t\t\t\a' ,
- 'title5' : '\t\t\t\t\a',
- 'areaQuoteOpen' : ' ' ,
- 'listItem' : '- ' ,
- 'numlistItem' : '\a. ' ,
- 'bar1' : '\a' ,
- 'bar2' : '\a' ,
- 'url' : '\a' ,
- 'urlMark' : '\a (\a)' ,
- 'email' : '\a' ,
- 'emailMark' : '\a (\a)' ,
- 'img' : '[\a]' ,
- },
-
- 'html': {
- 'paragraph' : '<P>' ,
- 'title1' : '<H1>\a</H1>' ,
- 'title2' : '<H2>\a</H2>' ,
- 'title3' : '<H3>\a</H3>' ,
- 'title4' : '<H4>\a</H4>' ,
- 'title5' : '<H5>\a</H5>' ,
- 'areaPreOpen' : '<PRE>' ,
- 'areaPreClose' : '</PRE>' ,
- 'areaQuoteOpen' : '<BLOCKQUOTE>' ,
- 'areaQuoteClose' : '</BLOCKQUOTE>' ,
- 'fontMonoOpen' : '<CODE>' ,
- 'fontMonoClose' : '</CODE>' ,
- 'fontBoldOpen' : '<B>' ,
- 'fontBoldClose' : '</B>' ,
- 'fontItalicOpen' : '<I>' ,
- 'fontItalicClose' : '</I>' ,
- 'fontBolditalicOpen' : '<B><I>' ,
- 'fontBolditalicClose' : '</I></B>' ,
- 'fontUnderlineOpen' : '<U>' ,
- 'fontUnderlineClose' : '</U>' ,
- 'listOpen' : '<UL>' ,
- 'listClose' : '</UL>' ,
- 'listItem' : '<LI>' ,
- 'numlistOpen' : '<OL>' ,
- 'numlistClose' : '</OL>' ,
- 'numlistItem' : '<LI>' ,
- 'deflistOpen' : '<DL>' ,
- 'deflistClose' : '</DL>' ,
- 'deflistItem1' : '<DT>\a</DT>' ,
- 'deflistItem2' : '<DD>' ,
- 'bar1' : '<HR NOSHADE SIZE=1>' ,
- 'bar2' : '<HR NOSHADE SIZE=5>' ,
- 'url' : '<A HREF="\a">\a</A>' ,
- 'urlMark' : '<A HREF="\a">\a</A>' ,
- 'email' : '<A HREF="mailto:\a">\a</A>' ,
- 'emailMark' : '<A HREF="mailto:\a">\a</A>' ,
- 'img' : '<IMG ALIGN="\a" SRC="\a" BORDER="0">',
- 'imgsolo' : '<P ALIGN="center">\a</P>' ,
- 'tableOpen' : '<table\a cellpadding=4 border=\a>',
- 'tableClose' : '</table>' ,
- 'tableLineOpen' : '<tr>' ,
- 'tableLineClose' : '</tr>' ,
- 'tableCellOpen' : '<td\a>' ,
- 'tableCellClose' : '</td>' ,
- 'tableTitleCellOpen' : '<th>' ,
- 'tableTitleCellClose' : '</th>' ,
- 'tableAlignLeft' : '' ,
- 'tableAlignCenter' : ' align="center"',
- 'tableCellAlignLeft' : '' ,
- 'tableCellAlignRight' : ' align="right"' ,
- 'tableCellAlignCenter': ' align="center"',
- 'anchor' : '<a name="\a"></a>',
- 'comment' : '<!-- \a -->' ,
- 'EOD' : '</BODY></HTML>'
- },
-
- 'sgml': {
- 'paragraph' : '<p>' ,
- 'title1' : '<sect>\a<p>' ,
- 'title2' : '<sect1>\a<p>' ,
- 'title3' : '<sect2>\a<p>' ,
- 'title4' : '<sect3>\a<p>' ,
- 'title5' : '<sect4>\a<p>' ,
- 'areaPreOpen' : '<tscreen><verb>' ,
- 'areaPreClose' : '</verb></tscreen>' ,
- 'areaQuoteOpen' : '<quote>' ,
- 'areaQuoteClose' : '</quote>' ,
- 'fontMonoOpen' : '<tt>' ,
- 'fontMonoClose' : '</tt>' ,
- 'fontBoldOpen' : '<bf>' ,
- 'fontBoldClose' : '</bf>' ,
- 'fontItalicOpen' : '<em>' ,
- 'fontItalicClose' : '</em>' ,
- 'fontBolditalicOpen' : '<bf><em>' ,
- 'fontBolditalicClose' : '</em></bf>' ,
- 'fontUnderlineOpen' : '<bf><em>' ,
- 'fontUnderlineClose' : '</em></bf>' ,
- 'listOpen' : '<itemize>' ,
- 'listClose' : '</itemize>' ,
- 'listItem' : '<item>' ,
- 'numlistOpen' : '<enum>' ,
- 'numlistClose' : '</enum>' ,
- 'numlistItem' : '<item>' ,
- 'deflistOpen' : '<descrip>' ,
- 'deflistClose' : '</descrip>' ,
- 'deflistItem1' : '<tag>\a</tag>' ,
- 'bar1' : '<!-- \a -->' ,
- 'bar2' : '<!-- \a -->' ,
- 'url' : '<htmlurl url="\a" name="\a">' ,
- 'urlMark' : '<htmlurl url="\a" name="\a">' ,
- 'email' : '<htmlurl url="mailto:\a" name="\a">' ,
- 'emailMark' : '<htmlurl url="mailto:\a" name="\a">' ,
- 'img' : '<figure><ph vspace=""><img src="\a">'+\
- '</figure>' ,
- 'tableOpen' : '<table><tabular ca="\a">' ,
- 'tableClose' : '</tabular></table>' ,
- 'tableLineClose' : '<rowsep>' ,
- 'tableCellClose' : '<colsep>' ,
- 'tableTitleCellClose' : '<colsep>' ,
- 'tableColAlignLeft' : 'l' ,
- 'tableColAlignRight' : 'r' ,
- 'tableColAlignCenter' : 'c' ,
- 'comment' : '<!-- \a -->' ,
- 'TOC' : '<toc>' ,
- 'EOD' : '</article>'
- },
-
- 'tex': {
- 'title1' : '\n\\newpage\section*{\a}',
- 'title2' : '\\subsection*{\a}' ,
- 'title3' : '\\subsubsection*{\a}' ,
- # title 4/5: DIRTY: para+BF+\\+\n
- 'title4' : '\\paragraph{}\\textbf{\a}\\\\\n',
- 'title5' : '\\paragraph{}\\textbf{\a}\\\\\n',
- 'numtitle1' : '\n\\newpage\section{\a}',
- 'numtitle2' : '\\subsection{\a}' ,
- 'numtitle3' : '\\subsubsection{\a}' ,
- 'areaPreOpen' : '\\begin{verbatim}' ,
- 'areaPreClose' : '\\end{verbatim}' ,
- 'areaQuoteOpen' : '\\begin{quotation}' ,
- 'areaQuoteClose' : '\\end{quotation}' ,
- 'fontMonoOpen' : '\\texttt{' ,
- 'fontMonoClose' : '}' ,
- 'fontBoldOpen' : '\\textbf{' ,
- 'fontBoldClose' : '}' ,
- 'fontItalicOpen' : '\\textit{' ,
- 'fontItalicClose' : '}' ,
- 'fontBolditalicOpen' : '\\textbf{\\textit{' ,
- 'fontBolditalicClose' : '}}' ,
- 'fontUnderlineOpen' : '\\underline{' ,
- 'fontUnderlineClose' : '}' ,
- 'listOpen' : '\\begin{itemize}' ,
- 'listClose' : '\\end{itemize}' ,
- 'listItem' : '\\item ' ,
- 'numlistOpen' : '\\begin{enumerate}' ,
- 'numlistClose' : '\\end{enumerate}' ,
- 'numlistItem' : '\\item ' ,
- 'deflistOpen' : '\\begin{description}',
- 'deflistClose' : '\\end{description}' ,
- 'deflistItem1' : '\\item[\a]' ,
- 'bar1' : '\n\\hrulefill{}\n' ,
- 'bar2' : '\n\\rule{\linewidth}{1mm}\n',
- 'url' : '\\url{\a}' ,
- 'urlMark' : '\\textit{\a} (\\url{\a})' ,
- 'email' : '\\url{\a}' ,
- 'emailMark' : '\\textit{\a} (\\url{\a})' ,
- 'img' : '\\begin{figure}\\includegraphics{\a}'+\
- '\\end{figure}',
- 'tableOpen' : '\\begin{center}\\begin{tabular}{\a|}',
- 'tableClose' : '\\end{tabular}\\end{center}',
- 'tableLineOpen' : '\\hline ' ,
- 'tableLineClose' : ' \\\\' ,
- 'tableCellClose' : ' & ' ,
- 'tableTitleCellOpen' : '\\textbf{',
- 'tableTitleCellClose' : '} & ' ,
- 'tableColAlignLeft' : '|l' ,
- 'tableColAlignRight' : '|r' ,
- 'tableColAlignCenter' : '|c' ,
- 'comment' : '% \a' ,
- 'TOC' : '\\newpage\\tableofcontents',
- 'EOD' : '\\end{document}'
- },
-
- 'moin': {
- 'title1' : '= \a =' ,
- 'title2' : '== \a ==' ,
- 'title3' : '=== \a ===' ,
- 'title4' : '==== \a ====' ,
- 'title5' : '===== \a =====',
- 'areaPreOpen' : '{{{' ,
- 'areaPreClose' : '}}}' ,
- 'areaQuoteOpen' : ' ' ,
- 'fontMonoOpen' : '{{{' ,
- 'fontMonoClose' : '}}}' ,
- 'fontBoldOpen' : "'''" ,
- 'fontBoldClose' : "'''" ,
- 'fontItalicOpen' : "''" ,
- 'fontItalicClose' : "''" ,
- 'fontBolditalicOpen' : "'''''" ,
- 'fontBolditalicClose' : "'''''" ,
- 'fontUnderlineOpen' : "'''''" ,
- 'fontUnderlineClose' : "'''''" ,
- 'listItem' : ' * ' ,
- 'numlistItem' : ' \a. ' ,
- 'bar1' : '----' ,
- 'bar2' : '----' ,
- 'url' : '[\a]' ,
- 'urlMark' : '[\a \a]' ,
- 'email' : '[\a]' ,
- 'emailMark' : '[\a \a]' ,
- 'img' : '[\a]' ,
- 'tableLineOpen' : '||' ,
- 'tableCellClose' : '||' ,
- 'tableTitleCellClose' : '||'
- },
-
- 'mgp': {
- 'paragraph' : '%font "normal", size 5\n' ,
- 'title1' : '%page\n\n\a' ,
- 'title2' : '%page\n\n\a' ,
- 'title3' : '%page\n\n\a' ,
- 'title4' : '%page\n\n\a' ,
- 'title5' : '%page\n\n\a' ,
- 'areaPreOpen' : '\n%font "mono"' ,
- 'areaPreClose' : '%font "normal"' ,
- 'areaQuoteOpen' : '%prefix " "' ,
- 'areaQuoteClose' : '%prefix " "' ,
- 'fontMonoOpen' : '\n%cont, font "mono"\n' ,
- 'fontMonoClose' : '\n%cont, font "normal"\n' ,
- 'fontBoldOpen' : '\n%cont, font "normal-b"\n' ,
- 'fontBoldClose' : '\n%cont, font "normal"\n' ,
- 'fontItalicOpen' : '\n%cont, font "normal-i"\n' ,
- 'fontItalicClose' : '\n%cont, font "normal"\n' ,
- 'fontBolditalicOpen' : '\n%cont, font "normal-bi"\n',
- 'fontBolditalicClose' : '\n%cont, font "normal"\n' ,
- 'fontUnderlineOpen' : '\n%cont, fore "cyan"\n' ,
- 'fontUnderlineClose' : '\n%cont, fore "white"\n' ,
- 'numlistItem' : '\a. ' ,
- 'bar1' : '%bar "white" 5' ,
- 'bar2' : '%pause' ,
- 'url' : '\n%cont, fore "cyan"\n\a' +\
- '\n%cont, fore "white"\n' ,
- 'urlMark' : '\a \n%cont, fore "cyan"\n\a'+\
- '\n%cont, fore "white"\n' ,
- 'email' : '\n%cont, fore "cyan"\n\a' +\
- '\n%cont, fore "white"\n' ,
- 'emailMark' : '\a \n%cont, fore "cyan"\n\a'+\
- '\n%cont, fore "white"\n' ,
- 'img' : '\n%center\n%newimage "\a", left\n',
- 'comment' : '%% \a' ,
- 'EOD' : '%%EOD'
- },
-
- 'man': {
- 'paragraph' : '.P' ,
- 'title1' : '.SH \a' ,
- 'title2' : '.SS \a' ,
- 'title3' : '.SS \a' ,
- 'title4' : '.SS \a' ,
- 'title5' : '.SS \a' ,
- 'areaPreOpen' : '.nf' ,
- 'areaPreClose' : '.fi\n' ,
- 'areaQuoteOpen' : '\n' ,
- 'areaQuoteClose' : '\n' ,
- 'fontBoldOpen' : '\\fB' ,
- 'fontBoldClose' : '\\fP' ,
- 'fontItalicOpen' : '\\fI' ,
- 'fontItalicClose' : '\\fP' ,
- 'fontBolditalicOpen' : '\\fI' ,
- 'fontBolditalicClose' : '\\fP' ,
- 'listOpen' : '\n.nf' , # pre
- 'listClose' : '.fi\n' ,
- 'listItem' : '* ' ,
- 'numlistOpen' : '\n.nf' , # pre
- 'numlistClose' : '.fi\n' ,
- 'numlistItem' : '\a. ' ,
- 'bar1' : '\n\n' ,
- 'bar2' : '\n\n' ,
- 'url' : '\a' ,
- 'urlMark' : '\a (\a)',
- 'email' : '\a' ,
- 'emailMark' : '\a (\a)',
- 'img' : '\a' ,
- 'comment' : '.\\" \a'
- },
-
- 'pm6': {
- 'paragraph' : '<@Normal:>' ,
- 'title1' : '\n<@Title1:>\a',
- 'title2' : '\n<@Title2:>\a',
- 'title3' : '\n<@Title3:>\a',
- 'title4' : '\n<@Title4:>\a',
- 'title5' : '\n<@Title5:>\a',
- 'areaPreOpen' : '<@PreFormat:>' ,
- 'areaQuoteOpen' : '<@Quote:>' ,
- 'fontMonoOpen' : '<FONT "Lucida Console"><SIZE 9>' ,
- 'fontMonoClose' : '<SIZE$><FONT$>',
- 'fontBoldOpen' : '<B>' ,
- 'fontBoldClose' : '<P>' ,
- 'fontItalicOpen' : '<I>' ,
- 'fontItalicClose' : '<P>' ,
- 'fontBolditalicOpen' : '<B><I>' ,
- 'fontBolditalicClose' : '<P>' ,
- 'fontUnderlineOpen' : '<U>' ,
- 'fontUnderlineClose' : '<P>' ,
- 'listOpen' : '<@Bullet:>' ,
- 'listItem' : '\x95 ' , # \x95 == ~U
- 'numlistOpen' : '<@Bullet:>' ,
- 'numlistItem' : '\x95 ' ,
- 'bar1' : '\a' ,
- 'bar2' : '\a' ,
- 'url' : '<U>\a<P>' , # underline
- 'urlMark' : '\a <U>\a<P>' ,
- 'email' : '\a' ,
- 'emailMark' : '\a \a' ,
- 'img' : '\a'
- }
- }
-
- # compose the target tags dictionary
- tags = {}
- target_tags = alltags[doctype]
- for key in keys: tags[key] = '' # create empty keys
- for key in target_tags.keys():
- tags[key] = maskEscapeChar(target_tags[key]) # populate
-
- return tags
def getRules(doctype):
    """Return the rules dictionary of the given target.

    Every known rule name is a key of the result. A rule the target does
    not set is 0; the others carry the value found on the target table
    (1 for the ON/OFF rules, a number or string for the value settings).
    An unknown doctype raises KeyError.
    """
    rule_names = [

        # target rules (ON/OFF)
        'linkable',            # target supports external links
        'tableable',           # target supports tables
        'imglinkable',         # target supports images as links
        'imgalignable',        # target supports image alignment
        'imgasdefterm',        # target supports image as definition term
        'tablealignable',      # target supports table alignment
        'autonumberlist',      # target supports numbered lists natively
        'autonumbertitle',     # target supports numbered titles natively
        'tablecellsplit',      # place delimiters only *between* cells
        'listnotnested',       # lists cannot be nested
        'quotenotnested',      # quotes cannot be nested
        'preareanotescaped',   # don't escape specials in PRE area
        'escapeurl',           # escape special in link URL

        # target code beautify (ON/OFF)
        'indentprearea',       # add leading spaces to PRE area lines
        'breaktablecell',      # break lines after any table cell
        'breaktablelineopen',  # break line after opening table line
        'keepquoteindent',     # don't remove the leading TABs on quotes

        # value settings
        'listmaxdepth',        # maximum depth for lists
        'tablecellaligntype'   # type of table cell align: cell, column
    ]

    # what each target turns ON (or sets)
    target_rules = {
        'txt': {
            'indentprearea': 1
        },
        'html': {
            'indentprearea': 1,
            'linkable': 1,
            'imglinkable': 1,
            'imgalignable': 1,
            'imgasdefterm': 1,
            'autonumberlist': 1,
            'tableable': 1,
            'breaktablecell': 1,
            'breaktablelineopen': 1,
            'keepquoteindent': 1,
            'tablealignable': 1,
            'tablecellaligntype': 'cell'
        },
        'sgml': {
            'linkable': 1,
            'escapeurl': 1,
            'autonumberlist': 1,
            'tableable': 1,
            'tablecellsplit': 1,
            'quotenotnested': 1,
            'keepquoteindent': 1,
            'tablecellaligntype': 'column'
        },
        'mgp': {
        },
        'tex': {
            'autonumberlist': 1,
            'autonumbertitle': 1,
            'tableable': 1,
            'tablecellsplit': 1,
            'preareanotescaped': 1,
            'listmaxdepth': 4,
            'tablecellaligntype': 'column'
        },
        'moin': {
            'linkable': 1,
            'tableable': 1
        },
        'man': {
            'indentprearea': 1,
            'listnotnested': 1
        },
        'pm6': {
        }
    }

    # the target lookup comes first: an unknown doctype fails right here
    chosen = target_rules[doctype]

    # everything OFF, then apply what the target has set
    result = {}
    for name in rule_names:
        result[name] = 0
    result.update(chosen)
    return result
def getRegexes():
    """Build and return the dictionary with all the parsing regexes.

    The values are compiled patterns, with one exception: '_urlskel'
    holds the plain dictionary of URL pattern pieces (strings), saved
    for later use by the link handling code.
    The 'macro' key is the very same object found on the 'date' key.
    """
    # the marks that need no flags and no composition
    plain = {
        # extra at end: (\[(?P<label>\w+)\])?
        'title'         : r'^\s*(?P<id>={1,5})(?P<txt>[^=].*[^=])\1\s*$',
        'numtitle'      : r'^\s*(?P<id>\+{1,5})(?P<txt>[^+].*[^+])\1\s*$',
        'areaPreOpen'   : r'^---$',
        'areaPreClose'  : r'^---$',
        'quote'         : r'^\t+',
        '1linePre'      : r'^--- (?=.)',
        'fontMono'      : r'`([^`]+)`',
        'fontBold'      : r'\*\*([^\s*].*?)\*\*',
        'fontItalic'    : r'(^|[^:])//([^ /].*?)//',
        # underline lead/trailing blank
        'fontUnderline' : r'__([^_].*?)__',
        'fontBolditalic': r'\*/([^/].*?)/\*',
        'list'          : r'^( *)([+-]) ([^ ])',
        'deflist'       : r'^( *)(=) ([^:]+):',
        'bar'           : r'^\s*([_=-]{20,})\s*$',
        'table'         : r'^ *\|\|? ',
        'blankline'     : r'^\s*$',
        'comment'       : r'^%',
        'raw'           : r'``(.+?)``'
    }
    regex = {}
    for name in plain.keys():
        regex[name] = re.compile(plain[name])

    # special char to place data on TAGs contents (\a == bell)
    regex['x'] = re.compile('\a')

    # %%date [ (formatting) ]
    regex['date'] = re.compile(r'%%date\b(\((?P<fmt>.*?)\))?', re.I)

    ### the composed patterns begin here
    #
    # textual descriptions on --help's style: [...] is optional, | is OR

    # [image.EXT]
    img_patt = r'\[([\w_,.+%$#@!?+~/-]+\.(png|jpe?g|gif|eps|bmp))\]'

    # link things
    urlskel = {
        'proto' : r'(https?|ftp|news|telnet|gopher|wais)://',
        'guess' : r'(www[23]?|ftp)\.',     # w/out proto, try to guess
        'login' : r'A-Za-z0-9_.-',         # for ftp://login@domain.com
        'pass'  : r'[^ @]*',               # for ftp://login:password@dom.com
        'chars' : r'A-Za-z0-9%._/~:,=$@-', # %20(space), :80(port)
        'anchor': r'A-Za-z0-9%._-',        # %nn(encoded)
        'form'  : r'A-Za-z0-9/%&=+.,@*_-', # .,@*_-(as is)
        'punct' : r'.,;:!?'
    }

    # the pieces are filled in by name; 'userinfo' lives on a copy only,
    # so the saved skeleton stays as declared above
    pieces = urlskel.copy()

    # username [ :password ] @
    pieces['userinfo'] = r'([%(login)s]+(:%(pass)s)?@)?' % urlskel

    # [ http:// ] [ username:password@ ] domain.com [ / ]
    #     [ #anchor | ?form=data ]
    url_patt = (r'\b(%(proto)s%(userinfo)s|%(guess)s)[%(chars)s]+\b/*'
                r'(\?[%(form)s]+)?(#[%(anchor)s]+)?') % pieces

    # filename | [ filename ] #anchor
    local_patt = r'[%(chars)s]+|[%(chars)s]*(#[%(anchor)s]+)' % urlskel

    # user@domain [ ?form=data ]
    email_patt = (r'\b[%(login)s]+@([A-Za-z0-9_-]+\.)+[A-Za-z]{2,4}\b'
                  r'(\?[%(form)s]+)?') % urlskel

    # saving for future use
    regex['_urlskel'] = urlskel

    ### and now the real regexes
    #
    both_flags = re.L + re.I

    regex['email'] = re.compile(email_patt, re.I)

    # email | url
    regex['link'] = re.compile(url_patt + '|' + email_patt, re.I)

    # \[ label | imagetag    url | email | filename \]
    regex['linkmark'] = re.compile(
        r'\[(?P<label>' + img_patt + r'|[^]]+) (?P<link>' +
        url_patt + '|' + email_patt + '|' + local_patt + r')\]',
        both_flags)

    # image
    regex['img'] = re.compile(img_patt, both_flags)

    # all macros
    regex['macro'] = regex['date']

    # special things
    regex['special'] = re.compile(r'^%!\s*')
    regex['command'] = re.compile(r'(Include)\s*:\s*(.+)\s*$', re.I)
    return regex
### END OF regex nightmares
class SubareaMaster:
    """Stack with the names of the subareas that are currently open.

    The names seen on the callers are 'table', 'quote' and 'list'.
    Calling the instance tells which subarea is the innermost one.
    """
    def __init__(self):
        # open subareas, the innermost is the last item
        self.x = []

    def __call__(self):
        "Return the innermost open subarea name, or '' if none is open"
        if not self.x: return ''
        return self.x[-1]

    def add(self, area):
        "Open the subarea, unless it already is the innermost one"
        if not self.x or self.x[-1] != area:
            self.x.append(area)
        Debug('subarea ++ (%s): %s' % (area,self.x), 1)

    def pop(self, area=None):
        """Close the subarea, only if it is the innermost open one.

        Nothing is done when no area is given, when the innermost
        subarea is another one, or when nothing is open at all (an
        empty stack used to raise IndexError here).
        """
        if area and self.x and self.x[-1] == area:
            self.x.pop()
        Debug('subarea -- (%s): %s' % (area,self.x), 1)
def doHeader(headers, CONF):
    """Compose the header lines of the target document.

    headers -- the source header lines; items 0, 1 and 2 are used
    CONF    -- the configuration dictionary; the keys 'noheaders',
               'type', 'style' and 'encoding' are read

    The target template (HEADER_TEMPLATE) is filled with the STYLE,
    ENCODING and HEADER1..3 data. A template line whose only data
    reference points to an empty value is dropped.
    Returns a list of lines, or [] when the headers are turned off.
    """
    if CONF['noheaders']: return []
    doctype = CONF['type']
    if not HEADER_TEMPLATE.has_key(doctype):
        Error("doheader: Unknow doctype '%s'"%doctype)

    template = HEADER_TEMPLATE[doctype].split('\n')

    head_data = {'STYLE':'', 'ENCODING':''}
    for key in ('STYLE', 'ENCODING'):
        val = CONF.get(key.lower())
        if key == 'ENCODING': val = get_encoding_string(val, doctype)
        head_data[key] = val

    # parse header contents
    for i in 0,1,2:
        contents = doDateMacro(headers[i])    # expand %%date
        # Escapes - on tex, just do it if any \tag{} present
        if doctype != 'tex' or re.search(r'\\\w+{', contents):
            contents = doEscape(doctype, contents)
        head_data['HEADER%d'%(i+1)] = contents
    Debug("Header Data: %s"%head_data, 1)

    # For each empty data key, drop the template lines that show
    # nothing but that key. A new list is composed on every pass:
    # removing items from the very list being looped skips the line
    # that follows the removed one, and list.remove() may hit an
    # earlier identical line instead of the current one.
    for key in list(head_data.keys()):
        if head_data.get(key): continue
        ref = '%%(%s)s'%key
        kept = []
        for line in template:
            if line.count(ref) and \
               not re.search(r'%\([A-Z0-9]+\)s', line.replace(ref, '')):
                continue               # no other reference, line is gone
            kept.append(line)
        template = kept

    # populate template with data
    template = '\n'.join(template) % head_data

    ### post processing
    #
    # let tex format today
    # (the doubled backslash gives the same \date text as before, but
    # it is not read as an escape sequence by the regex module)
    if doctype == 'tex' and head_data['HEADER3'] == currdate:
        template = re.sub(r'\\date\{.*?}', r'\\date', template)

    return template.split('\n')
def doDateMacro(line):
    """Expand all the %%date macros found on the line.

    A plain %%date becomes the current date (currdate). The
    %%date(format) form becomes the current local time formatted by
    strftime() with the given format.
    Returns the line with the macros expanded.
    """
    def expand(match):
        fmt = match.group('fmt')
        if fmt:
            return strftime(fmt, localtime(time()))
        return currdate

    # A function is used as the replacement so the date text is
    # inserted as is. When given as a string, any backslash on it
    # would be read as a regex template escape.
    # All macros are expanded in one pass and the inserted text is not
    # scanned again, so a format that prints the macro itself cannot
    # loop forever.
    return getRegexes()['date'].sub(expand, line)
def doCommentLine(txt):
    """Return the text inside the target comment tag.

    The escape char of the text is masked first. If the target has no
    comment tag, an empty string is returned.
    """
    txt = maskEscapeChar(txt)
    comment_tag = TAGS['comment']

    # the -- string ends a sgml/html comment :(
    # so on such tags, a -- on the text is broken apart
    if comment_tag.count('--') and txt.count('--'):
        txt = re.sub('-(?=-)', r'-\\', txt)

    if not comment_tag:
        return ''
    return regex['x'].sub(txt, comment_tag)
def doFooter(CONF):
    """Return the footer lines of the target document.

    They are: the "generated by" credits as a comment (with a leading
    line break), the command line used as a comment, and the target
    EOD tag. CONF keys read: 'type' and 'cmdline'.
    """
    doctype = CONF['type']
    args = CONF['cmdline']

    if doctype == 'tex':
        typename = 'LaTeX2e'
    else:
        typename = doctype

    credits = '%s code generated by txt2tags %s (%s)'%(
        typename, my_version, my_url)
    # the first cmdline item is not shown
    invocation = 'cmdline: txt2tags %s' % ' '.join(args[1:])

    return ['\n' + doCommentLine(credits),
            doCommentLine(invocation),
            TAGS['EOD']]
- # TODO mgp: any line (header or not) can't begin with % (add a space before)
def doEscape(doctype,txt):
    """Escape the chars that are special for the target.

    doctype -- the target name
    txt     -- the text to be escaped; on the man and tex targets it is
               expected to have the backslashes already masked as ESCCHAR

    Returns the escaped text. The targets with no special handling
    here get the text back untouched.
    """
    if doctype in ['html','sgml']:
        # Special chars become character entities. The replacement
        # texts used to be the very same chars, so nothing was escaped.
        # The & must come first, or it would hit the new entities.
        txt = txt.replace('&', '&amp;')
        txt = txt.replace('<', '&lt;')
        txt = txt.replace('>', '&gt;')
        if doctype == 'sgml':
            txt = txt.replace('\xff', '&yuml;')   # "+y
    elif doctype == 'pm6':
        txt = txt.replace('<', r'<\#60>')
    elif doctype == 'mgp':
        txt = re.sub('^%',' %',txt)  # add leading blank to avoid parse
    elif doctype == 'man':
        txt = re.sub("^([.'])", '\\&\\1',txt)       # command ID
        txt = txt.replace(ESCCHAR, ESCCHAR+'e')     # \e
    elif doctype == 'tex':
        # the masked backslash is set aside, so the escapes added
        # below are not mistaken for it
        txt = txt.replace(ESCCHAR, '@@LaTeX-escaping-SUX@@')
        txt = re.sub('([#$&%{}])', r'\\\1', txt)
        txt = txt.replace('~', maskEscapeChar(r'\~{}'))
        txt = txt.replace('^', maskEscapeChar(r'\^{}'))
        txt = txt.replace('@@LaTeX-escaping-SUX@@',
                          maskEscapeChar(r'$\backslash$'))
        # TIP the _ is escaped at the end
    return txt
def doFinalEscape(doctype, txt):
    """Last escapes of each line.

    pm6 : the masked backslash followed by < gets its own code
    man : the hyphen is escaped
    tex : the underscore is escaped
    sgml: the [ becomes an entity (it used to be replaced by itself,
          so it was never escaped)
    The other targets get the text back untouched.
    """
    if doctype == 'pm6':
        return txt.replace(ESCCHAR+'<', r'<\#92><')
    if doctype == 'man':
        return txt.replace('-', r'\-')
    if doctype == 'tex':
        return txt.replace('_', r'\_')
    if doctype == 'sgml':
        return txt.replace('[', '&lsqb;')
    return txt
def EscapeCharHandler(action, data):
    """Mask/Unmask the Escape Char on the given string.

    action -- 'mask' changes every backslash to ESCCHAR,
              'unmask' changes every ESCCHAR back to a backslash
    data   -- the string to handle

    An empty or blank string is returned as is, before the action is
    even checked. Any other action name is reported with Error().
    """
    if not data.strip():
        return data
    if action != 'mask' and action != 'unmask':
        Error("EscapeCharHandler: Invalid action '%s'"%action)
    if action == 'mask':
        return data.replace('\\', ESCCHAR)
    return data.replace(ESCCHAR, '\\')
def maskEscapeChar(data):
    """Mask the Escape Char (backslash) of a string or list of strings.

    A list gives back a new list with every item masked, anything
    else is handled as a single string.
    """
    if type(data) is not list:
        return EscapeCharHandler('mask', data)
    return [EscapeCharHandler('mask', item) for item in data]
def unmaskEscapeChar(data):
    """Undo the Escape Char (backslash) masking of a string or list.

    A list gives back a new list with every item unmasked, anything
    else is handled as a single string.
    """
    if type(data) is not list:
        return EscapeCharHandler('unmask', data)
    return [EscapeCharHandler('unmask', item) for item in data]
def addLineBreaks(list):
    """Return a new list with the LB line break applied to each line.

    The \\n's found inside a line are changed to LB, and one LB is
    added at the line end.
    """
    return [text.replace('\n', LB) + LB for text in list]
def doPreLine(doctype,line):
    """Parsing procedures for preformatted (verbatim) lines.

    Following the target rules, the line is escaped and gets the
    leading indent. The pm6 target also gets the final escapes.
    Returns the resulting line.
    """
    escape_wanted = not rules['preareanotescaped']
    if escape_wanted:
        line = doEscape(doctype, line)
    if rules['indentprearea']:
        line = ' ' + line
    if doctype != 'pm6':
        return line
    return doFinalEscape(doctype, line)
def doCloseTable(doctype):
    """Close the current table and return the closing tag.

    Targets with no table support get the PRE area closing tag. On
    tex, a bordered table gets a line opening tag before the closing.
    The global border flag is reset and the 'table' subarea is closed.
    """
    global subarea, tableborder
    if not rules['tableable']:
        closing = TAGS['areaPreClose']
    elif doctype == 'tex' and tableborder:
        closing = TAGS['tableLineOpen'] + TAGS['tableClose'] + '\n'
    else:
        closing = TAGS['tableClose'] + '\n'
    tableborder = 0
    subarea.pop('table')
    return closing
def doCloseQuote(howmany=None):
    """Close quote levels and return their closing tags.

    howmany -- how many levels to close, innermost first; when not
               given (or zero), all the open levels are closed

    The levels are removed from the global quotedepth. When none is
    left, the 'quote' subarea is closed. The tags are returned as a
    single string, joined by line breaks.
    """
    global quotedepth
    pending = howmany or len(quotedepth)
    closers = []
    while pending > 0:
        quotedepth.pop()
        #TODO align open/close tag -> FREE_ALING_TAG = 1 (man not)
        closers.append(TAGS['areaQuoteClose'])
        pending = pending - 1

    if not quotedepth:
        subarea.pop('quote')
    return '\n'.join(closers)
def doCloseList(howmany=None):
    """Close list levels and return their closing tags.

    howmany -- how many levels to close, innermost first; when not
               given (or zero), all the open levels are closed

    The levels are removed from the global listindent and listids.
    When none is left, the 'list' subarea is closed. The tags are
    returned as a single string, joined by line breaks.
    """
    global listindent, listids
    # the closing tag name for each list id
    closetag = {'-': 'listClose', '+': 'numlistClose', '=': 'deflistClose'}
    ret = []
    if not howmany: howmany = len(listindent)
    for i in range(howmany):
        # An unknown id, or an empty tag, falls back to the plain list
        # closing. The tag used to be left unset (or with the previous
        # level's value) when the id was not one of the three above.
        tag = TAGS[closetag.get(listids[-1], 'listClose')]
        if not tag: tag = TAGS['listClose']
        if tag:
            if not rules['listnotnested']:
                ret.append(listindent[-1]+tag)
            # unnested lists are only closed at mother-list
            elif len(listindent) == 1:
                ret.append(tag)
        del listindent[-1]
        del listids[-1]

    if not listindent: subarea.pop('list')
    return '\n'.join(ret)
def beautify_me(name, line):
    """Apply the target font tags around the marked text of the line.

    name -- the beautifier: bold, italic, underline or bolditalic
    line -- the line to be changed

    Returns the line with every mark changed to open/close tags.
    """
    fontname = 'font' + name.capitalize()
    tag_open = TAGS[fontname + 'Open']
    tag_close = TAGS[fontname + 'Close']
    if fontname == 'fontItalic':
        # the italic regex has one more group before the text,
        # it is put back in front of the opening tag
        template = r'\1%s\2%s' % (tag_open, tag_close)
    else:
        template = r'%s\1%s' % (tag_open, tag_close)
    return regex[fontname].sub(template, line)
def get_tagged_link(label, url, CONF):
    """Return the link (URL or e-mail) inside the target link tag.

    label -- the link text, empty for a plain link
    url   -- the URL, e-mail or local file/anchor
    CONF  -- the configuration; 'type' and 'maskemail' are read

    A link with no protocol that looks like www.* or ftp.* gets the
    protocol added on the targets that support links.
    """
    doctype = CONF['type']

    # e-mail or URL?
    if regex['email'].match(url):
        linktype = 'email'
    else:
        linktype = 'url'

    # escape specials from TEXT parts
    label = doEscape(doctype, label)

    linkable = rules['linkable']
    if linkable:
        # escape specials from link URL
        if rules['escapeurl']:
            url = doEscape(doctype, url)
    elif doctype == 'tex':
        # not linkable: the URL is plain text, that needs escape
        url = re.sub('^#', r'\#', url)         # ugly, but compile
    else:
        url = doEscape(doctype, url)

    # adding protocol to guessed link
    # (not link aware targets -> protocol is useless)
    guessurl = ''
    if linktype == 'url' and re.match(regex['_urlskel']['guess'], url):
        if linkable:
            if url[0] == 'w':
                guessurl = 'http://' + url
            else:
                guessurl = 'ftp://' + url

    # named link or guessed simple link
    if label or guessurl:
        # adjusts for guessed link
        if not label:
            label = url                        # no protocol
        if guessurl:
            url = guessurl                     # with protocol

        # change image tag for !supported img+link targets
        image = regex['img'].match(label)
        if image and not rules['imglinkable']:
            label = "(%s)" % image.group(1)

        # putting data on the right appearance order
        if linkable:
            pieces = (url, label)              # link before label
        else:
            pieces = (label, url)              # label before link

        # add link data to tag (replace \a's)
        tagged = TAGS[linktype + 'Mark']
        for piece in pieces:
            tagged = regex['x'].sub(piece, tagged, 1)
        return tagged

    # simple link (not guessed)
    if CONF['maskemail'] and linktype == 'email':
        # do the email mask feature (no TAGs, just text)
        masked = url.replace('@', ' (a) ').replace('.', ' ')
        masked = "<%s>" % masked
        if linkable:
            masked = doEscape(doctype, masked)
        return masked

    # just add link data to tag
    return regex['x'].sub(url, TAGS[linktype])
def get_image_align(line):
    """Tell the image align, based on its position on the line.

    The first image mark of the line (blanks stripped) is checked:
      'para'   the image is alone on the line      ^img$
      'left'   the image is at the line start      ^img + text$
      'right'  the image is at the line end        ^text + img$
      'middle' the image is surrounded by text     ^text + img + text$
    The line is expected to hold an image mark.
    """
    line = line.strip()
    found = regex['img'].search(line)
    at_start = (found.start() == 0)
    at_end = (found.end() == len(line))

    if at_start and at_end:
        return 'para'
    if at_start:
        return 'left'
    if at_end:
        return 'right'
    return 'middle'
def get_tablecell_align(cells):
    """Return the list with the align name of each table cell.

    The spaces around the cell contents tell the align:
      'Center'  leading and trailing space
      'Right'   leading space only
      'Left'    anything else, including the blank cells
    """
    aligns = []
    for cell in cells:
        if not cell.strip() or cell[0] != ' ':
            aligns.append('Left')
        elif cell[-1] == ' ':
            aligns.append('Center')
        else:
            aligns.append('Right')
    return aligns
def get_table_prop(line):
    """Return the dictionary with the properties of a table line.

    Keys: 'border' and 'header' (0 or 1), 'align' ('Left' or
    'Center'), 'cells' (list with the cells contents) and 'cellalign'
    (list with the align name of each cell).
    """
    # default table proprierties
    prop = {'border':0,'header':0,'align':'Left','cells':[],'cellalign':[]}

    # a leading space tells the table is centered
    if line[0] == ' ':
        prop['align'] = 'Center'
    line = line.lstrip()

    # a doubled table mark tells it is a header (title) line
    if line[1] == '|':
        prop['header'] = 1

    # the spaces after the last cell border are not needed
    line = re.sub(r'\|\s*$', '|', line)

    # a closing mark tells the table has border
    # (the mark is removed, and also the space before it)
    if line[-1] == '|':
        prop['border'] = 1
        line = line[:-2]

    # with the table mark gone, only the cells are left
    line = regex['table'].sub('', line)
    prop['cells'] = line.split(' | ')
    prop['cellalign'] = get_tablecell_align(prop['cells'])

    Debug('Table Prop: %s' % prop, 1)
    return prop
def tag_table_cells(table, doctype):
    """Return the table line cells, tagged and joined as one string.

    table   -- table properties dictionary; the keys 'header', 'cells'
               and 'cellalign' are read. The 'cells' list is consumed,
               and also 'cellalign' when the align is set by cell.
    doctype -- the target name
    """
    ret = ''
    if table['header']:
        # title cell
        cell_open  = TAGS['tableTitleCellOpen']
        cell_close = TAGS['tableTitleCellClose']
    else:
        cell_open  = TAGS['tableCellOpen']
        cell_close = TAGS['tableCellClose']
    # should we break the line?
    if rules['breaktablecell']: cell_close = cell_close+'\n'
    # here we go
    while table['cells']:
        openalign = cell_open
        cel = table['cells'].pop(0).strip()
        # set each cell align
        if rules['tablecellaligntype'] == 'cell':
            align = table['cellalign'].pop(0)
            align = TAGS['tableCellAlign%s'%align]
            openalign = cell_open.replace('\a', align)
        # Show empty cell on HTML. The filler used to be a blank that
        # was stripped right after, so the cell was left empty. The
        # check is now made on the stripped contents, so a cell with
        # spaces only is also filled.
        if not cel and doctype == 'html': cel = '&nbsp;'
        # last cell gotchas
        if not table['cells']:
            # don't need cell separator
            if rules['tablecellsplit']: cell_close = ''
            # close beautifier for last title cell
            if doctype == 'tex' and table['header']: cell_close = '}'
        # join it all
        ret = ret + openalign + cel + cell_close
    return ret
def get_tableopen_tag(table_prop, doctype):
    """Return the table opening tag, filled with the table properties.

    table_prop -- table properties dictionary; the keys 'border',
                  'align' and 'cellalign' are read
    doctype    -- the target name

    The border setting is also saved on the global tableborder.
    """
    global tableborder
    tag = TAGS['tableOpen']                # the default one

    # the first line defines if table has border or not
    tableborder = table_prop['border']

    # align full table
    if rules['tablealignable']:
        table_align = TAGS['tableAlign' + table_prop['align']]
        tag = regex['x'].sub(table_align, tag, 1)

    # set the columns alignment
    if rules['tablecellaligntype'] == 'column':
        columns = [TAGS['tableColAlign%s' % name]
                   for name in table_prop['cellalign']]
        tag = regex['x'].sub(''.join(columns), tag, 1)

    # tex table spec, border or not: {|l|c|r|} , {lcr}
    if doctype == 'tex' and not tableborder:
        tag = tag.replace('|', '')

    # we're almost done, just border left
    return regex['x'].sub(repr(tableborder), tag)
- # reference: http://www.iana.org/assignments/character-sets
- # http://www.drclue.net/F1.cgi/HTML/META/META.html
def get_encoding_string(enc, doctype):
    """Return the encoding name, on the format the target expects.

    enc     -- the encoding name as informed by the user
    doctype -- the target name

    The common aliases are normalized first (ascii -> us-ascii,
    cp850 -> ibm850, iso8859_1 -> iso-8859-1). Then, for the targets
    with a translation table (only tex by now), the name is changed to
    the target's own. A name not found on the table is returned
    normalized. An empty enc returns ''.
    """
    if not enc: return ''
    # target specific translation table
    translate = {
    'tex': {
        # missing: ansinew , applemac , cp437 , cp437de , cp865
        'us-ascii'    : 'ascii',
        'windows-1250': 'cp1250',
        'windows-1252': 'cp1252',
        'ibm850'      : 'cp850',
        'ibm852'      : 'cp852',
        'iso-8859-1'  : 'latin1',
        'iso-8859-2'  : 'latin2',
        'iso-8859-3'  : 'latin3',
        'iso-8859-4'  : 'latin4',
        # The Latin-N alphabet number is not the ISO-8859 part
        # number: latin5 is part 9 and latin9 is part 15.
        # Part 5 is cyrillic, named iso88595 by inputenc.
        'iso-8859-5'  : 'iso88595',
        'iso-8859-9'  : 'latin5',
        'iso-8859-15' : 'latin9',
        'koi8-r'      : 'koi8-r'
      }
    }
    # normalization
    # The ascii aliases must be the full name. With no anchors, the
    # "us" found inside any other name was also changed.
    enc = re.sub('(?i)^((us[-_]?)?ascii|us|ibm367)$','us-ascii', enc)
    enc = re.sub('(?i)(ibm|cp)?85([02])'       ,'ibm85\\2'  , enc)
    enc = re.sub('(?i)(iso[_-]?)?8859[_-]?'    ,'iso-8859-' , enc)
    enc = re.sub('iso-8859-($|[^1-9]).*'       ,'iso-8859-1', enc)
    # apply translation table (if any for this target)
    return translate.get(doctype, {}).get(enc.lower(), enc)
- ################################################################################
- ###MerryChristmas,IdontwanttofighttonightwithyouImissyourbodyandIneedyourlove###
- ################################################################################
def getAllConf(cmdlinelist, nocheck=0):
    """
    Returns a list of (File Configuration, File Proprierties) tuples
    for all the given Input files. The Configuration is the merge of
    command line options and %!cmdline settings.
    When the command line informs no Input file, the list is empty.
    """
    # parse command line to get input files list
    cmdline = Cmdline(cmdlinelist, nocheck)
    infiles = cmdline.cmdline_conf.get('infiles')
    if not infiles:
        return []

    results = []
    for infile in infiles:                    # multifile support
        # from the second file on, a fresh Cmdline() is needed
        if results:
            cmdline = Cmdline(cmdlinelist, nocheck)

        # extract file Headers and Config
        prop = Proprierties(infile)

        # decide to use generic or target specfic (if any) %!cmdline:
        wanted = cmdline.cmdline_conf.get('type')
        source = 'all'
        if wanted and wanted in targets and \
           prop.config[wanted].get('cmdline'):
            source = wanted

        # merge %!cmdline contents (if any) into original cmdline
        cmdline.merge(prop.config[source].get('cmdline'))

        # force infile
        cmdline.cmdline_conf['infile'] = infile

        # get all the configuration (flags/options) for this file
        # it saves general AND specific config (not OR as in %!cmdline)
        myconf = cmdline.merge_conf(prop.config['all'])
        myconf = cmdline.merge_conf(
            prop.config.get(myconf['type']), override=1)

        # adding %!cmdline contents to config (used by GUI)
        myconf['%!cmdline'] = prop.config[source].get('cmdline')

        # ensure the configuration has ALL keys defined
        every_key = list(FLAGS.keys()) + list(OPTIONS.keys()) + \
                    CONFIG_KEYWORDS
        for key in every_key:
            if not myconf.has_key(key):
                myconf[key] = ''

        # one more (configuration, proprierties) tuple
        results.append((myconf, prop))

    # remove what has left
    del cmdline, prop
    return results
def convertAllFiles(confs):
    """Convert all the files of the (configuration, proprierties) list.

    With no files to convert, Quit() is called with the usage text.
    Each converted file is handed to finish_him(). When the 'gui' key
    of a file configuration is set, the conversion stops on that file
    and the (document lines, configuration) tuple is returned.
    """
    if not confs:
        Quit(usage, 1)

    for myconf, prop in confs:                # multifile support

        # --dump-source option handler
        # hidden option, maybe will be removed on next versions
        if myconf['dump-source']:
            delimiter = '%--------------------- Area Delimiter:'
            for line in prop.headers:
                print(line)
            print('%s HEADER --> CONFIG' % delimiter)
            for line in prop.conflines:
                print(line.rstrip())
            print('%s CONFIG --> BODY' % delimiter)
            doc = convert(prop.bodylines, myconf)
            for line in doc:
                print(line)
            print('%s EOD' % delimiter)
            continue

        # compose the target file Headers
        #TODO escape line before?
        #TODO see exceptions by tex and mgp
        header = doHeader(prop.headers, myconf)

        # parse the full marked body into tagged target
        # (the BODY is what has left after Headers and Config)
        doc, toc = convert(prop.bodylines, myconf,
                           firstlinenr=prop.arearef[-1])

        # make TOC (if needed)
        toc = toc_maker(toc, myconf)

        # finally, we have our document
        outlist = header + toc + doc

        # break here if Gui - it has some more processing to do
        if myconf['gui']:
            return outlist, myconf

        # write results to file or STDOUT
        finish_him(outlist, myconf)
def reallydoitall(cmdlinelist):
    "Get the configuration of all the files, then convert them all"
    return convertAllFiles(getAllConf(cmdlinelist))
- def convert(bodylines, CONF, firstlinenr=1):
- # global vars for doClose*()
- global TAGS, regex, rules, quotedepth, listindent, listids
- global subarea, tableborder
-
- doctype = CONF['type']
- outfile = CONF['outfile']
- TAGS = getTags(doctype)
- rules = getRules(doctype)
- regex = getRegexes()
-
- # the defaults
- linkmask = '@@_link_@@'
- monomask = '@@_mono_@@'
- macromask = '@@_macro_@@'
- rawmask = '@@_raw_@@'
-
- subarea = SubareaMaster()
- ret = []
- incdumpbuf = []
- toclist = []
- f_tt = 0
- listindent = []
- listids = []
- listcount = []
- titlecount = ['',0,0,0,0,0]
- f_lastwasblank = 0
- holdspace = ''
- listholdspace = ''
- quotedepth = []
- tableborder = 0
-
- if outfile != STDOUT:
- if not CONF['gui']:
- print "--- %s..."%doctype
-
- # if TOC is a header tag
- if CONF['toc'] and TAGS['TOC']:
- ret.append(TAGS['TOC']+'\n')
-
- # let's put the opening paragraph
- if doctype != 'pm6':
- ret.append(TAGS['paragraph'])
-
- # let's mark it up!
- linenr = firstlinenr-1
- lineref = -1
- while lineref < len(bodylines)-1:
- # for lineref in range(len(bodylines)):
- lineref = lineref + 1
- # print lineref, len(bodylines)
- skip_continue = 0
- linkbank = []
- monobank = []
- macrobank = []
- rawbank = []
-
- untouchedline = bodylines[lineref]
- line = re.sub('[\n\r]+$','',untouchedline) # del line break
- incdumpbuf.append(line) # for --dump-source
-
- # apply PreProc rules
- if CONF['preproc']:
- for targ,patt,repl in CONF['preproc']:
- if targ not in [CONF['type'], 'all']: continue
- try : line = re.sub(patt, repl, line)
- except: Error("Invalid PreProc filter regex: '%s'"%patt)
-
- line = maskEscapeChar(line) # protect \ char
- linenr = linenr +1
-
- Debug('LINE %04d: %s'%(linenr,repr(line)), 1) # heavy debug
-
- # we need (not really) to mark each paragraph
- #TODO check if this is really needed
- if doctype == 'pm6' and f_lastwasblank:
- if f_tt or listindent:
- holdspace = ''
- else:
- holdspace = TAGS['paragraph']+'\n'
-
- # any NOT table line (or comment), closes an open table
- #if subarea() == 'table' and not regex['table'].search(line):
- if subarea() == 'table' \
- and not regex['table'].search(line) \
- and not regex['comment'].search(line):
- ret.append(doCloseTable(doctype))
-
-
- #---------------------[ PRE formatted ]----------------------
-
- #TIP we'll never support beautifiers inside pre-formatted
-
- # we're already on a PRE area
- if f_tt:
- # closing PRE
- if regex['areaPreClose'].search(line):
- if doctype != 'pm6':
- ret.append(TAGS['areaPreClose'])
- f_tt = 0
- continue
-
- # normal PRE-inside line
- line = doPreLine(doctype, line)
- ret.append(line)
- continue
-
- # detecting PRE area init
- if regex['areaPreOpen'].search(line):
- ret.append(TAGS['areaPreOpen'])
- f_lastwasblank = 0
- f_tt = 1
- continue
-
- # one line PRE-formatted text
- if regex['1linePre'].search(line):
- f_lastwasblank = 0
- line = regex['1linePre'].sub('',line)
- line = doPreLine(doctype, line)
- t1, t2 = TAGS['areaPreOpen'],TAGS['areaPreClose']
- ret.append('%s\n%s\n%s'%(t1,line,t2))
- continue
-
- #---------------------[ blank lines ]-----------------------
-
- #TODO "holdspace" to save <p> to not show in closelist
- if regex['blankline'].search(line):
-
- # closing all open quotes
- if quotedepth:
- ret.append(doCloseQuote())
-
- # closing all open lists
- if f_lastwasblank: # 2nd consecutive blank line
- if listindent: # closes list (if any)
- ret.append(doCloseList())
- holdspace = ''
- continue # consecutive blanks are trash
-
- # normal blank line
- if doctype != 'pm6':
- # paragraph (if any) is wanted inside lists also
- if listindent:
- para = TAGS['paragraph'] + '\n'
- holdspace = holdspace + para
- elif doctype == 'html':
- ret.append(TAGS['paragraph'])
- # sgml: quote close tag must not be \n\n</quote>
- elif doctype == 'sgml' and quotedepth:
- skip_continue = 1
- # otherwise we just show a blank line
- else:
- ret.append('')
-
- f_lastwasblank = 1
- if not skip_continue: continue
-
-
- #---------------------[ special ]------------------------
-
- if regex['special'].search(line):
-
- # include command
- m = ParseConfig(line, 'include', doctype)
- if m:
- incpath = os.path.dirname(CONF['infile'])
- incfile = m['value']
- if CONF['infile'] == incfile:
- Error('A file cannot include itself (loop!): %s'%incfile)
- inctype, inclines = get_include_contents(incfile, incpath)
-
- if inctype == 'PASS':
- ret.extend(inclines)
- continue
-
- # change %!include command by comment
- incdumpbuf[-1] = inclines[0]
-
- # insert include lines into bodylines list
- # removing the %!include command call
- bodylines = bodylines[:lineref] +inclines \
- +bodylines[lineref+1:]
- continue
-
- #---------------------[ comments ]-----------------------
-
- # just skip them (if not macro or config)
- if regex['comment'].search(line) and not \
- regex['date'].match(line):
- continue
- f_lastwasblank = 0 # reset blank status
-
- #---------------------[ Title ]-----------------------
-
- #TODO set next blank and set f_lastwasblank or f_lasttitle
- if (regex['title'].search(line) or
- regex['numtitle'].search(line)) and not listindent:
-
- if string.lstrip(line)[0] == '=':
- titletype = 'title'
- else:
- titletype = 'numtitle'
-
- m = regex[titletype].search(line)
- level = len(m.group('id'))
- tag = TAGS['title%s'%level]
- txt = string.strip(m.group('txt'))
-
- ### numbered title
- if CONF['enumtitle'] or titletype == 'numtitle':
- if rules['autonumbertitle']:
- tag = TAGS['numtitle%s'%level] or tag
- idtxt = txt
- else:
- # add count manually
- id = '' ; n = level
- titlecount[n] = titlecount[n] +1
- if n < len(titlecount)-1: # reset sublevels count
- for i in range(n+1, len(titlecount)):
- titlecount[i] = 0
- for i in range(n): # compose id from hierarchy
- id = "%s%d."%(id,titlecount[i+1])
- idtxt = "%s %s"%(id, txt) # add id to title
- else:
- idtxt = txt
-
- anchorid = 'toc%d'%(len(toclist)+1)
- if TAGS['anchor'] and CONF['toc'] \
- and level <= CONF['toclevel']:
- ret.append(regex['x'].sub(anchorid,TAGS['anchor']))
-
- # place title tag overriding line
- line = regex[titletype].sub(tag,line)
-
- ### escape title text (unescaped text is used for TOC)
- #
- esctxt = doEscape(doctype,idtxt)
- # sgml: [ is special on title (and lists) - here bcos 'continue'
- if doctype in ['sgml','tex']:
- esctxt = doFinalEscape(doctype, esctxt)
- # txt: blank before
- if doctype == 'txt': ret.append('')
- # finish title line
- ret.append(regex['x'].sub(esctxt,line))
-
- # add "underline" to text titles
- if doctype == 'txt':
- ret.append(regex['x'].sub('='*len(idtxt),tag))
- ret.append('') # blank line after
-
- # let's do some TOC!
- if not CONF['toc'] and not CONF['toconly']: continue
- if level > CONF['toclevel']: continue # max level
- if TAGS['TOC']: continue # TOC is a tag
- if TAGS['anchor']:
- # tocitemid = '#toc%d'%(len(toclist)+1)
- # TOC more readable with master topics not
- # linked at number stoled idea from windows .CHM
- # files (help system)
- if CONF['enumtitle'] and level == 1:
- tocitem = '%s+ [``%s`` #%s]'%(' '*level,txt,anchorid)
- else:
- tocitem = '%s- [``%s`` #%s]'%(' '*level,idtxt,anchorid)
- else:
- tocitem = '%s- ``%s``'%(' '*level,idtxt)
- if doctype in ['txt', 'man']:
- tocitem = '%s``%s``' %(' '*level,idtxt)
- toclist.append(tocitem)
-
- continue
-
- #TODO! labeltxt = ''
- # label = m.group('label')
- # if label: labeltxt = '<label id="%s">' %label
-
-
- #---------------------[ apply masks ]-----------------------
-
- ### protect important structures from escaping and formatting
- while regex['raw'].search(line):
- txt = regex['raw'].search(line).group(1)
- txt = doEscape(doctype,txt)
- rawbank.append(txt)
- line = regex['raw'].sub(rawmask,line,1)
-
- # protect pre-formatted font text
- while regex['fontMono'].search(line):
- txt = regex['fontMono'].search(line).group(1)
- txt = doEscape(doctype,txt)
- monobank.append(txt)
- line = regex['fontMono'].sub(monomask,line,1)
-
- # protect macros
- while regex['macro'].search(line):
- txt = regex['macro'].search(line).group()
- macrobank.append(txt)
- line = regex['macro'].sub(macromask,line,1)
-
- # protect URLs and emails
- while regex['linkmark'].search(line) or regex['link'].search(line):
-
- # try to match plain or named links
- match_link = regex['link'].search(line)
- match_named = regex['linkmark'].search(line)
-
- # define the current match
- if match_link and match_named:
- # both types found, which is the first?
- m = match_link
- if match_named.start() < match_link.start():
- m = match_named
- else:
- # just one type found, we're fine
- m = match_link or match_named
-
- # extract link data and apply mask
- if m == match_link: # plain link
- label = ''
- link = m.group()
- line = regex['link'].sub(linkmask,line,1)
- else: # named link
- label = string.rstrip(m.group('label'))
- link = m.group('link')
- line = regex['linkmark'].sub(linkmask,line,1)
-
- # save link data to the link bank
- linkbank.append((label, link))
-
- #---------------------[ do Escapes ]-----------------------
-
- # the target-specific special char escapes for body lines
- line = doEscape(doctype,line)
-
- #---------------------[ Horizontal Bar ]--------------------
-
- if regex['bar'].search(line):
- txt = regex['bar'].search(line).group(1)
- if txt[0] == '=': bar = TAGS['bar2']
- else : bar = TAGS['bar1']
-
- # to avoid comment tag confusion
- if doctype == 'sgml':
- txt = string.replace(txt,'--','__')
-
- line = regex['bar'].sub(bar,line)
- ret.append(regex['x'].sub(txt,line))
- continue
-
- #---------------------[ Quote ]-----------------------
-
- if regex['quote'].search(line):
- subarea.add('quote')
-
- # store number of leading TABS
- currquotedepth = len(regex['quote'].search(line).group(0))
-
- # SGML doesn't support nested quotes
- if rules['quotenotnested']:
- if quotedepth and currquotedepth > quotedepth[-1]:
- currquotedepth = quotedepth[-1]
-
- # for don't-close-me quote tags
- if not TAGS['areaQuoteClose']:
- line = regex['quote'].sub(TAGS['areaQuoteOpen']*currquotedepth, line)
- else:
- # new (sub)quote
- if not quotedepth or currquotedepth > quotedepth[-1]:
- quotedepth.append(currquotedepth)
- ret.append(TAGS['areaQuoteOpen'])
-
- # remove leading TABs
- if not rules['keepquoteindent']:
- line = regex['quote'].sub('', line)
-
- # closing quotes
- while currquotedepth < quotedepth[-1]:
- ret.append(doCloseQuote(1))
- else:
- # closing all quotes (not quote line)
- if quotedepth: ret.append(doCloseQuote())
-
-
- #---------------------[ Lists ]-----------------------
-
- if (regex['list'].search(line) or regex['deflist'].search(line)):
- subarea.add('list')
-
- if regex['list'].search(line): rgx = regex['list']
- else: rgx = regex['deflist']
-
- m = rgx.search(line)
- listitemindent = m.group(1)
- listtype = m.group(2)
- extra = m.group(3) # regex anchor char
-
- if listtype == '=':
- listdefterm = m.group(3)
- extra = ''
- if doctype == 'tex':
- # on tex, brackets are term delimiters
- # TODO escape ] at list definition
- # \], \rbrack{} and \verb!]! don't work :(
- #listdefterm = string.replace(listdefterm, ']', '???')
- pass
- if not rules['imgasdefterm'] and \
- regex['img'].search(listdefterm):
- while regex['img'].search(listdefterm):
- img = regex['img'].search(listdefterm).group(1)
- masked = '(%s)'%img
- listdefterm = regex['img'].sub(masked,listdefterm,1)
-
- # don't cross depth limit
- maxdepth = rules['listmaxdepth']
- if maxdepth and len(listindent) == maxdepth:
- if len(listitemindent) > len(listindent[-1]):
- listitemindent = listindent[-1]
-
- # new sublist
- if not listindent or len(listitemindent) > len(listindent[-1]):
- listindent.append(listitemindent)
- listids.append(listtype)
- if listids[-1] == '-': tag = TAGS['listOpen']
- elif listids[-1] == '+': tag = TAGS['numlistOpen']
- elif listids[-1] == '=': tag = TAGS['deflistOpen']
- if not tag: tag = TAGS['listOpen'] # default
- # no need to reopen <pre> tag on man sublists
- if rules['listnotnested'] and len(listindent) != 1:
- tag = ''
- openlist = listindent[-1]+tag
- if doctype == 'pm6':
- listholdspace = openlist
- else:
- if string.strip(openlist): ret.append(openlist)
- # reset item manual count
- listcount.append(0)
-
- # closing sublists
- while len(listitemindent) < len(listindent[-1]):
- close = doCloseList(1)
- if close: ret.append(close)
- if listcount: del listcount[-1]
-
- # normal item
- listid = listindent[-1]
- if listids[-1] == '-':
- tag = TAGS['listItem']
- elif listids[-1] == '+':
- tag = TAGS['numlistItem']
- listcount[-1] = listcount[-1] +1
- if not rules['autonumberlist']:
- tag = regex['x'].sub(str(listcount[-1]), tag)
- elif listids[-1] == '=':
- if not TAGS['deflistItem1']:
- # emulate def list, with <li><b>def</b>:
- tag = TAGS['listItem'] +TAGS['fontBoldOpen'] +listdefterm
- tag = tag +TAGS['fontBoldClose'] +':'
- else:
- tag = regex['x'].sub(listdefterm, TAGS['deflistItem1'])
- tag = tag + TAGS['deflistItem2'] # open <DD>
- if doctype == 'mgp': listid = len(listindent)*'\t'
- line = rgx.sub(listid+tag+extra,line)
- if listholdspace:
- line = listholdspace+line
- listholdspace = ''
-
-
- #---------------------[ Table ]-----------------------
-
- #TODO escape undesired format inside table
- #TODO add man, pm6 targets
- if regex['table'].search(line):
-
- table = get_table_prop(line)
-
- if subarea() != 'table':
- subarea.add('table') # first table line!
- if rules['tableable']: # table-aware target
- ret.append(get_tableopen_tag(table,doctype))
- else: # if not, use verb
- ret.append(TAGS['areaPreOpen'])
-
- if rules['tableable']:
- # setting line tags
- tl1 = TAGS['tableLineOpen']
- tl2 = TAGS['tableLineClose']
- # little table gotchas
- if rules['breaktablelineopen']:
- tl1 = tl1+'\n'
- if doctype == 'tex' and not tableborder:
- tl1 = ''
- # do cells and finish
- cells = tag_table_cells(table, doctype)
- line = tl1 + cells + tl2
-
-
- ### BEGIN of at-any-part-of-the-line/various-per-line TAGs.
-
- for beauti in ['Bold', 'Italic', 'Bolditalic', 'Underline']:
- if regex['font%s'%beauti].search(line):
- line = beautify_me(beauti, line)
-
- #---------------------[ URL & E-mail ]-----------------------
-
- for label,url in linkbank:
- link = get_tagged_link(label, url, CONF)
- line = string.replace(line, linkmask, link, 1)
-
- #---------------------[ Image ]-----------------------
-
- #TODO fix smart align when image is a link label
- while regex['img'].search(line) and TAGS['img'] != '[\a]':
- txt = regex['img'].search(line).group(1)
- tag = TAGS['img']
-
- # HTML is the only align-aware target for now
- if rules['imgalignable']:
- align = get_image_align(line)
- if align == 'para':
- align = 'center'
- tag= regex['x'].sub(tag,TAGS['imgsolo'])
- # add align on tag
- tag = regex['x'].sub(align, tag, 1)
-
- if doctype == 'tex': tag = re.sub(r'\\b',r'\\\\b',tag)
- line = regex['img'].sub(tag,line,1)
- line = regex['x'].sub(txt,line,1)
-
- #---------------------[ Expand Macros ]-----------------------
-
- if macrobank:
- for macro in macrobank:
- line = string.replace(line, macromask, macro,1)
- # now the line is full of macros again
- line = doDateMacro(line)
-
- #---------------------[ Expand PREs ]-----------------------
-
- for mono in monobank:
- open,close = TAGS['fontMonoOpen'],TAGS['fontMonoClose']
- tagged = open+mono+close
- line = string.replace(line,monomask,tagged,1)
-
- #---------------------[ Expand raw ]-----------------------
-
- for raw in rawbank:
- line = string.replace(line,rawmask,raw,1)
-
- #---------------------[ Final Escapes ]-----------------------
-
- line = doFinalEscape(doctype, line)
- ret.append(holdspace+line)
- holdspace = ''
-
- # We just need the included dump
- if CONF['dump-source']: return incdumpbuf
-
- # EOF: close any open lists/tables/quotes
- #TODO take table exception out when self.doctype
- while subarea():
- func = eval("doClose%s" % string.capitalize(subarea()))
- parm = None
- if subarea() == 'table': parm = doctype
- txt = func(parm)
- if txt: ret.append(txt)
-
- # add footer
- if not CONF['noheaders']:
- ret.extend(doFooter(CONF))
-
- if CONF['toconly']: ret = []
- return ret, toclist
- ################################################################################
- ##################################### GUI ######################################
- ################################################################################
- # tk help: http://python.org/topics/tkinter/
class Gui:
    """Graphical Tk interface.

    Builds a small window where the user picks the target type, the
    source file and some options, then runs the conversion.

    The Tkinter module and the dialog helpers (askopenfilename,
    showinfo, showwarning, showerror) are expected to be already
    imported at module level before this class is instantiated.
    """

    def __init__(self, conf=None):
        """Create the root window, its three frames and the Tk variables.

        conf -- dictionary with the initial settings (infile, type,
                outfile, flags). When omitted, a new empty dictionary
                is used.
        """
        # Never share one default dictionary between instances:
        # mainwindow() writes into self.conf.
        if conf is None:
            conf = {}

        self.bg = 'orange'
        self.root = Tkinter.Tk()
        self.root.config(bd=15,bg=self.bg)
        self.root.title("txt2tags")
        self.frame1 = Tkinter.Frame(self.root,bg=self.bg)
        self.frame1.pack(fill='x')
        self.frame2 = Tkinter.Frame(self.root,bg=self.bg)
        self.frame2.pack()
        self.frame3 = Tkinter.Frame(self.root,bg=self.bg)
        self.frame3.pack(fill='x')
        # self.frame is the parent used by the widget factories below
        self.frame = self.root

        self.conf = conf
        self.infile = self.setvar('')
        self.doctype = self.setvar('')
        self.checks = ['noheaders','enumtitle','toc','toconly','stdout']

        # one Tk variable per check box, stored as self.f_<name>
        for check in self.checks:
            setattr(self, 'f_'+check, self.setvar(''))

    ### Widget factories. All of them create the widget on self.frame.
    ### The options are handed to Tkinter as a dictionary (not expanded
    ### with **opts on the call) for python 1.5 compatibility.

    def entry(self, **opts):
        "Return a new Entry widget."
        return Tkinter.Entry(self.frame, opts)

    def label(self, txt='', **opts):
        "Return a new Label showing txt, using the GUI background color."
        opts.update({'text':txt,'bg':self.bg})
        return Tkinter.Label(self.frame, opts)

    def button(self,name,cmd,**opts):
        "Return a new Button labeled name, which calls cmd when pressed."
        opts.update({'text':name,'command':cmd})
        return Tkinter.Button(self.frame, opts)

    def check(self,name,val,checked=0,**opts):
        """Create and pack a Checkbutton (nothing is returned).

        The linked variable receives val when the box is checked and
        an empty string when it is not.
        """
        opts.update( {'text':name, 'onvalue':val, 'offvalue':'',
                      'anchor':'w', 'bg':self.bg,
                      'activebackground':self.bg} )
        chk = Tkinter.Checkbutton(self.frame, opts)
        if checked: chk.select()
        chk.pack(fill='x',padx=10)

    def exit(self):
        "Destroy the main window and quit the program."
        self.root.destroy()
        sys.exit()

    def setvar(self, val):
        "Return a new Tk StringVar holding val."
        var = Tkinter.StringVar()
        var.set(val)
        return var

    def menu(self,sel,items):
        "Return an OptionMenu bound to the variable sel, listing items."
        # apply() is kept instead of the f(*args) call syntax, following
        # the python 1.5 compatibility note above
        return apply(Tkinter.OptionMenu,(self.frame,sel)+tuple(items))

    def askfile(self):
        """Ask for the source file and rebuild the window with its config.

        Nothing is done when the user cancels the file dialog.
        """
        ftypes = [("txt2tags files",("*.t2t","*.txt")),("All files","*")]
        newfile = askopenfilename(filetypes=ftypes)
        if not newfile:
            return
        self.infile.set(newfile)
        newconf = getAllConf(['foo',newfile], nocheck=1)
        if newconf:
            # remove properties, get just the config dictionary
            newconf = newconf[0][0]
        else:
            # No config found: the window still needs a dictionary
            # (not the empty result itself), and mainwindow() reads
            # 'infile' from it to fill the entry field.
            newconf = {'infile': newfile}
        # restate all checkboxes after file selection
        # TODO how to make a refresh without killing it?
        self.root.destroy()
        self.__init__(newconf)
        self.mainwindow()

    def scrollwindow(self,txt='no text!',title=''):
        """Open a new window showing txt in a scrollable text area.

        txt   -- list of lines, or a single string
        title -- title of the new window
        """
        # A plain string would be joined char by char (one per line),
        # so it is inserted as is.
        if isinstance(txt, type('')):
            body = txt
        else:
            body = string.join(txt,'\n')
        win = Tkinter.Toplevel()
        win.title(title)
        scroll = Tkinter.Scrollbar(win)
        text = Tkinter.Text(win,yscrollcommand=scroll.set)
        scroll.config(command=text.yview)
        text.insert(Tkinter.END, body)
        text.pack(side='left',fill='both')
        scroll.pack(side='right',fill='y')

    def runprogram(self):
        """Compose a command line from the GUI state and run the conversion.

        The result is shown on a scroll window when the output goes to
        STDOUT, otherwise a dialog informs the saved file name.
        """
        # prepare
        infile, doctype = self.infile.get(), self.doctype.get()
        if not infile:
            showwarning('txt2tags',
                "You must provide the source file location!")
            return
        # compose cmdline: each checked box holds its '--option' string,
        # the unchecked ones hold an empty string
        guiflags = []
        for name in self.checks:
            opt = getattr(self, 'f_%s'%name).get()
            if opt: guiflags.append(opt)
        cmdline = ['txt2tags', '--gui', '-t', doctype] +guiflags +[infile]
        Debug('Gui/Tk cmdline: %s'%cmdline,1)
        # run!
        try:
            outlist, CONF = reallydoitall(cmdline)
            outfile = CONF['outfile']
            infile = CONF['infile']
            outlist = finish_him(outlist,CONF) or ''

            if outfile == STDOUT:
                title = 'txt2tags: %s converted to %s'%(
                    os.path.basename(infile),
                    string.upper(CONF['type']))
                self.scrollwindow(outlist, title)
            else:
                msg = "Conversion done!\n\n" +\
                      "FROM:\n\t%s\n"%infile +\
                      "TO:\n\t%s"%outfile
                showinfo('txt2tags', msg)
        except ZeroDivisionError:
            # Common error, do not quit. On GUI mode the Error()
            # function raises this exception after showing its message.
            pass
        except:
            # Anything else is a fatal error: dump the traceback,
            # warn the user and quit.
            traceback.print_exc()
            print('\nSorry! txt2tags-Tk Fatal Error.')
            errmsg = 'Unknown error occurred.\n\n'+\
                     'Please send the Error Traceback '+\
                     'dumped to the author:\n %s'%my_email
            showerror('txt2tags FATAL ERROR!',errmsg)
            self.exit()

    def mainwindow(self):
        """Draw all the widgets of the main window and enter the Tk loop.

        The initial state of the widgets is taken from self.conf.
        """
        #TODO show outfile somewhere
        #TODO redraw GUI only using grid() because pack() sux
        self.infile.set(self.conf.get('infile') or '')
        self.doctype.set(self.conf.get('type') or 'html')
        if self.conf.get('outfile') == STDOUT:  # map -o-
            self.conf['stdout'] = 1

        action1 = " \nChoose the target document type:"
        action2 = "\n\nEnter the tagged source file location:"
        action3 = "\n\nSome options you may check:"
        checks_txt = {
            'noheaders': "Suppress headers from output",
            'enumtitle': "Number titles (1, 1.1, 1.1.1, etc)",
            'toc'      : "Do TOC also (Table of Contents)",
            'toconly'  : "Just do TOC, nothing more",
            'stdout'   : "Dump to screen (Don't save target file)"
        }

        # top frame: program info and target chooser
        self.frame = self.frame1
        self.label("TXT2TAGS\n%s\nv%s"%(my_url,my_version)).pack()
        self.label(action1, anchor='w').pack(fill='x')
        self.menu(self.doctype, targets).pack()
        self.label(action2, anchor='w').pack(fill='x')

        # middle frame: source file entry and browse button
        self.frame = self.frame2
        self.entry(textvariable=self.infile).grid(row=0, column=0)
        self.button("Browse", self.askfile
            ).grid(row=0, column=1, padx=10)
        if self.conf.get('%!cmdline'):
            txt = '%%!cmdline: %s' % self.conf['%!cmdline']
            self.label(txt,fg='brown'
                ).grid(row=1, column=0, columnspan=2, sticky='w')

        # bottom frame: options and action buttons
        self.frame = self.frame3
        self.label(action3, anchor='w').pack(fill='x')

        # compose options check boxes, example:
        # self.check(checks_txt['toc'], '--toc', 1, variable=self.f_toc)
        for check in self.checks:
            txt = checks_txt[check]
            opt = '--'+check
            var = getattr(self, 'f_'+check)
            onoff = self.conf.get(check)
            self.check(txt,opt,onoff,variable=var)

        self.label('\n').pack()
        self.button("Quit", self.exit).pack(side='left',padx=40)
        self.button("Convert!", self.runprogram
            ).pack(side='right',padx=40)

        # as documentation told me
        if sys.platform[:3] == 'win':
            self.root.iconify()
            self.root.update()
            self.root.deiconify()

        self.root.mainloop()
- ################################################################################
- ################################################################################
if __name__ == '__main__':
    # Program entry point: decide between GUI and console mode, read
    # the configuration of all the input files and run the conversion.

    # set debug and remove option from cmdline
    if sys.argv.count('--debug'):
        DEBUG = 1
        sys.argv.remove('--debug')

    ### check if we will enter GUI mode
    CONF['gui'] = 0
    # GUI is the default on these platforms, when called alone
    if len(sys.argv) == 1 and sys.platform[:3] in ['mac','cyg','win']:
        CONF['gui'] = 1
    # user specified GUI mode
    if sys.argv.count('--gui'): CONF['gui'] = 1

    # check for GUI mode resources
    if CONF['gui'] == 1:
        try:
            from tkFileDialog import askopenfilename
            from tkMessageBox import showinfo,showwarning,showerror
            import Tkinter
        except ImportError:
            # Only a missing Tk module is handled here, so that
            # KeyboardInterrupt and SystemExit are not swallowed.
            # if GUI was forced, show the error message
            if len(sys.argv) > 1 and sys.argv[1] == '--gui':
                traceback.print_exc()
                sys.exit(1)  # failure must not exit with success status
            # or just abandon GUI mode, and continue
            else:
                CONF['gui'] = 0

    Debug("system platform: %s"%sys.platform,1)
    Debug("line break char: %s"%repr(LB),1)
    nocheck = CONF['gui']  # if GUI, no cmdline checking
    CONFS = getAllConf(sys.argv, nocheck)  # get all infiles config (if any)

    if CONF['gui'] == 1:
        if len(CONFS) > 1:
            Error("GUI doesn't support multiple Input files.")
        # remove properties, get just config
        if CONFS: conf = CONFS[0][0]
        else    : conf = {}

        # Redefine the Error function to raise an exception instead of
        # calling sys.exit(). Gui.runprogram() catches ZeroDivisionError
        # and keeps the window alive.
        def Error(msg):
            showerror('txt2tags ERROR!', msg)
            raise ZeroDivisionError
        Gui(conf).mainwindow()
    else:
        # console mode rocks forever!
        convertAllFiles(CONFS)

    sys.exit(0)
- # vim: ts=4