/old/txt2tags-1.2.py
Python | 2120 lines | 2095 code | 5 blank | 20 comment | 8 complexity | b851cc03425f3e667188c72857900df7 MD5 | raw file
Possible License(s): GPL-2.0, GPL-3.0, WTFPL
Large files are truncated, but you can click here to view the full file
- #!/usr/bin/env python
- # txt2tags - generic text conversion tool
- # http://txt2tags.sf.net
- #
- # Copyright 2001, 2002 Aurélio Marinho Jargas
- #
- # This program is free software; you can redistribute it and/or modify
- # it under the terms of the GNU General Public License as published by
- # the Free Software Foundation, version 2.
- #
- # This program is distributed in the hope that it will be useful,
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- # GNU General Public License for more details.
- #
- # You have received a copy of the GNU General Public License along
- # with this program, on the COPYING file.
- #
- # please, don't look at this code, it's ugly!
- # i'll try to make it better on the next releases
- import re, string, os, sys, getopt, traceback
- from time import strftime,time,localtime
# --- program identity (used by --version and by the generated footer)
my_url = 'http://txt2tags.sf.net'
my_email = 'aurelio@verde666.org'
my_version = '1.2'

DEBUG = 0 # do not edit here, please use --debug

# every target document type that can be generated
targets = ['txt', 'sgml', 'html', 'pm6', 'mgp', 'moin', 'man', 'tex']

# command line ON/OFF switches, all OFF (0) at start
FLAGS = {
    'noheaders': 0,
    'enumtitle': 0,
    'maskemail': 0,
    'stdout': 0,
    'toconly': 0,
    'toc': 0,
    'gui': 0,
    'fixme': 0,
}

# containers that start out empty at import time
regex = {}
TAGS = {}
rules = {}

CMDLINE = ''
currdate = strftime('%Y%m%d', localtime()) # ISO current date
splitlevel = ''
lang = 'english'
doctype = ''
outfile = ''
pipefileid = '-'
has_obsolete = 0
has_fixed = 0
#my_version = my_version + '-dev' + currdate[4:] # devel!

# global vars for doClose*()
quotedepth = []
listindent = []
listids = []
subarea = None
tableborder = 0

versionstr = "txt2tags version %s <%s>" % (my_version, my_url)
# Help text of the program. It is what Quit(usage) prints for -h/--help
# and when the file name or the target type is missing.
# The two %s slots are filled, in this order, with versionstr and with
# the list of targets written as plain comma separated names (the
# re.sub() strips the brackets and quotes from repr(targets)).
- usage = """
- %s
- usage: txt2tags -t <type> [OPTIONS] file.t2t
- txt2tags -t html -s <split level> -l <lang> file.t2t
- -t, --type target document type. actually supported:
- %s
- --stdout by default, the output is written to file.<type>
- with this option, STDOUT is used (no files written)
- --noheaders suppress header, title and footer information
- --enumtitle enumerate all title lines as 1, 1.1, 1.1.1, etc
- --maskemail hide email from spam robots. x@y.z turns to <x (a) y z>
- --toc add TOC (Table of Contents) to target document
- --toconly print document TOC and exit
- --gui invoke Graphical Tk Interface
- --fixme temporary option to fix obsoleted structures
- -h, --help print this help information and exit
- -V, --version print program version and exit
- extra options for HTML target (needs sgml-tools):
- --split split documents. values: 0, 1, 2 (default 0)
- --lang document language (default english)
- """%(versionstr, re.sub(r"[]'[]",'',repr(targets)))
- # here is all the target's templates
- # you may edit them to fit your needs
- # - the %(HEADERn)s strings represent the Header lines
- # - use %% to represent a literal %
- #
# Document header of every target, keyed by target type name.
# doHeader() splits the chosen template into lines, drops lines whose
# only header reference has no value, and then fills the remaining
# %(HEADER1)s, %(HEADER2)s and %(HEADER3)s slots with the '%' operator.
# Because of that '%' formatting, a literal percent sign inside a
# template has to be written as %%.
- HEADER_TEMPLATE = {
- 'txt': """\
- %(HEADER1)s
- %(HEADER2)s
- %(HEADER3)s
- """,
- 'sgml': """\
- <!doctype linuxdoc system>
- <article>
- <title>%(HEADER1)s
- <author>%(HEADER2)s
- <date>%(HEADER3)s
- """,
- #TODO (peter valach) <meta http-equiv="Content-Type"
- # content="text/html; charset=iso-8859-2">
- 'html': """\
- <HTML>
- <HEAD><TITLE>%(HEADER1)s</TITLE></HEAD>
- <BODY BGCOLOR="white" TEXT="black">
- <P ALIGN="center"><CENTER><H1>%(HEADER1)s</H1>
- <FONT SIZE=4>
- <I>%(HEADER2)s</I><BR>
- %(HEADER3)s
- </FONT></CENTER>
- """,
- # TODO man section 1 is hardcoded...
- 'man': """\
- .TH "%(HEADER1)s" 1 %(HEADER3)s "%(HEADER2)s"
- """,
- # TODO style to <HR>
- 'pm6': """\
- <PMTags1.0 win><C-COLORTABLE ("Preto" 1 0 0 0)
- ><@Normal=
- <FONT "Times New Roman"><CCOLOR "Preto"><SIZE 11>
- <HORIZONTAL 100><LETTERSPACE 0><CTRACK 127><CSSIZE 70><C+SIZE 58.3>
- <C-POSITION 33.3><C+POSITION 33.3><P><CBASELINE 0><CNOBREAK 0><CLEADING -0.05>
- <GGRID 0><GLEFT 7.2><GRIGHT 0><GFIRST 0><G+BEFORE 7.2><G+AFTER 0>
- <GALIGNMENT "justify"><GMETHOD "proportional"><G& "ENGLISH">
- <GPAIRS 12><G%% 120><GKNEXT 0><GKWIDOW 0><GKORPHAN 0><GTABS $>
- <GHYPHENATION 2 34 0><GWORDSPACE 75 100 150><GSPACE -5 0 25>
- ><@Bullet=<@-PARENT "Normal"><FONT "Abadi MT Condensed Light">
- <GLEFT 14.4><G+BEFORE 2.15><G%% 110><GTABS(25.2 l "")>
- ><@PreFormat=<@-PARENT "Normal"><FONT "Lucida Console"><SIZE 8><CTRACK 0>
- <GLEFT 0><G+BEFORE 0><GALIGNMENT "left"><GWORDSPACE 100 100 100><GSPACE 0 0 0>
- ><@Title1=<@-PARENT "Normal"><FONT "Arial"><SIZE 14><B>
- <GCONTENTS><GLEFT 0><G+BEFORE 0><GALIGNMENT "left">
- ><@Title2=<@-PARENT "Title1"><SIZE 12><G+BEFORE 3.6>
- ><@Title3=<@-PARENT "Title1"><SIZE 10><GLEFT 7.2><G+BEFORE 7.2>
- ><@Title4=<@-PARENT "Title3">
- ><@Title5=<@-PARENT "Title3">
- ><@Quote=<@-PARENT "Normal"><SIZE 10><I>>
- %(HEADER1)s
- %(HEADER2)s
- %(HEADER3)s
- """,
- #TODO escape lines beginning with %% after all formatting
- 'mgp': """\
- #!/usr/X11R6/bin/mgp -t 90
- %%deffont "normal" xfont "utopia-medium-r", charset "iso8859-1"
- %%deffont "normal-i" xfont "utopia-medium-i", charset "iso8859-1"
- %%deffont "normal-b" xfont "utopia-bold-r" , charset "iso8859-1"
- %%deffont "normal-bi" xfont "utopia-bold-i" , charset "iso8859-1"
- %%deffont "mono" xfont "courier-medium-r", charset "iso8859-1"
- %%default 1 size 5
- %%default 2 size 8, fore "yellow", font "normal-b", center
- %%default 3 size 5, fore "white", font "normal", left, prefix " "
- %%tab 1 size 4, vgap 30, prefix " ", icon arc "red" 40, leftfill
- %%tab 2 prefix " ", icon arc "orange" 40, leftfill
- %%tab 3 prefix " ", icon arc "brown" 40, leftfill
- %%tab 4 prefix " ", icon arc "darkmagenta" 40, leftfill
- %%tab 5 prefix " ", icon arc "magenta" 40, leftfill
- %%%%------------------------- end of headers -----------------------------
- %%page
- %%size 10, center, fore "yellow"
- %(HEADER1)s
- %%font "normal-i", size 6, fore "white", center
- %(HEADER2)s
- %%font "mono", size 7, center
- %(HEADER3)s
- """,
- # TODO please, improve me!
- 'moin': """\
- %(HEADER1)s
- %(HEADER2)s
- %(HEADER3)s
- """,
- #TODO how to make acrobat left-side pages count?
- # \includepackage{graphics}
- # --lang matters! \usepackage[brazil]{babel}
- # local links seems to be \label, \ref, \pageref
- 'tex': \
- r"""\documentclass[11pt,a4paper]{article}
- \usepackage{amsfonts,amssymb,graphicx,url}
- \usepackage[latin1]{inputenc} %% for accented chars
- \pagestyle{plain} %% do page numbering ('empty' turns off)
- \frenchspacing %% no aditional spaces after periods
- %% all paragraph must be indented equaly
- \setlength{\parskip}{8pt}\parindent=0pt
- %% uncomment next line for fancy PDF output on Adobe Acrobat Reader
- %%\usepackage[pdfstartview=FitV,colorlinks=true,bookmarks=true]{hyperref}
- \newcommand\email{\begingroup \urlstyle{tt}\Url} %% for email
- \title{%(HEADER1)s}
- \author{%(HEADER2)s}
- \begin{document}
- \date{%(HEADER3)s}
- \maketitle
- """
- }
- #-----------------------------------------------------------------------
def Quit(msg, exitcode=0):
    """Print *msg* on stdout and leave the program.

    exitcode is handed to sys.exit() as is (default 0).
    """
    print(msg)
    sys.exit(exitcode)
def Error(msg):
    """Print *msg* prefixed with 'ERROR: ' and abort the program.

    The program ends with exit status 1, so that shells and scripts
    calling txt2tags can tell a failed run from a successful one
    (a bare sys.exit() reports success).

    Raises SystemExit, always.
    """
    print("ERROR: %s" % msg)
    sys.exit(1)
def Debug(msg, i=0, linenr=None):
    """Print the debug message *msg* when its level is active.

    i      -- level of the message; nothing is printed when it is
              greater than the global DEBUG
    linenr -- optional line number, shown zero padded before the message
    """
    if i > DEBUG:
        return
    if linenr is None:
        text = "(%d) %s" % (i, msg)
    else:
        text = "(%d) %04d:%s" % (i, linenr, msg)
    print(text)
def Readfile(file):
    """Return the contents of *file* as a list of lines.

    file -- path of the file to read, or '-' to read from STDIN

    A read failure is reported through Error(), which ends the program.
    Only I/O errors are handled that way, so Ctrl-C and sys.exit() are
    no longer turned into a misleading "cannot read" message.
    """
    if file == '-':
        try:
            return sys.stdin.readlines()
        except (IOError, OSError):
            Error('You must feed me with data on STDIN!')
    else:
        try:
            f = open(file)
            try:
                return f.readlines()
            finally:
                # close the file even when reading it fails
                f.close()
        except (IOError, OSError):
            Error("Cannot read file:\n %s"%file)
def Savefile(file, contents):
    """Write *contents* to *file*, replacing whatever it had before.

    contents -- one string, or a list of strings which are written one
                after the other (no line breaks are added here)

    When the file cannot be opened the problem is reported through
    Error(), which ends the program.
    """
    try:
        f = open(file, 'w')
    except (IOError, OSError):
        Error("Cannot open file for writing:\n %s"%file)
    try:
        if isinstance(contents, list):
            f.writelines(contents)
        else:
            f.write(contents)
    finally:
        # do not leave the handle open when the write fails
        f.close()
def NewArea(new, linenr):
    """Validate the area name *new* and return it.

    Only 'head', 'conf' and 'body' are accepted; any other name is
    reported with Error(), together with the line number *linenr*.
    """
    known_areas = ('head', 'conf', 'body')
    if new not in known_areas:
        Error("Invalid new AREA '%s' on line '%s'"%(new,linenr))
    Debug('NEW AREA: %s'%new, 1, linenr)
    return new
def Obsoleted(n, old, new, ver):
    """Warn that line *n* uses the obsolete structure *old*.

    The warning names the replacement *new* and the txt2tags version
    *ver* on which the old structure will be removed.  The global
    has_obsolete is turned on as a side effect.
    """
    global has_obsolete
    has_obsolete = 1
    print('OBSOLETE WARNING: line %04d'%n)
    details = (
        (' The %s is now obsoleted and will be removed', old),
        (' on txt2tags version %s. Please use %s instead.', (ver, new)),
    )
    for text, data in details:
        print(text % data)
def Fixed(n, old, new):
    """Report that on line *n* the text *old* was rewritten as *new*.

    The global has_fixed is turned on as a side effect.
    """
    global has_fixed
    has_fixed = 1
    report = 'FIXED: line %04d: %15s ---> %s' % (n, old, new)
    print(report)
def FixTable(tabline):
    """Convert a TAB separated table line to the spaced pipes format.

    The first TAB becomes a blank, every other TAB becomes ' | ', and a
    closing ' |' (for the border) is added before the line break.
    """
    untabbed = tabline.replace('\t', ' ', 1)      # del 1st TAB
    piped = untabbed.replace('\t', ' | ')         # TABs -> spaced pipes
    return re.sub('(.*[^\n\r])', r'\1 |', piped)  # add last pipe
def FixFile(file, newcontent):
    """Overwrite *file* with *newcontent*, keeping a backup copy.

    The present contents of *file* are first copied to '<file>.OLD',
    then *newcontent* (anything Savefile() takes) is written to *file*.
    The messages name the file that was really handled: they use the
    *file* argument rather than a name from outside this function.
    """
    Savefile(file+'.OLD', Readfile(file)) # copy to .old
    Savefile(file, newcontent)
    print("\nFile '%s' correctly updated and saved."%(file))
    print("The old contents were saved to '%s.OLD'."%(file))
# Print the closing notice shown when obsolete structures were found on
# the source document. infile is the source file name; it is placed on
# the suggested 'txt2tags --fixme' command line.
- def ObsoletedInstructions(infile):
- print """\n
- ATTENTION:
- Some obsoleted txt2tags structures were found on your source document.
- Please correct them by hand, or just run:
- txt2tags --fixme %s
- To update (rewrite) this file automatically.
- Please don't ignore this message.
- On next releases of txt2tags, these old structures will not be valid.
- \n"""%infile
def reset_flags():
    """Turn every switch of the global FLAGS dictionary OFF (0)."""
    for name in FLAGS.keys():
        FLAGS[name] = 0
def set_outfile_name(infile, doctype):
    """Return the output file name for *infile* and the target *doctype*.

    Nothing (None) is returned for an empty *infile*.  The result is
    pipefileid when the source is pipefileid or when the toconly or
    stdout flag is on.  Otherwise it is *infile* with a trailing .txt or
    .t2t removed and '.<doctype>' added, so the dirname is the same for
    {in,out}file.
    """
    if not infile:
        return
    to_pipe = infile == pipefileid or FLAGS['toconly'] or FLAGS['stdout']
    if to_pipe:
        outfile = pipefileid
    else:
        stem = re.sub(r'\.(txt|t2t)$', '', infile)
        outfile = "%s.%s" % (stem, doctype)
    Debug(" infile: '%s'"% infile, 1)
    Debug("outfile: '%s'"%outfile, 1)
    return outfile
# Deliver the converted document held in outlist (a list of lines).
# When outfile is pipefileid the lines are printed; otherwise they are
# saved to outfile through Savefile(), with the line breaks added by
# addLineBreaks(), and the file name is reported unless the gui flag is on.
# When the global splitlevel is set, the external 'sgml2html' command is
# run on outfile with the global lang and splitlevel as its options.
# NOTE(review): the nesting of the 'if splitlevel:' part (function level
# or inside the else branch) cannot be told from this listing - confirm.
# NOTE(review): the command is assembled with plain string formatting
# and run through os.system(); lang, splitlevel and outfile are not
# quoted, so shell metacharacters in a file name would be interpreted
# by the shell. Consider quoting them or using an argument list call.
- def finish_him(outlist, outfile):
- "writing output to screen or file"
- if outfile == pipefileid:
- for line in outlist: print line
- else:
- Savefile(outfile, addLineBreaks(outlist))
- if not FLAGS['gui']: print 'wrote %s'%(outfile)
- 
- if splitlevel:
- print "--- html..."
- os.system('sgml2html --language=%s --split=%s %s'%(
- lang,splitlevel,outfile))
def ParseCmdline(cmdline=sys.argv):
    """Return a dictionary with every option:value found on *cmdline*.

    cmdline -- argument list, program name first (default sys.argv); it
               is also stored on the global CMDLINE

    The result always has 'infile' (first file name) and 'infiles' (all
    file names).  The target type is stored as 'doctype'; any other long
    option is stored under its own name, without the leading dashes.
    The help and version options print their text and leave the program.
    """
    global CMDLINE
    CMDLINE = cmdline # save for dofooter()
    Debug("cmdline: %s"%cmdline, 1)
    options = {'infile': '', 'infiles': ''}

    # every flag is also a long option
    longopt = ['help', 'version', 'type=', 'split=', 'lang=']
    longopt.extend(FLAGS.keys())
    try:
        opt, args = getopt.getopt(cmdline[1:], 'hVt:', longopt)
    except getopt.GetoptError:
        Error('Bad option or missing argument (try --help)')

    # source file name(s), if any
    if args:
        options['infile'] = args[0]
        options['infiles'] = args # multi

    for name, val in opt:
        if name in ('-h', '--help'):
            Quit(usage)
        elif name in ('-V', '--version'):
            Quit(versionstr)
        elif name in ('-t', '--type'):
            options['doctype'] = val
        else:
            # just long options
            options[name[2:]] = val # del --

    Debug("cmdline options: %s"%options, 1)
    return options
def ParseCmdlineOptions(optdic):
    """Set vars and flags according to the options dictionary *optdic*.

    optdic -- dictionary of options, as built by ParseCmdline()

    Returns the tuple (infile, outfile, doctype).  The globals FLAGS,
    splitlevel and lang are updated on the way.  A missing file name or
    target type prints the usage text and leaves the program; invalid
    values are reported with Error().
    """
    global FLAGS, splitlevel, lang

    # store flags and vars
    myflags = [] # for debug msg
    for flag in FLAGS.keys():
        if flag in optdic:
            FLAGS[flag] = 1
            myflags.append(flag)
    doctype = optdic.get('doctype')
    infile = optdic.get('infile')
    splitlevel = optdic.get('split')
    # keep the documented default when --lang is not given: storing the
    # None of a plain .get() would later build '--language=None' on the
    # sgml2html command line
    lang = optdic.get('lang') or 'english'
    Debug("cmdline flags: %s" % ', '.join(myflags), 1)

    if FLAGS['fixme']:
        if not infile: Quit(usage, 1)
        doctype = 'moin' # bogus, not used at all

    if not doctype and FLAGS['toconly']: doctype = 'txt' # toconly dft type
    if not infile or not doctype: Quit(usage, 1) # no filename/doctype

    # sanity check: validate target type
    if not targets.count(doctype):
        Error("Invalid document type '%s' (try --help)"%(doctype))

    outfile = set_outfile_name(infile, doctype)

    # sanity check: validate split level
    if doctype != 'html': splitlevel = '' # only valid for HTML target
    if splitlevel:
        # checkings
        if outfile == pipefileid:
            Error('You need to provide a FILE (not STDIN) '
                  'when using --split')
        if splitlevel[0] not in '012':
            Error('Option --split must be 0, 1 or 2')
        # check for sgml-tools
        #TODO how to test (in a clever way) if an executable is in path?
        #TODO os.system() return code? sgml2html w/out --help exit 0?
        #TODO bah! implement sgml2html split natively and we're done
        # Error("Sorry, you must have 'sgml2html' to use --split")

        # set things
        FLAGS['stdout'] = 0 # no --stdout
        doctype = 'sgml' # 1st do a sgml, then sgml2html
        outfile = set_outfile_name(infile, doctype)

    # sanity check: source loss!
    if infile != pipefileid and infile == outfile:
        Error("SUICIDE WARNING!!! (try --stdout)\n source"+\
              " and target files has the same name: %s"%outfile)
    ### yes, i've got my sample.t2t file deleted before add this test... :/

    #TODO splitlevel, lang
    return infile,outfile,doctype
#---End of ParseCmdlineOptions
def toc_master(doctype, header, doc, toc):
    """Join header, TOC and document body, deciding if the TOC goes in.

    header, doc, toc -- lists of lines; toc still holds txt2tags marks

    Returns one list with the lines of the final document.  When the toc
    or toconly flag is on, the noheaders flag is turned on and the TOC
    is converted to the target *doctype*.
    """
    toc_wanted = FLAGS['toc'] or FLAGS['toconly']
    if not toc_wanted:
        toc = []
    else:
        # the TOC is converted as any other t2t marked text (list type)
        FLAGS['noheaders'] = 1
        unused_head, unused_toc, toc = convert(['']+toc+['',''], doctype)

        # TOC between bars (not for --toconly)
        if FLAGS['toc']:
            para = TAGS['paragraph']
            bar = regex['x'].sub('-'*72,TAGS['bar1'])
            tocbar = [para, bar, para]
            toc = tocbar + toc + tocbar

        if FLAGS['toconly']:
            header = doc = []

    # on tex, \tableofcontents do it all - see doHeader()
    if doctype == 'tex' and not FLAGS['toconly']:
        toc = []

    return header + toc + doc
def doitall(cmdlinedic):
    """Run the whole conversion described by the options dictionary.

    Returns the tuple (doctype, outfile, outlist), where outlist is the
    list of lines of the converted document.  The global outfile is set
    on the way, and the instructions about obsolete structures are
    printed when has_obsolete is on.
    """
    global outfile
    infile, outfile, doctype = ParseCmdlineOptions(cmdlinedic)
    source = Readfile(infile)
    header, toc, doc = convert(source, doctype)
    outlist = toc_master(doctype, header, doc, toc)
    if has_obsolete:
        ObsoletedInstructions(infile)
    return doctype, outfile, outlist
# set the Line Break across platforms
# The first three chars of sys.platform choose it; '\n' is the default.
# 'cyg' is left on the default - not sure if '\r\n' is best there :(
LB = {'win': '\r\n', 'mac': '\r'}.get(sys.platform[:3], '\n')
# Return the dictionary of tags (markup strings) of the target doctype.
# The result has an entry for every name on the 'keys' list below; the
# tags that a target does not define come back as empty strings.
# The bell char (\a) inside a tag marks the place where the data is
# inserted later.
# NOTE(review): there is no fallback branch for an unknown doctype, so
# 'tags' would be unbound on the final loop - callers presumably validate
# the doctype first; confirm.
- def getTags(doctype):
- keys = [
- 'paragraph','title1','title2','title3','title4','title5',
- 'areaPreOpen','areaPreClose',
- 'areaQuoteOpen','areaQuoteClose',
- 'fontMonoOpen','fontMonoClose',
- 'fontBoldOpen','fontBoldClose',
- 'fontItalicOpen','fontItalicClose',
- 'fontBolditalicOpen','fontBolditalicClose',
- 'fontUnderlineOpen','fontUnderlineClose',
- 'listOpen','listClose','listItem',
- 'numlistOpen','numlistClose','numlistItem',
- 'deflistOpen','deflistClose','deflistItem1','deflistItem2',
- 'bar1','bar2',
- 'url','urlMark','email','emailMark',
- 'img','imgsolo',
- 'tableOpen','tableClose','tableLineOpen','tableLineClose',
- 'tableCellOpen','tableCellClose',
- 'tableTitleCellOpen','tableTitleCellClose',
- 'anchor','comment',
- 'EOD'
- ]
- 
- if doctype == "txt":
- tags = {
- 'title1' : ' \a' ,
- 'title2' : '\t\a' ,
- 'title3' : '\t\t\a' ,
- 'title4' : '\t\t\t\a' ,
- 'title5' : '\t\t\t\t\a',
- 'areaQuoteOpen' : ' ' ,
- 'listItem' : '- ' ,
- 'numlistItem' : '\a. ' ,
- 'bar1' : '\a' ,
- 'bar2' : '\a' ,
- 'url' : '\a' ,
- 'urlMark' : '\a (\a)' ,
- 'email' : '\a' ,
- 'emailMark' : '\a (\a)' ,
- 'img' : '[\a]' ,
- }
- 
- elif doctype == "html":
- tags = {
- 'paragraph' : '<P>' ,
- 'title1' : '<H1>\a</H1>' ,
- 'title2' : '<H2>\a</H2>' ,
- 'title3' : '<H3>\a</H3>' ,
- 'title4' : '<H4>\a</H4>' ,
- 'title5' : '<H5>\a</H5>' ,
- 'areaPreOpen' : '<PRE>' ,
- 'areaPreClose' : '</PRE>' ,
- 'areaQuoteOpen' : '<BLOCKQUOTE>' ,
- 'areaQuoteClose' : '</BLOCKQUOTE>' ,
- 'fontMonoOpen' : '<CODE>' ,
- 'fontMonoClose' : '</CODE>' ,
- 'fontBoldOpen' : '<B>' ,
- 'fontBoldClose' : '</B>' ,
- 'fontItalicOpen' : '<I>' ,
- 'fontItalicClose' : '</I>' ,
- 'fontBolditalicOpen' : '<B><I>' ,
- 'fontBolditalicClose': '</I></B>' ,
- 'fontUnderlineOpen' : '<U>' ,
- 'fontUnderlineClose' : '</U>' ,
- 'listOpen' : '<UL>' ,
- 'listClose' : '</UL>' ,
- 'listItem' : '<LI>' ,
- 'numlistOpen' : '<OL>' ,
- 'numlistClose' : '</OL>' ,
- 'numlistItem' : '<LI>' ,
- 'deflistOpen' : '<DL>' ,
- 'deflistClose' : '</DL>' ,
- 'deflistItem1' : '<DT>\a</DT>' ,
- 'deflistItem2' : '<DD>' ,
- 'bar1' : '<HR NOSHADE SIZE=1>' ,
- 'bar2' : '<HR NOSHADE SIZE=5>' ,
- 'url' : '<A HREF="\a">\a</A>' ,
- 'urlMark' : '<A HREF="\a">\a</A>' ,
- 'email' : '<A HREF="mailto:\a">\a</A>' ,
- 'emailMark' : '<A HREF="mailto:\a">\a</A>' ,
- 'img' : '<IMG ALIGN="\a" SRC="\a" BORDER="0">',
- 'imgsolo' : '<P ALIGN="center">\a</P>' ,
- 'tableOpen' : '<table align=center cellpadding=4 border=\a>',
- 'tableClose' : '</table>' ,
- 'tableLineOpen' : '<tr>' ,
- 'tableLineClose' : '</tr>' ,
- 'tableCellOpen' : '<td>' ,
- 'tableCellClose' : '</td>' ,
- 'tableTitleCellOpen' : '<th>' ,
- 'tableTitleCellClose': '</th>' ,
- 'anchor' : '<a name="\a">' ,
- 'comment' : '<!-- \a -->' ,
- 'EOD' : '</BODY></HTML>'
- }
- 
- elif doctype == "sgml":
- tags = {
- 'paragraph' : '<p>' ,
- 'title1' : '<sect>\a<p>' ,
- 'title2' : '<sect1>\a<p>' ,
- 'title3' : '<sect2>\a<p>' ,
- 'title4' : '<sect3>\a<p>' ,
- 'title5' : '<sect4>\a<p>' ,
- 'areaPreOpen' : '<tscreen><verb>' ,
- 'areaPreClose' : '</verb></tscreen>' ,
- 'areaQuoteOpen' : '<quote>' ,
- 'areaQuoteClose' : '</quote>' ,
- 'fontMonoOpen' : '<tt>' ,
- 'fontMonoClose' : '</tt>' ,
- 'fontBoldOpen' : '<bf>' ,
- 'fontBoldClose' : '</bf>' ,
- 'fontItalicOpen' : '<em>' ,
- 'fontItalicClose' : '</em>' ,
- 'fontBolditalicOpen' : '<bf><em>' ,
- 'fontBolditalicClose': '</em></bf>' ,
- 'fontUnderlineOpen' : '<bf><em>' ,
- 'fontUnderlineClose' : '</em></bf>' ,
- 'listOpen' : '<itemize>' ,
- 'listClose' : '</itemize>' ,
- 'listItem' : '<item>' ,
- 'numlistOpen' : '<enum>' ,
- 'numlistClose' : '</enum>' ,
- 'numlistItem' : '<item>' ,
- 'bar1' : '<!-- \a -->' ,
- 'bar2' : '<!-- \a -->' ,
- 'url' : '<htmlurl url="\a" name="\a">' ,
- 'urlMark' : '<htmlurl url="\a" name="\a">' ,
- 'email' : '<htmlurl url="mailto:\a" name="\a">' ,
- 'emailMark' : '<htmlurl url="mailto:\a" name="\a">' ,
- 'img' : '<figure><ph vspace=""><img src="\a"></figure>',
- 'tableOpen' : '<table><tabular ca="c">' ,
- 'tableClose' : '</tabular></table>' ,
- 'tableLineClose' : '<rowsep>' ,
- 'tableCellClose' : '<colsep>' ,
- 'tableTitleCellClose': '<colsep>' ,
- 'comment' : '<!-- \a -->' ,
- 'EOD' : '</article>'
- }
- 
- elif doctype == "tex":
- tags = {
- 'title1' : '\n\\newpage\section{\a}',
- 'title2' : '\\subsection{\a}' ,
- 'title3' : '\\subsubsection{\a}' ,
- # title 4/5: DIRTY: para+BF+\\+\n
- 'title4' : '\\paragraph{}\\textbf{\a}\\\\\\\n' ,
- 'title5' : '\\paragraph{}\\textbf{\a}\\\\\\\n' ,
- 'areaPreOpen' : '\\begin{verbatim}' ,
- 'areaPreClose' : '\\end{verbatim}' ,
- 'areaQuoteOpen' : '\\begin{quotation}' ,
- 'areaQuoteClose' : '\\end{quotation}' ,
- 'fontMonoOpen' : '\\texttt{' ,
- 'fontMonoClose' : '}' ,
- 'fontBoldOpen' : '\\textbf{' ,
- 'fontBoldClose' : '}' ,
- 'fontItalicOpen' : '\\textit{' ,
- 'fontItalicClose' : '}' ,
- 'fontBolditalicOpen' : '\\textbf{\\textit{' ,
- 'fontBolditalicClose': '}}' ,
- 'fontUnderlineOpen' : '\\underline{' ,
- 'fontUnderlineClose' : '}' ,
- 'listOpen' : '\\begin{itemize}' ,
- 'listClose' : '\\end{itemize}' ,
- 'listItem' : '\\item ' ,
- 'numlistOpen' : '\\begin{enumerate}' ,
- 'numlistClose' : '\\end{enumerate}' ,
- 'numlistItem' : '\\item ' ,
- 'deflistOpen' : '\\begin{description}' ,
- 'deflistClose' : '\\end{description}' ,
- 'deflistItem1' : '\\item[\a]' ,
- 'bar1' : '\n\\hrulefill{}\n' ,
- 'bar2' : '\n\\rule{\linewidth}{1mm}\n' ,
- 'url' : '\\url{\a}' ,
- 'urlMark' : '\\textit{\a} (\\url{\a})' ,
- 'email' : '\\email{\a}' ,
- 'emailMark' : '\\textit{\a} (\\email{\a})' ,
- 'img' : '(\a)' ,
- 'tableOpen' : '\\begin{center}\\begin{tabular}',
- 'tableClose' : '\\end{tabular}\\end{center}' ,
- 'tableLineOpen' : '\\hline ' ,
- 'tableLineClose' : ' \\\\' ,
- 'tableCellClose' : ' & ' ,
- 'tableTitleCellOpen' : '\\textbf{' ,
- 'tableTitleCellClose': '} & ' ,
- 'comment' : '% \a' ,
- 'EOD' : '\\end{document}'
- }
- 
- elif doctype == "moin":
- tags = {
- 'title1' : '= \a =' ,
- 'title2' : '== \a ==' ,
- 'title3' : '=== \a ===' ,
- 'title4' : '==== \a ====' ,
- 'title5' : '===== \a =====' ,
- 'areaPreOpen' : '{{{' ,
- 'areaPreClose' : '}}}' ,
- 'areaQuoteOpen' : ' ' ,
- 'fontMonoOpen' : '{{{' ,
- 'fontMonoClose' : '}}}' ,
- 'fontBoldOpen' : "'''" ,
- 'fontBoldClose' : "'''" ,
- 'fontItalicOpen' : "''" ,
- 'fontItalicClose' : "''" ,
- 'fontBolditalicOpen' : "'''''" ,
- 'fontBolditalicClose': "'''''" ,
- 'fontUnderlineOpen' : "'''''" ,
- 'fontUnderlineClose' : "'''''" ,
- 'listItem' : '* ' ,
- 'numlistItem' : '\a. ' ,
- 'bar1' : '----' ,
- 'bar2' : '----' ,
- 'url' : '[\a]' ,
- 'urlMark' : '[\a \a]' ,
- 'email' : '[\a]' ,
- 'emailMark' : '[\a \a]' ,
- 'img' : '[\a]' ,
- 'tableLineOpen' : '||' ,
- 'tableCellClose' : '||' ,
- 'tableTitleCellClose': '||' ,
- }
- 
- elif doctype == "mgp":
- tags = {
- 'paragraph' : '%font "normal", size 5\n' ,
- 'title1' : '%page\n\n\a' ,
- 'title2' : '%page\n\n\a' ,
- 'title3' : '%page\n\n\a' ,
- 'title4' : '%page\n\n\a' ,
- 'title5' : '%page\n\n\a' ,
- 'areaPreOpen' : '\n%font "mono"' ,
- 'areaPreClose' : '%font "normal"' ,
- 'areaQuoteOpen' : '%prefix " "' ,
- 'areaQuoteClose' : '%prefix " "' ,
- 'fontMonoOpen' : '\n%cont, font "mono"\n' ,
- 'fontMonoClose' : '\n%cont, font "normal"\n' ,
- 'fontBoldOpen' : '\n%cont, font "normal-b"\n' ,
- 'fontBoldClose' : '\n%cont, font "normal"\n' ,
- 'fontItalicOpen' : '\n%cont, font "normal-i"\n' ,
- 'fontItalicClose' : '\n%cont, font "normal"\n' ,
- 'fontBolditalicOpen' : '\n%cont, font "normal-bi"\n' ,
- 'fontBolditalicClose': '\n%cont, font "normal"\n' ,
- 'fontUnderlineOpen' : '\n%cont, fore "cyan"\n' ,
- 'fontUnderlineClose' : '\n%cont, fore "white"\n' ,
- 'numlistItem' : '\a. ' ,
- 'bar1' : '%bar "white" 5' ,
- 'bar2' : '%pause' ,
- 'url' : '\n%cont, fore "cyan"\n\a\n%cont, fore "white"\n',
- 'urlMark' : '\a \n%cont, fore "cyan"\n\a\n%cont, fore "white"\n',
- 'email' : '\n%cont, fore "cyan"\n\a\n%cont, fore "white"\n',
- 'emailMark' : '\a \n%cont, fore "cyan"\n\a\n%cont, fore "white"\n',
- 'img' : '\n%center\n%newimage "\a", left\n',
- 'comment' : '%% \a' ,
- 'EOD' : '%%EOD'
- }
- 
- elif doctype == "man":
- tags = {
- 'paragraph' : '.P' ,
- 'title1' : '.SH \a' ,
- 'title2' : '.SS \a' ,
- 'title3' : '.SS \a' ,
- 'title4' : '.SS \a' ,
- 'title5' : '.SS \a' ,
- 'areaPreOpen' : '.nf' ,
- 'areaPreClose' : '.fi\n' ,
- 'areaQuoteOpen' : '\n' ,
- 'areaQuoteClose' : '\n' ,
- 'fontBoldOpen' : '\\fB' ,
- 'fontBoldClose' : '\\fP' ,
- 'fontItalicOpen' : '\\fI' ,
- 'fontItalicClose' : '\\fP' ,
- 'fontBolditalicOpen' : '\n.BI ' ,
- 'fontBolditalicClose': '\n\\&' ,
- 'listOpen' : '\n.nf' , # pre
- 'listClose' : '.fi\n' ,
- 'listItem' : '* ' ,
- 'numlistOpen' : '\n.nf' , # pre
- 'numlistClose' : '.fi\n' ,
- 'numlistItem' : '\a. ' ,
- 'bar1' : '\n\n' ,
- 'bar2' : '\n\n' ,
- 'url' : '\a' ,
- 'urlMark' : '\a (\a)' ,
- 'email' : '\a' ,
- 'emailMark' : '\a (\a)' ,
- 'img' : '\a' ,
- 'comment' : '.\\" \a'
- }
- 
- elif doctype == "pm6":
- tags = {
- 'paragraph' : '<@Normal:>' ,
- 'title1' : '\n<@Title1:>\a' ,
- 'title2' : '\n<@Title2:>\a' ,
- 'title3' : '\n<@Title3:>\a' ,
- 'title4' : '\n<@Title4:>\a' ,
- 'title5' : '\n<@Title5:>\a' ,
- 'areaPreOpen' : '<@PreFormat:>' ,
- 'areaQuoteOpen' : '<@Quote:>' ,
- 'fontMonoOpen' : '<FONT "Lucida Console"><SIZE 9>' ,
- 'fontMonoClose' : '<SIZE$><FONT$>' ,
- 'fontBoldOpen' : '<B>' ,
- 'fontBoldClose' : '<P>' ,
- 'fontItalicOpen' : '<I>' ,
- 'fontItalicClose' : '<P>' ,
- 'fontBolditalicOpen' : '<B><I>' ,
- 'fontBolditalicClose': '<P>' ,
- 'fontUnderlineOpen' : '<U>' ,
- 'fontUnderlineClose' : '<P>' ,
- 'listOpen' : '<@Bullet:>' ,
- 'listItem' : '\x95 ' , # \x95 == ~U
- 'numlistOpen' : '<@Bullet:>' ,
- 'numlistItem' : '\x95 ' ,
- 'bar1' : '\a' ,
- 'bar2' : '\a' ,
- 'url' : '<U>\a<P>' , # underline
- 'urlMark' : '\a <U>\a<P>' ,
- 'email' : '\a' ,
- 'emailMark' : '\a \a' ,
- 'img' : '\a' ,
- }
- 
# Final pass over every known tag name: the names missing for this
# target get an empty string, and on the defined tags every backslash
# that is followed by one of the chars picked for the running Python
# major version gets doubled.
# NOTE(review): only the major versions '1' and '2' have an entry on
# 'specials'; any other sys.version[0] raises KeyError here.
- # create empty tags keys
- for key in keys:
- if not tags.has_key(key):
- tags[key] = ''
- else:
- # drawback of using re.sub() - double escape some specials
- # see also: 'force_re' marks on the code
- specials = {'1':'ntsrful', '2':'ntsrf'}
- specials = specials[sys.version[0]]
- tags[key] = re.sub(r'(\\[%s])'%specials,r'\\\1',tags[key])
- 
- return tags
def getRules(doctype):
    """Return the dictionary of rules that apply to the target *doctype*.

    Every known rule name is on the result: 0 for the rules which are
    OFF for this target, and the configured value for the others (1 for
    the ON/OFF rules, a number for the value settings).

    Raises KeyError when *doctype* is not a known target.
    """
    rule_names = (
        # target rules (ON/OFF)
        'linkable',            # target supports external links
        'tableable',           # target supports tables
        'imgalignable',        # target supports image alignment
        'listcountable',       # target supports numbered lists natively
        'tablecellsplit',      # place delimiters only *between* cells
        'listnotnested',       # lists cannot be nested
        'quotenotnested',      # quotes cannot be nested
        'preareanotescaped',   # don't escape specials in PRE area

        # target code beautify (ON/OFF)
        'indentprearea',       # add leading spaces to PRE area lines
        'breaktablecell',      # break lines after any table cell
        'breaktablelineopen',  # break line after opening table line
        'keepquoteindent',     # don't remove the leading TABs on quotes

        # value settings
        'listmaxdepth',        # maximum depth for lists
    )

    # what each target turns ON (anything not listed stays OFF)
    per_target = {
        'txt': {'indentprearea': 1},
        'html': {
            'indentprearea': 1,
            'linkable': 1,
            'imgalignable': 1,
            'listcountable': 1,
            'tableable': 1,
            'breaktablecell': 1,
            'breaktablelineopen': 1,
            'keepquoteindent': 1,
        },
        'sgml': {
            'linkable': 1,
            'listcountable': 1,
            'tableable': 1,
            'tablecellsplit': 1,
            'quotenotnested': 1,
            'keepquoteindent': 1,
        },
        'mgp': {},
        'tex': {
            'listcountable': 1,
            'tableable': 1,
            'tablecellsplit': 1,
            'preareanotescaped': 1,
            'listmaxdepth': 4,
        },
        'moin': {'linkable': 1, 'tableable': 1},
        'man': {'indentprearea': 1, 'listnotnested': 1},
        'pm6': {},
    }

    turned_on = per_target[doctype]
    ret = {}
    for name in rule_names:
        ret[name] = 0          # reset all
    ret.update(turned_on)      # turn ON
    return ret
# Build and return the dictionary with all the compiled regexes, keyed
# by the name of the structure they identify (title, list, table, ...).
# Besides the compiled patterns, the result also carries the raw url
# pieces under the '_urlskel' key, and 'macro' is the same object as
# 'date'.
- def getRegexes():
- regex = {
- # extra at end: (\[(?P<label>\w+)\])?
- 'title':
- re.compile(r'^\s*(?P<tag>={1,5})(?P<txt>[^=].*[^=])\1$'),
- 'areaPreOpen':
- re.compile(r'^---$'),
- 'areaPreClose':
- re.compile(r'^---$'),
- 'quote':
- re.compile(r'^\t+'),
- '1linePreOld':
- re.compile(r'^ {4}([^\s-])'),
- '1linePre':
- re.compile(r'^--- '),
- 'fontMono':
- re.compile(r'`([^`]+)`'),
- 'fontBold':
- re.compile(r'\*\*([^\s*].*?)\*\*'),
- 'fontItalic':
- re.compile(r'(^|[^:])//([^ /].*?)//'),
- 'fontUnderline':
- re.compile(r'__([^_].*?)__'), # underline lead/trailing blank
- 'fontBolditalic':
- re.compile(r'\*/([^/].*?)/\*'),
- 'list':
- re.compile(r'^( *)([+-]) ([^ ])'),
- 'deflist':
- re.compile(r'^( *)(=) ([^:]+):'),
- 'bar':
- re.compile(r'^\s*([_=-]{20,})\s*$'),
- 'table':
- re.compile(r'^ *\|\|?[<:>]*\s'),
- 'blankline':
- re.compile(r'^\s*$'),
- 'comment':
- re.compile(r'^(//|%)')
- }
- 
- # special char to place data on TAGs contents (\a == bell)
- regex['x'] = re.compile('\a')
- 
- # %%date [ (formatting) ]
- regex['date'] = re.compile(r'%%date\b(\((?P<fmt>.*?)\))?', re.I)
- 
- 
- ### complicated regexes begin here ;)
- #
- # textual descriptions on --help's style: [...] is optional, | is OR
- 
- 
- ### first, some auxiliar variables
- #
- 
- # [image.EXT]
- patt_img = r'\[([\w_,.+%$#@!?+~/-]+\.(png|jpe?g|gif|eps|bmp))\]'
- 
- # link things
- urlskel = {
- 'proto' : r'(https?|ftp|news|telnet|gopher|wais)://',
- 'guess' : r'(www[23]?|ftp)\.', # w/out proto, try to guess
- 'login' : r'A-Za-z0-9_.-', # for ftp://login@domain.com
- 'pass' : r'[^ @]*', # for ftp://login:password@domain.com
- 'chars' : r'A-Za-z0-9%._/~:,=-', # %20(space), :80(port)
- 'anchor': r'A-Za-z0-9%._-', # %nn(encoded)
- 'form' : r'A-Za-z0-9/%&=+.@*_-',# .@*_-(as is)
- 'punct' : r'.,;:!?'
- }
- 
- # username [ :password ] @
- patt_url_login = r'([%s]+(:%s)?@)?'%(urlskel['login'],urlskel['pass'])
- 
- # [ http:// ] [ username:password@ ] domain.com [ / ] [ #anchor | ?form=data ]
- retxt_url = r'\b(%s%s|%s)[%s]+\b/*(\?[%s]+)?(#[%s]+)?'%(
- urlskel['proto'],patt_url_login, urlskel['guess'],
- urlskel['chars'],urlskel['form'],urlskel['anchor'])
- 
- # filename | [ filename ] #anchor
- retxt_url_local = r'[%s]+|[%s]*(#[%s]+)'%(
- urlskel['chars'],urlskel['chars'],urlskel['anchor'])
- 
- # user@domain [ ?form=data ]
- patt_email = r'\b[%s]+@([A-Za-z0-9_-]+\.)+[A-Za-z]{2,4}\b(\?[%s]+)?'%(
- urlskel['login'],urlskel['form'])
- 
- 
- # saving for future use
- regex['_urlskel'] = urlskel
- 
- ### and now the real regexes
- #
- 
- regex['email'] = re.compile(patt_email,re.I)
- 
- # email | url
- regex['link'] = \
- re.compile(r'%s|%s'%(retxt_url,patt_email), re.I)
- 
- # \[ label | imagetag url | email | filename \]
- regex['linkmark'] = \
- re.compile(r'\[(?P<label>%s|[^]]+) (?P<link>%s|%s|%s)\]'%(
- patt_img, retxt_url, patt_email, retxt_url_local),
- re.L+re.I)
- 
- # image
- regex['img'] = re.compile(patt_img, re.L+re.I)
- 
- # all macros
- regex['macro'] = regex['date']
- 
- # Settings are still in development stage - DON'T USE THIS
- #regex['setting'] = re.compile(r'^%\s*Document \s*(Encoding|Toc)\s*:\s*(.*)$',re.I)
- 
- return regex
- ### END OF regex nightmares
class SubareaMaster:
    """Stack with the names of the subareas that are currently open.

    Calling the instance gives the name on top of the stack, or '' when
    no subarea is open.
    """

    def __init__(self):
        # names of the open subareas, the most recent one is the last
        self.x = []

    def __call__(self):
        "Return the current (most recent) subarea name, or ''."
        if not self.x:
            return ''
        return self.x[-1]

    def add(self, area):
        "Open *area*, unless it already is the current subarea."
        if not self.x or self.x[-1] != area:
            self.x.append(area)
        Debug('subarea ++ (%s): %s' % (area,self.x), 1)

    def pop(self, area=None):
        """Close *area*, but only if it is the current subarea.

        Nothing is removed when no *area* is given, when another subarea
        is the current one, or when no subarea is open at all (an empty
        stack is checked first, so it cannot raise IndexError).
        """
        if area and self.x and self.x[-1] == area:
            self.x.pop()
        Debug('subarea -- (%s): %s' % (area,self.x), 1)
def doHeader(doctype, headdic):
    """Return the document header of the target *doctype*, as lines.

    headdic -- dictionary with the HEADER1, HEADER2 and HEADER3 texts

    The template of the target comes from HEADER_TEMPLATE.  For every
    header without a value, the template lines that mention it and hold
    no other header reference are left out; the other lines are filled
    with the data of *headdic*.  An unknown *doctype* is reported with
    Error().
    """
    if doctype not in HEADER_TEMPLATE:
        Error("doheader: Unknow doctype '%s'"%doctype)
    Debug('HEADER data: %s'%headdic, 1)
    template = HEADER_TEMPLATE[doctype].split('\n')

    # Drop the lines made useless by an empty header.  A new list is
    # built for each empty key: removing items from the very list being
    # looped over would make the loop skip the line right after each
    # removal.
    for key in headdic.keys():
        if headdic[key]:
            continue
        reference = '%%(%s)s' % key
        kept = []
        for line in template:
            if line.count(key):
                sline = line.replace(reference, '')
                if not sline.count('%(HEADER'):
                    continue    # no other header here: line is gone
            kept.append(line)
        template = kept

    # populate template with data
    template = '\n'.join(template) % headdic

    # post processing
    if doctype == 'tex':
        if FLAGS['toc']:
            template = template + '\n' + r'\newpage\tableofcontents'
        if headdic['HEADER3'] == currdate:
            # let tex format today
            # (the replacement backslash is escaped, so the result does
            # not depend on how re handles the unknown escape '\d')
            template = re.sub(r'\\date\{.*?}', r'\\date', template)

    return template.split('\n')
def doCommentLine(doctype,txt):
    """Return *txt* as a comment line of the target, or '' if it has none.

    The text is placed on the 'comment' tag of TAGS.  On sgml every '--'
    of the text is changed first, because that string ends a sgml
    comment.
    """
    if doctype == 'sgml':
        txt = txt.replace('--', '\\-\\-')

    comment_tag = TAGS['comment']
    if not comment_tag:
        return ''
    return regex['x'].sub(txt, comment_tag)
def doFooter(doctype):
    """Return the footer lines of the target *doctype*.

    They are: a comment telling which program generated the code, a
    comment with the command line used (from the global CMDLINE), and
    the 'EOD' tag of the target.
    """
    if doctype == 'tex':
        typename = 'LaTeX2e'
    else:
        typename = doctype
    ppgd = '%s code generated by txt2tags %s (%s)'%(
        typename,my_version,my_url)
    cmdline = 'cmdline: txt2tags %s' % ' '.join(CMDLINE[1:])
    return [
        '\n'+doCommentLine(doctype,ppgd),
        doCommentLine(doctype,cmdline),
        TAGS['EOD'],
    ]
def doEscape(doctype,txt):
    """Return *txt* with the special chars of the target escaped.

    html, sgml -- the ampersand and the angle brackets become the amp,
                  lt and gt entities (sgml: also \\xff becomes yuml)
    pm6        -- '<' becomes '<\\#60>'
    mgp        -- a leading '%' gets a blank before it
    man        -- a leading '.' gets a blank before it, and the
                  backslashes are doubled
    tex        -- backslash, '~' and '^' go inside \\verb!!, and the
                  chars # $ & % { } get a backslash before them

    Any other target gets the text back untouched.
    """
    # Each entity is put together from its pieces on purpose.  Written
    # in one piece it is turned back into the plain char by any tool
    # that decodes entities on a source listing, and then the escape
    # silently becomes a replacement of a char by itself.
    amp = '&'
    if doctype == 'html' or doctype == 'sgml':
        # the ampersand goes first, or it would also escape the
        # ampersands of the entities added for the angle brackets
        txt = txt.replace('&', amp + 'amp;')
        txt = txt.replace('<', amp + 'lt;')
        txt = txt.replace('>', amp + 'gt;')
        if doctype == 'sgml':
            txt = txt.replace('\xff', amp + 'yuml;') # "+y
    elif doctype == 'pm6':
        # plain replace: the backslash must reach the output as is
        txt = txt.replace('<', '<\\#60>')
    elif doctype == 'mgp':
        txt = re.sub('^%',' %',txt) # add leading blank to avoid parse
        #txt = re.sub('^%([^%])','%prefix ""\n %\n%cont, prefix " "\n\\1',txt)
    elif doctype == 'man':
        txt = re.sub(r'^\.', ' .',txt) # command ID
        txt = doEscapeEscapechar(txt)
    elif doctype == 'tex':
        # the backslash goes first, the next steps add new ones
        txt = txt.replace('\\', r'\verb!\!')
        txt = txt.replace('~', r'\verb!~!')
        txt = txt.replace('^', r'\verb!^!')
        txt = re.sub('([#$&%{}])', r'\\\1', txt)
        # TIP the _ is escaped at end
    return txt
def doFinalEscape(doctype, txt):
    """Apply the escapes that must only run at the very end of the line.

    doctype -- target id; only 'pm6', 'man', 'tex' and 'sgml' have a
               late escape, any other value returns txt unchanged
    txt     -- the text to be escaped

    Returns the escaped text.
    """
    # target id -> (text to find, its replacement)
    late_escapes = {
        'pm6' : (r'\<', r'<\#92><'),
        'man' : ('-', r'\-'),
        'tex' : ('_', r'\_'),
        'sgml': ('[', '&lsqb;'),
    }
    pair = late_escapes.get(doctype)
    if pair:
        txt = txt.replace(pair[0], pair[1])
    return txt
def doEscapeEscapechar(txt):
    "Return txt with every backslash doubled"
    # str method instead of the deprecated string.replace() function
    return txt.replace('\\', '\\\\')
def addLineBreaks(list):
    "use LB to respect sys.platform"
    # Embedded \n's are turned into LB and one final LB is appended to
    # every line.  The parameter name shadows the builtin but is kept
    # as is for the callers.
    return [entry.replace('\n', LB) + LB for entry in list]
def doPreLine(doctype,line):
    "Parsing procedures for preformatted (verbatim) lines"
    # escape, unless the target keeps its pre area raw
    escape_wanted = not rules['preareanotescaped']
    if escape_wanted:
        line = doEscape(doctype, line)
    # some targets want the verbatim text indented
    if rules['indentprearea']:
        line = ' ' + line
    # pm6 is the only target that gets the final escape applied here
    if doctype == 'pm6':
        line = doFinalEscape(doctype, line)
    return line
def doCloseTable(doctype):
    """Return the text that closes the currently open table.

    Targets without table support (rules['tableable'] false) get the
    pre area close tag instead.  The global tableborder flag is reset
    and 'table' is popped from the subarea stack.
    """
    global subarea, tableborder
    if not rules['tableable']:
        closing = TAGS['areaPreClose']
    elif doctype == 'tex' and tableborder:
        # bordered tex table: one more line tag before the close tag
        closing = TAGS['tableLineOpen'] + TAGS['tableClose'] + '\n'
    else:
        closing = TAGS['tableClose'] + '\n'
    tableborder = 0
    subarea.pop('table')
    return closing
def doCloseQuote(howmany=None):
    """Close quote levels and return their close tags joined by newlines.

    howmany -- number of levels to close; when omitted (or 0) every
               open level is closed

    One item is popped from the global quotedepth per closed level, and
    'quote' is popped from the subarea stack once no level is left.
    """
    global quotedepth
    pending = howmany or len(quotedepth)
    closed = []
    for _ in range(pending):
        quotedepth.pop()
        #TODO align open/close tag -> FREE_ALING_TAG = 1 (man not)
        closed.append(TAGS['areaQuoteClose'])

    if not quotedepth:
        subarea.pop('quote')
    return '\n'.join(closed)
def doCloseList(howmany=None):
    """Close list levels and return their close tags joined by newlines.

    howmany -- number of levels to close; when omitted (or 0) every
               open level is closed

    One item is removed from the global listindent and listids per closed
    level, and 'list' is popped from the subarea stack once no level is
    left.
    """
    global listindent, listids
    # list id char -> name of its close tag
    close_tag_names = {
        '-': 'listClose',
        '+': 'numlistClose',
        '=': 'deflistClose',
    }
    ret = []
    if not howmany: howmany = len(listindent)
    for _ in range(howmany):
        # An unknown id falls back to the plain list tag.  Without this,
        # tag would be unbound on the first pass, or would silently reuse
        # the tag chosen for the previously closed level.
        tag = TAGS[close_tag_names.get(listids[-1], 'listClose')]
        if not tag: tag = TAGS['listClose']  # default
        if tag:
            # unnested lists are only closed at mother-list
            if rules['listnotnested']:
                if len(listindent) == 1:
                    ret.append(tag)
            else:
                ret.append(listindent[-1] + tag)
        del listindent[-1]
        del listids[-1]

    if not listindent: subarea.pop('list')
    return '\n'.join(ret)
def beautify_me(name, doctype, line):
    """Tag every <name> beautifier found in line and return the new line.

    Where name is: bold, italic, underline or bolditalic.
    The doctype parameter is not used here.
    """
    fontkey = 'font%s' % name.capitalize()
    tag_open = TAGS['%sOpen' % fontkey]
    tag_close = TAGS['%sClose' % fontkey]
    if fontkey == 'fontItalic':
        # italic uses two groups: \1 is kept before the open tag,
        # \2 is the text placed between the tags
        template = r'\1%s\2%s' % (tag_open, tag_close)
    else:
        template = r'%s\1%s' % (tag_open, tag_close)
    return regex[fontkey].sub(template, line)
def get_tagged_link(doctype, label, url):
    """Return the target markup for a link.

    doctype -- target id, handed to doEscape()
    label   -- the link text; empty for a bare address
    url     -- the address as found on the source line

    An address matching regex['email'] uses the 'email' tags, anything
    else the 'url' tags.  With FLAGS['maskemail'] set, a bare e-mail is
    returned as masked plain text instead of a tag.
    """
    ret = ''

    # set link type
    if regex['email'].match(url):
        linktype = 'email'
    else:
        linktype = 'url'

    # adding protocol to guessed link
    guessurl = ''
    if linktype == 'url' and \
       re.match(regex['_urlskel']['guess'], url):
        if url[0] == 'w':
            guessurl = 'http://' + url
        else:
            guessurl = 'ftp://' + url

    # not link aware targets -> protocol is useless
    if not rules['linkable']: guessurl = ''

    # escape specials from TEXT parts
    label = doEscape(doctype, label)
    if not rules['linkable']:
        if doctype == 'tex':
            url = re.sub('^#', '\\#', url)  # ugly, but compile
        else:
            url = doEscape(doctype, url)

    # simple link (not guessed)
    if not label and not guessurl:
        if FLAGS['maskemail'] and linktype == 'email':
            # do the email mask feature (no TAGs, just text)
            url = url.replace('@', ' (a) ')
            url = url.replace('.', ' ')
            url = "<%s>" % url
            # doEscape() takes (doctype, txt); the one-argument call
            # used here before raised TypeError
            if rules['linkable']: url = doEscape(doctype, url)
            ret = url
        else:
            # just add link data to tag
            tag = re.sub('.*', TAGS[linktype], '')  #force_re
            ret = regex['x'].sub(url, tag)

    # named link or guessed simple link
    else:
        # adjusts for guessed link
        if not label: label = url    # no protocol
        if guessurl: url = guessurl  # with protocol

        # handle \ on link label
        label = doEscapeEscapechar(label)

        # putting data on the right appearance order
        if rules['linkable']:
            urlorder = [url, label]  # link before label
        else:
            urlorder = [label, url]  # label before link

        # get tag
        ret = re.sub('.*', TAGS["%sMark" % linktype], '')  #force_re

        # add link data to tag (replace \a's)
        for data in urlorder:
            ret = regex['x'].sub(data, ret, 1)
    return ret
def get_image_align(line):
    """Return the image alignment implied by its position on the line.

    The first regex['img'] match on the stripped line decides:
    'para' (^img$), 'left' (^img + text$), 'right' (^text + img$) or
    'center' (^text + img + text$).  The line must contain an image mark.
    """
    stripped = line.strip()
    found = regex['img'].search(stripped)
    at_head = found.start() == 0
    at_tail = found.end() == len(stripped)

    if at_head and at_tail:
        return 'para'
    if at_head:
        return 'left'
    if at_tail:
        return 'right'
    return 'center'  # default align
def get_table_prop(line):
    """Parse one table line and return its properties as a dictionary.

    Keys of the returned dictionary:
    border -- 1 when the line ends with | or the table is tab-made
    type   -- '|' for a pipe-made table (default), '\\t' for a tab-made one
    header -- 1 when the line starts with a doubled table mark
    cells  -- list with the text of each cell
    """
    # default table properties
    ret = {'border': 0, 'type': '|', 'header': 0, 'cells': []}
    # strip and del leading table mark |
    line = line.strip()
    line = line[1:]
    # startswith/endswith are used below because the line may be empty
    # at any of these points, where indexing it would raise IndexError
    # detect (and delete) header mark
    if line.startswith('|'):
        ret['header'] = 1
        line = line[1:]
    # detect (and delete) table ID (pipe-made is default)
    if line.startswith('\t'):
        ret['type'] = '\t'
        ret['border'] = 1
        line = line[1:]
    # detect (and delete) border mark
    if line.endswith('|'):
        ret['border'] = 1
        line = line[:-1]
    # split cells
    # TODO v1.3: take \t\|?| out
    ret['cells'] = re.split(r'\t\|?| \| ', line)

    Debug('Table Prop: %s' % ret, 1)
    return ret
- #TODO if ' | ' table cell is center align
def tag_table_cells(table, doctype):
    """Return the cells of one table line, tagged and concatenated.

    table   -- properties dictionary; the 'header' and 'cells' keys are
               read, and the 'cells' list is consumed (left empty)
    doctype -- target id
    """
    ret = ''
    # plain cell
    cell_open, cell_close = TAGS['tableCellOpen'], TAGS['tableCellClose']
    # title cell
    if table['header']:
        cell_open = TAGS['tableTitleCellOpen']
        cell_close = TAGS['tableTitleCellClose']
    # should we break the line?
    if rules['breaktablecell']: cell_close = cell_close + '\n'
    # here we go
    while table['cells']:
        cel = table['cells'].pop(0)
        if not cel and doctype == 'html':
            # The filler has to be the entity: a blank would be removed
            # by the strip below, leaving the cell empty again.
            cel = '&nbsp;'
        # last cell gotchas
        if not table['cells']:
            # don't need cell separator
            if rules['tablecellsplit']: cell_close = ''
            # close beautifier for last title cell
            if doctype == 'tex' and table['header']: cell_close = '}'
        newcell = cell_open + cel.strip() + cell_close
        newcell = re.sub('.*', newcell, '')  #force_re
        ret = ret + newcell
    return ret
- ################################################################################
- ###MerryChristmas,IdontwanttofighttonightwithyouImissyourbodyandIneedyourlove###
- ################################################################################
- def convert(inlines, doctype):
- # global vars for doClose*()
- global TAGS, regex, rules, quotedepth, listindent, listids
- global subarea, tableborder
- global has_obsolete, has_fixed
-
- TAGS = getTags(doctype)
- rules = getRules(doctype)
- regex = getRegexes()
-
- # the defaults
- linkmask = '@@_link_@@'
- monomask = '@@_mono_@@'
- macromask = '@@_macro_@@'
-
- AREA = NewArea('head',0) # then conf, then body
- subarea = SubareaMaster()
- HEADERS = { 'HEADER1': '-NO TITLE-', 'HEADER2':'', 'HEADER3':'' }
- # SETTINGS = {}
- ret = []
- fixedinfile = []
- toclist = []
- header = []
- f_tt = 0
- listindent = []
- listids = []
- listcount = []
- titlecount = ['',0,0,0,0,0]
- f_lastblank = 0
- holdspace = ''
- listholdspace = ''
- quotedepth = []
- tableborder = 0
- tablealign = []
- has_obsolete = has_fixed = 0
-
- if outfile != pipefileid:
- if not FLAGS['gui'] and not FLAGS['fixme']:
- print "--- %s..."%doctype
-
- # let's mark it up!
- linenr = 0
- for lineref in range(len(inlines)):
- skip_continue = 0
- linkbank = []
- monobank = []
- macrobank = []
- linenr = lineref +1
- untouchedline = inlines[lineref]
- line = string.rstrip(untouchedline)
- # save line to the 'fixed' buffer
- fixedinfile.append(untouchedline)
-
- Debug('LINE %04d: %s' % (linenr,repr(line)), 1) # for heavy debug
-
- # detect if head section is over
- #TIP 'not line' depends on previous line.rstrip()
- if (linenr == 4 and AREA == 'head') or \
- (linenr == 1 and not line):
- AREA = NewArea('conf',linenr)
- if not FLAGS['noheaders']:
- header = doHeader(doctype,HEADERS)
-
- # we need (not really) to mark each paragraph
- #TODO check if this is really needed
- if doctype == 'pm6' and f_lastblank:
- if f_tt or AREA == 'head' or listindent:
- holdspace = ''
- else:
- holdspace = TAGS['paragraph']+'\n'
-
- # any NOT table line, closes an open table
- if subarea() == 'table' and not regex['table'].search(line):
- ret.append(doCloseTable(doctype))
-
-
- #---------------------[ PRE formatted ]----------------------
-
- #TIP we'll never support beautifiers inside pre-formatted
-
- # we're already on a PRE area
- if f_tt:
- # closing PRE
- if regex['areaPreClose'].search(line):
- if doctype != 'pm6':
- ret.append(TAGS['areaPreClose'])
- f_tt = 0
- continue
-
- # normal PRE-inside line
- line = doPreLine(doctype, line)
- ret.append(line)
- continue
-
- # detecting PRE area init
- if regex['areaPreOpen'].search(line):
- ret.append(TAGS['areaPreOpen'])
- f_lastblank = 0
- f_tt = 1
- continue
-
- # one line PRE-formatted text
- if regex['1linePre'].search(line):
- f_lastblank = 0
- line = regex['1linePre'].sub('',line)
- line = doPreLine(doctype, line)
- t1, t2 = TAGS['areaPreOpen'],TAGS['areaPreClose']
- ret.append('%s\n%s\n%s'%(t1,line,t2))
- continue
-
- #---------------------[ blank lines ]-----------------------
-
- #TODO "holdspace" to save <p> to not show in closelist
- if regex['blankline'].search(line):
-
- # closing all open quotes
- if quotedepth:
- ret.append(doCloseQuote())
-
- # closing all open lists
- if f_lastblank: # 2nd consecutive blank line
- if listindent: # closes list (if any)
- ret.append(doCloseList())
- holdspace = ''
- continue # consecutive blanks are trash
-
- # normal blank line
- if doctype != 'pm6' and AREA == 'body':
- # paragraph (if any) is wanted inside lists also
- if listindent:
- para = TAGS['paragraph'] + '\n'
- holdspace = holdspace + para
- elif doctype == 'html':
- ret.append(TAGS['paragraph'])
- # sgml: quote close tag must not be \n\n</quote>
- elif doctype == 'sgml' and quotedepth:
- skip_continue = 1
- # otherwise we just print a blank line
- else:
- ret.append('')
-
- f_lastblank = 1
- if not skip_continue: continue
- else:
- f_lastblank = 0 # reset blank status
-
-
- #---------------------[ comments ]-----------------------
-
- # just skip them (if not macro or setting)
- if regex['comment'].search(line) and not regex['date'].match(line):
-
- ### Still in development stage
- # # detect settings
- # if regex['setting'].search(line):
- # if AREA == 'conf':
- # m = regex['setting'].search(line)
- # name, val = m.group(1), m.group(2)
- # SETTINGS[string.lower(name)] = string.strip(val)
- # Debug("Found Setting '%s', value '%s'"%(name,val),1,linenr)
- # continue
-
- # obsoleted comment format
- if line[0] == '/':
- # fixes it
- if FLAGS['fixme']:
- # discard original line, save new
- fixedinfile.pop()
- fixedinfile.append(re.sub('^//','%',untouchedline))
- Fixed(linenr, '// comment line', '% comment line')
- # just show a warning message
- else:
- old = '// as the comment line string'
- new = '% as the new comment line char'
- Obsoleted(linenr, old, new, '1.3')
-
- f_lastblank = 1
- continue
-
- #---------------------[ BODY detect ]-----------------------
-
- ### if got here, its a header or a valid line
- if AREA == 'conf':
- # oops, not header, so we're now on document BODY
- AREA = NewArea('body', linenr)
- # so, let's print the opening paragraph
- if doctype != 'pm6':
- ret.append(TAGS['paragraph'])
-
-
- #---------------------[ Title ]-----------------------
-
- # man: - should not be escaped, \ turns to \\\\
-
- #TODO set next blank and set f_lastblank or f_lasttitle
- if regex['title'].search(line) and not listindent and AREA == 'body':
- line = doEscape(doctype,line)
-
- # double escape escape char
- if doctype == 'man':
- line = doEscapeEscapechar(line)
-
- m = regex['title'].search(line)
- tag = m.group('tag')
- level = len(tag)
- tag = TAGS['title%s'%level]
-
- txt = string.strip(m.group('txt'))
- if doctype == 'sgml':
- txt = re.sub(r'\[', r'[', txt)
-
- if FLAGS['enumtitle']: ### numbered title
- id = '' ; n = level #
- titlecount[n] = titlecount[n] +1 # add count
- if n < len(titlecount)-1: # reset sublevels count
- for i in range(n+1, len(titlecount)): titlecount[i] = 0
- for i in range(n): # compose id from hierarchy
- id = "%s%d."%(id,titlecount[i+1])
- idtxt = "%s %s"%(id, txt) # add id to title
- else:
- idtxt = txt
-
- anchorid = 'toc%d'%(len(toclist)+1)
- if TAGS['anchor'] and FLAGS['toc']:
- ret.append(regex['x'].sub(anchorid,TAGS['anchor']))
-
- # escape to handle \ on title
- idtxt = doEscapeEscapechar(idtxt)
-
- line = regex['title'].sub(tag,line)
- ret.append(regex['x'].sub(idtxt,line))
-
- # let's do some TOC!
- if TAGS['anchor']:
- # tocitemid = '#toc%d'%(len(toclist)+1)
- # TOC more readable with master topics not linked at number
- # stoled idea from windows .CHM files (help system)
- if FLAGS['enumtitle'] and level == 1:
- tocitem = '%s+ [%s #%s]'%(' '*level,txt,anchorid)
- else:
- tocitem = '%s- [%s #%s]'%(' '*level,idtxt,anchorid)
- else:
- tocitem = '%s- %s'%(' '*level,idtxt)
- if doctype in ['txt', 'man']:
- tocitem = '%s%s' %(' '*level,idtxt)
- if level <= 3: toclist.append(tocitem) # max toc level: 3
-
- # add "underline" to text titles
- if doctype == 'txt':
- ret.append(regex['x'].sub('='*len(idtxt),tag))
-
- continue
-
- #TODO! labeltxt = ''
- # label = m.group('label')
- # if label: labeltxt = '<label id="%s">' %label
-
-
- #---------------------[ apply masks ]-----------------------
-
- ### protect important structures from escaping and formatting
- # protect pre-formatted font text
- while regex['fontMono'].search(line):
- t…
Large files files are truncated, but you can click here to view the full file