PageRenderTime 103ms CodeModel.GetById 23ms RepoModel.GetById 3ms app.codeStats 0ms

/old/txt2tags-1.1.py

http://txt2tags.googlecode.com/
Python | 1565 lines | 1553 code | 5 blank | 7 comment | 7 complexity | 715bf9eef125e8037e3bf1c545647a8e MD5 | raw file
Possible License(s): GPL-2.0, GPL-3.0, WTFPL
  1. #!/usr/bin/env python
  2. # txt2tags - generic text conversion tool
  3. # http://txt2tags.sf.net - aurelio
  4. # please, don't look at this code, it's ugly!
  5. # i'll try to make it better on the next releases
  6. import re, string, os, sys, getopt, traceback
  7. from time import strftime,time,localtime
# program identification, used on the version string and generated footers
my_url = 'http://txt2tags.sf.net'
my_email = 'aurelio@verde666.org'
my_version = '1.1'
DEBUG = 0 # do not edit here, please use --debug
# supported targets; the position of a target on this list is the index
# used to pick its string on every TAG* list (see T below)
tags = ['txt', 'sgml', 'html', 'pm6', 'mgp', 'moin', 'man']
#tex will be 100% on v1.2
#tags = ['txt', 'sgml', 'html', 'pm6', 'mgp', 'moin', 'man', 'tex']
# on/off options (0 or 1); each key is also accepted as a --long option
FLAGS = {'noheaders':0,'enumtitle':0,'maskemail':0, 'stdout':0,
         'toconly' :0,'toc' :0,'gui' :0, 'fixme' :0}
# T: index of the current target on 'tags' (set by doitall)
# CMDLINE: the command line, saved by ParseCmdline for the footer
T = CMDLINE = ''
currdate = strftime('%Y%m%d',localtime(time())) # ISO current date
splitlevel = '' ; lang = 'english'
doctype = outfile = ''
pipefileid = '-' # file name that means STDIN/STDOUT
has_obsolete = has_fixed = 0 # set to 1 by Obsoleted() and Fixed()
# global vars for doClose*()
quotedepth = []
listindent = []
listids = []
istable = istableaware = tableborder = pendingtableopen = 0
versionstr = "txt2tags version %s <%s>"%(my_version,my_url)
# help text shown by -h/--help (and when the file name or type is missing).
# The first %s receives the version string, the second one the supported
# targets: repr(tags) with the brackets and quotes removed by the re.sub().
usage = """
%s
usage: txt2tags -t <type> [OPTIONS] file.t2t
txt2tags -t html -s <split level> -l <lang> file.t2t
-t, --type target document type. actually supported:
%s
--stdout by default, the output is written to file.<type>
with this option, STDOUT is used (no files written)
--noheaders suppress header, title and footer information
--enumtitle enumerate all title lines as 1, 1.1, 1.1.1, etc
--maskemail hide email from spam robots. x@y.z turns to <x (a) y z>
--toc add TOC (Table of Contents) to target document
--toconly print document TOC and exit
--gui invoke Graphical Tk Interface
--fixme temporary option to fix obsoleted structures
-h, --help print this help information and exit
-V, --version print program version and exit
extra options for HTML target (needs sgml-tools):
--split split documents. values: 0, 1, 2 (default 0)
--lang document language (default english)
"""%(versionstr, re.sub(r"[]'[]",'',repr(tags)))
# here is all the target's templates
# you may edit them to fit your needs
# - the %(HEADERn)s strings represent the Header lines
# - use %% to represent a literal %
#
# The dictionary is keyed by target name. doHeader() fills the chosen
# template with the % operator and a dictionary holding HEADER1, HEADER2
# and HEADER3; a template line whose only header reference is empty is
# dropped by doHeader() before the substitution.
# Note that 'tex' has a template here but is still commented out on 'tags'.
HEADER_TEMPLATE = {
'txt': """\
%(HEADER1)s
%(HEADER2)s
%(HEADER3)s
""",
'sgml': """\
<!doctype linuxdoc system>
<article>
<title>%(HEADER1)s
<author>%(HEADER2)s
<date>%(HEADER3)s
""",
#TODO (peter valach) <meta http-equiv="Content-Type"
# content="text/html; charset=iso-8859-2">
'html': """\
<HTML>
<HEAD><TITLE>%(HEADER1)s</TITLE></HEAD>
<BODY BGCOLOR="white" TEXT="black">
<P ALIGN="center"><CENTER><H1>%(HEADER1)s</H1>
<FONT SIZE=4>
<I>%(HEADER2)s</I><BR>
%(HEADER3)s
</FONT></CENTER>
""",
# TODO man section 1 is hardcoded...
'man': """\
.TH "%(HEADER1)s" 1 %(HEADER3)s "%(HEADER2)s"
""",
# TODO style to <HR>
'pm6': """\
<PMTags1.0 win><C-COLORTABLE ("Preto" 1 0 0 0)
><@Normal=
<FONT "Times New Roman"><CCOLOR "Preto"><SIZE 11>
<HORIZONTAL 100><LETTERSPACE 0><CTRACK 127><CSSIZE 70><C+SIZE 58.3>
<C-POSITION 33.3><C+POSITION 33.3><P><CBASELINE 0><CNOBREAK 0><CLEADING -0.05>
<GGRID 0><GLEFT 7.2><GRIGHT 0><GFIRST 0><G+BEFORE 7.2><G+AFTER 0>
<GALIGNMENT "justify"><GMETHOD "proportional"><G& "ENGLISH">
<GPAIRS 12><G%% 120><GKNEXT 0><GKWIDOW 0><GKORPHAN 0><GTABS $>
<GHYPHENATION 2 34 0><GWORDSPACE 75 100 150><GSPACE -5 0 25>
><@Bullet=<@-PARENT "Normal"><FONT "Abadi MT Condensed Light">
<GLEFT 14.4><G+BEFORE 2.15><G%% 110><GTABS(25.2 l "")>
><@PreFormat=<@-PARENT "Normal"><FONT "Lucida Console"><SIZE 8><CTRACK 0>
<GLEFT 0><G+BEFORE 0><GALIGNMENT "left"><GWORDSPACE 100 100 100><GSPACE 0 0 0>
><@Title1=<@-PARENT "Normal"><FONT "Arial"><SIZE 14><B>
<GCONTENTS><GLEFT 0><G+BEFORE 0><GALIGNMENT "left">
><@Title2=<@-PARENT "Title1"><SIZE 12><G+BEFORE 3.6>
><@Title3=<@-PARENT "Title1"><SIZE 10><GLEFT 7.2><G+BEFORE 7.2>
><@Title4=<@-PARENT "Title3">
><@Title5=<@-PARENT "Title3">
><@Quote=<@-PARENT "Normal"><SIZE 10><I>>
%(HEADER1)s
%(HEADER2)s
%(HEADER3)s
""",
#TODO escape lines beginning with %% after all formatting
'mgp': """\
#!/usr/X11R6/bin/mgp -t 90
%%deffont "normal" xfont "utopia-medium-r", charset "iso8859-1"
%%deffont "normal-i" xfont "utopia-medium-i", charset "iso8859-1"
%%deffont "normal-b" xfont "utopia-bold-r" , charset "iso8859-1"
%%deffont "normal-bi" xfont "utopia-bold-i" , charset "iso8859-1"
%%deffont "mono" xfont "courier-medium-r", charset "iso8859-1"
%%default 1 size 5
%%default 2 size 8, fore "yellow", font "normal-b", center
%%default 3 size 5, fore "white", font "normal", left, prefix " "
%%tab 1 size 4, vgap 30, prefix " ", icon arc "red" 40, leftfill
%%tab 2 prefix " ", icon arc "orange" 40, leftfill
%%tab 3 prefix " ", icon arc "brown" 40, leftfill
%%tab 4 prefix " ", icon arc "darkmagenta" 40, leftfill
%%tab 5 prefix " ", icon arc "magenta" 40, leftfill
%%%%------------------------- end of headers -----------------------------
%%page
%%size 10, center, fore "yellow"
%(HEADER1)s
%%font "normal-i", size 6, fore "white", center
%(HEADER2)s
%%font "mono", size 7, center
%(HEADER3)s
""",
# TODO please, improve me!
'moin': """\
%(HEADER1)s
%(HEADER2)s
%(HEADER3)s
""",
#TODO how to make acrobat left-side pages count?
# \includepackage{graphics}
# --lang matters! \usepackage[brazil]{babel}
# local links seems to be \label, \ref, \pageref
# (raw string: the backslashes below are literal, %% is still a literal %)
'tex': r"""
\documentclass[11pt,a4paper]{article}
\usepackage{amsfonts,amssymb,graphicx,url}
\usepackage[latin1]{inputenc} %% for accented chars
\pagestyle{empty} %% no page numbering and header/footer
\frenchspacing %% no aditional spaces after periods
\newcommand\email{\begingroup \urlstyle{tt}\Url} %% for email
\title{%(HEADER1)s}
\author{%(HEADER2)s}
\begin{document}
\date{%(HEADER3)s}
\maketitle
"""
}
  159. #-----------------------------------------------------------------------
  160. def Quit(msg, exitcode=0): print msg ; sys.exit(exitcode)
  161. def Error(msg): print "ERROR: %s"%msg ; sys.exit()
  162. def Debug(msg,i=0,linenr=None):
  163. if i > DEBUG: return
  164. if linenr is not None:
  165. print "(%d) %04d:%s"%(i,linenr,msg)
  166. else:
  167. print "(%d) %s"%(i,msg)
  168. def Readfile(file):
  169. if file == '-':
  170. try: data = sys.stdin.readlines()
  171. except: Error('You must feed me with data on STDIN!')
  172. else:
  173. try: f = open(file); data = f.readlines() ; f.close()
  174. except: Error("Cannot read file:\n %s"%file)
  175. return data
  176. def Savefile(file, contents):
  177. try: f = open(file, 'w')
  178. except: Error("Cannot open file for writing:\n %s"%file)
  179. if type(contents) == type([]): doit = f.writelines
  180. else: doit = f.write
  181. doit(contents) ; f.close()
  182. def NewArea(new, linenr):
  183. if new not in ['head', 'conf', 'body']:
  184. Error("Invalid new AREA '%s' on line '%s'"%(new,linenr))
  185. Debug('NEW AREA: %s'%new, 1, linenr)
  186. return new
  187. def Obsoleted(n, old, new, ver):
  188. global has_obsolete
  189. has_obsolete = 1
  190. print 'OBSOLETE WARNING: line %04d'%n
  191. print ' The %s is now obsoleted and will be removed'%old
  192. print ' on txt2tags version %s. Please use %s instead.'%(ver,new)
  193. def Fixed(n, old, new):
  194. global has_fixed
  195. has_fixed = 1
  196. print 'FIXED: line %04d: %15s ---> %s'%(n,old,new)
  197. def FixTable(tabline):
  198. new = string.replace(tabline,'\t',' ',1) # del 1st TAB
  199. new = string.replace(new,'\t',' | ') # change TABs by spaced pipes
  200. new = re.sub('(.*[^\n\r])', '\\1 |', new) # add last pipe (for border)
  201. return new
  202. def ObsoletedInstructions(infile):
  203. print """\n
  204. ATTENTION:
  205. Some obsoleted txt2tags structures were found on your source document.
  206. Please correct them by hand, or just run:
  207. txt2tags --fixme %s
  208. To update (rewrite) this file automatically.
  209. Please don't ignore this message.
  210. On next releases of txt2tags, these old structures will not be valid.
  211. \n"""%infile
  212. def reset_flags():
  213. global FLAGS
  214. for flag in FLAGS.keys(): FLAGS[flag] = 0
  215. def set_outfile_name(infile, doctype):
  216. "dirname is the same for {in,out}file"
  217. if not infile: return
  218. if infile == pipefileid or FLAGS['toconly'] or FLAGS['stdout']:
  219. outfile = pipefileid
  220. else:
  221. outfile = "%s.%s"%(re.sub('\.(txt|t2t)$','',infile), doctype)
  222. Debug(" infile: '%s'"% infile, 1)
  223. Debug("outfile: '%s'"%outfile, 1)
  224. return outfile
  225. def finish_him(outlist, outfile):
  226. "writing output to screen or file"
  227. if outfile == pipefileid:
  228. for line in outlist: print line
  229. else:
  230. f = open(outfile, 'w'); f.writelines(addLineBreaks(outlist)); f.close()
  231. if not FLAGS['gui']: print 'wrote %s'%(outfile)
  232. if splitlevel:
  233. print "--- html..."
  234. os.system('sgml2html --language=%s --split=%s %s'%(
  235. lang,splitlevel,outfile))
  236. def ParseCmdline(cmdline=sys.argv):
  237. "return a dic with all options:value found"
  238. global CMDLINE ; CMDLINE = cmdline # save for dofooter()
  239. Debug("cmdline: %s"%cmdline, 1)
  240. options = {'infile': ''}
  241. # get cmdline options
  242. longopt = ['help', 'version', 'type=', 'split=', 'lang=']+FLAGS.keys()
  243. try: (opt, args) = getopt.getopt(cmdline[1:], 'hVt:', longopt)
  244. except getopt.GetoptError:
  245. Error('Bad option or missing argument (try --help)')
  246. # get infile, if any
  247. if args: options['infile'] = args[0]
  248. for name,val in opt:
  249. # parse information options
  250. if name in ['-h','--help' ]: Quit(usage)
  251. elif name in ['-V','--version']: Quit(versionstr)
  252. # parse short/long options
  253. elif name in ['-t','--type']: options['doctype'] = val ; continue
  254. # just long options
  255. options[name[2:]] = val # del --
  256. Debug("cmdline options: %s"%options, 1)
  257. return options
  258. def ParseCmdlineOptions(optdic):
  259. "set vars and flags according to options dic"
  260. global FLAGS, splitlevel, lang
  261. # store flags and vars
  262. myflags = [] # for debug msg
  263. for flag in FLAGS.keys():
  264. if optdic.has_key(flag):
  265. FLAGS[flag] = 1
  266. myflags.append(flag)
  267. doctype = optdic.get('doctype')
  268. infile = optdic.get('infile')
  269. splitlevel = optdic.get('split')
  270. lang = optdic.get('lang')
  271. Debug("cmdline flags: %s"%string.join(myflags,', '), 1)
  272. if FLAGS['fixme']:
  273. if not infile: Quit(usage, 1)
  274. doctype = 'moin' # bogus, not used at all
  275. if not doctype and FLAGS['toconly']: doctype = 'txt' # toconly default type
  276. if not infile or not doctype: Quit(usage, 1) # no filename/doctype
  277. # sanity check: validate target type
  278. if not tags.count(doctype):
  279. Error("Invalid document type '%s' (try --help)"%(doctype))
  280. outfile = set_outfile_name(infile, doctype)
  281. # sanity check: validate split level
  282. if doctype != 'html': splitlevel = '' # only valid for HTML target
  283. if splitlevel:
  284. # checkings
  285. if outfile == pipefileid:
  286. Error('You need to provide a FILE (not STDIN) when using --split')
  287. if splitlevel[0] not in '012':
  288. Error('Option --split must be 0, 1 or 2')
  289. # check for sgml-tools
  290. #TODO how to test (in a clever way) if an executable is in path?
  291. #TODO os.system() return code? sgml2html without --help exit 0 also?
  292. #TODO bah! implement sgml2html split natively and we're done
  293. # Error("Sorry, you must have 'sgml2html' program to use --split")
  294. # set things
  295. FLAGS['stdout'] = 0 # no --stdout
  296. doctype = 'sgml' # 1st do a sgml, then sgml2html
  297. outfile = set_outfile_name(infile, doctype)
  298. # sanity check: source loss!
  299. if infile != pipefileid and infile == outfile:
  300. Error("SUICIDE WARNING!!! (try --stdout)\n source"+\
  301. " and target files has the same name: %s"%outfile)
  302. ### yes, i've got my sample.t2t file deleted before add this test... :/
  303. return infile,outfile,doctype
  304. #TODO splitlevel, lang
  305. #---End of ParseCmdlineOptions
  306. def toc_master(doctype, header, doc, toc):
  307. "decide to include TOC or not on the outlist"
  308. # deal with the TOC options
  309. if FLAGS['toc'] or FLAGS['toconly']:
  310. # format TOC lines
  311. ### here we do toc as a valid t2t marked text (list type)
  312. FLAGS['noheaders'] = 1
  313. x,y,toc = doitall(['']+toc+['',''], doctype)
  314. # TOC between bars (not for --toconly)
  315. if FLAGS['toc']:
  316. para = TAGparagraph[T]
  317. tocbar = [para, re_x.sub('-'*72,TAGbar1[T]), para]
  318. toc = tocbar + toc + tocbar
  319. if FLAGS['toconly']: header = doc = []
  320. else:
  321. toc = []
  322. # on tex, \tableofcontents do it all - see doHeader()
  323. if doctype == 'tex' and not FLAGS['toconly']:
  324. toc = []
  325. return header + toc + doc
# check if we will enter on GUI mode
# the GUI is requested by a lone --gui argument...
if len(sys.argv) == 2 and sys.argv[1] == '--gui':
    FLAGS['gui'] = 1
# ...and it is the default when called with no arguments on the
# platforms whose name starts with mac, cyg or win
if len(sys.argv) == 1 and sys.platform[:3] in ['mac','cyg','win']:
    FLAGS['gui'] = 1
# check for GUI mode resources
if FLAGS['gui'] == 1:
    try:
        from tkFileDialog import askopenfilename
        from tkMessageBox import showinfo, showwarning, showerror
        import Tkinter
    except:
        # if GUI was forced, show the error message
        if len(sys.argv) > 1 and sys.argv[1] == '--gui':
            traceback.print_exc()
            sys.exit()
        # or just abandon GUI mode, and continue
        else:
            FLAGS['gui'] = 0
# set the Line Break across platforms
# (LB is used by addLineBreaks() when the output is written to a file)
LB = '\n' # default
if sys.platform[:3] == 'win': LB = '\r\n'
#elif sys.platform[:3] == 'cyg': LB = '\r\n' # not sure if it's best :(
elif sys.platform[:3] == 'mac': LB = '\r'
### all the registered tags
# Each TAG* list holds the markup string of one structure for every
# target. The lists are indexed by T, the position of the target on the
# 'tags' list, so the columns are: txt, sgml, html, pm6, mgp, moin, man
# and tex (the last one is still commented out on 'tags').
# The \a (bell) char marks the place where data is inserted (see re_x).
TAGparagraph = ['', '<p>', '<P>', '<@Normal:>', '%font "normal", size 5\n', '', '.P', '']
TAGtitle1 = [' \a' , '<sect>\a<p>' , '<H1>\a</H1>', '\n<@Title1:>\a', '%page\n\n\a', '= \a =', '.SH \a', r'\n\\newpage\section{\a}']
# TODO tex: '\section*' is unumbered, but not added to TOC also
TAGtitle2 = ['\t\a' , '<sect1>\a<p>', '<H2>\a</H2>', '\n<@Title2:>\a', '%page\n\n\a', '== \a ==', '.SS \a', r'\subsection{\a}']
TAGtitle3 = ['\t\t\a' , '<sect2>\a<p>', '<H3>\a</H3>', '\n<@Title3:>\a', '%page\n\n\a', '=== \a ===', '.SS \a', r'\subsubsection{\a}']
TAGtitle4 = ['\t\t\t\a' , '<sect3>\a<p>', '<H4>\a</H4>', '\n<@Title4:>\a', '%page\n\n\a', '==== \a ====', '.SS \a', r'\paragraph{\a}\\textbf{}\\\\\n'] #tex: DIRTY: para+emptyBF+\\+\n
TAGtitle5 = ['\t\t\t\t\a', '<sect4>\a<p>', '<H5>\a</H5>', '\n<@Title5:>\a', '%page\n\n\a', '===== \a =====', '.SS \a', TAGtitle4[7]]
TAGareaPreOpen = ['', '<tscreen><verb>', '<PRE>', '<@PreFormat:>', '\n%font "mono"', '{{{', '.nf', r'\begin{verbatim}']
TAGareaPreClose = ['', '</verb></tscreen>', '</PRE>', '', '%font "normal"', '}}}', '.fi\n', r'\end{verbatim}']
TAGareaQuoteOpen = [' ', '<quote>', '<BLOCKQUOTE>', '<@Quote:>', '%prefix " "', ' ', '\n', r'\begin{quotation}']
TAGareaQuoteClose = ['', '</quote>', '</BLOCKQUOTE>', '', '%prefix " "', '', '\n', r'\end{quotation}']
TAGfontMonoOpen = ['', '<tt>', '<CODE>', '<FONT "Lucida Console"><SIZE 9>', '\n%cont, font "mono"\n', '{{{', '', r'\texttt{']
TAGfontMonoClose = ['', '</tt>', '</CODE>', '<SIZE$><FONT$>', '\n%cont, font "normal"\n', '}}}', '', '}']
TAGfontBoldOpen = ['', '<bf>', '<B>', '<B>', '\n%cont, font "normal-b"\n', "'''", r'\\fB', r'\textbf{']
TAGfontBoldClose = ['', '</bf>', '</B>', '<P>', '\n%cont, font "normal"\n', "'''", r'\\fP', '}']
TAGfontItalicOpen = ['', '<em>', '<I>', '<I>', '\n%cont, font "normal-i"\n', "''", r'\\fI', r'\textit{']
TAGfontItalicClose = ['', '</em>', '</I>', '<P>', '\n%cont, font "normal"\n', "''", r'\\fP', '}']
TAGfontBoldItalicOpen = ['', '<bf><em>', '<B><I>', '<B><I>', '\n%cont, font "normal-bi"\n', "'''''", '\n.BI ', r'\textbf{\textit{']
TAGfontBoldItalicClose = ['', '</em></bf>', '</I></B>', '<P>', '\n%cont, font "normal"\n', "'''''", '\n\\&', '}}']
# sgml and moin reuse the bold+italic tags for the underline
TAGfontUnderlineOpen = ['', TAGfontBoldItalicOpen[1], '<U>', '<U>', '\n%cont, fore "cyan"\n', TAGfontBoldItalicOpen[5], '', r'\underline{']
TAGfontUnderlineClose = ['', TAGfontBoldItalicClose[1], '</U>', '<P>', '\n%cont, fore "white"\n', TAGfontBoldItalicClose[5], '', '}']
# man reuses the PRE area tags to open and close the lists
TAGlistOpen = ['', '<itemize>', '<UL>', '<@Bullet:>', '', '', '\n'+TAGareaPreOpen[6], r'\begin{itemize}']
TAGlistClose = ['', '</itemize>', '</UL>', '', '', '', TAGareaPreClose[6], r'\end{itemize}']
TAGlistItem = ['- ', '<item>', '<LI>', '\x95 ', '', '* ', '* ', r'\item '] # \x95 == ~U
TAGnumlistOpen = ['', '<enum>', '<OL>', '<@Bullet:>', '', '', '\n'+TAGareaPreOpen[6], r'\begin{enumerate}']
TAGnumlistClose = ['', '</enum>', '</OL>', '', '', '', TAGareaPreClose[6], r'\end{enumerate}']
TAGnumlistItem = ['\a. ', '<item>', '<LI>', '~U ', '\a. ', '\a. ', '\a. ', r'\item ']
TAGdeflistOpen = ['', '', '<DL>' , '', '', '', '', r'\begin{description}']
TAGdeflistItem1 = ['', '', '<DT>\a</DT>', '', '', '', '', '\\item[\a]']
TAGdeflistItem2 = ['', '', '<DD>' , '', '', '', '', '']
TAGdeflistClose = ['', '', '</DL>' , '', '', '', '', r'\end{description}']
TAGbar1 = ['\a', '<!-- \a -->', '<HR NOSHADE SIZE=1>', '\a', '%bar "white" 5', '----', '\n\n', '\n\\hrulefill{}\n']
TAGbar2 = ['\a', '<!-- \a -->', '<HR NOSHADE SIZE=5>', '\a', '%pause', '----', '\n\n', '\n\\hrulefill{}\n']
TAGurl = ['\a', '<htmlurl url="\a" name="\a">', '<A HREF="\a">\a</A>', TAGfontUnderlineOpen[3]+'\a'+TAGfontUnderlineClose[3], '\n%cont, fore "cyan"\n\a\n%cont, fore "white"\n', '[\a]', '\a', '\\url{\a}']
TAGurlMark = ['\a (\a)', TAGurl[1], TAGurl[2], '\a '+TAGurl[3], '\a '+TAGurl[4], '[\a \a]', '\a (\a)', '\\textit{\a} (\\url{\a})']
TAGemail = ['\a', '<htmlurl url="mailto:\a" name="\a">', '<A HREF="mailto:\a">\a</A>', '\a', TAGurl[4], '[\a]', '\a', '\\email{\a}']
TAGemailMark = ['\a (\a)', TAGemail[1], TAGemail[2], '\a '+TAGemail[3], '\a '+TAGemail[4], '[\a \a]', '\a (\a)', '\\textit{\a} (\\email{\a})']
TAGimg = ['[\a]', '<figure><ph vspace=""><img src="\a"></figure>', '<IMG ALIGN="\a" SRC="\a" BORDER="0">', '\a', '\n%center\n%newimage "\a", left\n', '[\a]', '\a', '\a']
TAGtableOpen = [ '', '<table><tabular ca="c">', '<table align=center cellpadding=4 border=\a>', '', '', '', '', r'\begin{center}\begin{tabular}']
TAGtableLineOpen = [ '', '', '<tr>', '', '', '||', '', r'\hline ']
TAGtableLineClose = [ '', '<rowsep>', '</tr>', '', '', '', '', r' \\']
TAGtableCellOpen = [ '', '', '<td>', '', '', '', '', '']
TAGtableCellClose = [ '', '<colsep>', '</td>', '', '', '||', '', ' & ']
TAGtableTitleCellOpen = [ '', '', '<th>', '', '', '', '', r'\textbf{']
TAGtableTitleCellClose = [ '', '<colsep>', '</th>', '', '', '||', '', '} & ']
TAGtableClose = [ '', '</tabular></table>', '</table>', '', '', '', '', r'\end{tabular}\end{center}']
TAGanchor = ['', '', '<a name="\a">', '', '', '', '', '']
# the End Of Document tag, the last line added by doFooter()
TAGEOD = ['', '</article>', '</BODY></HTML>', '', '%%EOD', '', '', r'\end{document}']
### the cool regexes
# title: 1 to 5 '=' chars, the title text, the very same '=' chars
# again and an optional [label] at the end of the line
re_title = re.compile(r'^\s*(?P<tag>={1,5})(?P<txt>[^=].*[^=])\1(\[(?P<label>\w+)\])?$')
# a line with just '---' opens and also closes the PRE area
re_areaPreOpen = re_areaPreClose = re.compile(r'^---$')
re_quote = re.compile(r'^\t+')
re_1linePreOld = re.compile(r'^ {4}([^\s-])')
re_1linePre = re.compile(r'^--- ')
# beautifiers: `mono`, **bold**, //italic//, __underline__, */bolditalic/*
re_mono = re.compile(r'`([^`]+)`')
re_bold = re.compile(r'\*\*([^\s*].*?)\*\*')
# the // must not come after a ':' (as it does on http://)
re_italic = re.compile(r'(^|[^:])//([^ /].*?)//')
re_underline = re.compile(r'__([^_].*?)__') # underline lead/trailing blank
re_bolditalic = re.compile(r'\*/([^/].*?)/\*')
# list items: optional indent, the list id char and one blank
re_list = re.compile(r'^( *)([+-]) ([^ ])')
re_deflist = re.compile(r'^( *)(=) ([^:]+):')
# bar: a line with 20 or more '_', '=' or '-' chars
re_bar =re.compile(r'^\s*([_=-]{20,})\s*$')
re_table = re.compile(r'^ *\|\|?[<:>]*\s')
# link things
# pieces used to compose the URL and email regexes below; the entries
# without brackets are char lists to be placed inside a [] by the user
urlskel = {
    'proto' : r'(https?|ftp|news|telnet|gopher|wais)://',
    'guess' : r'(www[23]?|ftp)\.', # w/out proto, try to guess
    'login' : r'A-Za-z0-9_.-', # for ftp://login@domain.com
    'pass' : r'[^ @]*', # for ftp://login:password@domain.com
    'chars' : r'A-Za-z0-9%._/~:,=-', # %20(space), :80(port)
    'anchor': r'A-Za-z0-9%._-', # %nn(encoded)
    'form' : r'A-Za-z0-9/%&=+.@*_-',# .@*_-(as is)
    'punct' : r'.,;:!?'
}
### WARNING: regex nightmares ahead :)
# textual descriptions are on --help's style: [...] is optional, | is OR
#
# username [ :password ] @
patt_url_login = r'([%s]+(:%s)?@)?'%(urlskel['login'],urlskel['pass'])
#
# [ http:// ] [ username:password@ ] domain.com [ / ] [ #anchor | ?form=data ]
retxt_url = r'\b(%s%s|%s)[%s]+\b/?(#[%s]+|\?[%s]+)?'%(
    urlskel['proto'],patt_url_login, urlskel['guess'],
    urlskel['chars'],urlskel['anchor'],urlskel['form'])
#
# filename | [ filename ] #anchor
retxt_url_local = r'[%s]+|[%s]*(#[%s]+)'%(
    urlskel['chars'],urlskel['chars'],urlskel['anchor'])
#
# user@domain [ ?form=data ]
retxt_email = r'\b[%s]+@([A-Za-z0-9_-]+\.)+[A-Za-z]{2,4}\b(\?[%s]+)?'%(
    urlskel['login'],urlskel['form'])
#
# email | url
re_link = re.compile(r'%s|%s'%(retxt_url,retxt_email), re.I)
#
# [image.EXT]
retxt_img = r'\[([\w_,.+%$#@!?+~/-]+\.(png|jpe?g|gif|eps|bmp))\]'
#
# \[ label | imagetag url | email | filename \]
re_linkmark = re.compile(r'\[(?P<label>%s|[^]]+) (?P<link>%s|%s|%s)\]'%(
    retxt_img, retxt_url, retxt_email, retxt_url_local),
    re.L+re.I)
#
# special char to place data on TAGs contents (\a == bell)
re_x = re.compile('\a')
#
# special lines: blank lines and comment lines
re_blankline = re.compile(r'^\s*$')
re_comment = re.compile(r'^(//|%)')
#
# %%date [ (formatting) ]
re_date = re.compile(r'%%date\b(\((?P<fmt>.*?)\))?', re.I)
#
re_img = re.compile(retxt_img, re.L+re.I)
# the date is the only macro, so both names share the same regex
re_macro = re_date
# Settings are still in development stage - DON'T USE THIS
#re_setting = re.compile(r'^%\s*Document \s*(Encoding|Toc)\s*:\s*(.*)$',re.I)
#
### END OF regex nightmares
  471. def doHeader(doctype, headdic):
  472. if not HEADER_TEMPLATE.has_key(doctype):
  473. Error("doheader: Unknow doctype '%s'"%doctype)
  474. Debug('HEADER data: %s'%headdic, 1)
  475. template = string.split(HEADER_TEMPLATE[doctype], '\n')
  476. # scan for empty dictionary keys
  477. # if found, scan template lines for that key reference
  478. # if found, remove the reference
  479. # if there aren't any other key reference on the same line, remove it
  480. for key in headdic.keys():
  481. if not headdic[key]:
  482. for line in template:
  483. if string.count(line, key):
  484. sline = string.replace(line, '%%(%s)s'%key, '')
  485. if not string.count(sline, '%(HEADER'):
  486. template.remove(line)
  487. # populate template with data
  488. template = string.join(template, '\n') % headdic
  489. # post processing
  490. if doctype == 'tex':
  491. if FLAGS['toc']:
  492. template = template + '\n' + r'\newpage\tableofcontents'
  493. if headdic['HEADER3'] == currdate:
  494. # let tex format today
  495. template = re.sub(r'\\date\{.*?}', r'\date', template)
  496. return string.split(template, '\n')
  497. def doCommentLine(doctype,txt):
  498. if doctype == 'sgml' or doctype == 'html': ret = "<!-- %s -->"%txt
  499. elif doctype == 'mgp': ret = "%%%% %s"%txt
  500. elif doctype == 'man': ret = '.\\" %s'%txt
  501. elif doctype == 'tex': ret = "%% %s"%txt
  502. else: ret = ''
  503. return ret
  504. def doFooter(doctype):
  505. ret = []
  506. typename = doctype
  507. if doctype == 'tex': typename = 'LaTeX2e'
  508. ppgd = '%s code generated by txt2tags %s (%s)'%(typename,my_version,my_url)
  509. cmdline = 'cmdline: txt2tags %s'%string.join(CMDLINE[1:], ' ')
  510. ret.append('\n'+doCommentLine(doctype,ppgd))
  511. ret.append(doCommentLine(doctype,cmdline))
  512. ret.append(TAGEOD[T])
  513. return ret
  514. def doEscape(doctype,txt):
  515. if doctype == 'html' or doctype == 'sgml':
  516. txt = re.sub('&','&amp;',txt)
  517. txt = re.sub('<','&lt;',txt)
  518. txt = re.sub('>','&gt;',txt)
  519. if doctype == 'sgml': txt = re.sub('\xff','&yuml;',txt) # "+y
  520. elif doctype == 'pm6':
  521. txt = re.sub('<','<\#60>',txt)
  522. elif doctype == 'mgp':
  523. txt = re.sub('^%',' %',txt) # add leading blank to avoid confusion
  524. #txt = re.sub('^%([^%])','%prefix ""\n %\n%cont, prefix " "\n\\1',txt)
  525. elif doctype == 'man':
  526. txt = re.sub('^\.', ' .',txt) # command ID
  527. elif doctype == 'tex':
  528. txt = string.replace(txt, '\\', r'\verb!\!')
  529. txt = string.replace(txt, '~', r'\verb!~!')
  530. txt = string.replace(txt, '^', r'\verb!^!')
  531. txt = re.sub('([#$&%{}])', r'\\\1', txt)
  532. # TIP the _ is escaped at end
  533. return txt
  534. def doFinalEscape(txt):
  535. if doctype == 'pm6': txt = string.replace(txt, r'\<',r'<\#92><')
  536. elif doctype == 'man': txt = string.replace(txt, '-', r'\-')
  537. elif doctype == 'tex': txt = string.replace(txt, '_', r'\_')
  538. return txt
  539. def doEscapeEscapechar(txt):
  540. return string.replace(txt, '\\', '\\\\')
  541. def addLineBreaks(list):
  542. "use LB to respect sys.platform"
  543. ret = []
  544. for line in list:
  545. line = string.replace(line,'\n',LB) # embedded \n's
  546. ret.append(line+LB) # add final line break
  547. return ret
  548. def doPreLine(doctype,line):
  549. "Parsing procedures for preformatted (verbatim) lines"
  550. # tex doesn't need escapes inside verbatim
  551. if doctype != 'tex': line = doEscape(doctype,line)
  552. if doctype == 'pm6': line = doFinalEscape(line)
  553. elif doctype in ['txt','man','html']: line = ' '+line # align
  554. return line
  555. def doCloseTable(doctype):
  556. global istable, istableaware, tableborder, pendingtableopen
  557. ret = ''
  558. if istableaware:
  559. if doctype == 'tex' and tableborder:
  560. ret = TAGtableLineOpen[T]+TAGtableClose[T]+'\n'
  561. else:
  562. ret = TAGtableClose[T]+'\n'
  563. else:
  564. ret = TAGareaPreClose[T]
  565. istable = tableborder = pendingtableopen = 0
  566. return ret
  567. def doCloseQuote(howmany=1):
  568. global quotedepth
  569. ret = []
  570. for i in range(howmany):
  571. quotedepth.pop()
  572. #TODO align open/close tag -> FREE_ALING_TAG = 1 (man not)
  573. ret.append(TAGareaQuoteClose[T])
  574. return string.join(ret,'\n')
  575. def doCloseList(doctype, howmany=1):
  576. global listindent, listids
  577. ret = []
  578. for i in range(howmany):
  579. if listids[-1] == '-': tag = TAGlistClose[T]
  580. elif listids[-1] == '+': tag = TAGnumlistClose[T]
  581. elif listids[-1] == '=': tag = TAGdeflistClose[T]
  582. if not tag: tag = TAGlistClose[T] # default
  583. if tag:
  584. # man tags just for mother-list and at ^
  585. if doctype == 'man':
  586. if len(listindent) == 1:
  587. ret.append(tag)
  588. else:
  589. ret.append(listindent[-1]+tag)
  590. del listindent[-1]
  591. del listids[-1]
  592. return string.join(ret,'\n')
  593. ################################################################################
  594. ###MerryChristmas,IdontwanttofighttonightwithyouImissyourbodyandIneedyourlove###
  595. ################################################################################
  596. def doitall(inlines, doctype):
  597. # global vars for doClose*()
  598. global T, quotedepth, listindent, listids
  599. global istable, istableaware, tableborder, pendingtableopen
  600. # the defaults
  601. linkmask = '@@_link_@@'
  602. monomask = '@@_mono_@@'
  603. macromask = '@@_macro_@@'
  604. T = tags.index(doctype)
  605. AREA = NewArea('head',0) # then conf, then body
  606. HEADERS = { 'HEADER1': '-NO TITLE-', 'HEADER2':'', 'HEADER3':'' }
  607. # SETTINGS = {}
  608. ret = []
  609. fixedinfile = []
  610. toclist = []
  611. header = []
  612. f_tt = 0
  613. listindent = []
  614. listids = []
  615. listcount = []
  616. titlecount = ['',0,0,0,0,0]
  617. f_lastblank = 0
  618. holdspace = ''
  619. listholdspace = ''
  620. quotedepth = []
  621. istable = 0
  622. tableborder = 0
  623. pendingtableopen = 0
  624. tablealign = []
  625. if outfile != pipefileid:
  626. if not FLAGS['gui'] and not FLAGS['fixme']:
  627. print "--- %s..."%doctype
  628. # let's mark it up!
  629. linenr = 0
  630. for lineref in range(len(inlines)):
  631. skip_continue = 0
  632. linkbank = []
  633. monobank = []
  634. macrobank = []
  635. linenr = lineref +1
  636. untouchedline = inlines[lineref]
  637. line = string.rstrip(untouchedline)
  638. # save line to the 'fixed' buffer
  639. fixedinfile.append(untouchedline)
  640. #print 'LINE: %s'%repr(line) # for heavy debug
  641. # detect if head section is over
  642. #TIP 'not line' depends on previous line.rstrip()
  643. if (linenr == 4 and AREA == 'head') or \
  644. (linenr == 1 and not line):
  645. AREA = NewArea('conf',linenr)
  646. if not FLAGS['noheaders']:
  647. header = doHeader(doctype,HEADERS)
  648. # we need (not really) to mark each paragraph
  649. #TODO check if this is really needed
  650. if doctype == 'pm6' and f_lastblank:
  651. if f_tt or AREA == 'head' or listindent:
  652. holdspace = ''
  653. else:
  654. holdspace = TAGparagraph[T]+'\n'
  655. # any NOT table line, closes an open table
  656. if istable and not re_table.search(line):
  657. ret.append(doCloseTable(doctype))
  658. #---------------------[ PRE formatted ]----------------------
  659. #TIP we'll never support beautifiers inside pre-formatted
  660. # we're already on a PRE area
  661. if f_tt:
  662. # closing PRE
  663. if re_areaPreClose.search(line):
  664. if doctype != 'pm6': ret.append(TAGareaPreClose[T])
  665. f_tt = 0
  666. continue
  667. # normal PRE-inside line
  668. line = doPreLine(doctype, line)
  669. ret.append(line)
  670. continue
  671. # detecting PRE area init
  672. if re_areaPreOpen.search(line):
  673. ret.append(TAGareaPreOpen[T])
  674. f_lastblank = 0
  675. f_tt = 1
  676. continue
  677. # one line PRE-formatted text
  678. if re_1linePre.search(line):
  679. f_lastblank = 0
  680. line = re_1linePre.sub('',line)
  681. line = doPreLine(doctype, line)
  682. ret.append('%s\n%s\n%s'%(TAGareaPreOpen[T],line,TAGareaPreClose[T]))
  683. continue
  684. #---------------------[ blank lines ]-----------------------
  685. #TODO "holdspace" to save <p> to not show in closelist
  686. if re_blankline.search(line):
  687. # closing all open quotes
  688. if quotedepth: ret.append(doCloseQuote(len(quotedepth)))
  689. # closing all open lists
  690. if f_lastblank: # 2nd consecutive blank line
  691. if listindent: # closes list (if any)
  692. ret.append(doCloseList(doctype,len(listindent)))
  693. holdspace = ''
  694. continue # consecutive blanks are trash
  695. # normal blank line
  696. if doctype != 'pm6' and AREA == 'body':
  697. # paragraph (if any) is wanted inside lists also
  698. if listindent:
  699. holdspace = holdspace+TAGparagraph[T]+'\n'
  700. elif doctype == 'html': ret.append(TAGparagraph[T])
  701. # sgml: the quote close tag must not be \n\n</quote>
  702. elif doctype == 'sgml' and quotedepth:
  703. skip_continue = 1
  704. # otherwise we just print a blank line
  705. else: ret.append('')
  706. f_lastblank = 1
  707. if not skip_continue: continue
  708. else:
  709. f_lastblank = 0 # reset blank status
  710. #---------------------[ comments ]-----------------------
  711. # just skip them (if not macro or setting)
  712. if re_comment.search(line) and not re_date.match(line):
  713. ### Still in development stage
  714. # # detect settings
  715. # if re_setting.search(line):
  716. # if AREA == 'conf':
  717. # m = re_setting.search(line)
  718. # name, val = m.group(1), m.group(2)
  719. # SETTINGS[string.lower(name)] = string.strip(val)
  720. # Debug("Found Setting '%s', value '%s'"%(name,val),1,linenr)
  721. # continue
  722. # obsoleted comment format
  723. if line[0] == '/':
  724. # fixes it
  725. if FLAGS['fixme']:
  726. fixedinfile.pop() # discard original line, save new
  727. fixedinfile.append(re.sub('^//','%',untouchedline))
  728. Fixed(linenr, '// comment line', '% comment line')
  729. # just show a warning message
  730. else:
  731. old = '// as the comment line string'
  732. new = '% as the new comment line char'
  733. Obsoleted(linenr, old, new, '1.3')
  734. f_lastblank = 1
  735. continue
  736. #---------------------[ BODY detect ]-----------------------
  737. ### if got here, its a header or a valid line
  738. if AREA == 'conf':
  739. # oops, not header, so we're now on document BODY
  740. AREA = NewArea('body', linenr)
  741. # so, let's print the opening paragraph
  742. if doctype != 'pm6':
  743. ret.append(TAGparagraph[T])
  744. #---------------------[ Title ]-----------------------
  745. #TODO set next blank and set f_lastblank or f_lasttitle
  746. if re_title.search(line) and not listindent and AREA == 'body':
  747. line = doEscape(doctype,line)
  748. m = re_title.search(line)
  749. tag = m.group('tag')
  750. level = len(tag)
  751. tag = eval('TAGtitle%s[T]'%level)
  752. txt = string.strip(m.group('txt'))
  753. if doctype == 'sgml':
  754. txt = re.sub(r'\[', r'&lsqb;', txt)
  755. txt = re.sub(r'\\', r'&bsol;', txt)
  756. if FLAGS['enumtitle']: ### numbered title
  757. id = '' ; n = level #
  758. titlecount[n] = titlecount[n] +1 # add count
  759. if n < len(titlecount)-1: # reset sublevels count
  760. for i in range(n+1, len(titlecount)): titlecount[i] = 0
  761. for i in range(n): # compose id from hierarchy
  762. id = "%s%d."%(id,titlecount[i+1])
  763. idtxt = "%s %s"%(id, txt) # add id to title
  764. else:
  765. idtxt = txt
  766. anchorid = 'toc%d'%(len(toclist)+1)
  767. if TAGanchor[T] and FLAGS['toc']:
  768. ret.append(re_x.sub(anchorid,TAGanchor[T]))
  769. # escape to handle \ on title
  770. idtxt = string.replace(idtxt, '\\', '\\\\')
  771. line = re_title.sub(tag,line)
  772. ret.append(re_x.sub(idtxt,line))
  773. # let's do some TOC!
  774. if TAGanchor[T]:
  775. # tocitemid = '#toc%d'%(len(toclist)+1)
  776. # TOC more readable with master topics not linked at number
  777. # stoled idea from windows .CHM files (help system)
  778. if FLAGS['enumtitle'] and level == 1:
  779. tocitem = '%s+ [%s #%s]'%(' '*level,txt,anchorid)
  780. else:
  781. tocitem = '%s- [%s #%s]'%(' '*level,idtxt,anchorid)
  782. else:
  783. tocitem = '%s- %s'%(' '*level,idtxt)
  784. if doctype in ['txt', 'man']:
  785. tocitem = '%s%s' %(' '*level,idtxt)
  786. if level <= 3: toclist.append(tocitem) # max toc level: 3
  787. # add "underline" to text titles
  788. if doctype == 'txt': ret.append(re_x.sub('='*len(idtxt),tag))
  789. continue
  790. #TODO! labeltxt = ''
  791. # label = m.group('label')
  792. # if label: labeltxt = '<label id="%s">' %label
  793. #---------------------[ apply masks ]-----------------------
  794. ### protect important structures from escaping and formatting
  795. # protect pre-formatted font text
  796. while re_mono.search(line):
  797. txt = re_mono.search(line).group(1)
  798. if doctype == 'tex':
  799. monobank.append(txt)
  800. else:
  801. monobank.append(doEscape(doctype,txt))
  802. line = re_mono.sub(monomask,line,1)
  803. # protect macros
  804. while re_macro.search(line):
  805. txt = re_macro.search(line).group()
  806. macrobank.append(txt)
  807. line = re_macro.sub(macromask,line,1)
  808. # protect URLs and emails
  809. while re_linkmark.search(line) or re_link.search(line):
  810. # try to match plain or named links
  811. match_link = re_link.search(line)
  812. match_named = re_linkmark.search(line)
  813. # define the current match
  814. if match_link and match_named:
  815. # both types found, which is the first?
  816. m = match_link
  817. if match_named.start() < match_link.start():
  818. m = match_named
  819. else:
  820. # just one type found, we're fine
  821. m = match_link or match_named
  822. # extract link data and apply mask
  823. if m == match_link: # plain link
  824. label = ''
  825. link = m.group()
  826. line = re_link.sub(linkmask,line,1)
  827. else: # named link
  828. label = string.rstrip(m.group('label'))
  829. link = m.group('link')
  830. line = re_linkmark.sub(linkmask,line,1)
  831. # save link data to the link bank
  832. linkbank.append((label, link))
  833. #---------------------[ do Escapes ]-----------------------
  834. # the target-specific special char escapes for body lines
  835. line = doEscape(doctype,line)
  836. #---------------------[ Horizontal Bar ]--------------------
  837. if re_bar.search(line):
  838. txt = re_bar.search(line).group(1)
  839. if txt[0] == '=': bar = TAGbar2[T]
  840. else : bar = TAGbar1[T]
  841. line = re_bar.sub(bar,line)
  842. ret.append(re_x.sub(txt,line))
  843. continue
  844. #---------------------[ Quote ]-----------------------
  845. if re_quote.search(line) and AREA == 'body':
  846. # store number of leading TABS
  847. currquotedepth = len(re_quote.search(line).group(0))
  848. # SGML doesn't support nested quotes
  849. if doctype == 'sgml':
  850. if quotedepth and currquotedepth > quotedepth[-1]:
  851. currquotedepth = quotedepth[-1]
  852. # for don't-close-me quote tags
  853. if not TAGareaQuoteClose[T]:
  854. line = re_quote.sub(TAGareaQuoteOpen[T]*currquotedepth, line)
  855. else:
  856. # new (sub)quote
  857. if not quotedepth or currquotedepth > quotedepth[-1]:
  858. quotedepth.append(currquotedepth)
  859. ret.append(TAGareaQuoteOpen[T])
  860. if doctype != 'html'and doctype != 'sgml':
  861. line = re_quote.sub('', line)
  862. # closing quotes
  863. while currquotedepth < quotedepth[-1]:
  864. ret.append(doCloseQuote(1))
  865. else:
  866. # closing all quotes (not quote line)
  867. if quotedepth: ret.append(doCloseQuote(len(quotedepth)))
  868. #---------------------[ Lists ]-----------------------
  869. if (re_list.search(line) or re_deflist.search(line)) and AREA == 'body':
  870. if re_list.search(line): rgx = re_list
  871. else : rgx = re_deflist
  872. m = rgx.search(line)
  873. listitemindent = m.group(1)
  874. listtype = m.group(2)
  875. extra = m.group(3) # regex anchor char
  876. if listtype == '=':
  877. listdefterm = m.group(3)
  878. extra = ''
  879. if doctype == 'tex':
  880. # on tex, brackets are term delimiters
  881. # TODO escape ] at list definition
  882. # \], \rbrack{} and \verb!]! don't work :(
  883. #listdefterm = string.replace(listdefterm, ']', '???')
  884. pass
  885. # tex maximum list depth is 4, so let's force it
  886. if doctype == 'tex' and len(listindent) == 4:
  887. if len(listitemindent) > len(listindent[-1]):
  888. listitemindent = listindent[-1]
  889. # new sublist
  890. if not listindent or len(listitemindent) > len(listindent[-1]):
  891. listindent.append(listitemindent)
  892. listids.append(listtype)
  893. if listids[-1] == '-': tag = TAGlistOpen[T]
  894. elif listids[-1] == '+': tag = TAGnumlistOpen[T]
  895. elif listids[-1] == '=': tag = TAGdeflistOpen[T]
  896. if not tag: tag = TAGlistOpen[T] # default
  897. # no need to reopen <pre> tag on man sublists
  898. if doctype == 'man' and len(listindent) != 1: tag = ''
  899. openlist = listindent[-1]+tag
  900. if doctype == 'pm6': listholdspace = openlist
  901. else:
  902. if string.strip(openlist): ret.append(openlist)
  903. # reset item manual count
  904. listcount.append(0)
  905. # closing sublists
  906. while len(listitemindent) < len(listindent[-1]):
  907. close = doCloseList(doctype)
  908. if close: ret.append(close)
  909. if listcount: del listcount[-1]
  910. # normal item
  911. listid = listindent[-1]
  912. if listids[-1] == '-':
  913. tag = TAGlistItem[T]
  914. elif listids[-1] == '+':
  915. tag = TAGnumlistItem[T]
  916. listcount[-1] = listcount[-1] +1
  917. if doctype in ['txt', 'man', 'moin', 'mgp']:
  918. tag = re_x.sub(str(listcount[-1]), tag)
  919. elif listids[-1] == '=':
  920. if not TAGdeflistItem1[T]:
  921. # emulate def list, with <li><b>def</b>:
  922. tag = TAGlistItem[T] +TAGfontBoldOpen[T] +listdefterm
  923. tag = tag +TAGfontBoldClose[T] +':'
  924. else:
  925. tag = re_x.sub(listdefterm, TAGdeflistItem1[T])
  926. tag = tag + TAGdeflistItem2[T] # open <DD>
  927. if doctype == 'mgp': listid = len(listindent)*'\t'
  928. line = rgx.sub(listid+tag+extra,line)
  929. if listholdspace:
  930. line = listholdspace+line
  931. listholdspace = ''
  932. if doctype == 'sgml': line = re.sub(r'\[', r'&lsqb;', line)
  933. #---------------------[ Table ]-----------------------
  934. #TODO rework all this table spaghetti code
  935. #TODO escape undesired format inside table
  936. #TODO not rstrip if table line (above)
  937. #TODO add man, pm6 targets
  938. if re_table.search(line) and AREA == 'body':
  939. closingbar = re.compile(r'\| *$')
  940. tableid = line[re_table.search(line).end()-1]
  941. if not istable: # table header
  942. if doctype in ['sgml', 'html', 'moin', 'tex']:
  943. istableaware = 1
  944. # obsoleted TAB-made table
  945. if tableid == '\t': tableborder = 1
  946. if closingbar.search(line): tableborder = 1
  947. # add border=1 and open table
  948. if doctype == 'tex':
  949. pendingtableopen = 1
  950. else:
  951. ret.append(re_x.sub(`tableborder`, TAGtableOpen[T]))
  952. else:
  953. istableaware = 0 ; ret.append(TAGareaPreOpen[T])
  954. istable = 1
  955. if istableaware:
  956. line = re.sub(r'^ *' , '', line) # del leading spaces
  957. line = closingbar.sub('', line) # del last bar |
  958. # obsoleted TAB-made table
  959. if line[1] == '\t' or line[1:3] == '|\t':
  960. # fix it
  961. if FLAGS['fixme']:
  962. fixedinfile.pop() # discard original line, save new
  963. fixedinfile.append(FixTable(untouchedline))
  964. Fixed(linenr, 'TAB-made table', 'PIPE-made table')
  965. # just show a warning message
  966. else:
  967. Obsoleted(linenr,'TAB-made table',
  968. 'PIPE-made table','1.3')
  969. tablefmt, tablecel = re.split(r'\s', line, 1)
  970. tablefmt = tablefmt[1:] # cut mark off
  971. # TODO v1.3: take \t\|?| out
  972. # TODO use ' | ' as field separator
  973. tablecel = re.split(r'\t\|?| \|', tablecel)
  974. line = ''
  975. if doctype == 'tex' and pendingtableopen:
  976. vlinechar = '|'
  977. if not tableborder: vlinechar = ''
  978. cellspec = vlinechar+'l' # lcr = left, center, right align
  979. tablespec = (cellspec)*len(tablecel)
  980. tablespec = '{%s%s}'%(tablespec,vlinechar)
  981. ret.append(TAGtableOpen[T]+tablespec)
  982. pendingtableopen = 0
  983. # setting cell and line tags
  984. tl1, tl2 = TAGtableLineOpen[T], TAGtableLineClose[T]
  985. tc1, tc2 = TAGtableCellOpen[T], TAGtableCellClose[T]
  986. #TODO if ' | ' table cell is center align
  987. if tablefmt and tablefmt[0] == '|': # title cell
  988. tc1,tc2 = TAGtableTitleCellOpen[T],TAGtableTitleCellClose[T]
  989. if doctype == 'html': tc2 = tc2+'\n' ; tl1 = tl1+'\n'
  990. if doctype == 'tex' and not tableborder: tl1 = ''
  991. if tablecel:
  992. while tablecel:
  993. cel = tablecel.pop(0)
  994. if not cel and doctype == 'html':
  995. cel = '&nbsp;'
  996. else:
  997. # user escaped (not delim!)
  998. cel = string.replace(cel,'\|', '|')
  999. if not tablecel and doctype in ['sgml','tex']:
  1000. tc2 = '' # last cell don't need separator
  1001. # close beautifier for last title cell
  1002. if not tablecel and doctype == 'tex':
  1003. if tablefmt and tablefmt[0] == '|': tc2 = '}'
  1004. line = '%s%s%s%s'%(line,tc1,string.strip(cel),tc2)
  1005. line = '%s%s%s'%(tl1,line,tl2)
  1006. ### BEGIN of at-any-part-of-the-line/various-per-line TAGs.
  1007. # bold
  1008. if re_bold.search(line):
  1009. txt = r'%s\1%s'%(TAGfontBoldOpen[T],TAGfontBoldClose[T])
  1010. if doctype == 'tex': txt = '\\'+txt # escape escape
  1011. line = re_bold.sub(txt,line)
  1012. # italic
  1013. if re_italic.search(line):
  1014. open, close = TAGfontItalicOpen[T], TAGfontItalicClose[T]
  1015. if doctype == 'tex': open = '\\'+open # must escape escape (sux)
  1016. txt = r'\1%s\2%s'%(open, close)
  1017. line = re_italic.sub(txt,line)
  1018. # bolditalic
  1019. if re_bolditalic.search(line):
  1020. open, close = TAGfontBoldItalicOpen[T], TAGfontBoldItalicClose[T]
  1021. if doctype == 'tex': open = doEscapeEscapechar(open)
  1022. txt = r'%s\1%s'%(open, close)
  1023. line = re_bolditalic.sub(txt,line)
  1024. # underline
  1025. if re_underline.search(line):
  1026. txt = r'%s\1%s'%(TAGfontUnderlineOpen[T],TAGfontUnderlineClose[T])
  1027. line = re_underline.sub(txt,line)
  1028. #---------------------[ URL & E-mail ]-----------------------
  1029. for linkdata in linkbank:
  1030. # set link type and data
  1031. label, url = linkdata
  1032. linktype = 'url';
  1033. if re.match(retxt_email, url): linktype = 'email'
  1034. # adding protocol to guessed link
  1035. guessurl = ''
  1036. if linktype == 'url' and re.match(urlskel['guess'], url):
  1037. if url[0] == 'w': guessurl = 'http://' +url
  1038. else : guessurl = 'ftp://' +url
  1039. # not link aware targets -> protocol is useless
  1040. if doctype in ['txt','man','pm6','mgp','tex']: guessurl = ''
  1041. # simple link (not guessed)
  1042. if not label and not guessurl:
  1043. if FLAGS['maskemail'] and linktype == 'email':
  1044. # do the email mask feature (no TAGs, just text)
  1045. url = string.replace(url,'@',' (a) ')
  1046. url = string.replace(url,'.',' ')
  1047. url = doEscape(doctype,"<%s>"%url)
  1048. line = string.replace(line, linkmask, url, 1)
  1049. else:
  1050. # replace mask with tag, add link data to tag
  1051. cmd = 'string.replace(line,linkmask,TAG%s[T],1)'%linktype
  1052. line = eval(cmd)
  1053. line = re_x.sub(url,line)
  1054. # named link or guessed simple link
  1055. else:
  1056. # adjusts for guessed link
  1057. if not label: label = url # no protocol
  1058. if guessurl : url = guessurl # with protocol
  1059. # putting data on the right appearance order
  1060. urlorder = [label, url] # label before link
  1061. if doctype in ('html', 'sgml', 'moin'): # link before label
  1062. urlorder = [url, label]
  1063. # replace mask with tag
  1064. cmd = 'string.replace(line,linkmask,TAG%sMark[T],1)'%linktype
  1065. line = eval(cmd)
  1066. # add link data to tag (replace \a)
  1067. for data in urlorder:
  1068. # escape to handle \ on link
  1069. data = string.replace(data, '\\', '\\\\')
  1070. line = re_x.sub(data,line,1)
  1071. #---------------------[ Image ]-----------------------
  1072. #TODO fix smart align when image is a link label
  1073. # moin and txt tags are the same as the mark, so just skip
  1074. if re_img.search(line) and doctype not in ['moin','txt']:
  1075. # first store blanks final position to detect image at ^
  1076. try: leadingblanks = re.match(' +',line).end()
  1077. except: leadingblanks = 0
  1078. while re_img.search(line):
  1079. m = re_img.search(line)
  1080. txt = m.group(1)
  1081. ini = m.start() ; head = leadingblanks
  1082. end = m.end() ; tail = len(line)
  1083. tag = TAGimg[T]
  1084. if doctype == 'html': # do img align
  1085. align = 'center' # default align # text +img +text
  1086. if ini == head and end == tail:
  1087. tag = '<P ALIGN="center">%s</P>'%tag # ^img$
  1088. elif ini == head: align = 'left' # ^img + text$
  1089. elif end == tail: align = 'right' # ^text + img$
  1090. tag = re_x.sub(align, tag, 1) # add align on tag
  1091. line = re_img.sub(tag,line,1)
  1092. line = re_x.sub(txt,line,1)
  1093. if doctype == 'sgml': line = re.sub(r'\[', r'&lsqb;', line)
  1094. line = '%s%s'%(' '*leadingblanks,line) # put blanks back
  1095. #---------------------[ Rethink this ]-----------------------
  1096. # mgp/tex: restore orig line for headers (no formatting at all!)
  1097. # only %%date must be converted
  1098. if not FLAGS['noheaders'] and AREA == 'head':
  1099. uline = string.rstrip(untouchedline)
  1100. # is there any tex on the line?
  1101. # TODO protect %%date from escaping
  1102. if doctype == 'tex' and re.search(r'\\\w+{', line):
  1103. line = doEscape(doctype, uline)
  1104. # mgp, escape anyway
  1105. elif doctype == 'mgp':
  1106. line = doEscape(doctype, uline)
  1107. #---------------------[ Expand Macros ]-----------------------
  1108. if macrobank:
  1109. for macro in macrobank:
  1110. line = string.replace(line, macromask, macro,1)
  1111. # now the line is full of macros again
  1112. # date
  1113. while re_date.search(line):
  1114. m = re_date.search(line)
  1115. fmt = m.group('fmt') or ''
  1116. dateme = currdate
  1117. if fmt: dateme = strftime(fmt,localtime(time()))
  1118. line = re_date.sub(dateme,line,1)
  1119. #---------------------[ Expand PREs ]-----------------------
  1120. for mono in monobank:
  1121. line = string.replace(line, monomask, "%s%s%s"%(
  1122. TAGfontMonoOpen[T],mono,TAGfontMonoClose[T]),1)
  1123. #---------------------[ Headers ]-----------------------
  1124. if AREA == 'head' and linenr < 4:
  1125. HEADERS['HEADER%d'%linenr] = line
  1126. continue
  1127. #---------------------[ Final Escapes ]-----------------------
  1128. line = doFinalEscape(line)
  1129. ret.append(holdspace+line)
  1130. holdspace = ''
  1131. # EOF: close any open lists/tables/quotes
  1132. if listindent: ret.append(doCloseList(doctype,len(listindent)))
  1133. if istable : ret.append(doCloseTable(doctype))
  1134. if quotedepth: ret.append(doCloseQuote(len(quotedepth)))
  1135. if not FLAGS['noheaders']: ret.extend(doFooter(doctype))
  1136. if FLAGS['fixme']: ret = fixedinfile[:]
  1137. return header,toclist,ret
  1138. # TODO each formatting function should set flags
  1139. # like: readnextline: 1 , continue: 1, etc.
  1140. # for func in [do_bold, do_under, ...]:
  1141. # func(); if readnextline: read(); etc
  1142. ################################################################################
  1143. ##################################### GUI ######################################
  1144. ################################################################################
  1145. # tk help: http://python.org/topics/tkinter/
  1146. class Gui:
  1147. "Graphical Tk Interface"
  1148. def __init__(self):
  1149. self.bg = 'orange'
  1150. self.root = Tkinter.Tk()
  1151. self.root.config(bd=15,bg=self.bg)
  1152. self.root.title("txt2tags")
  1153. self.frame1 = Tkinter.Frame(self.root,bg=self.bg)
  1154. self.frame1.pack(fill='x')
  1155. self.frame2 = Tkinter.Frame(self.root,bg=self.bg)
  1156. self.frame2.pack()
  1157. self.frame3 = Tkinter.Frame(self.root,bg=self.bg)
  1158. self.frame3.pack(fill='x')
  1159. self.frame = self.root
  1160. self.infile = self.setvar('')
  1161. #self.infile = self.setvar('C:/cygwin/home/Milene/abc.txt')
  1162. #self.infile = self.setvar('C:/aurelio/a.txt')
  1163. self.doctype = self.setvar('html')
  1164. self.f_noheaders = self.setvar('')
  1165. self.f_enumtitle = self.setvar('')
  1166. self.f_toc = self.setvar('')
  1167. self.f_toconly = self.setvar('')
  1168. self.f_stdout = self.setvar('')
  1169. ### config as dic for python 1.5 compat (**opts don't work :( )
  1170. def entry(self, **opts): return Tkinter.Entry(self.frame, opts)
  1171. def label(self, txt='', **opts):
  1172. opts.update({'text':txt,'bg':self.bg})
  1173. return Tkinter.Label(self.frame, opts)
  1174. def button(self,name,cmd,**opts):
  1175. opts.update({'text':name,'command':cmd})
  1176. return Tkinter.Button(self.frame, opts)
  1177. def check(self,name,val,**opts):
  1178. opts.update( {'text':name, 'onvalue':val, 'offvalue':'',
  1179. 'anchor':'w', 'bg':self.bg, 'activebackground':self.bg} )
  1180. Tkinter.Checkbutton(self.frame, opts).pack(fill='x',padx=10)
  1181. ### config as positional parameters for python 2.*
  1182. # def entry(self, **opts): return Tkinter.Entry(self.frame, **opts)
  1183. # def label(self, txt='', **opts):
  1184. # return Tkinter.Label(self.frame, text=txt, bg=self.bg, **opts)
  1185. # def button(self,name,cmd,**opts):
  1186. # return Tkinter.Button(self.frame, text=name, command=cmd, **opts)
  1187. # def check(self,name,val,**opts):
  1188. # Tkinter.Checkbutton(self.frame,text=name, onvalue=val, offvalue='',
  1189. # anchor='w', bg=self.bg, activebackground=self.bg, **opts).pack(
  1190. # fill='x',padx=10)
  1191. def exit(self): self.root.destroy(); sys.exit()
  1192. def setvar(self, val): z = Tkinter.StringVar() ; z.set(val) ; return z
  1193. def menu(self,sel,items):
  1194. return apply(Tkinter.OptionMenu,(self.frame,sel)+tuple(items))
  1195. def askfile(self):
  1196. ftypes = [("txt2tags files",("*.t2t","*.txt")),("All files","*")]
  1197. self.infile.set(askopenfilename(filetypes=ftypes))
  1198. def scrollwindow(self,txt='no text!',title=''):
  1199. win = Tkinter.Toplevel() ; win.title(title)
  1200. scroll = Tkinter.Scrollbar(win)
  1201. text = Tkinter.Text(win,yscrollcommand=scroll.set)
  1202. scroll.config(command=text.yview)
  1203. text.insert(Tkinter.END, string.join(txt,'\n'))
  1204. text.pack(side='left',fill='both')
  1205. scroll.pack(side='right',fill='y')
  1206. def runprogram(self):
  1207. # prepare
  1208. infile, doctype = self.infile.get(), self.doctype.get()
  1209. if not infile:
  1210. showwarning('txt2tags',"You must provide the source file location!")
  1211. return
  1212. # compose cmdline
  1213. reset_flags(); FLAGS['gui'] = 1
  1214. myflags = []
  1215. for flag in FLAGS.keys():
  1216. if flag in ['maskemail','gui','fixme']:
  1217. continue # not supported
  1218. flag = getattr(self, 'f_%s'%flag)
  1219. if flag.get(): myflags.append(flag.get())
  1220. cmdline = ['txt2tags', '-t', doctype] +myflags +[infile]
  1221. Debug('Gui/tk cmdline: %s'%cmdline,1)
  1222. # run!
  1223. try:
  1224. infile,outfile,doctype = ParseCmdlineOptions(ParseCmdline(cmdline))
  1225. header,toc,doc = doitall(Readfile(infile), doctype)
  1226. outlist = toc_master(doctype,header,doc,toc)
  1227. if outfile == pipefileid:
  1228. title = 'txt2tags: %s converted to %s'%(
  1229. os.path.basename(infile),string.upper(doctype))
  1230. self.scrollwindow(outlist, title)
  1231. else:
  1232. finish_him(outlist,outfile)
  1233. msg = "FROM:\n\t%s\nTO:\n\t%s"%(infile,outfile)
  1234. showinfo('txt2tags', "Conversion done!\n\n%s"%msg)
  1235. except ZeroDivisionError: # common error, not quit
  1236. pass
  1237. except: # fatal error
  1238. traceback.print_exc()
  1239. print '\nSorry! txt2tags-Tk Fatal Error.'
  1240. errmsg = 'Unknown error occurred.\n\nPlease send the Error '+\
  1241. 'Traceback dumped to the author:\n %s'%my_email
  1242. showerror('txt2tags FATAL ERROR!',errmsg)
  1243. self.exit()
  1244. def mainwindow(self):
  1245. action1 = " \nChoose the target document type:"
  1246. action2 = "\n\nEnter the tagged source file location:"
  1247. action3 = "\n\nSome options you may check:"
  1248. nohead_txt = "Suppress headers from output"
  1249. enum_txt = "Number titles (1, 1.1, 1.1.1, etc)"
  1250. toc_txt = "Do TOC also (Table of Contents)"
  1251. toconly_txt= "Just do TOC, nothing more"
  1252. stdout_txt = "Dump to screen (Don't save target file)"
  1253. self.frame = self.frame1
  1254. self.label("TXT2TAGS\n%s\nv%s"%(my_url,my_version)).pack()
  1255. self.label(action1, anchor='w').pack(fill='x')
  1256. self.menu(self.doctype, tags).pack()
  1257. self.label(action2, anchor='w').pack(fill='x')
  1258. self.frame = self.frame2
  1259. self.entry(textvariable=self.infile).pack(side='left',padx=10)
  1260. self.button("Browse", self.askfile).pack(side='right')
  1261. self.frame = self.frame3
  1262. self.label(action3, anchor='w').pack(fill='x')
  1263. self.check(nohead_txt ,'--noheaders',variable=self.f_noheaders)
  1264. self.check(enum_txt ,'--enumtitle',variable=self.f_enumtitle)
  1265. self.check(toc_txt ,'--toc' ,variable=self.f_toc)
  1266. self.check(toconly_txt,'--toconly' ,variable=self.f_toconly)
  1267. self.check(stdout_txt ,'--stdout' ,variable=self.f_stdout)
  1268. self.label('\n').pack()
  1269. self.button("Quit", self.exit).pack(side='left',padx=40)
  1270. self.button("Convert!", self.runprogram).pack(side='right',padx=40)
  1271. # as documentation told me
  1272. if sys.platform[:3] == 'win':
  1273. self.root.iconify()
  1274. self.root.update()
  1275. self.root.deiconify()
  1276. self.root.mainloop()
  1277. ################################################################################
  1278. ################################################################################
  1279. # set debug are remove option from cmdline
  1280. if sys.argv.count('--debug'):
  1281. DEBUG = 1
  1282. sys.argv.remove('--debug')
  1283. Debug("system platform: %s"%sys.platform,1)
  1284. Debug("line break char: %s"%repr(LB),1)
  1285. if FLAGS['gui'] == 1:
  1286. # redefine Error function to raise exception instead sys.exit()
  1287. def Error(msg): showerror('txt2tags ERROR!', msg); raise ZeroDivisionError
  1288. Gui().mainwindow()
  1289. else:
  1290. # console mode rocks forever!
  1291. infile, outfile, doctype = ParseCmdlineOptions(ParseCmdline())
  1292. header,toc,doc = doitall(Readfile(infile), doctype)
  1293. if has_obsolete: ObsoletedInstructions(infile)
  1294. if FLAGS['fixme']:
  1295. if has_fixed:
  1296. Savefile(infile+'.OLD', Readfile(infile)) # copy to .old
  1297. Savefile(infile, doc)
  1298. print "\nFile '%s' correctly updated and saved."%(infile)
  1299. print "The old contents were saved to '%s.OLD'."%(infile)
  1300. else:
  1301. print "This file is OK! Nothing to fix."
  1302. else:
  1303. outlist = toc_master(doctype, header, doc, toc) # TOC!
  1304. finish_him(outlist, outfile) # writing output to screen or file
  1305. sys.exit(0)
  1306. #TODO pm6: check all the things @home
  1307. ### RESOURCES
  1308. # html: http://www.w3.org/TR/WD-html-lex
  1309. # man: man 7 man
  1310. # sgml: www.linuxdoc.org
  1311. # moin: http://twistedmatrix.com/users/jh.twistd/moin/moin.cgi/WikiSandBox
  1312. # moin: http://moin.sf.net
  1313. # pm6: <font$> turn all formatting to the style's default
  1314. # pm6: <#comments#> <font #comment# $>
  1315. # pagemaker table
  1316. # 1 = 0,55
  1317. # 2 = 1,10
  1318. # 3 = 1,65
  1319. # 4 = 2,20
  1320. #
  1321. # |__1_| | | | | |
  1322. # |_______2_| | | | |
  1323. # |____________3_| | | |
  1324. # vim: set ts=4