PageRenderTime 360ms CodeModel.GetById 127ms RepoModel.GetById 7ms app.codeStats 0ms

/old/txt2tags-1.2.py

http://txt2tags.googlecode.com/
Python | 2120 lines | 2095 code | 5 blank | 20 comment | 8 complexity | b851cc03425f3e667188c72857900df7 MD5 | raw file
Possible License(s): GPL-2.0, GPL-3.0, WTFPL

Large files are truncated, but you can click here to view the full file

  1. #!/usr/bin/env python
  2. # txt2tags - generic text conversion tool
  3. # http://txt2tags.sf.net
  4. #
  5. # Copyright 2001, 2002 Aurélio Marinho Jargas
  6. #
  7. # This program is free software; you can redistribute it and/or modify
  8. # it under the terms of the GNU General Public License as published by
  9. # the Free Software Foundation, version 2.
  10. #
  11. # This program is distributed in the hope that it will be useful,
  12. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. # GNU General Public License for more details.
  15. #
  16. # You have received a copy of the GNU General Public License along
  17. # with this program, on the COPYING file.
  18. #
  19. # please, don't look at this code, it's ugly!
  20. # i'll try to make it better on the next releases
  21. import re, string, os, sys, getopt, traceback
  22. from time import strftime,time,localtime
  23. my_url = 'http://txt2tags.sf.net'
  24. my_email = 'aurelio@verde666.org'
  25. my_version = '1.2'
  26. DEBUG = 0 # do not edit here, please use --debug
  27. targets = ['txt', 'sgml', 'html', 'pm6', 'mgp', 'moin', 'man', 'tex']
  28. FLAGS = {'noheaders':0,'enumtitle':0,'maskemail':0, 'stdout':0,
  29. 'toconly' :0,'toc' :0,'gui' :0, 'fixme' :0}
  30. regex = {}
  31. TAGS = {}
  32. rules = {}
  33. CMDLINE = ''
  34. currdate = strftime('%Y%m%d',localtime(time())) # ISO current date
  35. splitlevel = '' ; lang = 'english'
  36. doctype = outfile = ''
  37. pipefileid = '-'
  38. has_obsolete = has_fixed = 0
  39. #my_version = my_version + '-dev' + currdate[4:] # devel!
  40. # global vars for doClose*()
  41. quotedepth = []
  42. listindent = []
  43. listids = []
  44. subarea = None
  45. tableborder = 0
  46. versionstr = "txt2tags version %s <%s>"%(my_version,my_url)
  47. usage = """
  48. %s
  49. usage: txt2tags -t <type> [OPTIONS] file.t2t
  50. txt2tags -t html -s <split level> -l <lang> file.t2t
  51. -t, --type target document type. actually supported:
  52. %s
  53. --stdout by default, the output is written to file.<type>
  54. with this option, STDOUT is used (no files written)
  55. --noheaders suppress header, title and footer information
  56. --enumtitle enumerate all title lines as 1, 1.1, 1.1.1, etc
  57. --maskemail hide email from spam robots. x@y.z turns to <x (a) y z>
  58. --toc add TOC (Table of Contents) to target document
  59. --toconly print document TOC and exit
  60. --gui invoke Graphical Tk Interface
  61. --fixme temporary option to fix obsoleted structures
  62. -h, --help print this help information and exit
  63. -V, --version print program version and exit
  64. extra options for HTML target (needs sgml-tools):
  65. --split split documents. values: 0, 1, 2 (default 0)
  66. --lang document language (default english)
  67. """%(versionstr, re.sub(r"[]'[]",'',repr(targets)))
  68. # here is all the target's templates
  69. # you may edit them to fit your needs
  70. # - the %(HEADERn)s strings represent the Header lines
  71. # - use %% to represent a literal %
  72. #
# Document header template for each target, keyed by target name.
# Each value is a %-format string: %(HEADER1)s, %(HEADER2)s and %(HEADER3)s
# are the placeholders for the header lines, and a literal percent sign is
# written as %%.  doHeader() looks the template up by doctype and splits it
# on '\n'.
HEADER_TEMPLATE = {
'txt': """\
%(HEADER1)s
%(HEADER2)s
%(HEADER3)s
""",
'sgml': """\
<!doctype linuxdoc system>
<article>
<title>%(HEADER1)s
<author>%(HEADER2)s
<date>%(HEADER3)s
""",
#TODO (peter valach) <meta http-equiv="Content-Type"
# content="text/html; charset=iso-8859-2">
'html': """\
<HTML>
<HEAD><TITLE>%(HEADER1)s</TITLE></HEAD>
<BODY BGCOLOR="white" TEXT="black">
<P ALIGN="center"><CENTER><H1>%(HEADER1)s</H1>
<FONT SIZE=4>
<I>%(HEADER2)s</I><BR>
%(HEADER3)s
</FONT></CENTER>
""",
# TODO man section 1 is hardcoded...
'man': """\
.TH "%(HEADER1)s" 1 %(HEADER3)s "%(HEADER2)s"
""",
# TODO style to <HR>
# PageMaker 6 tagged text: style definitions first, then the header lines
'pm6': """\
<PMTags1.0 win><C-COLORTABLE ("Preto" 1 0 0 0)
><@Normal=
<FONT "Times New Roman"><CCOLOR "Preto"><SIZE 11>
<HORIZONTAL 100><LETTERSPACE 0><CTRACK 127><CSSIZE 70><C+SIZE 58.3>
<C-POSITION 33.3><C+POSITION 33.3><P><CBASELINE 0><CNOBREAK 0><CLEADING -0.05>
<GGRID 0><GLEFT 7.2><GRIGHT 0><GFIRST 0><G+BEFORE 7.2><G+AFTER 0>
<GALIGNMENT "justify"><GMETHOD "proportional"><G& "ENGLISH">
<GPAIRS 12><G%% 120><GKNEXT 0><GKWIDOW 0><GKORPHAN 0><GTABS $>
<GHYPHENATION 2 34 0><GWORDSPACE 75 100 150><GSPACE -5 0 25>
><@Bullet=<@-PARENT "Normal"><FONT "Abadi MT Condensed Light">
<GLEFT 14.4><G+BEFORE 2.15><G%% 110><GTABS(25.2 l "")>
><@PreFormat=<@-PARENT "Normal"><FONT "Lucida Console"><SIZE 8><CTRACK 0>
<GLEFT 0><G+BEFORE 0><GALIGNMENT "left"><GWORDSPACE 100 100 100><GSPACE 0 0 0>
><@Title1=<@-PARENT "Normal"><FONT "Arial"><SIZE 14><B>
<GCONTENTS><GLEFT 0><G+BEFORE 0><GALIGNMENT "left">
><@Title2=<@-PARENT "Title1"><SIZE 12><G+BEFORE 3.6>
><@Title3=<@-PARENT "Title1"><SIZE 10><GLEFT 7.2><G+BEFORE 7.2>
><@Title4=<@-PARENT "Title3">
><@Title5=<@-PARENT "Title3">
><@Quote=<@-PARENT "Normal"><SIZE 10><I>>
%(HEADER1)s
%(HEADER2)s
%(HEADER3)s
""",
#TODO escape lines beginning with %% after all formatting
# MagicPoint: every directive line starts with '%', hence the doubled '%%'
'mgp': """\
#!/usr/X11R6/bin/mgp -t 90
%%deffont "normal" xfont "utopia-medium-r", charset "iso8859-1"
%%deffont "normal-i" xfont "utopia-medium-i", charset "iso8859-1"
%%deffont "normal-b" xfont "utopia-bold-r" , charset "iso8859-1"
%%deffont "normal-bi" xfont "utopia-bold-i" , charset "iso8859-1"
%%deffont "mono" xfont "courier-medium-r", charset "iso8859-1"
%%default 1 size 5
%%default 2 size 8, fore "yellow", font "normal-b", center
%%default 3 size 5, fore "white", font "normal", left, prefix " "
%%tab 1 size 4, vgap 30, prefix " ", icon arc "red" 40, leftfill
%%tab 2 prefix " ", icon arc "orange" 40, leftfill
%%tab 3 prefix " ", icon arc "brown" 40, leftfill
%%tab 4 prefix " ", icon arc "darkmagenta" 40, leftfill
%%tab 5 prefix " ", icon arc "magenta" 40, leftfill
%%%%------------------------- end of headers -----------------------------
%%page
%%size 10, center, fore "yellow"
%(HEADER1)s
%%font "normal-i", size 6, fore "white", center
%(HEADER2)s
%%font "mono", size 7, center
%(HEADER3)s
""",
# TODO please, improve me!
'moin': """\
%(HEADER1)s
%(HEADER2)s
%(HEADER3)s
""",
#TODO how to make acrobat left-side pages count?
# \includepackage{graphics}
# --lang matters! \usepackage[brazil]{babel}
# local links seems to be \label, \ref, \pageref
# raw string: the LaTeX backslashes must reach the output untouched
'tex': \
r"""\documentclass[11pt,a4paper]{article}
\usepackage{amsfonts,amssymb,graphicx,url}
\usepackage[latin1]{inputenc} %% for accented chars
\pagestyle{plain} %% do page numbering ('empty' turns off)
\frenchspacing %% no aditional spaces after periods
%% all paragraph must be indented equaly
\setlength{\parskip}{8pt}\parindent=0pt
%% uncomment next line for fancy PDF output on Adobe Acrobat Reader
%%\usepackage[pdfstartview=FitV,colorlinks=true,bookmarks=true]{hyperref}
\newcommand\email{\begingroup \urlstyle{tt}\Url} %% for email
\title{%(HEADER1)s}
\author{%(HEADER2)s}
\begin{document}
\date{%(HEADER3)s}
\maketitle
"""
}
  181. #-----------------------------------------------------------------------
  182. def Quit(msg, exitcode=0): print msg ; sys.exit(exitcode)
  183. def Error(msg): print "ERROR: %s"%msg ; sys.exit()
  184. def Debug(msg,i=0,linenr=None):
  185. if i > DEBUG: return
  186. if linenr is not None:
  187. print "(%d) %04d:%s"%(i,linenr,msg)
  188. else:
  189. print "(%d) %s"%(i,msg)
  190. def Readfile(file):
  191. if file == '-':
  192. try: data = sys.stdin.readlines()
  193. except: Error('You must feed me with data on STDIN!')
  194. else:
  195. try: f = open(file); data = f.readlines() ; f.close()
  196. except: Error("Cannot read file:\n %s"%file)
  197. return data
  198. def Savefile(file, contents):
  199. try: f = open(file, 'w')
  200. except: Error("Cannot open file for writing:\n %s"%file)
  201. if type(contents) == type([]): doit = f.writelines
  202. else: doit = f.write
  203. doit(contents) ; f.close()
  204. def NewArea(new, linenr):
  205. if new not in ['head', 'conf', 'body']:
  206. Error("Invalid new AREA '%s' on line '%s'"%(new,linenr))
  207. Debug('NEW AREA: %s'%new, 1, linenr)
  208. return new
  209. def Obsoleted(n, old, new, ver):
  210. global has_obsolete
  211. has_obsolete = 1
  212. print 'OBSOLETE WARNING: line %04d'%n
  213. print ' The %s is now obsoleted and will be removed'%old
  214. print ' on txt2tags version %s. Please use %s instead.'%(ver,new)
  215. def Fixed(n, old, new):
  216. global has_fixed
  217. has_fixed = 1
  218. print 'FIXED: line %04d: %15s ---> %s'%(n,old,new)
  219. def FixTable(tabline):
  220. new = string.replace(tabline,'\t',' ',1) # del 1st TAB
  221. new = string.replace(new,'\t',' | ') # change TABs by spaced pipes
  222. new = re.sub('(.*[^\n\r])', '\\1 |', new) # add last pipe (for border)
  223. return new
  224. def FixFile(file, newcontent):
  225. Savefile(file+'.OLD', Readfile(file)) # copy to .old
  226. Savefile(file, newcontent)
  227. print "\nFile '%s' correctly updated and saved."%(infile)
  228. print "The old contents were saved to '%s.OLD'."%(infile)
  229. def ObsoletedInstructions(infile):
  230. print """\n
  231. ATTENTION:
  232. Some obsoleted txt2tags structures were found on your source document.
  233. Please correct them by hand, or just run:
  234. txt2tags --fixme %s
  235. To update (rewrite) this file automatically.
  236. Please don't ignore this message.
  237. On next releases of txt2tags, these old structures will not be valid.
  238. \n"""%infile
  239. def reset_flags():
  240. global FLAGS
  241. for flag in FLAGS.keys(): FLAGS[flag] = 0
  242. def set_outfile_name(infile, doctype):
  243. "dirname is the same for {in,out}file"
  244. if not infile: return
  245. if infile == pipefileid or FLAGS['toconly'] or FLAGS['stdout']:
  246. outfile = pipefileid
  247. else:
  248. outfile = "%s.%s"%(re.sub('\.(txt|t2t)$','',infile), doctype)
  249. Debug(" infile: '%s'"% infile, 1)
  250. Debug("outfile: '%s'"%outfile, 1)
  251. return outfile
  252. def finish_him(outlist, outfile):
  253. "writing output to screen or file"
  254. if outfile == pipefileid:
  255. for line in outlist: print line
  256. else:
  257. Savefile(outfile, addLineBreaks(outlist))
  258. if not FLAGS['gui']: print 'wrote %s'%(outfile)
  259. if splitlevel:
  260. print "--- html..."
  261. os.system('sgml2html --language=%s --split=%s %s'%(
  262. lang,splitlevel,outfile))
  263. def ParseCmdline(cmdline=sys.argv):
  264. "return a dic with all options:value found"
  265. global CMDLINE ; CMDLINE = cmdline # save for dofooter()
  266. Debug("cmdline: %s"%cmdline, 1)
  267. options = {'infile': '', 'infiles':''}
  268. # get cmdline options
  269. longopt = ['help', 'version', 'type=', 'split=', 'lang=']+FLAGS.keys()
  270. try: (opt, args) = getopt.getopt(cmdline[1:], 'hVt:', longopt)
  271. except getopt.GetoptError:
  272. Error('Bad option or missing argument (try --help)')
  273. # get infile, if any
  274. if args:
  275. options['infile'] = args[0]
  276. options['infiles'] = args # multi
  277. for name,val in opt:
  278. # parse information options
  279. if name in ['-h','--help' ]: Quit(usage)
  280. elif name in ['-V','--version']: Quit(versionstr)
  281. # parse short/long options
  282. elif name in ['-t','--type']:
  283. options['doctype'] = val
  284. continue
  285. # just long options
  286. options[name[2:]] = val # del --
  287. Debug("cmdline options: %s"%options, 1)
  288. return options
  289. def ParseCmdlineOptions(optdic):
  290. "set vars and flags according to options dic"
  291. global FLAGS, splitlevel, lang
  292. # store flags and vars
  293. myflags = [] # for debug msg
  294. for flag in FLAGS.keys():
  295. if optdic.has_key(flag):
  296. FLAGS[flag] = 1
  297. myflags.append(flag)
  298. doctype = optdic.get('doctype')
  299. infile = optdic.get('infile')
  300. splitlevel = optdic.get('split')
  301. lang = optdic.get('lang')
  302. Debug("cmdline flags: %s"%string.join(myflags,', '), 1)
  303. if FLAGS['fixme']:
  304. if not infile: Quit(usage, 1)
  305. doctype = 'moin' # bogus, not used at all
  306. if not doctype and FLAGS['toconly']: doctype = 'txt' # toconly dft type
  307. if not infile or not doctype: Quit(usage, 1) # no filename/doctype
  308. # sanity check: validate target type
  309. if not targets.count(doctype):
  310. Error("Invalid document type '%s' (try --help)"%(doctype))
  311. outfile = set_outfile_name(infile, doctype)
  312. # sanity check: validate split level
  313. if doctype != 'html': splitlevel = '' # only valid for HTML target
  314. if splitlevel:
  315. # checkings
  316. if outfile == pipefileid:
  317. Error('You need to provide a FILE (not STDIN) '
  318. 'when using --split')
  319. if splitlevel[0] not in '012':
  320. Error('Option --split must be 0, 1 or 2')
  321. # check for sgml-tools
  322. #TODO how to test (in a clever way) if an executable is in path?
  323. #TODO os.system() return code? sgml2html w/out --help exit 0?
  324. #TODO bah! implement sgml2html split natively and we're done
  325. # Error("Sorry, you must have 'sgml2html' to use --split")
  326. # set things
  327. FLAGS['stdout'] = 0 # no --stdout
  328. doctype = 'sgml' # 1st do a sgml, then sgml2html
  329. outfile = set_outfile_name(infile, doctype)
  330. # sanity check: source loss!
  331. if infile != pipefileid and infile == outfile:
  332. Error("SUICIDE WARNING!!! (try --stdout)\n source"+\
  333. " and target files has the same name: %s"%outfile)
  334. ### yes, i've got my sample.t2t file deleted before add this test... :/
  335. return infile,outfile,doctype
  336. #TODO splitlevel, lang
  337. #---End of ParseCmdlineOptions
  338. def toc_master(doctype, header, doc, toc):
  339. "decide to include TOC or not on the outlist"
  340. # deal with the TOC options
  341. if FLAGS['toc'] or FLAGS['toconly']:
  342. # format TOC lines
  343. ### here we do toc as a valid t2t marked text (list type)
  344. FLAGS['noheaders'] = 1
  345. x,y,toc = convert(['']+toc+['',''], doctype)
  346. # TOC between bars (not for --toconly)
  347. if FLAGS['toc']:
  348. para = TAGS['paragraph']
  349. tocbar = [para, regex['x'].sub('-'*72,TAGS['bar1']), para]
  350. toc = tocbar + toc + tocbar
  351. if FLAGS['toconly']: header = doc = []
  352. else:
  353. toc = []
  354. # on tex, \tableofcontents do it all - see doHeader()
  355. if doctype == 'tex' and not FLAGS['toconly']:
  356. toc = []
  357. return header + toc + doc
  358. def doitall(cmdlinedic):
  359. global outfile
  360. infile,outfile,doctype = ParseCmdlineOptions(cmdlinedic)
  361. header,toc,doc = convert(Readfile(infile), doctype)
  362. outlist = toc_master(doctype,header,doc,toc)
  363. if has_obsolete: ObsoletedInstructions(infile)
  364. return doctype, outfile, outlist
  365. # set the Line Break across platforms
  366. LB = '\n' # default
  367. if sys.platform[:3] == 'win': LB = '\r\n'
  368. #elif sys.platform[:3] == 'cyg': LB = '\r\n' # not sure if it's best :(
  369. elif sys.platform[:3] == 'mac': LB = '\r'
def getTags(doctype):
    """Return the dictionary of markup tags for the given target type.

    doctype -- one of "txt", "html", "sgml", "tex", "moin", "mgp", "man"
               or "pm6"

    The result has an entry for every name listed in 'keys' below; names a
    target does not define are set to ''.  Inside a tag, the bell char (BEL,
    written as a backslash-a escape) marks where data is placed.
    NOTE(review): no branch handles an unknown doctype, so 'tags' would be
    unbound at the final loop - callers presumably validate it first.
    """
    # every tag name a target may define
    keys = [
    'paragraph','title1','title2','title3','title4','title5',
    'areaPreOpen','areaPreClose',
    'areaQuoteOpen','areaQuoteClose',
    'fontMonoOpen','fontMonoClose',
    'fontBoldOpen','fontBoldClose',
    'fontItalicOpen','fontItalicClose',
    'fontBolditalicOpen','fontBolditalicClose',
    'fontUnderlineOpen','fontUnderlineClose',
    'listOpen','listClose','listItem',
    'numlistOpen','numlistClose','numlistItem',
    'deflistOpen','deflistClose','deflistItem1','deflistItem2',
    'bar1','bar2',
    'url','urlMark','email','emailMark',
    'img','imgsolo',
    'tableOpen','tableClose','tableLineOpen','tableLineClose',
    'tableCellOpen','tableCellClose',
    'tableTitleCellOpen','tableTitleCellClose',
    'anchor','comment',
    'EOD'
    ]
    if doctype == "txt":
        tags = {
        'title1' : ' \a' ,
        'title2' : '\t\a' ,
        'title3' : '\t\t\a' ,
        'title4' : '\t\t\t\a' ,
        'title5' : '\t\t\t\t\a',
        'areaQuoteOpen' : ' ' ,
        'listItem' : '- ' ,
        'numlistItem' : '\a. ' ,
        'bar1' : '\a' ,
        'bar2' : '\a' ,
        'url' : '\a' ,
        'urlMark' : '\a (\a)' ,
        'email' : '\a' ,
        'emailMark' : '\a (\a)' ,
        'img' : '[\a]' ,
        }
    elif doctype == "html":
        tags = {
        'paragraph' : '<P>' ,
        'title1' : '<H1>\a</H1>' ,
        'title2' : '<H2>\a</H2>' ,
        'title3' : '<H3>\a</H3>' ,
        'title4' : '<H4>\a</H4>' ,
        'title5' : '<H5>\a</H5>' ,
        'areaPreOpen' : '<PRE>' ,
        'areaPreClose' : '</PRE>' ,
        'areaQuoteOpen' : '<BLOCKQUOTE>' ,
        'areaQuoteClose' : '</BLOCKQUOTE>' ,
        'fontMonoOpen' : '<CODE>' ,
        'fontMonoClose' : '</CODE>' ,
        'fontBoldOpen' : '<B>' ,
        'fontBoldClose' : '</B>' ,
        'fontItalicOpen' : '<I>' ,
        'fontItalicClose' : '</I>' ,
        'fontBolditalicOpen' : '<B><I>' ,
        'fontBolditalicClose': '</I></B>' ,
        'fontUnderlineOpen' : '<U>' ,
        'fontUnderlineClose' : '</U>' ,
        'listOpen' : '<UL>' ,
        'listClose' : '</UL>' ,
        'listItem' : '<LI>' ,
        'numlistOpen' : '<OL>' ,
        'numlistClose' : '</OL>' ,
        'numlistItem' : '<LI>' ,
        'deflistOpen' : '<DL>' ,
        'deflistClose' : '</DL>' ,
        'deflistItem1' : '<DT>\a</DT>' ,
        'deflistItem2' : '<DD>' ,
        'bar1' : '<HR NOSHADE SIZE=1>' ,
        'bar2' : '<HR NOSHADE SIZE=5>' ,
        'url' : '<A HREF="\a">\a</A>' ,
        'urlMark' : '<A HREF="\a">\a</A>' ,
        'email' : '<A HREF="mailto:\a">\a</A>' ,
        'emailMark' : '<A HREF="mailto:\a">\a</A>' ,
        'img' : '<IMG ALIGN="\a" SRC="\a" BORDER="0">',
        'imgsolo' : '<P ALIGN="center">\a</P>' ,
        'tableOpen' : '<table align=center cellpadding=4 border=\a>',
        'tableClose' : '</table>' ,
        'tableLineOpen' : '<tr>' ,
        'tableLineClose' : '</tr>' ,
        'tableCellOpen' : '<td>' ,
        'tableCellClose' : '</td>' ,
        'tableTitleCellOpen' : '<th>' ,
        'tableTitleCellClose': '</th>' ,
        'anchor' : '<a name="\a">' ,
        'comment' : '<!-- \a -->' ,
        'EOD' : '</BODY></HTML>'
        }
    elif doctype == "sgml":
        tags = {
        'paragraph' : '<p>' ,
        'title1' : '<sect>\a<p>' ,
        'title2' : '<sect1>\a<p>' ,
        'title3' : '<sect2>\a<p>' ,
        'title4' : '<sect3>\a<p>' ,
        'title5' : '<sect4>\a<p>' ,
        'areaPreOpen' : '<tscreen><verb>' ,
        'areaPreClose' : '</verb></tscreen>' ,
        'areaQuoteOpen' : '<quote>' ,
        'areaQuoteClose' : '</quote>' ,
        'fontMonoOpen' : '<tt>' ,
        'fontMonoClose' : '</tt>' ,
        'fontBoldOpen' : '<bf>' ,
        'fontBoldClose' : '</bf>' ,
        'fontItalicOpen' : '<em>' ,
        'fontItalicClose' : '</em>' ,
        'fontBolditalicOpen' : '<bf><em>' ,
        'fontBolditalicClose': '</em></bf>' ,
        'fontUnderlineOpen' : '<bf><em>' ,
        'fontUnderlineClose' : '</em></bf>' ,
        'listOpen' : '<itemize>' ,
        'listClose' : '</itemize>' ,
        'listItem' : '<item>' ,
        'numlistOpen' : '<enum>' ,
        'numlistClose' : '</enum>' ,
        'numlistItem' : '<item>' ,
        'bar1' : '<!-- \a -->' ,
        'bar2' : '<!-- \a -->' ,
        'url' : '<htmlurl url="\a" name="\a">' ,
        'urlMark' : '<htmlurl url="\a" name="\a">' ,
        'email' : '<htmlurl url="mailto:\a" name="\a">' ,
        'emailMark' : '<htmlurl url="mailto:\a" name="\a">' ,
        'img' : '<figure><ph vspace=""><img src="\a"></figure>',
        'tableOpen' : '<table><tabular ca="c">' ,
        'tableClose' : '</tabular></table>' ,
        'tableLineClose' : '<rowsep>' ,
        'tableCellClose' : '<colsep>' ,
        'tableTitleCellClose': '<colsep>' ,
        'comment' : '<!-- \a -->' ,
        'EOD' : '</article>'
        }
    elif doctype == "tex":
        # '\s' (title1) and '\l' (bar2) are not Python escape sequences, so
        # the backslash is kept literally, just like the doubled ones
        tags = {
        'title1' : '\n\\newpage\section{\a}',
        'title2' : '\\subsection{\a}' ,
        'title3' : '\\subsubsection{\a}' ,
        # title 4/5: DIRTY: para+BF+\\+\n
        'title4' : '\\paragraph{}\\textbf{\a}\\\\\\\n' ,
        'title5' : '\\paragraph{}\\textbf{\a}\\\\\\\n' ,
        'areaPreOpen' : '\\begin{verbatim}' ,
        'areaPreClose' : '\\end{verbatim}' ,
        'areaQuoteOpen' : '\\begin{quotation}' ,
        'areaQuoteClose' : '\\end{quotation}' ,
        'fontMonoOpen' : '\\texttt{' ,
        'fontMonoClose' : '}' ,
        'fontBoldOpen' : '\\textbf{' ,
        'fontBoldClose' : '}' ,
        'fontItalicOpen' : '\\textit{' ,
        'fontItalicClose' : '}' ,
        'fontBolditalicOpen' : '\\textbf{\\textit{' ,
        'fontBolditalicClose': '}}' ,
        'fontUnderlineOpen' : '\\underline{' ,
        'fontUnderlineClose' : '}' ,
        'listOpen' : '\\begin{itemize}' ,
        'listClose' : '\\end{itemize}' ,
        'listItem' : '\\item ' ,
        'numlistOpen' : '\\begin{enumerate}' ,
        'numlistClose' : '\\end{enumerate}' ,
        'numlistItem' : '\\item ' ,
        'deflistOpen' : '\\begin{description}' ,
        'deflistClose' : '\\end{description}' ,
        'deflistItem1' : '\\item[\a]' ,
        'bar1' : '\n\\hrulefill{}\n' ,
        'bar2' : '\n\\rule{\linewidth}{1mm}\n' ,
        'url' : '\\url{\a}' ,
        'urlMark' : '\\textit{\a} (\\url{\a})' ,
        'email' : '\\email{\a}' ,
        'emailMark' : '\\textit{\a} (\\email{\a})' ,
        'img' : '(\a)' ,
        'tableOpen' : '\\begin{center}\\begin{tabular}',
        'tableClose' : '\\end{tabular}\\end{center}' ,
        'tableLineOpen' : '\\hline ' ,
        'tableLineClose' : ' \\\\' ,
        'tableCellClose' : ' & ' ,
        'tableTitleCellOpen' : '\\textbf{' ,
        'tableTitleCellClose': '} & ' ,
        'comment' : '% \a' ,
        'EOD' : '\\end{document}'
        }
    elif doctype == "moin":
        tags = {
        'title1' : '= \a =' ,
        'title2' : '== \a ==' ,
        'title3' : '=== \a ===' ,
        'title4' : '==== \a ====' ,
        'title5' : '===== \a =====' ,
        'areaPreOpen' : '{{{' ,
        'areaPreClose' : '}}}' ,
        'areaQuoteOpen' : ' ' ,
        'fontMonoOpen' : '{{{' ,
        'fontMonoClose' : '}}}' ,
        'fontBoldOpen' : "'''" ,
        'fontBoldClose' : "'''" ,
        'fontItalicOpen' : "''" ,
        'fontItalicClose' : "''" ,
        'fontBolditalicOpen' : "'''''" ,
        'fontBolditalicClose': "'''''" ,
        'fontUnderlineOpen' : "'''''" ,
        'fontUnderlineClose' : "'''''" ,
        'listItem' : '* ' ,
        'numlistItem' : '\a. ' ,
        'bar1' : '----' ,
        'bar2' : '----' ,
        'url' : '[\a]' ,
        'urlMark' : '[\a \a]' ,
        'email' : '[\a]' ,
        'emailMark' : '[\a \a]' ,
        'img' : '[\a]' ,
        'tableLineOpen' : '||' ,
        'tableCellClose' : '||' ,
        'tableTitleCellClose': '||' ,
        }
    elif doctype == "mgp":
        tags = {
        'paragraph' : '%font "normal", size 5\n' ,
        'title1' : '%page\n\n\a' ,
        'title2' : '%page\n\n\a' ,
        'title3' : '%page\n\n\a' ,
        'title4' : '%page\n\n\a' ,
        'title5' : '%page\n\n\a' ,
        'areaPreOpen' : '\n%font "mono"' ,
        'areaPreClose' : '%font "normal"' ,
        'areaQuoteOpen' : '%prefix " "' ,
        'areaQuoteClose' : '%prefix " "' ,
        'fontMonoOpen' : '\n%cont, font "mono"\n' ,
        'fontMonoClose' : '\n%cont, font "normal"\n' ,
        'fontBoldOpen' : '\n%cont, font "normal-b"\n' ,
        'fontBoldClose' : '\n%cont, font "normal"\n' ,
        'fontItalicOpen' : '\n%cont, font "normal-i"\n' ,
        'fontItalicClose' : '\n%cont, font "normal"\n' ,
        'fontBolditalicOpen' : '\n%cont, font "normal-bi"\n' ,
        'fontBolditalicClose': '\n%cont, font "normal"\n' ,
        'fontUnderlineOpen' : '\n%cont, fore "cyan"\n' ,
        'fontUnderlineClose' : '\n%cont, fore "white"\n' ,
        'numlistItem' : '\a. ' ,
        'bar1' : '%bar "white" 5' ,
        'bar2' : '%pause' ,
        'url' : '\n%cont, fore "cyan"\n\a\n%cont, fore "white"\n',
        'urlMark' : '\a \n%cont, fore "cyan"\n\a\n%cont, fore "white"\n',
        'email' : '\n%cont, fore "cyan"\n\a\n%cont, fore "white"\n',
        'emailMark' : '\a \n%cont, fore "cyan"\n\a\n%cont, fore "white"\n',
        'img' : '\n%center\n%newimage "\a", left\n',
        'comment' : '%% \a' ,
        'EOD' : '%%EOD'
        }
    elif doctype == "man":
        tags = {
        'paragraph' : '.P' ,
        'title1' : '.SH \a' ,
        'title2' : '.SS \a' ,
        'title3' : '.SS \a' ,
        'title4' : '.SS \a' ,
        'title5' : '.SS \a' ,
        'areaPreOpen' : '.nf' ,
        'areaPreClose' : '.fi\n' ,
        'areaQuoteOpen' : '\n' ,
        'areaQuoteClose' : '\n' ,
        'fontBoldOpen' : '\\fB' ,
        'fontBoldClose' : '\\fP' ,
        'fontItalicOpen' : '\\fI' ,
        'fontItalicClose' : '\\fP' ,
        'fontBolditalicOpen' : '\n.BI ' ,
        'fontBolditalicClose': '\n\\&' ,
        'listOpen' : '\n.nf' , # pre
        'listClose' : '.fi\n' ,
        'listItem' : '* ' ,
        'numlistOpen' : '\n.nf' , # pre
        'numlistClose' : '.fi\n' ,
        'numlistItem' : '\a. ' ,
        'bar1' : '\n\n' ,
        'bar2' : '\n\n' ,
        'url' : '\a' ,
        'urlMark' : '\a (\a)' ,
        'email' : '\a' ,
        'emailMark' : '\a (\a)' ,
        'img' : '\a' ,
        'comment' : '.\\" \a'
        }
    elif doctype == "pm6":
        tags = {
        'paragraph' : '<@Normal:>' ,
        'title1' : '\n<@Title1:>\a' ,
        'title2' : '\n<@Title2:>\a' ,
        'title3' : '\n<@Title3:>\a' ,
        'title4' : '\n<@Title4:>\a' ,
        'title5' : '\n<@Title5:>\a' ,
        'areaPreOpen' : '<@PreFormat:>' ,
        'areaQuoteOpen' : '<@Quote:>' ,
        'fontMonoOpen' : '<FONT "Lucida Console"><SIZE 9>' ,
        'fontMonoClose' : '<SIZE$><FONT$>' ,
        'fontBoldOpen' : '<B>' ,
        'fontBoldClose' : '<P>' ,
        'fontItalicOpen' : '<I>' ,
        'fontItalicClose' : '<P>' ,
        'fontBolditalicOpen' : '<B><I>' ,
        'fontBolditalicClose': '<P>' ,
        'fontUnderlineOpen' : '<U>' ,
        'fontUnderlineClose' : '<P>' ,
        'listOpen' : '<@Bullet:>' ,
        'listItem' : '\x95 ' , # \x95 == ~U
        'numlistOpen' : '<@Bullet:>' ,
        'numlistItem' : '\x95 ' ,
        'bar1' : '\a' ,
        'bar2' : '\a' ,
        'url' : '<U>\a<P>' , # underline
        'urlMark' : '\a <U>\a<P>' ,
        'email' : '\a' ,
        'emailMark' : '\a \a' ,
        'img' : '\a' ,
        }
    # create empty tags keys
    for key in keys:
        if not tags.has_key(key):
            tags[key] = ''
        else:
            # drawback of using re.sub() - double escape some specials
            # see also: 'force_re' marks on the code
            # The letters to protect are picked by the interpreter's major
            # version digit; a backslash followed by one of them gets one
            # more backslash in front.
            # NOTE(review): only '1' and '2' are listed, so any other major
            # version raises KeyError here.
            specials = {'1':'ntsrful', '2':'ntsrf'}
            specials = specials[sys.version[0]]
            tags[key] = re.sub(r'(\\[%s])'%specials,r'\\\1',tags[key])
    return tags
  695. def getRules(doctype):
  696. ret = {}
  697. allrules = [
  698. # target rules (ON/OFF)
  699. 'linkable', # target supports external links
  700. 'tableable', # target supports tables
  701. 'imgalignable', # target supports image alignment
  702. 'listcountable', # target supports numbered lists natively
  703. 'tablecellsplit', # place delimiters only *between* cells
  704. 'listnotnested', # lists cannot be nested
  705. 'quotenotnested', # quotes cannot be nested
  706. 'preareanotescaped', # don't escape specials in PRE area
  707. # target code beautify (ON/OFF)
  708. 'indentprearea', # add leading spaces to PRE area lines
  709. 'breaktablecell', # break lines after any table cell
  710. 'breaktablelineopen', # break line after opening table line
  711. 'keepquoteindent', # don't remove the leading TABs on quotes
  712. # value settings
  713. 'listmaxdepth', # maximum depth for lists
  714. ]
  715. rules = {
  716. 'txt' : {
  717. 'indentprearea':1
  718. },
  719. 'html': {
  720. 'indentprearea':1,
  721. 'linkable':1,
  722. 'imgalignable':1,
  723. 'listcountable':1,
  724. 'tableable':1,
  725. 'breaktablecell':1,
  726. 'breaktablelineopen':1,
  727. 'keepquoteindent':1
  728. },
  729. 'sgml': {
  730. 'linkable':1,
  731. 'listcountable':1,
  732. 'tableable':1,
  733. 'tablecellsplit':1,
  734. 'quotenotnested':1,
  735. 'keepquoteindent':1
  736. },
  737. 'mgp' : {
  738. },
  739. 'tex' : {
  740. 'listcountable':1,
  741. 'tableable':1,
  742. 'tablecellsplit':1,
  743. 'preareanotescaped':1,
  744. 'listmaxdepth':4
  745. },
  746. 'moin': {
  747. 'linkable':1,
  748. 'tableable':1
  749. },
  750. 'man' : {
  751. 'indentprearea':1,
  752. 'listnotnested':1
  753. },
  754. 'pm6' : {
  755. }
  756. }
  757. # populate return dictionary
  758. myrules = rules[doctype]
  759. for key in allrules : ret[key] = 0 # reset all
  760. for key in myrules.keys(): ret[key] = myrules[key] # turn ON
  761. return ret
def getRegexes():
    """Build and return the dictionary of compiled regexes used by the parser.

    Most values are compiled pattern objects keyed by the structure they
    detect; the '_urlskel' entry is the plain dictionary of URL pattern
    pieces, saved for future use.
    """
    regex = {
    # extra at end: (\[(?P<label>\w+)\])?
    # title: 1 to 5 '=' chars, the text, then the very same run of '='
    'title':
    re.compile(r'^\s*(?P<tag>={1,5})(?P<txt>[^=].*[^=])\1$'),
    # the PRE area opens and closes with the same '---' line
    'areaPreOpen':
    re.compile(r'^---$'),
    'areaPreClose':
    re.compile(r'^---$'),
    'quote':
    re.compile(r'^\t+'),
    '1linePreOld':
    re.compile(r'^ {4}([^\s-])'),
    '1linePre':
    re.compile(r'^--- '),
    'fontMono':
    re.compile(r'`([^`]+)`'),
    'fontBold':
    re.compile(r'\*\*([^\s*].*?)\*\*'),
    # the char before '//' must not be ':' (keeps 'proto://' out)
    'fontItalic':
    re.compile(r'(^|[^:])//([^ /].*?)//'),
    'fontUnderline':
    re.compile(r'__([^_].*?)__'), # underline lead/trailing blank
    'fontBolditalic':
    re.compile(r'\*/([^/].*?)/\*'),
    'list':
    re.compile(r'^( *)([+-]) ([^ ])'),
    'deflist':
    re.compile(r'^( *)(=) ([^:]+):'),
    'bar':
    re.compile(r'^\s*([_=-]{20,})\s*$'),
    'table':
    re.compile(r'^ *\|\|?[<:>]*\s'),
    'blankline':
    re.compile(r'^\s*$'),
    'comment':
    re.compile(r'^(//|%)')
    }
    # special char to place data on TAGs contents (\a == bell)
    regex['x'] = re.compile('\a')
    # %%date [ (formatting) ]
    regex['date'] = re.compile(r'%%date\b(\((?P<fmt>.*?)\))?', re.I)
    ### complicated regexes begin here ;)
    #
    # textual descriptions on --help's style: [...] is optional, | is OR
    ### first, some auxiliar variables
    #
    # [image.EXT]
    patt_img = r'\[([\w_,.+%$#@!?+~/-]+\.(png|jpe?g|gif|eps|bmp))\]'
    # link things
    # 'proto', 'guess' and 'pass' are complete sub-patterns; the other
    # entries are bare character lists, wrapped in [...] where they are used
    urlskel = {
    'proto' : r'(https?|ftp|news|telnet|gopher|wais)://',
    'guess' : r'(www[23]?|ftp)\.', # w/out proto, try to guess
    'login' : r'A-Za-z0-9_.-', # for ftp://login@domain.com
    'pass' : r'[^ @]*', # for ftp://login:password@domain.com
    'chars' : r'A-Za-z0-9%._/~:,=-', # %20(space), :80(port)
    'anchor': r'A-Za-z0-9%._-', # %nn(encoded)
    'form' : r'A-Za-z0-9/%&=+.@*_-',# .@*_-(as is)
    'punct' : r'.,;:!?'
    }
    # username [ :password ] @
    patt_url_login = r'([%s]+(:%s)?@)?'%(urlskel['login'],urlskel['pass'])
    # [ http:// ] [ username:password@ ] domain.com [ / ] [ #anchor | ?form=data ]
    retxt_url = r'\b(%s%s|%s)[%s]+\b/*(\?[%s]+)?(#[%s]+)?'%(
    urlskel['proto'],patt_url_login, urlskel['guess'],
    urlskel['chars'],urlskel['form'],urlskel['anchor'])
    # filename | [ filename ] #anchor
    retxt_url_local = r'[%s]+|[%s]*(#[%s]+)'%(
    urlskel['chars'],urlskel['chars'],urlskel['anchor'])
    # user@domain [ ?form=data ]
    patt_email = r'\b[%s]+@([A-Za-z0-9_-]+\.)+[A-Za-z]{2,4}\b(\?[%s]+)?'%(
    urlskel['login'],urlskel['form'])
    # saving for future use
    regex['_urlskel'] = urlskel
    ### and now the real regexes
    #
    regex['email'] = re.compile(patt_email,re.I)
    # email | url
    regex['link'] = \
    re.compile(r'%s|%s'%(retxt_url,patt_email), re.I)
    # \[ label | imagetag url | email | filename \]
    regex['linkmark'] = \
    re.compile(r'\[(?P<label>%s|[^]]+) (?P<link>%s|%s|%s)\]'%(
    patt_img, retxt_url, patt_email, retxt_url_local),
    re.L+re.I)
    # image
    regex['img'] = re.compile(patt_img, re.L+re.I)
    # all macros
    # (for now the date macro is the only one, so the same object is reused)
    regex['macro'] = regex['date']
    # Settings are still in development stage - DON'T USE THIS
    #regex['setting'] = re.compile(r'^%\s*Document \s*(Encoding|Toc)\s*:\s*(.*)$',re.I)
    return regex
### END OF regex nightmares
  class SubareaMaster:
      """Stack of the currently open sub-areas (such as 'table', 'quote', 'list').

      Calling the instance returns the innermost open sub-area name, or ''
      when no sub-area is open.
      """
      def __init__(self):
          # open sub-area names, innermost one last
          self.x = []

      def __call__(self):
          "Return the innermost open sub-area name, or '' if there is none"
          if not self.x:
              return ''
          return self.x[-1]

      def add(self, area):
          "Push 'area', unless it already is the innermost sub-area"
          if not self.x or self.x[-1] != area:
              self.x.append(area)
          Debug('subarea ++ (%s): %s' % (area,self.x), 1)

      def pop(self, area=None):
          "Pop 'area' when it is the innermost sub-area, else do nothing"
          # the emptiness test avoids an IndexError on self.x[-1] when a
          # close is requested while no sub-area is open
          if area and self.x and self.x[-1] == area:
              self.x.pop()
          Debug('subarea -- (%s): %s' % (area,self.x), 1)
  def doHeader(doctype, headdic):
      """Return the header of the 'doctype' target as a list of lines.

      The HEADER_TEMPLATE[doctype] text is filled with the 'headdic' values.
      Template lines whose only header references point to empty 'headdic'
      entries are dropped before the template is populated.
      Error() is called for a doctype that has no template.
      """
      if doctype not in HEADER_TEMPLATE:
          Error("doheader: Unknow doctype '%s'"%doctype)
      Debug('HEADER data: %s'%headdic, 1)
      template = HEADER_TEMPLATE[doctype].split('\n')
      # scan for empty dictionary keys
      # if found, scan template lines for that key reference
      # if the line holds no other key reference, drop the whole line
      for key in headdic.keys():
          if headdic[key]:
              continue
          reference = '%%(%s)s' % key
          kept = []
          # a fresh list is built instead of calling template.remove()
          # inside the loop: removing while iterating skips the line that
          # follows each removed one
          for line in template:
              if key in line and '%(HEADER' not in line.replace(reference, ''):
                  continue
              kept.append(line)
          template = kept
      # populate template with data
      template = '\n'.join(template) % headdic
      # post processing
      if doctype == 'tex':
          if FLAGS['toc']:
              template = template + '\n' + r'\newpage\tableofcontents'
          if headdic['HEADER3'] == currdate:
              # let tex format today
              # (the backslash is doubled on the replacement because a
              # single '\d' is rejected as a bad escape by newer re modules)
              template = re.sub(r'\\date\{.*?}', r'\\date', template)
      return template.split('\n')
  def doCommentLine(doctype,txt):
      """Return 'txt' wrapped on the comment tag of the current target.

      The text is placed where the TAGS['comment'] tag holds the regex['x']
      placeholder. An empty string is returned when the target has no
      comment tag.
      """
      # the -- string ends a sgml comment :(
      if doctype == 'sgml':
          txt = txt.replace('--', '\\-\\-')
      if TAGS['comment']:
          # a function is used as the replacement so the text is inserted
          # as is; a plain string would be parsed as a replacement template
          # and backslashes on it (paths on the command line, for example)
          # would be taken as escapes or group references
          return regex['x'].sub(lambda match: txt, TAGS['comment'])
      return ''
  def doFooter(doctype):
      """Return the document footer as a list of lines.

      The footer is made of two comment lines (the generator credits and
      the command line used) followed by the TAGS['EOD'] tag.
      """
      if doctype == 'tex':
          typename = 'LaTeX2e'
      else:
          typename = doctype
      credits = '%s code generated by txt2tags %s (%s)'%(
          typename,my_version,my_url)
      # CMDLINE[0] is left out of the recorded command line
      invocation = 'cmdline: txt2tags %s'%(' '.join(CMDLINE[1:]))
      return [
          '\n'+doCommentLine(doctype,credits),
          doCommentLine(doctype,invocation),
          TAGS['EOD'],
      ]
  def doEscape(doctype,txt):
      """Return 'txt' with the special chars of the 'doctype' target escaped.

      html, sgml: & < > turn into entities (sgml also maps \\xff to &yuml;)
      pm6       : < turns into <\\#60>
      mgp       : a leading % gets a blank before it
      man       : a leading . gets a blank before it, backslashes are doubled
      tex       : \\ ~ ^ are wrapped on \\verb!!, # $ & % { } get a backslash
      Any other doctype returns the text untouched.
      """
      if doctype == 'html' or doctype == 'sgml':
          # & goes first, or the entities added next would be escaped again
          txt = txt.replace('&', '&amp;')
          txt = txt.replace('<', '&lt;')
          txt = txt.replace('>', '&gt;')
          if doctype == 'sgml':
              txt = txt.replace('\xff', '&yuml;')  # "+y
      elif doctype == 'pm6':
          txt = txt.replace('<', r'<\#60>')
      elif doctype == 'mgp':
          # add leading blank to avoid parse
          if txt[:1] == '%':
              txt = ' ' + txt
      elif doctype == 'man':
          # a leading dot is a command ID
          if txt[:1] == '.':
              txt = ' ' + txt
          txt = doEscapeEscapechar(txt)
      elif doctype == 'tex':
          # the backslash goes first, the next replaces insert new ones
          txt = txt.replace('\\', r'\verb!\!')
          txt = txt.replace('~', r'\verb!~!')
          txt = txt.replace('^', r'\verb!^!')
          txt = re.sub('([#$&%{}])', r'\\\1', txt)
          # TIP the _ is escaped at end
      return txt
  def doFinalEscape(doctype, txt):
      """Return 'txt' with the last-step escapes of the 'doctype' target.

      pm6 : \\< turns into <\\#92><
      man : - turns into \\-
      tex : _ turns into \\_
      sgml: [ turns into &lsqb;
      Any other doctype returns the text untouched.
      """
      # target: (string to find, its escaped form)
      escapes = {
          'pm6' : (r'\<', r'<\#92><'),
          'man' : ('-', r'\-'),
          'tex' : ('_', r'\_'),
          'sgml': ('[', '&lsqb;'),
      }
      if doctype in escapes:
          old, new = escapes[doctype]
          txt = txt.replace(old, new)
      return txt
  def doEscapeEscapechar(txt):
      "Return 'txt' with every backslash doubled"
      return txt.replace('\\', '\\\\')
  def addLineBreaks(list):
      """use LB to respect sys.platform

      Return a new list where each line has its embedded \\n's turned
      into LB and one LB added at its end. The received list is not changed.
      """
      ret = []
      for line in list:
          # embedded \n's first, then the final line break
          ret.append(line.replace('\n', LB) + LB)
      return ret
  def doPreLine(doctype,line):
      "Prepare one line of a preformatted (verbatim) area for the target"
      escape_wanted = not rules['preareanotescaped']
      if escape_wanted:
          line = doEscape(doctype,line)
      if rules['indentprearea']:
          line = ' '+line
      # pm6 is the only target that gets the final escape at this point
      if doctype == 'pm6':
          line = doFinalEscape(doctype, line)
      return line
  def doCloseTable(doctype):
      """Return the text that closes the open table and reset the table state.

      Targets without table support (rules['tableable'] off) get the
      preformatted area close tag instead of a table close tag.
      """
      global subarea, tableborder
      if not rules['tableable']:
          closing = TAGS['areaPreClose']
      elif doctype == 'tex' and tableborder:
          # bordered tex table: one more table line tag before the close tag
          closing = TAGS['tableLineOpen']+TAGS['tableClose']+'\n'
      else:
          closing = TAGS['tableClose']+'\n'
      # the table is over: forget its border and leave the 'table' subarea
      tableborder = 0
      subarea.pop('table')
      return closing
  def doCloseQuote(howmany=None):
      """Close 'howmany' quote levels and return their close tags.

      With no 'howmany' (or zero) all the open levels are closed. The tags
      are returned as a single string, joined by line breaks. The 'quote'
      subarea is left when no quote level remains open.
      """
      global quotedepth
      ret = []
      if not howmany: howmany = len(quotedepth)
      # never try to close more levels than the open ones, it would
      # pop from an empty list
      howmany = min(howmany, len(quotedepth))
      for i in range(howmany):
          quotedepth.pop()
          #TODO align open/close tag -> FREE_ALING_TAG = 1 (man not)
          ret.append(TAGS['areaQuoteClose'])
      if not quotedepth: subarea.pop('quote')
      return '\n'.join(ret)
  def doCloseList(howmany=None):
      """Close 'howmany' list levels and return their close tags.

      With no 'howmany' (or zero) all the open levels are closed. The tags
      are returned as a single string, joined by line breaks. The 'list'
      subarea is left when no list level remains open.
      """
      global listindent, listids
      # list id char: name of its close tag
      closetags = {'-': 'listClose', '+': 'numlistClose', '=': 'deflistClose'}
      ret = []
      if not howmany: howmany = len(listindent)
      # never try to close more levels than the open ones
      howmany = min(howmany, len(listindent))
      for i in range(howmany):
          # an unknown id now really falls back to the default tag, instead
          # of using an unset (or the previous level's) 'tag' variable
          tag = TAGS[closetags.get(listids[-1], 'listClose')]
          if not tag: tag = TAGS['listClose']  # default
          if tag:
              # unnested lists are only closed at mother-list
              if rules['listnotnested']:
                  if len(listindent) == 1:
                      ret.append(tag)
              else:
                  ret.append(listindent[-1]+tag)
          del listindent[-1]
          del listids[-1]
      if not listindent: subarea.pop('list')
      return '\n'.join(ret)
  def beautify_me(name, doctype, line):
      """where name is: bold, italic, underline or bolditalic

      Return 'line' with every mark of the 'name' beautifier replaced by
      the respective open and close tags of the current target.
      """
      name = 'font%s' % name.capitalize()
      opentag = TAGS['%sOpen'%name]
      closetag = TAGS['%sClose'%name]
      if name == 'fontItalic':
          # the italic regex has one more group: the char found before the
          # mark, which must be kept on the line
          txt = r'\1%s\2%s'%(opentag, closetag)
      else:
          txt = r'%s\1%s'%(opentag, closetag)
      return regex[name].sub(txt,line)
  def get_tagged_link(doctype, label, url):
      """Return the link made of 'label' and 'url' tagged for the target.

      The link is tagged as an e-mail when 'url' matches regex['email'],
      otherwise as an URL. An empty 'label' means a simple (unnamed) link.
      With FLAGS['maskemail'] on, a simple e-mail link is returned as
      masked plain text instead of a tag.
      """
      ret = ''
      # set link type
      if regex['email'].match(url):
          linktype = 'email'
      else:
          linktype = 'url'
      # adding protocol to guessed link
      guessurl = ''
      if linktype == 'url' and \
         re.match(regex['_urlskel']['guess'], url):
          if url[0] == 'w': guessurl = 'http://' +url
          else            : guessurl = 'ftp://'  +url
          # not link aware targets -> protocol is useless
          if not rules['linkable']: guessurl = ''
      # escape specials from TEXT parts
      label = doEscape(doctype,label)
      if not rules['linkable']:
          if doctype == 'tex':
              # ugly, but compile
              if url[:1] == '#': url = '\\' + url
          else:
              url = doEscape(doctype,url)
      # simple link (not guessed)
      if not label and not guessurl:
          if FLAGS['maskemail'] and linktype == 'email':
              # do the email mask feature (no TAGs, just text)
              url = url.replace('@',' (a) ')
              url = url.replace('.',' ')
              url = "<%s>" % url
              # doEscape() takes the doctype first; calling it with the
              # text only was a TypeError
              if rules['linkable']: url = doEscape(doctype,url)
              ret = url
          else:
              # just add link data to tag
              tag = re.sub('.*', TAGS[linktype], '')  #force_re
              ret = regex['x'].sub(url,tag)
      # named link or guessed simple link
      else:
          # adjusts for guessed link
          if not label: label = url     # no protocol
          if guessurl : url = guessurl  # with protocol
          # handle \ on link label
          label = doEscapeEscapechar(label)
          # putting data on the right appearance order
          if rules['linkable']:
              urlorder = [url, label]   # link before label
          else:
              urlorder = [label, url]   # label before link
          # get tag
          ret = re.sub('.*', TAGS["%sMark"%linktype], '')  #force_re
          # add link data to tag (replace \a's)
          for data in urlorder:
              ret = regex['x'].sub(data,ret,1)
      return ret
  def get_image_align(line):
      """Return the align of the first image mark found on 'line'.

      The align comes from the image position on the stripped line:
      'para' (image alone), 'left' (image first), 'right' (image last)
      or 'center' (text on both sides). An empty string is returned when
      the line has no image mark.
      """
      line = line.strip()
      m = regex['img'].search(line)
      # no image at all: nothing to align (it was an AttributeError before)
      if not m:
          return ''
      at_head = (m.start() == 0)
      at_tail = (m.end() == len(line))
      if at_head and at_tail: return 'para'   # ^img$
      if at_head: return 'left'               # ^img + text$
      if at_tail: return 'right'              # ^text + img$
      return 'center'                         # ^text +img +text$
  def get_table_prop(line):
      """Return a dictionary with the properties of the table line 'line'.

      Keys: 'border' (0 or 1), 'type' ('|' or tab), 'header' (0 or 1) and
      'cells' (list with the cell contents, marks removed).
      """
      # default table properties
      ret = {'border': 0, 'type': '|', 'header':0, 'cells':[]}
      # strip and del leading table mark |
      line = line.strip()
      line = line[1:]
      # the marks are tested with slices, not indexes, so a line with
      # nothing after the table mark does not raise IndexError
      # detect (and delete) header mark
      if line[:1] == '|':
          ret['header'] = 1
          line = line[1:]
      # detect (and delete) table ID (pipe-made is default)
      if line[:1] == '\t':
          ret['type'] = '\t'
          ret['border'] = 1
          line = line[1:]
      # detect (and delete) border mark
      if line[-1:] == '|':
          ret['border'] = 1
          line = line[:-1]
      # split cells
      # TODO v1.3: take \t\|?| out
      ret['cells'] = re.split(r'\t\|?| \| ', line)
      Debug('Table Prop: %s' % ret, 1)
      return ret
  1096. #TODO if ' | ' table cell is center align
  def tag_table_cells(table, doctype):
      """Return the cells of 'table' tagged for the target, as one string.

      'table' is a dictionary with the 'header' and 'cells' keys. The
      table['cells'] list is consumed: it is empty when this function ends.
      """
      # plain cell
      opentag, closetag = TAGS['tableCellOpen'], TAGS['tableCellClose']
      # title cell
      if table['header']:
          opentag = TAGS['tableTitleCellOpen']
          closetag = TAGS['tableTitleCellClose']
      # should we break the line?
      if rules['breaktablecell']: closetag = closetag+'\n'
      # here we go
      cells = table['cells']
      tagged = []
      while cells:
          cel = cells.pop(0)
          if not cel and doctype == 'html':
              cel = '&nbsp;'
          # last cell gotchas
          if not cells:
              # don't need cell separator
              if rules['tablecellsplit']: closetag = ''
              # close beautifier for last title cell
              if doctype == 'tex' and table['header']: closetag = '}'
          newcell = opentag + cel.strip() + closetag
          # force_re: the cell is used as a regex replacement template,
          # so the backslash escapes found on it are expanded
          tagged.append(re.sub('.*', newcell, ''))
      return ''.join(tagged)
  1122. ################################################################################
  1123. ###MerryChristmas,IdontwanttofighttonightwithyouImissyourbodyandIneedyourlove###
  1124. ################################################################################
  1125. def convert(inlines, doctype):
  1126. # global vars for doClose*()
  1127. global TAGS, regex, rules, quotedepth, listindent, listids
  1128. global subarea, tableborder
  1129. global has_obsolete, has_fixed
  1130. TAGS = getTags(doctype)
  1131. rules = getRules(doctype)
  1132. regex = getRegexes()
  1133. # the defaults
  1134. linkmask = '@@_link_@@'
  1135. monomask = '@@_mono_@@'
  1136. macromask = '@@_macro_@@'
  1137. AREA = NewArea('head',0) # then conf, then body
  1138. subarea = SubareaMaster()
  1139. HEADERS = { 'HEADER1': '-NO TITLE-', 'HEADER2':'', 'HEADER3':'' }
  1140. # SETTINGS = {}
  1141. ret = []
  1142. fixedinfile = []
  1143. toclist = []
  1144. header = []
  1145. f_tt = 0
  1146. listindent = []
  1147. listids = []
  1148. listcount = []
  1149. titlecount = ['',0,0,0,0,0]
  1150. f_lastblank = 0
  1151. holdspace = ''
  1152. listholdspace = ''
  1153. quotedepth = []
  1154. tableborder = 0
  1155. tablealign = []
  1156. has_obsolete = has_fixed = 0
  1157. if outfile != pipefileid:
  1158. if not FLAGS['gui'] and not FLAGS['fixme']:
  1159. print "--- %s..."%doctype
  1160. # let's mark it up!
  1161. linenr = 0
  1162. for lineref in range(len(inlines)):
  1163. skip_continue = 0
  1164. linkbank = []
  1165. monobank = []
  1166. macrobank = []
  1167. linenr = lineref +1
  1168. untouchedline = inlines[lineref]
  1169. line = string.rstrip(untouchedline)
  1170. # save line to the 'fixed' buffer
  1171. fixedinfile.append(untouchedline)
  1172. Debug('LINE %04d: %s' % (linenr,repr(line)), 1) # for heavy debug
  1173. # detect if head section is over
  1174. #TIP 'not line' depends on previous line.rstrip()
  1175. if (linenr == 4 and AREA == 'head') or \
  1176. (linenr == 1 and not line):
  1177. AREA = NewArea('conf',linenr)
  1178. if not FLAGS['noheaders']:
  1179. header = doHeader(doctype,HEADERS)
  1180. # we need (not really) to mark each paragraph
  1181. #TODO check if this is really needed
  1182. if doctype == 'pm6' and f_lastblank:
  1183. if f_tt or AREA == 'head' or listindent:
  1184. holdspace = ''
  1185. else:
  1186. holdspace = TAGS['paragraph']+'\n'
  1187. # any NOT table line, closes an open table
  1188. if subarea() == 'table' and not regex['table'].search(line):
  1189. ret.append(doCloseTable(doctype))
  1190. #---------------------[ PRE formatted ]----------------------
  1191. #TIP we'll never support beautifiers inside pre-formatted
  1192. # we're already on a PRE area
  1193. if f_tt:
  1194. # closing PRE
  1195. if regex['areaPreClose'].search(line):
  1196. if doctype != 'pm6':
  1197. ret.append(TAGS['areaPreClose'])
  1198. f_tt = 0
  1199. continue
  1200. # normal PRE-inside line
  1201. line = doPreLine(doctype, line)
  1202. ret.append(line)
  1203. continue
  1204. # detecting PRE area init
  1205. if regex['areaPreOpen'].search(line):
  1206. ret.append(TAGS['areaPreOpen'])
  1207. f_lastblank = 0
  1208. f_tt = 1
  1209. continue
  1210. # one line PRE-formatted text
  1211. if regex['1linePre'].search(line):
  1212. f_lastblank = 0
  1213. line = regex['1linePre'].sub('',line)
  1214. line = doPreLine(doctype, line)
  1215. t1, t2 = TAGS['areaPreOpen'],TAGS['areaPreClose']
  1216. ret.append('%s\n%s\n%s'%(t1,line,t2))
  1217. continue
  1218. #---------------------[ blank lines ]-----------------------
  1219. #TODO "holdspace" to save <p> to not show in closelist
  1220. if regex['blankline'].search(line):
  1221. # closing all open quotes
  1222. if quotedepth:
  1223. ret.append(doCloseQuote())
  1224. # closing all open lists
  1225. if f_lastblank: # 2nd consecutive blank line
  1226. if listindent: # closes list (if any)
  1227. ret.append(doCloseList())
  1228. holdspace = ''
  1229. continue # consecutive blanks are trash
  1230. # normal blank line
  1231. if doctype != 'pm6' and AREA == 'body':
  1232. # paragraph (if any) is wanted inside lists also
  1233. if listindent:
  1234. para = TAGS['paragraph'] + '\n'
  1235. holdspace = holdspace + para
  1236. elif doctype == 'html':
  1237. ret.append(TAGS['paragraph'])
  1238. # sgml: quote close tag must not be \n\n</quote>
  1239. elif doctype == 'sgml' and quotedepth:
  1240. skip_continue = 1
  1241. # otherwise we just print a blank line
  1242. else:
  1243. ret.append('')
  1244. f_lastblank = 1
  1245. if not skip_continue: continue
  1246. else:
  1247. f_lastblank = 0 # reset blank status
  1248. #---------------------[ comments ]-----------------------
  1249. # just skip them (if not macro or setting)
  1250. if regex['comment'].search(line) and not regex['date'].match(line):
  1251. ### Still in development stage
  1252. # # detect settings
  1253. # if regex['setting'].search(line):
  1254. # if AREA == 'conf':
  1255. # m = regex['setting'].search(line)
  1256. # name, val = m.group(1), m.group(2)
  1257. # SETTINGS[string.lower(name)] = string.strip(val)
  1258. # Debug("Found Setting '%s', value '%s'"%(name,val),1,linenr)
  1259. # continue
  1260. # obsoleted comment format
  1261. if line[0] == '/':
  1262. # fixes it
  1263. if FLAGS['fixme']:
  1264. # discard original line, save new
  1265. fixedinfile.pop()
  1266. fixedinfile.append(re.sub('^//','%',untouchedline))
  1267. Fixed(linenr, '// comment line', '% comment line')
  1268. # just show a warning message
  1269. else:
  1270. old = '// as the comment line string'
  1271. new = '% as the new comment line char'
  1272. Obsoleted(linenr, old, new, '1.3')
  1273. f_lastblank = 1
  1274. continue
  1275. #---------------------[ BODY detect ]-----------------------
  1276. ### if got here, its a header or a valid line
  1277. if AREA == 'conf':
  1278. # oops, not header, so we're now on document BODY
  1279. AREA = NewArea('body', linenr)
  1280. # so, let's print the opening paragraph
  1281. if doctype != 'pm6':
  1282. ret.append(TAGS['paragraph'])
  1283. #---------------------[ Title ]-----------------------
  1284. # man: - should not be escaped, \ turns to \\\\
  1285. #TODO set next blank and set f_lastblank or f_lasttitle
  1286. if regex['title'].search(line) and not listindent and AREA == 'body':
  1287. line = doEscape(doctype,line)
  1288. # double escape escape char
  1289. if doctype == 'man':
  1290. line = doEscapeEscapechar(line)
  1291. m = regex['title'].search(line)
  1292. tag = m.group('tag')
  1293. level = len(tag)
  1294. tag = TAGS['title%s'%level]
  1295. txt = string.strip(m.group('txt'))
  1296. if doctype == 'sgml':
  1297. txt = re.sub(r'\[', r'&lsqb;', txt)
  1298. if FLAGS['enumtitle']: ### numbered title
  1299. id = '' ; n = level #
  1300. titlecount[n] = titlecount[n] +1 # add count
  1301. if n < len(titlecount)-1: # reset sublevels count
  1302. for i in range(n+1, len(titlecount)): titlecount[i] = 0
  1303. for i in range(n): # compose id from hierarchy
  1304. id = "%s%d."%(id,titlecount[i+1])
  1305. idtxt = "%s %s"%(id, txt) # add id to title
  1306. else:
  1307. idtxt = txt
  1308. anchorid = 'toc%d'%(len(toclist)+1)
  1309. if TAGS['anchor'] and FLAGS['toc']:
  1310. ret.append(regex['x'].sub(anchorid,TAGS['anchor']))
  1311. # escape to handle \ on title
  1312. idtxt = doEscapeEscapechar(idtxt)
  1313. line = regex['title'].sub(tag,line)
  1314. ret.append(regex['x'].sub(idtxt,line))
  1315. # let's do some TOC!
  1316. if TAGS['anchor']:
  1317. # tocitemid = '#toc%d'%(len(toclist)+1)
  1318. # TOC more readable with master topics not linked at number
  1319. # stoled idea from windows .CHM files (help system)
  1320. if FLAGS['enumtitle'] and level == 1:
  1321. tocitem = '%s+ [%s #%s]'%(' '*level,txt,anchorid)
  1322. else:
  1323. tocitem = '%s- [%s #%s]'%(' '*level,idtxt,anchorid)
  1324. else:
  1325. tocitem = '%s- %s'%(' '*level,idtxt)
  1326. if doctype in ['txt', 'man']:
  1327. tocitem = '%s%s' %(' '*level,idtxt)
  1328. if level <= 3: toclist.append(tocitem) # max toc level: 3
  1329. # add "underline" to text titles
  1330. if doctype == 'txt':
  1331. ret.append(regex['x'].sub('='*len(idtxt),tag))
  1332. continue
  1333. #TODO! labeltxt = ''
  1334. # label = m.group('label')
  1335. # if label: labeltxt = '<label id="%s">' %label
  1336. #---------------------[ apply masks ]-----------------------
  1337. ### protect important structures from escaping and formatting
  1338. # protect pre-formatted font text
  1339. while regex['fontMono'].search(line):
  1340. t

Large files are truncated, but you can click here to view the full file