PageRenderTime 55ms CodeModel.GetById 13ms RepoModel.GetById 0ms app.codeStats 0ms

/old/txt2tags-1.4.py

http://txt2tags.googlecode.com/
Python | 2157 lines | 2133 code | 5 blank | 19 comment | 7 complexity | cfa33d77fcd57ac3df3a16e2a9743c25 MD5 | raw file
Possible License(s): GPL-2.0, GPL-3.0, WTFPL

Large files are truncated, but you can click here to view the full file

  1. #!/usr/bin/env python
  2. # txt2tags - generic text conversion tool
  3. # http://txt2tags.sf.net
  4. #
  5. # Copyright 2001, 2002 Aurélio Marinho Jargas
  6. #
  7. # This program is free software; you can redistribute it and/or modify
  8. # it under the terms of the GNU General Public License as published by
  9. # the Free Software Foundation, version 2.
  10. #
  11. # This program is distributed in the hope that it will be useful,
  12. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. # GNU General Public License for more details.
  15. #
  16. # You have received a copy of the GNU General Public License along
  17. # with this program, on the COPYING file.
  18. #
  19. # the code is getting better, but is still ugly - stay tuned
  20. import re, string, os, sys, getopt, traceback
  21. from time import strftime,time,localtime
  22. my_url = 'http://txt2tags.sf.net'
  23. my_email = 'aurelio@verde666.org'
  24. my_version = '1.4'
  25. DEBUG = 0 # do not edit here, please use --debug
  26. targets = ['txt', 'sgml', 'html', 'pm6', 'mgp', 'moin', 'man', 'tex']
  27. FLAGS = {'noheaders':0,'enumtitle':0,'maskemail':0, 'stdout':0,
  28. 'toconly' :0,'toc' :0,'gui' :0}
  29. OPTIONS = {'toclevel' :3,'style' :''}
  30. regex = {}
  31. TAGS = {}
  32. rules = {}
  33. CMDLINE = ''
  34. currdate = strftime('%Y%m%d',localtime(time())) # ISO current date
  35. splitlevel = '' ; lang = 'english'
  36. doctype = outfile = ''
  37. pipefileid = '-'
  38. #my_version = my_version + '-dev' + currdate[4:] # devel!
  39. # global vars for doClose*()
  40. quotedepth = []
  41. listindent = []
  42. listids = []
  43. subarea = None
  44. tableborder = 0
  45. versionstr = "txt2tags version %s <%s>"%(my_version,my_url)
  46. usage = """
  47. %s
  48. usage: txt2tags -t <type> [OPTIONS] file.t2t
  49. txt2tags -t html -s <split level> -l <lang> file.t2t
  50. -t, --type set target document type. actually supported:
  51. %s
  52. --stdout send output to STDOUT instead writing to a file
  53. --noheaders suppress header, title and footer information
  54. --enumtitle enumerate all title lines as 1, 1.1, 1.1.1, etc
  55. --maskemail hide email from spam robots. x@y.z turns <x (a) y z>
  56. --toc add TOC (Table of Contents) to target document
  57. --toconly print document TOC and exit
  58. --toclevel N set maximum TOC level (deepness) to N
  59. --gui invoke Graphical Tk Interface
  60. --style FILE use FILE as the document style (like Html CSS)
  61. -h, --help print this help information and exit
  62. -V, --version print program version and exit
  63. extra options for HTML target (needs sgml-tools):
  64. --split split documents. values: 0, 1, 2 (default 0)
  65. --lang document language (default english)
  66. If input file is '-', reads from STDIN. Output is saved to
  67. 'file.<type>' file, unless --stdout is specified.
  68. """%(versionstr, re.sub(r"[]'[]",'',repr(targets)))
  69. # here is all the target's templates
  70. # you may edit them to fit your needs
  71. # - the %(HEADERn)s strings represent the Header lines
  72. # - use %% to represent a literal %
  73. #
  74. HEADER_TEMPLATE = {
  75. 'txt': """\
  76. %(HEADER1)s
  77. %(HEADER2)s
  78. %(HEADER3)s
  79. """,
  80. 'sgml': """\
  81. <!doctype linuxdoc system>
  82. <article>
  83. <title>%(HEADER1)s
  84. <author>%(HEADER2)s
  85. <date>%(HEADER3)s
  86. """,
  87. 'html': """\
  88. <HTML>
  89. <HEAD>
  90. <META NAME="generator" CONTENT="http://txt2tags.sf.net">
  91. <META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=%(ENCODING)s">
  92. <LINK REL="stylesheet" TYPE="text/css" HREF="%(STYLE)s">
  93. <TITLE>%(HEADER1)s</TITLE>
  94. </HEAD><BODY BGCOLOR="white" TEXT="black">
  95. <P ALIGN="center"><CENTER><H1>%(HEADER1)s</H1>
  96. <FONT SIZE=4>
  97. <I>%(HEADER2)s</I><BR>
  98. %(HEADER3)s
  99. </FONT></CENTER>
  100. """,
  101. # TODO man section 1 is hardcoded...
  102. 'man': """\
  103. .TH "%(HEADER1)s" 1 %(HEADER3)s "%(HEADER2)s"
  104. """,
  105. # TODO style to <HR>
  106. 'pm6': """\
  107. <PMTags1.0 win><C-COLORTABLE ("Preto" 1 0 0 0)
  108. ><@Normal=
  109. <FONT "Times New Roman"><CCOLOR "Preto"><SIZE 11>
  110. <HORIZONTAL 100><LETTERSPACE 0><CTRACK 127><CSSIZE 70><C+SIZE 58.3>
  111. <C-POSITION 33.3><C+POSITION 33.3><P><CBASELINE 0><CNOBREAK 0><CLEADING -0.05>
  112. <GGRID 0><GLEFT 7.2><GRIGHT 0><GFIRST 0><G+BEFORE 7.2><G+AFTER 0>
  113. <GALIGNMENT "justify"><GMETHOD "proportional"><G& "ENGLISH">
  114. <GPAIRS 12><G%% 120><GKNEXT 0><GKWIDOW 0><GKORPHAN 0><GTABS $>
  115. <GHYPHENATION 2 34 0><GWORDSPACE 75 100 150><GSPACE -5 0 25>
  116. ><@Bullet=<@-PARENT "Normal"><FONT "Abadi MT Condensed Light">
  117. <GLEFT 14.4><G+BEFORE 2.15><G%% 110><GTABS(25.2 l "")>
  118. ><@PreFormat=<@-PARENT "Normal"><FONT "Lucida Console"><SIZE 8><CTRACK 0>
  119. <GLEFT 0><G+BEFORE 0><GALIGNMENT "left"><GWORDSPACE 100 100 100><GSPACE 0 0 0>
  120. ><@Title1=<@-PARENT "Normal"><FONT "Arial"><SIZE 14><B>
  121. <GCONTENTS><GLEFT 0><G+BEFORE 0><GALIGNMENT "left">
  122. ><@Title2=<@-PARENT "Title1"><SIZE 12><G+BEFORE 3.6>
  123. ><@Title3=<@-PARENT "Title1"><SIZE 10><GLEFT 7.2><G+BEFORE 7.2>
  124. ><@Title4=<@-PARENT "Title3">
  125. ><@Title5=<@-PARENT "Title3">
  126. ><@Quote=<@-PARENT "Normal"><SIZE 10><I>>
  127. %(HEADER1)s
  128. %(HEADER2)s
  129. %(HEADER3)s
  130. """,
  131. 'mgp': """\
  132. #!/usr/X11R6/bin/mgp -t 90
  133. %%deffont "normal" xfont "utopia-medium-r", charset "iso8859-1"
  134. %%deffont "normal-i" xfont "utopia-medium-i", charset "iso8859-1"
  135. %%deffont "normal-b" xfont "utopia-bold-r" , charset "iso8859-1"
  136. %%deffont "normal-bi" xfont "utopia-bold-i" , charset "iso8859-1"
  137. %%deffont "mono" xfont "courier-medium-r", charset "iso8859-1"
  138. %%default 1 size 5
  139. %%default 2 size 8, fore "yellow", font "normal-b", center
  140. %%default 3 size 5, fore "white", font "normal", left, prefix " "
  141. %%tab 1 size 4, vgap 30, prefix " ", icon arc "red" 40, leftfill
  142. %%tab 2 prefix " ", icon arc "orange" 40, leftfill
  143. %%tab 3 prefix " ", icon arc "brown" 40, leftfill
  144. %%tab 4 prefix " ", icon arc "darkmagenta" 40, leftfill
  145. %%tab 5 prefix " ", icon arc "magenta" 40, leftfill
  146. %%%%------------------------- end of headers -----------------------------
  147. %%page
  148. %%size 10, center, fore "yellow"
  149. %(HEADER1)s
  150. %%font "normal-i", size 6, fore "white", center
  151. %(HEADER2)s
  152. %%font "mono", size 7, center
  153. %(HEADER3)s
  154. """,
  155. # TODO please, improve me!
  156. 'moin': """\
  157. %(HEADER1)s
  158. %(HEADER2)s
  159. %(HEADER3)s
  160. """,
  161. 'tex': \
  162. r"""\documentclass[11pt,a4paper]{article}
  163. \usepackage{amsfonts,amssymb,graphicx,url}
  164. \usepackage[%(ENCODING)s]{inputenc} %% char encoding
  165. \pagestyle{plain} %% do page numbering ('empty' turns off)
  166. \frenchspacing %% no aditional spaces after periods
  167. \setlength{\parskip}{8pt}\parindent=0pt %% no paragraph indentation
  168. %% uncomment next line for fancy PDF output on Adobe Acrobat Reader
  169. %%\usepackage[pdfstartview=FitV,colorlinks=true,bookmarks=true]{hyperref}
  170. \title{%(HEADER1)s}
  171. \author{%(HEADER2)s}
  172. \begin{document}
  173. \date{%(HEADER3)s}
  174. \maketitle
  175. """
  176. }
  177. #-----------------------------------------------------------------------
  178. def Quit(msg, exitcode=0): print msg ; sys.exit(exitcode)
  179. def Error(msg): print "ERROR: %s"%msg ; sys.exit()
  180. def Debug(msg,i=0,linenr=None):
  181. if i > DEBUG: return
  182. if linenr is not None:
  183. print "(%d) %04d:%s"%(i,linenr,msg)
  184. else:
  185. print "(%d) %s"%(i,msg)
  186. def Readfile(file):
  187. if file == '-':
  188. try: data = sys.stdin.readlines()
  189. except: Error('You must feed me with data on STDIN!')
  190. else:
  191. try: f = open(file); data = f.readlines() ; f.close()
  192. except: Error("Cannot read file:\n %s"%file)
  193. return data
  194. def Savefile(file, contents):
  195. try: f = open(file, 'w')
  196. except: Error("Cannot open file for writing:\n %s"%file)
  197. if type(contents) == type([]): doit = f.writelines
  198. else: doit = f.write
  199. doit(contents) ; f.close()
  200. def NewArea(new, linenr):
  201. if new not in ['head', 'conf', 'body']:
  202. Error("Invalid new AREA '%s' on line '%s'"%(new,linenr))
  203. Debug('NEW AREA: %s'%new, 1, linenr)
  204. return new
  205. def reset_flags():
  206. global FLAGS
  207. for flag in FLAGS.keys(): FLAGS[flag] = 0
  208. def set_outfile_name(infile, doctype):
  209. "dirname is the same for {in,out}file"
  210. if not infile: return
  211. if infile == pipefileid or FLAGS['toconly'] or FLAGS['stdout']:
  212. outfile = pipefileid
  213. else:
  214. outfile = "%s.%s"%(re.sub('\.(txt|t2t)$','',infile), doctype)
  215. Debug(" infile: '%s'"% infile, 1)
  216. Debug("outfile: '%s'"%outfile, 1)
  217. return outfile
  218. def finish_him(outlist, outfile):
  219. "writing output to screen or file"
  220. if outfile == pipefileid:
  221. for line in outlist: print line
  222. else:
  223. Savefile(outfile, addLineBreaks(outlist))
  224. if not FLAGS['gui']: print 'wrote %s'%(outfile)
  225. if splitlevel:
  226. print "--- html..."
  227. os.system('sgml2html --language=%s --split=%s %s'%(
  228. lang,splitlevel,outfile))
  229. def ParseCmdline(cmdline=sys.argv):
  230. "return a dic with all options:value found"
  231. global CMDLINE ; CMDLINE = cmdline # save for dofooter()
  232. Debug("cmdline: %s"%cmdline, 1)
  233. options = {'infile': '', 'infiles':''}
  234. # get cmdline options
  235. longopt = ['help','version','type=','split=','lang='] +FLAGS.keys()
  236. longopt = longopt + map(lambda x:x+'=', OPTIONS.keys()) # add =
  237. try: (opt, args) = getopt.getopt(cmdline[1:], 'hVt:', longopt)
  238. except getopt.GetoptError:
  239. Error('Bad option or missing argument (try --help)')
  240. # get infile, if any
  241. if args:
  242. options['infile'] = args[0]
  243. options['infiles'] = args # multi
  244. for name,val in opt:
  245. # parse information options
  246. if name in ['-h','--help' ]: Quit(usage)
  247. elif name in ['-V','--version']: Quit(versionstr)
  248. # parse short/long options
  249. elif name in ['-t','--type']:
  250. options['doctype'] = val
  251. continue
  252. # just long options
  253. options[name[2:]] = val # del --
  254. Debug("cmdline arguments: %s"%options, 1)
  255. return options
  256. def ParseCmdlineOptions(optdic):
  257. "set vars and flags according to options dic"
  258. global FLAGS, OPTIONS, splitlevel, lang
  259. # store flags
  260. myflags = [] # for debug msg
  261. for flag in FLAGS.keys():
  262. if optdic.has_key(flag):
  263. FLAGS[flag] = 1
  264. myflags.append(flag)
  265. # and now options
  266. for opt in OPTIONS.keys():
  267. opttype = type(OPTIONS[opt])
  268. val = optdic.get(opt)
  269. if val:
  270. if opttype == type(9):
  271. try: val = int(val)
  272. except: Error('--%s value must be a number'%opt)
  273. OPTIONS[opt] = val
  274. # finally, the most important vars
  275. doctype = optdic.get('doctype')
  276. infile = optdic.get('infile')
  277. splitlevel = optdic.get('split')
  278. lang = optdic.get('lang')
  279. Debug("cmdline flags: %s"%string.join(myflags,', '), 1)
  280. Debug("cmdline options: %s"%OPTIONS, 1)
  281. if not doctype and FLAGS['toconly']: doctype = 'txt' # toconly dft type
  282. if not infile or not doctype: Quit(usage, 1) # no filename/doctype
  283. # sanity check: validate target type
  284. if not targets.count(doctype):
  285. Error("Invalid document type '%s' (try --help)"%(doctype))
  286. outfile = set_outfile_name(infile, doctype)
  287. # sanity check: validate split level
  288. if doctype != 'html': splitlevel = '' # only valid for HTML target
  289. if splitlevel:
  290. # checkings
  291. if outfile == pipefileid:
  292. Error('You need to provide a FILE (not STDIN) '
  293. 'when using --split')
  294. if splitlevel[0] not in '012':
  295. Error('Option --split must be 0, 1 or 2')
  296. # check for sgml-tools
  297. #TODO how to test (in a clever way) if an executable is in path?
  298. #TODO os.system() return code? sgml2html w/out --help exit 0?
  299. #TODO bah! implement sgml2html split natively and we're done
  300. # Error("Sorry, you must have 'sgml2html' to use --split")
  301. # set things
  302. FLAGS['stdout'] = 0 # no --stdout
  303. doctype = 'sgml' # 1st do a sgml, then sgml2html
  304. outfile = set_outfile_name(infile, doctype)
  305. # sanity check: source loss!
  306. if infile != pipefileid and infile == outfile:
  307. Error("SUICIDE WARNING!!! (try --stdout)\n source"+\
  308. " and target files has the same name: %s"%outfile)
  309. ### yes, i've got my sample.t2t file deleted before add this test... :/
  310. return infile,outfile,doctype
  311. #TODO splitlevel, lang
  312. #---End of ParseCmdlineOptions
  313. def toc_master(doctype, header, doc, toc):
  314. "decide to include TOC or not on the outlist"
  315. # deal with the TOC options
  316. if FLAGS['toc'] or FLAGS['toconly']:
  317. # format TOC lines
  318. ### here we do toc as a valid t2t marked text (list type)
  319. FLAGS['noheaders'] = 1
  320. x,y,toc = convert(['']+toc+['',''], doctype)
  321. # TOC between bars (not for --toconly)
  322. if FLAGS['toc']:
  323. para = TAGS['paragraph']
  324. tocbar = [para, regex['x'].sub('-'*72,TAGS['bar1']), para]
  325. toc = tocbar + toc + tocbar
  326. if FLAGS['toconly']: header = doc = []
  327. else:
  328. toc = []
  329. # TOC is a tag
  330. if TAGS['TOC'] and not FLAGS['toconly']:
  331. toc = []
  332. return header + toc + doc
  333. def doitall(cmdlinedic):
  334. global outfile
  335. infile,outfile,doctype = ParseCmdlineOptions(cmdlinedic)
  336. header,toc,doc = convert(Readfile(infile), doctype)
  337. outlist = toc_master(doctype,header,doc,toc)
  338. return doctype, outfile, outlist
  339. # set the Line Break across platforms
  340. LB = '\n' # default
  341. if sys.platform[:3] == 'win': LB = '\r\n'
  342. #elif sys.platform[:3] == 'cyg': LB = '\r\n' # not sure if it's best :(
  343. elif sys.platform[:3] == 'mac': LB = '\r'
  344. def escapePythonSpecials(txt):
  345. # drawback of using re.sub() - double escape some specials like \n
  346. # see also: 'force_re' marks on the code
  347. if sys.version[0] == '1':
  348. return re.sub(r'(\\[ntsrfvul])',r'\\\1',txt)
  349. else:
  350. return re.sub(r'(\\[ntsrfv])' ,r'\\\1',txt)
  351. def getTags(doctype):
  352. keys = [
  353. 'paragraph','title1','title2','title3','title4','title5',
  354. 'areaPreOpen','areaPreClose',
  355. 'areaQuoteOpen','areaQuoteClose',
  356. 'fontMonoOpen','fontMonoClose',
  357. 'fontBoldOpen','fontBoldClose',
  358. 'fontItalicOpen','fontItalicClose',
  359. 'fontBolditalicOpen','fontBolditalicClose',
  360. 'fontUnderlineOpen','fontUnderlineClose',
  361. 'listOpen','listClose','listItem',
  362. 'numlistOpen','numlistClose','numlistItem',
  363. 'deflistOpen','deflistClose','deflistItem1','deflistItem2',
  364. 'bar1','bar2',
  365. 'url','urlMark','email','emailMark',
  366. 'img','imgsolo',
  367. 'tableOpen','tableClose','tableLineOpen','tableLineClose',
  368. 'tableCellOpen','tableCellClose',
  369. 'tableTitleCellOpen','tableTitleCellClose',
  370. 'anchor','comment','TOC',
  371. 'EOD'
  372. ]
  373. if doctype == "txt":
  374. tags = {
  375. 'title1' : ' \a' ,
  376. 'title2' : '\t\a' ,
  377. 'title3' : '\t\t\a' ,
  378. 'title4' : '\t\t\t\a' ,
  379. 'title5' : '\t\t\t\t\a',
  380. 'areaQuoteOpen' : ' ' ,
  381. 'listItem' : '- ' ,
  382. 'numlistItem' : '\a. ' ,
  383. 'bar1' : '\a' ,
  384. 'bar2' : '\a' ,
  385. 'url' : '\a' ,
  386. 'urlMark' : '\a (\a)' ,
  387. 'email' : '\a' ,
  388. 'emailMark' : '\a (\a)' ,
  389. 'img' : '[\a]' ,
  390. }
  391. elif doctype == "html":
  392. tags = {
  393. 'paragraph' : '<P>' ,
  394. 'title1' : '<H1>\a</H1>' ,
  395. 'title2' : '<H2>\a</H2>' ,
  396. 'title3' : '<H3>\a</H3>' ,
  397. 'title4' : '<H4>\a</H4>' ,
  398. 'title5' : '<H5>\a</H5>' ,
  399. 'areaPreOpen' : '<PRE>' ,
  400. 'areaPreClose' : '</PRE>' ,
  401. 'areaQuoteOpen' : '<BLOCKQUOTE>' ,
  402. 'areaQuoteClose' : '</BLOCKQUOTE>' ,
  403. 'fontMonoOpen' : '<CODE>' ,
  404. 'fontMonoClose' : '</CODE>' ,
  405. 'fontBoldOpen' : '<B>' ,
  406. 'fontBoldClose' : '</B>' ,
  407. 'fontItalicOpen' : '<I>' ,
  408. 'fontItalicClose' : '</I>' ,
  409. 'fontBolditalicOpen' : '<B><I>' ,
  410. 'fontBolditalicClose' : '</I></B>' ,
  411. 'fontUnderlineOpen' : '<U>' ,
  412. 'fontUnderlineClose' : '</U>' ,
  413. 'listOpen' : '<UL>' ,
  414. 'listClose' : '</UL>' ,
  415. 'listItem' : '<LI>' ,
  416. 'numlistOpen' : '<OL>' ,
  417. 'numlistClose' : '</OL>' ,
  418. 'numlistItem' : '<LI>' ,
  419. 'deflistOpen' : '<DL>' ,
  420. 'deflistClose' : '</DL>' ,
  421. 'deflistItem1' : '<DT>\a</DT>' ,
  422. 'deflistItem2' : '<DD>' ,
  423. 'bar1' : '<HR NOSHADE SIZE=1>' ,
  424. 'bar2' : '<HR NOSHADE SIZE=5>' ,
  425. 'url' : '<A HREF="\a">\a</A>' ,
  426. 'urlMark' : '<A HREF="\a">\a</A>' ,
  427. 'email' : '<A HREF="mailto:\a">\a</A>' ,
  428. 'emailMark' : '<A HREF="mailto:\a">\a</A>' ,
  429. 'img' : '<IMG ALIGN="\a" SRC="\a" BORDER="0">',
  430. 'imgsolo' : '<P ALIGN="center">\a</P>' ,
  431. 'tableOpen' : '<table\a cellpadding=4 border=\a>',
  432. 'tableClose' : '</table>' ,
  433. 'tableLineOpen' : '<tr>' ,
  434. 'tableLineClose' : '</tr>' ,
  435. 'tableCellOpen' : '<td\a>' ,
  436. 'tableCellClose' : '</td>' ,
  437. 'tableTitleCellOpen' : '<th>' ,
  438. 'tableTitleCellClose' : '</th>' ,
  439. 'tableAlignLeft' : '' ,
  440. 'tableAlignCenter' : ' align="center"',
  441. 'tableCellAlignLeft' : '' ,
  442. 'tableCellAlignRight' : ' align="right"' ,
  443. 'tableCellAlignCenter': ' align="center"',
  444. 'anchor' : '<a name="\a">' ,
  445. 'comment' : '<!-- \a -->' ,
  446. 'EOD' : '</BODY></HTML>'
  447. }
  448. elif doctype == "sgml":
  449. tags = {
  450. 'paragraph' : '<p>' ,
  451. 'title1' : '<sect>\a<p>' ,
  452. 'title2' : '<sect1>\a<p>' ,
  453. 'title3' : '<sect2>\a<p>' ,
  454. 'title4' : '<sect3>\a<p>' ,
  455. 'title5' : '<sect4>\a<p>' ,
  456. 'areaPreOpen' : '<tscreen><verb>' ,
  457. 'areaPreClose' : '</verb></tscreen>' ,
  458. 'areaQuoteOpen' : '<quote>' ,
  459. 'areaQuoteClose' : '</quote>' ,
  460. 'fontMonoOpen' : '<tt>' ,
  461. 'fontMonoClose' : '</tt>' ,
  462. 'fontBoldOpen' : '<bf>' ,
  463. 'fontBoldClose' : '</bf>' ,
  464. 'fontItalicOpen' : '<em>' ,
  465. 'fontItalicClose' : '</em>' ,
  466. 'fontBolditalicOpen' : '<bf><em>' ,
  467. 'fontBolditalicClose' : '</em></bf>' ,
  468. 'fontUnderlineOpen' : '<bf><em>' ,
  469. 'fontUnderlineClose' : '</em></bf>' ,
  470. 'listOpen' : '<itemize>' ,
  471. 'listClose' : '</itemize>' ,
  472. 'listItem' : '<item>' ,
  473. 'numlistOpen' : '<enum>' ,
  474. 'numlistClose' : '</enum>' ,
  475. 'numlistItem' : '<item>' ,
  476. 'deflistOpen' : '<descrip>' ,
  477. 'deflistClose' : '</descrip>' ,
  478. 'deflistItem1' : '<tag>\a</tag>' ,
  479. 'bar1' : '<!-- \a -->' ,
  480. 'bar2' : '<!-- \a -->' ,
  481. 'url' : '<htmlurl url="\a" name="\a">' ,
  482. 'urlMark' : '<htmlurl url="\a" name="\a">' ,
  483. 'email' : '<htmlurl url="mailto:\a" name="\a">' ,
  484. 'emailMark' : '<htmlurl url="mailto:\a" name="\a">' ,
  485. 'img' : '<figure><ph vspace=""><img src="\a"></figure>',
  486. 'tableOpen' : '<table><tabular ca="\a">' ,
  487. 'tableClose' : '</tabular></table>' ,
  488. 'tableLineClose' : '<rowsep>' ,
  489. 'tableCellClose' : '<colsep>' ,
  490. 'tableTitleCellClose' : '<colsep>' ,
  491. 'tableColAlignLeft' : 'l' ,
  492. 'tableColAlignRight' : 'r' ,
  493. 'tableColAlignCenter' : 'c' ,
  494. 'comment' : '<!-- \a -->' ,
  495. 'TOC' : '<toc>' ,
  496. 'EOD' : '</article>'
  497. }
  498. elif doctype == "tex":
  499. tags = {
  500. 'title1' : '\n\\newpage\section{\a}',
  501. 'title2' : '\\subsection{\a}' ,
  502. 'title3' : '\\subsubsection{\a}' ,
  503. # title 4/5: DIRTY: para+BF+\\+\n
  504. 'title4' : '\\paragraph{}\\textbf{\a}\\\\\\\n',
  505. 'title5' : '\\paragraph{}\\textbf{\a}\\\\\\\n',
  506. 'areaPreOpen' : '\\begin{verbatim}' ,
  507. 'areaPreClose' : '\\end{verbatim}' ,
  508. 'areaQuoteOpen' : '\\begin{quotation}' ,
  509. 'areaQuoteClose' : '\\end{quotation}' ,
  510. 'fontMonoOpen' : '\\texttt{' ,
  511. 'fontMonoClose' : '}' ,
  512. 'fontBoldOpen' : '\\textbf{' ,
  513. 'fontBoldClose' : '}' ,
  514. 'fontItalicOpen' : '\\textit{' ,
  515. 'fontItalicClose' : '}' ,
  516. 'fontBolditalicOpen' : '\\textbf{\\textit{' ,
  517. 'fontBolditalicClose' : '}}' ,
  518. 'fontUnderlineOpen' : '\\underline{' ,
  519. 'fontUnderlineClose' : '}' ,
  520. 'listOpen' : '\\begin{itemize}' ,
  521. 'listClose' : '\\end{itemize}' ,
  522. 'listItem' : '\\item ' ,
  523. 'numlistOpen' : '\\begin{enumerate}' ,
  524. 'numlistClose' : '\\end{enumerate}' ,
  525. 'numlistItem' : '\\item ' ,
  526. 'deflistOpen' : '\\begin{description}',
  527. 'deflistClose' : '\\end{description}' ,
  528. 'deflistItem1' : '\\item[\a]' ,
  529. 'bar1' : '\n\\hrulefill{}\n' ,
  530. 'bar2' : '\n\\rule{\linewidth}{1mm}\n',
  531. 'url' : '\\url{\a}' ,
  532. 'urlMark' : '\\textit{\a} (\\url{\a})' ,
  533. 'email' : '\\url{\a}' ,
  534. 'emailMark' : '\\textit{\a} (\\url{\a})' ,
  535. 'img' : '(\a)' ,
  536. 'tableOpen' : '\\begin{center}\\begin{tabular}{\a|}',
  537. 'tableClose' : '\\end{tabular}\\end{center}',
  538. 'tableLineOpen' : '\\hline ' ,
  539. 'tableLineClose' : ' \\\\' ,
  540. 'tableCellClose' : ' & ' ,
  541. 'tableTitleCellOpen' : '\\textbf{',
  542. 'tableTitleCellClose' : '} & ' ,
  543. 'tableColAlignLeft' : '|l' ,
  544. 'tableColAlignRight' : '|r' ,
  545. 'tableColAlignCenter' : '|c' ,
  546. 'comment' : '% \a' ,
  547. 'TOC' : '\\newpage\\tableofcontents',
  548. 'EOD' : '\\end{document}'
  549. }
  550. elif doctype == "moin":
  551. tags = {
  552. 'title1' : '= \a =' ,
  553. 'title2' : '== \a ==' ,
  554. 'title3' : '=== \a ===' ,
  555. 'title4' : '==== \a ====' ,
  556. 'title5' : '===== \a =====',
  557. 'areaPreOpen' : '{{{' ,
  558. 'areaPreClose' : '}}}' ,
  559. 'areaQuoteOpen' : ' ' ,
  560. 'fontMonoOpen' : '{{{' ,
  561. 'fontMonoClose' : '}}}' ,
  562. 'fontBoldOpen' : "'''" ,
  563. 'fontBoldClose' : "'''" ,
  564. 'fontItalicOpen' : "''" ,
  565. 'fontItalicClose' : "''" ,
  566. 'fontBolditalicOpen' : "'''''" ,
  567. 'fontBolditalicClose' : "'''''" ,
  568. 'fontUnderlineOpen' : "'''''" ,
  569. 'fontUnderlineClose' : "'''''" ,
  570. 'listItem' : '* ' ,
  571. 'numlistItem' : '\a. ' ,
  572. 'bar1' : '----' ,
  573. 'bar2' : '----' ,
  574. 'url' : '[\a]' ,
  575. 'urlMark' : '[\a \a]' ,
  576. 'email' : '[\a]' ,
  577. 'emailMark' : '[\a \a]' ,
  578. 'img' : '[\a]' ,
  579. 'tableLineOpen' : '||' ,
  580. 'tableCellClose' : '||' ,
  581. 'tableTitleCellClose' : '||' ,
  582. }
  583. elif doctype == "mgp":
  584. tags = {
  585. 'paragraph' : '%font "normal", size 5\n' ,
  586. 'title1' : '%page\n\n\a' ,
  587. 'title2' : '%page\n\n\a' ,
  588. 'title3' : '%page\n\n\a' ,
  589. 'title4' : '%page\n\n\a' ,
  590. 'title5' : '%page\n\n\a' ,
  591. 'areaPreOpen' : '\n%font "mono"' ,
  592. 'areaPreClose' : '%font "normal"' ,
  593. 'areaQuoteOpen' : '%prefix " "' ,
  594. 'areaQuoteClose' : '%prefix " "' ,
  595. 'fontMonoOpen' : '\n%cont, font "mono"\n' ,
  596. 'fontMonoClose' : '\n%cont, font "normal"\n' ,
  597. 'fontBoldOpen' : '\n%cont, font "normal-b"\n' ,
  598. 'fontBoldClose' : '\n%cont, font "normal"\n' ,
  599. 'fontItalicOpen' : '\n%cont, font "normal-i"\n' ,
  600. 'fontItalicClose' : '\n%cont, font "normal"\n' ,
  601. 'fontBolditalicOpen' : '\n%cont, font "normal-bi"\n',
  602. 'fontBolditalicClose' : '\n%cont, font "normal"\n' ,
  603. 'fontUnderlineOpen' : '\n%cont, fore "cyan"\n' ,
  604. 'fontUnderlineClose' : '\n%cont, fore "white"\n' ,
  605. 'numlistItem' : '\a. ' ,
  606. 'bar1' : '%bar "white" 5' ,
  607. 'bar2' : '%pause' ,
  608. 'url' : '\n%cont, fore "cyan"\n\a\n%cont, fore "white"\n',
  609. 'urlMark' : '\a \n%cont, fore "cyan"\n\a\n%cont, fore "white"\n',
  610. 'email' : '\n%cont, fore "cyan"\n\a\n%cont, fore "white"\n',
  611. 'emailMark' : '\a \n%cont, fore "cyan"\n\a\n%cont, fore "white"\n',
  612. 'img' : '\n%center\n%newimage "\a", left\n',
  613. 'comment' : '%% \a' ,
  614. 'EOD' : '%%EOD'
  615. }
  616. elif doctype == "man":
  617. tags = {
  618. 'paragraph' : '.P' ,
  619. 'title1' : '.SH \a' ,
  620. 'title2' : '.SS \a' ,
  621. 'title3' : '.SS \a' ,
  622. 'title4' : '.SS \a' ,
  623. 'title5' : '.SS \a' ,
  624. 'areaPreOpen' : '.nf' ,
  625. 'areaPreClose' : '.fi\n' ,
  626. 'areaQuoteOpen' : '\n' ,
  627. 'areaQuoteClose' : '\n' ,
  628. 'fontBoldOpen' : '\\fB' ,
  629. 'fontBoldClose' : '\\fP' ,
  630. 'fontItalicOpen' : '\\fI' ,
  631. 'fontItalicClose' : '\\fP' ,
  632. 'fontBolditalicOpen' : '\n.BI ' ,
  633. 'fontBolditalicClose' : '\n\\&' ,
  634. 'listOpen' : '\n.nf' , # pre
  635. 'listClose' : '.fi\n' ,
  636. 'listItem' : '* ' ,
  637. 'numlistOpen' : '\n.nf' , # pre
  638. 'numlistClose' : '.fi\n' ,
  639. 'numlistItem' : '\a. ' ,
  640. 'bar1' : '\n\n' ,
  641. 'bar2' : '\n\n' ,
  642. 'url' : '\a' ,
  643. 'urlMark' : '\a (\a)',
  644. 'email' : '\a' ,
  645. 'emailMark' : '\a (\a)',
  646. 'img' : '\a' ,
  647. 'comment' : '.\\" \a'
  648. }
  649. elif doctype == "pm6":
  650. tags = {
  651. 'paragraph' : '<@Normal:>' ,
  652. 'title1' : '\n<@Title1:>\a',
  653. 'title2' : '\n<@Title2:>\a',
  654. 'title3' : '\n<@Title3:>\a',
  655. 'title4' : '\n<@Title4:>\a',
  656. 'title5' : '\n<@Title5:>\a',
  657. 'areaPreOpen' : '<@PreFormat:>' ,
  658. 'areaQuoteOpen' : '<@Quote:>' ,
  659. 'fontMonoOpen' : '<FONT "Lucida Console"><SIZE 9>' ,
  660. 'fontMonoClose' : '<SIZE$><FONT$>',
  661. 'fontBoldOpen' : '<B>' ,
  662. 'fontBoldClose' : '<P>' ,
  663. 'fontItalicOpen' : '<I>' ,
  664. 'fontItalicClose' : '<P>' ,
  665. 'fontBolditalicOpen' : '<B><I>' ,
  666. 'fontBolditalicClose' : '<P>' ,
  667. 'fontUnderlineOpen' : '<U>' ,
  668. 'fontUnderlineClose' : '<P>' ,
  669. 'listOpen' : '<@Bullet:>' ,
  670. 'listItem' : '\x95 ' , # \x95 == ~U
  671. 'numlistOpen' : '<@Bullet:>' ,
  672. 'numlistItem' : '\x95 ' ,
  673. 'bar1' : '\a' ,
  674. 'bar2' : '\a' ,
  675. 'url' : '<U>\a<P>' , # underline
  676. 'urlMark' : '\a <U>\a<P>' ,
  677. 'email' : '\a' ,
  678. 'emailMark' : '\a \a' ,
  679. 'img' : '\a' ,
  680. }
  681. # create empty tags keys
  682. for key in keys:
  683. if not tags.has_key(key):
  684. tags[key] = ''
  685. else:
  686. tags[key] = escapePythonSpecials(tags[key])
  687. return tags
  688. def getRules(doctype):
  689. ret = {}
  690. allrules = [
  691. # target rules (ON/OFF)
  692. 'linkable', # target supports external links
  693. 'tableable', # target supports tables
  694. 'imgalignable', # target supports image alignment
  695. 'tablealignable', # target supports table alignment
  696. 'listcountable', # target supports numbered lists natively
  697. 'tablecellsplit', # place delimiters only *between* cells
  698. 'listnotnested', # lists cannot be nested
  699. 'quotenotnested', # quotes cannot be nested
  700. 'preareanotescaped', # don't escape specials in PRE area
  701. # target code beautify (ON/OFF)
  702. 'indentprearea', # add leading spaces to PRE area lines
  703. 'breaktablecell', # break lines after any table cell
  704. 'breaktablelineopen', # break line after opening table line
  705. 'keepquoteindent', # don't remove the leading TABs on quotes
  706. # value settings
  707. 'listmaxdepth', # maximum depth for lists
  708. 'tablecellaligntype' # type of table cell align: cell, column
  709. ]
  710. rules = {
  711. 'txt' : {
  712. 'indentprearea':1
  713. },
  714. 'html': {
  715. 'indentprearea':1,
  716. 'linkable':1,
  717. 'imgalignable':1,
  718. 'listcountable':1,
  719. 'tableable':1,
  720. 'breaktablecell':1,
  721. 'breaktablelineopen':1,
  722. 'keepquoteindent':1,
  723. 'tablealignable':1,
  724. 'tablecellaligntype':'cell'
  725. },
  726. 'sgml': {
  727. 'linkable':1,
  728. 'listcountable':1,
  729. 'tableable':1,
  730. 'tablecellsplit':1,
  731. 'quotenotnested':1,
  732. 'keepquoteindent':1,
  733. 'tablecellaligntype':'column'
  734. },
  735. 'mgp' : {
  736. },
  737. 'tex' : {
  738. 'listcountable':1,
  739. 'tableable':1,
  740. 'tablecellsplit':1,
  741. 'preareanotescaped':1,
  742. 'listmaxdepth':4,
  743. 'tablecellaligntype':'column'
  744. },
  745. 'moin': {
  746. 'linkable':1,
  747. 'tableable':1
  748. },
  749. 'man' : {
  750. 'indentprearea':1,
  751. 'listnotnested':1
  752. },
  753. 'pm6' : {
  754. }
  755. }
  756. # populate return dictionary
  757. myrules = rules[doctype]
  758. for key in allrules : ret[key] = 0 # reset all
  759. for key in myrules.keys(): ret[key] = myrules[key] # turn ON
  760. return ret
  761. def getRegexes():
  762. regex = {
  763. # extra at end: (\[(?P<label>\w+)\])?
  764. 'title':
  765. re.compile(r'^\s*(?P<tag>={1,5})(?P<txt>[^=].*[^=])\1\s*$'),
  766. 'areaPreOpen':
  767. re.compile(r'^---$'),
  768. 'areaPreClose':
  769. re.compile(r'^---$'),
  770. 'quote':
  771. re.compile(r'^\t+'),
  772. '1linePreOld':
  773. re.compile(r'^ {4}([^\s-])'),
  774. '1linePre':
  775. re.compile(r'^--- '),
  776. 'fontMono':
  777. re.compile(r'`([^`]+)`'),
  778. 'fontBold':
  779. re.compile(r'\*\*([^\s*].*?)\*\*'),
  780. 'fontItalic':
  781. re.compile(r'(^|[^:])//([^ /].*?)//'),
  782. 'fontUnderline':
  783. re.compile(r'__([^_].*?)__'), # underline lead/trailing blank
  784. 'fontBolditalic':
  785. re.compile(r'\*/([^/].*?)/\*'),
  786. 'list':
  787. re.compile(r'^( *)([+-]) ([^ ])'),
  788. 'deflist':
  789. re.compile(r'^( *)(=) ([^:]+):'),
  790. 'bar':
  791. re.compile(r'^\s*([_=-]{20,})\s*$'),
  792. 'table':
  793. re.compile(r'^ *\|\|? '),
  794. 'blankline':
  795. re.compile(r'^\s*$'),
  796. 'comment':
  797. re.compile(r'^%'),
  798. 'raw':
  799. re.compile(r'``(.+?)``')
  800. }
  801. # special char to place data on TAGs contents (\a == bell)
  802. regex['x'] = re.compile('\a')
  803. # %%date [ (formatting) ]
  804. regex['date'] = re.compile(r'%%date\b(\((?P<fmt>.*?)\))?', re.I)
  805. ### complicated regexes begin here ;)
  806. #
  807. # textual descriptions on --help's style: [...] is optional, | is OR
  808. ### first, some auxiliar variables
  809. #
  810. # [image.EXT]
  811. patt_img = r'\[([\w_,.+%$#@!?+~/-]+\.(png|jpe?g|gif|eps|bmp))\]'
  812. # link things
  813. urlskel = {
  814. 'proto' : r'(https?|ftp|news|telnet|gopher|wais)://',
  815. 'guess' : r'(www[23]?|ftp)\.', # w/out proto, try to guess
  816. 'login' : r'A-Za-z0-9_.-', # for ftp://login@domain.com
  817. 'pass' : r'[^ @]*', # for ftp://login:password@domain.com
  818. 'chars' : r'A-Za-z0-9%._/~:,=$@-',# %20(space), :80(port)
  819. 'anchor': r'A-Za-z0-9%._-', # %nn(encoded)
  820. 'form' : r'A-Za-z0-9/%&=+.@*_-', # .@*_-(as is)
  821. 'punct' : r'.,;:!?'
  822. }
  823. # username [ :password ] @
  824. patt_url_login = r'([%s]+(:%s)?@)?'%(urlskel['login'],urlskel['pass'])
  825. # [ http:// ] [ username:password@ ] domain.com [ / ] [ #anchor | ?form=data ]
  826. retxt_url = r'\b(%s%s|%s)[%s]+\b/*(\?[%s]+)?(#[%s]+)?'%(
  827. urlskel['proto'],patt_url_login, urlskel['guess'],
  828. urlskel['chars'],urlskel['form'],urlskel['anchor'])
  829. # filename | [ filename ] #anchor
  830. retxt_url_local = r'[%s]+|[%s]*(#[%s]+)'%(
  831. urlskel['chars'],urlskel['chars'],urlskel['anchor'])
  832. # user@domain [ ?form=data ]
  833. patt_email = r'\b[%s]+@([A-Za-z0-9_-]+\.)+[A-Za-z]{2,4}\b(\?[%s]+)?'%(
  834. urlskel['login'],urlskel['form'])
  835. # saving for future use
  836. regex['_urlskel'] = urlskel
  837. ### and now the real regexes
  838. #
  839. regex['email'] = re.compile(patt_email,re.I)
  840. # email | url
  841. regex['link'] = \
  842. re.compile(r'%s|%s'%(retxt_url,patt_email), re.I)
  843. # \[ label | imagetag url | email | filename \]
  844. regex['linkmark'] = \
  845. re.compile(r'\[(?P<label>%s|[^]]+) (?P<link>%s|%s|%s)\]'%(
  846. patt_img, retxt_url, patt_email, retxt_url_local),
  847. re.L+re.I)
  848. # image
  849. regex['img'] = re.compile(patt_img, re.L+re.I)
  850. # all macros
  851. regex['macro'] = regex['date']
  852. # special things
  853. regex['special'] = re.compile(r'^%!\s*')
  854. regex['setting'] = re.compile(r'(Encoding|Style)\s*:\s*(.+)\s*$',re.I)
  855. return regex
  856. ### END OF regex nightmares
  857. class SubareaMaster:
  858. def __init__(self) : self.x = []
  859. def __call__(self) :
  860. if not self.x: return ''
  861. return self.x[-1]
  862. def add(self, area):
  863. if not self.x or (self.x and self.x[-1] != area):
  864. self.x.append(area)
  865. Debug('subarea ++ (%s): %s' % (area,self.x), 1)
  866. def pop(self, area=None):
  867. if area and self.x[-1] == area: self.x.pop()
  868. Debug('subarea -- (%s): %s' % (area,self.x), 1)
  869. def doHeader(doctype, headdic):
  870. if not HEADER_TEMPLATE.has_key(doctype):
  871. Error("doheader: Unknow doctype '%s'"%doctype)
  872. # cmdline options takes precedence on settings
  873. if OPTIONS['style']: headdic['STYLE'] = OPTIONS['style']
  874. Debug('HEADER data: %s'%headdic, 1)
  875. template = string.split(HEADER_TEMPLATE[doctype], '\n')
  876. # scan for empty dictionary keys
  877. # if found, scan template lines for that key reference
  878. # if found, remove the reference
  879. # if there aren't any other key reference on the same line, remove it
  880. for key in headdic.keys():
  881. if not headdic[key]:
  882. for line in template:
  883. if string.count(line, '%%(%s)s'%key):
  884. sline = string.replace(line, '%%(%s)s'%key, '')
  885. if not re.search(r'%([A-Z0-9]+)s', sline):
  886. template.remove(line)
  887. # populate template with data
  888. template = string.join(template, '\n') % headdic
  889. ### post processing
  890. #
  891. # TOC is a header tag
  892. if FLAGS['toc'] and TAGS['TOC']:
  893. toctag = re.sub('.*', TAGS['TOC'], '') #force_re
  894. template = template + toctag
  895. #
  896. # let tex format today
  897. if doctype == 'tex' and headdic['HEADER3'] == currdate:
  898. template = re.sub(r'\\date\{.*?}', r'\date', template)
  899. return string.split(template, '\n')
  900. def doCommentLine(doctype,txt):
  901. # the -- string ends a sgml comment :(
  902. if doctype == 'sgml':
  903. txt = string.replace(txt, '--', '\\-\\-')
  904. if TAGS['comment']:
  905. return regex['x'].sub(txt, TAGS['comment'])
  906. return ''
  907. def doFooter(doctype):
  908. ret = []
  909. typename = doctype
  910. if doctype == 'tex': typename = 'LaTeX2e'
  911. ppgd = '%s code generated by txt2tags %s (%s)'%(
  912. typename,my_version,my_url)
  913. cmdline = 'cmdline: txt2tags %s'%string.join(CMDLINE[1:], ' ')
  914. ret.append('\n'+doCommentLine(doctype,ppgd))
  915. ret.append(doCommentLine(doctype,cmdline))
  916. ret.append(TAGS['EOD'])
  917. return ret
  918. def doEscape(doctype,txt):
  919. if doctype == 'html' or doctype == 'sgml':
  920. txt = re.sub('&','&amp;',txt)
  921. txt = re.sub('<','&lt;',txt)
  922. txt = re.sub('>','&gt;',txt)
  923. if doctype == 'sgml':
  924. txt = re.sub('\xff','&yuml;',txt) # "+y
  925. elif doctype == 'pm6':
  926. txt = re.sub('<','<\#60>',txt)
  927. elif doctype == 'mgp':
  928. txt = re.sub('^%',' %',txt) # add leading blank to avoid parse
  929. #txt = re.sub('^%([^%])','%prefix ""\n %\n%cont, prefix " "\n\\1',txt)
  930. elif doctype == 'man':
  931. txt = re.sub('^\.', ' .',txt) # command ID
  932. txt = doEscapeEscapechar(txt)
  933. elif doctype == 'tex':
  934. txt = string.replace(txt, '\\', r'\verb!\!')
  935. txt = string.replace(txt, '~', r'\verb!~!')
  936. txt = string.replace(txt, '^', r'\verb!^!')
  937. txt = re.sub('([#$&%{}])', r'\\\1', txt)
  938. # TIP the _ is escaped at end
  939. return txt
  940. def doFinalEscape(doctype, txt):
  941. if doctype == 'pm6' : txt = string.replace(txt, r'\<',r'<\#92><')
  942. elif doctype == 'man' : txt = string.replace(txt, '-', r'\-')
  943. elif doctype == 'tex' : txt = string.replace(txt, '_', r'\_')
  944. elif doctype == 'sgml': txt = string.replace(txt, '[', '&lsqb;')
  945. return txt
  946. def doEscapeEscapechar(txt):
  947. return string.replace(txt, '\\', '\\\\')
  948. def addLineBreaks(list):
  949. "use LB to respect sys.platform"
  950. ret = []
  951. for line in list:
  952. line = string.replace(line,'\n',LB) # embedded \n's
  953. ret.append(line+LB) # add final line break
  954. return ret
  955. def doPreLine(doctype,line):
  956. "Parsing procedures for preformatted (verbatim) lines"
  957. if not rules['preareanotescaped']: line = doEscape(doctype,line)
  958. if rules['indentprearea']: line = ' '+line
  959. if doctype == 'pm6': line = doFinalEscape(doctype, line)
  960. return line
  961. def doCloseTable(doctype):
  962. global subarea, tableborder
  963. ret = ''
  964. if rules['tableable']:
  965. if doctype == 'tex' and tableborder:
  966. ret = TAGS['tableLineOpen']+TAGS['tableClose']+'\n'
  967. else:
  968. ret = TAGS['tableClose']+'\n'
  969. else:
  970. ret = TAGS['areaPreClose']
  971. tableborder = 0
  972. subarea.pop('table')
  973. return ret
  974. def doCloseQuote(howmany=None):
  975. global quotedepth
  976. ret = []
  977. if not howmany: howmany = len(quotedepth)
  978. for i in range(howmany):
  979. quotedepth.pop()
  980. #TODO align open/close tag -> FREE_ALING_TAG = 1 (man not)
  981. ret.append(TAGS['areaQuoteClose'])
  982. if not quotedepth: subarea.pop('quote')
  983. return string.join(ret,'\n')
  984. def doCloseList(howmany=None):
  985. global listindent, listids
  986. ret = []
  987. if not howmany: howmany = len(listindent)
  988. for i in range(howmany):
  989. if listids[-1] == '-': tag = TAGS['listClose']
  990. elif listids[-1] == '+': tag = TAGS['numlistClose']
  991. elif listids[-1] == '=': tag = TAGS['deflistClose']
  992. if not tag: tag = TAGS['listClose'] # default
  993. if tag:
  994. # unnested lists are only closed at mother-list
  995. if rules['listnotnested']:
  996. if len(listindent) == 1:
  997. ret.append(tag)
  998. else:
  999. ret.append(listindent[-1]+tag)
  1000. del listindent[-1]
  1001. del listids[-1]
  1002. if not listindent: subarea.pop('list')
  1003. return string.join(ret,'\n')
  1004. def beautify_me(name, doctype, line):
  1005. "where name is: bold, italic, underline or bolditalic"
  1006. name = 'font%s' % string.capitalize(name)
  1007. open = TAGS['%sOpen'%name]
  1008. close = TAGS['%sClose'%name]
  1009. txt = r'%s\1%s'%(open, close)
  1010. if name == 'fontItalic':
  1011. txt = r'\1%s\2%s'%(open, close)
  1012. line = regex[name].sub(txt,line)
  1013. return line
  1014. def get_tagged_link(doctype, label, url):
  1015. ret = ''
  1016. # set link type
  1017. if regex['email'].match(url):
  1018. linktype = 'email'
  1019. else:
  1020. linktype = 'url';
  1021. # adding protocol to guessed link
  1022. guessurl = ''
  1023. if linktype == 'url' and \
  1024. re.match(regex['_urlskel']['guess'], url):
  1025. if url[0] == 'w': guessurl = 'http://' +url
  1026. else : guessurl = 'ftp://' +url
  1027. # not link aware targets -> protocol is useless
  1028. if not rules['linkable']: guessurl = ''
  1029. # escape specials from TEXT parts
  1030. label = doEscape(doctype,label)
  1031. if not rules['linkable']:
  1032. if doctype == 'tex':
  1033. url = re.sub('^#', '\#', url) # ugly, but compile
  1034. else:
  1035. url = doEscape(doctype,url)
  1036. # simple link (not guessed)
  1037. if not label and not guessurl:
  1038. if FLAGS['maskemail'] and linktype == 'email':
  1039. # do the email mask feature (no TAGs, just text)
  1040. url = string.replace(url,'@',' (a) ')
  1041. url = string.replace(url,'.',' ')
  1042. url = "<%s>" % url
  1043. if rules['linkable']: url = doEscape(doctype, url)
  1044. ret = url
  1045. else:
  1046. # just add link data to tag
  1047. tag = re.sub('.*', TAGS[linktype], '') #force_re
  1048. ret = regex['x'].sub(url,tag)
  1049. # named link or guessed simple link
  1050. else:
  1051. # adjusts for guessed link
  1052. if not label: label = url # no protocol
  1053. if guessurl : url = guessurl # with protocol
  1054. # handle \ on link label
  1055. label = doEscapeEscapechar(label)
  1056. # putting data on the right appearance order
  1057. if rules['linkable']:
  1058. urlorder = [url, label] # link before label
  1059. else:
  1060. urlorder = [label, url] # label before link
  1061. # get tag
  1062. ret = re.sub('.*', TAGS["%sMark"%linktype], '') #force_re
  1063. # add link data to tag (replace \a's)
  1064. for data in urlorder:
  1065. ret = regex['x'].sub(data,ret,1)
  1066. return ret
  1067. def get_image_align(line):
  1068. align = ''
  1069. line = string.strip(line)
  1070. m = regex['img'].search(line)
  1071. ini = m.start() ; head = 0
  1072. end = m.end() ; tail = len(line)
  1073. align = 'center' # default align # ^text +img +text$
  1074. if ini == head and end == tail: align = 'para' # ^img$
  1075. elif ini == head: align = 'left' # ^img + text$
  1076. elif end == tail: align = 'right' # ^text + img$
  1077. return align
  1078. def get_tablecell_align(cells):
  1079. ret = []
  1080. for cell in cells:
  1081. align = 'Left'
  1082. if string.strip(cell):
  1083. if cell[0] == ' ' and cell[-1] == ' ': align = 'Center'
  1084. elif cell[0] == ' ': align = 'Right'
  1085. ret.append(align)
  1086. return ret
  1087. def get_table_prop(line):
  1088. # default table proprierties
  1089. ret = {'border': 0, 'header':0, 'align':'Left', 'cells':[], 'cellalign':[]}
  1090. # detect table align (and remove spaces mark)
  1091. if line[0] == ' ': ret['align'] = 'Center'
  1092. line = string.lstrip(line)
  1093. # detect header (title) mark
  1094. if line[1] == '|':
  1095. ret['header'] = 1
  1096. # delete trailing spaces after last cell border
  1097. line = re.sub('\|\s*$','|', line)
  1098. # detect (and delete) border mark (and leading space)
  1099. if line[-1] == '|':
  1100. ret['border'] = 1 ; line = line[:-2]
  1101. # delete table mark
  1102. line = regex['table'].sub('', line)
  1103. # split cells
  1104. ret['cells'] = string.split(line, ' | ')
  1105. # find cells align
  1106. ret['cellalign'] = get_tablecell_align(ret['cells'])
  1107. Debug('Table Prop: %s' % ret, 1)
  1108. return ret
  1109. def tag_table_cells(table, doctype):
  1110. ret = ''
  1111. open, close = TAGS['tableCellOpen'], TAGS['tableCellClose']
  1112. # title cell
  1113. if table['header']:
  1114. open = TAGS['tableTitleCellOpen']
  1115. close = TAGS['tableTitleCellClose']
  1116. if doctype == 'tex': open = re.sub('.*',open,'') # force_re
  1117. # should we break the line?
  1118. if rules['breaktablecell']: close = close+'\n'
  1119. # here we go
  1120. while table['cells']:
  1121. openalign = open
  1122. cel = table['cells'].pop(0)
  1123. # set each cell align
  1124. if rules['tablecellaligntype'] == 'cell':
  1125. align = table['cellalign'].pop(0)
  1126. align = TAGS['tableCellAlign%s'%align]
  1127. openalign = string.replace(open,'\a',align)
  1128. # show empty cell on HTML
  1129. if not cel and doctype == 'html': cel = '&nbsp;'
  1130. # last cell gotchas
  1131. if not table['cells']:
  1132. # don't need cell separator
  1133. if rules['tablecellsplit']: close = ''
  1134. # close beautifier for last title cell
  1135. if doctype == 'tex' and table['header']: close = '}'
  1136. # join it all
  1137. newcell = openalign + string.strip(cel) + close
  1138. ret = ret + newcell
  1139. return ret
  1140. def get_tableopen_tag(table_prop, doctype):
  1141. global tableborder
  1142. open = TAGS['tableOpen'] # the default one
  1143. # the first line defines if table has border or not
  1144. tableborder = table_prop['border']
  1145. # align full table
  1146. if rules['tablealignable']:
  1147. talign = TAGS['tableAlign'+table_prop['align']]
  1148. open = regex['x'].sub(talign, open, 1)
  1149. # set the columns alignment
  1150. if rules['tablecellaligntype'] == 'column':
  1151. calign = map(lambda x: TAGS['tableColAlign%s'%x],
  1152. table_prop['cellalign'])
  1153. calign = string.join(calign,'')
  1154. open = regex['x'].sub(calign, open, 1)
  1155. # tex table spec, border or not: {|l|c|r|} , {lcr}
  1156. if doctype == 'tex' and not tableborder:
  1157. open = string.replace(open,'|','')
  1158. # we're almost done, just border left
  1159. tag = regex['x'].sub(`tableborder`, open)
  1160. return tag
  1161. # reference: http://www.iana.org/assignments/character-sets
  1162. # http://www.drclue.net/F1.cgi/HTML/META/META.html
  1163. def get_encoding_string(enc, doctype):
  1164. if not enc: return ''
  1165. # target specific translation table
  1166. translate = {
  1167. 'tex': {
  1168. # missing: ansinew , applemac , cp437 , cp437de , cp865
  1169. 'us-ascii' : 'ascii',
  1170. 'windows-1250': 'cp1250',
  1171. 'windows-1252': 'cp1252',
  1172. 'ibm850' : 'cp850',
  1173. 'ibm852' : 'cp852',
  1174. 'iso-8859-1' : 'latin1',
  1175. 'iso-8859-2' : 'latin2',
  1176. 'iso-8859-3' : 'latin3',
  1177. 'iso-8859-4' : 'latin4',
  1178. 'iso-8859-5' : 'latin5',
  1179. 'iso-8859-9' : 'latin9',
  1180. 'koi8-r' : 'koi8-r'
  1181. }
  1182. }
  1183. # normalization
  1184. enc = re.sub('(?i)(us[-_]?)?ascii|us|ibm367','us-ascii' , enc)
  1185. enc = re.sub('(?i)(ibm|cp)?85([02])' ,'ibm85\\2' , enc)
  1186. enc = re.sub('(?i)(iso[_-]?)?8859[_-]?' ,'iso-8859-' , enc)
  1187. enc = re.sub('iso-8859-($|[^1-9]).*' ,'iso-8859-1', enc)
  1188. # apply translation table
  1189. try: enc = translate[doctype][string.upper(enc)]
  1190. except: pass
  1191. return enc
  1192. ################################################################################
  1193. ###MerryChristmas,IdontwanttofighttonightwithyouImissyourbodyandIneedyourlove###
  1194. ################################################################################
  1195. def convert(inlines, doctype):
  1196. # global vars for doClose*()
  1197. global TAGS, regex, rules, quotedepth, listindent, listids
  1198. global subarea, tableborder
  1199. TAGS = getTags(doctype)
  1200. rules = getRules(doctype)
  1201. regex = getRegexes()
  1202. # the defaults
  1203. linkmask = '@@_link_@@'
  1204. monomask = '@@_mono_@@'
  1205. macromask = '@@_macro_@@'
  1206. rawmask = '@@_raw_@@'
  1207. AREA = NewArea('head',0) # then conf, then body
  1208. subarea = SubareaMaster()
  1209. HEADERS = { 'HEADER1': '', 'HEADER2':'', 'HEADER3':'',
  1210. 'ENCODING': '', 'STYLE': '' }
  1211. ret = []
  1212. toclist = []
  1213. header = []
  1214. f_tt = 0
  1215. listindent = []
  1216. listids = []
  1217. listcount = []
  1218. titlecount = ['',0,0,0,0,0]
  1219. f_lastwasblank = 0
  1220. holdspace = ''
  1221. listholdspace = ''
  1222. quotedepth = []
  1223. tableborder = 0
  1224. if outfile != pipefileid:
  1225. if not FLAGS['gui']:
  1226. print "--- %s..."%doctype
  1227. # let's mark it up!
  1228. linenr = 0
  1229. for lineref in range(len(inlines)):
  1230. skip_continue = 0
  1231. linkbank = []
  1232. monobank = []
  1233. macrobank = []
  1234. rawbank = []
  1235. linenr = lineref +1
  1236. untouchedline = inlines[lineref]
  1237. # TODO take this rstrip() out - think about consequences
  1238. #line = string.rstrip(untouchedline)
  1239. line = re.sub('[\n\r]+$','',untouchedline) # del line break
  1240. Debug('LINE %04d: %s' % (linenr,repr(line)), 1) # for heavy debug
  1241. # detect if head section is over
  1242. if (linenr == 4 and AREA == 'head') or \
  1243. (linenr == 1 and not string.rstrip(line)):
  1244. AREA = NewArea('conf',linenr)
  1245. # we need (not really) to mark each paragraph
  1246. #TODO check if this is really needed
  1247. if doctype == 'pm6' and f_lastwasblank:
  1248. if f_tt or AREA == 'head' or listindent:
  1249. holdspace = ''
  1250. else:
  1251. holdspace = TAGS['paragraph']+'\n'
  1252. # any NOT table line (or comment), closes an open table
  1253. #if subarea() == 'table' and not regex['table'].search(line):
  1254. if subarea() == 'table' \
  1255. and not regex['table'].search(line) \
  1256. and not regex['comment'].search(line):
  1257. ret.append(doCloseTable(doctype))
  1258. #---------------------[ PRE formatted ]----------------------
  1259. #TIP we'll never support beautifiers inside pre-formatted
  1260. # we're already on a PRE area
  1261. if f_tt:
  1262. # closing PRE
  1263. if regex['areaPreClose'].search(line):
  1264. if doctype != 'pm6':
  1265. ret.append(TAGS['areaPreClose'])
  1266. f_tt = 0
  1267. continue
  1268. # normal PRE-inside line
  1269. line = doPreLine(doctype, line)
  1270. ret.append(line)
  1271. continue
  1272. # detecting PRE area init
  1273. if regex['areaPreOpen'].search(line):
  1274. ret.append(TAGS['areaPreOpen'])
  1275. f_lastwasblank = 0
  1276. f_tt = 1
  1277. continue
  1278. # one line PRE-formatted text
  1279. if regex['1linePre'].search(line):
  1280. f_lastwasblank = 0
  1281. line = regex['1linePre'].sub('',line)
  1282. line = doPreLine(doctype, line)
  1283. t1, t2 = TAGS['areaPreOpen'],TAGS['areaPreClose']
  1284. ret.append('%s\n%s\n%s'%(t1,line,t2))
  1285. continue
  1286. #---------------------[ blank lines ]-----------------------
  1287. #TODO "holdspace" to save <p> to not show in closelist
  1288. if regex['blankline'].search(line):
  1289. # closing all open quotes
  1290. if quotedepth:
  1291. ret.append(doCloseQuote())
  1292. # closing all open lists
  1293. if f_lastwasblank: # 2nd consecutive blank line
  1294. if listindent: # closes list (if any)
  1295. ret.append(doCloseList())
  1296. holdspace = ''
  1297. continue # consecutive blanks are trash
  1298. # normal blank line
  1299. if doctype != 'pm6' and AREA == 'body':
  1300. # paragraph (if any) is wanted inside lists also
  1301. if listindent:
  1302. para = TAGS['paragraph'] + '\n'
  1303. holdspace = holdspace + para
  1304. elif doctype == 'html':
  1305. ret.append(TAGS['paragraph'])
  1306. # sgml: quote close tag must not be \n\n</quote>
  1307. elif doctype == 'sgml' and quotedepth:
  1308. skip_continue = 1
  1309. # otherwise we just print a blank line
  1310. else:
  1311. ret.append('')
  1312. f_lastwasblank = 1
  1313. if not skip_continue: continue
  1314. #---------------------[ special ]------------------------
  1315. # just encoding for now
  1316. if regex['special'].search(line):
  1317. special = line[2:]
  1318. # try Settings
  1319. m = regex['setting'].match(special)
  1320. if m:
  1321. name = string.upper(m.group(1))
  1322. val = m.group(2)
  1323. if AREA == 'conf':
  1324. if name == 'ENCODING':
  1325. val = get_encoding_string(val,doctype)
  1326. HEADERS[name] = val
  1327. Debug("Found Setting '%s', value '%s'"%(
  1328. name,val),1,linenr)
  1329. else:
  1330. Debug('Ignoring Setting outside CONF area:'
  1331. ' %s'%name,1,linenr)
  1332. else:
  1333. Debug('Bogus Special Line',1,linenr)
  1334. #---------------------[ comments ]-----------------------
  1335. # just skip them (if not macro or setting)
  1336. if regex['comment'].search(line) and not regex['date'].match(line):
  1337. continue
  1338. f_lastwasblank = 0 # reset blank status
  1339. #---------------------[ BODY detect ]-----------------------
  1340. ### if got here, its a header or a valid line
  1341. if AREA == 'conf':
  1342. # oops, not header, so we're now on document BODY
  1343. AREA = NewArea('body', linenr)
  1344. # do headers!
  1345. if not FLAGS['noheaders']:
  1346. header = doHeader(doctype,HEADERS)
  1347. # so, let's print …

Large files are truncated, but you can click here to view the full file