PageRenderTime 35ms CodeModel.GetById 12ms RepoModel.GetById 0ms app.codeStats 1ms

/old/txt2tags-1.7.py

http://txt2tags.googlecode.com/
Python | 2628 lines | 2582 code | 16 blank | 30 comment | 9 complexity | f705939eba7a6267cceef0ee2a39b8ff MD5 | raw file
Possible License(s): GPL-2.0, GPL-3.0, WTFPL
  1. #!/usr/bin/env python
  2. # txt2tags - generic text conversion tool
  3. # http://txt2tags.sf.net
  4. #
  5. # Copyright 2001, 2002, 2003 Aurelio Marinho Jargas
  6. #
  7. # This program is free software; you can redistribute it and/or modify
  8. # it under the terms of the GNU General Public License as published by
  9. # the Free Software Foundation, version 2.
  10. #
  11. # This program is distributed in the hope that it will be useful,
  12. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. # GNU General Public License for more details.
  15. #
  16. # You have received a copy of the GNU General Public License along
  17. # with this program, on the COPYING file.
  18. #
  19. # the code is better, even readable now, but needs more improvements
  20. # please wait for the upcoming 2.0 series for a cleaner one
  21. #XXX Python coding warning
  22. # Avoid common mistakes:
# - do NOT use newlist=list, use newlist=list[:] instead
# - do NOT use newdic=dic, use newdic=dic.copy() instead
# - do NOT use dic[key], use dic.get(key) instead
  26. import re, string, os, sys, getopt, traceback
  27. from time import strftime,time,localtime
# program identification (my_version and my_url compose versionstr below)
my_url = 'http://txt2tags.sf.net'
my_email = 'verde@aurelio.net'
my_version = '1.7' #-betaN
# debug level: Debug() only prints messages whose level is <= DEBUG
DEBUG = 0 # do not edit here, please use --debug
# names of the supported target document types
targets = ['txt', 'sgml', 'html', 'pm6', 'mgp', 'moin', 'man', 'tex']
# on/off settings and their defaults (0 means off)
FLAGS = {'noheaders':0,'enumtitle':0 ,'maskemail':0 ,'stdout' :0,
    'toconly' :0,'toc' :0 ,'gui' :0 ,'dump-source':0}
# settings that carry a value, and their defaults
OPTIONS = {'toclevel' :3,'style' :'','type' :'','outfile' :'',
    'split':0, 'lang':''}
# setting names accepted on '%!' config lines (see Proprierties.set_config)
CONFIG_KEYWORDS = ['encoding', 'style', 'cmdline','preproc','postproc']
# global containers, all created empty here
CONF = {}
regex = {}
TAGS = {}
rules = {}
currdate = strftime('%Y%m%d',localtime(time())) # ISO current date
# fallback language, used by finish_him() when CONF['lang'] is empty
lang = 'english'
doctype = outfile = ''
# the file name '-' stands for standard input / standard output
STDIN = STDOUT = '-'
ESCCHAR = '\x00'
# line break strings, keyed by the first 3 chars of sys.platform
LINEBREAK = {'default':'\n', 'win':'\r\n', 'mac':'\r'}
#my_version = my_version + '-dev' + currdate[4:] # devel!
# global vars for doClose*()
quotedepth = []
listindent = []
listids = []
subarea = None
tableborder = 0
# set the Line Break across platforms
# (any platform prefix other than 'win' or 'mac' gets the default '\n')
LB = LINEBREAK.get(sys.platform[:3]) or LINEBREAK['default']
# one-line program identification, printed by -V/--version
versionstr = "txt2tags version %s <%s>"%(my_version,my_url)
# Help text printed by -h/--help and when the input file or the target
# type is missing. The first %s receives the version string, the second
# one the target names (repr() of the list with brackets and quotes
# removed).
usage = """
%s
Usage: txt2tags -t <type> [OPTIONS] file.t2t
-t, --type set target document type. currently supported:
%s
-o, --outfile=FILE set FILE as the output file name ('-' for STDOUT)
--stdout same as '-o -' or '--outfile -' (deprecated option)
-H, --noheaders suppress header, title and footer information
-n, --enumtitle enumerate all title lines as 1, 1.1, 1.1.1, etc
--maskemail hide email from spam robots. x@y.z turns <x (a) y z>
--toc add TOC (Table of Contents) to target document
--toconly print document TOC and exit
--toclevel=N set maximum TOC level (depth) to N
--gui invoke Graphical Tk Interface
--style=FILE use FILE as the document style (like Html CSS)
-h, --help print this help information and exit
-V, --version print program version and exit
Extra options for HTML target (needs sgml-tools):
--split split documents. values: 0, 1, 2 (default 0)
--lang document language (default english)
By default, converted output is saved to 'file.<type>'.
Use --outfile to force an output file name.
If input file is '-', reads from STDIN.
If output file is '-', dumps output to STDOUT.\
"""%(versionstr, re.sub(r"[]'[]",'',repr(targets)))
# Here are the header templates, one for each target.
# You may edit them to fit your needs:
# - the %(HEADERn)s strings represent the Header lines
# - the html and tex templates also use %(ENCODING)s, and html %(STYLE)s
# - use %% to represent a literal %
#
HEADER_TEMPLATE = {
'txt': """\
%(HEADER1)s
%(HEADER2)s
%(HEADER3)s
""",
'sgml': """\
<!doctype linuxdoc system>
<article>
<title>%(HEADER1)s
<author>%(HEADER2)s
<date>%(HEADER3)s
""",
'html': """\
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
<HTML>
<HEAD>
<META NAME="generator" CONTENT="http://txt2tags.sf.net">
<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=%(ENCODING)s">
<LINK REL="stylesheet" TYPE="text/css" HREF="%(STYLE)s">
<TITLE>%(HEADER1)s</TITLE>
</HEAD><BODY BGCOLOR="white" TEXT="black">
<P ALIGN="center"><CENTER><H1>%(HEADER1)s</H1>
<FONT SIZE=4>
<I>%(HEADER2)s</I><BR>
%(HEADER3)s
</FONT></CENTER>
""",
# TODO man section 1 is hardcoded...
'man': """\
.TH "%(HEADER1)s" 1 "%(HEADER3)s" "%(HEADER2)s"
""",
# TODO style to <HR>
'pm6': """\
<PMTags1.0 win><C-COLORTABLE ("Preto" 1 0 0 0)
><@Normal=
<FONT "Times New Roman"><CCOLOR "Preto"><SIZE 11>
<HORIZONTAL 100><LETTERSPACE 0><CTRACK 127><CSSIZE 70><C+SIZE 58.3>
<C-POSITION 33.3><C+POSITION 33.3><P><CBASELINE 0><CNOBREAK 0><CLEADING -0.05>
<GGRID 0><GLEFT 7.2><GRIGHT 0><GFIRST 0><G+BEFORE 7.2><G+AFTER 0>
<GALIGNMENT "justify"><GMETHOD "proportional"><G& "ENGLISH">
<GPAIRS 12><G%% 120><GKNEXT 0><GKWIDOW 0><GKORPHAN 0><GTABS $>
<GHYPHENATION 2 34 0><GWORDSPACE 75 100 150><GSPACE -5 0 25>
><@Bullet=<@-PARENT "Normal"><FONT "Abadi MT Condensed Light">
<GLEFT 14.4><G+BEFORE 2.15><G%% 110><GTABS(25.2 l "")>
><@PreFormat=<@-PARENT "Normal"><FONT "Lucida Console"><SIZE 8><CTRACK 0>
<GLEFT 0><G+BEFORE 0><GALIGNMENT "left"><GWORDSPACE 100 100 100><GSPACE 0 0 0>
><@Title1=<@-PARENT "Normal"><FONT "Arial"><SIZE 14><B>
<GCONTENTS><GLEFT 0><G+BEFORE 0><GALIGNMENT "left">
><@Title2=<@-PARENT "Title1"><SIZE 12><G+BEFORE 3.6>
><@Title3=<@-PARENT "Title1"><SIZE 10><GLEFT 7.2><G+BEFORE 7.2>
><@Title4=<@-PARENT "Title3">
><@Title5=<@-PARENT "Title3">
><@Quote=<@-PARENT "Normal"><SIZE 10><I>>
%(HEADER1)s
%(HEADER2)s
%(HEADER3)s
""",
'mgp': """\
#!/usr/X11R6/bin/mgp -t 90
%%deffont "normal" xfont "utopia-medium-r", charset "iso8859-1"
%%deffont "normal-i" xfont "utopia-medium-i", charset "iso8859-1"
%%deffont "normal-b" xfont "utopia-bold-r" , charset "iso8859-1"
%%deffont "normal-bi" xfont "utopia-bold-i" , charset "iso8859-1"
%%deffont "mono" xfont "courier-medium-r", charset "iso8859-1"
%%default 1 size 5
%%default 2 size 8, fore "yellow", font "normal-b", center
%%default 3 size 5, fore "white", font "normal", left, prefix " "
%%tab 1 size 4, vgap 30, prefix " ", icon arc "red" 40, leftfill
%%tab 2 prefix " ", icon arc "orange" 40, leftfill
%%tab 3 prefix " ", icon arc "brown" 40, leftfill
%%tab 4 prefix " ", icon arc "darkmagenta" 40, leftfill
%%tab 5 prefix " ", icon arc "magenta" 40, leftfill
%%%%------------------------- end of headers -----------------------------
%%page
%%size 10, center, fore "yellow"
%(HEADER1)s
%%font "normal-i", size 6, fore "white", center
%(HEADER2)s
%%font "mono", size 7, center
%(HEADER3)s
""",
# TODO please, improve me!
'moin': """\
%(HEADER1)s
%(HEADER2)s
%(HEADER3)s
""",
# raw string: the LaTeX backslashes must reach the output untouched
'tex': \
r"""\documentclass[11pt,a4paper]{article}
\usepackage{amsfonts,amssymb,graphicx,url}
\usepackage[%(ENCODING)s]{inputenc} %% char encoding
\pagestyle{plain} %% do page numbering ('empty' turns off)
\frenchspacing %% no aditional spaces after periods
\setlength{\parskip}{8pt}\parindent=0pt %% no paragraph indentation
%% uncomment next line for fancy PDF output on Adobe Acrobat Reader
%%\usepackage[pdfstartview=FitV,colorlinks=true,bookmarks=true]{hyperref}
\title{%(HEADER1)s}
\author{%(HEADER2)s}
\begin{document}
\date{%(HEADER3)s}
\maketitle
"""
}
#-----------------------------------------------------------------------
  192. #-----------------------------------------------------------------------
  193. def Quit(msg, exitcode=0): print msg ; sys.exit(exitcode)
  194. def Error(msg): print "ERROR: %s"%msg ; sys.exit()
  195. def echo(msg): print '\033[32;1m%s\033[m'%msg # quick debug
  196. def Debug(msg,i=0,linenr=None):
  197. if i > DEBUG: return
  198. if linenr is not None:
  199. print "(%d) %04d:%s"%(i,linenr,msg)
  200. else:
  201. print "(%d) %s"%(i,msg)
  202. def Readfile(file, remove_linebreaks=0):
  203. if file == '-':
  204. try: data = sys.stdin.readlines()
  205. except: Error('You must feed me with data on STDIN!')
  206. else:
  207. try: f = open(file); data = f.readlines() ; f.close()
  208. except: Error("Cannot read file:\n %s"%file)
  209. if remove_linebreaks:
  210. data = map(lambda x:re.sub('[\n\r]+$','',x), data)
  211. return data
  212. def Savefile(file, contents):
  213. try: f = open(file, 'wb')
  214. except: Error("Cannot open file for writing:\n %s"%file)
  215. if type(contents) == type([]): doit = f.writelines
  216. else: doit = f.write
  217. doit(contents) ; f.close()
  218. def get_include_contents(file, path=''):
  219. "Parses %!include: value and extract file contents"
  220. # set include type
  221. id = 'T2T'
  222. if file[0] == file[-1] == '`':
  223. id = 'VERB'
  224. file = file[1:-1] # remove ``
  225. elif file[0] == file[-1] == "'":
  226. id = 'PASS'
  227. file = file[1:-1] # remove ''
  228. # handle remote dir execution
  229. filepath = os.path.join(path, file)
  230. # pass-thru
  231. if id == 'PASS':
  232. return id, Readfile(filepath, remove_linebreaks=1)
  233. # VERB text
  234. if id == 'VERB':
  235. lines = Readfile(filepath, remove_linebreaks=1)
  236. # escape inner '---' that would end VERB block
  237. lines = map(lambda x: re.sub('^---$','--- ',x), lines)
  238. # add VERB block identifiers
  239. lines = ['---'] + lines + ['---']
  240. # default txt2tags marked text
  241. else:
  242. id = 'T2T'
  243. lines = get_file_body(filepath)
  244. # add delimiter comments
  245. lines.insert(0, '%%INCLUDED_%s starts here: %s'%(id,file))
  246. lines.append('%%INCLUDED_%s ends here: %s'%(id,file))
  247. return id, lines
  248. def ParseConfig(text='',name='', target=''):
  249. ret = {}
  250. if not text: return ret
  251. re_name = name or '[a-z]+'
  252. re_target = target or '[a-z]*'
  253. cfgregex = re.compile("""
  254. ^%%!\s* # leading id with opt spaces
  255. (?P<name>%s)\s* # config name
  256. (\((?P<target>%s)\))? # optional target spec inside ()
  257. \s*:\s* # key:value delimiter with opt spaces
  258. (?P<value>\S.+?) # config value
  259. \s*$ # rstrip() spaces and hit EOL
  260. """%(re_name,re_target), re.I+re.VERBOSE)
  261. prepostregex = re.compile("""
  262. # ---[ PATTERN ]---
  263. ^( "([^"]*)" # "double quoted" or
  264. | '([^']*)' # 'single quoted' or
  265. | ([^\s]+) # single_word
  266. )
  267. \s+ # separated by spaces
  268. # ---[ REPLACE ]---
  269. ( "([^"]*)" # "double quoted" or
  270. | '([^']*)' # 'single quoted' or
  271. | (.*) # anything
  272. )
  273. \s*$
  274. """, re.VERBOSE)
  275. match = cfgregex.match(text)
  276. if match:
  277. ret = {'name' :string.lower(match.group('name') or ''),
  278. 'target':string.lower(match.group('target') or 'all'),
  279. 'value' :match.group('value') }
  280. # Special config with two quoted values (%!preproc: "foo" 'bar')
  281. if ret['name'] in ['preproc','postproc']:
  282. valmatch = prepostregex.search(ret['value'])
  283. if not valmatch: return None
  284. getval = valmatch.group
  285. patt = getval(2) or getval(3) or getval(4) or ''
  286. repl = getval(6) or getval(7) or getval(8) or ''
  287. ret['value'] = (patt, repl)
  288. return ret
  289. class Cmdline:
  290. def __init__(self, cmdline=[], nocheck=0):
  291. self.conf = {}
  292. self.cmdline = cmdline
  293. self.cmdline_conf = {}
  294. self.dft_options = OPTIONS.copy()
  295. self.dft_flags = FLAGS.copy()
  296. self.all_options = self.dft_options.keys()
  297. self.all_flags = self.dft_flags.keys()
  298. self.defaults = self._get_empty_conf()
  299. self.nocheck = nocheck
  300. if cmdline: self.parse()
  301. #TODO protect quotes contents
  302. def _tokenize(self, cmd_string):
  303. return string.split(cmd_string)
  304. def parse(self):
  305. "return a dic with all options:value found"
  306. if not self.cmdline: return {}
  307. Debug("cmdline: %s"%self.cmdline, 1)
  308. options = {'infile': '', 'infiles':''}
  309. # compose valid options list
  310. longopts = ['help','version'] + self.all_flags + \
  311. map(lambda x:x+'=', self.all_options) # add =
  312. cmdline = self.cmdline[1:] # del prog name
  313. # get cmdline options
  314. try: (opt, args) = getopt.getopt(cmdline, 'hVnHt:o:', longopts)
  315. except getopt.error, errmsg:
  316. Error("%s (try --help)"%errmsg)
  317. # get infile, if any
  318. if args:
  319. options['infile'] = args[0]
  320. options['infiles'] = args # multi
  321. # parse all options
  322. for name,val in opt:
  323. if name in ['-h','--help' ]: Quit(usage)
  324. elif name in ['-V','--version']: Quit(versionstr)
  325. elif name in ['-t','--type' ]: options['type'] = val
  326. elif name in ['-o','--outfile' ]: options['outfile'] = val
  327. elif name in ['-n','--enumtitle']: options['enumtitle'] = 1
  328. elif name in ['-H','--noheaders']: options['noheaders'] = 1
  329. elif name in ['--stdout']: options['outfile'] = STDOUT
  330. else: options[name[2:]] = val or 1 # del --
  331. # save results
  332. Debug("cmdline arguments: %s"%options, 1)
  333. self.cmdline_conf = options
  334. def compose(self, conf={}):
  335. "compose full command line from CONF dict"
  336. if not conf: return ''
  337. args = []
  338. cfg = conf.copy()
  339. valid_opts = self.all_options + self.all_flags
  340. use_short = {'noheaders':'H', 'enumtitle':'n'}
  341. # remove useless options
  342. if cfg.get('toconly'):
  343. del cfg['noheaders']
  344. del cfg['outfile'] # defaults to STDOUT
  345. if cfg.get('type') == 'txt':
  346. del cfg['type'] # already default
  347. args.append('--toconly') # must be the first
  348. del cfg['toconly']
  349. # add target type
  350. if cfg.has_key('type'):
  351. args.append('-t '+cfg['type'])
  352. del cfg['type']
  353. # add other options
  354. for key in cfg.keys():
  355. if key not in valid_opts: continue # must be a %!setting
  356. if key == 'outfile': continue # later
  357. val = cfg[key]
  358. if not val: continue
  359. # default values are useless on cmdline
  360. if val == self.dft_options.get(key): continue
  361. # -short format
  362. if key in use_short.keys():
  363. args.append('-'+use_short[key])
  364. continue
  365. # --long format
  366. if key in self.all_flags: # add --option
  367. args.append('--'+key)
  368. else: # add --option=value
  369. args.append('--%s=%s'%(key,val))
  370. # the outfile using -o
  371. if cfg.has_key('outfile') and \
  372. cfg['outfile'] != self.dft_options.get('outfile'):
  373. args.append('-o '+cfg['outfile'])
  374. # the input file is always at the end
  375. if cfg.has_key('infile'):
  376. args.append(cfg['infile'])
  377. # return as a single string
  378. ret = string.join(args,' ')
  379. Debug("Diet command line: %s"%ret, 1)
  380. return ret
  381. def merge(self, extraopts=''):
  382. "insert cmdline portion BEFORE current cmdline"
  383. if not extraopts: return
  384. if type(extraopts) == type(''):
  385. extraopts = self._tokenize(extraopts)
  386. if not self.cmdline: self.cmdline = extraopts
  387. else: self.cmdline = ['t2t-merged'] +extraopts +self.cmdline[1:]
  388. self.parse()
  389. def _get_outfile_name(self, conf):
  390. "dirname is the same for {in,out}file"
  391. infile = conf['infile']
  392. if not infile: return ''
  393. if infile == STDIN or conf['outfile'] == STDOUT:
  394. outfile = STDOUT
  395. else:
  396. basename = re.sub('\.(txt|t2t)$','',infile)
  397. outfile = "%s.%s"%(basename, conf['type'])
  398. self.dft_options['outfile'] = outfile # save for self.compose()
  399. Debug(" infile: '%s'"%infile , 1)
  400. Debug("outfile: '%s'"%outfile, 1)
  401. return outfile
  402. def _sanity(self, dic):
  403. "basic cmdline syntax checkings"
  404. if not dic: return {}
  405. if not dic['infile'] or not dic['type']:
  406. Quit(usage, 1) # no filename/doctype
  407. if not targets.count(dic['type']): # check target
  408. Error("Invalid document type '%s' (try --help)"%(
  409. dic['type']))
  410. #DISABLED: conflicting with %!cmdline: -o foo
  411. #if len(dic['infiles']) > 1 and dic['outfile']: # -o FILE *.t2t
  412. # Error("--outfile can't be used with multiple files")
  413. for opt in self.all_options: # check numeric options
  414. opttype = type(self.dft_options[opt])
  415. if dic.get(opt) and opttype == type(9):
  416. try: dic[opt] = int(dic.get(opt)) # save
  417. except: Error('--%s value must be a number'%opt)
  418. if dic['split'] not in [0,1,2]: # check split level
  419. Error('Option --split must be 0, 1 or 2')
  420. return dic
  421. def merge_conf(self, newconfs={}, override=0):
  422. "include Config Area settings into self.conf"
  423. if not self.conf: self.get_conf()
  424. if not newconfs: return self.conf
  425. for key in newconfs.keys():
  426. if key == 'cmdline': continue # already done
  427. # filters are always accumulative
  428. if key in ['preproc','postproc']:
  429. if not self.conf.has_key(key):
  430. self.conf[key] = []
  431. self.conf[key].extend(newconfs[key])
  432. continue
  433. # add anyway
  434. if override:
  435. self.conf[key] = newconfs[key]
  436. continue
  437. # just update if still 'virgin'
  438. if self.conf.has_key(key) and \
  439. self.conf[key] == self.defaults.get(key):
  440. self.conf[key] = newconfs[key]
  441. # add new
  442. if not self.conf.has_key(key):
  443. self.conf[key] = newconfs[key]
  444. Debug("Merged CONF (override=%s): %s"%(override,self.conf), 1)
  445. return self.conf
  446. def _get_empty_conf(self):
  447. econf = self.dft_options.copy()
  448. for k in self.dft_flags.keys(): econf[k] = self.dft_flags[k]
  449. return econf
  450. def get_conf(self):
  451. "set vars and flags according to options dic"
  452. if not self.cmdline_conf:
  453. if not self.cmdline: return {}
  454. self.parse()
  455. dic = self.cmdline_conf
  456. conf = self.defaults.copy()
  457. ## store flags & options
  458. for flag in self.all_flags:
  459. if dic.has_key(flag): conf[flag] = 1
  460. for opt in self.all_options + ['infile', 'infiles']:
  461. if dic.has_key(opt): conf[opt] = dic.get(opt)
  462. if not conf['type'] and conf['toconly']: conf['type'] = 'txt'
  463. if not conf['type'] and conf['dump-source']: conf['type'] = 'txt'
  464. if not self.nocheck: conf = self._sanity(conf)
  465. ## some gotchas for specific issues
  466. doctype = conf['type']
  467. infile = conf['infile']
  468. # toconly is stronger than others
  469. if conf['toconly']:
  470. conf['noheaders'] = 1
  471. conf['toc'] = 0
  472. conf['split'] = 0
  473. conf['gui'] = 0
  474. conf['outfile'] = STDOUT
  475. conf['toclevel'] = conf['toclevel'] or \
  476. self.dft_options['toclevel']
  477. # dump-source is stronger than others (including toconly)
  478. if conf['dump-source']:
  479. conf['toconly'] = 0
  480. conf['noheaders'] = 0
  481. conf['toc'] = 0
  482. conf['split'] = 0
  483. conf['gui'] = 0
  484. conf['outfile'] = STDOUT
  485. # split: just HTML, no stdout, 1st do a sgml, then sgml2html
  486. if conf['split']:
  487. if doctype != 'html':
  488. conf['split'] = 0
  489. else:
  490. conf['type'] = 'sgml'
  491. if conf['outfile'] == STDOUT:
  492. conf['outfile'] = ''
  493. outfile = conf['outfile'] or self._get_outfile_name(conf)
  494. # final checkings
  495. if conf['split'] and outfile == STDOUT:
  496. Error('--split: You must provide a FILE (not STDIN)')
  497. if infile == outfile and outfile != STDOUT:
  498. Error("SUICIDE WARNING!!! (see --outfile)\n source"+\
  499. " and target files has the same name: "+outfile)
  500. ### author's note: "yes, i've got my sample.t2t file deleted
  501. ### before add this test... :/"
  502. conf['outfile'] = outfile
  503. conf['cmdline'] = self.cmdline
  504. Debug("CONF data: %s\n"%conf, 1)
  505. self.conf = conf
  506. return self.conf
  507. #
  508. ### End of Cmdline class
  509. class Proprierties:
  510. def __init__(self, filename=''):
  511. self.buffer = [''] # text start at pos 1
  512. self.areas = ['head','conf','body']
  513. self.arearef = []
  514. self.headers = ['','','']
  515. self.config = self.get_empty_config()
  516. self.lastline = 0
  517. self.filename = filename
  518. self.conflines = []
  519. self.bodylines = []
  520. if filename:
  521. self.read_file(filename)
  522. self.find_areas()
  523. self.set_headers()
  524. self.set_config()
  525. def read_file(self, file):
  526. lines = Readfile(file)
  527. if not lines: Error('Empty file! %s'%file)
  528. self.buffer.extend(lines)
  529. def get_empty_config(self):
  530. empty = {}
  531. for targ in targets+['all']: empty[targ] = {}
  532. return empty
  533. def find_areas(self):
  534. "Run through buffer and identify head/conf/body areas"
  535. buf = self.buffer ; ref = [1,4,0] # defaults
  536. if not string.strip(buf[1]): # no header
  537. ref[0] = 0 ; ref[1] = 2
  538. for i in range(ref[1],len(buf)): # find body init
  539. if string.strip(buf[i]) and buf[i][0] != '%':
  540. ref[2] = i ; break # !blank, !comment
  541. if ParseConfig(buf[i], 'include'):
  542. ref[2] = i ; break # %!include command
  543. if ref[1] == ref[2]: ref[1] = 0 # no conf area
  544. for i in 0,1,2: # del !existent
  545. if not ref[i]: self.areas[i] = ''
  546. self.arearef = ref # save results
  547. self.lastline = len(self.buffer)-1
  548. Debug('Head,Conf,Body start line: %s'%ref, 1)
  549. # store CONF and BODY lines found
  550. cfgend = ref[2] or len(buf)
  551. self.conflines = buf[ref[1]:cfgend]
  552. if ref[2]: self.bodylines = buf[ref[2]:]
  553. def set_headers(self):
  554. "Extract and save headers contents"
  555. if not self.arearef: self.find_areas()
  556. if not self.areas.count('head'): return
  557. if self.lastline < 3:
  558. #TODO on gui this checking is !working
  559. Error(
  560. "Premature end of Headers on '%s'."%self.filename +\
  561. '\n\nFile has %s line(s), but '%self.lastline +\
  562. 'Headers should be composed by 3 lines. ' +\
  563. '\nMaybe you should left the first line blank? ' +\
  564. '(for no headers)')
  565. for i in 0,1,2:
  566. self.headers[i] = string.strip(self.buffer[i+1])
  567. Debug("Headers found: %s"%self.headers, 1, i+1)
  568. def set_config(self):
  569. "Extract and save config contents (including includes)"
  570. if not self.arearef: self.find_areas()
  571. if not self.areas.count('conf'): return
  572. keywords = string.join(CONFIG_KEYWORDS, '|')
  573. linenr = self.arearef[1]-1 # for debug messages
  574. for line in self.conflines:
  575. linenr = linenr + 1
  576. if len(line) < 3: continue
  577. if line[:2] != '%!': continue
  578. cfg = ParseConfig(line, keywords)
  579. # any _valid_ config found?
  580. if not cfg:
  581. Debug('Bogus Config Line',1,linenr)
  582. continue
  583. # get data
  584. targ, key, val = cfg['target'],cfg['name'], cfg['value']
  585. # check config target specification
  586. if targ not in targets+['all']:
  587. Debug("Config Error: Invalid target '%s', ignoring"%targ,
  588. 1,linenr)
  589. continue
  590. # filters are multiple config
  591. if key in ['preproc','postproc']:
  592. if not self.config['all'].has_key(key): # 1st one
  593. self.config['all'][key] = []
  594. # all filters are saved to target 'all'
  595. # finish_him will decide what to consider
  596. self.config['all'][key].append((targ,)+val)
  597. else:
  598. self.config[targ][key] = val
  599. Debug("Found config for target '%s': '%s', value '%s'"%(
  600. targ,key,val),1,linenr)
  601. Debug("All %%!CONFIG: %s"%self.config, 1)
  602. def get_file_body(file):
  603. "Returns all the document BODY lines (including includes)"
  604. prop = Proprierties()
  605. prop.read_file(file)
  606. prop.find_areas()
  607. return prop.bodylines
  608. def finish_him(outlist, CONF):
  609. "Writing output to screen or file"
  610. outfile = CONF['outfile']
  611. outlist = unmaskEscapeChar(outlist)
  612. # do PostProc
  613. if CONF['postproc']:
  614. postoutlist = []
  615. for line in outlist:
  616. for targ,patt,repl in CONF['postproc']:
  617. if targ not in [CONF['type'], 'all']: continue
  618. try : line = re.sub(patt, repl, line)
  619. except: Error("Invalid PostProc filter regex: '%s'"%patt)
  620. postoutlist.append(line)
  621. outlist = postoutlist[:]
  622. if outfile == STDOUT:
  623. if CONF['gui']:
  624. return outlist
  625. else:
  626. for line in outlist: print line
  627. else:
  628. Savefile(outfile, addLineBreaks(outlist))
  629. if not CONF['gui']: print 'wrote %s'%(outfile)
  630. if CONF['split']:
  631. print "--- html..."
  632. sgml2html = 'sgml2html -s %s -l %s %s'%(
  633. CONF['split'],CONF['lang'] or lang,outfile)
  634. print "Running system command:", sgml2html
  635. os.system(sgml2html)
  636. def toc_maker(toc, conf):
  637. "Compose TOC list 'by hand'"
  638. # TOC is a tag, so there's nothing to do here
  639. if TAGS['TOC']: return []
  640. # toc is a valid t2t marked text (list type), that is converted
  641. if conf['toc'] or conf['toconly']:
  642. fakeconf = conf.copy()
  643. fakeconf['noheaders'] = 1
  644. fakeconf['toconly'] = 0
  645. fakeconf['maskemail'] = 0
  646. fakeconf['dump-source'] = 0
  647. fakeconf['preproc'] = []
  648. fakeconf['postproc'] = []
  649. toc,foo = convert(toc, fakeconf)
  650. # TOC between bars (not for --toconly)
  651. if conf['toc']:
  652. para = TAGS['paragraph']
  653. tocbar = [para, regex['x'].sub('-'*72,TAGS['bar1']), para]
  654. toc = tocbar + toc + tocbar
  655. return toc
  656. def getTags(doctype):
  657. keys = [
  658. 'paragraph','title1','title2','title3','title4','title5',
  659. 'numtitle1','numtitle2','numtitle3','numtitle4','numtitle5',
  660. 'areaPreOpen','areaPreClose',
  661. 'areaQuoteOpen','areaQuoteClose',
  662. 'fontMonoOpen','fontMonoClose',
  663. 'fontBoldOpen','fontBoldClose',
  664. 'fontItalicOpen','fontItalicClose',
  665. 'fontBolditalicOpen','fontBolditalicClose',
  666. 'fontUnderlineOpen','fontUnderlineClose',
  667. 'listOpen','listClose','listItem',
  668. 'numlistOpen','numlistClose','numlistItem',
  669. 'deflistOpen','deflistClose','deflistItem1','deflistItem2',
  670. 'bar1','bar2',
  671. 'url','urlMark','email','emailMark',
  672. 'img','imgsolo',
  673. 'tableOpen','tableClose','tableLineOpen','tableLineClose',
  674. 'tableCellOpen','tableCellClose',
  675. 'tableTitleCellOpen','tableTitleCellClose',
  676. 'anchor','comment','TOC',
  677. 'EOD'
  678. ]
  679. alltags = {
  680. 'txt': {
  681. 'title1' : ' \a' ,
  682. 'title2' : '\t\a' ,
  683. 'title3' : '\t\t\a' ,
  684. 'title4' : '\t\t\t\a' ,
  685. 'title5' : '\t\t\t\t\a',
  686. 'areaQuoteOpen' : ' ' ,
  687. 'listItem' : '- ' ,
  688. 'numlistItem' : '\a. ' ,
  689. 'bar1' : '\a' ,
  690. 'bar2' : '\a' ,
  691. 'url' : '\a' ,
  692. 'urlMark' : '\a (\a)' ,
  693. 'email' : '\a' ,
  694. 'emailMark' : '\a (\a)' ,
  695. 'img' : '[\a]' ,
  696. },
  697. 'html': {
  698. 'paragraph' : '<P>' ,
  699. 'title1' : '<H1>\a</H1>' ,
  700. 'title2' : '<H2>\a</H2>' ,
  701. 'title3' : '<H3>\a</H3>' ,
  702. 'title4' : '<H4>\a</H4>' ,
  703. 'title5' : '<H5>\a</H5>' ,
  704. 'areaPreOpen' : '<PRE>' ,
  705. 'areaPreClose' : '</PRE>' ,
  706. 'areaQuoteOpen' : '<BLOCKQUOTE>' ,
  707. 'areaQuoteClose' : '</BLOCKQUOTE>' ,
  708. 'fontMonoOpen' : '<CODE>' ,
  709. 'fontMonoClose' : '</CODE>' ,
  710. 'fontBoldOpen' : '<B>' ,
  711. 'fontBoldClose' : '</B>' ,
  712. 'fontItalicOpen' : '<I>' ,
  713. 'fontItalicClose' : '</I>' ,
  714. 'fontBolditalicOpen' : '<B><I>' ,
  715. 'fontBolditalicClose' : '</I></B>' ,
  716. 'fontUnderlineOpen' : '<U>' ,
  717. 'fontUnderlineClose' : '</U>' ,
  718. 'listOpen' : '<UL>' ,
  719. 'listClose' : '</UL>' ,
  720. 'listItem' : '<LI>' ,
  721. 'numlistOpen' : '<OL>' ,
  722. 'numlistClose' : '</OL>' ,
  723. 'numlistItem' : '<LI>' ,
  724. 'deflistOpen' : '<DL>' ,
  725. 'deflistClose' : '</DL>' ,
  726. 'deflistItem1' : '<DT>\a</DT>' ,
  727. 'deflistItem2' : '<DD>' ,
  728. 'bar1' : '<HR NOSHADE SIZE=1>' ,
  729. 'bar2' : '<HR NOSHADE SIZE=5>' ,
  730. 'url' : '<A HREF="\a">\a</A>' ,
  731. 'urlMark' : '<A HREF="\a">\a</A>' ,
  732. 'email' : '<A HREF="mailto:\a">\a</A>' ,
  733. 'emailMark' : '<A HREF="mailto:\a">\a</A>' ,
  734. 'img' : '<IMG ALIGN="\a" SRC="\a" BORDER="0">',
  735. 'imgsolo' : '<P ALIGN="center">\a</P>' ,
  736. 'tableOpen' : '<table\a cellpadding=4 border=\a>',
  737. 'tableClose' : '</table>' ,
  738. 'tableLineOpen' : '<tr>' ,
  739. 'tableLineClose' : '</tr>' ,
  740. 'tableCellOpen' : '<td\a>' ,
  741. 'tableCellClose' : '</td>' ,
  742. 'tableTitleCellOpen' : '<th>' ,
  743. 'tableTitleCellClose' : '</th>' ,
  744. 'tableAlignLeft' : '' ,
  745. 'tableAlignCenter' : ' align="center"',
  746. 'tableCellAlignLeft' : '' ,
  747. 'tableCellAlignRight' : ' align="right"' ,
  748. 'tableCellAlignCenter': ' align="center"',
  749. 'anchor' : '<a name="\a"></a>',
  750. 'comment' : '<!-- \a -->' ,
  751. 'EOD' : '</BODY></HTML>'
  752. },
  753. 'sgml': {
  754. 'paragraph' : '<p>' ,
  755. 'title1' : '<sect>\a<p>' ,
  756. 'title2' : '<sect1>\a<p>' ,
  757. 'title3' : '<sect2>\a<p>' ,
  758. 'title4' : '<sect3>\a<p>' ,
  759. 'title5' : '<sect4>\a<p>' ,
  760. 'areaPreOpen' : '<tscreen><verb>' ,
  761. 'areaPreClose' : '</verb></tscreen>' ,
  762. 'areaQuoteOpen' : '<quote>' ,
  763. 'areaQuoteClose' : '</quote>' ,
  764. 'fontMonoOpen' : '<tt>' ,
  765. 'fontMonoClose' : '</tt>' ,
  766. 'fontBoldOpen' : '<bf>' ,
  767. 'fontBoldClose' : '</bf>' ,
  768. 'fontItalicOpen' : '<em>' ,
  769. 'fontItalicClose' : '</em>' ,
  770. 'fontBolditalicOpen' : '<bf><em>' ,
  771. 'fontBolditalicClose' : '</em></bf>' ,
  772. 'fontUnderlineOpen' : '<bf><em>' ,
  773. 'fontUnderlineClose' : '</em></bf>' ,
  774. 'listOpen' : '<itemize>' ,
  775. 'listClose' : '</itemize>' ,
  776. 'listItem' : '<item>' ,
  777. 'numlistOpen' : '<enum>' ,
  778. 'numlistClose' : '</enum>' ,
  779. 'numlistItem' : '<item>' ,
  780. 'deflistOpen' : '<descrip>' ,
  781. 'deflistClose' : '</descrip>' ,
  782. 'deflistItem1' : '<tag>\a</tag>' ,
  783. 'bar1' : '<!-- \a -->' ,
  784. 'bar2' : '<!-- \a -->' ,
  785. 'url' : '<htmlurl url="\a" name="\a">' ,
  786. 'urlMark' : '<htmlurl url="\a" name="\a">' ,
  787. 'email' : '<htmlurl url="mailto:\a" name="\a">' ,
  788. 'emailMark' : '<htmlurl url="mailto:\a" name="\a">' ,
  789. 'img' : '<figure><ph vspace=""><img src="\a">'+\
  790. '</figure>' ,
  791. 'tableOpen' : '<table><tabular ca="\a">' ,
  792. 'tableClose' : '</tabular></table>' ,
  793. 'tableLineClose' : '<rowsep>' ,
  794. 'tableCellClose' : '<colsep>' ,
  795. 'tableTitleCellClose' : '<colsep>' ,
  796. 'tableColAlignLeft' : 'l' ,
  797. 'tableColAlignRight' : 'r' ,
  798. 'tableColAlignCenter' : 'c' ,
  799. 'comment' : '<!-- \a -->' ,
  800. 'TOC' : '<toc>' ,
  801. 'EOD' : '</article>'
  802. },
  803. 'tex': {
  804. 'title1' : '\n\\newpage\section*{\a}',
  805. 'title2' : '\\subsection*{\a}' ,
  806. 'title3' : '\\subsubsection*{\a}' ,
  807. # title 4/5: DIRTY: para+BF+\\+\n
  808. 'title4' : '\\paragraph{}\\textbf{\a}\\\\\n',
  809. 'title5' : '\\paragraph{}\\textbf{\a}\\\\\n',
  810. 'numtitle1' : '\n\\newpage\section{\a}',
  811. 'numtitle2' : '\\subsection{\a}' ,
  812. 'numtitle3' : '\\subsubsection{\a}' ,
  813. 'areaPreOpen' : '\\begin{verbatim}' ,
  814. 'areaPreClose' : '\\end{verbatim}' ,
  815. 'areaQuoteOpen' : '\\begin{quotation}' ,
  816. 'areaQuoteClose' : '\\end{quotation}' ,
  817. 'fontMonoOpen' : '\\texttt{' ,
  818. 'fontMonoClose' : '}' ,
  819. 'fontBoldOpen' : '\\textbf{' ,
  820. 'fontBoldClose' : '}' ,
  821. 'fontItalicOpen' : '\\textit{' ,
  822. 'fontItalicClose' : '}' ,
  823. 'fontBolditalicOpen' : '\\textbf{\\textit{' ,
  824. 'fontBolditalicClose' : '}}' ,
  825. 'fontUnderlineOpen' : '\\underline{' ,
  826. 'fontUnderlineClose' : '}' ,
  827. 'listOpen' : '\\begin{itemize}' ,
  828. 'listClose' : '\\end{itemize}' ,
  829. 'listItem' : '\\item ' ,
  830. 'numlistOpen' : '\\begin{enumerate}' ,
  831. 'numlistClose' : '\\end{enumerate}' ,
  832. 'numlistItem' : '\\item ' ,
  833. 'deflistOpen' : '\\begin{description}',
  834. 'deflistClose' : '\\end{description}' ,
  835. 'deflistItem1' : '\\item[\a]' ,
  836. 'bar1' : '\n\\hrulefill{}\n' ,
  837. 'bar2' : '\n\\rule{\linewidth}{1mm}\n',
  838. 'url' : '\\url{\a}' ,
  839. 'urlMark' : '\\textit{\a} (\\url{\a})' ,
  840. 'email' : '\\url{\a}' ,
  841. 'emailMark' : '\\textit{\a} (\\url{\a})' ,
  842. 'img' : '\\begin{figure}\\includegraphics{\a}'+\
  843. '\\end{figure}',
  844. 'tableOpen' : '\\begin{center}\\begin{tabular}{\a|}',
  845. 'tableClose' : '\\end{tabular}\\end{center}',
  846. 'tableLineOpen' : '\\hline ' ,
  847. 'tableLineClose' : ' \\\\' ,
  848. 'tableCellClose' : ' & ' ,
  849. 'tableTitleCellOpen' : '\\textbf{',
  850. 'tableTitleCellClose' : '} & ' ,
  851. 'tableColAlignLeft' : '|l' ,
  852. 'tableColAlignRight' : '|r' ,
  853. 'tableColAlignCenter' : '|c' ,
  854. 'comment' : '% \a' ,
  855. 'TOC' : '\\newpage\\tableofcontents',
  856. 'EOD' : '\\end{document}'
  857. },
  858. 'moin': {
  859. 'title1' : '= \a =' ,
  860. 'title2' : '== \a ==' ,
  861. 'title3' : '=== \a ===' ,
  862. 'title4' : '==== \a ====' ,
  863. 'title5' : '===== \a =====',
  864. 'areaPreOpen' : '{{{' ,
  865. 'areaPreClose' : '}}}' ,
  866. 'areaQuoteOpen' : ' ' ,
  867. 'fontMonoOpen' : '{{{' ,
  868. 'fontMonoClose' : '}}}' ,
  869. 'fontBoldOpen' : "'''" ,
  870. 'fontBoldClose' : "'''" ,
  871. 'fontItalicOpen' : "''" ,
  872. 'fontItalicClose' : "''" ,
  873. 'fontBolditalicOpen' : "'''''" ,
  874. 'fontBolditalicClose' : "'''''" ,
  875. 'fontUnderlineOpen' : "'''''" ,
  876. 'fontUnderlineClose' : "'''''" ,
  877. 'listItem' : ' * ' ,
  878. 'numlistItem' : ' \a. ' ,
  879. 'bar1' : '----' ,
  880. 'bar2' : '----' ,
  881. 'url' : '[\a]' ,
  882. 'urlMark' : '[\a \a]' ,
  883. 'email' : '[\a]' ,
  884. 'emailMark' : '[\a \a]' ,
  885. 'img' : '[\a]' ,
  886. 'tableLineOpen' : '||' ,
  887. 'tableCellClose' : '||' ,
  888. 'tableTitleCellClose' : '||'
  889. },
  890. 'mgp': {
  891. 'paragraph' : '%font "normal", size 5\n' ,
  892. 'title1' : '%page\n\n\a' ,
  893. 'title2' : '%page\n\n\a' ,
  894. 'title3' : '%page\n\n\a' ,
  895. 'title4' : '%page\n\n\a' ,
  896. 'title5' : '%page\n\n\a' ,
  897. 'areaPreOpen' : '\n%font "mono"' ,
  898. 'areaPreClose' : '%font "normal"' ,
  899. 'areaQuoteOpen' : '%prefix " "' ,
  900. 'areaQuoteClose' : '%prefix " "' ,
  901. 'fontMonoOpen' : '\n%cont, font "mono"\n' ,
  902. 'fontMonoClose' : '\n%cont, font "normal"\n' ,
  903. 'fontBoldOpen' : '\n%cont, font "normal-b"\n' ,
  904. 'fontBoldClose' : '\n%cont, font "normal"\n' ,
  905. 'fontItalicOpen' : '\n%cont, font "normal-i"\n' ,
  906. 'fontItalicClose' : '\n%cont, font "normal"\n' ,
  907. 'fontBolditalicOpen' : '\n%cont, font "normal-bi"\n',
  908. 'fontBolditalicClose' : '\n%cont, font "normal"\n' ,
  909. 'fontUnderlineOpen' : '\n%cont, fore "cyan"\n' ,
  910. 'fontUnderlineClose' : '\n%cont, fore "white"\n' ,
  911. 'numlistItem' : '\a. ' ,
  912. 'bar1' : '%bar "white" 5' ,
  913. 'bar2' : '%pause' ,
  914. 'url' : '\n%cont, fore "cyan"\n\a' +\
  915. '\n%cont, fore "white"\n' ,
  916. 'urlMark' : '\a \n%cont, fore "cyan"\n\a'+\
  917. '\n%cont, fore "white"\n' ,
  918. 'email' : '\n%cont, fore "cyan"\n\a' +\
  919. '\n%cont, fore "white"\n' ,
  920. 'emailMark' : '\a \n%cont, fore "cyan"\n\a'+\
  921. '\n%cont, fore "white"\n' ,
  922. 'img' : '\n%center\n%newimage "\a", left\n',
  923. 'comment' : '%% \a' ,
  924. 'EOD' : '%%EOD'
  925. },
  926. 'man': {
  927. 'paragraph' : '.P' ,
  928. 'title1' : '.SH \a' ,
  929. 'title2' : '.SS \a' ,
  930. 'title3' : '.SS \a' ,
  931. 'title4' : '.SS \a' ,
  932. 'title5' : '.SS \a' ,
  933. 'areaPreOpen' : '.nf' ,
  934. 'areaPreClose' : '.fi\n' ,
  935. 'areaQuoteOpen' : '\n' ,
  936. 'areaQuoteClose' : '\n' ,
  937. 'fontBoldOpen' : '\\fB' ,
  938. 'fontBoldClose' : '\\fP' ,
  939. 'fontItalicOpen' : '\\fI' ,
  940. 'fontItalicClose' : '\\fP' ,
  941. 'fontBolditalicOpen' : '\\fI' ,
  942. 'fontBolditalicClose' : '\\fP' ,
  943. 'listOpen' : '\n.nf' , # pre
  944. 'listClose' : '.fi\n' ,
  945. 'listItem' : '* ' ,
  946. 'numlistOpen' : '\n.nf' , # pre
  947. 'numlistClose' : '.fi\n' ,
  948. 'numlistItem' : '\a. ' ,
  949. 'bar1' : '\n\n' ,
  950. 'bar2' : '\n\n' ,
  951. 'url' : '\a' ,
  952. 'urlMark' : '\a (\a)',
  953. 'email' : '\a' ,
  954. 'emailMark' : '\a (\a)',
  955. 'img' : '\a' ,
  956. 'comment' : '.\\" \a'
  957. },
  958. 'pm6': {
  959. 'paragraph' : '<@Normal:>' ,
  960. 'title1' : '\n<@Title1:>\a',
  961. 'title2' : '\n<@Title2:>\a',
  962. 'title3' : '\n<@Title3:>\a',
  963. 'title4' : '\n<@Title4:>\a',
  964. 'title5' : '\n<@Title5:>\a',
  965. 'areaPreOpen' : '<@PreFormat:>' ,
  966. 'areaQuoteOpen' : '<@Quote:>' ,
  967. 'fontMonoOpen' : '<FONT "Lucida Console"><SIZE 9>' ,
  968. 'fontMonoClose' : '<SIZE$><FONT$>',
  969. 'fontBoldOpen' : '<B>' ,
  970. 'fontBoldClose' : '<P>' ,
  971. 'fontItalicOpen' : '<I>' ,
  972. 'fontItalicClose' : '<P>' ,
  973. 'fontBolditalicOpen' : '<B><I>' ,
  974. 'fontBolditalicClose' : '<P>' ,
  975. 'fontUnderlineOpen' : '<U>' ,
  976. 'fontUnderlineClose' : '<P>' ,
  977. 'listOpen' : '<@Bullet:>' ,
  978. 'listItem' : '\x95 ' , # \x95 == ~U
  979. 'numlistOpen' : '<@Bullet:>' ,
  980. 'numlistItem' : '\x95 ' ,
  981. 'bar1' : '\a' ,
  982. 'bar2' : '\a' ,
  983. 'url' : '<U>\a<P>' , # underline
  984. 'urlMark' : '\a <U>\a<P>' ,
  985. 'email' : '\a' ,
  986. 'emailMark' : '\a \a' ,
  987. 'img' : '\a'
  988. }
  989. }
  990. # compose the target tags dictionary
  991. tags = {}
  992. target_tags = alltags[doctype]
  993. for key in keys: tags[key] = '' # create empty keys
  994. for key in target_tags.keys():
  995. tags[key] = maskEscapeChar(target_tags[key]) # populate
  996. return tags
  997. def getRules(doctype):
  998. ret = {}
  999. allrules = [
  1000. # target rules (ON/OFF)
  1001. 'linkable', # target supports external links
  1002. 'tableable', # target supports tables
  1003. 'imglinkable', # target supports images as links
  1004. 'imgalignable', # target supports image alignment
  1005. 'imgasdefterm', # target supports image as definition term
  1006. 'tablealignable', # target supports table alignment
  1007. 'autonumberlist', # target supports numbered lists natively
  1008. 'autonumbertitle', # target supports numbered titles natively
  1009. 'tablecellsplit', # place delimiters only *between* cells
  1010. 'listnotnested', # lists cannot be nested
  1011. 'quotenotnested', # quotes cannot be nested
  1012. 'preareanotescaped', # don't escape specials in PRE area
  1013. 'escapeurl', # escape special in link URL
  1014. # target code beautify (ON/OFF)
  1015. 'indentprearea', # add leading spaces to PRE area lines
  1016. 'breaktablecell', # break lines after any table cell
  1017. 'breaktablelineopen', # break line after opening table line
  1018. 'keepquoteindent', # don't remove the leading TABs on quotes
  1019. # value settings
  1020. 'listmaxdepth', # maximum depth for lists
  1021. 'tablecellaligntype' # type of table cell align: cell, column
  1022. ]
  1023. rules = {
  1024. 'txt' : {
  1025. 'indentprearea':1
  1026. },
  1027. 'html': {
  1028. 'indentprearea':1,
  1029. 'linkable':1,
  1030. 'imglinkable':1,
  1031. 'imgalignable':1,
  1032. 'imgasdefterm':1,
  1033. 'autonumberlist':1,
  1034. 'tableable':1,
  1035. 'breaktablecell':1,
  1036. 'breaktablelineopen':1,
  1037. 'keepquoteindent':1,
  1038. 'tablealignable':1,
  1039. 'tablecellaligntype':'cell'
  1040. },
  1041. 'sgml': {
  1042. 'linkable':1,
  1043. 'escapeurl':1,
  1044. 'autonumberlist':1,
  1045. 'tableable':1,
  1046. 'tablecellsplit':1,
  1047. 'quotenotnested':1,
  1048. 'keepquoteindent':1,
  1049. 'tablecellaligntype':'column'
  1050. },
  1051. 'mgp' : {
  1052. },
  1053. 'tex' : {
  1054. 'autonumberlist':1,
  1055. 'autonumbertitle':1,
  1056. 'tableable':1,
  1057. 'tablecellsplit':1,
  1058. 'preareanotescaped':1,
  1059. 'listmaxdepth':4,
  1060. 'tablecellaligntype':'column'
  1061. },
  1062. 'moin': {
  1063. 'linkable':1,
  1064. 'tableable':1
  1065. },
  1066. 'man' : {
  1067. 'indentprearea':1,
  1068. 'listnotnested':1
  1069. },
  1070. 'pm6' : {
  1071. }
  1072. }
  1073. # populate return dictionary
  1074. myrules = rules[doctype]
  1075. for key in allrules : ret[key] = 0 # reset all
  1076. for key in myrules.keys(): ret[key] = myrules[key] # turn ON
  1077. return ret
  1078. def getRegexes():
  1079. regex = {
  1080. # extra at end: (\[(?P<label>\w+)\])?
  1081. 'title':
  1082. re.compile(r'^\s*(?P<id>={1,5})(?P<txt>[^=].*[^=])\1\s*$'),
  1083. 'numtitle':
  1084. re.compile(r'^\s*(?P<id>\+{1,5})(?P<txt>[^+].*[^+])\1\s*$'),
  1085. 'areaPreOpen':
  1086. re.compile(r'^---$'),
  1087. 'areaPreClose':
  1088. re.compile(r'^---$'),
  1089. 'quote':
  1090. re.compile(r'^\t+'),
  1091. '1linePre':
  1092. re.compile(r'^--- (?=.)'),
  1093. 'fontMono':
  1094. re.compile(r'`([^`]+)`'),
  1095. 'fontBold':
  1096. re.compile(r'\*\*([^\s*].*?)\*\*'),
  1097. 'fontItalic':
  1098. re.compile(r'(^|[^:])//([^ /].*?)//'),
  1099. 'fontUnderline':
  1100. re.compile(r'__([^_].*?)__'), # underline lead/trailing blank
  1101. 'fontBolditalic':
  1102. re.compile(r'\*/([^/].*?)/\*'),
  1103. 'list':
  1104. re.compile(r'^( *)([+-]) ([^ ])'),
  1105. 'deflist':
  1106. re.compile(r'^( *)(=) ([^:]+):'),
  1107. 'bar':
  1108. re.compile(r'^\s*([_=-]{20,})\s*$'),
  1109. 'table':
  1110. re.compile(r'^ *\|\|? '),
  1111. 'blankline':
  1112. re.compile(r'^\s*$'),
  1113. 'comment':
  1114. re.compile(r'^%'),
  1115. 'raw':
  1116. re.compile(r'``(.+?)``')
  1117. }
  1118. # special char to place data on TAGs contents (\a == bell)
  1119. regex['x'] = re.compile('\a')
  1120. # %%date [ (formatting) ]
  1121. regex['date'] = re.compile(r'%%date\b(\((?P<fmt>.*?)\))?', re.I)
  1122. ### complicated regexes begin here ;)
  1123. #
  1124. # textual descriptions on --help's style: [...] is optional, | is OR
  1125. ### first, some auxiliar variables
  1126. #
  1127. # [image.EXT]
  1128. patt_img = r'\[([\w_,.+%$#@!?+~/-]+\.(png|jpe?g|gif|eps|bmp))\]'
  1129. # link things
  1130. urlskel = {
  1131. 'proto' : r'(https?|ftp|news|telnet|gopher|wais)://',
  1132. 'guess' : r'(www[23]?|ftp)\.', # w/out proto, try to guess
  1133. 'login' : r'A-Za-z0-9_.-', # for ftp://login@domain.com
  1134. 'pass' : r'[^ @]*', # for ftp://login:password@dom.com
  1135. 'chars' : r'A-Za-z0-9%._/~:,=$@-',# %20(space), :80(port)
  1136. 'anchor': r'A-Za-z0-9%._-', # %nn(encoded)
  1137. 'form' : r'A-Za-z0-9/%&=+.,@*_-',# .,@*_-(as is)
  1138. 'punct' : r'.,;:!?'
  1139. }
  1140. # username [ :password ] @
  1141. patt_url_login = r'([%s]+(:%s)?@)?'%(urlskel['login'],urlskel['pass'])
  1142. # [ http:// ] [ username:password@ ] domain.com [ / ]
  1143. # [ #anchor | ?form=data ]
  1144. retxt_url = r'\b(%s%s|%s)[%s]+\b/*(\?[%s]+)?(#[%s]+)?'%(
  1145. urlskel['proto'],patt_url_login, urlskel['guess'],
  1146. urlskel['chars'],urlskel['form'],urlskel['anchor'])
  1147. # filename | [ filename ] #anchor
  1148. retxt_url_local = r'[%s]+|[%s]*(#[%s]+)'%(
  1149. urlskel['chars'],urlskel['chars'],urlskel['anchor'])
  1150. # user@domain [ ?form=data ]
  1151. patt_email = r'\b[%s]+@([A-Za-z0-9_-]+\.)+[A-Za-z]{2,4}\b(\?[%s]+)?'%(
  1152. urlskel['login'],urlskel['form'])
  1153. # saving for future use
  1154. regex['_urlskel'] = urlskel
  1155. ### and now the real regexes
  1156. #
  1157. regex['email'] = re.compile(patt_email,re.I)
  1158. # email | url
  1159. regex['link'] = \
  1160. re.compile(r'%s|%s'%(retxt_url,patt_email), re.I)
  1161. # \[ label | imagetag url | email | filename \]
  1162. regex['linkmark'] = \
  1163. re.compile(r'\[(?P<label>%s|[^]]+) (?P<link>%s|%s|%s)\]'%(
  1164. patt_img, retxt_url, patt_email, retxt_url_local),
  1165. re.L+re.I)
  1166. # image
  1167. regex['img'] = re.compile(patt_img, re.L+re.I)
  1168. # all macros
  1169. regex['macro'] = regex['date']
  1170. # special things
  1171. regex['special'] = re.compile(r'^%!\s*')
  1172. regex['command'] = re.compile(r'(Include)\s*:\s*(.+)\s*$',re.I)
  1173. return regex
  1174. ### END OF regex nightmares
  1175. class SubareaMaster:
  1176. def __init__(self) : self.x = []
  1177. def __call__(self) :
  1178. if not self.x: return ''
  1179. return self.x[-1]
  1180. def add(self, area):
  1181. if not self.x or (self.x and self.x[-1] != area):
  1182. self.x.append(area)
  1183. Debug('subarea ++ (%s): %s' % (area,self.x), 1)
  1184. def pop(self, area=None):
  1185. if area and self.x[-1] == area: self.x.pop()
  1186. Debug('subarea -- (%s): %s' % (area,self.x), 1)
  1187. def doHeader(headers, CONF):
  1188. if CONF['noheaders']: return []
  1189. doctype = CONF['type']
  1190. if not HEADER_TEMPLATE.has_key(doctype):
  1191. Error("doheader: Unknow doctype '%s'"%doctype)
  1192. template = string.split(HEADER_TEMPLATE[doctype], '\n')
  1193. head_data = {'STYLE':'', 'ENCODING':''}
  1194. for key in head_data.keys():
  1195. val = CONF.get(string.lower(key))
  1196. if key == 'ENCODING': val = get_encoding_string(val, doctype)
  1197. head_data[key] = val
  1198. # parse header contents
  1199. for i in 0,1,2:
  1200. contents = doDateMacro(headers[i]) # expand %%date
  1201. # Escapes - on tex, just do it if any \tag{} present
  1202. if doctype != 'tex' or \
  1203. (doctype == 'tex' and re.search(r'\\\w+{', contents)):
  1204. contents = doEscape(doctype, contents)
  1205. head_data['HEADER%d'%(i+1)] = contents
  1206. Debug("Header Data: %s"%head_data, 1)
  1207. # scan for empty dictionary keys
  1208. # if found, scan template lines for that key reference
  1209. # if found, remove the reference
  1210. # if there isn't any other key reference on the same line, remove it
  1211. for key in head_data.keys():
  1212. if head_data.get(key): continue
  1213. for line in template:
  1214. if string.count(line, '%%(%s)s'%key):
  1215. sline = string.replace(line, '%%(%s)s'%key, '')
  1216. if not re.search(r'%\([A-Z0-9]+\)s', sline):
  1217. template.remove(line)
  1218. # populate template with data
  1219. template = string.join(template, '\n') % head_data
  1220. ### post processing
  1221. #
  1222. # let tex format today
  1223. if doctype == 'tex' and head_data['HEADER3'] == currdate:
  1224. template = re.sub(r'\\date\{.*?}', r'\date', template)
  1225. return string.split(template, '\n')
  1226. def doDateMacro(line):
  1227. re_date = getRegexes()['date']
  1228. while re_date.search(line):
  1229. m = re_date.search(line)
  1230. fmt = m.group('fmt') or ''
  1231. dateme = currdate
  1232. if fmt: dateme = strftime(fmt,localtime(time()))
  1233. line = re_date.sub(dateme,line,1)
  1234. return line
  1235. def doCommentLine(txt):
  1236. # the -- string ends a sgml/html comment :(
  1237. txt = maskEscapeChar(txt)
  1238. if string.count(TAGS['comment'], '--') and \
  1239. string.count(txt, '--'):
  1240. txt = re.sub('-(?=-)', r'-\\', txt)
  1241. if TAGS['comment']:
  1242. return regex['x'].sub(txt, TAGS['comment'])
  1243. return ''
  1244. def doFooter(CONF):
  1245. ret = []
  1246. doctype = CONF['type']
  1247. cmdline = CONF['cmdline']
  1248. typename = doctype
  1249. if doctype == 'tex': typename = 'LaTeX2e'
  1250. ppgd = '%s code generated by txt2tags %s (%s)'%(
  1251. typename,my_version,my_url)
  1252. cmdline = 'cmdline: txt2tags %s'%string.join(cmdline[1:], ' ')
  1253. ret.append('\n'+doCommentLine(ppgd))
  1254. ret.append(doCommentLine(cmdline))
  1255. ret.append(TAGS['EOD'])
  1256. return ret
  1257. # TODO mgp: any line (header or not) can't begin with % (add a space before)
  1258. def doEscape(doctype,txt):
  1259. if doctype in ['html','sgml']:
  1260. txt = re.sub('&','&amp;',txt)
  1261. txt = re.sub('<','&lt;',txt)
  1262. txt = re.sub('>','&gt;',txt)
  1263. if doctype == 'sgml':
  1264. txt = re.sub('\xff','&yuml;',txt) # "+y
  1265. elif doctype == 'pm6':
  1266. txt = re.sub('<','<\#60>',txt)
  1267. elif doctype == 'mgp':
  1268. txt = re.sub('^%',' %',txt) # add leading blank to avoid parse
  1269. elif doctype == 'man':
  1270. txt = re.sub("^([.'])", '\\&\\1',txt) # command ID
  1271. txt = string.replace(txt,ESCCHAR, ESCCHAR+'e') # \e
  1272. elif doctype == 'tex':
  1273. txt = string.replace(txt, ESCCHAR, '@@LaTeX-escaping-SUX@@')
  1274. txt = re.sub('([#$&%{}])', r'\\\1', txt)
  1275. txt = string.replace(txt, '~', maskEscapeChar(r'\~{}'))
  1276. txt = string.replace(txt, '^', maskEscapeChar(r'\^{}'))
  1277. txt = string.replace(txt, '@@LaTeX-escaping-SUX@@',
  1278. maskEscapeChar(r'$\backslash$'))
  1279. # TIP the _ is escaped at the end
  1280. return txt
  1281. def doFinalEscape(doctype, txt):
  1282. "Last escapes of each line"
  1283. if doctype == 'pm6' : txt = string.replace(txt,ESCCHAR+'<',r'<\#92><')
  1284. elif doctype == 'man' : txt = string.replace(txt, '-', r'\-')
  1285. elif doctype == 'tex' : txt = string.replace(txt, '_', r'\_')
  1286. elif doctype == 'sgml': txt = string.replace(txt, '[', '&lsqb;')
  1287. return txt
  1288. def EscapeCharHandler(action, data):
  1289. "Mask/Unmask the Escape Char on the given string"
  1290. if not string.strip(data): return data
  1291. if action not in ['mask','unmask']:
  1292. Error("EscapeCharHandler: Invalid action '%s'"%action)
  1293. if action == 'mask': return string.replace(data,'\\',ESCCHAR)
  1294. else: return string.replace(data,ESCCHAR,'\\')
  1295. def maskEscapeChar(data):
  1296. "Replace any Escape Char \ with a text mask (Input: str or list)"
  1297. if type(data) == type([]):
  1298. return map(lambda x: EscapeCharHandler('mask', x), data)
  1299. return EscapeCharHandler('mask',data)
  1300. def unmaskEscapeChar(data):
  1301. "Undo the Escape char \ masking (Input: str or list)"
  1302. if type(data) == type([]):
  1303. return map(lambda x: EscapeCharHandler('unmask', x), data)
  1304. return EscapeCharHandler('unmask',data)
  1305. def addLineBreaks(list):
  1306. "use LB to respect sys.platform"
  1307. ret = []
  1308. for line in list:
  1309. line = string.replace(line,'\n',LB) # embedded \n's
  1310. ret.append(line+LB) # add final line break
  1311. return ret
  1312. def doPreLine(doctype,line):
  1313. "Parsing procedures for preformatted (verbatim) lines"
  1314. if not rules['preareanotescaped']: line = doEscape(doctype,line)
  1315. if rules['indentprearea']: line = ' '+line
  1316. if doctype == 'pm6': line = doFinalEscape(doctype, line)
  1317. return line
  1318. def doCloseTable(doctype):
  1319. global subarea, tableborder
  1320. ret = ''
  1321. if rules['tableable']:
  1322. if doctype == 'tex' and tableborder:
  1323. ret = TAGS['tableLineOpen']+TAGS['tableClose']+'\n'
  1324. else:
  1325. ret = TAGS['tableClose']+'\n'
  1326. else:
  1327. ret = TAGS['areaPreClose']
  1328. tableborder = 0
  1329. subarea.pop('table')
  1330. return ret
  1331. def doCloseQuote(howmany=None):
  1332. global quotedepth
  1333. ret = []
  1334. if not howmany: howmany = len(quotedepth)
  1335. for i in range(howmany):
  1336. quotedepth.pop()
  1337. #TODO align open/close tag -> FREE_ALING_TAG = 1 (man not)
  1338. ret.append(TAGS['areaQuoteClose'])
  1339. if not quotedepth: subarea.pop('quote')
  1340. return string.join(ret,'\n')
  1341. def doCloseList(howmany=None):
  1342. global listindent, listids
  1343. ret = []
  1344. if not howmany: howmany = len(listindent)
  1345. for i in range(howmany):
  1346. if listids[-1] == '-': tag = TAGS['listClose']
  1347. elif listids[-1] == '+': tag = TAGS['numlistClose']
  1348. elif listids[-1] == '=': tag = TAGS['deflistClose']
  1349. if not tag: tag = TAGS['listClose'] # default
  1350. if tag:
  1351. # unnested lists are only closed at mother-list
  1352. if rules['listnotnested']:
  1353. if len(listindent) == 1:
  1354. ret.append(tag)
  1355. else:
  1356. ret.append(listindent[-1]+tag)
  1357. del listindent[-1]
  1358. del listids[-1]
  1359. if not listindent: subarea.pop('list')
  1360. return string.join(ret,'\n')
  1361. def beautify_me(name, line):
  1362. "where name is: bold, italic, underline or bolditalic"
  1363. name = 'font%s' % string.capitalize(name)
  1364. open = TAGS['%sOpen'%name]
  1365. close = TAGS['%sClose'%name]
  1366. txt = r'%s\1%s'%(open, close)
  1367. if name == 'fontItalic':
  1368. txt = r'\1%s\2%s'%(open, close)
  1369. line = regex[name].sub(txt,line)
  1370. return line
  1371. def get_tagged_link(label, url, CONF):
  1372. ret = ''
  1373. doctype = CONF['type']
  1374. # set link type
  1375. if regex['email'].match(url):
  1376. linktype = 'email'
  1377. else:
  1378. linktype = 'url';
  1379. # escape specials from TEXT parts
  1380. label = doEscape(doctype,label)
  1381. # escape specials from link URL
  1382. if rules['linkable'] and rules['escapeurl']:
  1383. url = doEscape(doctype, url)
  1384. # if not linkable, the URL is plain text, that needs escape
  1385. if not rules['linkable']:
  1386. if doctype == 'tex':
  1387. url = re.sub('^#', '\#', url) # ugly, but compile
  1388. else:
  1389. url = doEscape(doctype,url)
  1390. # adding protocol to guessed link
  1391. guessurl = ''
  1392. if linktype == 'url' and \
  1393. re.match(regex['_urlskel']['guess'], url):
  1394. if url[0] == 'w': guessurl = 'http://' +url
  1395. else : guessurl = 'ftp://' +url
  1396. # not link aware targets -> protocol is useless
  1397. if not rules['linkable']: guessurl = ''
  1398. # simple link (not guessed)
  1399. if not label and not guessurl:
  1400. if CONF['maskemail'] and linktype == 'email':
  1401. # do the email mask feature (no TAGs, just text)
  1402. url = string.replace(url,'@',' (a) ')
  1403. url = string.replace(url,'.',' ')
  1404. url = "<%s>" % url
  1405. if rules['linkable']: url = doEscape(doctype, url)
  1406. ret = url
  1407. else:
  1408. # just add link data to tag
  1409. tag = TAGS[linktype]
  1410. ret = regex['x'].sub(url,tag)
  1411. # named link or guessed simple link
  1412. else:
  1413. # adjusts for guessed link
  1414. if not label: label = url # no protocol
  1415. if guessurl : url = guessurl # with protocol
  1416. # change image tag for !supported img+link targets
  1417. if regex['img'].match(label) and not rules['imglinkable']:
  1418. label = "(%s)"%regex['img'].match(label).group(1)
  1419. # putting data on the right appearance order
  1420. if rules['linkable']:
  1421. urlorder = [url, label] # link before label
  1422. else:
  1423. urlorder = [label, url] # label before link
  1424. # add link data to tag (replace \a's)
  1425. ret = TAGS["%sMark"%linktype]
  1426. for data in urlorder:
  1427. ret = regex['x'].sub(data,ret,1)
  1428. return ret
  1429. def get_image_align(line):
  1430. align = ''
  1431. line = string.strip(line)
  1432. m = regex['img'].search(line)
  1433. ini = m.start() ; head = 0
  1434. end = m.end() ; tail = len(line)
  1435. align = 'middle' # default align # ^text +img +text$
  1436. if ini == head and end == tail: align = 'para' # ^img$
  1437. elif ini == head: align = 'left' # ^img + text$
  1438. elif end == tail: align = 'right' # ^text + img$
  1439. return align
  1440. def get_tablecell_align(cells):
  1441. ret = []
  1442. for cell in cells:
  1443. align = 'Left'
  1444. if string.strip(cell):
  1445. if cell[0] == ' ' and cell[-1] == ' ': align = 'Center'
  1446. elif cell[0] == ' ': align = 'Right'
  1447. ret.append(align)
  1448. return ret
  1449. def get_table_prop(line):
  1450. # default table proprierties
  1451. ret = {'border':0,'header':0,'align':'Left','cells':[],'cellalign':[]}
  1452. # detect table align (and remove spaces mark)
  1453. if line[0] == ' ': ret['align'] = 'Center'
  1454. line = string.lstrip(line)
  1455. # detect header (title) mark
  1456. if line[1] == '|':
  1457. ret['header'] = 1
  1458. # delete trailing spaces after last cell border
  1459. line = re.sub('\|\s*$','|', line)
  1460. # detect (and delete) border mark (and leading space)
  1461. if line[-1] == '|':
  1462. ret['border'] = 1 ; line = line[:-2]
  1463. # delete table mark
  1464. line = regex['table'].sub('', line)
  1465. # split cells
  1466. ret['cells'] = string.split(line, ' | ')
  1467. # find cells align
  1468. ret['cellalign'] = get_tablecell_align(ret['cells'])
  1469. Debug('Table Prop: %s' % ret, 1)
  1470. return ret
  1471. def tag_table_cells(table, doctype):
  1472. ret = ''
  1473. open, close = TAGS['tableCellOpen'], TAGS['tableCellClose']
  1474. # title cell
  1475. if table['header']:
  1476. open = TAGS['tableTitleCellOpen']
  1477. close = TAGS['tableTitleCellClose']
  1478. # should we break the line?
  1479. if rules['breaktablecell']: close = close+'\n'
  1480. # here we go
  1481. while table['cells']:
  1482. openalign = open
  1483. cel = table['cells'].pop(0)
  1484. # set each cell align
  1485. if rules['tablecellaligntype'] == 'cell':
  1486. align = table['cellalign'].pop(0)
  1487. align = TAGS['tableCellAlign%s'%align]
  1488. openalign = string.replace(open,'\a',align)
  1489. # show empty cell on HTML
  1490. if not cel and doctype == 'html': cel = '&nbsp;'
  1491. # last cell gotchas
  1492. if not table['cells']:
  1493. # don't need cell separator
  1494. if rules['tablecellsplit']: close = ''
  1495. # close beautifier for last title cell
  1496. if doctype == 'tex' and table['header']: close = '}'
  1497. # join it all
  1498. newcell = openalign + string.strip(cel) + close
  1499. ret = ret + newcell
  1500. return ret
  1501. def get_tableopen_tag(table_prop, doctype):
  1502. global tableborder
  1503. open = TAGS['tableOpen'] # the default one
  1504. # the first line defines if table has border or not
  1505. tableborder = table_prop['border']
  1506. # align full table
  1507. if rules['tablealignable']:
  1508. talign = TAGS['tableAlign'+table_prop['align']]
  1509. open = regex['x'].sub(talign, open, 1)
  1510. # set the columns alignment
  1511. if rules['tablecellaligntype'] == 'column':
  1512. calign = map(lambda x: TAGS['tableColAlign%s'%x],
  1513. table_prop['cellalign'])
  1514. calign = string.join(calign,'')
  1515. open = regex['x'].sub(calign, open, 1)
  1516. # tex table spec, border or not: {|l|c|r|} , {lcr}
  1517. if doctype == 'tex' and not tableborder:
  1518. open = string.replace(open,'|','')
  1519. # we're almost done, just border left
  1520. tag = regex['x'].sub(`tableborder`, open)
  1521. return tag
  1522. # reference: http://www.iana.org/assignments/character-sets
  1523. # http://www.drclue.net/F1.cgi/HTML/META/META.html
  1524. def get_encoding_string(enc, doctype):
  1525. if not enc: return ''
  1526. # target specific translation table
  1527. translate = {
  1528. 'tex': {
  1529. # missing: ansinew , applemac , cp437 , cp437de , cp865
  1530. 'us-ascii' : 'ascii',
  1531. 'windows-1250': 'cp1250',
  1532. 'windows-1252': 'cp1252',
  1533. 'ibm850' : 'cp850',
  1534. 'ibm852' : 'cp852',
  1535. 'iso-8859-1' : 'latin1',
  1536. 'iso-8859-2' : 'latin2',
  1537. 'iso-8859-3' : 'latin3',
  1538. 'iso-8859-4' : 'latin4',
  1539. 'iso-8859-5' : 'latin5',
  1540. 'iso-8859-9' : 'latin9',
  1541. 'koi8-r' : 'koi8-r'
  1542. }
  1543. }
  1544. # normalization
  1545. enc = re.sub('(?i)(us[-_]?)?ascii|us|ibm367','us-ascii' , enc)
  1546. enc = re.sub('(?i)(ibm|cp)?85([02])' ,'ibm85\\2' , enc)
  1547. enc = re.sub('(?i)(iso[_-]?)?8859[_-]?' ,'iso-8859-' , enc)
  1548. enc = re.sub('iso-8859-($|[^1-9]).*' ,'iso-8859-1', enc)
  1549. # apply translation table
  1550. try: enc = translate[doctype][string.lower(enc)]
  1551. except: pass
  1552. return enc
  1553. ################################################################################
  1554. ###MerryChristmas,IdontwanttofighttonightwithyouImissyourbodyandIneedyourlove###
  1555. ################################################################################
  1556. def getAllConf(cmdlinelist, nocheck=0):
  1557. """
  1558. Returns a list of (File Configuration, File Proprierties) tuples
  1559. for all the given Input files. The Configuration is the merge of
  1560. command line options and %!cmdline settings.
  1561. """
  1562. all_confs = []
  1563. # parse command line to get input files list
  1564. cmdline = Cmdline(cmdlinelist, nocheck)
  1565. infiles = cmdline.cmdline_conf.get('infiles')
  1566. if not infiles: return []
  1567. for infile in infiles: # multifile support
  1568. # the first file doesn't need to recall Cmdline()
  1569. if all_confs: cmdline = Cmdline(cmdlinelist, nocheck)
  1570. # extract file Headers and Config
  1571. prop = Proprierties(infile)
  1572. # decide to use generic or target specfic (if any) %!cmdline:
  1573. cmdline_target = cmdline.cmdline_conf.get('type')
  1574. if cmdline_target and cmdline_target in targets and \
  1575. prop.config[cmdline_target].get('cmdline'):
  1576. cfgcmdline_target = cmdline_target
  1577. else:
  1578. cfgcmdline_target = 'all'
  1579. # merge %!cmdline contents (if any) into original cmdline
  1580. cmdline.merge(prop.config[cfgcmdline_target].get('cmdline'))
  1581. # force infile
  1582. cmdline.cmdline_conf['infile'] = infile
  1583. # get all the configuration (flags/options) for this file
  1584. # it saves general AND specific config (not OR as in %!cmdline)
  1585. myconf = cmdline.merge_conf(prop.config['all'])
  1586. myconf = cmdline.merge_conf(prop.config.get(myconf['type']), override=1)
  1587. # adding %!cmdline contents to config (used by GUI)
  1588. myconf['%!cmdline'] = prop.config[cfgcmdline_target].get('cmdline')
  1589. # ensure the configuration has ALL keys defined
  1590. for key in FLAGS.keys() + OPTIONS.keys() + CONFIG_KEYWORDS:
  1591. if not myconf.has_key(key): myconf[key] = ''
  1592. # append the (configuration, proprierties) tuple
  1593. all_confs.append((myconf,prop))
  1594. # remove what has left
  1595. del cmdline, prop
  1596. return all_confs
  1597. def convertAllFiles(confs):
  1598. if not confs: Quit(usage, 1)
  1599. header = []
  1600. for myconf,prop in confs: # multifile support
  1601. # --dump-source option handler
  1602. # hidden option, maybe will be removed on next versions
  1603. if myconf['dump-source']:
  1604. comment_id = '%--------------------- Area Delimiter:'
  1605. for line in prop.headers: print line
  1606. print '%s HEADER --> CONFIG'%comment_id
  1607. for line in prop.conflines: print string.rstrip(line)
  1608. print '%s CONFIG --> BODY'%comment_id
  1609. doc = convert(prop.bodylines, myconf)
  1610. for line in doc: print line
  1611. print '%s EOD'%comment_id
  1612. continue
  1613. # compose the target file Headers
  1614. #TODO escape line before?
  1615. #TODO see exceptions by tex and mgp
  1616. header = doHeader(prop.headers, myconf)
  1617. # get the marked file BODY that has left
  1618. body = prop.bodylines
  1619. # parse the full marked body into tagged target
  1620. doc,toc = convert(body, myconf, firstlinenr=prop.arearef[-1])
  1621. # make TOC (if needed)
  1622. toc = toc_maker(toc,myconf)
  1623. # finally, we have our document
  1624. outlist = header + toc + doc
  1625. # break here if Gui - it has some more processing to do
  1626. if myconf['gui']: return outlist, myconf
  1627. # write results to file or STDOUT
  1628. finish_him(outlist, myconf)
  def reallydoitall(cmdlinelist):
      """Run the whole conversion for the given command line list.

      The configurations obtained from getAllConf(cmdlinelist) are passed
      straight to convertAllFiles(), whose result is returned.
      """
      return convertAllFiles(getAllConf(cmdlinelist))
  # convert(): turn the marked-up body lines of one source document into the
  # target format named by CONF['type'].
  #   bodylines   - list with the source lines of the document body
  #   CONF        - configuration dictionary; the keys read here are 'type',
  #                 'outfile', 'gui', 'toc', 'toclevel', 'toconly',
  #                 'enumtitle', 'preproc', 'infile', 'dump-source' and
  #                 'noheaders'
  #   firstlinenr - number assigned to the first body line; only used in the
  #                 'LINE nnnn' debug messages
  # Returns the tuple (ret, toclist): the tagged output lines and the TOC
  # items collected from the titles.  When CONF['dump-source'] is set, it
  # returns just incdumpbuf (the body source lines, line breaks removed).
  # Each line is processed in a fixed order: PRE areas, blank lines, special
  # commands, comments, titles, masking, escaping, bars, quotes, lists,
  # tables, and finally the inline tags and the unmasking.
  1632. def convert(bodylines, CONF, firstlinenr=1):
  1633. # global vars for doClose*()
  1634. global TAGS, regex, rules, quotedepth, listindent, listids
  1635. global subarea, tableborder
  1636. doctype = CONF['type']
  1637. outfile = CONF['outfile']
  1638. TAGS = getTags(doctype)
  1639. rules = getRules(doctype)
  1640. regex = getRegexes()
  1641. # the defaults
  # placeholders that replace protected text (links, mono, macros, raw)
  # while the line is escaped and formatted; restored at the end of the loop
  1642. linkmask = '@@_link_@@'
  1643. monomask = '@@_mono_@@'
  1644. macromask = '@@_macro_@@'
  1645. rawmask = '@@_raw_@@'
  1646. subarea = SubareaMaster()
  1647. ret = []
  1648. incdumpbuf = []
  1649. toclist = []
  1650. f_tt = 0
  1651. listindent = []
  1652. listids = []
  1653. listcount = []
  # manual title counters, indexed by title level; index 0 is a placeholder
  1654. titlecount = ['',0,0,0,0,0]
  1655. f_lastwasblank = 0
  1656. holdspace = ''
  1657. listholdspace = ''
  1658. quotedepth = []
  1659. tableborder = 0
  1660. if outfile != STDOUT:
  1661. if not CONF['gui']:
  1662. print "--- %s..."%doctype
  1663. # if TOC is a header tag
  1664. if CONF['toc'] and TAGS['TOC']:
  1665. ret.append(TAGS['TOC']+'\n')
  1666. # let's put the opening paragraph
  1667. if doctype != 'pm6':
  1668. ret.append(TAGS['paragraph'])
  1669. # let's mark it up!
  1670. linenr = firstlinenr-1
  1671. lineref = -1
  # a while loop with a manual index is used because %!include (below)
  # replaces bodylines with a longer list during the iteration
  1672. while lineref < len(bodylines)-1:
  1673. # for lineref in range(len(bodylines)):
  1674. lineref = lineref + 1
  1675. # print lineref, len(bodylines)
  # skip_continue is only set for sgml blank lines inside a quote, so that
  # the line keeps being processed instead of being dropped by 'continue'
  1676. skip_continue = 0
  # the banks hold the text protected on this line, in order of appearance
  1677. linkbank = []
  1678. monobank = []
  1679. macrobank = []
  1680. rawbank = []
  1681. untouchedline = bodylines[lineref]
  1682. line = re.sub('[\n\r]+$','',untouchedline) # del line break
  1683. incdumpbuf.append(line) # for --dump-source
  1684. # apply PreProc rules
  1685. if CONF['preproc']:
  1686. for targ,patt,repl in CONF['preproc']:
  1687. if targ not in [CONF['type'], 'all']: continue
  1688. try : line = re.sub(patt, repl, line)
  1689. except: Error("Invalid PreProc filter regex: '%s'"%patt)
  1690. line = maskEscapeChar(line) # protect \ char
  1691. linenr = linenr +1
  1692. Debug('LINE %04d: %s'%(linenr,repr(line)), 1) # heavy debug
  1693. # we need (not really) to mark each paragraph
  1694. #TODO check if this is really needed
  1695. if doctype == 'pm6' and f_lastwasblank:
  1696. if f_tt or listindent:
  1697. holdspace = ''
  1698. else:
  1699. holdspace = TAGS['paragraph']+'\n'
  1700. # any NOT table line (or comment), closes an open table
  1701. #if subarea() == 'table' and not regex['table'].search(line):
  1702. if subarea() == 'table' \
  1703. and not regex['table'].search(line) \
  1704. and not regex['comment'].search(line):
  1705. ret.append(doCloseTable(doctype))
  1706. #---------------------[ PRE formatted ]----------------------
  1707. #TIP we'll never support beautifiers inside pre-formatted
  1708. # we're already on a PRE area
  1709. if f_tt:
  1710. # closing PRE
  1711. if regex['areaPreClose'].search(line):
  1712. if doctype != 'pm6':
  1713. ret.append(TAGS['areaPreClose'])
  1714. f_tt = 0
  1715. continue
  1716. # normal PRE-inside line
  1717. line = doPreLine(doctype, line)
  1718. ret.append(line)
  1719. continue
  1720. # detecting PRE area init
  1721. if regex['areaPreOpen'].search(line):
  1722. ret.append(TAGS['areaPreOpen'])
  1723. f_lastwasblank = 0
  1724. f_tt = 1
  1725. continue
  1726. # one line PRE-formatted text
  1727. if regex['1linePre'].search(line):
  1728. f_lastwasblank = 0
  1729. line = regex['1linePre'].sub('',line)
  1730. line = doPreLine(doctype, line)
  1731. t1, t2 = TAGS['areaPreOpen'],TAGS['areaPreClose']
  1732. ret.append('%s\n%s\n%s'%(t1,line,t2))
  1733. continue
  1734. #---------------------[ blank lines ]-----------------------
  1735. #TODO "holdspace" to save <p> to not show in closelist
  1736. if regex['blankline'].search(line):
  1737. # closing all open quotes
  1738. if quotedepth:
  1739. ret.append(doCloseQuote())
  1740. # closing all open lists
  1741. if f_lastwasblank: # 2nd consecutive blank line
  1742. if listindent: # closes list (if any)
  1743. ret.append(doCloseList())
  1744. holdspace = ''
  1745. continue # consecutive blanks are trash
  1746. # normal blank line
  1747. if doctype != 'pm6':
  1748. # paragraph (if any) is wanted inside lists also
  1749. if listindent:
  1750. para = TAGS['paragraph'] + '\n'
  1751. holdspace = holdspace + para
  1752. elif doctype == 'html':
  1753. ret.append(TAGS['paragraph'])
  1754. # sgml: quote close tag must not be \n\n</quote>
  1755. elif doctype == 'sgml' and quotedepth:
  1756. skip_continue = 1
  1757. # otherwise we just show a blank line
  1758. else:
  1759. ret.append('')
  1760. f_lastwasblank = 1
  1761. if not skip_continue: continue
  1762. #---------------------[ special ]------------------------
  1763. if regex['special'].search(line):
  1764. # include command
  1765. m = ParseConfig(line, 'include', doctype)
  1766. if m:
  1767. incpath = os.path.dirname(CONF['infile'])
  1768. incfile = m['value']
  1769. if CONF['infile'] == incfile:
  1770. Error('A file cannot include itself (loop!): %s'%incfile)
  1771. inctype, inclines = get_include_contents(incfile, incpath)
  # 'PASS' contents go to the output as they are, with no conversion
  1772. if inctype == 'PASS':
  1773. ret.extend(inclines)
  1774. continue
  1775. # change %!include command by comment
  1776. incdumpbuf[-1] = inclines[0]
  1777. # insert include lines into bodylines list
  1778. # removing the %!include command call
  1779. bodylines = bodylines[:lineref] +inclines \
  1780. +bodylines[lineref+1:]
  # inclines[0] now sits at index lineref, so the next iteration resumes
  # at inclines[1]
  1781. continue
  1782. #---------------------[ comments ]-----------------------
  1783. # just skip them (if not macro or config)
  1784. if regex['comment'].search(line) and not \
  1785. regex['date'].match(line):
  1786. continue
  1787. f_lastwasblank = 0 # reset blank status
  1788. #---------------------[ Title ]-----------------------
  1789. #TODO set next blank and set f_lastwasblank or f_lasttitle
  # titles are not recognized while a list is open
  1790. if (regex['title'].search(line) or
  1791. regex['numtitle'].search(line)) and not listindent:
  1792. if string.lstrip(line)[0] == '=':
  1793. titletype = 'title'
  1794. else:
  1795. titletype = 'numtitle'
  1796. m = regex[titletype].search(line)
  # the title level is the length of the matched 'id' group
  1797. level = len(m.group('id'))
  1798. tag = TAGS['title%s'%level]
  1799. txt = string.strip(m.group('txt'))
  1800. ### numbered title
  1801. if CONF['enumtitle'] or titletype == 'numtitle':
  1802. if rules['autonumbertitle']:
  1803. tag = TAGS['numtitle%s'%level] or tag
  1804. idtxt = txt
  1805. else:
  1806. # add count manually
  1807. id = '' ; n = level
  1808. titlecount[n] = titlecount[n] +1
  1809. if n < len(titlecount)-1: # reset sublevels count
  1810. for i in range(n+1, len(titlecount)):
  1811. titlecount[i] = 0
  1812. for i in range(n): # compose id from hierarchy
  1813. id = "%s%d."%(id,titlecount[i+1])
  1814. idtxt = "%s %s"%(id, txt) # add id to title
  1815. else:
  1816. idtxt = txt
  # the anchor id is computed before this title is added to toclist
  1817. anchorid = 'toc%d'%(len(toclist)+1)
  1818. if TAGS['anchor'] and CONF['toc'] \
  1819. and level <= CONF['toclevel']:
  1820. ret.append(regex['x'].sub(anchorid,TAGS['anchor']))
  1821. # place title tag overriding line
  1822. line = regex[titletype].sub(tag,line)
  1823. ### escape title text (unescaped text is used for TOC)
  1824. #
  1825. esctxt = doEscape(doctype,idtxt)
  1826. # sgml: [ is special on title (and lists) - here bcos 'continue'
  1827. if doctype in ['sgml','tex']:
  1828. esctxt = doFinalEscape(doctype, esctxt)
  1829. # txt: blank before
  1830. if doctype == 'txt': ret.append('')
  1831. # finish title line
  1832. ret.append(regex['x'].sub(esctxt,line))
  1833. # add "underline" to text titles
  1834. if doctype == 'txt':
  1835. ret.append(regex['x'].sub('='*len(idtxt),tag))
  1836. ret.append('') # blank line after
  1837. # let's do some TOC!
  1838. if not CONF['toc'] and not CONF['toconly']: continue
  1839. if level > CONF['toclevel']: continue # max level
  1840. if TAGS['TOC']: continue # TOC is a tag
  # TOC items are composed as txt2tags list items, indented by level
  1841. if TAGS['anchor']:
  1842. # tocitemid = '#toc%d'%(len(toclist)+1)
  1843. # TOC more readable with master topics not
  1844. # linked at number stoled idea from windows .CHM
  1845. # files (help system)
  1846. if CONF['enumtitle'] and level == 1:
  1847. tocitem = '%s+ [``%s`` #%s]'%(' '*level,txt,anchorid)
  1848. else:
  1849. tocitem = '%s- [``%s`` #%s]'%(' '*level,idtxt,anchorid)
  1850. else:
  1851. tocitem = '%s- ``%s``'%(' '*level,idtxt)
  1852. if doctype in ['txt', 'man']:
  1853. tocitem = '%s``%s``' %(' '*level,idtxt)
  1854. toclist.append(tocitem)
  1855. continue
  1856. #TODO! labeltxt = ''
  1857. # label = m.group('label')
  1858. # if label: labeltxt = '<label id="%s">' %label
  1859. #---------------------[ apply masks ]-----------------------
  1860. ### protect important structures from escaping and formatting
  # each match is saved in its bank and replaced by the mask, one at a time
  1861. while regex['raw'].search(line):
  1862. txt = regex['raw'].search(line).group(1)
  1863. txt = doEscape(doctype,txt)
  1864. rawbank.append(txt)
  1865. line = regex['raw'].sub(rawmask,line,1)
  1866. # protect pre-formatted font text
  1867. while regex['fontMono'].search(line):
  1868. txt = regex['fontMono'].search(line).group(1)
  1869. txt = doEscape(doctype,txt)
  1870. monobank.append(txt)
  1871. line = regex['fontMono'].sub(monomask,line,1)
  1872. # protect macros
  1873. while regex['macro'].search(line):
  1874. txt = regex['macro'].search(line).group()
  1875. macrobank.append(txt)
  1876. line = regex['macro'].sub(macromask,line,1)
  1877. # protect URLs and emails
  1878. while regex['linkmark'].search(line) or regex['link'].search(line):
  1879. # try to match plain or named links
  1880. match_link = regex['link'].search(line)
  1881. match_named = regex['linkmark'].search(line)
  1882. # define the current match
  1883. if match_link and match_named:
  1884. # both types found, which is the first?
  1885. m = match_link
  1886. if match_named.start() < match_link.start():
  1887. m = match_named
  1888. else:
  1889. # just one type found, we're fine
  1890. m = match_link or match_named
  1891. # extract link data and apply mask
  1892. if m == match_link: # plain link
  1893. label = ''
  1894. link = m.group()
  1895. line = regex['link'].sub(linkmask,line,1)
  1896. else: # named link
  1897. label = string.rstrip(m.group('label'))
  1898. link = m.group('link')
  1899. line = regex['linkmark'].sub(linkmask,line,1)
  1900. # save link data to the link bank
  1901. linkbank.append((label, link))
  1902. #---------------------[ do Escapes ]-----------------------
  1903. # the target-specific special char escapes for body lines
  1904. line = doEscape(doctype,line)
  1905. #---------------------[ Horizontal Bar ]--------------------
  1906. if regex['bar'].search(line):
  1907. txt = regex['bar'].search(line).group(1)
  # a bar that starts with '=' uses the 'bar2' tag, any other uses 'bar1'
  1908. if txt[0] == '=': bar = TAGS['bar2']
  1909. else : bar = TAGS['bar1']
  1910. # to avoid comment tag confusion
  1911. if doctype == 'sgml':
  1912. txt = string.replace(txt,'--','__')
  1913. line = regex['bar'].sub(bar,line)
  1914. ret.append(regex['x'].sub(txt,line))
  1915. continue
  1916. #---------------------[ Quote ]-----------------------
  1917. if regex['quote'].search(line):
  1918. subarea.add('quote')
  1919. # store number of leading TABS
  1920. currquotedepth = len(regex['quote'].search(line).group(0))
  1921. # SGML doesn't support nested quotes
  1922. if rules['quotenotnested']:
  1923. if quotedepth and currquotedepth > quotedepth[-1]:
  1924. currquotedepth = quotedepth[-1]
  1925. # for don't-close-me quote tags
  1926. if not TAGS['areaQuoteClose']:
  1927. line = regex['quote'].sub(TAGS['areaQuoteOpen']*currquotedepth, line)
  1928. else:
  1929. # new (sub)quote
  1930. if not quotedepth or currquotedepth > quotedepth[-1]:
  1931. quotedepth.append(currquotedepth)
  1932. ret.append(TAGS['areaQuoteOpen'])
  1933. # remove leading TABs
  1934. if not rules['keepquoteindent']:
  1935. line = regex['quote'].sub('', line)
  1936. # closing quotes
  # NOTE(review): this loop ends only if doCloseQuote(1) shortens the
  # global quotedepth list - confirm in doCloseQuote
  1937. while currquotedepth < quotedepth[-1]:
  1938. ret.append(doCloseQuote(1))
  1939. else:
  1940. # closing all quotes (not quote line)
  1941. if quotedepth: ret.append(doCloseQuote())
  1942. #---------------------[ Lists ]-----------------------
  1943. if (regex['list'].search(line) or regex['deflist'].search(line)):
  1944. subarea.add('list')
  1945. if regex['list'].search(line): rgx = regex['list']
  1946. else: rgx = regex['deflist']
  1947. m = rgx.search(line)
  # group 1: item indent, group 2: list type char ('-', '+' or '='),
  # group 3: the definition term on '=' lists, else the anchor char
  1948. listitemindent = m.group(1)
  1949. listtype = m.group(2)
  1950. extra = m.group(3) # regex anchor char
  1951. if listtype == '=':
  1952. listdefterm = m.group(3)
  1953. extra = ''
  1954. if doctype == 'tex':
  1955. # on tex, brackets are term delimiters
  1956. # TODO escape ] at list definition
  1957. # \], \rbrack{} and \verb!]! don't work :(
  1958. #listdefterm = string.replace(listdefterm, ']', '???')
  1959. pass
  1960. if not rules['imgasdefterm'] and \
  1961. regex['img'].search(listdefterm):
  1962. while regex['img'].search(listdefterm):
  1963. img = regex['img'].search(listdefterm).group(1)
  1964. masked = '(%s)'%img
  1965. listdefterm = regex['img'].sub(masked,listdefterm,1)
  1966. # don't cross depth limit
  1967. maxdepth = rules['listmaxdepth']
  1968. if maxdepth and len(listindent) == maxdepth:
  1969. if len(listitemindent) > len(listindent[-1]):
  1970. listitemindent = listindent[-1]
  1971. # new sublist
  1972. if not listindent or len(listitemindent) > len(listindent[-1]):
  1973. listindent.append(listitemindent)
  1974. listids.append(listtype)
  1975. if listids[-1] == '-': tag = TAGS['listOpen']
  1976. elif listids[-1] == '+': tag = TAGS['numlistOpen']
  1977. elif listids[-1] == '=': tag = TAGS['deflistOpen']
  1978. if not tag: tag = TAGS['listOpen'] # default
  1979. # no need to reopen <pre> tag on man sublists
  1980. if rules['listnotnested'] and len(listindent) != 1:
  1981. tag = ''
  1982. openlist = listindent[-1]+tag
  1983. if doctype == 'pm6':
  1984. listholdspace = openlist
  1985. else:
  1986. if string.strip(openlist): ret.append(openlist)
  1987. # reset item manual count
  1988. listcount.append(0)
  1989. # closing sublists
  # NOTE(review): relies on doCloseList(1) shortening the global listindent
  # list; listindent[-1] is read again right after it - confirm it cannot
  # become empty here
  1990. while len(listitemindent) < len(listindent[-1]):
  1991. close = doCloseList(1)
  1992. if close: ret.append(close)
  1993. if listcount: del listcount[-1]
  1994. # normal item
  1995. listid = listindent[-1]
  1996. if listids[-1] == '-':
  1997. tag = TAGS['listItem']
  1998. elif listids[-1] == '+':
  1999. tag = TAGS['numlistItem']
  2000. listcount[-1] = listcount[-1] +1
  2001. if not rules['autonumberlist']:
  2002. tag = regex['x'].sub(str(listcount[-1]), tag)
  2003. elif listids[-1] == '=':
  2004. if not TAGS['deflistItem1']:
  2005. # emulate def list, with <li><b>def</b>:
  2006. tag = TAGS['listItem'] +TAGS['fontBoldOpen'] +listdefterm
  2007. tag = tag +TAGS['fontBoldClose'] +':'
  2008. else:
  2009. tag = regex['x'].sub(listdefterm, TAGS['deflistItem1'])
  2010. tag = tag + TAGS['deflistItem2'] # open <DD>
  2011. if doctype == 'mgp': listid = len(listindent)*'\t'
  2012. line = rgx.sub(listid+tag+extra,line)
  # pm6: the list open tag held above is prefixed to the first item line
  2013. if listholdspace:
  2014. line = listholdspace+line
  2015. listholdspace = ''
  2016. #---------------------[ Table ]-----------------------
  2017. #TODO escape undesired format inside table
  2018. #TODO add man, pm6 targets
  2019. if regex['table'].search(line):
  2020. table = get_table_prop(line)
  2021. if subarea() != 'table':
  2022. subarea.add('table') # first table line!
  2023. if rules['tableable']: # table-aware target
  2024. ret.append(get_tableopen_tag(table,doctype))
  2025. else: # if not, use verb
  2026. ret.append(TAGS['areaPreOpen'])
  2027. if rules['tableable']:
  2028. # setting line tags
  2029. tl1 = TAGS['tableLineOpen']
  2030. tl2 = TAGS['tableLineClose']
  2031. # little table gotchas
  2032. if rules['breaktablelineopen']:
  2033. tl1 = tl1+'\n'
  2034. if doctype == 'tex' and not tableborder:
  2035. tl1 = ''
  2036. # do cells and finish
  2037. cells = tag_table_cells(table, doctype)
  2038. line = tl1 + cells + tl2
  2039. ### BEGIN of at-any-part-of-the-line/various-per-line TAGs.
  2040. for beauti in ['Bold', 'Italic', 'Bolditalic', 'Underline']:
  2041. if regex['font%s'%beauti].search(line):
  2042. line = beautify_me(beauti, line)
  2043. #---------------------[ URL & E-mail ]-----------------------
  # masks are replaced one per bank entry, in the order they were saved
  2044. for label,url in linkbank:
  2045. link = get_tagged_link(label, url, CONF)
  2046. line = string.replace(line, linkmask, link, 1)
  2047. #---------------------[ Image ]-----------------------
  2048. #TODO fix smart align when image is a link label
  # NOTE(review): '[\a]' looks like an image tag that would reproduce the
  # image mark itself, making this loop endless - confirm in getTags
  2049. while regex['img'].search(line) and TAGS['img'] != '[\a]':
  2050. txt = regex['img'].search(line).group(1)
  2051. tag = TAGS['img']
  2052. # HTML is the only align-aware target for now
  2053. if rules['imgalignable']:
  2054. align = get_image_align(line)
  2055. if align == 'para':
  2056. align = 'center'
  2057. tag= regex['x'].sub(tag,TAGS['imgsolo'])
  2058. # add align on tag
  2059. tag = regex['x'].sub(align, tag, 1)
  2060. if doctype == 'tex': tag = re.sub(r'\\b',r'\\\\b',tag)
  2061. line = regex['img'].sub(tag,line,1)
  2062. line = regex['x'].sub(txt,line,1)
  2063. #---------------------[ Expand Macros ]-----------------------
  2064. if macrobank:
  2065. for macro in macrobank:
  2066. line = string.replace(line, macromask, macro,1)
  2067. # now the line is full of macros again
  2068. line = doDateMacro(line)
  2069. #---------------------[ Expand PREs ]-----------------------
  2070. for mono in monobank:
  2071. open,close = TAGS['fontMonoOpen'],TAGS['fontMonoClose']
  2072. tagged = open+mono+close
  2073. line = string.replace(line,monomask,tagged,1)
  2074. #---------------------[ Expand raw ]-----------------------
  2075. for raw in rawbank:
  2076. line = string.replace(line,rawmask,raw,1)
  2077. #---------------------[ Final Escapes ]-----------------------
  2078. line = doFinalEscape(doctype, line)
  2079. ret.append(holdspace+line)
  2080. holdspace = ''
  2081. # We just need the included dump
  2082. if CONF['dump-source']: return incdumpbuf
  2083. # EOF: close any open lists/tables/quotes
  2084. #TODO take table exception out when self.doctype
  # the closing function is looked up by name from the current subarea:
  # doCloseQuote, doCloseList or doCloseTable (only the latter gets doctype)
  # NOTE(review): the loop ends only if those functions also remove the
  # subarea from the SubareaMaster - confirm
  2085. while subarea():
  2086. func = eval("doClose%s" % string.capitalize(subarea()))
  2087. parm = None
  2088. if subarea() == 'table': parm = doctype
  2089. txt = func(parm)
  2090. if txt: ret.append(txt)
  2091. # add footer
  2092. if not CONF['noheaders']:
  2093. ret.extend(doFooter(CONF))
  # TOC-only mode: the converted document is discarded, toclist is kept
  2094. if CONF['toconly']: ret = []
  2095. return ret, toclist
  2096. ################################################################################
  2097. ##################################### GUI ######################################
  2098. ################################################################################
  2099. # tk help: http://python.org/topics/tkinter/
  2100. class Gui:
  2101. "Graphical Tk Interface"
  2102. def __init__(self, conf={}):
  2103. self.bg = 'orange'
  2104. self.root = Tkinter.Tk()
  2105. self.root.config(bd=15,bg=self.bg)
  2106. self.root.title("txt2tags")
  2107. self.frame1 = Tkinter.Frame(self.root,bg=self.bg)
  2108. self.frame1.pack(fill='x')
  2109. self.frame2 = Tkinter.Frame(self.root,bg=self.bg)
  2110. self.frame2.pack()
  2111. self.frame3 = Tkinter.Frame(self.root,bg=self.bg)
  2112. self.frame3.pack(fill='x')
  2113. self.frame = self.root
  2114. self.conf = conf
  2115. self.infile = self.setvar('')
  2116. #self.infile = self.setvar('C:/aurelio/a.txt')
  2117. self.doctype = self.setvar('')
  2118. self.checks = ['noheaders','enumtitle','toc','toconly','stdout']
  2119. # creating variables
  2120. for check in self.checks:
  2121. setattr(self, 'f_'+check, self.setvar(''))
  2122. ### config as dic for python 1.5 compat (**opts don't work :( )
  2123. def entry(self, **opts): return Tkinter.Entry(self.frame, opts)
  2124. def label(self, txt='', **opts):
  2125. opts.update({'text':txt,'bg':self.bg})
  2126. return Tkinter.Label(self.frame, opts)
  2127. def button(self,name,cmd,**opts):
  2128. opts.update({'text':name,'command':cmd})
  2129. return Tkinter.Button(self.frame, opts)
  2130. def check(self,name,val,checked=0,**opts):
  2131. opts.update( {'text':name, 'onvalue':val, 'offvalue':'',
  2132. 'anchor':'w', 'bg':self.bg, 'activebackground':self.bg} )
  2133. chk = Tkinter.Checkbutton(self.frame, opts)
  2134. if checked: chk.select()
  2135. chk.pack(fill='x',padx=10)
  2136. def exit(self): self.root.destroy(); sys.exit()
  2137. def setvar(self, val): z = Tkinter.StringVar() ; z.set(val) ; return z
  2138. def menu(self,sel,items):
  2139. return apply(Tkinter.OptionMenu,(self.frame,sel)+tuple(items))
  2140. def askfile(self):
  2141. ftypes= [("txt2tags files",("*.t2t","*.txt")),("All files","*")]
  2142. newfile = askopenfilename(filetypes=ftypes)
  2143. if newfile:
  2144. self.infile.set(newfile)
  2145. newconf = getAllConf(['foo',newfile], nocheck=1)
  2146. if newconf: newconf = newconf[0][0]
  2147. # restate all checkboxes after file selection
  2148. # TODO how to make a refresh without killing it?
  2149. self.root.destroy()
  2150. self.__init__(newconf)
  2151. self.mainwindow()
  2152. def scrollwindow(self,txt='no text!',title=''):
  2153. win = Tkinter.Toplevel() ; win.title(title)
  2154. scroll = Tkinter.Scrollbar(win)
  2155. text = Tkinter.Text(win,yscrollcommand=scroll.set)
  2156. scroll.config(command=text.yview)
  2157. text.insert(Tkinter.END, string.join(txt,'\n'))
  2158. text.pack(side='left',fill='both')
  2159. scroll.pack(side='right',fill='y')
  2160. def runprogram(self):
  2161. # prepare
  2162. infile, doctype = self.infile.get(), self.doctype.get()
  2163. if not infile:
  2164. showwarning('txt2tags',\
  2165. "You must provide the source file location!")
  2166. return
  2167. # compose cmdline
  2168. guiflags = []
  2169. for flag in self.checks:
  2170. flag = getattr(self, 'f_%s'%flag).get()
  2171. if flag: guiflags.append(flag)
  2172. cmdline = ['txt2tags', '--gui', '-t', doctype] +guiflags +[infile]
  2173. Debug('Gui/Tk cmdline: %s'%cmdline,1)
  2174. # run!
  2175. try:
  2176. outlist, CONF = reallydoitall(cmdline)
  2177. outfile = CONF['outfile']
  2178. infile = CONF['infile']
  2179. outlist = finish_him(outlist,CONF) or ''
  2180. if outfile == STDOUT:
  2181. title = 'txt2tags: %s converted to %s'%(
  2182. os.path.basename(infile),
  2183. string.upper(CONF['type']))
  2184. self.scrollwindow(outlist, title)
  2185. else:
  2186. msg = "Conversion done!\n\n" +\
  2187. "FROM:\n\t%s\n"%infile +\
  2188. "TO:\n\t%s"%outfile
  2189. showinfo('txt2tags', msg)
  2190. except ZeroDivisionError: # common error, not quit
  2191. pass
  2192. except: # fatal error
  2193. traceback.print_exc()
  2194. print '\nSorry! txt2tags-Tk Fatal Error.'
  2195. errmsg = 'Unknown error occurred.\n\n'+\
  2196. 'Please send the Error Traceback '+\
  2197. 'dumped to the author:\n %s'%my_email
  2198. showerror('txt2tags FATAL ERROR!',errmsg)
  2199. self.exit()
  2200. def mainwindow(self):
  2201. #TODO show outfile somewhere
  2202. #TODO redraw GUI only using grid() because pack() sux
  2203. self.infile.set(self.conf.get('infile') or '')
  2204. self.doctype.set(self.conf.get('type') or 'html')
  2205. if self.conf.get('outfile') == STDOUT: # map -o-
  2206. self.conf['stdout'] = 1
  2207. action1 = " \nChoose the target document type:"
  2208. action2 = "\n\nEnter the tagged source file location:"
  2209. action3 = "\n\nSome options you may check:"
  2210. checks_txt = {
  2211. 'noheaders': "Suppress headers from output",
  2212. 'enumtitle': "Number titles (1, 1.1, 1.1.1, etc)",
  2213. 'toc' : "Do TOC also (Table of Contents)",
  2214. 'toconly' : "Just do TOC, nothing more",
  2215. 'stdout' : "Dump to screen (Don't save target file)"
  2216. }
  2217. self.frame = self.frame1
  2218. self.label("TXT2TAGS\n%s\nv%s"%(my_url,my_version)).pack()
  2219. self.label(action1, anchor='w').pack(fill='x')
  2220. self.menu(self.doctype, targets).pack()
  2221. self.label(action2, anchor='w').pack(fill='x')
  2222. self.frame = self.frame2
  2223. self.entry(textvariable=self.infile).grid(row=0, column=0)
  2224. self.button("Browse", self.askfile
  2225. ).grid(row=0, column=1, padx=10)
  2226. if self.conf.get('%!cmdline'):
  2227. txt = '%%!cmdline: %s' % self.conf['%!cmdline']
  2228. self.label(txt,fg='brown'
  2229. ).grid(row=1, column=0, columnspan=2, sticky='w')
  2230. self.frame = self.frame3
  2231. self.label(action3, anchor='w').pack(fill='x')
  2232. # compose options check boxes, example:
  2233. # self.check(checks_txt['toc'], '--toc', 1, variable=self.f_toc)
  2234. for check in self.checks:
  2235. txt = checks_txt[check]
  2236. opt = '--'+check
  2237. var = getattr(self, 'f_'+check)
  2238. onoff = self.conf.get(check)
  2239. self.check(txt,opt,onoff,variable=var)
  2240. self.label('\n').pack()
  2241. self.button("Quit", self.exit).pack(side='left',padx=40)
  2242. self.button("Convert!", self.runprogram
  2243. ).pack(side='right',padx=40)
  2244. # as documentation told me
  2245. if sys.platform[:3] == 'win':
  2246. self.root.iconify()
  2247. self.root.update()
  2248. self.root.deiconify()
  2249. self.root.mainloop()
  2250. ################################################################################
  2251. ################################################################################
  if __name__ == '__main__':
      # set debug and remove every occurrence of the option from cmdline,
      # so that no stray --debug reaches the command line parser
      if '--debug' in sys.argv:
          DEBUG = 1
          while '--debug' in sys.argv:
              sys.argv.remove('--debug')
      ### check if we will enter on GUI mode
      CONF['gui'] = 0
      # GUI is default on these platforms, when called alone
      if len(sys.argv) == 1 and sys.platform[:3] in ['mac','cyg','win']:
          CONF['gui'] = 1
      # user specified GUI mode
      gui_forced = sys.argv.count('--gui')
      if gui_forced:
          CONF['gui'] = 1
      # check for GUI mode resources
      if CONF['gui'] == 1:
          try:
              from tkFileDialog import askopenfilename
              from tkMessageBox import showinfo,showwarning,showerror
              import Tkinter
          # only a missing Tk module is expected here; anything else is a
          # real error and must not be hidden
          except ImportError:
              # if GUI was forced (--gui anywhere on the command line),
              # show the error message and fail with a non-zero status
              if gui_forced:
                  traceback.print_exc()
                  sys.exit(1)
              # or just abandon GUI mode, and continue
              else:
                  CONF['gui'] = 0
      Debug("system platform: %s"%sys.platform,1)
      Debug("line break char: %s"%repr(LB),1)
      nocheck = CONF['gui']                 # if GUI, no cmdline checking
      CONFS = getAllConf(sys.argv, nocheck) # get all infiles config (if any)
      if CONF['gui'] == 1:
          if len(CONFS) > 1:
              Error("GUI doesn't support multiple Input files.")
          # remove properties, get just config
          if CONFS: conf = CONFS[0][0]
          else    : conf = {}
          # redefine Error function to raise exception instead sys.exit()
          # (Gui.runprogram catches ZeroDivisionError as "already reported")
          def Error(msg):
              showerror('txt2tags ERROR!', msg)
              raise ZeroDivisionError
          Gui(conf).mainwindow()
      else:
          # console mode rocks forever!
          convertAllFiles(CONFS)
      sys.exit(0)
  2297. # vim: ts=4