PageRenderTime 60ms CodeModel.GetById 20ms RepoModel.GetById 0ms app.codeStats 0ms

/old/txt2tags-1.5.1.py

http://txt2tags.googlecode.com/
Python | 2341 lines | 2296 code | 17 blank | 28 comment | 10 complexity | 8eae18bda32683fe34ab503ab5290fef MD5 | raw file
Possible License(s): GPL-2.0, GPL-3.0, WTFPL

Large files are truncated, but you can click here to view the full file

  1. #!/usr/bin/env python
  2. # txt2tags - generic text conversion tool
  3. # http://txt2tags.sf.net
  4. #
  5. # Copyright 2001, 2002, 2003 Aurélio Marinho Jargas
  6. #
  7. # This program is free software; you can redistribute it and/or modify
  8. # it under the terms of the GNU General Public License as published by
  9. # the Free Software Foundation, version 2.
  10. #
  11. # This program is distributed in the hope that it will be useful,
  12. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. # GNU General Public License for more details.
  15. #
  16. # You have received a copy of the GNU General Public License along
  17. # with this program, on the COPYING file.
  18. #
  19. # the code is better, even readable now, but needs more improvements
  20. # TODO what if %!cmdline with syn error or wrong opts? and if on include?
  21. # TODO headers. what is valid: date, !image, !link, !beautifiers, !structs
  22. # TODO mgp: any line (header or not) can't begin with % (add a space before)
import re, string, os, sys, getopt, traceback
from time import strftime,time,localtime

# program identification data
my_url = 'http://txt2tags.sf.net'
my_email = 'verde@aurelio.net'
my_version = '1.5.1'
DEBUG = 0 # do not edit here, please use --debug
# supported target document types
targets = ['txt', 'sgml', 'html', 'pm6', 'mgp', 'moin', 'man', 'tex']
# boolean command line switches (0=off, 1=on) and their defaults
FLAGS = {'noheaders':0,'enumtitle':0 ,'maskemail':0 ,'stdout' :0,
         'toconly'  :0,'toc'      :0 ,'gui'      :0 ,'included':0}
# value-carrying command line options and their defaults
OPTIONS = {'toclevel' :3,'style' :'','type' :'','outfile' :'',
           'split':0, 'lang':''}
# %!keywords accepted on the document's Config Area
CONFIG_KEYWORDS = ['encoding', 'style', 'cmdline']
CONF = {}    # final merged configuration (filled at runtime)
regex = {}   # compiled regex collection (filled elsewhere)
TAGS = {}    # current target's markup tags (filled elsewhere)
rules = {}   # current target's rules (filled elsewhere)
currdate = strftime('%Y%m%d',localtime(time())) # ISO current date
lang = 'english'
doctype = outfile = ''
STDIN = STDOUT = '-'   # special filename meaning the standard streams
ESCCHAR = '\x00'       # internal escape char (never present in text files)
#my_version = my_version + '-beta0505' # beta!
#my_version = my_version + '-dev' + currdate[4:] # devel!
# global vars for doClose*()
quotedepth = []
listindent = []
listids = []
subarea = None
tableborder = 0
versionstr = "txt2tags version %s <%s>"%(my_version,my_url)
# command line help text; the two %s slots get the version banner
# and the cleaned-up (no brackets/quotes) list of targets
usage = """
%s
Usage: txt2tags -t <type> [OPTIONS] file.t2t
-t, --type set target document type. actually supported:
%s
-o, --outfile=FILE set FILE as the output filename ('-' for STDOUT)
--stdout same as '-o -' or '--outfile -' (deprecated option)
-H, --noheaders suppress header, title and footer information
-n, --enumtitle enumerate all title lines as 1, 1.1, 1.1.1, etc
--maskemail hide email from spam robots. x@y.z turns <x (a) y z>
--toc add TOC (Table of Contents) to target document
--toconly print document TOC and exit
--toclevel=N set maximum TOC level (deepness) to N
--gui invoke Graphical Tk Interface
--style=FILE use FILE as the document style (like Html CSS)
-h, --help print this help information and exit
-V, --version print program version and exit
Extra options for HTML target (needs sgml-tools):
--split split documents. values: 0, 1, 2 (default 0)
--lang document language (default english)
By default, converted output is saved to 'file.<type>'.
Use --outfile to force an output filename.
If input file is '-', reads from STDIN.
If outfile is '-', dumps output to STDOUT.\
"""%(versionstr, re.sub(r"[]'[]",'',repr(targets)))
  78. # here is all the target's templates
  79. # you may edit them to fit your needs
  80. # - the %(HEADERn)s strings represent the Header lines
  81. # - use %% to represent a literal %
  82. #
# Document header template for each target.
# The %(HEADERn)s slots receive the three header lines of the source;
# %(ENCODING)s and %(STYLE)s come from the document config.
HEADER_TEMPLATE = {
# plain text: just echo the three header lines
'txt': """\
%(HEADER1)s
%(HEADER2)s
%(HEADER3)s
""",
'sgml': """\
<!doctype linuxdoc system>
<article>
<title>%(HEADER1)s
<author>%(HEADER2)s
<date>%(HEADER3)s
""",
'html': """\
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
<HTML>
<HEAD>
<META NAME="generator" CONTENT="http://txt2tags.sf.net">
<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=%(ENCODING)s">
<LINK REL="stylesheet" TYPE="text/css" HREF="%(STYLE)s">
<TITLE>%(HEADER1)s</TITLE>
</HEAD><BODY BGCOLOR="white" TEXT="black">
<P ALIGN="center"><CENTER><H1>%(HEADER1)s</H1>
<FONT SIZE=4>
<I>%(HEADER2)s</I><BR>
%(HEADER3)s
</FONT></CENTER>
""",
# TODO man section 1 is hardcoded...
'man': """\
.TH "%(HEADER1)s" 1 %(HEADER3)s "%(HEADER2)s"
""",
# TODO style to <HR>
# PageMaker 6.0 tagged-text style sheet
'pm6': """\
<PMTags1.0 win><C-COLORTABLE ("Preto" 1 0 0 0)
><@Normal=
<FONT "Times New Roman"><CCOLOR "Preto"><SIZE 11>
<HORIZONTAL 100><LETTERSPACE 0><CTRACK 127><CSSIZE 70><C+SIZE 58.3>
<C-POSITION 33.3><C+POSITION 33.3><P><CBASELINE 0><CNOBREAK 0><CLEADING -0.05>
<GGRID 0><GLEFT 7.2><GRIGHT 0><GFIRST 0><G+BEFORE 7.2><G+AFTER 0>
<GALIGNMENT "justify"><GMETHOD "proportional"><G& "ENGLISH">
<GPAIRS 12><G%% 120><GKNEXT 0><GKWIDOW 0><GKORPHAN 0><GTABS $>
<GHYPHENATION 2 34 0><GWORDSPACE 75 100 150><GSPACE -5 0 25>
><@Bullet=<@-PARENT "Normal"><FONT "Abadi MT Condensed Light">
<GLEFT 14.4><G+BEFORE 2.15><G%% 110><GTABS(25.2 l "")>
><@PreFormat=<@-PARENT "Normal"><FONT "Lucida Console"><SIZE 8><CTRACK 0>
<GLEFT 0><G+BEFORE 0><GALIGNMENT "left"><GWORDSPACE 100 100 100><GSPACE 0 0 0>
><@Title1=<@-PARENT "Normal"><FONT "Arial"><SIZE 14><B>
<GCONTENTS><GLEFT 0><G+BEFORE 0><GALIGNMENT "left">
><@Title2=<@-PARENT "Title1"><SIZE 12><G+BEFORE 3.6>
><@Title3=<@-PARENT "Title1"><SIZE 10><GLEFT 7.2><G+BEFORE 7.2>
><@Title4=<@-PARENT "Title3">
><@Title5=<@-PARENT "Title3">
><@Quote=<@-PARENT "Normal"><SIZE 10><I>>
%(HEADER1)s
%(HEADER2)s
%(HEADER3)s
""",
# MagicPoint presentation: fonts, per-level tab styles, title page
'mgp': """\
#!/usr/X11R6/bin/mgp -t 90
%%deffont "normal" xfont "utopia-medium-r", charset "iso8859-1"
%%deffont "normal-i" xfont "utopia-medium-i", charset "iso8859-1"
%%deffont "normal-b" xfont "utopia-bold-r" , charset "iso8859-1"
%%deffont "normal-bi" xfont "utopia-bold-i" , charset "iso8859-1"
%%deffont "mono" xfont "courier-medium-r", charset "iso8859-1"
%%default 1 size 5
%%default 2 size 8, fore "yellow", font "normal-b", center
%%default 3 size 5, fore "white", font "normal", left, prefix " "
%%tab 1 size 4, vgap 30, prefix " ", icon arc "red" 40, leftfill
%%tab 2 prefix " ", icon arc "orange" 40, leftfill
%%tab 3 prefix " ", icon arc "brown" 40, leftfill
%%tab 4 prefix " ", icon arc "darkmagenta" 40, leftfill
%%tab 5 prefix " ", icon arc "magenta" 40, leftfill
%%%%------------------------- end of headers -----------------------------
%%page
%%size 10, center, fore "yellow"
%(HEADER1)s
%%font "normal-i", size 6, fore "white", center
%(HEADER2)s
%%font "mono", size 7, center
%(HEADER3)s
""",
# TODO please, improve me!
'moin': """\
%(HEADER1)s
%(HEADER2)s
%(HEADER3)s
""",
'tex': \
r"""\documentclass[11pt,a4paper]{article}
\usepackage{amsfonts,amssymb,graphicx,url}
\usepackage[%(ENCODING)s]{inputenc} %% char encoding
\pagestyle{plain} %% do page numbering ('empty' turns off)
\frenchspacing %% no aditional spaces after periods
\setlength{\parskip}{8pt}\parindent=0pt %% no paragraph indentation
%% uncomment next line for fancy PDF output on Adobe Acrobat Reader
%%\usepackage[pdfstartview=FitV,colorlinks=true,bookmarks=true]{hyperref}
\title{%(HEADER1)s}
\author{%(HEADER2)s}
\begin{document}
\date{%(HEADER3)s}
\maketitle
"""
}
  187. #-----------------------------------------------------------------------
  188. def Quit(msg, exitcode=0): print msg ; sys.exit(exitcode)
  189. def Error(msg): print "ERROR: %s"%msg ; sys.exit()
  190. def Debug(msg,i=0,linenr=None):
  191. if i > DEBUG: return
  192. if linenr is not None:
  193. print "(%d) %04d:%s"%(i,linenr,msg)
  194. else:
  195. print "(%d) %s"%(i,msg)
  196. def Readfile(file):
  197. if file == '-':
  198. try: data = sys.stdin.readlines()
  199. except: Error('You must feed me with data on STDIN!')
  200. else:
  201. try: f = open(file); data = f.readlines() ; f.close()
  202. except: Error("Cannot read file:\n %s"%file)
  203. return data
  204. def Savefile(file, contents):
  205. try: f = open(file, 'w')
  206. except: Error("Cannot open file for writing:\n %s"%file)
  207. if type(contents) == type([]): doit = f.writelines
  208. else: doit = f.write
  209. doit(contents) ; f.close()
  210. def ParseConfig(text='',name='',kind=''):
  211. ret = {}
  212. if not text: return ret
  213. re_name = name or '[a-z]+'
  214. re_kind = kind or '[a-z]*'
  215. regex = re.compile("""
  216. ^%%!\s* # leading id with opt spaces
  217. (?P<name>%s) # config name
  218. (\((?P<kind>%s)\))? # optional config kind inside ()
  219. \s*:\s* # key:value delimiter with opt spaces
  220. (?P<value>.+?) # config value
  221. \s*$ # rstrip() spaces and hit EOL
  222. """%(re_name,re_kind), re.I+re.VERBOSE)
  223. match = regex.match(text)
  224. if match: ret = {
  225. 'name' :string.lower(match.group('name') or ''),
  226. 'kind' :string.lower(match.group('kind') or ''),
  227. 'value':match.group('value') }
  228. return ret
  229. class Cmdline:
  230. def __init__(self, cmdline=[]):
  231. self.conf = {}
  232. self.cmdline = cmdline
  233. self.cmdline_conf = {}
  234. self.dft_options = OPTIONS
  235. self.dft_flags = FLAGS
  236. self.all_options = self.dft_options.keys()
  237. self.all_flags = self.dft_flags.keys()
  238. self.defaults = self._get_empty_conf()
  239. if cmdline: self.parse()
  240. #TODO protect quotes contents
  241. def _tokenize(self, cmd_string):
  242. return string.split(cmd_string)
  243. def parse(self):
  244. "return a dic with all options:value found"
  245. if not self.cmdline: return {}
  246. Debug("cmdline: %s"%self.cmdline, 1)
  247. options = {'infile': '', 'infiles':''}
  248. # compose valid options list
  249. longopts = ['help','version'] + self.all_flags + \
  250. map(lambda x:x+'=', self.all_options) # add =
  251. cmdline = self.cmdline[1:] # del prog name
  252. # get cmdline options
  253. try: (opt, args) = getopt.getopt(cmdline, 'hVnHt:o:', longopts)
  254. except getopt.GetoptError:
  255. Error('Bad option or missing argument (try --help)')
  256. # get infile, if any
  257. if args:
  258. options['infile'] = args[0]
  259. options['infiles'] = args # multi
  260. # parse all options
  261. for name,val in opt:
  262. if name in ['-h','--help' ]: Quit(usage)
  263. elif name in ['-V','--version']: Quit(versionstr)
  264. elif name in ['-t','--type' ]: options['type'] = val
  265. elif name in ['-o','--outfile' ]: options['outfile'] = val
  266. elif name in ['-n','--enumtitle']: options['enumtitle'] = 1
  267. elif name in ['-H','--noheaders']: options['noheaders'] = 1
  268. else: options[name[2:]] = val # del --
  269. # save results
  270. Debug("cmdline arguments: %s"%options, 1)
  271. self.cmdline_conf = options
  272. def compose(self, conf):
  273. "compose cmdline from CONF dict"
  274. #TODO if toconly, del noheaders, del toc, del toclevel
  275. args = []
  276. if conf.has_key('type'): # the first
  277. args.extend(['-t', conf['type']]) ;
  278. del conf[type]
  279. for key in conf.keys():
  280. if key in ['infile','infiles']: continue
  281. args.extend(['--'+key, conf[key]])
  282. if conf.has_key('infiles'): # the last
  283. args.extend(conf['infiles'])
  284. return string.join(args, ' ')
  285. def merge(self, extraopts=''):
  286. "insert cmdline portion BEFORE current cmdline"
  287. if not extraopts: return
  288. if type(extraopts) == type(''):
  289. extraopts = self._tokenize(extraopts)
  290. if not self.cmdline: self.cmdline = extraopts
  291. else: self.cmdline = ['t2t-merged'] +extraopts +self.cmdline[1:]
  292. self.parse()
  293. def _get_outfile_name(self, conf):
  294. "dirname is the same for {in,out}file"
  295. infile = conf['infile']
  296. if not infile: return ''
  297. if infile == STDIN or conf['stdout']:
  298. outfile = STDOUT
  299. else:
  300. basename = re.sub('\.(txt|t2t)$','',infile)
  301. outfile = "%s.%s"%(basename, conf['type'])
  302. Debug(" infile: '%s'"%infile , 1)
  303. Debug("outfile: '%s'"%outfile, 1)
  304. return outfile
  305. def _sanity(self, dic):
  306. "basic cmdline syntax checkings"
  307. if not dic: return {}
  308. if not dic['infile'] or not dic['type']:
  309. Quit(usage, 1) # no filename/doctype
  310. if not targets.count(dic['type']): # check target
  311. Error("Invalid document type '%s' (try --help)"%(
  312. dic['type']))
  313. if len(dic['infiles']) > 1 and dic['outfile']: # -o FILE *.t2t
  314. Error("--outfile can't be used with multiple files")
  315. for opt in self.all_options: # check numeric options
  316. opttype = type(self.dft_options[opt])
  317. if dic.get(opt) and opttype == type(9):
  318. try: dic[opt] = int(dic.get(opt)) # save
  319. except: Error('--%s value must be a number'%opt)
  320. if dic['split'] not in [0,1,2]: # check split level
  321. Error('Option --split must be 0, 1 or 2')
  322. return dic
  323. def merge_conf(self, newconfs={}):
  324. "include Config Area settings into self.conf"
  325. if not self.conf: self.get_conf()
  326. if not newconfs: return self.conf
  327. for key in newconfs.keys():
  328. if key == 'cmdline': continue # already done
  329. # just update if still 'virgin'
  330. if self.conf.has_key(key) and \
  331. self.conf[key] == self.defaults[key]:
  332. self.conf[key] = newconfs[key]
  333. # add new
  334. if not self.conf.has_key(key):
  335. self.conf[key] = newconfs[key]
  336. Debug("Merged CONF: %s"%self.conf, 1)
  337. return self.conf
  338. def _get_empty_conf(self):
  339. econf = self.dft_options.copy()
  340. for k in self.dft_flags.keys(): econf[k] = self.dft_flags[k]
  341. return econf
  342. def get_conf(self):
  343. "set vars and flags according to options dic"
  344. if not self.cmdline_conf:
  345. if not self.cmdline: return {}
  346. self.parse()
  347. dic = self.cmdline_conf
  348. conf = self.defaults.copy()
  349. ## store flags & options
  350. for flag in self.all_flags:
  351. if dic.has_key(flag): conf[flag] = 1
  352. for opt in self.all_options + ['infile', 'infiles']:
  353. if dic.has_key(opt): conf[opt] = dic.get(opt)
  354. if not conf['type'] and conf['toconly']: conf['type'] = 'txt'
  355. conf = self._sanity(conf)
  356. ## some gotchas for specific issues
  357. doctype = conf['type']
  358. infile = conf['infile']
  359. # toconly is stronger than others
  360. if conf['toconly']:
  361. conf['noheaders'] = 1
  362. conf['stdout'] = 1
  363. conf['toc'] = 0
  364. conf['split'] = 0
  365. conf['toclevel'] = self.dft_options['toclevel']
  366. # split: just HTML, no stdout, 1st do a sgml, then sgml2html
  367. if conf['split']:
  368. if doctype != 'html': conf['split'] = 0
  369. else: conf['stdout'] = 0 ; conf['type'] = 'sgml'
  370. outfile = conf['outfile'] or self._get_outfile_name(conf)
  371. # final checkings
  372. if conf['split'] and outfile == STDOUT:
  373. Error('--split: You must provide a FILE (not STDIN)')
  374. if infile == outfile and outfile != STDOUT:
  375. Error("SUICIDE WARNING!!! (see --outfile)\n source"+\
  376. " and target files has the same name: "+outfile)
  377. ### author's note: "yes, i've got my sample.t2t file deleted
  378. ### before add this test... :/"
  379. conf['outfile'] = outfile
  380. conf['cmdline'] = self.cmdline
  381. Debug("CONF data: %s\n"%conf, 1)
  382. self.conf = conf
  383. return self.conf
  384. #
  385. ### End of Cmdline class
class Proprierties:
    """Read a t2t source file and split it into Head, Conf and Body areas.

    NOTE(review): the class name looks like a typo for "Properties";
    kept as-is for compatibility with existing callers.
    The file lines live in self.buffer, with an empty slot at index 0
    so buffer positions match 1-based line numbers.
    """
    def __init__(self, filename=''):
        self.buffer = [''] # text start at pos 1
        self.areas = ['head','conf','body']
        self.arearef = []          # starting line number of each area
        self.headers = ['','','']  # the three header lines, stripped
        self.config = self.get_empty_config()
        self.lastline = 0
        self.filename = filename
        self.conflines = []        # raw lines of the Config Area
        self.bodylines = []        # raw lines of the Body
        if filename:
            self.read_file(filename)
            self.find_areas()
            self.set_headers()
            self.set_config()
    def read_file(self, file):
        # Load the file contents into the buffer; abort on empty file
        lines = Readfile(file)
        if not lines: Error('Empty file! %s'%file)
        self.buffer.extend(lines)
    def get_empty_config(self):
        # Return a config dict with every valid keyword set to ''
        empty = {}
        for key in CONFIG_KEYWORDS: empty[key] = ''
        return empty
    def find_areas(self):
        "Run through buffer and identify head/conf/body areas"
        buf = self.buffer ; ref = [1,4,0] # defaults
        if not string.strip(buf[1]): # no header
            ref[0] = 0 ; ref[1] = 2
        for i in range(ref[1],len(buf)): # find body init
            # Body starts at the first non-blank, non-comment line...
            if string.strip(buf[i]) and buf[i][0] != '%':
                ref[2] = i ; break # !blank, !comment
            # ...or at a %!include line
            if ParseConfig(buf[i], 'include', 'verb|body|'):
                ref[2] = i ; break # %!include
        if ref[1] == ref[2]: ref[1] = 0 # no conf area
        for i in 0,1,2: # del !existent
            if not ref[i]: self.areas[i] = ''
        self.arearef = ref # save results
        self.lastline = len(self.buffer)-1
        Debug('Head,Conf,Body start line: %s'%ref, 1)
        # store CONF and BODY lines found
        cfgend = ref[2] or len(buf)
        self.conflines = buf[ref[1]:cfgend]
        if ref[2]: self.bodylines = buf[ref[2]:]
    def set_headers(self):
        "Extract and save headers contents"
        if not self.arearef: self.find_areas()
        if not self.areas.count('head'): return
        if self.lastline < 3:
            #TODO on gui this checking is !working
            Error(
            "Premature end of Headers on '%s'."%self.filename +\
            '\n\nFile has %s line(s), but '%self.lastline +\
            'Headers should be composed by 3 lines. ' +\
            '\nMaybe you should left the first line blank? ' +\
            '(for no headers)')
        # headers are always the first three buffer lines, stripped
        for i in 0,1,2:
            self.headers[i] = string.strip(self.buffer[i+1])
            Debug("Headers found: %s"%self.headers, 1, i+1)
    def set_config(self):
        "Extract and save config contents (including includes)"
        if not self.arearef: self.find_areas()
        if not self.areas.count('conf'): return
        keywords = string.join(CONFIG_KEYWORDS, '|')
        linenr = self.arearef[1] # for debug messages
        for line in self.conflines:
            linenr = linenr + 1
            if len(line) < 3: continue    # too short for '%!x'
            if line[:2] != '%!': continue # not a config line
            cfg = ParseConfig(line, name=keywords)
            if not cfg:
                Debug('Bogus Config Line',1,linenr)
                continue
            key, val = cfg['name'], cfg['value']
            self.config[key] = val
            Debug("Found config '%s', value '%s'"%(
                key,val),1,linenr)
  463. def get_file_body(file):
  464. "Returns all the document BODY lines (including includes)"
  465. prop = Proprierties()
  466. prop.read_file(file)
  467. prop.find_areas()
  468. return prop.bodylines
def finish_him(outlist, CONF):
    "Writing output to screen or file"
    outfile = CONF['outfile']
    outlist = unmaskEscapeChar(outlist)
    if outfile == STDOUT:
        # STDOUT: just print every converted line
        for line in outlist: print line
    else:
        Savefile(outfile, addLineBreaks(outlist))
        if not CONF['gui']: print 'wrote %s'%(outfile)
    # --split: the saved file is SGML; hand it over to sgml2html
    if CONF['split']:
        print "--- html..."
        sgml2html = 'sgml2html -s %s -l %s %s'%(
            CONF['split'],CONF['lang'] or lang,outfile)
        print "Running system command:", sgml2html
        os.system(sgml2html)
def toc_maker(toc, conf):
    "Compose TOC list 'by hand'"
    # TOC is a tag, so there's nothing to do here
    if TAGS['TOC']: return []
    # toc is a valid t2t marked text (list type), that is converted
    if conf['toc'] or conf['toconly']:
        fakeconf = conf.copy()
        # neutralize options that would disturb the TOC conversion
        fakeconf['noheaders'] = 1
        fakeconf['toconly'] = 0
        fakeconf['maskemail'] = 0
        toc,foo = convert(toc, fakeconf)
    # TOC between bars (not for --toconly)
    if conf['toc']:
        para = TAGS['paragraph']
        # NOTE(review): regex['x'] (defined elsewhere) apparently fills
        # the \a slot of the bar1 tag with a 72-dash rule -- confirm
        tocbar = [para, regex['x'].sub('-'*72,TAGS['bar1']), para]
        toc = tocbar + toc + tocbar
    return toc
# set the Line Break across platforms
LB = '\n' # default (UNIX and friends)
if sys.platform[:3] == 'win': LB = '\r\n'
#elif sys.platform[:3] == 'cyg': LB = '\r\n' # not sure if it's best :(
elif sys.platform[:3] == 'mac': LB = '\r'
def getTags(doctype):
    """Return the markup tags dictionary for the given target doctype.

    Every target defines only the tags it really uses; the remaining
    keys are filled with '' so callers may use any key unchecked.
    Inside a tag, the \a (bell) char marks where content is placed.
    """
    # every valid tag key -- targets fill only what they support
    keys = [
    'paragraph','title1','title2','title3','title4','title5',
    'areaPreOpen','areaPreClose',
    'areaQuoteOpen','areaQuoteClose',
    'fontMonoOpen','fontMonoClose',
    'fontBoldOpen','fontBoldClose',
    'fontItalicOpen','fontItalicClose',
    'fontBolditalicOpen','fontBolditalicClose',
    'fontUnderlineOpen','fontUnderlineClose',
    'listOpen','listClose','listItem',
    'numlistOpen','numlistClose','numlistItem',
    'deflistOpen','deflistClose','deflistItem1','deflistItem2',
    'bar1','bar2',
    'url','urlMark','email','emailMark',
    'img','imgsolo',
    'tableOpen','tableClose','tableLineOpen','tableLineClose',
    'tableCellOpen','tableCellClose',
    'tableTitleCellOpen','tableTitleCellClose',
    'anchor','comment','TOC',
    'EOD'
    ]
    alltags = {
    'txt': {
        'title1'              : '  \a'         ,
        'title2'              : '\t\a'         ,
        'title3'              : '\t\t\a'       ,
        'title4'              : '\t\t\t\a'     ,
        'title5'              : '\t\t\t\t\a'   ,
        'areaQuoteOpen'       : '        '     ,
        'listItem'            : '- '           ,
        'numlistItem'         : '\a. '         ,
        'bar1'                : '\a'           ,
        'bar2'                : '\a'           ,
        'url'                 : '\a'           ,
        'urlMark'             : '\a (\a)'      ,
        'email'               : '\a'           ,
        'emailMark'           : '\a (\a)'      ,
        'img'                 : '[\a]'         ,
    },
    'html': {
        'paragraph'           : '<P>'          ,
        'title1'              : '<H1>\a</H1>'  ,
        'title2'              : '<H2>\a</H2>'  ,
        'title3'              : '<H3>\a</H3>'  ,
        'title4'              : '<H4>\a</H4>'  ,
        'title5'              : '<H5>\a</H5>'  ,
        'areaPreOpen'         : '<PRE>'        ,
        'areaPreClose'        : '</PRE>'       ,
        'areaQuoteOpen'       : '<BLOCKQUOTE>' ,
        'areaQuoteClose'      : '</BLOCKQUOTE>',
        'fontMonoOpen'        : '<CODE>'       ,
        'fontMonoClose'       : '</CODE>'      ,
        'fontBoldOpen'        : '<B>'          ,
        'fontBoldClose'       : '</B>'         ,
        'fontItalicOpen'      : '<I>'          ,
        'fontItalicClose'     : '</I>'         ,
        'fontBolditalicOpen'  : '<B><I>'       ,
        'fontBolditalicClose' : '</I></B>'     ,
        'fontUnderlineOpen'   : '<U>'          ,
        'fontUnderlineClose'  : '</U>'         ,
        'listOpen'            : '<UL>'         ,
        'listClose'           : '</UL>'        ,
        'listItem'            : '<LI>'         ,
        'numlistOpen'         : '<OL>'         ,
        'numlistClose'        : '</OL>'        ,
        'numlistItem'         : '<LI>'         ,
        'deflistOpen'         : '<DL>'         ,
        'deflistClose'        : '</DL>'        ,
        'deflistItem1'        : '<DT>\a</DT>'  ,
        'deflistItem2'        : '<DD>'         ,
        'bar1'                : '<HR NOSHADE SIZE=1>'  ,
        'bar2'                : '<HR NOSHADE SIZE=5>'  ,
        'url'                 : '<A HREF="\a">\a</A>'  ,
        'urlMark'             : '<A HREF="\a">\a</A>'  ,
        'email'               : '<A HREF="mailto:\a">\a</A>' ,
        'emailMark'           : '<A HREF="mailto:\a">\a</A>' ,
        'img'                 : '<IMG ALIGN="\a" SRC="\a" BORDER="0">',
        'imgsolo'             : '<P ALIGN="center">\a</P>'   ,
        'tableOpen'           : '<table\a cellpadding=4 border=\a>',
        'tableClose'          : '</table>'     ,
        'tableLineOpen'       : '<tr>'         ,
        'tableLineClose'      : '</tr>'        ,
        'tableCellOpen'       : '<td\a>'       ,
        'tableCellClose'      : '</td>'        ,
        'tableTitleCellOpen'  : '<th>'         ,
        'tableTitleCellClose' : '</th>'        ,
        'tableAlignLeft'      : ''             ,
        'tableAlignCenter'    : ' align="center"',
        'tableCellAlignLeft'  : ''             ,
        'tableCellAlignRight' : ' align="right"' ,
        'tableCellAlignCenter': ' align="center"',
        'anchor'              : '<a name="\a">',
        'comment'             : '<!-- \a -->'  ,
        'EOD'                 : '</BODY></HTML>'
    },
    'sgml': {
        'paragraph'           : '<p>'          ,
        'title1'              : '<sect>\a<p>'  ,
        'title2'              : '<sect1>\a<p>' ,
        'title3'              : '<sect2>\a<p>' ,
        'title4'              : '<sect3>\a<p>' ,
        'title5'              : '<sect4>\a<p>' ,
        'areaPreOpen'         : '<tscreen><verb>'  ,
        'areaPreClose'        : '</verb></tscreen>',
        'areaQuoteOpen'       : '<quote>'      ,
        'areaQuoteClose'      : '</quote>'     ,
        'fontMonoOpen'        : '<tt>'         ,
        'fontMonoClose'       : '</tt>'        ,
        'fontBoldOpen'        : '<bf>'         ,
        'fontBoldClose'       : '</bf>'        ,
        'fontItalicOpen'      : '<em>'         ,
        'fontItalicClose'     : '</em>'        ,
        'fontBolditalicOpen'  : '<bf><em>'     ,
        'fontBolditalicClose' : '</em></bf>'   ,
        'fontUnderlineOpen'   : '<bf><em>'     ,
        'fontUnderlineClose'  : '</em></bf>'   ,
        'listOpen'            : '<itemize>'    ,
        'listClose'           : '</itemize>'   ,
        'listItem'            : '<item>'       ,
        'numlistOpen'         : '<enum>'       ,
        'numlistClose'        : '</enum>'      ,
        'numlistItem'         : '<item>'       ,
        'deflistOpen'         : '<descrip>'    ,
        'deflistClose'        : '</descrip>'   ,
        'deflistItem1'        : '<tag>\a</tag>',
        'bar1'                : '<!-- \a -->'  ,
        'bar2'                : '<!-- \a -->'  ,
        'url'                 : '<htmlurl url="\a" name="\a">'        ,
        'urlMark'             : '<htmlurl url="\a" name="\a">'        ,
        'email'               : '<htmlurl url="mailto:\a" name="\a">' ,
        'emailMark'           : '<htmlurl url="mailto:\a" name="\a">' ,
        'img'                 : '<figure><ph vspace=""><img src="\a">'+\
                                '</figure>' ,
        'tableOpen'           : '<table><tabular ca="\a">' ,
        'tableClose'          : '</tabular></table>' ,
        'tableLineClose'      : '<rowsep>'     ,
        'tableCellClose'      : '<colsep>'     ,
        'tableTitleCellClose' : '<colsep>'     ,
        'tableColAlignLeft'   : 'l'            ,
        'tableColAlignRight'  : 'r'            ,
        'tableColAlignCenter' : 'c'            ,
        'comment'             : '<!-- \a -->'  ,
        'TOC'                 : '<toc>'        ,
        'EOD'                 : '</article>'
    },
    'tex': {
        'title1'              : '\n\\newpage\section{\a}',
        'title2'              : '\\subsection{\a}'       ,
        'title3'              : '\\subsubsection{\a}'    ,
        # title 4/5: DIRTY: para+BF+\\+\n
        'title4'              : '\\paragraph{}\\textbf{\a}\\\\\n',
        'title5'              : '\\paragraph{}\\textbf{\a}\\\\\n',
        'areaPreOpen'         : '\\begin{verbatim}'  ,
        'areaPreClose'        : '\\end{verbatim}'    ,
        'areaQuoteOpen'       : '\\begin{quotation}' ,
        'areaQuoteClose'      : '\\end{quotation}'   ,
        'fontMonoOpen'        : '\\texttt{'    ,
        'fontMonoClose'       : '}'            ,
        'fontBoldOpen'        : '\\textbf{'    ,
        'fontBoldClose'       : '}'            ,
        'fontItalicOpen'      : '\\textit{'    ,
        'fontItalicClose'     : '}'            ,
        'fontBolditalicOpen'  : '\\textbf{\\textit{' ,
        'fontBolditalicClose' : '}}'           ,
        'fontUnderlineOpen'   : '\\underline{' ,
        'fontUnderlineClose'  : '}'            ,
        'listOpen'            : '\\begin{itemize}'   ,
        'listClose'           : '\\end{itemize}'     ,
        'listItem'            : '\\item '      ,
        'numlistOpen'         : '\\begin{enumerate}' ,
        'numlistClose'        : '\\end{enumerate}'   ,
        'numlistItem'         : '\\item '      ,
        'deflistOpen'         : '\\begin{description}',
        'deflistClose'        : '\\end{description}' ,
        'deflistItem1'        : '\\item[\a]'   ,
        'bar1'                : '\n\\hrulefill{}\n'  ,
        'bar2'                : '\n\\rule{\linewidth}{1mm}\n',
        'url'                 : '\\url{\a}'    ,
        'urlMark'             : '\\textit{\a} (\\url{\a})' ,
        'email'               : '\\url{\a}'    ,
        'emailMark'           : '\\textit{\a} (\\url{\a})' ,
        'img'                 : '\\begin{figure}\\includegraphics{\a}'+\
                                '\\end{figure}',
        'tableOpen'           : '\\begin{center}\\begin{tabular}{\a|}',
        'tableClose'          : '\\end{tabular}\\end{center}',
        'tableLineOpen'       : '\\hline '     ,
        'tableLineClose'      : ' \\\\'        ,
        'tableCellClose'      : ' & '          ,
        'tableTitleCellOpen'  : '\\textbf{',
        'tableTitleCellClose' : '} & '         ,
        'tableColAlignLeft'   : '|l'           ,
        'tableColAlignRight'  : '|r'           ,
        'tableColAlignCenter' : '|c'           ,
        'comment'             : '% \a'         ,
        'TOC'                 : '\\newpage\\tableofcontents',
        'EOD'                 : '\\end{document}'
    },
    'moin': {
        'title1'              : '= \a ='       ,
        'title2'              : '== \a =='     ,
        'title3'              : '=== \a ==='   ,
        'title4'              : '==== \a ====' ,
        'title5'              : '===== \a =====',
        'areaPreOpen'         : '{{{'          ,
        'areaPreClose'        : '}}}'          ,
        'areaQuoteOpen'       : '  '           ,
        'fontMonoOpen'        : '{{{'          ,
        'fontMonoClose'       : '}}}'          ,
        'fontBoldOpen'        : "'''"          ,
        'fontBoldClose'       : "'''"          ,
        'fontItalicOpen'      : "''"           ,
        'fontItalicClose'     : "''"           ,
        'fontBolditalicOpen'  : "'''''"        ,
        'fontBolditalicClose' : "'''''"        ,
        'fontUnderlineOpen'   : "'''''"        ,
        'fontUnderlineClose'  : "'''''"        ,
        'listItem'            : '* '           ,
        'numlistItem'         : '\a. '         ,
        'bar1'                : '----'         ,
        'bar2'                : '----'         ,
        'url'                 : '[\a]'         ,
        'urlMark'             : '[\a \a]'      ,
        'email'               : '[\a]'         ,
        'emailMark'           : '[\a \a]'      ,
        'img'                 : '[\a]'         ,
        'tableLineOpen'       : '||'           ,
        'tableCellClose'      : '||'           ,
        'tableTitleCellClose' : '||'
    },
    'mgp': {
        'paragraph'           : '%font "normal", size 5\n' ,
        'title1'              : '%page\n\n\a'  ,
        'title2'              : '%page\n\n\a'  ,
        'title3'              : '%page\n\n\a'  ,
        'title4'              : '%page\n\n\a'  ,
        'title5'              : '%page\n\n\a'  ,
        'areaPreOpen'         : '\n%font "mono"'   ,
        'areaPreClose'        : '%font "normal"'   ,
        'areaQuoteOpen'       : '%prefix "       "',
        'areaQuoteClose'      : '%prefix "  "'     ,
        'fontMonoOpen'        : '\n%cont, font "mono"\n'      ,
        'fontMonoClose'       : '\n%cont, font "normal"\n'    ,
        'fontBoldOpen'        : '\n%cont, font "normal-b"\n'  ,
        'fontBoldClose'       : '\n%cont, font "normal"\n'    ,
        'fontItalicOpen'      : '\n%cont, font "normal-i"\n'  ,
        'fontItalicClose'     : '\n%cont, font "normal"\n'    ,
        'fontBolditalicOpen'  : '\n%cont, font "normal-bi"\n' ,
        'fontBolditalicClose' : '\n%cont, font "normal"\n'    ,
        'fontUnderlineOpen'   : '\n%cont, fore "cyan"\n'      ,
        'fontUnderlineClose'  : '\n%cont, fore "white"\n'     ,
        'numlistItem'         : '\a. '         ,
        'bar1'                : '%bar "white" 5'   ,
        'bar2'                : '%pause'       ,
        'url'                 : '\n%cont, fore "cyan"\n\a' +\
                                '\n%cont, fore "white"\n'  ,
        'urlMark'             : '\a \n%cont, fore "cyan"\n\a'+\
                                '\n%cont, fore "white"\n'  ,
        'email'               : '\n%cont, fore "cyan"\n\a' +\
                                '\n%cont, fore "white"\n'  ,
        'emailMark'           : '\a \n%cont, fore "cyan"\n\a'+\
                                '\n%cont, fore "white"\n'  ,
        'img'                 : '\n%center\n%newimage "\a", left\n',
        'comment'             : '%% \a'        ,
        'EOD'                 : '%%EOD'
    },
    'man': {
        'paragraph'           : '.P'           ,
        'title1'              : '.SH \a'       ,
        'title2'              : '.SS \a'       ,
        'title3'              : '.SS \a'       ,
        'title4'              : '.SS \a'       ,
        'title5'              : '.SS \a'       ,
        'areaPreOpen'         : '.nf'          ,
        'areaPreClose'        : '.fi\n'        ,
        'areaQuoteOpen'       : '\n'           ,
        'areaQuoteClose'      : '\n'           ,
        'fontBoldOpen'        : '\\fB'         ,
        'fontBoldClose'       : '\\fP'         ,
        'fontItalicOpen'      : '\\fI'         ,
        'fontItalicClose'     : '\\fP'         ,
        'fontBolditalicOpen'  : '\n.BI '       ,
        'fontBolditalicClose' : '\n\\&'        ,
        'listOpen'            : '\n.nf'        , # pre
        'listClose'           : '.fi\n'        ,
        'listItem'            : '* '           ,
        'numlistOpen'         : '\n.nf'        , # pre
        'numlistClose'        : '.fi\n'        ,
        'numlistItem'         : '\a. '         ,
        'bar1'                : '\n\n'         ,
        'bar2'                : '\n\n'         ,
        'url'                 : '\a'           ,
        'urlMark'             : '\a (\a)',
        'email'               : '\a'           ,
        'emailMark'           : '\a (\a)',
        'img'                 : '\a'           ,
        'comment'             : '.\\" \a'
    },
    'pm6': {
        'paragraph'           : '<@Normal:>'   ,
        'title1'              : '\n<@Title1:>\a',
        'title2'              : '\n<@Title2:>\a',
        'title3'              : '\n<@Title3:>\a',
        'title4'              : '\n<@Title4:>\a',
        'title5'              : '\n<@Title5:>\a',
        'areaPreOpen'         : '<@PreFormat:>'    ,
        'areaQuoteOpen'       : '<@Quote:>'    ,
        'fontMonoOpen'        : '<FONT "Lucida Console"><SIZE 9>' ,
        'fontMonoClose'       : '<SIZE$><FONT$>',
        'fontBoldOpen'        : '<B>'          ,
        'fontBoldClose'       : '<P>'          ,
        'fontItalicOpen'      : '<I>'          ,
        'fontItalicClose'     : '<P>'          ,
        'fontBolditalicOpen'  : '<B><I>'       ,
        'fontBolditalicClose' : '<P>'          ,
        'fontUnderlineOpen'   : '<U>'          ,
        'fontUnderlineClose'  : '<P>'          ,
        'listOpen'            : '<@Bullet:>'   ,
        'listItem'            : '\x95 '        , # \x95 == ~U
        'numlistOpen'         : '<@Bullet:>'   ,
        'numlistItem'         : '\x95 '        ,
        'bar1'                : '\a'           ,
        'bar2'                : '\a'           ,
        'url'                 : '<U>\a<P>'     , # underline
        'urlMark'             : '\a <U>\a<P>'  ,
        'email'               : '\a'           ,
        'emailMark'           : '\a \a'        ,
        'img'                 : '\a'
    }
    }
    # compose the target tags dictionary
    tags = {}
    target_tags = alltags[doctype]
    for key in keys: tags[key] = '' # create empty keys
    for key in target_tags.keys():
        tags[key] = maskEscapeChar(target_tags[key]) # populate
    return tags
  843. def getRules(doctype):
  844. ret = {}
  845. allrules = [
  846. # target rules (ON/OFF)
  847. 'linkable', # target supports external links
  848. 'tableable', # target supports tables
  849. 'imglinkable', # target supports images as links
  850. 'imgalignable', # target supports image alignment
  851. 'imgasdefterm', # target supports image as definition term
  852. 'tablealignable', # target supports table alignment
  853. 'listcountable', # target supports numbered lists natively
  854. 'tablecellsplit', # place delimiters only *between* cells
  855. 'listnotnested', # lists cannot be nested
  856. 'quotenotnested', # quotes cannot be nested
  857. 'preareanotescaped', # don't escape specials in PRE area
  858. 'escapeurl', # escape special in link URL
  859. # target code beautify (ON/OFF)
  860. 'indentprearea', # add leading spaces to PRE area lines
  861. 'breaktablecell', # break lines after any table cell
  862. 'breaktablelineopen', # break line after opening table line
  863. 'keepquoteindent', # don't remove the leading TABs on quotes
  864. # value settings
  865. 'listmaxdepth', # maximum depth for lists
  866. 'tablecellaligntype' # type of table cell align: cell, column
  867. ]
  868. rules = {
  869. 'txt' : {
  870. 'indentprearea':1
  871. },
  872. 'html': {
  873. 'indentprearea':1,
  874. 'linkable':1,
  875. 'imglinkable':1,
  876. 'imgalignable':1,
  877. 'imgasdefterm':1,
  878. 'listcountable':1,
  879. 'tableable':1,
  880. 'breaktablecell':1,
  881. 'breaktablelineopen':1,
  882. 'keepquoteindent':1,
  883. 'tablealignable':1,
  884. 'tablecellaligntype':'cell'
  885. },
  886. 'sgml': {
  887. 'linkable':1,
  888. 'escapeurl':1,
  889. 'listcountable':1,
  890. 'tableable':1,
  891. 'tablecellsplit':1,
  892. 'quotenotnested':1,
  893. 'keepquoteindent':1,
  894. 'tablecellaligntype':'column'
  895. },
  896. 'mgp' : {
  897. },
  898. 'tex' : {
  899. 'listcountable':1,
  900. 'tableable':1,
  901. 'tablecellsplit':1,
  902. 'preareanotescaped':1,
  903. 'listmaxdepth':4,
  904. 'tablecellaligntype':'column'
  905. },
  906. 'moin': {
  907. 'linkable':1,
  908. 'tableable':1
  909. },
  910. 'man' : {
  911. 'indentprearea':1,
  912. 'listnotnested':1
  913. },
  914. 'pm6' : {
  915. }
  916. }
  917. # populate return dictionary
  918. myrules = rules[doctype]
  919. for key in allrules : ret[key] = 0 # reset all
  920. for key in myrules.keys(): ret[key] = myrules[key] # turn ON
  921. return ret
def getRegexes():
    """Build and return the dictionary of all compiled regexes used to
    recognize the txt2tags source markup (titles, fonts, lists, tables,
    links, images, macros and %! special commands).
    """
    regex = {
    # extra at end: (\[(?P<label>\w+)\])?
     'title':
        re.compile(r'^\s*(?P<tag>={1,5})(?P<txt>[^=].*[^=])\1\s*$'),
     'areaPreOpen':
        re.compile(r'^---$'),
     'areaPreClose':
        re.compile(r'^---$'),
     'quote':
        re.compile(r'^\t+'),
     '1linePre':
    #   re.compile(r'^--- '),
        re.compile(r'^--- (?=.)'),
     'fontMono':
        re.compile(r'`([^`]+)`'),
     'fontBold':
        re.compile(r'\*\*([^\s*].*?)\*\*'),
     'fontItalic':
        re.compile(r'(^|[^:])//([^ /].*?)//'),
     'fontUnderline':
        re.compile(r'__([^_].*?)__'),    # underline lead/trailing blank
     'fontBolditalic':
        re.compile(r'\*/([^/].*?)/\*'),
     'list':
        re.compile(r'^( *)([+-]) ([^ ])'),
     'deflist':
        re.compile(r'^( *)(=) ([^:]+):'),
     'bar':
        re.compile(r'^\s*([_=-]{20,})\s*$'),
     'table':
        re.compile(r'^ *\|\|? '),
     'blankline':
        re.compile(r'^\s*$'),
     'comment':
        re.compile(r'^%'),
     'raw':
        re.compile(r'``(.+?)``')
    }
    # special char to place data on TAGs contents (\a == bell)
    regex['x'] = re.compile('\a')
    # %%date [ (formatting) ]
    regex['date'] = re.compile(r'%%date\b(\((?P<fmt>.*?)\))?', re.I)
    ### complicated regexes begin here ;)
    #
    # textual descriptions on --help's style: [...] is optional, | is OR
    ### first, some auxiliar variables
    #
    # [image.EXT]
    patt_img = r'\[([\w_,.+%$#@!?+~/-]+\.(png|jpe?g|gif|eps|bmp))\]'
    # link things
    urlskel = {
        'proto' : r'(https?|ftp|news|telnet|gopher|wais)://',
        'guess' : r'(www[23]?|ftp)\.',    # w/out proto, try to guess
        'login' : r'A-Za-z0-9_.-',        # for ftp://login@domain.com
        'pass'  : r'[^ @]*',              # for ftp://login:password@dom.com
        'chars' : r'A-Za-z0-9%._/~:,=$@-',# %20(space), :80(port)
        'anchor': r'A-Za-z0-9%._-',       # %nn(encoded)
        'form'  : r'A-Za-z0-9/%&=+.,@*_-',# .,@*_-(as is)
        'punct' : r'.,;:!?'
    }
    # username [ :password ] @
    patt_url_login = r'([%s]+(:%s)?@)?'%(urlskel['login'],urlskel['pass'])
    # [ http:// ] [ username:password@ ] domain.com [ / ]
    # [ #anchor | ?form=data ]
    retxt_url = r'\b(%s%s|%s)[%s]+\b/*(\?[%s]+)?(#[%s]+)?'%(
        urlskel['proto'],patt_url_login, urlskel['guess'],
        urlskel['chars'],urlskel['form'],urlskel['anchor'])
    # filename | [ filename ] #anchor
    retxt_url_local = r'[%s]+|[%s]*(#[%s]+)'%(
        urlskel['chars'],urlskel['chars'],urlskel['anchor'])
    # user@domain [ ?form=data ]
    patt_email = r'\b[%s]+@([A-Za-z0-9_-]+\.)+[A-Za-z]{2,4}\b(\?[%s]+)?'%(
        urlskel['login'],urlskel['form'])
    # saving for future use
    regex['_urlskel'] = urlskel
    ### and now the real regexes
    #
    regex['email'] = re.compile(patt_email,re.I)
    # email | url
    regex['link'] = \
        re.compile(r'%s|%s'%(retxt_url,patt_email), re.I)
    # \[ label | imagetag url | email | filename \]
    # NOTE(review): re.L (LOCALE) on a str pattern is rejected by
    # Python 3.6+ and only affects \w matching; harmless on the
    # Python 2 this file targets -- confirm before porting
    regex['linkmark'] = \
        re.compile(r'\[(?P<label>%s|[^]]+) (?P<link>%s|%s|%s)\]'%(
            patt_img, retxt_url, patt_email, retxt_url_local),
            re.L+re.I)
    # image
    regex['img'] = re.compile(patt_img, re.L+re.I)
    # all macros
    regex['macro'] = regex['date']
    # special things
    regex['special'] = re.compile(r'^%!\s*')
    regex['command'] = re.compile(r'(Include)\s*:\s*(.+)\s*$',re.I)
    return regex
  1017. ### END OF regex nightmares
class SubareaMaster:
    """Tiny stack tracking which sub-area (e.g. list/quote/table) is
    currently open; calling the instance returns the innermost one."""
    def __init__(self) : self.x = []   # stack of open area names
    def __call__(self) :
        # return the current (innermost) area, or '' when none is open
        if not self.x: return ''
        return self.x[-1]
    def add(self, area):
        # push area only if it is not already the innermost one
        if not self.x or (self.x and self.x[-1] != area):
            self.x.append(area)
            Debug('subarea ++ (%s): %s' % (area,self.x), 1)
    def pop(self, area=None):
        # pop only when the innermost open area matches the requested one
        if area and self.x[-1] == area: self.x.pop()
        Debug('subarea -- (%s): %s' % (area,self.x), 1)
  1030. def doHeader(headers, CONF):
  1031. if CONF['noheaders']: return []
  1032. doctype = CONF['type']
  1033. if not HEADER_TEMPLATE.has_key(doctype):
  1034. Error("doheader: Unknow doctype '%s'"%doctype)
  1035. template = string.split(HEADER_TEMPLATE[doctype], '\n')
  1036. head_data = {'STYLE':'', 'ENCODING':''}
  1037. for key in head_data.keys():
  1038. val = CONF.get(string.lower(key))
  1039. if key == 'ENCODING': val = get_encoding_string(val, doctype)
  1040. head_data[key] = val
  1041. # parse header contents
  1042. for i in 0,1,2:
  1043. contents = doDateMacro(headers[i]) # expand %%date
  1044. # Escapes - on tex, just do it if any \tag{} present
  1045. if doctype != 'tex' or \
  1046. (doctype == 'tex' and re.search(r'\\\w+{', contents)):
  1047. contents = doEscape(doctype, contents)
  1048. head_data['HEADER%d'%(i+1)] = contents
  1049. Debug("Header Data: %s"%head_data, 1)
  1050. # scan for empty dictionary keys
  1051. # if found, scan template lines for that key reference
  1052. # if found, remove the reference
  1053. # if there isn't any other key reference on the same line, remove it
  1054. for key in head_data.keys():
  1055. if head_data.get(key): continue
  1056. for line in template:
  1057. if string.count(line, '%%(%s)s'%key):
  1058. sline = string.replace(line, '%%(%s)s'%key, '')
  1059. if not re.search(r'%\([A-Z0-9]+\)s', sline):
  1060. template.remove(line)
  1061. # populate template with data
  1062. template = string.join(template, '\n') % head_data
  1063. ### post processing
  1064. #
  1065. # let tex format today
  1066. if doctype == 'tex' and head_data['HEADER3'] == currdate:
  1067. template = re.sub(r'\\date\{.*?}', r'\date', template)
  1068. return string.split(template, '\n')
  1069. def doDateMacro(line):
  1070. re_date = getRegexes()['date']
  1071. while re_date.search(line):
  1072. m = re_date.search(line)
  1073. fmt = m.group('fmt') or ''
  1074. dateme = currdate
  1075. if fmt: dateme = strftime(fmt,localtime(time()))
  1076. line = re_date.sub(dateme,line,1)
  1077. return line
  1078. def doCommentLine(doctype,txt):
  1079. # the -- string ends a sgml comment :(
  1080. if doctype == 'sgml':
  1081. txt = string.replace(txt, '--', '\\-\\-')
  1082. if TAGS['comment']:
  1083. return regex['x'].sub(txt, TAGS['comment'])
  1084. return ''
  1085. def doFooter(CONF):
  1086. ret = []
  1087. doctype = CONF['type']
  1088. cmdline = CONF['cmdline']
  1089. typename = doctype
  1090. if doctype == 'tex': typename = 'LaTeX2e'
  1091. ppgd = '%s code generated by txt2tags %s (%s)'%(
  1092. typename,my_version,my_url)
  1093. cmdline = 'cmdline: txt2tags %s'%string.join(cmdline[1:], ' ')
  1094. ret.append('\n'+doCommentLine(doctype,ppgd))
  1095. ret.append(doCommentLine(doctype,cmdline))
  1096. ret.append(TAGS['EOD'])
  1097. return ret
  1098. def doEscape(doctype,txt):
  1099. if doctype in ['html','sgml']:
  1100. txt = re.sub('&','&amp;',txt)
  1101. txt = re.sub('<','&lt;',txt)
  1102. txt = re.sub('>','&gt;',txt)
  1103. if doctype == 'sgml':
  1104. txt = re.sub('\xff','&yuml;',txt) # "+y
  1105. elif doctype == 'pm6':
  1106. txt = re.sub('<','<\#60>',txt)
  1107. elif doctype == 'mgp':
  1108. txt = re.sub('^%',' %',txt) # add leading blank to avoid parse
  1109. elif doctype == 'man':
  1110. txt = re.sub('^\.', ' .',txt) # command ID
  1111. txt = doEscapeEscapechar(txt)
  1112. elif doctype == 'tex':
  1113. txt = string.replace(txt, ESCCHAR, maskEscapeChar(r'\verb!\!'))
  1114. txt = string.replace(txt, '~', maskEscapeChar(r'\verb!~!'))
  1115. txt = string.replace(txt, '^', maskEscapeChar(r'\verb!^!'))
  1116. txt = re.sub('([#$&%{}])', r'\\\1', txt)
  1117. # TIP the _ is escaped at the end
  1118. return txt
  1119. def doFinalEscape(doctype, txt):
  1120. "Last escapes of each line"
  1121. if doctype == 'pm6' : txt = string.replace(txt,ESCCHAR+'<',r'<\#92><')
  1122. elif doctype == 'man' : txt = string.replace(txt, '-', r'\-')
  1123. elif doctype == 'tex' : txt = string.replace(txt, '_', r'\_')
  1124. elif doctype == 'sgml': txt = string.replace(txt, '[', '&lsqb;')
  1125. return txt
  1126. def doEscapeEscapechar(txt):
  1127. "Double all Escape Chars"
  1128. return string.replace(txt, ESCCHAR, ESCCHAR*2)
  1129. def EscapeCharHandler(action, data):
  1130. "Mask/Unmask the Escape Char on the given string"
  1131. if not string.strip(data): return data
  1132. if action not in ['mask','unmask']:
  1133. Error("EscapeCharHandler: Invalid action '%s'"%action)
  1134. if action == 'mask': return string.replace(data,'\\',ESCCHAR)
  1135. else: return string.replace(data,ESCCHAR,'\\')
  1136. def maskEscapeChar(data):
  1137. "Replace any Escape Char \ with a text mask (Input: str or list)"
  1138. if type(data) == type([]):
  1139. return map(lambda x: EscapeCharHandler('mask', x), data)
  1140. return EscapeCharHandler('mask',data)
  1141. def unmaskEscapeChar(data):
  1142. "Undo the Escape char \ masking (Input: str or list)"
  1143. if type(data) == type([]):
  1144. return map(lambda x: EscapeCharHandler('unmask', x), data)
  1145. return EscapeCharHandler('unmask',data)
  1146. def addLineBreaks(list):
  1147. "use LB to respect sys.platform"
  1148. ret = []
  1149. for line in list:
  1150. line = string.replace(line,'\n',LB) # embedded \n's
  1151. ret.append(line+LB) # add final line break
  1152. return ret
  1153. def doPreLine(doctype,line):
  1154. "Parsing procedures for preformatted (verbatim) lines"
  1155. if not rules['preareanotescaped']: line = doEscape(doctype,line)
  1156. if rules['indentprearea']: line = ' '+line
  1157. if doctype == 'pm6': line = doFinalEscape(doctype, line)
  1158. return line
  1159. def doCloseTable(doctype):
  1160. global subarea, tableborder
  1161. ret = ''
  1162. if rules['tableable']:
  1163. if doctype == 'tex' and tableborder:
  1164. ret = TAGS['tableLineOpen']+TAGS['tableClose']+'\n'
  1165. else:
  1166. ret = TAGS['tableClose']+'\n'
  1167. else:
  1168. ret = TAGS['areaPreClose']
  1169. tableborder = 0
  1170. subarea.pop('table')
  1171. return ret
  1172. def doCloseQuote(howmany=None):
  1173. global quotedepth
  1174. ret = []
  1175. if not howmany: howmany = len(quotedepth)
  1176. for i in range(howmany):
  1177. quotedepth.pop()
  1178. #TODO align open/close tag -> FREE_ALING_TAG = 1 (man not)
  1179. ret.append(TAGS['areaQuoteClose'])
  1180. if not quotedepth: subarea.pop('quote')
  1181. return string.join(ret,'\n')
  1182. def doCloseList(howmany=None):
  1183. global listindent, listids
  1184. ret = []
  1185. if not howmany: howmany = len(listindent)
  1186. for i in range(howmany):
  1187. if listids[-1] == '-': tag = TAGS['listClose']
  1188. elif listids[-1] == '+': tag = TAGS['numlistClose']
  1189. elif listids[-1] == '=': tag = TAGS['deflistClose']
  1190. if not tag: tag = TAGS['listClose'] # default
  1191. if tag:
  1192. # unnested lists are only closed at mother-list
  1193. if rules['listnotnested']:
  1194. if len(listindent) == 1:
  1195. ret.append(tag)
  1196. else:
  1197. ret.append(listindent[-1]+tag)
  1198. del listindent[-1]
  1199. del listids[-1]
  1200. if not listindent: subarea.pop('list')
  1201. return string.join(ret,'\n')
  1202. def beautify_me(name, doctype, line):
  1203. "where name is: bold, italic, underline or bolditalic"
  1204. name = 'font%s' % string.capitalize(name)
  1205. open = TAGS['%sOpen'%name]
  1206. close = TAGS['%sClose'%name]
  1207. txt = r'%s\1%s'%(open, close)
  1208. if name == 'fontItalic':
  1209. txt = r'\1%s\2%s'%(open, close)
  1210. line = regex[name].sub(txt,line)
  1211. return line
def get_tagged_link(label, url, CONF):
    """Return the target-tagged version of a link (URL or email).

    label: optional link text (empty string for bare links)
    url  : the link address as found on the source
    CONF : configuration dictionary (uses 'type' and 'maskemail')
    """
    ret = ''
    doctype = CONF['type']
    # set link type
    if regex['email'].match(url):
        linktype = 'email'
    else:
        linktype = 'url';
    # escape specials from TEXT parts
    label = doEscape(doctype,label)
    # escape specials from link URL
    if rules['linkable'] and rules['escapeurl']:
        url = doEscape(doctype, url)
    # if not linkable, the URL is plain text, that needs escape
    if not rules['linkable']:
        if doctype == 'tex':
            url = re.sub('^#', '\#', url)   # ugly, but compile
        else:
            url = doEscape(doctype,url)
    # adding protocol to guessed link (www.* -> http, ftp.* -> ftp)
    guessurl = ''
    if linktype == 'url' and \
       re.match(regex['_urlskel']['guess'], url):
        if url[0] == 'w': guessurl = 'http://' +url
        else            : guessurl = 'ftp://'  +url
        # not link aware targets -> protocol is useless
        if not rules['linkable']: guessurl = ''
    # simple link (not guessed)
    if not label and not guessurl:
        if CONF['maskemail'] and linktype == 'email':
            # do the email mask feature (no TAGs, just text)
            url = string.replace(url,'@',' (a) ')
            url = string.replace(url,'.',' ')
            url = "<%s>" % url
            if rules['linkable']: url = doEscape(doctype, url)
            ret = url
        else:
            # just add link data to tag
            tag = TAGS[linktype]
            ret = regex['x'].sub(url,tag)
    # named link or guessed simple link
    else:
        # adjusts for guessed link
        if not label: label = url       # no protocol
        if guessurl : url   = guessurl  # with protocol
        # change image tag for !supported img+link targets
        if regex['img'].match(label) and not rules['imglinkable']:
            label = "(%s)"%regex['img'].match(label).group(1)
        # putting data on the right appearance order
        if rules['linkable']:
            urlorder = [url, label]     # link before label
        else:
            urlorder = [label, url]     # label before link
        # add link data to tag (replace \a's)
        ret = TAGS["%sMark"%linktype]
        for data in urlorder:
            ret = regex['x'].sub(data,ret,1)
    return ret
  1270. def get_image_align(line):
  1271. align = ''
  1272. line = string.strip(line)
  1273. m = regex['img'].search(line)
  1274. ini = m.start() ; head = 0
  1275. end = m.end() ; tail = len(line)
  1276. align = 'center' # default align # ^text +img +text$
  1277. if ini == head and end == tail: align = 'para' # ^img$
  1278. elif ini == head: align = 'left' # ^img + text$
  1279. elif end == tail: align = 'right' # ^text + img$
  1280. return align
  1281. def get_tablecell_align(cells):
  1282. ret = []
  1283. for cell in cells:
  1284. align = 'Left'
  1285. if string.strip(cell):
  1286. if cell[0] == ' ' and cell[-1] == ' ': align = 'Center'
  1287. elif cell[0] == ' ': align = 'Right'
  1288. ret.append(align)
  1289. return ret
  1290. def get_table_prop(line):
  1291. # default table proprierties
  1292. ret = {'border':0,'header':0,'align':'Left','cells':[],'cellalign':[]}
  1293. # detect table align (and remove spaces mark)
  1294. if line[0] == ' ': ret['align'] = 'Center'
  1295. line = string.lstrip(line)
  1296. # detect header (title) mark
  1297. if line[1] == '|':
  1298. ret['header'] = 1
  1299. # delete trailing spaces after last cell border
  1300. line = re.sub('\|\s*$','|', line)
  1301. # detect (and delete) border mark (and leading sp

Large files files are truncated, but you can click here to view the full file