PageRenderTime 54ms CodeModel.GetById 16ms RepoModel.GetById 0ms app.codeStats 0ms

/lib/textile/functions.py

https://code.google.com/p/theokayblog/
Python | 981 lines | 920 code | 41 blank | 20 comment | 42 complexity | e9ed47aa26accad2a11e8785ef26fd8f MD5 | raw file
Possible License(s): BSD-3-Clause
  1. #!/usr/bin/env python
  2. """
  3. PyTextile
  4. A Humane Web Text Generator
  5. """
  6. __version__ = '2.1.4'
  7. __date__ = '2009/12/04'
  8. __copyright__ = """
  9. Copyright (c) 2009, Jason Samsa, http://jsamsa.com/
  10. Copyright (c) 2004, Roberto A. F. De Almeida, http://dealmeida.net/
  11. Copyright (c) 2003, Mark Pilgrim, http://diveintomark.org/
  12. Original PHP Version:
  13. Copyright (c) 2003-2004, Dean Allen <dean@textism.com>
  14. All rights reserved.
  15. Thanks to Carlo Zottmann <carlo@g-blog.net> for refactoring
  16. Textile's procedural code into a class framework
  17. Additions and fixes Copyright (c) 2006 Alex Shiels http://thresholdstate.com/
  18. """
  19. __license__ = """
  20. L I C E N S E
  21. =============
  22. Redistribution and use in source and binary forms, with or without
  23. modification, are permitted provided that the following conditions are met:
  24. * Redistributions of source code must retain the above copyright notice,
  25. this list of conditions and the following disclaimer.
  26. * Redistributions in binary form must reproduce the above copyright notice,
  27. this list of conditions and the following disclaimer in the documentation
  28. and/or other materials provided with the distribution.
  29. * Neither the name Textile nor the names of its contributors may be used to
  30. endorse or promote products derived from this software without specific
  31. prior written permission.
  32. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  33. AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  34. IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  35. ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  36. LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  37. CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  38. SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  39. INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  40. CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  41. ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  42. POSSIBILITY OF SUCH DAMAGE.
  43. """
  44. import re
  45. import uuid
  46. from urlparse import urlparse
  47. def _normalize_newlines(string):
  48. out = re.sub(r'\r\n', '\n', string)
  49. out = re.sub(r'\n{3,}', '\n\n', out)
  50. out = re.sub(r'\n\s*\n', '\n\n', out)
  51. out = re.sub(r'"$', '" ', out)
  52. return out
  53. def getimagesize(url):
  54. """
  55. Attempts to determine an image's width and height, and returns a string
  56. suitable for use in an <img> tag, or None in case of failure.
  57. Requires that PIL is installed.
  58. >>> getimagesize("http://www.google.com/intl/en_ALL/images/logo.gif")
  59. ... #doctest: +ELLIPSIS, +SKIP
  60. 'width="..." height="..."'
  61. """
  62. try:
  63. import ImageFile
  64. import urllib2
  65. except ImportError:
  66. return None
  67. try:
  68. p = ImageFile.Parser()
  69. f = urllib2.urlopen(url)
  70. while True:
  71. s = f.read(1024)
  72. if not s:
  73. break
  74. p.feed(s)
  75. if p.image:
  76. return 'width="%i" height="%i"' % p.image.size
  77. except (IOError, ValueError):
  78. return None
  79. class Textile(object):
  80. hlgn = r'(?:\<(?!>)|(?<!<)\>|\<\>|\=|[()]+(?! ))'
  81. vlgn = r'[\-^~]'
  82. clas = r'(?:\([^)]+\))'
  83. lnge = r'(?:\[[^\]]+\])'
  84. styl = r'(?:\{[^}]+\})'
  85. cspn = r'(?:\\\d+)'
  86. rspn = r'(?:\/\d+)'
  87. a = r'(?:%s|%s)*' % (hlgn, vlgn)
  88. s = r'(?:%s|%s)*' % (cspn, rspn)
  89. c = r'(?:%s)*' % '|'.join([clas, styl, lnge, hlgn])
  90. pnct = r'[-!"#$%&()*+,/:;<=>?@\'\[\\\]\.^_`{|}~]'
  91. # urlch = r'[\w"$\-_.+!*\'(),";/?:@=&%#{}|\\^~\[\]`]'
  92. urlch = '[\w"$\-_.+*\'(),";\/?:@=&%#{}|\\^~\[\]`]'
  93. url_schemes = ('http', 'https', 'ftp', 'mailto')
  94. btag = ('bq', 'bc', 'notextile', 'pre', 'h[1-6]', 'fn\d+', 'p')
  95. btag_lite = ('bq', 'bc', 'p')
  96. glyph_defaults = (
  97. ('txt_quote_single_open', '&#8216;'),
  98. ('txt_quote_single_close', '&#8217;'),
  99. ('txt_quote_double_open', '&#8220;'),
  100. ('txt_quote_double_close', '&#8221;'),
  101. ('txt_apostrophe', '&#8217;'),
  102. ('txt_prime', '&#8242;'),
  103. ('txt_prime_double', '&#8243;'),
  104. ('txt_ellipsis', '&#8230;'),
  105. ('txt_emdash', '&#8212;'),
  106. ('txt_endash', '&#8211;'),
  107. ('txt_dimension', '&#215;'),
  108. ('txt_trademark', '&#8482;'),
  109. ('txt_registered', '&#174;'),
  110. ('txt_copyright', '&#169;'),
  111. )
  112. def __init__(self, restricted=False, lite=False, noimage=False):
  113. """docstring for __init__"""
  114. self.restricted = restricted
  115. self.lite = lite
  116. self.noimage = noimage
  117. self.get_sizes = False
  118. self.fn = {}
  119. self.urlrefs = {}
  120. self.shelf = {}
  121. self.rel = ''
  122. self.html_type = 'xhtml'
  123. def textile(self, text, rel=None, head_offset=0, html_type='xhtml'):
  124. """
  125. >>> import textile
  126. >>> textile.textile('some textile')
  127. u'\\t<p>some textile</p>'
  128. """
  129. self.html_type = html_type
  130. # text = unicode(text)
  131. text = _normalize_newlines(text)
  132. if self.restricted:
  133. text = self.encode_html(text, quotes=False)
  134. if rel:
  135. self.rel = ' rel="%s"' % rel
  136. text = self.getRefs(text)
  137. text = self.block(text, int(head_offset))
  138. text = self.retrieve(text)
  139. return text
  140. def pba(self, input, element=None):
  141. """
  142. Parse block attributes.
  143. >>> t = Textile()
  144. >>> t.pba(r'\3')
  145. ''
  146. >>> t.pba(r'\\3', element='td')
  147. ' colspan="3"'
  148. >>> t.pba(r'/4', element='td')
  149. ' rowspan="4"'
  150. >>> t.pba(r'\\3/4', element='td')
  151. ' colspan="3" rowspan="4"'
  152. >>> t.vAlign('^')
  153. 'top'
  154. >>> t.pba('^', element='td')
  155. ' style="vertical-align:top;"'
  156. >>> t.pba('{line-height:18px}')
  157. ' style="line-height:18px;"'
  158. >>> t.pba('(foo-bar)')
  159. ' class="foo-bar"'
  160. >>> t.pba('(#myid)')
  161. ' id="myid"'
  162. >>> t.pba('(foo-bar#myid)')
  163. ' class="foo-bar" id="myid"'
  164. >>> t.pba('((((')
  165. ' style="padding-left:4em;"'
  166. >>> t.pba(')))')
  167. ' style="padding-right:3em;"'
  168. >>> t.pba('[fr]')
  169. ' lang="fr"'
  170. """
  171. style = []
  172. aclass = ''
  173. lang = ''
  174. colspan = ''
  175. rowspan = ''
  176. id = ''
  177. if not input:
  178. return ''
  179. matched = input
  180. if element == 'td':
  181. m = re.search(r'\\(\d+)', matched)
  182. if m:
  183. colspan = m.group(1)
  184. m = re.search(r'/(\d+)', matched)
  185. if m:
  186. rowspan = m.group(1)
  187. if element == 'td' or element == 'tr':
  188. m = re.search(r'(%s)' % self.vlgn, matched)
  189. if m:
  190. style.append("vertical-align:%s;" % self.vAlign(m.group(1)))
  191. m = re.search(r'\{([^}]*)\}', matched)
  192. if m:
  193. style.append(m.group(1).rstrip(';') + ';')
  194. matched = matched.replace(m.group(0), '')
  195. m = re.search(r'\[([^\]]+)\]', matched, re.U)
  196. if m:
  197. lang = m.group(1)
  198. matched = matched.replace(m.group(0), '')
  199. m = re.search(r'\(([^()]+)\)', matched, re.U)
  200. if m:
  201. aclass = m.group(1)
  202. matched = matched.replace(m.group(0), '')
  203. m = re.search(r'([(]+)', matched)
  204. if m:
  205. style.append("padding-left:%sem;" % len(m.group(1)))
  206. matched = matched.replace(m.group(0), '')
  207. m = re.search(r'([)]+)', matched)
  208. if m:
  209. style.append("padding-right:%sem;" % len(m.group(1)))
  210. matched = matched.replace(m.group(0), '')
  211. m = re.search(r'(%s)' % self.hlgn, matched)
  212. if m:
  213. style.append("text-align:%s;" % self.hAlign(m.group(1)))
  214. m = re.search(r'^(.*)#(.*)$', aclass)
  215. if m:
  216. id = m.group(2)
  217. aclass = m.group(1)
  218. if self.restricted:
  219. if lang:
  220. return ' lang="%s"'
  221. else:
  222. return ''
  223. result = []
  224. if style:
  225. result.append(' style="%s"' % "".join(style))
  226. if aclass:
  227. result.append(' class="%s"' % aclass)
  228. if lang:
  229. result.append(' lang="%s"' % lang)
  230. if id:
  231. result.append(' id="%s"' % id)
  232. if colspan:
  233. result.append(' colspan="%s"' % colspan)
  234. if rowspan:
  235. result.append(' rowspan="%s"' % rowspan)
  236. return ''.join(result)
  237. def hasRawText(self, text):
  238. """
  239. checks whether the text has text not already enclosed by a block tag
  240. >>> t = Textile()
  241. >>> t.hasRawText('<p>foo bar biz baz</p>')
  242. False
  243. >>> t.hasRawText(' why yes, yes it does')
  244. True
  245. """
  246. r = re.compile(r'<(p|blockquote|div|form|table|ul|ol|pre|h\d)[^>]*?>.*</\1>', re.S).sub('', text.strip()).strip()
  247. r = re.compile(r'<(hr|br)[^>]*?/>').sub('', r)
  248. return '' != r
  249. def table(self, text):
  250. r"""
  251. >>> t = Textile()
  252. >>> t.table('|one|two|three|\n|a|b|c|')
  253. '\t<table>\n\t\t<tr>\n\t\t\t<td>one</td>\n\t\t\t<td>two</td>\n\t\t\t<td>three</td>\n\t\t</tr>\n\t\t<tr>\n\t\t\t<td>a</td>\n\t\t\t<td>b</td>\n\t\t\t<td>c</td>\n\t\t</tr>\n\t</table>\n\n'
  254. """
  255. text = text + "\n\n"
  256. pattern = re.compile(r'^(?:table(_?%(s)s%(a)s%(c)s)\. ?\n)?^(%(a)s%(c)s\.? ?\|.*\|)\n\n' % {'s':self.s, 'a':self.a, 'c':self.c}, re.S|re.M|re.U)
  257. return pattern.sub(self.fTable, text)
  258. def fTable(self, match):
  259. tatts = self.pba(match.group(1), 'table')
  260. rows = []
  261. for row in [ x for x in match.group(2).split('\n') if x]:
  262. rmtch = re.search(r'^(%s%s\. )(.*)' % (self.a, self.c), row.lstrip())
  263. if rmtch:
  264. ratts = self.pba(rmtch.group(1), 'tr')
  265. row = rmtch.group(2)
  266. else:
  267. ratts = ''
  268. cells = []
  269. for cell in row.split('|')[1:-1]:
  270. ctyp = 'd'
  271. if re.search(r'^_', cell):
  272. ctyp = "h"
  273. cmtch = re.search(r'^(_?%s%s%s\. )(.*)' % (self.s, self.a, self.c), cell)
  274. if cmtch:
  275. catts = self.pba(cmtch.group(1), 'td')
  276. cell = cmtch.group(2)
  277. else:
  278. catts = ''
  279. cell = self.graf(self.span(cell))
  280. cells.append('\t\t\t<t%s%s>%s</t%s>' % (ctyp, catts, cell, ctyp))
  281. rows.append("\t\t<tr%s>\n%s\n\t\t</tr>" % (ratts, '\n'.join(cells)))
  282. cells = []
  283. catts = None
  284. return "\t<table%s>\n%s\n\t</table>\n\n" % (tatts, '\n'.join(rows))
  285. def lists(self, text):
  286. """
  287. >>> t = Textile()
  288. >>> t.lists("* one\\n* two\\n* three")
  289. '\\t<ul>\\n\\t\\t<li>one</li>\\n\\t\\t<li>two</li>\\n\\t\\t<li>three</li>\\n\\t</ul>'
  290. """
  291. pattern = re.compile(r'^([#*]+%s .*)$(?![^#*])' % self.c, re.U|re.M|re.S)
  292. return pattern.sub(self.fList, text)
  293. def fList(self, match):
  294. text = match.group(0).split("\n")
  295. result = []
  296. lists = []
  297. for i, line in enumerate(text):
  298. try:
  299. nextline = text[i+1]
  300. except IndexError:
  301. nextline = ''
  302. m = re.search(r"^([#*]+)(%s%s) (.*)$" % (self.a, self.c), line, re.S)
  303. if m:
  304. tl, atts, content = m.groups()
  305. nl = ''
  306. nm = re.search(r'^([#*]+)\s.*', nextline)
  307. if nm:
  308. nl = nm.group(1)
  309. if tl not in lists:
  310. lists.append(tl)
  311. atts = self.pba(atts)
  312. line = "\t<%sl%s>\n\t\t<li>%s" % (self.lT(tl), atts, self.graf(content))
  313. else:
  314. line = "\t\t<li>" + self.graf(content)
  315. if len(nl) <= len(tl):
  316. line = line + "</li>"
  317. for k in reversed(lists):
  318. if len(k) > len(nl):
  319. line = line + "\n\t</%sl>" % self.lT(k)
  320. if len(k) > 1:
  321. line = line + "</li>"
  322. lists.remove(k)
  323. result.append(line)
  324. return "\n".join(result)
  325. def lT(self, input):
  326. if re.search(r'^#+', input):
  327. return 'o'
  328. else:
  329. return 'u'
  330. def doPBr(self, in_):
  331. return re.compile(r'<(p)([^>]*?)>(.*)(</\1>)', re.S).sub(self.doBr, in_)
  332. def doBr(self, match):
  333. if self.html_type == 'html':
  334. content = re.sub(r'(.+)(?:(?<!<br>)|(?<!<br />))\n(?![#*\s|])', '\\1<br>', match.group(3))
  335. else:
  336. content = re.sub(r'(.+)(?:(?<!<br>)|(?<!<br />))\n(?![#*\s|])', '\\1<br />', match.group(3))
  337. return '<%s%s>%s%s' % (match.group(1), match.group(2), content, match.group(4))
  338. def block(self, text, head_offset = 0):
  339. """
  340. >>> t = Textile()
  341. >>> t.block('h1. foobar baby')
  342. '\\t<h1>foobar baby</h1>'
  343. """
  344. if not self.lite:
  345. tre = '|'.join(self.btag)
  346. else:
  347. tre = '|'.join(self.btag_lite)
  348. text = text.split('\n\n')
  349. tag = 'p'
  350. atts = cite = graf = ext = ''
  351. out = []
  352. anon = False
  353. for line in text:
  354. pattern = r'^(%s)(%s%s)\.(\.?)(?::(\S+))? (.*)$' % (tre, self.a, self.c)
  355. match = re.search(pattern, line, re.S)
  356. if match:
  357. if ext:
  358. out.append(out.pop() + c1)
  359. tag, atts, ext, cite, graf = match.groups()
  360. h_match = re.search(r'h([1-6])', tag)
  361. if h_match:
  362. head_level, = h_match.groups()
  363. tag = 'h%i' % max(1,
  364. min(int(head_level) + head_offset,
  365. 6))
  366. o1, o2, content, c2, c1 = self.fBlock(tag, atts, ext,
  367. cite, graf)
  368. # leave off c1 if this block is extended,
  369. # we'll close it at the start of the next block
  370. if ext:
  371. line = "%s%s%s%s" % (o1, o2, content, c2)
  372. else:
  373. line = "%s%s%s%s%s" % (o1, o2, content, c2, c1)
  374. else:
  375. anon = True
  376. if ext or not re.search(r'^\s', line):
  377. o1, o2, content, c2, c1 = self.fBlock(tag, atts, ext,
  378. cite, line)
  379. # skip $o1/$c1 because this is part of a continuing
  380. # extended block
  381. if tag == 'p' and not self.hasRawText(content):
  382. line = content
  383. else:
  384. line = "%s%s%s" % (o2, content, c2)
  385. else:
  386. line = self.graf(line)
  387. line = self.doPBr(line)
  388. if self.html_type == 'xhtml':
  389. line = re.sub(r'<br>', '<br />', line)
  390. if ext and anon:
  391. out.append(out.pop() + "\n" + line)
  392. else:
  393. out.append(line)
  394. if not ext:
  395. tag = 'p'
  396. atts = ''
  397. cite = ''
  398. graf = ''
  399. if ext:
  400. out.append(out.pop() + c1)
  401. return '\n\n'.join(out)
  402. def fBlock(self, tag, atts, ext, cite, content):
  403. """
  404. >>> t = Textile()
  405. >>> t.fBlock("bq", "", None, "", "Hello BlockQuote")
  406. ('\\t<blockquote>\\n', '\\t\\t<p>', 'Hello BlockQuote', '</p>', '\\n\\t</blockquote>')
  407. >>> t.fBlock("bq", "", None, "http://google.com", "Hello BlockQuote")
  408. ('\\t<blockquote cite="http://google.com">\\n', '\\t\\t<p>', 'Hello BlockQuote', '</p>', '\\n\\t</blockquote>')
  409. >>> t.fBlock("bc", "", None, "", 'printf "Hello, World";') # doctest: +ELLIPSIS
  410. ('<pre>', '<code>', ..., '</code>', '</pre>')
  411. >>> t.fBlock("h1", "", None, "", "foobar")
  412. ('', '\\t<h1>', 'foobar', '</h1>', '')
  413. """
  414. atts = self.pba(atts)
  415. o1 = o2 = c2 = c1 = ''
  416. m = re.search(r'fn(\d+)', tag)
  417. if m:
  418. tag = 'p'
  419. if m.group(1) in self.fn:
  420. fnid = self.fn[m.group(1)]
  421. else:
  422. fnid = m.group(1)
  423. atts = atts + ' id="fn%s"' % fnid
  424. if atts.find('class=') < 0:
  425. atts = atts + ' class="footnote"'
  426. content = ('<sup>%s</sup>' % m.group(1)) + content
  427. if tag == 'bq':
  428. cite = self.checkRefs(cite)
  429. if cite:
  430. cite = ' cite="%s"' % cite
  431. else:
  432. cite = ''
  433. o1 = "\t<blockquote%s%s>\n" % (cite, atts)
  434. o2 = "\t\t<p%s>" % atts
  435. c2 = "</p>"
  436. c1 = "\n\t</blockquote>"
  437. elif tag == 'bc':
  438. o1 = "<pre%s>" % atts
  439. o2 = "<code%s>" % atts
  440. c2 = "</code>"
  441. c1 = "</pre>"
  442. content = self.shelve(self.encode_html(content.rstrip("\n") + "\n"))
  443. elif tag == 'notextile':
  444. content = self.shelve(content)
  445. o1 = o2 = ''
  446. c1 = c2 = ''
  447. elif tag == 'pre':
  448. content = self.shelve(self.encode_html(content.rstrip("\n") + "\n"))
  449. o1 = "<pre%s>" % atts
  450. o2 = c2 = ''
  451. c1 = '</pre>'
  452. else:
  453. o2 = "\t<%s%s>" % (tag, atts)
  454. c2 = "</%s>" % tag
  455. content = self.graf(content)
  456. return o1, o2, content, c2, c1
  457. def footnoteRef(self, text):
  458. """
  459. >>> t = Textile()
  460. >>> t.footnoteRef('foo[1] ') # doctest: +ELLIPSIS
  461. 'foo<sup class="footnote"><a href="#fn...">1</a></sup> '
  462. """
  463. return re.sub(r'\b\[([0-9]+)\](\s)?', self.footnoteID, text)
  464. def footnoteID(self, match):
  465. id, t = match.groups()
  466. if id not in self.fn:
  467. self.fn[id] = str(uuid.uuid4())
  468. fnid = self.fn[id]
  469. if not t:
  470. t = ''
  471. return '<sup class="footnote"><a href="#fn%s">%s</a></sup>%s' % (fnid, id, t)
  472. def glyphs(self, text):
  473. """
  474. >>> t = Textile()
  475. >>> t.glyphs("apostrophe's")
  476. 'apostrophe&#8217;s'
  477. >>> t.glyphs("back in '88")
  478. 'back in &#8217;88'
  479. >>> t.glyphs('foo ...')
  480. 'foo &#8230;'
  481. >>> t.glyphs('--')
  482. '&#8212;'
  483. >>> t.glyphs('FooBar[tm]')
  484. 'FooBar&#8482;'
  485. >>> t.glyphs("<p><cite>Cat's Cradle</cite> by Vonnegut</p>")
  486. '<p><cite>Cat&#8217;s Cradle</cite> by Vonnegut</p>'
  487. """
  488. # fix: hackish
  489. text = re.sub(r'"\Z', '\" ', text)
  490. glyph_search = (
  491. re.compile(r"(\w)\'(\w)"), # apostrophe's
  492. re.compile(r'(\s)\'(\d+\w?)\b(?!\')'), # back in '88
  493. re.compile(r'(\S)\'(?=\s|'+self.pnct+'|<|$)'), # single closing
  494. re.compile(r'\'/'), # single opening
  495. re.compile(r'(\S)\"(?=\s|'+self.pnct+'|<|$)'), # double closing
  496. re.compile(r'"'), # double opening
  497. re.compile(r'\b([A-Z][A-Z0-9]{2,})\b(?:[(]([^)]*)[)])'), # 3+ uppercase acronym
  498. re.compile(r'\b([A-Z][A-Z\'\-]+[A-Z])(?=[\s.,\)>])'), # 3+ uppercase
  499. re.compile(r'\b(\s{0,1})?\.{3}'), # ellipsis
  500. re.compile(r'(\s?)--(\s?)'), # em dash
  501. re.compile(r'\s-(?:\s|$)'), # en dash
  502. re.compile(r'(\d+)( ?)x( ?)(?=\d+)'), # dimension sign
  503. re.compile(r'\b ?[([]TM[])]', re.I), # trademark
  504. re.compile(r'\b ?[([]R[])]', re.I), # registered
  505. re.compile(r'\b ?[([]C[])]', re.I), # copyright
  506. )
  507. glyph_replace = [x % dict(self.glyph_defaults) for x in (
  508. r'\1%(txt_apostrophe)s\2', # apostrophe's
  509. r'\1%(txt_apostrophe)s\2', # back in '88
  510. r'\1%(txt_quote_single_close)s', # single closing
  511. r'%(txt_quote_single_open)s', # single opening
  512. r'\1%(txt_quote_double_close)s', # double closing
  513. r'%(txt_quote_double_open)s', # double opening
  514. r'<acronym title="\2">\1</acronym>', # 3+ uppercase acronym
  515. r'<span class="caps">\1</span>', # 3+ uppercase
  516. r'\1%(txt_ellipsis)s', # ellipsis
  517. r'\1%(txt_emdash)s\2', # em dash
  518. r' %(txt_endash)s ', # en dash
  519. r'\1\2%(txt_dimension)s\3', # dimension sign
  520. r'%(txt_trademark)s', # trademark
  521. r'%(txt_registered)s', # registered
  522. r'%(txt_copyright)s', # copyright
  523. )]
  524. result = []
  525. for line in re.compile(r'(<.*?>)', re.U).split(text):
  526. if not re.search(r'<.*>', line):
  527. for s, r in zip(glyph_search, glyph_replace):
  528. line = s.sub(r, line)
  529. result.append(line)
  530. return ''.join(result)
  531. def vAlign(self, input):
  532. d = {'^':'top', '-':'middle', '~':'bottom'}
  533. return d.get(input, '')
  534. def hAlign(self, input):
  535. d = {'<':'left', '=':'center', '>':'right', '<>': 'justify'}
  536. return d.get(input, '')
  537. def getRefs(self, text):
  538. """
  539. what is this for?
  540. """
  541. pattern = re.compile(r'(?:(?<=^)|(?<=\s))\[(.+)\]((?:http(?:s?):\/\/|\/)\S+)(?=\s|$)', re.U)
  542. text = pattern.sub(self.refs, text)
  543. return text
  544. def refs(self, match):
  545. flag, url = match.groups()
  546. self.urlrefs[flag] = url
  547. return ''
  548. def checkRefs(self, url):
  549. return self.urlrefs.get(url, url)
  550. def isRelURL(self, url):
  551. """
  552. Identify relative urls.
  553. >>> t = Textile()
  554. >>> t.isRelURL("http://www.google.com/")
  555. False
  556. >>> t.isRelURL("/foo")
  557. True
  558. """
  559. (scheme, netloc) = urlparse(url)[0:2]
  560. return not scheme and not netloc
  561. def relURL(self, url):
  562. scheme = urlparse(url)[0]
  563. if self.restricted and scheme and scheme not in self.url_schemes:
  564. return '#'
  565. return url
  566. def shelve(self, text):
  567. id = str(uuid.uuid4())
  568. self.shelf[id] = text
  569. return id
  570. def retrieve(self, text):
  571. """
  572. >>> t = Textile()
  573. >>> id = t.shelve("foobar")
  574. >>> t.retrieve(id)
  575. 'foobar'
  576. """
  577. while True:
  578. old = text
  579. for k, v in self.shelf.items():
  580. text = text.replace(k, v)
  581. if text == old:
  582. break
  583. return text
  584. def encode_html(self, text, quotes=True):
  585. a = (
  586. ('&', '&#38;'),
  587. ('<', '&#60;'),
  588. ('>', '&#62;')
  589. )
  590. if quotes:
  591. a = a + (
  592. ("'", '&#39;'),
  593. ('"', '&#34;')
  594. )
  595. for k, v in a:
  596. text = text.replace(k, v)
  597. return text
  598. def graf(self, text):
  599. if not self.lite:
  600. text = self.noTextile(text)
  601. text = self.code(text)
  602. text = self.links(text)
  603. if not self.noimage:
  604. text = self.image(text)
  605. if not self.lite:
  606. text = self.lists(text)
  607. text = self.table(text)
  608. text = self.span(text)
  609. text = self.footnoteRef(text)
  610. text = self.glyphs(text)
  611. return text.rstrip('\n')
  612. def links(self, text):
  613. """
  614. >>> t = Textile()
  615. >>> t.links('fooobar "Google":http://google.com/foobar/ and hello world "flickr":http://flickr.com/photos/jsamsa/ ') # doctest: +ELLIPSIS
  616. 'fooobar ... and hello world ...'
  617. """
  618. punct = '!"#$%&\'*+,-./:;=?@\\^_`|~'
  619. pattern = r'''
  620. (?P<pre> [\s\[{(]|[%s] )?
  621. " # start
  622. (?P<atts> %s )
  623. (?P<text> [^"]+? )
  624. \s?
  625. (?: \(([^)]+?)\)(?=") )? # $title
  626. ":
  627. (?P<url> (?:ftp|https?)? (?: :// )? [-A-Za-z0-9+&@#/?=~_()|!:,.;]*[-A-Za-z0-9+&@#/=~_()|] )
  628. (?P<post> [^\w\/;]*? )
  629. (?=<|\s|$)
  630. ''' % (re.escape(punct), self.c)
  631. text = re.compile(pattern, re.X).sub(self.fLink, text)
  632. return text
  633. def fLink(self, match):
  634. pre, atts, text, title, url, post = match.groups()
  635. if pre == None:
  636. pre = ''
  637. # assume ) at the end of the url is not actually part of the url
  638. # unless the url also contains a (
  639. if url.endswith(')') and not url.find('(') > -1:
  640. post = url[-1] + post
  641. url = url[:-1]
  642. url = self.checkRefs(url)
  643. atts = self.pba(atts)
  644. if title:
  645. atts = atts + ' title="%s"' % self.encode_html(title)
  646. if not self.noimage:
  647. text = self.image(text)
  648. text = self.span(text)
  649. text = self.glyphs(text)
  650. url = self.relURL(url)
  651. out = '<a href="%s"%s%s>%s</a>' % (self.encode_html(url), atts, self.rel, text)
  652. out = self.shelve(out)
  653. return ''.join([pre, out, post])
  654. def span(self, text):
  655. """
  656. >>> t = Textile()
  657. >>> t.span(r"hello %(bob)span *strong* and **bold**% goodbye")
  658. 'hello <span class="bob">span <strong>strong</strong> and <b>bold</b></span> goodbye'
  659. """
  660. qtags = (r'\*\*', r'\*', r'\?\?', r'\-', r'__', r'_', r'%', r'\+', r'~', r'\^')
  661. pnct = ".,\"'?!;:"
  662. for qtag in qtags:
  663. pattern = re.compile(r"""
  664. (?:^|(?<=[\s>%(pnct)s])|([\]}]))
  665. (%(qtag)s)(?!%(qtag)s)
  666. (%(c)s)
  667. (?::(\S+))?
  668. ([^\s%(qtag)s]+|\S[^%(qtag)s\n]*[^\s%(qtag)s\n])
  669. ([%(pnct)s]*)
  670. %(qtag)s
  671. (?:$|([\]}])|(?=%(selfpnct)s{1,2}|\s))
  672. """ % {'qtag':qtag, 'c':self.c, 'pnct':pnct,
  673. 'selfpnct':self.pnct}, re.X)
  674. text = pattern.sub(self.fSpan, text)
  675. return text
  676. def fSpan(self, match):
  677. _, tag, atts, cite, content, end, _ = match.groups()
  678. qtags = {
  679. '*': 'strong',
  680. '**': 'b',
  681. '??': 'cite',
  682. '_' : 'em',
  683. '__': 'i',
  684. '-' : 'del',
  685. '%' : 'span',
  686. '+' : 'ins',
  687. '~' : 'sub',
  688. '^' : 'sup'
  689. }
  690. tag = qtags[tag]
  691. atts = self.pba(atts)
  692. if cite:
  693. atts = atts + 'cite="%s"' % cite
  694. content = self.span(content)
  695. out = "<%s%s>%s%s</%s>" % (tag, atts, content, end, tag)
  696. return out
  697. def image(self, text):
  698. """
  699. >>> t = Textile()
  700. >>> t.image('!/imgs/myphoto.jpg!:http://jsamsa.com')
  701. '<a href="http://jsamsa.com"><img src="/imgs/myphoto.jpg" alt="" /></a>'
  702. """
  703. pattern = re.compile(r"""
  704. (?:[\[{])? # pre
  705. \! # opening !
  706. (%s) # optional style,class atts
  707. (?:\. )? # optional dot-space
  708. ([^\s(!]+) # presume this is the src
  709. \s? # optional space
  710. (?:\(([^\)]+)\))? # optional title
  711. \! # closing
  712. (?::(\S+))? # optional href
  713. (?:[\]}]|(?=\s|$)) # lookahead: space or end of string
  714. """ % self.c, re.U|re.X)
  715. return pattern.sub(self.fImage, text)
  716. def fImage(self, match):
  717. # (None, '', '/imgs/myphoto.jpg', None, None)
  718. atts, url, title, href = match.groups()
  719. atts = self.pba(atts)
  720. if title:
  721. atts = atts + ' title="%s" alt="%s"' % (title, title)
  722. else:
  723. atts = atts + ' alt=""'
  724. if not self.isRelURL(url) and self.get_sizes:
  725. size = getimagesize(url)
  726. if (size):
  727. atts += " %s" % size
  728. if href:
  729. href = self.checkRefs(href)
  730. url = self.checkRefs(url)
  731. url = self.relURL(url)
  732. out = []
  733. if href:
  734. out.append('<a href="%s" class="img">' % href)
  735. if self.html_type == 'html':
  736. out.append('<img src="%s"%s>' % (url, atts))
  737. else:
  738. out.append('<img src="%s"%s />' % (url, atts))
  739. if href:
  740. out.append('</a>')
  741. return ''.join(out)
  742. def code(self, text):
  743. text = self.doSpecial(text, '<code>', '</code>', self.fCode)
  744. text = self.doSpecial(text, '@', '@', self.fCode)
  745. text = self.doSpecial(text, '<pre>', '</pre>', self.fPre)
  746. return text
  747. def fCode(self, match):
  748. before, text, after = match.groups()
  749. if after == None:
  750. after = ''
  751. # text needs to be escaped
  752. if not self.restricted:
  753. text = self.encode_html(text)
  754. return ''.join([before, self.shelve('<code>%s</code>' % text), after])
  755. def fPre(self, match):
  756. before, text, after = match.groups()
  757. if after == None:
  758. after = ''
  759. # text needs to be escapedd
  760. if not self.restricted:
  761. text = self.encode_html(text)
  762. return ''.join([before, '<pre>', self.shelve(text), '</pre>', after])
  763. def doSpecial(self, text, start, end, method=None):
  764. if method == None:
  765. method = self.fSpecial
  766. pattern = re.compile(r'(^|\s|[\[({>])%s(.*?)%s(\s|$|[\])}])?' % (re.escape(start), re.escape(end)), re.M|re.S)
  767. return pattern.sub(method, text)
  768. def fSpecial(self, match):
  769. """
  770. special blocks like notextile or code
  771. """
  772. before, text, after = match.groups()
  773. if after == None:
  774. after = ''
  775. return ''.join([before, self.shelve(self.encode_html(text)), after])
  776. def noTextile(self, text):
  777. text = self.doSpecial(text, '<notextile>', '</notextile>', self.fTextile)
  778. return self.doSpecial(text, '==', '==', self.fTextile)
  779. def fTextile(self, match):
  780. before, notextile, after = match.groups()
  781. if after == None:
  782. after = ''
  783. return ''.join([before, self.shelve(notextile), after])
  784. def textile(text, head_offset=0, html_type='xhtml', encoding=None, output=None):
  785. """
  786. this function takes additional parameters:
  787. head_offset - offset to apply to heading levels (default: 0)
  788. html_type - 'xhtml' or 'html' style tags (default: 'xhtml')
  789. """
  790. return Textile().textile(text, head_offset=head_offset,
  791. html_type=html_type)
  792. def textile_restricted(text, lite=True, noimage=True, html_type='xhtml'):
  793. """
  794. Restricted version of Textile designed for weblog comments and other
  795. untrusted input.
  796. Raw HTML is escaped.
  797. Style attributes are disabled.
  798. rel='nofollow' is added to external links.
  799. When lite=True is set (the default):
  800. Block tags are restricted to p, bq, and bc.
  801. Lists and tables are disabled.
  802. When noimage=True is set (the default):
  803. Image tags are disabled.
  804. """
  805. return Textile(restricted=True, lite=lite,
  806. noimage=noimage).textile(text, rel='nofollow',
  807. html_type=html_type)