PageRenderTime 56ms CodeModel.GetById 19ms RepoModel.GetById 0ms app.codeStats 0ms

/modules/contrib/textile/functions.py

https://bitbucket.org/mulonemartin/powerpack/
Python | 1003 lines | 942 code | 44 blank | 17 comment | 41 complexity | 79bde3c4a57a18eea387244b014c49d0 MD5 | raw file
Possible License(s): BSD-2-Clause, WTFPL
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. __copyright__ = """
  4. Copyright (c) 2009, Jason Samsa, http://jsamsa.com/
  5. Copyright (c) 2010, Kurt Raschke <kurt@kurtraschke.com>
  6. Copyright (c) 2004, Roberto A. F. De Almeida, http://dealmeida.net/
  7. Copyright (c) 2003, Mark Pilgrim, http://diveintomark.org/
  8. Original PHP Version:
  9. Copyright (c) 2003-2004, Dean Allen <dean@textism.com>
  10. All rights reserved.
  11. Thanks to Carlo Zottmann <carlo@g-blog.net> for refactoring
  12. Textile's procedural code into a class framework
  13. Additions and fixes Copyright (c) 2006 Alex Shiels http://thresholdstate.com/
  14. """
  15. import re
  16. import uuid
  17. import string
  18. from urlparse import urlparse
  19. from tools import sanitizer, imagesize
  20. def _normalize_newlines(string):
  21. out = string.strip()
  22. out = re.sub(r'\r\n', '\n', out)
  23. out = re.sub(r'\n{3,}', '\n\n', out)
  24. out = re.sub(r'\n\s*\n', '\n\n', out)
  25. out = re.sub(r'"$', '" ', out)
  26. return out
  27. class Textile(object):
  28. horizontal_align_re = r'(?:\<(?!>)|(?<!<)\>|\<\>|\=|[()]+(?! ))'
  29. vertical_align_re = r'[\-^~]'
  30. class_re = r'(?:\([^)]+\))'
  31. language_re = r'(?:\[[^\]]+\])'
  32. style_re = r'(?:\{[^}]+\})'
  33. colspan_re = r'(?:\\\d+)'
  34. rowspan_re = r'(?:\/\d+)'
  35. align_re = r'(?:%s|%s)*' % (horizontal_align_re, vertical_align_re)
  36. table_span_re = r'(?:%s|%s)*' % (colspan_re, rowspan_re)
  37. c = r'(?:%s)*' % '|'.join([class_re, style_re,
  38. language_re, horizontal_align_re])
  39. pnct = r'[-!"#$%&()*+,/:;<=>?@\'\[\\\]\.^_`{|}~]'
  40. urlch = '[\w"$\-_.+*\'(),";\/?:@=&%#{}|\\^~\[\]`]'
  41. url_schemes = ('http', 'https', 'ftp', 'mailto')
  42. btag = ('bq', 'bc', 'notextile', 'pre', 'h[1-6]', 'fn\d+', 'p')
  43. btag_lite = ('bq', 'bc', 'p')
  44. iAlign = {'<': 'float: left;',
  45. '>': 'float: right;',
  46. '=': 'display: block; margin: 0 auto;'}
  47. vAlign = {'^': 'top', '-': 'middle', '~': 'bottom'}
  48. hAlign = {'<': 'left', '=': 'center', '>': 'right', '<>': 'justify'}
  49. glyph_defaults = (
  50. ('txt_quote_single_open', '&#8216;'),
  51. ('txt_quote_single_close', '&#8217;'),
  52. ('txt_quote_double_open', '&#8220;'),
  53. ('txt_quote_double_close', '&#8221;'),
  54. ('txt_apostrophe', '&#8217;'),
  55. ('txt_prime', '&#8242;'),
  56. ('txt_prime_double', '&#8243;'),
  57. ('txt_ellipsis', '&#8230;'),
  58. ('txt_emdash', '&#8212;'),
  59. ('txt_endash', '&#8211;'),
  60. ('txt_dimension', '&#215;'),
  61. ('txt_trademark', '&#8482;'),
  62. ('txt_registered', '&#174;'),
  63. ('txt_copyright', '&#169;'),
  64. )
  65. def __init__(self, restricted=False, lite=False, noimage=False,
  66. auto_link=False, get_sizes=False):
  67. """docstring for __init__"""
  68. self.restricted = restricted
  69. self.lite = lite
  70. self.noimage = noimage
  71. self.get_sizes = get_sizes
  72. self.auto_link = auto_link
  73. self.fn = {}
  74. self.urlrefs = {}
  75. self.shelf = {}
  76. self.rel = ''
  77. self.html_type = 'xhtml'
  78. def textile(self, text, rel=None, head_offset=0, html_type='xhtml',
  79. sanitize=False):
  80. """
  81. >>> import textile
  82. >>> textile.textile('some textile')
  83. '\\t<p>some textile</p>'
  84. """
  85. self.html_type = html_type
  86. # text = unicode(text)
  87. text = _normalize_newlines(text)
  88. if self.restricted:
  89. text = self.encode_html(text, quotes=False)
  90. if rel:
  91. self.rel = ' rel="%s"' % rel
  92. text = self.getRefs(text)
  93. text = self.block(text, int(head_offset))
  94. text = self.retrieve(text)
  95. if sanitize:
  96. text = sanitizer.sanitize(text, self.html_type)
  97. return text
  98. def pba(self, block_attributes, element=None):
  99. """
  100. Parse block attributes.
  101. >>> t = Textile()
  102. >>> t.pba(r'\3')
  103. ''
  104. >>> t.pba(r'\\3', element='td')
  105. ' colspan="3"'
  106. >>> t.pba(r'/4', element='td')
  107. ' rowspan="4"'
  108. >>> t.pba(r'\\3/4', element='td')
  109. ' colspan="3" rowspan="4"'
  110. >>> t.pba('^', element='td')
  111. ' style="vertical-align:top;"'
  112. >>> t.pba('{line-height:18px}')
  113. ' style="line-height:18px;"'
  114. >>> t.pba('(foo-bar)')
  115. ' class="foo-bar"'
  116. >>> t.pba('(#myid)')
  117. ' id="myid"'
  118. >>> t.pba('(foo-bar#myid)')
  119. ' class="foo-bar" id="myid"'
  120. >>> t.pba('((((')
  121. ' style="padding-left:4em;"'
  122. >>> t.pba(')))')
  123. ' style="padding-right:3em;"'
  124. >>> t.pba('[fr]')
  125. ' lang="fr"'
  126. >>> rt = Textile()
  127. >>> rt.restricted = True
  128. >>> rt.pba('[en]')
  129. ' lang="en"'
  130. >>> rt.pba('(#id)')
  131. ''
  132. """
  133. style = []
  134. aclass = ''
  135. lang = ''
  136. colspan = ''
  137. rowspan = ''
  138. block_id = ''
  139. if not block_attributes:
  140. return ''
  141. matched = block_attributes
  142. if element == 'td':
  143. m = re.search(r'\\(\d+)', matched)
  144. if m:
  145. colspan = m.group(1)
  146. m = re.search(r'/(\d+)', matched)
  147. if m:
  148. rowspan = m.group(1)
  149. if element == 'td' or element == 'tr':
  150. m = re.search(r'(%s)' % self.vertical_align_re, matched)
  151. if m:
  152. style.append("vertical-align:%s;" % self.vAlign[m.group(1)])
  153. m = re.search(r'\{([^}]*)\}', matched)
  154. if m:
  155. style.append(m.group(1).rstrip(';') + ';')
  156. matched = matched.replace(m.group(0), '')
  157. m = re.search(r'\[([^\]]+)\]', matched, re.U)
  158. if m:
  159. lang = m.group(1)
  160. matched = matched.replace(m.group(0), '')
  161. m = re.search(r'\(([^()]+)\)', matched, re.U)
  162. if m:
  163. aclass = m.group(1)
  164. matched = matched.replace(m.group(0), '')
  165. m = re.search(r'([(]+)', matched)
  166. if m:
  167. style.append("padding-left:%sem;" % len(m.group(1)))
  168. matched = matched.replace(m.group(0), '')
  169. m = re.search(r'([)]+)', matched)
  170. if m:
  171. style.append("padding-right:%sem;" % len(m.group(1)))
  172. matched = matched.replace(m.group(0), '')
  173. m = re.search(r'(%s)' % self.horizontal_align_re, matched)
  174. if m:
  175. style.append("text-align:%s;" % self.hAlign[m.group(1)])
  176. m = re.search(r'^(.*)#(.*)$', aclass)
  177. if m:
  178. block_id = m.group(2)
  179. aclass = m.group(1)
  180. if self.restricted:
  181. if lang:
  182. return ' lang="%s"' % lang
  183. else:
  184. return ''
  185. result = []
  186. if style:
  187. result.append(' style="%s"' % "".join(style))
  188. if aclass:
  189. result.append(' class="%s"' % aclass)
  190. if lang:
  191. result.append(' lang="%s"' % lang)
  192. if block_id:
  193. result.append(' id="%s"' % block_id)
  194. if colspan:
  195. result.append(' colspan="%s"' % colspan)
  196. if rowspan:
  197. result.append(' rowspan="%s"' % rowspan)
  198. return ''.join(result)
  199. def hasRawText(self, text):
  200. """
  201. checks whether the text has text not already enclosed by a block tag
  202. >>> t = Textile()
  203. >>> t.hasRawText('<p>foo bar biz baz</p>')
  204. False
  205. >>> t.hasRawText(' why yes, yes it does')
  206. True
  207. """
  208. r = re.compile(r'<(p|blockquote|div|form|table|ul|ol|pre|h\d)[^>]*?>.*</\1>',
  209. re.S).sub('', text.strip()).strip()
  210. r = re.compile(r'<(hr|br)[^>]*?/>').sub('', r)
  211. return '' != r
  212. def table(self, text):
  213. r"""
  214. >>> t = Textile()
  215. >>> t.table('(rowclass). |one|two|three|\n|a|b|c|')
  216. '\t<table>\n\t\t<tr class="rowclass">\n\t\t\t<td>one</td>\n\t\t\t<td>two</td>\n\t\t\t<td>three</td>\n\t\t</tr>\n\t\t<tr>\n\t\t\t<td>a</td>\n\t\t\t<td>b</td>\n\t\t\t<td>c</td>\n\t\t</tr>\n\t</table>\n\n'
  217. """
  218. text = text + "\n\n"
  219. pattern = re.compile(r'^(?:table(_?%(s)s%(a)s%(c)s)\. ?\n)?^(%(a)s%(c)s\.? ?\|.*\|)\n\n'
  220. % {'s': self.table_span_re,
  221. 'a': self.align_re,
  222. 'c': self.c},
  223. re.S | re.M | re.U)
  224. return pattern.sub(self.fTable, text)
  225. def fTable(self, match):
  226. tatts = self.pba(match.group(1), 'table')
  227. rows = []
  228. for row in [x for x in match.group(2).split('\n') if x]:
  229. rmtch = re.search(r'^(%s%s\. )(.*)'
  230. % (self.align_re, self.c), row.lstrip())
  231. if rmtch:
  232. ratts = self.pba(rmtch.group(1), 'tr')
  233. row = rmtch.group(2)
  234. else:
  235. ratts = ''
  236. cells = []
  237. for cell in row.split('|')[1:-1]:
  238. ctyp = 'd'
  239. if re.search(r'^_', cell):
  240. ctyp = "h"
  241. cmtch = re.search(r'^(_?%s%s%s\. )(.*)'
  242. % (self.table_span_re,
  243. self.align_re,
  244. self.c),
  245. cell)
  246. if cmtch:
  247. catts = self.pba(cmtch.group(1), 'td')
  248. cell = cmtch.group(2)
  249. else:
  250. catts = ''
  251. cell = self.graf(self.span(cell))
  252. cells.append('\t\t\t<t%s%s>%s</t%s>'
  253. % (ctyp, catts, cell, ctyp))
  254. rows.append("\t\t<tr%s>\n%s\n\t\t</tr>"
  255. % (ratts, '\n'.join(cells)))
  256. cells = []
  257. catts = None
  258. return "\t<table%s>\n%s\n\t</table>\n\n" % (tatts, '\n'.join(rows))
  259. def lists(self, text):
  260. """
  261. >>> t = Textile()
  262. >>> t.lists("* one\\n* two\\n* three")
  263. '\\t<ul>\\n\\t\\t<li>one</li>\\n\\t\\t<li>two</li>\\n\\t\\t<li>three</li>\\n\\t</ul>'
  264. """
  265. #Replace line-initial bullets with asterisks
  266. bullet_pattern = re.compile(u'^•', re.U | re.M)
  267. pattern = re.compile(r'^([#*]+%s .*)$(?![^#*])'
  268. % self.c, re.U | re.M | re.S)
  269. return pattern.sub(self.fList, bullet_pattern.sub('*', text))
  270. def fList(self, match):
  271. text = match.group(0).split("\n")
  272. result = []
  273. lists = []
  274. for i, line in enumerate(text):
  275. try:
  276. nextline = text[i + 1]
  277. except IndexError:
  278. nextline = ''
  279. m = re.search(r"^([#*]+)(%s%s) (.*)$" % (self.align_re,
  280. self.c), line, re.S)
  281. if m:
  282. tl, atts, content = m.groups()
  283. nl = ''
  284. nm = re.search(r'^([#*]+)\s.*', nextline)
  285. if nm:
  286. nl = nm.group(1)
  287. if tl not in lists:
  288. lists.append(tl)
  289. atts = self.pba(atts)
  290. line = "\t<%sl%s>\n\t\t<li>%s" % (self.listType(tl),
  291. atts, self.graf(content))
  292. else:
  293. line = "\t\t<li>" + self.graf(content)
  294. if len(nl) <= len(tl):
  295. line = line + "</li>"
  296. for k in reversed(lists):
  297. if len(k) > len(nl):
  298. line = line + "\n\t</%sl>" % self.listType(k)
  299. if len(k) > 1:
  300. line = line + "</li>"
  301. lists.remove(k)
  302. result.append(line)
  303. return "\n".join(result)
  304. def listType(self, list_string):
  305. if re.search(r'^#+', list_string):
  306. return 'o'
  307. else:
  308. return 'u'
  309. def doPBr(self, in_):
  310. return re.compile(r'<(p)([^>]*?)>(.*)(</\1>)', re.S).sub(self.doBr,
  311. in_)
  312. def doBr(self, match):
  313. if self.html_type == 'html':
  314. content = re.sub(r'(.+)(?:(?<!<br>)|(?<!<br />))\n(?![#*\s|])', '\\1<br>',
  315. match.group(3))
  316. else:
  317. content = re.sub(r'(.+)(?:(?<!<br>)|(?<!<br />))\n(?![#*\s|])', '\\1<br />',
  318. match.group(3))
  319. return '<%s%s>%s%s' % (match.group(1), match.group(2),
  320. content, match.group(4))
  321. def block(self, text, head_offset=0):
  322. """
  323. >>> t = Textile()
  324. >>> t.block('h1. foobar baby')
  325. '\\t<h1>foobar baby</h1>'
  326. """
  327. if not self.lite:
  328. tre = '|'.join(self.btag)
  329. else:
  330. tre = '|'.join(self.btag_lite)
  331. text = text.split('\n\n')
  332. tag = 'p'
  333. atts = cite = graf = ext = ''
  334. c1 = ''
  335. out = []
  336. anon = False
  337. for line in text:
  338. pattern = r'^(%s)(%s%s)\.(\.?)(?::(\S+))? (.*)$' % (tre,
  339. self.align_re,
  340. self.c)
  341. match = re.search(pattern, line, re.S)
  342. if match:
  343. if ext:
  344. out.append(out.pop() + c1)
  345. tag, atts, ext, cite, graf = match.groups()
  346. h_match = re.search(r'h([1-6])', tag)
  347. if h_match:
  348. head_level, = h_match.groups()
  349. tag = 'h%i' % max(1,
  350. min(int(head_level) + head_offset,
  351. 6))
  352. o1, o2, content, c2, c1 = self.fBlock(tag, atts, ext,
  353. cite, graf)
  354. # leave off c1 if this block is extended,
  355. # we'll close it at the start of the next block
  356. if ext:
  357. line = "%s%s%s%s" % (o1, o2, content, c2)
  358. else:
  359. line = "%s%s%s%s%s" % (o1, o2, content, c2, c1)
  360. else:
  361. anon = True
  362. if ext or not re.search(r'^\s', line):
  363. o1, o2, content, c2, c1 = self.fBlock(tag, atts, ext,
  364. cite, line)
  365. # skip $o1/$c1 because this is part of a continuing
  366. # extended block
  367. if tag == 'p' and not self.hasRawText(content):
  368. line = content
  369. else:
  370. line = "%s%s%s" % (o2, content, c2)
  371. else:
  372. line = self.graf(line)
  373. line = self.doPBr(line)
  374. if self.html_type == 'xhtml':
  375. line = re.sub(r'<br>', '<br />', line)
  376. if ext and anon:
  377. out.append(out.pop() + "\n" + line)
  378. else:
  379. out.append(line)
  380. if not ext:
  381. tag = 'p'
  382. atts = ''
  383. cite = ''
  384. graf = ''
  385. if ext:
  386. out.append(out.pop() + c1)
  387. return '\n\n'.join(out)
  388. def fBlock(self, tag, atts, ext, cite, content):
  389. """
  390. >>> t = Textile()
  391. >>> t.fBlock("bq", "", None, "", "Hello BlockQuote")
  392. ('\\t<blockquote>\\n', '\\t\\t<p>', 'Hello BlockQuote', '</p>', '\\n\\t</blockquote>')
  393. >>> t.fBlock("bq", "", None, "http://google.com", "Hello BlockQuote")
  394. ('\\t<blockquote cite="http://google.com">\\n', '\\t\\t<p>', 'Hello BlockQuote', '</p>', '\\n\\t</blockquote>')
  395. >>> t.fBlock("bc", "", None, "", 'printf "Hello, World";') # doctest: +ELLIPSIS
  396. ('<pre>', '<code>', ..., '</code>', '</pre>')
  397. >>> t.fBlock("h1", "", None, "", "foobar")
  398. ('', '\\t<h1>', 'foobar', '</h1>', '')
  399. """
  400. atts = self.pba(atts)
  401. o1 = o2 = c2 = c1 = ''
  402. m = re.search(r'fn(\d+)', tag)
  403. if m:
  404. tag = 'p'
  405. if m.group(1) in self.fn:
  406. fnid = self.fn[m.group(1)]
  407. else:
  408. fnid = m.group(1)
  409. atts = atts + ' id="fn%s"' % fnid
  410. if atts.find('class=') < 0:
  411. atts = atts + ' class="footnote"'
  412. content = ('<sup>%s</sup>' % m.group(1)) + content
  413. if tag == 'bq':
  414. cite = self.checkRefs(cite)
  415. if cite:
  416. cite = ' cite="%s"' % cite
  417. else:
  418. cite = ''
  419. o1 = "\t<blockquote%s%s>\n" % (cite, atts)
  420. o2 = "\t\t<p%s>" % atts
  421. c2 = "</p>"
  422. c1 = "\n\t</blockquote>"
  423. elif tag == 'bc':
  424. o1 = "<pre%s>" % atts
  425. o2 = "<code%s>" % atts
  426. c2 = "</code>"
  427. c1 = "</pre>"
  428. content = self.shelve(self.encode_html(content.rstrip("\n") +
  429. "\n"))
  430. elif tag == 'notextile':
  431. content = self.shelve(content)
  432. o1 = o2 = ''
  433. c1 = c2 = ''
  434. elif tag == 'pre':
  435. content = self.shelve(self.encode_html(content.rstrip("\n") +
  436. "\n"))
  437. o1 = "<pre%s>" % atts
  438. o2 = c2 = ''
  439. c1 = '</pre>'
  440. else:
  441. o2 = "\t<%s%s>" % (tag, atts)
  442. c2 = "</%s>" % tag
  443. content = self.graf(content)
  444. return o1, o2, content, c2, c1
  445. def footnoteRef(self, text):
  446. """
  447. >>> t = Textile()
  448. >>> t.footnoteRef('foo[1] ') # doctest: +ELLIPSIS
  449. 'foo<sup class="footnote"><a href="#fn...">1</a></sup> '
  450. """
  451. return re.compile(r'\b\[([0-9]+)\](\s)?', re.U).sub(self.footnoteID,
  452. text)
  453. def footnoteID(self, match):
  454. footnoteNum, text = match.groups()
  455. if footnoteNum not in self.fn:
  456. self.fn[footnoteNum] = str(uuid.uuid4())
  457. footnoteID = self.fn[footnoteNum]
  458. if not text:
  459. text = ''
  460. return '<sup class="footnote"><a href="#fn%s">%s</a></sup>%s' % (
  461. footnoteID, footnoteNum, text)
  462. def glyphs(self, text):
  463. """
  464. >>> t = Textile()
  465. >>> t.glyphs("apostrophe's")
  466. 'apostrophe&#8217;s'
  467. >>> t.glyphs("back in '88")
  468. 'back in &#8217;88'
  469. >>> t.glyphs('foo ...')
  470. 'foo &#8230;'
  471. >>> t.glyphs('--')
  472. '&#8212;'
  473. >>> t.glyphs('FooBar[tm]')
  474. 'FooBar&#8482;'
  475. >>> t.glyphs("<p><cite>Cat's Cradle</cite> by Vonnegut</p>")
  476. '<p><cite>Cat&#8217;s Cradle</cite> by Vonnegut</p>'
  477. """
  478. # fix: hackish
  479. text = re.sub(r'"\Z', '\" ', text)
  480. glyph_search = (
  481. # apostrophe's
  482. re.compile(r"(\w)\'(\w)"),
  483. # back in '88
  484. re.compile(r'(\s)\'(\d+\w?)\b(?!\')'),
  485. # single closing
  486. re.compile(r'(\S)\'(?=\s|' + self.pnct + '|<|$)'),
  487. # single opening
  488. re.compile(r'\'/'),
  489. # double closing
  490. re.compile(r'(\S)\"(?=\s|' + self.pnct + '|<|$)'),
  491. # double opening
  492. re.compile(r'"'),
  493. # 3+ uppercase acronym
  494. re.compile(r'\b([A-Z][A-Z0-9]{2,})\b(?:[(]([^)]*)[)])'),
  495. # 3+ uppercase
  496. re.compile(r'\b([A-Z][A-Z\'\-]+[A-Z])(?=[\s.,\)>])'),
  497. # ellipsis
  498. re.compile(r'\b(\s{0,1})?\.{3}'),
  499. # em dash
  500. re.compile(r'(\s?)--(\s?)'),
  501. # en dash
  502. re.compile(r'\s-(?:\s|$)'),
  503. # dimension sign
  504. re.compile(r'(\d+)( ?)x( ?)(?=\d+)'),
  505. # trademark
  506. re.compile(r'\b ?[([]TM[])]', re.I),
  507. # registered
  508. re.compile(r'\b ?[([]R[])]', re.I),
  509. # copyright
  510. re.compile(r'\b ?[([]C[])]', re.I),
  511. )
  512. glyph_replace = [x % dict(self.glyph_defaults) for x in (
  513. r'\1%(txt_apostrophe)s\2', # apostrophe's
  514. r'\1%(txt_apostrophe)s\2', # back in '88
  515. r'\1%(txt_quote_single_close)s', # single closing
  516. r'%(txt_quote_single_open)s', # single opening
  517. r'\1%(txt_quote_double_close)s', # double closing
  518. r'%(txt_quote_double_open)s', # double opening
  519. r'<acronym title="\2">\1</acronym>', # 3+ uppercase acronym
  520. r'<span class="caps">\1</span>', # 3+ uppercase
  521. r'\1%(txt_ellipsis)s', # ellipsis
  522. r'\1%(txt_emdash)s\2', # em dash
  523. r' %(txt_endash)s ', # en dash
  524. r'\1\2%(txt_dimension)s\3', # dimension sign
  525. r'%(txt_trademark)s', # trademark
  526. r'%(txt_registered)s', # registered
  527. r'%(txt_copyright)s', # copyright
  528. )]
  529. result = []
  530. for line in re.compile(r'(<.*?>)', re.U).split(text):
  531. if not re.search(r'<.*>', line):
  532. for s, r in zip(glyph_search, glyph_replace):
  533. line = s.sub(r, line)
  534. result.append(line)
  535. return ''.join(result)
  536. def getRefs(self, text):
  537. """
  538. Capture and store URL references in self.urlrefs.
  539. >>> t = Textile()
  540. >>> t.getRefs("some text [Google]http://www.google.com")
  541. 'some text '
  542. >>> t.urlrefs
  543. {'Google': 'http://www.google.com'}
  544. """
  545. pattern = re.compile(r'(?:(?<=^)|(?<=\s))\[(.+)\]((?:http(?:s?):\/\/|\/)\S+)(?=\s|$)', re.U)
  546. text = pattern.sub(self.refs, text)
  547. return text
  548. def refs(self, match):
  549. flag, url = match.groups()
  550. self.urlrefs[flag] = url
  551. return ''
  552. def checkRefs(self, url):
  553. return self.urlrefs.get(url, url)
  554. def isRelURL(self, url):
  555. """
  556. Identify relative urls.
  557. >>> t = Textile()
  558. >>> t.isRelURL("http://www.google.com/")
  559. False
  560. >>> t.isRelURL("/foo")
  561. True
  562. """
  563. (scheme, netloc) = urlparse(url)[0:2]
  564. return not scheme and not netloc
  565. def relURL(self, url):
  566. """
  567. >>> t = Textile()
  568. >>> t.relURL("http://www.google.com/")
  569. 'http://www.google.com/'
  570. >>> t.restricted = True
  571. >>> t.relURL("gopher://gopher.com/")
  572. '#'
  573. """
  574. scheme = urlparse(url)[0]
  575. if self.restricted and scheme and scheme not in self.url_schemes:
  576. return '#'
  577. return url
  578. def shelve(self, text):
  579. itemID = str(uuid.uuid4())
  580. self.shelf[itemID] = text
  581. return itemID
  582. def retrieve(self, text):
  583. """
  584. >>> t = Textile()
  585. >>> id = t.shelve("foobar")
  586. >>> t.retrieve(id)
  587. 'foobar'
  588. """
  589. while True:
  590. old = text
  591. for k, v in self.shelf.items():
  592. text = text.replace(k, v)
  593. if text == old:
  594. break
  595. return text
  596. def encode_html(self, text, quotes=True):
  597. a = (
  598. ('&', '&#38;'),
  599. ('<', '&#60;'),
  600. ('>', '&#62;'))
  601. if quotes:
  602. a = a + (("'", '&#39;'),
  603. ('"', '&#34;'))
  604. for k, v in a:
  605. text = text.replace(k, v)
  606. return text
  607. def graf(self, text):
  608. if not self.lite:
  609. text = self.noTextile(text)
  610. text = self.code(text)
  611. if self.auto_link:
  612. text = self.autoLink(text)
  613. text = self.links(text)
  614. if not self.noimage:
  615. text = self.image(text)
  616. text = self.lists(text)
  617. if not self.lite:
  618. text = self.table(text)
  619. text = self.span(text)
  620. text = self.footnoteRef(text)
  621. text = self.glyphs(text)
  622. return text.rstrip('\n')
  623. def autoLink(self, text):
  624. """
  625. >>> t = Textile()
  626. >>> t.autoLink("http://www.ya.ru")
  627. '"http://www.ya.ru":http://www.ya.ru'
  628. """
  629. pattern = re.compile(r"""\b((?:https?://|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'".,<>?«»]))""", re.U | re.I)
  630. return pattern.sub(r'"\1":\1', text)
  631. def links(self, text):
  632. """
  633. >>> t = Textile()
  634. >>> t.links('fooobar "Google":http://google.com/foobar/ and hello world "flickr":http://flickr.com/photos/jsamsa/ ') # doctest: +ELLIPSIS
  635. 'fooobar ... and hello world ...'
  636. """
  637. punct = '!"#$%&\'*+,-./:;=?@\\^_`|~'
  638. pattern = r'''
  639. (?P<pre>[\s\[{(]|[%s])? #leading text
  640. " #opening quote
  641. (?P<atts>%s) #block attributes
  642. (?P<text>[^"]+?) #link text
  643. \s?
  644. (?:\((?P<title>[^)]+?)\)(?="))? #optional title
  645. ": #closing quote, colon
  646. (?P<url>(?:ftp|https?)? #URL
  647. (?: :// )?
  648. [-A-Za-z0-9+&@#/?=~_()|!:,.;%%]*
  649. [-A-Za-z0-9+&@#/=~_()|]
  650. )
  651. (?P<post>[^\w\/;]*?) #trailing text
  652. (?=<|\s|$)
  653. ''' % (re.escape(punct), self.c)
  654. text = re.compile(pattern, re.X).sub(self.fLink, text)
  655. return text
  656. def fLink(self, match):
  657. pre, atts, text, title, url, post = match.groups()
  658. if pre == None:
  659. pre = ''
  660. # assume ) at the end of the url is not actually part of the url
  661. # unless the url also contains a (
  662. if url.endswith(')') and not url.find('(') > -1:
  663. post = url[-1] + post
  664. url = url[:-1]
  665. url = self.checkRefs(url)
  666. atts = self.pba(atts)
  667. if title:
  668. atts = atts + ' title="%s"' % self.encode_html(title)
  669. if not self.noimage:
  670. text = self.image(text)
  671. text = self.span(text)
  672. text = self.glyphs(text)
  673. url = self.relURL(url)
  674. out = '<a href="%s"%s%s>%s</a>' % (self.encode_html(url),
  675. atts, self.rel, text)
  676. out = self.shelve(out)
  677. return ''.join([pre, out, post])
  678. def span(self, text):
  679. """
  680. >>> t = Textile()
  681. >>> t.span(r"hello %(bob)span *strong* and **bold**% goodbye")
  682. 'hello <span class="bob">span <strong>strong</strong> and <b>bold</b></span> goodbye'
  683. """
  684. qtags = (r'\*\*', r'\*', r'\?\?', r'\-', r'__',
  685. r'_', r'%', r'\+', r'~', r'\^')
  686. pnct = ".,\"'?!;:("
  687. for qtag in qtags:
  688. pattern = re.compile(r"""
  689. (?:^|(?<=[\s>%(pnct)s])|([\[{]))
  690. (%(qtag)s)(?!%(qtag)s)
  691. (%(c)s)
  692. (?::\(([^)]+?)\))?
  693. ([^\s%(qtag)s]+|\S[^%(qtag)s\n]*[^\s%(qtag)s\n])
  694. ([%(pnct)s]*)
  695. %(qtag)s
  696. (?:$|([\]}])|(?=%(selfpnct)s{1,2}|\s))
  697. """ % {'qtag': qtag, 'c': self.c, 'pnct': pnct,
  698. 'selfpnct': self.pnct}, re.X)
  699. text = pattern.sub(self.fSpan, text)
  700. return text
  701. def fSpan(self, match):
  702. _, tag, atts, cite, content, end, _ = match.groups()
  703. qtags = {'*': 'strong',
  704. '**': 'b',
  705. '??': 'cite',
  706. '_': 'em',
  707. '__': 'i',
  708. '-': 'del',
  709. '%': 'span',
  710. '+': 'ins',
  711. '~': 'sub',
  712. '^': 'sup'}
  713. tag = qtags[tag]
  714. atts = self.pba(atts)
  715. if cite:
  716. atts = atts + ' cite="%s"' % cite
  717. content = self.span(content)
  718. out = "<%s%s>%s%s</%s>" % (tag, atts, content, end, tag)
  719. return out
  720. def image(self, text):
  721. """
  722. >>> t = Textile()
  723. >>> t.image('!/imgs/myphoto.jpg!:http://jsamsa.com')
  724. '<a href="http://jsamsa.com" class="img"><img src="/imgs/myphoto.jpg" alt="" /></a>'
  725. >>> t.image('!</imgs/myphoto.jpg!')
  726. '<img src="/imgs/myphoto.jpg" style="float: left;" alt="" />'
  727. """
  728. pattern = re.compile(r"""
  729. (?:[\[{])? # pre
  730. \! # opening !
  731. (\<|\=|\>)? # optional alignment atts
  732. (%s) # optional style,class atts
  733. (?:\. )? # optional dot-space
  734. ([^\s(!]+) # presume this is the src
  735. \s? # optional space
  736. (?:\(([^\)]+)\))? # optional title
  737. \! # closing
  738. (?::(\S+))? # optional href
  739. (?:[\]}]|(?=\s|$)) # lookahead: space or end of string
  740. """ % self.c, re.U | re.X)
  741. return pattern.sub(self.fImage, text)
  742. def fImage(self, match):
  743. # (None, '', '/imgs/myphoto.jpg', None, None)
  744. align, atts, url, title, href = match.groups()
  745. atts = self.pba(atts)
  746. if align:
  747. atts = atts + ' style="%s"' % self.iAlign[align]
  748. if title:
  749. atts = atts + ' title="%s" alt="%s"' % (title, title)
  750. else:
  751. atts = atts + ' alt=""'
  752. if not self.isRelURL(url) and self.get_sizes:
  753. size = imagesize.getimagesize(url)
  754. if size:
  755. atts += " %s" % size
  756. if href:
  757. href = self.checkRefs(href)
  758. url = self.checkRefs(url)
  759. url = self.relURL(url)
  760. out = []
  761. if href:
  762. out.append('<a href="%s" class="img">' % href)
  763. if self.html_type == 'html':
  764. out.append('<img src="%s"%s>' % (url, atts))
  765. else:
  766. out.append('<img src="%s"%s />' % (url, atts))
  767. if href:
  768. out.append('</a>')
  769. return ''.join(out)
  770. def code(self, text):
  771. text = self.doSpecial(text, '<code>', '</code>', self.fCode)
  772. text = self.doSpecial(text, '@', '@', self.fCode)
  773. text = self.doSpecial(text, '<pre>', '</pre>', self.fPre)
  774. return text
  775. def fCode(self, match):
  776. before, text, after = match.groups()
  777. if after == None:
  778. after = ''
  779. # text needs to be escaped
  780. if not self.restricted:
  781. text = self.encode_html(text, quotes=False)
  782. return ''.join([before, self.shelve('<code>%s</code>' % text), after])
  783. def fPre(self, match):
  784. before, text, after = match.groups()
  785. if after == None:
  786. after = ''
  787. # text needs to be escaped
  788. if not self.restricted:
  789. text = self.encode_html(text)
  790. return ''.join([before, '<pre>', self.shelve(text), '</pre>', after])
  791. def doSpecial(self, text, start, end, method):
  792. pattern = re.compile(r'(^|\s|[\[({>])%s(.*?)%s(\s|$|[\])}])?'
  793. % (re.escape(start), re.escape(end)), re.M | re.S)
  794. return pattern.sub(method, text)
  795. def noTextile(self, text):
  796. text = self.doSpecial(text, '<notextile>', '</notextile>',
  797. self.fTextile)
  798. return self.doSpecial(text, '==', '==', self.fTextile)
  799. def fTextile(self, match):
  800. before, notextile, after = match.groups()
  801. if after == None:
  802. after = ''
  803. return ''.join([before, self.shelve(notextile), after])
  804. def textile(text, head_offset=0, html_type='xhtml', auto_link=False,
  805. encoding=None, output=None):
  806. """
  807. Apply Textile to a block of text.
  808. This function takes the following additional parameters:
  809. auto_link - enable automatic linking of URLs (default: False)
  810. head_offset - offset to apply to heading levels (default: 0)
  811. html_type - 'xhtml' or 'html' style tags (default: 'xhtml')
  812. """
  813. return Textile(auto_link=auto_link).textile(text, head_offset=head_offset,
  814. html_type=html_type)
  815. def textile_restricted(text, lite=True, noimage=True, html_type='xhtml',
  816. auto_link=False):
  817. """
  818. Apply Textile to a block of text, with restrictions designed for weblog
  819. comments and other untrusted input. Raw HTML is escaped, style attributes
  820. are disabled, and rel='nofollow' is added to external links.
  821. This function takes the following additional parameters:
  822. auto_link - enable automatic linking of URLs (default: False)
  823. html_type - 'xhtml' or 'html' style tags (default: 'xhtml')
  824. lite - restrict block tags to p, bq, and bc, disable tables (default: True)
  825. noimage - disable image tags (default: True)
  826. """
  827. return Textile(restricted=True, lite=lite,
  828. noimage=noimage, auto_link=auto_link).textile(
  829. text, rel='nofollow', html_type=html_type)