PageRenderTime 52ms CodeModel.GetById 5ms RepoModel.GetById 0ms app.codeStats 0ms

/sahriswiki/creoleparser/elements.py

https://bitbucket.org/prologic/sahriswiki
Python | 1614 lines | 1582 code | 10 blank | 22 comment | 14 complexity | d4b541a361eb509a05323b03e0c921dc MD5 | raw file
Possible License(s): GPL-2.0

Large files are truncated, but you can click here to view the full file

  1. # elements.py
  2. # -*- coding: utf-8 -*-
  3. #
# Copyright © Stephen Day
  5. #
  6. # This module is part of Creoleparser and is released under
  7. # the MIT License: http://www.opensource.org/licenses/mit-license.php
  8. #
  9. import re
  10. import urlparse
  11. import urllib
  12. import keyword
  13. import sys
  14. import genshi.builder as bldr
  15. from genshi.core import Stream, Markup
  16. from core import (escape_char, esc_neg_look, fragmentize, ImplicitList, P3Template)
# Tags that may appear only at block level (never inline).
BLOCK_ONLY_TAGS = ['h1','h2','h3','h4','h5','h6',
                   'ul','ol','dl',
                   'pre','hr','blockquote','address',
                   'p','div','form','fieldset','table',
                   'noscript']
# All block-level tags, including those that may also appear inline.
BLOCK_TAGS = BLOCK_ONLY_TAGS + ['ins','del','script']
# Named pattern for a macro name, used by the Macro element classes below.
MACRO_NAME = r'(?P<name>[a-zA-Z]+([-.]?[a-zA-Z0-9]+)*)'
"""allows any number of non-repeating hyphens or periods.
Underscore is not included because hyphen is"""
  26. # use Genshi's HTMLSanitizer if possible (i.e., not on Google App Engine)
  27. try:
  28. from genshi.filters import HTMLSanitizer
  29. except:
  30. SAFE_SCHEMES = frozenset(['file', 'ftp', 'http', 'https', 'mailto', None])
  31. class HTMLSanitizer(object):
  32. def is_safe_uri(self,uri):
  33. if ':' not in uri:
  34. return True # This is a relative URI
  35. chars = [char for char in uri.split(':', 1)[0] if char.isalnum()]
  36. return ''.join(chars).lower() in SAFE_SCHEMES
  37. sanitizer = HTMLSanitizer()
  38. __docformat__ = 'restructuredtext en'
class WikiElement(object):
    """Baseclass for all wiki elements."""

    append_newline = False
    """Determines if newlines are appended to Element(s) during processing.
    Should only affect readability of source xml.
    """

    def __init__(self, tag, token, child_elements=None):
        """Constructor for WikiElement objects.

        Subclasses may have other keyword arguments.

        :parameters:
          tag
            The xhtml tag associated with the element.
          token
            The character string (or strings) that identifies the element
            in wiki markup.
          child_elements
            A list of wiki_elements that will be searched for in the body of the
            element. The order of these elements matters, because if an element is
            found before the element that encloses it, the enclosing element will
            never be found. In cases where this imposes limits (e.g, ``strong`` and
            ``em`` should be allowed to nest each other), place the conflicting
            elements in a sublist. The parser will then find which comes first.
        """
        self.tag = tag
        self.token = token
        if child_elements is None:
            child_elements = []
        self.child_elements = child_elements

    def _build(self, mo, element_store, environ):
        """Returns a genshi Element that has ``self.tag`` as the
        outermost tag.

        This method is called exclusively by ``_process``.

        :parameters:
          mo
            match object, usually the one returned by
            self.regexp.search(s)
        """
        return bldr.tag.__getattr__(self.tag)(fragmentize(mo.group(1),
                                                          self.child_elements,
                                                          element_store, environ))

    def re_string(self):
        """The regular expression pattern that is compiled into ``self.regexp``.

        The regular expression must consume the entire wiki element,
        including the tokens. For block elements, the newline on the last
        line must be consumed also. group(1) should normally be the
        entire string inside the tokens. If not, a custom ``_build``
        method will be needed.
        """
        pass

    def __repr__(self):
        return "<"+self.__class__.__name__ + " " + str(self.tag)+">"

    def _process(self, mos, text, wiki_elements, element_store, environ):
        """Returns genshi Fragments (Elements and text)

        This is mainly for block level markup. See InlineElement
        for the other method.

        ``mos`` is the list of match objects for this element found in
        ``text``; text between matches is recursively fragmentized with the
        remaining ``wiki_elements``.
        """
        frags = []
        end = 0
        for mo in mos:
            if end != mo.start():
                # call again for leading text and extend the result list
                frags.extend(fragmentize(text[end:mo.start()], wiki_elements[1:],
                                         element_store, environ))
            # append the found wiki element to the result list
            built = self._build(mo, element_store, environ)
            if built is not None:
                frags.append(built)
            # make the source output easier to read
            if self.append_newline:
                frags.append('\n')
            end = mo.end()
        # call again for trailing text and extend the result list
        if end < len(text):
            # A leading list/tuple marks alternative elements that are still
            # candidates; otherwise this element has been fully consumed.
            if not isinstance(wiki_elements[0], (list, tuple)):
                wiki_elements = wiki_elements[1:]
            frags.extend(fragmentize(text[end:], wiki_elements,
                                     element_store, environ))
        return frags
  117. class BlockElement(WikiElement):
  118. """Block elements inherit form this class
  119. Wiki elements wanting ``append_newline = True`` should use this
  120. as the base also.
  121. """
  122. append_newline = True
  123. class InlineElement(WikiElement):
  124. r"""For finding generic inline elements
  125. >>> em = InlineElement('em','//')
  126. >>> mo1 = em.regexp.search('a //word// in a line')
  127. >>> mo2 = em.regexp.search('a //word in a line\n or two\n')
  128. >>> mo1.group(0),mo1.group(1)
  129. ('//word//', 'word')
  130. >>> mo2.group(0),mo2.group(1)
  131. ('//word in a line\n or two', 'word in a line\n or two')
  132. Use a list for the ``token`` argument to have different start
  133. and end strings. These must be closed.
  134. >>> foo = InlineElement('foo',['<<','>>'])
  135. >>> mo = foo.regexp.search('blaa <<here it is >>\n')
  136. >>> mo.group(1)
  137. 'here it is '
  138. """
  139. def __init__(self, tag='', token=''):
  140. super(InlineElement,self).__init__(tag,token)
  141. self.regexp = re.compile(self.re_string(),re.DOTALL)
  142. def re_string(self):
  143. if isinstance(self.token,str):
  144. content = '(.+?)'
  145. end = '(' + esc_neg_look + re.escape(self.token) + r'|$)'
  146. return esc_neg_look + re.escape(self.token) + content + end
  147. else:
  148. content = '(.+?)'
  149. return esc_neg_look + re.escape(self.token[0]) + content + esc_neg_look + re.escape(self.token[1])
  150. def _process(self, mos, text, wiki_elements, element_store, environ):
  151. """Returns genshi Fragments (Elements and text)"""
  152. parts = []
  153. end = 0
  154. for mo in mos:
  155. processed = self._build(mo,element_store, environ)
  156. store_id = str(id(processed))
  157. element_store[store_id] = processed
  158. parts.append(''.join([text[end:mo.start()],'<<<',store_id,'>>>']))
  159. end = mo.end()
  160. # call again for trailing text and extend the result list
  161. if end < len(text):
  162. parts.append(text[end:])
  163. new_text = ''.join(parts)
  164. if not isinstance(wiki_elements[0],(list,tuple)):
  165. wiki_elements = wiki_elements[1:]
  166. frags = fragmentize(new_text,wiki_elements,element_store, environ)
  167. return frags
  168. class SimpleElement(InlineElement):
  169. r"""For finding generic inline elements like ``strong`` and ``em``.
  170. >>> em = SimpleElement({'//':'em'})
  171. >>> mo1 = em.regexp.search('a //word// in a line')
  172. >>> mo2 = em.regexp.search('a //word in a line\n or two\n')
  173. >>> mo1.group(0),mo1.group(2)
  174. ('//word//', 'word')
  175. >>> mo2.group(0),mo2.group(2)
  176. ('//word in a line\n or two', 'word in a line\n or two')
  177. """
  178. def __init__(self, token_dict={}):
  179. self.token_dict = token_dict
  180. self.tokens = token_dict.keys()
  181. super(SimpleElement,self).__init__('','')
  182. self.regexp = re.compile(self.re_string(),re.DOTALL)
  183. def re_string(self):
  184. if isinstance(self.token,basestring):
  185. tokens = '(' + '|'.join([re.escape(token) for token in self.tokens]) + ')'
  186. content = '(.+?)'
  187. end = '(' + esc_neg_look + r'\1|$)'
  188. return esc_neg_look + tokens + content + end
  189. def _build(self,mo,element_store, environ):
  190. return bldr.tag.__getattr__(self.token_dict[mo.group(1)])(fragmentize(mo.group(2),
  191. self.child_elements,
  192. element_store, environ))
  193. class LinkElement(InlineElement):
  194. """Superclass for AnchorLinks and ImageLinks. Parses internal, external,
  195. and interwiki links.
  196. """
  197. def __init__(self,tag, token, delimiter,
  198. interwiki_delimiter,base_urls,links_funcs,default_space_char,space_chars,
  199. base_url,space_char,class_func,path_func):
  200. super(LinkElement,self).__init__(tag,token)
  201. self.regexp = re.compile(self.re_string(),re.DOTALL)
  202. self.delimiter = delimiter
  203. self.interwiki_delimiter = interwiki_delimiter
  204. self.base_urls = base_urls
  205. self.links_funcs = links_funcs
  206. self.default_space_char = default_space_char
  207. self.space_chars = space_chars
  208. self.base_url = base_url
  209. self.space_char = space_char
  210. self.class_func = class_func
  211. self.path_func = path_func
  212. self.content_regexp = re.compile(self.content_re_string(),re.DOTALL)
  213. ## self.arg_regexp = re.compile(self.arg_re_string(),re.DOTALL)
  214. self.interwikilink_regexp = re.compile(self.interwikilink_re_string())
  215. self.urllink_regexp = re.compile(self.urllink_re_string(), re.DOTALL)
  216. self.wikilink_regexp = re.compile(self.wikilink_re_string())
  217. ## def arg_re_string(self):
  218. ## key = r'((?P<key>\w+)\s*\=)?'
  219. ## value = r'(?P<value>.*?)'
  220. ## return r'\s*' + key + r'\s*' + value + r'\s*(?P<delimiter>' + \
  221. ## re.escape(self.delimiter) + r'|$)(?P<tail>.*)'
  222. def content_re_string(self):
  223. return r'(?P<body>.*?)(' + re.escape(self.delimiter) + '(?P<arg_string>.*?))?$'
  224. def interwikilink_re_string(self):
  225. all_wikis = set(self.links_funcs.keys() + self.base_urls.keys())
  226. wiki_id = '(?P<wiki_id>' + '|'.join(all_wikis) + ')'
  227. optional_spaces = ' *'
  228. page_name = r'(?P<page_name>\S+?( \S+?)*)' #allows any number of single spaces
  229. return '^' + optional_spaces + wiki_id + \
  230. re.escape(self.interwiki_delimiter) + ' *' + page_name + \
  231. optional_spaces + '$'#+ alias
  232. def urllink_re_string(self):
  233. protocol = r'^\s*((\w+?:|/)'
  234. rest_of_url = r'[\S\n]*?)\s*$'
  235. return protocol + rest_of_url #+ alias
  236. def wikilink_re_string(self):
  237. optional_spaces = ' *'
  238. page_name = r'(?P<page_name>\S+?( \S+?)*?)' #allows any number of single spaces
  239. return '^' + optional_spaces + page_name + optional_spaces + '$'#+ \
  240. ## def parse_args(self, arg_string):
  241. ## args = []
  242. ## delimiter = True
  243. ## while delimiter:
  244. ## mo = self.arg_regexp.match(arg_string)
  245. ## key, value, delimiter, tail = mo.group('key'),mo.group('value'),mo.group('delimiter'), mo.group('tail')
  246. ## if key:
  247. ## args.append((key, value))
  248. ## else:
  249. ## args.append(value)
  250. ## arg_string = tail
  251. ## positional_args = []
  252. ## kw_args = {}
  253. ## for arg in args:
  254. ## if isinstance(arg,tuple):
  255. ## k, v = arg
  256. ## k = str(k).lower()
  257. ## if k in keyword.kwlist:
  258. ## k = k + '_'
  259. ## if k in kw_args:
  260. ## if isinstance(v,list):
  261. ## try:
  262. ## kw_args[k].extend(v)
  263. ## except AttributeError:
  264. ## v.insert(0,kw_args[k])
  265. ## kw_args[k] = v
  266. ## elif isinstance(kw_args[k],list):
  267. ## kw_args[k].append(v)
  268. ## else:
  269. ## kw_args[k] = [kw_args[k], v]
  270. ## kw_args[k] = ImplicitList(kw_args[k])
  271. ## else:
  272. ## kw_args[k] = v
  273. ## if isinstance(kw_args[k],ImplicitList):
  274. ## kw_args[k] = ','.join(kw_args[k])
  275. ## else:
  276. ## positional_args.append(arg)
  277. ##
  278. ## return (positional_args, kw_args)
  279. def page_name(self,mo):
  280. if 'wiki_id' in mo.groupdict():
  281. space_char = self.space_chars.get(mo.group('wiki_id'),self.default_space_char)
  282. else:
  283. space_char = self.space_char
  284. return mo.group('page_name').replace(' ',space_char)
  285. def _build(self,mo,element_store, environ):
  286. content_mo = self.content_regexp.match(mo.group(1))
  287. body = content_mo.group('body')
  288. arg_string = content_mo.group('arg_string')
  289. the_class = None
  290. page_name = None
  291. if self.interwikilink_regexp.match(body):
  292. interwikilink_mo = self.interwikilink_regexp.match(body)
  293. link_type = 'interwiki'
  294. base_url = self.base_urls.get(interwikilink_mo.group('wiki_id'))
  295. link_func = self.links_funcs.get(interwikilink_mo.group('wiki_id'))
  296. page_name = self.page_name(interwikilink_mo)
  297. if link_func:
  298. url = link_func(page_name)
  299. else:
  300. url = urllib.quote(page_name.encode('utf-8'))
  301. if base_url:
  302. url = urlparse.urljoin(base_url, url)
  303. elif self.urllink_regexp.match(body):
  304. urllink_mo = self.urllink_regexp.match(body)
  305. link_type = 'url'
  306. if sanitizer.is_safe_uri(urllink_mo.group(1)):
  307. url = urllink_mo.group(1)
  308. else:
  309. url = None
  310. elif self.wikilink_regexp.match(body):
  311. wikilink_mo = self.wikilink_regexp.match(body)
  312. link_type = 'wiki'
  313. page_name = self.page_name(wikilink_mo)
  314. if self.path_func:
  315. the_path = self.path_func(self.tag, page_name, environ)
  316. else:
  317. the_path = urllib.quote(page_name.encode('utf-8'))
  318. url = urlparse.urljoin(self.base_url, the_path)
  319. else:
  320. url = None
  321. if not url:
  322. return mo.group(0)
  323. else:
  324. if arg_string is not None:
  325. args, kw_args = [arg_string.strip()], {} #self.parse_args(arg_string)
  326. else:
  327. args, kw_args = [], {}
  328. try:
  329. if self.class_func:
  330. the_class = self.class_func(link_type, url, body, page_name)
  331. return self.emit(element_store, environ,link_type,body,url,the_class, *args, **kw_args)
  332. except TypeError:
  333. return mo.group(0)
  334. class AnchorElement(LinkElement):
  335. """Finds and builds internal, external, and interwiki links.
  336. >>> link = AnchorElement('a',('[[',']]'),'|',
  337. ... interwiki_delimiter=':',
  338. ... base_urls=dict(somewiki='http://somewiki.org/',
  339. ... bigwiki='http://bigwiki.net/'),
  340. ... links_funcs={},default_space_char='-',
  341. ... space_chars={'bigwiki':' '},base_url='http://somewiki.org/',
  342. ... space_char='_',class_func=None,path_func=None)
  343. >>> mo = link.regexp.search("[[http://www.google.com| here]]")
  344. >>> link._build(mo,{},None).generate().render()
  345. '<a href="http://www.google.com">here</a>'
  346. >>> mo = link.regexp.search(" [[somewiki:Home Page|steve]] ")
  347. >>> link._build(mo,{},None).generate().render()
  348. '<a href="http://somewiki.org/Home-Page">steve</a>'
  349. >>> mo = link.regexp.search(" [[bigwiki:Home Page]] ")
  350. >>> link._build(mo,{},None).generate().render()
  351. '<a href="http://bigwiki.net/Home%20Page">bigwiki:Home Page</a>'
  352. >>> mo = link.regexp.search(" [[Home Page |Home]]")
  353. >>> link._build(mo,{},None).generate().render()
  354. '<a href="http://somewiki.org/Home_Page">Home</a>'
  355. """
  356. def __init__(self, *args, **kw_args):
  357. super(AnchorElement,self).__init__(*args, **kw_args)
  358. def emit(self,element_store, environ,link_type,body,url,the_class, alias=None):
  359. if alias:
  360. alias = fragmentize(alias,self.child_elements,element_store, environ)
  361. else:
  362. alias = body.strip()
  363. return bldr.tag.__getattr__(self.tag)(alias,
  364. href=url,
  365. class_=the_class)
  366. class ImageElement(LinkElement):
  367. def __init__(self, *args, **kw_args):
  368. super(ImageElement,self).__init__(*args, **kw_args)
  369. def emit(self,element_store, environ,link_type,body,url,the_class, alt=None):
  370. if alt is None:
  371. if link_type == 'url':
  372. alt = urlparse.urlsplit(url).path.split('/')[-1]
  373. else:
  374. alt = body.strip()
  375. return bldr.tag.__getattr__(self.tag)(src=url ,alt=alt, title=alt,
  376. #class_=the_class
  377. )
  378. class Link(InlineElement):
  379. """Finds and builds links."""
  380. def __init__(self,tag, token):
  381. super(Link,self).__init__(tag,token)
  382. self.regexp = re.compile(self.re_string(),re.DOTALL)
  383. def _build(self,mo,element_store, environ):
  384. for tag in self.child_elements:
  385. m = tag.regexp.search(mo.group(1))
  386. if m:
  387. link = tag._build(m,element_store, environ)
  388. if link:
  389. break
  390. else:
  391. link = None
  392. if link:
  393. return bldr.tag(link)
  394. else:
  395. return mo.group(0)
class Macro(WikiElement):
    r"""Finds and processes inline macro elements."""

    def __init__(self, tag, token, func):
        # ``func(name, arg_string, body, is_block, environ)`` produces the
        # macro output; a None func (or a None result) renders the original
        # markup as an "unknown macro".
        super(Macro, self).__init__(tag, token, [])
        self.func = func
        self.regexp = re.compile(self.re_string())

    def _process(self, mos, text, wiki_elements, element_store, environ):
        """Returns genshi Fragments (Elements and text)

        A plain-string result is spliced back into the text and re-parsed as
        wiki markup; anything else is stored and replaced by a ``<<<id>>>``
        placeholder. A list result is a ``[output, tail]`` pair where the
        tail is text still to be parsed.
        """
        assert len(mos) == 1
        mo = mos[0]
        processed = self._build(mo, element_store, environ)
        if isinstance(processed, list):
            tail = processed[1]
            processed = processed[0]
        else:
            tail = ''
        if isinstance(processed, basestring) and not isinstance(processed, Markup):
            text = ''.join([text[:mo.start()], processed, tail,
                            text[mo.end():]])
        else:
            store_id = str(id(processed))
            element_store[store_id] = processed
            text = ''.join([text[:mo.start()], '<<<', store_id, '>>>', tail,
                            text[mo.end():]])
        frags = fragmentize(text, wiki_elements, element_store, environ)
        return frags

    def re_string(self):
        content = '(.*?)'
        return esc_neg_look + re.escape(self.token[0]) + r'(' + MACRO_NAME + \
               content + ')' + esc_neg_look + re.escape(self.token[1])

    # Removes a trailing '/' (preceded by space, quote, or ']') from the arg
    # string — presumably the self-closing macro form; confirm against parser.
    trailing_slash = re.compile(r'(?<=[ "\'\]])/$')

    def _build(self, mo, element_store, environ):
        arg_string = re.sub(self.trailing_slash, '', mo.group(4))
        if self.func:
            value = self.func(mo.group('name'), arg_string, None, False, environ)
        else:
            value = None
        if value is None:
            # Unknown macro: render the original markup, styled for visibility.
            return bldr.tag.code(self.token[0], bldr.tag.span(mo.group('name'), class_="macro_name"),
                                 bldr.tag.span(arg_string, class_="macro_arg_string"),
                                 self.token[1], class_="unknown_macro")
        elif isinstance(value, (basestring, bldr.Fragment, bldr.Element, Stream)):
            return value
        else:
            raise Exception("macros can only return strings and genshi objects")
class BodiedMacro(Macro):
    """Finds and processes macros with bodies.

    Does not span across top level block markup
    (see BodiedBlockMacro's for that)."""

    def __init__(self, tag, token, func):
        super(BodiedMacro, self).__init__(tag, token, func)
        self.func = func
        self.regexp = re.compile(self.re_string(), re.DOTALL)

    def re_string(self):
        # arg_string may not contain newlines; body may (re.DOTALL).
        content = r'(?P<arg_string>[ \S]*?)'
        body = '(?P<body>.+)'
        # <name args> ... </name>, where (?<!/) rejects self-closing forms.
        return esc_neg_look + re.escape(self.token[0]) + MACRO_NAME + \
               content + '(?<!/)' + re.escape(self.token[1]) + \
               body + esc_neg_look + re.escape(self.token[0]) + \
               r'/(?P=name)' + '(?<!/)' + re.escape(self.token[1])

    def _build(self, mo, element_store, environ):
        # The greedy body match may swallow nested macros of the same name.
        # Scan the body for start/end tokens of this macro name; as soon as
        # end tokens outnumber start tokens, cut the body there and push the
        # remainder (with the final closing token re-appended) back out as
        # tail text to be parsed again.
        start = ''.join([esc_neg_look, re.escape(self.token[0]), re.escape(mo.group('name')),
                         r'(?P<arg_string>[ \S]*?)', re.escape(self.token[1])])
        end = ''.join([esc_neg_look, re.escape(self.token[0]), '/', re.escape(mo.group('name')),
                       re.escape(self.token[1])])
        count = 0
        for mo2 in re.finditer(start + '|' + end, mo.group('body')):
            if re.match(end, mo2.group(0)):
                count = count + 1
            else:
                count = count - 1
            if count > 0:
                body = mo.group('body')[:mo2.start()]
                tail = ''.join([mo.group('body')[mo2.end():], self.token[0],
                                '/', mo.group('name'), self.token[1]])
                break
        else:
            body = mo.group('body')
            tail = ''
        if self.func:
            value = self.func(mo.group('name'), mo.group('arg_string'), body, False, environ)
        else:
            value = None
        if value is None:
            # Unknown macro: render the full original markup for visibility.
            content_out = [self.token[0], bldr.tag.span(mo.group('name'), class_="macro_name"),
                           bldr.tag.span(mo.group('arg_string'), class_="macro_arg_string"),
                           self.token[1], bldr.tag.span(mo.group('body'), class_="macro_body"),
                           self.token[0] + '/' + mo.group('name') + self.token[1]]
            return [bldr.tag.code(content_out, class_="unknown_macro", style="white-space:pre-wrap"), tail]
        elif isinstance(value, (basestring, bldr.Fragment, Stream)):
            return [value, tail]
        else:
            raise Exception("macros can only return strings and genshi objects")
class BodiedBlockMacro(WikiElement):
    """Finds and processes block macros with bodies.

    The opening and closing tokens must each be on a line alone without
    leading spaces. These macros can enclose other block level markup
    including pre blocks and other BodiedBlockMacro's."""

    def __init__(self, tag, token, func):
        # NOTE(review): ``func`` is passed as the ``child_elements`` argument
        # of WikiElement.__init__ here (Macro passes [] instead) — looks
        # unintended; verify. ``self.func`` is set correctly just below.
        super(BodiedBlockMacro, self).__init__(tag, token, func)
        self.func = func
        self.regexp = re.compile(self.re_string(), re.DOTALL + re.MULTILINE)

    def re_string(self):
        # The lookahead rejects arg strings containing '>>...>>', which would
        # mean the token closes and reopens on the same line.
        # NOTE(review): this appears to assume the closing token is '>>';
        # confirm against the configured macro tokens.
        arg_string = r'(?P<arg_string>(?![^\n]*>>[^\n]*>>)[ \S]*?)'
        start = '^' + re.escape(self.token[0])
        body = r'(?P<body>.*\n)'
        end = re.escape(self.token[0]) + \
              r'/(?P=name)' + '(?<!/)' + re.escape(self.token[1]) + r'\s*?$'
        return start + '(' + MACRO_NAME + arg_string + ')' + '(?<!/)' + \
               re.escape(self.token[1]) + r'\s*?\n' + body + end

    def _process(self, mos, text, wiki_elements, element_store, environ):
        """Returns genshi Fragments (Elements and text)

        This is mainly for block level markup. See InlineElement
        for the other method.

        A list result from ``_build`` is an ``[output, tail]`` pair; the tail
        is text still to be parsed.
        """
        assert len(mos) == 1
        mo = mos[0]
        processed = self._build(mo, element_store, environ)
        if isinstance(processed, list):
            tail = processed[1]
            processed = processed[0]
        else:
            tail = ''
        if isinstance(processed, basestring) and not isinstance(processed, Markup):
            # Plain-string output is spliced back and re-parsed as wiki markup.
            #print '_process', repr(processed)
            text = ''.join([text[:mo.start()], processed, tail,
                            text[mo.end():]])
            frags = fragmentize(text, wiki_elements, element_store, environ)
        else:
            frags = []
            # call again for leading text and extend the result list
            if mo.start():
                frags.extend(fragmentize(text[:mo.start()], wiki_elements[1:],
                                         element_store, environ))
            # append the found wiki element to the result list
            frags.append(processed)
            # make the source output easier to read
            if self.append_newline:
                frags.append('\n')
            # call again for trailing text and extend the result list
            if tail or mo.end() < len(text):
                frags.extend(fragmentize(tail + text[mo.end():], wiki_elements,
                                         element_store, environ))
        return frags

    def _build(self, mo, element_store, environ):
        # As in BodiedMacro._build: the greedy body may have swallowed nested
        # same-name macros, so rebalance start/end tokens and push any excess
        # back out as tail text (with the closing token restored).
        start = ''.join(['^', re.escape(self.token[0]), re.escape(mo.group('name')),
                         r'(?P<arg_string>(?![^\n]*>>[^\n]*>>)[ \S]*?)', re.escape(self.token[1]), r'\s*?\n'])
        end = ''.join(['^', re.escape(self.token[0]), '/', re.escape(mo.group('name')),
                       re.escape(self.token[1]), r'\s*?$'])
        count = 0
        for mo2 in re.finditer(start + '|' + end, mo.group('body'), re.MULTILINE):
            if re.match(end, mo2.group(0)):
                count = count + 1
            else:
                count = count - 1
            if count > 0:
                body = mo.group('body')[:mo2.start()]
                tail = ''.join([mo.group('body')[mo2.end():], self.token[0],
                                '/', mo.group('name'), self.token[1], '\n'])
                break
        else:
            body = mo.group('body')
            tail = ''
        if self.func:
            value = self.func(mo.group('name'), mo.group('arg_string'), body, True, environ)
        else:
            value = None
        if value is None:
            # Unknown macro: render the original markup in a pre block.
            return [bldr.tag.pre(self.token[0], bldr.tag.span(mo.group('name'), class_="macro_name"),
                                 bldr.tag.span(mo.group('arg_string'), class_="macro_arg_string"),
                                 self.token[1], '\n', bldr.tag.span(mo.group('body'), class_="macro_body"),
                                 self.token[0] + '/' + mo.group('name') + self.token[1],
                                 class_="unknown_macro"), tail]
        elif (isinstance(value, (Stream, basestring)) or
              (isinstance(value, bldr.Element) and value.tag in BLOCK_TAGS)):
            return [value, tail]
        # Add a p tag if the value is a Fragment or Element that needs one
        elif isinstance(value, bldr.Fragment):
            return [bldr.tag.p(value), tail]
        else:
            raise Exception("macros can only return strings and genshi objects")
class RawLink(InlineElement):
    """Used to find raw urls in wiki text and build xml from them.

    >>> raw_link = RawLink(tag='a')
    >>> mo = raw_link.regexp.search(" a http://www.google.com url ")
    >>> raw_link.href(mo)
    'http://www.google.com'
    >>> raw_link._build(mo,{},None).generate().render()
    '<a href="http://www.google.com">http://www.google.com</a>'
    """

    # Only these schemes are rendered as live links; other matches pass
    # through as plain text (see _build).
    linking_protocols = ['http', 'https']

    def __init__(self, tag):
        super(RawLink, self).__init__(tag=tag, token=None)
        self.regexp = re.compile(self.re_string())

    def re_string(self):
        # group(1): optional escape char that suppresses linking;
        # group(2): the url; group(3): the scheme.
        escape = '(' + re.escape(escape_char) + ')?'
        #protocol = '((https?|ftp)://'
        protocol = '((https?)://'
        rest_of_url = r'\S+?)'
        #allow one punctuation character or '**' or '//'. Don't include a placeholder.
        look_ahead = r'(?=([>)}\]]?[,.?!:;"\']?(([^a-zA-Z0-9])\6)?(\s|$))|<<<)'
        return escape + protocol + rest_of_url + look_ahead

    def _build(self, mo, element_store, environ):
        # Escaped urls and non-linking protocols render as plain text.
        if (not mo.group(1)) and (mo.group(3) in self.linking_protocols):
            return bldr.tag.__getattr__(self.tag)(self.alias(mo, element_store),
                                                  href=self.href(mo))
        else:
            return self.href(mo)

    def href(self, mo):
        """Returns the string for the href attribute of the Element."""
        if sanitizer.is_safe_uri(mo.group(2)):
            return mo.group(2)
        else:
            return "unsafe_uri_detected"

    def alias(self, mo, element_store):
        """Returns the string for the content of the Element."""
        return self.href(mo)
class URLLink(WikiElement):
    """Used to find url type links inside a link.

    The scope of these is within link markup only (i.e., [[url]])

    >>> url_link = URLLink('a','|')
    >>> mo = url_link.regexp.search(" http://www.google.com| here ")
    >>> url_link.href(mo)
    'http://www.google.com'
    >>> url_link._build(mo,{},None).generate().render()
    '<a href="http://www.google.com">here</a>'
    """

    def __init__(self, tag, delimiter):
        super(URLLink, self).__init__(tag, '')
        self.delimiter = delimiter
        self.regexp = re.compile(self.re_string(), re.DOTALL)

    def re_string(self):
        # group(1): the url; group(2): scheme or leading '/';
        # group(3): optional "|alias" part; group(4): the alias text.
        protocol = r'^\s*((\w+?:|/)'
        rest_of_url = r'[\S\n]*?)\s*'
        alias = r'(' + re.escape(self.delimiter) + r' *(.*?))? *$'
        return protocol + rest_of_url + alias

    def _build(self, mo, element_store, environ):
        # Unsafe uris yield no element; the caller then falls back to raw text.
        if not self.href(mo):
            return None
        return bldr.tag.__getattr__(self.tag)(self.alias(mo, element_store, environ),
                                              href=self.href(mo))

    def href(self, mo):
        """Returns the string for the href attribute of the Element."""
        if sanitizer.is_safe_uri(mo.group(1)):
            return mo.group(1)
        else:
            return None

    def alias(self, mo, element_store, environ):
        """Returns the string for the content of the Element."""
        if not mo.group(4):
            return self.href(mo)
        else:
            return fragmentize(mo.group(4), self.child_elements, element_store, environ)
class InterWikiLink(WikiElement):
    """Used to match interwiki links inside a link.

    The search scope for these is only inside links.

    >>> interwiki_link = InterWikiLink('a',
    ... delimiter1=':', delimiter2 = '|',
    ... base_urls=dict(somewiki='http://somewiki.org/',
    ... bigwiki='http://bigwiki.net/'),
    ... links_funcs={},default_space_char='_',
    ... space_chars={})
    >>> mo = interwiki_link.regexp.search(" somewiki:Home Page|steve ")
    >>> interwiki_link.href(mo)
    'http://somewiki.org/Home_Page'
    >>> interwiki_link.alias(mo,{},None)
    ['steve']
    """

    def __init__(self, tag, delimiter1,
                 delimiter2, base_urls, links_funcs, default_space_char, space_chars):
        super(InterWikiLink, self).__init__(tag, '')
        self.delimiter1 = delimiter1
        self.delimiter2 = delimiter2
        #self.regexp = re.compile(self.re_string())
        self.base_urls = base_urls
        self.links_funcs = links_funcs
        self.default_space_char = default_space_char
        self.space_chars = space_chars
        self.regexp = re.compile(self.re_string())

    def re_string(self):
        # group(1): wiki id; group(2): page name; group(5): alias text.
        # Unlike LinkElement, any \w+ wiki id matches here; unknown ids are
        # rejected later by href().
        #all_wikis = set(self.links_funcs.keys() + self.base_urls.keys())
        #wiki_id = '(' + '|'.join(all_wikis) + ')'
        wiki_id = r'(\w+)'
        optional_spaces = ' *'
        page_name = r'(\S+?( \S+?)*)' #allows any number of single spaces
        alias = r'(' + re.escape(self.delimiter2) + r' *(.*?))? *$'
        return '^' + optional_spaces + wiki_id + optional_spaces + \
               re.escape(self.delimiter1) + optional_spaces + page_name + \
               optional_spaces + alias

    def page_name(self, mo):
        """Return the page name with spaces replaced per the target wiki."""
        space_char = self.space_chars.get(mo.group(1), self.default_space_char)
        return mo.group(2).replace(' ', space_char)

    def href(self, mo):
        """Resolve the url for the matched link; None for unknown wiki ids."""
        linktype = mo.group(1)
        base_url = self.base_urls.get(linktype)
        link_func = self.links_funcs.get(linktype)
        if not (link_func or base_url):
            return None
        else:
            href = self.page_name(mo)
            if link_func:
                href = link_func(href)
            else:
                href = urllib.quote(href.encode('utf-8'))
            if base_url:
                href = urlparse.urljoin(base_url, href)
            return href

    def _build(self, mo, element_store, environ):
        # Unknown wiki id: restore the surrounding link tokens as plain text.
        # NOTE(review): '[[' / ']]' are hard-coded here — verify they match
        # the parser's configured link tokens.
        if not self.href(mo):
            return '[[' + mo.group(0) + ']]'
        return bldr.tag.__getattr__(self.tag)(self.alias(mo, element_store, environ),
                                              href=self.href(mo))

    def alias(self, mo, element_store, environ):
        """Returns the string for the content of the Element."""
        if not mo.group(5):
            return ''.join([mo.group(1), self.delimiter1, mo.group(2)])
        else:
            return fragmentize(mo.group(5), self.child_elements, element_store, environ)
class WikiLink(WikiElement):
    """Used to match wiki links inside a link.

    The search scope for these is only inside links.

    >>> wiki_link = WikiLink('a','|',base_url='http://somewiki.org/',
    ... space_char='_',class_func=None, path_func=None)
    >>> mo = wiki_link.regexp.search(" Home Page |Home")
    >>> wiki_link.href(mo)
    'http://somewiki.org/Home_Page'
    >>> wiki_link.alias(mo,{},None)
    ['Home']
    """

    def __init__(self, tag, delimiter,
                 base_url, space_char, class_func, path_func):
        super(WikiLink, self).__init__(tag, '')
        self.delimiter = delimiter
        self.base_url = base_url
        self.space_char = space_char
        self.class_func = class_func
        self.path_func = path_func
        self.regexp = re.compile(self.re_string())

    def re_string(self):
        # group(1): page name; group(3): optional "|alias"; group(4): alias text.
        optional_spaces = ' *'
        page_name = r'(\S+?( \S+?)*?)' #allows any number of single spaces
        alias = r'(' + re.escape(self.delimiter) + r' *(.*?))? *$'
        return '^' + optional_spaces + page_name + optional_spaces + \
               alias

    def page_name(self, mo):
        """Return the page name with spaces replaced by ``self.space_char``."""
        return mo.group(1).replace(' ', self.space_char)

    def href(self, mo, environ):
        """Resolve the page url via ``path_func`` or by quoting the page name."""
        if self.path_func:
            the_path = self.path_func(self.tag, self.page_name(mo), environ)
        else:
            the_path = urllib.quote(self.page_name(mo).encode('utf-8'))
        return urlparse.urljoin(self.base_url, the_path)

    def _build(self, mo, element_store, environ):
        if self.class_func:
            the_class = self.class_func(self.page_name(mo))
        else:
            the_class = None
        return bldr.tag.__getattr__(self.tag)(self.alias(mo, element_store, environ),
                                              href=self.href(mo, environ),
                                              class_=the_class)

    def alias(self, mo, element_store, environ):
        """Returns the string for the content of the Element."""
        # Tests group(3) (delimiter present) but renders group(4) (alias text),
        # so "name|" with an empty alias fragmentizes the empty string.
        if not mo.group(3):
            return mo.group(1)
        else:
            return fragmentize(mo.group(4), self.child_elements, element_store, environ)
  762. class List(BlockElement):
  763. """Finds list (ordered, unordered, and definition) wiki elements.
  764. group(1) of the match object includes all lines from the list
  765. including newline characters.
  766. """
  767. def __init__(self, tag, token,stop_tokens=None):
  768. self.stop_tokens = stop_tokens
  769. super(List,self).__init__(tag, token)
  770. #self.stop_tokens = stop_tokens
  771. self.regexp = re.compile(self.re_string(),re.DOTALL+re.MULTILINE)
  772. def re_string(self):
  773. """This re_string is for finding generic block elements like
  774. lists (ordered, unordered, and definition) that start with a
  775. single token.
  776. """
  777. leading_whitespace = r'^([ \t]*'
  778. only_one_token = re.escape(self.token)+ '(?!' + re.escape(self.token) + ')'
  779. rest_of_list = r'.*?(?:\n|\Z))'
  780. only_one_stop_token = '([' + re.escape(self.stop_tokens) + r'])(?!\3)'
  781. look_ahead = '(?=([ \t]*' + only_one_stop_token + '|$))'
  782. return leading_whitespace + only_one_token + rest_of_list + \
  783. look_ahead
class ListItem(BlockElement):
    r"""Matches the current list item.

    Everything up to the next same-level list item is matched, so
    nested (deeper) items stay inside the current item's content.

    >>> list_item = ListItem('li','#*')
    >>> mo = list_item.regexp.search("*one\n**one.1\n**one.2\n*two\n")
    >>> mo.group(3)
    'one\n**one.1\n**one.2\n'
    >>> mo.group(0)
    '*one\n**one.1\n**one.2\n'
    """
    # Nested list content is kept inside the item, so no newline is appended.
    append_newline = False

    def __init__(self, tag, list_tokens):
        """Constructor for list items.

        :parameters:
          list_tokens
            A string that includes the tokens used for lists
        """
        # NOTE(review): list_tokens is inserted into a regex character
        # class unescaped below — assumes it holds no regex
        # metacharacters other than list tokens like '#*'; confirm.
        self.list_tokens = list_tokens
        super(ListItem,self).__init__(tag, None)
        self.regexp = re.compile(self.re_string(),re.DOTALL)

    def re_string(self):
        # group(1): the full leading token run; group(2): its single token
        # (referenced as \1 and \2 below); group(3): the item content.
        whitespace = r'[ \t]*'
        item_start = '(([' + self.list_tokens + r'])\2*)'
        rest_of_item = r'(.*?(?:\n|\Z))'
        # The match stops just before the next item at the SAME level:
        # the same token run (\1) not followed by one more token (\2).
        start_of_same_level_item = r'\1(?!\2)'
        look_ahead = r'(?=(' + whitespace + start_of_same_level_item + '|$))'
        return whitespace + item_start + whitespace + \
               rest_of_item + look_ahead

    def _build(self,mo,element_store, environ):
        # Only the content (group(3)) is re-parsed; the token prefix is dropped.
        return bldr.tag.__getattr__(self.tag)(fragmentize(mo.group(3),
                                                          self.child_elements,
                                                          element_store, environ))
  816. class NestedList(WikiElement):
  817. r"""Finds a list in the current list item.
  818. >>> nested_ul = NestedList('ul','*')
  819. >>> mo = nested_ul.regexp.search('one\n**one.1\n**one.2\n')
  820. >>> mo.group(1)
  821. '**one.1\n**one.2\n'
  822. >>> mo.group(0) == mo.group(1)
  823. True
  824. """
  825. def __init__(self, tag, token):
  826. super(NestedList,self).__init__(tag, token)
  827. self.regexp = re.compile(self.re_string(),re.DOTALL+re.MULTILINE)
  828. def re_string(self):
  829. look_behind = r'(?<=\n)' # have to avoid finding a list on the first line
  830. whitespace = r'(\s*'
  831. rest_of_list = '.*$)'
  832. return look_behind + '^' + whitespace + re.escape(self.token) + \
  833. rest_of_list
  834. class DefinitionTerm(BlockElement):
  835. r"""Processes definition terms.
  836. >>> term = DefinitionTerm('dt',';',stop_token=':')
  837. >>> mo1,mo2 = term.regexp.finditer(";term1\n:def1\n;term2:def2\n")
  838. >>> mo1.group(1), mo2.group(1)
  839. ('term1', 'term2')
  840. >>> mo1.group(0), mo2.group(0)
  841. (';term1\n', ';term2')
  842. group(1) of the match object is the term line or up to the first ':'
  843. """
  844. def __init__(self, tag, token,stop_token):
  845. super(DefinitionTerm,self).__init__(tag, token)
  846. self.stop_token = stop_token
  847. self.regexp = re.compile(self.re_string(),re.DOTALL+re.MULTILINE)
  848. def re_string(self):
  849. look_ahead = r'(\n|(?=(' + esc_neg_look + re.escape(self.stop_token) + r'|$)))'
  850. return r'^[ \t]*' + re.escape(self.token) + r'[ \t]*(.*?' + \
  851. re.escape(self.stop_token) + '?)\s*' + look_ahead
  852. class DefinitionDef(BlockElement):
  853. r"""Processes definitions.
  854. >>> definition = DefinitionDef('dd',':')
  855. >>> mo1,mo2 = definition.regexp.finditer(":def1a\ndef1b\n:def2\n")
  856. >>> mo1.group(1), mo2.group(1)
  857. ('def1a\ndef1b', 'def2')
  858. >>> mo1.group(0), mo2.group(0)
  859. (':def1a\ndef1b\n', ':def2\n')
  860. group(1) of the match object includes all lines from the defintion
  861. up to the next definition.
  862. """
  863. def __init__(self, tag, token):
  864. super(DefinitionDef,self).__init__(tag, token)
  865. self.regexp = re.compile(self.re_string(),re.DOTALL+re.MULTILINE)
  866. def re_string(self):
  867. look_ahead = r'(?=(^[ \t]*' + re.escape(self.token) + r')|\Z)'
  868. return r'^[ \t]*' + re.escape(self.token) + r'?[ \t]*(.+?)\s*' + look_ahead
class Paragraph(BlockElement):
    """This should be the last outer-level wiki element to be searched.

    Anything that is left over will be placed in a paragraph unless it
    looks like block content according to xhtml1 strict. Block content is
    defined here as valid children of the <body> element (see
    BLOCK_ONLY_TAGS, which the build step below checks). Only genshi
    Element objects are evaluated for their tag; Fragments and strings
    are treated as inline while Streams and Markup are block content.
    """

    def __init__(self, tag):
        super(Paragraph,self).__init__(tag,None)
        self.regexp = re.compile(self.re_string(),re.DOTALL)#+re.MULTILINE)

    def re_string(self):
        # One chunk: everything up to an optional trailing newline.
        return r'^(.*?)\n?$'

    def _build(self,mo,element_store, environ):
        content = fragmentize(mo.group(1), self.child_elements, element_store, environ)
        # Check each list item and record the indices of those that are
        # block-only (genshi Elements with a block-only tag, Streams, Markup).
        block_only_frags = []
        for i,element in enumerate(content):
            if ((isinstance(element, bldr.Element) and
                 element.tag in BLOCK_ONLY_TAGS) or
                isinstance(element,(Stream,Markup))):
                block_only_frags.append(i)
        # Build a new result list if needed: block fragments pass through
        # as-is; each run of inline fragments between them gets wrapped
        # in a paragraph tag.
        if block_only_frags:
            new_content = []
            last_i = -1
            for i in block_only_frags:
                if content[last_i+1:i]:
                    # A run that is just a lone newline is kept bare
                    # instead of producing an empty paragraph.
                    if not (len(content[last_i+1:i])==1 and
                            content[last_i+1] == '\n'):
                        new_content.append(bldr.tag.__getattr__(self.tag)(content[last_i+1:i]))
                    else:
                        new_content.append('\n')
                new_content.append(content[i])
                last_i = i
            # Wrap any trailing inline run after the last block fragment.
            if content[last_i+1:]:
                new_content.append(bldr.tag.__getattr__(self.tag)(content[last_i+1:]))
            return bldr.tag(new_content)
        else:
            # No block content at all: one plain paragraph.
            return bldr.tag.__getattr__(self.tag)(content)
  909. class Heading(BlockElement):
  910. r"""Finds heading wiki elements.
  911. >>> h1 = Heading(['h1','h2'],'=')
  912. >>> mo = h1.regexp.search('before\n = An important thing = \n after')
  913. >>> mo.group(2)
  914. 'An important thing'
  915. >>> mo.group(0)
  916. ' = An important thing = \n'
  917. """
  918. def __init__(self, tag, token):
  919. super(Heading,self).__init__('',token)
  920. self.tags = tag
  921. self.regexp = re.compile(self.re_string(),re.MULTILINE)
  922. def re_string(self):
  923. whitespace = r'[ \t]*'
  924. tokens = '(' + re.escape(self.token) + '{1,' + str(len(self.tags)) +'})'
  925. content = '(.*?)'
  926. trailing_markup = '(' + re.escape(self.token) + r'+[ \t]*)?(\n|\Z)'
  927. return '^' + whitespace + tokens + \
  928. whitespace + content + whitespace + trailing_markup
  929. def _build(self,mo,element_store, environ):
  930. heading_tag = self.tags[len(mo.group(1))-1]
  931. return bldr.tag.__getattr__(heading_tag)(fragmentize(mo.group(2),
  932. self.child_elements,
  933. element_store, environ))
  934. class Table(BlockElement):
  935. r"""Find tables.
  936. >>> table = Table('table','|')
  937. >>> mo = table.regexp.search("before\n | one | two |\n|one|two \n hi")
  938. >>> mo.group(1)
  939. ' | one | two |\n|one|two \n'
  940. >>> mo.group(0) == mo.group(1)
  941. True
  942. """
  943. def __init__(self, tag, token):
  944. super(Table,self).__init__(tag,token)
  945. self.regexp = re.compile(self.re_string(),re.MULTILINE)
  946. def re_string(self):
  947. whitespace = r'[ \t]*'
  948. rest_of_line = r'.*?(\n|\Z)'
  949. return '^((' + whitespace + re.escape(self.token) + \
  950. rest_of_line + ')+)'
  951. class TableRow(BlockElement):
  952. r"""Finds rows in a table.
  953. >>> row = TableRow('tr','|')
  954. >>> mo = row.regexp.search(' | one | two |\n|one|two \n')
  955. >>> mo.group(1)
  956. '| one | two '
  957. >>> mo.group(0)
  958. ' | one | two |\n'
  959. """
  960. def __init__(self, tag, token):
  961. super(TableRow,self).__init__(tag,token)
  962. self.regexp = re.compile(self.re_string(),re.MULTILINE)
  963. def re_string(self):
  964. whitespace = r'[ \t]*'
  965. content = '(' + re.escape(self.token) + '.*?)'
  966. trailing_token = re.escape(self.token) + '?'
  967. return '^' + whitespace + content + trailing_token + \
  968. whitespace + r'(\n|\Z)'
  969. class TableCell(WikiElement):
  970. r"""Finds cells in a table row.
  971. >>> cell = TableCell('td','|')
  972. >>> mo = cell.regexp.search('| one | two ')
  973. >>> mo.group(1)
  974. 'one'
  975. >>> mo.group(0)
  976. '| one '
  977. """
  978. def __init__(self, tag, token):
  979. super(TableCell,self).__init__(tag,token )
  980. self.regexp = re.compile(self.re_string())
  981. def re_string(self):
  982. whitespace = r'[ \t]*'
  983. content = '(.*?)'
  984. look_ahead = '((?=' + esc_neg_look + re.escape(self.token[0]) + ')|$)'
  985. return esc_neg_look + re.escape(self.token) + whitespace + \
  986. content + whitespace + look_ahead
  987. ##class Link(InlineElement):
  988. ##
  989. ## """Finds and builds links."""
  990. ##
  991. ## def __init__(self,tag, token):
  992. ## super(Link,self).__init__(tag,token)
  993. ##
  994. ## def _build(self,mo,element_store, environ):
  995. ##
  996. ## for tag in self.child_elements:
  997. ## m = tag.regexp.search(mo.group(1))
  998. ## if m:
  999. ## link = tag._build(m,element_store, environ)
  1000. ## if link:
  1001. ## break
  1002. ## else:
  1003. ## link = None
  1004. ##
  1005. ## if link:
  1006. ## return bldr.tag(link)
  1007. ## else:
  1008. ## return mo.group(0)
  1009. class Image(InlineElement):
  1010. """Processes image elements.
  1011. >>> img = Image('img',('{{','}}'), delimiter='|')
  1012. >>> mo = img.regexp.search('{{ picture.jpg | An image of a house }}')
  1013. >>> img._build(mo,{},None).generate().render()
  1014. '<img src="picture.jpg" alt="An image of a house" title="An image of a house"/>'
  1015. """
  1016. def __init__(self, tag, token, delimiter):
  1017. super(Image,self).__init__(tag,token )
  1018. self.regexp = re.compile(self.re_string())
  1019. self.delimiter = delimiter
  1020. self.src_regexp = re.compile(r'^\s*(\S+)\s*$')
  1021. def _build(self,mo,element_store, environ):
  1022. body = mo.group(1).split(self.delimiter,1)
  1023. src_mo = self.src_regexp.search(body[0])
  1024. if not src_mo:
  1025. return bldr.tag.span('Bad Image src')
  1026. if sanitizer.is_safe_uri(src_mo.group(1)):
  1027. link = src_mo.group(1)
  1028. else:
  1029. link = "unsafe_uri_detected"
  1030. if len(body) == 1:
  1031. alias = link
  1032. else:
  1033. alias = body[1].strip()
  1034. return bldr.tag.__getattr__(self.tag)(src=link ,alt=alias, title=alias)
class NoWikiElement(InlineElement):
    """Inline no-wiki.

    When two or more end tokens are found together, only the last marks
    the end of the element.
    """

    def __init__(self, tag, token):
        # `token` may be a single string used for both ends, or a
        # (start, end) pair — see re_string() below.
        super(NoWikiElement,self).__init__(tag,token )
        self.regexp = re.compile(self.re_string(),re.DOTALL)

    def _build(self,mo,element_store, environ):
        # Escape characters inside no-wiki content stay literal
        # (remove_escapes=False); an empty tag wraps in a bare fragment.
        if self.tag:
            return bldr.tag.__getattr__(self.tag)(
                   fragmentize(mo.group(1), self.child_elements,
                               element_store,environ,
                               remove_escapes=False))
        else:
            return bldr.tag(fragmentize(mo.group(1),self.child_elements,
                                        element_store, environ,
                                        remove_escapes=False))

    def re_string(self):
        # The trailing `X*` after the content absorbs any run of repeated
        # end characters so that only the LAST end token closes the element.
        if isinstance(self.token,str):
            content = '(.+?' + re.escape(self.token[-1]) + '*)'
            return esc_neg_look + re.escape(self.token) + \
                   content + re.escape(self.token)
        else:
            # Asymmetric (start, end) token pair.
            content = '(.+?' + re.escape(self.token[1][-1]) + '*)'
            return esc_neg_look + re.escape(self.token[0]) + \
                   content + re.escape(self.token[1])
  1062. class PreBlock(BlockElement):
  1063. """A preformatted block.
  1064. If a closing token is found on a line with a space as the first
  1065. character, the space will be removed from the output.
  1066. """

Large files files are truncated, but you can click here to view the full file