PageRenderTime 61ms CodeModel.GetById 20ms RepoModel.GetById 1ms app.codeStats 0ms

/lib/markdown.py

https://github.com/anestesya/bloggart
Python | 1929 lines | 1641 code | 153 blank | 135 comment | 99 complexity | 4d12ed4544092f4d3af607c7f9314313 MD5 | raw file

Large files are truncated, but you can click here to view the full file

#!/usr/bin/env python

# Release identifier as a display string.
version = "1.7"
# The same release as a (major, minor, micro, tag) tuple.
version_info = (1,7,0,"rc-2")
# NOTE(review): looks like a version-control keyword expansion - confirm.
__revision__ = "$Rev: 72 $"
  5. """
  6. Python-Markdown
  7. ===============
  8. Converts Markdown to HTML. Basic usage as a module:
  9. import markdown
  10. md = markdown.Markdown()
  11. html = md.convert(your_text_string)
  12. See http://www.freewisdom.org/projects/python-markdown/ for more
  13. information and instructions on how to extend the functionality of the
  14. script. (You might want to read that before you try modifying this
  15. file.)
  16. Started by [Manfred Stienstra](http://www.dwerg.net/). Continued and
  17. maintained by [Yuri Takhteyev](http://www.freewisdom.org) and [Waylan
  18. Limberg](http://achinghead.com/).
  19. Contact: yuri [at] freewisdom.org
  20. waylan [at] gmail.com
  21. License: GPL 2 (http://www.gnu.org/copyleft/gpl.html) or BSD
  22. """
  23. import re, sys, codecs
  24. from logging import getLogger, StreamHandler, Formatter, \
  25. DEBUG, INFO, WARN, ERROR, CRITICAL
# Level applied to the console handler below: records under this level are
# accepted by the logger itself but not written by that handler.
MESSAGE_THRESHOLD = CRITICAL

# Configure debug message logger (the hard way - to support python 2.3)
logger = getLogger('MARKDOWN')
logger.setLevel(DEBUG) # This is restricted by handlers later
console_hndlr = StreamHandler()
formatter = Formatter('%(name)s-%(levelname)s: "%(message)s"')
console_hndlr.setFormatter(formatter)
console_hndlr.setLevel(MESSAGE_THRESHOLD)
logger.addHandler(console_hndlr)
  35. def message(level, text):
  36. ''' A wrapper method for logging debug messages. '''
  37. logger.log(level, text)
# --------------- CONSTANTS YOU MIGHT WANT TO MODIFY -----------------

TAB_LENGTH = 4            # expand tabs to this many spaces
ENABLE_ATTRIBUTES = True  # @id = xyz -> <... id="xyz">
SMART_EMPHASIS = 1        # this_or_that does not become this<i>or</i>that
HTML_REMOVED_TEXT = "[HTML_REMOVED]" # text used instead of HTML in safe mode

# Inclusive (first, last) character pairs. getBidiType() reports "rtl" for an
# alphabetic character that falls inside any of these ranges.
RTL_BIDI_RANGES = ( (u'\u0590', u'\u07FF'),
                    # from Hebrew to Nko (includes Arabic, Syriac and Thaana)
                    (u'\u2D30', u'\u2D7F'),
                    # Tifinagh
                    )

# Unicode Reference Table:
# 0590-05FF - Hebrew
# 0600-06FF - Arabic
# 0700-074F - Syriac
# 0750-077F - Arabic Supplement
# 0780-07BF - Thaana
# 07C0-07FF - Nko
  55. BOMS = { 'utf-8': (codecs.BOM_UTF8, ),
  56. 'utf-16': (codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE),
  57. #'utf-32': (codecs.BOM_UTF32_LE, codecs.BOM_UTF32_BE)
  58. }
  59. def removeBOM(text, encoding):
  60. convert = isinstance(text, unicode)
  61. for bom in BOMS[encoding]:
  62. bom = convert and bom.decode(encoding) or bom
  63. if text.startswith(bom):
  64. return text.lstrip(bom)
  65. return text
# The following constant specifies the name used in the usage
# statement displayed for python versions lower than 2.3.  (With
# python2.3 and higher the usage statement is generated by optparse
# and uses the actual name of the executable called.)

EXECUTABLE_NAME_FOR_USAGE = "python markdown.py"


# --------------- CONSTANTS YOU _SHOULD NOT_ HAVE TO CHANGE ----------

# a template for html placeholders
# The prefix alone is what TextNode.toxml() tests for; the full template takes
# the stash index through its %d slot.
HTML_PLACEHOLDER_PREFIX = "qaodmasdkwaspemas"
HTML_PLACEHOLDER = HTML_PLACEHOLDER_PREFIX + "%dajkqlsmdqpakldnzsdfls"
  75. BLOCK_LEVEL_ELEMENTS = ['p', 'div', 'blockquote', 'pre', 'table',
  76. 'dl', 'ol', 'ul', 'script', 'noscript',
  77. 'form', 'fieldset', 'iframe', 'math', 'ins',
  78. 'del', 'hr', 'hr/', 'style']
  79. def isBlockLevel (tag):
  80. return ( (tag in BLOCK_LEVEL_ELEMENTS) or
  81. (tag[0] == 'h' and tag[1] in "0123456789") )
  82. """
  83. ======================================================================
  84. ========================== NANODOM ===================================
  85. ======================================================================
  86. The three classes below implement some of the most basic DOM
  87. methods. I use this instead of minidom because I need a simpler
  88. functionality and do not want to require additional libraries.
  89. Importantly, NanoDom does not do normalization, which is what we
  90. want. It also adds extra white space when converting DOM to string
  91. """
  92. ENTITY_NORMALIZATION_EXPRESSIONS = [ (re.compile("&"), "&amp;"),
  93. (re.compile("<"), "&lt;"),
  94. (re.compile(">"), "&gt;")]
  95. ENTITY_NORMALIZATION_EXPRESSIONS_SOFT = [ (re.compile("&(?!\#)"), "&amp;"),
  96. (re.compile("<"), "&lt;"),
  97. (re.compile(">"), "&gt;"),
  98. (re.compile("\""), "&quot;")]
  99. def getBidiType(text):
  100. if not text: return None
  101. ch = text[0]
  102. if not isinstance(ch, unicode) or not ch.isalpha():
  103. return None
  104. else:
  105. for min, max in RTL_BIDI_RANGES:
  106. if ( ch >= min and ch <= max ):
  107. return "rtl"
  108. else:
  109. return "ltr"
  110. class Document:
  111. def __init__ (self):
  112. self.bidi = "ltr"
  113. def appendChild(self, child):
  114. self.documentElement = child
  115. child.isDocumentElement = True
  116. child.parent = self
  117. self.entities = {}
  118. def setBidi(self, bidi):
  119. if bidi:
  120. self.bidi = bidi
  121. def createElement(self, tag, textNode=None):
  122. el = Element(tag)
  123. el.doc = self
  124. if textNode:
  125. el.appendChild(self.createTextNode(textNode))
  126. return el
  127. def createTextNode(self, text):
  128. node = TextNode(text)
  129. node.doc = self
  130. return node
  131. def createEntityReference(self, entity):
  132. if entity not in self.entities:
  133. self.entities[entity] = EntityReference(entity)
  134. return self.entities[entity]
  135. def createCDATA(self, text):
  136. node = CDATA(text)
  137. node.doc = self
  138. return node
  139. def toxml (self):
  140. return self.documentElement.toxml()
  141. def normalizeEntities(self, text, avoidDoubleNormalizing=False):
  142. if avoidDoubleNormalizing:
  143. regexps = ENTITY_NORMALIZATION_EXPRESSIONS_SOFT
  144. else:
  145. regexps = ENTITY_NORMALIZATION_EXPRESSIONS
  146. for regexp, substitution in regexps:
  147. text = regexp.sub(substitution, text)
  148. return text
  149. def find(self, test):
  150. return self.documentElement.find(test)
  151. def unlink(self):
  152. self.documentElement.unlink()
  153. self.documentElement = None
  154. class CDATA:
  155. type = "cdata"
  156. def __init__ (self, text):
  157. self.text = text
  158. def handleAttributes(self):
  159. pass
  160. def toxml (self):
  161. return "<![CDATA[" + self.text + "]]>"
  162. class Element:
  163. type = "element"
  164. def __init__ (self, tag):
  165. self.nodeName = tag
  166. self.attributes = []
  167. self.attribute_values = {}
  168. self.childNodes = []
  169. self.bidi = None
  170. self.isDocumentElement = False
  171. def setBidi(self, bidi):
  172. if bidi:
  173. orig_bidi = self.bidi
  174. if not self.bidi or self.isDocumentElement:
  175. # Once the bidi is set don't change it (except for doc element)
  176. self.bidi = bidi
  177. self.parent.setBidi(bidi)
  178. def unlink(self):
  179. for child in self.childNodes:
  180. if child.type == "element":
  181. child.unlink()
  182. self.childNodes = None
  183. def setAttribute(self, attr, value):
  184. if not attr in self.attributes:
  185. self.attributes.append(attr)
  186. self.attribute_values[attr] = value
  187. def insertChild(self, position, child):
  188. self.childNodes.insert(position, child)
  189. child.parent = self
  190. def removeChild(self, child):
  191. self.childNodes.remove(child)
  192. def replaceChild(self, oldChild, newChild):
  193. position = self.childNodes.index(oldChild)
  194. self.removeChild(oldChild)
  195. self.insertChild(position, newChild)
  196. def appendChild(self, child):
  197. self.childNodes.append(child)
  198. child.parent = self
  199. def handleAttributes(self):
  200. pass
  201. def find(self, test, depth=0):
  202. """ Returns a list of descendants that pass the test function """
  203. matched_nodes = []
  204. for child in self.childNodes:
  205. if test(child):
  206. matched_nodes.append(child)
  207. if child.type == "element":
  208. matched_nodes += child.find(test, depth+1)
  209. return matched_nodes
  210. def toxml(self):
  211. if ENABLE_ATTRIBUTES:
  212. for child in self.childNodes:
  213. child.handleAttributes()
  214. buffer = ""
  215. if self.nodeName in ['h1', 'h2', 'h3', 'h4']:
  216. buffer += "\n"
  217. elif self.nodeName in ['li']:
  218. buffer += "\n "
  219. # Process children FIRST, then do the attributes
  220. childBuffer = ""
  221. if self.childNodes or self.nodeName in ['blockquote']:
  222. childBuffer += ">"
  223. for child in self.childNodes:
  224. childBuffer += child.toxml()
  225. if self.nodeName == 'p':
  226. childBuffer += "\n"
  227. elif self.nodeName == 'li':
  228. childBuffer += "\n "
  229. childBuffer += "</%s>" % self.nodeName
  230. else:
  231. childBuffer += "/>"
  232. buffer += "<" + self.nodeName
  233. if self.nodeName in ['p', 'li', 'ul', 'ol',
  234. 'h1', 'h2', 'h3', 'h4', 'h5', 'h6']:
  235. if not self.attribute_values.has_key("dir"):
  236. if self.bidi:
  237. bidi = self.bidi
  238. else:
  239. bidi = self.doc.bidi
  240. if bidi=="rtl":
  241. self.setAttribute("dir", "rtl")
  242. for attr in self.attributes:
  243. value = self.attribute_values[attr]
  244. value = self.doc.normalizeEntities(value,
  245. avoidDoubleNormalizing=True)
  246. buffer += ' %s="%s"' % (attr, value)
  247. # Now let's actually append the children
  248. buffer += childBuffer
  249. if self.nodeName in ['p', 'br ', 'li', 'ul', 'ol',
  250. 'h1', 'h2', 'h3', 'h4'] :
  251. buffer += "\n"
  252. return buffer
  253. class TextNode:
  254. type = "text"
  255. attrRegExp = re.compile(r'\{@([^\}]*)=([^\}]*)}') # {@id=123}
  256. def __init__ (self, text):
  257. self.value = text
  258. def attributeCallback(self, match):
  259. self.parent.setAttribute(match.group(1), match.group(2))
  260. def handleAttributes(self):
  261. self.value = self.attrRegExp.sub(self.attributeCallback, self.value)
  262. def toxml(self):
  263. text = self.value
  264. self.parent.setBidi(getBidiType(text))
  265. if not text.startswith(HTML_PLACEHOLDER_PREFIX):
  266. if self.parent.nodeName == "p":
  267. text = text.replace("\n", "\n ")
  268. elif (self.parent.nodeName == "li"
  269. and self.parent.childNodes[0]==self):
  270. text = "\n " + text.replace("\n", "\n ")
  271. text = self.doc.normalizeEntities(text)
  272. return text
  273. class EntityReference:
  274. type = "entity_ref"
  275. def __init__(self, entity):
  276. self.entity = entity
  277. def handleAttributes(self):
  278. pass
  279. def toxml(self):
  280. return "&" + self.entity + ";"
  281. """
  282. ======================================================================
  283. ========================== PRE-PROCESSORS ============================
  284. ======================================================================
  285. Preprocessors munge source text before we start doing anything too
  286. complicated.
  287. There are two types of preprocessors: TextPreprocessor and Preprocessor.
  288. """
  289. class TextPreprocessor:
  290. '''
  291. TextPreprocessors are run before the text is broken into lines.
  292. Each TextPreprocessor implements a "run" method that takes a pointer to a
  293. text string of the document, modifies it as necessary and returns
  294. either the same pointer or a pointer to a new string.
  295. TextPreprocessors must extend markdown.TextPreprocessor.
  296. '''
  297. def run(self, text):
  298. pass
  299. class Preprocessor:
  300. '''
  301. Preprocessors are run after the text is broken into lines.
  302. Each preprocessor implements a "run" method that takes a pointer to a
  303. list of lines of the document, modifies it as necessary and returns
  304. either the same pointer or a pointer to a new list.
  305. Preprocessors must extend markdown.Preprocessor.
  306. '''
  307. def run(self, lines):
  308. pass
class HtmlBlockPreprocessor(TextPreprocessor):
    """Removes html blocks from the source text and stores it.

    run() reads ``self.stash``, which is not set anywhere in this class.
    NOTE(review): presumably an HtmlStash assigned by the caller - confirm.
    """

    def _get_left_tag(self, block):
        # Skip the first character, turn the first ">" into a space and take
        # the first whitespace-delimited token, lower-cased.
        return block[1:].replace(">", " ", 1).split()[0].lower()


    def _get_right_tag(self, left_tag, block):
        # After dropping trailing whitespace, take the len(left_tag)+1
        # characters that precede the final character, lower-cased.
        return block.rstrip()[-len(left_tag)-2:-1].lower()

    def _equal_tags(self, left_tag, right_tag):
        # Decide whether right_tag closes the block opened by left_tag.

        if left_tag == 'div' or left_tag[0] in ['?', '@', '%']: # handle PHP, etc.
            return True
        if ("/" + left_tag) == right_tag:
            return True
        if (right_tag == "--" and left_tag == "--"):
            return True
        elif left_tag == right_tag[1:] \
            and right_tag[0] != "<":
            return True
        else:
            return False

    def _is_oneliner(self, tag):
        # Tags that form a complete block on their own.
        return (tag in ['hr', 'hr/'])


    def run(self, text):
        # Split the text on blank lines, replace raw HTML blocks with stash
        # placeholders, and return the blocks joined by "\n\n" again.

        new_blocks = []
        text = text.split("\n\n")

        # Blocks collected while inside an HTML block that is still open.
        items = []
        left_tag = ''
        right_tag = ''
        in_tag = False # flag

        for block in text:
            if block.startswith("\n"):
                block = block[1:]

            if not in_tag:

                if block.startswith("<"):

                    left_tag = self._get_left_tag(block)
                    right_tag = self._get_right_tag(left_tag, block)

                    # Neither block level nor one of the special "<!", "<?",
                    # "<@", "<%" openers: leave the block untouched.
                    if not (isBlockLevel(left_tag) \
                        or block[1] in ["!", "?", "@", "%"]):
                        new_blocks.append(block)
                        continue

                    if self._is_oneliner(left_tag):
                        new_blocks.append(block.strip())
                        continue

                    if block[1] == "!":
                        # is a comment block
                        left_tag = "--"
                        right_tag = self._get_right_tag(left_tag, block)
                        # keep checking conditions below and maybe just append

                    if block.rstrip().endswith(">") \
                            and self._equal_tags(left_tag, right_tag):
                        new_blocks.append(
                            self.stash.store(block.strip()))
                        continue
                    else: #if not block[1] == "!":
                        # if is block level tag and is not complete
                        items.append(block.strip())
                        in_tag = True
                        continue

                new_blocks.append(block)

            else:
                items.append(block.strip())

                right_tag = self._get_right_tag(left_tag, block)

                if self._equal_tags(left_tag, right_tag):
                    # if find closing tag
                    in_tag = False
                    new_blocks.append(
                        self.stash.store('\n\n'.join(items)))
                    items = []

        # Input ended inside an unclosed block: stash what was collected.
        if items:
            new_blocks.append(self.stash.store('\n\n'.join(items)))
            new_blocks.append('\n')

        return "\n\n".join(new_blocks)

HTML_BLOCK_PREPROCESSOR = HtmlBlockPreprocessor()
  380. class HeaderPreprocessor(Preprocessor):
  381. """
  382. Replaces underlined headers with hashed headers to avoid
  383. the nead for lookahead later.
  384. """
  385. def run (self, lines):
  386. i = -1
  387. while i+1 < len(lines):
  388. i = i+1
  389. if not lines[i].strip():
  390. continue
  391. if lines[i].startswith("#"):
  392. lines.insert(i+1, "\n")
  393. if (i+1 <= len(lines)
  394. and lines[i+1]
  395. and lines[i+1][0] in ['-', '=']):
  396. underline = lines[i+1].strip()
  397. if underline == "="*len(underline):
  398. lines[i] = "# " + lines[i].strip()
  399. lines[i+1] = ""
  400. elif underline == "-"*len(underline):
  401. lines[i] = "## " + lines[i].strip()
  402. lines[i+1] = ""
  403. return lines
  404. HEADER_PREPROCESSOR = HeaderPreprocessor()
  405. class LinePreprocessor(Preprocessor):
  406. """Deals with HR lines (needs to be done before processing lists)"""
  407. blockquote_re = re.compile(r'^(> )+')
  408. def run (self, lines):
  409. for i in range(len(lines)):
  410. prefix = ''
  411. m = self.blockquote_re.search(lines[i])
  412. if m : prefix = m.group(0)
  413. if self._isLine(lines[i][len(prefix):]):
  414. lines[i] = prefix + self.stash.store("<hr />", safe=True)
  415. return lines
  416. def _isLine(self, block):
  417. """Determines if a block should be replaced with an <HR>"""
  418. if block.startswith(" "): return 0 # a code block
  419. text = "".join([x for x in block if not x.isspace()])
  420. if len(text) <= 2:
  421. return 0
  422. for pattern in ['isline1', 'isline2', 'isline3']:
  423. m = RE.regExp[pattern].match(text)
  424. if (m and m.group(1)):
  425. return 1
  426. else:
  427. return 0
  428. LINE_PREPROCESSOR = LinePreprocessor()
  429. class ReferencePreprocessor(Preprocessor):
  430. '''
  431. Removes reference definitions from the text and stores them for later use.
  432. '''
  433. def run (self, lines):
  434. new_text = [];
  435. for line in lines:
  436. m = RE.regExp['reference-def'].match(line)
  437. if m:
  438. id = m.group(2).strip().lower()
  439. t = m.group(4).strip() # potential title
  440. if not t:
  441. self.references[id] = (m.group(3), t)
  442. elif (len(t) >= 2
  443. and (t[0] == t[-1] == "\""
  444. or t[0] == t[-1] == "\'"
  445. or (t[0] == "(" and t[-1] == ")") ) ):
  446. self.references[id] = (m.group(3), t[1:-1])
  447. else:
  448. new_text.append(line)
  449. else:
  450. new_text.append(line)
  451. return new_text #+ "\n"
  452. REFERENCE_PREPROCESSOR = ReferencePreprocessor()
  453. """
  454. ======================================================================
  455. ========================== INLINE PATTERNS ===========================
  456. ======================================================================
  457. Inline patterns such as *emphasis* are handled by means of auxiliary
  458. objects, one per pattern. Pattern objects must be instances of classes
  459. that extend markdown.Pattern. Each pattern object uses a single regular
  460. expression and needs to support the following methods:
  461. pattern.getCompiledRegExp() - returns a regular expression
  462. pattern.handleMatch(m, doc) - takes a match object and returns
  463. a NanoDom node (as a part of the provided
  464. doc) or None
  465. All of python markdown's built-in patterns subclass from Pattern,
  466. but you can add additional patterns that don't.
  467. Also note that all the regular expressions used by inline must
  468. capture the whole block. For this reason, they all start with
  469. '^(.*)' and end with '(.*)$'. In the case of the built-in expressions,
  470. Pattern takes care of adding the "^(.*)" and "(.*)$".
  471. Finally, the order in which regular expressions are applied is very
  472. important - e.g. if we first replace http://.../ links with <a> tags
  473. and _then_ try to replace inline html, we would end up with a mess.
  474. So, we apply the expressions in the following order:
  475. * escape and backticks have to go before everything else, so
  476. that we can preempt any markdown patterns by escaping them.
  477. * then we handle auto-links (must be done before inline html)
  478. * then we handle inline HTML. At this point we will simply
  479. replace all inline HTML strings with a placeholder and add
  480. the actual HTML to a hash.
  481. * then inline images (must be done before links)
  482. * then bracketed links, first regular then reference-style
  483. * finally we apply strong and emphasis
  484. """
  485. NOBRACKET = r'[^\]\[]*'
  486. BRK = ( r'\[('
  487. + (NOBRACKET + r'(\[')*6
  488. + (NOBRACKET+ r'\])*')*6
  489. + NOBRACKET + r')\]' )
  490. NOIMG = r'(?<!\!)'
  491. BACKTICK_RE = r'\`([^\`]*)\`' # `e= m*c^2`
  492. DOUBLE_BACKTICK_RE = r'\`\`(.*)\`\`' # ``e=f("`")``
  493. ESCAPE_RE = r'\\(.)' # \<
  494. EMPHASIS_RE = r'\*([^\*]*)\*' # *emphasis*
  495. STRONG_RE = r'\*\*(.*)\*\*' # **strong**
  496. STRONG_EM_RE = r'\*\*\*([^_]*)\*\*\*' # ***strong***
  497. if SMART_EMPHASIS:
  498. EMPHASIS_2_RE = r'(?<!\S)_(\S[^_]*)_' # _emphasis_
  499. else:
  500. EMPHASIS_2_RE = r'_([^_]*)_' # _emphasis_
  501. STRONG_2_RE = r'__([^_]*)__' # __strong__
  502. STRONG_EM_2_RE = r'___([^_]*)___' # ___strong___
  503. LINK_RE = NOIMG + BRK + r'\s*\(([^\)]*)\)' # [text](url)
  504. LINK_ANGLED_RE = NOIMG + BRK + r'\s*\(<([^\)]*)>\)' # [text](<url>)
  505. IMAGE_LINK_RE = r'\!' + BRK + r'\s*\(([^\)]*)\)' # ![alttxt](http://x.com/)
  506. REFERENCE_RE = NOIMG + BRK+ r'\s*\[([^\]]*)\]' # [Google][3]
  507. IMAGE_REFERENCE_RE = r'\!' + BRK + '\s*\[([^\]]*)\]' # ![alt text][2]
  508. NOT_STRONG_RE = r'( \* )' # stand-alone * or _
  509. AUTOLINK_RE = r'<(http://[^>]*)>' # <http://www.123.com>
  510. AUTOMAIL_RE = r'<([^> \!]*@[^> ]*)>' # <me@example.com>
  511. #HTML_RE = r'(\<[^\>]*\>)' # <...>
  512. HTML_RE = r'(\<[a-zA-Z/][^\>]*\>)' # <...>
  513. ENTITY_RE = r'(&[\#a-zA-Z0-9]*;)' # &amp;
  514. LINE_BREAK_RE = r' \n' # two spaces at end of line
  515. LINE_BREAK_2_RE = r' $' # two spaces at end of text
  516. class Pattern:
  517. def __init__ (self, pattern):
  518. self.pattern = pattern
  519. self.compiled_re = re.compile("^(.*)%s(.*)$" % pattern, re.DOTALL)
  520. def getCompiledRegExp (self):
  521. return self.compiled_re
  522. BasePattern = Pattern # for backward compatibility
  523. class SimpleTextPattern (Pattern):
  524. def handleMatch(self, m, doc):
  525. return doc.createTextNode(m.group(2))
  526. class SimpleTagPattern (Pattern):
  527. def __init__ (self, pattern, tag):
  528. Pattern.__init__(self, pattern)
  529. self.tag = tag
  530. def handleMatch(self, m, doc):
  531. el = doc.createElement(self.tag)
  532. el.appendChild(doc.createTextNode(m.group(2)))
  533. return el
  534. class SubstituteTagPattern (SimpleTagPattern):
  535. def handleMatch (self, m, doc):
  536. return doc.createElement(self.tag)
  537. class BacktickPattern (Pattern):
  538. def __init__ (self, pattern):
  539. Pattern.__init__(self, pattern)
  540. self.tag = "code"
  541. def handleMatch(self, m, doc):
  542. el = doc.createElement(self.tag)
  543. text = m.group(2).strip()
  544. #text = text.replace("&", "&amp;")
  545. el.appendChild(doc.createTextNode(text))
  546. return el
  547. class DoubleTagPattern (SimpleTagPattern):
  548. def handleMatch(self, m, doc):
  549. tag1, tag2 = self.tag.split(",")
  550. el1 = doc.createElement(tag1)
  551. el2 = doc.createElement(tag2)
  552. el1.appendChild(el2)
  553. el2.appendChild(doc.createTextNode(m.group(2)))
  554. return el1
  555. class HtmlPattern (Pattern):
  556. def handleMatch (self, m, doc):
  557. rawhtml = m.group(2)
  558. inline = True
  559. place_holder = self.stash.store(rawhtml)
  560. return doc.createTextNode(place_holder)
  561. class LinkPattern (Pattern):
  562. def handleMatch(self, m, doc):
  563. el = doc.createElement('a')
  564. el.appendChild(doc.createTextNode(m.group(2)))
  565. parts = m.group(9).split('"')
  566. # We should now have [], [href], or [href, title]
  567. if parts:
  568. el.setAttribute('href', parts[0].strip())
  569. else:
  570. el.setAttribute('href', "")
  571. if len(parts) > 1:
  572. # we also got a title
  573. title = '"' + '"'.join(parts[1:]).strip()
  574. title = dequote(title) #.replace('"', "&quot;")
  575. el.setAttribute('title', title)
  576. return el
  577. class ImagePattern (Pattern):
  578. def handleMatch(self, m, doc):
  579. el = doc.createElement('img')
  580. src_parts = m.group(9).split()
  581. if src_parts:
  582. el.setAttribute('src', src_parts[0])
  583. else:
  584. el.setAttribute('src', "")
  585. if len(src_parts) > 1:
  586. el.setAttribute('title', dequote(" ".join(src_parts[1:])))
  587. if ENABLE_ATTRIBUTES:
  588. text = doc.createTextNode(m.group(2))
  589. el.appendChild(text)
  590. text.handleAttributes()
  591. truealt = text.value
  592. el.childNodes.remove(text)
  593. else:
  594. truealt = m.group(2)
  595. el.setAttribute('alt', truealt)
  596. return el
  597. class ReferencePattern (Pattern):
  598. def handleMatch(self, m, doc):
  599. if m.group(9):
  600. id = m.group(9).lower()
  601. else:
  602. # if we got something like "[Google][]"
  603. # we'll use "google" as the id
  604. id = m.group(2).lower()
  605. if not self.references.has_key(id): # ignore undefined refs
  606. return None
  607. href, title = self.references[id]
  608. text = m.group(2)
  609. return self.makeTag(href, title, text, doc)
  610. def makeTag(self, href, title, text, doc):
  611. el = doc.createElement('a')
  612. el.setAttribute('href', href)
  613. if title:
  614. el.setAttribute('title', title)
  615. el.appendChild(doc.createTextNode(text))
  616. return el
  617. class ImageReferencePattern (ReferencePattern):
  618. def makeTag(self, href, title, text, doc):
  619. el = doc.createElement('img')
  620. el.setAttribute('src', href)
  621. if title:
  622. el.setAttribute('title', title)
  623. el.setAttribute('alt', text)
  624. return el
  625. class AutolinkPattern (Pattern):
  626. def handleMatch(self, m, doc):
  627. el = doc.createElement('a')
  628. el.setAttribute('href', m.group(2))
  629. el.appendChild(doc.createTextNode(m.group(2)))
  630. return el
  631. class AutomailPattern (Pattern):
  632. def handleMatch(self, m, doc):
  633. el = doc.createElement('a')
  634. email = m.group(2)
  635. if email.startswith("mailto:"):
  636. email = email[len("mailto:"):]
  637. for letter in email:
  638. entity = doc.createEntityReference("#%d" % ord(letter))
  639. el.appendChild(entity)
  640. mailto = "mailto:" + email
  641. mailto = "".join(['&#%d;' % ord(letter) for letter in mailto])
  642. el.setAttribute('href', mailto)
  643. return el
# One shared instance per inline pattern, each built from the matching *_RE
# fragment defined above.

ESCAPE_PATTERN          = SimpleTextPattern(ESCAPE_RE)
NOT_STRONG_PATTERN      = SimpleTextPattern(NOT_STRONG_RE)

BACKTICK_PATTERN        = BacktickPattern(BACKTICK_RE)
DOUBLE_BACKTICK_PATTERN = BacktickPattern(DOUBLE_BACKTICK_RE)
STRONG_PATTERN          = SimpleTagPattern(STRONG_RE, 'strong')
STRONG_PATTERN_2        = SimpleTagPattern(STRONG_2_RE, 'strong')
EMPHASIS_PATTERN        = SimpleTagPattern(EMPHASIS_RE, 'em')
EMPHASIS_PATTERN_2      = SimpleTagPattern(EMPHASIS_2_RE, 'em')

# DoubleTagPattern takes "outer,inner" tag names.
STRONG_EM_PATTERN       = DoubleTagPattern(STRONG_EM_RE, 'strong,em')
STRONG_EM_PATTERN_2     = DoubleTagPattern(STRONG_EM_2_RE, 'strong,em')

# The trailing space in 'br ' is deliberate: Element.toxml() emits
# "<" + nodeName + "/>" for childless elements, which yields "<br />".
LINE_BREAK_PATTERN      = SubstituteTagPattern(LINE_BREAK_RE, 'br ')
LINE_BREAK_PATTERN_2    = SubstituteTagPattern(LINE_BREAK_2_RE, 'br ')

LINK_PATTERN            = LinkPattern(LINK_RE)
LINK_ANGLED_PATTERN     = LinkPattern(LINK_ANGLED_RE)
IMAGE_LINK_PATTERN      = ImagePattern(IMAGE_LINK_RE)
IMAGE_REFERENCE_PATTERN = ImageReferencePattern(IMAGE_REFERENCE_RE)
REFERENCE_PATTERN       = ReferencePattern(REFERENCE_RE)

HTML_PATTERN            = HtmlPattern(HTML_RE)
ENTITY_PATTERN          = HtmlPattern(ENTITY_RE)

AUTOLINK_PATTERN        = AutolinkPattern(AUTOLINK_RE)
AUTOMAIL_PATTERN        = AutomailPattern(AUTOMAIL_RE)
  665. """
  666. ======================================================================
  667. ========================== POST-PROCESSORS ===========================
  668. ======================================================================
  669. Markdown also allows post-processors, which are similar to
  670. preprocessors in that they need to implement a "run" method. However,
  671. they are run after core processing.
  672. There are two types of post-processors: Postprocessor and TextPostprocessor
  673. """
  674. class Postprocessor:
  675. '''
  676. Postprocessors are run before the dom it converted back into text.
  677. Each Postprocessor implements a "run" method that takes a pointer to a
  678. NanoDom document, modifies it as necessary and returns a NanoDom
  679. document.
  680. Postprocessors must extend markdown.Postprocessor.
  681. There are currently no standard post-processors, but the footnote
  682. extension uses one.
  683. '''
  684. def run(self, dom):
  685. pass
  686. class TextPostprocessor:
  687. '''
  688. TextPostprocessors are run after the dom it converted back into text.
  689. Each TextPostprocessor implements a "run" method that takes a pointer to a
  690. text string, modifies it as necessary and returns a text string.
  691. TextPostprocessors must extend markdown.TextPostprocessor.
  692. '''
  693. def run(self, text):
  694. pass
  695. class RawHtmlTextPostprocessor(TextPostprocessor):
  696. def __init__(self):
  697. pass
  698. def run(self, text):
  699. for i in range(self.stash.html_counter):
  700. html, safe = self.stash.rawHtmlBlocks[i]
  701. if self.safeMode and not safe:
  702. if str(self.safeMode).lower() == 'escape':
  703. html = self.escape(html)
  704. elif str(self.safeMode).lower() == 'remove':
  705. html = ''
  706. else:
  707. html = HTML_REMOVED_TEXT
  708. text = text.replace("<p>%s\n</p>" % (HTML_PLACEHOLDER % i),
  709. html + "\n")
  710. text = text.replace(HTML_PLACEHOLDER % i, html)
  711. return text
  712. def escape(self, html):
  713. ''' Basic html escaping '''
  714. html = html.replace('&', '&amp;')
  715. html = html.replace('<', '&lt;')
  716. html = html.replace('>', '&gt;')
  717. return html.replace('"', '&quot;')
  718. RAWHTMLTEXTPOSTPROCESSOR = RawHtmlTextPostprocessor()
  719. """
  720. ======================================================================
  721. ========================== MISC AUXILIARY CLASSES ====================
  722. ======================================================================
  723. """
  724. class HtmlStash:
  725. """This class is used for stashing HTML objects that we extract
  726. in the beginning and replace with place-holders."""
  727. def __init__ (self):
  728. self.html_counter = 0 # for counting inline html segments
  729. self.rawHtmlBlocks=[]
  730. def store(self, html, safe=False):
  731. """Saves an HTML segment for later reinsertion. Returns a
  732. placeholder string that needs to be inserted into the
  733. document.
  734. @param html: an html segment
  735. @param safe: label an html segment as safe for safemode
  736. @param inline: label a segmant as inline html
  737. @returns : a placeholder string """
  738. self.rawHtmlBlocks.append((html, safe))
  739. placeholder = HTML_PLACEHOLDER % self.html_counter
  740. self.html_counter += 1
  741. return placeholder
class BlockGuru:
    """Helpers for finding where an indented block ends."""

    def _findHead(self, lines, fn, allowBlank=0):

        """Functional magic to help determine boundaries of indented
           blocks.

           @param lines: an array of strings
           @param fn: a function that returns a substring of a string
                      if the string matches the necessary criteria
           @param allowBlank: specifies whether it's ok to have blank
                      lines between matching functions
           @returns: a list of post processes items and the unused
                      remainder of the original list"""

        items = []
        item = -1   # never read below

        i = 0 # to keep track of where we are

        for line in lines:

            if not line.strip() and not allowBlank:
                return items, lines[i:]

            if not line.strip() and allowBlank:
                # If we see a blank line, this _might_ be the end
                i += 1

                # Find the next non-blank line
                for j in range(i, len(lines)):
                    if lines[j].strip():
                        next = lines[j]
                        break
                else:
                    # There is no more text => this is the end
                    break

                # Check if the next non-blank line is still a part of the list

                part = fn(next)

                if part:
                    # The blank line belongs to the block: keep it as an
                    # empty item.
                    items.append("")
                    continue
                else:
                    break # found end of the list

            part = fn(line)

            if part:
                items.append(part)
                i += 1
                continue
            else:
                # First non-matching line: it starts the remainder.
                return items, lines[i:]

        else:
            # Loop ran to completion: every line was consumed, so the
            # remainder sliced below is empty.
            i += 1

        return items, lines[i:]


    def detabbed_fn(self, line):
        """ An auxiliary method to be passed to _findHead """
        # Group 4 of the 'tabbed' pattern is the text after the indent.
        m = RE.regExp['tabbed'].match(line)
        if m:
            return m.group(4)
        else:
            return None


    def detectTabbed(self, lines):
        # Collect the leading indented block; blank lines inside it are
        # allowed.
        return self._findHead(lines, self.detabbed_fn,
                              allowBlank = 1)
  797. def print_error(string):
  798. """Print an error string to stderr"""
  799. sys.stderr.write(string +'\n')
  800. def dequote(string):
  801. """ Removes quotes from around a string """
  802. if ( ( string.startswith('"') and string.endswith('"'))
  803. or (string.startswith("'") and string.endswith("'")) ):
  804. return string[1:-1]
  805. else:
  806. return string
  807. """
  808. ======================================================================
  809. ========================== CORE MARKDOWN =============================
  810. ======================================================================
  811. This stuff is ugly, so if you are thinking of extending the syntax,
  812. see first if you can do it via pre-processors, post-processors,
  813. inline patterns or a combination of the three.
  814. """
  815. class CorePatterns:
  816. """This class is scheduled for removal as part of a refactoring
  817. effort."""
  818. patterns = {
  819. 'header': r'(#*)([^#]*)(#*)', # # A title
  820. 'reference-def': r'(\ ?\ ?\ ?)\[([^\]]*)\]:\s*([^ ]*)(.*)',
  821. # [Google]: http://www.google.com/
  822. 'containsline': r'([-]*)$|^([=]*)', # -----, =====, etc.
  823. 'ol': r'[ ]{0,3}[\d]*\.\s+(.*)', # 1. text
  824. 'ul': r'[ ]{0,3}[*+-]\s+(.*)', # "* text"
  825. 'isline1': r'(\**)', # ***
  826. 'isline2': r'(\-*)', # ---
  827. 'isline3': r'(\_*)', # ___
  828. 'tabbed': r'((\t)|( ))(.*)', # an indented line
  829. 'quoted': r'> ?(.*)', # a quoted block ("> ...")
  830. }
  831. def __init__ (self):
  832. self.regExp = {}
  833. for key in self.patterns.keys():
  834. self.regExp[key] = re.compile("^%s$" % self.patterns[key],
  835. re.DOTALL)
  836. self.regExp['containsline'] = re.compile(r'^([-]*)$|^([=]*)$', re.M)
  837. RE = CorePatterns()
  838. class Markdown:
  839. """ Markdown formatter class for creating an html document from
  840. Markdown text """
  841. def __init__(self, source=None, # depreciated
  842. extensions=[],
  843. extension_configs=None,
  844. safe_mode = False):
  845. """Creates a new Markdown instance.
  846. @param source: The text in Markdown format. Depreciated!
  847. @param extensions: A list if extensions.
  848. @param extension-configs: Configuration setting for extensions.
  849. @param safe_mode: Disallow raw html. """
  850. self.source = source
  851. if source is not None:
  852. message(WARN, "The `source` arg of Markdown.__init__() is depreciated and will be removed in the future. Use `instance.convert(source)` instead.")
  853. self.safeMode = safe_mode
  854. self.blockGuru = BlockGuru()
  855. self.registeredExtensions = []
  856. self.stripTopLevelTags = 1
  857. self.docType = ""
  858. self.textPreprocessors = [HTML_BLOCK_PREPROCESSOR]
  859. self.preprocessors = [HEADER_PREPROCESSOR,
  860. LINE_PREPROCESSOR,
  861. # A footnote preprocessor will
  862. # get inserted here
  863. REFERENCE_PREPROCESSOR]
  864. self.postprocessors = [] # a footnote postprocessor will get
  865. # inserted later
  866. self.textPostprocessors = [# a footnote postprocessor will get
  867. # inserted here
  868. RAWHTMLTEXTPOSTPROCESSOR]
  869. self.prePatterns = []
  870. self.inlinePatterns = [DOUBLE_BACKTICK_PATTERN,
  871. BACKTICK_PATTERN,
  872. ESCAPE_PATTERN,
  873. REFERENCE_PATTERN,
  874. LINK_ANGLED_PATTERN,
  875. LINK_PATTERN,
  876. IMAGE_LINK_PATTERN,
  877. IMAGE_REFERENCE_PATTERN,
  878. AUTOLINK_PATTERN,
  879. AUTOMAIL_PATTERN,
  880. LINE_BREAK_PATTERN_2,
  881. LINE_BREAK_PATTERN,
  882. HTML_PATTERN,
  883. ENTITY_PATTERN,
  884. NOT_STRONG_PATTERN,
  885. STRONG_EM_PATTERN,
  886. STRONG_EM_PATTERN_2,
  887. STRONG_PATTERN,
  888. STRONG_PATTERN_2,
  889. EMPHASIS_PATTERN,
  890. EMPHASIS_PATTERN_2
  891. # The order of the handlers matters!!!
  892. ]
  893. self.registerExtensions(extensions = extensions,
  894. configs = extension_configs)
  895. self.reset()
  896. def registerExtensions(self, extensions, configs):
  897. if not configs:
  898. configs = {}
  899. for ext in extensions:
  900. extension_module_name = "mdx_" + ext
  901. try:
  902. module = __import__(extension_module_name)
  903. except:
  904. message(CRITICAL,
  905. "couldn't load extension %s (looking for %s module)"
  906. % (ext, extension_module_name) )
  907. else:
  908. if configs.has_key(ext):
  909. configs_for_ext = configs[ext]
  910. else:
  911. configs_for_ext = []
  912. extension = module.makeExtension(configs_for_ext)
  913. extension.extendMarkdown(self, globals())
  914. def registerExtension(self, extension):
  915. """ This gets called by the extension """
  916. self.registeredExtensions.append(extension)
  917. def reset(self):
  918. """Resets all state variables so that we can start
  919. with a new text."""
  920. self.references={}
  921. self.htmlStash = HtmlStash()
  922. HTML_BLOCK_PREPROCESSOR.stash = self.htmlStash
  923. LINE_PREPROCESSOR.stash = self.htmlStash
  924. REFERENCE_PREPROCESSOR.references = self.references
  925. HTML_PATTERN.stash = self.htmlStash
  926. ENTITY_PATTERN.stash = self.htmlStash
  927. REFERENCE_PATTERN.references = self.references
  928. IMAGE_REFERENCE_PATTERN.references = self.references
  929. RAWHTMLTEXTPOSTPROCESSOR.stash = self.htmlStash
  930. RAWHTMLTEXTPOSTPROCESSOR.safeMode = self.safeMode
  931. for extension in self.registeredExtensions:
  932. extension.reset()
  933. def _transform(self):
  934. """Transforms the Markdown text into a XHTML body document
  935. @returns: A NanoDom Document """
  936. # Setup the document
  937. self.doc = Document()
  938. self.top_element = self.doc.createElement("span")
  939. self.top_element.appendChild(self.doc.createTextNode('\n'))
  940. self.top_element.setAttribute('class', 'markdown')
  941. self.doc.appendChild(self.top_element)
  942. # Fixup the source text
  943. text = self.source
  944. text = text.replace("\r\n", "\n").replace("\r", "\n")
  945. text += "\n\n"
  946. text = text.expandtabs(TAB_LENGTH)
  947. # Split into lines and run the preprocessors that will work with
  948. # self.lines
  949. self.lines = text.split("\n")
  950. # Run the pre-processors on the lines
  951. for prep in self.preprocessors :
  952. self.lines = prep.run(self.lines)
  953. # Create a NanoDom tree from the lines and attach it to Document
  954. buffer = []
  955. for line in self.lines:
  956. if line.startswith("#"):
  957. self._processSection(self.top_element, buffer)
  958. buffer = [line]
  959. else:
  960. buffer.append(line)
  961. self._processSection(self.top_element, buffer)
  962. #self._processSection(self.top_element, self.lines)
  963. # Not sure why I put this in but let's leave it for now.
  964. self.top_element.appendChild(self.doc.createTextNode('\n'))
  965. # Run the post-processors
  966. for postprocessor in self.postprocessors:
  967. postprocessor.run(self.doc)
  968. return self.doc
  969. def _processSection(self, parent_elem, lines,
  970. inList = 0, looseList = 0):
  971. """Process a section of a source document, looking for high
  972. level structural elements like lists, block quotes, code
  973. segments, html blocks, etc. Some those then get stripped
  974. of their high level markup (e.g. get unindented) and the
  975. lower-level markup is processed recursively.
  976. @param parent_elem: A NanoDom element to which the content
  977. will be added
  978. @param lines: a list of lines
  979. @param inList: a level
  980. @returns: None"""
  981. # Loop through lines until none left.
  982. while lines:
  983. # Check if this section starts with a list, a blockquote or
  984. # a code block
  985. processFn = { 'ul': self._processUList,
  986. 'ol': self._processOList,
  987. 'quoted': self._processQuote,
  988. 'tabbed': self._processCodeBlock}
  989. for regexp in ['ul', 'ol', 'quoted', 'tabbed']:
  990. m = RE.regExp[regexp].match(lines[0])
  991. if m:
  992. processFn[regexp](parent_elem, lines, inList)
  993. return
  994. # We are NOT looking at one of the high-level structures like
  995. # lists or blockquotes. So, it's just a regular paragraph
  996. # (though perhaps nested inside a list or something else). If
  997. # we are NOT inside a list, we just need to look for a blank
  998. # line to find the end of the block. If we ARE inside a
  999. # list, however, we need to consider that a sublist does not
  1000. # need to be separated by a blank line. Rather, the following
  1001. # markup is legal:
  1002. #
  1003. # * The top level list item
  1004. #
  1005. # Another paragraph of the list. This is where we are now.
  1006. # * Underneath we might have a sublist.
  1007. #
  1008. if inList:
  1009. start, lines = self._linesUntil(lines, (lambda line:
  1010. RE.regExp['ul'].match(line)
  1011. or RE.regExp['ol'].match(line)
  1012. or not line.strip()))
  1013. self._processSection(parent_elem, start,
  1014. inList - 1, looseList = looseList)
  1015. inList = inList-1
  1016. else: # Ok, so it's just a simple block
  1017. paragraph, lines = self._linesUntil(lines, lambda line:
  1018. not line.strip())
  1019. if len(paragraph) and paragraph[0].startswith('#'):
  1020. self._processHeader(parent_elem, paragraph)
  1021. elif paragraph:
  1022. self._processParagraph(parent_elem, paragraph,
  1023. inList, looseList)
  1024. if lines and not lines[0].strip():
  1025. lines = lines[1:] # skip the first (blank) line
  1026. def _processHeader(self, parent_elem, paragraph):
  1027. m = RE.regExp['header'].match(paragraph[0])
  1028. if m:
  1029. level = len(m.group(1))
  1030. h = self.doc.createElement("h%d" % level)
  1031. parent_elem.appendChild(h)
  1032. for item in self._handleInline(m.group(2).strip()):
  1033. h.appendChild(item)
  1034. else:
  1035. message(CRITICAL, "We've got a problem header!")
  1036. def _processParagraph(self, parent_elem, paragraph, inList, looseList):
  1037. list = self._handleInline("\n".join(paragraph))
  1038. if ( parent_elem.nodeName == 'li'
  1039. and not (looseList or parent_elem.childNodes)):
  1040. # If this is the first paragraph inside "li", don't
  1041. # put <p> around it - append the paragraph bits directly
  1042. # onto parent_elem
  1043. el = parent_elem
  1044. else:
  1045. # Otherwise make a "p" element
  1046. el = self.doc.createElement("p")
  1047. parent_elem.appendChild(el)
  1048. for item in list:
  1049. el.appendChild(item)
  1050. def _processUList(self, parent_elem, lines, inList):
  1051. self._processList(parent_elem, lines, inList,
  1052. listexpr='ul', tag = 'ul')
  1053. def _processOList(self, parent_elem, lines, inList):
  1054. self._processList(parent_elem, lines, inList,
  1055. listexpr='ol', tag = 'ol')
  1056. def _processList(self, parent_elem, lines, inList, listexpr, tag):
  1057. """Given a list of document lines starting with a list item,
  1058. finds the end of the list, breaks it up, and recursively
  1059. processes each list item and the remainder of the text file.
  1060. @param parent_elem: A dom element to which the content will be added
  1061. @param lines: a list of lines
  1062. @param inList: a level
  1063. @returns: None"""
  1064. ul = self.doc.createElement(tag) # ul might actually be '<ol>'
  1065. parent_elem.appendChild(ul)
  1066. looseList = 0
  1067. # Make a list of list items
  1068. items = []
  1069. item = -1
  1070. i = 0 # a counter to keep track of where we are
  1071. for line in lines:
  1072. loose = 0
  1073. if not line.strip():
  1074. # If we see a blank line, this _might_ be the end of the list
  1075. i += 1
  1076. loose = 1
  1077. # Find the next non-blank line
  1078. for j in range(i, len(lines)):
  1079. if lines[j].strip():
  1080. next = lines[j]
  1081. break
  1082. else:
  1083. # There is no more text => end of the list
  1084. break
  1085. # Check if the next non-blank line is still a part of the list
  1086. if ( RE.regExp['ul'].match(next) or
  1087. RE.regExp['ol'].match(next) or
  1088. RE.regExp['tabbed'].match(next) ):
  1089. # get rid of any white space in the line
  1090. items[item].append(line.strip())
  1091. looseList = loose or looseList
  1092. continue
  1093. else:
  1094. break # found end of the list
  1095. # Now we need to detect list items (at the current level)
  1096. # while also detabing child elements if necessary
  1097. for expr in ['ul', 'ol', 'tabbed']:
  1098. m = RE.regExp[expr].match(line)
  1099. if m:
  1100. if expr in ['ul', 'ol']: # We are looking at a new item
  1101. #if m.group(1) :
  1102. # Removed the check to allow for a blank line
  1103. # at the beginning of the list item
  1104. items.append([m.group(1)])
  1105. item += 1
  1106. elif expr == 'tabbed': # This line needs to be detabbed
  1107. items[item].append(m.group(4)) #after the 'tab'
  1108. i += 1
  1109. break
  1110. else:
  1111. items[item].append(line) # Just regular continuation
  1112. i += 1 # added on 2006.02.25
  1113. else:
  1114. i += 1
  1115. # Add the dom elements
  1116. for item in items:
  1117. li = self.doc.createElement("li")
  1118. ul.appendChild(li)
  1119. self._processSection(li, item, inList + 1, looseList = looseList)
  1120. # Process the remaining part of the section
  1121. self._processSection(parent_elem, lines[i:], inList)
  1122. def _linesUntil(self, lines, condition):
  1123. """ A utility function to break a list of lines upon the
  1124. first line that satisfied a condition. The condition
  1125. argument should be a predicate function.
  1126. """
  1127. i = -1
  1128. for line in lines:
  1129. i += 1
  1130. if condition(line): break
  1131. else:
  1132. i += 1
  1133. return lines[:i], lines[i:]
  1134. def _processQuote(self, parent_elem, lines, inList):
  1135. """Given a list of document lines starting with a quote finds
  1136. the end of the quote, unindents it and recursively
  1137. processes the body of the quote and the remainder of the
  1138. text file.

Large files files are truncated, but you can click here to view the full file