PageRenderTime 41ms CodeModel.GetById 13ms RepoModel.GetById 0ms app.codeStats 0ms

/sphinx/util/smartypants.py

https://bitbucket.org/aohta/sphinx
Python | 299 lines | 222 code | 22 blank | 55 comment | 0 complexity | 21322106b68baa26532940ce17ac19df MD5 | raw file
Possible License(s): BSD-2-Clause
  1. r"""
  2. This is based on SmartyPants.py by `Chad Miller`_ <smartypantspy@chad.org>,
  3. version 1.5_1.6.
  4. Copyright and License
  5. =====================
  6. SmartyPants_ license::
  7. Copyright (c) 2003 John Gruber
  8. (http://daringfireball.net/)
  9. All rights reserved.
  10. Redistribution and use in source and binary forms, with or without
  11. modification, are permitted provided that the following conditions are
  12. met:
  13. * Redistributions of source code must retain the above copyright
  14. notice, this list of conditions and the following disclaimer.
  15. * Redistributions in binary form must reproduce the above copyright
  16. notice, this list of conditions and the following disclaimer in
  17. the documentation and/or other materials provided with the
  18. distribution.
  19. * Neither the name "SmartyPants" nor the names of its contributors
  20. may be used to endorse or promote products derived from this
  21. software without specific prior written permission.
  22. This software is provided by the copyright holders and contributors "as
  23. is" and any express or implied warranties, including, but not limited
  24. to, the implied warranties of merchantability and fitness for a
  25. particular purpose are disclaimed. In no event shall the copyright
  26. owner or contributors be liable for any direct, indirect, incidental,
  27. special, exemplary, or consequential damages (including, but not
  28. limited to, procurement of substitute goods or services; loss of use,
  29. data, or profits; or business interruption) however caused and on any
  30. theory of liability, whether in contract, strict liability, or tort
  31. (including negligence or otherwise) arising in any way out of the use
  32. of this software, even if advised of the possibility of such damage.
  33. smartypants.py license::
  34. smartypants.py is a derivative work of SmartyPants.
  35. Redistribution and use in source and binary forms, with or without
  36. modification, are permitted provided that the following conditions are
  37. met:
  38. * Redistributions of source code must retain the above copyright
  39. notice, this list of conditions and the following disclaimer.
  40. * Redistributions in binary form must reproduce the above copyright
  41. notice, this list of conditions and the following disclaimer in
  42. the documentation and/or other materials provided with the
  43. distribution.
  44. This software is provided by the copyright holders and contributors "as
  45. is" and any express or implied warranties, including, but not limited
  46. to, the implied warranties of merchantability and fitness for a
  47. particular purpose are disclaimed. In no event shall the copyright
  48. owner or contributors be liable for any direct, indirect, incidental,
  49. special, exemplary, or consequential damages (including, but not
  50. limited to, procurement of substitute goods or services; loss of use,
  51. data, or profits; or business interruption) however caused and on any
  52. theory of liability, whether in contract, strict liability, or tort
  53. (including negligence or otherwise) arising in any way out of the use
  54. of this software, even if advised of the possibility of such damage.
  55. .. _Chad Miller: http://web.chad.org/
  56. """
  57. import re
  58. def sphinx_smarty_pants(t):
  59. t = t.replace('&quot;', '"')
  60. t = educate_dashes_oldschool(t)
  61. t = educate_quotes(t)
  62. t = t.replace('"', '&quot;')
  63. return t
  64. # Constants for quote education.
  65. punct_class = r"""[!"#\$\%'()*+,-.\/:;<=>?\@\[\\\]\^_`{|}~]"""
  66. end_of_word_class = r"""[\s.,;:!?)]"""
  67. close_class = r"""[^\ \t\r\n\[\{\(\-]"""
  68. dec_dashes = r"""&#8211;|&#8212;"""
  69. # Special case if the very first character is a quote
  70. # followed by punctuation at a non-word-break. Close the quotes by brute force:
  71. single_quote_start_re = re.compile(r"""^'(?=%s\\B)""" % (punct_class,))
  72. double_quote_start_re = re.compile(r"""^"(?=%s\\B)""" % (punct_class,))
  73. # Special case for double sets of quotes, e.g.:
  74. # <p>He said, "'Quoted' words in a larger quote."</p>
  75. double_quote_sets_re = re.compile(r""""'(?=\w)""")
  76. single_quote_sets_re = re.compile(r"""'"(?=\w)""")
  77. # Special case for decade abbreviations (the '80s):
  78. decade_abbr_re = re.compile(r"""\b'(?=\d{2}s)""")
  79. # Get most opening double quotes:
  80. opening_double_quotes_regex = re.compile(r"""
  81. (
  82. \s | # a whitespace char, or
  83. &nbsp; | # a non-breaking space entity, or
  84. -- | # dashes, or
  85. &[mn]dash; | # named dash entities
  86. %s | # or decimal entities
  87. &\#x201[34]; # or hex
  88. )
  89. " # the quote
  90. (?=\w) # followed by a word character
  91. """ % (dec_dashes,), re.VERBOSE)
  92. # Double closing quotes:
  93. closing_double_quotes_regex = re.compile(r"""
  94. #(%s)? # character that indicates the quote should be closing
  95. "
  96. (?=%s)
  97. """ % (close_class, end_of_word_class), re.VERBOSE)
  98. closing_double_quotes_regex_2 = re.compile(r"""
  99. (%s) # character that indicates the quote should be closing
  100. "
  101. """ % (close_class,), re.VERBOSE)
  102. # Get most opening single quotes:
  103. opening_single_quotes_regex = re.compile(r"""
  104. (
  105. \s | # a whitespace char, or
  106. &nbsp; | # a non-breaking space entity, or
  107. -- | # dashes, or
  108. &[mn]dash; | # named dash entities
  109. %s | # or decimal entities
  110. &\#x201[34]; # or hex
  111. )
  112. ' # the quote
  113. (?=\w) # followed by a word character
  114. """ % (dec_dashes,), re.VERBOSE)
  115. closing_single_quotes_regex = re.compile(r"""
  116. (%s)
  117. '
  118. (?!\s | s\b | \d)
  119. """ % (close_class,), re.VERBOSE)
  120. closing_single_quotes_regex_2 = re.compile(r"""
  121. (%s)
  122. '
  123. (\s | s\b)
  124. """ % (close_class,), re.VERBOSE)
  125. def educate_quotes(s):
  126. """
  127. Parameter: String.
  128. Returns: The string, with "educated" curly quote HTML entities.
  129. Example input: "Isn't this fun?"
  130. Example output: &#8220;Isn&#8217;t this fun?&#8221;
  131. """
  132. # Special case if the very first character is a quote
  133. # followed by punctuation at a non-word-break. Close the quotes
  134. # by brute force:
  135. s = single_quote_start_re.sub("&#8217;", s)
  136. s = double_quote_start_re.sub("&#8221;", s)
  137. # Special case for double sets of quotes, e.g.:
  138. # <p>He said, "'Quoted' words in a larger quote."</p>
  139. s = double_quote_sets_re.sub("&#8220;&#8216;", s)
  140. s = single_quote_sets_re.sub("&#8216;&#8220;", s)
  141. # Special case for decade abbreviations (the '80s):
  142. s = decade_abbr_re.sub("&#8217;", s)
  143. s = opening_single_quotes_regex.sub(r"\1&#8216;", s)
  144. s = closing_single_quotes_regex.sub(r"\1&#8217;", s)
  145. s = closing_single_quotes_regex_2.sub(r"\1&#8217;\2", s)
  146. # Any remaining single quotes should be opening ones:
  147. s = s.replace("'", "&#8216;")
  148. s = opening_double_quotes_regex.sub(r"\1&#8220;", s)
  149. s = closing_double_quotes_regex.sub(r"&#8221;", s)
  150. s = closing_double_quotes_regex_2.sub(r"\1&#8221;", s)
  151. # Any remaining quotes should be opening ones.
  152. return s.replace('"', "&#8220;")
  153. def educate_quotes_latex(s, dquotes=("``", "''")):
  154. """
  155. Parameter: String.
  156. Returns: The string, with double quotes corrected to LaTeX quotes.
  157. Example input: "Isn't this fun?"
  158. Example output: ``Isn't this fun?'';
  159. """
  160. # Special case if the very first character is a quote
  161. # followed by punctuation at a non-word-break. Close the quotes
  162. # by brute force:
  163. s = single_quote_start_re.sub("\x04", s)
  164. s = double_quote_start_re.sub("\x02", s)
  165. # Special case for double sets of quotes, e.g.:
  166. # <p>He said, "'Quoted' words in a larger quote."</p>
  167. s = double_quote_sets_re.sub("\x01\x03", s)
  168. s = single_quote_sets_re.sub("\x03\x01", s)
  169. # Special case for decade abbreviations (the '80s):
  170. s = decade_abbr_re.sub("\x04", s)
  171. s = opening_single_quotes_regex.sub("\\1\x03", s)
  172. s = closing_single_quotes_regex.sub("\\1\x04", s)
  173. s = closing_single_quotes_regex_2.sub("\\1\x04\\2", s)
  174. # Any remaining single quotes should be opening ones:
  175. s = s.replace("'", "\x03")
  176. s = opening_double_quotes_regex.sub("\\1\x01", s)
  177. s = closing_double_quotes_regex.sub("\x02", s)
  178. s = closing_double_quotes_regex_2.sub("\\1\x02", s)
  179. # Any remaining quotes should be opening ones.
  180. s = s.replace('"', "\x01")
  181. # Finally, replace all helpers with quotes.
  182. return s.replace("\x01", dquotes[0]).replace("\x02", dquotes[1]).\
  183. replace("\x03", "`").replace("\x04", "'")
  184. def educate_backticks(s):
  185. """
  186. Parameter: String.
  187. Returns: The string, with ``backticks'' -style double quotes
  188. translated into HTML curly quote entities.
  189. Example input: ``Isn't this fun?''
  190. Example output: &#8220;Isn't this fun?&#8221;
  191. """
  192. return s.replace("``", "&#8220;").replace("''", "&#8221;")
  193. def educate_single_backticks(s):
  194. """
  195. Parameter: String.
  196. Returns: The string, with `backticks' -style single quotes
  197. translated into HTML curly quote entities.
  198. Example input: `Isn't this fun?'
  199. Example output: &#8216;Isn&#8217;t this fun?&#8217;
  200. """
  201. return s.replace('`', "&#8216;").replace("'", "&#8217;")
  202. def educate_dashes_oldschool(s):
  203. """
  204. Parameter: String.
  205. Returns: The string, with each instance of "--" translated to
  206. an en-dash HTML entity, and each "---" translated to
  207. an em-dash HTML entity.
  208. """
  209. return s.replace('---', "&#8212;").replace('--', "&#8211;")
  210. def educate_dashes_oldschool_inverted(s):
  211. """
  212. Parameter: String.
  213. Returns: The string, with each instance of "--" translated to
  214. an em-dash HTML entity, and each "---" translated to
  215. an en-dash HTML entity. Two reasons why: First, unlike the
  216. en- and em-dash syntax supported by
  217. educate_dashes_oldschool(), it's compatible with existing
  218. entries written before SmartyPants 1.1, back when "--" was
  219. only used for em-dashes. Second, em-dashes are more
  220. common than en-dashes, and so it sort of makes sense that
  221. the shortcut should be shorter to type. (Thanks to Aaron
  222. Swartz for the idea.)
  223. """
  224. return s.replace('---', "&#8211;").replace('--', "&#8212;")
  225. def educate_ellipses(s):
  226. """
  227. Parameter: String.
  228. Returns: The string, with each instance of "..." translated to
  229. an ellipsis HTML entity.
  230. Example input: Huh...?
  231. Example output: Huh&#8230;?
  232. """
  233. return s.replace('...', "&#8230;").replace('. . .', "&#8230;")