PageRenderTime 51ms CodeModel.GetById 16ms RepoModel.GetById 0ms app.codeStats 1ms

/marave/plugins/smarty.py

http://marave.googlecode.com/
Python | 1017 lines | 857 code | 94 blank | 66 comment | 128 complexity | a3e033fdeb1813de4f7544533309f6c3 MD5 | raw file
Possible License(s): GPL-2.0, BSD-2-Clause
  1. # -*- coding: utf-8 -*-
  2. from plugins import Plugin
  3. from PyQt4 import QtGui
  4. class Smarty(Plugin):
  5. name='smarty'
  6. shortcut='Ctrl+.'
  7. description='Smart quote and dash replacement'
  8. mode="qBde"
  9. @classmethod
  10. def addConfigWidgets(self, dialog):
  11. print 'Adding widgets to smarty config'
  12. l=dialog.ui.layout
  13. self.q=QtGui.QCheckBox(dialog.tr('Replace normal quotes'))
  14. if 'q' in self.mode:
  15. self.q.setChecked(True)
  16. self.b=QtGui.QCheckBox(dialog.tr('Replace backtick-style quotes (` and ``)'))
  17. if 'B' in self.mode:
  18. self.b.setChecked(True)
  19. self.d=QtGui.QCheckBox(dialog.tr('Replace -- by en-dash, --- by em-dash'))
  20. if 'd' in self.mode:
  21. self.d.setChecked(True)
  22. self.e=QtGui.QCheckBox(dialog.tr('Replace ellipses'))
  23. if 'e' in self.mode:
  24. self.e.setChecked(True)
  25. l.addWidget(self.q)
  26. l.addWidget(self.b)
  27. l.addWidget(self.d)
  28. l.addWidget(self.e)
  29. @classmethod
  30. def loadConfig(self):
  31. print 'SMARTY loadconfig', self.settings
  32. if self.settings:
  33. sc=self.settings.value('plugin-'+self.name+'-shortcut')
  34. if sc.isValid():
  35. self.shortcut=unicode(sc.toString())
  36. mode=self.settings.value('plugin-smarty-mode')
  37. if mode.isValid():
  38. self.mode=unicode(mode.toString())
  39. @classmethod
  40. def saveConfig(self, dialog):
  41. self.shortcut=unicode(dialog.ui.shortcut.text())
  42. self.settings.setValue('plugin-'+self.name+'-shortcut', self.shortcut)
  43. newmode=""
  44. if self.q.isChecked():
  45. newmode+='q'
  46. if self.b.isChecked():
  47. newmode+='B'
  48. if self.d.isChecked():
  49. newmode+='d'
  50. if self.e.isChecked():
  51. newmode+='e'
  52. self.mode=newmode
  53. self.settings.setValue('plugin-smarty-mode',self.mode)
  54. self.settings.sync()
  55. def run(self):
  56. print 'running smarty plugin'
  57. text=unicode(self.client.editor.toPlainText()).splitlines()
  58. prog=QtGui.QProgressDialog(self.client.tr("Applying smarty"),
  59. self.client.tr("Cancel"),
  60. 0,len(text),
  61. self.client)
  62. prog.show()
  63. output=[]
  64. for i,l in enumerate(text):
  65. output.append(unescape(smartyPants(l,self.mode)))
  66. prog.setValue(i)
  67. QtGui.QApplication.instance().processEvents()
  68. prog.hide()
  69. self.client.editor.setPlainText('\n'.join(output))
  70. # This function is by Fredrik Lundh
  71. import re, htmlentitydefs
  72. ##
  73. # Removes HTML or XML character references and entities from a text string.
  74. #
  75. # @param text The HTML (or XML) source text.
  76. # @return The plain text, as a Unicode string, if necessary.
  77. def unescape(text):
  78. def fixup(m):
  79. text = m.group(0)
  80. if text[:2] == "&#":
  81. # character reference
  82. try:
  83. if text[:3] == "&#x":
  84. return unichr(int(text[3:-1], 16))
  85. else:
  86. return unichr(int(text[2:-1]))
  87. except ValueError:
  88. pass
  89. else:
  90. # named entity
  91. try:
  92. text = unichr(htmlentitydefs.name2codepoint[text[1:-1]])
  93. except KeyError:
  94. pass
  95. return text # leave as is
  96. return re.sub("&#?\w+;", fixup, text)
  97. # Below this point, this is a copy of smartypants.py, from
  98. # http://web.chad.org/projects/smartypants.py/
  99. #!/usr/bin/python
  100. r"""
  101. ==============
  102. smartypants.py
  103. ==============
  104. ----------------------------
  105. SmartyPants ported to Python
  106. ----------------------------
  107. Ported by `Chad Miller`_
  108. Copyright (c) 2004, 2007 Chad Miller
  109. original `SmartyPants`_ by `John Gruber`_
  110. Copyright (c) 2003 John Gruber
  111. Synopsis
  112. ========
  113. A smart-quotes plugin for Pyblosxom_.
  114. The original "SmartyPants" is a free web publishing plug-in for Movable Type,
  115. Blosxom, and BBEdit that easily translates plain ASCII punctuation characters
  116. into "smart" typographic punctuation HTML entities.
  117. This software, *smartypants.py*, endeavours to be a functional port of
  118. SmartyPants to Python, for use with Pyblosxom_.
  119. Description
  120. ===========
  121. SmartyPants can perform the following transformations:
  122. - Straight quotes ( " and ' ) into "curly" quote HTML entities
  123. - Backticks-style quotes (\`\`like this'') into "curly" quote HTML entities
  124. - Dashes (``--`` and ``---``) into en- and em-dash entities
  125. - Three consecutive dots (``...`` or ``. . .``) into an ellipsis entity
  126. This means you can write, edit, and save your posts using plain old
  127. ASCII straight quotes, plain dashes, and plain dots, but your published
  128. posts (and final HTML output) will appear with smart quotes, em-dashes,
  129. and proper ellipses.
  130. SmartyPants does not modify characters within ``<pre>``, ``<code>``, ``<kbd>``,
  131. ``<math>`` or ``<script>`` tag blocks. Typically, these tags are used to
  132. display text where smart quotes and other "smart punctuation" would not be
  133. appropriate, such as source code or example markup.
  134. Backslash Escapes
  135. =================
  136. If you need to use literal straight quotes (or plain hyphens and
  137. periods), SmartyPants accepts the following backslash escape sequences
  138. to force non-smart punctuation. It does so by transforming the escape
  139. sequence into a decimal-encoded HTML entity:
  140. (FIXME: table here.)
  141. .. comment It sucks that there's a disconnect between the visual layout and table markup when special characters are involved.
  142. .. comment ====== ===== =========
  143. .. comment Escape Value Character
  144. .. comment ====== ===== =========
  145. .. comment \\\\\\\\ &#92; \\\\
  146. .. comment \\\\" &#34; "
  147. .. comment \\\\' &#39; '
  148. .. comment \\\\. &#46; .
  149. .. comment \\\\- &#45; \-
  150. .. comment \\\\` &#96; \`
  151. .. comment ====== ===== =========
  152. This is useful, for example, when you want to use straight quotes as
  153. foot and inch marks: 6'2" tall; a 17" iMac.
  154. Options
  155. =======
  156. For Pyblosxom users, the ``smartypants_attributes`` attribute is where you
  157. specify configuration options.
  158. Numeric values are the easiest way to configure SmartyPants' behavior:
  159. "0"
  160. Suppress all transformations. (Do nothing.)
  161. "1"
  162. Performs default SmartyPants transformations: quotes (including
  163. \`\`backticks'' -style), em-dashes, and ellipses. "``--``" (dash dash)
  164. is used to signify an em-dash; there is no support for en-dashes.
  165. "2"
  166. Same as smarty_pants="1", except that it uses the old-school typewriter
  167. shorthand for dashes: "``--``" (dash dash) for en-dashes, "``---``"
  168. (dash dash dash)
  169. for em-dashes.
  170. "3"
  171. Same as smarty_pants="2", but inverts the shorthand for dashes:
  172. "``--``" (dash dash) for em-dashes, and "``---``" (dash dash dash) for
  173. en-dashes.
  174. "-1"
  175. Stupefy mode. Reverses the SmartyPants transformation process, turning
  176. the HTML entities produced by SmartyPants into their ASCII equivalents.
  177. E.g. "&#8220;" is turned into a simple double-quote ("), "&#8212;" is
  178. turned into two dashes, etc.
  179. The following single-character attribute values can be combined to toggle
  180. individual transformations from within the smarty_pants attribute. For
  181. example, to educate normal quotes and em-dashes, but not ellipses or
  182. \`\`backticks'' -style quotes:
  183. ``py['smartypants_attributes'] = "qd"``
  184. "q"
  185. Educates normal quote characters: (") and (').
  186. "b"
  187. Educates \`\`backticks'' -style double quotes.
  188. "B"
  189. Educates \`\`backticks'' -style double quotes and \`single' quotes.
  190. "d"
  191. Educates em-dashes.
  192. "D"
  193. Educates em-dashes and en-dashes, using old-school typewriter shorthand:
  194. (dash dash) for en-dashes, (dash dash dash) for em-dashes.
  195. "i"
  196. Educates em-dashes and en-dashes, using inverted old-school typewriter
  197. shorthand: (dash dash) for em-dashes, (dash dash dash) for en-dashes.
  198. "e"
  199. Educates ellipses.
  200. "w"
  201. Translates any instance of ``&quot;`` into a normal double-quote character.
  202. This should be of no interest to most people, but of particular interest
  203. to anyone who writes their posts using Dreamweaver, as Dreamweaver
  204. inexplicably uses this entity to represent a literal double-quote
  205. character. SmartyPants only educates normal quotes, not entities (because
  206. ordinarily, entities are used for the explicit purpose of representing the
  207. specific character they represent). The "w" option must be used in
  208. conjunction with one (or both) of the other quote options ("q" or "b").
  209. Thus, if you wish to apply all SmartyPants transformations (quotes, en-
  210. and em-dashes, and ellipses) and also translate ``&quot;`` entities into
  211. regular quotes so SmartyPants can educate them, you should pass the
  212. following to the smarty_pants attribute: ``py['smartypants_attributes'] = "qDew"``
  213. The ``smartypants_forbidden_flavours`` list contains pyblosxom flavours for
  214. which no Smarty Pants rendering will occur.
  215. Caveats
  216. =======
  217. Why You Might Not Want to Use Smart Quotes in Your Weblog
  218. ---------------------------------------------------------
  219. For one thing, you might not care.
  220. Most normal, mentally stable individuals do not take notice of proper
  221. typographic punctuation. Many design and typography nerds, however, break
  222. out in a nasty rash when they encounter, say, a restaurant sign that uses
  223. a straight apostrophe to spell "Joe's".
  224. If you're the sort of person who just doesn't care, you might well want to
  225. continue not caring. Using straight quotes -- and sticking to the 7-bit
  226. ASCII character set in general -- is certainly a simpler way to live.
  227. Even if you *do* care about accurate typography, you still might want to
  228. think twice before educating the quote characters in your weblog. One side
  229. effect of publishing curly quote HTML entities is that it makes your
  230. weblog a bit harder for others to quote from using copy-and-paste. What
  231. happens is that when someone copies text from your blog, the copied text
  232. contains the 8-bit curly quote characters (as well as the 8-bit characters
  233. for em-dashes and ellipses, if you use these options). These characters
  234. are not standard across different text encoding methods, which is why they
  235. need to be encoded as HTML entities.
  236. People copying text from your weblog, however, may not notice that you're
  237. using curly quotes, and they'll go ahead and paste the unencoded 8-bit
  238. characters copied from their browser into an email message or their own
  239. weblog. When pasted as raw "smart quotes", these characters are likely to
  240. get mangled beyond recognition.
  241. That said, my own opinion is that any decent text editor or email client
  242. makes it easy to stupefy smart quote characters into their 7-bit
  243. equivalents, and I don't consider it my problem if you're using an
  244. indecent text editor or email client.
  245. Algorithmic Shortcomings
  246. ------------------------
  247. One situation in which quotes will get curled the wrong way is when
  248. apostrophes are used at the start of leading contractions. For example:
  249. ``'Twas the night before Christmas.``
  250. In the case above, SmartyPants will turn the apostrophe into an opening
  251. single-quote, when in fact it should be a closing one. I don't think
  252. this problem can be solved in the general case -- every word processor
  253. I've tried gets this wrong as well. In such cases, it's best to use the
  254. proper HTML entity for closing single-quotes (``&#8217;``) by hand.
  255. Bugs
  256. ====
  257. To file bug reports or feature requests (other than topics listed in the
  258. Caveats section above) please send email to: mailto:smartypantspy@chad.org
  259. If the bug involves quotes being curled the wrong way, please send example
  260. text to illustrate.
  261. To Do list
  262. ----------
  263. - Provide a function for use within templates to quote anything at all.
  264. Version History
  265. ===============
  266. 1.5_1.6: Fri, 27 Jul 2007 07:06:40 -0400
  267. - Fixed bug where blocks of precious unalterable text was instead
  268. interpreted. Thanks to Le Roux and Dirk van Oosterbosch.
  269. 1.5_1.5: Sat, 13 Aug 2005 15:50:24 -0400
  270. - Fix bogus magical quotation when there is no hint that the
  271. user wants it, e.g., in "21st century". Thanks to Nathan Hamblen.
  272. - Be smarter about quotes before terminating numbers in an en-dash'ed
  273. range.
  274. 1.5_1.4: Thu, 10 Feb 2005 20:24:36 -0500
  275. - Fix a date-processing bug, as reported by jacob childress.
  276. - Begin a test-suite for ensuring correct output.
  277. - Removed import of "string", since I didn't really need it.
  278. (This was my first ever Python program. Sue me!)
  279. 1.5_1.3: Wed, 15 Sep 2004 18:25:58 -0400
  280. - Abort processing if the flavour is in forbidden-list. Default of
  281. [ "rss" ] (Idea of Wolfgang SCHNERRING.)
  282. - Remove stray virgules from en-dashes. Patch by Wolfgang SCHNERRING.
  283. 1.5_1.2: Mon, 24 May 2004 08:14:54 -0400
  284. - Some single quotes weren't replaced properly. Diff-tesuji played
  285. by Benjamin GEIGER.
  286. 1.5_1.1: Sun, 14 Mar 2004 14:38:28 -0500
  287. - Support upcoming pyblosxom 0.9 plugin verification feature.
  288. 1.5_1.0: Tue, 09 Mar 2004 08:08:35 -0500
  289. - Initial release
  290. Version Information
  291. -------------------
  292. Version numbers will track the SmartyPants_ version numbers, with the addition
  293. of an underscore and the smartypants.py version on the end.
  294. New versions will be available at `http://wiki.chad.org/SmartyPantsPy`_
  295. .. _http://wiki.chad.org/SmartyPantsPy: http://wiki.chad.org/SmartyPantsPy
  296. Authors
  297. =======
  298. `John Gruber`_ did all of the hard work of writing this software in Perl for
  299. `Movable Type`_ and almost all of this useful documentation. `Chad Miller`_
  300. ported it to Python to use with Pyblosxom_.
  301. Additional Credits
  302. ==================
  303. Portions of the SmartyPants original work are based on Brad Choate's nifty
  304. MTRegex plug-in. `Brad Choate`_ also contributed a few bits of source code to
  305. this plug-in. Brad Choate is a fine hacker indeed.
  306. `Jeremy Hedley`_ and `Charles Wiltgen`_ deserve mention for exemplary beta
  307. testing of the original SmartyPants.
  308. `Rael Dornfest`_ ported SmartyPants to Blosxom.
  309. .. _Brad Choate: http://bradchoate.com/
  310. .. _Jeremy Hedley: http://antipixel.com/
  311. .. _Charles Wiltgen: http://playbacktime.com/
  312. .. _Rael Dornfest: http://raelity.org/
  313. Copyright and License
  314. =====================
  315. SmartyPants_ license::
  316. Copyright (c) 2003 John Gruber
  317. (http://daringfireball.net/)
  318. All rights reserved.
  319. Redistribution and use in source and binary forms, with or without
  320. modification, are permitted provided that the following conditions are
  321. met:
  322. * Redistributions of source code must retain the above copyright
  323. notice, this list of conditions and the following disclaimer.
  324. * Redistributions in binary form must reproduce the above copyright
  325. notice, this list of conditions and the following disclaimer in
  326. the documentation and/or other materials provided with the
  327. distribution.
  328. * Neither the name "SmartyPants" nor the names of its contributors
  329. may be used to endorse or promote products derived from this
  330. software without specific prior written permission.
  331. This software is provided by the copyright holders and contributors "as
  332. is" and any express or implied warranties, including, but not limited
  333. to, the implied warranties of merchantability and fitness for a
  334. particular purpose are disclaimed. In no event shall the copyright
  335. owner or contributors be liable for any direct, indirect, incidental,
  336. special, exemplary, or consequential damages (including, but not
  337. limited to, procurement of substitute goods or services; loss of use,
  338. data, or profits; or business interruption) however caused and on any
  339. theory of liability, whether in contract, strict liability, or tort
  340. (including negligence or otherwise) arising in any way out of the use
  341. of this software, even if advised of the possibility of such damage.
  342. smartypants.py license::
  343. smartypants.py is a derivative work of SmartyPants.
  344. Redistribution and use in source and binary forms, with or without
  345. modification, are permitted provided that the following conditions are
  346. met:
  347. * Redistributions of source code must retain the above copyright
  348. notice, this list of conditions and the following disclaimer.
  349. * Redistributions in binary form must reproduce the above copyright
  350. notice, this list of conditions and the following disclaimer in
  351. the documentation and/or other materials provided with the
  352. distribution.
  353. This software is provided by the copyright holders and contributors "as
  354. is" and any express or implied warranties, including, but not limited
  355. to, the implied warranties of merchantability and fitness for a
  356. particular purpose are disclaimed. In no event shall the copyright
  357. owner or contributors be liable for any direct, indirect, incidental,
  358. special, exemplary, or consequential damages (including, but not
  359. limited to, procurement of substitute goods or services; loss of use,
  360. data, or profits; or business interruption) however caused and on any
  361. theory of liability, whether in contract, strict liability, or tort
  362. (including negligence or otherwise) arising in any way out of the use
  363. of this software, even if advised of the possibility of such damage.
  364. .. _John Gruber: http://daringfireball.net/
  365. .. _Chad Miller: http://web.chad.org/
  366. .. _Pyblosxom: http://roughingit.subtlehints.net/pyblosxom
  367. .. _SmartyPants: http://daringfireball.net/projects/smartypants/
  368. .. _Movable Type: http://www.movabletype.org/
  369. """
# Attribute string used when an entry does not supply its own; "1" turns on
# quotes, backticks, dashes and ellipses (see smartyPants()).
default_smartypants_attr = "1"

import re

# Matches an opening or closing <pre>, <code>, <kbd>, <script> or <math> tag
# (case-insensitive).  Group 1 is "/" for a closing tag, group 2 the tag name.
tags_to_skip_regex = re.compile(r"<(/)?(pre|code|kbd|script|math)[^>]*>", re.I)
  373. def verify_installation(request):
  374. return 1
  375. # assert the plugin is functional
  376. def cb_story(args):
  377. global default_smartypants_attr
  378. try:
  379. forbidden_flavours = args["entry"]["smartypants_forbidden_flavours"]
  380. except KeyError:
  381. forbidden_flavours = [ "rss" ]
  382. try:
  383. attributes = args["entry"]["smartypants_attributes"]
  384. except KeyError:
  385. attributes = default_smartypants_attr
  386. if attributes is None:
  387. attributes = default_smartypants_attr
  388. entryData = args["entry"].getData()
  389. try:
  390. if args["request"]["flavour"] in forbidden_flavours:
  391. return
  392. except KeyError:
  393. if "&lt;" in args["entry"]["body"][0:15]: # sniff the stream
  394. return # abort if it looks like escaped HTML. FIXME
  395. # FIXME: make these configurable, perhaps?
  396. args["entry"]["body"] = smartyPants(entryData, attributes)
  397. args["entry"]["title"] = smartyPants(args["entry"]["title"], attributes)
  398. ### internal functions below here
  399. def smartyPants(text, attr=default_smartypants_attr):
  400. convert_quot = False # should we translate &quot; entities into normal quotes?
  401. # Parse attributes:
  402. # 0 : do nothing
  403. # 1 : set all
  404. # 2 : set all, using old school en- and em- dash shortcuts
  405. # 3 : set all, using inverted old school en and em- dash shortcuts
  406. #
  407. # q : quotes
  408. # b : backtick quotes (``double'' only)
  409. # B : backtick quotes (``double'' and `single')
  410. # d : dashes
  411. # D : old school dashes
  412. # i : inverted old school dashes
  413. # e : ellipses
  414. # w : convert &quot; entities to " for Dreamweaver users
  415. skipped_tag_stack = []
  416. do_dashes = "0"
  417. do_backticks = "0"
  418. do_quotes = "0"
  419. do_ellipses = "0"
  420. do_stupefy = "0"
  421. if attr == "0":
  422. # Do nothing.
  423. return text
  424. elif attr == "1":
  425. do_quotes = "1"
  426. do_backticks = "1"
  427. do_dashes = "1"
  428. do_ellipses = "1"
  429. elif attr == "2":
  430. # Do everything, turn all options on, use old school dash shorthand.
  431. do_quotes = "1"
  432. do_backticks = "1"
  433. do_dashes = "2"
  434. do_ellipses = "1"
  435. elif attr == "3":
  436. # Do everything, turn all options on, use inverted old school dash shorthand.
  437. do_quotes = "1"
  438. do_backticks = "1"
  439. do_dashes = "3"
  440. do_ellipses = "1"
  441. elif attr == "-1":
  442. # Special "stupefy" mode.
  443. do_stupefy = "1"
  444. else:
  445. for c in attr:
  446. if c == "q": do_quotes = "1"
  447. elif c == "b": do_backticks = "1"
  448. elif c == "B": do_backticks = "2"
  449. elif c == "d": do_dashes = "1"
  450. elif c == "D": do_dashes = "2"
  451. elif c == "i": do_dashes = "3"
  452. elif c == "e": do_ellipses = "1"
  453. elif c == "w": convert_quot = "1"
  454. else:
  455. pass
  456. # ignore unknown option
  457. tokens = _tokenize(text)
  458. result = []
  459. in_pre = False
  460. prev_token_last_char = ""
  461. # This is a cheat, used to get some context
  462. # for one-character tokens that consist of
  463. # just a quote char. What we do is remember
  464. # the last character of the previous text
  465. # token, to use as context to curl single-
  466. # character quote tokens correctly.
  467. for cur_token in tokens:
  468. if cur_token[0] == "tag":
  469. # Don't mess with quotes inside some tags. This does not handle self <closing/> tags!
  470. result.append(cur_token[1])
  471. skip_match = tags_to_skip_regex.match(cur_token[1])
  472. if skip_match is not None:
  473. if not skip_match.group(1):
  474. skipped_tag_stack.append(skip_match.group(2).lower())
  475. in_pre = True
  476. else:
  477. if len(skipped_tag_stack) > 0:
  478. if skip_match.group(2).lower() == skipped_tag_stack[-1]:
  479. skipped_tag_stack.pop()
  480. else:
  481. pass
  482. # This close doesn't match the open. This isn't XHTML. We should barf here.
  483. if len(skipped_tag_stack) == 0:
  484. in_pre = False
  485. else:
  486. t = cur_token[1]
  487. last_char = t[-1:] # Remember last char of this token before processing.
  488. if not in_pre:
  489. oldstr = t
  490. t = processEscapes(t)
  491. if convert_quot != "0":
  492. t = re.sub('&quot;', '"', t)
  493. if do_dashes != "0":
  494. if do_dashes == "1":
  495. t = educateDashes(t)
  496. if do_dashes == "2":
  497. t = educateDashesOldSchool(t)
  498. if do_dashes == "3":
  499. t = educateDashesOldSchoolInverted(t)
  500. if do_ellipses != "0":
  501. t = educateEllipses(t)
  502. # Note: backticks need to be processed before quotes.
  503. if do_backticks != "0":
  504. t = educateBackticks(t)
  505. if do_backticks == "2":
  506. t = educateSingleBackticks(t)
  507. if do_quotes != "0":
  508. if t == "'":
  509. # Special case: single-character ' token
  510. if re.match("\S", prev_token_last_char):
  511. t = "&#8217;"
  512. else:
  513. t = "&#8216;"
  514. elif t == '"':
  515. # Special case: single-character " token
  516. if re.match("\S", prev_token_last_char):
  517. t = "&#8221;"
  518. else:
  519. t = "&#8220;"
  520. else:
  521. # Normal case:
  522. t = educateQuotes(t)
  523. if do_stupefy == "1":
  524. t = stupefyEntities(t)
  525. prev_token_last_char = last_char
  526. result.append(t)
  527. return "".join(result)
  528. def educateQuotes(str):
  529. """
  530. Parameter: String.
  531. Returns: The string, with "educated" curly quote HTML entities.
  532. Example input: "Isn't this fun?"
  533. Example output: &#8220;Isn&#8217;t this fun?&#8221;
  534. """
  535. oldstr = str
  536. punct_class = r"""[!"#\$\%'()*+,-.\/:;<=>?\@\[\\\]\^_`{|}~]"""
  537. # Special case if the very first character is a quote
  538. # followed by punctuation at a non-word-break. Close the quotes by brute force:
  539. str = re.sub(r"""^'(?=%s\\B)""" % (punct_class,), r"""&#8217;""", str)
  540. str = re.sub(r"""^"(?=%s\\B)""" % (punct_class,), r"""&#8221;""", str)
  541. # Special case for double sets of quotes, e.g.:
  542. # <p>He said, "'Quoted' words in a larger quote."</p>
  543. str = re.sub(r""""'(?=\w)""", """&#8220;&#8216;""", str)
  544. str = re.sub(r"""'"(?=\w)""", """&#8216;&#8220;""", str)
  545. # Special case for decade abbreviations (the '80s):
  546. str = re.sub(r"""\b'(?=\d{2}s)""", r"""&#8217;""", str)
  547. close_class = r"""[^\ \t\r\n\[\{\(\-]"""
  548. dec_dashes = r"""&#8211;|&#8212;"""
  549. # Get most opening single quotes:
  550. opening_single_quotes_regex = re.compile(r"""
  551. (
  552. \s | # a whitespace char, or
  553. &nbsp; | # a non-breaking space entity, or
  554. -- | # dashes, or
  555. &[mn]dash; | # named dash entities
  556. %s | # or decimal entities
  557. &\#x201[34]; # or hex
  558. )
  559. ' # the quote
  560. (?=\w) # followed by a word character
  561. """ % (dec_dashes,), re.VERBOSE)
  562. str = opening_single_quotes_regex.sub(r"""\1&#8216;""", str)
  563. closing_single_quotes_regex = re.compile(r"""
  564. (%s)
  565. '
  566. (?!\s | s\b | \d)
  567. """ % (close_class,), re.VERBOSE)
  568. str = closing_single_quotes_regex.sub(r"""\1&#8217;""", str)
  569. closing_single_quotes_regex = re.compile(r"""
  570. (%s)
  571. '
  572. (\s | s\b)
  573. """ % (close_class,), re.VERBOSE)
  574. str = closing_single_quotes_regex.sub(r"""\1&#8217;\2""", str)
  575. # Any remaining single quotes should be opening ones:
  576. str = re.sub(r"""'""", r"""&#8216;""", str)
  577. # Get most opening double quotes:
  578. opening_double_quotes_regex = re.compile(r"""
  579. (
  580. \s | # a whitespace char, or
  581. &nbsp; | # a non-breaking space entity, or
  582. -- | # dashes, or
  583. &[mn]dash; | # named dash entities
  584. %s | # or decimal entities
  585. &\#x201[34]; # or hex
  586. )
  587. " # the quote
  588. (?=\w) # followed by a word character
  589. """ % (dec_dashes,), re.VERBOSE)
  590. str = opening_double_quotes_regex.sub(r"""\1&#8220;""", str)
  591. # Double closing quotes:
  592. closing_double_quotes_regex = re.compile(r"""
  593. #(%s)? # character that indicates the quote should be closing
  594. "
  595. (?=\s)
  596. """ % (close_class,), re.VERBOSE)
  597. str = closing_double_quotes_regex.sub(r"""&#8221;""", str)
  598. closing_double_quotes_regex = re.compile(r"""
  599. (%s) # character that indicates the quote should be closing
  600. "
  601. """ % (close_class,), re.VERBOSE)
  602. str = closing_double_quotes_regex.sub(r"""\1&#8221;""", str)
  603. # Any remaining quotes should be opening ones.
  604. str = re.sub(r'"', r"""&#8220;""", str)
  605. return str
  606. def educateBackticks(str):
  607. """
  608. Parameter: String.
  609. Returns: The string, with ``backticks'' -style double quotes
  610. translated into HTML curly quote entities.
  611. Example input: ``Isn't this fun?''
  612. Example output: &#8220;Isn't this fun?&#8221;
  613. """
  614. str = re.sub(r"""``""", r"""&#8220;""", str)
  615. str = re.sub(r"""''""", r"""&#8221;""", str)
  616. return str
  617. def educateSingleBackticks(str):
  618. """
  619. Parameter: String.
  620. Returns: The string, with `backticks' -style single quotes
  621. translated into HTML curly quote entities.
  622. Example input: `Isn't this fun?'
  623. Example output: &#8216;Isn&#8217;t this fun?&#8217;
  624. """
  625. str = re.sub(r"""`""", r"""&#8216;""", str)
  626. str = re.sub(r"""'""", r"""&#8217;""", str)
  627. return str
  628. def educateDashes(str):
  629. """
  630. Parameter: String.
  631. Returns: The string, with each instance of "--" translated to
  632. an em-dash HTML entity.
  633. """
  634. str = re.sub(r"""---""", r"""&#8211;""", str) # en (yes, backwards)
  635. str = re.sub(r"""--""", r"""&#8212;""", str) # em (yes, backwards)
  636. return str
  637. def educateDashesOldSchool(str):
  638. """
  639. Parameter: String.
  640. Returns: The string, with each instance of "--" translated to
  641. an en-dash HTML entity, and each "---" translated to
  642. an em-dash HTML entity.
  643. """
  644. str = re.sub(r"""---""", r"""&#8212;""", str) # em (yes, backwards)
  645. str = re.sub(r"""--""", r"""&#8211;""", str) # en (yes, backwards)
  646. return str
  647. def educateDashesOldSchoolInverted(str):
  648. """
  649. Parameter: String.
  650. Returns: The string, with each instance of "--" translated to
  651. an em-dash HTML entity, and each "---" translated to
  652. an en-dash HTML entity. Two reasons why: First, unlike the
  653. en- and em-dash syntax supported by
  654. EducateDashesOldSchool(), it's compatible with existing
  655. entries written before SmartyPants 1.1, back when "--" was
  656. only used for em-dashes. Second, em-dashes are more
  657. common than en-dashes, and so it sort of makes sense that
  658. the shortcut should be shorter to type. (Thanks to Aaron
  659. Swartz for the idea.)
  660. """
  661. str = re.sub(r"""---""", r"""&#8211;""", str) # em
  662. str = re.sub(r"""--""", r"""&#8212;""", str) # en
  663. return str
  664. def educateEllipses(str):
  665. """
  666. Parameter: String.
  667. Returns: The string, with each instance of "..." translated to
  668. an ellipsis HTML entity.
  669. Example input: Huh...?
  670. Example output: Huh&#8230;?
  671. """
  672. str = re.sub(r"""\.\.\.""", r"""&#8230;""", str)
  673. str = re.sub(r"""\. \. \.""", r"""&#8230;""", str)
  674. return str
  675. def stupefyEntities(str):
  676. """
  677. Parameter: String.
  678. Returns: The string, with each SmartyPants HTML entity translated to
  679. its ASCII counterpart.
  680. Example input: &#8220;Hello &#8212; world.&#8221;
  681. Example output: "Hello -- world."
  682. """
  683. str = re.sub(r"""&#8211;""", r"""-""", str) # en-dash
  684. str = re.sub(r"""&#8212;""", r"""--""", str) # em-dash
  685. str = re.sub(r"""&#8216;""", r"""'""", str) # open single quote
  686. str = re.sub(r"""&#8217;""", r"""'""", str) # close single quote
  687. str = re.sub(r"""&#8220;""", r'''"''', str) # open double quote
  688. str = re.sub(r"""&#8221;""", r'''"''', str) # close double quote
  689. str = re.sub(r"""&#8230;""", r"""...""", str)# ellipsis
  690. return str
  691. def processEscapes(str):
  692. r"""
  693. Parameter: String.
  694. Returns: The string, with after processing the following backslash
  695. escape sequences. This is useful if you want to force a "dumb"
  696. quote or other character to appear.
  697. Escape Value
  698. ------ -----
  699. \\ &#92;
  700. \" &#34;
  701. \' &#39;
  702. \. &#46;
  703. \- &#45;
  704. \` &#96;
  705. """
  706. str = re.sub(r"""\\\\""", r"""&#92;""", str)
  707. str = re.sub(r'''\\"''', r"""&#34;""", str)
  708. str = re.sub(r"""\\'""", r"""&#39;""", str)
  709. str = re.sub(r"""\\\.""", r"""&#46;""", str)
  710. str = re.sub(r"""\\-""", r"""&#45;""", str)
  711. str = re.sub(r"""\\`""", r"""&#96;""", str)
  712. return str
  713. def _tokenize(str):
  714. """
  715. Parameter: String containing HTML markup.
  716. Returns: Reference to an array of the tokens comprising the input
  717. string. Each token is either a tag (possibly with nested,
  718. tags contained therein, such as <a href="<MTFoo>">, or a
  719. run of text between tags. Each element of the array is a
  720. two-element array; the first is either 'tag' or 'text';
  721. the second is the actual value.
  722. Based on the _tokenize() subroutine from Brad Choate's MTRegex plugin.
  723. <http://www.bradchoate.com/past/mtregex.php>
  724. """
  725. pos = 0
  726. length = len(str)
  727. tokens = []
  728. depth = 6
  729. nested_tags = "|".join(['(?:<(?:[^<>]',] * depth) + (')*>)' * depth)
  730. #match = r"""(?: <! ( -- .*? -- \s* )+ > ) | # comments
  731. # (?: <\? .*? \?> ) | # directives
  732. # %s # nested tags """ % (nested_tags,)
  733. tag_soup = re.compile(r"""([^<]*)(<[^>]*>)""")
  734. token_match = tag_soup.search(str)
  735. previous_end = 0
  736. while token_match is not None:
  737. if token_match.group(1):
  738. tokens.append(['text', token_match.group(1)])
  739. tokens.append(['tag', token_match.group(2)])
  740. previous_end = token_match.end()
  741. token_match = tag_soup.search(str, token_match.end())
  742. if previous_end < len(str):
  743. tokens.append(['text', str[previous_end:]])
  744. return tokens
  745. if __name__ == "__main__":
  746. import locale
  747. try:
  748. locale.setlocale(locale.LC_ALL, '')
  749. except:
  750. pass
  751. from docutils.core import publish_string
  752. docstring_html = publish_string(__doc__, writer_name='html')
  753. print docstring_html
  754. # Unit test output goes out stderr. No worries.
  755. import unittest
  756. sp = smartyPants
  757. class TestSmartypantsAllAttributes(unittest.TestCase):
  758. # the default attribute is "1", which means "all".
  759. def test_dates(self):
  760. self.assertEqual(sp("1440-80's"), "1440-80&#8217;s")
  761. self.assertEqual(sp("1440-'80s"), "1440-&#8216;80s")
  762. self.assertEqual(sp("1440---'80s"), "1440&#8211;&#8216;80s")
  763. self.assertEqual(sp("1960s"), "1960s") # no effect.
  764. self.assertEqual(sp("1960's"), "1960&#8217;s")
  765. self.assertEqual(sp("one two '60s"), "one two &#8216;60s")
  766. self.assertEqual(sp("'60s"), "&#8216;60s")
  767. def test_skip_tags(self):
  768. self.assertEqual(
  769. sp("""<script type="text/javascript">\n<!--\nvar href = "http://www.google.com";\nvar linktext = "google";\ndocument.write('<a href="' + href + '">' + linktext + "</a>");\n//-->\n</script>"""),
  770. """<script type="text/javascript">\n<!--\nvar href = "http://www.google.com";\nvar linktext = "google";\ndocument.write('<a href="' + href + '">' + linktext + "</a>");\n//-->\n</script>""")
  771. self.assertEqual(
  772. sp("""<p>He said &quot;Let's write some code.&quot; This code here <code>if True:\n\tprint &quot;Okay&quot;</code> is python code.</p>"""),
  773. """<p>He said &#8220;Let&#8217;s write some code.&#8221; This code here <code>if True:\n\tprint &quot;Okay&quot;</code> is python code.</p>""")
  774. def test_ordinal_numbers(self):
  775. self.assertEqual(sp("21st century"), "21st century") # no effect.
  776. self.assertEqual(sp("3rd"), "3rd") # no effect.
  777. def test_educated_quotes(self):
  778. self.assertEqual(sp('''"Isn't this fun?"'''), '''&#8220;Isn&#8217;t this fun?&#8221;''')
  779. unittest.main()
# Module metadata carried over from the bundled smartypants.py.
__author__ = "Chad Miller <smartypantspy@chad.org>"
__version__ = "1.5_1.6: Fri, 27 Jul 2007 07:06:40 -0400"
__url__ = "http://wiki.chad.org/SmartyPantsPy"
__description__ = "Smart-quotes, smart-ellipses, and smart-dashes for weblog entries in pyblosxom"