/marave/plugins/smarty.py
Python | 1017 lines | 857 code | 94 blank | 66 comment | 128 complexity | a3e033fdeb1813de4f7544533309f6c3 MD5 | raw file
Possible License(s): GPL-2.0, BSD-2-Clause
- # -*- coding: utf-8 -*-
- from plugins import Plugin
- from PyQt4 import QtGui
class Smarty(Plugin):
    """Editor plugin that runs smartyPants() over the whole document.

    Configuration is kept on the class itself:

    * ``shortcut`` -- key sequence that triggers the plugin.
    * ``mode``     -- smartyPants attribute string assembled from the
      configuration check boxes: 'q' (quotes), 'B' (backtick quotes),
      'd' (dashes) and 'e' (ellipses).
    """
    name = 'smarty'
    shortcut = 'Ctrl+.'
    description = 'Smart quote and dash replacement'
    mode = "qBde"

    @classmethod
    def addConfigWidgets(cls, dialog):
        """Add one check box per transformation to ``dialog.ui.layout``.

        The check boxes are stored on the class (``q``, ``b``, ``d``, ``e``)
        so that saveConfig() can read them back later; each one starts
        checked when its flag is present in ``mode``.
        """
        print('Adding widgets to smarty config')
        l = dialog.ui.layout
        cls.q = QtGui.QCheckBox(dialog.tr('Replace normal quotes'))
        cls.q.setChecked('q' in cls.mode)
        cls.b = QtGui.QCheckBox(dialog.tr('Replace backtick-style quotes (` and ``)'))
        cls.b.setChecked('B' in cls.mode)
        cls.d = QtGui.QCheckBox(dialog.tr('Replace -- by en-dash, --- by em-dash'))
        cls.d.setChecked('d' in cls.mode)
        cls.e = QtGui.QCheckBox(dialog.tr('Replace ellipses'))
        cls.e.setChecked('e' in cls.mode)
        for box in (cls.q, cls.b, cls.d, cls.e):
            l.addWidget(box)

    @classmethod
    def loadConfig(cls):
        """Read ``shortcut`` and ``mode`` from ``settings`` when values exist.

        NOTE(review): ``settings`` is not defined in this class; presumably
        the Plugin base class provides a QSettings-like object -- confirm.
        """
        print('SMARTY loadconfig %s' % (cls.settings,))
        if cls.settings:
            sc = cls.settings.value('plugin-' + cls.name + '-shortcut')
            if sc.isValid():
                cls.shortcut = unicode(sc.toString())
            mode = cls.settings.value('plugin-smarty-mode')
            if mode.isValid():
                cls.mode = unicode(mode.toString())

    @classmethod
    def saveConfig(cls, dialog):
        """Store the shortcut and the check-box state into ``settings``."""
        cls.shortcut = unicode(dialog.ui.shortcut.text())
        cls.settings.setValue('plugin-' + cls.name + '-shortcut', cls.shortcut)

        # Rebuild the attribute string in the fixed order q, B, d, e.
        flags = (('q', cls.q), ('B', cls.b), ('d', cls.d), ('e', cls.e))
        cls.mode = ''.join(flag for flag, box in flags if box.isChecked())

        cls.settings.setValue('plugin-smarty-mode', cls.mode)
        cls.settings.sync()

    def run(self):
        """Apply smartyPants() to every line of the editor's text.

        Each line is converted separately, the resulting HTML entities are
        turned back into characters with unescape(), and the editor content
        is replaced.  A progress dialog is shown while working; pressing its
        Cancel button aborts the run and leaves the document untouched.
        """
        print('running smarty plugin')
        text = unicode(self.client.editor.toPlainText()).splitlines()
        prog = QtGui.QProgressDialog(self.client.tr("Applying smarty"),
                                     self.client.tr("Cancel"),
                                     0, len(text),
                                     self.client)
        prog.show()
        app = QtGui.QApplication.instance()
        output = []
        for i, l in enumerate(text):
            # The dialog offers a Cancel button, so honour it instead of
            # silently carrying on.
            if prog.wasCanceled():
                prog.hide()
                return
            output.append(unescape(smartyPants(l, self.mode)))
            prog.setValue(i)
            # Keep the UI responsive (and let the Cancel click arrive).
            app.processEvents()
        prog.hide()
        self.client.editor.setPlainText('\n'.join(output))
-
- # This function is by Fredrik Lundh
- import re, htmlentitydefs
##
# Removes HTML or XML character references and entities from a text string.
#
# @param text The HTML (or XML) source text.
# @return The plain text, as a Unicode string, if necessary.
def unescape(text):
    """Replace HTML/XML character references and named entities in *text*.

    Decimal (``&#8217;``) and hexadecimal (``&#x2019;``) references and
    known named entities (``&amp;``) are replaced by the character they
    stand for.  Anything that cannot be decoded -- an unknown name, a
    malformed number, or a code point that is out of range -- is left in
    the text unchanged.
    """
    # Resolve the Python 2 names lazily, falling back to their Python 3
    # equivalents, so the function works on either interpreter.
    try:
        to_char = unichr
    except NameError:
        to_char = chr
    try:
        from htmlentitydefs import name2codepoint
    except ImportError:
        from html.entities import name2codepoint

    def fixup(m):
        ref = m.group(0)
        if ref[:2] == "&#":
            # character reference
            try:
                if ref[:3] == "&#x":
                    return to_char(int(ref[3:-1], 16))
                return to_char(int(ref[2:-1]))
            except (ValueError, OverflowError):
                # Malformed digits or a code point outside the valid range.
                pass
        else:
            # named entity
            try:
                return to_char(name2codepoint[ref[1:-1]])
            except KeyError:
                pass
        return ref  # leave as is

    return re.sub(r"&#?\w+;", fixup, text)
-
-
- # Below this point, this is a copy of smartypants.py, from
- # http://web.chad.org/projects/smartypants.py/
- #!/usr/bin/python
- r"""
- ==============
- smartypants.py
- ==============
- ----------------------------
- SmartyPants ported to Python
- ----------------------------
- Ported by `Chad Miller`_
- Copyright (c) 2004, 2007 Chad Miller
- original `SmartyPants`_ by `John Gruber`_
- Copyright (c) 2003 John Gruber
- Synopsis
- ========
- A smart-quotes plugin for Pyblosxom_.
- The original "SmartyPants" is a free web publishing plug-in for Movable Type,
- Blosxom, and BBEdit that easily translates plain ASCII punctuation characters
- into "smart" typographic punctuation HTML entities.
- This software, *smartypants.py*, endeavours to be a functional port of
- SmartyPants to Python, for use with Pyblosxom_.
- Description
- ===========
- SmartyPants can perform the following transformations:
- - Straight quotes ( " and ' ) into "curly" quote HTML entities
- - Backticks-style quotes (\`\`like this'') into "curly" quote HTML entities
- - Dashes (``--`` and ``---``) into en- and em-dash entities
- - Three consecutive dots (``...`` or ``. . .``) into an ellipsis entity
- This means you can write, edit, and save your posts using plain old
- ASCII straight quotes, plain dashes, and plain dots, but your published
- posts (and final HTML output) will appear with smart quotes, em-dashes,
- and proper ellipses.
- SmartyPants does not modify characters within ``<pre>``, ``<code>``, ``<kbd>``,
- ``<math>`` or ``<script>`` tag blocks. Typically, these tags are used to
- display text where smart quotes and other "smart punctuation" would not be
- appropriate, such as source code or example markup.
- Backslash Escapes
- =================
- If you need to use literal straight quotes (or plain hyphens and
- periods), SmartyPants accepts the following backslash escape sequences
- to force non-smart punctuation. It does so by transforming the escape
- sequence into a decimal-encoded HTML entity:
- (FIXME: table here.)
- .. comment It sucks that there's a disconnect between the visual layout and table markup when special characters are involved.
- .. comment ====== ===== =========
- .. comment Escape Value Character
- .. comment ====== ===== =========
- .. comment \\\\\\\\ \ \\\\
- .. comment \\\\" " "
- .. comment \\\\' ' '
- .. comment \\\\. . .
- .. comment \\\\- - \-
- .. comment \\\\` ` \`
- .. comment ====== ===== =========
- This is useful, for example, when you want to use straight quotes as
- foot and inch marks: 6'2" tall; a 17" iMac.
- Options
- =======
- For Pyblosxom users, the ``smartypants_attributes`` attribute is where you
- specify configuration options.
- Numeric values are the easiest way to configure SmartyPants' behavior:
- "0"
- Suppress all transformations. (Do nothing.)
- "1"
- Performs default SmartyPants transformations: quotes (including
- \`\`backticks'' -style), em-dashes, and ellipses. "``--``" (dash dash)
- is used to signify an em-dash; there is no support for en-dashes.
- "2"
- Same as smarty_pants="1", except that it uses the old-school typewriter
- shorthand for dashes: "``--``" (dash dash) for en-dashes, "``---``"
- (dash dash dash)
- for em-dashes.
- "3"
- Same as smarty_pants="2", but inverts the shorthand for dashes:
- "``--``" (dash dash) for em-dashes, and "``---``" (dash dash dash) for
- en-dashes.
- "-1"
- Stupefy mode. Reverses the SmartyPants transformation process, turning
- the HTML entities produced by SmartyPants into their ASCII equivalents.
- E.g. "&#8220;" is turned into a simple double-quote ("), "&#8212;" is
- turned into two dashes, etc.
- The following single-character attribute values can be combined to toggle
- individual transformations from within the smarty_pants attribute. For
- example, to educate normal quotes and em-dashes, but not ellipses or
- \`\`backticks'' -style quotes:
- ``py['smartypants_attributes'] = "qd"``
- "q"
- Educates normal quote characters: (") and (').
- "b"
- Educates \`\`backticks'' -style double quotes.
- "B"
- Educates \`\`backticks'' -style double quotes and \`single' quotes.
- "d"
- Educates em-dashes.
- "D"
- Educates em-dashes and en-dashes, using old-school typewriter shorthand:
- (dash dash) for en-dashes, (dash dash dash) for em-dashes.
- "i"
- Educates em-dashes and en-dashes, using inverted old-school typewriter
- shorthand: (dash dash) for em-dashes, (dash dash dash) for en-dashes.
- "e"
- Educates ellipses.
- "w"
- Translates any instance of ``&quot;`` into a normal double-quote character.
- This should be of no interest to most people, but of particular interest
- to anyone who writes their posts using Dreamweaver, as Dreamweaver
- inexplicably uses this entity to represent a literal double-quote
- character. SmartyPants only educates normal quotes, not entities (because
- ordinarily, entities are used for the explicit purpose of representing the
- specific character they represent). The "w" option must be used in
- conjunction with one (or both) of the other quote options ("q" or "b").
- Thus, if you wish to apply all SmartyPants transformations (quotes, en-
- and em-dashes, and ellipses) and also translate ``&quot;`` entities into
- regular quotes so SmartyPants can educate them, you should pass the
- following to the smarty_pants attribute:
- ``py['smartypants_attributes'] = "qDew"``
- The ``smartypants_forbidden_flavours`` list contains pyblosxom flavours for
- which no Smarty Pants rendering will occur.
- Caveats
- =======
- Why You Might Not Want to Use Smart Quotes in Your Weblog
- ---------------------------------------------------------
- For one thing, you might not care.
- Most normal, mentally stable individuals do not take notice of proper
- typographic punctuation. Many design and typography nerds, however, break
- out in a nasty rash when they encounter, say, a restaurant sign that uses
- a straight apostrophe to spell "Joe's".
- If you're the sort of person who just doesn't care, you might well want to
- continue not caring. Using straight quotes -- and sticking to the 7-bit
- ASCII character set in general -- is certainly a simpler way to live.
- Even if you *do* care about accurate typography, you still might want to
- think twice before educating the quote characters in your weblog. One side
- effect of publishing curly quote HTML entities is that it makes your
- weblog a bit harder for others to quote from using copy-and-paste. What
- happens is that when someone copies text from your blog, the copied text
- contains the 8-bit curly quote characters (as well as the 8-bit characters
- for em-dashes and ellipses, if you use these options). These characters
- are not standard across different text encoding methods, which is why they
- need to be encoded as HTML entities.
- People copying text from your weblog, however, may not notice that you're
- using curly quotes, and they'll go ahead and paste the unencoded 8-bit
- characters copied from their browser into an email message or their own
- weblog. When pasted as raw "smart quotes", these characters are likely to
- get mangled beyond recognition.
- That said, my own opinion is that any decent text editor or email client
- makes it easy to stupefy smart quote characters into their 7-bit
- equivalents, and I don't consider it my problem if you're using an
- indecent text editor or email client.
- Algorithmic Shortcomings
- ------------------------
- One situation in which quotes will get curled the wrong way is when
- apostrophes are used at the start of leading contractions. For example:
- ``'Twas the night before Christmas.``
- In the case above, SmartyPants will turn the apostrophe into an opening
- single-quote, when in fact it should be a closing one. I don't think
- this problem can be solved in the general case -- every word processor
- I've tried gets this wrong as well. In such cases, it's best to use the
- proper HTML entity for closing single-quotes (``&#8217;``) by hand.
- Bugs
- ====
- To file bug reports or feature requests (other than topics listed in the
- Caveats section above) please send email to: mailto:smartypantspy@chad.org
- If the bug involves quotes being curled the wrong way, please send example
- text to illustrate.
- To Do list
- ----------
- - Provide a function for use within templates to quote anything at all.
- Version History
- ===============
- 1.5_1.6: Fri, 27 Jul 2007 07:06:40 -0400
- - Fixed bug where blocks of precious unalterable text was instead
- interpreted. Thanks to Le Roux and Dirk van Oosterbosch.
-
- 1.5_1.5: Sat, 13 Aug 2005 15:50:24 -0400
- - Fix bogus magical quotation when there is no hint that the
- user wants it, e.g., in "21st century". Thanks to Nathan Hamblen.
- - Be smarter about quotes before terminating numbers in an en-dash'ed
- range.
- 1.5_1.4: Thu, 10 Feb 2005 20:24:36 -0500
- - Fix a date-processing bug, as reported by jacob childress.
- - Begin a test-suite for ensuring correct output.
- - Removed import of "string", since I didn't really need it.
- (This was my first ever Python program. Sue me!)
- 1.5_1.3: Wed, 15 Sep 2004 18:25:58 -0400
- - Abort processing if the flavour is in forbidden-list. Default of
- [ "rss" ] (Idea of Wolfgang SCHNERRING.)
- - Remove stray virgules from en-dashes. Patch by Wolfgang SCHNERRING.
- 1.5_1.2: Mon, 24 May 2004 08:14:54 -0400
- - Some single quotes weren't replaced properly. Diff-tesuji played
- by Benjamin GEIGER.
- 1.5_1.1: Sun, 14 Mar 2004 14:38:28 -0500
- - Support upcoming pyblosxom 0.9 plugin verification feature.
- 1.5_1.0: Tue, 09 Mar 2004 08:08:35 -0500
- - Initial release
- Version Information
- -------------------
- Version numbers will track the SmartyPants_ version numbers, with the addition
- of an underscore and the smartypants.py version on the end.
- New versions will be available at `http://wiki.chad.org/SmartyPantsPy`_
- .. _http://wiki.chad.org/SmartyPantsPy: http://wiki.chad.org/SmartyPantsPy
- Authors
- =======
- `John Gruber`_ did all of the hard work of writing this software in Perl for
- `Movable Type`_ and almost all of this useful documentation. `Chad Miller`_
- ported it to Python to use with Pyblosxom_.
- Additional Credits
- ==================
- Portions of the SmartyPants original work are based on Brad Choate's nifty
- MTRegex plug-in. `Brad Choate`_ also contributed a few bits of source code to
- this plug-in. Brad Choate is a fine hacker indeed.
- `Jeremy Hedley`_ and `Charles Wiltgen`_ deserve mention for exemplary beta
- testing of the original SmartyPants.
- `Rael Dornfest`_ ported SmartyPants to Blosxom.
- .. _Brad Choate: http://bradchoate.com/
- .. _Jeremy Hedley: http://antipixel.com/
- .. _Charles Wiltgen: http://playbacktime.com/
- .. _Rael Dornfest: http://raelity.org/
- Copyright and License
- =====================
- SmartyPants_ license::
- Copyright (c) 2003 John Gruber
- (http://daringfireball.net/)
- All rights reserved.
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in
- the documentation and/or other materials provided with the
- distribution.
- * Neither the name "SmartyPants" nor the names of its contributors
- may be used to endorse or promote products derived from this
- software without specific prior written permission.
- This software is provided by the copyright holders and contributors "as
- is" and any express or implied warranties, including, but not limited
- to, the implied warranties of merchantability and fitness for a
- particular purpose are disclaimed. In no event shall the copyright
- owner or contributors be liable for any direct, indirect, incidental,
- special, exemplary, or consequential damages (including, but not
- limited to, procurement of substitute goods or services; loss of use,
- data, or profits; or business interruption) however caused and on any
- theory of liability, whether in contract, strict liability, or tort
- (including negligence or otherwise) arising in any way out of the use
- of this software, even if advised of the possibility of such damage.
- smartypants.py license::
- smartypants.py is a derivative work of SmartyPants.
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in
- the documentation and/or other materials provided with the
- distribution.
- This software is provided by the copyright holders and contributors "as
- is" and any express or implied warranties, including, but not limited
- to, the implied warranties of merchantability and fitness for a
- particular purpose are disclaimed. In no event shall the copyright
- owner or contributors be liable for any direct, indirect, incidental,
- special, exemplary, or consequential damages (including, but not
- limited to, procurement of substitute goods or services; loss of use,
- data, or profits; or business interruption) however caused and on any
- theory of liability, whether in contract, strict liability, or tort
- (including negligence or otherwise) arising in any way out of the use
- of this software, even if advised of the possibility of such damage.
- .. _John Gruber: http://daringfireball.net/
- .. _Chad Miller: http://web.chad.org/
- .. _Pyblosxom: http://roughingit.subtlehints.net/pyblosxom
- .. _SmartyPants: http://daringfireball.net/projects/smartypants/
- .. _Movable Type: http://www.movabletype.org/
- """
# Attribute string handed to smartyPants() when a caller supplies none.
default_smartypants_attr = "1"

import re

# Matches the opening or closing form of a tag whose contents smartyPants()
# leaves alone; group 1 is "/" for a closing tag, group 2 is the tag name.
tags_to_skip_regex = re.compile(r"<(/)?(pre|code|kbd|script|math)[^>]*>", re.I)
def verify_installation(request):
    """Report that the plugin is functional.

    *request* is accepted but not inspected; the answer is always 1.
    """
    plugin_is_functional = 1
    return plugin_is_functional
def cb_story(args):
    """Pyblosxom story callback: educate the entry's body and title in place.

    ``args["entry"]`` may carry two optional keys:

    * ``smartypants_forbidden_flavours`` -- flavours that must not be
      processed (default ``["rss"]``).
    * ``smartypants_attributes`` -- attribute string for smartyPants()
      (default ``default_smartypants_attr``, also used when it is None).

    Nothing is changed when the request flavour is forbidden.  When the
    flavour cannot be determined, nothing is changed if the start of the
    body looks like escaped HTML.
    """
    entry = args["entry"]

    try:
        forbidden_flavours = entry["smartypants_forbidden_flavours"]
    except KeyError:
        forbidden_flavours = ["rss"]

    try:
        attributes = entry["smartypants_attributes"]
    except KeyError:
        attributes = default_smartypants_attr

    if attributes is None:
        attributes = default_smartypants_attr

    entryData = entry.getData()

    try:
        if args["request"]["flavour"] in forbidden_flavours:
            return
    except KeyError:
        # Escaped HTML shows up as "&lt;"; a raw "<" is ordinary markup
        # and must not stop processing.
        if "&lt;" in entry["body"][0:15]:  # sniff the stream
            return  # abort if it looks like escaped HTML.  FIXME

    # FIXME: make these configurable, perhaps?
    entry["body"] = smartyPants(entryData, attributes)
    entry["title"] = smartyPants(entry["title"], attributes)
- ### internal functions below here
def smartyPants(text, attr=default_smartypants_attr):
    """Return *text* with plain ASCII punctuation turned into HTML entities.

    *attr* selects the transformations:

        "0"  : do nothing
        "1"  : set all
        "2"  : set all, using old school en- and em- dash shortcuts
        "3"  : set all, using inverted old school en and em- dash shortcuts
        "-1" : "stupefy" -- turn SmartyPants entities back into ASCII

    Any other value is read character by character:

        q : quotes
        b : backtick quotes (``double'' only)
        B : backtick quotes (``double'' and `single')
        d : dashes
        D : old school dashes
        i : inverted old school dashes
        e : ellipses
        w : convert &quot; entities to " for Dreamweaver users

    Unknown characters are ignored.  Backslash escapes are always processed
    (except for attr "0").  Text inside the tags matched by
    ``tags_to_skip_regex`` is passed through untouched.
    """
    if attr == "0":
        # Do nothing.
        return text

    convert_quot = False  # should we translate &quot; entities into normal quotes?
    do_quotes = "0"
    do_backticks = "0"
    do_dashes = "0"
    do_ellipses = "0"
    do_stupefy = "0"

    if attr in ("1", "2", "3"):
        # Do everything; the number doubles as the dash style
        # (1 = default, 2 = old school, 3 = inverted old school).
        do_quotes = "1"
        do_backticks = "1"
        do_dashes = attr
        do_ellipses = "1"
    elif attr == "-1":
        # Special "stupefy" mode.
        do_stupefy = "1"
    else:
        for c in attr:
            if c == "q":
                do_quotes = "1"
            elif c == "b":
                do_backticks = "1"
            elif c == "B":
                do_backticks = "2"
            elif c == "d":
                do_dashes = "1"
            elif c == "D":
                do_dashes = "2"
            elif c == "i":
                do_dashes = "3"
            elif c == "e":
                do_ellipses = "1"
            elif c == "w":
                convert_quot = True
            # ignore unknown option

    dash_educators = {
        "1": educateDashes,
        "2": educateDashesOldSchool,
        "3": educateDashesOldSchoolInverted,
    }

    tokens = _tokenize(text)
    result = []
    skipped_tag_stack = []
    in_pre = False

    # This is a cheat, used to get some context for one-character tokens
    # that consist of just a quote char.  What we do is remember the last
    # character of the previous text token, to use as context to curl
    # single-character quote tokens correctly.
    prev_token_last_char = ""

    for cur_token in tokens:
        if cur_token[0] == "tag":
            # Don't mess with quotes inside some tags.  This does not handle
            # self <closing/> tags!
            result.append(cur_token[1])
            skip_match = tags_to_skip_regex.match(cur_token[1])
            if skip_match is not None:
                tag_name = skip_match.group(2).lower()
                if not skip_match.group(1):
                    skipped_tag_stack.append(tag_name)
                    in_pre = True
                else:
                    # A close that doesn't match the innermost open is
                    # ignored.  This isn't XHTML.  We should barf here.
                    if skipped_tag_stack and tag_name == skipped_tag_stack[-1]:
                        skipped_tag_stack.pop()
                    if not skipped_tag_stack:
                        in_pre = False
            continue

        t = cur_token[1]
        last_char = t[-1:]  # Remember last char of this token before processing.
        if not in_pre:
            t = processEscapes(t)

            # Only when the "w" option was given; the flag used to be
            # compared against "0" and was therefore always treated as set.
            if convert_quot:
                t = re.sub('&quot;', '"', t)

            if do_dashes in dash_educators:
                t = dash_educators[do_dashes](t)

            if do_ellipses != "0":
                t = educateEllipses(t)

            # Note: backticks need to be processed before quotes.
            if do_backticks != "0":
                t = educateBackticks(t)

            if do_backticks == "2":
                t = educateSingleBackticks(t)

            if do_quotes != "0":
                if t == "'":
                    # Special case: single-character ' token
                    if re.match(r"\S", prev_token_last_char):
                        t = "&#8217;"
                    else:
                        t = "&#8216;"
                elif t == '"':
                    # Special case: single-character " token
                    if re.match(r"\S", prev_token_last_char):
                        t = "&#8221;"
                    else:
                        t = "&#8220;"
                else:
                    # Normal case:
                    t = educateQuotes(t)

            if do_stupefy == "1":
                t = stupefyEntities(t)

        prev_token_last_char = last_char
        result.append(t)

    return "".join(result)
def educateQuotes(str):
    r"""
    Parameter:  String.

    Returns:    The string, with "educated" curly quote HTML entities.

    Example input:  "Isn't this fun?"
    Example output: &#8220;Isn&#8217;t this fun?&#8221;

    Straight quotes become &#8216; / &#8217; (single) and
    &#8220; / &#8221; (double) depending on the surrounding characters.
    """
    text = str

    punct_class = r"""[!"#\$\%'()*+,-.\/:;<=>?\@\[\\\]\^_`{|}~]"""

    # Special case if the very first character is a quote
    # followed by punctuation at a non-word-break. Close the quotes by brute force:
    # (\B is the non-word-break assertion; a doubled backslash here would
    # demand a literal backslash followed by "B" and never fire.)
    text = re.sub(r"""^'(?=%s\B)""" % (punct_class,), r"""&#8217;""", text)
    text = re.sub(r"""^"(?=%s\B)""" % (punct_class,), r"""&#8221;""", text)

    # Special case for double sets of quotes, e.g.:
    #   <p>He said, "'Quoted' words in a larger quote."</p>
    text = re.sub(r'''"'(?=\w)''', """&#8220;&#8216;""", text)
    text = re.sub(r"""'"(?=\w)""", """&#8216;&#8220;""", text)

    # Special case for decade abbreviations (the '80s):
    text = re.sub(r"""\b'(?=\d{2}s)""", r"""&#8217;""", text)

    close_class = r"""[^\ \t\r\n\[\{\(\-]"""
    # "#" starts a comment under re.VERBOSE, so it has to be escaped.
    dec_dashes = r"""&\#8211;|&\#8212;"""

    # Get most opening single quotes:
    opening_single_quotes_regex = re.compile(r"""
            (
                \s          |   # a whitespace char, or
                &nbsp;      |   # a non-breaking space entity, or
                --          |   # dashes, or
                &[mn]dash;  |   # named dash entities
                %s          |   # or decimal entities
                &\#x201[34];    # or hex
            )
            '                 # the quote
            (?=\w)            # followed by a word character
            """ % (dec_dashes,), re.VERBOSE)
    text = opening_single_quotes_regex.sub(r"""\1&#8216;""", text)

    # Closing single quotes: after a "closing" character and not followed
    # by whitespace, a possessive/plural "s", or a digit ...
    text = re.sub(r"""(%s)'(?!\s|s\b|\d)""" % (close_class,),
                  r"""\1&#8217;""", text)
    # ... or followed by exactly whitespace or a trailing "s".
    text = re.sub(r"""(%s)'(\s|s\b)""" % (close_class,),
                  r"""\1&#8217;\2""", text)

    # Any remaining single quotes should be opening ones:
    text = re.sub(r"""'""", r"""&#8216;""", text)

    # Get most opening double quotes:
    opening_double_quotes_regex = re.compile(r"""
            (
                \s          |   # a whitespace char, or
                &nbsp;      |   # a non-breaking space entity, or
                --          |   # dashes, or
                &[mn]dash;  |   # named dash entities
                %s          |   # or decimal entities
                &\#x201[34];    # or hex
            )
            "                 # the quote
            (?=\w)            # followed by a word character
            """ % (dec_dashes,), re.VERBOSE)
    text = opening_double_quotes_regex.sub(r"""\1&#8220;""", text)

    # Double closing quotes: directly before whitespace ...
    text = re.sub(r'''"(?=\s)''', r"""&#8221;""", text)
    # ... or directly after a character that indicates the quote should be closing.
    text = re.sub(r'''(%s)"''' % (close_class,), r"""\1&#8221;""", text)

    # Any remaining quotes should be opening ones.
    text = re.sub(r'"', r"""&#8220;""", text)

    return text
def educateBackticks(str):
    """
    Parameter:  String.
    Returns:    The string, with ``backticks'' -style double quotes
                translated into HTML curly quote entities.
    Example input:  ``Isn't this fun?''
    Example output: &#8220;Isn't this fun?&#8221;
    """
    text = str
    # Plain substring replacement; no regular expression is needed here.
    text = text.replace("``", "&#8220;")
    text = text.replace("''", "&#8221;")
    return text
def educateSingleBackticks(str):
    """
    Parameter:  String.
    Returns:    The string, with `backticks' -style single quotes
                translated into HTML curly quote entities.

    Example input:  `Isn't this fun?'
    Example output: &#8216;Isn&#8217;t this fun?&#8217;
    """
    text = str
    # Every backtick opens and every straight single quote closes.
    text = text.replace("`", "&#8216;")
    text = text.replace("'", "&#8217;")
    return text
def educateDashes(str):
    """
    Parameter:  String.

    Returns:    The string, with each instance of "--" translated to
                an em-dash HTML entity (and each "---" to an en-dash
                HTML entity).
    """
    text = str
    # The triple dash must be handled first, otherwise "--" would eat it.
    text = text.replace("---", "&#8211;")  # en  (yes, backwards)
    text = text.replace("--", "&#8212;")   # em (yes, backwards)
    return text
def educateDashesOldSchool(str):
    """
    Parameter:  String.

    Returns:    The string, with each instance of "--" translated to
                an en-dash HTML entity, and each "---" translated to
                an em-dash HTML entity.
    """
    text = str
    # The triple dash must be handled first, otherwise "--" would eat it.
    text = text.replace("---", "&#8212;")  # em
    text = text.replace("--", "&#8211;")   # en
    return text
def educateDashesOldSchoolInverted(str):
    """
    Parameter:  String.

    Returns:    The string, with each instance of "--" translated to
                an em-dash HTML entity, and each "---" translated to
                an en-dash HTML entity. Two reasons why: First, unlike the
                en- and em-dash syntax supported by
                EducateDashesOldSchool(), it's compatible with existing
                entries written before SmartyPants 1.1, back when "--" was
                only used for em-dashes.  Second, em-dashes are more
                common than en-dashes, and so it sort of makes sense that
                the shortcut should be shorter to type. (Thanks to Aaron
                Swartz for the idea.)
    """
    text = str
    # The triple dash must be handled first, otherwise "--" would eat it.
    text = text.replace("---", "&#8211;")  # en
    text = text.replace("--", "&#8212;")   # em
    return text
def educateEllipses(str):
    """
    Parameter:  String.
    Returns:    The string, with each instance of "..." translated to
                an ellipsis HTML entity.  The spaced form ". . ." is
                translated as well.

    Example input:  Huh...?
    Example output: Huh&#8230;?
    """
    text = str
    text = text.replace("...", "&#8230;")
    text = text.replace(". . .", "&#8230;")
    return text
def stupefyEntities(str):
    """
    Parameter:  String.
    Returns:    The string, with each SmartyPants HTML entity translated to
                its ASCII counterpart.
    Example input:  &#8220;Hello &#8212; world.&#8221;
    Example output: "Hello -- world."
    """
    replacements = (
        ("&#8211;", "-"),    # en-dash
        ("&#8212;", "--"),   # em-dash
        ("&#8216;", "'"),    # open single quote
        ("&#8217;", "'"),    # close single quote
        ("&#8220;", '"'),    # open double quote
        ("&#8221;", '"'),    # close double quote
        ("&#8230;", "..."),  # ellipsis
    )
    text = str
    for entity, plain in replacements:
        text = text.replace(entity, plain)
    return text
def processEscapes(str):
    r"""
    Parameter:  String.
    Returns:    The string, after processing the following backslash
                escape sequences. This is useful if you want to force a
                "dumb" quote or other character to appear.

                Escape  Value    Character
                ------  -----    ---------
                \\      &#92;    backslash
                \"      &#34;    double quote
                \'      &#39;    single quote
                \.      &#46;    period
                \-      &#45;    hyphen
                \`      &#96;    backtick
    """
    # Order matters: the doubled backslash has to be consumed first so its
    # second backslash cannot start another escape sequence.
    escapes = (
        ("\\\\", "&#92;"),
        ('\\"', "&#34;"),
        ("\\'", "&#39;"),
        ("\\.", "&#46;"),
        ("\\-", "&#45;"),
        ("\\`", "&#96;"),
    )
    text = str
    for sequence, entity in escapes:
        text = text.replace(sequence, entity)
    return text
- def _tokenize(str):
- """
- Parameter: String containing HTML markup.
- Returns: Reference to an array of the tokens comprising the input
- string. Each token is either a tag (possibly with nested,
- tags contained therein, such as <a href="<MTFoo>">, or a
- run of text between tags. Each element of the array is a
- two-element array; the first is either 'tag' or 'text';
- the second is the actual value.
-
- Based on the _tokenize() subroutine from Brad Choate's MTRegex plugin.
- <http://www.bradchoate.com/past/mtregex.php>
- """
- pos = 0
- length = len(str)
- tokens = []
- depth = 6
- nested_tags = "|".join(['(?:<(?:[^<>]',] * depth) + (')*>)' * depth)
- #match = r"""(?: <! ( -- .*? -- \s* )+ > ) | # comments
- # (?: <\? .*? \?> ) | # directives
- # %s # nested tags """ % (nested_tags,)
- tag_soup = re.compile(r"""([^<]*)(<[^>]*>)""")
- token_match = tag_soup.search(str)
- previous_end = 0
- while token_match is not None:
- if token_match.group(1):
- tokens.append(['text', token_match.group(1)])
- tokens.append(['tag', token_match.group(2)])
- previous_end = token_match.end()
- token_match = tag_soup.search(str, token_match.end())
- if previous_end < len(str):
- tokens.append(['text', str[previous_end:]])
- return tokens
# When run as a script: print the module documentation rendered as HTML,
# then run the unit tests.
if __name__ == "__main__":

    import locale

    try:
        locale.setlocale(locale.LC_ALL, '')
    except locale.Error:
        # Best effort only: an unsupported locale must not stop the run.
        pass

    from docutils.core import publish_string
    docstring_html = publish_string(__doc__, writer_name='html')

    print(docstring_html)

    # Unit test output goes out stderr.  No worries.
    import unittest
    sp = smartyPants

    class TestSmartypantsAllAttributes(unittest.TestCase):
        # the default attribute is "1", which means "all".

        def test_dates(self):
            self.assertEqual(sp("1440-80's"), "1440-80&#8217;s")
            self.assertEqual(sp("1440-'80s"), "1440-&#8216;80s")
            self.assertEqual(sp("1440---'80s"), "1440&#8211;&#8216;80s")
            self.assertEqual(sp("1960s"), "1960s")  # no effect.
            self.assertEqual(sp("1960's"), "1960&#8217;s")
            self.assertEqual(sp("one two '60s"), "one two &#8216;60s")
            self.assertEqual(sp("'60s"), "&#8216;60s")

        def test_skip_tags(self):
            self.assertEqual(
                sp("""<script type="text/javascript">\n<!--\nvar href = "http://www.google.com";\nvar linktext = "google";\ndocument.write('<a href="' + href + '">' + linktext + "</a>");\n//-->\n</script>"""),
                """<script type="text/javascript">\n<!--\nvar href = "http://www.google.com";\nvar linktext = "google";\ndocument.write('<a href="' + href + '">' + linktext + "</a>");\n//-->\n</script>""")
            self.assertEqual(
                sp("""<p>He said "Let's write some code." This code here <code>if True:\n\tprint "Okay"</code> is python code.</p>"""),
                """<p>He said &#8220;Let&#8217;s write some code.&#8221; This code here <code>if True:\n\tprint "Okay"</code> is python code.</p>""")

        def test_ordinal_numbers(self):
            self.assertEqual(sp("21st century"), "21st century")  # no effect.
            self.assertEqual(sp("3rd"), "3rd")  # no effect.

        def test_educated_quotes(self):
            self.assertEqual(sp('''"Isn't this fun?"'''), '''&#8220;Isn&#8217;t this fun?&#8221;''')

    unittest.main()
# Module metadata for the embedded smartypants.py copy.
__author__ = "Chad Miller <smartypantspy@chad.org>"
__version__ = "1.5_1.6: Fri, 27 Jul 2007 07:06:40 -0400"
__url__ = "http://wiki.chad.org/SmartyPantsPy"
__description__ = "Smart-quotes, smart-ellipses, and smart-dashes for weblog entries in pyblosxom"