PageRenderTime 26ms CodeModel.GetById 24ms RepoModel.GetById 1ms app.codeStats 0ms

/public_html/wire/modules/Textformatter/TextformatterSmartypants/Michelf/SmartyPants.php

https://bitbucket.org/thomas1151/mats
PHP | 513 lines | 380 code | 45 blank | 88 comment | 72 complexity | cfdec7339434f641e275a294538e805e MD5 | raw file
Possible License(s): Apache-2.0, BSD-3-Clause, LGPL-2.1, MPL-2.0-no-copyleft-exception
  1. <?php
  2. #
  3. # SmartyPants - Smart typography for web sites
  4. #
  5. # PHP SmartyPants
  6. # Copyright (c) 2004-2016 Michel Fortin
  7. # <https://michelf.ca/>
  8. #
  9. # Original SmartyPants
  10. # Copyright (c) 2003-2004 John Gruber
  11. # <https://daringfireball.net/>
  12. #
  13. namespace Michelf;
  14. #
  15. # SmartyPants Parser Class
  16. #
  17. class SmartyPants {
  18. ### Version ###
  19. const SMARTYPANTSLIB_VERSION = "1.7.1";
  20. ### Presets
  21. # SmartyPants does nothing at all
  22. const ATTR_DO_NOTHING = 0;
  23. # "--" for em-dashes; no en-dash support
  24. const ATTR_EM_DASH = 1;
  25. # "---" for em-dashes; "--" for en-dashes
  26. const ATTR_LONG_EM_DASH_SHORT_EN = 2;
  27. # "--" for em-dashes; "---" for en-dashes
  28. const ATTR_SHORT_EM_DASH_LONG_EN = 3;
  29. # Stupefy mode: convert SmartyPants HTML entities back to their ASCII counterparts
  30. const ATTR_STUPEFY = -1;
  31. # The default preset: ATTR_EM_DASH
  32. const ATTR_DEFAULT = SmartyPants::ATTR_EM_DASH;
  33. ### Standard Function Interface ###
  public static function defaultTransform($text, $attr = SmartyPants::ATTR_DEFAULT) {
      #
      # Convenience entry point: run $text through a parser configured with
      # $attr and return the result of its transform method.
      #
      # Parsers are cached per called class and per attribute value, so
      # repeated calls reuse the same instance. The class is the one this
      # method was called on, so derived classes get their own parser.
      #
      static $parser_list;

      $class = \get_called_class();

      # Build the parser the first time this class/attribute pair is seen.
      if (empty($parser_list[$class][$attr])) {
          $parser_list[$class][$attr] = new $class($attr);
      }

      return $parser_list[$class][$attr]->transform($text);
  }
  50. ### Configuration Variables ###
  51. # Partial regex for matching tags to skip
  52. public $tags_to_skip = 'pre|code|kbd|script|style|math';
  53. # Options to specify which transformations to make:
  54. public $do_nothing = 0; # disable all transforms
  55. public $do_quotes = 0;
  56. public $do_backticks = 0; # 1 => double only, 2 => double & single
  57. public $do_dashes = 0; # 1, 2, or 3 for the three modes described above
  58. public $do_ellipses = 0;
  59. public $do_stupefy = 0;
  60. public $convert_quot = 0; # should we translate &quot; entities into normal quotes?
  61. ### Parser Implementation ###
  public function __construct($attr = SmartyPants::ATTR_DEFAULT) {
      #
      # Configure the parser from an attribute value.
      #
      # Numeric presets:
      #    0 : do nothing
      #    1 : set all
      #    2 : set all, using old school en- and em- dash shortcuts
      #    3 : set all, using inverted old school en and em- dash shortcuts
      #   -1 : stupefy mode (only do_stupefy is turned on)
      #
      # Anything else is read as a string of option letters:
      #    q : quotes
      #    b : backtick quotes (``double'' only)
      #    B : backtick quotes (``double'' and `single')
      #    d : dashes
      #    D : old school dashes
      #    i : inverted old school dashes
      #    e : ellipses
      #    w : convert &quot; entities to " for Dreamweaver users
      #
      # Note: switch compares loosely, so integer and string presets
      # (1 and "1") select the same branch.
      #
      switch ($attr) {
          case "0":
              $this->do_nothing = 1;
              break;

          case "1":
              # Do everything, turn all options on.
              $this->do_quotes = $this->do_backticks = $this->do_ellipses = 1;
              $this->do_dashes = 1;
              break;

          case "2":
              # Do everything, with the old school dash shorthand.
              $this->do_quotes = $this->do_backticks = $this->do_ellipses = 1;
              $this->do_dashes = 2;
              break;

          case "3":
              # Do everything, with the inverted old school dash shorthand.
              $this->do_quotes = $this->do_backticks = $this->do_ellipses = 1;
              $this->do_dashes = 3;
              break;

          case "-1":
              # Special "stupefy" mode.
              $this->do_stupefy = 1;
              break;

          default:
              # Option letters are applied left to right, so a later letter
              # overrides an earlier one that sets the same option.
              foreach (preg_split('//', $attr) as $c) {
                  switch ($c) {
                      case "q": $this->do_quotes = 1;    break;
                      case "b": $this->do_backticks = 1; break;
                      case "B": $this->do_backticks = 2; break;
                      case "d": $this->do_dashes = 1;    break;
                      case "D": $this->do_dashes = 2;    break;
                      case "i": $this->do_dashes = 3;    break;
                      case "e": $this->do_ellipses = 1;  break;
                      case "w": $this->convert_quot = 1; break;
                      default:
                          # Unknown attribute option, ignore.
                          break;
                  }
              }
              break;
      }
  }
  public function transform($text) {
      #
      # Parameter: String containing HTML markup.
      # Returns: the markup with the enabled transformations applied to its
      #   text tokens. Tokens of type 'tag' are copied through untouched, and
      #   so is any text found inside an element named in $tags_to_skip.
      #
      if ($this->do_nothing) {
          return $text;
      }

      $tokens = $this->tokenizeHTML($text);
      $result = '';

      # Number of currently open $tags_to_skip elements. A counter (rather
      # than an on/off flag) keeps text protected until the outermost element
      # closes, e.g. the text after </code> in <pre><code>..</code> .. </pre>.
      $skip_depth = 0;

      # Anchored at the start of the token so only the tag's own name is
      # tested: a comment or an attribute value that merely contains "<pre>"
      # must not switch processing off. Case-insensitive because HTML tag
      # names are.
      $skip_tag_re = '@^<(/?)(?:'.$this->tags_to_skip.')[\s>]@i';

      # This is a cheat, used to get some context for one-character tokens
      # that consist of just a quote char: remember the last character of the
      # previous text token so the lone quote can be curled correctly.
      $prev_token_last_char = "";

      foreach ($tokens as $cur_token) {
          if ($cur_token[0] == "tag") {
              # Don't mess with quotes inside tags.
              $result .= $cur_token[1];
              if (preg_match($skip_tag_re, $cur_token[1], $matches)) {
                  if ($matches[1] == '/') {
                      # Stray closing tags must not drive the depth negative.
                      if ($skip_depth > 0) {
                          $skip_depth--;
                      }
                  } else {
                      $skip_depth++;
                  }
              }
          } else {
              $t = $cur_token[1];
              # Remember last char of this token before processing.
              $last_char = substr($t, -1);
              if ($skip_depth == 0) {
                  $t = $this->educate($t, $prev_token_last_char);
              }
              $prev_token_last_char = $last_char;
              $result .= $t;
          }
      }

      return $result;
  }
  protected function educate($t, $prev_token_last_char) {
      #
      # Parameters: a run of text, and the last character of the text token
      #   that preceded it (context for tokens that are a lone quote char).
      # Returns: the text after applying every enabled transformation, in
      #   this order: backslash escapes, &quot; conversion, dashes, ellipses,
      #   backticks, quotes, stupefy.
      #
      $t = $this->processEscapes($t);

      if ($this->convert_quot) {
          $t = preg_replace('/&quot;/', '"', $t);
      }

      $dash_mode = $this->do_dashes;
      if ($dash_mode) {
          if ($dash_mode == 1) { $t = $this->educateDashes($t); }
          if ($dash_mode == 2) { $t = $this->educateDashesOldSchool($t); }
          if ($dash_mode == 3) { $t = $this->educateDashesOldSchoolInverted($t); }
      }

      if ($this->do_ellipses) {
          $t = $this->educateEllipses($t);
      }

      # Note: backticks need to be processed before quotes.
      $backtick_mode = $this->do_backticks;
      if ($backtick_mode) {
          $t = $this->educateBackticks($t);
          if ($backtick_mode == 2) {
              $t = $this->educateSingleBackticks($t);
          }
      }

      if ($this->do_quotes) {
          if ($t == "'" || $t == '"') {
              # Special case: the token is a single quote character. It has
              # no context of its own, so close it when the previous text
              # ended in a non-whitespace character and open it otherwise.
              $closes = preg_match('/\S/', $prev_token_last_char);
              if ($t == "'") {
                  $t = $closes ? "&#8217;" : "&#8216;";
              } else {
                  $t = $closes ? "&#8221;" : "&#8220;";
              }
          } else {
              # Normal case:
              $t = $this->educateQuotes($t);
          }
      }

      if ($this->do_stupefy) {
          $t = $this->stupefyEntities($t);
      }

      return $t;
  }
  protected function educateQuotes($_) {
      #
      # Parameter: String.
      #
      # Returns: the string with straight single and double quotes replaced
      #   by curly quote HTML entities.
      #
      # Example input:  "Isn't this fun?"
      # Example output: &#8220;Isn&#8217;t this fun?&#8221;
      #
      # The substitutions are order-sensitive: special cases first, then all
      # single quotes (opening, closing, leftovers), then all double quotes.
      #

      # Hand-written "punctuation" character class.
      $punct_class = "[!\"#\\$\\%'()*+,-.\\/:;<=>?\\@\\[\\\\\]\\^_`{|}~]";

      # Special case if the very first character is a quote followed by
      # punctuation at a non-word-break. Close the quotes by brute force:
      $_ = preg_replace("/^'(?=$punct_class\\B)/", '&#8217;', $_);
      $_ = preg_replace("/^\"(?=$punct_class\\B)/", '&#8221;', $_);

      # Special case for double sets of quotes, e.g.:
      #   <p>He said, "'Quoted' words in a larger quote."</p>
      $_ = preg_replace("/\"'(?=\w)/", '&#8220;&#8216;', $_);
      $_ = preg_replace("/'\"(?=\w)/", '&#8216;&#8220;', $_);

      # Special case for decade abbreviations (the '80s):
      $_ = preg_replace("/'(?=\\d{2}s)/", '&#8217;', $_);

      $close_class = '[^\ \t\r\n\[\{\(\-]';
      $dec_dashes = '&\#8211;|&\#8212;';

      # What may come right before an opening quote. Shared by the single
      # and the double quote patterns below (extended-mode sub-pattern).
      $before_open = "
          (
              \\s          | # a whitespace char, or
              &nbsp;       | # a non-breaking space entity, or
              --           | # dashes, or
              &[mn]dash;   | # named dash entities
              $dec_dashes  | # or decimal entities
              &\\#x201[34];  # or hex
          )";

      # Get most opening single quotes:
      $_ = preg_replace('{' . $before_open . "
              '          # the quote
              (?=\\w)    # followed by a word character
          }x", '\1&#8216;', $_);

      # Single closing quotes:
      $_ = preg_replace("{
              ($close_class)?
              '
              (?(1)|            # If $1 captured, then do nothing;
                (?=\\s | s\\b)  # otherwise, positive lookahead for a whitespace
              )                 # char or an 's' at a word ending position. This
                                # is a special case to handle something like:
                                # \"<i>Custer</i>'s Last Stand.\"
          }xi", '\1&#8217;', $_);

      # Any remaining single quotes should be opening ones:
      $_ = str_replace("'", '&#8216;', $_);

      # Get most opening double quotes:
      $_ = preg_replace('{' . $before_open . "
              \"         # the quote
              (?=\\w)    # followed by a word character
          }x", '\1&#8220;', $_);

      # Double closing quotes:
      $_ = preg_replace("{
              ($close_class)?
              \"
              (?(1)|(?=\\s))    # If $1 captured, then do nothing;
                                # if not, then make sure the next char is whitespace.
          }x", '\1&#8221;', $_);

      # Any remaining quotes should be opening ones.
      $_ = str_replace('"', '&#8220;', $_);

      return $_;
  }
  protected function educateBackticks($_) {
      #
      # Parameter: String.
      # Returns: the string with ``backticks''-style double quotes replaced
      #   by curly double quote HTML entities. Single quote characters that
      #   are not doubled are left alone.
      #
      # Example input:  ``Isn't this fun?''
      # Example output: &#8220;Isn't this fun?&#8221;
      #
      $opened = str_replace("``", '&#8220;', $_);
      return str_replace("''", '&#8221;', $opened);
  }
  protected function educateSingleBackticks($_) {
      #
      # Parameter: String.
      # Returns: the string with `backticks'-style single quotes replaced by
      #   curly single quote HTML entities: every backtick becomes an opening
      #   quote and every straight single quote becomes a closing quote.
      #
      # Example input:  `Isn't this fun?'
      # Example output: &#8216;Isn&#8217;t this fun?&#8217;
      #
      $opened = str_replace("`", '&#8216;', $_);
      return str_replace("'", '&#8217;', $opened);
  }
  protected function educateDashes($_) {
      #
      # Parameter: String.
      #
      # Returns: the string with every "--" replaced by the em-dash HTML
      #   entity (&#8212;).
      #
      return str_replace('--', '&#8212;', $_);
  }
  protected function educateDashesOldSchool($_) {
      #
      # Parameter: String.
      #
      # Returns: the string with every "---" replaced by the em-dash HTML
      #   entity and every remaining "--" replaced by the en-dash HTML
      #   entity.
      #
      # The triple dash has to go first, otherwise its first two characters
      # would be consumed as an en-dash.
      $with_em = str_replace("---", '&#8212;', $_);
      return str_replace("--", '&#8211;', $with_em);
  }
  protected function educateDashesOldSchoolInverted($_) {
      #
      # Parameter: String.
      #
      # Returns: the string with every "---" replaced by the en-dash HTML
      #   entity and every remaining "--" replaced by the em-dash HTML
      #   entity.
      #
      # Why the inversion: unlike the syntax of educateDashesOldSchool(), it
      # stays compatible with entries written before SmartyPants 1.1, when
      # "--" was only used for em-dashes; and since em-dashes are more common
      # than en-dashes, it makes sense for them to get the shorter shortcut.
      # (Thanks to Aaron Swartz for the idea.)
      #
      # The triple dash has to go first, otherwise its first two characters
      # would be consumed as an em-dash.
      $with_en = str_replace("---", '&#8211;', $_);
      return str_replace("--", '&#8212;', $with_en);
  }
  protected function educateEllipses($_) {
      #
      # Parameter: String.
      # Returns: the string with every "..." replaced by the ellipsis HTML
      #   entity; the spaced-out form ". . ." is converted as well.
      #
      # Example input:  Huh...?
      # Example output: Huh&#8230;?
      #
      $ellipsis = '&#8230;';
      $compact_done = str_replace("...", $ellipsis, $_);
      return str_replace(". . .", $ellipsis, $compact_done);
  }
  protected function stupefyEntities($_) {
      #
      # Parameter: String.
      # Returns: the string with each SmartyPants HTML entity replaced by its
      #   ASCII counterpart.
      #
      # Example input:  &#8220;Hello &#8212; world.&#8221;
      # Example output: "Hello -- world."
      #
      $ascii_for = array(
          '&#8211;' => '-',    # en-dash
          '&#8212;' => '--',   # em-dash
          '&#8216;' => "'",    # single quote, open
          '&#8217;' => "'",    # single quote, close
          '&#8220;' => '"',    # double quote, open
          '&#8221;' => '"',    # double quote, close
          '&#8230;' => '...',  # ellipsis
      );

      return str_replace(array_keys($ascii_for), array_values($ascii_for), $_);
  }
  protected function processEscapes($_) {
      #
      # Parameter: String.
      # Returns: the string with the backslash escape sequences below turned
      #   into numeric HTML entities. This is useful if you want to force a
      #   "dumb" quote or other character to appear.
      #
      #   Escape  Value
      #   ------  -----
      #   \\      &#92;
      #   \"      &#34;
      #   \'      &#39;
      #   \.      &#46;
      #   \-      &#45;
      #   \`      &#96;
      #
      # Replacements run in the order listed, so an escaped backslash is
      # consumed before the sequences that start with a backslash.
      $entity_for = array(
          '\\\\' => '&#92;',
          '\"'   => '&#34;',
          "\'"   => '&#39;',
          '\.'   => '&#46;',
          '\-'   => '&#45;',
          '\`'   => '&#96;',
      );

      return str_replace(array_keys($entity_for), array_values($entity_for), $_);
  }
  protected function tokenizeHTML($str) {
      #
      # Parameter: String containing HTML markup.
      # Returns: An array of the tokens comprising the input string. Each
      #   token is either a tag (possibly with nested tags contained
      #   therein, such as <a href="<MTFoo>">), or a run of text between
      #   tags. Each element of the array is a two-element array; the first
      #   is either 'tag' or 'text'; the second is the actual value.
      #   Empty runs of text (between adjacent tags, or at either end of
      #   the string) produce no token.
      #
      # Regular expression derived from the _tokenize() subroutine in
      # Brad Choate's MTRegex plugin.
      # <http://www.bradchoate.com/past/mtregex.php>
      #
      $tokens = array();

      $match = '(?s:<!--.*?-->)|'.  # comment
               '(?s:<\?.*?\?>)|'.   # processing instruction
               # regular tags
               '(?:<[/!$]?[-a-zA-Z0-9:]+\b(?>[^"\'>]+|"[^"]*"|\'[^\']*\')*>)';

      # With PREG_SPLIT_DELIM_CAPTURE the result alternates: even offsets
      # hold the text between matches, odd offsets hold the matched markup.
      $parts = preg_split("{($match)}", $str, -1, PREG_SPLIT_DELIM_CAPTURE);

      foreach ($parts as $offset => $part) {
          if ($offset % 2) {
              $tokens[] = array('tag', $part);
          } else if ($part !== '') {
              # Skip empty text runs instead of reporting them as tags.
              $tokens[] = array('text', $part);
          }
      }

      return $tokens;
  }
  428. }