/include/texy/modules/TexyLongWordsModule.php

https://gitlab.com/zemek1/lahodnakava.cz · PHP · 211 lines · 126 code · 52 blank · 33 comment · 27 complexity · b0168163050e1798f6dbb89bae2ed2f9 MD5 · raw file

  1. <?php
  2. /**
  3. * Texy! - web text markup-language
  4. * --------------------------------
  5. *
  6. * Copyright (c) 2004, 2009 David Grudl (http://davidgrudl.com)
  7. *
  8. * This source file is subject to the GNU GPL license that is bundled
  9. * with this package in the file license.txt.
  10. *
  11. * For more information please see http://texy.info
  12. *
  13. * @copyright Copyright (c) 2004, 2009 David Grudl
  14. * @license GNU GENERAL PUBLIC LICENSE version 2 or 3
  15. * @link http://texy.info
  16. * @package Texy
  17. * @version $Id: TexyLongWordsModule.php 226 2008-12-31 00:16:35Z David Grudl $
  18. */
  19. /**
  20. * Long words wrap module.
  21. *
  22. * @author David Grudl
  23. * @copyright Copyright (c) 2004, 2009 David Grudl
  24. * @package Texy
  25. */
  26. final class TexyLongWordsModule extends TexyModule
  27. {
  28. public $wordLimit = 20;
  29. const
  30. DONT = 0, // don't hyphenate
  31. HERE = 1, // hyphenate here
  32. AFTER = 2; // hyphenate after
  33. private $consonants = array(
  34. 'b','c','d','f','g','h','j','k','l','m','n','p','q','r','s','t','v','w','x','z',
  35. 'B','C','D','F','G','H','J','K','L','M','N','P','Q','R','S','T','V','W','X','Z',
  36. "\xc4\x8d","\xc4\x8f","\xc5\x88","\xc5\x99","\xc5\xa1","\xc5\xa5","\xc5\xbe", //czech utf-8
  37. "\xc4\x8c","\xc4\x8e","\xc5\x87","\xc5\x98","\xc5\xa0","\xc5\xa4","\xc5\xbd");
  38. private $vowels = array(
  39. 'a','e','i','o','u','y',
  40. 'A','E','I','O','U','Y',
  41. "\xc3\xa1","\xc3\xa9","\xc4\x9b","\xc3\xad","\xc3\xb3","\xc3\xba","\xc5\xaf","\xc3\xbd", //czech utf-8
  42. "\xc3\x81","\xc3\x89","\xc4\x9a","\xc3\x8d","\xc3\x93","\xc3\x9a","\xc5\xae","\xc3\x9d");
  43. private $before_r = array(
  44. 'b','B','c','C','d','D','f','F','g','G','k','K','p','P','r','R','t','T','v','V',
  45. "\xc4\x8d","\xc4\x8c","\xc4\x8f","\xc4\x8e","\xc5\x99","\xc5\x98","\xc5\xa5","\xc5\xa4"); //czech utf-8
  46. private $before_l = array(
  47. 'b','B','c','C','d','D','f','F','g','G','k','K','l','L','p','P','t','T','v','V',
  48. "\xc4\x8d","\xc4\x8c","\xc4\x8f","\xc4\x8e","\xc5\xa5","\xc5\xa4"); //czech utf-8
  49. private $before_h = array('c','C','s','S');
  50. private $doubleVowels = array('a','A','o','O');
  51. public function __construct($texy)
  52. {
  53. $this->texy = $texy;
  54. $this->consonants = array_flip($this->consonants);
  55. $this->vowels = array_flip($this->vowels);
  56. $this->before_r = array_flip($this->before_r);
  57. $this->before_l = array_flip($this->before_l);
  58. $this->before_h = array_flip($this->before_h);
  59. $this->doubleVowels = array_flip($this->doubleVowels);
  60. $texy->registerPostLine(array($this, 'postLine'), 'longwords');
  61. }
  62. public function postLine($text)
  63. {
  64. return preg_replace_callback(
  65. '#[^\ \n\t\x14\x15\x16\x{2013}\x{2014}\x{ad}-]{'.$this->wordLimit.',}#u',
  66. array($this, 'pattern'),
  67. $text
  68. );
  69. }
  70. /**
  71. * Callback for long words.
  72. * (c) David Grudl
  73. * @param array
  74. * @return string
  75. */
  76. private function pattern($matches)
  77. {
  78. list($mWord) = $matches;
  79. // [0] => lllloooonnnnggggwwwoorrdddd
  80. $chars = array();
  81. preg_match_all(
  82. '#['.TEXY_MARK.']+|.#u',
  83. $mWord,
  84. $chars
  85. );
  86. $chars = $chars[0];
  87. if (count($chars) < $this->wordLimit) return $mWord;
  88. $consonants = $this->consonants;
  89. $vowels = $this->vowels;
  90. $before_r = $this->before_r;
  91. $before_l = $this->before_l;
  92. $before_h = $this->before_h;
  93. $doubleVowels = $this->doubleVowels;
  94. $s = array();
  95. $trans = array();
  96. $s[] = '';
  97. $trans[] = -1;
  98. foreach ($chars as $key => $char) {
  99. if (ord($char{0}) < 32) continue;
  100. $s[] = $char;
  101. $trans[] = $key;
  102. }
  103. $s[] = '';
  104. $len = count($s) - 2;
  105. $positions = array();
  106. $a = 0; $last = 1;
  107. while (++$a < $len) {
  108. $hyphen = self::DONT; // Do not hyphenate
  109. do {
  110. if ($s[$a] === "\xC2\xA0") { $a++; continue 2; } // here and after never
  111. if ($s[$a] === '.') { $hyphen = self::HERE; break; }
  112. if (isset($consonants[$s[$a]])) { // souhlásky
  113. if (isset($vowels[$s[$a+1]])) {
  114. if (isset($vowels[$s[$a-1]])) $hyphen = self::HERE;
  115. break;
  116. }
  117. if (($s[$a] === 's') && ($s[$a-1] === 'n') && isset($consonants[$s[$a+1]])) { $hyphen = self::AFTER; break; }
  118. if (isset($consonants[$s[$a+1]]) && isset($vowels[$s[$a-1]])) {
  119. if ($s[$a+1] === 'r') {
  120. $hyphen = isset($before_r[$s[$a]]) ? self::HERE : self::AFTER;
  121. break;
  122. }
  123. if ($s[$a+1] === 'l') {
  124. $hyphen = isset($before_l[$s[$a]]) ? self::HERE : self::AFTER;
  125. break;
  126. }
  127. if ($s[$a+1] === 'h') { // CH
  128. $hyphen = isset($before_h[$s[$a]]) ? self::DONT : self::AFTER;
  129. break;
  130. }
  131. $hyphen = self::AFTER;
  132. break;
  133. }
  134. break;
  135. } // konec souhlasky
  136. if (($s[$a] === 'u') && isset($doubleVowels[$s[$a-1]])) { $hyphen = self::AFTER; break; }
  137. if (isset($vowels[$s[$a]]) && isset($vowels[$s[$a-1]])) { $hyphen = self::HERE; break; }
  138. } while(0);
  139. if ($hyphen === self::DONT && ($a - $last > $this->wordLimit*0.6)) $positions[] = $last = $a-1; // Hyphenate here
  140. if ($hyphen === self::HERE) $positions[] = $last = $a-1; // Hyphenate here
  141. if ($hyphen === self::AFTER) { $positions[] = $last = $a; $a++; } // Hyphenate after
  142. } // while
  143. $a = end($positions);
  144. if (($a === $len-1) && isset($consonants[$s[$len]]))
  145. array_pop($positions);
  146. $syllables = array();
  147. $last = 0;
  148. foreach ($positions as $pos) {
  149. if ($pos - $last > $this->wordLimit*0.6) {
  150. $syllables[] = implode('', array_splice($chars, 0, $trans[$pos] - $trans[$last]));
  151. $last = $pos;
  152. }
  153. }
  154. $syllables[] = implode('', $chars);
  155. //$s = implode("\xC2\xAD", $syllables); // insert shy
  156. //$s = str_replace(array("\xC2\xAD\xC2\xA0", "\xC2\xA0\xC2\xAD"), array(' ', ' '), $s); // shy+nbsp = normal space
  157. return implode("\xC2\xAD", $syllables);;
  158. }
  159. }