PageRenderTime 26ms CodeModel.GetById 20ms RepoModel.GetById 0ms app.codeStats 0ms

/html/blog/wp-content/plugins/wordpress-seo/admin/TextStatistics.php

https://github.com/jimmytidey/jimmytidey.co.uk
PHP | 414 lines | 237 code | 49 blank | 128 comment | 33 complexity | 9c9abfd71650533c7a08bcfdc7af15f2 MD5 | raw file
  1. <?php
  2. /**
  3. * @package Admin
  4. */
  5. if ( ! defined( 'WPSEO_VERSION' ) ) {
  6. header( 'Status: 403 Forbidden' );
  7. header( 'HTTP/1.1 403 Forbidden' );
  8. exit();
  9. }
  10. if ( ! class_exists( 'Yoast_TextStatistics' ) ) {
  11. /**
  12. * Modified (Reduced) TextStatistics Class
  13. *
  14. * Mostly removed functionality that isn't needed within the WordPress SEO plugin.
  15. *
  16. * @link http://code.google.com/p/php-text-statistics/
  17. * @link https://github.com/DaveChild/Text-Statistics (new repo location)
  18. * @license http://www.opensource.org/licenses/bsd-license.php New BSD license
  19. *
  20. * @todo [JRF => whomever] Research if a class/library can be found which will offer
  21. * this functionality to a broader scope of languages/charsets.
  22. * Now basically limited to English.
  23. */
  24. class Yoast_TextStatistics {
  25. /**
  26. * @var string $strEncoding Used to hold character encoding to be used by object, if set
  27. */
  28. protected $strEncoding = '';
  29. /**
  30. * @var string $blnMbstring Efficiency: Is the MB String extension loaded ?
  31. */
  32. protected $blnMbstring = true;
  33. /**
  34. * @var bool $normalize Should the result be normalized ?
  35. */
  36. public $normalize = true;
  37. /**
  38. * Constructor.
  39. *
  40. * @param string $strEncoding Optional character encoding.
  41. */
  42. public function __construct( $strEncoding = '' ) {
  43. if ( $strEncoding <> '' ) {
  44. // Encoding is given. Use it!
  45. $this->strEncoding = $strEncoding;
  46. }
  47. $this->blnMbstring = extension_loaded( 'mbstring' );
  48. }
  49. /**
  50. * Gives the Flesch-Kincaid Reading Ease of text entered rounded to one digit
  51. *
  52. * @param string $strText Text to be checked
  53. * @return int|float
  54. */
  55. public function flesch_kincaid_reading_ease( $strText ) {
  56. $strText = $this->clean_text( $strText );
  57. $score = wpseo_calc( wpseo_calc( 206.835, '-', wpseo_calc( 1.015, '*', $this->average_words_per_sentence( $strText ) ) ), '-', wpseo_calc( 84.6, '*', $this->average_syllables_per_word( $strText ) ) );
  58. return $this->normalize_score( $score, 0, 100 );
  59. }
  60. /**
  61. * Gives string length.
  62. *
  63. * @param string $strText Text to be measured
  64. *
  65. * @return int
  66. */
  67. public function text_length( $strText ) {
  68. if ( ! $this->blnMbstring ) {
  69. return strlen( $strText );
  70. }
  71. try {
  72. if ( $this->strEncoding == '' ) {
  73. $intTextLength = mb_strlen( $strText );
  74. } else {
  75. $intTextLength = mb_strlen( $strText, $this->strEncoding );
  76. }
  77. } catch ( Exception $e ) {
  78. $intTextLength = strlen( $strText );
  79. }
  80. return $intTextLength;
  81. }
  82. /**
  83. * Gives letter count (ignores all non-letters). Tries mb_strlen and if that fails uses regular strlen.
  84. *
  85. * @param string $strText Text to be measured
  86. *
  87. * @return int
  88. */
  89. public function letter_count( $strText ) {
  90. $strText = $this->clean_text( $strText ); // To clear out newlines etc
  91. $strText = preg_replace( '`[^A-Za-z]+`', '', $strText );
  92. if ( ! $this->blnMbstring ) {
  93. return strlen( $strText );
  94. }
  95. try {
  96. if ( $this->strEncoding == '' ) {
  97. $intTextLength = mb_strlen( $strText );
  98. } else {
  99. $intTextLength = mb_strlen( $strText, $this->strEncoding );
  100. }
  101. } catch ( Exception $e ) {
  102. $intTextLength = strlen( $strText );
  103. }
  104. return $intTextLength;
  105. }
  106. /**
  107. * Trims, removes line breaks, multiple spaces and generally cleans text before processing.
  108. *
  109. * @param string $strText Text to be transformed
  110. * @return string
  111. */
  112. protected function clean_text( $strText ) {
  113. static $clean = array();
  114. $key = sha1( $strText );
  115. if ( isset( $clean[$key] ) ) {
  116. return $clean[$key];
  117. }
  118. // all these tags should be preceeded by a full stop.
  119. $fullStopTags = array( 'li', 'p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'dd' );
  120. foreach ( $fullStopTags as $tag ) {
  121. $strText = str_ireplace( '</' . $tag . '>', '.', $strText );
  122. }
  123. $strText = strip_tags( $strText );
  124. $strText = preg_replace( '`[",:;\(\)-]`', ' ', $strText ); // Replace commas, hyphens etc (count them as spaces)
  125. $strText = preg_replace( '`[\.!?]`', '.', $strText ); // Unify terminators
  126. $strText = trim( $strText ) . '.'; // Add final terminator, just in case it's missing.
  127. $strText = preg_replace( '`[ ]*(\n|\r\n|\r)[ ]*`', ' ', $strText ); // Replace new lines with spaces
  128. $strText = preg_replace( '`([\.])[\. ]+`', '$1', $strText ); // Check for duplicated terminators
  129. $strText = trim( preg_replace( '`[ ]*([\.])`', '$1 ', $strText ) ); // Pad sentence terminators
  130. $strText = preg_replace( '` [0-9]+ `', ' ', ' ' . $strText . ' ' ); // Remove "words" comprised only of numbers
  131. $strText = preg_replace( '`[ ]+`', ' ', $strText ); // Remove multiple spaces
  132. $strText = preg_replace_callback( '`\. [^ ]+?`', create_function( '$matches', 'return strtolower( $matches[0] );' ), $strText ); // Lower case all words following terminators (for gunning fog score)
  133. $strText = trim( $strText );
  134. // Cache it and return
  135. $clean[$key] = $strText;
  136. return $strText;
  137. }
  138. /**
  139. * Converts string to lower case. Tries mb_strtolower and if that fails uses regular strtolower.
  140. *
  141. * @param string $strText Text to be transformed
  142. * @return string
  143. */
  144. protected function lower_case( $strText ) {
  145. if ( ! $this->blnMbstring ) {
  146. return strtolower( $strText );
  147. }
  148. try {
  149. if ( $this->strEncoding == '' ) {
  150. $strLowerCaseText = mb_strtolower( $strText );
  151. } else {
  152. $strLowerCaseText = mb_strtolower( $strText, $this->strEncoding );
  153. }
  154. } catch ( Exception $e ) {
  155. $strLowerCaseText = strtolower( $strText );
  156. }
  157. return $strLowerCaseText;
  158. }
  159. /**
  160. * Converts string to upper case. Tries mb_strtoupper and if that fails uses regular strtoupper.
  161. *
  162. * @param string $strText Text to be transformed
  163. * @return string
  164. */
  165. protected function upper_case( $strText ) {
  166. if ( ! $this->blnMbstring ) {
  167. return strtoupper( $strText );
  168. }
  169. try {
  170. if ( $this->strEncoding == '' ) {
  171. $strUpperCaseText = mb_strtoupper( $strText );
  172. } else {
  173. $strUpperCaseText = mb_strtoupper( $strText, $this->strEncoding );
  174. }
  175. } catch ( Exception $e ) {
  176. $strUpperCaseText = strtoupper( $strText );
  177. }
  178. return $strUpperCaseText;
  179. }
  180. /**
  181. * Returns sentence count for text.
  182. *
  183. * @param string $strText Text to be measured
  184. * @return int
  185. */
  186. public function sentence_count( $strText ) {
  187. if ( strlen( trim( $strText ) ) == 0 ) {
  188. return 0;
  189. }
  190. $strText = $this->clean_text( $strText );
  191. // Will be tripped up by "Mr." or "U.K.". Not a major concern at this point.
  192. // [JRF] Will also be tripped up by ... or ?!
  193. // @todo [JRF => whomever] May be replace with something along the lines of this - will at least provide better count in ... and ?! situations:
  194. // $intSentences = max( 1, preg_match_all( '`[^\.!?]+[\.!?]+([\s]+|$)`u', $strText, $matches ) ); [/JRF]
  195. $intSentences = max( 1, $this->text_length( preg_replace( '`[^\.!?]`', '', $strText ) ) );
  196. return $intSentences;
  197. }
  198. /**
  199. * Returns word count for text.
  200. *
  201. * @param string $strText Text to be measured
  202. * @return int
  203. */
  204. public function word_count( $strText ) {
  205. if ( strlen( trim( $strText ) ) == 0 ) {
  206. return 0;
  207. }
  208. $strText = $this->clean_text( $strText );
  209. // Will be tripped by em dashes with spaces either side, among other similar characters
  210. $intWords = 1 + $this->text_length( preg_replace( '`[^ ]`', '', $strText ) ); // Space count + 1 is word count
  211. return $intWords;
  212. }
  213. /**
  214. * Returns average words per sentence for text.
  215. *
  216. * @param string $strText Text to be measured
  217. * @return int|float
  218. */
  219. public function average_words_per_sentence( $strText ) {
  220. $strText = $this->clean_text( $strText );
  221. $intSentenceCount = $this->sentence_count( $strText );
  222. $intWordCount = $this->word_count( $strText );
  223. return ( wpseo_calc( $intWordCount, '/', $intSentenceCount ) );
  224. }
  225. /**
  226. * Returns average syllables per word for text.
  227. *
  228. * @param string $strText Text to be measured
  229. * @return int|float
  230. */
  231. public function average_syllables_per_word( $strText ) {
  232. $strText = $this->clean_text( $strText );
  233. $intSyllableCount = 0;
  234. $intWordCount = $this->word_count( $strText );
  235. $arrWords = explode( ' ', $strText );
  236. for ( $i = 0; $i < $intWordCount; $i++ ) {
  237. $intSyllableCount += $this->syllable_count( $arrWords[$i] );
  238. }
  239. return ( wpseo_calc( $intSyllableCount, '/', $intWordCount ) );
  240. }
  241. /**
  242. * Returns the number of syllables in the word.
  243. * Based in part on Greg Fast's Perl module Lingua::EN::Syllables
  244. *
  245. * @param string $strWord Word to be measured
  246. * @return int
  247. */
  248. public function syllable_count( $strWord ) {
  249. if ( strlen( trim( $strWord ) ) == 0 ) {
  250. return 0;
  251. }
  252. // Should be no non-alpha characters
  253. $strWord = preg_replace( '`[^A-Za-z]`', '', $strWord );
  254. $intSyllableCount = 0;
  255. $strWord = $this->lower_case( $strWord );
  256. // Specific common exceptions that don't follow the rule set below are handled individually
  257. // Array of problem words (with word as key, syllable count as value)
  258. $arrProblemWords = array(
  259. 'simile' => 3,
  260. 'forever' => 3,
  261. 'shoreline' => 2,
  262. );
  263. if ( isset( $arrProblemWords[$strWord] ) ) {
  264. $intSyllableCount = $arrProblemWords[$strWord];
  265. }
  266. if ( $intSyllableCount > 0 ) {
  267. return $intSyllableCount;
  268. }
  269. // These syllables would be counted as two but should be one
  270. $arrSubSyllables = array(
  271. 'cial',
  272. 'tia',
  273. 'cius',
  274. 'cious',
  275. 'giu',
  276. 'ion',
  277. 'iou',
  278. 'sia$',
  279. '[^aeiuoyt]{2,}ed$',
  280. '.ely$',
  281. '[cg]h?e[rsd]?$',
  282. 'rved?$',
  283. '[aeiouy][dt]es?$',
  284. '[aeiouy][^aeiouydt]e[rsd]?$',
  285. '^[dr]e[aeiou][^aeiou]+$', // Sorts out deal, deign etc
  286. '[aeiouy]rse$', // Purse, hearse
  287. );
  288. // These syllables would be counted as one but should be two
  289. $arrAddSyllables = array(
  290. 'ia',
  291. 'riet',
  292. 'dien',
  293. 'iu',
  294. 'io',
  295. 'ii',
  296. '[aeiouym]bl$',
  297. '[aeiou]{3}',
  298. '^mc',
  299. 'ism$',
  300. '([^aeiouy])\1l$',
  301. '[^l]lien',
  302. '^coa[dglx].',
  303. '[^gq]ua[^auieo]',
  304. 'dnt$',
  305. 'uity$',
  306. 'ie(r|st)$',
  307. );
  308. // Single syllable prefixes and suffixes
  309. $arrPrefixSuffix = array(
  310. '`^un`',
  311. '`^fore`',
  312. '`ly$`',
  313. '`less$`',
  314. '`ful$`',
  315. '`ers?$`',
  316. '`ings?$`',
  317. );
  318. // Remove prefixes and suffixes and count how many were taken
  319. $strWord = preg_replace( $arrPrefixSuffix, '', $strWord, -1, $intPrefixSuffixCount );
  320. // Removed non-word characters from word
  321. $strWord = preg_replace( '`[^a-z]`is', '', $strWord );
  322. $arrWordParts = preg_split( '`[^aeiouy]+`', $strWord );
  323. $intWordPartCount = 0;
  324. foreach ( $arrWordParts as $strWordPart ) {
  325. if ( $strWordPart <> '' ) {
  326. $intWordPartCount++;
  327. }
  328. }
  329. // Some syllables do not follow normal rules - check for them
  330. // Thanks to Joe Kovar for correcting a bug in the following lines
  331. $intSyllableCount = $intWordPartCount + $intPrefixSuffixCount;
  332. foreach ( $arrSubSyllables as $strSyllable ) {
  333. $intSyllableCount -= preg_match( '`' . $strSyllable . '`', $strWord );
  334. }
  335. foreach ( $arrAddSyllables as $strSyllable ) {
  336. $intSyllableCount += preg_match( '`' . $strSyllable . '`', $strWord );
  337. }
  338. $intSyllableCount = ( $intSyllableCount == 0 ) ? 1 : $intSyllableCount;
  339. return $intSyllableCount;
  340. }
  341. /**
  342. * Normalizes score according to min & max allowed. If score larger
  343. * than max, max is returned. If score less than min, min is returned.
  344. * Also rounds result to specified precision.
  345. * Thanks to github.com/lvil.
  346. *
  347. * @param int|float $score Initial score
  348. * @param int $min Minimum score allowed
  349. * @param int $max Maximum score allowed
  350. * @return int|float
  351. */
  352. public function normalize_score( $score, $min, $max, $dps = 1 ) {
  353. $score = wpseo_calc( $score, '+', 0, true, $dps ); // Round
  354. if ( ! $this->normalize ) {
  355. return $score;
  356. }
  357. if ( $score > $max ) {
  358. $score = $max;
  359. } elseif ( $score < $min ) {
  360. $score = $min;
  361. }
  362. return $score;
  363. }
  364. } /* End of class */
  365. } /* End of class-exists wrapper */