PageRenderTime 26ms CodeModel.GetById 14ms RepoModel.GetById 0ms app.codeStats 0ms

/wp-content/plugins/wordpress-seo/admin/linkdex/TextStatistics.php

https://gitlab.com/blueprintmrk/bladencountyrecords
PHP | 369 lines | 200 code | 33 blank | 136 comment | 11 complexity | 539e14e24292a33f439c160adf548d2e MD5 | raw file
  1. <?php
  2. /*
  3. TextStatistics Class
  4. http://code.google.com/p/php-text-statistics/
  5. Released under New BSD license
  6. http://www.opensource.org/licenses/bsd-license.php
  7. Calculates following readability scores (formulae can be found in wiki):
  8. * Flesch Kincaid Reading Ease
  9. * Flesch Kincaid Grade Level
  10. * Gunning Fog Score
  11. * Coleman Liau Index
  12. * SMOG Index
  13. * Automated Readability Index
  14. Will also give:
  15. * String length
  16. * Letter count
  17. * Syllable count
  18. * Sentence count
  19. * Average words per sentence
  20. * Average syllables per word
  21. Sample Code
  22. ----------------
  23. $statistics = new TextStatistics;
  24. $text = 'The quick brown fox jumped over the lazy dog.';
  25. echo 'Flesch-Kincaid Reading Ease: ' . $statistics->flesch_kincaid_reading_ease($text);
  26. Modifications by Yoast
  27. -----
  28. Removed all multibyte code references for speed and compatibility
  29. */
  /**
   * Computes readability scores and basic text statistics for English text.
   *
   * All public methods accept raw text (HTML is tolerated: it is stripped by
   * clean_text()) and work byte-wise on ASCII letters; no multibyte extension
   * is required.
   */
  class TextStatistics {

      /**
       * Character encoding passed to the constructor. It is only stored;
       * no method in this class reads it.
       *
       * @var string
       */
      protected $strEncoding = '';

      /**
       * Constructor.
       *
       * @param string $strEncoding Optional character encoding.
       */
      public function __construct($strEncoding = '') {
          if ($strEncoding != '') {
              // Encoding is given. Remember it.
              $this->strEncoding = $strEncoding;
          }
      }

      /**
       * Gives the Flesch-Kincaid Reading Ease of the text, rounded to one decimal.
       *
       * @param string $strText Text to be checked.
       * @return float
       */
      public function flesch_kincaid_reading_ease($strText) {
          $strText = $this->clean_text($strText);
          return round((206.835 - (1.015 * $this->average_words_per_sentence($strText)) - (84.6 * $this->average_syllables_per_word($strText))), 1);
      }

      /**
       * Gives the Flesch-Kincaid Grade Level of the text, rounded to one decimal.
       *
       * @param string $strText Text to be checked.
       * @return float
       */
      public function flesch_kincaid_grade_level($strText) {
          $strText = $this->clean_text($strText);
          return round(((0.39 * $this->average_words_per_sentence($strText)) + (11.8 * $this->average_syllables_per_word($strText)) - 15.59), 1);
      }

      /**
       * Gives the Gunning-Fog score of the text, rounded to one decimal.
       *
       * Long words that start with an upper-case letter are excluded from the
       * "complex word" percentage (proper nouns are not counted).
       *
       * @param string $strText Text to be checked.
       * @return float
       */
      public function gunning_fog_score($strText) {
          $strText = $this->clean_text($strText);
          return round((($this->average_words_per_sentence($strText) + $this->percentage_words_with_three_syllables($strText, false)) * 0.4), 1);
      }

      /**
       * Gives the Coleman-Liau Index of the text, rounded to one decimal.
       *
       * @param string $strText Text to be checked.
       * @return float
       */
      public function coleman_liau_index($strText) {
          $strText = $this->clean_text($strText);
          $intWordCount = $this->word_count($strText);
          return round( ( (5.89 * ($this->letter_count($strText) / $intWordCount)) - (0.3 * ($this->sentence_count($strText) / $intWordCount)) - 15.8 ), 1);
      }

      /**
       * Gives the SMOG Index of the text, rounded to one decimal.
       *
       * Uses grade = 1.043 * sqrt(polysyllables * 30 / sentences) + 3.1291.
       * The constant is added after taking the square root, as in the
       * published formula.
       *
       * @param string $strText Text to be checked.
       * @return float
       */
      public function smog_index($strText) {
          $strText = $this->clean_text($strText);
          $fltScaledPolysyllables = $this->words_with_three_syllables($strText) * (30 / $this->sentence_count($strText));
          return round((1.043 * sqrt($fltScaledPolysyllables)) + 3.1291, 1);
      }

      /**
       * Gives the Automated Readability Index of the text, rounded to one decimal.
       *
       * @param string $strText Text to be checked.
       * @return float
       */
      public function automated_readability_index($strText) {
          $strText = $this->clean_text($strText);
          $intWordCount = $this->word_count($strText);
          return round(((4.71 * ($this->letter_count($strText) / $intWordCount)) + (0.5 * ($intWordCount / $this->sentence_count($strText))) - 21.43), 1);
      }

      /**
       * Gives the string length in characters.
       *
       * UTF-8 continuation bytes (0x80-0xBF) are dropped before measuring, so
       * each well-formed UTF-8 character counts once. This avoids
       * utf8_decode(), which is deprecated as of PHP 8.2.
       *
       * @param string $strText Text to be measured.
       * @return int
       */
      public function text_length($strText) {
          return strlen(preg_replace('/[\x80-\xBF]/', '', $strText));
      }

      /**
       * Gives the letter count (only A-Z and a-z are counted).
       *
       * @param string $strText Text to be measured.
       * @return int
       */
      public function letter_count($strText) {
          $strText = $this->clean_text($strText); // To clear out markup, newlines etc
          $strText = preg_replace('/[^A-Za-z]+/', '', $strText);
          return strlen($strText);
      }

      /**
       * Strips markup and normalises punctuation and whitespace before processing.
       *
       * The result consists of words separated by single spaces, with every
       * sentence terminator unified to "." and the text always ending in ".".
       *
       * @param string $strText Text to be transformed.
       * @return string
       */
      protected function clean_text($strText) {
          // The closing tag of each of these elements is treated as a full stop.
          $fullStopTags = array('li', 'p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'dd');
          foreach ($fullStopTags as $tag) {
              $strText = str_ireplace('</'.$tag.'>', '.', $strText);
          }
          $strText = strip_tags($strText);
          $strText = preg_replace('/[,:;()-]/', ' ', $strText); // Replace commas, hyphens etc (count them as spaces)
          $strText = preg_replace('/[\.!?]/', '.', $strText); // Unify terminators
          $strText = trim($strText) . '.'; // Add final terminator, just in case it's missing.
          $strText = preg_replace('/[ ]*(\n|\r\n|\r)[ ]*/', ' ', $strText); // Replace new lines with spaces
          $strText = preg_replace('/([\.])[\. ]+/', '$1', $strText); // Check for duplicated terminators
          $strText = trim(preg_replace('/[ ]*([\.])/', '$1 ', $strText)); // Pad sentence terminators
          $strText = preg_replace('/[ ]+/', ' ', $strText); // Remove multiple spaces
          // Lower case all words following terminators (for gunning fog score).
          // A closure is used because create_function() was removed in PHP 8.0.
          $strText = preg_replace_callback(
              '/\. [^ ]+/',
              function ($matches) {
                  return strtolower($matches[0]);
              },
              $strText
          );
          return $strText;
      }

      /**
       * Converts a string to lower case using strtolower().
       *
       * @param string $strText Text to be transformed.
       * @return string
       */
      protected function lower_case($strText) {
          return strtolower($strText);
      }

      /**
       * Converts a string to upper case using strtoupper().
       *
       * @param string $strText Text to be transformed.
       * @return string
       */
      protected function upper_case($strText) {
          return strtoupper($strText);
      }

      /**
       * Gets a portion of a string using substr().
       *
       * @param string $strText   Text to be cut up.
       * @param int    $intStart  Start offset.
       * @param int    $intLength Length.
       * @return string
       */
      protected function substring($strText, $intStart, $intLength) {
          return substr($strText, $intStart, $intLength);
      }

      /**
       * Returns the sentence count for the text (never less than 1).
       *
       * @param string $strText Text to be measured.
       * @return int
       */
      public function sentence_count($strText) {
          $strText = $this->clean_text($strText);
          // Will be tripped up by "Mr." or "U.K.". Not a major concern at this point.
          return max(1, $this->text_length(preg_replace('/[^\.!?]/', '', $strText)));
      }

      /**
       * Returns the word count for the text.
       *
       * @param string $strText Text to be measured.
       * @return int
       */
      public function word_count($strText) {
          $strText = $this->clean_text($strText);
          // Will be tripped up by em dashes with spaces either side, among other similar characters.
          // Space count + 1 is word count.
          return 1 + $this->text_length(preg_replace('/[^ ]/', '', $strText));
      }

      /**
       * Returns the average number of words per sentence.
       *
       * @param string $strText Text to be measured.
       * @return float|int
       */
      public function average_words_per_sentence($strText) {
          $strText = $this->clean_text($strText);
          return ($this->word_count($strText) / $this->sentence_count($strText));
      }

      /**
       * Returns the average number of syllables per word.
       *
       * @param string $strText Text to be measured.
       * @return float|int
       */
      public function average_syllables_per_word($strText) {
          $strText = $this->clean_text($strText);
          $intWordCount = $this->word_count($strText);
          $intSyllableCount = 0;
          // word_count() cleans the text again, so cap the loop at the words
          // actually present instead of indexing past the end of the array.
          foreach (array_slice(explode(' ', $strText), 0, $intWordCount) as $strWord) {
              $intSyllableCount += $this->syllable_count($strWord);
          }
          return ($intSyllableCount / $intWordCount);
      }

      /**
       * Returns the number of words with three or more syllables.
       *
       * @param string $strText             Text to be measured.
       * @param bool   $blnCountProperNouns Whether long words starting with an
       *                                    upper-case letter are counted too.
       * @return int
       */
      public function words_with_three_syllables($strText, $blnCountProperNouns = true) {
          $strText = $this->clean_text($strText);
          $intWordCount = $this->word_count($strText);
          $intLongWordCount = 0;
          // word_count() cleans the text again, so cap the loop at the words
          // actually present instead of indexing past the end of the array.
          foreach (array_slice(explode(' ', $strText), 0, $intWordCount) as $strWord) {
              if ($this->syllable_count($strWord) <= 2) {
                  continue;
              }
              if ($blnCountProperNouns) {
                  $intLongWordCount++;
                  continue;
              }
              $strFirstLetter = $this->substring($strWord, 0, 1);
              if ($strFirstLetter !== $this->upper_case($strFirstLetter)) {
                  // First letter is lower case. Count it.
                  $intLongWordCount++;
              }
          }
          return $intLongWordCount;
      }

      /**
       * Returns the percentage of words with three or more syllables.
       *
       * @param string $strText             Text to be measured.
       * @param bool   $blnCountProperNouns Whether long words starting with an
       *                                    upper-case letter are counted too.
       * @return float|int
       */
      public function percentage_words_with_three_syllables($strText, $blnCountProperNouns = true) {
          $strText = $this->clean_text($strText);
          $intWordCount = $this->word_count($strText);
          $intLongWordCount = $this->words_with_three_syllables($strText, $blnCountProperNouns);
          return (($intLongWordCount / $intWordCount) * 100);
      }

      /**
       * Returns the number of syllables in the word (never less than 1).
       * Based in part on Greg Fast's Perl module Lingua::EN::Syllables
       *
       * @param string $strWord Word to be measured.
       * @return int
       */
      public function syllable_count($strWord) {
          // Normalise first: lower case, letters only. Doing this before the
          // exception lookup and affix removal means a word carrying trailing
          // punctuation ("forever.") is treated exactly like the bare word.
          $strWord = preg_replace('/[^a-z]/', '', $this->lower_case($strWord));

          // Specific common exceptions that don't follow the rule set below are handled individually
          // Array of problem words (with word as key, syllable count as value)
          $arrProblemWords = array(
              'simile' => 3,
              'forever' => 3,
              'shoreline' => 2,
          );
          if (isset($arrProblemWords[$strWord])) {
              return $arrProblemWords[$strWord];
          }

          // These syllables would be counted as two but should be one
          $arrSubSyllables = array(
              'cial',
              'tia',
              'cius',
              'cious',
              'giu',
              'ion',
              'iou',
              'sia$',
              '[^aeiuoyt]{2,}ed$',
              '.ely$',
              '[cg]h?e[rsd]?$',
              'rved?$',
              '[aeiouy][dt]es?$',
              '[aeiouy][^aeiouydt]e[rsd]?$',
              '^[dr]e[aeiou][^aeiou]+$', // Sorts out deal, deign etc
              '[aeiouy]rse$', // Purse, hearse
          );

          // These syllables would be counted as one but should be two
          $arrAddSyllables = array(
              'ia',
              'riet',
              'dien',
              'iu',
              'io',
              'ii',
              '[aeiouym]bl$',
              '[aeiou]{3}',
              '^mc',
              'ism$',
              '([^aeiouy])\1l$',
              '[^l]lien',
              '^coa[dglx].',
              '[^gq]ua[^auieo]',
              'dnt$',
              'uity$',
              'ie(r|st)$',
          );

          // Single syllable prefixes and suffixes
          $arrPrefixSuffix = array(
              '/^un/',
              '/^fore/',
              '/ly$/',
              '/less$/',
              '/ful$/',
              '/ers?$/',
              '/ings?$/',
          );

          // Remove prefixes and suffixes and count how many were taken
          $strWord = preg_replace($arrPrefixSuffix, '', $strWord, -1, $intPrefixSuffixCount);

          // Every run of vowels in what is left counts as one syllable.
          $intWordPartCount = 0;
          foreach (preg_split('/[^aeiouy]+/', $strWord) as $strWordPart) {
              if ($strWordPart != '') {
                  $intWordPartCount++;
              }
          }

          // Some syllables do not follow normal rules - check for them
          // Thanks to Joe Kovar for correcting a bug in the following lines
          $intSyllableCount = $intWordPartCount + $intPrefixSuffixCount;
          foreach ($arrSubSyllables as $strSyllable) {
              $intSyllableCount -= preg_match('~' . $strSyllable . '~', $strWord);
          }
          foreach ($arrAddSyllables as $strSyllable) {
              $intSyllableCount += preg_match('~' . $strSyllable . '~', $strWord);
          }

          // Several subtractive patterns can match the same vowel group, so
          // clamp instead of only special-casing zero.
          return max(1, $intSyllableCount);
      }
  }
  329. ?>