PageRenderTime 37ms CodeModel.GetById 10ms RepoModel.GetById 0ms app.codeStats 0ms

/phpmyfaq/inc/Stopwords.php

http://github.com/thorsten/phpMyFAQ
PHP | 297 lines | 135 code | 40 blank | 122 comment | 16 complexity | bb1b256d220741f557dd7b2939257eb0 MD5 | raw file
Possible License(s): MPL-2.0-no-copyleft-exception, LGPL-2.1, LGPL-3.0
  1. <?php
  2. /**
  3. * The main Stopwords class
  4. *
  5. * PHP Version 5.3
  6. *
  7. * This Source Code Form is subject to the terms of the Mozilla Public License,
  8. * v. 2.0. If a copy of the MPL was not distributed with this file, You can
  9. * obtain one at http://mozilla.org/MPL/2.0/.
  10. *
  11. * @category phpMyFAQ
  12. * @package PMF_Stopwords
  13. * @author Anatoliy Belsky
  14. * @author Matteo Scaramuccia <matteo@phpmyfaq.de>
  15. * @copyright 2009-2012 phpMyFAQ Team
  16. * @license http://www.mozilla.org/MPL/2.0/ Mozilla Public License Version 2.0
  17. * @link http://www.phpmyfaq.de
  18. * @since 2009-04-01
  19. */
  20. if (!defined('IS_VALID_PHPMYFAQ')) {
  21. exit();
  22. }
  23. /**
  24. * PMF_Stopwords
  25. *
  26. * @category phpMyFAQ
  27. * @package PMF_Stopwords
  28. * @author Anatoliy Belsky
  29. * @author Matteo Scaramuccia <matteo@phpmyfaq.de>
  30. * @copyright 2009-2012 phpMyFAQ Team
  31. * @license http://www.mozilla.org/MPL/2.0/ Mozilla Public License Version 2.0
  32. * @link http://www.phpmyfaq.de
  33. * @since 2009-04-01
  34. */
  35. class PMF_Stopwords
  36. {
  37. /**
  38. * @var PMF_Configuration
  39. */
  40. private $_config;
  41. /**
  42. * @var PMF_Language
  43. */
  44. private $_language;
  45. /**
  46. * Table name
  47. *
  48. * @var string
  49. */
  50. private $table_name;
  51. /**
  52. * Constructor
  53. *
  54. * @param PMF_Configuration $config
  55. *
  56. * @return PMF_Stopwords
  57. */
  58. public function __construct(PMF_Configuration $config)
  59. {
  60. $this->_config = $config;
  61. $this->table_name = SQLPREFIX . "faqstopwords";
  62. }
  63. /**
  64. * @return PMF_Language
  65. */
  66. public function getLanguage()
  67. {
  68. return $this->_language;
  69. }
  70. /**
  71. * @return string
  72. */
  73. public function getTableName()
  74. {
  75. return $this->table_name;
  76. }
  77. /**
  78. * @param PMF_Language $language
  79. */
  80. public function setLanguage($language)
  81. {
  82. $this->_language = $language;
  83. }
  84. /**
  85. * @param string $table_name
  86. */
  87. public function setTableName($table_name)
  88. {
  89. $this->table_name = $table_name;
  90. }
  91. /**
  92. * Add a word to the stop words dictionary.
  93. * If the given word already exists, false is returned.
  94. *
  95. * @param string $word
  96. *
  97. * @return boolean
  98. */
  99. public function add($word)
  100. {
  101. if (!$this->match($word)) {
  102. $sql = "INSERT INTO $this->table_name VALUES(%d, '%s', '%s')";
  103. $sql = sprintf(
  104. $sql,
  105. $this->_config->getDb()->nextId($this->table_name, 'id'),
  106. $this->_language->getLanguage(),
  107. $word
  108. );
  109. $this->_config->getDb()->query($sql);
  110. return true;
  111. }
  112. return false;
  113. }
  114. /**
  115. * Update a word in the stop words dictionary
  116. *
  117. * @param int $id
  118. * @param strng $word
  119. *
  120. * @return void
  121. */
  122. public function update($id, $word)
  123. {
  124. $sql = "UPDATE $this->table_name SET stopword = '%s' WHERE id = %d AND lang = '%s'";
  125. $sql = sprintf(
  126. $sql,
  127. $word,
  128. $id,
  129. $this->_language->getLanguage()
  130. );
  131. $this->_config->getDb()->query($sql);
  132. }
  133. /**
  134. * Remove a word from the stop word dictionary
  135. *
  136. * @param integer $id
  137. *
  138. * @return void
  139. */
  140. public function remove($id)
  141. {
  142. $sql = sprintf(
  143. "DELETE FROM $this->table_name WHERE id = %d AND lang = '%s'",
  144. $id,
  145. $this->_language->getLanguage()
  146. );
  147. $this->_config->getDb()->query($sql);
  148. }
  149. /**
  150. * Match a word against the stop words dictionary
  151. *
  152. * @param string $word
  153. *
  154. * @return boolean
  155. */
  156. public function match($word)
  157. {
  158. $sql = "SELECT id FROM $this->table_name WHERE LOWER(stopword) = LOWER('%s') AND lang = '%s'";
  159. $sql = sprintf($sql, $word, $this->_language->getLanguage());
  160. $result = $this->_config->getDb()->query($sql);
  161. return $this->_config->getDb()->numRows($result) > 0;
  162. }
  163. /**
  164. * Retrieve all the stop words by a certain language
  165. *
  166. * @param string $lang Language to retrieve stop words by
  167. * @param boolean $wordsOnly
  168. *
  169. * @return array
  170. */
  171. public function getByLang($lang = null, $wordsOnly = false)
  172. {
  173. $lang = is_null($lang) ? $this->_language->getLanguage() : $lang;
  174. $sql = sprintf(
  175. "SELECT id, lang, LOWER(stopword) AS stopword FROM $this->table_name WHERE lang = '%s'",
  176. $lang
  177. );
  178. $result = $this->_config->getDb()->query($sql);
  179. $retval = array();
  180. if ($wordsOnly) {
  181. while(($row = $this->_config->getDb()->fetchObject($result)) == true) {
  182. $retval[] = $row->stopword;
  183. }
  184. } else {
  185. return $this->_config->getDb()->fetchAll($result);
  186. }
  187. return $retval;
  188. }
  189. /**
  190. * Filter some text cutting out all non words and stop words
  191. *
  192. * @param string $input text to filter
  193. *
  194. * @return array
  195. */
  196. public function clean($input)
  197. {
  198. $words = explode(' ', $input);
  199. $stop_words = $this->getByLang(null, true);
  200. $retval = array();
  201. foreach ($words as $word) {
  202. $word = PMF_String::strtolower($word);
  203. if (!is_numeric($word) && 1 < PMF_String::strlen($word) &&
  204. !in_array($word, $stop_words) && !in_array($word, $retval)) {
  205. $retval[] = $word;
  206. }
  207. }
  208. return $retval;
  209. }
  210. /**
  211. * This function checks the content against a dab word list
  212. * if the banned word spam protection has been activated from the general PMF configuration.
  213. *
  214. * @param string $content
  215. *
  216. * @return bool
  217. */
  218. public function checkBannedWord($content)
  219. {
  220. // Sanity checks
  221. $content = trim($content);
  222. if (('' == $content) && (!$this->_config->get('spam.checkBannedWords'))) {
  223. return true;
  224. }
  225. $bannedWords = $this->getBannedWords();
  226. // We just search a match of, at least, one banned word into $content
  227. $content = PMF_String::strtolower($content);
  228. if (is_array($bannedWords)) {
  229. foreach ($bannedWords as $bannedWord) {
  230. if (PMF_String::strpos($content, PMF_String::strtolower($bannedWord)) !== false) {
  231. return false;
  232. }
  233. }
  234. }
  235. return true;
  236. }
  237. /**
  238. * This function returns the banned words dictionary as an array.
  239. *
  240. * @return array
  241. */
  242. private function getBannedWords()
  243. {
  244. $bannedTrimmedWords = array();
  245. $bannedWordsFile = __DIR__ . '/blockedwords.txt';
  246. $bannedWords = array();
  247. // Read the dictionary
  248. if (file_exists($bannedWordsFile) && is_readable($bannedWordsFile)) {
  249. $bannedWords = file_get_contents($bannedWordsFile);
  250. }
  251. // Trim it
  252. foreach (explode("\n", $bannedWords) as $word) {
  253. $bannedTrimmedWords[] = trim($word);
  254. }
  255. return $bannedTrimmedWords;
  256. }
  257. }