/demos/quickstart/protected/index/Zend/Search/Lucene/Search/Query/MultiTerm.php

https://bitbucket.org/volatileeight/prado · PHP · 437 lines · 215 code · 58 blank · 164 comment · 40 complexity · 29a7496ef718ddacf7c356e1ea4594e4 MD5 · raw file

  1. <?php
  2. /**
  3. * Zend Framework
  4. *
  5. * LICENSE
  6. *
  7. * This source file is subject to version 1.0 of the Zend Framework
  8. * license, that is bundled with this package in the file LICENSE, and
  9. * is available through the world-wide-web at the following URL:
  10. * http://www.zend.com/license/framework/1_0.txt. If you did not receive
  11. * a copy of the Zend Framework license and are unable to obtain it
  12. * through the world-wide-web, please send a note to license@zend.com
  13. * so we can mail you a copy immediately.
  14. *
  15. * @package Zend_Search_Lucene
  16. * @subpackage Search
  17. * @copyright Copyright (c) 2005-2006 Zend Technologies USA Inc. (http://www.zend.com)
  18. * @license http://www.zend.com/license/framework/1_0.txt Zend Framework License version 1.0
  19. */
  20. /** Zend_Search_Lucene_Search_Query */
  21. require_once 'Zend/Search/Lucene/Search/Query.php';
  22. /** Zend_Search_Lucene_Search_Weight_MultiTerm */
  23. require_once 'Zend/Search/Lucene/Search/Weight/MultiTerm.php';
  24. /**
  25. * @package Zend_Search_Lucene
  26. * @subpackage Search
  27. * @copyright Copyright (c) 2005-2006 Zend Technologies USA Inc. (http://www.zend.com)
  28. * @license http://www.zend.com/license/framework/1_0.txt Zend Framework License version 1.0
  29. */
  /**
   * Query that matches documents against a set of terms, each of which may be
   * required, prohibited or optional.
   *
   * The query is treated as a "conjunction" query (every term required) when
   * the signs array is null, and as a "non conjunction" query otherwise.
   */
  class Zend_Search_Lucene_Search_Query_MultiTerm extends Zend_Search_Lucene_Search_Query
  {
      /**
       * Terms to find.
       * Array of Zend_Search_Lucene_Index_Term
       *
       * @var array
       */
      private $_terms = array();

      /**
       * Term signs, indexed by the same keys as $_terms.
       * If true then term is required.
       * If false then term is prohibited.
       * If null then term is neither prohibited, nor required
       *
       * If the array itself is null then all terms are required
       *
       * @var array|null
       */
      private $_signs = array();

      /**
       * Result vector.
       * Bitset or array keyed by document ID
       * (depending on Bitset extension availability).
       *
       * @var mixed
       */
      private $_resVector = null;

      /**
       * Terms positions vectors.
       * Array of Arrays:
       * term1Id => (docId => array( pos1, pos2, ... ), ...)
       * term2Id => (docId => array( pos1, pos2, ... ), ...)
       *
       * @var array
       */
      private $_termsPositions = array();

      /**
       * A score factor based on the fraction of all query terms
       * that a document contains.
       * float for conjunction queries
       * array of float (indexed by number of matched terms) for non conjunction queries
       *
       * @var mixed
       */
      private $_coord = null;

      /**
       * Terms weights
       * array of Zend_Search_Lucene_Search_Weight, indexed by term key
       *
       * @var array
       */
      private $_weights = array();

      /**
       * Class constructor. Create a new multi-term query object.
       *
       * When $terms is an array and $signs is missing or contains only TRUE
       * values, the signs are stored as null ("all terms are required").
       * When $terms is not an array the query starts empty.
       *
       * @param array $terms Array of Zend_Search_Lucene_Index_Term objects
       * @param array $signs Array of signs. Sign is boolean|null.
       * @return void
       */
      public function __construct($terms = null, $signs = null)
      {
          /**
           * @todo Check contents of $terms and $signs before adding them.
           */
          if (!is_array($terms)) {
              return;
          }

          $this->_terms = $terms;
          $this->_signs = null;

          // Keep the explicit signs only if at least one term is not required.
          if (is_array($signs)) {
              foreach ($signs as $sign) {
                  if ($sign !== true) {
                      $this->_signs = $signs;
                      // One non-required sign is enough; no need to scan further.
                      break;
                  }
              }
          }
      }

      /**
       * Add a $term (Zend_Search_Lucene_Index_Term) to this query.
       *
       * The sign is specified as:
       * TRUE - term is required
       * FALSE - term is prohibited
       * NULL - term is neither prohibited, nor required
       *
       * @param Zend_Search_Lucene_Index_Term $term
       * @param boolean|null $sign
       * @return void
       */
      public function addTerm(Zend_Search_Lucene_Index_Term $term, $sign = null)
      {
          // A null signs array already means "every term is required", so a
          // required term can be added without touching it.
          if ($sign !== true || $this->_signs !== null) {
              if ($this->_signs === null) {
                  // Expand the "all required" shorthand into explicit TRUE signs
                  // for the terms added so far (the new term is not in the list
                  // yet), keeping signs and terms on the same keys.
                  $this->_signs = array();
                  foreach ($this->_terms as $termId => $existingTerm) {
                      $this->_signs[$termId] = true;
                  }
              }
              $this->_signs[] = $sign;
          }

          $this->_terms[] = $term;
      }

      /**
       * Returns query terms
       *
       * @return array
       */
      public function getTerms()
      {
          return $this->_terms;
      }

      /**
       * Return terms signs
       *
       * @return array|null null means that all terms are required
       */
      public function getSigns()
      {
          return $this->_signs;
      }

      /**
       * Set weight for specified term
       *
       * @param integer $num Key of the term in the terms array
       * @param Zend_Search_Lucene_Search_Weight_Term $weight
       */
      public function setWeight($num, $weight)
      {
          $this->_weights[$num] = $weight;
      }

      /**
       * Constructs an appropriate Weight implementation for this query.
       *
       * @param Zend_Search_Lucene $reader
       * @return Zend_Search_Lucene_Search_Weight
       */
      protected function _createWeight($reader)
      {
          return new Zend_Search_Lucene_Search_Weight_MultiTerm($this, $reader);
      }

      /**
       * Calculate result vector for Conjunction query
       * (like '+something +another')
       *
       * The result vector becomes the intersection of the document sets of all
       * terms; term positions are collected for every term along the way.
       *
       * @param Zend_Search_Lucene $reader
       */
      private function _calculateConjunctionResult($reader)
      {
          $useBitset = extension_loaded('bitset');

          foreach ($this->_terms as $termId => $term) {
              $docIds = $reader->termDocs($term);

              if ($useBitset) {
                  $termDocs = bitset_from_array($docIds);
                  if ($this->_resVector === null) {
                      $this->_resVector = $termDocs;
                  } else {
                      $this->_resVector = bitset_intersection($this->_resVector, $termDocs);
                  }
              } else {
                  // Flip so that document IDs become keys and isset() can be used.
                  $termDocs = array_flip($docIds);
                  if ($this->_resVector === null) {
                      $this->_resVector = $termDocs;
                  } else {
                      // substitute for bitset_intersection
                      foreach ($this->_resVector as $key => $value) {
                          if (!isset($termDocs[$key])) {
                              unset($this->_resVector[$key]);
                          }
                      }
                  }
              }

              $this->_termsPositions[$termId] = $reader->termPositions($term);
          }

          // A query without terms matches nothing. Store an empty result so that
          // score() does not recalculate (and re-initialise weights) on every call.
          if ($this->_resVector === null) {
              $this->_resVector = $useBitset ? bitset_empty() : array();
          }
      }

      /**
       * Calculate result vector for non Conjunction query
       * (like '+something -another')
       *
       * Required terms are intersected, optional and prohibited terms are
       * united separately. If there is no required term the optional set is
       * used instead. Prohibited documents are then removed from the result.
       *
       * @param Zend_Search_Lucene $reader
       */
      private function _calculateNonConjunctionResult($reader)
      {
          $useBitset = extension_loaded('bitset');

          $required = null;
          $neither = $useBitset ? bitset_empty() : array();
          $prohibited = $useBitset ? bitset_empty() : array();

          foreach ($this->_terms as $termId => $term) {
              $docIds = $reader->termDocs($term);
              $termDocs = $useBitset ? bitset_from_array($docIds) : array_flip($docIds);

              if ($this->_signs[$termId] === true) {
                  // required
                  if ($required === null) {
                      $required = $termDocs;
                  } elseif ($useBitset) {
                      $required = bitset_intersection($required, $termDocs);
                  } else {
                      // substitute for bitset_intersection
                      foreach ($required as $key => $value) {
                          if (!isset($termDocs[$key])) {
                              unset($required[$key]);
                          }
                      }
                  }
              } elseif ($this->_signs[$termId] === false) {
                  // prohibited
                  if ($useBitset) {
                      $prohibited = bitset_union($prohibited, $termDocs);
                  } else {
                      // substitute for bitset_union
                      foreach ($termDocs as $key => $value) {
                          $prohibited[$key] = $value;
                      }
                  }
              } else {
                  // neither required, nor prohibited
                  if ($useBitset) {
                      $neither = bitset_union($neither, $termDocs);
                  } else {
                      // substitute for bitset_union
                      foreach ($termDocs as $key => $value) {
                          $neither[$key] = $value;
                      }
                  }
              }

              $this->_termsPositions[$termId] = $reader->termPositions($term);
          }

          // Without required terms any document containing an optional term qualifies.
          if ($required === null) {
              $required = $neither;
          }

          if ($useBitset) {
              $this->_resVector = bitset_intersection(
                  $required,
                  bitset_invert($prohibited, $reader->count())
              );
          } else {
              foreach ($required as $key => $value) {
                  if (isset($prohibited[$key])) {
                      unset($required[$key]);
                  }
              }
              $this->_resVector = $required;
          }
      }

      /**
       * Score calculator for conjunction queries (all terms are required)
       *
       * Sums tf * term weight * field norm over all terms and multiplies the
       * sum by the coord factor, which is computed once and cached.
       *
       * @param integer $docId
       * @param Zend_Search_Lucene $reader
       * @return float
       */
      public function _conjunctionScore($docId, $reader)
      {
          $similarity = $reader->getSimilarity();

          if ($this->_coord === null) {
              $termCount = count($this->_terms);
              $this->_coord = $similarity->coord($termCount, $termCount);
          }

          $score = 0.0;
          foreach ($this->_terms as $termId => $term) {
              $score += $similarity->tf(count($this->_termsPositions[$termId][$docId])) *
                        $this->_weights[$termId]->getValue() *
                        $reader->norm($docId, $term->field);
          }

          return $score * $this->_coord;
      }

      /**
       * Score calculator for non conjunction queries (not all terms are required)
       *
       * Only non-prohibited terms that occur in the document contribute. The
       * sum is multiplied by the coord factor for the number of matched terms;
       * the coord table is computed once and cached.
       *
       * @param integer $docId
       * @param Zend_Search_Lucene $reader
       * @return float
       */
      public function _nonConjunctionScore($docId, $reader)
      {
          $similarity = $reader->getSimilarity();

          if ($this->_coord === null) {
              // Largest possible number of matches: every term that is not prohibited.
              $maxCoord = 0;
              foreach ($this->_signs as $sign) {
                  if ($sign !== false /* not prohibited */) {
                      $maxCoord++;
                  }
              }

              $this->_coord = array();
              for ($count = 0; $count <= $maxCoord; $count++) {
                  $this->_coord[$count] = $similarity->coord($count, $maxCoord);
              }
          }

          $score = 0.0;
          $matchedTerms = 0;
          foreach ($this->_terms as $termId => $term) {
              // Skip terms that are prohibited or do not occur in the document
              if ($this->_signs[$termId] === false ||
                  !isset($this->_termsPositions[$termId][$docId])
              ) {
                  continue;
              }

              $matchedTerms++;
              $score += $similarity->tf(count($this->_termsPositions[$termId][$docId])) *
                        $this->_weights[$termId]->getValue() *
                        $reader->norm($docId, $term->field);
          }

          return $score * $this->_coord[$matchedTerms];
      }

      /**
       * Score specified document
       *
       * The result vector is calculated lazily on the first call, followed by
       * a call to _initWeight() (not defined in this file; presumably inherited
       * from the parent query class). Documents outside the result vector
       * score 0.
       *
       * @param integer $docId
       * @param Zend_Search_Lucene $reader
       * @return float
       */
      public function score($docId, $reader)
      {
          if ($this->_resVector === null) {
              if ($this->_signs === null) {
                  $this->_calculateConjunctionResult($reader);
              } else {
                  $this->_calculateNonConjunctionResult($reader);
              }
              $this->_initWeight($reader);
          }

          if (extension_loaded('bitset')) {
              $matched = bitset_in($this->_resVector, $docId);
          } else {
              $matched = isset($this->_resVector[$docId]);
          }

          if (!$matched) {
              return 0;
          }

          if ($this->_signs === null) {
              return $this->_conjunctionScore($docId, $reader);
          }
          return $this->_nonConjunctionScore($docId, $reader);
      }
  }