PageRenderTime 49ms CodeModel.GetById 20ms RepoModel.GetById 0ms app.codeStats 1ms

/tests/Zend/Search/Lucene/LuceneTest.php

https://bitbucket.org/dbaltas/zend-framework-1.x-on-git
PHP | 581 lines | 386 code | 148 blank | 47 comment | 28 complexity | 942595a737ffe4276974b5211bfae035 MD5 | raw file
Possible License(s): BSD-3-Clause, LGPL-2.0, MIT
  1. <?php
  2. /**
  3. * Zend Framework
  4. *
  5. * LICENSE
  6. *
  7. * This source file is subject to the new BSD license that is bundled
  8. * with this package in the file LICENSE.txt.
  9. * It is also available through the world-wide-web at this URL:
  10. * http://framework.zend.com/license/new-bsd
  11. * If you did not receive a copy of the license and are unable to
  12. * obtain it through the world-wide-web, please send an email
  13. * to license@zend.com so we can send you a copy immediately.
  14. *
  15. * @category Zend
  16. * @package Zend_Search_Lucene
  17. * @subpackage UnitTests
  18. * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
  19. * @license http://framework.zend.com/license/new-bsd New BSD License
  20. * @version $Id: LuceneTest.php 24811 2012-05-17 21:28:49Z rob $
  21. */
  22. if (!defined('PHPUnit_MAIN_METHOD')) {
  23. define('PHPUnit_MAIN_METHOD', 'Zend_Search_Lucene_LuceneTest::main');
  24. }
  25. /**
  26. * Zend_Search_Lucene
  27. */
  28. require_once 'Zend/Search/Lucene.php';
  29. /**
  30. * @category Zend
  31. * @package Zend_Search_Lucene
  32. * @subpackage UnitTests
  33. * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
  34. * @license http://framework.zend.com/license/new-bsd New BSD License
  35. * @group Zend_Search_Lucene
  36. */
  37. class Zend_Search_Lucene_LuceneTest extends PHPUnit_Framework_TestCase
  38. {
  39. public static function main()
  40. {
  41. $suite = new PHPUnit_Framework_TestSuite(__CLASS__);
  42. $result = PHPUnit_TextUI_TestRunner::run($suite);
  43. }
  44. private function _clearDirectory($dirName)
  45. {
  46. if (!file_exists($dirName) || !is_dir($dirName)) {
  47. return;
  48. }
  49. // remove files from temporary direcytory
  50. $dir = opendir($dirName);
  51. while (($file = readdir($dir)) !== false) {
  52. if (!is_dir($dirName . '/' . $file)) {
  53. @unlink($dirName . '/' . $file);
  54. }
  55. }
  56. closedir($dir);
  57. }
  58. public function setUp()
  59. {
  60. $this->_clearDirectory(dirname(__FILE__) . '/_index/_files');
  61. }
  62. public function testCreate()
  63. {
  64. $index = Zend_Search_Lucene::create(dirname(__FILE__) . '/_index/_files');
  65. $this->assertTrue($index instanceof Zend_Search_Lucene_Interface);
  66. unset($index);
  67. $this->_clearDirectory(dirname(__FILE__) . '/_index/_files');
  68. }
  69. public function testOpen()
  70. {
  71. $index = Zend_Search_Lucene::open(dirname(__FILE__) . '/_indexSample/_files');
  72. $this->assertTrue($index instanceof Zend_Search_Lucene_Interface);
  73. }
  74. public function testOpenNonCompound()
  75. {
  76. $index = Zend_Search_Lucene::open(dirname(__FILE__) . '/_indexSample/_nonCompoundIndexFiles');
  77. $this->assertTrue($index instanceof Zend_Search_Lucene_Interface);
  78. }
  79. public function testDefaultSearchField()
  80. {
  81. $currentDefaultSearchField = Zend_Search_Lucene::getDefaultSearchField();
  82. $this->assertEquals($currentDefaultSearchField, null);
  83. Zend_Search_Lucene::setDefaultSearchField('anotherField');
  84. $this->assertEquals(Zend_Search_Lucene::getDefaultSearchField(), 'anotherField');
  85. Zend_Search_Lucene::setDefaultSearchField($currentDefaultSearchField);
  86. }
  87. public function testCount()
  88. {
  89. $index = Zend_Search_Lucene::open(dirname(__FILE__) . '/_indexSample/_files');
  90. $this->assertEquals($index->count(), 10);
  91. }
  92. public function testMaxDoc()
  93. {
  94. $index = Zend_Search_Lucene::open(dirname(__FILE__) . '/_indexSample/_files');
  95. $this->assertEquals($index->maxDoc(), 10);
  96. }
  97. public function testNumDocs()
  98. {
  99. $index = Zend_Search_Lucene::open(dirname(__FILE__) . '/_indexSample/_files');
  100. $this->assertEquals($index->numDocs(), 9);
  101. }
  102. public function testIsDeleted()
  103. {
  104. $index = Zend_Search_Lucene::open(dirname(__FILE__) . '/_indexSample/_files');
  105. $this->assertFalse($index->isDeleted(3));
  106. $this->assertTrue($index->isDeleted(6));
  107. }
  108. public function testMaxBufferedDocs()
  109. {
  110. $index = Zend_Search_Lucene::open(dirname(__FILE__) . '/_indexSample/_files');
  111. $currentMaxBufferedDocs = $index->getMaxBufferedDocs();
  112. $index->setMaxBufferedDocs(234);
  113. $this->assertEquals($index->getMaxBufferedDocs(), 234);
  114. $index->setMaxBufferedDocs($currentMaxBufferedDocs);
  115. }
  116. public function testMaxMergeDocs()
  117. {
  118. $index = Zend_Search_Lucene::open(dirname(__FILE__) . '/_indexSample/_files');
  119. $currentMaxMergeDocs = $index->getMaxMergeDocs();
  120. $index->setMaxMergeDocs(34);
  121. $this->assertEquals($index->getMaxMergeDocs(), 34);
  122. $index->setMaxMergeDocs($currentMaxMergeDocs);
  123. }
  124. public function testMergeFactor()
  125. {
  126. $index = Zend_Search_Lucene::open(dirname(__FILE__) . '/_indexSample/_files');
  127. $currentMergeFactor = $index->getMergeFactor();
  128. $index->setMergeFactor(113);
  129. $this->assertEquals($index->getMergeFactor(), 113);
  130. $index->setMergeFactor($currentMergeFactor);
  131. }
  132. public function testFind()
  133. {
  134. $index = Zend_Search_Lucene::open(dirname(__FILE__) . '/_indexSample/_files');
  135. $hits = $index->find('submitting');
  136. $this->assertEquals(count($hits), 3);
  137. }
  138. public function testGetFieldNames()
  139. {
  140. $index = Zend_Search_Lucene::open(dirname(__FILE__) . '/_indexSample/_files');
  141. $this->assertTrue(array_values($index->getFieldNames()) == array('path', 'modified', 'contents'));
  142. }
  143. public function testGetDocument()
  144. {
  145. $index = Zend_Search_Lucene::open(dirname(__FILE__) . '/_indexSample/_files');
  146. $doc = $index->getDocument(3);
  147. $this->assertTrue($doc instanceof Zend_Search_Lucene_Document);
  148. $this->assertEquals($doc->path, 'IndexSource/about-pear.html');
  149. }
  150. public function testHasTerm()
  151. {
  152. $index = Zend_Search_Lucene::open(dirname(__FILE__) . '/_indexSample/_files');
  153. $this->assertTrue($index->hasTerm(new Zend_Search_Lucene_Index_Term('packages', 'contents')));
  154. $this->assertFalse($index->hasTerm(new Zend_Search_Lucene_Index_Term('nonusedword', 'contents')));
  155. }
  156. public function testTermDocs()
  157. {
  158. $index = Zend_Search_Lucene::open(dirname(__FILE__) . '/_indexSample/_files');
  159. $this->assertTrue(array_values( $index->termDocs(new Zend_Search_Lucene_Index_Term('packages', 'contents')) ) ==
  160. array(0, 2, 6, 7, 8));
  161. }
  162. public function testTermPositions()
  163. {
  164. $index = Zend_Search_Lucene::open(dirname(__FILE__) . '/_indexSample/_files');
  165. $this->assertTrue($index->termPositions(new Zend_Search_Lucene_Index_Term('packages', 'contents')) ==
  166. array(0 => array(174),
  167. 2 => array(40, 742),
  168. 6 => array(6, 156, 163),
  169. 7 => array(194),
  170. 8 => array(55, 190, 405)));
  171. }
  172. public function testDocFreq()
  173. {
  174. $index = Zend_Search_Lucene::open(dirname(__FILE__) . '/_indexSample/_files');
  175. $this->assertEquals($index->docFreq(new Zend_Search_Lucene_Index_Term('packages', 'contents')), 5);
  176. }
  177. public function testGetSimilarity()
  178. {
  179. $index = Zend_Search_Lucene::open(dirname(__FILE__) . '/_indexSample/_files');
  180. $this->assertTrue($index->getSimilarity() instanceof Zend_Search_Lucene_Search_Similarity);
  181. }
  182. public function testNorm()
  183. {
  184. $index = Zend_Search_Lucene::open(dirname(__FILE__) . '/_indexSample/_files');
  185. $this->assertTrue(abs($index->norm(3, 'contents') - 0.054688) < 0.000001);
  186. }
  187. public function testHasDeletions()
  188. {
  189. $index = Zend_Search_Lucene::open(dirname(__FILE__) . '/_indexSample/_files');
  190. $this->assertTrue($index->hasDeletions());
  191. }
  192. public function testDelete()
  193. {
  194. // Copy index sample into _files directory
  195. $sampleIndexDir = dirname(__FILE__) . '/_indexSample/_files';
  196. $tempIndexDir = dirname(__FILE__) . '/_files';
  197. if (!is_dir($tempIndexDir)) {
  198. mkdir($tempIndexDir);
  199. }
  200. $this->_clearDirectory($tempIndexDir);
  201. $indexDir = opendir($sampleIndexDir);
  202. while (($file = readdir($indexDir)) !== false) {
  203. if (!is_dir($sampleIndexDir . '/' . $file)) {
  204. copy($sampleIndexDir . '/' . $file, $tempIndexDir . '/' . $file);
  205. }
  206. }
  207. closedir($indexDir);
  208. $index = Zend_Search_Lucene::open($tempIndexDir);
  209. $this->assertFalse($index->isDeleted(2));
  210. $index->delete(2);
  211. $this->assertTrue($index->isDeleted(2));
  212. $index->commit();
  213. unset($index);
  214. $index1 = Zend_Search_Lucene::open($tempIndexDir);
  215. $this->assertTrue($index1->isDeleted(2));
  216. unset($index1);
  217. $this->_clearDirectory($tempIndexDir);
  218. }
  219. public function testAddDocument()
  220. {
  221. $index = Zend_Search_Lucene::create(dirname(__FILE__) . '/_index/_files');
  222. $indexSourceDir = dirname(__FILE__) . '/_indexSource/_files';
  223. $dir = opendir($indexSourceDir);
  224. while (($file = readdir($dir)) !== false) {
  225. if (is_dir($indexSourceDir . '/' . $file)) {
  226. continue;
  227. }
  228. if (strcasecmp(substr($file, strlen($file)-5), '.html') != 0) {
  229. continue;
  230. }
  231. // Create new Document from a file
  232. $doc = new Zend_Search_Lucene_Document();
  233. $doc->addField(Zend_Search_Lucene_Field::Text('path', 'IndexSource/' . $file));
  234. $doc->addField(Zend_Search_Lucene_Field::Keyword( 'modified', filemtime($indexSourceDir . '/' . $file) ));
  235. $f = fopen($indexSourceDir . '/' . $file,'rb');
  236. $byteCount = filesize($indexSourceDir . '/' . $file);
  237. $data = '';
  238. while ( $byteCount > 0 && ($nextBlock = fread($f, $byteCount)) != false ) {
  239. $data .= $nextBlock;
  240. $byteCount -= strlen($nextBlock);
  241. }
  242. fclose($f);
  243. $doc->addField(Zend_Search_Lucene_Field::Text('contents', $data, 'ISO-8859-1'));
  244. // Add document to the index
  245. $index->addDocument($doc);
  246. }
  247. closedir($dir);
  248. unset($index);
  249. $index1 = Zend_Search_Lucene::open(dirname(__FILE__) . '/_index/_files');
  250. $this->assertTrue($index1 instanceof Zend_Search_Lucene_Interface);
  251. unset($index1);
  252. $this->_clearDirectory(dirname(__FILE__) . '/_index/_files');
  253. }
  254. public function testOptimize()
  255. {
  256. $index = Zend_Search_Lucene::create(dirname(__FILE__) . '/_index/_files');
  257. $index->setMaxBufferedDocs(2);
  258. $indexSourceDir = dirname(__FILE__) . '/_indexSource/_files';
  259. $dir = opendir($indexSourceDir);
  260. while (($file = readdir($dir)) !== false) {
  261. if (is_dir($indexSourceDir . '/' . $file)) {
  262. continue;
  263. }
  264. if (strcasecmp(substr($file, strlen($file)-5), '.html') != 0) {
  265. continue;
  266. }
  267. // Create new Document from a file
  268. $doc = new Zend_Search_Lucene_Document();
  269. $doc->addField(Zend_Search_Lucene_Field::Keyword('path', 'IndexSource/' . $file));
  270. $doc->addField(Zend_Search_Lucene_Field::Keyword( 'modified', filemtime($indexSourceDir . '/' . $file) ));
  271. $f = fopen($indexSourceDir . '/' . $file,'rb');
  272. $byteCount = filesize($indexSourceDir . '/' . $file);
  273. $data = '';
  274. while ( $byteCount > 0 && ($nextBlock = fread($f, $byteCount)) != false ) {
  275. $data .= $nextBlock;
  276. $byteCount -= strlen($nextBlock);
  277. }
  278. fclose($f);
  279. $doc->addField(Zend_Search_Lucene_Field::Text('contents', $data, 'ISO-8859-1'));
  280. // Add document to the index
  281. $index->addDocument($doc);
  282. }
  283. closedir($dir);
  284. unset($index);
  285. $index1 = Zend_Search_Lucene::open(dirname(__FILE__) . '/_index/_files');
  286. $this->assertTrue($index1 instanceof Zend_Search_Lucene_Interface);
  287. $pathTerm = new Zend_Search_Lucene_Index_Term('IndexSource/contributing.html', 'path');
  288. $contributingDocs = $index1->termDocs($pathTerm);
  289. foreach ($contributingDocs as $id) {
  290. $index1->delete($id);
  291. }
  292. $index1->optimize();
  293. unset($index1);
  294. $index2 = Zend_Search_Lucene::open(dirname(__FILE__) . '/_index/_files');
  295. $this->assertTrue($index2 instanceof Zend_Search_Lucene_Interface);
  296. $hits = $index2->find('submitting');
  297. $this->assertEquals(count($hits), 3);
  298. unset($index2);
  299. $this->_clearDirectory(dirname(__FILE__) . '/_index/_files');
  300. }
  301. public function testTerms()
  302. {
  303. $index = Zend_Search_Lucene::open(dirname(__FILE__) . '/_indexSample/_files');
  304. $this->assertEquals(count($index->terms()), 607);
  305. }
  306. public function testTermsStreamInterface()
  307. {
  308. $index = Zend_Search_Lucene::open(dirname(__FILE__) . '/_indexSample/_files');
  309. $terms = array();
  310. $index->resetTermsStream();
  311. while ($index->currentTerm() !== null) {
  312. $terms[] = $index->currentTerm();
  313. $index->nextTerm();
  314. }
  315. $this->assertEquals(count($terms), 607);
  316. }
  317. public function testTermsStreamInterfaceSkipTo()
  318. {
  319. $index = Zend_Search_Lucene::open(dirname(__FILE__) . '/_indexSample/_files');
  320. $terms = array();
  321. $index->resetTermsStream();
  322. $index->skipTo(new Zend_Search_Lucene_Index_Term('one', 'contents'));
  323. while ($index->currentTerm() !== null) {
  324. $terms[] = $index->currentTerm();
  325. $index->nextTerm();
  326. }
  327. $this->assertEquals(count($terms), 244);
  328. }
  329. public function testTermsStreamInterfaceSkipToTermsRetrieving()
  330. {
  331. $index = Zend_Search_Lucene::open(dirname(__FILE__) . '/_indexSample/_files');
  332. $terms = array();
  333. $index->resetTermsStream();
  334. $index->skipTo(new Zend_Search_Lucene_Index_Term('one', 'contents'));
  335. $terms[] = $index->currentTerm();
  336. $terms[] = $index->nextTerm();
  337. $terms[] = $index->nextTerm();
  338. $index->closeTermsStream();
  339. $this->assertTrue($terms ==
  340. array(new Zend_Search_Lucene_Index_Term('one', 'contents'),
  341. new Zend_Search_Lucene_Index_Term('only', 'contents'),
  342. new Zend_Search_Lucene_Index_Term('open', 'contents'),
  343. ));
  344. }
  345. public function testTermsStreamInterfaceSkipToTermsRetrievingZeroTermsCase()
  346. {
  347. $index = Zend_Search_Lucene::create(dirname(__FILE__) . '/_index/_files');
  348. // Zero terms
  349. $doc = new Zend_Search_Lucene_Document();
  350. $doc->addField(Zend_Search_Lucene_Field::Text('contents', ''));
  351. $index->addDocument($doc);
  352. unset($index);
  353. $index = Zend_Search_Lucene::open(dirname(__FILE__) . '/_index/_files');
  354. $index->resetTermsStream();
  355. $index->skipTo(new Zend_Search_Lucene_Index_Term('term', 'contents'));
  356. $this->assertTrue($index->currentTerm() === null);
  357. $index->closeTermsStream();
  358. unset($index);
  359. $this->_clearDirectory(dirname(__FILE__) . '/_index/_files');
  360. }
  361. public function testTermsStreamInterfaceSkipToTermsRetrievingOneTermsCase()
  362. {
  363. $index = Zend_Search_Lucene::create(dirname(__FILE__) . '/_index/_files');
  364. // Zero terms
  365. $doc = new Zend_Search_Lucene_Document();
  366. $doc->addField(Zend_Search_Lucene_Field::Text('contents', 'someterm'));
  367. $index->addDocument($doc);
  368. unset($index);
  369. $index = Zend_Search_Lucene::open(dirname(__FILE__) . '/_index/_files');
  370. $index->resetTermsStream();
  371. $index->skipTo(new Zend_Search_Lucene_Index_Term('term', 'contents'));
  372. $this->assertTrue($index->currentTerm() === null);
  373. $index->closeTermsStream();
  374. unset($index);
  375. $this->_clearDirectory(dirname(__FILE__) . '/_index/_files');
  376. }
  377. public function testTermsStreamInterfaceSkipToTermsRetrievingTwoTermsCase()
  378. {
  379. $index = Zend_Search_Lucene::create(dirname(__FILE__) . '/_index/_files');
  380. // Zero terms
  381. $doc = new Zend_Search_Lucene_Document();
  382. $doc->addField(Zend_Search_Lucene_Field::Text('contents', 'someterm word'));
  383. $index->addDocument($doc);
  384. unset($index);
  385. $index = Zend_Search_Lucene::open(dirname(__FILE__) . '/_index/_files');
  386. $index->resetTermsStream();
  387. $index->skipTo(new Zend_Search_Lucene_Index_Term('term', 'contents'));
  388. $this->assertTrue($index->currentTerm() == new Zend_Search_Lucene_Index_Term('word', 'contents'));
  389. $index->closeTermsStream();
  390. unset($index);
  391. $this->_clearDirectory(dirname(__FILE__) . '/_index/_files');
  392. }
  393. /**
  394. * @group ZF-7518
  395. */
  396. public function testTermsStreamInterfaceSkipToMatchedTerm()
  397. {
  398. $index = Zend_Search_Lucene::create(dirname(__FILE__) . '/_index/_files');
  399. $doc = new Zend_Search_Lucene_Document();
  400. $doc->addField(Zend_Search_Lucene_Field::Keyword('test', 'f'));
  401. $index->addDocument($doc);
  402. unset($index);
  403. $index = Zend_Search_Lucene::open(dirname(__FILE__) . '/_index/_files');
  404. $hits = $index->find('test:[a TO t]');
  405. $this->assertEquals(1, count($hits));
  406. $this->assertEquals(0, reset($hits)->id);
  407. $hits = $index->find('test:f');
  408. $this->assertEquals(1, count($hits));
  409. $this->assertEquals(0, reset($hits)->id);
  410. $hits = $index->find('test:g');
  411. $this->assertEquals(0, count($hits));
  412. unset($index);
  413. $this->_clearDirectory(dirname(__FILE__) . '/_index/_files');
  414. }
  415. /**
  416. * @group ZF-9680
  417. */
  418. public function testIsDeletedWithoutExplicitCommit()
  419. {
  420. //$this->_clearDirectory(dirname(__FILE__) . '/_index/_files');
  421. $index = Zend_Search_Lucene::create(dirname(__FILE__) . '/_index/_files');
  422. $document = new Zend_Search_Lucene_Document;
  423. $document->addField(Zend_Search_Lucene_Field::Keyword('_id', 'myId'));
  424. $document->addField(Zend_Search_Lucene_Field::Keyword('bla', 'blubb'));
  425. $index->addDocument($document);
  426. $this->assertFalse($index->isDeleted(0));
  427. unset($index);
  428. $this->_clearDirectory(dirname(__FILE__) . '/_index/_files');
  429. }
  430. }
  431. if (PHPUnit_MAIN_METHOD == 'Zend_Search_Lucene_LuceneTest::main') {
  432. Zend_Search_Lucene_LuceneTest::main();
  433. }