PageRenderTime 61ms CodeModel.GetById 32ms RepoModel.GetById 0ms app.codeStats 0ms

/tests/Zend/Search/Lucene/DocumentTest.php

https://github.com/MarcelloDuarte/zf2
PHP | 315 lines | 217 code | 57 blank | 41 comment | 16 complexity | 0b2cd9c100c1e329357437699b112a0f MD5 | raw file
  1. <?php
  2. /**
  3. * Zend Framework
  4. *
  5. * LICENSE
  6. *
  7. * This source file is subject to the new BSD license that is bundled
  8. * with this package in the file LICENSE.txt.
  9. * It is also available through the world-wide-web at this URL:
  10. * http://framework.zend.com/license/new-bsd
  11. * If you did not receive a copy of the license and are unable to
  12. * obtain it through the world-wide-web, please send an email
  13. * to license@zend.com so we can send you a copy immediately.
  14. *
  15. * @category Zend
  16. * @package Zend_Search_Lucene
  17. * @subpackage UnitTests
  18. * @copyright Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
  19. * @license http://framework.zend.com/license/new-bsd New BSD License
  20. */
  21. /**
  22. * @namespace
  23. */
  24. namespace ZendTest\Search\Lucene;
  25. use Zend\Search\Lucene\Document;
  26. use Zend\Search\Lucene;
  27. /**
  28. * PHPUnit test case
  29. */
  30. /**
  31. * @category Zend
  32. * @package Zend_Search_Lucene
  33. * @subpackage UnitTests
  34. * @copyright Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
  35. * @license http://framework.zend.com/license/new-bsd New BSD License
  36. * @group Zend_Search_Lucene
  37. */
  38. class DocumentTest extends \PHPUnit_Framework_TestCase
  39. {
  /**
   * Deletes every non-directory entry found directly inside a directory.
   *
   * Sub-directories are left untouched and the directory itself is kept.
   * The call is a no-op when $dirName is not an existing directory or when
   * it cannot be opened.
   *
   * @param string $dirName Path of the directory to empty
   * @return void
   */
  private function _clearDirectory($dirName)
  {
      // is_dir() is already false for a path that does not exist
      if (!is_dir($dirName)) {
          return;
      }

      $dir = opendir($dirName);
      if ($dir === false) {
          // opendir() reports failure with false; never hand that to
          // readdir()/closedir(), which require a valid directory handle
          return;
      }

      // remove files from the temporary directory
      while (($file = readdir($dir)) !== false) {
          $path = $dirName . '/' . $file;
          if (!is_dir($path)) {
              // Best-effort cleanup: an entry that cannot be removed is skipped
              @unlink($path);
          }
      }

      closedir($dir);
  }
  /**
   * A newly constructed document reports a boost equal to 1.
   */
  public function testCreate()
  {
      $document = new Document();

      // Expected value first, actual second, so failure messages read correctly
      $this->assertEquals(1, $document->boost);
  }
  /**
   * Fields added to a document are listed by getFieldNames() and readable
   * through property access, getField()->value and getFieldValue().
   *
   * The final part (skipped on AIX) adds a field whose value was converted
   * to ISO-8859-1 and checks that property access returns it unchanged while
   * getFieldUtf8Value() returns the UTF-8 text.
   */
  public function testFields()
  {
      $expectedFields = array(
          'title'      => 'Title',
          'annotation' => 'Annotation',
          'body'       => 'Document body, document body, document body...',
      );

      $document = new Document();
      foreach ($expectedFields as $name => $value) {
          $document->addField(Document\Field::Text($name, $value));
      }

      $expectedNames = array_keys($expectedFields);
      $actualNames   = $document->getFieldNames();
      $this->assertTrue(is_array($actualNames));
      // Compare in both directions: a one-way array_diff() would not notice
      // a field that is missing from getFieldNames()
      $this->assertEquals(0, count(array_diff($actualNames, $expectedNames)));
      $this->assertEquals(0, count(array_diff($expectedNames, $actualNames)));

      foreach ($expectedFields as $name => $value) {
          $this->assertEquals($value, $document->$name);
          $this->assertEquals($value, $document->getField($name)->value);
          $this->assertEquals($value, $document->getFieldValue($name));
      }

      if (PHP_OS == 'AIX') {
          return; // tests below here not valid on AIX
      }

      $wordsWithUmlautsIso88591 = iconv('UTF-8', 'ISO-8859-1', 'Words with umlauts: åãü...');
      $document->addField(Document\Field::Text('description', $wordsWithUmlautsIso88591, 'ISO-8859-1'));
      $this->assertEquals($wordsWithUmlautsIso88591, $document->description);
      $this->assertEquals('Words with umlauts: åãü...', $document->getFieldUtf8Value('description'));
  }
  /**
   * addField() hands back a Document, which allows calls to be chained.
   */
  public function testAddFieldMethodChaining()
  {
      // A single call must return a Document instance
      $single   = new Document();
      $returned = $single->addField(Document\Field::Text('title', 'Title'));
      $this->assertTrue($returned instanceof Document);

      // Three chained calls on a fresh document must run without error
      $chained = new Document();
      $chained
          ->addField(Document\Field::Text('title', 'Title'))
          ->addField(Document\Field::Text('annotation', 'Annotation'))
          ->addField(Document\Field::Text('body', 'Document body, document body, document body...'));
  }
  /**
   * highlight() with a single word wraps the matching text of the HTML
   * document in a <b> tag carrying the requested background colour.
   */
  public function testHtmlHighlighting()
  {
      $source = '<HTML><HEAD><TITLE>Page title</TITLE></HEAD><BODY>Document body.</BODY></HTML>';

      $htmlDocument = Document\HTML::loadHTML($source);
      $this->assertTrue($htmlDocument instanceof Document\HTML);

      $htmlDocument->highlight('document', '#66ffff');

      $expectedFragment = '<b style="color:black;background-color:#66ffff">Document</b> body.';
      $position         = strpos($htmlDocument->getHTML(), $expectedFragment);
      $this->assertTrue($position !== false);
  }
  /**
   * highlightExtended() delegates the markup to a user callback; the extra
   * parameters given here end up in the generated HTML.
   */
  public function testHtmlExtendedHighlighting()
  {
      $source = '<HTML><HEAD><TITLE>Page title</TITLE></HEAD><BODY>Document body.</BODY></HTML>';

      $htmlDocument = Document\HTML::loadHTML($source);
      $this->assertTrue($htmlDocument instanceof Document\HTML);

      $callback = array(
          '\ZendTest\Search\Lucene\DocHighlightingContainer',
          'extendedHighlightingCallback',
      );
      $callbackParams = array(
          'style="color:black;background-color:#ff66ff"',
          '(!!!)',
      );
      $htmlDocument->highlightExtended('document', $callback, $callbackParams);

      $expectedFragment = '<b style="color:black;background-color:#ff66ff">Document</b>(!!!) body.';
      $position         = strpos($htmlDocument->getHTML(), $expectedFragment);
      $this->assertTrue($position !== false);
  }
  /**
   * highlight() given an array of words marks each of them in the output.
   */
  public function testHtmlWordsHighlighting()
  {
      $source = '<HTML><HEAD><TITLE>Page title</TITLE></HEAD><BODY>Document body.</BODY></HTML>';

      $htmlDocument = Document\HTML::loadHTML($source);
      $this->assertTrue($htmlDocument instanceof Document\HTML);

      $htmlDocument->highlight(array('document', 'body'), '#66ffff');
      $highlightedHTML = $htmlDocument->getHTML();

      // Both words must appear wrapped in the same highlighting markup
      foreach (array('Document', 'body') as $word) {
          $expectedFragment = '<b style="color:black;background-color:#66ffff">' . $word . '</b>';
          $this->assertTrue(strpos($highlightedHTML, $expectedFragment) !== false);
      }
  }
  /**
   * When the highlighting callback produces malformed HTML (an unclosed
   * <h3>), the document returned by getHTML() contains the closed tag.
   */
  public function testHtmlExtendedHighlightingCorrectWrongHtml()
  {
      $source = '<HTML><HEAD><TITLE>Page title</TITLE></HEAD><BODY>Document body.</BODY></HTML>';

      $htmlDocument = Document\HTML::loadHTML($source);
      $this->assertTrue($htmlDocument instanceof Document\HTML);

      $callback = array(
          '\ZendTest\Search\Lucene\DocHighlightingContainer',
          'extendedHighlightingCallback',
      );
      $callbackParams = array(
          'style="color:black;background-color:#ff66ff"',
          '<h3>(!!!)', // Wrong HTML here, <h3> tag is not closed
      );
      $htmlDocument->highlightExtended('document', $callback, $callbackParams);

      $expectedFragment = '<b style="color:black;background-color:#ff66ff">Document</b><h3>(!!!)</h3> body.';
      $position         = strpos($htmlDocument->getHTML(), $expectedFragment);
      $this->assertTrue($position !== false);
  }
  /**
   * Loading an HTML file exposes its header links and its body links in the
   * expected order.
   */
  public function testHtmlLinksProcessing()
  {
      $sourceFile   = __DIR__ . '/_indexSource/_files/contributing.documentation.html';
      $htmlDocument = Document\HTML::loadHTMLFile($sourceFile, true);
      $this->assertTrue($htmlDocument instanceof Document\HTML);

      $expectedHeaderLinks = array(
          'index.html',
          'contributing.html',
          'contributing.bugs.html',
          'contributing.wishlist.html',
      );
      $actualHeaderLinks = array_values($htmlDocument->getHeaderLinks());
      $this->assertTrue($actualHeaderLinks == $expectedHeaderLinks);

      $expectedLinks = array(
          'contributing.bugs.html',
          'contributing.wishlist.html',
          'developers.documentation.html',
          'faq.translators-revision-tracking.html',
          'index.html',
          'contributing.html',
      );
      $actualLinks = array_values($htmlDocument->getLinks());
      $this->assertTrue($actualLinks == $expectedLinks);
  }
  /**
   * Indexes an HTML document mixing inline and block tags and checks which
   * concatenated words can be found: "ZendFramework" (text around an inline
   * <b>) yields one hit, "FooBar" (text around a <div>) yields none.
   *
   * @group ZF-4252
   */
  public function testHtmlInlineTagsIndexing()
  {
      $indexDir = __DIR__ . '/_index/_files';
      $index    = Lucene\Lucene::create($indexDir);

      $htmlString = '<html><head><title>Hello World</title></head>'
                  . '<body><b>Zend</b>Framework' . "\n" . ' <div>Foo</div>Bar ' . "\n"
                  . ' <strong>Test</strong></body></html>';

      // Same class-name spelling as the other tests in this class, so that
      // autoloading does not depend on letter case
      $doc = Document\HTML::loadHTML($htmlString);
      $index->addDocument($doc);

      $fooBarHitCount        = count($index->find('FooBar'));
      $zendFrameworkHitCount = count($index->find('ZendFramework'));

      // Release the index and remove its files before asserting, so a failed
      // assertion does not leave index files behind
      unset($index);
      $this->_clearDirectory($indexDir);

      $this->assertEquals(0, $fooBarHitCount);
      $this->assertEquals(1, $zendFrameworkHitCount);
  }
  /**
   * Links declared through <area> tags are reported by getLinks() together
   * with regular <a> links.
   *
   * The "exclude nofollow links" flag is switched off for the duration of
   * the test and put back to its previous value before asserting.
   *
   * @group ZF-8740
   */
  public function testHtmlAreaTags()
  {
      $html = '<HTML>'
            . '<HEAD><TITLE>Page title</TITLE></HEAD>'
            . '<BODY>'
            . 'Document body.'
            . '<img src="img.png" width="640" height="480" alt="some image" usemap="#some_map" />'
            . '<map name="some_map">'
            . '<area shape="rect" coords="0,0,100,100" href="link3.html" alt="Link 3" />'
            . '<area shape="rect" coords="200,200,300,300" href="link4.html" alt="Link 4" />'
            . '</map>'
            . '<a href="link1.html">Link 1</a>.'
            . '<a href="link2.html" rel="nofollow">Link 1</a>.'
            . '</BODY>'
            . '</HTML>';

      $oldNoFollowValue = Document\HTML::getExcludeNoFollowLinks();
      Document\HTML::setExcludeNoFollowLinks(false);

      $doc1        = Document\HTML::loadHTML($html);
      $actualLinks = array_values($doc1->getLinks());

      // Restore the static flag before asserting so that a failure here
      // cannot change the behaviour of tests that run afterwards
      Document\HTML::setExcludeNoFollowLinks($oldNoFollowValue);

      $this->assertTrue($doc1 instanceof Document\HTML);
      $links = array('link1.html', 'link2.html', 'link3.html', 'link4.html');
      $this->assertEquals($links, $actualLinks);
  }
  /**
   * getLinks() includes rel="nofollow" links when the "exclude nofollow
   * links" flag is off and omits them when it is on.
   *
   * The flag is static, so its previous value is restored before asserting.
   */
  public function testHtmlNoFollowLinks()
  {
      $html = '<HTML>'
            . '<HEAD><TITLE>Page title</TITLE></HEAD>'
            . '<BODY>'
            . 'Document body.'
            . '<a href="link1.html">Link 1</a>.'
            . '<a href="link2.html" rel="nofollow">Link 1</a>.'
            . '</BODY>'
            . '</HTML>';

      $oldNoFollowValue = Document\HTML::getExcludeNoFollowLinks();

      Document\HTML::setExcludeNoFollowLinks(false);
      $doc1             = Document\HTML::loadHTML($html);
      $linksAllIncluded = array_values($doc1->getLinks());

      Document\HTML::setExcludeNoFollowLinks(true);
      $doc2                  = Document\HTML::loadHTML($html);
      $linksNoFollowExcluded = array_values($doc2->getLinks());

      // Put the static flag back so this test does not affect later ones
      Document\HTML::setExcludeNoFollowLinks($oldNoFollowValue);

      $this->assertTrue($doc1 instanceof Document\HTML);
      $this->assertEquals(array('link1.html', 'link2.html'), $linksAllIncluded);

      $this->assertTrue($doc2 instanceof Document\HTML);
      $this->assertEquals(array('link1.html'), $linksNoFollowExcluded);
  }
  /**
   * A .docx file loads into a Document\Docx exposing title, description and
   * a non-empty body; loading dummy.docx must raise an
   * InvalidArgumentException whose message contains "is not readable".
   *
   * Skipped when the ZipArchive class is unavailable.
   */
  public function testDocx()
  {
      if (!class_exists('ZipArchive')) {
          $this->markTestSkipped('ZipArchive class (Zip extension) is not loaded');
      }

      $docxDocument = Document\Docx::loadDocxFile(__DIR__ . '/_openXmlDocuments/test.docx', true);

      $this->assertTrue($docxDocument instanceof Document\Docx);
      $this->assertEquals('Test document', $docxDocument->getFieldValue('title'));
      $this->assertEquals(
          'This is a test document which can be used to demonstrate something.',
          $docxDocument->getFieldValue('description')
      );
      $this->assertTrue($docxDocument->getFieldValue('body') != '');

      try {
          // The return value is irrelevant: the call itself is expected to throw
          Document\Docx::loadDocxFile(__DIR__ . '/_openXmlDocuments/dummy.docx', true);
          $this->fail('File not readable exception is expected.');
      } catch (Document\Exception\InvalidArgumentException $e) {
          if (strpos($e->getMessage(), 'is not readable') === false) {
              // Passthrough exception
              throw $e;
          }
      }
  }
  /**
   * A .pptx file loads into a Document\Pptx exposing title, description and
   * a non-empty body.
   *
   * Skipped when the ZipArchive class is unavailable.
   */
  public function testPptx()
  {
      if (!class_exists('ZipArchive')) {
          $this->markTestSkipped('ZipArchive class (Zip extension) is not loaded');
      }

      $pptxDocument = Document\Pptx::loadPptxFile(__DIR__ . '/_openXmlDocuments/test.pptx', true);

      $this->assertTrue($pptxDocument instanceof Document\Pptx);
      // Expected value first, actual second
      $this->assertEquals('Test document', $pptxDocument->getFieldValue('title'));
      $this->assertEquals(
          'This is a test document which can be used to demonstrate something.',
          $pptxDocument->getFieldValue('description')
      );
      $this->assertTrue($pptxDocument->getFieldValue('body') != '');
  }
  /**
   * A .xlsx file loads into a Document\Xlsx exposing title, description and
   * a non-empty body that contains the word "ipsum".
   *
   * Skipped when the ZipArchive class is unavailable.
   */
  public function testXlsx()
  {
      if (!class_exists('ZipArchive')) {
          $this->markTestSkipped('ZipArchive class (Zip extension) is not loaded');
      }

      $xlsxDocument = Document\Xlsx::loadXlsxFile(__DIR__ . '/_openXmlDocuments/test.xlsx', true);

      $this->assertTrue($xlsxDocument instanceof Document\Xlsx);
      // Expected value first, actual second
      $this->assertEquals('Test document', $xlsxDocument->getFieldValue('title'));
      $this->assertEquals(
          'This is a test document which can be used to demonstrate something.',
          $xlsxDocument->getFieldValue('description')
      );

      $body = $xlsxDocument->getFieldValue('body');
      $this->assertTrue($body != '');
      $this->assertTrue(strpos($body, 'ipsum') !== false);
  }
  252. }
  /**
   * Holder for the static callback passed to highlightExtended() in the tests.
   */
  class DocHighlightingContainer
  {
      /**
       * Wraps the matched text in a <b> element and appends a suffix.
       *
       * @param string $stringToHighlight Text to wrap
       * @param string $param1 Raw attribute text placed inside the opening <b> tag
       * @param string $param2 Text appended after the closing </b> tag
       * @return string
       */
      public static function extendedHighlightingCallback($stringToHighlight, $param1, $param2)
      {
          return sprintf('<b %s>%s</b>%s', $param1, $stringToHighlight, $param2);
      }
  }