PageRenderTime 43ms CodeModel.GetById 19ms RepoModel.GetById 1ms app.codeStats 0ms

/tests/Zend/Search/Lucene/DocumentTest.php

https://github.com/Exercise/zf2
PHP | 316 lines | 217 code | 57 blank | 42 comment | 16 complexity | fccf38b24bd59c78686670c000566557 MD5 | raw file
  1. <?php
  2. /**
  3. * Zend Framework
  4. *
  5. * LICENSE
  6. *
  7. * This source file is subject to the new BSD license that is bundled
  8. * with this package in the file LICENSE.txt.
  9. * It is also available through the world-wide-web at this URL:
  10. * http://framework.zend.com/license/new-bsd
  11. * If you did not receive a copy of the license and are unable to
  12. * obtain it through the world-wide-web, please send an email
  13. * to license@zend.com so we can send you a copy immediately.
  14. *
  15. * @category Zend
  16. * @package Zend_Search_Lucene
  17. * @subpackage UnitTests
  18. * @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
  19. * @license http://framework.zend.com/license/new-bsd New BSD License
  20. * @version $Id$
  21. */
  22. /**
  23. * @namespace
  24. */
  25. namespace ZendTest\Search\Lucene;
  26. use Zend\Search\Lucene\Document;
  27. use Zend\Search\Lucene;
  28. /**
  29. * PHPUnit test case
  30. */
  31. /**
  32. * @category Zend
  33. * @package Zend_Search_Lucene
  34. * @subpackage UnitTests
  35. * @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
  36. * @license http://framework.zend.com/license/new-bsd New BSD License
  37. * @group Zend_Search_Lucene
  38. */
  39. class DocumentTest extends \PHPUnit_Framework_TestCase
  40. {
  /**
   * Deletes the regular files located directly inside a directory.
   *
   * Entries that are themselves directories are left alone and are not
   * descended into. Deletion is best-effort: a file that cannot be removed
   * is skipped silently. Nothing happens when the path does not exist or is
   * not a directory.
   *
   * @param string $dirName Path of the directory to empty
   * @return void
   */
  private function _clearDirectory($dirName)
  {
      if (!file_exists($dirName) || !is_dir($dirName)) {
          return;
      }

      $dir = opendir($dirName);
      if ($dir === false) {
          // The directory could not be opened. Calling readdir() without a
          // valid handle does not produce the "false" that ends the loop
          // below (PHP 5 yields NULL plus a warning), so stop here.
          return;
      }

      // remove files from temporary directory
      while (($file = readdir($dir)) !== false) {
          $path = $dirName . '/' . $file;
          if (!is_dir($path)) {
              @unlink($path);
          }
      }

      closedir($dir);
  }
  /**
   * A newly constructed document reports a boost equal to 1.
   */
  public function testCreate()
  {
      $document = new Document();

      // assertEquals() takes the expected value first, the actual value second.
      $this->assertEquals(1, $document->boost);
  }
  /**
   * Fields added to a document are listed by getFieldNames() and can be read
   * back through property access, getField()->value and getFieldValue().
   * A field supplied in ISO-8859-1 is returned unchanged through property
   * access and converted by getFieldUtf8Value().
   */
  public function testFields()
  {
      $expectedValues = array(
          'title'      => 'Title',
          'annotation' => 'Annotation',
          'body'       => 'Document body, document body, document body...',
      );

      $document = new Document();
      foreach ($expectedValues as $name => $value) {
          $document->addField(Document\Field::Text($name, $value));
      }

      $expectedNames = array_keys($expectedValues);
      $actualNames   = $document->getFieldNames();
      $this->assertTrue(is_array($actualNames));
      // Diff in both directions: the first check rejects unexpected names,
      // the second rejects missing ones. Order of the names is not significant.
      $this->assertEquals(0, count(array_diff($actualNames, $expectedNames)));
      $this->assertEquals(0, count(array_diff($expectedNames, $actualNames)));

      foreach ($expectedValues as $name => $value) {
          $this->assertEquals($value, $document->$name);
      }
      foreach ($expectedValues as $name => $value) {
          $this->assertEquals($value, $document->getField($name)->value);
      }
      foreach ($expectedValues as $name => $value) {
          $this->assertEquals($value, $document->getFieldValue($name));
      }

      if (PHP_OS === 'AIX') {
          return; // tests below here not valid on AIX
      }

      $wordsWithUmlautsUtf8     = 'Words with umlauts: åãü...';
      $wordsWithUmlautsIso88591 = iconv('UTF-8', 'ISO-8859-1', $wordsWithUmlautsUtf8);
      $document->addField(Document\Field::Text('description', $wordsWithUmlautsIso88591, 'ISO-8859-1'));

      $this->assertEquals($wordsWithUmlautsIso88591, $document->description);
      $this->assertEquals($wordsWithUmlautsUtf8, $document->getFieldUtf8Value('description'));
  }
  /**
   * addField() returns a Document, so calls to it can be chained.
   */
  public function testAddFieldMethodChaining()
  {
      $document = new Document();
      $this->assertTrue($document->addField(Document\Field::Text('title', 'Title')) instanceof Document);

      $document = new Document();
      $chainResult = $document
          ->addField(Document\Field::Text('title', 'Title'))
          ->addField(Document\Field::Text('annotation', 'Annotation'))
          ->addField(Document\Field::Text('body', 'Document body, document body, document body...'));

      // Check the value at the end of the chain as well, not only that the
      // chained calls ran without an error.
      $this->assertTrue($chainResult instanceof Document);
  }
  /**
   * Highlighting a single word wraps its occurrence in the document body in
   * a <b> element styled with the requested background colour.
   */
  public function testHtmlHighlighting()
  {
      $markup           = '<HTML><HEAD><TITLE>Page title</TITLE></HEAD><BODY>Document body.</BODY></HTML>';
      $expectedFragment = '<b style="color:black;background-color:#66ffff">Document</b> body.';

      $htmlDocument = Document\HTML::loadHTML($markup);
      $this->assertTrue($htmlDocument instanceof Document\HTML);

      $htmlDocument->highlight('document', '#66ffff');

      $fragmentOffset = strpos($htmlDocument->getHTML(), $expectedFragment);
      $this->assertTrue(false !== $fragmentOffset);
  }
  /**
   * highlightExtended() routes each match through a user callback; the
   * callback's return value (here a styled <b> element followed by a marker)
   * shows up in the rendered HTML.
   */
  public function testHtmlExtendedHighlighting()
  {
      $highlighter = array(
          '\ZendTest\Search\Lucene\DocHighlightingContainer',
          'extendedHighlightingCallback',
      );
      $highlighterArguments = array(
          'style="color:black;background-color:#ff66ff"',
          '(!!!)',
      );

      $htmlDocument = Document\HTML::loadHTML('<HTML><HEAD><TITLE>Page title</TITLE></HEAD><BODY>Document body.</BODY></HTML>');
      $this->assertTrue($htmlDocument instanceof Document\HTML);

      $htmlDocument->highlightExtended('document', $highlighter, $highlighterArguments);

      $renderedHtml = $htmlDocument->getHTML();
      $this->assertTrue(
          false !== strpos($renderedHtml, '<b style="color:black;background-color:#ff66ff">Document</b>(!!!) body.')
      );
  }
  /**
   * Passing a list of words to highlight() marks every listed word in the
   * rendered HTML.
   */
  public function testHtmlWordsHighlighting()
  {
      $expectedFragments = array(
          '<b style="color:black;background-color:#66ffff">Document</b>',
          '<b style="color:black;background-color:#66ffff">body</b>',
      );

      $htmlDocument = Document\HTML::loadHTML('<HTML><HEAD><TITLE>Page title</TITLE></HEAD><BODY>Document body.</BODY></HTML>');
      $this->assertTrue($htmlDocument instanceof Document\HTML);

      $htmlDocument->highlight(array('document', 'body'), '#66ffff');
      $renderedHtml = $htmlDocument->getHTML();

      foreach ($expectedFragments as $fragment) {
          $this->assertTrue(false !== strpos($renderedHtml, $fragment));
      }
  }
  /**
   * When the highlighting callback returns malformed markup (an <h3> that is
   * never closed), the rendered HTML is expected to contain the repaired,
   * properly closed element.
   */
  public function testHtmlExtendedHighlightingCorrectWrongHtml()
  {
      $highlighter = array(
          '\ZendTest\Search\Lucene\DocHighlightingContainer',
          'extendedHighlightingCallback',
      );
      $highlighterArguments = array(
          'style="color:black;background-color:#ff66ff"',
          // Deliberately broken HTML: the <h3> tag is not closed
          '<h3>(!!!)',
      );

      $htmlDocument = Document\HTML::loadHTML('<HTML><HEAD><TITLE>Page title</TITLE></HEAD><BODY>Document body.</BODY></HTML>');
      $this->assertTrue($htmlDocument instanceof Document\HTML);

      $htmlDocument->highlightExtended('document', $highlighter, $highlighterArguments);

      $renderedHtml = $htmlDocument->getHTML();
      $this->assertTrue(
          false !== strpos($renderedHtml, '<b style="color:black;background-color:#ff66ff">Document</b><h3>(!!!)</h3> body.')
      );
  }
  /**
   * Loads an HTML fixture file and compares the values returned by
   * getHeaderLinks() and getLinks() with the expected lists (keys ignored,
   * order significant).
   */
  public function testHtmlLinksProcessing()
  {
      $expectedHeaderLinks = array(
          'index.html',
          'contributing.html',
          'contributing.bugs.html',
          'contributing.wishlist.html',
      );
      $expectedLinks = array(
          'contributing.bugs.html',
          'contributing.wishlist.html',
          'developers.documentation.html',
          'faq.translators-revision-tracking.html',
          'index.html',
          'contributing.html',
      );

      $fixturePath  = __DIR__ . '/_indexSource/_files/contributing.documentation.html';
      $htmlDocument = Document\HTML::loadHTMLFile($fixturePath, true);
      $this->assertTrue($htmlDocument instanceof Document\HTML);

      $headerLinks = array_values($htmlDocument->getHeaderLinks());
      $this->assertTrue($headerLinks == $expectedHeaderLinks);

      $links = array_values($htmlDocument->getLinks());
      $this->assertTrue($links == $expectedLinks);
  }
  /**
   * Indexes an HTML document and checks how element boundaries split words:
   * text inside an inline <b> element is joined with the text that follows it
   * ("ZendFramework" is found), while text inside a <div> is not joined with
   * the text that follows it ("FooBar" is not found).
   *
   * The temporary index files are removed even when an assertion fails.
   *
   * @group ZF-4252
   */
  public function testHtmlInlineTagsIndexing()
  {
      $indexDir   = __DIR__ . '/_index/_files';
      $htmlString = '<html><head><title>Hello World</title></head>'
                  . '<body><b>Zend</b>Framework' . "\n" . ' <div>Foo</div>Bar ' . "\n"
                  . ' <strong>Test</strong></body></html>';

      // The exception is captured and re-thrown after cleanup because
      // "finally" is not available at this file's language level.
      $failure = null;
      try {
          $index = Lucene\Lucene::create($indexDir);

          $doc = Document\HTML::loadHTML($htmlString);
          $index->addDocument($doc);

          $hits = $index->find('FooBar');
          $this->assertEquals(0, count($hits));

          $hits = $index->find('ZendFramework');
          $this->assertEquals(1, count($hits));
      } catch (\Exception $e) {
          $failure = $e;
      }

      // Drop the index reference before deleting its files.
      unset($index);
      $this->_clearDirectory($indexDir);

      if ($failure !== null) {
          throw $failure;
      }
  }
  /**
   * Links declared by the <area> elements of an image map are returned by
   * getLinks() together with the links of ordinary <a> elements.
   *
   * The static "exclude nofollow links" setting is switched off for the
   * duration of the test and restored afterwards, even when an assertion fails.
   *
   * @group ZF-8740
   */
  public function testHtmlAreaTags()
  {
      $html = '<HTML>'
            . '<HEAD><TITLE>Page title</TITLE></HEAD>'
            . '<BODY>'
            . 'Document body.'
            . '<img src="img.png" width="640" height="480" alt="some image" usemap="#some_map" />'
            . '<map name="some_map">'
            . '<area shape="rect" coords="0,0,100,100" href="link3.html" alt="Link 3" />'
            . '<area shape="rect" coords="200,200,300,300" href="link4.html" alt="Link 4" />'
            . '</map>'
            . '<a href="link1.html">Link 1</a>.'
            . '<a href="link2.html" rel="nofollow">Link 1</a>.'
            . '</BODY>'
            . '</HTML>';
      $links = array('link1.html', 'link2.html', 'link3.html', 'link4.html');

      $oldNoFollowValue = Document\HTML::getExcludeNoFollowLinks();
      Document\HTML::setExcludeNoFollowLinks(false);

      // The exception is captured and re-thrown after the restore because
      // "finally" is not available at this file's language level.
      $failure = null;
      try {
          $doc1 = Document\HTML::loadHTML($html);
          $this->assertTrue($doc1 instanceof Document\HTML);
          $this->assertTrue(array_values($doc1->getLinks()) == $links);
      } catch (\Exception $e) {
          $failure = $e;
      }

      // Put the static setting back so it does not leak into other tests.
      Document\HTML::setExcludeNoFollowLinks($oldNoFollowValue);

      if ($failure !== null) {
          throw $failure;
      }
  }
  /**
   * With "exclude nofollow links" switched off, getLinks() returns both
   * links of the document; with it switched on, the link marked
   * rel="nofollow" is left out.
   *
   * The static setting is restored to its previous value afterwards, even
   * when an assertion fails.
   */
  public function testHtmlNoFollowLinks()
  {
      $html = '<HTML>'
            . '<HEAD><TITLE>Page title</TITLE></HEAD>'
            . '<BODY>'
            . 'Document body.'
            . '<a href="link1.html">Link 1</a>.'
            . '<a href="link2.html" rel="nofollow">Link 1</a>.'
            . '</BODY>'
            . '</HTML>';

      $oldNoFollowValue = Document\HTML::getExcludeNoFollowLinks();

      // The exception is captured and re-thrown after the restore because
      // "finally" is not available at this file's language level.
      $failure = null;
      try {
          Document\HTML::setExcludeNoFollowLinks(false);
          $doc1 = Document\HTML::loadHTML($html);
          $this->assertTrue($doc1 instanceof Document\HTML);
          $this->assertTrue(array_values($doc1->getLinks()) == array('link1.html', 'link2.html'));

          Document\HTML::setExcludeNoFollowLinks(true);
          $doc2 = Document\HTML::loadHTML($html);
          $this->assertTrue($doc2 instanceof Document\HTML);
          $this->assertTrue(array_values($doc2->getLinks()) == array('link1.html'));
      } catch (\Exception $e) {
          $failure = $e;
      }

      // Put the static setting back so it does not leak into other tests.
      Document\HTML::setExcludeNoFollowLinks($oldNoFollowValue);

      if ($failure !== null) {
          throw $failure;
      }
  }
  /**
   * Loads a .docx fixture and checks its title, description and non-empty
   * body, then verifies that loading a second path ("dummy.docx") raises a
   * Document\Exception whose message contains "is not readable".
   *
   * Skipped when the ZipArchive class is not available.
   */
  public function testDocx()
  {
      if (!class_exists('ZipArchive')) {
          $this->markTestSkipped('ZipArchive class (Zip extension) is not loaded');
      }

      $docxDocument = Document\Docx::loadDocxFile(__DIR__ . '/_openXmlDocuments/test.docx', true);

      $this->assertTrue($docxDocument instanceof Document\Docx);
      $this->assertEquals('Test document', $docxDocument->getFieldValue('title'));
      $this->assertEquals(
          'This is a test document which can be used to demonstrate something.',
          $docxDocument->getFieldValue('description')
      );
      $this->assertTrue($docxDocument->getFieldValue('body') != '');

      try {
          Document\Docx::loadDocxFile(__DIR__ . '/_openXmlDocuments/dummy.docx', true);
          $this->fail('File not readable exception is expected.');
      } catch (Document\Exception $e) {
          if (strpos($e->getMessage(), 'is not readable') === false) {
              // Passthrough exception
              throw $e;
          }
      }
  }
  /**
   * Loads a .pptx fixture and checks its title, description and non-empty body.
   *
   * Skipped when the ZipArchive class is not available.
   */
  public function testPptx()
  {
      if (!class_exists('ZipArchive')) {
          $this->markTestSkipped('ZipArchive class (Zip extension) is not loaded');
      }

      $pptxDocument = Document\Pptx::loadPptxFile(__DIR__ . '/_openXmlDocuments/test.pptx', true);

      $this->assertTrue($pptxDocument instanceof Document\Pptx);
      // assertEquals() takes the expected value first, the actual value second.
      $this->assertEquals('Test document', $pptxDocument->getFieldValue('title'));
      $this->assertEquals(
          'This is a test document which can be used to demonstrate something.',
          $pptxDocument->getFieldValue('description')
      );
      $this->assertTrue($pptxDocument->getFieldValue('body') != '');
  }
  /**
   * Loads a .xlsx fixture and checks its title, description and body; the
   * body must be non-empty and contain the word "ipsum".
   *
   * Skipped when the ZipArchive class is not available.
   */
  public function testXlsx()
  {
      if (!class_exists('ZipArchive')) {
          $this->markTestSkipped('ZipArchive class (Zip extension) is not loaded');
      }

      $xlsxDocument = Document\Xlsx::loadXlsxFile(__DIR__ . '/_openXmlDocuments/test.xlsx', true);

      $this->assertTrue($xlsxDocument instanceof Document\Xlsx);
      // assertEquals() takes the expected value first, the actual value second.
      $this->assertEquals('Test document', $xlsxDocument->getFieldValue('title'));
      $this->assertEquals(
          'This is a test document which can be used to demonstrate something.',
          $xlsxDocument->getFieldValue('description')
      );

      $body = $xlsxDocument->getFieldValue('body');
      $this->assertTrue($body != '');
      $this->assertTrue(strpos($body, 'ipsum') !== false);
  }
  253. }
  /**
   * Holder for the static callback that the extended-highlighting tests pass
   * to highlightExtended().
   */
  class DocHighlightingContainer
  {
      /**
       * Wraps a string in a <b> element and appends a trailing string.
       *
       * @param string $stringToHighlight Text placed inside the <b> element
       * @param string $param1            Attribute text inserted after "<b "
       * @param string $param2            Text appended after the closing </b>
       * @return string The assembled markup
       */
      public static function extendedHighlightingCallback($stringToHighlight, $param1, $param2)
      {
          return sprintf('<b %s>%s</b>%s', $param1, $stringToHighlight, $param2);
      }
  }