/protected/modules/zendsearch/vendors/Zend/Search/Lucene/Document/Pptx.php

https://gitlab.com/RonLab1987/YupePlusClear · PHP · 223 lines · 113 code · 25 blank · 85 comment · 12 complexity · 0588cfbde1086b8519867e547b23abd4 MD5 · raw file

  1. <?php
  2. /**
  3. * Zend Framework
  4. *
  5. * LICENSE
  6. *
  7. * This source file is subject to the new BSD license that is bundled
  8. * with this package in the file LICENSE.txt.
  9. * It is also available through the world-wide-web at this URL:
  10. * http://framework.zend.com/license/new-bsd
  11. * If you did not receive a copy of the license and are unable to
  12. * obtain it through the world-wide-web, please send an email
  13. * to license@zend.com so we can send you a copy immediately.
  14. *
  15. * @category Zend
  16. * @package Zend_Search_Lucene
  17. * @subpackage Document
  18. * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
  19. * @license http://framework.zend.com/license/new-bsd New BSD License
  20. * @version $Id: Pptx.php 24593 2012-01-05 20:35:02Z matthew $
  21. */
  22. /** Zend_Search_Lucene_Document_OpenXml */
  23. require_once 'Zend/Search/Lucene/Document/OpenXml.php';
  /**
   * Pptx document.
   *
   * Builds a Lucene document from a PowerPoint OpenXML (.pptx) package. The
   * text runs of every slide (and of the notes attached to a slide) are
   * concatenated into the "body" field; the file name and the package meta
   * data are stored as additional fields.
   *
   * @category Zend
   * @package Zend_Search_Lucene
   * @subpackage Document
   * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
   * @license http://framework.zend.com/license/new-bsd New BSD License
   */
  class Zend_Search_Lucene_Document_Pptx extends Zend_Search_Lucene_Document_OpenXml
  {
      /**
       * Xml Schema - PresentationML
       *
       * @var string
       */
      const SCHEMA_PRESENTATIONML = 'http://schemas.openxmlformats.org/presentationml/2006/main';

      /**
       * Xml Schema - DrawingML
       *
       * @var string
       */
      const SCHEMA_DRAWINGML = 'http://schemas.openxmlformats.org/drawingml/2006/main';

      /**
       * Xml Schema - Slide relation
       *
       * @var string
       */
      const SCHEMA_SLIDERELATION = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/slide';

      /**
       * Xml Schema - Slide notes relation
       *
       * @var string
       */
      const SCHEMA_SLIDENOTESRELATION = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/notesSlide';

      /**
       * Object constructor
       *
       * Opens the package, follows the relationship files from the package
       * root to the office document, its slides and their notes, and indexes
       * the text found there. Slides or notes whose XML part is missing or
       * unparsable are skipped instead of aborting the whole document.
       *
       * @param string  $fileName     Path of the .pptx file to index
       * @param boolean $storeContent Store the body text in the index (true)
       *                              or only index it (false)
       * @throws Zend_Search_Lucene_Exception When the Zip extension is not loaded,
       *                                      the file cannot be opened as a Zip
       *                                      archive, or "_rels/.rels" is missing
       *                                      or not well-formed XML
       */
      private function __construct($fileName, $storeContent)
      {
          if (!class_exists('ZipArchive', false)) {
              require_once 'Zend/Search/Lucene/Exception.php';
              throw new Zend_Search_Lucene_Exception('MS Office documents processing functionality requires Zip extension to be loaded');
          }

          // Document data holders
          $slides = [];
          $slideNotes = [];
          $documentBody = [];

          // Open OpenXML package; open() returns true on success and an error code otherwise
          $package = new ZipArchive();
          if ($package->open($fileName) !== true) {
              require_once 'Zend/Search/Lucene/Exception.php';
              throw new Zend_Search_Lucene_Exception('Invalid archive or corrupted .pptx file.');
          }

          // Read relations and search for officeDocument
          $relations = $this->_readPptxXmlPart($package, '_rels/.rels');
          if ($relations === null) {
              // Release the archive handle before reporting the failure
              $package->close();
              require_once 'Zend/Search/Lucene/Exception.php';
              throw new Zend_Search_Lucene_Exception('Invalid archive or corrupted .pptx file.');
          }

          foreach ($relations->Relationship as $rel) {
              if ((string)$rel['Type'] !== Zend_Search_Lucene_Document_OpenXml::SCHEMA_OFFICEDOCUMENT) {
                  continue;
              }

              // Found office document! Search for slides...
              $documentTarget = (string)$rel['Target'];
              $documentDir = dirname($documentTarget);

              $slideRelations = $this->_readPptxXmlPart(
                  $package,
                  $this->absoluteZipPath($documentDir . '/_rels/' . basename($documentTarget) . '.rels')
              );

              if ($slideRelations !== null) {
                  foreach ($slideRelations->Relationship as $slideRel) {
                      if ((string)$slideRel['Type'] !== self::SCHEMA_SLIDERELATION) {
                          continue;
                      }

                      // Found slide! The numeric part of the relation id is the sort key.
                      $slideKey = str_replace('rId', '', (string)$slideRel['Id']);
                      $slideTarget = (string)$slideRel['Target'];
                      $slideDir = $documentDir . '/' . dirname($slideTarget);
                      $slideFile = basename($slideTarget);

                      $slide = $this->_readPptxXmlPart(
                          $package,
                          $this->absoluteZipPath($slideDir . '/' . $slideFile)
                      );
                      if ($slide === null) {
                          // Referenced slide part is missing or broken: nothing to index
                          continue;
                      }
                      $slides[$slideKey] = $slide;

                      // Search for slide notes
                      $slideNotesRelations = $this->_readPptxXmlPart(
                          $package,
                          $this->absoluteZipPath($slideDir . '/_rels/' . $slideFile . '.rels')
                      );
                      if ($slideNotesRelations === null) {
                          continue;
                      }

                      foreach ($slideNotesRelations->Relationship as $slideNoteRel) {
                          if ((string)$slideNoteRel['Type'] !== self::SCHEMA_SLIDENOTESRELATION) {
                              continue;
                          }

                          // Found slide notes! Their target is relative to the slide directory.
                          $noteTarget = (string)$slideNoteRel['Target'];
                          $slideNote = $this->_readPptxXmlPart(
                              $package,
                              $this->absoluteZipPath(
                                  $slideDir . '/' . dirname($noteTarget) . '/' . basename($noteTarget)
                              )
                          );
                          if ($slideNote !== null) {
                              $slideNotes[$slideKey] = $slideNote;
                          }

                          // Only the first notes relation of a slide is used
                          break;
                      }
                  }
              }

              // Only the first office document relation is processed
              break;
          }

          // Sort slides (notes are looked up by slide key, so they need no sorting)
          ksort($slides);

          // Extract contents from slides, each followed by its notes
          foreach ($slides as $slideKey => $slide) {
              foreach ($this->_collectDrawingText($slide) as $text) {
                  $documentBody[] = $text;
              }

              if (isset($slideNotes[$slideKey])) {
                  foreach ($this->_collectDrawingText($slideNotes[$slideKey]) as $text) {
                      $documentBody[] = $text;
                  }
              }
          }

          // Read core properties
          $coreProperties = $this->extractMetaData($package);

          // Close file
          $package->close();

          // Store filename
          $this->addField(Zend_Search_Lucene_Field::Text('filename', $fileName, 'UTF-8'));

          // Store contents
          $body = implode(' ', $documentBody);
          if ($storeContent) {
              $this->addField(Zend_Search_Lucene_Field::Text('body', $body, 'UTF-8'));
          } else {
              $this->addField(Zend_Search_Lucene_Field::UnStored('body', $body, 'UTF-8'));
          }

          // Store meta data properties
          foreach ($coreProperties as $key => $value) {
              $this->addField(Zend_Search_Lucene_Field::Text($key, $value, 'UTF-8'));
          }

          // Store title (if not present in meta data)
          if (!isset($coreProperties['title'])) {
              $this->addField(Zend_Search_Lucene_Field::Text('title', $fileName, 'UTF-8'));
          }
      }

      /**
       * Read one XML part from the package and parse it.
       *
       * @param ZipArchive $package  Opened OpenXML package
       * @param string     $partName Name of the entry inside the archive
       * @return SimpleXMLElement|null Parsed part, or null when the entry does
       *                               not exist, is empty or is not well-formed XML
       */
      private function _readPptxXmlPart(ZipArchive $package, $partName)
      {
          $xml = $package->getFromName($partName);
          if ($xml === false || $xml === '') {
              return null;
          }

          $element = simplexml_load_string($xml);

          return ($element === false) ? null : $element;
      }

      /**
       * Collect the content of all DrawingML text elements (a:t) of a part.
       *
       * @param SimpleXMLElement $part Parsed slide or slide notes part
       * @return string[] Text values in document order (empty when none are found)
       */
      private function _collectDrawingText(SimpleXMLElement $part)
      {
          $part->registerXPathNamespace('a', self::SCHEMA_DRAWINGML);

          $textElements = $part->xpath('//a:t');
          if (!is_array($textElements)) {
              // xpath() does not return an array when the query fails
              return [];
          }

          $texts = [];
          foreach ($textElements as $textElement) {
              $texts[] = (string)$textElement;
          }

          return $texts;
      }

      /**
       * Load Pptx document from a file
       *
       * @param string  $fileName     Path of the .pptx file to index
       * @param boolean $storeContent Store the body text in the index (true)
       *                              or only index it (false)
       * @return Zend_Search_Lucene_Document_Pptx
       * @throws Zend_Search_Lucene_Exception When the file cannot be processed
       */
      public static function loadPptxFile($fileName, $storeContent = false)
      {
          return new Zend_Search_Lucene_Document_Pptx($fileName, $storeContent);
      }
  }