PageRenderTime 44ms CodeModel.GetById 16ms RepoModel.GetById 1ms app.codeStats 0ms

/library/Search/Lucene/Document/Xlsx.php

https://github.com/kervin/kyzstudio
PHP | 263 lines | 128 code | 36 blank | 99 comment | 34 complexity | 6f322d850f8270238ba983d5f0b7f2bb MD5 | raw file
  1. <?php
  2. /**
  3. * Zend Framework
  4. *
  5. * LICENSE
  6. *
  7. * This source file is subject to the new BSD license that is bundled
  8. * with this package in the file LICENSE.txt.
  9. * It is also available through the world-wide-web at this URL:
  10. * http://framework.zend.com/license/new-bsd
  11. * If you did not receive a copy of the license and are unable to
  12. * obtain it through the world-wide-web, please send an email
  13. * to license@zend.com so we can send you a copy immediately.
  14. *
  15. * @category Zend
  16. * @package Zend_Search_Lucene
  17. * @subpackage Document
  18. * @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
  19. * @license http://framework.zend.com/license/new-bsd New BSD License
  20. * @version $Id: Xlsx.php 20096 2010-01-06 02:05:09Z bkarwin $
  21. */
  22. /** Zend_Search_Lucene_Document_OpenXml */
  23. #require_once 'Zend/Search/Lucene/Document/OpenXml.php';
  24. /**
  25. * Xlsx document.
  26. *
  27. * @category Zend
  28. * @package Zend_Search_Lucene
  29. * @subpackage Document
  30. * @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
  31. * @license http://framework.zend.com/license/new-bsd New BSD License
  32. */
  33. class Zend_Search_Lucene_Document_Xlsx extends Zend_Search_Lucene_Document_OpenXml
  34. {
  35. /**
  36. * Xml Schema - SpreadsheetML
  37. *
  38. * @var string
  39. */
  40. const SCHEMA_SPREADSHEETML = 'http://schemas.openxmlformats.org/spreadsheetml/2006/main';
  41. /**
  42. * Xml Schema - DrawingML
  43. *
  44. * @var string
  45. */
  46. const SCHEMA_DRAWINGML = 'http://schemas.openxmlformats.org/drawingml/2006/main';
  47. /**
  48. * Xml Schema - Shared Strings
  49. *
  50. * @var string
  51. */
  52. const SCHEMA_SHAREDSTRINGS = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/sharedStrings';
  53. /**
  54. * Xml Schema - Worksheet relation
  55. *
  56. * @var string
  57. */
  58. const SCHEMA_WORKSHEETRELATION = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet';
  59. /**
  60. * Xml Schema - Slide notes relation
  61. *
  62. * @var string
  63. */
  64. const SCHEMA_SLIDENOTESRELATION = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/notesSlide';
  65. /**
  66. * Object constructor
  67. *
  68. * @param string $fileName
  69. * @param boolean $storeContent
  70. * @throws Zend_Search_Lucene_Exception
  71. */
  72. private function __construct($fileName, $storeContent)
  73. {
  74. if (!class_exists('ZipArchive', false)) {
  75. #require_once 'Zend/Search/Lucene/Exception.php';
  76. throw new Zend_Search_Lucene_Exception('MS Office documents processing functionality requires Zip extension to be loaded');
  77. }
  78. // Document data holders
  79. $sharedStrings = array();
  80. $worksheets = array();
  81. $documentBody = array();
  82. $coreProperties = array();
  83. // Open OpenXML package
  84. $package = new ZipArchive();
  85. $package->open($fileName);
  86. // Read relations and search for officeDocument
  87. $relationsXml = $package->getFromName('_rels/.rels');
  88. if ($relationsXml === false) {
  89. #require_once 'Zend/Search/Lucene/Exception.php';
  90. throw new Zend_Search_Lucene_Exception('Invalid archive or corrupted .xlsx file.');
  91. }
  92. $relations = simplexml_load_string($relationsXml);
  93. foreach ($relations->Relationship as $rel) {
  94. if ($rel["Type"] == Zend_Search_Lucene_Document_OpenXml::SCHEMA_OFFICEDOCUMENT) {
  95. // Found office document! Read relations for workbook...
  96. $workbookRelations = simplexml_load_string($package->getFromName( $this->absoluteZipPath(dirname($rel["Target"]) . "/_rels/" . basename($rel["Target"]) . ".rels")) );
  97. $workbookRelations->registerXPathNamespace("rel", Zend_Search_Lucene_Document_OpenXml::SCHEMA_RELATIONSHIP);
  98. // Read shared strings
  99. $sharedStringsPath = $workbookRelations->xpath("rel:Relationship[@Type='" . Zend_Search_Lucene_Document_Xlsx::SCHEMA_SHAREDSTRINGS . "']");
  100. $sharedStringsPath = (string)$sharedStringsPath[0]['Target'];
  101. $xmlStrings = simplexml_load_string($package->getFromName( $this->absoluteZipPath(dirname($rel["Target"]) . "/" . $sharedStringsPath)) );
  102. if (isset($xmlStrings) && isset($xmlStrings->si)) {
  103. foreach ($xmlStrings->si as $val) {
  104. if (isset($val->t)) {
  105. $sharedStrings[] = (string)$val->t;
  106. } elseif (isset($val->r)) {
  107. $sharedStrings[] = $this->_parseRichText($val);
  108. }
  109. }
  110. }
  111. // Loop relations for workbook and extract worksheets...
  112. foreach ($workbookRelations->Relationship as $workbookRelation) {
  113. if ($workbookRelation["Type"] == Zend_Search_Lucene_Document_Xlsx::SCHEMA_WORKSHEETRELATION) {
  114. $worksheets[ str_replace( 'rId', '', (string)$workbookRelation["Id"]) ] = simplexml_load_string(
  115. $package->getFromName( $this->absoluteZipPath(dirname($rel["Target"]) . "/" . dirname($workbookRelation["Target"]) . "/" . basename($workbookRelation["Target"])) )
  116. );
  117. }
  118. }
  119. break;
  120. }
  121. }
  122. // Sort worksheets
  123. ksort($worksheets);
  124. // Extract contents from worksheets
  125. foreach ($worksheets as $sheetKey => $worksheet) {
  126. foreach ($worksheet->sheetData->row as $row) {
  127. foreach ($row->c as $c) {
  128. // Determine data type
  129. $dataType = (string)$c["t"];
  130. switch ($dataType) {
  131. case "s":
  132. // Value is a shared string
  133. if ((string)$c->v != '') {
  134. $value = $sharedStrings[intval($c->v)];
  135. } else {
  136. $value = '';
  137. }
  138. break;
  139. case "b":
  140. // Value is boolean
  141. $value = (string)$c->v;
  142. if ($value == '0') {
  143. $value = false;
  144. } else if ($value == '1') {
  145. $value = true;
  146. } else {
  147. $value = (bool)$c->v;
  148. }
  149. break;
  150. case "inlineStr":
  151. // Value is rich text inline
  152. $value = $this->_parseRichText($c->is);
  153. break;
  154. case "e":
  155. // Value is an error message
  156. if ((string)$c->v != '') {
  157. $value = (string)$c->v;
  158. } else {
  159. $value = '';
  160. }
  161. break;
  162. default:
  163. // Value is a string
  164. $value = (string)$c->v;
  165. // Check for numeric values
  166. if (is_numeric($value) && $dataType != 's') {
  167. if ($value == (int)$value) $value = (int)$value;
  168. elseif ($value == (float)$value) $value = (float)$value;
  169. elseif ($value == (double)$value) $value = (double)$value;
  170. }
  171. }
  172. $documentBody[] = $value;
  173. }
  174. }
  175. }
  176. // Read core properties
  177. $coreProperties = $this->extractMetaData($package);
  178. // Close file
  179. $package->close();
  180. // Store filename
  181. $this->addField(Zend_Search_Lucene_Field::Text('filename', $fileName, 'UTF-8'));
  182. // Store contents
  183. if ($storeContent) {
  184. $this->addField(Zend_Search_Lucene_Field::Text('body', implode(' ', $documentBody), 'UTF-8'));
  185. } else {
  186. $this->addField(Zend_Search_Lucene_Field::UnStored('body', implode(' ', $documentBody), 'UTF-8'));
  187. }
  188. // Store meta data properties
  189. foreach ($coreProperties as $key => $value)
  190. {
  191. $this->addField(Zend_Search_Lucene_Field::Text($key, $value, 'UTF-8'));
  192. }
  193. // Store title (if not present in meta data)
  194. if (!isset($coreProperties['title']))
  195. {
  196. $this->addField(Zend_Search_Lucene_Field::Text('title', $fileName, 'UTF-8'));
  197. }
  198. }
  199. /**
  200. * Parse rich text XML
  201. *
  202. * @param SimpleXMLElement $is
  203. * @return string
  204. */
  205. private function _parseRichText($is = null) {
  206. $value = array();
  207. if (isset($is->t)) {
  208. $value[] = (string)$is->t;
  209. } else {
  210. foreach ($is->r as $run) {
  211. $value[] = (string)$run->t;
  212. }
  213. }
  214. return implode('', $value);
  215. }
  216. /**
  217. * Load Xlsx document from a file
  218. *
  219. * @param string $fileName
  220. * @param boolean $storeContent
  221. * @return Zend_Search_Lucene_Document_Xlsx
  222. */
  223. public static function loadXlsxFile($fileName, $storeContent = false)
  224. {
  225. return new Zend_Search_Lucene_Document_Xlsx($fileName, $storeContent);
  226. }
  227. }