PageRenderTime 109ms CodeModel.GetById 20ms RepoModel.GetById 4ms app.codeStats 0ms

/lib/Zend/Search/Lucene/Document/Xlsx.php

https://github.com/gryzz/crystal_magento
PHP | 262 lines | 127 code | 37 blank | 98 comment | 34 complexity | 4fe9d6abd998cd7b1b236382b7a48bd3 MD5 | raw file
  1. <?php
  2. /**
  3. * Zend Framework
  4. *
  5. * LICENSE
  6. *
  7. * This source file is subject to the new BSD license that is bundled
  8. * with this package in the file LICENSE.txt.
  9. * It is also available through the world-wide-web at this URL:
  10. * http://framework.zend.com/license/new-bsd
  11. * If you did not receive a copy of the license and are unable to
  12. * obtain it through the world-wide-web, please send an email
  13. * to license@zend.com so we can send you a copy immediately.
  14. *
  15. * @category Zend
  16. * @package Zend_Search_Lucene
  17. * @subpackage Document
  18. * @copyright Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
  19. * @license http://framework.zend.com/license/new-bsd New BSD License
  20. * @version $Id: Xlsx.php 19035 2009-11-19 14:34:11Z alexander $
  21. */
  22. /** Zend_Search_Lucene_Document_OpenXml */
  23. #require_once 'Zend/Search/Lucene/Document/OpenXml.php';
  24. if (class_exists('ZipArchive', false)) {
  /**
   * Xlsx document.
   *
   * Lucene document built from a SpreadsheetML (.xlsx) package. The constructor
   * reads the package through ZipArchive, collects the cell values of every
   * worksheet into a single 'body' field and adds the 'filename' field, the
   * meta data fields returned by extractMetaData() and a 'title' field.
   *
   * Instances are created through {@link loadXlsxFile()}; the constructor is private.
   *
   * @category Zend
   * @package Zend_Search_Lucene
   * @subpackage Document
   * @copyright Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
   * @license http://framework.zend.com/license/new-bsd New BSD License
   */
  class Zend_Search_Lucene_Document_Xlsx extends Zend_Search_Lucene_Document_OpenXml
  {
      /**
       * Xml Schema - SpreadsheetML
       *
       * @var string
       */
      const SCHEMA_SPREADSHEETML = 'http://schemas.openxmlformats.org/spreadsheetml/2006/main';

      /**
       * Xml Schema - DrawingML
       *
       * @var string
       */
      const SCHEMA_DRAWINGML = 'http://schemas.openxmlformats.org/drawingml/2006/main';

      /**
       * Xml Schema - Shared Strings
       *
       * Relationship type used to locate the shared strings part of the workbook.
       *
       * @var string
       */
      const SCHEMA_SHAREDSTRINGS = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/sharedStrings';

      /**
       * Xml Schema - Worksheet relation
       *
       * Relationship type used to locate the worksheet parts of the workbook.
       *
       * @var string
       */
      const SCHEMA_WORKSHEETRELATION = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet';

      /**
       * Xml Schema - Slide notes relation
       *
       * Not referenced anywhere in this class; kept because it is part of the
       * public constant set.
       *
       * @var string
       */
      const SCHEMA_SLIDENOTESRELATION = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/notesSlide';

      /**
       * Object constructor
       *
       * Opens the package, extracts the worksheet cell values and the meta data,
       * closes the package again (also when extraction fails) and registers the
       * resulting Lucene fields.
       *
       * @param string  $fileName     Path of the .xlsx file to index
       * @param boolean $storeContent When true the 'body' field is added with
       *                              Field::Text(), otherwise with Field::UnStored()
       * @throws Zend_Search_Lucene_Exception When the archive cannot be opened or
       *                                      its relationship parts are missing or
       *                                      cannot be parsed
       */
      private function __construct($fileName, $storeContent)
      {
          // Open OpenXML package. ZipArchive::open() returns true on success and
          // an error code otherwise, so anything but true is a failure.
          $package = new ZipArchive();
          if ($package->open($fileName) !== true) {
              throw new Zend_Search_Lucene_Exception('Invalid archive or corrupted .xlsx file.');
          }

          // try/finally is not used to stay compatible with the PHP version this
          // file targets; close the archive manually before re-throwing.
          try {
              $documentBody   = $this->_readDocumentBody($package);
              // Read core properties
              $coreProperties = $this->extractMetaData($package);
          } catch (Exception $e) {
              $package->close();
              throw $e;
          }

          // Close file
          $package->close();

          // Store filename
          $this->addField(Zend_Search_Lucene_Field::Text('filename', $fileName, 'UTF-8'));

          // Store contents
          $bodyText = implode(' ', $documentBody);
          if ($storeContent) {
              $this->addField(Zend_Search_Lucene_Field::Text('body', $bodyText, 'UTF-8'));
          } else {
              $this->addField(Zend_Search_Lucene_Field::UnStored('body', $bodyText, 'UTF-8'));
          }

          // Store meta data properties
          foreach ($coreProperties as $key => $value) {
              $this->addField(Zend_Search_Lucene_Field::Text($key, $value, 'UTF-8'));
          }

          // Store title (if not present in meta data)
          if (!isset($coreProperties['title'])) {
              $this->addField(Zend_Search_Lucene_Field::Text('title', $fileName, 'UTF-8'));
          }
      }

      /**
       * Collect the values of all cells of all worksheets of an opened package
       *
       * Follows the officeDocument relationship of '_rels/.rels' to the workbook,
       * loads the shared strings and worksheet parts referenced by the workbook
       * relationships and returns the cell values in worksheet key order.
       *
       * @param ZipArchive $package Opened OpenXML package
       * @return array List of cell values (strings, integers, floats or booleans)
       * @throws Zend_Search_Lucene_Exception When a required relationship part is
       *                                      missing or is not well-formed XML
       */
      private function _readDocumentBody(ZipArchive $package)
      {
          $sharedStrings = array();
          $worksheets    = array();

          // Read relations and search for officeDocument
          $relationsXml = $package->getFromName('_rels/.rels');
          if ($relationsXml === false) {
              throw new Zend_Search_Lucene_Exception('Invalid archive or corrupted .xlsx file.');
          }
          $relations = simplexml_load_string($relationsXml);
          if ($relations === false) {
              throw new Zend_Search_Lucene_Exception('Invalid archive or corrupted .xlsx file.');
          }

          foreach ($relations->Relationship as $rel) {
              if ((string)$rel['Type'] != Zend_Search_Lucene_Document_OpenXml::SCHEMA_OFFICEDOCUMENT) {
                  continue;
              }

              // Found office document! Read relations for workbook...
              $workbookTarget = (string)$rel['Target'];
              $workbookDir    = dirname($workbookTarget);

              $workbookRelations = $this->_loadXmlPart(
                  $package,
                  $workbookDir . '/_rels/' . basename($workbookTarget) . '.rels'
              );
              if ($workbookRelations === null) {
                  // Without the workbook relationships no worksheet can be located
                  throw new Zend_Search_Lucene_Exception('Invalid archive or corrupted .xlsx file.');
              }
              $workbookRelations->registerXPathNamespace('rel', Zend_Search_Lucene_Document_OpenXml::SCHEMA_RELATIONSHIP);

              // Read shared strings. The relationship is optional: the xpath
              // result is empty when the workbook declares no shared strings part.
              $sharedStringsRelations = $workbookRelations->xpath(
                  "rel:Relationship[@Type='" . self::SCHEMA_SHAREDSTRINGS . "']"
              );
              if (!empty($sharedStringsRelations)) {
                  $xmlStrings = $this->_loadXmlPart(
                      $package,
                      $workbookDir . '/' . (string)$sharedStringsRelations[0]['Target']
                  );
                  if ($xmlStrings !== null) {
                      foreach ($xmlStrings->si as $val) {
                          // Cells refer to shared strings by position, so every
                          // <si> must occupy exactly one slot, even when it
                          // carries neither <t> nor <r>.
                          $sharedStrings[] = $this->_parseRichText($val);
                      }
                  }
              }

              // Loop relations for workbook and extract worksheets...
              foreach ($workbookRelations->Relationship as $workbookRelation) {
                  if ((string)$workbookRelation['Type'] != self::SCHEMA_WORKSHEETRELATION) {
                      continue;
                  }

                  $sheetTarget = (string)$workbookRelation['Target'];
                  $worksheet   = $this->_loadXmlPart(
                      $package,
                      $workbookDir . '/' . dirname($sheetTarget) . '/' . basename($sheetTarget)
                  );
                  if ($worksheet === null) {
                      // Unreadable worksheet part: skip it, index the others
                      continue;
                  }

                  // Key worksheets by relationship id with the 'rId' prefix removed
                  $worksheets[str_replace('rId', '', (string)$workbookRelation['Id'])] = $worksheet;
              }

              // Only the first officeDocument relationship is processed
              break;
          }

          // Sort worksheets
          ksort($worksheets);

          // Extract contents from worksheets
          $documentBody = array();
          foreach ($worksheets as $worksheet) {
              foreach ($worksheet->sheetData->row as $row) {
                  foreach ($row->c as $c) {
                      $documentBody[] = $this->_readCellValue($c, $sharedStrings);
                  }
              }
          }

          return $documentBody;
      }

      /**
       * Load one XML part of the package
       *
       * The path is passed through the inherited absoluteZipPath() helper before
       * it is looked up in the archive.
       *
       * @param ZipArchive $package Opened OpenXML package
       * @param string     $path    Path of the part inside the package
       * @return SimpleXMLElement|null Parsed part, or null when the part is
       *                               missing, empty or not well-formed XML
       */
      private function _loadXmlPart(ZipArchive $package, $path)
      {
          $contents = $package->getFromName($this->absoluteZipPath($path));
          if ($contents === false || $contents === '') {
              return null;
          }

          $xml = simplexml_load_string($contents);
          if ($xml === false) {
              return null;
          }

          return $xml;
      }

      /**
       * Convert a worksheet cell (<c> element) into the value to be indexed
       *
       * The cell type is taken from the 't' attribute:
       *  - 's'         shared string; <v> holds the index into $sharedStrings
       *  - 'b'         boolean; '0' gives false, '1' gives true
       *  - 'inlineStr' text stored inside the cell in an <is> element
       *  - 'e'         error; the content of <v> is used as is
       *  - otherwise   content of <v>, converted to int or float when numeric
       *
       * @param SimpleXMLElement $c             Cell element
       * @param array            $sharedStrings Shared strings, indexed by position
       * @return string|integer|float|boolean
       */
      private function _readCellValue($c, array $sharedStrings)
      {
          $rawValue = (string)$c->v;

          switch ((string)$c['t']) {
              case 's':
                  // Value is a shared string
                  if ($rawValue === '') {
                      return '';
                  }
                  $index = intval($rawValue);
                  // An index outside the table yields an empty value instead of
                  // an undefined offset notice
                  return isset($sharedStrings[$index]) ? $sharedStrings[$index] : '';

              case 'b':
                  // Value is boolean
                  if ($rawValue == '0') {
                      return false;
                  }
                  if ($rawValue == '1') {
                      return true;
                  }
                  return (bool)$c->v;

              case 'inlineStr':
                  // Value is rich text inline
                  return $this->_parseRichText($c->is);

              case 'e':
                  // Value is an error message
                  return $rawValue;

              default:
                  // Value is a string; normalize numeric values
                  if (is_numeric($rawValue)) {
                      if ($rawValue == (int)$rawValue) {
                          return (int)$rawValue;
                      }
                      return (float)$rawValue;
                  }
                  return $rawValue;
          }
      }

      /**
       * Parse rich text XML
       *
       * Returns the content of the <t> child when there is one; otherwise the
       * <t> contents of all <r> (run) children are concatenated.
       *
       * @param SimpleXMLElement $is Element holding the text (<is> or <si>)
       * @return string Extracted text, empty string when there is none
       */
      private function _parseRichText($is = null) {
          if ($is === null) {
              return '';
          }

          if (isset($is->t)) {
              return (string)$is->t;
          }

          $value = array();
          foreach ($is->r as $run) {
              $value[] = (string)$run->t;
          }

          return implode('', $value);
      }

      /**
       * Load Xlsx document from a file
       *
       * @param string  $fileName     Path of the .xlsx file to index
       * @param boolean $storeContent Whether the 'body' field is stored
       * @return Zend_Search_Lucene_Document_Xlsx
       * @throws Zend_Search_Lucene_Exception When the file cannot be read as an
       *                                      .xlsx package
       */
      public static function loadXlsxFile($fileName, $storeContent = false)
      {
          return new Zend_Search_Lucene_Document_Xlsx($fileName, $storeContent);
      }
  }
  225. } // end if (class_exists('ZipArchive'))