PageRenderTime 36ms CodeModel.GetById 14ms RepoModel.GetById 1ms app.codeStats 0ms

/lib/spout/src/Spout/Reader/ODS/RowIterator.php

https://bitbucket.org/moodle/moodle
PHP | 384 lines | 210 code | 48 blank | 126 comment | 7 complexity | 8ccb936fcc542dd7eca48e5c49dbf172 MD5 | raw file
Possible License(s): Apache-2.0, LGPL-2.1, BSD-3-Clause, MIT, GPL-3.0
  1. <?php
  2. namespace Box\Spout\Reader\ODS;
  3. use Box\Spout\Common\Entity\Cell;
  4. use Box\Spout\Common\Entity\Row;
  5. use Box\Spout\Common\Exception\IOException;
  6. use Box\Spout\Common\Manager\OptionsManagerInterface;
  7. use Box\Spout\Reader\Common\Entity\Options;
  8. use Box\Spout\Reader\Common\Manager\RowManager;
  9. use Box\Spout\Reader\Common\XMLProcessor;
  10. use Box\Spout\Reader\Exception\InvalidValueException;
  11. use Box\Spout\Reader\Exception\IteratorNotRewindableException;
  12. use Box\Spout\Reader\Exception\XMLProcessingException;
  13. use Box\Spout\Reader\IteratorInterface;
  14. use Box\Spout\Reader\ODS\Creator\InternalEntityFactory;
  15. use Box\Spout\Reader\ODS\Helper\CellValueFormatter;
  16. use Box\Spout\Reader\Wrapper\XMLReader;
  17. /**
  18. * Class RowIterator
  19. */
  20. class RowIterator implements IteratorInterface
  21. {
  22. /** Definition of XML nodes names used to parse data */
  23. const XML_NODE_TABLE = 'table:table';
  24. const XML_NODE_ROW = 'table:table-row';
  25. const XML_NODE_CELL = 'table:table-cell';
  26. const MAX_COLUMNS_EXCEL = 16384;
  27. /** Definition of XML attribute used to parse data */
  28. const XML_ATTRIBUTE_NUM_ROWS_REPEATED = 'table:number-rows-repeated';
  29. const XML_ATTRIBUTE_NUM_COLUMNS_REPEATED = 'table:number-columns-repeated';
  30. /** @var \Box\Spout\Reader\Wrapper\XMLReader The XMLReader object that will help read sheet's XML data */
  31. protected $xmlReader;
  32. /** @var \Box\Spout\Reader\Common\XMLProcessor Helper Object to process XML nodes */
  33. protected $xmlProcessor;
  34. /** @var bool Whether empty rows should be returned or skipped */
  35. protected $shouldPreserveEmptyRows;
  36. /** @var Helper\CellValueFormatter Helper to format cell values */
  37. protected $cellValueFormatter;
  38. /** @var RowManager Manages rows */
  39. protected $rowManager;
  40. /** @var InternalEntityFactory Factory to create entities */
  41. protected $entityFactory;
  42. /** @var bool Whether the iterator has already been rewound once */
  43. protected $hasAlreadyBeenRewound = false;
  44. /** @var Row The currently processed row */
  45. protected $currentlyProcessedRow;
  46. /** @var Row Buffer used to store the current row, while checking if there are more rows to read */
  47. protected $rowBuffer;
  48. /** @var bool Indicates whether all rows have been read */
  49. protected $hasReachedEndOfFile = false;
  50. /** @var int Last row index processed (one-based) */
  51. protected $lastRowIndexProcessed = 0;
  52. /** @var int Row index to be processed next (one-based) */
  53. protected $nextRowIndexToBeProcessed = 1;
  54. /** @var Cell Last processed cell (because when reading cell at column N+1, cell N is processed) */
  55. protected $lastProcessedCell;
  56. /** @var int Number of times the last processed row should be repeated */
  57. protected $numRowsRepeated = 1;
  58. /** @var int Number of times the last cell value should be copied to the cells on its right */
  59. protected $numColumnsRepeated = 1;
  60. /** @var bool Whether at least one cell has been read for the row currently being processed */
  61. protected $hasAlreadyReadOneCellInCurrentRow = false;
  62. /**
  63. * @param XMLReader $xmlReader XML Reader, positioned on the "<table:table>" element
  64. * @param OptionsManagerInterface $optionsManager Reader's options manager
  65. * @param CellValueFormatter $cellValueFormatter Helper to format cell values
  66. * @param XMLProcessor $xmlProcessor Helper to process XML files
  67. * @param RowManager $rowManager Manages rows
  68. * @param InternalEntityFactory $entityFactory Factory to create entities
  69. */
  70. public function __construct(
  71. XMLReader $xmlReader,
  72. OptionsManagerInterface $optionsManager,
  73. CellValueFormatter $cellValueFormatter,
  74. XMLProcessor $xmlProcessor,
  75. RowManager $rowManager,
  76. InternalEntityFactory $entityFactory
  77. ) {
  78. $this->xmlReader = $xmlReader;
  79. $this->shouldPreserveEmptyRows = $optionsManager->getOption(Options::SHOULD_PRESERVE_EMPTY_ROWS);
  80. $this->cellValueFormatter = $cellValueFormatter;
  81. $this->entityFactory = $entityFactory;
  82. $this->rowManager = $rowManager;
  83. // Register all callbacks to process different nodes when reading the XML file
  84. $this->xmlProcessor = $xmlProcessor;
  85. $this->xmlProcessor->registerCallback(self::XML_NODE_ROW, XMLProcessor::NODE_TYPE_START, [$this, 'processRowStartingNode']);
  86. $this->xmlProcessor->registerCallback(self::XML_NODE_CELL, XMLProcessor::NODE_TYPE_START, [$this, 'processCellStartingNode']);
  87. $this->xmlProcessor->registerCallback(self::XML_NODE_ROW, XMLProcessor::NODE_TYPE_END, [$this, 'processRowEndingNode']);
  88. $this->xmlProcessor->registerCallback(self::XML_NODE_TABLE, XMLProcessor::NODE_TYPE_END, [$this, 'processTableEndingNode']);
  89. }
  90. /**
  91. * Rewind the Iterator to the first element.
  92. * NOTE: It can only be done once, as it is not possible to read an XML file backwards.
  93. * @see http://php.net/manual/en/iterator.rewind.php
  94. *
  95. * @throws \Box\Spout\Reader\Exception\IteratorNotRewindableException If the iterator is rewound more than once
  96. * @return void
  97. */
  98. public function rewind()
  99. {
  100. // Because sheet and row data is located in the file, we can't rewind both the
  101. // sheet iterator and the row iterator, as XML file cannot be read backwards.
  102. // Therefore, rewinding the row iterator has been disabled.
  103. if ($this->hasAlreadyBeenRewound) {
  104. throw new IteratorNotRewindableException();
  105. }
  106. $this->hasAlreadyBeenRewound = true;
  107. $this->lastRowIndexProcessed = 0;
  108. $this->nextRowIndexToBeProcessed = 1;
  109. $this->rowBuffer = null;
  110. $this->hasReachedEndOfFile = false;
  111. $this->next();
  112. }
  113. /**
  114. * Checks if current position is valid
  115. * @see http://php.net/manual/en/iterator.valid.php
  116. *
  117. * @return bool
  118. */
  119. public function valid()
  120. {
  121. return (!$this->hasReachedEndOfFile);
  122. }
  123. /**
  124. * Move forward to next element. Empty rows will be skipped.
  125. * @see http://php.net/manual/en/iterator.next.php
  126. *
  127. * @throws \Box\Spout\Reader\Exception\SharedStringNotFoundException If a shared string was not found
  128. * @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML
  129. * @return void
  130. */
  131. public function next()
  132. {
  133. if ($this->doesNeedDataForNextRowToBeProcessed()) {
  134. $this->readDataForNextRow();
  135. }
  136. $this->lastRowIndexProcessed++;
  137. }
  138. /**
  139. * Returns whether we need data for the next row to be processed.
  140. * We DO need to read data if:
  141. * - we have not read any rows yet
  142. * OR
  143. * - the next row to be processed immediately follows the last read row
  144. *
  145. * @return bool Whether we need data for the next row to be processed.
  146. */
  147. protected function doesNeedDataForNextRowToBeProcessed()
  148. {
  149. $hasReadAtLeastOneRow = ($this->lastRowIndexProcessed !== 0);
  150. return (
  151. !$hasReadAtLeastOneRow ||
  152. $this->lastRowIndexProcessed === $this->nextRowIndexToBeProcessed - 1
  153. );
  154. }
  155. /**
  156. * @throws \Box\Spout\Reader\Exception\SharedStringNotFoundException If a shared string was not found
  157. * @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML
  158. * @return void
  159. */
  160. protected function readDataForNextRow()
  161. {
  162. $this->currentlyProcessedRow = $this->entityFactory->createRow();
  163. try {
  164. $this->xmlProcessor->readUntilStopped();
  165. } catch (XMLProcessingException $exception) {
  166. throw new IOException("The sheet's data cannot be read. [{$exception->getMessage()}]");
  167. }
  168. $this->rowBuffer = $this->currentlyProcessedRow;
  169. }
  170. /**
  171. * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<table:table-row>" starting node
  172. * @return int A return code that indicates what action should the processor take next
  173. */
  174. protected function processRowStartingNode($xmlReader)
  175. {
  176. // Reset data from current row
  177. $this->hasAlreadyReadOneCellInCurrentRow = false;
  178. $this->lastProcessedCell = null;
  179. $this->numColumnsRepeated = 1;
  180. $this->numRowsRepeated = $this->getNumRowsRepeatedForCurrentNode($xmlReader);
  181. return XMLProcessor::PROCESSING_CONTINUE;
  182. }
  183. /**
  184. * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<table:table-cell>" starting node
  185. * @return int A return code that indicates what action should the processor take next
  186. */
  187. protected function processCellStartingNode($xmlReader)
  188. {
  189. $currentNumColumnsRepeated = $this->getNumColumnsRepeatedForCurrentNode($xmlReader);
  190. // NOTE: expand() will automatically decode all XML entities of the child nodes
  191. $node = $xmlReader->expand();
  192. $currentCell = $this->getCell($node);
  193. // process cell N only after having read cell N+1 (see below why)
  194. if ($this->hasAlreadyReadOneCellInCurrentRow) {
  195. for ($i = 0; $i < $this->numColumnsRepeated; $i++) {
  196. $this->currentlyProcessedRow->addCell($this->lastProcessedCell);
  197. }
  198. }
  199. $this->hasAlreadyReadOneCellInCurrentRow = true;
  200. $this->lastProcessedCell = $currentCell;
  201. $this->numColumnsRepeated = $currentNumColumnsRepeated;
  202. return XMLProcessor::PROCESSING_CONTINUE;
  203. }
  204. /**
  205. * @return int A return code that indicates what action should the processor take next
  206. */
  207. protected function processRowEndingNode()
  208. {
  209. $isEmptyRow = $this->isEmptyRow($this->currentlyProcessedRow, $this->lastProcessedCell);
  210. // if the fetched row is empty and we don't want to preserve it...
  211. if (!$this->shouldPreserveEmptyRows && $isEmptyRow) {
  212. // ... skip it
  213. return XMLProcessor::PROCESSING_CONTINUE;
  214. }
  215. // if the row is empty, we don't want to return more than one cell
  216. $actualNumColumnsRepeated = (!$isEmptyRow) ? $this->numColumnsRepeated : 1;
  217. $numCellsInCurrentlyProcessedRow = $this->currentlyProcessedRow->getNumCells();
  218. // Only add the value if the last read cell is not a trailing empty cell repeater in Excel.
  219. // The current count of read columns is determined by counting the values in "$this->currentlyProcessedRowData".
  220. // This is to avoid creating a lot of empty cells, as Excel adds a last empty "<table:table-cell>"
  221. // with a number-columns-repeated value equals to the number of (supported columns - used columns).
  222. // In Excel, the number of supported columns is 16384, but we don't want to returns rows with
  223. // always 16384 cells.
  224. if (($numCellsInCurrentlyProcessedRow + $actualNumColumnsRepeated) !== self::MAX_COLUMNS_EXCEL) {
  225. for ($i = 0; $i < $actualNumColumnsRepeated; $i++) {
  226. $this->currentlyProcessedRow->addCell($this->lastProcessedCell);
  227. }
  228. }
  229. // If we are processing row N and the row is repeated M times,
  230. // then the next row to be processed will be row (N+M).
  231. $this->nextRowIndexToBeProcessed += $this->numRowsRepeated;
  232. // at this point, we have all the data we need for the row
  233. // so that we can populate the buffer
  234. return XMLProcessor::PROCESSING_STOP;
  235. }
  236. /**
  237. * @return int A return code that indicates what action should the processor take next
  238. */
  239. protected function processTableEndingNode()
  240. {
  241. // The closing "</table:table>" marks the end of the file
  242. $this->hasReachedEndOfFile = true;
  243. return XMLProcessor::PROCESSING_STOP;
  244. }
  245. /**
  246. * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<table:table-row>" starting node
  247. * @return int The value of "table:number-rows-repeated" attribute of the current node, or 1 if attribute missing
  248. */
  249. protected function getNumRowsRepeatedForCurrentNode($xmlReader)
  250. {
  251. $numRowsRepeated = $xmlReader->getAttribute(self::XML_ATTRIBUTE_NUM_ROWS_REPEATED);
  252. return ($numRowsRepeated !== null) ? (int) $numRowsRepeated : 1;
  253. }
  254. /**
  255. * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<table:table-cell>" starting node
  256. * @return int The value of "table:number-columns-repeated" attribute of the current node, or 1 if attribute missing
  257. */
  258. protected function getNumColumnsRepeatedForCurrentNode($xmlReader)
  259. {
  260. $numColumnsRepeated = $xmlReader->getAttribute(self::XML_ATTRIBUTE_NUM_COLUMNS_REPEATED);
  261. return ($numColumnsRepeated !== null) ? (int) $numColumnsRepeated : 1;
  262. }
  263. /**
  264. * Returns the cell with (unescaped) correctly marshalled, cell value associated to the given XML node.
  265. *
  266. * @param \DOMNode $node
  267. * @return Cell The cell set with the associated with the cell
  268. */
  269. protected function getCell($node)
  270. {
  271. try {
  272. $cellValue = $this->cellValueFormatter->extractAndFormatNodeValue($node);
  273. $cell = $this->entityFactory->createCell($cellValue);
  274. } catch (InvalidValueException $exception) {
  275. $cell = $this->entityFactory->createCell($exception->getInvalidValue());
  276. $cell->setType(Cell::TYPE_ERROR);
  277. }
  278. return $cell;
  279. }
  280. /**
  281. * After finishing processing each cell, a row is considered empty if it contains
  282. * no cells or if the last read cell is empty.
  283. * After finishing processing each cell, the last read cell is not part of the
  284. * row data yet (as we still need to apply the "num-columns-repeated" attribute).
  285. *
  286. * @param Row $currentRow
  287. * @param Cell $lastReadCell The last read cell
  288. * @return bool Whether the row is empty
  289. */
  290. protected function isEmptyRow($currentRow, $lastReadCell)
  291. {
  292. return (
  293. $this->rowManager->isEmpty($currentRow) &&
  294. (!isset($lastReadCell) || $lastReadCell->isEmpty())
  295. );
  296. }
  297. /**
  298. * Return the current element, from the buffer.
  299. * @see http://php.net/manual/en/iterator.current.php
  300. *
  301. * @return Row
  302. */
  303. public function current()
  304. {
  305. return $this->rowBuffer;
  306. }
  307. /**
  308. * Return the key of the current element
  309. * @see http://php.net/manual/en/iterator.key.php
  310. *
  311. * @return int
  312. */
  313. public function key()
  314. {
  315. return $this->lastRowIndexProcessed;
  316. }
  317. /**
  318. * Cleans up what was created to iterate over the object.
  319. *
  320. * @return void
  321. */
  322. public function end()
  323. {
  324. $this->xmlReader->close();
  325. }
  326. }