/src/Dcp/DevTools/Ods/Converter.php

https://github.com/Anakeen/development-tools · PHP · 238 lines · 170 code · 23 blank · 45 comment · 36 complexity · 85e91062c0af0f468a5ecf55f31d1664 MD5 · raw file

  1. <?php
  2. namespace Dcp\DevTools\Ods;
  3. class Converter
  4. {
  5. protected $enclosure = null;
  6. protected $delimiter = null;
  7. private $rows = array();
  8. private $nrows = 0;
  9. private $ncol = 0;
  10. private $celldata = '';
  11. private $colrepeat = false;
  12. private $inrow = false;
  13. private $incell = false;
  14. private $cellattrs = array();
  15. /**
  16. * @param string $enclosure the enclosure parameter for the output CSV
  17. * @param string $delimiter the delimiter parameter for the output CSV
  18. */
  19. public function __construct($enclosure = '"', $delimiter = ";")
  20. {
  21. $this->enclosure = $enclosure;
  22. $this->delimiter = $delimiter;
  23. }
  24. /**
  25. * Take an ODS file and produce one CSV
  26. *
  27. * @param string $inputFile path to an ODS file
  28. * @param string $outputFile path where a writeable place
  29. * @param boolean $force force the write if the output file exist
  30. *
  31. * @throws ConverterException
  32. * @return Converter
  33. */
  34. public function convert($inputFile, $outputFile, $force = false)
  35. {
  36. if (!file_exists($inputFile)) {
  37. throw new ConverterException("Unable to find the ODS file at $inputFile");
  38. }
  39. if (file_exists($outputFile) && $force === false) {
  40. throw new ConverterException("There is a file at $outputFile. Use force to overwrite it.");
  41. }
  42. $this->reinitInternalElements();
  43. $content = $this->unzipODS($inputFile);
  44. $this->parseContent($content);
  45. $this->writeCSVFile($outputFile);
  46. return $this;
  47. }
  48. /**
  49. * Write the CSV
  50. *
  51. * @param string $outputFile path to the output file
  52. * @throws ConverterException
  53. */
  54. protected function writeCSVFile($outputFile)
  55. {
  56. $outputFile = fopen($outputFile, "w");
  57. if ($outputFile === false) {
  58. throw new ConverterException("Unable to open $outputFile in w mode");
  59. }
  60. foreach ($this->rows as $currentRow) {
  61. fputcsv($outputFile, $currentRow, $this->delimiter, $this->enclosure);
  62. }
  63. fclose($outputFile);
  64. }
  65. /**
  66. * Reinit the internal elements
  67. *
  68. * @return $this
  69. */
  70. protected function reinitInternalElements()
  71. {
  72. $this->rows = array();
  73. $this->nrows = 0;
  74. $this->ncol = 0;
  75. $this->colrepeat = false;
  76. $this->inrow = false;
  77. $this->incell = false;
  78. return $this;
  79. }
  80. /**
  81. * Extract content from an ods file
  82. *
  83. * @param string $odsfile file path
  84. * @throws ConverterException
  85. * @return string
  86. */
  87. protected function unzipODS($odsfile)
  88. {
  89. $contentFile = fopen("zip://$odsfile#content.xml", "r");
  90. if ($contentFile === false) {
  91. throw new ConverterException("Unable to read the content.xml of $odsfile");
  92. }
  93. $content = "";
  94. while (!feof($contentFile)) {
  95. $content .= fread($contentFile, 2);
  96. }
  97. fclose($contentFile);
  98. return $content;
  99. }
  100. /**
  101. * @param $xmlcontent
  102. *
  103. * @throws ConverterException
  104. * @return string
  105. */
  106. protected function parseContent($xmlcontent)
  107. {
  108. $xml_parser = xml_parser_create();
  109. xml_parser_set_option($xml_parser, XML_OPTION_CASE_FOLDING, true);
  110. xml_parser_set_option($xml_parser, XML_OPTION_SKIP_WHITE, 0);
  111. xml_set_object($xml_parser, $this);
  112. xml_set_element_handler($xml_parser, "startElement", "endElement");
  113. xml_set_character_data_handler($xml_parser, "characterData");
  114. if (!xml_parse($xml_parser, $xmlcontent)) {
  115. throw new ConverterException(sprintf("Unable to parse XML : %s line %d", xml_error_string(xml_get_error_code($xml_parser)), xml_get_current_line_number($xml_parser)));
  116. }
  117. xml_parser_free($xml_parser);
  118. }
  119. /* Handling method for XML parser*/
  120. protected function startElement(
  121. /** @noinspection PhpUnusedParameterInspection */
  122. $parser, $name, $attrs)
  123. {
  124. if ($name == "TABLE:TABLE-ROW") {
  125. $this->inrow = true;
  126. if (isset($this->rows[$this->nrows])) {
  127. // fill empty cells
  128. $idx = 0;
  129. /** @noinspection PhpUnusedLocalVariableInspection */
  130. foreach ($this->rows[$this->nrows] as & $v) {
  131. if (!isset($this->rows[$this->nrows][$idx])) {
  132. $this->rows[$this->nrows][$idx] = '';
  133. }
  134. $idx++;
  135. }
  136. ksort($this->rows[$this->nrows], SORT_NUMERIC);
  137. }
  138. $this->nrows++;
  139. $this->ncol = 0;
  140. $this->rows[$this->nrows] = array();
  141. }
  142. if ($name == "TABLE:TABLE-CELL") {
  143. $this->incell = true;
  144. $this->celldata = "";
  145. $this->cellattrs = $attrs;
  146. if (!empty($attrs["TABLE:NUMBER-COLUMNS-REPEATED"])) {
  147. $this->colrepeat = intval($attrs["TABLE:NUMBER-COLUMNS-REPEATED"]);
  148. }
  149. }
  150. if ($name == "TEXT:P") {
  151. if (isset($this->rows[$this->nrows][$this->ncol])) {
  152. if (strlen($this->rows[$this->nrows][$this->ncol]) > 0) {
  153. $this->rows[$this->nrows][$this->ncol] .= '\n';
  154. }
  155. }
  156. }
  157. }
  158. protected function endElement($parser, $name)
  159. {
  160. if ($name == "TABLE:TABLE-ROW") {
  161. // Remove trailing empty cells
  162. $i = $this->ncol - 1;
  163. while ($i >= 0) {
  164. if (strlen($this->rows[$this->nrows][$i]) > 0) {
  165. break;
  166. }
  167. $i--;
  168. }
  169. array_splice($this->rows[$this->nrows], $i + 1);
  170. $this->inrow = false;
  171. }
  172. if ($name == "TEXT:S") {
  173. $this->celldata .= ' ';
  174. }
  175. if ($name == "TABLE:TABLE-CELL") {
  176. $this->incell = false;
  177. if ($this->celldata === '') {
  178. $this->celldata = $this->getOfficeTypedValue($this->cellattrs);
  179. }
  180. $this->rows[$this->nrows][$this->ncol] = $this->celldata;
  181. if ($this->colrepeat > 1) {
  182. $rval = $this->rows[$this->nrows][$this->ncol];
  183. for ($i = 1; $i < $this->colrepeat; $i++) {
  184. $this->ncol++;
  185. $this->rows[$this->nrows][$this->ncol] = $rval;
  186. }
  187. }
  188. $this->ncol++;
  189. $this->colrepeat = 0;
  190. }
  191. }
  192. protected function characterData($parser, $data)
  193. {
  194. if ($this->inrow && $this->incell) {
  195. $this->celldata .= $data;
  196. }
  197. }
  198. private function getOfficeTypedValue($attrs)
  199. {
  200. $value = '';
  201. /* Get value from property OFFICE:<type>-VALUE */
  202. if (isset($attrs['OFFICE:VALUE-TYPE'])) {
  203. $type = strtoupper($attrs['OFFICE:VALUE-TYPE']);
  204. $propName = 'OFFICE:' . $type . '-VALUE';
  205. if (isset($attrs[$propName])) {
  206. $value = (string)$attrs[$propName];
  207. }
  208. }
  209. /* Get value from property OFFICE:VALUE */
  210. if ($value == '' && isset($attrs['OFFICE:VALUE'])) {
  211. $value = (string)$attrs['OFFICE:VALUE'];
  212. }
  213. return $value;
  214. }
  215. }