PageRenderTime 124ms CodeModel.GetById 20ms RepoModel.GetById 0ms app.codeStats 0ms

/backend/modules/mbase2/vendor/phpoffice/phpspreadsheet/src/PhpSpreadsheet/Reader/Csv.php

https://gitlab.com/mbase2/source-code
PHP | 564 lines | 330 code | 68 blank | 166 comment | 29 complexity | 8882e32bd2cee77602b1fcfcc3fbff78 MD5 | raw file
  1. <?php
  2. namespace PhpOffice\PhpSpreadsheet\Reader;
  3. use PhpOffice\PhpSpreadsheet\Cell\Coordinate;
  4. use PhpOffice\PhpSpreadsheet\Shared\StringHelper;
  5. use PhpOffice\PhpSpreadsheet\Spreadsheet;
  6. class Csv extends BaseReader
  7. {
    /**
     * Character encoding the input file is assumed to be written in.
     *
     * Used to decide which BOM (if any) to skip and whether cell values must be converted to UTF-8 while loading.
     *
     * @var string
     */
    private $inputEncoding = 'UTF-8';

    /**
     * Field delimiter.
     *
     * Has no default: it stays null until it is set through setDelimiter(), read from a leading "sep=" line,
     * or inferred from the file contents.
     *
     * @var null|string
     */
    private $delimiter;

    /**
     * Field enclosure character.
     *
     * @var string
     */
    private $enclosure = '"';

    /**
     * Index of the worksheet that loaded rows are written to.
     *
     * @var int
     */
    private $sheetIndex = 0;

    /**
     * Whether successive loads continue writing below previously loaded rows instead of starting at row 1.
     *
     * @var bool
     */
    private $contiguous = false;

    /**
     * Row at which the next contiguous load starts; -1 means no row has been recorded yet.
     *
     * @var int
     */
    private $contiguousRow = -1;

    /**
     * The character that can escape the enclosure.
     *
     * @var string
     */
    private $escapeCharacter = '\\';
    /**
     * Create a new CSV Reader instance.
     *
     * Performs no CSV-specific initialisation; it only delegates to the parent reader's constructor.
     */
    public function __construct()
    {
        parent::__construct();
    }
    /**
     * Set the encoding the input file is expected to use.
     *
     * The value is stored as given; no validation or normalisation is performed here.
     *
     * @param string $pValue Input encoding, eg: 'UTF-8'
     *
     * @return Csv This reader, to allow method chaining
     */
    public function setInputEncoding($pValue)
    {
        $this->inputEncoding = $pValue;

        return $this;
    }
    /**
     * Get the encoding the input file is expected to use.
     *
     * @return string Input encoding, 'UTF-8' unless changed through setInputEncoding()
     */
    public function getInputEncoding()
    {
        return $this->inputEncoding;
    }
  78. /**
  79. * Move filepointer past any BOM marker.
  80. */
  81. protected function skipBOM()
  82. {
  83. rewind($this->fileHandle);
  84. switch ($this->inputEncoding) {
  85. case 'UTF-8':
  86. fgets($this->fileHandle, 4) == "\xEF\xBB\xBF" ?
  87. fseek($this->fileHandle, 3) : fseek($this->fileHandle, 0);
  88. break;
  89. case 'UTF-16LE':
  90. fgets($this->fileHandle, 3) == "\xFF\xFE" ?
  91. fseek($this->fileHandle, 2) : fseek($this->fileHandle, 0);
  92. break;
  93. case 'UTF-16BE':
  94. fgets($this->fileHandle, 3) == "\xFE\xFF" ?
  95. fseek($this->fileHandle, 2) : fseek($this->fileHandle, 0);
  96. break;
  97. case 'UTF-32LE':
  98. fgets($this->fileHandle, 5) == "\xFF\xFE\x00\x00" ?
  99. fseek($this->fileHandle, 4) : fseek($this->fileHandle, 0);
  100. break;
  101. case 'UTF-32BE':
  102. fgets($this->fileHandle, 5) == "\x00\x00\xFE\xFF" ?
  103. fseek($this->fileHandle, 4) : fseek($this->fileHandle, 0);
  104. break;
  105. default:
  106. break;
  107. }
  108. }
  109. /**
  110. * Identify any separator that is explicitly set in the file.
  111. */
  112. protected function checkSeparator()
  113. {
  114. $line = fgets($this->fileHandle);
  115. if ($line === false) {
  116. return;
  117. }
  118. if ((strlen(trim($line, "\r\n")) == 5) && (stripos($line, 'sep=') === 0)) {
  119. $this->delimiter = substr($line, 4, 1);
  120. return;
  121. }
  122. $this->skipBOM();
  123. }
  124. /**
  125. * Infer the separator if it isn't explicitly set in the file or specified by the user.
  126. */
  127. protected function inferSeparator()
  128. {
  129. if ($this->delimiter !== null) {
  130. return;
  131. }
  132. $potentialDelimiters = [',', ';', "\t", '|', ':', ' ', '~'];
  133. $counts = [];
  134. foreach ($potentialDelimiters as $delimiter) {
  135. $counts[$delimiter] = [];
  136. }
  137. // Count how many times each of the potential delimiters appears in each line
  138. $numberLines = 0;
  139. while (($line = $this->getNextLine()) !== false && (++$numberLines < 1000)) {
  140. $countLine = [];
  141. for ($i = strlen($line) - 1; $i >= 0; --$i) {
  142. $char = $line[$i];
  143. if (isset($counts[$char])) {
  144. if (!isset($countLine[$char])) {
  145. $countLine[$char] = 0;
  146. }
  147. ++$countLine[$char];
  148. }
  149. }
  150. foreach ($potentialDelimiters as $delimiter) {
  151. $counts[$delimiter][] = isset($countLine[$delimiter])
  152. ? $countLine[$delimiter]
  153. : 0;
  154. }
  155. }
  156. // If number of lines is 0, nothing to infer : fall back to the default
  157. if ($numberLines === 0) {
  158. $this->delimiter = reset($potentialDelimiters);
  159. $this->skipBOM();
  160. return;
  161. }
  162. // Calculate the mean square deviations for each delimiter (ignoring delimiters that haven't been found consistently)
  163. $meanSquareDeviations = [];
  164. $middleIdx = floor(($numberLines - 1) / 2);
  165. foreach ($potentialDelimiters as $delimiter) {
  166. $series = $counts[$delimiter];
  167. sort($series);
  168. $median = ($numberLines % 2)
  169. ? $series[$middleIdx]
  170. : ($series[$middleIdx] + $series[$middleIdx + 1]) / 2;
  171. if ($median === 0) {
  172. continue;
  173. }
  174. $meanSquareDeviations[$delimiter] = array_reduce(
  175. $series,
  176. function ($sum, $value) use ($median) {
  177. return $sum + pow($value - $median, 2);
  178. }
  179. ) / count($series);
  180. }
  181. // ... and pick the delimiter with the smallest mean square deviation (in case of ties, the order in potentialDelimiters is respected)
  182. $min = INF;
  183. foreach ($potentialDelimiters as $delimiter) {
  184. if (!isset($meanSquareDeviations[$delimiter])) {
  185. continue;
  186. }
  187. if ($meanSquareDeviations[$delimiter] < $min) {
  188. $min = $meanSquareDeviations[$delimiter];
  189. $this->delimiter = $delimiter;
  190. }
  191. }
  192. // If no delimiter could be detected, fall back to the default
  193. if ($this->delimiter === null) {
  194. $this->delimiter = reset($potentialDelimiters);
  195. }
  196. $this->skipBOM();
  197. }
  198. /**
  199. * Get the next full line from the file.
  200. *
  201. * @param string $line
  202. *
  203. * @return bool|string
  204. */
  205. private function getNextLine($line = '')
  206. {
  207. // Get the next line in the file
  208. $newLine = fgets($this->fileHandle);
  209. // Return false if there is no next line
  210. if ($newLine === false) {
  211. return false;
  212. }
  213. // Add the new line to the line passed in
  214. $line = $line . $newLine;
  215. // Drop everything that is enclosed to avoid counting false positives in enclosures
  216. $enclosure = '(?<!' . preg_quote($this->escapeCharacter, '/') . ')'
  217. . preg_quote($this->enclosure, '/');
  218. $line = preg_replace('/(' . $enclosure . '.*' . $enclosure . ')/Us', '', $line);
  219. // See if we have any enclosures left in the line
  220. // if we still have an enclosure then we need to read the next line as well
  221. if (preg_match('/(' . $enclosure . ')/', $line) > 0) {
  222. $line = $this->getNextLine($line);
  223. }
  224. return $line;
  225. }
  226. /**
  227. * Return worksheet info (Name, Last Column Letter, Last Column Index, Total Rows, Total Columns).
  228. *
  229. * @param string $pFilename
  230. *
  231. * @throws Exception
  232. *
  233. * @return array
  234. */
  235. public function listWorksheetInfo($pFilename)
  236. {
  237. // Open file
  238. if (!$this->canRead($pFilename)) {
  239. throw new Exception($pFilename . ' is an Invalid Spreadsheet file.');
  240. }
  241. $this->openFile($pFilename);
  242. $fileHandle = $this->fileHandle;
  243. // Skip BOM, if any
  244. $this->skipBOM();
  245. $this->checkSeparator();
  246. $this->inferSeparator();
  247. $worksheetInfo = [];
  248. $worksheetInfo[0]['worksheetName'] = 'Worksheet';
  249. $worksheetInfo[0]['lastColumnLetter'] = 'A';
  250. $worksheetInfo[0]['lastColumnIndex'] = 0;
  251. $worksheetInfo[0]['totalRows'] = 0;
  252. $worksheetInfo[0]['totalColumns'] = 0;
  253. // Loop through each line of the file in turn
  254. while (($rowData = fgetcsv($fileHandle, 0, $this->delimiter, $this->enclosure, $this->escapeCharacter)) !== false) {
  255. ++$worksheetInfo[0]['totalRows'];
  256. $worksheetInfo[0]['lastColumnIndex'] = max($worksheetInfo[0]['lastColumnIndex'], count($rowData) - 1);
  257. }
  258. $worksheetInfo[0]['lastColumnLetter'] = Coordinate::stringFromColumnIndex($worksheetInfo[0]['lastColumnIndex'] + 1);
  259. $worksheetInfo[0]['totalColumns'] = $worksheetInfo[0]['lastColumnIndex'] + 1;
  260. // Close file
  261. fclose($fileHandle);
  262. return $worksheetInfo;
  263. }
  264. /**
  265. * Loads Spreadsheet from file.
  266. *
  267. * @param string $pFilename
  268. *
  269. * @throws Exception
  270. *
  271. * @return Spreadsheet
  272. */
  273. public function load($pFilename)
  274. {
  275. // Create new Spreadsheet
  276. $spreadsheet = new Spreadsheet();
  277. // Load into this instance
  278. return $this->loadIntoExisting($pFilename, $spreadsheet);
  279. }
  280. /**
  281. * Loads PhpSpreadsheet from file into PhpSpreadsheet instance.
  282. *
  283. * @param string $pFilename
  284. * @param Spreadsheet $spreadsheet
  285. *
  286. * @throws Exception
  287. *
  288. * @return Spreadsheet
  289. */
  290. public function loadIntoExisting($pFilename, Spreadsheet $spreadsheet)
  291. {
  292. $lineEnding = ini_get('auto_detect_line_endings');
  293. ini_set('auto_detect_line_endings', true);
  294. // Open file
  295. if (!$this->canRead($pFilename)) {
  296. throw new Exception($pFilename . ' is an Invalid Spreadsheet file.');
  297. }
  298. $this->openFile($pFilename);
  299. $fileHandle = $this->fileHandle;
  300. // Skip BOM, if any
  301. $this->skipBOM();
  302. $this->checkSeparator();
  303. $this->inferSeparator();
  304. // Create new PhpSpreadsheet object
  305. while ($spreadsheet->getSheetCount() <= $this->sheetIndex) {
  306. $spreadsheet->createSheet();
  307. }
  308. $sheet = $spreadsheet->setActiveSheetIndex($this->sheetIndex);
  309. // Set our starting row based on whether we're in contiguous mode or not
  310. $currentRow = 1;
  311. if ($this->contiguous) {
  312. $currentRow = ($this->contiguousRow == -1) ? $sheet->getHighestRow() : $this->contiguousRow;
  313. }
  314. // Loop through each line of the file in turn
  315. while (($rowData = fgetcsv($fileHandle, 0, $this->delimiter, $this->enclosure, $this->escapeCharacter)) !== false) {
  316. $columnLetter = 'A';
  317. foreach ($rowData as $rowDatum) {
  318. if ($rowDatum != '' && $this->readFilter->readCell($columnLetter, $currentRow)) {
  319. // Convert encoding if necessary
  320. if ($this->inputEncoding !== 'UTF-8') {
  321. $rowDatum = StringHelper::convertEncoding($rowDatum, 'UTF-8', $this->inputEncoding);
  322. }
  323. // Set cell value
  324. $sheet->getCell($columnLetter . $currentRow)->setValue($rowDatum);
  325. }
  326. ++$columnLetter;
  327. }
  328. ++$currentRow;
  329. }
  330. // Close file
  331. fclose($fileHandle);
  332. if ($this->contiguous) {
  333. $this->contiguousRow = $currentRow;
  334. }
  335. ini_set('auto_detect_line_endings', $lineEnding);
  336. // Return
  337. return $spreadsheet;
  338. }
    /**
     * Get delimiter.
     *
     * @return null|string The delimiter, or null while none has been set, read from the file or inferred
     */
    public function getDelimiter()
    {
        return $this->delimiter;
    }
    /**
     * Set delimiter.
     *
     * A delimiter set here suppresses inference from the file contents.
     *
     * @param string $delimiter Delimiter, eg: ','
     *
     * @return Csv This reader, to allow method chaining
     */
    public function setDelimiter($delimiter)
    {
        $this->delimiter = $delimiter;

        return $this;
    }
    /**
     * Get enclosure.
     *
     * @return string The enclosure character, '"' unless changed through setEnclosure()
     */
    public function getEnclosure()
    {
        return $this->enclosure;
    }
  369. /**
  370. * Set enclosure.
  371. *
  372. * @param string $enclosure Enclosure, defaults to "
  373. *
  374. * @return CSV
  375. */
  376. public function setEnclosure($enclosure)
  377. {
  378. if ($enclosure == '') {
  379. $enclosure = '"';
  380. }
  381. $this->enclosure = $enclosure;
  382. return $this;
  383. }
    /**
     * Get sheet index.
     *
     * @return int Index of the worksheet that loaded rows are written to
     */
    public function getSheetIndex()
    {
        return $this->sheetIndex;
    }
    /**
     * Set sheet index.
     *
     * The value is stored as given; no range check is performed here.
     *
     * @param int $pValue Sheet index
     *
     * @return Csv This reader, to allow method chaining
     */
    public function setSheetIndex($pValue)
    {
        $this->sheetIndex = $pValue;

        return $this;
    }
  405. /**
  406. * Set Contiguous.
  407. *
  408. * @param bool $contiguous
  409. *
  410. * @return Csv
  411. */
  412. public function setContiguous($contiguous)
  413. {
  414. $this->contiguous = (bool) $contiguous;
  415. if (!$contiguous) {
  416. $this->contiguousRow = -1;
  417. }
  418. return $this;
  419. }
    /**
     * Get Contiguous.
     *
     * @return bool Whether contiguous loading is switched on
     */
    public function getContiguous()
    {
        return $this->contiguous;
    }
    /**
     * Set the character that can escape the enclosure.
     *
     * The value is stored as given and later passed to fgetcsv() as its escape argument.
     *
     * @param string $escapeCharacter
     *
     * @return $this
     */
    public function setEscapeCharacter($escapeCharacter)
    {
        $this->escapeCharacter = $escapeCharacter;

        return $this;
    }
    /**
     * Get the character that can escape the enclosure.
     *
     * @return string The escape character, a backslash unless changed through setEscapeCharacter()
     */
    public function getEscapeCharacter()
    {
        return $this->escapeCharacter;
    }
  450. /**
  451. * Can the current IReader read the file?
  452. *
  453. * @param string $pFilename
  454. *
  455. * @return bool
  456. */
  457. public function canRead($pFilename)
  458. {
  459. // Check if file exists
  460. try {
  461. $this->openFile($pFilename);
  462. } catch (Exception $e) {
  463. return false;
  464. }
  465. fclose($this->fileHandle);
  466. // Trust file extension if any
  467. $extension = strtolower(pathinfo($pFilename, PATHINFO_EXTENSION));
  468. if (in_array($extension, ['csv', 'tsv'])) {
  469. return true;
  470. }
  471. // Attempt to guess mimetype
  472. $type = mime_content_type($pFilename);
  473. $supportedTypes = [
  474. 'text/csv',
  475. 'text/plain',
  476. 'inode/x-empty',
  477. ];
  478. return in_array($type, $supportedTypes, true);
  479. }
  480. }