PageRenderTime 27ms CodeModel.GetById 20ms RepoModel.GetById 1ms app.codeStats 0ms

/application/third_party/phpoffice/vendor/phpoffice/phpspreadsheet/src/PhpSpreadsheet/Reader/Csv.php

https://gitlab.com/Japang-Jawara/jawara-penilaian
PHP | 534 lines | 315 code | 67 blank | 152 comment | 27 complexity | 152e56e0f19d2385a6020ebd3f943538 MD5 | raw file
  1. <?php
  2. namespace PhpOffice\PhpSpreadsheet\Reader;
  3. use InvalidArgumentException;
  4. use PhpOffice\PhpSpreadsheet\Cell\Coordinate;
  5. use PhpOffice\PhpSpreadsheet\Shared\StringHelper;
  6. use PhpOffice\PhpSpreadsheet\Spreadsheet;
  7. class Csv extends BaseReader
  8. {
  /**
   * Character encoding of the source file. Any value other than 'UTF-8'
   * causes the content to be re-encoded to UTF-8 before it is parsed.
   *
   * @var string
   */
  private $inputEncoding = 'UTF-8';

  /**
   * Field delimiter. Stays null until it is set by the caller, read from a
   * leading "sep=" line, or inferred from the file content.
   *
   * @var null|string
   */
  private $delimiter;

  /**
   * Character that wraps a field value.
   *
   * @var string
   */
  private $enclosure = '"';

  /**
   * Index of the worksheet that receives the loaded data.
   *
   * @var int
   */
  private $sheetIndex = 0;

  /**
   * When true, output rows are numbered consecutively instead of
   * following the row numbers of the source file.
   *
   * @var bool
   */
  private $contiguous = false;

  /**
   * Character that escapes the enclosure inside a field.
   *
   * @var string
   */
  private $escapeCharacter = '\\';
  /**
   * Build a CSV reader; all initialisation is delegated to the parent reader.
   */
  public function __construct()
  {
      parent::__construct();
  }
  /**
   * Declare the character encoding of the files this reader will load.
   *
   * @param string $pValue Encoding name, e.g. 'UTF-8'
   *
   * @return $this Same instance, so calls can be chained
   */
  public function setInputEncoding($pValue)
  {
      $this->inputEncoding = $pValue;

      return $this;
  }
  /**
   * Report the character encoding currently configured for input files.
   *
   * @return string
   */
  public function getInputEncoding()
  {
      return $this->inputEncoding;
  }
  /**
   * Position the file pointer on the first byte of real data: just past a
   * UTF-8 byte order mark when the stream starts with one, otherwise at
   * offset 0.
   *
   * The check is made whatever input encoding is configured. For every
   * encoding other than 'UTF-8', openFileOrMemory() has already re-encoded
   * the content to UTF-8, so any BOM that survives that conversion is a
   * UTF-8 BOM and has to be skipped here as well.
   */
  protected function skipBOM(): void
  {
      $bom = "\xEF\xBB\xBF";

      rewind($this->fileHandle);

      // fgets() with a length of 4 returns at most 3 bytes (or false on an empty stream)
      if (fgets($this->fileHandle, strlen($bom) + 1) === $bom) {
          fseek($this->fileHandle, strlen($bom));
      } else {
          fseek($this->fileHandle, 0);
      }
  }
  /**
   * Look for a leading "sep=X" line that names the delimiter explicitly.
   *
   * When the first line read is exactly "sep=" plus one character, that
   * character becomes the delimiter and the pointer is left after the line
   * so it is not parsed as data. Otherwise the pointer is put back at the
   * start of the data via skipBOM(). Nothing happens when no line can be read.
   */
  protected function checkSeparator(): void
  {
      $firstLine = fgets($this->fileHandle);
      if ($firstLine === false) {
          return;
      }

      $isDirective = stripos($firstLine, 'sep=') === 0
          && strlen(trim($firstLine, "\r\n")) === 5;

      if (!$isDirective) {
          // Not a directive: the line is ordinary data, so start over
          $this->skipBOM();

          return;
      }

      $this->delimiter = substr($firstLine, 4, 1);
  }
  /**
   * Infer the delimiter when it was neither given by the caller nor
   * declared in the file.
   *
   * Up to 1000 logical lines are sampled. For each candidate delimiter the
   * number of occurrences per line is recorded; candidates whose median
   * count is 0 are discarded, and among the rest the one whose counts
   * deviate least from their median (smallest mean square deviation) wins.
   * Ties are resolved by the order of the candidate list. If nothing can be
   * inferred, the first candidate (a comma) is used.
   *
   * The file pointer is put back at the start of the data (after any BOM)
   * whenever inference was attempted.
   */
  protected function inferSeparator(): void
  {
      if ($this->delimiter !== null) {
          return;
      }

      $potentialDelimiters = [',', ';', "\t", '|', ':', ' ', '~'];
      $maxSampleLines = 1000;

      // One list of per-line occurrence counts for each candidate
      $counts = [];
      foreach ($potentialDelimiters as $delimiter) {
          $counts[$delimiter] = [];
      }

      // $numberLines is only advanced for lines that are actually tallied, so it
      // always equals the length of every series used for the median below.
      $numberLines = 0;
      while ($numberLines < $maxSampleLines && ($line = $this->getNextLine()) !== false) {
          ++$numberLines;
          foreach ($potentialDelimiters as $delimiter) {
              $counts[$delimiter][] = substr_count((string) $line, $delimiter);
          }
      }

      // Nothing to sample: fall back to the default
      if ($numberLines === 0) {
          $this->delimiter = reset($potentialDelimiters);
          $this->skipBOM();

          return;
      }

      // Mean square deviation from the median, per candidate that occurs consistently
      $meanSquareDeviations = [];
      $middleIdx = (int) floor(($numberLines - 1) / 2);
      $oddSample = ($numberLines % 2) === 1;
      foreach ($potentialDelimiters as $delimiter) {
          $series = $counts[$delimiter];
          sort($series);

          $median = $oddSample
              ? $series[$middleIdx]
              : ($series[$middleIdx] + $series[$middleIdx + 1]) / 2;

          // Integer 0 means the candidate is absent from at least half of the lines
          if ($median === 0) {
              continue;
          }

          $squaredDeviationSum = array_reduce(
              $series,
              function ($sum, $value) use ($median) {
                  return $sum + ($value - $median) ** 2;
              },
              0
          );
          $meanSquareDeviations[$delimiter] = $squaredDeviationSum / count($series);
      }

      // Smallest deviation wins; the strict comparison keeps the earlier candidate on ties
      $min = INF;
      foreach ($potentialDelimiters as $delimiter) {
          if (!isset($meanSquareDeviations[$delimiter])) {
              continue;
          }

          if ($meanSquareDeviations[$delimiter] < $min) {
              $min = $meanSquareDeviations[$delimiter];
              $this->delimiter = $delimiter;
          }
      }

      // No candidate qualified: fall back to the default
      if ($this->delimiter === null) {
          $this->delimiter = reset($potentialDelimiters);
      }

      $this->skipBOM();
  }
  /**
   * Read the next logical line, with every enclosed section removed.
   *
   * Physical lines are appended until no unmatched enclosure remains, so a
   * quoted field containing line breaks is treated as part of one logical
   * line. Enclosed sections are stripped so that delimiter characters inside
   * them are not counted during delimiter inference.
   *
   * @return false|string The stripped line, or false once the end of the
   *                      file is reached (including in the middle of an
   *                      unterminated enclosure)
   */
  private function getNextLine()
  {
      $quotedEnclosure = preg_quote($this->enclosure, '/');
      $escape = (string) $this->escapeCharacter;

      // An enclosure counts only when it is not preceded by the escape character.
      // With an empty escape character the lookbehind must be omitted: "(?<!)"
      // is an assertion that can never succeed, so no enclosure would ever match.
      $enclosure = ($escape === '')
          ? $quotedEnclosure
          : '(?<!' . preg_quote($escape, '/') . ')' . $quotedEnclosure;

      $line = '';
      do {
          $newLine = fgets($this->fileHandle);
          if ($newLine === false) {
              return false;
          }

          $line .= $newLine;

          // Drop complete enclosed sections (ungreedy, dot matches newlines)
          $line = preg_replace('/(' . $enclosure . '.*' . $enclosure . ')/Us', '', $line);

          // A leftover enclosure means the field continues on the next physical line
      } while (preg_match('/(' . $enclosure . ')/', $line) > 0);

      return $line;
  }
  /**
   * Scan a CSV file and describe the single worksheet it would produce.
   *
   * @param string $pFilename
   *
   * @return array One entry with the keys worksheetName, lastColumnLetter,
   *               lastColumnIndex, totalRows and totalColumns
   */
  public function listWorksheetInfo($pFilename)
  {
      $this->openFileOrMemory($pFilename);
      $handle = $this->fileHandle;

      // Settle on the delimiter before parsing
      $this->skipBOM();
      $this->checkSeparator();
      $this->inferSeparator();

      $rowCount = 0;
      $widestColumnIndex = 0;
      for (;;) {
          $fields = fgetcsv($handle, 0, $this->delimiter, $this->enclosure, $this->escapeCharacter);
          if ($fields === false) {
              break;
          }

          ++$rowCount;
          $widestColumnIndex = max($widestColumnIndex, count($fields) - 1);
      }

      $info = [
          'worksheetName' => 'Worksheet',
          'lastColumnLetter' => Coordinate::stringFromColumnIndex($widestColumnIndex + 1),
          'lastColumnIndex' => $widestColumnIndex,
          'totalRows' => $rowCount,
          'totalColumns' => $widestColumnIndex + 1,
      ];

      fclose($handle);

      return [$info];
  }
  /**
   * Read a CSV file into a freshly created Spreadsheet.
   *
   * @param string $pFilename
   *
   * @return Spreadsheet
   */
  public function load($pFilename)
  {
      return $this->loadIntoExisting($pFilename, new Spreadsheet());
  }
  /**
   * Open the file for reading and store the handle in $this->fileHandle.
   *
   * For an input encoding other than 'UTF-8' the whole file is read,
   * re-encoded to UTF-8 and served from an in-memory stream instead.
   *
   * @param string $pFilename
   */
  private function openFileOrMemory($pFilename): void
  {
      if (!$this->canRead($pFilename)) {
          throw new Exception($pFilename . ' is an Invalid Spreadsheet file.');
      }

      $this->openFile($pFilename);
      if ($this->inputEncoding === 'UTF-8') {
          return;
      }

      // Swap the on-disk handle for a memory stream holding the UTF-8 version
      fclose($this->fileHandle);
      $rawContent = file_get_contents($pFilename);
      $this->fileHandle = fopen('php://memory', 'r+b');
      fwrite(
          $this->fileHandle,
          StringHelper::convertEncoding($rawContent, 'UTF-8', $this->inputEncoding)
      );
      rewind($this->fileHandle);
  }
  /**
   * Read a CSV file into the configured sheet of an existing Spreadsheet.
   *
   * Sheets are created as needed so that the configured sheet index exists.
   * Empty fields and cells rejected by the read filter are skipped. In
   * contiguous mode output rows are numbered consecutively and only advance
   * when a source row yields at least one cell; otherwise each value is
   * written to the row it has in the file.
   *
   * The 'auto_detect_line_endings' ini setting is switched on for the
   * duration of the load. It is restored, and the file handle closed, even
   * when loading fails with an exception.
   *
   * @param string $pFilename
   *
   * @return Spreadsheet The spreadsheet that was passed in
   */
  public function loadIntoExisting($pFilename, Spreadsheet $spreadsheet)
  {
      $lineEnding = ini_get('auto_detect_line_endings');
      ini_set('auto_detect_line_endings', true);

      try {
          $this->openFileOrMemory($pFilename);
          $fileHandle = $this->fileHandle;

          try {
              // Settle on the delimiter before parsing
              $this->skipBOM();
              $this->checkSeparator();
              $this->inferSeparator();

              // Make sure the target sheet exists
              while ($spreadsheet->getSheetCount() <= $this->sheetIndex) {
                  $spreadsheet->createSheet();
              }
              $sheet = $spreadsheet->setActiveSheetIndex($this->sheetIndex);

              $currentRow = 1;
              $outRow = 0;

              while (($rowData = fgetcsv($fileHandle, 0, $this->delimiter, $this->enclosure, $this->escapeCharacter)) !== false) {
                  $noOutputYet = true;
                  $columnLetter = 'A';
                  foreach ($rowData as $rowDatum) {
                      if ($rowDatum != '' && $this->readFilter->readCell($columnLetter, $currentRow)) {
                          if ($this->contiguous) {
                              // Advance the output row once, on the first cell written for this source row
                              if ($noOutputYet) {
                                  $noOutputYet = false;
                                  ++$outRow;
                              }
                          } else {
                              $outRow = $currentRow;
                          }

                          $sheet->getCell($columnLetter . $outRow)->setValue($rowDatum);
                      }
                      ++$columnLetter;
                  }
                  ++$currentRow;
              }
          } finally {
              fclose($fileHandle);
          }
      } finally {
          ini_set('auto_detect_line_endings', $lineEnding);
      }

      return $spreadsheet;
  }
  /**
   * Report the field delimiter in use.
   *
   * @return null|string Null while no delimiter has been set, declared in
   *                     the file, or inferred
   */
  public function getDelimiter()
  {
      return $this->delimiter;
  }
  /**
   * Choose the field delimiter explicitly, which disables inference.
   *
   * @param string $delimiter Delimiter, e.g. ','
   *
   * @return $this Same instance, so calls can be chained
   */
  public function setDelimiter($delimiter)
  {
      $this->delimiter = $delimiter;

      return $this;
  }
  /**
   * Report the character that wraps field values.
   *
   * @return string
   */
  public function getEnclosure()
  {
      return $this->enclosure;
  }
  /**
   * Choose the character that wraps field values.
   *
   * @param string $enclosure Enclosure; a value loosely equal to '' selects
   *                          the default double quote
   *
   * @return $this Same instance, so calls can be chained
   */
  public function setEnclosure($enclosure)
  {
      // Loose comparison kept on purpose so the accepted "empty" inputs stay the same
      $this->enclosure = ($enclosure == '') ? '"' : $enclosure;

      return $this;
  }
  /**
   * Report the index of the worksheet that receives loaded data.
   *
   * @return int
   */
  public function getSheetIndex()
  {
      return $this->sheetIndex;
  }
  /**
   * Choose the worksheet that receives loaded data.
   *
   * @param int $pValue Zero-based sheet index
   *
   * @return $this Same instance, so calls can be chained
   */
  public function setSheetIndex($pValue)
  {
      $this->sheetIndex = $pValue;

      return $this;
  }
  /**
   * Switch contiguous row loading on or off.
   *
   * @param bool $contiguous Any value is accepted and cast to bool
   *
   * @return $this Same instance, so calls can be chained
   */
  public function setContiguous($contiguous)
  {
      $this->contiguous = (bool) $contiguous;

      return $this;
  }
  /**
   * Tell whether contiguous row loading is enabled.
   *
   * @return bool
   */
  public function getContiguous()
  {
      return $this->contiguous;
  }
  /**
   * Choose the character that escapes the enclosure inside a field.
   *
   * @param string $escapeCharacter
   *
   * @return $this Same instance, so calls can be chained
   */
  public function setEscapeCharacter($escapeCharacter)
  {
      $this->escapeCharacter = $escapeCharacter;

      return $this;
  }
  /**
   * Report the character that escapes the enclosure inside a field.
   *
   * @return string
   */
  public function getEscapeCharacter()
  {
      return $this->escapeCharacter;
  }
  /**
   * Decide whether this reader can handle the given file.
   *
   * The file must be openable. A .csv or .tsv extension (case-insensitive)
   * is accepted outright; otherwise the detected MIME type has to be one of
   * the supported text/CSV types.
   *
   * @param string $pFilename
   *
   * @return bool
   */
  public function canRead($pFilename)
  {
      // Opening the file is only a probe; the handle is released right away
      try {
          $this->openFile($pFilename);
      } catch (InvalidArgumentException $e) {
          return false;
      }
      fclose($this->fileHandle);

      $extension = strtolower(pathinfo($pFilename, PATHINFO_EXTENSION));
      if ($extension === 'csv' || $extension === 'tsv') {
          return true;
      }

      // No telling extension: fall back to the MIME type
      return in_array(
          mime_content_type($pFilename),
          [
              'application/csv',
              'text/csv',
              'text/plain',
              'inode/x-empty',
          ],
          true
      );
  }
  455. }