PageRenderTime 26ms CodeModel.GetById 15ms RepoModel.GetById 0ms app.codeStats 0ms

/lib/Csv/Reader.php

https://gitlab.com/mmerian/csv
PHP | 429 lines | 209 code | 42 blank | 178 comment | 27 complexity | c8cd4e2e76b692fbe8f190c4777e48a4 MD5 | raw file
  1. <?php
  2. /****************************
  3. * A CSV parsing library. *
  4. * Copyright Maxime Mérian. *
  5. ****************************/
  6. namespace Csv;
  7. /**
  8. * A CSV file reader
  9. *
  10. * @author Maxime Mérian
  11. *
  12. */
  13. class Reader implements \Iterator
  14. {
  15. protected $mode = 'rb';
  16. /**
  17. * Encoding of the file that will be read
  18. *
  19. * @var string
  20. */
  21. protected $inputEncoding = 'UTF-8';
  22. /**
  23. * Encoding of the data that will be returned by the reader
  24. *
  25. * @var string
  26. */
  27. protected $outputEncoding = 'UTF-8';
  28. /**
  29. * CSV delimiter
  30. *
  31. * @var string
  32. */
  33. protected $delimiter = ',';
  34. /**
  35. * CSV enclosure
  36. *
  37. * @var string
  38. */
  39. protected $enclosure = '"';
  40. /**
  41. * Should the reader silently ignore empty lines ?
  42. *
  43. * @var bool
  44. */
  45. protected $ignoreEmptyLines = true;
  46. /**
  47. * Does the CSV file hae a header with column names ?
  48. *
  49. * @var string
  50. */
  51. protected $hasHeader = false;
  52. /**
  53. * CSV file header
  54. *
  55. * @var array
  56. */
  57. protected $header = null;
  58. /**
  59. * Current line number in the CSV file that is being processed
  60. *
  61. * @var int
  62. */
  63. protected $curLine = 0;
  64. /**
  65. * Data the is currently being read
  66. *
  67. * @var array
  68. */
  69. protected $currentData = null;
  70. /**
  71. * Path to the open file, if the reader was instanciated with a file path
  72. *
  73. * @var string
  74. */
  75. protected $filePath = null;
  76. /**
  77. * Path to the file that was opened, if any.
  78. *
  79. * @var string
  80. */
  81. protected $file = null;
  82. /**
  83. * Pointer to the file that is being read
  84. *
  85. * @var resource
  86. */
  87. protected $fp = null;
  88. /**
  89. * Callback functions for formatting fields
  90. *
  91. * @var array
  92. */
  93. protected $formatters = array();
  94. /**
  95. * List of valid options
  96. *
  97. * @var array
  98. */
  99. protected $validOptions = array(
  100. 'hasHeader',
  101. 'header',
  102. 'inputEncoding',
  103. 'outputEncoding',
  104. 'delimiter',
  105. 'enclosure',
  106. 'ignoreEmptyLines'
  107. );
  108. /**
  109. * Constructor
  110. *
  111. * @param string|resource $file The file to read. Can be provided as the path to the file or as a resource
  112. * @param array $options
  113. *
  114. * @throws Csv\Error
  115. */
  116. public function __construct($file, array $options = array())
  117. {
  118. ini_set('auto_detect_line_endings', true);
  119. if (is_resource($file)) {
  120. $this->fp = $file;
  121. } else {
  122. $this->file = $file;
  123. }
  124. $this->setOptions($options);
  125. $this->init();
  126. }
  127. protected function init()
  128. {
  129. $this->openFile();
  130. $this->rewind();
  131. }
  132. /**
  133. * Destructor.
  134. *
  135. * Closes the open CSV file if necessary
  136. */
  137. public function __destruct()
  138. {
  139. /*
  140. * Only close the resource if we opened it
  141. */
  142. if ($this->file && $this->fp) {
  143. fclose($this->fp);
  144. }
  145. }
  146. /**
  147. * Returns the CSV file header
  148. *
  149. * @return array|null
  150. */
  151. public function getHeader()
  152. {
  153. return $this->header;
  154. }
  155. /**
  156. * Returns the pointer to the file being processed
  157. *
  158. * @return resource
  159. */
  160. public function getFp()
  161. {
  162. return $this->fp;
  163. }
  164. /**
  165. *
  166. * @param string $field
  167. * @param callback $callback
  168. */
  169. public function registerFormatter($field, $callback)
  170. {
  171. if (! is_callable($callback)) {
  172. throw new Error('Provided callback for field ' . $field . ' is not callable');
  173. }
  174. $this->formatters[$field] = $callback;
  175. }
  176. /**
  177. * Sets the reader options.
  178. *
  179. * @param array $options
  180. *
  181. * @return Csv\Reader
  182. */
  183. public function setOptions(array $options)
  184. {
  185. foreach ($options as $opt => $val) {
  186. $this->setOption($opt, $val);
  187. }
  188. return $this;
  189. }
  190. /**
  191. * Sets an option
  192. *
  193. * @param string $name
  194. * @param mixed $value
  195. *
  196. * @return Csv\Reader
  197. *
  198. * @throws Csv\Error
  199. */
  200. public function setOption($name, $value)
  201. {
  202. if (! in_array($name, $this->validOptions)) {
  203. throw new Error('Invalid option ' . $name . '. Valid options are : ' . join(', ', $this->validOptions));
  204. }
  205. // Check duplicate fields in header
  206. if ('header' == $name) {
  207. $cnt = array_count_values($value);
  208. $duplicates = array();
  209. foreach ($cnt as $f => $c) {
  210. if ($c > 1) {
  211. $duplicates[$f] = $c;
  212. }
  213. }
  214. if (sizeof($duplicates) > 0) {
  215. $msg = 'Duplicate fields found in header : ' . join(', ', array_keys($duplicates));
  216. throw new Error($msg);
  217. }
  218. }
  219. $this->$name = $value;
  220. return $this;
  221. }
  222. /**
  223. * Opens the CSV file for read
  224. *
  225. * @return \Csv\Reader
  226. */
  227. protected function openFile()
  228. {
  229. if (is_null($this->fp)) {
  230. $this->fp = @fopen($this->file, $this->mode);
  231. if (! $this->fp) {
  232. throw new Error('Unable to open ' . $this->file);
  233. }
  234. }
  235. return $this;
  236. }
  237. /**
  238. * Read the next line, and applies encoding conversion if required
  239. *
  240. * @return array
  241. *
  242. * @throws Csv\Error if no line can be read
  243. */
  244. protected function readLine()
  245. {
  246. if (! $this->valid()) {
  247. throw new Error('End of stream reached, no data to read');
  248. }
  249. $this->currentData = fgetcsv($this->fp, null, $this->delimiter, $this->enclosure);
  250. // Check if EOF is reached
  251. if (false === $this->currentData) {
  252. return false;
  253. } elseif (array(null) == $this->currentData) {
  254. /*
  255. * An empty line in the csv file
  256. * is returned as an array containing a NULL value.
  257. */
  258. if (! $this->ignoreEmptyLines) {
  259. throw new Error('Empty line found in file');
  260. }
  261. return $this->readLine();
  262. }
  263. $this->curLine++;
  264. if ($this->inputEncoding != $this->outputEncoding) {
  265. $inEnc = $this->inputEncoding;
  266. $outEnc = $this->outputEncoding;
  267. array_walk($this->currentData, function (&$str) use ($inEnc, $outEnc) {
  268. $str = mb_convert_encoding($str, $outEnc, $inEnc);
  269. });
  270. }
  271. return $this->currentData;
  272. }
  273. /**
  274. * Fetches and returns the next line.
  275. *
  276. * Returns false if end of file is reached.
  277. *
  278. * @return array|bool
  279. */
  280. public function fetch()
  281. {
  282. if (! $this->valid()) {
  283. return false;
  284. }
  285. $line = $this->current();
  286. $this->readLine();
  287. return $line;
  288. }
  289. /**
  290. * Returns an HTML table preview of the csv data
  291. *
  292. * @return string
  293. */
  294. public function getHtmlPreview($numLines = 5)
  295. {
  296. $html = '<table>';
  297. if ($this->header) {
  298. $html .= '<thead><tr>';
  299. foreach ($this->header as $h) {
  300. $html .= '<th>' . htmlentities($h, ENT_QUOTES, 'UTF-8') . '</th>';
  301. }
  302. $html .= '</tr></thead>';
  303. }
  304. $html .= '<tbody>';
  305. $i = 0;
  306. foreach ($this as $line) {
  307. if ($i >= $numLines) {
  308. break;
  309. }
  310. $html .= '<tr>';
  311. foreach ($line as $v) {
  312. $html .= '<td>' . htmlentities($v, ENT_QUOTES, 'UTF-8') . '</td>';
  313. }
  314. $html .= '</tr>';
  315. $i++;
  316. }
  317. $html .= '</tbody></table>';
  318. return $html;
  319. }
  320. /**
  321. * (non-PHPdoc)
  322. * @see Iterator::current()
  323. */
  324. public function current()
  325. {
  326. if ($this->header) {
  327. if (count($this->header) != count($this->currentData)) {
  328. throw new Error(
  329. 'CSV line has ' .
  330. count($this->currentData) .
  331. ' fields, but header has ' .
  332. count($this->header) .
  333. ' fields'
  334. );
  335. }
  336. $data = array_combine($this->header, $this->currentData);
  337. foreach ($this->formatters as $field => $callback) {
  338. foreach ($data as $k => &$v) {
  339. if ($field == $k) {
  340. $v = $callback($v);
  341. } elseif (@preg_match($field, $k)) {
  342. $v = $callback($v);
  343. }
  344. }
  345. }
  346. return $data;
  347. }
  348. return $this->currentData;
  349. }
  350. /**
  351. * (non-PHPdoc)
  352. * @see Iterator::rewind()
  353. */
  354. public function rewind()
  355. {
  356. rewind($this->fp);
  357. $this->curLine = 0;
  358. if ($this->hasHeader) {
  359. $this->setOption('header', $this->readLine());
  360. }
  361. $this->readLine();
  362. }
  363. /**
  364. * (non-PHPdoc)
  365. * @see Iterator::valid()
  366. */
  367. public function valid()
  368. {
  369. return (! feof($this->fp));
  370. }
  371. /**
  372. * (non-PHPdoc)
  373. * @see Iterator::next()
  374. */
  375. public function next()
  376. {
  377. $this->readLine();
  378. }
  379. /**
  380. * (non-PHPdoc)
  381. * @see Iterator::key()
  382. */
  383. public function key()
  384. {
  385. return $this->curLine;
  386. }
  387. }