/plugins/tmCsvPlugin/lib/tmCsvReader.class.php

https://github.com/staunchRobots/Symfony-Groovy-Ecom · PHP · 360 lines · 235 code · 61 blank · 64 comment · 55 complexity · 06ddd2bec03e2c02e539051ae42c108a MD5 · raw file

  1. <?php
  2. /**
  3. * @todo implement escape char if PHP >= 5.3
  4. *
  5. *
  6. * @author Tomasz Muras
  7. * @license LGPL
  8. */
  9. /**
  10. *
  11. * tmCsvReader
  12. * @author Tomasz Muras
  13. */
  14. class tmCsvReader
  15. {
  16. const UNINITIALIZED = 1;
  17. const ALL_READ = 2;
  18. const ERR_WRONG_OPTION = 1;
  19. const ERR_IO = 2;
  20. const ERR_DOCTRINE = 3;
  21. const ERR_HEADER = 4;
  22. private $hasHeader;
  23. /**
  24. * @var array The header as parsed from the CSV file in a format array('column'=>'column'). We do not allow for 2 columns with the same name.
  25. */
  26. private $header, $schema;
  27. /**
  28. * @var array The header that will actually be used
  29. */
  30. private $currentHeader;
  31. private $fhandle, $path, $opened;
  32. private $length, $delimiter, $enclosure, $to, $from;
  33. private $state;
  34. private $content;
  35. /**
  36. * Options:
  37. * * delimiter
  38. * * enclosure
  39. * * length
  40. * * escape - works only from PHP 5.3
  41. * * header - first row is a header and contains column names (default: true)
  42. * * from - encoding
  43. * * to - encoding
  44. * * ignoreMissingFields - do not ignore a row if it has less fields than a header (default: true - missing fields will be returned as NULLs)
  45. * * ignoreExtraFields - do not ignore a row if it has more fields than a header (default: true - extra fields will not be used)
  46. * @param string $path Path to the file to read
  47. * @param array $options Array with the options (delimiter, enclosure, length, header, from, to)
  48. */
  49. public function __construct($path, $options = array())
  50. {
  51. $this->path = $path;
  52. if(isset($options['delimiter'])) {
  53. $this->delimiter = $options['delimiter'];
  54. } else {
  55. $this->delimiter = ',';
  56. }
  57. if(isset($options['enclosure'])) {
  58. $this->enclosure = $options['enclosure'];
  59. } else {
  60. $this->enclosure = '"';
  61. }
  62. if(isset($options['length'])) {
  63. $this->length = $options['length'];
  64. } else {
  65. $this->length = 0;
  66. }
  67. if(isset($options['escape'])) {
  68. $this->escape = $options['escape'];
  69. } else {
  70. $this->escape = null;
  71. }
  72. if(isset($options['from'])) {
  73. $this->from = $options['from'];
  74. } else {
  75. $this->from = 'auto';
  76. }
  77. if(isset($options['to'])) {
  78. $this->to = $options['to'];
  79. } else {
  80. $this->to = 'utf-8';
  81. }
  82. if(strlen($this->enclosure) > 1 || strlen($this->delimiter) > 1 || strlen($this->escape) > 1) {
  83. new Exception('enclosure, delimiter and escape must be 1 character', self::ERR_WRONG_OPTION);
  84. }
  85. if(isset($options['header'])) {
  86. $this->hasHeader = $options['header'];
  87. } else {
  88. $this->hasHeader = true;
  89. }
  90. if(! is_bool($this->hasHeader)) {
  91. throw new Exception('Header option must equal to true or false', self::ERR_WRONG_OPTION);
  92. }
  93. $this->header = null;
  94. $this->currentHeader = null;
  95. $this->schema = null;
  96. $this->content = null;
  97. $this->opened = false;
  98. $this->state = self::UNINITIALIZED;
  99. $this->open();
  100. }
  101. /**
  102. * Returns whole content of teh CSV file as an array
  103. *
  104. * @return array
  105. */
  106. public function toArray()
  107. {
  108. //it means that well, we need it all in the memory
  109. $this->readIntoMemory();
  110. return $this->content;
  111. }
  112. public function setHeader(&$data)
  113. {
  114. if(! $this->hasHeader) {
  115. throw new Exception('Header is not set to be parsed', self::ERR_WRONG_OPTION);
  116. }
  117. if($this->header == null) {
  118. $this->readHeader();
  119. }
  120. $this->currentHeader = $data;
  121. $this->header = $data;
  122. }
  123. public function removeHeader($columns)
  124. {
  125. if(! $this->hasHeader) {
  126. throw new Exception('Header is not set to be parsed', self::ERR_WRONG_OPTION);
  127. }
  128. if($this->header == null) {
  129. $this->readHeader();
  130. }
  131. if(! is_array($columns)) {
  132. $columns = array (
  133. $columns => $columns );
  134. }
  135. foreach($columns as $c => $column) {
  136. unset($this->currentHeader[$c]);
  137. unset($this->currentHeader[$column]);
  138. }
  139. }
  140. /**
  141. * Returns columns removed from a header - the ones that we've decided to ignore
  142. */
  143. public function getRemoved()
  144. {
  145. if(! $this->hasHeader) {
  146. throw new Exception('Header is not set to be parsed', self::ERR_WRONG_OPTION);
  147. }
  148. if($this->header == null) {
  149. $this->readHeader();
  150. }
  151. return array_diff_assoc($this->header, $this->currentHeader);
  152. }
  153. private function readIntoMemory()
  154. {
  155. if($this->state == self::ALL_READ)
  156. return true;
  157. $this->open();
  158. rewind($this->fhandle);
  159. if($this->hasHeader)
  160. $this->readHeader();
  161. $this->content = array ();
  162. $row = 0;
  163. while(($data = $this->readLine()) !== false) {
  164. $num = count($data);
  165. $row ++;
  166. for($c = 0; $c < $num; $c ++) {
  167. //echo $data[$c] . "<br />\n";
  168. }
  169. $this->content[] = $data;
  170. }
  171. $this->state = self::ALL_READ;
  172. return true;
  173. }
  174. public function getHeader()
  175. {
  176. if(! $this->hasHeader) {
  177. throw new Exception('Header is not set to be parsed', self::ERR_WRONG_OPTION);
  178. }
  179. if($this->header == null) {
  180. $this->readHeader();
  181. }
  182. return $this->currentHeader;
  183. }
  184. /**
  185. * Read one line from a CSV file
  186. *
  187. * @return array
  188. */
  189. private function readLine()
  190. {
  191. if($this->state == self::ALL_READ)
  192. return false;
  193. //for PHP 5.3: fgetcsv($this->fhandle, $this->length, $this->delimiter, $this->enclosure, $this->escape)
  194. $data = fgetcsv($this->fhandle, $this->length, $this->delimiter, $this->enclosure);
  195. if($data === false || count($data) < 2) {
  196. $this->state = self::ALL_READ;
  197. return false;
  198. }
  199. //encoding
  200. foreach($data as $k => $field) {
  201. $data[$k] = $this->encode($field);
  202. }
  203. if($this->hasHeader) {
  204. //we need to consider removed columns
  205. //first, we compare data we've got with the original header
  206. //@todo check the option on what do we do in this case
  207. if(count($data) > count($this->header)) {
  208. array_splice($data, count($this->header));
  209. }
  210. //@todo check the option on what do we do in this case
  211. if(count($data) < count($this->header)) {
  212. $data = array_merge($data, array_fill(0, count($this->header) - count($data), null));
  213. }
  214. $structuredData = array ();
  215. reset($this->header);
  216. //next, current
  217. foreach($data as $field) {
  218. if(array_key_exists(current($this->header), $this->currentHeader)) {
  219. $structuredData[current($this->header)] = $field;
  220. }
  221. next($this->header);
  222. //current();
  223. }
  224. return $structuredData;
  225. //return array_combine($this->header, $data);
  226. } else {
  227. return $data;
  228. }
  229. }
  230. /**
  231. * Rewind but do not re-read the header.
  232. *
  233. */
  234. public function rewind()
  235. {
  236. rewind($this->fhandle);
  237. if($this->hasHeader) {
  238. fgetcsv($this->fhandle, $this->length, $this->delimiter, $this->enclosure);
  239. }
  240. }
  241. /**
  242. * Rewind the file to the beginning.
  243. *
  244. */
  245. private function readHeader()
  246. {
  247. rewind($this->fhandle);
  248. $this->header = array ();
  249. $data = fgetcsv($this->fhandle, $this->length, $this->delimiter, $this->enclosure);
  250. foreach($data as $field) {
  251. if(key_exists($field, $this->header)) {
  252. throw new Exception("Field: '$field' is repeated twice in a header.", self::ERR_HEADER);
  253. }
  254. $this->header[$field] = $field;
  255. }
  256. $this->currentHeader = $this->header;
  257. }
  258. public function next()
  259. {
  260. if($this->hasHeader && $this->header === null) {
  261. $this->readHeader();
  262. }
  263. return $this->readLine();
  264. }
  265. private function open()
  266. {
  267. if($this->opened) {
  268. return true;
  269. }
  270. if(! ($this->fhandle = fopen($this->path, "r"))) {
  271. throw new Exception("File can not be opened ({$this->path}).", self::ERR_IO);
  272. }
  273. return true;
  274. }
  275. public function close()
  276. {
  277. if(! $this->opened) {
  278. return;
  279. }
  280. fclose($this->fhandle);
  281. $this->fhandle = null;
  282. $this->opened = false;
  283. }
  284. function __destruct()
  285. {
  286. $this->close();
  287. }
  288. private function encode($str)
  289. {
  290. if($this->from == 'auto') {
  291. $this->from = mb_detect_encoding($str);
  292. }
  293. if(function_exists('iconv')) {
  294. return iconv($this->from, $this->to, $str);
  295. } else {
  296. return mb_convert_encoding($str, $this->to, $this->from);
  297. }
  298. }
  299. }