PageRenderTime 34ms CodeModel.GetById 15ms RepoModel.GetById 0ms app.codeStats 0ms

/dev/CsvBulkLoader.php

http://github.com/silverstripe/sapphire
PHP | 371 lines | 189 code | 66 blank | 116 comment | 39 complexity | 7be082f5ce7c7f5a8d396a40f59926e7 MD5 | raw file
Possible License(s): BSD-3-Clause, MIT, CC-BY-3.0, GPL-2.0, AGPL-1.0, LGPL-2.1
  1. <?php
  2. use SilverStripe\ORM\DataObject;
  /**
   * Utility class to facilitate complex CSV-imports by defining column-mappings
   * and custom converters.
   *
   * Uses the fgetcsv() function to process CSV input. Accepts a file-handler as
   * input.
   *
   * Large files are first split line-by-line into temporary chunk files (see
   * {@link splitFile()}), and each chunk is then parsed and imported on its own.
   *
   * @see http://tools.ietf.org/html/rfc4180
   *
   * @package framework
   * @subpackage bulkloading
   *
   * @todo Support for deleting existing records not matched in the import
   * (through relation checks)
   */
  class CsvBulkLoader extends BulkLoader
  {

      /**
       * Delimiter character (Default: comma).
       *
       * @var string
       */
      public $delimiter = ',';

      /**
       * Enclosure character (Default: doublequote)
       *
       * @var string
       */
      public $enclosure = '"';

      /**
       * Identifies if the CSV has a header row.
       *
       * @var boolean
       */
      public $hasHeaderRow = true;

      /**
       * Number of lines to split large CSV files into.
       *
       * @var int
       *
       * @config
       */
      private static $lines = 1000;

      /**
       * Runs the import in preview mode: delegates to {@link processAll()} with
       * the preview flag set.
       *
       * @inheritDoc
       */
      public function preview($filepath)
      {
          return $this->processAll($filepath, true);
      }

      /**
       * Splits the given file into chunks, processes every chunk and merges the
       * per-chunk results into one.
       *
       * If processing a chunk throws, a "Failed to parse" line is printed, the
       * remaining chunks are skipped and whatever was merged so far is returned.
       * Temporary chunk files are removed in either case.
       *
       * @param string $filepath
       * @param boolean $preview When true, records are not written
       *
       * @return null|BulkLoader_Result Null when no chunk was processed successfully
       */
      protected function processAll($filepath, $preview = false)
      {
          $filepath = Director::getAbsFile($filepath);
          $files = $this->splitFile($filepath);

          $result = null;
          $last = null;

          try {
              foreach ($files as $file) {
                  $last = $file;

                  // Forward the caller's preview flag; passing a literal false
                  // here would make preview() write to the database.
                  $next = $this->processChunk($file, $preview);

                  if ($result instanceof BulkLoader_Result) {
                      $result->merge($next);
                  } else {
                      $result = $next;
                  }

                  // Best-effort removal of the chunk as soon as it is done
                  @unlink($file);
              }
          } catch (Exception $e) {
              print "Failed to parse {$last}\n";
          }

          // A failure above skips the remaining chunks; make sure none of the
          // temporary files (including the failed one) are left behind.
          foreach ($files as $file) {
              if (is_file($file)) {
                  @unlink($file);
              }
          }

          return $result;
      }

      /**
       * Splits a large file up into many smaller files.
       *
       * The split is done on physical lines, and when {@link $hasHeaderRow} is
       * set the first line of the source is repeated at the top of every chunk.
       * NOTE(review): because the split is line-based, a quoted CSV field that
       * contains a line break could be cut across two chunks.
       *
       * @param string $path Path to large file to split
       * @param int $lines Number of lines per file; falls back to the "lines"
       *                   config value when not an integer
       *
       * @return array List of file paths
       *
       * @throws RuntimeException If the source or a chunk file cannot be opened
       */
      protected function splitFile($path, $lines = null)
      {
          if (!is_int($lines)) {
              $lines = $this->config()->get("lines");
          }

          $previous = ini_get('auto_detect_line_endings');
          ini_set('auto_detect_line_endings', true);

          $from = fopen($path, 'r');
          if ($from === false) {
              ini_set('auto_detect_line_endings', $previous);
              throw new RuntimeException("Unable to open CSV file for reading: {$path}");
          }

          $files = array();
          $to = null;

          try {
              // false means "no header line to repeat"
              $header = $this->hasHeaderRow ? fgets($from) : false;

              list($new, $to) = $this->openSplitChunk($header);
              $files[] = $new;

              $count = 0;

              // Loop on the fgets() result rather than feof(): feof() only turns
              // true after a read has already failed.
              while (($line = fgets($from)) !== false) {
                  fwrite($to, $line);
                  $count++;

                  if ($count >= $lines) {
                      fclose($to);
                      $to = null;

                      // get a new temporary file, to write the next lines to
                      list($new, $to) = $this->openSplitChunk($header);
                      $files[] = $new;
                      $count = 0;
                  }
              }
          } catch (Exception $e) {
              // Release handles, drop partial chunks and restore the ini
              // setting before handing the error to the caller.
              if (is_resource($to)) {
                  fclose($to);
              }
              fclose($from);
              foreach ($files as $file) {
                  if (is_file($file)) {
                      @unlink($file);
                  }
              }
              ini_set('auto_detect_line_endings', $previous);

              throw $e;
          }

          fclose($to);
          fclose($from);
          ini_set('auto_detect_line_endings', $previous);

          return $files;
      }

      /**
       * Creates a new temporary chunk file and writes the header line to it.
       *
       * @param string|false $header Header line to write first, or false for none
       *
       * @return array Two elements: the chunk's path and its open write handle
       *
       * @throws RuntimeException If the chunk file cannot be created
       */
      private function openSplitChunk($header)
      {
          $chunkPath = $this->getNewSplitFileName();
          $handle = fopen($chunkPath, 'w+');

          if ($handle === false) {
              throw new RuntimeException("Unable to create temporary CSV chunk: {$chunkPath}");
          }

          if ($header !== false) {
              fwrite($handle, $header);
          }

          return array($chunkPath, $handle);
      }

      /**
       * Builds a unique path inside TEMP_FOLDER for a chunk file.
       *
       * @return string
       */
      protected function getNewSplitFileName()
      {
          return TEMP_FOLDER . '/' . uniqid('BulkLoader', true) . '.csv';
      }

      /**
       * Parses a single CSV file and feeds each row to {@link processRecord()}.
       *
       * @param string $filepath
       * @param boolean $preview
       *
       * @return BulkLoader_Result
       */
      protected function processChunk($filepath, $preview = false)
      {
          $results = new BulkLoader_Result();

          $csv = new CSVParser(
              $filepath,
              $this->delimiter,
              $this->enclosure
          );

          // ColumnMap has two uses, depending on whether hasHeaderRow is set
          if ($this->columnMap) {
              $map = array();

              // if the map goes to a callback, use the same key value as the map
              // value, rather than function name as multiple keys may use the
              // same callback
              foreach ($this->columnMap as $k => $v) {
                  $map[$k] = (strpos($v, "->") === 0) ? $k : $v;
              }

              if ($this->hasHeaderRow) {
                  $csv->mapColumns($map);
              } else {
                  $csv->provideHeaderRow($map);
              }
          }

          foreach ($csv as $row) {
              $this->processRecord($row, $this->columnMap, $results, $preview);
          }

          return $results;
      }

      /**
       * Imports one CSV row: finds or creates the target object, resolves its
       * relations, copies the column values across and records the outcome.
       *
       * @todo Better messages for relation checks and duplicate detection
       * Note that columnMap isn't used.
       *
       * @param array $record
       * @param array $columnMap
       * @param BulkLoader_Result $results
       * @param boolean $preview When true, nothing is written
       *
       * @return int ID of the processed object
       */
      protected function processRecord($record, $columnMap, &$results, $preview = false)
      {
          $class = $this->objectClass;

          // find existing object, or create new one
          $existingObj = $this->findExistingObject($record, $columnMap);
          $obj = ($existingObj) ? $existingObj : new $class();

          // first run: find/create any relations and store them on the object
          // we can't combine runs, as other columns might rely on the relation being present
          foreach ($record as $fieldName => $val) {
              // don't bother querying if value is not set
              if ($this->isNullValue($val)) {
                  continue;
              }

              if (isset($this->relationCallbacks[$fieldName])) {
                  // trigger custom search method for finding a relation based on the given value
                  // and write it back to the relation (or create a new object)
                  $relationName = $this->relationCallbacks[$fieldName]['relationname'];
                  $callback = $this->relationCallbacks[$fieldName]['callback'];

                  // Reset per column so a relation found for an earlier column
                  // can never be attached to this one when no callback exists.
                  $relationObj = null;
                  if ($this->hasMethod($callback)) {
                      $relationObj = $this->{$callback}($obj, $val, $record);
                  } elseif ($obj->hasMethod($callback)) {
                      $relationObj = $obj->{$callback}($val, $record);
                  }

                  if (!$relationObj || !$relationObj->exists()) {
                      $relationClass = $obj->hasOneComponent($relationName);
                      $relationObj = new $relationClass();
                      //write if we aren't previewing
                      if (!$preview) {
                          $relationObj->write();
                      }
                  }

                  $obj->{"{$relationName}ID"} = $relationObj->ID;

                  //write if we are not previewing
                  if (!$preview) {
                      $obj->write();
                      $obj->flushCache(); // avoid relation caching confusion
                  }
              } elseif (strpos($fieldName, '.') !== false) {
                  // we have a relation column with dot notation; only the
                  // relation name (the part before the first dot) is needed here
                  list($relationName) = explode('.', $fieldName, 2);

                  // always gives us a component (either empty or existing)
                  $relationObj = $obj->getComponent($relationName);
                  if (!$preview) {
                      $relationObj->write();
                  }

                  $obj->{"{$relationName}ID"} = $relationObj->ID;

                  //write if we are not previewing
                  if (!$preview) {
                      $obj->write();
                      $obj->flushCache(); // avoid relation caching confusion
                  }
              }
          }

          // second run: save data (skipped entirely when previewing)
          if (!$preview) {
              foreach ($record as $fieldName => $val) {
                  // look up the mapping to see if this needs to map to callback
                  $mapped = $this->columnMap && isset($this->columnMap[$fieldName]);

                  if ($mapped && strpos($this->columnMap[$fieldName], '->') === 0) {
                      $funcName = substr($this->columnMap[$fieldName], 2);
                      $this->$funcName($obj, $val, $record);
                  } elseif ($obj->hasMethod("import{$fieldName}")) {
                      $obj->{"import{$fieldName}"}($val, $record);
                  } else {
                      $obj->update(array($fieldName => $val));
                  }
              }

              // write record
              $obj->write();
          }

          // @todo better message support
          $message = '';

          // save to results
          if ($existingObj) {
              $results->addUpdated($obj, $message);
          } else {
              $results->addCreated($obj, $message);
          }

          $objID = $obj->ID;

          $obj->destroy();

          // memory usage
          unset($existingObj);
          unset($obj);

          return $objID;
      }

      /**
       * Find an existing object based on one or more uniqueness columns
       * specified via {@link self::$duplicateChecks}.
       *
       * A check is either a field name (string), matched against the record's
       * value for that field, or an array with a 'callback' key naming a method
       * on this loader or on the object class. The first match wins.
       *
       * @param array $record CSV data column
       *
       * @return mixed The matching record, or false if none was found
       */
      public function findExistingObject($record)
      {
          $singleton = singleton($this->objectClass);

          // checking for existing records (only if not already found)
          foreach ($this->duplicateChecks as $fieldName => $duplicateCheck) {
              $existingRecord = null;

              if (is_string($duplicateCheck)) {
                  // Skip current duplicate check if field value is empty
                  if (empty($record[$duplicateCheck])) {
                      continue;
                  }

                  // Check existing record with this value
                  $dbFieldValue = $record[$duplicateCheck];
                  $existingRecord = DataObject::get($this->objectClass)
                      ->filter($duplicateCheck, $dbFieldValue)
                      ->first();
              } elseif (is_array($duplicateCheck) && isset($duplicateCheck['callback'])) {
                  $callback = $duplicateCheck['callback'];

                  // The column may be absent from this row; pass null then
                  $fieldValue = isset($record[$fieldName]) ? $record[$fieldName] : null;

                  if ($this->hasMethod($callback)) {
                      $existingRecord = $this->{$callback}($fieldValue, $record);
                  } elseif ($singleton->hasMethod($callback)) {
                      $existingRecord = $singleton->{$callback}($fieldValue, $record);
                  } else {
                      user_error("CsvBulkLoader::processRecord():"
                          . " {$duplicateCheck['callback']} not found on importer or object class.", E_USER_ERROR);
                  }
              } else {
                  user_error('CsvBulkLoader::processRecord(): Wrong format for $duplicateChecks', E_USER_ERROR);
              }

              if ($existingRecord) {
                  return $existingRecord;
              }
          }

          return false;
      }

      /**
       * Determine whether any loaded files should be parsed with a
       * header-row (otherwise we rely on {@link self::$columnMap}).
       *
       * @return boolean True when $hasHeaderRow is truthy or $columnMap is set
       */
      public function hasHeaderRow()
      {
          return ($this->hasHeaderRow || isset($this->columnMap));
      }
  }