PageRenderTime 26ms CodeModel.GetById 26ms RepoModel.GetById 0ms app.codeStats 1ms

/src/Dev/CsvBulkLoader.php

https://gitlab.com/djpmedia/silverstripe-framework
PHP | 453 lines | 293 code | 60 blank | 100 comment | 42 complexity | d4b4c468f5bb29db40085883767717fe MD5 | raw file
  1. <?php
  2. namespace SilverStripe\Dev;
  3. use League\Csv\Reader;
  4. use SilverStripe\Control\Director;
  5. use SilverStripe\ORM\DataObject;
  6. /**
  7. * Utility class to facilitate complex CSV-imports by defining column-mappings
  8. * and custom converters.
  9. *
  10. * Uses League\Csv\Reader to process CSV input. Accepts a file path as
  11. * input.
  12. *
  13. * @see http://tools.ietf.org/html/rfc4180
  14. *
  15. * @todo Support for deleting existing records not matched in the import
  16. * (through relation checks)
  17. */
  18. class CsvBulkLoader extends BulkLoader
  19. {
  20. /**
  21. * Delimiter character (Default: comma).
  22. *
  23. * @var string
  24. */
  25. public $delimiter = ',';
  26. /**
  27. * Enclosure character (Default: doublequote)
  28. *
  29. * @var string
  30. */
  31. public $enclosure = '"';
  32. /**
  33. * Identifies whether the CSV has a header row.
  34. *
  35. * @var boolean
  36. */
  37. public $hasHeaderRow = true;
  38. /**
  39. * Number of lines to split large CSV files into.
  40. *
  41. * @var int
  42. *
  43. * @config
  44. */
  45. private static $lines = 1000;
  /**
   * Delegates to processAll() with the preview flag switched on.
   *
   * @inheritDoc
   */
  public function preview($filepath)
  {
    $previewOnly = true;

    return $this->processAll($filepath, $previewOnly);
  }
  /**
   * Read the CSV file at the given path and pass every row to processRecord().
   *
   * Each row is first cleaned of the leading tab that guards formula-like
   * cells ([SS-2017-007]) and, when a column map is configured, has its
   * columns renamed according to getNormalisedColumnMap(). Columns whose
   * normalised name starts with "_ignore_" are dropped from the row.
   *
   * Exceptions raised while reading or processing are caught and reported
   * with `print`; the reason is only included when Director::isDev() is true.
   *
   * @param string $filepath Path to the CSV file, resolved via Director::getAbsFile()
   * @param boolean $preview Passed through to processRecord()
   *
   * @return null|BulkLoader_Result Null when a failure happened before the result object was created
   */
  protected function processAll($filepath, $preview = false)
  {
    $previousDetectLE = ini_get('auto_detect_line_endings');
    ini_set('auto_detect_line_endings', true);

    // Defined up front so the final return is valid even if an exception is
    // caught before the result object has been created.
    $result = null;

    try {
      $filepath = Director::getAbsFile($filepath);
      $csvReader = Reader::createFromPath($filepath, 'r');

      // Apply the configured CSV controls; without this the public
      // $delimiter / $enclosure properties have no effect on parsing.
      $csvReader->setDelimiter($this->delimiter);
      $csvReader->setEnclosure($this->enclosure);

      $tabExtractor = function ($row, $rowOffset, $iterator) {
        foreach ($row as &$item) {
          // [SS-2017-007] Ensure all cells with leading tab and then [@=+] have the tab removed on import
          if (preg_match("/^\t[\-@=\+]+.*/", $item)) {
            $item = ltrim($item, "\t");
          }
        }
        // Break the reference so later use of $item cannot modify the row
        unset($item);

        return $row;
      };

      if ($this->columnMap) {
        $headerMap = $this->getNormalisedColumnMap();
        $remapper = function ($row, $rowOffset, $iterator) use ($headerMap, $tabExtractor) {
          $row = $tabExtractor($row, $rowOffset, $iterator);
          foreach ($headerMap as $column => $renamedColumn) {
            if ($column == $renamedColumn) {
              continue;
            }
            if (array_key_exists($column, $row)) {
              // "_ignore_" columns are removed without being copied
              if (strpos($renamedColumn, '_ignore_') !== 0) {
                $row[$renamedColumn] = $row[$column];
              }
              unset($row[$column]);
            }
          }

          return $row;
        };
      } else {
        $remapper = $tabExtractor;
      }

      // With neither a header row nor a column map there is nothing to name
      // the columns with, so no rows are processed and an empty result is
      // returned.
      $rows = [];
      if ($this->hasHeaderRow) {
        $rows = $csvReader->fetchAssoc(0, $remapper);
      } elseif ($this->columnMap) {
        $rows = $csvReader->fetchAssoc($headerMap, $remapper);
      }

      $result = BulkLoader_Result::create();
      foreach ($rows as $row) {
        $this->processRecord($row, $this->columnMap, $result, $preview);
      }
    } catch (\Exception $e) {
      $failedMessage = sprintf("Failed to parse %s", $filepath);
      if (Director::isDev()) {
        // Appended rather than re-formatted: the message already contains
        // the file path, which must not be interpreted as a format string.
        $failedMessage .= sprintf(" because %s", $e->getMessage());
      }
      print $failedMessage . PHP_EOL;
    } finally {
      ini_set('auto_detect_line_endings', $previousDetectLE);
    }

    return $result;
  }
  /**
   * Build a copy of the column map in which every value is a usable column name.
   *
   * - A null value becomes "_ignore_" followed by the column key.
   * - A value starting with "->" (a callback) maps the column onto itself.
   * - Any other value is copied unchanged.
   *
   * @return array
   */
  protected function getNormalisedColumnMap()
  {
    $map = [];
    foreach ($this->columnMap as $column => $newColumn) {
      // Null is tested first so that it is never handed to strpos()
      if (is_null($newColumn)) {
        // the column map must consist of unique scalar values
        // `null` can be present multiple times and is not scalar
        // so we name it in a standard way so we can remove it later
        $map[$column] = '_ignore_' . $column;
      } elseif (strpos($newColumn, "->") === 0) {
        $map[$column] = $column;
      } else {
        $map[$column] = $newColumn;
      }
    }

    return $map;
  }
  /**
   * Splits a large file up into many smaller files.
   *
   * When the loader has a header row, the first line of the source file is
   * repeated at the top of every chunk. A new chunk is only started when
   * there is another line to write, so no trailing header-only chunk is
   * produced.
   *
   * @param string $path Path to large file to split
   * @param int $lines Number of lines per file; falls back to the "lines" config value when not an int
   *
   * @return array List of file paths
   *
   * @throws \RuntimeException When the source file or a chunk file cannot be opened
   */
  protected function splitFile($path, $lines = null)
  {
    Deprecation::notice('5.0', 'splitFile is deprecated, please process files using a stream');

    $previous = ini_get('auto_detect_line_endings');
    ini_set('auto_detect_line_endings', true);

    if (!is_int($lines)) {
      $lines = $this->config()->get("lines");
    }

    $files = [];
    $from = false;
    $to = false;

    try {
      $from = fopen($path, 'r');
      if ($from === false) {
        throw new \RuntimeException(sprintf('Unable to open "%s" for reading', $path));
      }

      $header = '';
      if ($this->hasHeaderRow) {
        $header = fgets($from);
        if ($header === false) {
          // Empty source file: there is no header line to repeat
          $header = '';
        }
      }

      $new = $this->getNewSplitFileName();
      $to = fopen($new, 'w+');
      if ($to === false) {
        throw new \RuntimeException(sprintf('Unable to open "%s" for writing', $new));
      }
      if ($this->hasHeaderRow) {
        fwrite($to, $header);
      }
      $files[] = $new;

      $count = 0;
      // Loop on the read itself: fgets() returns false at end of file, so a
      // failed read is never written or counted.
      while (($line = fgets($from)) !== false) {
        if ($count > 0 && $count >= $lines) {
          fclose($to);

          // get a new temporary file name, to write the next lines to
          $new = $this->getNewSplitFileName();
          $to = fopen($new, 'w+');
          if ($to === false) {
            throw new \RuntimeException(sprintf('Unable to open "%s" for writing', $new));
          }
          if ($this->hasHeaderRow) {
            // add the headers to the new file
            fwrite($to, $header);
          }
          $files[] = $new;
          $count = 0;
        }

        fwrite($to, $line);
        $count++;
      }
    } finally {
      // Release both handles and restore the ini setting on every exit path
      if (is_resource($to)) {
        fclose($to);
      }
      if (is_resource($from)) {
        fclose($from);
      }
      ini_set('auto_detect_line_endings', $previous);
    }

    return $files;
  }
  /**
   * Build a unique ".csv" path under TEMP_PATH, prefixed with this class
   * name (namespace separators replaced by underscores).
   *
   * @return string
   */
  protected function getNewSplitFileName()
  {
    Deprecation::notice('5.0', 'getNewSplitFileName is deprecated, please name your files yourself');

    $prefix = str_replace('\\', '_', static::class);
    $basename = uniqid($prefix, true) . '.csv';

    return TEMP_PATH . DIRECTORY_SEPARATOR . $basename;
  }
  /**
   * Parse a single CSV file with CSVParser and pass each row to processRecord().
   *
   * @param string $filepath
   * @param boolean $preview Passed through to processRecord()
   *
   * @return BulkLoader_Result
   */
  protected function processChunk($filepath, $preview = false)
  {
    Deprecation::notice('5.0', 'processChunk is deprecated, please process rows individually');

    $results = BulkLoader_Result::create();

    $csv = new CSVParser(
      $filepath,
      $this->delimiter,
      $this->enclosure
    );

    // ColumnMap has two uses, depending on whether hasHeaderRow is set
    if ($this->columnMap) {
      // if the map goes to a callback, use the same key value as the map
      // value, rather than function name as multiple keys may use the
      // same callback
      $map = [];
      foreach ($this->columnMap as $k => $v) {
        // Only strings can be callback markers; the type check keeps null
        // (and other non-string) values away from strpos() while still
        // copying them into the map unchanged.
        $isCallback = is_string($v) && strpos($v, "->") === 0;
        $map[$k] = $isCallback ? $k : $v;
      }

      if ($this->hasHeaderRow) {
        $csv->mapColumns($map);
      } else {
        $csv->provideHeaderRow($map);
      }
    }

    foreach ($csv as $row) {
      $this->processRecord($row, $this->columnMap, $results, $preview);
    }

    return $results;
  }
  /**
   * Import one CSV row: resolve the target object, attach relations, copy
   * the field values across and record the outcome.
   *
   * The $columnMap argument is only forwarded to findExistingObject(); field
   * callbacks are looked up on $this->columnMap.
   *
   * @todo Better messages for relation checks and duplicate detection
   *
   * @param array $record Row data keyed by field name
   * @param array $columnMap
   * @param BulkLoader_Result $results Receives the created/updated object
   * @param boolean $preview When true, no write() calls are made and field values are not applied
   *
   * @return int ID of the processed object
   */
  protected function processRecord($record, $columnMap, &$results, $preview = false)
  {
    $class = $this->objectClass;

    // Reuse a matching record when one is found, otherwise start from a new object
    $existingObj = $this->findExistingObject($record, $columnMap);

    /** @var DataObject $obj */
    $obj = $existingObj ?: new $class();
    $schema = DataObject::getSchema();

    // Pass 1: find/create relations and store them on the object. This stays
    // a separate pass because other columns might rely on the relation being
    // present.
    foreach ($record as $fieldName => $val) {
      // nothing to look up for an unset value
      if ($this->isNullValue($val)) {
        continue;
      }

      $hasCallback = isset($this->relationCallbacks[$fieldName]);
      $isDotNotation = !$hasCallback && strpos($fieldName, '.') !== false;
      if (!$hasCallback && !$isDotNotation) {
        continue;
      }

      if ($hasCallback) {
        // A custom search method locates the related object for this value;
        // the importer itself is asked first, then the object being imported.
        $spec = $this->relationCallbacks[$fieldName];
        $relationName = $spec['relationname'];
        $callback = $spec['callback'];

        /** @var DataObject $relationObj */
        $relationObj = null;
        if ($this->hasMethod($callback)) {
          $relationObj = $this->{$callback}($obj, $val, $record);
        } elseif ($obj->hasMethod($callback)) {
          $relationObj = $obj->{$callback}($val, $record);
        }

        // Nothing usable came back: create a fresh related object
        if (!$relationObj || !$relationObj->exists()) {
          $relationClass = $schema->hasOneComponent(get_class($obj), $relationName);
          $relationObj = new $relationClass();
          if (!$preview) {
            $relationObj->write();
          }
        }
      } else {
        // Relation column in dot notation: the part before the first dot
        // names the relation.
        list($relationName) = explode('.', $fieldName);

        // always gives us a component (either empty or existing)
        $relationObj = $obj->getComponent($relationName);
        if (!$preview) {
          $relationObj->write();
        }
      }

      // Both branches finish the same way: link the relation and persist
      $idField = "{$relationName}ID";
      $obj->{$idField} = $relationObj->ID;
      if (!$preview) {
        $obj->write();
        $obj->flushCache(); // avoid relation caching confusion
      }
    }

    // Pass 2: copy the data across and write the record (skipped entirely in preview)
    if (!$preview) {
      foreach ($record as $fieldName => $val) {
        // A mapping that starts with "->" names an importer method to call
        $mapping = null;
        if ($this->columnMap && isset($this->columnMap[$fieldName])) {
          $mapping = $this->columnMap[$fieldName];
        }
        $importMethod = "import{$fieldName}";

        if ($mapping !== null && strpos($mapping, '->') === 0) {
          $funcName = substr($mapping, 2);
          $this->$funcName($obj, $val, $record);
        } elseif ($obj->hasMethod($importMethod)) {
          $obj->{$importMethod}($val, $record);
        } else {
          $obj->update([$fieldName => $val]);
        }
      }

      $obj->write();
    }

    // @todo better message support
    $message = '';

    // Report the object as updated or created
    if ($existingObj) {
      $results->addUpdated($obj, $message);
    } else {
      $results->addCreated($obj, $message);
    }

    $objID = $obj->ID;

    $obj->destroy();

    // memory usage
    unset($existingObj, $obj);

    return $objID;
  }
  /**
   * Find an existing object based on one or more uniqueness columns
   * specified via {@link self::$duplicateChecks}.
   *
   * Checks are tried in order and the first match wins. A check is either a
   * field name (string), matched with a filter on that field, or an array
   * with a "callback" key naming a method on the importer or on the object
   * class.
   *
   * @todo support $columnMap
   *
   * @param array $record CSV data column
   * @param array $columnMap Currently unused
   * @return DataObject|false The matching record, or false when no check matched
   */
  public function findExistingObject($record, $columnMap = [])
  {
    $SNG_objectClass = singleton($this->objectClass);

    foreach ($this->duplicateChecks as $fieldName => $duplicateCheck) {
      $existingRecord = null;

      if (is_string($duplicateCheck)) {
        // Skip current duplicate check if field value is empty
        if (empty($record[$duplicateCheck])) {
          continue;
        }

        // Check existing record with this value
        $dbFieldValue = $record[$duplicateCheck];
        $existingRecord = DataObject::get($this->objectClass)
          ->filter($duplicateCheck, $dbFieldValue)
          ->first();
      } elseif (is_array($duplicateCheck) && isset($duplicateCheck['callback'])) {
        $callback = $duplicateCheck['callback'];

        // The row may not contain the checked column; pass null in that
        // case instead of reading an undefined index.
        $value = isset($record[$fieldName]) ? $record[$fieldName] : null;

        if ($this->hasMethod($callback)) {
          $existingRecord = $this->{$callback}($value, $record);
        } elseif ($SNG_objectClass->hasMethod($callback)) {
          $existingRecord = $SNG_objectClass->{$callback}($value, $record);
        } else {
          user_error("CsvBulkLoader::findExistingObject():"
            . " {$callback} not found on importer or object class.", E_USER_ERROR);
        }
      } else {
        user_error('CsvBulkLoader::findExistingObject(): Wrong format for $duplicateChecks', E_USER_ERROR);
      }

      if ($existingRecord) {
        return $existingRecord;
      }
    }

    return false;
  }
  /**
   * Determine whether any loaded files should be parsed with a
   * header-row (otherwise we rely on {@link self::$columnMap}).
   *
   * NOTE(review): isset() is true for any non-null value, including an empty
   * array, so the result depends on how the parent class initialises
   * $columnMap; confirm against BulkLoader.
   *
   * @return boolean
   */
  public function hasHeaderRow()
  {
    if ($this->hasHeaderRow) {
      return true;
    }

    return isset($this->columnMap);
  }
  386. }