/libraries/classes/Plugins/Import/ImportCsv.php
PHP | 890 lines | 633 code | 126 blank | 131 comment | 203 complexity | 75f0252ea96f282cfa5c57cb8fd02760 MD5 | raw file
Possible License(s): GPL-2.0, MIT, LGPL-3.0
- <?php
- /**
- * CSV import plugin for phpMyAdmin
- *
- * @todo add an option for handling NULL values
- */
- declare(strict_types=1);
- namespace PhpMyAdmin\Plugins\Import;
- use PhpMyAdmin\File;
- use PhpMyAdmin\Html\Generator;
- use PhpMyAdmin\Message;
- use PhpMyAdmin\Properties\Options\Groups\OptionsPropertyRootGroup;
- use PhpMyAdmin\Properties\Options\Items\BoolPropertyItem;
- use PhpMyAdmin\Properties\Options\Items\NumberPropertyItem;
- use PhpMyAdmin\Properties\Options\Items\TextPropertyItem;
- use PhpMyAdmin\Properties\Plugins\ImportPluginProperties;
- use PhpMyAdmin\Util;
- use function __;
- use function array_shift;
- use function array_splice;
- use function basename;
- use function count;
- use function is_array;
- use function mb_strlen;
- use function mb_strtolower;
- use function mb_substr;
- use function preg_grep;
- use function preg_replace;
- use function preg_split;
- use function rtrim;
- use function str_contains;
- use function strlen;
- use function strtr;
- use function trim;
- /**
- * Handles the import for the CSV format
- */
- class ImportCsv extends AbstractImportCsv
- {
- /**
- * Whether to analyze tables
- *
- * @var bool
- */
- private $analyze;
/**
 * Gives the short, lowercase identifier of this import format.
 *
 * @psalm-return non-empty-lowercase-string
 */
public function getName(): string
{
    return 'csv';
}
/**
 * Builds the property tree describing this plugin and its options.
 *
 * Table analysis is switched on for every import target except a single
 * table ($GLOBALS['plugin_param'] === 'table'). The set of general options
 * also depends on that target; the order in which the options are added is
 * the order in which they were added originally.
 */
protected function setProperties(): ImportPluginProperties
{
    $isTableImport = $GLOBALS['plugin_param'] === 'table';
    $this->setAnalyze(! $isTableImport);

    $pluginProperties = new ImportPluginProperties();
    $pluginProperties->setText('CSV');
    $pluginProperties->setExtension('csv');
    $pluginProperties->setOptionsText(__('Options'));

    // Root group holding the options of $pluginProperties;
    // this will be shown as "Format specific options"
    $rootGroup = new OptionsPropertyRootGroup('Format Specific Options');

    $general = $this->getGeneralOptions();

    if ($isTableImport) {
        $general->addProperty(
            new NumberPropertyItem(
                'partial_import',
                __(
                    'Import these many number of rows (optional):'
                )
            )
        );

        $columnsHint = new Message(
            __(
                'If the data in each row of the file is not'
                . ' in the same order as in the database, list the corresponding'
                . ' column names here. Column names must be separated by commas'
                . ' and not enclosed in quotations.'
            )
        );
        $general->addProperty(
            new TextPropertyItem(
                'columns',
                __('Column names:') . ' ' . Generator::showHint($columnsHint->getMessage())
            )
        );
    } else {
        $general->addProperty(
            new TextPropertyItem(
                'new_tbl_name',
                __(
                    'Name of the new table (optional):'
                )
            )
        );

        // A new database can only be named when importing at server level
        if ($GLOBALS['plugin_param'] === 'server') {
            $general->addProperty(
                new TextPropertyItem(
                    'new_db_name',
                    __(
                        'Name of the new database (optional):'
                    )
                )
            );
        }

        $general->addProperty(
            new NumberPropertyItem(
                'partial_import',
                __(
                    'Import these many number of rows (optional):'
                )
            )
        );
        $general->addProperty(
            new BoolPropertyItem(
                'col_names',
                __(
                    'The first line of the file contains the table column names'
                    . ' <i>(if this is unchecked, the first line will become part'
                    . ' of the data)</i>'
                )
            )
        );
    }

    $general->addProperty(
        new BoolPropertyItem(
            'ignore',
            __('Do not abort on INSERT error')
        )
    );

    // Attach the main group to the root group, then the root group to the plugin
    $rootGroup->addProperty($general);
    $pluginProperties->setOptions($rootGroup);

    return $pluginProperties;
}
/**
 * Handles the whole import logic
 *
 * Reads the input chunk by chunk, splits it into lines and fields with a
 * character-level parser, and then either
 *  - collects the rows so a new table can be analyzed and created
 *    (when getAnalyze() is true), or
 *  - builds and runs one INSERT statement per parsed line.
 *
 * The CSV dialect (separator, enclosure, escape, line terminator) and the
 * import state ($error, $message, $finished, $timeout_passed) are read from
 * and written to globals.
 *
 * @param File|null $importHandle handle passed through to getNextChunk()
 * @param array     $sql_data     2-element array with sql data
 */
public function doImport(?File $importHandle = null, array &$sql_data = []): void
{
    global $error, $message, $dbi;
    global $db, $table, $csv_terminated, $csv_enclosed, $csv_escaped,
        $csv_new_line, $csv_columns, $errorUrl;
    // $csv_replace and $csv_ignore should have been here,
    // but we use directly from $_POST
    global $timeout_passed, $finished;

    // Turn the textual escapes typed in the form into the real characters
    $replacements = [
        '\\n' => "\n",
        '\\t' => "\t",
        '\\r' => "\r",
    ];
    $csv_terminated = strtr($csv_terminated, $replacements);
    $csv_enclosed = strtr($csv_enclosed, $replacements);
    $csv_escaped = strtr($csv_escaped, $replacements);
    $csv_new_line = strtr($csv_new_line, $replacements);

    [$error, $message] = $this->buildErrorsForParams(
        $csv_terminated,
        $csv_enclosed,
        $csv_escaped,
        $csv_new_line,
        (string) $errorUrl
    );

    [$sql_template, $required_fields, $fields] = $this->getSqlTemplateAndRequiredFields($db, $table, $csv_columns);

    // Defaults for parser
    $i = 0;
    $len = 0;
    $lastlen = null;
    $line = 1;
    $lasti = -1;
    $values = [];
    $csv_finish = false;
    $max_lines = 0; // defaults to 0 (get all the lines)

    /**
     * The row limit comes straight from the request, so it may be missing,
     * an array, non-numeric, fractional or negative. Only a positive integer
     * is accepted; anything else means "get all the lines".
     */
    $partial_import = $_REQUEST['csv_partial_import'] ?? 0;
    if (! is_array($partial_import) && (int) $partial_import > 0) {
        $max_lines = (int) $partial_import;
    }

    $max_lines_constraint = $max_lines + 1;
    // if the first row has to be counted as column names, include one more row in the max lines
    if (isset($_REQUEST['csv_col_names'])) {
        $max_lines_constraint++;
    }

    $tempRow = [];
    $rows = [];
    $col_names = [];
    $tables = [];

    $buffer = '';
    $col_count = 0;
    $max_cols = 0;
    $csv_terminated_len = mb_strlen($csv_terminated);
    // First *character* of the separator. The buffer is walked with mb_substr(),
    // so the comparison must be character based as well; a byte index
    // ($csv_terminated[0]) never matches a multibyte first character.
    $csv_terminated_first = mb_substr($csv_terminated, 0, 1);

    while (! ($finished && $i >= $len) && ! $error && ! $timeout_passed) {
        $data = $this->import->getNextChunk($importHandle);
        if ($data === false) {
            // subtract data we didn't handle yet and stop processing
            $GLOBALS['offset'] -= strlen($buffer);
            break;
        }

        if ($data !== true) {
            // Append new data to buffer
            $buffer .= $data;
            unset($data);

            // Force a trailing new line at EOF to prevent parsing problems
            if ($finished && $buffer) {
                $finalch = mb_substr($buffer, -1);
                if ($csv_new_line === 'auto' && $finalch != "\r" && $finalch != "\n") {
                    $buffer .= "\n";
                } elseif ($csv_new_line !== 'auto' && $finalch != $csv_new_line) {
                    $buffer .= $csv_new_line;
                }
            }

            // Do not parse string when we're not at the end
            // and don't have new line inside
            if (
                ($csv_new_line === 'auto'
                && ! str_contains($buffer, "\r")
                && ! str_contains($buffer, "\n"))
                || ($csv_new_line !== 'auto'
                && ! str_contains($buffer, $csv_new_line))
            ) {
                continue;
            }
        }

        // Current length of our buffer
        $len = mb_strlen($buffer);
        // Currently parsed char
        $ch = mb_substr($buffer, $i, 1);
        if ($csv_terminated_len > 1 && $ch == $csv_terminated_first) {
            $ch = $this->readCsvTerminatedString($buffer, $ch, $i, $csv_terminated_len);
            $i += $csv_terminated_len - 1;
        }

        while ($i < $len) {
            // Deadlock protection
            if ($lasti == $i && $lastlen == $len) {
                $message = Message::error(
                    __('Invalid format of CSV input on line %d.')
                );
                $message->addParam($line);
                $error = true;
                break;
            }

            $lasti = $i;
            $lastlen = $len;

            // This can happen with auto EOL and \r at the end of buffer
            if (! $csv_finish) {
                // Grab empty field
                if ($ch == $csv_terminated) {
                    if ($i == $len - 1) {
                        break;
                    }

                    $values[] = '';
                    $i++;
                    $ch = mb_substr($buffer, $i, 1);
                    if ($csv_terminated_len > 1 && $ch == $csv_terminated_first) {
                        $ch = $this->readCsvTerminatedString($buffer, $ch, $i, $csv_terminated_len);
                        $i += $csv_terminated_len - 1;
                    }

                    continue;
                }

                // Grab one field
                $fallbacki = $i;
                if ($ch == $csv_enclosed) {
                    if ($i == $len - 1) {
                        break;
                    }

                    $need_end = true;
                    $i++;
                    $ch = mb_substr($buffer, $i, 1);
                    if ($csv_terminated_len > 1 && $ch == $csv_terminated_first) {
                        $ch = $this->readCsvTerminatedString($buffer, $ch, $i, $csv_terminated_len);
                        $i += $csv_terminated_len - 1;
                    }
                } else {
                    $need_end = false;
                }

                $fail = false;
                $value = '';
                while (
                    ($need_end
                    && ($ch != $csv_enclosed
                    || $csv_enclosed == $csv_escaped))
                    || (! $need_end
                    && ! ($ch == $csv_terminated
                    || $ch == $csv_new_line
                    || ($csv_new_line === 'auto'
                    && ($ch == "\r" || $ch == "\n"))))
                ) {
                    if ($ch == $csv_escaped) {
                        if ($i == $len - 1) {
                            $fail = true;
                            break;
                        }

                        $i++;
                        $ch = mb_substr($buffer, $i, 1);
                        if ($csv_terminated_len > 1 && $ch == $csv_terminated_first) {
                            $ch = $this->readCsvTerminatedString($buffer, $ch, $i, $csv_terminated_len);
                            $i += $csv_terminated_len - 1;
                        }

                        if (
                            $csv_enclosed == $csv_escaped
                            && ($ch == $csv_terminated
                            || $ch == $csv_new_line
                            || ($csv_new_line === 'auto'
                            && ($ch == "\r" || $ch == "\n")))
                        ) {
                            break;
                        }
                    }

                    $value .= $ch;
                    if ($i == $len - 1) {
                        if (! $finished) {
                            $fail = true;
                        }

                        break;
                    }

                    $i++;
                    $ch = mb_substr($buffer, $i, 1);
                    if ($csv_terminated_len > 1 && $ch == $csv_terminated_first) {
                        $ch = $this->readCsvTerminatedString($buffer, $ch, $i, $csv_terminated_len);
                        $i += $csv_terminated_len - 1;
                    }
                }

                // unquoted NULL string
                if ($need_end === false && $value === 'NULL') {
                    $value = null;
                }

                if ($fail) {
                    // The field is incomplete: rewind to its start and wait for more data
                    $i = $fallbacki;
                    $ch = mb_substr($buffer, $i, 1);
                    if ($csv_terminated_len > 1 && $ch == $csv_terminated_first) {
                        $i += $csv_terminated_len - 1;
                    }

                    break;
                }

                // Need to strip trailing enclosing char?
                if ($need_end && $ch == $csv_enclosed) {
                    if ($finished && $i == $len - 1) {
                        $ch = null;
                    } elseif ($i == $len - 1) {
                        $i = $fallbacki;
                        $ch = mb_substr($buffer, $i, 1);
                        if ($csv_terminated_len > 1 && $ch == $csv_terminated_first) {
                            $i += $csv_terminated_len - 1;
                        }

                        break;
                    } else {
                        $i++;
                        $ch = mb_substr($buffer, $i, 1);
                        if ($csv_terminated_len > 1 && $ch == $csv_terminated_first) {
                            $ch = $this->readCsvTerminatedString($buffer, $ch, $i, $csv_terminated_len);
                            $i += $csv_terminated_len - 1;
                        }
                    }
                }

                // Are we at the end?
                if (
                    $ch == $csv_new_line
                    || ($csv_new_line === 'auto' && ($ch == "\r" || $ch == "\n"))
                    || ($finished && $i == $len - 1)
                ) {
                    $csv_finish = true;
                }

                // Go to next char
                if ($ch == $csv_terminated) {
                    if ($i == $len - 1) {
                        $i = $fallbacki;
                        $ch = mb_substr($buffer, $i, 1);
                        if ($csv_terminated_len > 1 && $ch == $csv_terminated_first) {
                            $i += $csv_terminated_len - 1;
                        }

                        break;
                    }

                    $i++;
                    $ch = mb_substr($buffer, $i, 1);
                    if ($csv_terminated_len > 1 && $ch == $csv_terminated_first) {
                        $ch = $this->readCsvTerminatedString($buffer, $ch, $i, $csv_terminated_len);
                        $i += $csv_terminated_len - 1;
                    }
                }

                // If everything went okay, store value
                $values[] = $value;
            }

            // End of line
            if (
                ! $csv_finish
                && $ch != $csv_new_line
                && ($csv_new_line !== 'auto' || ($ch != "\r" && $ch != "\n"))
            ) {
                continue;
            }

            if ($csv_new_line === 'auto' && $ch == "\r") { // Handle "\r\n"
                if ($i >= ($len - 2) && ! $finished) {
                    break; // We need more data to decide new line
                }

                if (mb_substr($buffer, $i + 1, 1) == "\n") {
                    $i++;
                }
            }

            // We didn't parse value till the end of line, so there was
            // empty one
            if (! $csv_finish) {
                $values[] = '';
            }

            if ($this->getAnalyze()) {
                // Collect the row for later analysis and track the widest row
                foreach ($values as $val) {
                    $tempRow[] = $val;
                    ++$col_count;
                }

                if ($col_count > $max_cols) {
                    $max_cols = $col_count;
                }

                $col_count = 0;

                $rows[] = $tempRow;
                $tempRow = [];
            } else {
                // Do we have correct count of values?
                if (count($values) != $required_fields) {
                    // Hack for excel: tolerate one trailing ";" field
                    $last_key = count($values) - 1;
                    if ($values[$last_key] === ';') {
                        unset($values[$last_key]);
                    }

                    // Re-check after dropping the trailing field, so a line that
                    // is still too short or too long is reported here instead of
                    // being sent to the server as a malformed INSERT.
                    if (count($values) != $required_fields) {
                        $message = Message::error(
                            __(
                                'Invalid column count in CSV input on line %d.'
                            )
                        );
                        $message->addParam($line);
                        $error = true;
                        break;
                    }
                }

                $first = true;
                $sql = $sql_template;
                foreach ($values as $val) {
                    if (! $first) {
                        $sql .= ', ';
                    }

                    if ($val === null) {
                        $sql .= 'NULL';
                    } else {
                        $sql .= '\''
                            . $dbi->escapeString($val)
                            . '\'';
                    }

                    $first = false;
                }

                $sql .= ')';
                if (isset($_POST['csv_replace'])) {
                    $sql .= ' ON DUPLICATE KEY UPDATE ';
                    foreach ($fields as $field) {
                        $fieldName = Util::backquote($field['Field']);
                        $sql .= $fieldName . ' = VALUES(' . $fieldName
                            . '), ';
                    }

                    $sql = rtrim($sql, ', ');
                }

                /**
                 * @todo maybe we could add original line to verbose
                 * SQL in comment
                 */
                $this->import->runQuery($sql, $sql, $sql_data);
            }

            // Drop the consumed line from the buffer and reset the parser state
            $line++;
            $csv_finish = false;
            $values = [];
            $buffer = mb_substr($buffer, $i + 1);
            $len = mb_strlen($buffer);
            $i = 0;
            $lasti = -1;
            $ch = mb_substr($buffer, 0, 1);
            if ($max_lines > 0 && $line == $max_lines_constraint) {
                $finished = 1;
                break;
            }
        }

        if ($max_lines > 0 && $line == $max_lines_constraint) {
            $finished = 1;
            break;
        }
    }

    if ($this->getAnalyze()) {
        /* Fill out all rows */
        $num_rows = count($rows);
        for ($i = 0; $i < $num_rows; ++$i) {
            for ($j = count($rows[$i]); $j < $max_cols; ++$j) {
                $rows[$i][] = 'NULL';
            }
        }

        $col_names = $this->getColumnNames($col_names, $max_cols, $rows);

        /* Remove the first row if it contains the column names */
        if (isset($_REQUEST['csv_col_names'])) {
            array_shift($rows);
        }

        $tbl_name = $this->getTableNameFromImport((string) $db);

        $tables[] = [
            $tbl_name,
            $col_names,
            $rows,
        ];

        /* Obtain the best-fit MySQL types for each column */
        $analyses = [];
        $analyses[] = $this->import->analyzeTable($tables[0]);

        /**
         * string $db_name (no backquotes)
         *
         * array $table = array(table_name, array() column_names, array()() rows)
         * array $tables = array of "$table"s
         *
         * array $analysis = array(array() column_types, array() column_sizes)
         * array $analyses = array of "$analysis"s
         *
         * array $create = array of SQL strings
         *
         * array $options = an associative array of options
         */

        /* Set database name to the currently selected one, if applicable,
         * Otherwise, check if user provided the database name in the request,
         * if not, set the default name
         */
        if (isset($_REQUEST['csv_new_db_name']) && strlen($_REQUEST['csv_new_db_name']) > 0) {
            $newDb = $_REQUEST['csv_new_db_name'];
        } else {
            $result = $dbi->fetchResult('SHOW DATABASES');
            if (! is_array($result)) {
                $result = [];
            }

            $newDb = 'CSV_DB ' . (count($result) + 1);
        }

        [$db_name, $options] = $this->getDbnameAndOptions($db, $newDb);

        /* Non-applicable parameters */
        $create = null;

        /* Created and execute necessary SQL statements from data */
        $this->import->buildSql($db_name, $tables, $analyses, $create, $options, $sql_data);

        unset($tables, $analyses);
    }

    // Commit any possible data in buffers
    $this->import->runQuery('', '', $sql_data);

    // Values left over here belong to a line that was never terminated
    if (count($values) == 0 || $error !== false) {
        return;
    }

    $message = Message::error(
        __('Invalid format of CSV input on line %d.')
    );
    $message->addParam($line);
    $error = true;
}
/**
 * Validates the CSV dialect parameters.
 *
 * The checks run in a fixed order and only the first failing one is
 * reported: the column separator must not be empty, the enclosure and the
 * escape character may be at most one character long, and the line
 * terminator must be exactly one character or the literal 'auto'.
 *
 * On failure the global $message / $error are set and
 * Generator::mysqlDie() is called with the message. When every parameter is
 * acceptable the globals are left untouched.
 *
 * @return array the global [$error, $message] pair
 */
private function buildErrorsForParams(
    string $csvTerminated,
    string $csvEnclosed,
    string $csvEscaped,
    string $csvNewLine,
    string $errUrl
): array {
    global $error, $message;

    // Label of the first offending form field, or null when all are fine
    $invalidOption = null;
    if (strlen($csvTerminated) === 0) {
        $invalidOption = __('Columns terminated with');
    } elseif (mb_strlen($csvEnclosed) > 1) {
        // The default dialog of MS Excel when generating a CSV produces a
        // semi-colon-separated file with no chance of specifying the
        // enclosing character. Thus, users who want to import this file
        // tend to remove the enclosing character on the Import dialog.
        // I could not find a test case where having no enclosing characters
        // confuses this script.
        // But the parser won't work correctly with strings so we allow just
        // one character.
        $invalidOption = __('Columns enclosed with');
    } elseif (mb_strlen($csvEscaped) > 1) {
        // I could not find a test case where having no escaping characters
        // confuses this script.
        // But the parser won't work correctly with strings so we allow just
        // one character.
        $invalidOption = __('Columns escaped with');
    } elseif (mb_strlen($csvNewLine) != 1 && $csvNewLine !== 'auto') {
        $invalidOption = __('Lines terminated with');
    }

    if ($invalidOption === null) {
        return [$error, $message];
    }

    $message = Message::error(
        __('Invalid parameter for CSV import: %s')
    );
    $message->addParam($invalidOption);
    $error = true;

    // If there is an error in the parameters entered,
    // indicate that immediately.
    Generator::mysqlDie(
        $message->getMessage(),
        '',
        false,
        $errUrl
    );

    return [$error, $message];
}
/**
 * Chooses the name of the table the imported rows go into.
 *
 * A non-empty 'csv_new_tbl_name' request value wins. Otherwise the name is
 * derived from the global $import_file_name: '.csv' suffix removed,
 * lower-cased, every character outside [a-zA-Z0-9_] replaced by '_'.
 * When a database name is given and the result of SHOW TABLES contains
 * names matching the derived name, '_<n>' is appended, where <n> is one
 * more than the number of names matching '<derived name>_'.
 */
private function getTableNameFromImport(string $databaseName): string
{
    global $import_file_name, $dbi;

    $derivedName = (string) preg_replace(
        '/[^a-zA-Z0-9_]/',
        '_',
        mb_strtolower(basename($import_file_name, '.csv'))
    );

    // get new table name, if user didn't provide one, set the default name
    $requestedName = $_REQUEST['csv_new_tbl_name'] ?? null;
    if ($requestedName !== null && strlen($requestedName) > 0) {
        return $requestedName;
    }

    if (mb_strlen($databaseName) === 0) {
        return $derivedName;
    }

    $existingTables = $dbi->fetchResult('SHOW TABLES');

    // if no table then use filename as table name
    if (count($existingTables) === 0) {
        return $derivedName;
    }

    // check to see if {filename} as table exist; if not, use filename as table name
    $sameName = preg_grep('/' . $derivedName . '/isU', $existingTables);
    if ($sameName === false || count($sameName) === 0) {
        return $derivedName;
    }

    // check if {filename}_ as table exist
    $suffixed = preg_grep('/' . $derivedName . '_/isU', $existingTables);
    if ($suffixed === false) {
        return $derivedName;
    }

    return $derivedName . '_' . (count($suffixed) + 1);
}
/**
 * Determines the column names of the table to create.
 *
 * When the 'csv_col_names' request flag is set and at least one row was
 * parsed, the first row supplies the names. Trailing whitespace is removed
 * because MySQL column names can't end with a space character. A header
 * cell holding null (the parser turns an unquoted NULL into null) is
 * restored to the literal text 'NULL' instead of being passed to rtrim(),
 * which would throw a TypeError under strict_types.
 *
 * Whenever the resulting list does not contain exactly $maxCols names it is
 * replaced by the generated names 'COL 1' .. 'COL <maxCols>'.
 *
 * @param array $columnNames names known so far (normally empty)
 * @param int   $maxCols     number of columns of the widest row
 * @param array $rows        parsed rows, each a list of string|null values
 *
 * @return array list of column names
 */
private function getColumnNames(array $columnNames, int $maxCols, array $rows): array
{
    // $rows is a by-value copy, so shifting the header off does not affect the caller
    if (isset($_REQUEST['csv_col_names']) && count($rows) > 0) {
        $headerRow = array_shift($rows);
        $columnNames = [];
        // MySQL column names can't end with a space character.
        foreach ($headerRow as $key => $col_name) {
            $columnNames[$key] = $col_name === null ? 'NULL' : rtrim((string) $col_name);
        }
    }

    if (count($columnNames) != $maxCols) {
        // Fill out column names, discarding a list of the wrong size so the
        // result always has exactly $maxCols entries
        $columnNames = [];
        for ($i = 0; $i < $maxCols; ++$i) {
            $columnNames[] = 'COL ' . ($i + 1);
        }
    }

    return $columnNames;
}
/**
 * Prepares the INSERT statement prefix used for every imported line.
 *
 * Nothing is prepared (empty template, 0 fields) when the import runs in
 * analyze mode or when no database/table is selected. Otherwise the
 * template is 'INSERT [IGNORE] INTO `table` [(`col`, ...)] VALUES (' and
 * the field list is either every column of the table or, when
 * $csvColumns is given, the columns named there in the given order.
 *
 * A name in $csvColumns that is not a column of the table sets the global
 * $message / $error and stops processing the list.
 *
 * @param string|null $db         database name
 * @param string|null $table      table name
 * @param string|null $csvColumns comma separated column names typed by the user
 *
 * @return array [string $sqlTemplate, int $requiredFields, array $fields]
 */
private function getSqlTemplateAndRequiredFields(
    ?string $db,
    ?string $table,
    ?string $csvColumns
): array {
    global $dbi, $error, $message;

    $requiredFields = 0;
    $sqlTemplate = '';
    $fields = [];

    if ($this->getAnalyze() || $db === null || $table === null) {
        return [$sqlTemplate, $requiredFields, $fields];
    }

    $sqlTemplate = 'INSERT';
    if (isset($_POST['csv_ignore'])) {
        $sqlTemplate .= ' IGNORE';
    }

    $sqlTemplate .= ' INTO ' . Util::backquote($table);

    $tmp_fields = $dbi->getColumns($db, $table);

    // Explicit emptiness test: empty() would also discard the valid column list "0"
    if ($csvColumns === null || $csvColumns === '') {
        $fields = $tmp_fields;
    } else {
        $sqlTemplate .= ' (';
        $tmp = preg_split('/,( ?)/', $csvColumns);
        if ($tmp === false) {
            $tmp = [];
        }

        foreach ($tmp as $val) {
            if (count($fields) > 0) {
                $sqlTemplate .= ', ';
            }

            /* Trim also `, if user already included backquoted fields */
            $val = trim($val, " \t\r\n\0\x0B`");

            // Strict comparison: with == two numeric-looking names such as
            // '10' and '1e1' compare equal and the wrong column is picked.
            $matchedField = null;
            foreach ($tmp_fields as $field) {
                if ($field['Field'] === $val) {
                    $matchedField = $field;
                    break;
                }
            }

            if ($matchedField === null) {
                $message = Message::error(
                    __(
                        'Invalid column (%s) specified! Ensure that columns'
                        . ' names are spelled correctly, separated by commas'
                        . ', and not enclosed in quotes.'
                    )
                );
                $message->addParam($val);
                $error = true;
                break;
            }

            $fields[] = $matchedField;
            $sqlTemplate .= Util::backquote($val);
        }

        $sqlTemplate .= ') ';
    }

    $requiredFields = count($fields);
    $sqlTemplate .= ' VALUES (';

    return [$sqlTemplate, $requiredFields, $fields];
}
/**
 * Completes a multi-character "columns separated with" candidate.
 *
 * Starting from the already read character $ch at position $i, the
 * following $csv_terminated_len - 1 characters of $buffer are appended one
 * by one, so the result can be compared against the full separator string.
 * Positions past the end of the buffer contribute nothing.
 *
 * @param string $buffer             The original string buffer read from
 *                                   csv file
 * @param string $ch                 Partially read "column Separated with"
 *                                   string; the appended result is returned
 * @param int    $i                  Current read counter of buffer string
 * @param int    $csv_terminated_len The length of "column separated with"
 *                                   String
 *
 * @return string
 */
public function readCsvTerminatedString($buffer, $ch, $i, $csv_terminated_len)
{
    $lastOffset = $i + $csv_terminated_len - 1;
    for ($offset = $i + 1; $offset <= $lastOffset; $offset++) {
        $ch .= mb_substr($buffer, $offset, 1);
    }

    return $ch;
}
- /* ~~~~~~~~~~~~~~~~~~~~ Getters and Setters ~~~~~~~~~~~~~~~~~~~~ */
/**
 * Tells whether the imported data is analyzed to create a table
 * (true) or inserted as-is (false).
 */
private function getAnalyze(): bool
{
    return $this->analyze;
}
/**
 * Stores whether the imported data should be analyzed.
 *
 * @param bool $analyze true to analyze the table, false otherwise
 */
private function setAnalyze($analyze): void
{
    $this->analyze = $analyze;
}
- }