PageRenderTime 27ms CodeModel.GetById 17ms RepoModel.GetById 0ms app.codeStats 0ms

/www/addons/pma/libraries/import/csv.php

https://bitbucket.org/onekit/mrhost
PHP | 440 lines | 334 code | 45 blank | 61 comment | 142 complexity | 0825ec8508d0515055dcf40c9a36d0d2 MD5 | raw file
Possible License(s): GPL-2.0
  1. <?php
  2. /* vim: set expandtab sw=4 ts=4 sts=4: */
  3. /**
  4. * CSV import plugin for phpMyAdmin
  5. *
  6. * @todo add an option for handling NULL values
  7. * @package phpMyAdmin-Import
  8. */
  /* Abort unless this file is being loaded from within phpMyAdmin. */
  if (! defined('PHPMYADMIN')) {
      exit;
  }

  /*
   * "Analyze" mode is active for every value of $plugin_param except 'table'.
   * In that mode the parsed rows are collected and later handed to
   * PMA_analyzeTable()/PMA_buildSQL() instead of being written row by row.
   */
  $analyze = ($plugin_param !== 'table');
  /*
   * When $plugin_list is set the plugin is only being asked to describe
   * itself: register the CSV entry with its option widgets and stop before
   * any import work is done.
   */
  if (isset($plugin_list)) {
      $plugin_list['csv'] = array(
          'text'         => __('CSV'),
          'extension'    => 'csv',
          'options'      => array(
              array(
                  'type' => 'begin_group',
                  'name' => 'general_opts',
              ),
              array(
                  'type' => 'bool',
                  'name' => 'replace',
                  'text' => __('Replace table data with file'),
              ),
              array(
                  'type' => 'bool',
                  'name' => 'ignore',
                  'text' => __('Do not abort on INSERT error'),
              ),
              array(
                  'type' => 'text',
                  'name' => 'terminated',
                  'text' => __('Columns separated with:'),
                  'size' => 2,
                  'len'  => 2,
              ),
              array(
                  'type' => 'text',
                  'name' => 'enclosed',
                  'text' => __('Columns enclosed with:'),
                  'size' => 2,
                  'len'  => 2,
              ),
              array(
                  'type' => 'text',
                  'name' => 'escaped',
                  'text' => __('Columns escaped with:'),
                  'size' => 2,
                  'len'  => 2,
              ),
              array(
                  'type' => 'text',
                  'name' => 'new_line',
                  'text' => __('Lines terminated with:'),
                  'size' => 2,
              ),
          ),
          'options_text' => __('Options'),
      );

      if ($plugin_param === 'table') {
          // Importing into an existing table: let the user map file columns
          // onto table columns by name.
          $hint = new PMA_Message(__('If the data in each row of the file is not in the same order as in the database, list the corresponding column names here. Column names must be separated by commas and not enclosed in quotations.'));
          $plugin_list['csv']['options'][] = array(
              'type' => 'text',
              'name' => 'columns',
              'text' => __('Column names: ') . PMA_showHint($hint),
          );
      } else {
          // Any other target: offer to take the column names from the
          // first line of the file.
          $plugin_list['csv']['options'][] = array(
              'type' => 'bool',
              'name' => 'col_names',
              'text' => __('The first line of the file contains the table column names <i>(if this is unchecked, the first line will become part of the data)</i>'),
          );
      }

      $plugin_list['csv']['options'][] = array('type' => 'end_group');

      /* Nothing below applies when the plugin is merely being listed. */
      return;
  }
  /*
   * The separator options arrive as typed by the user, so the two-character
   * sequences \n, \t and \r are turned into the control characters they name.
   */
  $replacements = array(
      '\\n' => "\n",
      '\\t' => "\t",
      '\\r' => "\r",
  );
  $csv_terminated = strtr($csv_terminated, $replacements);
  $csv_enclosed   = strtr($csv_enclosed, $replacements);
  $csv_escaped    = strtr($csv_escaped, $replacements);
  $csv_new_line   = strtr($csv_new_line, $replacements);

  /*
   * The parser works on single characters, so:
   *  - the column separator and the escape character must be exactly one
   *    character long;
   *  - the enclosing character may be empty (MS Excel's default CSV export
   *    is semicolon-separated with no way to choose an enclosing character,
   *    so users tend to clear that field) but never longer than one
   *    character;
   *  - the line terminator is one character or the keyword 'auto'.
   */
  $param_error = strlen($csv_terminated) != 1
      || strlen($csv_enclosed) > 1
      || strlen($csv_escaped) != 1
      || (strlen($csv_new_line) != 1 && $csv_new_line != 'auto');

  // If there is an error in the parameters entered, indicate that immediately,
  // naming the first offending option (checked in the same order as above).
  if ($param_error) {
      $message = PMA_Message::error(__('Invalid parameter for CSV import: %s'));
      if (strlen($csv_terminated) != 1) {
          $message->addParam(__('Columns terminated by'), false);
      } elseif (strlen($csv_enclosed) > 1) {
          $message->addParam(__('Columns enclosed by'), false);
      } elseif (strlen($csv_escaped) != 1) {
          $message->addParam(__('Columns escaped by'), false);
      } else {
          // Only the line terminator check can have failed at this point.
          $message->addParam(__('Lines terminated by'), false);
      }
      $error = TRUE;
      PMA_mysqlDie($message->getMessage(), '', '', $err_url);
  }
  /* Unparsed input carried over between chunks. */
  $buffer = '';
  /* Number of values every data line must supply (direct table import only). */
  $required_fields = 0;

  if (!$analyze) {
      /*
       * Direct import into an existing table: prepare the constant head of
       * the statement, "<verb> INTO `table` [(`col`, ...)] VALUES (", to
       * which the parser appends the values of each line.
       */
      if (isset($csv_replace)) {
          $sql_template = 'REPLACE';
      } else {
          $sql_template = 'INSERT';
          if (isset($csv_ignore)) {
              $sql_template .= ' IGNORE';
          }
      }
      $sql_template .= ' INTO ' . PMA_backquote($table);

      $tmp_fields = PMA_DBI_get_fields($db, $table);

      if (empty($csv_columns)) {
          // No explicit column list: every table column is expected.
          $fields = $tmp_fields;
      } else {
          $sql_template .= ' (';
          $fields = array();
          $tmp = preg_split('/,( ?)/', $csv_columns);
          foreach ($tmp as $key => $val) {
              if (count($fields) > 0) {
                  $sql_template .= ', ';
              }
              /* Trim also `, if user already included backquoted fields */
              $val = trim($val, " \t\r\n\0\x0B`");
              $found = FALSE;
              foreach ($tmp_fields as $id => $field) {
                  // Strict string comparison: with loose ==, numeric-looking
                  // names such as "1e1" and "10" compare equal, so a column
                  // that does not exist would pass validation and only fail
                  // later inside the generated statement.
                  if ((string) $field['Field'] === $val) {
                      $found = TRUE;
                      break;
                  }
              }
              if (!$found) {
                  $message = PMA_Message::error(__('Invalid column (%s) specified! Ensure that columns names are spelled correctly, separated by commas, and not enclosed in quotes.' ));
                  $message->addParam($val);
                  $error = TRUE;
                  break;
              }
              // $field still holds the entry matched by the search loop above.
              $fields[] = $field;
              $sql_template .= PMA_backquote($val);
          }
          $sql_template .= ') ';
      }

      $required_fields = count($fields);
      $sql_template .= ' VALUES (';
  }
  /*
   * Initial parser state.
   *   $i, $len     position inside $buffer and the buffer length
   *   $col_count,
   *   $max_cols    per-row and widest-row column counters (analyze mode)
   *   $line        line number used in error messages
   *   $lasti       position seen by the previous parser iteration
   *   $csv_finish  set once the current line has been read to its end
   *   $values      values collected for the current line
   *   $tempRow,
   *   $rows,
   *   $col_names,
   *   $tables      accumulators used in analyze mode
   */
  $i = $len = $col_count = $max_cols = 0;
  $line = 1;
  $lasti = -1;
  $csv_finish = FALSE;
  $values = $tempRow = $rows = $col_names = $tables = array();
  // Main import loop. Each pass fetches the next chunk of the uploaded file
  // with PMA_importGetNextChunk(), appends it to $buffer and then runs the
  // character-level CSV parser over the buffer. The loop ends when the input
  // is finished and the buffer is consumed, when $error is set, or when
  // $timeout_passed becomes true.
  145. while (!($finished && $i >= $len) && !$error && !$timeout_passed) {
  146. $data = PMA_importGetNextChunk();
  147. if ($data === FALSE) {
  148. // subtract data we didn't handle yet and stop processing
  149. $offset -= strlen($buffer);
  150. break;
  151. } elseif ($data === TRUE) {
  152. // Handle rest of buffer
  // (presumably TRUE means no further data will arrive - confirm against
  // PMA_importGetNextChunk())
  153. } else {
  154. // Append new data to buffer
  155. $buffer .= $data;
  156. unset($data);
  157. // Do not parse string when we're not at the end and don't have new line inside
  158. if (($csv_new_line == 'auto' && strpos($buffer, "\r") === FALSE && strpos($buffer, "\n") === FALSE)
  159. || ($csv_new_line != 'auto' && strpos($buffer, $csv_new_line) === FALSE)) {
  160. continue;
  161. }
  162. }
  163. // Current length of our buffer
  164. $len = strlen($buffer);
  165. // Currently parsed char
  // NOTE(review): $buffer[$i] is read without a bounds check; with an empty
  // buffer (for example an empty input) this reads past the end of the
  // string - confirm whether that case can occur.
  166. $ch = $buffer[$i];
  // Parser loop: consumes $buffer one field at a time. Whenever a field or
  // line cannot be completed with the bytes available, the position is rewound
  // to the start of the field and the loop is left so that the outer loop can
  // append more data.
  167. while ($i < $len) {
  168. // Deadlock protection
  // Neither the position nor the buffer length changed since the previous
  // iteration, so no progress is possible: report a format error.
  169. if ($lasti == $i && $lastlen == $len) {
  170. $message = PMA_Message::error(__('Invalid format of CSV input on line %d.'));
  171. $message->addParam($line);
  172. $error = TRUE;
  173. break;
  174. }
  175. $lasti = $i;
  176. $lastlen = $len;
  177. // This can happen with auto EOL and \r at the end of buffer
  // ($csv_finish is still set when the line's values were already read but
  // the end-of-line handling below had to wait for more data; field parsing
  // is skipped in that case.)
  178. if (!$csv_finish) {
  179. // Grab empty field
  180. if ($ch == $csv_terminated) {
  // A separator as the very last byte of the buffer: wait for more data.
  181. if ($i == $len - 1) {
  182. break;
  183. }
  184. $values[] = '';
  185. $i++;
  186. $ch = $buffer[$i];
  187. continue;
  188. }
  189. // Grab one field
  // $fallbacki remembers where this field starts so parsing can be rewound.
  190. $fallbacki = $i;
  191. if ($ch == $csv_enclosed) {
  192. if ($i == $len - 1) {
  193. break;
  194. }
  // Enclosed field: it runs until the closing enclosing character.
  195. $need_end = TRUE;
  196. $i++;
  197. $ch = $buffer[$i];
  198. } else {
  199. $need_end = FALSE;
  200. }
  201. $fail = FALSE;
  202. $value = '';
  // Collect characters until the closing enclosing character (enclosed
  // field) or until a separator / line terminator (plain field).
  203. while (($need_end && $ch != $csv_enclosed)
  204. || (!$need_end && !($ch == $csv_terminated
  205. || $ch == $csv_new_line || ($csv_new_line == 'auto'
  206. && ($ch == "\r" || $ch == "\n"))))) {
  // Escape character: the following character is taken literally. If the
  // escape is the last byte available, the field cannot be completed yet.
  207. if ($ch == $csv_escaped) {
  208. if ($i == $len - 1) {
  209. $fail = TRUE;
  210. break;
  211. }
  212. $i++;
  213. $ch = $buffer[$i];
  214. }
  215. $value .= $ch;
  // Reached the end of the buffer: that is only acceptable once the whole
  // input has been read ($finished); otherwise retry with more data.
  216. if ($i == $len - 1) {
  217. if (!$finished) {
  218. $fail = TRUE;
  219. }
  220. break;
  221. }
  222. $i++;
  223. $ch = $buffer[$i];
  224. }
  225. // unquoted NULL string
  226. if (false === $need_end && $value === 'NULL') {
  227. $value = null;
  228. }
  // Incomplete field: rewind to its start and leave the parser loop.
  229. if ($fail) {
  230. $i = $fallbacki;
  231. $ch = $buffer[$i];
  232. break;
  233. }
  234. // Need to strip trailing enclosing char?
  235. if ($need_end && $ch == $csv_enclosed) {
  236. if ($finished && $i == $len - 1) {
  // Closing enclosing character is the final byte of the whole input.
  237. $ch = NULL;
  238. } elseif ($i == $len - 1) {
  // Closing enclosing character is the last byte of this chunk: what follows
  // it is unknown, so rewind and wait for more data.
  239. $i = $fallbacki;
  240. $ch = $buffer[$i];
  241. break;
  242. } else {
  243. $i++;
  244. $ch = $buffer[$i];
  245. }
  246. }
  247. // Are we at the end?
  248. if ($ch == $csv_new_line || ($csv_new_line == 'auto' && ($ch == "\r" || $ch == "\n")) || ($finished && $i == $len - 1)) {
  249. $csv_finish = TRUE;
  250. }
  251. // Go to next char
  252. if ($ch == $csv_terminated) {
  253. if ($i == $len - 1) {
  254. $i = $fallbacki;
  255. $ch = $buffer[$i];
  256. break;
  257. }
  258. $i++;
  259. $ch = $buffer[$i];
  260. }
  261. // If everything went okay, store value
  262. $values[] = $value;
  263. }
  264. // End of line
  265. if ($csv_finish || $ch == $csv_new_line || ($csv_new_line == 'auto' && ($ch == "\r" || $ch == "\n"))) {
  266. if ($csv_new_line == 'auto' && $ch == "\r") { // Handle "\r\n"
  267. if ($i >= ($len - 2) && !$finished) {
  268. break; // We need more data to decide new line
  269. }
  // NOTE(review): when $finished is set and the "\r" is the last byte,
  // $buffer[$i + 1] is read past the end of the string - confirm.
  270. if ($buffer[$i + 1] == "\n") {
  271. $i++;
  272. }
  273. }
  274. // We didn't parse value till the end of line, so there was empty one
  275. if (!$csv_finish) {
  276. $values[] = '';
  277. }
  // Analyze mode: only collect the row and track the widest row seen;
  // nothing is sent to the database here.
  278. if ($analyze) {
  // NOTE(review): the key variable $ley is never used.
  279. foreach ($values as $ley => $val) {
  280. $tempRow[] = $val;
  281. ++$col_count;
  282. }
  283. if ($col_count > $max_cols) {
  284. $max_cols = $col_count;
  285. }
  286. $col_count = 0;
  287. $rows[] = $tempRow;
  288. $tempRow = array();
  289. } else {
  290. // Do we have correct count of values?
  291. if (count($values) != $required_fields) {
  292. // Hack for excel
  // NOTE(review): a single trailing ';' value is dropped, but the count is
  // not compared with $required_fields again afterwards.
  293. if ($values[count($values) - 1] == ';') {
  294. unset($values[count($values) - 1]);
  295. } else {
  296. $message = PMA_Message::error(__('Invalid column count in CSV input on line %d.'));
  297. $message->addParam($line);
  298. $error = TRUE;
  299. break;
  300. }
  301. }
  // Build the statement for this line: the prepared template followed by the
  // comma-separated values; null becomes the SQL keyword NULL, everything
  // else a quoted string.
  302. $first = TRUE;
  303. $sql = $sql_template;
  304. foreach ($values as $key => $val) {
  305. if (!$first) {
  306. $sql .= ', ';
  307. }
  308. if ($val === null) {
  309. $sql .= 'NULL';
  310. } else {
  // NOTE(review): addslashes() does not take the connection character set
  // into account - consider the project's SQL escaping helper, if any.
  311. $sql .= '\'' . addslashes($val) . '\'';
  312. }
  313. $first = FALSE;
  314. }
  315. $sql .= ')';
  316. /**
  317. * @todo maybe we could add original line to verbose SQL in comment
  318. */
  319. PMA_importRunQuery($sql, $sql);
  320. }
  // Line done: reset the per-line state and drop the consumed part of the
  // buffer so that parsing restarts at offset 0.
  321. $line++;
  322. $csv_finish = FALSE;
  323. $values = array();
  324. $buffer = substr($buffer, $i + 1);
  325. $len = strlen($buffer);
  326. $i = 0;
  327. $lasti = -1;
  // NOTE(review): $buffer may be empty here, in which case offset 0 does not
  // exist - confirm that the resulting value of $ch is harmless.
  328. $ch = $buffer[0];
  329. }
  330. } // End of parser loop
  331. } // End of import loop
  if ($analyze) {
      /* Pad every row with the string 'NULL' up to the widest row seen. */
      $num_rows = count($rows);
      for ($i = 0; $i < $num_rows; ++$i) {
          $rows[$i] = array_pad($rows[$i], $max_cols, 'NULL');
      }

      /* Optionally take the first row out of the data as the column names. */
      if (isset($_REQUEST['csv_col_names'])) {
          list($col_names) = array_splice($rows, 0, 1);
      }

      /* Append generated names when the names do not cover every column. */
      if (!isset($col_names) || count($col_names) != $max_cols) {
          for ($i = 0; $i < $max_cols; ++$i) {
              $col_names[] = 'COL '.($i+1);
          }
      }

      /*
       * Name the new table after the number of tables SHOW TABLES reports
       * when a database is selected; fall back to a placeholder otherwise.
       */
      $tbl_name = 'TBL_NAME';
      if (strlen($db)) {
          $result = PMA_DBI_fetch_result('SHOW TABLES');
          $tbl_name = 'TABLE '.(count($result) + 1);
      }

      $tables[] = array($tbl_name, $col_names, $rows);

      /* Obtain the best-fit MySQL types for each column */
      $analyses = array(PMA_analyzeTable($tables[0]));

      /*
       * Arguments handed to PMA_buildSQL() below:
       *
       *   $db_name   string, database name without backquotes
       *   $tables    list of tables, each being
       *              array(table_name, column_names, rows)
       *   $analyses  list of analyses, one per table, each being
       *              array(column_types, column_sizes)
       *   $create    list of SQL strings (not used here)
       *   $options   associative array of options
       */

      /* Use the currently selected database when there is one. */
      $db_name = strlen($db) ? $db : 'CSV_DB';
      $options = strlen($db) ? array('create_db' => false) : NULL;

      /* Non-applicable parameters */
      $create = NULL;

      /* Create and execute the necessary SQL statements from the data. */
      PMA_buildSQL($db_name, $tables, $analyses, $create, $options);

      unset($tables, $analyses);
  }
  // Commit any possible data still held in buffers.
  PMA_importRunQuery();

  // Values that were parsed but never closed off by an end of line mean the
  // input stopped in the middle of a record; report it unless an earlier
  // error is already pending.
  if (!$error && count($values) != 0) {
      $message = PMA_Message::error(__('Invalid format of CSV input on line %d.'));
      $message->addParam($line);
      $error = TRUE;
  }
  395. ?>