PageRenderTime 59ms CodeModel.GetById 32ms RepoModel.GetById 0ms app.codeStats 0ms

/pma/libraries/import/csv.php

https://bitbucket.org/kucing2k/ediassoc
PHP | 428 lines | 323 code | 44 blank | 61 comment | 141 complexity | 3d86096f818330e689b817d35351b193 MD5 | raw file
Possible License(s): BSD-3-Clause, LGPL-2.1, BSD-2-Clause, GPL-2.0
  1. <?php
  2. /* vim: set expandtab sw=4 ts=4 sts=4: */
  3. /**
  4. * CSV import plugin for phpMyAdmin
  5. *
  6. * @todo add an option for handling NULL values
  7. * @version $Id$
  8. * @package phpMyAdmin-Import
  9. */
  // This plugin must only run from inside phpMyAdmin: bail out when the
  // PHPMYADMIN marker constant has not been defined by the including script.
  if (defined('PHPMYADMIN') === false) {
      exit;
  }
  // Analysis mode is on for every import context except 'table'.
  $analyze = ($plugin_param !== 'table');
  // When $plugin_list exists the plugin is only being queried for its
  // description: register the CSV entry and stop before any import work.
  if (isset($plugin_list)) {
      $plugin_list['csv'] = array(
          'text'         => 'strCSV',
          'extension'    => 'csv',
          'options'      => array(
              array('type' => 'bool', 'name' => 'replace', 'text' => 'strReplaceTable'),
              array('type' => 'bool', 'name' => 'ignore', 'text' => 'strIgnoreDuplicates'),
              array('type' => 'text', 'name' => 'terminated', 'text' => 'strFieldsTerminatedBy', 'size' => 2, 'len' => 2),
              array('type' => 'text', 'name' => 'enclosed', 'text' => 'strFieldsEnclosedBy', 'size' => 2, 'len' => 2),
              array('type' => 'text', 'name' => 'escaped', 'text' => 'strFieldsEscapedBy', 'size' => 2, 'len' => 2),
              array('type' => 'text', 'name' => 'new_line', 'text' => 'strLinesTerminatedBy', 'size' => 2),
          ),
          'options_text' => 'strOptions',
      );

      // One extra option depends on the import context: outside of a table
      // import the first row may supply column names, while a table import
      // takes an explicit list of column names instead.
      $plugin_list['csv']['options'][] = ($plugin_param !== 'table')
          ? array('type' => 'bool', 'name' => 'col_names', 'text' => 'strImportColNames')
          : array('type' => 'text', 'name' => 'columns', 'text' => 'strColumnNames');

      /* Nothing below is needed when the plugin is just queried for information */
      return;
  }
  // The form delivers control characters as two-character escape sequences;
  // translate them into the real characters before validating the options.
  $replacements = array(
      '\\n' => "\n",
      '\\t' => "\t",
      '\\r' => "\r",
  );
  $csv_new_line   = strtr($csv_new_line, $replacements);
  $csv_escaped    = strtr($csv_escaped, $replacements);
  $csv_enclosed   = strtr($csv_enclosed, $replacements);
  $csv_terminated = strtr($csv_terminated, $replacements);

  // Find the first option (checked in the order: terminator, enclosure,
  // escape, line terminator) whose value the parser cannot work with.
  $csv_invalid_option = null;
  if (strlen($csv_terminated) != 1) {
      $csv_invalid_option = 'strFieldsTerminatedBy';
  } elseif (strlen($csv_enclosed) > 1) {
      // An empty enclosing character is deliberately accepted. The default
      // MS Excel dialog produces a semicolon-separated file without any way
      // of choosing an enclosing character, so users importing such a file
      // tend to clear this option in the Import dialog. No test case was
      // found where a missing enclosing character confuses this script.
      // The parser does not work with multi-character strings, though, so
      // at most one character is allowed.
      $csv_invalid_option = 'strFieldsEnclosedBy';
  } elseif (strlen($csv_escaped) != 1) {
      $csv_invalid_option = 'strFieldsEscapedBy';
  } elseif (strlen($csv_new_line) != 1 && $csv_new_line != 'auto') {
      $csv_invalid_option = 'strLinesTerminatedBy';
  }

  // Report the offending option, if any, and flag the import as failed.
  if ($csv_invalid_option !== null) {
      $message = PMA_Message::error('strInvalidCSVParameter');
      $message->addParam($csv_invalid_option, false);
      $error = TRUE;
  }
  unset($csv_invalid_option);
  // $buffer collects raw input that has not been parsed yet;
  // $required_fields is the number of values each CSV line must supply
  // (it stays 0 in analysis mode, where no count is enforced).
  $buffer = '';
  $required_fields = 0;
  if (!$analyze) {
      // Build the constant head of the statement issued for every CSV line:
      //   REPLACE | INSERT [IGNORE]  INTO `table` [(`col`, ...)]  VALUES (
      if (isset($csv_replace)) {
          $sql_template = 'REPLACE';
      } else {
          $sql_template = 'INSERT';
          if (isset($csv_ignore)) {
              $sql_template .= ' IGNORE';
          }
      }
      $sql_template .= ' INTO ' . PMA_backquote($table);

      $tmp_fields = PMA_DBI_get_fields($db, $table);

      if (empty($csv_columns)) {
          // No explicit column list: every column of the table is expected
          $fields = $tmp_fields;
      } else {
          $sql_template .= ' (';
          $fields = array();
          $tmp = preg_split('/,( ?)/', $csv_columns);
          foreach ($tmp as $val) {
              if (count($fields) > 0) {
                  $sql_template .= ', ';
              }
              /* Trim also `, if user already included backquoted fields */
              $val = trim($val, " \t\r\n\0\x0B`");
              $found = FALSE;
              foreach ($tmp_fields as $field) {
                  // Strict string comparison: with a loose == two
                  // numeric-looking names such as "1" and "01" would be
                  // considered equal, and a column name that does not exist
                  // would end up in the statement.
                  if ((string) $field['Field'] === $val) {
                      $found = TRUE;
                      break;
                  }
              }
              if (!$found) {
                  $message = PMA_Message::error('strInvalidColumn');
                  $message->addParam($val);
                  $error = TRUE;
                  break;
              }
              // $field still holds the matching entry found by the inner loop
              $fields[] = $field;
              $sql_template .= PMA_backquote($val);
          }
          $sql_template .= ') ';
      }
      $required_fields = count($fields);
      $sql_template .= ' VALUES (';
  }
  // Initial parser state.
  // Cursor: position in the buffer, buffer length and current line number.
  $i = $len = 0;
  $line = 1;
  // Position seen at the previous parser step (-1 = no previous step)
  $lasti = -1;
  // Set once the end of the current CSV line has been recognised
  $csv_finish = FALSE;
  // Values of the current line, plus the containers used in analysis mode
  $values = $tempRow = $rows = $col_names = $tables = array();
  // Column counters used in analysis mode
  $col_count = $max_cols = 0;
  /*
   * Import loop: pulls chunks of input into $buffer and runs a
   * character-by-character parser over it. Each completed CSV line is either
   * collected into $rows (analysis mode) or turned into one statement built
   * from $sql_template and handed to PMA_importRunQuery().
   *
   * Whenever the parser cannot decide something with the data at hand (a
   * field cut off at the end of the buffer, a "\r" that may be followed by
   * "\n") it rewinds to the start of the field and leaves the inner loop so
   * that more data can be appended first.
   */
  // Buffer length seen at the previous parser step; used together with
  // $lasti by the deadlock protection. Initialised so it is never unset.
  $lastlen = -1;
  while (!($finished && $i >= $len) && !$error && !$timeout_passed) {
      $data = PMA_importGetNextChunk();
      if ($data === FALSE) {
          // subtract data we didn't handle yet and stop processing
          $offset -= strlen($buffer);
          break;
      } elseif ($data === TRUE) {
          // Handle rest of buffer
      } else {
          // Append new data to buffer
          $buffer .= $data;
          unset($data);
          // Do not parse string when we're not at the end and don't have new line inside
          if (($csv_new_line == 'auto' && strpos($buffer, "\r") === FALSE && strpos($buffer, "\n") === FALSE)
              || ($csv_new_line != 'auto' && strpos($buffer, $csv_new_line) === FALSE)) {
              continue;
          }
      }
      // Current length of our buffer
      $len = strlen($buffer);
      // Currently parsed char; the buffer may be empty here, so do not read
      // past its end (that would raise an "Uninitialized string offset")
      $ch = ($i < $len) ? $buffer[$i] : '';
      while ($i < $len) {
          // Deadlock protection: same position and same buffer length as in
          // the previous step means the parser made no progress
          if ($lasti == $i && $lastlen == $len) {
              $message = PMA_Message::error('strInvalidCSVFormat');
              $message->addParam($line);
              $error = TRUE;
              break;
          }
          $lasti = $i;
          $lastlen = $len;
          // $csv_finish may already be set here: this can happen with auto
          // EOL and \r at the end of buffer
          if (!$csv_finish) {
              // Grab empty field
              if ($ch == $csv_terminated) {
                  if ($i == $len - 1) {
                      break;
                  }
                  $values[] = '';
                  $i++;
                  $ch = $buffer[$i];
                  continue;
              }
              // Grab one field; remember where it starts so we can rewind
              $fallbacki = $i;
              if ($ch == $csv_enclosed) {
                  if ($i == $len - 1) {
                      break;
                  }
                  $need_end = TRUE;
                  $i++;
                  $ch = $buffer[$i];
              } else {
                  $need_end = FALSE;
              }
              $fail = FALSE;
              $value = '';
              // An enclosed field runs up to the closing enclosure; a bare
              // field runs up to the next field or line terminator
              while (($need_end && $ch != $csv_enclosed)
                  || (!$need_end && !($ch == $csv_terminated
                      || $ch == $csv_new_line || ($csv_new_line == 'auto'
                          && ($ch == "\r" || $ch == "\n"))))) {
                  if ($ch == $csv_escaped) {
                      // The escaped character is not in the buffer yet
                      if ($i == $len - 1) {
                          $fail = TRUE;
                          break;
                      }
                      $i++;
                      $ch = $buffer[$i];
                  }
                  $value .= $ch;
                  if ($i == $len - 1) {
                      // End of buffer inside a field is only acceptable when
                      // no more input will follow
                      if (!$finished) {
                          $fail = TRUE;
                      }
                      break;
                  }
                  $i++;
                  $ch = $buffer[$i];
              }
              // unquoted NULL string
              if (false === $need_end && $value === 'NULL') {
                  $value = null;
              }
              if ($fail) {
                  $i = $fallbacki;
                  $ch = $buffer[$i];
                  break;
              }
              // Need to strip trailing enclosing char?
              if ($need_end && $ch == $csv_enclosed) {
                  if ($finished && $i == $len - 1) {
                      $ch = NULL;
                  } elseif ($i == $len - 1) {
                      $i = $fallbacki;
                      $ch = $buffer[$i];
                      break;
                  } else {
                      $i++;
                      $ch = $buffer[$i];
                  }
              }
              // Are we at the end?
              if ($ch == $csv_new_line || ($csv_new_line == 'auto' && ($ch == "\r" || $ch == "\n")) || ($finished && $i == $len - 1)) {
                  $csv_finish = TRUE;
              }
              // Go to next char
              if ($ch == $csv_terminated) {
                  if ($i == $len - 1) {
                      $i = $fallbacki;
                      $ch = $buffer[$i];
                      break;
                  }
                  $i++;
                  $ch = $buffer[$i];
              }
              // If everything went okay, store value
              $values[] = $value;
          }
          // End of line
          if ($csv_finish || $ch == $csv_new_line || ($csv_new_line == 'auto' && ($ch == "\r" || $ch == "\n"))) {
              if ($csv_new_line == 'auto' && $ch == "\r") { // Handle "\r\n"
                  if ($i >= ($len - 2) && !$finished) {
                      break; // We need more data to decide new line
                  }
                  // With finished input "\r" can be the very last byte, so
                  // check the bounds before peeking at the next character
                  if ($i + 1 < $len && $buffer[$i + 1] == "\n") {
                      $i++;
                  }
              }
              // We didn't parse value till the end of line, so there was empty one
              if (!$csv_finish) {
                  $values[] = '';
              }
              if ($analyze) {
                  // Keep the row and track the widest row seen so far
                  if (count($values) > $max_cols) {
                      $max_cols = count($values);
                  }
                  $rows[] = $values;
              } else {
                  // Do we have correct count of values?
                  if (count($values) != $required_fields) {
                      // Hack for excel
                      if ($values[count($values) - 1] == ';') {
                          unset($values[count($values) - 1]);
                      } else {
                          $message = PMA_Message::error('strInvalidCSVFieldCount');
                          $message->addParam($line);
                          $error = TRUE;
                          break;
                      }
                  }
                  $first = TRUE;
                  $sql = $sql_template;
                  foreach ($values as $val) {
                      if (!$first) {
                          $sql .= ', ';
                      }
                      if ($val === null) {
                          $sql .= 'NULL';
                      } else {
                          $sql .= '\'' . addslashes($val) . '\'';
                      }
                      $first = FALSE;
                  }
                  $sql .= ')';
                  /**
                   * @todo maybe we could add original line to verbose SQL in comment
                   */
                  PMA_importRunQuery($sql, $sql);
              }
              // Drop the consumed line from the buffer and reset the cursor
              $line++;
              $csv_finish = FALSE;
              $values = array();
              // substr() may return false on older PHP when nothing is left
              $buffer = (string) substr($buffer, $i + 1);
              $len = strlen($buffer);
              $i = 0;
              $lasti = -1;
              $ch = ($len > 0) ? $buffer[0] : '';
          }
      } // End of parser loop
  } // End of import loop
  // Analysis mode: instead of inserting into an existing table, hand the
  // collected rows to PMA_analyzeTable() / PMA_buildSQL().
  if ($analyze) {
      /* Fill out all rows so that each one has $max_cols entries */
      $num_rows = count($rows);
      for ($i = 0; $i < $num_rows; ++$i) {
          $rows[$i] = array_pad($rows[$i], $max_cols, 'NULL');
      }

      // Take the first row as column names when requested. The request key
      // is absent when the option is not ticked, and there may be no rows
      // at all, so check both before touching the first row.
      if (!empty($_REQUEST['csv_col_names']) && $num_rows > 0) {
          $col_names = array_splice($rows, 0, 1);
          $col_names = $col_names[0];
      }

      if (!isset($col_names) || count($col_names) != $max_cols) {
          // Fill out column names
          for ($i = 0; $i < $max_cols; ++$i) {
              $col_names[] = 'COL '.($i+1);
          }
      }

      // Table name: numbered after the tables returned by SHOW TABLES when
      // a database is selected, a fixed placeholder otherwise
      if (strlen($db)) {
          $result = PMA_DBI_fetch_result('SHOW TABLES');
          $tbl_name = 'TABLE '.(count($result) + 1);
      } else {
          $tbl_name = 'TBL_NAME';
      }

      $tables[] = array($tbl_name, $col_names, $rows);

      /* Obtain the best-fit MySQL types for each column */
      $analyses = array();
      $analyses[] = PMA_analyzeTable($tables[0]);

      /**
       * string $db_name (no backquotes)
       *
       * array $table = array(table_name, array() column_names, array()() rows)
       * array $tables = array of "$table"s
       *
       * array $analysis = array(array() column_types, array() column_sizes)
       * array $analyses = array of "$analysis"s
       *
       * array $create = array of SQL strings
       *
       * array $options = an associative array of options
       */

      /* Set database name to the currently selected one, if applicable */
      if (strlen($db)) {
          $db_name = $db;
          $options = array('create_db' => false);
      } else {
          $db_name = 'CSV_DB';
          $options = NULL;
      }

      /* Non-applicable parameters */
      $create = NULL;

      /* Created and execute necessary SQL statements from data */
      PMA_buildSQL($db_name, $tables, $analyses, $create, $options);

      unset($tables);
      unset($analyses);
  }
  // Calling PMA_importRunQuery() without arguments commits any possible
  // data still held in its buffers.
  PMA_importRunQuery();

  // $values is emptied after every completed line, so anything left in it
  // belongs to a line that was never finished: report it unless an earlier
  // error has already been recorded.
  if (!$error && count($values) != 0) {
      $message = PMA_Message::error('strInvalidCSVFormat');
      $message->addParam($line);
      $error = TRUE;
  }
  384. ?>