PageRenderTime 29ms CodeModel.GetById 14ms RepoModel.GetById 1ms app.codeStats 0ms

/src/import-export/csv/gnc-csv-model.c

http://github.com/mchochlov/Gnucash
C | 1215 lines | 794 code | 133 blank | 288 comment | 169 complexity | 5eb961aa2e898c7ef4f0eb74f9b1c12d MD5 | raw file
Possible License(s): GPL-2.0
  1. #include "gnc-csv-model.h"
  2. #include <glib/gi18n.h>
  3. #include <goffice/goffice-features.h>
  4. #if (GO_VERSION_EPOCH == 0) && (GO_VERSION_MAJOR == 7) && (GO_VERSION_MINOR == 8)
  5. /* For libgoffice-0.7.8, disable its internal inclusion of <regutf8.h>
  6. so to avoid clashing symbol definitions with <regex.h> */
  7. # define GO_REGUTF8_H
  8. #endif
  9. #include <goffice/utils/go-glib-extras.h>
  10. #include <string.h>
  11. #include <sys/time.h>
  12. #include <sys/types.h>
  13. #include <sys/stat.h>
  14. #include <regex.h>
  15. #include <unistd.h>
  16. #include <fcntl.h>
  17. #include <stdlib.h>
  18. #include <math.h>
  19. #include <time.h>
  20. #ifndef HAVE_LOCALTIME_R
  21. #include "localtime_r.h"
  22. #endif
  23. static QofLogModule log_module = GNC_MOD_IMPORT;
  24. const int num_date_formats = 5;
  25. const gchar* date_format_user[] = {N_("y-m-d"),
  26. N_("d-m-y"),
  27. N_("m-d-y"),
  28. N_("d-m"),
  29. N_("m-d")
  30. };
  31. /* This array contains all of the different strings for different column types. */
  32. gchar* gnc_csv_column_type_strs[GNC_CSV_NUM_COL_TYPES] = {N_("None"),
  33. N_("Date"),
  34. N_("Description"),
  35. N_("Balance"),
  36. N_("Deposit"),
  37. N_("Withdrawal"),
  38. N_("Num")
  39. };
  40. /** A set of sensible defaults for parsing CSV files.
  41. * @return StfParseOptions_t* for parsing a file with comma separators
  42. */
  43. static StfParseOptions_t* default_parse_options(void)
  44. {
  45. StfParseOptions_t* options = stf_parse_options_new();
  46. stf_parse_options_set_type(options, PARSE_TYPE_CSV);
  47. stf_parse_options_csv_set_separators(options, ",", NULL);
  48. return options;
  49. }
  50. /** Parses a string into a date, given a format. The format must
  51. * include the year. This function should only be called by
  52. * parse_date.
  53. * @param date_str The string containing a date being parsed
  54. * @param format An index specifying a format in date_format_user
  55. * @return The parsed value of date_str on success or -1 on failure
  56. */
  57. static time_t parse_date_with_year(const char* date_str, int format)
  58. {
  59. time_t rawtime; /* The integer time */
  60. struct tm retvalue, test_retvalue; /* The time in a broken-down structure */
  61. int i, j, mem_length, orig_year = -1, orig_month = -1, orig_day = -1;
  62. /* Buffer for containing individual parts (e.g. year, month, day) of a date */
  63. char date_segment[5];
  64. /* The compiled regular expression */
  65. regex_t preg = {0};
  66. /* An array containing indices specifying the matched substrings in date_str */
  67. regmatch_t pmatch[4] = { {0}, {0}, {0}, {0} };
  68. /* The regular expression for parsing dates */
  69. const char* regex = "^ *([0-9]+) *[-/.'] *([0-9]+) *[-/.'] *([0-9]+).*$|^ *([0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]).*$";
  70. /* We get our matches using the regular expression. */
  71. regcomp(&preg, regex, REG_EXTENDED);
  72. regexec(&preg, date_str, 4, pmatch, 0);
  73. regfree(&preg);
  74. /* If there wasn't a match, there was an error. */
  75. if (pmatch[0].rm_eo == 0)
  76. return -1;
  77. /* If this is a string without separators ... */
  78. if (pmatch[1].rm_so == -1)
  79. {
  80. /* ... we will fill in the indices based on the user's selection. */
  81. int k = 0; /* k traverses date_str by keeping track of where separators "should" be. */
  82. j = 1; /* j traverses pmatch. */
  83. for (i = 0; date_format_user[format][i]; i++)
  84. {
  85. char segment_type = date_format_user[format][i];
  86. /* Only do something if this is a meaningful character */
  87. if (segment_type == 'y' || segment_type == 'm' || segment_type == 'd')
  88. {
  89. pmatch[j].rm_so = k;
  90. switch (segment_type)
  91. {
  92. case 'm':
  93. case 'd':
  94. k += 2;
  95. break;
  96. case 'y':
  97. k += 4;
  98. break;
  99. }
  100. pmatch[j].rm_eo = k;
  101. j++;
  102. }
  103. }
  104. }
  105. /* Put some sane values in retvalue by using the current time for
  106. * the non-year-month-day parts of the date. */
  107. time(&rawtime);
  108. localtime_r(&rawtime, &retvalue);
  109. /* j traverses pmatch (index 0 contains the entire string, so we
  110. * start at index 1 for the first meaningful match). */
  111. j = 1;
  112. /* Go through the date format and interpret the matches in order of
  113. * the sections in the date format. */
  114. for (i = 0; date_format_user[format][i]; i++)
  115. {
  116. char segment_type = date_format_user[format][i];
  117. /* Only do something if this is a meaningful character */
  118. if (segment_type == 'y' || segment_type == 'm' || segment_type == 'd')
  119. {
  120. /* Copy the matching substring into date_segment so that we can
  121. * convert it into an integer. */
  122. mem_length = pmatch[j].rm_eo - pmatch[j].rm_so;
  123. memcpy(date_segment, date_str + pmatch[j].rm_so, mem_length);
  124. date_segment[mem_length] = '\0';
  125. /* Set the appropriate member of retvalue. Save the original
  126. * values so that we can check if the change when we use mktime
  127. * below. */
  128. switch (segment_type)
  129. {
  130. case 'y':
  131. retvalue.tm_year = atoi(date_segment);
  132. /* Handle two-digit years. */
  133. if (retvalue.tm_year < 100)
  134. {
  135. /* We allow two-digit years in the range 1969 - 2068. */
  136. if (retvalue.tm_year < 69)
  137. retvalue.tm_year += 100;
  138. }
  139. else
  140. retvalue.tm_year -= 1900;
  141. orig_year = retvalue.tm_year;
  142. break;
  143. case 'm':
  144. orig_month = retvalue.tm_mon = atoi(date_segment) - 1;
  145. break;
  146. case 'd':
  147. orig_day = retvalue.tm_mday = atoi(date_segment);
  148. break;
  149. }
  150. j++;
  151. }
  152. }
  153. /* Convert back to an integer. If mktime leaves retvalue unchanged,
  154. * everything is okay; otherwise, an error has occurred. */
  155. /* We have to use a "test" date value to account for changes in
  156. * daylight savings time, which can cause a date change with mktime
  157. * near midnight, causing the code to incorrectly think a date is
  158. * incorrect. */
  159. test_retvalue = retvalue;
  160. mktime(&test_retvalue);
  161. retvalue.tm_isdst = test_retvalue.tm_isdst;
  162. rawtime = mktime(&retvalue);
  163. if (retvalue.tm_mday == orig_day &&
  164. retvalue.tm_mon == orig_month &&
  165. retvalue.tm_year == orig_year)
  166. {
  167. return rawtime;
  168. }
  169. else
  170. {
  171. return -1;
  172. }
  173. }
  174. /** Parses a string into a date, given a format. The format cannot
  175. * include the year. This function should only be called by
  176. * parse_date.
  177. * @param date_str The string containing a date being parsed
  178. * @param format An index specifying a format in date_format_user
  179. * @return The parsed value of date_str on success or -1 on failure
  180. */
  181. static time_t parse_date_without_year(const char* date_str, int format)
  182. {
  183. time_t rawtime; /* The integer time */
  184. struct tm retvalue, test_retvalue; /* The time in a broken-down structure */
  185. int i, j, mem_length, orig_year = -1, orig_month = -1, orig_day = -1;
  186. /* Buffer for containing individual parts (e.g. year, month, day) of a date */
  187. gchar* date_segment;
  188. /* The compiled regular expression */
  189. regex_t preg = {0};
  190. /* An array containing indices specifying the matched substrings in date_str */
  191. regmatch_t pmatch[3] = { {0}, {0}, {0} };
  192. /* The regular expression for parsing dates */
  193. const char* regex = "^ *([0-9]+) *[-/.'] *([0-9]+).*$";
  194. /* We get our matches using the regular expression. */
  195. regcomp(&preg, regex, REG_EXTENDED);
  196. regexec(&preg, date_str, 3, pmatch, 0);
  197. regfree(&preg);
  198. /* If there wasn't a match, there was an error. */
  199. if (pmatch[0].rm_eo == 0)
  200. return -1;
  201. /* Put some sane values in retvalue by using the current time for
  202. * the non-year-month-day parts of the date. */
  203. time(&rawtime);
  204. localtime_r(&rawtime, &retvalue);
  205. orig_year = retvalue.tm_year;
  206. /* j traverses pmatch (index 0 contains the entire string, so we
  207. * start at index 1 for the first meaningful match). */
  208. j = 1;
  209. /* Go through the date format and interpret the matches in order of
  210. * the sections in the date format. */
  211. for (i = 0; date_format_user[format][i]; i++)
  212. {
  213. char segment_type = date_format_user[format][i];
  214. /* Only do something if this is a meaningful character */
  215. if (segment_type == 'm' || segment_type == 'd')
  216. {
  217. /* Copy the matching substring into date_segment so that we can
  218. * convert it into an integer. */
  219. mem_length = pmatch[j].rm_eo - pmatch[j].rm_so;
  220. date_segment = g_new(gchar, mem_length);
  221. memcpy(date_segment, date_str + pmatch[j].rm_so, mem_length);
  222. date_segment[mem_length] = '\0';
  223. /* Set the appropriate member of retvalue. Save the original
  224. * values so that we can check if the change when we use mktime
  225. * below. */
  226. switch (segment_type)
  227. {
  228. case 'm':
  229. orig_month = retvalue.tm_mon = atoi(date_segment) - 1;
  230. break;
  231. case 'd':
  232. orig_day = retvalue.tm_mday = atoi(date_segment);
  233. break;
  234. }
  235. g_free(date_segment);
  236. j++;
  237. }
  238. }
  239. /* Convert back to an integer. If mktime leaves retvalue unchanged,
  240. * everything is okay; otherwise, an error has occurred. */
  241. /* We have to use a "test" date value to account for changes in
  242. * daylight savings time, which can cause a date change with mktime
  243. * near midnight, causing the code to incorrectly think a date is
  244. * incorrect. */
  245. test_retvalue = retvalue;
  246. mktime(&test_retvalue);
  247. retvalue.tm_isdst = test_retvalue.tm_isdst;
  248. rawtime = mktime(&retvalue);
  249. if (retvalue.tm_mday == orig_day &&
  250. retvalue.tm_mon == orig_month &&
  251. retvalue.tm_year == orig_year)
  252. {
  253. return rawtime;
  254. }
  255. else
  256. {
  257. return -1;
  258. }
  259. }
  260. /** Parses a string into a date, given a format. This function
  261. * requires only knowing the order in which the year, month and day
  262. * appear. For example, 01-02-2003 will be parsed the same way as
  263. * 01/02/2003.
  264. * @param date_str The string containing a date being parsed
  265. * @param format An index specifying a format in date_format_user
  266. * @return The parsed value of date_str on success or -1 on failure
  267. */
  268. static time_t parse_date(const char* date_str, int format)
  269. {
  270. if (strchr(date_format_user[format], 'y'))
  271. return parse_date_with_year(date_str, format);
  272. else
  273. return parse_date_without_year(date_str, format);
  274. }
  275. /** Constructor for GncCsvParseData.
  276. * @return Pointer to a new GncCSvParseData
  277. */
  278. GncCsvParseData* gnc_csv_new_parse_data(void)
  279. {
  280. GncCsvParseData* parse_data = g_new(GncCsvParseData, 1);
  281. parse_data->encoding = "UTF-8";
  282. /* All of the data pointers are initially NULL. This is so that, if
  283. * gnc_csv_parse_data_free is called before all of the data is
  284. * initialized, only the data that needs to be freed is freed. */
  285. parse_data->raw_str.begin = parse_data->raw_str.end
  286. = parse_data->file_str.begin = parse_data->file_str.end = NULL;
  287. parse_data->orig_lines = NULL;
  288. parse_data->orig_row_lengths = NULL;
  289. parse_data->column_types = NULL;
  290. parse_data->error_lines = parse_data->transactions = NULL;
  291. parse_data->options = default_parse_options();
  292. parse_data->date_format = -1;
  293. parse_data->chunk = g_string_chunk_new(100 * 1024);
  294. return parse_data;
  295. }
  296. /** Destructor for GncCsvParseData.
  297. * @param parse_data Parse data whose memory will be freed
  298. */
  299. void gnc_csv_parse_data_free(GncCsvParseData* parse_data)
  300. {
  301. /* All non-NULL pointers have been initialized and must be freed. */
  302. if (parse_data->raw_mapping != NULL)
  303. g_mapped_file_free(parse_data->raw_mapping);
  304. if (parse_data->file_str.begin != NULL)
  305. g_free(parse_data->file_str.begin);
  306. if (parse_data->orig_lines != NULL)
  307. stf_parse_general_free(parse_data->orig_lines);
  308. if (parse_data->orig_row_lengths != NULL)
  309. g_array_free(parse_data->orig_row_lengths, FALSE);
  310. if (parse_data->options != NULL)
  311. stf_parse_options_free(parse_data->options);
  312. if (parse_data->column_types != NULL)
  313. g_array_free(parse_data->column_types, TRUE);
  314. if (parse_data->error_lines != NULL)
  315. g_list_free(parse_data->error_lines);
  316. if (parse_data->transactions != NULL)
  317. {
  318. GList* transactions = parse_data->transactions;
  319. /* We have to free the GncCsvTransLine's that are at each node in
  320. * the list before freeing the entire list. */
  321. do
  322. {
  323. g_free(transactions->data);
  324. transactions = g_list_next(transactions);
  325. }
  326. while (transactions != NULL);
  327. g_list_free(parse_data->transactions);
  328. }
  329. g_free(parse_data->chunk);
  330. g_free(parse_data);
  331. }
  332. /** Converts raw file data using a new encoding. This function must be
  333. * called after gnc_csv_load_file only if gnc_csv_load_file guessed
  334. * the wrong encoding.
  335. * @param parse_data Data that is being parsed
  336. * @param encoding Encoding that data should be translated using
  337. * @param error Will point to an error on failure
  338. * @return 0 on success, 1 on failure
  339. */
  340. int gnc_csv_convert_encoding(GncCsvParseData* parse_data, const char* encoding,
  341. GError** error)
  342. {
  343. gsize bytes_read, bytes_written;
  344. /* If parse_data->file_str has already been initialized it must be
  345. * freed first. (This should always be the case, since
  346. * gnc_csv_load_file should always be called before this
  347. * function.) */
  348. if (parse_data->file_str.begin != NULL)
  349. g_free(parse_data->file_str.begin);
  350. /* Do the actual translation to UTF-8. */
  351. parse_data->file_str.begin = g_convert(parse_data->raw_str.begin,
  352. parse_data->raw_str.end - parse_data->raw_str.begin,
  353. "UTF-8", encoding, &bytes_read, &bytes_written,
  354. error);
  355. /* Handle errors that occur. */
  356. if (parse_data->file_str.begin == NULL)
  357. return 1;
  358. /* On success, save the ending pointer of the translated data and
  359. * the encoding type and return 0. */
  360. parse_data->file_str.end = parse_data->file_str.begin + bytes_written;
  361. parse_data->encoding = (gchar*)encoding;
  362. return 0;
  363. }
  364. /** Loads a file into a GncCsvParseData. This is the first function
  365. * that must be called after createing a new GncCsvParseData. If this
  366. * fails because the file couldn't be opened, no more functions can be
  367. * called on the parse data until this succeeds (or until it fails
  368. * because of an encoding guess error). If it fails because the
  369. * encoding could not be guessed, gnc_csv_convert_encoding must be
  370. * called until it succeeds.
  371. * @param parse_data Data that is being parsed
  372. * @param filename Name of the file that should be opened
  373. * @param error Will contain an error if there is a failure
  374. * @return 0 on success, 1 on failure
  375. */
  376. int gnc_csv_load_file(GncCsvParseData* parse_data, const char* filename,
  377. GError** error)
  378. {
  379. const char* guess_enc;
  380. /* Get the raw data first and handle an error if one occurs. */
  381. parse_data->raw_mapping = g_mapped_file_new(filename, FALSE, error);
  382. if (parse_data->raw_mapping == NULL)
  383. {
  384. /* TODO Handle file opening errors more specifically,
  385. * e.g. inexistent file versus no read permission. */
  386. parse_data->raw_str.begin = NULL;
  387. g_set_error(error, 0, GNC_CSV_FILE_OPEN_ERR, "%s", _("File opening failed."));
  388. return 1;
  389. }
  390. /* Copy the mapping's contents into parse-data->raw_str. */
  391. parse_data->raw_str.begin = g_mapped_file_get_contents(parse_data->raw_mapping);
  392. parse_data->raw_str.end = parse_data->raw_str.begin + g_mapped_file_get_length(parse_data->raw_mapping);
  393. /* Make a guess at the encoding of the data. */
  394. guess_enc = go_guess_encoding((const char*)(parse_data->raw_str.begin),
  395. (size_t)(parse_data->raw_str.end - parse_data->raw_str.begin),
  396. "UTF-8", NULL);
  397. if (guess_enc == NULL)
  398. {
  399. g_set_error(error, 0, GNC_CSV_ENCODING_ERR, "%s", _("Unknown encoding."));
  400. return 1;
  401. }
  402. /* Convert using the guessed encoding into parse_data->file_str and
  403. * handle any errors that occur. */
  404. gnc_csv_convert_encoding(parse_data, guess_enc, error);
  405. if (parse_data->file_str.begin == NULL)
  406. {
  407. g_set_error(error, 0, GNC_CSV_ENCODING_ERR, "%s", _("Unknown encoding."));
  408. return 1;
  409. }
  410. else
  411. return 0;
  412. }
  413. /** Parses a file into cells. This requires having an encoding that
  414. * works (see gnc_csv_convert_encoding). parse_data->options should be
  415. * set according to how the user wants before calling this
  416. * function. (Note: this function must be called with guessColTypes as
  417. * TRUE before it is ever called with it as FALSE.) (Note: if
  418. * guessColTypes is TRUE, all the column types will be GNC_CSV_NONE
  419. * right now.)
  420. * @param parse_data Data that is being parsed
  421. * @param guessColTypes TRUE to guess what the types of columns are based on the cell contents
  422. * @param error Will contain an error if there is a failure
  423. * @return 0 on success, 1 on failure
  424. */
  425. int gnc_csv_parse(GncCsvParseData* parse_data, gboolean guessColTypes, GError** error)
  426. {
  427. /* max_cols is the number of columns in the row with the most columns. */
  428. int i, max_cols = 0;
  429. if (parse_data->orig_lines != NULL)
  430. {
  431. stf_parse_general_free(parse_data->orig_lines);
  432. }
  433. /* If everything is fine ... */
  434. if (parse_data->file_str.begin != NULL)
  435. {
  436. /* Do the actual parsing. */
  437. parse_data->orig_lines = stf_parse_general(parse_data->options, parse_data->chunk,
  438. parse_data->file_str.begin,
  439. parse_data->file_str.end);
  440. }
  441. /* If we couldn't get the encoding right, we just want an empty array. */
  442. else
  443. {
  444. parse_data->orig_lines = g_ptr_array_new();
  445. }
  446. /* Record the original row lengths of parse_data->orig_lines. */
  447. if (parse_data->orig_row_lengths != NULL)
  448. g_array_free(parse_data->orig_row_lengths, FALSE);
  449. parse_data->orig_row_lengths =
  450. g_array_sized_new(FALSE, FALSE, sizeof(int), parse_data->orig_lines->len);
  451. g_array_set_size(parse_data->orig_row_lengths, parse_data->orig_lines->len);
  452. parse_data->orig_max_row = 0;
  453. for (i = 0; i < parse_data->orig_lines->len; i++)
  454. {
  455. int length = ((GPtrArray*)parse_data->orig_lines->pdata[i])->len;
  456. parse_data->orig_row_lengths->data[i] = length;
  457. if (length > parse_data->orig_max_row)
  458. parse_data->orig_max_row = length;
  459. }
  460. /* If it failed, generate an error. */
  461. if (parse_data->orig_lines == NULL)
  462. {
  463. g_set_error(error, 0, 0, "Parsing failed.");
  464. return 1;
  465. }
  466. /* Now that we have data, let's set max_cols. */
  467. for (i = 0; i < parse_data->orig_lines->len; i++)
  468. {
  469. if (max_cols < ((GPtrArray*)(parse_data->orig_lines->pdata[i]))->len)
  470. max_cols = ((GPtrArray*)(parse_data->orig_lines->pdata[i]))->len;
  471. }
  472. if (guessColTypes)
  473. {
  474. /* Free parse_data->column_types if it's already been created. */
  475. if (parse_data->column_types != NULL)
  476. g_array_free(parse_data->column_types, TRUE);
  477. /* Create parse_data->column_types and fill it with guesses based
  478. * on the contents of each column. */
  479. parse_data->column_types = g_array_sized_new(FALSE, FALSE, sizeof(int),
  480. max_cols);
  481. g_array_set_size(parse_data->column_types, max_cols);
  482. /* TODO Make it actually guess. */
  483. for (i = 0; i < parse_data->column_types->len; i++)
  484. {
  485. parse_data->column_types->data[i] = GNC_CSV_NONE;
  486. }
  487. }
  488. else
  489. {
  490. /* If we don't need to guess column types, we will simply set any
  491. * new columns that are created that didn't exist before to "None"
  492. * since we don't want gibberish to appear. Note:
  493. * parse_data->column_types should have already been
  494. * initialized, so we don't check for it being NULL. */
  495. int i = parse_data->column_types->len;
  496. g_array_set_size(parse_data->column_types, max_cols);
  497. for (; i < parse_data->column_types->len; i++)
  498. {
  499. parse_data->column_types->data[i] = GNC_CSV_NONE;
  500. }
  501. }
  502. return 0;
  503. }
  504. /** A struct containing TransProperties that all describe a single transaction. */
  505. typedef struct
  506. {
  507. int date_format; /**< The format for parsing dates */
  508. Account* account; /**< The account the transaction belongs to */
  509. GList* properties; /**< List of TransProperties */
  510. } TransPropertyList;
  511. /** A struct encapsulating a property of a transaction. */
  512. typedef struct
  513. {
  514. int type; /**< A value from the GncCsvColumnType enum except
  515. * GNC_CSV_NONE and GNC_CSV_NUM_COL_TYPES */
  516. void* value; /**< Pointer to the data that will be used to configure a transaction */
  517. TransPropertyList* list; /**< The list the property belongs to */
  518. } TransProperty;
  519. /** Constructor for TransProperty.
  520. * @param type The type of the new property (see TransProperty.type for possible values)
  521. */
  522. static TransProperty* trans_property_new(int type, TransPropertyList* list)
  523. {
  524. TransProperty* prop = g_new(TransProperty, 1);
  525. prop->type = type;
  526. prop->list = list;
  527. prop->value = NULL;
  528. return prop;
  529. }
  530. /** Destructor for TransProperty.
  531. * @param prop The property to be freed
  532. */
  533. static void trans_property_free(TransProperty* prop)
  534. {
  535. switch (prop->type)
  536. {
  537. /* The types for "Date" and "Balance" (time_t and gnc_numeric,
  538. * respectively) are typically not pointed to, we have to free
  539. * them, unlike types like char* ("Description"). */
  540. case GNC_CSV_DATE:
  541. case GNC_CSV_BALANCE:
  542. case GNC_CSV_DEPOSIT:
  543. case GNC_CSV_WITHDRAWAL:
  544. if (prop->value != NULL)
  545. g_free(prop->value);
  546. break;
  547. }
  548. g_free(prop);
  549. }
  550. /** Sets the value of the property by parsing str. Note: this should
  551. * only be called once on an instance of TransProperty, as calling it
  552. * more than once can cause memory leaks.
  553. * @param prop The property being set
  554. * @param str The string to be parsed
  555. * @return TRUE on success, FALSE on failure
  556. */
  557. static gboolean trans_property_set(TransProperty* prop, char* str)
  558. {
  559. char *endptr, *possible_currency_symbol, *str_dupe;
  560. double value;
  561. switch (prop->type)
  562. {
  563. case GNC_CSV_DATE:
  564. prop->value = g_new(time_t, 1);
  565. *((time_t*)(prop->value)) = parse_date(str, prop->list->date_format);
  566. return *((time_t*)(prop->value)) != -1;
  567. case GNC_CSV_DESCRIPTION:
  568. case GNC_CSV_NUM:
  569. prop->value = g_strdup(str);
  570. return TRUE;
  571. case GNC_CSV_BALANCE:
  572. case GNC_CSV_DEPOSIT:
  573. case GNC_CSV_WITHDRAWAL:
  574. str_dupe = g_strdup(str); /* First, we make a copy so we can't mess up real data. */
  575. /* Go through str_dupe looking for currency symbols. */
  576. for (possible_currency_symbol = str_dupe; *possible_currency_symbol;
  577. possible_currency_symbol = g_utf8_next_char(possible_currency_symbol))
  578. {
  579. if (g_unichar_type(g_utf8_get_char(possible_currency_symbol)) == G_UNICODE_CURRENCY_SYMBOL)
  580. {
  581. /* If we find a currency symbol, save the position just ahead
  582. * of the currency symbol (next_symbol), and find the null
  583. * terminator of the string (last_symbol). */
  584. char *next_symbol = g_utf8_next_char(possible_currency_symbol), *last_symbol = next_symbol;
  585. while (*last_symbol)
  586. last_symbol = g_utf8_next_char(last_symbol);
  587. /* Move all of the string (including the null byte, which is
  588. * why we have +1 in the size parameter) following the
  589. * currency symbol back one character, thereby overwriting the
  590. * currency symbol. */
  591. memmove(possible_currency_symbol, next_symbol, last_symbol - next_symbol + 1);
  592. break;
  593. }
  594. }
  595. /* Translate the string (now clean of currency symbols) into a number. */
  596. value = strtod(str_dupe, &endptr);
  597. /* If this isn't a valid numeric string, this is an error. */
  598. if (endptr != str_dupe + strlen(str_dupe))
  599. {
  600. g_free(str_dupe);
  601. return FALSE;
  602. }
  603. g_free(str_dupe);
  604. /* Change abs to fabs, to fix bug 586805 */
  605. if (fabs(value) > 0.00001)
  606. {
  607. prop->value = g_new(gnc_numeric, 1);
  608. *((gnc_numeric*)(prop->value)) =
  609. double_to_gnc_numeric(value, xaccAccountGetCommoditySCU(prop->list->account),
  610. GNC_HOW_RND_ROUND_HALF_UP);
  611. }
  612. return TRUE;
  613. }
  614. return FALSE; /* We should never actually get here. */
  615. }
  616. /** Constructor for TransPropertyList.
  617. * @param account The account with which transactions should be built
  618. * @param date_format An index from date_format_user for how date properties should be parsed
  619. * @return A pointer to a new TransPropertyList
  620. */
  621. static TransPropertyList* trans_property_list_new(Account* account, int date_format)
  622. {
  623. TransPropertyList* list = g_new(TransPropertyList, 1);
  624. list->account = account;
  625. list->date_format = date_format;
  626. list->properties = NULL;
  627. return list;
  628. }
  629. /** Destructor for TransPropertyList.
  630. * @param list The list to be freed
  631. */
  632. static void trans_property_list_free(TransPropertyList* list)
  633. {
  634. /* Free all of the properties in this list before freeeing the list itself. */
  635. GList* properties_begin = list->properties;
  636. while (list->properties != NULL)
  637. {
  638. trans_property_free((TransProperty*)(list->properties->data));
  639. list->properties = g_list_next(list->properties);
  640. }
  641. g_list_free(properties_begin);
  642. g_free(list);
  643. }
  644. /** Adds a property to the list it's linked with.
  645. * (The TransPropertyList is not passed as a parameter because the property is
  646. * associated with a list when it's constructed.)
  647. * @param property The property to be added to its list
  648. */
  649. static void trans_property_list_add(TransProperty* property)
  650. {
  651. property->list->properties = g_list_append(property->list->properties, property);
  652. }
  653. /** Adds a split to a transaction.
  654. * @param trans The transaction to add a split to
  655. * @param account The account used for the split
  656. * @param book The book where the split should be stored
  657. * @param amount The amount of the split
  658. */
  659. static void trans_add_split(Transaction* trans, Account* account, QofBook* book,
  660. gnc_numeric amount)
  661. {
  662. Split* split = xaccMallocSplit(book);
  663. xaccSplitSetAccount(split, account);
  664. xaccSplitSetParent(split, trans);
  665. xaccSplitSetAmount(split, amount);
  666. xaccSplitSetValue(split, amount);
  667. xaccSplitSetAction(split, "Deposit");
  668. }
  669. /** Tests a TransPropertyList for having enough essential properties.
  670. * Essential properties are "Date" and one of the following: "Balance", "Deposit", or
  671. * "Withdrawal".
  672. * @param list The list we are checking
  673. * @param error Contains an error message on failure
  674. * @return TRUE if there are enough essentials; FALSE otherwise
  675. */
  676. static gboolean trans_property_list_verify_essentials(TransPropertyList* list, gchar** error)
  677. {
  678. int i;
  679. /* possible_errors lists the ways in which a list can fail this test. */
  680. enum PossibleErrorTypes {NO_DATE, NO_AMOUNT, NUM_OF_POSSIBLE_ERRORS};
  681. gchar* possible_errors[NUM_OF_POSSIBLE_ERRORS] =
  682. {
  683. N_("No date column."),
  684. N_("No balance, deposit, or withdrawal column.")
  685. };
  686. int possible_error_lengths[NUM_OF_POSSIBLE_ERRORS] = {0};
  687. GList *properties_begin = list->properties, *errors_list = NULL;
  688. /* Go through each of the properties and erase possible errors. */
  689. while (list->properties)
  690. {
  691. switch (((TransProperty*)(list->properties->data))->type)
  692. {
  693. case GNC_CSV_DATE:
  694. possible_errors[NO_DATE] = NULL;
  695. break;
  696. case GNC_CSV_BALANCE:
  697. case GNC_CSV_DEPOSIT:
  698. case GNC_CSV_WITHDRAWAL:
  699. possible_errors[NO_AMOUNT] = NULL;
  700. break;
  701. }
  702. list->properties = g_list_next(list->properties);
  703. }
  704. list->properties = properties_begin;
  705. /* Accumulate a list of the actual errors. */
  706. for (i = 0; i < NUM_OF_POSSIBLE_ERRORS; i++)
  707. {
  708. if (possible_errors[i] != NULL)
  709. {
  710. errors_list = g_list_append(errors_list, GINT_TO_POINTER(i));
  711. /* Since we added an error, we want to also store its length for
  712. * when we construct the full error string. */
  713. possible_error_lengths[i] = strlen(_(possible_errors[i]));
  714. }
  715. }
  716. /* If there are no errors, we can quit now. */
  717. if (errors_list == NULL)
  718. return TRUE;
  719. else
  720. {
  721. /* full_error_size is the full length of the error message. */
  722. int full_error_size = 0, string_length = 0;
  723. GList* errors_list_begin = errors_list;
  724. gchar *error_message, *error_message_begin;
  725. /* Find the value for full_error_size. */
  726. while (errors_list)
  727. {
  728. /* We add an extra 1 to account for spaces in between messages. */
  729. full_error_size += possible_error_lengths[GPOINTER_TO_INT(errors_list->data)] + 1;
  730. errors_list = g_list_next(errors_list);
  731. }
  732. errors_list = errors_list_begin;
  733. /* Append the error messages one after another. */
  734. error_message = error_message_begin = g_new(gchar, full_error_size);
  735. while (errors_list)
  736. {
  737. i = GPOINTER_TO_INT(errors_list->data);
  738. string_length = possible_error_lengths[i];
  739. /* Copy the error message and put a space after it. */
  740. strncpy(error_message, _(possible_errors[i]), string_length);
  741. error_message += string_length;
  742. *error_message = ' ';
  743. error_message++;
  744. errors_list = g_list_next(errors_list);
  745. }
  746. *error_message = '\0'; /* Replace the last space with the null byte. */
  747. g_list_free(errors_list_begin);
  748. *error = error_message_begin;
  749. return FALSE;
  750. }
  751. }
  752. /** Create a Transaction from a TransPropertyList.
  753. * @param list The list of properties
  754. * @param error Contains an error on failure
  755. * @return On success, a GncCsvTransLine; on failure, the trans pointer is NULL
  756. */
  757. static GncCsvTransLine* trans_property_list_to_trans(TransPropertyList* list, gchar** error)
  758. {
  759. GncCsvTransLine* trans_line = g_new(GncCsvTransLine, 1);
  760. GList* properties_begin = list->properties;
  761. QofBook* book = gnc_account_get_book(list->account);
  762. gnc_commodity* currency = xaccAccountGetCommodity(list->account);
  763. gnc_numeric amount = double_to_gnc_numeric(0.0, xaccAccountGetCommoditySCU(list->account),
  764. GNC_HOW_RND_ROUND_HALF_UP);
  765. /* This flag is set to TRUE if we can use the "Deposit" or "Withdrawal" column. */
  766. gboolean amount_set = FALSE;
  767. /* The balance is 0 by default. */
  768. trans_line->balance_set = FALSE;
  769. trans_line->balance = amount;
  770. /* We make the line_no -1 just to mark that it hasn't been set. We
  771. * may get rid of line_no soon anyway, so it's not particularly
  772. * important. */
  773. trans_line->line_no = -1;
  774. /* Make sure this is a transaction with all the columns we need. */
  775. if (!trans_property_list_verify_essentials(list, error))
  776. {
  777. g_free(trans_line);
  778. return NULL;
  779. }
  780. trans_line->trans = xaccMallocTransaction(book);
  781. xaccTransBeginEdit(trans_line->trans);
  782. xaccTransSetCurrency(trans_line->trans, currency);
  783. /* Go through each of the properties and edit the transaction accordingly. */
  784. list->properties = properties_begin;
  785. while (list->properties != NULL)
  786. {
  787. TransProperty* prop = (TransProperty*)(list->properties->data);
  788. switch (prop->type)
  789. {
  790. case GNC_CSV_DATE:
  791. xaccTransSetDatePostedSecs(trans_line->trans, *((time_t*)(prop->value)));
  792. break;
  793. case GNC_CSV_DESCRIPTION:
  794. xaccTransSetDescription(trans_line->trans, (char*)(prop->value));
  795. break;
  796. case GNC_CSV_NUM:
  797. xaccTransSetNum(trans_line->trans, (char*)(prop->value));
  798. break;
  799. case GNC_CSV_DEPOSIT: /* Add deposits to the existing amount. */
  800. if (prop->value != NULL)
  801. {
  802. amount = gnc_numeric_add(*((gnc_numeric*)(prop->value)),
  803. amount,
  804. xaccAccountGetCommoditySCU(list->account),
  805. GNC_HOW_RND_ROUND_HALF_UP);
  806. amount_set = TRUE;
  807. /* We will use the "Deposit" and "Withdrawal" columns in preference to "Balance". */
  808. trans_line->balance_set = FALSE;
  809. }
  810. break;
  811. case GNC_CSV_WITHDRAWAL: /* Withdrawals are just negative deposits. */
  812. if (prop->value != NULL)
  813. {
  814. amount = gnc_numeric_add(gnc_numeric_neg(*((gnc_numeric*)(prop->value))),
  815. amount,
  816. xaccAccountGetCommoditySCU(list->account),
  817. GNC_HOW_RND_ROUND_HALF_UP);
  818. amount_set = TRUE;
  819. /* We will use the "Deposit" and "Withdrawal" columns in preference to "Balance". */
  820. trans_line->balance_set = FALSE;
  821. }
  822. break;
  823. case GNC_CSV_BALANCE: /* The balance gets stored in a separate field in trans_line. */
  824. /* We will use the "Deposit" and "Withdrawal" columns in preference to "Balance". */
  825. if (!amount_set && prop->value != NULL)
  826. {
  827. /* This gets put into the actual transaction at the end of gnc_csv_parse_to_trans. */
  828. trans_line->balance = *((gnc_numeric*)(prop->value));
  829. trans_line->balance_set = TRUE;
  830. }
  831. break;
  832. }
  833. list->properties = g_list_next(list->properties);
  834. }
  835. /* Add a split with the cumulative amount value. */
  836. trans_add_split(trans_line->trans, list->account, book, amount);
  837. return trans_line;
  838. }
  839. /** Creates a list of transactions from parsed data. Transactions that
  840. * could be created from rows are placed in parse_data->transactions;
  841. * rows that fail are placed in parse_data->error_lines. (Note: there
  842. * is no way for this function to "fail," i.e. it only returns 0, so
  843. * it may be changed to a void function in the future.)
  844. * @param parse_data Data that is being parsed
  845. * @param account Account with which transactions are created
  846. * @param redo_errors TRUE to convert only error data, FALSE for all data
  847. * @return 0 on success, 1 on failure
  848. */
  849. int gnc_csv_parse_to_trans(GncCsvParseData* parse_data, Account* account,
  850. gboolean redo_errors)
  851. {
  852. gboolean hasBalanceColumn;
  853. int i, j, max_cols = 0;
  854. GArray* column_types = parse_data->column_types;
  855. GList *error_lines = NULL, *begin_error_lines = NULL;
  856. /* last_transaction points to the last element in
  857. * parse_data->transactions, or NULL if it's empty. */
  858. GList* last_transaction = NULL;
  859. /* Free parse_data->error_lines and parse_data->transactions if they
  860. * already exist. */
  861. if (redo_errors) /* If we're redoing errors, we save freeing until the end. */
  862. {
  863. begin_error_lines = error_lines = parse_data->error_lines;
  864. }
  865. else
  866. {
  867. if (parse_data->error_lines != NULL)
  868. {
  869. g_list_free(parse_data->error_lines);
  870. }
  871. if (parse_data->transactions != NULL)
  872. {
  873. g_list_free(parse_data->transactions);
  874. }
  875. }
  876. parse_data->error_lines = NULL;
  877. if (redo_errors) /* If we're looking only at error data ... */
  878. {
  879. if (parse_data->transactions == NULL)
  880. {
  881. last_transaction = NULL;
  882. }
  883. else
  884. {
  885. /* Move last_transaction to the end. */
  886. last_transaction = parse_data->transactions;
  887. while (g_list_next(last_transaction) != NULL)
  888. {
  889. last_transaction = g_list_next(last_transaction);
  890. }
  891. }
  892. /* ... we use only the lines in error_lines. */
  893. if (error_lines == NULL)
  894. i = parse_data->orig_lines->len; /* Don't go into the for loop. */
  895. else
  896. i = GPOINTER_TO_INT(error_lines->data);
  897. }
  898. else /* Otherwise, we look at all the data. */
  899. {
  900. /* The following while-loop effectively behaves like the following for-loop:
  901. * for(i = 0; i < parse_data->orig_lines->len; i++). */
  902. i = 0;
  903. last_transaction = NULL;
  904. }
  905. while (i < parse_data->orig_lines->len)
  906. {
  907. GPtrArray* line = parse_data->orig_lines->pdata[i];
  908. /* This flag is TRUE if there are any errors in this row. */
  909. gboolean errors = FALSE;
  910. gchar* error_message = NULL;
  911. TransPropertyList* list = trans_property_list_new(account, parse_data->date_format);
  912. GncCsvTransLine* trans_line = NULL;
  913. for (j = 0; j < line->len; j++)
  914. {
  915. /* We do nothing in "None" columns. */
  916. if (column_types->data[j] != GNC_CSV_NONE)
  917. {
  918. /* Affect the transaction appropriately. */
  919. TransProperty* property = trans_property_new(column_types->data[j], list);
  920. gboolean succeeded = trans_property_set(property, line->pdata[j]);
  921. /* TODO Maybe move error handling to within TransPropertyList functions? */
  922. if (succeeded)
  923. {
  924. trans_property_list_add(property);
  925. }
  926. else
  927. {
  928. errors = TRUE;
  929. error_message = g_strdup_printf(_("%s column could not be understood."),
  930. _(gnc_csv_column_type_strs[property->type]));
  931. trans_property_free(property);
  932. break;
  933. }
  934. }
  935. }
  936. /* If we had success, add the transaction to parse_data->transaction. */
  937. if (!errors)
  938. {
  939. trans_line = trans_property_list_to_trans(list, &error_message);
  940. errors = trans_line == NULL;
  941. }
  942. trans_property_list_free(list);
  943. /* If there were errors, add this line to parse_data->error_lines. */
  944. if (errors)
  945. {
  946. parse_data->error_lines = g_list_append(parse_data->error_lines,
  947. GINT_TO_POINTER(i));
  948. /* If there's already an error message, we need to replace it. */
  949. if (line->len > (int)(parse_data->orig_row_lengths->data[i]))
  950. {
  951. g_free(line->pdata[line->len - 1]);
  952. line->pdata[line->len - 1] = error_message;
  953. }
  954. else
  955. {
  956. /* Put the error message at the end of the line. */
  957. g_ptr_array_add(line, error_message);
  958. }
  959. }
  960. else
  961. {
  962. /* If all went well, add this transaction to the list. */
  963. trans_line->line_no = i;
  964. /* We keep the transactions sorted by date. We start at the end
  965. * of the list and go backward, simply because the file itself
  966. * is probably also sorted by date (but we need to handle the
  967. * exception anyway). */
  968. /* If we can just put it at the end, do so and increment last_transaction. */
  969. if (last_transaction == NULL ||
  970. xaccTransGetDate(((GncCsvTransLine*)(last_transaction->data))->trans) <= xaccTransGetDate(trans_line->trans))
  971. {
  972. parse_data->transactions = g_list_append(parse_data->transactions, trans_line);
  973. /* If this is the first transaction, we need to get last_transaction on track. */
  974. if (last_transaction == NULL)
  975. last_transaction = parse_data->transactions;
  976. else /* Otherwise, we can just continue. */
  977. last_transaction = g_list_next(last_transaction);
  978. }
  979. /* Otherwise, search backward for the correct spot. */
  980. else
  981. {
  982. GList* insertion_spot = last_transaction;
  983. while (insertion_spot != NULL &&
  984. xaccTransGetDate(((GncCsvTransLine*)(insertion_spot->data))->trans) > xaccTransGetDate(trans_line->trans))
  985. {
  986. insertion_spot = g_list_previous(insertion_spot);
  987. }
  988. /* Move insertion_spot one location forward since we have to
  989. * use the g_list_insert_before function. */
  990. if (insertion_spot == NULL) /* We need to handle the case of inserting at the beginning of the list. */
  991. insertion_spot = parse_data->transactions;
  992. else
  993. insertion_spot = g_list_next(insertion_spot);
  994. parse_data->transactions = g_list_insert_before(parse_data->transactions, insertion_spot, trans_line);
  995. }
  996. }
  997. /* Increment to the next row. */
  998. if (redo_errors)
  999. {
  1000. /* Move to the next error line in the list. */
  1001. error_lines = g_list_next(error_lines);
  1002. if (error_lines == NULL)
  1003. i = parse_data->orig_lines->len; /* Don't continue the for loop. */
  1004. else
  1005. i = GPOINTER_TO_INT(error_lines->data);
  1006. }
  1007. else
  1008. {
  1009. i++;
  1010. }
  1011. }
  1012. /* If we have a balance column, set the appropriate amounts on the transactions. */
  1013. hasBalanceColumn = FALSE;
  1014. for (i = 0; i < parse_data->column_types->len; i++)
  1015. {
  1016. if (parse_data->column_types->data[i] == GNC_CSV_BALANCE)
  1017. {
  1018. hasBalanceColumn = TRUE;
  1019. break;
  1020. }
  1021. }
  1022. if (hasBalanceColumn)
  1023. {
  1024. GList* transactions = parse_data->transactions;
  1025. /* balance_offset is how much the balance currently in the account
  1026. * differs from what it will be after the transactions are
  1027. * imported. This will be sum of all the previous transactions for
  1028. * any given transaction. */
  1029. gnc_numeric balance_offset = double_to_gnc_numeric(0.0,
  1030. xaccAccountGetCommoditySCU(account),
  1031. GNC_HOW_RND_ROUND_HALF_UP);
  1032. while (transactions != NULL)
  1033. {
  1034. GncCsvTransLine* trans_line = (GncCsvTransLine*)transactions->data;
  1035. if (trans_line->balance_set)
  1036. {
  1037. time_t date = xaccTransGetDate(trans_line->trans);
  1038. /* Find what the balance should be by adding the offset to the actual balance. */
  1039. gnc_numeric existing_balance = gnc_numeric_add(balance_offset,
  1040. xaccAccountGetBalanceAsOfDate(account, date),
  1041. xaccAccountGetCommoditySCU(account),
  1042. GNC_HOW_RND_ROUND_HALF_UP);
  1043. /* The amount of the transaction is the difference between the new and existing balance. */
  1044. gnc_numeric amount = gnc_numeric_sub(trans_line->balance,
  1045. existing_balance,
  1046. xaccAccountGetCommoditySCU(account),
  1047. GNC_HOW_RND_ROUND_HALF_UP);
  1048. SplitList* splits = xaccTransGetSplitList(trans_line->trans);
  1049. while (splits)
  1050. {
  1051. SplitList* next_splits = g_list_next(splits);
  1052. xaccSplitDestroy((Split*)splits->data);
  1053. splits = next_splits;
  1054. }
  1055. trans_add_split(trans_line->trans, account, gnc_account_get_book(account), amount);
  1056. /* This new transaction needs to be added to the balance offset. */
  1057. balance_offset = gnc_numeric_add(balance_offset,
  1058. amount,
  1059. xaccAccountGetCommoditySCU(account),
  1060. GNC_HOW_RND_ROUND_HALF_UP);
  1061. }
  1062. transactions = g_list_next(transactions);
  1063. }
  1064. }
  1065. if (redo_errors) /* Now that we're at the end, we do the freeing. */
  1066. {
  1067. g_list_free(begin_error_lines);
  1068. }
  1069. /* We need to resize parse_data->column_types since errors may have added columns. */
  1070. for (i = 0; i < parse_data->orig_lines->len; i++)
  1071. {
  1072. if (max_cols < ((GPtrArray*)(parse_data->orig_lines->pdata[i]))->len)
  1073. max_cols = ((GPtrArray*)(parse_data->orig_lines->pdata[i]))->len;
  1074. }
  1075. i = parse_data->column_types->len;
  1076. parse_data->column_types = g_array_set_size(parse_data->column_types, max_cols);
  1077. for (; i < max_cols; i++)
  1078. {
  1079. parse_data->column_types->data[i] = GNC_CSV_NONE;
  1080. }
  1081. return 0;
  1082. }