/src/main/groovy/com/cmcmarkets/csv/CsvReader.groovy

http://github.com/dkandalov/groovy-beans · Groovy · 117 lines · 78 code · 19 blank · 20 comment · 11 complexity · 06e4cdf791277d0136984a2b7bf096c6 MD5 · raw file

  1. package com.cmcmarkets.csv
  2. import com.cmcmarkets.beans.Bean
  3. /**
  4. * Reads {@link Bean}s from csv file.
  5. *
  6. * Should:
  7. * + read file in one go
  8. * + read file line by line
  9. * - it would be useful to have "streams" of data and be able to sequence them in functional style
  10. *
  11. * + read all columns into beans as is
  12. * + read subset of columns
  13. *
  14. * - smart reader: should guess field types according to file content (try all date formats, integer, double, string)
  15. *
  16. * - detect when file content doesn't match "beanType"
  17. * - nice reader: fill fields that match
  18. * - strict reader: fail fast
  19. *
  20. * User: dima
  21. * Date: 9/2/11
  22. */
  class CsvReader {
      /**
       * Tokenizer for one CSV line, compiled once instead of once per line.
       * Each match is one value; exactly one of the five capture groups is non-null:
       * a quoted value followed by a comma, a quoted value at end of line,
       * an unquoted value followed by a comma, a non-empty unquoted value at end of line,
       * or an empty value after a trailing comma.
       */
      private static final VALUE_PATTERN = ~/"(.*?)",|"(.*)"$|(.*?),|(.+)$|(?<=,)(.*)$/

      /** Column names taken from the first line of the most recently read input. */
      def header
      /** Passed as the second argument to the {@link Bean} constructor for every row. */
      def beanType = [:]
      /** Names of the columns to keep; an empty list means "keep all columns". */
      List columnsToRead = []
      /** Column name -> zero-based index into the values of a row; rebuilt for each header. */
      Map columnMapping = new LinkedHashMap()

      /**
       * Reads the whole file into a list of beans using a reader with default settings.
       *
       * @param file csv file; its first line must be the header
       * @return one bean per data line, in file order
       */
      static List<Bean> readCsv(File file) {
          new CsvReader().read(new FileReader(file))
      }

      /**
       * Reads the whole file into a list of beans using a reader with default settings.
       *
       * @param fileName path of the csv file; its first line must be the header
       * @return one bean per data line, in file order
       */
      static List<Bean> readCsv(String fileName) {
          new CsvReader().read(fileName)
      }

      /**
       * Sets the value handed to every created {@link Bean} as its type description.
       *
       * @return this reader, to allow call chaining
       */
      CsvReader withBeanType(def beanType) {
          this.beanType = beanType
          this
      }

      /**
       * Restricts reading to the given columns. Every name must be present in the header,
       * otherwise reading fails with {@link IllegalStateException}.
       *
       * @return this reader, to allow call chaining
       */
      CsvReader usingColumns(List columnsToRead) {
          this.columnsToRead = columnsToRead
          this
      }

      /** Reads beans from csv content held in a string (header line first). */
      List<Bean> readString(String csvString) {
          read(new StringReader(csvString))
      }

      /** Reads all beans from the file with the given name. */
      List<Bean> read(String fileName) {
          read(new FileReader(fileName))
      }

      /**
       * Reads all beans from the reader in one go.
       *
       * @param inputReader source of csv text; closed by Groovy's {@code eachLine} when reading ends
       * @return one bean per data line; empty if there is only a header (or no input at all)
       * @throws IllegalStateException if the header or any data line is malformed
       */
      List<Bean> read(Reader inputReader) {
          def beans = []
          readEachLine(inputReader) { beans << it }
          beans
      }

      /** Same as {@link #readEachLine(Reader, Closure)}, reading from the named file. */
      def readEachLine(String fileName, Closure closure) {
          readEachLine(new FileReader(fileName), closure)
      }

      /**
       * Streams the input line by line: the first line is parsed as the header,
       * every following line is converted to a bean and passed to the closure.
       *
       * @param inputReader source of csv text; closed by Groovy's {@code eachLine} when reading ends
       * @param closure called once per data line with the bean read from it
       * @throws IllegalStateException if the header line is empty, a requested column is missing,
       *         or a data line has a different number of values than the header
       */
      def readEachLine(Reader inputReader, Closure closure) {
          // eachLine counts lines starting from 1, so line 1 is the header
          inputReader.eachLine { line, lineNumber ->
              if (lineNumber == 1) {
                  header = readHeader(line)
                  prepareHeaderMapping()
              } else {
                  closure.call(readBean(line))
              }
          }
      }

      /**
       * Converts one data line into a bean holding the mapped columns.
       *
       * @throws IllegalStateException if the number of values differs from the number of header columns
       */
      private def readBean(String s) {
          def values = splitIntoValues(s)
          def expectedSize = header.size()
          if (values.size() < expectedSize) {
              throw new IllegalStateException("Too few values in line \"${s}\". Header has ${header.size()} column, but read ${values.size()} values.")
          }
          if (values.size() > expectedSize) {
              throw new IllegalStateException("Too many values in line \"${s}\"")
          }

          def fields = [:]
          columnMapping.each { column ->
              fields.put(column.key, values[column.value])
          }
          new Bean(fields, beanType)
      }

      /**
       * Splits one csv line into its values. Surrounding quotes are removed and
       * doubled quotes ({@code ""}) are collapsed to a single quote.
       * An empty string yields an empty list.
       *
       * Known limitation: a quoted value that itself contains a quote directly
       * followed by a comma is cut at that point.
       *
       * @param s one line of csv text without the line terminator
       * @return list of string values in column order
       */
      static def splitIntoValues(String s) {
          def matcher = VALUE_PATTERN.matcher(s)
          matcher.collect { groups ->
              // groups[0] is the whole match; the value is whichever capture group took part in it
              groups[1..5].find { it != null }.replaceAll("\"\"", "\"")
          }
      }

      /**
       * Parses the header line with the same rules as data lines, so that quoted
       * column names (including ones containing commas or escaped quotes) line up
       * with the values read by {@link #splitIntoValues(String)}.
       *
       * @throws IllegalStateException if the line contains no column names
       */
      private def readHeader(String s) {
          def columnNames = splitIntoValues(s)
          if (columnNames.empty) {
              throw new IllegalStateException("Header line is empty, expected comma-separated column names")
          }
          columnNames
      }

      /**
       * Rebuilds {@code columnMapping} from the current header, keeping only
       * {@code columnsToRead} when that list is not empty.
       *
       * @throws IllegalStateException if a requested column is not in the header
       */
      private def prepareHeaderMapping() {
          def missingColumns = columnsToRead.findAll { !header.contains(it) }
          if (!missingColumns.empty) {
              throw new IllegalStateException("Columns ${missingColumns} are not present in header ${header}")
          }

          columnMapping = new LinkedHashMap()
          header.eachWithIndex { columnName, columnIndex -> columnMapping.put(columnName, columnIndex) }
          if (!columnsToRead.empty) {
              columnMapping = columnMapping.findAll { columnsToRead.contains(it.key) }
          }
      }
  }