/src/main/groovy/groovybeans/csv/CsvReader.groovy

http://github.com/dkandalov/groovy-beans · Groovy · 114 lines · 78 code · 19 blank · 17 comment · 11 complexity · 1b2730224227848da13adf4018c1849f MD5 · raw file

  1. package groovybeans.csv
  2. import groovybeans.beans.Bean
  3. /**
  4. * Reads {@link Bean}s from csv file.
  5. *
  6. * Should:
  7. * + read file in one go
  8. * + read file line by line
  9. * - it would be useful to have "streams" of data and be able to sequence them in functional style
  10. *
  11. * + read all columns into beans as is
  12. * + read subset of columns
  13. *
  14. * - smart reader: should guess field types according to file content (try all date formats, integer, double, string)
  15. *
  16. * - detect when file content doesn't match "beanType"
  17. * - nice reader: fill fields that match
  18. * - strict reader: fail fast
  19. */
  class CsvReader {
      /**
       * Tokenizer used for both header and data lines; every successful find yields exactly one value.
       * The alternatives are, in order: a quoted value followed by a comma, a quoted value ending the
       * line, an unquoted value followed by a comma, a non-empty unquoted value ending the line, and an
       * empty value after a trailing comma. Compiled once instead of once per line.
       */
      private static final java.util.regex.Pattern VALUE_PATTERN = ~/"(.*?)",|"(.*)"$|(.*?),|(.+)$|(?<=,)(.*)$/

      /** Column names taken from the first line of the most recently read input. */
      def header
      /** Passed as the second constructor argument to every created {@link Bean}. */
      def beanType = [:]
      /** Names of the columns to keep; empty (or null) means "keep every column". */
      List columnsToRead = []
      /** Column name -> zero-based index into the values of a line; rebuilt for every input. */
      Map columnMapping = new LinkedHashMap()

      /** Reads all beans from {@code file} using a reader with default settings. */
      static List<Bean> readCsv(File file) {
          new CsvReader().read(new FileReader(file))
      }

      /** Reads all beans from the file called {@code fileName} using a reader with default settings. */
      static List<Bean> readCsv(String fileName) {
          new CsvReader().read(fileName)
      }

      /** Sets the bean type handed to each created bean; returns this reader for chaining. */
      CsvReader withBeanType(def beanType) {
          this.beanType = beanType
          this
      }

      /** Restricts reading to the given column names; returns this reader for chaining. */
      CsvReader usingColumns(List columnsToRead) {
          this.columnsToRead = columnsToRead
          this
      }

      /** Reads all beans from csv content held in memory. */
      List<Bean> readString(String csvString) {
          read(new StringReader(csvString))
      }

      /** Reads all beans from the file called {@code fileName}. */
      List<Bean> read(String fileName) {
          read(new FileReader(fileName))
      }

      /** Reads the whole input in one go and returns the beans in line order. */
      List<Bean> read(Reader inputReader) {
          def beans = []
          readEachLine(inputReader) { beans << it }
          beans
      }

      /** Same as {@link #readEachLine(Reader, Closure)} for the file called {@code fileName}. */
      def readEachLine(String fileName, Closure closure) {
          readEachLine(new FileReader(fileName), closure)
      }

      /**
       * Reads the input line by line: the first line is the header, every following line is
       * converted into a bean and passed to {@code closure}.
       * Iteration is done with Groovy's {@code Reader.eachLine}, which closes the reader itself.
       *
       * @throws IllegalStateException if the header line is empty, if a requested column is not in
       *         the header, or if a data line has a different number of values than the header
       */
      def readEachLine(Reader inputReader, Closure closure) {
          inputReader.eachLine { line, lineNumber ->
              // eachLine counts lines starting from 1, so line 1 is the header
              if (lineNumber == 1) {
                  header = readHeader(line)
                  prepareHeaderMapping()
              } else {
                  closure.call(readBean(line))
              }
          }
      }

      /**
       * Converts one data line into a bean holding the mapped columns.
       *
       * @throws IllegalStateException if the number of values differs from the number of header columns
       */
      private def readBean(String s) {
          def values = splitIntoValues(s)
          if (values.size() < header.size()) {
              throw new IllegalStateException("Too few values in line \"${s}\". Header has ${header.size()} column, but read ${values.size()} values.")
          }
          if (values.size() > header.size()) {
              throw new IllegalStateException("Too many values in line \"${s}\"")
          }

          def fields = [:]
          columnMapping.each { columnName, columnIndex -> fields[columnName] = values[columnIndex] }
          new Bean(fields, beanType)
      }

      /**
       * Splits one csv line into its values. Surrounding double quotes are removed and a doubled
       * double quote inside a value is turned into a single one. An empty line yields an empty list.
       */
      static def splitIntoValues(String s) {
          VALUE_PATTERN.matcher(s).collect { groups ->
              // groups[0] is the whole match; exactly one of the capture groups 1..5 is non-null
              groups[1..5].find { it != null }.replaceAll("\"\"", "\"")
          }
      }

      /**
       * Extracts column names from the header line. Uses the same tokenizer as data lines so that
       * quoted names (including names containing commas) and trailing empty columns are counted
       * the same way in the header and in the rows.
       *
       * @throws IllegalStateException if the line contains no column names
       */
      private def readHeader(String s) {
          def columnNames = splitIntoValues(s)
          if (columnNames.empty) {
              throw new IllegalStateException("Csv header line is empty")
          }
          columnNames
      }

      /**
       * Rebuilds {@link #columnMapping} from the current header, keeping only {@link #columnsToRead}
       * when that list is not empty. Mapping order follows the header order.
       *
       * @throws IllegalStateException if a requested column is missing from the header
       */
      private def prepareHeaderMapping() {
          // a null list is treated like an empty one: no restriction on columns
          def requested = columnsToRead ?: []
          def missing = requested.findAll { !header.contains(it) }
          if (!missing.empty) {
              throw new IllegalStateException("Columns ${missing} are not present in csv header ${header}")
          }

          columnMapping = new LinkedHashMap()
          header.eachWithIndex { columnName, columnIndex -> columnMapping.put(columnName, columnIndex) }
          if (!requested.empty) {
              columnMapping = columnMapping.findAll { requested.contains(it.key) }
          }
      }
  }
  95. }