/edu/uncc/parsets/util/BatchConvert.java

https://code.google.com/p/parsets/ · Java · 120 lines · 75 code · 16 blank · 29 comment · 15 complexity · f2a3c9297e07ffa8968a105c6aa27a1c MD5 · raw file

  1. package edu.uncc.parsets.util;
  2. import java.io.File;
  3. import java.util.ArrayList;
  4. import java.util.List;
  5. import edu.uncc.parsets.data.JSONExport;
  6. import edu.uncc.parsets.data.LocalDB;
  7. import edu.uncc.parsets.data.LocalDBDataSet;
  8. import edu.uncc.parsets.data.old.CSVDataSet;
  9. import edu.uncc.parsets.data.old.CSVParser;
  10. /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\
  11. * Copyright (c) 2009, Robert Kosara, Caroline Ziemkiewicz,
  12. * and others (see Authors.txt for full list)
  13. * All rights reserved.
  14. *
  15. * Redistribution and use in source and binary forms, with or without
  16. * modification, are permitted provided that the following conditions are met:
  17. *
  18. * * Redistributions of source code must retain the above copyright
  19. * notice, this list of conditions and the following disclaimer.
  20. * * Redistributions in binary form must reproduce the above copyright
  21. * notice, this list of conditions and the following disclaimer in the
  22. * documentation and/or other materials provided with the distribution.
  23. * * Neither the name of UNC Charlotte nor the names of its contributors
  24. * may be used to endorse or promote products derived from this software
  25. * without specific prior written permission.
  26. *
  27. * THIS SOFTWARE IS PROVIDED BY ITS AUTHORS ''AS IS'' AND ANY
  28. * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  29. * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  30. * DISCLAIMED. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY
  31. * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  32. * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  33. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  34. * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  35. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  36. * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  37. \* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
  38. public class BatchConvert {
  39. private static final String TEMPDBFILENAME = "temp.db";
  40. private static final String USAGE = "Usage: convert <srcdir> <destdir>";
  41. public static final String BASEURL = "http://data.eagereyes.org/";
  42. public static void batchConvert(String args[]) {
  43. if (args.length != 3 || !args[0].equals("convert")) {
  44. System.err.println(USAGE);
  45. return;
  46. }
  47. File tempDBFile = new File(TEMPDBFILENAME);
  48. if (tempDBFile.exists())
  49. if (!tempDBFile.delete())
  50. System.err.println("Could not delete temporary database "+TEMPDBFILENAME);
  51. File srcDir = new File(args[1]);
  52. File dstDir = new File(args[2]);
  53. LocalDB tempDB = new LocalDB(TEMPDBFILENAME);
  54. List<File> csvFiles = scanDir(srcDir, dstDir);
  55. for (File f : csvFiles) {
  56. CSVParser parser = new CSVParser(f.getPath(), null);
  57. Thread t = parser.analyzeCSVFile();
  58. try {
  59. t.join();
  60. } catch (InterruptedException e) {
  61. e.printStackTrace();
  62. }
  63. CSVDataSet csvData = parser.getDataSet();
  64. System.out.println("Importing "+csvData.getName()+" ...");
  65. String newPath = rebase(f.getPath(), dstDir.getPath())+".json.gz";
  66. csvData.setURL(BASEURL+newPath);
  67. parser.streamToDB(tempDB);
  68. }
  69. for (LocalDBDataSet ds : tempDB.getDataSets()) {
  70. String newPath = ds.getURL().substring(BASEURL.length());
  71. System.out.println("Exporting "+ds.getName()+" to "+newPath);
  72. JSONExport.exportDataSet(ds, newPath);
  73. }
  74. System.out.println("Exporting index ...");
  75. String indexName = JSONExport.exportDBIndex(tempDB, dstDir.getPath()+File.separatorChar+"index");
  76. System.out.println("Index: "+indexName);
  77. System.out.println("Done.");
  78. if (!tempDBFile.delete())
  79. System.err.println("Could not delete temporary database "+TEMPDBFILENAME);
  80. }
  81. private static String rebase(String original, String newBase) {
  82. StringBuilder newPath = new StringBuilder(newBase);
  83. newPath.append(original.substring(original.indexOf(File.separatorChar), original.lastIndexOf(".")));
  84. return newPath.toString();
  85. }
  86. private static List<File> scanDir(File srcDir, File dstDir) {
  87. if (!dstDir.exists())
  88. if (!dstDir.mkdir())
  89. System.err.println("Could not create directory "+dstDir.getPath());
  90. List<File> files = new ArrayList<File>();
  91. for (File f : srcDir.listFiles()) {
  92. if (f.isDirectory()) {
  93. if (!f.getName().startsWith(".")) {
  94. File newDest = new File(dstDir.getPath()+File.separatorChar+f.getName());
  95. files.addAll(scanDir(f, newDest));
  96. }
  97. } else if (f.getName().endsWith(".csv")) {
  98. files.add(f);
  99. // System.out.println(f.getPath());
  100. }
  101. }
  102. return files;
  103. }
  104. }