/alaspatial/src/main/java/org/ala/spatial/analysis/index/RemoveOccurrenceRecords.java

http://alageospatialportal.googlecode.com/ · Java · 140 lines · 90 code · 24 blank · 26 comment · 27 complexity · 0b017c1b21190cd8a7d07519376f78e8 MD5 · raw file

  1. /*
  2. * To change this template, choose Tools | Templates
  3. * and open the template in the editor.
  4. */
  5. package org.ala.spatial.analysis.index;
  6. import java.io.BufferedReader;
  7. import java.io.FileReader;
  8. import java.io.FileWriter;
  9. import java.util.ArrayList;
  /**
   * Removes records from an occurrences CSV file.
   *
   * 1. extract occurrence ids from an occurrences.csv (A)
   * 2. remove from a second occurrences.csv (B) every record whose id is in (A)
   *    and, when a match string is supplied, every record containing that string
   * 3. export (B) with those records removed.
   *
   * The id is in the first column of both files. Lines whose first column is
   * not a number (for example a header row) are never removed.
   *
   * @author Adam
   */
  public class RemoveOccurrenceRecords {

      /**
       * Optional substring used for record removal; {@code null} means that no
       * string matching is performed.
       */
      static String match = null;

      /**
       * Command line entry point.
       *
       * arg[0] is occurrences file for id extraction
       * arg[1] is occurrences file for id removal
       * arg[2] is occurrences file for export.
       * arg[3] is optional string to match for record removal.
       *
       * When fewer than three arguments are supplied a usage message is
       * printed to standard error and nothing else is done.
       *
       * @param args command line arguments as described above
       */
      public static void main(String[] args) {
          if (args == null || args.length < 3) {
              System.err.println("usage: RemoveOccurrenceRecords <idsFile> <inputFile> <outputFile> [matchString]");
              return;
          }
          if (args.length > 3) {
              match = args[3];
          }

          ArrayList<Long> ids = extractIds(args[0]);
          // removeIds() looks ids up with a binary search, so they must be sorted.
          java.util.Collections.sort(ids);
          removeIds(args[1], args[2], ids);
      }

      /**
       * Reads the ids found in the first column of an occurrences file.
       * Records whose first column is not a number are skipped. I/O errors are
       * reported on standard error and the ids read so far are returned.
       *
       * @param filename path of the occurrences csv to read
       * @return ids in file order (unsorted, duplicates retained)
       */
      private static ArrayList<Long> extractIds(String filename) {
          ArrayList<Long> ids = new ArrayList<Long>();

          BufferedReader br = null;
          try {
              br = new BufferedReader(new FileReader(filename));
              String s;
              while ((s = readRecord(br)) != null) {
                  Long id = parseId(s);
                  if (id != null) {
                      ids.add(id);
                  }
              }
          } catch (java.io.IOException e) {
              e.printStackTrace();
          } finally {
              close(br);
          }

          return ids;
      }

      /**
       * Copies an occurrences file, leaving out every record whose id is in
       * {@code ids} or which satisfies {@link #stringMatch(String)}. Records
       * without a numeric id are always copied. A summary of the number of
       * removed records is printed to standard output.
       *
       * @param inputFilename path of the occurrences csv to filter
       * @param outputFilename path of the file to write
       * @param ids ids to remove; must be sorted in ascending order
       */
      private static void removeIds(String inputFilename, String outputFilename, ArrayList<Long> ids) {
          BufferedReader br = null;
          FileWriter fw = null;
          try {
              br = new BufferedReader(new FileReader(inputFilename));
              fw = new FileWriter(outputFilename);

              int idsRemoved = 0;
              int stringMatchesRemoved = 0;

              String s;
              while ((s = readRecord(br)) != null) {
                  boolean export = true;

                  Long id = parseId(s);
                  if (id != null) {
                      if (java.util.Collections.binarySearch(ids, id) >= 0) {
                          export = false;
                          idsRemoved++;
                      } else if (stringMatch(s)) {
                          export = false;
                          stringMatchesRemoved++;
                      }
                  }

                  if (export) {
                      fw.append(s).append("\n");
                  }
              }

              System.out.println("ids Removed: " + idsRemoved + ", string matches Removed: " + stringMatchesRemoved);
          } catch (java.io.IOException e) {
              e.printStackTrace();
          } finally {
              // Close in a finally block so neither file leaks when reading or writing fails.
              close(fw);
              close(br);
          }
      }

      /**
       * Tests a record against the optional match string.
       *
       * @param s the record text
       * @return true only when a match string is set and {@code s} contains it
       */
      private static boolean stringMatch(String s) {
          return match != null && s.contains(match);
      }

      /**
       * Reads one logical record. A physical line ending in a backslash is
       * continued on the next line; the backslash is replaced by a single space
       * and the next line is appended.
       *
       * @param br reader positioned at the start of a record
       * @return the record, or null at end of input
       * @throws java.io.IOException if reading fails
       */
      private static String readRecord(BufferedReader br) throws java.io.IOException {
          String s = br.readLine();
          while (s != null && s.length() > 0 && s.charAt(s.length() - 1) == '\\') {
              String spart = br.readLine();
              if (spart == null) {
                  // Input ended right after a continuation marker: keep the line as it is.
                  break;
              }
              s = s.substring(0, s.length() - 1) + ' ' + spart;
          }
          return s;
      }

      /**
       * Parses the id held in the first comma separated column of a record.
       * Double quotes around the value are ignored.
       *
       * @param record the record text
       * @return the id, or null when the first column is not a valid long
       */
      private static Long parseId(String record) {
          int comma = record.indexOf(',');
          String first = comma < 0 ? record : record.substring(0, comma);
          try {
              return Long.valueOf(first.replace("\"", ""));
          } catch (NumberFormatException ignored) {
              // Header rows and other non numeric first columns carry no id.
              return null;
          }
      }

      /**
       * Closes a resource, reporting (not propagating) any failure.
       *
       * @param c resource to close; may be null
       */
      private static void close(java.io.Closeable c) {
          if (c == null) {
              return;
          }
          try {
              c.close();
          } catch (java.io.IOException e) {
              e.printStackTrace();
          }
      }
  }