/alaspatial/src/main/java/org/ala/spatial/analysis/index/RemoveOccurrenceRecords.java
Java | 140 lines | 90 code | 24 blank | 26 comment | 27 complexity | 0b017c1b21190cd8a7d07519376f78e8 MD5 | raw file
1/* 2 * To change this template, choose Tools | Templates 3 * and open the template in the editor. 4 */ 5package org.ala.spatial.analysis.index; 6 7import java.io.BufferedReader; 8import java.io.FileReader; 9import java.io.FileWriter; 10import java.util.ArrayList; 11 12/** 13 * 1. extract occurrenceId's from an occurrences.csv (A) 14 * 2. remove occurrenceId's from an occurrences.csv (B) 15 * 3. export (B) with occurrenceId's removed. 16 * 17 * id is in first column of occurrences.csv's 18 * 19 * @author Adam 20 */ 21public class RemoveOccurrenceRecords { 22 23 static String match = null; 24 25 /** 26 * arg[0] is occurrences file for id extraction 27 * arg[1] is occurrences file for id removal 28 * arg[2] is occurrences file for export. 29 * arg[3] is optional string to match for record removal. 30 * 31 * @param args 32 */ 33 static void main(String[] args) { 34 if (args.length > 3) { 35 match = args[3]; 36 } 37 38 ArrayList<Long> ids = extractIds(args[0]); 39 java.util.Collections.sort(ids); 40 41 removeIds(args[1], args[2], ids); 42 } 43 44 private static ArrayList<Long> extractIds(String filename) { 45 ArrayList<Long> ids = new ArrayList<Long>(); 46 47 /* read occurances_csv */ 48 try { 49 BufferedReader br = new BufferedReader(new FileReader(filename)); 50 51 String s; 52 String[] sa; 53 54 while ((s = br.readLine()) != null) { 55 //check for continuation line 56 while (s != null && s.length() > 0 && s.charAt(s.length() - 1) == '\\') { 57 String spart = br.readLine(); 58 if (spart == null) { //same as whole line is null 59 break; 60 } else { 61 s.replace('\\', ' '); //new line is same as 'space' 62 s += spart; 63 } 64 }//repeat as necessary 65 66 sa = s.split(","); 67 68 try { 69 long l = Long.parseLong(sa[0].replace("\"", "")); 70 71 ids.add(l); 72 } catch (Exception e) { 73 } 74 } 75 br.close(); 76 } catch (Exception e) { 77 e.printStackTrace(); 78 } 79 80 return ids; 81 } 82 83 private static void removeIds(String inputFilename, String outputFilename, ArrayList<Long> ids) { 84 /* read occurances_csv */ 85 try { 86 BufferedReader br = new BufferedReader(new FileReader(inputFilename)); 87 FileWriter fw = new FileWriter(outputFilename); 88 89 String s; 90 String[] sa; 91 92 int idsRemoved = 0; 93 int stringMatchesRemoved = 0; 94 95 while ((s = br.readLine()) != null) { 96 //check for continuation line 97 while (s != null && s.length() > 0 && s.charAt(s.length() - 1) == '\\') { 98 String spart = br.readLine(); 99 if (spart == null) { //same as whole line is null 100 break; 101 } else { 102 s.replace('\\', ' '); //new line is same as 'space' 103 s += spart; 104 } 105 }//repeat as necessary 106 107 sa = s.split(","); 108 109 boolean export = true; 110 try { 111 long l = Long.parseLong(sa[0].replace("\"", "")); 112 113 if (java.util.Collections.binarySearch(ids, l) >= 0) { 114 //don't export 115 export = false; 116 idsRemoved++; 117 } else if (stringMatch(s)) { 118 export = false; 119 stringMatchesRemoved++; 120 } 121 } catch (Exception e) { 122 } 123 124 if (export) { 125 fw.append(s).append("\n"); 126 } 127 } 128 129 System.out.println("ids Removed: " + idsRemoved + ", string matches Removed: " + stringMatchesRemoved); 130 br.close(); 131 fw.close(); 132 } catch (Exception e) { 133 e.printStackTrace(); 134 } 135 } 136 137 private static boolean stringMatch(String s) { 138 return match == null || s.contains(match); 139 } 140}