/alaspatial/src/main/java/org/ala/spatial/analysis/service/SamplingService.java

http://alageospatialportal.googlecode.com/ · Java · 613 lines · 367 code · 94 blank · 152 comment · 111 complexity · a85e5ad0c15892522f62b4bd7bcc212c MD5 · raw file

  1. package org.ala.spatial.analysis.service;
  2. import java.io.File;
  3. import java.io.FileWriter;
  4. import java.util.ArrayList;
  5. import org.ala.spatial.analysis.index.BoundingBoxes;
  6. import org.ala.spatial.analysis.index.IndexedRecord;
  7. import org.ala.spatial.analysis.index.OccurrenceRecordNumbers;
  8. import org.ala.spatial.analysis.index.OccurrencesCollection;
  9. import org.ala.spatial.analysis.index.OccurrencesFilter;
  10. import org.ala.spatial.analysis.index.SpeciesColourOption;
  11. import org.ala.spatial.util.AnalysisJobSampling;
  12. import org.ala.spatial.util.Layers;
  13. import org.ala.spatial.util.OccurrencesFieldsUtil;
  14. import org.ala.spatial.util.SimpleRegion;
  15. import org.ala.spatial.util.SpatialLogger;
  16. import org.ala.spatial.util.TabulationSettings;
  17. /**
  18. * service for returning occurrences + optional values from layer intersections
  19. *
  20. * @author adam
  21. *
  22. */
  23. public class SamplingService {
  24. public static SamplingService newForLSID(String lsid) {
  25. if (SamplingLoadedPointsService.isLoadedPointsLSID(lsid)) {
  26. return new SamplingLoadedPointsService();
  27. } else {
  28. return new SamplingService();
  29. }
  30. }
/**
 * Constructor: ensures tabulation settings are loaded before any sampling
 * operation runs (TabulationSettings.load() is expected to be idempotent —
 * NOTE(review): confirm; it is called for every instance created).
 */
SamplingService() {
    TabulationSettings.load();
}
/**
 * Gets samples (occurrence records plus optional intersecting layer values)
 * and writes them out as a CSV file, limiting output to {@code max_rows}.
 *
 * @param filter species name as String
 * @param layers list of layer names of additional data to include as String []
 * @param region region to restrict results as SimpleRegion
 * @param records sorted pool of records to intersect with
 * @param max_rows upper limit of records to return as int
 * @return path of the temporary CSV file written, or "" on error
 *         (the original javadoc incorrectly described a String[][] grid)
 */
public String sampleSpeciesAsCSV(String filter, String[] layers, SimpleRegion region, ArrayList<OccurrenceRecordNumbers> records, int max_rows) {
    // delegate to the full overload with no progress-reporting job
    return sampleSpeciesAsCSV(filter, layers, region, records, max_rows, null);
}
/**
 * Gets samples: occurrence records plus optional intersecting layer values,
 * limiting output to {@code max_rows}.
 *
 * @param filter species name as String
 * @param layers list of layer names of additional data to include as String []
 * @param region region to restrict results as SimpleRegion
 * @param records sorted pool of records to intersect with
 * @param max_rows upper limit of records to return as int
 * @return samples as grid String[][] with a header row at index 0,
 *         or null when there are no matching records
 */
public String[][] sampleSpecies(String filter, String[] layers, SimpleRegion region, ArrayList<OccurrenceRecordNumbers> records, int max_rows) {
    // delegate to the full overload with no progress-reporting job
    return sampleSpecies(filter, layers, region, records, max_rows, null);
}
  69. public String getHeader(String[] layers) {
  70. StringBuffer header = new StringBuffer();
  71. OccurrencesFieldsUtil ofu = new OccurrencesFieldsUtil();
  72. for (String s : ofu.getOutputColumnNames()) {
  73. header.append(s).append(",");
  74. }
  75. if (layers != null) {
  76. for (String l : layers) {
  77. header.append(Layers.layerNameToDisplayName(l)).append(",");
  78. }
  79. }
  80. header.deleteCharAt(header.length() - 1); //take off end ','
  81. return header.toString();
  82. }
  83. public String[][] sampleSpecies(String filter, String[] layers, SimpleRegion region, ArrayList<OccurrenceRecordNumbers> records, int max_rows, AnalysisJobSampling job) {
  84. ArrayList<String> as = OccurrencesCollection.getFullRecords(new OccurrencesFilter(filter, region, records, layers, max_rows));
  85. //split records and append header
  86. if (as.size() > 0) {
  87. String[] header = getHeader(layers).split(",");
  88. int numCols = header.length;
  89. String[][] output = new String[as.size() + 1][numCols];
  90. //header
  91. for (int j = 0; j < header.length && j < numCols; j++) {
  92. output[0][j] = header[j];
  93. }
  94. //records
  95. for (int i = 0; i < as.size(); i++) {
  96. String[] s = as.get(i).split(",");
  97. for (int j = 0; j < s.length && j < numCols; j++) {
  98. output[i + 1][j] = s[j];
  99. }
  100. }
  101. return output;
  102. }
  103. return null;
  104. }
  105. /**
  106. * gets array of points for species (genus, etc) name matches within
  107. * a specified region
  108. *
  109. * @param filter species (genus, etc) name
  110. * @param region region to filter results by
  111. * @param records sorted pool of records to intersect with as int []
  112. * @return points as double[], first is longitude, every second is latitude.
  113. */
  114. public double[] sampleSpeciesPoints(String filter, SimpleRegion region, ArrayList<OccurrenceRecordNumbers> records) {
  115. //test on bounding box
  116. double[] bb = BoundingBoxes.getLsidBoundingBoxDouble(filter);
  117. double[][] regionbb = region.getBoundingBox();
  118. if (bb != null && regionbb != null
  119. && bb[0] <= regionbb[1][0] && bb[2] >= regionbb[0][0]
  120. && bb[1] <= regionbb[1][1] && bb[3] >= regionbb[0][1]) {
  121. return OccurrencesCollection.getPoints(new OccurrencesFilter(filter, region, records, TabulationSettings.MAX_RECORD_COUNT_CLUSTER));
  122. }
  123. return null;
  124. }
  125. /**
  126. * gets array of points for species (genus, etc) name matches within
  127. * a specified region
  128. *
  129. * can return other field or sampling for points returned
  130. *
  131. * @param filter species (genus, etc) name
  132. * @param region region to filter results by
  133. * @param records sorted pool of records to intersect with as ArrayList<Integer>
  134. * @return points as double[], first is longitude, every second is latitude.
  135. */
  136. public double[] sampleSpeciesPoints(String filter, SimpleRegion region, ArrayList<OccurrenceRecordNumbers> records, ArrayList<SpeciesColourOption> extra) {
  137. //test on bounding box
  138. double[] bb = BoundingBoxes.getLsidBoundingBoxDouble(filter);
  139. if (region == null) {
  140. return OccurrencesCollection.getPoints(new OccurrencesFilter(filter, region, records, TabulationSettings.MAX_RECORD_COUNT_CLUSTER), extra);
  141. }
  142. double[][] regionbb = region.getBoundingBox();
  143. if (bb != null && bb[0] <= regionbb[1][0] && bb[2] >= regionbb[0][0]
  144. && bb[1] <= regionbb[1][1] && bb[3] >= regionbb[0][1]) {
  145. /* get points */
  146. return OccurrencesCollection.getPoints(new OccurrencesFilter(filter, region, records, TabulationSettings.MAX_RECORD_COUNT_CLUSTER), extra);
  147. }
  148. return null;
  149. }
/**
 * For Sensitive Coordinates.
 *
 * Gets array of points for species (genus, etc) name matches within
 * a specified region.
 *
 * NOTE(review): this method is currently STUBBED OUT — both the record lookup
 * and the point fetch are commented out, so {@code ir} is always null and the
 * method always returns null. If re-enabled, restore
 * OccurrencesIndex.filterSpeciesRecords and getPointsSensitive first.
 *
 * @param filter species (genus, etc) name
 * @param region region to filter results by; null returns all points
 * @param records sorted pool of record numbers to intersect with, as int []
 * @return points as double[], first is longitude, every second is latitude;
 *         currently always null (see note above)
 */
public double[] sampleSpeciesPointsSensitive(String filter, SimpleRegion region, int[] records) {
    IndexedRecord[] ir = null;// OccurrencesIndex.filterSpeciesRecords(filter);
    if (ir != null && ir.length > 0) {
        /* get points */
        double[] points = null;//OccurrencesIndex.getPointsSensitive(ir[0].record_start, ir[0].record_end);
        /* test for region absence */
        if (region == null) {
            return points;
        }
        int i;
        int count = 0;
        int recordsPos = 0; //for test on records
        /* return all valid points within the region */
        for (i = 0; i < points.length; i += 2) {
            //do not add if does not intersect with records list
            if (records != null) {
                // NOTE(review): i advances by 2 per point, so this maps point
                // index directly onto record numbers; the sibling
                // sampleSpeciesSmall uses (i / 2) + record_start instead —
                // confirm which is intended before re-enabling.
                int currentRecord = i + ir[0].record_start;
                //increment recordsPos as required (records is sorted ascending)
                while (recordsPos < records.length
                        && records[recordsPos] < currentRecord) {
                    recordsPos++;
                }
                //test for intersect
                if (recordsPos >= records.length
                        || currentRecord != records[recordsPos]) {
                    continue;
                }
            }
            //region test: keep points inside, mark the rest with NaN for removal
            if (region.isWithin(points[i], points[i + 1])) {
                count += 2;
            } else {
                points[i] = Double.NaN;
            }
        }
        //move surviving (non-NaN) points into a compact 'output' array
        if (count > 0) {
            double[] output = new double[count];
            int p = 0;
            for (i = 0; i < points.length; i += 2) {
                if (!Double.isNaN(points[i])) {
                    output[p++] = points[i];
                    output[p++] = points[i + 1];
                }
            }
            return output;
        }
    }
    return null;
}
/**
 * For Sensitive Coordinates.
 *
 * Gets array of points for species (genus, etc) name matches within
 * a specified region, removing points for all species that are sensitive.
 *
 * @param filter species (genus, etc) name
 * @param region region to filter results by
 * @param records sorted pool of records to intersect with
 * @param removedSpecies output parameter; the collection appends the names of
 *        species removed for sensitivity into this buffer
 * @return points as double[], first is longitude, every second is latitude
 */
public double[] sampleSpeciesPointsMinusSensitiveSpecies(String filter, SimpleRegion region, ArrayList<OccurrenceRecordNumbers> records, StringBuffer removedSpecies) {
    /* get points */
    return OccurrencesCollection.getPointsMinusSensitiveSpecies(new OccurrencesFilter(filter, region, records, TabulationSettings.MAX_RECORD_COUNT_CLUSTER), removedSpecies);
}
  228. /**
  229. * for Sensitive Records
  230. *
  231. * Checks if the records are sensitive within filter range
  232. *
  233. * @param filter species (genus, etc) name
  234. * @param region region to filter results by
  235. * @param records sorted pool of records to intersect with as ArrayList<Integer>
  236. * @return int
  237. * 0 when non-sensitive and has records,
  238. * 1 when sensitive or no records,
  239. * -1 when cannot be determined
  240. */
  241. public static int isSensitiveRecord(String filter, SimpleRegion region, ArrayList<OccurrenceRecordNumbers> records) {
  242. StringBuffer sb = new StringBuffer();
  243. try {
  244. double[] d = OccurrencesCollection.getPointsMinusSensitiveSpecies(new OccurrencesFilter(filter, region, records, TabulationSettings.MAX_RECORD_COUNT_CLUSTER), sb);
  245. if (d == null) {
  246. return 1;
  247. } else {
  248. return 0;
  249. }
  250. } catch (Exception e) {
  251. e.printStackTrace();
  252. }
  253. return -1;
  254. }
  255. /**
  256. * gets samples; occurrences records + optional intersecting layer values,
  257. *
  258. *
  259. * limit output
  260. *
  261. * @param filter species name as String
  262. * @param layers list of layer names of additional data to include as String []
  263. * @param region region to restrict results as SimpleRegion
  264. * @param records sorted pool of records to intersect with as ArrayList<Integer>
  265. * @param max_rows upper limit of records to return as int
  266. * @return samples as grid, String [][]
  267. */
  268. public String sampleSpeciesAsCSV(String species, String[] layers, SimpleRegion region, ArrayList<OccurrenceRecordNumbers> records, int max_rows, AnalysisJobSampling job) {
  269. try {
  270. System.out.println("Limiting sampling to : " + max_rows);
  271. String[][] results = sampleSpecies(species, layers, region, records, max_rows);
  272. StringBuilder sbResults = new StringBuilder();
  273. for (int i = 0; i < results.length; i++) {
  274. for (int j = 0; j < results[i].length; j++) {
  275. if (results[i][j] != null) {
  276. sbResults.append(results[i][j]);
  277. }
  278. if (j < results[i].length - 1) {
  279. sbResults.append(",");
  280. }
  281. }
  282. sbResults.append("\r\n");
  283. }
  284. /* open output file */
  285. File temporary_file = java.io.File.createTempFile("sample", ".csv");
  286. FileWriter fw = new FileWriter(temporary_file);
  287. fw.append(sbResults.toString());
  288. fw.close();
  289. return temporary_file.getPath();
  290. } catch (Exception e) {
  291. System.out.println("error with samplesSpeciesAsCSV:");
  292. e.printStackTrace(System.out);
  293. }
  294. return "";
  295. }
  296. public static String getLSIDAsGeoJSON(String lsid, File outputpath) {
  297. if (SamplingLoadedPointsService.isLoadedPointsLSID(lsid)) {
  298. return getLSIDAsGeoJSON(lsid, outputpath);
  299. }
  300. int i;
  301. /* get samples records from records indexes */
  302. String[][] samples = (new SamplingService()).sampleSpecies(lsid, null, null, null, TabulationSettings.MAX_RECORD_COUNT_DOWNLOAD, null);
  303. StringBuffer sbGeoJSON = new StringBuffer();
  304. sbGeoJSON.append("{");
  305. sbGeoJSON.append(" \"type\": \"FeatureCollection\",");
  306. sbGeoJSON.append(" \"features\": [");
  307. for (i = 1; i < samples.length; i++) {
  308. String s = getRecordAsGeoJSON(samples, i);
  309. if (s != null) {
  310. sbGeoJSON.append(s);
  311. if (i < samples.length - 1) {
  312. sbGeoJSON.append(",");
  313. }
  314. }
  315. }
  316. sbGeoJSON.append(" ],");
  317. sbGeoJSON.append(" \"crs\": {");
  318. sbGeoJSON.append(" \"type\": \"EPSG\",");
  319. sbGeoJSON.append(" \"properties\": {");
  320. sbGeoJSON.append(" \"code\": \"4326\"");
  321. sbGeoJSON.append(" }");
  322. sbGeoJSON.append(" }");
  323. //sbGeoJSON.append(", \"bbox\": [");
  324. //sbGeoJSON.append(" ").append(bbox[0][0]).append(",").append(bbox[0][1]).append(",").append(bbox[1][0]).append(",").append(bbox[1][1]);
  325. //sbGeoJSON.append(" ]");
  326. sbGeoJSON.append("}");
  327. /* write samples to a file */
  328. try {
  329. File temporary_file = java.io.File.createTempFile("filter_sample", ".csv", outputpath);
  330. FileWriter fw = new FileWriter(temporary_file);
  331. fw.write(sbGeoJSON.toString());
  332. fw.close();
  333. return temporary_file.getName(); //return location of temp file
  334. } catch (Exception e) {
  335. SpatialLogger.log("SamplingService: getLSIDAsGeoJSON()", e.toString());
  336. e.printStackTrace();
  337. }
  338. return "";
  339. }
  340. /**
  341. * creates a file with geojson for lsid at outputpath
  342. *
  343. * returns filename (first line), number of parts (2nd line)
  344. *
  345. * @param lsid
  346. * @param outputpath
  347. * @return
  348. */
  349. public static String getLSIDAsGeoJSONIntoParts(String lsid, File outputpath) {
  350. if (SamplingLoadedPointsService.isLoadedPointsLSID(lsid)) {
  351. return getLSIDAsGeoJSONIntoParts(lsid, outputpath);
  352. }
  353. int i;
  354. /* get samples records from records indexes */
  355. String[][] samples = (new SamplingService()).sampleSpecies(lsid, null, null, null, TabulationSettings.MAX_RECORD_COUNT_DOWNLOAD, null);
  356. int max_parts_size = 2000;
  357. int count = 0;
  358. //-1 on samples.length for header
  359. int partCount = (int) Math.ceil((samples.length - 1) / (double) max_parts_size);
  360. //test for filename, return if it exists
  361. File file;
  362. String filename = outputpath + File.separator + lsid.replace(":", "_").replace(".", "_");
  363. try {
  364. file = new File(filename + "_" + (partCount - 1));
  365. if (file.exists()) {
  366. return lsid.replace(":", "_").replace(".", "_") + "\n" + partCount;
  367. }
  368. } catch (Exception e) {
  369. e.printStackTrace();
  370. }
  371. for (int j = 1; j < samples.length; j += max_parts_size) {
  372. StringBuffer sbGeoJSON = new StringBuffer();
  373. sbGeoJSON.append("{");
  374. sbGeoJSON.append("\"type\": \"FeatureCollection\",");
  375. sbGeoJSON.append("\"features\": [");
  376. int len = j + max_parts_size;
  377. if (len > samples.length) {
  378. len = samples.length;
  379. }
  380. for (i = j; i < len; i++) {
  381. String s = getRecordAsGeoJSON(samples, i);
  382. if (s != null) {
  383. sbGeoJSON.append(s);
  384. if (i < len - 1) {
  385. sbGeoJSON.append(",");
  386. }
  387. }
  388. }
  389. sbGeoJSON.append("],");
  390. sbGeoJSON.append("\"crs\": {");
  391. sbGeoJSON.append("\"type\": \"EPSG\",");
  392. sbGeoJSON.append("\"properties\": {");
  393. sbGeoJSON.append("\"code\": \"4326\"");
  394. sbGeoJSON.append("}");
  395. sbGeoJSON.append("}");
  396. sbGeoJSON.append("}");
  397. /* write samples to a file */
  398. try {
  399. //File temporary_file = java.io.File.createTempFile("filter_sample", ".csv", outputpath);
  400. FileWriter fw = new FileWriter(
  401. filename + "_" + count);
  402. count++;
  403. fw.write(sbGeoJSON.toString());
  404. fw.close();
  405. //return temporary_file.getName(); //return location of temp file
  406. } catch (Exception e) {
  407. SpatialLogger.log("SamplingService: getLSIDAsGeoJSON()", e.toString());
  408. e.printStackTrace();
  409. }
  410. }
  411. return lsid.replace(":", "_").replace(".", "_") + "\n" + partCount;
  412. }
  413. private static String getRecordAsGeoJSON(String[][] rec, int rw) {
  414. //String[] recdata = rec.split(",");
  415. if (rec == null || rec.length <= rw || rec[rw].length <= TabulationSettings.geojson_latitude) {
  416. return null;
  417. }
  418. for (int i = 0; i < TabulationSettings.geojson_latitude; i++) {
  419. if (rec[rw][i] == null) {
  420. return null;
  421. }
  422. }
  423. StringBuffer sbRec = new StringBuffer();
  424. sbRec.append("{");
  425. sbRec.append(" \"type\":\"Feature\",");
  426. sbRec.append(" \"id\":\"occurrences.data.").append(rec[rw][TabulationSettings.geojson_id]).append("\",");
  427. sbRec.append(" \"geometry\":{");
  428. sbRec.append(" \"type\":\"Point\",");
  429. sbRec.append(" \"coordinates\":[\"").append(rec[rw][TabulationSettings.geojson_longitude]).append("\",\"").append(rec[rw][TabulationSettings.geojson_latitude].trim()).append("\"]");
  430. sbRec.append(" },");
  431. sbRec.append(" \"geometry_name\":\"the_geom\",");
  432. sbRec.append(" \"properties\":{");
  433. for (int i = 0; i < TabulationSettings.geojson_property_names.length; i++) {
  434. sbRec.append(" \"").append(TabulationSettings.geojson_property_names[i]).append("\":\"").append(rec[rw][TabulationSettings.geojson_property_fields[i]]).append("\"");
  435. if (i < TabulationSettings.geojson_property_names.length - 1) {
  436. sbRec.append(",");
  437. }
  438. }
  439. sbRec.append(" }");
  440. sbRec.append("}");
  441. return sbRec.toString();
  442. }
/**
 * Samples a small set of records: points filtered by record pool and region,
 * then joined with occurrence fields and layer values, header in row 0.
 *
 * NOTE(review): this method is currently STUBBED OUT — the index lookups are
 * commented out, so {@code ir} is always null and the method always returns
 * null. If re-enabled as-is it would throw a NullPointerException at
 * {@code oi[0].split(",")} because {@code oi} is also left null; restore
 * OccurrencesIndex.getSortedRecords and SamplingIndex.getRecords first.
 *
 * @param filter species name
 * @param layers layer names of additional data to include, may be null
 * @param region region to restrict results, may be null (no restriction)
 * @param records sorted pool of record numbers to intersect with, may be null
 * @param max_rows upper limit of records to return
 * @param job optional job for progress reporting (unused)
 * @return samples grid (header in row 0), or null; currently always null
 */
private String[][] sampleSpeciesSmall(String filter, String[] layers, SimpleRegion region, ArrayList<Integer> records, int max_rows, AnalysisJobSampling job) {
    IndexedRecord[] ir = null;// OccurrencesIndex.filterSpeciesRecords(filter);
    if (ir != null && ir.length > 0) {
        //the records pool must be sorted ascending for the merge walk below
        if (records != null) {
            java.util.Collections.sort(records);
        }
        /* get points */
        double[] points = null;//OccurrencesIndex.getPoints(ir[0].record_start, ir[0].record_end);
        /* test for region absence */
        int i;
        int alen = 0;
        int[] a = new int[max_rows];
        int recordsPos = 0; //for test on records
        /* return all valid points within the region */
        for (i = 0; i < points.length && alen < max_rows; i += 2) {
            //points are (lon,lat) pairs, so record number advances by i/2
            int currentRecord = (i / 2) + ir[0].record_start;
            //do not add if does not intersect with records list
            if (records != null) {
                //increment recordsPos as required (merge-style walk of two sorted lists)
                while (recordsPos < records.size()
                        && records.get(recordsPos).intValue() < currentRecord) {
                    recordsPos++;
                }
                //test for intersect
                if (recordsPos >= records.size()
                        || currentRecord != records.get(recordsPos).intValue()) {
                    continue;
                }
            }
            //region test (null region means keep everything)
            if (region == null || region.isWithin(points[i], points[i + 1])) {
                a[alen++] = currentRecord;
            }
        }
        if (alen == 0) {
            return null;
        }
        //filled a up to alen, get the data; shrink to the used length
        if (alen < max_rows) {
            a = java.util.Arrays.copyOf(a, alen);
        }
        //NOTE(review): oi is never assigned; re-enabling this path as-is NPEs below
        String[] oi = null;
        ;//OccurrencesIndex.getSortedRecords(a);
        int layerscount = (layers == null) ? 0 : layers.length;
        int headercount = oi[0].split(",").length;
        String[][] output = new String[oi.length + 1][headercount + layerscount];//+1 for header
        //fill occurrence fields; extra fields beyond the header width are dropped
        for (i = 0; i < oi.length; i++) {
            String[] line = oi[i].split(",");
            for (int j = 0; j < line.length && j < headercount; j++) {
                output[i + 1][j] = line[j]; //+1 for header
            }
        }
        //append one column per requested layer
        for (i = 0; layers != null && i < layers.length; i++) {
            String[] si = null;//SamplingIndex.getRecords(layers[i], a);
            if (si != null) {
                for (int j = 0; j < si.length && j < output.length; j++) {
                    output[j][headercount + i] = si[j];
                }
            }
        }
        //header row: occurrence column names then layer display names
        OccurrencesFieldsUtil ofu = new OccurrencesFieldsUtil();
        i = 0;
        for (String s : ofu.getOutputColumnNames()) {
            output[0][i++] = s.trim();
        }
        if (layers != null) {
            for (String l : layers) {
                output[0][i++] = Layers.layerNameToDisplayName(l).trim();
            }
        }
        return output;
    }
    return null;
}
  519. }