PageRenderTime 51ms CodeModel.GetById 20ms RepoModel.GetById 0ms app.codeStats 0ms

/src/ifcSoft/model/dataSet/RawData.java

https://code.google.com/p/ifcsoft/
Java | 555 lines | 365 code | 78 blank | 112 comment | 61 complexity | f5f5cbd30262d75f74c15f89ecebb8b1 MD5 | raw file
Possible License(s): GPL-3.0
  1. /**
  2. * Copyright (C) 2011 Kyle Thayer <kyle.thayer AT gmail.com>
  3. *
  4. * This file is part of the IFCSoft project (http://ifcsoft.com)
  5. *
  6. * IFCSoft is free software: you can redistribute it and/or modify
  7. * it under the terms of the GNU General Public License as published by
  8. * the Free Software Foundation, either version 3 of the License, or
  9. * (at your option) any later version.
  10. *
  11. * This program is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. * GNU General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU General Public License
  17. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  18. */
  19. package ifcSoft.model.dataSet;
  20. import FloCK.FCSLoader.FCSLoader;
  21. import FloCK.FCSLoader.Stats;
  22. import java.io.BufferedReader;
  23. import java.io.DataInputStream;
  24. import java.io.FileReader;
  25. import java.io.IOException;
  26. import java.io.InputStreamReader;
  27. import java.net.URL;
  28. import java.net.URLConnection;
  29. import java.util.LinkedList;
  30. /**
  31. *
  32. * @author Kyle Thayer <kthayer@emory.edu>
  33. */
  34. public class RawData extends DataSet {
  35. private String fileName; // path or URL given to the constructor; loadFile() dispatches on its prefix/extension
  36. private String[] columnLabels; // one label per data column (CSV header without a leading name/file column, or FCS channel names)
  37. private LinkedList<float[][]> data = new LinkedList<float[][]>(); // row segments; each segment is indexed [row][column]
  38. private int length; // total number of rows stored across all segments
  39. private boolean hasNames = false; // set when the first CSV header field is "name" or "file" (case-insensitive)
  40. private LinkedList<String[]> dataNames = new LinkedList<String[]>(); // per-segment row names, added alongside data only when hasNames is set
  41. private boolean didload = false; // set to true at the end of a completed load
  42. private int loadProgress = 0; // number of CSV rows accepted so far; reported by getProgress() when no FCS loader exists
  43. private FCSLoader fcs; // FCS loader; kept as an object variable so getProgress() can query it
  44. /**
  45. * Constructor that takes a file
  46. * I'll want to add another constructor for copy/paste into program
  47. * @param filename
  48. */
  49. public RawData(String filename){
  50. fileName = filename;
  51. //get the name of the file from the filename
  52. int lastind1 = filename.lastIndexOf('/'); //either method of directory division
  53. int lastind2 = filename.lastIndexOf('\\');
  54. if(lastind1 >= 0){
  55. if(lastind2 > lastind1){ //if "\" happened last
  56. name = filename.substring(lastind2+1);
  57. }else{
  58. name = filename.substring(lastind1+1);
  59. }
  60. }else{
  61. if(lastind2 >=0){
  62. name = filename.substring(lastind2+1);
  63. }else{ //neither slash was contained in filename
  64. name = filename;
  65. }
  66. }
  67. if(name.endsWith(".csv") || name.endsWith(".fcs")){
  68. name = name.substring(0, name.length()-4);
  69. }
  70. }
  71. /**
  72. *
  73. * @throws Exception
  74. */
  75. public void loadFile()throws Exception{
  76. DataInputStream in;
  77. if(fileName.startsWith("http") && fileName.endsWith(".csv") ){
  78. URL u = new URL(fileName);
  79. URLConnection urlCon = u.openConnection();
  80. urlCon.setDoInput(true);
  81. in = new DataInputStream(urlCon.getInputStream());
  82. BufferedReader br = new BufferedReader(new InputStreamReader(in));
  83. readCSV(br);
  84. }else if(fileName.endsWith(".csv")){
  85. BufferedReader br = new BufferedReader(new FileReader(fileName));
  86. readCSV(br);
  87. }else if(fileName.endsWith(".fcs")){
  88. fcs = new FCSLoader();
  89. Stats stats = fcs.ReadFCS(fileName);
  90. double [][] dataArray = stats.DataArray;
  91. columnLabels = stats.ChannelNames;
  92. System.out.println("data.length = "+dataArray.length + " data[0].length = "+dataArray[0].length);
  93. System.out.print("columnLabels:");
  94. for(int i = 0; i < columnLabels.length; i++){
  95. System.out.print(columnLabels[i] +", ");
  96. }
  97. System.out.println();
  98. numValsInDim = new int[columnLabels.length];
  99. mins = new float[columnLabels.length];
  100. maxes = new float[columnLabels.length];
  101. means = new double[columnLabels.length];
  102. for(int k = 0; k < columnLabels.length; k++){
  103. mins[k] = Float.MAX_VALUE;
  104. maxes[k] = Float.MIN_VALUE;
  105. means[k] = 0; //for now we'll use it to keep sums
  106. }
  107. length = 0;
  108. while(length < dataArray[0].length){
  109. float[][]newSeg;
  110. if(dataArray.length - length < DataSet.SEGSIZE){
  111. newSeg = new float[dataArray[0].length - length][columnLabels.length];
  112. }else{
  113. newSeg = new float[DataSet.SEGSIZE][columnLabels.length];
  114. }
  115. for(int i = 0; i < newSeg.length; i++){
  116. for(int k = 0; k < columnLabels.length; k++){
  117. newSeg[i][k] = (float) dataArray[k][length];
  118. if(newSeg[i][k] < mins[k]){
  119. mins[k] = newSeg[i][k];
  120. }
  121. if(newSeg[i][k] > maxes[k]){
  122. maxes[k] = newSeg[i][k];
  123. }
  124. }
  125. length++;
  126. }
  127. data.add(newSeg);
  128. }
  129. findstats();
  130. System.out.println("length="+length);
  131. //find actual means
  132. for(int k = 0; k < columnLabels.length; k++){
  133. System.out.println("min "+k + "= " + mins[k]);
  134. System.out.println("maxes "+k + "= " + maxes[k]);
  135. System.out.println("mean "+k + "= " + means[k]);
  136. }
  137. didload = true;
  138. return;
  139. }
  140. }
  141. /**
  142. *
  143. * @param data
  144. * @param length
  145. * @param colLabels
  146. * @param name
  147. */
  148. public RawData(LinkedList<float[][]> data, int length, String[] colLabels, String name) {
  149. this.data = data;
  150. this.columnLabels = colLabels;
  151. this.name = name;
  152. this.length = length;
  153. numValsInDim = new int[columnLabels.length];
  154. mins = new float[colLabels.length];
  155. maxes = new float[colLabels.length];
  156. means = new double[colLabels.length];
  157. this.findstats();
  158. }
  159. /**
  160. *
  161. * @return
  162. */
  163. public boolean didLoad(){
  164. return didload;
  165. }
  166. /**
  167. *
  168. * @return
  169. */
  170. public int getProgress(){
  171. if(fcs != null){
  172. return fcs.getProgress();
  173. }
  174. return loadProgress;
  175. }
  176. /**
  177. *
  178. * @return
  179. */
  180. @Override
  181. public int getDimensions(){
  182. return columnLabels.length;
  183. }
  184. /**
  185. *
  186. * @return
  187. */
  188. @Override
  189. public int UnMaskedLength(){
  190. return length;
  191. }
  192. /**
  193. *
  194. * @param index
  195. * @return
  196. */
  197. @Override
  198. public float[] getUnMaskedVals(int index) {
  199. //find the right segment
  200. int i = 0;
  201. int sofar = 0;
  202. while(sofar + data.get(i).length <= index){
  203. sofar += data.get(i).length;
  204. i++;
  205. }
  206. //i is now pointing at the correct segment and sofar tells us how many points were in previous segments
  207. return data.get(i)[index - sofar];
  208. }
  209. @Override
  210. public String getUnMaskedPointName(int index){
  211. if(hasNames){
  212. //find the right segment
  213. int i = 0;
  214. int sofar = 0;
  215. while(sofar + data.get(i).length <= index){
  216. sofar += data.get(i).length;
  217. i++;
  218. }
  219. //i is now pointing at the correct segment and sofar tells us how many points were in previous segments
  220. return dataNames.get(i)[index - sofar];
  221. }
  222. return ""+index;
  223. }
  224. @Override
  225. public boolean hasPointNames(){
  226. return hasNames;
  227. }
  228. /**
  229. *
  230. * @return
  231. */
  232. @Override
  233. public String[] getColLabels() {
  234. return columnLabels;
  235. }
  236. /**
  237. *
  238. * @return
  239. */
  240. public String getFileName() {
  241. return fileName;
  242. }
  243. /**
  244. *
  245. * @param index
  246. * @return
  247. */
  248. @Override
  249. public String getUnMaskedPointSetName(int index) {
  250. return name;
  251. }
  252. /**
  253. *
  254. * @return
  255. */
  256. @Override
  257. public LinkedList<DataSet> getParents() {
  258. return new LinkedList<DataSet>(); //there are no parents of a raw data set;
  259. }
  260. /**
  261. *
  262. * @return
  263. */
  264. @Override
  265. public LinkedList<String> getRawSetNames() {
  266. LinkedList<String>temp = new LinkedList<String>();
  267. temp.add(name);
  268. return temp;
  269. }
  270. @Override
  271. public LinkedList<DataSet> getRawSets() {
  272. LinkedList<DataSet>temp = new LinkedList<DataSet>();
  273. temp.add(this);
  274. return temp;
  275. }
  276. /**
  277. *
  278. * @param dataSet
  279. * @param lastRemoved
  280. */
  281. @Override
  282. public void parentPointsRemoved(DataSet dataSet, DataSetMaskRemoved lastRemoved) {
  283. throw new UnsupportedOperationException("The program should never get here.");
  284. }
  285. /*************************************************************/
  286. /* File reading stuff */
  287. private void readCSV(BufferedReader br) {
  288. String line = null;
  289. LinkedList<float[]> tempData = new LinkedList<float[]>(); //it is read into here, first, then stuck in the data array
  290. LinkedList<String> tempNames = new LinkedList<String>();
  291. try{
  292. line = br.readLine();
  293. }catch(IOException ex){
  294. System.out.println("IO Error:" + ex);
  295. return;
  296. }
  297. length = 0;
  298. int dataRows = 0;
  299. int fileRowCounter = 0;
  300. String st[];
  301. while(line != null){
  302. line = line.replaceAll(",,", ", ,"); //make sure at least a space between each comma
  303. if(line.endsWith(",")){//if it ends with a comma, an extra space will make
  304. line = line + " "; //sure the split gives the right number of elements
  305. }
  306. st = line.split(",");
  307. //if the column labels aren't set, then this is the first read
  308. if(columnLabels == null){
  309. columnLabels = st;
  310. if(columnLabels[0].equalsIgnoreCase("name") || columnLabels[0].equalsIgnoreCase("file")){
  311. hasNames = true;
  312. String temp[] = new String[columnLabels.length-1];
  313. for(int i=0; i < temp.length; i++){
  314. temp[i] = columnLabels[i+1];
  315. }
  316. columnLabels = temp;
  317. }
  318. columnLabels = fixQuotes(columnLabels);
  319. }else{//the column labels have been read
  320. float[] thisrow = new float[columnLabels.length];
  321. boolean didLoadRow = true;
  322. String pointName = null;
  323. if(hasNames){
  324. pointName = st[0];
  325. if(st.length-1 != columnLabels.length){ //if it uses name, the file has one extra column
  326. System.out.println("Error reading FCS, columns in row"
  327. +dataRows+" didn't match");
  328. didLoadRow = false;
  329. }
  330. }else{
  331. if(st.length != columnLabels.length){
  332. System.out.println("Error reading FCS, columns in row"
  333. +dataRows+" didn't match");
  334. didLoadRow = false;
  335. }
  336. }
  337. try{
  338. for(int i = 0; i < columnLabels.length; i++){
  339. if(hasNames){
  340. if(st[i+1].trim().length() == 0){ //if no number is empty
  341. thisrow[i] = Float.NaN;
  342. }else{
  343. thisrow[i] = Float.parseFloat(st[i+1]); //start one over on file if first is name
  344. }
  345. }else{
  346. if(st[i].trim().length() == 0){ //if no number is empty
  347. thisrow[i] = Float.NaN;
  348. }else{
  349. thisrow[i] = Float.parseFloat(st[i]);
  350. }
  351. }
  352. }
  353. }catch(Exception e){
  354. didLoadRow = false;
  355. System.out.println("Error reading FCS, couldn't parse floats in row"
  356. +fileRowCounter);
  357. }
  358. fileRowCounter++;
  359. if(didLoadRow){
  360. tempData.add(thisrow);
  361. if(hasNames){
  362. tempNames.add(pointName);
  363. }
  364. dataRows++;
  365. loadProgress++;
  366. if(dataRows == SEGSIZE){//we'll read them into groups of MAXPERSEG (to try not to crash)
  367. addSegToData(tempData, tempNames, dataRows);
  368. //reset for next dataSegment
  369. tempData = new LinkedList<float[]>();
  370. tempNames = new LinkedList<String>();
  371. dataRows = 0;
  372. }
  373. }
  374. }
  375. try{
  376. line = br.readLine();
  377. if(line == null){ //if end of file
  378. break;
  379. }
  380. }catch(IOException ex){
  381. //if(ex.)
  382. System.out.println("IO Error:" + ex);
  383. return;
  384. }
  385. }
  386. if(dataRows > 0){ //if the last pass read some data, add it
  387. addSegToData(tempData, tempNames, dataRows);
  388. //use this to clear the linked list
  389. tempData = null;
  390. }
  391. findstats(); //since my live rolling average failed due to rounding, I'll try
  392. System.out.println("length="+length);
  393. //find actual means
  394. for(int k = 0; k < columnLabels.length; k++){
  395. System.out.println("min "+k + "= " + mins[k]);
  396. System.out.println("maxes "+k + "= " + maxes[k]);
  397. System.out.println("mean "+k + "= " + means[k]);
  398. }
  399. didload = true;
  400. return;
  401. }
  402. private void addSegToData(LinkedList<float[]> tempData, LinkedList<String> tempNames, int dataRows) {
  403. //I've read the data into a linked list, but I really want to access it as
  404. //an array, so I will now copy that over.
  405. //while I'm at it, I'll find the min and max of each data thingy
  406. float[][] dataSeg = new float[dataRows][];
  407. String[] nameSeg = new String[dataRows];
  408. if(mins == null){
  409. numValsInDim = new int[columnLabels.length];
  410. mins = new float[columnLabels.length];
  411. maxes = new float[columnLabels.length];
  412. means = new double[columnLabels.length];
  413. for(int k = 0; k < columnLabels.length; k++){
  414. mins[k] = Float.MAX_VALUE;
  415. maxes[k] = Float.MIN_VALUE;
  416. means[k] = 0; //for now we'll use it to keep sums
  417. }
  418. }
  419. for(int i = 0; i < dataRows; i++){
  420. dataSeg[i] = tempData.removeFirst();
  421. if(hasNames){
  422. nameSeg[i] = tempNames.removeFirst();
  423. }
  424. //while we're adding the data, calculate the means and average
  425. length ++;
  426. for(int k=0; k < columnLabels.length; k++){
  427. //means[k] += means[k]*((1.0 - length) / length) + dataSeg[i][k] / (1.0*length);
  428. //Note the means fails due to rounding errors
  429. if(dataSeg[i][k] < mins[k]){
  430. mins[k] = dataSeg[i][k];
  431. }
  432. if(dataSeg[i][k] > maxes[k]){
  433. maxes[k] = dataSeg[i][k];
  434. }
  435. }
  436. }
  437. data.add(dataSeg);
  438. if(hasNames){
  439. dataNames.add(nameSeg);
  440. }
  441. System.out.println("length after seg "+ data.size()+": "+ length);
  442. }
  443. /**
  444. * In a csv file, column labels may have a comma in them and be surrounded by quotes.
  445. * eg. name, "(0,0)", "(0,1)", etc.
  446. * This should deal with it in most cases, though I'm not sure what the rule is when
  447. * you have more complicated labels with quotes and commas next to each other
  448. * @param columnLabels
  449. * @return
  450. */
  451. private String[] fixQuotes(String[] columnLabels) {
  452. LinkedList<String> newLabels = new LinkedList<String>();
  453. String currentString = null;
  454. for(int i = 0; i < columnLabels.length; i++){
  455. if(currentString == null){
  456. if(columnLabels[i].startsWith("\"")){ //start of a special case
  457. currentString = columnLabels[i];
  458. if(columnLabels[i].endsWith("\"")){ //if it also ends with a "
  459. newLabels.add(currentString);
  460. currentString = null;
  461. }
  462. }else{ //normal string value
  463. newLabels.add(columnLabels[i]);
  464. }
  465. }else{ //we are trying to find the end of the string value (ends with ")
  466. if(columnLabels[i].endsWith("\"")){ //end of the string value
  467. currentString+= ","+columnLabels[i];
  468. newLabels.add(currentString);
  469. currentString = null;
  470. }else{ //continuing the middle
  471. currentString+= ","+columnLabels[i];
  472. }
  473. }
  474. }
  475. return (String[]) newLabels.toArray(new String[0]);
  476. }
  477. }