PageRenderTime 60ms CodeModel.GetById 20ms RepoModel.GetById 0ms app.codeStats 0ms

/sPlot/src/Splot.java

https://github.com/alphonse23/luc_splot
Java | 1750 lines | 1399 code | 252 blank | 99 comment | 325 complexity | 704c123d7ac196e27bc786bb083d59ca MD5 | raw file

Large files are truncated, but you can click here to view the full file

  1. import java.beans.PropertyChangeSupport;
  2. import java.io.BufferedReader;
  3. import java.io.BufferedWriter;
  4. import java.io.File;
  5. import java.io.FileReader;
  6. import java.io.FileWriter;
  7. import java.io.IOException;
  8. import java.io.PrintWriter;
  9. import java.text.DateFormat;
  10. import java.text.SimpleDateFormat;
  11. import java.util.Calendar;
  12. import javax.swing.JOptionPane;
  13. public class Splot {
  14. //Input
  15. private int w_size; //window size; init() treats -1 as "use gene-defined (variable) windows"
  16. private int nmer_size; //length n of the n-mers counted in each window (parsed from args[2])
  17. private boolean use_complementary; //when true, the n-mer returned by giveR_ComplementX/Y is counted as well
  18. //Parameters
  19. private int num_possible_nmers; //set in init() to 1<<(nmer_size*2), i.e. 4^nmer_size
  20. private float min_percent_nt_in_w = (float)0.1; //threshold used both for keeping a partial last window and for the n-mer-count/length ratio of a window
  21. private int min_num_nmers_in_w=50; //windows with fewer counted n-mers than this are excluded from scoring
  22. //Output
  23. private int len_X; //total number of characters read for sequence X
  24. private int len_Y; //total number of characters read for sequence Y
  25. private char[][] seq_inX_w; //Holds all X window sequences [# of windows][sequence for each window], each terminated by '\0'
  26. private int w_num_X; //Number of windows for X sequence
  27. private char[][] seq_inY_w; //Holds all Y window sequences [# of windows][sequence for each window], each terminated by '\0'
  28. private int w_num_Y; //Number of windows for Y sequence
  29. private static String[] X_geneSynonyms; //gene synonym column, one entry per X gene window (variable-window mode)
  30. private static String[] Y_geneSynonyms; //gene synonym column, one entry per Y gene window (variable-window mode)
  31. private float[][] score; //correlation score for each window pair, indexed [X window][Y window]
  32. private int[] hisPlot;
  33. private float[] avgCorrX; //read by write_Avg_Stdev after calcAvg_window_Corr(); presumably filled there - confirm
  34. private float[] avgCorrY; //read by write_Avg_Stdev after calcAvg_window_Corr(); presumably filled there - confirm
  35. private float[] stdev_pearCorrX; //allocated in write_Avg_Stdev; holds the mean absolute deviation of an X window's scores
  36. private float[] stdev_pearCorrY; //allocated in write_Avg_Stdev; holds the mean absolute deviation of a Y window's scores
  37. private boolean[] valid_X; //NOTE: inverted meaning - true marks an X window as EXCLUDED from scoring
  38. private boolean[] valid_Y; //NOTE: inverted meaning - true marks a Y window as EXCLUDED from scoring
  39. private float[][] nmer_counts_X; //[X window][n-mer index] counts; mean-centered in place before scoring
  40. private float[][] nmer_counts_Y; //[Y window][n-mer index] counts; mean-centered in place before scoring
  41. private float[] w_meanVals_X; //per X window: mean n-mer count over all possible n-mers
  42. private float[] w_meanVals_Y; //per Y window: mean n-mer count over all possible n-mers
  43. private float[] X_square; //per X window: sum of squared mean-centered counts
  44. private float[] Y_square; //per Y window: sum of squared mean-centered counts
  45. private static String X_header; //first line of the X sequence file (stored by Prepare_Sequences_var_window_X)
  46. private static String Y_header; //first line of the Y sequence file (stored by Prepare_Sequences_var_window_Y)
  47. //FileWriters
  48. private PrintWriter writeGC; //not used in the visible part of the file
  49. //Sean's stuff
  50. private String args[]; //raw argument array handed to the constructor; parsed by init()
  51. public final PropertyChangeSupport pcs = new PropertyChangeSupport( this ); //fires "progress" property changes during scoring
  52. private boolean process = false; //init() aborts when this is true after a parsing step; presumably set by the reader helpers on failure - confirm
  53. // Some OO changes
  /**
   * Stores the argument array for a later call to {@link #init()}; nothing is
   * parsed or validated here.
   *
   * <pre>
   * Layout of args_i:
   *   0  - Sequence X
   *   1  - Sequence Y
   *   2  - nmer_size
   *   3  - w_size                (VALUE=-1 IF USING GENE)
   *   4  - use_complementary
   *   5  - gene file X
   *   6  - gene file Y
   *   7  - Dir for scoreMatrix file
   *   8  - Dir for avg+stdev file
   *   9  - Dir for window_GC_content file
   *   10 - Dir for maxCorr_w_pairs file
   *   11 - Dir for geneScoreMatrix file
   *   12 - Dir for geneAvgStdev file
   *   13 - Dir for gene_GC_content file
   *   14 - Dir for maxCorr_gene_pairs file
   * </pre>
   *
   * @param args_i the argument array described above
   */
  public Splot(String[] args_i){
    args = args_i;
  }
  76. public void init(){
  77. System.err.println("heLL!");
  78. int one=1;
  79. nmer_size = Integer.parseInt(args[2]);
  80. w_size = Integer.parseInt(args[3]);
  81. if(w_size!=-1)
  82. {
  83. parse_X(args[0]);
  84. if(process) return;
  85. parse_Y(args[1]);
  86. if(process) return;
  87. use_complementary = Boolean.valueOf(args[4]);
  88. score = new float[w_num_X][w_num_Y];
  89. valid_X = new boolean[w_num_X];
  90. valid_Y = new boolean[w_num_Y];
  91. num_possible_nmers = one<<(nmer_size*2);
  92. nmer_counts_X = new float[w_num_X][num_possible_nmers];
  93. nmer_counts_Y = new float[w_num_Y][num_possible_nmers];
  94. w_meanVals_X = new float[w_num_X];
  95. w_meanVals_Y = new float[w_num_Y];
  96. X_square = new float[w_num_X];
  97. Y_square = new float[w_num_Y];
  98. compute();
  99. //writeScoreMatrix(args[7]);
  100. //write_Avg_Stdev(args[8]);
  101. //write_GC_w_content(args[9]);
  102. //write_max_w_Corr(args[10]);
  103. }
  104. else
  105. {
  106. Prepare_Sequences_var_window_X(args[0], args[5]);
  107. if(process) return;
  108. Prepare_Sequences_var_window_Y(args[1], args[6]);
  109. if(process) return;
  110. use_complementary = Boolean.valueOf(args[4]);
  111. score = new float[w_num_X][w_num_Y]; //508952576 33030144
  112. valid_X = new boolean[w_num_X]; //C:\\DOCUME~1\\ADMINI~1\\LOCALS~1\\Temp\\NC_009925.fna
  113. valid_Y = new boolean[w_num_Y];
  114. // TEST WITH N-MER SIZE 3
  115. num_possible_nmers = one<<(nmer_size*2);
  116. nmer_counts_X = new float[w_num_X][num_possible_nmers];
  117. nmer_counts_Y = new float[w_num_Y][num_possible_nmers];
  118. w_meanVals_X = new float[w_num_X];
  119. w_meanVals_Y = new float[w_num_Y];
  120. X_square = new float[w_num_X];
  121. Y_square = new float[w_num_Y];
  122. S_Plot_Correlation_var_windows();
  123. //writeGeneMatrix(args[11]);
  124. //write_Gene_Avg_Stdev(args[12]);
  125. //write_GC_gene_content(args[13]);
  126. //write_max_gene_Corr(args[14]);
  127. }
  128. }
  129. public void parse_X(String xSquence){
  130. File f_X = new File(xSquence);
  131. BufferedReader in = getReader(f_X);
  132. StringBuilder sb = new StringBuilder();
  133. String line = getLine(in);
  134. while (line!=null) {
  135. line = getLine(in);
  136. if(line!=null){
  137. len_X += (long)line.length();
  138. sb.append(line);
  139. }
  140. }
  141. StringBuilder sb2 = sb;
  142. w_num_X=len_X/w_size;
  143. if(((len_X%w_size)/(float)w_size)>=min_percent_nt_in_w) w_num_X++;
  144. seq_inX_w = new char[w_num_X][w_size+1];
  145. int k = 0;
  146. int j = 0;
  147. for(int i=0; i<w_num_X; i++){
  148. for(j=0; j<w_size; j++){
  149. seq_inX_w[i][j] = sb2.charAt(k);
  150. if(k<(len_X-1)){k++;}
  151. else{break;}
  152. }
  153. seq_inX_w[i][j]='\0';
  154. }
  155. }
  156. public void parse_Y(String ySequence){
  157. File f_Y = new File(ySequence);
  158. BufferedReader in = getReader(f_Y);
  159. StringBuilder sb = new StringBuilder();
  160. String line = getLine(in);
  161. while (line!=null) {
  162. line = getLine(in);
  163. if(line!=null){
  164. len_Y += (long)line.length();
  165. sb.append(line);
  166. }
  167. }
  168. StringBuilder sb2 = sb;
  169. w_num_Y=len_Y/w_size;
  170. if(((len_Y%w_size)/(float)w_size)>=min_percent_nt_in_w) w_num_Y++;
  171. seq_inY_w = new char[w_num_Y][w_size+1];
  172. int k = 0;
  173. int j = 0;
  174. for(int i=0; i<w_num_Y; i++){
  175. for(j=0; j<w_size; j++){
  176. seq_inY_w[i][j] = sb2.charAt(k);
  177. if(k<(len_Y-1)){k++;}
  178. else{break;}
  179. }
  180. seq_inY_w[i][j]='\0';
  181. }
  182. }
  183. public void getTime() {
  184. Calendar cal = Calendar.getInstance();
  185. SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss:SS");
  186. System.out.println(sdf.format(cal.getTime()));
  187. }
  188. public void compute(){
  189. int i, j;
  190. int n_x, n_y;
  191. char c;
  192. String sNmer = new String();
  193. boolean okay = true;
  194. /*
  195. Sean
  196. */
  197. frame.progressBar.setStringPainted(true);
  198. frame.progressBar.setMinimum(0);
  199. frame.progressBar.setMaximum(w_num_Y+w_num_X+w_num_X);
  200. ///---------------------------X
  201. for(n_x=0;n_x<w_num_X;n_x++){
  202. i=-1;
  203. for(;;){
  204. i++;
  205. if(seq_inX_w[n_x][i]=='\0') {break;}
  206. c=seq_inX_w[n_x][i];
  207. if(c=='a'||c=='A'){sNmer+="00";}
  208. else if(c=='t'||c=='T'){sNmer+="01";}
  209. else if(c=='g'||c=='G'){sNmer+="10";}
  210. else if(c=='c'||c=='C'){sNmer+="11";}
  211. else continue;
  212. for(j=1;j<nmer_size;j++){
  213. i++;
  214. if(seq_inX_w[n_x][i]=='\0'){break;}
  215. c=seq_inX_w[n_x][i];
  216. okay=true;
  217. if(c=='a'||c=='A'){sNmer+="00";}
  218. else if(c=='t'||c=='T'){sNmer+="01";}
  219. else if(c=='g'||c=='G'){sNmer+="10";}
  220. else if(c=='c'||c=='C'){sNmer+="11";}
  221. else okay=false;
  222. if(!okay){j=nmer_size;}
  223. } //first nmer found
  224. if(okay){
  225. nmer_counts_X[n_x][Integer.parseInt(sNmer,2)]++;
  226. }
  227. if(use_complementary&&okay){nmer_counts_X[n_x][Integer.parseInt(giveR_ComplementX(n_x,i),2)]++;}
  228. for(;;){
  229. i++;
  230. if(seq_inX_w[n_x][i]=='\0') {
  231. i--;
  232. break; }
  233. c=seq_inX_w[n_x][i];
  234. if(c=='a'||c=='A'){sNmer=sNmer.substring(2);sNmer+="00";}
  235. else if(c=='t'||c=='T'){sNmer=sNmer.substring(2);sNmer+="01";}
  236. else if(c=='g'||c=='G'){sNmer=sNmer.substring(2);sNmer+="10";}
  237. else if(c=='c'||c=='C'){sNmer=sNmer.substring(2);sNmer+="11";}
  238. else break;
  239. nmer_counts_X[n_x][Integer.parseInt(sNmer,2)]++;
  240. if(use_complementary){nmer_counts_X[n_x][Integer.parseInt(giveR_ComplementX(n_x,i),2)]++;}
  241. }
  242. sNmer = new String();
  243. }
  244. for(int nmer_i=0;nmer_i<num_possible_nmers;nmer_i++){
  245. w_meanVals_X[n_x]+=nmer_counts_X[n_x][nmer_i]; }
  246. if(w_meanVals_X[n_x]<min_num_nmers_in_w) {valid_X[n_x]=true; continue;}
  247. if(w_meanVals_X[n_x]/i<min_percent_nt_in_w) {valid_X[n_x]=true; continue;}
  248. w_meanVals_X[n_x] = w_meanVals_X[n_x]/num_possible_nmers;
  249. pcs.firePropertyChange("progress", null, n_x); // progressbar stuff
  250. }//end window loop
  251. ///---------------------Y
  252. sNmer = new String();
  253. for(n_y=0;n_y<w_num_Y;n_y++){
  254. i=-1;
  255. for(;;){
  256. i++;
  257. if(seq_inY_w[n_y][i]=='\0'){break;}
  258. c=seq_inY_w[n_y][i];
  259. if(c=='a'||c=='A'){sNmer+="00";}
  260. else if(c=='t'||c=='T'){sNmer+="01";}
  261. else if(c=='g'||c=='G'){sNmer+="10";}
  262. else if(c=='c'||c=='C'){sNmer+="11";}
  263. else continue;
  264. for(j=1;j<nmer_size;j++){
  265. i++;
  266. if(seq_inY_w[n_y][i]=='\0')break;
  267. c=seq_inY_w[n_y][i];
  268. okay=true;
  269. if(c=='a'||c=='A'){sNmer+="00";}
  270. else if(c=='t'||c=='T'){sNmer+="01";}
  271. else if(c=='g'||c=='G'){sNmer+="10";}
  272. else if(c=='c'||c=='C'){sNmer+="11";}
  273. else okay=false;
  274. if(!okay){j=nmer_size;}
  275. }//found first nmer
  276. if(okay){
  277. nmer_counts_Y[n_y][Integer.parseInt(sNmer,2)]++;
  278. }
  279. if(use_complementary&&okay){nmer_counts_Y[n_y][Integer.parseInt(giveR_ComplementY(n_y,i),2)]++;}
  280. for(;;){
  281. i++;
  282. if(seq_inY_w[n_y][i]=='\0') {
  283. i--;
  284. break; }
  285. c=seq_inY_w[n_y][i];
  286. if(c=='a'||c=='A'){sNmer=sNmer.substring(2);sNmer+="00";}
  287. else if(c=='t'||c=='T'){sNmer=sNmer.substring(2);sNmer+="01";}
  288. else if(c=='g'||c=='G'){sNmer=sNmer.substring(2);sNmer+="10";}
  289. else if(c=='c'||c=='C'){sNmer=sNmer.substring(2);sNmer+="11";}
  290. else break;
  291. nmer_counts_Y[n_y][Integer.parseInt(sNmer,2)]++;
  292. if(use_complementary){nmer_counts_Y[n_y][Integer.parseInt(giveR_ComplementY(n_y,i),2)]++;}
  293. }
  294. sNmer = new String();
  295. }
  296. for(int nmer_i=0;nmer_i<num_possible_nmers;nmer_i++){w_meanVals_Y[n_y]+=nmer_counts_Y[n_y][nmer_i];}
  297. if(w_meanVals_Y[n_y]<min_num_nmers_in_w){valid_Y[n_y]=true; continue;}
  298. if(w_meanVals_Y[n_y]/i<min_percent_nt_in_w){valid_Y[n_y]=true; continue;}
  299. w_meanVals_Y[n_y] = w_meanVals_Y[n_y]/num_possible_nmers;
  300. pcs.firePropertyChange("progress", null, (w_num_X + n_y)); // progressbar
  301. }//end of window loop
  302. ///Computations-----------X
  303. for(n_x=0;n_x<w_num_X;n_x++) // for each window on X axis loop
  304. for(int nmer_i=0;nmer_i<num_possible_nmers;nmer_i++)
  305. {
  306. if(valid_X[n_x]) continue;
  307. nmer_counts_X[n_x][nmer_i]=nmer_counts_X[n_x][nmer_i]-w_meanVals_X[n_x];
  308. X_square[n_x]+=nmer_counts_X[n_x][nmer_i]*nmer_counts_X[n_x][nmer_i];
  309. }
  310. ///Computations------------Y
  311. for(n_y=0;n_y<w_num_Y;n_y++) // for each window on Y axis loop
  312. for(int nmer_i=0;nmer_i<num_possible_nmers;nmer_i++)
  313. {
  314. if(valid_Y[n_y]) continue;
  315. nmer_counts_Y[n_y][nmer_i]=nmer_counts_Y[n_y][nmer_i]-w_meanVals_Y[n_y];
  316. Y_square[n_y]+=nmer_counts_Y[n_y][nmer_i]*nmer_counts_Y[n_y][nmer_i];
  317. }
  318. //Computations------------Score
  319. float minScore=0;
  320. boolean firstTime=true;
  321. for(n_x=0;n_x<w_num_X;n_x++){ // for each window on X axis loop
  322. for(n_y=0;n_y<w_num_Y;n_y++) // for each window on Y axis loop
  323. {
  324. if(valid_X[n_x]){continue;}
  325. if(valid_Y[n_y]){continue;}
  326. for(int nmer_i=0;nmer_i<num_possible_nmers;nmer_i++)
  327. score[n_x][n_y]+=nmer_counts_X[n_x][nmer_i]*nmer_counts_Y[n_y][nmer_i];
  328. score[n_x][n_y]=score[n_x][n_y]/(float)Math.sqrt((X_square[n_x]*Y_square[n_y]));
  329. if(firstTime){minScore=score[n_x][n_y]; firstTime=false;}
  330. if(score[n_x][n_y]<minScore){minScore=score[n_x][n_y];}
  331. }
  332. pcs.firePropertyChange("progress", null, (w_num_X + w_num_Y + n_x)); // progressbar
  333. }
  334. //-----------------------Clear memory
  335. valid_X = null;
  336. valid_Y = null;
  337. nmer_counts_X = null;
  338. nmer_counts_Y = null;
  339. w_meanVals_X = null;
  340. w_meanVals_Y = null;
  341. X_square = null;
  342. Y_square = null;
  343. }
  344. public void Prepare_Sequences_var_window_X(String xSequence, String xGenes)
  345. {
  346. int i,j,k,l,m;
  347. boolean overOrigin=false;
  348. //genes for sequence x
  349. File f_g = new File(xGenes);
  350. BufferedReader in = getReader(f_g);
  351. String line = getLine(in); //gets the first line
  352. line=getLine(in); //gets the number of genes + word protein
  353. String [] result = line.split(" ");
  354. w_num_X=Integer.parseInt(result[0]);
  355. int[][] w_pos=new int [w_num_X][2];
  356. X_geneSynonyms = new String [w_num_X];
  357. boolean[] strand=new boolean [w_num_X];
  358. line=getLine(in); //gets the column headings
  359. for(i=0; i<w_num_X; i++)
  360. {
  361. line=getLine(in); //gets a gene
  362. j=line.indexOf("."); //location of end of start
  363. k=line.indexOf("\t"); //location of end of end
  364. l=line.indexOf("\t",k+1);
  365. l=line.indexOf("\t",l+1);
  366. l=line.indexOf("\t",l+1);
  367. l=line.indexOf("\t",l+1);
  368. l=line.indexOf("\t",l+1);
  369. m=line.lastIndexOf("\t",l-1);
  370. result[0]=line.substring(0,j);
  371. w_pos[i][0]=Integer.parseInt(result[0]);
  372. w_pos[i][0]--;
  373. result[0]=line.substring(j+2,k);
  374. w_pos[i][1]=Integer.parseInt(result[0]);
  375. w_pos[i][1]--;
  376. result[0]=line.substring(k+1,k+2);
  377. if(result[0]=="+") strand[i]=true;
  378. else strand[i]=false;
  379. X_geneSynonyms[i]=line.substring(m+1,l);
  380. }
  381. //sequence x
  382. File f_s = new File(xSequence);
  383. in = getReader(f_s);
  384. StringBuilder sb = new StringBuilder();
  385. X_header = getLine(in);
  386. while(line!=null)
  387. {
  388. line=getLine(in);
  389. if(line!=null)
  390. {
  391. len_X += (long)line.length();
  392. sb.append(line);
  393. }
  394. }
  395. StringBuilder sb2 = sb;
  396. //set up arrays
  397. seq_inX_w=new char [w_num_X][];
  398. for(i=0; i<w_num_X; i++)
  399. {
  400. if(w_pos[i][1]>=w_pos[i][0])
  401. {
  402. seq_inX_w[i]=new char[(w_pos[i][1]-w_pos[i][0]+2)]; //why add two?
  403. overOrigin=false;
  404. }
  405. else
  406. {
  407. seq_inX_w[i]=new char[(len_X-w_pos[i][0])+w_pos[i][1]+2]; //changed from (length-end)+start to (length-start)+end! -vinnie
  408. overOrigin=true;
  409. }
  410. }
  411. for(i=0; i<w_num_X; i++)
  412. {
  413. if(strand[i]) //original strand
  414. {
  415. if(overOrigin==false)
  416. {
  417. for(j=0; j<w_pos[i][1]-w_pos[i][0]+1; j++){
  418. seq_inX_w[i][j]=sb2.charAt(w_pos[i][0]+j);
  419. }
  420. seq_inX_w[i][j]='\0';
  421. }
  422. else
  423. {
  424. k=0;
  425. for(j=w_pos[i][0]; j<len_X; j++)
  426. {
  427. seq_inX_w[i][k]=sb2.charAt(j);
  428. k++;
  429. }
  430. for(j=0; j<=w_pos[i][1]; j++)
  431. {
  432. seq_inX_w[i][k]=sb2.charAt(j);
  433. k++;
  434. }
  435. seq_inX_w[i][k]='\0';
  436. }
  437. }
  438. else //complementary strand
  439. {
  440. if(overOrigin==false)
  441. {
  442. for(j=0; j<w_pos[i][1]-w_pos[i][0]+1; j++)
  443. {
  444. seq_inX_w[i][j]=sb2.charAt(w_pos[i][1]-w_pos[i][0]-j);
  445. switch(seq_inX_w[i][j]){
  446. case 'a': case 'A': seq_inX_w[i][j]='T'; break;
  447. case 't': case 'T': seq_inX_w[i][j]='A'; break;
  448. case 'g': case 'G': seq_inX_w[i][j]='C'; break;
  449. case 'c': case 'C': seq_inX_w[i][j]='G'; break;
  450. default: seq_inX_w[i][j]='N';
  451. }
  452. }
  453. seq_inX_w[i][j]='\0';
  454. }
  455. else
  456. {
  457. k=0;
  458. j=w_pos[i][1];
  459. while(j>=0)
  460. {
  461. seq_inX_w[i][k]= sb2.charAt(j);
  462. j--;
  463. switch(seq_inX_w[i][k]){
  464. case 'a': case 'A': seq_inX_w[i][k]='T'; break;
  465. case 't': case 'T': seq_inX_w[i][k]='A'; break;
  466. case 'g': case 'G': seq_inX_w[i][k]='C'; break;
  467. case 'c': case 'C': seq_inX_w[i][k]='G'; break;
  468. default: seq_inX_w[i][k]='N';
  469. }
  470. k++;
  471. }
  472. for(j=len_X-1; j>=w_pos[i][0]; j--)
  473. {
  474. seq_inX_w[i][k]=sb2.charAt(j);
  475. switch(seq_inX_w[i][k]){
  476. case 'a': case 'A': seq_inX_w[i][k]='T'; break;
  477. case 't': case 'T': seq_inX_w[i][k]='A'; break;
  478. case 'g': case 'G': seq_inX_w[i][k]='C'; break;
  479. case 'c': case 'C': seq_inX_w[i][k]='G'; break;
  480. default: seq_inX_w[i][k]='N';
  481. }
  482. k++;
  483. }
  484. seq_inX_w[i][k]='\0';
  485. }
  486. }
  487. }
  488. }
  489. public void Prepare_Sequences_var_window_Y(String ySequence, String yGenes)
  490. {
  491. int i,j,k,l,m;
  492. boolean overOrigin=false;
  493. //genes for sequence y
  494. File f_g = new File(yGenes);
  495. BufferedReader in = getReader(f_g);
  496. String line = getLine(in); //gets the first line
  497. line=getLine(in); //gets the number of genes + word protein
  498. String [] result = line.split(" ");
  499. w_num_Y=Integer.parseInt(result[0]);
  500. int[][] w_pos=new int [w_num_Y][2];
  501. boolean[] strand=new boolean [w_num_Y];
  502. Y_geneSynonyms = new String [w_num_Y];
  503. line=getLine(in); //get the column headings
  504. for(i=0; i<w_num_Y; i++)
  505. {
  506. line=getLine(in); //gets a gene
  507. j=line.indexOf("."); //location of end of start
  508. k=line.indexOf("\t"); //location of end of end
  509. l=line.indexOf("\t",k+1);
  510. l=line.indexOf("\t",l+1);
  511. l=line.indexOf("\t",l+1);
  512. l=line.indexOf("\t",l+1);
  513. l=line.indexOf("\t",l+1);
  514. m=line.lastIndexOf("\t",l-1);
  515. result[0]=line.substring(0,j);
  516. w_pos[i][0]=Integer.parseInt(result[0]);
  517. w_pos[i][0]--;
  518. result[0]=line.substring(j+2,k);
  519. w_pos[i][1]=Integer.parseInt(result[0]);
  520. w_pos[i][1]--;
  521. result[0]=line.substring(k+1,k+2);
  522. if(result[0]=="+") strand[i]=true;
  523. else strand[i]=false;
  524. Y_geneSynonyms[i]=line.substring(m+1,l);
  525. }
  526. //sequence y
  527. File f_s = new File(ySequence);
  528. in = getReader(f_s);
  529. StringBuilder sb = new StringBuilder();
  530. Y_header = getLine(in);
  531. while(line!=null)
  532. {
  533. line=getLine(in);
  534. if(line!=null)
  535. {
  536. len_Y += (long)line.length();
  537. sb.append(line);
  538. }
  539. }
  540. StringBuilder sb2 = sb;
  541. //set up the array ready to read in
  542. seq_inY_w=new char [w_num_Y][];
  543. for(i=0; i<w_num_Y; i++)
  544. {
  545. if(w_pos[i][1]>=w_pos[i][0])
  546. {
  547. seq_inY_w[i]=new char[(w_pos[i][1]-w_pos[i][0]+2)];
  548. overOrigin=false;
  549. }
  550. else
  551. {
  552. seq_inY_w[i]=new char[(len_Y-w_pos[i][0])+w_pos[i][1]+2];
  553. overOrigin=true;
  554. }
  555. }
  556. for(i=0; i<w_num_Y; i++)
  557. {
  558. if(strand[i]) //original strand
  559. {
  560. if(overOrigin==false)
  561. {
  562. for(j=0; j<w_pos[i][1]-w_pos[i][0]+1; j++){
  563. seq_inY_w[i][j]=sb2.charAt(w_pos[i][0]+j);
  564. }
  565. seq_inY_w[i][j]='\0';
  566. }
  567. else
  568. {
  569. k=0;
  570. for(j=w_pos[i][0]; j<len_Y; j++)
  571. {
  572. seq_inY_w[i][k]=sb2.charAt(j);
  573. k++;
  574. }
  575. for(j=0; j<=w_pos[i][1]; j++)
  576. {
  577. seq_inY_w[i][k]=sb2.charAt(j);
  578. k++;
  579. }
  580. seq_inY_w[i][k]='\0';
  581. }
  582. }
  583. else //complementary strand
  584. {
  585. if(overOrigin==false)
  586. {
  587. for(j=0; j<w_pos[i][1]-w_pos[i][0]+1; j++)
  588. {
  589. seq_inY_w[i][j]=sb2.charAt(w_pos[i][1]-w_pos[i][0]-j);
  590. switch(seq_inY_w[i][j]){
  591. case 'a': case 'A': seq_inY_w[i][j]='T'; break;
  592. case 't': case 'T': seq_inY_w[i][j]='A'; break;
  593. case 'g': case 'G': seq_inY_w[i][j]='C'; break;
  594. case 'c': case 'C': seq_inY_w[i][j]='G'; break;
  595. default: seq_inY_w[i][j]='N';
  596. }
  597. }
  598. seq_inY_w[i][j]='\0';
  599. }
  600. else
  601. {
  602. k=0;
  603. j=w_pos[i][1];
  604. while(j>=0)
  605. {
  606. seq_inY_w[i][k]= sb2.charAt(j);
  607. j--;
  608. switch(seq_inY_w[i][k]){
  609. case 'a': case 'A': seq_inY_w[i][k]='T'; break;
  610. case 't': case 'T': seq_inY_w[i][k]='A'; break;
  611. case 'g': case 'G': seq_inY_w[i][k]='C'; break;
  612. case 'c': case 'C': seq_inY_w[i][k]='G'; break;
  613. default: seq_inY_w[i][k]='N';
  614. }
  615. k++;
  616. }
  617. for(j=len_Y-1; j>=w_pos[i][0]; j--)
  618. {
  619. seq_inY_w[i][k]=sb2.charAt(j);
  620. switch(seq_inY_w[i][k]){
  621. case 'a': case 'A': seq_inY_w[i][k]='T'; break;
  622. case 't': case 'T': seq_inY_w[i][k]='A'; break;
  623. case 'g': case 'G': seq_inY_w[i][k]='C'; break;
  624. case 'c': case 'C': seq_inY_w[i][k]='G'; break;
  625. default: seq_inY_w[i][k]='N';
  626. }
  627. k++;
  628. }
  629. seq_inY_w[i][k]='\0';
  630. }
  631. }
  632. }
  633. }
  634. public void S_Plot_Correlation_var_windows()
  635. {
  636. int i, j;
  637. int n_x, n_y;
  638. char c;
  639. String sNmer = new String();
  640. boolean okay = true;
  641. frame.progressBar.setStringPainted(true);
  642. frame.progressBar.setMinimum(0);
  643. frame.progressBar.setMaximum(w_num_Y+w_num_X+w_num_X);
  644. ///---------------------------X
  645. for(n_x=0;n_x<w_num_X;n_x++){
  646. i=-1;
  647. for(;;){
  648. i++;
  649. if(seq_inX_w[n_x][i]=='\0') {break;}
  650. c=seq_inX_w[n_x][i];
  651. if(c=='a'||c=='A'){sNmer+="00";}
  652. else if(c=='t'||c=='T'){sNmer+="01";}
  653. else if(c=='g'||c=='G'){sNmer+="10";}
  654. else if(c=='c'||c=='C'){sNmer+="11";}
  655. else continue;
  656. for(j=1;j<nmer_size;j++){
  657. i++;
  658. if(seq_inX_w[n_x][i]=='\0'){break;}
  659. c=seq_inX_w[n_x][i];
  660. okay=true;
  661. if(c=='a'||c=='A'){sNmer+="00";}
  662. else if(c=='t'||c=='T'){sNmer+="01";}
  663. else if(c=='g'||c=='G'){sNmer+="10";}
  664. else if(c=='c'||c=='C'){sNmer+="11";}
  665. else okay=false;
  666. if(!okay){j=nmer_size;}
  667. } //first nmer found
  668. if(okay){
  669. nmer_counts_X[n_x][Integer.parseInt(sNmer,2)]++;
  670. }
  671. if(use_complementary&&okay){nmer_counts_X[n_x][Integer.parseInt(giveR_ComplementX(n_x,i),2)]++;}
  672. for(;;){
  673. i++;
  674. if(seq_inX_w[n_x][i]=='\0') {
  675. i--;
  676. break; }
  677. c=seq_inX_w[n_x][i];
  678. if(c=='a'||c=='A'){sNmer=sNmer.substring(2);sNmer+="00";}
  679. else if(c=='t'||c=='T'){sNmer=sNmer.substring(2);sNmer+="01";}
  680. else if(c=='g'||c=='G'){sNmer=sNmer.substring(2);sNmer+="10";}
  681. else if(c=='c'||c=='C'){sNmer=sNmer.substring(2);sNmer+="11";}
  682. else break;
  683. nmer_counts_X[n_x][Integer.parseInt(sNmer,2)]++;
  684. if(use_complementary){nmer_counts_X[n_x][Integer.parseInt(giveR_ComplementX(n_x,i),2)]++;}
  685. }
  686. sNmer = new String();
  687. }
  688. //validation checks
  689. for(int nmer_i=0;nmer_i<num_possible_nmers;nmer_i++){w_meanVals_X[n_x]+=nmer_counts_X[n_x][nmer_i];}
  690. if(w_meanVals_X[n_x]<min_num_nmers_in_w) {valid_X[n_x]=true; continue;}
  691. if(w_meanVals_X[n_x]/i<min_percent_nt_in_w) {valid_X[n_x]=true; continue;}
  692. //take % because other variable window may be significantly different size
  693. w_meanVals_X[n_x]=0;
  694. for(int nmer_i=0;nmer_i<num_possible_nmers;nmer_i++)
  695. {
  696. nmer_counts_X[n_x][nmer_i]=(nmer_counts_X[n_x][nmer_i]/(float)i);
  697. w_meanVals_X[n_x]+=nmer_counts_X[n_x][nmer_i];
  698. }
  699. w_meanVals_X[n_x]=w_meanVals_X[n_x]/num_possible_nmers;
  700. pcs.firePropertyChange("progress", null, n_x); // progressbar stuff
  701. }//end window loop
  702. ///---------------------Y
  703. sNmer = new String();
  704. for(n_y=0;n_y<w_num_Y;n_y++){
  705. i=-1;
  706. for(;;){
  707. i++;
  708. if(seq_inY_w[n_y][i]=='\0'){break;}
  709. c=seq_inY_w[n_y][i];
  710. if(c=='a'||c=='A'){sNmer+="00";}
  711. else if(c=='t'||c=='T'){sNmer+="01";}
  712. else if(c=='g'||c=='G'){sNmer+="10";}
  713. else if(c=='c'||c=='C'){sNmer+="11";}
  714. else continue;
  715. for(j=1;j<nmer_size;j++){
  716. i++;
  717. if(seq_inY_w[n_y][i]=='\0')break;
  718. c=seq_inY_w[n_y][i];
  719. okay=true;
  720. if(c=='a'||c=='A'){sNmer+="00";}
  721. else if(c=='t'||c=='T'){sNmer+="01";}
  722. else if(c=='g'||c=='G'){sNmer+="10";}
  723. else if(c=='c'||c=='C'){sNmer+="11";}
  724. else okay=false;
  725. if(!okay){j=nmer_size;}
  726. }//found first nmer
  727. if(okay){
  728. nmer_counts_Y[n_y][Integer.parseInt(sNmer,2)]++;
  729. }
  730. if(use_complementary&&okay){nmer_counts_Y[n_y][Integer.parseInt(giveR_ComplementY(n_y,i),2)]++;}
  731. for(;;){
  732. i++;
  733. if(seq_inY_w[n_y][i]=='\0') {
  734. i--;
  735. break; }
  736. c=seq_inY_w[n_y][i];
  737. if(c=='a'||c=='A'){sNmer=sNmer.substring(2);sNmer+="00";}
  738. else if(c=='t'||c=='T'){sNmer=sNmer.substring(2);sNmer+="01";}
  739. else if(c=='g'||c=='G'){sNmer=sNmer.substring(2);sNmer+="10";}
  740. else if(c=='c'||c=='C'){sNmer=sNmer.substring(2);sNmer+="11";}
  741. else break;
  742. nmer_counts_Y[n_y][Integer.parseInt(sNmer,2)]++;
  743. if(use_complementary){nmer_counts_Y[n_y][Integer.parseInt(giveR_ComplementY(n_y,i),2)]++;}
  744. }
  745. sNmer = new String();
  746. }
  747. //validation
  748. for(int nmer_i=0;nmer_i<num_possible_nmers;nmer_i++){w_meanVals_Y[n_y]+=nmer_counts_Y[n_y][nmer_i];}
  749. if(w_meanVals_Y[n_y]<min_num_nmers_in_w){valid_Y[n_y]=true; continue;}
  750. if(w_meanVals_Y[n_y]/i<min_percent_nt_in_w){valid_Y[n_y]=true; continue;}
  751. //take % because other variable window may be significantly different size
  752. w_meanVals_Y[n_y]=0;
  753. for(int nmer_i=0;nmer_i<num_possible_nmers;nmer_i++)
  754. {
  755. nmer_counts_Y[n_y][nmer_i]=(nmer_counts_Y[n_y][nmer_i]/(float)i);
  756. w_meanVals_Y[n_y]+=nmer_counts_Y[n_y][nmer_i];
  757. }
  758. w_meanVals_Y[n_y] = w_meanVals_Y[n_y]/num_possible_nmers;
  759. pcs.firePropertyChange("progress", null, (w_num_X + n_y)); // progressbar
  760. }//end of window loop
  761. ///Computations-----------X
  762. for(n_x=0;n_x<w_num_X;n_x++) // for each window on X axis loop
  763. for(int nmer_i=0;nmer_i<num_possible_nmers;nmer_i++)
  764. {
  765. if(valid_X[n_x]) continue;
  766. nmer_counts_X[n_x][nmer_i]=nmer_counts_X[n_x][nmer_i]-w_meanVals_X[n_x];
  767. X_square[n_x]+=nmer_counts_X[n_x][nmer_i]*nmer_counts_X[n_x][nmer_i];
  768. }
  769. ///Computations------------Y
  770. for(n_y=0;n_y<w_num_Y;n_y++) // for each window on Y axis loop
  771. for(int nmer_i=0;nmer_i<num_possible_nmers;nmer_i++)
  772. {
  773. if(valid_Y[n_y]) continue;
  774. nmer_counts_Y[n_y][nmer_i]=nmer_counts_Y[n_y][nmer_i]-w_meanVals_Y[n_y];
  775. Y_square[n_y]+=nmer_counts_Y[n_y][nmer_i]*nmer_counts_Y[n_y][nmer_i];
  776. }
  777. //Computations------------Score
  778. for(n_x=0;n_x<w_num_X;n_x++){ // for each window on X axis loop
  779. for(n_y=0;n_y<w_num_Y;n_y++) // for each window on Y axis loop
  780. {
  781. if(valid_X[n_x]){continue;}
  782. if(valid_Y[n_y]){continue;}
  783. for(int nmer_i=0;nmer_i<num_possible_nmers;nmer_i++)
  784. score[n_x][n_y]+=nmer_counts_X[n_x][nmer_i]*nmer_counts_Y[n_y][nmer_i];
  785. score[n_x][n_y]=score[n_x][n_y]/(float)Math.sqrt((X_square[n_x]*Y_square[n_y]));
  786. }
  787. pcs.firePropertyChange("progress", null, (w_num_X + w_num_Y + n_x)); // progressbar
  788. }
  789. //-----------------------Clear memory
  790. valid_X = null;
  791. valid_Y = null;
  792. nmer_counts_X = null;
  793. nmer_counts_Y = null;
  794. w_meanVals_X = null;
  795. w_meanVals_Y = null;
  796. X_square = null;
  797. Y_square = null;
  798. }
  799. public void writeScoreMatrix(String writeLocation){
  800. String f_score = writeLocation;
  801. File F_score = new File(f_score);
  802. try{
  803. if(F_score.createNewFile()){System.out.println("File F_score was created");}
  804. else{System.out.println("File F_score was not created");}
  805. }
  806. catch(IOException e){System.out.println("I/O Error: writeScoreMatrix");}
  807. PrintWriter out = openWriter(F_score);
  808. Calendar cal = Calendar.getInstance();
  809. DateFormat df = DateFormat.getDateTimeInstance(DateFormat.FULL, DateFormat.MEDIUM);
  810. out.print("Pearson Correlation Values");
  811. out.println("\t\t\t" + df.format(cal.getTime()));
  812. out.println();
  813. out.println("Sequence X:\t" + X_header);
  814. out.println("Sequence Y:\t" + Y_header);
  815. out.println();
  816. out.println("Window Size:\t" + w_size);
  817. out.println("Nmer Size:\t" + nmer_size);
  818. out.println("Compl. considered:\t" + use_complementary);
  819. out.println();
  820. out.println();
  821. out.println();
  822. out.print("\t0");
  823. for(int p=1;p<w_num_Y;p++){out.print("\t"+p);}
  824. out.println();
  825. for(int k=0; k<w_num_X; k++){
  826. StringBuilder sb2 = new StringBuilder();
  827. sb2.append(k);
  828. for(int l=0;l<w_num_Y;l++){
  829. String v = "\t" + score[k][l];
  830. sb2.append(v);
  831. if(l==(w_num_Y-1)){out.println(sb2);}
  832. }
  833. }
  834. out.close();
  835. }
  836. public void writeGeneMatrix(String writeLocation){
  837. String f_geneMatrix = writeLocation;
  838. File F_geneMatrix = new File(f_geneMatrix);
  839. try{
  840. if(F_geneMatrix.createNewFile()){System.out.println("File F_geneMatrix was created");}
  841. else{System.out.println("File F_geneMatrix was not created");}
  842. }
  843. catch(IOException e){System.out.println("I/O Error: writeGeneMatrix");}
  844. PrintWriter out = openWriter(F_geneMatrix);
  845. Calendar cal = Calendar.getInstance();
  846. DateFormat df = DateFormat.getDateTimeInstance(DateFormat.FULL, DateFormat.MEDIUM);
  847. out.print("Pearson Correlation Gene Values");
  848. out.println("\t\t\t" + df.format(cal.getTime()));
  849. out.println();
  850. out.println("Sequence X:\t" + X_header);
  851. out.println("Sequence Y:\t" + Y_header);
  852. out.println();
  853. out.println("Nmer Size:\t3");
  854. out.println("Compl. considered:\t" + use_complementary);
  855. out.println();
  856. out.println();
  857. out.println();
  858. String syn;
  859. for(int p=0;p<w_num_Y;p++){
  860. syn = Y_geneSynonyms[p];
  861. out.print("\t"+syn);
  862. }
  863. out.println();
  864. for(int k=0; k<w_num_X; k++){
  865. StringBuilder sb2 = new StringBuilder();
  866. syn = X_geneSynonyms[k];
  867. sb2.append(syn);
  868. for(int l=0;l<w_num_Y;l++){
  869. String v = "\t" + score[k][l];
  870. sb2.append(v);
  871. if(l==(w_num_Y-1)){out.println(sb2);}
  872. }
  873. }
  874. out.close();
  875. }
  876. public void write_Avg_Stdev(String writeLocation){
  877. calcAvg_window_Corr();
  878. stdev_pearCorrX = new float [w_num_X];
  879. stdev_pearCorrY = new float [w_num_Y];
  880. String f_avg_stdev = writeLocation;
  881. File F_avg_stdev = new File(f_avg_stdev);
  882. try{
  883. if(F_avg_stdev.createNewFile()){System.out.println("File F_avg_stdev was created");}
  884. else{System.out.println("File F_avg_stdev was not created");}
  885. }
  886. catch(IOException e){System.out.println("I/O Error: write_Avg_Stdev");}
  887. PrintWriter out = openWriter(F_avg_stdev);
  888. Calendar cal = Calendar.getInstance();
  889. DateFormat df = DateFormat.getDateTimeInstance(DateFormat.FULL, DateFormat.MEDIUM);
  890. out.print("Average and Standard Deviations");
  891. out.println("\t\t\t" + df.format(cal.getTime()));
  892. out.println();
  893. out.println("Sequence X:\t" + X_header);
  894. out.println("Sequence Y:\t" + Y_header);
  895. out.println();
  896. out.println("Window Size:\t" + w_size);
  897. out.println("Nmer Size:\t" + nmer_size);
  898. out.println("Compl. considered:\t" + use_complementary);
  899. out.println();
  900. out.println();
  901. out.println();
  902. out.println("Sequence X:");
  903. out.println("Window\tAvg\tStdev");
  904. float sum_of_the_difference=0;
  905. for(int i=0;i<w_num_X;i++){
  906. for(int j=0;j<w_num_Y;j++){
  907. sum_of_the_difference+=Math.abs(score[i][j]-avgCorrX[i]);
  908. }
  909. stdev_pearCorrX[i]=sum_of_the_difference/(float)w_num_Y;
  910. sum_of_the_difference=0;
  911. }
  912. sum_of_the_difference=0;
  913. for(int i=0;i<w_num_Y;i++){
  914. for(int j=0;j<w_num_X;j++){
  915. sum_of_the_difference+=Math.abs(score[j][i]-avgCorrY[i]);
  916. }
  917. stdev_pearCorrY[i]=sum_of_the_difference/(float)w_num_X;
  918. sum_of_the_difference=0;
  919. }
  920. for(int i=0;i<w_num_X;i++){
  921. out.println(i+"\t"+avgCorrX[i]+"\t"+stdev_pearCorrX[i]);
  922. }
  923. out.println();
  924. out.println();
  925. out.println("Sequence Y:");
  926. out.println("Window\tAvg\tStdev");
  927. for(int i=0;i<w_num_Y;i++){
  928. out.println(i+"\t"+avgCorrY[i]+"\t"+stdev_pearCorrY[i]);
  929. }
  930. avgCorrX = null;
  931. avgCorrY = null;
  932. stdev_pearCorrX = null;
  933. stdev_pearCorrY = null;
  934. out.close();
  935. }
  936. public void write_Gene_Avg_Stdev(String writeLocation){
  937. calcAvg_window_Corr();
  938. String f_gene_avg_stdev = writeLocation;
  939. File F_gene_avg_stdev = new File(f_gene_avg_stdev);
  940. try{
  941. if(F_gene_avg_stdev.createNewFile()){System.out.println("File F_gene_avg_stdev was created");}
  942. else{System.out.println("File F_gene_avg_stdev was not created");}
  943. }
  944. catch(IOException e){System.out.println("I/O Error: write_Gene_Avg_Stdev");}
  945. PrintWriter out = openWriter(F_gene_avg_stdev);
  946. Calendar cal = Calendar.getInstance();
  947. DateFormat df = DateFormat.getDateTimeInstance(DateFormat.FULL, DateFormat.MEDIUM);
  948. out.print("Gene Average and Standard Deviations");
  949. out.println("\t\t\t" + df.format(cal.getTime()));
  950. out.println();
  951. out.println("Sequence X:\t" + X_header);
  952. out.println("Sequence Y:\t" + Y_header);
  953. out.println();
  954. out.println("Nmer Size:\t" + 3);
  955. out.println("Compl. considered:\t" + use_complementary);
  956. out.println();
  957. out.println();
  958. out.println();
  959. out.println("Sequence X:");
  960. out.println("Gene\tAvg\tStdev");
  961. //finding stdev of each window in Sequence X
  962. float sum_of_the_difference=0;
  963. for(int i=0;i<w_num_X;i++){
  964. for(int j=0;j<w_num_Y;i++){
  965. sum_of_the_difference+=Math.abs(score[i][j]-avgCorrX[i]);
  966. }
  967. stdev_pearCorrX[i]=sum_of_the_difference/(float)w_num_Y;
  968. sum_of_the_difference=0;
  969. }
  970. //finding stdev of each window in Sequence Y
  971. sum_of_the_difference=0;
  972. for(int i=0;i<w_num_Y;i++){
  973. for(int j=0;j<w_num_X;i++){
  974. sum_of_the_difference+=Math.abs(score[j][i]-avgCorrY[i]);
  975. }
  976. stdev_pearCorrY[i]=sum_of_the_difference/(float)w_num_X;
  977. sum_of_the_difference=0;
  978. }
  979. //writing out avg and stdev for each window in sequence X
  980. for(int i=0;i<w_num_X;i++){
  981. out.println(X_geneSynonyms[i]+"\t"+avgCorrX[i]+"\t"+stdev_pearCorrX[i]);
  982. }
  983. out.println();
  984. out.println();
  985. out.println("Sequence Y:");
  986. out.println("Gene\tAvg\tStdev");
  987. //writing out avg and stdev for each window in sequence Y
  988. for(int i=0;i<w_num_Y;i++){
  989. out.println(Y_geneSynonyms[i]+"\t"+avgCorrY[i]+"\t"+stdev_pearCorrY[i]);
  990. }
  991. out.println();
  992. avgCorrX = null;
  993. avgCorrY = null;
  994. stdev_pearCorrX = null;
  995. stdev_pearCorrY = null;
  996. out.close();
  997. }
  998. public void write_GC_w_content(String writeLocation){
  999. String f_w_GC_content = writeLocation;
  1000. File F_w_GC_content = new File(f_w_GC_content);
  1001. try{
  1002. if(F_w_GC_content.createNewFile()){System.out.println("File F_w_GC_content was created");}
  1003. else{System.out.println("File F_w_GC_content was not created");}
  1004. }
  1005. catch(IOException e){System.out.println("I/O Error: write_GC_w_content");}
  1006. PrintWriter out = openWriter(F_w_GC_content);
  1007. Calendar cal = Calendar.getInstance();
  1008. DateFormat df = DateFormat.getDateTimeInstance(DateFormat.FULL, DateFormat.MEDIUM);
  1009. out.print("G+C window content");
  1010. out.println("\t\t\t" + df.format(cal.getTime()));
  1011. out.println();
  1012. out.println("Sequence X:\t" + X_header);
  1013. out.println("Sequence Y:\t" + Y_header);
  1014. out.println();
  1015. out.println("Window size:\t"+w_size);
  1016. out.println("Nmer Size:\t" + nmer_size);
  1017. out.println("Compl. considered:\t" + use_complementary);
  1018. out.println();
  1019. out.println();
  1020. out.println();
  1021. out.println("Sequence X:");
  1022. out.println("Window\tG+C");
  1023. //writing out the GC content of each window in sequence X
  1024. for(int i=0;i<w_num_X;i++){
  1025. out.println(i+"\t"+calcGC(i,true));
  1026. }
  1027. out.println();
  1028. out.println();
  1029. out.println("Sequence Y:");
  1030. out.println("Window\tG+C");
  1031. //writing out the GC content of each window in sequence Y
  1032. for(int i=0;i<w_num_Y;i++){
  1033. out.println(i+"\t"+calcGC(i,false));
  1034. }
  1035. out.println();
  1036. out.close();
  1037. }
  1038. public void write_GC_gene_content(String writeLocation){
  1039. String f_gene_GC_content = writeLocation;
  1040. File F_gene_GC_content = new File(f_gene_GC_content);
  1041. try{
  1042. if(F_gene_GC_content.createNewFile()){System.out.println("File F_gene_GC_content was created");}
  1043. else{System.out.println("File F_gene_GC_content was not created");}
  1044. }
  1045. catch(IOException e){System.out.println("I/O Error: write_GC_gene_content");}
  1046. PrintWriter out = openWriter(F_gene_GC_content);
  1047. Calendar cal = Calendar.getInstance();
  1048. DateFormat df = DateFormat.getDateTimeInstance(DateFormat.FULL, DateFormat.MEDIUM);
  1049. out.print("G+C gene content");
  1050. out.println("\t\t\t" + df.format(cal.getTime()));
  1051. out.println();
  1052. out.println("Sequence X:\t" + X_header);
  1053. out.println("Sequence Y:\t" + Y_header);
  1054. out.println();
  1055. out.println("Nmer Size:\t" + 3);
  1056. out.println("Compl. considered:\t" + use_complementary);
  1057. out.println();
  1058. out.println();
  1059. out.println();
  1060. out.println("Sequence X:");
  1061. out.println("Gene\tG+C");
  1062. //writing out the GC content of each gene in sequence X
  1063. for(int i=0;i<w_num_X;i++){
  1064. out.println(X_geneSynonyms[i]+"\t"+calcGC(i,true));
  1065. }
  1066. out.println();
  1067. out.println();
  1068. out.println("Sequence Y:");
  1069. out.println("Gene\tG+C");
  1070. //writing out the GC content of each gene in sequence Y
  1071. for(int i=0;i<w_num_Y;i++){
  1072. out.println(Y_geneSynonyms[i]+"\t"+calcGC(i,false));
  1073. }
  1074. out.println();
  1075. out.close();
  1076. }
  1077. public void write_max_w_Corr(String writeLocation){
  1078. String f_maxCorrs = writeLocation;
  1079. File F_maxCorrs = new File(f_maxCorrs);
  1080. try{
  1081. if(F_maxCorrs.createNewFile()){System.out.println("File F_maxCorrs was created");}
  1082. else{System.out.println("File F_maxCorrs was not created");}
  1083. }
  1084. catch(IOException e){System.out.println("I/O Error: writeScoreMatrix");}
  1085. PrintWriter out = openWriter(F_maxCorrs);
  1086. Calendar cal = Calendar.getInstance();
  1087. DateFormat df = DateFormat.getDateTimeInstance(DateFormat.FULL, DateFormat.MEDIUM);
  1088. out.print("Highest Correlated Windows");
  1089. out.println("\t\t\t" + df.format(cal.getTime()));
  1090. out.println();
  1091. out.println("Sequence X:\t" + X_header);
  1092. out.println("Sequence Y:\t" + Y_header);
  1093. out.println();
  1094. out.println("Window Size:\t" + w_size);
  1095. out.println("Nmer Size:\t" + nmer_size);
  1096. out.println("Compl. considered:\t" + use_complementary);
  1097. out.println();
  1098. out.println();
  1099. out.println();
  1100. float maxScore=score[0][0];
  1101. int max_Ypos=0,max_Xpos=0;
  1102. //writing out the highest correlated window in Y for every window in X
  1103. out.println("Sequence X:");
  1104. out.println("window in X\tValue\twindow in Y");
  1105. for(int k=0; k<w_num_X; k++){
  1106. for(int l=0;l<w_num_Y;l++){
  1107. if(score[k][l]>maxScore){
  1108. maxScore=score[k][l];
  1109. max_Ypos=l;
  1110. }
  1111. }
  1112. out.println(k+"\t"+score[k][max_Ypos]+"\t"+max_Ypos);
  1113. if((k+1)!=w_num_X){maxScore=score[k+1][0];}
  1114. }
  1115. maxScore = score[0][0];
  1116. out.println();
  1117. out.println();
  1118. //writing out the highest correlated window in X for every window in Y
  1119. out.println("Sequence Y:");
  1120. out.println("window in Y\tValue\twindow in X");
  1121. for(int k=0; k<w_num_Y; k++){
  1122. for(int l=0;l<w_num_X;l++){
  1123. if(score[l][k]>maxScore){
  1124. maxScore=score[l][k];
  1125. max_Xpos=l;
  1126. }
  1127. }
  1128. out.println(k+"\t"+score[max_Xpos][k]+"\t"+max_Xpos);
  1129. if((k+1)!=w_num_Y){maxScore=score[0][k+1];}
  1130. }
  1131. out.close();
  1132. }
  1133. public void write_max_gene_Corr(String writeLocation){
  1134. String f_maxCorrs = writeLocation;
  1135. File F_maxCorrs = new File(f_maxCorrs);
  1136. try{
  1137. if(F_maxCorrs.createNewFile()){System.out.println("File F_maxCorrs was created");}
  1138. else{System.out.println("File F_maxCorrs was not created");}
  1139. }
  1140. catch(IOException e){System.out.println("I/O Error: writeScoreMatrix");}
  1141. PrintWriter out = openWriter(F_maxCorrs);
  1142. Calendar cal = Calendar.getInstance();
  1143. DateFormat df = DateFormat.getDateTimeInstance(DateFormat.FULL, DateFormat.MEDIUM);
  1144. out.print("Highest Correlated Genes");
  1145. out.println("\t\t\t" + df.format(cal.getTime()));
  1146. out.println();
  1147. out.println("Sequence X:\t" + X_header);
  1148. out.println("Sequence Y:\t" + Y_header);
  1149. out.println();
  1150. out.println("Nmer Size:\t" + 3);
  1151. out.println("Compl. considered:\t" + use_complementary);
  1152. out.println();
  1153. out.println();
  1154. out.println();
  1155. float maxScore=score[0][0];
  1156. int max_Ypos=0,max_Xpos=0;
  1157. //writing out the highest correlated window in Y for every window in X
  1158. out.println("Sequence X:");
  1159. out.println("window in X\tValue\twindow in Y");
  1160. for(int k=0; k<w_num_X; k++){
  1161. for(int l=0;l<w_num_Y;l++){
  1162. if(score[k][l]>maxScore){
  1163. maxScore=score[k][l];
  1164. max_Ypos=l;
  1165. }
  1166. }
  1167. out.println(X_geneSynonyms[k]+"\t"+score[k][max_Ypos]+"\t"+Y_geneSynonyms[max_Ypos]);
  1168. if((k+1)!=w_num_X){maxScore=score[k+1][0];}
  1169. }
  1170. maxScore = score[0][0];
  1171. out.println();
  1172. out.println();
  1173. //writing out the highest correlated window in X for every window in Y
  1174. out.println("Sequence Y:");
  1175. out.println("window in Y\tValue\twindow in X");
  1176. for(int k=0; k<w_num_Y; k++){
  1177. for(int l=0;l<w_num_X;l++){
  1178. if(score[l][k]>maxScore){
  1179. maxScore=score[l][k];
  1180. max_Xpos=l;
  1181. }
  1182. }
  1183. out.println(Y_geneSynonyms[k]+"\t"+score[max_Xpos][k]+"\t"+max_Xpos);
  1184. if((k+1)!=w_num_Y){maxScore=score[0][k+1];}
  1185. }
  1186. out.close();
  1187. X_geneSynonyms = null;
  1188. Y_geneSynonyms = null;
  1189. }
  1190. public void calcAvg_window_Corr(){
  1191. float numerator = 0;
  1192. avgCorrX = new float [w_num_X];
  1193. avgCorrY = new float [w_num_Y];
  1194. //Calculating averages for the X sequence
  1195. for(int k=0; k<w_num_X; k++){
  1196. for(int l=0;l<w_num_Y;l++){
  1197. numerator = score[k][l] + numerator;
  1198. }
  1199. avgCorrX[k] = numerator/(float)w_num_Y;
  1200. numerator = 0;
  1201. }
  1202. //Calculating averages for the Y sequence
  1203. for(int k=0; k<w_num_Y; k++){
  1204. for(int l=0;l<w_num_X;l++){
  1205. numerator = score[l][k] + numerator;
  1206. }
  1207. avgCorrY[k] = numerator/(float)w_num_X;
  1208. numerator = 0;
  1209. }
  1210. }
  1211. public float calcGC(int w_num, boolean XorY)
  1212. {
  1213. int gc=0;
  1214. int i=-1;
  1215. float perGC=0;
  1216. if(XorY){ //XorY=true means your looking in the X sequence
  1217. do{
  1218. i++;
  1219. if((seq_inX_w[w_num][i]=='c')||(seq_inX_w[w_num][i]=='C')||
  1220. (seq_inX_w[w_num][i]=='g')||(seq_inX_w[w_num][i]=='G')) {gc++;}
  1221. }while(seq_inX_w[w_num][i]!='\0');
  1222. perGC = 100*((float)gc/(float)i);
  1223. }//end if
  1224. if(!XorY){ //XorY=false means your looking in the Y sequence
  1225. do{
  1226. i++;
  1227. if((seq_inY_w[w_num][i]=='c')||(seq_inY_w[w_num][i]=='C')||
  1228. (seq_inY_w[w_num][i]=='g')||(seq_inY_w[w_num][i]=='G')) {gc++;}
  1229. }while(seq_inY_w[w_num][i]!='\0');
  1230. perGC = 100*((float)gc/(float)i);
  1231. }//end if
  1232. return perGC;
  1233. }
  1234. /*public static void hisPlot(){
  1235. int index;
  1236. float maxScore=score[0][0];
  1237. for(int k=0; k<w_num_X; k++){
  1238. for(int l=0;l<w_num_Y;l++){
  1239. if(score[k][l]>maxScore){maxScore=score[k][l];}
  1240. }
  1241. index=(int)((10-(maxScore*10))*2);
  1242. hisPlot[index]++;
  1243. //System.out.println((int)((10-(maxScore*10))*2));
  1244. if((k+1)!=w_num_X){maxScore=score[k+1][0];}
  1245. }
  1246. System.out.println();
  1247. System.out.println();
  1248. for(int i=0;i<hisPlot.length;i++){
  1249. System.out.println(hisPlot[i]);
  1250. }
  1251. }
  1252. */
  1253. public void alignMax(){
  1254. int max_Ypos=0;
  1255. float Xgc,Ygc;
  1256. float maxScore=score[0][0];
  1257. String path4_test_writeOut = "C:\\Documents and Settings\\NTUser\\Desktop\\FileTests\\Maxes&GC.txt";
  1258. File writeOut_X_Y = new File(path4_test_writeOut);
  1259. try{
  1260. if(writeOut_X_Y.createNewFile()){System.out.println("Maxes&GC was created");}
  1261. else{System.out.println("File Maxes&GC was not created");}
  1262. }
  1263. catch(IOException e){System.out.println("I/O Error");}
  1264. writeGC = openWriter(writeOut_X_Y);
  1265. writeGC.print("\t\t\t");
  1266. //write accession
  1267. writeGC.print("\n");
  1268. for(int k=0; k<w_num_X; k++){
  1269. for(int l=0;l<w_num_Y;l++){
  1270. if(score[k][l]>maxScore){
  1271. maxScore=score[k][l];
  1272. max_Ypos=l;
  1273. }
  1274. }
  1275. Xgc = calcGC(k,true);
  1276. Ygc = calcGC(max_Ypos,false);
  1277. writeWindowGC_Max(Xgc,Ygc,k,max_Ypos,maxScore);
  1278. if((k+1)!=w_num_X){maxScore=score[k+1][0];}
  1279. }
  1280. System.out.println();
  1281. System.out.println();
  1282. for(int i=0;i<hisPlot.length;i++){
  1283. System.out.println(hisPlot[i]);
  1284. }
  1285. writeGC.close();
  1286. }
  1287. public void writeWindowGC_Max(float GC_X, float GC_Y, int windowX, int windowY, float max){
  1288. writeGC.print(GC_X+"\t"+windowX+"\t"+max+"\t"+windowY+"\t"+GC_Y+"\t\n");
  1289. }
  1290. public String giveR_ComplementX(int window, int nt){
  1291. String reverseCNmer = new String();
  1292. char c;
  1293. int endNmerIndex = nt-nmer_size;
  1294. for(int q=nt;q>endNmerIndex;q--){
  1295. c=seq_inX_w[window][q];
  1296. if(c=='a'||c=='A'){reverseCNmer+="01";}
  1297. else if(c=='t'||c=='T'){reverseCNmer+="00";}
  1298. else if(c=='g'||c=='G'){reverseCNmer+="11";}
  1299. else if(c=='c'||c=='C'){reverseCNmer+="10";}
  1300. }
  1301. return reverseCNmer;
  1302. }
  1303. public String giveR_ComplementY(int window, int nt){
  1304. String reverseCNmer = new String();
  1305. char c;
  1306. int endNmerIndex = nt-nmer_size;
  1307. for(int q=nt;q>endNmerIndex;q--){
  1308. c=seq_inY_w[window][q];
  1309. if(c=='a'||c=='A'){reverseCNmer+="01";}
  1310. else if(c=='t'||c=='T'){reverseCNmer+="00";}
  1311. else if(c=='g'||c=='G'){reverseCNmer+="11";}
  1312. else if(c=='c'||c=='C'){reverseCNmer+="10";}
  1313. }
  1314. return reverseCNmer;
  1315. }
  1316. public PrintWriter openWriter(File name){ //probably don't need this method
  1317. try{
  1318. PrintWriter out = new PrintWriter(new BufferedWriter(new FileWriter(name)), true);
  1319. return out;
  1320. }
  1321. catch(IOException e){
  1322. System.out.println("PrintWriter could not find parsed_f_X");
  1323. System.exit(0);
  1324. }
  1325. return null;
  1326. }
  1327. public BufferedReader getReader(File file){
  1328. BufferedReader in = null;
  1329. try{
  1330. in = new BufferedReader(new FileReader(file));
  1331. }
  1332. catch(IOException e){
  1333. JOptionPane.showMessageDialog(frame.displayPanel,
  1334. "BufferedReader I/O Error or coundn't find the file.", "Error",
  1335. JOptionPane.ERROR_MESSAGE);
  1336. process = true;
  1337. }
  1338. return in;
  1339. }
  1340. public String getLine(BufferedReader in){
  1341. String line = null;
  1342. try{line = in.readLine();}
  1343. catch(IOException e){
  1344. JOptionPane.showMessageDialog(frame.displayPanel,
  1345. "bReader couldn't read next line.", "Error",
  1346. JOptionPane.ERROR_MESSAGE);
  1347. process = true;
  1348. }
  1349. if(line==null){return null;}
  1350. else{return line;}
  1351. }
  1352. public void writeOutNmerCounts(){
  1353. String path4_test_writeOut = "C:\\Documents and Settings\\NTUser\\Desktop\\FileTests\\NmerCounts.txt";
  1354. File writeOutNmerCounts = new File(path4_test_writeOut);
  1355. try{
  1356. if(writeOutNmerCounts.createNewFile()){System.out.println("writeOutNmerCounts was created");}
  1357. else{System.out.println("File writeOut_X_Y was not created");}
  1358. }
  1359. catch(IOException e){System.out.println("I/O Error");}
  1360. PrintWriter out = openWriter(writeOutNmerCounts);
  1361. for(int k=0; k<w_num_X; k++){
  1362. StringBuilder sb2 = new StringBuilder();
  1363. sb2.append(k);
  1364. for(int l=0;l<num_possible_nmers;l++){
  1365. String v = "\t" + nmer_counts_X[k][l];
  1366. sb2.append(v);
  1367. if(l==(num_possible_nmers-1)){out.println(sb2);}
  1368. }
  1369. }
  1370. out.close();
  1371. }
  1372. //This method should be executed only when the entire genome is being looked at. It will make one tab-delimited file containing all the windows of both genomes.
  1373. public void write_windows(String writeLocation){
  1374. String f_wind_contents = writeLocation;
  1375. File F_wind_contents = new File(f_wind_contents);
  1376. try{
  1377. if(F_wind_contents.createNewFile()){System.out.println("File F_wind_contents was created");}
  1378. else{System.out.println("File F_wind_contents was not created");}
  1379. }
  1380. catch(IOException e){System.out.println("I/O Error: write_windows");}
  1381. PrintWriter out = openWriter(F_wind_contents);
  1382. Calendar cal = Calendar.getInstance();
  1383. DateFormat df = DateFormat.getDateTimeInstance(DateFormat.FULL, DateFormat.MEDIUM);
  1384. out.print("Sequence partitions");
  1385. out.println("\t\t\t" + df.format(cal.getTime()));
  1386. out.println();
  1387. out.println("Sequence X:\t" + X_header);
  1388. out.println("Sequence Y:\t" + Y_header);
  1389. out.println();
  1390. out.println("Window size:\t" + w_size);
  1391. out.println("Nmer Size:\t" + nmer_size);
  1392. out.println("Compl. considered:\t" + use_complementary);
  1393. out.println();
  1394. out.println();
  1395. out.println();
  1396. out.println("Sequence X window partitions: ");
  1397. int i,j=0;
  1398. for(i=0;i<w_num_X;i++){
  1399. out.println("> " +i);
  1400. for(j=0;j<w_size;j++){
  1401. out.print(seq_inX_w[i][j]);
  1402. }
  1403. out.println();
  1404. }
  1405. out.println();
  1406. out.println();
  1407. out.println("Sequence Y window partitions: ");
  1408. for(i=0;i<w_num_Y;i++){
  1409. out.println("> " +i);
  1410. for(j=0;j<w_size;j++){
  1411. out.print(seq_inY_w[i][j]);
  1412. }
  1413. out.println();
  1414. }
  1415. }
  1416. //This method should be executed only when the genes are being looked at. It will make one tab-delimited file containing all the gene sequences for both genomes.
  1417. public void write_GeneWindows(String writeLocation){
  1418. String f_gene_contents = writeLocation;
  1419. File F_gene_contents = new File(f_gene_contents);
  1420. try{
  1421. if(F_gene_contents.createNewFile()){System.out.println("File F_gene_contents was created");}
  1422. else{System.out.printl

Large files files are truncated, but you can click here to view the full file