/sPlot/src/Splot.java
Java | 1750 lines | 1399 code | 252 blank | 99 comment | 325 complexity | 704c123d7ac196e27bc786bb083d59ca MD5 | raw file
Large files are truncated, but you can click here to view the full file
- import java.beans.PropertyChangeSupport;
- import java.io.BufferedReader;
- import java.io.BufferedWriter;
- import java.io.File;
- import java.io.FileReader;
- import java.io.FileWriter;
- import java.io.IOException;
- import java.io.PrintWriter;
- import java.text.DateFormat;
- import java.text.SimpleDateFormat;
- import java.util.Calendar;
-
- import javax.swing.JOptionPane;
-
- public class Splot {
- //Input
- private int w_size; //window size in bases; read from args[3] in init(), where -1 selects the gene (variable-window) mode
- private int nmer_size; //n-mer length; read from args[2] in init()
- private boolean use_complementary; //read from args[4]; when true the n-mer returned by giveR_ComplementX/Y is counted as well
- //Parameters
- private int num_possible_nmers; //set in init() to 1<<(nmer_size*2), i.e. 4^nmer_size
- private float min_percent_nt_in_w = (float)0.1; //threshold used twice: minimum fill of a trailing partial window (parse_X/parse_Y) and minimum n-mers per position of a window (compute)
- private int min_num_nmers_in_w=50; //windows with fewer counted n-mers than this are excluded from scoring
-
- //Output
- private int len_X; //number of sequence characters read for X
- private int len_Y; //number of sequence characters read for Y
-
- private char[][] seq_inX_w; //Holds all X window sequences [# of windows][sequence for each window]; each row is terminated by '\0'
- private int w_num_X; //Number of windows for X sequence
-
- private char[][] seq_inY_w; //Holds all Y window sequences [# of windows][sequence for each window]; each row is terminated by '\0'
- private int w_num_Y; //Number of windows for Y sequence
-
- private static String[] X_geneSynonyms; //one label per X gene, taken from the gene file in Prepare_Sequences_var_window_X
- private static String[] Y_geneSynonyms; //one label per Y gene, taken from the gene file in Prepare_Sequences_var_window_Y
-
- private float[][] score; //correlation matrix indexed [X window][Y window]
- private int[] hisPlot; //not referenced in the visible part of this file
- private float[] avgCorrX; //per-X-window average; read by the write_*Avg_Stdev methods (presumably filled by calcAvg_window_Corr - TODO confirm)
- private float[] avgCorrY; //per-Y-window average; see avgCorrX
- private float[] stdev_pearCorrX; //per-X-window spread written to the "Stdev" column
- private float[] stdev_pearCorrY; //per-Y-window spread written to the "Stdev" column
-
- private boolean[] valid_X; //NOTE: despite the name, true marks an X window as EXCLUDED (too few n-mers)
- private boolean[] valid_Y; //NOTE: despite the name, true marks a Y window as EXCLUDED (too few n-mers)
-
- private float[][] nmer_counts_X; //[X window][n-mer code] counts; later overwritten with mean-centred values
- private float[][] nmer_counts_Y; //[Y window][n-mer code] counts; later overwritten with mean-centred values
-
- private float[] w_meanVals_X; //per-X-window mean over all possible n-mers
- private float[] w_meanVals_Y; //per-Y-window mean over all possible n-mers
-
- private float[] X_square; //per-X-window sum of squared centred counts (correlation denominator)
- private float[] Y_square; //per-Y-window sum of squared centred counts (correlation denominator)
- private static String X_header; //first line of the X sequence file; printed in the report headers
- private static String Y_header; //first line of the Y sequence file; printed in the report headers
-
- //FileWriters
- private PrintWriter writeGC; //not referenced in the visible part of this file
-
- //Sean's stuff: raw arguments, progress notification, abort flag
- private String args[]; //argument vector handed to the constructor; layout documented there
- public final PropertyChangeSupport pcs = new PropertyChangeSupport( this ); //fires "progress" events during the computations
- private boolean process = false; //init() stops when this is true after a parsing step; presumably set by the file helpers on error - TODO confirm
-
- // Some OO changes
/**
 * Creates a plotter from an argument vector. Nothing is parsed here; {@link #init()}
 * does the work. The array is stored by reference, not copied.
 *
 * <p>Expected layout of {@code args_i}:
 * <pre>
 *  0 - Sequence X
 *  1 - Sequence Y
 *  2 - nmer_size
 *  3 - w_size (-1 when genes are used as windows)
 *  4 - use_complementary
 *  5 - gene file X
 *  6 - gene file Y
 *  7 - Dir for scoreMatrix file
 *  8 - Dir for avg+stdev file
 *  9 - Dir for window_GC_content file
 * 10 - Dir for maxCorr_w_pairs file
 * 11 - Dir for geneScoreMatrix file
 * 12 - Dir for geneAvgStdev file
 * 13 - Dir for gene_GC_content file
 * 14 - Dir for maxCorr_gene_pairs file
 * </pre>
 *
 * @param args_i run parameters in the order listed above
 */
public Splot(String[] args_i){
    args = args_i;
}
-
- public void init(){
- System.err.println("heLL!");
- int one=1;
- nmer_size = Integer.parseInt(args[2]);
- w_size = Integer.parseInt(args[3]);
- if(w_size!=-1)
- {
- parse_X(args[0]);
- if(process) return;
- parse_Y(args[1]);
- if(process) return;
-
- use_complementary = Boolean.valueOf(args[4]);
- score = new float[w_num_X][w_num_Y];
- valid_X = new boolean[w_num_X];
- valid_Y = new boolean[w_num_Y];
-
- num_possible_nmers = one<<(nmer_size*2);
- nmer_counts_X = new float[w_num_X][num_possible_nmers];
- nmer_counts_Y = new float[w_num_Y][num_possible_nmers];
-
- w_meanVals_X = new float[w_num_X];
- w_meanVals_Y = new float[w_num_Y];
- X_square = new float[w_num_X];
- Y_square = new float[w_num_Y];
-
- compute();
-
- //writeScoreMatrix(args[7]);
- //write_Avg_Stdev(args[8]);
- //write_GC_w_content(args[9]);
- //write_max_w_Corr(args[10]);
- }
- else
- {
- Prepare_Sequences_var_window_X(args[0], args[5]);
- if(process) return;
- Prepare_Sequences_var_window_Y(args[1], args[6]);
- if(process) return;
- use_complementary = Boolean.valueOf(args[4]);
-
- score = new float[w_num_X][w_num_Y]; //508952576 33030144
- valid_X = new boolean[w_num_X]; //C:\\DOCUME~1\\ADMINI~1\\LOCALS~1\\Temp\\NC_009925.fna
- valid_Y = new boolean[w_num_Y];
-
- // TEST WITH N-MER SIZE 3
- num_possible_nmers = one<<(nmer_size*2);
- nmer_counts_X = new float[w_num_X][num_possible_nmers];
- nmer_counts_Y = new float[w_num_Y][num_possible_nmers];
- w_meanVals_X = new float[w_num_X];
- w_meanVals_Y = new float[w_num_Y];
- X_square = new float[w_num_X];
- Y_square = new float[w_num_Y];
-
- S_Plot_Correlation_var_windows();
- //writeGeneMatrix(args[11]);
- //write_Gene_Avg_Stdev(args[12]);
- //write_GC_gene_content(args[13]);
- //write_max_gene_Corr(args[14]);
- }
- }
-
- public void parse_X(String xSquence){
- File f_X = new File(xSquence);
- BufferedReader in = getReader(f_X);
-
- StringBuilder sb = new StringBuilder();
- String line = getLine(in);
- while (line!=null) {
- line = getLine(in);
- if(line!=null){
- len_X += (long)line.length();
- sb.append(line);
- }
- }
- StringBuilder sb2 = sb;
-
- w_num_X=len_X/w_size;
- if(((len_X%w_size)/(float)w_size)>=min_percent_nt_in_w) w_num_X++;
- seq_inX_w = new char[w_num_X][w_size+1];
-
- int k = 0;
- int j = 0;
- for(int i=0; i<w_num_X; i++){
- for(j=0; j<w_size; j++){
- seq_inX_w[i][j] = sb2.charAt(k);
- if(k<(len_X-1)){k++;}
- else{break;}
- }
- seq_inX_w[i][j]='\0';
- }
- }
- public void parse_Y(String ySequence){
- File f_Y = new File(ySequence);
- BufferedReader in = getReader(f_Y);
-
- StringBuilder sb = new StringBuilder();
- String line = getLine(in);
- while (line!=null) {
- line = getLine(in);
- if(line!=null){
- len_Y += (long)line.length();
- sb.append(line);
- }
- }
- StringBuilder sb2 = sb;
-
- w_num_Y=len_Y/w_size;
- if(((len_Y%w_size)/(float)w_size)>=min_percent_nt_in_w) w_num_Y++;
- seq_inY_w = new char[w_num_Y][w_size+1];
-
- int k = 0;
- int j = 0;
- for(int i=0; i<w_num_Y; i++){
- for(j=0; j<w_size; j++){
- seq_inY_w[i][j] = sb2.charAt(k);
- if(k<(len_Y-1)){k++;}
- else{break;}
- }
- seq_inY_w[i][j]='\0';
- }
- }
- public void getTime() {
- Calendar cal = Calendar.getInstance();
- SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss:SS");
- System.out.println(sdf.format(cal.getTime()));
- }
- public void compute(){
- int i, j;
- int n_x, n_y;
- char c;
- String sNmer = new String();
- boolean okay = true;
-
- /*
- Sean
- */
- frame.progressBar.setStringPainted(true);
- frame.progressBar.setMinimum(0);
- frame.progressBar.setMaximum(w_num_Y+w_num_X+w_num_X);
-
- ///---------------------------X
-
- for(n_x=0;n_x<w_num_X;n_x++){
- i=-1;
- for(;;){
- i++;
- if(seq_inX_w[n_x][i]=='\0') {break;}
- c=seq_inX_w[n_x][i];
- if(c=='a'||c=='A'){sNmer+="00";}
- else if(c=='t'||c=='T'){sNmer+="01";}
- else if(c=='g'||c=='G'){sNmer+="10";}
- else if(c=='c'||c=='C'){sNmer+="11";}
- else continue;
-
-
- for(j=1;j<nmer_size;j++){
- i++;
- if(seq_inX_w[n_x][i]=='\0'){break;}
- c=seq_inX_w[n_x][i];
-
- okay=true;
- if(c=='a'||c=='A'){sNmer+="00";}
- else if(c=='t'||c=='T'){sNmer+="01";}
- else if(c=='g'||c=='G'){sNmer+="10";}
- else if(c=='c'||c=='C'){sNmer+="11";}
- else okay=false;
-
- if(!okay){j=nmer_size;}
-
- } //first nmer found
-
- if(okay){
- nmer_counts_X[n_x][Integer.parseInt(sNmer,2)]++;
- }
-
- if(use_complementary&&okay){nmer_counts_X[n_x][Integer.parseInt(giveR_ComplementX(n_x,i),2)]++;}
-
- for(;;){
- i++;
- if(seq_inX_w[n_x][i]=='\0') {
- i--;
- break; }
-
- c=seq_inX_w[n_x][i];
- if(c=='a'||c=='A'){sNmer=sNmer.substring(2);sNmer+="00";}
- else if(c=='t'||c=='T'){sNmer=sNmer.substring(2);sNmer+="01";}
- else if(c=='g'||c=='G'){sNmer=sNmer.substring(2);sNmer+="10";}
- else if(c=='c'||c=='C'){sNmer=sNmer.substring(2);sNmer+="11";}
- else break;
-
-
- nmer_counts_X[n_x][Integer.parseInt(sNmer,2)]++;
- if(use_complementary){nmer_counts_X[n_x][Integer.parseInt(giveR_ComplementX(n_x,i),2)]++;}
-
- }
-
- sNmer = new String();
-
- }
-
- for(int nmer_i=0;nmer_i<num_possible_nmers;nmer_i++){
- w_meanVals_X[n_x]+=nmer_counts_X[n_x][nmer_i]; }
-
- if(w_meanVals_X[n_x]<min_num_nmers_in_w) {valid_X[n_x]=true; continue;}
- if(w_meanVals_X[n_x]/i<min_percent_nt_in_w) {valid_X[n_x]=true; continue;}
-
- w_meanVals_X[n_x] = w_meanVals_X[n_x]/num_possible_nmers;
-
- pcs.firePropertyChange("progress", null, n_x); // progressbar stuff
-
- }//end window loop
-
- ///---------------------Y
- sNmer = new String();
-
- for(n_y=0;n_y<w_num_Y;n_y++){
- i=-1;
- for(;;){
- i++;
- if(seq_inY_w[n_y][i]=='\0'){break;}
- c=seq_inY_w[n_y][i];
- if(c=='a'||c=='A'){sNmer+="00";}
- else if(c=='t'||c=='T'){sNmer+="01";}
- else if(c=='g'||c=='G'){sNmer+="10";}
- else if(c=='c'||c=='C'){sNmer+="11";}
- else continue;
-
-
- for(j=1;j<nmer_size;j++){
- i++;
- if(seq_inY_w[n_y][i]=='\0')break;
- c=seq_inY_w[n_y][i];
-
- okay=true;
- if(c=='a'||c=='A'){sNmer+="00";}
- else if(c=='t'||c=='T'){sNmer+="01";}
- else if(c=='g'||c=='G'){sNmer+="10";}
- else if(c=='c'||c=='C'){sNmer+="11";}
- else okay=false;
-
- if(!okay){j=nmer_size;}
-
- }//found first nmer
-
- if(okay){
- nmer_counts_Y[n_y][Integer.parseInt(sNmer,2)]++;
- }
- if(use_complementary&&okay){nmer_counts_Y[n_y][Integer.parseInt(giveR_ComplementY(n_y,i),2)]++;}
-
- for(;;){
- i++;
- if(seq_inY_w[n_y][i]=='\0') {
- i--;
- break; }
-
- c=seq_inY_w[n_y][i];
- if(c=='a'||c=='A'){sNmer=sNmer.substring(2);sNmer+="00";}
- else if(c=='t'||c=='T'){sNmer=sNmer.substring(2);sNmer+="01";}
- else if(c=='g'||c=='G'){sNmer=sNmer.substring(2);sNmer+="10";}
- else if(c=='c'||c=='C'){sNmer=sNmer.substring(2);sNmer+="11";}
- else break;
-
- nmer_counts_Y[n_y][Integer.parseInt(sNmer,2)]++;
- if(use_complementary){nmer_counts_Y[n_y][Integer.parseInt(giveR_ComplementY(n_y,i),2)]++;}
-
- }
-
- sNmer = new String();
-
- }
- for(int nmer_i=0;nmer_i<num_possible_nmers;nmer_i++){w_meanVals_Y[n_y]+=nmer_counts_Y[n_y][nmer_i];}
-
- if(w_meanVals_Y[n_y]<min_num_nmers_in_w){valid_Y[n_y]=true; continue;}
- if(w_meanVals_Y[n_y]/i<min_percent_nt_in_w){valid_Y[n_y]=true; continue;}
-
- w_meanVals_Y[n_y] = w_meanVals_Y[n_y]/num_possible_nmers;
-
- pcs.firePropertyChange("progress", null, (w_num_X + n_y)); // progressbar
-
- }//end of window loop
-
- ///Computations-----------X
- for(n_x=0;n_x<w_num_X;n_x++) // for each window on X axis loop
- for(int nmer_i=0;nmer_i<num_possible_nmers;nmer_i++)
- {
- if(valid_X[n_x]) continue;
- nmer_counts_X[n_x][nmer_i]=nmer_counts_X[n_x][nmer_i]-w_meanVals_X[n_x];
- X_square[n_x]+=nmer_counts_X[n_x][nmer_i]*nmer_counts_X[n_x][nmer_i];
- }
-
- ///Computations------------Y
- for(n_y=0;n_y<w_num_Y;n_y++) // for each window on Y axis loop
- for(int nmer_i=0;nmer_i<num_possible_nmers;nmer_i++)
- {
- if(valid_Y[n_y]) continue;
- nmer_counts_Y[n_y][nmer_i]=nmer_counts_Y[n_y][nmer_i]-w_meanVals_Y[n_y];
- Y_square[n_y]+=nmer_counts_Y[n_y][nmer_i]*nmer_counts_Y[n_y][nmer_i];
- }
- //Computations------------Score
- float minScore=0;
- boolean firstTime=true;
- for(n_x=0;n_x<w_num_X;n_x++){ // for each window on X axis loop
- for(n_y=0;n_y<w_num_Y;n_y++) // for each window on Y axis loop
- {
- if(valid_X[n_x]){continue;}
- if(valid_Y[n_y]){continue;}
-
- for(int nmer_i=0;nmer_i<num_possible_nmers;nmer_i++)
- score[n_x][n_y]+=nmer_counts_X[n_x][nmer_i]*nmer_counts_Y[n_y][nmer_i];
-
- score[n_x][n_y]=score[n_x][n_y]/(float)Math.sqrt((X_square[n_x]*Y_square[n_y]));
-
- if(firstTime){minScore=score[n_x][n_y]; firstTime=false;}
- if(score[n_x][n_y]<minScore){minScore=score[n_x][n_y];}
-
- }
-
- pcs.firePropertyChange("progress", null, (w_num_X + w_num_Y + n_x)); // progressbar
-
- }
-
- //-----------------------Clear memory
- valid_X = null;
- valid_Y = null;
- nmer_counts_X = null;
- nmer_counts_Y = null;
- w_meanVals_X = null;
- w_meanVals_Y = null;
- X_square = null;
- Y_square = null;
-
- }
-
- public void Prepare_Sequences_var_window_X(String xSequence, String xGenes)
- {
- int i,j,k,l,m;
- boolean overOrigin=false;
-
- //genes for sequence x
- File f_g = new File(xGenes);
- BufferedReader in = getReader(f_g);
-
- String line = getLine(in); //gets the first line
- line=getLine(in); //gets the number of genes + word protein
- String [] result = line.split(" ");
- w_num_X=Integer.parseInt(result[0]);
-
- int[][] w_pos=new int [w_num_X][2];
- X_geneSynonyms = new String [w_num_X];
- boolean[] strand=new boolean [w_num_X];
- line=getLine(in); //gets the column headings
- for(i=0; i<w_num_X; i++)
- {
- line=getLine(in); //gets a gene
- j=line.indexOf("."); //location of end of start
- k=line.indexOf("\t"); //location of end of end
- l=line.indexOf("\t",k+1);
- l=line.indexOf("\t",l+1);
- l=line.indexOf("\t",l+1);
- l=line.indexOf("\t",l+1);
- l=line.indexOf("\t",l+1);
- m=line.lastIndexOf("\t",l-1);
- result[0]=line.substring(0,j);
- w_pos[i][0]=Integer.parseInt(result[0]);
- w_pos[i][0]--;
- result[0]=line.substring(j+2,k);
- w_pos[i][1]=Integer.parseInt(result[0]);
- w_pos[i][1]--;
- result[0]=line.substring(k+1,k+2);
- if(result[0]=="+") strand[i]=true;
- else strand[i]=false;
- X_geneSynonyms[i]=line.substring(m+1,l);
- }
-
- //sequence x
- File f_s = new File(xSequence);
- in = getReader(f_s);
-
- StringBuilder sb = new StringBuilder();
- X_header = getLine(in);
- while(line!=null)
- {
- line=getLine(in);
- if(line!=null)
- {
- len_X += (long)line.length();
- sb.append(line);
- }
- }
- StringBuilder sb2 = sb;
-
- //set up arrays
- seq_inX_w=new char [w_num_X][];
-
- for(i=0; i<w_num_X; i++)
- {
- if(w_pos[i][1]>=w_pos[i][0])
- {
- seq_inX_w[i]=new char[(w_pos[i][1]-w_pos[i][0]+2)]; //why add two?
- overOrigin=false;
- }
- else
- {
- seq_inX_w[i]=new char[(len_X-w_pos[i][0])+w_pos[i][1]+2]; //changed from (length-end)+start to (length-start)+end! -vinnie
- overOrigin=true;
- }
- }
- for(i=0; i<w_num_X; i++)
- {
- if(strand[i]) //original strand
- {
- if(overOrigin==false)
- {
- for(j=0; j<w_pos[i][1]-w_pos[i][0]+1; j++){
- seq_inX_w[i][j]=sb2.charAt(w_pos[i][0]+j);
- }
- seq_inX_w[i][j]='\0';
- }
- else
- {
- k=0;
- for(j=w_pos[i][0]; j<len_X; j++)
- {
- seq_inX_w[i][k]=sb2.charAt(j);
- k++;
- }
- for(j=0; j<=w_pos[i][1]; j++)
- {
- seq_inX_w[i][k]=sb2.charAt(j);
- k++;
- }
- seq_inX_w[i][k]='\0';
- }
- }
- else //complementary strand
- {
- if(overOrigin==false)
- {
- for(j=0; j<w_pos[i][1]-w_pos[i][0]+1; j++)
- {
- seq_inX_w[i][j]=sb2.charAt(w_pos[i][1]-w_pos[i][0]-j);
- switch(seq_inX_w[i][j]){
- case 'a': case 'A': seq_inX_w[i][j]='T'; break;
- case 't': case 'T': seq_inX_w[i][j]='A'; break;
- case 'g': case 'G': seq_inX_w[i][j]='C'; break;
- case 'c': case 'C': seq_inX_w[i][j]='G'; break;
- default: seq_inX_w[i][j]='N';
- }
- }
- seq_inX_w[i][j]='\0';
- }
- else
- {
- k=0;
- j=w_pos[i][1];
- while(j>=0)
- {
- seq_inX_w[i][k]= sb2.charAt(j);
- j--;
- switch(seq_inX_w[i][k]){
- case 'a': case 'A': seq_inX_w[i][k]='T'; break;
- case 't': case 'T': seq_inX_w[i][k]='A'; break;
- case 'g': case 'G': seq_inX_w[i][k]='C'; break;
- case 'c': case 'C': seq_inX_w[i][k]='G'; break;
- default: seq_inX_w[i][k]='N';
- }
- k++;
- }
- for(j=len_X-1; j>=w_pos[i][0]; j--)
- {
- seq_inX_w[i][k]=sb2.charAt(j);
- switch(seq_inX_w[i][k]){
- case 'a': case 'A': seq_inX_w[i][k]='T'; break;
- case 't': case 'T': seq_inX_w[i][k]='A'; break;
- case 'g': case 'G': seq_inX_w[i][k]='C'; break;
- case 'c': case 'C': seq_inX_w[i][k]='G'; break;
- default: seq_inX_w[i][k]='N';
- }
- k++;
- }
- seq_inX_w[i][k]='\0';
- }
- }
- }
- }
- public void Prepare_Sequences_var_window_Y(String ySequence, String yGenes)
- {
- int i,j,k,l,m;
- boolean overOrigin=false;
-
- //genes for sequence y
- File f_g = new File(yGenes);
- BufferedReader in = getReader(f_g);
-
- String line = getLine(in); //gets the first line
- line=getLine(in); //gets the number of genes + word protein
- String [] result = line.split(" ");
- w_num_Y=Integer.parseInt(result[0]);
-
- int[][] w_pos=new int [w_num_Y][2];
- boolean[] strand=new boolean [w_num_Y];
- Y_geneSynonyms = new String [w_num_Y];
-
- line=getLine(in); //get the column headings
- for(i=0; i<w_num_Y; i++)
- {
- line=getLine(in); //gets a gene
- j=line.indexOf("."); //location of end of start
- k=line.indexOf("\t"); //location of end of end
- l=line.indexOf("\t",k+1);
- l=line.indexOf("\t",l+1);
- l=line.indexOf("\t",l+1);
- l=line.indexOf("\t",l+1);
- l=line.indexOf("\t",l+1);
- m=line.lastIndexOf("\t",l-1);
- result[0]=line.substring(0,j);
- w_pos[i][0]=Integer.parseInt(result[0]);
- w_pos[i][0]--;
- result[0]=line.substring(j+2,k);
- w_pos[i][1]=Integer.parseInt(result[0]);
- w_pos[i][1]--;
- result[0]=line.substring(k+1,k+2);
- if(result[0]=="+") strand[i]=true;
- else strand[i]=false;
- Y_geneSynonyms[i]=line.substring(m+1,l);
- }
-
- //sequence y
- File f_s = new File(ySequence);
- in = getReader(f_s);
-
- StringBuilder sb = new StringBuilder();
- Y_header = getLine(in);
- while(line!=null)
- {
- line=getLine(in);
- if(line!=null)
- {
- len_Y += (long)line.length();
- sb.append(line);
- }
- }
- StringBuilder sb2 = sb;
-
- //set up the array ready to read in
- seq_inY_w=new char [w_num_Y][];
- for(i=0; i<w_num_Y; i++)
- {
- if(w_pos[i][1]>=w_pos[i][0])
- {
- seq_inY_w[i]=new char[(w_pos[i][1]-w_pos[i][0]+2)];
- overOrigin=false;
- }
- else
- {
- seq_inY_w[i]=new char[(len_Y-w_pos[i][0])+w_pos[i][1]+2];
- overOrigin=true;
- }
- }
- for(i=0; i<w_num_Y; i++)
- {
- if(strand[i]) //original strand
- {
- if(overOrigin==false)
- {
- for(j=0; j<w_pos[i][1]-w_pos[i][0]+1; j++){
- seq_inY_w[i][j]=sb2.charAt(w_pos[i][0]+j);
- }
- seq_inY_w[i][j]='\0';
- }
- else
- {
- k=0;
- for(j=w_pos[i][0]; j<len_Y; j++)
- {
- seq_inY_w[i][k]=sb2.charAt(j);
- k++;
- }
- for(j=0; j<=w_pos[i][1]; j++)
- {
- seq_inY_w[i][k]=sb2.charAt(j);
- k++;
- }
- seq_inY_w[i][k]='\0';
- }
- }
- else //complementary strand
- {
- if(overOrigin==false)
- {
- for(j=0; j<w_pos[i][1]-w_pos[i][0]+1; j++)
- {
- seq_inY_w[i][j]=sb2.charAt(w_pos[i][1]-w_pos[i][0]-j);
- switch(seq_inY_w[i][j]){
- case 'a': case 'A': seq_inY_w[i][j]='T'; break;
- case 't': case 'T': seq_inY_w[i][j]='A'; break;
- case 'g': case 'G': seq_inY_w[i][j]='C'; break;
- case 'c': case 'C': seq_inY_w[i][j]='G'; break;
- default: seq_inY_w[i][j]='N';
- }
- }
- seq_inY_w[i][j]='\0';
- }
- else
- {
- k=0;
- j=w_pos[i][1];
-
- while(j>=0)
- {
- seq_inY_w[i][k]= sb2.charAt(j);
- j--;
- switch(seq_inY_w[i][k]){
- case 'a': case 'A': seq_inY_w[i][k]='T'; break;
- case 't': case 'T': seq_inY_w[i][k]='A'; break;
- case 'g': case 'G': seq_inY_w[i][k]='C'; break;
- case 'c': case 'C': seq_inY_w[i][k]='G'; break;
- default: seq_inY_w[i][k]='N';
- }
- k++;
- }
- for(j=len_Y-1; j>=w_pos[i][0]; j--)
- {
- seq_inY_w[i][k]=sb2.charAt(j);
- switch(seq_inY_w[i][k]){
- case 'a': case 'A': seq_inY_w[i][k]='T'; break;
- case 't': case 'T': seq_inY_w[i][k]='A'; break;
- case 'g': case 'G': seq_inY_w[i][k]='C'; break;
- case 'c': case 'C': seq_inY_w[i][k]='G'; break;
- default: seq_inY_w[i][k]='N';
- }
- k++;
- }
- seq_inY_w[i][k]='\0';
- }
- }
- }
- }
- public void S_Plot_Correlation_var_windows()
- {
- int i, j;
- int n_x, n_y;
- char c;
- String sNmer = new String();
- boolean okay = true;
-
- frame.progressBar.setStringPainted(true);
- frame.progressBar.setMinimum(0);
- frame.progressBar.setMaximum(w_num_Y+w_num_X+w_num_X);
-
- ///---------------------------X
- for(n_x=0;n_x<w_num_X;n_x++){
- i=-1;
- for(;;){
- i++;
- if(seq_inX_w[n_x][i]=='\0') {break;}
- c=seq_inX_w[n_x][i];
- if(c=='a'||c=='A'){sNmer+="00";}
- else if(c=='t'||c=='T'){sNmer+="01";}
- else if(c=='g'||c=='G'){sNmer+="10";}
- else if(c=='c'||c=='C'){sNmer+="11";}
- else continue;
-
-
- for(j=1;j<nmer_size;j++){
- i++;
- if(seq_inX_w[n_x][i]=='\0'){break;}
- c=seq_inX_w[n_x][i];
-
- okay=true;
- if(c=='a'||c=='A'){sNmer+="00";}
- else if(c=='t'||c=='T'){sNmer+="01";}
- else if(c=='g'||c=='G'){sNmer+="10";}
- else if(c=='c'||c=='C'){sNmer+="11";}
- else okay=false;
-
- if(!okay){j=nmer_size;}
-
- } //first nmer found
-
- if(okay){
- nmer_counts_X[n_x][Integer.parseInt(sNmer,2)]++;
- }
-
- if(use_complementary&&okay){nmer_counts_X[n_x][Integer.parseInt(giveR_ComplementX(n_x,i),2)]++;}
-
- for(;;){
- i++;
- if(seq_inX_w[n_x][i]=='\0') {
- i--;
- break; }
-
- c=seq_inX_w[n_x][i];
- if(c=='a'||c=='A'){sNmer=sNmer.substring(2);sNmer+="00";}
- else if(c=='t'||c=='T'){sNmer=sNmer.substring(2);sNmer+="01";}
- else if(c=='g'||c=='G'){sNmer=sNmer.substring(2);sNmer+="10";}
- else if(c=='c'||c=='C'){sNmer=sNmer.substring(2);sNmer+="11";}
- else break;
-
- nmer_counts_X[n_x][Integer.parseInt(sNmer,2)]++;
- if(use_complementary){nmer_counts_X[n_x][Integer.parseInt(giveR_ComplementX(n_x,i),2)]++;}
- }
- sNmer = new String();
- }
-
- //validation checks
- for(int nmer_i=0;nmer_i<num_possible_nmers;nmer_i++){w_meanVals_X[n_x]+=nmer_counts_X[n_x][nmer_i];}
- if(w_meanVals_X[n_x]<min_num_nmers_in_w) {valid_X[n_x]=true; continue;}
- if(w_meanVals_X[n_x]/i<min_percent_nt_in_w) {valid_X[n_x]=true; continue;}
-
- //take % because other variable window may be significantly different size
- w_meanVals_X[n_x]=0;
- for(int nmer_i=0;nmer_i<num_possible_nmers;nmer_i++)
- {
- nmer_counts_X[n_x][nmer_i]=(nmer_counts_X[n_x][nmer_i]/(float)i);
- w_meanVals_X[n_x]+=nmer_counts_X[n_x][nmer_i];
- }
- w_meanVals_X[n_x]=w_meanVals_X[n_x]/num_possible_nmers;
-
- pcs.firePropertyChange("progress", null, n_x); // progressbar stuff
-
- }//end window loop
-
- ///---------------------Y
- sNmer = new String();
- for(n_y=0;n_y<w_num_Y;n_y++){
- i=-1;
- for(;;){
- i++;
- if(seq_inY_w[n_y][i]=='\0'){break;}
- c=seq_inY_w[n_y][i];
- if(c=='a'||c=='A'){sNmer+="00";}
- else if(c=='t'||c=='T'){sNmer+="01";}
- else if(c=='g'||c=='G'){sNmer+="10";}
- else if(c=='c'||c=='C'){sNmer+="11";}
- else continue;
-
-
- for(j=1;j<nmer_size;j++){
- i++;
- if(seq_inY_w[n_y][i]=='\0')break;
- c=seq_inY_w[n_y][i];
-
- okay=true;
- if(c=='a'||c=='A'){sNmer+="00";}
- else if(c=='t'||c=='T'){sNmer+="01";}
- else if(c=='g'||c=='G'){sNmer+="10";}
- else if(c=='c'||c=='C'){sNmer+="11";}
- else okay=false;
-
- if(!okay){j=nmer_size;}
-
- }//found first nmer
-
- if(okay){
- nmer_counts_Y[n_y][Integer.parseInt(sNmer,2)]++;
- }
- if(use_complementary&&okay){nmer_counts_Y[n_y][Integer.parseInt(giveR_ComplementY(n_y,i),2)]++;}
-
- for(;;){
- i++;
- if(seq_inY_w[n_y][i]=='\0') {
- i--;
- break; }
-
- c=seq_inY_w[n_y][i];
- if(c=='a'||c=='A'){sNmer=sNmer.substring(2);sNmer+="00";}
- else if(c=='t'||c=='T'){sNmer=sNmer.substring(2);sNmer+="01";}
- else if(c=='g'||c=='G'){sNmer=sNmer.substring(2);sNmer+="10";}
- else if(c=='c'||c=='C'){sNmer=sNmer.substring(2);sNmer+="11";}
- else break;
-
- nmer_counts_Y[n_y][Integer.parseInt(sNmer,2)]++;
- if(use_complementary){nmer_counts_Y[n_y][Integer.parseInt(giveR_ComplementY(n_y,i),2)]++;}
-
- }
-
- sNmer = new String();
-
- }
- //validation
- for(int nmer_i=0;nmer_i<num_possible_nmers;nmer_i++){w_meanVals_Y[n_y]+=nmer_counts_Y[n_y][nmer_i];}
- if(w_meanVals_Y[n_y]<min_num_nmers_in_w){valid_Y[n_y]=true; continue;}
- if(w_meanVals_Y[n_y]/i<min_percent_nt_in_w){valid_Y[n_y]=true; continue;}
-
- //take % because other variable window may be significantly different size
- w_meanVals_Y[n_y]=0;
- for(int nmer_i=0;nmer_i<num_possible_nmers;nmer_i++)
- {
- nmer_counts_Y[n_y][nmer_i]=(nmer_counts_Y[n_y][nmer_i]/(float)i);
- w_meanVals_Y[n_y]+=nmer_counts_Y[n_y][nmer_i];
- }
- w_meanVals_Y[n_y] = w_meanVals_Y[n_y]/num_possible_nmers;
-
- pcs.firePropertyChange("progress", null, (w_num_X + n_y)); // progressbar
-
- }//end of window loop
-
- ///Computations-----------X
- for(n_x=0;n_x<w_num_X;n_x++) // for each window on X axis loop
- for(int nmer_i=0;nmer_i<num_possible_nmers;nmer_i++)
- {
- if(valid_X[n_x]) continue;
- nmer_counts_X[n_x][nmer_i]=nmer_counts_X[n_x][nmer_i]-w_meanVals_X[n_x];
- X_square[n_x]+=nmer_counts_X[n_x][nmer_i]*nmer_counts_X[n_x][nmer_i];
- }
-
- ///Computations------------Y
- for(n_y=0;n_y<w_num_Y;n_y++) // for each window on Y axis loop
- for(int nmer_i=0;nmer_i<num_possible_nmers;nmer_i++)
- {
- if(valid_Y[n_y]) continue;
- nmer_counts_Y[n_y][nmer_i]=nmer_counts_Y[n_y][nmer_i]-w_meanVals_Y[n_y];
- Y_square[n_y]+=nmer_counts_Y[n_y][nmer_i]*nmer_counts_Y[n_y][nmer_i];
- }
-
-
- //Computations------------Score
- for(n_x=0;n_x<w_num_X;n_x++){ // for each window on X axis loop
- for(n_y=0;n_y<w_num_Y;n_y++) // for each window on Y axis loop
- {
- if(valid_X[n_x]){continue;}
- if(valid_Y[n_y]){continue;}
-
- for(int nmer_i=0;nmer_i<num_possible_nmers;nmer_i++)
- score[n_x][n_y]+=nmer_counts_X[n_x][nmer_i]*nmer_counts_Y[n_y][nmer_i];
-
- score[n_x][n_y]=score[n_x][n_y]/(float)Math.sqrt((X_square[n_x]*Y_square[n_y]));
- }
-
- pcs.firePropertyChange("progress", null, (w_num_X + w_num_Y + n_x)); // progressbar
-
- }
-
- //-----------------------Clear memory
- valid_X = null;
- valid_Y = null;
- nmer_counts_X = null;
- nmer_counts_Y = null;
- w_meanVals_X = null;
- w_meanVals_Y = null;
- X_square = null;
- Y_square = null;
-
- }
-
- public void writeScoreMatrix(String writeLocation){
- String f_score = writeLocation;
- File F_score = new File(f_score);
- try{
- if(F_score.createNewFile()){System.out.println("File F_score was created");}
- else{System.out.println("File F_score was not created");}
- }
- catch(IOException e){System.out.println("I/O Error: writeScoreMatrix");}
-
- PrintWriter out = openWriter(F_score);
-
- Calendar cal = Calendar.getInstance();
- DateFormat df = DateFormat.getDateTimeInstance(DateFormat.FULL, DateFormat.MEDIUM);
-
- out.print("Pearson Correlation Values");
- out.println("\t\t\t" + df.format(cal.getTime()));
- out.println();
- out.println("Sequence X:\t" + X_header);
- out.println("Sequence Y:\t" + Y_header);
- out.println();
- out.println("Window Size:\t" + w_size);
- out.println("Nmer Size:\t" + nmer_size);
- out.println("Compl. considered:\t" + use_complementary);
- out.println();
- out.println();
- out.println();
-
- out.print("\t0");
- for(int p=1;p<w_num_Y;p++){out.print("\t"+p);}
- out.println();
-
- for(int k=0; k<w_num_X; k++){
- StringBuilder sb2 = new StringBuilder();
- sb2.append(k);
- for(int l=0;l<w_num_Y;l++){
- String v = "\t" + score[k][l];
- sb2.append(v);
- if(l==(w_num_Y-1)){out.println(sb2);}
- }
- }
- out.close();
- }
- public void writeGeneMatrix(String writeLocation){
- String f_geneMatrix = writeLocation;
- File F_geneMatrix = new File(f_geneMatrix);
- try{
- if(F_geneMatrix.createNewFile()){System.out.println("File F_geneMatrix was created");}
- else{System.out.println("File F_geneMatrix was not created");}
- }
- catch(IOException e){System.out.println("I/O Error: writeGeneMatrix");}
-
- PrintWriter out = openWriter(F_geneMatrix);
-
- Calendar cal = Calendar.getInstance();
- DateFormat df = DateFormat.getDateTimeInstance(DateFormat.FULL, DateFormat.MEDIUM);
-
- out.print("Pearson Correlation Gene Values");
- out.println("\t\t\t" + df.format(cal.getTime()));
- out.println();
- out.println("Sequence X:\t" + X_header);
- out.println("Sequence Y:\t" + Y_header);
- out.println();
- out.println("Nmer Size:\t3");
- out.println("Compl. considered:\t" + use_complementary);
- out.println();
- out.println();
- out.println();
-
- String syn;
- for(int p=0;p<w_num_Y;p++){
- syn = Y_geneSynonyms[p];
- out.print("\t"+syn);
- }
- out.println();
-
- for(int k=0; k<w_num_X; k++){
- StringBuilder sb2 = new StringBuilder();
- syn = X_geneSynonyms[k];
- sb2.append(syn);
- for(int l=0;l<w_num_Y;l++){
- String v = "\t" + score[k][l];
- sb2.append(v);
- if(l==(w_num_Y-1)){out.println(sb2);}
- }
- }
- out.close();
- }
- public void write_Avg_Stdev(String writeLocation){
- calcAvg_window_Corr();
- stdev_pearCorrX = new float [w_num_X];
- stdev_pearCorrY = new float [w_num_Y];
-
- String f_avg_stdev = writeLocation;
- File F_avg_stdev = new File(f_avg_stdev);
- try{
- if(F_avg_stdev.createNewFile()){System.out.println("File F_avg_stdev was created");}
- else{System.out.println("File F_avg_stdev was not created");}
- }
- catch(IOException e){System.out.println("I/O Error: write_Avg_Stdev");}
-
- PrintWriter out = openWriter(F_avg_stdev);
-
- Calendar cal = Calendar.getInstance();
- DateFormat df = DateFormat.getDateTimeInstance(DateFormat.FULL, DateFormat.MEDIUM);
-
- out.print("Average and Standard Deviations");
- out.println("\t\t\t" + df.format(cal.getTime()));
- out.println();
- out.println("Sequence X:\t" + X_header);
- out.println("Sequence Y:\t" + Y_header);
- out.println();
- out.println("Window Size:\t" + w_size);
- out.println("Nmer Size:\t" + nmer_size);
- out.println("Compl. considered:\t" + use_complementary);
- out.println();
- out.println();
- out.println();
-
- out.println("Sequence X:");
- out.println("Window\tAvg\tStdev");
-
- float sum_of_the_difference=0;
- for(int i=0;i<w_num_X;i++){
- for(int j=0;j<w_num_Y;j++){
- sum_of_the_difference+=Math.abs(score[i][j]-avgCorrX[i]);
- }
- stdev_pearCorrX[i]=sum_of_the_difference/(float)w_num_Y;
- sum_of_the_difference=0;
- }
-
- sum_of_the_difference=0;
- for(int i=0;i<w_num_Y;i++){
- for(int j=0;j<w_num_X;j++){
- sum_of_the_difference+=Math.abs(score[j][i]-avgCorrY[i]);
- }
- stdev_pearCorrY[i]=sum_of_the_difference/(float)w_num_X;
- sum_of_the_difference=0;
- }
-
- for(int i=0;i<w_num_X;i++){
- out.println(i+"\t"+avgCorrX[i]+"\t"+stdev_pearCorrX[i]);
- }
-
- out.println();
- out.println();
- out.println("Sequence Y:");
- out.println("Window\tAvg\tStdev");
- for(int i=0;i<w_num_Y;i++){
- out.println(i+"\t"+avgCorrY[i]+"\t"+stdev_pearCorrY[i]);
- }
-
- avgCorrX = null;
- avgCorrY = null;
- stdev_pearCorrX = null;
- stdev_pearCorrY = null;
- out.close();
- }
- public void write_Gene_Avg_Stdev(String writeLocation){
- calcAvg_window_Corr();
- String f_gene_avg_stdev = writeLocation;
- File F_gene_avg_stdev = new File(f_gene_avg_stdev);
- try{
- if(F_gene_avg_stdev.createNewFile()){System.out.println("File F_gene_avg_stdev was created");}
- else{System.out.println("File F_gene_avg_stdev was not created");}
- }
- catch(IOException e){System.out.println("I/O Error: write_Gene_Avg_Stdev");}
-
- PrintWriter out = openWriter(F_gene_avg_stdev);
-
- Calendar cal = Calendar.getInstance();
- DateFormat df = DateFormat.getDateTimeInstance(DateFormat.FULL, DateFormat.MEDIUM);
-
- out.print("Gene Average and Standard Deviations");
- out.println("\t\t\t" + df.format(cal.getTime()));
- out.println();
- out.println("Sequence X:\t" + X_header);
- out.println("Sequence Y:\t" + Y_header);
- out.println();
- out.println("Nmer Size:\t" + 3);
- out.println("Compl. considered:\t" + use_complementary);
- out.println();
- out.println();
- out.println();
-
- out.println("Sequence X:");
- out.println("Gene\tAvg\tStdev");
-
- //finding stdev of each window in Sequence X
- float sum_of_the_difference=0;
- for(int i=0;i<w_num_X;i++){
- for(int j=0;j<w_num_Y;i++){
- sum_of_the_difference+=Math.abs(score[i][j]-avgCorrX[i]);
- }
- stdev_pearCorrX[i]=sum_of_the_difference/(float)w_num_Y;
- sum_of_the_difference=0;
- }
-
- //finding stdev of each window in Sequence Y
- sum_of_the_difference=0;
- for(int i=0;i<w_num_Y;i++){
- for(int j=0;j<w_num_X;i++){
- sum_of_the_difference+=Math.abs(score[j][i]-avgCorrY[i]);
- }
- stdev_pearCorrY[i]=sum_of_the_difference/(float)w_num_X;
- sum_of_the_difference=0;
- }
-
- //writing out avg and stdev for each window in sequence X
- for(int i=0;i<w_num_X;i++){
- out.println(X_geneSynonyms[i]+"\t"+avgCorrX[i]+"\t"+stdev_pearCorrX[i]);
- }
-
- out.println();
- out.println();
- out.println("Sequence Y:");
- out.println("Gene\tAvg\tStdev");
-
- //writing out avg and stdev for each window in sequence Y
- for(int i=0;i<w_num_Y;i++){
- out.println(Y_geneSynonyms[i]+"\t"+avgCorrY[i]+"\t"+stdev_pearCorrY[i]);
- }
- out.println();
-
- avgCorrX = null;
- avgCorrY = null;
- stdev_pearCorrX = null;
- stdev_pearCorrY = null;
- out.close();
- }
- public void write_GC_w_content(String writeLocation){
- String f_w_GC_content = writeLocation;
- File F_w_GC_content = new File(f_w_GC_content);
- try{
- if(F_w_GC_content.createNewFile()){System.out.println("File F_w_GC_content was created");}
- else{System.out.println("File F_w_GC_content was not created");}
- }
- catch(IOException e){System.out.println("I/O Error: write_GC_w_content");}
-
- PrintWriter out = openWriter(F_w_GC_content);
-
- Calendar cal = Calendar.getInstance();
- DateFormat df = DateFormat.getDateTimeInstance(DateFormat.FULL, DateFormat.MEDIUM);
-
- out.print("G+C window content");
- out.println("\t\t\t" + df.format(cal.getTime()));
- out.println();
- out.println("Sequence X:\t" + X_header);
- out.println("Sequence Y:\t" + Y_header);
- out.println();
- out.println("Window size:\t"+w_size);
- out.println("Nmer Size:\t" + nmer_size);
- out.println("Compl. considered:\t" + use_complementary);
- out.println();
- out.println();
- out.println();
- out.println("Sequence X:");
- out.println("Window\tG+C");
-
- //writing out the GC content of each window in sequence X
- for(int i=0;i<w_num_X;i++){
- out.println(i+"\t"+calcGC(i,true));
- }
-
- out.println();
- out.println();
- out.println("Sequence Y:");
- out.println("Window\tG+C");
-
- //writing out the GC content of each window in sequence Y
- for(int i=0;i<w_num_Y;i++){
- out.println(i+"\t"+calcGC(i,false));
- }
- out.println();
- out.close();
- }
- public void write_GC_gene_content(String writeLocation){
-
- String f_gene_GC_content = writeLocation;
- File F_gene_GC_content = new File(f_gene_GC_content);
-
- try{
- if(F_gene_GC_content.createNewFile()){System.out.println("File F_gene_GC_content was created");}
- else{System.out.println("File F_gene_GC_content was not created");}
- }
- catch(IOException e){System.out.println("I/O Error: write_GC_gene_content");}
-
- PrintWriter out = openWriter(F_gene_GC_content);
-
- Calendar cal = Calendar.getInstance();
- DateFormat df = DateFormat.getDateTimeInstance(DateFormat.FULL, DateFormat.MEDIUM);
-
- out.print("G+C gene content");
- out.println("\t\t\t" + df.format(cal.getTime()));
- out.println();
- out.println("Sequence X:\t" + X_header);
- out.println("Sequence Y:\t" + Y_header);
- out.println();
- out.println("Nmer Size:\t" + 3);
- out.println("Compl. considered:\t" + use_complementary);
- out.println();
- out.println();
- out.println();
- out.println("Sequence X:");
- out.println("Gene\tG+C");
-
- //writing out the GC content of each gene in sequence X
- for(int i=0;i<w_num_X;i++){
- out.println(X_geneSynonyms[i]+"\t"+calcGC(i,true));
- }
-
- out.println();
- out.println();
- out.println("Sequence Y:");
- out.println("Gene\tG+C");
-
- //writing out the GC content of each gene in sequence Y
- for(int i=0;i<w_num_Y;i++){
- out.println(Y_geneSynonyms[i]+"\t"+calcGC(i,false));
- }
- out.println();
- out.close();
- }
- public void write_max_w_Corr(String writeLocation){
- String f_maxCorrs = writeLocation;
- File F_maxCorrs = new File(f_maxCorrs);
- try{
- if(F_maxCorrs.createNewFile()){System.out.println("File F_maxCorrs was created");}
- else{System.out.println("File F_maxCorrs was not created");}
- }
- catch(IOException e){System.out.println("I/O Error: writeScoreMatrix");}
-
- PrintWriter out = openWriter(F_maxCorrs);
-
- Calendar cal = Calendar.getInstance();
- DateFormat df = DateFormat.getDateTimeInstance(DateFormat.FULL, DateFormat.MEDIUM);
-
- out.print("Highest Correlated Windows");
- out.println("\t\t\t" + df.format(cal.getTime()));
- out.println();
- out.println("Sequence X:\t" + X_header);
- out.println("Sequence Y:\t" + Y_header);
- out.println();
- out.println("Window Size:\t" + w_size);
- out.println("Nmer Size:\t" + nmer_size);
- out.println("Compl. considered:\t" + use_complementary);
- out.println();
- out.println();
- out.println();
-
- float maxScore=score[0][0];
- int max_Ypos=0,max_Xpos=0;
-
- //writing out the highest correlated window in Y for every window in X
- out.println("Sequence X:");
- out.println("window in X\tValue\twindow in Y");
- for(int k=0; k<w_num_X; k++){
- for(int l=0;l<w_num_Y;l++){
- if(score[k][l]>maxScore){
- maxScore=score[k][l];
- max_Ypos=l;
- }
- }
- out.println(k+"\t"+score[k][max_Ypos]+"\t"+max_Ypos);
- if((k+1)!=w_num_X){maxScore=score[k+1][0];}
- }
- maxScore = score[0][0];
-
- out.println();
- out.println();
-
- //writing out the highest correlated window in X for every window in Y
- out.println("Sequence Y:");
- out.println("window in Y\tValue\twindow in X");
- for(int k=0; k<w_num_Y; k++){
- for(int l=0;l<w_num_X;l++){
- if(score[l][k]>maxScore){
- maxScore=score[l][k];
- max_Xpos=l;
- }
- }
- out.println(k+"\t"+score[max_Xpos][k]+"\t"+max_Xpos);
- if((k+1)!=w_num_Y){maxScore=score[0][k+1];}
- }
- out.close();
-
- }
- public void write_max_gene_Corr(String writeLocation){
- String f_maxCorrs = writeLocation;
- File F_maxCorrs = new File(f_maxCorrs);
- try{
- if(F_maxCorrs.createNewFile()){System.out.println("File F_maxCorrs was created");}
- else{System.out.println("File F_maxCorrs was not created");}
- }
- catch(IOException e){System.out.println("I/O Error: writeScoreMatrix");}
-
- PrintWriter out = openWriter(F_maxCorrs);
-
- Calendar cal = Calendar.getInstance();
- DateFormat df = DateFormat.getDateTimeInstance(DateFormat.FULL, DateFormat.MEDIUM);
-
- out.print("Highest Correlated Genes");
- out.println("\t\t\t" + df.format(cal.getTime()));
- out.println();
- out.println("Sequence X:\t" + X_header);
- out.println("Sequence Y:\t" + Y_header);
- out.println();
- out.println("Nmer Size:\t" + 3);
- out.println("Compl. considered:\t" + use_complementary);
- out.println();
- out.println();
- out.println();
-
- float maxScore=score[0][0];
- int max_Ypos=0,max_Xpos=0;
-
- //writing out the highest correlated window in Y for every window in X
- out.println("Sequence X:");
- out.println("window in X\tValue\twindow in Y");
- for(int k=0; k<w_num_X; k++){
- for(int l=0;l<w_num_Y;l++){
- if(score[k][l]>maxScore){
- maxScore=score[k][l];
- max_Ypos=l;
- }
- }
- out.println(X_geneSynonyms[k]+"\t"+score[k][max_Ypos]+"\t"+Y_geneSynonyms[max_Ypos]);
- if((k+1)!=w_num_X){maxScore=score[k+1][0];}
- }
- maxScore = score[0][0];
-
- out.println();
- out.println();
-
- //writing out the highest correlated window in X for every window in Y
- out.println("Sequence Y:");
- out.println("window in Y\tValue\twindow in X");
- for(int k=0; k<w_num_Y; k++){
- for(int l=0;l<w_num_X;l++){
- if(score[l][k]>maxScore){
- maxScore=score[l][k];
- max_Xpos=l;
- }
- }
- out.println(Y_geneSynonyms[k]+"\t"+score[max_Xpos][k]+"\t"+max_Xpos);
- if((k+1)!=w_num_Y){maxScore=score[0][k+1];}
- }
-
- out.close();
- X_geneSynonyms = null;
- Y_geneSynonyms = null;
- }
-
-
- public void calcAvg_window_Corr(){
- float numerator = 0;
- avgCorrX = new float [w_num_X];
- avgCorrY = new float [w_num_Y];
-
- //Calculating averages for the X sequence
- for(int k=0; k<w_num_X; k++){
- for(int l=0;l<w_num_Y;l++){
- numerator = score[k][l] + numerator;
- }
- avgCorrX[k] = numerator/(float)w_num_Y;
- numerator = 0;
- }
-
- //Calculating averages for the Y sequence
- for(int k=0; k<w_num_Y; k++){
- for(int l=0;l<w_num_X;l++){
- numerator = score[l][k] + numerator;
- }
- avgCorrY[k] = numerator/(float)w_num_X;
- numerator = 0;
- }
- }
- public float calcGC(int w_num, boolean XorY)
- {
- int gc=0;
- int i=-1;
- float perGC=0;
-
- if(XorY){ //XorY=true means your looking in the X sequence
- do{
- i++;
- if((seq_inX_w[w_num][i]=='c')||(seq_inX_w[w_num][i]=='C')||
- (seq_inX_w[w_num][i]=='g')||(seq_inX_w[w_num][i]=='G')) {gc++;}
-
- }while(seq_inX_w[w_num][i]!='\0');
-
- perGC = 100*((float)gc/(float)i);
- }//end if
-
- if(!XorY){ //XorY=false means your looking in the Y sequence
- do{
- i++;
- if((seq_inY_w[w_num][i]=='c')||(seq_inY_w[w_num][i]=='C')||
- (seq_inY_w[w_num][i]=='g')||(seq_inY_w[w_num][i]=='G')) {gc++;}
-
- }while(seq_inY_w[w_num][i]!='\0');
-
- perGC = 100*((float)gc/(float)i);
- }//end if
-
- return perGC;
- }
-
-
- /*public static void hisPlot(){
- int index;
- float maxScore=score[0][0];
-
- for(int k=0; k<w_num_X; k++){
- for(int l=0;l<w_num_Y;l++){
- if(score[k][l]>maxScore){maxScore=score[k][l];}
- }
-
- index=(int)((10-(maxScore*10))*2);
- hisPlot[index]++;
- //System.out.println((int)((10-(maxScore*10))*2));
- if((k+1)!=w_num_X){maxScore=score[k+1][0];}
- }
-
- System.out.println();
- System.out.println();
- for(int i=0;i<hisPlot.length;i++){
- System.out.println(hisPlot[i]);
- }
- }
- */
-
- public void alignMax(){
- int max_Ypos=0;
- float Xgc,Ygc;
- float maxScore=score[0][0];
-
- String path4_test_writeOut = "C:\\Documents and Settings\\NTUser\\Desktop\\FileTests\\Maxes&GC.txt";
- File writeOut_X_Y = new File(path4_test_writeOut);
- try{
- if(writeOut_X_Y.createNewFile()){System.out.println("Maxes&GC was created");}
- else{System.out.println("File Maxes&GC was not created");}
- }
- catch(IOException e){System.out.println("I/O Error");}
-
- writeGC = openWriter(writeOut_X_Y);
- writeGC.print("\t\t\t");
- //write accession
- writeGC.print("\n");
-
- for(int k=0; k<w_num_X; k++){
- for(int l=0;l<w_num_Y;l++){
- if(score[k][l]>maxScore){
- maxScore=score[k][l];
- max_Ypos=l;
- }
- }
- Xgc = calcGC(k,true);
- Ygc = calcGC(max_Ypos,false);
- writeWindowGC_Max(Xgc,Ygc,k,max_Ypos,maxScore);
- if((k+1)!=w_num_X){maxScore=score[k+1][0];}
- }
-
- System.out.println();
- System.out.println();
- for(int i=0;i<hisPlot.length;i++){
- System.out.println(hisPlot[i]);
- }
- writeGC.close();
- }
-
-
- public void writeWindowGC_Max(float GC_X, float GC_Y, int windowX, int windowY, float max){
-
- writeGC.print(GC_X+"\t"+windowX+"\t"+max+"\t"+windowY+"\t"+GC_Y+"\t\n");
- }
-
-
- public String giveR_ComplementX(int window, int nt){
-
- String reverseCNmer = new String();
- char c;
- int endNmerIndex = nt-nmer_size;
-
- for(int q=nt;q>endNmerIndex;q--){
- c=seq_inX_w[window][q];
- if(c=='a'||c=='A'){reverseCNmer+="01";}
- else if(c=='t'||c=='T'){reverseCNmer+="00";}
- else if(c=='g'||c=='G'){reverseCNmer+="11";}
- else if(c=='c'||c=='C'){reverseCNmer+="10";}
- }
- return reverseCNmer;
- }
-
- public String giveR_ComplementY(int window, int nt){
-
- String reverseCNmer = new String();
- char c;
- int endNmerIndex = nt-nmer_size;
-
- for(int q=nt;q>endNmerIndex;q--){
- c=seq_inY_w[window][q];
- if(c=='a'||c=='A'){reverseCNmer+="01";}
- else if(c=='t'||c=='T'){reverseCNmer+="00";}
- else if(c=='g'||c=='G'){reverseCNmer+="11";}
- else if(c=='c'||c=='C'){reverseCNmer+="10";}
- }
- return reverseCNmer;
- }
-
- public PrintWriter openWriter(File name){ //probably don't need this method
- try{
- PrintWriter out = new PrintWriter(new BufferedWriter(new FileWriter(name)), true);
- return out;
- }
- catch(IOException e){
- System.out.println("PrintWriter could not find parsed_f_X");
- System.exit(0);
- }
- return null;
- }
- public BufferedReader getReader(File file){
- BufferedReader in = null;
- try{
- in = new BufferedReader(new FileReader(file));
- }
- catch(IOException e){
- JOptionPane.showMessageDialog(frame.displayPanel,
- "BufferedReader I/O Error or coundn't find the file.", "Error",
- JOptionPane.ERROR_MESSAGE);
- process = true;
- }
- return in;
-
- }
- public String getLine(BufferedReader in){
- String line = null;
- try{line = in.readLine();}
- catch(IOException e){
- JOptionPane.showMessageDialog(frame.displayPanel,
- "bReader couldn't read next line.", "Error",
- JOptionPane.ERROR_MESSAGE);
- process = true;
- }
- if(line==null){return null;}
- else{return line;}
- }
- public void writeOutNmerCounts(){
-
- String path4_test_writeOut = "C:\\Documents and Settings\\NTUser\\Desktop\\FileTests\\NmerCounts.txt";
- File writeOutNmerCounts = new File(path4_test_writeOut);
- try{
- if(writeOutNmerCounts.createNewFile()){System.out.println("writeOutNmerCounts was created");}
- else{System.out.println("File writeOut_X_Y was not created");}
- }
- catch(IOException e){System.out.println("I/O Error");}
-
- PrintWriter out = openWriter(writeOutNmerCounts);
-
- for(int k=0; k<w_num_X; k++){
- StringBuilder sb2 = new StringBuilder();
- sb2.append(k);
- for(int l=0;l<num_possible_nmers;l++){
- String v = "\t" + nmer_counts_X[k][l];
- sb2.append(v);
- if(l==(num_possible_nmers-1)){out.println(sb2);}
- }
- }
- out.close();
- }
-
- //This method should be executed only when the entire genome is being looked at. It will make one tab-delimited file containing all the windows of both genomes.
- public void write_windows(String writeLocation){
- String f_wind_contents = writeLocation;
- File F_wind_contents = new File(f_wind_contents);
- try{
- if(F_wind_contents.createNewFile()){System.out.println("File F_wind_contents was created");}
- else{System.out.println("File F_wind_contents was not created");}
- }
- catch(IOException e){System.out.println("I/O Error: write_windows");}
-
- PrintWriter out = openWriter(F_wind_contents);
-
- Calendar cal = Calendar.getInstance();
- DateFormat df = DateFormat.getDateTimeInstance(DateFormat.FULL, DateFormat.MEDIUM);
-
- out.print("Sequence partitions");
- out.println("\t\t\t" + df.format(cal.getTime()));
- out.println();
- out.println("Sequence X:\t" + X_header);
- out.println("Sequence Y:\t" + Y_header);
- out.println();
- out.println("Window size:\t" + w_size);
- out.println("Nmer Size:\t" + nmer_size);
- out.println("Compl. considered:\t" + use_complementary);
- out.println();
- out.println();
- out.println();
-
- out.println("Sequence X window partitions: ");
- int i,j=0;
- for(i=0;i<w_num_X;i++){
- out.println("> " +i);
- for(j=0;j<w_size;j++){
- out.print(seq_inX_w[i][j]);
- }
- out.println();
- }
-
- out.println();
- out.println();
- out.println("Sequence Y window partitions: ");
-
- for(i=0;i<w_num_Y;i++){
- out.println("> " +i);
- for(j=0;j<w_size;j++){
- out.print(seq_inY_w[i][j]);
- }
- out.println();
- }
-
- }
-
- //This method should only be executed when only the gene are being looked at. It will make one tab-delimited file containing all the gene sequences for both genomes.
- public void write_GeneWindows(String writeLocation){
- String f_gene_contents = writeLocation;
- File F_gene_contents = new File(f_gene_contents);
- try{
- if(F_gene_contents.createNewFile()){System.out.println("File F_gene_contents was created");}
- else{System.out.printl…
Large files files are truncated, but you can click here to view the full file