/code_comm/ft1.cpp
C++ | 835 lines | 450 code | 285 blank | 100 comment | 54 complexity | 393a2823d9cfe45ab17f47739aedc20d MD5 | raw file
- /***************************************************************************
- main.cpp - description
- -------------------
- begin : Tue Nov 19 15:31:09 IST 2002
- copyright : (C) 2002 by guest of ani
- email : guest@mgldel-incorp.com
- ***************************************************************************/
-
- /***************************************************************************
- * *
- * This program is free software; you can redistribute it and/or modify *
- * it under the terms of the GNU General Public License as published by *
- * the Free Software Foundation; either version 2 of the License, or *
- * (at your option) any later version. *
- * *
- ***************************************************************************/
-
- //#include<stdio.h>
- #include<iostream.h>
- #include<stdio.h>
- #include<string.h>
- #include<stdlib.h>
- #include<ctype.h>
- #include<math.h>
- //Window Analysis code
- #include <vector>
- #include<complex>
- using namespace::std;
-
-
-
- //begin Power spectrum code
-
-
- struct structCR
- {
- long start;
- long end;
- char type;
- };
-
-
- class WindowAnalysis //__declspec(dllexport)
- {
- public:
-
-
- long *s;
- double *x;
- int pcity;
-
- vector<structCR> vecCRegion;
- WindowAnalysis();
- WindowAnalysis(int len ,double thold,long intron , long exon);
- virtual ~WindowAnalysis();
- void setWindowLen(int len);
- void settherHold(double thold);
- void setIntronExon(long intron , long exon );
- int doFt(const char * strSeq , char *logfile);
- void setpcity(int pc);
-
-
- private :
-
- int lenWindow;
- double thersHold;
- long exonCutOff;
- long intronCutOff;
-
- int findexp(int WindowSize);
- long framecount;
- complex<double> * expval;
- WindowAnalysis& operator =(const WindowAnalysis &cwa);
- WindowAnalysis(const WindowAnalysis &cwa);
-
- };
-
- WindowAnalysis::WindowAnalysis()
- {
- expval = NULL;
- s = NULL;
- x = NULL;
- framecount=0;
-
-
- }
-
- WindowAnalysis::~WindowAnalysis()
- {
-
- delete []expval;
- delete []s ;
- delete []x ;
- //printf("Destructor of WindowAnalysis ");
- fflush(stdout);
- //delete []CRegion;
- //this->vecCRegion.clear();
-
-
- }
-
- WindowAnalysis::WindowAnalysis(int len , double thold ,long intron , long exon)
- {
-
- lenWindow=len;
- thersHold=thold;
- exonCutOff=exon;
- intronCutOff =intron;
-
- s = NULL;
- x = NULL;
-
- framecount=0;
- // cout<<"ThresHold Value "<<thersHold;
- // cout<<"Intron :"<<intronCutOff<<" Exon :"<<exonCutOff<<endl;
-
- expval=new complex<double>[lenWindow];
-
- }
-
-
- void WindowAnalysis::settherHold(double thold)
- {
- thersHold =thold;
- // cout<< "ThresHold Value "<<thersHold;
-
- }
- void WindowAnalysis::setpcity(int pc)
- {
- pcity=pc;
- }
-
- void WindowAnalysis::setWindowLen(int len)
- {
- lenWindow=len;
- delete [] expval;
- expval=new complex<double>[lenWindow];
-
- }
- void WindowAnalysis::setIntronExon(long intron , long exon )
- {
- exonCutOff=exon;
- intronCutOff =intron;
-
-
- }
- int WindowAnalysis::doFt(const char * strSeq , char * logfile)
- {
-
-
- const double F3 = 1.000/pcity;
- const double PI = 3.14159274;
-
-
- //ofstream out("output" );
- //ofstream out(logfile ,ios::app);
-
- long lenStr = (strlen(strSeq)/pcity) * pcity;
- //lenWindow=(lenStr/pc)*pc;
-
- if(lenStr<lenWindow)
- {
- //out<<"Sequence length is less than windowlength."<<endl;
- return 1;
- }
-
- //cout<<"\n Sequence Length :"<<lenStr<<endl;
-
- int result= findexp(lenWindow);
-
- //long *CRegion = new long[lenStr-lenWindow+ 1];
- //long *s = new long[lenStr-lenWindow+ 1];
- //double *x = new double[lenStr-lenWindow+1];
-
- //delete [] s;
- //delete [] x;
- s = new long[lenStr-lenWindow+ 1];
- x = new double[lenStr-lenWindow+1];
-
-
- char nuclt[5]= "ATGC";
-
- double li=(1.0/lenWindow);
-
-
- double winsq = li+ (li*li);
-
- double winsq1 = (1.0/lenWindow)+(1.0/(lenWindow*lenWindow));
-
-
- int countA =0;
- int countC =0;
- int countG =0;
- int countT= 0;
-
- complex<double> s1(0.0,0.0);
-
- double sq=0.0;
- double sq1=0.0;
- double s2=0.0;
- double sbar =0.0;
- double p=0.0;
-
- framecount=0;
-
-
-
- for(long k = 0 ; k < lenStr-lenWindow+1 ; k++ )
-
- {
- sq =0.0;
- s2 =0.0;
- sbar =0.0;
- p =0.0;
-
- // Frame wise count of A C G T seperately
-
-
- if(k==0)
- {
- for(long i =k; i < k+lenWindow; i++ )
- {
-
- if( strSeq[i] == 'A')
- { countA++; continue;}
- if( strSeq[i] == 'G')
- { countG++; continue;}
- if( strSeq[i] == 'C')
- { countC++; continue;}
- if( strSeq[i] == 'T')
- { countT++; continue;}
-
- } //end of count for look.
-
- }else
- {
-
- switch(strSeq[k-1])
- {
- case 'A' :
- { countA--; break; }
- case 'T' :
- { countT--; break; }
- case 'G' :
- { countG--; break; }
- case 'C' :
- { countC--; break; }
- }
-
- switch(strSeq[k+lenWindow-1])
- {
- case 'A' :
- { countA++; break; }
- case 'T' :
- { countT++; break; }
- case 'G' :
- { countG++; break; }
- case 'C' :
- { countC++; break; }
- }
-
-
- }
-
- //out<<"Count A :"<<countA <<" Frame count :"<<k<<endl;
- //out<<"Count T :"<<countT <<" Frame count :"<<k<<endl;
- //out<<"Count G :"<<countG <<" Frame count :"<<k<<endl;
- //out<<"Count C :"<<countC <<" Frame count :"<<k<<endl;
-
- sq1 = 0.0+ pow(countA,2)+pow(countG,2) + pow(countT,2) + pow(countC,2);
-
- // for each Nucleotide. A T G C
-
-
- for(int i = 0; i <4 ; i ++ )
- {
- s1= complex<double>(0.0,0.0);
-
-
- for (int bi=0; bi<lenWindow; bi++)
- {
- if(strSeq[k+bi] == nuclt[i] )
- s1= s1+expval[bi];
- }
- //cout<<s1<<endl;
-
- s2 = s2+ (abs(s1)*abs(s1));
-
- }
-
-
- // to calculater power peak
-
-
- sq = sq1*li*li*li;
- sbar =(winsq-sq);
- sq = sbar * (lenWindow/2.0);
-
- // to calcuater signal to noise ratio
-
- s2 = s2/(sq*lenWindow*lenWindow);
-
- p= s2/sbar;
-
-
- s[framecount]=k+(lenWindow/2);
- x[framecount]=p;
-
- //out<<s[framecount]<<" "<<x[framecount]<<"\n";
- framecount++;
-
-
- } // End of Outer for loop(Frame wise)
-
- //return(0);
- //}
- // end of Ft function..
- //void WindowAnalysis::findCodingRegion(long seqfileId , const char * seqId)
- //{
-
- long *CRegion = new long[framecount+1];
- int flag=0;
-
- long i =0 , m =0;
-
- for (m =0; m<framecount; m++)
- {
-
- if(x[m]>=thersHold)
- {
- if(flag == 0)
- {
-
- CRegion[i] =s[m];
- //out<< "\n START :" <<s[m]<<" : "<<x[m];
- //i++;
- flag =1;
- }
-
- }
-
- else
- {
- if(flag==1)
- {
- if(x[m]<thersHold)
- {
-
- //out<< "\n START :" <<CRegion[i];
- //out<<"\t\tEND: "<<s[m-1]<<" : "<<x[m-1];
- i++;
- CRegion[i]=s[m-1];
- i++;
- }
- flag=0;
-
- }
-
- } // end of if.
- }// End Coding region loop.
-
-
-
- if (flag==1)
- {
- //out<< "\n START :" <<CRegion[i];
- //out<<"\t\tEND: "<<s[m-1]<<" : "<<x[m-1];
- i++;
- CRegion[i]=s[m-1];
- i++;
- }
-
- //delete []CRegion;
- // that data is deleted 07/02/02 To inprove performance.
-
-
- //out<<"\n Final Coding Regions :"<<endl;
-
- flag =0;
- int code =0;
- long sta=0 , stp =0;
- structCR obj;
- obj.type=' ';
-
- for(int j=0 ; j<i ; j +=2)
- {
- if(flag ==0)
- {
- sta = CRegion[j];
- stp = CRegion[j+1];
- if ( (stp-sta)>= exonCutOff ) code =1; else code =0;
- flag = 1 ;
- }
- else
- {
- if( (CRegion[j]-CRegion[j-1]) >=intronCutOff )
- {
- if(code ==1)
- {
- //out <<"\nStart :"<<sta <<" End :" << stp;
- // inset in database start new set.
- obj.start=sta;
- obj.end=stp;
- vecCRegion.push_back(obj);
- //insetcodes( sta , stp,seqfileId,seqId);
- sta = CRegion[j];
- stp = CRegion[j+1];
- if ( (stp-sta)>= exonCutOff ) code =1; else code =0; flag = 1 ;
- }
- else
- {
- //cout << forget the old set.
- if ((stp-sta)>= exonCutOff )
- {
- //out <<"\nStart :"<<sta <<" End :" << stp;
- obj.start=sta;
- obj.end=stp;
- vecCRegion.push_back(obj);
- //insetcodes( sta , stp,seqfileId,seqId);
- }
- // may need to be commented later.
- sta = CRegion[j];
- stp = CRegion[j+1];
- if ( (stp-sta)>= exonCutOff ) code =1; else code =0;
- flag = 1 ;
- }
- }
- else
- {
-
- if( ( CRegion[j+1]-CRegion[j] ) >= exonCutOff ) code =1;
- stp = CRegion[j+1];
- flag =1;
-
- }
- }
- }
-
- if ((code == 1) || ((stp-sta) >exonCutOff) )
- {
-
- //out <<"\nStart :"<<sta <<" End :" << stp;
- obj.start=sta;
- obj.end=stp;
- vecCRegion.push_back(obj);
- }
-
-
- //delete []CRegion;
- //CRegion=NULL;
-
-
- vector<structCR>::iterator it;
-
-
-
- char * strtemp=NULL;
-
-
-
- for(it =vecCRegion.begin(); it !=vecCRegion.end(); it++)
- {
- //long start =it->start;
- //long end =it->end;
-
- strtemp = new char[( (it->end - it->start) +1)];
-
- memset(strtemp , '\0' ,(it->end - it->start) +1 );
-
- strncpy( strtemp , &strSeq[it->start] ,(it->end - it->start) );
-
- //strncpy( strtemp , &strSeq[start] , end -start);
-
- //printf("Length : %d \n ", strlen(strtemp));
-
- //printf("start %ld , End %d length %d\n " , it->start ,it->end , (it->end - it->start));
- //fflush(stdout);
-
-
- delete []strtemp;
- strtemp = NULL;
- }
-
-
-
- //store the coding regions in an array
-
-
- //int elsize;
- //elsize=vecCRegion.size();
-
-
-
- delete []strtemp;
- strtemp = NULL;
-
-
- //delete []CRegion;
- //CRegion=NULL;
-
- //delete []s ;
- //delete []x ;
-
- return(0);
-
- } // end of Coding region finding.
-
-
- // to find exponential positional values to window size .
-
- int WindowAnalysis::findexp(int WindowSize)
- {
-
-
- complex<double> i1(0.0,1.0);
- const double PI = 3.14159274;
- const double F3 = 1.000/3;
-
- for(int k=0; k<WindowSize; k++)
- {
-
- expval[k] = exp(-i1*2.0*PI*F3*(k+1.0));
-
- //cout<<cos(-1*2.0*PI*F3*(k+1.0))<<endl;
- }
-
- return(0);
-
- }
-
-
-
- // end Window Analysis code
-
-
-
-
-
- class ftcds
- {
-
-
- WindowAnalysis *winobj, *winobj1;
-
-
-
- FILE *fp1,*fp2,*fp3,*fpl,*fpc;
- char *seq ,*seqs;
-
- long begpos,endpos;
-
-
-
-
-
- public:
-
- void doGenscan(char *,char*,int,int,int,int,int);
-
- ftcds()
- {
- //seq = NULL;
- winobj = new WindowAnalysis();
- winobj1 = new WindowAnalysis();
-
-
- }
-
-
- ~ftcds()
- {
- delete [] seq;
- seq = NULL;
- //cout<<"Destructor of ftcds"<<endl;
- delete winobj;
- delete winobj1;
-
- }
-
-
- };
-
-
-
- void ftcds::doGenscan(char *inputfile,char *outfile,int winlen,int thold,int pcity,int exoncut,int intcut)
- {
-
- //variable where the sequence will be stored
-
- int seqno=1;
-
- //char fname[50];
- char ch,ch1;
- char *seqname;
- int snin;
-
- vector<structCR>::iterator it1;
-
-
- fp1=fopen(inputfile,"r");
-
- if(fp1==NULL)
- {
- cout<<"File doesn't exist"<<endl;
- exit(1);
- }
-
- //create a temporary file to store the sequence without the newline characters
-
- fpc=fopen(outfile,"w");
- if(fpc==NULL)
- {
- cout<<"Could not open output file"<<endl;
- exit(1);
- }
-
- ch=fgetc(fp1);
-
- while(ch!=EOF)
- {
-
-
- fp2=fopen("tmpseq.tmp","w+");
-
- if(fp2==NULL)
- {
- cout<<"Could not create temporary file"<<endl;
- exit(1);
- }
-
- do
- {
-
- //check for the initial '>'symbol put the characters after the next line
-
- if(ch=='>')
- {
-
- begpos=ftell(fp1);
-
- while(ch!='\n')
- {
- ch=fgetc(fp1);
- }
-
- endpos= ftell(fp1);
- seqname=new char[endpos -begpos + 2];
- memset(seqname,'\0',endpos-begpos +2);
-
- fseek(fp1,begpos,SEEK_SET);
- snin=0;
- while(true)
- {
- ch=fgetc(fp1);
-
-
- if(ch=='\n')
- {
- ch=fgetc(fp1);
- break;
- }
- seqname[snin]=ch;
- snin++;
- }//end ofwhile
-
- }//end of if
-
- if(ch!='\n')
- fputc(ch,fp2);
-
- ch=fgetc(fp1);
-
- if(ch==EOF)
- {
- break;
- }
-
- }while(ch!='>');//end of while
-
-
- endpos=ftell(fp2);
-
- //int endpos=begpos;
-
-
- seq=new char[endpos + 2];
- memset(seq,'\0',endpos + 2);
-
-
- begpos=0;
- fseek(fp2,begpos,SEEK_SET);
-
- ch1=fgetc(fp2);
- int seqin=0;
-
- while(ch1!=EOF)
- {
- seq[seqin]=ch1;
- seqin++;
- ch1=fgetc(fp2);
- }
-
-
- winobj->setWindowLen(winlen);
- winobj->setpcity(pcity);
- winobj->settherHold(thold*1.0);
- winobj->setIntronExon(exoncut,intcut);
-
-
- //cout<<"Length:"<<strlen(seq)<<endl;
-
- winobj->doFt(seq,"log");
-
-
-
-
- /*for(it1=winobj->vecCRegion.begin();it1!=winobj->vecCRegion.end();it1++)
- {
- // it1=winobj->vecCRegion.begin();
- cout<<it1->start<<" "<<it1->end<<endl;
- }*/
-
- // cout<<"\n"<<endl;
-
-
- for(it1=winobj->vecCRegion.begin();it1!=winobj->vecCRegion.end();it1++)
- {
-
- unsigned long actstartpos=it1->start - ((winlen/2)-1);
- unsigned long actendpos=it1->end;
-
- if(actendpos>strlen(seq))
- actendpos=strlen(seq);
-
-
- seqs=new char[actendpos -actstartpos +2];
- memset(seqs,'\0',actendpos -actstartpos +2);
-
- int j=0;
-
- for(int i=actstartpos -1;i<actendpos;i++,j++)
- seqs[j]=seq[i];
-
-
- int len=(strlen(seqs)/3)*3;
-
-
- //cout<<"Length:"<<len<<endl;
- winobj1->setWindowLen(len);
-
- winobj1->setpcity(pcity);
- winobj1->settherHold(thold*1.0);
- winobj1->doFt(seqs,"log");
-
-
- fprintf(fpc,">%s [%d - %d] fourier-CDS%d Length-%d P2NR-%0.4f\n",seqname,actstartpos,actendpos,seqno,(actendpos-actstartpos)+1,winobj1->x[0]);
- fprintf(fpc,"%s\n",seqs);
- printf(".");
- seqno++;
-
-
- }//end of for
-
- printf("\n");
-
- fclose(fp2);
- remove("tmpseq.tmp");
- winobj->vecCRegion.clear();
-
- delete []seq;
- seq = NULL;
-
- }//end of while
-
- cout<<"\nFourier transform for file: "<<inputfile<<"\nResults written to file : "<<outfile<<" (Multiple Sequence Fasta Format)\n\n"<<endl;
-
- fclose(fpc);
-
- }//end of main function
-
-
- int main(int argc, char* argv[])
- {
-
- ftcds *Ch = new ftcds();
-
- int pcity;
- int excut,incut;
-
-
- int winlen,thold;
- char *infile,*outfile;
-
- if(argc!=5)
- {
- cout<<"Usage:[filename] -i [inputfile] -o [outputfile]"<<endl;
- exit(1);
- }
-
- for(int s=1;s<argc;s++)
- {
-
-
- if(strcmp(argv[s],"-i")==0)
- {
- infile=new char[strlen(argv[s +1])+1];
- strcpy(infile,argv[++s]);
- }
-
- if(strcmp(argv[s],"-o")==0)
- {
- outfile=new char[strlen(argv[s +1])+1];
- strcpy(outfile,argv[++s]);
- }
-
-
- }//end of for
-
-
- Ch->doGenscan(infile,outfile,100,4,3,100,100);
-
- //Ch->doGenscan("amrna.txt","amrnaout.txt",300,4,3,50,50);
-
- delete Ch;
-
- return 0;
-
-
- }//end main
-
-
-
-
-
-