/code_comm/checkpcity.cpp
C++ | 1671 lines | 875 code | 575 blank | 221 comment | 111 complexity | 0e6276feb354160f0c14f51e59a52a1e MD5 | raw file
- // checkpcity.cpp : Defines the entry point for the console application.
- //
-
- //#include "stdafx.h"
- #include<stdio.h>
- #include<fstream>
- #include<iostream.h>
- #include<math.h>
- #include<string.h>
- #include<stdlib.h>
- //#include<comdef.h>
- #include<ctype.h>
-
- //#include "WindowAnalysis.h"
- //#include "PowerSpectrum.h"
- #include<vector>
- #include<complex>
- using namespace std;
- class PowerSpectrum;
-
-
-
- //Included the Powerspectrum module
-
-
-
- class PowerSpectrum
- {
- public:
- PowerSpectrum();
- virtual ~PowerSpectrum();
- char * strCRegion;
- double *s;
- double *x;
- //temp check
- double *x1;
- double *xp;
- double *xp1;
- double *xr;
- double *xr1;
-
- int doPowerSpectrumAnalysis();
- void setSequence(const char * seq);
-
- };
-
-
-
- PowerSpectrum::PowerSpectrum()
- {
- s=NULL;
- x=NULL;
- strCRegion=NULL;
-
- }
-
- PowerSpectrum::~PowerSpectrum()
- {
- delete []s;
- delete []x;
- delete []strCRegion;
-
- }
-
- void PowerSpectrum::setSequence(const char * seq)
- {
-
- delete []strCRegion;
-
- strCRegion=new char[ strlen(seq)+1 ];
-
- memset(strCRegion, '\0' , strlen(seq)+1 ) ;
-
- strcpy(strCRegion,seq);
-
- }
-
-
- int PowerSpectrum::doPowerSpectrumAnalysis()
- {
-
-
- const double PI = 3.1415926535897932384626433832795;
- //public static final double PI ;
-
-
- complex<double> i1(0.0,1.0);
- //complex<double> i1 = sqrt(-1);
-
- long lenStr = strlen(strCRegion);
-
- delete []s;
- delete []x;
-
-
- cout<<"Coding Region Length:"<<lenStr<<endl;
-
-
- s = new double[lenStr/2 + 1];
- x = new double[lenStr/2 + 1];
-
- //temporary check;
- x1 = new double[lenStr/2 + 1];
- xp = new double[lenStr/2 + 1];
- xp1 = new double[lenStr/2 + 1];
- xr = new double[lenStr/2 + 1];
- xr1 = new double[lenStr/2 + 1];
-
- double *sf = new double[lenStr/2 + 1];
-
- char nuclt[5]= "ATGC";
- int const lenWindow = (lenStr/3)*3;
- //int const lenWindow = lenStr;
-
-
- double winsq = (1.0/lenWindow)+(1.0/(lenWindow*lenWindow));
- long wincube = lenWindow*lenWindow*lenWindow;
-
-
- int countA =0;
- int countC =0;
- int countG =0;
- int countT= 0;
-
- complex<double> s1(0.0,0.0);
- double ss2=0.0;
- double sq=0.0;
- double s2=0.0;
- //temporary check
- double p1=0.0;
- double p2=0.0;
- double p3=0.0;
-
- double sbar =0.0;
- double p=0.0;
-
-
- //ofstream out("power");
-
- for(long i =0; i < lenWindow; i++ )
- {
-
- if( strCRegion[i] == 'A')
- { countA++; continue;}
- if( strCRegion[i] == 'G')
- { countG++; continue;}
- if( strCRegion[i] == 'C')
- { countC++; continue;}
- if( strCRegion[i] == 'T')
- { countT++; continue;}
-
- }
-
- //out<<"Count A :"<<countA <<" countT :"<<countT <<"countG :"<<countG <<"countC :"<<countC <<endl;
-
-
- sq = 0.0+ pow(countA,2)+pow(countG,2) + pow(countT,2) + pow(countC,2);
-
- double f =0.0;
-
- for(long k = 1 ; k <=lenWindow/2 ; k++ )
- {
-
- s2 =0.0;
- sbar =0.0;
- p =0.0;
- f= ((1.0)*k)/lenWindow;
-
- // for each Nucleotide. A T G C
-
-
- for(int i = 0; i <4 ; i ++ )
- {
- s1= complex<double>(0.0,0.0);
-
-
- for (int bi=0; bi<lenWindow; bi++)
- {
- if(strCRegion[bi] == nuclt[i] )
- s1= s1+exp(i1*2.0*PI*f*(bi+1.0));
- }
-
- s2 = s2+ (abs(s1)*abs(s1));
- //ss2=ss2 + (1/lenWindow*lenWindow)*(abs(s1)*abs(s1));
-
- }
-
- //store the value of sf for each freq.
- sf[k-1]=s2;
-
- // to calculater power specturm and code region
-
- sq = sq/wincube;
- sbar =(winsq-sq);
- sq = sbar * (lenWindow/2.0);
-
- // to calcuater signal to noise ratio
-
- s2 = s2/(sq*lenWindow*lenWindow);
-
- p= s2/sbar;
- //temporary check;
- //p1=(p*lenWindow*sbar)/2;
-
-
- //p2=p1*2;
- p2=p/(lenWindow*sbar);
- p3=2*p2;
-
- s[k-1]=f;
- x[k-1]=p;
- xp[k-1]=p2;
- xp1[k-1]=p3;
-
-
- //temporary check
- //x1[k-1]=p1;
- //x2[k-1]=p2;
- //x3[k-1]=p3;
-
-
- // out<<"\n Frequency:"<<f <<" Power : "<<p;
-
- } // outer for loop
-
- //calculating the sbar value
-
- double sbar1=0.00;
- double sbar2=0.00;
-
- for(k = 1 ; k <=lenWindow/2 ; k++ )
- {
- sbar1=sbar1+sf[k-1];
-
- }
- sbar1=(sbar1*2)/lenWindow;
- sbar2=sbar1/(lenWindow*lenWindow);
- //calculating the sf/sbar1
-
-
- for(k = 1 ; k <=lenWindow/2 ; k++ )
- {
- x1[k-1]=sf[k-1]/sbar1;
-
- xr[k-1]=x1[k-1]/(lenWindow*sbar2);
-
- xr1[k-1]=xr[k-1]*2;
-
- }
-
-
-
- int numPeaks=0;
- int genePeak=0;
-
- for( k = 1 ; k <= (lenWindow/2) ; k++ )
- {
-
- if( x[k-1] > 4.0)
- {
- //if( s[k-1] == 1.0/3.0 )] //added by Sreenu on mandars suggestion on 19/04/2002
- if( ( s[k-1] >= 0.32 ) && ( s[k-1] <= 0.34 ))
- genePeak=1;
- else
- numPeaks++;
- }
-
- }
-
-
- //delete []s ;
- //delete []x ;
- //s =NULL ;
- //x =NULL ;
-
-
- if(genePeak==1)
- {
- if(numPeaks==0)
- return 1;
- else
- return 2;
- }
- else
- {
- return 0;
- }
-
-
-
- } // end of member function.
-
-
-
-
-
- //End of the pwerspectrum module
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- //Included the Windowanalysis module
-
- struct structCR
- {
- long start;
- long end;
- char type;
- };
-
-
- class WindowAnalysis //__declspec(dllexport)
- {
- public:
-
-
- long *s;
- double *x;
-
- vector<structCR> vecCRegion;
- WindowAnalysis();
- WindowAnalysis(int len ,double thold,long intron , long exon);
- virtual ~WindowAnalysis();
- void setWindowLen(int len);
- void settherHold(double thold);
- void setIntronExon(long intron , long exon );
- int doFt(const char * strSeq , char *logfile);
- long framecount;
-
- private :
-
- int lenWindow;
- double thersHold;
- long exonCutOff;
- long intronCutOff;
- int findexp(int WindowSize);
- //long framecount;
- complex<double> * expval;
- WindowAnalysis& WindowAnalysis::operator =(const WindowAnalysis &cwa);
- WindowAnalysis(const WindowAnalysis &cwa);
-
- };
-
-
-
-
-
-
-
-
-
- WindowAnalysis::WindowAnalysis()
- {
- expval = NULL;
- s = NULL;
- x = NULL;
- framecount=0;
-
-
- }
-
- WindowAnalysis::~WindowAnalysis()
- {
-
- delete []expval;
- delete []s ;
- delete []x ;
- printf("Destructor of WindowAnalysis ");
- fflush(stdout);
- //delete []CRegion;
-
-
- }
-
- WindowAnalysis::WindowAnalysis(int len , double thold ,long intron , long exon)
- {
-
- lenWindow=len;
- thersHold=thold;
- exonCutOff=exon;
- intronCutOff =intron;
-
- s = NULL;
- x = NULL;
-
- framecount=0;
- // cout<<"ThresHold Value "<<thersHold;
- // cout<<"Intron :"<<intronCutOff<<" Exon :"<<exonCutOff<<endl;
-
- expval=new complex<double>[lenWindow];
-
- }
-
-
- void WindowAnalysis::settherHold(double thold)
- {
- thersHold =thold;
- // cout<< "ThresHold Value "<<thersHold;
-
- }
-
- void WindowAnalysis::setWindowLen(int len)
- {
- lenWindow=len;
- delete [] expval;
- expval=new complex<double>[lenWindow];
-
- }
- void WindowAnalysis::setIntronExon(long intron , long exon )
- {
- exonCutOff=exon;
- intronCutOff =intron;
-
-
- }
- int WindowAnalysis::doFt(const char * strSeq , char * logfile)
- {
-
-
- const double F3 = 1.000/3;
- const double PI = 3.14159265;
-
-
- //ofstream out("output" );
- ofstream out(logfile ,ios::app);
-
- long lenStr = strlen(strSeq);
-
- if(lenStr<lenWindow)
- {
- out<<"Sequence length is less than windowlength."<<endl;
- return 1;
- }
-
- cout<<"\n Sequence Length :"<<lenStr<<endl;
-
-
- int result= findexp(lenWindow);
-
- //long *CRegion = new long[lenStr-lenWindow+ 1];
- //long *s = new long[lenStr-lenWindow+ 1];
- //double *x = new double[lenStr-lenWindow+1];
-
- delete [] s;
- delete [] x;
- s = new long[lenStr-lenWindow+ 1];
- x = new double[lenStr-lenWindow+1];
-
-
- char nuclt[5]= "ATGC";
-
- double winsq = (1.0/lenWindow)+(1.0/(lenWindow*lenWindow));
- long wincube = lenWindow*lenWindow*lenWindow;
-
-
- int countA =0;
- int countC =0;
- int countG =0;
- int countT= 0;
-
- complex<double> s1(0.0,0.0);
-
- double sq=0.0;
- double s2=0.0;
- double sbar =0.0;
- double p=0.0;
-
- framecount=0;
-
-
-
- for(long k = 0 ; k < lenStr-lenWindow+1 ; k++ )
-
- {
- sq =0.0;
- s2 =0.0;
- sbar =0.0;
- p =0.0;
-
- // Frame wise count of A C G T seperately
-
-
- if(k==0)
- {
- for(long i =k; i < k+lenWindow; i++ )
- {
-
- if( strSeq[i] == 'A')
- { countA++; continue;}
- if( strSeq[i] == 'G')
- { countG++; continue;}
- if( strSeq[i] == 'C')
- { countC++; continue;}
- if( strSeq[i] == 'T')
- { countT++; continue;}
-
- } //end of count for look.
-
- }else
- {
-
- switch(strSeq[k-1])
- {
- case 'A' :
- { countA--; break; }
- case 'T' :
- { countT--; break; }
- case 'G' :
- { countG--; break; }
- case 'C' :
- { countC--; break; }
- }
-
- switch(strSeq[k+lenWindow-1])
- {
- case 'A' :
- { countA++; break; }
- case 'T' :
- { countT++; break; }
- case 'G' :
- { countG++; break; }
- case 'C' :
- { countC++; break; }
- }
-
-
- }
-
- //out<<"Count A :"<<countA <<" Frame count :"<<k<<endl;
- //out<<"Count T :"<<countT <<" Frame count :"<<k<<endl;
- //out<<"Count G :"<<countG <<" Frame count :"<<k<<endl;
- //out<<"Count C :"<<countC <<" Frame count :"<<k<<endl;
-
- sq = 0.0+ pow(countA,2)+pow(countG,2) + pow(countT,2) + pow(countC,2);
-
- // for each Nucleotide. A T G C
-
-
- for(int i = 0; i <4 ; i ++ )
- {
- s1= complex<double>(0.0,0.0);
-
-
- for (int bi=0; bi<lenWindow; bi++)
- {
- if(strSeq[k+bi] == nuclt[i] )
- s1= s1+expval[bi];
- }
-
- s2 = s2+ (abs(s1)*abs(s1));
-
- }
-
-
- // to calculater power peak
-
- sq = sq/wincube;
- sbar =(winsq-sq);
- sq = sbar * (lenWindow/2.0);
-
- // to calcuater signal to noise ratio
-
- s2 = s2/(sq*lenWindow*lenWindow);
-
- p= s2/sbar;
-
-
- s[framecount]=k+(lenWindow/2);
- x[framecount]=p;
-
- out<<s[framecount]<<" "<<x[framecount]<<"\n";
- framecount++;
-
-
- } // End of Outer for loop(Frame wise)
-
- //return(0);
- //}
- // end of Ft function..
- //void WindowAnalysis::findCodingRegion(long seqfileId , const char * seqId)
- //{
-
- long *CRegion = new long[framecount];
- //memset(CRegion , 0 ,(framecount+1)*sizeof(long));
-
- int flag=0;
- long i =0 , m =0;
-
- for (m =0; m<framecount; m++)
- {
-
- if(x[m]>=thersHold)
- {
- if(flag == 0)
- {
-
- CRegion[i] =s[m];
- //out<< "\n START :" <<s[m]<<" : "<<x[m];
- //i++;
- flag =1;
- }
-
- }
-
- else
- {
- if(flag==1)
- {
- if(x[m]<thersHold)
- {
-
- out<< "\n START :" <<CRegion[i];
- out<<"\t\tEND: "<<s[m-1]<<" : "<<x[m-1];
- i++;
- CRegion[i]=s[m-1];
- i++;
- }
- flag=0;
-
- }
-
- } // end of if.
- }// End Coding region loop.
-
-
-
- if (flag==1)
- {
- out<< "\n START :" <<CRegion[i];
- out<<"\t\tEND: "<<s[m-1]<<" : "<<x[m-1];
- i++;
- CRegion[i]=s[m-1];
- i++;
- }
-
- //delete []CRegion;
- // that data is deleted 07/02/02 To inprove performance.
-
- //delete []s ;
- //delete []x ;
- //s=NULL;
- //x=NULL;
-
- out<<"\n Final Coding Regions :"<<endl;
-
- flag =0;
- int code =0;
- long sta=0 , stp =0;
- structCR obj;
- obj.type=' ';
-
- for(int j=0 ; j<i ; j +=2)
- {
- if(flag ==0)
- {
- sta = CRegion[j];
- stp = CRegion[j+1];
- if ( (stp-sta)>= exonCutOff ) code =1; else code =0;
- flag = 1 ;
- }
- else
- {
- if( (CRegion[j]-CRegion[j-1]) >=intronCutOff )
- {
- if(code ==1)
- {
- out <<"\nStart :"<<sta <<" End :" << stp;
- // inset in database start new set.
- obj.start=sta;
- obj.end=stp;
- vecCRegion.push_back(obj);
- //insetcodes( sta , stp,seqfileId,seqId);
- sta = CRegion[j];
- stp = CRegion[j+1];
-
- if ( (stp-sta)>= exonCutOff ) code =1; else code =0;
- flag = 1 ;
- }
- else
- {
- //cout << forget the old set.
- if ((stp-sta)>= exonCutOff )
- { out <<"\nStart :"<<sta <<" End :" << stp;
- obj.start=sta;
- obj.end=stp;
- vecCRegion.push_back(obj);
- //insetcodes( sta , stp,seqfileId,seqId);
- }
- // may need to be commented later.
- sta = CRegion[j];
- stp = CRegion[j+1];
- if ( (stp-sta)>= exonCutOff ) code =1; else code =0;
- flag = 1 ;
- }
- }
- else
- {
-
- if( ( CRegion[j+1]-CRegion[j] ) >= exonCutOff ) code =1;
- stp = CRegion[j+1];
- flag =1;
-
- }
- }
- }
-
- if ((code == 1) || ((stp-sta) >exonCutOff) )
- {
-
- out <<"\nStart :"<<sta <<" End :" << stp;
- obj.start=sta;
- obj.end=stp;
- vecCRegion.push_back(obj);
- //mapCRegion[sta] =stp;
- //insetcodes( sta , stp,seqfileId,seqId);
- }
-
-
- delete []CRegion;
- CRegion=NULL;
-
-
- vector<structCR>::iterator it;
-
- PowerSpectrum powerObj;
-
- char * strtemp=NULL;
-
- for(it =vecCRegion.begin(); it !=vecCRegion.end(); it++)
- {
- //long start =it->start;
- //long end =it->end;
-
- strtemp = new char[( (it->end - it->start) +1)];
-
- memset(strtemp , '\0' ,(it->end - it->start) +1 );
-
- strncpy( strtemp , &strSeq[it->start] ,(it->end - it->start) );
-
- //strncpy( strtemp , &strSeq[start] , end -start);
-
- printf("Length : %d \n ", strlen(strtemp));
-
- printf("start %ld , End %d length %d\n " , it->start ,it->end , (it->end - it->start));
- fflush(stdout);
-
- powerObj.setSequence(strtemp);
- int peaks =powerObj.doPowerSpectrumAnalysis();
- if(peaks==0)
- {
- it=vecCRegion.erase(it);
- if( it !=vecCRegion.end() )
- --it;
- else
- break;
- }
- else if(peaks==1)
- {
- it->type ='S';
- }
- else if(peaks==2)
- {
- it->type ='M';
- }
- delete []strtemp;
- strtemp = NULL;
- }
-
-
-
- //store the coding regions in an array
-
-
- //int elsize;
- //elsize=vecCRegion.size();
-
-
-
- delete []strtemp;
- strtemp = NULL;
-
-
- //delete []CRegion;
- //CRegion=NULL;
-
- //delete []s ;
- //delete []x ;
-
- return(0);
-
- } // end of Coding region finding.
-
-
- // to find exponential positional values to window size .
-
- int WindowAnalysis::findexp(int WindowSize)
- {
-
-
- complex<double> i1(0.0,1.0);
- const double PI = 3.14159274;
- const double F3 = 1.000/3;
-
- for(int k=0; k<WindowSize; k++)
- {
-
- expval[k] = exp(-i1*2.0*PI*F3*(k+1.0));
-
- }
-
- return(0);
-
- }
-
- // ///////////////// End //////////////
-
-
-
- //End of Windowanalysis
-
-
-
-
-
-
-
-
-
-
-
- struct storeseq
- {
- char seqhead[100];
-
- storeseq(char *s)
- {strcpy(seqhead,s);}
-
- };
-
- struct storeth
- {
- char seqt[100];
- double sf;
- storeth(char *s,double b)
- {strcpy(seqt,s);sf=b;}
-
-
- };
-
- class Chpcity
- {
-
-
-
-
-
- FILE *fp1,*fp2,*fp3,*fp4;
- char *seq;
- fpos_t begpos,endpos;
- vector <storeseq> stseq;
- vector <storeseq>::iterator it;
-
- vector <storeth> stth;
- vector <storeth> stth1;
- vector <storeth> ::iterator it2;
- vector <storeth> ::iterator it3;
-
-
-
-
- public:
- void doGenscan();
- void doBlast();
- void doParse();
- void dofft();
- void filterSeq();
-
-
- ~Chpcity()
- {
- cout<<"Destructor of chpcity"<<endl;
- stseq.clear();
- }
-
-
- };
-
-
- void Chpcity::doGenscan()
- {
-
- //variable where the sequence will be stored
-
- WindowAnalysis winobj;
- int seqno=1;
- //storeth objth;
-
-
- char fname[60];
- char ch,ch1;
- int sin;
- char seqhead[100];
-
- vector<structCR>::iterator it1;
-
-
- winobj.settherHold(0.0);
- winobj.setIntronExon(50,50);
-
-
-
-
- printf("Enter the file name:");
- gets(fname);
-
- fp1=fopen(fname,"r");
-
-
- if(fp1==NULL)
- {
- cout<<"File doesn't exist"<<endl;
- exit(1);
- }
-
- //create a temporary file to store the sequence without the newline characters
-
- //cout<<"Enter the file name for non coding sequences:");
- //gets(fname);
-
- fp3=fopen("d:\\tholdamy.txt","w");
-
- ch=fgetc(fp1);
-
- while(ch!=EOF)
- {
- sin=0;
-
- fp2=fopen("tmpseq.tmp","w+");
-
- if(fp2==NULL)
- {
- cout<<"Could not create temporary file"<<endl;
- exit(1);
- }
-
- do
- {
-
- //check for the initial '>'symbol put the characters after the next line
- if(ch=='>')
- {
- while(true)
- {
- ch=fgetc(fp1);
-
- if(ch=='\n')
- {
- ch=fgetc(fp1);
- break;
- }
- seqhead[sin]=ch;
- sin++;
- }//end ofwhile
- seqhead[sin]='\0';
-
-
- }//end of if
-
-
- if(ch!='\n')
- fputc(ch,fp2);
-
- ch=fgetc(fp1);
-
- if(ch==EOF)
- {
- break;
- }
-
- }while(ch!='>');//end of while
-
-
-
-
- fgetpos(fp2,&endpos);
-
- //int endpos=begpos;
-
- seq=new char[endpos +2];
- memset(seq,'\0',endpos +2);
-
-
- begpos=0;
- fsetpos(fp2,&begpos);
-
- ch1=fgetc(fp2);
- int seqin=0;
-
- while(ch1!=EOF)
- {
- seq[seqin]=toupper(ch1);
- seqin++;
- ch1=fgetc(fp2);
- }
-
-
- /*char exseqh[30];
-
- for(int j1=0;seqhead[j1]!=',';j1++);
-
- for(int ie=j1+1,i=0;seqhead[ie]!='\0';ie++,i++)
- {
- exseqh[i]=seqhead[ie];
- }
-
- exseqh[i]='\0';*/
-
- //if(strcmp(seqhead,"(gi|16127994:1080570-1080686, 1080677-1081408), b1016m")==0)
- //{
- //winobj.setWindowLen(strlen(seq));
-
- winobj.setWindowLen(300);
- winobj.doFt(seq,"log");
-
- //cout<<winobj.x[0]<<endl;
-
- //objth.sf=winobj.x[0];
-
- //strcpy(objth.seqt,seqhead);
- //double tx=winobj.x[0];
-
- //cout<<objth.sf<<endl;
-
- for(int fr=0;fr<winobj.framecount;fr++)
- stth.push_back(storeth(seqhead,winobj.x[fr]));
- //}
-
-
- fclose(fp2);
-
- remove("tmpseq.tmp");
- winobj.vecCRegion.clear();
- seqno++;
-
- delete []seq;
-
-
- }
- char tempseqt[100];
- float tempsf;
-
-
- for(it2=stth.begin();it2!=stth.end();it2++)
- {
- it3=it2;
- it3++;
- for(;it3!=stth.end();it3++)
- {
- if(it2->sf < it3->sf)
- {
- tempsf=it3->sf;
- it3->sf=it2->sf;
- it2->sf=tempsf;
-
- strcpy(tempseqt,it3->seqt);
- strcpy(it3->seqt,it2->seqt);
- strcpy(it2->seqt,tempseqt);
-
-
- }
- }
-
- }
-
-
- fprintf(fp3,"%s\n",fname);
- for(it2=stth.begin();it2!=stth.end();it2++)
- {
- fprintf(fp3,"%0.2f\n",it2->sf);
- //cout<<it2->seqt<<" "<<it2->sf<<endl;
- }
-
-
-
-
- _fcloseall();
-
- //system("del d:\\tmpseq.tmp");
-
-
-
-
-
-
- }
-
-
-
- void Chpcity::dofft()
- {
-
- PowerSpectrum powobj;
-
- char heading[150];
- int hin;
- char filename[50];
- char ch,ch1;
- int lenseq;
-
- int no=1;
- int no1=1;
-
-
- printf("Enter the file name for powerspectrum anal.:");
- gets(filename);
-
- fp1=fopen(filename,"r");
-
- if(fp1==NULL)
- {
- cout<<"Cannot Open file"<<endl;
- exit(1);
- }
-
-
-
-
- //fp4=fopen("d:\\anno\\ecolsfr.txt","w");
-
-
- ch=fgetc(fp1);
-
- while(ch!=EOF)
- {
- hin=0;
-
- fp2=fopen("tmpseq.tmp","w+");
-
- if(fp2==NULL)
- {
- cout<<"Could not create temporary file"<<endl;
- exit(1);
- }
-
- do
- {
-
- //check for the initial '>'symbol put the characters after the next line
- if(ch=='>')
- {
- while(true)
- {
- ch=fgetc(fp1);
-
- if(ch=='\n')
- {
- ch=fgetc(fp1);
- break;
- }
-
- heading[hin]=ch;
- hin++;
- }//end ofwhile
-
- heading[hin]='\0';
-
- }//end of if
-
-
-
- if(ch!='\n')
- fputc(ch,fp2);
-
- ch=fgetc(fp1);
-
- if(ch==EOF)
- {
- break;
- }
-
- }while(ch!='>');//end of while
-
-
- fgetpos(fp2,&endpos);
-
- //int endpos=begpos;
-
- seq=new char[endpos +2];
- memset(seq,'\0',endpos +2);
-
-
- begpos=0;
- fsetpos(fp2,&begpos);
-
- ch1=fgetc(fp2);
- int seqin=0;
-
- while(ch1!=EOF)
- {
- seq[seqin]=toupper(ch1);
- seqin++;
- ch1=fgetc(fp2);
- }
-
- //cout<<seq<<endl;
- char exseqh[50];
-
- for(int j1=0;heading[j1]!=',';j1++);
-
- for(int ie=j1+1,i=0;heading[ie]!='\0';ie++,i++)
- {
- exseqh[i]=heading[ie];
- }
-
- exseqh[i]='\0';
-
-
-
-
- fp3=fopen(exseqh,"w");
-
- if(fp3==NULL)
- {
- cout<<"Could not create File"<<endl;
- }
-
- powobj.setSequence(seq);
- int peak=powobj.doPowerSpectrumAnalysis();
-
- lenseq=strlen(powobj.strCRegion);
-
- lenseq=(lenseq/3)*3;
-
- //fprintf(fp3,">%s\n",heading);
-
- //fprintf(fp4,">%s\n",exseqh);
-
- //fprintf(fp3,"%s\n",seq);
-
-
- char cfreq[7];
- float ffreq;
-
- //double frq;
- //frq=1.0/3.0;
-
- for(long k1=0;k1<lenseq/2;k1++)
- {
-
-
- //if(powobj.s[k1]==frq)
- //{
-
- //sprintf(cfreq,"%0.4f",powobj.s[k1]);
- //ffreq=atof(cfreq);
- //ffreq=1/ffreq;
- //cout<<powobj.s[k1]<<" "<<powobj.x1[k1]<<" "<<powobj.x[k1]<<endl;
- //if(powobj.x1[k1]>4.0)
- //{
- //stth.push_back(storeth(heading,powobj.x1[k1]));
- //}
- /*else
- {
-
- stth1.push_back(storeth(exseqh,powobj.x1[k1]));
- }*/
-
-
- fprintf(fp3,"%f %f\n",powobj.s[k1],powobj.x[k1]);
-
- no++;
- //}
-
- }
-
-
-
-
- //stth.clear();
- //stth1.clear();
-
- //fprintf(fp3,"\n");
-
- fclose(fp2);
-
- remove("tmpseq.tmp");
- delete []seq;
-
-
-
-
- }//end of while
-
- /*char tempseqt[100];
- double tempsf;
-
- for(it2=stth.begin();it2!=stth.end();it2++)
- {
- it3=it2;
- it3++;
- for(;it3!=stth.end();it3++)
- {
- if(it2->sf < it3->sf)
- {
- tempsf=it3->sf;
- it3->sf=it2->sf;
- it2->sf=tempsf;
-
- strcpy(tempseqt,it3->seqt);
- strcpy(it3->seqt,it2->seqt);
- strcpy(it2->seqt,tempseqt);
-
-
- }
- }
-
- }
-
-
-
-
- for(it2=stth.begin();it2!=stth.end();it2++)
- {
- fprintf(fp3,"%d %s %0.14lf\n",no,it2->seqt,it2->sf);
- no++;
- }*/
-
-
-
-
- /*for(it2=stth1.begin();it2!=stth1.end();it2++)
- {
- it3=it2;
- it3++;
- for(;it3!=stth1.end();it3++)
- {
- if(it2->sf < it3->sf)
- {
- tempsf=it3->sf;
- it3->sf=it2->sf;
- it2->sf=tempsf;
-
- strcpy(tempseqt,it3->seqt);
- strcpy(it3->seqt,it2->seqt);
- strcpy(it2->seqt,tempseqt);
-
-
- }
- }
-
- }
-
-
- for(it2=stth1.begin();it2!=stth1.end();it2++)
- {
- fprintf(fp4,"%d %s %0.14lf\n",no1,it2->seqt,it2->sf);
- no1++;
- }*/
-
-
- _fcloseall();
-
-
- }
-
-
- void Chpcity::doParse()
- {
-
- char pfname[50];
- char chp;
-
- //printf("Enter the file name to parse:");
- //gets(pfname);
-
- strcpy(pfname,"c:\\localb\\testbl.txt");
-
- fp1=fopen(pfname,"r");
-
- if(fp1==NULL)
- {
- cout<<"File dooesn't exist"<<endl;
- exit(1);
- }
-
- chp=fgetc(fp1);
-
- char *seqsrch,*posit;
-
-
-
- char seqname[30];
-
- while(chp!=EOF)
- {
-
- while(chp!=EOF)
- {
- seqsrch=new char[7];
- memset(seqsrch,'\0',7);
-
-
- for(int i=0;i<6;i++)
- {
-
- if(chp=='\n' || chp==' ')
- {
- chp=fgetc(fp1);
- break;
- }
-
- seqsrch[i]=chp;
- chp=fgetc(fp1);
-
- if(chp==EOF)
- break;
-
-
- }
- //seqsrch[i]='\0';
-
- if(strcmp(seqsrch,"Query=")==0)
- break;
-
- delete []seqsrch;
- }
-
-
- if(chp==EOF)
- break;
-
-
-
- //extract the sequence name
- int j=0;
- chp=fgetc(fp1);
-
-
- while(chp!='\n')
- {
-
- seqname[j]=chp;
- j++;
- chp=fgetc(fp1);
-
-
- }
- seqname[j]='\0';
-
- //check for hits and extract positives
-
- chp=fgetc(fp1);
-
- int nohit=0;
- int nomatch=0;
- char positptage[4];//store the percentage from the positives
-
- while(chp!=EOF)
- {
- posit=new char[10];
- memset(posit,'\0',10);
-
- for(int i=0;i<9;i++)
- {
-
- if(chp=='\n' || chp==' ')
- {
- chp=fgetc(fp1);
- break;
- }
-
-
- if(chp=='*')//*means there is no hit for the sequence
- {
- nohit=1;
- break;
- }
-
- posit[i]=chp;
- chp=fgetc(fp1);
-
- if(strcmp(posit,"Lambda")==0)
- {
- nomatch=1;
- break;
-
- }
-
-
- }
-
- //posit[i]='\0';
-
- if(nohit==1)//no hits found for the sequence
- break;
-
- if(nomatch==1)//no positives >=60%
- break;
-
- if(strcmp(posit,"Positives")==0 )
- {
- //check for >=60%
-
- while(chp!='(')
- {
- chp=fgetc(fp1);
- }
-
- chp=fgetc(fp1);
-
- int pspin=0;
-
- while(chp!='%')
- {
- positptage[pspin]=chp;
- pspin++;
- chp=fgetc(fp1);
- }
-
- positptage[pspin]='\0';
-
- int intptage=atoi(positptage);
-
- if(intptage>=60)
- {
- stseq.push_back(storeseq(seqname));
- break;
- }
-
- }//end of if
- delete []posit;
-
- }
-
- }
-
- fclose(fp1);
-
-
- for(it=stseq.begin();it!=stseq.end();it++)
- {
- cout<<it->seqhead<<endl;
-
- }
-
-
-
- }
-
- void Chpcity::filterSeq()
- {
- //this module filters out the sequences that has Positives <=x%
-
- char fch;
- char seqhead[30];
- int findseq,fseqin;
-
- fp1=fopen("d:\\noncodseq\\noncoseq.txt","r");
-
- if(fp1==NULL)
- {
- cout<<"File doesn't exist"<<endl;
- exit(1);
-
- }
-
-
- fp2=fopen("d:\\noncodseq\\fnoncoseq.txt","w");
-
-
- fch=fgetc(fp1);
-
- for(it=stseq.begin();it!=stseq.end();it++)
- {
- findseq=0;
-
- while(findseq==0)
- {
- fseqin=0;
-
- //check for the initial '>'symbol put the characters after the next line
- if(fch=='>')
- {
- while(true)
- {
- fch=fgetc(fp1);
-
- if(fch=='\n')
- {
- fch=fgetc(fp1);
- break;
- }
-
- seqhead[fseqin]=fch;
- fseqin++;
- }//end ofwhile
-
- seqhead[fseqin]='\0';
-
- }//end of if
-
- if(strcmp(it->seqhead,seqhead)==0)
- {
- fprintf(fp2,">%s\n",seqhead);
- while(fch!='\n')
- {
- fputc(fch,fp2);
- fch=fgetc(fp1);
-
- }
- fputc('\n',fp2);
-
- findseq=1;
-
- }
-
- fch=fgetc(fp1);
-
-
- }
-
-
- }//end of for loop
-
-
- _fcloseall();
-
- }
-
-
- void Chpcity::doBlast()
- {
-
- //execute the blastall program
- int retval=system("c:\\localb\\blastall -p blastx -d c:\\localb\\pdbaa -i d:\\noncodseq\\noncoseq.txt -o c:\\localb\\testbl.txt -e 0.1");
-
- }
-
-
-
- int main(int argc, char* argv[])
- {
- Chpcity Ch;
- //Ch.doGenscan();
- //Ch.doBlast();
- //Ch.doParse();
-
- //Ch.filterSeq();
- Ch.dofft();
-
- //{//blastall -p blastx -d pdbaa -i d:\noncoseq.txt -o test1.txt -e 0.1}
- //blastall -p blastx -d pdbaa -i d:\noncoseq.txt -o test.xml -m 7 -e 0.1
-
-
- return 0;
- }