PageRenderTime 28ms CodeModel.GetById 25ms RepoModel.GetById 0ms app.codeStats 1ms

/code_comm/checkpcity.cpp

http://research-code-base-animesh.googlecode.com/
C++ | 1671 lines | 875 code | 575 blank | 221 comment | 111 complexity | 0e6276feb354160f0c14f51e59a52a1e MD5 | raw file
  1. // checkpcity.cpp : Defines the entry point for the console application.
  2. //
  3. //#include "stdafx.h"
  4. #include<stdio.h>
  5. #include<fstream>
  6. #include<iostream.h>
  7. #include<math.h>
  8. #include<string.h>
  9. #include<stdlib.h>
  10. //#include<comdef.h>
  11. #include<ctype.h>
  12. //#include "WindowAnalysis.h"
  13. //#include "PowerSpectrum.h"
  14. #include<vector>
  15. #include<complex>
  16. using namespace std;
  17. class PowerSpectrum;
  18. //Included the Powerspectrum module
  19. class PowerSpectrum
  20. {
  21. public:
  22. PowerSpectrum();
  23. virtual ~PowerSpectrum();
  24. char * strCRegion;
  25. double *s;
  26. double *x;
  27. //temp check
  28. double *x1;
  29. double *xp;
  30. double *xp1;
  31. double *xr;
  32. double *xr1;
  33. int doPowerSpectrumAnalysis();
  34. void setSequence(const char * seq);
  35. };
  36. PowerSpectrum::PowerSpectrum()
  37. {
  38. s=NULL;
  39. x=NULL;
  40. strCRegion=NULL;
  41. }
  42. PowerSpectrum::~PowerSpectrum()
  43. {
  44. delete []s;
  45. delete []x;
  46. delete []strCRegion;
  47. }
  48. void PowerSpectrum::setSequence(const char * seq)
  49. {
  50. delete []strCRegion;
  51. strCRegion=new char[ strlen(seq)+1 ];
  52. memset(strCRegion, '\0' , strlen(seq)+1 ) ;
  53. strcpy(strCRegion,seq);
  54. }
  55. int PowerSpectrum::doPowerSpectrumAnalysis()
  56. {
  57. const double PI = 3.1415926535897932384626433832795;
  58. //public static final double PI ;
  59. complex<double> i1(0.0,1.0);
  60. //complex<double> i1 = sqrt(-1);
  61. long lenStr = strlen(strCRegion);
  62. delete []s;
  63. delete []x;
  64. cout<<"Coding Region Length:"<<lenStr<<endl;
  65. s = new double[lenStr/2 + 1];
  66. x = new double[lenStr/2 + 1];
  67. //temporary check;
  68. x1 = new double[lenStr/2 + 1];
  69. xp = new double[lenStr/2 + 1];
  70. xp1 = new double[lenStr/2 + 1];
  71. xr = new double[lenStr/2 + 1];
  72. xr1 = new double[lenStr/2 + 1];
  73. double *sf = new double[lenStr/2 + 1];
  74. char nuclt[5]= "ATGC";
  75. int const lenWindow = (lenStr/3)*3;
  76. //int const lenWindow = lenStr;
  77. double winsq = (1.0/lenWindow)+(1.0/(lenWindow*lenWindow));
  78. long wincube = lenWindow*lenWindow*lenWindow;
  79. int countA =0;
  80. int countC =0;
  81. int countG =0;
  82. int countT= 0;
  83. complex<double> s1(0.0,0.0);
  84. double ss2=0.0;
  85. double sq=0.0;
  86. double s2=0.0;
  87. //temporary check
  88. double p1=0.0;
  89. double p2=0.0;
  90. double p3=0.0;
  91. double sbar =0.0;
  92. double p=0.0;
  93. //ofstream out("power");
  94. for(long i =0; i < lenWindow; i++ )
  95. {
  96. if( strCRegion[i] == 'A')
  97. { countA++; continue;}
  98. if( strCRegion[i] == 'G')
  99. { countG++; continue;}
  100. if( strCRegion[i] == 'C')
  101. { countC++; continue;}
  102. if( strCRegion[i] == 'T')
  103. { countT++; continue;}
  104. }
  105. //out<<"Count A :"<<countA <<" countT :"<<countT <<"countG :"<<countG <<"countC :"<<countC <<endl;
  106. sq = 0.0+ pow(countA,2)+pow(countG,2) + pow(countT,2) + pow(countC,2);
  107. double f =0.0;
  108. for(long k = 1 ; k <=lenWindow/2 ; k++ )
  109. {
  110. s2 =0.0;
  111. sbar =0.0;
  112. p =0.0;
  113. f= ((1.0)*k)/lenWindow;
  114. // for each Nucleotide. A T G C
  115. for(int i = 0; i <4 ; i ++ )
  116. {
  117. s1= complex<double>(0.0,0.0);
  118. for (int bi=0; bi<lenWindow; bi++)
  119. {
  120. if(strCRegion[bi] == nuclt[i] )
  121. s1= s1+exp(i1*2.0*PI*f*(bi+1.0));
  122. }
  123. s2 = s2+ (abs(s1)*abs(s1));
  124. //ss2=ss2 + (1/lenWindow*lenWindow)*(abs(s1)*abs(s1));
  125. }
  126. //store the value of sf for each freq.
  127. sf[k-1]=s2;
  128. // to calculater power specturm and code region
  129. sq = sq/wincube;
  130. sbar =(winsq-sq);
  131. sq = sbar * (lenWindow/2.0);
  132. // to calcuater signal to noise ratio
  133. s2 = s2/(sq*lenWindow*lenWindow);
  134. p= s2/sbar;
  135. //temporary check;
  136. //p1=(p*lenWindow*sbar)/2;
  137. //p2=p1*2;
  138. p2=p/(lenWindow*sbar);
  139. p3=2*p2;
  140. s[k-1]=f;
  141. x[k-1]=p;
  142. xp[k-1]=p2;
  143. xp1[k-1]=p3;
  144. //temporary check
  145. //x1[k-1]=p1;
  146. //x2[k-1]=p2;
  147. //x3[k-1]=p3;
  148. // out<<"\n Frequency:"<<f <<" Power : "<<p;
  149. } // outer for loop
  150. //calculating the sbar value
  151. double sbar1=0.00;
  152. double sbar2=0.00;
  153. for(k = 1 ; k <=lenWindow/2 ; k++ )
  154. {
  155. sbar1=sbar1+sf[k-1];
  156. }
  157. sbar1=(sbar1*2)/lenWindow;
  158. sbar2=sbar1/(lenWindow*lenWindow);
  159. //calculating the sf/sbar1
  160. for(k = 1 ; k <=lenWindow/2 ; k++ )
  161. {
  162. x1[k-1]=sf[k-1]/sbar1;
  163. xr[k-1]=x1[k-1]/(lenWindow*sbar2);
  164. xr1[k-1]=xr[k-1]*2;
  165. }
  166. int numPeaks=0;
  167. int genePeak=0;
  168. for( k = 1 ; k <= (lenWindow/2) ; k++ )
  169. {
  170. if( x[k-1] > 4.0)
  171. {
  172. //if( s[k-1] == 1.0/3.0 )] //added by Sreenu on mandars suggestion on 19/04/2002
  173. if( ( s[k-1] >= 0.32 ) && ( s[k-1] <= 0.34 ))
  174. genePeak=1;
  175. else
  176. numPeaks++;
  177. }
  178. }
  179. //delete []s ;
  180. //delete []x ;
  181. //s =NULL ;
  182. //x =NULL ;
  183. if(genePeak==1)
  184. {
  185. if(numPeaks==0)
  186. return 1;
  187. else
  188. return 2;
  189. }
  190. else
  191. {
  192. return 0;
  193. }
  194. } // end of member function.
  195. //End of the pwerspectrum module
  196. //Included the Windowanalysis module
  197. struct structCR
  198. {
  199. long start;
  200. long end;
  201. char type;
  202. };
  203. class WindowAnalysis //__declspec(dllexport)
  204. {
  205. public:
  206. long *s;
  207. double *x;
  208. vector<structCR> vecCRegion;
  209. WindowAnalysis();
  210. WindowAnalysis(int len ,double thold,long intron , long exon);
  211. virtual ~WindowAnalysis();
  212. void setWindowLen(int len);
  213. void settherHold(double thold);
  214. void setIntronExon(long intron , long exon );
  215. int doFt(const char * strSeq , char *logfile);
  216. long framecount;
  217. private :
  218. int lenWindow;
  219. double thersHold;
  220. long exonCutOff;
  221. long intronCutOff;
  222. int findexp(int WindowSize);
  223. //long framecount;
  224. complex<double> * expval;
  225. WindowAnalysis& WindowAnalysis::operator =(const WindowAnalysis &cwa);
  226. WindowAnalysis(const WindowAnalysis &cwa);
  227. };
  228. WindowAnalysis::WindowAnalysis()
  229. {
  230. expval = NULL;
  231. s = NULL;
  232. x = NULL;
  233. framecount=0;
  234. }
  235. WindowAnalysis::~WindowAnalysis()
  236. {
  237. delete []expval;
  238. delete []s ;
  239. delete []x ;
  240. printf("Destructor of WindowAnalysis ");
  241. fflush(stdout);
  242. //delete []CRegion;
  243. }
  244. WindowAnalysis::WindowAnalysis(int len , double thold ,long intron , long exon)
  245. {
  246. lenWindow=len;
  247. thersHold=thold;
  248. exonCutOff=exon;
  249. intronCutOff =intron;
  250. s = NULL;
  251. x = NULL;
  252. framecount=0;
  253. // cout<<"ThresHold Value "<<thersHold;
  254. // cout<<"Intron :"<<intronCutOff<<" Exon :"<<exonCutOff<<endl;
  255. expval=new complex<double>[lenWindow];
  256. }
  257. void WindowAnalysis::settherHold(double thold)
  258. {
  259. thersHold =thold;
  260. // cout<< "ThresHold Value "<<thersHold;
  261. }
  262. void WindowAnalysis::setWindowLen(int len)
  263. {
  264. lenWindow=len;
  265. delete [] expval;
  266. expval=new complex<double>[lenWindow];
  267. }
  268. void WindowAnalysis::setIntronExon(long intron , long exon )
  269. {
  270. exonCutOff=exon;
  271. intronCutOff =intron;
  272. }
  273. int WindowAnalysis::doFt(const char * strSeq , char * logfile)
  274. {
  275. const double F3 = 1.000/3;
  276. const double PI = 3.14159265;
  277. //ofstream out("output" );
  278. ofstream out(logfile ,ios::app);
  279. long lenStr = strlen(strSeq);
  280. if(lenStr<lenWindow)
  281. {
  282. out<<"Sequence length is less than windowlength."<<endl;
  283. return 1;
  284. }
  285. cout<<"\n Sequence Length :"<<lenStr<<endl;
  286. int result= findexp(lenWindow);
  287. //long *CRegion = new long[lenStr-lenWindow+ 1];
  288. //long *s = new long[lenStr-lenWindow+ 1];
  289. //double *x = new double[lenStr-lenWindow+1];
  290. delete [] s;
  291. delete [] x;
  292. s = new long[lenStr-lenWindow+ 1];
  293. x = new double[lenStr-lenWindow+1];
  294. char nuclt[5]= "ATGC";
  295. double winsq = (1.0/lenWindow)+(1.0/(lenWindow*lenWindow));
  296. long wincube = lenWindow*lenWindow*lenWindow;
  297. int countA =0;
  298. int countC =0;
  299. int countG =0;
  300. int countT= 0;
  301. complex<double> s1(0.0,0.0);
  302. double sq=0.0;
  303. double s2=0.0;
  304. double sbar =0.0;
  305. double p=0.0;
  306. framecount=0;
  307. for(long k = 0 ; k < lenStr-lenWindow+1 ; k++ )
  308. {
  309. sq =0.0;
  310. s2 =0.0;
  311. sbar =0.0;
  312. p =0.0;
  313. // Frame wise count of A C G T seperately
  314. if(k==0)
  315. {
  316. for(long i =k; i < k+lenWindow; i++ )
  317. {
  318. if( strSeq[i] == 'A')
  319. { countA++; continue;}
  320. if( strSeq[i] == 'G')
  321. { countG++; continue;}
  322. if( strSeq[i] == 'C')
  323. { countC++; continue;}
  324. if( strSeq[i] == 'T')
  325. { countT++; continue;}
  326. } //end of count for look.
  327. }else
  328. {
  329. switch(strSeq[k-1])
  330. {
  331. case 'A' :
  332. { countA--; break; }
  333. case 'T' :
  334. { countT--; break; }
  335. case 'G' :
  336. { countG--; break; }
  337. case 'C' :
  338. { countC--; break; }
  339. }
  340. switch(strSeq[k+lenWindow-1])
  341. {
  342. case 'A' :
  343. { countA++; break; }
  344. case 'T' :
  345. { countT++; break; }
  346. case 'G' :
  347. { countG++; break; }
  348. case 'C' :
  349. { countC++; break; }
  350. }
  351. }
  352. //out<<"Count A :"<<countA <<" Frame count :"<<k<<endl;
  353. //out<<"Count T :"<<countT <<" Frame count :"<<k<<endl;
  354. //out<<"Count G :"<<countG <<" Frame count :"<<k<<endl;
  355. //out<<"Count C :"<<countC <<" Frame count :"<<k<<endl;
  356. sq = 0.0+ pow(countA,2)+pow(countG,2) + pow(countT,2) + pow(countC,2);
  357. // for each Nucleotide. A T G C
  358. for(int i = 0; i <4 ; i ++ )
  359. {
  360. s1= complex<double>(0.0,0.0);
  361. for (int bi=0; bi<lenWindow; bi++)
  362. {
  363. if(strSeq[k+bi] == nuclt[i] )
  364. s1= s1+expval[bi];
  365. }
  366. s2 = s2+ (abs(s1)*abs(s1));
  367. }
  368. // to calculater power peak
  369. sq = sq/wincube;
  370. sbar =(winsq-sq);
  371. sq = sbar * (lenWindow/2.0);
  372. // to calcuater signal to noise ratio
  373. s2 = s2/(sq*lenWindow*lenWindow);
  374. p= s2/sbar;
  375. s[framecount]=k+(lenWindow/2);
  376. x[framecount]=p;
  377. out<<s[framecount]<<" "<<x[framecount]<<"\n";
  378. framecount++;
  379. } // End of Outer for loop(Frame wise)
  380. //return(0);
  381. //}
  382. // end of Ft function..
  383. //void WindowAnalysis::findCodingRegion(long seqfileId , const char * seqId)
  384. //{
  385. long *CRegion = new long[framecount];
  386. //memset(CRegion , 0 ,(framecount+1)*sizeof(long));
  387. int flag=0;
  388. long i =0 , m =0;
  389. for (m =0; m<framecount; m++)
  390. {
  391. if(x[m]>=thersHold)
  392. {
  393. if(flag == 0)
  394. {
  395. CRegion[i] =s[m];
  396. //out<< "\n START :" <<s[m]<<" : "<<x[m];
  397. //i++;
  398. flag =1;
  399. }
  400. }
  401. else
  402. {
  403. if(flag==1)
  404. {
  405. if(x[m]<thersHold)
  406. {
  407. out<< "\n START :" <<CRegion[i];
  408. out<<"\t\tEND: "<<s[m-1]<<" : "<<x[m-1];
  409. i++;
  410. CRegion[i]=s[m-1];
  411. i++;
  412. }
  413. flag=0;
  414. }
  415. } // end of if.
  416. }// End Coding region loop.
  417. if (flag==1)
  418. {
  419. out<< "\n START :" <<CRegion[i];
  420. out<<"\t\tEND: "<<s[m-1]<<" : "<<x[m-1];
  421. i++;
  422. CRegion[i]=s[m-1];
  423. i++;
  424. }
  425. //delete []CRegion;
  426. // that data is deleted 07/02/02 To inprove performance.
  427. //delete []s ;
  428. //delete []x ;
  429. //s=NULL;
  430. //x=NULL;
  431. out<<"\n Final Coding Regions :"<<endl;
  432. flag =0;
  433. int code =0;
  434. long sta=0 , stp =0;
  435. structCR obj;
  436. obj.type=' ';
  437. for(int j=0 ; j<i ; j +=2)
  438. {
  439. if(flag ==0)
  440. {
  441. sta = CRegion[j];
  442. stp = CRegion[j+1];
  443. if ( (stp-sta)>= exonCutOff ) code =1; else code =0;
  444. flag = 1 ;
  445. }
  446. else
  447. {
  448. if( (CRegion[j]-CRegion[j-1]) >=intronCutOff )
  449. {
  450. if(code ==1)
  451. {
  452. out <<"\nStart :"<<sta <<" End :" << stp;
  453. // inset in database start new set.
  454. obj.start=sta;
  455. obj.end=stp;
  456. vecCRegion.push_back(obj);
  457. //insetcodes( sta , stp,seqfileId,seqId);
  458. sta = CRegion[j];
  459. stp = CRegion[j+1];
  460. if ( (stp-sta)>= exonCutOff ) code =1; else code =0;
  461. flag = 1 ;
  462. }
  463. else
  464. {
  465. //cout << forget the old set.
  466. if ((stp-sta)>= exonCutOff )
  467. { out <<"\nStart :"<<sta <<" End :" << stp;
  468. obj.start=sta;
  469. obj.end=stp;
  470. vecCRegion.push_back(obj);
  471. //insetcodes( sta , stp,seqfileId,seqId);
  472. }
  473. // may need to be commented later.
  474. sta = CRegion[j];
  475. stp = CRegion[j+1];
  476. if ( (stp-sta)>= exonCutOff ) code =1; else code =0;
  477. flag = 1 ;
  478. }
  479. }
  480. else
  481. {
  482. if( ( CRegion[j+1]-CRegion[j] ) >= exonCutOff ) code =1;
  483. stp = CRegion[j+1];
  484. flag =1;
  485. }
  486. }
  487. }
  488. if ((code == 1) || ((stp-sta) >exonCutOff) )
  489. {
  490. out <<"\nStart :"<<sta <<" End :" << stp;
  491. obj.start=sta;
  492. obj.end=stp;
  493. vecCRegion.push_back(obj);
  494. //mapCRegion[sta] =stp;
  495. //insetcodes( sta , stp,seqfileId,seqId);
  496. }
  497. delete []CRegion;
  498. CRegion=NULL;
  499. vector<structCR>::iterator it;
  500. PowerSpectrum powerObj;
  501. char * strtemp=NULL;
  502. for(it =vecCRegion.begin(); it !=vecCRegion.end(); it++)
  503. {
  504. //long start =it->start;
  505. //long end =it->end;
  506. strtemp = new char[( (it->end - it->start) +1)];
  507. memset(strtemp , '\0' ,(it->end - it->start) +1 );
  508. strncpy( strtemp , &strSeq[it->start] ,(it->end - it->start) );
  509. //strncpy( strtemp , &strSeq[start] , end -start);
  510. printf("Length : %d \n ", strlen(strtemp));
  511. printf("start %ld , End %d length %d\n " , it->start ,it->end , (it->end - it->start));
  512. fflush(stdout);
  513. powerObj.setSequence(strtemp);
  514. int peaks =powerObj.doPowerSpectrumAnalysis();
  515. if(peaks==0)
  516. {
  517. it=vecCRegion.erase(it);
  518. if( it !=vecCRegion.end() )
  519. --it;
  520. else
  521. break;
  522. }
  523. else if(peaks==1)
  524. {
  525. it->type ='S';
  526. }
  527. else if(peaks==2)
  528. {
  529. it->type ='M';
  530. }
  531. delete []strtemp;
  532. strtemp = NULL;
  533. }
  534. //store the coding regions in an array
  535. //int elsize;
  536. //elsize=vecCRegion.size();
  537. delete []strtemp;
  538. strtemp = NULL;
  539. //delete []CRegion;
  540. //CRegion=NULL;
  541. //delete []s ;
  542. //delete []x ;
  543. return(0);
  544. } // end of Coding region finding.
  545. // to find exponential positional values to window size .
  546. int WindowAnalysis::findexp(int WindowSize)
  547. {
  548. complex<double> i1(0.0,1.0);
  549. const double PI = 3.14159274;
  550. const double F3 = 1.000/3;
  551. for(int k=0; k<WindowSize; k++)
  552. {
  553. expval[k] = exp(-i1*2.0*PI*F3*(k+1.0));
  554. }
  555. return(0);
  556. }
  557. // ///////////////// End //////////////
  558. //End of Windowanalysis
  559. struct storeseq
  560. {
  561. char seqhead[100];
  562. storeseq(char *s)
  563. {strcpy(seqhead,s);}
  564. };
  565. struct storeth
  566. {
  567. char seqt[100];
  568. double sf;
  569. storeth(char *s,double b)
  570. {strcpy(seqt,s);sf=b;}
  571. };
  572. class Chpcity
  573. {
  574. FILE *fp1,*fp2,*fp3,*fp4;
  575. char *seq;
  576. fpos_t begpos,endpos;
  577. vector <storeseq> stseq;
  578. vector <storeseq>::iterator it;
  579. vector <storeth> stth;
  580. vector <storeth> stth1;
  581. vector <storeth> ::iterator it2;
  582. vector <storeth> ::iterator it3;
  583. public:
  584. void doGenscan();
  585. void doBlast();
  586. void doParse();
  587. void dofft();
  588. void filterSeq();
  589. ~Chpcity()
  590. {
  591. cout<<"Destructor of chpcity"<<endl;
  592. stseq.clear();
  593. }
  594. };
  595. void Chpcity::doGenscan()
  596. {
  597. //variable where the sequence will be stored
  598. WindowAnalysis winobj;
  599. int seqno=1;
  600. //storeth objth;
  601. char fname[60];
  602. char ch,ch1;
  603. int sin;
  604. char seqhead[100];
  605. vector<structCR>::iterator it1;
  606. winobj.settherHold(0.0);
  607. winobj.setIntronExon(50,50);
  608. printf("Enter the file name:");
  609. gets(fname);
  610. fp1=fopen(fname,"r");
  611. if(fp1==NULL)
  612. {
  613. cout<<"File doesn't exist"<<endl;
  614. exit(1);
  615. }
  616. //create a temporary file to store the sequence without the newline characters
  617. //cout<<"Enter the file name for non coding sequences:");
  618. //gets(fname);
  619. fp3=fopen("d:\\tholdamy.txt","w");
  620. ch=fgetc(fp1);
  621. while(ch!=EOF)
  622. {
  623. sin=0;
  624. fp2=fopen("tmpseq.tmp","w+");
  625. if(fp2==NULL)
  626. {
  627. cout<<"Could not create temporary file"<<endl;
  628. exit(1);
  629. }
  630. do
  631. {
  632. //check for the initial '>'symbol put the characters after the next line
  633. if(ch=='>')
  634. {
  635. while(true)
  636. {
  637. ch=fgetc(fp1);
  638. if(ch=='\n')
  639. {
  640. ch=fgetc(fp1);
  641. break;
  642. }
  643. seqhead[sin]=ch;
  644. sin++;
  645. }//end ofwhile
  646. seqhead[sin]='\0';
  647. }//end of if
  648. if(ch!='\n')
  649. fputc(ch,fp2);
  650. ch=fgetc(fp1);
  651. if(ch==EOF)
  652. {
  653. break;
  654. }
  655. }while(ch!='>');//end of while
  656. fgetpos(fp2,&endpos);
  657. //int endpos=begpos;
  658. seq=new char[endpos +2];
  659. memset(seq,'\0',endpos +2);
  660. begpos=0;
  661. fsetpos(fp2,&begpos);
  662. ch1=fgetc(fp2);
  663. int seqin=0;
  664. while(ch1!=EOF)
  665. {
  666. seq[seqin]=toupper(ch1);
  667. seqin++;
  668. ch1=fgetc(fp2);
  669. }
  670. /*char exseqh[30];
  671. for(int j1=0;seqhead[j1]!=',';j1++);
  672. for(int ie=j1+1,i=0;seqhead[ie]!='\0';ie++,i++)
  673. {
  674. exseqh[i]=seqhead[ie];
  675. }
  676. exseqh[i]='\0';*/
  677. //if(strcmp(seqhead,"(gi|16127994:1080570-1080686, 1080677-1081408), b1016m")==0)
  678. //{
  679. //winobj.setWindowLen(strlen(seq));
  680. winobj.setWindowLen(300);
  681. winobj.doFt(seq,"log");
  682. //cout<<winobj.x[0]<<endl;
  683. //objth.sf=winobj.x[0];
  684. //strcpy(objth.seqt,seqhead);
  685. //double tx=winobj.x[0];
  686. //cout<<objth.sf<<endl;
  687. for(int fr=0;fr<winobj.framecount;fr++)
  688. stth.push_back(storeth(seqhead,winobj.x[fr]));
  689. //}
  690. fclose(fp2);
  691. remove("tmpseq.tmp");
  692. winobj.vecCRegion.clear();
  693. seqno++;
  694. delete []seq;
  695. }
  696. char tempseqt[100];
  697. float tempsf;
  698. for(it2=stth.begin();it2!=stth.end();it2++)
  699. {
  700. it3=it2;
  701. it3++;
  702. for(;it3!=stth.end();it3++)
  703. {
  704. if(it2->sf < it3->sf)
  705. {
  706. tempsf=it3->sf;
  707. it3->sf=it2->sf;
  708. it2->sf=tempsf;
  709. strcpy(tempseqt,it3->seqt);
  710. strcpy(it3->seqt,it2->seqt);
  711. strcpy(it2->seqt,tempseqt);
  712. }
  713. }
  714. }
  715. fprintf(fp3,"%s\n",fname);
  716. for(it2=stth.begin();it2!=stth.end();it2++)
  717. {
  718. fprintf(fp3,"%0.2f\n",it2->sf);
  719. //cout<<it2->seqt<<" "<<it2->sf<<endl;
  720. }
  721. _fcloseall();
  722. //system("del d:\\tmpseq.tmp");
  723. }
  724. void Chpcity::dofft()
  725. {
  726. PowerSpectrum powobj;
  727. char heading[150];
  728. int hin;
  729. char filename[50];
  730. char ch,ch1;
  731. int lenseq;
  732. int no=1;
  733. int no1=1;
  734. printf("Enter the file name for powerspectrum anal.:");
  735. gets(filename);
  736. fp1=fopen(filename,"r");
  737. if(fp1==NULL)
  738. {
  739. cout<<"Cannot Open file"<<endl;
  740. exit(1);
  741. }
  742. //fp4=fopen("d:\\anno\\ecolsfr.txt","w");
  743. ch=fgetc(fp1);
  744. while(ch!=EOF)
  745. {
  746. hin=0;
  747. fp2=fopen("tmpseq.tmp","w+");
  748. if(fp2==NULL)
  749. {
  750. cout<<"Could not create temporary file"<<endl;
  751. exit(1);
  752. }
  753. do
  754. {
  755. //check for the initial '>'symbol put the characters after the next line
  756. if(ch=='>')
  757. {
  758. while(true)
  759. {
  760. ch=fgetc(fp1);
  761. if(ch=='\n')
  762. {
  763. ch=fgetc(fp1);
  764. break;
  765. }
  766. heading[hin]=ch;
  767. hin++;
  768. }//end ofwhile
  769. heading[hin]='\0';
  770. }//end of if
  771. if(ch!='\n')
  772. fputc(ch,fp2);
  773. ch=fgetc(fp1);
  774. if(ch==EOF)
  775. {
  776. break;
  777. }
  778. }while(ch!='>');//end of while
  779. fgetpos(fp2,&endpos);
  780. //int endpos=begpos;
  781. seq=new char[endpos +2];
  782. memset(seq,'\0',endpos +2);
  783. begpos=0;
  784. fsetpos(fp2,&begpos);
  785. ch1=fgetc(fp2);
  786. int seqin=0;
  787. while(ch1!=EOF)
  788. {
  789. seq[seqin]=toupper(ch1);
  790. seqin++;
  791. ch1=fgetc(fp2);
  792. }
  793. //cout<<seq<<endl;
  794. char exseqh[50];
  795. for(int j1=0;heading[j1]!=',';j1++);
  796. for(int ie=j1+1,i=0;heading[ie]!='\0';ie++,i++)
  797. {
  798. exseqh[i]=heading[ie];
  799. }
  800. exseqh[i]='\0';
  801. fp3=fopen(exseqh,"w");
  802. if(fp3==NULL)
  803. {
  804. cout<<"Could not create File"<<endl;
  805. }
  806. powobj.setSequence(seq);
  807. int peak=powobj.doPowerSpectrumAnalysis();
  808. lenseq=strlen(powobj.strCRegion);
  809. lenseq=(lenseq/3)*3;
  810. //fprintf(fp3,">%s\n",heading);
  811. //fprintf(fp4,">%s\n",exseqh);
  812. //fprintf(fp3,"%s\n",seq);
  813. char cfreq[7];
  814. float ffreq;
  815. //double frq;
  816. //frq=1.0/3.0;
  817. for(long k1=0;k1<lenseq/2;k1++)
  818. {
  819. //if(powobj.s[k1]==frq)
  820. //{
  821. //sprintf(cfreq,"%0.4f",powobj.s[k1]);
  822. //ffreq=atof(cfreq);
  823. //ffreq=1/ffreq;
  824. //cout<<powobj.s[k1]<<" "<<powobj.x1[k1]<<" "<<powobj.x[k1]<<endl;
  825. //if(powobj.x1[k1]>4.0)
  826. //{
  827. //stth.push_back(storeth(heading,powobj.x1[k1]));
  828. //}
  829. /*else
  830. {
  831. stth1.push_back(storeth(exseqh,powobj.x1[k1]));
  832. }*/
  833. fprintf(fp3,"%f %f\n",powobj.s[k1],powobj.x[k1]);
  834. no++;
  835. //}
  836. }
  837. //stth.clear();
  838. //stth1.clear();
  839. //fprintf(fp3,"\n");
  840. fclose(fp2);
  841. remove("tmpseq.tmp");
  842. delete []seq;
  843. }//end of while
  844. /*char tempseqt[100];
  845. double tempsf;
  846. for(it2=stth.begin();it2!=stth.end();it2++)
  847. {
  848. it3=it2;
  849. it3++;
  850. for(;it3!=stth.end();it3++)
  851. {
  852. if(it2->sf < it3->sf)
  853. {
  854. tempsf=it3->sf;
  855. it3->sf=it2->sf;
  856. it2->sf=tempsf;
  857. strcpy(tempseqt,it3->seqt);
  858. strcpy(it3->seqt,it2->seqt);
  859. strcpy(it2->seqt,tempseqt);
  860. }
  861. }
  862. }
  863. for(it2=stth.begin();it2!=stth.end();it2++)
  864. {
  865. fprintf(fp3,"%d %s %0.14lf\n",no,it2->seqt,it2->sf);
  866. no++;
  867. }*/
  868. /*for(it2=stth1.begin();it2!=stth1.end();it2++)
  869. {
  870. it3=it2;
  871. it3++;
  872. for(;it3!=stth1.end();it3++)
  873. {
  874. if(it2->sf < it3->sf)
  875. {
  876. tempsf=it3->sf;
  877. it3->sf=it2->sf;
  878. it2->sf=tempsf;
  879. strcpy(tempseqt,it3->seqt);
  880. strcpy(it3->seqt,it2->seqt);
  881. strcpy(it2->seqt,tempseqt);
  882. }
  883. }
  884. }
  885. for(it2=stth1.begin();it2!=stth1.end();it2++)
  886. {
  887. fprintf(fp4,"%d %s %0.14lf\n",no1,it2->seqt,it2->sf);
  888. no1++;
  889. }*/
  890. _fcloseall();
  891. }
  892. void Chpcity::doParse()
  893. {
  894. char pfname[50];
  895. char chp;
  896. //printf("Enter the file name to parse:");
  897. //gets(pfname);
  898. strcpy(pfname,"c:\\localb\\testbl.txt");
  899. fp1=fopen(pfname,"r");
  900. if(fp1==NULL)
  901. {
  902. cout<<"File dooesn't exist"<<endl;
  903. exit(1);
  904. }
  905. chp=fgetc(fp1);
  906. char *seqsrch,*posit;
  907. char seqname[30];
  908. while(chp!=EOF)
  909. {
  910. while(chp!=EOF)
  911. {
  912. seqsrch=new char[7];
  913. memset(seqsrch,'\0',7);
  914. for(int i=0;i<6;i++)
  915. {
  916. if(chp=='\n' || chp==' ')
  917. {
  918. chp=fgetc(fp1);
  919. break;
  920. }
  921. seqsrch[i]=chp;
  922. chp=fgetc(fp1);
  923. if(chp==EOF)
  924. break;
  925. }
  926. //seqsrch[i]='\0';
  927. if(strcmp(seqsrch,"Query=")==0)
  928. break;
  929. delete []seqsrch;
  930. }
  931. if(chp==EOF)
  932. break;
  933. //extract the sequence name
  934. int j=0;
  935. chp=fgetc(fp1);
  936. while(chp!='\n')
  937. {
  938. seqname[j]=chp;
  939. j++;
  940. chp=fgetc(fp1);
  941. }
  942. seqname[j]='\0';
  943. //check for hits and extract positives
  944. chp=fgetc(fp1);
  945. int nohit=0;
  946. int nomatch=0;
  947. char positptage[4];//store the percentage from the positives
  948. while(chp!=EOF)
  949. {
  950. posit=new char[10];
  951. memset(posit,'\0',10);
  952. for(int i=0;i<9;i++)
  953. {
  954. if(chp=='\n' || chp==' ')
  955. {
  956. chp=fgetc(fp1);
  957. break;
  958. }
  959. if(chp=='*')//*means there is no hit for the sequence
  960. {
  961. nohit=1;
  962. break;
  963. }
  964. posit[i]=chp;
  965. chp=fgetc(fp1);
  966. if(strcmp(posit,"Lambda")==0)
  967. {
  968. nomatch=1;
  969. break;
  970. }
  971. }
  972. //posit[i]='\0';
  973. if(nohit==1)//no hits found for the sequence
  974. break;
  975. if(nomatch==1)//no positives >=60%
  976. break;
  977. if(strcmp(posit,"Positives")==0 )
  978. {
  979. //check for >=60%
  980. while(chp!='(')
  981. {
  982. chp=fgetc(fp1);
  983. }
  984. chp=fgetc(fp1);
  985. int pspin=0;
  986. while(chp!='%')
  987. {
  988. positptage[pspin]=chp;
  989. pspin++;
  990. chp=fgetc(fp1);
  991. }
  992. positptage[pspin]='\0';
  993. int intptage=atoi(positptage);
  994. if(intptage>=60)
  995. {
  996. stseq.push_back(storeseq(seqname));
  997. break;
  998. }
  999. }//end of if
  1000. delete []posit;
  1001. }
  1002. }
  1003. fclose(fp1);
  1004. for(it=stseq.begin();it!=stseq.end();it++)
  1005. {
  1006. cout<<it->seqhead<<endl;
  1007. }
  1008. }
  1009. void Chpcity::filterSeq()
  1010. {
  1011. //this module filters out the sequences that has Positives <=x%
  1012. char fch;
  1013. char seqhead[30];
  1014. int findseq,fseqin;
  1015. fp1=fopen("d:\\noncodseq\\noncoseq.txt","r");
  1016. if(fp1==NULL)
  1017. {
  1018. cout<<"File doesn't exist"<<endl;
  1019. exit(1);
  1020. }
  1021. fp2=fopen("d:\\noncodseq\\fnoncoseq.txt","w");
  1022. fch=fgetc(fp1);
  1023. for(it=stseq.begin();it!=stseq.end();it++)
  1024. {
  1025. findseq=0;
  1026. while(findseq==0)
  1027. {
  1028. fseqin=0;
  1029. //check for the initial '>'symbol put the characters after the next line
  1030. if(fch=='>')
  1031. {
  1032. while(true)
  1033. {
  1034. fch=fgetc(fp1);
  1035. if(fch=='\n')
  1036. {
  1037. fch=fgetc(fp1);
  1038. break;
  1039. }
  1040. seqhead[fseqin]=fch;
  1041. fseqin++;
  1042. }//end ofwhile
  1043. seqhead[fseqin]='\0';
  1044. }//end of if
  1045. if(strcmp(it->seqhead,seqhead)==0)
  1046. {
  1047. fprintf(fp2,">%s\n",seqhead);
  1048. while(fch!='\n')
  1049. {
  1050. fputc(fch,fp2);
  1051. fch=fgetc(fp1);
  1052. }
  1053. fputc('\n',fp2);
  1054. findseq=1;
  1055. }
  1056. fch=fgetc(fp1);
  1057. }
  1058. }//end of for loop
  1059. _fcloseall();
  1060. }
  1061. void Chpcity::doBlast()
  1062. {
  1063. //execute the blastall program
  1064. int retval=system("c:\\localb\\blastall -p blastx -d c:\\localb\\pdbaa -i d:\\noncodseq\\noncoseq.txt -o c:\\localb\\testbl.txt -e 0.1");
  1065. }
  1066. int main(int argc, char* argv[])
  1067. {
  1068. Chpcity Ch;
  1069. //Ch.doGenscan();
  1070. //Ch.doBlast();
  1071. //Ch.doParse();
  1072. //Ch.filterSeq();
  1073. Ch.dofft();
  1074. //{//blastall -p blastx -d pdbaa -i d:\noncoseq.txt -o test1.txt -e 0.1}
  1075. //blastall -p blastx -d pdbaa -i d:\noncoseq.txt -o test.xml -m 7 -e 0.1
  1076. return 0;
  1077. }